diff --git a/media_platform/bilibili/client.py b/media_platform/bilibili/client.py index 9893f31..12fb1a6 100644 --- a/media_platform/bilibili/client.py +++ b/media_platform/bilibili/client.py @@ -15,6 +15,7 @@ # @Desc : bilibili 请求客户端 import asyncio import json +import random from typing import Any, Callable, Dict, List, Optional, Tuple, Union from urllib.parse import urlencode @@ -82,8 +83,12 @@ class BilibiliClient(AbstractApiClient): :return: """ local_storage = await self.playwright_page.evaluate("() => window.localStorage") - wbi_img_urls = local_storage.get("wbi_img_urls", "") or local_storage.get( - "wbi_img_url") + "-" + local_storage.get("wbi_sub_url") + wbi_img_urls = local_storage.get("wbi_img_urls", "") + if not wbi_img_urls: + img_url_from_storage = local_storage.get("wbi_img_url") + sub_url_from_storage = local_storage.get("wbi_sub_url") + if img_url_from_storage and sub_url_from_storage: + wbi_img_urls = f"{img_url_from_storage}-{sub_url_from_storage}" if wbi_img_urls and "-" in wbi_img_urls: img_url, sub_url = wbi_img_urls.split("-") else: @@ -271,8 +276,18 @@ class BilibiliClient(AbstractApiClient): break comment_list: List[Dict] = comments_res.get("replies", []) - is_end = cursor_info.get("is_end") - next_page = cursor_info.get("next") + + # 检查 is_end 和 next 是否存在 + if "is_end" not in cursor_info or "next" not in cursor_info: + utils.logger.warning(f"[BilibiliClient.get_video_all_comments] 'is_end' or 'next' not in cursor for video_id: {video_id}. Assuming end of comments.") + is_end = True + else: + is_end = cursor_info.get("is_end") + next_page = cursor_info.get("next") + + if not isinstance(is_end, bool): + utils.logger.warning(f"[BilibiliClient.get_video_all_comments] 'is_end' is not a boolean for video_id: {video_id}. Assuming end of comments.") + is_end = True if is_fetch_sub_comments: for comment in comment_list: comment_id = comment['rpid'] diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py index 6e76da4..16706c7 100644 --- a/media_platform/bilibili/core.py +++ b/media_platform/bilibili/core.py @@ -219,10 +219,14 @@ class BilibiliCrawler(AbstractCrawler): for keyword in config.KEYWORDS.split(","): source_keyword_var.set(keyword) utils.logger.info(f"[BilibiliCrawler.search_by_keywords_in_time_range] Current search keyword: {keyword}") - total_notes_crawled = 0 + total_notes_crawled_for_keyword = 0 for day in pd.date_range(start=config.START_DAY, end=config.END_DAY, freq='D'): - if daily_limit and total_notes_crawled >= config.CRAWLER_MAX_NOTES_COUNT: + if daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT: + utils.logger.info(f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}', skipping remaining days.") + break + + if not daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT: utils.logger.info(f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}', skipping remaining days.") break @@ -234,11 +238,10 @@ class BilibiliCrawler(AbstractCrawler): if notes_count_this_day >= config.MAX_NOTES_PER_DAY: utils.logger.info(f"[BilibiliCrawler.search] Reached MAX_NOTES_PER_DAY limit for {day.ctime()}.") break - if daily_limit and total_notes_crawled >= config.CRAWLER_MAX_NOTES_COUNT: + if daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT: utils.logger.info(f"[BilibiliCrawler.search] Reached CRAWLER_MAX_NOTES_COUNT limit for keyword '{keyword}'.") break - if not daily_limit and (page - start_page + 1) * bili_limit_count > config.CRAWLER_MAX_NOTES_COUNT: - # For non-daily-limit mode, we still respect the total count in a loose way per day. + if not daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT: break try: @@ -264,12 +267,14 @@ class BilibiliCrawler(AbstractCrawler): for video_item in video_items: if video_item: - if daily_limit and total_notes_crawled >= config.CRAWLER_MAX_NOTES_COUNT: + if daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT: + break + if not daily_limit and total_notes_crawled_for_keyword >= config.CRAWLER_MAX_NOTES_COUNT: break if notes_count_this_day >= config.MAX_NOTES_PER_DAY: break notes_count_this_day += 1 - total_notes_crawled += 1 + total_notes_crawled_for_keyword += 1 video_id_list.append(video_item.get("View").get("aid")) await bilibili_store.update_bilibili_video(video_item) await bilibili_store.update_up_info(video_item)