From 355ed183dd2ad4726f9a724efc93f8d83ef778a3 Mon Sep 17 00:00:00 2001 From: Lei Cao <87511626+cllei12@users.noreply.github.com> Date: Sat, 5 Jul 2025 22:14:31 +0000 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E9=80=89=E6=8B=A9=E5=BE=AE?= =?UTF-8?q?=E5=8D=9A=E6=90=9C=E7=B4=A2=E7=B1=BB=E5=9E=8B=E7=9A=84=E9=85=8D?= =?UTF-8?q?=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config/base_config.py | 2 ++ media_platform/weibo/core.py | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/config/base_config.py b/config/base_config.py index 562e985..1411f8c 100644 --- a/config/base_config.py +++ b/config/base_config.py @@ -21,6 +21,8 @@ PUBLISH_TIME_TYPE = 0 CRAWLER_TYPE = ( "search" # 爬取类型,search(关键词搜索) | detail(帖子详情)| creator(创作者主页数据) ) +# 微博搜索类型 default (综合) | real_time (实时) | popular (热门) | video (视频) +WEIBO_SEARCH_TYPE = "popular" # 自定义User Agent(暂时仅对XHS有效) UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0" diff --git a/media_platform/weibo/core.py b/media_platform/weibo/core.py index 13df4e4..ccc0fb1 100644 --- a/media_platform/weibo/core.py +++ b/media_platform/weibo/core.py @@ -124,6 +124,20 @@ class WeiboCrawler(AbstractCrawler): if config.CRAWLER_MAX_NOTES_COUNT < weibo_limit_count: config.CRAWLER_MAX_NOTES_COUNT = weibo_limit_count start_page = config.START_PAGE + + # Set the search type based on the configuration for weibo + if config.WEIBO_SEARCH_TYPE == "default": + search_type = SearchType.DEFAULT + elif config.WEIBO_SEARCH_TYPE == "real_time": + search_type = SearchType.REAL_TIME + elif config.WEIBO_SEARCH_TYPE == "popular": + search_type = SearchType.POPULAR + elif config.WEIBO_SEARCH_TYPE == "video": + search_type = SearchType.VIDEO + else: + utils.logger.error(f"[WeiboCrawler.search] Invalid WEIBO_SEARCH_TYPE: {config.WEIBO_SEARCH_TYPE}") + return + for keyword in config.KEYWORDS.split(","): source_keyword_var.set(keyword) utils.logger.info(f"[WeiboCrawler.search] Current search keyword: {keyword}") @@ -137,7 +151,7 @@ class WeiboCrawler(AbstractCrawler): search_res = await self.wb_client.get_note_by_keyword( keyword=keyword, page=page, - search_type=SearchType.DEFAULT + search_type=search_type ) note_id_list: List[str] = [] note_list = filter_search_result_card(search_res.get("cards"))