mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-02-22 06:51:00 +08:00
增加选择微博搜索类型的配置
This commit is contained in:
@@ -21,6 +21,8 @@ PUBLISH_TIME_TYPE = 0
|
||||
CRAWLER_TYPE = (
|
||||
"search" # 爬取类型,search(关键词搜索) | detail(帖子详情)| creator(创作者主页数据)
|
||||
)
|
||||
# 微博搜索类型:default (综合) | real_time (实时) | popular (热门) | video (视频)
|
||||
WEIBO_SEARCH_TYPE = "popular"
|
||||
# 自定义User Agent(暂时仅对XHS有效)
|
||||
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"
|
||||
|
||||
|
||||
@@ -124,6 +124,20 @@ class WeiboCrawler(AbstractCrawler):
|
||||
if config.CRAWLER_MAX_NOTES_COUNT < weibo_limit_count:
|
||||
config.CRAWLER_MAX_NOTES_COUNT = weibo_limit_count
|
||||
start_page = config.START_PAGE
|
||||
|
||||
# Set the search type based on the configuration for weibo
|
||||
if config.WEIBO_SEARCH_TYPE == "default":
|
||||
search_type = SearchType.DEFAULT
|
||||
elif config.WEIBO_SEARCH_TYPE == "real_time":
|
||||
search_type = SearchType.REAL_TIME
|
||||
elif config.WEIBO_SEARCH_TYPE == "popular":
|
||||
search_type = SearchType.POPULAR
|
||||
elif config.WEIBO_SEARCH_TYPE == "video":
|
||||
search_type = SearchType.VIDEO
|
||||
else:
|
||||
utils.logger.error(f"[WeiboCrawler.search] Invalid WEIBO_SEARCH_TYPE: {config.WEIBO_SEARCH_TYPE}")
|
||||
return
|
||||
|
||||
for keyword in config.KEYWORDS.split(","):
|
||||
source_keyword_var.set(keyword)
|
||||
utils.logger.info(f"[WeiboCrawler.search] Current search keyword: {keyword}")
|
||||
@@ -137,7 +151,7 @@ class WeiboCrawler(AbstractCrawler):
|
||||
search_res = await self.wb_client.get_note_by_keyword(
|
||||
keyword=keyword,
|
||||
page=page,
|
||||
search_type=SearchType.DEFAULT
|
||||
search_type=search_type
|
||||
)
|
||||
note_id_list: List[str] = []
|
||||
note_list = filter_search_result_card(search_res.get("cards"))
|
||||
|
||||
Reference in New Issue
Block a user