Mirror of https://github.com/NanmiCoder/MediaCrawler.git
Synced 2026-02-25 09:30:47 +08:00
Merge pull request #802 from Cae1anSou/fix/douyin-concurrent-comments
fix: fetch Douyin comments concurrently after each page instead of waiting for all pages
This commit is contained in:
@@ -151,19 +151,24 @@ class DouYinCrawler(AbstractCrawler):
|
||||
utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed,账号也许被风控了。")
|
||||
break
|
||||
dy_search_id = posts_res.get("extra", {}).get("logid", "")
|
||||
page_aweme_list = []
|
||||
for post_item in posts_res.get("data"):
|
||||
try:
|
||||
aweme_info: Dict = (post_item.get("aweme_info") or post_item.get("aweme_mix_info", {}).get("mix_items")[0])
|
||||
except TypeError:
|
||||
continue
|
||||
aweme_list.append(aweme_info.get("aweme_id", ""))
|
||||
page_aweme_list.append(aweme_info.get("aweme_id", ""))
|
||||
await douyin_store.update_douyin_aweme(aweme_item=aweme_info)
|
||||
await self.get_aweme_media(aweme_item=aweme_info)
|
||||
|
||||
# Batch get note comments for the current page
|
||||
await self.batch_get_note_comments(page_aweme_list)
|
||||
|
||||
# Sleep after each page navigation
|
||||
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
|
||||
utils.logger.info(f"[DouYinCrawler.search] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after page {page-1}")
|
||||
utils.logger.info(f"[DouYinCrawler.search] keyword:{keyword}, aweme_list:{aweme_list}")
|
||||
await self.batch_get_note_comments(aweme_list)
|
||||
|
||||
async def get_specified_awemes(self):
|
||||
"""Get the information and comments of the specified post from URLs or IDs"""
|
||||
|
||||
Reference in New Issue
Block a user