From c56b8c4c5dc8400c154d2afe29f5b27ad20982b7 Mon Sep 17 00:00:00 2001
From: Caelan_Windows
Date: Sat, 3 Jan 2026 01:47:24 +0800
Subject: [PATCH] fix(douyin): fetch comments concurrently after each page
 instead of waiting for all pages

- Moved batch_get_note_comments call inside the pagination loop
- Comments are now fetched immediately after each page of videos is processed
- This allows real-time observation of comment crawling progress
- Improves data availability by not waiting for all video data to be
  collected first
---
 media_platform/douyin/core.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/media_platform/douyin/core.py b/media_platform/douyin/core.py
index 3a0ec5d..9308b0f 100644
--- a/media_platform/douyin/core.py
+++ b/media_platform/douyin/core.py
@@ -151,19 +151,24 @@ class DouYinCrawler(AbstractCrawler):
                 utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed,账号也许被风控了。")
                 break
             dy_search_id = posts_res.get("extra", {}).get("logid", "")
+            page_aweme_list = []
             for post_item in posts_res.get("data"):
                 try:
                     aweme_info: Dict = (post_item.get("aweme_info") or post_item.get("aweme_mix_info", {}).get("mix_items")[0])
                 except TypeError:
                     continue
                 aweme_list.append(aweme_info.get("aweme_id", ""))
+                page_aweme_list.append(aweme_info.get("aweme_id", ""))
                 await douyin_store.update_douyin_aweme(aweme_item=aweme_info)
                 await self.get_aweme_media(aweme_item=aweme_info)
+
+            # Batch get note comments for the current page
+            await self.batch_get_note_comments(page_aweme_list)
+
             # Sleep after each page navigation
             await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
             utils.logger.info(f"[DouYinCrawler.search] Sleeping for {config.CRAWLER_MAX_SLEEP_SEC} seconds after page {page-1}")
         utils.logger.info(f"[DouYinCrawler.search] keyword:{keyword}, aweme_list:{aweme_list}")
-        await self.batch_get_note_comments(aweme_list)

     async def get_specified_awemes(self):
         """Get the information and comments of the specified post from URLs or IDs"""