All_platform_comments_restrict

2026-06-09 11:27:26 +08:00 · 2024-10-23 16:32:02 +08:00
parent 19269c66fd
commit 7e53c4acfc
12 changed files with 44 additions and 19 deletions
--- a/media_platform/weibo/client.py
+++ b/media_platform/weibo/client.py
@@ -149,23 +149,28 @@ class WeiboClient:
        return await self.get(uri, params, headers=headers)

    async def get_note_all_comments(self, note_id: str, crawl_interval: float = 1.0,
-                                    callback: Optional[Callable] = None, ):
+                                    callback: Optional[Callable] = None,
+                                    max_count: int = 10,
+                                    ):
        """
        get note all comments include sub comments
        :param note_id:
        :param crawl_interval:
        :param callback:
+        :param max_count:
        :return:
        """

        result = []
        is_end = False
        max_id = -1
-        while not is_end:
+        while not is_end and len(result) < max_count:
            comments_res = await self.get_note_comments(note_id, max_id)
            max_id: int = comments_res.get("max_id")
            comment_list: List[Dict] = comments_res.get("data", [])
            is_end = max_id == 0
+            if len(result) + len(comment_list) > max_count:
+                comment_list = comment_list[:max_count - len(result)]
            if callback:  # 如果有回调函数，就执行回调函数
                await callback(note_id, comment_list)
            await asyncio.sleep(crawl_interval)
--- a/media_platform/weibo/core.py
+++ b/media_platform/weibo/core.py
@@ -206,7 +206,8 @@ class WeiboCrawler(AbstractCrawler):
                await self.wb_client.get_note_all_comments(
                    note_id=note_id,
                    crawl_interval=random.randint(1,3), # 微博对API的限流比较严重，所以延时提高一些
-                    callback=weibo_store.batch_update_weibo_note_comments
+                    callback=weibo_store.batch_update_weibo_note_comments,
+                    max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES
                )
            except DataFetchError as ex:
                utils.logger.error(f"[WeiboCrawler.get_note_comments] get note_id: {note_id} comment error: {ex}")