feat: xhs支持获取评论的点赞数量

2026-05-08 11:37:36 +08:00 · 2024-08-24 06:07:33 +08:00
parent ab7d8142af
commit 65699aa1cb
4 changed files with 65 additions and 1 deletion
--- a/media_platform/xhs/client.py
+++ b/media_platform/xhs/client.py
@@ -442,9 +442,10 @@ class XiaoHongShuClient(AbstractApiClient):
        }
        return await self.post(uri, data=data, return_response=True)

+    @retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
    async def get_note_by_id_from_html(self, note_id: str):
        """
-        通过解析网页版的笔记详情页HTML，获取笔记详情
+        通过解析网页版的笔记详情页HTML，获取笔记详情, 该接口可能会出现失败的情况，这里尝试重试3次
        copy from https://github.com/ReaJason/xhs/blob/eb1c5a0213f6fbb592f0a2897ee552847c69ea2d/xhs/core.py#L217-L259
        thanks for ReaJason
        Args:
--- a/media_platform/xhs/core.py
+++ b/media_platform/xhs/core.py
@@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Tuple

 from playwright.async_api import (BrowserContext, BrowserType, Page,
                                  async_playwright)
+from tenacity import RetryError

 import config
 from base.base_crawler import AbstractCrawler
@@ -197,6 +198,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
                    utils.logger.error(
                        f"[XiaoHongShuCrawler.get_note_detail_from_html] have not fund note detail note_id:{note_id}, err: {ex}")
                    return {}
+                except RetryError as ex:
+                    utils.logger.error(
+                        f"[XiaoHongShuCrawler.get_note_detail_from_html] Retry error, note_id:{note_id}, err: {ex}")

        get_note_detail_task_list = [
            get_note_detail_from_html_task(note_id=note_id, semaphore=asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)) for