mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-03-02 20:20:45 +08:00
feat: xhs支持获取评论的点赞数量
This commit is contained in:
@@ -442,9 +442,10 @@ class XiaoHongShuClient(AbstractApiClient):
|
||||
}
|
||||
return await self.post(uri, data=data, return_response=True)
|
||||
|
||||
+@retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
|
||||
async def get_note_by_id_from_html(self, note_id: str):
|
||||
"""
|
||||
-通过解析网页版的笔记详情页HTML,获取笔记详情
|
||||
+通过解析网页版的笔记详情页HTML,获取笔记详情, 该接口可能会出现失败的情况,这里尝试重试3次
|
||||
copy from https://github.com/ReaJason/xhs/blob/eb1c5a0213f6fbb592f0a2897ee552847c69ea2d/xhs/core.py#L217-L259
|
||||
thanks for ReaJason
|
||||
Args:
|
||||
|
||||
@@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from playwright.async_api import (BrowserContext, BrowserType, Page,
|
||||
async_playwright)
|
||||
+from tenacity import RetryError
|
||||
|
||||
import config
|
||||
from base.base_crawler import AbstractCrawler
|
||||
@@ -197,6 +198,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
||||
utils.logger.error(
|
||||
f"[XiaoHongShuCrawler.get_note_detail_from_html] have not fund note detail note_id:{note_id}, err: {ex}")
|
||||
return {}
|
||||
+except RetryError as ex:
|
||||
+utils.logger.error(
|
||||
+f"[XiaoHongShuCrawler.get_note_detail_from_html] Retry error, note_id:{note_id}, err: {ex}")
|
||||
|
||||
get_note_detail_task_list = [
|
||||
get_note_detail_from_html_task(note_id=note_id, semaphore=asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)) for
|
||||
|
||||
Reference in New Issue
Block a user