diff --git a/cmd_arg/arg.py b/cmd_arg/arg.py index 0ae0a21..047e4f2 100644 --- a/cmd_arg/arg.py +++ b/cmd_arg/arg.py @@ -250,6 +250,14 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None): rich_help_panel="Basic Configuration", ), ] = "", + max_comments_count_singlenotes: Annotated[ + int, + typer.Option( + "--max_comments_count_singlenotes", + help="Maximum number of first-level comments to crawl per post/video", + rich_help_panel="Comment Configuration", + ), + ] = config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES, ) -> SimpleNamespace: """MediaCrawler 命令行入口""" @@ -274,6 +282,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None): config.CDP_HEADLESS = enable_headless config.SAVE_DATA_OPTION = save_data_option.value config.COOKIES = cookies + config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = max_comments_count_singlenotes # Set platform-specific ID lists for detail/creator mode if specified_id_list: diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index 108d2c9..7047468 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -34,7 +34,6 @@ from tenacity import RetryError import config from base.base_crawler import AbstractCrawler -from config import CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES from model.m_xiaohongshu import NoteUrlInfo, CreatorUrlInfo from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool from store import xhs as xhs_store @@ -344,7 +343,7 @@ class XiaoHongShuCrawler(AbstractCrawler): xsec_token=xsec_token, crawl_interval=crawl_interval, callback=xhs_store.batch_update_xhs_note_comments, - max_count=CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES, + max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES, ) # Sleep after fetching comments