diff --git a/cmd_arg/arg.py b/cmd_arg/arg.py index 0ae0a21..8e7c360 100644 --- a/cmd_arg/arg.py +++ b/cmd_arg/arg.py @@ -250,6 +250,22 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None): rich_help_panel="Basic Configuration", ), ] = "", + max_comments_per_post: Annotated[ + int, + typer.Option( + "--max_comments_per_post", + help="Maximum number of first-level comments to crawl per post/video", + rich_help_panel="Comment Configuration", + ), + ] = config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES, + xhs_sort_type: Annotated[ + str, + typer.Option( + "--xhs_sort_type", + help="XiaoHongShu sort type (e.g., popularity_descending, time_descending)", + rich_help_panel="Platform Specific Configuration", + ), + ] = "", ) -> SimpleNamespace: """MediaCrawler 命令行入口""" @@ -274,6 +290,11 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None): config.CDP_HEADLESS = enable_headless config.SAVE_DATA_OPTION = save_data_option.value config.COOKIES = cookies + config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = max_comments_per_post + + # Set XiaoHongShu sort type if specified + if xhs_sort_type and platform == PlatformEnum.XHS: + config.SORT_TYPE = xhs_sort_type # Set platform-specific ID lists for detail/creator mode if specified_id_list: diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index 108d2c9..7047468 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -34,7 +34,6 @@ from tenacity import RetryError import config from base.base_crawler import AbstractCrawler -from config import CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES from model.m_xiaohongshu import NoteUrlInfo, CreatorUrlInfo from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool from store import xhs as xhs_store @@ -344,7 +343,7 @@ class XiaoHongShuCrawler(AbstractCrawler): xsec_token=xsec_token, crawl_interval=crawl_interval, callback=xhs_store.batch_update_xhs_note_comments, - max_count=CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES, + max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES, ) # Sleep after fetching comments