mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-02-06 23:21:33 +08:00
Compare commits
3 Commits
be5b786a74
...
94553fd818
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
94553fd818 | ||
|
|
90f72536ba | ||
|
|
f7d27ab43a |
@@ -250,6 +250,14 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
|
||||
rich_help_panel="Basic Configuration",
|
||||
),
|
||||
] = "",
|
||||
max_comments_count_singlenotes: Annotated[
|
||||
int,
|
||||
typer.Option(
|
||||
"--max_comments_count_singlenotes",
|
||||
help="Maximum number of first-level comments to crawl per post/video",
|
||||
rich_help_panel="Comment Configuration",
|
||||
),
|
||||
] = config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
||||
) -> SimpleNamespace:
|
||||
"""MediaCrawler 命令行入口"""
|
||||
|
||||
@@ -274,6 +282,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
|
||||
config.CDP_HEADLESS = enable_headless
|
||||
config.SAVE_DATA_OPTION = save_data_option.value
|
||||
config.COOKIES = cookies
|
||||
config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = max_comments_count_singlenotes
|
||||
|
||||
# Set platform-specific ID lists for detail/creator mode
|
||||
if specified_id_list:
|
||||
|
||||
@@ -34,7 +34,6 @@ from tenacity import RetryError
|
||||
|
||||
import config
|
||||
from base.base_crawler import AbstractCrawler
|
||||
from config import CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES
|
||||
from model.m_xiaohongshu import NoteUrlInfo, CreatorUrlInfo
|
||||
from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
|
||||
from store import xhs as xhs_store
|
||||
@@ -344,7 +343,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
||||
xsec_token=xsec_token,
|
||||
crawl_interval=crawl_interval,
|
||||
callback=xhs_store.batch_update_xhs_note_comments,
|
||||
max_count=CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
||||
max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
||||
)
|
||||
|
||||
# Sleep after fetching comments
|
||||
|
||||
Reference in New Issue
Block a user