mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 11:27:26 +08:00
feat: 添加命令行参数支持
- 添加 --max_comments_per_post 参数用于控制每个帖子爬取的评论数量 - 添加 --xhs_sort_type 参数用于控制小红书排序方式 - 修复小红书 core.py 中 CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES 的导入方式 从直接导入改为通过 config 模块访问,使命令行参数能正确生效
This commit is contained in:
@@ -34,7 +34,6 @@ from tenacity import RetryError
|
||||
|
||||
import config
|
||||
from base.base_crawler import AbstractCrawler
|
||||
from config import CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES
|
||||
from model.m_xiaohongshu import NoteUrlInfo, CreatorUrlInfo
|
||||
from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
|
||||
from store import xhs as xhs_store
|
||||
@@ -344,7 +343,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
|
||||
xsec_token=xsec_token,
|
||||
crawl_interval=crawl_interval,
|
||||
callback=xhs_store.batch_update_xhs_note_comments,
|
||||
max_count=CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
||||
max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
|
||||
)
|
||||
|
||||
# Sleep after fetching comments
|
||||
|
||||
Reference in New Issue
Block a user