feat: 添加命令行参数支持

- 添加 --max_comments_per_post 参数用于控制每个帖子爬取的评论数量
- 添加 --xhs_sort_type 参数用于控制小红书排序方式
- 修复小红书 core.py 中 CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES 的导入方式
  从直接导入改为通过 config 模块访问,使命令行参数能正确生效
This commit is contained in:
wanzirong
2026-01-21 16:23:47 +08:00
parent be5b786a74
commit f7d27ab43a
2 changed files with 22 additions and 2 deletions

View File

@@ -34,7 +34,6 @@ from tenacity import RetryError
import config
from base.base_crawler import AbstractCrawler
from config import CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES
from model.m_xiaohongshu import NoteUrlInfo, CreatorUrlInfo
from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
from store import xhs as xhs_store
@@ -344,7 +343,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
xsec_token=xsec_token,
crawl_interval=crawl_interval,
callback=xhs_store.batch_update_xhs_note_comments,
max_count=CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
)
# Sleep after fetching comments