feat: support CDP mode on other platforms

This commit is contained in:
程序员阿江(Relakkes)
2025-07-03 17:13:32 +08:00
parent c892c3324c
commit 848df2b491
9 changed files with 565 additions and 102 deletions

View File

@@ -22,7 +22,7 @@ CRAWLER_TYPE = (
"search" # 爬取类型search(关键词搜索) | detail(帖子详情)| creator(创作者主页数据)
)
# 自定义User Agent暂时仅对XHS有效
UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"
# 是否开启 IP 代理
ENABLE_IP_PROXY = False
@@ -190,9 +190,9 @@ ZHIHU_CREATOR_URL_LIST = [
# 指定知乎需要爬取的帖子ID列表
ZHIHU_SPECIFIED_ID_LIST = [
"https://www.zhihu.com/question/826896610/answer/4885821440", # 回答
"https://zhuanlan.zhihu.com/p/673461588", # 文章
"https://www.zhihu.com/zvideo/1539542068422144000" # 视频
"https://www.zhihu.com/question/826896610/answer/4885821440", # 回答
"https://zhuanlan.zhihu.com/p/673461588", # 文章
"https://www.zhihu.com/zvideo/1539542068422144000", # 视频
]
# 词云相关
@@ -212,10 +212,10 @@ STOP_WORDS_FILE = "./docs/hit_stopwords.txt"
FONT_PATH = "./docs/STZHONGS.TTF"
# 爬取开始的天数,仅支持 bilibili 关键字搜索,YYYY-MM-DD 格式,若为 None 则表示不设置时间范围,按照默认关键字最多返回 1000 条视频的结果处理
START_DAY = '2024-01-01'
START_DAY = "2024-01-01"
# 爬取结束的天数,仅支持 bilibili 关键字搜索,YYYY-MM-DD 格式,若为 None 则表示不设置时间范围,按照默认关键字最多返回 1000 条视频的结果处理
END_DAY = '2024-01-01'
END_DAY = "2024-01-01"
# 是否开启按每一天进行爬取的选项,仅支持 bilibili 关键字搜索
# 若为 False,则忽略 START_DAY 与 END_DAY 设置的值
@@ -233,4 +233,4 @@ START_CONTACTS_PAGE = 1
CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES = 100
# 爬取作者动态数量控制(单作者)
CRAWLER_MAX_DYNAMICS_COUNT_SINGLENOTES = 50
CRAWLER_MAX_DYNAMICS_COUNT_SINGLENOTES = 50