mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 03:17:25 +08:00
feat: other platfrom support the cdp mode
This commit is contained in:
@@ -22,7 +22,7 @@ CRAWLER_TYPE = (
|
||||
"search" # 爬取类型,search(关键词搜索) | detail(帖子详情)| creator(创作者主页数据)
|
||||
)
|
||||
# 自定义User Agent(暂时仅对XHS有效)
|
||||
UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'
|
||||
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"
|
||||
|
||||
# 是否开启 IP 代理
|
||||
ENABLE_IP_PROXY = False
|
||||
@@ -190,9 +190,9 @@ ZHIHU_CREATOR_URL_LIST = [
|
||||
|
||||
# 指定知乎需要爬取的帖子ID列表
|
||||
ZHIHU_SPECIFIED_ID_LIST = [
|
||||
"https://www.zhihu.com/question/826896610/answer/4885821440", # 回答
|
||||
"https://zhuanlan.zhihu.com/p/673461588", # 文章
|
||||
"https://www.zhihu.com/zvideo/1539542068422144000" # 视频
|
||||
"https://www.zhihu.com/question/826896610/answer/4885821440", # 回答
|
||||
"https://zhuanlan.zhihu.com/p/673461588", # 文章
|
||||
"https://www.zhihu.com/zvideo/1539542068422144000", # 视频
|
||||
]
|
||||
|
||||
# 词云相关
|
||||
@@ -212,10 +212,10 @@ STOP_WORDS_FILE = "./docs/hit_stopwords.txt"
|
||||
FONT_PATH = "./docs/STZHONGS.TTF"
|
||||
|
||||
# 爬取开始的天数,仅支持 bilibili 关键字搜索,YYYY-MM-DD 格式,若为 None 则表示不设置时间范围,按照默认关键字最多返回 1000 条视频的结果处理
|
||||
START_DAY = '2024-01-01'
|
||||
START_DAY = "2024-01-01"
|
||||
|
||||
# 爬取结束的天数,仅支持 bilibili 关键字搜索,YYYY-MM-DD 格式,若为 None 则表示不设置时间范围,按照默认关键字最多返回 1000 条视频的结果处理
|
||||
END_DAY = '2024-01-01'
|
||||
END_DAY = "2024-01-01"
|
||||
|
||||
# 是否开启按每一天进行爬取的选项,仅支持 bilibili 关键字搜索
|
||||
# 若为 False,则忽略 START_DAY 与 END_DAY 设置的值
|
||||
@@ -233,4 +233,4 @@ START_CONTACTS_PAGE = 1
|
||||
CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES = 100
|
||||
|
||||
# 爬取作者动态数量控制(单作者)
|
||||
CRAWLER_MAX_DYNAMICS_COUNT_SINGLENOTES = 50
|
||||
CRAWLER_MAX_DYNAMICS_COUNT_SINGLENOTES = 50
|
||||
|
||||
Reference in New Issue
Block a user