mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-08 02:47:26 +08:00
finish_all_for_expand_bili
This commit is contained in:
@@ -10,16 +10,16 @@
|
||||
|
||||
|
||||
# 基础配置
|
||||
PLATFORM = "bili"
|
||||
PLATFORM = "xhs"
|
||||
KEYWORDS = "编程副业,编程兼职" # 关键词搜索配置,以英文逗号分隔
|
||||
LOGIN_TYPE = "phone" # qrcode or phone or cookie
|
||||
LOGIN_TYPE = "qrcode" # qrcode or phone or cookie
|
||||
COOKIES = ""
|
||||
# 具体值参见media_platform.xxx.field下的枚举值,暂时只支持小红书
|
||||
SORT_TYPE = "popularity_descending"
|
||||
# 具体值参见media_platform.xxx.field下的枚举值,暂时只支持抖音
|
||||
PUBLISH_TIME_TYPE = 0
|
||||
CRAWLER_TYPE = (
|
||||
"creator" # 爬取类型,search(关键词搜索) | detail(帖子详情)| creator(创作者主页数据)
|
||||
"search" # 爬取类型,search(关键词搜索) | detail(帖子详情)| creator(创作者主页数据)
|
||||
)
|
||||
# 自定义User Agent(暂时仅对XHS有效)
|
||||
UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'
|
||||
@@ -54,9 +54,6 @@ USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name
|
||||
# 爬取开始页数 默认从第一页开始
|
||||
START_PAGE = 1
|
||||
|
||||
# 爬取粉丝列表开始页数 默认从第一页开始
|
||||
START_CONTACTS_PAGE = 1
|
||||
|
||||
# 爬取视频/帖子的数量控制
|
||||
CRAWLER_MAX_NOTES_COUNT = 200
|
||||
|
||||
@@ -147,11 +144,7 @@ DY_CREATOR_ID_LIST = [
|
||||
|
||||
# 指定bili创作者ID列表(sec_id)
|
||||
BILI_CREATOR_ID_LIST = [
|
||||
# "20813884",
|
||||
"520819684",
|
||||
# "472747194",
|
||||
# "519872016",
|
||||
# "372201438",
|
||||
"20813884",
|
||||
# ........................
|
||||
]
|
||||
|
||||
@@ -202,8 +195,15 @@ END_DAY = '2024-01-01'
|
||||
# 若为 True,则在 START_DAY 至 END_DAY 范围内逐天进行筛选,这样能够突破 1000 条视频的限制,最大程度爬取该关键词下的所有视频
|
||||
ALL_DAY = False
|
||||
|
||||
#!!! 下面仅支持 bilibili creator搜索
|
||||
# 选择爬取creator主页的评论,还是爬取creator的动态和关系列表(True为前者)
|
||||
CREATOR_MODE = True
|
||||
|
||||
# 爬取creator粉丝列表时起始爬取页数
|
||||
START_CONTACTS_PAGE = 1
|
||||
|
||||
# 爬取作者粉丝和关注列表数量控制(单作者)
|
||||
CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES = 100
|
||||
|
||||
# 爬取作者动态粉丝和关注列表数量控制(单作者)
|
||||
# 爬取作者动态数量控制(单作者)
|
||||
CRAWLER_MAX_DYNAMICS_COUNT_SINGLENOTES = 50
|
||||
@@ -12,16 +12,11 @@
|
||||
import os
|
||||
|
||||
# mysql config
|
||||
# RELATION_DB_PWD = os.getenv("RELATION_DB_PWD", "123456")
|
||||
# RELATION_DB_USER = os.getenv("RELATION_DB_USER", "root")
|
||||
# RELATION_DB_HOST = os.getenv("RELATION_DB_HOST", "localhost")
|
||||
# RELATION_DB_PORT = os.getenv("RELATION_DB_PORT", 3306)
|
||||
# RELATION_DB_NAME = os.getenv("RELATION_DB_NAME", "media_crawler")
|
||||
RELATION_DB_HOST = "47.94.233.47" # 替换为你的数据库域名/公网IP
|
||||
RELATION_DB_PORT = 3306 # 替换为你的数据库端口(通常3306)
|
||||
RELATION_DB_USER = "remote_user" # 替换为你的数据库用户名
|
||||
RELATION_DB_PWD = "314159" # 替换为你的数据库密码
|
||||
RELATION_DB_NAME = "Test" # 替换为你的数据库名称
|
||||
RELATION_DB_PWD = os.getenv("RELATION_DB_PWD", "123456")
|
||||
RELATION_DB_USER = os.getenv("RELATION_DB_USER", "root")
|
||||
RELATION_DB_HOST = os.getenv("RELATION_DB_HOST", "localhost")
|
||||
RELATION_DB_PORT = os.getenv("RELATION_DB_PORT", 3306)
|
||||
RELATION_DB_NAME = os.getenv("RELATION_DB_NAME", "media_crawler")
|
||||
|
||||
|
||||
# redis config
|
||||
|
||||
Reference in New Issue
Block a user