finish_all_for_expand_bili

This commit is contained in:
Bowenwin
2025-05-22 22:26:30 +08:00
parent 59619fff0a
commit 66843f216a
12 changed files with 51 additions and 63 deletions

View File

@@ -10,16 +10,16 @@
# Basic configuration
# NOTE(review): PLATFORM, LOGIN_TYPE and the CRAWLER_TYPE value each appear
# twice in a row below. This looks like the removed and added sides of a diff
# hunk rendered without +/- markers -- confirm which value is intended.
PLATFORM = "bili"
PLATFORM = "xhs"
KEYWORDS = "编程副业,编程兼职" # Search keywords, separated by ASCII (half-width) commas
LOGIN_TYPE = "phone" # qrcode or phone or cookie
LOGIN_TYPE = "qrcode" # qrcode or phone or cookie
COOKIES = ""
# For valid values see the enums under media_platform.xxx.field; currently only Xiaohongshu (XHS) is supported
SORT_TYPE = "popularity_descending"
# For valid values see the enums under media_platform.xxx.field; currently only Douyin is supported
PUBLISH_TIME_TYPE = 0
# NOTE(review): as written, the two adjacent string literals inside the
# parentheses are concatenated by Python into "creatorsearch"; presumably only
# one of them is meant to be present -- confirm.
CRAWLER_TYPE = (
"creator" # Crawl type: search (keyword search) | detail (post detail) | creator (creator homepage data)
"search" # Crawl type: search (keyword search) | detail (post detail) | creator (creator homepage data)
)
# Custom User-Agent; currently only takes effect for XHS
UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0'
@@ -54,9 +54,6 @@ USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name
# Page number to start crawling from (defaults to the first page)
START_PAGE = 1
# Page number to start crawling the fans list from (defaults to the first page)
START_CONTACTS_PAGE = 1
# Limit on the number of videos/posts to crawl
CRAWLER_MAX_NOTES_COUNT = 200
@@ -147,11 +144,7 @@ DY_CREATOR_ID_LIST = [
# List of bilibili creator IDs to crawl (sec_id)
# NOTE(review): "20813884" appears both commented out and active below;
# presumably the old and new sides of a diff hunk -- confirm the intended entries.
BILI_CREATOR_ID_LIST = [
# "20813884",
"520819684",
# "472747194",
# "519872016",
# "372201438",
"20813884",
# ........................
]
@@ -202,8 +195,15 @@ END_DAY = '2024-01-01'
# If True, filter day by day from START_DAY to END_DAY; this gets around the
# 1000-video limit and crawls as many videos for the keyword as possible
ALL_DAY = False
#!!! The options below are only supported for bilibili creator crawling
# Whether to crawl the creator's homepage comments or the creator's dynamics
# and relation lists (True selects the former)
CREATOR_MODE = True
# Starting page number when crawling a creator's fans list
START_CONTACTS_PAGE = 1
# Limit on the number of fans and followings crawled (per creator)
CRAWLER_MAX_CONTACTS_COUNT_SINGLENOTES = 100
# Limit on the number of creator dynamics, fans and followings crawled (per creator)
# Limit on the number of creator dynamics crawled (per creator)
CRAWLER_MAX_DYNAMICS_COUNT_SINGLENOTES = 50

View File

@@ -12,16 +12,11 @@
import os
# mysql config
# RELATION_DB_PWD = os.getenv("RELATION_DB_PWD", "123456")
# RELATION_DB_USER = os.getenv("RELATION_DB_USER", "root")
# RELATION_DB_HOST = os.getenv("RELATION_DB_HOST", "localhost")
# RELATION_DB_PORT = os.getenv("RELATION_DB_PORT", 3306)
# RELATION_DB_NAME = os.getenv("RELATION_DB_NAME", "media_crawler")
# NOTE(review): the five literal assignments below are overwritten by the
# os.getenv(...) assignments that follow them. They embed a host address,
# user name and password directly in the source -- presumably these should not
# be committed; confirm and rotate the credentials if they are real.
RELATION_DB_HOST = "47.94.233.47" # replace with your database hostname / public IP
RELATION_DB_PORT = 3306 # replace with your database port (usually 3306)
RELATION_DB_USER = "remote_user" # replace with your database user name
RELATION_DB_PWD = "314159" # replace with your database password
RELATION_DB_NAME = "Test" # replace with your database name
# Connection settings read from environment variables, each falling back to
# the default given as the second argument when the variable is unset.
RELATION_DB_PWD = os.getenv("RELATION_DB_PWD", "123456")
RELATION_DB_USER = os.getenv("RELATION_DB_USER", "root")
RELATION_DB_HOST = os.getenv("RELATION_DB_HOST", "localhost")
# NOTE(review): os.getenv returns a str when the variable is set, while the
# fallback here is the int 3306, so the type of RELATION_DB_PORT depends on the
# environment -- confirm the consumer accepts both.
RELATION_DB_PORT = os.getenv("RELATION_DB_PORT", 3306)
RELATION_DB_NAME = os.getenv("RELATION_DB_NAME", "media_crawler")
# redis config