i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: Chinese disclaimer header (lines 10-18) for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
程序员阿江(Relakkes)
2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions

View File

@@ -20,7 +20,7 @@
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 18:44
# @Desc : bilibili 请求客户端
# @Desc : bilibili request client
import asyncio
import json
import random
@@ -47,7 +47,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
def __init__(
self,
timeout=60, # 若开启爬取媒体选项b 站的长视频需要更久的超时时间
timeout=60, # For media crawling, Bilibili long videos need a longer timeout
proxy=None,
*,
headers: Dict[str, str],
@@ -61,11 +61,11 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
self._host = "https://api.bilibili.com"
self.playwright_page = playwright_page
self.cookie_dict = cookie_dict
# 初始化代理池(来自 ProxyRefreshMixin
# Initialize proxy pool (from ProxyRefreshMixin)
self.init_proxy_pool(proxy_ip_pool)
async def request(self, method, url, **kwargs) -> Any:
# 每次请求前检测代理是否过期
# Check if proxy has expired before each request
await self._refresh_proxy_if_expired()
async with httpx.AsyncClient(proxy=self.proxy) as client:
@@ -82,8 +82,8 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
async def pre_request_data(self, req_data: Dict) -> Dict:
"""
发送请求进行请求参数签名
需要从 localStorage 拿 wbi_img_urls 这参数,值如下:
Send request to sign request parameters
Need to get wbi_img_urls parameter from localStorage, value as follows:
https://i0.hdslb.com/bfs/wbi/7cd084941338484aae1ad9425b84077c.png-https://i0.hdslb.com/bfs/wbi/4932caff0ff746eab6f01bf08b70ac45.png
:param req_data:
:return:
@@ -95,7 +95,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
async def get_wbi_keys(self) -> Tuple[str, str]:
"""
获取最新的 img_key sub_key
Get the latest img_key and sub_key
:return:
"""
local_storage = await self.playwright_page.evaluate("() => window.localStorage")
@@ -160,12 +160,12 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
) -> Dict:
"""
KuaiShou web search api
:param keyword: 搜索关键词
:param page: 分页参数具体第几页
:param page_size: 每一页参数的数量
:param order: 搜索结果排序,默认位综合排序
:param pubtime_begin_s: 发布时间开始时间戳
:param pubtime_end_s: 发布时间结束时间戳
:param keyword: Search keyword
:param page: Page number for pagination
:param page_size: Number of items per page
:param order: Sort order for search results, default is comprehensive sorting
:param pubtime_begin_s: Publish time start timestamp
:param pubtime_end_s: Publish time end timestamp
:return:
"""
uri = "/x/web-interface/wbi/search/type"
@@ -182,13 +182,13 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
async def get_video_info(self, aid: Union[int, None] = None, bvid: Union[str, None] = None) -> Dict:
"""
Bilibli web video detail api, aid bvid任选一个参数
:param aid: 稿件avid
:param bvid: 稿件bvid
Bilibli web video detail api, choose one parameter between aid and bvid
:param aid: Video aid
:param bvid: Video bvid
:return:
"""
if not aid and not bvid:
raise ValueError("请提供 aid bvid 中的至少一个参数")
raise ValueError("Please provide at least one parameter: aid or bvid")
uri = "/x/web-interface/view/detail"
params = dict()
@@ -201,12 +201,12 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
async def get_video_play_url(self, aid: int, cid: int) -> Dict:
"""
Bilibli web video play url api
:param aid: 稿件avid
:param aid: Video aid
:param cid: cid
:return:
"""
if not aid or not cid or aid <= 0 or cid <= 0:
raise ValueError("aid cid 必须存在")
raise ValueError("aid and cid must exist")
uri = "/x/player/wbi/playurl"
qn_value = getattr(config, "BILI_QN", 80)
params = {
@@ -233,7 +233,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
)
return None
except httpx.HTTPError as exc: # some wrong when call httpx.request method, such as connection error, client error, server error or response status code is not 2xx
utils.logger.error(f"[BilibiliClient.get_video_media] {exc.__class__.__name__} for {exc.request.url} - {exc}") # 保留原始异常类型名称,以便开发者调试
utils.logger.error(f"[BilibiliClient.get_video_media] {exc.__class__.__name__} for {exc.request.url} - {exc}") # Keep original exception type name for developer debugging
return None
async def get_video_comments(
@@ -243,9 +243,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
next: int = 0,
) -> Dict:
"""get video comments
:param video_id: 视频 ID
:param order_mode: 排序方式
:param next: 评论页选择
:param video_id: Video ID
:param order_mode: Sort order
:param next: Comment page selection
:return:
"""
uri = "/x/v2/reply/wbi/main"
@@ -266,7 +266,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
:param crawl_interval:
:param is_fetch_sub_comments:
:param callback:
max_count: 一次笔记爬取的最大评论数量
max_count: Maximum number of comments to crawl per note
:return:
"""
@@ -299,7 +299,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
comment_list: List[Dict] = comments_res.get("replies", [])
# 检查 is_end next 是否存在
# Check if is_end and next exist
if "is_end" not in cursor_info or "next" not in cursor_info:
utils.logger.warning(f"[BilibiliClient.get_video_all_comments] 'is_end' or 'next' not in cursor for video_id: {video_id}. Assuming end of comments.")
is_end = True
@@ -317,7 +317,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
{await self.get_video_all_level_two_comments(video_id, comment_id, CommentOrderType.DEFAULT, 10, crawl_interval, callback)}
if len(result) + len(comment_list) > max_count:
comment_list = comment_list[:max_count - len(result)]
if callback: # 如果有回调函数,就执行回调函数
if callback: # If there is a callback function, execute it
await callback(video_id, comment_list)
await asyncio.sleep(crawl_interval)
if not is_fetch_sub_comments:
@@ -336,10 +336,10 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
) -> Dict:
"""
get video all level two comments for a level one comment
:param video_id: 视频 ID
:param level_one_comment_id: 一级评论 ID
:param video_id: Video ID
:param level_one_comment_id: Level one comment ID
:param order_mode:
:param ps: 一页评论数
:param ps: Number of comments per page
:param crawl_interval:
:param callback:
:return:
@@ -349,7 +349,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
while True:
result = await self.get_video_level_two_comments(video_id, level_one_comment_id, pn, ps, order_mode)
comment_list: List[Dict] = result.get("replies", [])
if callback: # 如果有回调函数,就执行回调函数
if callback: # If there is a callback function, execute it
await callback(video_id, comment_list)
await asyncio.sleep(crawl_interval)
if (int(result["page"]["count"]) <= pn * ps):
@@ -366,9 +366,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
order_mode: CommentOrderType,
) -> Dict:
"""get video level two comments
:param video_id: 视频 ID
:param level_one_comment_id: 一级评论 ID
:param order_mode: 排序方式
:param video_id: Video ID
:param level_one_comment_id: Level one comment ID
:param order_mode: Sort order
:return:
"""
@@ -386,10 +386,10 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
async def get_creator_videos(self, creator_id: str, pn: int, ps: int = 30, order_mode: SearchOrderType = SearchOrderType.LAST_PUBLISH) -> Dict:
"""get all videos for a creator
:param creator_id: 创作者 ID
:param pn: 页数
:param ps: 一页视频数
:param order_mode: 排序方式
:param creator_id: Creator ID
:param pn: Page number
:param ps: Number of videos per page
:param order_mode: Sort order
:return:
"""
@@ -405,7 +405,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
async def get_creator_info(self, creator_id: int) -> Dict:
"""
get creator info
:param creator_id: 作者 ID
:param creator_id: Creator ID
"""
uri = "/x/space/wbi/acc/info"
post_data = {
@@ -421,9 +421,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
) -> Dict:
"""
get creator fans
:param creator_id: 创作者 ID
:param pn: 开始页数
:param ps: 每页数量
:param creator_id: Creator ID
:param pn: Start page number
:param ps: Number of items per page
:return:
"""
uri = "/x/relation/fans"
@@ -443,9 +443,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
) -> Dict:
"""
get creator followings
:param creator_id: 创作者 ID
:param pn: 开始页数
:param ps: 每页数量
:param creator_id: Creator ID
:param pn: Start page number
:param ps: Number of items per page
:return:
"""
uri = "/x/relation/followings"
@@ -460,8 +460,8 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
async def get_creator_dynamics(self, creator_id: int, offset: str = ""):
"""
get creator comments
:param creator_id: 创作者 ID
:param offset: 发送请求所需参数
:param creator_id: Creator ID
:param offset: Parameter required for sending request
:return:
"""
uri = "/x/polymer/web-dynamic/v1/feed/space"
@@ -485,9 +485,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
:param creator_info:
:param crawl_interval:
:param callback:
:param max_count: 一个up主爬取的最大粉丝数量
:param max_count: Maximum number of fans to crawl for a creator
:return: up主粉丝数列表
:return: List of creator fans
"""
creator_id = creator_info["id"]
result = []
@@ -499,7 +499,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
pn += 1
if len(result) + len(fans_list) > max_count:
fans_list = fans_list[:max_count - len(result)]
if callback: # 如果有回调函数,就执行回调函数
if callback: # If there is a callback function, execute it
await callback(creator_info, fans_list)
await asyncio.sleep(crawl_interval)
if not fans_list:
@@ -519,9 +519,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
:param creator_info:
:param crawl_interval:
:param callback:
:param max_count: 一个up主爬取的最大关注者数量
:param max_count: Maximum number of followings to crawl for a creator
:return: up主关注者列表
:return: List of creator followings
"""
creator_id = creator_info["id"]
result = []
@@ -533,7 +533,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
pn += 1
if len(result) + len(followings_list) > max_count:
followings_list = followings_list[:max_count - len(result)]
if callback: # 如果有回调函数,就执行回调函数
if callback: # If there is a callback function, execute it
await callback(creator_info, followings_list)
await asyncio.sleep(crawl_interval)
if not followings_list:
@@ -553,9 +553,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
:param creator_info:
:param crawl_interval:
:param callback:
:param max_count: 一个up主爬取的最大动态数量
:param max_count: Maximum number of dynamics to crawl for a creator
:return: up主关注者列表
:return: List of creator dynamics
"""
creator_id = creator_info["id"]
result = []

View File

@@ -20,7 +20,7 @@
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 18:44
# @Desc : B站爬虫
# @Desc : Bilibili Crawler
import asyncio
import os
@@ -64,7 +64,7 @@ class BilibiliCrawler(AbstractCrawler):
self.index_url = "https://www.bilibili.com"
self.user_agent = utils.get_user_agent()
self.cdp_manager = None
self.ip_proxy_pool = None # 代理IP池用于代理自动刷新
self.ip_proxy_pool = None # Proxy IP pool for automatic proxy refresh
async def start(self):
playwright_proxy_format, httpx_proxy_format = None, None
@@ -74,9 +74,9 @@ class BilibiliCrawler(AbstractCrawler):
playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)
async with async_playwright() as playwright:
# 根据配置选择启动模式
# Choose launch mode based on configuration
if config.ENABLE_CDP_MODE:
utils.logger.info("[BilibiliCrawler] 使用CDP模式启动浏览器")
utils.logger.info("[BilibiliCrawler] Launching browser using CDP mode")
self.browser_context = await self.launch_browser_with_cdp(
playwright,
playwright_proxy_format,
@@ -84,7 +84,7 @@ class BilibiliCrawler(AbstractCrawler):
headless=config.CDP_HEADLESS,
)
else:
utils.logger.info("[BilibiliCrawler] 使用标准模式启动浏览器")
utils.logger.info("[BilibiliCrawler] Launching browser using standard mode")
# Launch a browser context.
chromium = playwright.chromium
self.browser_context = await self.launch_browser(chromium, None, self.user_agent, headless=config.HEADLESS)
@@ -149,31 +149,31 @@ class BilibiliCrawler(AbstractCrawler):
end: str = config.END_DAY,
) -> Tuple[str, str]:
"""
获取 bilibili 作品发布日期起始时间戳 pubtime_begin_s 与发布日期结束时间戳 pubtime_end_s
Get bilibili publish start timestamp pubtime_begin_s and publish end timestamp pubtime_end_s
---
:param start: 发布日期起始时间,YYYY-MM-DD
:param end: 发布日期结束时间,YYYY-MM-DD
:param start: Publish date start time, YYYY-MM-DD
:param end: Publish date end time, YYYY-MM-DD
Note
---
- 搜索的时间范围为 start end,包含 start end
- 若要搜索同一天的内容,为了包含 start 当天的搜索内容,则 pubtime_end_s 的值应该为 pubtime_begin_s 的值加上一天再减去一秒,即 start 当天的最后一秒
- 如仅搜索 2024-01-05 的内容,pubtime_begin_s = 1704384000pubtime_end_s = 1704470399
转换为可读的 datetime 对象:pubtime_begin_s = datetime.datetime(2024, 1, 5, 0, 0)pubtime_end_s = datetime.datetime(2024, 1, 5, 23, 59, 59)
- 若要搜索 start end 的内容,为了包含 end 当天的搜索内容,则 pubtime_end_s 的值应该为 pubtime_end_s 的值加上一天再减去一秒,即 end 当天的最后一秒
- 如搜索 2024-01-05 - 2024-01-06 的内容,pubtime_begin_s = 1704384000pubtime_end_s = 1704556799
转换为可读的 datetime 对象:pubtime_begin_s = datetime.datetime(2024, 1, 5, 0, 0)pubtime_end_s = datetime.datetime(2024, 1, 6, 23, 59, 59)
- Search time range is from start to end, including both start and end
- To search content from the same day, to include search content from that day, pubtime_end_s should be pubtime_begin_s plus one day minus one second, i.e., the last second of start day
- For example, searching only 2024-01-05 content, pubtime_begin_s = 1704384000, pubtime_end_s = 1704470399
Converted to readable datetime objects: pubtime_begin_s = datetime.datetime(2024, 1, 5, 0, 0), pubtime_end_s = datetime.datetime(2024, 1, 5, 23, 59, 59)
- To search content from start to end, to include search content from end day, pubtime_end_s should be pubtime_end_s plus one day minus one second, i.e., the last second of end day
- For example, searching 2024-01-05 - 2024-01-06 content, pubtime_begin_s = 1704384000, pubtime_end_s = 1704556799
Converted to readable datetime objects: pubtime_begin_s = datetime.datetime(2024, 1, 5, 0, 0), pubtime_end_s = datetime.datetime(2024, 1, 6, 23, 59, 59)
"""
# 转换 start end datetime 对象
# Convert start and end to datetime objects
start_day: datetime = datetime.strptime(start, "%Y-%m-%d")
end_day: datetime = datetime.strptime(end, "%Y-%m-%d")
if start_day > end_day:
raise ValueError("Wrong time range, please check your start and end argument, to ensure that the start cannot exceed end")
elif start_day == end_day: # 搜索同一天的内容
end_day = (start_day + timedelta(days=1) - timedelta(seconds=1)) # 则将 end_day 设置为 start_day + 1 day - 1 second
else: # 搜索 start end
end_day = (end_day + timedelta(days=1) - timedelta(seconds=1)) # 则将 end_day 设置为 end_day + 1 day - 1 second
# 将其重新转换为时间戳
elif start_day == end_day: # Searching content from the same day
end_day = (start_day + timedelta(days=1) - timedelta(seconds=1)) # Set end_day to start_day + 1 day - 1 second
else: # Searching from start to end
end_day = (end_day + timedelta(days=1) - timedelta(seconds=1)) # Set end_day to end_day + 1 day - 1 second
# Convert back to timestamps
return str(int(start_day.timestamp())), str(int(end_day.timestamp()))
async def search_by_keywords(self):
@@ -203,8 +203,8 @@ class BilibiliCrawler(AbstractCrawler):
page=page,
page_size=bili_limit_count,
order=SearchOrderType.DEFAULT,
pubtime_begin_s=0, # 作品发布日期起始时间戳
pubtime_end_s=0, # 作品发布日期结束日期时间戳
pubtime_begin_s=0, # Publish date start timestamp
pubtime_end_s=0, # Publish date end timestamp
)
video_list: List[Dict] = videos_res.get("result")
@@ -508,7 +508,7 @@ class BilibiliCrawler(AbstractCrawler):
"height": 1080
},
user_agent=user_agent,
channel="chrome", # 使用系统的Chrome稳定版
channel="chrome", # Use system's stable Chrome version
)
return browser_context
else:
@@ -525,7 +525,7 @@ class BilibiliCrawler(AbstractCrawler):
headless: bool = True,
) -> BrowserContext:
"""
使用CDP模式启动浏览器
Launch browser using CDP mode
"""
try:
self.cdp_manager = CDPBrowserManager()
@@ -536,22 +536,22 @@ class BilibiliCrawler(AbstractCrawler):
headless=headless,
)
# 显示浏览器信息
# Display browser information
browser_info = await self.cdp_manager.get_browser_info()
utils.logger.info(f"[BilibiliCrawler] CDP浏览器信息: {browser_info}")
utils.logger.info(f"[BilibiliCrawler] CDP browser info: {browser_info}")
return browser_context
except Exception as e:
utils.logger.error(f"[BilibiliCrawler] CDP模式启动失败,回退到标准模式: {e}")
# 回退到标准模式
utils.logger.error(f"[BilibiliCrawler] CDP mode launch failed, fallback to standard mode: {e}")
# Fallback to standard mode
chromium = playwright.chromium
return await self.launch_browser(chromium, playwright_proxy, user_agent, headless)
async def close(self):
"""Close browser context"""
try:
# 如果使用CDP模式需要特殊处理
# If using CDP mode, special handling is required
if self.cdp_manager:
await self.cdp_manager.cleanup()
self.cdp_manager = None

View File

@@ -27,28 +27,28 @@ from enum import Enum
class SearchOrderType(Enum):
# 综合排序
# Comprehensive sorting
DEFAULT = ""
# 最多点击
# Most clicks
MOST_CLICK = "click"
# 最新发布
# Latest published
LAST_PUBLISH = "pubdate"
# 最多弹幕
# Most danmu (comments)
MOST_DANMU = "dm"
# 最多收藏
# Most bookmarks
MOST_MARK = "stow"
class CommentOrderType(Enum):
# 仅按热度
# By popularity only
DEFAULT = 0
# 按热度+按时间
# By popularity + time
MIXED = 1
# 按时间
# By time
TIME = 2

View File

@@ -21,8 +21,8 @@
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 23:26
# @Desc : bilibili 请求参数签名
# 逆向实现参考:https://socialsisteryi.github.io/bilibili-API-collect/docs/misc/sign/wbi.html#wbi%E7%AD%BE%E5%90%8D%E7%AE%97%E6%B3%95
# @Desc : bilibili request parameter signing
# Reverse engineering implementation reference: https://socialsisteryi.github.io/bilibili-API-collect/docs/misc/sign/wbi.html#wbi%E7%AD%BE%E5%90%8D%E7%AE%97%E6%B3%95
import re
import urllib.parse
from hashlib import md5
@@ -45,7 +45,7 @@ class BilibiliSign:
def get_salt(self) -> str:
"""
获取加盐的 key
Get the salted key
:return:
"""
salt = ""
@@ -56,8 +56,8 @@ class BilibiliSign:
def sign(self, req_data: Dict) -> Dict:
"""
请求参数中加上当前时间戳对请求参数中的key进行字典序排序
再将请求参数进行 url 编码集合 salt 进行 md5 就可以生成w_rid参数了
Add current timestamp to request parameters, sort keys in dictionary order,
then URL encode the parameters and combine with salt to generate md5 for w_rid parameter
:param req_data:
:return:
"""
@@ -65,35 +65,35 @@ class BilibiliSign:
req_data.update({"wts": current_ts})
req_data = dict(sorted(req_data.items()))
req_data = {
# 过滤 value 中的 "!'()*" 字符
# Filter "!'()*" characters from values
k: ''.join(filter(lambda ch: ch not in "!'()*", str(v)))
for k, v
in req_data.items()
}
query = urllib.parse.urlencode(req_data)
salt = self.get_salt()
wbi_sign = md5((query + salt).encode()).hexdigest() # 计算 w_rid
wbi_sign = md5((query + salt).encode()).hexdigest() # Calculate w_rid
req_data['w_rid'] = wbi_sign
return req_data
def parse_video_info_from_url(url: str) -> VideoUrlInfo:
"""
从B站视频URL中解析出视频ID
Parse video ID from Bilibili video URL
Args:
url: B站视频链接
url: Bilibili video link
- https://www.bilibili.com/video/BV1dwuKzmE26/?spm_id_from=333.1387.homepage.video_card.click
- https://www.bilibili.com/video/BV1d54y1g7db
- BV1d54y1g7db (直接传入BV号)
- BV1d54y1g7db (directly pass BV number)
Returns:
VideoUrlInfo: 包含视频ID的对象
VideoUrlInfo: Object containing video ID
"""
# 如果传入的已经是BV号,直接返回
# If the input is already a BV number, return directly
if url.startswith("BV"):
return VideoUrlInfo(video_id=url)
# 使用正则表达式提取BV号
# 匹配 /video/BV... /video/av... 格式
# Use regex to extract BV number
# Match /video/BV... or /video/av... format
bv_pattern = r'/video/(BV[a-zA-Z0-9]+)'
match = re.search(bv_pattern, url)
@@ -101,26 +101,26 @@ def parse_video_info_from_url(url: str) -> VideoUrlInfo:
video_id = match.group(1)
return VideoUrlInfo(video_id=video_id)
raise ValueError(f"无法从URL中解析出视频ID: {url}")
raise ValueError(f"Unable to parse video ID from URL: {url}")
def parse_creator_info_from_url(url: str) -> CreatorUrlInfo:
"""
从B站创作者空间URL中解析出创作者ID
Parse creator ID from Bilibili creator space URL
Args:
url: B站创作者空间链接
url: Bilibili creator space link
- https://space.bilibili.com/434377496?spm_id_from=333.1007.0.0
- https://space.bilibili.com/20813884
- 434377496 (直接传入UID)
- 434377496 (directly pass UID)
Returns:
CreatorUrlInfo: 包含创作者ID的对象
CreatorUrlInfo: Object containing creator ID
"""
# 如果传入的已经是纯数字ID,直接返回
# If the input is already a numeric ID, return directly
if url.isdigit():
return CreatorUrlInfo(creator_id=url)
# 使用正则表达式提取UID
# 匹配 /space.bilibili.com/数字 格式
# Use regex to extract UID
# Match /space.bilibili.com/number format
uid_pattern = r'space\.bilibili\.com/(\d+)'
match = re.search(uid_pattern, url)
@@ -128,20 +128,20 @@ def parse_creator_info_from_url(url: str) -> CreatorUrlInfo:
creator_id = match.group(1)
return CreatorUrlInfo(creator_id=creator_id)
raise ValueError(f"无法从URL中解析出创作者ID: {url}")
raise ValueError(f"Unable to parse creator ID from URL: {url}")
if __name__ == '__main__':
# 测试视频URL解析
# Test video URL parsing
video_url1 = "https://www.bilibili.com/video/BV1dwuKzmE26/?spm_id_from=333.1387.homepage.video_card.click"
video_url2 = "BV1d54y1g7db"
print("视频URL解析测试:")
print("Video URL parsing test:")
print(f"URL1: {video_url1} -> {parse_video_info_from_url(video_url1)}")
print(f"URL2: {video_url2} -> {parse_video_info_from_url(video_url2)}")
# 测试创作者URL解析
# Test creator URL parsing
creator_url1 = "https://space.bilibili.com/434377496?spm_id_from=333.1007.0.0"
creator_url2 = "20813884"
print("\n创作者URL解析测试:")
print("\nCreator URL parsing test:")
print(f"URL1: {creator_url1} -> {parse_creator_info_from_url(creator_url1)}")
print(f"URL2: {creator_url2} -> {parse_creator_info_from_url(creator_url2)}")

View File

@@ -21,7 +21,7 @@
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 18:44
# @Desc : bilibli登录实现类
# @Desc : bilibili login implementation class
import asyncio
import functools