i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: Chinese disclaimer header (lines 10-18) for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
程序员阿江(Relakkes)
2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions

View File

@@ -21,8 +21,8 @@
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 23:26
# @Desc : bilibili 请求参数签名
# 逆向实现参考:https://socialsisteryi.github.io/bilibili-API-collect/docs/misc/sign/wbi.html#wbi%E7%AD%BE%E5%90%8D%E7%AE%97%E6%B3%95
# @Desc : bilibili request parameter signing
# Reverse engineering implementation reference: https://socialsisteryi.github.io/bilibili-API-collect/docs/misc/sign/wbi.html#wbi%E7%AD%BE%E5%90%8D%E7%AE%97%E6%B3%95
import re
import urllib.parse
from hashlib import md5
@@ -45,7 +45,7 @@ class BilibiliSign:
def get_salt(self) -> str:
"""
获取加盐的 key
Get the salted key
:return:
"""
salt = ""
@@ -56,8 +56,8 @@ class BilibiliSign:
def sign(self, req_data: Dict) -> Dict:
"""
请求参数中加上当前时间戳对请求参数中的key进行字典序排序
再将请求参数进行 url 编码集合 salt 进行 md5 就可以生成w_rid参数了
Add the current timestamp (wts) to the request parameters and sort the keys in dictionary order,
then URL-encode the parameters, append the salt, and take the MD5 hex digest to produce the w_rid parameter
:param req_data:
:return:
"""
@@ -65,35 +65,35 @@ class BilibiliSign:
req_data.update({"wts": current_ts})
req_data = dict(sorted(req_data.items()))
req_data = {
# 过滤 value 中的 "!'()*" 字符
# Filter "!'()*" characters from values
k: ''.join(filter(lambda ch: ch not in "!'()*", str(v)))
for k, v
in req_data.items()
}
query = urllib.parse.urlencode(req_data)
salt = self.get_salt()
wbi_sign = md5((query + salt).encode()).hexdigest() # 计算 w_rid
wbi_sign = md5((query + salt).encode()).hexdigest() # Calculate w_rid
req_data['w_rid'] = wbi_sign
return req_data
def parse_video_info_from_url(url: str) -> VideoUrlInfo:
"""
从B站视频URL中解析出视频ID
Parse video ID from Bilibili video URL
Args:
url: B站视频链接
url: Bilibili video link
- https://www.bilibili.com/video/BV1dwuKzmE26/?spm_id_from=333.1387.homepage.video_card.click
- https://www.bilibili.com/video/BV1d54y1g7db
- BV1d54y1g7db (直接传入BV号)
- BV1d54y1g7db (directly pass BV number)
Returns:
VideoUrlInfo: 包含视频ID的对象
VideoUrlInfo: Object containing video ID
"""
# 如果传入的已经是BV号,直接返回
# If the input is already a BV number, return directly
if url.startswith("BV"):
return VideoUrlInfo(video_id=url)
# 使用正则表达式提取BV号
# 匹配 /video/BV... /video/av... 格式
# Use regex to extract BV number
# Match the /video/BV... format (the pattern below only captures BV IDs, not av IDs)
bv_pattern = r'/video/(BV[a-zA-Z0-9]+)'
match = re.search(bv_pattern, url)
@@ -101,26 +101,26 @@ def parse_video_info_from_url(url: str) -> VideoUrlInfo:
video_id = match.group(1)
return VideoUrlInfo(video_id=video_id)
raise ValueError(f"无法从URL中解析出视频ID: {url}")
raise ValueError(f"Unable to parse video ID from URL: {url}")
def parse_creator_info_from_url(url: str) -> CreatorUrlInfo:
"""
从B站创作者空间URL中解析出创作者ID
Parse creator ID from Bilibili creator space URL
Args:
url: B站创作者空间链接
url: Bilibili creator space link
- https://space.bilibili.com/434377496?spm_id_from=333.1007.0.0
- https://space.bilibili.com/20813884
- 434377496 (直接传入UID)
- 434377496 (directly pass UID)
Returns:
CreatorUrlInfo: 包含创作者ID的对象
CreatorUrlInfo: Object containing creator ID
"""
# 如果传入的已经是纯数字ID,直接返回
# If the input is already a numeric ID, return directly
if url.isdigit():
return CreatorUrlInfo(creator_id=url)
# 使用正则表达式提取UID
# 匹配 /space.bilibili.com/数字 格式
# Use regex to extract UID
# Match the space.bilibili.com/<digits> format
uid_pattern = r'space\.bilibili\.com/(\d+)'
match = re.search(uid_pattern, url)
@@ -128,20 +128,20 @@ def parse_creator_info_from_url(url: str) -> CreatorUrlInfo:
creator_id = match.group(1)
return CreatorUrlInfo(creator_id=creator_id)
raise ValueError(f"无法从URL中解析出创作者ID: {url}")
raise ValueError(f"Unable to parse creator ID from URL: {url}")
if __name__ == '__main__':
# 测试视频URL解析
# Test video URL parsing
video_url1 = "https://www.bilibili.com/video/BV1dwuKzmE26/?spm_id_from=333.1387.homepage.video_card.click"
video_url2 = "BV1d54y1g7db"
print("视频URL解析测试:")
print("Video URL parsing test:")
print(f"URL1: {video_url1} -> {parse_video_info_from_url(video_url1)}")
print(f"URL2: {video_url2} -> {parse_video_info_from_url(video_url2)}")
# 测试创作者URL解析
# Test creator URL parsing
creator_url1 = "https://space.bilibili.com/434377496?spm_id_from=333.1007.0.0"
creator_url2 = "20813884"
print("\n创作者URL解析测试:")
print("\nCreator URL parsing test:")
print(f"URL1: {creator_url1} -> {parse_creator_info_from_url(creator_url1)}")
print(f"URL2: {creator_url2} -> {parse_creator_info_from_url(creator_url2)}")