mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-07 10:27:25 +08:00
i18n: translate all Chinese comments, docstrings, and logger messages to English
Comprehensive translation of Chinese text to English across the entire codebase: - api/: FastAPI server documentation and logger messages - cache/: Cache abstraction layer comments and docstrings - database/: Database models and MongoDB store documentation - media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu) - model/: Data model documentation - proxy/: Proxy pool and provider documentation - store/: Data storage layer comments - tools/: Utility functions and browser automation - test/: Test file documentation Preserved: Chinese disclaimer header (lines 10-18) for legal compliance 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -21,8 +21,8 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Author : relakkes@gmail.com
|
||||
# @Time : 2023/12/2 23:26
|
||||
# @Desc : bilibili 请求参数签名
|
||||
# 逆向实现参考:https://socialsisteryi.github.io/bilibili-API-collect/docs/misc/sign/wbi.html#wbi%E7%AD%BE%E5%90%8D%E7%AE%97%E6%B3%95
|
||||
# @Desc : bilibili request parameter signing
|
||||
# Reverse engineering implementation reference: https://socialsisteryi.github.io/bilibili-API-collect/docs/misc/sign/wbi.html#wbi%E7%AD%BE%E5%90%8D%E7%AE%97%E6%B3%95
|
||||
import re
|
||||
import urllib.parse
|
||||
from hashlib import md5
|
||||
@@ -45,7 +45,7 @@ class BilibiliSign:
|
||||
|
||||
def get_salt(self) -> str:
|
||||
"""
|
||||
获取加盐的 key
|
||||
Get the salted key
|
||||
:return:
|
||||
"""
|
||||
salt = ""
|
||||
@@ -56,8 +56,8 @@ class BilibiliSign:
|
||||
|
||||
def sign(self, req_data: Dict) -> Dict:
|
||||
"""
|
||||
请求参数中加上当前时间戳对请求参数中的key进行字典序排序
|
||||
再将请求参数进行 url 编码集合 salt 进行 md5 就可以生成w_rid参数了
|
||||
Add current timestamp to request parameters, sort keys in dictionary order,
|
||||
then URL encode the parameters and combine with salt to generate md5 for w_rid parameter
|
||||
:param req_data:
|
||||
:return:
|
||||
"""
|
||||
@@ -65,35 +65,35 @@ class BilibiliSign:
|
||||
req_data.update({"wts": current_ts})
|
||||
req_data = dict(sorted(req_data.items()))
|
||||
req_data = {
|
||||
# 过滤 value 中的 "!'()*" 字符
|
||||
# Filter "!'()*" characters from values
|
||||
k: ''.join(filter(lambda ch: ch not in "!'()*", str(v)))
|
||||
for k, v
|
||||
in req_data.items()
|
||||
}
|
||||
query = urllib.parse.urlencode(req_data)
|
||||
salt = self.get_salt()
|
||||
wbi_sign = md5((query + salt).encode()).hexdigest() # 计算 w_rid
|
||||
wbi_sign = md5((query + salt).encode()).hexdigest() # Calculate w_rid
|
||||
req_data['w_rid'] = wbi_sign
|
||||
return req_data
|
||||
|
||||
|
||||
def parse_video_info_from_url(url: str) -> VideoUrlInfo:
|
||||
"""
|
||||
从B站视频URL中解析出视频ID
|
||||
Parse video ID from Bilibili video URL
|
||||
Args:
|
||||
url: B站视频链接
|
||||
url: Bilibili video link
|
||||
- https://www.bilibili.com/video/BV1dwuKzmE26/?spm_id_from=333.1387.homepage.video_card.click
|
||||
- https://www.bilibili.com/video/BV1d54y1g7db
|
||||
- BV1d54y1g7db (直接传入BV号)
|
||||
- BV1d54y1g7db (directly pass BV number)
|
||||
Returns:
|
||||
VideoUrlInfo: 包含视频ID的对象
|
||||
VideoUrlInfo: Object containing video ID
|
||||
"""
|
||||
# 如果传入的已经是BV号,直接返回
|
||||
# If the input is already a BV number, return directly
|
||||
if url.startswith("BV"):
|
||||
return VideoUrlInfo(video_id=url)
|
||||
|
||||
# 使用正则表达式提取BV号
|
||||
# 匹配 /video/BV... 或 /video/av... 格式
|
||||
# Use regex to extract BV number
|
||||
# Match /video/BV... or /video/av... format
|
||||
bv_pattern = r'/video/(BV[a-zA-Z0-9]+)'
|
||||
match = re.search(bv_pattern, url)
|
||||
|
||||
@@ -101,26 +101,26 @@ def parse_video_info_from_url(url: str) -> VideoUrlInfo:
|
||||
video_id = match.group(1)
|
||||
return VideoUrlInfo(video_id=video_id)
|
||||
|
||||
raise ValueError(f"无法从URL中解析出视频ID: {url}")
|
||||
raise ValueError(f"Unable to parse video ID from URL: {url}")
|
||||
|
||||
|
||||
def parse_creator_info_from_url(url: str) -> CreatorUrlInfo:
|
||||
"""
|
||||
从B站创作者空间URL中解析出创作者ID
|
||||
Parse creator ID from Bilibili creator space URL
|
||||
Args:
|
||||
url: B站创作者空间链接
|
||||
url: Bilibili creator space link
|
||||
- https://space.bilibili.com/434377496?spm_id_from=333.1007.0.0
|
||||
- https://space.bilibili.com/20813884
|
||||
- 434377496 (直接传入UID)
|
||||
- 434377496 (directly pass UID)
|
||||
Returns:
|
||||
CreatorUrlInfo: 包含创作者ID的对象
|
||||
CreatorUrlInfo: Object containing creator ID
|
||||
"""
|
||||
# 如果传入的已经是纯数字ID,直接返回
|
||||
# If the input is already a numeric ID, return directly
|
||||
if url.isdigit():
|
||||
return CreatorUrlInfo(creator_id=url)
|
||||
|
||||
# 使用正则表达式提取UID
|
||||
# 匹配 /space.bilibili.com/数字 格式
|
||||
# Use regex to extract UID
|
||||
# Match /space.bilibili.com/number format
|
||||
uid_pattern = r'space\.bilibili\.com/(\d+)'
|
||||
match = re.search(uid_pattern, url)
|
||||
|
||||
@@ -128,20 +128,20 @@ def parse_creator_info_from_url(url: str) -> CreatorUrlInfo:
|
||||
creator_id = match.group(1)
|
||||
return CreatorUrlInfo(creator_id=creator_id)
|
||||
|
||||
raise ValueError(f"无法从URL中解析出创作者ID: {url}")
|
||||
raise ValueError(f"Unable to parse creator ID from URL: {url}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# 测试视频URL解析
|
||||
# Test video URL parsing
|
||||
video_url1 = "https://www.bilibili.com/video/BV1dwuKzmE26/?spm_id_from=333.1387.homepage.video_card.click"
|
||||
video_url2 = "BV1d54y1g7db"
|
||||
print("视频URL解析测试:")
|
||||
print("Video URL parsing test:")
|
||||
print(f"URL1: {video_url1} -> {parse_video_info_from_url(video_url1)}")
|
||||
print(f"URL2: {video_url2} -> {parse_video_info_from_url(video_url2)}")
|
||||
|
||||
# 测试创作者URL解析
|
||||
# Test creator URL parsing
|
||||
creator_url1 = "https://space.bilibili.com/434377496?spm_id_from=333.1007.0.0"
|
||||
creator_url2 = "20813884"
|
||||
print("\n创作者URL解析测试:")
|
||||
print("\nCreator URL parsing test:")
|
||||
print(f"URL1: {creator_url1} -> {parse_creator_info_from_url(creator_url1)}")
|
||||
print(f"URL2: {creator_url2} -> {parse_creator_info_from_url(creator_url2)}")
|
||||
|
||||
Reference in New Issue
Block a user