i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text to English across the entire codebase:
- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: Chinese disclaimer header (lines 10-18) for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-06-08 10:57:26 +08:00 · 2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions
--- a/media_platform/douyin/help.py
+++ b/media_platform/douyin/help.py
@@ -22,7 +22,7 @@
 # @Author  : relakkes@gmail.com
 # @Name    : 程序员阿江-Relakkes
 # @Time    : 2024/6/10 02:24
-# @Desc    : 获取 a_bogus 参数, 学习交流使用，请勿用作商业用途，侵权联系作者删除
+# @Desc    : Get the a_bogus parameter. For learning and exchange only; do not use for commercial purposes. Contact the author for removal in case of infringement.

 import random
 import re
@@ -38,7 +38,7 @@ douyin_sign_obj = execjs.compile(open('libs/douyin.js', encoding='utf-8-sig').re

 def get_web_id():
    """
-    生成随机的webid
+    Generate random webid
    Returns:

    """
@@ -60,13 +60,13 @@ def get_web_id():

 async def get_a_bogus(url: str, params: str, post_data: dict, user_agent: str, page: Page = None):
    """
-    获取 a_bogus 参数, 目前不支持post请求类型的签名
+    Get the a_bogus parameter. Signing POST requests is not currently supported.
    """
    return get_a_bogus_from_js(url, params, user_agent)

 def get_a_bogus_from_js(url: str, params: str, user_agent: str):
    """
-    通过js获取 a_bogus 参数
+    Get a_bogus parameter through js
    Args:
        url:
        params:
@@ -84,8 +84,8 @@ def get_a_bogus_from_js(url: str, params: str, user_agent: str):

 async def get_a_bogus_from_playright(params: str, post_data: dict, user_agent: str, page: Page):
    """
-    通过playright获取 a_bogus 参数
-    playwright版本已失效
+    Get the a_bogus parameter through Playwright
+    NOTE: the Playwright-based approach no longer works
    Returns:

    """
@@ -100,73 +100,73 @@ async def get_a_bogus_from_playright(params: str, post_data: dict, user_agent: s

 def parse_video_info_from_url(url: str) -> VideoUrlInfo:
    """
-    从抖音视频URL中解析出视频ID
-    支持以下格式:
-    1. 普通视频链接: https://www.douyin.com/video/7525082444551310602
-    2. 带modal_id参数的链接:
+    Parse video ID from Douyin video URL
+    Supports the following formats:
+    1. Normal video link: https://www.douyin.com/video/7525082444551310602
+    2. Link with modal_id parameter:
       - https://www.douyin.com/user/MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE?modal_id=7525082444551310602
       - https://www.douyin.com/root/search/python?modal_id=7471165520058862848
-    3. 短链接: https://v.douyin.com/iF12345ABC/ (需要client解析)
-    4. 纯ID: 7525082444551310602
+    3. Short link: https://v.douyin.com/iF12345ABC/ (must be resolved by the client)
+    4. Bare ID: 7525082444551310602

    Args:
-        url: 抖音视频链接或ID
+        url: Douyin video link or ID
    Returns:
-        VideoUrlInfo: 包含视频ID的对象
+        VideoUrlInfo: Object containing video ID
    """
-    # 如果是纯数字ID,直接返回
+    # If the input is a purely numeric ID, return it directly
    if url.isdigit():
        return VideoUrlInfo(aweme_id=url, url_type="normal")

-    # 检查是否是短链接 (v.douyin.com)
+    # Check if it's a short link (v.douyin.com)
    if "v.douyin.com" in url or url.startswith("http") and len(url) < 50 and "video" not in url:
-        return VideoUrlInfo(aweme_id="", url_type="short")  # 需要通过client解析
+        return VideoUrlInfo(aweme_id="", url_type="short")  # Requires client parsing

-    # 尝试从URL参数中提取modal_id
+    # Try to extract modal_id from URL parameters
    params = extract_url_params_to_dict(url)
    modal_id = params.get("modal_id")
    if modal_id:
        return VideoUrlInfo(aweme_id=modal_id, url_type="modal")

-    # 从标准视频URL中提取ID: /video/数字
+    # Extract the ID from a standard video URL: /video/<digits>
    video_pattern = r'/video/(\d+)'
    match = re.search(video_pattern, url)
    if match:
        aweme_id = match.group(1)
        return VideoUrlInfo(aweme_id=aweme_id, url_type="normal")

-    raise ValueError(f"无法从URL中解析出视频ID: {url}")
+    raise ValueError(f"Unable to parse video ID from URL: {url}")


 def parse_creator_info_from_url(url: str) -> CreatorUrlInfo:
    """
-    从抖音创作者主页URL中解析出创作者ID (sec_user_id)
-    支持以下格式:
-    1. 创作者主页: https://www.douyin.com/user/MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE?from_tab_name=main
-    2. 纯ID: MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE
+    Parse creator ID (sec_user_id) from Douyin creator homepage URL
+    Supports the following formats:
+    1. Creator homepage: https://www.douyin.com/user/MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE?from_tab_name=main
+    2. Pure ID: MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE

    Args:
-        url: 抖音创作者主页链接或sec_user_id
+        url: Douyin creator homepage link or sec_user_id
    Returns:
-        CreatorUrlInfo: 包含创作者ID的对象
+        CreatorUrlInfo: Object containing creator ID
    """
-    # 如果是纯ID格式(通常以MS4wLjABAAAA开头),直接返回
+    # If the input is already a bare ID (usually starts with MS4wLjABAAAA), return it directly
    if url.startswith("MS4wLjABAAAA") or (not url.startswith("http") and "douyin.com" not in url):
        return CreatorUrlInfo(sec_user_id=url)

-    # 从创作者主页URL中提取sec_user_id: /user/xxx
+    # Extract sec_user_id from creator homepage URL: /user/xxx
    user_pattern = r'/user/([^/?]+)'
    match = re.search(user_pattern, url)
    if match:
        sec_user_id = match.group(1)
        return CreatorUrlInfo(sec_user_id=sec_user_id)

-    raise ValueError(f"无法从URL中解析出创作者ID: {url}")
+    raise ValueError(f"Unable to parse creator ID from URL: {url}")


 if __name__ == '__main__':
-    # 测试视频URL解析
-    print("=== 视频URL解析测试 ===")
+    # Test video URL parsing
+    print("=== Video URL Parsing Test ===")
    test_urls = [
        "https://www.douyin.com/video/7525082444551310602",
        "https://www.douyin.com/user/MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE?from_tab_name=main&modal_id=7525082444551310602",
@@ -177,13 +177,13 @@ if __name__ == '__main__':
        try:
            result = parse_video_info_from_url(url)
            print(f"✓ URL: {url[:80]}...")
-            print(f"  结果: {result}\n")
+            print(f"  Result: {result}\n")
        except Exception as e:
            print(f"✗ URL: {url}")
-            print(f"  错误: {e}\n")
+            print(f"  Error: {e}\n")

-    # 测试创作者URL解析
-    print("=== 创作者URL解析测试 ===")
+    # Test creator URL parsing
+    print("=== Creator URL Parsing Test ===")
    test_creator_urls = [
        "https://www.douyin.com/user/MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE?from_tab_name=main",
        "MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE",
@@ -192,7 +192,7 @@ if __name__ == '__main__':
        try:
            result = parse_creator_info_from_url(url)
            print(f"✓ URL: {url[:80]}...")
-            print(f"  结果: {result}\n")
+            print(f"  Result: {result}\n")
        except Exception as e:
            print(f"✗ URL: {url}")
-            print(f"  错误: {e}\n")
+            print(f"  Error: {e}\n")