i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: Chinese disclaimer header (lines 10-18) for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-06 23:21:33 +08:00 · 2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions
--- a/cmd_arg/arg.py
+++ b/cmd_arg/arg.py
@@ -37,7 +37,7 @@ EnumT = TypeVar("EnumT", bound=Enum)


 class PlatformEnum(str, Enum):
-    """支持的媒体平台枚举"""
+    """Supported media platform enumeration"""

    XHS = "xhs"
    DOUYIN = "dy"
@@ -49,7 +49,7 @@ class PlatformEnum(str, Enum):


 class LoginTypeEnum(str, Enum):
-    """登录方式枚举"""
+    """Login type enumeration"""

    QRCODE = "qrcode"
    PHONE = "phone"
@@ -57,7 +57,7 @@ class LoginTypeEnum(str, Enum):


 class CrawlerTypeEnum(str, Enum):
-    """爬虫类型枚举"""
+    """Crawler type enumeration"""

    SEARCH = "search"
    DETAIL = "detail"
@@ -65,7 +65,7 @@ class CrawlerTypeEnum(str, Enum):


 class SaveDataOptionEnum(str, Enum):
-    """数据保存方式枚举"""
+    """Data save option enumeration"""

    CSV = "csv"
    DB = "db"
@@ -76,7 +76,7 @@ class SaveDataOptionEnum(str, Enum):


 class InitDbOptionEnum(str, Enum):
-    """数据库初始化选项"""
+    """Database initialization option"""

    SQLITE = "sqlite"
    MYSQL = "mysql"
@@ -102,7 +102,7 @@ def _coerce_enum(
        return enum_cls(value)
    except ValueError:
        typer.secho(
-            f"⚠️ 配置值 '{value}' 不在 {enum_cls.__name__} 支持的范围内，已回退到默认值 '{default.value}'.",
+            f"⚠️ Config value '{value}' is not within the supported range of {enum_cls.__name__}, falling back to default value '{default.value}'.",
            fg=typer.colors.YELLOW,
        )
        return default
@@ -133,7 +133,7 @@ def _inject_init_db_default(args: Sequence[str]) -> list[str]:


 async def parse_cmd(argv: Optional[Sequence[str]] = None):
-    """使用 Typer 解析命令行参数。"""
+    """Parse command line arguments using Typer."""

    app = typer.Typer(add_completion=False)

@@ -143,48 +143,48 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
            PlatformEnum,
            typer.Option(
                "--platform",
-                help="媒体平台选择 (xhs=小红书 | dy=抖音 | ks=快手 | bili=哔哩哔哩 | wb=微博 | tieba=百度贴吧 | zhihu=知乎)",
-                rich_help_panel="基础配置",
+                help="Media platform selection (xhs=XiaoHongShu | dy=Douyin | ks=Kuaishou | bili=Bilibili | wb=Weibo | tieba=Baidu Tieba | zhihu=Zhihu)",
+                rich_help_panel="Basic Configuration",
            ),
        ] = _coerce_enum(PlatformEnum, config.PLATFORM, PlatformEnum.XHS),
        lt: Annotated[
            LoginTypeEnum,
            typer.Option(
                "--lt",
-                help="登录方式 (qrcode=二维码 | phone=手机号 | cookie=Cookie)",
-                rich_help_panel="账号配置",
+                help="Login type (qrcode=QR Code | phone=Phone | cookie=Cookie)",
+                rich_help_panel="Account Configuration",
            ),
        ] = _coerce_enum(LoginTypeEnum, config.LOGIN_TYPE, LoginTypeEnum.QRCODE),
        crawler_type: Annotated[
            CrawlerTypeEnum,
            typer.Option(
                "--type",
-                help="爬取类型 (search=搜索 | detail=详情 | creator=创作者)",
-                rich_help_panel="基础配置",
+                help="Crawler type (search=Search | detail=Detail | creator=Creator)",
+                rich_help_panel="Basic Configuration",
            ),
        ] = _coerce_enum(CrawlerTypeEnum, config.CRAWLER_TYPE, CrawlerTypeEnum.SEARCH),
        start: Annotated[
            int,
            typer.Option(
                "--start",
-                help="起始页码",
-                rich_help_panel="基础配置",
+                help="Starting page number",
+                rich_help_panel="Basic Configuration",
            ),
        ] = config.START_PAGE,
        keywords: Annotated[
            str,
            typer.Option(
                "--keywords",
-                help="请输入关键词，多个关键词用逗号分隔",
-                rich_help_panel="基础配置",
+                help="Enter keywords, multiple keywords separated by commas",
+                rich_help_panel="Basic Configuration",
            ),
        ] = config.KEYWORDS,
        get_comment: Annotated[
            str,
            typer.Option(
                "--get_comment",
-                help="是否爬取一级评论，支持 yes/true/t/y/1 或 no/false/f/n/0",
-                rich_help_panel="评论配置",
+                help="Whether to crawl first-level comments, supports yes/true/t/y/1 or no/false/f/n/0",
+                rich_help_panel="Comment Configuration",
                show_default=True,
            ),
        ] = str(config.ENABLE_GET_COMMENTS),
@@ -192,8 +192,8 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
            str,
            typer.Option(
                "--get_sub_comment",
-                help="是否爬取二级评论，支持 yes/true/t/y/1 或 no/false/f/n/0",
-                rich_help_panel="评论配置",
+                help="Whether to crawl second-level comments, supports yes/true/t/y/1 or no/false/f/n/0",
+                rich_help_panel="Comment Configuration",
                show_default=True,
            ),
        ] = str(config.ENABLE_GET_SUB_COMMENTS),
@@ -201,8 +201,8 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
            str,
            typer.Option(
                "--headless",
-                help="是否启用无头模式（对 Playwright 和 CDP 均生效），支持 yes/true/t/y/1 或 no/false/f/n/0",
-                rich_help_panel="运行配置",
+                help="Whether to enable headless mode (applies to both Playwright and CDP), supports yes/true/t/y/1 or no/false/f/n/0",
+                rich_help_panel="Runtime Configuration",
                show_default=True,
            ),
        ] = str(config.HEADLESS),
@@ -210,8 +210,8 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
            SaveDataOptionEnum,
            typer.Option(
                "--save_data_option",
-                help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库 | mongodb=MongoDB数据库 | excel=Excel文件)",
-                rich_help_panel="存储配置",
+                help="Data save option (csv=CSV file | db=MySQL database | json=JSON file | sqlite=SQLite database | mongodb=MongoDB database | excel=Excel file)",
+                rich_help_panel="Storage Configuration",
            ),
        ] = _coerce_enum(
            SaveDataOptionEnum, config.SAVE_DATA_OPTION, SaveDataOptionEnum.JSON
@@ -220,32 +220,32 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
            Optional[InitDbOptionEnum],
            typer.Option(
                "--init_db",
-                help="初始化数据库表结构 (sqlite | mysql)",
-                rich_help_panel="存储配置",
+                help="Initialize database table structure (sqlite | mysql)",
+                rich_help_panel="Storage Configuration",
            ),
        ] = None,
        cookies: Annotated[
            str,
            typer.Option(
                "--cookies",
-                help="Cookie 登录方式使用的 Cookie 值",
-                rich_help_panel="账号配置",
+                help="Cookie value used for Cookie login method",
+                rich_help_panel="Account Configuration",
            ),
        ] = config.COOKIES,
        specified_id: Annotated[
            str,
            typer.Option(
                "--specified_id",
-                help="详情模式下的帖子/视频ID列表，多个ID用逗号分隔（支持完整URL或ID）",
-                rich_help_panel="基础配置",
+                help="Post/video ID list in detail mode, multiple IDs separated by commas (supports full URL or ID)",
+                rich_help_panel="Basic Configuration",
            ),
        ] = "",
        creator_id: Annotated[
            str,
            typer.Option(
                "--creator_id",
-                help="创作者模式下的创作者ID列表，多个ID用逗号分隔（支持完整URL或ID）",
-                rich_help_panel="基础配置",
+                help="Creator ID list in creator mode, multiple IDs separated by commas (supports full URL or ID)",
+                rich_help_panel="Basic Configuration",
            ),
        ] = "",
    ) -> SimpleNamespace: