i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text (comments, docstrings, logger messages, and user-facing CLI help/warning strings) to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: the Chinese disclaimer header (lines 10-18) is intentionally left untranslated for legal compliance.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
程序员阿江(Relakkes)
2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions

View File

@@ -37,7 +37,7 @@ EnumT = TypeVar("EnumT", bound=Enum)
class PlatformEnum(str, Enum):
"""支持的媒体平台枚举"""
"""Supported media platform enumeration"""
XHS = "xhs"
DOUYIN = "dy"
@@ -49,7 +49,7 @@ class PlatformEnum(str, Enum):
class LoginTypeEnum(str, Enum):
"""登录方式枚举"""
"""Login type enumeration"""
QRCODE = "qrcode"
PHONE = "phone"
@@ -57,7 +57,7 @@ class LoginTypeEnum(str, Enum):
class CrawlerTypeEnum(str, Enum):
"""爬虫类型枚举"""
"""Crawler type enumeration"""
SEARCH = "search"
DETAIL = "detail"
@@ -65,7 +65,7 @@ class CrawlerTypeEnum(str, Enum):
class SaveDataOptionEnum(str, Enum):
"""数据保存方式枚举"""
"""Data save option enumeration"""
CSV = "csv"
DB = "db"
@@ -76,7 +76,7 @@ class SaveDataOptionEnum(str, Enum):
class InitDbOptionEnum(str, Enum):
"""数据库初始化选项"""
"""Database initialization option"""
SQLITE = "sqlite"
MYSQL = "mysql"
@@ -102,7 +102,7 @@ def _coerce_enum(
return enum_cls(value)
except ValueError:
typer.secho(
f"⚠️ 配置值 '{value}' 不在 {enum_cls.__name__} 支持的范围内,已回退到默认值 '{default.value}'.",
f"⚠️ Config value '{value}' is not within the supported range of {enum_cls.__name__}, falling back to default value '{default.value}'.",
fg=typer.colors.YELLOW,
)
return default
@@ -133,7 +133,7 @@ def _inject_init_db_default(args: Sequence[str]) -> list[str]:
async def parse_cmd(argv: Optional[Sequence[str]] = None):
"""使用 Typer 解析命令行参数。"""
"""Parse command line arguments using Typer."""
app = typer.Typer(add_completion=False)
@@ -143,48 +143,48 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
PlatformEnum,
typer.Option(
"--platform",
help="媒体平台选择 (xhs=小红书 | dy=抖音 | ks=快手 | bili=哔哩哔哩 | wb=微博 | tieba=百度贴吧 | zhihu=知乎)",
rich_help_panel="基础配置",
help="Media platform selection (xhs=XiaoHongShu | dy=Douyin | ks=Kuaishou | bili=Bilibili | wb=Weibo | tieba=Baidu Tieba | zhihu=Zhihu)",
rich_help_panel="Basic Configuration",
),
] = _coerce_enum(PlatformEnum, config.PLATFORM, PlatformEnum.XHS),
lt: Annotated[
LoginTypeEnum,
typer.Option(
"--lt",
help="登录方式 (qrcode=二维码 | phone=手机号 | cookie=Cookie)",
rich_help_panel="账号配置",
help="Login type (qrcode=QR Code | phone=Phone | cookie=Cookie)",
rich_help_panel="Account Configuration",
),
] = _coerce_enum(LoginTypeEnum, config.LOGIN_TYPE, LoginTypeEnum.QRCODE),
crawler_type: Annotated[
CrawlerTypeEnum,
typer.Option(
"--type",
help="爬取类型 (search=搜索 | detail=详情 | creator=创作者)",
rich_help_panel="基础配置",
help="Crawler type (search=Search | detail=Detail | creator=Creator)",
rich_help_panel="Basic Configuration",
),
] = _coerce_enum(CrawlerTypeEnum, config.CRAWLER_TYPE, CrawlerTypeEnum.SEARCH),
start: Annotated[
int,
typer.Option(
"--start",
help="起始页码",
rich_help_panel="基础配置",
help="Starting page number",
rich_help_panel="Basic Configuration",
),
] = config.START_PAGE,
keywords: Annotated[
str,
typer.Option(
"--keywords",
help="请输入关键词,多个关键词用逗号分隔",
rich_help_panel="基础配置",
help="Enter keywords, multiple keywords separated by commas",
rich_help_panel="Basic Configuration",
),
] = config.KEYWORDS,
get_comment: Annotated[
str,
typer.Option(
"--get_comment",
help="是否爬取一级评论,支持 yes/true/t/y/1 no/false/f/n/0",
rich_help_panel="评论配置",
help="Whether to crawl first-level comments, supports yes/true/t/y/1 or no/false/f/n/0",
rich_help_panel="Comment Configuration",
show_default=True,
),
] = str(config.ENABLE_GET_COMMENTS),
@@ -192,8 +192,8 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
str,
typer.Option(
"--get_sub_comment",
help="是否爬取二级评论,支持 yes/true/t/y/1 no/false/f/n/0",
rich_help_panel="评论配置",
help="Whether to crawl second-level comments, supports yes/true/t/y/1 or no/false/f/n/0",
rich_help_panel="Comment Configuration",
show_default=True,
),
] = str(config.ENABLE_GET_SUB_COMMENTS),
@@ -201,8 +201,8 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
str,
typer.Option(
"--headless",
help="是否启用无头模式(对 Playwright CDP 均生效),支持 yes/true/t/y/1 no/false/f/n/0",
rich_help_panel="运行配置",
help="Whether to enable headless mode (applies to both Playwright and CDP), supports yes/true/t/y/1 or no/false/f/n/0",
rich_help_panel="Runtime Configuration",
show_default=True,
),
] = str(config.HEADLESS),
@@ -210,8 +210,8 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
SaveDataOptionEnum,
typer.Option(
"--save_data_option",
help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库 | mongodb=MongoDB数据库 | excel=Excel文件)",
rich_help_panel="存储配置",
help="Data save option (csv=CSV file | db=MySQL database | json=JSON file | sqlite=SQLite database | mongodb=MongoDB database | excel=Excel file)",
rich_help_panel="Storage Configuration",
),
] = _coerce_enum(
SaveDataOptionEnum, config.SAVE_DATA_OPTION, SaveDataOptionEnum.JSON
@@ -220,32 +220,32 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
Optional[InitDbOptionEnum],
typer.Option(
"--init_db",
help="初始化数据库表结构 (sqlite | mysql)",
rich_help_panel="存储配置",
help="Initialize database table structure (sqlite | mysql)",
rich_help_panel="Storage Configuration",
),
] = None,
cookies: Annotated[
str,
typer.Option(
"--cookies",
help="Cookie 登录方式使用的 Cookie 值",
rich_help_panel="账号配置",
help="Cookie value used for Cookie login method",
rich_help_panel="Account Configuration",
),
] = config.COOKIES,
specified_id: Annotated[
str,
typer.Option(
"--specified_id",
help="详情模式下的帖子/视频ID列表多个ID用逗号分隔支持完整URL或ID",
rich_help_panel="基础配置",
help="Post/video ID list in detail mode, multiple IDs separated by commas (supports full URL or ID)",
rich_help_panel="Basic Configuration",
),
] = "",
creator_id: Annotated[
str,
typer.Option(
"--creator_id",
help="创作者模式下的创作者ID列表多个ID用逗号分隔支持完整URL或ID",
rich_help_panel="基础配置",
help="Creator ID list in creator mode, multiple IDs separated by commas (supports full URL or ID)",
rich_help_panel="Basic Configuration",
),
] = "",
) -> SimpleNamespace: