mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-02-06 23:21:33 +08:00
i18n: translate all Chinese comments, docstrings, and logger messages to English
Comprehensive translation of Chinese text to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: Chinese disclaimer header (lines 10-18) for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -37,7 +37,7 @@ EnumT = TypeVar("EnumT", bound=Enum)
|
||||
|
||||
|
||||
class PlatformEnum(str, Enum):
|
||||
"""支持的媒体平台枚举"""
|
||||
"""Supported media platform enumeration"""
|
||||
|
||||
XHS = "xhs"
|
||||
DOUYIN = "dy"
|
||||
@@ -49,7 +49,7 @@ class PlatformEnum(str, Enum):
|
||||
|
||||
|
||||
class LoginTypeEnum(str, Enum):
|
||||
"""登录方式枚举"""
|
||||
"""Login type enumeration"""
|
||||
|
||||
QRCODE = "qrcode"
|
||||
PHONE = "phone"
|
||||
@@ -57,7 +57,7 @@ class LoginTypeEnum(str, Enum):
|
||||
|
||||
|
||||
class CrawlerTypeEnum(str, Enum):
|
||||
"""爬虫类型枚举"""
|
||||
"""Crawler type enumeration"""
|
||||
|
||||
SEARCH = "search"
|
||||
DETAIL = "detail"
|
||||
@@ -65,7 +65,7 @@ class CrawlerTypeEnum(str, Enum):
|
||||
|
||||
|
||||
class SaveDataOptionEnum(str, Enum):
|
||||
"""数据保存方式枚举"""
|
||||
"""Data save option enumeration"""
|
||||
|
||||
CSV = "csv"
|
||||
DB = "db"
|
||||
@@ -76,7 +76,7 @@ class SaveDataOptionEnum(str, Enum):
|
||||
|
||||
|
||||
class InitDbOptionEnum(str, Enum):
|
||||
"""数据库初始化选项"""
|
||||
"""Database initialization option"""
|
||||
|
||||
SQLITE = "sqlite"
|
||||
MYSQL = "mysql"
|
||||
@@ -102,7 +102,7 @@ def _coerce_enum(
|
||||
return enum_cls(value)
|
||||
except ValueError:
|
||||
typer.secho(
|
||||
f"⚠️ 配置值 '{value}' 不在 {enum_cls.__name__} 支持的范围内,已回退到默认值 '{default.value}'.",
|
||||
f"⚠️ Config value '{value}' is not within the supported range of {enum_cls.__name__}, falling back to default value '{default.value}'.",
|
||||
fg=typer.colors.YELLOW,
|
||||
)
|
||||
return default
|
||||
@@ -133,7 +133,7 @@ def _inject_init_db_default(args: Sequence[str]) -> list[str]:
|
||||
|
||||
|
||||
async def parse_cmd(argv: Optional[Sequence[str]] = None):
|
||||
"""使用 Typer 解析命令行参数。"""
|
||||
"""Parse command line arguments using Typer."""
|
||||
|
||||
app = typer.Typer(add_completion=False)
|
||||
|
||||
@@ -143,48 +143,48 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
|
||||
PlatformEnum,
|
||||
typer.Option(
|
||||
"--platform",
|
||||
help="媒体平台选择 (xhs=小红书 | dy=抖音 | ks=快手 | bili=哔哩哔哩 | wb=微博 | tieba=百度贴吧 | zhihu=知乎)",
|
||||
rich_help_panel="基础配置",
|
||||
help="Media platform selection (xhs=XiaoHongShu | dy=Douyin | ks=Kuaishou | bili=Bilibili | wb=Weibo | tieba=Baidu Tieba | zhihu=Zhihu)",
|
||||
rich_help_panel="Basic Configuration",
|
||||
),
|
||||
] = _coerce_enum(PlatformEnum, config.PLATFORM, PlatformEnum.XHS),
|
||||
lt: Annotated[
|
||||
LoginTypeEnum,
|
||||
typer.Option(
|
||||
"--lt",
|
||||
help="登录方式 (qrcode=二维码 | phone=手机号 | cookie=Cookie)",
|
||||
rich_help_panel="账号配置",
|
||||
help="Login type (qrcode=QR Code | phone=Phone | cookie=Cookie)",
|
||||
rich_help_panel="Account Configuration",
|
||||
),
|
||||
] = _coerce_enum(LoginTypeEnum, config.LOGIN_TYPE, LoginTypeEnum.QRCODE),
|
||||
crawler_type: Annotated[
|
||||
CrawlerTypeEnum,
|
||||
typer.Option(
|
||||
"--type",
|
||||
help="爬取类型 (search=搜索 | detail=详情 | creator=创作者)",
|
||||
rich_help_panel="基础配置",
|
||||
help="Crawler type (search=Search | detail=Detail | creator=Creator)",
|
||||
rich_help_panel="Basic Configuration",
|
||||
),
|
||||
] = _coerce_enum(CrawlerTypeEnum, config.CRAWLER_TYPE, CrawlerTypeEnum.SEARCH),
|
||||
start: Annotated[
|
||||
int,
|
||||
typer.Option(
|
||||
"--start",
|
||||
help="起始页码",
|
||||
rich_help_panel="基础配置",
|
||||
help="Starting page number",
|
||||
rich_help_panel="Basic Configuration",
|
||||
),
|
||||
] = config.START_PAGE,
|
||||
keywords: Annotated[
|
||||
str,
|
||||
typer.Option(
|
||||
"--keywords",
|
||||
help="请输入关键词,多个关键词用逗号分隔",
|
||||
rich_help_panel="基础配置",
|
||||
help="Enter keywords, multiple keywords separated by commas",
|
||||
rich_help_panel="Basic Configuration",
|
||||
),
|
||||
] = config.KEYWORDS,
|
||||
get_comment: Annotated[
|
||||
str,
|
||||
typer.Option(
|
||||
"--get_comment",
|
||||
help="是否爬取一级评论,支持 yes/true/t/y/1 或 no/false/f/n/0",
|
||||
rich_help_panel="评论配置",
|
||||
help="Whether to crawl first-level comments, supports yes/true/t/y/1 or no/false/f/n/0",
|
||||
rich_help_panel="Comment Configuration",
|
||||
show_default=True,
|
||||
),
|
||||
] = str(config.ENABLE_GET_COMMENTS),
|
||||
@@ -192,8 +192,8 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
|
||||
str,
|
||||
typer.Option(
|
||||
"--get_sub_comment",
|
||||
help="是否爬取二级评论,支持 yes/true/t/y/1 或 no/false/f/n/0",
|
||||
rich_help_panel="评论配置",
|
||||
help="Whether to crawl second-level comments, supports yes/true/t/y/1 or no/false/f/n/0",
|
||||
rich_help_panel="Comment Configuration",
|
||||
show_default=True,
|
||||
),
|
||||
] = str(config.ENABLE_GET_SUB_COMMENTS),
|
||||
@@ -201,8 +201,8 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
|
||||
str,
|
||||
typer.Option(
|
||||
"--headless",
|
||||
help="是否启用无头模式(对 Playwright 和 CDP 均生效),支持 yes/true/t/y/1 或 no/false/f/n/0",
|
||||
rich_help_panel="运行配置",
|
||||
help="Whether to enable headless mode (applies to both Playwright and CDP), supports yes/true/t/y/1 or no/false/f/n/0",
|
||||
rich_help_panel="Runtime Configuration",
|
||||
show_default=True,
|
||||
),
|
||||
] = str(config.HEADLESS),
|
||||
@@ -210,8 +210,8 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
|
||||
SaveDataOptionEnum,
|
||||
typer.Option(
|
||||
"--save_data_option",
|
||||
help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库 | mongodb=MongoDB数据库 | excel=Excel文件)",
|
||||
rich_help_panel="存储配置",
|
||||
help="Data save option (csv=CSV file | db=MySQL database | json=JSON file | sqlite=SQLite database | mongodb=MongoDB database | excel=Excel file)",
|
||||
rich_help_panel="Storage Configuration",
|
||||
),
|
||||
] = _coerce_enum(
|
||||
SaveDataOptionEnum, config.SAVE_DATA_OPTION, SaveDataOptionEnum.JSON
|
||||
@@ -220,32 +220,32 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
|
||||
Optional[InitDbOptionEnum],
|
||||
typer.Option(
|
||||
"--init_db",
|
||||
help="初始化数据库表结构 (sqlite | mysql)",
|
||||
rich_help_panel="存储配置",
|
||||
help="Initialize database table structure (sqlite | mysql)",
|
||||
rich_help_panel="Storage Configuration",
|
||||
),
|
||||
] = None,
|
||||
cookies: Annotated[
|
||||
str,
|
||||
typer.Option(
|
||||
"--cookies",
|
||||
help="Cookie 登录方式使用的 Cookie 值",
|
||||
rich_help_panel="账号配置",
|
||||
help="Cookie value used for Cookie login method",
|
||||
rich_help_panel="Account Configuration",
|
||||
),
|
||||
] = config.COOKIES,
|
||||
specified_id: Annotated[
|
||||
str,
|
||||
typer.Option(
|
||||
"--specified_id",
|
||||
help="详情模式下的帖子/视频ID列表,多个ID用逗号分隔(支持完整URL或ID)",
|
||||
rich_help_panel="基础配置",
|
||||
help="Post/video ID list in detail mode, multiple IDs separated by commas (supports full URL or ID)",
|
||||
rich_help_panel="Basic Configuration",
|
||||
),
|
||||
] = "",
|
||||
creator_id: Annotated[
|
||||
str,
|
||||
typer.Option(
|
||||
"--creator_id",
|
||||
help="创作者模式下的创作者ID列表,多个ID用逗号分隔(支持完整URL或ID)",
|
||||
rich_help_panel="基础配置",
|
||||
help="Creator ID list in creator mode, multiple IDs separated by commas (supports full URL or ID)",
|
||||
rich_help_panel="Basic Configuration",
|
||||
),
|
||||
] = "",
|
||||
) -> SimpleNamespace:
|
||||
|
||||
Reference in New Issue
Block a user