i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text to English across the entire codebase: - api/: FastAPI server documentation and logger messages - cache/: Cache abstraction layer comments and docstrings - database/: Database models and MongoDB store documentation - media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu) - model/: Data model documentation - proxy/: Proxy pool and provider documentation - store/: Data storage layer comments - tools/: Utility functions and browser automation - test/: Test file documentation Preserved: Chinese disclaimer header (lines 10-18) for legal compliance 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-06-24 17:54:52 +08:00 · 2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions
--- a/api/main.py
+++ b/api/main.py
@@ -18,8 +18,8 @@

 """
 MediaCrawler WebUI API Server
-启动命令: uvicorn api.main:app --port 8080 --reload
-或者: python -m api.main
+Start command: uvicorn api.main:app --port 8080 --reload
+Or: python -m api.main
 """
 import asyncio
 import os
@@ -38,15 +38,15 @@ app = FastAPI(
    version="1.0.0"
 )

-# 获取 webui 静态文件目录
+# Get webui static files directory
 WEBUI_DIR = os.path.join(os.path.dirname(__file__), "webui")

-# CORS 配置 - 允许前端开发服务器访问
+# CORS configuration - allow frontend dev server access
 app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:5173",  # Vite dev server
-        "http://localhost:3000",  # 备用端口
+        "http://localhost:3000",  # Backup port
        "http://127.0.0.1:5173",
        "http://127.0.0.1:3000",
    ],
@@ -55,7 +55,7 @@ app.add_middleware(
    allow_headers=["*"],
 )

-# 注册路由
+# Register routers
 app.include_router(crawler_router, prefix="/api")
 app.include_router(data_router, prefix="/api")
 app.include_router(websocket_router, prefix="/api")
@@ -63,7 +63,7 @@ app.include_router(websocket_router, prefix="/api")

@app.get("/")
 async def serve_frontend():
-    """返回前端页面"""
+    """Return frontend page"""
    index_path = os.path.join(WEBUI_DIR, "index.html")
    if os.path.exists(index_path):
        return FileResponse(index_path)
@@ -82,103 +82,103 @@ async def health_check():

@app.get("/api/env/check")
 async def check_environment():
-    """检测 MediaCrawler 环境是否配置正确"""
+    """Check if MediaCrawler environment is configured correctly"""
    try:
-        # 运行 uv run main.py --help 命令检测环境
+        # Run uv run main.py --help command to check environment
        process = await asyncio.create_subprocess_exec(
            "uv", "run", "main.py", "--help",
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
-            cwd="."  # 项目根目录
+            cwd="."  # Project root directory
        )
        stdout, stderr = await asyncio.wait_for(
            process.communicate(),
-            timeout=30.0  # 30秒超时
+            timeout=30.0  # 30 seconds timeout
        )

        if process.returncode == 0:
            return {
                "success": True,
-                "message": "MediaCrawler 环境配置正确",
-                "output": stdout.decode("utf-8", errors="ignore")[:500]  # 截取前500字符
+                "message": "MediaCrawler environment configured correctly",
+                "output": stdout.decode("utf-8", errors="ignore")[:500]  # Truncate to first 500 characters
            }
        else:
            error_msg = stderr.decode("utf-8", errors="ignore") or stdout.decode("utf-8", errors="ignore")
            return {
                "success": False,
-                "message": "环境检测失败",
+                "message": "Environment check failed",
                "error": error_msg[:500]
            }
    except asyncio.TimeoutError:
        return {
            "success": False,
-            "message": "环境检测超时",
-            "error": "命令执行超过30秒"
+            "message": "Environment check timeout",
+            "error": "Command execution exceeded 30 seconds"
        }
    except FileNotFoundError:
        return {
            "success": False,
-            "message": "未找到 uv 命令",
-            "error": "请确保已安装 uv 并配置到系统 PATH"
+            "message": "uv command not found",
+            "error": "Please ensure uv is installed and configured in system PATH"
        }
    except Exception as e:
        return {
            "success": False,
-            "message": "环境检测出错",
+            "message": "Environment check error",
            "error": str(e)
        }


@app.get("/api/config/platforms")
 async def get_platforms():
-    """获取支持的平台列表"""
+    """Get list of supported platforms"""
    return {
        "platforms": [
-            {"value": "xhs", "label": "小红书", "icon": "book-open"},
-            {"value": "dy", "label": "抖音", "icon": "music"},
-            {"value": "ks", "label": "快手", "icon": "video"},
-            {"value": "bili", "label": "哔哩哔哩", "icon": "tv"},
-            {"value": "wb", "label": "微博", "icon": "message-circle"},
-            {"value": "tieba", "label": "百度贴吧", "icon": "messages-square"},
-            {"value": "zhihu", "label": "知乎", "icon": "help-circle"},
+            {"value": "xhs", "label": "Xiaohongshu", "icon": "book-open"},
+            {"value": "dy", "label": "Douyin", "icon": "music"},
+            {"value": "ks", "label": "Kuaishou", "icon": "video"},
+            {"value": "bili", "label": "Bilibili", "icon": "tv"},
+            {"value": "wb", "label": "Weibo", "icon": "message-circle"},
+            {"value": "tieba", "label": "Baidu Tieba", "icon": "messages-square"},
+            {"value": "zhihu", "label": "Zhihu", "icon": "help-circle"},
        ]
    }


@app.get("/api/config/options")
 async def get_config_options():
-    """获取所有配置选项"""
+    """Get all configuration options"""
    return {
        "login_types": [
-            {"value": "qrcode", "label": "二维码登录"},
-            {"value": "cookie", "label": "Cookie登录"},
+            {"value": "qrcode", "label": "QR Code Login"},
+            {"value": "cookie", "label": "Cookie Login"},
        ],
        "crawler_types": [
-            {"value": "search", "label": "搜索模式"},
-            {"value": "detail", "label": "详情模式"},
-            {"value": "creator", "label": "创作者模式"},
+            {"value": "search", "label": "Search Mode"},
+            {"value": "detail", "label": "Detail Mode"},
+            {"value": "creator", "label": "Creator Mode"},
        ],
        "save_options": [
-            {"value": "json", "label": "JSON 文件"},
-            {"value": "csv", "label": "CSV 文件"},
-            {"value": "excel", "label": "Excel 文件"},
-            {"value": "sqlite", "label": "SQLite 数据库"},
-            {"value": "db", "label": "MySQL 数据库"},
-            {"value": "mongodb", "label": "MongoDB 数据库"},
+            {"value": "json", "label": "JSON File"},
+            {"value": "csv", "label": "CSV File"},
+            {"value": "excel", "label": "Excel File"},
+            {"value": "sqlite", "label": "SQLite Database"},
+            {"value": "db", "label": "MySQL Database"},
+            {"value": "mongodb", "label": "MongoDB Database"},
        ],
    }


-# 挂载静态资源 - 必须放在所有路由之后
+# Mount static resources - must be placed after all routes
 if os.path.exists(WEBUI_DIR):
    assets_dir = os.path.join(WEBUI_DIR, "assets")
    if os.path.exists(assets_dir):
        app.mount("/assets", StaticFiles(directory=assets_dir), name="assets")
-    # 挂载 logos 目录
+    # Mount logos directory
    logos_dir = os.path.join(WEBUI_DIR, "logos")
    if os.path.exists(logos_dir):
        app.mount("/logos", StaticFiles(directory=logos_dir), name="logos")
-    # 挂载其他静态文件（如 vite.svg）
+    # Mount other static files (e.g., vite.svg)
    app.mount("/static", StaticFiles(directory=WEBUI_DIR), name="webui-static")


--- a/api/routers/crawler.py
+++ b/api/routers/crawler.py
@@ -26,10 +26,10 @@ router = APIRouter(prefix="/crawler", tags=["crawler"])

@router.post("/start")
 async def start_crawler(request: CrawlerStartRequest):
-    """启动爬虫任务"""
+    """Start crawler task"""
    success = await crawler_manager.start(request)
    if not success:
-        # 处理并发/重复请求：如果进程已经在跑，返回 400 而不是 500
+        # Handle concurrent/duplicate requests: if process is already running, return 400 instead of 500
        if crawler_manager.process and crawler_manager.process.poll() is None:
            raise HTTPException(status_code=400, detail="Crawler is already running")
        raise HTTPException(status_code=500, detail="Failed to start crawler")
@@ -39,10 +39,10 @@ async def start_crawler(request: CrawlerStartRequest):

@router.post("/stop")
 async def stop_crawler():
-    """停止爬虫任务"""
+    """Stop crawler task"""
    success = await crawler_manager.stop()
    if not success:
-        # 处理并发/重复请求：如果进程已退出/不存在，返回 400 而不是 500
+        # Handle concurrent/duplicate requests: if process already exited/doesn't exist, return 400 instead of 500
        if not crawler_manager.process or crawler_manager.process.poll() is not None:
            raise HTTPException(status_code=400, detail="No crawler is running")
        raise HTTPException(status_code=500, detail="Failed to stop crawler")
@@ -52,12 +52,12 @@ async def stop_crawler():

@router.get("/status", response_model=CrawlerStatusResponse)
 async def get_crawler_status():
-    """获取爬虫状态"""
+    """Get crawler status"""
    return crawler_manager.get_status()


@router.get("/logs")
 async def get_logs(limit: int = 100):
-    """获取最近的日志"""
+    """Get recent logs"""
    logs = crawler_manager.logs[-limit:] if limit > 0 else crawler_manager.logs
    return {"logs": [log.model_dump() for log in logs]}
--- a/api/routers/data.py
+++ b/api/routers/data.py
@@ -26,16 +26,16 @@ from fastapi.responses import FileResponse

 router = APIRouter(prefix="/data", tags=["data"])

-# 数据目录
+# Data directory
 DATA_DIR = Path(__file__).parent.parent.parent / "data"


 def get_file_info(file_path: Path) -> dict:
-    """获取文件信息"""
+    """Get file information"""
    stat = file_path.stat()
    record_count = None

-    # 尝试获取记录数
+    # Try to get record count
    try:
        if file_path.suffix == ".json":
            with open(file_path, "r", encoding="utf-8") as f:
@@ -44,7 +44,7 @@ def get_file_info(file_path: Path) -> dict:
                    record_count = len(data)
        elif file_path.suffix == ".csv":
            with open(file_path, "r", encoding="utf-8") as f:
-                record_count = sum(1 for _ in f) - 1  # 减去标题行
+                record_count = sum(1 for _ in f) - 1  # Subtract header row
    except Exception:
        pass

@@ -60,7 +60,7 @@ def get_file_info(file_path: Path) -> dict:

@router.get("/files")
 async def list_data_files(platform: Optional[str] = None, file_type: Optional[str] = None):
-    """获取数据文件列表"""
+    """Get data file list"""
    if not DATA_DIR.exists():
        return {"files": []}

@@ -74,13 +74,13 @@ async def list_data_files(platform: Optional[str] = None, file_type: Optional[st
            if file_path.suffix.lower() not in supported_extensions:
                continue

-            # 平台过滤
+            # Platform filter
            if platform:
                rel_path = str(file_path.relative_to(DATA_DIR))
                if platform.lower() not in rel_path.lower():
                    continue

-            # 类型过滤
+            # Type filter
            if file_type and file_path.suffix[1:].lower() != file_type.lower():
                continue

@@ -89,7 +89,7 @@ async def list_data_files(platform: Optional[str] = None, file_type: Optional[st
            except Exception:
                continue

-    # 按修改时间排序（最新的在前）
+    # Sort by modification time (newest first)
    files.sort(key=lambda x: x["modified_at"], reverse=True)

    return {"files": files}
@@ -97,7 +97,7 @@ async def list_data_files(platform: Optional[str] = None, file_type: Optional[st

@router.get("/files/{file_path:path}")
 async def get_file_content(file_path: str, preview: bool = True, limit: int = 100):
-    """获取文件内容或预览"""
+    """Get file content or preview"""
    full_path = DATA_DIR / file_path

    if not full_path.exists():
@@ -106,14 +106,14 @@ async def get_file_content(file_path: str, preview: bool = True, limit: int = 10
    if not full_path.is_file():
        raise HTTPException(status_code=400, detail="Not a file")

-    # 安全检查：确保在 DATA_DIR 内
+    # Security check: ensure within DATA_DIR
    try:
        full_path.resolve().relative_to(DATA_DIR.resolve())
    except ValueError:
        raise HTTPException(status_code=403, detail="Access denied")

    if preview:
-        # 返回预览数据
+        # Return preview data
        try:
            if full_path.suffix == ".json":
                with open(full_path, "r", encoding="utf-8") as f:
@@ -130,18 +130,18 @@ async def get_file_content(file_path: str, preview: bool = True, limit: int = 10
                        if i >= limit:
                            break
                        rows.append(row)
-                    # 重新读取获取总数
+                    # Re-read to get total count
                    f.seek(0)
                    total = sum(1 for _ in f) - 1
                    return {"data": rows, "total": total}
            elif full_path.suffix.lower() in (".xlsx", ".xls"):
                import pandas as pd
-                # 读取前 limit 行
+                # Read first limit rows
                df = pd.read_excel(full_path, nrows=limit)
-                # 获取总行数（只读取第一列来节省内存）
+                # Get total row count (only read first column to save memory)
                df_count = pd.read_excel(full_path, usecols=[0])
                total = len(df_count)
-                # 转换为字典列表，处理 NaN 值
+                # Convert to list of dictionaries, handle NaN values
                rows = df.where(pd.notnull(df), None).to_dict(orient='records')
                return {
                    "data": rows,
@@ -155,7 +155,7 @@ async def get_file_content(file_path: str, preview: bool = True, limit: int = 10
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))
    else:
-        # 返回文件下载
+        # Return file download
        return FileResponse(
            path=full_path,
            filename=full_path.name,
@@ -165,7 +165,7 @@ async def get_file_content(file_path: str, preview: bool = True, limit: int = 10

@router.get("/download/{file_path:path}")
 async def download_file(file_path: str):
-    """下载文件"""
+    """Download file"""
    full_path = DATA_DIR / file_path

    if not full_path.exists():
@@ -174,7 +174,7 @@ async def download_file(file_path: str):
    if not full_path.is_file():
        raise HTTPException(status_code=400, detail="Not a file")

-    # 安全检查
+    # Security check
    try:
        full_path.resolve().relative_to(DATA_DIR.resolve())
    except ValueError:
@@ -189,7 +189,7 @@ async def download_file(file_path: str):

@router.get("/stats")
 async def get_data_stats():
-    """获取数据统计"""
+    """Get data statistics"""
    if not DATA_DIR.exists():
        return {"total_files": 0, "total_size": 0, "by_platform": {}, "by_type": {}}

@@ -214,11 +214,11 @@ async def get_data_stats():
                stats["total_files"] += 1
                stats["total_size"] += stat.st_size

-                # 按类型统计
+                # Statistics by type
                file_type = file_path.suffix[1:].lower()
                stats["by_type"][file_type] = stats["by_type"].get(file_type, 0) + 1

-                # 按平台统计（从路径推断）
+                # Statistics by platform (inferred from path)
                rel_path = str(file_path.relative_to(DATA_DIR))
                for platform in ["xhs", "dy", "ks", "bili", "wb", "tieba", "zhihu"]:
                    if platform in rel_path.lower():
--- a/api/routers/websocket.py
+++ b/api/routers/websocket.py
@@ -27,7 +27,7 @@ router = APIRouter(tags=["websocket"])


 class ConnectionManager:
-    """WebSocket 连接管理器"""
+    """WebSocket connection manager"""

    def __init__(self):
        self.active_connections: Set[WebSocket] = set()
@@ -40,7 +40,7 @@ class ConnectionManager:
        self.active_connections.discard(websocket)

    async def broadcast(self, message: dict):
-        """广播消息到所有连接"""
+        """Broadcast message to all connections"""
        if not self.active_connections:
            return

@@ -51,7 +51,7 @@ class ConnectionManager:
            except Exception:
                disconnected.append(connection)

-        # 清理断开的连接
+        # Clean up disconnected connections
        for conn in disconnected:
            self.disconnect(conn)

@@ -60,13 +60,13 @@ manager = ConnectionManager()


 async def log_broadcaster():
-    """后台任务：从队列读取日志并广播"""
+    """Background task: read logs from queue and broadcast"""
    queue = crawler_manager.get_log_queue()
    while True:
        try:
-            # 从队列获取日志条目
+            # Get log entry from queue
            entry = await queue.get()
-            # 广播到所有 WebSocket 连接
+            # Broadcast to all WebSocket connections
            await manager.broadcast(entry.model_dump())
        except asyncio.CancelledError:
            break
@@ -75,12 +75,12 @@ async def log_broadcaster():
            await asyncio.sleep(0.1)


-# 全局广播任务
+# Global broadcast task
 _broadcaster_task: Optional[asyncio.Task] = None


 def start_broadcaster():
-    """启动广播任务"""
+    """Start broadcast task"""
    global _broadcaster_task
    if _broadcaster_task is None or _broadcaster_task.done():
        _broadcaster_task = asyncio.create_task(log_broadcaster())
@@ -88,17 +88,17 @@ def start_broadcaster():

@router.websocket("/ws/logs")
 async def websocket_logs(websocket: WebSocket):
-    """WebSocket 日志流"""
+    """WebSocket log stream"""
    print("[WS] New connection attempt")

    try:
-        # 确保广播任务在运行
+        # Ensure broadcast task is running
        start_broadcaster()

        await manager.connect(websocket)
        print(f"[WS] Connected, active connections: {len(manager.active_connections)}")

-        # 发送现有日志
+        # Send existing logs
        for log in crawler_manager.logs:
            try:
                await websocket.send_json(log.model_dump())
@@ -109,7 +109,7 @@ async def websocket_logs(websocket: WebSocket):
        print(f"[WS] Sent {len(crawler_manager.logs)} existing logs, entering main loop")

        while True:
-            # 保持连接活跃，接收心跳或任意消息
+            # Keep connection alive, receive heartbeat or any message
            try:
                data = await asyncio.wait_for(
                    websocket.receive_text(),
@@ -118,7 +118,7 @@ async def websocket_logs(websocket: WebSocket):
                if data == "ping":
                    await websocket.send_text("pong")
            except asyncio.TimeoutError:
-                # 发送 ping 保持连接
+                # Send ping to keep connection alive
                try:
                    await websocket.send_text("ping")
                except Exception as e:
@@ -136,12 +136,12 @@ async def websocket_logs(websocket: WebSocket):

@router.websocket("/ws/status")
 async def websocket_status(websocket: WebSocket):
-    """WebSocket 状态流"""
+    """WebSocket status stream"""
    await websocket.accept()

    try:
        while True:
-            # 每秒发送一次状态
+            # Send status every second
            status = crawler_manager.get_status()
            await websocket.send_json(status)
            await asyncio.sleep(1)
--- a/api/schemas/crawler.py
+++ b/api/schemas/crawler.py
@@ -22,7 +22,7 @@ from pydantic import BaseModel


 class PlatformEnum(str, Enum):
-    """支持的媒体平台"""
+    """Supported media platforms"""
    XHS = "xhs"
    DOUYIN = "dy"
    KUAISHOU = "ks"
@@ -33,21 +33,21 @@ class PlatformEnum(str, Enum):


 class LoginTypeEnum(str, Enum):
-    """登录方式"""
+    """Login method"""
    QRCODE = "qrcode"
    PHONE = "phone"
    COOKIE = "cookie"


 class CrawlerTypeEnum(str, Enum):
-    """爬虫类型"""
+    """Crawler type"""
    SEARCH = "search"
    DETAIL = "detail"
    CREATOR = "creator"


 class SaveDataOptionEnum(str, Enum):
-    """数据保存方式"""
+    """Data save option"""
    CSV = "csv"
    DB = "db"
    JSON = "json"
@@ -57,13 +57,13 @@ class SaveDataOptionEnum(str, Enum):


 class CrawlerStartRequest(BaseModel):
-    """启动爬虫请求"""
+    """Crawler start request"""
    platform: PlatformEnum
    login_type: LoginTypeEnum = LoginTypeEnum.QRCODE
    crawler_type: CrawlerTypeEnum = CrawlerTypeEnum.SEARCH
-    keywords: str = ""  # 搜索模式下的关键词
-    specified_ids: str = ""  # 详情模式下的帖子/视频ID列表，逗号分隔
-    creator_ids: str = ""  # 创作者模式下的创作者ID列表，逗号分隔
+    keywords: str = ""  # Keywords for search mode
+    specified_ids: str = ""  # Post/video ID list for detail mode, comma-separated
+    creator_ids: str = ""  # Creator ID list for creator mode, comma-separated
    start_page: int = 1
    enable_comments: bool = True
    enable_sub_comments: bool = False
@@ -73,7 +73,7 @@ class CrawlerStartRequest(BaseModel):


 class CrawlerStatusResponse(BaseModel):
-    """爬虫状态响应"""
+    """Crawler status response"""
    status: Literal["idle", "running", "stopping", "error"]
    platform: Optional[str] = None
    crawler_type: Optional[str] = None
@@ -82,7 +82,7 @@ class CrawlerStatusResponse(BaseModel):


 class LogEntry(BaseModel):
-    """日志条目"""
+    """Log entry"""
    id: int
    timestamp: str
    level: Literal["info", "warning", "error", "success", "debug"]
@@ -90,7 +90,7 @@ class LogEntry(BaseModel):


 class DataFileInfo(BaseModel):
-    """数据文件信息"""
+    """Data file information"""
    name: str
    path: str
    size: int
--- a/api/services/crawler_manager.py
+++ b/api/services/crawler_manager.py
@@ -28,7 +28,7 @@ from ..schemas import CrawlerStartRequest, LogEntry


 class CrawlerManager:
-    """爬虫进程管理器"""
+    """Crawler process manager"""

    def __init__(self):
        self._lock = asyncio.Lock()
@@ -39,9 +39,9 @@ class CrawlerManager:
        self._log_id = 0
        self._logs: List[LogEntry] = []
        self._read_task: Optional[asyncio.Task] = None
-        # 项目根目录
+        # Project root directory
        self._project_root = Path(__file__).parent.parent.parent
-        # 日志队列 - 用于向 WebSocket 推送
+        # Log queue - for pushing to WebSocket
        self._log_queue: Optional[asyncio.Queue] = None

    @property
@@ -49,13 +49,13 @@ class CrawlerManager:
        return self._logs

    def get_log_queue(self) -> asyncio.Queue:
-        """获取或创建日志队列"""
+        """Get or create log queue"""
        if self._log_queue is None:
            self._log_queue = asyncio.Queue()
        return self._log_queue

    def _create_log_entry(self, message: str, level: str = "info") -> LogEntry:
-        """创建日志条目"""
+        """Create log entry"""
        self._log_id += 1
        entry = LogEntry(
            id=self._log_id,
@@ -64,13 +64,13 @@ class CrawlerManager:
            message=message
        )
        self._logs.append(entry)
-        # 保留最近 500 条日志
+        # Keep last 500 logs
        if len(self._logs) > 500:
            self._logs = self._logs[-500:]
        return entry

    async def _push_log(self, entry: LogEntry):
-        """推送日志到队列"""
+        """Push log to queue"""
        if self._log_queue is not None:
            try:
                self._log_queue.put_nowait(entry)
@@ -78,7 +78,7 @@ class CrawlerManager:
                pass

    def _parse_log_level(self, line: str) -> str:
-        """解析日志级别"""
+        """Parse log level"""
        line_upper = line.upper()
        if "ERROR" in line_upper or "FAILED" in line_upper:
            return "error"
@@ -91,16 +91,16 @@ class CrawlerManager:
        return "info"

    async def start(self, config: CrawlerStartRequest) -> bool:
-        """启动爬虫进程"""
+        """Start crawler process"""
        async with self._lock:
            if self.process and self.process.poll() is None:
                return False

-            # 清空旧日志
+            # Clear old logs
            self._logs = []
            self._log_id = 0

-            # 清空待推送队列（不要替换对象，避免 WebSocket 广播协程持有旧队列引用）
+            # Clear pending queue (don't replace object to avoid WebSocket broadcast coroutine holding old queue reference)
            if self._log_queue is None:
                self._log_queue = asyncio.Queue()
            else:
@@ -110,15 +110,15 @@ class CrawlerManager:
                except asyncio.QueueEmpty:
                    pass

-            # 构建命令行参数
+            # Build command line arguments
            cmd = self._build_command(config)

-            # 记录启动日志
+            # Log start information
            entry = self._create_log_entry(f"Starting crawler: {' '.join(cmd)}", "info")
            await self._push_log(entry)

            try:
-                # 启动子进程
+                # Start subprocess
                self.process = subprocess.Popen(
                    cmd,
                    stdout=subprocess.PIPE,
@@ -139,7 +139,7 @@ class CrawlerManager:
                )
                await self._push_log(entry)

-                # 启动日志读取任务
+                # Start log reading task
                self._read_task = asyncio.create_task(self._read_output())

                return True
@@ -150,7 +150,7 @@ class CrawlerManager:
                return False

    async def stop(self) -> bool:
-        """停止爬虫进程"""
+        """Stop crawler process"""
        async with self._lock:
            if not self.process or self.process.poll() is not None:
                return False
@@ -162,13 +162,13 @@ class CrawlerManager:
            try:
                self.process.send_signal(signal.SIGTERM)

-                # 等待优雅退出 (最多15秒)
+                # Wait for graceful exit (up to 15 seconds)
                for _ in range(30):
                    if self.process.poll() is not None:
                        break
                    await asyncio.sleep(0.5)

-                # 如果还没退出，强制杀死
+                # If still not exited, force kill
                if self.process.poll() is None:
                    entry = self._create_log_entry("Process not responding, sending SIGKILL...", "warning")
                    await self._push_log(entry)
@@ -184,7 +184,7 @@ class CrawlerManager:
            self.status = "idle"
            self.current_config = None

-            # 取消日志读取任务
+            # Cancel log reading task
            if self._read_task:
                self._read_task.cancel()
                self._read_task = None
@@ -192,7 +192,7 @@ class CrawlerManager:
            return True

    def get_status(self) -> dict:
-        """获取当前状态"""
+        """Get current status"""
        return {
            "status": self.status,
            "platform": self.current_config.platform.value if self.current_config else None,
@@ -202,7 +202,7 @@ class CrawlerManager:
        }

    def _build_command(self, config: CrawlerStartRequest) -> list:
-        """构建 main.py 命令行参数"""
+        """Build main.py command line arguments"""
        cmd = ["uv", "run", "python", "main.py"]

        cmd.extend(["--platform", config.platform.value])
@@ -210,7 +210,7 @@ class CrawlerManager:
        cmd.extend(["--type", config.crawler_type.value])
        cmd.extend(["--save_data_option", config.save_option.value])

-        # 根据爬虫类型传递不同的参数
+        # Pass different arguments based on crawler type
        if config.crawler_type.value == "search" and config.keywords:
            cmd.extend(["--keywords", config.keywords])
        elif config.crawler_type.value == "detail" and config.specified_ids:
@@ -232,12 +232,12 @@ class CrawlerManager:
        return cmd

    async def _read_output(self):
-        """异步读取进程输出"""
+        """Asynchronously read process output"""
        loop = asyncio.get_event_loop()

        try:
            while self.process and self.process.poll() is None:
-                # 在线程池中读取一行
+                # Read a line in thread pool
                line = await loop.run_in_executor(
                    None, self.process.stdout.readline
                )
@@ -248,7 +248,7 @@ class CrawlerManager:
                        entry = self._create_log_entry(line, level)
                        await self._push_log(entry)

-            # 读取剩余输出
+            # Read remaining output
            if self.process and self.process.stdout:
                remaining = await loop.run_in_executor(
                    None, self.process.stdout.read
@@ -260,7 +260,7 @@ class CrawlerManager:
                            entry = self._create_log_entry(line.strip(), level)
                            await self._push_log(entry)

-            # 进程结束
+            # Process ended
            if self.status == "running":
                exit_code = self.process.returncode if self.process else -1
                if exit_code == 0:
@@ -277,5 +277,5 @@ class CrawlerManager:
            await self._push_log(entry)


-# 全局单例
+# Global singleton
 crawler_manager = CrawlerManager()