i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: the Chinese disclaimer header (lines 10-18) is intentionally left untranslated for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
程序员阿江(Relakkes)
2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions

View File

@@ -33,8 +33,8 @@ from tools import utils
class BrowserLauncher:
"""
浏览器启动器,用于检测和启动用户的Chrome/Edge浏览器
支持Windows和macOS系统
Browser launcher for detecting and launching user's Chrome/Edge browser
Supports Windows and macOS systems
"""
def __init__(self):
@@ -44,19 +44,19 @@ class BrowserLauncher:
def detect_browser_paths(self) -> List[str]:
"""
检测系统中可用的浏览器路径
返回按优先级排序的浏览器路径列表
Detect available browser paths in system
Returns list of browser paths sorted by priority
"""
paths = []
if self.system == "Windows":
# Windows下的常见Chrome/Edge安装路径
# Common Chrome/Edge installation paths on Windows
possible_paths = [
# Chrome路径
# Chrome paths
os.path.expandvars(r"%PROGRAMFILES%\Google\Chrome\Application\chrome.exe"),
os.path.expandvars(r"%PROGRAMFILES(X86)%\Google\Chrome\Application\chrome.exe"),
os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome\Application\chrome.exe"),
# Edge路径
# Edge paths
os.path.expandvars(r"%PROGRAMFILES%\Microsoft\Edge\Application\msedge.exe"),
os.path.expandvars(r"%PROGRAMFILES(X86)%\Microsoft\Edge\Application\msedge.exe"),
# Chrome Beta/Dev/Canary
@@ -65,21 +65,21 @@ class BrowserLauncher:
os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome SxS\Application\chrome.exe"),
]
elif self.system == "Darwin": # macOS
# macOS下的常见Chrome/Edge安装路径
# Common Chrome/Edge installation paths on macOS
possible_paths = [
# Chrome路径
# Chrome paths
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta",
"/Applications/Google Chrome Dev.app/Contents/MacOS/Google Chrome Dev",
"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
# Edge路径
# Edge paths
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
"/Applications/Microsoft Edge Beta.app/Contents/MacOS/Microsoft Edge Beta",
"/Applications/Microsoft Edge Dev.app/Contents/MacOS/Microsoft Edge Dev",
"/Applications/Microsoft Edge Canary.app/Contents/MacOS/Microsoft Edge Canary",
]
else:
# Linux等其他系统
# Linux and other systems
possible_paths = [
"/usr/bin/google-chrome",
"/usr/bin/google-chrome-stable",
@@ -94,7 +94,7 @@ class BrowserLauncher:
"/usr/bin/microsoft-edge-dev",
]
# 检查路径是否存在且可执行
# Check if path exists and is executable
for path in possible_paths:
if os.path.isfile(path) and os.access(path, os.X_OK):
paths.append(path)
@@ -103,10 +103,10 @@ class BrowserLauncher:
def find_available_port(self, start_port: int = 9222) -> int:
"""
查找可用的端口
Find available port
"""
port = start_port
while port < start_port + 100: # 最多尝试100个端口
while port < start_port + 100: # Try up to 100 ports
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(('localhost', port))
@@ -114,18 +114,18 @@ class BrowserLauncher:
except OSError:
port += 1
raise RuntimeError(f"无法找到可用的端口,已尝试 {start_port} {port-1}")
raise RuntimeError(f"Cannot find available port, tried {start_port} to {port-1}")
def launch_browser(self, browser_path: str, debug_port: int, headless: bool = False,
user_data_dir: Optional[str] = None) -> subprocess.Popen:
"""
启动浏览器进程
Launch browser process
"""
# 基本启动参数
# Basic launch arguments
args = [
browser_path,
f"--remote-debugging-port={debug_port}",
"--remote-debugging-address=0.0.0.0", # 允许远程访问
"--remote-debugging-address=0.0.0.0", # Allow remote access
"--no-first-run",
"--no-default-browser-check",
"--disable-background-timer-throttling",
@@ -136,36 +136,36 @@ class BrowserLauncher:
"--disable-hang-monitor",
"--disable-prompt-on-repost",
"--disable-sync",
"--disable-dev-shm-usage", # 避免共享内存问题
"--no-sandbox", # 在CDP模式下关闭沙箱
# 🔥 关键反检测参数
"--disable-blink-features=AutomationControlled", # 禁用自动化控制标记
"--exclude-switches=enable-automation", # 排除自动化开关
"--disable-infobars", # 禁用信息栏
"--disable-dev-shm-usage", # Avoid shared memory issues
"--no-sandbox", # Disable sandbox in CDP mode
# Key anti-detection arguments
"--disable-blink-features=AutomationControlled", # Disable automation control flag
"--exclude-switches=enable-automation", # Exclude automation switch
"--disable-infobars", # Disable info bars
]
# 无头模式
# Headless mode
if headless:
args.extend([
"--headless=new", # 使用新的headless模式
"--headless=new", # Use new headless mode
"--disable-gpu",
])
else:
# 非无头模式的额外参数
# Extra arguments for non-headless mode
args.extend([
"--start-maximized", # 最大化窗口,更像真实用户
"--start-maximized", # Maximize window, more like real user
])
# 用户数据目录
# User data directory
if user_data_dir:
args.append(f"--user-data-dir={user_data_dir}")
utils.logger.info(f"[BrowserLauncher] 启动浏览器: {browser_path}")
utils.logger.info(f"[BrowserLauncher] 调试端口: {debug_port}")
utils.logger.info(f"[BrowserLauncher] 无头模式: {headless}")
utils.logger.info(f"[BrowserLauncher] Launching browser: {browser_path}")
utils.logger.info(f"[BrowserLauncher] Debug port: {debug_port}")
utils.logger.info(f"[BrowserLauncher] Headless mode: {headless}")
try:
# Windows上,使用CREATE_NEW_PROCESS_GROUP避免Ctrl+C影响子进程
# On Windows, use CREATE_NEW_PROCESS_GROUP to prevent Ctrl+C from affecting subprocess
if self.system == "Windows":
process = subprocess.Popen(
args,
@@ -178,21 +178,21 @@ class BrowserLauncher:
args,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
preexec_fn=os.setsid # 创建新的进程组
preexec_fn=os.setsid # Create new process group
)
self.browser_process = process
return process
except Exception as e:
utils.logger.error(f"[BrowserLauncher] 启动浏览器失败: {e}")
utils.logger.error(f"[BrowserLauncher] Failed to launch browser: {e}")
raise
def wait_for_browser_ready(self, debug_port: int, timeout: int = 30) -> bool:
"""
等待浏览器准备就绪
Wait for browser to be ready
"""
utils.logger.info(f"[BrowserLauncher] 等待浏览器在端口 {debug_port} 上准备就绪...")
utils.logger.info(f"[BrowserLauncher] Waiting for browser to be ready on port {debug_port}...")
start_time = time.time()
while time.time() - start_time < timeout:
@@ -201,19 +201,19 @@ class BrowserLauncher:
s.settimeout(1)
result = s.connect_ex(('localhost', debug_port))
if result == 0:
utils.logger.info(f"[BrowserLauncher] 浏览器已在端口 {debug_port} 上准备就绪")
utils.logger.info(f"[BrowserLauncher] Browser is ready on port {debug_port}")
return True
except Exception:
pass
time.sleep(0.5)
utils.logger.error(f"[BrowserLauncher] 浏览器在 {timeout} 秒内未能准备就绪")
utils.logger.error(f"[BrowserLauncher] Browser failed to be ready within {timeout} seconds")
return False
def get_browser_info(self, browser_path: str) -> Tuple[str, str]:
"""
获取浏览器信息(名称和版本)
Get browser info (name and version)
"""
try:
if "chrome" in browser_path.lower():
@@ -225,7 +225,7 @@ class BrowserLauncher:
else:
name = "Unknown Browser"
# 尝试获取版本信息
# Try to get version info
try:
result = subprocess.run([browser_path, "--version"],
capture_output=True, text=True, timeout=5)
@@ -240,7 +240,7 @@ class BrowserLauncher:
def cleanup(self):
"""
清理资源,关闭浏览器进程
Cleanup resources, close browser process
"""
if not self.browser_process:
return
@@ -248,20 +248,20 @@ class BrowserLauncher:
process = self.browser_process
if process.poll() is not None:
utils.logger.info("[BrowserLauncher] 浏览器进程已退出,无需清理")
utils.logger.info("[BrowserLauncher] Browser process already exited, no cleanup needed")
self.browser_process = None
return
utils.logger.info("[BrowserLauncher] 正在关闭浏览器进程...")
utils.logger.info("[BrowserLauncher] Closing browser process...")
try:
if self.system == "Windows":
# 先尝试正常终止
# First try normal termination
process.terminate()
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
utils.logger.warning("[BrowserLauncher] 正常终止超时使用taskkill强制结束")
utils.logger.warning("[BrowserLauncher] Normal termination timeout, using taskkill to force kill")
subprocess.run(
["taskkill", "/F", "/T", "/PID", str(process.pid)],
capture_output=True,
@@ -273,17 +273,17 @@ class BrowserLauncher:
try:
os.killpg(pgid, signal.SIGTERM)
except ProcessLookupError:
utils.logger.info("[BrowserLauncher] 浏览器进程组不存在,可能已退出")
utils.logger.info("[BrowserLauncher] Browser process group does not exist, may have exited")
else:
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
utils.logger.warning("[BrowserLauncher] 优雅关闭超时,发送SIGKILL")
utils.logger.warning("[BrowserLauncher] Graceful shutdown timeout, sending SIGKILL")
os.killpg(pgid, signal.SIGKILL)
process.wait(timeout=5)
utils.logger.info("[BrowserLauncher] 浏览器进程已关闭")
utils.logger.info("[BrowserLauncher] Browser process closed")
except Exception as e:
utils.logger.warning(f"[BrowserLauncher] 关闭浏览器进程时出错: {e}")
utils.logger.warning(f"[BrowserLauncher] Error closing browser process: {e}")
finally:
self.browser_process = None