mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-05 09:27:25 +08:00
i18n: translate all Chinese comments, docstrings, and logger messages to English
Comprehensive translation of Chinese text to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: Chinese disclaimer header (lines 10-18) for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -33,8 +33,8 @@ from tools import utils
|
||||
|
||||
class BrowserLauncher:
|
||||
"""
|
||||
浏览器启动器,用于检测和启动用户的Chrome/Edge浏览器
|
||||
支持Windows和macOS系统
|
||||
Browser launcher for detecting and launching user's Chrome/Edge browser
|
||||
Supports Windows and macOS systems
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
@@ -44,19 +44,19 @@ class BrowserLauncher:
|
||||
|
||||
def detect_browser_paths(self) -> List[str]:
|
||||
"""
|
||||
检测系统中可用的浏览器路径
|
||||
返回按优先级排序的浏览器路径列表
|
||||
Detect available browser paths in system
|
||||
Returns list of browser paths sorted by priority
|
||||
"""
|
||||
paths = []
|
||||
|
||||
if self.system == "Windows":
|
||||
# Windows下的常见Chrome/Edge安装路径
|
||||
# Common Chrome/Edge installation paths on Windows
|
||||
possible_paths = [
|
||||
# Chrome路径
|
||||
# Chrome paths
|
||||
os.path.expandvars(r"%PROGRAMFILES%\Google\Chrome\Application\chrome.exe"),
|
||||
os.path.expandvars(r"%PROGRAMFILES(X86)%\Google\Chrome\Application\chrome.exe"),
|
||||
os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome\Application\chrome.exe"),
|
||||
# Edge路径
|
||||
# Edge paths
|
||||
os.path.expandvars(r"%PROGRAMFILES%\Microsoft\Edge\Application\msedge.exe"),
|
||||
os.path.expandvars(r"%PROGRAMFILES(X86)%\Microsoft\Edge\Application\msedge.exe"),
|
||||
# Chrome Beta/Dev/Canary
|
||||
@@ -65,21 +65,21 @@ class BrowserLauncher:
|
||||
os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome SxS\Application\chrome.exe"),
|
||||
]
|
||||
elif self.system == "Darwin": # macOS
|
||||
# macOS下的常见Chrome/Edge安装路径
|
||||
# Common Chrome/Edge installation paths on macOS
|
||||
possible_paths = [
|
||||
# Chrome路径
|
||||
# Chrome paths
|
||||
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
|
||||
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta",
|
||||
"/Applications/Google Chrome Dev.app/Contents/MacOS/Google Chrome Dev",
|
||||
"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
|
||||
# Edge路径
|
||||
# Edge paths
|
||||
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
|
||||
"/Applications/Microsoft Edge Beta.app/Contents/MacOS/Microsoft Edge Beta",
|
||||
"/Applications/Microsoft Edge Dev.app/Contents/MacOS/Microsoft Edge Dev",
|
||||
"/Applications/Microsoft Edge Canary.app/Contents/MacOS/Microsoft Edge Canary",
|
||||
]
|
||||
else:
|
||||
# Linux等其他系统
|
||||
# Linux and other systems
|
||||
possible_paths = [
|
||||
"/usr/bin/google-chrome",
|
||||
"/usr/bin/google-chrome-stable",
|
||||
@@ -94,7 +94,7 @@ class BrowserLauncher:
|
||||
"/usr/bin/microsoft-edge-dev",
|
||||
]
|
||||
|
||||
# 检查路径是否存在且可执行
|
||||
# Check if path exists and is executable
|
||||
for path in possible_paths:
|
||||
if os.path.isfile(path) and os.access(path, os.X_OK):
|
||||
paths.append(path)
|
||||
@@ -103,10 +103,10 @@ class BrowserLauncher:
|
||||
|
||||
def find_available_port(self, start_port: int = 9222) -> int:
|
||||
"""
|
||||
查找可用的端口
|
||||
Find available port
|
||||
"""
|
||||
port = start_port
|
||||
while port < start_port + 100: # 最多尝试100个端口
|
||||
while port < start_port + 100: # Try up to 100 ports
|
||||
try:
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.bind(('localhost', port))
|
||||
@@ -114,18 +114,18 @@ class BrowserLauncher:
|
||||
except OSError:
|
||||
port += 1
|
||||
|
||||
raise RuntimeError(f"无法找到可用的端口,已尝试 {start_port} 到 {port-1}")
|
||||
raise RuntimeError(f"Cannot find available port, tried {start_port} to {port-1}")
|
||||
|
||||
def launch_browser(self, browser_path: str, debug_port: int, headless: bool = False,
|
||||
user_data_dir: Optional[str] = None) -> subprocess.Popen:
|
||||
"""
|
||||
启动浏览器进程
|
||||
Launch browser process
|
||||
"""
|
||||
# 基本启动参数
|
||||
# Basic launch arguments
|
||||
args = [
|
||||
browser_path,
|
||||
f"--remote-debugging-port={debug_port}",
|
||||
"--remote-debugging-address=0.0.0.0", # 允许远程访问
|
||||
"--remote-debugging-address=0.0.0.0", # Allow remote access
|
||||
"--no-first-run",
|
||||
"--no-default-browser-check",
|
||||
"--disable-background-timer-throttling",
|
||||
@@ -136,36 +136,36 @@ class BrowserLauncher:
|
||||
"--disable-hang-monitor",
|
||||
"--disable-prompt-on-repost",
|
||||
"--disable-sync",
|
||||
"--disable-dev-shm-usage", # 避免共享内存问题
|
||||
"--no-sandbox", # 在CDP模式下关闭沙箱
|
||||
# 🔥 关键反检测参数
|
||||
"--disable-blink-features=AutomationControlled", # 禁用自动化控制标记
|
||||
"--exclude-switches=enable-automation", # 排除自动化开关
|
||||
"--disable-infobars", # 禁用信息栏
|
||||
"--disable-dev-shm-usage", # Avoid shared memory issues
|
||||
"--no-sandbox", # Disable sandbox in CDP mode
|
||||
# Key anti-detection arguments
|
||||
"--disable-blink-features=AutomationControlled", # Disable automation control flag
|
||||
"--exclude-switches=enable-automation", # Exclude automation switch
|
||||
"--disable-infobars", # Disable info bars
|
||||
]
|
||||
|
||||
# 无头模式
|
||||
# Headless mode
|
||||
if headless:
|
||||
args.extend([
|
||||
"--headless=new", # 使用新的headless模式
|
||||
"--headless=new", # Use new headless mode
|
||||
"--disable-gpu",
|
||||
])
|
||||
else:
|
||||
# 非无头模式的额外参数
|
||||
# Extra arguments for non-headless mode
|
||||
args.extend([
|
||||
"--start-maximized", # 最大化窗口,更像真实用户
|
||||
"--start-maximized", # Maximize window, more like real user
|
||||
])
|
||||
|
||||
# 用户数据目录
|
||||
# User data directory
|
||||
if user_data_dir:
|
||||
args.append(f"--user-data-dir={user_data_dir}")
|
||||
|
||||
utils.logger.info(f"[BrowserLauncher] 启动浏览器: {browser_path}")
|
||||
utils.logger.info(f"[BrowserLauncher] 调试端口: {debug_port}")
|
||||
utils.logger.info(f"[BrowserLauncher] 无头模式: {headless}")
|
||||
utils.logger.info(f"[BrowserLauncher] Launching browser: {browser_path}")
|
||||
utils.logger.info(f"[BrowserLauncher] Debug port: {debug_port}")
|
||||
utils.logger.info(f"[BrowserLauncher] Headless mode: {headless}")
|
||||
|
||||
try:
|
||||
# 在Windows上,使用CREATE_NEW_PROCESS_GROUP避免Ctrl+C影响子进程
|
||||
# On Windows, use CREATE_NEW_PROCESS_GROUP to prevent Ctrl+C from affecting subprocess
|
||||
if self.system == "Windows":
|
||||
process = subprocess.Popen(
|
||||
args,
|
||||
@@ -178,21 +178,21 @@ class BrowserLauncher:
|
||||
args,
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL,
|
||||
preexec_fn=os.setsid # 创建新的进程组
|
||||
preexec_fn=os.setsid # Create new process group
|
||||
)
|
||||
|
||||
self.browser_process = process
|
||||
return process
|
||||
|
||||
except Exception as e:
|
||||
utils.logger.error(f"[BrowserLauncher] 启动浏览器失败: {e}")
|
||||
utils.logger.error(f"[BrowserLauncher] Failed to launch browser: {e}")
|
||||
raise
|
||||
|
||||
def wait_for_browser_ready(self, debug_port: int, timeout: int = 30) -> bool:
|
||||
"""
|
||||
等待浏览器准备就绪
|
||||
Wait for browser to be ready
|
||||
"""
|
||||
utils.logger.info(f"[BrowserLauncher] 等待浏览器在端口 {debug_port} 上准备就绪...")
|
||||
utils.logger.info(f"[BrowserLauncher] Waiting for browser to be ready on port {debug_port}...")
|
||||
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < timeout:
|
||||
@@ -201,19 +201,19 @@ class BrowserLauncher:
|
||||
s.settimeout(1)
|
||||
result = s.connect_ex(('localhost', debug_port))
|
||||
if result == 0:
|
||||
utils.logger.info(f"[BrowserLauncher] 浏览器已在端口 {debug_port} 上准备就绪")
|
||||
utils.logger.info(f"[BrowserLauncher] Browser is ready on port {debug_port}")
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
time.sleep(0.5)
|
||||
|
||||
utils.logger.error(f"[BrowserLauncher] 浏览器在 {timeout} 秒内未能准备就绪")
|
||||
utils.logger.error(f"[BrowserLauncher] Browser failed to be ready within {timeout} seconds")
|
||||
return False
|
||||
|
||||
def get_browser_info(self, browser_path: str) -> Tuple[str, str]:
|
||||
"""
|
||||
获取浏览器信息(名称和版本)
|
||||
Get browser info (name and version)
|
||||
"""
|
||||
try:
|
||||
if "chrome" in browser_path.lower():
|
||||
@@ -225,7 +225,7 @@ class BrowserLauncher:
|
||||
else:
|
||||
name = "Unknown Browser"
|
||||
|
||||
# 尝试获取版本信息
|
||||
# Try to get version info
|
||||
try:
|
||||
result = subprocess.run([browser_path, "--version"],
|
||||
capture_output=True, text=True, timeout=5)
|
||||
@@ -240,7 +240,7 @@ class BrowserLauncher:
|
||||
|
||||
def cleanup(self):
|
||||
"""
|
||||
清理资源,关闭浏览器进程
|
||||
Cleanup resources, close browser process
|
||||
"""
|
||||
if not self.browser_process:
|
||||
return
|
||||
@@ -248,20 +248,20 @@ class BrowserLauncher:
|
||||
process = self.browser_process
|
||||
|
||||
if process.poll() is not None:
|
||||
utils.logger.info("[BrowserLauncher] 浏览器进程已退出,无需清理")
|
||||
utils.logger.info("[BrowserLauncher] Browser process already exited, no cleanup needed")
|
||||
self.browser_process = None
|
||||
return
|
||||
|
||||
utils.logger.info("[BrowserLauncher] 正在关闭浏览器进程...")
|
||||
utils.logger.info("[BrowserLauncher] Closing browser process...")
|
||||
|
||||
try:
|
||||
if self.system == "Windows":
|
||||
# 先尝试正常终止
|
||||
# First try normal termination
|
||||
process.terminate()
|
||||
try:
|
||||
process.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
utils.logger.warning("[BrowserLauncher] 正常终止超时,使用taskkill强制结束")
|
||||
utils.logger.warning("[BrowserLauncher] Normal termination timeout, using taskkill to force kill")
|
||||
subprocess.run(
|
||||
["taskkill", "/F", "/T", "/PID", str(process.pid)],
|
||||
capture_output=True,
|
||||
@@ -273,17 +273,17 @@ class BrowserLauncher:
|
||||
try:
|
||||
os.killpg(pgid, signal.SIGTERM)
|
||||
except ProcessLookupError:
|
||||
utils.logger.info("[BrowserLauncher] 浏览器进程组不存在,可能已退出")
|
||||
utils.logger.info("[BrowserLauncher] Browser process group does not exist, may have exited")
|
||||
else:
|
||||
try:
|
||||
process.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
utils.logger.warning("[BrowserLauncher] 优雅关闭超时,发送SIGKILL")
|
||||
utils.logger.warning("[BrowserLauncher] Graceful shutdown timeout, sending SIGKILL")
|
||||
os.killpg(pgid, signal.SIGKILL)
|
||||
process.wait(timeout=5)
|
||||
|
||||
utils.logger.info("[BrowserLauncher] 浏览器进程已关闭")
|
||||
utils.logger.info("[BrowserLauncher] Browser process closed")
|
||||
except Exception as e:
|
||||
utils.logger.warning(f"[BrowserLauncher] 关闭浏览器进程时出错: {e}")
|
||||
utils.logger.warning(f"[BrowserLauncher] Error closing browser process: {e}")
|
||||
finally:
|
||||
self.browser_process = None
|
||||
|
||||
Reference in New Issue
Block a user