feat: 支持playwright通过cdp协议连接本地chrome浏览器

docs: 增加uv来管理python依赖的文档
This commit is contained in:
程序员阿江(Relakkes)
2025-06-25 23:22:39 +08:00
parent fbc9788d54
commit e83b2422d9
10 changed files with 991 additions and 52 deletions

243
tools/browser_launcher.py Normal file
View File

@@ -0,0 +1,243 @@
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
import os
import platform
import subprocess
import time
import socket
from typing import Optional, List, Tuple
import asyncio
from pathlib import Path
from tools import utils
class BrowserLauncher:
"""
浏览器启动器用于检测和启动用户的Chrome/Edge浏览器
支持Windows和macOS系统
"""
def __init__(self):
self.system = platform.system()
self.browser_process = None
self.debug_port = None
def detect_browser_paths(self) -> List[str]:
"""
检测系统中可用的浏览器路径
返回按优先级排序的浏览器路径列表
"""
paths = []
if self.system == "Windows":
# Windows下的常见Chrome/Edge安装路径
possible_paths = [
# Chrome路径
os.path.expandvars(r"%PROGRAMFILES%\Google\Chrome\Application\chrome.exe"),
os.path.expandvars(r"%PROGRAMFILES(X86)%\Google\Chrome\Application\chrome.exe"),
os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome\Application\chrome.exe"),
# Edge路径
os.path.expandvars(r"%PROGRAMFILES%\Microsoft\Edge\Application\msedge.exe"),
os.path.expandvars(r"%PROGRAMFILES(X86)%\Microsoft\Edge\Application\msedge.exe"),
# Chrome Beta/Dev/Canary
os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome Beta\Application\chrome.exe"),
os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome Dev\Application\chrome.exe"),
os.path.expandvars(r"%LOCALAPPDATA%\Google\Chrome SxS\Application\chrome.exe"),
]
elif self.system == "Darwin": # macOS
# macOS下的常见Chrome/Edge安装路径
possible_paths = [
# Chrome路径
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
"/Applications/Google Chrome Beta.app/Contents/MacOS/Google Chrome Beta",
"/Applications/Google Chrome Dev.app/Contents/MacOS/Google Chrome Dev",
"/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
# Edge路径
"/Applications/Microsoft Edge.app/Contents/MacOS/Microsoft Edge",
"/Applications/Microsoft Edge Beta.app/Contents/MacOS/Microsoft Edge Beta",
"/Applications/Microsoft Edge Dev.app/Contents/MacOS/Microsoft Edge Dev",
"/Applications/Microsoft Edge Canary.app/Contents/MacOS/Microsoft Edge Canary",
]
else:
# Linux等其他系统
possible_paths = [
"/usr/bin/google-chrome",
"/usr/bin/google-chrome-stable",
"/usr/bin/google-chrome-beta",
"/usr/bin/google-chrome-unstable",
"/usr/bin/chromium-browser",
"/usr/bin/chromium",
"/snap/bin/chromium",
"/usr/bin/microsoft-edge",
"/usr/bin/microsoft-edge-stable",
"/usr/bin/microsoft-edge-beta",
"/usr/bin/microsoft-edge-dev",
]
# 检查路径是否存在且可执行
for path in possible_paths:
if os.path.isfile(path) and os.access(path, os.X_OK):
paths.append(path)
return paths
def find_available_port(self, start_port: int = 9222) -> int:
"""
查找可用的端口
"""
port = start_port
while port < start_port + 100: # 最多尝试100个端口
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.bind(('localhost', port))
return port
except OSError:
port += 1
raise RuntimeError(f"无法找到可用的端口,已尝试 {start_port}{port-1}")
def launch_browser(self, browser_path: str, debug_port: int, headless: bool = False,
user_data_dir: Optional[str] = None) -> subprocess.Popen:
"""
启动浏览器进程
"""
# 基本启动参数
args = [
browser_path,
f"--remote-debugging-port={debug_port}",
"--no-first-run",
"--no-default-browser-check",
"--disable-background-timer-throttling",
"--disable-backgrounding-occluded-windows",
"--disable-renderer-backgrounding",
"--disable-features=TranslateUI",
"--disable-ipc-flooding-protection",
"--disable-hang-monitor",
"--disable-prompt-on-repost",
"--disable-sync",
"--disable-web-security", # 可能有助于某些网站的访问
"--disable-features=VizDisplayCompositor",
"--disable-extensions-except", # 保留用户扩展
"--load-extension", # 允许加载扩展
]
# 无头模式
if headless:
args.extend([
"--headless",
"--disable-gpu",
"--no-sandbox",
])
# 用户数据目录
if user_data_dir:
args.append(f"--user-data-dir={user_data_dir}")
utils.logger.info(f"[BrowserLauncher] 启动浏览器: {browser_path}")
utils.logger.info(f"[BrowserLauncher] 调试端口: {debug_port}")
utils.logger.info(f"[BrowserLauncher] 无头模式: {headless}")
try:
# 在Windows上使用CREATE_NEW_PROCESS_GROUP避免Ctrl+C影响子进程
if self.system == "Windows":
process = subprocess.Popen(
args,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP
)
else:
process = subprocess.Popen(
args,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
preexec_fn=os.setsid # 创建新的进程组
)
return process
except Exception as e:
utils.logger.error(f"[BrowserLauncher] 启动浏览器失败: {e}")
raise
def wait_for_browser_ready(self, debug_port: int, timeout: int = 30) -> bool:
"""
等待浏览器准备就绪
"""
utils.logger.info(f"[BrowserLauncher] 等待浏览器在端口 {debug_port} 上准备就绪...")
start_time = time.time()
while time.time() - start_time < timeout:
try:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.settimeout(1)
result = s.connect_ex(('localhost', debug_port))
if result == 0:
utils.logger.info(f"[BrowserLauncher] 浏览器已在端口 {debug_port} 上准备就绪")
return True
except Exception:
pass
time.sleep(0.5)
utils.logger.error(f"[BrowserLauncher] 浏览器在 {timeout} 秒内未能准备就绪")
return False
def get_browser_info(self, browser_path: str) -> Tuple[str, str]:
"""
获取浏览器信息(名称和版本)
"""
try:
if "chrome" in browser_path.lower():
name = "Google Chrome"
elif "edge" in browser_path.lower() or "msedge" in browser_path.lower():
name = "Microsoft Edge"
elif "chromium" in browser_path.lower():
name = "Chromium"
else:
name = "Unknown Browser"
# 尝试获取版本信息
try:
result = subprocess.run([browser_path, "--version"],
capture_output=True, text=True, timeout=5)
version = result.stdout.strip() if result.stdout else "Unknown Version"
except:
version = "Unknown Version"
return name, version
except Exception:
return "Unknown Browser", "Unknown Version"
def cleanup(self):
"""
清理资源,关闭浏览器进程
"""
if self.browser_process:
try:
utils.logger.info("[BrowserLauncher] 正在关闭浏览器进程...")
if self.system == "Windows":
# Windows下使用taskkill强制终止进程树
subprocess.run(["taskkill", "/F", "/T", "/PID", str(self.browser_process.pid)],
capture_output=True)
else:
# Unix系统下终止进程组
os.killpg(os.getpgid(self.browser_process.pid), 9)
self.browser_process = None
utils.logger.info("[BrowserLauncher] 浏览器进程已关闭")
except Exception as e:
utils.logger.warning(f"[BrowserLauncher] 关闭浏览器进程时出错: {e}")