mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-05-11 21:17:38 +08:00
feat: 支持连接用户已有的 Chrome 浏览器进行爬取
新增 CDP_CONNECT_EXISTING 配置项,默认开启,通过 Chrome 远程调试功能 (chrome://inspect/#remote-debugging) 直接连接用户正在使用的浏览器,复用真实的 Cookie、扩展和浏览历史,大幅降低平台风控检测风险。

主要变更:
- 新增 _connect_existing_browser 方法,通过 ws:// 直接连接已有浏览器
- 支持等待用户在浏览器端确认连接对话框(60秒超时)
- cleanup 时不关闭用户的浏览器进程
- 修复小红书在真实浏览器下 cookie 过多导致签名失败的问题
- 更新 README、CDP使用指南和常见问题文档
This commit is contained in:
@@ -48,31 +48,38 @@ HEADLESS = False
|
||||
# Whether to save login status
|
||||
SAVE_LOGIN_STATE = True
|
||||
|
||||
# ==================== CDP (Chrome DevTools Protocol) Configuration ====================
|
||||
# Whether to enable CDP mode - use the user's existing Chrome/Edge browser to crawl, providing better anti-detection capabilities
|
||||
# Once enabled, the user's Chrome/Edge browser will be automatically detected and started, and controlled through the CDP protocol.
|
||||
# This method uses the real browser environment, including the user's extensions, cookies and settings, greatly reducing the risk of detection.
|
||||
ENABLE_CDP_MODE = False
|
||||
# ==================== CDP (Chrome DevTools Protocol) 配置 ====================
|
||||
# 是否启用 CDP 模式 - 使用用户本地的 Chrome/Edge 浏览器进行爬取,具有更好的反检测能力
|
||||
# 开启后,会自动检测并启动用户的 Chrome/Edge 浏览器,通过 CDP 协议进行控制
|
||||
# 该方式使用真实浏览器环境,包括用户的扩展、Cookie 和设置,大幅降低被风控检测的风险
|
||||
ENABLE_CDP_MODE = True
|
||||
|
||||
# CDP debug port, used to communicate with the browser
|
||||
# If the port is occupied, the system will automatically try the next available port
|
||||
# CDP 调试端口,用于与浏览器通信
|
||||
# 如果端口被占用,系统会自动尝试下一个可用端口
|
||||
CDP_DEBUG_PORT = 9222
|
||||
|
||||
# Custom browser path (optional)
|
||||
# If it is empty, the system will automatically detect the installation path of Chrome/Edge
|
||||
# Windows example: "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
|
||||
# macOS example: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||
# 自定义浏览器路径(可选)
|
||||
# 如果为空,系统会自动检测 Chrome/Edge 的安装路径
|
||||
# Windows 示例: "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
|
||||
# macOS 示例: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||
CUSTOM_BROWSER_PATH = ""
|
||||
|
||||
# Whether to enable headless mode in CDP mode
|
||||
# NOTE: Even if set to True, some anti-detection features may not work well in headless mode
|
||||
# 是否在 CDP 模式下启用无头模式
|
||||
# 注意:即使设置为 True,某些反检测功能在无头模式下可能无法正常工作
|
||||
CDP_HEADLESS = False
|
||||
|
||||
# Browser startup timeout (seconds)
|
||||
# 浏览器启动超时时间(秒)
|
||||
BROWSER_LAUNCH_TIMEOUT = 60
|
||||
|
||||
# Whether to automatically close the browser when the program ends
|
||||
# Set to False to keep the browser running for easy debugging
|
||||
# 是否连接用户已打开的浏览器,而不是启动新的浏览器
|
||||
# 开启后,程序会连接一个已经启用了远程调试的浏览器
|
||||
# 用户需要在 Chrome 中开启远程调试:chrome://inspect/#remote-debugging
|
||||
# 或者使用命令行参数启动 Chrome:--remote-debugging-port=9222
|
||||
# 这种方式反检测效果最好,因为直接使用用户真实浏览器的所有 Cookie、扩展和浏览历史
|
||||
CDP_CONNECT_EXISTING = True
|
||||
|
||||
# 程序结束时是否自动关闭浏览器
|
||||
# 设置为 False 可以保持浏览器运行,方便调试;连接用户已有浏览器 (CDP_CONNECT_EXISTING = True) 时,程序结束不会关闭用户的浏览器进程
|
||||
AUTO_CLOSE_BROWSER = True
|
||||
|
||||
# Data saving type option configuration, supports: csv, db, json, jsonl, sqlite, excel, postgres. It is best to save to DB, with deduplication function.
|
||||
|
||||
Reference in New Issue
Block a user