feat: clean up CDP browser after the crawler finishes

This commit is contained in:
程序员阿江(Relakkes)
2025-11-17 12:21:53 +08:00
parent a1c5e07df8
commit e89a6d5781
2 changed files with 136 additions and 20 deletions

58
main.py
View File

@@ -11,6 +11,7 @@
import asyncio
import sys
import signal
from typing import Optional
import cmd_arg
@@ -87,15 +88,62 @@ async def main():
print(f"Error generating wordcloud: {e}")
def cleanup():
    """Synchronous teardown hook: close the DB connection if persistence was enabled."""
    if crawler:
        # The crawler itself needs no synchronous teardown here.
        pass
    if config.SAVE_DATA_OPTION in ("db", "sqlite"):
        # db.close() is a coroutine; drive it on a fresh loop via asyncio.run.
        asyncio.run(db.close())
async def async_cleanup():
    """Asynchronously release crawler resources (CDP browser or browser context), then close the DB.

    Prefers the CDP manager's teardown when the crawler has one; only falls
    back to closing the standard browser context in non-CDP mode. Errors whose
    message contains "closed"/"disconnected" are treated as expected shutdown
    noise and suppressed.
    """
    global crawler
    if crawler:
        # Clean up the CDP-managed browser first, if this crawler uses one.
        if hasattr(crawler, 'cdp_manager') and crawler.cdp_manager:
            try:
                await crawler.cdp_manager.cleanup(force=True)  # force-kill the browser process
            except Exception as e:
                # Only report unexpected errors; "closed"/"disconnected" are
                # normal when the browser already went away during shutdown.
                error_msg = str(e).lower()
                if "closed" not in error_msg and "disconnected" not in error_msg:
                    print(f"[Main] 清理CDP浏览器时出错: {e}")
        # Otherwise close the standard browser context (non-CDP mode only).
        elif hasattr(crawler, 'browser_context') and crawler.browser_context:
            try:
                # Heuristic: only attempt close when the object still looks
                # like an open context (exposes 'pages').
                if hasattr(crawler.browser_context, 'pages'):
                    await crawler.browser_context.close()
            except Exception as e:
                # Same suppression of expected shutdown errors as above.
                error_msg = str(e).lower()
                if "closed" not in error_msg and "disconnected" not in error_msg:
                    print(f"[Main] 关闭浏览器上下文时出错: {e}")
    # Close the database connection when persistence was enabled.
    if config.SAVE_DATA_OPTION in ["db", "sqlite"]:
        await db.close()
def cleanup():
    """Synchronous wrapper that drives async_cleanup() on a fresh event loop.

    Used after the main loop has finished (or from the signal handler) where
    no loop is running, so a new loop is created just for the teardown.

    NOTE(review): if this is invoked while another loop is still running in
    this thread, run_until_complete() raises RuntimeError, which is caught
    and reported below rather than propagated.
    """
    try:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(async_cleanup())
        finally:
            # Always close the loop, even when async_cleanup() raises,
            # so we don't leak the loop's selector/file descriptors.
            loop.close()
    except Exception as e:
        print(f"[Main] 清理时出错: {e}")
def signal_handler(signum, _frame):
    """Handle interrupt signals (e.g. Ctrl+C / SIGTERM): clean up resources, then exit."""
    # Announce which signal triggered the shutdown, run teardown, exit cleanly.
    print(f"\n[Main] 收到中断信号 {signum},正在清理资源...")
    cleanup()
    sys.exit(0)
if __name__ == "__main__":
    # Register signal handlers so interrupts/termination also trigger cleanup.
    signal.signal(signal.SIGINT, signal_handler)   # Ctrl+C
    signal.signal(signal.SIGTERM, signal_handler)  # termination signal
    try:
        # asyncio.run() creates and closes its own event loop; it replaces
        # the deprecated asyncio.get_event_loop().run_until_complete(...)
        # pattern (DeprecationWarning since Python 3.10).
        asyncio.run(main())
    except KeyboardInterrupt:
        # Reached only if the SIGINT handler did not already exit the process.
        print("\n[Main] 收到键盘中断,正在清理资源...")
    finally:
        cleanup()