Files
MediaCrawler/api/routers/websocket.py
程序员阿江(Relakkes) 508675a251 feat(api): add WebUI API server with built frontend
- Add FastAPI server with WebSocket support for real-time logs
- Add crawler management API endpoints (start/stop/status)
- Add data browsing API endpoints (list files, preview, download)
- Include pre-built WebUI assets for serving frontend

API endpoints:
- POST /api/crawler/start - Start crawler task
- POST /api/crawler/stop - Stop crawler task
- GET /api/crawler/status - Get crawler status
- WS /api/ws/logs - Real-time log streaming
- GET /api/data/files - List data files
- GET /api/data/stats - Get data statistics

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-19 00:02:08 +08:00

152 lines
4.7 KiB
Python

# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/api/routers/websocket.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
import asyncio
from typing import Set, Optional
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from ..services import crawler_manager
router = APIRouter(tags=["websocket"])
class ConnectionManager:
"""WebSocket 连接管理器"""
def __init__(self):
self.active_connections: Set[WebSocket] = set()
async def connect(self, websocket: WebSocket):
await websocket.accept()
self.active_connections.add(websocket)
def disconnect(self, websocket: WebSocket):
self.active_connections.discard(websocket)
async def broadcast(self, message: dict):
"""广播消息到所有连接"""
if not self.active_connections:
return
disconnected = []
for connection in list(self.active_connections):
try:
await connection.send_json(message)
except Exception:
disconnected.append(connection)
# 清理断开的连接
for conn in disconnected:
self.disconnect(conn)
manager = ConnectionManager()
async def log_broadcaster():
"""后台任务:从队列读取日志并广播"""
queue = crawler_manager.get_log_queue()
while True:
try:
# 从队列获取日志条目
entry = await queue.get()
# 广播到所有 WebSocket 连接
await manager.broadcast(entry.model_dump())
except asyncio.CancelledError:
break
except Exception as e:
print(f"Log broadcaster error: {e}")
await asyncio.sleep(0.1)
# 全局广播任务
_broadcaster_task: Optional[asyncio.Task] = None
def start_broadcaster():
"""启动广播任务"""
global _broadcaster_task
if _broadcaster_task is None or _broadcaster_task.done():
_broadcaster_task = asyncio.create_task(log_broadcaster())
@router.websocket("/ws/logs")
async def websocket_logs(websocket: WebSocket):
"""WebSocket 日志流"""
print("[WS] New connection attempt")
try:
# 确保广播任务在运行
start_broadcaster()
await manager.connect(websocket)
print(f"[WS] Connected, active connections: {len(manager.active_connections)}")
# 发送现有日志
for log in crawler_manager.logs:
try:
await websocket.send_json(log.model_dump())
except Exception as e:
print(f"[WS] Error sending existing log: {e}")
break
print(f"[WS] Sent {len(crawler_manager.logs)} existing logs, entering main loop")
while True:
# 保持连接活跃,接收心跳或任意消息
try:
data = await asyncio.wait_for(
websocket.receive_text(),
timeout=30.0
)
if data == "ping":
await websocket.send_text("pong")
except asyncio.TimeoutError:
# 发送 ping 保持连接
try:
await websocket.send_text("ping")
except Exception as e:
print(f"[WS] Error sending ping: {e}")
break
except WebSocketDisconnect:
print("[WS] Client disconnected")
except Exception as e:
print(f"[WS] Error: {type(e).__name__}: {e}")
finally:
manager.disconnect(websocket)
print(f"[WS] Cleanup done, active connections: {len(manager.active_connections)}")
@router.websocket("/ws/status")
async def websocket_status(websocket: WebSocket):
"""WebSocket 状态流"""
await websocket.accept()
try:
while True:
# 每秒发送一次状态
status = crawler_manager.get_status()
await websocket.send_json(status)
await asyncio.sleep(1)
except WebSocketDisconnect:
pass
except Exception:
pass