mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 11:27:26 +08:00
feat(api): add WebUI API server with built frontend
- Add FastAPI server with WebSocket support for real-time logs - Add crawler management API endpoints (start/stop/status) - Add data browsing API endpoints (list files, preview, download) - Include pre-built WebUI assets for serving frontend API endpoints: - POST /api/crawler/start - Start crawler task - POST /api/crawler/stop - Stop crawler task - GET /api/crawler/status - Get crawler status - WS /api/ws/logs - Real-time log streaming - GET /api/data/files - List data files - GET /api/data/stats - Get data statistics 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
151
api/routers/websocket.py
Normal file
151
api/routers/websocket.py
Normal file
@@ -0,0 +1,151 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/api/routers/websocket.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
import asyncio
|
||||
from typing import Set, Optional
|
||||
|
||||
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
||||
|
||||
from ..services import crawler_manager
|
||||
|
||||
router = APIRouter(tags=["websocket"])
|
||||
|
||||
|
||||
class ConnectionManager:
|
||||
"""WebSocket 连接管理器"""
|
||||
|
||||
def __init__(self):
|
||||
self.active_connections: Set[WebSocket] = set()
|
||||
|
||||
async def connect(self, websocket: WebSocket):
|
||||
await websocket.accept()
|
||||
self.active_connections.add(websocket)
|
||||
|
||||
def disconnect(self, websocket: WebSocket):
|
||||
self.active_connections.discard(websocket)
|
||||
|
||||
async def broadcast(self, message: dict):
|
||||
"""广播消息到所有连接"""
|
||||
if not self.active_connections:
|
||||
return
|
||||
|
||||
disconnected = []
|
||||
for connection in list(self.active_connections):
|
||||
try:
|
||||
await connection.send_json(message)
|
||||
except Exception:
|
||||
disconnected.append(connection)
|
||||
|
||||
# 清理断开的连接
|
||||
for conn in disconnected:
|
||||
self.disconnect(conn)
|
||||
|
||||
|
||||
manager = ConnectionManager()
|
||||
|
||||
|
||||
async def log_broadcaster():
|
||||
"""后台任务:从队列读取日志并广播"""
|
||||
queue = crawler_manager.get_log_queue()
|
||||
while True:
|
||||
try:
|
||||
# 从队列获取日志条目
|
||||
entry = await queue.get()
|
||||
# 广播到所有 WebSocket 连接
|
||||
await manager.broadcast(entry.model_dump())
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
except Exception as e:
|
||||
print(f"Log broadcaster error: {e}")
|
||||
await asyncio.sleep(0.1)
|
||||
|
||||
|
||||
# 全局广播任务
|
||||
_broadcaster_task: Optional[asyncio.Task] = None
|
||||
|
||||
|
||||
def start_broadcaster():
|
||||
"""启动广播任务"""
|
||||
global _broadcaster_task
|
||||
if _broadcaster_task is None or _broadcaster_task.done():
|
||||
_broadcaster_task = asyncio.create_task(log_broadcaster())
|
||||
|
||||
|
||||
@router.websocket("/ws/logs")
|
||||
async def websocket_logs(websocket: WebSocket):
|
||||
"""WebSocket 日志流"""
|
||||
print("[WS] New connection attempt")
|
||||
|
||||
try:
|
||||
# 确保广播任务在运行
|
||||
start_broadcaster()
|
||||
|
||||
await manager.connect(websocket)
|
||||
print(f"[WS] Connected, active connections: {len(manager.active_connections)}")
|
||||
|
||||
# 发送现有日志
|
||||
for log in crawler_manager.logs:
|
||||
try:
|
||||
await websocket.send_json(log.model_dump())
|
||||
except Exception as e:
|
||||
print(f"[WS] Error sending existing log: {e}")
|
||||
break
|
||||
|
||||
print(f"[WS] Sent {len(crawler_manager.logs)} existing logs, entering main loop")
|
||||
|
||||
while True:
|
||||
# 保持连接活跃,接收心跳或任意消息
|
||||
try:
|
||||
data = await asyncio.wait_for(
|
||||
websocket.receive_text(),
|
||||
timeout=30.0
|
||||
)
|
||||
if data == "ping":
|
||||
await websocket.send_text("pong")
|
||||
except asyncio.TimeoutError:
|
||||
# 发送 ping 保持连接
|
||||
try:
|
||||
await websocket.send_text("ping")
|
||||
except Exception as e:
|
||||
print(f"[WS] Error sending ping: {e}")
|
||||
break
|
||||
|
||||
except WebSocketDisconnect:
|
||||
print("[WS] Client disconnected")
|
||||
except Exception as e:
|
||||
print(f"[WS] Error: {type(e).__name__}: {e}")
|
||||
finally:
|
||||
manager.disconnect(websocket)
|
||||
print(f"[WS] Cleanup done, active connections: {len(manager.active_connections)}")
|
||||
|
||||
|
||||
@router.websocket("/ws/status")
|
||||
async def websocket_status(websocket: WebSocket):
|
||||
"""WebSocket 状态流"""
|
||||
await websocket.accept()
|
||||
|
||||
try:
|
||||
while True:
|
||||
# 每秒发送一次状态
|
||||
status = crawler_manager.get_status()
|
||||
await websocket.send_json(status)
|
||||
await asyncio.sleep(1)
|
||||
except WebSocketDisconnect:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
Reference in New Issue
Block a user