mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-04-21 03:07:37 +08:00
feat(api): add WebUI API server with built frontend
- Add FastAPI server with WebSocket support for real-time logs - Add crawler management API endpoints (start/stop/status) - Add data browsing API endpoints (list files, preview, download) - Include pre-built WebUI assets for serving frontend API endpoints: - POST /api/crawler/start - Start crawler task - POST /api/crawler/stop - Stop crawler task - GET /api/crawler/status - Get crawler status - WS /api/ws/logs - Real-time log streaming - GET /api/data/files - List data files - GET /api/data/stats - Get data statistics 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
63
api/routers/crawler.py
Normal file
63
api/routers/crawler.py
Normal file
@@ -0,0 +1,63 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/api/routers/crawler.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from ..schemas import CrawlerStartRequest, CrawlerStatusResponse
|
||||
from ..services import crawler_manager
|
||||
|
||||
# All endpoints in this module are mounted under the "/crawler" URL prefix
# and grouped under the "crawler" tag in the generated OpenAPI docs.
router = APIRouter(prefix="/crawler", tags=["crawler"])
|
||||
|
||||
|
||||
@router.post("/start")
async def start_crawler(request: CrawlerStartRequest):
    """Start a crawler task via the shared crawler manager.

    Returns a success payload on launch. Raises:
    - HTTP 400 when a crawler process is already running (duplicate request)
    - HTTP 500 when the manager failed to launch the process
    """
    if await crawler_manager.start(request):
        return {"status": "ok", "message": "Crawler started successfully"}

    # Start failed — distinguish a duplicate/concurrent request (process is
    # alive, client error 400) from a genuine launch failure (server error 500).
    proc = crawler_manager.process
    if proc and proc.poll() is None:
        raise HTTPException(status_code=400, detail="Crawler is already running")
    raise HTTPException(status_code=500, detail="Failed to start crawler")
|
||||
|
||||
|
||||
@router.post("/stop")
async def stop_crawler():
    """Stop the currently running crawler task.

    Returns a success payload when the manager stops the process. Raises:
    - HTTP 400 when no crawler process exists or it has already exited
    - HTTP 500 when the manager failed to stop a live process
    """
    if await crawler_manager.stop():
        return {"status": "ok", "message": "Crawler stopped successfully"}

    # Stop failed — if there is no live process this was a duplicate/stale
    # request (client error 400); otherwise the stop itself failed (500).
    proc = crawler_manager.process
    if not proc or proc.poll() is not None:
        raise HTTPException(status_code=400, detail="No crawler is running")
    raise HTTPException(status_code=500, detail="Failed to stop crawler")
|
||||
|
||||
|
||||
@router.get("/status", response_model=CrawlerStatusResponse)
async def get_crawler_status():
    """Report the current crawler state as a CrawlerStatusResponse."""
    # Pure delegation: the manager owns the process handle and derives status.
    status = crawler_manager.get_status()
    return status
|
||||
|
||||
|
||||
@router.get("/logs")
async def get_logs(limit: int = 100):
    """Return the most recent in-memory log entries.

    A positive ``limit`` yields at most that many of the newest entries;
    zero or a negative value returns the entire log buffer.
    """
    entries = crawler_manager.logs
    if limit > 0:
        # Slice from the tail so the newest entries are kept.
        entries = entries[-limit:]
    return {"logs": [entry.model_dump() for entry in entries]}
|
||||
Reference in New Issue
Block a user