# Mirror of https://github.com/NanmiCoder/MediaCrawler.git
# Synced 2026-06-01 07:27:25 +08:00
# 110 lines, 3.9 KiB, Python
# -*- coding: utf-8 -*-
|
|
import pytest
|
|
import config
|
|
from unittest.mock import AsyncMock, patch
|
|
from fastapi.testclient import TestClient
|
|
from cmd_arg import parse_cmd
|
|
from api.schemas import CrawlerStartRequest, PlatformEnum, LoginTypeEnum, CrawlerTypeEnum
|
|
from api.services.crawler_manager import CrawlerManager
|
|
from api.main import app
|
|
|
|
@pytest.mark.asyncio
async def test_cmd_arg_crawler_max_notes_count():
    """Check that the two count flags given to ``parse_cmd`` show up in ``config``.

    ``parse_cmd`` is awaited with ``--crawler_max_notes_count 42`` and
    ``--max_comments_count_singlenotes 24``; the test then expects
    ``config.CRAWLER_MAX_NOTES_COUNT`` and
    ``config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES`` to hold 42 and 24.
    """
    # Snapshot the two module-level settings this test inspects so they can
    # be put back afterwards, whether or not the assertions pass.
    saved_notes_limit = config.CRAWLER_MAX_NOTES_COUNT
    saved_comments_limit = config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES

    argv = [
        "--platform", "xhs",
        "--crawler_max_notes_count", "42",
        "--max_comments_count_singlenotes", "24",
    ]

    try:
        await parse_cmd(argv)
        assert config.CRAWLER_MAX_NOTES_COUNT == 42
        assert config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES == 24
    finally:
        config.CRAWLER_MAX_NOTES_COUNT = saved_notes_limit
        config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = saved_comments_limit
|
|
|
|
def test_crawler_manager_build_command():
    """Check how ``CrawlerManager._build_command`` treats the optional limits.

    Scenario 1: both limits are ``None`` -> neither limit flag may appear in
    the built command.
    Scenario 2: limits of 50 notes / 5 comments -> each flag must appear and
    be immediately followed by the value rendered as a string.
    """
    manager = CrawlerManager()

    # Fields that are identical in both scenarios.
    shared_fields = {
        "platform": PlatformEnum.XHS,
        "login_type": LoginTypeEnum.QRCODE,
        "crawler_type": CrawlerTypeEnum.SEARCH,
        "keywords": "test",
    }

    # Scenario 1: no limits supplied in the request.
    request_without_limits = CrawlerStartRequest(
        **shared_fields,
        max_notes_count=None,
        max_comments_count=None,
    )
    command_without_limits = manager._build_command(request_without_limits)
    assert "--crawler_max_notes_count" not in command_without_limits
    assert "--max_comments_count_singlenotes" not in command_without_limits

    # Scenario 2: both limits supplied in the request.
    request_with_limits = CrawlerStartRequest(
        **shared_fields,
        max_notes_count=50,
        max_comments_count=5,
    )
    command_with_limits = manager._build_command(request_with_limits)

    expected_flag_values = (
        ("--crawler_max_notes_count", "50"),
        ("--max_comments_count_singlenotes", "5"),
    )
    for flag, expected_value in expected_flag_values:
        assert flag in command_with_limits
        flag_position = command_with_limits.index(flag)
        # The value is expected in the slot right after its flag.
        assert command_with_limits[flag_position + 1] == expected_value
|
|
|
|
def test_api_start_crawler_with_limits():
    """POST ``/api/crawler/start`` with both limits and inspect what reaches ``start``.

    ``crawler_manager.start`` is replaced by an ``AsyncMock`` returning
    ``True``. The test expects a 200 response with the success payload and
    checks that the request object handed to the mock carries the platform
    and both limit values from the JSON body.
    """
    client = TestClient(app)
    payload = {
        "platform": "xhs",
        "login_type": "qrcode",
        "crawler_type": "search",
        "keywords": "test",
        "max_notes_count": 50,
        "max_comments_count": 5,
    }

    with patch(
        "api.routers.crawler.crawler_manager.start",
        new_callable=AsyncMock,
    ) as start_mock:
        start_mock.return_value = True

        response = client.post("/api/crawler/start", json=payload)

        assert response.status_code == 200
        assert response.json() == {"status": "ok", "message": "Crawler started successfully"}

        start_mock.assert_called_once()
        # First positional argument of the single recorded call.
        positional_args = start_mock.call_args[0]
        received_request = positional_args[0]
        assert received_request.platform == PlatformEnum.XHS
        assert received_request.max_notes_count == 50
        assert received_request.max_comments_count == 5
|
|
|
|
def test_api_start_crawler_without_limits():
    """POST ``/api/crawler/start`` without limits and inspect what reaches ``start``.

    ``crawler_manager.start`` is replaced by an ``AsyncMock`` returning
    ``True``. The JSON body omits both limit fields; the test expects a 200
    response and checks that the request object handed to the mock has
    ``max_notes_count`` and ``max_comments_count`` set to ``None``.
    """
    client = TestClient(app)
    payload = {
        "platform": "xhs",
        "login_type": "qrcode",
        "crawler_type": "search",
        "keywords": "test",
    }

    with patch(
        "api.routers.crawler.crawler_manager.start",
        new_callable=AsyncMock,
    ) as start_mock:
        start_mock.return_value = True

        response = client.post("/api/crawler/start", json=payload)

        assert response.status_code == 200
        start_mock.assert_called_once()
        # First positional argument of the single recorded call.
        positional_args = start_mock.call_args[0]
        received_request = positional_args[0]
        assert received_request.platform == PlatformEnum.XHS
        assert received_request.max_notes_count is None
        assert received_request.max_comments_count is None
|