Files
MediaCrawler/tests/test_api_limits.py

110 lines
3.9 KiB
Python

# -*- coding: utf-8 -*-
import pytest
import config
from unittest.mock import AsyncMock, patch
from fastapi.testclient import TestClient
from cmd_arg import parse_cmd
from api.schemas import CrawlerStartRequest, PlatformEnum, LoginTypeEnum, CrawlerTypeEnum
from api.services.crawler_manager import CrawlerManager
from api.main import app
@pytest.mark.asyncio
async def test_cmd_arg_crawler_max_notes_count():
    """Check that the CLI limit flags end up in the ``config`` module.

    ``parse_cmd`` is awaited with ``--crawler_max_notes_count 42`` and
    ``--max_comments_count_singlenotes 24``; afterwards
    ``config.CRAWLER_MAX_NOTES_COUNT`` and
    ``config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES`` must hold those values.
    Both settings are put back to what they were before the call, whether or
    not the assertions pass.
    """
    # Remember the two module-level settings this test asserts on.
    saved_notes_limit = config.CRAWLER_MAX_NOTES_COUNT
    saved_comments_limit = config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES

    argv = [
        "--platform", "xhs",
        "--crawler_max_notes_count", "42",
        "--max_comments_count_singlenotes", "24",
    ]

    try:
        await parse_cmd(argv)

        assert config.CRAWLER_MAX_NOTES_COUNT == 42
        assert config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES == 24
    finally:
        # Restore the saved settings so the values set here do not persist.
        config.CRAWLER_MAX_NOTES_COUNT = saved_notes_limit
        config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = saved_comments_limit
def test_crawler_manager_build_command():
    """Check which limit flags ``CrawlerManager._build_command`` emits.

    Two requests are built with the same platform, login type, crawler type
    and keywords:

    * with both limits set to ``None`` neither ``--crawler_max_notes_count``
      nor ``--max_comments_count_singlenotes`` may appear in the command;
    * with ``max_notes_count=50`` and ``max_comments_count=5`` both flags must
      appear, each immediately followed by its value as a string.
    """
    notes_flag = "--crawler_max_notes_count"
    comments_flag = "--max_comments_count_singlenotes"

    manager = CrawlerManager()

    # Fields that are identical in both scenarios.
    shared_fields = dict(
        platform=PlatformEnum.XHS,
        login_type=LoginTypeEnum.QRCODE,
        crawler_type=CrawlerTypeEnum.SEARCH,
        keywords="test",
    )

    # Scenario 1: the API request carries no limits.
    request_without_limits = CrawlerStartRequest(
        **shared_fields,
        max_notes_count=None,
        max_comments_count=None,
    )
    command_without_limits = manager._build_command(request_without_limits)

    assert notes_flag not in command_without_limits
    assert comments_flag not in command_without_limits

    # Scenario 2: the API request carries both limits.
    request_with_limits = CrawlerStartRequest(
        **shared_fields,
        max_notes_count=50,
        max_comments_count=5,
    )
    command_with_limits = manager._build_command(request_with_limits)

    # Each flag must be present and directly followed by its value.
    assert notes_flag in command_with_limits
    notes_position = command_with_limits.index(notes_flag)
    assert command_with_limits[notes_position + 1] == "50"

    assert comments_flag in command_with_limits
    comments_position = command_with_limits.index(comments_flag)
    assert command_with_limits[comments_position + 1] == "5"
def test_api_start_crawler_with_limits():
    """POST ``/api/crawler/start`` with both limits and check the handoff.

    ``api.routers.crawler.crawler_manager.start`` is replaced by an
    ``AsyncMock`` returning ``True``. The test expects a 200 response with the
    success JSON body, exactly one call to the mocked ``start``, and a first
    positional argument whose platform and limits match the posted payload.
    """
    payload = {
        "platform": "xhs",
        "login_type": "qrcode",
        "crawler_type": "search",
        "keywords": "test",
        "max_notes_count": 50,
        "max_comments_count": 5,
    }
    expected_body = {"status": "ok", "message": "Crawler started successfully"}

    api_client = TestClient(app)
    start_target = "api.routers.crawler.crawler_manager.start"

    with patch(start_target, new_callable=AsyncMock) as start_mock:
        start_mock.return_value = True

        reply = api_client.post("/api/crawler/start", json=payload)

        assert reply.status_code == 200
        assert reply.json() == expected_body

        start_mock.assert_called_once()
        # The request object is read from the first positional argument.
        forwarded = start_mock.call_args.args[0]
        assert forwarded.platform == PlatformEnum.XHS
        assert forwarded.max_notes_count == 50
        assert forwarded.max_comments_count == 5
def test_api_start_crawler_without_limits():
    """POST ``/api/crawler/start`` without limits and check they are ``None``.

    ``api.routers.crawler.crawler_manager.start`` is replaced by an
    ``AsyncMock`` returning ``True``. The payload omits ``max_notes_count``
    and ``max_comments_count``; the test expects a 200 response, exactly one
    call to the mocked ``start``, and a first positional argument on which
    both limit attributes are ``None``.
    """
    payload = {
        "platform": "xhs",
        "login_type": "qrcode",
        "crawler_type": "search",
        "keywords": "test",
    }

    api_client = TestClient(app)
    start_target = "api.routers.crawler.crawler_manager.start"

    with patch(start_target, new_callable=AsyncMock) as start_mock:
        start_mock.return_value = True

        reply = api_client.post("/api/crawler/start", json=payload)

        assert reply.status_code == 200

        start_mock.assert_called_once()
        # The request object is read from the first positional argument.
        forwarded = start_mock.call_args.args[0]
        assert forwarded.platform == PlatformEnum.XHS
        assert forwarded.max_notes_count is None
        assert forwarded.max_comments_count is None