mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-02-20 22:11:11 +08:00
Features: - Excel export with formatted multi-sheet workbooks (Contents, Comments, Creators) - Professional styling: blue headers, auto-width columns, borders, text wrapping - Smart export: empty sheets automatically removed - Support for all platforms (xhs, dy, ks, bili, wb, tieba, zhihu) Testing: - Added pytest framework with asyncio support - Unit tests for Excel store functionality - Unit tests for store factory pattern - Shared fixtures for test data - Test coverage for edge cases Documentation: - Comprehensive Excel export guide (docs/excel_export_guide.md) - Updated README.md and README_en.md with Excel examples - Updated config comments to include excel option Dependencies: - Added openpyxl>=3.1.2 for Excel support - Added pytest>=7.4.0 and pytest-asyncio>=0.21.0 for testing This contribution adds immediate value for users who need data analysis capabilities and establishes a testing foundation for future development.
82 lines
2.3 KiB
Python
82 lines
2.3 KiB
Python
# -*- coding: utf-8 -*-
|
|
"""
|
|
Pytest configuration and shared fixtures
|
|
"""
|
|
|
|
import pytest
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Add project root to Python path
|
|
project_root = Path(__file__).parent.parent
|
|
sys.path.insert(0, str(project_root))
|
|
|
|
|
|
@pytest.fixture(scope="session")
|
|
def project_root_path():
|
|
"""Return project root path"""
|
|
return project_root
|
|
|
|
|
|
@pytest.fixture
|
|
def sample_xhs_note():
|
|
"""Sample Xiaohongshu note data for testing"""
|
|
return {
|
|
"note_id": "test_note_123",
|
|
"type": "normal",
|
|
"title": "测试标题 Test Title",
|
|
"desc": "这是一个测试描述 This is a test description",
|
|
"video_url": "",
|
|
"time": 1700000000,
|
|
"last_update_time": 1700000000,
|
|
"user_id": "user_123",
|
|
"nickname": "测试用户",
|
|
"avatar": "https://example.com/avatar.jpg",
|
|
"liked_count": 100,
|
|
"collected_count": 50,
|
|
"comment_count": 25,
|
|
"share_count": 10,
|
|
"ip_location": "上海",
|
|
"image_list": "https://example.com/img1.jpg,https://example.com/img2.jpg",
|
|
"tag_list": "测试,编程,Python",
|
|
"note_url": "https://www.xiaohongshu.com/explore/test_note_123",
|
|
"source_keyword": "测试关键词",
|
|
"xsec_token": "test_token_123"
|
|
}
|
|
|
|
|
|
@pytest.fixture
|
|
def sample_xhs_comment():
|
|
"""Sample Xiaohongshu comment data for testing"""
|
|
return {
|
|
"comment_id": "comment_123",
|
|
"create_time": 1700000000,
|
|
"ip_location": "北京",
|
|
"note_id": "test_note_123",
|
|
"content": "这是一条测试评论 This is a test comment",
|
|
"user_id": "user_456",
|
|
"nickname": "评论用户",
|
|
"avatar": "https://example.com/avatar2.jpg",
|
|
"sub_comment_count": 5,
|
|
"pictures": "",
|
|
"parent_comment_id": 0,
|
|
"like_count": 15
|
|
}
|
|
|
|
|
|
@pytest.fixture
|
|
def sample_xhs_creator():
|
|
"""Sample Xiaohongshu creator data for testing"""
|
|
return {
|
|
"user_id": "creator_123",
|
|
"nickname": "创作者名称",
|
|
"gender": "女",
|
|
"avatar": "https://example.com/creator_avatar.jpg",
|
|
"desc": "这是创作者简介",
|
|
"ip_location": "广州",
|
|
"follows": 500,
|
|
"fans": 10000,
|
|
"interaction": 50000,
|
|
"tag_list": '{"profession": "设计师", "interest": "摄影"}'
|
|
}
|