Mirror of https://github.com/NanmiCoder/MediaCrawler.git (synced 2026-02-15 03:21:02 +08:00)

Compare commits: fix/proxy-... to feature/ex... (6 commits)
Commits:

- 6e858c1a00
- 324f09cf9f
- 46ef86ddef
- 31a092c653
- f989ce0788
- 15b98fa511
README.md (12 lines changed)
@@ -212,6 +212,10 @@ python main.py --help

Multiple data storage methods are supported:

- **CSV files**: save to CSV (under the `data/` directory)
- **JSON files**: save to JSON (under the `data/` directory)
+- **Excel files**: save to formatted Excel files (under the `data/` directory) ✨ New feature
+  - Multi-sheet support (contents, comments, creators)
+  - Professional formatting (styled headers, auto column widths, borders)
+  - Easy to analyze and share
- **Database storage**
  - Use the `--init_db` parameter to initialize the database (no other optional arguments are needed together with `--init_db`)
  - **SQLite database**: lightweight, no server required, suitable for personal use (recommended)
@@ -224,11 +228,15 @@ python main.py --help

### Usage examples:

```shell
-# Initialize the SQLite database (no other optional arguments are needed together with '--init_db')
+# Initialize the SQLite database
uv run main.py --init_db sqlite

-# Use SQLite to store data (recommended for personal users)
+# Use SQLite to store data
uv run main.py --platform xhs --lt qrcode --type search --save_data_option sqlite

+# Use Excel to store data (recommended for data analysis)
+uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
```

```shell
# Initialize the MySQL database
uv run main.py --init_db mysql
```
@@ -209,6 +209,10 @@ python main.py --help

Supports multiple data storage methods:

- **CSV Files**: Supports saving to CSV (under `data/` directory)
- **JSON Files**: Supports saving to JSON (under `data/` directory)
+- **Excel Files**: Supports saving to formatted Excel files (under `data/` directory) ✨ New Feature
+  - Multi-sheet support (Contents, Comments, Creators)
+  - Professional formatting (styled headers, auto-width columns, borders)
+  - Easy to analyze and share
- **Database Storage**
  - Use the `--init_db` parameter for database initialization (when using `--init_db`, no other optional arguments are needed)
  - **SQLite Database**: Lightweight database, no server required, suitable for personal use (recommended)
@@ -221,6 +225,9 @@ Supports multiple data storage methods:

### Usage Examples:

```shell
+# Use Excel to store data (recommended for data analysis) ✨ New Feature
+uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
+
# Initialize SQLite database (when using '--init_db', no other optional arguments are needed)
uv run main.py --init_db sqlite

# Use SQLite to store data (recommended for personal users)
```
@@ -71,6 +71,8 @@ class SaveDataOptionEnum(str, Enum):
    DB = "db"
    JSON = "json"
    SQLITE = "sqlite"
    MONGODB = "mongodb"
+    EXCEL = "excel"


class InitDbOptionEnum(str, Enum):
@@ -199,7 +201,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
        SaveDataOptionEnum,
        typer.Option(
            "--save_data_option",
-            help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库)",
+            help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库 | mongodb=MongoDB数据库 | excel=Excel文件)",
            rich_help_panel="存储配置",
        ),
    ] = _coerce_enum(
@@ -70,8 +70,8 @@ BROWSER_LAUNCH_TIMEOUT = 60
# Set to False to keep the browser running, which is convenient for debugging
AUTO_CLOSE_BROWSER = True

-# Data save option. Four types are supported: csv, db, json, sqlite. Saving to the DB is preferred since it deduplicates.
-SAVE_DATA_OPTION = "json"  # csv or db or json or sqlite
+# Data save option. Five types are supported: csv, db, json, sqlite, excel. Saving to the DB is preferred since it deduplicates.
+SAVE_DATA_OPTION = "json"  # csv or db or json or sqlite or excel

# Browser file configuration for the user's browser cache
USER_DATA_DIR = "%s_user_data_dir"  # %s will be replaced by platform name
docs/excel_export_guide.md (new file, 244 lines)

@@ -0,0 +1,244 @@
# Excel Export Guide

## Overview

MediaCrawler now supports exporting crawled data to formatted Excel files (.xlsx) with professional styling and multiple sheets for contents, comments, and creators.

## Features

- **Multi-sheet workbooks**: Separate sheets for Contents, Comments, and Creators
- **Professional formatting**:
  - Styled headers with blue background and white text
  - Auto-adjusted column widths
  - Cell borders and text wrapping
  - Clean, readable layout
- **Smart export**: Empty sheets are automatically removed
- **Organized storage**: Files saved to `data/{platform}/` directory with timestamps

## Installation

Excel export requires the `openpyxl` library:

```bash
# Using uv (recommended)
uv sync

# Or using pip
pip install openpyxl
```

## Usage

### Basic Usage

1. **Configure Excel export** in `config/base_config.py`:

```python
SAVE_DATA_OPTION = "excel"  # Change from json/csv/db to excel
```

2. **Run the crawler**:

```bash
# Xiaohongshu example
uv run main.py --platform xhs --lt qrcode --type search

# Douyin example
uv run main.py --platform dy --lt qrcode --type search

# Bilibili example
uv run main.py --platform bili --lt qrcode --type search
```

3. **Find your Excel file** in the `data/{platform}/` directory:
   - Filename format: `{platform}_{crawler_type}_{timestamp}.xlsx`
   - Example: `xhs_search_20250128_143025.xlsx`

### Command Line Examples

```bash
# Search by keywords and export to Excel
uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel

# Crawl specific posts and export to Excel
uv run main.py --platform xhs --lt qrcode --type detail --save_data_option excel

# Crawl creator profile and export to Excel
uv run main.py --platform xhs --lt qrcode --type creator --save_data_option excel
```

## Excel File Structure

### Contents Sheet

Contains post/video information:

- `note_id`: Unique post identifier
- `title`: Post title
- `desc`: Post description
- `user_id`: Author user ID
- `nickname`: Author nickname
- `liked_count`: Number of likes
- `comment_count`: Number of comments
- `share_count`: Number of shares
- `ip_location`: IP location
- `image_list`: Comma-separated image URLs
- `tag_list`: Comma-separated tags
- `note_url`: Direct link to post
- And more platform-specific fields...

### Comments Sheet

Contains comment information:

- `comment_id`: Unique comment identifier
- `note_id`: Associated post ID
- `content`: Comment text
- `user_id`: Commenter user ID
- `nickname`: Commenter nickname
- `like_count`: Comment likes
- `create_time`: Comment timestamp
- `ip_location`: Commenter location
- `sub_comment_count`: Number of replies
- And more...

### Creators Sheet

Contains creator/author information:

- `user_id`: Unique user identifier
- `nickname`: Display name
- `gender`: Gender
- `avatar`: Profile picture URL
- `desc`: Bio/description
- `fans`: Follower count
- `follows`: Following count
- `interaction`: Total interactions
- And more...

## Advantages Over Other Formats

### vs CSV

- ✅ Multiple sheets in one file
- ✅ Professional formatting
- ✅ Better handling of special characters
- ✅ Auto-adjusted column widths
- ✅ No encoding issues

### vs JSON

- ✅ Human-readable tabular format
- ✅ Easy to open in Excel/Google Sheets
- ✅ Better for data analysis
- ✅ Easier to share with non-technical users

### vs Database

- ✅ No database setup required
- ✅ Portable single-file format
- ✅ Easy to share and archive
- ✅ Works offline

## Tips & Best Practices

1. **Large datasets**: For very large crawls (>10,000 rows), consider using database storage instead for better performance

2. **Data analysis**: Excel files work great with:
   - Microsoft Excel
   - Google Sheets
   - LibreOffice Calc
   - Python pandas: `pd.read_excel('file.xlsx')`

3. **Combining data**: You can merge multiple Excel files using:

```python
import pandas as pd
df1 = pd.read_excel('file1.xlsx', sheet_name='Contents')
df2 = pd.read_excel('file2.xlsx', sheet_name='Contents')
combined = pd.concat([df1, df2])
combined.to_excel('combined.xlsx', index=False)
```

4. **File size**: Excel files are typically 2-3x larger than CSV but smaller than JSON

## Troubleshooting

### "openpyxl not installed" error

```bash
# Install openpyxl
uv add openpyxl
# or
pip install openpyxl
```

### Excel file not created

Check that:

1. `SAVE_DATA_OPTION = "excel"` in config
2. Crawler successfully collected data
3. No errors in console output
4. `data/{platform}/` directory exists

### Empty Excel file

This happens when:

- No data was crawled (check keywords/IDs)
- Login failed (check login status)
- Platform blocked requests (check IP/rate limits)

## Example Output

After running a successful crawl, you'll see:

```
[ExcelStoreBase] Initialized Excel export to: data/xhs/xhs_search_20250128_143025.xlsx
[ExcelStoreBase] Stored content to Excel: 7123456789
[ExcelStoreBase] Stored comment to Excel: comment_123
...
[Main] Excel file saved successfully
```

Your Excel file will have:

- Professional blue headers
- Clean borders
- Wrapped text for long content
- Auto-sized columns
- Separate organized sheets

## Advanced Usage

### Programmatic Access

```python
from store.excel_store_base import ExcelStoreBase

# Create store
store = ExcelStoreBase(platform="xhs", crawler_type="search")

# Store data
await store.store_content({
    "note_id": "123",
    "title": "Test Post",
    "liked_count": 100
})

# Save to file
store.flush()
```

### Custom Formatting

You can extend `ExcelStoreBase` to customize formatting:

```python
from store.excel_store_base import ExcelStoreBase

class CustomExcelStore(ExcelStoreBase):
    def _apply_header_style(self, sheet, row_num=1):
        # Custom header styling
        super()._apply_header_style(sheet, row_num)
        # Add your customizations here
```

## Support

For issues or questions:

- Check the [FAQ](常见问题.md)
- Open an issue on GitHub
- Join the WeChat discussion group

---

**Note**: Excel export is designed for learning and research purposes. Please respect platform terms of service and rate limits.
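Beyond the per-sheet reads shown in the guide's tips, a minimal sketch for inspecting a whole exported workbook with pandas (the filename is illustrative, and any of the sheets may be absent since empty sheets are removed at export time):

```python
import pandas as pd

# Illustrative path; use the file the crawler actually produced.
path = "data/xhs/xhs_search_20250128_143025.xlsx"

# sheet_name=None returns a dict of DataFrames keyed by sheet name,
# so Contents/Comments/Creators can be inspected in one pass.
sheets = pd.read_excel(path, sheet_name=None)
for name, frame in sheets.items():
    print(f"{name}: {len(frame)} rows, first columns: {list(frame.columns)[:5]}")
```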
main.py (9 lines changed)
@@ -84,6 +84,15 @@ async def main():
    crawler = CrawlerFactory.create_crawler(platform=config.PLATFORM)
    await crawler.start()

+    # Flush Excel data if using Excel export
+    if config.SAVE_DATA_OPTION == "excel":
+        try:
+            from store.excel_store_base import ExcelStoreBase
+            ExcelStoreBase.flush_all()
+            print("[Main] Excel files saved successfully")
+        except Exception as e:
+            print(f"[Main] Error flushing Excel data: {e}")
+
    # Generate wordcloud after crawling is complete
    # Only for JSON save mode
    if config.SAVE_DATA_OPTION == "json" and config.ENABLE_GET_WORDCLOUD:
media_platform/xhs/client.py

@@ -19,15 +19,12 @@
import asyncio
import json
import time
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
-from urllib.parse import urlencode, urlparse, parse_qs
+from urllib.parse import urlencode

import httpx
from playwright.async_api import BrowserContext, Page
from tenacity import retry, stop_after_attempt, wait_fixed
from xhshow import Xhshow

import config
from base.base_crawler import AbstractApiClient

@@ -39,8 +36,9 @@ if TYPE_CHECKING:

from .exception import DataFetchError, IPBlockError
from .field import SearchNoteType, SearchSortType
-from .help import get_search_id, sign
+from .help import get_search_id
from .extractor import XiaoHongShuExtractor
+from .playwright_sign import sign_with_playwright


class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):

@@ -67,16 +65,14 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
        self.playwright_page = playwright_page
        self.cookie_dict = cookie_dict
        self._extractor = XiaoHongShuExtractor()
-        # Initialize the xhshow client used for signature generation
-        self._xhshow_client = Xhshow()
        # Initialize the proxy pool (from ProxyRefreshMixin)
        self.init_proxy_pool(proxy_ip_pool)

    async def _pre_headers(self, url: str, params: Optional[Dict] = None, payload: Optional[Dict] = None) -> Dict:
-        """Sign the request header parameters
+        """Sign the request header parameters (via playwright injection)

        Args:
-            url: the request URL (for GET requests it contains the query parameters)
+            url: the request URL
            params: GET request parameters
            payload: POST request parameters

@@ -84,37 +80,21 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
        Returns:
            Dict: signed request header parameters
        """
        a1_value = self.cookie_dict.get("a1", "")
-        parsed = urlparse(url)
-        uri = parsed.path

        # Determine the request data and URI
        if params is not None:
-            x_s = self._xhshow_client.sign_xs_get(
-                uri=uri, a1_value=a1_value, params=params
-            )
            data = params
        elif payload is not None:
-            x_s = self._xhshow_client.sign_xs_post(
-                uri=uri, a1_value=a1_value, payload=payload
-            )
            data = payload
        else:
            raise ValueError("params or payload is required")

-        # Fetch the b1 value
-        b1_value = ""
-        try:
-            if self.playwright_page:
-                local_storage = await self.playwright_page.evaluate(
-                    "() => window.localStorage"
-                )
-                b1_value = local_storage.get("b1", "")
-        except Exception as e:
-            utils.logger.warning(
-                f"[XiaoHongShuClient._pre_headers] Failed to get b1 from localStorage: {e}"
-            )

-        signs = sign(
+        # Generate the signature via playwright injection
+        signs = await sign_with_playwright(
            page=self.playwright_page,
            uri=url,
            data=data,
            a1=a1_value,
-            b1=b1_value,
-            x_s=x_s,
-            x_t=str(int(time.time() * 1000)),
        )

        headers = {

@@ -177,11 +157,9 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
        """
        headers = await self._pre_headers(uri, params)
        if isinstance(params, dict):
-            # Build the full URL with xhshow's build_url
-            full_url = self._xhshow_client.build_url(
-                base_url=f"{self._host}{uri}",
-                params=params
-            )
+            # Build the full URL with the query parameters
+            query_string = urlencode(params)
+            full_url = f"{self._host}{uri}?{query_string}"
        else:
            full_url = f"{self._host}{uri}"

@@ -200,7 +178,7 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):

        """
        headers = await self._pre_headers(uri, payload=data)
-        json_str = self._xhshow_client.build_json_body(payload=data)
+        json_str = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
        return await self.request(
            method="POST",
            url=f"{self._host}{uri}",
media_platform/xhs/playwright_sign.py (new file, 203 lines)

@@ -0,0 +1,203 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/media_platform/xhs/playwright_sign.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
# Disclaimer: this code is for learning and research purposes only. Users must:
# 1. Not use it for any commercial purposes.
# 2. Comply with the target platform's terms of service and robots.txt rules.
# 3. Not crawl at large scale or disrupt the platform's operation.
# 4. Keep the request rate reasonable to avoid unnecessary load on the target platform.
# 5. Not use it for any illegal or improper purposes.
#
# See the LICENSE file in the project root for the full license terms.
# Using this code indicates your agreement with the principles above and all terms in the LICENSE.

# Generate Xiaohongshu signatures by calling window.mnsv2 via Playwright injection

import hashlib
import json
import time
from typing import Any, Dict, Optional, Union
from urllib.parse import urlparse

from playwright.async_api import Page

from .xhs_sign import b64_encode, encode_utf8, get_trace_id, mrc


def _build_sign_string(uri: str, data: Optional[Union[Dict, str]] = None) -> str:
    """Build the string to be signed"""
    c = uri
    if data is not None:
        if isinstance(data, dict):
            c += json.dumps(data, separators=(",", ":"), ensure_ascii=False)
        elif isinstance(data, str):
            c += data
    return c


def _md5_hex(s: str) -> str:
    """Compute the MD5 hash"""
    return hashlib.md5(s.encode("utf-8")).hexdigest()


def _build_xs_payload(x3_value: str, data_type: str = "object") -> str:
    """Build the x-s signature"""
    s = {
        "x0": "4.2.1",
        "x1": "xhs-pc-web",
        "x2": "Mac OS",
        "x3": x3_value,
        "x4": data_type,
    }
    return "XYS_" + b64_encode(encode_utf8(json.dumps(s, separators=(",", ":"))))


def _build_xs_common(a1: str, b1: str, x_s: str, x_t: str) -> str:
    """Build the x-s-common request header"""
    payload = {
        "s0": 3,
        "s1": "",
        "x0": "1",
        "x1": "4.2.2",
        "x2": "Mac OS",
        "x3": "xhs-pc-web",
        "x4": "4.74.0",
        "x5": a1,
        "x6": x_t,
        "x7": x_s,
        "x8": b1,
        "x9": mrc(x_t + x_s + b1),
        "x10": 154,
        "x11": "normal",
    }
    return b64_encode(encode_utf8(json.dumps(payload, separators=(",", ":"))))


async def get_b1_from_localstorage(page: Page) -> str:
    """Read the b1 value from localStorage"""
    try:
        local_storage = await page.evaluate("() => window.localStorage")
        return local_storage.get("b1", "")
    except Exception:
        return ""


async def call_mnsv2(page: Page, sign_str: str, md5_str: str) -> str:
    """
    Call the window.mnsv2 function through playwright

    Args:
        page: playwright Page object
        sign_str: the string to sign (uri + JSON.stringify(data))
        md5_str: MD5 hash of sign_str

    Returns:
        The signature string returned by mnsv2
    """
    sign_str_escaped = sign_str.replace("\\", "\\\\").replace("'", "\\'").replace("\n", "\\n")
    md5_str_escaped = md5_str.replace("\\", "\\\\").replace("'", "\\'")

    try:
        result = await page.evaluate(f"window.mnsv2('{sign_str_escaped}', '{md5_str_escaped}')")
        return result if result else ""
    except Exception:
        return ""


async def sign_xs_with_playwright(
    page: Page,
    uri: str,
    data: Optional[Union[Dict, str]] = None,
) -> str:
    """
    Generate the x-s signature via playwright injection

    Args:
        page: playwright Page object (must already have a Xiaohongshu page open)
        uri: API path, e.g. "/api/sns/web/v1/search/notes"
        data: request data (GET params or POST payload)

    Returns:
        The x-s signature string
    """
    sign_str = _build_sign_string(uri, data)
    md5_str = _md5_hex(sign_str)
    x3_value = await call_mnsv2(page, sign_str, md5_str)
    data_type = "object" if isinstance(data, (dict, list)) else "string"
    return _build_xs_payload(x3_value, data_type)


async def sign_with_playwright(
    page: Page,
    uri: str,
    data: Optional[Union[Dict, str]] = None,
    a1: str = "",
) -> Dict[str, Any]:
    """
    Generate the complete signed request headers via playwright

    Args:
        page: playwright Page object (must already have a Xiaohongshu page open)
        uri: API path
        data: request data
        a1: the a1 value from the cookie

    Returns:
        A dict containing x-s, x-t, x-s-common, and x-b3-traceid
    """
    b1 = await get_b1_from_localstorage(page)
    x_s = await sign_xs_with_playwright(page, uri, data)
    x_t = str(int(time.time() * 1000))

    return {
        "x-s": x_s,
        "x-t": x_t,
        "x-s-common": _build_xs_common(a1, b1, x_s, x_t),
        "x-b3-traceid": get_trace_id(),
    }


async def pre_headers_with_playwright(
    page: Page,
    url: str,
    cookie_dict: Dict[str, str],
    params: Optional[Dict] = None,
    payload: Optional[Dict] = None,
) -> Dict[str, str]:
    """
    Generate signed request headers via playwright injection;
    a drop-in replacement for the _pre_headers method in client.py

    Args:
        page: playwright Page object
        url: request URL
        cookie_dict: cookie dictionary
        params: GET request parameters
        payload: POST request parameters

    Returns:
        The signed request header dictionary
    """
    a1_value = cookie_dict.get("a1", "")
    uri = urlparse(url).path

    if params is not None:
        data = params
    elif payload is not None:
        data = payload
    else:
        raise ValueError("params or payload is required")

    signs = await sign_with_playwright(page, uri, data, a1_value)

    return {
        "X-S": signs["x-s"],
        "X-T": signs["x-t"],
        "x-S-Common": signs["x-s-common"],
        "X-B3-Traceid": signs["x-b3-traceid"],
    }
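For orientation, a sketch of how these helpers are meant to be driven; the host and cookie value are placeholders, and the page must already have a Xiaohongshu page loaded so that `window.mnsv2` exists:

```python
import asyncio

from playwright.async_api import async_playwright

from media_platform.xhs.playwright_sign import pre_headers_with_playwright


async def demo() -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        page = await browser.new_page()
        # window.mnsv2 is only defined once a Xiaohongshu page has loaded.
        await page.goto("https://www.xiaohongshu.com")
        headers = await pre_headers_with_playwright(
            page=page,
            url="https://example.invalid/api/sns/web/v1/search/notes",  # placeholder host
            cookie_dict={"a1": "a1-cookie-value"},  # placeholder cookie
            params={"keyword": "python"},
        )
        print(headers["X-S"], headers["X-T"])
        await browser.close()


asyncio.run(demo())
```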
media_platform/xhs/xhs_sign.py (new file, 152 lines)

@@ -0,0 +1,152 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/media_platform/xhs/xhs_sign.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
# Disclaimer: this code is for learning and research purposes only. Users must:
# 1. Not use it for any commercial purposes.
# 2. Comply with the target platform's terms of service and robots.txt rules.
# 3. Not crawl at large scale or disrupt the platform's operation.
# 4. Keep the request rate reasonable to avoid unnecessary load on the target platform.
# 5. Not use it for any illegal or improper purposes.
#
# See the LICENSE file in the project root for the full license terms.
# Using this code indicates your agreement with the principles above and all terms in the LICENSE.

# Core functions of the Xiaohongshu signature algorithm
# Used by the playwright-injection signing path

import ctypes
import random
from urllib.parse import quote

# Custom Base64 alphabet
# Standard Base64: ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/
# Xiaohongshu shuffles the order for obfuscation
BASE64_CHARS = list("ZmserbBoHQtNP+wOcza/LpngG8yJq42KWYj0DSfdikx3VT16IlUAFM97hECvuRX5")

# CRC32 lookup table
CRC32_TABLE = [
    0, 1996959894, 3993919788, 2567524794, 124634137, 1886057615, 3915621685,
    2657392035, 249268274, 2044508324, 3772115230, 2547177864, 162941995,
    2125561021, 3887607047, 2428444049, 498536548, 1789927666, 4089016648,
    2227061214, 450548861, 1843258603, 4107580753, 2211677639, 325883990,
    1684777152, 4251122042, 2321926636, 335633487, 1661365465, 4195302755,
    2366115317, 997073096, 1281953886, 3579855332, 2724688242, 1006888145,
    1258607687, 3524101629, 2768942443, 901097722, 1119000684, 3686517206,
    2898065728, 853044451, 1172266101, 3705015759, 2882616665, 651767980,
    1373503546, 3369554304, 3218104598, 565507253, 1454621731, 3485111705,
    3099436303, 671266974, 1594198024, 3322730930, 2970347812, 795835527,
    1483230225, 3244367275, 3060149565, 1994146192, 31158534, 2563907772,
    4023717930, 1907459465, 112637215, 2680153253, 3904427059, 2013776290,
    251722036, 2517215374, 3775830040, 2137656763, 141376813, 2439277719,
    3865271297, 1802195444, 476864866, 2238001368, 4066508878, 1812370925,
    453092731, 2181625025, 4111451223, 1706088902, 314042704, 2344532202,
    4240017532, 1658658271, 366619977, 2362670323, 4224994405, 1303535960,
    984961486, 2747007092, 3569037538, 1256170817, 1037604311, 2765210733,
    3554079995, 1131014506, 879679996, 2909243462, 3663771856, 1141124467,
    855842277, 2852801631, 3708648649, 1342533948, 654459306, 3188396048,
    3373015174, 1466479909, 544179635, 3110523913, 3462522015, 1591671054,
    702138776, 2966460450, 3352799412, 1504918807, 783551873, 3082640443,
    3233442989, 3988292384, 2596254646, 62317068, 1957810842, 3939845945,
    2647816111, 81470997, 1943803523, 3814918930, 2489596804, 225274430,
    2053790376, 3826175755, 2466906013, 167816743, 2097651377, 4027552580,
    2265490386, 503444072, 1762050814, 4150417245, 2154129355, 426522225,
    1852507879, 4275313526, 2312317920, 282753626, 1742555852, 4189708143,
    2394877945, 397917763, 1622183637, 3604390888, 2714866558, 953729732,
    1340076626, 3518719985, 2797360999, 1068828381, 1219638859, 3624741850,
    2936675148, 906185462, 1090812512, 3747672003, 2825379669, 829329135,
    1181335161, 3412177804, 3160834842, 628085408, 1382605366, 3423369109,
    3138078467, 570562233, 1426400815, 3317316542, 2998733608, 733239954,
    1555261956, 3268935591, 3050360625, 752459403, 1541320221, 2607071920,
    3965973030, 1969922972, 40735498, 2617837225, 3943577151, 1913087877,
    83908371, 2512341634, 3803740692, 2075208622, 213261112, 2463272603,
    3855990285, 2094854071, 198958881, 2262029012, 4057260610, 1759359992,
    534414190, 2176718541, 4139329115, 1873836001, 414664567, 2282248934,
    4279200368, 1711684554, 285281116, 2405801727, 4167216745, 1634467795,
    376229701, 2685067896, 3608007406, 1308918612, 956543938, 2808555105,
    3495958263, 1231636301, 1047427035, 2932959818, 3654703836, 1088359270,
    936918000, 2847714899, 3736837829, 1202900863, 817233897, 3183342108,
    3401237130, 1404277552, 615818150, 3134207493, 3453421203, 1423857449,
    601450431, 3009837614, 3294710456, 1567103746, 711928724, 3020668471,
    3272380065, 1510334235, 755167117,
]


def _right_shift_unsigned(num: int, bit: int = 0) -> int:
    """Python implementation of JavaScript's unsigned right shift (>>>)"""
    val = ctypes.c_uint32(num).value >> bit
    MAX32INT = 4294967295
    return (val + (MAX32INT + 1)) % (2 * (MAX32INT + 1)) - MAX32INT - 1


def mrc(e: str) -> int:
    """CRC32 variant used for the x9 field of x-s-common"""
    o = -1
    for n in range(min(57, len(e))):
        o = CRC32_TABLE[(o & 255) ^ ord(e[n])] ^ _right_shift_unsigned(o, 8)
    return o ^ -1 ^ 3988292384


def _triplet_to_base64(e: int) -> str:
    """Convert a 24-bit integer into 4 Base64 characters"""
    return (
        BASE64_CHARS[(e >> 18) & 63]
        + BASE64_CHARS[(e >> 12) & 63]
        + BASE64_CHARS[(e >> 6) & 63]
        + BASE64_CHARS[e & 63]
    )


def _encode_chunk(data: list, start: int, end: int) -> str:
    """Encode one chunk of data"""
    result = []
    for i in range(start, end, 3):
        c = ((data[i] << 16) & 0xFF0000) + ((data[i + 1] << 8) & 0xFF00) + (data[i + 2] & 0xFF)
        result.append(_triplet_to_base64(c))
    return "".join(result)


def encode_utf8(s: str) -> list:
    """Encode a string into a list of UTF-8 byte values"""
    encoded = quote(s, safe="~()*!.'")
    result = []
    i = 0
    while i < len(encoded):
        if encoded[i] == "%":
            result.append(int(encoded[i + 1: i + 3], 16))
            i += 3
        else:
            result.append(ord(encoded[i]))
            i += 1
    return result


def b64_encode(data: list) -> str:
    """Custom Base64 encoding"""
    length = len(data)
    remainder = length % 3
    chunks = []

    main_length = length - remainder
    for i in range(0, main_length, 16383):
        chunks.append(_encode_chunk(data, i, min(i + 16383, main_length)))

    if remainder == 1:
        a = data[length - 1]
        chunks.append(BASE64_CHARS[a >> 2] + BASE64_CHARS[(a << 4) & 63] + "==")
    elif remainder == 2:
        a = (data[length - 2] << 8) + data[length - 1]
        chunks.append(
            BASE64_CHARS[a >> 10] + BASE64_CHARS[(a >> 4) & 63] + BASE64_CHARS[(a << 2) & 63] + "="
        )

    return "".join(chunks)


def get_trace_id() -> str:
    """Generate a trace id for request tracing"""
    return "".join(random.choice("abcdef0123456789") for _ in range(16))
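To make the obfuscation concrete, a small sketch that chains `encode_utf8` and `b64_encode` the same way `_build_xs_payload` in playwright_sign.py does; the payload is illustrative:

```python
import json

from media_platform.xhs.xhs_sign import b64_encode, encode_utf8

# Serialize compactly, convert to UTF-8 byte values, then encode with the
# shuffled alphabet; the output differs from standard Base64.
payload = json.dumps({"x0": "4.2.1", "x1": "xhs-pc-web"}, separators=(",", ":"))
token = "XYS_" + b64_encode(encode_utf8(payload))
print(token)
```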
pyproject.toml

@@ -35,6 +35,9 @@ dependencies = [
    "wordcloud==1.9.3",
    "xhshow>=0.1.3",
    "pre-commit>=3.5.0",
+    "openpyxl>=3.1.2",
+    "pytest>=7.4.0",
+    "pytest-asyncio>=0.21.0",
]

[[tool.uv.index]]
requirements.txt

@@ -25,4 +25,7 @@ alembic>=1.16.5
asyncmy>=0.2.10
sqlalchemy>=2.0.43
motor>=3.3.0
xhshow>=0.1.3
+openpyxl>=3.1.2
+pytest>=7.4.0
+pytest-asyncio>=0.21.0
@@ -38,13 +38,14 @@ class BiliStoreFactory:
        "json": BiliJsonStoreImplement,
        "sqlite": BiliSqliteStoreImplement,
        "mongodb": BiliMongoStoreImplement,
+        "excel": BiliExcelStoreImplement,
    }

    @staticmethod
    def create_store() -> AbstractStore:
        store_class = BiliStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
        if not store_class:
-            raise ValueError("[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
+            raise ValueError("[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
        return store_class()

@@ -365,3 +365,14 @@ class BiliMongoStoreImplement(AbstractStore):
            data=creator_item
        )
        utils.logger.info(f"[BiliMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")


+class BiliExcelStoreImplement:
+    """Bilibili Excel store implementation - global singleton"""
+
+    def __new__(cls, *args, **kwargs):
+        from store.excel_store_base import ExcelStoreBase
+        return ExcelStoreBase.get_instance(
+            platform="bilibili",
+            crawler_type=crawler_type_var.get()
+        )
@@ -37,13 +37,14 @@ class DouyinStoreFactory:
        "json": DouyinJsonStoreImplement,
        "sqlite": DouyinSqliteStoreImplement,
        "mongodb": DouyinMongoStoreImplement,
+        "excel": DouyinExcelStoreImplement,
    }

    @staticmethod
    def create_store() -> AbstractStore:
        store_class = DouyinStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
        if not store_class:
-            raise ValueError("[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
+            raise ValueError("[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
        return store_class()

@@ -264,3 +264,14 @@ class DouyinMongoStoreImplement(AbstractStore):
            data=creator_item
        )
        utils.logger.info(f"[DouyinMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")


+class DouyinExcelStoreImplement:
+    """Douyin Excel store implementation - global singleton"""
+
+    def __new__(cls, *args, **kwargs):
+        from store.excel_store_base import ExcelStoreBase
+        return ExcelStoreBase.get_instance(
+            platform="douyin",
+            crawler_type=crawler_type_var.get()
+        )
store/excel_store_base.py (new file, 380 lines)

@@ -0,0 +1,380 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/excel_store_base.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
# Disclaimer: this code is for learning and research purposes only. Users must:
# 1. Not use it for any commercial purposes.
# 2. Comply with the target platform's terms of service and robots.txt rules.
# 3. Not crawl at large scale or disrupt the platform's operation.
# 4. Keep the request rate reasonable to avoid unnecessary load on the target platform.
# 5. Not use it for any illegal or improper purposes.
#
# See the LICENSE file in the project root for the full license terms.
# Using this code indicates your agreement with the principles above and all terms in the LICENSE.

"""
Excel Store Base Implementation
Provides Excel export functionality for crawled data with formatted sheets
"""

import threading
from datetime import datetime
from typing import Dict, List, Any
from pathlib import Path

try:
    import openpyxl
    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
    from openpyxl.utils import get_column_letter
    EXCEL_AVAILABLE = True
except ImportError:
    EXCEL_AVAILABLE = False

from base.base_crawler import AbstractStore
from tools import utils


class ExcelStoreBase(AbstractStore):
    """
    Base class for Excel storage implementation
    Provides formatted Excel export with multiple sheets for contents, comments, and creators
    Uses singleton pattern to maintain state across multiple store calls
    """

    # Class-level singleton management
    _instances: Dict[str, "ExcelStoreBase"] = {}
    _lock = threading.Lock()

    @classmethod
    def get_instance(cls, platform: str, crawler_type: str) -> "ExcelStoreBase":
        """
        Get or create a singleton instance for the given platform and crawler type

        Args:
            platform: Platform name (xhs, dy, ks, etc.)
            crawler_type: Type of crawler (search, detail, creator)

        Returns:
            ExcelStoreBase instance
        """
        key = f"{platform}_{crawler_type}"
        with cls._lock:
            if key not in cls._instances:
                cls._instances[key] = cls(platform, crawler_type)
            return cls._instances[key]

    @classmethod
    def flush_all(cls):
        """
        Flush all Excel store instances and save to files
        Should be called at the end of crawler execution
        """
        with cls._lock:
            for key, instance in cls._instances.items():
                try:
                    instance.flush()
                    utils.logger.info(f"[ExcelStoreBase] Flushed instance: {key}")
                except Exception as e:
                    utils.logger.error(f"[ExcelStoreBase] Error flushing {key}: {e}")
            cls._instances.clear()

    def __init__(self, platform: str, crawler_type: str = "search"):
        """
        Initialize Excel store

        Args:
            platform: Platform name (xhs, dy, ks, etc.)
            crawler_type: Type of crawler (search, detail, creator)
        """
        if not EXCEL_AVAILABLE:
            raise ImportError(
                "openpyxl is required for Excel export. "
                "Install it with: pip install openpyxl"
            )

        super().__init__()
        self.platform = platform
        self.crawler_type = crawler_type

        # Create data directory
        self.data_dir = Path("data") / platform
        self.data_dir.mkdir(parents=True, exist_ok=True)

        # Initialize workbook
        self.workbook = openpyxl.Workbook()
        self.workbook.remove(self.workbook.active)  # Remove default sheet

        # Create sheets
        self.contents_sheet = self.workbook.create_sheet("Contents")
        self.comments_sheet = self.workbook.create_sheet("Comments")
        self.creators_sheet = self.workbook.create_sheet("Creators")

        # Track if headers are written
        self.contents_headers_written = False
        self.comments_headers_written = False
        self.creators_headers_written = False
        self.contacts_headers_written = False
        self.dynamics_headers_written = False

        # Optional sheets for platforms that need them (e.g., Bilibili)
        self.contacts_sheet = None
        self.dynamics_sheet = None

        # Generate filename
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.filename = self.data_dir / f"{platform}_{crawler_type}_{timestamp}.xlsx"

        utils.logger.info(f"[ExcelStoreBase] Initialized Excel export to: {self.filename}")

    def _apply_header_style(self, sheet, row_num: int = 1):
        """
        Apply formatting to header row

        Args:
            sheet: Worksheet object
            row_num: Row number for headers (default: 1)
        """
        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
        header_font = Font(bold=True, color="FFFFFF", size=11)
        header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
        border = Border(
            left=Side(style='thin'),
            right=Side(style='thin'),
            top=Side(style='thin'),
            bottom=Side(style='thin')
        )

        for cell in sheet[row_num]:
            cell.fill = header_fill
            cell.font = header_font
            cell.alignment = header_alignment
            cell.border = border

    def _auto_adjust_column_width(self, sheet):
        """
        Auto-adjust column widths based on content

        Args:
            sheet: Worksheet object
        """
        for column in sheet.columns:
            max_length = 0
            column_letter = get_column_letter(column[0].column)

            for cell in column:
                try:
                    if cell.value:
                        max_length = max(max_length, len(str(cell.value)))
                except (TypeError, AttributeError):
                    pass

            # Set width with min/max constraints
            adjusted_width = min(max(max_length + 2, 10), 50)
            sheet.column_dimensions[column_letter].width = adjusted_width

    def _write_headers(self, sheet, headers: List[str]):
        """
        Write headers to sheet

        Args:
            sheet: Worksheet object
            headers: List of header names
        """
        for col_num, header in enumerate(headers, 1):
            sheet.cell(row=1, column=col_num, value=header)

        self._apply_header_style(sheet)

    def _write_row(self, sheet, data: Dict[str, Any], headers: List[str]):
        """
        Write data row to sheet

        Args:
            sheet: Worksheet object
            data: Data dictionary
            headers: List of header names (defines column order)
        """
        row_num = sheet.max_row + 1

        for col_num, header in enumerate(headers, 1):
            value = data.get(header, "")

            # Handle different data types
            if isinstance(value, (list, dict)):
                value = str(value)
            elif value is None:
                value = ""

            cell = sheet.cell(row=row_num, column=col_num, value=value)

            # Apply basic formatting
            cell.alignment = Alignment(vertical="top", wrap_text=True)
            cell.border = Border(
                left=Side(style='thin'),
                right=Side(style='thin'),
                top=Side(style='thin'),
                bottom=Side(style='thin')
            )

    async def store_content(self, content_item: Dict):
        """
        Store content data to Excel

        Args:
            content_item: Content data dictionary
        """
        # Define headers (customize based on platform)
        headers = list(content_item.keys())

        # Write headers if first time
        if not self.contents_headers_written:
            self._write_headers(self.contents_sheet, headers)
            self.contents_headers_written = True

        # Write data row
        self._write_row(self.contents_sheet, content_item, headers)

        # Get ID from various possible field names
        content_id = content_item.get('note_id') or content_item.get('aweme_id') or content_item.get('video_id') or content_item.get('content_id') or 'N/A'
        utils.logger.info(f"[ExcelStoreBase] Stored content to Excel: {content_id}")

    async def store_comment(self, comment_item: Dict):
        """
        Store comment data to Excel

        Args:
            comment_item: Comment data dictionary
        """
        # Define headers
        headers = list(comment_item.keys())

        # Write headers if first time
        if not self.comments_headers_written:
            self._write_headers(self.comments_sheet, headers)
            self.comments_headers_written = True

        # Write data row
        self._write_row(self.comments_sheet, comment_item, headers)

        utils.logger.info(f"[ExcelStoreBase] Stored comment to Excel: {comment_item.get('comment_id', 'N/A')}")

    async def store_creator(self, creator: Dict):
        """
        Store creator data to Excel

        Args:
            creator: Creator data dictionary
        """
        # Define headers
        headers = list(creator.keys())

        # Write headers if first time
        if not self.creators_headers_written:
            self._write_headers(self.creators_sheet, headers)
            self.creators_headers_written = True

        # Write data row
        self._write_row(self.creators_sheet, creator, headers)

        utils.logger.info(f"[ExcelStoreBase] Stored creator to Excel: {creator.get('user_id', 'N/A')}")

    async def store_contact(self, contact_item: Dict):
        """
        Store contact data to Excel (for platforms like Bilibili)

        Args:
            contact_item: Contact data dictionary
        """
        # Create contacts sheet if not exists
        if self.contacts_sheet is None:
            self.contacts_sheet = self.workbook.create_sheet("Contacts")

        # Define headers
        headers = list(contact_item.keys())

        # Write headers if first time
        if not self.contacts_headers_written:
            self._write_headers(self.contacts_sheet, headers)
            self.contacts_headers_written = True

        # Write data row
        self._write_row(self.contacts_sheet, contact_item, headers)

        utils.logger.info(f"[ExcelStoreBase] Stored contact to Excel: up_id={contact_item.get('up_id', 'N/A')}, fan_id={contact_item.get('fan_id', 'N/A')}")

    async def store_dynamic(self, dynamic_item: Dict):
        """
        Store dynamic data to Excel (for platforms like Bilibili)

        Args:
            dynamic_item: Dynamic data dictionary
        """
        # Create dynamics sheet if not exists
        if self.dynamics_sheet is None:
            self.dynamics_sheet = self.workbook.create_sheet("Dynamics")

        # Define headers
        headers = list(dynamic_item.keys())

        # Write headers if first time
        if not self.dynamics_headers_written:
            self._write_headers(self.dynamics_sheet, headers)
            self.dynamics_headers_written = True

        # Write data row
        self._write_row(self.dynamics_sheet, dynamic_item, headers)

        utils.logger.info(f"[ExcelStoreBase] Stored dynamic to Excel: {dynamic_item.get('dynamic_id', 'N/A')}")

    def flush(self):
        """
        Save workbook to file
        """
        try:
            # Auto-adjust column widths for all sheets
            self._auto_adjust_column_width(self.contents_sheet)
            self._auto_adjust_column_width(self.comments_sheet)
            self._auto_adjust_column_width(self.creators_sheet)
            if self.contacts_sheet is not None:
                self._auto_adjust_column_width(self.contacts_sheet)
            if self.dynamics_sheet is not None:
                self._auto_adjust_column_width(self.dynamics_sheet)

            # Remove empty sheets (only header row)
            if self.contents_sheet.max_row == 1:
                self.workbook.remove(self.contents_sheet)
            if self.comments_sheet.max_row == 1:
                self.workbook.remove(self.comments_sheet)
            if self.creators_sheet.max_row == 1:
                self.workbook.remove(self.creators_sheet)
            if self.contacts_sheet is not None and self.contacts_sheet.max_row == 1:
                self.workbook.remove(self.contacts_sheet)
            if self.dynamics_sheet is not None and self.dynamics_sheet.max_row == 1:
                self.workbook.remove(self.dynamics_sheet)

            # Check if there are any sheets left
            if len(self.workbook.sheetnames) == 0:
                utils.logger.info(f"[ExcelStoreBase] No data to save, skipping file creation: {self.filename}")
                return

            # Save workbook
            self.workbook.save(self.filename)
            utils.logger.info(f"[ExcelStoreBase] Excel file saved successfully: {self.filename}")

        except Exception as e:
            utils.logger.error(f"[ExcelStoreBase] Error saving Excel file: {e}")
            raise
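A minimal sketch of the singleton flow that the per-platform `*ExcelStoreImplement` wrappers below rely on (the platform and data values are illustrative):

```python
import asyncio

from store.excel_store_base import ExcelStoreBase


async def demo() -> None:
    # The same (platform, crawler_type) key always yields the same workbook.
    store = ExcelStoreBase.get_instance(platform="xhs", crawler_type="search")
    assert store is ExcelStoreBase.get_instance(platform="xhs", crawler_type="search")

    await store.store_content({"note_id": "123", "title": "Test Post"})

    # main.py calls this once at shutdown to write every open workbook.
    ExcelStoreBase.flush_all()


asyncio.run(demo())
```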
@@ -37,6 +37,7 @@ class KuaishouStoreFactory:
        "json": KuaishouJsonStoreImplement,
        "sqlite": KuaishouSqliteStoreImplement,
        "mongodb": KuaishouMongoStoreImplement,
+        "excel": KuaishouExcelStoreImplement,
    }

    @staticmethod

@@ -44,7 +45,7 @@ class KuaishouStoreFactory:
        store_class = KuaishouStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
        if not store_class:
            raise ValueError(
-                "[KuaishouStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
+                "[KuaishouStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
        return store_class()

@@ -226,3 +226,14 @@ class KuaishouMongoStoreImplement(AbstractStore):
            data=creator_item
        )
        utils.logger.info(f"[KuaishouMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")


+class KuaishouExcelStoreImplement:
+    """Kuaishou Excel store implementation - global singleton"""
+
+    def __new__(cls, *args, **kwargs):
+        from store.excel_store_base import ExcelStoreBase
+        return ExcelStoreBase.get_instance(
+            platform="kuaishou",
+            crawler_type=crawler_type_var.get()
+        )
@@ -34,6 +34,7 @@ class TieBaStoreFactory:
        "json": TieBaJsonStoreImplement,
        "sqlite": TieBaSqliteStoreImplement,
        "mongodb": TieBaMongoStoreImplement,
+        "excel": TieBaExcelStoreImplement,
    }

    @staticmethod

@@ -41,7 +42,7 @@ class TieBaStoreFactory:
        store_class = TieBaStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
        if not store_class:
            raise ValueError(
-                "[TieBaStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
+                "[TieBaStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
        return store_class()

@@ -258,3 +258,14 @@ class TieBaMongoStoreImplement(AbstractStore):
            data=creator_item
        )
        utils.logger.info(f"[TieBaMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")


+class TieBaExcelStoreImplement:
+    """Tieba Excel store implementation - global singleton"""
+
+    def __new__(cls, *args, **kwargs):
+        from store.excel_store_base import ExcelStoreBase
+        return ExcelStoreBase.get_instance(
+            platform="tieba",
+            crawler_type=crawler_type_var.get()
+        )
@@ -38,13 +38,14 @@ class WeibostoreFactory:
        "json": WeiboJsonStoreImplement,
        "sqlite": WeiboSqliteStoreImplement,
        "mongodb": WeiboMongoStoreImplement,
+        "excel": WeiboExcelStoreImplement,
    }

    @staticmethod
    def create_store() -> AbstractStore:
        store_class = WeibostoreFactory.STORES.get(config.SAVE_DATA_OPTION)
        if not store_class:
-            raise ValueError("[WeibostoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
+            raise ValueError("[WeibostoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
        return store_class()

@@ -280,3 +280,14 @@ class WeiboMongoStoreImplement(AbstractStore):
            data=creator_item
        )
        utils.logger.info(f"[WeiboMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")


+class WeiboExcelStoreImplement:
+    """Weibo Excel store implementation - global singleton"""
+
+    def __new__(cls, *args, **kwargs):
+        from store.excel_store_base import ExcelStoreBase
+        return ExcelStoreBase.get_instance(
+            platform="weibo",
+            crawler_type=crawler_type_var.get()
+        )
@@ -37,13 +37,14 @@ class XhsStoreFactory:
        "json": XhsJsonStoreImplement,
        "sqlite": XhsSqliteStoreImplement,
        "mongodb": XhsMongoStoreImplement,
+        "excel": XhsExcelStoreImplement,
    }

    @staticmethod
    def create_store() -> AbstractStore:
        store_class = XhsStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
        if not store_class:
-            raise ValueError("[XhsStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
+            raise ValueError("[XhsStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
        return store_class()
@@ -37,6 +37,7 @@ from tools.time_util import get_current_timestamp
from var import crawler_type_var
from database.mongodb_store_base import MongoDBStoreBase
from tools import utils
+from store.excel_store_base import ExcelStoreBase

class XhsCsvStoreImplement(AbstractStore):
    def __init__(self, **kwargs):

@@ -336,3 +337,14 @@ class XhsMongoStoreImplement(AbstractStore):
            data=creator_item
        )
        utils.logger.info(f"[XhsMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")


+class XhsExcelStoreImplement:
+    """Xiaohongshu Excel store implementation - global singleton"""
+
+    def __new__(cls, *args, **kwargs):
+        from store.excel_store_base import ExcelStoreBase
+        return ExcelStoreBase.get_instance(
+            platform="xhs",
+            crawler_type=crawler_type_var.get()
+        )
@@ -28,7 +28,8 @@ from ._store_impl import (ZhihuCsvStoreImplement,
                          ZhihuDbStoreImplement,
                          ZhihuJsonStoreImplement,
                          ZhihuSqliteStoreImplement,
-                          ZhihuMongoStoreImplement)
+                          ZhihuMongoStoreImplement,
+                          ZhihuExcelStoreImplement)
from tools import utils
from var import source_keyword_var

@@ -40,13 +41,14 @@ class ZhihuStoreFactory:
        "json": ZhihuJsonStoreImplement,
        "sqlite": ZhihuSqliteStoreImplement,
        "mongodb": ZhihuMongoStoreImplement,
+        "excel": ZhihuExcelStoreImplement,
    }

    @staticmethod
    def create_store() -> AbstractStore:
        store_class = ZhihuStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
        if not store_class:
-            raise ValueError("[ZhihuStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
+            raise ValueError("[ZhihuStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
        return store_class()

    async def batch_update_zhihu_contents(contents: List[ZhihuContent]):

@@ -257,3 +257,14 @@ class ZhihuMongoStoreImplement(AbstractStore):
            data=creator_item
        )
        utils.logger.info(f"[ZhihuMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")


+class ZhihuExcelStoreImplement:
+    """Zhihu Excel store implementation - global singleton"""
+
+    def __new__(cls, *args, **kwargs):
+        from store.excel_store_base import ExcelStoreBase
+        return ExcelStoreBase.get_instance(
+            platform="zhihu",
+            crawler_type=crawler_type_var.get()
+        )
tests/__init__.py (new file, 2 lines)

@@ -0,0 +1,2 @@
# -*- coding: utf-8 -*-
# MediaCrawler Test Suite
tests/conftest.py (new file, 81 lines)

@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
"""
Pytest configuration and shared fixtures
"""

import pytest
import sys
from pathlib import Path

# Add project root to Python path
project_root = Path(__file__).parent.parent
sys.path.insert(0, str(project_root))


@pytest.fixture(scope="session")
def project_root_path():
    """Return project root path"""
    return project_root


@pytest.fixture
def sample_xhs_note():
    """Sample Xiaohongshu note data for testing"""
    return {
        "note_id": "test_note_123",
        "type": "normal",
        "title": "测试标题 Test Title",
        "desc": "这是一个测试描述 This is a test description",
        "video_url": "",
        "time": 1700000000,
        "last_update_time": 1700000000,
        "user_id": "user_123",
        "nickname": "测试用户",
        "avatar": "https://example.com/avatar.jpg",
        "liked_count": 100,
        "collected_count": 50,
        "comment_count": 25,
        "share_count": 10,
        "ip_location": "上海",
        "image_list": "https://example.com/img1.jpg,https://example.com/img2.jpg",
        "tag_list": "测试,编程,Python",
        "note_url": "https://www.xiaohongshu.com/explore/test_note_123",
        "source_keyword": "测试关键词",
        "xsec_token": "test_token_123"
    }


@pytest.fixture
def sample_xhs_comment():
    """Sample Xiaohongshu comment data for testing"""
    return {
        "comment_id": "comment_123",
        "create_time": 1700000000,
        "ip_location": "北京",
        "note_id": "test_note_123",
        "content": "这是一条测试评论 This is a test comment",
        "user_id": "user_456",
        "nickname": "评论用户",
        "avatar": "https://example.com/avatar2.jpg",
        "sub_comment_count": 5,
        "pictures": "",
        "parent_comment_id": 0,
        "like_count": 15
    }


@pytest.fixture
def sample_xhs_creator():
    """Sample Xiaohongshu creator data for testing"""
    return {
        "user_id": "creator_123",
        "nickname": "创作者名称",
        "gender": "女",
        "avatar": "https://example.com/creator_avatar.jpg",
        "desc": "这是创作者简介",
        "ip_location": "广州",
        "follows": 500,
        "fans": 10000,
        "interaction": 50000,
        "tag_list": '{"profession": "设计师", "interest": "摄影"}'
    }
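As a sketch of how these fixtures combine with the store (mirroring the patterns in test_excel_store.py below; the test name is illustrative):

```python
import pytest

from store.excel_store_base import ExcelStoreBase


@pytest.mark.asyncio
async def test_store_sample_note(sample_xhs_note, tmp_path, monkeypatch):
    # Keep test output out of the real data/ directory.
    monkeypatch.chdir(tmp_path)
    store = ExcelStoreBase(platform="xhs", crawler_type="search")
    await store.store_content(sample_xhs_note)
    assert store.contents_sheet.max_row == 2  # header + one data row
```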
275
tests/test_excel_store.py
Normal file
275
tests/test_excel_store.py
Normal file
@@ -0,0 +1,275 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/tests/test_excel_store.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。

"""
Unit tests for Excel export functionality
"""

import pytest
import asyncio
import os
from pathlib import Path
import tempfile
import shutil

try:
    import openpyxl
    EXCEL_AVAILABLE = True
except ImportError:
    EXCEL_AVAILABLE = False

from store.excel_store_base import ExcelStoreBase


@pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed")
class TestExcelStoreBase:
    """Test cases for ExcelStoreBase"""

    @pytest.fixture(autouse=True)
    def clear_singleton_state(self):
        """Clear singleton state before and after each test"""
        ExcelStoreBase._instances.clear()
        yield
        ExcelStoreBase._instances.clear()

    @pytest.fixture
    def temp_dir(self):
        """Create temporary directory for test files"""
        temp_path = tempfile.mkdtemp()
        yield temp_path
        # Cleanup
        shutil.rmtree(temp_path, ignore_errors=True)

    @pytest.fixture
    def excel_store(self, temp_dir, monkeypatch):
        """Create ExcelStoreBase instance for testing"""
        # Monkey patch data directory
        monkeypatch.chdir(temp_dir)
        store = ExcelStoreBase(platform="test", crawler_type="search")
        yield store
        # Cleanup is handled by temp_dir fixture

    def test_initialization(self, excel_store):
        """Test Excel store initialization"""
        assert excel_store.platform == "test"
        assert excel_store.crawler_type == "search"
        assert excel_store.workbook is not None
        assert excel_store.contents_sheet is not None
        assert excel_store.comments_sheet is not None
        assert excel_store.creators_sheet is not None

    @pytest.mark.asyncio
    async def test_store_content(self, excel_store):
        """Test storing content data"""
        content_item = {
            "note_id": "test123",
            "title": "Test Title",
            "desc": "Test Description",
            "user_id": "user456",
            "nickname": "TestUser",
            "liked_count": 100,
            "comment_count": 50
        }

        await excel_store.store_content(content_item)

        # Verify data was written
        assert excel_store.contents_sheet.max_row == 2  # Header + 1 data row
        assert excel_store.contents_headers_written is True

    @pytest.mark.asyncio
    async def test_store_comment(self, excel_store):
        """Test storing comment data"""
        comment_item = {
            "comment_id": "comment123",
            "note_id": "note456",
            "content": "Great post!",
            "user_id": "user789",
            "nickname": "Commenter",
            "like_count": 10
        }

        await excel_store.store_comment(comment_item)

        # Verify data was written
        assert excel_store.comments_sheet.max_row == 2  # Header + 1 data row
        assert excel_store.comments_headers_written is True

    @pytest.mark.asyncio
    async def test_store_creator(self, excel_store):
        """Test storing creator data"""
        creator_item = {
            "user_id": "creator123",
            "nickname": "Creator Name",
            "fans": 10000,
            "follows": 500,
            "interaction": 50000
        }

        await excel_store.store_creator(creator_item)

        # Verify data was written
        assert excel_store.creators_sheet.max_row == 2  # Header + 1 data row
        assert excel_store.creators_headers_written is True

    @pytest.mark.asyncio
    async def test_multiple_items(self, excel_store):
        """Test storing multiple items"""
        # Store multiple content items
        for i in range(5):
            await excel_store.store_content({
                "note_id": f"note{i}",
                "title": f"Title {i}",
                "liked_count": i * 10
            })

        # Verify all items were stored
        assert excel_store.contents_sheet.max_row == 6  # Header + 5 data rows

    def test_flush(self, excel_store):
        """Test flushing data to file"""
        # Add some test data
        asyncio.run(excel_store.store_content({
            "note_id": "test",
            "title": "Test"
        }))

        # Flush to file
        excel_store.flush()

        # Verify file was created
        assert excel_store.filename.exists()

        # Verify file can be opened
        wb = openpyxl.load_workbook(excel_store.filename)
        assert "Contents" in wb.sheetnames
        wb.close()

    def test_header_formatting(self, excel_store):
        """Test header row formatting"""
        asyncio.run(excel_store.store_content({"note_id": "test", "title": "Test"}))

        # Check header formatting
        header_cell = excel_store.contents_sheet.cell(row=1, column=1)
        assert header_cell.font.bold is True
        # RGB color may have different prefix (00 or FF), check the actual color part
        assert header_cell.fill.start_color.rgb[-6:] == "366092"

    def test_empty_sheets_removed(self, excel_store):
        """Test that empty sheets are removed on flush"""
        # Only add content, leave comments and creators empty
        asyncio.run(excel_store.store_content({"note_id": "test"}))

        excel_store.flush()

        # Reload workbook
        wb = openpyxl.load_workbook(excel_store.filename)

        # Only Contents sheet should exist
        assert "Contents" in wb.sheetnames
        assert "Comments" not in wb.sheetnames
        assert "Creators" not in wb.sheetnames
        wb.close()


@pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed")
def test_excel_import_availability():
    """Test that openpyxl is available"""
    assert EXCEL_AVAILABLE is True
    import openpyxl
    assert openpyxl is not None


@pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed")
class TestSingletonPattern:
    """Test singleton pattern for Excel store"""

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self, tmp_path, monkeypatch):
        """Setup and teardown for each test"""
        # Change to temp directory
        monkeypatch.chdir(tmp_path)
        # Clear singleton instances before each test
        ExcelStoreBase._instances.clear()
        yield
        # Cleanup after test
        ExcelStoreBase._instances.clear()

    def test_get_instance_returns_same_instance(self):
        """Test that get_instance returns the same instance for same parameters"""
        instance1 = ExcelStoreBase.get_instance("xhs", "search")
        instance2 = ExcelStoreBase.get_instance("xhs", "search")

        assert instance1 is instance2

    def test_get_instance_different_params_returns_different_instances(self):
        """Test that different parameters return different instances"""
        instance1 = ExcelStoreBase.get_instance("xhs", "search")
        instance2 = ExcelStoreBase.get_instance("xhs", "detail")
        instance3 = ExcelStoreBase.get_instance("douyin", "search")

        assert instance1 is not instance2
        assert instance1 is not instance3
        assert instance2 is not instance3

    @pytest.mark.asyncio
    async def test_singleton_preserves_data(self):
        """Test that singleton pattern preserves data across multiple calls"""
        # First call - store some content
        store1 = ExcelStoreBase.get_instance("test", "search")
        await store1.store_content({"note_id": "note1", "title": "Title 1"})

        # Second call - should get same instance with data
        store2 = ExcelStoreBase.get_instance("test", "search")
        await store2.store_content({"note_id": "note2", "title": "Title 2"})

        # Verify both items are in the same workbook
        assert store1 is store2
        assert store1.contents_sheet.max_row == 3  # Header + 2 data rows

    def test_flush_all_saves_all_instances(self, tmp_path):
        """Test that flush_all saves all instances"""
        # Create multiple instances
        store1 = ExcelStoreBase.get_instance("platform1", "search")
        store2 = ExcelStoreBase.get_instance("platform2", "search")

        # Add data to each
        asyncio.run(store1.store_content({"note_id": "note1"}))
        asyncio.run(store2.store_content({"note_id": "note2"}))

        # Flush all
        ExcelStoreBase.flush_all()

        # Verify instances are cleared
        assert len(ExcelStoreBase._instances) == 0

        # Verify files were created
        assert store1.filename.exists()
        assert store2.filename.exists()

    def test_flush_all_clears_instances(self):
        """Test that flush_all clears the instances dictionary"""
        # Create an instance
        ExcelStoreBase.get_instance("test", "search")
        assert len(ExcelStoreBase._instances) == 1

        # Flush all
        ExcelStoreBase.flush_all()

        # Verify instances are cleared
        assert len(ExcelStoreBase._instances) == 0
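The singleton tests above pin down a small lifecycle contract: `get_instance` returns one workbook per `(platform, crawler_type)` pair, and `flush_all()` both saves every open workbook and clears the registry. A minimal usage sketch of that contract, assuming the `ExcelStoreBase` API exactly as exercised by these tests:

```python
import asyncio

from store.excel_store_base import ExcelStoreBase


async def export_demo():
    # The same (platform, crawler_type) key returns the same workbook
    # instance, so rows written from different call sites land in one file.
    store = ExcelStoreBase.get_instance("xhs", "search")
    await store.store_content({"note_id": "demo1", "title": "Demo"})

    # flush_all() saves every open workbook to disk and clears the registry.
    ExcelStoreBase.flush_all()


asyncio.run(export_demo())
```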
75
tests/test_store_factory.py
Normal file
@@ -0,0 +1,75 @@
# -*- coding: utf-8 -*-
"""
Unit tests for Store Factory functionality
"""

import pytest
from unittest.mock import patch, MagicMock

from store.xhs import XhsStoreFactory
from store.xhs._store_impl import (
    XhsCsvStoreImplement,
    XhsJsonStoreImplement,
    XhsDbStoreImplement,
    XhsSqliteStoreImplement,
    XhsMongoStoreImplement,
    XhsExcelStoreImplement
)


class TestXhsStoreFactory:
    """Test cases for XhsStoreFactory"""

    @patch('config.SAVE_DATA_OPTION', 'csv')
    def test_create_csv_store(self):
        """Test creating CSV store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsCsvStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'json')
    def test_create_json_store(self):
        """Test creating JSON store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsJsonStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'db')
    def test_create_db_store(self):
        """Test creating database store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsDbStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'sqlite')
    def test_create_sqlite_store(self):
        """Test creating SQLite store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsSqliteStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'mongodb')
    def test_create_mongodb_store(self):
        """Test creating MongoDB store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsMongoStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'excel')
    def test_create_excel_store(self):
        """Test creating Excel store"""
        # ContextVar cannot be mocked, so we test with actual value
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsExcelStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'invalid')
    def test_invalid_store_option(self):
        """Test that invalid store option raises ValueError"""
        with pytest.raises(ValueError) as exc_info:
            XhsStoreFactory.create_store()

        assert "Invalid save option" in str(exc_info.value)

    def test_all_stores_registered(self):
        """Test that all store types are registered"""
        expected_stores = ['csv', 'json', 'db', 'sqlite', 'mongodb', 'excel']

        for store_type in expected_stores:
            assert store_type in XhsStoreFactory.STORES

        assert len(XhsStoreFactory.STORES) == len(expected_stores)
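These tests imply a registry-style factory: a `STORES` mapping from each `save_data_option` value to an implementation class, with `create_store()` dispatching on `config.SAVE_DATA_OPTION` and raising `ValueError` for unknown keys. A self-contained sketch of that shape — the `config` namespace and the `CsvStore`/`JsonStore` classes below are hypothetical stand-ins, not the project's real classes:

```python
from types import SimpleNamespace

# Stand-in for the project's config module; SAVE_DATA_OPTION mirrors the
# real setting name, everything else here is illustrative.
config = SimpleNamespace(SAVE_DATA_OPTION="csv")


class CsvStore:
    pass


class JsonStore:
    pass


class StoreFactory:
    # Registry mapping each save option value to an implementation class.
    STORES = {"csv": CsvStore, "json": JsonStore}

    @classmethod
    def create_store(cls):
        store_class = cls.STORES.get(config.SAVE_DATA_OPTION)
        if not store_class:
            raise ValueError(f"Invalid save option: {config.SAVE_DATA_OPTION}")
        return store_class()


assert isinstance(StoreFactory.create_store(), CsvStore)
```

Registering classes in a dict keeps `test_all_stores_registered` trivial: adding a new backend is one entry in `STORES`, and the dispatch logic never changes.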
78
uv.lock
generated
@@ -171,9 +171,9 @@ wheels = [
name = "cfgv"
version = "3.4.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114 }
wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9" },
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 },
]

[[package]]
@@ -376,6 +376,15 @@ wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094 },
]

[[package]]
name = "et-xmlfile"
version = "2.0.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
]

[[package]]
name = "fastapi"
version = "0.110.2"
@@ -513,6 +522,15 @@ wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
]

[[package]]
name = "iniconfig"
version = "2.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 }
wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 },
]

[[package]]
name = "jieba"
version = "0.42.1"
@@ -777,6 +795,7 @@ dependencies = [
    { name = "matplotlib" },
    { name = "motor" },
    { name = "opencv-python" },
    { name = "openpyxl" },
    { name = "pandas" },
    { name = "parsel" },
    { name = "pillow" },
@@ -785,6 +804,8 @@ dependencies = [
    { name = "pydantic" },
    { name = "pyexecjs" },
    { name = "pyhumps" },
    { name = "pytest" },
    { name = "pytest-asyncio" },
    { name = "python-dotenv" },
    { name = "redis" },
    { name = "requests" },
@@ -810,6 +831,7 @@ requires-dist = [
    { name = "matplotlib", specifier = "==3.9.0" },
    { name = "motor", specifier = ">=3.3.0" },
    { name = "opencv-python", specifier = ">=4.11.0.86" },
    { name = "openpyxl", specifier = ">=3.1.2" },
    { name = "pandas", specifier = "==2.2.3" },
    { name = "parsel", specifier = "==1.9.1" },
    { name = "pillow", specifier = "==9.5.0" },
@@ -818,6 +840,8 @@ requires-dist = [
    { name = "pydantic", specifier = "==2.5.2" },
    { name = "pyexecjs", specifier = "==1.5.1" },
    { name = "pyhumps", specifier = ">=3.8.0" },
    { name = "pytest", specifier = ">=7.4.0" },
    { name = "pytest-asyncio", specifier = ">=0.21.0" },
    { name = "python-dotenv", specifier = "==1.0.1" },
    { name = "redis", specifier = "~=4.6.0" },
    { name = "requests", specifier = "==2.32.3" },
@@ -925,6 +949,18 @@ wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
]

[[package]]
name = "openpyxl"
version = "3.1.5"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
    { name = "et-xmlfile" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
]

[[package]]
name = "packaging"
version = "25.0"
@@ -1040,6 +1076,15 @@ wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/0f/c8dcadb2f0dcfdab6052d5ecf57ccf19b439c0adc29fc510ed0830349345/playwright-1.45.0-py3-none-win_amd64.whl", hash = "sha256:701db496928429aec103739e48e3110806bd5cf49456cc95b89f28e1abda71da", size = 29692683 },
]

[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 }
wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 },
]

[[package]]
name = "pre-commit"
version = "4.4.0"
@@ -1234,6 +1279,35 @@ wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120 },
]

[[package]]
name = "pytest"
version = "9.0.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
    { name = "colorama", marker = "sys_platform == 'win32'" },
    { name = "iniconfig" },
    { name = "packaging" },
    { name = "pluggy" },
    { name = "pygments" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/56/f013048ac4bc4c1d9be45afd4ab209ea62822fb1598f40687e6bf45dcea4/pytest-9.0.1.tar.gz", hash = "sha256:3e9c069ea73583e255c3b21cf46b8d3c56f6e3a1a8f6da94ccb0fcf57b9d73c8", size = 1564125 }
wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/8b/6300fb80f858cda1c51ffa17075df5d846757081d11ab4aa35cef9e6258b/pytest-9.0.1-py3-none-any.whl", hash = "sha256:67be0030d194df2dfa7b556f2e56fb3c3315bd5c8822c6951162b92b32ce7dad", size = 373668 },
]

[[package]]
name = "pytest-asyncio"
version = "1.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
    { name = "pytest" },
    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087 }
wheels = [
    { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075 },
]

[[package]]
name = "python-dateutil"
version = "2.9.0.post0"