mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-02-06 23:21:33 +08:00
feat: excel store with other platform
This commit is contained in:
11
README.md
11
README.md
@@ -228,14 +228,15 @@ python main.py --help
|
||||
|
||||
### 使用示例:
|
||||
```shell
|
||||
# 使用 Excel 存储数据(推荐用于数据分析)✨ 新功能
|
||||
uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
|
||||
|
||||
# 初始化 SQLite 数据库(使用'--init_db'时不需要携带其他optional)
|
||||
# 初始化 SQLite 数据库
|
||||
uv run main.py --init_db sqlite
|
||||
# 使用 SQLite 存储数据(推荐个人用户使用)
|
||||
# 使用 SQLite 存储数据
|
||||
uv run main.py --platform xhs --lt qrcode --type search --save_data_option sqlite
|
||||
|
||||
# 使用 Excel 存储数据(推荐用于数据分析)
|
||||
uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
|
||||
```
|
||||
|
||||
```shell
|
||||
# 初始化 MySQL 数据库
|
||||
uv run main.py --init_db mysql
|
||||
|
||||
@@ -71,6 +71,8 @@ class SaveDataOptionEnum(str, Enum):
|
||||
DB = "db"
|
||||
JSON = "json"
|
||||
SQLITE = "sqlite"
|
||||
MONGODB = "mongodb"
|
||||
EXCEL = "excel"
|
||||
|
||||
|
||||
class InitDbOptionEnum(str, Enum):
|
||||
@@ -199,7 +201,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
|
||||
SaveDataOptionEnum,
|
||||
typer.Option(
|
||||
"--save_data_option",
|
||||
help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库)",
|
||||
help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库 | mongodb=MongoDB数据库 | excel=Excel文件)",
|
||||
rich_help_panel="存储配置",
|
||||
),
|
||||
] = _coerce_enum(
|
||||
|
||||
11
main.py
11
main.py
@@ -87,14 +87,11 @@ async def main():
|
||||
# Flush Excel data if using Excel export
|
||||
if config.SAVE_DATA_OPTION == "excel":
|
||||
try:
|
||||
# Get the store instance and flush data
|
||||
from store.xhs import XhsStoreFactory
|
||||
store = XhsStoreFactory.create_store()
|
||||
if hasattr(store, 'flush'):
|
||||
store.flush()
|
||||
print(f"[Main] Excel file saved successfully")
|
||||
from store.excel_store_base import ExcelStoreBase
|
||||
ExcelStoreBase.flush_all()
|
||||
print("[Main] Excel files saved successfully")
|
||||
except Exception as e:
|
||||
print(f"Error flushing Excel data: {e}")
|
||||
print(f"[Main] Error flushing Excel data: {e}")
|
||||
|
||||
# Generate wordcloud after crawling is complete
|
||||
# Only for JSON save mode
|
||||
|
||||
@@ -38,13 +38,14 @@ class BiliStoreFactory:
|
||||
"json": BiliJsonStoreImplement,
|
||||
"sqlite": BiliSqliteStoreImplement,
|
||||
"mongodb": BiliMongoStoreImplement,
|
||||
"excel": BiliExcelStoreImplement,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def create_store() -> AbstractStore:
|
||||
store_class = BiliStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
|
||||
if not store_class:
|
||||
raise ValueError("[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
|
||||
raise ValueError("[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
|
||||
return store_class()
|
||||
|
||||
|
||||
|
||||
@@ -365,3 +365,14 @@ class BiliMongoStoreImplement(AbstractStore):
|
||||
data=creator_item
|
||||
)
|
||||
utils.logger.info(f"[BiliMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
|
||||
|
||||
class BiliExcelStoreImplement:
    """Bilibili Excel store implementation - global singleton.

    Constructing this class never yields a BiliExcelStoreImplement object:
    ``__new__`` returns whatever ``ExcelStoreBase.get_instance`` provides for
    platform "bilibili" and the value currently held by ``crawler_type_var``.
    """

    def __new__(cls, *args, **kwargs):
        # Imported when a store is requested rather than at module import time.
        from store.excel_store_base import ExcelStoreBase

        active_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance("bilibili", active_crawler_type)
|
||||
|
||||
@@ -37,13 +37,14 @@ class DouyinStoreFactory:
|
||||
"json": DouyinJsonStoreImplement,
|
||||
"sqlite": DouyinSqliteStoreImplement,
|
||||
"mongodb": DouyinMongoStoreImplement,
|
||||
"excel": DouyinExcelStoreImplement,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def create_store() -> AbstractStore:
|
||||
store_class = DouyinStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
|
||||
if not store_class:
|
||||
raise ValueError("[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
|
||||
raise ValueError("[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
|
||||
return store_class()
|
||||
|
||||
|
||||
|
||||
@@ -264,3 +264,14 @@ class DouyinMongoStoreImplement(AbstractStore):
|
||||
data=creator_item
|
||||
)
|
||||
utils.logger.info(f"[DouyinMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
|
||||
|
||||
class DouyinExcelStoreImplement:
    """Douyin Excel store implementation - global singleton.

    Constructing this class never yields a DouyinExcelStoreImplement object:
    ``__new__`` returns whatever ``ExcelStoreBase.get_instance`` provides for
    platform "douyin" and the value currently held by ``crawler_type_var``.
    """

    def __new__(cls, *args, **kwargs):
        # Imported when a store is requested rather than at module import time.
        from store.excel_store_base import ExcelStoreBase

        active_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance("douyin", active_crawler_type)
|
||||
|
||||
@@ -2,10 +2,20 @@
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/excel_store_base.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -21,7 +31,7 @@ Excel Store Base Implementation
|
||||
Provides Excel export functionality for crawled data with formatted sheets
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Any
|
||||
from pathlib import Path
|
||||
@@ -42,12 +52,50 @@ class ExcelStoreBase(AbstractStore):
|
||||
"""
|
||||
Base class for Excel storage implementation
|
||||
Provides formatted Excel export with multiple sheets for contents, comments, and creators
|
||||
Uses singleton pattern to maintain state across multiple store calls
|
||||
"""
|
||||
|
||||
|
||||
# Class-level singleton management
|
||||
_instances: Dict[str, "ExcelStoreBase"] = {}
|
||||
_lock = threading.Lock()
|
||||
|
||||
@classmethod
def get_instance(cls, platform: str, crawler_type: str) -> "ExcelStoreBase":
    """
    Return the cached store for a platform / crawler-type pair, creating it on first use.

    The lookup and the creation both happen while holding the class-level lock.

    Args:
        platform: Platform name, first half of the cache key (e.g. "xhs", "douyin")
        crawler_type: Crawler type, second half of the cache key (e.g. "search", "detail")

    Returns:
        The ExcelStoreBase instance registered under "<platform>_<crawler_type>"
    """
    cache_key = f"{platform}_{crawler_type}"
    with cls._lock:
        store = cls._instances.get(cache_key)
        if store is None:
            # First request for this pair: build the store and remember it.
            store = cls(platform, crawler_type)
            cls._instances[cache_key] = store
        return store
|
||||
|
||||
@classmethod
def flush_all(cls):
    """
    Flush every cached store instance, then empty the instance cache.

    An exception raised while flushing one instance is logged and does not
    prevent the remaining instances from being flushed. The cache is cleared
    afterwards regardless of individual failures.
    Should be called at the end of crawler execution.
    """
    with cls._lock:
        for cache_key in cls._instances:
            store = cls._instances[cache_key]
            try:
                store.flush()
                utils.logger.info(f"[ExcelStoreBase] Flushed instance: {cache_key}")
            except Exception as exc:
                utils.logger.error(f"[ExcelStoreBase] Error flushing {cache_key}: {exc}")
        cls._instances.clear()
|
||||
|
||||
def __init__(self, platform: str, crawler_type: str = "search"):
|
||||
"""
|
||||
Initialize Excel store
|
||||
|
||||
|
||||
Args:
|
||||
platform: Platform name (xhs, dy, ks, etc.)
|
||||
crawler_type: Type of crawler (search, detail, creator)
|
||||
@@ -57,39 +105,45 @@ class ExcelStoreBase(AbstractStore):
|
||||
"openpyxl is required for Excel export. "
|
||||
"Install it with: pip install openpyxl"
|
||||
)
|
||||
|
||||
|
||||
super().__init__()
|
||||
self.platform = platform
|
||||
self.crawler_type = crawler_type
|
||||
|
||||
|
||||
# Create data directory
|
||||
self.data_dir = Path("data") / platform
|
||||
self.data_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
# Initialize workbook
|
||||
self.workbook = openpyxl.Workbook()
|
||||
self.workbook.remove(self.workbook.active) # Remove default sheet
|
||||
|
||||
|
||||
# Create sheets
|
||||
self.contents_sheet = self.workbook.create_sheet("Contents")
|
||||
self.comments_sheet = self.workbook.create_sheet("Comments")
|
||||
self.creators_sheet = self.workbook.create_sheet("Creators")
|
||||
|
||||
|
||||
# Track if headers are written
|
||||
self.contents_headers_written = False
|
||||
self.comments_headers_written = False
|
||||
self.creators_headers_written = False
|
||||
|
||||
self.contacts_headers_written = False
|
||||
self.dynamics_headers_written = False
|
||||
|
||||
# Optional sheets for platforms that need them (e.g., Bilibili)
|
||||
self.contacts_sheet = None
|
||||
self.dynamics_sheet = None
|
||||
|
||||
# Generate filename
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
self.filename = self.data_dir / f"{platform}_{crawler_type}_{timestamp}.xlsx"
|
||||
|
||||
|
||||
utils.logger.info(f"[ExcelStoreBase] Initialized Excel export to: {self.filename}")
|
||||
|
||||
|
||||
def _apply_header_style(self, sheet, row_num: int = 1):
|
||||
"""
|
||||
Apply formatting to header row
|
||||
|
||||
|
||||
Args:
|
||||
sheet: Worksheet object
|
||||
row_num: Row number for headers (default: 1)
|
||||
@@ -103,70 +157,70 @@ class ExcelStoreBase(AbstractStore):
|
||||
top=Side(style='thin'),
|
||||
bottom=Side(style='thin')
|
||||
)
|
||||
|
||||
|
||||
for cell in sheet[row_num]:
|
||||
cell.fill = header_fill
|
||||
cell.font = header_font
|
||||
cell.alignment = header_alignment
|
||||
cell.border = border
|
||||
|
||||
|
||||
def _auto_adjust_column_width(self, sheet):
|
||||
"""
|
||||
Auto-adjust column widths based on content
|
||||
|
||||
|
||||
Args:
|
||||
sheet: Worksheet object
|
||||
"""
|
||||
for column in sheet.columns:
|
||||
max_length = 0
|
||||
column_letter = get_column_letter(column[0].column)
|
||||
|
||||
|
||||
for cell in column:
|
||||
try:
|
||||
if cell.value:
|
||||
max_length = max(max_length, len(str(cell.value)))
|
||||
except:
|
||||
except (TypeError, AttributeError):
|
||||
pass
|
||||
|
||||
|
||||
# Set width with min/max constraints
|
||||
adjusted_width = min(max(max_length + 2, 10), 50)
|
||||
sheet.column_dimensions[column_letter].width = adjusted_width
|
||||
|
||||
|
||||
def _write_headers(self, sheet, headers: List[str]):
    """
    Fill row 1 of a sheet with column titles, then style that row.

    Args:
        sheet: Worksheet object
        headers: Column titles, written left to right starting at column 1
    """
    column_index = 1
    for title in headers:
        sheet.cell(row=1, column=column_index, value=title)
        column_index += 1

    # Styling runs once, after all header cells exist.
    self._apply_header_style(sheet)
|
||||
|
||||
|
||||
def _write_row(self, sheet, data: Dict[str, Any], headers: List[str]):
|
||||
"""
|
||||
Write data row to sheet
|
||||
|
||||
|
||||
Args:
|
||||
sheet: Worksheet object
|
||||
data: Data dictionary
|
||||
headers: List of header names (defines column order)
|
||||
"""
|
||||
row_num = sheet.max_row + 1
|
||||
|
||||
|
||||
for col_num, header in enumerate(headers, 1):
|
||||
value = data.get(header, "")
|
||||
|
||||
|
||||
# Handle different data types
|
||||
if isinstance(value, (list, dict)):
|
||||
value = str(value)
|
||||
elif value is None:
|
||||
value = ""
|
||||
|
||||
|
||||
cell = sheet.cell(row=row_num, column=col_num, value=value)
|
||||
|
||||
|
||||
# Apply basic formatting
|
||||
cell.alignment = Alignment(vertical="top", wrap_text=True)
|
||||
cell.border = Border(
|
||||
@@ -175,89 +229,152 @@ class ExcelStoreBase(AbstractStore):
|
||||
top=Side(style='thin'),
|
||||
bottom=Side(style='thin')
|
||||
)
|
||||
|
||||
|
||||
async def store_content(self, content_item: Dict):
|
||||
"""
|
||||
Store content data to Excel
|
||||
|
||||
|
||||
Args:
|
||||
content_item: Content data dictionary
|
||||
"""
|
||||
# Define headers (customize based on platform)
|
||||
headers = list(content_item.keys())
|
||||
|
||||
|
||||
# Write headers if first time
|
||||
if not self.contents_headers_written:
|
||||
self._write_headers(self.contents_sheet, headers)
|
||||
self.contents_headers_written = True
|
||||
|
||||
|
||||
# Write data row
|
||||
self._write_row(self.contents_sheet, content_item, headers)
|
||||
|
||||
utils.logger.info(f"[ExcelStoreBase] Stored content to Excel: {content_item.get('note_id', 'N/A')}")
|
||||
|
||||
|
||||
# Get ID from various possible field names
|
||||
content_id = content_item.get('note_id') or content_item.get('aweme_id') or content_item.get('video_id') or content_item.get('content_id') or 'N/A'
|
||||
utils.logger.info(f"[ExcelStoreBase] Stored content to Excel: {content_id}")
|
||||
|
||||
async def store_comment(self, comment_item: Dict):
    """
    Append one comment as a row of the Comments sheet.

    The first comment stored defines the header row. Every later comment is
    written in the column order of that existing header row, so a comment
    whose keys are missing or ordered differently still lands under the
    matching headers. Keys that have not been seen before are appended as
    new header columns.

    Args:
        comment_item: Comment data dictionary (its keys become column headers)
    """
    sheet = self.comments_sheet

    if not self.comments_headers_written:
        # First row for this sheet: the item's own keys become the headers.
        headers = list(comment_item.keys())
        self._write_headers(sheet, headers)
        self.comments_headers_written = True
    else:
        # Column order comes from the header row already in the sheet,
        # not from this item's key order.
        headers = [cell.value for cell in sheet[1] if cell.value is not None]
        unseen_keys = [key for key in comment_item if key not in headers]
        if unseen_keys:
            headers.extend(unseen_keys)
            self._write_headers(sheet, headers)

    # Write data row
    self._write_row(sheet, comment_item, headers)

    utils.logger.info(f"[ExcelStoreBase] Stored comment to Excel: {comment_item.get('comment_id', 'N/A')}")
|
||||
|
||||
async def store_creator(self, creator_item: Dict):
|
||||
|
||||
async def store_creator(self, creator: Dict):
|
||||
"""
|
||||
Store creator data to Excel
|
||||
|
||||
|
||||
Args:
|
||||
creator_item: Creator data dictionary
|
||||
creator: Creator data dictionary
|
||||
"""
|
||||
# Define headers
|
||||
headers = list(creator_item.keys())
|
||||
|
||||
headers = list(creator.keys())
|
||||
|
||||
# Write headers if first time
|
||||
if not self.creators_headers_written:
|
||||
self._write_headers(self.creators_sheet, headers)
|
||||
self.creators_headers_written = True
|
||||
|
||||
|
||||
# Write data row
|
||||
self._write_row(self.creators_sheet, creator_item, headers)
|
||||
|
||||
utils.logger.info(f"[ExcelStoreBase] Stored creator to Excel: {creator_item.get('user_id', 'N/A')}")
|
||||
|
||||
self._write_row(self.creators_sheet, creator, headers)
|
||||
|
||||
utils.logger.info(f"[ExcelStoreBase] Stored creator to Excel: {creator.get('user_id', 'N/A')}")
|
||||
|
||||
async def store_contact(self, contact_item: Dict):
    """
    Append one contact as a row of the Contacts sheet (for platforms like Bilibili).

    The Contacts sheet is created on first use. The first contact stored
    defines the header row. Every later contact is written in the column
    order of that existing header row, so a contact whose keys are missing
    or ordered differently still lands under the matching headers. Keys
    that have not been seen before are appended as new header columns.

    Args:
        contact_item: Contact data dictionary (its keys become column headers)
    """
    # Create contacts sheet if not exists
    if self.contacts_sheet is None:
        self.contacts_sheet = self.workbook.create_sheet("Contacts")
    sheet = self.contacts_sheet

    if not self.contacts_headers_written:
        # First row for this sheet: the item's own keys become the headers.
        headers = list(contact_item.keys())
        self._write_headers(sheet, headers)
        self.contacts_headers_written = True
    else:
        # Column order comes from the header row already in the sheet,
        # not from this item's key order.
        headers = [cell.value for cell in sheet[1] if cell.value is not None]
        unseen_keys = [key for key in contact_item if key not in headers]
        if unseen_keys:
            headers.extend(unseen_keys)
            self._write_headers(sheet, headers)

    # Write data row
    self._write_row(sheet, contact_item, headers)

    utils.logger.info(f"[ExcelStoreBase] Stored contact to Excel: up_id={contact_item.get('up_id', 'N/A')}, fan_id={contact_item.get('fan_id', 'N/A')}")
|
||||
|
||||
async def store_dynamic(self, dynamic_item: Dict):
    """
    Append one dynamic as a row of the Dynamics sheet (for platforms like Bilibili).

    The Dynamics sheet is created on first use. The first dynamic stored
    defines the header row. Every later dynamic is written in the column
    order of that existing header row, so a dynamic whose keys are missing
    or ordered differently still lands under the matching headers. Keys
    that have not been seen before are appended as new header columns.

    Args:
        dynamic_item: Dynamic data dictionary (its keys become column headers)
    """
    # Create dynamics sheet if not exists
    if self.dynamics_sheet is None:
        self.dynamics_sheet = self.workbook.create_sheet("Dynamics")
    sheet = self.dynamics_sheet

    if not self.dynamics_headers_written:
        # First row for this sheet: the item's own keys become the headers.
        headers = list(dynamic_item.keys())
        self._write_headers(sheet, headers)
        self.dynamics_headers_written = True
    else:
        # Column order comes from the header row already in the sheet,
        # not from this item's key order.
        headers = [cell.value for cell in sheet[1] if cell.value is not None]
        unseen_keys = [key for key in dynamic_item if key not in headers]
        if unseen_keys:
            headers.extend(unseen_keys)
            self._write_headers(sheet, headers)

    # Write data row
    self._write_row(sheet, dynamic_item, headers)

    utils.logger.info(f"[ExcelStoreBase] Stored dynamic to Excel: {dynamic_item.get('dynamic_id', 'N/A')}")
|
||||
|
||||
def flush(self):
|
||||
"""
|
||||
Save workbook to file
|
||||
"""
|
||||
try:
|
||||
# Auto-adjust column widths
|
||||
# Auto-adjust column widths for all sheets
|
||||
self._auto_adjust_column_width(self.contents_sheet)
|
||||
self._auto_adjust_column_width(self.comments_sheet)
|
||||
self._auto_adjust_column_width(self.creators_sheet)
|
||||
|
||||
# Remove empty sheets
|
||||
if self.contacts_sheet is not None:
|
||||
self._auto_adjust_column_width(self.contacts_sheet)
|
||||
if self.dynamics_sheet is not None:
|
||||
self._auto_adjust_column_width(self.dynamics_sheet)
|
||||
|
||||
# Remove empty sheets (only header row)
|
||||
if self.contents_sheet.max_row == 1:
|
||||
self.workbook.remove(self.contents_sheet)
|
||||
if self.comments_sheet.max_row == 1:
|
||||
self.workbook.remove(self.comments_sheet)
|
||||
if self.creators_sheet.max_row == 1:
|
||||
self.workbook.remove(self.creators_sheet)
|
||||
|
||||
if self.contacts_sheet is not None and self.contacts_sheet.max_row == 1:
|
||||
self.workbook.remove(self.contacts_sheet)
|
||||
if self.dynamics_sheet is not None and self.dynamics_sheet.max_row == 1:
|
||||
self.workbook.remove(self.dynamics_sheet)
|
||||
|
||||
# Check if there are any sheets left
|
||||
if len(self.workbook.sheetnames) == 0:
|
||||
utils.logger.info(f"[ExcelStoreBase] No data to save, skipping file creation: {self.filename}")
|
||||
return
|
||||
|
||||
# Save workbook
|
||||
self.workbook.save(self.filename)
|
||||
utils.logger.info(f"[ExcelStoreBase] Excel file saved successfully: {self.filename}")
|
||||
|
||||
|
||||
except Exception as e:
|
||||
utils.logger.error(f"[ExcelStoreBase] Error saving Excel file: {e}")
|
||||
raise
|
||||
|
||||
@@ -37,6 +37,7 @@ class KuaishouStoreFactory:
|
||||
"json": KuaishouJsonStoreImplement,
|
||||
"sqlite": KuaishouSqliteStoreImplement,
|
||||
"mongodb": KuaishouMongoStoreImplement,
|
||||
"excel": KuaishouExcelStoreImplement,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -44,7 +45,7 @@ class KuaishouStoreFactory:
|
||||
store_class = KuaishouStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
|
||||
if not store_class:
|
||||
raise ValueError(
|
||||
"[KuaishouStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
|
||||
"[KuaishouStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
|
||||
return store_class()
|
||||
|
||||
|
||||
|
||||
@@ -226,3 +226,14 @@ class KuaishouMongoStoreImplement(AbstractStore):
|
||||
data=creator_item
|
||||
)
|
||||
utils.logger.info(f"[KuaishouMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
|
||||
|
||||
class KuaishouExcelStoreImplement:
    """Kuaishou Excel store implementation - global singleton.

    Constructing this class never yields a KuaishouExcelStoreImplement object:
    ``__new__`` returns whatever ``ExcelStoreBase.get_instance`` provides for
    platform "kuaishou" and the value currently held by ``crawler_type_var``.
    """

    def __new__(cls, *args, **kwargs):
        # Imported when a store is requested rather than at module import time.
        from store.excel_store_base import ExcelStoreBase

        active_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance("kuaishou", active_crawler_type)
|
||||
|
||||
@@ -34,6 +34,7 @@ class TieBaStoreFactory:
|
||||
"json": TieBaJsonStoreImplement,
|
||||
"sqlite": TieBaSqliteStoreImplement,
|
||||
"mongodb": TieBaMongoStoreImplement,
|
||||
"excel": TieBaExcelStoreImplement,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
@@ -41,7 +42,7 @@ class TieBaStoreFactory:
|
||||
store_class = TieBaStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
|
||||
if not store_class:
|
||||
raise ValueError(
|
||||
"[TieBaStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
|
||||
"[TieBaStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
|
||||
return store_class()
|
||||
|
||||
|
||||
|
||||
@@ -258,3 +258,14 @@ class TieBaMongoStoreImplement(AbstractStore):
|
||||
data=creator_item
|
||||
)
|
||||
utils.logger.info(f"[TieBaMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
|
||||
|
||||
class TieBaExcelStoreImplement:
    """Tieba Excel store implementation - global singleton.

    Constructing this class never yields a TieBaExcelStoreImplement object:
    ``__new__`` returns whatever ``ExcelStoreBase.get_instance`` provides for
    platform "tieba" and the value currently held by ``crawler_type_var``.
    """

    def __new__(cls, *args, **kwargs):
        # Imported when a store is requested rather than at module import time.
        from store.excel_store_base import ExcelStoreBase

        active_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance("tieba", active_crawler_type)
|
||||
|
||||
@@ -38,13 +38,14 @@ class WeibostoreFactory:
|
||||
"json": WeiboJsonStoreImplement,
|
||||
"sqlite": WeiboSqliteStoreImplement,
|
||||
"mongodb": WeiboMongoStoreImplement,
|
||||
"excel": WeiboExcelStoreImplement,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def create_store() -> AbstractStore:
|
||||
store_class = WeibostoreFactory.STORES.get(config.SAVE_DATA_OPTION)
|
||||
if not store_class:
|
||||
raise ValueError("[WeibotoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
|
||||
raise ValueError("[WeibotoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
|
||||
return store_class()
|
||||
|
||||
|
||||
|
||||
@@ -280,3 +280,14 @@ class WeiboMongoStoreImplement(AbstractStore):
|
||||
data=creator_item
|
||||
)
|
||||
utils.logger.info(f"[WeiboMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
|
||||
|
||||
class WeiboExcelStoreImplement:
    """Weibo Excel store implementation - global singleton.

    Constructing this class never yields a WeiboExcelStoreImplement object:
    ``__new__`` returns whatever ``ExcelStoreBase.get_instance`` provides for
    platform "weibo" and the value currently held by ``crawler_type_var``.
    """

    def __new__(cls, *args, **kwargs):
        # Imported when a store is requested rather than at module import time.
        from store.excel_store_base import ExcelStoreBase

        active_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance("weibo", active_crawler_type)
|
||||
|
||||
@@ -339,9 +339,12 @@ class XhsMongoStoreImplement(AbstractStore):
|
||||
utils.logger.info(f"[XhsMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
|
||||
|
||||
class XhsExcelStoreImplement(ExcelStoreBase):
|
||||
"""小红书Excel存储实现"""
|
||||
class XhsExcelStoreImplement:
|
||||
"""小红书Excel存储实现 - 全局单例"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(platform="xhs", crawler_type=crawler_type_var.get())
|
||||
utils.logger.info("[XhsExcelStoreImplement] Excel store initialized")
|
||||
def __new__(cls, *args, **kwargs):
|
||||
from store.excel_store_base import ExcelStoreBase
|
||||
return ExcelStoreBase.get_instance(
|
||||
platform="xhs",
|
||||
crawler_type=crawler_type_var.get()
|
||||
)
|
||||
|
||||
@@ -28,7 +28,8 @@ from ._store_impl import (ZhihuCsvStoreImplement,
|
||||
ZhihuDbStoreImplement,
|
||||
ZhihuJsonStoreImplement,
|
||||
ZhihuSqliteStoreImplement,
|
||||
ZhihuMongoStoreImplement)
|
||||
ZhihuMongoStoreImplement,
|
||||
ZhihuExcelStoreImplement)
|
||||
from tools import utils
|
||||
from var import source_keyword_var
|
||||
|
||||
@@ -40,13 +41,14 @@ class ZhihuStoreFactory:
|
||||
"json": ZhihuJsonStoreImplement,
|
||||
"sqlite": ZhihuSqliteStoreImplement,
|
||||
"mongodb": ZhihuMongoStoreImplement,
|
||||
"excel": ZhihuExcelStoreImplement,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def create_store() -> AbstractStore:
|
||||
store_class = ZhihuStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
|
||||
if not store_class:
|
||||
raise ValueError("[ZhihuStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
|
||||
raise ValueError("[ZhihuStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
|
||||
return store_class()
|
||||
|
||||
async def batch_update_zhihu_contents(contents: List[ZhihuContent]):
|
||||
|
||||
@@ -257,3 +257,14 @@ class ZhihuMongoStoreImplement(AbstractStore):
|
||||
data=creator_item
|
||||
)
|
||||
utils.logger.info(f"[ZhihuMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
|
||||
|
||||
class ZhihuExcelStoreImplement:
    """Zhihu Excel store implementation - global singleton.

    Constructing this class never yields a ZhihuExcelStoreImplement object:
    ``__new__`` returns whatever ``ExcelStoreBase.get_instance`` provides for
    platform "zhihu" and the value currently held by ``crawler_type_var``.
    """

    def __new__(cls, *args, **kwargs):
        # Imported when a store is requested rather than at module import time.
        from store.excel_store_base import ExcelStoreBase

        active_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance("zhihu", active_crawler_type)
|
||||
|
||||
@@ -1,4 +1,21 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/tests/test_excel_store.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
"""
|
||||
Unit tests for Excel export functionality
|
||||
"""
|
||||
@@ -22,7 +39,14 @@ from store.excel_store_base import ExcelStoreBase
|
||||
@pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed")
|
||||
class TestExcelStoreBase:
|
||||
"""Test cases for ExcelStoreBase"""
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clear_singleton_state(self):
    """Empty the ExcelStoreBase instance cache before and after each test in this class."""
    # Setup: drop any instances cached before this test runs.
    ExcelStoreBase._instances.clear()
    yield
    # Teardown: drop any instances this test registered.
    ExcelStoreBase._instances.clear()
|
||||
|
||||
@pytest.fixture
|
||||
def temp_dir(self):
|
||||
"""Create temporary directory for test files"""
|
||||
@@ -30,7 +54,7 @@ class TestExcelStoreBase:
|
||||
yield temp_path
|
||||
# Cleanup
|
||||
shutil.rmtree(temp_path, ignore_errors=True)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def excel_store(self, temp_dir, monkeypatch):
|
||||
"""Create ExcelStoreBase instance for testing"""
|
||||
@@ -39,7 +63,7 @@ class TestExcelStoreBase:
|
||||
store = ExcelStoreBase(platform="test", crawler_type="search")
|
||||
yield store
|
||||
# Cleanup is handled by temp_dir fixture
|
||||
|
||||
|
||||
def test_initialization(self, excel_store):
|
||||
"""Test Excel store initialization"""
|
||||
assert excel_store.platform == "test"
|
||||
@@ -48,7 +72,7 @@ class TestExcelStoreBase:
|
||||
assert excel_store.contents_sheet is not None
|
||||
assert excel_store.comments_sheet is not None
|
||||
assert excel_store.creators_sheet is not None
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_content(self, excel_store):
|
||||
"""Test storing content data"""
|
||||
@@ -61,13 +85,13 @@ class TestExcelStoreBase:
|
||||
"liked_count": 100,
|
||||
"comment_count": 50
|
||||
}
|
||||
|
||||
|
||||
await excel_store.store_content(content_item)
|
||||
|
||||
|
||||
# Verify data was written
|
||||
assert excel_store.contents_sheet.max_row == 2 # Header + 1 data row
|
||||
assert excel_store.contents_headers_written is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_comment(self, excel_store):
|
||||
"""Test storing comment data"""
|
||||
@@ -79,13 +103,13 @@ class TestExcelStoreBase:
|
||||
"nickname": "Commenter",
|
||||
"like_count": 10
|
||||
}
|
||||
|
||||
|
||||
await excel_store.store_comment(comment_item)
|
||||
|
||||
|
||||
# Verify data was written
|
||||
assert excel_store.comments_sheet.max_row == 2 # Header + 1 data row
|
||||
assert excel_store.comments_headers_written is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_creator(self, excel_store):
|
||||
"""Test storing creator data"""
|
||||
@@ -96,13 +120,13 @@ class TestExcelStoreBase:
|
||||
"follows": 500,
|
||||
"interaction": 50000
|
||||
}
|
||||
|
||||
|
||||
await excel_store.store_creator(creator_item)
|
||||
|
||||
|
||||
# Verify data was written
|
||||
assert excel_store.creators_sheet.max_row == 2 # Header + 1 data row
|
||||
assert excel_store.creators_headers_written is True
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_multiple_items(self, excel_store):
|
||||
"""Test storing multiple items"""
|
||||
@@ -113,10 +137,10 @@ class TestExcelStoreBase:
|
||||
"title": f"Title {i}",
|
||||
"liked_count": i * 10
|
||||
})
|
||||
|
||||
|
||||
# Verify all items were stored
|
||||
assert excel_store.contents_sheet.max_row == 6 # Header + 5 data rows
|
||||
|
||||
|
||||
def test_flush(self, excel_store):
|
||||
"""Test flushing data to file"""
|
||||
# Add some test data
|
||||
@@ -124,38 +148,38 @@ class TestExcelStoreBase:
|
||||
"note_id": "test",
|
||||
"title": "Test"
|
||||
}))
|
||||
|
||||
|
||||
# Flush to file
|
||||
excel_store.flush()
|
||||
|
||||
|
||||
# Verify file was created
|
||||
assert excel_store.filename.exists()
|
||||
|
||||
|
||||
# Verify file can be opened
|
||||
wb = openpyxl.load_workbook(excel_store.filename)
|
||||
assert "Contents" in wb.sheetnames
|
||||
wb.close()
|
||||
|
||||
|
||||
def test_header_formatting(self, excel_store):
    """Header cells written by store_content are bold and filled with colour 366092."""
    sample_content = {"note_id": "test", "title": "Test"}
    asyncio.run(excel_store.store_content(sample_content))

    first_header = excel_store.contents_sheet.cell(row=1, column=1)

    assert first_header.font.bold is True
    # Compare only the last six hex digits; the leading pair may be 00 or FF.
    fill_rgb = first_header.fill.start_color.rgb
    assert fill_rgb[-6:] == "366092"
|
||||
|
||||
|
||||
def test_empty_sheets_removed(self, excel_store):
|
||||
"""Test that empty sheets are removed on flush"""
|
||||
# Only add content, leave comments and creators empty
|
||||
asyncio.run(excel_store.store_content({"note_id": "test"}))
|
||||
|
||||
|
||||
excel_store.flush()
|
||||
|
||||
|
||||
# Reload workbook
|
||||
wb = openpyxl.load_workbook(excel_store.filename)
|
||||
|
||||
|
||||
# Only Contents sheet should exist
|
||||
assert "Contents" in wb.sheetnames
|
||||
assert "Comments" not in wb.sheetnames
|
||||
@@ -169,3 +193,83 @@ def test_excel_import_availability():
|
||||
assert EXCEL_AVAILABLE is True
|
||||
import openpyxl
|
||||
assert openpyxl is not None
|
||||
|
||||
|
||||
@pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed")
class TestSingletonPattern:
    """Checks for the per-(platform, crawler type) instance cache on ExcelStoreBase."""

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self, tmp_path, monkeypatch):
        """Run each test from a temporary working directory with an empty instance cache."""
        # The store builds its output directory from a relative "data" path,
        # so switching the working directory keeps test files under tmp_path.
        monkeypatch.chdir(tmp_path)
        ExcelStoreBase._instances.clear()
        yield
        ExcelStoreBase._instances.clear()

    def test_get_instance_returns_same_instance(self):
        """Two requests with identical parameters yield the very same object."""
        first = ExcelStoreBase.get_instance("xhs", "search")
        second = ExcelStoreBase.get_instance("xhs", "search")

        assert first is second

    def test_get_instance_different_params_returns_different_instances(self):
        """A different platform or crawler type yields a distinct object."""
        xhs_search = ExcelStoreBase.get_instance("xhs", "search")
        xhs_detail = ExcelStoreBase.get_instance("xhs", "detail")
        douyin_search = ExcelStoreBase.get_instance("douyin", "search")

        assert xhs_search is not xhs_detail
        assert xhs_search is not douyin_search
        assert xhs_detail is not douyin_search

    @pytest.mark.asyncio
    async def test_singleton_preserves_data(self):
        """Rows stored through separate get_instance calls end up in one workbook."""
        obtained = []
        for note_id, title in (("note1", "Title 1"), ("note2", "Title 2")):
            store = ExcelStoreBase.get_instance("test", "search")
            await store.store_content({"note_id": note_id, "title": title})
            obtained.append(store)

        earlier, later = obtained
        assert earlier is later
        assert earlier.contents_sheet.max_row == 3  # Header + 2 data rows

    def test_flush_all_saves_all_instances(self, tmp_path):
        """flush_all writes a file for every cached instance and empties the cache."""
        first_store = ExcelStoreBase.get_instance("platform1", "search")
        second_store = ExcelStoreBase.get_instance("platform2", "search")

        # Give each store one content row so its workbook is not empty.
        for store, note_id in ((first_store, "note1"), (second_store, "note2")):
            asyncio.run(store.store_content({"note_id": note_id}))

        ExcelStoreBase.flush_all()

        assert len(ExcelStoreBase._instances) == 0
        assert first_store.filename.exists()
        assert second_store.filename.exists()

    def test_flush_all_clears_instances(self):
        """flush_all empties the instance cache even when nothing was stored."""
        ExcelStoreBase.get_instance("test", "search")
        assert len(ExcelStoreBase._instances) == 1

        ExcelStoreBase.flush_all()

        assert len(ExcelStoreBase._instances) == 0
|
||||
|
||||
78
uv.lock
generated
78
uv.lock
generated
@@ -171,9 +171,9 @@ wheels = [
|
||||
name = "cfgv"
|
||||
version = "3.4.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114 }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9" },
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -376,6 +376,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "et-xmlfile"
|
||||
version = "2.0.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastapi"
|
||||
version = "0.110.2"
|
||||
@@ -513,6 +522,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.3.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jieba"
|
||||
version = "0.42.1"
|
||||
@@ -777,6 +795,7 @@ dependencies = [
|
||||
{ name = "matplotlib" },
|
||||
{ name = "motor" },
|
||||
{ name = "opencv-python" },
|
||||
{ name = "openpyxl" },
|
||||
{ name = "pandas" },
|
||||
{ name = "parsel" },
|
||||
{ name = "pillow" },
|
||||
@@ -785,6 +804,8 @@ dependencies = [
|
||||
{ name = "pydantic" },
|
||||
{ name = "pyexecjs" },
|
||||
{ name = "pyhumps" },
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-asyncio" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "redis" },
|
||||
{ name = "requests" },
|
||||
@@ -810,6 +831,7 @@ requires-dist = [
|
||||
{ name = "matplotlib", specifier = "==3.9.0" },
|
||||
{ name = "motor", specifier = ">=3.3.0" },
|
||||
{ name = "opencv-python", specifier = ">=4.11.0.86" },
|
||||
{ name = "openpyxl", specifier = ">=3.1.2" },
|
||||
{ name = "pandas", specifier = "==2.2.3" },
|
||||
{ name = "parsel", specifier = "==1.9.1" },
|
||||
{ name = "pillow", specifier = "==9.5.0" },
|
||||
@@ -818,6 +840,8 @@ requires-dist = [
|
||||
{ name = "pydantic", specifier = "==2.5.2" },
|
||||
{ name = "pyexecjs", specifier = "==1.5.1" },
|
||||
{ name = "pyhumps", specifier = ">=3.8.0" },
|
||||
{ name = "pytest", specifier = ">=7.4.0" },
|
||||
{ name = "pytest-asyncio", specifier = ">=0.21.0" },
|
||||
{ name = "python-dotenv", specifier = "==1.0.1" },
|
||||
{ name = "redis", specifier = "~=4.6.0" },
|
||||
{ name = "requests", specifier = "==2.32.3" },
|
||||
@@ -925,6 +949,18 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "openpyxl"
|
||||
version = "3.1.5"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "et-xmlfile" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "packaging"
|
||||
version = "25.0"
|
||||
@@ -1040,6 +1076,15 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/0f/c8dcadb2f0dcfdab6052d5ecf57ccf19b439c0adc29fc510ed0830349345/playwright-1.45.0-py3-none-win_amd64.whl", hash = "sha256:701db496928429aec103739e48e3110806bd5cf49456cc95b89f28e1abda71da", size = 29692683 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.6.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pre-commit"
|
||||
version = "4.4.0"
|
||||
@@ -1234,6 +1279,35 @@ wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "9.0.1"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||
{ name = "iniconfig" },
|
||||
{ name = "packaging" },
|
||||
{ name = "pluggy" },
|
||||
{ name = "pygments" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/56/f013048ac4bc4c1d9be45afd4ab209ea62822fb1598f40687e6bf45dcea4/pytest-9.0.1.tar.gz", hash = "sha256:3e9c069ea73583e255c3b21cf46b8d3c56f6e3a1a8f6da94ccb0fcf57b9d73c8", size = 1564125 }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/8b/6300fb80f858cda1c51ffa17075df5d846757081d11ab4aa35cef9e6258b/pytest-9.0.1-py3-none-any.whl", hash = "sha256:67be0030d194df2dfa7b556f2e56fb3c3315bd5c8822c6951162b92b32ce7dad", size = 373668 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pytest-asyncio"
|
||||
version = "1.3.0"
|
||||
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
|
||||
dependencies = [
|
||||
{ name = "pytest" },
|
||||
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
|
||||
]
|
||||
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087 }
|
||||
wheels = [
|
||||
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.9.0.post0"
|
||||
|
||||
Reference in New Issue
Block a user