feat: excel store with other platform

This commit is contained in:
程序员阿江(Relakkes)
2025-11-28 15:12:36 +08:00
parent 324f09cf9f
commit 6e858c1a00
20 changed files with 477 additions and 106 deletions

BIN
.DS_Store vendored
View File

Binary file not shown.

View File

@@ -228,14 +228,15 @@ python main.py --help
### 使用示例: ### 使用示例:
```shell ```shell
# 使用 Excel 存储数据(推荐用于数据分析)✨ 新功能 # 初始化 SQLite 数据库
uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
# 初始化 SQLite 数据库(使用'--init_db'时不需要携带其他optional)
uv run main.py --init_db sqlite uv run main.py --init_db sqlite
# 使用 SQLite 存储数据(推荐个人用户使用) # 使用 SQLite 存储数据
uv run main.py --platform xhs --lt qrcode --type search --save_data_option sqlite uv run main.py --platform xhs --lt qrcode --type search --save_data_option sqlite
# 使用 Excel 存储数据(推荐用于数据分析)
uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
``` ```
```shell ```shell
# 初始化 MySQL 数据库 # 初始化 MySQL 数据库
uv run main.py --init_db mysql uv run main.py --init_db mysql

View File

@@ -71,6 +71,8 @@ class SaveDataOptionEnum(str, Enum):
DB = "db" DB = "db"
JSON = "json" JSON = "json"
SQLITE = "sqlite" SQLITE = "sqlite"
MONGODB = "mongodb"
EXCEL = "excel"
class InitDbOptionEnum(str, Enum): class InitDbOptionEnum(str, Enum):
@@ -199,7 +201,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
SaveDataOptionEnum, SaveDataOptionEnum,
typer.Option( typer.Option(
"--save_data_option", "--save_data_option",
help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库)", help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库 | mongodb=MongoDB数据库 | excel=Excel文件)",
rich_help_panel="存储配置", rich_help_panel="存储配置",
), ),
] = _coerce_enum( ] = _coerce_enum(

11
main.py
View File

@@ -87,14 +87,11 @@ async def main():
# Flush Excel data if using Excel export # Flush Excel data if using Excel export
if config.SAVE_DATA_OPTION == "excel": if config.SAVE_DATA_OPTION == "excel":
try: try:
# Get the store instance and flush data from store.excel_store_base import ExcelStoreBase
from store.xhs import XhsStoreFactory ExcelStoreBase.flush_all()
store = XhsStoreFactory.create_store() print("[Main] Excel files saved successfully")
if hasattr(store, 'flush'):
store.flush()
print(f"[Main] Excel file saved successfully")
except Exception as e: except Exception as e:
print(f"Error flushing Excel data: {e}") print(f"[Main] Error flushing Excel data: {e}")
# Generate wordcloud after crawling is complete # Generate wordcloud after crawling is complete
# Only for JSON save mode # Only for JSON save mode

View File

@@ -38,13 +38,14 @@ class BiliStoreFactory:
"json": BiliJsonStoreImplement, "json": BiliJsonStoreImplement,
"sqlite": BiliSqliteStoreImplement, "sqlite": BiliSqliteStoreImplement,
"mongodb": BiliMongoStoreImplement, "mongodb": BiliMongoStoreImplement,
"excel": BiliExcelStoreImplement,
} }
@staticmethod
def create_store() -> AbstractStore:
    """
    Build the Bilibili store selected by ``config.SAVE_DATA_OPTION``.

    Returns:
        A new object produced by calling the class registered in
        ``BiliStoreFactory.STORES`` for the configured option.

    Raises:
        ValueError: If the configured option has no entry in ``STORES``.
    """
    selected_option = config.SAVE_DATA_OPTION
    implementation = BiliStoreFactory.STORES.get(selected_option)
    if implementation:
        return implementation()
    raise ValueError("[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")

View File

@@ -365,3 +365,14 @@ class BiliMongoStoreImplement(AbstractStore):
data=creator_item data=creator_item
) )
utils.logger.info(f"[BiliMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB") utils.logger.info(f"[BiliMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class BiliExcelStoreImplement:
    """
    Bilibili Excel store implementation - global singleton.

    Calling this class does not produce an object of this type: ``__new__``
    hands back the shared ``ExcelStoreBase`` instance registered for platform
    "bilibili" and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # The import is performed inside the method, at call time.
        from store.excel_store_base import ExcelStoreBase

        current_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(
            platform="bilibili",
            crawler_type=current_crawler_type,
        )

View File

@@ -37,13 +37,14 @@ class DouyinStoreFactory:
"json": DouyinJsonStoreImplement, "json": DouyinJsonStoreImplement,
"sqlite": DouyinSqliteStoreImplement, "sqlite": DouyinSqliteStoreImplement,
"mongodb": DouyinMongoStoreImplement, "mongodb": DouyinMongoStoreImplement,
"excel": DouyinExcelStoreImplement,
} }
@staticmethod
def create_store() -> AbstractStore:
    """
    Build the Douyin store selected by ``config.SAVE_DATA_OPTION``.

    Returns:
        A new object produced by calling the class registered in
        ``DouyinStoreFactory.STORES`` for the configured option.

    Raises:
        ValueError: If the configured option has no entry in ``STORES``.
    """
    selected_option = config.SAVE_DATA_OPTION
    implementation = DouyinStoreFactory.STORES.get(selected_option)
    if implementation:
        return implementation()
    raise ValueError("[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")

View File

@@ -264,3 +264,14 @@ class DouyinMongoStoreImplement(AbstractStore):
data=creator_item data=creator_item
) )
utils.logger.info(f"[DouyinMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB") utils.logger.info(f"[DouyinMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class DouyinExcelStoreImplement:
    """
    Douyin Excel store implementation - global singleton.

    Calling this class does not produce an object of this type: ``__new__``
    hands back the shared ``ExcelStoreBase`` instance registered for platform
    "douyin" and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # The import is performed inside the method, at call time.
        from store.excel_store_base import ExcelStoreBase

        current_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(
            platform="douyin",
            crawler_type=current_crawler_type,
        )

View File

@@ -2,10 +2,20 @@
# Copyright (c) 2025 relakkes@gmail.com # Copyright (c) 2025 relakkes@gmail.com
# #
# This file is part of MediaCrawler project. # This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler # Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/excel_store_base.py
# GitHub: https://github.com/NanmiCoder # GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1 # Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
# #
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则: # 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。 # 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。 # 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
@@ -21,7 +31,7 @@ Excel Store Base Implementation
Provides Excel export functionality for crawled data with formatted sheets Provides Excel export functionality for crawled data with formatted sheets
""" """
import os import threading
from datetime import datetime from datetime import datetime
from typing import Dict, List, Any from typing import Dict, List, Any
from pathlib import Path from pathlib import Path
@@ -42,12 +52,50 @@ class ExcelStoreBase(AbstractStore):
""" """
Base class for Excel storage implementation Base class for Excel storage implementation
Provides formatted Excel export with multiple sheets for contents, comments, and creators Provides formatted Excel export with multiple sheets for contents, comments, and creators
Uses singleton pattern to maintain state across multiple store calls
""" """
# Class-level singleton management
_instances: Dict[str, "ExcelStoreBase"] = {}
_lock = threading.Lock()
@classmethod
def get_instance(cls, platform: str, crawler_type: str) -> "ExcelStoreBase":
    """
    Return the shared store for a platform / crawler-type pair.

    Stores are cached in the class-level ``_instances`` dict under the key
    ``"{platform}_{crawler_type}"``. The lookup and, on first use, the
    construction both happen while ``_lock`` is held.

    Args:
        platform: Platform name (xhs, dy, ks, etc.)
        crawler_type: Type of crawler (search, detail, creator)

    Returns:
        The cached instance for this pair, created via
        ``cls(platform, crawler_type)`` if it did not exist yet.
    """
    registry_key = f"{platform}_{crawler_type}"
    with cls._lock:
        store = cls._instances.get(registry_key)
        if store is None:
            store = cls(platform, crawler_type)
            cls._instances[registry_key] = store
        return store
@classmethod
def flush_all(cls):
    """
    Flush every cached Excel store and then empty the cache.

    Each instance's ``flush()`` is called while ``_lock`` is held. A failure
    for one instance is logged and not re-raised, so the remaining instances
    are still flushed and the caller receives no exception. ``_instances`` is
    cleared afterwards regardless of individual failures.

    Should be called at the end of crawler execution.
    """
    with cls._lock:
        for name in cls._instances:
            store = cls._instances[name]
            try:
                store.flush()
                utils.logger.info(f"[ExcelStoreBase] Flushed instance: {name}")
            except Exception as exc:
                utils.logger.error(f"[ExcelStoreBase] Error flushing {name}: {exc}")
        cls._instances.clear()
def __init__(self, platform: str, crawler_type: str = "search"): def __init__(self, platform: str, crawler_type: str = "search"):
""" """
Initialize Excel store Initialize Excel store
Args: Args:
platform: Platform name (xhs, dy, ks, etc.) platform: Platform name (xhs, dy, ks, etc.)
crawler_type: Type of crawler (search, detail, creator) crawler_type: Type of crawler (search, detail, creator)
@@ -57,39 +105,45 @@ class ExcelStoreBase(AbstractStore):
"openpyxl is required for Excel export. " "openpyxl is required for Excel export. "
"Install it with: pip install openpyxl" "Install it with: pip install openpyxl"
) )
super().__init__() super().__init__()
self.platform = platform self.platform = platform
self.crawler_type = crawler_type self.crawler_type = crawler_type
# Create data directory # Create data directory
self.data_dir = Path("data") / platform self.data_dir = Path("data") / platform
self.data_dir.mkdir(parents=True, exist_ok=True) self.data_dir.mkdir(parents=True, exist_ok=True)
# Initialize workbook # Initialize workbook
self.workbook = openpyxl.Workbook() self.workbook = openpyxl.Workbook()
self.workbook.remove(self.workbook.active) # Remove default sheet self.workbook.remove(self.workbook.active) # Remove default sheet
# Create sheets # Create sheets
self.contents_sheet = self.workbook.create_sheet("Contents") self.contents_sheet = self.workbook.create_sheet("Contents")
self.comments_sheet = self.workbook.create_sheet("Comments") self.comments_sheet = self.workbook.create_sheet("Comments")
self.creators_sheet = self.workbook.create_sheet("Creators") self.creators_sheet = self.workbook.create_sheet("Creators")
# Track if headers are written # Track if headers are written
self.contents_headers_written = False self.contents_headers_written = False
self.comments_headers_written = False self.comments_headers_written = False
self.creators_headers_written = False self.creators_headers_written = False
self.contacts_headers_written = False
self.dynamics_headers_written = False
# Optional sheets for platforms that need them (e.g., Bilibili)
self.contacts_sheet = None
self.dynamics_sheet = None
# Generate filename # Generate filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
self.filename = self.data_dir / f"{platform}_{crawler_type}_{timestamp}.xlsx" self.filename = self.data_dir / f"{platform}_{crawler_type}_{timestamp}.xlsx"
utils.logger.info(f"[ExcelStoreBase] Initialized Excel export to: {self.filename}") utils.logger.info(f"[ExcelStoreBase] Initialized Excel export to: {self.filename}")
def _apply_header_style(self, sheet, row_num: int = 1): def _apply_header_style(self, sheet, row_num: int = 1):
""" """
Apply formatting to header row Apply formatting to header row
Args: Args:
sheet: Worksheet object sheet: Worksheet object
row_num: Row number for headers (default: 1) row_num: Row number for headers (default: 1)
@@ -103,70 +157,70 @@ class ExcelStoreBase(AbstractStore):
top=Side(style='thin'), top=Side(style='thin'),
bottom=Side(style='thin') bottom=Side(style='thin')
) )
for cell in sheet[row_num]: for cell in sheet[row_num]:
cell.fill = header_fill cell.fill = header_fill
cell.font = header_font cell.font = header_font
cell.alignment = header_alignment cell.alignment = header_alignment
cell.border = border cell.border = border
def _auto_adjust_column_width(self, sheet):
    """
    Size each column of ``sheet`` to fit its longest cell text.

    The width is the longest ``str(cell.value)`` length plus 2, clamped to
    the range 10..50. Cells with a falsy value are ignored.

    Args:
        sheet: Worksheet object
    """
    for cells in sheet.columns:
        letter = get_column_letter(cells[0].column)
        longest = 0

        for current in cells:
            try:
                if not current.value:
                    continue
                longest = max(longest, len(str(current.value)))
            except (TypeError, AttributeError):
                pass

        # Clamp to at least 10 and at most 50.
        width = max(10, min(longest + 2, 50))
        sheet.column_dimensions[letter].width = width
def _write_headers(self, sheet, headers: List[str]): def _write_headers(self, sheet, headers: List[str]):
""" """
Write headers to sheet Write headers to sheet
Args: Args:
sheet: Worksheet object sheet: Worksheet object
headers: List of header names headers: List of header names
""" """
for col_num, header in enumerate(headers, 1): for col_num, header in enumerate(headers, 1):
sheet.cell(row=1, column=col_num, value=header) sheet.cell(row=1, column=col_num, value=header)
self._apply_header_style(sheet) self._apply_header_style(sheet)
def _write_row(self, sheet, data: Dict[str, Any], headers: List[str]): def _write_row(self, sheet, data: Dict[str, Any], headers: List[str]):
""" """
Write data row to sheet Write data row to sheet
Args: Args:
sheet: Worksheet object sheet: Worksheet object
data: Data dictionary data: Data dictionary
headers: List of header names (defines column order) headers: List of header names (defines column order)
""" """
row_num = sheet.max_row + 1 row_num = sheet.max_row + 1
for col_num, header in enumerate(headers, 1): for col_num, header in enumerate(headers, 1):
value = data.get(header, "") value = data.get(header, "")
# Handle different data types # Handle different data types
if isinstance(value, (list, dict)): if isinstance(value, (list, dict)):
value = str(value) value = str(value)
elif value is None: elif value is None:
value = "" value = ""
cell = sheet.cell(row=row_num, column=col_num, value=value) cell = sheet.cell(row=row_num, column=col_num, value=value)
# Apply basic formatting # Apply basic formatting
cell.alignment = Alignment(vertical="top", wrap_text=True) cell.alignment = Alignment(vertical="top", wrap_text=True)
cell.border = Border( cell.border = Border(
@@ -175,89 +229,152 @@ class ExcelStoreBase(AbstractStore):
top=Side(style='thin'), top=Side(style='thin'),
bottom=Side(style='thin') bottom=Side(style='thin')
) )
async def store_content(self, content_item: Dict):
    """
    Append one content record to the "Contents" sheet.

    The column order is taken from this item's own keys. The header row is
    written only on the first call, using that first item's keys.

    Args:
        content_item: Content data dictionary
    """
    sheet = self.contents_sheet
    columns = list(content_item)

    if not self.contents_headers_written:
        self._write_headers(sheet, columns)
        self.contents_headers_written = True

    self._write_row(sheet, content_item, columns)

    # Platforms name their ID field differently; log the first truthy one.
    id_fields = ('note_id', 'aweme_id', 'video_id', 'content_id')
    content_id = next(filter(None, map(content_item.get, id_fields)), 'N/A')
    utils.logger.info(f"[ExcelStoreBase] Stored content to Excel: {content_id}")
async def store_comment(self, comment_item: Dict):
    """
    Append one comment record to the "Comments" sheet.

    The column order is taken from this item's own keys. The header row is
    written only on the first call, using that first item's keys.

    Args:
        comment_item: Comment data dictionary
    """
    sheet = self.comments_sheet
    columns = list(comment_item)

    if not self.comments_headers_written:
        self._write_headers(sheet, columns)
        self.comments_headers_written = True

    self._write_row(sheet, comment_item, columns)
    utils.logger.info(f"[ExcelStoreBase] Stored comment to Excel: {comment_item.get('comment_id', 'N/A')}")
async def store_creator(self, creator: Dict):
    """
    Append one creator record to the "Creators" sheet.

    The column order is taken from this item's own keys. The header row is
    written only on the first call, using that first item's keys.

    Args:
        creator: Creator data dictionary
    """
    sheet = self.creators_sheet
    columns = list(creator)

    if not self.creators_headers_written:
        self._write_headers(sheet, columns)
        self.creators_headers_written = True

    self._write_row(sheet, creator, columns)
    utils.logger.info(f"[ExcelStoreBase] Stored creator to Excel: {creator.get('user_id', 'N/A')}")
async def store_contact(self, contact_item: Dict):
    """
    Append one contact record to the "Contacts" sheet (for platforms like
    Bilibili).

    The sheet is created on first use. The column order is taken from this
    item's own keys, and the header row is written only on the first call.

    Args:
        contact_item: Contact data dictionary
    """
    # The Contacts sheet is optional, so it is created on demand.
    if self.contacts_sheet is None:
        self.contacts_sheet = self.workbook.create_sheet("Contacts")
    sheet = self.contacts_sheet

    columns = list(contact_item)
    if not self.contacts_headers_written:
        self._write_headers(sheet, columns)
        self.contacts_headers_written = True

    self._write_row(sheet, contact_item, columns)
    utils.logger.info(f"[ExcelStoreBase] Stored contact to Excel: up_id={contact_item.get('up_id', 'N/A')}, fan_id={contact_item.get('fan_id', 'N/A')}")
async def store_dynamic(self, dynamic_item: Dict):
    """
    Append one dynamic record to the "Dynamics" sheet (for platforms like
    Bilibili).

    The sheet is created on first use. The column order is taken from this
    item's own keys, and the header row is written only on the first call.

    Args:
        dynamic_item: Dynamic data dictionary
    """
    # The Dynamics sheet is optional, so it is created on demand.
    if self.dynamics_sheet is None:
        self.dynamics_sheet = self.workbook.create_sheet("Dynamics")
    sheet = self.dynamics_sheet

    columns = list(dynamic_item)
    if not self.dynamics_headers_written:
        self._write_headers(sheet, columns)
        self.dynamics_headers_written = True

    self._write_row(sheet, dynamic_item, columns)
    utils.logger.info(f"[ExcelStoreBase] Stored dynamic to Excel: {dynamic_item.get('dynamic_id', 'N/A')}")
def flush(self):
    """
    Save the workbook to ``self.filename``.

    Column widths are adjusted on every sheet, then each sheet whose
    ``max_row`` equals 1 is removed from the workbook. If no sheet is left
    nothing is written; otherwise the workbook is saved.

    Raises:
        Exception: Any error raised while adjusting, pruning or saving is
            logged and re-raised.
    """
    try:
        # Fixed sheets first, then whichever optional sheets were created.
        sheets = [self.contents_sheet, self.comments_sheet, self.creators_sheet]
        sheets.extend(
            optional
            for optional in (self.contacts_sheet, self.dynamics_sheet)
            if optional is not None
        )

        for sheet in sheets:
            self._auto_adjust_column_width(sheet)

        # Drop sheets that never received a data row.
        for sheet in sheets:
            if sheet.max_row == 1:
                self.workbook.remove(sheet)

        if not self.workbook.sheetnames:
            utils.logger.info(f"[ExcelStoreBase] No data to save, skipping file creation: {self.filename}")
            return

        self.workbook.save(self.filename)
        utils.logger.info(f"[ExcelStoreBase] Excel file saved successfully: {self.filename}")
    except Exception as e:
        utils.logger.error(f"[ExcelStoreBase] Error saving Excel file: {e}")
        raise

View File

@@ -37,6 +37,7 @@ class KuaishouStoreFactory:
"json": KuaishouJsonStoreImplement, "json": KuaishouJsonStoreImplement,
"sqlite": KuaishouSqliteStoreImplement, "sqlite": KuaishouSqliteStoreImplement,
"mongodb": KuaishouMongoStoreImplement, "mongodb": KuaishouMongoStoreImplement,
"excel": KuaishouExcelStoreImplement,
} }
@staticmethod @staticmethod
@@ -44,7 +45,7 @@ class KuaishouStoreFactory:
store_class = KuaishouStoreFactory.STORES.get(config.SAVE_DATA_OPTION) store_class = KuaishouStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
if not store_class: if not store_class:
raise ValueError( raise ValueError(
"[KuaishouStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...") "[KuaishouStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
return store_class() return store_class()

View File

@@ -226,3 +226,14 @@ class KuaishouMongoStoreImplement(AbstractStore):
data=creator_item data=creator_item
) )
utils.logger.info(f"[KuaishouMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB") utils.logger.info(f"[KuaishouMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class KuaishouExcelStoreImplement:
    """
    Kuaishou Excel store implementation - global singleton.

    Calling this class does not produce an object of this type: ``__new__``
    hands back the shared ``ExcelStoreBase`` instance registered for platform
    "kuaishou" and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # The import is performed inside the method, at call time.
        from store.excel_store_base import ExcelStoreBase

        current_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(
            platform="kuaishou",
            crawler_type=current_crawler_type,
        )

View File

@@ -34,6 +34,7 @@ class TieBaStoreFactory:
"json": TieBaJsonStoreImplement, "json": TieBaJsonStoreImplement,
"sqlite": TieBaSqliteStoreImplement, "sqlite": TieBaSqliteStoreImplement,
"mongodb": TieBaMongoStoreImplement, "mongodb": TieBaMongoStoreImplement,
"excel": TieBaExcelStoreImplement,
} }
@staticmethod @staticmethod
@@ -41,7 +42,7 @@ class TieBaStoreFactory:
store_class = TieBaStoreFactory.STORES.get(config.SAVE_DATA_OPTION) store_class = TieBaStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
if not store_class: if not store_class:
raise ValueError( raise ValueError(
"[TieBaStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...") "[TieBaStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
return store_class() return store_class()

View File

@@ -258,3 +258,14 @@ class TieBaMongoStoreImplement(AbstractStore):
data=creator_item data=creator_item
) )
utils.logger.info(f"[TieBaMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB") utils.logger.info(f"[TieBaMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class TieBaExcelStoreImplement:
    """
    Tieba Excel store implementation - global singleton.

    Calling this class does not produce an object of this type: ``__new__``
    hands back the shared ``ExcelStoreBase`` instance registered for platform
    "tieba" and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # The import is performed inside the method, at call time.
        from store.excel_store_base import ExcelStoreBase

        current_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(
            platform="tieba",
            crawler_type=current_crawler_type,
        )

View File

@@ -38,13 +38,14 @@ class WeibostoreFactory:
"json": WeiboJsonStoreImplement, "json": WeiboJsonStoreImplement,
"sqlite": WeiboSqliteStoreImplement, "sqlite": WeiboSqliteStoreImplement,
"mongodb": WeiboMongoStoreImplement, "mongodb": WeiboMongoStoreImplement,
"excel": WeiboExcelStoreImplement,
} }
@staticmethod
def create_store() -> AbstractStore:
    """
    Build the Weibo store selected by ``config.SAVE_DATA_OPTION``.

    Returns:
        A new object produced by calling the class registered in
        ``WeibostoreFactory.STORES`` for the configured option.

    Raises:
        ValueError: If the configured option has no entry in ``STORES``.
    """
    store_class = WeibostoreFactory.STORES.get(config.SAVE_DATA_OPTION)
    if not store_class:
        # The tag matches the real class name so the message can be traced
        # back to this factory (it previously read "WeibotoreFactory").
        raise ValueError("[WeibostoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
    return store_class()

View File

@@ -280,3 +280,14 @@ class WeiboMongoStoreImplement(AbstractStore):
data=creator_item data=creator_item
) )
utils.logger.info(f"[WeiboMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB") utils.logger.info(f"[WeiboMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class WeiboExcelStoreImplement:
    """
    Weibo Excel store implementation - global singleton.

    Calling this class does not produce an object of this type: ``__new__``
    hands back the shared ``ExcelStoreBase`` instance registered for platform
    "weibo" and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # The import is performed inside the method, at call time.
        from store.excel_store_base import ExcelStoreBase

        current_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(
            platform="weibo",
            crawler_type=current_crawler_type,
        )

View File

@@ -339,9 +339,12 @@ class XhsMongoStoreImplement(AbstractStore):
utils.logger.info(f"[XhsMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB") utils.logger.info(f"[XhsMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class XhsExcelStoreImplement:
    """
    Xiaohongshu Excel store implementation - global singleton.

    Calling this class does not produce an object of this type: ``__new__``
    hands back the shared ``ExcelStoreBase`` instance registered for platform
    "xhs" and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # The import is performed inside the method, at call time.
        from store.excel_store_base import ExcelStoreBase

        current_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(
            platform="xhs",
            crawler_type=current_crawler_type,
        )

View File

@@ -28,7 +28,8 @@ from ._store_impl import (ZhihuCsvStoreImplement,
ZhihuDbStoreImplement, ZhihuDbStoreImplement,
ZhihuJsonStoreImplement, ZhihuJsonStoreImplement,
ZhihuSqliteStoreImplement, ZhihuSqliteStoreImplement,
ZhihuMongoStoreImplement) ZhihuMongoStoreImplement,
ZhihuExcelStoreImplement)
from tools import utils from tools import utils
from var import source_keyword_var from var import source_keyword_var
@@ -40,13 +41,14 @@ class ZhihuStoreFactory:
"json": ZhihuJsonStoreImplement, "json": ZhihuJsonStoreImplement,
"sqlite": ZhihuSqliteStoreImplement, "sqlite": ZhihuSqliteStoreImplement,
"mongodb": ZhihuMongoStoreImplement, "mongodb": ZhihuMongoStoreImplement,
"excel": ZhihuExcelStoreImplement,
} }
@staticmethod
def create_store() -> AbstractStore:
    """
    Build the Zhihu store selected by ``config.SAVE_DATA_OPTION``.

    Returns:
        A new object produced by calling the class registered in
        ``ZhihuStoreFactory.STORES`` for the configured option.

    Raises:
        ValueError: If the configured option has no entry in ``STORES``.
    """
    selected_option = config.SAVE_DATA_OPTION
    implementation = ZhihuStoreFactory.STORES.get(selected_option)
    if implementation:
        return implementation()
    raise ValueError("[ZhihuStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
async def batch_update_zhihu_contents(contents: List[ZhihuContent]): async def batch_update_zhihu_contents(contents: List[ZhihuContent]):

View File

@@ -257,3 +257,14 @@ class ZhihuMongoStoreImplement(AbstractStore):
data=creator_item data=creator_item
) )
utils.logger.info(f"[ZhihuMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB") utils.logger.info(f"[ZhihuMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class ZhihuExcelStoreImplement:
    """
    Zhihu Excel store implementation - global singleton.

    Calling this class does not produce an object of this type: ``__new__``
    hands back the shared ``ExcelStoreBase`` instance registered for platform
    "zhihu" and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # The import is performed inside the method, at call time.
        from store.excel_store_base import ExcelStoreBase

        current_crawler_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(
            platform="zhihu",
            crawler_type=current_crawler_type,
        )

View File

@@ -1,4 +1,21 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/tests/test_excel_store.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
""" """
Unit tests for Excel export functionality Unit tests for Excel export functionality
""" """
@@ -22,7 +39,14 @@ from store.excel_store_base import ExcelStoreBase
@pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed") @pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed")
class TestExcelStoreBase: class TestExcelStoreBase:
"""Test cases for ExcelStoreBase""" """Test cases for ExcelStoreBase"""
@pytest.fixture(autouse=True)
def clear_singleton_state(self):
    """
    Empty the ``ExcelStoreBase`` instance cache around every test.

    The cache is cleared once before the test body runs and once more after
    it, so cached stores do not carry over between tests.
    """
    # Setup: start from an empty cache.
    ExcelStoreBase._instances.clear()

    yield

    # Teardown: drop anything the test registered.
    ExcelStoreBase._instances.clear()
@pytest.fixture @pytest.fixture
def temp_dir(self): def temp_dir(self):
"""Create temporary directory for test files""" """Create temporary directory for test files"""
@@ -30,7 +54,7 @@ class TestExcelStoreBase:
yield temp_path yield temp_path
# Cleanup # Cleanup
shutil.rmtree(temp_path, ignore_errors=True) shutil.rmtree(temp_path, ignore_errors=True)
@pytest.fixture @pytest.fixture
def excel_store(self, temp_dir, monkeypatch): def excel_store(self, temp_dir, monkeypatch):
"""Create ExcelStoreBase instance for testing""" """Create ExcelStoreBase instance for testing"""
@@ -39,7 +63,7 @@ class TestExcelStoreBase:
store = ExcelStoreBase(platform="test", crawler_type="search") store = ExcelStoreBase(platform="test", crawler_type="search")
yield store yield store
# Cleanup is handled by temp_dir fixture # Cleanup is handled by temp_dir fixture
def test_initialization(self, excel_store): def test_initialization(self, excel_store):
"""Test Excel store initialization""" """Test Excel store initialization"""
assert excel_store.platform == "test" assert excel_store.platform == "test"
@@ -48,7 +72,7 @@ class TestExcelStoreBase:
assert excel_store.contents_sheet is not None assert excel_store.contents_sheet is not None
assert excel_store.comments_sheet is not None assert excel_store.comments_sheet is not None
assert excel_store.creators_sheet is not None assert excel_store.creators_sheet is not None
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_store_content(self, excel_store): async def test_store_content(self, excel_store):
"""Test storing content data""" """Test storing content data"""
@@ -61,13 +85,13 @@ class TestExcelStoreBase:
"liked_count": 100, "liked_count": 100,
"comment_count": 50 "comment_count": 50
} }
await excel_store.store_content(content_item) await excel_store.store_content(content_item)
# Verify data was written # Verify data was written
assert excel_store.contents_sheet.max_row == 2 # Header + 1 data row assert excel_store.contents_sheet.max_row == 2 # Header + 1 data row
assert excel_store.contents_headers_written is True assert excel_store.contents_headers_written is True
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_store_comment(self, excel_store): async def test_store_comment(self, excel_store):
"""Test storing comment data""" """Test storing comment data"""
@@ -79,13 +103,13 @@ class TestExcelStoreBase:
"nickname": "Commenter", "nickname": "Commenter",
"like_count": 10 "like_count": 10
} }
await excel_store.store_comment(comment_item) await excel_store.store_comment(comment_item)
# Verify data was written # Verify data was written
assert excel_store.comments_sheet.max_row == 2 # Header + 1 data row assert excel_store.comments_sheet.max_row == 2 # Header + 1 data row
assert excel_store.comments_headers_written is True assert excel_store.comments_headers_written is True
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_store_creator(self, excel_store): async def test_store_creator(self, excel_store):
"""Test storing creator data""" """Test storing creator data"""
@@ -96,13 +120,13 @@ class TestExcelStoreBase:
"follows": 500, "follows": 500,
"interaction": 50000 "interaction": 50000
} }
await excel_store.store_creator(creator_item) await excel_store.store_creator(creator_item)
# Verify data was written # Verify data was written
assert excel_store.creators_sheet.max_row == 2 # Header + 1 data row assert excel_store.creators_sheet.max_row == 2 # Header + 1 data row
assert excel_store.creators_headers_written is True assert excel_store.creators_headers_written is True
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_multiple_items(self, excel_store): async def test_multiple_items(self, excel_store):
"""Test storing multiple items""" """Test storing multiple items"""
@@ -113,10 +137,10 @@ class TestExcelStoreBase:
"title": f"Title {i}", "title": f"Title {i}",
"liked_count": i * 10 "liked_count": i * 10
}) })
# Verify all items were stored # Verify all items were stored
assert excel_store.contents_sheet.max_row == 6 # Header + 5 data rows assert excel_store.contents_sheet.max_row == 6 # Header + 5 data rows
def test_flush(self, excel_store): def test_flush(self, excel_store):
"""Test flushing data to file""" """Test flushing data to file"""
# Add some test data # Add some test data
@@ -124,38 +148,38 @@ class TestExcelStoreBase:
"note_id": "test", "note_id": "test",
"title": "Test" "title": "Test"
})) }))
# Flush to file # Flush to file
excel_store.flush() excel_store.flush()
# Verify file was created # Verify file was created
assert excel_store.filename.exists() assert excel_store.filename.exists()
# Verify file can be opened # Verify file can be opened
wb = openpyxl.load_workbook(excel_store.filename) wb = openpyxl.load_workbook(excel_store.filename)
assert "Contents" in wb.sheetnames assert "Contents" in wb.sheetnames
wb.close() wb.close()
def test_header_formatting(self, excel_store):
    """Check that the first header cell of the Contents sheet is styled.

    Stores a single content item, then inspects cell (row 1, column 1) of
    ``excel_store.contents_sheet``: its font must be bold and its fill
    colour must end in the hex digits ``366092``.
    """
    sample_item = {"note_id": "test", "title": "Test"}
    asyncio.run(excel_store.store_content(sample_item))

    first_header = excel_store.contents_sheet.cell(row=1, column=1)
    assert first_header.font.bold is True

    # The colour string may carry a leading "00" or "FF" prefix, so only the
    # trailing six hex digits are compared.
    fill_rgb = first_header.fill.start_color.rgb
    assert fill_rgb[-6:] == "366092"
def test_empty_sheets_removed(self, excel_store): def test_empty_sheets_removed(self, excel_store):
"""Test that empty sheets are removed on flush""" """Test that empty sheets are removed on flush"""
# Only add content, leave comments and creators empty # Only add content, leave comments and creators empty
asyncio.run(excel_store.store_content({"note_id": "test"})) asyncio.run(excel_store.store_content({"note_id": "test"}))
excel_store.flush() excel_store.flush()
# Reload workbook # Reload workbook
wb = openpyxl.load_workbook(excel_store.filename) wb = openpyxl.load_workbook(excel_store.filename)
# Only Contents sheet should exist # Only Contents sheet should exist
assert "Contents" in wb.sheetnames assert "Contents" in wb.sheetnames
assert "Comments" not in wb.sheetnames assert "Comments" not in wb.sheetnames
@@ -169,3 +193,83 @@ def test_excel_import_availability():
assert EXCEL_AVAILABLE is True assert EXCEL_AVAILABLE is True
import openpyxl import openpyxl
assert openpyxl is not None assert openpyxl is not None
@pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed")
class TestSingletonPattern:
    """Exercise the instance cache exposed by ``ExcelStoreBase``.

    Covers ``ExcelStoreBase.get_instance`` (reuse of an instance for repeated
    arguments, distinct instances for different arguments) and
    ``ExcelStoreBase.flush_all`` (files exist afterwards and the
    ``_instances`` registry is empty).
    """

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self, tmp_path, monkeypatch):
        """Run each test from ``tmp_path`` with an empty ``_instances`` registry."""
        # Switch the working directory to a throwaway location for this test.
        monkeypatch.chdir(tmp_path)
        # Start clean so instances cached by earlier tests cannot leak in.
        ExcelStoreBase._instances.clear()
        yield
        # Leave nothing cached behind for whichever test runs next.
        ExcelStoreBase._instances.clear()

    def test_get_instance_returns_same_instance(self):
        """Two ``get_instance`` calls with equal arguments yield one object."""
        first = ExcelStoreBase.get_instance("xhs", "search")
        second = ExcelStoreBase.get_instance("xhs", "search")

        assert first is second

    def test_get_instance_different_params_returns_different_instances(self):
        """Changing either argument produces a separate instance."""
        xhs_search = ExcelStoreBase.get_instance("xhs", "search")
        xhs_detail = ExcelStoreBase.get_instance("xhs", "detail")
        douyin_search = ExcelStoreBase.get_instance("douyin", "search")

        # Compare every pair so no two of the three may alias each other.
        assert xhs_search is not xhs_detail
        assert xhs_search is not douyin_search
        assert xhs_detail is not douyin_search

    @pytest.mark.asyncio
    async def test_singleton_preserves_data(self):
        """Rows written through separate ``get_instance`` calls accumulate."""
        # Write one row through the first handle ...
        first_handle = ExcelStoreBase.get_instance("test", "search")
        await first_handle.store_content({"note_id": "note1", "title": "Title 1"})

        # ... and another through a handle fetched with the same arguments.
        second_handle = ExcelStoreBase.get_instance("test", "search")
        await second_handle.store_content({"note_id": "note2", "title": "Title 2"})

        assert first_handle is second_handle
        # One header row plus the two data rows stored above.
        assert first_handle.contents_sheet.max_row == 3

    def test_flush_all_saves_all_instances(self, tmp_path):
        """``flush_all`` leaves a file on disk for every cached instance."""
        # Map the note id stored in each instance to that instance.
        stores_by_note = {
            "note1": ExcelStoreBase.get_instance("platform1", "search"),
            "note2": ExcelStoreBase.get_instance("platform2", "search"),
        }
        for note_id, store in stores_by_note.items():
            asyncio.run(store.store_content({"note_id": note_id}))

        ExcelStoreBase.flush_all()

        # The registry is emptied by the flush ...
        assert len(ExcelStoreBase._instances) == 0
        # ... and each instance's output file now exists.
        for store in stores_by_note.values():
            assert store.filename.exists()

    def test_flush_all_clears_instances(self):
        """``flush_all`` empties the ``_instances`` registry."""
        ExcelStoreBase.get_instance("test", "search")
        cached_before = len(ExcelStoreBase._instances)
        assert cached_before == 1

        ExcelStoreBase.flush_all()

        cached_after = len(ExcelStoreBase._instances)
        assert cached_after == 0

78
uv.lock generated
View File

@@ -171,9 +171,9 @@ wheels = [
name = "cfgv" name = "cfgv"
version = "3.4.0" version = "3.4.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560" } sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114 }
wheels = [ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9" }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 },
] ]
[[package]] [[package]]
@@ -376,6 +376,15 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094 }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094 },
] ]
[[package]]
name = "et-xmlfile"
version = "2.0.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
]
[[package]] [[package]]
name = "fastapi" name = "fastapi"
version = "0.110.2" version = "0.110.2"
@@ -513,6 +522,15 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
] ]
[[package]]
name = "iniconfig"
version = "2.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 },
]
[[package]] [[package]]
name = "jieba" name = "jieba"
version = "0.42.1" version = "0.42.1"
@@ -777,6 +795,7 @@ dependencies = [
{ name = "matplotlib" }, { name = "matplotlib" },
{ name = "motor" }, { name = "motor" },
{ name = "opencv-python" }, { name = "opencv-python" },
{ name = "openpyxl" },
{ name = "pandas" }, { name = "pandas" },
{ name = "parsel" }, { name = "parsel" },
{ name = "pillow" }, { name = "pillow" },
@@ -785,6 +804,8 @@ dependencies = [
{ name = "pydantic" }, { name = "pydantic" },
{ name = "pyexecjs" }, { name = "pyexecjs" },
{ name = "pyhumps" }, { name = "pyhumps" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "python-dotenv" }, { name = "python-dotenv" },
{ name = "redis" }, { name = "redis" },
{ name = "requests" }, { name = "requests" },
@@ -810,6 +831,7 @@ requires-dist = [
{ name = "matplotlib", specifier = "==3.9.0" }, { name = "matplotlib", specifier = "==3.9.0" },
{ name = "motor", specifier = ">=3.3.0" }, { name = "motor", specifier = ">=3.3.0" },
{ name = "opencv-python", specifier = ">=4.11.0.86" }, { name = "opencv-python", specifier = ">=4.11.0.86" },
{ name = "openpyxl", specifier = ">=3.1.2" },
{ name = "pandas", specifier = "==2.2.3" }, { name = "pandas", specifier = "==2.2.3" },
{ name = "parsel", specifier = "==1.9.1" }, { name = "parsel", specifier = "==1.9.1" },
{ name = "pillow", specifier = "==9.5.0" }, { name = "pillow", specifier = "==9.5.0" },
@@ -818,6 +840,8 @@ requires-dist = [
{ name = "pydantic", specifier = "==2.5.2" }, { name = "pydantic", specifier = "==2.5.2" },
{ name = "pyexecjs", specifier = "==1.5.1" }, { name = "pyexecjs", specifier = "==1.5.1" },
{ name = "pyhumps", specifier = ">=3.8.0" }, { name = "pyhumps", specifier = ">=3.8.0" },
{ name = "pytest", specifier = ">=7.4.0" },
{ name = "pytest-asyncio", specifier = ">=0.21.0" },
{ name = "python-dotenv", specifier = "==1.0.1" }, { name = "python-dotenv", specifier = "==1.0.1" },
{ name = "redis", specifier = "~=4.6.0" }, { name = "redis", specifier = "~=4.6.0" },
{ name = "requests", specifier = "==2.32.3" }, { name = "requests", specifier = "==2.32.3" },
@@ -925,6 +949,18 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
] ]
[[package]]
name = "openpyxl"
version = "3.1.5"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "et-xmlfile" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
]
[[package]] [[package]]
name = "packaging" name = "packaging"
version = "25.0" version = "25.0"
@@ -1040,6 +1076,15 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/0f/c8dcadb2f0dcfdab6052d5ecf57ccf19b439c0adc29fc510ed0830349345/playwright-1.45.0-py3-none-win_amd64.whl", hash = "sha256:701db496928429aec103739e48e3110806bd5cf49456cc95b89f28e1abda71da", size = 29692683 }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/0f/c8dcadb2f0dcfdab6052d5ecf57ccf19b439c0adc29fc510ed0830349345/playwright-1.45.0-py3-none-win_amd64.whl", hash = "sha256:701db496928429aec103739e48e3110806bd5cf49456cc95b89f28e1abda71da", size = 29692683 },
] ]
[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 },
]
[[package]] [[package]]
name = "pre-commit" name = "pre-commit"
version = "4.4.0" version = "4.4.0"
@@ -1234,6 +1279,35 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120 }, { url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120 },
] ]
[[package]]
name = "pytest"
version = "9.0.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/56/f013048ac4bc4c1d9be45afd4ab209ea62822fb1598f40687e6bf45dcea4/pytest-9.0.1.tar.gz", hash = "sha256:3e9c069ea73583e255c3b21cf46b8d3c56f6e3a1a8f6da94ccb0fcf57b9d73c8", size = 1564125 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/8b/6300fb80f858cda1c51ffa17075df5d846757081d11ab4aa35cef9e6258b/pytest-9.0.1-py3-none-any.whl", hash = "sha256:67be0030d194df2dfa7b556f2e56fb3c3315bd5c8822c6951162b92b32ce7dad", size = 373668 },
]
[[package]]
name = "pytest-asyncio"
version = "1.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "pytest" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075 },
]
[[package]] [[package]]
name = "python-dateutil" name = "python-dateutil"
version = "2.9.0.post0" version = "2.9.0.post0"