feat: Excel store support for other platforms

This commit is contained in:
程序员阿江(Relakkes)
2025-11-28 15:12:36 +08:00
parent 324f09cf9f
commit 6e858c1a00
20 changed files with 477 additions and 106 deletions

BIN
.DS_Store vendored
View File

Binary file not shown.

View File

@@ -228,14 +228,15 @@ python main.py --help
### 使用示例:
```shell
# 使用 Excel 存储数据(推荐用于数据分析)✨ 新功能
uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
# 初始化 SQLite 数据库(使用'--init_db'时不需要携带其他optional
# 初始化 SQLite 数据库
uv run main.py --init_db sqlite
# 使用 SQLite 存储数据(推荐个人用户使用)
# 使用 SQLite 存储数据
uv run main.py --platform xhs --lt qrcode --type search --save_data_option sqlite
# 使用 Excel 存储数据(推荐用于数据分析)
uv run main.py --platform xhs --lt qrcode --type search --save_data_option excel
```
```shell
# 初始化 MySQL 数据库
uv run main.py --init_db mysql

View File

@@ -71,6 +71,8 @@ class SaveDataOptionEnum(str, Enum):
DB = "db"
JSON = "json"
SQLITE = "sqlite"
MONGODB = "mongodb"
EXCEL = "excel"
class InitDbOptionEnum(str, Enum):
@@ -199,7 +201,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
SaveDataOptionEnum,
typer.Option(
"--save_data_option",
help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库)",
help="数据保存方式 (csv=CSV文件 | db=MySQL数据库 | json=JSON文件 | sqlite=SQLite数据库 | mongodb=MongoDB数据库 | excel=Excel文件)",
rich_help_panel="存储配置",
),
] = _coerce_enum(

11
main.py
View File

@@ -87,14 +87,11 @@ async def main():
# Flush Excel data if using Excel export
if config.SAVE_DATA_OPTION == "excel":
try:
# Get the store instance and flush data
from store.xhs import XhsStoreFactory
store = XhsStoreFactory.create_store()
if hasattr(store, 'flush'):
store.flush()
print(f"[Main] Excel file saved successfully")
from store.excel_store_base import ExcelStoreBase
ExcelStoreBase.flush_all()
print("[Main] Excel files saved successfully")
except Exception as e:
print(f"Error flushing Excel data: {e}")
print(f"[Main] Error flushing Excel data: {e}")
# Generate wordcloud after crawling is complete
# Only for JSON save mode

View File

@@ -38,13 +38,14 @@ class BiliStoreFactory:
"json": BiliJsonStoreImplement,
"sqlite": BiliSqliteStoreImplement,
"mongodb": BiliMongoStoreImplement,
"excel": BiliExcelStoreImplement,
}
@staticmethod
def create_store() -> AbstractStore:
    """Instantiate the Bilibili store implementation selected by config.SAVE_DATA_OPTION.

    Raises:
        ValueError: if the configured save option has no registered implementation.
    """
    store_class = BiliStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
    if store_class is None:
        raise ValueError("[BiliStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
    return store_class()

View File

@@ -365,3 +365,14 @@ class BiliMongoStoreImplement(AbstractStore):
data=creator_item
)
utils.logger.info(f"[BiliMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class BiliExcelStoreImplement:
    """Bilibili Excel store implementation - global singleton.

    __new__ never returns a BiliExcelStoreImplement instance; it hands back
    the process-wide ExcelStoreBase keyed by platform and current crawler
    type, so repeated factory calls reuse one workbook.
    """

    def __new__(cls, *args, **kwargs):
        # Imported inside the method (likely to avoid an import cycle — confirm).
        from store.excel_store_base import ExcelStoreBase
        current_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(platform="bilibili", crawler_type=current_type)

View File

@@ -37,13 +37,14 @@ class DouyinStoreFactory:
"json": DouyinJsonStoreImplement,
"sqlite": DouyinSqliteStoreImplement,
"mongodb": DouyinMongoStoreImplement,
"excel": DouyinExcelStoreImplement,
}
@staticmethod
def create_store() -> AbstractStore:
    """Instantiate the Douyin store implementation selected by config.SAVE_DATA_OPTION.

    Raises:
        ValueError: if the configured save option has no registered implementation.
    """
    store_class = DouyinStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
    if store_class is None:
        raise ValueError("[DouyinStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
    return store_class()

View File

@@ -264,3 +264,14 @@ class DouyinMongoStoreImplement(AbstractStore):
data=creator_item
)
utils.logger.info(f"[DouyinMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class DouyinExcelStoreImplement:
    """Douyin Excel store implementation - global singleton.

    Construction is delegated: __new__ returns the shared ExcelStoreBase
    for this platform and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # Imported inside the method (likely to avoid an import cycle — confirm).
        from store.excel_store_base import ExcelStoreBase
        current_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(platform="douyin", crawler_type=current_type)

View File

@@ -2,10 +2,20 @@
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/excel_store_base.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
@@ -21,7 +31,7 @@ Excel Store Base Implementation
Provides Excel export functionality for crawled data with formatted sheets
"""
import os
import threading
from datetime import datetime
from typing import Dict, List, Any
from pathlib import Path
@@ -42,12 +52,50 @@ class ExcelStoreBase(AbstractStore):
"""
Base class for Excel storage implementation
Provides formatted Excel export with multiple sheets for contents, comments, and creators
Uses singleton pattern to maintain state across multiple store calls
"""
# Class-level singleton management
_instances: Dict[str, "ExcelStoreBase"] = {}
_lock = threading.Lock()
@classmethod
def get_instance(cls, platform: str, crawler_type: str) -> "ExcelStoreBase":
    """Return the shared store for (platform, crawler_type), creating it on first use.

    Args:
        platform: Platform name (xhs, dy, ks, etc.)
        crawler_type: Type of crawler (search, detail, creator)

    Returns:
        ExcelStoreBase instance
    """
    key = f"{platform}_{crawler_type}"
    # Lock guards the registry so concurrent callers share one instance per key.
    with cls._lock:
        instance = cls._instances.get(key)
        if instance is None:
            instance = cls(platform, crawler_type)
            cls._instances[key] = instance
        return instance
@classmethod
def flush_all(cls):
    """Flush every registered instance to disk, then clear the registry.

    Intended to be called once at the end of crawler execution. Failures
    are logged per instance; a bad instance does not block the others.
    """
    with cls._lock:
        for key, store in cls._instances.items():
            try:
                store.flush()
                utils.logger.info(f"[ExcelStoreBase] Flushed instance: {key}")
            except Exception as e:
                utils.logger.error(f"[ExcelStoreBase] Error flushing {key}: {e}")
        cls._instances.clear()
def __init__(self, platform: str, crawler_type: str = "search"):
"""
Initialize Excel store
Args:
platform: Platform name (xhs, dy, ks, etc.)
crawler_type: Type of crawler (search, detail, creator)
@@ -57,39 +105,45 @@ class ExcelStoreBase(AbstractStore):
"openpyxl is required for Excel export. "
"Install it with: pip install openpyxl"
)
super().__init__()
self.platform = platform
self.crawler_type = crawler_type
# Create data directory
self.data_dir = Path("data") / platform
self.data_dir.mkdir(parents=True, exist_ok=True)
# Initialize workbook
self.workbook = openpyxl.Workbook()
self.workbook.remove(self.workbook.active) # Remove default sheet
# Create sheets
self.contents_sheet = self.workbook.create_sheet("Contents")
self.comments_sheet = self.workbook.create_sheet("Comments")
self.creators_sheet = self.workbook.create_sheet("Creators")
# Track if headers are written
self.contents_headers_written = False
self.comments_headers_written = False
self.creators_headers_written = False
self.contacts_headers_written = False
self.dynamics_headers_written = False
# Optional sheets for platforms that need them (e.g., Bilibili)
self.contacts_sheet = None
self.dynamics_sheet = None
# Generate filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
self.filename = self.data_dir / f"{platform}_{crawler_type}_{timestamp}.xlsx"
utils.logger.info(f"[ExcelStoreBase] Initialized Excel export to: {self.filename}")
def _apply_header_style(self, sheet, row_num: int = 1):
"""
Apply formatting to header row
Args:
sheet: Worksheet object
row_num: Row number for headers (default: 1)
@@ -103,70 +157,70 @@ class ExcelStoreBase(AbstractStore):
top=Side(style='thin'),
bottom=Side(style='thin')
)
for cell in sheet[row_num]:
cell.fill = header_fill
cell.font = header_font
cell.alignment = header_alignment
cell.border = border
def _auto_adjust_column_width(self, sheet):
    """
    Auto-adjust column widths based on content

    Args:
        sheet: Worksheet object
    """
    for column in sheet.columns:
        column_letter = get_column_letter(column[0].column)
        widest = 0
        for cell in column:
            try:
                if cell.value:
                    widest = max(widest, len(str(cell.value)))
            except (TypeError, AttributeError):
                # Non-stringifiable cell values are simply ignored for sizing.
                pass
        # Clamp the width between 10 and 50 characters, with 2 chars padding.
        sheet.column_dimensions[column_letter].width = min(max(widest + 2, 10), 50)
def _write_headers(self, sheet, headers: List[str]):
    """
    Write the header row into row 1 and apply the header styling.

    Args:
        sheet: Worksheet object
        headers: List of header names
    """
    for col_idx, title in enumerate(headers, start=1):
        sheet.cell(row=1, column=col_idx, value=title)
    self._apply_header_style(sheet)
def _write_row(self, sheet, data: Dict[str, Any], headers: List[str]):
"""
Write data row to sheet
Args:
sheet: Worksheet object
data: Data dictionary
headers: List of header names (defines column order)
"""
row_num = sheet.max_row + 1
for col_num, header in enumerate(headers, 1):
value = data.get(header, "")
# Handle different data types
if isinstance(value, (list, dict)):
value = str(value)
elif value is None:
value = ""
cell = sheet.cell(row=row_num, column=col_num, value=value)
# Apply basic formatting
cell.alignment = Alignment(vertical="top", wrap_text=True)
cell.border = Border(
@@ -175,89 +229,152 @@ class ExcelStoreBase(AbstractStore):
top=Side(style='thin'),
bottom=Side(style='thin')
)
async def store_content(self, content_item: Dict):
"""
Store content data to Excel
Args:
content_item: Content data dictionary
"""
# Define headers (customize based on platform)
headers = list(content_item.keys())
# Write headers if first time
if not self.contents_headers_written:
self._write_headers(self.contents_sheet, headers)
self.contents_headers_written = True
# Write data row
self._write_row(self.contents_sheet, content_item, headers)
utils.logger.info(f"[ExcelStoreBase] Stored content to Excel: {content_item.get('note_id', 'N/A')}")
# Get ID from various possible field names
content_id = content_item.get('note_id') or content_item.get('aweme_id') or content_item.get('video_id') or content_item.get('content_id') or 'N/A'
utils.logger.info(f"[ExcelStoreBase] Stored content to Excel: {content_id}")
async def store_comment(self, comment_item: Dict):
    """
    Append one comment row to the Comments sheet.

    Args:
        comment_item: Comment data dictionary
    """
    # Column order follows the keys of the item being written.
    headers = list(comment_item.keys())
    # Headers are only emitted once, on the first comment stored.
    if not self.comments_headers_written:
        self._write_headers(self.comments_sheet, headers)
        self.comments_headers_written = True
    self._write_row(self.comments_sheet, comment_item, headers)
    utils.logger.info(f"[ExcelStoreBase] Stored comment to Excel: {comment_item.get('comment_id', 'N/A')}")
async def store_creator(self, creator_item: Dict):
async def store_creator(self, creator: Dict):
"""
Store creator data to Excel
Args:
creator_item: Creator data dictionary
creator: Creator data dictionary
"""
# Define headers
headers = list(creator_item.keys())
headers = list(creator.keys())
# Write headers if first time
if not self.creators_headers_written:
self._write_headers(self.creators_sheet, headers)
self.creators_headers_written = True
# Write data row
self._write_row(self.creators_sheet, creator_item, headers)
utils.logger.info(f"[ExcelStoreBase] Stored creator to Excel: {creator_item.get('user_id', 'N/A')}")
self._write_row(self.creators_sheet, creator, headers)
utils.logger.info(f"[ExcelStoreBase] Stored creator to Excel: {creator.get('user_id', 'N/A')}")
async def store_contact(self, contact_item: Dict):
    """
    Append one contact row, creating the Contacts sheet lazily.

    Used by platforms such as Bilibili.

    Args:
        contact_item: Contact data dictionary
    """
    # Sheet is created on demand so platforms without contacts never get it.
    if self.contacts_sheet is None:
        self.contacts_sheet = self.workbook.create_sheet("Contacts")
    headers = list(contact_item.keys())
    if not self.contacts_headers_written:
        self._write_headers(self.contacts_sheet, headers)
        self.contacts_headers_written = True
    self._write_row(self.contacts_sheet, contact_item, headers)
    utils.logger.info(f"[ExcelStoreBase] Stored contact to Excel: up_id={contact_item.get('up_id', 'N/A')}, fan_id={contact_item.get('fan_id', 'N/A')}")
async def store_dynamic(self, dynamic_item: Dict):
    """
    Append one dynamic (feed post) row, creating the Dynamics sheet lazily.

    Used by platforms such as Bilibili.

    Args:
        dynamic_item: Dynamic data dictionary
    """
    # Sheet is created on demand so platforms without dynamics never get it.
    if self.dynamics_sheet is None:
        self.dynamics_sheet = self.workbook.create_sheet("Dynamics")
    headers = list(dynamic_item.keys())
    if not self.dynamics_headers_written:
        self._write_headers(self.dynamics_sheet, headers)
        self.dynamics_headers_written = True
    self._write_row(self.dynamics_sheet, dynamic_item, headers)
    utils.logger.info(f"[ExcelStoreBase] Stored dynamic to Excel: {dynamic_item.get('dynamic_id', 'N/A')}")
def flush(self):
    """
    Format, prune, and save the workbook to self.filename.

    Sheets holding only a header row are dropped; if no sheet holds data
    the file is not written at all.

    Raises:
        Exception: re-raises any error from openpyxl after logging it.
    """
    try:
        all_sheets = [self.contents_sheet, self.comments_sheet, self.creators_sheet,
                      self.contacts_sheet, self.dynamics_sheet]
        # Optional sheets (contacts/dynamics) may never have been created.
        present = [sheet for sheet in all_sheets if sheet is not None]
        for sheet in present:
            self._auto_adjust_column_width(sheet)
        # Drop sheets that contain only the header row (max_row == 1).
        for sheet in present:
            if sheet.max_row == 1:
                self.workbook.remove(sheet)
        if len(self.workbook.sheetnames) == 0:
            utils.logger.info(f"[ExcelStoreBase] No data to save, skipping file creation: {self.filename}")
            return
        self.workbook.save(self.filename)
        utils.logger.info(f"[ExcelStoreBase] Excel file saved successfully: {self.filename}")
    except Exception as e:
        utils.logger.error(f"[ExcelStoreBase] Error saving Excel file: {e}")
        raise

View File

@@ -37,6 +37,7 @@ class KuaishouStoreFactory:
"json": KuaishouJsonStoreImplement,
"sqlite": KuaishouSqliteStoreImplement,
"mongodb": KuaishouMongoStoreImplement,
"excel": KuaishouExcelStoreImplement,
}
@staticmethod
@@ -44,7 +45,7 @@ class KuaishouStoreFactory:
store_class = KuaishouStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
if not store_class:
raise ValueError(
"[KuaishouStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
"[KuaishouStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
return store_class()

View File

@@ -226,3 +226,14 @@ class KuaishouMongoStoreImplement(AbstractStore):
data=creator_item
)
utils.logger.info(f"[KuaishouMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class KuaishouExcelStoreImplement:
    """Kuaishou Excel store implementation - global singleton.

    Construction is delegated: __new__ returns the shared ExcelStoreBase
    for this platform and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # Imported inside the method (likely to avoid an import cycle — confirm).
        from store.excel_store_base import ExcelStoreBase
        current_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(platform="kuaishou", crawler_type=current_type)

View File

@@ -34,6 +34,7 @@ class TieBaStoreFactory:
"json": TieBaJsonStoreImplement,
"sqlite": TieBaSqliteStoreImplement,
"mongodb": TieBaMongoStoreImplement,
"excel": TieBaExcelStoreImplement,
}
@staticmethod
@@ -41,7 +42,7 @@ class TieBaStoreFactory:
store_class = TieBaStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
if not store_class:
raise ValueError(
"[TieBaStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb ...")
"[TieBaStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
return store_class()

View File

@@ -258,3 +258,14 @@ class TieBaMongoStoreImplement(AbstractStore):
data=creator_item
)
utils.logger.info(f"[TieBaMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class TieBaExcelStoreImplement:
    """Tieba Excel store implementation - global singleton.

    Construction is delegated: __new__ returns the shared ExcelStoreBase
    for this platform and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # Imported inside the method (likely to avoid an import cycle — confirm).
        from store.excel_store_base import ExcelStoreBase
        current_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(platform="tieba", crawler_type=current_type)

View File

@@ -38,13 +38,14 @@ class WeibostoreFactory:
"json": WeiboJsonStoreImplement,
"sqlite": WeiboSqliteStoreImplement,
"mongodb": WeiboMongoStoreImplement,
"excel": WeiboExcelStoreImplement,
}
@staticmethod
def create_store() -> AbstractStore:
    """Instantiate the Weibo store implementation selected by config.SAVE_DATA_OPTION.

    Raises:
        ValueError: if the configured save option has no registered implementation.
    """
    store_class = WeibostoreFactory.STORES.get(config.SAVE_DATA_OPTION)
    if not store_class:
        # Fixed: message previously said "WeibotoreFactory" (typo for WeibostoreFactory).
        raise ValueError("[WeibostoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
    return store_class()

View File

@@ -280,3 +280,14 @@ class WeiboMongoStoreImplement(AbstractStore):
data=creator_item
)
utils.logger.info(f"[WeiboMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class WeiboExcelStoreImplement:
    """Weibo Excel store implementation - global singleton.

    Construction is delegated: __new__ returns the shared ExcelStoreBase
    for this platform and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # Imported inside the method (likely to avoid an import cycle — confirm).
        from store.excel_store_base import ExcelStoreBase
        current_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(platform="weibo", crawler_type=current_type)

View File

@@ -339,9 +339,12 @@ class XhsMongoStoreImplement(AbstractStore):
utils.logger.info(f"[XhsMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class XhsExcelStoreImplement:
    """XiaoHongShu Excel store implementation - global singleton.

    Construction is delegated: __new__ returns the shared ExcelStoreBase
    for this platform and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # Imported inside the method (likely to avoid an import cycle — confirm).
        from store.excel_store_base import ExcelStoreBase
        current_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(platform="xhs", crawler_type=current_type)

View File

@@ -28,7 +28,8 @@ from ._store_impl import (ZhihuCsvStoreImplement,
ZhihuDbStoreImplement,
ZhihuJsonStoreImplement,
ZhihuSqliteStoreImplement,
ZhihuMongoStoreImplement)
ZhihuMongoStoreImplement,
ZhihuExcelStoreImplement)
from tools import utils
from var import source_keyword_var
@@ -40,13 +41,14 @@ class ZhihuStoreFactory:
"json": ZhihuJsonStoreImplement,
"sqlite": ZhihuSqliteStoreImplement,
"mongodb": ZhihuMongoStoreImplement,
"excel": ZhihuExcelStoreImplement,
}
@staticmethod
def create_store() -> AbstractStore:
    """Instantiate the Zhihu store implementation selected by config.SAVE_DATA_OPTION.

    Raises:
        ValueError: if the configured save option has no registered implementation.
    """
    store_class = ZhihuStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
    if store_class is None:
        raise ValueError("[ZhihuStoreFactory.create_store] Invalid save option only supported csv or db or json or sqlite or mongodb or excel ...")
    return store_class()
async def batch_update_zhihu_contents(contents: List[ZhihuContent]):

View File

@@ -257,3 +257,14 @@ class ZhihuMongoStoreImplement(AbstractStore):
data=creator_item
)
utils.logger.info(f"[ZhihuMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
class ZhihuExcelStoreImplement:
    """Zhihu Excel store implementation - global singleton.

    Construction is delegated: __new__ returns the shared ExcelStoreBase
    for this platform and the current crawler type.
    """

    def __new__(cls, *args, **kwargs):
        # Imported inside the method (likely to avoid an import cycle — confirm).
        from store.excel_store_base import ExcelStoreBase
        current_type = crawler_type_var.get()
        return ExcelStoreBase.get_instance(platform="zhihu", crawler_type=current_type)

View File

@@ -1,4 +1,21 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2025 relakkes@gmail.com
#
# This file is part of MediaCrawler project.
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/tests/test_excel_store.py
# GitHub: https://github.com/NanmiCoder
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
#
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
"""
Unit tests for Excel export functionality
"""
@@ -22,7 +39,14 @@ from store.excel_store_base import ExcelStoreBase
@pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed")
class TestExcelStoreBase:
"""Test cases for ExcelStoreBase"""
@pytest.fixture(autouse=True)
def clear_singleton_state(self):
"""Clear singleton state before and after each test"""
ExcelStoreBase._instances.clear()
yield
ExcelStoreBase._instances.clear()
@pytest.fixture
def temp_dir(self):
"""Create temporary directory for test files"""
@@ -30,7 +54,7 @@ class TestExcelStoreBase:
yield temp_path
# Cleanup
shutil.rmtree(temp_path, ignore_errors=True)
@pytest.fixture
def excel_store(self, temp_dir, monkeypatch):
"""Create ExcelStoreBase instance for testing"""
@@ -39,7 +63,7 @@ class TestExcelStoreBase:
store = ExcelStoreBase(platform="test", crawler_type="search")
yield store
# Cleanup is handled by temp_dir fixture
def test_initialization(self, excel_store):
"""Test Excel store initialization"""
assert excel_store.platform == "test"
@@ -48,7 +72,7 @@ class TestExcelStoreBase:
assert excel_store.contents_sheet is not None
assert excel_store.comments_sheet is not None
assert excel_store.creators_sheet is not None
@pytest.mark.asyncio
async def test_store_content(self, excel_store):
"""Test storing content data"""
@@ -61,13 +85,13 @@ class TestExcelStoreBase:
"liked_count": 100,
"comment_count": 50
}
await excel_store.store_content(content_item)
# Verify data was written
assert excel_store.contents_sheet.max_row == 2 # Header + 1 data row
assert excel_store.contents_headers_written is True
@pytest.mark.asyncio
async def test_store_comment(self, excel_store):
"""Test storing comment data"""
@@ -79,13 +103,13 @@ class TestExcelStoreBase:
"nickname": "Commenter",
"like_count": 10
}
await excel_store.store_comment(comment_item)
# Verify data was written
assert excel_store.comments_sheet.max_row == 2 # Header + 1 data row
assert excel_store.comments_headers_written is True
@pytest.mark.asyncio
async def test_store_creator(self, excel_store):
"""Test storing creator data"""
@@ -96,13 +120,13 @@ class TestExcelStoreBase:
"follows": 500,
"interaction": 50000
}
await excel_store.store_creator(creator_item)
# Verify data was written
assert excel_store.creators_sheet.max_row == 2 # Header + 1 data row
assert excel_store.creators_headers_written is True
@pytest.mark.asyncio
async def test_multiple_items(self, excel_store):
"""Test storing multiple items"""
@@ -113,10 +137,10 @@ class TestExcelStoreBase:
"title": f"Title {i}",
"liked_count": i * 10
})
# Verify all items were stored
assert excel_store.contents_sheet.max_row == 6 # Header + 5 data rows
def test_flush(self, excel_store):
"""Test flushing data to file"""
# Add some test data
@@ -124,38 +148,38 @@ class TestExcelStoreBase:
"note_id": "test",
"title": "Test"
}))
# Flush to file
excel_store.flush()
# Verify file was created
assert excel_store.filename.exists()
# Verify file can be opened
wb = openpyxl.load_workbook(excel_store.filename)
assert "Contents" in wb.sheetnames
wb.close()
def test_header_formatting(self, excel_store):
"""Test header row formatting"""
asyncio.run(excel_store.store_content({"note_id": "test", "title": "Test"}))
# Check header formatting
header_cell = excel_store.contents_sheet.cell(row=1, column=1)
assert header_cell.font.bold is True
# RGB color may have different prefix (00 or FF), check the actual color part
assert header_cell.fill.start_color.rgb[-6:] == "366092"
def test_empty_sheets_removed(self, excel_store):
"""Test that empty sheets are removed on flush"""
# Only add content, leave comments and creators empty
asyncio.run(excel_store.store_content({"note_id": "test"}))
excel_store.flush()
# Reload workbook
wb = openpyxl.load_workbook(excel_store.filename)
# Only Contents sheet should exist
assert "Contents" in wb.sheetnames
assert "Comments" not in wb.sheetnames
@@ -169,3 +193,83 @@ def test_excel_import_availability():
assert EXCEL_AVAILABLE is True
import openpyxl
assert openpyxl is not None
@pytest.mark.skipif(not EXCEL_AVAILABLE, reason="openpyxl not installed")
class TestSingletonPattern:
    """Test singleton pattern for Excel store"""

    @pytest.fixture(autouse=True)
    def setup_and_teardown(self, tmp_path, monkeypatch):
        """Setup and teardown for each test"""
        # Change to temp directory so ExcelStoreBase writes under tmp_path/data
        monkeypatch.chdir(tmp_path)
        # Clear singleton instances before each test
        ExcelStoreBase._instances.clear()
        yield
        # Cleanup after test
        ExcelStoreBase._instances.clear()

    def test_get_instance_returns_same_instance(self):
        """Test that get_instance returns the same instance for same parameters"""
        instance1 = ExcelStoreBase.get_instance("xhs", "search")
        instance2 = ExcelStoreBase.get_instance("xhs", "search")
        assert instance1 is instance2

    def test_get_instance_different_params_returns_different_instances(self):
        """Test that different parameters return different instances"""
        # Each (platform, crawler_type) pair must map to its own instance
        instance1 = ExcelStoreBase.get_instance("xhs", "search")
        instance2 = ExcelStoreBase.get_instance("xhs", "detail")
        instance3 = ExcelStoreBase.get_instance("douyin", "search")
        assert instance1 is not instance2
        assert instance1 is not instance3
        assert instance2 is not instance3

    @pytest.mark.asyncio
    async def test_singleton_preserves_data(self):
        """Test that singleton pattern preserves data across multiple calls"""
        # First call - store some content
        store1 = ExcelStoreBase.get_instance("test", "search")
        await store1.store_content({"note_id": "note1", "title": "Title 1"})
        # Second call - should get same instance with data
        store2 = ExcelStoreBase.get_instance("test", "search")
        await store2.store_content({"note_id": "note2", "title": "Title 2"})
        # Verify both items are in the same workbook
        assert store1 is store2
        assert store1.contents_sheet.max_row == 3  # Header + 2 data rows

    def test_flush_all_saves_all_instances(self, tmp_path):
        """Test that flush_all saves all instances"""
        # Create multiple instances
        store1 = ExcelStoreBase.get_instance("platform1", "search")
        store2 = ExcelStoreBase.get_instance("platform2", "search")
        # Add data to each
        asyncio.run(store1.store_content({"note_id": "note1"}))
        asyncio.run(store2.store_content({"note_id": "note2"}))
        # Flush all
        ExcelStoreBase.flush_all()
        # Verify instances are cleared
        assert len(ExcelStoreBase._instances) == 0
        # Verify files were created
        assert store1.filename.exists()
        assert store2.filename.exists()

    def test_flush_all_clears_instances(self):
        """Test that flush_all clears the instances dictionary"""
        # Create an instance
        ExcelStoreBase.get_instance("test", "search")
        assert len(ExcelStoreBase._instances) == 1
        # Flush all
        ExcelStoreBase.flush_all()
        # Verify instances are cleared
        assert len(ExcelStoreBase._instances) == 0

78
uv.lock generated
View File

@@ -171,9 +171,9 @@ wheels = [
name = "cfgv"
version = "3.4.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9" },
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 },
]
[[package]]
@@ -376,6 +376,15 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094 },
]
[[package]]
name = "et-xmlfile"
version = "2.0.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
]
[[package]]
name = "fastapi"
version = "0.110.2"
@@ -513,6 +522,15 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
]
[[package]]
name = "iniconfig"
version = "2.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484 },
]
[[package]]
name = "jieba"
version = "0.42.1"
@@ -777,6 +795,7 @@ dependencies = [
{ name = "matplotlib" },
{ name = "motor" },
{ name = "opencv-python" },
{ name = "openpyxl" },
{ name = "pandas" },
{ name = "parsel" },
{ name = "pillow" },
@@ -785,6 +804,8 @@ dependencies = [
{ name = "pydantic" },
{ name = "pyexecjs" },
{ name = "pyhumps" },
{ name = "pytest" },
{ name = "pytest-asyncio" },
{ name = "python-dotenv" },
{ name = "redis" },
{ name = "requests" },
@@ -810,6 +831,7 @@ requires-dist = [
{ name = "matplotlib", specifier = "==3.9.0" },
{ name = "motor", specifier = ">=3.3.0" },
{ name = "opencv-python", specifier = ">=4.11.0.86" },
{ name = "openpyxl", specifier = ">=3.1.2" },
{ name = "pandas", specifier = "==2.2.3" },
{ name = "parsel", specifier = "==1.9.1" },
{ name = "pillow", specifier = "==9.5.0" },
@@ -818,6 +840,8 @@ requires-dist = [
{ name = "pydantic", specifier = "==2.5.2" },
{ name = "pyexecjs", specifier = "==1.5.1" },
{ name = "pyhumps", specifier = ">=3.8.0" },
{ name = "pytest", specifier = ">=7.4.0" },
{ name = "pytest-asyncio", specifier = ">=0.21.0" },
{ name = "python-dotenv", specifier = "==1.0.1" },
{ name = "redis", specifier = "~=4.6.0" },
{ name = "requests", specifier = "==2.32.3" },
@@ -925,6 +949,18 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/7d/f1c30a92854540bf789e9cd5dde7ef49bbe63f855b85a2e6b3db8135c591/opencv_python-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:085ad9b77c18853ea66283e98affefe2de8cc4c1f43eda4c100cf9b2721142ec", size = 39488044 },
]
[[package]]
name = "openpyxl"
version = "3.1.5"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "et-xmlfile" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
]
[[package]]
name = "packaging"
version = "25.0"
@@ -1040,6 +1076,15 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/87/0f/c8dcadb2f0dcfdab6052d5ecf57ccf19b439c0adc29fc510ed0830349345/playwright-1.45.0-py3-none-win_amd64.whl", hash = "sha256:701db496928429aec103739e48e3110806bd5cf49456cc95b89f28e1abda71da", size = 29692683 },
]
[[package]]
name = "pluggy"
version = "1.6.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538 },
]
[[package]]
name = "pre-commit"
version = "4.4.0"
@@ -1234,6 +1279,35 @@ wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120 },
]
[[package]]
name = "pytest"
version = "9.0.1"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
{ name = "iniconfig" },
{ name = "packaging" },
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/07/56/f013048ac4bc4c1d9be45afd4ab209ea62822fb1598f40687e6bf45dcea4/pytest-9.0.1.tar.gz", hash = "sha256:3e9c069ea73583e255c3b21cf46b8d3c56f6e3a1a8f6da94ccb0fcf57b9d73c8", size = 1564125 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/0b/8b/6300fb80f858cda1c51ffa17075df5d846757081d11ab4aa35cef9e6258b/pytest-9.0.1-py3-none-any.whl", hash = "sha256:67be0030d194df2dfa7b556f2e56fb3c3315bd5c8822c6951162b92b32ce7dad", size = 373668 },
]
[[package]]
name = "pytest-asyncio"
version = "1.3.0"
source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" }
dependencies = [
{ name = "pytest" },
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
]
sdist = { url = "https://pypi.tuna.tsinghua.edu.cn/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087 }
wheels = [
{ url = "https://pypi.tuna.tsinghua.edu.cn/packages/e5/35/f8b19922b6a25bc0880171a2f1a003eaeb93657475193ab516fd87cac9da/pytest_asyncio-1.3.0-py3-none-any.whl", hash = "sha256:611e26147c7f77640e6d0a92a38ed17c3e9848063698d5c93d5aa7aa11cebff5", size = 15075 },
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"