新增数据保存路径,默认不指定则保存到data文件夹下

Co-Authored-By: ouzhuowei <190020754@qq.com>
2026-06-03 16:37:28 +08:00 · 2026-02-03 11:24:22 +08:00
parent 51a7d94de8
commit 7484156f02
3 changed files with 20 additions and 2 deletions
--- a/cmd_arg/arg.py
+++ b/cmd_arg/arg.py
@@ -266,6 +266,14 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
                rich_help_panel="Performance Configuration",
            ),
        ] = config.MAX_CONCURRENCY_NUM,
        save_data_path: Annotated[
            str,
            typer.Option(
                "--save_data_path",
                help="Data save path, default is empty and will save to data folder",
                rich_help_panel="Storage Configuration",
            ),
        ] = config.SAVE_DATA_PATH,
    ) -> SimpleNamespace:
        """MediaCrawler 命令行入口"""
@@ -292,6 +300,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
        config.COOKIES = cookies
        config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = max_comments_count_singlenotes
        config.MAX_CONCURRENCY_NUM = max_concurrency_num
        config.SAVE_DATA_PATH = save_data_path
        # Set platform-specific ID lists for detail/creator mode
        if specified_id_list:
--- a/config/base_config.py
+++ b/config/base_config.py
@@ -73,6 +73,9 @@ AUTO_CLOSE_BROWSER = True
 # Data save type option. Six types are supported: csv, db, json, sqlite, excel, postgres. Saving to DB is recommended because it supports deduplication.
 SAVE_DATA_OPTION = "json"  # csv or db or json or sqlite or excel or postgres
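 # Data save path. Leave empty (the default) to save under the data/ folder; when set, files are written under <SAVE_DATA_PATH>/<platform>/ instead.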
 SAVE_DATA_PATH = ""
 # Name template for the directory that holds the user's cached browser data files
 USER_DATA_DIR = "%s_user_data_dir"  # %s will be replaced by platform name
--- a/tools/async_file_writer.py
+++ b/tools/async_file_writer.py
@@ -35,6 +35,9 @@ class AsyncFileWriter:
        self.wordcloud_generator = AsyncWordCloudGenerator() if config.ENABLE_GET_WORDCLOUD else None
    def _get_file_path(self, file_type: str, item_type: str) -> str:
        if config.SAVE_DATA_PATH:
            base_path = f"{config.SAVE_DATA_PATH}/{self.platform}/{file_type}"
        else:
            base_path = f"data/{self.platform}/{file_type}"
        pathlib.Path(base_path).mkdir(parents=True, exist_ok=True)
        file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_date()}.{file_type}"
@@ -113,6 +116,9 @@ class AsyncFileWriter:
                return
            # Generate wordcloud
            if config.SAVE_DATA_PATH:
                words_base_path = f"{config.SAVE_DATA_PATH}/{self.platform}/words"
            else:
                words_base_path = f"data/{self.platform}/words"
            pathlib.Path(words_base_path).mkdir(parents=True, exist_ok=True)
            words_file_prefix = f"{words_base_path}/{self.crawler_type}_comments_{utils.get_current_date()}"