新增数据保存路径,默认不指定则保存到data文件夹下

Co-Authored-By: ouzhuowei <190020754@qq.com>
This commit is contained in:
ouzhuowei
2026-02-03 11:24:22 +08:00
parent 51a7d94de8
commit 7484156f02
3 changed files with 20 additions and 2 deletions

View File

@@ -266,6 +266,14 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
rich_help_panel="Performance Configuration",
),
] = config.MAX_CONCURRENCY_NUM,
save_data_path: Annotated[
str,
typer.Option(
"--save_data_path",
help="Data save path, default is empty and will save to data folder",
rich_help_panel="Storage Configuration",
),
] = config.SAVE_DATA_PATH,
) -> SimpleNamespace:
"""MediaCrawler 命令行入口"""
@@ -292,6 +300,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
config.COOKIES = cookies
config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES = max_comments_count_singlenotes
config.MAX_CONCURRENCY_NUM = max_concurrency_num
config.SAVE_DATA_PATH = save_data_path
# Set platform-specific ID lists for detail/creator mode
if specified_id_list:

View File

@@ -73,6 +73,9 @@ AUTO_CLOSE_BROWSER = True
# Data save type option. Six types are supported: csv, db, json, sqlite, excel, postgres. Saving to a DB is preferred because it provides de-duplication.
SAVE_DATA_OPTION = "json" # csv or db or json or sqlite or excel or postgres
# Data save path. Not specified by default (empty string), in which case data is saved under the "data" folder.
SAVE_DATA_PATH = ""
# Configuration of the browser files used for the user's browser cache
USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name

View File

@@ -35,7 +35,10 @@ class AsyncFileWriter:
self.wordcloud_generator = AsyncWordCloudGenerator() if config.ENABLE_GET_WORDCLOUD else None
def _get_file_path(self, file_type: str, item_type: str) -> str:
    """Build the output file path for a record type, creating its directory.

    The directory is ``<root>/<platform>/<file_type>``, where ``<root>`` is
    ``config.SAVE_DATA_PATH`` when it is set (truthy) and ``"data"`` otherwise.
    The directory is created if it does not exist yet.

    Args:
        file_type: Used both as the sub-directory name and as the file
            extension.
        item_type: Label embedded in the file name, after the crawler type.

    Returns:
        The path as a "/"-joined string:
        ``<root>/<platform>/<file_type>/<crawler_type>_<item_type>_<date>.<file_type>``.
    """
    # Fall back to the "data" folder when no custom save path is set.
    root_dir = config.SAVE_DATA_PATH or "data"
    base_path = f"{root_dir}/{self.platform}/{file_type}"
    pathlib.Path(base_path).mkdir(parents=True, exist_ok=True)
    file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_date()}.{file_type}"
    return f"{base_path}/{file_name}"
@@ -113,7 +116,10 @@ class AsyncFileWriter:
return
# Generate wordcloud
words_base_path = f"data/{self.platform}/words"
if config.SAVE_DATA_PATH:
words_base_path = f"{config.SAVE_DATA_PATH}/{self.platform}/words"
else:
words_base_path = f"data/{self.platform}/words"
pathlib.Path(words_base_path).mkdir(parents=True, exist_ok=True)
words_file_prefix = f"{words_base_path}/{self.crawler_type}_comments_{utils.get_current_date()}"