mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-08 02:47:26 +08:00
feat: 新增 JSONL 存储格式支持,默认存储格式改为 jsonl
JSONL(JSON Lines)每行一个 JSON 对象,采用 append 模式写入,无需读取已有数据,大数据量下性能远优于 JSON 格式。

- 新增 AsyncFileWriter.write_to_jsonl() 核心方法
- 7 个平台新增 JsonlStoreImplement 类并注册到工厂
- 配置默认值从 json 改为 jsonl,CLI/API 枚举同步更新
- db_session.py 守卫条件加入 jsonl,避免误触 ValueError
- 词云生成支持读取 JSONL 文件,优先 jsonl 回退 json
- 原有 json 选项完全保留,向后兼容
- 更新相关文档和测试
This commit is contained in:
@@ -1,4 +1,21 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/tests/test_store_factory.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
"""
|
||||
Unit tests for Store Factory functionality
|
||||
"""
|
||||
@@ -10,6 +27,7 @@ from store.xhs import XhsStoreFactory
|
||||
from store.xhs._store_impl import (
|
||||
XhsCsvStoreImplement,
|
||||
XhsJsonStoreImplement,
|
||||
XhsJsonlStoreImplement,
|
||||
XhsDbStoreImplement,
|
||||
XhsSqliteStoreImplement,
|
||||
XhsMongoStoreImplement,
|
||||
@@ -19,57 +37,63 @@ from store.xhs._store_impl import (
|
||||
|
||||
class TestXhsStoreFactory:
    """Test cases for XhsStoreFactory.

    Each ``test_create_*`` case patches ``config.SAVE_DATA_OPTION`` with one
    save-option string, calls ``XhsStoreFactory.create_store()`` and asserts
    that the returned object is an instance of the matching store
    implementation class.
    """

    @patch('config.SAVE_DATA_OPTION', 'csv')
    def test_create_csv_store(self):
        """Test creating CSV store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsCsvStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'json')
    def test_create_json_store(self):
        """Test creating JSON store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsJsonStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'db')
    def test_create_db_store(self):
        """Test creating database store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsDbStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'sqlite')
    def test_create_sqlite_store(self):
        """Test creating SQLite store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsSqliteStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'mongodb')
    def test_create_mongodb_store(self):
        """Test creating MongoDB store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsMongoStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'excel')
    def test_create_excel_store(self):
        """Test creating Excel store"""
        # ContextVar cannot be mocked, so we test with actual value
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsExcelStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'jsonl')
    def test_create_jsonl_store(self):
        """Test creating JSONL store"""
        store = XhsStoreFactory.create_store()
        assert isinstance(store, XhsJsonlStoreImplement)

    @patch('config.SAVE_DATA_OPTION', 'invalid')
    def test_invalid_store_option(self):
        """Test that invalid store option raises ValueError"""
        with pytest.raises(ValueError) as exc_info:
            XhsStoreFactory.create_store()

        # The error text must identify the problem as a bad save option.
        assert "Invalid save option" in str(exc_info.value)

    def test_all_stores_registered(self):
        """Test that all store types are registered"""
        # Single source of truth for the expected option names; the stale
        # list that was immediately overwritten has been dropped.
        expected_stores = ['csv', 'json', 'jsonl', 'db', 'postgres', 'sqlite', 'mongodb', 'excel']

        for store_type in expected_stores:
            assert store_type in XhsStoreFactory.STORES

        # Equal size on top of the membership checks above means STORES
        # holds no entries beyond the expected ones.
        assert len(XhsStoreFactory.STORES) == len(expected_stores)
|
||||
|
||||
Reference in New Issue
Block a user