mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-03-05 13:40:46 +08:00
JSONL(JSON Lines)每行一个 JSON 对象,采用 append 模式写入, 无需读取已有数据,大数据量下性能远优于 JSON 格式。 - 新增 AsyncFileWriter.write_to_jsonl() 核心方法 - 7 个平台新增 JsonlStoreImplement 类并注册到工厂 - 配置默认值从 json 改为 jsonl,CLI/API 枚举同步更新 - db_session.py 守卫条件加入 jsonl,避免误触 ValueError - 词云生成支持读取 JSONL 文件,优先 jsonl 回退 json - 原有 json 选项完全保留,向后兼容 - 更新相关文档和测试
100 lines
3.6 KiB
Python
100 lines
3.6 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright (c) 2025 relakkes@gmail.com
|
|
#
|
|
# This file is part of MediaCrawler project.
|
|
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/tests/test_store_factory.py
|
|
# GitHub: https://github.com/NanmiCoder
|
|
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
|
#
|
|
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
|
# 1. 不得用于任何商业用途。
|
|
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
|
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
|
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
|
# 5. 不得用于任何非法或不当的用途。
|
|
#
|
|
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
|
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
|
|
|
"""
|
|
Unit tests for Store Factory functionality
|
|
"""
|
|
|
|
import pytest
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
from store.xhs import XhsStoreFactory
|
|
from store.xhs._store_impl import (
|
|
XhsCsvStoreImplement,
|
|
XhsJsonStoreImplement,
|
|
XhsJsonlStoreImplement,
|
|
XhsDbStoreImplement,
|
|
XhsSqliteStoreImplement,
|
|
XhsMongoStoreImplement,
|
|
XhsExcelStoreImplement
|
|
)
|
|
|
|
|
|
class TestXhsStoreFactory:
|
|
"""Test cases for XhsStoreFactory"""
|
|
|
|
@patch('config.SAVE_DATA_OPTION', 'csv')
|
|
def test_create_csv_store(self):
|
|
"""Test creating CSV store"""
|
|
store = XhsStoreFactory.create_store()
|
|
assert isinstance(store, XhsCsvStoreImplement)
|
|
|
|
@patch('config.SAVE_DATA_OPTION', 'json')
|
|
def test_create_json_store(self):
|
|
"""Test creating JSON store"""
|
|
store = XhsStoreFactory.create_store()
|
|
assert isinstance(store, XhsJsonStoreImplement)
|
|
|
|
@patch('config.SAVE_DATA_OPTION', 'db')
|
|
def test_create_db_store(self):
|
|
"""Test creating database store"""
|
|
store = XhsStoreFactory.create_store()
|
|
assert isinstance(store, XhsDbStoreImplement)
|
|
|
|
@patch('config.SAVE_DATA_OPTION', 'sqlite')
|
|
def test_create_sqlite_store(self):
|
|
"""Test creating SQLite store"""
|
|
store = XhsStoreFactory.create_store()
|
|
assert isinstance(store, XhsSqliteStoreImplement)
|
|
|
|
@patch('config.SAVE_DATA_OPTION', 'mongodb')
|
|
def test_create_mongodb_store(self):
|
|
"""Test creating MongoDB store"""
|
|
store = XhsStoreFactory.create_store()
|
|
assert isinstance(store, XhsMongoStoreImplement)
|
|
|
|
@patch('config.SAVE_DATA_OPTION', 'excel')
|
|
def test_create_excel_store(self):
|
|
"""Test creating Excel store"""
|
|
# ContextVar cannot be mocked, so we test with actual value
|
|
store = XhsStoreFactory.create_store()
|
|
assert isinstance(store, XhsExcelStoreImplement)
|
|
|
|
@patch('config.SAVE_DATA_OPTION', 'jsonl')
|
|
def test_create_jsonl_store(self):
|
|
"""Test creating JSONL store"""
|
|
store = XhsStoreFactory.create_store()
|
|
assert isinstance(store, XhsJsonlStoreImplement)
|
|
|
|
@patch('config.SAVE_DATA_OPTION', 'invalid')
|
|
def test_invalid_store_option(self):
|
|
"""Test that invalid store option raises ValueError"""
|
|
with pytest.raises(ValueError) as exc_info:
|
|
XhsStoreFactory.create_store()
|
|
|
|
assert "Invalid save option" in str(exc_info.value)
|
|
|
|
def test_all_stores_registered(self):
|
|
"""Test that all store types are registered"""
|
|
expected_stores = ['csv', 'json', 'jsonl', 'db', 'postgres', 'sqlite', 'mongodb', 'excel']
|
|
|
|
for store_type in expected_stores:
|
|
assert store_type in XhsStoreFactory.STORES
|
|
|
|
assert len(XhsStoreFactory.STORES) == len(expected_stores)
|