新增日志存储逻辑

Co-Authored-By: ouzhuowei <190020754@qq.com>
This commit is contained in:
ouzhuowei
2026-02-06 12:33:35 +08:00
parent 80e9c866a0
commit 30cf16af0c
3 changed files with 92 additions and 2 deletions

View File

@@ -84,6 +84,16 @@ class InitDbOptionEnum(str, Enum):
POSTGRES = "postgres"
class LogLevelEnum(str, Enum):
    """Closed set of accepted log levels.

    Values mirror the stdlib ``logging`` level names so a member's value
    can be passed straight to ``getattr(logging, value)``.  The ``str``
    mixin lets members compare equal to their plain-string names, which
    keeps CLI parsing and config round-tripping simple.
    """

    DEBUG = "DEBUG"
    INFO = "INFO"
    WARNING = "WARNING"
    ERROR = "ERROR"
    CRITICAL = "CRITICAL"
def _to_bool(value: bool | str) -> bool:
if isinstance(value, bool):
return value
@@ -299,6 +309,31 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
rich_help_panel="Proxy Configuration",
),
] = config.IP_PROXY_PROVIDER_NAME,
log_save_enable: Annotated[
str,
typer.Option(
"--log_save_enable",
help="Whether to save logs to file, supports yes/true/t/y/1 or no/false/f/n/0",
rich_help_panel="Log Configuration",
show_default=True,
),
] = str(config.LOG_SAVE_ENABLE),
log_save_path: Annotated[
str,
typer.Option(
"--log_save_path",
help="Log file save path, default is ./logs",
rich_help_panel="Log Configuration",
),
] = config.LOG_SAVE_PATH,
log_save_level: Annotated[
LogLevelEnum,
typer.Option(
"--log_save_level",
help="Log save level (DEBUG | INFO | WARNING | ERROR | CRITICAL)",
rich_help_panel="Log Configuration",
),
] = _coerce_enum(LogLevelEnum, config.LOG_SAVE_LEVEL, LogLevelEnum.INFO),
) -> SimpleNamespace:
"""MediaCrawler 命令行入口"""
@@ -306,6 +341,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
enable_sub_comment = _to_bool(get_sub_comment)
enable_headless = _to_bool(headless)
enable_ip_proxy_value = _to_bool(enable_ip_proxy)
enable_log_save = _to_bool(log_save_enable)
init_db_value = init_db.value if init_db else None
# Parse specified_id and creator_id into lists
@@ -330,6 +366,9 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
config.ENABLE_IP_PROXY = enable_ip_proxy_value
config.IP_PROXY_POOL_COUNT = ip_proxy_pool_count
config.IP_PROXY_PROVIDER_NAME = ip_proxy_provider_name
config.LOG_SAVE_ENABLE = enable_log_save
config.LOG_SAVE_PATH = log_save_path
config.LOG_SAVE_LEVEL = log_save_level.value
# Set platform-specific ID lists for detail/creator mode
if specified_id_list:

View File

@@ -120,6 +120,14 @@ FONT_PATH = "./docs/STZHONGS.TTF"
# Sleep interval between crawl requests, in seconds
CRAWLER_MAX_SLEEP_SEC = 2
# Logging configuration
# Whether to persist logs to a file (console logging is always on)
LOG_SAVE_ENABLE = False
# Directory where log files are written
LOG_SAVE_PATH = "./logs"
# Minimum level written to the log file (DEBUG/INFO/WARNING/ERROR/CRITICAL)
LOG_SAVE_LEVEL = "INFO"
from .bilibili_config import *
from .xhs_config import *
from .dy_config import *

View File

@@ -20,6 +20,9 @@
import argparse
import logging
import os
from logging.handlers import RotatingFileHandler
from datetime import datetime
from .crawler_util import *
from .slider_util import *
@@ -27,15 +30,55 @@ from .time_util import *
def init_loging_config():
    """Configure console logging and, optionally, a rotating log file.

    Reads LOG_SAVE_ENABLE / LOG_SAVE_PATH / LOG_SAVE_LEVEL from
    ``config.base_config``, falling back to safe defaults when the config
    package is unavailable (e.g. when this module is used standalone).
    The "MediaCrawler" logger is configured in place; file-handler setup
    is best-effort and never breaks console logging.
    """
    # Import log-save settings; fall back to defaults if config is missing.
    try:
        from config.base_config import LOG_SAVE_ENABLE, LOG_SAVE_PATH, LOG_SAVE_LEVEL
    except ImportError:
        LOG_SAVE_ENABLE = False
        LOG_SAVE_PATH = "./logs"
        LOG_SAVE_LEVEL = "INFO"

    level = logging.INFO
    log_format = "%(asctime)s %(name)s %(levelname)s (%(filename)s:%(lineno)d) - %(message)s"
    date_format = '%Y-%m-%d %H:%M:%S'

    # Base (console) logging configuration.  NOTE: the diff artifact that
    # repeated the format=/datefmt= keyword arguments (a SyntaxError) is
    # resolved here in favor of the shared log_format/date_format values.
    logging.basicConfig(
        level=level,
        format=log_format,
        datefmt=date_format
    )
    _logger = logging.getLogger("MediaCrawler")
    _logger.setLevel(level)

    # If log saving is enabled, also attach a file handler.
    if LOG_SAVE_ENABLE and LOG_SAVE_PATH:
        try:
            # Make sure the log directory exists.
            log_dir = os.path.abspath(LOG_SAVE_PATH)
            os.makedirs(log_dir, exist_ok=True)
            # One file per day, named by date.
            log_filename = os.path.join(log_dir, f"mediacrawler-{datetime.now().strftime('%Y-%m-%d')}.log")
            # Map the configured level string onto a logging level,
            # defaulting to INFO for unknown names.
            file_level = getattr(logging, LOG_SAVE_LEVEL.upper(), logging.INFO)
            # A RotatingFileHandler with maxBytes=0 never rotates; cap each
            # file at 10 MiB and keep 5 backups so logs cannot grow unbounded.
            file_handler = RotatingFileHandler(
                log_filename,
                maxBytes=10 * 1024 * 1024,
                backupCount=5,
                encoding='utf-8'
            )
            file_handler.setLevel(file_level)
            file_handler.setFormatter(logging.Formatter(log_format, date_format))
            _logger.addHandler(file_handler)
        except Exception as e:
            # Best-effort: a failed file-log setup must not affect console logging.
            _logger.warning(f"日志文件保存配置失败: {e}")

    # Disable httpx INFO level logs
    logging.getLogger("httpx").setLevel(logging.WARNING)