diff --git a/cmd_arg/arg.py b/cmd_arg/arg.py index e27d45a..1840a50 100644 --- a/cmd_arg/arg.py +++ b/cmd_arg/arg.py @@ -84,6 +84,16 @@ class InitDbOptionEnum(str, Enum): POSTGRES = "postgres" +class LogLevelEnum(str, Enum): + """Log level enumeration""" + + DEBUG = "DEBUG" + INFO = "INFO" + WARNING = "WARNING" + ERROR = "ERROR" + CRITICAL = "CRITICAL" + + def _to_bool(value: bool | str) -> bool: if isinstance(value, bool): return value @@ -299,6 +309,31 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None): rich_help_panel="Proxy Configuration", ), ] = config.IP_PROXY_PROVIDER_NAME, + log_save_enable: Annotated[ + str, + typer.Option( + "--log_save_enable", + help="Whether to save logs to file, supports yes/true/t/y/1 or no/false/f/n/0", + rich_help_panel="Log Configuration", + show_default=True, + ), + ] = str(config.LOG_SAVE_ENABLE), + log_save_path: Annotated[ + str, + typer.Option( + "--log_save_path", + help="Log file save path, default is ./logs", + rich_help_panel="Log Configuration", + ), + ] = config.LOG_SAVE_PATH, + log_save_level: Annotated[ + LogLevelEnum, + typer.Option( + "--log_save_level", + help="Log save level (DEBUG | INFO | WARNING | ERROR | CRITICAL)", + rich_help_panel="Log Configuration", + ), + ] = _coerce_enum(LogLevelEnum, config.LOG_SAVE_LEVEL, LogLevelEnum.INFO), ) -> SimpleNamespace: """MediaCrawler 命令行入口""" @@ -306,6 +341,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None): enable_sub_comment = _to_bool(get_sub_comment) enable_headless = _to_bool(headless) enable_ip_proxy_value = _to_bool(enable_ip_proxy) + enable_log_save = _to_bool(log_save_enable) init_db_value = init_db.value if init_db else None # Parse specified_id and creator_id into lists @@ -330,6 +366,9 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None): config.ENABLE_IP_PROXY = enable_ip_proxy_value config.IP_PROXY_POOL_COUNT = ip_proxy_pool_count config.IP_PROXY_PROVIDER_NAME = ip_proxy_provider_name + config.LOG_SAVE_ENABLE = enable_log_save + config.LOG_SAVE_PATH = log_save_path + config.LOG_SAVE_LEVEL = log_save_level.value # Set platform-specific ID lists for detail/creator mode if specified_id_list: diff --git a/config/base_config.py b/config/base_config.py index 88a3a6c..ffafe99 100644 --- a/config/base_config.py +++ b/config/base_config.py @@ -120,6 +120,14 @@ FONT_PATH = "./docs/STZHONGS.TTF" # 爬取间隔时间 CRAWLER_MAX_SLEEP_SEC = 2 +# 日志配置 +# 是否保存日志 +LOG_SAVE_ENABLE = False +# 日志保存路径 +LOG_SAVE_PATH = "./logs" +# 日志保存级别 +LOG_SAVE_LEVEL = "INFO" + from .bilibili_config import * from .xhs_config import * from .dy_config import * diff --git a/tools/utils.py b/tools/utils.py index fcee910..14a2089 100644 --- a/tools/utils.py +++ b/tools/utils.py @@ -20,6 +20,9 @@ import argparse import logging +import os +from logging.handlers import RotatingFileHandler +from datetime import datetime from .crawler_util import * from .slider_util import * @@ -27,15 +30,55 @@ from .time_util import * def init_loging_config(): + # 导入配置 + try: + from config.base_config import LOG_SAVE_ENABLE, LOG_SAVE_PATH, LOG_SAVE_LEVEL + except ImportError: + LOG_SAVE_ENABLE = False + LOG_SAVE_PATH = "./logs" + LOG_SAVE_LEVEL = "INFO" + level = logging.INFO + log_format = "%(asctime)s %(name)s %(levelname)s (%(filename)s:%(lineno)d) - %(message)s" + date_format = '%Y-%m-%d %H:%M:%S' + + # 配置基础日志 logging.basicConfig( level=level, - format="%(asctime)s %(name)s %(levelname)s (%(filename)s:%(lineno)d) - %(message)s", - datefmt='%Y-%m-%d %H:%M:%S' + format=log_format, + datefmt=date_format ) _logger = logging.getLogger("MediaCrawler") _logger.setLevel(level) + # 如果启用日志保存,添加文件处理器 + if LOG_SAVE_ENABLE and LOG_SAVE_PATH: + try: + # 确保日志目录存在 + log_dir = os.path.abspath(LOG_SAVE_PATH) + os.makedirs(log_dir, exist_ok=True) + + # 日志文件名:按日期命名 + log_filename = os.path.join(log_dir, f"mediacrawler-{datetime.now().strftime('%Y-%m-%d')}.log") + + # 转换日志级别字符串为logging级别 + file_level = getattr(logging, LOG_SAVE_LEVEL.upper(), logging.INFO) + + # 创建文件处理器 + file_handler = RotatingFileHandler( + log_filename, + encoding='utf-8' + ) + file_handler.setLevel(file_level) + file_handler.setFormatter(logging.Formatter(log_format, date_format)) + + # 添加到logger + _logger.addHandler(file_handler) + + except Exception as e: + # 如果文件日志配置失败,不影响控制台日志 + _logger.warning(f"日志文件保存配置失败: {e}") + # Disable httpx INFO level logs logging.getLogger("httpx").setLevel(logging.WARNING)