diff --git a/cmd_arg/arg.py b/cmd_arg/arg.py index c0f8b54..ba24fd7 100644 --- a/cmd_arg/arg.py +++ b/cmd_arg/arg.py @@ -33,7 +33,7 @@ async def parse_cmd(): parser.add_argument('--get_sub_comment', type=str2bool, help=''''whether to crawl level two comment, supported values case insensitive ('yes', 'true', 't', 'y', '1', 'no', 'false', 'f', 'n', '0')''', default=config.ENABLE_GET_SUB_COMMENTS) parser.add_argument('--save_data_option', type=str, - help='where to save the data (csv or db or json)', choices=['csv', 'db', 'json'], default=config.SAVE_DATA_OPTION) + help='where to save the data (csv or db or json or sqlite)', choices=['csv', 'db', 'json', 'sqlite'], default=config.SAVE_DATA_OPTION) parser.add_argument('--cookies', type=str, help='cookies used for cookie login type', default=config.COOKIES) diff --git a/config/base_config.py b/config/base_config.py index 1411f8c..cba590b 100644 --- a/config/base_config.py +++ b/config/base_config.py @@ -74,8 +74,8 @@ BROWSER_LAUNCH_TIMEOUT = 30 # 设置为False可以保持浏览器运行,便于调试 AUTO_CLOSE_BROWSER = True -# 数据保存类型选项配置,支持三种类型:csv、db、json, 最好保存到DB,有排重的功能。 -SAVE_DATA_OPTION = "json" # csv or db or json +# 数据保存类型选项配置,支持四种类型:csv、db、json、sqlite, 最好保存到DB,有排重的功能。 +SAVE_DATA_OPTION = "json" # csv or db or json or sqlite # 用户浏览器缓存的浏览器文件配置 USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name diff --git a/config/db_config.py b/config/db_config.py index 51d3fd0..e522630 100644 --- a/config/db_config.py +++ b/config/db_config.py @@ -12,11 +12,11 @@ import os # mysql config -RELATION_DB_PWD = os.getenv("RELATION_DB_PWD", "123456") -RELATION_DB_USER = os.getenv("RELATION_DB_USER", "root") -RELATION_DB_HOST = os.getenv("RELATION_DB_HOST", "localhost") -RELATION_DB_PORT = os.getenv("RELATION_DB_PORT", 3306) -RELATION_DB_NAME = os.getenv("RELATION_DB_NAME", "media_crawler") +MYSQL_DB_PWD = os.getenv("MYSQL_DB_PWD", "123456") +MYSQL_DB_USER = os.getenv("MYSQL_DB_USER", "root") +MYSQL_DB_HOST = os.getenv("MYSQL_DB_HOST", "localhost") +MYSQL_DB_PORT = os.getenv("MYSQL_DB_PORT", 3306) +MYSQL_DB_NAME = os.getenv("MYSQL_DB_NAME", "media_crawler") # redis config @@ -27,4 +27,7 @@ REDIS_DB_NUM = os.getenv("REDIS_DB_NUM", 0) # your redis db num # cache type CACHE_TYPE_REDIS = "redis" -CACHE_TYPE_MEMORY = "memory" \ No newline at end of file +CACHE_TYPE_MEMORY = "memory" + +# sqlite config +SQLITE_DB_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "schema", "sqlite_tables.db") \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 04690f8..3ac862a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ requires-python = ">=3.9" dependencies = [ "aiofiles~=23.2.1", "aiomysql==0.2.0", + "aiosqlite>=0.21.0", "fastapi==0.110.2", "httpx==0.24.0", "jieba==0.42.1", @@ -28,5 +29,5 @@ dependencies = [ ] [[tool.uv.index]] -url = "https://mirrors.aliyun.com/pypi/simple" +url = "https://pypi.tuna.tsinghua.edu.cn/simple" default = true