diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..ec89970 --- /dev/null +++ b/.env.example @@ -0,0 +1,46 @@ +# MySQL Configuration +MYSQL_DB_PWD=123456 +MYSQL_DB_USER=root +MYSQL_DB_HOST=localhost +MYSQL_DB_PORT=3306 +MYSQL_DB_NAME=media_crawler + +# Redis Configuration +REDIS_DB_HOST=127.0.0.1 +REDIS_DB_PWD=123456 +REDIS_DB_PORT=6379 +REDIS_DB_NUM=0 + +# MongoDB Configuration +MONGODB_HOST=localhost +MONGODB_PORT=27017 +MONGODB_USER= +MONGODB_PWD= +MONGODB_DB_NAME=media_crawler + +# PostgreSQL Configuration +POSTGRES_DB_PWD=123456 +POSTGRES_DB_USER=postgres +POSTGRES_DB_HOST=localhost +POSTGRES_DB_PORT=5432 +POSTGRES_DB_NAME=media_crawler + +# Proxy Configuration (Wandou HTTP) +# your_wandou_http_app_key +WANDOU_APP_KEY= + +# Proxy Configuration (Kuaidaili) +# your_kuaidaili_secret_id +KDL_SECERT_ID= +# your_kuaidaili_signature +KDL_SIGNATURE= +# your_kuaidaili_username +KDL_USER_NAME= +# your_kuaidaili_password +KDL_USER_PWD= + +# Proxy Configuration (Jisu HTTP) +# Get JiSu HTTP IP extraction key value +jisu_key= +# Get JiSu HTTP IP extraction encryption signature +jisu_crypto= diff --git a/api/services/crawler_manager.py b/api/services/crawler_manager.py index 2d0aad5..f0fb228 100644 --- a/api/services/crawler_manager.py +++ b/api/services/crawler_manager.py @@ -124,6 +124,7 @@ class CrawlerManager: stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, + encoding='utf-8', bufsize=1, cwd=str(self._project_root), env={**os.environ, "PYTHONUNBUFFERED": "1"} diff --git a/cmd_arg/arg.py b/cmd_arg/arg.py index 2517c24..0ae0a21 100644 --- a/cmd_arg/arg.py +++ b/cmd_arg/arg.py @@ -73,6 +73,7 @@ class SaveDataOptionEnum(str, Enum): SQLITE = "sqlite" MONGODB = "mongodb" EXCEL = "excel" + POSTGRES = "postgres" class InitDbOptionEnum(str, Enum): @@ -80,6 +81,7 @@ class InitDbOptionEnum(str, Enum): SQLITE = "sqlite" MYSQL = "mysql" + POSTGRES = "postgres" def _to_bool(value: bool | str) -> bool: @@ -210,7 +212,7 @@ async def parse_cmd(argv: 
Optional[Sequence[str]] = None): SaveDataOptionEnum, typer.Option( "--save_data_option", - help="Data save option (csv=CSV file | db=MySQL database | json=JSON file | sqlite=SQLite database | mongodb=MongoDB database | excel=Excel file)", + help="Data save option (csv=CSV file | db=MySQL database | json=JSON file | sqlite=SQLite database | mongodb=MongoDB database | excel=Excel file | postgres=PostgreSQL database)", rich_help_panel="Storage Configuration", ), ] = _coerce_enum( @@ -220,7 +222,7 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None): Optional[InitDbOptionEnum], typer.Option( "--init_db", - help="Initialize database table structure (sqlite | mysql)", + help="Initialize database table structure (sqlite | mysql | postgres)", rich_help_panel="Storage Configuration", ), ] = None, diff --git a/config/base_config.py b/config/base_config.py index ff430ff..961d216 100644 --- a/config/base_config.py +++ b/config/base_config.py @@ -70,8 +70,8 @@ BROWSER_LAUNCH_TIMEOUT = 60 # 设置为False可以保持浏览器运行,便于调试 AUTO_CLOSE_BROWSER = True -# 数据保存类型选项配置,支持五种类型:csv、db、json、sqlite、excel, 最好保存到DB,有排重的功能。 -SAVE_DATA_OPTION = "json" # csv or db or json or sqlite or excel +# Data save type option; seven types are supported: csv, db, json, sqlite, mongodb, excel, postgres. Saving to a database is preferred because it de-duplicates records. +SAVE_DATA_OPTION = "json" # csv or db or json or sqlite or mongodb or excel or postgres # 用户浏览器缓存的浏览器文件配置 USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name diff --git a/config/db_config.py b/config/db_config.py index 052b000..ca30812 100644 --- a/config/db_config.py +++ b/config/db_config.py @@ -37,7 +37,7 @@ mysql_db_config = { # redis config -REDIS_DB_HOST = "127.0.0.1" # your redis host +REDIS_DB_HOST = os.getenv("REDIS_DB_HOST", "127.0.0.1") # your redis host REDIS_DB_PWD = os.getenv("REDIS_DB_PWD", "123456") # your redis password REDIS_DB_PORT = os.getenv("REDIS_DB_PORT", 6379) # your redis port REDIS_DB_NUM = os.getenv("REDIS_DB_NUM", 0) # your redis db num @@ -67,3 +67,18 @@ mongodb_config = { "password": MONGODB_PWD, 
"db_name": MONGODB_DB_NAME, } + +# postgres config +POSTGRES_DB_PWD = os.getenv("POSTGRES_DB_PWD", "123456") +POSTGRES_DB_USER = os.getenv("POSTGRES_DB_USER", "postgres") +POSTGRES_DB_HOST = os.getenv("POSTGRES_DB_HOST", "localhost") +POSTGRES_DB_PORT = os.getenv("POSTGRES_DB_PORT", 5432) +POSTGRES_DB_NAME = os.getenv("POSTGRES_DB_NAME", "media_crawler") + +postgres_db_config = { + "user": POSTGRES_DB_USER, + "password": POSTGRES_DB_PWD, + "host": POSTGRES_DB_HOST, + "port": POSTGRES_DB_PORT, + "db_name": POSTGRES_DB_NAME, +} diff --git a/database/db_session.py b/database/db_session.py index d9e9007..2a8073c 100644 --- a/database/db_session.py +++ b/database/db_session.py @@ -22,7 +22,7 @@ from sqlalchemy.orm import sessionmaker from contextlib import asynccontextmanager from .models import Base import config -from config.db_config import mysql_db_config, sqlite_db_config +from config.db_config import mysql_db_config, sqlite_db_config, postgres_db_config # Keep a cache of engines _engines = {} @@ -36,6 +36,18 @@ async def create_database_if_not_exists(db_type: str): async with engine.connect() as conn: await conn.execute(text(f"CREATE DATABASE IF NOT EXISTS {mysql_db_config['db_name']}")) await engine.dispose() + elif db_type == "postgres": + # Connect to the default 'postgres' database + server_url = f"postgresql+asyncpg://{postgres_db_config['user']}:{postgres_db_config['password']}@{postgres_db_config['host']}:{postgres_db_config['port']}/postgres" + print(f"[init_db] Connecting to Postgres: host={postgres_db_config['host']}, port={postgres_db_config['port']}, user={postgres_db_config['user']}, dbname=postgres") + # Isolation level AUTOCOMMIT is required for CREATE DATABASE + engine = create_async_engine(server_url, echo=False, isolation_level="AUTOCOMMIT") + async with engine.connect() as conn: + # Check if database exists; the name is bound as a parameter here and double-quoted in CREATE DATABASE so the check, the DDL and the connection URL all refer to exactly the same name + result = await conn.execute(text("SELECT 1 FROM pg_database WHERE datname = :db_name"), {"db_name": postgres_db_config['db_name']}) + if not 
result.scalar(): + await conn.execute(text('CREATE DATABASE "' + postgres_db_config['db_name'].replace('"', '""') + '"')) + await engine.dispose() def get_async_engine(db_type: str = None): @@ -52,6 +64,8 @@ def get_async_engine(db_type: str = None): db_url = f"sqlite+aiosqlite:///{sqlite_db_config['db_path']}" elif db_type == "mysql" or db_type == "db": db_url = f"mysql+asyncmy://{mysql_db_config['user']}:{mysql_db_config['password']}@{mysql_db_config['host']}:{mysql_db_config['port']}/{mysql_db_config['db_name']}" + elif db_type == "postgres": + db_url = f"postgresql+asyncpg://{postgres_db_config['user']}:{postgres_db_config['password']}@{postgres_db_config['host']}:{postgres_db_config['port']}/{postgres_db_config['db_name']}" else: raise ValueError(f"Unsupported database type: {db_type}") diff --git a/docs/data_storage_guide.md b/docs/data_storage_guide.md index 8dfa81f..f1accef 100644 --- a/docs/data_storage_guide.md +++ b/docs/data_storage_guide.md @@ -21,6 +21,9 @@ MediaCrawler 支持多种数据存储方式,您可以根据需求选择最适 - **MySQL 数据库**:支持关系型数据库 MySQL 中保存(需要提前创建数据库) 1. 初始化:`--init_db mysql` 2. 数据存储:`--save_data_option db`(db 参数为兼容历史更新保留) + - **PostgreSQL 数据库**:支持高级关系型数据库 PostgreSQL 中保存(推荐生产环境使用) + 1. 初始化:`--init_db postgres` + 2. 
数据存储:`--save_data_option postgres` #### 使用示例 @@ -41,6 +44,13 @@ uv run main.py --init_db mysql uv run main.py --platform xhs --lt qrcode --type search --save_data_option db ``` +```shell +# 初始化 PostgreSQL 数据库 +uv run main.py --init_db postgres +# 使用 PostgreSQL 存储数据 +uv run main.py --platform xhs --lt qrcode --type search --save_data_option postgres +``` + ```shell # 使用 CSV 存储数据 uv run main.py --platform xhs --lt qrcode --type search --save_data_option csv diff --git a/pyproject.toml b/pyproject.toml index 9c8bba6..6329f4c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "pytest>=7.4.0", "pytest-asyncio>=0.21.0", "websockets>=15.0.1", + "asyncpg>=0.31.0", ] [[tool.uv.index]] diff --git a/store/bilibili/__init__.py b/store/bilibili/__init__.py index 06faa65..2e6cc56 100644 --- a/store/bilibili/__init__.py +++ b/store/bilibili/__init__.py @@ -35,6 +35,7 @@ class BiliStoreFactory: STORES = { "csv": BiliCsvStoreImplement, "db": BiliDbStoreImplement, + "postgres": BiliDbStoreImplement, "json": BiliJsonStoreImplement, "sqlite": BiliSqliteStoreImplement, "mongodb": BiliMongoStoreImplement, diff --git a/store/bilibili/_store_impl.py b/store/bilibili/_store_impl.py index 278d601..5ec8837 100644 --- a/store/bilibili/_store_impl.py +++ b/store/bilibili/_store_impl.py @@ -128,16 +128,23 @@ class BiliDbStoreImplement(AbstractStore): Args: content_item: content item dict """ - video_id = content_item.get("video_id") + video_id = int(content_item.get("video_id")) + content_item["video_id"] = video_id + content_item["user_id"] = int(content_item.get("user_id", 0) or 0) + content_item["liked_count"] = int(content_item.get("liked_count", 0) or 0) + content_item["create_time"] = int(content_item.get("create_time", 0) or 0) + async with get_session() as session: result = await session.execute(select(BilibiliVideo).where(BilibiliVideo.video_id == video_id)) video_detail = result.scalar_one_or_none() if not video_detail: content_item["add_ts"] = 
utils.get_current_timestamp() + content_item["last_modify_ts"] = utils.get_current_timestamp() new_content = BilibiliVideo(**content_item) session.add(new_content) else: + content_item["last_modify_ts"] = utils.get_current_timestamp() for key, value in content_item.items(): setattr(video_detail, key, value) await session.commit() @@ -148,16 +155,25 @@ class BiliDbStoreImplement(AbstractStore): Args: comment_item: comment item dict """ - comment_id = comment_item.get("comment_id") + comment_id = int(comment_item.get("comment_id")) + comment_item["comment_id"] = comment_id + comment_item["video_id"] = int(comment_item.get("video_id", 0) or 0) + comment_item["create_time"] = int(comment_item.get("create_time", 0) or 0) + comment_item["like_count"] = str(comment_item.get("like_count", "0")) + comment_item["sub_comment_count"] = str(comment_item.get("sub_comment_count", "0")) + comment_item["parent_comment_id"] = str(comment_item.get("parent_comment_id", "0")) + async with get_session() as session: result = await session.execute(select(BilibiliVideoComment).where(BilibiliVideoComment.comment_id == comment_id)) comment_detail = result.scalar_one_or_none() if not comment_detail: comment_item["add_ts"] = utils.get_current_timestamp() + comment_item["last_modify_ts"] = utils.get_current_timestamp() new_comment = BilibiliVideoComment(**comment_item) session.add(new_comment) else: + comment_item["last_modify_ts"] = utils.get_current_timestamp() for key, value in comment_item.items(): setattr(comment_detail, key, value) await session.commit() @@ -168,16 +184,24 @@ class BiliDbStoreImplement(AbstractStore): Args: creator: creator item dict """ - creator_id = creator.get("user_id") + creator_id = int(creator.get("user_id")) + creator["user_id"] = creator_id + creator["total_fans"] = int(creator.get("total_fans", 0) or 0) + creator["total_liked"] = int(creator.get("total_liked", 0) or 0) + creator["user_rank"] = int(creator.get("user_rank", 0) or 0) + creator["is_official"] = 
int(creator.get("is_official", 0) or 0) + async with get_session() as session: result = await session.execute(select(BilibiliUpInfo).where(BilibiliUpInfo.user_id == creator_id)) creator_detail = result.scalar_one_or_none() if not creator_detail: creator["add_ts"] = utils.get_current_timestamp() + creator["last_modify_ts"] = utils.get_current_timestamp() new_creator = BilibiliUpInfo(**creator) session.add(new_creator) else: + creator["last_modify_ts"] = utils.get_current_timestamp() for key, value in creator.items(): setattr(creator_detail, key, value) await session.commit() @@ -188,8 +212,11 @@ class BiliDbStoreImplement(AbstractStore): Args: contact_item: contact item dict """ - up_id = contact_item.get("up_id") - fan_id = contact_item.get("fan_id") + up_id = int(contact_item.get("up_id")) + fan_id = int(contact_item.get("fan_id")) + contact_item["up_id"] = up_id + contact_item["fan_id"] = fan_id + async with get_session() as session: result = await session.execute( select(BilibiliContactInfo).where(BilibiliContactInfo.up_id == up_id, BilibiliContactInfo.fan_id == fan_id) @@ -198,9 +225,11 @@ class BiliDbStoreImplement(AbstractStore): if not contact_detail: contact_item["add_ts"] = utils.get_current_timestamp() + contact_item["last_modify_ts"] = utils.get_current_timestamp() new_contact = BilibiliContactInfo(**contact_item) session.add(new_contact) else: + contact_item["last_modify_ts"] = utils.get_current_timestamp() for key, value in contact_item.items(): setattr(contact_detail, key, value) await session.commit() @@ -211,16 +240,20 @@ class BiliDbStoreImplement(AbstractStore): Args: dynamic_item: dynamic item dict """ - dynamic_id = dynamic_item.get("dynamic_id") + dynamic_id = int(dynamic_item.get("dynamic_id")) + dynamic_item["dynamic_id"] = dynamic_id + async with get_session() as session: result = await session.execute(select(BilibiliUpDynamic).where(BilibiliUpDynamic.dynamic_id == dynamic_id)) dynamic_detail = result.scalar_one_or_none() if not 
dynamic_detail: dynamic_item["add_ts"] = utils.get_current_timestamp() + dynamic_item["last_modify_ts"] = utils.get_current_timestamp() new_dynamic = BilibiliUpDynamic(**dynamic_item) session.add(new_dynamic) else: + dynamic_item["last_modify_ts"] = utils.get_current_timestamp() for key, value in dynamic_item.items(): setattr(dynamic_detail, key, value) await session.commit() diff --git a/store/douyin/__init__.py b/store/douyin/__init__.py index ef001c3..cb8f774 100644 --- a/store/douyin/__init__.py +++ b/store/douyin/__init__.py @@ -34,6 +34,7 @@ class DouyinStoreFactory: STORES = { "csv": DouyinCsvStoreImplement, "db": DouyinDbStoreImplement, + "postgres": DouyinDbStoreImplement, "json": DouyinJsonStoreImplement, "sqlite": DouyinSqliteStoreImplement, "mongodb": DouyinMongoStoreImplement, diff --git a/store/douyin/_store_impl.py b/store/douyin/_store_impl.py index 71b8584..19022b0 100644 --- a/store/douyin/_store_impl.py +++ b/store/douyin/_store_impl.py @@ -97,7 +97,7 @@ class DouyinDbStoreImplement(AbstractStore): Args: content_item: content item dict """ - aweme_id = content_item.get("aweme_id") + aweme_id = content_item["aweme_id"] = int(content_item.get("aweme_id")) async with get_session() as session: result = await session.execute(select(DouyinAweme).where(DouyinAweme.aweme_id == aweme_id)) aweme_detail = result.scalar_one_or_none() @@ -118,7 +118,7 @@ class DouyinDbStoreImplement(AbstractStore): Args: comment_item: comment item dict """ - comment_id = comment_item.get("comment_id") + comment_id = comment_item["comment_id"] = int(comment_item.get("comment_id")) async with get_session() as session: result = await session.execute(select(DouyinAwemeComment).where(DouyinAwemeComment.comment_id == comment_id)) comment_detail = result.scalar_one_or_none() diff --git a/store/kuaishou/__init__.py b/store/kuaishou/__init__.py index 3c6ef48..dc2f111 100644 --- a/store/kuaishou/__init__.py +++ b/store/kuaishou/__init__.py @@ -34,6 +34,7 @@ class KuaishouStoreFactory: STORES = { "csv": KuaishouCsvStoreImplement, "db": 
KuaishouDbStoreImplement, + "postgres": KuaishouDbStoreImplement, "json": KuaishouJsonStoreImplement, "sqlite": KuaishouSqliteStoreImplement, "mongodb": KuaishouMongoStoreImplement, diff --git a/store/kuaishou/_store_impl.py b/store/kuaishou/_store_impl.py index 22fb1c3..9b5209b 100644 --- a/store/kuaishou/_store_impl.py +++ b/store/kuaishou/_store_impl.py @@ -109,7 +109,8 @@ class KuaishouDbStoreImplement(AbstractStore): session.add(new_content) else: for key, value in content_item.items(): - setattr(video_detail, key, value) + if hasattr(video_detail, key): + setattr(video_detail, key, value) await session.commit() async def store_comment(self, comment_item: Dict): @@ -130,7 +131,8 @@ class KuaishouDbStoreImplement(AbstractStore): session.add(new_comment) else: for key, value in comment_item.items(): - setattr(comment_detail, key, value) + if hasattr(comment_detail, key): + setattr(comment_detail, key, value) await session.commit() diff --git a/store/tieba/__init__.py b/store/tieba/__init__.py index 349baef..7850287 100644 --- a/store/tieba/__init__.py +++ b/store/tieba/__init__.py @@ -31,6 +31,7 @@ class TieBaStoreFactory: STORES = { "csv": TieBaCsvStoreImplement, "db": TieBaDbStoreImplement, + "postgres": TieBaDbStoreImplement, "json": TieBaJsonStoreImplement, "sqlite": TieBaSqliteStoreImplement, "mongodb": TieBaMongoStoreImplement, diff --git a/store/weibo/__init__.py b/store/weibo/__init__.py index 90efb09..1e09d67 100644 --- a/store/weibo/__init__.py +++ b/store/weibo/__init__.py @@ -35,6 +35,7 @@ class WeibostoreFactory: STORES = { "csv": WeiboCsvStoreImplement, "db": WeiboDbStoreImplement, + "postgres": WeiboDbStoreImplement, "json": WeiboJsonStoreImplement, "sqlite": WeiboSqliteStoreImplement, "mongodb": WeiboMongoStoreImplement, diff --git a/store/weibo/_store_impl.py b/store/weibo/_store_impl.py index bbda929..44a0754 100644 --- a/store/weibo/_store_impl.py +++ b/store/weibo/_store_impl.py @@ -108,7 +108,8 @@ class WeiboDbStoreImplement(AbstractStore): 
Returns: """ - note_id = content_item.get("note_id") + note_id = int(content_item.get("note_id")) + content_item["note_id"] = note_id async with get_session() as session: stmt = select(WeiboNote).where(WeiboNote.note_id == note_id) res = await session.execute(stmt) @@ -134,7 +135,14 @@ class WeiboDbStoreImplement(AbstractStore): Returns: """ - comment_id = comment_item.get("comment_id") + comment_id = int(comment_item.get("comment_id")) + comment_item["comment_id"] = comment_id + comment_item["note_id"] = int(comment_item.get("note_id", 0) or 0) + comment_item["create_time"] = int(comment_item.get("create_time", 0) or 0) + comment_item["comment_like_count"] = str(comment_item.get("comment_like_count", "0")) + comment_item["sub_comment_count"] = str(comment_item.get("sub_comment_count", "0")) + comment_item["parent_comment_id"] = str(comment_item.get("parent_comment_id", "0")) + async with get_session() as session: stmt = select(WeiboNoteComment).where(WeiboNoteComment.comment_id == comment_id) res = await session.execute(stmt) @@ -160,7 +168,8 @@ class WeiboDbStoreImplement(AbstractStore): Returns: """ - user_id = creator.get("user_id") + user_id = int(creator.get("user_id")) + creator["user_id"] = user_id async with get_session() as session: stmt = select(WeiboCreator).where(WeiboCreator.user_id == user_id) res = await session.execute(stmt) diff --git a/store/xhs/__init__.py b/store/xhs/__init__.py index 08e9850..32f4eaf 100644 --- a/store/xhs/__init__.py +++ b/store/xhs/__init__.py @@ -34,6 +34,7 @@ class XhsStoreFactory: STORES = { "csv": XhsCsvStoreImplement, "db": XhsDbStoreImplement, + "postgres": XhsDbStoreImplement, "json": XhsJsonStoreImplement, "sqlite": XhsSqliteStoreImplement, "mongodb": XhsMongoStoreImplement, diff --git a/store/xhs/_store_impl.py b/store/xhs/_store_impl.py index 3b8d6d1..5eac9af 100644 --- a/store/xhs/_store_impl.py +++ b/store/xhs/_store_impl.py @@ -189,9 +189,9 @@ class XhsDbStoreImplement(AbstractStore): 
create_time=comment_item.get("create_time"), note_id=comment_item.get("note_id"), content=comment_item.get("content"), - sub_comment_count=comment_item.get("sub_comment_count"), + sub_comment_count=int(comment_item.get("sub_comment_count", 0) or 0), pictures=json.dumps(comment_item.get("pictures")), - parent_comment_id=comment_item.get("parent_comment_id"), + parent_comment_id=str(comment_item.get("parent_comment_id", "")), like_count=str(comment_item.get("like_count")) ) session.add(comment) @@ -202,7 +202,7 @@ class XhsDbStoreImplement(AbstractStore): update_data = { "last_modify_ts": last_modify_ts, "like_count": str(comment_item.get("like_count")), - "sub_comment_count": comment_item.get("sub_comment_count"), + "sub_comment_count": int(comment_item.get("sub_comment_count", 0) or 0), } stmt = update(XhsNoteComment).where(XhsNoteComment.comment_id == comment_id).values(**update_data) await session.execute(stmt) diff --git a/store/zhihu/__init__.py b/store/zhihu/__init__.py index f4a0acc..593085d 100644 --- a/store/zhihu/__init__.py +++ b/store/zhihu/__init__.py @@ -38,6 +38,7 @@ class ZhihuStoreFactory: STORES = { "csv": ZhihuCsvStoreImplement, "db": ZhihuDbStoreImplement, + "postgres": ZhihuDbStoreImplement, "json": ZhihuJsonStoreImplement, "sqlite": ZhihuSqliteStoreImplement, "mongodb": ZhihuMongoStoreImplement, diff --git a/store/zhihu/_store_impl.py b/store/zhihu/_store_impl.py index 2b672ce..cd01552 100644 --- a/store/zhihu/_store_impl.py +++ b/store/zhihu/_store_impl.py @@ -110,7 +110,8 @@ class ZhihuDbStoreImplement(AbstractStore): existing_content = result.scalars().first() if existing_content: for key, value in content_item.items(): - setattr(existing_content, key, value) + if hasattr(existing_content, key): + setattr(existing_content, key, value) else: new_content = ZhihuContent(**content_item) session.add(new_content) @@ -129,7 +130,8 @@ class ZhihuDbStoreImplement(AbstractStore): existing_comment = result.scalars().first() if existing_comment: for 
key, value in comment_item.items(): - setattr(existing_comment, key, value) + if hasattr(existing_comment, key): + setattr(existing_comment, key, value) else: new_comment = ZhihuComment(**comment_item) session.add(new_comment) @@ -148,7 +150,8 @@ class ZhihuDbStoreImplement(AbstractStore): existing_creator = result.scalars().first() if existing_creator: for key, value in creator.items(): - setattr(existing_creator, key, value) + if hasattr(existing_creator, key): + setattr(existing_creator, key, value) else: new_creator = ZhihuCreator(**creator) session.add(new_creator) diff --git a/tools/browser_launcher.py b/tools/browser_launcher.py index 827e72c..6d272bd 100644 --- a/tools/browser_launcher.py +++ b/tools/browser_launcher.py @@ -228,7 +228,7 @@ class BrowserLauncher: # Try to get version info try: result = subprocess.run([browser_path, "--version"], - capture_output=True, text=True, timeout=5) + capture_output=True, text=True, encoding='utf-8', errors='ignore', timeout=5) version = result.stdout.strip() if result.stdout else "Unknown Version" except: version = "Unknown Version" @@ -266,6 +266,8 @@ class BrowserLauncher: ["taskkill", "/F", "/T", "/PID", str(process.pid)], capture_output=True, check=False, + encoding='utf-8', + errors='ignore' ) process.wait(timeout=5) else: diff --git a/uv.lock b/uv.lock index 2432593..479f77f 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.11" resolution-markers = [ "python_full_version >= '3.12' and sys_platform == 'darwin'", @@ -113,6 +112,54 @@ wheels = [ { url = "https://pypi.tuna.tsinghua.edu.cn/packages/94/08/7de4f4a17196c355e4706ceba0ab60627541c78011881a7c69f41c6414c5/asyncmy-0.2.10-cp312-cp312-win_amd64.whl", hash = "sha256:4c6674073be97ffb7ac7f909e803008b23e50281131fef4e30b7b2162141a574", size = 1679064 }, ] +[[package]] +name = "asyncpg" +version = "0.31.0" +source = { registry = "https://pypi.tuna.tsinghua.edu.cn/simple" } +sdist = { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/fe/cc/d18065ce2380d80b1bcce927c24a2642efd38918e33fd724bc4bca904877/asyncpg-0.31.0.tar.gz", hash = "sha256:c989386c83940bfbd787180f2b1519415e2d3d6277a70d9d0f0145ac73500735", size = 993667 } +wheels = [ + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/17/cc02bc49bc350623d050fa139e34ea512cd6e020562f2a7312a7bcae4bc9/asyncpg-0.31.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:eee690960e8ab85063ba93af2ce128c0f52fd655fdff9fdb1a28df01329f031d", size = 643159 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/a4/62/4ded7d400a7b651adf06f49ea8f73100cca07c6df012119594d1e3447aa6/asyncpg-0.31.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2657204552b75f8288de08ca60faf4a99a65deef3a71d1467454123205a88fab", size = 638157 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d6/5b/4179538a9a72166a0bf60ad783b1ef16efb7960e4d7b9afe9f77a5551680/asyncpg-0.31.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a429e842a3a4b4ea240ea52d7fe3f82d5149853249306f7ff166cb9948faa46c", size = 2918051 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/e6/35/c27719ae0536c5b6e61e4701391ffe435ef59539e9360959240d6e47c8c8/asyncpg-0.31.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c0807be46c32c963ae40d329b3a686356e417f674c976c07fa49f1b30303f109", size = 2972640 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/43/f4/01ebb9207f29e645a64699b9ce0eefeff8e7a33494e1d29bb53736f7766b/asyncpg-0.31.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e5d5098f63beeae93512ee513d4c0c53dc12e9aa2b7a1af5a81cddf93fe4e4da", size = 2851050 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3e/f4/03ff1426acc87be0f4e8d40fa2bff5c3952bef0080062af9efc2212e3be8/asyncpg-0.31.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:37fc6c00a814e18eef51833545d1891cac9aa69140598bb076b4cd29b3e010b9", size = 2962574 }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/c7/39/cc788dfca3d4060f9d93e67be396ceec458dfc429e26139059e58c2c244d/asyncpg-0.31.0-cp311-cp311-win32.whl", hash = "sha256:5a4af56edf82a701aece93190cc4e094d2df7d33f6e915c222fb09efbb5afc24", size = 521076 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/28/fc/735af5384c029eb7f1ca60ccb8fa95521dbdaeef788edf4cecfc604c3cab/asyncpg-0.31.0-cp311-cp311-win_amd64.whl", hash = "sha256:480c4befbdf079c14c9ca43c8c5e1fe8b6296c96f1f927158d4f1e750aacc047", size = 584980 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2a/a6/59d0a146e61d20e18db7396583242e32e0f120693b67a8de43f1557033e2/asyncpg-0.31.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b44c31e1efc1c15188ef183f287c728e2046abb1d26af4d20858215d50d91fad", size = 662042 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/36/01/ffaa189dcb63a2471720615e60185c3f6327716fdc0fc04334436fbb7c65/asyncpg-0.31.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0c89ccf741c067614c9b5fc7f1fc6f3b61ab05ae4aaa966e6fd6b93097c7d20d", size = 638504 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/9f/62/3f699ba45d8bd24c5d65392190d19656d74ff0185f42e19d0bbd973bb371/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:12b3b2e39dc5470abd5e98c8d3373e4b1d1234d9fbdedf538798b2c13c64460a", size = 3426241 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/8c/d1/a867c2150f9c6e7af6462637f613ba67f78a314b00db220cd26ff559d532/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:aad7a33913fb8bcb5454313377cc330fbb19a0cd5faa7272407d8a0c4257b671", size = 3520321 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7a/1a/cce4c3f246805ecd285a3591222a2611141f1669d002163abef999b60f98/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3df118d94f46d85b2e434fd62c84cb66d5834d5a890725fe625f498e72e4d5ec", size = 3316685 }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/40/ae/0fc961179e78cc579e138fad6eb580448ecae64908f95b8cb8ee2f241f67/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd5b6efff3c17c3202d4b37189969acf8927438a238c6257f66be3c426beba20", size = 3471858 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/52/b2/b20e09670be031afa4cbfabd645caece7f85ec62d69c312239de568e058e/asyncpg-0.31.0-cp312-cp312-win32.whl", hash = "sha256:027eaa61361ec735926566f995d959ade4796f6a49d3bde17e5134b9964f9ba8", size = 527852 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/b5/f0/f2ed1de154e15b107dc692262395b3c17fc34eafe2a78fc2115931561730/asyncpg-0.31.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d6bdcbc93d608a1158f17932de2321f68b1a967a13e014998db87a72ed3186", size = 597175 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/95/11/97b5c2af72a5d0b9bc3fa30cd4b9ce22284a9a943a150fdc768763caf035/asyncpg-0.31.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c204fab1b91e08b0f47e90a75d1b3c62174dab21f670ad6c5d0f243a228f015b", size = 661111 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/1b/71/157d611c791a5e2d0423f09f027bd499935f0906e0c2a416ce712ba51ef3/asyncpg-0.31.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:54a64f91839ba59008eccf7aad2e93d6e3de688d796f35803235ea1c4898ae1e", size = 636928 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/2e/fc/9e3486fb2bbe69d4a867c0b76d68542650a7ff1574ca40e84c3111bb0c6e/asyncpg-0.31.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e0822b1038dc7253b337b0f3f676cadc4ac31b126c5d42691c39691962e403", size = 3424067 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/12/c6/8c9d076f73f07f995013c791e018a1cd5f31823c2a3187fc8581706aa00f/asyncpg-0.31.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bef056aa502ee34204c161c72ca1f3c274917596877f825968368b2c33f585f4", size = 3518156 }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/ae/3b/60683a0baf50fbc546499cfb53132cb6835b92b529a05f6a81471ab60d0c/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0bfbcc5b7ffcd9b75ab1558f00db2ae07db9c80637ad1b2469c43df79d7a5ae2", size = 3319636 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/50/dc/8487df0f69bd398a61e1792b3cba0e47477f214eff085ba0efa7eac9ce87/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22bc525ebbdc24d1261ecbf6f504998244d4e3be1721784b5f64664d61fbe602", size = 3472079 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/13/a1/c5bbeeb8531c05c89135cb8b28575ac2fac618bcb60119ee9696c3faf71c/asyncpg-0.31.0-cp313-cp313-win32.whl", hash = "sha256:f890de5e1e4f7e14023619399a471ce4b71f5418cd67a51853b9910fdfa73696", size = 527606 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/91/66/b25ccb84a246b470eb943b0107c07edcae51804912b824054b3413995a10/asyncpg-0.31.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc5f2fa9916f292e5c5c8b2ac2813763bcd7f58e130055b4ad8a0531314201ab", size = 596569 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/36/e9450d62e84a13aea6580c83a47a437f26c7ca6fa0f0fd40b6670793ea30/asyncpg-0.31.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6b56b91bb0ffc328c4e3ed113136cddd9deefdf5f79ab448598b9772831df44", size = 660867 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/82/4b/1d0a2b33b3102d210439338e1beea616a6122267c0df459ff0265cd5807a/asyncpg-0.31.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:334dec28cf20d7f5bb9e45b39546ddf247f8042a690bff9b9573d00086e69cb5", size = 638349 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/41/aa/e7f7ac9a7974f08eff9183e392b2d62516f90412686532d27e196c0f0eeb/asyncpg-0.31.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98cc158c53f46de7bb677fd20c417e264fc02b36d901cc2a43bd6cb0dc6dbfd2", size = 3410428 }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/6f/de/bf1b60de3dede5c2731e6788617a512bc0ebd9693eac297ee74086f101d7/asyncpg-0.31.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9322b563e2661a52e3cdbc93eed3be7748b289f792e0011cb2720d278b366ce2", size = 3471678 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/46/78/fc3ade003e22d8bd53aaf8f75f4be48f0b460fa73738f0391b9c856a9147/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19857a358fc811d82227449b7ca40afb46e75b33eb8897240c3839dd8b744218", size = 3313505 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/bf/e9/73eb8a6789e927816f4705291be21f2225687bfa97321e40cd23055e903a/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ba5f8886e850882ff2c2ace5732300e99193823e8107e2c53ef01c1ebfa1e85d", size = 3434744 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/08/4b/f10b880534413c65c5b5862f79b8e81553a8f364e5238832ad4c0af71b7f/asyncpg-0.31.0-cp314-cp314-win32.whl", hash = "sha256:cea3a0b2a14f95834cee29432e4ddc399b95700eb1d51bbc5bfee8f31fa07b2b", size = 532251 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/d3/2d/7aa40750b7a19efa5d66e67fc06008ca0f27ba1bd082e457ad82f59aba49/asyncpg-0.31.0-cp314-cp314-win_amd64.whl", hash = "sha256:04d19392716af6b029411a0264d92093b6e5e8285ae97a39957b9a9c14ea72be", size = 604901 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ce/fe/b9dfe349b83b9dee28cc42360d2c86b2cdce4cb551a2c2d27e156bcac84d/asyncpg-0.31.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bdb957706da132e982cc6856bb2f7b740603472b54c3ebc77fe60ea3e57e1bd2", size = 702280 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/6a/81/e6be6e37e560bd91e6c23ea8a6138a04fd057b08cf63d3c5055c98e81c1d/asyncpg-0.31.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6d11b198111a72f47154fa03b85799f9be63701e068b43f84ac25da0bda9cb31", size = 682931 }, + { url = 
"https://pypi.tuna.tsinghua.edu.cn/packages/a6/45/6009040da85a1648dd5bc75b3b0a062081c483e75a1a29041ae63a0bf0dc/asyncpg-0.31.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18c83b03bc0d1b23e6230f5bf8d4f217dc9bc08644ce0502a9d91dc9e634a9c7", size = 3581608 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7e/06/2e3d4d7608b0b2b3adbee0d0bd6a2d29ca0fc4d8a78f8277df04e2d1fd7b/asyncpg-0.31.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e009abc333464ff18b8f6fd146addffd9aaf63e79aa3bb40ab7a4c332d0c5e9e", size = 3498738 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/7d/aa/7d75ede780033141c51d83577ea23236ba7d3a23593929b32b49db8ed36e/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3b1fbcb0e396a5ca435a8826a87e5c2c2cc0c8c68eb6fadf82168056b0e53a8c", size = 3401026 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/ba/7a/15e37d45e7f7c94facc1e9148c0e455e8f33c08f0b8a0b1deb2c5171771b/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8df714dba348efcc162d2adf02d213e5fab1bd9f557e1305633e851a61814a7a", size = 3429426 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/13/d5/71437c5f6ae5f307828710efbe62163974e71237d5d46ebd2869ea052d10/asyncpg-0.31.0-cp314-cp314t-win32.whl", hash = "sha256:1b41f1afb1033f2b44f3234993b15096ddc9cd71b21a42dbd87fc6a57b43d65d", size = 614495 }, + { url = "https://pypi.tuna.tsinghua.edu.cn/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062 }, +] + [[package]] name = "certifi" version = "2025.6.15" @@ -788,6 +835,7 @@ dependencies = [ { name = "aiosqlite" }, { name = "alembic" }, { name = "asyncmy" }, + { name = "asyncpg" }, { name = "cryptography" }, { name = "fastapi" }, { name = "httpx" }, @@ -824,6 +872,7 @@ requires-dist = 
[ { name = "aiosqlite", specifier = ">=0.21.0" }, { name = "alembic", specifier = ">=1.16.5" }, { name = "asyncmy", specifier = ">=0.2.10" }, + { name = "asyncpg", specifier = ">=0.31.0" }, { name = "cryptography", specifier = ">=45.0.7" }, { name = "fastapi", specifier = "==0.110.2" }, { name = "httpx", specifier = "==0.28.1" },