mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 19:37:25 +08:00
feat: db数据存储重构完成
This commit is contained in:
@@ -82,20 +82,19 @@ class DouyinDbStoreImplement(AbstractStore):
|
||||
Returns:
|
||||
|
||||
"""
|
||||
from .douyin_store_db_types import DouyinAweme
|
||||
|
||||
from .douyin_store_sql import (add_new_content,
|
||||
query_content_by_content_id,
|
||||
update_content_by_content_id)
|
||||
aweme_id = content_item.get("aweme_id")
|
||||
if not await DouyinAweme.filter(aweme_id=aweme_id).exists():
|
||||
aweme_detail: Dict = await query_content_by_content_id(content_id=aweme_id)
|
||||
if not aweme_detail:
|
||||
content_item["add_ts"] = utils.get_current_timestamp()
|
||||
douyin_aweme_pydantic = pydantic_model_creator(DouyinAweme, name='DouyinAwemeCreate', exclude=('id',))
|
||||
douyin_data = douyin_aweme_pydantic(**content_item)
|
||||
douyin_aweme_pydantic.model_validate(douyin_data)
|
||||
await DouyinAweme.create(**douyin_data.dict())
|
||||
if aweme_detail.get("title"):
|
||||
await add_new_content(content_item)
|
||||
else:
|
||||
douyin_aweme_pydantic = pydantic_model_creator(DouyinAweme, name='DouyinAwemeUpdate',
|
||||
exclude=('id', 'add_ts'))
|
||||
douyin_data = douyin_aweme_pydantic(**content_item)
|
||||
douyin_aweme_pydantic.model_validate(douyin_data)
|
||||
await DouyinAweme.filter(aweme_id=aweme_id).update(**douyin_data.model_dump())
|
||||
await update_content_by_content_id(aweme_id, content_item=content_item)
|
||||
|
||||
|
||||
async def store_comment(self, comment_item: Dict):
|
||||
"""
|
||||
@@ -106,21 +105,16 @@ class DouyinDbStoreImplement(AbstractStore):
|
||||
Returns:
|
||||
|
||||
"""
|
||||
from .douyin_store_db_types import DouyinAwemeComment
|
||||
from .douyin_store_sql import (add_new_comment,
|
||||
query_comment_by_comment_id,
|
||||
update_comment_by_comment_id)
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not await DouyinAwemeComment.filter(comment_id=comment_id).exists():
|
||||
comment_detail: Dict = await query_comment_by_comment_id(comment_id=comment_id)
|
||||
if not comment_detail:
|
||||
comment_item["add_ts"] = utils.get_current_timestamp()
|
||||
comment_pydantic = pydantic_model_creator(DouyinAwemeComment, name='DouyinAwemeCommentCreate',
|
||||
exclude=('id',))
|
||||
comment_data = comment_pydantic(**comment_item)
|
||||
comment_pydantic.model_validate(comment_data)
|
||||
await DouyinAwemeComment.create(**comment_data.model_dump())
|
||||
await add_new_comment(comment_item)
|
||||
else:
|
||||
comment_pydantic = pydantic_model_creator(DouyinAwemeComment, name='DouyinAwemeCommentUpdate',
|
||||
exclude=('id', 'add_ts'))
|
||||
comment_data = comment_pydantic(**comment_item)
|
||||
comment_pydantic.model_validate(comment_data)
|
||||
await DouyinAwemeComment.filter(comment_id=comment_id).update(**comment_data.model_dump())
|
||||
await update_comment_by_comment_id(comment_id, comment_item=comment_item)
|
||||
|
||||
|
||||
class DouyinJsonStoreImplement(AbstractStore):
|
||||
|
||||
102
store/douyin/douyin_store_sql.py
Normal file
102
store/douyin/douyin_store_sql.py
Normal file
@@ -0,0 +1,102 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Author : relakkes@gmail.com
|
||||
# @Time : 2024/4/6 15:30
|
||||
# @Desc : sql接口集合
|
||||
|
||||
from typing import Dict, List
|
||||
|
||||
from db import AsyncMysqlDB
|
||||
from var import media_crawler_db_var
|
||||
|
||||
|
||||
async def query_content_by_content_id(content_id: str) -> Dict:
    """
    Look up a single record in the ``douyin_aweme`` table by its aweme id.

    Args:
        content_id: value matched against the ``aweme_id`` column.

    Returns:
        The first matching row as a dict, or an empty dict when nothing matches.
    """
    async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
    # Bind the id as a query parameter instead of formatting it into the
    # statement text: a quote character inside a caller-supplied id would
    # otherwise break the statement or inject arbitrary SQL.
    # NOTE(review): assumes AsyncMysqlDB.query forwards extra positional
    # arguments to the MySQL driver as ``%s`` bind values -- confirm in db.py.
    sql: str = "select * from douyin_aweme where aweme_id = %s"
    rows: List[Dict] = await async_db_conn.query(sql, content_id)
    # An empty (or missing) result maps to an empty dict so callers can
    # simply truth-test the return value.
    return rows[0] if rows else dict()
|
||||
|
||||
|
||||
async def add_new_content(content_item: Dict) -> int:
    """
    Insert one record into the ``douyin_aweme`` table.

    Args:
        content_item: mapping handed unchanged to ``item_to_table`` as the
            row to store.

    Returns:
        Whatever ``AsyncMysqlDB.item_to_table`` returns (annotated as int;
        presumably the id of the inserted row -- confirm against db.py).
    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.item_to_table("douyin_aweme", content_item)
|
||||
|
||||
|
||||
async def update_content_by_content_id(content_id: str, content_item: Dict) -> int:
    """
    Update the ``douyin_aweme`` record identified by an aweme id.

    Args:
        content_id: value passed to ``update_table`` as the match value for
            the ``aweme_id`` field.
        content_item: mapping handed to ``update_table`` as the new data.

    Returns:
        Whatever ``AsyncMysqlDB.update_table`` returns (annotated as int;
        presumably the number of affected rows -- confirm against db.py).
    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.update_table("douyin_aweme", content_item, "aweme_id", content_id)
|
||||
|
||||
|
||||
|
||||
async def query_comment_by_comment_id(comment_id: str) -> Dict:
    """
    Look up a single record in the ``douyin_aweme_comment`` table by comment id.

    Args:
        comment_id: value matched against the ``comment_id`` column.

    Returns:
        The first matching row as a dict, or an empty dict when nothing matches.
    """
    async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
    # Bind the id as a query parameter instead of formatting it into the
    # statement text: a quote character inside a caller-supplied id would
    # otherwise break the statement or inject arbitrary SQL.
    # NOTE(review): assumes AsyncMysqlDB.query forwards extra positional
    # arguments to the MySQL driver as ``%s`` bind values -- confirm in db.py.
    sql: str = "select * from douyin_aweme_comment where comment_id = %s"
    rows: List[Dict] = await async_db_conn.query(sql, comment_id)
    # An empty (or missing) result maps to an empty dict so callers can
    # simply truth-test the return value.
    return rows[0] if rows else dict()
|
||||
|
||||
|
||||
async def add_new_comment(comment_item: Dict) -> int:
    """
    Insert one record into the ``douyin_aweme_comment`` table.

    Args:
        comment_item: mapping handed unchanged to ``item_to_table`` as the
            row to store.

    Returns:
        Whatever ``AsyncMysqlDB.item_to_table`` returns (annotated as int;
        presumably the id of the inserted row -- confirm against db.py).
    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.item_to_table("douyin_aweme_comment", comment_item)
|
||||
|
||||
|
||||
async def update_comment_by_comment_id(comment_id: str, comment_item: Dict) -> int:
    """
    Update the ``douyin_aweme_comment`` record identified by a comment id.

    Args:
        comment_id: value passed to ``update_table`` as the match value for
            the ``comment_id`` field.
        comment_item: mapping handed to ``update_table`` as the new data.

    Returns:
        Whatever ``AsyncMysqlDB.update_table`` returns (annotated as int;
        presumably the number of affected rows -- confirm against db.py).
    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.update_table("douyin_aweme_comment", comment_item, "comment_id", comment_id)
|
||||
Reference in New Issue
Block a user