fix(数据库): 修复模型字段类型以支持更广泛的数据格式

修复 xhs 评论存储方法，从批量处理改为单条处理
This commit is contained in:
persist-1
2025-09-07 04:10:49 +08:00
parent b04f5bcd6f
commit 684a16ed9a
3 changed files with 32 additions and 33 deletions

View File

@@ -161,12 +161,12 @@ class DyCreator(Base):
class KuaishouVideo(Base): class KuaishouVideo(Base):
__tablename__ = 'kuaishou_video' __tablename__ = 'kuaishou_video'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
user_id = Column(String(255)) user_id = Column(BigInteger)
nickname = Column(Text) nickname = Column(Text)
avatar = Column(Text) avatar = Column(Text)
add_ts = Column(BigInteger) add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger) last_modify_ts = Column(BigInteger)
video_id = Column(BigInteger, index=True) video_id = Column(String(255), index=True)
video_type = Column(Text) video_type = Column(Text)
title = Column(Text) title = Column(Text)
desc = Column(Text) desc = Column(Text)
@@ -181,13 +181,13 @@ class KuaishouVideo(Base):
class KuaishouVideoComment(Base): class KuaishouVideoComment(Base):
__tablename__ = 'kuaishou_video_comment' __tablename__ = 'kuaishou_video_comment'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
user_id = Column(Integer) user_id = Column(Text)
nickname = Column(Text) nickname = Column(Text)
avatar = Column(Text) avatar = Column(Text)
add_ts = Column(BigInteger) add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger) last_modify_ts = Column(BigInteger)
comment_id = Column(BigInteger, index=True) comment_id = Column(BigInteger, index=True)
video_id = Column(BigInteger, index=True) video_id = Column(String(255), index=True)
content = Column(Text) content = Column(Text)
create_time = Column(BigInteger) create_time = Column(BigInteger)
sub_comment_count = Column(Text) sub_comment_count = Column(Text)
@@ -206,7 +206,7 @@ class WeiboNote(Base):
note_id = Column(BigInteger, index=True) note_id = Column(BigInteger, index=True)
content = Column(Text) content = Column(Text)
create_time = Column(BigInteger, index=True) create_time = Column(BigInteger, index=True)
create_date_time = Column(BigInteger, index=True) create_date_time = Column(String(255), index=True)
liked_count = Column(Text) liked_count = Column(Text)
comments_count = Column(Text) comments_count = Column(Text)
shared_count = Column(Text) shared_count = Column(Text)
@@ -228,7 +228,7 @@ class WeiboNoteComment(Base):
note_id = Column(BigInteger, index=True) note_id = Column(BigInteger, index=True)
content = Column(Text) content = Column(Text)
create_time = Column(BigInteger) create_time = Column(BigInteger)
create_date_time = Column(BigInteger, index=True) create_date_time = Column(String(255), index=True)
comment_like_count = Column(Text) comment_like_count = Column(Text)
sub_comment_count = Column(Text) sub_comment_count = Column(Text)
parent_comment_id = Column(String(255)) parent_comment_id = Column(String(255))
@@ -273,7 +273,7 @@ class XhsNote(Base):
ip_location = Column(Text) ip_location = Column(Text)
add_ts = Column(BigInteger) add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger) last_modify_ts = Column(BigInteger)
note_id = Column(BigInteger, index=True) note_id = Column(String(255), index=True)
type = Column(Text) type = Column(Text)
title = Column(Text) title = Column(Text)
desc = Column(Text) desc = Column(Text)
@@ -299,7 +299,7 @@ class XhsNoteComment(Base):
ip_location = Column(Text) ip_location = Column(Text)
add_ts = Column(BigInteger) add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger) last_modify_ts = Column(BigInteger)
comment_id = Column(BigInteger, index=True) comment_id = Column(String(255), index=True)
create_time = Column(BigInteger, index=True) create_time = Column(BigInteger, index=True)
note_id = Column(String(255)) note_id = Column(String(255))
content = Column(Text) content = Column(Text)
@@ -311,15 +311,15 @@ class XhsNoteComment(Base):
class TiebaNote(Base): class TiebaNote(Base):
__tablename__ = 'tieba_note' __tablename__ = 'tieba_note'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
note_id = Column(BigInteger, index=True) note_id = Column(String(644), index=True)
title = Column(Text) title = Column(Text)
desc = Column(Text) desc = Column(Text)
note_url = Column(Text) note_url = Column(Text)
publish_time = Column(BigInteger, index=True) publish_time = Column(String(255), index=True)
user_link = Column(Text, default='') user_link = Column(Text, default='')
user_nickname = Column(Text, default='') user_nickname = Column(Text, default='')
user_avatar = Column(Text, default='') user_avatar = Column(Text, default='')
tieba_id = Column(Integer, default='') tieba_id = Column(String(255), default='')
tieba_name = Column(Text) tieba_name = Column(Text)
tieba_link = Column(Text) tieba_link = Column(Text)
total_replay_num = Column(Integer, default=0) total_replay_num = Column(Integer, default=0)
@@ -332,7 +332,7 @@ class TiebaNote(Base):
class TiebaComment(Base): class TiebaComment(Base):
__tablename__ = 'tieba_comment' __tablename__ = 'tieba_comment'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
comment_id = Column(BigInteger, index=True) comment_id = Column(String(255), index=True)
parent_comment_id = Column(String(255), default='') parent_comment_id = Column(String(255), default='')
content = Column(Text) content = Column(Text)
user_link = Column(Text, default='') user_link = Column(Text, default='')
@@ -341,10 +341,10 @@ class TiebaComment(Base):
tieba_id = Column(String(255), default='') tieba_id = Column(String(255), default='')
tieba_name = Column(Text) tieba_name = Column(Text)
tieba_link = Column(Text) tieba_link = Column(Text)
publish_time = Column(BigInteger, index=True) publish_time = Column(String(255), index=True)
ip_location = Column(Text, default='') ip_location = Column(Text, default='')
sub_comment_count = Column(Integer, default=0) sub_comment_count = Column(Integer, default=0)
note_id = Column(BigInteger, index=True) note_id = Column(String(255), index=True)
note_url = Column(Text) note_url = Column(Text)
add_ts = Column(BigInteger) add_ts = Column(BigInteger)
last_modify_ts = Column(BigInteger) last_modify_ts = Column(BigInteger)
@@ -352,7 +352,7 @@ class TiebaComment(Base):
class TiebaCreator(Base): class TiebaCreator(Base):
__tablename__ = 'tieba_creator' __tablename__ = 'tieba_creator'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
user_id = Column(String(255)) user_id = Column(String(64))
user_name = Column(Text) user_name = Column(Text)
nickname = Column(Text) nickname = Column(Text)
avatar = Column(Text) avatar = Column(Text)
@@ -367,14 +367,14 @@ class TiebaCreator(Base):
class ZhihuContent(Base): class ZhihuContent(Base):
__tablename__ = 'zhihu_content' __tablename__ = 'zhihu_content'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
content_id = Column(BigInteger, index=True) content_id = Column(String(64), index=True)
content_type = Column(Text) content_type = Column(Text)
content_text = Column(Text) content_text = Column(Text)
content_url = Column(Text) content_url = Column(Text)
question_id = Column(String(255)) question_id = Column(String(255))
title = Column(Text) title = Column(Text)
desc = Column(Text) desc = Column(Text)
created_time = Column(BigInteger, index=True) created_time = Column(String(32), index=True)
updated_time = Column(Text) updated_time = Column(Text)
voteup_count = Column(Integer, default=0) voteup_count = Column(Integer, default=0)
comment_count = Column(Integer, default=0) comment_count = Column(Integer, default=0)
@@ -395,17 +395,17 @@ class ZhihuContent(Base):
class ZhihuComment(Base): class ZhihuComment(Base):
__tablename__ = 'zhihu_comment' __tablename__ = 'zhihu_comment'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
comment_id = Column(BigInteger, index=True) comment_id = Column(String(64), index=True)
parent_comment_id = Column(String(255)) parent_comment_id = Column(String(64))
content = Column(Text) content = Column(Text)
publish_time = Column(BigInteger, index=True) publish_time = Column(String(32), index=True)
ip_location = Column(Text) ip_location = Column(Text)
sub_comment_count = Column(Integer, default=0) sub_comment_count = Column(Integer, default=0)
like_count = Column(Integer, default=0) like_count = Column(Integer, default=0)
dislike_count = Column(Integer, default=0) dislike_count = Column(Integer, default=0)
content_id = Column(BigInteger, index=True) content_id = Column(String(64), index=True)
content_type = Column(Text) content_type = Column(Text)
user_id = Column(String(255)) user_id = Column(String(64))
user_link = Column(Text) user_link = Column(Text)
user_nickname = Column(Text) user_nickname = Column(Text)
user_avatar = Column(Text) user_avatar = Column(Text)
@@ -415,7 +415,7 @@ class ZhihuComment(Base):
class ZhihuCreator(Base): class ZhihuCreator(Base):
__tablename__ = 'zhihu_creator' __tablename__ = 'zhihu_creator'
id = Column(Integer, primary_key=True) id = Column(Integer, primary_key=True)
user_id = Column(Integer, unique=True, index=True) user_id = Column(String(64), unique=True, index=True)
user_link = Column(Text) user_link = Column(Text)
user_nickname = Column(Text) user_nickname = Column(Text)
user_avatar = Column(Text) user_avatar = Column(Text)

View File

Binary file not shown.

View File

@@ -143,14 +143,13 @@ class XhsDbStoreImplement(AbstractStore):
result = await session.execute(stmt) result = await session.execute(stmt)
return result.first() is not None return result.first() is not None
async def store_comments(self, comments: List[Dict]): async def store_comment(self, comment_item: Dict):
if not comments: if not comment_item:
return return
async with get_session() as session: async with get_session() as session:
for comment_item in comments:
comment_id = comment_item.get("comment_id") comment_id = comment_item.get("comment_id")
if not comment_id: if not comment_id:
continue return
if await self.comment_is_exist(session, comment_id): if await self.comment_is_exist(session, comment_id):
await self.update_comment(session, comment_item) await self.update_comment(session, comment_item)
else: else: