mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-10 03:47:26 +08:00
fix(数据库): 修复模型字段类型以支持更广泛的数据格式;
修复xhs评论存储方法,从批量处理改为单条处理
This commit is contained in:
@@ -161,12 +161,12 @@ class DyCreator(Base):
|
|||||||
class KuaishouVideo(Base):
|
class KuaishouVideo(Base):
|
||||||
__tablename__ = 'kuaishou_video'
|
__tablename__ = 'kuaishou_video'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
user_id = Column(String(255))
|
user_id = Column(BigInteger)
|
||||||
nickname = Column(Text)
|
nickname = Column(Text)
|
||||||
avatar = Column(Text)
|
avatar = Column(Text)
|
||||||
add_ts = Column(BigInteger)
|
add_ts = Column(BigInteger)
|
||||||
last_modify_ts = Column(BigInteger)
|
last_modify_ts = Column(BigInteger)
|
||||||
video_id = Column(BigInteger, index=True)
|
video_id = Column(String(255), index=True)
|
||||||
video_type = Column(Text)
|
video_type = Column(Text)
|
||||||
title = Column(Text)
|
title = Column(Text)
|
||||||
desc = Column(Text)
|
desc = Column(Text)
|
||||||
@@ -181,13 +181,13 @@ class KuaishouVideo(Base):
|
|||||||
class KuaishouVideoComment(Base):
|
class KuaishouVideoComment(Base):
|
||||||
__tablename__ = 'kuaishou_video_comment'
|
__tablename__ = 'kuaishou_video_comment'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
user_id = Column(Integer)
|
user_id = Column(Text)
|
||||||
nickname = Column(Text)
|
nickname = Column(Text)
|
||||||
avatar = Column(Text)
|
avatar = Column(Text)
|
||||||
add_ts = Column(BigInteger)
|
add_ts = Column(BigInteger)
|
||||||
last_modify_ts = Column(BigInteger)
|
last_modify_ts = Column(BigInteger)
|
||||||
comment_id = Column(BigInteger, index=True)
|
comment_id = Column(BigInteger, index=True)
|
||||||
video_id = Column(BigInteger, index=True)
|
video_id = Column(String(255), index=True)
|
||||||
content = Column(Text)
|
content = Column(Text)
|
||||||
create_time = Column(BigInteger)
|
create_time = Column(BigInteger)
|
||||||
sub_comment_count = Column(Text)
|
sub_comment_count = Column(Text)
|
||||||
@@ -206,7 +206,7 @@ class WeiboNote(Base):
|
|||||||
note_id = Column(BigInteger, index=True)
|
note_id = Column(BigInteger, index=True)
|
||||||
content = Column(Text)
|
content = Column(Text)
|
||||||
create_time = Column(BigInteger, index=True)
|
create_time = Column(BigInteger, index=True)
|
||||||
create_date_time = Column(BigInteger, index=True)
|
create_date_time = Column(String(255), index=True)
|
||||||
liked_count = Column(Text)
|
liked_count = Column(Text)
|
||||||
comments_count = Column(Text)
|
comments_count = Column(Text)
|
||||||
shared_count = Column(Text)
|
shared_count = Column(Text)
|
||||||
@@ -228,7 +228,7 @@ class WeiboNoteComment(Base):
|
|||||||
note_id = Column(BigInteger, index=True)
|
note_id = Column(BigInteger, index=True)
|
||||||
content = Column(Text)
|
content = Column(Text)
|
||||||
create_time = Column(BigInteger)
|
create_time = Column(BigInteger)
|
||||||
create_date_time = Column(BigInteger, index=True)
|
create_date_time = Column(String(255), index=True)
|
||||||
comment_like_count = Column(Text)
|
comment_like_count = Column(Text)
|
||||||
sub_comment_count = Column(Text)
|
sub_comment_count = Column(Text)
|
||||||
parent_comment_id = Column(String(255))
|
parent_comment_id = Column(String(255))
|
||||||
@@ -273,7 +273,7 @@ class XhsNote(Base):
|
|||||||
ip_location = Column(Text)
|
ip_location = Column(Text)
|
||||||
add_ts = Column(BigInteger)
|
add_ts = Column(BigInteger)
|
||||||
last_modify_ts = Column(BigInteger)
|
last_modify_ts = Column(BigInteger)
|
||||||
note_id = Column(BigInteger, index=True)
|
note_id = Column(String(255), index=True)
|
||||||
type = Column(Text)
|
type = Column(Text)
|
||||||
title = Column(Text)
|
title = Column(Text)
|
||||||
desc = Column(Text)
|
desc = Column(Text)
|
||||||
@@ -299,7 +299,7 @@ class XhsNoteComment(Base):
|
|||||||
ip_location = Column(Text)
|
ip_location = Column(Text)
|
||||||
add_ts = Column(BigInteger)
|
add_ts = Column(BigInteger)
|
||||||
last_modify_ts = Column(BigInteger)
|
last_modify_ts = Column(BigInteger)
|
||||||
comment_id = Column(BigInteger, index=True)
|
comment_id = Column(String(255), index=True)
|
||||||
create_time = Column(BigInteger, index=True)
|
create_time = Column(BigInteger, index=True)
|
||||||
note_id = Column(String(255))
|
note_id = Column(String(255))
|
||||||
content = Column(Text)
|
content = Column(Text)
|
||||||
@@ -311,15 +311,15 @@ class XhsNoteComment(Base):
|
|||||||
class TiebaNote(Base):
|
class TiebaNote(Base):
|
||||||
__tablename__ = 'tieba_note'
|
__tablename__ = 'tieba_note'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
note_id = Column(BigInteger, index=True)
|
note_id = Column(String(644), index=True)
|
||||||
title = Column(Text)
|
title = Column(Text)
|
||||||
desc = Column(Text)
|
desc = Column(Text)
|
||||||
note_url = Column(Text)
|
note_url = Column(Text)
|
||||||
publish_time = Column(BigInteger, index=True)
|
publish_time = Column(String(255), index=True)
|
||||||
user_link = Column(Text, default='')
|
user_link = Column(Text, default='')
|
||||||
user_nickname = Column(Text, default='')
|
user_nickname = Column(Text, default='')
|
||||||
user_avatar = Column(Text, default='')
|
user_avatar = Column(Text, default='')
|
||||||
tieba_id = Column(Integer, default='')
|
tieba_id = Column(String(255), default='')
|
||||||
tieba_name = Column(Text)
|
tieba_name = Column(Text)
|
||||||
tieba_link = Column(Text)
|
tieba_link = Column(Text)
|
||||||
total_replay_num = Column(Integer, default=0)
|
total_replay_num = Column(Integer, default=0)
|
||||||
@@ -332,7 +332,7 @@ class TiebaNote(Base):
|
|||||||
class TiebaComment(Base):
|
class TiebaComment(Base):
|
||||||
__tablename__ = 'tieba_comment'
|
__tablename__ = 'tieba_comment'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
comment_id = Column(BigInteger, index=True)
|
comment_id = Column(String(255), index=True)
|
||||||
parent_comment_id = Column(String(255), default='')
|
parent_comment_id = Column(String(255), default='')
|
||||||
content = Column(Text)
|
content = Column(Text)
|
||||||
user_link = Column(Text, default='')
|
user_link = Column(Text, default='')
|
||||||
@@ -341,10 +341,10 @@ class TiebaComment(Base):
|
|||||||
tieba_id = Column(String(255), default='')
|
tieba_id = Column(String(255), default='')
|
||||||
tieba_name = Column(Text)
|
tieba_name = Column(Text)
|
||||||
tieba_link = Column(Text)
|
tieba_link = Column(Text)
|
||||||
publish_time = Column(BigInteger, index=True)
|
publish_time = Column(String(255), index=True)
|
||||||
ip_location = Column(Text, default='')
|
ip_location = Column(Text, default='')
|
||||||
sub_comment_count = Column(Integer, default=0)
|
sub_comment_count = Column(Integer, default=0)
|
||||||
note_id = Column(BigInteger, index=True)
|
note_id = Column(String(255), index=True)
|
||||||
note_url = Column(Text)
|
note_url = Column(Text)
|
||||||
add_ts = Column(BigInteger)
|
add_ts = Column(BigInteger)
|
||||||
last_modify_ts = Column(BigInteger)
|
last_modify_ts = Column(BigInteger)
|
||||||
@@ -352,7 +352,7 @@ class TiebaComment(Base):
|
|||||||
class TiebaCreator(Base):
|
class TiebaCreator(Base):
|
||||||
__tablename__ = 'tieba_creator'
|
__tablename__ = 'tieba_creator'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
user_id = Column(String(255))
|
user_id = Column(String(64))
|
||||||
user_name = Column(Text)
|
user_name = Column(Text)
|
||||||
nickname = Column(Text)
|
nickname = Column(Text)
|
||||||
avatar = Column(Text)
|
avatar = Column(Text)
|
||||||
@@ -367,14 +367,14 @@ class TiebaCreator(Base):
|
|||||||
class ZhihuContent(Base):
|
class ZhihuContent(Base):
|
||||||
__tablename__ = 'zhihu_content'
|
__tablename__ = 'zhihu_content'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
content_id = Column(BigInteger, index=True)
|
content_id = Column(String(64), index=True)
|
||||||
content_type = Column(Text)
|
content_type = Column(Text)
|
||||||
content_text = Column(Text)
|
content_text = Column(Text)
|
||||||
content_url = Column(Text)
|
content_url = Column(Text)
|
||||||
question_id = Column(String(255))
|
question_id = Column(String(255))
|
||||||
title = Column(Text)
|
title = Column(Text)
|
||||||
desc = Column(Text)
|
desc = Column(Text)
|
||||||
created_time = Column(BigInteger, index=True)
|
created_time = Column(String(32), index=True)
|
||||||
updated_time = Column(Text)
|
updated_time = Column(Text)
|
||||||
voteup_count = Column(Integer, default=0)
|
voteup_count = Column(Integer, default=0)
|
||||||
comment_count = Column(Integer, default=0)
|
comment_count = Column(Integer, default=0)
|
||||||
@@ -395,17 +395,17 @@ class ZhihuContent(Base):
|
|||||||
class ZhihuComment(Base):
|
class ZhihuComment(Base):
|
||||||
__tablename__ = 'zhihu_comment'
|
__tablename__ = 'zhihu_comment'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
comment_id = Column(BigInteger, index=True)
|
comment_id = Column(String(64), index=True)
|
||||||
parent_comment_id = Column(String(255))
|
parent_comment_id = Column(String(64))
|
||||||
content = Column(Text)
|
content = Column(Text)
|
||||||
publish_time = Column(BigInteger, index=True)
|
publish_time = Column(String(32), index=True)
|
||||||
ip_location = Column(Text)
|
ip_location = Column(Text)
|
||||||
sub_comment_count = Column(Integer, default=0)
|
sub_comment_count = Column(Integer, default=0)
|
||||||
like_count = Column(Integer, default=0)
|
like_count = Column(Integer, default=0)
|
||||||
dislike_count = Column(Integer, default=0)
|
dislike_count = Column(Integer, default=0)
|
||||||
content_id = Column(BigInteger, index=True)
|
content_id = Column(String(64), index=True)
|
||||||
content_type = Column(Text)
|
content_type = Column(Text)
|
||||||
user_id = Column(String(255))
|
user_id = Column(String(64))
|
||||||
user_link = Column(Text)
|
user_link = Column(Text)
|
||||||
user_nickname = Column(Text)
|
user_nickname = Column(Text)
|
||||||
user_avatar = Column(Text)
|
user_avatar = Column(Text)
|
||||||
@@ -415,7 +415,7 @@ class ZhihuComment(Base):
|
|||||||
class ZhihuCreator(Base):
|
class ZhihuCreator(Base):
|
||||||
__tablename__ = 'zhihu_creator'
|
__tablename__ = 'zhihu_creator'
|
||||||
id = Column(Integer, primary_key=True)
|
id = Column(Integer, primary_key=True)
|
||||||
user_id = Column(Integer, unique=True, index=True)
|
user_id = Column(String(64), unique=True, index=True)
|
||||||
user_link = Column(Text)
|
user_link = Column(Text)
|
||||||
user_nickname = Column(Text)
|
user_nickname = Column(Text)
|
||||||
user_avatar = Column(Text)
|
user_avatar = Column(Text)
|
||||||
|
|||||||
Binary file not shown.
@@ -143,14 +143,13 @@ class XhsDbStoreImplement(AbstractStore):
|
|||||||
result = await session.execute(stmt)
|
result = await session.execute(stmt)
|
||||||
return result.first() is not None
|
return result.first() is not None
|
||||||
|
|
||||||
async def store_comments(self, comments: List[Dict]):
|
async def store_comment(self, comment_item: Dict):
|
||||||
if not comments:
|
if not comment_item:
|
||||||
return
|
return
|
||||||
async with get_session() as session:
|
async with get_session() as session:
|
||||||
for comment_item in comments:
|
|
||||||
comment_id = comment_item.get("comment_id")
|
comment_id = comment_item.get("comment_id")
|
||||||
if not comment_id:
|
if not comment_id:
|
||||||
continue
|
return
|
||||||
if await self.comment_is_exist(session, comment_id):
|
if await self.comment_is_exist(session, comment_id):
|
||||||
await self.update_comment(session, comment_item)
|
await self.update_comment(session, comment_item)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user