fix: 修复 zhihu 评论爬取分页问题(offset 未变化时终止分页循环,避免重复请求同一页)

This commit is contained in:
ravenling
2026-02-28 15:57:55 +08:00
parent 13b6140f22
commit 95c3293b97

View File

@@ -299,8 +299,10 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
result: List[ZhihuComment] = [] result: List[ZhihuComment] = []
is_end: bool = False is_end: bool = False
offset: str = "" offset: str = ""
prev_offset: str = ""
limit: int = 10 limit: int = 10
while not is_end: while not is_end:
prev_offset = offset
root_comment_res = await self.get_root_comments(content.content_id, content.content_type, offset, limit) root_comment_res = await self.get_root_comments(content.content_id, content.content_type, offset, limit)
if not root_comment_res: if not root_comment_res:
break break
@@ -312,6 +314,9 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
if not comments: if not comments:
break break
if prev_offset == offset:
break
if callback: if callback:
await callback(comments) await callback(comments)
@@ -348,8 +353,10 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
is_end: bool = False is_end: bool = False
offset: str = "" offset: str = ""
prev_offset: str = ""
limit: int = 10 limit: int = 10
while not is_end: while not is_end:
prev_offset = offset
child_comment_res = await self.get_child_comments(parment_comment.comment_id, offset, limit) child_comment_res = await self.get_child_comments(parment_comment.comment_id, offset, limit)
if not child_comment_res: if not child_comment_res:
break break
@@ -361,6 +368,9 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
if not sub_comments: if not sub_comments:
break break
if prev_offset == offset:
break
if callback: if callback:
await callback(sub_comments) await callback(sub_comments)