mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-10 03:47:26 +08:00
fix: 修复zhihu评论爬取分页问题
This commit is contained in:
@@ -299,8 +299,10 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
|
|||||||
result: List[ZhihuComment] = []
|
result: List[ZhihuComment] = []
|
||||||
is_end: bool = False
|
is_end: bool = False
|
||||||
offset: str = ""
|
offset: str = ""
|
||||||
|
prev_offset: str = ""
|
||||||
limit: int = 10
|
limit: int = 10
|
||||||
while not is_end:
|
while not is_end:
|
||||||
|
prev_offset = offset
|
||||||
root_comment_res = await self.get_root_comments(content.content_id, content.content_type, offset, limit)
|
root_comment_res = await self.get_root_comments(content.content_id, content.content_type, offset, limit)
|
||||||
if not root_comment_res:
|
if not root_comment_res:
|
||||||
break
|
break
|
||||||
@@ -312,6 +314,9 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
|
|||||||
if not comments:
|
if not comments:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if prev_offset == offset:
|
||||||
|
break
|
||||||
|
|
||||||
if callback:
|
if callback:
|
||||||
await callback(comments)
|
await callback(comments)
|
||||||
|
|
||||||
@@ -348,8 +353,10 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
|
|||||||
|
|
||||||
is_end: bool = False
|
is_end: bool = False
|
||||||
offset: str = ""
|
offset: str = ""
|
||||||
|
prev_offset: str = ""
|
||||||
limit: int = 10
|
limit: int = 10
|
||||||
while not is_end:
|
while not is_end:
|
||||||
|
prev_offset = offset
|
||||||
child_comment_res = await self.get_child_comments(parment_comment.comment_id, offset, limit)
|
child_comment_res = await self.get_child_comments(parment_comment.comment_id, offset, limit)
|
||||||
if not child_comment_res:
|
if not child_comment_res:
|
||||||
break
|
break
|
||||||
@@ -361,6 +368,9 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
|
|||||||
if not sub_comments:
|
if not sub_comments:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if prev_offset == offset:
|
||||||
|
break
|
||||||
|
|
||||||
if callback:
|
if callback:
|
||||||
await callback(sub_comments)
|
await callback(sub_comments)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user