mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-03-02 04:00:45 +08:00
Merge pull request #839 from ravenling/fix-zhihu-comment
fix: 修复zhihu评论爬取分页问题
This commit is contained in:
@@ -299,8 +299,10 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
result: List[ZhihuComment] = []
|
||||
is_end: bool = False
|
||||
offset: str = ""
|
||||
prev_offset: str = ""
|
||||
limit: int = 10
|
||||
while not is_end:
|
||||
prev_offset = offset
|
||||
root_comment_res = await self.get_root_comments(content.content_id, content.content_type, offset, limit)
|
||||
if not root_comment_res:
|
||||
break
|
||||
@@ -312,6 +314,9 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
if not comments:
|
||||
break
|
||||
|
||||
if prev_offset == offset:
|
||||
break
|
||||
|
||||
if callback:
|
||||
await callback(comments)
|
||||
|
||||
@@ -348,8 +353,10 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
|
||||
is_end: bool = False
|
||||
offset: str = ""
|
||||
prev_offset: str = ""
|
||||
limit: int = 10
|
||||
while not is_end:
|
||||
prev_offset = offset
|
||||
child_comment_res = await self.get_child_comments(parment_comment.comment_id, offset, limit)
|
||||
if not child_comment_res:
|
||||
break
|
||||
@@ -361,6 +368,9 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
if not sub_comments:
|
||||
break
|
||||
|
||||
if prev_offset == offset:
|
||||
break
|
||||
|
||||
if callback:
|
||||
await callback(sub_comments)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user