feat: 知乎支持创作者主页数据爬取(回答、文章、视频)

This commit is contained in:
Relakkes
2024-10-16 21:02:27 +08:00
parent af9d2d8e84
commit da8f1c62b8
8 changed files with 511 additions and 66 deletions

View File

@@ -3,7 +3,7 @@ from typing import List
import config
from base.base_crawler import AbstractStore
from model.m_zhihu import ZhihuComment, ZhihuContent
from model.m_zhihu import ZhihuComment, ZhihuContent, ZhihuCreator
from store.zhihu.zhihu_store_impl import (ZhihuCsvStoreImplement,
ZhihuDbStoreImplement,
ZhihuJsonStoreImplement)
@@ -25,6 +25,21 @@ class ZhihuStoreFactory:
raise ValueError("[ZhihuStoreFactory.create_store] Invalid save option only supported csv or db or json ...")
return store_class()
async def batch_update_zhihu_contents(contents: List[ZhihuContent]):
    """
    Store a batch of Zhihu contents, one item at a time.

    Each item is handed to ``update_zhihu_content`` and awaited before the
    next one is processed, so items are handled in the order given.

    Args:
        contents: Zhihu content items to store. A falsy value (for example
            an empty list or ``None``) makes the call a no-op.

    Returns:
        None
    """
    # ``or ()`` collapses any falsy argument into an empty iteration, which
    # is the same outcome as returning early.
    for entry in contents or ():
        await update_zhihu_content(entry)
async def update_zhihu_content(content_item: ZhihuContent):
"""
更新知乎内容
@@ -71,3 +86,19 @@ async def update_zhihu_content_comment(comment_item: ZhihuComment):
local_db_item.update({"last_modify_ts": utils.get_current_timestamp()})
utils.logger.info(f"[store.zhihu.update_zhihu_note_comment] zhihu content comment:{local_db_item}")
await ZhihuStoreFactory.create_store().store_comment(local_db_item)
async def save_creator(creator: ZhihuCreator):
    """
    Save Zhihu creator information.

    The creator model is dumped to a dict, stamped with the current
    timestamp under ``last_modify_ts``, and passed to the ``store_creator``
    method of the store returned by ``ZhihuStoreFactory.create_store()``.

    Args:
        creator: Creator model to save. A falsy value makes the call a no-op.

    Returns:
        None
    """
    if creator:
        # The model dump is evaluated first, then the timestamp; an existing
        # ``last_modify_ts`` key is overwritten by the fresh value.
        record = {
            **creator.model_dump(),
            "last_modify_ts": utils.get_current_timestamp(),
        }
        store = ZhihuStoreFactory.create_store()
        await store.store_creator(record)