feat: 小红书支持通过博主ID采集笔记和评论,小红书type=search时支持配置按哪种排序方式获取笔记数据,小红书笔记增加视频地址和标签字段

This commit is contained in:
jayeeliu@gmail.com
2024-03-02 01:49:42 +08:00
parent c09f9fef68
commit 61ba8c5cc7
8 changed files with 244 additions and 2 deletions

View File

@@ -72,6 +72,17 @@ class XhsCsvStoreImplement(AbstractStore):
"""
await self.save_data_to_csv(save_item=comment_item, store_type="comments")
async def store_creator(self, creator: Dict):
"""
Xiaohongshu content CSV storage implementation
Args:
creator: creator dict
Returns:
"""
await self.save_data_to_csv(save_item=creator, store_type="creator")
class XhsDbStoreImplement(AbstractStore):
async def store_content(self, content_item: Dict):
@@ -121,6 +132,31 @@ class XhsDbStoreImplement(AbstractStore):
comment_pydantic.model_validate(comment_data)
await XHSNoteComment.filter(comment_id=comment_id).update(**comment_data.model_dump())
async def store_creator(self, creator: Dict):
"""
Xiaohongshu content DB storage implementation
Args:
creator: creator dict
Returns:
"""
from .xhs_store_db_types import XhsCreator
user_id = creator.get("user_id")
if not await XhsCreator.filter(user_id=user_id).first():
creator["add_ts"] = utils.get_current_timestamp()
creator["last_modify_ts"] = creator["add_ts"]
creator_pydantic = pydantic_model_creator(XhsCreator, name="CreatorPydanticCreate", exclude=('id',))
creator_data = creator_pydantic(**creator)
creator_pydantic.model_validate(creator_data)
await XhsCreator.create(**creator_data.model_dump())
else:
creator["last_modify_ts"] = utils.get_current_timestamp()
creator_pydantic = pydantic_model_creator(XhsCreator, name="CreatorPydanticUpdate", exclude=('id', 'add_ts',))
creator_data = creator_pydantic(**creator)
creator_pydantic.model_validate(creator_data)
await XhsCreator.filter(user_id=user_id).update(**creator_data.model_dump())
class XhsJsonStoreImplement(AbstractStore):
json_store_path: str = "data/xhs"
@@ -181,3 +217,14 @@ class XhsJsonStoreImplement(AbstractStore):
"""
await self.save_data_to_json(comment_item, "comments")
async def store_creator(self, creator: Dict):
"""
Xiaohongshu content JSON storage implementation
Args:
creator: creator dict
Returns:
"""
await self.save_data_to_json(creator, "creator")