mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-08 10:57:26 +08:00
chore: add copyright to every file
This commit is contained in:
@@ -1,12 +1,21 @@
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/__init__.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/bilibili/__init__.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/bilibili/_store_impl.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -302,7 +311,7 @@ class BiliSqliteStoreImplement(BiliDbStoreImplement):
|
||||
|
||||
class BiliMongoStoreImplement(AbstractStore):
|
||||
"""B站MongoDB存储实现"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.mongo_store = MongoDBStoreBase(collection_prefix="bilibili")
|
||||
|
||||
@@ -315,7 +324,7 @@ class BiliMongoStoreImplement(AbstractStore):
|
||||
video_id = content_item.get("video_id")
|
||||
if not video_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="contents",
|
||||
query={"video_id": video_id},
|
||||
@@ -332,7 +341,7 @@ class BiliMongoStoreImplement(AbstractStore):
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not comment_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="comments",
|
||||
query={"comment_id": comment_id},
|
||||
@@ -349,7 +358,7 @@ class BiliMongoStoreImplement(AbstractStore):
|
||||
user_id = creator_item.get("user_id")
|
||||
if not user_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="creators",
|
||||
query={"user_id": user_id},
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/bilibili/bilibilli_store_media.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -27,7 +36,7 @@ class BilibiliVideo(AbstractStoreVideo):
|
||||
async def store_video(self, video_content_item: Dict):
|
||||
"""
|
||||
store content
|
||||
|
||||
|
||||
Args:
|
||||
video_content_item:
|
||||
|
||||
@@ -39,11 +48,11 @@ class BilibiliVideo(AbstractStoreVideo):
|
||||
def make_save_file_name(self, aid: str, extension_file_name: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
|
||||
|
||||
Args:
|
||||
aid: aid
|
||||
extension_file_name: video filename with extension
|
||||
|
||||
|
||||
Returns:
|
||||
|
||||
"""
|
||||
@@ -52,7 +61,7 @@ class BilibiliVideo(AbstractStoreVideo):
|
||||
async def save_video(self, aid: int, video_content: str, extension_file_name="mp4"):
|
||||
"""
|
||||
save video to local
|
||||
|
||||
|
||||
Args:
|
||||
aid: aid
|
||||
video_content: video content
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/douyin/__init__.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/douyin/_store_impl.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -201,7 +210,7 @@ class DouyinSqliteStoreImplement(DouyinDbStoreImplement):
|
||||
|
||||
class DouyinMongoStoreImplement(AbstractStore):
|
||||
"""抖音MongoDB存储实现"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.mongo_store = MongoDBStoreBase(collection_prefix="douyin")
|
||||
|
||||
@@ -214,7 +223,7 @@ class DouyinMongoStoreImplement(AbstractStore):
|
||||
aweme_id = content_item.get("aweme_id")
|
||||
if not aweme_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="contents",
|
||||
query={"aweme_id": aweme_id},
|
||||
@@ -231,7 +240,7 @@ class DouyinMongoStoreImplement(AbstractStore):
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not comment_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="comments",
|
||||
query={"comment_id": comment_id},
|
||||
@@ -248,10 +257,10 @@ class DouyinMongoStoreImplement(AbstractStore):
|
||||
user_id = creator_item.get("user_id")
|
||||
if not user_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="creators",
|
||||
query={"user_id": user_id},
|
||||
data=creator_item
|
||||
)
|
||||
utils.logger.info(f"[DouyinMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
utils.logger.info(f"[DouyinMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/douyin/douyin_store_media.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -23,7 +32,7 @@ class DouYinImage(AbstractStoreImage):
|
||||
async def store_image(self, image_content_item: Dict):
|
||||
"""
|
||||
store content
|
||||
|
||||
|
||||
Args:
|
||||
image_content_item:
|
||||
|
||||
@@ -35,7 +44,7 @@ class DouYinImage(AbstractStoreImage):
|
||||
def make_save_file_name(self, aweme_id: str, extension_file_name: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
|
||||
|
||||
Args:
|
||||
aweme_id: aweme id
|
||||
extension_file_name: image filename with extension
|
||||
@@ -48,7 +57,7 @@ class DouYinImage(AbstractStoreImage):
|
||||
async def save_image(self, aweme_id: str, pic_content: str, extension_file_name):
|
||||
"""
|
||||
save image to local
|
||||
|
||||
|
||||
Args:
|
||||
aweme_id: aweme id
|
||||
pic_content: image content
|
||||
@@ -70,7 +79,7 @@ class DouYinVideo(AbstractStoreVideo):
|
||||
async def store_video(self, video_content_item: Dict):
|
||||
"""
|
||||
store content
|
||||
|
||||
|
||||
Args:
|
||||
video_content_item:
|
||||
|
||||
@@ -82,7 +91,7 @@ class DouYinVideo(AbstractStoreVideo):
|
||||
def make_save_file_name(self, aweme_id: str, extension_file_name: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
|
||||
|
||||
Args:
|
||||
aweme_id: aweme id
|
||||
extension_file_name: video filename with extension
|
||||
@@ -95,7 +104,7 @@ class DouYinVideo(AbstractStoreVideo):
|
||||
async def save_video(self, aweme_id: str, video_content: str, extension_file_name):
|
||||
"""
|
||||
save video to local
|
||||
|
||||
|
||||
Args:
|
||||
aweme_id: aweme id
|
||||
video_content: video content
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/kuaishou/__init__.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
@@ -109,4 +118,4 @@ async def save_creator(user_id: str, creator: Dict):
|
||||
"last_modify_ts": utils.get_current_timestamp(),
|
||||
}
|
||||
utils.logger.info(f"[store.kuaishou.save_creator] creator:{local_db_item}")
|
||||
await KuaishouStoreFactory.create_store().store_creator(local_db_item)
|
||||
await KuaishouStoreFactory.create_store().store_creator(local_db_item)
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/kuaishou/_store_impl.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -163,7 +172,7 @@ class KuaishouSqliteStoreImplement(KuaishouDbStoreImplement):
|
||||
|
||||
class KuaishouMongoStoreImplement(AbstractStore):
|
||||
"""快手MongoDB存储实现"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.mongo_store = MongoDBStoreBase(collection_prefix="kuaishou")
|
||||
|
||||
@@ -176,7 +185,7 @@ class KuaishouMongoStoreImplement(AbstractStore):
|
||||
video_id = content_item.get("video_id")
|
||||
if not video_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="contents",
|
||||
query={"video_id": video_id},
|
||||
@@ -193,7 +202,7 @@ class KuaishouMongoStoreImplement(AbstractStore):
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not comment_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="comments",
|
||||
query={"comment_id": comment_id},
|
||||
@@ -210,10 +219,10 @@ class KuaishouMongoStoreImplement(AbstractStore):
|
||||
user_id = creator_item.get("user_id")
|
||||
if not user_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="creators",
|
||||
query={"user_id": user_id},
|
||||
data=creator_item
|
||||
)
|
||||
utils.logger.info(f"[KuaishouMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
utils.logger.info(f"[KuaishouMongoStoreImplement.store_creator] Saved creator {user_id} to MongoDB")
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/tieba/__init__.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/tieba/_store_impl.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -195,7 +204,7 @@ class TieBaSqliteStoreImplement(TieBaDbStoreImplement):
|
||||
|
||||
class TieBaMongoStoreImplement(AbstractStore):
|
||||
"""贴吧MongoDB存储实现"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.mongo_store = MongoDBStoreBase(collection_prefix="tieba")
|
||||
|
||||
@@ -208,7 +217,7 @@ class TieBaMongoStoreImplement(AbstractStore):
|
||||
note_id = content_item.get("note_id")
|
||||
if not note_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="contents",
|
||||
query={"note_id": note_id},
|
||||
@@ -225,7 +234,7 @@ class TieBaMongoStoreImplement(AbstractStore):
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not comment_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="comments",
|
||||
query={"comment_id": comment_id},
|
||||
@@ -242,7 +251,7 @@ class TieBaMongoStoreImplement(AbstractStore):
|
||||
user_id = creator_item.get("user_id")
|
||||
if not user_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="creators",
|
||||
query={"user_id": user_id},
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/weibo/__init__.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/weibo/_store_impl.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -217,7 +226,7 @@ class WeiboSqliteStoreImplement(WeiboDbStoreImplement):
|
||||
|
||||
class WeiboMongoStoreImplement(AbstractStore):
|
||||
"""微博MongoDB存储实现"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.mongo_store = MongoDBStoreBase(collection_prefix="weibo")
|
||||
|
||||
@@ -230,7 +239,7 @@ class WeiboMongoStoreImplement(AbstractStore):
|
||||
note_id = content_item.get("note_id")
|
||||
if not note_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="contents",
|
||||
query={"note_id": note_id},
|
||||
@@ -247,7 +256,7 @@ class WeiboMongoStoreImplement(AbstractStore):
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not comment_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="comments",
|
||||
query={"comment_id": comment_id},
|
||||
@@ -264,7 +273,7 @@ class WeiboMongoStoreImplement(AbstractStore):
|
||||
user_id = creator_item.get("user_id")
|
||||
if not user_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="creators",
|
||||
query={"user_id": user_id},
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/weibo/weibo_store_media.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -27,7 +36,7 @@ class WeiboStoreImage(AbstractStoreImage):
|
||||
async def store_image(self, image_content_item: Dict):
|
||||
"""
|
||||
store content
|
||||
|
||||
|
||||
Args:
|
||||
image_content_item:
|
||||
|
||||
@@ -39,7 +48,7 @@ class WeiboStoreImage(AbstractStoreImage):
|
||||
def make_save_file_name(self, picid: str, extension_file_name: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
|
||||
|
||||
Args:
|
||||
picid: image id
|
||||
extension_file_name: image filename with extension
|
||||
@@ -52,7 +61,7 @@ class WeiboStoreImage(AbstractStoreImage):
|
||||
async def save_image(self, picid: str, pic_content: str, extension_file_name="jpg"):
|
||||
"""
|
||||
save image to local
|
||||
|
||||
|
||||
Args:
|
||||
picid: image id
|
||||
pic_content: image content
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/xhs/__init__.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
|
||||
@@ -1,4 +1,21 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/xhs/_store_impl.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
# @Author : persist1@126.com
|
||||
# @Time : 2025/9/5 19:34
|
||||
# @Desc : 小红书存储实现类
|
||||
@@ -264,7 +281,7 @@ class XhsSqliteStoreImplement(XhsDbStoreImplement):
|
||||
|
||||
class XhsMongoStoreImplement(AbstractStore):
|
||||
"""小红书MongoDB存储实现"""
|
||||
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
self.mongo_store = MongoDBStoreBase(collection_prefix="xhs")
|
||||
@@ -278,7 +295,7 @@ class XhsMongoStoreImplement(AbstractStore):
|
||||
note_id = content_item.get("note_id")
|
||||
if not note_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="contents",
|
||||
query={"note_id": note_id},
|
||||
@@ -295,7 +312,7 @@ class XhsMongoStoreImplement(AbstractStore):
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not comment_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="comments",
|
||||
query={"comment_id": comment_id},
|
||||
@@ -312,7 +329,7 @@ class XhsMongoStoreImplement(AbstractStore):
|
||||
user_id = creator_item.get("user_id")
|
||||
if not user_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="creators",
|
||||
query={"user_id": user_id},
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/xhs/xhs_store_media.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -27,7 +36,7 @@ class XiaoHongShuImage(AbstractStoreImage):
|
||||
async def store_image(self, image_content_item: Dict):
|
||||
"""
|
||||
store content
|
||||
|
||||
|
||||
Args:
|
||||
image_content_item:
|
||||
|
||||
@@ -39,7 +48,7 @@ class XiaoHongShuImage(AbstractStoreImage):
|
||||
def make_save_file_name(self, notice_id: str, extension_file_name: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
|
||||
|
||||
Args:
|
||||
notice_id: notice id
|
||||
extension_file_name: image filename with extension
|
||||
@@ -52,7 +61,7 @@ class XiaoHongShuImage(AbstractStoreImage):
|
||||
async def save_image(self, notice_id: str, pic_content: str, extension_file_name):
|
||||
"""
|
||||
save image to local
|
||||
|
||||
|
||||
Args:
|
||||
notice_id: notice id
|
||||
pic_content: image content
|
||||
@@ -74,7 +83,7 @@ class XiaoHongShuVideo(AbstractStoreVideo):
|
||||
async def store_video(self, video_content_item: Dict):
|
||||
"""
|
||||
store content
|
||||
|
||||
|
||||
Args:
|
||||
video_content_item:
|
||||
|
||||
@@ -86,7 +95,7 @@ class XiaoHongShuVideo(AbstractStoreVideo):
|
||||
def make_save_file_name(self, notice_id: str, extension_file_name: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
|
||||
|
||||
Args:
|
||||
notice_id: notice id
|
||||
extension_file_name: video filename with extension
|
||||
@@ -99,7 +108,7 @@ class XiaoHongShuVideo(AbstractStoreVideo):
|
||||
async def save_video(self, notice_id: str, video_content: str, extension_file_name):
|
||||
"""
|
||||
save video to local
|
||||
|
||||
|
||||
Args:
|
||||
notice_id: notice id
|
||||
video_content: video content
|
||||
|
||||
@@ -1,12 +1,21 @@
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/zhihu/__init__.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
@@ -83,7 +92,7 @@ async def batch_update_zhihu_note_comments(comments: List[ZhihuComment]):
|
||||
"""
|
||||
if not comments:
|
||||
return
|
||||
|
||||
|
||||
for comment_item in comments:
|
||||
await update_zhihu_content_comment(comment_item)
|
||||
|
||||
@@ -116,4 +125,4 @@ async def save_creator(creator: ZhihuCreator):
|
||||
return
|
||||
local_db_item = creator.model_dump()
|
||||
local_db_item.update({"last_modify_ts": utils.get_current_timestamp()})
|
||||
await ZhihuStoreFactory.create_store().store_creator(local_db_item)
|
||||
await ZhihuStoreFactory.create_store().store_creator(local_db_item)
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler/blob/main/store/zhihu/_store_impl.py
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
@@ -194,7 +203,7 @@ class ZhihuSqliteStoreImplement(ZhihuDbStoreImplement):
|
||||
|
||||
class ZhihuMongoStoreImplement(AbstractStore):
|
||||
"""知乎MongoDB存储实现"""
|
||||
|
||||
|
||||
def __init__(self):
|
||||
self.mongo_store = MongoDBStoreBase(collection_prefix="zhihu")
|
||||
|
||||
@@ -207,7 +216,7 @@ class ZhihuMongoStoreImplement(AbstractStore):
|
||||
note_id = content_item.get("note_id")
|
||||
if not note_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="contents",
|
||||
query={"note_id": note_id},
|
||||
@@ -224,7 +233,7 @@ class ZhihuMongoStoreImplement(AbstractStore):
|
||||
comment_id = comment_item.get("comment_id")
|
||||
if not comment_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="comments",
|
||||
query={"comment_id": comment_id},
|
||||
@@ -241,7 +250,7 @@ class ZhihuMongoStoreImplement(AbstractStore):
|
||||
user_id = creator_item.get("user_id")
|
||||
if not user_id:
|
||||
return
|
||||
|
||||
|
||||
await self.mongo_store.save_or_update(
|
||||
collection_suffix="creators",
|
||||
query={"user_id": user_id},
|
||||
|
||||
Reference in New Issue
Block a user