From 0d0af57a01e342a88bb9f261519c92d7cf7aa1e9 Mon Sep 17 00:00:00 2001 From: persist-1 Date: Wed, 10 Sep 2025 23:47:05 +0800 Subject: [PATCH 1/2] =?UTF-8?q?fix(store):=20=E4=BF=AE=E5=A4=8D'crawler=5F?= =?UTF-8?q?type=5Fvar'=E7=9A=84=E4=B8=8D=E5=BD=93=E4=BD=BF=E7=94=A8?= =?UTF-8?q?=E5=AF=BC=E8=87=B4csv/json=E4=BF=9D=E5=AD=98=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=90=8D=E5=BC=82=E5=B8=B8=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- store/kuaishou/_store_impl.py | 4 ++-- store/tieba/_store_impl.py | 4 ++-- store/weibo/_store_impl.py | 4 ++-- store/xhs/_store_impl.py | 6 +++--- store/zhihu/_store_impl.py | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/store/kuaishou/_store_impl.py b/store/kuaishou/_store_impl.py index 1cb1d80..4292cb0 100644 --- a/store/kuaishou/_store_impl.py +++ b/store/kuaishou/_store_impl.py @@ -50,7 +50,7 @@ def calculate_number_of_files(file_store_path: str) -> int: class KuaishouCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -127,7 +127,7 @@ class KuaishouDbStoreImplement(AbstractStore): class KuaishouJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ diff --git a/store/tieba/_store_impl.py b/store/tieba/_store_impl.py index 80464dd..723d1f4 100644 --- a/store/tieba/_store_impl.py +++ b/store/tieba/_store_impl.py @@ -51,7 +51,7 @@ def calculate_number_of_files(file_store_path: str) -> int: class TieBaCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="tieba", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="tieba", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -149,7 +149,7 @@ class TieBaDbStoreImplement(AbstractStore): class TieBaJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="tieba", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="tieba", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ diff --git a/store/weibo/_store_impl.py b/store/weibo/_store_impl.py index b27317f..f7503db 100644 --- a/store/weibo/_store_impl.py +++ b/store/weibo/_store_impl.py @@ -51,7 +51,7 @@ def calculate_number_of_files(file_store_path: str) -> int: class WeiboCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="weibo", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="weibo", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -171,7 +171,7 @@ class WeiboDbStoreImplement(AbstractStore): class WeiboJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="weibo", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="weibo", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ diff --git a/store/xhs/_store_impl.py b/store/xhs/_store_impl.py index 2dbfcdd..1bffddd 100644 --- a/store/xhs/_store_impl.py +++ b/store/xhs/_store_impl.py @@ -17,12 +17,12 @@ from database.models import XhsNote, XhsNoteComment, XhsCreator from tools.async_file_writer import AsyncFileWriter from tools.time_util import get_current_timestamp - +from var import crawler_type_var class XhsCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="xhs", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="xhs", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -51,7 +51,7 @@ class XhsCsvStoreImplement(AbstractStore): class XhsJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="xhs", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="xhs", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ diff --git a/store/zhihu/_store_impl.py b/store/zhihu/_store_impl.py index d2d6c4e..ac4dc1b 100644 --- a/store/zhihu/_store_impl.py +++ b/store/zhihu/_store_impl.py @@ -50,7 +50,7 @@ def calculate_number_of_files(file_store_path: str) -> int: class ZhihuCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="zhihu", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="zhihu", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -148,7 +148,7 @@ class ZhihuDbStoreImplement(AbstractStore): class ZhihuJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="zhihu", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="zhihu", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ From 926ea9dc42ee1e0b6a4c90e2680050c91cff0299 Mon Sep 17 00:00:00 2001 From: persist-1 Date: Thu, 11 Sep 2025 00:35:02 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E8=B7=AF=E5=BE=84?= =?UTF-8?q?=E5=88=86=E9=9A=94=E7=AC=A6=E8=BF=9E=E6=8E=A5=E6=96=B9=E5=BC=8F?= =?UTF-8?q?=E4=B8=8D=E5=BD=93=E5=AF=BC=E8=87=B4=E7=9A=84=E8=B7=AF=E5=BE=84?= =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 修改'async_file_writer.py'中'_get_file_path'返回值由字符串连接改为直接用正斜杠拼接路径,以确保路径分隔符的统一 - 修改获取文件保存时间后缀方式为'get_current_date',以'天'为文件内容划分点 --- tools/async_file_writer.py | 4 ++-- tools/time_util.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/async_file_writer.py b/tools/async_file_writer.py index f4d430e..972fff8 100644 --- a/tools/async_file_writer.py +++ b/tools/async_file_writer.py @@ -16,8 +16,8 @@ class AsyncFileWriter: def _get_file_path(self, file_type: str, item_type: str) -> str: base_path = f"data/{self.platform}/{file_type}" pathlib.Path(base_path).mkdir(parents=True, exist_ok=True) - file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_time()}.{file_type}" - return os.path.join(base_path, file_name) + file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_date()}.{file_type}" + return f"{base_path}/{file_name}" async def write_to_csv(self, item: Dict, item_type: str): file_path = self._get_file_path('csv', item_type) diff --git a/tools/time_util.py b/tools/time_util.py index 341aa80..d2eed96 100644 --- a/tools/time_util.py +++ b/tools/time_util.py @@ -33,6 +33,12 @@ def get_current_time() -> str: """ return time.strftime('%Y-%m-%d %X', time.localtime()) +def get_current_time_hour() -> str: + """ + 获取当前的时间:'2023-12-02-13' + :return: + """ + return time.strftime('%Y-%m-%d-%H', time.localtime()) def get_current_date() -> str: """