diff --git a/store/kuaishou/_store_impl.py b/store/kuaishou/_store_impl.py index 1cb1d80..4292cb0 100644 --- a/store/kuaishou/_store_impl.py +++ b/store/kuaishou/_store_impl.py @@ -50,7 +50,7 @@ def calculate_number_of_files(file_store_path: str) -> int: class KuaishouCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -127,7 +127,7 @@ class KuaishouDbStoreImplement(AbstractStore): class KuaishouJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ diff --git a/store/tieba/_store_impl.py b/store/tieba/_store_impl.py index 80464dd..723d1f4 100644 --- a/store/tieba/_store_impl.py +++ b/store/tieba/_store_impl.py @@ -51,7 +51,7 @@ def calculate_number_of_files(file_store_path: str) -> int: class TieBaCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="tieba", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="tieba", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -149,7 +149,7 @@ class TieBaDbStoreImplement(AbstractStore): class TieBaJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="tieba", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="tieba", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ diff --git a/store/weibo/_store_impl.py b/store/weibo/_store_impl.py index b27317f..f7503db 100644 --- a/store/weibo/_store_impl.py +++ b/store/weibo/_store_impl.py @@ -51,7 +51,7 @@ def calculate_number_of_files(file_store_path: str) -> int: class WeiboCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="weibo", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="weibo", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -171,7 +171,7 @@ class WeiboDbStoreImplement(AbstractStore): class WeiboJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="weibo", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="weibo", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ diff --git a/store/xhs/_store_impl.py b/store/xhs/_store_impl.py index 2dbfcdd..1bffddd 100644 --- a/store/xhs/_store_impl.py +++ b/store/xhs/_store_impl.py @@ -17,12 +17,12 @@ from database.models import XhsNote, XhsNoteComment, XhsCreator from tools.async_file_writer import AsyncFileWriter from tools.time_util import get_current_timestamp - +from var import crawler_type_var class XhsCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="xhs", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="xhs", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -51,7 +51,7 @@ class XhsCsvStoreImplement(AbstractStore): class XhsJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="xhs", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="xhs", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ diff --git a/store/zhihu/_store_impl.py b/store/zhihu/_store_impl.py index d2d6c4e..ac4dc1b 100644 --- a/store/zhihu/_store_impl.py +++ b/store/zhihu/_store_impl.py @@ -50,7 +50,7 @@ def calculate_number_of_files(file_store_path: str) -> int: class ZhihuCsvStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="zhihu", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="zhihu", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ @@ -148,7 +148,7 @@ class ZhihuDbStoreImplement(AbstractStore): class ZhihuJsonStoreImplement(AbstractStore): def __init__(self, **kwargs): super().__init__(**kwargs) - self.writer = AsyncFileWriter(platform="zhihu", crawler_type=kwargs.get("crawler_type")) + self.writer = AsyncFileWriter(platform="zhihu", crawler_type=crawler_type_var.get()) async def store_content(self, content_item: Dict): """ diff --git a/tools/async_file_writer.py b/tools/async_file_writer.py index f4d430e..972fff8 100644 --- a/tools/async_file_writer.py +++ b/tools/async_file_writer.py @@ -16,8 +16,8 @@ class AsyncFileWriter: def _get_file_path(self, file_type: str, item_type: str) -> str: base_path = f"data/{self.platform}/{file_type}" pathlib.Path(base_path).mkdir(parents=True, exist_ok=True) - file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_time()}.{file_type}" - return os.path.join(base_path, file_name) + file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_date()}.{file_type}" + return f"{base_path}/{file_name}" async def write_to_csv(self, item: Dict, item_type: str): file_path = self._get_file_path('csv', item_type) diff --git a/tools/time_util.py b/tools/time_util.py index 341aa80..d2eed96 100644 --- a/tools/time_util.py +++ b/tools/time_util.py @@ -33,6 +33,12 @@ def get_current_time() -> str: """ return time.strftime('%Y-%m-%d %X', time.localtime()) +def get_current_time_hour() -> str: + """ + 获取当前的时间:'2023-12-02-13' + :return: + """ + return time.strftime('%Y-%m-%d-%H', time.localtime()) def get_current_date() -> str: """