Merge pull request #718 from persist-1/refactor

fix(store): 修复'crawler_type_var'的不当使用导致csv/json保存文件名异常的bug
This commit is contained in:
程序员阿江-Relakkes
2025-09-11 06:45:26 +08:00
committed by GitHub
7 changed files with 19 additions and 13 deletions

View File

@@ -50,7 +50,7 @@ def calculate_number_of_files(file_store_path: str) -> int:
class KuaishouCsvStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""
@@ -127,7 +127,7 @@ class KuaishouDbStoreImplement(AbstractStore):
class KuaishouJsonStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""

View File

@@ -51,7 +51,7 @@ def calculate_number_of_files(file_store_path: str) -> int:
class TieBaCsvStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="tieba", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="tieba", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""
@@ -149,7 +149,7 @@ class TieBaDbStoreImplement(AbstractStore):
class TieBaJsonStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="tieba", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="tieba", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""

View File

@@ -51,7 +51,7 @@ def calculate_number_of_files(file_store_path: str) -> int:
class WeiboCsvStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="weibo", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="weibo", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""
@@ -171,7 +171,7 @@ class WeiboDbStoreImplement(AbstractStore):
class WeiboJsonStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="weibo", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="weibo", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""

View File

@@ -17,12 +17,12 @@ from database.models import XhsNote, XhsNoteComment, XhsCreator
from tools.async_file_writer import AsyncFileWriter
from tools.time_util import get_current_timestamp
from var import crawler_type_var
class XhsCsvStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="xhs", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="xhs", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""
@@ -51,7 +51,7 @@ class XhsCsvStoreImplement(AbstractStore):
class XhsJsonStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="xhs", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="xhs", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""

View File

@@ -50,7 +50,7 @@ def calculate_number_of_files(file_store_path: str) -> int:
class ZhihuCsvStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="zhihu", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="zhihu", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""
@@ -148,7 +148,7 @@ class ZhihuDbStoreImplement(AbstractStore):
class ZhihuJsonStoreImplement(AbstractStore):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.writer = AsyncFileWriter(platform="zhihu", crawler_type=kwargs.get("crawler_type"))
self.writer = AsyncFileWriter(platform="zhihu", crawler_type=crawler_type_var.get())
async def store_content(self, content_item: Dict):
"""

View File

@@ -16,8 +16,8 @@ class AsyncFileWriter:
def _get_file_path(self, file_type: str, item_type: str) -> str:
base_path = f"data/{self.platform}/{file_type}"
pathlib.Path(base_path).mkdir(parents=True, exist_ok=True)
file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_time()}.{file_type}"
return os.path.join(base_path, file_name)
file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_date()}.{file_type}"
return f"{base_path}/{file_name}"
async def write_to_csv(self, item: Dict, item_type: str):
file_path = self._get_file_path('csv', item_type)

View File

@@ -33,6 +33,12 @@ def get_current_time() -> str:
"""
return time.strftime('%Y-%m-%d %X', time.localtime())
def get_current_time_hour() -> str:
    """Return the current local time at hour precision, e.g. '2023-12-02-13'.

    :return: local timestamp string in '%Y-%m-%d-%H' format
    """
    now = time.localtime()
    return time.strftime('%Y-%m-%d-%H', now)
def get_current_date() -> str:
"""