mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 11:27:26 +08:00
Merge pull request #718 from persist-1/refactor
fix(store): 修复'crawler_type_var'的不当使用导致csv/json保存文件名异常的bug
This commit is contained in:
@@ -50,7 +50,7 @@ def calculate_number_of_files(file_store_path: str) -> int:
|
|||||||
class KuaishouCsvStoreImplement(AbstractStore):
|
class KuaishouCsvStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
@@ -127,7 +127,7 @@ class KuaishouDbStoreImplement(AbstractStore):
|
|||||||
class KuaishouJsonStoreImplement(AbstractStore):
|
class KuaishouJsonStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="kuaishou", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ def calculate_number_of_files(file_store_path: str) -> int:
|
|||||||
class TieBaCsvStoreImplement(AbstractStore):
|
class TieBaCsvStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="tieba", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="tieba", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
@@ -149,7 +149,7 @@ class TieBaDbStoreImplement(AbstractStore):
|
|||||||
class TieBaJsonStoreImplement(AbstractStore):
|
class TieBaJsonStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="tieba", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="tieba", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ def calculate_number_of_files(file_store_path: str) -> int:
|
|||||||
class WeiboCsvStoreImplement(AbstractStore):
|
class WeiboCsvStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="weibo", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="weibo", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
@@ -171,7 +171,7 @@ class WeiboDbStoreImplement(AbstractStore):
|
|||||||
class WeiboJsonStoreImplement(AbstractStore):
|
class WeiboJsonStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="weibo", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="weibo", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -17,12 +17,12 @@ from database.models import XhsNote, XhsNoteComment, XhsCreator
|
|||||||
|
|
||||||
from tools.async_file_writer import AsyncFileWriter
|
from tools.async_file_writer import AsyncFileWriter
|
||||||
from tools.time_util import get_current_timestamp
|
from tools.time_util import get_current_timestamp
|
||||||
|
from var import crawler_type_var
|
||||||
|
|
||||||
class XhsCsvStoreImplement(AbstractStore):
|
class XhsCsvStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="xhs", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="xhs", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
@@ -51,7 +51,7 @@ class XhsCsvStoreImplement(AbstractStore):
|
|||||||
class XhsJsonStoreImplement(AbstractStore):
|
class XhsJsonStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="xhs", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="xhs", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ def calculate_number_of_files(file_store_path: str) -> int:
|
|||||||
class ZhihuCsvStoreImplement(AbstractStore):
|
class ZhihuCsvStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="zhihu", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="zhihu", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
@@ -148,7 +148,7 @@ class ZhihuDbStoreImplement(AbstractStore):
|
|||||||
class ZhihuJsonStoreImplement(AbstractStore):
|
class ZhihuJsonStoreImplement(AbstractStore):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
super().__init__(**kwargs)
|
super().__init__(**kwargs)
|
||||||
self.writer = AsyncFileWriter(platform="zhihu", crawler_type=kwargs.get("crawler_type"))
|
self.writer = AsyncFileWriter(platform="zhihu", crawler_type=crawler_type_var.get())
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -16,8 +16,8 @@ class AsyncFileWriter:
|
|||||||
def _get_file_path(self, file_type: str, item_type: str) -> str:
|
def _get_file_path(self, file_type: str, item_type: str) -> str:
|
||||||
base_path = f"data/{self.platform}/{file_type}"
|
base_path = f"data/{self.platform}/{file_type}"
|
||||||
pathlib.Path(base_path).mkdir(parents=True, exist_ok=True)
|
pathlib.Path(base_path).mkdir(parents=True, exist_ok=True)
|
||||||
file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_time()}.{file_type}"
|
file_name = f"{self.crawler_type}_{item_type}_{utils.get_current_date()}.{file_type}"
|
||||||
return os.path.join(base_path, file_name)
|
return f"{base_path}/{file_name}"
|
||||||
|
|
||||||
async def write_to_csv(self, item: Dict, item_type: str):
|
async def write_to_csv(self, item: Dict, item_type: str):
|
||||||
file_path = self._get_file_path('csv', item_type)
|
file_path = self._get_file_path('csv', item_type)
|
||||||
|
|||||||
@@ -33,6 +33,12 @@ def get_current_time() -> str:
|
|||||||
"""
|
"""
|
||||||
return time.strftime('%Y-%m-%d %X', time.localtime())
|
return time.strftime('%Y-%m-%d %X', time.localtime())
|
||||||
|
|
||||||
|
def get_current_time_hour() -> str:
|
||||||
|
"""
|
||||||
|
获取当前的时间:'2023-12-02-13'
|
||||||
|
:return:
|
||||||
|
"""
|
||||||
|
return time.strftime('%Y-%m-%d-%H', time.localtime())
|
||||||
|
|
||||||
def get_current_date() -> str:
|
def get_current_date() -> str:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user