mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-10 03:47:26 +08:00
Update weibo_store_impl.py
Update bilibili_store_impl.py 新增id Update bilibili_store_impl.py 新增id,解决同一天内的不同查询写入同一个文件的问题 Update douyin_store_impl.py 新增id,解决同一天内的不同查询写入同一个文件的问题 Update kuaishou_store_impl.py Update weibo_store_impl.py Update xhs_store_impl.py Update weibo_store_impl.py Update kuaishou_store_impl.py Update bilibili_store_impl.py Update douyin_store_impl.py Update kuaishou_store_impl.py Update xhs_store_impl.py
This commit is contained in:
@@ -14,11 +14,20 @@ import aiofiles
|
||||
from base.base_crawler import AbstractStore
|
||||
from tools import utils
|
||||
from var import crawler_type_var
|
||||
|
||||
def calculatet_number_of_files(file_store_path: str) -> int:
    """Return the next numeric prefix to use for a data file in a directory.

    Saved files are expected to be named ``<number>_<rest>``. Giving each run
    a fresh number keeps separate runs from writing into the same file.

    Entries whose leading ``_``-separated segment is not an integer (for
    example files written with the older, un-numbered naming scheme, or
    unrelated files in the directory) are ignored instead of raising
    ``ValueError``.

    Args:
        file_store_path: Directory that holds the saved data files.

    Returns:
        ``1`` when the directory does not exist or holds no numbered entry,
        otherwise the highest existing numeric prefix plus one.
    """
    if not os.path.exists(file_store_path):
        return 1

    numeric_prefixes = []
    for file_name in os.listdir(file_store_path):
        try:
            numeric_prefixes.append(int(file_name.split("_")[0]))
        except ValueError:
            # Not a numbered data file; it must not break prefix calculation.
            continue

    # default=0 covers an existing but empty (or all non-numbered) directory.
    return max(numeric_prefixes, default=0) + 1
|
||||
|
||||
class BiliCsvStoreImplement(AbstractStore):
|
||||
csv_store_path: str = "data/bilibili"
|
||||
|
||||
file_count:int=calculatet_number_of_files(csv_store_path)
|
||||
def make_save_file_name(self, store_type: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
@@ -28,7 +37,7 @@ class BiliCsvStoreImplement(AbstractStore):
|
||||
Returns: eg: data/bilibili/search_comments_20240114.csv ...
|
||||
|
||||
"""
|
||||
return f"{self.csv_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.csv"
|
||||
return f"{self.csv_store_path}/{self.file_count}_{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.csv"
|
||||
|
||||
async def save_data_to_csv(self, save_item: Dict, store_type: str):
|
||||
"""
|
||||
@@ -118,6 +127,8 @@ class BiliDbStoreImplement(AbstractStore):
|
||||
class BiliJsonStoreImplement(AbstractStore):
|
||||
json_store_path: str = "data/bilibili"
|
||||
lock = asyncio.Lock()
|
||||
file_count:int=calculatet_number_of_files(json_store_path)
|
||||
|
||||
|
||||
def make_save_file_name(self, store_type: str) -> str:
|
||||
"""
|
||||
@@ -128,7 +139,8 @@ class BiliJsonStoreImplement(AbstractStore):
|
||||
Returns:
|
||||
|
||||
"""
|
||||
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||
|
||||
return f"{self.json_store_path}/{self.file_count}_{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||
|
||||
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user