mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 19:37:25 +08:00
fix_words
This commit is contained in:
@@ -144,7 +144,7 @@ async def batch_update_bilibili_creator_fans(creator_info: Dict, fans_list: List
|
||||
"sign": fan_item.get("sign"),
|
||||
"avatar": fan_item.get("face"),
|
||||
}
|
||||
await update_bilibili_creator_fans(creator_info=creator_info, fan_info=fan_info)
|
||||
await update_bilibili_creator_contact(creator_info=creator_info, fan_info=fan_info)
|
||||
|
||||
|
||||
async def batch_update_bilibili_creator_followings(creator_info: Dict, followings_list: List[Dict]):
|
||||
@@ -157,10 +157,36 @@ async def batch_update_bilibili_creator_followings(creator_info: Dict, following
|
||||
"sign": following_item.get("sign"),
|
||||
"avatar": following_item.get("face"),
|
||||
}
|
||||
await update_bilibili_creator_fans(creator_info=following_info, fan_info=creator_info)
|
||||
await update_bilibili_creator_contact(creator_info=following_info, fan_info=creator_info)
|
||||
|
||||
|
||||
async def update_bilibili_creator_fans(creator_info: Dict, fan_info: Dict):
|
||||
async def batch_update_bilibili_creator_dynamics(creator_info: Dict, dynamics_list: List[Dict]):
    """
    Flatten every raw dynamic of a creator and pass it to the single-item updater.

    Args:
        creator_info: creator info dict, forwarded unchanged to update_bilibili_creator_dynamic
        dynamics_list: raw dynamic items; each one is read through the keys
            "id_str", "type" and "modules" (with "module_dynamic",
            "module_author" and "module_stat" inside it)

    Returns:

    """
    # Nothing to do for a missing or empty list.
    if not dynamics_list:
        return

    for raw_dynamic in dynamics_list:
        dynamic_id: str = raw_dynamic["id_str"]
        modules: Dict = raw_dynamic["modules"]

        # "desc" may be absent or falsy; the text then stays an empty string.
        desc = modules["module_dynamic"].get("desc")
        text: str = desc["text"] if desc else ""

        # Only the part after the last underscore of the type is kept.
        type_suffix: str = raw_dynamic["type"].rpartition("_")[2]
        pub_ts = modules["module_author"]["pub_ts"]
        stat: Dict = modules["module_stat"]

        flattened: Dict = {
            "dynamic_id": dynamic_id,
            "text": text,
            "type": type_suffix,
            "pub_ts": pub_ts,
            "comment": stat["comment"]["count"],
            "forward": stat["forward"]["count"],
            "like": stat["like"]["count"],
        }
        await update_bilibili_creator_dynamic(creator_info=creator_info, dynamic_info=flattened)
|
||||
|
||||
|
||||
async def update_bilibili_creator_contact(creator_info: Dict, fan_info: Dict):
|
||||
save_contact_item = {
|
||||
"up_id": creator_info["id"],
|
||||
"fan_id": fan_info["id"],
|
||||
@@ -169,7 +195,25 @@ async def update_bilibili_creator_fans(creator_info: Dict, fan_info: Dict):
|
||||
"up_sign": creator_info["sign"],
|
||||
"fan_sign": fan_info["sign"],
|
||||
"up_avatar": creator_info["avatar"],
|
||||
"fan_avatar": fan_info["avatar"]
|
||||
"fan_avatar": fan_info["avatar"],
|
||||
"last_modify_ts": utils.get_current_timestamp(),
|
||||
}
|
||||
|
||||
await BiliStoreFactory.create_store().store_creator_contact(contact_item=save_contact_item)
|
||||
await BiliStoreFactory.create_store().store_contact(contact_item=save_contact_item)
|
||||
|
||||
|
||||
async def update_bilibili_creator_dynamic(creator_info: Dict, dynamic_info: Dict):
    """
    Build the storable record for one creator dynamic and send it to the configured store.

    Args:
        creator_info: creator info dict; "id" and "name" are read from it
        dynamic_info: flattened dynamic dict; "dynamic_id", "text", "type",
            "pub_ts", "comment", "forward" and "like" are read from it

    Returns:

    """
    # Fields copied verbatim from dynamic_info, in the order they are stored.
    passthrough_keys = ("text", "type", "pub_ts", "comment", "forward", "like")
    record = {
        "dynamic_id": dynamic_info["dynamic_id"],
        "user_id": creator_info["id"],
        "user_name": creator_info["name"],
        **{key: dynamic_info[key] for key in passthrough_keys},
        "last_modify_ts": utils.get_current_timestamp(),
    }

    store = BiliStoreFactory.create_store()
    await store.store_dynamic(dynamic_item=record)
|
||||
|
||||
@@ -38,13 +38,15 @@ def calculate_number_of_files(file_store_path: str) -> int:
|
||||
if not os.path.exists(file_store_path):
|
||||
return 1
|
||||
try:
|
||||
return max([int(file_name.split("_")[0])for file_name in os.listdir(file_store_path)])+1
|
||||
return max([int(file_name.split("_")[0]) for file_name in os.listdir(file_store_path)]) + 1
|
||||
except ValueError:
|
||||
return 1
|
||||
|
||||
|
||||
class BiliCsvStoreImplement(AbstractStore):
|
||||
csv_store_path: str = "data/bilibili"
|
||||
file_count:int=calculate_number_of_files(csv_store_path)
|
||||
file_count: int = calculate_number_of_files(csv_store_path)
|
||||
|
||||
def make_save_file_name(self, store_type: str) -> str:
|
||||
"""
|
||||
make save file name by store type
|
||||
@@ -107,9 +109,9 @@ class BiliCsvStoreImplement(AbstractStore):
|
||||
"""
|
||||
await self.save_data_to_csv(save_item=creator, store_type="creators")
|
||||
|
||||
async def store_creator_contact(self, contact_item: Dict):
|
||||
async def store_contact(self, contact_item: Dict):
|
||||
"""
|
||||
Bilibili comment CSV storage implementation
|
||||
Bilibili contact CSV storage implementation
|
||||
Args:
|
||||
contact_item: creator's contact item dict
|
||||
|
||||
@@ -117,7 +119,19 @@ class BiliCsvStoreImplement(AbstractStore):
|
||||
|
||||
"""
|
||||
|
||||
await self.save_data_to_csv(save_item=contact_item, store_type="fans")
|
||||
await self.save_data_to_csv(save_item=contact_item, store_type="contacts")
|
||||
|
||||
async def store_dynamic(self, dynamic_item: Dict):
    """
    Save one creator dynamic through the CSV writer of this store.

    Args:
        dynamic_item: creator's dynamic item dict

    Returns:

    """
    # Dynamics are written under their own store type.
    dynamics_store_type = "dynamics"
    await self.save_data_to_csv(save_item=dynamic_item, store_type=dynamics_store_type)
|
||||
|
||||
|
||||
class BiliDbStoreImplement(AbstractStore):
|
||||
@@ -184,16 +198,50 @@ class BiliDbStoreImplement(AbstractStore):
|
||||
else:
|
||||
await update_creator_by_creator_id(creator_id,creator_item=creator)
|
||||
|
||||
async def store_contact(self, contact_item: Dict):
    """
    Insert or update one uploader/fan contact relation in the database.

    The relation is looked up by its "up_id" and "fan_id"; an existing row is
    updated by its "id", otherwise "add_ts" is stamped on the item and a new
    row is added.

    Args:
        contact_item: contact item dict

    Returns:

    """
    from .bilibili_store_sql import (add_new_contact,
                                     query_contact_by_up_and_fan,
                                     update_contact_by_id, )

    existing: Dict = await query_contact_by_up_and_fan(
        up_id=contact_item.get("up_id"),
        fan_id=contact_item.get("fan_id"),
    )

    # Known relation: refresh the stored row and stop here.
    if existing:
        await update_contact_by_id(id=existing.get("id"), contact_item=contact_item)
        return

    # First time this relation is seen: record when it was added.
    contact_item["add_ts"] = utils.get_current_timestamp()
    await add_new_contact(contact_item)
|
||||
|
||||
async def store_dynamic(self, dynamic_item: Dict):
    """
    Bilibili dynamic DB storage implementation

    NOTE(review): the visible body of this method is this docstring only, so
    awaiting it stores nothing. Presumably the DB write for dynamics is still
    to be implemented - confirm before relying on DB storage of dynamics.

    Args:
        dynamic_item: dynamic item dict

    Returns:

    """
|
||||
|
||||
|
||||
|
||||
class BiliJsonStoreImplement(AbstractStore):
|
||||
json_store_path: str = "data/bilibili/json"
|
||||
words_store_path: str = "data/bilibili/words"
|
||||
lock = asyncio.Lock()
|
||||
file_count:int=calculate_number_of_files(json_store_path)
|
||||
file_count: int = calculate_number_of_files(json_store_path)
|
||||
WordCloud = words.AsyncWordCloudGenerator()
|
||||
|
||||
|
||||
def make_save_file_name(self, store_type: str) -> (str,str):
|
||||
def make_save_file_name(self, store_type: str) -> (str, str):
|
||||
"""
|
||||
make save file name by store type
|
||||
Args:
|
||||
@@ -220,7 +268,7 @@ class BiliJsonStoreImplement(AbstractStore):
|
||||
"""
|
||||
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||
pathlib.Path(self.words_store_path).mkdir(parents=True, exist_ok=True)
|
||||
save_file_name,words_file_name_prefix = self.make_save_file_name(store_type=store_type)
|
||||
save_file_name, words_file_name_prefix = self.make_save_file_name(store_type=store_type)
|
||||
save_data = []
|
||||
|
||||
async with self.lock:
|
||||
@@ -271,7 +319,7 @@ class BiliJsonStoreImplement(AbstractStore):
|
||||
"""
|
||||
await self.save_data_to_json(creator, "creators")
|
||||
|
||||
async def store_creator_contact(self, contact_item: Dict):
|
||||
async def store_contact(self, contact_item: Dict):
|
||||
"""
|
||||
creator contact JSON storage implementation
|
||||
Args:
|
||||
@@ -281,4 +329,16 @@ class BiliJsonStoreImplement(AbstractStore):
|
||||
|
||||
"""
|
||||
|
||||
await self.save_data_to_json(save_item=contact_item, store_type="fans")
|
||||
await self.save_data_to_json(save_item=contact_item, store_type="contacts")
|
||||
|
||||
async def store_dynamic(self, dynamic_item: Dict):
    """
    Save one creator dynamic through the JSON writer of this store.

    Args:
        dynamic_item: creator's dynamic item dict

    Returns:

    """
    # Dynamics are written under their own store type.
    dynamics_store_type = "dynamics"
    await self.save_data_to_json(save_item=dynamic_item, store_type=dynamics_store_type)
|
||||
|
||||
@@ -158,3 +158,49 @@ async def update_creator_by_creator_id(creator_id: str, creator_item: Dict) -> i
|
||||
effect_row: int = await async_db_conn.update_table("bilibili_up_info", creator_item, "user_id", creator_id)
|
||||
return effect_row
|
||||
|
||||
async def query_contact_by_up_and_fan(up_id: str, fan_id: str) -> Dict:
    """
    Query a single contact relation between an uploader and a fan.

    Args:
        up_id: uploader id matched against the up_id column
        fan_id: fan id matched against the fan_id column

    Returns:
        The first matching row of bilibili_contact_info, or an empty dict
        when no row matches.

    """
    async_db_conn: AsyncMysqlDB = media_crawler_db_var.get()
    # Bind the ids as driver parameters instead of interpolating them into the
    # statement: the ids originate outside this function, and a quote inside
    # one must not be able to change the SQL. str() keeps the comparison a
    # string comparison, as the quoted literals did before.
    # NOTE(review): relies on AsyncMysqlDB.query forwarding extra positional
    # arguments to the driver's execute() - confirm against the DB wrapper.
    sql: str = "select * from bilibili_contact_info where up_id = %s and fan_id = %s"
    rows: List[Dict] = await async_db_conn.query(sql, str(up_id), str(fan_id))
    return rows[0] if rows else dict()
|
||||
|
||||
|
||||
async def add_new_contact(contact_item: Dict) -> int:
    """
    Add a new contact relation row to bilibili_contact_info.

    Args:
        contact_item: contact item dict handed to the table insert

    Returns:
        The value returned by the table insert (annotated as the last row id).

    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.item_to_table("bilibili_contact_info", contact_item)
|
||||
|
||||
|
||||
async def update_contact_by_id(id: str, contact_item: Dict) -> int:
    """
    Update the contact relation row whose "id" column equals the given id.

    Args:
        id: value matched against the "id" column of bilibili_contact_info
        contact_item: contact item dict handed to the table update

    Returns:
        The value returned by the table update (annotated as the affected row count).

    """
    db: AsyncMysqlDB = media_crawler_db_var.get()
    return await db.update_table("bilibili_contact_info", contact_item, "id", id)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user