refactor: 数据存储重构,分离不同类型的存储实现

This commit is contained in:
Relakkes
2024-01-14 22:06:31 +08:00
parent e31aebbdfb
commit 894dabcf63
37 changed files with 1427 additions and 864 deletions

View File

@@ -10,7 +10,7 @@ from playwright.async_api import BrowserContext, Page
import config
from tools import utils
from .exception import DataFetchError, IPBlockError
from .exception import DataFetchError
from .graphql import KuaiShouGraphQL
@@ -56,13 +56,21 @@ class KuaiShouClient:
return await self.request(method="POST", url=f"{self._host}{uri}",
data=json_str, headers=self.headers)
@staticmethod
async def pong() -> bool:
async def pong(self) -> bool:
"""get a note to check if login state is ok"""
utils.logger.info("[KuaiShouClient.pong] Begin pong kuaishou...")
ping_flag = False
try:
pass
post_data = {
"operationName": "visionProfileUserList",
"variables": {
"ftype": 1,
},
"query": self.graphql.get("vision_profile")
}
res = await self.post("", post_data)
if res.get("visionProfileUserList", {}).get("result") == 1:
ping_flag = True
except Exception as e:
utils.logger.error(f"[KuaiShouClient.pong] Pong kuaishou failed: {e}, and try to login again...")
ping_flag = False

View File

@@ -10,8 +10,8 @@ from playwright.async_api import (BrowserContext, BrowserType, Page,
import config
from base.base_crawler import AbstractCrawler
from models import kuaishou
from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
from store import kuaishou as kuaishou_store
from tools import utils
from var import comment_tasks_var, crawler_type_var
@@ -106,7 +106,7 @@ class KuaishouCrawler(AbstractCrawler):
for video_detail in vision_search_photo.get("feeds"):
video_id_list.append(video_detail.get("photo", {}).get("id"))
await kuaishou.update_kuaishou_video(video_item=video_detail)
await kuaishou_store.update_kuaishou_video(video_item=video_detail)
# batch fetch video comments
page += 1
@@ -121,7 +121,7 @@ class KuaishouCrawler(AbstractCrawler):
video_details = await asyncio.gather(*task_list)
for video_detail in video_details:
if video_detail is not None:
await kuaishou.update_kuaishou_video(video_detail)
await kuaishou_store.update_kuaishou_video(video_detail)
await self.batch_get_video_comments(config.KS_SPECIFIED_ID_LIST)
async def get_video_info_task(self, video_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict]:
@@ -167,7 +167,7 @@ class KuaishouCrawler(AbstractCrawler):
await self.ks_client.get_video_all_comments(
photo_id=video_id,
crawl_interval=random.random(),
callback=kuaishou.batch_update_ks_video_comments
callback=kuaishou_store.batch_update_ks_video_comments
)
except DataFetchError as ex:
utils.logger.error(f"[KuaishouCrawler.get_comments] get video_id: {video_id} comment error: {ex}")

View File

@@ -11,7 +11,7 @@ class KuaiShouGraphQL:
self.load_graphql_queries()
def load_graphql_queries(self):
graphql_files = ["search_query.graphql", "video_detail.graphql", "comment_list.graphql"]
graphql_files = ["search_query.graphql", "video_detail.graphql", "comment_list.graphql", "vision_profile.graphql"]
for file in graphql_files:
with open(self.graphql_dir + file, mode="r") as f:

View File

@@ -0,0 +1,16 @@
query visionProfileUserList($pcursor: String, $ftype: Int) {
visionProfileUserList(pcursor: $pcursor, ftype: $ftype) {
result
fols {
user_name
headurl
user_text
isFollowing
user_id
__typename
}
hostName
pcursor
__typename
}
}