mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 11:27:26 +08:00
refactor: 数据存储重构,分离不同类型的存储实现
This commit is contained in:
@@ -10,7 +10,7 @@ from playwright.async_api import BrowserContext, Page
|
||||
import config
|
||||
from tools import utils
|
||||
|
||||
from .exception import DataFetchError, IPBlockError
|
||||
from .exception import DataFetchError
|
||||
from .graphql import KuaiShouGraphQL
|
||||
|
||||
|
||||
@@ -56,13 +56,21 @@ class KuaiShouClient:
|
||||
return await self.request(method="POST", url=f"{self._host}{uri}",
|
||||
data=json_str, headers=self.headers)
|
||||
|
||||
@staticmethod
|
||||
async def pong() -> bool:
|
||||
async def pong(self) -> bool:
|
||||
"""get a note to check if login state is ok"""
|
||||
utils.logger.info("[KuaiShouClient.pong] Begin pong kuaishou...")
|
||||
ping_flag = False
|
||||
try:
|
||||
pass
|
||||
post_data = {
|
||||
"operationName": "visionProfileUserList",
|
||||
"variables": {
|
||||
"ftype": 1,
|
||||
},
|
||||
"query": self.graphql.get("vision_profile")
|
||||
}
|
||||
res = await self.post("", post_data)
|
||||
if res.get("visionProfileUserList", {}).get("result") == 1:
|
||||
ping_flag = True
|
||||
except Exception as e:
|
||||
utils.logger.error(f"[KuaiShouClient.pong] Pong kuaishou failed: {e}, and try to login again...")
|
||||
ping_flag = False
|
||||
|
||||
@@ -10,8 +10,8 @@ from playwright.async_api import (BrowserContext, BrowserType, Page,
|
||||
|
||||
import config
|
||||
from base.base_crawler import AbstractCrawler
|
||||
from models import kuaishou
|
||||
from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
|
||||
from store import kuaishou as kuaishou_store
|
||||
from tools import utils
|
||||
from var import comment_tasks_var, crawler_type_var
|
||||
|
||||
@@ -106,7 +106,7 @@ class KuaishouCrawler(AbstractCrawler):
|
||||
|
||||
for video_detail in vision_search_photo.get("feeds"):
|
||||
video_id_list.append(video_detail.get("photo", {}).get("id"))
|
||||
await kuaishou.update_kuaishou_video(video_item=video_detail)
|
||||
await kuaishou_store.update_kuaishou_video(video_item=video_detail)
|
||||
|
||||
# batch fetch video comments
|
||||
page += 1
|
||||
@@ -121,7 +121,7 @@ class KuaishouCrawler(AbstractCrawler):
|
||||
video_details = await asyncio.gather(*task_list)
|
||||
for video_detail in video_details:
|
||||
if video_detail is not None:
|
||||
await kuaishou.update_kuaishou_video(video_detail)
|
||||
await kuaishou_store.update_kuaishou_video(video_detail)
|
||||
await self.batch_get_video_comments(config.KS_SPECIFIED_ID_LIST)
|
||||
|
||||
async def get_video_info_task(self, video_id: str, semaphore: asyncio.Semaphore) -> Optional[Dict]:
|
||||
@@ -167,7 +167,7 @@ class KuaishouCrawler(AbstractCrawler):
|
||||
await self.ks_client.get_video_all_comments(
|
||||
photo_id=video_id,
|
||||
crawl_interval=random.random(),
|
||||
callback=kuaishou.batch_update_ks_video_comments
|
||||
callback=kuaishou_store.batch_update_ks_video_comments
|
||||
)
|
||||
except DataFetchError as ex:
|
||||
utils.logger.error(f"[KuaishouCrawler.get_comments] get video_id: {video_id} comment error: {ex}")
|
||||
|
||||
@@ -11,7 +11,7 @@ class KuaiShouGraphQL:
|
||||
self.load_graphql_queries()
|
||||
|
||||
def load_graphql_queries(self):
|
||||
graphql_files = ["search_query.graphql", "video_detail.graphql", "comment_list.graphql"]
|
||||
graphql_files = ["search_query.graphql", "video_detail.graphql", "comment_list.graphql", "vision_profile.graphql"]
|
||||
|
||||
for file in graphql_files:
|
||||
with open(self.graphql_dir + file, mode="r") as f:
|
||||
|
||||
16
media_platform/kuaishou/graphql/vision_profile.graphql
Normal file
16
media_platform/kuaishou/graphql/vision_profile.graphql
Normal file
@@ -0,0 +1,16 @@
|
||||
query visionProfileUserList($pcursor: String, $ftype: Int) {
|
||||
visionProfileUserList(pcursor: $pcursor, ftype: $ftype) {
|
||||
result
|
||||
fols {
|
||||
user_name
|
||||
headurl
|
||||
user_text
|
||||
isFollowing
|
||||
user_id
|
||||
__typename
|
||||
}
|
||||
hostName
|
||||
pcursor
|
||||
__typename
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user