feat: issue #14

refactor: 优化小红书crawler流程代码
This commit is contained in:
Relakkes
2023-07-15 17:11:53 +08:00
parent e5f4ecd8ec
commit dad8d56ab5
6 changed files with 138 additions and 77 deletions

View File

@@ -1,13 +1,16 @@
import json
import logging
import asyncio
from typing import Optional, Dict
import httpx
from playwright.async_api import Page
from playwright.async_api import BrowserContext
from .help import sign, get_search_id
from .field import SearchSortType, SearchNoteType
from .exception import DataFetchError, IPBlockError
from tools import utils
class XHSClient:
@@ -77,6 +80,21 @@ class XHSClient:
return await self.request(method="POST", url=f"{self._host}{uri}",
data=json_str, headers=headers)
async def ping(self) -> bool:
    """Fetch a fixed note to check whether the login state is OK.

    Returns:
        True when the note returned by ``get_note_by_id`` carries the
        probe note id under ``"note_id"``; False when it does not, or
        when the fetch raises ``DataFetchError``.
    """
    logging.info("begin to ping xhs...")
    probe_note_id = "5e5cb38a000000000100185e"
    try:
        fetched_card: Dict = await self.get_note_by_id(probe_note_id)
        login_ok = fetched_card.get("note_id") == probe_note_id
    except DataFetchError:
        # A failed fetch is reported as "not logged in" rather than raised.
        login_ok = False
    return login_ok
async def update_cookies(self, browser_context: BrowserContext):
    """Refresh this client's cookie state from a browser context.

    Awaits ``browser_context.cookies()``, converts the result with
    ``utils.convert_cookies``, then stores the returned string in
    ``self.headers["Cookie"]`` and the returned dict in
    ``self.cookie_dict``.
    """
    browser_cookies = await browser_context.cookies()
    # Targets are assigned left to right: the header first, then the dict.
    self.headers["Cookie"], self.cookie_dict = utils.convert_cookies(browser_cookies)
async def get_note_by_keyword(
self, keyword: str,
page: int = 1, page_size: int = 20,