mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-05-23 02:57:26 +08:00
fix: xhs creator error
This commit is contained in:
@@ -20,7 +20,7 @@
|
||||
import asyncio
|
||||
import json
|
||||
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
|
||||
from urllib.parse import urlencode
|
||||
from urllib.parse import quote, urlencode
|
||||
|
||||
import httpx
|
||||
from playwright.async_api import BrowserContext, Page
|
||||
@@ -39,7 +39,7 @@ from .exception import DataFetchError, IPBlockError, NoteNotFoundError
|
||||
from .field import SearchNoteType, SearchSortType
|
||||
from .help import get_search_id
|
||||
from .extractor import XiaoHongShuExtractor
|
||||
from .playwright_sign import sign_with_playwright
|
||||
from .playwright_sign import sign_with_xhshow
|
||||
|
||||
|
||||
class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
@@ -71,19 +71,16 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
self.init_proxy_pool(proxy_ip_pool)
|
||||
|
||||
async def _pre_headers(self, url: str, params: Optional[Dict] = None, payload: Optional[Dict] = None) -> Dict:
|
||||
"""Request header parameter signing (using playwright injection method)
|
||||
"""请求头参数签名 (使用 xhshow 纯算法)
|
||||
|
||||
Args:
|
||||
url: Request URL
|
||||
params: GET request parameters
|
||||
payload: POST request parameters
|
||||
url: 请求 URI path
|
||||
params: GET 请求参数
|
||||
payload: POST 请求参数
|
||||
|
||||
Returns:
|
||||
Dict: Signed request header parameters
|
||||
Dict: 签名后的请求头参数
|
||||
"""
|
||||
a1_value = self.cookie_dict.get("a1", "")
|
||||
|
||||
# Determine request data, method and URI
|
||||
if params is not None:
|
||||
data = params
|
||||
method = "GET"
|
||||
@@ -93,12 +90,11 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
else:
|
||||
raise ValueError("params or payload is required")
|
||||
|
||||
# Generate signature using playwright injection method
|
||||
signs = await sign_with_playwright(
|
||||
page=self.playwright_page,
|
||||
# 使用 xhshow 纯算法生成签名
|
||||
signs = sign_with_xhshow(
|
||||
uri=url,
|
||||
data=data,
|
||||
a1=a1_value,
|
||||
cookie_str=self.headers.get("Cookie", ""),
|
||||
method=method,
|
||||
)
|
||||
|
||||
@@ -152,6 +148,15 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
err_msg = data.get("msg", None) or f"{response.text}"
|
||||
raise DataFetchError(err_msg)
|
||||
|
||||
@staticmethod
|
||||
def _build_query_string(params: Dict) -> str:
|
||||
"""Build URL query string with encoding matching browser behavior (commas not encoded)"""
|
||||
parts = []
|
||||
for key, value in params.items():
|
||||
value_str = str(value) if value is not None else ""
|
||||
parts.append(f"{key}={quote(value_str, safe=',')}")
|
||||
return "&".join(parts)
|
||||
|
||||
async def get(self, uri: str, params: Optional[Dict] = None) -> Dict:
|
||||
"""
|
||||
GET request, signs request headers
|
||||
@@ -163,10 +168,15 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
|
||||
"""
|
||||
headers = await self._pre_headers(uri, params)
|
||||
full_url = f"{self._host}{uri}"
|
||||
# Build URL manually to ensure query string encoding matches the sign string
|
||||
# (httpx's default params encoding differs from browser/XHS frontend behavior)
|
||||
if params:
|
||||
full_url = f"{self._host}{uri}?{self._build_query_string(params)}"
|
||||
else:
|
||||
full_url = f"{self._host}{uri}"
|
||||
|
||||
return await self.request(
|
||||
method="GET", url=full_url, headers=headers, params=params
|
||||
method="GET", url=full_url, headers=headers
|
||||
)
|
||||
|
||||
async def post(self, uri: str, data: dict, **kwargs) -> Dict:
|
||||
@@ -568,6 +578,7 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
"num": page_size,
|
||||
"cursor": cursor,
|
||||
"user_id": creator,
|
||||
"image_formats": "jpg,webp,avif",
|
||||
"xsec_token": xsec_token,
|
||||
"xsec_source": xsec_source,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user