From 3f5925e32695c9aea2d2662e36352e8fb53d0bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A8=8B=E5=BA=8F=E5=91=98=E9=98=BF=E6=B1=9F=28Relakkes?= =?UTF-8?q?=29?= Date: Mon, 27 Oct 2025 19:06:07 +0800 Subject: [PATCH] feat: update xhs sign --- config/xhs_config.py | 2 +- media_platform/xhs/client.py | 15 ++++---- media_platform/xhs/core.py | 11 ++---- media_platform/xhs/help.py | 5 +-- media_platform/xhs/secsign.py | 66 +++++++++++++++++++++++++++++++++++ 5 files changed, 79 insertions(+), 20 deletions(-) create mode 100644 media_platform/xhs/secsign.py diff --git a/config/xhs_config.py b/config/xhs_config.py index 9296905..2359b96 100644 --- a/config/xhs_config.py +++ b/config/xhs_config.py @@ -17,7 +17,7 @@ SORT_TYPE = "popularity_descending" # 指定笔记URL列表, 必须要携带xsec_token参数 XHS_SPECIFIED_NOTE_URL_LIST = [ - "https://www.xiaohongshu.com/explore/66fad51c000000001b0224b8?xsec_token=AB3rO-QopW5sgrJ41GwN01WCXh6yWPxjSoFI9D5JIMgKw=&xsec_source=pc_search" + "https://www.xiaohongshu.com/explore/68f99f6d0000000007033fcf?xsec_token=ABZEzjuN2fPjKF9EcMsCCxfbt3IBRsFZldGFoCJbdDmXI=&xsec_source=pc_feed" # ........................ 
] diff --git a/media_platform/xhs/client.py b/media_platform/xhs/client.py index c538874..652667f 100644 --- a/media_platform/xhs/client.py +++ b/media_platform/xhs/client.py @@ -10,23 +10,24 @@ import asyncio import json -import re +import time from typing import Any, Callable, Dict, List, Optional, Union from urllib.parse import urlencode import httpx from playwright.async_api import BrowserContext, Page -from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_result +from tenacity import retry, stop_after_attempt, wait_fixed import config from base.base_crawler import AbstractApiClient from tools import utils -from html import unescape + from .exception import DataFetchError, IPBlockError from .field import SearchNoteType, SearchSortType from .help import get_search_id, sign from .extractor import XiaoHongShuExtractor +from .secsign import seccore_signv2_playwright class XiaoHongShuClient(AbstractApiClient): @@ -63,15 +64,13 @@ class XiaoHongShuClient(AbstractApiClient): Returns: """ - encrypt_params = await self.playwright_page.evaluate( - "([url, data]) => window._webmsxyw(url,data)", [url, data] - ) + x_s = await seccore_signv2_playwright(self.playwright_page, url, data) local_storage = await self.playwright_page.evaluate("() => window.localStorage") signs = sign( a1=self.cookie_dict.get("a1", ""), b1=local_storage.get("b1", ""), - x_s=encrypt_params.get("X-s", ""), - x_t=str(encrypt_params.get("X-t", "")), + x_s=x_s, + x_t=str(int(time.time())), ) headers = { diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index 3567c6b..68d2139 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -282,16 +282,9 @@ class XiaoHongShuCrawler(AbstractCrawler): async with semaphore: try: utils.logger.info(f"[get_note_detail_async_task] Begin get note detail, note_id: {note_id}") - - try: - note_detail = await self.xhs_client.get_note_by_id(note_id, xsec_source, xsec_token) - except RetryError: - pass - + note_detail = 
await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=True) if not note_detail: - note_detail = await self.xhs_client.get_note_by_id_from_html(note_id, xsec_source, xsec_token, enable_cookie=True) - if not note_detail: - raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}") + raise Exception(f"[get_note_detail_async_task] Failed to get note detail, Id: {note_id}") note_detail.update({"xsec_token": xsec_token, "xsec_source": xsec_source}) diff --git a/media_platform/xhs/help.py b/media_platform/xhs/help.py index 2838b67..652c6c8 100644 --- a/media_platform/xhs/help.py +++ b/media_platform/xhs/help.py @@ -27,16 +27,17 @@ def sign(a1="", b1="", x_s="", x_t=""): "s0": 3, # getPlatformCode "s1": "", "x0": "1", # localStorage.getItem("b1b1") - "x1": "3.7.8-2", # version + "x1": "4.2.2", # version "x2": "Mac OS", "x3": "xhs-pc-web", - "x4": "4.27.2", + "x4": "4.74.0", "x5": a1, # cookie of a1 "x6": x_t, "x7": x_s, "x8": b1, # localStorage.getItem("b1") "x9": mrc(x_t + x_s + b1), "x10": 154, # getSigCount + "x11": "normal" } encode_str = encodeUtf8(json.dumps(common, separators=(',', ':'))) x_s_common = b64Encode(encode_str) diff --git a/media_platform/xhs/secsign.py b/media_platform/xhs/secsign.py new file mode 100644 index 0000000..2a34daa --- /dev/null +++ b/media_platform/xhs/secsign.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则: +# 1. 不得用于任何商业用途。 +# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。 +# 3. 不得进行大规模爬取或对平台造成运营干扰。 +# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。 +# 5. 
# Must not be used for any illegal or improper purpose.
#
# See the LICENSE file in the project root for the detailed license terms.
# By using this code you agree to the principles above and to all terms in LICENSE.

import hashlib
import base64
import json
from typing import Any


def _build_c(e: Any, a: Any) -> str:
    """Build the plaintext string that is handed to the in-page signer.

    Args:
        e: First signing input; always converted with ``str``.
        a: Second signing input. A ``dict`` or ``list`` is appended as
            compact JSON (no spaces after separators, non-ASCII characters
            kept verbatim); a ``str`` is appended unchanged; any other type
            (``None`` included) contributes nothing.

    Returns:
        ``str(e)`` followed by the serialized form of ``a``, if any.
    """
    c = str(e)
    if isinstance(a, (dict, list)):
        c += json.dumps(a, separators=(",", ":"), ensure_ascii=False)
    elif isinstance(a, str):
        c += a
    # Any other type is deliberately not appended.
    return c


# ---------------------------
# p.Pu = MD5(c) => lowercase hex
# ---------------------------
def _md5_hex(s: str) -> str:
    """Return the lowercase hexadecimal MD5 digest of ``s`` encoded as UTF-8."""
    return hashlib.md5(s.encode("utf-8")).hexdigest()


# ============================================================
# Playwright variant (async): takes a ``page`` (Page object) and
# calls ``window.mnsv2`` through ``page.evaluate``.
# ============================================================
async def seccore_signv2_playwright(
    page,  # Playwright Page
    e: Any,
    a: Any,
) -> str:
    """Generate an ``XYS_``-prefixed signature token via ``window.mnsv2``.

    ``window.mnsv2`` must already exist in the page context (for example
    because the target site's scripts have been loaded into the page).

    Args:
        page: Object exposing an awaitable ``evaluate(expression, arg)``
            method (a Playwright ``Page``).
        e: First signing input, see ``_build_c``.
        a: Second signing input, see ``_build_c``. It is also embedded
            unchanged in the token under key ``"x4"``, so it has to be
            JSON-serializable.

    Returns:
        ``"XYS_"`` followed by the Base64 encoding of the compact JSON
        object ``{"x0", "x1", "x2", "x3", "x4"}``, where ``"x3"`` holds the
        value returned by ``window.mnsv2``.

    Raises:
        TypeError: If ``a`` cannot be serialized by ``json.dumps``.
    """
    c = _build_c(e, a)
    d = _md5_hex(c)

    # page.evaluate forwards exactly ONE argument to the page function, so
    # the pair is shipped as a list and destructured on the JavaScript side.
    # A plain "(c, d) => ..." would receive the whole list as ``c`` and
    # ``undefined`` as ``d``.
    s = await page.evaluate("([c, d]) => window.mnsv2(c, d)", [c, d])
    f = {
        "x0": "4.2.6",
        "x1": "xhs-pc-web",
        "x2": "Mac OS",
        "x3": s,
        "x4": a,
    }
    payload = json.dumps(f, separators=(",", ":"), ensure_ascii=False).encode("utf-8")
    # The token is returned only; it is intentionally not echoed to stdout.
    return "XYS_" + base64.b64encode(payload).decode("ascii")