This commit is contained in:
程序员阿江(Relakkes)
2026-02-03 20:40:15 +08:00
parent fb42ab5b60
commit 6625663bde
3 changed files with 14 additions and 4 deletions

View File

@@ -24,7 +24,7 @@ from urllib.parse import urlencode
import httpx
from playwright.async_api import BrowserContext, Page
from tenacity import retry, stop_after_attempt, wait_fixed
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_not_exception_type
import config
from base.base_crawler import AbstractApiClient
@@ -34,7 +34,7 @@ from tools import utils
if TYPE_CHECKING:
from proxy.proxy_ip_pool import ProxyIpPool
from .exception import DataFetchError, IPBlockError
from .exception import DataFetchError, IPBlockError, NoteNotFoundError
from .field import SearchNoteType, SearchSortType
from .help import get_search_id
from .extractor import XiaoHongShuExtractor
@@ -60,6 +60,7 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
self._domain = "https://www.xiaohongshu.com"
self.IP_ERROR_STR = "Network connection error, please check network settings or restart"
self.IP_ERROR_CODE = 300012
self.NOTE_NOT_FOUND_CODE = -510000
self.NOTE_ABNORMAL_STR = "Note status abnormal, please check later"
self.NOTE_ABNORMAL_CODE = -510001
self.playwright_page = playwright_page
@@ -109,7 +110,7 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
self.headers.update(headers)
return self.headers
@retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
@retry(stop=stop_after_attempt(3), wait=wait_fixed(1), retry=retry_if_not_exception_type(NoteNotFoundError))
async def request(self, method, url, **kwargs) -> Union[str, Any]:
"""
Wrapper for httpx common request method, processes request response
@@ -144,6 +145,8 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
return data.get("data", data.get("success", {}))
elif data["code"] == self.IP_ERROR_CODE:
raise IPBlockError(self.IP_ERROR_STR)
elif data["code"] in (self.NOTE_NOT_FOUND_CODE, self.NOTE_ABNORMAL_CODE):
raise NoteNotFoundError(f"Note not found or abnormal, code: {data['code']}")
else:
err_msg = data.get("msg", None) or f"{response.text}"
raise DataFetchError(err_msg)

View File

@@ -42,7 +42,7 @@ from tools.cdp_browser import CDPBrowserManager
from var import crawler_type_var, source_keyword_var
from .client import XiaoHongShuClient
from .exception import DataFetchError
from .exception import DataFetchError, NoteNotFoundError
from .field import SearchSortType
from .help import parse_note_info_from_note_url, parse_creator_info_from_url, get_search_id
from .login import XiaoHongShuLogin
@@ -308,6 +308,9 @@ class XiaoHongShuCrawler(AbstractCrawler):
return note_detail
except NoteNotFoundError as ex:
utils.logger.warning(f"[XiaoHongShuCrawler.get_note_detail_async_task] Note not found: {note_id}, {ex}")
return None
except DataFetchError as ex:
utils.logger.error(f"[XiaoHongShuCrawler.get_note_detail_async_task] Get note detail error: {ex}")
return None

View File

@@ -27,3 +27,7 @@ class DataFetchError(RequestError):
class IPBlockError(RequestError):
"""fetch so fast that the server block us ip"""
class NoteNotFoundError(RequestError):
"""Note does not exist or is abnormal"""