fix: restore Tieba crawling after PC page rewrite

Tieba search, detail, comments, creator, and forum-list pages now rely on the current signed PC JSON APIs instead of brittle HTML selectors. The CLI also maps Tieba detail and creator arguments into the platform-specific config so command-line runs exercise the intended mode.

Constraint: Tieba PC pages no longer expose stable HTML structures for search, creator, and forum-list extraction
Constraint: Current PC APIs require browser cookies, tbs, and the web client signing convention
Rejected: Keep expanding HTML selectors | search and creator pages returned large documents with empty parsed results after the redesign
Confidence: high
Scope-risk: moderate
Directive: Do not replace these API paths with page HTML parsing without re-verifying the current Tieba network requests
Tested: uv run pytest tests/test_tieba_client_pagination.py tests/test_cmd_arg_tieba.py tests/test_tieba_extractor.py -q
Tested: uv run python -m py_compile cmd_arg/arg.py media_platform/tieba/help.py media_platform/tieba/client.py media_platform/tieba/core.py tests/test_cmd_arg_tieba.py tests/test_tieba_client_pagination.py tests/test_tieba_extractor.py
Tested: uv run main.py --platform tieba --type search --keywords 编程兼职 --get_comment false
Tested: uv run main.py --platform tieba --type detail --specified_id 9835114923 --get_comment true --max_comments_count_singlenotes 3
Tested: uv run main.py --platform tieba --type creator --creator_id https://tieba.baidu.com/home/main?id=tb.1.6ad0cd4a.7ZcjVYWa7UpHttCld2OppA --get_comment false
Not-tested: Second-level Tieba comment API migration; this path still uses the existing /p/comment HTML parser
Not-tested: Full pytest suite has one pre-existing unrelated XHS Excel store assertion failure
程序员阿江(Relakkes)
2026-04-30 18:20:46 +08:00
parent 1572b64334
commit f328ee35b5
7 changed files with 1308 additions and 176 deletions
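For reference while reading the client diff: the web-client signing convention named in the constraints above is a sorted key=value concatenation plus a shared secret, MD5-hexed. A minimal standalone sketch mirroring the new _sign_pc_params (the secret and the subapp_type/_client_type defaults are copied from the diff; the kz value is the thread id from the Tested lines):

import hashlib

def sign_pc_params(params: dict, secret: str) -> str:
    # Concatenate key=value in sorted key order, skipping sign/sig and None
    # values, append the secret, then take the hex MD5 digest.
    sign_text = "".join(
        f"{key}={params[key]}"
        for key in sorted(params)
        if key not in {"sign", "sig"} and params[key] is not None
    )
    return hashlib.md5((sign_text + secret).encode("utf-8")).hexdigest()

params = {"subapp_type": "pc", "_client_type": "20", "kz": "9835114923"}
params["sign"] = sign_pc_params(params, "36770b1f34c9bbf2e7d1a99d2b82fa9e")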

View File

@@ -22,6 +22,7 @@ from __future__ import annotations
import sys
import re
from enum import Enum
from types import SimpleNamespace
from typing import Iterable, Optional, Sequence, Type, TypeVar
@@ -135,6 +136,21 @@ def _inject_init_db_default(args: Sequence[str]) -> list[str]:
return normalized
def _normalize_tieba_note_id(value: str) -> str:
"""Accept a raw Tieba thread id or a /p/<id> URL."""
value = value.strip()
match = re.search(r"/p/(\d+)", value)
return match.group(1) if match else value
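Expected normalization, using the ids exercised by the new CLI test below (a doctest-style check, not part of the diff):

# Both a bare thread id and a /p/<id> URL normalize to the bare id.
assert _normalize_tieba_note_id("https://tieba.baidu.com/p/10451142633") == "10451142633"
assert _normalize_tieba_note_id("9835114923") == "9835114923"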
def _normalize_tieba_creator_url(value: str) -> str:
"""Accept a Tieba creator homepage URL or a portrait id."""
value = value.strip()
if value.startswith("http://") or value.startswith("https://"):
return value
return f"https://tieba.baidu.com/home/main?id={value}"
async def parse_cmd(argv: Optional[Sequence[str]] = None):
"""Parse command line arguments using Typer."""
@@ -344,6 +360,10 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
config.WEIBO_SPECIFIED_ID_LIST = specified_id_list
elif platform == PlatformEnum.KUAISHOU:
config.KS_SPECIFIED_ID_LIST = specified_id_list
elif platform == PlatformEnum.TIEBA:
config.TIEBA_SPECIFIED_ID_LIST = [
_normalize_tieba_note_id(item) for item in specified_id_list
]
if creator_id_list:
if platform == PlatformEnum.XHS:
@@ -356,6 +376,10 @@ async def parse_cmd(argv: Optional[Sequence[str]] = None):
config.WEIBO_CREATOR_ID_LIST = creator_id_list
elif platform == PlatformEnum.KUAISHOU:
config.KS_CREATOR_ID_LIST = creator_id_list
elif platform == PlatformEnum.TIEBA:
config.TIEBA_CREATOR_URL_LIST = [
_normalize_tieba_creator_url(item) for item in creator_id_list
]
return SimpleNamespace(
platform=config.PLATFORM,

View File

@@ -18,9 +18,10 @@
# Use of this code constitutes agreement to the above principles and all terms of the LICENSE.
import asyncio
import hashlib
import json
from typing import Any, Callable, Dict, List, Optional, Union
from urllib.parse import urlencode, quote
from urllib.parse import urlencode, quote, parse_qs, unquote, urlparse
import requests
from playwright.async_api import BrowserContext, Page
@@ -35,6 +36,8 @@ from tools import utils
from .field import SearchNoteType, SearchSortType
from .help import TieBaExtractor
PC_SIGN_SECRET = "36770b1f34c9bbf2e7d1a99d2b82fa9e"
class BaiduTieBaClient(AbstractApiClient):
@@ -58,6 +61,128 @@ class BaiduTieBaClient(AbstractApiClient):
self._page_extractor = TieBaExtractor()
self.default_ip_proxy = default_ip_proxy
self.playwright_page = playwright_page # Playwright page object
self._pc_tbs = ""
@staticmethod
def _sign_pc_params(params: Dict[str, Any]) -> str:
sign_text = ""
for key in sorted(params):
if key in {"sign", "sig"} or params[key] is None:
continue
sign_text += f"{key}={params[key]}"
sign_text += PC_SIGN_SECRET
return hashlib.md5(sign_text.encode("utf-8")).hexdigest()
async def _ensure_tieba_origin(self) -> None:
if not self.playwright_page:
raise Exception("playwright_page is required for tieba PC API requests")
if not self.playwright_page.url.startswith(self._host):
await self.playwright_page.goto(self._host, wait_until="domcontentloaded")
async def _fetch_json_by_browser(
self,
uri: str,
method: str = "GET",
params: Optional[Dict[str, Any]] = None,
data: Optional[Dict[str, Any]] = None,
use_sign: bool = False,
) -> Dict:
"""
Fetch current Tieba PC JSON APIs from the browser context.
These APIs rely on logged-in browser cookies and Baidu's PC signing
convention; direct Python requests can be blocked by local proxy or TLS interception.
"""
await self._ensure_tieba_origin()
params = {k: v for k, v in (params or {}).items() if v is not None}
data = {k: v for k, v in (data or {}).items() if v is not None}
if use_sign:
sign_source = data if method.upper() == "POST" else params
sign_source.setdefault("subapp_type", "pc")
sign_source.setdefault("_client_type", "20")
sign_source["sign"] = self._sign_pc_params(sign_source)
url = f"{self._host}{uri}"
if params:
url = f"{url}?{urlencode(params)}"
body = urlencode(data) if data else ""
response = await self.playwright_page.evaluate(
"""async ({ url, method, body }) => {
const headers = { "Accept": "application/json, text/plain, */*" };
const options = { method, credentials: "include", headers };
if (method === "POST") {
headers["Content-Type"] = "application/x-www-form-urlencoded;charset=UTF-8";
options.body = body;
}
const resp = await fetch(url, options);
const text = await resp.text();
return { status: resp.status, text };
}""",
{"url": url, "method": method.upper(), "body": body},
)
if response["status"] != 200:
raise Exception(f"Tieba PC API failed, status={response['status']}, url={url}")
try:
json_data = json.loads(response["text"])
except json.JSONDecodeError as exc:
raise Exception(f"Tieba PC API returned non-JSON, url={url}, body={response['text'][:500]}") from exc
error_code = json_data.get("error_code", json_data.get("no", 0))
if str(error_code) not in {"0", "None"}:
raise Exception(f"Tieba PC API error, url={url}, response={json_data}")
return json_data
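One subtlety worth flagging: the error check above accepts two payload families, because the page_pc-style endpoints report error_code while the mobile search endpoint reports no. Both success shapes appear in the new tests; sketched side by side:

# Success shapes accepted by the error_code/no check:
ok_page_pc = {"error_code": 0, "thread": {}}   # /c/f/pb/page_pc family
ok_multsearch = {"no": 0, "data": {}}          # /mo/q/search/multsearch family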
async def _get_pc_tbs(self) -> str:
if self._pc_tbs:
return self._pc_tbs
sync_data = await self._fetch_json_by_browser(
"/c/s/pc/sync",
params={"subapp_type": "pc", "_client_type": "20"},
use_sign=True,
)
self._pc_tbs = (
sync_data.get("data", {})
.get("anti", {})
.get("tbs", "")
)
if not self._pc_tbs:
raise Exception(f"Can not get Tieba tbs from pc sync API: {sync_data}")
return self._pc_tbs
async def _get_pc_page_data(self, note_id: str, page: int = 1) -> Dict:
tbs = await self._get_pc_tbs()
return await self._fetch_json_by_browser(
"/c/f/pb/page_pc",
method="POST",
data={
"pn": page,
"lz": 0,
"r": 2,
"mark_type": 0,
"back": 0,
"fr": "",
"kz": note_id,
"session_request_times": 1,
"tbs": tbs,
"subapp_type": "pc",
"_client_type": "20",
},
use_sign=True,
)
@staticmethod
def _extract_creator_portrait(creator_url: str) -> str:
creator_url = (creator_url or "").strip()
if not creator_url:
return ""
if not creator_url.startswith(("http://", "https://")):
return creator_url.split("?")[0]
parsed = urlparse(creator_url)
query = parse_qs(parsed.query)
portrait = (
query.get("id", [""])[0]
or query.get("portrait", [""])[0]
or query.get("un", [""])[0]
)
return unquote(portrait).split("?")[0]
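Expected portrait extraction, using the creator URL from the Tested lines above (an assertion-style sketch, not part of the diff):

url = "https://tieba.baidu.com/home/main?id=tb.1.6ad0cd4a.7ZcjVYWa7UpHttCld2OppA"
assert BaiduTieBaClient._extract_creator_portrait(url) == "tb.1.6ad0cd4a.7ZcjVYWa7UpHttCld2OppA"
# A bare portrait id passes through unchanged.
assert BaiduTieBaClient._extract_creator_portrait("tb.1.example") == "tb.1.example"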
def _sync_request(self, method, url, proxy=None, **kwargs):
"""
@@ -270,35 +395,29 @@ class BaiduTieBaClient(AbstractApiClient):
utils.logger.error("[BaiduTieBaClient.get_notes_by_keyword] playwright_page is None, cannot use browser mode")
raise Exception("playwright_page is required for browser-based search")
# Construct search URL
# Example: https://tieba.baidu.com/f/search/res?ie=utf-8&qw=keyword
search_url = f"{self._host}/f/search/res"
params = {
"ie": "utf-8",
"qw": keyword,
"rn": page_size,
"rn": max(page_size, 20),
"st": sort.value,
"word": keyword,
"needbrand": 1,
"sug_type": 2,
"pn": page,
"sm": sort.value,
"only_thread": note_type.value,
"come_from": "search",
"subapp_type": "pc",
"_client_type": "20",
}
# Concatenate full URL
full_url = f"{search_url}?{urlencode(params)}"
utils.logger.info(f"[BaiduTieBaClient.get_notes_by_keyword] Accessing search page: {full_url}")
utils.logger.info(
f"[BaiduTieBaClient.get_notes_by_keyword] Accessing search API: "
f"{self._host}/mo/q/search/multsearch?{urlencode(params)}"
)
try:
# Use Playwright to access search page
await self.playwright_page.goto(full_url, wait_until="domcontentloaded")
# Wait for page loading, using delay setting from config file
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
# Get page HTML content
page_content = await self.playwright_page.content()
utils.logger.info(f"[BaiduTieBaClient.get_notes_by_keyword] Successfully retrieved search page HTML, length: {len(page_content)}")
# Extract search results
notes = self._page_extractor.extract_search_note_list(page_content)
api_data = await self._fetch_json_by_browser(
"/mo/q/search/multsearch",
params=params,
use_sign=True,
)
notes = self._page_extractor.extract_search_note_list_from_api(api_data)[:page_size]
utils.logger.info(f"[BaiduTieBaClient.get_notes_by_keyword] Extracted {len(notes)} posts")
return notes
@@ -319,23 +438,11 @@ class BaiduTieBaClient(AbstractApiClient):
utils.logger.error("[BaiduTieBaClient.get_note_by_id] playwright_page is None, cannot use browser mode")
raise Exception("playwright_page is required for browser-based note detail fetching")
# Construct post detail URL
note_url = f"{self._host}/p/{note_id}"
utils.logger.info(f"[BaiduTieBaClient.get_note_by_id] Accessing post detail page: {note_url}")
utils.logger.info(f"[BaiduTieBaClient.get_note_by_id] Accessing post detail API, note_id: {note_id}")
try:
# Use Playwright to access post detail page
await self.playwright_page.goto(note_url, wait_until="domcontentloaded")
# Wait for page loading, using delay setting from config file
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
# Get page HTML content
page_content = await self.playwright_page.content()
utils.logger.info(f"[BaiduTieBaClient.get_note_by_id] Successfully retrieved post detail HTML, length: {len(page_content)}")
# Extract post details
note_detail = self._page_extractor.extract_note_detail(page_content)
api_data = await self._get_pc_page_data(note_id=note_id, page=1)
note_detail = self._page_extractor.extract_note_detail_from_api(api_data)
return note_detail
except Exception as e:
@@ -367,23 +474,15 @@ class BaiduTieBaClient(AbstractApiClient):
current_page = 1
while note_detail.total_replay_page >= current_page and len(result) < max_count:
# Construct comment page URL
comment_url = f"{self._host}/p/{note_detail.note_id}?pn={current_page}"
utils.logger.info(f"[BaiduTieBaClient.get_note_all_comments] Accessing comment page: {comment_url}")
utils.logger.info(
f"[BaiduTieBaClient.get_note_all_comments] Accessing comment API, "
f"note_id: {note_detail.note_id}, page: {current_page}"
)
try:
# Use Playwright to access comment page
await self.playwright_page.goto(comment_url, wait_until="domcontentloaded")
# Wait for page loading, using delay setting from config file
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
# Get page HTML content
page_content = await self.playwright_page.content()
# Extract comments
comments = self._page_extractor.extract_tieba_note_parment_comments(
page_content, note_id=note_detail.note_id
api_data = await self._get_pc_page_data(note_id=note_detail.note_id, page=current_page)
comments = self._page_extractor.extract_tieba_note_parent_comments_from_api(
api_data, note_detail=note_detail
)
if not comments:
@@ -498,7 +597,7 @@ class BaiduTieBaClient(AbstractApiClient):
async def get_notes_by_tieba_name(self, tieba_name: str, page_num: int) -> List[TiebaNote]:
"""
Get post list by Tieba name (uses Playwright to access page, avoiding API detection)
Get post list by Tieba name from current PC forum JSON API.
Args:
tieba_name: Tieba name
page_num: Page number
@@ -510,23 +609,33 @@ class BaiduTieBaClient(AbstractApiClient):
utils.logger.error("[BaiduTieBaClient.get_notes_by_tieba_name] playwright_page is None, cannot use browser mode")
raise Exception("playwright_page is required for browser-based tieba note fetching")
# Construct Tieba post list URL
tieba_url = f"{self._host}/f?kw={quote(tieba_name)}&pn={page_num}"
utils.logger.info(f"[BaiduTieBaClient.get_notes_by_tieba_name] Accessing Tieba page: {tieba_url}")
page_size = 30
api_page = page_num // page_size + 1
tbs = await self._get_pc_tbs()
utils.logger.info(
f"[BaiduTieBaClient.get_notes_by_tieba_name] Accessing Tieba FRS API, "
f"tieba_name: {tieba_name}, page: {api_page}"
)
try:
# Use Playwright to access Tieba page
await self.playwright_page.goto(tieba_url, wait_until="domcontentloaded")
# Wait for page loading, using delay setting from config file
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
# Get page HTML content
page_content = await self.playwright_page.content()
utils.logger.info(f"[BaiduTieBaClient.get_notes_by_tieba_name] Successfully retrieved Tieba page HTML, length: {len(page_content)}")
# Extract post list
notes = self._page_extractor.extract_tieba_note_list(page_content)
api_data = await self._fetch_json_by_browser(
"/c/f/frs/page_pc",
method="POST",
data={
"kw": quote(tieba_name),
"pn": api_page,
"sort_type": -1,
"is_newfrs": 1,
"is_newfeed": 1,
"rn": page_size,
"rn_need": 10,
"tbs": tbs,
"subapp_type": "pc",
"_client_type": "20",
},
use_sign=True,
)
notes = self._page_extractor.extract_tieba_note_list_from_frs_api(api_data)[:page_size]
utils.logger.info(f"[BaiduTieBaClient.get_notes_by_tieba_name] Extracted {len(notes)} posts")
return notes
@@ -534,38 +643,72 @@ class BaiduTieBaClient(AbstractApiClient):
utils.logger.error(f"[BaiduTieBaClient.get_notes_by_tieba_name] Failed to get Tieba post list: {e}")
raise
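The pn-to-page conversion above bridges two conventions: core.py still advances page_num in offsets of 30 (the old HTML pager), while the FRS API takes a 1-based page number. A quick check, assuming page_size stays at 30:

page_size = 30
# Offset-style page_num values map onto 1-based FRS pages.
assert [pn // page_size + 1 for pn in (0, 30, 60)] == [1, 2, 3]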
async def get_creator_info_by_url(self, creator_url: str) -> str:
async def get_creator_info_by_url(self, creator_url: str) -> TiebaCreator:
"""
Get creator information by creator URL (uses Playwright to access page, avoiding API detection)
Get creator information by creator URL from current PC JSON API.
Args:
creator_url: Creator homepage URL
Returns:
str: Page HTML content
TiebaCreator: Creator information
"""
if not self.playwright_page:
utils.logger.error("[BaiduTieBaClient.get_creator_info_by_url] playwright_page is None, cannot use browser mode")
raise Exception("playwright_page is required for browser-based creator info fetching")
utils.logger.info(f"[BaiduTieBaClient.get_creator_info_by_url] Accessing creator homepage: {creator_url}")
portrait = self._extract_creator_portrait(creator_url)
if not portrait:
raise Exception(f"Can not extract Tieba creator portrait from url: {creator_url}")
utils.logger.info(
f"[BaiduTieBaClient.get_creator_info_by_url] Accessing creator info API, portrait: {portrait}"
)
try:
# Use Playwright to access creator homepage
await self.playwright_page.goto(creator_url, wait_until="domcontentloaded")
# Wait for page loading, using delay setting from config file
await asyncio.sleep(config.CRAWLER_MAX_SLEEP_SEC)
# Get page HTML content
page_content = await self.playwright_page.content()
utils.logger.info(f"[BaiduTieBaClient.get_creator_info_by_url] Successfully retrieved creator homepage HTML, length: {len(page_content)}")
return page_content
api_data = await self._fetch_json_by_browser(
"/c/u/pc/homeSidebarRight",
params={
"portrait": portrait,
"un": "",
"subapp_type": "pc",
"_client_type": "20",
},
use_sign=True,
)
return self._page_extractor.extract_creator_info_from_api(api_data)
except Exception as e:
utils.logger.error(f"[BaiduTieBaClient.get_creator_info_by_url] Failed to get creator homepage: {e}")
utils.logger.error(f"[BaiduTieBaClient.get_creator_info_by_url] Failed to get creator info: {e}")
raise
async def get_notes_by_creator_portrait(
self, portrait: str, page_number: int, page_size: int = 20
) -> Dict:
"""
Get creator's thread feed by creator portrait from current PC JSON API.
"""
if not self.playwright_page:
utils.logger.error("[BaiduTieBaClient.get_notes_by_creator_portrait] playwright_page is None, cannot use browser mode")
raise Exception("playwright_page is required for browser-based creator notes fetching")
utils.logger.info(
f"[BaiduTieBaClient.get_notes_by_creator_portrait] Accessing creator feed API, "
f"portrait: {portrait}, page: {page_number}"
)
return await self._fetch_json_by_browser(
"/c/u/feed/myThread",
params={
"pn": page_number,
"rn": page_size,
"portrait": portrait,
"type": 1,
"un": "",
"subapp_type": "pc",
"_client_type": "20",
},
use_sign=True,
)
async def get_notes_by_creator(self, user_name: str, page_number: int) -> Dict:
"""
Get creator's posts by creator (uses Playwright to access page, avoiding API detection)
@@ -648,12 +791,12 @@ class BaiduTieBaClient(AbstractApiClient):
while notes_has_more == 1 and (max_note_count == 0 or total_get_count < max_note_count):
notes_res = await self.get_notes_by_creator(user_name, page_number)
if not notes_res or notes_res.get("no") != 0:
utils.logger.error(f"[WeiboClient.get_notes_by_creator] got user_name:{user_name} notes failed, notes_res: {notes_res}")
utils.logger.error(f"[TieBaClient.get_notes_by_creator] got user_name:{user_name} notes failed, notes_res: {notes_res}")
break
notes_data = notes_res.get("data")
notes_has_more = notes_data.get("has_more")
notes = notes_data["thread_list"]
utils.logger.info(f"[WeiboClient.get_all_notes_by_creator] got user_name:{user_name} notes len : {len(notes)}")
utils.logger.info(f"[TieBaClient.get_all_notes_by_creator] got user_name:{user_name} notes len : {len(notes)}")
note_detail_task = [self.get_note_by_id(note['thread_id']) for note in notes]
notes = await asyncio.gather(*note_detail_task)
@@ -664,3 +807,59 @@ class BaiduTieBaClient(AbstractApiClient):
page_number += 1
total_get_count += page_per_count
return result
async def get_all_notes_by_creator_url(
self,
creator_url: str,
crawl_interval: float = 1.0,
callback: Optional[Callable] = None,
max_note_count: int = 0,
) -> List[TiebaNote]:
"""
Get all creator posts by current PC creator feed API.
"""
portrait = self._extract_creator_portrait(creator_url)
if not portrait:
raise Exception(f"Can not extract Tieba creator portrait from url: {creator_url}")
result: List[TiebaNote] = []
page_number = 1
page_size = 20
while max_note_count == 0 or len(result) < max_note_count:
notes_res = await self.get_notes_by_creator_portrait(
portrait=portrait,
page_number=page_number,
page_size=page_size,
)
thread_id_list = self._page_extractor.extract_creator_thread_id_list_from_api(notes_res)
if not thread_id_list:
utils.logger.info(
f"[BaiduTieBaClient.get_all_notes_by_creator_url] "
f"Creator portrait:{portrait} page:{page_number} has no threads"
)
break
if max_note_count:
thread_id_list = thread_id_list[: max_note_count - len(result)]
utils.logger.info(
f"[BaiduTieBaClient.get_all_notes_by_creator_url] "
f"got portrait:{portrait} thread ids len: {len(thread_id_list)}"
)
note_detail_task = [self.get_note_by_id(thread_id) for thread_id in thread_id_list]
notes = await asyncio.gather(*note_detail_task)
notes = [note for note in notes if note]
if callback and notes:
await callback(notes)
result.extend(notes)
data = notes_res.get("data", {})
has_more = int(data.get("has_more") or 0)
if not has_more:
break
await asyncio.sleep(crawl_interval)
page_number += 1
return result
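End to end, the creator path is now portrait extraction, then homeSidebarRight for the profile, then myThread for thread ids, then page_pc for each detail. A condensed usage sketch, assuming an initialized, logged-in BaiduTieBaClient named client inside an async context:

creator = await client.get_creator_info_by_url(
    "https://tieba.baidu.com/home/main?id=tb.1.example"  # hypothetical portrait
)
notes = await client.get_all_notes_by_creator_url(
    "https://tieba.baidu.com/home/main?id=tb.1.example",
    crawl_interval=1.0,
    max_note_count=20,  # 0 means no limit
)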

View File

@@ -213,7 +213,7 @@ class TieBaCrawler(AbstractCrawler):
Returns:
"""
tieba_limit_count = 50
tieba_limit_count = 30
if config.CRAWLER_MAX_NOTES_COUNT < tieba_limit_count:
config.CRAWLER_MAX_NOTES_COUNT = tieba_limit_count
for tieba_name in config.TIEBA_NAME_LIST:
@@ -245,7 +245,7 @@ class TieBaCrawler(AbstractCrawler):
page_number += tieba_limit_count
async def get_specified_notes(
self, note_id_list: List[str] = config.TIEBA_SPECIFIED_ID_LIST
self, note_id_list: Optional[List[str]] = None
):
"""
Get the information and comments of the specified post
@@ -255,6 +255,8 @@ class TieBaCrawler(AbstractCrawler):
Returns:
"""
if note_id_list is None:
note_id_list = config.TIEBA_SPECIFIED_ID_LIST
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
task_list = [
self.get_note_detail_async_task(note_id=note_id, semaphore=semaphore)
@@ -365,18 +367,15 @@ class TieBaCrawler(AbstractCrawler):
"""
utils.logger.info(
"[WeiboCrawler.get_creators_and_notes] Begin get weibo creators"
"[TieBaCrawler.get_creators_and_notes] Begin get tieba creators"
)
for creator_url in config.TIEBA_CREATOR_URL_LIST:
creator_page_html_content = await self.tieba_client.get_creator_info_by_url(
creator_info: TiebaCreator = await self.tieba_client.get_creator_info_by_url(
creator_url=creator_url
)
creator_info: TiebaCreator = self._page_extractor.extract_creator_info(
creator_page_html_content
)
if creator_info:
utils.logger.info(
f"[WeiboCrawler.get_creators_and_notes] creator info: {creator_info}"
f"[TieBaCrawler.get_creators_and_notes] creator info: {creator_info}"
)
if not creator_info:
raise Exception("Get creator info error")
@@ -385,12 +384,11 @@ class TieBaCrawler(AbstractCrawler):
# Get all note information of the creator
all_notes_list = (
await self.tieba_client.get_all_notes_by_creator_user_name(
user_name=creator_info.user_name,
await self.tieba_client.get_all_notes_by_creator_url(
creator_url=creator_url,
crawl_interval=0,
callback=tieba_store.batch_update_tieba_notes,
max_note_count=config.CRAWLER_MAX_NOTES_COUNT,
creator_page_html_content=creator_page_html_content,
)
)
@@ -398,7 +396,7 @@ class TieBaCrawler(AbstractCrawler):
else:
utils.logger.error(
f"[WeiboCrawler.get_creators_and_notes] get creator info error, creator_url:{creator_url}"
f"[TieBaCrawler.get_creators_and_notes] get creator info error, creator_url:{creator_url}"
)
async def _navigate_to_tieba_via_baidu(self):

View File

@@ -22,8 +22,8 @@
import html
import json
import re
from typing import Dict, List, Tuple
from urllib.parse import parse_qs, unquote
from typing import Any, Dict, List, Tuple
from urllib.parse import parse_qs, quote, unquote, urljoin
from parsel import Selector
@@ -39,6 +39,306 @@ class TieBaExtractor:
def __init__(self):
pass
@staticmethod
def _class_contains(class_name: str) -> str:
return f"contains(concat(' ', normalize-space(@class), ' '), ' {class_name} ')"
@staticmethod
def _normalize_text(text: str) -> str:
return re.sub(r"\s+", " ", text or "").strip()
@classmethod
def _selector_text(cls, selector: Selector, xpath: str) -> str:
node = selector.xpath(xpath)
if not node:
return ""
return cls._normalize_text(node[0].xpath("string(.)").get(default=""))
@staticmethod
def _absolute_url(url: str) -> str:
return urljoin(const.TIEBA_URL, (url or "").strip())
@staticmethod
def _extract_note_id_from_url(url: str) -> str:
note_id_match = re.search(r"/p/(\d+)", url or "")
return note_id_match.group(1) if note_id_match else ""
@staticmethod
def _text_to_int(text: str) -> int:
match = re.search(r"\d+", text or "")
return int(match.group(0)) if match else 0
@staticmethod
def _ensure_tieba_suffix(tieba_name: str) -> str:
tieba_name = (tieba_name or "").strip()
return tieba_name if not tieba_name or tieba_name.endswith("吧") else f"{tieba_name}吧"
@classmethod
def _tieba_link_from_name(cls, tieba_name: str) -> str:
if not tieba_name:
return const.TIEBA_URL
return f"{const.TIEBA_URL}/f?kw={quote(tieba_name.removesuffix(''))}"
@classmethod
def _extract_api_content_text(cls, content: Any) -> str:
if isinstance(content, str):
return cls._normalize_text(content)
if not isinstance(content, list):
return ""
text_list: List[str] = []
for item in content:
if not isinstance(item, dict):
continue
text = item.get("text") or item.get("c") or ""
if text:
text_list.append(str(text))
return cls._normalize_text("".join(text_list))
@staticmethod
def _api_user_map(api_data: Dict) -> Dict[str, Dict]:
return {str(user.get("id")): user for user in api_data.get("user_list", []) if user.get("id")}
@staticmethod
def _api_user_link(user: Dict) -> str:
portrait = (user or {}).get("portrait", "")
if not portrait:
return ""
return f"{const.TIEBA_URL}/home/main?id={quote(str(portrait))}"
@staticmethod
def _api_user_avatar(user: Dict) -> str:
image_data = (
(user or {})
.get("user_show_info", {})
.get("feed_head", {})
.get("image_data", {})
)
return image_data.get("img_url") or (
"https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/"
f"{user.get('portrait', '')}"
if user and user.get("portrait")
else ""
)
def extract_search_note_list_from_api(self, api_data: Dict) -> List[TiebaNote]:
"""
Extract Tieba post list from current PC search JSON API.
"""
result: List[TiebaNote] = []
cards = api_data.get("data", {}).get("card_list", [])
for card in cards:
if card.get("cardInfo") != "thread" and card.get("cardStyle") != "thread":
continue
item = card.get("data") or {}
note_id = str(item.get("tid") or "")
if not note_id:
continue
user = item.get("user") or {}
tieba_name = self._ensure_tieba_suffix(item.get("forum_name") or "")
tieba_note = TiebaNote(
note_id=note_id,
title=self._normalize_text(item.get("title") or ""),
desc=self._normalize_text(item.get("content") or ""),
note_url=f"{const.TIEBA_URL}/p/{note_id}",
publish_time=utils.get_time_str_from_unix_time(
item.get("time") or item.get("create_time") or 0
),
user_link="",
user_nickname=user.get("show_nickname") or user.get("user_name") or "",
user_avatar=user.get("portrait") or user.get("portraith") or "",
tieba_name=tieba_name,
tieba_link=self._tieba_link_from_name(tieba_name),
total_replay_num=item.get("post_num") or 0,
)
result.append(tieba_note)
return result
def extract_note_detail_from_api(self, api_data: Dict) -> TiebaNote:
"""
Extract Tieba post detail from current PC page_pc JSON API.
"""
thread = api_data.get("thread") or {}
first_floor = api_data.get("first_floor") or {}
forum = api_data.get("forum") or api_data.get("display_forum") or {}
page = api_data.get("page") or {}
user_map = self._api_user_map(api_data)
author = user_map.get(str(first_floor.get("author_id"))) or {}
note_id = str(thread.get("id") or thread.get("tid") or first_floor.get("tid") or "")
tieba_name = self._ensure_tieba_suffix(forum.get("name") or "")
note = TiebaNote(
note_id=note_id,
title=self._clean_title(thread.get("title") or first_floor.get("title") or "", tieba_name),
desc=self._extract_api_content_text(
first_floor.get("content")
or thread.get("origin_thread_info", {}).get("abstract")
or thread.get("origin_thread_info", {}).get("content")
),
note_url=f"{const.TIEBA_URL}/p/{note_id}",
publish_time=utils.get_time_str_from_unix_time(
first_floor.get("time") or thread.get("create_time") or 0
),
user_link=self._api_user_link(author),
user_nickname=author.get("name_show") or author.get("name") or "",
user_avatar=self._api_user_avatar(author),
tieba_name=tieba_name,
tieba_link=self._tieba_link_from_name(tieba_name),
total_replay_num=thread.get("reply_num") or 0,
total_replay_page=page.get("total_page") or 0,
ip_location=author.get("ip_address") or "",
)
return note
def extract_tieba_note_parent_comments_from_api(
self, api_data: Dict, note_detail: TiebaNote
) -> List[TiebaComment]:
"""
Extract first-level comments from current PC page_pc JSON API.
"""
forum = api_data.get("forum") or api_data.get("display_forum") or {}
tieba_id = str(forum.get("id") or "")
tieba_name = note_detail.tieba_name or self._ensure_tieba_suffix(forum.get("name") or "")
tieba_link = note_detail.tieba_link or self._tieba_link_from_name(tieba_name)
user_map = self._api_user_map(api_data)
result: List[TiebaComment] = []
for item in api_data.get("post_list", []):
comment_id = str(item.get("id") or "")
if not comment_id:
continue
user = user_map.get(str(item.get("author_id"))) or {}
comment = TiebaComment(
comment_id=comment_id,
sub_comment_count=item.get("sub_post_number") or 0,
content=self._extract_api_content_text(item.get("content")),
note_url=note_detail.note_url,
user_link=self._api_user_link(user),
user_nickname=user.get("name_show") or user.get("name") or "",
user_avatar=self._api_user_avatar(user),
tieba_id=tieba_id,
tieba_name=tieba_name,
tieba_link=tieba_link,
ip_location=user.get("ip_address") or "",
publish_time=utils.get_time_str_from_unix_time(item.get("time") or 0),
note_id=note_detail.note_id,
)
result.append(comment)
return result
def extract_creator_info_from_api(self, api_data: Dict) -> TiebaCreator:
"""
Extract Tieba creator information from current PC creator JSON API.
"""
user = api_data.get("data", {}).get("user", {})
if not user:
raise ValueError(f"Creator API response does not contain user info: {api_data}")
gender_value = user.get("sex", user.get("gender", 0))
gender = "Unknown"
if gender_value == 1:
gender = "Male"
elif gender_value == 2:
gender = "Female"
return TiebaCreator(
user_id=str(user.get("id", "")),
user_name=str(user.get("name", "")),
nickname=str(user.get("name_show") or user.get("name") or ""),
avatar=self._api_user_avatar(user),
gender=gender,
ip_location=str(user.get("ip_address", "")),
follows=int(user.get("concern_num") or 0),
fans=int(user.get("fans_num") or 0),
registration_duration=str(user.get("tb_age", "")),
)
@staticmethod
def extract_creator_thread_id_list_from_api(api_data: Dict) -> List[str]:
"""
Extract creator thread ids from current PC creator feed JSON API.
"""
thread_ids: List[str] = []
for item in api_data.get("data", {}).get("list", []):
thread_info = item.get("thread_info") or {}
thread_id = thread_info.get("tid") or thread_info.get("id")
if thread_id:
thread_ids.append(str(thread_id))
return thread_ids
def extract_tieba_note_list_from_frs_api(self, api_data: Dict) -> List[TiebaNote]:
"""
Extract Tieba thread ids from current PC forum page JSON API.
The by-forum command immediately fetches full details for every id, so
this list intentionally carries only stable routing fields.
"""
forum = api_data.get("forum", {})
tieba_name = self._ensure_tieba_suffix(forum.get("name") or "")
tieba_link = self._tieba_link_from_name(tieba_name)
tids = [
tid.strip()
for tid in str(forum.get("tids") or "").split(",")
if tid.strip()
]
return [
TiebaNote(
note_id=tid,
title="",
desc="",
note_url=f"{const.TIEBA_URL}/p/{tid}",
tieba_name=tieba_name,
tieba_link=tieba_link,
)
for tid in tids
]
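The FRS payload carries thread ids as one comma-joined string, usually with a trailing comma, hence the filter above. The value here is copied from the new fixture in the tests:

tids_raw = "10376710029,10636556989,"
assert [tid.strip() for tid in tids_raw.split(",") if tid.strip()] == [
    "10376710029",
    "10636556989",
]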
@staticmethod
def _decode_js_string(value: str) -> str:
if not value or value == "null":
return ""
try:
decoded_value = json.loads(f'"{value}"')
return decoded_value if isinstance(decoded_value, str) else str(decoded_value)
except Exception:
return value
@classmethod
def _extract_forum_info(cls, selector: Selector, page_content: str) -> Tuple[str, str]:
forum_xpath = f"//a[{cls._class_contains('card_title_fname')}]"
forum_link_selector = selector.xpath(forum_xpath)
tieba_name = cls._selector_text(selector, forum_xpath)
tieba_link = cls._absolute_url(forum_link_selector.xpath("./@href").get(default=""))
if not tieba_name:
patterns = [
r"PageData\.forum\s*=\s*\{.*?['\"]name['\"]\s*:\s*\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"",
r'"forum_name"\s*:\s*"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"',
r'"kw"\s*:\s*"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"',
]
for pattern in patterns:
match = re.search(pattern, page_content, re.S)
if match:
tieba_name = cls._decode_js_string(match.group(1))
if tieba_name:
break
if not tieba_name:
title = selector.xpath("//title/text()").get(default="")
match = re.search(r"(.+?)吧[-_]", title)
if match:
tieba_name = cls._normalize_text(match.group(1))
if not tieba_link and tieba_name:
tieba_link = f"{const.TIEBA_URL}/f?kw={quote(tieba_name.removesuffix(''))}"
return tieba_name, tieba_link or const.TIEBA_URL
@classmethod
def _clean_title(cls, title: str, tieba_name: str = "") -> str:
title = cls._normalize_text(title)
title = re.sub(r"_(?:百度贴吧|Baidu Tieba)$", "", title).strip()
for name in {tieba_name, tieba_name.removesuffix("吧")}:
if name:
title = title.replace(f"{name}", "").strip()
return title
@staticmethod
def extract_search_note_list(page_content: str) -> List[TiebaNote]:
"""
@@ -49,23 +349,115 @@ class TieBaExtractor:
Returns:
List of Tieba post objects
"""
xpath_selector = "//div[@class='s_post']"
post_list = Selector(text=page_content).xpath(xpath_selector)
extractor = TieBaExtractor()
selector = Selector(text=page_content)
post_list = selector.xpath(
f"//div[{extractor._class_contains('s_post')}]"
)
result: List[TiebaNote] = []
for post in post_list:
tieba_note = TiebaNote(note_id=post.xpath(".//span[@class='p_title']/a/@data-tid").get(default='').strip(),
title=post.xpath(".//span[@class='p_title']/a/text()").get(default='').strip(),
desc=post.xpath(".//div[@class='p_content']/text()").get(default='').strip(),
note_url=const.TIEBA_URL + post.xpath(".//span[@class='p_title']/a/@href").get(
default=''),
user_nickname=post.xpath(".//a[starts-with(@href, '/home/main')]/font/text()").get(
default='').strip(), user_link=const.TIEBA_URL + post.xpath(
".//a[starts-with(@href, '/home/main')]/@href").get(default=''),
tieba_name=post.xpath(".//a[@class='p_forum']/font/text()").get(default='').strip(),
tieba_link=const.TIEBA_URL + post.xpath(".//a[@class='p_forum']/@href").get(
default=''),
publish_time=post.xpath(".//font[@class='p_green p_date']/text()").get(
default='').strip(), )
title_link = post.xpath(".//*[contains(@class, 'p_title')]//a[1]")
note_url = extractor._absolute_url(title_link.xpath("./@href").get(default=""))
note_id = title_link.xpath("./@data-tid").get(default="").strip()
if not note_id:
note_id = extractor._extract_note_id_from_url(note_url)
user_selector = post.xpath(".//a[contains(@href, '/home/main')][1]")
forum_selector = post.xpath(f".//a[{extractor._class_contains('p_forum')}][1]")
tieba_note = TiebaNote(
note_id=note_id,
title=extractor._selector_text(post, ".//*[contains(@class, 'p_title')]//a[1]"),
desc=extractor._selector_text(
post, f".//div[{extractor._class_contains('p_content')}]"
),
note_url=note_url,
user_nickname=extractor._selector_text(
post, ".//a[contains(@href, '/home/main')][1]"
),
user_link=extractor._absolute_url(user_selector.xpath("./@href").get(default="")),
tieba_name=extractor._selector_text(
post, f".//a[{extractor._class_contains('p_forum')}][1]"
),
tieba_link=extractor._absolute_url(forum_selector.xpath("./@href").get(default="")),
publish_time=extractor._selector_text(
post, ".//*[contains(@class, 'p_date')][1]"
),
)
result.append(tieba_note)
if result:
return result
# Tieba search changed to a PC feed/card layout in 2026. The old
# s_post nodes disappeared, while each search result now lives in a
# threadcardclass card with overlay links to /p/<thread_id>.
post_list = selector.xpath(
f"//*[contains(concat(' ', normalize-space(@class), ' '), ' threadcardclass ') "
f"and .//a[contains(@href, '/p/')]]"
)
seen_note_ids = set()
for post in post_list:
title_link = post.xpath(
f".//a[{extractor._class_contains('action-link-bg')} and contains(@href, '/p/')][1]"
f"|.//a[contains(@href, '/p/')][1]"
)
note_url = extractor._absolute_url(title_link.xpath("./@href").get(default=""))
note_id = extractor._extract_note_id_from_url(note_url)
if not note_id or note_id in seen_note_ids:
continue
seen_note_ids.add(note_id)
tieba_name = extractor._selector_text(
post, f".//*[{extractor._class_contains('forum-name-text')}][1]"
)
tieba_link = ""
forum_link = post.xpath(".//a[contains(@href, '/f?')][1]/@href").get(default="")
if forum_link:
tieba_link = extractor._absolute_url(forum_link)
elif tieba_name:
tieba_keyword = tieba_name.removesuffix("吧")
tieba_link = f"{const.TIEBA_URL}/f?kw={quote(tieba_keyword)}"
else:
tieba_link = const.TIEBA_URL
publish_time = ""
top_title_text = extractor._selector_text(
post, f".//*[{extractor._class_contains('top-title')}][1]"
)
publish_match = re.search(r"发布于\s*([^\s]+)", top_title_text)
if publish_match:
publish_time = publish_match.group(1)
title = extractor._selector_text(
post, f".//*[{extractor._class_contains('title-wrap')}][1]"
)
desc = extractor._selector_text(
post, f".//*[{extractor._class_contains('abstract-wrap')}][1]"
)
if not title:
title = extractor._normalize_text(desc[:80])
user_nickname = extractor._selector_text(
post, f".//*[{extractor._class_contains('forum-attention')}][1]"
)
if not user_nickname and publish_time:
user_nickname = extractor._normalize_text(
top_title_text.split("发布于", 1)[0]
)
comment_text = extractor._selector_text(
post, f".//a[{extractor._class_contains('comment-link-zone')}][1]"
)
tieba_note = TiebaNote(
note_id=note_id,
title=title,
desc=desc,
note_url=f"{const.TIEBA_URL}/p/{note_id}",
user_nickname=user_nickname,
user_link="",
tieba_name=tieba_name,
tieba_link=tieba_link,
publish_time=publish_time,
total_replay_num=extractor._text_to_int(comment_text),
)
result.append(tieba_note)
return result
@@ -80,27 +472,39 @@ class TieBaExtractor:
"""
page_content = page_content.replace('<!--', "")
content_selector = Selector(text=page_content)
xpath_selector = "//ul[@id='thread_list']/li"
xpath_selector = f"//ul[@id='thread_list']/li[{self._class_contains('j_thread_list')}]"
post_list = content_selector.xpath(xpath_selector)
tieba_name, tieba_link = self._extract_forum_info(content_selector, page_content)
result: List[TiebaNote] = []
for post_selector in post_list:
post_field_value: Dict = self.extract_data_field_value(post_selector)
if not post_field_value:
continue
note_id = str(post_field_value.get("id"))
tieba_note = TiebaNote(note_id=note_id,
title=post_selector.xpath(".//a[@class='j_th_tit ']/text()").get(default='').strip(),
desc=post_selector.xpath(
".//div[@class='threadlist_abs threadlist_abs_onlyline ']/text()").get(
default='').strip(), note_url=const.TIEBA_URL + f"/p/{note_id}",
user_link=const.TIEBA_URL + post_selector.xpath(
".//a[@class='frs-author-name j_user_card ']/@href").get(default='').strip(),
user_nickname=post_field_value.get("authoer_nickname") or post_field_value.get(
"author_name"),
tieba_name=content_selector.xpath("//a[@class='card_title_fname']/text()").get(
default='').strip(), tieba_link=const.TIEBA_URL + content_selector.xpath(
"//a[@class='card_title_fname']/@href").get(default=''),
total_replay_num=post_field_value.get("reply_num", 0))
user_selector = post_selector.xpath(f".//a[{self._class_contains('frs-author-name')}][1]")
title = self._selector_text(post_selector, f".//a[{self._class_contains('j_th_tit')}][1]")
if not title:
title = self._selector_text(post_selector, f".//*[{self._class_contains('threadlist_title')}]//a[1]")
user_nickname = (
post_field_value.get("author_nickname")
or post_field_value.get("author_name")
or self._selector_text(
post_selector, f".//a[{self._class_contains('frs-author-name')}][1]"
)
)
tieba_note = TiebaNote(
note_id=note_id,
title=title,
desc=self._selector_text(
post_selector, f".//div[{self._class_contains('threadlist_abs')}]"
),
note_url=const.TIEBA_URL + f"/p/{note_id}",
user_link=self._absolute_url(user_selector.xpath("./@href").get(default="")),
user_nickname=user_nickname,
tieba_name=tieba_name,
tieba_link=tieba_link,
total_replay_num=post_field_value.get("reply_num", 0),
)
result.append(tieba_note)
return result
@@ -114,31 +518,59 @@ class TieBaExtractor:
Tieba post detail object
"""
content_selector = Selector(text=page_content)
first_floor_selector = content_selector.xpath("//div[@class='p_postlist'][1]")
first_floor_selector = content_selector.xpath(
f"//div[{self._class_contains('l_post')} and {self._class_contains('j_l_post')}][1]"
)
only_view_author_link = content_selector.xpath("//*[@id='lzonly_cntn']/@href").get(default='').strip()
note_id = only_view_author_link.split("?")[0].split("/")[-1]
if not note_id:
note_id_match = re.search(r'"thread_id"\s*:\s*"?(\d+)"?', page_content)
note_id = note_id_match.group(1) if note_id_match else ""
# Post reply count and reply page count
thread_num_infos = content_selector.xpath(
"//div[@id='thread_theme_5']//li[@class='l_reply_num']//span[@class='red']")
f"//div[@id='thread_theme_5']//li[{self._class_contains('l_reply_num')}]"
f"//span[{self._class_contains('red')}]"
)
# IP location and publish time
other_info_content = content_selector.xpath(".//div[@class='post-tail-wrap']").get(default="").strip()
other_info_content = first_floor_selector.xpath(
f".//div[{self._class_contains('post-tail-wrap')}]"
).get(default="").strip()
ip_location, publish_time = self.extract_ip_and_pub_time(other_info_content)
note = TiebaNote(note_id=note_id, title=content_selector.xpath("//title/text()").get(default='').strip(),
desc=content_selector.xpath("//meta[@name='description']/@content").get(default='').strip(),
note_url=const.TIEBA_URL + f"/p/{note_id}",
user_link=const.TIEBA_URL + first_floor_selector.xpath(
".//a[@class='p_author_face ']/@href").get(default='').strip(),
user_nickname=first_floor_selector.xpath(
".//a[@class='p_author_name j_user_card']/text()").get(default='').strip(),
user_avatar=first_floor_selector.xpath(".//a[@class='p_author_face ']/img/@src").get(
default='').strip(),
tieba_name=content_selector.xpath("//a[@class='card_title_fname']/text()").get(
default='').strip(), tieba_link=const.TIEBA_URL + content_selector.xpath(
"//a[@class='card_title_fname']/@href").get(default=''), ip_location=ip_location,
publish_time=publish_time,
total_replay_num=thread_num_infos[0].xpath("./text()").get(default='').strip(),
total_replay_page=thread_num_infos[1].xpath("./text()").get(default='').strip(), )
note.title = note.title.replace(f"{note.tieba_name}】_Baidu Tieba", "")
tieba_name, tieba_link = self._extract_forum_info(content_selector, page_content)
first_floor_value = self.extract_data_field_value(first_floor_selector)
author_value = first_floor_value.get("author", {}) if first_floor_value else {}
author_link = first_floor_selector.xpath(
f".//a[{self._class_contains('p_author_face')} "
f"or {self._class_contains('p_author_name')}]/@href"
).get(default="")
note = TiebaNote(
note_id=note_id,
title=content_selector.xpath("//title/text()").get(default="").strip(),
desc=content_selector.xpath("//meta[@name='description']/@content").get(default="").strip(),
note_url=const.TIEBA_URL + f"/p/{note_id}",
user_link=self._absolute_url(author_link),
user_nickname=(
self._selector_text(first_floor_selector, f".//a[{self._class_contains('p_author_name')}][1]")
or author_value.get("user_nickname")
or author_value.get("user_name", "")
),
user_avatar=first_floor_selector.xpath(
f".//a[{self._class_contains('p_author_face')}]//img/@src"
).get(default="").strip(),
tieba_name=tieba_name,
tieba_link=tieba_link,
ip_location=ip_location,
publish_time=publish_time,
total_replay_num=(
thread_num_infos[0].xpath("./text()").get(default="0").strip()
if len(thread_num_infos) > 0 else 0
),
total_replay_page=(
thread_num_infos[1].xpath("./text()").get(default="0").strip()
if len(thread_num_infos) > 1 else 0
),
)
note.title = self._clean_title(note.title, note.tieba_name)
return note
def extract_tieba_note_parment_comments(self, page_content: str, note_id: str) -> List[TiebaComment]:
@@ -151,30 +583,56 @@ class TieBaExtractor:
Returns:
List of first-level comment objects
"""
xpath_selector = "//div[@class='l_post l_post_bright j_l_post clearfix ']"
xpath_selector = f"//div[{self._class_contains('l_post')} and {self._class_contains('j_l_post')}]"
comment_list = Selector(text=page_content).xpath(xpath_selector)
content_selector = Selector(text=page_content)
tieba_name, tieba_link = self._extract_forum_info(content_selector, page_content)
result: List[TiebaComment] = []
for comment_selector in comment_list:
comment_field_value: Dict = self.extract_data_field_value(comment_selector)
if not comment_field_value:
comment_content_value = comment_field_value.get("content", {}) if comment_field_value else {}
if not comment_content_value:
continue
tieba_name = comment_selector.xpath("//a[@class='card_title_fname']/text()").get(default='').strip()
other_info_content = comment_selector.xpath(".//div[@class='post-tail-wrap']").get(default="").strip()
other_info_content = comment_selector.xpath(
f".//div[{self._class_contains('post-tail-wrap')}]"
).get(default="").strip()
ip_location, publish_time = self.extract_ip_and_pub_time(other_info_content)
tieba_comment = TiebaComment(comment_id=str(comment_field_value.get("content").get("post_id")),
sub_comment_count=comment_field_value.get("content").get("comment_num"),
content=utils.extract_text_from_html(
comment_field_value.get("content").get("content")),
note_url=const.TIEBA_URL + f"/p/{note_id}",
user_link=const.TIEBA_URL + comment_selector.xpath(
".//a[@class='p_author_face ']/@href").get(default='').strip(),
user_nickname=comment_selector.xpath(
".//a[@class='p_author_name j_user_card']/text()").get(default='').strip(),
user_avatar=comment_selector.xpath(
".//a[@class='p_author_face ']/img/@src").get(default='').strip(),
tieba_id=str(comment_field_value.get("content").get("forum_id", "")),
tieba_name=tieba_name, tieba_link=f"https://tieba.baidu.com/f?kw={tieba_name}",
ip_location=ip_location, publish_time=publish_time, note_id=note_id, )
user_selector = comment_selector.xpath(f".//a[{self._class_contains('p_author_name')}][1]")
user_avatar = comment_selector.xpath(
f".//a[{self._class_contains('p_author_face')}]//img/@src"
).get(default="").strip()
if not user_avatar and comment_field_value.get("author", {}).get("portrait"):
portrait = comment_field_value["author"]["portrait"]
user_avatar = (
"https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/"
f"{portrait}"
)
content_html = comment_content_value.get("content") or comment_selector.xpath(
f".//div[{self._class_contains('d_post_content')}]"
).get(default="")
user_nickname = (
self._selector_text(comment_selector, f".//a[{self._class_contains('p_author_name')}][1]")
or comment_field_value.get("author", {}).get("user_nickname")
or comment_field_value.get("author", {}).get("user_name", "")
)
tieba_comment = TiebaComment(
comment_id=str(
comment_content_value.get("post_id")
or comment_selector.xpath("./@data-pid").get(default="")
),
sub_comment_count=comment_content_value.get("comment_num") or 0,
content=utils.extract_text_from_html(content_html),
note_url=const.TIEBA_URL + f"/p/{note_id}",
user_link=self._absolute_url(user_selector.xpath("./@href").get(default="")),
user_nickname=user_nickname,
user_avatar=user_avatar,
tieba_id=str(comment_content_value.get("forum_id", "")),
tieba_name=tieba_name,
tieba_link=tieba_link,
ip_location=ip_location,
publish_time=publish_time,
note_id=note_id,
)
result.append(tieba_comment)
return result
@@ -190,21 +648,24 @@ class TieBaExtractor:
"""
selector = Selector(page_content)
comments = []
comment_ele_list = selector.xpath("//li[@class='lzl_single_post j_lzl_s_p first_no_border']")
comment_ele_list.extend(selector.xpath("//li[@class='lzl_single_post j_lzl_s_p ']"))
comment_ele_list = selector.xpath(
f"//li[{self._class_contains('lzl_single_post')} and {self._class_contains('j_lzl_s_p')}]"
)
for comment_ele in comment_ele_list:
comment_value = self.extract_data_field_value(comment_ele)
if not comment_value:
continue
comment_user_a_selector = comment_ele.xpath("./a[@class='j_user_card lzl_p_p']")[0]
comment_user_a_selector = comment_ele.xpath(
f"./a[{self._class_contains('j_user_card')} and {self._class_contains('lzl_p_p')}][1]"
)
content = utils.extract_text_from_html(
comment_ele.xpath(".//span[@class='lzl_content_main']").get(default=""))
comment_ele.xpath(f".//span[{self._class_contains('lzl_content_main')}]").get(default=""))
comment = TiebaComment(
comment_id=str(comment_value.get("spid")), content=content,
user_link=comment_user_a_selector.xpath("./@href").get(default=""),
user_nickname=comment_value.get("showname"),
user_link=self._absolute_url(comment_user_a_selector.xpath("./@href").get(default="")),
user_nickname=str(comment_value.get("showname") or ""),
user_avatar=comment_user_a_selector.xpath("./img/@src").get(default=""),
publish_time=comment_ele.xpath(".//span[@class='lzl_time']/text()").get(default="").strip(),
publish_time=self._selector_text(comment_ele, f".//span[{self._class_contains('lzl_time')}]"),
parent_comment_id=parent_comment.comment_id,
note_id=parent_comment.note_id, note_url=parent_comment.note_url,
tieba_id=parent_comment.tieba_id, tieba_name=parent_comment.tieba_name,

View File

@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
import config
import pytest
from cmd_arg import parse_cmd
from media_platform.tieba import TieBaCrawler
@pytest.mark.asyncio
async def test_tieba_detail_cli_sets_specified_ids():
await parse_cmd(
[
"--platform",
"tieba",
"--type",
"detail",
"--specified_id",
"https://tieba.baidu.com/p/10451142633,9835114923",
]
)
assert config.TIEBA_SPECIFIED_ID_LIST == ["10451142633", "9835114923"]
@pytest.mark.asyncio
async def test_tieba_creator_cli_sets_creator_urls():
await parse_cmd(
[
"--platform",
"tieba",
"--type",
"creator",
"--creator_id",
"tb.1.example,https://tieba.baidu.com/home/main?id=tb.1.raw",
]
)
assert config.TIEBA_CREATOR_URL_LIST == [
"https://tieba.baidu.com/home/main?id=tb.1.example",
"https://tieba.baidu.com/home/main?id=tb.1.raw",
]
@pytest.mark.asyncio
async def test_tieba_detail_reads_runtime_specified_ids(monkeypatch):
crawler = TieBaCrawler()
seen_note_ids = []
async def fake_get_note_detail(note_id, semaphore):
seen_note_ids.append(note_id)
return None
async def fake_batch_get_comments(note_details):
return None
monkeypatch.setattr(config, "TIEBA_SPECIFIED_ID_LIST", ["10451142633"])
monkeypatch.setattr(crawler, "get_note_detail_async_task", fake_get_note_detail)
monkeypatch.setattr(crawler, "batch_get_note_comments", fake_batch_get_comments)
await crawler.get_specified_notes()
assert seen_note_ids == ["10451142633"]

View File

@@ -0,0 +1,110 @@
# -*- coding: utf-8 -*-
import pytest
from media_platform.tieba.client import BaiduTieBaClient
from model.m_baidu_tieba import TiebaComment, TiebaNote
class DummyPage:
url = "https://tieba.baidu.com/"
@pytest.mark.asyncio
async def test_search_uses_requested_page_number():
client = BaiduTieBaClient(playwright_page=DummyPage())
calls = []
async def fake_fetch(uri, method="GET", params=None, data=None, use_sign=False):
calls.append((uri, params))
return {"no": 0, "data": {"card_list": []}}
client._fetch_json_by_browser = fake_fetch
await client.get_notes_by_keyword("编程兼职", page=2, page_size=10)
assert calls[0][0] == "/mo/q/search/multsearch"
assert calls[0][1]["pn"] == 2
@pytest.mark.asyncio
async def test_comments_walk_pages_until_total_reply_page():
client = BaiduTieBaClient(playwright_page=DummyPage())
pages = []
note = TiebaNote(
note_id="9835114923",
title="title",
note_url="https://tieba.baidu.com/p/9835114923",
tieba_name="加工中心吧",
tieba_link="https://tieba.baidu.com/f?kw=%E5%8A%A0%E5%B7%A5%E4%B8%AD%E5%BF%83",
total_replay_page=2,
)
async def fake_get_page_data(note_id, page=1):
pages.append(page)
return {"forum": {"id": 1, "name": "加工中心"}, "post_list": []}
def fake_extract_comments(api_data, note_detail):
page = pages[-1]
return [
TiebaComment(
comment_id=str(page),
content="comment",
note_id=note_detail.note_id,
note_url=note_detail.note_url,
tieba_id="1",
tieba_name=note_detail.tieba_name,
tieba_link=note_detail.tieba_link,
)
]
client._get_pc_page_data = fake_get_page_data
client._page_extractor.extract_tieba_note_parent_comments_from_api = fake_extract_comments
await client.get_note_all_comments(note, crawl_interval=0, max_count=10)
assert pages == [1, 2]
@pytest.mark.asyncio
async def test_creator_feed_walks_until_has_more_false(monkeypatch):
client = BaiduTieBaClient(playwright_page=DummyPage())
pages = []
async def fake_get_notes_by_creator_portrait(portrait, page_number, page_size=20):
pages.append(page_number)
return {
"error_code": 0,
"data": {
"has_more": 1 if page_number == 1 else 0,
"list": [
{
"thread_info": {
"id": str(1000 + page_number),
"tid": str(1000 + page_number),
}
}
],
},
}
async def fake_get_note_by_id(note_id):
return TiebaNote(
note_id=note_id,
title="title",
note_url=f"https://tieba.baidu.com/p/{note_id}",
tieba_name="加工中心吧",
tieba_link="https://tieba.baidu.com/f?kw=%E5%8A%A0%E5%B7%A5%E4%B8%AD%E5%BF%83",
)
async def fake_sleep(_):
return None
client.get_notes_by_creator_portrait = fake_get_notes_by_creator_portrait
client.get_note_by_id = fake_get_note_by_id
monkeypatch.setattr("media_platform.tieba.client.asyncio.sleep", fake_sleep)
notes = await client.get_all_notes_by_creator_url("tb.1.creator", crawl_interval=0)
assert pages == [1, 2]
assert [note.note_id for note in notes] == ["1001", "1002"]

View File

@@ -0,0 +1,278 @@
# -*- coding: utf-8 -*-
from pathlib import Path
from media_platform.tieba.help import TieBaExtractor
from model.m_baidu_tieba import TiebaComment
FIXTURE_DIR = Path(__file__).parent.parent / "media_platform" / "tieba" / "test_data"
def read_fixture(name: str) -> str:
return (FIXTURE_DIR / name).read_text(encoding="utf-8")
def test_extract_search_note_list_from_keyword_page():
notes = TieBaExtractor.extract_search_note_list(read_fixture("search_keyword_notes.html"))
assert len(notes) == 10
assert notes[0].note_id == "9117888152"
assert notes[0].title.startswith("武汉交互空间科技")
assert notes[0].tieba_name == "武汉交互空间"
assert notes[0].user_nickname == "VR虚拟达人"
def test_extract_search_note_list_from_current_pc_card_page():
page_content = """
<html>
<body>
<div class="threadcardclass thread-new3 index-feed-cards">
<a class="action-link-bg" href="https://tieba.baidu.com/p/10559655942?fr=undefined"></a>
<div class="thread-forum-name display-flex align-center">
<span class="forum-name-text">诸城吧</span>
</div>
<div class="top-title">
<span class="forum-attention user">754023117</span>
<span>发布于 2026-3-15</span>
</div>
<div class="title-wrap"><span>数,英,编程老师</span></div>
<div class="abstract-wrap">
<span>培训班需求,数学,英语,编程老师,专职兼职都可</span>
</div>
<a class="comment-link-zone" href="https://tieba.baidu.com/p/10559655942?showComment=1">
<span class="action-number">19</span>
</a>
</div>
</body>
</html>
"""
notes = TieBaExtractor.extract_search_note_list(page_content)
assert len(notes) == 1
assert notes[0].note_id == "10559655942"
assert notes[0].title == "数,英,编程老师"
assert notes[0].desc == "培训班需求,数学,英语,编程老师,专职兼职都可"
assert notes[0].tieba_name == "诸城吧"
assert notes[0].tieba_link.endswith("kw=%E8%AF%B8%E5%9F%8E")
assert notes[0].user_nickname == "754023117"
assert notes[0].publish_time == "2026-3-15"
assert notes[0].total_replay_num == 19
def test_extract_search_note_list_from_current_pc_api():
api_data = {
"no": 0,
"error": "success",
"data": {
"card_list": [
{"cardInfo": "related_user", "cardStyle": "related_user", "data": {}},
{
"cardInfo": "thread",
"cardStyle": "thread",
"data": {
"tid": "10559655942",
"title": "数,英,编程老师",
"content": "培训班需求,数学,英语,编程老师,专职兼职都可",
"time": 1773552643,
"user": {
"show_nickname": "754023117",
"portrait": "https://example.com/avatar.jpg",
},
"post_num": 19,
"forum_name": "诸城",
},
},
]
},
}
notes = TieBaExtractor().extract_search_note_list_from_api(api_data)
assert len(notes) == 1
assert notes[0].note_id == "10559655942"
assert notes[0].title == "数,英,编程老师"
assert notes[0].tieba_name == "诸城吧"
assert notes[0].total_replay_num == 19
assert notes[0].publish_time
def test_extract_note_detail_and_comments_from_current_pc_api():
api_data = {
"error_code": 0,
"thread": {
"id": 10451142633,
"title": "这X尔斯对比巴尔斯我只能说ID正确允许居功自傲",
"reply_num": 15,
"create_time": 1769951446,
},
"forum": {"id": 1627732, "name": "dota2"},
"page": {"total_page": 1},
"first_floor": {
"id": 153154064746,
"author_id": 4089186644,
"time": 1769951446,
"content": [{"type": 0, "text": "皮队败决处刑德国编程钢琴师兼职数学家"}],
},
"post_list": [
{
"id": 153154097267,
"author_id": 6614897968,
"time": 1769952062,
"content": [{"type": 0, "text": "xg现在大树阵容另一个辅助不选控制"}],
"sub_post_number": 4,
}
],
"user_list": [
{
"id": 4089186644,
"name_show": "泰高祖蒙斯克",
"portrait": "tb.1.f893a7af",
"ip_address": "广东",
},
{
"id": 6614897968,
"name_show": "期胡希3",
"portrait": "tb.1.4d0471d4",
"ip_address": "河北",
},
],
}
extractor = TieBaExtractor()
note = extractor.extract_note_detail_from_api(api_data)
comments = extractor.extract_tieba_note_parent_comments_from_api(api_data, note)
assert note.note_id == "10451142633"
assert note.title == "这X尔斯对比巴尔斯我只能说ID正确允许居功自傲"
assert note.desc == "皮队败决处刑德国编程钢琴师兼职数学家"
assert note.user_nickname == "泰高祖蒙斯克"
assert note.tieba_name == "dota2吧"
assert note.total_replay_num == 15
assert note.total_replay_page == 1
assert note.ip_location == "广东"
assert len(comments) == 1
assert comments[0].comment_id == "153154097267"
assert comments[0].content == "xg现在大树阵容另一个辅助不选控制"
assert comments[0].user_nickname == "期胡希3"
assert comments[0].sub_comment_count == 4
assert comments[0].ip_location == "河北"
def test_extract_creator_info_and_threads_from_current_pc_api():
creator_api = {
"error_code": 0,
"data": {
"user": {
"id": 3546493137,
"name": "拜月教Alice",
"name_show": "米米世界大手子",
"portrait": "tb.1.6ad0cd4a.7ZcjVYWa7UpHttCld2OppA?t=1777543466",
"fans_num": 58,
"concern_num": 1,
"sex": 1,
"tb_age": "7.8",
"ip_address": "广东",
}
},
}
feed_api = {
"error_code": 0,
"data": {
"list": [
{"type": 1, "thread_info": {"id": 10208192951, "tid": 10208192951}},
{"type": 1, "thread_info": {"id": 9835114923}},
]
},
}
extractor = TieBaExtractor()
creator = extractor.extract_creator_info_from_api(creator_api)
thread_ids = extractor.extract_creator_thread_id_list_from_api(feed_api)
assert creator.user_id == "3546493137"
assert creator.user_name == "拜月教Alice"
assert creator.nickname == "米米世界大手子"
assert creator.fans == 58
assert creator.follows == 1
assert creator.ip_location == "广东"
assert creator.registration_duration == "7.8"
assert thread_ids == ["10208192951", "9835114923"]
def test_extract_tieba_note_list_from_current_frs_api():
api_data = {
"error_code": 0,
"forum": {
"id": 351091,
"name": "加工中心",
"tids": "10376710029,10636556989,",
},
}
notes = TieBaExtractor().extract_tieba_note_list_from_frs_api(api_data)
assert [note.note_id for note in notes] == ["10376710029", "10636556989"]
assert notes[0].note_url == "https://tieba.baidu.com/p/10376710029"
assert notes[0].tieba_name == "加工中心吧"
assert notes[0].tieba_link.endswith("kw=%E5%8A%A0%E5%B7%A5%E4%B8%AD%E5%BF%83")
def test_extract_tieba_note_list_from_bigpipe_thread_page():
notes = TieBaExtractor().extract_tieba_note_list(read_fixture("tieba_note_list.html"))
assert len(notes) == 48
assert notes[0].note_id == "9079949995"
assert notes[0].title == "盗墓笔记全集+txt小说已整理"
assert notes[0].user_nickname == "公子伯仲"
assert notes[0].tieba_name == "盗墓笔记吧"
assert notes[0].tieba_link.endswith("kw=%E7%9B%97%E5%A2%93%E7%AC%94%E8%AE%B0&ie=utf-8")
def test_extract_note_detail_from_post_page():
note = TieBaExtractor().extract_note_detail(read_fixture("note_detail.html"))
assert note.note_id == "9117905169"
assert note.title == "对于一个父亲来说这个女儿14岁就死了"
assert note.user_nickname == "章景轩"
assert note.tieba_name == "以太比特吧"
assert note.total_replay_num == 786
assert note.total_replay_page == 13
assert note.ip_location == "广东"
def test_extract_parent_comments_from_post_page():
comments = TieBaExtractor().extract_tieba_note_parment_comments(
read_fixture("note_comments.html"),
"9119688421",
)
assert len(comments) == 30
assert comments[0].comment_id == "150726491368"
assert comments[0].content == "中国队第22金无悬念"
assert comments[0].user_nickname == "heinzfrentzen"
assert comments[0].tieba_name == "网球风云吧"
assert comments[0].ip_location == "福建"
def test_extract_sub_comments_with_class_token_matching():
parent = TiebaComment(
comment_id="150726496253",
content="parent",
note_id="9119688421",
note_url="https://tieba.baidu.com/p/9119688421",
tieba_id="4513750",
tieba_name="网球风云吧",
tieba_link="https://tieba.baidu.com/f?kw=%E7%BD%91%E7%90%83%E9%A3%8E%E4%BA%91",
)
comments = TieBaExtractor().extract_tieba_note_sub_comments(
read_fixture("note_sub_comments.html"),
parent,
)
assert len(comments) >= 10
assert comments[0].comment_id
assert comments[0].parent_comment_id == parent.comment_id
assert comments[0].user_link.startswith("https://tieba.baidu.com/home/main")