i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: Chinese disclaimer header (lines 10-18) for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-06-05 09:27:25 +08:00 · 2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions
--- a/media_platform/xhs/extractor.py
+++ b/media_platform/xhs/extractor.py
@@ -29,16 +29,16 @@ class XiaoHongShuExtractor:
        pass

    def extract_note_detail_from_html(self, note_id: str, html: str) -> Optional[Dict]:
-        """从html中提取笔记详情
+        """Extract note details from HTML

        Args:
-            html (str): html字符串
+            html (str): HTML string

        Returns:
-            Dict: 笔记详情字典
+            Dict: Note details dictionary
        """
        if "noteDetailMap" not in html:
-            # 这种情况要么是出了验证码了，要么是笔记不存在
+            # Either a CAPTCHA appeared or the note doesn't exist
            return None

        state = re.findall(r"window.__INITIAL_STATE__=({.*})</script>", html)[
@@ -50,13 +50,13 @@ class XiaoHongShuExtractor:
        return None

    def extract_creator_info_from_html(self, html: str) -> Optional[Dict]:
-        """从html中提取用户信息
+        """Extract user information from HTML

        Args:
-            html (str): html字符串
+            html (str): HTML string

        Returns:
-            Dict: 用户信息字典
+            Dict: User information dictionary
        """
        match = re.search(
            r"<script>window.__INITIAL_STATE__=(.+)<\/script>", html, re.M