i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: Chinese disclaimer header (lines 10-18) for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-06-09 11:27:26 +08:00 · 2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions
--- a/media_platform/bilibili/client.py
+++ b/media_platform/bilibili/client.py
@@ -20,7 +20,7 @@
 # -*- coding: utf-8 -*-
 # @Author  : relakkes@gmail.com
 # @Time    : 2023/12/2 18:44
-# @Desc    : bilibili 请求客户端
+# @Desc    : bilibili request client
 import asyncio
 import json
 import random
@@ -47,7 +47,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):

    def __init__(
        self,
-        timeout=60,  # 若开启爬取媒体选项，b 站的长视频需要更久的超时时间
+        timeout=60,  # For media crawling, Bilibili long videos need a longer timeout
        proxy=None,
        *,
        headers: Dict[str, str],
@@ -61,11 +61,11 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
        self._host = "https://api.bilibili.com"
        self.playwright_page = playwright_page
        self.cookie_dict = cookie_dict
-        # 初始化代理池（来自 ProxyRefreshMixin）
+        # Initialize proxy pool (from ProxyRefreshMixin)
        self.init_proxy_pool(proxy_ip_pool)

    async def request(self, method, url, **kwargs) -> Any:
-        # 每次请求前检测代理是否过期
+        # Check if proxy has expired before each request
        await self._refresh_proxy_if_expired()

        async with httpx.AsyncClient(proxy=self.proxy) as client:
@@ -82,8 +82,8 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):

    async def pre_request_data(self, req_data: Dict) -> Dict:
        """
-        发送请求进行请求参数签名
-        需要从 localStorage 拿 wbi_img_urls 这参数，值如下：
+        Send request to sign request parameters
+        Need to get wbi_img_urls parameter from localStorage, value as follows:
        https://i0.hdslb.com/bfs/wbi/7cd084941338484aae1ad9425b84077c.png-https://i0.hdslb.com/bfs/wbi/4932caff0ff746eab6f01bf08b70ac45.png
        :param req_data:
        :return:
@@ -95,7 +95,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):

    async def get_wbi_keys(self) -> Tuple[str, str]:
        """
-        获取最新的 img_key 和 sub_key
+        Get the latest img_key and sub_key
        :return:
        """
        local_storage = await self.playwright_page.evaluate("() => window.localStorage")
@@ -160,12 +160,12 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
    ) -> Dict:
        """
        KuaiShou web search api
-        :param keyword: 搜索关键词
-        :param page: 分页参数具体第几页
-        :param page_size: 每一页参数的数量
-        :param order: 搜索结果排序，默认位综合排序
-        :param pubtime_begin_s: 发布时间开始时间戳
-        :param pubtime_end_s: 发布时间结束时间戳
+        :param keyword: Search keyword
+        :param page: Page number for pagination
+        :param page_size: Number of items per page
+        :param order: Sort order for search results, default is comprehensive sorting
+        :param pubtime_begin_s: Publish time start timestamp
+        :param pubtime_end_s: Publish time end timestamp
        :return:
        """
        uri = "/x/web-interface/wbi/search/type"
@@ -182,13 +182,13 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):

    async def get_video_info(self, aid: Union[int, None] = None, bvid: Union[str, None] = None) -> Dict:
        """
-        Bilibli web video detail api, aid 和 bvid任选一个参数
-        :param aid: 稿件avid
-        :param bvid: 稿件bvid
+        Bilibili web video detail API; provide either aid or bvid
+        :param aid: Video aid
+        :param bvid: Video bvid
        :return:
        """
        if not aid and not bvid:
-            raise ValueError("请提供 aid 或 bvid 中的至少一个参数")
+            raise ValueError("Please provide at least one parameter: aid or bvid")

        uri = "/x/web-interface/view/detail"
        params = dict()
@@ -201,12 +201,12 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
    async def get_video_play_url(self, aid: int, cid: int) -> Dict:
        """
        Bilibli web video play url api
-        :param aid: 稿件avid
+        :param aid: Video aid
        :param cid: cid
        :return:
        """
        if not aid or not cid or aid <= 0 or cid <= 0:
-            raise ValueError("aid 和 cid 必须存在")
+            raise ValueError("aid and cid must exist")
        uri = "/x/player/wbi/playurl"
        qn_value = getattr(config, "BILI_QN", 80)
        params = {
@@ -233,7 +233,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
                )
                return None
            except httpx.HTTPError as exc:  # some wrong when call httpx.request method, such as connection error, client error, server error or response status code is not 2xx
-                utils.logger.error(f"[BilibiliClient.get_video_media] {exc.__class__.__name__} for {exc.request.url} - {exc}")  # 保留原始异常类型名称，以便开发者调试
+                utils.logger.error(f"[BilibiliClient.get_video_media] {exc.__class__.__name__} for {exc.request.url} - {exc}")  # Keep original exception type name for developer debugging
                return None

    async def get_video_comments(
@@ -243,9 +243,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
        next: int = 0,
    ) -> Dict:
        """get video comments
-        :param video_id: 视频 ID
-        :param order_mode: 排序方式
-        :param next: 评论页选择
+        :param video_id: Video ID
+        :param order_mode: Sort order
+        :param next: Comment page selection
        :return:
        """
        uri = "/x/v2/reply/wbi/main"
@@ -266,7 +266,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
        :param crawl_interval:
        :param is_fetch_sub_comments:
        :param callback:
-        max_count: 一次笔记爬取的最大评论数量
+        :param max_count: Maximum number of comments to crawl per video

        :return:
        """
@@ -299,7 +299,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):

            comment_list: List[Dict] = comments_res.get("replies", [])

-            # 检查 is_end 和 next 是否存在
+            # Check if is_end and next exist
            if "is_end" not in cursor_info or "next" not in cursor_info:
                utils.logger.warning(f"[BilibiliClient.get_video_all_comments] 'is_end' or 'next' not in cursor for video_id: {video_id}. Assuming end of comments.")
                is_end = True
@@ -317,7 +317,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
                        {await self.get_video_all_level_two_comments(video_id, comment_id, CommentOrderType.DEFAULT, 10, crawl_interval, callback)}
            if len(result) + len(comment_list) > max_count:
                comment_list = comment_list[:max_count - len(result)]
-            if callback:  # 如果有回调函数，就执行回调函数
+            if callback:  # If there is a callback function, execute it
                await callback(video_id, comment_list)
            await asyncio.sleep(crawl_interval)
            if not is_fetch_sub_comments:
@@ -336,10 +336,10 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
    ) -> Dict:
        """
        get video all level two comments for a level one comment
-        :param video_id: 视频 ID
-        :param level_one_comment_id: 一级评论 ID
+        :param video_id: Video ID
+        :param level_one_comment_id: Level one comment ID
        :param order_mode:
-        :param ps: 一页评论数
+        :param ps: Number of comments per page
        :param crawl_interval:
        :param callback:
        :return:
@@ -349,7 +349,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
        while True:
            result = await self.get_video_level_two_comments(video_id, level_one_comment_id, pn, ps, order_mode)
            comment_list: List[Dict] = result.get("replies", [])
-            if callback:  # 如果有回调函数，就执行回调函数
+            if callback:  # If there is a callback function, execute it
                await callback(video_id, comment_list)
            await asyncio.sleep(crawl_interval)
            if (int(result["page"]["count"]) <= pn * ps):
@@ -366,9 +366,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
        order_mode: CommentOrderType,
    ) -> Dict:
        """get video level two comments
-        :param video_id: 视频 ID
-        :param level_one_comment_id: 一级评论 ID
-        :param order_mode: 排序方式
+        :param video_id: Video ID
+        :param level_one_comment_id: Level one comment ID
+        :param order_mode: Sort order

        :return:
        """
@@ -386,10 +386,10 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):

    async def get_creator_videos(self, creator_id: str, pn: int, ps: int = 30, order_mode: SearchOrderType = SearchOrderType.LAST_PUBLISH) -> Dict:
        """get all videos for a creator
-        :param creator_id: 创作者 ID
-        :param pn: 页数
-        :param ps: 一页视频数
-        :param order_mode: 排序方式
+        :param creator_id: Creator ID
+        :param pn: Page number
+        :param ps: Number of videos per page
+        :param order_mode: Sort order

        :return:
        """
@@ -405,7 +405,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
    async def get_creator_info(self, creator_id: int) -> Dict:
        """
        get creator info
-        :param creator_id: 作者 ID
+        :param creator_id: Creator ID
        """
        uri = "/x/space/wbi/acc/info"
        post_data = {
@@ -421,9 +421,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
    ) -> Dict:
        """
        get creator fans
-        :param creator_id: 创作者 ID
-        :param pn: 开始页数
-        :param ps: 每页数量
+        :param creator_id: Creator ID
+        :param pn: Start page number
+        :param ps: Number of items per page
        :return:
        """
        uri = "/x/relation/fans"
@@ -443,9 +443,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
    ) -> Dict:
        """
        get creator followings
-        :param creator_id: 创作者 ID
-        :param pn: 开始页数
-        :param ps: 每页数量
+        :param creator_id: Creator ID
+        :param pn: Start page number
+        :param ps: Number of items per page
        :return:
        """
        uri = "/x/relation/followings"
@@ -460,8 +460,8 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
    async def get_creator_dynamics(self, creator_id: int, offset: str = ""):
        """
        get creator comments
-        :param creator_id: 创作者 ID
-        :param offset: 发送请求所需参数
+        :param creator_id: Creator ID
+        :param offset: Parameter required for sending request
        :return:
        """
        uri = "/x/polymer/web-dynamic/v1/feed/space"
@@ -485,9 +485,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
        :param creator_info:
        :param crawl_interval:
        :param callback:
-        :param max_count: 一个up主爬取的最大粉丝数量
+        :param max_count: Maximum number of fans to crawl for a creator

-        :return: up主粉丝数列表
+        :return: List of creator fans
        """
        creator_id = creator_info["id"]
        result = []
@@ -499,7 +499,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
            pn += 1
            if len(result) + len(fans_list) > max_count:
                fans_list = fans_list[:max_count - len(result)]
-            if callback:  # 如果有回调函数，就执行回调函数
+            if callback:  # If there is a callback function, execute it
                await callback(creator_info, fans_list)
            await asyncio.sleep(crawl_interval)
            if not fans_list:
@@ -519,9 +519,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
        :param creator_info:
        :param crawl_interval:
        :param callback:
-        :param max_count: 一个up主爬取的最大关注者数量
+        :param max_count: Maximum number of followings to crawl for a creator

-        :return: up主关注者列表
+        :return: List of creator followings
        """
        creator_id = creator_info["id"]
        result = []
@@ -533,7 +533,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
            pn += 1
            if len(result) + len(followings_list) > max_count:
                followings_list = followings_list[:max_count - len(result)]
-            if callback:  # 如果有回调函数，就执行回调函数
+            if callback:  # If there is a callback function, execute it
                await callback(creator_info, followings_list)
            await asyncio.sleep(crawl_interval)
            if not followings_list:
@@ -553,9 +553,9 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
        :param creator_info:
        :param crawl_interval:
        :param callback:
-        :param max_count: 一个up主爬取的最大动态数量
+        :param max_count: Maximum number of dynamics to crawl for a creator

-        :return: up主关注者列表
+        :return: List of creator dynamics
        """
        creator_id = creator_info["id"]
        result = []
--- a/media_platform/bilibili/core.py
+++ b/media_platform/bilibili/core.py
@@ -20,7 +20,7 @@
 # -*- coding: utf-8 -*-
 # @Author  : relakkes@gmail.com
 # @Time    : 2023/12/2 18:44
-# @Desc    : B站爬虫
+# @Desc    : Bilibili Crawler

 import asyncio
 import os
@@ -64,7 +64,7 @@ class BilibiliCrawler(AbstractCrawler):
        self.index_url = "https://www.bilibili.com"
        self.user_agent = utils.get_user_agent()
        self.cdp_manager = None
-        self.ip_proxy_pool = None  # 代理IP池，用于代理自动刷新
+        self.ip_proxy_pool = None  # Proxy IP pool for automatic proxy refresh

    async def start(self):
        playwright_proxy_format, httpx_proxy_format = None, None
@@ -74,9 +74,9 @@ class BilibiliCrawler(AbstractCrawler):
            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)

        async with async_playwright() as playwright:
-            # 根据配置选择启动模式
+            # Choose launch mode based on configuration
            if config.ENABLE_CDP_MODE:
-                utils.logger.info("[BilibiliCrawler] 使用CDP模式启动浏览器")
+                utils.logger.info("[BilibiliCrawler] Launching browser using CDP mode")
                self.browser_context = await self.launch_browser_with_cdp(
                    playwright,
                    playwright_proxy_format,
@@ -84,7 +84,7 @@ class BilibiliCrawler(AbstractCrawler):
                    headless=config.CDP_HEADLESS,
                )
            else:
-                utils.logger.info("[BilibiliCrawler] 使用标准模式启动浏览器")
+                utils.logger.info("[BilibiliCrawler] Launching browser using standard mode")
                # Launch a browser context.
                chromium = playwright.chromium
                self.browser_context = await self.launch_browser(chromium, None, self.user_agent, headless=config.HEADLESS)
@@ -149,31 +149,31 @@ class BilibiliCrawler(AbstractCrawler):
        end: str = config.END_DAY,
    ) -> Tuple[str, str]:
        """
-        获取 bilibili 作品发布日期起始时间戳 pubtime_begin_s 与发布日期结束时间戳 pubtime_end_s
+        Get bilibili publish start timestamp pubtime_begin_s and publish end timestamp pubtime_end_s
        ---
-        :param start: 发布日期起始时间，YYYY-MM-DD
-        :param end: 发布日期结束时间，YYYY-MM-DD
+        :param start: Publish date start time, YYYY-MM-DD
+        :param end: Publish date end time, YYYY-MM-DD

        Note
        ---
-        - 搜索的时间范围为 start 至 end，包含 start 和 end
-        - 若要搜索同一天的内容，为了包含 start 当天的搜索内容，则 pubtime_end_s 的值应该为 pubtime_begin_s 的值加上一天再减去一秒，即 start 当天的最后一秒
-            - 如仅搜索 2024-01-05 的内容，pubtime_begin_s = 1704384000，pubtime_end_s = 1704470399
-              转换为可读的 datetime 对象：pubtime_begin_s = datetime.datetime(2024, 1, 5, 0, 0)，pubtime_end_s = datetime.datetime(2024, 1, 5, 23, 59, 59)
-        - 若要搜索 start 至 end 的内容，为了包含 end 当天的搜索内容，则 pubtime_end_s 的值应该为 pubtime_end_s 的值加上一天再减去一秒，即 end 当天的最后一秒
-            - 如搜索 2024-01-05 - 2024-01-06 的内容，pubtime_begin_s = 1704384000，pubtime_end_s = 1704556799
-              转换为可读的 datetime 对象：pubtime_begin_s = datetime.datetime(2024, 1, 5, 0, 0)，pubtime_end_s = datetime.datetime(2024, 1, 6, 23, 59, 59)
+        - Search time range is from start to end, including both start and end
+        - To search a single day (start == end), pubtime_end_s is pubtime_begin_s plus one day minus one second, i.e., the last second of the start day, so the whole day is included
+            - For example, searching only 2024-01-05 content, pubtime_begin_s = 1704384000, pubtime_end_s = 1704470399
+              Converted to readable datetime objects: pubtime_begin_s = datetime.datetime(2024, 1, 5, 0, 0), pubtime_end_s = datetime.datetime(2024, 1, 5, 23, 59, 59)
+        - To search from start to end, pubtime_end_s is midnight of the end day plus one day minus one second, i.e., the last second of the end day, so the whole end day is included
+            - For example, searching 2024-01-05 - 2024-01-06 content, pubtime_begin_s = 1704384000, pubtime_end_s = 1704556799
+              Converted to readable datetime objects: pubtime_begin_s = datetime.datetime(2024, 1, 5, 0, 0), pubtime_end_s = datetime.datetime(2024, 1, 6, 23, 59, 59)
        """
-        # 转换 start 与 end 为 datetime 对象
+        # Convert start and end to datetime objects
        start_day: datetime = datetime.strptime(start, "%Y-%m-%d")
        end_day: datetime = datetime.strptime(end, "%Y-%m-%d")
        if start_day > end_day:
            raise ValueError("Wrong time range, please check your start and end argument, to ensure that the start cannot exceed end")
-        elif start_day == end_day:  # 搜索同一天的内容
-            end_day = (start_day + timedelta(days=1) - timedelta(seconds=1))  # 则将 end_day 设置为 start_day + 1 day - 1 second
-        else:  # 搜索 start 至 end
-            end_day = (end_day + timedelta(days=1) - timedelta(seconds=1))  # 则将 end_day 设置为 end_day + 1 day - 1 second
-        # 将其重新转换为时间戳
+        elif start_day == end_day:  # Searching content from the same day
+            end_day = (start_day + timedelta(days=1) - timedelta(seconds=1))  # Set end_day to start_day + 1 day - 1 second
+        else:  # Searching from start to end
+            end_day = (end_day + timedelta(days=1) - timedelta(seconds=1))  # Set end_day to end_day + 1 day - 1 second
+        # Convert back to timestamps
        return str(int(start_day.timestamp())), str(int(end_day.timestamp()))

    async def search_by_keywords(self):
@@ -203,8 +203,8 @@ class BilibiliCrawler(AbstractCrawler):
                    page=page,
                    page_size=bili_limit_count,
                    order=SearchOrderType.DEFAULT,
-                    pubtime_begin_s=0,  # 作品发布日期起始时间戳
-                    pubtime_end_s=0,  # 作品发布日期结束日期时间戳
+                    pubtime_begin_s=0,  # Publish date start timestamp
+                    pubtime_end_s=0,  # Publish date end timestamp
                )
                video_list: List[Dict] = videos_res.get("result")

@@ -508,7 +508,7 @@ class BilibiliCrawler(AbstractCrawler):
                    "height": 1080
                },
                user_agent=user_agent,
-                channel="chrome",  # 使用系统的Chrome稳定版
+                channel="chrome",  # Use system's stable Chrome version
            )
            return browser_context
        else:
@@ -525,7 +525,7 @@ class BilibiliCrawler(AbstractCrawler):
        headless: bool = True,
    ) -> BrowserContext:
        """
-        使用CDP模式启动浏览器
+        Launch browser using CDP mode
        """
        try:
            self.cdp_manager = CDPBrowserManager()
@@ -536,22 +536,22 @@ class BilibiliCrawler(AbstractCrawler):
                headless=headless,
            )

-            # 显示浏览器信息
+            # Display browser information
            browser_info = await self.cdp_manager.get_browser_info()
-            utils.logger.info(f"[BilibiliCrawler] CDP浏览器信息: {browser_info}")
+            utils.logger.info(f"[BilibiliCrawler] CDP browser info: {browser_info}")

            return browser_context

        except Exception as e:
-            utils.logger.error(f"[BilibiliCrawler] CDP模式启动失败，回退到标准模式: {e}")
-            # 回退到标准模式
+            utils.logger.error(f"[BilibiliCrawler] CDP mode launch failed, fallback to standard mode: {e}")
+            # Fallback to standard mode
            chromium = playwright.chromium
            return await self.launch_browser(chromium, playwright_proxy, user_agent, headless)

    async def close(self):
        """Close browser context"""
        try:
-            # 如果使用CDP模式，需要特殊处理
+            # If using CDP mode, special handling is required
            if self.cdp_manager:
                await self.cdp_manager.cleanup()
                self.cdp_manager = None
--- a/media_platform/bilibili/field.py
+++ b/media_platform/bilibili/field.py
@@ -27,28 +27,28 @@ from enum import Enum


 class SearchOrderType(Enum):
-    # 综合排序
+    # Comprehensive sorting
    DEFAULT = ""

-    # 最多点击
+    # Most clicks
    MOST_CLICK = "click"

-    # 最新发布
+    # Latest published
    LAST_PUBLISH = "pubdate"

-    # 最多弹幕
+    # Most danmu (bullet comments)
    MOST_DANMU = "dm"

-    # 最多收藏
+    # Most bookmarks
    MOST_MARK = "stow"


 class CommentOrderType(Enum):
-    # 仅按热度
+    # By popularity only
    DEFAULT = 0

-    # 按热度+按时间
+    # By popularity + time
    MIXED = 1

-    # 按时间
+    # By time
    TIME = 2
--- a/media_platform/bilibili/help.py
+++ b/media_platform/bilibili/help.py
@@ -21,8 +21,8 @@
 # -*- coding: utf-8 -*-
 # @Author  : relakkes@gmail.com
 # @Time    : 2023/12/2 23:26
-# @Desc    : bilibili 请求参数签名
-# 逆向实现参考：https://socialsisteryi.github.io/bilibili-API-collect/docs/misc/sign/wbi.html#wbi%E7%AD%BE%E5%90%8D%E7%AE%97%E6%B3%95
+# @Desc    : bilibili request parameter signing
+# Reverse engineering implementation reference: https://socialsisteryi.github.io/bilibili-API-collect/docs/misc/sign/wbi.html#wbi%E7%AD%BE%E5%90%8D%E7%AE%97%E6%B3%95
 import re
 import urllib.parse
 from hashlib import md5
@@ -45,7 +45,7 @@ class BilibiliSign:

    def get_salt(self) -> str:
        """
-        获取加盐的 key
+        Get the salted key
        :return:
        """
        salt = ""
@@ -56,8 +56,8 @@ class BilibiliSign:

    def sign(self, req_data: Dict) -> Dict:
        """
-        请求参数中加上当前时间戳对请求参数中的key进行字典序排序
-        再将请求参数进行 url 编码集合 salt 进行 md5 就可以生成w_rid参数了
+        Add current timestamp to request parameters, sort keys in dictionary order,
+        then URL encode the parameters and combine with salt to generate md5 for w_rid parameter
        :param req_data:
        :return:
        """
@@ -65,35 +65,35 @@ class BilibiliSign:
        req_data.update({"wts": current_ts})
        req_data = dict(sorted(req_data.items()))
        req_data = {
-            # 过滤 value 中的 "!'()*" 字符
+            # Filter "!'()*" characters from values
            k: ''.join(filter(lambda ch: ch not in "!'()*", str(v)))
            for k, v
            in req_data.items()
        }
        query = urllib.parse.urlencode(req_data)
        salt = self.get_salt()
-        wbi_sign = md5((query + salt).encode()).hexdigest()  # 计算 w_rid
+        wbi_sign = md5((query + salt).encode()).hexdigest()  # Calculate w_rid
        req_data['w_rid'] = wbi_sign
        return req_data


 def parse_video_info_from_url(url: str) -> VideoUrlInfo:
    """
-    从B站视频URL中解析出视频ID
+    Parse video ID from Bilibili video URL
    Args:
-        url: B站视频链接
+        url: Bilibili video link
            - https://www.bilibili.com/video/BV1dwuKzmE26/?spm_id_from=333.1387.homepage.video_card.click
            - https://www.bilibili.com/video/BV1d54y1g7db
-            - BV1d54y1g7db (直接传入BV号)
+            - BV1d54y1g7db (directly pass BV number)
    Returns:
-        VideoUrlInfo: 包含视频ID的对象
+        VideoUrlInfo: Object containing video ID
    """
-    # 如果传入的已经是BV号,直接返回
+    # If the input is already a BV number, return directly
    if url.startswith("BV"):
        return VideoUrlInfo(video_id=url)

-    # 使用正则表达式提取BV号
-    # 匹配 /video/BV... 或 /video/av... 格式
+    # Use regex to extract BV number
+    # Match the /video/BV... format (the pattern below does not handle /video/av... IDs)
    bv_pattern = r'/video/(BV[a-zA-Z0-9]+)'
    match = re.search(bv_pattern, url)

@@ -101,26 +101,26 @@ def parse_video_info_from_url(url: str) -> VideoUrlInfo:
        video_id = match.group(1)
        return VideoUrlInfo(video_id=video_id)

-    raise ValueError(f"无法从URL中解析出视频ID: {url}")
+    raise ValueError(f"Unable to parse video ID from URL: {url}")


 def parse_creator_info_from_url(url: str) -> CreatorUrlInfo:
    """
-    从B站创作者空间URL中解析出创作者ID
+    Parse creator ID from Bilibili creator space URL
    Args:
-        url: B站创作者空间链接
+        url: Bilibili creator space link
            - https://space.bilibili.com/434377496?spm_id_from=333.1007.0.0
            - https://space.bilibili.com/20813884
-            - 434377496 (直接传入UID)
+            - 434377496 (directly pass UID)
    Returns:
-        CreatorUrlInfo: 包含创作者ID的对象
+        CreatorUrlInfo: Object containing creator ID
    """
-    # 如果传入的已经是纯数字ID,直接返回
+    # If the input is already a numeric ID, return directly
    if url.isdigit():
        return CreatorUrlInfo(creator_id=url)

-    # 使用正则表达式提取UID
-    # 匹配 /space.bilibili.com/数字 格式
+    # Use regex to extract UID
+    # Match the space.bilibili.com/<digits> format
    uid_pattern = r'space\.bilibili\.com/(\d+)'
    match = re.search(uid_pattern, url)

@@ -128,20 +128,20 @@ def parse_creator_info_from_url(url: str) -> CreatorUrlInfo:
        creator_id = match.group(1)
        return CreatorUrlInfo(creator_id=creator_id)

-    raise ValueError(f"无法从URL中解析出创作者ID: {url}")
+    raise ValueError(f"Unable to parse creator ID from URL: {url}")


 if __name__ == '__main__':
-    # 测试视频URL解析
+    # Test video URL parsing
    video_url1 = "https://www.bilibili.com/video/BV1dwuKzmE26/?spm_id_from=333.1387.homepage.video_card.click"
    video_url2 = "BV1d54y1g7db"
-    print("视频URL解析测试:")
+    print("Video URL parsing test:")
    print(f"URL1: {video_url1} -> {parse_video_info_from_url(video_url1)}")
    print(f"URL2: {video_url2} -> {parse_video_info_from_url(video_url2)}")

-    # 测试创作者URL解析
+    # Test creator URL parsing
    creator_url1 = "https://space.bilibili.com/434377496?spm_id_from=333.1007.0.0"
    creator_url2 = "20813884"
-    print("\n创作者URL解析测试:")
+    print("\nCreator URL parsing test:")
    print(f"URL1: {creator_url1} -> {parse_creator_info_from_url(creator_url1)}")
    print(f"URL2: {creator_url2} -> {parse_creator_info_from_url(creator_url2)}")
--- a/media_platform/bilibili/login.py
+++ b/media_platform/bilibili/login.py
@@ -21,7 +21,7 @@
 # -*- coding: utf-8 -*-
 # @Author  : relakkes@gmail.com
 # @Time    : 2023/12/2 18:44
-# @Desc    : bilibli登录实现类
+# @Desc    : bilibili login implementation class

 import asyncio
 import functools