docs: translate comments and metadata to English

Translate Chinese comments, variable descriptions, and metadata across multiple configuration and core files into English. This improves codebase accessibility for international developers. Additionally, remove the sponsorship section from the README files.
2026-06-09 03:17:25 +08:00 · 2026-02-12 05:30:11 +08:00
parent 257743b016
commit d614ccf247
20 changed files with 140 additions and 181 deletions
--- a/media_platform/douyin/core.py
+++ b/media_platform/douyin/core.py
@@ -55,7 +55,7 @@ class DouYinCrawler(AbstractCrawler):
    def __init__(self) -> None:
        self.index_url = "https://www.douyin.com"
        self.cdp_manager = None
-        self.ip_proxy_pool = None  # 代理IP池，用于代理自动刷新
+        self.ip_proxy_pool = None  # Proxy IP pool for automatic proxy refresh

    async def start(self) -> None:
        playwright_proxy_format, httpx_proxy_format = None, None
@@ -65,7 +65,7 @@ class DouYinCrawler(AbstractCrawler):
            playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)

        async with async_playwright() as playwright:
-            # 根据配置选择启动模式
+            # Select startup mode based on configuration
            if config.ENABLE_CDP_MODE:
                utils.logger.info("[DouYinCrawler] 使用CDP模式启动浏览器")
                self.browser_context = await self.launch_browser_with_cdp(
@@ -178,12 +178,12 @@ class DouYinCrawler(AbstractCrawler):
            try:
                video_info = parse_video_info_from_url(video_url)

-                # 处理短链接
+                # Handle short links
                if video_info.url_type == "short":
                    utils.logger.info(f"[DouYinCrawler.get_specified_awemes] Resolving short link: {video_url}")
                    resolved_url = await self.dy_client.resolve_short_url(video_url)
                    if resolved_url:
-                        # 从解析后的URL中提取视频ID
+                        # Extract the video ID from the resolved URL
                        video_info = parse_video_info_from_url(resolved_url)
                        utils.logger.info(f"[DouYinCrawler.get_specified_awemes] Short link resolved to aweme ID: {video_info.aweme_id}")
                    else:
@@ -240,7 +240,7 @@ class DouYinCrawler(AbstractCrawler):
    async def get_comments(self, aweme_id: str, semaphore: asyncio.Semaphore) -> None:
        async with semaphore:
            try:
-                # 将关键词列表传递给 get_aweme_all_comments 方法
+                # Pass the list of keywords to the get_aweme_all_comments method
                # Use fixed crawling interval
                crawl_interval = config.CRAWLER_MAX_SLEEP_SEC
                await self.dy_client.get_aweme_all_comments(
@@ -311,7 +311,7 @@ class DouYinCrawler(AbstractCrawler):
            },
            playwright_page=self.context_page,
            cookie_dict=cookie_dict,
-            proxy_ip_pool=self.ip_proxy_pool,  # 传递代理池用于自动刷新
+            proxy_ip_pool=self.ip_proxy_pool,  # Pass proxy pool for automatic refresh
        )
        return douyin_client

@@ -361,10 +361,10 @@ class DouYinCrawler(AbstractCrawler):
                headless=headless,
            )

-            # 添加反检测脚本
+            # Add anti-detection script
            await self.cdp_manager.add_stealth_script()

-            # 显示浏览器信息
+            # Show browser information
            browser_info = await self.cdp_manager.get_browser_info()
            utils.logger.info(f"[DouYinCrawler] CDP浏览器信息: {browser_info}")

@@ -372,13 +372,13 @@ class DouYinCrawler(AbstractCrawler):

        except Exception as e:
            utils.logger.error(f"[DouYinCrawler] CDP模式启动失败，回退到标准模式: {e}")
-            # 回退到标准模式
+            # Fall back to standard mode
            chromium = playwright.chromium
            return await self.launch_browser(chromium, playwright_proxy, user_agent, headless)

    async def close(self) -> None:
        """Close browser context"""
-        # 如果使用CDP模式，需要特殊处理
+        # CDP mode requires special handling on close
        if self.cdp_manager:
            await self.cdp_manager.cleanup()
            self.cdp_manager = None
@@ -396,11 +396,11 @@ class DouYinCrawler(AbstractCrawler):
        if not config.ENABLE_GET_MEIDAS:
            utils.logger.info(f"[DouYinCrawler.get_aweme_media] Crawling image mode is not enabled")
            return
-        # 笔记 urls 列表，若为短视频类型则返回为空列表
+        # List of note image URLs; empty if the item is a short video
        note_download_url: List[str] = douyin_store._extract_note_image_list(aweme_item)
-        # 视频 url，永远存在，但为短视频类型时的文件其实是音频文件
+        # The video URL will always exist, but when it is a short video type, the file is actually an audio file.
        video_download_url: str = douyin_store._extract_video_download_url(aweme_item)
-        # TODO: 抖音并没采用音视频分离的策略，故音频可从原视频中分离，暂不提取
+        # TODO: Douyin does not serve audio and video separately, so the audio could be split out of the original video; it is not extracted for now.
        if note_download_url:
            await self.get_aweme_images(aweme_item)
        else:
@@ -416,7 +416,7 @@ class DouYinCrawler(AbstractCrawler):
        if not config.ENABLE_GET_MEIDAS:
            return
        aweme_id = aweme_item.get("aweme_id")
-        # 笔记 urls 列表，若为短视频类型则返回为空列表
+        # List of note image URLs; empty if the item is a short video
        note_download_url: List[str] = douyin_store._extract_note_image_list(aweme_item)

        if not note_download_url:
@@ -444,7 +444,7 @@ class DouYinCrawler(AbstractCrawler):
            return
        aweme_id = aweme_item.get("aweme_id")

-        # 视频 url，永远存在，但为短视频类型时的文件其实是音频文件
+        # The video URL will always exist, but when it is a short video type, the file is actually an audio file.
        video_download_url: str = douyin_store._extract_video_download_url(aweme_item)

        if not video_download_url: