i18n: translate all Chinese comments, docstrings, and logger messages to English

Comprehensive translation of Chinese text to English across the entire codebase:

- api/: FastAPI server documentation and logger messages
- cache/: Cache abstraction layer comments and docstrings
- database/: Database models and MongoDB store documentation
- media_platform/: All platform crawlers (Bilibili, Douyin, Kuaishou, Tieba, Weibo, Xiaohongshu, Zhihu)
- model/: Data model documentation
- proxy/: Proxy pool and provider documentation
- store/: Data storage layer comments
- tools/: Utility functions and browser automation
- test/: Test file documentation

Preserved: Chinese disclaimer header (lines 10-18) for legal compliance

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
程序员阿江(Relakkes)
2025-12-26 23:27:19 +08:00
parent 1544d13dd5
commit 157ddfb21b
93 changed files with 1971 additions and 1955 deletions

View File

@@ -50,7 +50,7 @@ class XhsStoreFactory:
def get_video_url_arr(note_item: Dict) -> List:
"""
获取视频url数组
Get video url array
Args:
note_item:
@@ -64,7 +64,7 @@ def get_video_url_arr(note_item: Dict) -> List:
originVideoKey = note_item.get('video').get('consumer').get('origin_video_key')
if originVideoKey == '':
originVideoKey = note_item.get('video').get('consumer').get('originVideoKey')
# 降级有水印
# Fallback with watermark
if originVideoKey == '':
videos = note_item.get('video').get('media').get('stream').get('h264')
if type(videos).__name__ == 'list':
@@ -77,7 +77,7 @@ def get_video_url_arr(note_item: Dict) -> List:
async def update_xhs_note(note_item: Dict):
"""
更新小红书笔记
Update Xiaohongshu note
Args:
note_item:
@@ -97,26 +97,26 @@ async def update_xhs_note(note_item: Dict):
video_url = ','.join(get_video_url_arr(note_item))
local_db_item = {
"note_id": note_item.get("note_id"), # 帖子id
"type": note_item.get("type"), # 帖子类型
"title": note_item.get("title") or note_item.get("desc", "")[:255], # 帖子标题
"desc": note_item.get("desc", ""), # 帖子描述
"video_url": video_url, # 帖子视频url
"time": note_item.get("time"), # 帖子发布时间
"last_update_time": note_item.get("last_update_time", 0), # 帖子最后更新时间
"user_id": user_info.get("user_id"), # 用户id
"nickname": user_info.get("nickname"), # 用户昵称
"avatar": user_info.get("avatar"), # 用户头像
"liked_count": interact_info.get("liked_count"), # 点赞数
"collected_count": interact_info.get("collected_count"), # 收藏数
"comment_count": interact_info.get("comment_count"), # 评论数
"share_count": interact_info.get("share_count"), # 分享数
"ip_location": note_item.get("ip_location", ""), # ip地址
"image_list": ','.join([img.get('url', '') for img in image_list]), # 图片url
"tag_list": ','.join([tag.get('name', '') for tag in tag_list if tag.get('type') == 'topic']), # 标签
"last_modify_ts": utils.get_current_timestamp(), # 最后更新时间戳MediaCrawler程序生成的主要用途在db存储的时候记录一条记录最新更新时间
"note_url": f"https://www.xiaohongshu.com/explore/{note_id}?xsec_token={note_item.get('xsec_token')}&xsec_source=pc_search", # 帖子url
"source_keyword": source_keyword_var.get(), # 搜索关键词
"note_id": note_item.get("note_id"), # Note ID
"type": note_item.get("type"), # Note type
"title": note_item.get("title") or note_item.get("desc", "")[:255], # Note title
"desc": note_item.get("desc", ""), # Note description
"video_url": video_url, # Note video url
"time": note_item.get("time"), # Note publish time
"last_update_time": note_item.get("last_update_time", 0), # Note last update time
"user_id": user_info.get("user_id"), # User ID
"nickname": user_info.get("nickname"), # User nickname
"avatar": user_info.get("avatar"), # User avatar
"liked_count": interact_info.get("liked_count"), # Like count
"collected_count": interact_info.get("collected_count"), # Collection count
"comment_count": interact_info.get("comment_count"), # Comment count
"share_count": interact_info.get("share_count"), # Share count
"ip_location": note_item.get("ip_location", ""), # IP location
"image_list": ','.join([img.get('url', '') for img in image_list]), # Image URLs
"tag_list": ','.join([tag.get('name', '') for tag in tag_list if tag.get('type') == 'topic']), # Tags
"last_modify_ts": utils.get_current_timestamp(), # Last modification timestamp (Generated by MediaCrawler, mainly used to record the latest update time of a record in DB storage)
"note_url": f"https://www.xiaohongshu.com/explore/{note_id}?xsec_token={note_item.get('xsec_token')}&xsec_source=pc_search", # Note URL
"source_keyword": source_keyword_var.get(), # Search keyword
"xsec_token": note_item.get("xsec_token"), # xsec_token
}
utils.logger.info(f"[store.xhs.update_xhs_note] xhs note: {local_db_item}")
@@ -125,7 +125,7 @@ async def update_xhs_note(note_item: Dict):
async def batch_update_xhs_note_comments(note_id: str, comments: List[Dict]):
"""
批量更新小红书笔记评论
Batch update Xiaohongshu note comments
Args:
note_id:
comments:
@@ -141,7 +141,7 @@ async def batch_update_xhs_note_comments(note_id: str, comments: List[Dict]):
async def update_xhs_note_comment(note_id: str, comment_item: Dict):
"""
更新小红书笔记评论
Update Xiaohongshu note comment
Args:
note_id:
comment_item:
@@ -154,18 +154,18 @@ async def update_xhs_note_comment(note_id: str, comment_item: Dict):
comment_pictures = [item.get("url_default", "") for item in comment_item.get("pictures", [])]
target_comment = comment_item.get("target_comment", {})
local_db_item = {
"comment_id": comment_id, # 评论id
"create_time": comment_item.get("create_time"), # 评论时间
"ip_location": comment_item.get("ip_location"), # ip地址
"note_id": note_id, # 帖子id
"content": comment_item.get("content"), # 评论内容
"user_id": user_info.get("user_id"), # 用户id
"nickname": user_info.get("nickname"), # 用户昵称
"avatar": user_info.get("image"), # 用户头像
"sub_comment_count": comment_item.get("sub_comment_count", 0), # 子评论数
"pictures": ",".join(comment_pictures), # 评论图片
"parent_comment_id": target_comment.get("id", 0), # 父评论id
"last_modify_ts": utils.get_current_timestamp(), # 最后更新时间戳MediaCrawler程序生成的主要用途在db存储的时候记录一条记录最新更新时间
"comment_id": comment_id, # Comment ID
"create_time": comment_item.get("create_time"), # Comment time
"ip_location": comment_item.get("ip_location"), # IP location
"note_id": note_id, # Note ID
"content": comment_item.get("content"), # Comment content
"user_id": user_info.get("user_id"), # User ID
"nickname": user_info.get("nickname"), # User nickname
"avatar": user_info.get("image"), # User avatar
"sub_comment_count": comment_item.get("sub_comment_count", 0), # Sub-comment count
"pictures": ",".join(comment_pictures), # Comment pictures
"parent_comment_id": target_comment.get("id", 0), # Parent comment ID
"last_modify_ts": utils.get_current_timestamp(), # Last modification timestamp (Generated by MediaCrawler, mainly used to record the latest update time of a record in DB storage)
"like_count": comment_item.get("like_count", 0),
}
utils.logger.info(f"[store.xhs.update_xhs_note_comment] xhs note comment:{local_db_item}")
@@ -174,7 +174,7 @@ async def update_xhs_note_comment(note_id: str, comment_item: Dict):
async def save_creator(user_id: str, creator: Dict):
"""
保存小红书创作者
Save Xiaohongshu creator
Args:
user_id:
creator:
@@ -197,25 +197,25 @@ async def save_creator(user_id: str, creator: Dict):
def get_gender(gender):
if gender == 1:
return '女'
return 'Female'
elif gender == 0:
return '男'
return 'Male'
else:
return None
local_db_item = {
'user_id': user_id, # 用户id
'nickname': user_info.get('nickname'), # 昵称
'gender': get_gender(user_info.get('gender')), # 性别
'avatar': user_info.get('images'), # 头像
'desc': user_info.get('desc'), # 个人描述
'ip_location': user_info.get('ipLocation'), # ip地址
'follows': follows, # 关注数
'fans': fans, # 粉丝数
'interaction': interaction, # 互动数
'user_id': user_id, # User ID
'nickname': user_info.get('nickname'), # Nickname
'gender': get_gender(user_info.get('gender')), # Gender
'avatar': user_info.get('images'), # Avatar
'desc': user_info.get('desc'), # Personal description
'ip_location': user_info.get('ipLocation'), # IP location
'follows': follows, # Following count
'fans': fans, # Fans count
'interaction': interaction, # Interaction count
'tag_list': json.dumps({tag.get('tagType'): tag.get('name')
for tag in creator.get('tags')}, ensure_ascii=False), # 标签
"last_modify_ts": utils.get_current_timestamp(), # 最后更新时间戳MediaCrawler程序生成的主要用途在db存储的时候记录一条记录最新更新时间
for tag in creator.get('tags')}, ensure_ascii=False), # Tags
"last_modify_ts": utils.get_current_timestamp(), # Last modification timestamp (Generated by MediaCrawler, mainly used to record the latest update time of a record in DB storage)
}
utils.logger.info(f"[store.xhs.save_creator] creator:{local_db_item}")
await XhsStoreFactory.create_store().store_creator(local_db_item)
@@ -223,7 +223,7 @@ async def save_creator(user_id: str, creator: Dict):
async def update_xhs_note_image(note_id, pic_content, extension_file_name):
"""
更新小红书笔记图片
Update Xiaohongshu note image
Args:
note_id:
pic_content:
@@ -238,7 +238,7 @@ async def update_xhs_note_image(note_id, pic_content, extension_file_name):
async def update_xhs_note_video(note_id, video_content, extension_file_name):
"""
更新小红书笔记视频
Update Xiaohongshu note video
Args:
note_id:
video_content:

View File

@@ -18,7 +18,7 @@
# @Author : persist1@126.com
# @Time : 2025/9/5 19:34
# @Desc : 小红书存储实现类
# @Desc : Xiaohongshu storage implementation class
import json
import os
from datetime import datetime
@@ -281,7 +281,7 @@ class XhsSqliteStoreImplement(XhsDbStoreImplement):
class XhsMongoStoreImplement(AbstractStore):
"""小红书MongoDB存储实现"""
"""Xiaohongshu MongoDB storage implementation"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
@@ -289,9 +289,9 @@ class XhsMongoStoreImplement(AbstractStore):
async def store_content(self, content_item: Dict):
"""
存储笔记内容到MongoDB
Store note content to MongoDB
Args:
content_item: 笔记内容数据
content_item: Note content data
"""
note_id = content_item.get("note_id")
if not note_id:
@@ -306,9 +306,9 @@ class XhsMongoStoreImplement(AbstractStore):
async def store_comment(self, comment_item: Dict):
"""
存储评论到MongoDB
Store comment to MongoDB
Args:
comment_item: 评论数据
comment_item: Comment data
"""
comment_id = comment_item.get("comment_id")
if not comment_id:
@@ -323,9 +323,9 @@ class XhsMongoStoreImplement(AbstractStore):
async def store_creator(self, creator_item: Dict):
"""
存储创作者信息到MongoDB
Store creator information to MongoDB
Args:
creator_item: 创作者数据
creator_item: Creator data
"""
user_id = creator_item.get("user_id")
if not user_id:
@@ -340,7 +340,7 @@ class XhsMongoStoreImplement(AbstractStore):
class XhsExcelStoreImplement:
"""小红书Excel存储实现 - 全局单例"""
"""Xiaohongshu Excel storage implementation - Global singleton"""
def __new__(cls, *args, **kwargs):
from store.excel_store_base import ExcelStoreBase

View File

@@ -20,7 +20,7 @@
# -*- coding: utf-8 -*-
# @Author : helloteemo
# @Time : 2024/7/11 22:35
# @Desc : 小红书媒体保存
# @Desc : Xiaohongshu media storage
import pathlib
from typing import Dict