mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-06-09 03:17:25 +08:00
docs: translate comments and metadata to English
Translate Chinese comments, variable descriptions, and metadata across multiple configuration and core files into English. This improves codebase accessibility for international developers. Additionally, remove the sponsorship section from the README files.
This commit is contained in:
@@ -43,7 +43,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
timeout=60, # 若开启爬取媒体选项,抖音的短视频需要更久的超时时间
|
||||
timeout=60, # If the crawl media option is turned on, Douyin’s short videos will require a longer timeout.
|
||||
proxy=None,
|
||||
*,
|
||||
headers: Dict,
|
||||
@@ -57,7 +57,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
self._host = "https://www.douyin.com"
|
||||
self.playwright_page = playwright_page
|
||||
self.cookie_dict = cookie_dict
|
||||
# 初始化代理池(来自 ProxyRefreshMixin)
|
||||
# Initialize proxy pool (from ProxyRefreshMixin)
|
||||
self.init_proxy_pool(proxy_ip_pool)
|
||||
|
||||
async def __process_req_params(
|
||||
@@ -103,7 +103,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
params.update(common_params)
|
||||
query_string = urllib.parse.urlencode(params)
|
||||
|
||||
# 20240927 a-bogus更新(JS版本)
|
||||
# 20240927 a-bogus update (JS version)
|
||||
post_data = {}
|
||||
if request_method == "POST":
|
||||
post_data = params
|
||||
@@ -113,7 +113,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
params["a_bogus"] = a_bogus
|
||||
|
||||
async def request(self, method, url, **kwargs):
|
||||
# 每次请求前检测代理是否过期
|
||||
# Check whether the proxy has expired before each request
|
||||
await self._refresh_proxy_if_expired()
|
||||
|
||||
async with httpx.AsyncClient(proxy=self.proxy) as client:
|
||||
@@ -266,13 +266,13 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
if len(result) + len(comments) > max_count:
|
||||
comments = comments[:max_count - len(result)]
|
||||
result.extend(comments)
|
||||
if callback: # 如果有回调函数,就执行回调函数
|
||||
if callback: # If there is a callback function, execute the callback function
|
||||
await callback(aweme_id, comments)
|
||||
|
||||
await asyncio.sleep(crawl_interval)
|
||||
if not is_fetch_sub_comments:
|
||||
continue
|
||||
# 获取二级评论
|
||||
# Fetch second-level comments (replies)
|
||||
for comment in comments:
|
||||
reply_comment_total = comment.get("reply_comment_total")
|
||||
|
||||
@@ -290,7 +290,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
if not sub_comments:
|
||||
continue
|
||||
result.extend(sub_comments)
|
||||
if callback: # 如果有回调函数,就执行回调函数
|
||||
if callback: # If there is a callback function, execute the callback function
|
||||
await callback(aweme_id, sub_comments)
|
||||
await asyncio.sleep(crawl_interval)
|
||||
return result
|
||||
@@ -343,7 +343,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
else:
|
||||
return response.content
|
||||
except httpx.HTTPError as exc: # some wrong when call httpx.request method, such as connection error, client error, server error or response status code is not 2xx
|
||||
utils.logger.error(f"[DouYinClient.get_aweme_media] {exc.__class__.__name__} for {exc.request.url} - {exc}") # 保留原始异常类型名称,以便开发者调试
|
||||
utils.logger.error(f"[DouYinClient.get_aweme_media] {exc.__class__.__name__} for {exc.request.url} - {exc}") # Keep the original exception type name for developers to debug
|
||||
return None
|
||||
|
||||
async def resolve_short_url(self, short_url: str) -> str:
|
||||
@@ -359,7 +359,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
|
||||
utils.logger.info(f"[DouYinClient.resolve_short_url] Resolving short URL: {short_url}")
|
||||
response = await client.get(short_url, timeout=10)
|
||||
|
||||
# 短链接通常返回302重定向
|
||||
# Short links usually return a 302 redirect
|
||||
if response.status_code in [301, 302, 303, 307, 308]:
|
||||
redirect_url = response.headers.get("Location", "")
|
||||
utils.logger.info(f"[DouYinClient.resolve_short_url] Resolved to: {redirect_url}")
|
||||
|
||||
Reference in New Issue
Block a user