添加了对媒体资源服务器的异常处理,参见 issue #691

This commit is contained in:
未来可欺
2025-08-05 13:11:00 +08:00
parent b9d30bbabb
commit 81f2dbe4ab
8 changed files with 50 additions and 24 deletions

View File

@@ -30,7 +30,7 @@ class DouYinClient(AbstractApiClient):
def __init__(
self,
timeout=30, # 若开启爬取媒体选项,抖音的短视频需要更久的超时时间
timeout=60, # 若开启爬取媒体选项,抖音的短视频需要更久的超时时间
proxy=None,
*,
headers: Dict,
@@ -305,7 +305,7 @@ class DouYinClient(AbstractApiClient):
posts_has_more = aweme_post_res.get("has_more", 0)
max_cursor = aweme_post_res.get("max_cursor")
aweme_list = aweme_post_res.get("aweme_list") if aweme_post_res.get("aweme_list") else []
utils.logger.info(f"[DouYinCrawler.get_all_user_aweme_posts] get sec_user_id:{sec_user_id} video len : {len(aweme_list)}")
utils.logger.info(f"[DouYinClient.get_all_user_aweme_posts] get sec_user_id:{sec_user_id} video len : {len(aweme_list)}")
if callback:
await callback(aweme_list)
result.extend(aweme_list)
@@ -313,9 +313,14 @@ class DouYinClient(AbstractApiClient):
async def get_aweme_media(self, url: str) -> Union[bytes, None]:
    """Download the raw bytes of a media resource.

    Args:
        url: Address of the media file to fetch.

    Returns:
        The response body as bytes, or ``None`` when the request fails
        (transport error or non-2xx status) or when the response's reason
        phrase is not ``"OK"``.
    """
    async with httpx.AsyncClient(proxy=self.proxy) as client:
        try:
            response = await client.request("GET", url, timeout=self.timeout, follow_redirects=True)
            response.raise_for_status()
        except httpx.HTTPError as exc:
            # httpx.HTTPError is the common base of httpx.RequestError
            # (connection failures, timeouts, too many redirects, ...) and
            # httpx.HTTPStatusError (raised by raise_for_status()). Catching
            # only HTTPStatusError would let transport failures propagate.
            # repr() is logged because some transport exceptions carry an
            # empty message.
            utils.logger.error(f"[DouYinClient.get_aweme_media] request {url} err: {exc!r}")
            return None

        # Kept from the original logic: only a plain "OK" response is accepted.
        if response.reason_phrase != "OK":
            utils.logger.error(f"[DouYinClient.get_aweme_media] request {url} err, res:{response.text}")
            return None
        return response.content