Merge pull request #847 from w21180239/fix/ssl-verify-proxy

fix: add an opt-in option to disable SSL verification for proxy/VPN environments (defaults to off)
This commit is contained in:
程序员阿江-Relakkes
2026-03-19 00:24:47 +08:00
committed by GitHub
13 changed files with 47 additions and 19 deletions

View File

@@ -120,6 +120,10 @@ FONT_PATH = "./docs/STZHONGS.TTF"
# Crawl interval
CRAWLER_MAX_SLEEP_SEC = 2
# 是否禁用 SSL 证书验证。仅在使用企业代理、Burp Suite、mitmproxy 等会注入自签名证书的中间人代理时设为 True。
# 警告:禁用 SSL 验证将使所有流量暴露于中间人攻击风险,请勿在生产环境中开启。
DISABLE_SSL_VERIFY = False
from .bilibili_config import *
from .xhs_config import *
from .dy_config import *

View File

@@ -29,6 +29,7 @@ from urllib.parse import urlencode
import httpx
from playwright.async_api import BrowserContext, Page
from tools.httpx_util import make_async_client
import config
from base.base_crawler import AbstractApiClient
@@ -68,7 +69,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
# Check if proxy has expired before each request
await self._refresh_proxy_if_expired()
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
response = await client.request(method, url, timeout=self.timeout, **kwargs)
try:
data: Dict = response.json()
@@ -222,7 +223,7 @@ class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
async def get_video_media(self, url: str) -> Union[bytes, None]:
# Follow CDN 302 redirects and treat any 2xx as success (some endpoints return 206)
async with httpx.AsyncClient(proxy=self.proxy, follow_redirects=True) as client:
async with make_async_client(proxy=self.proxy, follow_redirects=True) as client:
try:
response = await client.request("GET", url, timeout=self.timeout, headers=self.headers)
response.raise_for_status()

View File

@@ -29,6 +29,7 @@ from playwright.async_api import BrowserContext
from base.base_crawler import AbstractApiClient
from proxy.proxy_mixin import ProxyRefreshMixin
from tools import utils
from tools.httpx_util import make_async_client
from var import request_keyword_var
if TYPE_CHECKING:
@@ -116,7 +117,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
# Check whether the proxy has expired before each request
await self._refresh_proxy_if_expired()
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
response = await client.request(method, url, timeout=self.timeout, **kwargs)
try:
if response.text == "" or response.text == "blocked":
@@ -333,7 +334,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
return result
async def get_aweme_media(self, url: str) -> Union[bytes, None]:
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
try:
response = await client.request("GET", url, timeout=self.timeout, follow_redirects=True)
response.raise_for_status()
@@ -354,7 +355,7 @@ class DouYinClient(AbstractApiClient, ProxyRefreshMixin):
Returns:
重定向后的完整URL
"""
async with httpx.AsyncClient(proxy=self.proxy, follow_redirects=False) as client:
async with make_async_client(proxy=self.proxy, follow_redirects=False) as client:
try:
utils.logger.info(f"[DouYinClient.resolve_short_url] Resolving short URL: {short_url}")
response = await client.get(short_url, timeout=10)

View File

@@ -31,6 +31,7 @@ import config
from base.base_crawler import AbstractApiClient
from proxy.proxy_mixin import ProxyRefreshMixin
from tools import utils
from tools.httpx_util import make_async_client
if TYPE_CHECKING:
from proxy.proxy_ip_pool import ProxyIpPool
@@ -65,7 +66,7 @@ class KuaiShouClient(AbstractApiClient, ProxyRefreshMixin):
# Check if proxy is expired before each request
await self._refresh_proxy_if_expired()
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
response = await client.request(method, url, timeout=self.timeout, **kwargs)
data: Dict = response.json()
if data.get("errors"):
@@ -97,7 +98,7 @@ class KuaiShouClient(AbstractApiClient, ProxyRefreshMixin):
await self._refresh_proxy_if_expired()
json_str = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
response = await client.request(
method="POST",
url=f"{self._rest_host}{uri}",

View File

@@ -32,6 +32,7 @@ from urllib.parse import parse_qs, unquote, urlencode
import httpx
from httpx import Response
from playwright.async_api import BrowserContext, Page
from tools.httpx_util import make_async_client
from tenacity import retry, stop_after_attempt, wait_fixed
import config
@@ -73,7 +74,7 @@ class WeiboClient(ProxyRefreshMixin):
await self._refresh_proxy_if_expired()
enable_return_response = kwargs.pop("return_response", False)
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
response = await client.request(method, url, timeout=self.timeout, **kwargs)
if enable_return_response:
@@ -261,7 +262,7 @@ class WeiboClient(ProxyRefreshMixin):
:return:
"""
url = f"{self._host}/detail/{note_id}"
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
response = await client.request("GET", url, timeout=self.timeout, headers=self.headers)
if response.status_code != 200:
raise DataFetchError(f"get weibo detail err: {response.text}")
@@ -291,7 +292,7 @@ class WeiboClient(ProxyRefreshMixin):
# Since Weibo images are accessed through i1.wp.com, we need to concatenate the URL
final_uri = (f"{self._image_agent_host}"
f"{image_url}")
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
try:
response = await client.request("GET", final_uri, timeout=self.timeout)
response.raise_for_status()

View File

@@ -25,6 +25,7 @@ from urllib.parse import urlencode
import httpx
from playwright.async_api import BrowserContext, Page
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_not_exception_type
from tools.httpx_util import make_async_client
import config
from base.base_crawler import AbstractApiClient
@@ -127,7 +128,7 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
# return response.text
return_response = kwargs.pop("return_response", False)
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
response = await client.request(method, url, timeout=self.timeout, **kwargs)
if response.status_code == 471 or response.status_code == 461:
@@ -192,7 +193,7 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
# Check if proxy is expired before request
await self._refresh_proxy_if_expired()
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
try:
response = await client.request("GET", url, timeout=self.timeout)
response.raise_for_status()
@@ -219,7 +220,7 @@ class XiaoHongShuClient(AbstractApiClient, ProxyRefreshMixin):
"""
uri = "/api/sns/web/v1/user/selfinfo"
headers = await self._pre_headers(uri, params={})
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
response = await client.get(f"{self._host}{uri}", headers=headers)
if response.status_code == 200:
return response.json()

View File

@@ -26,6 +26,7 @@ from urllib.parse import urlencode
import httpx
from httpx import Response
from playwright.async_api import BrowserContext, Page
from tools.httpx_util import make_async_client
from tenacity import retry, stop_after_attempt, wait_fixed
import config
@@ -98,7 +99,7 @@ class ZhiHuClient(AbstractApiClient, ProxyRefreshMixin):
# return response.text
return_response = kwargs.pop('return_response', False)
async with httpx.AsyncClient(proxy=self.proxy) as client:
async with make_async_client(proxy=self.proxy) as client:
response = await client.request(method, url, timeout=self.timeout, **kwargs)
if response.status_code != 200:

View File

@@ -30,6 +30,7 @@ import httpx
from proxy import IpCache, IpGetError, ProxyProvider
from proxy.types import IpInfoModel
from tools import utils
from tools.httpx_util import make_async_client
class JiSuHttpProxy(ProxyProvider):
@@ -68,7 +69,7 @@ class JiSuHttpProxy(ProxyProvider):
need_get_count = num - len(ip_cache_list)
self.params.update({"num": need_get_count})
ip_infos = []
async with httpx.AsyncClient() as client:
async with make_async_client() as client:
url = self.api_path + "/fetchips" + '?' + urlencode(self.params)
utils.logger.info(f"[JiSuHttpProxy.get_proxy] get ip proxy url:{url}")
response = await client.get(url, headers={

View File

@@ -28,6 +28,7 @@ from typing import Dict, List
import httpx
from pydantic import BaseModel, Field
from tools.httpx_util import make_async_client
from proxy import IpCache, IpInfoModel, ProxyProvider
from proxy.types import ProviderNameEnum
@@ -113,7 +114,7 @@ class KuaiDaiLiProxy(ProxyProvider):
self.params.update({"num": need_get_count})
ip_infos: List[IpInfoModel] = []
async with httpx.AsyncClient() as client:
async with make_async_client() as client:
response = await client.get(self.api_base + uri, params=self.params)
if response.status_code != 200:

View File

@@ -30,6 +30,7 @@ import httpx
from proxy import IpCache, IpGetError, ProxyProvider
from proxy.types import IpInfoModel
from tools import utils
from tools.httpx_util import make_async_client
class WanDouHttpProxy(ProxyProvider):
@@ -65,7 +66,7 @@ class WanDouHttpProxy(ProxyProvider):
need_get_count = num - len(ip_cache_list)
self.params.update({"num": min(need_get_count, 100)}) # Maximum 100
ip_infos = []
async with httpx.AsyncClient() as client:
async with make_async_client() as client:
url = self.api_path + "?" + urlencode(self.params)
utils.logger.info(f"[WanDouHttpProxy.get_proxy] get ip proxy url:{url}")
response = await client.get(

View File

@@ -26,6 +26,7 @@ from typing import Dict, List
import httpx
from tenacity import retry, stop_after_attempt, wait_fixed
from tools.httpx_util import make_async_client
import config
from proxy.providers import (
@@ -81,7 +82,7 @@ class ProxyIpPool:
else:
proxy_url = f"http://{proxy.ip}:{proxy.port}"
async with httpx.AsyncClient(proxy=proxy_url) as client:
async with make_async_client(proxy=proxy_url) as client:
response = await client.get(self.valid_ip_url)
if response.status_code == 200:
return True

View File

@@ -37,6 +37,7 @@ from PIL import Image, ImageDraw, ImageShow
from playwright.async_api import Cookie, Page
from . import utils
from .httpx_util import make_async_client
async def find_login_qrcode(page: Page, selector: str) -> str:
@@ -47,7 +48,7 @@ async def find_login_qrcode(page: Page, selector: str) -> str:
)
login_qrcode_img = str(await elements.get_property("src")) # type: ignore
if "http://" in login_qrcode_img or "https://" in login_qrcode_img:
async with httpx.AsyncClient(follow_redirects=True) as client:
async with make_async_client(follow_redirects=True) as client:
utils.logger.info(f"[find_login_qrcode] get qrcode by url:{login_qrcode_img}")
resp = await client.get(login_qrcode_img, headers={"User-Agent": get_user_agent()})
if resp.status_code == 200:

13
tools/httpx_util.py Normal file
View File

@@ -0,0 +1,13 @@
# -*- coding: utf-8 -*-
import httpx
import config
def make_async_client(**kwargs) -> httpx.AsyncClient:
    """Create an httpx.AsyncClient with unified, project-wide configuration.

    Reads DISABLE_SSL_VERIFY from the config module (defaults to False,
    i.e. SSL certificate verification stays enabled).  Set it to True only
    when traffic passes through a man-in-the-middle proxy that injects
    self-signed certificates (corporate proxy, Burp Suite, mitmproxy, ...).

    Args:
        **kwargs: Passed straight through to httpx.AsyncClient
            (e.g. proxy=..., follow_redirects=...).  A caller-supplied
            verify=... takes precedence because setdefault below never
            overwrites an existing key.

    Returns:
        A configured httpx.AsyncClient instance (not yet entered as a
        context manager; callers use `async with`).
    """
    # getattr guards against older config files that predate this flag.
    kwargs.setdefault("verify", not getattr(config, "DISABLE_SSL_VERIFY", False))
    return httpx.AsyncClient(**kwargs)