feat: ip proxy expired check

This commit is contained in:
程序员阿江(Relakkes)
2025-11-25 12:39:10 +08:00
parent 1da347cbf8
commit 6eef02d08c
16 changed files with 241 additions and 25 deletions

View File

@@ -24,7 +24,7 @@
import asyncio
import json
import random
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
from urllib.parse import urlencode
import httpx
@@ -32,14 +32,18 @@ from playwright.async_api import BrowserContext, Page
import config
from base.base_crawler import AbstractApiClient
from proxy.proxy_mixin import ProxyRefreshMixin
from tools import utils
if TYPE_CHECKING:
from proxy.proxy_ip_pool import ProxyIpPool
from .exception import DataFetchError
from .field import CommentOrderType, SearchOrderType
from .help import BilibiliSign
class BilibiliClient(AbstractApiClient):
class BilibiliClient(AbstractApiClient, ProxyRefreshMixin):
def __init__(
self,
@@ -49,6 +53,7 @@ class BilibiliClient(AbstractApiClient):
headers: Dict[str, str],
playwright_page: Page,
cookie_dict: Dict[str, str],
proxy_ip_pool: Optional["ProxyIpPool"] = None,
):
self.proxy = proxy
self.timeout = timeout
@@ -56,8 +61,13 @@ class BilibiliClient(AbstractApiClient):
self._host = "https://api.bilibili.com"
self.playwright_page = playwright_page
self.cookie_dict = cookie_dict
# 初始化代理池(来自 ProxyRefreshMixin
self.init_proxy_pool(proxy_ip_pool)
async def request(self, method, url, **kwargs) -> Any:
# 每次请求前检测代理是否过期
await self._refresh_proxy_if_expired()
async with httpx.AsyncClient(proxy=self.proxy) as client:
response = await client.request(method, url, timeout=self.timeout, **kwargs)
try:

View File

@@ -64,12 +64,13 @@ class BilibiliCrawler(AbstractCrawler):
self.index_url = "https://www.bilibili.com"
self.user_agent = utils.get_user_agent()
self.cdp_manager = None
self.ip_proxy_pool = None # 代理IP池用于代理自动刷新
async def start(self):
playwright_proxy_format, httpx_proxy_format = None, None
if config.ENABLE_IP_PROXY:
ip_proxy_pool = await create_ip_pool(config.IP_PROXY_POOL_COUNT, enable_validate_ip=True)
ip_proxy_info: IpInfoModel = await ip_proxy_pool.get_proxy()
self.ip_proxy_pool = await create_ip_pool(config.IP_PROXY_POOL_COUNT, enable_validate_ip=True)
ip_proxy_info: IpInfoModel = await self.ip_proxy_pool.get_proxy()
playwright_proxy_format, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)
async with async_playwright() as playwright:
@@ -473,6 +474,7 @@ class BilibiliCrawler(AbstractCrawler):
},
playwright_page=self.context_page,
cookie_dict=cookie_dict,
proxy_ip_pool=self.ip_proxy_pool, # 传递代理池用于自动刷新
)
return bilibili_client_obj