mirror of
https://github.com/NanmiCoder/MediaCrawler.git
synced 2026-02-06 15:11:12 +08:00
feat: ip proxy expired check
This commit is contained in:
@@ -55,6 +55,7 @@ class ProxyIpPool:
|
||||
self.enable_validate_ip = enable_validate_ip
|
||||
self.proxy_list: List[IpInfoModel] = []
|
||||
self.ip_provider: ProxyProvider = ip_provider
|
||||
self.current_proxy: IpInfoModel | None = None # 当前正在使用的代理
|
||||
|
||||
async def load_proxies(self) -> None:
|
||||
"""
|
||||
@@ -108,8 +109,37 @@ class ProxyIpPool:
|
||||
raise Exception(
|
||||
"[ProxyIpPool.get_proxy] current ip invalid and again get it"
|
||||
)
|
||||
self.current_proxy = proxy # 保存当前使用的代理
|
||||
return proxy
|
||||
|
||||
def is_current_proxy_expired(self, buffer_seconds: int = 30) -> bool:
    """
    Report whether the proxy currently in use should be treated as expired.

    Args:
        buffer_seconds: Safety margin in seconds; it is passed unchanged to
            the current proxy's ``is_expired`` check.

    Returns:
        bool: True when no current proxy is set or the current proxy reports
        itself as expired; False when it is still valid.
    """
    active_proxy = self.current_proxy
    # A missing proxy counts as expired so callers always fetch one.
    return True if active_proxy is None else active_proxy.is_expired(buffer_seconds)
|
||||
|
||||
async def get_or_refresh_proxy(self, buffer_seconds: int = 30) -> IpInfoModel:
    """
    Return the current proxy, fetching a new one first if it has expired.

    Intended to be called before each outgoing request so that the proxy in
    use is still valid.

    Args:
        buffer_seconds: Safety margin in seconds, passed unchanged to
            ``is_current_proxy_expired``.

    Returns:
        IpInfoModel: The current proxy when it is still valid, otherwise the
        result of ``get_proxy()``.
    """
    if not self.is_current_proxy_expired(buffer_seconds):
        return self.current_proxy

    # Plain literal: the message has no placeholders, so no f-string is needed.
    utils.logger.info(
        "[ProxyIpPool.get_or_refresh_proxy] Current proxy expired or not set, getting new proxy..."
    )
    return await self.get_proxy()
|
||||
|
||||
async def _reload_proxies(self):
|
||||
"""
|
||||
# 重新加载代理池
|
||||
|
||||
77
proxy/proxy_mixin.py
Normal file
77
proxy/proxy_mixin.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright (c) 2025 relakkes@gmail.com
|
||||
#
|
||||
# This file is part of MediaCrawler project.
|
||||
# Repository: https://github.com/NanmiCoder/MediaCrawler
|
||||
# GitHub: https://github.com/NanmiCoder
|
||||
# Licensed under NON-COMMERCIAL LEARNING LICENSE 1.1
|
||||
#
|
||||
|
||||
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
|
||||
# 1. 不得用于任何商业用途。
|
||||
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
|
||||
# 3. 不得进行大规模爬取或对平台造成运营干扰。
|
||||
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
|
||||
# 5. 不得用于任何非法或不当的用途。
|
||||
#
|
||||
# 详细许可条款请参阅项目根目录下的LICENSE文件。
|
||||
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
|
||||
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
# @Author : relakkes@gmail.com
|
||||
# @Time : 2025/11/25
|
||||
# @Desc : 代理自动刷新 Mixin 类,供各平台 client 使用
|
||||
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
from tools import utils
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from proxy.proxy_ip_pool import ProxyIpPool
|
||||
|
||||
|
||||
class ProxyRefreshMixin:
    """
    Mixin that refreshes a client's proxy URL once the pooled proxy expires.

    Usage:
        1. Make the client class inherit from this mixin.
        2. Call ``init_proxy_pool(proxy_ip_pool)`` in the client's ``__init__``.
        3. Await ``_refresh_proxy_if_expired()`` before every request.

    Requirements:
        - The client class must have a ``self.proxy`` attribute that stores
          the current proxy URL.
    """

    # Class-level default: until init_proxy_pool() is called, refreshing is a no-op.
    _proxy_ip_pool: Optional["ProxyIpPool"] = None

    def init_proxy_pool(self, proxy_ip_pool: Optional["ProxyIpPool"]) -> None:
        """
        Store the proxy pool this client should refresh from.

        Args:
            proxy_ip_pool: Proxy IP pool instance, or None to leave refreshing
                disabled.
        """
        self._proxy_ip_pool = proxy_ip_pool

    async def _refresh_proxy_if_expired(self) -> None:
        """
        Replace ``self.proxy`` with a fresh proxy URL if the current one expired.

        Does nothing when no pool has been set or when the pool reports its
        current proxy as still valid.
        """
        pool = self._proxy_ip_pool
        if pool is None or not pool.is_current_proxy_expired():
            return

        utils.logger.info(
            f"[{self.__class__.__name__}._refresh_proxy_if_expired] Proxy expired, refreshing..."
        )
        new_proxy = await pool.get_or_refresh_proxy()

        # Rebuild the proxy URL; credentials are embedded only when both
        # the user name and the password are set.
        has_credentials = new_proxy.user and new_proxy.password
        self.proxy = (
            f"http://{new_proxy.user}:{new_proxy.password}@{new_proxy.ip}:{new_proxy.port}"
            if has_credentials
            else f"http://{new_proxy.ip}:{new_proxy.port}"
        )
        utils.logger.info(
            f"[{self.__class__.__name__}._refresh_proxy_if_expired] New proxy: {new_proxy.ip}:{new_proxy.port}"
        )
|
||||
@@ -22,6 +22,7 @@
|
||||
# @Author : relakkes@gmail.com
|
||||
# @Time : 2024/4/5 10:18
|
||||
# @Desc : 基础类型
|
||||
import time
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
@@ -41,4 +42,17 @@ class IpInfoModel(BaseModel):
|
||||
user: str = Field(title="IP代理认证的用户名")
|
||||
protocol: str = Field(default="https://", title="代理IP的协议")
|
||||
password: str = Field(title="IP代理认证用户的密码")
|
||||
expired_time_ts: Optional[int] = Field(title="IP 过期时间")
|
||||
expired_time_ts: Optional[int] = Field(default=None, title="IP 过期时间")
|
||||
|
||||
def is_expired(self, buffer_seconds: int = 30) -> bool:
    """
    Report whether this proxy IP has expired or is about to expire.

    Args:
        buffer_seconds: Safety margin in seconds; the proxy is treated as
            expired this many seconds before ``expired_time_ts`` so requests
            are not sent right at the expiry boundary.

    Returns:
        bool: True when the proxy is expired or within the buffer window;
        False when it is still valid or has no expiry time set.
    """
    deadline_ts = self.expired_time_ts
    if deadline_ts is None:
        # No recorded expiry time: treated as never expiring.
        return False
    return int(time.time()) >= deadline_ts - buffer_seconds
|
||||
|
||||
Reference in New Issue
Block a user