feat: 增加 IP 代理的最新实现

This commit is contained in:
Relakkes
2023-12-02 16:14:36 +08:00
parent a8a4d34d2a
commit 986179b9c9
16 changed files with 562 additions and 267 deletions

4
proxy/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 14:37
# @Desc :

137
proxy/proxy_account_pool.py Normal file
View File

@@ -0,0 +1,137 @@
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 11:18
# @Desc : IP 和 手机号 一一配对的账号代理池
from typing import List, Optional, Set, Tuple
import config
class PhonePool:
"""phone pool class"""
def __init__(self) -> None:
self.phones: List[str] = []
self.used_phones: Set[str] = set()
def add_phone(self, phone: str) -> bool:
"""add phone to the pool"""
if phone not in self.phones:
self.phones.append(phone)
return True
return False
def remove_phone(self, phone: str) -> bool:
"""remove phone from the pool"""
if phone in self.used_phones:
self.phones.remove(phone)
self.used_phones.remove(phone)
return True
return False
def get_phone(self) -> Optional[str]:
"""get phone and mark as used"""
if self.phones:
left_phone = self.phones.pop(0)
self.used_phones.add(left_phone)
return left_phone
return None
def clear(self):
"""clear phone pool"""
self.phones = []
self.used_phones = set()
class IPPool:
def __init__(self) -> None:
self.ips: List[str]= []
self.used_ips: Set[str]= set()
def add_ip(self, ip):
"""添加ip"""
if ip not in self.ips:
self.ips.append(ip)
return True
return False
def remove_ip(self, ip: str) -> bool:
"""remove ip"""
if ip in self.used_ips:
self.ips.remove(ip)
self.used_ips.remove(ip)
return True
return False
def get_ip(self) -> Optional[str]:
"""get ip and mark as used"""
if self.ips:
left_ips = self.ips.pop(0)
self.used_ips.add(left_ips)
return left_ips
return None
def clear(self):
""" clear ip pool"""
self.ips = []
self.used_ips = set()
class AccountPool:
"""account pool class"""
def __init__(self):
self.phone_pool = PhonePool()
self.ip_pool = IPPool()
def add_account(self, phone: str, ip: str) -> bool:
"""add account to pool with phone and ip"""
if self.phone_pool.add_phone(phone) and self.ip_pool.add_ip(ip):
return True
return False
def remove_account(self, phone: str, ip: str) -> bool:
"""remove account from pool """
if self.phone_pool.remove_phone(phone) and self.ip_pool.remove_ip(ip):
return True
return False
def get_account(self) -> Tuple[str, str]:
"""get account if no account, reload account pool"""
phone = self.phone_pool.get_phone()
ip = self.ip_pool.get_ip()
if not phone or not ip:
reload_account_pool(self)
return self.get_account()
return phone, ip
def clear_account(self):
"""clear account pool"""
self.phone_pool.clear()
self.ip_pool.clear()
def reload_account_pool(apo: AccountPool):
"""reload account pool"""
apo.clear_account()
for phone, ip in zip(config.PHONE_LIST, config.IP_PROXY_LIST):
apo.add_account(phone, ip)
def create_account_pool() -> AccountPool:
"""create account pool"""
apo = AccountPool()
reload_account_pool(apo=apo)
return apo
if __name__ == '__main__':
import time
ac_pool = create_account_pool()
p, i = ac_pool.get_account()
while p:
print(f"get phone:{p}, ip proxy:{i} from account pool")
p, i = ac_pool.get_account()
time.sleep(1)

89
proxy/proxy_ip_pool.py Normal file
View File

@@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 13:45
# @Desc : ip代理池实现
import random
from typing import List
import httpx
from tenacity import retry, stop_after_attempt, wait_fixed
from tools import utils
from .proxy_ip_provider import IpInfoModel, IpProxy
class ProxyIpPool:
def __init__(self, ip_pool_count: int, enable_validate_ip: bool) -> None:
self.valid_ip_url = "https://httpbin.org/ip" # 验证 IP 是否有效的地址
self.ip_pool_count = ip_pool_count
self.enable_validate_ip = enable_validate_ip
self.proxy_list: List[IpInfoModel] = []
async def load_proxies(self) -> None:
"""
从 HTTP 代理商获取 IP 列表
:return:
"""
self.proxy_list = await IpProxy.get_proxies(self.ip_pool_count)
@retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
async def is_valid_proxy(self, proxy: IpInfoModel) -> bool:
"""
验证代理IP是否有效
:param proxy:
:return:
"""
utils.logger.info(f"[ProxyIpPool.is_valid_proxy] testing {proxy.ip} is it valid ")
try:
httpx_proxy = f"{proxy.protocol}{proxy.ip}:{proxy.port}"
proxy_auth = httpx.BasicAuth(proxy.user, proxy.password)
async with httpx.AsyncClient(proxies={proxy.protocol: httpx_proxy}, auth=proxy_auth) as client:
response = await client.get(self.valid_ip_url)
if response.status_code == 200:
return True
else:
return False
except Exception as e:
utils.logger.info(f"[ProxyIpPool.is_valid_proxy] testing {proxy.ip} err: {e}")
raise e
@retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
async def get_proxy(self) -> IpInfoModel:
"""
从代理池中随机提取一个代理IP
:return:
"""
if len(self.proxy_list) == 0:
await self.reload_proxies()
proxy = random.choice(self.proxy_list)
if self.enable_validate_ip:
if not await self.is_valid_proxy(proxy):
raise Exception("[ProxyIpPool.get_proxy] current ip invalid and again get it")
self.proxy_list.remove(proxy)
return proxy
async def reload_proxies(self):
"""
# 重新加载代理池
:return:
"""
self.proxy_list = []
await self.load_proxies()
async def create_ip_pool(ip_pool_count: int, enable_validate_ip) -> ProxyIpPool:
"""
创建 IP 代理池
:param ip_pool_count:
:param enable_validate_ip:
:return:
"""
pool = ProxyIpPool(ip_pool_count, enable_validate_ip)
await pool.load_proxies()
return pool
if __name__ == '__main__':
pass

111
proxy/proxy_ip_provider.py Normal file
View File

@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2023/12/2 11:18
# @Desc : 爬虫 IP 获取实现
# @Url : 现在实现了极速HTTP的接口官网地址https://www.jisuhttp.com/?pl=mAKphQ&plan=ZY&kd=Yang
import asyncio
import os
from abc import ABC, abstractmethod
from typing import Dict, List, Optional
from urllib.parse import urlencode
import httpx
from pydantic import BaseModel, Field
from tools import utils
class IpGetError(Exception):
""" ip get error"""
class IpInfoModel(BaseModel):
"""Unified IP model"""
ip: str = Field(title="ip")
port: int = Field(title="端口")
user: str = Field(title="IP代理认证的用户名")
protocol: str = Field(default="https://", title="代理IP的协议")
password: str = Field(title="IP代理认证用户的密码")
expired_time_ts: Optional[int] = Field(title="IP 过期时间")
class ProxyProvider(ABC):
@abstractmethod
async def get_proxies(self, num: int) -> List[Dict]:
"""
获取 IP 的抽象方法,不同的 HTTP 代理商需要实现该方法
:param num: 提取的 IP 数量
:return:
"""
pass
class JiSuHttpProxy(ProxyProvider):
def __init__(self, exract_type: str, key: str, crypto: str, res_type: str, protocol: int, time: int):
"""
极速HTTP 代理IP实现
官网地址https://www.jisuhttp.com/?pl=mAKphQ&plan=ZY&kd=Yang
:param exract_type: 提取方式
:param key: 提取key值 (到上面链接的官网去注册后获取)
:param crypto: 加密签名 (到上面链接的官网去注册后获取)
:param res_type: 返回的数据格式TXT、JSON
:param protocol: IP协议1:HTTP、2:HTTPS、3:SOCKS5
:param time: IP使用时长支持3、5、10、15、30分钟时效
"""
self.exract_type = exract_type
self.api_path = "https://api.jisuhttp.com"
self.params = {
"key": key,
"crypto": crypto,
"type": res_type,
"port": protocol,
"time": time,
"pw": "1", # 是否使用账密验证, 10否表示白名单验证默认为0
"se": "1", # 返回JSON格式时是否显示IP过期时间 1显示0不显示默认为0
}
async def get_proxies(self, num: int) -> List[IpInfoModel]:
"""
:param num:
:return:
"""
if self.exract_type == "API":
uri = "/fetchips"
self.params.update({"num": num})
ip_infos = []
async with httpx.AsyncClient() as client:
url = self.api_path + uri + '?' + urlencode(self.params)
utils.logger.info(f"[JiSuHttpProxy] get ip proxy url:{url}")
response = await client.get(url, headers={"User-Agent": "MediaCrawler"})
res_dict: Dict = response.json()
if res_dict.get("code") == 0:
data: List[Dict] = res_dict.get("data")
for ip_item in data:
ip_info_model = IpInfoModel(
ip=ip_item.get("ip"),
port=ip_item.get("port"),
user=ip_item.get("user"),
password=ip_item.get("pass"),
expired_time_ts=utils.get_unix_time_from_time_str(ip_item.get("expire"))
)
ip_infos.append(ip_info_model)
else:
raise IpGetError(res_dict.get("msg", "unkown err"))
return ip_infos
else:
pass
IpProxy = JiSuHttpProxy(
key=os.getenv("jisu_key", ""), # 通过环境变量的方式获取极速HTTPIP提取key值
crypto=os.getenv("jisu_crypto", ""), # 通过环境变量的方式获取极速HTTPIP提取加密签名
res_type="json",
protocol=2,
time=30
)
if __name__ == '__main__':
_ip_infos = asyncio.run(IpProxy.get_proxies(1))
print(_ip_infos)