refactor:优化部分代码

feat: 增加IP代理账号池
This commit is contained in:
Relakkes
2023-06-27 23:38:30 +08:00
parent 963d9a16d3
commit b8093a2c0f
19 changed files with 615 additions and 254 deletions

0
tools/__init__.py Normal file
View File

View File

@@ -0,0 +1,85 @@
# Start an HTTP server to receive SMS forwarding notifications and store them in Redis.
import re
import json
import asyncio
import aioredis
import tornado.web
import config
def extract_verification_code(message) -> str:
    """
    Extract the first 6-digit verification code from an SMS message.

    Args:
        message: SMS text. ``None``, an empty string, or a non-string value
            yields an empty result instead of raising.

    Returns:
        The first run of exactly six ASCII digits, or "" when none is found.
    """
    if not message or not isinstance(message, str):
        return ""
    # Digit lookarounds are used instead of \b: CJK characters are word
    # characters, so \b does not match between e.g. "码" and "1" and a code
    # written directly after Chinese text would be missed.
    match = re.search(r'(?<![0-9])[0-9]{6}(?![0-9])', message)
    return match.group() if match else ""
class RecvSmsNotificationHandler(tornado.web.RequestHandler):
    """Receive forwarded SMS notifications and cache the verification code in Redis."""

    async def get(self):
        """Answer GET requests with a 404; notifications arrive via POST."""
        self.set_status(404)
        self.write("404")

    async def post(self):
        """
        Parse a forwarded SMS notification and store its verification code.

        Responds with 400 when the body is not a UTF-8 JSON object, otherwise
        with 200 "ok" (whether or not a code was found in the message).
        """
        # GitHub address for the SMS forwarding function: https://github.com/pppscn/SmsForwarder
        # Document address: https://gitee.com/pp/SmsForwarder/wikis/pages?sort_id=6040999&doc_id=1821427
        # Forwarding channel definition
        # {
        #     "platform": "xhs",
        #     "current_number": "138xxxxxxxx",
        #     "from_number": "[from]",
        #     "sms_content": "[org_content]",
        #     "timestamp": "[timestamp]"
        # }
        # SMS message body
        # {
        #     'platform': 'xhs',  # or dy
        #     'current_number': '138xxxxxxxx',
        #     'from_number': '1069421xxx134',
        #     'sms_content': '【小红书】您的验证码是: 171959 3分钟内有效。请勿向他人泄漏。如非本人操作可忽略本消息。',
        #     'timestamp': '1686720601614'
        # }
        try:
            # UnicodeDecodeError and json.JSONDecodeError are both ValueError subclasses.
            req_body_dict = json.loads(self.request.body.decode("utf-8"))
        except ValueError:
            req_body_dict = None
        if not isinstance(req_body_dict, dict):
            self.set_status(400)
            self.write("invalid request body")
            return
        print("recv sms notification and body content: ", req_body_dict)

        # A missing or non-string sms_content is treated as "no code found".
        sms_content = req_body_dict.get("sms_content")
        sms_code = extract_verification_code(sms_content) if isinstance(sms_content, str) else ""
        if sms_code:
            # Save the verification code in Redis and set the expiration time to 3 minutes.
            # Use Redis string data structure, in the following format:
            # xhs_138xxxxxxxx -> 171959
            key = f"{req_body_dict.get('platform')}_{req_body_dict.get('current_number')}"
            # The client is only created when there is something to store and
            # is closed afterwards so per-request clients do not accumulate.
            redis_obj = aioredis.from_url(url=config.redis_db_host, password=config.redis_db_pwd, decode_responses=True)
            try:
                await redis_obj.set(name=key, value=sms_code, ex=60 * 3)
            finally:
                await redis_obj.close()
        self.set_status(200)
        self.write("ok")
class Application(tornado.web.Application):
    """Tornado application that routes ``/`` to RecvSmsNotificationHandler."""

    def __init__(self):
        # Single route: every notification is posted to the root path.
        routes = [(r'/', RecvSmsNotificationHandler)]
        super().__init__(
            routes,
            gzip=True,
            autoescape=None,
            autoreload=True,
        )
async def main():
    """Start the SMS notification server on port 9435 and wait indefinitely."""
    app = Application()
    app.listen(port=9435)
    print("Recv sms notification app running ...")
    # asyncio.Event is used rather than tornado.locks.Event: this file imports
    # only tornado.web, so the tornado.locks submodule is not guaranteed to be
    # loaded. The event is never set; awaiting it keeps the server running
    # until the task is cancelled or the process is interrupted.
    shutdown_event = asyncio.Event()
    await shutdown_event.wait()
if __name__ == '__main__':
    try:
        # Run the server coroutine until the process is interrupted.
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl+C is swallowed here so the script exits without a traceback.
        pass

104
tools/utils.py Normal file
View File

@@ -0,0 +1,104 @@
import re
import time
import random
import base64
import logging
from io import BytesIO
from typing import Optional, Dict, List, Tuple
from PIL import Image, ImageDraw
from playwright.async_api import Cookie
from playwright.async_api import Page
async def find_login_qrcode(page: Page, selector: str) -> str:
    """
    Look up the login QR code image under ``selector`` and return its ``src``.

    Waits for the selector on ``page``, reads the element's ``src`` property
    and returns it converted with ``str``. Any exception raised along the way
    is printed and an empty string is returned instead.
    """
    try:
        qrcode_element = await page.wait_for_selector(selector=selector)
        src_handle = await qrcode_element.get_property("src")
        return str(src_handle)
    except Exception as err:
        print(err)
    return ""
def show_qrcode(qr_code: str):
    """
    Decode a base64-encoded QR code image and open it in the image viewer.

    ``qr_code`` is split on "," and the second field is base64-decoded
    (presumably the payload of a ``data:...;base64,<payload>`` URI).
    """
    encoded_payload = qr_code.split(",")[1]
    qr_image = Image.open(BytesIO(base64.b64decode(encoded_payload)))

    # Paste the QR code onto a white canvas with a 10px margin and draw a
    # 1px black frame around it to improve scanning accuracy.
    qr_width, qr_height = qr_image.size
    margin = 10
    canvas = Image.new('RGB', (qr_width + 2 * margin, qr_height + 2 * margin), color=(255, 255, 255))
    canvas.paste(qr_image, (margin, margin))
    ImageDraw.Draw(canvas).rectangle(
        (0, 0, qr_width + 19, qr_height + 19),
        outline=(0, 0, 0),
        width=1,
    )
    canvas.show()
def get_user_agent() -> str:
    """Return one desktop Chrome User-Agent string picked at random."""
    candidates = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36",
    )
    chosen = random.choice(candidates)
    return chosen
def convert_cookies(cookies: Optional[List[Cookie]]) -> Tuple[str, Dict]:
    """
    Convert a list of cookie mappings into a cookie string and a dict.

    Returns:
        A tuple ``(cookie_string, cookie_dict)``: the string joins
        ``name=value`` pairs with ";" and the dict maps each name to its
        value. ``("", {})`` is returned when ``cookies`` is empty or None.
    """
    if not cookies:
        return "", {}
    pairs = [(item.get('name'), item.get('value')) for item in cookies]
    cookies_str = ";".join(f"{name}={value}" for name, value in pairs)
    # dict() keeps the last value for a repeated name, like sequential assignment.
    return cookies_str, dict(pairs)
def convert_str_cookie_to_dict(cookie_str: str) -> Dict:
    """
    Parse a ";"-separated ``name=value`` cookie string into a dict.

    Args:
        cookie_str: Cookie string such as ``"a=1; b=2"``. Empty or None
            yields an empty dict.

    Returns:
        Mapping of cookie name to cookie value. Segments that are blank or
        contain no "=" are skipped.
    """
    cookie_dict = dict()
    if not cookie_str:
        return cookie_dict
    for segment in cookie_str.split(";"):
        segment = segment.strip()
        if not segment:
            continue
        # Split on the first "=" only, so values that themselves contain "="
        # (e.g. base64 padding) are kept intact.
        name, separator, value = segment.partition("=")
        if not separator:
            continue
        cookie_dict[name] = value
    return cookie_dict
def get_current_timestamp():
    """Return the current Unix time in milliseconds, truncated to an int."""
    milliseconds = time.time() * 1000
    return int(milliseconds)
def match_interact_info_count(count_str: str) -> int:
    """
    Return the first run of digits found in ``count_str`` as an int.

    Returns 0 when ``count_str`` is empty/None or contains no digits.
    """
    first_digits = re.search(r'\d+', count_str) if count_str else None
    if first_digits is None:
        return 0
    return int(first_digits.group())
def init_loging_config():
    """
    Configure root logging at INFO level and return the project logger.

    Returns:
        The "Media Crawler" logger, with its level set to INFO. Callers that
        ignore the return value are unaffected.
    """
    level = logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s %(name)s %(levelname)s %(message)s ",
        datefmt='%Y-%m-%d %H:%M:%S'
    )
    # logging.getLogger registers the logger with the logging hierarchy, so
    # later getLogger("Media Crawler") calls return this same object. The
    # previous direct Logger(...) instantiation built an unregistered object
    # and discarded it.
    logger = logging.getLogger("Media Crawler")
    logger.setLevel(level)
    return logger