打码方式使用机器学习
This commit is contained in:
@@ -2,7 +2,6 @@ import math
|
||||
import random
|
||||
|
||||
from py12306.config import Config
|
||||
from py12306.helpers.api import *
|
||||
from py12306.helpers.request import Request
|
||||
from py12306.log.common_log import CommonLog
|
||||
from py12306.vender.ruokuai.main import RKClient
|
||||
@@ -54,26 +53,21 @@ class OCR:
|
||||
positions.append(int(y))
|
||||
return positions
|
||||
|
||||
def get_image_by_free_site(self, img):
|
||||
data = {
|
||||
'base64': img
|
||||
}
|
||||
response = self.session.post(API_FREE_CODE_QCR_API, json=data)
|
||||
result = response.json()
|
||||
if result.get('success') and result.get('data.check'):
|
||||
check_data = {
|
||||
'check': result.get('data.check'),
|
||||
'img_buf': img,
|
||||
'logon': 1,
|
||||
'type': 'D'
|
||||
}
|
||||
check_response = self.session.post(API_FREE_CODE_QCR_API_CHECK, json=check_data)
|
||||
check_result = check_response.json()
|
||||
if check_result.get('res'):
|
||||
position = check_result.get('res')
|
||||
return position.replace('(', '').replace(')', '').split(',')
|
||||
@staticmethod
def get_image_by_free_site(img):
    """Solve a captcha locally with the bundled ML predictor.

    The base64 payload ``img`` is decoded and written to ``authcode.jpg``
    in the current working directory, and that file is handed to
    ``get_coordinate``.

    :param img: base64-encoded captcha image data.
    :return: whatever truthy value ``get_coordinate`` produced, or ``None``
        (after logging an auto-code failure) when it produced nothing.
    """
    from py12306.helpers.ocr.ml_predict import get_coordinate
    import base64

    # NOTE(review): the file name is fixed, so overlapping calls would
    # overwrite each other's image - confirm callers never run concurrently.
    captcha_path = 'authcode.jpg'

    # Persist the decoded captcha so the predictor can read it from disk.
    with open(captcha_path, 'wb') as captcha_file:
        captcha_file.write(base64.b64decode(img))

    coordinates = get_coordinate(captcha_path)
    if not coordinates:
        CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE)
        return None
    return coordinates
|
||||
|
||||
|
||||
|
||||
@@ -46,8 +46,4 @@ API_GET_BROWSER_DEVICE_ID = BASE_URL_OF_12306 + '/otn/HttpZF/logdevice'
|
||||
API_NOTIFICATION_BY_VOICE_CODE = 'http://ali-voice.showapi.com/sendVoice?'
|
||||
API_NOTIFICATION_BY_VOICE_CODE_DINGXIN = 'http://yuyin2.market.alicloudapi.com/dx/voice_notice'
|
||||
|
||||
# API_FREE_CODE_QCR_API = 'http://60.205.200.159/api' # 19-03-07 接口已失效
|
||||
API_FREE_CODE_QCR_API = 'https://12306.jiedanba.cn/api/v2/getCheck'
|
||||
API_FREE_CODE_QCR_API_CHECK = 'http://check.huochepiao.360.cn/img_vcode'
|
||||
|
||||
API_CHECK_CDN_AVAILABLE = 'https://{}/otn/dynamicJs/omseuuq'
|
||||
|
||||
@@ -32,7 +32,11 @@ class AuthCode:
|
||||
if not position: # 打码失败
|
||||
return self.retry_get_auth_code()
|
||||
|
||||
answer = ','.join(map(str, position))
|
||||
if Config().AUTO_CODE_PLATFORM == 'free':
|
||||
answer = position
|
||||
else:
|
||||
answer = ','.join(map(str, position))
|
||||
|
||||
if not self.check_code(answer):
|
||||
return self.retry_get_auth_code()
|
||||
return position
|
||||
|
||||
123
py12306/helpers/ocr/ml_predict.py
Normal file
123
py12306/helpers/ocr/ml_predict.py
Normal file
@@ -0,0 +1,123 @@
|
||||
# coding: utf-8
|
||||
import cv2, os
|
||||
import numpy as np
|
||||
from keras import models
|
||||
from py12306.log.common_log import CommonLog
|
||||
|
||||
|
||||
def get_text(img, offset=0):
    """Cut a fixed strip out of ``img`` and shape it as a one-item model batch.

    The strip spans rows 3..21 and columns ``120 + offset`` .. ``176 + offset``.
    It is converted from BGR to grayscale, scaled by 1/255 and reshaped to
    ``(1, height, width, 1)``.

    :param img: image array as read by cv2 (presumably the full captcha
        image - confirm against the caller).
    :param offset: horizontal shift, in pixels, applied to the strip.
    :return: the scaled grayscale strip with batch and channel axes added.
    """
    left = 120 + offset
    right = 177 + offset
    strip = img[3:22, left:right]
    gray = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)
    scaled = gray / 255.0
    rows, cols = scaled.shape
    # Add a leading batch axis and a trailing single-channel axis in place.
    scaled.shape = (1, rows, cols, 1)
    return scaled
|
||||
|
||||
|
||||
def get_coordinate(fn):
    """Work out which captcha tiles match the prompt word(s) in image ``fn``.

    Steps, all performed on the image read from ``fn``:

    1. Classify the first prompt word from the text strip (``get_text``).
    2. Look for a second prompt word at an offset derived from the length
       of the first word; it is only used when that strip is not almost
       entirely white.
    3. Classify every tile produced by ``_get_imgs`` and keep the tiles
       whose label equals one of the prompt words.

    :param fn: path of the captcha image file, passed to ``cv2.imread``.
    :return: a comma-separated string of ``x,y`` click coordinates (one
        pair per matching tile), or ``''`` when no tile matched or any
        step failed. Failures are logged through ``CommonLog``.
    """
    # Click coordinate reported for each tile index (row-major, 4 per row).
    tile_centers = (
        '31,45', '100,45', '170,45', '240,45',
        '30,115', '100,115', '170,115', '240,115',
    )

    try:
        # Read and preprocess the captcha.
        img = cv2.imread(fn)
        text = get_text(img)
        imgs = np.array(list(_get_imgs(img)))
        imgs = preprocess_input(imgs)

        # Recognise the first prompt word.
        model = models.load_model('py12306/helpers/ocr/model.v2.0.h5')
        label = model.predict(text).argmax()
        # The label file maps class index -> word, one word per line.
        with open('py12306/helpers/ocr/texts.txt', encoding='utf-8') as fp:
            texts = [line.rstrip('\n') for line in fp]
        first_word = texts[label]
        titles = [first_word]

        # The second word's position depends on how wide the first one is.
        if len(first_word) == 1:
            offset = 27
        elif len(first_word) == 2:
            offset = 47
        else:
            offset = 60
        text2 = get_text(img, offset=offset)
        # The strip is scaled to [0, 1]; a mean of 0.95 or more is treated
        # as "no second word present".
        if text2.mean() < 0.95:
            titles.append(texts[model.predict(text2).argmax()])

        # Load the tile classifier and label every tile.
        model = models.load_model('py12306/helpers/ocr/12306.image.model.h5')
        labels = model.predict(imgs).argmax(axis=1)

        # Keep tiles whose predicted word is one of the prompt words.
        position = [pos for pos, label in enumerate(labels) if texts[label] in titles]

        # Tile indexes without a known coordinate are ignored, as before.
        return ','.join(tile_centers[pos] for pos in position if pos < len(tile_centers))
    except Exception:
        # Deliberately best-effort: any recognition failure yields an empty
        # answer, but KeyboardInterrupt/SystemExit are no longer swallowed.
        CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE)
        return ''
|
||||
|
||||
|
||||
def preprocess_input(x):
    """Return a float32 copy of ``x`` with per-channel means subtracted.

    The three constants are subtracted along the last axis. The input array
    itself is left untouched because ``astype`` produces a new array.

    :param x: numeric array whose last axis has three channels.
    :return: float32 array of the same shape with the means removed.
    """
    # The images were read with cv2, so channels are already in BGR order.
    # NOTE(review): these look like the usual ImageNet BGR means - confirm.
    bgr_means = [103.939, 116.779, 123.68]
    centered = x.astype('float32')
    centered -= bgr_means
    return centered
|
||||
|
||||
|
||||
def _get_imgs(img):
    """Yield square 67x67 crops of ``img`` laid out on a regular grid.

    Rows start at 40 and columns at 5; consecutive crops are 72 pixels
    apart (67-pixel tile plus a 5-pixel gap). A start position is only used
    while it is below ``dimension - 67``.

    :param img: image array indexed as ``img[row, column, ...]``.
    :return: generator of array slices, row by row.
    """
    gap = 5
    side = 67
    stride = gap + side
    row_starts = range(40, img.shape[0] - side, stride)
    col_starts = range(gap, img.shape[1] - side, stride)
    for top in row_starts:
        for left in col_starts:
            yield img[top:top + side, left:left + side]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: print the coordinates found for a local sample image.
    coordinates = get_coordinate('a.jpg')
    print(coordinates)
|
||||
BIN
py12306/helpers/ocr/model.v2.0.h5
Normal file
BIN
py12306/helpers/ocr/model.v2.0.h5
Normal file
Binary file not shown.
80
py12306/helpers/ocr/texts.txt
Normal file
80
py12306/helpers/ocr/texts.txt
Normal file
@@ -0,0 +1,80 @@
|
||||
打字机
|
||||
调色板
|
||||
跑步机
|
||||
毛线
|
||||
老虎
|
||||
安全帽
|
||||
沙包
|
||||
盘子
|
||||
本子
|
||||
药片
|
||||
双面胶
|
||||
龙舟
|
||||
红酒
|
||||
拖把
|
||||
卷尺
|
||||
海苔
|
||||
红豆
|
||||
黑板
|
||||
热水袋
|
||||
烛台
|
||||
钟表
|
||||
路灯
|
||||
沙拉
|
||||
海报
|
||||
公交卡
|
||||
樱桃
|
||||
创可贴
|
||||
牌坊
|
||||
苍蝇拍
|
||||
高压锅
|
||||
电线
|
||||
网球拍
|
||||
海鸥
|
||||
风铃
|
||||
订书机
|
||||
冰箱
|
||||
话梅
|
||||
排风机
|
||||
锅铲
|
||||
绿豆
|
||||
航母
|
||||
电子秤
|
||||
红枣
|
||||
金字塔
|
||||
鞭炮
|
||||
菠萝
|
||||
开瓶器
|
||||
电饭煲
|
||||
仪表盘
|
||||
棉棒
|
||||
篮球
|
||||
狮子
|
||||
蚂蚁
|
||||
蜡烛
|
||||
茶盅
|
||||
印章
|
||||
茶几
|
||||
啤酒
|
||||
档案袋
|
||||
挂钟
|
||||
刺绣
|
||||
铃铛
|
||||
护腕
|
||||
手掌印
|
||||
锦旗
|
||||
文具盒
|
||||
辣椒酱
|
||||
耳塞
|
||||
中国结
|
||||
蜥蜴
|
||||
剪纸
|
||||
漏斗
|
||||
锣
|
||||
蒸笼
|
||||
珊瑚
|
||||
雨靴
|
||||
薯条
|
||||
蜜蜂
|
||||
日历
|
||||
口哨
|
||||
@@ -139,3 +139,10 @@ class CommonLog(BaseLog):
|
||||
self.add_quick_log('打码失败: 错误原因 {reason}'.format(reason=reason))
|
||||
self.flush()
|
||||
return self
|
||||
|
||||
@classmethod
def print_auth_code_info(cls, reason):
    """Write an informational captcha-solving message to the quick log.

    A fresh instance of the class is created, the formatted message is
    queued with ``add_quick_log`` and then flushed.

    :param reason: text inserted into the log message.
    :return: the instance that was used for logging.
    """
    logger = cls()
    message = '打码信息: {reason}'.format(reason=reason)
    logger.add_quick_log(message)
    logger.flush()
    return logger
|
||||
|
||||
@@ -30,4 +30,11 @@ w3lib==1.19.0
|
||||
websockets==7.0
|
||||
Werkzeug==0.14.1
|
||||
DingtalkChatbot==1.3.0
|
||||
lightpush==0.1.3
|
||||
lightpush==0.1.3
|
||||
sklearn==0.0
|
||||
opencv-python==4.1.0.25
|
||||
keras==2.2.4
|
||||
tensorflow==1.14.0
|
||||
matplotlib==3.1.0
|
||||
numpy==1.16.4
|
||||
scipy==1.3.0
|
||||
Reference in New Issue
Block a user