打码方式使用机器学习

This commit is contained in:
Weey
2019-06-29 11:07:34 +08:00
parent 8cabf157f6
commit 707fd5bd8a
8 changed files with 237 additions and 26 deletions

View File

@@ -2,7 +2,6 @@ import math
import random
from py12306.config import Config
from py12306.helpers.api import *
from py12306.helpers.request import Request
from py12306.log.common_log import CommonLog
from py12306.vender.ruokuai.main import RKClient
@@ -54,26 +53,21 @@ class OCR:
positions.append(int(y))
return positions
# Solve a captcha through the free remote recognition service.
#
# `img` is sent as the 'base64' field of a JSON POST to API_FREE_CODE_QCR_API.
# When that response reports 'success' and carries a 'data.check' value, the
# value is posted together with the image to API_FREE_CODE_QCR_API_CHECK, and
# the 'res' string of the second response is returned as a list of coordinate
# strings (parentheses stripped, split on commas).
# NOTE(review): in this diff view these lines appear to be the pre-change
# version of the method; the lines that follow the replacement's
# `return result` look like the shared failure path - confirm against the file.
def get_image_by_free_site(self, img):
data = {
'base64': img
}
response = self.session.post(API_FREE_CODE_QCR_API, json=data)
result = response.json()
# Dotted key: presumably resolved by the project's JSON wrapper - TODO confirm.
if result.get('success') and result.get('data.check'):
check_data = {
'check': result.get('data.check'),
'img_buf': img,
'logon': 1,
'type': 'D'
}
check_response = self.session.post(API_FREE_CODE_QCR_API_CHECK, json=check_data)
check_result = check_response.json()
if check_result.get('res'):
position = check_result.get('res')
# Strip the parentheses from 'res' and split it on commas.
return position.replace('(', '').replace(')', '').split(',')
@staticmethod
def get_image_by_free_site(img):
    """Solve a captcha locally with the bundled machine-learning models.

    The base64 payload is decoded into a temporary JPEG file, because
    ``get_coordinate`` takes a file path, and the file is removed afterwards.

    :param img: base64-encoded captcha image.
    :return: the answer string produced by ``get_coordinate``, or ``None``
        (after logging a failure) when it returns an empty result.
    """
    import base64
    import os
    import uuid

    from py12306.helpers.ocr.ml_predict import get_coordinate

    # A unique, relative file name per call: concurrent recognitions no longer
    # overwrite one shared 'authcode.jpg', and the path stays plain ASCII.
    image_path = 'authcode_{}.jpg'.format(uuid.uuid4().hex)
    try:
        with open(image_path, 'wb') as image:
            image.write(base64.b64decode(img))
        result = get_coordinate(image_path)
    finally:
        # Best-effort cleanup; the file may not exist if writing failed.
        try:
            os.remove(image_path)
        except OSError:
            pass
    if result:
        return result
    CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE)
    return None

View File

@@ -46,8 +46,4 @@ API_GET_BROWSER_DEVICE_ID = BASE_URL_OF_12306 + '/otn/HttpZF/logdevice'
# Voice notification endpoints (presumably two alternative providers - confirm with callers).
API_NOTIFICATION_BY_VOICE_CODE = 'http://ali-voice.showapi.com/sendVoice?'
API_NOTIFICATION_BY_VOICE_CODE_DINGXIN = 'http://yuyin2.market.alicloudapi.com/dx/voice_notice'
# API_FREE_CODE_QCR_API = 'http://60.205.200.159/api' # 19-03-07: this endpoint no longer works
# Free captcha recognition: first-step endpoint and the follow-up check endpoint.
API_FREE_CODE_QCR_API = 'https://12306.jiedanba.cn/api/v2/getCheck'
API_FREE_CODE_QCR_API_CHECK = 'http://check.huochepiao.360.cn/img_vcode'
# URL template; the '{}' placeholder is filled in by the caller (presumably a CDN host).
API_CHECK_CDN_AVAILABLE = 'https://{}/otn/dynamicJs/omseuuq'

View File

@@ -32,7 +32,11 @@ class AuthCode:
if not position: # 打码失败
return self.retry_get_auth_code()
answer = ','.join(map(str, position))
if Config().AUTO_CODE_PLATFORM == 'free':
answer = position
else:
answer = ','.join(map(str, position))
if not self.check_code(answer):
return self.retry_get_auth_code()
return position

View File

@@ -0,0 +1,123 @@
# coding: utf-8
import cv2, os
import numpy as np
from keras import models
from py12306.log.common_log import CommonLog
def get_text(img, offset=0):
    """Cut one label-word strip out of the captcha and prepare it as model input.

    :param img: captcha image as loaded by ``cv2.imread`` (BGR channel order).
    :param offset: horizontal shift, in pixels, of the cropped window.
    :return: array of shape ``(1, height, width, 1)`` holding the grayscale
        crop scaled by 1/255.
    """
    left = 120 + offset
    right = 177 + offset
    # Rows 3..21 and the shifted column window hold the word to recognise.
    strip = img[3:22, left:right]
    gray = cv2.cvtColor(strip, cv2.COLOR_BGR2GRAY)
    scaled = gray / 255.0
    height, width = scaled.shape
    # Add the leading batch axis and trailing channel axis.
    return scaled.reshape((1, height, width, 1))
# Answer point ("x,y") for each picture tile, indexed by the order in which
# _get_imgs yields the tiles (row by row, left to right).
_ANSWER_POINTS = (
    '31,45', '100,45', '170,45', '240,45',
    '30,115', '100,115', '170,115', '240,115',
)


def get_coordinate(fn):
    """Recognise a captcha image file and build its answer string.

    The label word(s) are read from the top strip of the image with the text
    model, every picture tile is classified with the image model, and each
    tile whose predicted class equals one of the label words is selected.

    :param fn: path of the captcha image, handed to ``cv2.imread``.
    :return: comma-separated ``"x,y,x,y,..."`` string with one answer point
        per selected tile; an empty string when no tile matches or when any
        step fails (the failure is reported through ``CommonLog``).
    """
    result = ''
    try:
        # Load the captcha and prepare both model inputs.
        img = cv2.imread(fn)
        text = get_text(img)
        imgs = np.array(list(_get_imgs(img)))
        imgs = preprocess_input(imgs)

        # Recognise the first label word.
        model = models.load_model('py12306/helpers/ocr/model.v2.0.h5')
        label = model.predict(text).argmax()
        with open('py12306/helpers/ocr/texts.txt', encoding='utf-8') as fp:
            texts = [line.rstrip('\n') for line in fp]
        text = texts[label]
        titles = [text]

        # The position of the second word depends on the length of the first.
        if len(text) == 1:
            offset = 27
        elif len(text) == 2:
            offset = 47
        else:
            offset = 60
        text2 = get_text(img, offset=offset)
        # A region whose mean brightness is >= 0.95 is treated as "no second word".
        if text2.mean() < 0.95:
            titles.append(texts[model.predict(text2).argmax()])

        # Classify the picture tiles and keep those matching any label word.
        model = models.load_model('py12306/helpers/ocr/12306.image.model.h5')
        labels = model.predict(imgs).argmax(axis=1)
        position = [pos for pos, label in enumerate(labels) if texts[label] in titles]

        # Tile indices without a known answer point are ignored.
        result = ','.join(
            _ANSWER_POINTS[pos] for pos in position if pos < len(_ANSWER_POINTS)
        )
    except Exception:
        # Deliberately best-effort: any recognition failure yields ''.
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE)
    return result
def preprocess_input(x):
    """Return a float32 copy of ``x`` with the per-channel means subtracted.

    :param x: image array whose last axis holds the three colour channels.
    :return: new ``float32`` array; the input array is left untouched.
    """
    # Means are listed in B, G, R order because the images were read with cv2,
    # which already delivers BGR data.
    channel_means = [103.939, 116.779, 123.68]
    shifted = x.astype('float32')
    np.subtract(shifted, channel_means, out=shifted)
    return shifted
def _get_imgs(img):
    """Yield the 67x67 picture tiles of a captcha, row by row, left to right.

    Tiles start at row 40 and column 5 and repeat every 72 pixels (tile size
    plus a 5 pixel gap) while the start index stays below the image size
    minus the tile size.

    :param img: image array indexed as ``img[row, column, ...]``.
    """
    gap = 5
    tile = 67
    stride = gap + tile
    row_starts = range(40, img.shape[0] - tile, stride)
    col_starts = range(gap, img.shape[1] - tile, stride)
    for top in row_starts:
        for left in col_starts:
            yield img[top:top + tile, left:left + tile]
# Manual check: when the module is run as a script, print the answer string
# computed for a local file named 'a.jpg'.
if __name__ == '__main__':
print(get_coordinate('a.jpg'))

View File

Binary file not shown.

View File

@@ -0,0 +1,80 @@
打字机
调色板
跑步机
毛线
老虎
安全帽
沙包
盘子
本子
药片
双面胶
龙舟
红酒
拖把
卷尺
海苔
红豆
黑板
热水袋
烛台
钟表
路灯
沙拉
海报
公交卡
樱桃
创可贴
牌坊
苍蝇拍
高压锅
电线
网球拍
海鸥
风铃
订书机
冰箱
话梅
排风机
锅铲
绿豆
航母
电子秤
红枣
金字塔
鞭炮
菠萝
开瓶器
电饭煲
仪表盘
棉棒
篮球
狮子
蚂蚁
蜡烛
茶盅
印章
茶几
啤酒
档案袋
挂钟
刺绣
铃铛
护腕
手掌印
锦旗
文具盒
辣椒酱
耳塞
中国结
蜥蜴
剪纸
漏斗
蒸笼
珊瑚
雨靴
薯条
蜜蜂
日历
口哨

View File

@@ -139,3 +139,10 @@ class CommonLog(BaseLog):
self.add_quick_log('打码失败: 错误原因 {reason}'.format(reason=reason))
self.flush()
return self
@classmethod
def print_auth_code_info(cls, reason):
    """Write a captcha-solving information line to the log.

    :param reason: text inserted into the message.
    :return: the logger instance created for this call.
    """
    logger = cls()
    message = '打码信息: {reason}'.format(reason=reason)
    logger.add_quick_log(message)
    logger.flush()
    return logger

View File

@@ -30,4 +30,11 @@ w3lib==1.19.0
websockets==7.0
Werkzeug==0.14.1
DingtalkChatbot==1.3.0
lightpush==0.1.3
lightpush==0.1.3
sklearn==0.0
opencv-python==4.1.0.25
keras==2.2.4
tensorflow==1.14.0
matplotlib==3.1.0
numpy==1.16.4
scipy==1.3.0