@staticmethod
def get_image_by_free_site(img):
    """Recognise a captcha with the bundled local model.

    ``get_coordinate`` takes a file name, so the base64 payload is decoded
    to a JPEG on disk first and the file is removed again afterwards.

    :param img: base64-encoded captcha image.
    :return: the non-empty coordinate string produced by ``get_coordinate``
        (e.g. ``'31,45,100,115'``), or ``None`` when nothing was recognised.
    """
    import base64
    import os
    import uuid

    from py12306.helpers.ocr.ml_predict import get_coordinate

    # One file per call: with a fixed 'authcode.jpg', two recognitions running
    # at the same time would overwrite each other's image. The path stays
    # relative to the working directory, exactly like the original file name.
    image_path = 'authcode-{}.jpg'.format(uuid.uuid4().hex)
    try:
        with open(image_path, 'wb') as image:
            image.write(base64.b64decode(img))
        result = get_coordinate(image_path)
    finally:
        # Never leave captcha images behind, even when decoding or
        # recognition raised.
        try:
            os.remove(image_path)
        except OSError:
            pass

    return result or None
# coding: utf-8
"""Local recognition of image captchas.

``get_coordinate`` reads a captcha image, classifies the word(s) printed in
its header with one Keras model, classifies the picture tiles with a second
model, and returns the click coordinates of the tiles whose label matches.

OpenCV, Keras and the project logger are imported inside the functions that
use them, so the pure-NumPy helpers stay importable without those packages.
"""
import os

import numpy as np

# Model and label files are resolved next to this module instead of relative
# to the current working directory, so recognition does not depend on where
# the program was started from.
_OCR_DIR = os.path.dirname(os.path.abspath(__file__))
_TEXT_MODEL_PATH = os.path.join(_OCR_DIR, 'model.v2.0.h5')
_IMAGE_MODEL_PATH = os.path.join(_OCR_DIR, '12306.image.model.h5')
_LABELS_PATH = os.path.join(_OCR_DIR, 'texts.txt')

# Click coordinate ("x,y") reported for each tile index 0..7
# (values kept exactly as in the original if/elif ladder).
_CELL_CENTERS = (
    '31,45', '100,45', '170,45', '240,45',
    '30,115', '100,115', '170,115', '240,115',
)


def get_text(img, offset=0):
    """Crop one word of the captcha header and shape it for the text model.

    :param img: BGR image array as returned by ``cv2.imread``.
    :param offset: horizontal shift, in pixels, of the crop window.
    :return: float array of shape ``(1, h, w, 1)`` with values in ``[0, 1]``.
    """
    import cv2

    crop = img[3:22, 120 + offset:177 + offset]
    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY) / 255.0
    height, width = gray.shape
    return gray.reshape(1, height, width, 1)


def _second_title_offset(first_title):
    """Return the crop offset of the second word, given the first word."""
    # The second word starts further right the longer the first word is.
    if len(first_title) == 1:
        return 27
    if len(first_title) == 2:
        return 47
    return 60


def _positions_to_coordinates(positions):
    """Join the click coordinates of the given tile indices with commas."""
    return ','.join(
        _CELL_CENTERS[index]
        for index in positions
        if 0 <= index < len(_CELL_CENTERS)
    )


def get_coordinate(fn):
    """Recognise the captcha stored in file ``fn``.

    :param fn: path of the captcha image file.
    :return: comma-separated click coordinates such as ``'31,45,100,115'``;
        an empty string when no tile matched or recognition failed (the
        failure is reported through ``CommonLog``).
    """
    # Imported outside the try block on purpose: a missing dependency must
    # surface as ImportError, not be reported as a failed recognition.
    import cv2
    from keras import models
    from py12306.log.common_log import CommonLog

    try:
        # Read and pre-process the captcha.
        img = cv2.imread(fn)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError('cannot read captcha image: {}'.format(fn))
        tiles = preprocess_input(np.array(list(_get_imgs(img))))

        with open(_LABELS_PATH, encoding='utf-8') as fp:
            vocabulary = [line.rstrip('\n') for line in fp]

        # Recognise the word(s) in the header.
        # NOTE(review): both models are reloaded on every call; caching them
        # was left out because it is unclear whether the loaded models can be
        # shared safely between callers — confirm before optimising.
        text_model = models.load_model(_TEXT_MODEL_PATH)
        first_title = vocabulary[text_model.predict(get_text(img)).argmax()]
        titles = [first_title]

        # Look for a second word; its position depends on the first word.
        second_crop = get_text(img, offset=_second_title_offset(first_title))
        # Pixels are scaled so white is 1.0; a crop whose mean is at least
        # 0.95 is treated as containing no second word.
        if second_crop.mean() < 0.95:
            titles.append(vocabulary[text_model.predict(second_crop).argmax()])

        # Classify the picture tiles and keep those matching a title.
        image_model = models.load_model(_IMAGE_MODEL_PATH)
        tile_labels = image_model.predict(tiles).argmax(axis=1)
        positions = [
            pos for pos, label in enumerate(tile_labels)
            if vocabulary[label] in titles
        ]
        return _positions_to_coordinates(positions)
    except Exception:
        # Best effort: report the failure and let the caller retry.
        CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE)
        return ''


def preprocess_input(x):
    """Return a float32 copy of ``x`` with per-channel means subtracted.

    :param x: image batch whose last axis holds the three colour channels.
    :return: new ``float32`` array; ``x`` itself is not modified.
    """
    x = x.astype('float32')
    # Images are read with cv2, so the channels are already in BGR order.
    mean = [103.939, 116.779, 123.68]
    x -= mean
    return x


def _get_imgs(img):
    """Yield the square picture tiles of the captcha, row by row."""
    interval = 5   # gap between neighbouring tiles, in pixels
    length = 67    # side length of one tile, in pixels
    step = interval + length
    for top in range(40, img.shape[0] - length, step):
        for left in range(interval, img.shape[1] - length, step):
            yield img[top:top + length, left:left + length]


if __name__ == '__main__':
    print(get_coordinate('a.jpg'))
differ diff --git a/py12306/helpers/ocr/texts.txt b/py12306/helpers/ocr/texts.txt new file mode 100644 index 0000000..b52c72f --- /dev/null +++ b/py12306/helpers/ocr/texts.txt @@ -0,0 +1,80 @@ +打字机 +调色板 +跑步机 +毛线 +老虎 +安全帽 +沙包 +盘子 +本子 +药片 +双面胶 +龙舟 +红酒 +拖把 +卷尺 +海苔 +红豆 +黑板 +热水袋 +烛台 +钟表 +路灯 +沙拉 +海报 +公交卡 +樱桃 +创可贴 +牌坊 +苍蝇拍 +高压锅 +电线 +网球拍 +海鸥 +风铃 +订书机 +冰箱 +话梅 +排风机 +锅铲 +绿豆 +航母 +电子秤 +红枣 +金字塔 +鞭炮 +菠萝 +开瓶器 +电饭煲 +仪表盘 +棉棒 +篮球 +狮子 +蚂蚁 +蜡烛 +茶盅 +印章 +茶几 +啤酒 +档案袋 +挂钟 +刺绣 +铃铛 +护腕 +手掌印 +锦旗 +文具盒 +辣椒酱 +耳塞 +中国结 +蜥蜴 +剪纸 +漏斗 +锣 +蒸笼 +珊瑚 +雨靴 +薯条 +蜜蜂 +日历 +口哨 diff --git a/py12306/log/common_log.py b/py12306/log/common_log.py index d0992d0..f8c2be8 100644 --- a/py12306/log/common_log.py +++ b/py12306/log/common_log.py @@ -139,3 +139,10 @@ class CommonLog(BaseLog): self.add_quick_log('打码失败: 错误原因 {reason}'.format(reason=reason)) self.flush() return self + + @classmethod + def print_auth_code_info(cls, reason): + self = cls() + self.add_quick_log('打码信息: {reason}'.format(reason=reason)) + self.flush() + return self diff --git a/requirements.txt b/requirements.txt index 2f2ad1c..c6b8537 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,4 +30,11 @@ w3lib==1.19.0 websockets==7.0 Werkzeug==0.14.1 DingtalkChatbot==1.3.0 -lightpush==0.1.3 \ No newline at end of file +lightpush==0.1.3 +sklearn==0.0 +opencv-python==4.1.0.25 +keras==2.2.4 +tensorflow==1.14.0 +matplotlib==3.1.0 +numpy==1.16.4 +scipy==1.3.0 \ No newline at end of file