diff --git a/py12306/helpers/OCR.py b/py12306/helpers/OCR.py index 8b29878..0345d0b 100644 --- a/py12306/helpers/OCR.py +++ b/py12306/helpers/OCR.py @@ -2,6 +2,7 @@ import math import random from py12306.config import Config +from py12306.helpers.api import API_FREE_CODE_QCR_API from py12306.helpers.request import Request from py12306.log.common_log import CommonLog from py12306.vender.ruokuai.main import RKClient @@ -54,16 +55,16 @@ class OCR: return positions def get_image_by_free_site(self, img): - from py12306.helpers.ocr.ml_predict import get_coordinate - import base64 - - result = get_coordinate(base64.b64decode(img)) - result = self.get_image_position_by_offset(result) - # CommonLog.print_auth_code_info("验证码识别的结果为:" + result) - - if result: - return result + data = { + 'img': img + } + response = self.session.post(API_FREE_CODE_QCR_API, data=data) + result = response.json() + if result.get('msg') == 'success': + pos = result.get('result') + return self.get_image_position_by_offset(pos) + CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE) return None diff --git a/py12306/helpers/api.py b/py12306/helpers/api.py index 4256d58..c489f88 100644 --- a/py12306/helpers/api.py +++ b/py12306/helpers/api.py @@ -42,7 +42,7 @@ API_QUERY_ORDER_WAIT_TIME = BASE_URL_OF_12306 + '/otn/confirmPassenger/queryOrde API_QUERY_INIT_PAGE = BASE_URL_OF_12306 + '/otn/leftTicket/init' # API_GET_BROWSER_DEVICE_ID = BASE_URL_OF_12306 + '/otn/HttpZF/logdevice' API_GET_BROWSER_DEVICE_ID = 'https://12306-rail-id.pjialin.com/' - +API_FREE_CODE_QCR_API = 'https://12306-ocr.pjialin.com/check/' API_NOTIFICATION_BY_VOICE_CODE = 'http://ali-voice.showapi.com/sendVoice?' API_NOTIFICATION_BY_VOICE_CODE_DINGXIN = 'http://yuyin2.market.alicloudapi.com/dx/voice_notice' diff --git a/py12306/helpers/ocr/ml_predict.py b/py12306/helpers/ocr/ml_predict.py deleted file mode 100644 index 36c3ec4..0000000 --- a/py12306/helpers/ocr/ml_predict.py +++ /dev/null @@ -1,97 +0,0 @@ -# coding: utf-8 -import cv2, os -import tensorflow as tf -import numpy as np -from keras import models -from py12306.log.common_log import CommonLog -from py12306.config import Config - -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - - -def get_text(img, offset=0): - text = img[3:22, 120 + offset:177 + offset] - text = cv2.cvtColor(text, cv2.COLOR_BGR2GRAY) - text = text / 255.0 - h, w = text.shape - text.shape = (1, h, w, 1) - return text - - -def get_coordinate(img_str): - # 储存最终坐标结果 - result = '' - orc_dir = '%spy12306/helpers/ocr/' % Config.PROJECT_DIR - - try: - # 读取并预处理验证码 - img = cv2.imdecode(np.fromstring(img_str, np.uint8), cv2.IMREAD_COLOR) - text = get_text(img) - imgs = np.array(list(_get_imgs(img))) - imgs = preprocess_input(imgs) - - # 识别文字 - model = models.load_model('%smodel.v2.0.h5' % orc_dir, compile=False) - label = model.predict(text) - label = label.argmax() - fp = open('%stexts.txt' % orc_dir, encoding='utf-8') - texts = [text.rstrip('\n') for text in fp] - text = texts[label] - - # list放文字 - titles = [text] - - position = [] - - # 获取下一个词 - # 根据第一个词的长度来定位第二个词的位置 - if len(text) == 1: - offset = 27 - elif len(text) == 2: - offset = 47 - else: - offset = 60 - text2 = get_text(img, offset=offset) - if text2.mean() < 0.95: - label = model.predict(text2) - label = label.argmax() - text2 = texts[label] - titles.append(text2) - - # 加载图片分类器 - model = models.load_model('%s12306.image.model.h5' % orc_dir, compile=False) - labels = model.predict(imgs) - labels = labels.argmax(axis=1) - - for pos, label in enumerate(labels): - if texts[label] in titles: - position.append(pos + 1) - - # 没有识别到结果 - if len(position) == 0: - return result - result = position - except: - CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE) - return result - - -def preprocess_input(x): - x = x.astype('float32') - # 我是用cv2来读取的图片,其已经是BGR格式了 - mean = [103.939, 116.779, 123.68] - x -= mean - return x - - -def _get_imgs(img): - interval = 5 - length = 67 - for x in range(40, img.shape[0] - length, interval + length): - for y in range(interval, img.shape[1] - length, interval + length): - yield img[x:x + length, y:y + length] - - -if __name__ == '__main__': - with open('a.jpg', 'r') as f: - print(get_coordinate(f.buffer.read())) diff --git a/py12306/helpers/ocr/model.v2.0.h5 b/py12306/helpers/ocr/model.v2.0.h5 deleted file mode 100644 index 25611fe..0000000 Binary files a/py12306/helpers/ocr/model.v2.0.h5 and /dev/null differ diff --git a/py12306/helpers/ocr/texts.txt b/py12306/helpers/ocr/texts.txt deleted file mode 100644 index b52c72f..0000000 --- a/py12306/helpers/ocr/texts.txt +++ /dev/null @@ -1,80 +0,0 @@ -打字机 -调色板 -跑步机 -毛线 -老虎 -安全帽 -沙包 -盘子 -本子 -药片 -双面胶 -龙舟 -红酒 -拖把 -卷尺 -海苔 -红豆 -黑板 -热水袋 -烛台 -钟表 -路灯 -沙拉 -海报 -公交卡 -樱桃 -创可贴 -牌坊 -苍蝇拍 -高压锅 -电线 -网球拍 -海鸥 -风铃 -订书机 -冰箱 -话梅 -排风机 -锅铲 -绿豆 -航母 -电子秤 -红枣 -金字塔 -鞭炮 -菠萝 -开瓶器 -电饭煲 -仪表盘 -棉棒 -篮球 -狮子 -蚂蚁 -蜡烛 -茶盅 -印章 -茶几 -啤酒 -档案袋 -挂钟 -刺绣 -铃铛 -护腕 -手掌印 -锦旗 -文具盒 -辣椒酱 -耳塞 -中国结 -蜥蜴 -剪纸 -漏斗 -锣 -蒸笼 -珊瑚 -雨靴 -薯条 -蜜蜂 -日历 -口哨