验证码识别修改为免费打码

2019-09-12 15:34:50 +08:00
parent cc67987bb3
commit 3a31349662
5 changed files with 11 additions and 187 deletions
--- a/py12306/helpers/OCR.py
+++ b/py12306/helpers/OCR.py
@@ -2,6 +2,7 @@ import math
 import random

 from py12306.config import Config
+from py12306.helpers.api import API_FREE_CODE_QCR_API
 from py12306.helpers.request import Request
 from py12306.log.common_log import CommonLog
 from py12306.vender.ruokuai.main import RKClient
@@ -54,16 +55,16 @@ class OCR:
        return positions

    def get_image_by_free_site(self, img):
-        from py12306.helpers.ocr.ml_predict import get_coordinate
-        import base64
-
-        result = get_coordinate(base64.b64decode(img))
-        result = self.get_image_position_by_offset(result)
-        # CommonLog.print_auth_code_info("验证码识别的结果为：" + result)
-
-        if result:
-            return result
+        data = {  # payload sent to the remote recognition service via `data=`
+            'img': img  # forwarded unchanged; the removed local path base64-decoded it, so presumably base64 text - TODO confirm
+        }
+        response = self.session.post(API_FREE_CODE_QCR_API, data=data)  # NOTE(review): no timeout/error handling visible here - confirm self.session covers it
+        result = response.json()
+        if result.get('msg') == 'success':  # any other 'msg' falls through to the failure log and returns None
+            pos = result.get('result')  # passed straight to get_image_position_by_offset, no validation here
+            return self.get_image_position_by_offset(pos)

+        CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE)  # reached only when the service did not report 'success'
        return None


--- a/py12306/helpers/api.py
+++ b/py12306/helpers/api.py
@@ -42,7 +42,7 @@ API_QUERY_ORDER_WAIT_TIME = BASE_URL_OF_12306 + '/otn/confirmPassenger/queryOrde
 API_QUERY_INIT_PAGE = BASE_URL_OF_12306 + '/otn/leftTicket/init'
 # API_GET_BROWSER_DEVICE_ID = BASE_URL_OF_12306 + '/otn/HttpZF/logdevice'
 API_GET_BROWSER_DEVICE_ID = 'https://12306-rail-id.pjialin.com/'
-
+API_FREE_CODE_QCR_API = 'https://12306-ocr.pjialin.com/check/'  # captcha-recognition endpoint POSTed to by OCR.get_image_by_free_site; "QCR" in the name is presumably a typo for OCR

 API_NOTIFICATION_BY_VOICE_CODE = 'http://ali-voice.showapi.com/sendVoice?'
 API_NOTIFICATION_BY_VOICE_CODE_DINGXIN = 'http://yuyin2.market.alicloudapi.com/dx/voice_notice'
--- a/py12306/helpers/ocr/ml_predict.py
+++ b/py12306/helpers/ocr/ml_predict.py
@@ -1,97 +0,0 @@
-# coding: utf-8
-import cv2, os
-import tensorflow as tf
-import numpy as np
-from keras import models
-from py12306.log.common_log import CommonLog
-from py12306.config import Config
-
-tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
-
-
-def get_text(img, offset=0):
-    text = img[3:22, 120 + offset:177 + offset]
-    text = cv2.cvtColor(text, cv2.COLOR_BGR2GRAY)
-    text = text / 255.0
-    h, w = text.shape
-    text.shape = (1, h, w, 1)
-    return text
-
-
-def get_coordinate(img_str):
-    # 储存最终坐标结果
-    result = ''
-    orc_dir = '%spy12306/helpers/ocr/' % Config.PROJECT_DIR
-
-    try:
-        # 读取并预处理验证码
-        img = cv2.imdecode(np.fromstring(img_str, np.uint8), cv2.IMREAD_COLOR)
-        text = get_text(img)
-        imgs = np.array(list(_get_imgs(img)))
-        imgs = preprocess_input(imgs)
-
-        # 识别文字
-        model = models.load_model('%smodel.v2.0.h5' % orc_dir, compile=False)
-        label = model.predict(text)
-        label = label.argmax()
-        fp = open('%stexts.txt' % orc_dir, encoding='utf-8')
-        texts = [text.rstrip('\n') for text in fp]
-        text = texts[label]
-
-        # list放文字
-        titles = [text]
-
-        position = []
-
-        # 获取下一个词
-        # 根据第一个词的长度来定位第二个词的位置
-        if len(text) == 1:
-            offset = 27
-        elif len(text) == 2:
-            offset = 47
-        else:
-            offset = 60
-        text2 = get_text(img, offset=offset)
-        if text2.mean() < 0.95:
-            label = model.predict(text2)
-            label = label.argmax()
-            text2 = texts[label]
-            titles.append(text2)
-
-        # 加载图片分类器
-        model = models.load_model('%s12306.image.model.h5' % orc_dir, compile=False)
-        labels = model.predict(imgs)
-        labels = labels.argmax(axis=1)
-
-        for pos, label in enumerate(labels):
-            if texts[label] in titles:
-                position.append(pos + 1)
-
-        # 没有识别到结果
-        if len(position) == 0:
-            return result
-        result = position
-    except:
-        CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE)
-    return result
-
-
-def preprocess_input(x):
-    x = x.astype('float32')
-    # 我是用cv2来读取的图片，其已经是BGR格式了
-    mean = [103.939, 116.779, 123.68]
-    x -= mean
-    return x
-
-
-def _get_imgs(img):
-    interval = 5
-    length = 67
-    for x in range(40, img.shape[0] - length, interval + length):
-        for y in range(interval, img.shape[1] - length, interval + length):
-            yield img[x:x + length, y:y + length]
-
-
-if __name__ == '__main__':
-    with open('a.jpg', 'r') as f:
-        print(get_coordinate(f.buffer.read()))
--- a/py12306/helpers/ocr/model.v2.0.h5
+++ b/py12306/helpers/ocr/model.v2.0.h5
--- a/py12306/helpers/ocr/texts.txt
+++ b/py12306/helpers/ocr/texts.txt
@@ -1,80 +0,0 @@
-打字机
-调色板
-跑步机
-毛线
-老虎
-安全帽
-沙包
-盘子
-本子
-药片
-双面胶
-龙舟
-红酒
-拖把
-卷尺
-海苔
-红豆
-黑板
-热水袋
-烛台
-钟表
-路灯
-沙拉
-海报
-公交卡
-樱桃
-创可贴
-牌坊
-苍蝇拍
-高压锅
-电线
-网球拍
-海鸥
-风铃
-订书机
-冰箱
-话梅
-排风机
-锅铲
-绿豆
-航母
-电子秤
-红枣
-金字塔
-鞭炮
-菠萝
-开瓶器
-电饭煲
-仪表盘
-棉棒
-篮球
-狮子
-蚂蚁
-蜡烛
-茶盅
-印章
-茶几
-啤酒
-档案袋
-挂钟
-刺绣
-铃铛
-护腕
-手掌印
-锦旗
-文具盒
-辣椒酱
-耳塞
-中国结
-蜥蜴
-剪纸
-漏斗
-锣
-蒸笼
-珊瑚
-雨靴
-薯条
-蜜蜂
-日历
-口哨