优化验证码识别

2019-09-03 13:43:48 +08:00
parent 0bd6417ff4
commit 8f984cd751
4 changed files with 20 additions and 56 deletions
--- a/py12306/helpers/OCR.py
+++ b/py12306/helpers/OCR.py
@@ -53,16 +53,12 @@ class OCR:
            positions.append(int(y))
        return positions

-    @staticmethod
-    def get_image_by_free_site(img):
+    def get_image_by_free_site(self, img):
        from py12306.helpers.ocr.ml_predict import get_coordinate
        import base64

-        # 转为图片文件
-        with open('authcode.jpg', 'wb') as image:
-            image.write(base64.b64decode(img))
-
-        result = get_coordinate('authcode.jpg')
+        result = get_coordinate(base64.b64decode(img))
+        result = self.get_image_position_by_offset(result)
        # CommonLog.print_auth_code_info("验证码识别的结果为：" + result)

        if result:
--- a/py12306/helpers/auth_code.py
+++ b/py12306/helpers/auth_code.py
@@ -32,10 +32,7 @@ class AuthCode:
        if not position:  # 打码失败
            return self.retry_get_auth_code()

-        if Config().AUTO_CODE_PLATFORM == 'free':
-            answer = position
-        else:
-            answer = ','.join(map(str, position))
+        answer = ','.join(map(str, position))

        if not self.check_code(answer):
            return self.retry_get_auth_code()
--- a/py12306/helpers/ocr/ml_predict.py
+++ b/py12306/helpers/ocr/ml_predict.py
@@ -1,8 +1,12 @@
 # coding: utf-8
 import cv2, os
+import tensorflow as tf
 import numpy as np
 from keras import models
 from py12306.log.common_log import CommonLog
+from py12306.config import Config
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


 def get_text(img, offset=0):
@@ -14,22 +18,23 @@ def get_text(img, offset=0):
    return text


-def get_coordinate(fn):
+def get_coordinate(img_str):
    # 储存最终坐标结果
    result = ''
+    orc_dir = '%spy12306/helpers/ocr/' % Config.PROJECT_DIR

    try:
        # 读取并预处理验证码
-        img = cv2.imread(fn)
+        img = cv2.imdecode(np.frombuffer(img_str, np.uint8), cv2.IMREAD_COLOR)  # frombuffer: np.fromstring is deprecated for binary input
        text = get_text(img)
        imgs = np.array(list(_get_imgs(img)))
        imgs = preprocess_input(imgs)

        # 识别文字
-        model = models.load_model('py12306/helpers/ocr/model.v2.0.h5')
+        model = models.load_model('%smodel.v2.0.h5' % orc_dir, compile=False)
        label = model.predict(text)
        label = label.argmax()
-        fp = open('py12306/helpers/ocr/texts.txt', encoding='utf-8')
+        fp = open('%stexts.txt' % orc_dir, encoding='utf-8')
        texts = [text.rstrip('\n') for text in fp]
        text = texts[label]

@@ -54,50 +59,18 @@ def get_coordinate(fn):
            titles.append(text2)

        # 加载图片分类器
-        model = models.load_model('py12306/helpers/ocr/12306.image.model.h5')
+        model = models.load_model('%s12306.image.model.h5' % orc_dir, compile=False)
        labels = model.predict(imgs)
        labels = labels.argmax(axis=1)

        for pos, label in enumerate(labels):
-            # print(pos // 4, pos % 4, texts[label])
-            if len(titles) == 1:
-                if texts[label] == titles[0]:
-                    position.append(pos)
-            elif len(titles) == 2:
-                if texts[label] == titles[0]:
-                    position.append(pos)
-                elif texts[label] == titles[1]:
-                    position.append(pos)
-            elif len(titles) == 3:
-                if texts[label] == titles[0]:
-                    position.append(pos)
-                elif texts[label] == titles[1]:
-                    position.append(pos)
-                elif texts[label] == titles[2]:
-                    position.append(pos)
+            if texts[label] in titles:
+                position.append(pos + 1)

        # 没有识别到结果
        if len(position) == 0:
            return result
-
-        for i in position:
-            if i == 0:
-                result += '31,45,'
-            elif i == 1:
-                result += '100,45,'
-            elif i == 2:
-                result += '170,45,'
-            elif i == 3:
-                result += '240,45,'
-            elif i == 4:
-                result += '30,115,'
-            elif i == 5:
-                result += '100,115,'
-            elif i == 6:
-                result += '170,115,'
-            elif i == 7:
-                result += '240,115,'
-        result = result[:-1]
+        result = position
    except:
        CommonLog.print_auto_code_fail(CommonLog.MESSAGE_GET_RESPONSE_FROM_FREE_AUTO_CODE)
    return result
@@ -120,4 +93,5 @@ def _get_imgs(img):


 if __name__ == '__main__':
-    print(get_coordinate('a.jpg'))
+    with open('a.jpg', 'rb') as f:  # binary mode: get_coordinate decodes raw image bytes
+        print(get_coordinate(f.read()))
--- a/requirements.txt
+++ b/requirements.txt
@@ -31,10 +31,7 @@ websockets==7.0
 Werkzeug==0.14.1
 DingtalkChatbot==1.3.0
 lightpush==0.1.3
-sklearn==0.0
 opencv-python==4.1.0.25
 keras==2.2.4
 tensorflow==1.14.0
-matplotlib==3.1.0
-numpy==1.16.4
-scipy==1.3.0
+numpy==1.16.4