# -*- coding:utf-8 -*-
# MOKA / scbc_repos
import os, sys, time
from aip import AipOcr
import cchardet as chardet
from ssat_sdk.utils import LoggingUtil
from ssat_sdk.config.baidu_config import BaiduConfig


class OCRBaidu():
    '''
    Thin wrapper around the Baidu AIP OCR client (``AipOcr``).

    The credentials are read from ``BaiduConfig`` when the object is created.
    Both recognition methods take a path to a local image and return the
    recognised text as a list of UTF-8 encoded strings.

    Recognition language codes (the service default is CHN_ENG):
    - CHN_ENG: mixed Chinese and English
    - ENG: English
    - POR: Portuguese
    - FRE: French
    - GER: German
    - ITA: Italian
    - SPA: Spanish
    - RUS: Russian
    - JAP: Japanese
    - KOR: Korean
    '''

    def __init__(self):
        '''Create the AipOcr client from the credentials held by BaiduConfig.'''
        baiduCFG = BaiduConfig()
        APP_ID = baiduCFG.getAppID()
        API_KEY = baiduCFG.getAPIKey()
        SECRET_KEY = baiduCFG.getSecretKey()
        # Only the application id is echoed. The API key and the secret key
        # are credentials and must not end up on stdout or in captured logs.
        print("APP_ID: %s" % (APP_ID,))
        self.client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    def get_file_content(self, filePath):
        '''
        Read an image file and return its raw bytes.

        :param filePath: path of the image on local disk.
        :return: the file content as a byte string, or None when the file
            cannot be read (the error is reported via LoggingUtil.printLog).
        '''
        try:
            # The with-block closes the file; no explicit close() is needed.
            with open(filePath, 'rb') as fp:
                return fp.read()
        except Exception as e:
            LoggingUtil.printLog("OCR", u"百度OCR读取图片失败,Err:" + unicode(e))
            return None

    def basicGeneral(self, pic_path, language):
        '''
        Run standard-accuracy general text recognition on a local image.

        :param pic_path: path of the image on local disk.
        :param language: recognition language code, sent to the service as
            the ``language_type`` option (CHN_ENG, ENG, POR, FRE, GER, ITA,
            SPA, RUS, JAP or KOR).
        :return: list of recognised strings (UTF-8 encoded), one per entry of
            the response's ``words_result``; None when the image cannot be
            read or the client call raises; an empty list when the response
            cannot be parsed (for example it has no ``words_result`` key).
        '''
        image = self.get_file_content(pic_path)
        if image is None:
            return None

        options = {"language_type": language}

        # Shape of a successful response, for reference:
        #   {'log_id': 3857306686703806895,
        #    'direction': 0,
        #    'words_result_num': 1,
        #    'words_result': [
        #        {'words': '...',
        #         'probability': {'variance': 0.0,
        #                         'average': 0.99964, 'min': 0.99952}}],
        #    'language': -1}
        try:
            print("普通精度")
            result = self.client.basicGeneral(image, options)
        except Exception as e:
            LoggingUtil.printLog("OCR",u"百度普通OCR连接失败,Err:"+ unicode(e))
            return None

        try:
            return self.genOCRStrList(result["words_result"])
        except Exception as e:
            LoggingUtil.printLog("OCR",u"百度普通OCR识别失败,Err:"+ unicode(e))
            return []

    def basicAccurate(self, pic_path, language="CHN_ENG"):
        '''
        Run high-accuracy general text recognition on a local image.

        After every request that reached the parsing stage (whether or not
        parsing succeeded) ``BaiduConfig().subRetCount(1)`` is called.
        NOTE(review): presumably this decrements the remaining high-accuracy
        call quota; the counter is not consulted before the request is sent
        (the quota-based fallback to basicGeneral was already disabled).

        :param pic_path: path of the image on local disk.
        :param language: recognition language code, sent to the service as
            the ``language_type`` option. Defaults to CHN_ENG.
        :return: list of recognised strings (UTF-8 encoded), one per entry of
            the response's ``words_result``; None when the image cannot be
            read or the client call raises; an empty list when the response
            cannot be parsed (for example it has no ``words_result`` key).
        '''
        image = self.get_file_content(pic_path)
        if image is None:
            return None

        # Forward the requested language. Previously the parameter was
        # accepted but never sent, so every call used the service default.
        # Other options supported by the service ("detect_direction",
        # "probability") are intentionally left at their defaults.
        options = {"language_type": language}

        # The response has the same shape as the one described in
        # basicGeneral: a dict whose 'words_result' is a list of
        # {'words': ..., 'probability': {...}} entries.
        try:
            print("高精度")
            result = self.client.basicAccurate(image, options)
        except Exception as e:
            LoggingUtil.printLog("OCR",u"百度高精度OCR连接失败,Err:" + unicode(e))
            return None

        try:
            return self.genOCRStrList(result["words_result"])
        except Exception as e:
            LoggingUtil.printLog("OCR",u"百度高精度OCR识别失败,Err:" + unicode(e) + unicode(result))
            return []
        finally:
            # Runs on both the success and the parse-failure path. A fresh
            # BaiduConfig is created here, exactly as the original code did.
            BaiduConfig().subRetCount(1)

    def genOCRStrList(self, words_result):
        '''
        Extract the recognised text from a ``words_result`` list.

        :param words_result: iterable of dicts, each with a text value under
            the "words" key.
        :return: list with the "words" value of every entry, encoded as UTF-8,
            in the original order.
        '''
        return [word["words"].encode("utf-8") for word in words_result]

if __name__ == "__main__":
    # Manual smoke test: run high-accuracy OCR on one image and print every
    # recognised string together with the encoding chardet detects for it.
    # The image path may be given as the first command-line argument; the
    # original sample path is used otherwise (other samples used during
    # debugging: D:\ocr_err\mi_3.png, D:\ocr_err\mi_5.png).
    pic_path = sys.argv[1] if len(sys.argv) > 1 else r"D:\ocr_err\mi_1.png"
    ocr = OCRBaidu()

    # basicAccurate() returns None when the image cannot be read or the
    # request fails; fall back to an empty list instead of raising TypeError.
    # ocr.basicGeneral(pic_path, "CHN_ENG") can be substituted here to try
    # the standard-accuracy endpoint.
    for word in ocr.basicAccurate(pic_path) or []:
        print("word: %s %s" % (word, chardet.detect(word)))