| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161 | # -*- coding:utf-8 -*-import os, sys, timefrom aip import AipOcrimport cchardet as chardetfrom ssat_sdk.utils import LoggingUtilfrom ssat_sdk.config.baidu_config import BaiduConfigclass OCRBaidu():    '''    识别语言类型,默认为CHN_ENG。可选值包括:    - CHN_ENG#中英文混合;    - ENG#英文;    - POR#葡萄牙语;    - FRE#法语;    - GER#德语;    - ITA#意大利语;    - SPA#西班牙语;    - RUS#俄语;    - JAP#日语;    - KOR#韩语    '''    def __init__(self):        baiduCFG = BaiduConfig()        APP_ID = baiduCFG.getAppID()        API_KEY = baiduCFG.getAPIKey()        SECRET_KEY = baiduCFG.getSecretKey()        print "APP_ID,API_KEY,SECRET_KEY:",APP_ID,API_KEY,SECRET_KEY        self.client = AipOcr(APP_ID, API_KEY, SECRET_KEY)    """ 读取图片 """    def get_file_content(self, filePath):        try:            with open(filePath, 'rb') as fp:                img = fp.read()                fp.close()                return img        except Exception,e:            LoggingUtil.printLog("OCR", u"百度OCR读取图片失败,Err:" + unicode(e))            return None    '''    :param language:        - CHN_ENG#中英文混合;        - ENG#英文;        - POR#葡萄牙语;        - FRE#法语;        - GER#德语;        - ITA#意大利语;        - SPA#西班牙语;        - RUS#俄语;        - JAP#日语;        - KOR#韩语    '''    def basicGeneral(self, pic_path, language):        """ 读取图片 """        image = self.get_file_content(pic_path)        if image is None:            return None        """ 如果有可选参数 """        options = {}        options["language_type"] = language        """ 带参数调用通用文字识别, 图片参数为本地图片 """        """        {u'log_id': 3857306686703806895L,        u'direction': 0,        u'words_result_num': 1,        u'words_result':            [                {                    u'words': u'\u58f0\u97f3',                    u'probability': {u'variance': 0.0,                                    u'average': 0.99964, u'min': 0.99952}                }            ],        u'language': -1}        """        try:            print "普通精度"            result = self.client.basicGeneral(image, options)        except Exception,e:            LoggingUtil.printLog("OCR",u"百度普通OCR连接失败,Err:"+ unicode(e))            return None        try:            str_words = self.genOCRStrList(result["words_result"])        except Exception,e:            LoggingUtil.printLog("OCR",u"百度普通OCR识别失败,Err:"+ unicode(e))            return []        return str_words    def basicAccurate(self, pic_path, language="CHN_ENG"):        baiduCFG = BaiduConfig()        retCount = baiduCFG.getRetCount()        # if retCount <= 0:        #     return self.basicGeneral(pic_path, language)        """ 读取图片 """        image = self.get_file_content(pic_path)        if image is None:            return None        """ 如果有可选参数 """        options = {}        # options["detect_direction"] = "true"        # options["probability"] = "true"        """ 带参数调用通用文字识别, 图片参数为本地图片 """        """        {u'log_id': 3857306686703806895L,        u'direction': 0,        u'words_result_num': 1,        u'words_result':            [                {                    u'words': u'\u58f0\u97f3',                    u'probability': {u'variance': 0.0,                                    u'average': 0.99964, u'min': 0.99952}                }            ],        u'language': -1}        """        try:            print "高精度"            result = self.client.basicAccurate(image, options)        except Exception,e:            LoggingUtil.printLog("OCR",u"百度高精度OCR连接失败,Err:" + unicode(e))            return None        try:            str_words = self.genOCRStrList(result["words_result"])        except Exception,e:            LoggingUtil.printLog("OCR",u"百度高精度OCR识别失败,Err:" + unicode(e) + unicode(result))            return []        finally:            baiduCFG = BaiduConfig()            baiduCFG.subRetCount(1)        return str_words    def genOCRStrList(self, words_result):        strList = []        for word in words_result:            strList.append(word["words"].encode("utf-8"))        return strListif __name__ == "__main__":    pic_path = r"D:\ocr_err\mi_1.png"    # pic_path = r"D:\ocr_err\mi_3.png"    # pic_path = r"D:\ocr_err\mi_5.png"    ocr = OCRBaidu()    # for word in ocr.basicGeneral(pic_path,"CHN_ENG"):    #     print "word:", word, chardet.detect(word)    for word in ocr.basicAccurate(pic_path):        print "word:", word, chardet.detect(word)
 |