| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161 | 
							- # -*- coding:utf-8 -*-
 
- import os, sys, time
 
- from aip import AipOcr
 
- import cchardet as chardet
 
- from ssat_sdk.utils import LoggingUtil
 
- from ssat_sdk.config.baidu_config import BaiduConfig
 
class OCRBaidu():
    '''
    Wrapper around the Baidu ``AipOcr`` client that recognises text in local
    image files.

    Recognition language type, CHN_ENG by default. Possible values include:
    - CHN_ENG  # mixed Chinese and English
    - ENG      # English
    - POR      # Portuguese
    - FRE      # French
    - GER      # German
    - ITA      # Italian
    - SPA      # Spanish
    - RUS      # Russian
    - JAP      # Japanese
    - KOR      # Korean
    '''

    def __init__(self):
        """Create the AipOcr client from the credentials held by BaiduConfig."""
        baiduCFG = BaiduConfig()
        APP_ID = baiduCFG.getAppID()
        API_KEY = baiduCFG.getAPIKey()
        SECRET_KEY = baiduCFG.getSecretKey()
        # The credentials are intentionally NOT printed: echoing the API key
        # and secret key to stdout leaks them into console and CI logs.
        self.client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

    @staticmethod
    def _errText(err):
        """Return a unicode description of ``err`` that is safe to log.

        In Python 2, ``unicode(err)`` raises UnicodeDecodeError when the
        exception message contains non-ASCII bytes; in that case fall back to
        ``repr(err)``, so the logging code never raises itself.
        """
        try:
            return unicode(err)
        except UnicodeError:
            return unicode(repr(err))

    def get_file_content(self, filePath):
        """Read an image file and return its raw content.

        :param filePath: path of the image file to read.
        :return: the bytes read from the file, or None when the file cannot
                 be read (the error is logged through LoggingUtil).
        """
        try:
            # The ``with`` statement closes the file; no explicit close needed.
            with open(filePath, 'rb') as fp:
                return fp.read()
        except Exception as e:
            LoggingUtil.printLog("OCR", u"百度OCR读取图片失败,Err:" + self._errText(e))
            return None

    def basicGeneral(self, pic_path, language):
        """Recognise the text of a local image with the general (standard
        accuracy) OCR call.

        :param pic_path: path of the local image file.
        :param language: recognition language code, sent to the service as
                         the ``language_type`` option (see the class
                         docstring for the known values).
        :return: list of UTF-8 encoded strings, one per recognised entry;
                 ``[]`` when the response holds no usable ``words_result``;
                 ``None`` when the image cannot be read or the request fails.
        """
        # Read the image.
        image = self.get_file_content(pic_path)
        if image is None:
            return None

        # Optional request parameters.
        options = {"language_type": language}

        # Call the general text recognition with options; the image argument
        # is the content of a local file. Example of a response:
        #   {u'log_id': 3857306686703806895L,
        #    u'direction': 0,
        #    u'words_result_num': 1,
        #    u'words_result': [
        #        {u'words': u'\u58f0\u97f3',
        #         u'probability': {u'variance': 0.0,
        #                          u'average': 0.99964, u'min': 0.99952}}],
        #    u'language': -1}
        try:
            print("普通精度")
            result = self.client.basicGeneral(image, options)
        except Exception as e:
            LoggingUtil.printLog("OCR", u"百度普通OCR连接失败,Err:" + self._errText(e))
            return None

        try:
            return self.genOCRStrList(result["words_result"])
        except Exception as e:
            # A response without "words_result" (or a malformed one) lands here.
            LoggingUtil.printLog("OCR", u"百度普通OCR识别失败,Err:" + self._errText(e))
            return []

    def basicAccurate(self, pic_path, language="CHN_ENG"):
        """Recognise the text of a local image with the high accuracy OCR call.

        After a request that reached the service, the remaining-call counter
        kept by BaiduConfig is decreased by one, whether or not the response
        could be parsed.

        :param pic_path: path of the local image file.
        :param language: accepted for signature parity with ``basicGeneral``.
                         NOTE(review): it is currently not forwarded to the
                         service (the request is sent with empty options);
                         confirm whether the accurate endpoint should get it.
        :return: list of UTF-8 encoded strings, one per recognised entry;
                 ``[]`` when the response holds no usable ``words_result``;
                 ``None`` when the image cannot be read or the request fails.
        """
        # Read the image.
        image = self.get_file_content(pic_path)
        if image is None:
            return None

        # Optional request parameters (none are sent at the moment).
        options = {}
        # options["detect_direction"] = "true"
        # options["probability"] = "true"

        # Call the accurate text recognition; the image argument is the
        # content of a local file. The response is a dict whose
        # "words_result" entry is a list of {"words": ...} items.
        try:
            print("高精度")
            result = self.client.basicAccurate(image, options)
        except Exception as e:
            LoggingUtil.printLog("OCR", u"百度高精度OCR连接失败,Err:" + self._errText(e))
            return None

        try:
            str_words = self.genOCRStrList(result["words_result"])
        except Exception as e:
            LoggingUtil.printLog("OCR", u"百度高精度OCR识别失败,Err:" + self._errText(e) + unicode(result))
            return []
        finally:
            # The request was made, so consume one call from the counter,
            # using a freshly created BaiduConfig.
            baiduCFG = BaiduConfig()
            baiduCFG.subRetCount(1)
        return str_words

    def genOCRStrList(self, words_result):
        """Extract the recognised strings from a ``words_result`` list.

        :param words_result: iterable of dicts, each holding the recognised
                             text under the ``"words"`` key.
        :return: list with each ``"words"`` value encoded as UTF-8.
        """
        return [word["words"].encode("utf-8") for word in words_result]
 
if __name__ == "__main__":
    # Manual smoke test: run high accuracy OCR on a local sample image and
    # print every recognised string with the encoding chardet detects for it.
    pic_path = r"D:\ocr_err\mi_1.png"
    # pic_path = r"D:\ocr_err\mi_3.png"
    # pic_path = r"D:\ocr_err\mi_5.png"
    ocr = OCRBaidu()
    # for word in ocr.basicGeneral(pic_path,"CHN_ENG"):
    #     print "word:", word, chardet.detect(word)
    # basicAccurate returns None when the image cannot be read or the request
    # fails; fall back to an empty list so the loop does not raise TypeError.
    for word in ocr.basicAccurate(pic_path) or []:
        print("word: %s %s" % (word, chardet.detect(word)))
 
 
  |