# -*- coding:utf-8 -*-
"""String helpers: encoding normalisation, fuzzy OCR comparison and parsing.

The module is written to run unchanged on both Python 2 and Python 3.
"""
import os
import re
import sys
import time

try:
    import chardet
except ImportError:
    # Keep the helpers that do not need encoding detection importable.
    chardet = None

try:
    _TEXT_TYPE = unicode  # Python 2
except NameError:
    _TEXT_TYPE = str  # Python 3

# Encoding assumed when chardet cannot make a guess (it reports None).
_FALLBACK_ENCODING = "utf-8"

# Groups of characters that are treated as interchangeable by strcmp().
# Only lower-case members are needed: inputs are lower-cased before comparing.
_SIMILAR_GLYPHS = (frozenset(u"il1t"), frozenset(u"o0"))

# Optional sign, integer part, optional dot, optional fractional digits.
_NUMBER_PATTERN = re.compile(r'(-?\d+\.?\d*)')


def _detect_encoding(raw):
    """Return the encoding chardet guesses for *raw*, or the UTF-8 fallback."""
    if chardet is None:
        raise ImportError(
            "chardet is required to detect the encoding of byte strings")
    return chardet.detect(raw)['encoding'] or _FALLBACK_ENCODING


def _to_text(value):
    """Convert *value* (text, bytes or any other object) to a text string."""
    if isinstance(value, _TEXT_TYPE):
        return value
    if isinstance(value, (bytes, bytearray)):
        raw = bytes(value)
        try:
            return raw.decode("utf-8")
        except UnicodeDecodeError:
            # latin-1 maps every byte to one character, so this never fails
            # and distinct bytes stay distinct.
            return raw.decode("latin-1")
    return _TEXT_TYPE(value)


def toUTF8Str(str1):
    """Re-encode a string of any encoding as UTF-8 bytes.

    :param str1: byte string in an unknown encoding, or a text string.
    :return: the UTF-8 encoded byte string; "" when the input is empty.
        Pure-ASCII input comes back unchanged, since ASCII is a subset of
        UTF-8.
    :raises ImportError: if a byte string is given and chardet is missing.
    :raises UnicodeDecodeError: if the bytes do not match the guessed encoding.
    """
    if not str1:
        return ""
    if isinstance(str1, _TEXT_TYPE):
        # Already decoded: nothing to detect, just encode.
        return str1.encode("utf-8")
    return str1.decode(_detect_encoding(str1)).encode("utf-8")


def decode(str1):
    """Decode a byte string of unknown encoding into text.

    :param str1: byte string in an unknown encoding, or a text string.
    :return: the decoded text; "" when the input is empty. Text input is
        returned unchanged.
    :raises ImportError: if a byte string is given and chardet is missing.
    :raises UnicodeDecodeError: if the bytes do not match the guessed encoding.
    """
    if not str1:
        return ""
    if isinstance(str1, _TEXT_TYPE):
        return str1
    return str1.decode(_detect_encoding(str1))


def pathToWindowsPath(str_path):
    """Turn forward slashes into backslashes.

    The result can be opened directly on Windows, which is handy when
    debugging paths that were joined with "/".

    :param str_path: path string using "/" separators.
    :return: the same path with every "/" replaced by a backslash.
    """
    return str_path.replace('/', '\\')


def strcmp(str1, str2, erase=None):
    """Compare two strings loosely, for matching OCR text recognition output.

    The comparison ignores letter case and spaces, removes every entry of
    *erase*, and treats visually similar characters as equal: any of
    i / l / 1 / t match each other, and o / 0 match each other.

    :param str1: first value; text, bytes, or any object convertible to text.
    :param str2: second value, same accepted types as *str1*.
    :param erase: optional iterable of substrings removed from both values
        before comparing. Entries are matched against the lower-cased text,
        so they should be given in lower case.
    :return: True when the normalised strings match, otherwise False.
    """
    erase_tokens = [_to_text(token) for token in (erase or ())]

    def normalise(value):
        text = _to_text(value).replace(u" ", u"").lower()
        for token in erase_tokens:
            text = text.replace(token, u"")
        return text

    left = normalise(str1)
    right = normalise(str2)
    if len(left) != len(right):
        return False
    return all(
        a == b or any(a in group and b in group for group in _SIMILAR_GLYPHS)
        for a, b in zip(left, right)
    )


def getDigitFromString(mulStr):
    """Extract every number embedded in a mixed string.

    :param mulStr: string mixing digits with other characters.
    :return: list of the numbers as strings, in order of appearance, e.g.
        "a11.11b22" gives ['11.11', '22']. A leading "-" is kept as the sign.
    """
    return _NUMBER_PATTERN.findall(mulStr)


def strToList(srcStr, reg):
    """Split a string on a separator and drop the empty pieces.

    :param srcStr: string to split.
    :param reg: literal separator passed to ``split`` (not a regex).
    :return: list of the non-empty pieces; whitespace is not stripped.
    """
    return [piece for piece in srcStr.split(reg) if piece]


if __name__ == "__main__":
    str2 = u"left, right"
    print(str2.split(","))
    arr = strToList(str2, ",")
    # Single-argument form prints identically on Python 2 and Python 3.
    print("strToArr: %s" % (arr,))