123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129 |
- # -*- coding:utf-8 -*-
- import os, sys, time
- import chardet
- import re
def toUTF8Str(str1):
    """Convert a byte string of any encoding into a UTF-8 encoded byte string.

    The source encoding is guessed with ``chardet.detect``. Input detected as
    ASCII comes back unchanged, because ASCII bytes are already valid UTF-8.

    :param str1: byte string (Python 2 ``str`` / ``bytes``) to convert.
    :return: the UTF-8 encoded byte string, or ``""`` for empty input.
    :raises UnicodeDecodeError: if the bytes do not fit the detected encoding.
    """
    if not str1:
        return ""
    encoding = chardet.detect(str1)['encoding']
    if encoding is None:
        # chardet reports None when it cannot make a guess; decode(None)
        # would raise TypeError, so fall back to a lossy UTF-8 decode.
        return str1.decode('utf-8', 'replace').encode('utf-8')
    return str1.decode(encoding).encode('utf-8')
def decode(str1):
    """Decode a byte string into text using the encoding chardet detects.

    :param str1: byte string (Python 2 ``str`` / ``bytes``) to decode.
    :return: the decoded text, or ``""`` for empty input.
    :raises UnicodeDecodeError: if the bytes do not fit the detected encoding.
    """
    if not str1:
        return ""
    encoding = chardet.detect(str1)['encoding']
    if encoding is None:
        # chardet reports None when it cannot make a guess; decode(None)
        # would raise TypeError, so fall back to a lossy UTF-8 decode.
        return str1.decode('utf-8', 'replace')
    return str1.decode(encoding)
def pathToWindowsPath(str_path):
    """Return *str_path* with every forward slash swapped for a backslash.

    Meant as a debugging aid: the result is a path that can be opened
    directly on Windows.

    :param str_path: path string that uses ``/`` as its separator.
    :return: the same path using ``\\`` as its separator.
    """
    segments = str_path.split('/')
    return '\\'.join(segments)
# Groups of look-alike characters that strcmp treats as interchangeable.
# Only lower-case members are listed because both operands are lower-cased
# before they are compared.
_STRCMP_LOOKALIKES = (frozenset(u'il1t'), frozenset(u'o0'))


def _strcmp_to_text(value):
    """Coerce *value* to a text (unicode) string so both operands compare alike."""
    if not isinstance(value, (bytes, type(u''))):
        value = str(value)
    if isinstance(value, bytes):
        # Byte strings are assumed to hold UTF-8; undecodable bytes are
        # replaced rather than aborting the comparison.
        value = value.decode('utf-8', 'replace')
    return value


def strcmp(str1, str2, erase=None):
    """Loosely compare two strings, ignoring case, spaces and look-alike glyphs.

    Used for matching recognised text (the original note says "COR",
    presumably OCR). Both operands are converted to text, lower-cased and
    stripped of spaces and of every entry in *erase*. They are then compared
    character by character, with characters from the same look-alike group
    (``i l 1 t`` and ``o 0``) counting as equal.

    :param str1: first value; text, UTF-8 bytes, or anything ``str()`` accepts.
    :param str2: second value, same accepted types as *str1*.
    :param erase: optional iterable of substrings removed from both operands
        before comparing. Entries are matched against the lower-cased text.
    :return: True if the normalised strings match, otherwise False.
    """
    left = _strcmp_to_text(str1).lower()
    right = _strcmp_to_text(str2).lower()

    # Spaces are always dropped; caller-supplied entries are dropped as well.
    for unwanted in [u' '] + [_strcmp_to_text(item) for item in (erase or ())]:
        left = left.replace(unwanted, u'')
        right = right.replace(unwanted, u'')

    if len(left) != len(right):
        return False

    return all(
        a == b or any(a in group and b in group for group in _STRCMP_LOOKALIKES)
        for a, b in zip(left, right)
    )
def getDigitFromString(mulStr):
    """Collect every number embedded in *mulStr*, in order of appearance.

    A number is an optional minus sign, one or more digits, an optional
    decimal point and any digits after it, so a trailing dot is kept
    (``"1.x"`` yields ``"1."``).

    :param mulStr: string that mixes digits with other characters.
    :return: list of the matched number strings, e.g. ['11.11', '22'].
    """
    number_pattern = re.compile(r'(-?\d+\.?\d*)')
    return number_pattern.findall(mulStr)
- "将字符串拆分成数组"
def strToList(srcStr, reg):
    """Split *srcStr* on the separator *reg* and drop the empty pieces.

    Despite its name, *reg* is passed straight to ``str.split`` and is
    therefore a literal separator, not a regular expression. Pieces are not
    stripped, so surrounding whitespace is preserved.

    :param srcStr: string to split.
    :param reg: literal separator string.
    :return: list of the non-empty pieces, in their original order.
    """
    return [piece for piece in srcStr.split(reg) if piece]
if __name__ == "__main__":
    # Ad-hoc smoke check: show a plain split next to strToList's result.
    # Single-argument print(...) calls are used so the script parses on both
    # Python 2 and Python 3 while printing the same text on Python 2.
    str2 = u"left, right"
    print(str2.split(","))
    arr = strToList(str2, ",")
    print("strToArr: %s" % (arr,))
|