# Source: MOKA / scbc_repos
							# -*- coding:utf-8 -*-
import os, sys, time

import chardet
import re

'''

将任意编码的str类型字符串，转码为utf-8.但是ascii编码的字符串，不会转码。

'''


def toUTF8Str(str1):
    """Re-encode a string of unknown encoding as UTF-8 bytes.

    The source encoding of a byte string is guessed with ``chardet.detect``.
    Pure-ASCII input comes back byte-for-byte identical because ASCII is a
    subset of UTF-8.

    Args:
        str1: Byte string whose encoding is unknown, or an already-decoded
            text (unicode) string.

    Returns:
        The UTF-8 encoded byte string. ``""`` when ``str1`` is empty or
        ``None``. ``str1`` itself, unchanged, when chardet cannot identify
        an encoding.

    Raises:
        UnicodeDecodeError: If the bytes are not valid in the encoding that
            chardet reported.
    """
    if not str1:
        return ""
    # Text needs no detection step; chardet only accepts byte input.
    if isinstance(str1, type(u'')):
        return str1.encode("utf-8")
    encoding = chardet.detect(str1)['encoding']
    if encoding is None:
        # chardet reports None when it cannot guess; decode(None) would
        # raise TypeError, so hand the bytes back untouched instead.
        return str1
    return str1.decode(encoding).encode("utf-8")


'''

将str类型的字符串解码,返回解码后的字符串

'''


def decode(str1):
    """Decode a byte string of unknown encoding into text.

    The encoding is guessed with ``chardet.detect``.

    Args:
        str1: Byte string whose encoding is unknown, or an already-decoded
            text (unicode) string.

    Returns:
        The decoded text. ``""`` when ``str1`` is empty or ``None``. Text
        input is returned unchanged. When chardet cannot identify an
        encoding, the bytes are decoded as UTF-8 with undecodable bytes
        replaced by U+FFFD.

    Raises:
        UnicodeDecodeError: If the bytes are not valid in the encoding that
            chardet reported.
    """
    if not str1:
        return ""
    # Already text: nothing to decode, and chardet only accepts byte input.
    if isinstance(str1, type(u'')):
        return str1
    encoding = chardet.detect(str1)['encoding']
    if encoding is None:
        # chardet reports None when it cannot guess; decode(None) would
        # raise TypeError, so fall back to a lossy UTF-8 decode.
        return str1.decode("utf-8", "replace")
    return str1.decode(encoding)


'''

把拼接的文件路径转化成windows可直接打开的路径，方便调试

'''


def pathToWindowsPath(str_path):
    """Return ``str_path`` with every forward slash turned into a backslash.

    Handy for pasting a joined path straight into Windows tools while
    debugging.

    Args:
        str_path: Path string that may contain ``/`` separators.

    Returns:
        The same path with ``\\`` in place of each ``/``.
    """
    segments = str_path.split('/')
    return '\\'.join(segments)

'''

    # 描述：比较字符串str1和str2，但不区分字母的大小写

    用于OCR文字识别的字符串匹配判断

    # 返回值：相等返回True.

    # 

    # '''
def _strcmp_to_text(value):
    """Return ``value`` as a text (unicode) string for comparison."""
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    if isinstance(value, text_type):
        return value
    if isinstance(value, (bytes, bytearray)):
        raw = bytes(value)
        try:
            return raw.decode('utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8: latin-1 maps every byte to one code point, so
            # the comparison can still proceed instead of crashing.
            return raw.decode('latin-1')
    return text_type(value)


def strcmp(str1, str2, erase=()):
    """Loosely compare two strings, as needed for matching OCR output.

    Both values are converted to text and lowercased, spaces and every
    entry of ``erase`` are removed, and the results are compared character
    by character. Characters that are easily confused are treated as equal:
    ``i``/``l``/``1``/``t`` with each other, and ``o``/``0`` with each other.

    Args:
        str1: First value; text, bytes, or any object convertible to text.
        str2: Second value; same accepted types as ``str1``.
        erase: Iterable of substrings to strip from both values before
            comparing. Entries are matched against the lowercased text, so
            give them in lowercase.

    Returns:
        True if the normalized strings have the same length and match at
        every position (exactly or via a look-alike group), else False.
    """
    # Groups of characters treated as interchangeable. Lowercase only,
    # because both operands are lowercased before the comparison.
    similar = (frozenset(u'il1t'), frozenset(u'o0'))

    left = _strcmp_to_text(str1).lower()
    right = _strcmp_to_text(str2).lower()

    # Spaces are always ignored; callers may list more substrings to drop.
    for token in [u' '] + [_strcmp_to_text(item) for item in (erase or ())]:
        left = left.replace(token, u'')
        right = right.replace(token, u'')

    if len(left) != len(right):
        return False

    return all(
        a == b or any(a in group and b in group for group in similar)
        for a, b in zip(left, right)
    )

'''

根据传入的字符串，找出数字，组成数组输出

:param :mulStr 混合了字符和数字的字符串

:return : 返回数字数组.例如：['11.11', '22']

'''
def getDigitFromString(mulStr):
    """Extract every number embedded in a mixed text/number string.

    A number is an optional leading ``-``, one or more digits, and an
    optional fractional part (``.`` followed by at least one digit). A dot
    that is not followed by a digit is not part of the number, so
    ``"total 22."`` yields ``"22"`` rather than ``"22."``. A hyphen directly
    in front of digits is always read as a minus sign.

    Args:
        mulStr: String mixing digits with other characters.

    Returns:
        List of the numbers found, as strings, in order of appearance,
        e.g. ``['11.11', '22']``. Empty list when there are none.
    """
    return re.findall(r'-?\d+(?:\.\d+)?', mulStr)

"将字符串拆分成数组"
def strToList(srcStr, reg):
    """Split ``srcStr`` on the separator ``reg`` and drop empty pieces.

    Args:
        srcStr: String to split.
        reg: Literal separator handed to ``srcStr.split`` (not a regex).

    Returns:
        List of the non-empty pieces, in their original order.
    """
    return [piece for piece in srcStr.split(reg) if piece]

if __name__ == "__main__":
    # Ad-hoc smoke test: show the raw str.split() result next to what
    # strToList() returns for the same input.
    str2 = u"left, right"
    # Single-argument, parenthesized print is valid on both Python 2 and 3
    # and prints the same text as the former print statements on Python 2.
    print(str2.split(","))
    arr = strToList(str2, ",")
    print("strToArr: %s" % (arr,))