#include "StdAfx.h"
#include "CharEncoding.h"

// ---------------------------------------------------------------------------
// File-local helpers shared by the CharEncoding member functions below.
// ---------------------------------------------------------------------------
namespace
{
    // Code page used when reading "ASCII" (narrow) input.
    const UINT kNarrowInputCodePage = CP_ACP;

    // Code page used when producing "ASCII" (narrow) output.
    // NOTE(review): input uses the ANSI code page while output uses the OEM code
    // page. On systems where the two differ, a round trip is not lossless. This
    // is kept exactly as the code always behaved; confirm with callers before
    // unifying the two.
    const UINT kNarrowOutputCodePage = CP_OEMCP;

    // Converts the NUL-terminated multibyte string `source` (encoded in
    // `codePage`) to UTF-16. `result` is replaced on success and left untouched
    // on failure. Returns false for a NULL source or when the API reports an error.
    bool ConvertToWide(UINT codePage, LPCCH source, std::wstring& result)
    {
        if (source == NULL)
            return false;

        // First call: number of WCHARs required, including the terminator.
        const int required = MultiByteToWideChar(codePage, 0, source, -1, NULL, 0);
        if (required <= 0)
            return false;

        std::wstring buffer(static_cast<size_t>(required), L'\0');
        const int written = MultiByteToWideChar(codePage, 0, source, -1, &buffer[0], required);
        if (written != required)
            return false;

        // Drop the terminator the API wrote; std::wstring keeps its own.
        buffer.resize(static_cast<size_t>(required) - 1);
        result.swap(buffer);
        return true;
    }

    // Converts the NUL-terminated UTF-16 string `source` to a multibyte string in
    // `codePage`. `result` is replaced on success and left untouched on failure.
    // Returns false for a NULL source or when the API reports an error.
    bool ConvertFromWide(UINT codePage, LPCWCH source, std::string& result)
    {
        if (source == NULL)
            return false;

        // First call: number of bytes required, including the terminator.
        const int required = WideCharToMultiByte(codePage, 0, source, -1, NULL, 0, NULL, NULL);
        if (required <= 0)
            return false;

        std::string buffer(static_cast<size_t>(required), '\0');
        const int written = WideCharToMultiByte(codePage, 0, source, -1, &buffer[0], required, NULL, NULL);
        if (written != required)
            return false;

        buffer.resize(static_cast<size_t>(required) - 1);
        result.swap(buffer);
        return true;
    }

    // Converts a narrow string from one code page to another by going through UTF-16.
    bool Transcode(UINT sourceCodePage, UINT targetCodePage, LPCCH source, std::string& result)
    {
        std::wstring wide;
        return ConvertToWide(sourceCodePage, source, wide)
            && ConvertFromWide(targetCodePage, wide.c_str(), result);
    }

    // Returns a new[]-allocated, NUL-terminated copy of `text`.
    // Ownership passes to the caller, who must release it with delete[].
    template <typename CharT>
    CharT* NewCString(const std::basic_string<CharT>& text)
    {
        const size_t count = text.size() + 1; // include the terminator
        CharT* copy = new CharT[count];
        memcpy(copy, text.c_str(), sizeof(CharT) * count);
        return copy;
    }

    // Copies `text` plus its terminator into a caller-supplied buffer holding
    // `capacity` elements. The whole buffer is zeroed first. Returns FALSE when
    // the buffer is NULL or cannot hold the text and its terminator.
    template <typename CharT>
    BOOL CopyToCallerBuffer(const std::basic_string<CharT>& text, CharT* buffer, int capacity)
    {
        if (buffer == NULL || capacity <= 0)
            return FALSE;

        // An exactly-sized buffer (text + terminator) is sufficient.
        if (text.size() + 1 > static_cast<size_t>(capacity))
            return FALSE;

        memset(buffer, 0, sizeof(CharT) * static_cast<size_t>(capacity));
        memcpy(buffer, text.c_str(), sizeof(CharT) * text.size());
        return TRUE;
    }

    // Returns the value (0..15) of a hexadecimal digit, or -1 if `ch` is not one.
    int HexDigitValue(char ch)
    {
        if (ch >= '0' && ch <= '9') return ch - '0';
        if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
        if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
        return -1;
    }

    // Appends the URL-encoded form of `length` bytes at `data` to `out`:
    // alphanumerics are copied, whitespace becomes '+', and every other byte
    // becomes %XX with upper-case hex digits.
    void AppendUrlEncoded(const char* data, size_t length, std::string& out)
    {
        static const char kHexDigits[] = "0123456789ABCDEF";

        for (size_t i = 0; i < length; ++i)
        {
            // The ctype functions require a value representable as unsigned char.
            const unsigned char byte = static_cast<unsigned char>(data[i]);
            if (isalnum(byte))
            {
                out.push_back(static_cast<char>(byte));
            }
            else if (isspace(byte))
            {
                out.push_back('+');
            }
            else
            {
                out.push_back('%');
                out.push_back(kHexDigits[byte >> 4]);
                out.push_back(kHexDigits[byte & 0x0F]);
            }
        }
    }

    // Appends the percent-decoded form of the NUL-terminated `text` to `out`:
    // %XX becomes the byte 0xXX and '+' becomes a space. A '%' that is not
    // followed by two hex digits (including one cut off by the end of the
    // string) is copied through unchanged instead of reading past the terminator.
    void AppendPercentDecoded(const char* text, std::string& out)
    {
        if (text == NULL)
            return;

        const size_t length = strlen(text);
        size_t pos = 0;
        while (pos < length)
        {
            const char current = text[pos];
            if (current == '%' && pos + 2 < length)
            {
                const int high = HexDigitValue(text[pos + 1]);
                const int low = HexDigitValue(text[pos + 2]);
                if (high >= 0 && low >= 0)
                {
                    out.push_back(static_cast<char>((high << 4) | low));
                    pos += 3;
                    continue;
                }
            }

            out.push_back(current == '+' ? ' ' : current);
            ++pos;
        }
    }

    // If `text` holds a complete "\uXXXX" escape (four hex digits) at `pos`,
    // stores the UTF-16 code unit in `unit` and returns true.
    bool ParseUnicodeEscape(const std::string& text, size_t pos, WCHAR& unit)
    {
        if (pos + 6 > text.size() || text[pos] != '\\' || text[pos + 1] != 'u')
            return false;

        unsigned int value = 0;
        for (size_t k = 2; k < 6; ++k)
        {
            const int digit = HexDigitValue(text[pos + k]);
            if (digit < 0)
                return false;
            value = (value << 4) | static_cast<unsigned int>(digit);
        }

        unit = static_cast<WCHAR>(value);
        return true;
    }

    // Converts the accumulated UTF-16 code units to the narrow output code page,
    // appends them to `out`, and empties `pending`.
    void FlushWideRun(std::wstring& pending, std::string& out)
    {
        if (pending.empty())
            return;

        std::string narrow;
        if (ConvertFromWide(kNarrowOutputCodePage, pending.c_str(), narrow))
            out.append(narrow);
        pending.clear();
    }

    // Appends `text` to `out`, replacing every "\uXXXX" escape with the
    // corresponding character in the narrow output code page. Anything that is
    // not a complete escape is copied through unchanged.
    void AppendUnicodeEscapeDecoded(const char* text, std::string& out)
    {
        if (text == NULL)
            return;

        const std::string input(text);
        std::wstring pending; // consecutive escapes are converted together so surrogate pairs stay intact
        size_t pos = 0;
        while (pos < input.size())
        {
            WCHAR unit = 0;
            if (ParseUnicodeEscape(input, pos, unit))
            {
                // "\u0000" contributes nothing (it would terminate the wide string).
                if (unit != 0)
                    pending.push_back(unit);
                pos += 6;
                continue;
            }

            FlushWideRun(pending, out);
            out.push_back(input[pos]);
            ++pos;
        }
        FlushWideRun(pending, out);
    }
}

// ---------------------------------------------------------------------------
// ANSI code page -> UTF-16
// ---------------------------------------------------------------------------

// Converts an ANSI-code-page string to UTF-16.
// Returns a new[]-allocated string the caller must delete[], or NULL on failure.
WCHAR* CharEncoding::ASCII2UNICODE(IN LPCCH lpASCIIStr)
{
    std::wstring wide;
    if (!ConvertToWide(kNarrowInputCodePage, lpASCIIStr, wide))
        return NULL;
    return NewCString(wide);
}

// Converts an ANSI-code-page string to UTF-16 into a caller-supplied buffer of
// nUNICODEStrLen WCHARs. Returns FALSE on failure or if the buffer is too small.
BOOL CharEncoding::ASCII2UNICODE(IN LPCCH lpASCIIStr, OUT PWCH pUNICODEStr, IN CONST INT& nUNICODEStrLen)
{
    std::wstring wide;
    if (!ConvertToWide(kNarrowInputCodePage, lpASCIIStr, wide))
        return FALSE;
    return CopyToCallerBuffer(wide, pUNICODEStr, nUNICODEStrLen);
}

// Converts an ANSI-code-page string to UTF-16. strResult is replaced on success
// and left untouched on failure.
BOOL CharEncoding::ASCII2UNICODE(IN LPCCH lpASCIIStr, OUT wstring &strResult)
{
    return ConvertToWide(kNarrowInputCodePage, lpASCIIStr, strResult) ? TRUE : FALSE;
}

// ---------------------------------------------------------------------------
// UTF-16 -> OEM code page
// ---------------------------------------------------------------------------

// Converts a UTF-16 string to the OEM code page.
// Returns a new[]-allocated string the caller must delete[], or NULL on failure.
CHAR* CharEncoding::UNICODE2ASCII(IN LPWCH lpUNICODEStr)
{
    std::string narrow;
    if (!ConvertFromWide(kNarrowOutputCodePage, lpUNICODEStr, narrow))
        return NULL;
    return NewCString(narrow);
}

// Converts a UTF-16 string to the OEM code page into a caller-supplied buffer of
// nASCIIStrLen bytes. Returns FALSE on failure or if the buffer is too small.
BOOL CharEncoding::UNICODE2ASCII(IN LPWCH lpUNICODEStr, OUT LPCH pASCIIStr, IN CONST INT& nASCIIStrLen)
{
    std::string narrow;
    if (!ConvertFromWide(kNarrowOutputCodePage, lpUNICODEStr, narrow))
        return FALSE;
    return CopyToCallerBuffer(narrow, pASCIIStr, nASCIIStrLen);
}

// Converts a UTF-16 string to the OEM code page. strResult is replaced on
// success and left untouched on failure.
BOOL CharEncoding::UNICODE2ASCII(IN LPWCH lpUNICODEStr, OUT string &strResult)
{
    return ConvertFromWide(kNarrowOutputCodePage, lpUNICODEStr, strResult) ? TRUE : FALSE;
}

// ---------------------------------------------------------------------------
// UTF-16 -> UTF-8
// ---------------------------------------------------------------------------

// Converts a UTF-16 string to UTF-8.
// Returns a new[]-allocated string the caller must delete[], or NULL on failure.
CHAR* CharEncoding::UNICODE2UTF8(IN LPWCH lpUNICODEStr)
{
    std::string utf8;
    if (!ConvertFromWide(CP_UTF8, lpUNICODEStr, utf8))
        return NULL;
    return NewCString(utf8);
}

// Converts a UTF-16 string to UTF-8 into a caller-supplied buffer of
// nUTF8StrLen bytes. Returns FALSE on failure or if the buffer is too small.
BOOL CharEncoding::UNICODE2UTF8(IN LPWCH lpUNICODEStr, OUT LPCH pUTF8Str, IN CONST INT& nUTF8StrLen)
{
    std::string utf8;
    if (!ConvertFromWide(CP_UTF8, lpUNICODEStr, utf8))
        return FALSE;
    return CopyToCallerBuffer(utf8, pUTF8Str, nUTF8StrLen);
}

// Converts a UTF-16 string to UTF-8. strResult is replaced on success and left
// untouched on failure.
BOOL CharEncoding::UNICODE2UTF8(IN LPWCH lpUNICODEStr, OUT string &strResult)
{
    return ConvertFromWide(CP_UTF8, lpUNICODEStr, strResult) ? TRUE : FALSE;
}

// ---------------------------------------------------------------------------
// ANSI code page -> UTF-8 (via UTF-16)
// ---------------------------------------------------------------------------

// Converts an ANSI-code-page string to UTF-8.
// Returns a new[]-allocated string the caller must delete[], or NULL on failure.
CHAR* CharEncoding::ASCII2UTF8(IN LPCCH lpASCIIStr)
{
    std::string utf8;
    if (!Transcode(kNarrowInputCodePage, CP_UTF8, lpASCIIStr, utf8))
        return NULL;
    return NewCString(utf8);
}

// Converts an ANSI-code-page string to UTF-8 into a caller-supplied buffer of
// nUTF8StrLen bytes. Returns FALSE on failure or if the buffer is too small.
BOOL CharEncoding::ASCII2UTF8(IN LPCCH lpASCIIStr, OUT LPCH pUTF8Str, IN CONST INT& nUTF8StrLen)
{
    std::string utf8;
    if (!Transcode(kNarrowInputCodePage, CP_UTF8, lpASCIIStr, utf8))
        return FALSE;
    return CopyToCallerBuffer(utf8, pUTF8Str, nUTF8StrLen);
}

// Converts an ANSI-code-page string to UTF-8. strResult is replaced on success
// and left untouched on failure.
BOOL CharEncoding::ASCII2UTF8(IN LPCCH lpASCIIStr, OUT string &strResult)
{
    return Transcode(kNarrowInputCodePage, CP_UTF8, lpASCIIStr, strResult) ? TRUE : FALSE;
}

// ---------------------------------------------------------------------------
// UTF-8 -> UTF-16
// ---------------------------------------------------------------------------

// Converts a UTF-8 string to UTF-16.
// Returns a new[]-allocated string the caller must delete[], or NULL on failure.
WCHAR* CharEncoding::UTF82UNICODE(IN LPCCH lpUTF8)
{
    std::wstring wide;
    if (!ConvertToWide(CP_UTF8, lpUTF8, wide))
        return NULL;
    return NewCString(wide);
}

// Converts a UTF-8 string to UTF-16 into a caller-supplied buffer of
// nUNICODEStrLen WCHARs. Returns FALSE on failure or if the buffer is too small.
BOOL CharEncoding::UTF82UNICODE(IN LPCCH lpUTF8, OUT PWCH pUNICODEStr, IN CONST INT& nUNICODEStrLen)
{
    std::wstring wide;
    if (!ConvertToWide(CP_UTF8, lpUTF8, wide))
        return FALSE;
    return CopyToCallerBuffer(wide, pUNICODEStr, nUNICODEStrLen);
}

// Converts a UTF-8 string to UTF-16. strResult is replaced on success and left
// untouched on failure.
BOOL CharEncoding::UTF82UNICODE(IN LPCCH lpUTF8, OUT wstring &strResult)
{
    return ConvertToWide(CP_UTF8, lpUTF8, strResult) ? TRUE : FALSE;
}

// ---------------------------------------------------------------------------
// UTF-8 -> OEM code page (via UTF-16)
// ---------------------------------------------------------------------------

// Converts a UTF-8 string to the OEM code page.
// Returns a new[]-allocated string the caller must delete[], or NULL on failure.
CHAR* CharEncoding::UTF82ASCII(IN LPCCH lpUTF8)
{
    std::string narrow;
    if (!Transcode(CP_UTF8, kNarrowOutputCodePage, lpUTF8, narrow))
        return NULL;
    return NewCString(narrow);
}

// Converts a UTF-8 string to the OEM code page into a caller-supplied buffer of
// nASCIIStrLen bytes. Returns FALSE on failure or if the buffer is too small.
BOOL CharEncoding::UTF82ASCII(IN LPCCH lpUTF8, OUT LPCH pASCIIStr, IN CONST INT& nASCIIStrLen)
{
    std::string narrow;
    if (!Transcode(CP_UTF8, kNarrowOutputCodePage, lpUTF8, narrow))
        return FALSE;
    return CopyToCallerBuffer(narrow, pASCIIStr, nASCIIStrLen);
}

// Converts a UTF-8 string to the OEM code page. strResult is replaced on
// success and left untouched on failure.
BOOL CharEncoding::UTF82ASCII(IN LPCCH lpUTF8, OUT string &strResult)
{
    return Transcode(CP_UTF8, kNarrowOutputCodePage, lpUTF8, strResult) ? TRUE : FALSE;
}

// ---------------------------------------------------------------------------
// Hex helpers used for URL decoding
// ---------------------------------------------------------------------------

// Returns the value (0..15) of a hexadecimal digit, or -1 if ch is not one.
char CharEncoding::CharToInt(char ch)
{
    return static_cast<char>(HexDigitValue(ch));
}

// Combines two hexadecimal digit characters into one byte, e.g. {'B','0'} -> 0xB0.
// The result is not meaningful when either character is not a hex digit.
char CharEncoding::StrToBin(IN char (&str)[2])
{
    const char high = CharToInt(str[0]); // 'B' -> 0x0B
    const char low = CharToInt(str[1]);  // '0' -> 0x00
    return static_cast<char>((high << 4) | low);
}

// Converts pLen bytes of double-byte (ANSI code page) text at pText to UTF-8.
// pOut is cleared first. Bytes below 0x80 are copied as-is (NUL bytes are
// skipped); any other byte is treated as the lead byte of a two-byte character.
void CharEncoding::GB2312ToUTF_8(string& pOut, const char *pText, int pLen)
{
    pOut.clear();
    if (pText == NULL)
        return;

    int i = 0;
    while (i < pLen)
    {
        const unsigned char lead = static_cast<unsigned char>(pText[i]);
        if (lead < 0x80)
        {
            // Plain ASCII needs no conversion.
            if (lead != 0)
                pOut.push_back(pText[i]);
            ++i;
            continue;
        }

        // Never read past the end of the input when the last byte is a lead byte.
        const int charBytes = (pLen - i >= 2) ? 2 : 1;

        WCHAR wide[2] = { 0 }; // one code unit plus terminator
        if (MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, pText + i, charBytes, wide, 1) > 0)
        {
            std::string utf8;
            if (ConvertFromWide(CP_UTF8, wide, utf8))
                pOut.append(utf8);
        }
        i += charBytes;
    }
}

// ---------------------------------------------------------------------------
// URL encoding
// ---------------------------------------------------------------------------

// URL-encodes the bytes of pText as they are (no character-set conversion).
// Returns an empty string when pText is NULL.
string CharEncoding::EnCode_GB2312URL(IN CHAR* pText)
{
    string encoded;
    if (pText != NULL)
        AppendUrlEncoded(pText, strlen(pText), encoded);
    return encoded;
}

// URL-encodes the bytes of pText as they are and appends the result to
// strResult (existing content is kept). Does nothing when pText is NULL.
void CharEncoding::EnCode_GB2312URL(IN CHAR* pText, OUT string& strResult)
{
    if (pText != NULL)
        AppendUrlEncoded(pText, strlen(pText), strResult);
}

// Converts pText from the ANSI code page to UTF-8 and URL-encodes the result.
// Returns an empty string when the conversion fails.
string CharEncoding::EnCode_UTF8URL(IN const CHAR* pText)
{
    string encoded;
    std::string utf8;
    if (Transcode(kNarrowInputCodePage, CP_UTF8, pText, utf8))
        AppendUrlEncoded(utf8.data(), utf8.size(), encoded);
    return encoded;
}

// Converts pText from the ANSI code page to UTF-8, URL-encodes the result and
// appends it to strResult. Appends nothing when the conversion fails.
void CharEncoding::EnCode_UTF8URL(IN const CHAR* pText, OUT string& strResult)
{
    std::string utf8;
    if (Transcode(kNarrowInputCodePage, CP_UTF8, pText, utf8))
        AppendUrlEncoded(utf8.data(), utf8.size(), strResult);
}

// Not implemented: always returns an empty string.
string CharEncoding::EnCode_UNICODEURL(IN const CHAR* pText)
{
    return "";
}

// ---------------------------------------------------------------------------
// URL decoding
// ---------------------------------------------------------------------------

// Percent-decodes pURLText ("%XX" -> byte, '+' -> space) without any
// character-set conversion. Malformed escapes are copied through unchanged.
string CharEncoding::DeCode_URLGB2312(IN const CHAR* pURLText)
{
    string decoded;
    AppendPercentDecoded(pURLText, decoded);
    return decoded;
}

// Percent-decodes pURLText and appends the result to strResult
// (existing content is kept).
void CharEncoding::DeCode_URLGB2312(IN const CHAR* pURLText, OUT string& strResult)
{
    AppendPercentDecoded(pURLText, strResult);
}

// Percent-decodes pURLText, treats the decoded bytes as UTF-8 and converts them
// to the OEM code page. Returns an empty string when the conversion fails.
string CharEncoding::DeCode_URLUTF8(IN const CHAR* pURLText)
{
    string converted;
    std::string utf8;
    AppendPercentDecoded(pURLText, utf8);
    Transcode(CP_UTF8, kNarrowOutputCodePage, utf8.c_str(), converted);
    return converted;
}

// Percent-decodes pURLText, treats the decoded bytes as UTF-8 and converts them
// to the OEM code page. strResult is replaced on success and left untouched
// when the conversion fails.
void CharEncoding::DeCode_URLUTF8(IN const CHAR* pURLText, OUT string& strResult)
{
    std::string utf8;
    AppendPercentDecoded(pURLText, utf8);
    Transcode(CP_UTF8, kNarrowOutputCodePage, utf8.c_str(), strResult);
}

// Replaces every "\uXXXX" escape in pURLText with the corresponding character
// in the OEM code page; all other text is returned unchanged.
string CharEncoding::DeCode_URLUNICODE(IN const CHAR* pURLText)
{
    string decoded;
    AppendUnicodeEscapeDecoded(pURLText, decoded);
    return decoded;
}

// Same as the overload above, but appends the decoded text to strResult
// (existing content is kept).
void CharEncoding::DeCode_URLUNICODE(IN const CHAR* pURLText, OUT string& strResult)
{
    AppendUnicodeEscapeDecoded(pURLText, strResult);
}