LYFZ
/
lyfz_repos


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645
							#include "StdAfx.h"
#include "StringProcess.h"

namespace StringProcess
{
	/************************************************************************/
	/*
	Function: utf82unicode
	Purpose : Convert a NUL-terminated UTF-8 string into a newly allocated
	          wide-character string.
	Params  :
	pszutf8   NUL-terminated UTF-8 input; NULL is rejected.
	Returns : Buffer holding the converted string including its terminating
	          NUL, or NULL when the conversion fails.
	Note    : The buffer is allocated with new[]; the caller must release it
	          with delete[].
	*/
	/************************************************************************/
	WCHAR* utf82unicode(IN const char *pszutf8)
	{
		if (pszutf8 == NULL)
			return NULL;

		// Sizing pass: returns the required length in WCHARs (terminator
		// included) or 0 on failure. The error code itself is only available
		// through GetLastError(), so the length must not be compared with
		// ERROR_NO_UNICODE_TRANSLATION - doing so rejected every input whose
		// converted length happened to equal that constant.
		const int wnSize = MultiByteToWideChar(CP_UTF8, 0, pszutf8, -1, NULL, 0);
		if (wnSize <= 0)
			return NULL;

		WCHAR *pwResult = new WCHAR[wnSize];
		const int nConvertSize = MultiByteToWideChar(CP_UTF8, 0, pszutf8, -1, pwResult, wnSize);
		if (nConvertSize != wnSize)
		{
			// Second pass disagreed with the sizing pass: treat as failure.
			delete []pwResult;
			return NULL;
		}

		return pwResult;
	}

	/************************************************************************/
	/*
	Function: utf82unicode
	Purpose : Convert a NUL-terminated UTF-8 string into a caller-supplied
	          wide-character buffer.
	Params  :
	pszutf8    NUL-terminated UTF-8 input.
	pszunicode Destination buffer. No capacity is passed in, so the caller
	           must guarantee it can hold the whole converted string
	           including the terminating NUL.
	Returns : Number of WCHARs written (terminator included), or 0 on failure
	          (including a NULL argument).
	*/
	/************************************************************************/
	int utf82unicode(IN const char *pszutf8, IN OUT WCHAR *pszunicode)
	{
		if (pszutf8 == NULL || pszunicode == NULL)
			return 0;

		// Sizing pass. Failure is reported as 0 and the error code lives in
		// GetLastError(); comparing the length with
		// ERROR_NO_UNICODE_TRANSLATION wrongly rejected inputs whose converted
		// length equalled that constant.
		const int wnSize = MultiByteToWideChar(CP_UTF8, 0, pszutf8, -1, NULL, 0);
		if (wnSize <= 0)
			return 0;

		const int nConvertSize = MultiByteToWideChar(CP_UTF8, 0, pszutf8, -1, pszunicode, wnSize);
		return (nConvertSize == wnSize) ? wnSize : 0;
	}

	/************************************************************************/
	/*
	Function: unicode2acsii
	Purpose : Convert a NUL-terminated wide-character string into a newly
	          allocated multi-byte string in the OEM code page (CP_OEMCP).
	Params  :
	pszunicode NUL-terminated wide-character input; NULL is rejected.
	Returns : Buffer holding the converted string including its terminating
	          NUL, or NULL when the conversion fails.
	Note    : The buffer is allocated with new[]; the caller must release it
	          with delete[].
	          NOTE(review): ascii2unicode converts with CP_ACP while this
	          function uses CP_OEMCP - confirm the asymmetry is intended.
	*/
	/************************************************************************/
	CHAR* unicode2acsii(IN const WCHAR *pszunicode)
	{
		if (pszunicode == NULL)
			return NULL;

		// Sizing pass: returns the required byte count (terminator included)
		// or 0 on failure. The error code is only available via
		// GetLastError(), so the size must not be compared with
		// ERROR_NO_UNICODE_TRANSLATION - that rejected every input whose
		// converted size happened to equal the constant.
		const int asciisize = WideCharToMultiByte(CP_OEMCP, 0, pszunicode, -1, NULL, 0, NULL, NULL);
		if (asciisize <= 0)
			return NULL;

		CHAR *pAscii = new CHAR[asciisize];
		const int convresult = WideCharToMultiByte(CP_OEMCP, 0, pszunicode, -1, pAscii, asciisize, NULL, NULL);
		if (convresult != asciisize)
		{
			delete []pAscii;
			return NULL;
		}

		return pAscii;
	}

	/************************************************************************/
	/*
	Function: unicode2acsii
	Purpose : Convert a NUL-terminated wide-character string into a
	          caller-supplied multi-byte buffer using the OEM code page
	          (CP_OEMCP).
	Params  :
	pszunicode NUL-terminated wide-character input.
	pszacsii   Destination buffer. No capacity is passed in, so the caller
	           must guarantee it can hold the whole converted string
	           including the terminating NUL.
	Returns : Number of bytes written (terminator included), or 0 on failure
	          (including a NULL argument).
	*/
	/************************************************************************/
	int unicode2acsii(IN const WCHAR *pszunicode, IN OUT CHAR *pszacsii)
	{
		if (pszunicode == NULL || pszacsii == NULL)
			return 0;

		// Sizing pass. Failure is reported as 0 and the error code lives in
		// GetLastError(); comparing the size with
		// ERROR_NO_UNICODE_TRANSLATION wrongly rejected inputs whose converted
		// size equalled that constant.
		const int asciisize = WideCharToMultiByte(CP_OEMCP, 0, pszunicode, -1, NULL, 0, NULL, NULL);
		if (asciisize <= 0)
			return 0;

		const int convresult = WideCharToMultiByte(CP_OEMCP, 0, pszunicode, -1, pszacsii, asciisize, NULL, NULL);
		return (convresult == asciisize) ? asciisize : 0;
	}

	/************************************************************************/
	/*
	Function: utf82ascii
	Purpose : Convert a UTF-8 string to a multi-byte string by going through
	          an intermediate wide-character string
	          (utf82unicode followed by unicode2acsii).
	Params  :
	pszutf8   UTF-8 input string.
	Returns : Newly allocated converted string, or NULL if either step fails.
	Note    : The caller must release the returned buffer with delete[].
	*/
	/************************************************************************/
	CHAR* utf82ascii(const CHAR *pszutf8)
	{
		// Step 1: UTF-8 -> wide characters.
		WCHAR *pWide = utf82unicode(pszutf8);
		if (!pWide)
			return NULL;

		// Step 2: wide characters -> multi-byte; the temporary is no longer
		// needed afterwards whatever the outcome.
		CHAR *pNarrow = unicode2acsii(pWide);
		delete []pWide;

		return pNarrow;
	}

	/************************************************************************/
	/*
	Function: utf82ascii
	Purpose : Convert a UTF-8 string into a caller-supplied multi-byte buffer
	          by going through an intermediate wide-character string.
	Params  :
	pszutf8   UTF-8 input string.
	pszacsii  Destination buffer; it is passed straight to the two-argument
	          unicode2acsii, which receives no capacity for it.
	Returns : The value returned by unicode2acsii for the intermediate
	          string, or 0 if the UTF-8 to wide-character step fails.
	*/
	/************************************************************************/
	int utf82ascii(IN const CHAR *pszutf8, IN OUT CHAR* pszacsii)
	{
		// Step 1: UTF-8 -> wide characters (temporary heap buffer).
		WCHAR *pWide = utf82unicode(pszutf8);
		if (!pWide)
			return 0;

		// Step 2: wide characters -> multi-byte, written into the caller's buffer.
		const int cbWritten = unicode2acsii(pWide, pszacsii);
		delete []pWide;

		return cbWritten;
	}

	/************************************************************************/
	/*
	Function: unicode2uft8
	Purpose : Convert a NUL-terminated wide-character string into a newly
	          allocated UTF-8 string.
	Params  :
	pszunicode NUL-terminated wide-character input.
	Returns : Buffer holding the UTF-8 string including its terminating NUL,
	          or NULL when the conversion fails.
	Note    : The buffer is allocated with new[]; the caller must release it
	          with delete[].
	*/
	/************************************************************************/
	CHAR* unicode2uft8(IN const WCHAR *pszunicode)
	{
		// Sizing pass: a result of 0 means the conversion cannot be done.
		const int cbNeeded = WideCharToMultiByte(CP_UTF8, 0, pszunicode, -1, NULL, 0, NULL, NULL);
		if (cbNeeded != 0)
		{
			CHAR *pBuffer = new CHAR[cbNeeded];

			// Conversion pass: success only if it produced exactly the size
			// announced by the sizing pass.
			if (WideCharToMultiByte(CP_UTF8, 0, pszunicode, -1, pBuffer, cbNeeded, NULL, NULL) == cbNeeded)
				return pBuffer;

			delete []pBuffer;
		}

		return NULL;
	}

	/************************************************************************/
	/*
	Function: unicode2uft8
	Purpose : Convert a NUL-terminated wide-character string into a
	          caller-supplied UTF-8 buffer.
	Params  :
	pszunicode NUL-terminated wide-character input.
	pszutf8    Destination buffer. No capacity is passed in, so the caller
	           must guarantee it can hold the whole converted string
	           including the terminating NUL.
	Returns : Number of bytes written (terminator included), or 0 on failure.
	*/
	/************************************************************************/
	int unicode2uft8(IN const WCHAR *pszunicode, IN OUT CHAR* pszutf8)
	{
		// Sizing pass: a result of 0 means the conversion cannot be done.
		const int cbNeeded = WideCharToMultiByte(CP_UTF8, 0, pszunicode, -1, NULL, 0, NULL, NULL);
		if (cbNeeded == 0)
			return 0;

		// Conversion pass: must produce exactly the announced size.
		const int cbWritten = WideCharToMultiByte(CP_UTF8, 0, pszunicode, -1, pszutf8, cbNeeded, NULL, NULL);
		return (cbWritten == cbNeeded) ? cbNeeded : 0;
	}

	/************************************************************************/
	/*
	Function: ascii2unicode
	Purpose : Convert a NUL-terminated multi-byte string in the ANSI code
	          page (CP_ACP) into a newly allocated wide-character string.
	Params  :
	pszascii  NUL-terminated multi-byte input; NULL is rejected.
	Returns : Buffer holding the converted string including its terminating
	          NUL, or NULL when the conversion fails.
	Note    : The buffer is allocated with new[]; the caller must release it
	          with delete[].
	*/
	/************************************************************************/
	WCHAR* ascii2unicode(IN const CHAR* pszascii)
	{
		if (pszascii == NULL)
			return NULL;

		// Sizing pass: returns the required length in WCHARs (terminator
		// included) or 0 on failure. The error code is only available via
		// GetLastError(), so the length must not be compared with
		// ERROR_NO_UNICODE_TRANSLATION - that rejected every input whose
		// converted length happened to equal the constant.
		const int wSize = MultiByteToWideChar(CP_ACP, 0, pszascii, -1, NULL, 0);
		if (wSize <= 0)
			return NULL;

		WCHAR *punicode = new WCHAR[wSize];
		const int convresult = MultiByteToWideChar(CP_ACP, 0, pszascii, -1, punicode, wSize);
		if (convresult != wSize)
		{
			delete []punicode;
			return NULL;
		}

		return punicode;
	}

	/************************************************************************/
	/*
	Function: ascii2unicode
	Purpose : Convert a NUL-terminated multi-byte string in the ANSI code
	          page (CP_ACP) into a caller-supplied wide-character buffer.
	Params  :
	pszascii   NUL-terminated multi-byte input.
	pszunicode Destination buffer. No capacity is passed in, so the caller
	           must guarantee it can hold the whole converted string
	           including the terminating NUL.
	Returns : Number of WCHARs written (terminator included), or 0 on failure
	          (including a NULL argument). Nothing is allocated.
	*/
	/************************************************************************/
	int ascii2unicode(IN const CHAR* pszascii, IN OUT WCHAR *pszunicode)
	{
		if (pszascii == NULL || pszunicode == NULL)
			return 0;

		// Sizing pass. Failure is reported as 0 and the error code lives in
		// GetLastError(); comparing the length with
		// ERROR_NO_UNICODE_TRANSLATION wrongly rejected inputs whose converted
		// length equalled that constant.
		const int wSize = MultiByteToWideChar(CP_ACP, 0, pszascii, -1, NULL, 0);
		if (wSize <= 0)
			return 0;

		const int convresult = MultiByteToWideChar(CP_ACP, 0, pszascii, -1, pszunicode, wSize);
		return (convresult == wSize) ? wSize : 0;
	}

	/************************************************************************/
	/*
	Function: ascii2utf8
	Purpose : Convert a multi-byte string to UTF-8 by going through an
	          intermediate wide-character string
	          (ascii2unicode followed by unicode2uft8).
	Params  :
	pszascii  Multi-byte input string.
	Returns : Newly allocated UTF-8 string, or NULL if either step fails.
	Note    : The caller must release the returned buffer with delete[].
	*/
	/************************************************************************/
	CHAR* ascii2utf8(IN const CHAR* pszascii)
	{
		// Step 1: multi-byte -> wide characters.
		WCHAR *pWide = ascii2unicode(pszascii);
		if (!pWide)
			return NULL;

		// Step 2: wide characters -> UTF-8; the temporary is released
		// whatever the outcome.
		CHAR *pUtf8 = unicode2uft8(pWide);
		delete []pWide;

		return pUtf8;
	}

	/************************************************************************/
	/*
	Function: ascii2utf8
	Purpose : Convert a multi-byte string into a caller-supplied UTF-8 buffer
	          by going through an intermediate wide-character string.
	Params  :
	pszascii  Multi-byte input string.
	pszutf8   Destination buffer; it is passed straight to the two-argument
	          unicode2uft8, which receives no capacity for it.
	Returns : The value returned by unicode2uft8 for the intermediate string,
	          or 0 if the multi-byte to wide-character step fails.
	*/
	/************************************************************************/
	int ascii2utf8(IN const CHAR* pszascii, IN OUT CHAR* pszutf8)
	{
		// Step 1: multi-byte -> wide characters (temporary heap buffer).
		WCHAR *pWide = ascii2unicode(pszascii);
		if (!pWide)
			return 0;

		// Step 2: wide characters -> UTF-8, written into the caller's buffer.
		const int cbWritten = unicode2uft8(pWide, pszutf8);
		delete []pWide;

		return cbWritten;
	}

	//////////////////////////////////////////////////////////////////////////
	// Converts the two bytes at gbBuffer into a single WCHAR stored at pOut.
	// The conversion uses the ANSI code page (CP_ACP) with MB_PRECOMPOSED.
	// The API result is ignored, so the caller cannot tell whether pOut was
	// actually written.
	void Gb2312ToUnicode(WCHAR* pOut, char *gbBuffer)
	{
		const int cbSource = 2;   // bytes consumed from gbBuffer
		const int cchTarget = 1;  // pOut has room for exactly one WCHAR

		::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, cbSource, pOut, cchTarget);
	}

	//-------------------------------------------------------------
	// Function: UTF_8ToUnicode
	// Purpose : Decode one 3-byte UTF-8 sequence into a single 16-bit
	//           character.
	// Params  :
	//		pOut[OUT]: receives the decoded character; only its first two
	//		           bytes are written, low-order byte first (i.e. a
	//		           little-endian 2-byte WCHAR layout is assumed).
	//		pText[IN]: the three UTF-8 bytes; they are not validated.
	// Returns : nothing.
	//-------------------------------------------------------------
	void UTF_8ToUnicode(WCHAR* pOut, char *pText)
	{
		char *pBytes = (char *)pOut;

		// High byte: the 4 payload bits of byte 0 followed by payload bits 5..2 of byte 1.
		const int highByte = ((pText[0] & 0x0F) << 4) + ((pText[1] >> 2) & 0x0F);
		pBytes[1] = highByte;

		// Low byte: payload bits 1..0 of byte 1 followed by the 6 payload bits of byte 2.
		const int lowByte = ((pText[1] & 0x03) << 6) + (pText[2] & 0x3F);
		pBytes[0] = lowByte;
	}

	//-------------------------------------------------------------
	// Function: UnicodeToUTF_8
	// Purpose : Encode a single 16-bit character as a 3-byte UTF-8 sequence.
	// Params  :
	//		pOut[OUT]: receives exactly three bytes; no terminating NUL is
	//		           written.
	//		pText[IN]: points to the character to encode; its first two
	//		           bytes are read with the low-order byte first.
	// Returns : nothing.
	// Note    : The lead byte is always 0xE0-based, so every value is
	//           written in the 3-byte form regardless of its magnitude.
	//-------------------------------------------------------------
	void UnicodeToUTF_8(char* pOut, WCHAR* pText)
	{
		// View the WCHAR as raw bytes: index 0 is the low byte, index 1 the high byte.
		char *pBytes = (char *)pText;

		// 1110xxxx : top 4 bits of the high byte.
		pOut[0] = (0xE0 | ((pBytes[1] & 0xF0) >> 4));
		// 10xxxxxx : low 4 bits of the high byte, then top 2 bits of the low byte.
		pOut[1] = (0x80 | ((pBytes[1] & 0x0F) << 2) | ((pBytes[0] & 0xC0) >> 6));
		// 10xxxxxx : low 6 bits of the low byte.
		pOut[2] = (0x80 | (pBytes[0] & 0x3F));
	}

	void UnicodeToGB2312(char* pOut, WCHAR uData)
	{
		WideCharToMultiByte(CP_ACP, NULL, &uData, 1, pOut, sizeof(WCHAR), NULL, NULL);
		return;
	}

	// Helper for URL decoding: maps one hexadecimal digit character
	// ('0'-'9', 'a'-'f', 'A'-'F') to its numeric value 0..15.
	// Any other character yields -1.
	char CharToInt(char ch)
	{
		char value = -1;

		if ('0' <= ch && ch <= '9')
			value = (char)(ch - '0');
		else if ('a' <= ch && ch <= 'f')
			value = (char)(ch - 'a' + 10);
		else if ('A' <= ch && ch <= 'F')
			value = (char)(ch - 'A' + 10);

		return value;
	}

	// Combines the two hexadecimal digit characters str[0] and str[1] into
	// one byte, e.g. "B0" -> 0xB0. Each digit is translated by CharToInt;
	// the digits are not validated here.
	char StrToBin(char *str)
	{
		const char highNibble = CharToInt(str[0]);   // e.g. 'B' -> 11 (00001011)
		const char lowNibble = CharToInt(str[1]);    // e.g. '0' ->  0 (00000000)

		// Shift the first digit into the upper four bits: "B0" -> 10110000.
		char combined = (highNibble << 4) | lowNibble;
		return combined;
	}

	//UTF_8 转gb2312
	void UTF_8ToGB2312(string &pOut, char *pText, int pLen)
	{
		char buf[4];
		char* rst = new char[pLen + (pLen >> 2) + 2];
		memset(buf, 0, 4);
		memset(rst, 0, pLen + (pLen >> 2) + 2);

		int i = 0;
		int j = 0;

		while (i < pLen)
		{
			if (*(pText + i) >= 0)
			{
				rst[j++] = pText[i++];
			}
			else
			{
				WCHAR Wtemp;
				UTF_8ToUnicode(&Wtemp, pText + i);
				UnicodeToGB2312(buf, Wtemp);

				unsigned short int tmp = 0;
				tmp = rst[j] = buf[0];
				tmp = rst[j + 1] = buf[1];
				tmp = rst[j + 2] = buf[2];

				//newBuf[j] = Ctemp[0];
				//newBuf[j + 1] = Ctemp[1];

				i += 3;
				j += 2;
			}
		}

		if (i == 0)
			rst[j] = '/0';
		pOut = rst;
		delete[]rst;
	}

	//GB2312 转为 UTF-8
	void GB2312ToUTF_8(string& pOut, char *pText, int pLen)
	{
		char buf[4];
		memset(buf, 0, 4);

		pOut.clear();

		int i = 0;
		while (i < pLen)
		{
			//如果是英文直接复制就可以
			if (pText[i] >= 0)
			{
				char asciistr[2] = { 0 };
				asciistr[0] = (pText[i++]);
				pOut.append(asciistr);
			}
			else
			{
				WCHAR pbuffer;
				Gb2312ToUnicode(&pbuffer, pText + i);
				UnicodeToUTF_8(buf, &pbuffer);
				pOut.append(buf);
				i += 2;
			}
		}

		return;
	}

	// URL-encodes the bytes of str as they are (no character-set conversion),
	// so GB2312 text yields two %XX groups per Chinese character,
	// e.g. %D6%D0.
	//   - ASCII letters and digits are copied unchanged;
	//   - a space becomes '+';
	//   - every other byte becomes %XX with upper-case hex digits.
	// Tab, CR, LF and other whitespace are percent-encoded: writing them as
	// '+' made them decode as a space and lost the original byte.
	// A NULL str yields an empty string.
	string UrlGB2312(char * str)
	{
		string encoded;
		if (str == NULL)
			return encoded;

		static const char kHexDigits[] = "0123456789ABCDEF";

		const size_t len = strlen(str);
		encoded.reserve(len);

		for (size_t i = 0; i < len; i++)
		{
			// Classify as unsigned: the <ctype> functions must not be given
			// negative values.
			const unsigned char ch = (unsigned char)str[i];

			if (isalnum(ch))
			{
				encoded.push_back((char)ch);
			}
			else if (ch == ' ')
			{
				encoded.push_back('+');
			}
			else
			{
				encoded.push_back('%');
				encoded.push_back(kHexDigits[ch >> 4]);
				encoded.push_back(kHexDigits[ch & 0x0F]);
			}
		}

		return encoded;
	}

	// Converts str to UTF-8 with GB2312ToUTF_8 and URL-encodes the resulting
	// bytes, so a Chinese character yields three %XX groups, e.g. %E4%B8%AD.
	//   - ASCII letters and digits are copied unchanged;
	//   - a space becomes '+';
	//   - every other byte becomes %XX with upper-case hex digits.
	// Tab, CR, LF and other whitespace are percent-encoded: writing them as
	// '+' made them decode as a space and lost the original byte.
	// A NULL str yields an empty string.
	string UrlUTF8(char * str)
	{
		string encoded;
		if (str == NULL)
			return encoded;

		string utf8;
		GB2312ToUTF_8(utf8, str, (int)strlen(str));

		static const char kHexDigits[] = "0123456789ABCDEF";

		const size_t len = utf8.length();
		encoded.reserve(len);

		for (size_t i = 0; i < len; i++)
		{
			// Classify as unsigned: the <ctype> functions must not be given
			// negative values.
			const unsigned char ch = (unsigned char)utf8[i];

			if (isalnum(ch))
			{
				encoded.push_back((char)ch);
			}
			else if (ch == ' ')
			{
				encoded.push_back('+');
			}
			else
			{
				encoded.push_back('%');
				encoded.push_back(kHexDigits[ch >> 4]);
				encoded.push_back(kHexDigits[ch & 0x0F]);
			}
		}

		return encoded;
	}

	// Decodes a URL-encoded string byte by byte (no character-set
	// conversion):
	//   - "%XY" with two hexadecimal digits becomes the byte 0xXY;
	//   - '+' becomes a space;
	//   - everything else is copied unchanged.
	// A '%' that is not followed by two hexadecimal digits (including one
	// too close to the end of the string) is copied literally instead of
	// reading past the end or decoding non-hex characters.
	string UrlGB2312Decode(string str)
	{
		string output;
		const size_t len = str.length();
		output.reserve(len);

		size_t i = 0;
		while (i < len)
		{
			const char ch = str[i];

			if (ch == '%' && len - i >= 3
				&& isxdigit((unsigned char)str[i + 1])
				&& isxdigit((unsigned char)str[i + 2]))
			{
				char digits[2];
				digits[0] = str[i + 1];
				digits[1] = str[i + 2];
				output += StrToBin(digits);
				i += 3;
			}
			else
			{
				output += (ch == '+') ? ' ' : ch;
				++i;
			}
		}

		return output;
	}

	// Decodes a URL-encoded UTF-8 string: the %XX / '+' escapes are undone
	// with UrlGB2312Decode, then the resulting bytes are passed through
	// UTF_8ToGB2312. Only the bytes up to the first NUL of the unescaped
	// data are converted.
	string UrlUTF8Decode(string str)
	{
		// Step 1: undo the URL escaping; the result is raw UTF-8 bytes.
		string rawUtf8 = UrlGB2312Decode(str);

		// Step 2: convert those bytes with UTF_8ToGB2312.
		string result = "";
		const char *pBytes = rawUtf8.data();
		UTF_8ToGB2312(result, (char *)pBytes, strlen(pBytes));

		return result;
	}
}