url.cpp 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. #include "StdAfx.h"
  2. #include "./url.h"
  // Implementation of the strCoding class (strCoding.cpp).
  4. strCoding::strCoding(void)
  5. {
  6. }
  7. strCoding::~strCoding(void)
  8. {
  9. }
  10. void strCoding::Gb2312ToUnicode(WCHAR* pOut, char *gbBuffer)
  11. {
  12. ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, 2, pOut, 1);
  13. return;
  14. }
  15. //-------------------------------------------------------------
  16. // 函数:UTF_8ToUnicode
  17. // 描述:把3个uft-8中文字符转为单个unicode字符;
  18. // 参数:
  19. // pOut[IN OUT]: 返回的unicode字符;
  20. // pText[IN]: uft-8字符;
  21. // 返回:null;
  22. //-------------------------------------------------------------
  23. void strCoding::UTF_8ToUnicode(WCHAR* pOut, char *pText)
  24. {
  25. char* uchar = (char *)pOut;
  26. uchar[1] = ((pText[0] & 0x0F) << 4) + ((pText[1] >> 2) & 0x0F);
  27. uchar[0] = ((pText[1] & 0x03) << 6) + (pText[2] & 0x3F);
  28. return;
  29. }
  30. //-------------------------------------------------------------
  31. // 函数:UnicodeToUTF_8
  32. // 描述:把单个unicode字符转为3个uft-8中文字符;
  33. // 参数:
  34. // pOut[IN OUT]: 返回的unicode字符;
  35. // pText[IN]: uft-8字符;
  36. // 返回:null;
  37. //-------------------------------------------------------------
  38. void strCoding::UnicodeToUTF_8(char* pOut, WCHAR* pText)
  39. {
  40. // 注意 WCHAR高低字的顺序,低字节在前,高字节在后
  41. char* pchar = (char *)pText;
  42. pOut[0] = (0xE0 | ((pchar[1] & 0xF0) >> 4));
  43. pOut[1] = (0x80 | ((pchar[1] & 0x0F) << 2)) + ((pchar[0] & 0xC0) >> 6);
  44. pOut[2] = (0x80 | (pchar[0] & 0x3F));
  45. return;
  46. }
  47. void strCoding::UnicodeToGB2312(char* pOut, WCHAR uData)
  48. {
  49. WideCharToMultiByte(CP_ACP, NULL, &uData, 1, pOut, sizeof(WCHAR), NULL, NULL);
  50. return;
  51. }
  52. //做为解Url使用
  53. char strCoding::CharToInt(char ch)
  54. {
  55. if (ch >= '0' && ch <= '9')return (char)(ch - '0');
  56. if (ch >= 'a' && ch <= 'f')return (char)(ch - 'a' + 10);
  57. if (ch >= 'A' && ch <= 'F')return (char)(ch - 'A' + 10);
  58. return -1;
  59. }
  60. char strCoding::StrToBin(char *str)
  61. {
  62. char tempWord[2];
  63. char chn;
  64. tempWord[0] = CharToInt(str[0]); //make the B to 11 -- 00001011
  65. tempWord[1] = CharToInt(str[1]); //make the 0 to 0 -- 00000000
  66. chn = (tempWord[0] << 4) | tempWord[1]; //to change the BO to 10110000
  67. return chn;
  68. }
  69. //UTF_8 转gb2312
  70. void strCoding::UTF_8ToGB2312(string &pOut, char *pText, int pLen)
  71. {
  72. char buf[4];
  73. char* rst = new char[pLen + (pLen >> 2) + 2];
  74. memset(buf, 0, 4);
  75. memset(rst, 0, pLen + (pLen >> 2) + 2);
  76. int i = 0;
  77. int j = 0;
  78. while (i < pLen)
  79. {
  80. if (*(pText + i) >= 0)
  81. {
  82. rst[j++] = pText[i++];
  83. }
  84. else
  85. {
  86. WCHAR Wtemp;
  87. UTF_8ToUnicode(&Wtemp, pText + i);
  88. UnicodeToGB2312(buf, Wtemp);
  89. unsigned short int tmp = 0;
  90. tmp = rst[j] = buf[0];
  91. tmp = rst[j + 1] = buf[1];
  92. tmp = rst[j + 2] = buf[2];
  93. //newBuf[j] = Ctemp[0];
  94. //newBuf[j + 1] = Ctemp[1];
  95. i += 3;
  96. j += 2;
  97. }
  98. }
  99. if (i == 0) rst[j] = '/0';
  100. pOut = rst;
  101. delete[]rst;
  102. }
  103. //GB2312 转为 UTF-8
  104. void strCoding::GB2312ToUTF_8(string& pOut, char *pText, int pLen)
  105. {
  106. char buf[4];
  107. memset(buf, 0, 4);
  108. pOut.clear();
  109. int i = 0;
  110. while (i < pLen)
  111. {
  112. //如果是英文直接复制就可以
  113. if (pText[i] >= 0)
  114. {
  115. char asciistr[2] = { 0 };
  116. asciistr[0] = (pText[i++]);
  117. pOut.append(asciistr);
  118. }
  119. else
  120. {
  121. WCHAR pbuffer;
  122. Gb2312ToUnicode(&pbuffer, pText + i);
  123. UnicodeToUTF_8(buf, &pbuffer);
  124. pOut.append(buf);
  125. i += 2;
  126. }
  127. }
  128. return;
  129. }
  130. //把str编码为网页中的 GB2312 url encode ,英文不变,汉字双字节 如%3D%AE%88
  131. string strCoding::UrlGB2312(char * str)
  132. {
  133. string dd;
  134. size_t len = strlen(str);
  135. for (size_t i = 0; i < len; i++)
  136. {
  137. if (isalnum((BYTE)str[i]))
  138. {
  139. char tempbuff[2];
  140. //sprintf(tempbuff,"%c",str[i]);
  141. sprintf_s(tempbuff, "%c", str[i]);
  142. dd.append(tempbuff);
  143. }
  144. else if (isspace((BYTE)str[i]))
  145. {
  146. dd.append("+");
  147. }
  148. else
  149. {
  150. char tempbuff[4];
  151. //sprintf(tempbuff,"%%%X%X",((BYTE*)str)[i] >>4,((BYTE*)str)[i] %16);
  152. sprintf_s(tempbuff, "%%%X%X", ((BYTE*)str)[i] >> 4, ((BYTE*)str)[i] % 16);
  153. dd.append(tempbuff);
  154. }
  155. }
  156. return dd;
  157. }
  158. //把str编码为网页中的 UTF-8 url encode ,英文不变,汉字三字节 如%3D%AE%88
  159. string strCoding::UrlUTF8(char * str)
  160. {
  161. string tt;
  162. string dd;
  163. GB2312ToUTF_8(tt, str, (int)strlen(str));
  164. size_t len = tt.length();
  165. for (size_t i = 0; i < len; i++)
  166. {
  167. if (isalnum((BYTE)tt.at(i)))
  168. {
  169. char tempbuff[2] = { 0 };
  170. //sprintf(tempbuff,"%c",(BYTE)tt.at(i));
  171. sprintf_s(tempbuff, "%c", (BYTE)tt.at(i));
  172. dd.append(tempbuff);
  173. }
  174. else if (isspace((BYTE)tt.at(i)))
  175. {
  176. dd.append("+");
  177. }
  178. else
  179. {
  180. char tempbuff[4];
  181. //sprintf(tempbuff,"%%%X%X",((BYTE)tt.at(i)) >>4,((BYTE)tt.at(i)) %16);
  182. sprintf_s(tempbuff, "%%%X%X", ((BYTE)tt.at(i)) >> 4, ((BYTE)tt.at(i)) % 16);
  183. dd.append(tempbuff);
  184. }
  185. }
  186. return dd;
  187. }
  188. //把url GB2312解码
  189. string strCoding::UrlGB2312Decode(string str)
  190. {
  191. string output = "";
  192. char tmp[2];
  193. int i = 0, idx = 0, ndx, len = str.length();
  194. while (i < len){
  195. if (str[i] == '%')
  196. {
  197. tmp[0] = str[i + 1];
  198. tmp[1] = str[i + 2];
  199. output += StrToBin(tmp);
  200. i = i + 3;
  201. }
  202. else if (str[i] == '+')
  203. {
  204. output += ' ';
  205. i++;
  206. }
  207. else{
  208. output += str[i];
  209. i++;
  210. }
  211. }
  212. return output;
  213. }
  214. //把url utf8解码
  215. string strCoding::UrlUTF8Decode(string str)
  216. {
  217. string output = "";
  218. string temp = UrlGB2312Decode(str);//
  219. UTF_8ToGB2312(output, (char *)temp.data(), strlen(temp.data()));
  220. return output;
  221. }