converters.h 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. /*
  2. * Copyright (C) 1999-2002, 2004-2011, 2016, 2022 Free Software Foundation, Inc.
  3. * This file is part of the GNU LIBICONV Library.
  4. *
  5. * The GNU LIBICONV Library is free software; you can redistribute it
  6. * and/or modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either version 2.1
  8. * of the License, or (at your option) any later version.
  9. *
  10. * The GNU LIBICONV Library is distributed in the hope that it will be
  11. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17. * If not, see <https://www.gnu.org/licenses/>.
  18. */
  19. /* This file defines all the converters. */
  /* Wide character type used by all converters: a UCS-4 value in the sense
     of ISO-10646-1. */
  typedef unsigned int ucs4_t;

  /* Per-conversion state. The value 0 always denotes the initial state. */
  typedef unsigned int state_t;

  /* iconv_t is opaque to users of the library; this is the real type
     behind it. */
  typedef struct conv_struct *conv_t;
  26. /*
  27. * Data type for conversion multibyte -> unicode
  28. */
  29. struct mbtowc_funcs {
  30. int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, size_t n);
  31. /*
  32. * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, size_t n)
  33. * converts the byte sequence starting at s to a wide character. Up to n bytes
  34. * are available at s. n is >= 1.
  35. * Result is number of bytes consumed (if a wide character was read),
  36. * or -1 if invalid, or -2 if n too small,
  37. * or RET_SHIFT_ILSEQ(number of bytes consumed) if invalid input after a shift
  38. * sequence was read,
  39. * or RET_TOOFEW(number of bytes consumed) if only a shift sequence was read.
  40. */
  41. int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc);
  42. /*
  43. * int xxx_flushwc (conv_t conv, ucs4_t *pwc)
  44. * returns to the initial state and stores the pending wide character, if any.
  45. * Result is 1 (if a wide character was read) or 0 if none was pending.
  46. */
  47. };
  /* xxx_mbtowc result: invalid input was found after a shift sequence of
     n bytes had been read. */
  #define RET_SHIFT_ILSEQ(n)  (-1 - 2 * (n))

  /* xxx_mbtowc result: invalid input (no shift sequence consumed). */
  #define RET_ILSEQ  RET_SHIFT_ILSEQ(0)

  /* xxx_mbtowc result: only a shift sequence of n bytes was read. */
  #define RET_TOOFEW(n)  (-2 - 2 * (n))

  /* Recover the byte count n from an encoded RET_SHIFT_ILSEQ(n) or
     RET_TOOFEW(n) value r. The cast applies to the difference, which is
     then halved as an unsigned quantity. */
  #define DECODE_SHIFT_ILSEQ(r) \
    (((unsigned int) (RET_SHIFT_ILSEQ(0) - (r))) / 2)
  #define DECODE_TOOFEW(r) \
    (((unsigned int) (RET_TOOFEW(0) - (r))) / 2)

  /* Largest n that may be passed to RET_SHIFT_ILSEQ or RET_TOOFEW.
     INT_MAX comes from <limits.h>, which this header does not include
     itself. */
  #define RET_COUNT_MAX  (INT_MAX / 2 - 1)
  60. /*
  61. * Data type for conversion unicode -> multibyte
  62. */
  63. struct wctomb_funcs {
  64. int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, size_t n);
  65. /*
  66. * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
  67. * converts the wide character wc to the character set xxx, and stores the
  68. * result beginning at r. Up to n bytes may be written at r. n is >= 1.
  69. * Result is number of bytes written, or -1 if invalid, or -2 if n too small.
  70. */
  71. int (*xxx_reset) (conv_t conv, unsigned char *r, size_t n);
  72. /*
  73. * int xxx_reset (conv_t conv, unsigned char *r, size_t n)
  74. * stores a shift sequences returning to the initial state beginning at r.
  75. * Up to n bytes may be written at r. n is >= 0.
  76. * Result is number of bytes written, or -2 if n too small.
  77. */
  78. };
  /* xxx_wctomb result: the wide character cannot be converted (invalid).
     The value is parenthesized so the macro always expands to a single
     primary expression, whatever tokens surround the use site. */
  #define RET_ILUNI  (-1)

  /* xxx_wctomb / xxx_reset result: the output buffer is too small.
     Parenthesized for the same reason as RET_ILUNI. */
  #define RET_TOOSMALL  (-2)
  83. /*
  84. * Contents of a conversion descriptor.
  85. */
  86. struct conv_struct {
  87. struct loop_funcs lfuncs;
  88. /* Input (conversion multibyte -> unicode) */
  89. int iindex;
  90. struct mbtowc_funcs ifuncs;
  91. state_t istate;
  92. /* Output (conversion unicode -> multibyte) */
  93. int oindex;
  94. struct wctomb_funcs ofuncs;
  95. int oflags;
  96. state_t ostate;
  97. /* Operation flags */
  98. int transliterate;
  99. int discard_ilseq;
  100. #ifndef LIBICONV_PLUG
  101. struct iconv_fallbacks fallbacks;
  102. struct iconv_hooks hooks;
  103. #endif
  104. };
  105. /*
  106. * Include all the converters.
  107. */
  108. #include "ascii.h"
  109. /* General multi-byte encodings */
  110. #include "utf8.h"
  111. #include "ucs2.h"
  112. #include "ucs2be.h"
  113. #include "ucs2le.h"
  114. #include "ucs4.h"
  115. #include "ucs4be.h"
  116. #include "ucs4le.h"
  117. #include "utf16.h"
  118. #include "utf16be.h"
  119. #include "utf16le.h"
  120. #include "utf32.h"
  121. #include "utf32be.h"
  122. #include "utf32le.h"
  123. #include "utf7.h"
  124. #include "ucs2internal.h"
  125. #include "ucs2swapped.h"
  126. #include "ucs4internal.h"
  127. #include "ucs4swapped.h"
  128. #include "c99.h"
  129. #include "java.h"
  130. /* 8-bit encodings */
  131. #include "iso8859_1.h"
  132. #include "iso8859_2.h"
  133. #include "iso8859_3.h"
  134. #include "iso8859_4.h"
  135. #include "iso8859_5.h"
  136. #include "iso8859_6.h"
  137. #include "iso8859_7.h"
  138. #include "iso8859_8.h"
  139. #include "iso8859_9.h"
  140. #include "iso8859_10.h"
  141. #include "iso8859_11.h"
  142. #include "iso8859_13.h"
  143. #include "iso8859_14.h"
  144. #include "iso8859_15.h"
  145. #include "iso8859_16.h"
  146. #include "koi8_r.h"
  147. #include "koi8_u.h"
  148. #include "koi8_ru.h"
  149. #include "cp1250.h"
  150. #include "cp1251.h"
  151. #include "cp1252.h"
  152. #include "cp1253.h"
  153. #include "cp1254.h"
  154. #include "cp1255.h"
  155. #include "cp1256.h"
  156. #include "cp1257.h"
  157. #include "cp1258.h"
  158. #include "cp850.h"
  159. #include "cp862.h"
  160. #include "cp866.h"
  161. #include "cp1131.h"
  162. #include "mac_roman.h"
  163. #include "mac_centraleurope.h"
  164. #include "mac_iceland.h"
  165. #include "mac_croatian.h"
  166. #include "mac_romania.h"
  167. #include "mac_cyrillic.h"
  168. #include "mac_ukraine.h"
  169. #include "mac_greek.h"
  170. #include "mac_turkish.h"
  171. #include "mac_hebrew.h"
  172. #include "mac_arabic.h"
  173. #include "mac_thai.h"
  174. #include "hp_roman8.h"
  175. #include "nextstep.h"
  176. #include "armscii_8.h"
  177. #include "georgian_academy.h"
  178. #include "georgian_ps.h"
  179. #include "koi8_t.h"
  180. #include "pt154.h"
  181. #include "rk1048.h"
  182. #include "mulelao.h"
  183. #include "cp1133.h"
  184. #include "tis620.h"
  185. #include "cp874.h"
  186. #include "viscii.h"
  187. #include "tcvn.h"
  188. /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
  /* Summary entry used by the CJK character set tables. */
  typedef struct {
    /* Index into the big table. */
    unsigned short indx;
    /* Bitmask of used entries. */
    unsigned short used;
  } Summary16;
  193. #include "iso646_jp.h"
  194. #include "jisx0201.h"
  195. #include "jisx0208.h"
  196. #include "jisx0212.h"
  197. #include "iso646_cn.h"
  198. #include "gb2312.h"
  199. #include "isoir165.h"
  200. /*#include "gb12345.h"*/
  201. #include "gbk.h"
  202. #include "cns11643.h"
  203. #include "big5.h"
  204. #include "ksc5601.h"
  205. #include "johab_hangul.h"
  206. /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
  207. #include "euc_jp.h"
  208. #include "sjis.h"
  209. #include "cp932.h"
  210. #include "iso2022_jp.h"
  211. #include "iso2022_jp1.h"
  212. #include "iso2022_jp2.h"
  213. #include "iso2022_jpms.h"
  214. #include "euc_cn.h"
  215. #include "ces_gbk.h"
  216. #include "cp936.h"
  217. #include "gb18030.h"
  218. #include "iso2022_cn.h"
  219. #include "iso2022_cnext.h"
  220. #include "hz.h"
  221. #include "euc_tw.h"
  222. #include "ces_big5.h"
  223. #include "cp950.h"
  224. #include "big5hkscs1999.h"
  225. #include "big5hkscs2001.h"
  226. #include "big5hkscs2004.h"
  227. #include "big5hkscs2008.h"
  228. #include "euc_kr.h"
  229. #include "cp949.h"
  230. #include "johab.h"
  231. #include "iso2022_kr.h"
  232. /* Encodings used by system dependent locales. */
  233. #ifdef USE_AIX
  234. #include "cp856.h"
  235. #include "cp922.h"
  236. #include "cp943.h"
  237. #include "cp1046.h"
  238. #include "cp1124.h"
  239. #include "cp1129.h"
  240. #include "cp1161.h"
  241. #include "cp1162.h"
  242. #include "cp1163.h"
  243. #endif
  244. #ifdef USE_OSF1
  245. #include "dec_kanji.h"
  246. #include "dec_hanyu.h"
  247. #endif
  248. #ifdef USE_DOS
  249. #include "cp437.h"
  250. #include "cp737.h"
  251. #include "cp775.h"
  252. #include "cp852.h"
  253. #include "cp853.h"
  254. #include "cp855.h"
  255. #include "cp857.h"
  256. #include "cp858.h"
  257. #include "cp860.h"
  258. #include "cp861.h"
  259. #include "cp863.h"
  260. #include "cp864.h"
  261. #include "cp865.h"
  262. #include "cp869.h"
  263. #include "cp1125.h"
  264. #endif
  265. #ifdef USE_ZOS
  266. #define DEDUPLICATE_TABLES 1
  267. #include "ebcdic037.h"
  268. #include "ebcdic273.h"
  269. #include "ebcdic277.h"
  270. #include "ebcdic278.h"
  271. #include "ebcdic280.h"
  272. #include "ebcdic282.h"
  273. #include "ebcdic284.h"
  274. #include "ebcdic285.h"
  275. #include "ebcdic297.h"
  276. #include "ebcdic423.h"
  277. #include "ebcdic424.h"
  278. #include "ebcdic425.h"
  279. #include "ebcdic500.h"
  280. #include "ebcdic838.h"
  281. #include "ebcdic870.h"
  282. #include "ebcdic871.h"
  283. #include "ebcdic875.h"
  284. #include "ebcdic880.h"
  285. #include "ebcdic905.h"
  286. #include "ebcdic924.h"
  287. #include "ebcdic1025.h"
  288. #include "ebcdic1026.h"
  289. #include "ebcdic1047.h"
  290. #include "ebcdic1097.h"
  291. #include "ebcdic1112.h"
  292. #include "ebcdic1122.h"
  293. #include "ebcdic1123.h"
  294. #include "ebcdic1130.h"
  295. #include "ebcdic1132.h"
  296. #include "ebcdic1137.h"
  297. #include "ebcdic1140.h"
  298. #include "ebcdic1141.h"
  299. #include "ebcdic1142.h"
  300. #include "ebcdic1143.h"
  301. #include "ebcdic1144.h"
  302. #include "ebcdic1145.h"
  303. #include "ebcdic1146.h"
  304. #include "ebcdic1147.h"
  305. #include "ebcdic1148.h"
  306. #include "ebcdic1149.h"
  307. #include "ebcdic1153.h"
  308. #include "ebcdic1154.h"
  309. #include "ebcdic1155.h"
  310. #include "ebcdic1156.h"
  311. #include "ebcdic1157.h"
  312. #include "ebcdic1158.h"
  313. #include "ebcdic1160.h"
  314. #include "ebcdic1164.h"
  315. #include "ebcdic1165.h"
  316. #include "ebcdic1166.h"
  317. #include "ebcdic4971.h"
  318. #include "ebcdic12712.h"
  319. #include "ebcdic16804.h"
  320. #endif
  321. #ifdef USE_EXTRA
  322. #include "euc_jisx0213.h"
  323. #include "shift_jisx0213.h"
  324. #include "iso2022_jp3.h"
  325. #include "big5_2003.h"
  326. #include "tds565.h"
  327. #include "atarist.h"
  328. #include "riscos1.h"
  329. #endif