iconv.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. /*
  2. * Copyright (C) 1999-2008, 2011, 2016 Free Software Foundation, Inc.
  3. * This file is part of the GNU LIBICONV Library.
  4. *
  5. * The GNU LIBICONV Library is free software; you can redistribute it
  6. * and/or modify it under the terms of the GNU Library General Public
  7. * License as published by the Free Software Foundation; either version 2
  8. * of the License, or (at your option) any later version.
  9. *
  10. * The GNU LIBICONV Library is distributed in the hope that it will be
  11. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Library General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Library General Public
  16. * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17. * If not, see <http://www.gnu.org/licenses/>.
  18. */
  #include <iconv.h>
  #include <limits.h>
  #include <stddef.h>
  #include <stdlib.h>
  #include <string.h>
  #include "config.h"
  #include "localcharset.h"
  #ifdef __CYGWIN__
  #include <cygwin/version.h>
  #endif
  28. #if ENABLE_EXTRA
  29. /*
  30. * Consider all system dependent encodings, for any system,
  31. * and the extra encodings.
  32. */
  33. #define USE_AIX
  34. #define USE_OSF1
  35. #define USE_DOS
  36. #define USE_EXTRA
  37. #else
  38. /*
  39. * Consider those system dependent encodings that are needed for the
  40. * current system.
  41. */
  42. #ifdef _AIX
  43. #define USE_AIX
  44. #endif
  45. #if defined(__osf__) || defined(VMS)
  46. #define USE_OSF1
  47. #endif
  48. #if defined(__DJGPP__) || (defined(_WIN32) && (defined(_MSC_VER) || defined(__MINGW32__)))
  49. #define USE_DOS
  50. #endif
  51. #endif
  52. /*
  53. * Data type for general conversion loop.
  54. */
  55. struct loop_funcs {
  56. size_t (*loop_convert) (iconv_t icd,
  57. const char* * inbuf, size_t *inbytesleft,
  58. char* * outbuf, size_t *outbytesleft);
  59. size_t (*loop_reset) (iconv_t icd,
  60. char* * outbuf, size_t *outbytesleft);
  61. };
  62. /*
  63. * Converters.
  64. */
  65. #include "converters.h"
  66. /*
  67. * Transliteration tables.
  68. */
  69. #include "cjk_variants.h"
  70. #include "translit.h"
  71. /*
  72. * Table of all supported encodings.
  73. */
  74. struct encoding {
  75. struct mbtowc_funcs ifuncs; /* conversion multibyte -> unicode */
  76. struct wctomb_funcs ofuncs; /* conversion unicode -> multibyte */
  77. int oflags; /* flags for unicode -> multibyte conversion */
  78. };
  79. #define DEFALIAS(xxx_alias,xxx) /* nothing */
  80. enum {
  81. #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  82. ei_##xxx ,
  83. #include "encodings.def"
  84. #ifdef USE_AIX
  85. # include "encodings_aix.def"
  86. #endif
  87. #ifdef USE_OSF1
  88. # include "encodings_osf1.def"
  89. #endif
  90. #ifdef USE_DOS
  91. # include "encodings_dos.def"
  92. #endif
  93. #ifdef USE_EXTRA
  94. # include "encodings_extra.def"
  95. #endif
  96. #include "encodings_local.def"
  97. #undef DEFENCODING
  98. ei_for_broken_compilers_that_dont_like_trailing_commas
  99. };
  100. #include "flags.h"
  101. static struct encoding const all_encodings[] = {
  102. #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  103. { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, ei_##xxx##_oflags },
  104. #include "encodings.def"
  105. #ifdef USE_AIX
  106. # include "encodings_aix.def"
  107. #endif
  108. #ifdef USE_OSF1
  109. # include "encodings_osf1.def"
  110. #endif
  111. #ifdef USE_DOS
  112. # include "encodings_dos.def"
  113. #endif
  114. #ifdef USE_EXTRA
  115. # include "encodings_extra.def"
  116. #endif
  117. #undef DEFENCODING
  118. #define DEFENCODING(xxx_names,xxx,xxx_ifuncs1,xxx_ifuncs2,xxx_ofuncs1,xxx_ofuncs2) \
  119. { xxx_ifuncs1,xxx_ifuncs2, xxx_ofuncs1,xxx_ofuncs2, 0 },
  120. #include "encodings_local.def"
  121. #undef DEFENCODING
  122. };
  123. #undef DEFALIAS
  124. /*
  125. * Conversion loops.
  126. */
  127. #include "loops.h"
  128. /*
  129. * Alias lookup function.
  130. * Defines
  131. * struct alias { int name; unsigned int encoding_index; };
  132. * const struct alias * aliases_lookup (const char *str, unsigned int len);
  133. * #define MAX_WORD_LENGTH ...
  134. */
  135. #if defined _AIX
  136. # include "aliases_sysaix.h"
  137. #elif defined hpux || defined __hpux
  138. # include "aliases_syshpux.h"
  139. #elif defined __osf__
  140. # include "aliases_sysosf1.h"
  141. #elif defined __sun
  142. # include "aliases_syssolaris.h"
  143. #else
  144. # include "aliases.h"
  145. #endif
  146. /*
  147. * System dependent alias lookup function.
  148. * Defines
  149. * const struct alias * aliases2_lookup (const char *str);
  150. */
  151. #if defined(USE_AIX) || defined(USE_OSF1) || defined(USE_DOS) || defined(USE_EXTRA) /* || ... */
  152. struct stringpool2_t {
  153. #define S(tag,name,encoding_index) char stringpool_##tag[sizeof(name)];
  154. #include "aliases2.h"
  155. #undef S
  156. };
  157. static const struct stringpool2_t stringpool2_contents = {
  158. #define S(tag,name,encoding_index) name,
  159. #include "aliases2.h"
  160. #undef S
  161. };
  162. #define stringpool2 ((const char *) &stringpool2_contents)
  163. static const struct alias sysdep_aliases[] = {
  164. #define S(tag,name,encoding_index) { (int)(long)&((struct stringpool2_t *)0)->stringpool_##tag, encoding_index },
  165. #include "aliases2.h"
  166. #undef S
  167. };
  168. #ifdef __GNUC__
  169. __inline
  170. #else
  171. #ifdef __cplusplus
  172. inline
  173. #endif
  174. #endif
  175. static const struct alias *
  176. aliases2_lookup (register const char *str)
  177. {
  178. const struct alias * ptr;
  179. unsigned int count;
  180. for (ptr = sysdep_aliases, count = sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0]); count > 0; ptr++, count--)
  181. if (!strcmp(str, stringpool2 + ptr->name))
  182. return ptr;
  183. return NULL;
  184. }
  185. #else
  186. #define aliases2_lookup(str) NULL
  187. #define stringpool2 NULL
  188. #endif
  189. #if 0
  /* Like !strcasecmp, except that both strings can be assumed to be ASCII
     and the first string can be assumed to be in upper case.  Only the
     second string is case-folded, so a lower-case letter in the first
     string never matches.  Returns 1 if the strings are equal, 0 if not. */
  static int strequal (const char* str1, const char* str2)
  {
    const unsigned char * upper = (const unsigned char *) str1;
    const unsigned char * other = (const unsigned char *) str2;

    for (;; upper++, other++) {
      unsigned char expected = *upper;
      unsigned char actual = *other;

      /* End of the first string: equal only if the second ends here too. */
      if (expected == '\0')
        return actual == '\0';
      if (actual >= 'a' && actual <= 'z')
        actual -= 'a'-'A';
      if (expected != actual)
        return 0;
    }
  }
  208. #endif
  209. iconv_t iconv_open (const char* tocode, const char* fromcode)
  210. {
  211. struct conv_struct * cd;
  212. unsigned int from_index;
  213. int from_wchar;
  214. unsigned int to_index;
  215. int to_wchar;
  216. int transliterate;
  217. int discard_ilseq;
  218. #include "iconv_open1.h"
  219. cd = (struct conv_struct *) malloc(from_wchar != to_wchar
  220. ? sizeof(struct wchar_conv_struct)
  221. : sizeof(struct conv_struct));
  222. if (cd == NULL) {
  223. errno = ENOMEM;
  224. return (iconv_t)(-1);
  225. }
  226. #include "iconv_open2.h"
  227. return (iconv_t)cd;
  228. invalid:
  229. errno = EINVAL;
  230. return (iconv_t)(-1);
  231. }
  232. size_t iconv (iconv_t icd,
  233. ICONV_CONST char* * inbuf, size_t *inbytesleft,
  234. char* * outbuf, size_t *outbytesleft)
  235. {
  236. conv_t cd = (conv_t) icd;
  237. if (inbuf == NULL || *inbuf == NULL)
  238. return cd->lfuncs.loop_reset(icd,outbuf,outbytesleft);
  239. else
  240. return cd->lfuncs.loop_convert(icd,
  241. (const char* *)inbuf,inbytesleft,
  242. outbuf,outbytesleft);
  243. }
  244. int iconv_close (iconv_t icd)
  245. {
  246. conv_t cd = (conv_t) icd;
  247. free(cd);
  248. return 0;
  249. }
  250. #ifndef LIBICONV_PLUG
  251. /*
  252. * Verify that a 'struct conv_struct' and a 'struct wchar_conv_struct' each
  253. * fit in an iconv_allocation_t.
  254. * If this verification fails, iconv_allocation_t must be made larger and
  255. * the major version in LIBICONV_VERSION_INFO must be bumped.
  256. * Currently 'struct conv_struct' has 21 integer/pointer fields, and
  257. * 'struct wchar_conv_struct' additionally has an 'mbstate_t' field.
  258. */
  259. typedef int verify_size_1[2 * (sizeof (struct conv_struct) <= sizeof (iconv_allocation_t)) - 1];
  260. typedef int verify_size_2[2 * (sizeof (struct wchar_conv_struct) <= sizeof (iconv_allocation_t)) - 1];
  261. int iconv_open_into (const char* tocode, const char* fromcode,
  262. iconv_allocation_t* resultp)
  263. {
  264. struct conv_struct * cd;
  265. unsigned int from_index;
  266. int from_wchar;
  267. unsigned int to_index;
  268. int to_wchar;
  269. int transliterate;
  270. int discard_ilseq;
  271. #include "iconv_open1.h"
  272. cd = (struct conv_struct *) resultp;
  273. #include "iconv_open2.h"
  274. return 0;
  275. invalid:
  276. errno = EINVAL;
  277. return -1;
  278. }
  279. int iconvctl (iconv_t icd, int request, void* argument)
  280. {
  281. conv_t cd = (conv_t) icd;
  282. switch (request) {
  283. case ICONV_TRIVIALP:
  284. *(int *)argument =
  285. ((cd->lfuncs.loop_convert == unicode_loop_convert
  286. && cd->iindex == cd->oindex)
  287. || cd->lfuncs.loop_convert == wchar_id_loop_convert
  288. ? 1 : 0);
  289. return 0;
  290. case ICONV_GET_TRANSLITERATE:
  291. *(int *)argument = cd->transliterate;
  292. return 0;
  293. case ICONV_SET_TRANSLITERATE:
  294. cd->transliterate = (*(const int *)argument ? 1 : 0);
  295. return 0;
  296. case ICONV_GET_DISCARD_ILSEQ:
  297. *(int *)argument = cd->discard_ilseq;
  298. return 0;
  299. case ICONV_SET_DISCARD_ILSEQ:
  300. cd->discard_ilseq = (*(const int *)argument ? 1 : 0);
  301. return 0;
  302. case ICONV_SET_HOOKS:
  303. if (argument != NULL) {
  304. cd->hooks = *(const struct iconv_hooks *)argument;
  305. } else {
  306. cd->hooks.uc_hook = NULL;
  307. cd->hooks.wc_hook = NULL;
  308. cd->hooks.data = NULL;
  309. }
  310. return 0;
  311. case ICONV_SET_FALLBACKS:
  312. if (argument != NULL) {
  313. cd->fallbacks = *(const struct iconv_fallbacks *)argument;
  314. } else {
  315. cd->fallbacks.mb_to_uc_fallback = NULL;
  316. cd->fallbacks.uc_to_mb_fallback = NULL;
  317. cd->fallbacks.mb_to_wc_fallback = NULL;
  318. cd->fallbacks.wc_to_mb_fallback = NULL;
  319. cd->fallbacks.data = NULL;
  320. }
  321. return 0;
  322. default:
  323. errno = EINVAL;
  324. return -1;
  325. }
  326. }
  /* An alias after its name has been converted from 'int' to 'const char*'. */
  struct nalias { const char* name; unsigned int encoding_index; };

  /*
   * qsort comparator ordering 'struct nalias' elements by ascending
   * encoding_index.  Returns a negative, zero or positive value.
   *
   * The indices are compared directly rather than subtracted: a difference
   * of two unsigned values squeezed into 'int' yields the wrong sign (or
   * overflows) once an index does not fit in 'int'.
   */
  static int compare_by_index (const void * arg1, const void * arg2)
  {
    unsigned int index1 = ((const struct nalias *) arg1)->encoding_index;
    unsigned int index2 = ((const struct nalias *) arg2)->encoding_index;

    return (index1 > index2) - (index1 < index2);
  }
  /*
   * qsort comparator for an array of 'const char *' names.
   * Names are ordered alphabetically (strcmp), except that names starting
   * with "CS" sort after all names that do not.  Returns 0 for equal names;
   * otherwise the "CS" difference is weighted by 4 so that it dominates the
   * +1/-1 contributed by the alphabetical order.
   */
  static int compare_by_name (const void * arg1, const void * arg2)
  {
    const char * lhs = *(const char * const *) arg1;
    const char * rhs = *(const char * const *) arg2;
    int order = strcmp(lhs, rhs);
    int lhs_is_cs;
    int rhs_is_cs;

    if (order == 0)
      return 0;

    lhs_is_cs = (lhs[0] == 'C' && lhs[1] == 'S');
    rhs_is_cs = (rhs[0] == 'C' && rhs[1] == 'S');
    return (lhs_is_cs - rhs_is_cs) * 4 + (order > 0 ? 1 : -1);
  }
  347. void iconvlist (int (*do_one) (unsigned int namescount,
  348. const char * const * names,
  349. void* data),
  350. void* data)
  351. {
  352. #define aliascount1 sizeof(aliases)/sizeof(aliases[0])
  353. #ifndef aliases2_lookup
  354. #define aliascount2 sizeof(sysdep_aliases)/sizeof(sysdep_aliases[0])
  355. #else
  356. #define aliascount2 0
  357. #endif
  358. #define aliascount (aliascount1+aliascount2)
  359. struct nalias aliasbuf[aliascount];
  360. const char * namesbuf[aliascount];
  361. size_t num_aliases;
  362. {
  363. /* Put all existing aliases into a buffer. */
  364. size_t i;
  365. size_t j;
  366. j = 0;
  367. for (i = 0; i < aliascount1; i++) {
  368. const struct alias * p = &aliases[i];
  369. if (p->name >= 0
  370. && p->encoding_index != ei_local_char
  371. && p->encoding_index != ei_local_wchar_t) {
  372. aliasbuf[j].name = stringpool + p->name;
  373. aliasbuf[j].encoding_index = p->encoding_index;
  374. j++;
  375. }
  376. }
  377. #ifndef aliases2_lookup
  378. for (i = 0; i < aliascount2; i++) {
  379. aliasbuf[j].name = stringpool2 + sysdep_aliases[i].name;
  380. aliasbuf[j].encoding_index = sysdep_aliases[i].encoding_index;
  381. j++;
  382. }
  383. #endif
  384. num_aliases = j;
  385. }
  386. /* Sort by encoding_index. */
  387. if (num_aliases > 1)
  388. qsort(aliasbuf, num_aliases, sizeof(struct nalias), compare_by_index);
  389. {
  390. /* Process all aliases with the same encoding_index together. */
  391. size_t j;
  392. j = 0;
  393. while (j < num_aliases) {
  394. unsigned int ei = aliasbuf[j].encoding_index;
  395. size_t i = 0;
  396. do
  397. namesbuf[i++] = aliasbuf[j++].name;
  398. while (j < num_aliases && aliasbuf[j].encoding_index == ei);
  399. if (i > 1)
  400. qsort(namesbuf, i, sizeof(const char *), compare_by_name);
  401. /* Call the callback. */
  402. if (do_one(i,namesbuf,data))
  403. break;
  404. }
  405. }
  406. #undef aliascount
  407. #undef aliascount2
  408. #undef aliascount1
  409. }
  410. /*
  411. * Table of canonical names of encodings.
  412. * Instead of strings, it contains offsets into stringpool and stringpool2.
  413. */
  414. static const unsigned short all_canonical[] = {
  415. #if defined _AIX
  416. # include "canonical_sysaix.h"
  417. #elif defined hpux || defined __hpux
  418. # include "canonical_syshpux.h"
  419. #elif defined __osf__
  420. # include "canonical_sysosf1.h"
  421. #elif defined __sun
  422. # include "canonical_syssolaris.h"
  423. #else
  424. # include "canonical.h"
  425. #endif
  426. #ifdef USE_AIX
  427. # if defined _AIX
  428. # include "canonical_aix_sysaix.h"
  429. # else
  430. # include "canonical_aix.h"
  431. # endif
  432. #endif
  433. #ifdef USE_OSF1
  434. # if defined __osf__
  435. # include "canonical_osf1_sysosf1.h"
  436. # else
  437. # include "canonical_osf1.h"
  438. # endif
  439. #endif
  440. #ifdef USE_DOS
  441. # include "canonical_dos.h"
  442. #endif
  443. #ifdef USE_EXTRA
  444. # include "canonical_extra.h"
  445. #endif
  446. #if defined _AIX
  447. # include "canonical_local_sysaix.h"
  448. #elif defined hpux || defined __hpux
  449. # include "canonical_local_syshpux.h"
  450. #elif defined __osf__
  451. # include "canonical_local_sysosf1.h"
  452. #elif defined __sun
  453. # include "canonical_local_syssolaris.h"
  454. #else
  455. # include "canonical_local.h"
  456. #endif
  457. };
  458. const char * iconv_canonicalize (const char * name)
  459. {
  460. const char* code;
  461. char buf[MAX_WORD_LENGTH+10+1];
  462. const char* cp;
  463. char* bp;
  464. const struct alias * ap;
  465. unsigned int count;
  466. unsigned int index;
  467. const char* pool;
  468. /* Before calling aliases_lookup, convert the input string to upper case,
  469. * and check whether it's entirely ASCII (we call gperf with option "-7"
  470. * to achieve a smaller table) and non-empty. If it's not entirely ASCII,
  471. * or if it's too long, it is not a valid encoding name.
  472. */
  473. for (code = name;;) {
  474. /* Search code in the table. */
  475. for (cp = code, bp = buf, count = MAX_WORD_LENGTH+10+1; ; cp++, bp++) {
  476. unsigned char c = * (unsigned char *) cp;
  477. if (c >= 0x80)
  478. goto invalid;
  479. if (c >= 'a' && c <= 'z')
  480. c -= 'a'-'A';
  481. *bp = c;
  482. if (c == '\0')
  483. break;
  484. if (--count == 0)
  485. goto invalid;
  486. }
  487. for (;;) {
  488. if (bp-buf >= 10 && memcmp(bp-10,"//TRANSLIT",10)==0) {
  489. bp -= 10;
  490. *bp = '\0';
  491. continue;
  492. }
  493. if (bp-buf >= 8 && memcmp(bp-8,"//IGNORE",8)==0) {
  494. bp -= 8;
  495. *bp = '\0';
  496. continue;
  497. }
  498. break;
  499. }
  500. if (buf[0] == '\0') {
  501. code = locale_charset();
  502. /* Avoid an endless loop that could occur when using an older version
  503. of localcharset.c. */
  504. if (code[0] == '\0')
  505. goto invalid;
  506. continue;
  507. }
  508. pool = stringpool;
  509. ap = aliases_lookup(buf,bp-buf);
  510. if (ap == NULL) {
  511. pool = stringpool2;
  512. ap = aliases2_lookup(buf);
  513. if (ap == NULL)
  514. goto invalid;
  515. }
  516. if (ap->encoding_index == ei_local_char) {
  517. code = locale_charset();
  518. /* Avoid an endless loop that could occur when using an older version
  519. of localcharset.c. */
  520. if (code[0] == '\0')
  521. goto invalid;
  522. continue;
  523. }
  524. if (ap->encoding_index == ei_local_wchar_t) {
  525. /* On systems which define __STDC_ISO_10646__, wchar_t is Unicode.
  526. This is also the case on native Woe32 systems and Cygwin >= 1.7, where
  527. we know that it is UTF-16. */
  528. #if ((defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__) || (defined __CYGWIN__ && CYGWIN_VERSION_DLL_MAJOR >= 1007)
  529. if (sizeof(wchar_t) == 4) {
  530. index = ei_ucs4internal;
  531. break;
  532. }
  533. if (sizeof(wchar_t) == 2) {
  534. # if WORDS_LITTLEENDIAN
  535. index = ei_utf16le;
  536. # else
  537. index = ei_utf16be;
  538. # endif
  539. break;
  540. }
  541. #elif __STDC_ISO_10646__
  542. if (sizeof(wchar_t) == 4) {
  543. index = ei_ucs4internal;
  544. break;
  545. }
  546. if (sizeof(wchar_t) == 2) {
  547. index = ei_ucs2internal;
  548. break;
  549. }
  550. if (sizeof(wchar_t) == 1) {
  551. index = ei_iso8859_1;
  552. break;
  553. }
  554. #endif
  555. }
  556. index = ap->encoding_index;
  557. break;
  558. }
  559. return all_canonical[index] + pool;
  560. invalid:
  561. return name;
  562. }
  563. int _libiconv_version = _LIBICONV_VERSION;
  564. #if defined __FreeBSD__ && !defined __gnu_freebsd__
  565. /* GNU libiconv is the native FreeBSD iconv implementation since 2002.
  566. It wants to define the symbols 'iconv_open', 'iconv', 'iconv_close'. */
  567. #define strong_alias(name, aliasname) _strong_alias(name, aliasname)
  568. #define _strong_alias(name, aliasname) \
  569. extern __typeof (name) aliasname __attribute__ ((alias (#name)));
  570. #undef iconv_open
  571. #undef iconv
  572. #undef iconv_close
  573. strong_alias (libiconv_open, iconv_open)
  574. strong_alias (libiconv, iconv)
  575. strong_alias (libiconv_close, iconv_close)
  576. #endif
  577. #endif