iso2022_cnext.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
/*
 * Copyright (C) 1999-2001, 2008, 2016 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, see <https://www.gnu.org/licenses/>.
 */
/*
 * ISO-2022-CN-EXT
 */
/* Specification: RFC 1922 */
  23. #define ESC 0x1b
  24. #define SO 0x0e
  25. #define SI 0x0f
  26. /*
  27. * The state is composed of one of the following values
  28. */
  29. #define STATE_ASCII 0
  30. #define STATE_TWOBYTE 1
  31. /*
  32. * and one of the following values, << 8
  33. */
  34. #define STATE2_NONE 0
  35. #define STATE2_DESIGNATED_GB2312 1
  36. #define STATE2_DESIGNATED_CNS11643_1 2
  37. #define STATE2_DESIGNATED_ISO_IR_165 3
  38. /*
  39. * and one of the following values, << 16
  40. */
  41. #define STATE3_NONE 0
  42. #define STATE3_DESIGNATED_CNS11643_2 1
  43. /*
  44. * and one of the following values, << 24
  45. */
  46. #define STATE4_NONE 0
  47. #define STATE4_DESIGNATED_CNS11643_3 1
  48. #define STATE4_DESIGNATED_CNS11643_4 2
  49. #define STATE4_DESIGNATED_CNS11643_5 3
  50. #define STATE4_DESIGNATED_CNS11643_6 4
  51. #define STATE4_DESIGNATED_CNS11643_7 5
  52. #define SPLIT_STATE \
  53. unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24
  54. #define COMBINE_STATE \
  55. state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1
  56. static int
  57. iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
  58. {
  59. state_t state = conv->istate;
  60. SPLIT_STATE;
  61. int count = 0;
  62. unsigned char c;
  63. for (;;) {
  64. c = *s;
  65. if (c == ESC) {
  66. if (n < count+4)
  67. goto none;
  68. if (s[1] == '$') {
  69. if (s[2] == ')') {
  70. if (s[3] == 'A') {
  71. state2 = STATE2_DESIGNATED_GB2312;
  72. s += 4; count += 4;
  73. if (n < count+1)
  74. goto none;
  75. continue;
  76. }
  77. if (s[3] == 'G') {
  78. state2 = STATE2_DESIGNATED_CNS11643_1;
  79. s += 4; count += 4;
  80. if (n < count+1)
  81. goto none;
  82. continue;
  83. }
  84. if (s[3] == 'E') {
  85. state2 = STATE2_DESIGNATED_ISO_IR_165;
  86. s += 4; count += 4;
  87. if (n < count+1)
  88. goto none;
  89. continue;
  90. }
  91. }
  92. if (s[2] == '*') {
  93. if (s[3] == 'H') {
  94. state3 = STATE3_DESIGNATED_CNS11643_2;
  95. s += 4; count += 4;
  96. if (n < count+1)
  97. goto none;
  98. continue;
  99. }
  100. }
  101. if (s[2] == '+') {
  102. if (s[3] == 'I') {
  103. state4 = STATE4_DESIGNATED_CNS11643_3;
  104. s += 4; count += 4;
  105. if (n < count+1)
  106. goto none;
  107. continue;
  108. }
  109. if (s[3] == 'J') {
  110. state4 = STATE4_DESIGNATED_CNS11643_4;
  111. s += 4; count += 4;
  112. if (n < count+1)
  113. goto none;
  114. continue;
  115. }
  116. if (s[3] == 'K') {
  117. state4 = STATE4_DESIGNATED_CNS11643_5;
  118. s += 4; count += 4;
  119. if (n < count+1)
  120. goto none;
  121. continue;
  122. }
  123. if (s[3] == 'L') {
  124. state4 = STATE4_DESIGNATED_CNS11643_6;
  125. s += 4; count += 4;
  126. if (n < count+1)
  127. goto none;
  128. continue;
  129. }
  130. if (s[3] == 'M') {
  131. state4 = STATE4_DESIGNATED_CNS11643_7;
  132. s += 4; count += 4;
  133. if (n < count+1)
  134. goto none;
  135. continue;
  136. }
  137. }
  138. }
  139. if (s[1] == 'N') {
  140. switch (state3) {
  141. case STATE3_NONE:
  142. goto ilseq;
  143. case STATE3_DESIGNATED_CNS11643_2:
  144. if (s[2] < 0x80 && s[3] < 0x80) {
  145. int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
  146. if (ret == RET_ILSEQ)
  147. goto ilseq;
  148. if (ret != 2) abort();
  149. COMBINE_STATE;
  150. conv->istate = state;
  151. return count+4;
  152. } else
  153. goto ilseq;
  154. default: abort();
  155. }
  156. }
  157. if (s[1] == 'O') {
  158. switch (state4) {
  159. case STATE4_NONE:
  160. goto ilseq;
  161. case STATE4_DESIGNATED_CNS11643_3:
  162. if (s[2] < 0x80 && s[3] < 0x80) {
  163. int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
  164. if (ret == RET_ILSEQ)
  165. goto ilseq;
  166. if (ret != 2) abort();
  167. COMBINE_STATE;
  168. conv->istate = state;
  169. return count+4;
  170. } else
  171. goto ilseq;
  172. case STATE4_DESIGNATED_CNS11643_4:
  173. if (s[2] < 0x80 && s[3] < 0x80) {
  174. int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
  175. if (ret == RET_ILSEQ)
  176. goto ilseq;
  177. if (ret != 2) abort();
  178. COMBINE_STATE;
  179. conv->istate = state;
  180. return count+4;
  181. } else
  182. goto ilseq;
  183. case STATE4_DESIGNATED_CNS11643_5:
  184. if (s[2] < 0x80 && s[3] < 0x80) {
  185. int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
  186. if (ret == RET_ILSEQ)
  187. goto ilseq;
  188. if (ret != 2) abort();
  189. COMBINE_STATE;
  190. conv->istate = state;
  191. return count+4;
  192. } else
  193. goto ilseq;
  194. case STATE4_DESIGNATED_CNS11643_6:
  195. if (s[2] < 0x80 && s[3] < 0x80) {
  196. int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
  197. if (ret == RET_ILSEQ)
  198. goto ilseq;
  199. if (ret != 2) abort();
  200. COMBINE_STATE;
  201. conv->istate = state;
  202. return count+4;
  203. } else
  204. goto ilseq;
  205. case STATE4_DESIGNATED_CNS11643_7:
  206. if (s[2] < 0x80 && s[3] < 0x80) {
  207. int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
  208. if (ret == RET_ILSEQ)
  209. goto ilseq;
  210. if (ret != 2) abort();
  211. COMBINE_STATE;
  212. conv->istate = state;
  213. return count+4;
  214. } else
  215. goto ilseq;
  216. default: abort();
  217. }
  218. }
  219. goto ilseq;
  220. }
  221. if (c == SO) {
  222. if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
  223. goto ilseq;
  224. state1 = STATE_TWOBYTE;
  225. s++; count++;
  226. if (n < count+1)
  227. goto none;
  228. continue;
  229. }
  230. if (c == SI) {
  231. state1 = STATE_ASCII;
  232. s++; count++;
  233. if (n < count+1)
  234. goto none;
  235. continue;
  236. }
  237. break;
  238. }
  239. switch (state1) {
  240. case STATE_ASCII:
  241. if (c < 0x80) {
  242. int ret = ascii_mbtowc(conv,pwc,s,1);
  243. if (ret == RET_ILSEQ)
  244. goto ilseq;
  245. if (ret != 1) abort();
  246. if (*pwc == 0x000a || *pwc == 0x000d) {
  247. state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
  248. }
  249. COMBINE_STATE;
  250. conv->istate = state;
  251. return count+1;
  252. } else
  253. goto ilseq;
  254. case STATE_TWOBYTE:
  255. if (n < count+2)
  256. goto none;
  257. if (s[0] < 0x80 && s[1] < 0x80) {
  258. int ret;
  259. switch (state2) {
  260. case STATE2_NONE:
  261. goto ilseq;
  262. case STATE2_DESIGNATED_GB2312:
  263. ret = gb2312_mbtowc(conv,pwc,s,2); break;
  264. case STATE2_DESIGNATED_CNS11643_1:
  265. ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
  266. case STATE2_DESIGNATED_ISO_IR_165:
  267. ret = isoir165_mbtowc(conv,pwc,s,2); break;
  268. default: abort();
  269. }
  270. if (ret == RET_ILSEQ)
  271. goto ilseq;
  272. if (ret != 2) abort();
  273. COMBINE_STATE;
  274. conv->istate = state;
  275. return count+2;
  276. } else
  277. goto ilseq;
  278. default: abort();
  279. }
  280. none:
  281. COMBINE_STATE;
  282. conv->istate = state;
  283. return RET_TOOFEW(count);
  284. ilseq:
  285. COMBINE_STATE;
  286. conv->istate = state;
  287. return RET_SHIFT_ILSEQ(count);
  288. }
  289. static int
  290. iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
  291. {
  292. state_t state = conv->ostate;
  293. SPLIT_STATE;
  294. unsigned char buf[3];
  295. int ret;
  296. /* There is no need to handle Unicode 3.1 tag characters and to look for
  297. "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
  298. /* Try ASCII. */
  299. ret = ascii_wctomb(conv,buf,wc,1);
  300. if (ret != RET_ILUNI) {
  301. if (ret != 1) abort();
  302. if (buf[0] < 0x80) {
  303. int count = (state1 == STATE_ASCII ? 1 : 2);
  304. if (n < count)
  305. return RET_TOOSMALL;
  306. if (state1 != STATE_ASCII) {
  307. r[0] = SI;
  308. r += 1;
  309. state1 = STATE_ASCII;
  310. }
  311. r[0] = buf[0];
  312. if (wc == 0x000a || wc == 0x000d) {
  313. state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
  314. }
  315. COMBINE_STATE;
  316. conv->ostate = state;
  317. return count;
  318. }
  319. }
  320. /* Try GB 2312-1980. */
  321. ret = gb2312_wctomb(conv,buf,wc,2);
  322. if (ret != RET_ILUNI) {
  323. if (ret != 2) abort();
  324. if (buf[0] < 0x80 && buf[1] < 0x80) {
  325. int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  326. if (n < count)
  327. return RET_TOOSMALL;
  328. if (state2 != STATE2_DESIGNATED_GB2312) {
  329. r[0] = ESC;
  330. r[1] = '$';
  331. r[2] = ')';
  332. r[3] = 'A';
  333. r += 4;
  334. state2 = STATE2_DESIGNATED_GB2312;
  335. }
  336. if (state1 != STATE_TWOBYTE) {
  337. r[0] = SO;
  338. r += 1;
  339. state1 = STATE_TWOBYTE;
  340. }
  341. r[0] = buf[0];
  342. r[1] = buf[1];
  343. COMBINE_STATE;
  344. conv->ostate = state;
  345. return count;
  346. }
  347. }
  348. ret = cns11643_wctomb(conv,buf,wc,3);
  349. if (ret != RET_ILUNI) {
  350. if (ret != 3) abort();
  351. /* Try CNS 11643-1992 Plane 1. */
  352. if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
  353. int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  354. if (n < count)
  355. return RET_TOOSMALL;
  356. if (state2 != STATE2_DESIGNATED_CNS11643_1) {
  357. r[0] = ESC;
  358. r[1] = '$';
  359. r[2] = ')';
  360. r[3] = 'G';
  361. r += 4;
  362. state2 = STATE2_DESIGNATED_CNS11643_1;
  363. }
  364. if (state1 != STATE_TWOBYTE) {
  365. r[0] = SO;
  366. r += 1;
  367. state1 = STATE_TWOBYTE;
  368. }
  369. r[0] = buf[1];
  370. r[1] = buf[2];
  371. COMBINE_STATE;
  372. conv->ostate = state;
  373. return count;
  374. }
  375. /* Try CNS 11643-1992 Plane 2. */
  376. if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
  377. int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
  378. if (n < count)
  379. return RET_TOOSMALL;
  380. if (state3 != STATE3_DESIGNATED_CNS11643_2) {
  381. r[0] = ESC;
  382. r[1] = '$';
  383. r[2] = '*';
  384. r[3] = 'H';
  385. r += 4;
  386. state3 = STATE3_DESIGNATED_CNS11643_2;
  387. }
  388. r[0] = ESC;
  389. r[1] = 'N';
  390. r[2] = buf[1];
  391. r[3] = buf[2];
  392. COMBINE_STATE;
  393. conv->ostate = state;
  394. return count;
  395. }
  396. /* Try CNS 11643-1992 Plane 3. */
  397. if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
  398. int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
  399. if (n < count)
  400. return RET_TOOSMALL;
  401. if (state4 != STATE4_DESIGNATED_CNS11643_3) {
  402. r[0] = ESC;
  403. r[1] = '$';
  404. r[2] = '+';
  405. r[3] = 'I';
  406. r += 4;
  407. state4 = STATE4_DESIGNATED_CNS11643_3;
  408. }
  409. r[0] = ESC;
  410. r[1] = 'O';
  411. r[2] = buf[1];
  412. r[3] = buf[2];
  413. COMBINE_STATE;
  414. conv->ostate = state;
  415. return count;
  416. }
  417. /* Try CNS 11643-1992 Plane 4. */
  418. if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
  419. int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
  420. if (n < count)
  421. return RET_TOOSMALL;
  422. if (state4 != STATE4_DESIGNATED_CNS11643_4) {
  423. r[0] = ESC;
  424. r[1] = '$';
  425. r[2] = '+';
  426. r[3] = 'J';
  427. r += 4;
  428. state4 = STATE4_DESIGNATED_CNS11643_4;
  429. }
  430. r[0] = ESC;
  431. r[1] = 'O';
  432. r[2] = buf[1];
  433. r[3] = buf[2];
  434. COMBINE_STATE;
  435. conv->ostate = state;
  436. return count;
  437. }
  438. /* Try CNS 11643-1992 Plane 5. */
  439. if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
  440. int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
  441. if (n < count)
  442. return RET_TOOSMALL;
  443. if (state4 != STATE4_DESIGNATED_CNS11643_5) {
  444. r[0] = ESC;
  445. r[1] = '$';
  446. r[2] = '+';
  447. r[3] = 'K';
  448. r += 4;
  449. state4 = STATE4_DESIGNATED_CNS11643_5;
  450. }
  451. r[0] = ESC;
  452. r[1] = 'O';
  453. r[2] = buf[1];
  454. r[3] = buf[2];
  455. COMBINE_STATE;
  456. conv->ostate = state;
  457. return count;
  458. }
  459. /* Try CNS 11643-1992 Plane 6. */
  460. if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
  461. int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
  462. if (n < count)
  463. return RET_TOOSMALL;
  464. if (state4 != STATE4_DESIGNATED_CNS11643_6) {
  465. r[0] = ESC;
  466. r[1] = '$';
  467. r[2] = '+';
  468. r[3] = 'L';
  469. r += 4;
  470. state4 = STATE4_DESIGNATED_CNS11643_6;
  471. }
  472. r[0] = ESC;
  473. r[1] = 'O';
  474. r[2] = buf[1];
  475. r[3] = buf[2];
  476. COMBINE_STATE;
  477. conv->ostate = state;
  478. return count;
  479. }
  480. /* Try CNS 11643-1992 Plane 7. */
  481. if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
  482. int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
  483. if (n < count)
  484. return RET_TOOSMALL;
  485. if (state4 != STATE4_DESIGNATED_CNS11643_7) {
  486. r[0] = ESC;
  487. r[1] = '$';
  488. r[2] = '+';
  489. r[3] = 'M';
  490. r += 4;
  491. state4 = STATE4_DESIGNATED_CNS11643_7;
  492. }
  493. r[0] = ESC;
  494. r[1] = 'O';
  495. r[2] = buf[1];
  496. r[3] = buf[2];
  497. COMBINE_STATE;
  498. conv->ostate = state;
  499. return count;
  500. }
  501. }
  502. /* Try ISO-IR-165. */
  503. ret = isoir165_wctomb(conv,buf,wc,2);
  504. if (ret != RET_ILUNI) {
  505. if (ret != 2) abort();
  506. if (buf[0] < 0x80 && buf[1] < 0x80) {
  507. int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
  508. if (n < count)
  509. return RET_TOOSMALL;
  510. if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
  511. r[0] = ESC;
  512. r[1] = '$';
  513. r[2] = ')';
  514. r[3] = 'E';
  515. r += 4;
  516. state2 = STATE2_DESIGNATED_ISO_IR_165;
  517. }
  518. if (state1 != STATE_TWOBYTE) {
  519. r[0] = SO;
  520. r += 1;
  521. state1 = STATE_TWOBYTE;
  522. }
  523. r[0] = buf[0];
  524. r[1] = buf[1];
  525. COMBINE_STATE;
  526. conv->ostate = state;
  527. return count;
  528. }
  529. }
  530. return RET_ILUNI;
  531. }
  532. static int
  533. iso2022_cn_ext_reset (conv_t conv, unsigned char *r, size_t n)
  534. {
  535. state_t state = conv->ostate;
  536. SPLIT_STATE;
  537. (void)state2;
  538. (void)state3;
  539. (void)state4;
  540. if (state1 != STATE_ASCII) {
  541. if (n < 1)
  542. return RET_TOOSMALL;
  543. r[0] = SI;
  544. /* conv->ostate = 0; will be done by the caller */
  545. return 1;
  546. } else
  547. return 0;
  548. }
  549. #undef COMBINE_STATE
  550. #undef SPLIT_STATE
  551. #undef STATE4_DESIGNATED_CNS11643_7
  552. #undef STATE4_DESIGNATED_CNS11643_6
  553. #undef STATE4_DESIGNATED_CNS11643_5
  554. #undef STATE4_DESIGNATED_CNS11643_4
  555. #undef STATE4_DESIGNATED_CNS11643_3
  556. #undef STATE4_NONE
  557. #undef STATE3_DESIGNATED_CNS11643_2
  558. #undef STATE3_NONE
  559. #undef STATE2_DESIGNATED_ISO_IR_165
  560. #undef STATE2_DESIGNATED_CNS11643_1
  561. #undef STATE2_DESIGNATED_GB2312
  562. #undef STATE2_NONE
  563. #undef STATE_TWOBYTE
  564. #undef STATE_ASCII