c99.h 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  1. /*
  2. * Copyright (C) 1999-2002, 2016 Free Software Foundation, Inc.
  3. * This file is part of the GNU LIBICONV Library.
  4. *
  5. * The GNU LIBICONV Library is free software; you can redistribute it
  6. * and/or modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either version 2.1
  8. * of the License, or (at your option) any later version.
  9. *
  10. * The GNU LIBICONV Library is distributed in the hope that it will be
  11. * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
  17. * If not, see <https://www.gnu.org/licenses/>.
  18. */
  19. /*
  20. * C99
  21. * This is ASCII with \uXXXX and \UXXXXXXXX escape sequences, denoting Unicode
  22. * characters. See ISO/IEC 9899:1999, section 6.4.3.
  23. * The treatment of control characters in the range U+0080..U+009F is not
  24. * specified; we pass them through unmodified.
  25. */
  26. static int
  27. c99_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
  28. {
  29. unsigned char c;
  30. ucs4_t wc;
  31. int i;
  32. c = s[0];
  33. if (c < 0xa0) {
  34. if (c != '\\') {
  35. *pwc = c;
  36. return 1;
  37. }
  38. if (n < 2)
  39. return RET_TOOFEW(0);
  40. c = s[1];
  41. if (c == 'u') {
  42. wc = 0;
  43. for (i = 2; i < 6; i++) {
  44. if (n <= i)
  45. return RET_TOOFEW(0);
  46. c = s[i];
  47. if (c >= '0' && c <= '9')
  48. c -= '0';
  49. else if (c >= 'A' && c <= 'Z')
  50. c -= 'A'-10;
  51. else if (c >= 'a' && c <= 'z')
  52. c -= 'a'-10;
  53. else
  54. goto simply_backslash;
  55. wc |= (ucs4_t) c << (4 * (5-i));
  56. }
  57. if ((wc >= 0x00a0 && !(wc >= 0xd800 && wc < 0xe000))
  58. || wc == 0x0024 || wc == 0x0040 || wc == 0x0060) {
  59. *pwc = wc;
  60. return 6;
  61. }
  62. } else if (c == 'U') {
  63. wc = 0;
  64. for (i = 2; i < 10; i++) {
  65. if (n <= i)
  66. return RET_TOOFEW(0);
  67. c = s[i];
  68. if (c >= '0' && c <= '9')
  69. c -= '0';
  70. else if (c >= 'A' && c <= 'Z')
  71. c -= 'A'-10;
  72. else if (c >= 'a' && c <= 'z')
  73. c -= 'a'-10;
  74. else
  75. goto simply_backslash;
  76. wc |= (ucs4_t) c << (4 * (9-i));
  77. }
  78. if ((wc >= 0x00a0 && !(wc >= 0xd800 && wc < 0xe000))
  79. || wc == 0x0024 || wc == 0x0040 || wc == 0x0060) {
  80. *pwc = wc;
  81. return 10;
  82. }
  83. } else
  84. goto simply_backslash;
  85. }
  86. return RET_ILSEQ;
  87. simply_backslash:
  88. *pwc = '\\';
  89. return 1;
  90. }
  91. static int
  92. c99_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
  93. {
  94. if (wc < 0xa0) {
  95. *r = wc;
  96. return 1;
  97. } else {
  98. int result;
  99. unsigned char u;
  100. if (wc < 0x10000) {
  101. result = 6;
  102. u = 'u';
  103. } else {
  104. result = 10;
  105. u = 'U';
  106. }
  107. if (n >= result) {
  108. int count;
  109. r[0] = '\\';
  110. r[1] = u;
  111. r += 2;
  112. for (count = result-3; count >= 0; count--) {
  113. unsigned int i = (wc >> (4*count)) & 0x0f;
  114. *r++ = (i < 10 ? '0'+i : 'a'-10+i);
  115. }
  116. return result;
  117. } else
  118. return RET_TOOSMALL;
  119. }
  120. }