dnscrypto-proxy: Update to release 1.3.0
[tomato.git] / release / src / router / httpd / utf8.c
blob a9fa207130c57ff70e6eee6cc47e0ad163bca1ac
1 /*
3 Tomato Firmware
5 */
7 #include "tomato.h"
9 #include <ctype.h>
10 #include <sys/types.h>
// Counts how many 16-bit units are needed to hold the given UTF-8 string
// once converted to UTF-16, not counting the terminating null.
//
// Only lead bytes are inspected; trailing bytes are skipped without being
// validated. A lead byte in 0xF0..0xF4 contributes two units (a surrogate
// pair), every other non-zero byte that starts a sequence contributes one.
//
// Returns the unit count, or -1 with errno set to EILSEQ when a byte of
// 0xF5 or above is met in lead position.
static int utf8_to_utf16_size(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;
	size_t units = 0;

	for (;;) {
		unsigned int lead = *p++;
		unsigned int trail;
		unsigned int i;

		if (lead == 0)
			break;

		units++;

		/* ASCII and stray continuation bytes stand alone */
		if (lead < 0xC0)
			continue;

		if (lead >= 0xF5) {
			errno = EILSEQ;
			return -1;
		}

		/* 0xC0.. -> 1 trailing byte, 0xE0.. -> 2, 0xF0.. -> 3 */
		trail = 1 + (lead >= 0xE0) + (lead >= 0xF0);

		for (i = 0; i < trail; i++) {
			/* sequence cut short by the terminator: stop counting here */
			if (*p == 0)
				return (int)units;
			p++;
		}

		/* a complete four-byte sequence needs a second (low surrogate) unit */
		if (trail == 3)
			units++;
	}

	return (int)units;
}
// Decodes the single UTF-8 sequence at the start of s into a Unicode code
// point stored in *wc (native integer value).
//
// Returns the number of bytes consumed (1..4), 0 when s starts with the
// terminating null (*wc is set to 0), or -1 with errno set to EILSEQ when
// the sequence is malformed: bad lead byte, missing continuation byte,
// overlong encoding, surrogate value, or a value above U+10FFFF.
static int utf8_to_unicode(uint32_t *wc, const char *s)
{
	const unsigned char *p = (const unsigned char *)s;
	const unsigned int lead = p[0];
	uint32_t cp;

	/* one byte: terminator or plain ASCII */
	if (lead < 0x80) {
		*wc = (uint32_t)lead;
		return lead ? 1 : 0;
	}

	/* 0x80..0xC1 are continuation bytes or overlong two-byte leads */
	if (lead < 0xC2)
		goto invalid;

	/* two bytes */
	if (lead < 0xE0) {
		if ((p[1] & 0xC0) != 0x80)
			goto invalid;
		*wc = ((uint32_t)(lead & 0x1F) << 6) | (uint32_t)(p[1] & 0x3F);
		return 2;
	}

	/* three bytes */
	if (lead < 0xF0) {
		if ((p[1] & 0xC0) != 0x80 || (p[2] & 0xC0) != 0x80)
			goto invalid;
		cp = ((uint32_t)(lead & 0x0F) << 12)
		   | ((uint32_t)(p[1] & 0x3F) << 6)
		   | (uint32_t)(p[2] & 0x3F);
		/* the value is stored before it is range-checked */
		*wc = cp;
		/* cp cannot exceed 0xFFFF here; refuse overlong forms and surrogates */
		if (cp >= 0x800 && (cp < 0xD800 || cp > 0xDFFF))
			return 3;
		goto invalid;
	}

	/* four bytes */
	if (lead < 0xF5) {
		if ((p[1] & 0xC0) != 0x80 || (p[2] & 0xC0) != 0x80
		    || (p[3] & 0xC0) != 0x80)
			goto invalid;
		cp = ((uint32_t)(lead & 0x07) << 18)
		   | ((uint32_t)(p[1] & 0x3F) << 12)
		   | ((uint32_t)(p[2] & 0x3F) << 6)
		   | (uint32_t)(p[3] & 0x3F);
		/* the value is stored before it is range-checked */
		*wc = cp;
		if (cp >= 0x10000 && cp <= 0x10FFFF)
			return 4;
		goto invalid;
	}

	/* lead bytes 0xF5 and above fall through to the error path */
invalid:
	errno = EILSEQ;
	return -1;
}
#if 0
// Disabled code: converts a UTF-8 string to a null-terminated UTF-16 string.
// The units are stored as plain uint16_t values; this code does no byte
// swapping.
//
// If *outs is NULL on entry a buffer is allocated with malloc and returned
// through *outs; otherwise the caller's buffer is written to as-is.
//
// Returns the number of 16-bit units written, not counting the terminator,
// or -1 on failure. On a conversion failure a buffer allocated here is
// freed and *outs is reset to NULL.
int utf8_to_utf16_string(const char *ins, uint16_t **outs)
{
	const char *src = ins;
	uint16_t *dst;
	uint32_t cp;
	int own_buffer = 0;
	int units;
	int len;

	units = utf8_to_utf16_size(ins);
	if (units < 0)
		return -1;

	if (*outs == NULL) {
		*outs = malloc((units + 1) * sizeof(uint16_t));
		if (*outs == NULL)
			return -1;
		own_buffer = 1;
	}

	dst = *outs;

	for (;;) {
		len = utf8_to_unicode(&cp, src);
		if (len < 0) {
			/* do not leave space allocated if failed */
			if (own_buffer) {
				free(*outs);
				*outs = (uint16_t *)NULL;
			}
			return -1;
		}
		if (len == 0)
			break;

		if (cp >= 0x10000) {
			/* outside the BMP: emit a high/low surrogate pair */
			cp -= 0x10000;
			*dst++ = (uint16_t)(0xd800 + (cp >> 10));
			*dst++ = (uint16_t)(0xdc00 + (cp & 0x3ff));
		} else {
			*dst++ = (uint16_t)cp;
		}
		src += len;
	}

	/* terminate, but do not count the terminator in the result */
	*dst = (uint16_t)0;
	return dst - *outs;
}
#endif
// Writes one UTF-16 unit at b in a form suitable for a JavaScript string
// literal and returns the position just past what was written.
//
//   u > 0xFF                                   -> \uXXXX (6 characters)
//   quote, apostrophe, backslash, !isprint()   -> \xXX   (4 characters)
//   anything else                              -> the byte itself
//
// When a single raw byte is stored no terminating null is added.
static inline char *js_utf16_char(const uint16_t u, char *b)
{
	const unsigned char lo = (unsigned char)u;

	if (u > 0xFF)
		return b + sprintf(b, "\\u%04x", u);

	if (isprint(lo) && lo != '"' && lo != '\'' && lo != '\\') {
		*b = lo;
		return b + 1;
	}

	return b + sprintf(b, "\\x%02x", lo);
}
// Writes one UTF-16 unit at b in a form suitable for HTML text and returns
// the position just past what was written.
//
// Units above 0xFF, the characters & < > " ' and anything isprint() rejects
// are written as a decimal character reference "&#N;" (8 characters at
// most, for 65535). Everything else is copied through as a single byte,
// with no terminating null added.
static inline char *html_utf16_char(const uint16_t u, char *b)
{
	const unsigned char lo = (unsigned char)u;
	int literal = 0;

	if (u <= 0xFF) {
		switch (lo) {
		case '&':
		case '<':
		case '>':
		case '"':
		case '\'':
			/* markup-significant: always written as a reference */
			break;
		default:
			literal = (isprint(lo) != 0);
			break;
		}
	}

	if (literal)
		*b++ = lo;
	else
		b += sprintf(b, "&#%d;", u);

	return b;
}
// Callback that writes the textual form of one UTF-16 unit at b and returns
// the position just past what it wrote.
typedef char *(*utf16_conv)(const uint16_t u, char *b);

// Converts a UTF-8 string into a newly allocated, null-terminated string by
// decoding it to UTF-16 units and passing each unit through conv.
//
// ins   - null-terminated UTF-8 input
// csize - number of bytes reserved in the output buffer for each UTF-16
//         unit; conv must not write more than this per call
// conv  - per-unit formatter
//
// Returns a malloc'ed string that the caller must free, or NULL when the
// input is empty, is not valid UTF-8, or memory cannot be allocated.
static char *utf8_to_string(const char *ins, int csize, utf16_conv conv)
{
	const char *src = ins;
	char *buf;
	char *dst;
	uint32_t cp;
	int units;
	int len;

	units = utf8_to_utf16_size(ins);
	if (units <= 0)
		return NULL;

	buf = malloc((units + 1) * csize + 1);
	if (buf == NULL)
		return NULL;

	dst = buf;
	while ((len = utf8_to_unicode(&cp, src)) > 0) {
		if (cp >= 0x10000) {
			/* outside the BMP: format the high and low surrogates */
			cp -= 0x10000;
			dst = conv((uint16_t)(0xd800 + (cp >> 10)), dst);
			dst = conv((uint16_t)(0xdc00 + (cp & 0x3ff)), dst);
		} else {
			dst = conv((uint16_t)cp, dst);
		}
		src += len;
	}

	if (len < 0) {
		/* do not leave space allocated if failed */
		free(buf);
		return NULL;
	}

	*dst = '\0';
	return buf;
}
227 char *utf8_to_js_string(const char *ins)
229 return utf8_to_string(ins, 6, js_utf16_char);
232 char *utf8_to_html_string(const char *ins)
234 return utf8_to_string(ins, 8, html_utf16_char);