usbmodeswitch: Updated to v.1.2.6 from shibby's branch.
[tomato.git] / release / src / router / usbmodeswitch / jim / utf8.c
bloba81b3deef02af6c09f91d32a35f768daed31f2fb
1 /**
2 * UTF-8 utility functions
4 * (c) 2010 Steve Bennett <steveb@workware.net.au>
6 * See LICENCE for licence details.
7 */
9 #include <ctype.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <assert.h>
14 #include "utf8.h"
/**
 * Encodes the 16-bit code point 'uc' as UTF-8 into the buffer at 'p'.
 *
 * Writes 1, 2 or 3 bytes depending on the magnitude of 'uc' and returns
 * the number of bytes written. No terminating NUL is appended, so 'p'
 * must have room for up to 3 bytes.
 *
 * This function is compiled regardless of the JIM_UTF8 setting.
 */
int utf8_fromunicode(char *p, unsigned short uc)
{
    /* 0xxxxxxx */
    if (uc < 0x80) {
        p[0] = (char)uc;
        return 1;
    }

    /* 110xxxxx 10xxxxxx */
    if (uc < 0x800) {
        p[0] = (char)(0xc0 | (uc >> 6));
        p[1] = (char)(0x80 | (uc & 0x3f));
        return 2;
    }

    /* 1110xxxx 10xxxxxx 10xxxxxx */
    p[0] = (char)(0xe0 | (uc >> 12));
    p[1] = (char)(0x80 | ((uc >> 6) & 0x3f));
    p[2] = (char)(0x80 | (uc & 0x3f));
    return 3;
}
36 #if defined(JIM_UTF8) && !defined(JIM_BOOTSTRAP)
/**
 * Returns the length in bytes (1-4) of the UTF-8 sequence introduced by
 * the lead byte 'c', or -1 if 'c' is not a valid lead byte (for example
 * a continuation byte 10xxxxxx).
 */
int utf8_charlen(int c)
{
    /* Entry i describes the lead byte of an (i + 1)-byte sequence */
    static const unsigned char lead_mask[] = { 0x80, 0xe0, 0xf0, 0xf8 };
    static const unsigned char lead_bits[] = { 0x00, 0xc0, 0xe0, 0xf0 };
    int nbytes;

    for (nbytes = 1; nbytes <= 4; nbytes++) {
        if ((c & lead_mask[nbytes - 1]) == lead_bits[nbytes - 1]) {
            return nbytes;
        }
    }

    /* Not the first byte of any sequence */
    return -1;
}
/**
 * Returns the number of characters in the UTF-8 string 'str'.
 *
 * 'bytelen' is the length of the string in bytes; if it is negative the
 * length is taken from strlen(str).
 *
 * Each step consumes whatever utf8_tounicode() reports for the current
 * position, so an invalid byte counts as one character.
 */
int utf8_strlen(const char *str, int bytelen)
{
    int charlen = 0;

    if (bytelen < 0) {
        bytelen = (int)strlen(str);
    }

    /*
     * Test for > 0 rather than != 0: if the final sequence extends past
     * 'bytelen' (the buffer was cut in the middle of a character), the
     * remaining count goes negative and a != 0 test would never stop.
     */
    while (bytelen > 0) {
        int c;
        int l = utf8_tounicode(str, &c);

        charlen++;
        str += l;
        bytelen -= l;
    }
    return charlen;
}
/**
 * Returns the byte offset within 'str' of the character at position
 * 'index' (counted in characters).
 *
 * The string is walked one decoded character at a time using
 * utf8_tounicode(); no check is made for the end of the string, so the
 * caller must ensure that 'index' is within range.
 */
int utf8_index(const char *str, int index)
{
    const char *cursor = str;
    int ignored;

    for (; index != 0; index--) {
        cursor += utf8_tounicode(cursor, &ignored);
    }
    return (int)(cursor - str);
}
/**
 * Returns 1 if the first character decoded from 's1' has the same code
 * point as the first character decoded from 's2', otherwise 0.
 */
int utf8_charequal(const char *s1, const char *s2)
{
    int left;
    int right;

    utf8_tounicode(s1, &left);
    utf8_tounicode(s2, &right);

    return (left == right) ? 1 : 0;
}
/**
 * Returns the length in bytes of the character that ends immediately
 * before 'str'.
 *
 * The bytes preceding 'str' are scanned backward until one is found that
 * can start a character: an ASCII byte (0xxxxxxx) or a multi-byte lead
 * byte (11xxxxxx). At most len - 1 bytes are examined and the result is
 * never greater than 'len', which must be positive.
 */
int utf8_prev_len(const char *str, int len)
{
    int back;

    assert(len > 0);

    for (back = 1; back != len; back++) {
        const int byte = str[-back];

        /* Anything other than a continuation byte (10xxxxxx) starts a char */
        if ((byte & 0x80) == 0 || (byte & 0xc0) == 0xc0) {
            break;
        }
    }
    return back;
}
/**
 * Decodes the UTF-8 sequence at the start of 'str', stores its code point
 * in '*uc' and returns the number of bytes consumed.
 *
 * Only 1-, 2- and 3-byte sequences are decoded. Any other lead byte
 * (including 0xf0 and above), or a lead byte that is not followed by the
 * required continuation bytes, is treated as a single byte: that byte
 * value is stored in '*uc' and 1 is returned.
 */
int utf8_tounicode(const char *str, int *uc)
{
    const unsigned char *bytes = (const unsigned char *)str;
    const int lead = bytes[0];

    switch (lead >> 4) {
        case 0xc:
        case 0xd:
            /* 110xxxxx 10xxxxxx */
            if ((bytes[1] & 0xc0) == 0x80) {
                *uc = ((lead & 0x3f) << 6) | (bytes[1] & 0x7f);
                return 2;
            }
            break;

        case 0xe:
            /* 1110xxxx 10xxxxxx 10xxxxxx; the third byte is only read
             * once the second is known to be a continuation byte */
            if ((bytes[1] & 0xc0) == 0x80 && (bytes[2] & 0xc0) == 0x80) {
                *uc = ((lead & 0x1f) << 12) | ((bytes[1] & 0x7f) << 6) | (bytes[2] & 0x7f);
                return 3;
            }
            break;

        default:
            break;
    }

    /* ASCII, stray continuation byte, or invalid sequence: pass the byte through */
    *uc = lead;
    return 1;
}
/* One entry of the case mapping table, looked up by code point */
struct casemap {
    /* The code point this entry applies to */
    unsigned short code;
    /* Amount to add to get the lowercase form; -128 means use the ext table */
    signed char lowerdelta;
    /* Amount to add to get the uppercase form, or the offset into the ext table */
    signed char upperdelta;
};
/* Entry of the extended table, used for code points where |delta| > 127:
 * holds the lowercase and uppercase code points directly */
struct caseextmap {
    unsigned short lower;   /* lowercase code point */
    unsigned short upper;   /* uppercase code point */
};
150 /* Generated mapping tables */
151 #include "_unicode_mapping.c"
/* Number of entries in unicode_case_mapping. The expansion is fully
 * parenthesized so it behaves as a single value inside larger expressions. */
#define NUMCASEMAP (sizeof(unicode_case_mapping) / sizeof(*unicode_case_mapping))
155 static int cmp_casemap(const void *key, const void *cm)
157 return *(int *)key - (int)((const struct casemap *)cm)->code;
160 static int utf8_map_case(int uc, int upper)
162 const struct casemap *cm = bsearch(&uc, unicode_case_mapping, NUMCASEMAP, sizeof(*unicode_case_mapping), cmp_casemap);
164 if (cm) {
165 if (cm->lowerdelta == -128) {
166 uc = upper ? unicode_extmap[cm->upperdelta].upper : unicode_extmap[cm->upperdelta].lower;
168 else {
169 uc += upper ? cm->upperdelta : cm->lowerdelta;
172 return uc;
/**
 * Returns the uppercase form of the code point 'uc'.
 *
 * ASCII values are handled by toupper(); everything else goes through
 * the case mapping tables.
 */
int utf8_upper(int uc)
{
    return isascii(uc) ? toupper(uc) : utf8_map_case(uc, 1);
}
/**
 * Returns the lowercase form of the code point 'uc'.
 *
 * ASCII values are handled by tolower(); everything else goes through
 * the case mapping tables.
 */
int utf8_lower(int uc)
{
    return isascii(uc) ? tolower(uc) : utf8_map_case(uc, 0);
}
192 #endif /* JIM_BOOTSTRAP */