release/src/router/usbmodeswitch/jim/utf8.c

   1 /**
   2  * UTF-8 utility functions
   3  *
   4  * (c) 2010 Steve Bennett <steveb@workware.net.au>
   5  *
   6  * See LICENCE for licence details.
   7  */
   8
   9 #include <ctype.h>
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <assert.h>
  14 #include "utf8.h"
  15
  16 /* This one is always implemented */
  17 int utf8_fromunicode(char *p, unsigned short uc)
  18 {
  19     if (uc <= 0x7f) {
  20         *p = uc;
  21         return 1;
  22     }
  23     else if (uc <= 0x7ff) {
  24         *p++ = 0xc0 | ((uc & 0x7c0) >> 6);
  25         *p = 0x80 | (uc & 0x3f);
  26         return 2;
  27     }
  28     else {
  29         *p++ = 0xe0 | ((uc & 0xf000) >> 12);
  30         *p++ = 0x80 | ((uc & 0xfc0) >> 6);
  31         *p = 0x80 | (uc & 0x3f);
  32         return 3;
  33     }
  34 }
  35
  36 #if defined(JIM_UTF8) && !defined(JIM_BOOTSTRAP)
  37 int utf8_charlen(int c)
  38 {
  39     if ((c & 0x80) == 0) {
  40         return 1;
  41     }
  42     if ((c & 0xe0) == 0xc0) {
  43         return 2;
  44     }
  45     if ((c & 0xf0) == 0xe0) {
  46         return 3;
  47     }
  48     if ((c & 0xf8) == 0xf0) {
  49         return 4;
  50     }
  51     /* Invalid sequence */
  52     return -1;
  53 }
  54
  55 int utf8_strlen(const char *str, int bytelen)
  56 {
  57     int charlen = 0;
  58     if (bytelen < 0) {
  59         bytelen = strlen(str);
  60     }
  61     while (bytelen) {
  62         int c;
  63         int l = utf8_tounicode(str, &c);
  64         charlen++;
  65         str += l;
  66         bytelen -= l;
  67     }
  68     return charlen;
  69 }
  70
  71 int utf8_index(const char *str, int index)
  72 {
  73     const char *s = str;
  74     while (index--) {
  75         int c;
  76         s += utf8_tounicode(s, &c);
  77     }
  78     return s - str;
  79 }
  80
  81 int utf8_charequal(const char *s1, const char *s2)
  82 {
  83     int c1, c2;
  84
  85     utf8_tounicode(s1, &c1);
  86     utf8_tounicode(s2, &c2);
  87
  88     return c1 == c2;
  89 }
  90
  91 int utf8_prev_len(const char *str, int len)
  92 {
  93     int n = 1;
  94
  95     assert(len > 0);
  96
  97     /* Look up to len chars backward for a start-of-char byte */
  98     while (--len) {
  99         if ((str[-n] & 0x80) == 0) {
 100             /* Start of a 1-byte char */
 101             break;
 102         }
 103         if ((str[-n] & 0xc0) == 0xc0) {
 104             /* Start of a multi-byte char */
 105             break;
 106         }
 107         n++;
 108     }
 109     return n;
 110 }
 111
 112 int utf8_tounicode(const char *str, int *uc)
 113 {
 114     unsigned const char *s = (unsigned const char *)str;
 115
 116     if (s[0] < 0xc0) {
 117         *uc = s[0];
 118         return 1;
 119     }
 120     if (s[0] < 0xe0) {
 121         if ((s[1] & 0xc0) == 0x80) {
 122             *uc = ((s[0] & ~0xc0) << 6) | (s[1] & ~0x80);
 123             return 2;
 124         }
 125     }
 126     else if (s[0] < 0xf0) {
 127         if (((str[1] & 0xc0) == 0x80) && ((str[2] & 0xc0) == 0x80)) {
 128             *uc = ((s[0] & ~0xe0) << 12) | ((s[1] & ~0x80) << 6) | (s[2] & ~0x80);
 129             return 3;
 130         }
 131     }
 132
 133     /* Invalid sequence, so just return the byte */
 134     *uc = *s;
 135     return 1;
 136 }
 137
 138 struct casemap {
 139     unsigned short code;    /* code point */
 140     signed char lowerdelta; /* add for lowercase, or if -128 use the ext table */
 141     signed char upperdelta; /* add for uppercase, or offset into the ext table */
 142 };
 143
 144 /* Extended table for codepoints where |delta| > 127 */
 145 struct caseextmap {
 146     unsigned short lower;
 147     unsigned short upper;
 148 };
 149
 150 /* Generated mapping tables */
 151 #include "_unicode_mapping.c"
 152
 153 #define NUMCASEMAP sizeof(unicode_case_mapping) / sizeof(*unicode_case_mapping)
 154
 155 static int cmp_casemap(const void *key, const void *cm)
 156 {
 157     return *(int *)key - (int)((const struct casemap *)cm)->code;
 158 }
 159
 160 static int utf8_map_case(int uc, int upper)
 161 {
 162     const struct casemap *cm = bsearch(&uc, unicode_case_mapping, NUMCASEMAP, sizeof(*unicode_case_mapping), cmp_casemap);
 163
 164     if (cm) {
 165         if (cm->lowerdelta == -128) {
 166             uc = upper ? unicode_extmap[cm->upperdelta].upper : unicode_extmap[cm->upperdelta].lower;
 167         }
 168         else {
 169             uc += upper ? cm->upperdelta : cm->lowerdelta;
 170         }
 171     }
 172     return uc;
 173 }
 174
 175 int utf8_upper(int uc)
 176 {
 177     if (isascii(uc)) {
 178         return toupper(uc);
 179     }
 180     return utf8_map_case(uc, 1);
 181 }
 182
 183 int utf8_lower(int uc)
 184 {
 185     if (isascii(uc)) {
 186         return tolower(uc);
 187     }
 188
 189     return utf8_map_case(uc, 0);
 190 }
 191
 192 #endif /* JIM_BOOTSTRAP */