led: messages and ex prompts are always left-to-right
[neatvi.git] / uc.c
blob42173de33649112d5366d399421f37eafd12b61e
1 #include <ctype.h>
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include "vi.h"
7 #define LEN(a) (sizeof(a) / sizeof((a)[0]))
/*
 * Byte length of the utf-8 character starting at s.
 *
 * Returns 0 for the NUL terminator, 1 for ASCII and for any byte that
 * cannot start a sequence (stray continuation byte or a lead >= 0xf8),
 * and 2..4 for the multi-byte lead bytes.
 */
int uc_len(char *s)
{
	int lead = (unsigned char) s[0];
	if (lead < 0x80)		/* ASCII; NUL has no length */
		return lead != 0;
	if (lead < 0xc0)		/* continuation byte used as a lead */
		return 1;
	if (lead < 0xe0)
		return 2;
	if (lead < 0xf0)
		return 3;
	if (lead < 0xf8)
		return 4;
	return 1;			/* 0xf8..0xff never start a character */
}
/* count the utf-8 characters in s, stepping with uc_end() */
int uc_slen(char *s)
{
	int count = 0;
	while (*s) {
		s = uc_end(s) + 1;	/* first byte after the current character */
		count++;
	}
	return count;
}
/*
 * Decode the utf-8 character at s and return its unicode code point.
 *
 * Well-formed 1..4 byte sequences are decoded normally.  For malformed
 * input the raw value of the first byte is returned instead:
 *   - a stray continuation byte (0x80..0xbf) or a lead byte >= 0xf8,
 *     matching uc_len(), which reports such bytes as one byte long;
 *   - a sequence cut short by the NUL terminator or by any other
 *     non-continuation byte, so nothing past the terminator is read.
 */
int uc_code(char *s)
{
	int lead = (unsigned char) s[0];
	int code, more, i;
	if (lead < 0xc0)		/* NUL, ASCII or stray continuation byte */
		return lead;
	if (lead < 0xe0) {
		more = 1;
		code = lead & 0x1f;
	} else if (lead < 0xf0) {
		more = 2;
		code = lead & 0x0f;
	} else if (lead < 0xf8) {
		more = 3;
		code = lead & 0x07;
	} else {
		return lead;		/* 0xf8..0xff are not valid lead bytes */
	}
	for (i = 1; i <= more; i++) {
		int b = (unsigned char) s[i];
		if ((b & 0xc0) != 0x80)	/* truncated; b may be the terminator */
			return lead;
		code = (code << 6) | (b & 0x3f);
	}
	return code;
}
/*
 * Move s back to the first byte of the character it points into,
 * without going before beg.  Bytes of the form 10xxxxxx are skipped.
 */
char *uc_beg(char *beg, char *s)
{
	for (; s > beg; s--)
		if (((unsigned char) *s & 0xc0) != 0x80)
			break;
	return s;
}
/*
 * Return a pointer to the last byte of the character starting at s.
 * For NUL and ASCII this is s itself; otherwise the lead byte (if any)
 * and every following continuation byte are consumed.
 */
char *uc_end(char *s)
{
	int first = (unsigned char) *s;
	if (first < 0x80)		/* terminator or single-byte character */
		return s;
	if (first >= 0xc0)		/* step over a proper lead byte */
		s++;
	while (((unsigned char) *s & 0xc0) == 0x80)
		s++;
	return s - 1;
}
/*
 * Return a pointer to the character after the one at s; at the end of
 * the string the terminator itself is returned.
 */
char *uc_next(char *s)
{
	char *last = uc_end(s);
	if (*last == '\0')
		return last;
	return last + 1;
}
/* return a pointer to the character before s, or beg if s is already there */
char *uc_prev(char *beg, char *s)
{
	if (s == beg)
		return beg;
	return uc_beg(beg, s - 1);
}
/* return the text after the last newline in s, or s if it has none */
char *uc_lastline(char *s)
{
	char *nl = strrchr(s, '\n');
	if (nl == NULL)
		return s;
	return nl + 1;
}
/*
 * Allocate and return an array of pointers to the characters in s.
 *
 * *n receives the number of characters.  The array has *n + 1 entries;
 * the extra last entry is what uc_next() yields after the final
 * character.  The caller owns the array and must free() it.
 *
 * If the allocation fails, *n is set to 0 and NULL is returned instead
 * of writing through a null pointer.
 */
char **uc_chop(char *s, int *n)
{
	char **chrs;
	int i;
	*n = uc_slen(s);
	chrs = malloc(((size_t) *n + 1) * sizeof(chrs[0]));
	if (!chrs) {
		*n = 0;
		return NULL;
	}
	for (i = 0; i <= *n; i++) {
		chrs[i] = s;
		s = uc_next(s);
	}
	return chrs;
}
/*
 * Return a pointer to the character at offset off in s.
 *
 * When off is negative or equals the number of characters, the end of
 * the string is returned.  When s is NULL or off is past the end, the
 * empty string literal "" is returned (not a pointer into s).
 */
char *uc_chr(char *s, int off)
{
	int idx;
	if (!s)
		return "";
	for (idx = 0; *s; idx++) {
		if (idx == off)
			return s;
		s = uc_next(s);
	}
	/* here idx is the number of characters and s is at the terminator */
	if (off < 0 || idx == off)
		return s;
	return "";
}
/* count the characters that start in the first off bytes of s */
int uc_off(char *s, int off)
{
	char *limit = s + off;
	int count = 0;
	while (s < limit && *s) {
		s = uc_next(s);
		count++;
	}
	return count;
}
/*
 * Return a newly allocated copy of the characters of s in [beg, end).
 *
 * Offsets count characters, not bytes.  A negative offset, or one past
 * the end of s, is treated as the end of the string, so an oversized
 * end copies up to the terminator and an oversized beg yields "".
 * Clamping matters because uc_chr() returns an unrelated "" literal for
 * offsets past the end; subtracting that from a pointer into s would
 * produce a meaningless length.
 *
 * A NULL s is treated as the empty string.  Returns NULL if the
 * allocation fails; otherwise the caller must free() the result.
 */
char *uc_sub(char *s, int beg, int end)
{
	char *sbeg, *send, *r;
	int cnt, len;
	if (!s)
		s = "";
	cnt = uc_slen(s);
	if (beg < 0 || beg > cnt)
		beg = cnt;
	if (end < 0 || end > cnt)
		end = cnt;
	sbeg = uc_chr(s, beg);		/* both now point into s */
	send = uc_chr(s, end);
	len = sbeg <= send ? send - sbeg : 0;
	r = malloc(len + 1);
	if (!r)
		return NULL;
	memcpy(r, sbeg, len);
	r[len] = '\0';
	return r;
}
/* return a malloc()ed copy of s, or NULL if the allocation fails */
char *uc_dup(char *s)
{
	size_t size = strlen(s) + 1;	/* include the terminator */
	char *copy = malloc(size);
	if (copy == NULL)
		return NULL;
	memcpy(copy, s, size);
	return copy;
}
/* 1 if the first byte of s is an ASCII whitespace character; 0 otherwise or for NULL */
int uc_isspace(char *s)
{
	int c = s ? (unsigned char) *s : 0;
	if (c > 0x7f)			/* multi-byte characters are never spaces here */
		return 0;
	return isspace(c) != 0;
}
/* 1 if the first byte of s is non-ASCII or a printable ASCII character */
int uc_isprint(char *s)
{
	int c = s ? (unsigned char) *s : 0;
	if (c > 0x7f)			/* every non-ASCII byte counts as printable */
		return 1;
	return isprint(c) != 0;
}
/* 1 if the first byte of s is non-ASCII or an ASCII letter */
int uc_isalpha(char *s)
{
	int c = s ? (unsigned char) *s : 0;
	if (c > 0x7f)			/* every non-ASCII byte counts as a letter */
		return 1;
	return isalpha(c) != 0;
}
/* 1 if the first byte of s is an ASCII digit; 0 otherwise or for NULL */
int uc_isdigit(char *s)
{
	int c = s ? (unsigned char) *s : 0;
	if (c > 0x7f)			/* non-ASCII digits are not recognised */
		return 0;
	return isdigit(c) != 0;
}
/*
 * Classify the character at c:
 *   0 - whitespace
 *   1 - letter, digit or underscore
 *   2 - anything else
 */
int uc_kind(char *c)
{
	int word;
	if (uc_isspace(c))
		return 0;
	word = uc_isalpha(c) || uc_isdigit(c) || c[0] == '_';
	return word ? 1 : 2;
}
/*
 * Nonzero if code point ch lies in one of the right-to-left ranges
 * handled here: U+0600..U+06FF, U+200C..U+200F, U+FB00..U+FBFF,
 * U+FC00..U+FCFF and U+FE00..U+FEFF.
 *
 * The masks keep every bit above the range, so code points beyond
 * U+FFFF (e.g. U+20600) no longer alias into these ranges as they did
 * with 16-bit masks.  Note that ch is evaluated several times.
 */
#define UC_R2L(ch)	(((ch) & ~0xff) == 0x0600 || \
			((ch) & ~0x03) == 0x200c || \
			((ch) & ~0xff) == 0xfb00 || \
			((ch) & ~0xff) == 0xfc00 || \
			((ch) & ~0xff) == 0xfe00)
/*
 * Characters that can be shaped, sorted by code point so that
 * find_achar() can binary-search the table.  A zero in a form column
 * means the character has no such form.
 */
static struct achar {
	unsigned c;		/* code point of the character itself */
	unsigned s;		/* single (isolated) form */
	unsigned i;		/* initial form */
	unsigned m;		/* medial form */
	unsigned f;		/* final form */
} achars[] = {
	{0x0621, 0xfe80, 0, 0, 0},			/* hamza */
	{0x0622, 0xfe81, 0, 0, 0xfe82},			/* alef madda */
	{0x0623, 0xfe83, 0, 0, 0xfe84},			/* alef hamza above */
	{0x0624, 0xfe85, 0, 0, 0xfe86},			/* waw hamza */
	{0x0625, 0xfe87, 0, 0, 0xfe88},			/* alef hamza below */
	{0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},	/* yeh hamza */
	{0x0627, 0xfe8d, 0, 0, 0xfe8e},			/* alef */
	{0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},	/* beh */
	{0x0629, 0xfe93, 0, 0, 0xfe94},			/* teh marbuta */
	{0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},	/* teh */
	{0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},	/* theh */
	{0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},	/* jeem */
	{0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},	/* hah */
	{0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},	/* khah */
	{0x062f, 0xfea9, 0, 0, 0xfeaa},			/* dal */
	{0x0630, 0xfeab, 0, 0, 0xfeac},			/* thal */
	{0x0631, 0xfead, 0, 0, 0xfeae},			/* reh */
	{0x0632, 0xfeaf, 0, 0, 0xfeb0},			/* zain */
	{0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},	/* seen */
	{0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},	/* sheen */
	{0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},	/* sad */
	{0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},	/* dad */
	{0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},	/* tah */
	{0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},	/* zah */
	{0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},	/* ain */
	{0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},	/* ghain */
	{0x0640, 0x640, 0x640, 0x640, 0},		/* tatweel */
	{0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},	/* feh */
	{0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},	/* qaf */
	{0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},	/* kaf */
	{0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},	/* lam */
	{0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},	/* meem */
	{0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},	/* noon */
	{0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},	/* heh */
	{0x0648, 0xfeed, 0, 0, 0xfeee},			/* waw */
	{0x0649, 0xfeef, 0, 0, 0xfef0},			/* alef maksura */
	{0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},	/* yeh */
	{0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},	/* peh */
	{0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},	/* tcheh */
	{0x0698, 0xfb8a, 0, 0, 0xfb8b},			/* jeh */
	{0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},	/* fkaf */
	{0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},	/* gaf */
	{0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},	/* fyeh */
	{0x200c, 0, 0, 0, 0},				/* ZWNJ */
	{0x200d, 0, 0x200d, 0x200d, 0},			/* ZWJ */
};
238 static struct achar *find_achar(int c)
240 int h, m, l;
241 h = LEN(achars);
242 l = 0;
243 /* using binary search to find c */
244 while (l < h) {
245 m = (h + l) >> 1;
246 if (achars[m].c == c)
247 return &achars[m];
248 if (c < achars[m].c)
249 h = m;
250 else
251 l = m + 1;
253 return NULL;
256 static int can_join(int c1, int c2)
258 struct achar *a1 = find_achar(c1);
259 struct achar *a2 = find_achar(c2);
260 return a1 && a2 && (a1->i || a1->m) && (a2->f || a2->m);
263 static int uc_cshape(int cur, int prev, int next)
265 int c = cur;
266 int join_prev, join_next;
267 struct achar *ac = find_achar(c);
268 if (!ac) /* ignore non-Arabic characters */
269 return c;
270 join_prev = can_join(prev, c);
271 join_next = can_join(c, next);
272 if (join_prev && join_next)
273 c = ac->m;
274 if (join_prev && !join_next)
275 c = ac->f;
276 if (!join_prev && join_next)
277 c = ac->i;
278 if (!join_prev && !join_next)
279 c = ac->c; /* some fonts do not have a glyph for ac->s */
280 return c ? c : cur;
/*
 * return nonzero for Arabic combining characters
 *
 * The standard Arabic diacritics:
 * + 0x064b: fathatan
 * + 0x064c: dammatan
 * + 0x064d: kasratan
 * + 0x064e: fatha
 * + 0x064f: damma
 * + 0x0650: kasra
 * + 0x0651: shadda
 * + 0x0652: sukun
 * + 0x0653: madda above
 * + 0x0654: hamza above
 * + 0x0655: hamza below
 * + 0x0670: superscript alef
 *
 * The shadda ligatures 0xfc5e..0xfc63 are accepted as well.
 */
static int uc_acomb(int c)
{
	if (c == 0x0670)			/* superscript alef */
		return 1;
	if (c >= 0x064b && c <= 0x0655)		/* the standard diacritics */
		return 1;
	return c >= 0xfc5e && c <= 0xfc63;	/* shadda ligatures */
}
/*
 * Encode code point c as utf-8 into d and NUL-terminate it.
 * d must have room for up to four bytes plus the terminator.
 */
static void uc_cput(char *d, int c)
{
	int tail;			/* number of continuation bytes */
	if (c > 0xffff) {
		*d++ = 0xf0 | (c >> 18);
		tail = 3;
	} else if (c > 0x7ff) {
		*d++ = 0xe0 | (c >> 12);
		tail = 2;
	} else if (c > 0x7f) {
		*d++ = 0xc0 | (c >> 6);
		tail = 1;
	} else {
		*d++ = c;
		tail = 0;
	}
	/* emit the remaining six-bit groups, most significant first */
	for (tail--; tail >= 0; tail--)
		*d++ = 0x80 | ((c >> (tail * 6)) & 0x3f);
	*d = '\0';
}
327 /* shape the given arabic character; returns a static buffer */
328 char *uc_shape(char *beg, char *s)
330 static char out[16];
331 char *r;
332 int prev = 0;
333 int next = 0;
334 int curr = uc_code(s);
335 if (!curr || !UC_R2L(curr))
336 return NULL;
337 r = s;
338 while (r > beg) {
339 r = uc_beg(beg, r - 1);
340 if (!uc_acomb(uc_code(r))) {
341 prev = uc_code(r);
342 break;
345 r = s;
346 while (*r) {
347 r = uc_next(r);
348 if (!uc_acomb(uc_code(r))) {
349 next = uc_code(r);
350 break;
353 uc_cput(out, uc_cshape(curr, prev, next));
354 return out;
/*
 * Inclusive {first, last} code point ranges that uc_isdw() treats as
 * double-width.  Kept in ascending order for the binary search in find().
 */
static int dwchars[][2] = {
	{0x1100, 0x115f}, {0x11a3, 0x11a7}, {0x11fa, 0x11ff}, {0x2329, 0x232a},
	{0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb},
	{0x3000, 0x3029}, {0x3030, 0x303e}, {0x3041, 0x3096}, {0x309b, 0x30ff},
	{0x3105, 0x312d}, {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x31c0, 0x31e3},
	{0x31f0, 0x321e}, {0x3220, 0x3247}, {0x3250, 0x32fe}, {0x3300, 0x4dbf},
	{0x4e00, 0xa48c}, {0xa490, 0xa4c6}, {0xa960, 0xa97c}, {0xac00, 0xd7a3},
	{0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xf900, 0xfaff}, {0xfe10, 0xfe19},
	{0xfe30, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xff01, 0xff60},
	{0xffe0, 0xffe6}, {0x1f200, 0x1f200}, {0x1f210, 0x1f231}, {0x1f240, 0x1f248},
	{0x20000, 0x2ffff},
};
/*
 * Inclusive {first, last} code point ranges that uc_iszw() treats as
 * zero-width or combining.  Kept in ascending order for the binary
 * search in find().
 */
static int zwchars[][2] = {
	{0x0300, 0x036f}, {0x0483, 0x0489}, {0x0591, 0x05bd}, {0x05bf, 0x05bf},
	{0x05c1, 0x05c2}, {0x05c4, 0x05c5}, {0x05c7, 0x05c7}, {0x0610, 0x061a},
	{0x064b, 0x065e}, {0x0670, 0x0670}, {0x06d6, 0x06dc}, {0x06de, 0x06e4},
	{0x06e7, 0x06e8}, {0x06ea, 0x06ed}, {0x0711, 0x0711}, {0x0730, 0x074a},
	{0x07a6, 0x07b0}, {0x07eb, 0x07f3}, {0x0816, 0x0819}, {0x081b, 0x0823},
	{0x0825, 0x0827}, {0x0829, 0x082d}, {0x0900, 0x0903}, {0x093c, 0x093c},
	{0x093e, 0x094e}, {0x0951, 0x0955}, {0x0962, 0x0963}, {0x0981, 0x0983},
	{0x09bc, 0x09bc}, {0x09be, 0x09c4}, {0x09c7, 0x09c8}, {0x09cb, 0x09cd},
	{0x09d7, 0x09d7}, {0x09e2, 0x09e3}, {0x0a01, 0x0a03}, {0x0a3c, 0x0a3c},
	{0x0a3e, 0x0a42}, {0x0a47, 0x0a48}, {0x0a4b, 0x0a4d}, {0x0a51, 0x0a51},
	{0x0a70, 0x0a71}, {0x0a75, 0x0a75}, {0x0a81, 0x0a83}, {0x0abc, 0x0abc},
	{0x0abe, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae2, 0x0ae3},
	{0x0b01, 0x0b03}, {0x0b3c, 0x0b3c}, {0x0b3e, 0x0b44}, {0x0b47, 0x0b48},
	{0x0b4b, 0x0b4d}, {0x0b56, 0x0b57}, {0x0b62, 0x0b63}, {0x0b82, 0x0b82},
	{0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, {0x0bd7, 0x0bd7},
	{0x0c01, 0x0c03}, {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d},
	{0x0c55, 0x0c56}, {0x0c62, 0x0c63}, {0x0c82, 0x0c83}, {0x0cbc, 0x0cbc},
	{0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, {0x0cd5, 0x0cd6},
	{0x0ce2, 0x0ce3}, {0x0d02, 0x0d03}, {0x0d3e, 0x0d44}, {0x0d46, 0x0d48},
	{0x0d4a, 0x0d4d}, {0x0d57, 0x0d57}, {0x0d62, 0x0d63}, {0x0d82, 0x0d83},
	{0x0dca, 0x0dca}, {0x0dcf, 0x0dd4}, {0x0dd6, 0x0dd6}, {0x0dd8, 0x0ddf},
	{0x0df2, 0x0df3}, {0x0e31, 0x0e31}, {0x0e34, 0x0e3a}, {0x0e47, 0x0e4e},
	{0x0eb1, 0x0eb1}, {0x0eb4, 0x0eb9}, {0x0ebb, 0x0ebc}, {0x0ec8, 0x0ecd},
	{0x0f18, 0x0f19}, {0x0f35, 0x0f35}, {0x0f37, 0x0f37}, {0x0f39, 0x0f39},
	{0x0f3e, 0x0f3f}, {0x0f71, 0x0f84}, {0x0f86, 0x0f87}, {0x0f90, 0x0f97},
	{0x0f99, 0x0fbc}, {0x0fc6, 0x0fc6}, {0x102b, 0x103e}, {0x1056, 0x1059},
	{0x105e, 0x1060}, {0x1062, 0x1064}, {0x1067, 0x106d}, {0x1071, 0x1074},
	{0x1082, 0x108d}, {0x108f, 0x108f}, {0x109a, 0x109d}, {0x135f, 0x135f},
	{0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773},
	{0x17b6, 0x17d3}, {0x17dd, 0x17dd}, {0x180b, 0x180d}, {0x18a9, 0x18a9},
	{0x1920, 0x192b}, {0x1930, 0x193b}, {0x19b0, 0x19c0}, {0x19c8, 0x19c9},
	{0x1a17, 0x1a1b}, {0x1a55, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a7f},
	{0x1b00, 0x1b04}, {0x1b34, 0x1b44}, {0x1b6b, 0x1b73}, {0x1b80, 0x1b82},
	{0x1ba1, 0x1baa}, {0x1c24, 0x1c37}, {0x1cd0, 0x1cd2}, {0x1cd4, 0x1ce8},
	{0x1ced, 0x1ced}, {0x1cf2, 0x1cf2}, {0x1dc0, 0x1de6}, {0x1dfd, 0x1dff},
	{0x200b, 0x200f},
	{0x20d0, 0x20f0}, {0x2cef, 0x2cf1}, {0x2de0, 0x2dff}, {0x302a, 0x302f},
	{0x3099, 0x309a}, {0xa66f, 0xa672}, {0xa67c, 0xa67d}, {0xa6f0, 0xa6f1},
	{0xa802, 0xa802}, {0xa806, 0xa806}, {0xa80b, 0xa80b}, {0xa823, 0xa827},
	{0xa880, 0xa881}, {0xa8b4, 0xa8c4}, {0xa8e0, 0xa8f1}, {0xa926, 0xa92d},
	{0xa947, 0xa953}, {0xa980, 0xa983}, {0xa9b3, 0xa9c0}, {0xaa29, 0xaa36},
	{0xaa43, 0xaa43}, {0xaa4c, 0xaa4d}, {0xaa7b, 0xaa7b}, {0xaab0, 0xaab0},
	{0xaab2, 0xaab4}, {0xaab7, 0xaab8}, {0xaabe, 0xaabf}, {0xaac1, 0xaac1},
	{0xabe3, 0xabea}, {0xabec, 0xabed}, {0xfb1e, 0xfb1e}, {0xfe00, 0xfe0f},
	{0xfe20, 0xfe26}, {0x101fd, 0x101fd}, {0x10a01, 0x10a03}, {0x10a05, 0x10a06},
	{0x10a0c, 0x10a0f}, {0x10a38, 0x10a3a}, {0x10a3f, 0x10a3f}, {0x11080, 0x11082},
	{0x110b0, 0x110ba}, {0x1d165, 0x1d169}, {0x1d16d, 0x1d172}, {0x1d17b, 0x1d182},
	{0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad}, {0x1d242, 0x1d244}, {0xe0100, 0xe01ef}
};
/*
 * Inclusive {first, last} code point ranges that uc_isbell() reports,
 * in addition to the double-width and zero-width tables.  Kept in
 * ascending order for the binary search in find().
 */
static int bchars[][2] = {
	{0x00000, 0x0001f}, {0x00080, 0x0009f}, {0x00300, 0x0036f},
	{0x00379, 0x00379}, {0x00380, 0x00383}, {0x0038d, 0x0038d},
	{0x00483, 0x00489}, {0x00527, 0x00530}, {0x00558, 0x00558},
	{0x00588, 0x00588}, {0x0058c, 0x005bd}, {0x005c1, 0x005c2},
	{0x005c5, 0x005c5}, {0x005c8, 0x005cf}, {0x005ec, 0x005ef},
	{0x005f6, 0x00605}, {0x00611, 0x0061a}, {0x0061d, 0x0061d},
	{0x0064b, 0x0065f}, {0x006d6, 0x006e4}, {0x006e8, 0x006e8},
	{0x006eb, 0x006ed}, {0x0070f, 0x0070f}, {0x00730, 0x0074c},
	{0x007a7, 0x007b0}, {0x007b3, 0x007bf}, {0x007ec, 0x007f3},
	{0x007fc, 0x007ff}, {0x00817, 0x00819}, {0x0081c, 0x00823},
	{0x00826, 0x00827}, {0x0082a, 0x0082f}, {0x00840, 0x00903},
	{0x0093b, 0x0093c}, {0x0093f, 0x0094f}, {0x00952, 0x00957},
	{0x00963, 0x00963}, {0x00974, 0x00978}, {0x00981, 0x00984},
	{0x0098e, 0x0098e}, {0x00992, 0x00992}, {0x009b1, 0x009b1},
	{0x009b4, 0x009b5}, {0x009bb, 0x009bc}, {0x009bf, 0x009cd},
	{0x009d0, 0x009db}, {0x009e2, 0x009e5}, {0x009fd, 0x00a04},
	{0x00a0c, 0x00a0e}, {0x00a12, 0x00a12}, {0x00a31, 0x00a31},
	{0x00a37, 0x00a37}, {0x00a3b, 0x00a58}, {0x00a5f, 0x00a65},
	{0x00a71, 0x00a71}, {0x00a76, 0x00a84}, {0x00a92, 0x00a92},
	{0x00ab1, 0x00ab1}, {0x00aba, 0x00abc}, {0x00abf, 0x00acf},
	{0x00ad2, 0x00adf}, {0x00ae3, 0x00ae5}, {0x00af2, 0x00b04},
	{0x00b0e, 0x00b0e}, {0x00b12, 0x00b12}, {0x00b31, 0x00b31},
	{0x00b3a, 0x00b3c}, {0x00b3f, 0x00b5b}, {0x00b62, 0x00b65},
	{0x00b73, 0x00b82}, {0x00b8b, 0x00b8d}, {0x00b96, 0x00b98},
	{0x00b9d, 0x00b9d}, {0x00ba1, 0x00ba2}, {0x00ba6, 0x00ba7},
	{0x00bac, 0x00bad}, {0x00bbb, 0x00bcf}, {0x00bd2, 0x00be5},
	{0x00bfc, 0x00c04}, {0x00c11, 0x00c11}, {0x00c34, 0x00c34},
	{0x00c3b, 0x00c3c}, {0x00c3f, 0x00c57}, {0x00c5b, 0x00c5f},
	{0x00c63, 0x00c65}, {0x00c71, 0x00c77}, {0x00c81, 0x00c84},
	{0x00c91, 0x00c91}, {0x00cb4, 0x00cb4}, {0x00cbb, 0x00cbc},
	{0x00cbf, 0x00cdd}, {0x00ce2, 0x00ce5}, {0x00cf3, 0x00d04},
	{0x00d11, 0x00d11}, {0x00d3a, 0x00d3c}, {0x00d3f, 0x00d5f},
	{0x00d63, 0x00d65}, {0x00d77, 0x00d78}, {0x00d81, 0x00d84},
	{0x00d98, 0x00d99}, {0x00dbc, 0x00dbc}, {0x00dbf, 0x00dbf},
	{0x00dc8, 0x00df3}, {0x00df6, 0x00e00}, {0x00e34, 0x00e3e},
	{0x00e48, 0x00e4e}, {0x00e5d, 0x00e80}, {0x00e85, 0x00e86},
	{0x00e8b, 0x00e8c}, {0x00e8f, 0x00e93}, {0x00ea0, 0x00ea0},
	{0x00ea6, 0x00ea6}, {0x00ea9, 0x00ea9}, {0x00eb1, 0x00eb1},
	{0x00eb5, 0x00ebc}, {0x00ebf, 0x00ebf}, {0x00ec7, 0x00ecf},
	{0x00edb, 0x00edb}, {0x00edf, 0x00eff}, {0x00f19, 0x00f19},
	{0x00f37, 0x00f37}, {0x00f3e, 0x00f3f}, {0x00f6d, 0x00f84},
	{0x00f87, 0x00f87}, {0x00f8d, 0x00fbd}, {0x00fcd, 0x00fcd},
	{0x00fda, 0x00fff}, {0x0102c, 0x0103e}, {0x01057, 0x01059},
	{0x0105f, 0x01060}, {0x01063, 0x01064}, {0x01068, 0x0106d},
	{0x01072, 0x01074}, {0x01083, 0x0108d}, {0x0109a, 0x0109d},
	{0x010c7, 0x010cf}, {0x010fe, 0x010ff}, {0x0124e, 0x0124f},
	{0x01259, 0x01259}, {0x0125f, 0x0125f}, {0x0128e, 0x0128f},
	{0x012b6, 0x012b7}, {0x012c1, 0x012c1}, {0x012c7, 0x012c7},
	{0x01311, 0x01311}, {0x01317, 0x01317}, {0x0135c, 0x0135f},
	{0x0137e, 0x0137f}, {0x0139b, 0x0139f}, {0x013f6, 0x013ff},
	{0x0169e, 0x0169f}, {0x016f2, 0x016ff}, {0x01712, 0x0171f},
	{0x01733, 0x01734}, {0x01738, 0x0173f}, {0x01753, 0x0175f},
	{0x01771, 0x0177f}, {0x017b5, 0x017d3}, {0x017de, 0x017df},
	{0x017eb, 0x017ef}, {0x017fb, 0x017ff}, {0x0180c, 0x0180d},
	{0x0181a, 0x0181f}, {0x01879, 0x0187f}, {0x018ab, 0x018af},
	{0x018f7, 0x018ff}, {0x0191e, 0x0193f}, {0x01942, 0x01943},
	{0x0196f, 0x0196f}, {0x01976, 0x0197f}, {0x019ad, 0x019c0},
	{0x019c9, 0x019cf}, {0x019dc, 0x019dd}, {0x01a18, 0x01a1d},
	{0x01a56, 0x01a7f}, {0x01a8b, 0x01a8f}, {0x01a9b, 0x01a9f},
	{0x01aaf, 0x01b04}, {0x01b35, 0x01b44}, {0x01b4d, 0x01b4f},
	{0x01b6c, 0x01b73}, {0x01b7e, 0x01b82}, {0x01ba2, 0x01bad},
	{0x01bbb, 0x01bff}, {0x01c25, 0x01c3a}, {0x01c4b, 0x01c4c},
	{0x01c81, 0x01cd2}, {0x01cd5, 0x01ce8}, {0x01cf2, 0x01cff},
	{0x01dc1, 0x01dff}, {0x01f17, 0x01f17}, {0x01f1f, 0x01f1f},
	{0x01f47, 0x01f47}, {0x01f4f, 0x01f4f}, {0x01f5a, 0x01f5a},
	{0x01f5e, 0x01f5e}, {0x01f7f, 0x01f7f}, {0x01fc5, 0x01fc5},
	{0x01fd5, 0x01fd5}, {0x01ff0, 0x01ff1}, {0x01fff, 0x01fff},
	{0x0200c, 0x0200f}, {0x02029, 0x0202e}, {0x02061, 0x0206f},
	{0x02073, 0x02073}, {0x02095, 0x0209f}, {0x020ba, 0x020ff},
	{0x0218b, 0x0218f}, {0x023ea, 0x023ff}, {0x02428, 0x0243f},
	{0x0244c, 0x0245f}, {0x026e2, 0x026e2}, {0x026e5, 0x026e7},
	{0x02705, 0x02705}, {0x0270b, 0x0270b}, {0x0274c, 0x0274c},
	{0x02753, 0x02755}, {0x02760, 0x02760}, {0x02796, 0x02797},
	{0x027bf, 0x027bf}, {0x027cd, 0x027cf}, {0x02b4e, 0x02b4f},
	{0x02b5b, 0x02bff}, {0x02c5f, 0x02c5f}, {0x02cf0, 0x02cf8},
	{0x02d27, 0x02d2f}, {0x02d67, 0x02d6e}, {0x02d71, 0x02d7f},
	{0x02d98, 0x02d9f}, {0x02daf, 0x02daf}, {0x02dbf, 0x02dbf},
	{0x02dcf, 0x02dcf}, {0x02ddf, 0x02dff}, {0x02e33, 0x02e7f},
	{0x02ef4, 0x02eff}, {0x02fd7, 0x02fef}, {0x02ffd, 0x02fff},
	{0x0302b, 0x0302f}, {0x03097, 0x0309a}, {0x03101, 0x03104},
	{0x0312f, 0x03130}, {0x031b8, 0x031bf}, {0x031e5, 0x031ef},
	{0x032ff, 0x032ff}, {0x04db7, 0x04dbf}, {0x09fcd, 0x09fff},
	{0x0a48e, 0x0a48f}, {0x0a4c8, 0x0a4cf}, {0x0a62d, 0x0a63f},
	{0x0a661, 0x0a661}, {0x0a670, 0x0a672}, {0x0a675, 0x0a67d},
	{0x0a699, 0x0a69f}, {0x0a6f1, 0x0a6f1}, {0x0a6f9, 0x0a6ff},
	{0x0a78e, 0x0a7fa}, {0x0a806, 0x0a806}, {0x0a823, 0x0a827},
	{0x0a82d, 0x0a82f}, {0x0a83b, 0x0a83f}, {0x0a879, 0x0a881},
	{0x0a8b5, 0x0a8cd}, {0x0a8db, 0x0a8f1}, {0x0a8fd, 0x0a8ff},
	{0x0a927, 0x0a92d}, {0x0a948, 0x0a95e}, {0x0a97e, 0x0a983},
	{0x0a9b4, 0x0a9c0}, {0x0a9da, 0x0a9dd}, {0x0a9e1, 0x0a9ff},
	{0x0aa2a, 0x0aa3f}, {0x0aa4c, 0x0aa4f}, {0x0aa5b, 0x0aa5b},
	{0x0aa7c, 0x0aa7f}, {0x0aab2, 0x0aab4}, {0x0aab8, 0x0aab8},
	{0x0aabf, 0x0aabf}, {0x0aac3, 0x0aada}, {0x0aae1, 0x0abbf},
	{0x0abe4, 0x0abea}, {0x0abed, 0x0abef}, {0x0abfb, 0x0abff},
	{0x0d7a5, 0x0d7af}, {0x0d7c8, 0x0d7ca}, {0x0d7fd, 0x0f8ff},
	{0x0fa2f, 0x0fa2f}, {0x0fa6f, 0x0fa6f}, {0x0fadb, 0x0faff},
	{0x0fb08, 0x0fb12}, {0x0fb19, 0x0fb1c}, {0x0fb37, 0x0fb37},
	{0x0fb3f, 0x0fb3f}, {0x0fb45, 0x0fb45}, {0x0fbb3, 0x0fbd2},
	{0x0fd41, 0x0fd4f}, {0x0fd91, 0x0fd91}, {0x0fdc9, 0x0fdef},
	{0x0fdff, 0x0fe0f}, {0x0fe1b, 0x0fe2f}, {0x0fe67, 0x0fe67},
	{0x0fe6d, 0x0fe6f}, {0x0fefd, 0x0ff00}, {0x0ffc0, 0x0ffc1},
	{0x0ffc9, 0x0ffc9}, {0x0ffd1, 0x0ffd1}, {0x0ffd9, 0x0ffd9},
	{0x0ffde, 0x0ffdf}, {0x0ffef, 0x0fffb}, {0x0ffff, 0x0ffff},
	{0x10027, 0x10027}, {0x1003e, 0x1003e}, {0x1004f, 0x1004f},
	{0x1005f, 0x1007f}, {0x100fc, 0x100ff}, {0x10104, 0x10106},
	{0x10135, 0x10136}, {0x1018c, 0x1018f}, {0x1019d, 0x101cf},
	{0x101fe, 0x1027f}, {0x1029e, 0x1029f}, {0x102d2, 0x102ff},
	{0x10324, 0x1032f}, {0x1034c, 0x1037f}, {0x103c4, 0x103c7},
	{0x103d7, 0x103ff}, {0x1049f, 0x1049f}, {0x104ab, 0x107ff},
	{0x10807, 0x10807}, {0x10836, 0x10836}, {0x1083a, 0x1083b},
	{0x1083e, 0x1083e}, {0x10860, 0x108ff}, {0x1091d, 0x1091e},
	{0x1093b, 0x1093e}, {0x10941, 0x109ff}, {0x10a02, 0x10a0f},
	{0x10a18, 0x10a18}, {0x10a35, 0x10a3f}, {0x10a49, 0x10a4f},
	{0x10a5a, 0x10a5f}, {0x10a81, 0x10aff}, {0x10b37, 0x10b38},
	{0x10b57, 0x10b57}, {0x10b74, 0x10b77}, {0x10b81, 0x10bff},
	{0x10c4a, 0x10e5f}, {0x10e80, 0x11082}, {0x110b1, 0x110ba},
	{0x110c2, 0x11fff}, {0x12370, 0x123ff}, {0x12464, 0x1246f},
	{0x12475, 0x12fff}, {0x13430, 0x1cfff}, {0x1d0f7, 0x1d0ff},
	{0x1d128, 0x1d128}, {0x1d166, 0x1d169}, {0x1d16e, 0x1d182},
	{0x1d186, 0x1d18b}, {0x1d1ab, 0x1d1ad}, {0x1d1df, 0x1d1ff},
	{0x1d243, 0x1d244}, {0x1d247, 0x1d2ff}, {0x1d358, 0x1d35f},
	{0x1d373, 0x1d3ff}, {0x1d49d, 0x1d49d}, {0x1d4a1, 0x1d4a1},
	{0x1d4a4, 0x1d4a4}, {0x1d4a8, 0x1d4a8}, {0x1d4ba, 0x1d4ba},
	{0x1d4c4, 0x1d4c4}, {0x1d50b, 0x1d50c}, {0x1d51d, 0x1d51d},
	{0x1d53f, 0x1d53f}, {0x1d547, 0x1d549}, {0x1d6a6, 0x1d6a7},
	{0x1d7cd, 0x1d7cd}, {0x1d801, 0x1efff}, {0x1f02d, 0x1f02f},
	{0x1f095, 0x1f0ff}, {0x1f10c, 0x1f10f}, {0x1f130, 0x1f130},
	{0x1f133, 0x1f13c}, {0x1f140, 0x1f141}, {0x1f144, 0x1f145},
	{0x1f148, 0x1f149}, {0x1f150, 0x1f156}, {0x1f159, 0x1f15e},
	{0x1f161, 0x1f178}, {0x1f17d, 0x1f17e}, {0x1f181, 0x1f189},
	{0x1f18f, 0x1f18f}, {0x1f192, 0x1f1ff}, {0x1f202, 0x1f20f},
	{0x1f233, 0x1f23f}, {0x1f24a, 0x1ffff}, {0x2a6d8, 0x2a6ff},
	{0x2b736, 0x2f7ff}, {0x2fa1f, 0x10ffff},
};
/*
 * Binary-search a sorted table of inclusive {first, last} ranges.
 * Returns 1 if c falls inside one of the n ranges of tab, 0 otherwise.
 * tab must have at least one entry.
 */
static int find(int c, int tab[][2], int n)
{
	int lo = 0;
	int hi = n - 1;
	if (c < tab[0][0])		/* below the first range */
		return 0;
	while (lo <= hi) {
		int mid = (hi + lo) / 2;
		if (c < tab[mid][0])
			hi = mid - 1;
		else if (c > tab[mid][1])
			lo = mid + 1;
		else
			return 1;
	}
	return 0;
}
576 /* double-width characters */
577 static int uc_isdw(int c)
579 return c >= 0x1100 && find(c, dwchars, LEN(dwchars));
582 /* zero-width and combining characters */
583 static int uc_iszw(int c)
585 return c >= 0x0300 && find(c, zwchars, LEN(zwchars));
/* display width of the character at s: 0 for zero-width, 2 for double-width, else 1 */
int uc_wid(char *s)
{
	int code = uc_code(s);
	if (uc_iszw(code))
		return 0;
	if (uc_isdw(code))
		return 2;
	return 1;
}
596 /* nonprintable characters */
597 int uc_isbell(char *s)
599 int c = (unsigned char) *s;
600 if (c == ' ' || c == '\t' || c == '\n' || (c <= 0x7f && isprint(c)))
601 return 0;
602 c = uc_code(s);
603 return uc_isdw(c) || uc_iszw(c) || find(c, bchars, LEN(bchars));
/*
 * Nonzero if the character at s is an Arabic combining character as
 * defined by uc_acomb().  Space, tab, newline and printable ASCII are
 * rejected without decoding.
 */
int uc_iscomb(char *s)
{
	int c = (unsigned char) *s;
	if (c == ' ' || c == '\t' || c == '\n')
		return 0;
	if (c <= 0x7f && isprint(c))
		return 0;
	return uc_acomb(uc_code(s));
}