Rearrange the python scripting support a bit.
[screen-lua.git] / src / encoding.c
blob6c1567a0e43b95789b7851369993d8d39d0e3913
1 /* Copyright (c) 1993-2003
2 * Juergen Weigert (jnweiger@immd4.informatik.uni-erlangen.de)
3 * Michael Schroeder (mlschroe@immd4.informatik.uni-erlangen.de)
4 * Copyright (c) 1987 Oliver Laumann
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program (see the file COPYING); if not, see
18 * http://www.gnu.org/licenses/, or contact Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
21 ****************************************************************
24 #include <sys/types.h>
26 #include "config.h"
27 #include "screen.h"
28 #include "extern.h"
30 #ifdef ENCODINGS
32 extern unsigned char *null;
33 extern struct display *display, *displays;
34 extern struct layer *flayer;
36 extern char *screenencodings;
38 #ifdef DW_CHARS
39 extern int cjkwidth;
40 #endif
42 static int encmatch __P((char *, char *));
43 # ifdef UTF8
44 static int recode_char __P((int, int, int));
45 static int recode_char_to_encoding __P((int, int));
46 static void comb_tofront __P((int, int));
47 # ifdef DW_CHARS
48 static int recode_char_dw __P((int, int *, int, int));
49 static int recode_char_dw_to_encoding __P((int, int *, int));
50 # endif
51 # endif
/* Static description of one character encoding; see the encodings[] table. */
53 struct encoding {
54 char *name; /* name matched against user input by FindEncoding() */
55 char *charsets; /* if non-null, passed to SetCharsets() by ResetEncoding() */
56 int deffont; /* font assumed for cells whose font byte is 0 */
57 int usegr; /* if set, ResetEncoding() sets w_gr = 2 and w_FontE = charsets[1] */
58 int noc1; /* if set, ResetEncoding() clears w_c1 */
59 char *fontlist; /* fonts (one per byte) tried first by recode_char*_to_encoding() */
62 /* big5 font: ^X */
63 /* KOI8-R font: 96 ! */
64 /* CP1251 font: 96 ? */
/*
 * Table of known encodings.  The array index is the encoding number:
 * FindEncoding() returns the index of the first entry whose name matches.
 * Columns: name, charsets, deffont, usegr, noc1, fontlist.
 * NOTE(review): the UTF8/SJIS/EUC_JP/... constants used elsewhere in this
 * file presumably equal the matching row index here -- confirm against
 * their definitions.
 */
66 struct encoding encodings[] = {
67 { "C", 0, 0, 0, 0, 0 },
68 { "eucJP", "B\002I\00401", 0, 1, 0, "\002\004I" },
69 { "SJIS", "BIBB01", 0, 1, 1, "\002I" },
70 { "eucKR", "B\003BB01", 0, 1, 0, "\003" },
71 { "eucCN", "B\001BB01", 0, 1, 0, "\001" },
72 { "Big5", "B\030BB01", 0, 1, 0, "\030" },
73 { "KOI8-R", 0, 0x80|'!', 0, 1, 0 },
74 { "CP1251", 0, 0x80|'?', 0, 1, 0 },
75 { "UTF-8", 0, -1, 0, 0, 0 },
76 { "ISO8859-2", 0, 0x80|'B', 0, 0, 0 },
77 { "ISO8859-3", 0, 0x80|'C', 0, 0, 0 },
78 { "ISO8859-4", 0, 0x80|'D', 0, 0, 0 },
79 { "ISO8859-5", 0, 0x80|'L', 0, 0, 0 },
80 { "ISO8859-6", 0, 0x80|'G', 0, 0, 0 },
81 { "ISO8859-7", 0, 0x80|'F', 0, 0, 0 },
82 { "ISO8859-8", 0, 0x80|'H', 0, 0, 0 },
83 { "ISO8859-9", 0, 0x80|'M', 0, 0, 0 },
84 { "ISO8859-10", 0, 0x80|'V', 0, 0, 0 },
85 { "ISO8859-15", 0, 0x80|'b', 0, 0, 0 },
86 { "jis", 0, 0, 0, 0, "\002\004I" },
87 { "GBK", "B\031BB01", 0x80|'b', 1, 1, "\031" }
90 #ifdef UTF8
/*
 * Built-in font translation tables, stored back to back in one array.
 *
 * Each sub-table starts with a header pair { font, 0 } whose first member
 * is the index into recodetabs[] (see InitBuiltinTabs()).  It is followed
 * by { font character, Unicode value } pairs and closed by { 0, 0 }.
 * A final extra { 0, 0 } ends the whole list.
 *
 * A pair whose first member has bit 0x8000 set closes a range that begins
 * at the preceding pair (see the range handling in recode_char()).
 */
92 static unsigned short builtin_tabs[][2] = {
93 { 0x30, 0 }, /* 0: special graphics (line drawing) */
94 { 0x005f, 0x25AE },
95 { 0x0060, 0x25C6 },
96 { 0x0061, 0x2592 },
97 { 0x0062, 0x2409 },
98 { 0x0063, 0x240C },
99 { 0x0064, 0x240D },
100 { 0x0065, 0x240A },
101 { 0x0066, 0x00B0 },
102 { 0x0067, 0x00B1 },
103 { 0x0068, 0x2424 },
104 { 0x0069, 0x240B },
105 { 0x006a, 0x2518 },
106 { 0x006b, 0x2510 },
107 { 0x006c, 0x250C },
108 { 0x006d, 0x2514 },
109 { 0x006e, 0x253C },
110 { 0x006f, 0x23BA },
111 { 0x0070, 0x23BB },
112 { 0x0071, 0x2500 },
113 { 0x0072, 0x23BC },
114 { 0x0073, 0x23BD },
115 { 0x0074, 0x251C },
116 { 0x0075, 0x2524 },
117 { 0x0076, 0x2534 },
118 { 0x0077, 0x252C },
119 { 0x0078, 0x2502 },
120 { 0x0079, 0x2264 },
121 { 0x007a, 0x2265 },
122 { 0x007b, 0x03C0 },
123 { 0x007c, 0x2260 },
124 { 0x007d, 0x00A3 },
125 { 0x007e, 0x00B7 },
126 { 0, 0},
128 { 0x34, 0 }, /* 4: Dutch */
129 { 0x0023, 0x00a3 },
130 { 0x0040, 0x00be },
131 { 0x005b, 0x00ff },
132 { 0x005c, 0x00bd },
133 { 0x005d, 0x007c },
134 { 0x007b, 0x00a8 },
135 { 0x007c, 0x0066 },
136 { 0x007d, 0x00bc },
137 { 0x007e, 0x00b4 },
138 { 0, 0},
140 { 0x35, 0 }, /* 5: Finnish */
141 { 0x005b, 0x00c4 },
142 { 0x005c, 0x00d6 },
143 { 0x005d, 0x00c5 },
144 { 0x005e, 0x00dc },
145 { 0x0060, 0x00e9 },
146 { 0x007b, 0x00e4 },
147 { 0x007c, 0x00f6 },
148 { 0x007d, 0x00e5 },
149 { 0x007e, 0x00fc },
150 { 0, 0},
152 { 0x36, 0 }, /* 6: Norwegian/Danish */
153 { 0x0040, 0x00c4 },
154 { 0x005b, 0x00c6 },
155 { 0x005c, 0x00d8 },
156 { 0x005d, 0x00c5 },
157 { 0x005e, 0x00dc },
158 { 0x0060, 0x00e4 },
159 { 0x007b, 0x00e6 },
160 { 0x007c, 0x00f8 },
161 { 0x007d, 0x00e5 },
162 { 0x007e, 0x00fc },
163 { 0, 0},
165 { 0x37, 0 }, /* 7: Swedish */
166 { 0x0040, 0x00c9 },
167 { 0x005b, 0x00c4 },
168 { 0x005c, 0x00d6 },
169 { 0x005d, 0x00c5 },
170 { 0x005e, 0x00dc },
171 { 0x0060, 0x00e9 },
172 { 0x007b, 0x00e4 },
173 { 0x007c, 0x00f6 },
174 { 0x007d, 0x00e5 },
175 { 0x007e, 0x00fc },
176 { 0, 0},
178 { 0x3d, 0}, /* =: Swiss */
179 { 0x0023, 0x00f9 },
180 { 0x0040, 0x00e0 },
181 { 0x005b, 0x00e9 },
182 { 0x005c, 0x00e7 },
183 { 0x005d, 0x00ea },
184 { 0x005e, 0x00ee },
185 { 0x005f, 0x00e8 },
186 { 0x0060, 0x00f4 },
187 { 0x007b, 0x00e4 },
188 { 0x007c, 0x00f6 },
189 { 0x007d, 0x00fc },
190 { 0x007e, 0x00fb },
191 { 0, 0},
193 { 0x41, 0}, /* A: UK */
194 { 0x0023, 0x00a3 },
195 { 0, 0},
197 { 0x4b, 0}, /* K: German */
198 { 0x0040, 0x00a7 },
199 { 0x005b, 0x00c4 },
200 { 0x005c, 0x00d6 },
201 { 0x005d, 0x00dc },
202 { 0x007b, 0x00e4 },
203 { 0x007c, 0x00f6 },
204 { 0x007d, 0x00fc },
205 { 0x007e, 0x00df },
206 { 0, 0},
208 { 0x51, 0}, /* Q: French Canadian */
209 { 0x0040, 0x00e0 },
210 { 0x005b, 0x00e2 },
211 { 0x005c, 0x00e7 },
212 { 0x005d, 0x00ea },
213 { 0x005e, 0x00ee },
214 { 0x0060, 0x00f4 },
215 { 0x007b, 0x00e9 },
216 { 0x007c, 0x00f9 },
217 { 0x007d, 0x00e8 },
218 { 0x007e, 0x00fb },
219 { 0, 0},
221 { 0x52, 0}, /* R: French */
222 { 0x0023, 0x00a3 },
223 { 0x0040, 0x00e0 },
224 { 0x005b, 0x00b0 },
225 { 0x005c, 0x00e7 },
226 { 0x005d, 0x00a7 },
227 { 0x007b, 0x00e9 },
228 { 0x007c, 0x00f9 },
229 { 0x007d, 0x00e8 },
230 { 0x007e, 0x00a8 },
231 { 0, 0},
233 { 0x59, 0}, /* Y: Italian */
234 { 0x0023, 0x00a3 },
235 { 0x0040, 0x00a7 },
236 { 0x005b, 0x00b0 },
237 { 0x005c, 0x00e7 },
238 { 0x005d, 0x00e9 },
239 { 0x0060, 0x00f9 },
240 { 0x007b, 0x00e0 },
241 { 0x007c, 0x00f2 },
242 { 0x007d, 0x00e8 },
243 { 0x007e, 0x00ec },
244 { 0, 0},
246 { 0x5a, 0}, /* Z: Spanish */
247 { 0x0023, 0x00a3 },
248 { 0x0040, 0x00a7 },
249 { 0x005b, 0x00a1 },
250 { 0x005c, 0x00d1 },
251 { 0x005d, 0x00bf },
252 { 0x007b, 0x00b0 },
253 { 0x007c, 0x00f1 },
254 { 0x007d, 0x00e7 },
255 { 0, 0},
257 { 0xe2, 0}, /* 96-b: ISO-8859-15 */
258 { 0x00a4, 0x20ac },
259 { 0x00a6, 0x0160 },
260 { 0x00a8, 0x0161 },
261 { 0x00b4, 0x017D },
262 { 0x00b8, 0x017E },
263 { 0x00bc, 0x0152 },
264 { 0x00bd, 0x0153 },
265 { 0x00be, 0x0178 },
266 { 0, 0},
268 { 0x4a, 0}, /* J: JIS 0201 Roman */
269 { 0x005c, 0x00a5 },
270 { 0x007e, 0x203e },
271 { 0, 0},
273 { 0x49, 0}, /* I: halfwidth katakana */
274 { 0x0021, 0xff61 },
275 { 0x005f|0x8000, 0xff9f },
276 { 0, 0},
278 { 0, 0}
/* Translation table slot for one font; recodetabs[] below is indexed by font number. */
281 struct recodetab
283 unsigned short (*tab)[2]; /* list of { font character, Unicode } pairs ended by a zero pair, or null */
284 int flags; /* RECODETAB_* bits; 0 means no load was attempted yet (see recode_char()) */
/* Flag values for recodetab.flags.  Only RECODETAB_BUILTIN is set in the code visible here. */
287 #define RECODETAB_ALLOCED 1
288 #define RECODETAB_BUILTIN 2
289 #define RECODETAB_TRIED 4
/* One slot per possible font byte (0..255). */
291 static struct recodetab recodetabs[256];
293 void
294 InitBuiltinTabs()
296 unsigned short (*p)[2];
297 for (p = builtin_tabs; (*p)[0]; p++)
299 recodetabs[(*p)[0]].flags = RECODETAB_BUILTIN;
300 recodetabs[(*p)[0]].tab = p + 1;
301 p++;
302 while((*p)[0])
303 p++;
/*
 * Translate one single-width character between a font-tagged value and
 * Unicode, using the per-font tables in recodetabs[].
 *
 * to_utf != 0:
 *   c is (font << 8) | character.  Values below 256 are returned as is.
 *   Otherwise the font's table is searched (loading it on first use) and
 *   the mapped Unicode value is returned; without a match the low byte of
 *   c is returned.
 * to_utf == 0 and font == -1:
 *   c is a Unicode value.  Values below 256 are returned as is.  The
 *   already-loaded tables of fonts 32..127 are searched and
 *   character | (font << 8) is returned, or '?' if nothing maps.
 * to_utf == 0 and any other font:
 *   only that font's table is consulted (fonts below 32 are not handled
 *   here).  Returns -1 when c has no mapping.
 *
 * A table entry whose first member has bit 0x8000 set closes a range that
 * starts at the preceding entry; values inside the range are mapped by
 * offset from that preceding entry.
 */
307 static int
308 recode_char(c, to_utf, font)
309 int c, to_utf, font;
311 int f;
312 unsigned short (*p)[2];
314 if (to_utf)
316 if (c < 256)
317 return c;
318 f = (c >> 8) & 0xff;
319 c &= 0xff;
320 /* map aliases to keep the table small */
321 switch (f)
323 case 'C':
324 f ^= ('C' ^ '5'); /* turns 'C' into '5' */
325 break;
326 case 'E':
327 f ^= ('E' ^ '6'); /* turns 'E' into '6' */
328 break;
329 case 'H':
330 f ^= ('H' ^ '7'); /* turns 'H' into '7' */
331 break;
332 default:
333 break;
335 p = recodetabs[f].tab;
/* flags == 0: no table and no earlier load attempt recorded, so try loading */
336 if (p == 0 && recodetabs[f].flags == 0)
338 LoadFontTranslation(f, 0);
339 p = recodetabs[f].tab;
341 if (p)
342 for (; (*p)[0]; p++)
/* range entry: c lies between the previous entry and this flagged one */
344 if ((p[0][0] & 0x8000) && (c <= (p[0][0] & 0x7fff)) && c >= p[-1][0])
345 return c - p[-1][0] + p[-1][1];
346 if ((*p)[0] == c)
347 return (*p)[1];
349 return c & 0xff; /* map to latin1 */
351 if (font == -1)
353 if (c < 256)
354 return c; /* latin1 */
/* reverse lookup over every table that is already loaded; nothing is loaded here */
355 for (font = 32; font < 128; font++)
357 p = recodetabs[font].tab;
358 if (p)
359 for (; (*p)[1]; p++)
361 if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])
362 return (c - p[-1][1] + p[-1][0]) | (font << 8);
363 if ((*p)[1] == c)
364 return (*p)[0] | (font << 8);
367 return '?';
/* fonts with bit 128 set leave values below 128 untouched */
369 if (c < 128 && (font & 128) != 0)
370 return c;
371 if (font >= 32)
373 p = recodetabs[font].tab;
374 if (p == 0 && recodetabs[font].flags == 0)
376 LoadFontTranslation(font, 0);
377 p = recodetabs[font].tab;
379 if (p)
380 for (; (*p)[1]; p++)
/* for fonts with bit 128 set the font is not stored in the result */
382 if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])
383 return (c - p[-1][1] + p[-1][0]) | (font & 128 ? 0 : font << 8);
384 if ((*p)[1] == c)
385 return (*p)[0] | (font & 128 ? 0 : font << 8);
388 return -1;
392 #ifdef DW_CHARS
393 static int
394 recode_char_dw(c, c2p, to_utf, font)
395 int c, *c2p, to_utf, font;
397 int f;
398 unsigned short (*p)[2];
400 if (to_utf)
402 f = (c >> 8) & 0xff;
403 c = (c & 255) << 8 | (*c2p & 255);
404 *c2p = 0xffff;
405 p = recodetabs[f].tab;
406 if (p == 0 && recodetabs[f].flags == 0)
408 LoadFontTranslation(f, 0);
409 p = recodetabs[f].tab;
411 if (p)
412 for (; (*p)[0]; p++)
413 if ((*p)[0] == c)
415 #ifdef DW_CHARS
416 if (!utf8_isdouble((*p)[1]))
417 *c2p = ' ';
418 #endif
419 return (*p)[1];
421 return UCS_REPL_DW;
423 if (font == -1)
425 for (font = 0; font < 030; font++)
427 p = recodetabs[font].tab;
428 if (p)
429 for (; (*p)[1]; p++)
430 if ((*p)[1] == c)
432 *c2p = ((*p)[0] & 255) | font << 8 | 0x8000;
433 return ((*p)[0] >> 8) | font << 8;
436 *c2p = '?';
437 return '?';
439 if (font < 32)
441 p = recodetabs[font].tab;
442 if (p == 0 && recodetabs[font].flags == 0)
444 LoadFontTranslation(font, 0);
445 p = recodetabs[font].tab;
447 if (p)
448 for (; (*p)[1]; p++)
449 if ((*p)[1] == c)
451 *c2p = ((*p)[0] & 255) | font << 8 | 0x8000;
452 return ((*p)[0] >> 8) | font << 8;
455 return -1;
457 #endif
459 static int
460 recode_char_to_encoding(c, encoding)
461 int c, encoding;
463 char *fp;
464 int x;
466 if (encoding == UTF8)
467 return recode_char(c, 1, -1);
468 if ((fp = encodings[encoding].fontlist) != 0)
469 while(*fp)
470 if ((x = recode_char(c, 0, (unsigned char)*fp++)) != -1)
471 return x;
472 if (encodings[encoding].deffont)
473 if ((x = recode_char(c, 0, encodings[encoding].deffont)) != -1)
474 return x;
475 return recode_char(c, 0, -1);
478 #ifdef DW_CHARS
479 static int
480 recode_char_dw_to_encoding(c, c2p, encoding)
481 int c, *c2p, encoding;
483 char *fp;
484 int x;
486 if (encoding == UTF8)
487 return recode_char_dw(c, c2p, 1, -1);
488 if ((fp = encodings[encoding].fontlist) != 0)
489 while(*fp)
490 if ((x = recode_char_dw(c, c2p, 0, (unsigned char)*fp++)) != -1)
491 return x;
492 if (encodings[encoding].deffont)
493 if ((x = recode_char_dw(c, c2p, 0, encodings[encoding].deffont)) != -1)
494 return x;
495 return recode_char_dw(c, c2p, 0, -1);
497 #endif
500 struct mchar *
501 recode_mchar(mc, from, to)
502 struct mchar *mc;
503 int from, to;
505 static struct mchar rmc;
506 int c;
508 debug3("recode_mchar %02x from %d to %d\n", mc->image, from, to);
509 if (from == to || (from != UTF8 && to != UTF8))
510 return mc;
511 rmc = *mc;
512 if (rmc.font == 0 && from != UTF8)
513 rmc.font = encodings[from].deffont;
514 if (rmc.font == 0) /* latin1 is the same in unicode */
515 return mc;
516 c = rmc.image | (rmc.font << 8);
517 #ifdef DW_CHARS
518 if (rmc.mbcs)
520 int c2 = rmc.mbcs;
521 c = recode_char_dw_to_encoding(c, &c2, to);
522 rmc.mbcs = c2;
524 else
525 #endif
526 c = recode_char_to_encoding(c, to);
527 rmc.image = c & 255;
528 rmc.font = c >> 8 & 255;
529 return &rmc;
532 struct mline *
533 recode_mline(ml, w, from, to)
534 struct mline *ml;
535 int w;
536 int from, to;
538 static int maxlen;
539 static int last;
540 static struct mline rml[2], *rl;
541 int i, c;
543 if (from == to || (from != UTF8 && to != UTF8) || w == 0)
544 return ml;
545 if (ml->font == null && encodings[from].deffont == 0)
546 return ml;
547 if (w > maxlen)
549 for (i = 0; i < 2; i++)
551 if (rml[i].image == 0)
552 rml[i].image = malloc(w);
553 else
554 rml[i].image = realloc(rml[i].image, w);
555 if (rml[i].font == 0)
556 rml[i].font = malloc(w);
557 else
558 rml[i].font = realloc(rml[i].font, w);
559 if (rml[i].image == 0 || rml[i].font == 0)
561 maxlen = 0;
562 return ml; /* sorry */
565 maxlen = w;
568 debug("recode_mline: from\n");
569 for (i = 0; i < w; i++)
570 debug1("%c", "0123456789abcdef"[(ml->image[i] >> 4) & 15]);
571 debug("\n");
572 for (i = 0; i < w; i++)
573 debug1("%c", "0123456789abcdef"[(ml->image[i] ) & 15]);
574 debug("\n");
575 for (i = 0; i < w; i++)
576 debug1("%c", "0123456789abcdef"[(ml->font[i] >> 4) & 15]);
577 debug("\n");
578 for (i = 0; i < w; i++)
579 debug1("%c", "0123456789abcdef"[(ml->font[i] ) & 15]);
580 debug("\n");
582 rl = rml + last;
583 rl->attr = ml->attr;
584 #ifdef COLOR
585 rl->color = ml->color;
586 # ifdef COLORS256
587 rl->colorx = ml->colorx;
588 # endif
589 #endif
590 for (i = 0; i < w; i++)
592 c = ml->image[i] | (ml->font[i] << 8);
593 if (from != UTF8 && c < 256)
594 c |= encodings[from].deffont << 8;
595 #ifdef DW_CHARS
596 if ((from != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (from == UTF8 && utf8_isdouble(c)))
598 if (i + 1 == w)
599 c = '?';
600 else
602 int c2;
603 i++;
604 c2 = ml->image[i] | (ml->font[i] << 8);
605 c = recode_char_dw_to_encoding(c, &c2, to);
606 rl->font[i - 1] = c >> 8 & 255;
607 rl->image[i - 1] = c & 255;
608 c = c2;
611 else
612 #endif
613 c = recode_char_to_encoding(c, to);
614 rl->image[i] = c & 255;
615 rl->font[i] = c >> 8 & 255;
617 last ^= 1;
618 debug("recode_mline: to\n");
619 for (i = 0; i < w; i++)
620 debug1("%c", "0123456789abcdef"[(rl->image[i] >> 4) & 15]);
621 debug("\n");
622 for (i = 0; i < w; i++)
623 debug1("%c", "0123456789abcdef"[(rl->image[i] ) & 15]);
624 debug("\n");
625 for (i = 0; i < w; i++)
626 debug1("%c", "0123456789abcdef"[(rl->font[i] >> 4) & 15]);
627 debug("\n");
628 for (i = 0; i < w; i++)
629 debug1("%c", "0123456789abcdef"[(rl->font[i] ) & 15]);
630 debug("\n");
631 return rl;
/*
 * One stored combination of a base character with a combining character.
 * Cells refer to entry i through the code 0xd800 + i.
 */
634 struct combchar {
635 unsigned short c1; /* first character; may itself be a 0xd800..0xdfff combination code */
636 unsigned short c2; /* character appended to c1 */
637 unsigned short next; /* index of the next entry in the list (see comb_tofront()) */
638 unsigned short prev; /* index of the previous entry in the list */
/*
 * Table of combinations, allocated on first use by utf8_handle_comb() with
 * 0x802 slots; slots 0x800 and 0x801 are the two list roots set up there.
 */
640 struct combchar **combchars;
642 void
643 AddUtf8(c)
644 int c;
646 ASSERT(D_encoding == UTF8);
647 if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])
649 AddUtf8(combchars[c - 0xd800]->c1);
650 c = combchars[c - 0xd800]->c2;
652 if (c >= 0x800)
654 AddChar((c & 0xf000) >> 12 | 0xe0);
655 c = (c & 0x0fff) | 0x1000;
657 if (c >= 0x80)
659 AddChar((c & 0x1fc0) >> 6 ^ 0xc0);
660 c = (c & 0x3f) | 0x80;
662 AddChar(c);
666 ToUtf8_comb(p, c)
667 char *p;
668 int c;
670 int l;
672 if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])
674 l = ToUtf8_comb(p, combchars[c - 0xd800]->c1);
675 return l + ToUtf8(p ? p + l : 0, combchars[c - 0xd800]->c2);
677 return ToUtf8(p, c);
/*
 * Encode c as UTF-8, using at most three bytes.
 *
 * The bytes are stored at p unless p is null, in which case only the
 * length is computed.  No terminating NUL is written.
 * Returns the number of bytes of the sequence (1, 2 or 3).
 */
int
ToUtf8(p, c)
char *p;
int c;
{
  unsigned char seq[3];
  int len = 0;
  int k;

  if (c >= 0x800)
    {
      /* lead byte of a three-byte sequence */
      seq[len++] = ((c & 0xf000) >> 12) | 0xe0;
      /* bit 12 makes the next step produce a continuation byte */
      c = (c & 0x0fff) | 0x1000;
    }
  if (c >= 0x80)
    {
      seq[len++] = ((c & 0x1fc0) >> 6) ^ 0xc0;
      c = (c & 0x3f) | 0x80;
    }
  seq[len++] = c;

  if (p)
    for (k = 0; k < len; k++)
      p[k] = seq[k];
  return len;
}
706 * returns:
707 * -1: need more bytes, sequence not finished
708 * -2: corrupt sequence found, redo last char
709 * >= 0: decoded character
/*
 * Feed one input byte c to the incremental UTF-8 decoder.
 *
 * *utf8charp holds the decoder state between calls: 0 when no sequence is
 * in progress, otherwise the bits collected so far together with marker
 * bits.  The state is considered "in progress" while bit 0x80000000 is
 * set.
 *
 * Returns -1 when more bytes are needed, -2 when c is not a continuation
 * byte although one was expected (the state is reset and the caller has
 * to process c again), and the decoded character otherwise.  Results that
 * do not fit in 16 bits, the range 0xd800..0xdfff, 0xfffe and 0xffff are
 * replaced by UCS_REPL.
 */
712 FromUtf8(c, utf8charp)
713 int c, *utf8charp;
715 int utf8char = *utf8charp;
716 if (utf8char)
/* inside a sequence: c has to be a continuation byte 10xxxxxx */
718 if ((c & 0xc0) != 0x80)
720 *utf8charp = 0;
721 return -2; /* corrupt sequence! */
723 else
724 c = (c & 0x3f) | (utf8char << 6);
/* only done while bit 0x40000000 of the previous state is clear */
725 if (!(utf8char & 0x40000000))
727 /* check for overlong sequences */
728 if ((c & 0x820823e0) == 0x80000000)
729 c = 0xfdffffff;
730 else if ((c & 0x020821f0) == 0x02000000)
731 c = 0xfff7ffff;
732 else if ((c & 0x000820f8) == 0x00080000)
733 c = 0xffffd000;
734 else if ((c & 0x0000207c) == 0x00002000)
735 c = 0xffffff70;
738 else
740 /* new sequence */
741 if (c >= 0xfe)
742 c = UCS_REPL;
743 else if (c >= 0xfc)
744 c = (c & 0x01) | 0xbffffffc; /* 5 bytes to follow */
745 else if (c >= 0xf8)
746 c = (c & 0x03) | 0xbfffff00; /* 4 */
747 else if (c >= 0xf0)
748 c = (c & 0x07) | 0xbfffc000; /* 3 */
749 else if (c >= 0xe0)
750 c = (c & 0x0f) | 0xbff00000; /* 2 */
751 else if (c >= 0xc2)
752 c = (c & 0x1f) | 0xfc000000; /* 1 */
753 else if (c >= 0xc0)
754 c = 0xfdffffff; /* overlong */
755 else if (c >= 0x80)
756 c = UCS_REPL;
/* keep the value as state while its top bit is still set, otherwise the character is complete */
758 *utf8charp = utf8char = (c & 0x80000000) ? c : 0;
759 if (utf8char)
760 return -1;
761 if (c & 0xffff0000)
762 c = UCS_REPL; /* sorry, only know 16bit Unicode */
763 if (c >= 0xd800 && (c <= 0xdfff || c == 0xfffe || c == 0xffff))
764 c = UCS_REPL; /* illegal code */
765 return c;
/*
 * Switch window p to a new encoding (build with UTF8 support).
 *
 * If the old and the new encoding are both UTF8 or both not UTF8, only
 * p->w_encoding is updated.  Otherwise every overlay layer stacked on a
 * canvas showing this window is exited first, and then each cell of the
 * window's lines is recoded in place before the new encoding is stored.
 */
769 void
770 WinSwitchEncoding(p, encoding)
771 struct win *p;
772 int encoding;
774 int i, j, c;
775 struct mline *ml;
776 struct display *d;
777 struct canvas *cv;
778 struct layer *oldflayer;
780 if ((p->w_encoding == UTF8) == (encoding == UTF8))
782 p->w_encoding = encoding;
783 return;
785 oldflayer = flayer;
786 for (d = displays; d; d = d->d_next)
787 for (cv = d->d_cvlist; cv; cv = cv->c_next)
788 if (p == Layer2Window(cv->c_layer))
790 flayer = cv->c_layer;
791 while(flayer->l_next)
/* the layer saved for restoring is about to be exited, so remember the one below it */
793 if (oldflayer == flayer)
794 oldflayer = flayer->l_next;
795 ExitOverlayPage();
798 flayer = oldflayer;
/* visit the screen lines and then the lines counted by w_histheight */
799 for (j = 0; j < p->w_height + p->w_histheight; j++)
801 #ifdef COPY_PASTE
802 ml = j < p->w_height ? &p->w_mlines[j] : &p->w_hlines[j - p->w_height];
803 #else
804 ml = &p->w_mlines[j];
805 #endif
/* a line without font data in an encoding without default font needs no recoding */
806 if (ml->font == null && encodings[p->w_encoding].deffont == 0)
807 continue;
808 for (i = 0; i < p->w_width; i++)
810 c = ml->image[i] | (ml->font[i] << 8);
811 if (p->w_encoding != UTF8 && c < 256)
812 c |= encodings[p->w_encoding].deffont << 8;
813 if (c < 256)
814 continue;
/* the shared `null` font buffer must not be written to: give the line its own */
815 if (ml->font == null)
817 if ((ml->font = (unsigned char *)calloc(p->w_width + 1, 1)) == 0)
/* out of memory: the rest of this line is left unconverted */
819 ml->font = null;
820 break;
823 #ifdef DW_CHARS
824 if ((p->w_encoding != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (p->w_encoding == UTF8 && utf8_isdouble(c)))
/* a double-width character in the last column has no second cell */
826 if (i + 1 == p->w_width)
827 c = '?';
828 else
830 int c2;
831 i++;
832 c2 = ml->image[i] | (ml->font[i] << 8);
833 c = recode_char_dw_to_encoding(c, &c2, encoding);
834 ml->font[i - 1] = c >> 8 & 255;
835 ml->image[i - 1] = c & 255;
836 c = c2;
839 else
840 #endif
841 c = recode_char_to_encoding(c, encoding);
842 ml->image[i] = c & 255;
843 ml->font[i] = c >> 8 & 255;
846 p->w_encoding = encoding;
847 return;
850 #ifdef DW_CHARS
/* Closed range [first, last] of character codes. */
struct interval {
  int first;
  int last;
};

/*
 * Binary search in a sorted table of non-overlapping intervals.
 *
 * max is the index of the last table entry (not the entry count).
 * Returns 1 if ucs lies inside one of the intervals, 0 otherwise.
 */
static int bisearch(int ucs, const struct interval *table, int max) {
  int lo = 0;
  int hi = max;

  /* quick rejection of values outside the span of the whole table */
  if (ucs < table[0].first || ucs > table[hi].last)
    return 0;

  while (lo <= hi) {
    int probe = (lo + hi) / 2;
    const struct interval *range = &table[probe];

    if (ucs > range->last) {
      lo = probe + 1;
      continue;
    }
    if (ucs < range->first) {
      hi = probe - 1;
      continue;
    }
    return 1;
  }

  return 0;
}
/*
 * Tell whether character c is treated as double-width.
 *
 * Returns nonzero if c lies in one of the fixed wide ranges tested in the
 * return expression below, or if the global cjkwidth is set and c is found
 * in the table of East Asian Ambiguous characters.
 */
877 utf8_isdouble(c)
878 int c;
880 /* sorted list of non-overlapping intervals of East Asian Ambiguous
881 * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
882 static const struct interval ambiguous[] = {
883 { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
884 { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
885 { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
886 { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
887 { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
888 { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
889 { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
890 { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
891 { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
892 { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
893 { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
894 { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
895 { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
896 { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
897 { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
898 { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
899 { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
900 { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
901 { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
902 { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
903 { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
904 { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
905 { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
906 { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
907 { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
908 { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
909 { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
910 { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
911 { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
912 { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
913 { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
914 { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
915 { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
916 { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
917 { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
918 { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
919 { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
920 { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
921 { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
922 { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
923 { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B },
924 { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 },
925 { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 },
926 { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 },
927 { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 },
928 { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 },
929 { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 },
930 { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
931 { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
932 { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
933 { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
934 { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
/* fixed wide ranges first; the ambiguous table is only consulted when cjkwidth is set */
937 return ((c >= 0x1100 &&
938 (c <= 0x115f || /* Hangul Jamo init. consonants */
939 c == 0x2329 || c == 0x232a ||
940 (c >= 0x2e80 && c <= 0xa4cf &&
941 c != 0x303f) || /* CJK ... Yi */
942 (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */
943 (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */
944 (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */
945 (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */
946 (c >= 0xffe0 && c <= 0xffe6) ||
947 (c >= 0x20000 && c <= 0x2fffd) ||
948 (c >= 0x30000 && c <= 0x3fffd))) ||
949 (cjkwidth &&
950 bisearch(c, ambiguous,
951 sizeof(ambiguous) / sizeof(struct interval) - 1)));
953 #endif
/*
 * Tell whether c is a combining character.
 *
 * Returns 1 if c lies in one of the intervals of the table below and 0
 * otherwise.  The table has to stay sorted and non-overlapping because it
 * is searched with bisearch().
 */
956 utf8_iscomb(c)
957 int c;
959 /* taken from Markus Kuhn's wcwidth */
960 static const struct interval combining[] = {
961 { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
962 { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
963 { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
964 { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
965 { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
966 { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
967 { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
968 { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
969 { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
970 { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
971 { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
972 { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
973 { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
974 { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
975 { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
976 { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
977 { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
978 { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
979 { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
980 { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
981 { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
982 { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
983 { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
984 { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
985 { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
986 { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
987 { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
988 { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
989 { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
990 { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
991 { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
992 { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
993 { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
994 { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
995 { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
996 { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
997 { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
998 { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
999 { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
1000 { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
1001 { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
1002 { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
1003 { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
1004 { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
1005 { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
1006 { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
1007 { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
1008 { 0xE0100, 0xE01EF }
/* bisearch() expects the index of the last entry, hence the "- 1" */
1011 return bisearch(c, combining, sizeof(combining) / sizeof(struct interval) - 1);
1014 static void
1015 comb_tofront(root, i)
1016 int root, i;
1018 for (;;)
1020 debug1("bring to front: %x\n", i);
1021 combchars[combchars[i]->prev]->next = combchars[i]->next;
1022 combchars[combchars[i]->next]->prev = combchars[i]->prev;
1023 combchars[i]->next = combchars[root]->next;
1024 combchars[i]->prev = root;
1025 combchars[combchars[root]->next]->prev = i;
1026 combchars[root]->next = i;
1027 i = combchars[i]->c1;
1028 if (i < 0xd800 || i >= 0xe000)
1029 return;
1030 i -= 0xd800;
1034 void
1035 utf8_handle_comb(c, mc)
1036 int c;
1037 struct mchar *mc;
1039 int root, i, c1;
1040 int isdouble;
1042 c1 = mc->image | (mc->font << 8);
1043 isdouble = c1 >= 0x1100 && utf8_isdouble(c1);
1044 if (!combchars)
1046 combchars = (struct combchar **)calloc(0x802, sizeof(struct combchar *));
1047 if (!combchars)
1048 return;
1049 combchars[0x800] = (struct combchar *)malloc(sizeof(struct combchar));
1050 combchars[0x801] = (struct combchar *)malloc(sizeof(struct combchar));
1051 if (!combchars[0x800] || !combchars[0x801])
1053 if (combchars[0x800])
1054 free(combchars[0x800]);
1055 if (combchars[0x801])
1056 free(combchars[0x801]);
1057 free(combchars);
1058 return;
1060 combchars[0x800]->c1 = 0x000;
1061 combchars[0x800]->c2 = 0x700;
1062 combchars[0x800]->next = 0x800;
1063 combchars[0x800]->prev = 0x800;
1064 combchars[0x801]->c1 = 0x700;
1065 combchars[0x801]->c2 = 0x800;
1066 combchars[0x801]->next = 0x801;
1067 combchars[0x801]->prev = 0x801;
1069 root = isdouble ? 0x801 : 0x800;
1070 for (i = combchars[root]->c1; i < combchars[root]->c2; i++)
1072 if (!combchars[i])
1073 break;
1074 if (combchars[i]->c1 == c1 && combchars[i]->c2 == c)
1075 break;
1077 if (i == combchars[root]->c2)
1079 /* full, recycle old entry */
1080 if (c1 >= 0xd800 && c1 < 0xe000)
1081 comb_tofront(root, c1 - 0xd800);
1082 i = combchars[root]->prev;
1083 if (c1 == i + 0xd800)
1085 /* completely full, can't recycle */
1086 debug("utf8_handle_comp: completely full!\n");
1087 mc->image = '?';
1088 mc->font = 0;
1089 return;
1091 /* FIXME: delete old char from all buffers */
1093 else if (!combchars[i])
1095 combchars[i] = (struct combchar *)malloc(sizeof(struct combchar));
1096 if (!combchars[i])
1097 return;
1098 combchars[i]->prev = i;
1099 combchars[i]->next = i;
1101 combchars[i]->c1 = c1;
1102 combchars[i]->c2 = c;
1103 mc->image = i & 0xff;
1104 mc->font = (i >> 8) + 0xd8;
1105 debug3("combinig char %x %x -> %x\n", c1, c, i + 0xd800);
1106 comb_tofront(root, i);
1109 #else /* !UTF8 */
1111 void
1112 WinSwitchEncoding(p, encoding)
1113 struct win *p;
1114 int encoding;
1116 p->w_encoding = encoding;
1117 return;
1120 #endif /* UTF8 */
/*
 * Compare the encoding name s1 with the reference name s2.
 *
 * Only ASCII letters and digits take part in the comparison; letters are
 * compared without regard to case and every other character is skipped in
 * both strings.  The comparison ends when s1 is exhausted, so s1 also
 * matches when it is a prefix of s2.
 *
 * Returns 1 on a match and 0 otherwise.  In particular 0 is returned when
 * s2 ends while s1 still has characters to compare; the scan used to step
 * over the terminating NUL of s2 in that case and read past the string.
 */
static int
encmatch(s1, s2)
char *s1;
char *s2;
{
  int c1, c2;

  for (;;)
    {
      c1 = (unsigned char)*s1;
      if (c1 == 0)
	return 1;		/* everything in s1 was matched */
      if (c1 >= 'A' && c1 <= 'Z')
	c1 += 'a' - 'A';
      if (!(c1 >= 'a' && c1 <= 'z') && !(c1 >= '0' && c1 <= '9'))
	{
	  s1++;
	  continue;
	}
      c2 = (unsigned char)*s2;
      if (c2 == 0)
	return 0;		/* s2 exhausted first: never read beyond its end */
      if (c2 >= 'A' && c2 <= 'Z')
	c2 += 'a' - 'A';
      if (!(c2 >= 'a' && c2 <= 'z') && !(c2 >= '0' && c2 <= '9'))
	{
	  s2++;
	  continue;
	}
      if (c1 != c2)
	return 0;
      s1++;
      s2++;
    }
}
1156 FindEncoding(name)
1157 char *name;
1159 int encoding;
1161 debug1("FindEncoding %s\n", name);
1162 if (name == 0 || *name == 0)
1163 return 0;
1164 if (encmatch(name, "euc"))
1165 name = "eucJP";
1166 if (encmatch(name, "off") || encmatch(name, "iso8859-1"))
1167 return 0;
1168 #ifndef UTF8
1169 if (encmatch(name, "UTF-8"))
1170 return -1;
1171 #endif
1172 for (encoding = 0; encoding < (int)(sizeof(encodings)/sizeof(*encodings)); encoding++)
1173 if (encmatch(name, encodings[encoding].name))
1175 #ifdef UTF8
1176 LoadFontTranslationsForEncoding(encoding);
1177 #endif
1178 return encoding;
1180 return -1;
1183 char *
1184 EncodingName(encoding)
1185 int encoding;
1187 if (encoding >= (int)(sizeof(encodings)/sizeof(*encodings)))
1188 return 0;
1189 return encodings[encoding].name;
1193 EncodingDefFont(encoding)
1194 int encoding;
1196 return encodings[encoding].deffont;
1199 void
1200 ResetEncoding(p)
1201 struct win *p;
1203 char *c;
1204 int encoding = p->w_encoding;
1206 c = encodings[encoding].charsets;
1207 if (c)
1208 SetCharsets(p, c);
1209 #ifdef UTF8
1210 LoadFontTranslationsForEncoding(encoding);
1211 #endif
1212 if (encodings[encoding].usegr)
1214 p->w_gr = 2;
1215 p->w_FontE = encodings[encoding].charsets[1];
1217 else
1218 p->w_FontE = 0;
1219 if (encodings[encoding].noc1)
1220 p->w_c1 = 0;
/*
 * Decode one input byte c of a stream in the given encoding.
 *
 * *statep carries the pending lead byte(s) of a multi-byte character
 * between calls and is 0 when no character is in progress.
 *
 * Returns -1 when more bytes are needed.  Otherwise returns the decoded
 * character with the font stored above bit 16.  For UTF8 the result of
 * FromUtf8() is returned unchanged.
 */
1224 DecodeChar(c, encoding, statep)
1225 int c;
1226 int encoding;
1227 int *statep;
1229 int t;
1231 debug2("Decoding char %02x for encoding %d\n", c, encoding);
1232 #ifdef UTF8
1233 if (encoding == UTF8)
1234 return FromUtf8(c, statep);
1235 #endif
1236 if (encoding == SJIS)
1238 if (!*statep)
/* lead byte of a two-byte character: remember it and wait for the second byte */
1240 if ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef))
1242 *statep = c;
1243 return -1;
1245 if (c < 0x80)
1246 return c;
1247 return c | (KANA << 16);
/* second byte: t is the new byte, c the saved lead byte */
1249 t = c;
1250 c = *statep;
1251 *statep = 0;
1252 if (0x40 <= t && t <= 0xfc && t != 0x7f)
1254 if (c <= 0x9f)
1255 c = (c - 0x81) * 2 + 0x21;
1256 else
1257 c = (c - 0xc1) * 2 + 0x21;
1258 if (t <= 0x7e)
1259 t -= 0x1f;
1260 else if (t <= 0x9e)
1261 t -= 0x20;
1262 else
1263 t -= 0x7e, c++;
1264 return (c << 8) | t | (KANJI << 16);
/* invalid second byte: it is returned on its own */
1266 return t;
1268 if (encoding == EUC_JP || encoding == EUC_KR || encoding == EUC_CN)
1270 if (!*statep)
1272 if (c & 0x80)
1274 *statep = c;
1275 return -1;
1277 return c;
1279 t = c;
1280 c = *statep;
1281 *statep = 0;
1282 if (encoding == EUC_JP)
/* lead byte 0x8e: the second byte is returned with the KANA font */
1284 if (c == 0x8e)
1285 return t | (KANA << 16);
/* lead byte 0x8f: one more byte is needed; the KANJI0212 font is kept in the state */
1286 if (c == 0x8f)
1288 *statep = t | (KANJI0212 << 8);
1289 return -1;
1292 c &= 0xff7f;
1293 t &= 0x7f;
1294 c = c << 8 | t;
1295 if (encoding == EUC_KR)
1296 return c | (3 << 16);
1297 if (encoding == EUC_CN)
1298 return c | (1 << 16);
/* a font carried over from the 0x8f case is already part of c */
1299 if (c & (KANJI0212 << 16))
1300 return c;
1301 else
1302 return c | (KANJI << 16);
1304 if (encoding == BIG5 || encoding == GBK)
1306 if (!*statep)
1308 if (c & 0x80)
/* in GBK the single byte 0x80 is returned as 0xa4 in font 'b'|0x80 */
1310 if (encoding == GBK && c == 0x80)
1311 return 0xa4 | (('b'|0x80) << 16);
1312 *statep = c;
1313 return -1;
1315 return c;
1317 t = c;
1318 c = *statep;
1319 *statep = 0;
1320 c &= 0x7f;
1321 return c << 8 | t | (encoding == BIG5 ? 030 << 16 : 031 << 16);
/* every other encoding is single-byte: tag the byte with the encoding's default font */
1323 return c | (encodings[encoding].deffont << 16);
1327 EncodeChar(bp, c, encoding, fontp)
1328 char *bp;
1329 int c;
1330 int encoding;
1331 int *fontp;
1333 int t, f, l;
1335 debug2("Encoding char %02x for encoding %d\n", c, encoding);
1336 if (c == -1 && fontp)
1338 if (*fontp == 0)
1339 return 0;
1340 if (bp)
1342 *bp++ = 033;
1343 *bp++ = '(';
1344 *bp++ = 'B';
1346 return 3;
1348 f = c >> 16;
1350 #ifdef UTF8
1351 if (encoding == UTF8)
1353 if (f)
1355 # ifdef DW_CHARS
1356 if (is_dw_font(f))
1358 int c2 = c & 0xff;
1359 c = (c >> 8 & 0xff) | (f << 8);
1360 c = recode_char_dw_to_encoding(c, &c2, encoding);
1362 else
1363 # endif
1365 c = (c & 0xff) | (f << 8);
1366 c = recode_char_to_encoding(c, encoding);
1369 return ToUtf8(bp, c);
1371 if ((c & 0xff00) && f == 0) /* is_utf8? */
1373 # ifdef DW_CHARS
1374 if (utf8_isdouble(c))
1376 int c2 = 0xffff;
1377 c = recode_char_dw_to_encoding(c, &c2, encoding);
1378 c = (c << 8) | (c2 & 0xff);
1380 else
1381 # endif
1383 c = recode_char_to_encoding(c, encoding);
1384 c = ((c & 0xff00) << 8) | (c & 0xff);
1386 debug1("Encode: char mapped from utf8 to %x\n", c);
1387 f = c >> 16;
1389 #endif
1390 if (f & 0x80) /* map special 96-fonts to latin1 */
1391 f = 0;
1393 if (encoding == SJIS)
1395 if (f == KANA)
1396 c = (c & 0xff) | 0x80;
1397 else if (f == KANJI)
1399 if (!bp)
1400 return 2;
1401 t = c & 0xff;
1402 c = (c >> 8) & 0xff;
1403 t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e;
1404 c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
1405 *bp++ = c;
1406 *bp++ = t;
1407 return 2;
1410 if (encoding == EUC)
1412 if (f == KANA)
1414 if (bp)
1416 *bp++ = 0x8e;
1417 *bp++ = c;
1419 return 2;
1421 if (f == KANJI)
1423 if (bp)
1425 *bp++ = (c >> 8) | 0x80;
1426 *bp++ = c | 0x80;
1428 return 2;
1430 if (f == KANJI0212)
1432 if (bp)
1434 *bp++ = 0x8f;
1435 *bp++ = c >> 8;
1436 *bp++ = c;
1438 return 3;
1441 if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1))
1443 if (bp)
1445 *bp++ = (c >> 8) | 0x80;
1446 *bp++ = c | 0x80;
1448 return 2;
1450 if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031))
1452 if (bp)
1454 *bp++ = (c >> 8) | 0x80;
1455 *bp++ = c;
1457 return 2;
1459 if (encoding == GBK && f == 0 && c == 0xa4)
1460 c = 0x80;
1462 l = 0;
1463 if (fontp && f != *fontp)
1465 *fontp = f;
1466 if (f && f < ' ')
1468 if (bp)
1470 *bp++ = 033;
1471 *bp++ = '$';
1472 if (f > 2)
1473 *bp++ = '(';
1474 *bp++ = '@' + f;
1476 l += f > 2 ? 4 : 3;
1478 else if (f < 128)
1480 if (f == 0)
1481 f = 'B';
1482 if (bp)
1484 *bp++ = 033;
1485 *bp++ = '(';
1486 *bp++ = f;
1488 l += 3;
1491 if (c & 0xff00)
1493 if (bp)
1494 *bp++ = c >> 8;
1495 l++;
1497 if (bp)
1498 *bp++ = c;
1499 return l + 1;
1503 CanEncodeFont(encoding, f)
1504 int encoding, f;
1506 switch(encoding)
1508 #ifdef UTF8
1509 case UTF8:
1510 return 1;
1511 #endif
1512 case SJIS:
1513 return f == KANJI || f == KANA;
1514 case EUC:
1515 return f == KANJI || f == KANA || f == KANJI0212;
1516 case EUC_KR:
1517 return f == 3;
1518 case EUC_CN:
1519 return f == 1;
1520 case BIG5:
1521 return f == 030;
1522 case GBK:
1523 return f == 031;
1524 default:
1525 break;
1527 return 0;
#ifdef DW_CHARS
/*
 * Convert c for output in the display's encoding (D_encoding), given
 * the display's current font (D_rend.font).  Returns the converted
 * value of c.  Where the encoding requires it, D_mbcs is rewritten
 * as well (presumably the second byte of a double-byte character --
 * confirm against the caller), and for EUC a prefix byte is emitted
 * with AddChar() first.
 */
int
PrepareEncodedChar(c)
int c;
{
  int enc = D_encoding;
  int font = D_rend.font;
  int second = D_mbcs;

  if (enc == SJIS)
    {
      if (font == KANA)
	return c | 0x80;
      if (font != KANJI)
	return c;
      /* both adjustments are computed from the unmodified c */
      if (c & 1)
	second += (second <= 0x5f) ? 0x1f : 0x20;
      else
	second += 0x7e;
      c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
      D_mbcs = second;
      return c;
    }
  if (enc == EUC)
    {
      if (font == KANA)
	{
	  AddChar(0x8e);
	  return c | 0x80;
	}
      if (font == KANJI0212)
	{
	  AddChar(0x8f);
	}
      if (font == KANJI || font == KANJI0212)
	{
	  D_mbcs = second | 0x80;
	  return c | 0x80;
	}
    }
  if ((enc == EUC_KR && font == 3) || (enc == EUC_CN && font == 1))
    {
      D_mbcs = second | 0x80;
      return c | 0x80;
    }
  if ((enc == BIG5 && font == 030) || (enc == GBK && font == 031))
    return c | 0x80;
  return c;
}
#endif
/*
 * Recode the flen bytes at fbuf from encoding fenc to encoding tenc.
 *
 * Every byte goes through DecodeChar(); each completed character is
 * written with EncodeChar(), followed by a final EncodeChar(-1) call.
 * tbuf may be a null pointer, in which case nothing is stored.
 * Returns the sum of the lengths reported by EncodeChar().
 */
int
RecodeBuf(fbuf, flen, fenc, tenc, tbuf)
unsigned char *fbuf;
int flen;
int fenc, tenc;
unsigned char *tbuf;
{
  int in = 0, out = 0;
  int ch;
  int decstate = 0, font = 0;
  char *dst;

  while (in < flen)
    {
      ch = DecodeChar(fbuf[in], fenc, &decstate);
      /* on -2 the input position is not advanced: same byte again */
      if (ch != -2)
	in++;
      if (ch < 0)
	continue;
      dst = tbuf ? (char *)tbuf + out : 0;
      out += EncodeChar(dst, ch, tenc, &font);
    }
  dst = tbuf ? (char *)tbuf + out : 0;
  out += EncodeChar(dst, -1, tenc, &font);
  return out;
}
1608 #ifdef UTF8
1610 ContainsSpecialDeffont(ml, xs, xe, encoding)
1611 struct mline *ml;
1612 int xs, xe;
1613 int encoding;
1615 unsigned char *f, *i;
1616 int c, x, dx;
1618 if (encoding == UTF8 || encodings[encoding].deffont == 0)
1619 return 0;
1620 i = ml->image + xs;
1621 f = ml->font + xs;
1622 dx = xe - xs + 1;
1623 while (dx-- > 0)
1625 if (*f++)
1626 continue;
1627 c = *i++;
1628 x = recode_char_to_encoding(c | (encodings[encoding].deffont << 8), UTF8);
1629 if (c != x)
1631 debug2("ContainsSpecialDeffont: yes %02x != %02x\n", c, x);
1632 return 1;
1635 debug("ContainsSpecialDeffont: no\n");
1636 return 0;
1641 LoadFontTranslation(font, file)
1642 int font;
1643 char *file;
1645 char buf[1024], *myfile;
1646 FILE *f;
1647 int i;
1648 int fo;
1649 int x, u, c, ok;
1650 unsigned short (*p)[2], (*tab)[2];
1652 myfile = file;
1653 if (myfile == 0)
1655 if (font == 0 || screenencodings == 0)
1656 return -1;
1657 if (strlen(screenencodings) > sizeof(buf) - 10)
1658 return -1;
1659 sprintf(buf, "%s/%02x", screenencodings, font & 0xff);
1660 myfile = buf;
1662 debug1("LoadFontTranslation: trying %s\n", myfile);
1663 if ((f = secfopen(myfile, "r")) == 0)
1664 return -1;
1665 i = ok = 0;
1666 for (;;)
1668 for(; i < 12; i++)
1669 if (getc(f) != "ScreenI2UTF8"[i])
1670 break;
1671 if (getc(f) != 0) /* format */
1672 break;
1673 fo = getc(f); /* id */
1674 if (fo == EOF)
1675 break;
1676 if (font != -1 && font != fo)
1677 break;
1678 i = getc(f);
1679 x = getc(f);
1680 if (x == EOF)
1681 break;
1682 i = i << 8 | x;
1683 getc(f);
1684 while ((x = getc(f)) && x != EOF)
1685 getc(f); /* skip font name (padded to 2 bytes) */
1686 if ((p = malloc(sizeof(*p) * (i + 1))) == 0)
1687 break;
1688 tab = p;
1689 while(i > 0)
1691 x = getc(f);
1692 x = x << 8 | getc(f);
1693 u = getc(f);
1694 c = getc(f);
1695 u = u << 8 | c;
1696 if (c == EOF)
1697 break;
1698 (*p)[0] = x;
1699 (*p)[1] = u;
1700 p++;
1701 i--;
1703 (*p)[0] = 0;
1704 (*p)[1] = 0;
1705 if (i || (tab[0][0] & 0x8000))
1707 free(tab);
1708 break;
1710 if (recodetabs[fo].tab && (recodetabs[fo].flags & RECODETAB_ALLOCED) != 0)
1711 free(recodetabs[fo].tab);
1712 recodetabs[fo].tab = tab;
1713 recodetabs[fo].flags = RECODETAB_ALLOCED;
1714 debug1("Successful load of recodetab %02x\n", fo);
1715 c = getc(f);
1716 if (c == EOF)
1718 ok = 1;
1719 break;
1721 if (c != 'S')
1722 break;
1723 i = 1;
1725 fclose(f);
1726 if (font != -1 && file == 0 && recodetabs[font].flags == 0)
1727 recodetabs[font].flags = RECODETAB_TRIED;
1728 return ok ? 0 : -1;
1731 void
1732 LoadFontTranslationsForEncoding(encoding)
1733 int encoding;
1735 char *c;
1736 int f;
1738 debug1("LoadFontTranslationsForEncoding: encoding %d\n", encoding);
1739 if ((c = encodings[encoding].fontlist) != 0)
1740 while ((f = (unsigned char)*c++) != 0)
1741 if (recodetabs[f].flags == 0)
1742 LoadFontTranslation(f, 0);
1743 f = encodings[encoding].deffont;
1744 if (f > 0 && recodetabs[f].flags == 0)
1745 LoadFontTranslation(f, 0);
1748 #endif /* UTF8 */
1750 #else /* !ENCODINGS */
/*
 * Simple version of EncodeChar to encode font changes for
 * copy/paste mode.
 *
 * c holds the font in bits 16 and up and one or two character bytes
 * below; c == -1 stands for "no character, font 0".  If fontp is not
 * null and the font differs from *fontp, *fontp is updated and an
 * escape sequence selecting the font comes first.  Output goes to bp
 * unless bp is a null pointer.  encoding is not used.  Returns the
 * number of bytes of the encoded form.
 */
int
EncodeChar(bp, c, encoding, fontp)
char *bp;
int c;
int encoding;
int *fontp;
{
  int font, len = 0;

  font = (c == -1) ? 0 : c >> 16;
  if (fontp != 0 && *fontp != font)
    {
      *fontp = font;
      if (font != 0 && font < ' ')
	{
	  /* ESC $ F for fonts 1 and 2, ESC $ ( F above that */
	  int wide = font > 2;

	  if (bp)
	    {
	      *bp++ = 033;
	      *bp++ = '$';
	      if (wide)
		*bp++ = '(';
	      *bp++ = '@' + font;
	    }
	  len += wide ? 4 : 3;
	}
      else if (font < 128)
	{
	  /* ESC ( F, with 'B' standing in for font 0 */
	  if (bp)
	    {
	      *bp++ = 033;
	      *bp++ = '(';
	      *bp++ = font ? font : 'B';
	    }
	  len += 3;
	}
    }
  if (c == -1)
    return len;
  if (c & 0xff00)
    {
      if (bp)
	*bp++ = c >> 8;
      len++;
    }
  if (bp)
    *bp = c;
  return len + 1;
}
1806 #endif /* ENCODINGS */