Start converting to GPL v3+ (ref: ticket #23900)
[screen-lua.git] / src / encoding.c
blob1601105cbce23f76e6ed19c91f3308f8cbec07b1
1 /* Copyright (c) 1993-2003
2 * Juergen Weigert (jnweiger@immd4.informatik.uni-erlangen.de)
3 * Michael Schroeder (mlschroe@immd4.informatik.uni-erlangen.de)
4 * Copyright (c) 1987 Oliver Laumann
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3, or (at your option)
9 * any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program (see the file COPYING); if not, see
18 * http://www.gnu.org/licenses/, or contact Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
21 ****************************************************************
24 #include <sys/types.h>
26 #include "config.h"
27 #include "screen.h"
28 #include "extern.h"
30 #ifdef ENCODINGS
32 extern unsigned char *null;
33 extern struct display *display, *displays;
34 extern struct layer *flayer;
36 extern char *screenencodings;
38 #ifdef DW_CHARS
39 extern int cjkwidth;
40 #endif
42 static int encmatch __P((char *, char *));
43 # ifdef UTF8
44 static int recode_char __P((int, int, int));
45 static int recode_char_to_encoding __P((int, int));
46 static void comb_tofront __P((int, int));
47 # ifdef DW_CHARS
48 static int recode_char_dw __P((int, int *, int, int));
49 static int recode_char_dw_to_encoding __P((int, int *, int));
50 # endif
51 # endif
53 struct encoding {
54 char *name;
55 char *charsets;
56 int deffont;
57 int usegr;
58 int noc1;
59 char *fontlist;
62 /* big5 font: ^X */
63 /* KOI8-R font: 96 ! */
64 /* CP1251 font: 96 ? */
66 struct encoding encodings[] = {
67 { "C", 0, 0, 0, 0, 0 },
68 { "eucJP", "B\002I\00401", 0, 1, 0, "\002\004I" },
69 { "SJIS", "BIBB01", 0, 1, 1, "\002I" },
70 { "eucKR", "B\003BB01", 0, 1, 0, "\003" },
71 { "eucCN", "B\001BB01", 0, 1, 0, "\001" },
72 { "Big5", "B\030BB01", 0, 1, 0, "\030" },
73 { "KOI8-R", 0, 0x80|'!', 0, 1, 0 },
74 { "CP1251", 0, 0x80|'?', 0, 1, 0 },
75 { "UTF-8", 0, -1, 0, 0, 0 },
76 { "ISO8859-2", 0, 0x80|'B', 0, 0, 0 },
77 { "ISO8859-3", 0, 0x80|'C', 0, 0, 0 },
78 { "ISO8859-4", 0, 0x80|'D', 0, 0, 0 },
79 { "ISO8859-5", 0, 0x80|'L', 0, 0, 0 },
80 { "ISO8859-6", 0, 0x80|'G', 0, 0, 0 },
81 { "ISO8859-7", 0, 0x80|'F', 0, 0, 0 },
82 { "ISO8859-8", 0, 0x80|'H', 0, 0, 0 },
83 { "ISO8859-9", 0, 0x80|'M', 0, 0, 0 },
84 { "ISO8859-10", 0, 0x80|'V', 0, 0, 0 },
85 { "ISO8859-15", 0, 0x80|'b', 0, 0, 0 },
86 { "jis", 0, 0, 0, 0, "\002\004I" },
87 { "GBK", "B\031BB01", 0x80|'b', 1, 1, "\031" }
90 #ifdef UTF8
92 static unsigned short builtin_tabs[][2] = {
93 { 0x30, 0 }, /* 0: special graphics (line drawing) */
94 { 0x005f, 0x25AE },
95 { 0x0060, 0x25C6 },
96 { 0x0061, 0x2592 },
97 { 0x0062, 0x2409 },
98 { 0x0063, 0x240C },
99 { 0x0064, 0x240D },
100 { 0x0065, 0x240A },
101 { 0x0066, 0x00B0 },
102 { 0x0067, 0x00B1 },
103 { 0x0068, 0x2424 },
104 { 0x0069, 0x240B },
105 { 0x006a, 0x2518 },
106 { 0x006b, 0x2510 },
107 { 0x006c, 0x250C },
108 { 0x006d, 0x2514 },
109 { 0x006e, 0x253C },
110 { 0x006f, 0x23BA },
111 { 0x0070, 0x23BB },
112 { 0x0071, 0x2500 },
113 { 0x0072, 0x23BC },
114 { 0x0073, 0x23BD },
115 { 0x0074, 0x251C },
116 { 0x0075, 0x2524 },
117 { 0x0076, 0x2534 },
118 { 0x0077, 0x252C },
119 { 0x0078, 0x2502 },
120 { 0x0079, 0x2264 },
121 { 0x007a, 0x2265 },
122 { 0x007b, 0x03C0 },
123 { 0x007c, 0x2260 },
124 { 0x007d, 0x00A3 },
125 { 0x007e, 0x00B7 },
126 { 0, 0},
128 { 0x34, 0 }, /* 4: Dutch */
129 { 0x0023, 0x00a3 },
130 { 0x0040, 0x00be },
131 { 0x005b, 0x00ff },
132 { 0x005c, 0x00bd },
133 { 0x005d, 0x007c },
134 { 0x007b, 0x00a8 },
135 { 0x007c, 0x0066 },
136 { 0x007d, 0x00bc },
137 { 0x007e, 0x00b4 },
138 { 0, 0},
140 { 0x35, 0 }, /* 5: Finnish */
141 { 0x005b, 0x00c4 },
142 { 0x005c, 0x00d6 },
143 { 0x005d, 0x00c5 },
144 { 0x005e, 0x00dc },
145 { 0x0060, 0x00e9 },
146 { 0x007b, 0x00e4 },
147 { 0x007c, 0x00f6 },
148 { 0x007d, 0x00e5 },
149 { 0x007e, 0x00fc },
150 { 0, 0},
152 { 0x36, 0 }, /* 6: Norwegian/Danish */
153 { 0x0040, 0x00c4 },
154 { 0x005b, 0x00c6 },
155 { 0x005c, 0x00d8 },
156 { 0x005d, 0x00c5 },
157 { 0x005e, 0x00dc },
158 { 0x0060, 0x00e4 },
159 { 0x007b, 0x00e6 },
160 { 0x007c, 0x00f8 },
161 { 0x007d, 0x00e5 },
162 { 0x007e, 0x00fc },
163 { 0, 0},
165 { 0x37, 0 }, /* 7: Swedish */
166 { 0x0040, 0x00c9 },
167 { 0x005b, 0x00c4 },
168 { 0x005c, 0x00d6 },
169 { 0x005d, 0x00c5 },
170 { 0x005e, 0x00dc },
171 { 0x0060, 0x00e9 },
172 { 0x007b, 0x00e4 },
173 { 0x007c, 0x00f6 },
174 { 0x007d, 0x00e5 },
175 { 0x007e, 0x00fc },
176 { 0, 0},
178 { 0x3d, 0}, /* =: Swiss */
179 { 0x0023, 0x00f9 },
180 { 0x0040, 0x00e0 },
181 { 0x005b, 0x00e9 },
182 { 0x005c, 0x00e7 },
183 { 0x005d, 0x00ea },
184 { 0x005e, 0x00ee },
185 { 0x005f, 0x00e8 },
186 { 0x0060, 0x00f4 },
187 { 0x007b, 0x00e4 },
188 { 0x007c, 0x00f6 },
189 { 0x007d, 0x00fc },
190 { 0x007e, 0x00fb },
191 { 0, 0},
193 { 0x41, 0}, /* A: UK */
194 { 0x0023, 0x00a3 },
195 { 0, 0},
197 { 0x4b, 0}, /* K: German */
198 { 0x0040, 0x00a7 },
199 { 0x005b, 0x00c4 },
200 { 0x005c, 0x00d6 },
201 { 0x005d, 0x00dc },
202 { 0x007b, 0x00e4 },
203 { 0x007c, 0x00f6 },
204 { 0x007d, 0x00fc },
205 { 0x007e, 0x00df },
206 { 0, 0},
208 { 0x51, 0}, /* Q: French Canadian */
209 { 0x0040, 0x00e0 },
210 { 0x005b, 0x00e2 },
211 { 0x005c, 0x00e7 },
212 { 0x005d, 0x00ea },
213 { 0x005e, 0x00ee },
214 { 0x0060, 0x00f4 },
215 { 0x007b, 0x00e9 },
216 { 0x007c, 0x00f9 },
217 { 0x007d, 0x00e8 },
218 { 0x007e, 0x00fb },
219 { 0, 0},
221 { 0x52, 0}, /* R: French */
222 { 0x0023, 0x00a3 },
223 { 0x0040, 0x00e0 },
224 { 0x005b, 0x00b0 },
225 { 0x005c, 0x00e7 },
226 { 0x005d, 0x00a7 },
227 { 0x007b, 0x00e9 },
228 { 0x007c, 0x00f9 },
229 { 0x007d, 0x00e8 },
230 { 0x007e, 0x00a8 },
231 { 0, 0},
233 { 0x59, 0}, /* Y: Italian */
234 { 0x0023, 0x00a3 },
235 { 0x0040, 0x00a7 },
236 { 0x005b, 0x00b0 },
237 { 0x005c, 0x00e7 },
238 { 0x005d, 0x00e9 },
239 { 0x0060, 0x00f9 },
240 { 0x007b, 0x00e0 },
241 { 0x007c, 0x00f2 },
242 { 0x007d, 0x00e8 },
243 { 0x007e, 0x00ec },
244 { 0, 0},
246 { 0x5a, 0}, /* Z: Spanish */
247 { 0x0023, 0x00a3 },
248 { 0x0040, 0x00a7 },
249 { 0x005b, 0x00a1 },
250 { 0x005c, 0x00d1 },
251 { 0x005d, 0x00bf },
252 { 0x007b, 0x00b0 },
253 { 0x007c, 0x00f1 },
254 { 0x007d, 0x00e7 },
255 { 0, 0},
257 { 0xe2, 0}, /* 96-b: ISO-8859-15 */
258 { 0x00a4, 0x20ac },
259 { 0x00a6, 0x0160 },
260 { 0x00a8, 0x0161 },
261 { 0x00b4, 0x017D },
262 { 0x00b8, 0x017E },
263 { 0x00bc, 0x0152 },
264 { 0x00bd, 0x0153 },
265 { 0x00be, 0x0178 },
266 { 0, 0},
268 { 0x4a, 0}, /* J: JIS 0201 Roman */
269 { 0x005c, 0x00a5 },
270 { 0x007e, 0x203e },
271 { 0, 0},
273 { 0x49, 0}, /* I: halfwidth katakana */
274 { 0x0021, 0xff61 },
275 { 0x005f|0x8000, 0xff9f },
276 { 0, 0},
278 { 0, 0}
281 struct recodetab
283 unsigned short (*tab)[2];
284 int flags;
287 #define RECODETAB_ALLOCED 1
288 #define RECODETAB_BUILTIN 2
289 #define RECODETAB_TRIED 4
291 static struct recodetab recodetabs[256];
293 void
294 InitBuiltinTabs()
296 unsigned short (*p)[2];
297 for (p = builtin_tabs; (*p)[0]; p++)
299 recodetabs[(*p)[0]].flags = RECODETAB_BUILTIN;
300 recodetabs[(*p)[0]].tab = p + 1;
301 p++;
302 while((*p)[0])
303 p++;
307 static int
308 recode_char(c, to_utf, font)
309 int c, to_utf, font;
311 int f;
312 unsigned short (*p)[2];
314 if (to_utf)
316 if (c < 256)
317 return c;
318 f = (c >> 8) & 0xff;
319 c &= 0xff;
320 /* map aliases to keep the table small */
321 switch (f)
323 case 'C':
324 f ^= ('C' ^ '5');
325 break;
326 case 'E':
327 f ^= ('E' ^ '6');
328 break;
329 case 'H':
330 f ^= ('H' ^ '7');
331 break;
332 default:
333 break;
335 p = recodetabs[f].tab;
336 if (p == 0 && recodetabs[f].flags == 0)
338 LoadFontTranslation(f, 0);
339 p = recodetabs[f].tab;
341 if (p)
342 for (; (*p)[0]; p++)
344 if ((p[0][0] & 0x8000) && (c <= (p[0][0] & 0x7fff)) && c >= p[-1][0])
345 return c - p[-1][0] + p[-1][1];
346 if ((*p)[0] == c)
347 return (*p)[1];
349 return c & 0xff; /* map to latin1 */
351 if (font == -1)
353 if (c < 256)
354 return c; /* latin1 */
355 for (font = 32; font < 128; font++)
357 p = recodetabs[font].tab;
358 if (p)
359 for (; (*p)[1]; p++)
361 if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])
362 return (c - p[-1][1] + p[-1][0]) | (font << 8);
363 if ((*p)[1] == c)
364 return (*p)[0] | (font << 8);
367 return '?';
369 if (c < 128 && (font & 128) != 0)
370 return c;
371 if (font >= 32)
373 p = recodetabs[font].tab;
374 if (p == 0 && recodetabs[font].flags == 0)
376 LoadFontTranslation(font, 0);
377 p = recodetabs[font].tab;
379 if (p)
380 for (; (*p)[1]; p++)
382 if ((p[0][0] & 0x8000) && c <= p[0][1] && c >= p[-1][1])
383 return (c - p[-1][1] + p[-1][0]) | (font & 128 ? 0 : font << 8);
384 if ((*p)[1] == c)
385 return (*p)[0] | (font & 128 ? 0 : font << 8);
388 return -1;
392 #ifdef DW_CHARS
393 static int
394 recode_char_dw(c, c2p, to_utf, font)
395 int c, *c2p, to_utf, font;
397 int f;
398 unsigned short (*p)[2];
400 if (to_utf)
402 f = (c >> 8) & 0xff;
403 c = (c & 255) << 8 | (*c2p & 255);
404 *c2p = 0xffff;
405 p = recodetabs[f].tab;
406 if (p == 0 && recodetabs[f].flags == 0)
408 LoadFontTranslation(f, 0);
409 p = recodetabs[f].tab;
411 if (p)
412 for (; (*p)[0]; p++)
413 if ((*p)[0] == c)
415 #ifdef DW_CHARS
416 if (!utf8_isdouble((*p)[1]))
417 *c2p = ' ';
418 #endif
419 return (*p)[1];
421 return UCS_REPL_DW;
423 if (font == -1)
425 for (font = 0; font < 030; font++)
427 p = recodetabs[font].tab;
428 if (p)
429 for (; (*p)[1]; p++)
430 if ((*p)[1] == c)
432 *c2p = ((*p)[0] & 255) | font << 8 | 0x8000;
433 return ((*p)[0] >> 8) | font << 8;
436 *c2p = '?';
437 return '?';
439 if (font < 32)
441 p = recodetabs[font].tab;
442 if (p == 0 && recodetabs[font].flags == 0)
444 LoadFontTranslation(font, 0);
445 p = recodetabs[font].tab;
447 if (p)
448 for (; (*p)[1]; p++)
449 if ((*p)[1] == c)
451 *c2p = ((*p)[0] & 255) | font << 8 | 0x8000;
452 return ((*p)[0] >> 8) | font << 8;
455 return -1;
457 #endif
459 static int
460 recode_char_to_encoding(c, encoding)
461 int c, encoding;
463 char *fp;
464 int x;
466 if (encoding == UTF8)
467 return recode_char(c, 1, -1);
468 if ((fp = encodings[encoding].fontlist) != 0)
469 while(*fp)
470 if ((x = recode_char(c, 0, (unsigned char)*fp++)) != -1)
471 return x;
472 if (encodings[encoding].deffont)
473 if ((x = recode_char(c, 0, encodings[encoding].deffont)) != -1)
474 return x;
475 return recode_char(c, 0, -1);
478 #ifdef DW_CHARS
479 static int
480 recode_char_dw_to_encoding(c, c2p, encoding)
481 int c, *c2p, encoding;
483 char *fp;
484 int x;
486 if (encoding == UTF8)
487 return recode_char_dw(c, c2p, 1, -1);
488 if ((fp = encodings[encoding].fontlist) != 0)
489 while(*fp)
490 if ((x = recode_char_dw(c, c2p, 0, (unsigned char)*fp++)) != -1)
491 return x;
492 if (encodings[encoding].deffont)
493 if ((x = recode_char_dw(c, c2p, 0, encodings[encoding].deffont)) != -1)
494 return x;
495 return recode_char_dw(c, c2p, 0, -1);
497 #endif
500 struct mchar *
501 recode_mchar(mc, from, to)
502 struct mchar *mc;
503 int from, to;
505 static struct mchar rmc;
506 int c;
508 debug3("recode_mchar %02x from %d to %d\n", mc->image, from, to);
509 if (from == to || (from != UTF8 && to != UTF8))
510 return mc;
511 rmc = *mc;
512 if (rmc.font == 0 && from != UTF8)
513 rmc.font = encodings[from].deffont;
514 if (rmc.font == 0) /* latin1 is the same in unicode */
515 return mc;
516 c = rmc.image | (rmc.font << 8);
517 #ifdef DW_CHARS
518 if (rmc.mbcs)
520 int c2 = rmc.mbcs;
521 c = recode_char_dw_to_encoding(c, &c2, to);
522 rmc.mbcs = c2;
524 else
525 #endif
526 c = recode_char_to_encoding(c, to);
527 rmc.image = c & 255;
528 rmc.font = c >> 8 & 255;
529 return &rmc;
532 struct mline *
533 recode_mline(ml, w, from, to)
534 struct mline *ml;
535 int w;
536 int from, to;
538 static int maxlen;
539 static int last;
540 static struct mline rml[2], *rl;
541 int i, c;
543 if (from == to || (from != UTF8 && to != UTF8) || w == 0)
544 return ml;
545 if (ml->font == null && encodings[from].deffont == 0)
546 return ml;
547 if (w > maxlen)
549 for (i = 0; i < 2; i++)
551 if (rml[i].image == 0)
552 rml[i].image = malloc(w);
553 else
554 rml[i].image = realloc(rml[i].image, w);
555 if (rml[i].font == 0)
556 rml[i].font = malloc(w);
557 else
558 rml[i].font = realloc(rml[i].font, w);
559 if (rml[i].image == 0 || rml[i].font == 0)
561 maxlen = 0;
562 return ml; /* sorry */
565 maxlen = w;
568 debug("recode_mline: from\n");
569 for (i = 0; i < w; i++)
570 debug1("%c", "0123456789abcdef"[(ml->image[i] >> 4) & 15]);
571 debug("\n");
572 for (i = 0; i < w; i++)
573 debug1("%c", "0123456789abcdef"[(ml->image[i] ) & 15]);
574 debug("\n");
575 for (i = 0; i < w; i++)
576 debug1("%c", "0123456789abcdef"[(ml->font[i] >> 4) & 15]);
577 debug("\n");
578 for (i = 0; i < w; i++)
579 debug1("%c", "0123456789abcdef"[(ml->font[i] ) & 15]);
580 debug("\n");
582 rl = rml + last;
583 rl->attr = ml->attr;
584 #ifdef COLOR
585 rl->color = ml->color;
586 # ifdef COLORS256
587 rl->colorx = ml->colorx;
588 # endif
589 #endif
590 for (i = 0; i < w; i++)
592 c = ml->image[i] | (ml->font[i] << 8);
593 if (from != UTF8 && c < 256)
594 c |= encodings[from].deffont << 8;
595 #ifdef DW_CHARS
596 if ((from != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (from == UTF8 && utf8_isdouble(c)))
598 if (i + 1 == w)
599 c = '?';
600 else
602 int c2;
603 i++;
604 c2 = ml->image[i] | (ml->font[i] << 8);
605 c = recode_char_dw_to_encoding(c, &c2, to);
606 rl->font[i - 1] = c >> 8 & 255;
607 rl->image[i - 1] = c & 255;
608 c = c2;
611 else
612 #endif
613 c = recode_char_to_encoding(c, to);
614 rl->image[i] = c & 255;
615 rl->font[i] = c >> 8 & 255;
617 last ^= 1;
618 debug("recode_mline: to\n");
619 for (i = 0; i < w; i++)
620 debug1("%c", "0123456789abcdef"[(rl->image[i] >> 4) & 15]);
621 debug("\n");
622 for (i = 0; i < w; i++)
623 debug1("%c", "0123456789abcdef"[(rl->image[i] ) & 15]);
624 debug("\n");
625 for (i = 0; i < w; i++)
626 debug1("%c", "0123456789abcdef"[(rl->font[i] >> 4) & 15]);
627 debug("\n");
628 for (i = 0; i < w; i++)
629 debug1("%c", "0123456789abcdef"[(rl->font[i] ) & 15]);
630 debug("\n");
631 return rl;
634 struct combchar {
635 unsigned short c1;
636 unsigned short c2;
637 unsigned short next;
638 unsigned short prev;
640 struct combchar **combchars;
642 void
643 AddUtf8(c)
644 int c;
646 ASSERT(D_encoding == UTF8);
647 if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])
649 AddUtf8(combchars[c - 0xd800]->c1);
650 c = combchars[c - 0xd800]->c2;
652 if (c >= 0x800)
654 AddChar((c & 0xf000) >> 12 | 0xe0);
655 c = (c & 0x0fff) | 0x1000;
657 if (c >= 0x80)
659 AddChar((c & 0x1fc0) >> 6 ^ 0xc0);
660 c = (c & 0x3f) | 0x80;
662 AddChar(c);
666 ToUtf8_comb(p, c)
667 char *p;
668 int c;
670 int l;
672 if (c >= 0xd800 && c < 0xe000 && combchars && combchars[c - 0xd800])
674 l = ToUtf8_comb(p, combchars[c - 0xd800]->c1);
675 return l + ToUtf8(p ? p + l : 0, combchars[c - 0xd800]->c2);
677 return ToUtf8(p, c);
/*
 * Encode 'c' as UTF-8: one, two or three bytes.
 * If 'p' is not null the bytes are stored there (no terminator is
 * written).  Returns the number of bytes of the encoding.
 */
int
ToUtf8(p, c)
char *p;
int c;
{
  int seq[3];
  int n = 0, k;

  if (c >= 0x800)
    {
      seq[n++] = (c & 0xf000) >> 12 | 0xe0;
      c = (c & 0x0fff) | 0x1000;	/* marker: continuation comes next */
    }
  if (c >= 0x80)
    {
      /* yields 0xc0|bits for a lead byte, 0x80|bits when the marker is set */
      seq[n++] = (c & 0x1fc0) >> 6 ^ 0xc0;
      c = (c & 0x3f) | 0x80;
    }
  seq[n++] = c;

  if (p)
    for (k = 0; k < n; k++)
      p[k] = seq[k];
  return n;
}
706 * returns:
707 * -1: need more bytes, sequence not finished
708 * -2: corrupt sequence found, redo last char
709 * >= 0: decoded character
712 FromUtf8(c, utf8charp)
713 int c, *utf8charp;
715 int utf8char = *utf8charp;
716 if (utf8char)
718 if ((c & 0xc0) != 0x80)
720 *utf8charp = 0;
721 return -2; /* corrupt sequence! */
723 else
724 c = (c & 0x3f) | (utf8char << 6);
725 if (!(utf8char & 0x40000000))
727 /* check for overlong sequences */
728 if ((c & 0x820823e0) == 0x80000000)
729 c = 0xfdffffff;
730 else if ((c & 0x020821f0) == 0x02000000)
731 c = 0xfff7ffff;
732 else if ((c & 0x000820f8) == 0x00080000)
733 c = 0xffffd000;
734 else if ((c & 0x0000207c) == 0x00002000)
735 c = 0xffffff70;
738 else
740 /* new sequence */
741 if (c >= 0xfe)
742 c = UCS_REPL;
743 else if (c >= 0xfc)
744 c = (c & 0x01) | 0xbffffffc; /* 5 bytes to follow */
745 else if (c >= 0xf8)
746 c = (c & 0x03) | 0xbfffff00; /* 4 */
747 else if (c >= 0xf0)
748 c = (c & 0x07) | 0xbfffc000; /* 3 */
749 else if (c >= 0xe0)
750 c = (c & 0x0f) | 0xbff00000; /* 2 */
751 else if (c >= 0xc2)
752 c = (c & 0x1f) | 0xfc000000; /* 1 */
753 else if (c >= 0xc0)
754 c = 0xfdffffff; /* overlong */
755 else if (c >= 0x80)
756 c = UCS_REPL;
758 *utf8charp = utf8char = (c & 0x80000000) ? c : 0;
759 if (utf8char)
760 return -1;
761 if (c & 0xffff0000)
762 c = UCS_REPL; /* sorry, only know 16bit Unicode */
763 if (c >= 0xd800 && (c <= 0xdfff || c == 0xfffe || c == 0xffff))
764 c = UCS_REPL; /* illegal code */
765 return c;
769 void
770 WinSwitchEncoding(p, encoding)
771 struct win *p;
772 int encoding;
774 int i, j, c;
775 struct mline *ml;
776 struct display *d;
777 struct canvas *cv;
778 struct layer *oldflayer;
780 if ((p->w_encoding == UTF8) == (encoding == UTF8))
782 p->w_encoding = encoding;
783 return;
785 oldflayer = flayer;
786 for (d = displays; d; d = d->d_next)
787 for (cv = d->d_cvlist; cv; cv = cv->c_next)
788 if (p == Layer2Window(cv->c_layer))
790 flayer = cv->c_layer;
791 while(flayer->l_next)
793 if (oldflayer == flayer)
794 oldflayer = flayer->l_next;
795 ExitOverlayPage();
798 flayer = oldflayer;
799 for (j = 0; j < p->w_height + p->w_histheight; j++)
801 #ifdef COPY_PASTE
802 ml = j < p->w_height ? &p->w_mlines[j] : &p->w_hlines[j - p->w_height];
803 #else
804 ml = &p->w_mlines[j];
805 #endif
806 if (ml->font == null && encodings[p->w_encoding].deffont == 0)
807 continue;
808 for (i = 0; i < p->w_width; i++)
810 c = ml->image[i] | (ml->font[i] << 8);
811 if (p->w_encoding != UTF8 && c < 256)
812 c |= encodings[p->w_encoding].deffont << 8;
813 if (c < 256)
814 continue;
815 if (ml->font == null)
817 if ((ml->font = (unsigned char *)malloc(p->w_width + 1)) == 0)
819 ml->font = null;
820 break;
822 bzero(ml->font, p->w_width + 1);
824 #ifdef DW_CHARS
825 if ((p->w_encoding != UTF8 && (c & 0x1f00) != 0 && (c & 0xe000) == 0) || (p->w_encoding == UTF8 && utf8_isdouble(c)))
827 if (i + 1 == p->w_width)
828 c = '?';
829 else
831 int c2;
832 i++;
833 c2 = ml->image[i] | (ml->font[i] << 8);
834 c = recode_char_dw_to_encoding(c, &c2, encoding);
835 ml->font[i - 1] = c >> 8 & 255;
836 ml->image[i - 1] = c & 255;
837 c = c2;
840 else
841 #endif
842 c = recode_char_to_encoding(c, encoding);
843 ml->image[i] = c & 255;
844 ml->font[i] = c >> 8 & 255;
847 p->w_encoding = encoding;
848 return;
851 #ifdef DW_CHARS
/* closed range [first, last] of character codes */
struct interval {
  int first;
  int last;
};

/*
 * Binary search in a sorted table of non-overlapping intervals.
 * 'max' is the index of the last table entry.
 * Returns 1 if 'ucs' lies inside one of the intervals, 0 otherwise.
 */
static int bisearch(int ucs, const struct interval *table, int max) {
  int lo = 0;
  int hi = max;

  /* quick reject for everything outside of the table */
  if (ucs < table[0].first || ucs > table[max].last)
    return 0;

  while (lo <= hi) {
    int mid = (lo + hi) / 2;

    if (ucs < table[mid].first)
      hi = mid - 1;
    else if (ucs > table[mid].last)
      lo = mid + 1;
    else
      return 1;
  }
  return 0;
}
878 utf8_isdouble(c)
879 int c;
881 /* sorted list of non-overlapping intervals of East Asian Ambiguous
882 * characters, generated by "uniset +WIDTH-A -cat=Me -cat=Mn -cat=Cf c" */
883 static const struct interval ambiguous[] = {
884 { 0x00A1, 0x00A1 }, { 0x00A4, 0x00A4 }, { 0x00A7, 0x00A8 },
885 { 0x00AA, 0x00AA }, { 0x00AE, 0x00AE }, { 0x00B0, 0x00B4 },
886 { 0x00B6, 0x00BA }, { 0x00BC, 0x00BF }, { 0x00C6, 0x00C6 },
887 { 0x00D0, 0x00D0 }, { 0x00D7, 0x00D8 }, { 0x00DE, 0x00E1 },
888 { 0x00E6, 0x00E6 }, { 0x00E8, 0x00EA }, { 0x00EC, 0x00ED },
889 { 0x00F0, 0x00F0 }, { 0x00F2, 0x00F3 }, { 0x00F7, 0x00FA },
890 { 0x00FC, 0x00FC }, { 0x00FE, 0x00FE }, { 0x0101, 0x0101 },
891 { 0x0111, 0x0111 }, { 0x0113, 0x0113 }, { 0x011B, 0x011B },
892 { 0x0126, 0x0127 }, { 0x012B, 0x012B }, { 0x0131, 0x0133 },
893 { 0x0138, 0x0138 }, { 0x013F, 0x0142 }, { 0x0144, 0x0144 },
894 { 0x0148, 0x014B }, { 0x014D, 0x014D }, { 0x0152, 0x0153 },
895 { 0x0166, 0x0167 }, { 0x016B, 0x016B }, { 0x01CE, 0x01CE },
896 { 0x01D0, 0x01D0 }, { 0x01D2, 0x01D2 }, { 0x01D4, 0x01D4 },
897 { 0x01D6, 0x01D6 }, { 0x01D8, 0x01D8 }, { 0x01DA, 0x01DA },
898 { 0x01DC, 0x01DC }, { 0x0251, 0x0251 }, { 0x0261, 0x0261 },
899 { 0x02C4, 0x02C4 }, { 0x02C7, 0x02C7 }, { 0x02C9, 0x02CB },
900 { 0x02CD, 0x02CD }, { 0x02D0, 0x02D0 }, { 0x02D8, 0x02DB },
901 { 0x02DD, 0x02DD }, { 0x02DF, 0x02DF }, { 0x0391, 0x03A1 },
902 { 0x03A3, 0x03A9 }, { 0x03B1, 0x03C1 }, { 0x03C3, 0x03C9 },
903 { 0x0401, 0x0401 }, { 0x0410, 0x044F }, { 0x0451, 0x0451 },
904 { 0x2010, 0x2010 }, { 0x2013, 0x2016 }, { 0x2018, 0x2019 },
905 { 0x201C, 0x201D }, { 0x2020, 0x2022 }, { 0x2024, 0x2027 },
906 { 0x2030, 0x2030 }, { 0x2032, 0x2033 }, { 0x2035, 0x2035 },
907 { 0x203B, 0x203B }, { 0x203E, 0x203E }, { 0x2074, 0x2074 },
908 { 0x207F, 0x207F }, { 0x2081, 0x2084 }, { 0x20AC, 0x20AC },
909 { 0x2103, 0x2103 }, { 0x2105, 0x2105 }, { 0x2109, 0x2109 },
910 { 0x2113, 0x2113 }, { 0x2116, 0x2116 }, { 0x2121, 0x2122 },
911 { 0x2126, 0x2126 }, { 0x212B, 0x212B }, { 0x2153, 0x2154 },
912 { 0x215B, 0x215E }, { 0x2160, 0x216B }, { 0x2170, 0x2179 },
913 { 0x2190, 0x2199 }, { 0x21B8, 0x21B9 }, { 0x21D2, 0x21D2 },
914 { 0x21D4, 0x21D4 }, { 0x21E7, 0x21E7 }, { 0x2200, 0x2200 },
915 { 0x2202, 0x2203 }, { 0x2207, 0x2208 }, { 0x220B, 0x220B },
916 { 0x220F, 0x220F }, { 0x2211, 0x2211 }, { 0x2215, 0x2215 },
917 { 0x221A, 0x221A }, { 0x221D, 0x2220 }, { 0x2223, 0x2223 },
918 { 0x2225, 0x2225 }, { 0x2227, 0x222C }, { 0x222E, 0x222E },
919 { 0x2234, 0x2237 }, { 0x223C, 0x223D }, { 0x2248, 0x2248 },
920 { 0x224C, 0x224C }, { 0x2252, 0x2252 }, { 0x2260, 0x2261 },
921 { 0x2264, 0x2267 }, { 0x226A, 0x226B }, { 0x226E, 0x226F },
922 { 0x2282, 0x2283 }, { 0x2286, 0x2287 }, { 0x2295, 0x2295 },
923 { 0x2299, 0x2299 }, { 0x22A5, 0x22A5 }, { 0x22BF, 0x22BF },
924 { 0x2312, 0x2312 }, { 0x2460, 0x24E9 }, { 0x24EB, 0x254B },
925 { 0x2550, 0x2573 }, { 0x2580, 0x258F }, { 0x2592, 0x2595 },
926 { 0x25A0, 0x25A1 }, { 0x25A3, 0x25A9 }, { 0x25B2, 0x25B3 },
927 { 0x25B6, 0x25B7 }, { 0x25BC, 0x25BD }, { 0x25C0, 0x25C1 },
928 { 0x25C6, 0x25C8 }, { 0x25CB, 0x25CB }, { 0x25CE, 0x25D1 },
929 { 0x25E2, 0x25E5 }, { 0x25EF, 0x25EF }, { 0x2605, 0x2606 },
930 { 0x2609, 0x2609 }, { 0x260E, 0x260F }, { 0x2614, 0x2615 },
931 { 0x261C, 0x261C }, { 0x261E, 0x261E }, { 0x2640, 0x2640 },
932 { 0x2642, 0x2642 }, { 0x2660, 0x2661 }, { 0x2663, 0x2665 },
933 { 0x2667, 0x266A }, { 0x266C, 0x266D }, { 0x266F, 0x266F },
934 { 0x273D, 0x273D }, { 0x2776, 0x277F }, { 0xE000, 0xF8FF },
935 { 0xFFFD, 0xFFFD }, { 0xF0000, 0xFFFFD }, { 0x100000, 0x10FFFD }
938 return ((c >= 0x1100 &&
939 (c <= 0x115f || /* Hangul Jamo init. consonants */
940 c == 0x2329 || c == 0x232a ||
941 (c >= 0x2e80 && c <= 0xa4cf &&
942 c != 0x303f) || /* CJK ... Yi */
943 (c >= 0xac00 && c <= 0xd7a3) || /* Hangul Syllables */
944 (c >= 0xf900 && c <= 0xfaff) || /* CJK Compatibility Ideographs */
945 (c >= 0xfe30 && c <= 0xfe6f) || /* CJK Compatibility Forms */
946 (c >= 0xff00 && c <= 0xff60) || /* Fullwidth Forms */
947 (c >= 0xffe0 && c <= 0xffe6) ||
948 (c >= 0x20000 && c <= 0x2fffd) ||
949 (c >= 0x30000 && c <= 0x3fffd))) ||
950 (cjkwidth &&
951 bisearch(c, ambiguous,
952 sizeof(ambiguous) / sizeof(struct interval) - 1)));
954 #endif
957 utf8_iscomb(c)
958 int c;
960 /* taken from Markus Kuhn's wcwidth */
961 static const struct interval combining[] = {
962 { 0x0300, 0x036F }, { 0x0483, 0x0486 }, { 0x0488, 0x0489 },
963 { 0x0591, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
964 { 0x05C4, 0x05C5 }, { 0x05C7, 0x05C7 }, { 0x0600, 0x0603 },
965 { 0x0610, 0x0615 }, { 0x064B, 0x065E }, { 0x0670, 0x0670 },
966 { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
967 { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
968 { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, { 0x0901, 0x0902 },
969 { 0x093C, 0x093C }, { 0x0941, 0x0948 }, { 0x094D, 0x094D },
970 { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, { 0x0981, 0x0981 },
971 { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD },
972 { 0x09E2, 0x09E3 }, { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C },
973 { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D },
974 { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC },
975 { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD },
976 { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C },
977 { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D },
978 { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 },
979 { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 },
980 { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBC, 0x0CBC },
981 { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
982 { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D },
983 { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 },
984 { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E },
985 { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC },
986 { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 },
987 { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E },
988 { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 },
989 { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 },
990 { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, { 0x1039, 0x1039 },
991 { 0x1058, 0x1059 }, { 0x1160, 0x11FF }, { 0x135F, 0x135F },
992 { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 },
993 { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD },
994 { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD },
995 { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 },
996 { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B },
997 { 0x1A17, 0x1A18 }, { 0x1B00, 0x1B03 }, { 0x1B34, 0x1B34 },
998 { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, { 0x1B42, 0x1B42 },
999 { 0x1B6B, 0x1B73 }, { 0x1DC0, 0x1DCA }, { 0x1DFE, 0x1DFF },
1000 { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x2060, 0x2063 },
1001 { 0x206A, 0x206F }, { 0x20D0, 0x20EF }, { 0x302A, 0x302F },
1002 { 0x3099, 0x309A }, { 0xA806, 0xA806 }, { 0xA80B, 0xA80B },
1003 { 0xA825, 0xA826 }, { 0xFB1E, 0xFB1E }, { 0xFE00, 0xFE0F },
1004 { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, { 0xFFF9, 0xFFFB },
1005 { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F },
1006 { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x1D167, 0x1D169 },
1007 { 0x1D173, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD },
1008 { 0x1D242, 0x1D244 }, { 0xE0001, 0xE0001 }, { 0xE0020, 0xE007F },
1009 { 0xE0100, 0xE01EF }
1012 return bisearch(c, combining, sizeof(combining) / sizeof(struct interval) - 1);
1015 static void
1016 comb_tofront(root, i)
1017 int root, i;
1019 for (;;)
1021 debug1("bring to front: %x\n", i);
1022 combchars[combchars[i]->prev]->next = combchars[i]->next;
1023 combchars[combchars[i]->next]->prev = combchars[i]->prev;
1024 combchars[i]->next = combchars[root]->next;
1025 combchars[i]->prev = root;
1026 combchars[combchars[root]->next]->prev = i;
1027 combchars[root]->next = i;
1028 i = combchars[i]->c1;
1029 if (i < 0xd800 || i >= 0xe000)
1030 return;
1031 i -= 0xd800;
1035 void
1036 utf8_handle_comb(c, mc)
1037 int c;
1038 struct mchar *mc;
1040 int root, i, c1;
1041 int isdouble;
1043 c1 = mc->image | (mc->font << 8);
1044 isdouble = c1 >= 0x1100 && utf8_isdouble(c1);
1045 if (!combchars)
1047 combchars = (struct combchar **)malloc(sizeof(struct combchar *) * 0x802);
1048 if (!combchars)
1049 return;
1050 bzero((char *)combchars, sizeof(struct combchar *) * 0x802);
1051 combchars[0x800] = (struct combchar *)malloc(sizeof(struct combchar));
1052 combchars[0x801] = (struct combchar *)malloc(sizeof(struct combchar));
1053 if (!combchars[0x800] || !combchars[0x801])
1055 if (combchars[0x800])
1056 free(combchars[0x800]);
1057 if (combchars[0x801])
1058 free(combchars[0x801]);
1059 free(combchars);
1060 return;
1062 combchars[0x800]->c1 = 0x000;
1063 combchars[0x800]->c2 = 0x700;
1064 combchars[0x800]->next = 0x800;
1065 combchars[0x800]->prev = 0x800;
1066 combchars[0x801]->c1 = 0x700;
1067 combchars[0x801]->c2 = 0x800;
1068 combchars[0x801]->next = 0x801;
1069 combchars[0x801]->prev = 0x801;
1071 root = isdouble ? 0x801 : 0x800;
1072 for (i = combchars[root]->c1; i < combchars[root]->c2; i++)
1074 if (!combchars[i])
1075 break;
1076 if (combchars[i]->c1 == c1 && combchars[i]->c2 == c)
1077 break;
1079 if (i == combchars[root]->c2)
1081 /* full, recycle old entry */
1082 if (c1 >= 0xd800 && c1 < 0xe000)
1083 comb_tofront(root, c1 - 0xd800);
1084 i = combchars[root]->prev;
1085 if (c1 == i + 0xd800)
1087 /* completely full, can't recycle */
1088 debug("utf8_handle_comp: completely full!\n");
1089 mc->image = '?';
1090 mc->font = 0;
1091 return;
1093 /* FIXME: delete old char from all buffers */
1095 else if (!combchars[i])
1097 combchars[i] = (struct combchar *)malloc(sizeof(struct combchar));
1098 if (!combchars[i])
1099 return;
1100 combchars[i]->prev = i;
1101 combchars[i]->next = i;
1103 combchars[i]->c1 = c1;
1104 combchars[i]->c2 = c;
1105 mc->image = i & 0xff;
1106 mc->font = (i >> 8) + 0xd8;
1107 debug3("combinig char %x %x -> %x\n", c1, c, i + 0xd800);
1108 comb_tofront(root, i);
1111 #else /* !UTF8 */
1113 void
1114 WinSwitchEncoding(p, encoding)
1115 struct win *p;
1116 int encoding;
1118 p->w_encoding = encoding;
1119 return;
1122 #endif /* UTF8 */
/*
 * Loose comparison of encoding names: case is ignored and everything
 * but letters and digits is skipped, so "UTF-8" matches "utf8".
 *
 * Returns 1 as soon as 's1' is used up without a mismatch (this makes
 * 's1' match if it is a prefix of 's2'), 0 otherwise.
 *
 * If 's2' ends while 's1' still has a letter or digit left the result
 * is 0.  The terminating NUL of 's2' is never stepped over.
 */
static int
encmatch(s1, s2)
char *s1;
char *s2;
{
  int c1, c2;

  for (;;)
    {
      c1 = (unsigned char)*s1;
      if (c1 == 0)
        return 1;		/* all of s1 matched */
      if (c1 >= 'A' && c1 <= 'Z')
        c1 += 'a' - 'A';
      if (!(c1 >= 'a' && c1 <= 'z') && !(c1 >= '0' && c1 <= '9'))
        {
          s1++;
          continue;
        }
      c2 = (unsigned char)*s2;
      if (c2 == 0)
        return 0;		/* s2 exhausted, s1 is not */
      if (c2 >= 'A' && c2 <= 'Z')
        c2 += 'a' - 'A';
      if (!(c2 >= 'a' && c2 <= 'z') && !(c2 >= '0' && c2 <= '9'))
        {
          s2++;
          continue;
        }
      if (c1 != c2)
        return 0;
      s1++;
      s2++;
    }
}
1158 FindEncoding(name)
1159 char *name;
1161 int encoding;
1163 debug1("FindEncoding %s\n", name);
1164 if (name == 0 || *name == 0)
1165 return 0;
1166 if (encmatch(name, "euc"))
1167 name = "eucJP";
1168 if (encmatch(name, "off") || encmatch(name, "iso8859-1"))
1169 return 0;
1170 #ifndef UTF8
1171 if (encmatch(name, "UTF-8"))
1172 return -1;
1173 #endif
1174 for (encoding = 0; encoding < (int)(sizeof(encodings)/sizeof(*encodings)); encoding++)
1175 if (encmatch(name, encodings[encoding].name))
1177 #ifdef UTF8
1178 LoadFontTranslationsForEncoding(encoding);
1179 #endif
1180 return encoding;
1182 return -1;
1185 char *
1186 EncodingName(encoding)
1187 int encoding;
1189 if (encoding >= (int)(sizeof(encodings)/sizeof(*encodings)))
1190 return 0;
1191 return encodings[encoding].name;
1195 EncodingDefFont(encoding)
1196 int encoding;
1198 return encodings[encoding].deffont;
1201 void
1202 ResetEncoding(p)
1203 struct win *p;
1205 char *c;
1206 int encoding = p->w_encoding;
1208 c = encodings[encoding].charsets;
1209 if (c)
1210 SetCharsets(p, c);
1211 #ifdef UTF8
1212 LoadFontTranslationsForEncoding(encoding);
1213 #endif
1214 if (encodings[encoding].usegr)
1216 p->w_gr = 2;
1217 p->w_FontE = encodings[encoding].charsets[1];
1219 else
1220 p->w_FontE = 0;
1221 if (encodings[encoding].noc1)
1222 p->w_c1 = 0;
1226 DecodeChar(c, encoding, statep)
1227 int c;
1228 int encoding;
1229 int *statep;
1231 int t;
1233 debug2("Decoding char %02x for encoding %d\n", c, encoding);
1234 #ifdef UTF8
1235 if (encoding == UTF8)
1236 return FromUtf8(c, statep);
1237 #endif
1238 if (encoding == SJIS)
1240 if (!*statep)
1242 if ((0x81 <= c && c <= 0x9f) || (0xe0 <= c && c <= 0xef))
1244 *statep = c;
1245 return -1;
1247 if (c < 0x80)
1248 return c;
1249 return c | (KANA << 16);
1251 t = c;
1252 c = *statep;
1253 *statep = 0;
1254 if (0x40 <= t && t <= 0xfc && t != 0x7f)
1256 if (c <= 0x9f)
1257 c = (c - 0x81) * 2 + 0x21;
1258 else
1259 c = (c - 0xc1) * 2 + 0x21;
1260 if (t <= 0x7e)
1261 t -= 0x1f;
1262 else if (t <= 0x9e)
1263 t -= 0x20;
1264 else
1265 t -= 0x7e, c++;
1266 return (c << 8) | t | (KANJI << 16);
1268 return t;
1270 if (encoding == EUC_JP || encoding == EUC_KR || encoding == EUC_CN)
1272 if (!*statep)
1274 if (c & 0x80)
1276 *statep = c;
1277 return -1;
1279 return c;
1281 t = c;
1282 c = *statep;
1283 *statep = 0;
1284 if (encoding == EUC_JP)
1286 if (c == 0x8e)
1287 return t | (KANA << 16);
1288 if (c == 0x8f)
1290 *statep = t | (KANJI0212 << 8);
1291 return -1;
1294 c &= 0xff7f;
1295 t &= 0x7f;
1296 c = c << 8 | t;
1297 if (encoding == EUC_KR)
1298 return c | (3 << 16);
1299 if (encoding == EUC_CN)
1300 return c | (1 << 16);
1301 if (c & (KANJI0212 << 16))
1302 return c;
1303 else
1304 return c | (KANJI << 16);
1306 if (encoding == BIG5 || encoding == GBK)
1308 if (!*statep)
1310 if (c & 0x80)
1312 if (encoding == GBK && c == 0x80)
1313 return 0xa4 | (('b'|0x80) << 16);
1314 *statep = c;
1315 return -1;
1317 return c;
1319 t = c;
1320 c = *statep;
1321 *statep = 0;
1322 c &= 0x7f;
1323 return c << 8 | t | (encoding == BIG5 ? 030 << 16 : 031 << 16);
1325 return c | (encodings[encoding].deffont << 16);
1329 EncodeChar(bp, c, encoding, fontp)
1330 char *bp;
1331 int c;
1332 int encoding;
1333 int *fontp;
1335 int t, f, l;
1337 debug2("Encoding char %02x for encoding %d\n", c, encoding);
1338 if (c == -1 && fontp)
1340 if (*fontp == 0)
1341 return 0;
1342 if (bp)
1344 *bp++ = 033;
1345 *bp++ = '(';
1346 *bp++ = 'B';
1348 return 3;
1350 f = c >> 16;
1352 #ifdef UTF8
1353 if (encoding == UTF8)
1355 if (f)
1357 # ifdef DW_CHARS
1358 if (is_dw_font(f))
1360 int c2 = c & 0xff;
1361 c = (c >> 8 & 0xff) | (f << 8);
1362 c = recode_char_dw_to_encoding(c, &c2, encoding);
1364 else
1365 # endif
1367 c = (c & 0xff) | (f << 8);
1368 c = recode_char_to_encoding(c, encoding);
1371 return ToUtf8(bp, c);
1373 if ((c & 0xff00) && f == 0) /* is_utf8? */
1375 # ifdef DW_CHARS
1376 if (utf8_isdouble(c))
1378 int c2 = 0xffff;
1379 c = recode_char_dw_to_encoding(c, &c2, encoding);
1380 c = (c << 8) | (c2 & 0xff);
1382 else
1383 # endif
1385 c = recode_char_to_encoding(c, encoding);
1386 c = ((c & 0xff00) << 8) | (c & 0xff);
1388 debug1("Encode: char mapped from utf8 to %x\n", c);
1389 f = c >> 16;
1391 #endif
1392 if (f & 0x80) /* map special 96-fonts to latin1 */
1393 f = 0;
1395 if (encoding == SJIS)
1397 if (f == KANA)
1398 c = (c & 0xff) | 0x80;
1399 else if (f == KANJI)
1401 if (!bp)
1402 return 2;
1403 t = c & 0xff;
1404 c = (c >> 8) & 0xff;
1405 t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e;
1406 c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
1407 *bp++ = c;
1408 *bp++ = t;
1409 return 2;
1412 if (encoding == EUC)
1414 if (f == KANA)
1416 if (bp)
1418 *bp++ = 0x8e;
1419 *bp++ = c;
1421 return 2;
1423 if (f == KANJI)
1425 if (bp)
1427 *bp++ = (c >> 8) | 0x80;
1428 *bp++ = c | 0x80;
1430 return 2;
1432 if (f == KANJI0212)
1434 if (bp)
1436 *bp++ = 0x8f;
1437 *bp++ = c >> 8;
1438 *bp++ = c;
1440 return 3;
1443 if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1))
1445 if (bp)
1447 *bp++ = (c >> 8) | 0x80;
1448 *bp++ = c | 0x80;
1450 return 2;
1452 if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031))
1454 if (bp)
1456 *bp++ = (c >> 8) | 0x80;
1457 *bp++ = c;
1459 return 2;
1461 if (encoding == GBK && f == 0 && c == 0xa4)
1462 c = 0x80;
1464 l = 0;
1465 if (fontp && f != *fontp)
1467 *fontp = f;
1468 if (f && f < ' ')
1470 if (bp)
1472 *bp++ = 033;
1473 *bp++ = '$';
1474 if (f > 2)
1475 *bp++ = '(';
1476 *bp++ = '@' + f;
1478 l += f > 2 ? 4 : 3;
1480 else if (f < 128)
1482 if (f == 0)
1483 f = 'B';
1484 if (bp)
1486 *bp++ = 033;
1487 *bp++ = '(';
1488 *bp++ = f;
1490 l += 3;
1493 if (c & 0xff00)
1495 if (bp)
1496 *bp++ = c >> 8;
1497 l++;
1499 if (bp)
1500 *bp++ = c;
1501 return l + 1;
1505 CanEncodeFont(encoding, f)
1506 int encoding, f;
1508 switch(encoding)
1510 #ifdef UTF8
1511 case UTF8:
1512 return 1;
1513 #endif
1514 case SJIS:
1515 return f == KANJI || f == KANA;
1516 case EUC:
1517 return f == KANJI || f == KANA || f == KANJI0212;
1518 case EUC_KR:
1519 return f == 3;
1520 case EUC_CN:
1521 return f == 1;
1522 case BIG5:
1523 return f == 030;
1524 case GBK:
1525 return f == 031;
1526 default:
1527 break;
1529 return 0;
1532 #ifdef DW_CHARS
1534 PrepareEncodedChar(c)
1535 int c;
1537 int encoding;
1538 int t = 0;
1539 int f;
1541 encoding = D_encoding;
1542 f = D_rend.font;
1543 t = D_mbcs;
1544 if (encoding == SJIS)
1546 if (f == KANA)
1547 return c | 0x80;
1548 else if (f == KANJI)
1550 t += (c & 1) ? ((t <= 0x5f) ? 0x1f : 0x20) : 0x7e;
1551 c = (c - 0x21) / 2 + ((c < 0x5f) ? 0x81 : 0xc1);
1552 D_mbcs = t;
1554 return c;
1556 if (encoding == EUC)
1558 if (f == KANA)
1560 AddChar(0x8e);
1561 return c | 0x80;
1563 if (f == KANJI)
1565 D_mbcs = t | 0x80;
1566 return c | 0x80;
1568 if (f == KANJI0212)
1570 AddChar(0x8f);
1571 D_mbcs = t | 0x80;
1572 return c | 0x80;
1575 if ((encoding == EUC_KR && f == 3) || (encoding == EUC_CN && f == 1))
1577 D_mbcs = t | 0x80;
1578 return c | 0x80;
1580 if ((encoding == BIG5 && f == 030) || (encoding == GBK && f == 031))
1581 return c | 0x80;
1582 return c;
1584 #endif
/*
 * Convert flen bytes at fbuf from encoding fenc to encoding tenc.
 *
 * The result is written to tbuf; if tbuf is a null pointer nothing
 * is written and only the length is computed.  Returns the number of
 * bytes of the converted text, including the terminating font reset
 * produced by EncodeChar(..., -1, ...).
 */
int
RecodeBuf(fbuf, flen, fenc, tenc, tbuf)
unsigned char *fbuf;
int flen;
int fenc, tenc;
unsigned char *tbuf;
{
  int in, out = 0;
  int ch;
  int decstate = 0, font = 0;

  for (in = 0; in < flen; in++)
    {
      ch = DecodeChar(fbuf[in], fenc, &decstate);
      if (ch == -2)
        in--;           /* decoder wants to see this byte again */
      if (ch >= 0)
        out += EncodeChar(tbuf ? (char *)tbuf + out : 0, ch, tenc, &font);
    }
  /* flush: let the encoder emit its final font switch, if any */
  out += EncodeChar(tbuf ? (char *)tbuf + out : 0, -1, tenc, &font);
  return out;
}
1610 #ifdef UTF8
1612 ContainsSpecialDeffont(ml, xs, xe, encoding)
1613 struct mline *ml;
1614 int xs, xe;
1615 int encoding;
1617 unsigned char *f, *i;
1618 int c, x, dx;
1620 if (encoding == UTF8 || encodings[encoding].deffont == 0)
1621 return 0;
1622 i = ml->image + xs;
1623 f = ml->font + xs;
1624 dx = xe - xs + 1;
1625 while (dx-- > 0)
1627 if (*f++)
1628 continue;
1629 c = *i++;
1630 x = recode_char_to_encoding(c | (encodings[encoding].deffont << 8), UTF8);
1631 if (c != x)
1633 debug2("ContainsSpecialDeffont: yes %02x != %02x\n", c, x);
1634 return 1;
1637 debug("ContainsSpecialDeffont: no\n");
1638 return 0;
1643 LoadFontTranslation(font, file)
1644 int font;
1645 char *file;
1647 char buf[1024], *myfile;
1648 FILE *f;
1649 int i;
1650 int fo;
1651 int x, u, c, ok;
1652 unsigned short (*p)[2], (*tab)[2];
1654 myfile = file;
1655 if (myfile == 0)
1657 if (font == 0 || screenencodings == 0)
1658 return -1;
1659 if (strlen(screenencodings) > sizeof(buf) - 10)
1660 return -1;
1661 sprintf(buf, "%s/%02x", screenencodings, font & 0xff);
1662 myfile = buf;
1664 debug1("LoadFontTranslation: trying %s\n", myfile);
1665 if ((f = secfopen(myfile, "r")) == 0)
1666 return -1;
1667 i = ok = 0;
1668 for (;;)
1670 for(; i < 12; i++)
1671 if (getc(f) != "ScreenI2UTF8"[i])
1672 break;
1673 if (getc(f) != 0) /* format */
1674 break;
1675 fo = getc(f); /* id */
1676 if (fo == EOF)
1677 break;
1678 if (font != -1 && font != fo)
1679 break;
1680 i = getc(f);
1681 x = getc(f);
1682 if (x == EOF)
1683 break;
1684 i = i << 8 | x;
1685 getc(f);
1686 while ((x = getc(f)) && x != EOF)
1687 getc(f); /* skip font name (padded to 2 bytes) */
1688 if ((p = malloc(sizeof(*p) * (i + 1))) == 0)
1689 break;
1690 tab = p;
1691 while(i > 0)
1693 x = getc(f);
1694 x = x << 8 | getc(f);
1695 u = getc(f);
1696 c = getc(f);
1697 u = u << 8 | c;
1698 if (c == EOF)
1699 break;
1700 (*p)[0] = x;
1701 (*p)[1] = u;
1702 p++;
1703 i--;
1705 (*p)[0] = 0;
1706 (*p)[1] = 0;
1707 if (i || (tab[0][0] & 0x8000))
1709 free(tab);
1710 break;
1712 if (recodetabs[fo].tab && (recodetabs[fo].flags & RECODETAB_ALLOCED) != 0)
1713 free(recodetabs[fo].tab);
1714 recodetabs[fo].tab = tab;
1715 recodetabs[fo].flags = RECODETAB_ALLOCED;
1716 debug1("Successful load of recodetab %02x\n", fo);
1717 c = getc(f);
1718 if (c == EOF)
1720 ok = 1;
1721 break;
1723 if (c != 'S')
1724 break;
1725 i = 1;
1727 fclose(f);
1728 if (font != -1 && file == 0 && recodetabs[font].flags == 0)
1729 recodetabs[font].flags = RECODETAB_TRIED;
1730 return ok ? 0 : -1;
1733 void
1734 LoadFontTranslationsForEncoding(encoding)
1735 int encoding;
1737 char *c;
1738 int f;
1740 debug1("LoadFontTranslationsForEncoding: encoding %d\n", encoding);
1741 if ((c = encodings[encoding].fontlist) != 0)
1742 while ((f = (unsigned char)*c++) != 0)
1743 if (recodetabs[f].flags == 0)
1744 LoadFontTranslation(f, 0);
1745 f = encodings[encoding].deffont;
1746 if (f > 0 && recodetabs[f].flags == 0)
1747 LoadFontTranslation(f, 0);
1750 #endif /* UTF8 */
1752 #else /* !ENCODINGS */
/* Simple version of EncodeChar to encode font changes for
 * copy/paste mode
 *
 * c holds the font number in bits 16 and up and the character code
 * below; c == -1 means "no character, just go back to font 0".
 * If fontp is not null and the font differs from *fontp, *fontp is
 * updated and a font switch sequence is produced first.
 * bp may be a null pointer to only compute the length.  The encoding
 * argument is not used.  Returns the number of bytes produced.
 */
int
EncodeChar(bp, c, encoding, fontp)
char *bp;
int c;
int encoding;
int *fontp;
{
  int font;
  int len = 0;

  font = (c == -1) ? 0 : c >> 16;
  if (fontp && font != *fontp)
    {
      *fontp = font;
      if (font && font < ' ')
        {
          /* ESC $ [(] <'@' + font>; the '(' is only used for fonts > 2 */
          if (bp)
            {
              *bp++ = 033;
              *bp++ = '$';
              if (font > 2)
                *bp++ = '(';
              *bp++ = '@' + font;
            }
          len += (font > 2) ? 4 : 3;
        }
      else if (font < 128)
        {
          /* ESC ( <font>, with font 0 written as 'B' */
          if (font == 0)
            font = 'B';
          if (bp)
            {
              *bp++ = 033;
              *bp++ = '(';
              *bp++ = font;
            }
          len += 3;
        }
    }
  if (c == -1)
    return len;
  if (c & 0xff00)
    {
      if (bp)
        *bp++ = c >> 8;
      len++;
    }
  if (bp)
    *bp++ = c;
  return len + 1;
}
1808 #endif /* ENCODINGS */