Update TortoiseGitPlink to PuTTY Plink 0.78
[TortoiseGit.git] / src / TortoisePlink / Windows / unicode.c
blob948b4005e4b1340ef319ffdb46e840fa0cc1d4c7
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <ctype.h>
4 #include <time.h>
5 #include <assert.h>
7 #include "putty.h"
8 #include "terminal.h"
9 #include "misc.h"
11 /* Character conversion arrays; they are usually taken from windows,
12 * the xterm one has the four scanlines that have no unicode 2.0
13 * equivalents mapped to their unicode 3.0 locations.
15 static const WCHAR unitab_xterm_std[32] = {
16 0x2666, 0x2592, 0x2409, 0x240c, 0x240d, 0x240a, 0x00b0, 0x00b1,
17 0x2424, 0x240b, 0x2518, 0x2510, 0x250c, 0x2514, 0x253c, 0x23ba,
18 0x23bb, 0x2500, 0x23bc, 0x23bd, 0x251c, 0x2524, 0x2534, 0x252c,
19 0x2502, 0x2264, 0x2265, 0x03c0, 0x2260, 0x00a3, 0x00b7, 0x0020
/*
 * If the codepage is non-zero it's a Windows codepage; zero means use a
 * local codepage (a table defined in this file). The name is always
 * converted to the first of any duplicate definitions.
 */
/*
 * Tables for ISO-8859-{1-10,13-16} derived from those downloaded
 * 2001-10-02 from <http://www.unicode.org/Public/MAPPINGS/> -- jtn
 * Table for ISO-8859-11 derived from same on 2002-11-18. -- bjh21
 */
34 /* XXX: This could be done algorithmically, but I'm not sure it's
35 * worth the hassle -- jtn */
/*
 * ISO/IEC 8859-1:1998 (Latin-1, "Western", "West European").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_1[] = {
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
/*
 * ISO/IEC 8859-2:1999 (Latin-2, "Central European", "East European").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_2[] = {
    0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
    0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
    0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
    0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
    0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
    0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
    0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
    0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
    0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
    0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9
};
/*
 * ISO/IEC 8859-3:1999 (Latin-3, "South European", "Maltese & Esperanto").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF; 0xFFFD fills
 * the byte values the standard leaves unassigned.
 */
static const wchar_t iso_8859_3[] = {
    0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
    0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
    0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
    0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
    0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
    0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
    0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9
};
/*
 * ISO/IEC 8859-4:1998 (Latin-4, "North European").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_4[] = {
    0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
    0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
    0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
    0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
    0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
    0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
    0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
    0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9
};
/*
 * ISO/IEC 8859-5:1999 (Latin/Cyrillic).
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_5[] = {
    0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
    0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
    0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
    0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
    0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
    0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
    0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
    0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
    0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
    0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
    0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
    0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F
};
/*
 * ISO/IEC 8859-6:1999 (Latin/Arabic).
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF; 0xFFFD fills
 * the byte values the standard leaves unassigned.
 */
static const wchar_t iso_8859_6[] = {
    0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
    0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
    0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
    0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
    0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
    0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
    0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD
};
/*
 * ISO 8859-7:1987 (Latin/Greek).
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF; 0xFFFD fills
 * the byte values this edition of the standard leaves unassigned.
 */
static const wchar_t iso_8859_7[] = {
    0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
    0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
    0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
    0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
    0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
    0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
    0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
    0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
    0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
    0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD
};
/*
 * ISO/IEC 8859-8:1999 (Latin/Hebrew).
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF; 0xFFFD fills
 * the byte values the standard leaves unassigned.
 */
static const wchar_t iso_8859_8[] = {
    0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
    0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
    0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
    0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
    0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD
};
/*
 * ISO/IEC 8859-9:1999 (Latin-5, "Turkish").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_9[] = {
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF
};
/*
 * ISO/IEC 8859-10:1998 (Latin-6, "Nordic" [Sami, Inuit, Icelandic]).
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_10[] = {
    0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
    0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
    0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
    0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
    0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
    0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
    0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
    0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
    0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
    0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
    0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138
};
/*
 * ISO/IEC 8859-11:2001 ("Thai", "TIS620").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF; 0xFFFD fills
 * the byte values the standard leaves unassigned.
 */
static const wchar_t iso_8859_11[] = {
    0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
    0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
    0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
    0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
    0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
    0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
    0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
    0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
    0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
    0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
    0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
    0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD
};
/*
 * ISO/IEC 8859-13:1998 (Latin-7, "Baltic Rim").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_13[] = {
    0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
    0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
    0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
    0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
    0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
    0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
    0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
    0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
    0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
    0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
    0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019
};
/*
 * ISO/IEC 8859-14:1998 (Latin-8, "Celtic", "Gaelic/Welsh").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_14[] = {
    0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
    0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
    0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
    0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF
};
/*
 * ISO/IEC 8859-15:1999 (Latin-9 aka -0, "euro").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_15[] = {
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AC, 0x00A5, 0x0160, 0x00A7,
    0x0161, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x017D, 0x00B5, 0x00B6, 0x00B7,
    0x017E, 0x00B9, 0x00BA, 0x00BB, 0x0152, 0x0153, 0x0178, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
/*
 * ISO/IEC 8859-16:2001 (Latin-10, "Balkan").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF.
 */
static const wchar_t iso_8859_16[] = {
    0x00A0, 0x0104, 0x0105, 0x0141, 0x20AC, 0x201E, 0x0160, 0x00A7,
    0x0161, 0x00A9, 0x0218, 0x00AB, 0x0179, 0x00AD, 0x017A, 0x017B,
    0x00B0, 0x00B1, 0x010C, 0x0142, 0x017D, 0x201D, 0x00B6, 0x00B7,
    0x017E, 0x010D, 0x0219, 0x00BB, 0x0152, 0x0153, 0x0178, 0x017C,
    0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0106, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x0110, 0x0143, 0x00D2, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x015A,
    0x0170, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0118, 0x021A, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x0107, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x0111, 0x0144, 0x00F2, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x015B,
    0x0171, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0119, 0x021B, 0x00FF
};
/*
 * HP Roman-8 (listed in cp_list as "HP-ROMAN8").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF; 0xFFFD fills
 * the one position with no assigned character.
 */
static const wchar_t roman8[] = {
    0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF,
    0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4,
    0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1,
    0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2,
    0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA,
    0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC,
    0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6,
    0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4,
    0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3,
    0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF,
    0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC,
    0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD
};
/*
 * KOI8-U (Ukrainian Cyrillic).
 * 128 Unicode code points, one per byte from 0x80 to 0xFF.
 */
static const wchar_t koi8_u[] = {
    0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
    0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
    0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2022, 0x221A, 0x2248,
    0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
    0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
    0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
    0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
    0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
    0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
    0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
    0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
    0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
    0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
    0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
    0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
    0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A
};
/*
 * Vietnamese 8-bit character set (listed in cp_list as "VSCII"); the
 * layout is that of VISCII 1.1 as specified in RFC 1456.
 * 256 Unicode code points, one per byte from 0x00 to 0xFF. Six of the
 * C0 control positions (0x02, 0x05, 0x06, 0x14, 0x19, 0x1E) carry
 * Vietnamese capital letters instead of control characters.
 *
 * Every byte maps to a distinct code point. Byte 0xA6 is U+1EA9
 * (lower-case a with circumflex and hook above); it must not repeat
 * U+1EA8 from byte 0x86, or the reverse mapping built from this table
 * would encode the capital letter as the lower-case byte and have no
 * encoding at all for U+1EA9.
 */
static const wchar_t vscii[] = {
    0x0000, 0x0001, 0x1EB2, 0x0003, 0x0004, 0x1EB4, 0x1EAA, 0x0007,
    0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
    0x0010, 0x0011, 0x0012, 0x0013, 0x1EF6, 0x0015, 0x0016, 0x0017,
    0x0018, 0x1EF8, 0x001a, 0x001b, 0x001c, 0x001d, 0x1EF4, 0x001f,
    0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
    0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
    0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
    0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
    0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
    0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
    0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
    0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
    0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
    0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007f,
    0x1EA0, 0x1EAE, 0x1EB0, 0x1EB6, 0x1EA4, 0x1EA6, 0x1EA8, 0x1EAC,
    0x1EBC, 0x1EB8, 0x1EBE, 0x1EC0, 0x1EC2, 0x1EC4, 0x1EC6, 0x1ED0,
    0x1ED2, 0x1ED4, 0x1ED6, 0x1ED8, 0x1EE2, 0x1EDA, 0x1EDC, 0x1EDE,
    0x1ECA, 0x1ECE, 0x1ECC, 0x1EC8, 0x1EE6, 0x0168, 0x1EE4, 0x1EF2,
    0x00D5, 0x1EAF, 0x1EB1, 0x1EB7, 0x1EA5, 0x1EA7, 0x1EA9, 0x1EAD,
    0x1EBD, 0x1EB9, 0x1EBF, 0x1EC1, 0x1EC3, 0x1EC5, 0x1EC7, 0x1ED1,
    0x1ED3, 0x1ED5, 0x1ED7, 0x1EE0, 0x01A0, 0x1ED9, 0x1EDD, 0x1EDF,
    0x1ECB, 0x1EF0, 0x1EE8, 0x1EEA, 0x1EEC, 0x01A1, 0x1EDB, 0x01AF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x1EA2, 0x0102, 0x1EB3, 0x1EB5,
    0x00C8, 0x00C9, 0x00CA, 0x1EBA, 0x00CC, 0x00CD, 0x0128, 0x1EF3,
    0x0110, 0x1EE9, 0x00D2, 0x00D3, 0x00D4, 0x1EA1, 0x1EF7, 0x1EEB,
    0x1EED, 0x00D9, 0x00DA, 0x1EF9, 0x1EF5, 0x00DD, 0x1EE1, 0x01B0,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x1EA3, 0x0103, 0x1EEF, 0x1EAB,
    0x00E8, 0x00E9, 0x00EA, 0x1EBB, 0x00EC, 0x00ED, 0x0129, 0x1EC9,
    0x0111, 0x1EF1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x1ECF, 0x1ECD,
    0x1EE5, 0x00F9, 0x00FA, 0x0169, 0x1EE7, 0x00FD, 0x1EE3, 0x1EEE
};
/*
 * DEC Multinational Character Set (listed in cp_list as "DEC-MCS").
 * 96 Unicode code points, one per byte from 0xA0 to 0xFF; 0xFFFD fills
 * the positions with no assigned character.
 */
static const wchar_t dec_mcs[] = {
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0xFFFD, 0x00A5, 0xFFFD, 0x00A7,
    0x00A4, 0x00A9, 0x00AA, 0x00AB, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0xFFFD, 0x00B5, 0x00B6, 0x00B7,
    0xFFFD, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0xFFFD, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0152,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0178, 0xFFFD, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0153,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FF, 0xFFFD, 0xFFFD
};
/*
 * Mazovia (Polish) aka CP620,
 * from "Mazowia to Unicode table", 04/24/96, Mikolaj Jedrzejak.
 * 128 Unicode code points, one per byte from 0x80 to 0xFF.
 */
static const wchar_t mazovia[] = {
    /*
     * Code point 0x9B is the "zloty" symbol (z followed by l-with-stroke,
     * U+0142), which is not widely used and for which there is no
     * Unicode equivalent; it is given as 0xFFFD here.
     * One reference shows 0xA8 as U+00A7 SECTION SIGN, but we're
     * told that's incorrect.
     */
    0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x0105, 0x00E7,
    0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x0107, 0x00C4, 0x0104,
    0x0118, 0x0119, 0x0142, 0x00F4, 0x00F6, 0x0106, 0x00FB, 0x00F9,
    0x015a, 0x00D6, 0x00DC, 0xFFFD, 0x0141, 0x00A5, 0x015b, 0x0192,
    0x0179, 0x017b, 0x00F3, 0x00d3, 0x0144, 0x0143, 0x017a, 0x017c,
    0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
    0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
    0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
    0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
    0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
    0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
    0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
    0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
    0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229,
    0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
    0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0
};
/*
 * One selectable character set in cp_list.
 */
struct cp_list_item {
    char *name;              /* display name; NULL in the terminating entry */
    int codepage;            /* non-zero: Windows codepage number;
                              * zero: use the table below */
    int cp_size;             /* number of entries in cp_table */
    const wchar_t *cp_table; /* Unicode values for a built-in table,
                              * or NULL when there is none */
};
392 static const struct cp_list_item cp_list[] = {
393 {"UTF-8", CP_UTF8},
395 {"ISO-8859-1:1998 (Latin-1, West Europe)", 0, 96, iso_8859_1},
396 {"ISO-8859-2:1999 (Latin-2, East Europe)", 0, 96, iso_8859_2},
397 {"ISO-8859-3:1999 (Latin-3, South Europe)", 0, 96, iso_8859_3},
398 {"ISO-8859-4:1998 (Latin-4, North Europe)", 0, 96, iso_8859_4},
399 {"ISO-8859-5:1999 (Latin/Cyrillic)", 0, 96, iso_8859_5},
400 {"ISO-8859-6:1999 (Latin/Arabic)", 0, 96, iso_8859_6},
401 {"ISO-8859-7:1987 (Latin/Greek)", 0, 96, iso_8859_7},
402 {"ISO-8859-8:1999 (Latin/Hebrew)", 0, 96, iso_8859_8},
403 {"ISO-8859-9:1999 (Latin-5, Turkish)", 0, 96, iso_8859_9},
404 {"ISO-8859-10:1998 (Latin-6, Nordic)", 0, 96, iso_8859_10},
405 {"ISO-8859-11:2001 (Latin/Thai)", 0, 96, iso_8859_11},
406 {"ISO-8859-13:1998 (Latin-7, Baltic)", 0, 96, iso_8859_13},
407 {"ISO-8859-14:1998 (Latin-8, Celtic)", 0, 96, iso_8859_14},
408 {"ISO-8859-15:1999 (Latin-9, \"euro\")", 0, 96, iso_8859_15},
409 {"ISO-8859-16:2001 (Latin-10, Balkan)", 0, 96, iso_8859_16},
411 {"KOI8-U", 0, 128, koi8_u},
412 {"KOI8-R", 20866},
413 {"HP-ROMAN8", 0, 96, roman8},
414 {"VSCII", 0, 256, vscii},
415 {"DEC-MCS", 0, 96, dec_mcs},
417 {"Win1250 (Central European)", 1250},
418 {"Win1251 (Cyrillic)", 1251},
419 {"Win1252 (Western)", 1252},
420 {"Win1253 (Greek)", 1253},
421 {"Win1254 (Turkish)", 1254},
422 {"Win1255 (Hebrew)", 1255},
423 {"Win1256 (Arabic)", 1256},
424 {"Win1257 (Baltic)", 1257},
425 {"Win1258 (Vietnamese)", 1258},
427 {"CP437", 437},
428 {"CP620 (Mazovia)", 0, 128, mazovia},
429 {"CP819", 28591},
430 {"CP852", 852},
431 {"CP878", 20866},
433 {"Use font encoding", -1},
435 {0, 0}
438 static void link_font(WCHAR *line_tbl, WCHAR *font_tbl, WCHAR attr);
441 * We keep a collection of reverse mappings from Unicode back to code pages,
442 * in the form of array[256] of array[256] of char. These live forever in a
443 * local tree234, and we just make a new one whenever we find a need.
445 typedef struct reverse_mapping {
446 int codepage;
447 char **blocks;
448 } reverse_mapping;
449 static tree234 *reverse_mappings = NULL;
451 static int reverse_mapping_cmp(void *av, void *bv)
453 const reverse_mapping *a = (const reverse_mapping *)av;
454 const reverse_mapping *b = (const reverse_mapping *)bv;
455 if (a->codepage < b->codepage)
456 return -1;
457 if (a->codepage > b->codepage)
458 return +1;
459 return 0;
462 static int reverse_mapping_find(void *av, void *bv)
464 const reverse_mapping *a = (const reverse_mapping *)av;
465 int b_codepage = *(const int *)bv;
466 if (a->codepage < b_codepage)
467 return -1;
468 if (a->codepage > b_codepage)
469 return +1;
470 return 0;
473 static reverse_mapping *get_existing_reverse_mapping(int codepage)
475 if (!reverse_mappings)
476 return NULL;
477 return find234(reverse_mappings, &codepage, reverse_mapping_find);
480 static reverse_mapping *make_reverse_mapping_inner(
481 int codepage, const wchar_t *mapping)
483 if (!reverse_mappings)
484 reverse_mappings = newtree234(reverse_mapping_cmp);
486 reverse_mapping *rmap = snew(reverse_mapping);
487 rmap->blocks = snewn(256, char *);
488 memset(rmap->blocks, 0, 256 * sizeof(char *));
490 for (size_t i = 0; i < 256; i++) {
491 /* These special kinds of value correspond to no Unicode character */
492 if (DIRECT_CHAR(mapping[i]))
493 continue;
494 if (DIRECT_FONT(mapping[i]))
495 continue;
497 size_t chr = mapping[i];
498 size_t block = chr >> 8, index = chr & 0xFF;
500 if (!rmap->blocks[block]) {
501 rmap->blocks[block] = snewn(256, char);
502 memset(rmap->blocks[block], 0, 256);
504 rmap->blocks[block][index] = i;
507 rmap->codepage = codepage;
508 reverse_mapping *added = add234(reverse_mappings, rmap);
509 assert(added == rmap); /* we already checked it wasn't already in there */
510 return added;
513 static void make_reverse_mapping(int codepage, const wchar_t *mapping)
515 if (get_existing_reverse_mapping(codepage))
516 return; /* we've already got this one */
517 make_reverse_mapping_inner(codepage, mapping);
520 static reverse_mapping *get_reverse_mapping(int codepage)
523 * Try harder to get a reverse mapping for a codepage we implement
524 * internally via a translation table, by hastily making it if it doesn't
525 * already exist.
528 reverse_mapping *rmap = get_existing_reverse_mapping(codepage);
529 if (rmap)
530 return rmap;
532 if (codepage < 65536)
533 return NULL;
534 if (codepage >= 65536 + lenof(cp_list))
535 return NULL;
536 const struct cp_list_item *cp = &cp_list[codepage - 65536];
537 if (!cp->cp_table)
538 return NULL;
540 wchar_t mapping[256];
541 get_unitab(codepage, mapping, 0);
542 return make_reverse_mapping_inner(codepage, mapping);
545 void init_ucs(Conf *conf, struct unicode_data *ucsdata)
547 int i;
548 bool used_dtf = false;
549 int vtmode;
551 /* Decide on the Line and Font codepages */
552 ucsdata->line_codepage = decode_codepage(conf_get_str(conf,
553 CONF_line_codepage));
555 if (ucsdata->font_codepage <= 0) {
556 ucsdata->font_codepage=0;
557 ucsdata->dbcs_screenfont=false;
560 vtmode = conf_get_int(conf, CONF_vtmode);
561 if (vtmode == VT_OEMONLY) {
562 ucsdata->font_codepage = 437;
563 ucsdata->dbcs_screenfont = false;
564 if (ucsdata->line_codepage <= 0)
565 ucsdata->line_codepage = GetACP();
566 } else if (ucsdata->line_codepage <= 0)
567 ucsdata->line_codepage = ucsdata->font_codepage;
569 /* Collect screen font ucs table */
570 if (ucsdata->dbcs_screenfont || ucsdata->font_codepage == 0) {
571 get_unitab(ucsdata->font_codepage, ucsdata->unitab_font, 2);
572 for (i = 128; i < 256; i++)
573 ucsdata->unitab_font[i] = (WCHAR) (CSET_ACP + i);
574 } else {
575 get_unitab(ucsdata->font_codepage, ucsdata->unitab_font, 1);
577 /* CP437 fonts are often broken ... */
578 if (ucsdata->font_codepage == 437)
579 ucsdata->unitab_font[0] = ucsdata->unitab_font[255] = 0xFFFF;
581 if (vtmode == VT_XWINDOWS)
582 memcpy(ucsdata->unitab_font + 1, unitab_xterm_std,
583 sizeof(unitab_xterm_std));
585 /* Collect OEMCP ucs table */
586 get_unitab(CP_OEMCP, ucsdata->unitab_oemcp, 1);
588 /* Collect CP437 ucs table for SCO acs */
589 if (vtmode == VT_OEMANSI || vtmode == VT_XWINDOWS)
590 memcpy(ucsdata->unitab_scoacs, ucsdata->unitab_oemcp,
591 sizeof(ucsdata->unitab_scoacs));
592 else
593 get_unitab(437, ucsdata->unitab_scoacs, 1);
595 /* Collect line set ucs table */
596 if (ucsdata->line_codepage == ucsdata->font_codepage &&
597 (ucsdata->dbcs_screenfont ||
598 vtmode == VT_POORMAN || ucsdata->font_codepage==0)) {
600 /* For DBCS and POOR fonts force direct to font */
601 used_dtf = true;
602 for (i = 0; i < 32; i++)
603 ucsdata->unitab_line[i] = (WCHAR) i;
604 for (i = 32; i < 256; i++)
605 ucsdata->unitab_line[i] = (WCHAR) (CSET_ACP + i);
606 ucsdata->unitab_line[127] = (WCHAR) 127;
607 } else {
608 get_unitab(ucsdata->line_codepage, ucsdata->unitab_line, 0);
611 #if 0
612 debug("Line cp%d, Font cp%d%s\n", ucsdata->line_codepage,
613 ucsdata->font_codepage, ucsdata->dbcs_screenfont ? " DBCS" : "");
615 for (i = 0; i < 256; i += 16) {
616 for (j = 0; j < 16; j++) {
617 debug("%04x%s", ucsdata->unitab_line[i + j], j == 15 ? "" : ",");
619 debug("\n");
621 #endif
623 /* VT100 graphics - NB: Broken for non-ascii CP's */
624 memcpy(ucsdata->unitab_xterm, ucsdata->unitab_line,
625 sizeof(ucsdata->unitab_xterm));
626 memcpy(ucsdata->unitab_xterm + '`', unitab_xterm_std,
627 sizeof(unitab_xterm_std));
628 ucsdata->unitab_xterm['_'] = ' ';
630 if (!used_dtf) {
631 /* Make sure a reverse mapping exists for this code page. */
632 make_reverse_mapping(ucsdata->line_codepage, ucsdata->unitab_line);
635 /* Find the line control characters. */
636 for (i = 0; i < 256; i++)
637 if (ucsdata->unitab_line[i] < ' '
638 || (ucsdata->unitab_line[i] >= 0x7F &&
639 ucsdata->unitab_line[i] < 0xA0))
640 ucsdata->unitab_ctrl[i] = i;
641 else
642 ucsdata->unitab_ctrl[i] = 0xFF;
644 /* Generate line->screen direct conversion links. */
645 if (vtmode == VT_OEMANSI || vtmode == VT_XWINDOWS)
646 link_font(ucsdata->unitab_scoacs, ucsdata->unitab_oemcp, CSET_OEMCP);
648 link_font(ucsdata->unitab_line, ucsdata->unitab_font, CSET_ACP);
649 link_font(ucsdata->unitab_scoacs, ucsdata->unitab_font, CSET_ACP);
650 link_font(ucsdata->unitab_xterm, ucsdata->unitab_font, CSET_ACP);
652 if (vtmode == VT_OEMANSI || vtmode == VT_XWINDOWS) {
653 link_font(ucsdata->unitab_line, ucsdata->unitab_oemcp, CSET_OEMCP);
654 link_font(ucsdata->unitab_xterm, ucsdata->unitab_oemcp, CSET_OEMCP);
657 if (ucsdata->dbcs_screenfont &&
658 ucsdata->font_codepage != ucsdata->line_codepage) {
659 /* F***ing Microsoft fonts, Japanese and Korean codepage fonts
660 * have a currency symbol at 0x5C but their unicode value is
661 * still given as U+005C not the correct U+00A5. */
662 ucsdata->unitab_line['\\'] = CSET_OEMCP + '\\';
665 /* Last chance, if !unicode then try poorman links. */
666 if (vtmode != VT_UNICODE) {
667 static const char poorman_scoacs[] =
668 "CueaaaaceeeiiiAAE**ooouuyOUc$YPsaiounNao?++**!<>###||||++||++++++--|-+||++--|-+----++++++++##||#aBTPEsyt******EN=+><++-=... n2* ";
669 static const char poorman_latin1[] =
670 " !cL.Y|S\"Ca<--R~o+23'u|.,1o>///?AAAAAAACEEEEIIIIDNOOOOOxOUUUUYPBaaaaaaaceeeeiiiionooooo/ouuuuypy";
671 static const char poorman_vt100[] = "*#****o~**+++++-----++++|****L.";
673 for (i = 160; i < 256; i++)
674 if (!DIRECT_FONT(ucsdata->unitab_line[i]) &&
675 ucsdata->unitab_line[i] >= 160 &&
676 ucsdata->unitab_line[i] < 256) {
677 ucsdata->unitab_line[i] =
678 (WCHAR) (CSET_ACP +
679 poorman_latin1[ucsdata->unitab_line[i] - 160]);
681 for (i = 96; i < 127; i++)
682 if (!DIRECT_FONT(ucsdata->unitab_xterm[i]))
683 ucsdata->unitab_xterm[i] =
684 (WCHAR) (CSET_ACP + poorman_vt100[i - 96]);
685 for(i=128;i<256;i++)
686 if (!DIRECT_FONT(ucsdata->unitab_scoacs[i]))
687 ucsdata->unitab_scoacs[i] =
688 (WCHAR) (CSET_ACP + poorman_scoacs[i - 128]);
692 static void link_font(WCHAR *line_tbl, WCHAR *font_tbl, WCHAR attr)
694 int font_index, line_index, i;
695 for (line_index = 0; line_index < 256; line_index++) {
696 if (DIRECT_FONT(line_tbl[line_index]))
697 continue;
698 for(i = 0; i < 256; i++) {
699 font_index = ((32 + i) & 0xFF);
700 if (line_tbl[line_index] == font_tbl[font_index]) {
701 line_tbl[line_index] = (WCHAR) (attr + font_index);
702 break;
/*
 * Translate a character typed on a US keyboard layout into the character
 * at the same slot of the built-in Cyrillic table. Only the low seven
 * bits of ch are used; slots that hold their own index (digits, most
 * punctuation, control codes) come back unchanged.
 */
wchar_t xlat_uskbd2cyrllic(int ch)
{
    static const wchar_t cyrtab[] = {
             0,      1,      2,      3,      4,      5,      6,      7,
             8,      9,     10,     11,     12,     13,     14,     15,
            16,     17,     18,     19,     20,     21,     22,     23,
            24,     25,     26,     27,     28,     29,     30,     31,
            32,     33, 0x042d,     35,     36,     37,     38, 0x044d,
            40,     41,     42, 0x0406, 0x0431, 0x0454, 0x044e, 0x002e,
            48,     49,     50,     51,     52,     53,     54,     55,
            56,     57, 0x0416, 0x0436, 0x0411, 0x0456, 0x042e, 0x002c,
            64, 0x0424, 0x0418, 0x0421, 0x0412, 0x0423, 0x0410, 0x041f,
        0x0420, 0x0428, 0x041e, 0x041b, 0x0414, 0x042c, 0x0422, 0x0429,
        0x0417, 0x0419, 0x041a, 0x042b, 0x0415, 0x0413, 0x041c, 0x0426,
        0x0427, 0x041d, 0x042f, 0x0445, 0x0457, 0x044a,     94, 0x0404,
            96, 0x0444, 0x0438, 0x0441, 0x0432, 0x0443, 0x0430, 0x043f,
        0x0440, 0x0448, 0x043e, 0x043b, 0x0434, 0x044c, 0x0442, 0x0449,
        0x0437, 0x0439, 0x043a, 0x044b, 0x0435, 0x0433, 0x043c, 0x0446,
        0x0447, 0x043d, 0x044f, 0x0425, 0x0407, 0x042a,    126,    127
    };
    const int slot = ch & 0x7F;
    return cyrtab[slot];
}
/*
 * Look up the ordered pair (first, second) in the compose table.
 *
 * Returns the composed character, or -1 if the pair is not listed.
 * The table is scanned from the top and the first matching row wins,
 * so where a pair is listed twice only the earlier row is reachable.
 */
static int compose_lookup(int first, int second)
{
    static const struct {
        char first, second;
        wchar_t composed;
    } composetbl[] = {
        {0x2b, 0x2b, 0x0023},
        {0x41, 0x41, 0x0040},
        {0x28, 0x28, 0x005b},
        {0x2f, 0x2f, 0x005c},
        {0x29, 0x29, 0x005d},
        {0x28, 0x2d, 0x007b},
        {0x2d, 0x29, 0x007d},
        {0x2f, 0x5e, 0x007c},
        {0x21, 0x21, 0x00a1},
        {0x43, 0x2f, 0x00a2},
        {0x43, 0x7c, 0x00a2},
        {0x4c, 0x2d, 0x00a3},
        {0x4c, 0x3d, 0x20a4},
        {0x58, 0x4f, 0x00a4},
        {0x58, 0x30, 0x00a4},
        {0x59, 0x2d, 0x00a5},
        {0x59, 0x3d, 0x00a5},
        {0x7c, 0x7c, 0x00a6},
        {0x53, 0x4f, 0x00a7},
        {0x53, 0x21, 0x00a7},
        {0x53, 0x30, 0x00a7},
        {0x22, 0x22, 0x00a8},
        {0x43, 0x4f, 0x00a9},
        {0x43, 0x30, 0x00a9},
        {0x41, 0x5f, 0x00aa},
        {0x3c, 0x3c, 0x00ab},
        {0x2c, 0x2d, 0x00ac},
        {0x2d, 0x2d, 0x00ad},
        {0x52, 0x4f, 0x00ae},
        {0x2d, 0x5e, 0x00af},
        {0x30, 0x5e, 0x00b0},
        {0x2b, 0x2d, 0x00b1},
        {0x32, 0x5e, 0x00b2},
        {0x33, 0x5e, 0x00b3},
        {0x27, 0x27, 0x00b4},
        {0x2f, 0x55, 0x00b5},
        {0x50, 0x21, 0x00b6},
        {0x2e, 0x5e, 0x00b7},
        {0x2c, 0x2c, 0x00b8},
        {0x31, 0x5e, 0x00b9},
        {0x4f, 0x5f, 0x00ba},
        {0x3e, 0x3e, 0x00bb},
        {0x31, 0x34, 0x00bc},
        {0x31, 0x32, 0x00bd},
        {0x33, 0x34, 0x00be},
        {0x3f, 0x3f, 0x00bf},
        {0x60, 0x41, 0x00c0},
        {0x27, 0x41, 0x00c1},
        {0x5e, 0x41, 0x00c2},
        {0x7e, 0x41, 0x00c3},
        {0x22, 0x41, 0x00c4},
        {0x2a, 0x41, 0x00c5},
        {0x41, 0x45, 0x00c6},
        {0x2c, 0x43, 0x00c7},
        {0x60, 0x45, 0x00c8},
        {0x27, 0x45, 0x00c9},
        {0x5e, 0x45, 0x00ca},
        {0x22, 0x45, 0x00cb},
        {0x60, 0x49, 0x00cc},
        {0x27, 0x49, 0x00cd},
        {0x5e, 0x49, 0x00ce},
        {0x22, 0x49, 0x00cf},
        {0x2d, 0x44, 0x00d0},
        {0x7e, 0x4e, 0x00d1},
        {0x60, 0x4f, 0x00d2},
        {0x27, 0x4f, 0x00d3},
        {0x5e, 0x4f, 0x00d4},
        {0x7e, 0x4f, 0x00d5},
        {0x22, 0x4f, 0x00d6},
        {0x58, 0x58, 0x00d7},
        {0x2f, 0x4f, 0x00d8},
        {0x60, 0x55, 0x00d9},
        {0x27, 0x55, 0x00da},
        {0x5e, 0x55, 0x00db},
        {0x22, 0x55, 0x00dc},
        {0x27, 0x59, 0x00dd},
        {0x48, 0x54, 0x00de},
        {0x73, 0x73, 0x00df},
        {0x60, 0x61, 0x00e0},
        {0x27, 0x61, 0x00e1},
        {0x5e, 0x61, 0x00e2},
        {0x7e, 0x61, 0x00e3},
        {0x22, 0x61, 0x00e4},
        {0x2a, 0x61, 0x00e5},
        {0x61, 0x65, 0x00e6},
        {0x2c, 0x63, 0x00e7},
        {0x60, 0x65, 0x00e8},
        {0x27, 0x65, 0x00e9},
        {0x5e, 0x65, 0x00ea},
        {0x22, 0x65, 0x00eb},
        {0x60, 0x69, 0x00ec},
        {0x27, 0x69, 0x00ed},
        {0x5e, 0x69, 0x00ee},
        {0x22, 0x69, 0x00ef},
        {0x2d, 0x64, 0x00f0},
        {0x7e, 0x6e, 0x00f1},
        {0x60, 0x6f, 0x00f2},
        {0x27, 0x6f, 0x00f3},
        {0x5e, 0x6f, 0x00f4},
        {0x7e, 0x6f, 0x00f5},
        {0x22, 0x6f, 0x00f6},
        {0x3a, 0x2d, 0x00f7},
        {0x6f, 0x2f, 0x00f8},
        {0x60, 0x75, 0x00f9},
        {0x27, 0x75, 0x00fa},
        {0x5e, 0x75, 0x00fb},
        {0x22, 0x75, 0x00fc},
        {0x27, 0x79, 0x00fd},
        {0x68, 0x74, 0x00fe},
        {0x22, 0x79, 0x00ff},
        /* Unicode extras. */
        {0x6f, 0x65, 0x0153},
        {0x4f, 0x45, 0x0152},
        /* Compose pairs from UCS */
        {0x41, 0x2D, 0x0100},
        {0x61, 0x2D, 0x0101},
        {0x43, 0x27, 0x0106},
        {0x63, 0x27, 0x0107},
        {0x43, 0x5E, 0x0108},
        {0x63, 0x5E, 0x0109},
        {0x45, 0x2D, 0x0112},
        {0x65, 0x2D, 0x0113},
        {0x47, 0x5E, 0x011C},
        {0x67, 0x5E, 0x011D},
        {0x47, 0x2C, 0x0122},
        {0x67, 0x2C, 0x0123},
        {0x48, 0x5E, 0x0124},
        {0x68, 0x5E, 0x0125},
        {0x49, 0x7E, 0x0128},
        {0x69, 0x7E, 0x0129},
        {0x49, 0x2D, 0x012A},
        {0x69, 0x2D, 0x012B},
        {0x4A, 0x5E, 0x0134},
        {0x6A, 0x5E, 0x0135},
        {0x4B, 0x2C, 0x0136},
        {0x6B, 0x2C, 0x0137},
        {0x4C, 0x27, 0x0139},
        {0x6C, 0x27, 0x013A},
        {0x4C, 0x2C, 0x013B},
        {0x6C, 0x2C, 0x013C},
        {0x4E, 0x27, 0x0143},
        {0x6E, 0x27, 0x0144},
        {0x4E, 0x2C, 0x0145},
        {0x6E, 0x2C, 0x0146},
        {0x4F, 0x2D, 0x014C},
        {0x6F, 0x2D, 0x014D},
        {0x52, 0x27, 0x0154},
        {0x72, 0x27, 0x0155},
        {0x52, 0x2C, 0x0156},
        {0x72, 0x2C, 0x0157},
        {0x53, 0x27, 0x015A},
        {0x73, 0x27, 0x015B},
        {0x53, 0x5E, 0x015C},
        {0x73, 0x5E, 0x015D},
        {0x53, 0x2C, 0x015E},
        {0x73, 0x2C, 0x015F},
        {0x54, 0x2C, 0x0162},
        {0x74, 0x2C, 0x0163},
        {0x55, 0x7E, 0x0168},
        {0x75, 0x7E, 0x0169},
        {0x55, 0x2D, 0x016A},
        {0x75, 0x2D, 0x016B},
        {0x55, 0x2A, 0x016E},
        {0x75, 0x2A, 0x016F},
        {0x57, 0x5E, 0x0174},
        {0x77, 0x5E, 0x0175},
        {0x59, 0x5E, 0x0176},
        {0x79, 0x5E, 0x0177},
        {0x59, 0x22, 0x0178},
        {0x5A, 0x27, 0x0179},
        {0x7A, 0x27, 0x017A},
        {0x47, 0x27, 0x01F4},
        {0x67, 0x27, 0x01F5},
        {0x4E, 0x60, 0x01F8},
        {0x6E, 0x60, 0x01F9},
        {0x45, 0x2C, 0x0228},
        {0x65, 0x2C, 0x0229},
        {0x59, 0x2D, 0x0232},
        {0x79, 0x2D, 0x0233},
        {0x44, 0x2C, 0x1E10},
        {0x64, 0x2C, 0x1E11},
        {0x47, 0x2D, 0x1E20},
        {0x67, 0x2D, 0x1E21},
        {0x48, 0x22, 0x1E26},
        {0x68, 0x22, 0x1E27},
        {0x48, 0x2C, 0x1E28},
        {0x68, 0x2C, 0x1E29},
        {0x4B, 0x27, 0x1E30},
        {0x6B, 0x27, 0x1E31},
        {0x4D, 0x27, 0x1E3E},
        {0x6D, 0x27, 0x1E3F},
        {0x50, 0x27, 0x1E54},
        {0x70, 0x27, 0x1E55},
        {0x56, 0x7E, 0x1E7C},
        {0x76, 0x7E, 0x1E7D},
        {0x57, 0x60, 0x1E80},
        {0x77, 0x60, 0x1E81},
        {0x57, 0x27, 0x1E82},
        {0x77, 0x27, 0x1E83},
        {0x57, 0x22, 0x1E84},
        {0x77, 0x22, 0x1E85},
        {0x58, 0x22, 0x1E8C},
        {0x78, 0x22, 0x1E8D},
        {0x5A, 0x5E, 0x1E90},
        {0x7A, 0x5E, 0x1E91},
        {0x74, 0x22, 0x1E97},
        {0x77, 0x2A, 0x1E98},
        {0x79, 0x2A, 0x1E99},
        {0x45, 0x7E, 0x1EBC},
        {0x65, 0x7E, 0x1EBD},
        {0x59, 0x60, 0x1EF2},
        {0x79, 0x60, 0x1EF3},
        {0x59, 0x7E, 0x1EF8},
        {0x79, 0x7E, 0x1EF9},
        /* Compatible/possibles from UCS */
        {0x49, 0x4A, 0x0132},
        {0x69, 0x6A, 0x0133},
        {0x4C, 0x4A, 0x01C7},
        {0x4C, 0x6A, 0x01C8},
        {0x6C, 0x6A, 0x01C9},
        {0x4E, 0x4A, 0x01CA},
        {0x4E, 0x6A, 0x01CB},
        {0x6E, 0x6A, 0x01CC},
        {0x44, 0x5A, 0x01F1},
        {0x44, 0x7A, 0x01F2},
        {0x64, 0x7A, 0x01F3},
        {0x2E, 0x2E, 0x2025},
        {0x21, 0x21, 0x203C},
        {0x3F, 0x21, 0x2048},
        {0x21, 0x3F, 0x2049},
        {0x52, 0x73, 0x20A8},
        {0x4E, 0x6F, 0x2116},
        {0x53, 0x4D, 0x2120},
        {0x54, 0x4D, 0x2122},
        {0x49, 0x49, 0x2161},
        {0x49, 0x56, 0x2163},
        {0x56, 0x49, 0x2165},
        {0x49, 0x58, 0x2168},
        {0x58, 0x49, 0x216A},
        {0x69, 0x69, 0x2171},
        {0x69, 0x76, 0x2173},
        {0x76, 0x69, 0x2175},
        {0x69, 0x78, 0x2178},
        {0x78, 0x69, 0x217A},
        {0x31, 0x30, 0x2469},
        {0x31, 0x31, 0x246A},
        {0x31, 0x32, 0x246B},
        {0x31, 0x33, 0x246C},
        {0x31, 0x34, 0x246D},
        {0x31, 0x35, 0x246E},
        {0x31, 0x36, 0x246F},
        {0x31, 0x37, 0x2470},
        {0x31, 0x38, 0x2471},
        {0x31, 0x39, 0x2472},
        {0x32, 0x30, 0x2473},
        {0x31, 0x2E, 0x2488},
        {0x32, 0x2E, 0x2489},
        {0x33, 0x2E, 0x248A},
        {0x34, 0x2E, 0x248B},
        {0x35, 0x2E, 0x248C},
        {0x36, 0x2E, 0x248D},
        {0x37, 0x2E, 0x248E},
        {0x38, 0x2E, 0x248F},
        {0x39, 0x2E, 0x2490},
        {0x64, 0x61, 0x3372},
        {0x41, 0x55, 0x3373},
        {0x6F, 0x56, 0x3375},
        {0x70, 0x63, 0x3376},
        {0x70, 0x41, 0x3380},
        {0x6E, 0x41, 0x3381},
        {0x6D, 0x41, 0x3383},
        {0x6B, 0x41, 0x3384},
        {0x4B, 0x42, 0x3385},
        {0x4D, 0x42, 0x3386},
        {0x47, 0x42, 0x3387},
        {0x70, 0x46, 0x338A},
        {0x6E, 0x46, 0x338B},
        {0x6D, 0x67, 0x338E},
        {0x6B, 0x67, 0x338F},
        {0x48, 0x7A, 0x3390},
        {0x66, 0x6D, 0x3399},
        {0x6E, 0x6D, 0x339A},
        {0x6D, 0x6D, 0x339C},
        {0x63, 0x6D, 0x339D},
        {0x6B, 0x6D, 0x339E},
        {0x50, 0x61, 0x33A9},
        {0x70, 0x73, 0x33B0},
        {0x6E, 0x73, 0x33B1},
        {0x6D, 0x73, 0x33B3},
        {0x70, 0x56, 0x33B4},
        {0x6E, 0x56, 0x33B5},
        {0x6D, 0x56, 0x33B7},
        {0x6B, 0x56, 0x33B8},
        {0x4D, 0x56, 0x33B9},
        {0x70, 0x57, 0x33BA},
        {0x6E, 0x57, 0x33BB},
        {0x6D, 0x57, 0x33BD},
        {0x6B, 0x57, 0x33BE},
        {0x4D, 0x57, 0x33BF},
        {0x42, 0x71, 0x33C3},
        {0x63, 0x63, 0x33C4},
        {0x63, 0x64, 0x33C5},
        {0x64, 0x42, 0x33C8},
        {0x47, 0x79, 0x33C9},
        {0x68, 0x61, 0x33CA},
        {0x48, 0x50, 0x33CB},
        {0x69, 0x6E, 0x33CC},
        {0x4B, 0x4B, 0x33CD},
        {0x4B, 0x4D, 0x33CE},
        {0x6B, 0x74, 0x33CF},
        {0x6C, 0x6D, 0x33D0},
        {0x6C, 0x6E, 0x33D1},
        {0x6C, 0x78, 0x33D3},
        {0x6D, 0x62, 0x33D4},
        {0x50, 0x48, 0x33D7},
        {0x50, 0x52, 0x33DA},
        {0x73, 0x72, 0x33DB},
        {0x53, 0x76, 0x33DC},
        {0x57, 0x62, 0x33DD},
        {0x66, 0x66, 0xFB00},
        {0x66, 0x69, 0xFB01},
        {0x66, 0x6C, 0xFB02},
        {0x73, 0x74, 0xFB06},
        {0, 0, 0}                      /* terminator: first == 0 */
    }, *c;

    for (c = composetbl; c->first; c++) {
        if (c->first == first && c->second == second)
            return c->composed;
    }
    return -1;
}

/*
 * Upper-case `ch` for the case-insensitive compose retries.
 *
 * toupper() has undefined behaviour for arguments that are neither EOF
 * nor representable as unsigned char, so values outside 0..0xFF are
 * passed through unchanged (they cannot match any table row anyway,
 * since every row holds 7-bit values).
 */
static int compose_toupper(int ch)
{
    return (ch >= 0 && ch <= 0xFF) ? toupper(ch) : ch;
}

/*
 * Find the character composed from the key pair (first, second).
 *
 * The pair is looked up exactly as given. If that fails and `recurse`
 * is 0, three further attempts are made in this order: the swapped
 * pair, the upper-cased pair, and the upper-cased swapped pair. With a
 * non-zero `recurse` only the exact lookup is performed.
 *
 * Returns the composed character, or -1 if no attempt matched.
 */
static int check_compose_internal(int first, int second, int recurse)
{
    int nc = compose_lookup(first, second);

    if (nc != -1 || recurse != 0)
        return nc;

    nc = compose_lookup(second, first);
    if (nc == -1)
        nc = compose_lookup(compose_toupper(first), compose_toupper(second));
    if (nc == -1)
        nc = compose_lookup(compose_toupper(second), compose_toupper(first));

    return nc;
}
/*
 * Public entry point for compose-key lookup: returns the character
 * composed from the pair (first, second), or whatever failure value
 * check_compose_internal() reports.
 *
 * The third argument of 0 asks check_compose_internal() for a
 * top-level lookup rather than one of its own nested retries.
 */
int check_compose(int first, int second)
{
    const int top_level = 0;

    return check_compose_internal(first, second, top_level);
}
1086 int decode_codepage(const char *cp_name)
1088 const char *s, *d;
1089 const struct cp_list_item *cpi;
1090 int codepage = -1;
1091 CPINFO cpinfo;
1093 if (!cp_name || !*cp_name)
1094 return CP_UTF8; /* default */
1096 for (cpi = cp_list; cpi->name; cpi++) {
1097 s = cp_name;
1098 d = cpi->name;
1099 for (;;) {
1100 while (*s && !isalnum(*s) && *s != ':')
1101 s++;
1102 while (*d && !isalnum(*d) && *d != ':')
1103 d++;
1104 if (*s == 0) {
1105 codepage = cpi->codepage;
1106 if (codepage == CP_UTF8)
1107 goto break_break;
1108 if (codepage == -1)
1109 return codepage;
1110 if (codepage == 0) {
1111 codepage = 65536 + (cpi - cp_list);
1112 goto break_break;
1115 if (GetCPInfo(codepage, &cpinfo) != 0)
1116 goto break_break;
1118 if (tolower((unsigned char)*s++) != tolower((unsigned char)*d++))
1119 break;
1123 d = cp_name;
1124 if (tolower((unsigned char)d[0]) == 'c' &&
1125 tolower((unsigned char)d[1]) == 'p')
1126 d += 2;
1127 if (tolower((unsigned char)d[0]) == 'i' &&
1128 tolower((unsigned char)d[1]) == 'b' &&
1129 tolower((unsigned char)d[2]) == 'm')
1130 d += 3;
1131 for (s = d; *s >= '0' && *s <= '9'; s++);
1132 if (*s == 0 && s != d)
1133 codepage = atoi(d); /* CP999 or IBM999 */
1135 if (codepage == CP_ACP)
1136 codepage = GetACP();
1137 if (codepage == CP_OEMCP)
1138 codepage = GetOEMCP();
1139 if (codepage > 65535)
1140 codepage = -2;
1142 break_break:;
1143 if (codepage != -1) {
1144 if (codepage != CP_UTF8 && codepage < 65536) {
1145 if (GetCPInfo(codepage, &cpinfo) == 0) {
1146 codepage = -2;
1147 } else if (cpinfo.MaxCharSize > 1)
1148 codepage = -3;
1151 if (codepage == -1 && *cp_name)
1152 codepage = -2;
1153 return codepage;
1156 const char *cp_name(int codepage)
1158 const struct cp_list_item *cpi, *cpno;
1159 static char buf[32];
1161 if (codepage == -1) {
1162 sprintf(buf, "Use font encoding");
1163 return buf;
1166 if (codepage > 0 && codepage < 65536)
1167 sprintf(buf, "CP%03d", codepage);
1168 else
1169 *buf = 0;
1171 if (codepage >= 65536) {
1172 cpno = 0;
1173 for (cpi = cp_list; cpi->name; cpi++)
1174 if (cpi == cp_list + (codepage - 65536)) {
1175 cpno = cpi;
1176 break;
1178 if (cpno)
1179 for (cpi = cp_list; cpi->name; cpi++) {
1180 if (cpno->cp_table == cpi->cp_table)
1181 return cpi->name;
1183 } else {
1184 for (cpi = cp_list; cpi->name; cpi++) {
1185 if (codepage == cpi->codepage)
1186 return cpi->name;
1189 return buf;
1193 * Return the nth code page in the list, for use in the GUI
1194 * configurer.
1196 const char *cp_enumerate(int index)
1198 if (index < 0 || index >= lenof(cp_list))
1199 return NULL;
1200 return cp_list[index].name;
1203 void get_unitab(int codepage, wchar_t *unitab, int ftype)
1205 char tbuf[4];
1206 int i, max = 256, flg = MB_ERR_INVALID_CHARS;
1208 if (ftype)
1209 flg |= MB_USEGLYPHCHARS;
1210 if (ftype == 2)
1211 max = 128;
1213 if (codepage == CP_UTF8) {
1214 for (i = 0; i < max; i++)
1215 unitab[i] = i;
1216 return;
1219 if (codepage == CP_ACP)
1220 codepage = GetACP();
1221 else if (codepage == CP_OEMCP)
1222 codepage = GetOEMCP();
1224 if (codepage > 0 && codepage < 65536) {
1225 for (i = 0; i < max; i++) {
1226 tbuf[0] = i;
1228 if (mb_to_wc(codepage, flg, tbuf, 1, unitab + i, 1)
1229 != 1)
1230 unitab[i] = 0xFFFD;
1232 } else {
1233 int j = 256 - cp_list[codepage & 0xFFFF].cp_size;
1234 for (i = 0; i < max; i++)
1235 unitab[i] = i;
1236 for (i = j; i < max; i++)
1237 unitab[i] = cp_list[codepage & 0xFFFF].cp_table[i - j];
1241 int wc_to_mb(int codepage, int flags, const wchar_t *wcstr, int wclen,
1242 char *mbstr, int mblen, const char *defchr)
1244 reverse_mapping *rmap = get_reverse_mapping(codepage);
1246 if (rmap) {
1247 /* Do this by array lookup if we can. */
1248 if (wclen < 0) {
1249 for (wclen = 0; wcstr[wclen++] ;); /* will include the NUL */
1251 char *p;
1252 int i;
1253 for (p = mbstr, i = 0; i < wclen; i++) {
1254 wchar_t ch = wcstr[i];
1255 int by;
1256 const char *p1;
1258 #define WRITECH(chr) do \
1260 assert(p - mbstr < mblen); \
1261 *p++ = (char)(chr); \
1262 } while (0)
1264 if ((p1 = rmap->blocks[(ch >> 8) & 0xFF]) != NULL &&
1265 (by = p1[ch & 0xFF]) != '\0')
1266 WRITECH(by);
1267 else if (ch < 0x80)
1268 WRITECH(ch);
1269 else if (defchr)
1270 for (const char *q = defchr; *q; q++)
1271 WRITECH(*q);
1272 #if 1
1273 else
1274 WRITECH('.');
1275 #endif
1277 #undef WRITECH
1279 return p - mbstr;
1280 } else {
1281 int defused, ret;
1282 ret = WideCharToMultiByte(codepage, flags, wcstr, wclen,
1283 mbstr, mblen, defchr, &defused);
1284 if (ret)
1285 return ret;
1287 #ifdef LEGACY_WINDOWS
1289 * Fallback for legacy platforms too old to support UTF-8: if
1290 * the codepage is UTF-8, we can do the translation ourselves.
1292 if (codepage == CP_UTF8 && mblen > 0 && wclen > 0) {
1293 size_t remaining = mblen;
1294 char *p = mbstr;
1296 while (wclen > 0) {
1297 unsigned long wc = (wclen--, *wcstr++);
1298 if (wclen > 0 && IS_SURROGATE_PAIR(wc, *wcstr)) {
1299 wc = FROM_SURROGATES(wc, *wcstr);
1300 wclen--, wcstr++;
1303 char utfbuf[6];
1304 size_t utflen = encode_utf8(utfbuf, wc);
1305 if (utflen <= remaining) {
1306 memcpy(p, utfbuf, utflen);
1307 p += utflen;
1308 remaining -= utflen;
1309 } else {
1310 return p - mbstr;
1314 return p - mbstr;
1316 #endif
1318 /* No other fallbacks are available */
1319 return 0;
1323 int mb_to_wc(int codepage, int flags, const char *mbstr, int mblen,
1324 wchar_t *wcstr, int wclen)
1326 if (codepage >= 65536) {
1327 /* Character set not known to Windows, so we'll have to
1328 * translate it ourself */
1329 size_t index = codepage - 65536;
1330 if (index >= lenof(cp_list))
1331 return 0;
1332 const struct cp_list_item *cp = &cp_list[index];
1333 if (!cp->cp_table)
1334 return 0;
1336 size_t remaining = wclen;
1337 wchar_t *p = wcstr;
1338 unsigned tablebase = 256 - cp->cp_size;
1340 while (mblen > 0) {
1341 mblen--;
1342 unsigned c = 0xFF & *mbstr++;
1343 wchar_t wc = (c < tablebase ? c : cp->cp_table[c - tablebase]);
1344 if (remaining > 0) {
1345 remaining--;
1346 *p++ = wc;
1347 } else {
1348 return p - wcstr;
1352 return p - wcstr;
1355 int ret = MultiByteToWideChar(codepage, flags, mbstr, mblen, wcstr, wclen);
1356 if (ret)
1357 return ret;
1359 #ifdef LEGACY_WINDOWS
1361 * Fallback for legacy platforms too old to support UTF-8: if the
1362 * codepage is UTF-8, we can do the translation ourselves.
1364 if (codepage == CP_UTF8 && mblen > 0 && wclen > 0) {
1365 size_t remaining = wclen;
1366 wchar_t *p = wcstr;
1368 while (mblen > 0) {
1369 char utfbuf[7];
1370 int thissize = mblen < 6 ? mblen : 6;
1371 memcpy(utfbuf, mbstr, thissize);
1372 utfbuf[thissize] = '\0';
1374 const char *utfptr = utfbuf;
1375 wchar_t wcbuf[2];
1376 size_t nwc = decode_utf8_to_wchar(&utfptr, wcbuf);
1378 for (size_t i = 0; i < nwc; i++) {
1379 if (remaining > 0) {
1380 remaining--;
1381 *p++ = wcbuf[i];
1382 } else {
1383 return p - wcstr;
1387 mbstr += (utfptr - utfbuf);
1388 mblen -= (utfptr - utfbuf);
1391 return p - wcstr;
1393 #endif
1395 /* No other fallbacks are available */
1396 return 0;
/*
 * Report whether `byte` is a DBCS lead byte in `codepage`, as judged
 * by the Windows IsDBCSLeadByteEx() call.
 */
bool is_dbcs_leadbyte(int codepage, char byte)
{
    bool lead = IsDBCSLeadByteEx(codepage, byte);

    return lead;
}