beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / luafontloader / fontforge / fontforge / encoding.c
blob8b42a186b2b4781564d5916bd8361d41950e71ec
1 /* Copyright (C) 2000-2008 by George Williams */
2 /*
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright notice, this
7 * list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice,
10 * this list of conditions and the following disclaimer in the documentation
11 * and/or other materials provided with the distribution.
13 * The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "fontforgevw.h"
29 #include <ustring.h>
30 #include <utype.h>
31 #include <math.h>
32 #include <unistd.h>
33 #include <sys/types.h>
34 #include <dirent.h>
35 #include <gfile.h>
36 #include "plugins.h"
37 #include "encoding.h"
39 Encoding *default_encoding = NULL;
41 static int32 tex_base_encoding[] = {
42 0x0000, 0x02d9, 0xfb01, 0xfb02, 0x2044, 0x02dd, 0x0141, 0x0142,
43 0x02db, 0x02da, 0x000a, 0x02d8, 0x2212, 0x000d, 0x017d, 0x017e,
44 0x02c7, 0x0131, 0xf6be, 0xfb00, 0xfb03, 0xfb04, 0x2260, 0x221e,
45 0x2264, 0x2265, 0x2202, 0x2211, 0x220f, 0x03c0, 0x0060, 0x0027,
46 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x2019,
47 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
48 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
49 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
50 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
51 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
52 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
53 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
54 0x2018, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
55 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
56 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
57 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
58 0x20ac, 0x222b, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
59 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x2126, 0x221a, 0x2248,
60 0x0090, 0x0091, 0x0092, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
61 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x2206, 0x25ca, 0x0178,
62 0x0000, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
63 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x002d, 0x00ae, 0x00af,
64 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
65 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
66 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
67 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
68 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7,
69 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df,
70 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
71 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef,
72 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7,
73 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff
76 static int32 unicode_from_MacSymbol[] = {
77 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
78 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
79 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
80 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
81 0x0020, 0x0021, 0x2200, 0x0023, 0x2203, 0x0025, 0x0026, 0x220d,
82 0x0028, 0x0029, 0x2217, 0x002b, 0x002c, 0x2212, 0x002e, 0x002f,
83 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
84 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
85 0x2245, 0x0391, 0x0392, 0x03a7, 0x0394, 0x0395, 0x03a6, 0x0393,
86 0x0397, 0x0399, 0x03d1, 0x039a, 0x039b, 0x039c, 0x039d, 0x039f,
87 0x03a0, 0x0398, 0x03a1, 0x03a3, 0x03a4, 0x03a5, 0x03c2, 0x03a9,
88 0x039e, 0x03a8, 0x0396, 0x005b, 0x2234, 0x005d, 0x22a5, 0x005f,
89 0xf8e5, 0x03b1, 0x03b2, 0x03c7, 0x03b4, 0x03b5, 0x03c6, 0x03b3,
90 0x03b7, 0x03b9, 0x03d5, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03bf,
91 0x03c0, 0x03b8, 0x03c1, 0x03c3, 0x03c4, 0x03c5, 0x03d6, 0x03c9,
92 0x03be, 0x03c8, 0x03b6, 0x007b, 0x007c, 0x007d, 0x223c, 0x007f,
93 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
94 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
95 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
96 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
97 0x0000, 0x03d2, 0x2032, 0x2264, 0x2044, 0x221e, 0x0192, 0x2663,
98 0x2666, 0x2665, 0x2660, 0x2194, 0x2190, 0x2191, 0x2192, 0x2193,
99 0x00b0, 0x00b1, 0x2033, 0x2265, 0x00d7, 0x221d, 0x2202, 0x2022,
100 0x00f7, 0x2260, 0x2261, 0x2248, 0x2026, 0xf8e6, 0xf8e7, 0x21b5,
101 0x2135, 0x2111, 0x211c, 0x2118, 0x2297, 0x2295, 0x2205, 0x2229,
102 0x222a, 0x2283, 0x2287, 0x2284, 0x2282, 0x2286, 0x2208, 0x2209,
103 0x2220, 0x2207, 0x00ae, 0x00a9, 0x2122, 0x220f, 0x221a, 0x22c5,
104 0x00ac, 0x2227, 0x2228, 0x21d4, 0x21d0, 0x21d1, 0x21d2, 0x21d3,
105 0x22c4, 0x2329, 0xf8e8, 0xf8e9, 0xf8ea, 0x2211, 0xf8eb, 0xf8ec,
106 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4,
107 0xf8ff, 0x232a, 0x222b, 0x2320, 0xf8f5, 0x2321, 0xf8f6, 0xf8f7,
108 0xf8f8, 0xf8f9, 0xf8fa, 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0x02c7
/* I don't think iconv provides encodings for zapfdingbats nor jis201. */
/* Perhaps I should list them here for compatibility, but I think I'll just */
/* leave them out. I doubt they get used. */
114 static Encoding texbase = { "TeX-Base-Encoding", 256, tex_base_encoding, NULL, NULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, 0, 0 };
115 Encoding custom = { "Custom", 0, NULL, NULL, &texbase, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, { 0, 0 }, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, 0, 0 };
116 static Encoding original = { "Original", 0, NULL, NULL, &custom, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, { 0, 0 }, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, 0, 0 };
117 static Encoding unicodebmp = { "UnicodeBmp", 65536, NULL, NULL, &original, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, 0, 0 };
118 static Encoding unicodefull = { "UnicodeFull", 17*65536, NULL, NULL, &unicodebmp, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, 0, 0 };
119 static Encoding adobestd = { "AdobeStandard", 256, unicode_from_adobestd, AdobeStandardEncoding, &unicodefull,
120 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, 0, 0 };
121 static Encoding symbol = { "Symbol", 256, unicode_from_MacSymbol, NULL, &adobestd,1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, { 0, 0 }, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, 0, 0 };
123 Encoding *enclist = &symbol;
125 const char *FindUnicharName(void) {
126 /* Iconv and libiconv use different names for UCS2. Just great. Perhaps */
127 /* different versions of each use still different names? */
128 /* Even worse, both accept UCS-2, but under iconv it means native byte */
129 /* ordering and under libiconv it means big-endian */
130 iconv_t test;
131 static char *goodname = NULL;
132 #ifdef UNICHAR_16
133 static char *names[] = { "UCS-2-INTERNAL", "UCS-2", "UCS2", "ISO-10646/UCS2", "UNICODE", NULL };
134 static char *namesle[] = { "UCS-2LE", "UNICODELITTLE", NULL };
135 static char *namesbe[] = { "UCS-2BE", "UNICODEBIG", NULL };
136 #else
137 static char *names[] = { "UCS-4-INTERNAL", "UCS-4", "UCS4", "ISO-10646-UCS-4", "UTF-32", NULL };
138 static char *namesle[] = { "UCS-4LE", "UTF-32LE", NULL };
139 static char *namesbe[] = { "UCS-4BE", "UTF-32BE", NULL };
140 #endif
141 char **testnames;
142 int i;
143 union {
144 short s;
145 char c[2];
146 } u;
148 if ( goodname!=NULL )
149 return( goodname );
151 u.c[0] = 0x1; u.c[1] = 0x2;
152 if ( u.s==0x201 ) { /* Little endian */
153 testnames = namesle;
154 } else {
155 testnames = namesbe;
157 for ( i=0; testnames[i]!=NULL; ++i ) {
158 test = iconv_open(testnames[i],"ISO-8859-1");
159 if ( test!=(iconv_t) -1 && test!=NULL ) {
160 iconv_close(test);
161 goodname = testnames[i];
162 break;
166 if ( goodname==NULL ) {
167 for ( i=0; names[i]!=NULL; ++i ) {
168 test = iconv_open(names[i],"ISO-8859-1");
169 if ( test!=(iconv_t) -1 && test!=NULL ) {
170 iconv_close(test);
171 goodname = names[i];
172 break;
177 if ( goodname==NULL ) {
178 #ifdef UNICHAR_16
179 IError( "I can't figure out your version of iconv(). I need a name for the UCS-2 encoding and I can't find one. Reconfigure --without-iconv. Bye.");
180 #else
181 IError( "I can't figure out your version of iconv(). I need a name for the UCS-4 encoding and I can't find one. Reconfigure --without-iconv. Bye.");
182 #endif
183 exit( 1 );
186 test = iconv_open(goodname,"Mac");
187 if ( test==(iconv_t) -1 || test==NULL ) {
188 IError( "Your version of iconv does not support the \"Mac Roman\" encoding.\nIf this causes problems, reconfigure --without-iconv." );
189 } else
190 iconv_close(test);
192 /* I really should check for ISO-2022-JP, KR, CN, and all the other encodings */
193 /* I might find in a ttf 'name' table. But those tables take too long to build */
194 return( goodname );
197 static int TryEscape( Encoding *enc,char *escape_sequence ) {
198 char from[20], ucs[20];
199 size_t fromlen, tolen;
200 ICONV_CONST char *fpt;
201 char *upt;
202 int i, j, low;
203 int esc_len = strlen(escape_sequence);
205 strcpy(from,escape_sequence);
207 enc->has_2byte = false;
208 low = -1;
209 for ( i=0; i<256; ++i ) if ( i!=escape_sequence[0] ) {
210 for ( j=0; j<256; ++j ) {
211 from[esc_len] = i; from[esc_len+1] = j; from[esc_len+2] = 0;
212 fromlen = esc_len+2;
213 fpt = from;
214 upt = ucs;
215 tolen = sizeof(ucs);
216 if ( iconv( enc->tounicode , &fpt, &fromlen, &upt, &tolen )!= (size_t) (-1) &&
217 upt-ucs==sizeof(unichar_t) /* Exactly one character */ ) {
218 if ( low==-1 ) {
219 enc->low_page = low = i;
220 enc->has_2byte = true;
222 enc->high_page = i;
223 break;
227 if ( enc->low_page==enc->high_page )
228 enc->has_2byte = false;
229 if ( enc->has_2byte ) {
230 strcpy(enc->iso_2022_escape, escape_sequence);
231 enc->iso_2022_escape_len = esc_len;
233 return( enc->has_2byte );
236 Encoding *_FindOrMakeEncoding(const char *name,int make_it) {
237 Encoding *enc;
238 char buffer[20];
239 const char *iconv_name;
240 Encoding temp;
241 uint8 good[256];
242 int i, j, any, all;
243 char from[8], ucs[20];
244 size_t fromlen, tolen;
245 ICONV_CONST char *fpt;
246 char *upt;
247 /* iconv is not case sensitive */
249 if ( strncasecmp(name,"iso8859_",8)==0 || strncasecmp(name,"koi8_",5)==0 ) {
250 /* Fixup for old naming conventions */
251 strncpy(buffer,name,sizeof(buffer));
252 *strchr(buffer,'_') = '-';
253 name = buffer;
254 } else if ( strcasecmp(name,"iso-8859")==0 ) {
255 /* Fixup for old naming conventions */
256 strncpy(buffer,name,3);
257 strncpy(buffer+3,name+4,sizeof(buffer)-3);
258 name = buffer;
259 } else if ( strcasecmp(name,"isolatin1")==0 ) {
260 name = "iso8859-1";
261 } else if ( strcasecmp(name,"isocyrillic")==0 ) {
262 name = "iso8859-5";
263 } else if ( strcasecmp(name,"isoarabic")==0 ) {
264 name = "iso8859-6";
265 } else if ( strcasecmp(name,"isogreek")==0 ) {
266 name = "iso8859-7";
267 } else if ( strcasecmp(name,"isohebrew")==0 ) {
268 name = "iso8859-8";
269 } else if ( strcasecmp(name,"isothai")==0 ) {
270 name = "tis-620"; /* TIS doesn't define non-breaking space in 0xA0 */
271 } else if ( strcasecmp(name,"latin0")==0 || strcasecmp(name,"latin9")==0 ) {
272 name = "iso8859-15"; /* "latin-9" is supported (libiconv bug?) */
273 } else if ( strcasecmp(name,"koi8r")==0 ) {
274 name = "koi8-r";
275 } else if ( strncasecmp(name,"jis201",6)==0 || strncasecmp(name,"jisx0201",8)==0 ) {
276 name = "jis_x0201";
277 } else if ( strcasecmp(name,"AdobeStandardEncoding")==0 || strcasecmp(name,"Adobe")==0 )
278 name = "AdobeStandard";
279 for ( enc=enclist; enc!=NULL; enc=enc->next )
280 if ( strmatch(name,enc->enc_name)==0 ||
281 (enc->iconv_name!=NULL && strmatch(name,enc->iconv_name)==0))
282 return( enc );
283 if ( strmatch(name,"unicode")==0 || strmatch(name,"iso10646")==0 || strmatch(name,"iso10646-1")==0 )
284 return( &unicodebmp );
285 if ( strmatch(name,"unicode4")==0 || strmatch(name,"ucs4")==0 )
286 return( &unicodefull );
288 iconv_name = name;
289 /* Mac seems to work ok */
290 if ( strcasecmp(name,"win")==0 || strcasecmp(name,"ansi")==0 )
291 iconv_name = "MS-ANSI"; /* "WINDOWS-1252";*/
292 else if ( strcasecmp(name,"gb2312pk")==0 || strcasecmp(name,"gb2312packed")==0 )
293 iconv_name = "EUC-CN";
294 else if ( strcasecmp(name,"wansung")==0 )
295 iconv_name = "EUC-KR";
296 else if ( strcasecmp(name,"EUC-CN")==0 ) {
297 iconv_name = name;
298 name = "gb2312pk";
299 } else if ( strcasecmp(name,"EUC-KR")==0 ) {
300 iconv_name = name;
301 name = "wansung";
304 /* Escape sequences: */
305 /* ISO-2022-CN: \e $ ) A ^N */
306 /* ISO-2022-KR: \e $ ) C ^N */
307 /* ISO-2022-JP: \e $ B */
308 /* ISO-2022-JP-2: \e $ ( D */
309 /* ISO-2022-JP-3: \e $ ( O */ /* Capital "O", not zero */
310 /* ISO-2022-CN-EXT: \e $ ) E ^N */ /* Not sure about this, also uses CN escape */
312 memset(&temp,0,sizeof(temp));
313 temp.builtin = true;
314 temp.tounicode = iconv_open(FindUnicharName(),iconv_name);
315 if ( temp.tounicode==(iconv_t) -1 || temp.tounicode==NULL )
316 return( NULL ); /* Iconv doesn't recognize this name */
317 temp.fromunicode = iconv_open(iconv_name,FindUnicharName());
318 if ( temp.fromunicode==(iconv_t) -1 || temp.fromunicode==NULL ) {
319 /* This should never happen, but if it does... */
320 iconv_close(temp.tounicode);
321 return( NULL );
324 memset(good,0,sizeof(good));
325 any = false; all = true;
326 for ( i=1; i<256; ++i ) {
327 from[0] = i; from[1] = 0;
328 fromlen = 1;
329 fpt = from;
330 upt = ucs;
331 tolen = sizeof(ucs);
332 if ( iconv( temp.tounicode , &fpt, &fromlen, &upt, &tolen )!= (size_t) (-1)) {
333 good[i] = true;
334 any = true;
335 } else
336 all = false;
338 if ( any )
339 temp.has_1byte = true;
340 if ( all )
341 temp.only_1byte = true;
343 if ( !all ) {
344 if ( strstr(iconv_name,"2022")==NULL ) {
345 for ( i=temp.has_1byte; i<256; ++i ) if ( !good[i] ) {
346 for ( j=0; j<256; ++j ) {
347 from[0] = i; from[1] = j; from[2] = 0;
348 fromlen = 2;
349 fpt = from;
350 upt = ucs;
351 tolen = sizeof(ucs);
352 if ( iconv( temp.tounicode , &fpt, &fromlen, &upt, &tolen )!= (size_t) (-1) &&
353 upt-ucs==sizeof(unichar_t) /* Exactly one character */ ) {
354 if ( temp.low_page==-1 )
355 temp.low_page = i;
356 temp.high_page = i;
357 temp.has_2byte = true;
358 break;
362 if ( temp.low_page==temp.high_page ) {
363 temp.has_2byte = false;
364 temp.low_page = temp.high_page = -1;
367 if ( !temp.has_2byte && !good[033]/* escape */ ) {
368 if ( strstr(iconv_name,"2022")!=NULL &&
369 strstr(iconv_name,"JP3")!=NULL &&
370 TryEscape( &temp,"\33$(O" )) {
373 else if ( strstr(iconv_name,"2022")!=NULL &&
374 strstr(iconv_name,"JP2")!=NULL &&
375 TryEscape( &temp,"\33$(D" )) {
378 else if ( strstr(iconv_name,"2022")!=NULL &&
379 strstr(iconv_name,"JP")!=NULL &&
380 TryEscape( &temp,"\33$B" )) {
383 else if ( strstr(iconv_name,"2022")!=NULL &&
384 strstr(iconv_name,"KR")!=NULL &&
385 TryEscape( &temp,"\33$)C\16" )) {
388 else if ( strstr(iconv_name,"2022")!=NULL &&
389 strstr(iconv_name,"CN")!=NULL &&
390 TryEscape( &temp,"\33$)A\16" )) {
395 if ( !temp.has_1byte && !temp.has_2byte )
396 return( NULL );
397 if ( !make_it )
398 return( NULL );
400 enc = chunkalloc(sizeof(Encoding));
401 *enc = temp;
402 enc->enc_name = copy(name);
403 if ( iconv_name!=name )
404 enc->iconv_name = copy(iconv_name);
405 enc->next = enclist;
406 enc->builtin = true;
407 enclist = enc;
408 if ( enc->has_2byte )
409 enc->char_cnt = (enc->high_page<<8) + 256;
410 else {
411 enc->char_cnt = 256;
412 enc->only_1byte = true;
414 if ( strstrmatch(iconv_name,"JP")!=NULL ||
415 strstrmatch(iconv_name,"sjis")!=NULL ||
416 strstrmatch(iconv_name,"cp932")!=NULL )
417 enc->is_japanese = true;
418 else if ( strstrmatch(iconv_name,"KR")!=NULL )
419 enc->is_korean = true;
420 else if ( strstrmatch(iconv_name,"CN")!=NULL )
421 enc->is_simplechinese = true;
422 else if ( strstrmatch(iconv_name,"BIG")!=NULL && strstrmatch(iconv_name,"5")!=NULL )
423 enc->is_tradchinese = true;
425 if ( strstrmatch(name,"ISO8859")!=NULL &&
426 strtol(name+strlen(name)-2,NULL,10)>=16 )
427 /* Not in our menu, don't hide */;
428 else if ( iconv_name!=name || strmatch(name,"mac")==0 || strstrmatch(name,"ISO8859")!=NULL ||
429 strmatch(name,"koi8-r")==0 || strmatch(name,"sjis")==0 ||
430 strmatch(name,"big5")==0 || strmatch(name,"big5hkscs")==0 )
431 enc->hidden = true;
433 return( enc );
436 Encoding *FindOrMakeEncoding(const char *name) {
437 return( _FindOrMakeEncoding(name,true));
441 /* ************************************************************************** */
442 /* ****************************** CID Encodings ***************************** */
443 /* ************************************************************************** */
/* Head of the list of cid maps; MakeDummyMap() pushes new maps on the front. */
struct cidmap *cidmaps = NULL;
446 int CID2NameUni(struct cidmap *map,int cid, char *buffer, int len) {
447 int enc = -1;
448 const char *temp;
450 #if defined( _NO_SNPRINTF ) || defined( __VMS )
451 if ( map==NULL )
452 sprintf(buffer,"cid-%d", cid);
453 else if ( cid<map->namemax && map->name[cid]!=NULL )
454 strncpy(buffer,map->name[cid],len);
455 else if ( cid==0 || (cid<map->namemax && map->unicode[cid]!=0 )) {
456 if ( map->unicode==NULL || map->namemax==0 )
457 enc = 0;
458 else
459 enc = map->unicode[cid];
460 temp = StdGlyphName(buffer,enc,ui_none,(NameList *) -1);
461 if ( temp!=buffer )
462 strcpy(buffer,temp);
463 } else
464 sprintf(buffer,"%s.%d", map->ordering, cid);
465 #else
466 if ( map==NULL )
467 snprintf(buffer,len,"cid-%d", cid);
468 else if ( cid<map->namemax && map->name[cid]!=NULL )
469 strncpy(buffer,map->name[cid],len);
470 else if ( cid==0 )
471 strcpy(buffer,".notdef");
472 else if ( cid<map->namemax && map->unicode[cid]!=0 ) {
473 if ( map->unicode==NULL || map->namemax==0 )
474 enc = 0;
475 else
476 enc = map->unicode[cid];
477 temp = StdGlyphName(buffer,enc,ui_none,(NameList *) -1);
478 if ( temp!=buffer )
479 strcpy(buffer,temp);
480 } else
481 snprintf(buffer,len,"%s.%d", map->ordering, cid);
482 #endif
483 return( enc );
486 int NameUni2CID(struct cidmap *map,int uni, const char *name) {
487 int i;
489 if ( map==NULL )
490 return( -1 );
491 if ( uni!=-1 ) {
492 for ( i=0; i<map->namemax; ++i )
493 if ( map->unicode[i]==(unsigned)uni )
494 return( i );
495 } else {
496 for ( i=0; i<map->namemax; ++i )
497 if ( map->name[i]!=NULL && strcmp(map->name[i],name)==0 )
498 return( i );
500 return( -1 );
503 int MaxCID(struct cidmap *map) {
504 return( map->cidmax );
507 static struct cidmap *MakeDummyMap(char *registry,char *ordering,int supplement) {
508 struct cidmap *ret = galloc(sizeof(struct cidmap));
510 ret->registry = copy(registry);
511 ret->ordering = copy(ordering);
512 ret->supplement = ret->maxsupple = supplement;
513 ret->cidmax = ret->namemax = 0;
514 ret->unicode = NULL; ret->name = NULL;
515 ret->next = cidmaps;
516 cidmaps = ret;
517 return( ret );
520 struct cidmap *FindCidMap(char *registry,char *ordering,int supplement,SplineFont *sf) {
521 return( MakeDummyMap(registry,ordering,supplement));
524 /* ************************** Reencoding routines ************************** */
527 EncMap *EncMapFromEncoding(SplineFont *sf,Encoding *enc) {
528 int i,j, extras, found, base, unmax;
529 int *encoded, *unencoded;
530 EncMap *map;
531 struct altuni *altuni;
532 SplineChar *sc;
534 if ( enc==NULL )
535 return( NULL );
537 base = enc->char_cnt;
538 if ( enc->is_original )
539 base = 0;
540 else if ( enc->char_cnt<=256 )
541 base = 256;
542 else if ( enc->char_cnt<=0x10000 )
543 base = 0x10000;
544 if (base==0)
545 return( NULL );
546 encoded = galloc(base*sizeof(int));
547 memset(encoded,-1,base*sizeof(int));
548 unencoded = galloc(sf->glyphcnt*sizeof(int));
549 unmax = sf->glyphcnt;
551 for ( i=extras=0; i<sf->glyphcnt; ++i ) if ( (sc=sf->glyphs[i])!=NULL ) {
552 found = false;
553 if ( enc->psnames!=NULL ) {
554 for ( j=enc->char_cnt-1; j>=0; --j ) {
555 if ( enc->psnames[j]!=NULL &&
556 strcmp(enc->psnames[j],sc->name)==0 ) {
557 found = true;
558 encoded[j] = i;
562 if ( !found ) {
563 if ( sc->unicodeenc!=-1 &&
564 sc->unicodeenc<unicode4_size &&
565 (j = EncFromUni(sc->unicodeenc,enc))!= -1 )
566 encoded[j] = i;
567 else {
568 /* I don't think extras can surpass unmax now, but it doesn't */
569 /* hurt to leave the code (it's from when we encoded duplicates see below) */
570 if ( extras>=unmax ) unencoded = grealloc(unencoded,(unmax+=300)*sizeof(int));
571 unencoded[extras++] = i;
573 for ( altuni=sc->altuni; altuni!=NULL; altuni=altuni->next ) {
574 if ( altuni->unienc!=-1 &&
575 altuni->unienc<unicode4_size &&
576 altuni->vs==-1 &&
577 altuni->fid==0 &&
578 (j = EncFromUni(altuni->unienc,enc))!= -1 )
579 encoded[j] = i;
580 /* I used to have code here to add these unencoded duplicates */
581 /* but I don't really see any reason to do so. The main unicode */
582 /* will occur, and any encoded duplicates so the glyph won't */
583 /* vanish */
588 /* Some glyphs have both a pua encoding and an encoding in a non-bmp */
589 /* plane. Big5HK does and the AMS glyphs do */
590 if ( enc->is_unicodefull && (sf->uni_interp == ui_trad_chinese ||
591 sf->uni_interp == ui_ams )) {
592 extern const int cns14pua[], amspua[];
593 const int *pua = sf->uni_interp == ui_ams? amspua : cns14pua;
594 for ( i=0xe000; i<0xf8ff; ++i ) {
595 if ( pua[i-0xe000]!=0 )
596 encoded[pua[i-0xe000]] = encoded[i];
600 if ( enc->psnames != NULL ) {
601 /* Names are more important than unicode code points for some encodings */
602 /* AdobeStandard for instance which won't work if you have a glyph */
603 /* named "f_i" (must be "fi") even though the code point is correct */
604 /* The code above would match f_i where AS requires fi, so force the */
605 /* names to be correct. */
606 for ( j=0; j<enc->char_cnt; ++j ) {
607 if ( encoded[j]!=-1 && enc->psnames[j]!=NULL &&
608 strcmp(sf->glyphs[encoded[j]]->name,enc->psnames[j])!=0 ) {
609 free(sf->glyphs[encoded[j]]->name);
610 sf->glyphs[encoded[j]]->name = copy(enc->psnames[j]);
615 map = chunkalloc(sizeof(EncMap));
616 map->enccount = map->encmax = base + extras;
617 map->map = galloc(map->enccount*sizeof(int));
618 memcpy(map->map,encoded,base*sizeof(int));
619 memcpy(map->map+base,unencoded,extras*sizeof(int));
620 map->backmax = sf->glyphcnt;
621 map->backmap = galloc(sf->glyphcnt*sizeof(int));
622 memset(map->backmap,-1,sf->glyphcnt*sizeof(int)); /* Just in case there are some unencoded glyphs (duplicates perhaps) */
623 for ( i = map->enccount-1; i>=0; --i ) if ( map->map[i]!=-1 )
624 map->backmap[map->map[i]] = i;
625 map->enc = enc;
627 free(encoded);
628 free(unencoded);
630 return( map );
633 EncMap *CompactEncMap(EncMap *map, SplineFont *sf) {
634 int i, inuse, gid;
635 int32 *newmap;
637 for ( i=inuse=0; i<map->enccount ; ++i )
638 if ( (gid = map->map[i])!=-1 && SCWorthOutputting(sf->glyphs[gid]))
639 ++inuse;
640 newmap = galloc(inuse*sizeof(int32));
641 for ( i=inuse=0; i<map->enccount ; ++i )
642 if ( (gid = map->map[i])!=-1 && SCWorthOutputting(sf->glyphs[gid]))
643 newmap[inuse++] = gid;
644 free(map->map);
645 map->map = newmap;
646 map->enccount = inuse;
647 map->encmax = inuse;
648 map->enc = &custom;
649 memset(map->backmap,-1,sf->glyphcnt*sizeof(int));
650 for ( i=inuse-1; i>=0; --i )
651 if ( (gid=map->map[i])!=-1 )
652 map->backmap[gid] = i;
653 return( map );
657 static int MapAddEncodingSlot(EncMap *map,int gid) {
658 int enc;
660 if ( map->enccount>=map->encmax )
661 map->map = grealloc(map->map,(map->encmax+=10)*sizeof(int));
662 enc = map->enccount++;
663 map->map[enc] = gid;
664 map->backmap[gid] = enc;
665 return( enc );
668 void FVAddEncodingSlot(FontViewBase *fv,int gid) {
669 EncMap *map = fv->map;
670 int enc;
672 enc = MapAddEncodingSlot(map,gid);
674 fv->selected = grealloc(fv->selected,map->enccount);
675 fv->selected[enc] = 0;
676 FVAdjustScrollBarRows(fv,enc);
679 static int MapAddEnc(SplineFont *sf,SplineChar *sc,EncMap *basemap, EncMap *map,int baseenc, int gid, FontViewBase *fv) {
680 int any = false, enc;
682 if ( gid>=map->backmax ) {
683 map->backmap = grealloc(map->backmap,(map->backmax+=10)*sizeof(int));
684 memset(map->backmap+map->backmax-10,-1,10*sizeof(int));
686 if ( map->enc->psnames!=NULL ) {
687 /* Check for multiple encodings */
688 for ( enc = map->enc->char_cnt-1; enc>=0; --enc ) {
689 if ( map->enc->psnames[enc]!=NULL && strcmp(sc->name,map->enc->psnames[enc])==0 ) {
690 if ( !any ) {
691 map->backmap[gid] = enc;
692 any = true;
694 map->map[enc] = gid;
697 } else {
698 enc = SFFindSlot(sf,map,sc->unicodeenc,sc->name);
699 if ( enc!=-1 ) {
700 map->map[enc] = gid;
701 map->backmap[gid] = enc;
702 any = true;
705 if ( basemap!=NULL && map->enc==basemap->enc && baseenc!=-1 ) {
706 if ( baseenc>=map->enccount ) {
707 if ( map==fv->map )
708 FVAddEncodingSlot(fv,gid);
709 else
710 MapAddEncodingSlot(map,gid);
711 } else {
712 map->map[baseenc] = gid;
713 if ( map->backmap[gid]==-1 )
714 map->backmap[gid] = baseenc;
716 any = true;
718 return( any );
721 void SFAddGlyphAndEncode(SplineFont *sf,SplineChar *sc,EncMap *basemap, int baseenc) {
722 int gid, mapfound = false;
723 FontViewBase *fv;
724 BDFFont *bdf;
726 if ( sf->cidmaster==NULL ) {
727 if ( sf->glyphcnt+1>=sf->glyphmax )
728 sf->glyphs = grealloc(sf->glyphs,(sf->glyphmax+=10)*sizeof(SplineChar *));
729 gid = sf->glyphcnt++;
730 for ( bdf = sf->bitmaps; bdf!=NULL; bdf=bdf->next ) {
731 if ( sf->glyphcnt+1>=bdf->glyphmax )
732 bdf->glyphs = grealloc(bdf->glyphs,(bdf->glyphmax=sf->glyphmax)*sizeof(BDFChar *));
733 if ( sf->glyphcnt>bdf->glyphcnt ) {
734 memset(bdf->glyphs+bdf->glyphcnt,0,(sf->glyphcnt-bdf->glyphcnt)*sizeof(BDFChar *));
735 bdf->glyphcnt = sf->glyphcnt;
738 for ( fv=sf->fv; fv!=NULL; fv = fv->nextsame ) {
739 EncMap *map = fv->map;
740 if ( gid>=map->backmax )
741 map->backmap = grealloc(map->backmap,(map->backmax=gid+10)*sizeof(int));
742 map->backmap[gid] = -1;
744 } else {
745 gid = baseenc;
746 if ( baseenc+1>=sf->glyphmax )
747 sf->glyphs = grealloc(sf->glyphs,(sf->glyphmax = baseenc+10)*sizeof(SplineChar *));
748 if ( baseenc>=sf->glyphcnt ) {
749 memset(sf->glyphs+sf->glyphcnt,0,(baseenc+1-sf->glyphcnt)*sizeof(SplineChar *));
750 sf->glyphcnt = baseenc+1;
751 for ( bdf = sf->cidmaster->bitmaps; bdf!=NULL; bdf=bdf->next ) {
752 if ( baseenc+1>=bdf->glyphmax )
753 bdf->glyphs = grealloc(bdf->glyphs,(bdf->glyphmax=baseenc+10)*sizeof(BDFChar *));
754 if ( baseenc+1>bdf->glyphcnt ) {
755 memset(bdf->glyphs+bdf->glyphcnt,0,(baseenc+1-bdf->glyphcnt)*sizeof(BDFChar *));
756 bdf->glyphcnt = baseenc+1;
759 for ( fv=sf->fv; fv!=NULL; fv = fv->nextsame ) if ( fv->sf==sf ) {
760 EncMap *map = fv->map;
761 if ( gid>=map->backmax )
762 map->backmap = grealloc(map->backmap,(map->backmax=gid+10)*sizeof(int));
763 map->backmap[gid] = -1;
767 sf->glyphs[gid] = NULL;
768 for ( fv=sf->fv; fv!=NULL; fv = fv->nextsame ) {
769 EncMap *map = fv->map;
771 FVBiggerGlyphCache(fv,gid);
773 if ( !MapAddEnc(sf,sc,basemap,map,baseenc,gid,fv) )
774 FVAddEncodingSlot(fv,gid);
775 if ( map==basemap ) mapfound = true;
776 if ( fv->normal!=NULL ) {
777 if ( !MapAddEnc(sf,sc,basemap,fv->normal,baseenc,gid,fv))
778 MapAddEncodingSlot(fv->normal,gid);
781 if ( !mapfound && basemap!=NULL )
782 MapAddEnc(sf,sc,basemap,basemap,baseenc,gid,fv);
783 sf->glyphs[gid] = sc;
784 sc->orig_pos = gid;
785 sc->parent = sf;
786 SFHashGlyph(sf,sc);
790 int32 UniFromEnc(int enc, Encoding *encname) {
791 char from[20];
792 unichar_t to[20];
793 ICONV_CONST char *fpt;
794 char *tpt;
795 size_t fromlen, tolen;
797 if ( encname->is_custom || encname->is_original )
798 return( -1 );
799 if ( enc>=encname->char_cnt )
800 return( -1 );
801 if ( encname->is_unicodebmp || encname->is_unicodefull )
802 return( enc );
803 if ( encname->unicode!=NULL )
804 return( encname->unicode[enc] );
805 else if ( encname->tounicode ) {
806 /* To my surprise, on RH9, doing a reset on conversion of CP1258->UCS2 */
807 /* causes subsequent calls to return garbage */
808 if ( encname->iso_2022_escape_len ) {
809 tolen = sizeof(to); fromlen = 0;
810 iconv(encname->tounicode,NULL,&fromlen,NULL,&tolen); /* Reset state */
812 fpt = from; tpt = (char *) to; tolen = sizeof(to);
813 if ( encname->has_1byte && enc<256 ) {
814 *(char *) fpt = enc;
815 fromlen = 1;
816 } else if ( encname->has_2byte ) {
817 if ( encname->iso_2022_escape_len )
818 strncpy(from,encname->iso_2022_escape,encname->iso_2022_escape_len );
819 fromlen = encname->iso_2022_escape_len;
820 from[fromlen++] = enc>>8;
821 from[fromlen++] = enc&0xff;
823 if ( iconv(encname->tounicode,&fpt,&fromlen,&tpt,&tolen)==(size_t) -1 )
824 return( -1 );
825 if ( tpt-(char *) to == 0 ) {
826 /* This strange call appears to be what we need to make CP1258->UCS2 */
827 /* work. It's supposed to reset the state and give us the shift */
828 /* out. As there is no state, and no shift out I have no idea why*/
829 /* this works, but it does. */
830 if ( iconv(encname->tounicode,NULL,&fromlen,&tpt,&tolen)==(size_t) -1 )
831 return( -1 );
833 if ( tpt-(char *) to == sizeof(unichar_t) )
834 return( to[0] );
835 #ifdef UNICHAR_16
836 else if ( tpt-(char *) to == 4 && to[0]>=0xd800 && to[0]<0xdc00 && to[1]>=0xdc00 )
837 return( ((to[0]-0xd800)<<10) + (to[1]-0xdc00) + 0x10000 );
838 #endif
839 } else if ( encname->tounicode_func!=NULL ) {
840 return( (encname->tounicode_func)(enc) );
842 return( -1 );
845 int32 EncFromUni(int32 uni, Encoding *enc) {
846 unichar_t from[20];
847 unsigned char to[20];
848 ICONV_CONST char *fpt;
849 char *tpt;
850 size_t fromlen, tolen;
851 int i;
853 if ( enc->is_custom || enc->is_original || enc->is_compact || uni==-1 )
854 return( -1 );
855 if ( enc->is_unicodebmp || enc->is_unicodefull )
856 return( uni<enc->char_cnt ? uni : -1 );
858 if ( enc->unicode!=NULL ) {
859 for ( i=0; i<enc->char_cnt; ++i ) {
860 if ( enc->unicode[i]==uni )
861 return( i );
863 return( -1 );
864 } else if ( enc->fromunicode!=NULL ) {
865 /* I don't see how there can be any state to reset in this direction */
866 /* So I don't reset it */
867 #ifdef UNICHAR_16
868 if ( uni<0x10000 ) {
869 from[0] = uni;
870 fromlen = sizeof(unichar_t);
871 } else {
872 uni -= 0x10000;
873 from[0] = 0xd800 + (uni>>10);
874 from[1] = 0xdc00 + (uni&0x3ff);
875 fromlen = 2*sizeof(unichar_t);
877 #else
878 from[0] = uni;
879 fromlen = sizeof(unichar_t);
880 #endif
881 fpt = (char *) from; tpt = (char *) to; tolen = sizeof(to);
882 iconv(enc->fromunicode,NULL,NULL,NULL,NULL); /* reset shift in/out, etc. */
883 if ( iconv(enc->fromunicode,&fpt,&fromlen,&tpt,&tolen)==(size_t) -1 )
884 return( -1 );
885 if ( tpt-(char *) to == 1 )
886 return( to[0] );
887 if ( enc->iso_2022_escape_len!=0 ) {
888 if ( tpt-(char *) to == enc->iso_2022_escape_len+2 &&
889 strncmp((char *) to,enc->iso_2022_escape,enc->iso_2022_escape_len)==0 )
890 return( (to[enc->iso_2022_escape_len]<<8) | to[enc->iso_2022_escape_len+1] );
891 } else {
892 if ( tpt-(char *) to == sizeof(unichar_t) )
893 return( (to[0]<<8) | to[1] );
895 } else if ( enc->fromunicode_func!=NULL ) {
896 return( (enc->fromunicode_func)(uni) );
898 return( -1 );
901 int32 EncFromName(const char *name,enum uni_interp interp,Encoding *encname) {
902 int i;
903 if ( encname->psnames!=NULL ) {
904 for ( i=0; i<encname->char_cnt; ++i )
905 if ( encname->psnames[i]!=NULL && strcmp(name,encname->psnames[i])==0 )
906 return( i );
908 i = UniFromName(name,interp,encname);
909 if ( i==-1 && strlen(name)==4 ) {
910 /* MS says use this kind of name, Adobe says use the one above */
911 char *end;
912 i = strtol(name,&end,16);
913 if ( i<0 || i>0xffff || *end!='\0' )
914 return( -1 );
916 return( EncFromUni(i,encname));
919 void SFExpandGlyphCount(SplineFont *sf, int newcnt) {
920 int old = sf->glyphcnt;
921 FontViewBase *fv;
923 if ( old>=newcnt )
924 return;
925 if ( sf->glyphmax<newcnt ) {
926 sf->glyphs = grealloc(sf->glyphs,newcnt*sizeof(SplineChar *));
927 sf->glyphmax = newcnt;
929 memset(sf->glyphs+sf->glyphcnt,0,(newcnt-sf->glyphcnt)*sizeof(SplineChar *));
930 sf->glyphcnt = newcnt;
932 for ( fv=sf->fv; fv!=NULL; fv=fv->nextsame ) {
933 if ( fv->sf==sf ) { /* Beware of cid keyed fonts which might look at a different subfont */
934 if ( fv->normal!=NULL )
935 continue; /* If compacted then we haven't added any glyphs so haven't changed anything */
936 /* Don't display any of these guys, so not mapped. */
937 /* No change to selection, or to map->map, but change to backmap */
938 if ( newcnt>fv->map->backmax )
939 fv->map->backmap = grealloc(fv->map->backmap,(fv->map->backmax = newcnt+5)*sizeof(int32));
940 memset(fv->map->backmap+old,-1,(newcnt-old)*sizeof(int32));