r9965@lvps87-230-33-50: verhaegs | 2009-01-10 17:52:41 +0100
[AROS.git] / workbench / libs / codesetslib / src / codesets.c
blobc58edbafdfa3bcb5970f4a99ff9790d4f48c8cb8
1 /***************************************************************************
3 codesets.library - Amiga shared library for handling different codesets
4 Copyright (C) 2001-2005 by Alfonso [alfie] Ranieri <alforan@tin.it>.
5 Copyright (C) 2005-2008 by codesets.library Open Source Team
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 codesets.library project: http://sourceforge.net/projects/codesetslib/
19 Most of the code included in this file was relicensed from GPL to LGPL
20 from the source code of SimpleMail (http://www.sf.net/projects/simplemail)
21 with full permissions by its authors.
23 $Id$
25 ***************************************************************************/
27 #include "lib.h"
29 #include <clib/alib_protos.h>
31 #include <diskfont/glyph.h>
32 #include <diskfont/diskfonttag.h>
33 #include <proto/diskfont.h>
34 #include <ctype.h>
35 #include <limits.h>
37 #ifdef __MORPHOS__
38 #include <proto/keymap.h>
39 #include <proto/locale.h>
40 #endif
42 #include "codesets_table.h"
43 #include "convertUTF.h"
44 #include "codepages.h"
46 #ifndef __AROS__
47 #include "SDI_stdarg.h"
48 #endif /* __AROS__ */
50 #include "debug.h"
52 /**************************************************************************/
54 /// BIN_SEARCH()
// Binary search over a sorted array in O(log n).
//
//  array   - the sorted array to search
//  low     - index of the first element to consider (inclusive)
//  high    - index of the last element to consider (INCLUSIVE, i.e. count-1)
//  compare - expression comparing the key against the probed element; it has
//            to refer to the probe index by the name 'm' and must yield <0
//            if the key sorts before array[m], 0 on a match and >0 otherwise
//  result  - lvalue receiving &array[m] on a match, NULL if nothing matched
//
// e.g.
// BIN_SEARCH(strings,0,(sizeof(strings)/sizeof(strings[0]))-1,strcmp(key,strings[m]),res);
//
// All arguments are parenthesized and 'low'/'high' are evaluated only once,
// so passing compound expressions is safe.
#define BIN_SEARCH(array,low,high,compare,result) \
  {\
    int l = (low);\
    int h = (high);\
    int m = (l + h)/2;\
    (result) = NULL;\
    while (l<=h)\
    {\
      int d = (compare);\
      if (!d){ (result) = &(array)[m]; break; }\
      if (d < 0) h = m - 1;\
      else l = m + 1;\
      m = (l + h)/2;\
    }\
  }
73 ///
74 /// mystrdup()
75 static STRPTR
76 mystrdup(const char *str)
78 STRPTR newStr = NULL;
80 ENTER();
82 if(str != NULL)
84 int len;
86 if((len = strlen(str)) > 0)
88 if((newStr = allocArbitrateVecPooled(len+1)) != NULL)
89 strlcpy(newStr, str, len+1);
93 RETURN(newStr);
94 return newStr;
96 ///
97 /// mystrndup()
98 static STRPTR
99 mystrndup(const char *str1, int n)
101 STRPTR dest;
103 ENTER();
105 if((dest = allocArbitrateVecPooled(n+1)) != NULL)
107 if(str1 != NULL)
108 strlcpy(dest, str1, n+1);
109 else
110 dest[0] = '\0';
112 dest[n] = '\0';
115 RETURN(dest);
116 return dest;
119 /// readLine()
120 static ULONG
121 readLine(BPTR fh, char *buf, ULONG size)
123 char *c;
125 ENTER();
127 if((c = FGets(fh, buf, size)) == NULL)
129 RETURN(FALSE);
130 return FALSE;
133 for(; *c; c++)
135 if(*c == '\n' || *c == '\r')
137 *c = '\0';
138 break;
142 RETURN(TRUE);
143 return TRUE;
146 /// getConfigItem()
147 static const char * getConfigItem(const char *buf, const char *item, int len)
149 ENTER();
151 if(strnicmp(buf, item, len) == 0)
153 UBYTE c;
155 buf += len;
157 /* skip spaces */
158 while((c = *buf) != '\0' && isspace(c))
159 buf++;
161 if(*buf != '=')
163 RETURN(NULL);
164 return NULL;
167 buf++;
169 /* skip spaces */
170 while((c = *buf) != '\0' && isspace(c))
171 buf++;
173 RETURN(buf);
174 return buf;
177 RETURN(NULL);
178 return NULL;
181 /// parseUtf8()
182 static int
183 parseUtf8(STRPTR *ps)
185 STRPTR s = *ps;
186 int wc, n, i;
188 ENTER();
190 if(*s<0x80)
192 *ps = s+1;
194 RETURN(*s);
195 return *s;
198 if(*s<0xc2)
200 RETURN(-1);
201 return -1;
203 else
205 if(*s<0xe0)
207 if((s[1] & 0xc0)!=0x80)
209 RETURN(-1);
210 return -1;
213 *ps = s+2;
215 RETURN(((s[0] & 0x1f)<<6) | (s[1] & 0x3f));
216 return ((s[0] & 0x1f)<<6) | (s[1] & 0x3f);
218 else
220 if(*s<0xf0)
222 n = 3;
224 else
226 if(*s<0xf8)
228 n = 4;
230 else
232 if(*s<0xfc)
234 n = 5;
236 else
238 if(*s<0xfe)
240 n = 6;
242 else
244 RETURN(-1);
245 return -1;
253 wc = *s++ & ((1<<(7-n))-1);
255 for(i = 1; i<n; i++)
257 if((*s & 0xc0) != 0x80)
259 RETURN(-1);
260 return -1;
263 wc = (wc << 6) | (*s++ & 0x3f);
266 if(wc < (1 << (5 * n - 4)))
268 RETURN(-1);
269 return -1;
272 *ps = s;
274 RETURN(wc);
275 return wc;
279 /// countCodesets()
280 static int
281 countCodesets(struct codesetList *csList)
283 struct MinNode *node, *succ;
284 int num;
286 for(node = csList->list.mlh_Head, num = 0; (succ = node->mln_Succ); node = succ)
287 ++num;
289 return num;
293 /// mapUTF8toASCII()
294 // in case some UTF8 sequences can not be converted during CodesetsUTF8ToStrA(), this
295 // function is used to replace these unknown sequences with lookalike characters that
296 // still make the text more readable. For more replacement see
297 // http://www.utf8-zeichentabelle.de/unicode-utf8-table.pl
299 // The conversion table in this function is partly borrowed from the awebcharset plugin
300 // written by Frank Weber. See http://cvs.sunsite.dk/viewcvs.cgi/aweb/plugins/charset/awebcharset.c
// One entry of the UTF8 replacement table
struct UTF8Replacement
{
  const char *utf8;   // the UTF8 sequence which is going to be replaced
  const int utf8len;  // number of bytes of that UTF8 sequence
  const char *rep;    // the replacement string
  const int replen;   // length of the replacement string (negative values signal an UTF8 replacement)
};

// Comparison function for bsearch(): entries are ordered by the length of
// their UTF8 sequence first and by the bytes of the sequence second.
static int compareUTF8Replacements(const void *p1, const void *p2)
{
  const struct UTF8Replacement *lhs = p1;
  const struct UTF8Replacement *rhs = p2;

  // sequences of different length are ordered by their length only
  if(lhs->utf8len != rhs->utf8len)
    return lhs->utf8len - rhs->utf8len;

  // same length, so the bytes themselves decide
  return memcmp(lhs->utf8, rhs->utf8, lhs->utf8len);
}
324 static int mapUTF8toASCII(const char **dst, const unsigned char *src, const int utf8len)
326 int len = 0;
327 struct UTF8Replacement key = { (char *)src, utf8len, NULL, 0 };
328 struct UTF8Replacement *rep;
330 static struct UTF8Replacement const utf8map[] =
332 // U+0100 ... U+017F (Latin Extended-A)
333 { "\xC4\x80", 2, "A", 1 }, // U+0100 -> A (LATIN CAPITAL LETTER A WITH MACRON)
334 { "\xC4\x81", 2, "a", 1 }, // U+0101 -> a (LATIN SMALL LETTER A WITH MACRON)
335 { "\xC4\x82", 2, "A", 1 }, // U+0102 -> A (LATIN CAPITAL LETTER A WITH BREVE)
336 { "\xC4\x83", 2, "a", 1 }, // U+0103 -> a (LATIN SMALL LETTER A WITH BREVE)
337 { "\xC4\x84", 2, "A", 1 }, // U+0104 -> A (LATIN CAPITAL LETTER A WITH OGONEK)
338 { "\xC4\x85", 2, "a", 1 }, // U+0105 -> a (LATIN SMALL LETTER A WITH OGONEK)
339 { "\xC4\x86", 2, "C", 1 }, // U+0106 -> C (LATIN CAPITAL LETTER C WITH ACUTE)
340 { "\xC4\x87", 2, "c", 1 }, // U+0107 -> c (LATIN SMALL LETTER C WITH ACUTE)
341 { "\xC4\x88", 2, "C", 1 }, // U+0108 -> C (LATIN CAPITAL LETTER C WITH CIRCUMFLEX)
342 { "\xC4\x89", 2, "c", 1 }, // U+0109 -> c (LATIN SMALL LETTER C WITH CIRCUMFLEX)
343 { "\xC4\x8A", 2, "C", 1 }, // U+010A -> C (LATIN CAPITAL LETTER C WITH DOT ABOVE)
344 { "\xC4\x8B", 2, "c", 1 }, // U+010B -> c (LATIN SMALL LETTER C WITH DOT ABOVE)
345 { "\xC4\x8C", 2, "C", 1 }, // U+010C -> C (LATIN CAPITAL LETTER C WITH CARON)
346 { "\xC4\x8D", 2, "c", 1 }, // U+010D -> c (LATIN SMALL LETTER C WITH CARON)
347 { "\xC4\x8E", 2, "D", 1 }, // U+010E -> D (LATIN CAPITAL LETTER D WITH CARON)
348 { "\xC4\x8F", 2, "d", 1 }, // U+010F -> d (LATIN SMALL LETTER D WITH CARON)
349 { "\xC4\x90", 2, "D", 1 }, // U+0110 -> D (LATIN CAPITAL LETTER D WITH STROKE)
350 { "\xC4\x91", 2, "d", 1 }, // U+0111 -> d (LATIN SMALL LETTER D WITH STROKE)
351 { "\xC4\x92", 2, "E", 1 }, // U+0112 -> E (LATIN CAPITAL LETTER E WITH MACRON)
352 { "\xC4\x93", 2, "e", 1 }, // U+0113 -> e (LATIN SMALL LETTER E WITH MACRON)
353 { "\xC4\x94", 2, "E", 1 }, // U+0114 -> E (LATIN CAPITAL LETTER E WITH BREVE)
354 { "\xC4\x95", 2, "e", 1 }, // U+0115 -> e (LATIN SMALL LETTER E WITH BREVE)
355 { "\xC4\x96", 2, "E", 1 }, // U+0116 -> E (LATIN CAPITAL LETTER E WITH DOT ABOVE)
356 { "\xC4\x97", 2, "e", 1 }, // U+0117 -> e (LATIN SMALL LETTER E WITH DOT ABOVE)
357 { "\xC4\x98", 2, "E", 1 }, // U+0118 -> E (LATIN CAPITAL LETTER E WITH OGONEK)
358 { "\xC4\x99", 2, "e", 1 }, // U+0119 -> e (LATIN SMALL LETTER E WITH OGONEK)
359 { "\xC4\x9A", 2, "E", 1 }, // U+011A -> E (LATIN CAPITAL LETTER E WITH CARON)
360 { "\xC4\x9B", 2, "e", 1 }, // U+011B -> e (LATIN SMALL LETTER E WITH CARON)
361 { "\xC4\x9C", 2, "G", 1 }, // U+011C -> G (LATIN CAPITAL LETTER G WITH CIRCUMFLEX)
362 { "\xC4\x9D", 2, "g", 1 }, // U+011D -> g (LATIN SMALL LETTER G WITH CIRCUMFLEX)
363 { "\xC4\x9E", 2, "G", 1 }, // U+011E -> G (LATIN CAPITAL LETTER G WITH BREVE)
364 { "\xC4\x9F", 2, "g", 1 }, // U+011F -> g (LATIN SMALL LETTER G WITH BREVE)
365 { "\xC4\xA0", 2, "G", 1 }, // U+0120 -> G (LATIN CAPITAL LETTER G WITH DOT ABOVE)
366 { "\xC4\xA1", 2, "g", 1 }, // U+0121 -> g (LATIN SMALL LETTER G WITH DOT ABOVE)
367 { "\xC4\xA2", 2, "G", 1 }, // U+0122 -> G (LATIN CAPITAL LETTER G WITH CEDILLA)
368 { "\xC4\xA3", 2, "g", 1 }, // U+0123 -> g (LATIN SMALL LETTER G WITH CEDILLA)
369 { "\xC4\xA4", 2, "H", 1 }, // U+0124 -> H (LATIN CAPITAL LETTER H WITH CIRCUMFLEX)
370 { "\xC4\xA5", 2, "h", 1 }, // U+0125 -> h (LATIN SMALL LETTER H WITH CIRCUMFLEX)
371 { "\xC4\xA6", 2, "H", 1 }, // U+0126 -> H (LATIN CAPITAL LETTER H WITH STROKE)
372 { "\xC4\xA7", 2, "h", 1 }, // U+0127 -> h (LATIN SMALL LETTER H WITH STROKE)
373 { "\xC4\xA8", 2, "I", 1 }, // U+0128 -> I (LATIN CAPITAL LETTER I WITH TILDE)
374 { "\xC4\xA9", 2, "i", 1 }, // U+0129 -> i (LATIN SMALL LETTER I WITH TILDE)
375 { "\xC4\xAA", 2, "I", 1 }, // U+012A -> I (LATIN CAPITAL LETTER I WITH MACRON)
376 { "\xC4\xAB", 2, "i", 1 }, // U+012B -> i (LATIN SMALL LETTER I WITH MACRON)
377 { "\xC4\xAC", 2, "I", 1 }, // U+012C -> I (LATIN CAPITAL LETTER I WITH BREVE)
378 { "\xC4\xAD", 2, "i", 1 }, // U+012D -> i (LATIN SMALL LETTER I WITH BREVE)
379 { "\xC4\xAE", 2, "I", 1 }, // U+012E -> I (LATIN CAPITAL LETTER I WITH OGONEK)
380 { "\xC4\xAF", 2, "i", 1 }, // U+012F -> i (LATIN SMALL LETTER I WITH OGONEK)
381 { "\xC4\xB0", 2, "I", 1 }, // U+0130 -> I (LATIN CAPITAL LETTER I WITH DOT ABOVE)
382 { "\xC4\xB1", 2, "i", 1 }, // U+0131 -> i (LATIN SMALL LETTER DOTLESS I)
383 { "\xC4\xB2", 2, "Ij", 2 }, // U+0132 -> Ij (LATIN CAPITAL LIGATURE IJ)
384 { "\xC4\xB3", 2, "ij", 2 }, // U+0133 -> ij (LATIN SMALL LIGATURE IJ)
385 { "\xC4\xB4", 2, "J", 1 }, // U+0134 -> J (LATIN CAPITAL LETTER J WITH CIRCUMFLEX)
386 { "\xC4\xB5", 2, "j", 1 }, // U+0135 -> j (LATIN SMALL LETTER J WITH CIRCUMFLEX)
387 { "\xC4\xB6", 2, "K", 1 }, // U+0136 -> K (LATIN CAPITAL LETTER K WITH CEDILLA)
388 { "\xC4\xB7", 2, "k", 1 }, // U+0137 -> k (LATIN SMALL LETTER K WITH CEDILLA)
389 { "\xC4\xB8", 2, "k", 1 }, // U+0138 -> k (LATIN SMALL LETTER KRA)
390 { "\xC4\xB9", 2, "L", 1 }, // U+0139 -> L (LATIN CAPITAL LETTER L WITH ACUTE)
391 { "\xC4\xBA", 2, "l", 1 }, // U+013A -> l (LATIN SMALL LETTER L WITH ACUTE)
392 { "\xC4\xBB", 2, "L", 1 }, // U+013B -> L (LATIN CAPITAL LETTER L WITH CEDILLA)
393 { "\xC4\xBC", 2, "l", 1 }, // U+013C -> l (LATIN SMALL LETTER L WITH CEDILLA)
394 { "\xC4\xBD", 2, "L", 1 }, // U+013D -> L (LATIN CAPITAL LETTER L WITH CARON)
395 { "\xC4\xBE", 2, "l", 1 }, // U+013E -> l (LATIN SMALL LETTER L WITH CARON)
396 { "\xC4\xBF", 2, "L", 1 }, // U+013F -> L (LATIN CAPITAL LETTER L WITH MIDDLE DOT)
397 { "\xC5\x80", 2, "l", 1 }, // U+0140 -> l (LATIN SMALL LETTER L WITH MIDDLE DOT)
398 { "\xC5\x81", 2, "L", 1 }, // U+0141 -> L (LATIN CAPITAL LETTER L WITH STROKE)
399 { "\xC5\x82", 2, "l", 1 }, // U+0142 -> l (LATIN SMALL LETTER L WITH STROKE)
400 { "\xC5\x83", 2, "N", 1 }, // U+0143 -> N (LATIN CAPITAL LETTER N WITH ACUTE)
401 { "\xC5\x84", 2, "n", 1 }, // U+0144 -> n (LATIN SMALL LETTER N WITH ACUTE)
402 { "\xC5\x85", 2, "N", 1 }, // U+0145 -> N (LATIN CAPITAL LETTER N WITH CEDILLA)
403 { "\xC5\x86", 2, "n", 1 }, // U+0146 -> n (LATIN SMALL LETTER N WITH CEDILLA)
404 { "\xC5\x87", 2, "N", 1 }, // U+0147 -> N (LATIN CAPITAL LETTER N WITH CARON)
405 { "\xC5\x88", 2, "n", 1 }, // U+0148 -> n (LATIN SMALL LETTER N WITH CARON)
406 { "\xC5\x89", 2, "'n", 2 }, // U+0149 -> 'n (LATIN SMALL LETTER N PRECEDED BY APOSTROPHE)
407 { "\xC5\x8A", 2, "Ng", 2 }, // U+014A -> Ng (LATIN CAPITAL LETTER ENG)
408 { "\xC5\x8B", 2, "ng", 2 }, // U+014B -> ng (LATIN SMALL LETTER ENG)
409 { "\xC5\x8C", 2, "O", 1 }, // U+014C -> O (LATIN CAPITAL LETTER O WITH MACRON)
410 { "\xC5\x8D", 2, "o", 1 }, // U+014D -> o (LATIN SMALL LETTER O WITH MACRON)
411 { "\xC5\x8E", 2, "O", 1 }, // U+014E -> O (LATIN CAPITAL LETTER O WITH BREVE)
412 { "\xC5\x8F", 2, "o", 1 }, // U+014F -> o (LATIN SMALL LETTER O WITH BREVE)
413 { "\xC5\x90", 2, "O", 1 }, // U+0150 -> O (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE)
414 { "\xC5\x91", 2, "o", 1 }, // U+0151 -> o (LATIN SMALL LETTER O WITH DOUBLE ACUTE)
415 { "\xC5\x92", 2, "Oe", 2 }, // U+0152 -> Oe (LATIN CAPITAL LIGATURE OE)
416 { "\xC5\x93", 2, "oe", 2 }, // U+0153 -> oe (LATIN SMALL LIGATURE OE)
417 { "\xC5\x94", 2, "R", 1 }, // U+0154 -> R (LATIN CAPITAL LETTER R WITH ACUTE)
418 { "\xC5\x95", 2, "r", 1 }, // U+0155 -> r (LATIN SMALL LETTER R WITH ACUTE)
419 { "\xC5\x96", 2, "R", 1 }, // U+0156 -> R (LATIN CAPITAL LETTER R WITH CEDILLA)
420 { "\xC5\x97", 2, "r", 1 }, // U+0157 -> r (LATIN SMALL LETTER R WITH CEDILLA)
421 { "\xC5\x98", 2, "R", 1 }, // U+0158 -> R (LATIN CAPITAL LETTER R WITH CARON)
422 { "\xC5\x99", 2, "r", 1 }, // U+0159 -> r (LATIN SMALL LETTER R WITH CARON)
423 { "\xC5\x9A", 2, "S", 1 }, // U+015A -> S (LATIN CAPITAL LETTER S WITH ACUTE)
424 { "\xC5\x9B", 2, "s", 1 }, // U+015B -> s (LATIN SMALL LETTER S WITH ACUTE)
425 { "\xC5\x9C", 2, "S", 1 }, // U+015C -> S (LATIN CAPITAL LETTER S WITH CIRCUMFLEX)
426 { "\xC5\x9D", 2, "s", 1 }, // U+015D -> s (LATIN SMALL LETTER S WITH CIRCUMFLEX)
427 { "\xC5\x9E", 2, "S", 1 }, // U+015E -> S (LATIN CAPITAL LETTER S WITH CEDILLA)
428 { "\xC5\x9F", 2, "s", 1 }, // U+015F -> s (LATIN SMALL LETTER S WITH CEDILLA)
429 { "\xC5\xA0", 2, "S", 1 }, // U+0160 -> S (LATIN CAPITAL LETTER S WITH CARON)
430 { "\xC5\xA1", 2, "s", 1 }, // U+0161 -> s (LATIN SMALL LETTER S WITH CARON)
431 { "\xC5\xA2", 2, "T", 1 }, // U+0162 -> T (LATIN CAPITAL LETTER T WITH CEDILLA)
432 { "\xC5\xA3", 2, "t", 1 }, // U+0163 -> t (LATIN SMALL LETTER T WITH CEDILLA)
433 { "\xC5\xA4", 2, "T", 1 }, // U+0164 -> T (LATIN CAPITAL LETTER T WITH CARON)
434 { "\xC5\xA5", 2, "t", 1 }, // U+0165 -> t (LATIN SMALL LETTER T WITH CARON)
435 { "\xC5\xA6", 2, "T", 1 }, // U+0166 -> T (LATIN CAPITAL LETTER T WITH STROKE)
436 { "\xC5\xA7", 2, "t", 1 }, // U+0167 -> t (LATIN SMALL LETTER T WITH STROKE)
437 { "\xC5\xA8", 2, "U", 1 }, // U+0168 -> U (LATIN CAPITAL LETTER U WITH TILDE)
438 { "\xC5\xA9", 2, "u", 1 }, // U+0169 -> u (LATIN SMALL LETTER U WITH TILDE)
439 { "\xC5\xAA", 2, "U", 1 }, // U+016A -> U (LATIN CAPITAL LETTER U WITH MACRON)
440 { "\xC5\xAB", 2, "u", 1 }, // U+016B -> u (LATIN SMALL LETTER U WITH MACRON)
441 { "\xC5\xAC", 2, "U", 1 }, // U+016C -> U (LATIN CAPITAL LETTER U WITH BREVE)
442 { "\xC5\xAD", 2, "u", 1 }, // U+016D -> u (LATIN SMALL LETTER U WITH BREVE)
443 { "\xC5\xAE", 2, "U", 1 }, // U+016E -> U (LATIN CAPITAL LETTER U WITH RING ABOVE)
444 { "\xC5\xAF", 2, "u", 1 }, // U+016F -> u (LATIN SMALL LETTER U WITH RING ABOVE)
445 { "\xC5\xB0", 2, "U", 1 }, // U+0170 -> U (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE)
446 { "\xC5\xB1", 2, "u", 1 }, // U+0171 -> u (LATIN SMALL LETTER U WITH DOUBLE ACUTE)
447 { "\xC5\xB2", 2, "U", 1 }, // U+0172 -> U (LATIN CAPITAL LETTER U WITH OGONEK)
448 { "\xC5\xB3", 2, "u", 1 }, // U+0173 -> u (LATIN SMALL LETTER U WITH OGONEK)
449 { "\xC5\xB4", 2, "W", 1 }, // U+0174 -> W (LATIN CAPITAL LETTER W WITH CIRCUMFLEX)
450 { "\xC5\xB5", 2, "w", 1 }, // U+0175 -> w (LATIN SMALL LETTER W WITH CIRCUMFLEX)
451 { "\xC5\xB6", 2, "Y", 1 }, // U+0176 -> Y (LATIN CAPITAL LETTER Y WITH CIRCUMFLEX)
452 { "\xC5\xB7", 2, "y", 1 }, // U+0177 -> y (LATIN SMALL LETTER Y WITH CIRCUMFLEX)
453 { "\xC5\xB8", 2, "Y", 1 }, // U+0178 -> Y (LATIN CAPITAL LETTER Y WITH DIAERESIS)
454 { "\xC5\xB9", 2, "Z", 1 }, // U+0179 -> Z (LATIN CAPITAL LETTER Z WITH ACUTE)
455 { "\xC5\xBA", 2, "z", 1 }, // U+017A -> z (LATIN SMALL LETTER Z WITH ACUTE)
456 { "\xC5\xBB", 2, "Z", 1 }, // U+017B -> Z (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
457 { "\xC5\xBC", 2, "z", 1 }, // U+017C -> z (LATIN SMALL LETTER Z WITH DOT ABOVE)
458 { "\xC5\xBD", 2, "Z", 1 }, // U+017D -> Z (LATIN CAPITAL LETTER Z WITH CARON)
459 { "\xC5\xBE", 2, "z", 1 }, // U+017E -> z (LATIN SMALL LETTER Z WITH CARON)
460 { "\xC5\xBF", 2, "s", 1 }, // U+017F -> s (LATIN SMALL LETTER LONG S
462 // U+2000 ... U+206F (General Punctuation)
463 { "\xE2\x80\x90", 3, "-", 1 }, // U+2010 -> - (HYPHEN)
464 { "\xE2\x80\x91", 3, "-", 1 }, // U+2011 -> - (NON-BREAKING HYPHEN)
465 { "\xE2\x80\x92", 3, "--", 2 }, // U+2012 -> -- (FIGURE DASH)
466 { "\xE2\x80\x93", 3, "--", 2 }, // U+2013 -> -- (EN DASH)
467 { "\xE2\x80\x94", 3, "---", 3 }, // U+2014 -> --- (EM DASH)
468 { "\xE2\x80\x95", 3, "---", 3 }, // U+2015 -> --- (HORIZONTAL BAR)
469 { "\xE2\x80\x96", 3, "||", 2 }, // U+2016 -> || (DOUBLE VERTICAL LINE)
470 { "\xE2\x80\x97", 3, "_", 1 }, // U+2017 -> _ (DOUBLE LOW LINE)
471 { "\xE2\x80\x98", 3, "`", 1 }, // U+2018 -> ` (LEFT SINGLE QUOTATION MARK)
472 { "\xE2\x80\x99", 3, "'", 1 }, // U+2019 -> ' (RIGHT SINGLE QUOTATION MARK)
473 { "\xE2\x80\x9A", 3, ",", 1 }, // U+201A -> , (SINGLE LOW-9 QUOTATION MARK)
474 { "\xE2\x80\x9B", 3, "'", 1 }, // U+201B -> ' (SINGLE HIGH-REVERSED-9 QUOTATION MARK)
475 { "\xE2\x80\x9C", 3, "\"", 1 }, // U+201C -> " (LEFT DOUBLE QUOTATION MARK)
476 { "\xE2\x80\x9D", 3, "\"", 1 }, // U+201D -> " (RIGHT DOUBLE QUOTATION MARK)
477 { "\xE2\x80\x9E", 3, ",,", 2 }, // U+201E -> ,, (DOUBLE LOW-9 QUOTATION MARK)
478 { "\xE2\x80\x9F", 3, "``", 2 }, // U+201F -> `` (DOUBLE HIGH-REVERSED-9 QUOTATION MARK)
479 { "\xE2\x80\xA0", 3, "+", 1 }, // U+2020 -> + (DAGGER)
480 { "\xE2\x80\xA1", 3, "+", 1 }, // U+2021 -> + (DOUBLE DAGGER)
481 { "\xE2\x80\xA2", 3, "\xC2\xB7", -2 }, // U+2022 -> U+00B7 (BULLET) -> (MIDDLE POINT)
482 { "\xE2\x80\xA3", 3, ".", 1 }, // U+2023 -> . (TRIANGULAR BULLET)
483 { "\xE2\x80\xA4", 3, ".", 1 }, // U+2024 -> . (ONE DOT LEADER)
484 { "\xE2\x80\xA5", 3, "..", 2 }, // U+2025 -> .. (TWO DOT LEADER)
485 { "\xE2\x80\xA6", 3, "...", 3 }, // U+2026 -> ... (HORIZONTAL ELLIPSIS)
486 { "\xE2\x80\xA7", 3, "\xC2\xB7", -2 }, // U+2027 -> U+00B7 (HYPHENATION POINT) -> (MIDDLE POINT)
487 { "\xE2\x80\xB0", 3, "%.", 2 }, // U+2030 -> %. (PER MILLE SIGN)
488 { "\xE2\x80\xB1", 3, "%..", 3 }, // U+2031 -> %.. (PER TEN THOUSAND SIGN)
489 { "\xE2\x80\xB2", 3, "'", 1 }, // U+2032 -> ` (PRIME)
490 { "\xE2\x80\xB3", 3, "''", 2 }, // U+2033 -> '' (DOUBLE PRIME)
491 { "\xE2\x80\xB4", 3, "'''", 3 }, // U+2034 -> ''' (TRIPLE PRIME)
492 { "\xE2\x80\xB5", 3, "`", 1 }, // U+2035 -> ` (REVERSED PRIME)
493 { "\xE2\x80\xB6", 3, "``", 2 }, // U+2036 -> `` (REVERSED DOUBLE PRIME)
494 { "\xE2\x80\xB7", 3, "```", 3 }, // U+2037 -> ``` (REVERSED TRIPLE PRIME)
495 { "\xE2\x80\xB8", 3, "^", 1 }, // U+2038 -> ^ (CARET)
496 { "\xE2\x80\xB9", 3, "<", 1 }, // U+2039 -> < (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
497 { "\xE2\x80\xBA", 3, ">", 1 }, // U+203A -> > (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
498 { "\xE2\x80\xBB", 3, "\xC3\x97", -2 }, // U+203B -> U+00D7 (REFERENCE MARK) -> (MULTIPLICATION SIGN)
499 { "\xE2\x80\xBC", 3, "!!", 2 }, // U+203C -> !! (DOUBLE EXCLAMATION MARK)
500 { "\xE2\x80\xBD", 3, "?", 1 }, // U+203D -> ? (INTERROBANG)
501 { "\xE2\x81\x82", 3, "*", 1 }, // U+2042 -> * (ASTERISM)
502 { "\xE2\x81\x83", 3, ".", 1 }, // U+2043 -> . (HYPHEN BULLET)
503 { "\xE2\x81\x84", 3, "/", 1 }, // U+2044 -> / (FRACTION SLASH)
504 { "\xE2\x81\x87", 3, "??", 2 }, // U+2047 -> ?? (DOUBLE QUESTION MARK)
505 { "\xE2\x81\x88", 3, "?!", 2 }, // U+2048 -> ?! (QUESTION EXCLAMATION MARK)
506 { "\xE2\x81\x89", 3, "!?", 2 }, // U+2049 -> !? (EXCLAMATION QUESTION MARK)
507 { "\xE2\x81\x8E", 3, "*", 1 }, // U+204E -> * (LOW ASTERISK)
508 { "\xE2\x81\x8F", 3, ";", 1 }, // U+204F -> ; (REVERSED SEMICOLON)
509 { "\xE2\x81\x91", 3, "*", 1 }, // U+2051 -> * (TWO ASTERISKS ALIGNED VERTICALLY)
510 { "\xE2\x81\x92", 3, "-", 1 }, // U+2052 -> - (COMMERCIAL MINUS SIGN)
511 { "\xE2\x81\x93", 3, "~", 1 }, // U+2053 -> ~ (SWUNG DASH)
512 { "\xE2\x81\x95", 3, "*", 1 }, // U+2055 -> * (FLOWER PUNCTUATION MARK)
513 { "\xE2\x81\x97", 3, "''''", 4 }, // U+2057 -> '''' (QUADRUPLE PRIME)
514 { "\xE2\x81\x9A", 3, ":", 1 }, // U+205A -> : (TWO DOT PUNCTUATION)
515 { "\xE2\x81\x9C", 3, "+", 1 }, // U+205C -> + (DOTTED CROSS)
517 // U+20A0 ... U+20CF (Currency Symbols)
518 { "\xE2\x82\xA0", 3, "ECU", 3 }, // U+20A0 -> ECU (EURO-CURRENCY SIGN)
519 { "\xE2\x82\xA1", 3, "CRC", 3 }, // U+20A1 -> CRC (COLON SIGN)
520 { "\xE2\x82\xA2", 3, "BRC", 3 }, // U+20A2 -> BRC (CRUZEIRO SIGN)
521 { "\xE2\x82\xA3", 3, "BEF", 3 }, // U+20A3 -> BEF (FRENCH FRANC SIGN)
522 { "\xE2\x82\xA4", 3, "ITL", 3 }, // U+20A4 -> ITL (LIRA SIGN)
523 { "\xE2\x82\xA6", 3, "NGN", 3 }, // U+20A6 -> NGN (NEIRA SIGN)
524 { "\xE2\x82\xA7", 3, "ESP", 3 }, // U+20A7 -> ESP (PESETA SIGN)
525 { "\xE2\x82\xA8", 3, "MVQ", 3 }, // U+20A8 -> MVQ (RUPEE SIGN)
526 { "\xE2\x82\xA9", 3, "KPW", 3 }, // U+20A9 -> KPW (WON SIGN)
527 { "\xE2\x82\xAA", 3, "ILS", 3 }, // U+20AA -> ILS (NEW SHEQEL SIGN)
528 { "\xE2\x82\xAB", 3, "VNC", 3 }, // U+20AB -> VNC (DONG SIGN)
529 { "\xE2\x82\xAC", 3, "EUR", 3 }, // U+20AC -> EUR (EURO SIGN)
530 { "\xE2\x82\xAD", 3, "LAK", 3 }, // U+20AD -> LAK (KIP SIGN)
531 { "\xE2\x82\xAE", 3, "MNT", 3 }, // U+20AE -> MNT (TUGRIK SIGN)
532 { "\xE2\x82\xAF", 3, "GRD", 3 }, // U+20AF -> GRD (DRACHMA SIGN)
533 { "\xE2\x82\xB0", 3, "Pf", 2 }, // U+20B0 -> Pf (GERMAN PENNY SIGN)
534 { "\xE2\x82\xB1", 3, "P", 1 }, // U+20B1 -> P (PESO SIGN)
535 { "\xE2\x82\xB2", 3, "PYG", 3 }, // U+20B2 -> PYG (GUARANI SIGN)
536 { "\xE2\x82\xB3", 3, "ARA", 3 }, // U+20B3 -> ARA (AUSTRAL SIGN)
537 { "\xE2\x82\xB4", 3, "UAH", 3 }, // U+20B4 -> UAH (HRYVNIA SIGN)
538 { "\xE2\x82\xB5", 3, "GHS", 3 }, // U+20B5 -> GHS (CEDI SIGN)
540 // U+2190 ... U+21FF (Arrows)
541 { "\xE2\x86\x90", 3, "<-", 2 }, // U+2190 -> <- (LEFTWARDS ARROW)
542 { "\xE2\x86\x92", 3, "->", 2 }, // U+2192 -> -> (RIGHTWARDS ARROW)
545 ENTER();
547 // start with no replacement string
548 *dst = NULL;
550 // perform a binary search in the lookup table
551 if((rep = bsearch(&key, utf8map, sizeof(utf8map) / sizeof(utf8map[0]), sizeof(utf8map[0]), compareUTF8Replacements)) != NULL)
553 // if we found something, then copy this over to the result variables
554 *dst = rep->rep;
555 len = rep->replen;
558 RETURN(len);
559 return len;
563 /// matchCodesetAlias()
// Maps well-known alternative spellings of a codeset name to its MIME name
struct CodesetAliases
{
  const char *MIMEname; // The official and correct MIME name for a codeset
  const char *Aliases;  // A space separated array with well-known aliases
};

// The alias table, terminated by an entry with a NULL MIMEname.
// It is searched from top to bottom and the first matching entry wins, so
// every MIME name must be listed exactly once.
const struct CodesetAliases codesetAliases[] =
{
  // MIME name       Aliases
  { "Amiga-1251",   "Ami1251 Amiga1251" },
  { "AmigaPL",      "AmiPL Amiga-PL" },
  { "ISO-8859-1",   "ISO8859-1 8859-1" },
  { "ISO-8859-2",   "ISO8859-2 8859-2" },
  { "ISO-8859-3",   "ISO8859-3 8859-3" },
  { "ISO-8859-4",   "ISO8859-4 8859-4" },
  { "ISO-8859-5",   "ISO8859-5 8859-5" },
  { "ISO-8859-6",   "ISO8859-6 8859-6" },
  { "ISO-8859-7",   "ISO8859-7 8859-7" },
  { "ISO-8859-8",   "ISO8859-8 8859-8" },
  { "ISO-8859-9",   "ISO8859-9 8859-9" },
  { "ISO-8859-10",  "ISO8859-10 8859-10" },
  { "ISO-8859-11",  "ISO8859-11 8859-11" },
  { "ISO-8859-12",  "ISO8859-12 8859-12" },
  { "ISO-8859-13",  "ISO8859-13 8859-13" },
  { "ISO-8859-14",  "ISO8859-14 8859-14" },
  { "ISO-8859-15",  "ISO8859-15 8859-15" },
  { "ISO-8859-16",  "ISO8859-16 8859-16" },
  { "KOI8-R",       "KOI8R" },
  { "US-ASCII",     "ASCII" },
  { "UTF-8",        "UTF8 UTF" },
  { "UTF-16",       "UTF16" },
  { "UTF-32",       "UTF32" },
  { "windows-1250", "cp1250 windows1250" },
  { "windows-1251", "cp1251 windows1251" },
  { "windows-1252", "cp1252 windows1252" },
  { "windows-1253", "cp1253 windows1253" },
  { "windows-1254", "cp1254 windows1254" },
  { "windows-1255", "cp1255 windows1255" },
  { "windows-1256", "cp1256 windows1256" },
  { "windows-1257", "cp1257 windows1257" },
  { NULL,           NULL, }
};
609 static char *matchCodesetAlias(const char *search)
611 char *result = NULL;
612 size_t len = strlen(search);
613 int i;
615 ENTER();
617 for(i=0; codesetAliases[i].MIMEname != NULL; i++)
619 BOOL found = FALSE;
621 // search the MIMEname first
622 if(stricmp(search, codesetAliases[i].MIMEname) == 0)
623 found = TRUE;
624 else
626 const char *s = codesetAliases[i].Aliases;
628 // loop through space separated list of aliases
629 while(s != NULL && *s != '\0')
631 if(strnicmp(search, s, len) == 0)
633 found = TRUE;
634 break;
637 if((s = strpbrk(s, " ")) != NULL)
638 s++;
642 if(found == TRUE)
644 result = (char *)codesetAliases[i].MIMEname;
646 break;
650 RETURN(result);
651 return result;
656 /**************************************************************************/
658 /// defaultCodeset()
659 static struct codeset *
660 defaultCodeset(BOOL useSemaphore)
662 char buf[256];
663 struct codeset *codeset;
665 ENTER();
667 if(useSemaphore == TRUE)
668 ObtainSemaphoreShared(&CodesetsBase->libSem);
670 buf[0] = '\0';
671 GetVar("codeset_default",buf,sizeof(buf),GVF_GLOBAL_ONLY);
673 if(buf[0] == '\0' || (codeset = codesetsFind(&CodesetsBase->codesets,buf)) == NULL)
674 codeset = CodesetsBase->systemCodeset;
676 if(useSemaphore == TRUE)
677 ReleaseSemaphore(&CodesetsBase->libSem);
679 RETURN(codeset);
680 return codeset;
683 /// codesetsCmpUnicode()
684 // The compare function
685 static int
686 codesetsCmpUnicode(struct single_convert *arg1,struct single_convert *arg2)
688 return strcmp((char*)&arg1->utf8[1], (char*)&arg2->utf8[1]);
691 /// codesetsReadTable()
693 #define ITEM_STANDARD "Standard"
694 #define ITEM_ALTSTANDARD "AltStandard"
695 #define ITEM_READONLY "ReadOnly"
696 #define ITEM_CHARACTERIZATION "Characterization"
698 // Reads a coding table and adds it
699 static BOOL
700 codesetsReadTable(struct codesetList *csList, STRPTR name)
702 BPTR fh;
703 BOOL res = FALSE;
705 ENTER();
707 D(DBF_STARTUP, "trying to fetch charset file '%s'...", name);
709 if((fh = Open(name, MODE_OLDFILE)))
711 struct codeset *codeset;
713 if((codeset = (struct codeset *)allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) != NULL)
715 int i;
716 char buf[512];
718 memset(codeset,0,sizeof(struct codeset));
720 for(i = 0; i<256; i++)
721 codeset->table[i].code = codeset->table[i].ucs4 = i;
723 while(readLine(fh, buf, 512*sizeof(char)))
725 const char *result;
727 if(buf[0]=='#')
728 continue;
730 if((result = getConfigItem(buf, ITEM_STANDARD, strlen(ITEM_STANDARD))))
731 codeset->name = mystrdup(result);
732 else if(codeset->name == NULL) // a valid file starts with standard and nothing else!!
733 break;
734 else if((result = getConfigItem(buf,ITEM_ALTSTANDARD,strlen(ITEM_ALTSTANDARD))))
735 codeset->alt_name = mystrdup(result);
736 else if((result = getConfigItem(buf,ITEM_READONLY,strlen(ITEM_READONLY))))
737 codeset->read_only = !!atoi(result);
738 else if((result = getConfigItem(buf,ITEM_CHARACTERIZATION,strlen(ITEM_CHARACTERIZATION))))
740 if((result[0]=='_') && (result[1]=='(') && (result[2]=='"'))
742 char *end = strchr(result + 3, '"');
744 if(end)
745 codeset->characterization = mystrndup(result+3,end-(result+3));
747 else
748 codeset->characterization = mystrdup(result);
750 else
752 char *p = buf;
753 int fmt2 = 0;
755 if((*p=='=') || (fmt2 = ((*p=='0') || (*(p+1)=='x'))))
757 p++;
758 p += fmt2;
760 i = strtol((const char *)p,(char **)&p,16);
761 if(i>0 && i<256)
763 while(isspace(*p)) p++;
765 if(!strnicmp(p, "U+", 2))
767 p += 2;
768 codeset->table[i].ucs4 = strtol((const char *)p,(char **)&p,16);
770 else
772 if(*p!='#')
773 codeset->table[i].ucs4 = strtol((const char *)p,(char **)&p,0);
780 // check if there is not already codeset with the same name in here
781 if(codeset->name != NULL && !(codesetsFind(csList, codeset->name)))
783 for(i=0; i<256; i++)
785 UTF32 src = codeset->table[i].ucs4, *src_ptr = &src;
786 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
788 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
789 *dest_ptr = 0;
790 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)(&codeset->table[i].utf8[1]);
793 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
794 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), (int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
795 AddTail((struct List *)csList, (struct Node *)&codeset->node);
797 res = TRUE;
799 else
801 // cleanup
802 if(codeset->name) freeArbitrateVecPooled(codeset->name);
803 if(codeset->alt_name) freeArbitrateVecPooled(codeset->alt_name);
804 if(codeset->characterization) freeArbitrateVecPooled(codeset->characterization);
805 freeArbitrateVecPooled(codeset);
809 Close(fh);
812 RETURN(res);
813 return res;
816 /// codesetsScanDir()
817 static void
818 codesetsScanDir(struct codesetList *csList, const char *dirPath)
820 ENTER();
822 if(dirPath != NULL && dirPath[0] != '\0')
824 #if defined(__amigaos4__)
825 APTR dirContext;
827 if((dirContext = ObtainDirContextTags(EX_StringNameInput, dirPath,
828 EX_DataFields, EXF_NAME|EXF_TYPE,
829 TAG_END)) != NULL)
831 struct ExamineData *exd;
833 D(DBF_STARTUP, "scanning directory '%s' for codesets tables", dirPath);
835 while((exd = ExamineDir(dirContext)) != NULL)
837 if(EXD_IS_FILE(exd))
839 char filePath[620];
841 strlcpy(filePath, dirPath, sizeof(filePath));
842 AddPart(filePath, exd->Name, sizeof(filePath));
844 D(DBF_STARTUP, "about to read codeset table '%s'", filePath);
846 codesetsReadTable(csList, filePath);
850 ReleaseDirContext(dirContext);
852 #else
853 BPTR dirLock;
855 if((dirLock = Lock(dirPath, ACCESS_READ)))
857 struct ExAllControl *eac;
859 D(DBF_STARTUP, "scanning directory '%s' for codesets tables", dirPath);
861 if((eac = AllocDosObject(DOS_EXALLCONTROL, NULL)) != NULL)
863 struct ExAllData *ead;
864 struct ExAllData *eabuffer;
865 LONG more;
867 eac->eac_LastKey = 0;
868 eac->eac_MatchString = NULL;
869 eac->eac_MatchFunc = NULL;
871 if((eabuffer = allocVecPooled(CodesetsBase->pool, 10*sizeof(struct ExAllData))) != NULL)
873 char filePath[620];
877 more = ExAll(dirLock, eabuffer, 10*sizeof(struct ExAllData), ED_TYPE, eac);
878 if(!more && IoErr() != ERROR_NO_MORE_ENTRIES)
879 break;
881 if(eac->eac_Entries == 0)
882 continue;
884 ead = (struct ExAllData *)eabuffer;
887 // we only take that ead if it is a file (ed_Type < 0)
888 if(ead->ed_Type < 0)
890 strlcpy(filePath, dirPath, sizeof(filePath));
891 AddPart(filePath, (char *)ead->ed_Name, sizeof(filePath));
893 D(DBF_STARTUP, "about to read codeset table '%s'", filePath);
895 codesetsReadTable(csList, filePath);
898 while((ead = ead->ed_Next));
900 while(more);
902 freeVecPooled(CodesetsBase->pool, eabuffer);
905 FreeDosObject(DOS_EXALLCONTROL, eac);
908 UnLock(dirLock);
910 #endif
913 LEAVE();
917 /// codesetsInit()
918 // Initialized and loads the codesets
919 BOOL
920 codesetsInit(struct codesetList *csList)
922 struct codeset *codeset = NULL;
923 UTF32 src;
924 int i;
925 #if defined(__amigaos4__)
926 ULONG nextMIB = 3;
927 #endif
929 ENTER();
931 ObtainSemaphore(&CodesetsBase->poolSem);
933 NewList((struct List *)&CodesetsBase->codesets);
935 // to make the list of the supported codesets complete we also add a
936 // fake 'UTF-8' only so that our users can query for that codeset as well.
937 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
938 goto end;
940 codeset->name = mystrdup("UTF-8");
941 codeset->alt_name = mystrdup("UTF8");
942 codeset->characterization = mystrdup("Unicode");
943 codeset->read_only = 0;
944 AddTail((struct List *)csList, (struct Node *)&codeset->node);
945 CodesetsBase->utf8Codeset = codeset;
947 // on AmigaOS4 we can use diskfont.library to inquire charset information as
948 // it comes with a quite rich implementation of different charsets.
949 #if defined(__amigaos4__)
952 char *mimename;
953 char *ianaName;
954 ULONG *mapTable;
955 ULONG curMIB = nextMIB;
957 nextMIB = ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_NEXTNUMBER);
958 if(nextMIB == 0)
959 break;
961 mapTable = (ULONG *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_MAPTABLE);
962 mimename = (char *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_MIMENAME);
963 ianaName = (char *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_NAME);
964 if(mapTable != NULL && mimename != NULL && codesetsFind(csList, mimename) == NULL)
966 D(DBF_STARTUP, "loading charset '%s' from diskfont.library...", mimename);
968 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
969 goto end;
971 codeset->name = mystrdup(mimename);
972 codeset->alt_name = NULL;
973 codeset->characterization = mystrdup(ianaName);
974 codeset->read_only = 0;
976 for(i=0; i<256; i++)
978 UTF32 *src_ptr = &src;
979 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
981 src = mapTable[i];
983 codeset->table[i].code = i;
984 codeset->table[i].ucs4 = src;
985 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
986 *dest_ptr = 0;
987 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
990 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
991 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
993 AddTail((struct List *)csList, (struct Node *)&codeset->node);
996 while(TRUE);
997 #endif
999 #if defined(__MORPHOS__)
1001 struct Library *KeymapBase;
1002 struct Library *LocaleBase;
1004 if((KeymapBase = OpenLibrary("keymap.library", 51)) != NULL)
1006 if((LocaleBase = OpenLibrary("locale.library", 51)) != NULL)
1008 struct KeyMap *keymap = AskKeyMapDefault();
1009 CONST_STRPTR name = GetKeyMapCodepage(keymap);
1011 if(name != NULL && keymap != NULL) // Legacy keymaps dont have codepage or Unicode mappings
1013 D(DBF_STARTUP, "loading charset '%s' from keymap.library...", name);
1015 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) != NULL)
1017 codeset->name = mystrdup(name);
1018 codeset->alt_name = NULL;
1019 codeset->characterization = mystrdup(name); // No more information available
1020 codeset->read_only = 0;
1022 for(i=0; i<256; i++)
1024 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1025 LONG rc;
1027 codeset->table[i].code = i;
1028 codeset->table[i].ucs4 = src = ToUCS4(i, keymap);
1029 rc = ConvertUCS4ToUTF8((CONST_WSTRPTR)&src, dest_ptr, 1);
1030 dest_ptr[rc] = 0;
1031 codeset->table[i].utf8[0] = rc;
1034 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1035 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1037 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1039 else
1040 goto end;
1043 CloseLibrary(LocaleBase);
1046 CloseLibrary(KeymapBase);
1049 #endif
1051 D(DBF_STARTUP, "loading charsets from Libs:Charsets...");
1053 // we try to walk to the LIBS:Charsets directory on our own and readin our
1054 // own charset tables
1055 codesetsScanDir(csList, "LIBS:Charsets");
1058 // now we go and initialize our internally supported codesets but only if
1059 // we have not already loaded a charset with the same name
1061 D(DBF_STARTUP, "initializing internal charsets...");
1063 // ISO-8859-1 + EURO
1064 if(codesetsFind(csList, "ISO-8859-1 + Euro") == NULL)
1066 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1067 goto end;
1069 codeset->name = mystrdup("ISO-8859-1 + Euro");
1070 codeset->alt_name = NULL;
1071 codeset->characterization = mystrdup("West European (with EURO)");
1072 codeset->read_only = 1;
1073 for(i = 0; i<256; i++)
1075 UTF32 *src_ptr = &src;
1076 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1078 if(i==164)
1079 src = 0x20AC; /* the EURO sign */
1080 else
1081 src = i;
1083 codeset->table[i].code = i;
1084 codeset->table[i].ucs4 = src;
1085 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1086 *dest_ptr = 0;
1087 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1089 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1090 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1091 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1094 // ISO-8859-1
1095 if(codesetsFind(csList, "ISO-8859-1") == NULL)
1097 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1098 goto end;
1100 codeset->name = mystrdup("ISO-8859-1");
1101 codeset->alt_name = mystrdup("ISO8859-1");
1102 codeset->characterization = mystrdup("West European");
1103 codeset->read_only = 0;
1104 for(i = 0; i<256; i++)
1106 UTF32 *src_ptr = &src;
1107 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1109 src = i;
1111 codeset->table[i].code = i;
1112 codeset->table[i].ucs4 = src;
1113 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1114 *dest_ptr = 0;
1115 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1117 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1118 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1119 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1122 // ISO-8859-2
1123 if(codesetsFind(csList, "ISO-8859-2") == NULL)
1125 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1126 goto end;
1128 codeset->name = mystrdup("ISO-8859-2");
1129 codeset->alt_name = mystrdup("ISO8859-2");
1130 codeset->characterization = mystrdup("Central/East European");
1131 codeset->read_only = 0;
1132 for(i = 0; i<256; i++)
1134 UTF32 *src_ptr = &src;
1135 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1137 if(i<0xa0)
1138 src = i;
1139 else
1140 src = iso_8859_2_to_ucs4[i-0xa0];
1142 codeset->table[i].code = i;
1143 codeset->table[i].ucs4 = src;
1144 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr,dest_ptr+6, CSF_StrictConversion);
1145 *dest_ptr = 0;
1146 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1148 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1149 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1150 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1153 // ISO-8859-3
1154 if(codesetsFind(csList, "ISO-8859-3") == NULL)
1156 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1157 goto end;
1159 codeset->name = mystrdup("ISO-8859-3");
1160 codeset->alt_name = mystrdup("ISO8859-3");
1161 codeset->characterization = mystrdup("South European");
1162 codeset->read_only = 0;
1163 for(i = 0; i<256; i++)
1165 UTF32 *src_ptr = &src;
1166 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1168 if(i<0xa0)
1169 src = i;
1170 else
1171 src = iso_8859_3_to_ucs4[i-0xa0];
1173 codeset->table[i].code = i;
1174 codeset->table[i].ucs4 = src;
1175 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1176 *dest_ptr = 0;
1177 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1179 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1180 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1181 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1184 // ISO-8859-4
1185 if(codesetsFind(csList, "ISO-8859-4") == NULL)
1187 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1188 goto end;
1190 codeset->name = mystrdup("ISO-8859-4");
1191 codeset->alt_name = mystrdup("ISO8859-4");
1192 codeset->characterization = mystrdup("North European");
1193 codeset->read_only = 0;
1194 for(i = 0; i<256; i++)
1196 UTF32 *src_ptr = &src;
1197 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1199 if(i<0xa0)
1200 src = i;
1201 else
1202 src = iso_8859_4_to_ucs4[i-0xa0];
1204 codeset->table[i].code = i;
1205 codeset->table[i].ucs4 = src;
1206 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1207 *dest_ptr = 0;
1208 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1210 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1211 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1212 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1215 // ISO-8859-5
1216 if(codesetsFind(csList, "ISO-8859-5") == NULL)
1218 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1219 goto end;
1221 codeset->name = mystrdup("ISO-8859-5");
1222 codeset->alt_name = mystrdup("ISO8859-5");
1223 codeset->characterization = mystrdup("Slavic languages");
1224 codeset->read_only = 0;
1225 for(i = 0; i<256; i++)
1227 UTF32 *src_ptr = &src;
1228 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1230 if(i<0xa0)
1231 src = i;
1232 else
1233 src = iso_8859_5_to_ucs4[i-0xa0];
1235 codeset->table[i].code = i;
1236 codeset->table[i].ucs4 = src;
1237 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1238 *dest_ptr = 0;
1239 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1241 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1242 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1243 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1246 // ISO-8859-9
1247 if(codesetsFind(csList, "ISO-8859-9") == NULL)
1249 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1250 goto end;
1252 codeset->name = mystrdup("ISO-8859-9");
1253 codeset->alt_name = mystrdup("ISO8859-9");
1254 codeset->characterization = mystrdup("Turkish");
1255 codeset->read_only = 0;
1256 for(i = 0; i<256; i++)
1258 UTF32 *src_ptr = &src;
1259 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1261 if(i<0xa0)
1262 src = i;
1263 else
1264 src = iso_8859_9_to_ucs4[i-0xa0];
1266 codeset->table[i].code = i;
1267 codeset->table[i].ucs4 = src;
1268 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1269 *dest_ptr = 0;
1270 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1272 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1273 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1274 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1277 // ISO-8859-15
1278 if(codesetsFind(csList, "ISO-8859-15") == NULL)
1280 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1281 goto end;
1283 codeset->name = mystrdup("ISO-8859-15");
1284 codeset->alt_name = mystrdup("ISO8859-15");
1285 codeset->characterization = mystrdup("West European II");
1286 codeset->read_only = 0;
1287 for(i = 0; i<256; i++)
1289 UTF32 *src_ptr = &src;
1290 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1292 if(i<0xa0)
1293 src = i;
1294 else
1295 src = iso_8859_15_to_ucs4[i-0xa0];
1297 codeset->table[i].code = i;
1298 codeset->table[i].ucs4 = src;
1299 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1300 *dest_ptr = 0;
1301 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1303 memcpy(codeset->table_sorted,codeset->table,sizeof (codeset->table));
1304 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1305 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1308 // ISO-8859-16
1309 if(codesetsFind(csList, "ISO-8859-16") == NULL)
1311 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1312 goto end;
1314 codeset->name = mystrdup("ISO-8859-16");
1315 codeset->alt_name = mystrdup("ISO8869-16");
1316 codeset->characterization = mystrdup("South-Eastern European");
1317 codeset->read_only = 0;
1318 for(i=0;i<256;i++)
1320 UTF32 *src_ptr = &src;
1321 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1323 if(i < 0xa0)
1324 src = i;
1325 else
1326 src = iso_8859_16_to_ucs4[i-0xa0];
1328 codeset->table[i].code = i;
1329 codeset->table[i].ucs4 = src;
1330 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1331 *dest_ptr = 0;
1332 codeset->table[i].utf8[0] = (ULONG)dest_ptr - (ULONG)&codeset->table[i].utf8[1];
1334 memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1335 qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), (int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1336 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1339 // KOI8-R
1340 if(codesetsFind(csList, "KOI8-R") == NULL)
1342 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1343 goto end;
1345 codeset->name = mystrdup("KOI8-R");
1346 codeset->alt_name = mystrdup("KOI8R");
1347 codeset->characterization = mystrdup("Russian");
1348 codeset->read_only = 0;
1349 for(i = 0; i<256; i++)
1351 UTF32 *src_ptr = &src;
1352 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1354 if(i<0x80)
1355 src = i;
1356 else
1357 src = koi8r_to_ucs4[i-0x80];
1359 codeset->table[i].code = i;
1360 codeset->table[i].ucs4 = src;
1361 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1362 *dest_ptr = 0;
1363 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1365 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1366 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1367 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1370 // AmigaPL
1371 if(codesetsFind(csList, "AmigaPL") == NULL)
1373 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1374 goto end;
1376 codeset->name = mystrdup("AmigaPL");
1377 codeset->alt_name = mystrdup("AmiPL");
1378 codeset->characterization = mystrdup("Polish (Amiga)");
1379 codeset->read_only = 1;
1380 for(i=0; i<256; i++)
1382 UTF32 *src_ptr = &src;
1383 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1385 if(i<0xa0)
1386 src = i;
1387 else
1388 src = amigapl_to_ucs4[i-0xa0];
1390 codeset->table[i].code = i;
1391 codeset->table[i].ucs4 = src;
1392 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1393 *dest_ptr = 0;
1394 codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1396 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1397 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1398 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1401 // Amiga-1251
1402 if(codesetsFind(csList, "Amiga-1251") == NULL)
1404 if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1405 goto end;
1407 codeset->name = mystrdup("Amiga-1251");
1408 codeset->alt_name = mystrdup("Ami1251");
1409 codeset->characterization = mystrdup("Cyrillic (Amiga)");
1410 codeset->read_only = 1;
1411 for(i=0; i<256; i++)
1413 UTF32 *src_ptr = &src;
1414 UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1416 if(i < 0xa0)
1417 src = i;
1418 else
1419 src = amiga1251_to_ucs4[i-0xa0];
1421 codeset->table[i].code = i;
1422 codeset->table[i].ucs4 = src;
1423 CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1424 *dest_ptr = 0;
1425 codeset->table[i].utf8[0] = (char*)dest_ptr - (char*)&codeset->table[i].utf8[1];
1427 memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1428 qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1429 AddTail((struct List *)csList, (struct Node *)&codeset->node);
1432 end:
1433 ReleaseSemaphore(&CodesetsBase->poolSem);
1435 RETURN(codeset != 0);
1436 return codeset != NULL;
1440 /// codesetsCleanup()
1441 // Cleanup the memory for the codeset
1442 void
1443 codesetsCleanup(struct codesetList *csList)
1445 struct codeset *code;
1447 ENTER();
1449 while((code = (struct codeset *)RemHead((struct List *)csList)))
1451 if(code->name) freeArbitrateVecPooled(code->name);
1452 if(code->alt_name) freeArbitrateVecPooled(code->alt_name);
1453 if(code->characterization) freeArbitrateVecPooled(code->characterization);
1455 freeArbitrateVecPooled(code);
1458 LEAVE();
1462 /// codesetsFind()
1463 // Returns the given codeset.
1464 struct codeset *
1465 codesetsFind(struct codesetList *csList, const char *name)
1467 struct codeset *res = NULL;
1469 ENTER();
1471 if(name && *name)
1473 struct codeset *mstate, *succ;
1474 char *matchedName = matchCodesetAlias(name);
1476 if(matchedName != NULL)
1477 name = matchedName;
1479 for(mstate = (struct codeset *)csList->list.mlh_Head; (succ = (struct codeset *)mstate->node.mln_Succ); mstate = succ)
1481 if(stricmp(name, mstate->name) == 0 ||
1482 (mstate->alt_name != NULL && stricmp(name, mstate->alt_name) == 0))
1484 // break out
1485 break;
1489 if(succ)
1490 res = mstate;
1493 RETURN(res);
1494 return res;
1497 /// codesetsFindBest()
1498 // Returns the best codeset for the given text
1499 static struct codeset *
1500 codesetsFindBest(struct TagItem *attrs, ULONG csFamily, STRPTR text, int text_len, int *error_ptr)
1502 struct codeset *best_codeset = NULL;
1503 int best_errors = text_len;
1504 BOOL found = FALSE;
1506 ENTER();
1508 // in case the user specified the codeset family as a
1509 // cyrillic one we go and do our cyrillic specific analysis first
1510 if(csFamily == CSV_CodesetFamily_Cyrillic)
1512 #define NUM_CYRILLIC 3
1514 struct CodesetSearch
1516 const char *name;
1517 const char *data;
1520 struct CodesetSearch search[NUM_CYRILLIC];
1521 unsigned char *p;
1522 unsigned char *tp;
1523 int ctr[NUM_CYRILLIC];
1524 int Nmax;
1525 int NGlob = 1;
1526 int max;
1527 int gr = 0;
1528 int lr = 0;
1530 search[0].name = "windows-1251";
1531 search[0].data = cp1251_data;
1532 search[1].name = "IBM866";
1533 search[1].data = cp866_data;
1534 search[2].name = "KOI8-R";
1535 search[2].data = koi8r_data;
1537 memset(&ctr, 0, sizeof(ctr));
1539 tp = (unsigned char *)text;
1543 int n;
1544 int mid = max = -466725766; // TODO: what's the magic behind this constant?
1545 Nmax = 0;
1547 for(n=0; n < NUM_CYRILLIC; n++)
1549 unsigned char la = 0;
1550 unsigned char *tptr = (unsigned char *)search[n].data;
1552 p = tp;
1556 unsigned char lb = (*p++) ^ 128;
1558 if(!((la | lb) & 128))
1559 ctr[n] += (signed char)tptr[(la << 7) + lb];
1561 la = lb;
1563 while(*p);
1565 if(max < ctr[n])
1567 mid = max;
1568 max = ctr[n];
1569 Nmax = n+1;
1573 tp = p;
1574 if((max >= 500) && ((max-mid) >= 1000))
1576 lr = gr = 1;
1577 NGlob = Nmax;
1580 while((*p) && (!gr));
1582 if(gr || ((!(*p)) && lr))
1583 Nmax = NGlob;
1585 // if our analysis found something, we go and try
1586 // to find the corresponding codeset in out codeset list
1587 if(max != 0)
1589 struct TagItem *tstate = attrs;
1590 struct TagItem *tag;
1592 D(DBF_STARTUP, "identified text as '%s", search[Nmax-1].name);
1594 // now we walk through our taglist and check if the user
1595 // supplied
1596 while((tag = NextTagItem(&tstate)))
1598 if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1600 struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1602 if((best_codeset = codesetsFind(csList, search[Nmax-1].name)) != NULL)
1603 break;
1607 // if we still haven't found the matching codeset
1608 // we search the internal list
1609 if(best_codeset == NULL)
1610 best_codeset = codesetsFind(&CodesetsBase->codesets, search[Nmax-1].name);
1612 best_errors = 0;
1614 found = TRUE;
1618 // if we haven't found the best codeset (through the cyrillic analysis
1619 // we go and do the dumb latin search in our codesetlist
1620 if(found == FALSE)
1622 struct TagItem *tstate = attrs;
1623 struct TagItem *tag;
1624 BOOL lastIteration = FALSE;
1626 while((tag = NextTagItem(&tstate)) || (lastIteration = TRUE))
1628 if(lastIteration == TRUE || (tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0))
1630 struct codesetList *csList = (lastIteration ? &CodesetsBase->codesets : (struct codesetList *)tag->ti_Data);
1631 struct codeset *codeset = (struct codeset *)csList->list.mlh_Head;
1633 // the following identification/detection routines is NOT really smart.
1634 // we just see how each UTF8 string is the representation of each char
1635 // in our source text and then check if they are valid or not. As said,
1636 // not very smart, but we don't have anything better right now :(
1638 while(codeset)
1640 if(!codeset->read_only && codeset != CodesetsBase->utf8Codeset)
1642 char *text_ptr = text;
1643 int i;
1644 int errors = 0;
1646 for(i=0; i < text_len; i++)
1648 unsigned char c = *text_ptr++;
1650 if(c)
1652 struct single_convert *f = &codeset->table[c];
1654 if(f->utf8[0] == 0 || f->utf8[1] == 0x00)
1655 errors++;
1657 else
1658 break;
1661 D(DBF_STARTUP, "tried to identify text as '%s' text with %ld of %ld errors", codeset->name, errors, text_len);
1663 if(errors < best_errors)
1665 best_codeset = codeset;
1666 best_errors = errors;
1669 if(best_errors == 0)
1670 break;
1673 codeset = (struct codeset *)codeset->node.mln_Succ;
1676 if(lastIteration)
1677 break;
1682 if(error_ptr)
1683 *error_ptr = best_errors;
1685 RETURN(best_codeset);
1686 return best_codeset;
1690 /**************************************************************************/
1692 /// CodesetsSupportedA()
1693 #ifdef __AROS__
1694 AROS_LH1(STRPTR *, CodesetsSupportedA,
1695 AROS_LHA(struct TagItem *, attrs, A0),
1696 struct LibraryHeader *, library, 15, Codesets
1699 AROS_LIBFUNC_INIT
1700 #else
1701 STRPTR *LIBFUNC
1702 CodesetsSupportedA(REG(a0, UNUSED struct TagItem * attrs))
1704 #endif
1705 STRPTR *array = NULL;
1706 struct TagItem *tstate = attrs;
1707 struct TagItem *tag;
1708 int numCodesets;
1710 ENTER();
1712 // first we need to check how many codesets our supplied
1713 // lists carry.
1714 numCodesets = countCodesets(&CodesetsBase->codesets);
1715 while((tag = NextTagItem(&tstate)))
1717 if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1718 numCodesets += countCodesets((struct codesetList *)tag->ti_Data);
1721 // now that we know how many codesets we have in our lists we
1722 // can put their names into our string arrays
1723 if(numCodesets > 0)
1725 if((array = allocArbitrateVecPooled((numCodesets+1)*sizeof(STRPTR))))
1727 struct codeset *code;
1728 struct codeset *succ;
1729 int i=0;
1731 // reset the tstate
1732 tstate = attrs;
1734 ObtainSemaphoreShared(&CodesetsBase->libSem);
1736 // first we walk through the internal codesets list and
1737 // add the names
1738 for(code = (struct codeset *)CodesetsBase->codesets.list.mlh_Head; (succ = (struct codeset *)code->node.mln_Succ); code = succ, i++)
1739 array[i] = code->name;
1741 // then we also iterate through our private codesets list
1742 while((tag = NextTagItem(&tstate)))
1744 if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1746 struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1748 for(code = (struct codeset *)csList->list.mlh_Head; (succ = (struct codeset *)code->node.mln_Succ); code = succ, i++)
1749 array[i] = code->name;
1753 array[i] = NULL;
1755 ReleaseSemaphore(&CodesetsBase->libSem);
1759 RETURN(array);
1760 return array;
1761 #ifdef __AROS__
1762 AROS_LIBFUNC_EXIT
1763 #endif
1766 #ifndef __AROS__
1767 LIBSTUB(CodesetsSupportedA, STRPTR*, REG(a0, struct TagItem *attrs))
1769 #ifdef __MORPHOS__
1770 return CodesetsSupportedA((struct TagItem *)REG_A0);
1771 #else
1772 return CodesetsSupportedA(attrs);
1773 #endif
1775 #endif
#ifdef __amigaos4__
// varargs stub (AmigaOS4 only): passes the tags to CodesetsSupportedA()
LIBSTUBVA(CodesetsSupported, STRPTR*, ...)
{
  VA_LIST args;
  STRPTR *names;
  struct TagItem *tags;

  VA_START(args, self);
  tags = VA_ARG(args, struct TagItem *);
  names = CodesetsSupportedA(tags);
  VA_END(args);

  return names;
}
#endif
1792 /// CodesetsFreeA()
1793 #ifdef __AROS__
1794 AROS_LH2(void, CodesetsFreeA,
1795 AROS_LHA(APTR, obj, A0),
1796 AROS_LHA(struct TagItem *, attrs, A1),
1797 struct LibraryHeader *, library, 14, Codesets
1800 AROS_LIBFUNC_INIT
1801 #else
1802 void LIBFUNC
1803 CodesetsFreeA(REG(a0, APTR obj),
1804 REG(a1, UNUSED struct TagItem *attrs))
1806 #endif
1807 ENTER();
1809 if(obj)
1810 freeArbitrateVecPooled(obj);
1812 LEAVE();
1813 #ifdef __AROS__
1814 AROS_LIBFUNC_EXIT
1815 #endif
1818 #ifndef __AROS__
1819 LIBSTUB(CodesetsFreeA, void, REG(a0, APTR obj), REG(a1, struct TagItem *attrs))
1821 #ifdef __MORPHOS__
1822 return CodesetsFreeA((APTR)REG_A0,(struct TagItem *)REG_A1);
1823 #else
1824 return CodesetsFreeA(obj, attrs);
1825 #endif
1827 #endif
#ifdef __amigaos4__
// varargs stub (AmigaOS4 only): passes obj and the tags to CodesetsFreeA()
LIBSTUBVA(CodesetsFree, void, REG(a0, APTR obj), ...)
{
  VA_LIST args;
  struct TagItem *tags;

  VA_START(args, obj);
  tags = VA_ARG(args, struct TagItem *);
  CodesetsFreeA(obj, tags);
  VA_END(args);
}
#endif
1841 /// CodesetsSetDefaultA()
1842 #ifdef __AROS__
1843 AROS_LH2(struct codeset *, CodesetsSetDefaultA,
1844 AROS_LHA(STRPTR, name, A0),
1845 AROS_LHA(struct TagItem *, attrs, A1),
1846 struct LibraryHeader *, library, 13, Codesets
1849 AROS_LIBFUNC_INIT
1850 #else
1851 struct codeset *LIBFUNC
1852 CodesetsSetDefaultA(REG(a0, STRPTR name),
1853 REG(a1, struct TagItem *attrs))
1855 #endif
1856 struct codeset *codeset;
1858 ENTER();
1860 ObtainSemaphoreShared(&CodesetsBase->libSem);
1862 if((codeset = codesetsFind(&CodesetsBase->codesets,name)))
1864 ULONG flags;
1866 flags = GVF_SAVE_VAR | (GetTagData(CSA_Save,FALSE,attrs) ? GVF_GLOBAL_ONLY : 0);
1868 SetVar("codeset_default",codeset->name,strlen(codeset->name),flags);
1871 ReleaseSemaphore(&CodesetsBase->libSem);
1873 RETURN(codeset);
1874 return codeset;
1875 #ifdef __AROS__
1876 AROS_LIBFUNC_EXIT
1877 #endif
1880 #ifndef __AROS__
1881 LIBSTUB(CodesetsSetDefaultA, struct codeset *, REG(a0, STRPTR name), REG(a1, struct TagItem *attrs))
1883 #ifdef __MORPHOS__
1884 return CodesetsSetDefaultA((STRPTR)REG_A0,(struct TagItem *)REG_A1);
1885 #else
1886 return CodesetsSetDefaultA(name, attrs);
1887 #endif
1889 #endif
#ifdef __amigaos4__
// varargs stub (AmigaOS4 only): passes name and the tags to
// CodesetsSetDefaultA()
LIBSTUBVA(CodesetsSetDefault, struct codeset *, REG(a0, STRPTR name), ...)
{
  VA_LIST args;
  struct codeset *result;
  struct TagItem *tags;

  VA_START(args, name);
  tags = VA_ARG(args, struct TagItem *);
  result = CodesetsSetDefaultA(name, tags);
  VA_END(args);

  return result;
}
#endif
1906 /// CodesetsFindA()
1907 #ifdef __AROS__
1908 AROS_LH2(struct codeset *, CodesetsFindA,
1909 AROS_LHA(STRPTR, name, A0),
1910 AROS_LHA(struct TagItem *, attrs, A1),
1911 struct LibraryHeader *, library, 16, Codesets
1914 AROS_LIBFUNC_INIT
1915 #else
1916 struct codeset *LIBFUNC
1917 CodesetsFindA(REG(a0, STRPTR name), REG(a1, struct TagItem *attrs))
1919 #endif
1920 struct codeset *codeset = NULL;
1922 ENTER();
1924 ObtainSemaphoreShared(&CodesetsBase->libSem);
1926 // if no name pointer was supplied we have to return
1927 // the default codeset only.
1928 if(name != NULL)
1930 // we first walk through our internal list and check if we
1931 // can find the requested codeset
1932 codeset = codesetsFind(&CodesetsBase->codesets, name);
1934 if(codeset == NULL && attrs != NULL)
1936 struct TagItem *tstate = attrs;
1937 struct TagItem *tag;
1939 // now we walk through our taglist and check if the user
1940 // supplied
1941 while((tag = NextTagItem(&tstate)))
1943 if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1945 struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1947 if((codeset = codesetsFind(csList, name)) != NULL)
1948 break;
1954 // check if we found something or not.
1955 if(codeset == NULL && (attrs == NULL || GetTagData(CSA_FallbackToDefault, TRUE, attrs)))
1956 codeset = defaultCodeset(FALSE);
1958 ReleaseSemaphore(&CodesetsBase->libSem);
1960 RETURN(codeset);
1961 return codeset;
1962 #ifdef __AROS__
1963 AROS_LIBFUNC_EXIT
1964 #endif
1967 #ifndef __AROS__
1968 LIBSTUB(CodesetsFindA, struct codeset *, REG(a0, STRPTR name), REG(a1, struct TagItem *attrs))
1970 #ifdef __MORPHOS__
1971 return CodesetsFindA((STRPTR)REG_A0,(struct TagItem *)REG_A1);
1972 #else
1973 return CodesetsFindA(name, attrs);
1974 #endif
1976 #endif
#ifdef __amigaos4__
// varargs stub (AmigaOS4 only): passes name and the tags to CodesetsFindA()
LIBSTUBVA(CodesetsFind, struct codeset *, REG(a0, STRPTR name), ...)
{
  VA_LIST args;
  struct codeset *result;
  struct TagItem *tags;

  VA_START(args, name);
  tags = VA_ARG(args, struct TagItem *);
  result = CodesetsFindA(name, tags);
  VA_END(args);

  return result;
}
#endif
1992 /// CodesetsFindBestA()
1993 #ifdef __AROS__
1994 AROS_LH1(struct codeset *, CodesetsFindBestA,
1995 AROS_LHA(struct TagItem *, attrs, A0),
1996 struct LibraryHeader *, library, 17, Codesets
1999 AROS_LIBFUNC_INIT
2000 #else
2001 struct codeset *LIBFUNC
2002 CodesetsFindBestA(REG(a0, struct TagItem *attrs))
2004 #endif
2005 struct codeset *codeset = NULL;
2007 ENTER();
2009 ObtainSemaphoreShared(&CodesetsBase->libSem);
2011 if(attrs)
2013 char *text = (char *)GetTagData(CSA_Source, 0, attrs);
2014 ULONG text_len = GetTagData(CSA_SourceLen, text != NULL ? strlen(text) : 0, attrs);
2016 if(text != NULL && text_len > 0)
2018 int numErrors = 0;
2019 ULONG csFamily = GetTagData(CSA_CodesetFamily, CSV_CodesetFamily_Latin, attrs);
2020 int *error_ptr = (int *)GetTagData(CSA_ErrPtr, 0, attrs);
2021 BOOL defaultFallBack = GetTagData(CSA_FallbackToDefault, FALSE, attrs);
2023 codeset = codesetsFindBest(attrs, csFamily, text, text_len, &numErrors);
2025 if(error_ptr != NULL)
2026 *error_ptr = numErrors;
2028 // if we still haven't got the codeset we fallback to the default
2029 if(codeset == NULL && defaultFallBack == TRUE)
2030 codeset = defaultCodeset(FALSE);
2034 ReleaseSemaphore(&CodesetsBase->libSem);
2036 RETURN(codeset);
2037 return codeset;
2038 #ifdef __AROS__
2039 AROS_LIBFUNC_EXIT
2040 #endif
2043 #ifndef __AROS__
2044 LIBSTUB(CodesetsFindBestA, struct codeset *, REG(a0, struct TagItem *attrs))
2046 #ifdef __MORPHOS__
2047 return CodesetsFindBestA((struct TagItem *)REG_A0);
2048 #else
2049 return CodesetsFindBestA(attrs);
2050 #endif
2052 #endif
#ifdef __amigaos4__
// varargs stub (AmigaOS4 only): passes the tags to CodesetsFindBestA()
LIBSTUBVA(CodesetsFindBest, struct codeset *, ...)
{
  VA_LIST args;
  struct codeset *result;
  struct TagItem *tags;

  VA_START(args, self);
  tags = VA_ARG(args, struct TagItem *);
  result = CodesetsFindBestA(tags);
  VA_END(args);

  return result;
}
#endif
2068 /// CodesetsUTF8Len()
2069 // Returns the number of characters a utf8 string has. This is not
2070 // identically with the size of memory is required to hold the string.
2071 #ifdef __AROS__
2072 AROS_LH1(ULONG, CodesetsUTF8Len,
2073 AROS_LHA(const UTF8 *, str, A0),
2074 struct LibraryHeader *, library, 18, Codesets
2077 AROS_LIBFUNC_INIT
2078 #else
2079 ULONG LIBFUNC
2080 CodesetsUTF8Len(REG(a0, UTF8 *str))
2082 #endif
2083 int len;
2084 unsigned char c;
2086 ENTER();
2088 if(!str)
2089 return 0;
2091 len = 0;
2093 while((c = *str++))
2095 len++;
2096 str += trailingBytesForUTF8[c];
2099 RETURN((ULONG)len);
2100 return (ULONG)len;
2101 #ifdef __AROS__
2102 AROS_LIBFUNC_EXIT
2103 #endif
2106 #ifndef __AROS__
2107 LIBSTUB(CodesetsUTF8Len, ULONG, REG(a0, UTF8* str))
2109 #ifdef __MORPHOS__
2110 return CodesetsUTF8Len((UTF8 *)REG_A0);
2111 #else
2112 return CodesetsUTF8Len(str);
2113 #endif
2115 #endif
2118 /// CodesetsStrLenA()
2119 #ifdef __AROS__
2120 AROS_LH2(ULONG, CodesetsStrLenA,
2121 AROS_LHA(STRPTR, str, A0),
2122 AROS_LHA(struct TagItem *, attrs, A1),
2123 struct LibraryHeader *, library, 23, Codesets
2126 AROS_LIBFUNC_INIT
2127 #else
2128 ULONG LIBFUNC
2129 CodesetsStrLenA(REG(a0, STRPTR str),
2130 REG(a1, struct TagItem *attrs))
2132 #endif
2133 struct codeset *codeset;
2134 int len, res;
2135 STRPTR src;
2136 UBYTE c;
2138 ENTER();
2140 if(!str)
2141 return 0;
2143 if(!(codeset = (struct codeset *)GetTagData(CSA_SourceCodeset, 0, attrs)))
2144 codeset = defaultCodeset(TRUE);
2146 len = GetTagData(CSA_SourceLen, strlen(str), attrs);
2148 src = str;
2149 res = 0;
2151 while(((c = *src++) && (len--)))
2152 res += codeset->table[c].utf8[0];
2154 RETURN((ULONG)res);
2155 return (ULONG)res;
2156 #ifdef __AROS__
2157 AROS_LIBFUNC_EXIT
2158 #endif
2161 #ifndef __AROS__
2162 LIBSTUB(CodesetsStrLenA, ULONG, REG(a0, STRPTR str),
2163 REG(a1, struct TagItem *attrs))
2165 #ifdef __MORPHOS__
2166 return CodesetsStrLenA((STRPTR)REG_A0,(struct TagItem *)REG_A1);
2167 #else
2168 return CodesetsStrLenA(str, attrs);
2169 #endif
2171 #endif
// Varargs entry stub for CodesetsStrLen() (AmigaOS4 only): fetches the tag
// list pointer that follows str and forwards both to CodesetsStrLenA().
#if defined(__amigaos4__)
LIBSTUBVA(CodesetsStrLen, ULONG, REG(a0, STRPTR str), ...)
{
  VA_LIST args;
  struct TagItem *tags;
  ULONG length;

  VA_START(args, str);
  tags = VA_ARG(args, struct TagItem *);
  length = CodesetsStrLenA(str, tags);
  VA_END(args);

  return length;
}
#endif
2187 /// CodesetsUTF8ToStrA()
2188 // Converts an UTF8 string to a given charset. Return the number of bytes
2189 // written to dest excluding the NULL byte (which is always ensured by this
2190 // function; it means a NULL str will produce "" as dest; anyway you should
2191 // check NULL str to not waste your time!).
2192 #ifdef __AROS__
2193 AROS_LH1(STRPTR, CodesetsUTF8ToStrA,
2194 AROS_LHA(struct TagItem *, attrs, A0),
2195 struct LibraryHeader *, library, 19, Codesets
2198 AROS_LIBFUNC_INIT
2199 #else
2200 STRPTR LIBFUNC
2201 CodesetsUTF8ToStrA(REG(a0, struct TagItem *attrs))
2203 #endif
2204 UTF8 *src;
2205 ULONG srcLen;
2206 ULONG *destLenPtr;
2207 ULONG n = 0;
2208 STRPTR dest = NULL;
2210 ENTER();
2212 if((src = (UTF8 *)GetTagData(CSA_Source, (ULONG)NULL, attrs)) != NULL &&
2213 (srcLen = GetTagData(CSA_SourceLen, src != NULL ? strlen((char *)src) : 0, attrs)) > 0)
2215 struct convertMsg msg;
2216 struct codeset *codeset;
2217 struct Hook *destHook;
2218 struct Hook *mapForeignCharsHook;
2219 char buf[256];
2220 STRPTR destIter = NULL;
2221 STRPTR b = NULL;
2222 ULONG destLen = 0;
2223 int i = 0;
2224 unsigned char *s = src;
2225 unsigned char *e = (src+srcLen);
2226 int numConvErrors = 0;
2227 int *numConvErrorsPtr;
2228 BOOL mapForeignChars;
2229 APTR pool = NULL;
2230 struct SignalSemaphore *sem = NULL;
2232 // get some more optional attributes
2233 destHook = (struct Hook *)GetTagData(CSA_DestHook, (ULONG)NULL, attrs);
2234 destLen = GetTagData(CSA_DestLen, 0, attrs);
2235 numConvErrorsPtr = (int *)GetTagData(CSA_ErrPtr, (ULONG)NULL, attrs);
2236 mapForeignChars = (BOOL)GetTagData(CSA_MapForeignChars, FALSE, attrs);
2237 mapForeignCharsHook = (struct Hook *)GetTagData(CSA_MapForeignCharsHook, (ULONG)NULL, attrs);
2239 // first we make sure we allocate enough memory
2240 // for our destination buffer
2241 if(destHook != NULL)
2243 if(destLen < 16 || destLen > sizeof(buf))
2244 destLen = sizeof(buf);
2246 msg.state = CSV_Translating;
2247 b = buf;
2248 i = 0;
2250 else
2252 // in case the user wants us to dynamically generate the
2253 // destination buffer we do it right now
2254 if((dest = (STRPTR)GetTagData(CSA_Dest, (ULONG)NULL, attrs)) == NULL ||
2255 GetTagData(CSA_AllocIfNeeded, TRUE, attrs) != FALSE)
2257 ULONG len = 0;
2259 // calculate the destLen
2260 while(s < e)
2262 unsigned char c = *s++;
2264 len++;
2265 s += trailingBytesForUTF8[c];
2268 if(dest == NULL || (destLen < len+1))
2270 if((pool = (APTR)GetTagData(CSA_Pool, (ULONG)NULL, attrs)) != NULL)
2272 if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, (ULONG)NULL, attrs)) != NULL)
2273 ObtainSemaphore(sem);
2275 // allocate the destination buffer
2276 dest = allocVecPooled(pool, len+1);
2278 if(sem != NULL)
2279 ReleaseSemaphore(sem);
2281 else
2282 dest = allocArbitrateVecPooled(len+1);
2284 destLen = len+1;
2287 if(dest == NULL)
2289 RETURN(NULL);
2290 return NULL;
2294 destIter = dest;
2297 // get the destination codeset pointer
2298 if((codeset = (struct codeset *)GetTagData(CSA_DestCodeset, (ULONG)NULL, attrs)) == NULL)
2299 codeset = defaultCodeset(TRUE);
2301 // now we convert the src string to the
2302 // destination buffer.
2303 for(s=src;;n++)
2305 if(destHook == NULL && n >= destLen-1)
2306 break;
2308 // convert until we reach the end of the
2309 // source buffer.
2310 if(s < e)
2312 unsigned char c = *s;
2313 unsigned char d = '?';
2314 const char *repstr = NULL;
2315 int replen = 0;
2317 // check if the char is a >7bit char
2318 if(c > 127)
2320 struct single_convert *f;
2321 int lenAdd = trailingBytesForUTF8[c];
2322 int lenStr = lenAdd+1;
2323 unsigned char *src = s;
2327 // start each iteration with "no replacement found yet"
2328 repstr = NULL;
2329 replen = 0;
2331 // search in the UTF8 conversion table of the current charset if
2332 // we have a replacement character for the char sequence starting at s
2333 BIN_SEARCH(codeset->table_sorted, 0, 255, strncmp((char *)src, (char *)codeset->table_sorted[m].utf8+1, lenStr), f);
2335 if(f != NULL)
2337 d = f->code;
2338 replen = -1;
2340 break;
2342 else
2344 // the analysed char sequence (s) is not convertable to a
2345 // single visible char replacement, so we normally have to put
2346 // a ? sign as a "unknown char" sign at the very position.
2348 // For convienence we, however, allow users to replace these
2349 // UTF8 characters with char sequences that "looklike" the
2350 // original char.
2351 if(mapForeignChars == TRUE)
2352 replen = mapUTF8toASCII(&repstr, src, lenStr);
2354 // call the hook only, if the internal table yielded no suitable
2355 // replacement
2356 if(replen == 0 && mapForeignCharsHook != NULL)
2358 struct replaceMsg rmsg;
2360 rmsg.dst = (char **)&repstr;
2361 rmsg.src = src;
2362 rmsg.srclen = lenStr;
2363 replen = CallHookPkt(mapForeignCharsHook, &rmsg, NULL);
2366 if(replen < 0)
2368 D(DBF_UTF, "got UTF8 replacement (%ld)", replen);
2370 // stay in the loop as long as one replacement function delivers
2371 // further UTF8 replacement sequences
2372 src = (unsigned char *)repstr;
2374 else if(replen == 0)
2376 D(DBF_UTF, "found no ASCII replacement for UTF8 string (%ld)", replen);
2377 repstr = NULL;
2379 else
2380 D(DBF_UTF, "got replacement string '%s' (%ld)", repstr ? repstr : "<null>", replen);
2383 while(replen < 0);
2385 if(repstr == NULL || replen == 0)
2387 if(replen >= 0)
2389 d = '?';
2390 numConvErrors++;
2394 s += lenAdd;
2396 else
2397 d = c;
2399 if(destHook != NULL)
2401 if(replen > 1)
2403 while(replen > 0)
2405 *b++ = *repstr;
2406 repstr++;
2407 i++;
2408 replen--;
2410 if(i%(destLen-1)==0)
2412 *b = '\0';
2413 msg.len = i;
2414 CallHookPkt(destHook, &msg, buf);
2416 b = buf;
2417 *b = '\0';
2418 i = 0;
2422 else
2424 *b++ = replen > 0 ? *repstr : d;
2425 i++;
2428 if(i%(destLen-1)==0)
2430 *b = '\0';
2431 msg.len = i;
2432 CallHookPkt(destHook, &msg, buf);
2434 b = buf;
2435 *b = '\0';
2436 i = 0;
2439 else
2441 if(replen > 1)
2443 ULONG destPos = destIter-dest;
2445 if(pool != NULL)
2447 if(sem != NULL)
2448 ObtainSemaphore(sem);
2450 // allocate the destination buffer
2451 dest = reallocVecPooled(pool, dest, destLen, destLen+replen-1);
2453 if(sem != NULL)
2454 ReleaseSemaphore(sem);
2456 else
2457 dest = reallocArbitrateVecPooled(dest, destLen, destLen+replen-1);
2459 if(dest == NULL)
2461 RETURN(NULL);
2462 return NULL;
2465 destIter = dest+destPos;
2466 memcpy(destIter, repstr, replen);
2468 // adjust our loop pointer and destination length
2469 destIter += replen;
2470 destLen += replen-1;
2472 else if(replen == 1)
2473 *destIter++ = *repstr;
2474 else
2475 *destIter++ = d;
2478 s++;
2480 else
2481 break;
2484 if(destHook != NULL)
2486 msg.state = CSV_End;
2487 msg.len = i;
2488 *b = '\0';
2489 CallHookPkt(destHook,&msg,buf);
2491 else
2492 *destIter = '\0';
2494 // let us write the number of conversion errors
2495 // to the proper variable pointer, if wanted
2496 if(numConvErrorsPtr != NULL)
2497 *numConvErrorsPtr = numConvErrors;
2500 // put the final length of our destination buffer
2501 // into the destLenPtr
2502 if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, (ULONG)NULL, attrs)) != NULL)
2503 *destLenPtr = n;
2505 RETURN(dest);
2506 return dest;
2508 #ifdef __AROS__
2509 AROS_LIBFUNC_EXIT
2510 #endif
2513 #ifndef __AROS__
2514 LIBSTUB(CodesetsUTF8ToStrA, STRPTR, REG(a0, struct TagItem *attrs))
2516 #ifdef __MORPHOS__
2517 return CodesetsUTF8ToStrA((struct TagItem *)REG_A0);
2518 #else
2519 return CodesetsUTF8ToStrA(attrs);
2520 #endif
2522 #endif
// Varargs entry stub for CodesetsUTF8ToStr() (AmigaOS4 only): fetches the tag
// list pointer from the variadic arguments and forwards it to
// CodesetsUTF8ToStrA().
#if defined(__amigaos4__)
LIBSTUBVA(CodesetsUTF8ToStr, STRPTR, ...)
{
  VA_LIST args;
  struct TagItem *tags;
  STRPTR converted;

  VA_START(args, self);
  tags = VA_ARG(args, struct TagItem *);
  converted = CodesetsUTF8ToStrA(tags);
  VA_END(args);

  return converted;
}
#endif
2539 /// CodesetsUTF8CreateA()
2540 // Converts a string and a charset to an UTF8. Returns the UTF8.
2541 // If a destination hook is supplied always return 0.
2542 // If from is NULL, it returns NULL and doesn't call the hook.
2543 #ifdef __AROS__
2544 AROS_LH1(UTF8 *, CodesetsUTF8CreateA,
2545 AROS_LHA(struct TagItem *, attrs, A0),
2546 struct LibraryHeader *, library, 20, Codesets
2549 AROS_LIBFUNC_INIT
2550 #else
2551 UTF8 *LIBFUNC
2552 CodesetsUTF8CreateA(REG(a0, struct TagItem *attrs))
2554 #endif
2555 UTF8 *from;
2556 UTF8 *dest;
2557 ULONG fromLen, *destLenPtr;
2558 ULONG n;
2560 ENTER();
2562 dest = NULL;
2563 n = 0;
2565 from = (UTF8*)GetTagData(CSA_Source, 0, attrs);
2566 fromLen = GetTagData(CSA_SourceLen, from != NULL ? strlen((char *)from) : 0, attrs);
2568 if(from != NULL && fromLen != 0)
2570 struct convertMsg msg;
2571 struct codeset *codeset;
2572 struct Hook *hook;
2573 ULONG destLen;
2574 int i = 0;
2575 UBYTE buf[256];
2576 UBYTE *src, *destPtr = NULL, *b = NULL, c;
2578 if((codeset = (struct codeset *)GetTagData(CSA_SourceCodeset, 0, attrs)) == NULL)
2579 codeset = defaultCodeset(TRUE);
2581 hook = (struct Hook *)GetTagData(CSA_DestHook, 0, attrs);
2582 destLen = GetTagData(CSA_DestLen,0,attrs);
2584 if(hook != NULL)
2586 if(destLen<16 || destLen>sizeof(buf))
2587 destLen = sizeof(buf);
2589 msg.state = CSV_Translating;
2590 b = buf;
2591 i = 0;
2593 else
2595 if((dest = (UTF8*)GetTagData(CSA_Dest, 0, attrs)) != NULL ||
2596 GetTagData(CSA_AllocIfNeeded,TRUE,attrs))
2598 ULONG len, flen;
2600 flen = fromLen;
2601 len = 0;
2602 src = from;
2604 while(((c = *src++) && (flen--)))
2605 len += codeset->table[c].utf8[0];
2607 if(dest == NULL || (destLen<len+1))
2609 APTR pool;
2610 struct SignalSemaphore *sem;
2612 if((pool = (APTR)GetTagData(CSA_Pool, 0, attrs)) != NULL)
2614 if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, 0, attrs)) != NULL)
2615 ObtainSemaphore(sem);
2617 // allocate the destination buffer
2618 dest = allocVecPooled(pool,len+1);
2620 if(sem != NULL)
2621 ReleaseSemaphore(sem);
2623 else
2624 dest = allocArbitrateVecPooled(len+1);
2626 destLen = len;
2629 if(dest == NULL)
2631 RETURN(NULL);
2632 return NULL;
2636 destPtr = (UBYTE*)dest;
2639 for(src = from; fromLen && (c = *src); src++, fromLen--)
2641 UTF8* utf8_seq;
2643 for(utf8_seq = &codeset->table[c].utf8[1]; (c = *utf8_seq); utf8_seq++)
2645 if(hook != NULL)
2647 *b++ = c;
2648 i++;
2650 if(i%(destLen-1)==0)
2652 *b = 0;
2653 msg.len = i;
2654 CallHookPkt(hook,&msg,buf);
2656 b = buf;
2657 *b = 0;
2658 i = 0;
2661 else
2663 if(n>=destLen)
2664 break;
2666 *destPtr++ = c;
2669 n++;
2673 if(hook != NULL)
2675 msg.state = CSV_End;
2676 msg.len = i;
2677 *b = 0;
2678 CallHookPkt(hook,&msg,buf);
2680 else
2682 *destPtr = 0;
2686 if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, 0, attrs)))
2687 *destLenPtr = n;
2689 RETURN(dest);
2690 return dest;
2691 #ifdef __AROS__
2692 AROS_LIBFUNC_EXIT
2693 #endif
2696 #ifndef __AROS__
2697 LIBSTUB(CodesetsUTF8CreateA, UTF8*, REG(a0, struct TagItem *attrs))
2699 #ifdef __MORPHOS__
2700 return CodesetsUTF8CreateA((struct TagItem *)REG_A0);
2701 #else
2702 return CodesetsUTF8CreateA(attrs);
2703 #endif
2705 #endif
// Varargs entry stub for CodesetsUTF8Create() (AmigaOS4 only): fetches the
// tag list pointer from the variadic arguments and forwards it to
// CodesetsUTF8CreateA().
#if defined(__amigaos4__)
LIBSTUBVA(CodesetsUTF8Create, UTF8*, ...)
{
  VA_LIST args;
  struct TagItem *tags;
  UTF8 *created;

  VA_START(args, self);
  tags = VA_ARG(args, struct TagItem *);
  created = CodesetsUTF8CreateA(tags);
  VA_END(args);

  return created;
}
#endif
2722 /// CodesetsIsValidUTF8()
2723 #define GOOD_UCS(c) \
2724 ((c) >= 160 && ((c) & ~0x3ff) != 0xd800 && \
2725 (c) != 0xfeff && (c) != 0xfffe && (c) != 0xffff)
2727 #ifdef __AROS__
2728 AROS_LH1(BOOL, CodesetsIsValidUTF8,
2729 AROS_LHA(STRPTR, s, A0),
2730 struct LibraryHeader *, library, 24, Codesets
2733 AROS_LIBFUNC_INIT
2734 #else
2735 BOOL LIBFUNC
2736 CodesetsIsValidUTF8(REG(a0, STRPTR s))
2738 #endif
2739 STRPTR t = s;
2740 int n;
2742 ENTER();
2744 while((n = parseUtf8(&t)))
2746 if(!GOOD_UCS(n))
2748 RETURN(FALSE);
2749 return FALSE;
2753 RETURN(TRUE);
2754 return TRUE;
2755 #ifdef __AROS__
2756 AROS_LIBFUNC_EXIT
2757 #endif
2760 #ifndef __AROS__
2761 LIBSTUB(CodesetsIsValidUTF8, BOOL, REG(a0, STRPTR s))
2763 #ifdef __MORPHOS__
2764 return CodesetsIsValidUTF8((STRPTR)REG_A0);
2765 #else
2766 return CodesetsIsValidUTF8(s);
2767 #endif
2769 #endif
2772 /// CodesetsConvertStrA()
2773 // Converts a given string from one source Codeset to a given destination
2774 // codeset and returns the convert string
2775 #ifdef __AROS__
2776 AROS_LH1(STRPTR, CodesetsConvertStrA,
2777 AROS_LHA(struct TagItem *, attrs, A0),
2778 struct LibraryHeader *, library, 26, Codesets
2781 AROS_LIBFUNC_INIT
2782 #else
2783 STRPTR LIBFUNC
2784 CodesetsConvertStrA(REG(a0, struct TagItem *attrs))
2786 #endif
2787 STRPTR srcStr = NULL;
2788 STRPTR dstStr = NULL;
2789 ULONG srcLen = 0;
2790 ULONG dstLen = 0;
2792 ENTER();
2794 // get the ptr to the src string we want to convert
2795 // from the source codeset to the dest codeset.
2796 srcStr = (STRPTR)GetTagData(CSA_Source, (ULONG)NULL, attrs);
2797 srcLen = GetTagData(CSA_SourceLen, srcStr != NULL ? strlen(srcStr) : 0, attrs);
2799 if(srcStr != NULL && srcLen > 0)
2801 struct codeset *srcCodeset;
2802 struct codeset *dstCodeset;
2804 // get the pointer to the codeset in which the src string is encoded
2805 if((srcCodeset = (struct codeset *)GetTagData(CSA_SourceCodeset, (ULONG)NULL, attrs)) == NULL)
2806 srcCodeset = defaultCodeset(TRUE);
2808 // get the pointer to the codeset in which the dst string should be encoded
2809 if((dstCodeset = (struct codeset *)GetTagData(CSA_DestCodeset, (ULONG)NULL, attrs)) == NULL)
2810 dstCodeset = defaultCodeset(TRUE);
2812 D(DBF_UTF, "srcCodeset: '%s' dstCodeset: '%s'", srcCodeset->name, dstCodeset->name);
2814 // check that the user didn't supplied the very same codeset
2815 // or otherwise a conversion is not required.
2816 if(srcCodeset != NULL && dstCodeset != NULL && srcCodeset != dstCodeset)
2818 BOOL utf8Create = FALSE;
2819 BOOL strCreate = FALSE;
2820 UTF8 *utf8str;
2821 ULONG utf8strLen = 0;
2822 ULONG *destLenPtr = NULL;
2823 BOOL mapForeignChars;
2824 struct Hook *mapForeignCharsHook;
2826 mapForeignChars = (BOOL)GetTagData(CSA_MapForeignChars, FALSE, attrs);
2827 mapForeignCharsHook = (struct Hook *)GetTagData(CSA_MapForeignCharsHook, (ULONG)NULL, attrs);
2829 // if the source codeset is UTF-8 we don't have to use the UTF8Create()
2830 // function and can directly call the UTF8ToStr() function
2831 if(srcCodeset != CodesetsBase->utf8Codeset)
2833 struct TagItem tags[] = { { CSA_SourceCodeset, (ULONG)srcCodeset },
2834 { CSA_Source, (ULONG)srcStr },
2835 { CSA_SourceLen, srcLen },
2836 { CSA_DestLenPtr, (ULONG)&utf8strLen },
2837 { TAG_DONE, 0 } };
2839 utf8str = CodesetsUTF8CreateA((struct TagItem *)&tags[0]);
2841 utf8Create = TRUE;
2843 else
2845 utf8str = (UTF8 *)srcStr;
2846 utf8strLen = srcLen;
2849 // in case the destination codeset is UTF-8 we don't have to actually
2850 // use the UTF8ToStr() function and can immediately return our
2851 // UTF8 string
2852 if(utf8str != NULL && utf8strLen > 0 && dstCodeset != CodesetsBase->utf8Codeset)
2854 struct TagItem tags[] = { { CSA_DestCodeset, (ULONG)dstCodeset },
2855 { CSA_Source, (ULONG)utf8str },
2856 { CSA_SourceLen, utf8strLen },
2857 { CSA_DestLenPtr, (ULONG)&dstLen },
2858 { CSA_MapForeignChars, mapForeignChars },
2859 { CSA_MapForeignCharsHook, (ULONG)mapForeignCharsHook },
2860 { TAG_DONE, 0 } };
2862 dstStr = CodesetsUTF8ToStrA((struct TagItem *)&tags[0]);
2864 strCreate = TRUE;
2866 else
2868 dstStr = (STRPTR)utf8str;
2869 dstLen = utf8strLen;
2872 D(DBF_UTF, "srcStr: %lx srcLen: %ld dstStr: %lx dstLen: %ld utf8create: %ld strCreate: %ld", srcStr, srcLen,
2873 dstStr, dstLen,
2874 utf8Create,
2875 strCreate);
2877 // if everything was successfull we can go and finalize everything
2878 if(dstStr != NULL && utf8str != NULL)
2880 // as the conversion was a two way pass we have to either free the
2881 // memory of the utf8 string or not
2882 if(utf8Create == TRUE && strCreate == TRUE)
2883 CodesetsFreeA(utf8str, NULL);
2885 // if the user wants to be informed abour the length
2886 // of our destination string we store the length now in the supplied ptr.
2887 if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, (ULONG)NULL, attrs)) != NULL)
2888 *destLenPtr = dstLen;
2890 D(DBF_UTF, "successfully converted string with len %ld", dstLen);
2892 else
2894 W(DBF_ALWAYS, "an error occurred while trying to convert a string");
2896 // free all memory in case the conversion didn't work out
2897 if(utf8Create == TRUE && utf8str != NULL)
2898 CodesetsFreeA(utf8str, NULL);
2900 if(strCreate == TRUE && dstStr != NULL)
2901 CodesetsFreeA(dstStr, NULL);
2903 dstStr = NULL;
2908 RETURN(dstStr);
2909 return dstStr;
2910 #ifdef __AROS__
2911 AROS_LIBFUNC_EXIT
2912 #endif
2915 #ifndef __AROS__
2916 LIBSTUB(CodesetsConvertStrA, STRPTR, REG(a0, struct TagItem *attrs))
2918 #ifdef __MORPHOS__
2919 return CodesetsConvertStrA((struct TagItem *)REG_A0);
2920 #else
2921 return CodesetsConvertStrA(attrs);
2922 #endif
2924 #endif
// Varargs entry stub for CodesetsConvertStr() (AmigaOS4 only): fetches the
// tag list pointer from the variadic arguments and forwards it to
// CodesetsConvertStrA().
#if defined(__amigaos4__)
LIBSTUBVA(CodesetsConvertStr, STRPTR, ...)
{
  VA_LIST args;
  struct TagItem *tags;
  STRPTR converted;

  VA_START(args, self);
  tags = VA_ARG(args, struct TagItem *);
  converted = CodesetsConvertStrA(tags);
  VA_END(args);

  return converted;
}
#endif
2941 /// CodesetsFreeVecPooledA()
2942 #ifdef __AROS__
2943 AROS_LH3(void, CodesetsFreeVecPooledA,
2944 AROS_LHA(APTR, pool, A0),
2945 AROS_LHA(APTR, mem, A1),
2946 AROS_LHA(struct TagItem *, attrs, A2),
2947 struct LibraryHeader *, library, 25, Codesets
2950 AROS_LIBFUNC_INIT
2951 #else
2952 void LIBFUNC
2953 CodesetsFreeVecPooledA(REG(a0, APTR pool),
2954 REG(a1, APTR mem),
2955 REG(a2, struct TagItem *attrs))
2957 #endif
2958 ENTER();
2960 if(pool && mem)
2962 struct SignalSemaphore *sem;
2964 if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, 0, attrs)))
2965 ObtainSemaphore(sem);
2967 freeVecPooled(pool,mem);
2969 if(sem)
2970 ReleaseSemaphore(sem);
2973 LEAVE();
2974 #ifdef __AROS__
2975 AROS_LIBFUNC_EXIT
2976 #endif
2979 #ifndef __AROS__
2980 LIBSTUB(CodesetsFreeVecPooledA, void, REG(a0, APTR pool),
2981 REG(a1, APTR mem),
2982 REG(a2, struct TagItem *attrs))
2984 #ifdef __MORPHOS__
2985 return CodesetsFreeVecPooledA((APTR)REG_A0,(APTR)REG_A1,(struct TagItem *)REG_A2);
2986 #else
2987 return CodesetsFreeVecPooledA(pool, mem, attrs);
2988 #endif
2990 #endif
// Varargs entry stub for CodesetsFreeVecPooled() (AmigaOS4 only): fetches the
// tag list pointer that follows mem and forwards everything to
// CodesetsFreeVecPooledA().
#if defined(__amigaos4__)
LIBSTUBVA(CodesetsFreeVecPooled, void, REG(a0, APTR pool),
                                       REG(a1, APTR mem), ...)
{
  VA_LIST args;
  struct TagItem *tags;

  VA_START(args, mem);
  tags = VA_ARG(args, struct TagItem *);
  CodesetsFreeVecPooledA(pool, mem, tags);
  VA_END(args);
}
#endif
3004 /// CodesetsListCreateA()
3005 #ifdef __AROS__
3006 AROS_LH1(struct codesetList *, CodesetsListCreateA,
3007 AROS_LHA(struct TagItem *, attrs, A0),
3008 struct LibraryHeader *, library, 27, Codesets
3011 AROS_LIBFUNC_INIT
3012 #else
3013 struct codesetList *LIBFUNC
3014 CodesetsListCreateA(REG(a0, struct TagItem *attrs))
3016 #endif
3017 struct codesetList *csList = NULL;
3019 ENTER();
3021 ObtainSemaphore(&CodesetsBase->poolSem);
3023 // no matter what, we create a codesets list we will return to the user
3024 if((csList = allocVecPooled(CodesetsBase->pool, sizeof(struct codesetList))))
3026 BOOL scanProgDir = TRUE;
3027 struct TagItem *tstate = attrs;
3028 struct TagItem *tag;
3030 // initialize the new private codeset list and put it into a separate list
3031 NewList((struct List *)csList);
3033 // first we get the path of the directory from which we go
3034 // and scan for charset tables from
3035 while((tag = NextTagItem(&tstate)))
3037 switch(tag->ti_Tag)
3039 case CSA_CodesetDir:
3041 codesetsScanDir(csList, (STRPTR)tag->ti_Data);
3043 scanProgDir = FALSE;
3045 break;
3047 case CSA_CodesetFile:
3049 codesetsReadTable(csList, (STRPTR)tag->ti_Data);
3051 scanProgDir = FALSE;
3053 break;
3055 case CSA_SourceCodeset:
3057 struct codeset *cs = (struct codeset *)tag->ti_Data;
3059 AddTail((struct List *)csList, (struct Node *)&cs->node);
3061 scanProgDir = FALSE;
3063 break;
3067 // in case the user also wants us to scan PROGDIR:
3068 // we do so
3069 if(scanProgDir == TRUE)
3070 codesetsScanDir(csList, "PROGDIR:Charsets");
3073 ReleaseSemaphore(&CodesetsBase->poolSem);
3075 RETURN(csList);
3076 return csList;
3077 #ifdef __AROS__
3078 AROS_LIBFUNC_EXIT
3079 #endif
3082 #ifndef __AROS__
3083 LIBSTUB(CodesetsListCreateA, struct codesetList *, REG(a0, struct TagItem *attrs))
3085 #ifdef __MORPHOS__
3086 return CodesetsListCreateA((struct TagItem *)REG_A0);
3087 #else
3088 return CodesetsListCreateA(attrs);
3089 #endif
3091 #endif
// Varargs entry stub for CodesetsListCreate() (AmigaOS4 only): fetches the
// tag list pointer from the variadic arguments and forwards it to
// CodesetsListCreateA().
#if defined(__amigaos4__)
LIBSTUBVA(CodesetsListCreate, struct codesetList *, ...)
{
  VA_LIST args;
  struct TagItem *tags;
  struct codesetList *list;

  VA_START(args, self);
  tags = VA_ARG(args, struct TagItem *);
  list = CodesetsListCreateA(tags);
  VA_END(args);

  return list;
}
#endif
3108 /// CodesetsListDeleteA()
3109 #ifdef __AROS__
3110 AROS_LH1(BOOL, CodesetsListDeleteA,
3111 AROS_LHA(struct TagItem *, attrs, A0),
3112 struct LibraryHeader *, library, 28, Codesets
3115 AROS_LIBFUNC_INIT
3116 #else
3117 BOOL LIBFUNC
3118 CodesetsListDeleteA(REG(a0, struct TagItem *attrs))
3120 #endif
3121 BOOL result = FALSE;
3122 ENTER();
3124 ObtainSemaphore(&CodesetsBase->poolSem);
3126 if(attrs != NULL)
3128 BOOL freeCodesets;
3129 struct TagItem *tstate = attrs;
3130 struct TagItem *tag;
3132 // check if the caller wants us also to free the codesets
3133 freeCodesets = (BOOL)GetTagData(CSA_FreeCodesets, TRUE, attrs);
3135 // now we iterate through or tagItems and see what the
3136 // user wants to remove from the list
3137 while((tag = NextTagItem(&tstate)))
3139 switch(tag->ti_Tag)
3141 case CSA_CodesetList:
3143 struct codesetList *csList = (struct codesetList *)tag->ti_Data;
3145 if(csList)
3147 // cleanup the codesets within the list
3148 if(freeCodesets)
3149 codesetsCleanup(csList);
3151 // then free the list itself
3152 freeArbitrateVecPooled(csList);
3154 result = TRUE;
3161 ReleaseSemaphore(&CodesetsBase->poolSem);
3163 RETURN(result);
3164 return result;
3165 #ifdef __AROS__
3166 AROS_LIBFUNC_EXIT
3167 #endif
3170 #ifndef __AROS__
3171 LIBSTUB(CodesetsListDeleteA, BOOL, REG(a0, struct TagItem *attrs))
3173 #ifdef __MORPHOS__
3174 return CodesetsListDeleteA((struct TagItem *)REG_A0);
3175 #else
3176 return CodesetsListDeleteA(attrs);
3177 #endif
3179 #endif
// Varargs entry stub for CodesetsListDelete() (AmigaOS4 only): fetches the
// tag list pointer from the variadic arguments and forwards it to
// CodesetsListDeleteA().
#if defined(__amigaos4__)
LIBSTUBVA(CodesetsListDelete, BOOL, ...)
{
  VA_LIST args;
  struct TagItem *tags;
  BOOL deleted;

  VA_START(args, self);
  tags = VA_ARG(args, struct TagItem *);
  deleted = CodesetsListDeleteA(tags);
  VA_END(args);

  return deleted;
}
#endif
3196 /// CodesetsListAddA()
3197 #ifdef __AROS__
3198 AROS_LH2(BOOL, CodesetsListAddA,
3199 AROS_LHA(struct codesetList *, csList, A0),
3200 AROS_LHA(struct TagItem *, attrs, A1),
3201 struct LibraryHeader *, library, 29, Codesets
3204 AROS_LIBFUNC_INIT
3205 #else
3206 BOOL LIBFUNC
3207 CodesetsListAddA(REG(a0, struct codesetList *csList),
3208 REG(a1, struct TagItem *attrs))
3210 #endif
3211 BOOL result = FALSE;
3212 ENTER();
3214 ObtainSemaphore(&CodesetsBase->poolSem);
3216 if(csList != NULL && attrs != NULL)
3218 struct TagItem *tstate = attrs;
3219 struct TagItem *tag;
3221 // now we iterate through or tagItems and see if the user
3222 // wants to scan a whole directory or just adds a file.
3223 while((tag = NextTagItem(&tstate)))
3225 switch(tag->ti_Tag)
3227 case CSA_CodesetDir:
3229 codesetsScanDir(csList, (STRPTR)tag->ti_Data);
3230 result = TRUE;
3232 break;
3234 case CSA_CodesetFile:
3236 codesetsReadTable(csList, (STRPTR)tag->ti_Data);
3237 result = TRUE;
3239 break;
3241 case CSA_SourceCodeset:
3243 struct codeset *cs = (struct codeset *)tag->ti_Data;
3245 AddTail((struct List *)csList, (struct Node *)&cs->node);
3246 result = TRUE;
3248 break;
3253 ReleaseSemaphore(&CodesetsBase->poolSem);
3255 RETURN(result);
3256 return result;
3257 #ifdef __AROS__
3258 AROS_LIBFUNC_EXIT
3259 #endif
3262 #ifndef __AROS__
3263 LIBSTUB(CodesetsListAddA, BOOL, REG(a0, struct codesetList *csList), REG(a1, struct TagItem *attrs))
3265 #ifdef __MORPHOS__
3266 return CodesetsListAddA((struct codesetList *)REG_A0, (struct TagItem *)REG_A1);
3267 #else
3268 return CodesetsListAddA(csList, attrs);
3269 #endif
3271 #endif
// Varargs entry stub for CodesetsListAdd() (AmigaOS4 only): fetches the tag
// list pointer that follows csList and forwards both to CodesetsListAddA().
#if defined(__amigaos4__)
LIBSTUBVA(CodesetsListAdd, BOOL, struct codesetList *csList, ...)
{
  VA_LIST args;
  struct TagItem *tags;
  BOOL added;

  VA_START(args, csList);
  tags = VA_ARG(args, struct TagItem *);
  added = CodesetsListAddA(csList, tags);
  VA_END(args);

  return added;
}
#endif
3288 /// CodesetsListRemoveA()
3289 #ifdef __AROS__
3290 AROS_LH1(BOOL, CodesetsListRemoveA,
3291 AROS_LHA(struct TagItem *, attrs, A0),
3292 struct LibraryHeader *, library, 30, Codesets
3295 AROS_LIBFUNC_INIT
3296 #else
3297 BOOL LIBFUNC
3298 CodesetsListRemoveA(REG(a0, struct TagItem *attrs))
3300 #endif
3301 BOOL result = FALSE;
3302 ENTER();
3304 ObtainSemaphore(&CodesetsBase->poolSem);
3306 if(attrs != NULL)
3308 BOOL freeCodesets;
3309 struct TagItem *tstate = attrs;
3310 struct TagItem *tag;
3312 // check if the caller wants us also to free the codesets
3313 freeCodesets = (BOOL)GetTagData(CSA_FreeCodesets, TRUE, attrs);
3315 // now we iterate through or tagItems and see what the
3316 // user wants to remove from the list
3317 while((tag = NextTagItem(&tstate)))
3319 switch(tag->ti_Tag)
3321 case CSA_SourceCodeset:
3323 struct codeset *cs = (struct codeset *)tag->ti_Data;
3325 if(cs)
3327 struct MinNode *mstate = &cs->node;
3329 // before we actually remove the node from its list, we
3330 // have to make sure it isn't part of our internal codesets list
3331 while(mstate->mln_Succ)
3332 mstate = mstate->mln_Succ;
3334 if(mstate != CodesetsBase->codesets.list.mlh_Tail)
3336 Remove((struct Node *)&cs->node);
3338 // free all codesets data if requested.
3339 if(freeCodesets == TRUE)
3341 if(cs->name) freeArbitrateVecPooled(cs->name);
3342 if(cs->alt_name) freeArbitrateVecPooled(cs->alt_name);
3343 if(cs->characterization) freeArbitrateVecPooled(cs->characterization);
3345 freeArbitrateVecPooled(cs);
3348 result = TRUE;
3350 else
3351 W(DBF_ALWAYS, "user tried to remove an internal codesets!");
3354 break;
3359 ReleaseSemaphore(&CodesetsBase->poolSem);
3361 RETURN(result);
3362 return result;
3363 #ifdef __AROS__
3364 AROS_LIBFUNC_EXIT
3365 #endif
3368 #ifndef __AROS__
3369 LIBSTUB(CodesetsListRemoveA, BOOL, REG(a0, struct TagItem *attrs))
3371 #ifdef __MORPHOS__
3372 return CodesetsListRemoveA((struct TagItem *)REG_A0);
3373 #else
3374 return CodesetsListRemoveA(attrs);
3375 #endif
3377 #endif
// Varargs entry stub for CodesetsListRemove() (AmigaOS4 only): fetches the
// tag list pointer from the variadic arguments and forwards it to
// CodesetsListRemoveA().
#if defined(__amigaos4__)
LIBSTUBVA(CodesetsListRemove, BOOL, ...)
{
  VA_LIST args;
  struct TagItem *tags;
  BOOL removed;

  VA_START(args, self);
  tags = VA_ARG(args, struct TagItem *);
  removed = CodesetsListRemoveA(tags);
  VA_END(args);

  return removed;
}
#endif
3395 /**************************************************************************/