1 /***************************************************************************
3 codesets.library - Amiga shared library for handling different codesets
4 Copyright (C) 2001-2005 by Alfonso [alfie] Ranieri <alforan@tin.it>.
5 Copyright (C) 2005-2008 by codesets.library Open Source Team
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 codesets.library project: http://sourceforge.net/projects/codesetslib/
19 Most of the code included in this file was relicensed from GPL to LGPL
20 from the source code of SimpleMail (http://www.sf.net/projects/simplemail)
21 with full permissions by its authors.
25 ***************************************************************************/
29 #include <clib/alib_protos.h>
31 #include <diskfont/glyph.h>
32 #include <diskfont/diskfonttag.h>
33 #include <proto/diskfont.h>
38 #include <proto/keymap.h>
39 #include <proto/locale.h>
42 #include "codesets_table.h"
43 #include "convertUTF.h"
44 #include "codepages.h"
47 #include "SDI_stdarg.h"
52 /**************************************************************************/
55 // search a sorted array in O(log n) e.g.
56 // BIN_SEARCH(strings,0,sizeof(strings)/sizeof(strings[0]),strcmp(key,array[mid]),res);
57 #define BIN_SEARCH(array,low,high,compare,result) \
61 int m = (low+high)/2;\
66 if (!d){ result = &array[m]; break; }\
67 if (d < 0) h = m - 1;\
// mystrdup(): duplicate the NUL-terminated string `str`.
// A buffer is only requested when strlen(str) is greater than zero; it is
// len+1 bytes large (room for the terminator), comes from
// allocArbitrateVecPooled() and is filled with strlcpy().
// NOTE(review): the return statement is not visible in this excerpt;
// presumably NULL is returned for an empty string or a failed allocation -
// confirm against the full file.
76 mystrdup(const char *str
)
86 if((len
= strlen(str
)) > 0)
88 if((newStr
= allocArbitrateVecPooled(len
+1)) != NULL
)
89 strlcpy(newStr
, str
, len
+1);
// mystrndup(): duplicate at most `n` characters of `str1`.
// The destination is n+1 bytes from allocArbitrateVecPooled(); strlcpy() is
// called with a size of n+1, so at most n characters plus the terminating
// NUL are written.
// NOTE(review): `n` is a signed int and no range check is visible here;
// confirm that callers never pass a negative value.
99 mystrndup(const char *str1
, int n
)
105 if((dest
= allocArbitrateVecPooled(n
+1)) != NULL
)
108 strlcpy(dest
, str1
, n
+1);
// readLine(): read one line from the file handle `fh` into `buf` (capacity
// `size` bytes) using FGets().
// A NULL result from FGets() is checked explicitly; the buffer is then
// examined for '\n' or '\r'.
// NOTE(review): the statements that act on these checks are not visible in
// this excerpt; presumably the line is cut at the first line-end character
// and a success flag is returned - confirm against the full file.
121 readLine(BPTR fh
, char *buf
, ULONG size
)
127 if((c
= FGets(fh
, buf
, size
)) == NULL
)
135 if(*c
== '\n' || *c
== '\r')
// getConfigItem(): test whether the line `buf` starts with the keyword
// `item` (case-insensitive comparison of the first `len` characters).
// On a match, two loops skip over whitespace in `buf`.
// NOTE(review): the code between the two whitespace loops and the return
// statements are not visible here; presumably a separator is consumed
// between them and a pointer to the value text is returned, NULL when the
// keyword does not match - confirm against the full file.
147 static const char * getConfigItem(const char *buf
, const char *item
, int len
)
151 if(strnicmp(buf
, item
, len
) == 0)
// skip whitespace following the keyword
158 while((c
= *buf
) != '\0' && isspace(c
))
// skip whitespace again (presumably after a separator - TODO confirm)
170 while((c
= *buf
) != '\0' && isspace(c
))
// parseUtf8(): decode one UTF-8 sequence from the string referenced by `ps`.
// Only fragments of the body are visible in this excerpt:
//  - a two-byte path that verifies the second byte is a continuation byte
//    (10xxxxxx) and combines 5 bits of s[0] with 6 bits of s[1];
//  - a generic path for an n-byte sequence that masks the lead byte to its
//    low 7-n bits, then shifts in 6 bits per continuation byte;
//  - a final range check against 1 << (5*n - 4).
// NOTE(review): how `n` is derived, what is returned on error and whether
// *ps is advanced cannot be seen here - confirm against the full file.
183 parseUtf8(STRPTR
*ps
)
// second byte must be a continuation byte (10xxxxxx)
207 if((s
[1] & 0xc0)!=0x80)
// two-byte sequence: 5 payload bits from the lead byte, 6 from the second
215 RETURN(((s
[0] & 0x1f)<<6) | (s
[1] & 0x3f));
216 return ((s
[0] & 0x1f)<<6) | (s
[1] & 0x3f);
// keep only the payload bits of the lead byte of an n-byte sequence
253 wc
= *s
++ & ((1<<(7-n
))-1);
// every following byte must be a continuation byte
257 if((*s
& 0xc0) != 0x80)
// append the 6 payload bits of the continuation byte
263 wc
= (wc
<< 6) | (*s
++ & 0x3f);
// NOTE(review): looks like a check rejecting values too small for an
// n-byte encoding (overlong form) - confirm
266 if(wc
< (1 << (5 * n
- 4)))
// countCodesets(): walk the list embedded in `csList`.
// The loop starts at list.mlh_Head with `num` set to 0 and stops at the
// first node whose mln_Succ is NULL, so that last node is not visited.
// NOTE(review): the loop body and the return statement are not visible
// here; presumably `num` is incremented per node and returned - confirm.
281 countCodesets(struct codesetList
*csList
)
283 struct MinNode
*node
, *succ
;
286 for(node
= csList
->list
.mlh_Head
, num
= 0; (succ
= node
->mln_Succ
); node
= succ
)
294 // In case some UTF8 sequences cannot be converted during CodesetsUTF8ToStrA(), this
295 // function is used to replace these unknown sequences with lookalike characters that
296 // still make the text more readable. For more replacements see
297 // http://www.utf8-zeichentabelle.de/unicode-utf8-table.pl
299 // The conversion table in this function is partly borrowed from the awebcharset plugin
300 // written by Frank Weber. See http://cvs.sunsite.dk/viewcvs.cgi/aweb/plugins/charset/awebcharset.c
// One entry of the UTF8 -> lookalike replacement table.
302 struct UTF8Replacement
304 const char *utf8
; // the original UTF8 byte sequence that is going to be replaced
305 const int utf8len
; // the length of the UTF8 sequence in bytes
306 const char *rep
; // pointer to the replacement string
307 const int replen
; // the length of the replacement string; a negative value signals that the replacement is itself a UTF8 sequence (e.g. "\xC2\xB7" with -2)
// compareUTF8Replacements(): comparison callback handed to bsearch() by
// mapUTF8toASCII(). `p1` is the search key and `p2` a table entry, both of
// type struct UTF8Replacement.
// Entries are ordered by sequence length first and by the raw bytes of the
// sequence second, so the lookup table has to be sorted the same way.
310 static int compareUTF8Replacements(const void *p1
, const void *p2
)
312 struct UTF8Replacement
*key
= (struct UTF8Replacement
*)p1
;
313 struct UTF8Replacement
*rep
= (struct UTF8Replacement
*)p2
;
316 // compare the length first, after that compare the strings
317 cmp
= key
->utf8len
- rep
->utf8len
;
// NOTE(review): presumably only reached when the lengths are equal (the
// guarding line is not visible in this excerpt) - confirm
319 cmp
= memcmp(key
->utf8
, rep
->utf8
, key
->utf8len
);
324 static int mapUTF8toASCII(const char **dst
, const unsigned char *src
, const int utf8len
)
327 struct UTF8Replacement key
= { (char *)src
, utf8len
, NULL
, 0 };
328 struct UTF8Replacement
*rep
;
330 static struct UTF8Replacement
const utf8map
[] =
332 // U+0100 ... U+017F (Latin Extended-A)
333 { "\xC4\x80", 2, "A", 1 }, // U+0100 -> A (LATIN CAPITAL LETTER A WITH MACRON)
334 { "\xC4\x81", 2, "a", 1 }, // U+0101 -> a (LATIN SMALL LETTER A WITH MACRON)
335 { "\xC4\x82", 2, "A", 1 }, // U+0102 -> A (LATIN CAPITAL LETTER A WITH BREVE)
336 { "\xC4\x83", 2, "a", 1 }, // U+0103 -> a (LATIN SMALL LETTER A WITH BREVE)
337 { "\xC4\x84", 2, "A", 1 }, // U+0104 -> A (LATIN CAPITAL LETTER A WITH OGONEK)
338 { "\xC4\x85", 2, "a", 1 }, // U+0105 -> a (LATIN SMALL LETTER A WITH OGONEK)
339 { "\xC4\x86", 2, "C", 1 }, // U+0106 -> C (LATIN CAPITAL LETTER C WITH ACUTE)
340 { "\xC4\x87", 2, "c", 1 }, // U+0107 -> c (LATIN SMALL LETTER C WITH ACUTE)
341 { "\xC4\x88", 2, "C", 1 }, // U+0108 -> C (LATIN CAPITAL LETTER C WITH CIRCUMFLEX)
342 { "\xC4\x89", 2, "c", 1 }, // U+0109 -> c (LATIN SMALL LETTER C WITH CIRCUMFLEX)
343 { "\xC4\x8A", 2, "C", 1 }, // U+010A -> C (LATIN CAPITAL LETTER C WITH DOT ABOVE)
344 { "\xC4\x8B", 2, "c", 1 }, // U+010B -> c (LATIN SMALL LETTER C WITH DOT ABOVE)
345 { "\xC4\x8C", 2, "C", 1 }, // U+010C -> C (LATIN CAPITAL LETTER C WITH CARON)
346 { "\xC4\x8D", 2, "c", 1 }, // U+010D -> c (LATIN SMALL LETTER C WITH CARON)
347 { "\xC4\x8E", 2, "D", 1 }, // U+010E -> D (LATIN CAPITAL LETTER D WITH CARON)
348 { "\xC4\x8F", 2, "d", 1 }, // U+010F -> d (LATIN SMALL LETTER D WITH CARON)
349 { "\xC4\x90", 2, "D", 1 }, // U+0110 -> D (LATIN CAPITAL LETTER D WITH STROKE)
350 { "\xC4\x91", 2, "d", 1 }, // U+0111 -> d (LATIN SMALL LETTER D WITH STROKE)
351 { "\xC4\x92", 2, "E", 1 }, // U+0112 -> E (LATIN CAPITAL LETTER E WITH MACRON)
352 { "\xC4\x93", 2, "e", 1 }, // U+0113 -> e (LATIN SMALL LETTER E WITH MACRON)
353 { "\xC4\x94", 2, "E", 1 }, // U+0114 -> E (LATIN CAPITAL LETTER E WITH BREVE)
354 { "\xC4\x95", 2, "e", 1 }, // U+0115 -> e (LATIN SMALL LETTER E WITH BREVE)
355 { "\xC4\x96", 2, "E", 1 }, // U+0116 -> E (LATIN CAPITAL LETTER E WITH DOT ABOVE)
356 { "\xC4\x97", 2, "e", 1 }, // U+0117 -> e (LATIN SMALL LETTER E WITH DOT ABOVE)
357 { "\xC4\x98", 2, "E", 1 }, // U+0118 -> E (LATIN CAPITAL LETTER E WITH OGONEK)
358 { "\xC4\x99", 2, "e", 1 }, // U+0119 -> e (LATIN SMALL LETTER E WITH OGONEK)
359 { "\xC4\x9A", 2, "E", 1 }, // U+011A -> E (LATIN CAPITAL LETTER E WITH CARON)
360 { "\xC4\x9B", 2, "e", 1 }, // U+011B -> e (LATIN SMALL LETTER E WITH CARON)
361 { "\xC4\x9C", 2, "G", 1 }, // U+011C -> G (LATIN CAPITAL LETTER G WITH CIRCUMFLEX)
362 { "\xC4\x9D", 2, "g", 1 }, // U+011D -> g (LATIN SMALL LETTER G WITH CIRCUMFLEX)
363 { "\xC4\x9E", 2, "G", 1 }, // U+011E -> G (LATIN CAPITAL LETTER G WITH BREVE)
364 { "\xC4\x9F", 2, "g", 1 }, // U+011F -> g (LATIN SMALL LETTER G WITH BREVE)
365 { "\xC4\xA0", 2, "G", 1 }, // U+0120 -> G (LATIN CAPITAL LETTER G WITH DOT ABOVE)
366 { "\xC4\xA1", 2, "g", 1 }, // U+0121 -> g (LATIN SMALL LETTER G WITH DOT ABOVE)
367 { "\xC4\xA2", 2, "G", 1 }, // U+0122 -> G (LATIN CAPITAL LETTER G WITH CEDILLA)
368 { "\xC4\xA3", 2, "g", 1 }, // U+0123 -> g (LATIN SMALL LETTER G WITH CEDILLA)
369 { "\xC4\xA4", 2, "H", 1 }, // U+0124 -> H (LATIN CAPITAL LETTER H WITH CIRCUMFLEX)
370 { "\xC4\xA5", 2, "h", 1 }, // U+0125 -> h (LATIN SMALL LETTER H WITH CIRCUMFLEX)
371 { "\xC4\xA6", 2, "H", 1 }, // U+0126 -> H (LATIN CAPITAL LETTER H WITH STROKE)
372 { "\xC4\xA7", 2, "h", 1 }, // U+0127 -> h (LATIN SMALL LETTER H WITH STROKE)
373 { "\xC4\xA8", 2, "I", 1 }, // U+0128 -> I (LATIN CAPITAL LETTER I WITH TILDE)
374 { "\xC4\xA9", 2, "i", 1 }, // U+0129 -> i (LATIN SMALL LETTER I WITH TILDE)
375 { "\xC4\xAA", 2, "I", 1 }, // U+012A -> I (LATIN CAPITAL LETTER I WITH MACRON)
376 { "\xC4\xAB", 2, "i", 1 }, // U+012B -> i (LATIN SMALL LETTER I WITH MACRON)
377 { "\xC4\xAC", 2, "I", 1 }, // U+012C -> I (LATIN CAPITAL LETTER I WITH BREVE)
378 { "\xC4\xAD", 2, "i", 1 }, // U+012D -> i (LATIN SMALL LETTER I WITH BREVE)
379 { "\xC4\xAE", 2, "I", 1 }, // U+012E -> I (LATIN CAPITAL LETTER I WITH OGONEK)
380 { "\xC4\xAF", 2, "i", 1 }, // U+012F -> i (LATIN SMALL LETTER I WITH OGONEK)
381 { "\xC4\xB0", 2, "I", 1 }, // U+0130 -> I (LATIN CAPITAL LETTER I WITH DOT ABOVE)
382 { "\xC4\xB1", 2, "i", 1 }, // U+0131 -> i (LATIN SMALL LETTER DOTLESS I)
383 { "\xC4\xB2", 2, "Ij", 2 }, // U+0132 -> Ij (LATIN CAPITAL LIGATURE IJ)
384 { "\xC4\xB3", 2, "ij", 2 }, // U+0133 -> ij (LATIN SMALL LIGATURE IJ)
385 { "\xC4\xB4", 2, "J", 1 }, // U+0134 -> J (LATIN CAPITAL LETTER J WITH CIRCUMFLEX)
386 { "\xC4\xB5", 2, "j", 1 }, // U+0135 -> j (LATIN SMALL LETTER J WITH CIRCUMFLEX)
387 { "\xC4\xB6", 2, "K", 1 }, // U+0136 -> K (LATIN CAPITAL LETTER K WITH CEDILLA)
388 { "\xC4\xB7", 2, "k", 1 }, // U+0137 -> k (LATIN SMALL LETTER K WITH CEDILLA)
389 { "\xC4\xB8", 2, "k", 1 }, // U+0138 -> k (LATIN SMALL LETTER KRA)
390 { "\xC4\xB9", 2, "L", 1 }, // U+0139 -> L (LATIN CAPITAL LETTER L WITH ACUTE)
391 { "\xC4\xBA", 2, "l", 1 }, // U+013A -> l (LATIN SMALL LETTER L WITH ACUTE)
392 { "\xC4\xBB", 2, "L", 1 }, // U+013B -> L (LATIN CAPITAL LETTER L WITH CEDILLA)
393 { "\xC4\xBC", 2, "l", 1 }, // U+013C -> l (LATIN SMALL LETTER L WITH CEDILLA)
394 { "\xC4\xBD", 2, "L", 1 }, // U+013D -> L (LATIN CAPITAL LETTER L WITH CARON)
395 { "\xC4\xBE", 2, "l", 1 }, // U+013E -> l (LATIN SMALL LETTER L WITH CARON)
396 { "\xC4\xBF", 2, "L", 1 }, // U+013F -> L (LATIN CAPITAL LETTER L WITH MIDDLE DOT)
397 { "\xC5\x80", 2, "l", 1 }, // U+0140 -> l (LATIN SMALL LETTER L WITH MIDDLE DOT)
398 { "\xC5\x81", 2, "L", 1 }, // U+0141 -> L (LATIN CAPITAL LETTER L WITH STROKE)
399 { "\xC5\x82", 2, "l", 1 }, // U+0142 -> l (LATIN SMALL LETTER L WITH STROKE)
400 { "\xC5\x83", 2, "N", 1 }, // U+0143 -> N (LATIN CAPITAL LETTER N WITH ACUTE)
401 { "\xC5\x84", 2, "n", 1 }, // U+0144 -> n (LATIN SMALL LETTER N WITH ACUTE)
402 { "\xC5\x85", 2, "N", 1 }, // U+0145 -> N (LATIN CAPITAL LETTER N WITH CEDILLA)
403 { "\xC5\x86", 2, "n", 1 }, // U+0146 -> n (LATIN SMALL LETTER N WITH CEDILLA)
404 { "\xC5\x87", 2, "N", 1 }, // U+0147 -> N (LATIN CAPITAL LETTER N WITH CARON)
405 { "\xC5\x88", 2, "n", 1 }, // U+0148 -> n (LATIN SMALL LETTER N WITH CARON)
406 { "\xC5\x89", 2, "'n", 2 }, // U+0149 -> 'n (LATIN SMALL LETTER N PRECEDED BY APOSTROPHE)
407 { "\xC5\x8A", 2, "Ng", 2 }, // U+014A -> Ng (LATIN CAPITAL LETTER ENG)
408 { "\xC5\x8B", 2, "ng", 2 }, // U+014B -> ng (LATIN SMALL LETTER ENG)
409 { "\xC5\x8C", 2, "O", 1 }, // U+014C -> O (LATIN CAPITAL LETTER O WITH MACRON)
410 { "\xC5\x8D", 2, "o", 1 }, // U+014D -> o (LATIN SMALL LETTER O WITH MACRON)
411 { "\xC5\x8E", 2, "O", 1 }, // U+014E -> O (LATIN CAPITAL LETTER O WITH BREVE)
412 { "\xC5\x8F", 2, "o", 1 }, // U+014F -> o (LATIN SMALL LETTER O WITH BREVE)
413 { "\xC5\x90", 2, "O", 1 }, // U+0150 -> O (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE)
414 { "\xC5\x91", 2, "o", 1 }, // U+0151 -> o (LATIN SMALL LETTER O WITH DOUBLE ACUTE)
415 { "\xC5\x92", 2, "Oe", 2 }, // U+0152 -> Oe (LATIN CAPITAL LIGATURE OE)
416 { "\xC5\x93", 2, "oe", 2 }, // U+0153 -> oe (LATIN SMALL LIGATURE OE)
417 { "\xC5\x94", 2, "R", 1 }, // U+0154 -> R (LATIN CAPITAL LETTER R WITH ACUTE)
418 { "\xC5\x95", 2, "r", 1 }, // U+0155 -> r (LATIN SMALL LETTER R WITH ACUTE)
419 { "\xC5\x96", 2, "R", 1 }, // U+0156 -> R (LATIN CAPITAL LETTER R WITH CEDILLA)
420 { "\xC5\x97", 2, "r", 1 }, // U+0157 -> r (LATIN SMALL LETTER R WITH CEDILLA)
421 { "\xC5\x98", 2, "R", 1 }, // U+0158 -> R (LATIN CAPITAL LETTER R WITH CARON)
422 { "\xC5\x99", 2, "r", 1 }, // U+0159 -> r (LATIN SMALL LETTER R WITH CARON)
423 { "\xC5\x9A", 2, "S", 1 }, // U+015A -> S (LATIN CAPITAL LETTER S WITH ACUTE)
424 { "\xC5\x9B", 2, "s", 1 }, // U+015B -> s (LATIN SMALL LETTER S WITH ACUTE)
425 { "\xC5\x9C", 2, "S", 1 }, // U+015C -> S (LATIN CAPITAL LETTER S WITH CIRCUMFLEX)
426 { "\xC5\x9D", 2, "s", 1 }, // U+015D -> s (LATIN SMALL LETTER S WITH CIRCUMFLEX)
427 { "\xC5\x9E", 2, "S", 1 }, // U+015E -> S (LATIN CAPITAL LETTER S WITH CEDILLA)
428 { "\xC5\x9F", 2, "s", 1 }, // U+015F -> s (LATIN SMALL LETTER S WITH CEDILLA)
429 { "\xC5\xA0", 2, "S", 1 }, // U+0160 -> S (LATIN CAPITAL LETTER S WITH CARON)
430 { "\xC5\xA1", 2, "s", 1 }, // U+0161 -> s (LATIN SMALL LETTER S WITH CARON)
431 { "\xC5\xA2", 2, "T", 1 }, // U+0162 -> T (LATIN CAPITAL LETTER T WITH CEDILLA)
432 { "\xC5\xA3", 2, "t", 1 }, // U+0163 -> t (LATIN SMALL LETTER T WITH CEDILLA)
433 { "\xC5\xA4", 2, "T", 1 }, // U+0164 -> T (LATIN CAPITAL LETTER T WITH CARON)
434 { "\xC5\xA5", 2, "t", 1 }, // U+0165 -> t (LATIN SMALL LETTER T WITH CARON)
435 { "\xC5\xA6", 2, "T", 1 }, // U+0166 -> T (LATIN CAPITAL LETTER T WITH STROKE)
436 { "\xC5\xA7", 2, "t", 1 }, // U+0167 -> t (LATIN SMALL LETTER T WITH STROKE)
437 { "\xC5\xA8", 2, "U", 1 }, // U+0168 -> U (LATIN CAPITAL LETTER U WITH TILDE)
438 { "\xC5\xA9", 2, "u", 1 }, // U+0169 -> u (LATIN SMALL LETTER U WITH TILDE)
439 { "\xC5\xAA", 2, "U", 1 }, // U+016A -> U (LATIN CAPITAL LETTER U WITH MACRON)
440 { "\xC5\xAB", 2, "u", 1 }, // U+016B -> u (LATIN SMALL LETTER U WITH MACRON)
441 { "\xC5\xAC", 2, "U", 1 }, // U+016C -> U (LATIN CAPITAL LETTER U WITH BREVE)
442 { "\xC5\xAD", 2, "u", 1 }, // U+016D -> u (LATIN SMALL LETTER U WITH BREVE)
443 { "\xC5\xAE", 2, "U", 1 }, // U+016E -> U (LATIN CAPITAL LETTER U WITH RING ABOVE)
444 { "\xC5\xAF", 2, "u", 1 }, // U+016F -> u (LATIN SMALL LETTER U WITH RING ABOVE)
445 { "\xC5\xB0", 2, "U", 1 }, // U+0170 -> U (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE)
446 { "\xC5\xB1", 2, "u", 1 }, // U+0171 -> u (LATIN SMALL LETTER U WITH DOUBLE ACUTE)
447 { "\xC5\xB2", 2, "U", 1 }, // U+0172 -> U (LATIN CAPITAL LETTER U WITH OGONEK)
448 { "\xC5\xB3", 2, "u", 1 }, // U+0173 -> u (LATIN SMALL LETTER U WITH OGONEK)
449 { "\xC5\xB4", 2, "W", 1 }, // U+0174 -> W (LATIN CAPITAL LETTER W WITH CIRCUMFLEX)
450 { "\xC5\xB5", 2, "w", 1 }, // U+0175 -> w (LATIN SMALL LETTER W WITH CIRCUMFLEX)
451 { "\xC5\xB6", 2, "Y", 1 }, // U+0176 -> Y (LATIN CAPITAL LETTER Y WITH CIRCUMFLEX)
452 { "\xC5\xB7", 2, "y", 1 }, // U+0177 -> y (LATIN SMALL LETTER Y WITH CIRCUMFLEX)
453 { "\xC5\xB8", 2, "Y", 1 }, // U+0178 -> Y (LATIN CAPITAL LETTER Y WITH DIAERESIS)
454 { "\xC5\xB9", 2, "Z", 1 }, // U+0179 -> Z (LATIN CAPITAL LETTER Z WITH ACUTE)
455 { "\xC5\xBA", 2, "z", 1 }, // U+017A -> z (LATIN SMALL LETTER Z WITH ACUTE)
456 { "\xC5\xBB", 2, "Z", 1 }, // U+017B -> Z (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
457 { "\xC5\xBC", 2, "z", 1 }, // U+017C -> z (LATIN SMALL LETTER Z WITH DOT ABOVE)
458 { "\xC5\xBD", 2, "Z", 1 }, // U+017D -> Z (LATIN CAPITAL LETTER Z WITH CARON)
459 { "\xC5\xBE", 2, "z", 1 }, // U+017E -> z (LATIN SMALL LETTER Z WITH CARON)
460 { "\xC5\xBF", 2, "s", 1 }, // U+017F -> s (LATIN SMALL LETTER LONG S)
462 // U+2000 ... U+206F (General Punctuation)
463 { "\xE2\x80\x90", 3, "-", 1 }, // U+2010 -> - (HYPHEN)
464 { "\xE2\x80\x91", 3, "-", 1 }, // U+2011 -> - (NON-BREAKING HYPHEN)
465 { "\xE2\x80\x92", 3, "--", 2 }, // U+2012 -> -- (FIGURE DASH)
466 { "\xE2\x80\x93", 3, "--", 2 }, // U+2013 -> -- (EN DASH)
467 { "\xE2\x80\x94", 3, "---", 3 }, // U+2014 -> --- (EM DASH)
468 { "\xE2\x80\x95", 3, "---", 3 }, // U+2015 -> --- (HORIZONTAL BAR)
469 { "\xE2\x80\x96", 3, "||", 2 }, // U+2016 -> || (DOUBLE VERTICAL LINE)
470 { "\xE2\x80\x97", 3, "_", 1 }, // U+2017 -> _ (DOUBLE LOW LINE)
471 { "\xE2\x80\x98", 3, "`", 1 }, // U+2018 -> ` (LEFT SINGLE QUOTATION MARK)
472 { "\xE2\x80\x99", 3, "'", 1 }, // U+2019 -> ' (RIGHT SINGLE QUOTATION MARK)
473 { "\xE2\x80\x9A", 3, ",", 1 }, // U+201A -> , (SINGLE LOW-9 QUOTATION MARK)
474 { "\xE2\x80\x9B", 3, "'", 1 }, // U+201B -> ' (SINGLE HIGH-REVERSED-9 QUOTATION MARK)
475 { "\xE2\x80\x9C", 3, "\"", 1 }, // U+201C -> " (LEFT DOUBLE QUOTATION MARK)
476 { "\xE2\x80\x9D", 3, "\"", 1 }, // U+201D -> " (RIGHT DOUBLE QUOTATION MARK)
477 { "\xE2\x80\x9E", 3, ",,", 2 }, // U+201E -> ,, (DOUBLE LOW-9 QUOTATION MARK)
478 { "\xE2\x80\x9F", 3, "``", 2 }, // U+201F -> `` (DOUBLE HIGH-REVERSED-9 QUOTATION MARK)
479 { "\xE2\x80\xA0", 3, "+", 1 }, // U+2020 -> + (DAGGER)
480 { "\xE2\x80\xA1", 3, "+", 1 }, // U+2021 -> + (DOUBLE DAGGER)
481 { "\xE2\x80\xA2", 3, "\xC2\xB7", -2 }, // U+2022 -> U+00B7 (BULLET) -> (MIDDLE POINT)
482 { "\xE2\x80\xA3", 3, ".", 1 }, // U+2023 -> . (TRIANGULAR BULLET)
483 { "\xE2\x80\xA4", 3, ".", 1 }, // U+2024 -> . (ONE DOT LEADER)
484 { "\xE2\x80\xA5", 3, "..", 2 }, // U+2025 -> .. (TWO DOT LEADER)
485 { "\xE2\x80\xA6", 3, "...", 3 }, // U+2026 -> ... (HORIZONTAL ELLIPSIS)
486 { "\xE2\x80\xA7", 3, "\xC2\xB7", -2 }, // U+2027 -> U+00B7 (HYPHENATION POINT) -> (MIDDLE POINT)
487 { "\xE2\x80\xB0", 3, "%.", 2 }, // U+2030 -> %. (PER MILLE SIGN)
488 { "\xE2\x80\xB1", 3, "%..", 3 }, // U+2031 -> %.. (PER TEN THOUSAND SIGN)
489 { "\xE2\x80\xB2", 3, "'", 1 }, // U+2032 -> ' (PRIME)
490 { "\xE2\x80\xB3", 3, "''", 2 }, // U+2033 -> '' (DOUBLE PRIME)
491 { "\xE2\x80\xB4", 3, "'''", 3 }, // U+2034 -> ''' (TRIPLE PRIME)
492 { "\xE2\x80\xB5", 3, "`", 1 }, // U+2035 -> ` (REVERSED PRIME)
493 { "\xE2\x80\xB6", 3, "``", 2 }, // U+2036 -> `` (REVERSED DOUBLE PRIME)
494 { "\xE2\x80\xB7", 3, "```", 3 }, // U+2037 -> ``` (REVERSED TRIPLE PRIME)
495 { "\xE2\x80\xB8", 3, "^", 1 }, // U+2038 -> ^ (CARET)
496 { "\xE2\x80\xB9", 3, "<", 1 }, // U+2039 -> < (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
497 { "\xE2\x80\xBA", 3, ">", 1 }, // U+203A -> > (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
498 { "\xE2\x80\xBB", 3, "\xC3\x97", -2 }, // U+203B -> U+00D7 (REFERENCE MARK) -> (MULTIPLICATION SIGN)
499 { "\xE2\x80\xBC", 3, "!!", 2 }, // U+203C -> !! (DOUBLE EXCLAMATION MARK)
500 { "\xE2\x80\xBD", 3, "?", 1 }, // U+203D -> ? (INTERROBANG)
501 { "\xE2\x81\x82", 3, "*", 1 }, // U+2042 -> * (ASTERISM)
502 { "\xE2\x81\x83", 3, ".", 1 }, // U+2043 -> . (HYPHEN BULLET)
503 { "\xE2\x81\x84", 3, "/", 1 }, // U+2044 -> / (FRACTION SLASH)
504 { "\xE2\x81\x87", 3, "??", 2 }, // U+2047 -> ?? (DOUBLE QUESTION MARK)
505 { "\xE2\x81\x88", 3, "?!", 2 }, // U+2048 -> ?! (QUESTION EXCLAMATION MARK)
506 { "\xE2\x81\x89", 3, "!?", 2 }, // U+2049 -> !? (EXCLAMATION QUESTION MARK)
507 { "\xE2\x81\x8E", 3, "*", 1 }, // U+204E -> * (LOW ASTERISK)
508 { "\xE2\x81\x8F", 3, ";", 1 }, // U+204F -> ; (REVERSED SEMICOLON)
509 { "\xE2\x81\x91", 3, "*", 1 }, // U+2051 -> * (TWO ASTERISKS ALIGNED VERTICALLY)
510 { "\xE2\x81\x92", 3, "-", 1 }, // U+2052 -> - (COMMERCIAL MINUS SIGN)
511 { "\xE2\x81\x93", 3, "~", 1 }, // U+2053 -> ~ (SWUNG DASH)
512 { "\xE2\x81\x95", 3, "*", 1 }, // U+2055 -> * (FLOWER PUNCTUATION MARK)
513 { "\xE2\x81\x97", 3, "''''", 4 }, // U+2057 -> '''' (QUADRUPLE PRIME)
514 { "\xE2\x81\x9A", 3, ":", 1 }, // U+205A -> : (TWO DOT PUNCTUATION)
515 { "\xE2\x81\x9C", 3, "+", 1 }, // U+205C -> + (DOTTED CROSS)
517 // U+20A0 ... U+20CF (Currency Symbols)
518 { "\xE2\x82\xA0", 3, "ECU", 3 }, // U+20A0 -> ECU (EURO-CURRENCY SIGN)
519 { "\xE2\x82\xA1", 3, "CRC", 3 }, // U+20A1 -> CRC (COLON SIGN)
520 { "\xE2\x82\xA2", 3, "BRC", 3 }, // U+20A2 -> BRC (CRUZEIRO SIGN)
521 { "\xE2\x82\xA3", 3, "BEF", 3 }, // U+20A3 -> BEF (FRENCH FRANC SIGN)
522 { "\xE2\x82\xA4", 3, "ITL", 3 }, // U+20A4 -> ITL (LIRA SIGN)
523 { "\xE2\x82\xA6", 3, "NGN", 3 }, // U+20A6 -> NGN (NAIRA SIGN)
524 { "\xE2\x82\xA7", 3, "ESP", 3 }, // U+20A7 -> ESP (PESETA SIGN)
525 { "\xE2\x82\xA8", 3, "MVQ", 3 }, // U+20A8 -> MVQ (RUPEE SIGN)
526 { "\xE2\x82\xA9", 3, "KPW", 3 }, // U+20A9 -> KPW (WON SIGN)
527 { "\xE2\x82\xAA", 3, "ILS", 3 }, // U+20AA -> ILS (NEW SHEQEL SIGN)
528 { "\xE2\x82\xAB", 3, "VNC", 3 }, // U+20AB -> VNC (DONG SIGN)
529 { "\xE2\x82\xAC", 3, "EUR", 3 }, // U+20AC -> EUR (EURO SIGN)
530 { "\xE2\x82\xAD", 3, "LAK", 3 }, // U+20AD -> LAK (KIP SIGN)
531 { "\xE2\x82\xAE", 3, "MNT", 3 }, // U+20AE -> MNT (TUGRIK SIGN)
532 { "\xE2\x82\xAF", 3, "GRD", 3 }, // U+20AF -> GRD (DRACHMA SIGN)
533 { "\xE2\x82\xB0", 3, "Pf", 2 }, // U+20B0 -> Pf (GERMAN PENNY SIGN)
534 { "\xE2\x82\xB1", 3, "P", 1 }, // U+20B1 -> P (PESO SIGN)
535 { "\xE2\x82\xB2", 3, "PYG", 3 }, // U+20B2 -> PYG (GUARANI SIGN)
536 { "\xE2\x82\xB3", 3, "ARA", 3 }, // U+20B3 -> ARA (AUSTRAL SIGN)
537 { "\xE2\x82\xB4", 3, "UAH", 3 }, // U+20B4 -> UAH (HRYVNIA SIGN)
538 { "\xE2\x82\xB5", 3, "GHS", 3 }, // U+20B5 -> GHS (CEDI SIGN)
540 // U+2190 ... U+21FF (Arrows)
541 { "\xE2\x86\x90", 3, "<-", 2 }, // U+2190 -> <- (LEFTWARDS ARROW)
542 { "\xE2\x86\x92", 3, "->", 2 }, // U+2192 -> -> (RIGHTWARDS ARROW)
547 // start with no replacement string
550 // perform a binary search in the lookup table
551 if((rep
= bsearch(&key
, utf8map
, sizeof(utf8map
) / sizeof(utf8map
[0]), sizeof(utf8map
[0]), compareUTF8Replacements
)) != NULL
)
553 // if we found something, then copy this over to the result variables
563 /// matchCodesetAlias()
// Maps an official MIME charset name to the alternative spellings that are
// accepted for it.
565 struct CodesetAliases
567 const char *MIMEname
; // The official and correct MIME name of a codeset
568 const char *Aliases
; // A space-separated list of well-known aliases for that codeset
// Table of known codeset aliases: the first string is the official MIME
// name, the second a space-separated list of aliases.
// matchCodesetAlias() walks this table until it finds an entry whose
// MIMEname is NULL (that terminating entry is not visible in this excerpt).
571 const struct CodesetAliases codesetAliases
[] =
574 { "Amiga-1251", "Ami1251 Amiga1251" },
575 { "AmigaPL", "AmiPL Amiga-PL" },
576 { "ISO-8859-1", "ISO8859-1 8859-1" },
577 { "ISO-8859-2", "ISO8859-2 8859-2" },
578 { "ISO-8859-3", "ISO8859-3 8859-3" },
579 { "ISO-8859-4", "ISO8859-4 8859-4" },
580 { "ISO-8859-5", "ISO8859-5 8859-5" },
581 { "ISO-8859-6", "ISO8859-6 8859-6" },
582 { "ISO-8859-7", "ISO8859-7 8859-7" },
583 { "ISO-8859-8", "ISO8859-8 8859-8" },
584 { "ISO-8859-9", "ISO8859-9 8859-9" },
585 { "ISO-8859-10", "ISO8859-10 8859-10" },
586 { "ISO-8859-11", "ISO8859-11 8859-11" },
587 { "ISO-8859-12", "ISO8859-12 8859-12" }, // NOTE(review): ISO-8859-12 does not appear to be a published standard - confirm whether this entry is needed
588 { "ISO-8859-13", "ISO8859-13 8859-13" },
589 { "ISO-8859-14", "ISO8859-14 8859-14" },
590 { "ISO-8859-15", "ISO8859-15 8859-15" },
591 { "ISO-8859-16", "ISO8859-16 8859-16" },
592 { "ISO-8859-10", "ISO8859-10 8859-10" }, // NOTE(review): exact duplicate of the ISO-8859-10 entry above; redundant and a candidate for removal
593 { "KOI8-R", "KOI8R" },
594 { "US-ASCII", "ASCII" },
595 { "UTF-8", "UTF8 UTF" },
596 { "UTF-16", "UTF16" },
597 { "UTF-32", "UTF32" },
598 { "windows-1250", "cp1250 windows1250" },
599 { "windows-1251", "cp1251 windows1251" },
600 { "windows-1252", "cp1252 windows1252" },
601 { "windows-1253", "cp1253 windows1253" },
602 { "windows-1254", "cp1254 windows1254" },
603 { "windows-1255", "cp1255 windows1255" },
604 { "windows-1256", "cp1256 windows1256" },
605 { "windows-1257", "cp1257 windows1257" },
// matchCodesetAlias(): look up `search` in the codesetAliases table and
// yield the official MIME name of the matching entry.
// For every table entry the MIME name itself is compared first
// (case-insensitive, full string); the space-separated alias list of the
// entry is also scanned token by token. On a hit `result` is set to the
// entry's MIMEname (cast to non-const to fit the return type).
// NOTE(review): the return statement is not visible in this excerpt;
// presumably `result` is returned and is NULL when nothing matched - confirm.
609 static char *matchCodesetAlias(const char *search
)
612 size_t len
= strlen(search
);
// the table is walked until an entry with a NULL MIMEname is reached
617 for(i
=0; codesetAliases
[i
].MIMEname
!= NULL
; i
++)
621 // search the MIMEname first
622 if(stricmp(search
, codesetAliases
[i
].MIMEname
) == 0)
626 const char *s
= codesetAliases
[i
].Aliases
;
628 // loop through space separated list of aliases
629 while(s
!= NULL
&& *s
!= '\0')
// NOTE(review): only strlen(search) characters are compared, so a search
// string that is merely a prefix of an alias (e.g. "cp" against "cp1250")
// appears to match as well, unless a line not visible here also checks
// that the alias ends at that position - confirm
631 if(strnicmp(search
, s
, len
) == 0)
// advance to the next space; strpbrk() returns NULL after the last alias
637 if((s
= strpbrk(s
, " ")) != NULL
)
644 result
= (char *)codesetAliases
[i
].MIMEname
;
656 /**************************************************************************/
// defaultCodeset(): determine the codeset that is used as the default.
// The global environment variable "codeset_default" is read into `buf` and
// looked up in CodesetsBase->codesets via codesetsFind(). When the variable
// is empty or names an unknown codeset, CodesetsBase->systemCodeset is used
// instead.
// When `useSemaphore` is TRUE the lookup is bracketed by a shared obtain and
// a release of CodesetsBase->libSem.
// NOTE(review): the result of GetVar() is not used on the visible line, so
// the buf[0] test relies on `buf` having been initialized earlier; that
// line is not visible in this excerpt - confirm.
659 static struct codeset
*
660 defaultCodeset(BOOL useSemaphore
)
663 struct codeset
*codeset
;
667 if(useSemaphore
== TRUE
)
668 ObtainSemaphoreShared(&CodesetsBase
->libSem
);
// only global variables are considered (GVF_GLOBAL_ONLY)
671 GetVar("codeset_default",buf
,sizeof(buf
),GVF_GLOBAL_ONLY
);
// fall back to the system codeset if nothing usable was configured
673 if(buf
[0] == '\0' || (codeset
= codesetsFind(&CodesetsBase
->codesets
,buf
)) == NULL
)
674 codeset
= CodesetsBase
->systemCodeset
;
676 if(useSemaphore
== TRUE
)
677 ReleaseSemaphore(&CodesetsBase
->libSem
);
683 /// codesetsCmpUnicode()
684 // The compare function handed to qsort() when a codeset's table_sorted is built.
// It orders two single_convert entries by the UTF-8 byte sequence stored
// from utf8[1] onwards; utf8[0] is skipped because it is filled with the
// length of that sequence when the tables are generated.
// strcmp() is used, so the stored sequence has to be NUL-terminated.
686 codesetsCmpUnicode(struct single_convert
*arg1
,struct single_convert
*arg2
)
688 return strcmp((char*)&arg1
->utf8
[1], (char*)&arg2
->utf8
[1]);
691 /// codesetsReadTable()
693 #define ITEM_STANDARD "Standard"
694 #define ITEM_ALTSTANDARD "AltStandard"
695 #define ITEM_READONLY "ReadOnly"
696 #define ITEM_CHARACTERIZATION "Characterization"
698 // Reads a coding table and adds it
700 codesetsReadTable(struct codesetList
*csList
, STRPTR name
)
707 D(DBF_STARTUP
, "trying to fetch charset file '%s'...", name
);
709 if((fh
= Open(name
, MODE_OLDFILE
)))
711 struct codeset
*codeset
;
713 if((codeset
= (struct codeset
*)allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) != NULL
)
718 memset(codeset
,0,sizeof(struct codeset
));
720 for(i
= 0; i
<256; i
++)
721 codeset
->table
[i
].code
= codeset
->table
[i
].ucs4
= i
;
723 while(readLine(fh
, buf
, 512*sizeof(char)))
730 if((result
= getConfigItem(buf
, ITEM_STANDARD
, strlen(ITEM_STANDARD
))))
731 codeset
->name
= mystrdup(result
);
732 else if(codeset
->name
== NULL
) // a valid file starts with standard and nothing else!!
734 else if((result
= getConfigItem(buf
,ITEM_ALTSTANDARD
,strlen(ITEM_ALTSTANDARD
))))
735 codeset
->alt_name
= mystrdup(result
);
736 else if((result
= getConfigItem(buf
,ITEM_READONLY
,strlen(ITEM_READONLY
))))
737 codeset
->read_only
= !!atoi(result
);
738 else if((result
= getConfigItem(buf
,ITEM_CHARACTERIZATION
,strlen(ITEM_CHARACTERIZATION
))))
740 if((result
[0]=='_') && (result
[1]=='(') && (result
[2]=='"'))
742 char *end
= strchr(result
+ 3, '"');
745 codeset
->characterization
= mystrndup(result
+3,end
-(result
+3));
748 codeset
->characterization
= mystrdup(result
);
755 if((*p
=='=') || (fmt2
= ((*p
=='0') || (*(p
+1)=='x'))))
760 i
= strtol((const char *)p
,(char **)&p
,16);
763 while(isspace(*p
)) p
++;
765 if(!strnicmp(p
, "U+", 2))
768 codeset
->table
[i
].ucs4
= strtol((const char *)p
,(char **)&p
,16);
773 codeset
->table
[i
].ucs4
= strtol((const char *)p
,(char **)&p
,0);
780 // check that there is not already a codeset with the same name in the list
781 if(codeset
->name
!= NULL
&& !(codesetsFind(csList
, codeset
->name
)))
785 UTF32 src
= codeset
->table
[i
].ucs4
, *src_ptr
= &src
;
786 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
788 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
,src_ptr
+1,&dest_ptr
,dest_ptr
+6,CSF_StrictConversion
);
790 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)(&codeset
->table
[i
].utf8
[1]);
793 memcpy(codeset
->table_sorted
, codeset
->table
, sizeof(codeset
->table
));
794 qsort(codeset
->table_sorted
, 256, sizeof(codeset
->table
[0]), (int (*)(const void *arg1
,const void *arg2
))codesetsCmpUnicode
);
795 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
802 if(codeset
->name
) freeArbitrateVecPooled(codeset
->name
);
803 if(codeset
->alt_name
) freeArbitrateVecPooled(codeset
->alt_name
);
804 if(codeset
->characterization
) freeArbitrateVecPooled(codeset
->characterization
);
805 freeArbitrateVecPooled(codeset
);
816 /// codesetsScanDir()
818 codesetsScanDir(struct codesetList
*csList
, const char *dirPath
)
822 if(dirPath
!= NULL
&& dirPath
[0] != '\0')
824 #if defined(__amigaos4__)
827 if((dirContext
= ObtainDirContextTags(EX_StringNameInput
, dirPath
,
828 EX_DataFields
, EXF_NAME
|EXF_TYPE
,
831 struct ExamineData
*exd
;
833 D(DBF_STARTUP
, "scanning directory '%s' for codesets tables", dirPath
);
835 while((exd
= ExamineDir(dirContext
)) != NULL
)
841 strlcpy(filePath
, dirPath
, sizeof(filePath
));
842 AddPart(filePath
, exd
->Name
, sizeof(filePath
));
844 D(DBF_STARTUP
, "about to read codeset table '%s'", filePath
);
846 codesetsReadTable(csList
, filePath
);
850 ReleaseDirContext(dirContext
);
855 if((dirLock
= Lock(dirPath
, ACCESS_READ
)))
857 struct ExAllControl
*eac
;
859 D(DBF_STARTUP
, "scanning directory '%s' for codesets tables", dirPath
);
861 if((eac
= AllocDosObject(DOS_EXALLCONTROL
, NULL
)) != NULL
)
863 struct ExAllData
*ead
;
864 struct ExAllData
*eabuffer
;
867 eac
->eac_LastKey
= 0;
868 eac
->eac_MatchString
= NULL
;
869 eac
->eac_MatchFunc
= NULL
;
871 if((eabuffer
= allocVecPooled(CodesetsBase
->pool
, 10*sizeof(struct ExAllData
))) != NULL
)
877 more
= ExAll(dirLock
, eabuffer
, 10*sizeof(struct ExAllData
), ED_TYPE
, eac
);
878 if(!more
&& IoErr() != ERROR_NO_MORE_ENTRIES
)
881 if(eac
->eac_Entries
== 0)
884 ead
= (struct ExAllData
*)eabuffer
;
887 // we only take that ead if it is a file (ed_Type < 0)
890 strlcpy(filePath
, dirPath
, sizeof(filePath
));
891 AddPart(filePath
, (char *)ead
->ed_Name
, sizeof(filePath
));
893 D(DBF_STARTUP
, "about to read codeset table '%s'", filePath
);
895 codesetsReadTable(csList
, filePath
);
898 while((ead
= ead
->ed_Next
));
902 freeVecPooled(CodesetsBase
->pool
, eabuffer
);
905 FreeDosObject(DOS_EXALLCONTROL
, eac
);
918 // Initializes and loads the codesets
920 codesetsInit(struct codesetList
*csList
)
922 struct codeset
*codeset
= NULL
;
925 #if defined(__amigaos4__)
931 ObtainSemaphore(&CodesetsBase
->poolSem
);
933 NewList((struct List
*)&CodesetsBase
->codesets
);
935 // to make the list of the supported codesets complete we also add a
936 // fake 'UTF-8' only so that our users can query for that codeset as well.
937 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
940 codeset
->name
= mystrdup("UTF-8");
941 codeset
->alt_name
= mystrdup("UTF8");
942 codeset
->characterization
= mystrdup("Unicode");
943 codeset
->read_only
= 0;
944 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
945 CodesetsBase
->utf8Codeset
= codeset
;
947 // on AmigaOS4 we can use diskfont.library to inquire charset information as
948 // it comes with a quite rich implementation of different charsets.
949 #if defined(__amigaos4__)
955 ULONG curMIB
= nextMIB
;
957 nextMIB
= ObtainCharsetInfo(DFCS_NUMBER
, curMIB
, DFCS_NEXTNUMBER
);
961 mapTable
= (ULONG
*)ObtainCharsetInfo(DFCS_NUMBER
, curMIB
, DFCS_MAPTABLE
);
962 mimename
= (char *)ObtainCharsetInfo(DFCS_NUMBER
, curMIB
, DFCS_MIMENAME
);
963 ianaName
= (char *)ObtainCharsetInfo(DFCS_NUMBER
, curMIB
, DFCS_NAME
);
964 if(mapTable
!= NULL
&& mimename
!= NULL
&& codesetsFind(csList
, mimename
) == NULL
)
966 D(DBF_STARTUP
, "loading charset '%s' from diskfont.library...", mimename
);
968 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
971 codeset
->name
= mystrdup(mimename
);
972 codeset
->alt_name
= NULL
;
973 codeset
->characterization
= mystrdup(ianaName
);
974 codeset
->read_only
= 0;
978 UTF32
*src_ptr
= &src
;
979 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
983 codeset
->table
[i
].code
= i
;
984 codeset
->table
[i
].ucs4
= src
;
985 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
, src_ptr
+1, &dest_ptr
, dest_ptr
+6, CSF_StrictConversion
);
987 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
990 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
991 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
, const void *arg2
))codesetsCmpUnicode
);
993 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
999 #if defined(__MORPHOS__)
1001 struct Library
*KeymapBase
;
1002 struct Library
*LocaleBase
;
1004 if((KeymapBase
= OpenLibrary("keymap.library", 51)) != NULL
)
1006 if((LocaleBase
= OpenLibrary("locale.library", 51)) != NULL
)
1008 struct KeyMap
*keymap
= AskKeyMapDefault();
1009 CONST_STRPTR name
= GetKeyMapCodepage(keymap
);
1011 if(name
!= NULL
&& keymap
!= NULL
) // Legacy keymaps don't have codepage or Unicode mappings
1013 D(DBF_STARTUP
, "loading charset '%s' from keymap.library...", name
);
1015 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) != NULL
)
1017 codeset
->name
= mystrdup(name
);
1018 codeset
->alt_name
= NULL
;
1019 codeset
->characterization
= mystrdup(name
); // No more information available
1020 codeset
->read_only
= 0;
1022 for(i
=0; i
<256; i
++)
1024 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1027 codeset
->table
[i
].code
= i
;
1028 codeset
->table
[i
].ucs4
= src
= ToUCS4(i
, keymap
);
1029 rc
= ConvertUCS4ToUTF8((CONST_WSTRPTR
)&src
, dest_ptr
, 1);
1031 codeset
->table
[i
].utf8
[0] = rc
;
1034 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1035 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
, const void *arg2
))codesetsCmpUnicode
);
1037 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1043 CloseLibrary(LocaleBase
);
1046 CloseLibrary(KeymapBase
);
1051 D(DBF_STARTUP
, "loading charsets from Libs:Charsets...");
1053 // we try to walk through the LIBS:Charsets directory on our own and read in
1054 // our own charset tables
1055 codesetsScanDir(csList
, "LIBS:Charsets");
1058 // now we go and initialize our internally supported codesets but only if
1059 // we have not already loaded a charset with the same name
1061 D(DBF_STARTUP
, "initializing internal charsets...");
1063 // ISO-8859-1 + EURO
1064 if(codesetsFind(csList
, "ISO-8859-1 + Euro") == NULL
)
1066 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1069 codeset
->name
= mystrdup("ISO-8859-1 + Euro");
1070 codeset
->alt_name
= NULL
;
1071 codeset
->characterization
= mystrdup("West European (with EURO)");
1072 codeset
->read_only
= 1;
1073 for(i
= 0; i
<256; i
++)
1075 UTF32
*src_ptr
= &src
;
1076 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1079 src
= 0x20AC; /* the EURO sign */
1083 codeset
->table
[i
].code
= i
;
1084 codeset
->table
[i
].ucs4
= src
;
1085 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
, src_ptr
+1, &dest_ptr
, dest_ptr
+6, CSF_StrictConversion
);
1087 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1089 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1090 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
, const void *arg2
))codesetsCmpUnicode
);
1091 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1095 if(codesetsFind(csList
, "ISO-8859-1") == NULL
)
1097 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1100 codeset
->name
= mystrdup("ISO-8859-1");
1101 codeset
->alt_name
= mystrdup("ISO8859-1");
1102 codeset
->characterization
= mystrdup("West European");
1103 codeset
->read_only
= 0;
1104 for(i
= 0; i
<256; i
++)
1106 UTF32
*src_ptr
= &src
;
1107 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1111 codeset
->table
[i
].code
= i
;
1112 codeset
->table
[i
].ucs4
= src
;
1113 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
, src_ptr
+1, &dest_ptr
, dest_ptr
+6, CSF_StrictConversion
);
1115 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1117 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1118 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
,const void *arg2
))codesetsCmpUnicode
);
1119 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1123 if(codesetsFind(csList
, "ISO-8859-2") == NULL
)
1125 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1128 codeset
->name
= mystrdup("ISO-8859-2");
1129 codeset
->alt_name
= mystrdup("ISO8859-2");
1130 codeset
->characterization
= mystrdup("Central/East European");
1131 codeset
->read_only
= 0;
1132 for(i
= 0; i
<256; i
++)
1134 UTF32
*src_ptr
= &src
;
1135 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1140 src
= iso_8859_2_to_ucs4
[i
-0xa0];
1142 codeset
->table
[i
].code
= i
;
1143 codeset
->table
[i
].ucs4
= src
;
1144 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
, src_ptr
+1, &dest_ptr
,dest_ptr
+6, CSF_StrictConversion
);
1146 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1148 memcpy(codeset
->table_sorted
, codeset
->table
, sizeof(codeset
->table
));
1149 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
,const void *arg2
))codesetsCmpUnicode
);
1150 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1154 if(codesetsFind(csList
, "ISO-8859-3") == NULL
)
1156 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1159 codeset
->name
= mystrdup("ISO-8859-3");
1160 codeset
->alt_name
= mystrdup("ISO8859-3");
1161 codeset
->characterization
= mystrdup("South European");
1162 codeset
->read_only
= 0;
1163 for(i
= 0; i
<256; i
++)
1165 UTF32
*src_ptr
= &src
;
1166 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1171 src
= iso_8859_3_to_ucs4
[i
-0xa0];
1173 codeset
->table
[i
].code
= i
;
1174 codeset
->table
[i
].ucs4
= src
;
1175 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
,src_ptr
+1,&dest_ptr
,dest_ptr
+6,CSF_StrictConversion
);
1177 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1179 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1180 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
,const void *arg2
))codesetsCmpUnicode
);
1181 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1185 if(codesetsFind(csList
, "ISO-8859-4") == NULL
)
1187 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1190 codeset
->name
= mystrdup("ISO-8859-4");
1191 codeset
->alt_name
= mystrdup("ISO8859-4");
1192 codeset
->characterization
= mystrdup("North European");
1193 codeset
->read_only
= 0;
1194 for(i
= 0; i
<256; i
++)
1196 UTF32
*src_ptr
= &src
;
1197 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1202 src
= iso_8859_4_to_ucs4
[i
-0xa0];
1204 codeset
->table
[i
].code
= i
;
1205 codeset
->table
[i
].ucs4
= src
;
1206 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
,src_ptr
+1,&dest_ptr
,dest_ptr
+6,CSF_StrictConversion
);
1208 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1210 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1211 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
, const void *arg2
))codesetsCmpUnicode
);
1212 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1216 if(codesetsFind(csList
, "ISO-8859-5") == NULL
)
1218 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1221 codeset
->name
= mystrdup("ISO-8859-5");
1222 codeset
->alt_name
= mystrdup("ISO8859-5");
1223 codeset
->characterization
= mystrdup("Slavic languages");
1224 codeset
->read_only
= 0;
1225 for(i
= 0; i
<256; i
++)
1227 UTF32
*src_ptr
= &src
;
1228 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1233 src
= iso_8859_5_to_ucs4
[i
-0xa0];
1235 codeset
->table
[i
].code
= i
;
1236 codeset
->table
[i
].ucs4
= src
;
1237 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
,src_ptr
+1,&dest_ptr
,dest_ptr
+6,CSF_StrictConversion
);
1239 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1241 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1242 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
,const void *arg2
))codesetsCmpUnicode
);
1243 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1247 if(codesetsFind(csList
, "ISO-8859-9") == NULL
)
1249 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1252 codeset
->name
= mystrdup("ISO-8859-9");
1253 codeset
->alt_name
= mystrdup("ISO8859-9");
1254 codeset
->characterization
= mystrdup("Turkish");
1255 codeset
->read_only
= 0;
1256 for(i
= 0; i
<256; i
++)
1258 UTF32
*src_ptr
= &src
;
1259 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1264 src
= iso_8859_9_to_ucs4
[i
-0xa0];
1266 codeset
->table
[i
].code
= i
;
1267 codeset
->table
[i
].ucs4
= src
;
1268 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
,src_ptr
+1,&dest_ptr
,dest_ptr
+6,CSF_StrictConversion
);
1270 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1272 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1273 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
,const void *arg2
))codesetsCmpUnicode
);
1274 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1278 if(codesetsFind(csList
, "ISO-8859-15") == NULL
)
1280 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1283 codeset
->name
= mystrdup("ISO-8859-15");
1284 codeset
->alt_name
= mystrdup("ISO8859-15");
1285 codeset
->characterization
= mystrdup("West European II");
1286 codeset
->read_only
= 0;
1287 for(i
= 0; i
<256; i
++)
1289 UTF32
*src_ptr
= &src
;
1290 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1295 src
= iso_8859_15_to_ucs4
[i
-0xa0];
1297 codeset
->table
[i
].code
= i
;
1298 codeset
->table
[i
].ucs4
= src
;
1299 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
,src_ptr
+1,&dest_ptr
,dest_ptr
+6,CSF_StrictConversion
);
1301 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1303 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof (codeset
->table
));
1304 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
,const void *arg2
))codesetsCmpUnicode
);
1305 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1309 if(codesetsFind(csList
, "ISO-8859-16") == NULL
)
1311 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1314 codeset
->name
= mystrdup("ISO-8859-16");
1315 codeset
->alt_name
= mystrdup("ISO8869-16");
1316 codeset
->characterization
= mystrdup("South-Eastern European");
1317 codeset
->read_only
= 0;
1320 UTF32
*src_ptr
= &src
;
1321 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1326 src
= iso_8859_16_to_ucs4
[i
-0xa0];
1328 codeset
->table
[i
].code
= i
;
1329 codeset
->table
[i
].ucs4
= src
;
1330 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
, src_ptr
+1, &dest_ptr
, dest_ptr
+6, CSF_StrictConversion
);
1332 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
- (ULONG
)&codeset
->table
[i
].utf8
[1];
1334 memcpy(codeset
->table_sorted
, codeset
->table
, sizeof(codeset
->table
));
1335 qsort(codeset
->table_sorted
, 256, sizeof(codeset
->table
[0]), (int (*)(const void *arg1
, const void *arg2
))codesetsCmpUnicode
);
1336 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1340 if(codesetsFind(csList
, "KOI8-R") == NULL
)
1342 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1345 codeset
->name
= mystrdup("KOI8-R");
1346 codeset
->alt_name
= mystrdup("KOI8R");
1347 codeset
->characterization
= mystrdup("Russian");
1348 codeset
->read_only
= 0;
1349 for(i
= 0; i
<256; i
++)
1351 UTF32
*src_ptr
= &src
;
1352 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1357 src
= koi8r_to_ucs4
[i
-0x80];
1359 codeset
->table
[i
].code
= i
;
1360 codeset
->table
[i
].ucs4
= src
;
1361 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
,src_ptr
+1,&dest_ptr
,dest_ptr
+6,CSF_StrictConversion
);
1363 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1365 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1366 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
,const void *arg2
))codesetsCmpUnicode
);
1367 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1371 if(codesetsFind(csList
, "AmigaPL") == NULL
)
1373 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1376 codeset
->name
= mystrdup("AmigaPL");
1377 codeset
->alt_name
= mystrdup("AmiPL");
1378 codeset
->characterization
= mystrdup("Polish (Amiga)");
1379 codeset
->read_only
= 1;
1380 for(i
=0; i
<256; i
++)
1382 UTF32
*src_ptr
= &src
;
1383 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1388 src
= amigapl_to_ucs4
[i
-0xa0];
1390 codeset
->table
[i
].code
= i
;
1391 codeset
->table
[i
].ucs4
= src
;
1392 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
,src_ptr
+1,&dest_ptr
,dest_ptr
+6,CSF_StrictConversion
);
1394 codeset
->table
[i
].utf8
[0] = (ULONG
)dest_ptr
-(ULONG
)&codeset
->table
[i
].utf8
[1];
1396 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1397 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
,const void *arg2
))codesetsCmpUnicode
);
1398 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1402 if(codesetsFind(csList
, "Amiga-1251") == NULL
)
1404 if((codeset
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codeset
))) == NULL
)
1407 codeset
->name
= mystrdup("Amiga-1251");
1408 codeset
->alt_name
= mystrdup("Ami1251");
1409 codeset
->characterization
= mystrdup("Cyrillic (Amiga)");
1410 codeset
->read_only
= 1;
1411 for(i
=0; i
<256; i
++)
1413 UTF32
*src_ptr
= &src
;
1414 UTF8
*dest_ptr
= &codeset
->table
[i
].utf8
[1];
1419 src
= amiga1251_to_ucs4
[i
-0xa0];
1421 codeset
->table
[i
].code
= i
;
1422 codeset
->table
[i
].ucs4
= src
;
1423 CodesetsConvertUTF32toUTF8((const UTF32
**)&src_ptr
, src_ptr
+1, &dest_ptr
, dest_ptr
+6, CSF_StrictConversion
);
1425 codeset
->table
[i
].utf8
[0] = (char*)dest_ptr
- (char*)&codeset
->table
[i
].utf8
[1];
1427 memcpy(codeset
->table_sorted
,codeset
->table
,sizeof(codeset
->table
));
1428 qsort(codeset
->table_sorted
,256,sizeof(codeset
->table
[0]),(int (*)(const void *arg1
, const void *arg2
))codesetsCmpUnicode
);
1429 AddTail((struct List
*)csList
, (struct Node
*)&codeset
->node
);
1433 ReleaseSemaphore(&CodesetsBase
->poolSem
);
1435 RETURN(codeset
!= 0);
1436 return codeset
!= NULL
;
1440 /// codesetsCleanup()
1441 // Frees every codeset that is linked into csList.
// Entries are unlinked one at a time with RemHead(); for each entry the
// name, alt_name and characterization strings are released (when set)
// before the codeset structure itself is freed.
1443 codesetsCleanup(struct codesetList
*csList
)
1445 struct codeset
*code
;
// RemHead() hands back the unlinked head entry; the loop runs until it
// yields a NULL pointer
1449 while((code
= (struct codeset
*)RemHead((struct List
*)csList
)))
// the three strings are optional, so each one is only freed when set
1451 if(code
->name
) freeArbitrateVecPooled(code
->name
);
1452 if(code
->alt_name
) freeArbitrateVecPooled(code
->alt_name
);
1453 if(code
->characterization
) freeArbitrateVecPooled(code
->characterization
);
// finally release the codeset structure itself
1455 freeArbitrateVecPooled(code
);
1463 // Returns the codeset with the given name from csList.
// The requested name is first handed to matchCodesetAlias(); afterwards the
// list is walked and every entry's name and, when set, alt_name is compared
// against the name with stricmp().
// NOTE(review): how 'res' is filled and returned is not visible in this
// excerpt - presumably the first match is returned and NULL otherwise.
1465 codesetsFind(struct codesetList
*csList
, const char *name
)
1467 struct codeset
*res
= NULL
;
1473 struct codeset
*mstate
, *succ
;
1474 char *matchedName
= matchCodesetAlias(name
);
// NOTE(review): the statement guarded by this test is not visible here;
// presumably the matched alias replaces 'name' for the search - confirm.
1476 if(matchedName
!= NULL
)
// walk the list; the loop ends at the node whose mln_Succ is NULL, so that
// last node is never compared
1479 for(mstate
= (struct codeset
*)csList
->list
.mlh_Head
; (succ
= (struct codeset
*)mstate
->node
.mln_Succ
); mstate
= succ
)
// a codeset matches on its primary name or, if it has one, its alt_name
1481 if(stricmp(name
, mstate
->name
) == 0 ||
1482 (mstate
->alt_name
!= NULL
&& stricmp(name
, mstate
->alt_name
) == 0))
1497 /// codesetsFindBest()
1498 // Returns the best codeset for the given text.
//
// attrs     - taglist; every CSA_CodesetList tag with non-zero ti_Data is
//             treated as an additional struct codesetList to search
// csFamily  - CSV_CodesetFamily_Cyrillic enables the cyrillic analysis below
// text      - the text to analyse
// text_len  - number of bytes of text to look at; also the initial value of
//             the best error count
// error_ptr - the error count of the selected codeset is stored through it
//
// The result is the codeset with the fewest errors, or NULL if none was
// selected.
1499 static struct codeset
*
1500 codesetsFindBest(struct TagItem
*attrs
, ULONG csFamily
, STRPTR text
, int text_len
, int *error_ptr
)
1502 struct codeset
*best_codeset
= NULL
;
1503 int best_errors
= text_len
;
1508 // in case the user specified the codeset family as a
1509 // cyrillic one we go and do our cyrillic specific analysis first
1510 if(csFamily
== CSV_CodesetFamily_Cyrillic
)
1512 #define NUM_CYRILLIC 3
1514 struct CodesetSearch
1520 struct CodesetSearch search
[NUM_CYRILLIC
];
1523 int ctr
[NUM_CYRILLIC
];
// the three cyrillic candidates, each paired with its own data table
1530 search
[0].name
= "windows-1251";
1531 search
[0].data
= cp1251_data
;
1532 search
[1].name
= "IBM866";
1533 search
[1].data
= cp866_data
;
1534 search
[2].name
= "KOI8-R";
1535 search
[2].data
= koi8r_data
;
// start with a zero score for every candidate
1537 memset(&ctr
, 0, sizeof(ctr
));
1539 tp
= (unsigned char *)text
;
1544 int mid
= max
= -466725766; // TODO: what's the magic behind this constant?
1547 for(n
=0; n
< NUM_CYRILLIC
; n
++)
1549 unsigned char la
= 0;
1550 unsigned char *tptr
= (unsigned char *)search
[n
].data
;
// flip the top bit of the current byte
1556 unsigned char lb
= (*p
++) ^ 128;
// only when neither la nor lb has bit 7 set is the candidate's table
// consulted; the entry at index (la << 7) + lb is added to the candidate's
// score as a signed value
1558 if(!((la
| lb
) & 128))
1559 ctr
[n
] += (signed char)tptr
[(la
<< 7) + lb
];
// NOTE(review): the code that maintains max/mid/gr/lr is not visible in
// this excerpt; the thresholds 500 and 1000 are taken as found.
1574 if((max
>= 500) && ((max
-mid
) >= 1000))
1580 while((*p
) && (!gr
));
1582 if(gr
|| ((!(*p
)) && lr
))
1585 // if our analysis found something, we go and try
1586 // to find the corresponding codeset in our codeset list
1589 struct TagItem
*tstate
= attrs
;
1590 struct TagItem
*tag
;
// NOTE(review): the format string below lacks the closing quote after %s
1592 D(DBF_STARTUP
, "identified text as '%s", search
[Nmax
-1].name
);
1594 // now we walk through our taglist and check every codeset list the user
// supplied via CSA_CodesetList for the identified name
1596 while((tag
= NextTagItem(&tstate
)))
1598 if(tag
->ti_Tag
== CSA_CodesetList
&& tag
->ti_Data
!= 0)
1600 struct codesetList
*csList
= (struct codesetList
*)tag
->ti_Data
;
1602 if((best_codeset
= codesetsFind(csList
, search
[Nmax
-1].name
)) != NULL
)
1607 // if we still haven't found the matching codeset
1608 // we search the internal list
1609 if(best_codeset
== NULL
)
1610 best_codeset
= codesetsFind(&CodesetsBase
->codesets
, search
[Nmax
-1].name
);
1618 // if we haven't found the best codeset (through the cyrillic analysis)
1619 // we go and do the dumb latin search in our codesetlist
1622 struct TagItem
*tstate
= attrs
;
1623 struct TagItem
*tag
;
1624 BOOL lastIteration
= FALSE
;
// once NextTagItem() has run out of tags, the assignment on the right-hand
// side sets lastIteration to TRUE and forces one more pass, which then
// works on the internal list CodesetsBase->codesets.
// NOTE(review): how the loop is left after that pass is not visible here.
1626 while((tag
= NextTagItem(&tstate
)) || (lastIteration
= TRUE
))
1628 if(lastIteration
== TRUE
|| (tag
->ti_Tag
== CSA_CodesetList
&& tag
->ti_Data
!= 0))
1630 struct codesetList
*csList
= (lastIteration
? &CodesetsBase
->codesets
: (struct codesetList
*)tag
->ti_Data
);
1631 struct codeset
*codeset
= (struct codeset
*)csList
->list
.mlh_Head
;
1633 // the following identification/detection routine is NOT really smart.
1634 // we just look up the UTF8 representation of each char
1635 // in our source text and then check if it is valid or not. As said,
1636 // not very smart, but we don't have anything better right now :(
// read-only codesets and the UTF8 codeset itself are not considered
1640 if(!codeset
->read_only
&& codeset
!= CodesetsBase
->utf8Codeset
)
1642 char *text_ptr
= text
;
1646 for(i
=0; i
< text_len
; i
++)
1648 unsigned char c
= *text_ptr
++;
1652 struct single_convert
*f
= &codeset
->table
[c
];
// a table entry whose first or second utf8 byte is zero is what this test
// singles out (presumably counted as an error - the guarded statement is
// not visible here)
1654 if(f
->utf8
[0] == 0 || f
->utf8
[1] == 0x00)
1661 D(DBF_STARTUP
, "tried to identify text as '%s' text with %ld of %ld errors", codeset
->name
, errors
, text_len
);
// remember the codeset with the fewest errors seen so far
1663 if(errors
< best_errors
)
1665 best_codeset
= codeset
;
1666 best_errors
= errors
;
1669 if(best_errors
== 0)
// advance to the next codeset of the current list
1673 codeset
= (struct codeset
*)codeset
->node
.mln_Succ
;
// hand the error count of the selected codeset back to the caller
1683 *error_ptr
= best_errors
;
1685 RETURN(best_codeset
);
1686 return best_codeset
;
1690 /**************************************************************************/
1692 /// CodesetsSupportedA()
// Builds an array with the names of all known codesets: those of the
// internal list (CodesetsBase->codesets) followed by those of every list
// supplied through a CSA_CodesetList tag in attrs.
// The array is allocated with room for one entry more than the number of
// codesets counted. Its entries are the codesets' own name pointers
// (code->name), not copies of the strings.
1694 AROS_LH1(STRPTR
*, CodesetsSupportedA
,
1695 AROS_LHA(struct TagItem
*, attrs
, A0
),
1696 struct LibraryHeader
*, library
, 15, Codesets
1702 CodesetsSupportedA(REG(a0
, UNUSED
struct TagItem
* attrs
))
1705 STRPTR
*array
= NULL
;
1706 struct TagItem
*tstate
= attrs
;
1707 struct TagItem
*tag
;
1712 // first we need to check how many codesets the internal list and all
// codeset lists supplied via CSA_CodesetList contain
1714 numCodesets
= countCodesets(&CodesetsBase
->codesets
);
1715 while((tag
= NextTagItem(&tstate
)))
1717 if(tag
->ti_Tag
== CSA_CodesetList
&& tag
->ti_Data
!= 0)
1718 numCodesets
+= countCodesets((struct codesetList
*)tag
->ti_Data
);
1721 // now that we know how many codesets we have in our lists we
1722 // can put their names into our string arrays
1725 if((array
= allocArbitrateVecPooled((numCodesets
+1)*sizeof(STRPTR
))))
1727 struct codeset
*code
;
1728 struct codeset
*succ
;
// NOTE(review): the initialisation of the index 'i' and a reset of 'tstate'
// (already exhausted by the counting loop above) are not visible in this
// excerpt - confirm both happen before the loops below.
1734 ObtainSemaphoreShared(&CodesetsBase
->libSem
);
1736 // first we walk through the internal codesets list and
// store the name pointer of each codeset in the array
1738 for(code
= (struct codeset
*)CodesetsBase
->codesets
.list
.mlh_Head
; (succ
= (struct codeset
*)code
->node
.mln_Succ
); code
= succ
, i
++)
1739 array
[i
] = code
->name
;
1741 // then we also iterate through our private codesets list
1742 while((tag
= NextTagItem(&tstate
)))
1744 if(tag
->ti_Tag
== CSA_CodesetList
&& tag
->ti_Data
!= 0)
1746 struct codesetList
*csList
= (struct codesetList
*)tag
->ti_Data
;
1748 for(code
= (struct codeset
*)csList
->list
.mlh_Head
; (succ
= (struct codeset
*)code
->node
.mln_Succ
); code
= succ
, i
++)
1749 array
[i
] = code
->name
;
1755 ReleaseSemaphore(&CodesetsBase
->libSem
);
// Library stub for CodesetsSupportedA(). Two call forms are visible: one
// takes the taglist pointer from REG_A0, the other passes the named
// parameter attrs.
// NOTE(review): the preprocessor conditionals that select between the two
// forms are not visible in this excerpt.
1767 LIBSTUB(CodesetsSupportedA
, STRPTR
*, REG(a0
, struct TagItem
*attrs
))
1770 return CodesetsSupportedA((struct TagItem
*)REG_A0
);
1772 return CodesetsSupportedA(attrs
);
// Varargs stub for CodesetsSupported(): starts the argument list with
// VA_START() and forwards the tag item pointer obtained through VA_ARG()
// to CodesetsSupportedA(), keeping the result in res.
1778 LIBSTUBVA(CodesetsSupported
, STRPTR
*, ...)
1783 VA_START(args
, self
);
1784 res
= CodesetsSupportedA(VA_ARG(args
, struct TagItem
*));
// CodesetsFreeA()
// Releases an object by passing obj to freeArbitrateVecPooled().
// The attrs taglist is marked UNUSED.
// NOTE(review): whether obj is checked against NULL first is not visible
// in this excerpt.
1794 AROS_LH2(void, CodesetsFreeA
,
1795 AROS_LHA(APTR
, obj
, A0
),
1796 AROS_LHA(struct TagItem
*, attrs
, A1
),
1797 struct LibraryHeader
*, library
, 14, Codesets
1803 CodesetsFreeA(REG(a0
, APTR obj
),
1804 REG(a1
, UNUSED
struct TagItem
*attrs
))
1810 freeArbitrateVecPooled(obj
);
// Library stub for CodesetsFreeA(). Two call forms are visible: one takes
// the arguments from REG_A0/REG_A1, the other passes the named parameters
// obj and attrs.
// NOTE(review): the preprocessor conditionals that select between the two
// forms are not visible in this excerpt.
1819 LIBSTUB(CodesetsFreeA
, void, REG(a0
, APTR obj
), REG(a1
, struct TagItem
*attrs
))
1822 return CodesetsFreeA((APTR
)REG_A0
,(struct TagItem
*)REG_A1
);
1824 return CodesetsFreeA(obj
, attrs
);
// Varargs stub for CodesetsFree(): starts the argument list after obj with
// VA_START() and forwards obj together with the tag item pointer obtained
// through VA_ARG() to CodesetsFreeA().
1830 LIBSTUBVA(CodesetsFree
, void, REG(a0
, APTR obj
), ...)
1834 VA_START(args
, obj
);
1835 CodesetsFreeA(obj
, VA_ARG(args
, struct TagItem
*));
1841 /// CodesetsSetDefaultA()
// Looks up the codeset called name in the internal list
// (CodesetsBase->codesets) and, if it exists, stores its name in the
// variable "codeset_default" through SetVar().
// The lookup and the SetVar() call happen while libSem is held shared.
// Only the internal list is searched here.
1843 AROS_LH2(struct codeset
*, CodesetsSetDefaultA
,
1844 AROS_LHA(STRPTR
, name
, A0
),
1845 AROS_LHA(struct TagItem
*, attrs
, A1
),
1846 struct LibraryHeader
*, library
, 13, Codesets
1851 struct codeset
*LIBFUNC
1852 CodesetsSetDefaultA(REG(a0
, STRPTR name
),
1853 REG(a1
, struct TagItem
*attrs
))
1856 struct codeset
*codeset
;
1860 ObtainSemaphoreShared(&CodesetsBase
->libSem
);
1862 if((codeset
= codesetsFind(&CodesetsBase
->codesets
,name
)))
// GVF_SAVE_VAR is always part of the flags; GVF_GLOBAL_ONLY is added when
// the CSA_Save tag is set (its default is FALSE)
1866 flags
= GVF_SAVE_VAR
| (GetTagData(CSA_Save
,FALSE
,attrs
) ? GVF_GLOBAL_ONLY
: 0);
// the stored value is the codeset's primary name
1868 SetVar("codeset_default",codeset
->name
,strlen(codeset
->name
),flags
);
1871 ReleaseSemaphore(&CodesetsBase
->libSem
);
// Library stub for CodesetsSetDefaultA(). Two call forms are visible: one
// takes the arguments from REG_A0/REG_A1, the other passes the named
// parameters name and attrs.
// NOTE(review): the preprocessor conditionals that select between the two
// forms are not visible in this excerpt.
1881 LIBSTUB(CodesetsSetDefaultA
, struct codeset
*, REG(a0
, STRPTR name
), REG(a1
, struct TagItem
*attrs
))
1884 return CodesetsSetDefaultA((STRPTR
)REG_A0
,(struct TagItem
*)REG_A1
);
1886 return CodesetsSetDefaultA(name
, attrs
);
// Varargs stub for CodesetsSetDefault(): starts the argument list after
// name with VA_START() and forwards name together with the tag item pointer
// obtained through VA_ARG() to CodesetsSetDefaultA(), keeping the result
// in cs.
1892 LIBSTUBVA(CodesetsSetDefault
, struct codeset
*, REG(a0
, STRPTR name
), ...)
1897 VA_START(args
, name
);
1898 cs
= CodesetsSetDefaultA(name
, VA_ARG(args
, struct TagItem
*));
// CodesetsFindA()
// Finds the codeset called name.
// The search order is: the internal list (CodesetsBase->codesets), then -
// if attrs is given - every list supplied through a CSA_CodesetList tag.
// If nothing was found and either attrs is NULL or the
// CSA_FallbackToDefault tag (default TRUE) is set, the result of
// defaultCodeset(FALSE) is used instead.
// The whole lookup runs while libSem is held shared.
1908 AROS_LH2(struct codeset
*, CodesetsFindA
,
1909 AROS_LHA(STRPTR
, name
, A0
),
1910 AROS_LHA(struct TagItem
*, attrs
, A1
),
1911 struct LibraryHeader
*, library
, 16, Codesets
1916 struct codeset
*LIBFUNC
1917 CodesetsFindA(REG(a0
, STRPTR name
), REG(a1
, struct TagItem
*attrs
))
1920 struct codeset
*codeset
= NULL
;
1924 ObtainSemaphoreShared(&CodesetsBase
->libSem
);
1926 // if no name pointer was supplied we have to return
1927 // the default codeset only.
// NOTE(review): the test on 'name' belonging to the comment above is not
// visible in this excerpt.
1930 // we first walk through our internal list and check if we
1931 // can find the requested codeset
1932 codeset
= codesetsFind(&CodesetsBase
->codesets
, name
);
1934 if(codeset
== NULL
&& attrs
!= NULL
)
1936 struct TagItem
*tstate
= attrs
;
1937 struct TagItem
*tag
;
1939 // now we walk through our taglist and check every codeset list the user
// supplied via CSA_CodesetList
1941 while((tag
= NextTagItem(&tstate
)))
1943 if(tag
->ti_Tag
== CSA_CodesetList
&& tag
->ti_Data
!= 0)
1945 struct codesetList
*csList
= (struct codesetList
*)tag
->ti_Data
;
1947 if((codeset
= codesetsFind(csList
, name
)) != NULL
)
1954 // check if we found something or not.
1955 if(codeset
== NULL
&& (attrs
== NULL
|| GetTagData(CSA_FallbackToDefault
, TRUE
, attrs
)))
1956 codeset
= defaultCodeset(FALSE
);
1958 ReleaseSemaphore(&CodesetsBase
->libSem
);
// Library stub for CodesetsFindA(). Two call forms are visible: one takes
// the arguments from REG_A0/REG_A1, the other passes the named parameters
// name and attrs.
// NOTE(review): the preprocessor conditionals that select between the two
// forms are not visible in this excerpt.
1968 LIBSTUB(CodesetsFindA
, struct codeset
*, REG(a0
, STRPTR name
), REG(a1
, struct TagItem
*attrs
))
1971 return CodesetsFindA((STRPTR
)REG_A0
,(struct TagItem
*)REG_A1
);
1973 return CodesetsFindA(name
, attrs
);
// Varargs stub for CodesetsFind(): starts the argument list after name with
// VA_START() and forwards name together with the tag item pointer obtained
// through VA_ARG() to CodesetsFindA(), keeping the result in cs.
1979 LIBSTUBVA(CodesetsFind
, struct codeset
*, REG(a0
, STRPTR name
), ...)
1984 VA_START(args
, name
);
1985 cs
= CodesetsFindA(name
, VA_ARG(args
, struct TagItem
*));
1992 /// CodesetsFindBestA()
// Determines the best matching codeset for a text via codesetsFindBest().
// Tags read from attrs:
//   CSA_Source            - the text to analyse
//   CSA_SourceLen         - its length (default: strlen() of the text, or 0
//                           when no text was given)
//   CSA_CodesetFamily     - codeset family (default CSV_CodesetFamily_Latin)
//   CSA_ErrPtr            - optional int pointer receiving the error count
//   CSA_FallbackToDefault - use defaultCodeset(FALSE) when no codeset was
//                           found (default FALSE)
// Nothing is analysed when the text is NULL or its length is 0.
// libSem is held shared during the search.
1994 AROS_LH1(struct codeset
*, CodesetsFindBestA
,
1995 AROS_LHA(struct TagItem
*, attrs
, A0
),
1996 struct LibraryHeader
*, library
, 17, Codesets
2001 struct codeset
*LIBFUNC
2002 CodesetsFindBestA(REG(a0
, struct TagItem
*attrs
))
2005 struct codeset
*codeset
= NULL
;
2009 ObtainSemaphoreShared(&CodesetsBase
->libSem
);
2013 char *text
= (char *)GetTagData(CSA_Source
, 0, attrs
);
2014 ULONG text_len
= GetTagData(CSA_SourceLen
, text
!= NULL
? strlen(text
) : 0, attrs
);
2016 if(text
!= NULL
&& text_len
> 0)
2019 ULONG csFamily
= GetTagData(CSA_CodesetFamily
, CSV_CodesetFamily_Latin
, attrs
);
2020 int *error_ptr
= (int *)GetTagData(CSA_ErrPtr
, 0, attrs
);
2021 BOOL defaultFallBack
= GetTagData(CSA_FallbackToDefault
, FALSE
, attrs
);
// the error count is collected in the local numErrors first and only
// copied to the caller's variable if a CSA_ErrPtr pointer was supplied
2023 codeset
= codesetsFindBest(attrs
, csFamily
, text
, text_len
, &numErrors
);
2025 if(error_ptr
!= NULL
)
2026 *error_ptr
= numErrors
;
2028 // if we still haven't got the codeset we fallback to the default
2029 if(codeset
== NULL
&& defaultFallBack
== TRUE
)
2030 codeset
= defaultCodeset(FALSE
);
2034 ReleaseSemaphore(&CodesetsBase
->libSem
);
// Library stub for CodesetsFindBestA(). Two call forms are visible: one
// takes the taglist pointer from REG_A0, the other passes the named
// parameter attrs.
// NOTE(review): the preprocessor conditionals that select between the two
// forms are not visible in this excerpt.
2044 LIBSTUB(CodesetsFindBestA
, struct codeset
*, REG(a0
, struct TagItem
*attrs
))
2047 return CodesetsFindBestA((struct TagItem
*)REG_A0
);
2049 return CodesetsFindBestA(attrs
);
// Varargs stub for CodesetsFindBest(): starts the argument list with
// VA_START() and forwards the tag item pointer obtained through VA_ARG()
// to CodesetsFindBestA(), keeping the result in cs.
2055 LIBSTUBVA(CodesetsFindBest
, struct codeset
*, ...)
2060 VA_START(args
, self
);
2061 cs
= CodesetsFindBestA(VA_ARG(args
, struct TagItem
*));
2068 /// CodesetsUTF8Len()
2069 // Returns the number of characters a UTF8 string has. This is not
2070 // identical to the amount of memory that is required to hold the string.
2072 AROS_LH1(ULONG
, CodesetsUTF8Len
,
2073 AROS_LHA(const UTF8
*, str
, A0
),
2074 struct LibraryHeader
*, library
, 18, Codesets
2080 CodesetsUTF8Len(REG(a0
, UTF8
*str
))
// advance by the entry trailingBytesForUTF8 holds for the byte c
// NOTE(review): the surrounding loop and the counter are not visible in
// this excerpt.
2096 str
+= trailingBytesForUTF8
[c
];
// Library stub for CodesetsUTF8Len(). Two call forms are visible: one takes
// the string pointer from REG_A0, the other passes the named parameter str.
// NOTE(review): the preprocessor conditionals that select between the two
// forms are not visible in this excerpt.
2107 LIBSTUB(CodesetsUTF8Len
, ULONG
, REG(a0
, UTF8
* str
))
2110 return CodesetsUTF8Len((UTF8
*)REG_A0
);
2112 return CodesetsUTF8Len(str
);
2118 /// CodesetsStrLenA()
// Adds up, for the bytes of str, the values stored in utf8[0] of the
// matching entries of the source codeset's conversion table.
// Tags read from attrs:
//   CSA_SourceCodeset - the codeset str is encoded in (default:
//                       defaultCodeset(TRUE))
//   CSA_SourceLen     - number of bytes of str to consider (default:
//                       strlen(str))
2120 AROS_LH2(ULONG
, CodesetsStrLenA
,
2121 AROS_LHA(STRPTR
, str
, A0
),
2122 AROS_LHA(struct TagItem
*, attrs
, A1
),
2123 struct LibraryHeader
*, library
, 23, Codesets
2129 CodesetsStrLenA(REG(a0
, STRPTR str
),
2130 REG(a1
, struct TagItem
*attrs
))
2133 struct codeset
*codeset
;
// NOTE(review): strlen(str) below requires str to be non-NULL; a preceding
// NULL check is not visible in this excerpt - confirm.
2143 if(!(codeset
= (struct codeset
*)GetTagData(CSA_SourceCodeset
, 0, attrs
)))
2144 codeset
= defaultCodeset(TRUE
);
2146 len
= GetTagData(CSA_SourceLen
, strlen(str
), attrs
);
// the loop ends at the first NUL byte or once len bytes were processed,
// whichever comes first
2151 while(((c
= *src
++) && (len
--)))
2152 res
+= codeset
->table
[c
].utf8
[0];
// Library stub for CodesetsStrLenA(). Two call forms are visible: one takes
// the arguments from REG_A0/REG_A1, the other passes the named parameters
// str and attrs.
// NOTE(review): the preprocessor conditionals that select between the two
// forms are not visible in this excerpt.
2162 LIBSTUB(CodesetsStrLenA
, ULONG
, REG(a0
, STRPTR str
),
2163 REG(a1
, struct TagItem
*attrs
))
2166 return CodesetsStrLenA((STRPTR
)REG_A0
,(struct TagItem
*)REG_A1
);
2168 return CodesetsStrLenA(str
, attrs
);
// Varargs stub for CodesetsStrLen(): starts the argument list after str
// with VA_START() and forwards str together with the tag item pointer
// obtained through VA_ARG() to CodesetsStrLenA(), keeping the result in res.
2174 LIBSTUBVA(CodesetsStrLen
, ULONG
, REG(a0
, STRPTR str
), ...)
2179 VA_START(args
, str
);
2180 res
= CodesetsStrLenA(str
, VA_ARG(args
, struct TagItem
*));
2187 /// CodesetsUTF8ToStrA()
2188 // Converts a UTF8 string to a given charset. Return the number of bytes
2189 // written to dest excluding the NULL byte (which is always ensured by this
2190 // function; it means a NULL str will produce "" as dest; anyway you should
2191 // check NULL str to not waste your time!).
// NOTE(review): the declared return type is STRPTR, so the description of
// the return value above looks outdated; a length is handed back through
// the CSA_DestLenPtr tag at the end of the function - confirm.
//
// Tags read from attrs:
//   CSA_Source              - the UTF8 source string
//   CSA_SourceLen           - its length (default: strlen() of the source)
//   CSA_Dest / CSA_DestLen  - caller supplied destination buffer and size
//   CSA_AllocIfNeeded       - allow allocating the destination (default TRUE)
//   CSA_Pool / CSA_PoolSem  - pool (and optional semaphore) to allocate from
//   CSA_DestHook            - hook that receives the output in chunks
//   CSA_DestCodeset         - target codeset (default: defaultCodeset(TRUE))
//   CSA_ErrPtr              - optional int pointer for the error count
//   CSA_MapForeignChars     - try mapUTF8toASCII() for unknown sequences
//   CSA_MapForeignCharsHook - user hook asked for a replacement as well
//   CSA_DestLenPtr          - optional ULONG pointer for the final length
2193 AROS_LH1(STRPTR
, CodesetsUTF8ToStrA
,
2194 AROS_LHA(struct TagItem
*, attrs
, A0
),
2195 struct LibraryHeader
*, library
, 19, Codesets
2201 CodesetsUTF8ToStrA(REG(a0
, struct TagItem
*attrs
))
// nothing is converted unless a source string with a length > 0 is given
2212 if((src
= (UTF8
*)GetTagData(CSA_Source
, (ULONG
)NULL
, attrs
)) != NULL
&&
2213 (srcLen
= GetTagData(CSA_SourceLen
, src
!= NULL
? strlen((char *)src
) : 0, attrs
)) > 0)
2215 struct convertMsg msg
;
2216 struct codeset
*codeset
;
2217 struct Hook
*destHook
;
2218 struct Hook
*mapForeignCharsHook
;
2220 STRPTR destIter
= NULL
;
// s is the read position, e points just behind the last source byte
2224 unsigned char *s
= src
;
2225 unsigned char *e
= (src
+srcLen
);
2226 int numConvErrors
= 0;
2227 int *numConvErrorsPtr
;
2228 BOOL mapForeignChars
;
2230 struct SignalSemaphore
*sem
= NULL
;
2232 // get some more optional attributes
2233 destHook
= (struct Hook
*)GetTagData(CSA_DestHook
, (ULONG
)NULL
, attrs
);
2234 destLen
= GetTagData(CSA_DestLen
, 0, attrs
);
2235 numConvErrorsPtr
= (int *)GetTagData(CSA_ErrPtr
, (ULONG
)NULL
, attrs
);
2236 mapForeignChars
= (BOOL
)GetTagData(CSA_MapForeignChars
, FALSE
, attrs
);
2237 mapForeignCharsHook
= (struct Hook
*)GetTagData(CSA_MapForeignCharsHook
, (ULONG
)NULL
, attrs
);
2239 // first we make sure we allocate enough memory
2240 // for our destination buffer
2241 if(destHook
!= NULL
)
// with a destination hook the output goes through the local buffer buf;
// a chunk size below 16 or above sizeof(buf) is replaced by sizeof(buf)
2243 if(destLen
< 16 || destLen
> sizeof(buf
))
2244 destLen
= sizeof(buf
);
2246 msg
.state
= CSV_Translating
;
2252 // in case the user wants us to dynamically generate the
2253 // destination buffer we do it right now
2254 if((dest
= (STRPTR
)GetTagData(CSA_Dest
, (ULONG
)NULL
, attrs
)) == NULL
||
2255 GetTagData(CSA_AllocIfNeeded
, TRUE
, attrs
) != FALSE
)
2259 // calculate the destLen
2262 unsigned char c
= *s
++;
2265 s
+= trailingBytesForUTF8
[c
];
// allocate when there is no buffer yet or the supplied one is too small
// for len bytes plus the terminating NUL
2268 if(dest
== NULL
|| (destLen
< len
+1))
2270 if((pool
= (APTR
)GetTagData(CSA_Pool
, (ULONG
)NULL
, attrs
)) != NULL
)
// a user supplied pool may come with a semaphore protecting it
2272 if((sem
= (struct SignalSemaphore
*)GetTagData(CSA_PoolSem
, (ULONG
)NULL
, attrs
)) != NULL
)
2273 ObtainSemaphore(sem
);
2275 // allocate the destination buffer
2276 dest
= allocVecPooled(pool
, len
+1);
2279 ReleaseSemaphore(sem
);
// the other allocation path uses allocArbitrateVecPooled()
2282 dest
= allocArbitrateVecPooled(len
+1);
2297 // get the destination codeset pointer
2298 if((codeset
= (struct codeset
*)GetTagData(CSA_DestCodeset
, (ULONG
)NULL
, attrs
)) == NULL
)
2299 codeset
= defaultCodeset(TRUE
);
2301 // now we convert the src string to the
2302 // destination buffer.
2305 if(destHook
== NULL
&& n
>= destLen
-1)
2308 // convert until we reach the end of the source string
2312 unsigned char c
= *s
;
// '?' is the fallback output for sequences without a replacement
2313 unsigned char d
= '?';
2314 const char *repstr
= NULL
;
2317 // check if the char is a >7bit char
2320 struct single_convert
*f
;
// lenStr is the lookup length: trailingBytesForUTF8[c] plus one for the
// lead byte
2321 int lenAdd
= trailingBytesForUTF8
[c
];
2322 int lenStr
= lenAdd
+1;
2323 unsigned char *src
= s
;
2327 // start each iteration with "no replacement found yet"
2331 // search in the UTF8 conversion table of the current charset if
2332 // we have a replacement character for the char sequence starting at s
2333 BIN_SEARCH(codeset
->table_sorted
, 0, 255, strncmp((char *)src
, (char *)codeset
->table_sorted
[m
].utf8
+1, lenStr
), f
);
2344 // the analysed char sequence (s) is not convertible to a
2345 // single visible char replacement, so we normally have to put
2346 // a ? sign as an "unknown char" sign at the very position.
2348 // For convenience, however, we allow users to replace these
2349 // UTF8 characters with char sequences that "look like" the
// original character
2351 if(mapForeignChars
== TRUE
)
2352 replen
= mapUTF8toASCII(&repstr
, src
, lenStr
);
2354 // call the hook only if the internal table yielded no suitable
// replacement
2356 if(replen
== 0 && mapForeignCharsHook
!= NULL
)
2358 struct replaceMsg rmsg
;
2360 rmsg
.dst
= (char **)&repstr
;
2362 rmsg
.srclen
= lenStr
;
2363 replen
= CallHookPkt(mapForeignCharsHook
, &rmsg
, NULL
);
2368 D(DBF_UTF
, "got UTF8 replacement (%ld)", replen
);
2370 // stay in the loop as long as one replacement function delivers
2371 // further UTF8 replacement sequences
2372 src
= (unsigned char *)repstr
;
2374 else if(replen
== 0)
2376 D(DBF_UTF
, "found no ASCII replacement for UTF8 string (%ld)", replen
);
2380 D(DBF_UTF
, "got replacement string '%s' (%ld)", repstr
? repstr
: "<null>", replen
);
2385 if(repstr
== NULL
|| replen
== 0)
2399 if(destHook
!= NULL
)
// in hook mode the hook is called with buf whenever the running index
// reaches a multiple of destLen-1
2410 if(i
%(destLen
-1)==0)
2414 CallHookPkt(destHook
, &msg
, buf
);
// store the first byte of the replacement, or the fallback d if there
// is no replacement
2424 *b
++ = replen
> 0 ? *repstr
: d
;
2428 if(i
%(destLen
-1)==0)
2432 CallHookPkt(destHook
, &msg
, buf
);
// remember the write offset, because growing the buffer may move it
2443 ULONG destPos
= destIter
-dest
;
2448 ObtainSemaphore(sem
);
2450 // grow the destination buffer by replen-1 bytes
// NOTE(review): the result is assigned straight to dest; a failure check
// is not visible in this excerpt - confirm the old buffer cannot be lost.
2451 dest
= reallocVecPooled(pool
, dest
, destLen
, destLen
+replen
-1);
2454 ReleaseSemaphore(sem
);
2457 dest
= reallocArbitrateVecPooled(dest
, destLen
, destLen
+replen
-1);
// re-derive the write position inside the new buffer and copy the
// replacement string there
2465 destIter
= dest
+destPos
;
2466 memcpy(destIter
, repstr
, replen
);
2468 // adjust our loop pointer and destination length
2470 destLen
+= replen
-1;
2472 else if(replen
== 1)
2473 *destIter
++ = *repstr
;
2484 if(destHook
!= NULL
)
// the final hook call is made with the state set to CSV_End
2486 msg
.state
= CSV_End
;
2489 CallHookPkt(destHook
,&msg
,buf
);
2494 // let us write the number of conversion errors
2495 // to the proper variable pointer, if wanted
2496 if(numConvErrorsPtr
!= NULL
)
2497 *numConvErrorsPtr
= numConvErrors
;
2500 // put the final length of our destination buffer
2501 // into the destLenPtr
2502 if((destLenPtr
= (ULONG
*)GetTagData(CSA_DestLenPtr
, (ULONG
)NULL
, attrs
)) != NULL
)
// LIBSTUB wrapper for CodesetsUTF8ToStrA(): passes the tag list on to the
// implementation and returns its STRPTR result unchanged.
2514 LIBSTUB(CodesetsUTF8ToStrA
, STRPTR
, REG(a0
, struct TagItem
*attrs
))
// Variant taking the tag list from REG_A0. The preprocessor conditional that
// selects between this return and the next one is not visible in this excerpt
// (presumably platform specific - TODO confirm).
2517 return CodesetsUTF8ToStrA((struct TagItem
*)REG_A0
);
// Variant using the declared `attrs` parameter directly.
2519 return CodesetsUTF8ToStrA(attrs
);
// Varargs front end for CodesetsUTF8ToStrA(): fetches a struct TagItem
// pointer from the variable argument list and forwards it.
2525 LIBSTUBVA(CodesetsUTF8ToStr
, STRPTR
, ...)
2530 VA_START(args
, self
);
// the tag list is read from the varargs as a single struct TagItem pointer
2531 res
= CodesetsUTF8ToStrA(VA_ARG(args
, struct TagItem
*));
2539 /// CodesetsUTF8CreateA()
2540 // Converts a string and a charset to an UTF8. Returns the UTF8.
2541 // If a destination hook is supplied always return 0.
2542 // If from is NULL, it returns NULL and doesn't call the hook.
//
// Tags read in the visible code:
//   CSA_Source           - source string; nothing is converted if it is NULL
//   CSA_SourceLen        - source length, defaults to strlen() of the source
//   CSA_SourceCodeset    - codeset of the source, defaultCodeset(TRUE) if absent
//   CSA_DestHook         - hook called with the local buffer `buf`
//   CSA_DestLen          - destination size / chunk size for the hook
//   CSA_Dest             - caller supplied destination buffer
//   CSA_AllocIfNeeded    - defaults to TRUE
//   CSA_Pool/CSA_PoolSem - pool (and optional semaphore) to allocate from
//   CSA_DestLenPtr       - optional ULONG pointer; the store through it is
//                          not visible in this excerpt
2544 AROS_LH1(UTF8
*, CodesetsUTF8CreateA
,
2545 AROS_LHA(struct TagItem
*, attrs
, A0
),
2546 struct LibraryHeader
*, library
, 20, Codesets
2552 CodesetsUTF8CreateA(REG(a0
, struct TagItem
*attrs
))
2557 ULONG fromLen
, *destLenPtr
;
2565 from
= (UTF8
*)GetTagData(CSA_Source
, 0, attrs
);
// an explicit CSA_SourceLen overrides the strlen() based default
2566 fromLen
= GetTagData(CSA_SourceLen
, from
!= NULL
? strlen((char *)from
) : 0, attrs
);
// only a non-NULL, non-empty source is processed
2568 if(from
!= NULL
&& fromLen
!= 0)
2570 struct convertMsg msg
;
2571 struct codeset
*codeset
;
2576 UBYTE
*src
, *destPtr
= NULL
, *b
= NULL
, c
;
// fall back to the default codeset when the caller named none
2578 if((codeset
= (struct codeset
*)GetTagData(CSA_SourceCodeset
, 0, attrs
)) == NULL
)
2579 codeset
= defaultCodeset(TRUE
);
2581 hook
= (struct Hook
*)GetTagData(CSA_DestHook
, 0, attrs
);
2582 destLen
= GetTagData(CSA_DestLen
,0,attrs
);
// a size below 16 or beyond the local buffer is replaced by sizeof(buf)
2586 if(destLen
<16 || destLen
>sizeof(buf
))
2587 destLen
= sizeof(buf
);
2589 msg
.state
= CSV_Translating
;
// taken when the caller passed a buffer or CSA_AllocIfNeeded is non-zero
2595 if((dest
= (UTF8
*)GetTagData(CSA_Dest
, 0, attrs
)) != NULL
||
2596 GetTagData(CSA_AllocIfNeeded
,TRUE
,attrs
))
// pre-compute the UTF8 length: utf8[0] of every table entry is summed up;
// the scan stops at a NUL byte or once flen is used up
2604 while(((c
= *src
++) && (flen
--)))
2605 len
+= codeset
->table
[c
].utf8
[0];
// allocate when no buffer was supplied or it cannot hold len+1 bytes
2607 if(dest
== NULL
|| (destLen
<len
+1))
2610 struct SignalSemaphore
*sem
;
// allocate from the caller's pool if one was given ...
2612 if((pool
= (APTR
)GetTagData(CSA_Pool
, 0, attrs
)) != NULL
)
// ... bracketing the allocation with the caller's semaphore if supplied
2614 if((sem
= (struct SignalSemaphore
*)GetTagData(CSA_PoolSem
, 0, attrs
)) != NULL
)
2615 ObtainSemaphore(sem
);
2617 // allocate the destination buffer
2618 dest
= allocVecPooled(pool
,len
+1);
2621 ReleaseSemaphore(sem
);
// presumably the else branch (no CSA_Pool given) - TODO confirm
2624 dest
= allocArbitrateVecPooled(len
+1);
2636 destPtr
= (UBYTE
*)dest
;
// walk the source until fromLen is exhausted or a NUL byte is hit
2639 for(src
= from
; fromLen
&& (c
= *src
); src
++, fromLen
--)
// step through the bytes of the UTF8 sequence, which are stored from
// utf8[1] onward and end at a 0 byte
2643 for(utf8_seq
= &codeset
->table
[c
].utf8
[1]; (c
= *utf8_seq
); utf8_seq
++)
// call the hook with buf whenever the counter i is a multiple of destLen-1
2650 if(i
%(destLen
-1)==0)
2654 CallHookPkt(hook
,&msg
,buf
);
// final hook call, flagged with CSV_End
2675 msg
.state
= CSV_End
;
2678 CallHookPkt(hook
,&msg
,buf
);
// look up the optional length pointer supplied by the caller
2686 if((destLenPtr
= (ULONG
*)GetTagData(CSA_DestLenPtr
, 0, attrs
)))
// LIBSTUB wrapper for CodesetsUTF8CreateA(): forwards the tag list and
// returns the resulting UTF8 pointer unchanged.
2697 LIBSTUB(CodesetsUTF8CreateA
, UTF8
*, REG(a0
, struct TagItem
*attrs
))
// Variant taking the tag list from REG_A0; the conditional selecting between
// the two returns is not visible here (presumably platform specific).
2700 return CodesetsUTF8CreateA((struct TagItem
*)REG_A0
);
// Variant using the declared `attrs` parameter directly.
2702 return CodesetsUTF8CreateA(attrs
);
// Varargs front end for CodesetsUTF8CreateA(): fetches a struct TagItem
// pointer from the variable argument list and forwards it.
2708 LIBSTUBVA(CodesetsUTF8Create
, UTF8
*, ...)
2713 VA_START(args
, self
);
// the tag list is read from the varargs as a single struct TagItem pointer
2714 res
= CodesetsUTF8CreateA(VA_ARG(args
, struct TagItem
*));
2722 /// CodesetsIsValidUTF8()
// GOOD_UCS(c) is true when c is at least 160, lies outside 0xD800-0xDBFF
// and is none of 0xFEFF, 0xFFFE and 0xFFFF.
2723 #define GOOD_UCS(c) \
2724 ((c) >= 160 && ((c) & ~0x3ff) != 0xd800 && \
2725 (c) != 0xfeff && (c) != 0xfffe && (c) != 0xffff)
// Takes the string passed in `s` and returns a BOOL. The visible code calls
// parseUtf8() repeatedly until it returns 0; the loop body and the value
// finally returned are not visible in this excerpt (presumably each parsed
// value is checked with GOOD_UCS() - TODO confirm).
2728 AROS_LH1(BOOL
, CodesetsIsValidUTF8
,
2729 AROS_LHA(STRPTR
, s
, A0
),
2730 struct LibraryHeader
*, library
, 24, Codesets
2736 CodesetsIsValidUTF8(REG(a0
, STRPTR s
))
// parseUtf8() receives the address of the cursor `t` and yields the next value
2744 while((n
= parseUtf8(&t
)))
// LIBSTUB wrapper for CodesetsIsValidUTF8(): forwards the string pointer and
// returns the BOOL result unchanged.
2761 LIBSTUB(CodesetsIsValidUTF8
, BOOL
, REG(a0
, STRPTR s
))
// Variant taking the string from REG_A0; the conditional selecting between
// the two returns is not visible here (presumably platform specific).
2764 return CodesetsIsValidUTF8((STRPTR
)REG_A0
);
// Variant using the declared `s` parameter directly.
2766 return CodesetsIsValidUTF8(s
);
2772 /// CodesetsConvertStrA()
2773 // Converts a given string from one source Codeset to a given destination
2774 // codeset and returns the converted string
//
// The conversion runs in up to two passes with UTF-8 as intermediate format:
// source codeset -> UTF-8 via CodesetsUTF8CreateA(), then UTF-8 ->
// destination codeset via CodesetsUTF8ToStrA(). The first pass is skipped
// when the source codeset is CodesetsBase->utf8Codeset, the second one when
// the destination codeset is.
//
// Tags read: CSA_Source, CSA_SourceLen (defaults to strlen() of the source),
// CSA_SourceCodeset and CSA_DestCodeset (both fall back to
// defaultCodeset(TRUE)), CSA_MapForeignChars (default FALSE),
// CSA_MapForeignCharsHook and CSA_DestLenPtr (receives the length of the
// result on success).
2776 AROS_LH1(STRPTR
, CodesetsConvertStrA
,
2777 AROS_LHA(struct TagItem
*, attrs
, A0
),
2778 struct LibraryHeader
*, library
, 26, Codesets
2784 CodesetsConvertStrA(REG(a0
, struct TagItem
*attrs
))
2787 STRPTR srcStr
= NULL
;
2788 STRPTR dstStr
= NULL
;
2794 // get the ptr to the src string we want to convert
2795 // from the source codeset to the dest codeset.
2796 srcStr
= (STRPTR
)GetTagData(CSA_Source
, (ULONG
)NULL
, attrs
);
// an explicit CSA_SourceLen overrides the strlen() based default
2797 srcLen
= GetTagData(CSA_SourceLen
, srcStr
!= NULL
? strlen(srcStr
) : 0, attrs
);
// only a non-NULL, non-empty source is converted
2799 if(srcStr
!= NULL
&& srcLen
> 0)
2801 struct codeset
*srcCodeset
;
2802 struct codeset
*dstCodeset
;
2804 // get the pointer to the codeset in which the src string is encoded
2805 if((srcCodeset
= (struct codeset
*)GetTagData(CSA_SourceCodeset
, (ULONG
)NULL
, attrs
)) == NULL
)
2806 srcCodeset
= defaultCodeset(TRUE
);
2808 // get the pointer to the codeset in which the dst string should be encoded
2809 if((dstCodeset
= (struct codeset
*)GetTagData(CSA_DestCodeset
, (ULONG
)NULL
, attrs
)) == NULL
)
2810 dstCodeset
= defaultCodeset(TRUE
);
// NOTE(review): both ->name members are dereferenced here before the NULL
// check that follows; this is only safe if defaultCodeset(TRUE) never
// returns NULL or D() compiles to nothing - TODO confirm
2812 D(DBF_UTF
, "srcCodeset: '%s' dstCodeset: '%s'", srcCodeset
->name
, dstCodeset
->name
);
2814 // check that the user didn't supply the very same codeset
2815 // or otherwise a conversion is not required.
2816 if(srcCodeset
!= NULL
&& dstCodeset
!= NULL
&& srcCodeset
!= dstCodeset
)
// utf8Create/strCreate start out FALSE; the statements that set them are not
// visible in this excerpt (presumably set when the respective pass produced
// a newly allocated string - TODO confirm)
2818 BOOL utf8Create
= FALSE
;
2819 BOOL strCreate
= FALSE
;
2821 ULONG utf8strLen
= 0;
2822 ULONG
*destLenPtr
= NULL
;
2823 BOOL mapForeignChars
;
2824 struct Hook
*mapForeignCharsHook
;
// these two settings are passed through to CodesetsUTF8ToStrA() below
2826 mapForeignChars
= (BOOL
)GetTagData(CSA_MapForeignChars
, FALSE
, attrs
);
2827 mapForeignCharsHook
= (struct Hook
*)GetTagData(CSA_MapForeignCharsHook
, (ULONG
)NULL
, attrs
);
2829 // if the source codeset is UTF-8 we don't have to use the UTF8Create()
2830 // function and can directly call the UTF8ToStr() function
2831 if(srcCodeset
!= CodesetsBase
->utf8Codeset
)
2833 struct TagItem tags
[] = { { CSA_SourceCodeset
, (ULONG
)srcCodeset
},
2834 { CSA_Source
, (ULONG
)srcStr
},
2835 { CSA_SourceLen
, srcLen
},
2836 { CSA_DestLenPtr
, (ULONG
)&utf8strLen
},
// first pass: source codeset -> UTF-8, length returned in utf8strLen
2839 utf8str
= CodesetsUTF8CreateA((struct TagItem
*)&tags
[0]);
// source is already UTF-8 (presumably the else branch of the test above):
// use the source string as the UTF-8 string without copying
2845 utf8str
= (UTF8
*)srcStr
;
2846 utf8strLen
= srcLen
;
2849 // in case the destination codeset is UTF-8 we don't have to actually
2850 // use the UTF8ToStr() function and can immediately return our
2852 if(utf8str
!= NULL
&& utf8strLen
> 0 && dstCodeset
!= CodesetsBase
->utf8Codeset
)
2854 struct TagItem tags
[] = { { CSA_DestCodeset
, (ULONG
)dstCodeset
},
2855 { CSA_Source
, (ULONG
)utf8str
},
2856 { CSA_SourceLen
, utf8strLen
},
2857 { CSA_DestLenPtr
, (ULONG
)&dstLen
},
2858 { CSA_MapForeignChars
, mapForeignChars
},
2859 { CSA_MapForeignCharsHook
, (ULONG
)mapForeignCharsHook
},
// second pass: UTF-8 -> destination codeset, length returned in dstLen
2862 dstStr
= CodesetsUTF8ToStrA((struct TagItem
*)&tags
[0]);
// presumably the else branch of the test above: the UTF-8 string itself
// becomes the result
2868 dstStr
= (STRPTR
)utf8str
;
2869 dstLen
= utf8strLen
;
2872 D(DBF_UTF
, "srcStr: %lx srcLen: %ld dstStr: %lx dstLen: %ld utf8create: %ld strCreate: %ld", srcStr
, srcLen
,
2877 // if everything was successful we can go and finalize everything
2878 if(dstStr
!= NULL
&& utf8str
!= NULL
)
2880 // as the conversion was a two way pass we have to either free the
2881 // memory of the utf8 string or not
2882 if(utf8Create
== TRUE
&& strCreate
== TRUE
)
2883 CodesetsFreeA(utf8str
, NULL
);
2885 // if the user wants to be informed about the length
2886 // of our destination string we store the length now in the supplied ptr.
2887 if((destLenPtr
= (ULONG
*)GetTagData(CSA_DestLenPtr
, (ULONG
)NULL
, attrs
)) != NULL
)
2888 *destLenPtr
= dstLen
;
2890 D(DBF_UTF
, "successfully converted string with len %ld", dstLen
);
// failure path: warn, then release whatever the two passes allocated
2894 W(DBF_ALWAYS
, "an error occurred while trying to convert a string");
2896 // free all memory in case the conversion didn't work out
2897 if(utf8Create
== TRUE
&& utf8str
!= NULL
)
2898 CodesetsFreeA(utf8str
, NULL
);
2900 if(strCreate
== TRUE
&& dstStr
!= NULL
)
2901 CodesetsFreeA(dstStr
, NULL
);
// LIBSTUB wrapper for CodesetsConvertStrA(): forwards the tag list and
// returns the resulting STRPTR unchanged.
2916 LIBSTUB(CodesetsConvertStrA
, STRPTR
, REG(a0
, struct TagItem
*attrs
))
// Variant taking the tag list from REG_A0; the conditional selecting between
// the two returns is not visible here (presumably platform specific).
2919 return CodesetsConvertStrA((struct TagItem
*)REG_A0
);
// Variant using the declared `attrs` parameter directly.
2921 return CodesetsConvertStrA(attrs
);
// Varargs front end for CodesetsConvertStrA(): fetches a struct TagItem
// pointer from the variable argument list and forwards it.
2927 LIBSTUBVA(CodesetsConvertStr
, STRPTR
, ...)
2932 VA_START(args
, self
);
// the tag list is read from the varargs as a single struct TagItem pointer
2933 res
= CodesetsConvertStrA(VA_ARG(args
, struct TagItem
*));
2941 /// CodesetsFreeVecPooledA()
// Returns the memory block `mem` to the pool `pool` via freeVecPooled().
// If the tag list carries a CSA_PoolSem semaphore it is obtained before the
// free; the matching release is presumably guarded by the same test (the
// guarding line is not visible in this excerpt).
2943 AROS_LH3(void, CodesetsFreeVecPooledA
,
2944 AROS_LHA(APTR
, pool
, A0
),
2945 AROS_LHA(APTR
, mem
, A1
),
2946 AROS_LHA(struct TagItem
*, attrs
, A2
),
2947 struct LibraryHeader
*, library
, 25, Codesets
2953 CodesetsFreeVecPooledA(REG(a0
, APTR pool
),
2955 REG(a2
, struct TagItem
*attrs
))
2962 struct SignalSemaphore
*sem
;
// optional caller supplied semaphore protecting the pool
2964 if((sem
= (struct SignalSemaphore
*)GetTagData(CSA_PoolSem
, 0, attrs
)))
2965 ObtainSemaphore(sem
);
2967 freeVecPooled(pool
,mem
);
2970 ReleaseSemaphore(sem
);
// LIBSTUB wrapper for CodesetsFreeVecPooledA(): forwards pool, memory block
// and tag list to the implementation.
2980 LIBSTUB(CodesetsFreeVecPooledA
, void, REG(a0
, APTR pool
),
2982 REG(a2
, struct TagItem
*attrs
))
// Variant taking the three arguments from REG_A0, REG_A1 and REG_A2; the
// conditional selecting between the two returns is not visible here
// (presumably platform specific).
2985 return CodesetsFreeVecPooledA((APTR
)REG_A0
,(APTR
)REG_A1
,(struct TagItem
*)REG_A2
);
// Variant using the declared parameters directly.
2987 return CodesetsFreeVecPooledA(pool
, mem
, attrs
);
// Varargs front end for CodesetsFreeVecPooledA(): pool and mem are passed on
// as given, the tag list is fetched from the variable argument list.
2993 LIBSTUBVA(CodesetsFreeVecPooled
, void, REG(a0
, APTR pool
),
2994 REG(a1
, APTR mem
), ...)
// `mem` is the last named parameter, so the varargs start after it
2998 VA_START(args
, mem
);
2999 CodesetsFreeVecPooledA(pool
, mem
, VA_ARG(args
, struct TagItem
*));
3004 /// CodesetsListCreateA()
// Allocates a new struct codesetList from CodesetsBase->pool, initialises it
// as an empty list and fills it according to the tag list:
//   CSA_CodesetDir    - ti_Data is passed to codesetsScanDir()
//   CSA_CodesetFile   - ti_Data is passed to codesetsReadTable()
//   CSA_SourceCodeset - ti_Data is a struct codeset whose node is appended
// If none of these tags was handled, "PROGDIR:Charsets" is scanned instead.
// The whole operation runs with CodesetsBase->poolSem held. csList starts
// out NULL and is only set by the allocation; the return statement itself
// is not visible in this excerpt.
3006 AROS_LH1(struct codesetList
*, CodesetsListCreateA
,
3007 AROS_LHA(struct TagItem
*, attrs
, A0
),
3008 struct LibraryHeader
*, library
, 27, Codesets
3013 struct codesetList
*LIBFUNC
3014 CodesetsListCreateA(REG(a0
, struct TagItem
*attrs
))
3017 struct codesetList
*csList
= NULL
;
3021 ObtainSemaphore(&CodesetsBase
->poolSem
);
3023 // no matter what, we create a codesets list we will return to the user
3024 if((csList
= allocVecPooled(CodesetsBase
->pool
, sizeof(struct codesetList
))))
// stays TRUE only while no directory/file/codeset tag has been handled
3026 BOOL scanProgDir
= TRUE
;
3027 struct TagItem
*tstate
= attrs
;
3028 struct TagItem
*tag
;
3030 // initialize the new private codeset list and put it into a separate list
3031 NewList((struct List
*)csList
);
3033 // first we get the path of the directory from which we go
3034 // and scan for charset tables from
3035 while((tag
= NextTagItem(&tstate
)))
// scan a whole directory for charset tables
3039 case CSA_CodesetDir
:
3041 codesetsScanDir(csList
, (STRPTR
)tag
->ti_Data
);
3043 scanProgDir
= FALSE
;
// read a single charset table file
3047 case CSA_CodesetFile
:
3049 codesetsReadTable(csList
, (STRPTR
)tag
->ti_Data
);
3051 scanProgDir
= FALSE
;
// append an already existing codeset structure to the new list
3055 case CSA_SourceCodeset
:
3057 struct codeset
*cs
= (struct codeset
*)tag
->ti_Data
;
// NOTE(review): no NULL check on cs is visible before it is linked in - confirm
3059 AddTail((struct List
*)csList
, (struct Node
*)&cs
->node
);
3061 scanProgDir
= FALSE
;
3067 // in case the user also wants us to scan PROGDIR:
3069 if(scanProgDir
== TRUE
)
3070 codesetsScanDir(csList
, "PROGDIR:Charsets");
3073 ReleaseSemaphore(&CodesetsBase
->poolSem
);
// LIBSTUB wrapper for CodesetsListCreateA(): forwards the tag list and
// returns the resulting struct codesetList pointer unchanged.
3083 LIBSTUB(CodesetsListCreateA
, struct codesetList
*, REG(a0
, struct TagItem
*attrs
))
// Variant taking the tag list from REG_A0; the conditional selecting between
// the two returns is not visible here (presumably platform specific).
3086 return CodesetsListCreateA((struct TagItem
*)REG_A0
);
// Variant using the declared `attrs` parameter directly.
3088 return CodesetsListCreateA(attrs
);
// Varargs front end for CodesetsListCreateA(): fetches a struct TagItem
// pointer from the variable argument list and forwards it.
3094 LIBSTUBVA(CodesetsListCreate
, struct codesetList
*, ...)
3096 struct codesetList
*res
;
3099 VA_START(args
, self
);
// the tag list is read from the varargs as a single struct TagItem pointer
3100 res
= CodesetsListCreateA(VA_ARG(args
, struct TagItem
*));
3108 /// CodesetsListDeleteA()
// Deletes every codeset list passed via a CSA_CodesetList tag: the list's
// codesets are cleaned up with codesetsCleanup() and the list structure
// itself is freed with freeArbitrateVecPooled(). CSA_FreeCodesets (default
// TRUE) is read into freeCodesets; the line that tests it is not visible in
// this excerpt (presumably it guards the codesetsCleanup() call - TODO
// confirm). Runs with CodesetsBase->poolSem held; `result` starts as FALSE
// and the statements changing/returning it are not visible here.
3110 AROS_LH1(BOOL
, CodesetsListDeleteA
,
3111 AROS_LHA(struct TagItem
*, attrs
, A0
),
3112 struct LibraryHeader
*, library
, 28, Codesets
3118 CodesetsListDeleteA(REG(a0
, struct TagItem
*attrs
))
3121 BOOL result
= FALSE
;
3124 ObtainSemaphore(&CodesetsBase
->poolSem
);
3129 struct TagItem
*tstate
= attrs
;
3130 struct TagItem
*tag
;
3132 // check if the caller wants us also to free the codesets
3133 freeCodesets
= (BOOL
)GetTagData(CSA_FreeCodesets
, TRUE
, attrs
);
3135 // now we iterate through our tag items and see what the
3136 // user wants to remove from the list
3137 while((tag
= NextTagItem(&tstate
)))
// each CSA_CodesetList tag names one list to delete
3141 case CSA_CodesetList
:
3143 struct codesetList
*csList
= (struct codesetList
*)tag
->ti_Data
;
3147 // cleanup the codesets within the list
3149 codesetsCleanup(csList
);
3151 // then free the list itself
3152 freeArbitrateVecPooled(csList
);
3161 ReleaseSemaphore(&CodesetsBase
->poolSem
);
// LIBSTUB wrapper for CodesetsListDeleteA(): forwards the tag list and
// returns the BOOL result unchanged.
3171 LIBSTUB(CodesetsListDeleteA
, BOOL
, REG(a0
, struct TagItem
*attrs
))
// Variant taking the tag list from REG_A0; the conditional selecting between
// the two returns is not visible here (presumably platform specific).
3174 return CodesetsListDeleteA((struct TagItem
*)REG_A0
);
// Variant using the declared `attrs` parameter directly.
3176 return CodesetsListDeleteA(attrs
);
// Varargs front end for CodesetsListDeleteA(): fetches a struct TagItem
// pointer from the variable argument list and forwards it.
3182 LIBSTUBVA(CodesetsListDelete
, BOOL
, ...)
3187 VA_START(args
, self
);
// the tag list is read from the varargs as a single struct TagItem pointer
3188 result
= CodesetsListDeleteA(VA_ARG(args
, struct TagItem
*));
3196 /// CodesetsListAddA()
// Adds codesets to the existing list `csList` according to the tag list:
//   CSA_CodesetDir    - ti_Data is passed to codesetsScanDir()
//   CSA_CodesetFile   - ti_Data is passed to codesetsReadTable()
//   CSA_SourceCodeset - ti_Data is a struct codeset whose node is appended
// Nothing is done when csList or attrs is NULL. Runs with
// CodesetsBase->poolSem held; `result` starts as FALSE and the statements
// changing/returning it are not visible in this excerpt.
3198 AROS_LH2(BOOL
, CodesetsListAddA
,
3199 AROS_LHA(struct codesetList
*, csList
, A0
),
3200 AROS_LHA(struct TagItem
*, attrs
, A1
),
3201 struct LibraryHeader
*, library
, 29, Codesets
3207 CodesetsListAddA(REG(a0
, struct codesetList
*csList
),
3208 REG(a1
, struct TagItem
*attrs
))
3211 BOOL result
= FALSE
;
3214 ObtainSemaphore(&CodesetsBase
->poolSem
);
3216 if(csList
!= NULL
&& attrs
!= NULL
)
3218 struct TagItem
*tstate
= attrs
;
3219 struct TagItem
*tag
;
3221 // now we iterate through our tag items and see if the user
3222 // wants to scan a whole directory or just add a file.
3223 while((tag
= NextTagItem(&tstate
)))
// scan a whole directory for charset tables
3227 case CSA_CodesetDir
:
3229 codesetsScanDir(csList
, (STRPTR
)tag
->ti_Data
);
// read a single charset table file
3234 case CSA_CodesetFile
:
3236 codesetsReadTable(csList
, (STRPTR
)tag
->ti_Data
);
// append an already existing codeset structure to the list
3241 case CSA_SourceCodeset
:
3243 struct codeset
*cs
= (struct codeset
*)tag
->ti_Data
;
// NOTE(review): no NULL check on cs is visible before it is linked in - confirm
3245 AddTail((struct List
*)csList
, (struct Node
*)&cs
->node
);
3253 ReleaseSemaphore(&CodesetsBase
->poolSem
);
// LIBSTUB wrapper for CodesetsListAddA(): forwards the list and the tag list
// and returns the BOOL result unchanged.
3263 LIBSTUB(CodesetsListAddA
, BOOL
, REG(a0
, struct codesetList
*csList
), REG(a1
, struct TagItem
*attrs
))
// Variant taking the arguments from REG_A0 and REG_A1; the conditional
// selecting between the two returns is not visible here (presumably
// platform specific).
3266 return CodesetsListAddA((struct codesetList
*)REG_A0
, (struct TagItem
*)REG_A1
);
// Variant using the declared parameters directly.
3268 return CodesetsListAddA(csList
, attrs
);
// Varargs front end for CodesetsListAddA(): csList is passed on as given,
// the tag list is fetched from the variable argument list.
3274 LIBSTUBVA(CodesetsListAdd
, BOOL
, struct codesetList
*csList
, ...)
// `csList` is the last named parameter, so the varargs start after it
3279 VA_START(args
, csList
);
3280 result
= CodesetsListAddA(csList
, VA_ARG(args
, struct TagItem
*));
3288 /// CodesetsListRemoveA()
// Unlinks every codeset passed via a CSA_SourceCodeset tag from the list it
// is currently part of and, if CSA_FreeCodesets is TRUE (the default), frees
// its name, alt_name and characterization strings as well as the codeset
// structure itself. Codesets that belong to the library's internal list
// (CodesetsBase->codesets) are refused with a warning.
// Runs with CodesetsBase->poolSem held; `result` starts as FALSE and the
// statements changing/returning it are not visible in this excerpt.
3290 AROS_LH1(BOOL
, CodesetsListRemoveA
,
3291 AROS_LHA(struct TagItem
*, attrs
, A0
),
3292 struct LibraryHeader
*, library
, 30, Codesets
3298 CodesetsListRemoveA(REG(a0
, struct TagItem
*attrs
))
3301 BOOL result
= FALSE
;
3304 ObtainSemaphore(&CodesetsBase
->poolSem
);
3309 struct TagItem
*tstate
= attrs
;
3310 struct TagItem
*tag
;
3312 // check if the caller wants us also to free the codesets
3313 freeCodesets
= (BOOL
)GetTagData(CSA_FreeCodesets
, TRUE
, attrs
);
3315 // now we iterate through our tag items and see what the
3316 // user wants to remove from the list
3317 while((tag
= NextTagItem(&tstate
)))
3321 case CSA_SourceCodeset
:
3323 struct codeset
*cs
= (struct codeset
*)tag
->ti_Data
;
3327 struct MinNode
*mstate
= &cs
->node
;
3329 // before we actually remove the node from its list, we
3330 // have to make sure it isn't part of our internal codesets list
// follow the successor chain to the end-of-list sentinel of whatever list
// the node is linked into
3331 while(mstate
->mln_Succ
)
3332 mstate
= mstate
->mln_Succ
;
// The sentinel reached above is the ADDRESS of the owning list header's
// mlh_Tail field (the only "node" whose mln_Succ is NULL). The VALUE of
// mlh_Tail is always NULL, so comparing against the value could never match
// and internal codesets were removed anyway. Compare against the address.
3334 if(mstate
!= (struct MinNode
*)&CodesetsBase
->codesets
.list
.mlh_Tail
)
3336 Remove((struct Node
*)&cs
->node
);
3338 // free all codesets data if requested.
3339 if(freeCodesets
== TRUE
)
3341 if(cs
->name
) freeArbitrateVecPooled(cs
->name
);
3342 if(cs
->alt_name
) freeArbitrateVecPooled(cs
->alt_name
);
3343 if(cs
->characterization
) freeArbitrateVecPooled(cs
->characterization
);
3345 freeArbitrateVecPooled(cs
);
// reached when the codeset turned out to live in the internal list
3351 W(DBF_ALWAYS
, "user tried to remove an internal codesets!");
3359 ReleaseSemaphore(&CodesetsBase
->poolSem
);
// LIBSTUB wrapper for CodesetsListRemoveA(): forwards the tag list and
// returns the BOOL result unchanged.
3369 LIBSTUB(CodesetsListRemoveA
, BOOL
, REG(a0
, struct TagItem
*attrs
))
// Variant taking the tag list from REG_A0; the conditional selecting between
// the two returns is not visible here (presumably platform specific).
3372 return CodesetsListRemoveA((struct TagItem
*)REG_A0
);
// Variant using the declared `attrs` parameter directly.
3374 return CodesetsListRemoveA(attrs
);
// Varargs front end for CodesetsListRemoveA(): fetches a struct TagItem
// pointer from the variable argument list and forwards it.
3380 LIBSTUBVA(CodesetsListRemove
, BOOL
, ...)
3385 VA_START(args
, self
);
// the tag list is read from the varargs as a single struct TagItem pointer
3386 result
= CodesetsListRemoveA(VA_ARG(args
, struct TagItem
*));
3395 /**************************************************************************/