workbench/libs/codesetslib/src/codesets.c

   1 /***************************************************************************
   2
   3  codesets.library - Amiga shared library for handling different codesets
   4  Copyright (C) 2001-2005 by Alfonso [alfie] Ranieri <alforan@tin.it>.
   5  Copyright (C) 2005-2008 by codesets.library Open Source Team
   6
   7  This library is free software; you can redistribute it and/or
   8  modify it under the terms of the GNU Lesser General Public
   9  License as published by the Free Software Foundation; either
  10  version 2.1 of the License, or (at your option) any later version.
  11
  12  This library is distributed in the hope that it will be useful,
  13  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  Lesser General Public License for more details.
  16
  17  codesets.library project: http://sourceforge.net/projects/codesetslib/
  18
  19  Most of the code included in this file was relicensed from GPL to LGPL
  20  from the source code of SimpleMail (http://www.sf.net/projects/simplemail)
  21  with full permissions by its authors.
  22
  23  $Id$
  24
  25 ***************************************************************************/
  26
  27 #include "lib.h"
  28
  29 #include <clib/alib_protos.h>
  30
  31 #include <diskfont/glyph.h>
  32 #include <diskfont/diskfonttag.h>
  33 #include <proto/diskfont.h>
  34 #include <ctype.h>
  35 #include <limits.h>
  36
  37 #ifdef __MORPHOS__
  38 #include <proto/keymap.h>
  39 #include <proto/locale.h>
  40 #endif
  41
  42 #include "codesets_table.h"
  43 #include "convertUTF.h"
  44 #include "codepages.h"
  45
  46 #ifndef __AROS__
  47 #include "SDI_stdarg.h"
  48 #endif /* __AROS__ */
  49
  50 #include "debug.h"
  51
  52 /**************************************************************************/
  53
  54 /// BIN_SEARCH()
  55 // search a sorted array in O(log n) e.g.
  56 // BIN_SEARCH(strings,0,sizeof(strings)/sizeof(strings[0]),strcmp(key,array[mid]),res);
  57 #define BIN_SEARCH(array,low,high,compare,result) \
  58   {\
  59     int l = low;\
  60     int h = high;\
  61     int m = (low+high)/2;\
  62     result = NULL;\
  63     while (l<=h)\
  64     {\
  65       int d = compare;\
  66       if (!d){ result = &array[m]; break; }\
  67       if (d < 0) h = m - 1;\
  68       else l = m + 1;\
  69       m = (l + h)/2;\
  70     }\
  71   }
  72
  73 ///
  74 /// mystrdup()
  75 static STRPTR
  76 mystrdup(const char *str)
  77 {
  78   STRPTR newStr = NULL;
  79
  80   ENTER();
  81
  82   if(str != NULL)
  83   {
  84     int len;
  85
  86     if((len = strlen(str)) > 0)
  87     {
  88       if((newStr = allocArbitrateVecPooled(len+1)) != NULL)
  89         strlcpy(newStr, str, len+1);
  90     }
  91   }
  92
  93   RETURN(newStr);
  94   return newStr;
  95 }
  96 ///
  97 /// mystrndup()
  98 static STRPTR
  99 mystrndup(const char *str1, int n)
 100 {
 101   STRPTR dest;
 102
 103   ENTER();
 104
 105   if((dest = allocArbitrateVecPooled(n+1)) != NULL)
 106   {
 107     if(str1 != NULL)
 108       strlcpy(dest, str1, n+1);
 109     else
 110       dest[0] = '\0';
 111
 112     dest[n] = '\0';
 113   }
 114
 115   RETURN(dest);
 116   return dest;
 117 }
 118 ///
 119 /// readLine()
 120 static ULONG
 121 readLine(BPTR fh, char *buf, ULONG size)
 122 {
 123   char *c;
 124
 125   ENTER();
 126
 127   if((c = FGets(fh, buf, size)) == NULL)
 128   {
 129     RETURN(FALSE);
 130     return FALSE;
 131   }
 132
 133   for(; *c; c++)
 134   {
 135     if(*c == '\n' || *c == '\r')
 136     {
 137       *c = '\0';
 138       break;
 139     }
 140   }
 141
 142   RETURN(TRUE);
 143   return TRUE;
 144 }
 145 ///
 146 /// getConfigItem()
 147 static const char * getConfigItem(const char *buf, const char *item, int len)
 148 {
 149   ENTER();
 150
 151   if(strnicmp(buf, item, len) == 0)
 152   {
 153     UBYTE c;
 154
 155     buf += len;
 156
 157     /* skip spaces */
 158     while((c = *buf) != '\0' && isspace(c))
 159       buf++;
 160
 161     if(*buf != '=')
 162     {
 163       RETURN(NULL);
 164       return NULL;
 165     }
 166
 167     buf++;
 168
 169     /* skip spaces */
 170     while((c = *buf) != '\0'  && isspace(c))
 171       buf++;
 172
 173     RETURN(buf);
 174     return buf;
 175   }
 176
 177   RETURN(NULL);
 178   return NULL;
 179 }
 180 ///
 181 /// parseUtf8()
 182 static int
 183 parseUtf8(STRPTR *ps)
 184 {
 185   STRPTR s = *ps;
 186   int    wc, n, i;
 187
 188   ENTER();
 189
 190   if(*s<0x80)
 191   {
 192     *ps = s+1;
 193
 194     RETURN(*s);
 195     return *s;
 196   }
 197
 198   if(*s<0xc2)
 199   {
 200     RETURN(-1);
 201     return -1;
 202   }
 203   else
 204   {
 205     if(*s<0xe0)
 206     {
 207       if((s[1] & 0xc0)!=0x80)
 208       {
 209         RETURN(-1);
 210         return -1;
 211       }
 212
 213       *ps = s+2;
 214
 215       RETURN(((s[0] & 0x1f)<<6) | (s[1] & 0x3f));
 216       return ((s[0] & 0x1f)<<6) | (s[1] & 0x3f);
 217     }
 218     else
 219     {
 220       if(*s<0xf0)
 221       {
 222         n = 3;
 223       }
 224       else
 225       {
 226         if(*s<0xf8)
 227         {
 228           n = 4;
 229         }
 230         else
 231         {
 232           if(*s<0xfc)
 233           {
 234             n = 5;
 235           }
 236           else
 237           {
 238             if(*s<0xfe)
 239             {
 240               n = 6;
 241             }
 242             else
 243             {
 244               RETURN(-1);
 245               return -1;
 246             }
 247           }
 248         }
 249       }
 250     }
 251   }
 252
 253   wc = *s++ & ((1<<(7-n))-1);
 254
 255   for(i = 1; i<n; i++)
 256   {
 257     if((*s & 0xc0) != 0x80)
 258     {
 259       RETURN(-1);
 260       return -1;
 261     }
 262
 263     wc = (wc << 6) | (*s++ & 0x3f);
 264   }
 265
 266   if(wc < (1 << (5 * n - 4)))
 267   {
 268     RETURN(-1);
 269     return -1;
 270   }
 271
 272   *ps = s;
 273
 274   RETURN(wc);
 275   return wc;
 276 }
 277
 278 ///
 279 /// countCodesets()
 280 static int
 281 countCodesets(struct codesetList *csList)
 282 {
 283   struct MinNode *node, *succ;
 284   int num;
 285
 286   for(node = csList->list.mlh_Head, num = 0; (succ = node->mln_Succ); node = succ)
 287     ++num;
 288
 289   return num;
 290 }
 291
 292 ///
 293 /// mapUTF8toASCII()
 294 // in case some UTF8 sequences can not be converted during CodesetsUTF8ToStrA(), this
 295 // function is used to replace these unknown sequences with lookalike characters that
 296 // still make the text more readable. For more replacement see
 297 // http://www.utf8-zeichentabelle.de/unicode-utf8-table.pl
 298 //
 299 // The conversion table in this function is partly borrowed from the awebcharset plugin
 300 // written by Frank Weber. See http://cvs.sunsite.dk/viewcvs.cgi/aweb/plugins/charset/awebcharset.c
 301 //
// One entry of the lookup table in mapUTF8toASCII(); it is also used as the
// search key, in which case only 'utf8' and 'utf8len' are filled in.
struct UTF8Replacement
{
  const char *utf8;     // the original UTF8 sequence we are going to replace
  const int utf8len;    // the length of the UTF8 sequence in bytes
  const char *rep;      // pointer to the replacement string
  const int replen;     // the length of the replacement string (negative to signal that the replacement is itself an UTF8 string)
};
 309
 310 static int compareUTF8Replacements(const void *p1, const void *p2)
 311 {
 312   struct UTF8Replacement *key = (struct UTF8Replacement *)p1;
 313   struct UTF8Replacement *rep = (struct UTF8Replacement *)p2;
 314   int cmp;
 315
 316   // compare the length first, after that compare the strings
 317   cmp = key->utf8len - rep->utf8len;
 318   if(cmp == 0)
 319     cmp = memcmp(key->utf8, rep->utf8, key->utf8len);
 320
 321   return cmp;
 322 }
 323
// Looks up a lookalike replacement for the UTF8 sequence of 'utf8len' bytes at 'src'.
//   dst     - receives the replacement string, or NULL if the sequence is not in the table
//   src     - start of the UTF8 sequence to replace
//   utf8len - length of that sequence in bytes
// Returns the 'replen' value of the matching table entry (negative if the
// replacement is itself an UTF8 string) or 0 if no replacement was found.
static int mapUTF8toASCII(const char **dst, const unsigned char *src, const int utf8len)
{
  int len = 0;
  struct UTF8Replacement key = { (char *)src, utf8len, NULL, 0 };
  struct UTF8Replacement *rep;

  // NOTE: this table is searched with bsearch() and compareUTF8Replacements(),
  // so it must stay sorted by sequence length first and by the bytes second.
  static struct UTF8Replacement const utf8map[] =
  {
    // U+0100 ... U+017F (Latin Extended-A)
    { "\xC4\x80", 2, "A",         1 }, // U+0100 -> A       (LATIN CAPITAL LETTER A WITH MACRON)
    { "\xC4\x81", 2, "a",         1 }, // U+0101 -> a       (LATIN SMALL LETTER A WITH MACRON)
    { "\xC4\x82", 2, "A",         1 }, // U+0102 -> A       (LATIN CAPITAL LETTER A WITH BREVE)
    { "\xC4\x83", 2, "a",         1 }, // U+0103 -> a       (LATIN SMALL LETTER A WITH BREVE)
    { "\xC4\x84", 2, "A",         1 }, // U+0104 -> A       (LATIN CAPITAL LETTER A WITH OGONEK)
    { "\xC4\x85", 2, "a",         1 }, // U+0105 -> a       (LATIN SMALL LETTER A WITH OGONEK)
    { "\xC4\x86", 2, "C",         1 }, // U+0106 -> C       (LATIN CAPITAL LETTER C WITH ACUTE)
    { "\xC4\x87", 2, "c",         1 }, // U+0107 -> c       (LATIN SMALL LETTER C WITH ACUTE)
    { "\xC4\x88", 2, "C",         1 }, // U+0108 -> C       (LATIN CAPITAL LETTER C WITH CIRCUMFLEX)
    { "\xC4\x89", 2, "c",         1 }, // U+0109 -> c       (LATIN SMALL LETTER C WITH CIRCUMFLEX)
    { "\xC4\x8A", 2, "C",         1 }, // U+010A -> C       (LATIN CAPITAL LETTER C WITH DOT ABOVE)
    { "\xC4\x8B", 2, "c",         1 }, // U+010B -> c       (LATIN SMALL LETTER C WITH DOT ABOVE)
    { "\xC4\x8C", 2, "C",         1 }, // U+010C -> C       (LATIN CAPITAL LETTER C WITH CARON)
    { "\xC4\x8D", 2, "c",         1 }, // U+010D -> c       (LATIN SMALL LETTER C WITH CARON)
    { "\xC4\x8E", 2, "D",         1 }, // U+010E -> D       (LATIN CAPITAL LETTER D WITH CARON)
    { "\xC4\x8F", 2, "d",         1 }, // U+010F -> d       (LATIN SMALL LETTER D WITH CARON)
    { "\xC4\x90", 2, "D",         1 }, // U+0110 -> D       (LATIN CAPITAL LETTER D WITH STROKE)
    { "\xC4\x91", 2, "d",         1 }, // U+0111 -> d       (LATIN SMALL LETTER D WITH STROKE)
    { "\xC4\x92", 2, "E",         1 }, // U+0112 -> E       (LATIN CAPITAL LETTER E WITH MACRON)
    { "\xC4\x93", 2, "e",         1 }, // U+0113 -> e       (LATIN SMALL LETTER E WITH MACRON)
    { "\xC4\x94", 2, "E",         1 }, // U+0114 -> E       (LATIN CAPITAL LETTER E WITH BREVE)
    { "\xC4\x95", 2, "e",         1 }, // U+0115 -> e       (LATIN SMALL LETTER E WITH BREVE)
    { "\xC4\x96", 2, "E",         1 }, // U+0116 -> E       (LATIN CAPITAL LETTER E WITH DOT ABOVE)
    { "\xC4\x97", 2, "e",         1 }, // U+0117 -> e       (LATIN SMALL LETTER E WITH DOT ABOVE)
    { "\xC4\x98", 2, "E",         1 }, // U+0118 -> E       (LATIN CAPITAL LETTER E WITH OGONEK)
    { "\xC4\x99", 2, "e",         1 }, // U+0119 -> e       (LATIN SMALL LETTER E WITH OGONEK)
    { "\xC4\x9A", 2, "E",         1 }, // U+011A -> E       (LATIN CAPITAL LETTER E WITH CARON)
    { "\xC4\x9B", 2, "e",         1 }, // U+011B -> e       (LATIN SMALL LETTER E WITH CARON)
    { "\xC4\x9C", 2, "G",         1 }, // U+011C -> G       (LATIN CAPITAL LETTER G WITH CIRCUMFLEX)
    { "\xC4\x9D", 2, "g",         1 }, // U+011D -> g       (LATIN SMALL LETTER G WITH CIRCUMFLEX)
    { "\xC4\x9E", 2, "G",         1 }, // U+011E -> G       (LATIN CAPITAL LETTER G WITH BREVE)
    { "\xC4\x9F", 2, "g",         1 }, // U+011F -> g       (LATIN SMALL LETTER G WITH BREVE)
    { "\xC4\xA0", 2, "G",         1 }, // U+0120 -> G       (LATIN CAPITAL LETTER G WITH DOT ABOVE)
    { "\xC4\xA1", 2, "g",         1 }, // U+0121 -> g       (LATIN SMALL LETTER G WITH DOT ABOVE)
    { "\xC4\xA2", 2, "G",         1 }, // U+0122 -> G       (LATIN CAPITAL LETTER G WITH CEDILLA)
    { "\xC4\xA3", 2, "g",         1 }, // U+0123 -> g       (LATIN SMALL LETTER G WITH CEDILLA)
    { "\xC4\xA4", 2, "H",         1 }, // U+0124 -> H       (LATIN CAPITAL LETTER H WITH CIRCUMFLEX)
    { "\xC4\xA5", 2, "h",         1 }, // U+0125 -> h       (LATIN SMALL LETTER H WITH CIRCUMFLEX)
    { "\xC4\xA6", 2, "H",         1 }, // U+0126 -> H       (LATIN CAPITAL LETTER H WITH STROKE)
    { "\xC4\xA7", 2, "h",         1 }, // U+0127 -> h       (LATIN SMALL LETTER H WITH STROKE)
    { "\xC4\xA8", 2, "I",         1 }, // U+0128 -> I       (LATIN CAPITAL LETTER I WITH TILDE)
    { "\xC4\xA9", 2, "i",         1 }, // U+0129 -> i       (LATIN SMALL LETTER I WITH TILDE)
    { "\xC4\xAA", 2, "I",         1 }, // U+012A -> I       (LATIN CAPITAL LETTER I WITH MACRON)
    { "\xC4\xAB", 2, "i",         1 }, // U+012B -> i       (LATIN SMALL LETTER I WITH MACRON)
    { "\xC4\xAC", 2, "I",         1 }, // U+012C -> I       (LATIN CAPITAL LETTER I WITH BREVE)
    { "\xC4\xAD", 2, "i",         1 }, // U+012D -> i       (LATIN SMALL LETTER I WITH BREVE)
    { "\xC4\xAE", 2, "I",         1 }, // U+012E -> I       (LATIN CAPITAL LETTER I WITH OGONEK)
    { "\xC4\xAF", 2, "i",         1 }, // U+012F -> i       (LATIN SMALL LETTER I WITH OGONEK)
    { "\xC4\xB0", 2, "I",         1 }, // U+0130 -> I       (LATIN CAPITAL LETTER I WITH DOT ABOVE)
    { "\xC4\xB1", 2, "i",         1 }, // U+0131 -> i       (LATIN SMALL LETTER DOTLESS I)
    { "\xC4\xB2", 2, "Ij",        2 }, // U+0132 -> Ij      (LATIN CAPITAL LIGATURE IJ)
    { "\xC4\xB3", 2, "ij",        2 }, // U+0133 -> ij      (LATIN SMALL LIGATURE IJ)
    { "\xC4\xB4", 2, "J",         1 }, // U+0134 -> J       (LATIN CAPITAL LETTER J WITH CIRCUMFLEX)
    { "\xC4\xB5", 2, "j",         1 }, // U+0135 -> j       (LATIN SMALL LETTER J WITH CIRCUMFLEX)
    { "\xC4\xB6", 2, "K",         1 }, // U+0136 -> K       (LATIN CAPITAL LETTER K WITH CEDILLA)
    { "\xC4\xB7", 2, "k",         1 }, // U+0137 -> k       (LATIN SMALL LETTER K WITH CEDILLA)
    { "\xC4\xB8", 2, "k",         1 }, // U+0138 -> k       (LATIN SMALL LETTER KRA)
    { "\xC4\xB9", 2, "L",         1 }, // U+0139 -> L       (LATIN CAPITAL LETTER L WITH ACUTE)
    { "\xC4\xBA", 2, "l",         1 }, // U+013A -> l       (LATIN SMALL LETTER L WITH ACUTE)
    { "\xC4\xBB", 2, "L",         1 }, // U+013B -> L       (LATIN CAPITAL LETTER L WITH CEDILLA)
    { "\xC4\xBC", 2, "l",         1 }, // U+013C -> l       (LATIN SMALL LETTER L WITH CEDILLA)
    { "\xC4\xBD", 2, "L",         1 }, // U+013D -> L       (LATIN CAPITAL LETTER L WITH CARON)
    { "\xC4\xBE", 2, "l",         1 }, // U+013E -> l       (LATIN SMALL LETTER L WITH CARON)
    { "\xC4\xBF", 2, "L",         1 }, // U+013F -> L       (LATIN CAPITAL LETTER L WITH MIDDLE DOT)
    { "\xC5\x80", 2, "l",         1 }, // U+0140 -> l       (LATIN SMALL LETTER L WITH MIDDLE DOT)
    { "\xC5\x81", 2, "L",         1 }, // U+0141 -> L       (LATIN CAPITAL LETTER L WITH STROKE)
    { "\xC5\x82", 2, "l",         1 }, // U+0142 -> l       (LATIN SMALL LETTER L WITH STROKE)
    { "\xC5\x83", 2, "N",         1 }, // U+0143 -> N       (LATIN CAPITAL LETTER N WITH ACUTE)
    { "\xC5\x84", 2, "n",         1 }, // U+0144 -> n       (LATIN SMALL LETTER N WITH ACUTE)
    { "\xC5\x85", 2, "N",         1 }, // U+0145 -> N       (LATIN CAPITAL LETTER N WITH CEDILLA)
    { "\xC5\x86", 2, "n",         1 }, // U+0146 -> n       (LATIN SMALL LETTER N WITH CEDILLA)
    { "\xC5\x87", 2, "N",         1 }, // U+0147 -> N       (LATIN CAPITAL LETTER N WITH CARON)
    { "\xC5\x88", 2, "n",         1 }, // U+0148 -> n       (LATIN SMALL LETTER N WITH CARON)
    { "\xC5\x89", 2, "'n",        2 }, // U+0149 -> 'n      (LATIN SMALL LETTER N PRECEDED BY APOSTROPHE)
    { "\xC5\x8A", 2, "Ng",        2 }, // U+014A -> Ng      (LATIN CAPITAL LETTER ENG)
    { "\xC5\x8B", 2, "ng",        2 }, // U+014B -> ng      (LATIN SMALL LETTER ENG)
    { "\xC5\x8C", 2, "O",         1 }, // U+014C -> O       (LATIN CAPITAL LETTER O WITH MACRON)
    { "\xC5\x8D", 2, "o",         1 }, // U+014D -> o       (LATIN SMALL LETTER O WITH MACRON)
    { "\xC5\x8E", 2, "O",         1 }, // U+014E -> O       (LATIN CAPITAL LETTER O WITH BREVE)
    { "\xC5\x8F", 2, "o",         1 }, // U+014F -> o       (LATIN SMALL LETTER O WITH BREVE)
    { "\xC5\x90", 2, "O",         1 }, // U+0150 -> O       (LATIN CAPITAL LETTER O WITH DOUBLE ACUTE)
    { "\xC5\x91", 2, "o",         1 }, // U+0151 -> o       (LATIN SMALL LETTER O WITH DOUBLE ACUTE)
    { "\xC5\x92", 2, "Oe",        2 }, // U+0152 -> Oe      (LATIN CAPITAL LIGATURE OE)
    { "\xC5\x93", 2, "oe",        2 }, // U+0153 -> oe      (LATIN SMALL LIGATURE OE)
    { "\xC5\x94", 2, "R",         1 }, // U+0154 -> R       (LATIN CAPITAL LETTER R WITH ACUTE)
    { "\xC5\x95", 2, "r",         1 }, // U+0155 -> r       (LATIN SMALL LETTER R WITH ACUTE)
    { "\xC5\x96", 2, "R",         1 }, // U+0156 -> R       (LATIN CAPITAL LETTER R WITH CEDILLA)
    { "\xC5\x97", 2, "r",         1 }, // U+0157 -> r       (LATIN SMALL LETTER R WITH CEDILLA)
    { "\xC5\x98", 2, "R",         1 }, // U+0158 -> R       (LATIN CAPITAL LETTER R WITH CARON)
    { "\xC5\x99", 2, "r",         1 }, // U+0159 -> r       (LATIN SMALL LETTER R WITH CARON)
    { "\xC5\x9A", 2, "S",         1 }, // U+015A -> S       (LATIN CAPITAL LETTER S WITH ACUTE)
    { "\xC5\x9B", 2, "s",         1 }, // U+015B -> s       (LATIN SMALL LETTER S WITH ACUTE)
    { "\xC5\x9C", 2, "S",         1 }, // U+015C -> S       (LATIN CAPITAL LETTER S WITH CIRCUMFLEX)
    { "\xC5\x9D", 2, "s",         1 }, // U+015D -> s       (LATIN SMALL LETTER S WITH CIRCUMFLEX)
    { "\xC5\x9E", 2, "S",         1 }, // U+015E -> S       (LATIN CAPITAL LETTER S WITH CEDILLA)
    { "\xC5\x9F", 2, "s",         1 }, // U+015F -> s       (LATIN SMALL LETTER S WITH CEDILLA)
    { "\xC5\xA0", 2, "S",         1 }, // U+0160 -> S       (LATIN CAPITAL LETTER S WITH CARON)
    { "\xC5\xA1", 2, "s",         1 }, // U+0161 -> s       (LATIN SMALL LETTER S WITH CARON)
    { "\xC5\xA2", 2, "T",         1 }, // U+0162 -> T       (LATIN CAPITAL LETTER T WITH CEDILLA)
    { "\xC5\xA3", 2, "t",         1 }, // U+0163 -> t       (LATIN SMALL LETTER T WITH CEDILLA)
    { "\xC5\xA4", 2, "T",         1 }, // U+0164 -> T       (LATIN CAPITAL LETTER T WITH CARON)
    { "\xC5\xA5", 2, "t",         1 }, // U+0165 -> t       (LATIN SMALL LETTER T WITH CARON)
    { "\xC5\xA6", 2, "T",         1 }, // U+0166 -> T       (LATIN CAPITAL LETTER T WITH STROKE)
    { "\xC5\xA7", 2, "t",         1 }, // U+0167 -> t       (LATIN SMALL LETTER T WITH STROKE)
    { "\xC5\xA8", 2, "U",         1 }, // U+0168 -> U       (LATIN CAPITAL LETTER U WITH TILDE)
    { "\xC5\xA9", 2, "u",         1 }, // U+0169 -> u       (LATIN SMALL LETTER U WITH TILDE)
    { "\xC5\xAA", 2, "U",         1 }, // U+016A -> U       (LATIN CAPITAL LETTER U WITH MACRON)
    { "\xC5\xAB", 2, "u",         1 }, // U+016B -> u       (LATIN SMALL LETTER U WITH MACRON)
    { "\xC5\xAC", 2, "U",         1 }, // U+016C -> U       (LATIN CAPITAL LETTER U WITH BREVE)
    { "\xC5\xAD", 2, "u",         1 }, // U+016D -> u       (LATIN SMALL LETTER U WITH BREVE)
    { "\xC5\xAE", 2, "U",         1 }, // U+016E -> U       (LATIN CAPITAL LETTER U WITH RING ABOVE)
    { "\xC5\xAF", 2, "u",         1 }, // U+016F -> u       (LATIN SMALL LETTER U WITH RING ABOVE)
    { "\xC5\xB0", 2, "U",         1 }, // U+0170 -> U       (LATIN CAPITAL LETTER U WITH DOUBLE ACUTE)
    { "\xC5\xB1", 2, "u",         1 }, // U+0171 -> u       (LATIN SMALL LETTER U WITH DOUBLE ACUTE)
    { "\xC5\xB2", 2, "U",         1 }, // U+0172 -> U       (LATIN CAPITAL LETTER U WITH OGONEK)
    { "\xC5\xB3", 2, "u",         1 }, // U+0173 -> u       (LATIN SMALL LETTER U WITH OGONEK)
    { "\xC5\xB4", 2, "W",         1 }, // U+0174 -> W       (LATIN CAPITAL LETTER W WITH CIRCUMFLEX)
    { "\xC5\xB5", 2, "w",         1 }, // U+0175 -> w       (LATIN SMALL LETTER W WITH CIRCUMFLEX)
    { "\xC5\xB6", 2, "Y",         1 }, // U+0176 -> Y       (LATIN CAPITAL LETTER Y WITH CIRCUMFLEX)
    { "\xC5\xB7", 2, "y",         1 }, // U+0177 -> y       (LATIN SMALL LETTER Y WITH CIRCUMFLEX)
    { "\xC5\xB8", 2, "Y",         1 }, // U+0178 -> Y       (LATIN CAPITAL LETTER Y WITH DIAERESIS)
    { "\xC5\xB9", 2, "Z",         1 }, // U+0179 -> Z       (LATIN CAPITAL LETTER Z WITH ACUTE)
    { "\xC5\xBA", 2, "z",         1 }, // U+017A -> z       (LATIN SMALL LETTER Z WITH ACUTE)
    { "\xC5\xBB", 2, "Z",         1 }, // U+017B -> Z       (LATIN CAPITAL LETTER Z WITH DOT ABOVE)
    { "\xC5\xBC", 2, "z",         1 }, // U+017C -> z       (LATIN SMALL LETTER Z WITH DOT ABOVE)
    { "\xC5\xBD", 2, "Z",         1 }, // U+017D -> Z       (LATIN CAPITAL LETTER Z WITH CARON)
    { "\xC5\xBE", 2, "z",         1 }, // U+017E -> z       (LATIN SMALL LETTER Z WITH CARON)
    { "\xC5\xBF", 2, "s",         1 }, // U+017F -> s       (LATIN SMALL LETTER LONG S)

    // U+2000 ... U+206F (General Punctuation)
    { "\xE2\x80\x90", 3, "-",         1 }, // U+2010 -> -       (HYPHEN)
    { "\xE2\x80\x91", 3, "-",         1 }, // U+2011 -> -       (NON-BREAKING HYPHEN)
    { "\xE2\x80\x92", 3, "--",        2 }, // U+2012 -> --      (FIGURE DASH)
    { "\xE2\x80\x93", 3, "--",        2 }, // U+2013 -> --      (EN DASH)
    { "\xE2\x80\x94", 3, "---",       3 }, // U+2014 -> ---     (EM DASH)
    { "\xE2\x80\x95", 3, "---",       3 }, // U+2015 -> ---     (HORIZONTAL BAR)
    { "\xE2\x80\x96", 3, "||",        2 }, // U+2016 -> ||      (DOUBLE VERTICAL LINE)
    { "\xE2\x80\x97", 3, "_",         1 }, // U+2017 -> _       (DOUBLE LOW LINE)
    { "\xE2\x80\x98", 3, "`",         1 }, // U+2018 -> `       (LEFT SINGLE QUOTATION MARK)
    { "\xE2\x80\x99", 3, "'",         1 }, // U+2019 -> '       (RIGHT SINGLE QUOTATION MARK)
    { "\xE2\x80\x9A", 3, ",",         1 }, // U+201A -> ,       (SINGLE LOW-9 QUOTATION MARK)
    { "\xE2\x80\x9B", 3, "'",         1 }, // U+201B -> '       (SINGLE HIGH-REVERSED-9 QUOTATION MARK)
    { "\xE2\x80\x9C", 3, "\"",        1 }, // U+201C -> "       (LEFT DOUBLE QUOTATION MARK)
    { "\xE2\x80\x9D", 3, "\"",        1 }, // U+201D -> "       (RIGHT DOUBLE QUOTATION MARK)
    { "\xE2\x80\x9E", 3, ",,",        2 }, // U+201E -> ,,      (DOUBLE LOW-9 QUOTATION MARK)
    { "\xE2\x80\x9F", 3, "``",        2 }, // U+201F -> ``      (DOUBLE HIGH-REVERSED-9 QUOTATION MARK)
    { "\xE2\x80\xA0", 3, "+",         1 }, // U+2020 -> +       (DAGGER)
    { "\xE2\x80\xA1", 3, "+",         1 }, // U+2021 -> +       (DOUBLE DAGGER)
    { "\xE2\x80\xA2", 3, "\xC2\xB7", -2 }, // U+2022 -> U+00B7  (BULLET) -> (MIDDLE POINT)
    { "\xE2\x80\xA3", 3, ".",         1 }, // U+2023 -> .       (TRIANGULAR BULLET)
    { "\xE2\x80\xA4", 3, ".",         1 }, // U+2024 -> .       (ONE DOT LEADER)
    { "\xE2\x80\xA5", 3, "..",        2 }, // U+2025 -> ..      (TWO DOT LEADER)
    { "\xE2\x80\xA6", 3, "...",       3 }, // U+2026 -> ...     (HORIZONTAL ELLIPSIS)
    { "\xE2\x80\xA7", 3, "\xC2\xB7", -2 }, // U+2027 -> U+00B7  (HYPHENATION POINT) -> (MIDDLE POINT)
    { "\xE2\x80\xB0", 3, "%.",        2 }, // U+2030 -> %.      (PER MILLE SIGN)
    { "\xE2\x80\xB1", 3, "%..",       3 }, // U+2031 -> %..     (PER TEN THOUSAND SIGN)
    { "\xE2\x80\xB2", 3, "'",         1 }, // U+2032 -> '       (PRIME)
    { "\xE2\x80\xB3", 3, "''",        2 }, // U+2033 -> ''      (DOUBLE PRIME)
    { "\xE2\x80\xB4", 3, "'''",       3 }, // U+2034 -> '''     (TRIPLE PRIME)
    { "\xE2\x80\xB5", 3, "`",         1 }, // U+2035 -> `       (REVERSED PRIME)
    { "\xE2\x80\xB6", 3, "``",        2 }, // U+2036 -> ``      (REVERSED DOUBLE PRIME)
    { "\xE2\x80\xB7", 3, "```",       3 }, // U+2037 -> ```     (REVERSED TRIPLE PRIME)
    { "\xE2\x80\xB8", 3, "^",         1 }, // U+2038 -> ^       (CARET)
    { "\xE2\x80\xB9", 3, "<",         1 }, // U+2039 -> <       (SINGLE LEFT-POINTING ANGLE QUOTATION MARK)
    { "\xE2\x80\xBA", 3, ">",         1 }, // U+203A -> >       (SINGLE RIGHT-POINTING ANGLE QUOTATION MARK)
    { "\xE2\x80\xBB", 3, "\xC3\x97", -2 }, // U+203B -> U+00D7  (REFERENCE MARK) -> (MULTIPLICATION SIGN)
    { "\xE2\x80\xBC", 3, "!!",        2 }, // U+203C -> !!      (DOUBLE EXCLAMATION MARK)
    { "\xE2\x80\xBD", 3, "?",         1 }, // U+203D -> ?       (INTERROBANG)
    { "\xE2\x81\x82", 3, "*",         1 }, // U+2042 -> *       (ASTERISM)
    { "\xE2\x81\x83", 3, ".",         1 }, // U+2043 -> .       (HYPHEN BULLET)
    { "\xE2\x81\x84", 3, "/",         1 }, // U+2044 -> /       (FRACTION SLASH)
    { "\xE2\x81\x87", 3, "??",        2 }, // U+2047 -> ??      (DOUBLE QUESTION MARK)
    { "\xE2\x81\x88", 3, "?!",        2 }, // U+2048 -> ?!      (QUESTION EXCLAMATION MARK)
    { "\xE2\x81\x89", 3, "!?",        2 }, // U+2049 -> !?      (EXCLAMATION QUESTION MARK)
    { "\xE2\x81\x8E", 3, "*",         1 }, // U+204E -> *       (LOW ASTERISK)
    { "\xE2\x81\x8F", 3, ";",         1 }, // U+204F -> ;       (REVERSED SEMICOLON)
    { "\xE2\x81\x91", 3, "*",         1 }, // U+2051 -> *       (TWO ASTERISKS ALIGNED VERTICALLY)
    { "\xE2\x81\x92", 3, "-",         1 }, // U+2052 -> -       (COMMERCIAL MINUS SIGN)
    { "\xE2\x81\x93", 3, "~",         1 }, // U+2053 -> ~       (SWUNG DASH)
    { "\xE2\x81\x95", 3, "*",         1 }, // U+2055 -> *       (FLOWER PUNCTUATION MARK)
    { "\xE2\x81\x97", 3, "''''",      4 }, // U+2057 -> ''''    (QUADRUPLE PRIME)
    { "\xE2\x81\x9A", 3, ":",         1 }, // U+205A -> :       (TWO DOT PUNCTUATION)
    { "\xE2\x81\x9C", 3, "+",         1 }, // U+205C -> +       (DOTTED CROSS)

    // U+20A0 ... U+20CF (Currency Symbols)
    { "\xE2\x82\xA0", 3, "ECU",       3 }, // U+20A0 -> ECU     (EURO-CURRENCY SIGN)
    { "\xE2\x82\xA1", 3, "CRC",       3 }, // U+20A1 -> CRC     (COLON SIGN)
    { "\xE2\x82\xA2", 3, "BRC",       3 }, // U+20A2 -> BRC     (CRUZEIRO SIGN)
    { "\xE2\x82\xA3", 3, "BEF",       3 }, // U+20A3 -> BEF     (FRENCH FRANC SIGN)
    { "\xE2\x82\xA4", 3, "ITL",       3 }, // U+20A4 -> ITL     (LIRA SIGN)
    { "\xE2\x82\xA6", 3, "NGN",       3 }, // U+20A6 -> NGN     (NAIRA SIGN)
    { "\xE2\x82\xA7", 3, "ESP",       3 }, // U+20A7 -> ESP     (PESETA SIGN)
    { "\xE2\x82\xA8", 3, "MVQ",       3 }, // U+20A8 -> MVQ     (RUPEE SIGN)
    { "\xE2\x82\xA9", 3, "KPW",       3 }, // U+20A9 -> KPW     (WON SIGN)
    { "\xE2\x82\xAA", 3, "ILS",       3 }, // U+20AA -> ILS     (NEW SHEQEL SIGN)
    { "\xE2\x82\xAB", 3, "VNC",       3 }, // U+20AB -> VNC     (DONG SIGN)
    { "\xE2\x82\xAC", 3, "EUR",       3 }, // U+20AC -> EUR     (EURO SIGN)
    { "\xE2\x82\xAD", 3, "LAK",       3 }, // U+20AD -> LAK     (KIP SIGN)
    { "\xE2\x82\xAE", 3, "MNT",       3 }, // U+20AE -> MNT     (TUGRIK SIGN)
    { "\xE2\x82\xAF", 3, "GRD",       3 }, // U+20AF -> GRD     (DRACHMA SIGN)
    { "\xE2\x82\xB0", 3, "Pf",        2 }, // U+20B0 -> Pf      (GERMAN PENNY SIGN)
    { "\xE2\x82\xB1", 3, "P",         1 }, // U+20B1 -> P       (PESO SIGN)
    { "\xE2\x82\xB2", 3, "PYG",       3 }, // U+20B2 -> PYG     (GUARANI SIGN)
    { "\xE2\x82\xB3", 3, "ARA",       3 }, // U+20B3 -> ARA     (AUSTRAL SIGN)
    { "\xE2\x82\xB4", 3, "UAH",       3 }, // U+20B4 -> UAH     (HRYVNIA SIGN)
    { "\xE2\x82\xB5", 3, "GHS",       3 }, // U+20B5 -> GHS     (CEDI SIGN)

    // U+2190 ... U+21FF (Arrows)
    { "\xE2\x86\x90", 3, "<-",        2 }, // U+2190 -> <-      (LEFTWARDS ARROW)
    { "\xE2\x86\x92", 3, "->",        2 }, // U+2192 -> ->      (RIGHTWARDS ARROW)
  };

  ENTER();

  // start with no replacement string
  *dst = NULL;

  // perform a binary search in the lookup table
  if((rep = bsearch(&key, utf8map, sizeof(utf8map) / sizeof(utf8map[0]), sizeof(utf8map[0]), compareUTF8Replacements)) != NULL)
  {
    // if we found something, then copy this over to the result variables
    *dst = rep->rep;
    len = rep->replen;
  }

  RETURN(len);
  return len;
}
 561
 562 ///
 563 /// matchCodesetAlias()
 564 //
// Maps the official MIME name of a codeset to the alternative spellings under
// which it is also accepted, see matchCodesetAlias().
struct CodesetAliases
{
  const char *MIMEname;   // The official and correct MIME name for a codeset
  const char *Aliases;    // A space separated list of well-known aliases
};
 570
 571 const struct CodesetAliases codesetAliases[] =
 572 {
 573   // MIME name       Aliases
 574   { "Amiga-1251",   "Ami1251 Amiga1251"  },
 575   { "AmigaPL",      "AmiPL Amiga-PL"     },
 576   { "ISO-8859-1",   "ISO8859-1 8859-1" },
 577   { "ISO-8859-2",   "ISO8859-2 8859-2" },
 578   { "ISO-8859-3",   "ISO8859-3 8859-3" },
 579   { "ISO-8859-4",   "ISO8859-4 8859-4" },
 580   { "ISO-8859-5",   "ISO8859-5 8859-5" },
 581   { "ISO-8859-6",   "ISO8859-6 8859-6" },
 582   { "ISO-8859-7",   "ISO8859-7 8859-7" },
 583   { "ISO-8859-8",   "ISO8859-8 8859-8" },
 584   { "ISO-8859-9",   "ISO8859-9 8859-9" },
 585   { "ISO-8859-10",  "ISO8859-10 8859-10" },
 586   { "ISO-8859-11",  "ISO8859-11 8859-11" },
 587   { "ISO-8859-12",  "ISO8859-12 8859-12" },
 588   { "ISO-8859-13",  "ISO8859-13 8859-13" },
 589   { "ISO-8859-14",  "ISO8859-14 8859-14" },
 590   { "ISO-8859-15",  "ISO8859-15 8859-15" },
 591   { "ISO-8859-16",  "ISO8859-16 8859-16" },
 592   { "ISO-8859-10",  "ISO8859-10 8859-10" },
 593   { "KOI8-R",       "KOI8R" },
 594   { "US-ASCII",     "ASCII" },
 595   { "UTF-8",        "UTF8 UTF" },
 596   { "UTF-16",       "UTF16" },
 597   { "UTF-32",       "UTF32" },
 598   { "windows-1250", "cp1250 windows1250" },
 599   { "windows-1251", "cp1251 windows1251" },
 600   { "windows-1252", "cp1252 windows1252" },
 601   { "windows-1253", "cp1253 windows1253" },
 602   { "windows-1254", "cp1254 windows1254" },
 603   { "windows-1255", "cp1255 windows1255" },
 604   { "windows-1256", "cp1256 windows1256" },
 605   { "windows-1257", "cp1257 windows1257" },
 606   { NULL,           NULL,                }
 607 };
 608
 609 static char *matchCodesetAlias(const char *search)
 610 {
 611   char *result = NULL;
 612   size_t len = strlen(search);
 613   int i;
 614
 615   ENTER();
 616
 617   for(i=0; codesetAliases[i].MIMEname != NULL; i++)
 618   {
 619     BOOL found = FALSE;
 620
 621     // search the MIMEname first
 622     if(stricmp(search, codesetAliases[i].MIMEname) == 0)
 623       found = TRUE;
 624     else
 625     {
 626       const char *s = codesetAliases[i].Aliases;
 627
 628       // loop through space separated list of aliases
 629       while(s != NULL && *s != '\0')
 630       {
 631         if(strnicmp(search, s, len) == 0)
 632         {
 633           found = TRUE;
 634           break;
 635         }
 636
 637         if((s = strpbrk(s, " ")) != NULL)
 638           s++;
 639       }
 640     }
 641
 642     if(found == TRUE)
 643     {
 644       result = (char *)codesetAliases[i].MIMEname;
 645
 646       break;
 647     }
 648   }
 649
 650   RETURN(result);
 651   return result;
 652 }
 653
 654 ///
 655
 656 /**************************************************************************/
 657
 658 /// defaultCodeset()
 659 static struct codeset *
 660 defaultCodeset(BOOL useSemaphore)
 661 {
 662   char buf[256];
 663   struct codeset *codeset;
 664
 665   ENTER();
 666
 667   if(useSemaphore == TRUE)
 668     ObtainSemaphoreShared(&CodesetsBase->libSem);
 669
 670   buf[0] = '\0';
 671   GetVar("codeset_default",buf,sizeof(buf),GVF_GLOBAL_ONLY);
 672
 673   if(buf[0] == '\0' || (codeset = codesetsFind(&CodesetsBase->codesets,buf)) == NULL)
 674     codeset = CodesetsBase->systemCodeset;
 675
 676   if(useSemaphore == TRUE)
 677     ReleaseSemaphore(&CodesetsBase->libSem);
 678
 679   RETURN(codeset);
 680   return codeset;
 681 }
 682 ///
 683 /// codesetsCmpUnicode()
 684 // The compare function
 685 static int
 686 codesetsCmpUnicode(struct single_convert *arg1,struct single_convert *arg2)
 687 {
 688   return strcmp((char*)&arg1->utf8[1], (char*)&arg2->utf8[1]);
 689 }
 690 ///
 691 /// codesetsReadTable()
 692
 693 #define ITEM_STANDARD           "Standard"
 694 #define ITEM_ALTSTANDARD        "AltStandard"
 695 #define ITEM_READONLY           "ReadOnly"
 696 #define ITEM_CHARACTERIZATION   "Characterization"
 697
 698 // Reads a coding table and adds it
 699 static BOOL
 700 codesetsReadTable(struct codesetList *csList, STRPTR name)
 701 {
 702   BPTR fh;
 703   BOOL res = FALSE;
 704
 705   ENTER();
 706
 707   D(DBF_STARTUP, "trying to fetch charset file '%s'...", name);
 708
 709   if((fh = Open(name, MODE_OLDFILE)))
 710   {
 711     struct codeset *codeset;
 712
 713     if((codeset = (struct codeset *)allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) != NULL)
 714     {
 715       int i;
 716       char buf[512];
 717
 718       memset(codeset,0,sizeof(struct codeset));
 719
 720       for(i = 0; i<256; i++)
 721         codeset->table[i].code = codeset->table[i].ucs4 = i;
 722
 723       while(readLine(fh, buf, 512*sizeof(char)))
 724       {
 725         const char *result;
 726
 727         if(buf[0]=='#')
 728           continue;
 729
 730         if((result = getConfigItem(buf, ITEM_STANDARD, strlen(ITEM_STANDARD))))
 731           codeset->name = mystrdup(result);
 732         else if(codeset->name == NULL) // a valid file starts with standard and nothing else!!
 733           break;
 734         else if((result = getConfigItem(buf,ITEM_ALTSTANDARD,strlen(ITEM_ALTSTANDARD))))
 735           codeset->alt_name = mystrdup(result);
 736         else if((result = getConfigItem(buf,ITEM_READONLY,strlen(ITEM_READONLY))))
 737           codeset->read_only = !!atoi(result);
 738         else if((result = getConfigItem(buf,ITEM_CHARACTERIZATION,strlen(ITEM_CHARACTERIZATION))))
 739         {
 740           if((result[0]=='_') && (result[1]=='(') && (result[2]=='"'))
 741           {
 742             char *end = strchr(result + 3, '"');
 743
 744             if(end)
 745               codeset->characterization = mystrndup(result+3,end-(result+3));
 746           }
 747           else
 748             codeset->characterization = mystrdup(result);
 749         }
 750         else
 751         {
 752           char *p = buf;
 753           int fmt2 = 0;
 754
 755           if((*p=='=') || (fmt2 = ((*p=='0') || (*(p+1)=='x'))))
 756           {
 757             p++;
 758             p += fmt2;
 759
 760             i = strtol((const char *)p,(char **)&p,16);
 761             if(i>0 && i<256)
 762             {
 763               while(isspace(*p)) p++;
 764
 765               if(!strnicmp(p, "U+", 2))
 766               {
 767                 p += 2;
 768                 codeset->table[i].ucs4 = strtol((const char *)p,(char **)&p,16);
 769               }
 770               else
 771               {
 772                 if(*p!='#')
 773                   codeset->table[i].ucs4 = strtol((const char *)p,(char **)&p,0);
 774               }
 775             }
 776           }
 777         }
 778       }
 779
 780       // check if there is not already codeset with the same name in here
 781       if(codeset->name != NULL && !(codesetsFind(csList, codeset->name)))
 782       {
 783         for(i=0; i<256; i++)
 784         {
 785           UTF32 src = codeset->table[i].ucs4, *src_ptr = &src;
 786           UTF8  *dest_ptr = &codeset->table[i].utf8[1];
 787
 788           CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
 789           *dest_ptr = 0;
 790           codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)(&codeset->table[i].utf8[1]);
 791         }
 792
 793         memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
 794         qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), (int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
 795         AddTail((struct List *)csList, (struct Node *)&codeset->node);
 796
 797         res = TRUE;
 798       }
 799       else
 800       {
 801         // cleanup
 802         if(codeset->name)             freeArbitrateVecPooled(codeset->name);
 803         if(codeset->alt_name)         freeArbitrateVecPooled(codeset->alt_name);
 804         if(codeset->characterization) freeArbitrateVecPooled(codeset->characterization);
 805         freeArbitrateVecPooled(codeset);
 806       }
 807     }
 808
 809     Close(fh);
 810   }
 811
 812   RETURN(res);
 813   return res;
 814 }
 815 ///
 816 /// codesetsScanDir()
 817 static void
 818 codesetsScanDir(struct codesetList *csList, const char *dirPath)
 819 {
 820   ENTER();
 821
 822   if(dirPath != NULL && dirPath[0] != '\0')
 823   {
 824     #if defined(__amigaos4__)
 825     APTR dirContext;
 826
 827     if((dirContext = ObtainDirContextTags(EX_StringNameInput, dirPath,
 828                                           EX_DataFields,      EXF_NAME|EXF_TYPE,
 829                                           TAG_END)) != NULL)
 830     {
 831       struct ExamineData *exd;
 832
 833       D(DBF_STARTUP, "scanning directory '%s' for codesets tables", dirPath);
 834
 835       while((exd = ExamineDir(dirContext)) != NULL)
 836       {
 837         if(EXD_IS_FILE(exd))
 838         {
 839           char filePath[620];
 840
 841           strlcpy(filePath, dirPath, sizeof(filePath));
 842           AddPart(filePath, exd->Name, sizeof(filePath));
 843
 844           D(DBF_STARTUP, "about to read codeset table '%s'", filePath);
 845
 846           codesetsReadTable(csList, filePath);
 847         }
 848       }
 849
 850       ReleaseDirContext(dirContext);
 851     }
 852     #else
 853     BPTR dirLock;
 854
 855     if((dirLock = Lock(dirPath, ACCESS_READ)))
 856     {
 857       struct ExAllControl *eac;
 858
 859       D(DBF_STARTUP, "scanning directory '%s' for codesets tables", dirPath);
 860
 861       if((eac = AllocDosObject(DOS_EXALLCONTROL, NULL)) != NULL)
 862       {
 863         struct ExAllData *ead;
 864         struct ExAllData *eabuffer;
 865         LONG more;
 866
 867         eac->eac_LastKey = 0;
 868         eac->eac_MatchString = NULL;
 869         eac->eac_MatchFunc = NULL;
 870
 871         if((eabuffer = allocVecPooled(CodesetsBase->pool, 10*sizeof(struct ExAllData))) != NULL)
 872         {
 873           char filePath[620];
 874
 875           do
 876           {
 877             more = ExAll(dirLock, eabuffer, 10*sizeof(struct ExAllData), ED_TYPE, eac);
 878             if(!more && IoErr() != ERROR_NO_MORE_ENTRIES)
 879               break;
 880
 881             if(eac->eac_Entries == 0)
 882               continue;
 883
 884             ead = (struct ExAllData *)eabuffer;
 885             do
 886             {
 887               // we only take that ead if it is a file (ed_Type < 0)
 888               if(ead->ed_Type < 0)
 889               {
 890                 strlcpy(filePath, dirPath, sizeof(filePath));
 891                 AddPart(filePath, (char *)ead->ed_Name, sizeof(filePath));
 892
 893                 D(DBF_STARTUP, "about to read codeset table '%s'", filePath);
 894
 895                 codesetsReadTable(csList, filePath);
 896               }
 897             }
 898             while((ead = ead->ed_Next));
 899           }
 900           while(more);
 901
 902           freeVecPooled(CodesetsBase->pool, eabuffer);
 903         }
 904
 905         FreeDosObject(DOS_EXALLCONTROL, eac);
 906       }
 907
 908       UnLock(dirLock);
 909     }
 910     #endif
 911   }
 912
 913   LEAVE();
 914 }
 915
 916 ///
 917 /// codesetsInit()
 918 // Initialized and loads the codesets
 919 BOOL
 920 codesetsInit(struct codesetList *csList)
 921 {
 922   struct codeset       *codeset = NULL;
 923   UTF32                src;
 924   int                  i;
 925   #if defined(__amigaos4__)
 926   ULONG                nextMIB = 3;
 927   #endif
 928
 929   ENTER();
 930
 931   ObtainSemaphore(&CodesetsBase->poolSem);
 932
 933   NewList((struct List *)&CodesetsBase->codesets);
 934
 935   // to make the list of the supported codesets complete we also add a
 936   // fake 'UTF-8' only so that our users can query for that codeset as well.
 937   if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
 938     goto end;
 939
 940   codeset->name             = mystrdup("UTF-8");
 941   codeset->alt_name         = mystrdup("UTF8");
 942   codeset->characterization = mystrdup("Unicode");
 943   codeset->read_only        = 0;
 944   AddTail((struct List *)csList, (struct Node *)&codeset->node);
 945   CodesetsBase->utf8Codeset = codeset;
 946
 947   // on AmigaOS4 we can use diskfont.library to inquire charset information as
 948   // it comes with a quite rich implementation of different charsets.
 949   #if defined(__amigaos4__)
 950   do
 951   {
 952     char *mimename;
 953     char *ianaName;
 954     ULONG *mapTable;
 955     ULONG curMIB = nextMIB;
 956
 957     nextMIB = ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_NEXTNUMBER);
 958     if(nextMIB == 0)
 959       break;
 960
 961     mapTable = (ULONG *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_MAPTABLE);
 962     mimename = (char *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_MIMENAME);
 963     ianaName = (char *)ObtainCharsetInfo(DFCS_NUMBER, curMIB, DFCS_NAME);
 964     if(mapTable != NULL && mimename != NULL && codesetsFind(csList, mimename) == NULL)
 965     {
 966       D(DBF_STARTUP, "loading charset '%s' from diskfont.library...", mimename);
 967
 968       if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
 969         goto end;
 970
 971       codeset->name             = mystrdup(mimename);
 972       codeset->alt_name         = NULL;
 973       codeset->characterization = mystrdup(ianaName);
 974       codeset->read_only        = 0;
 975
 976       for(i=0; i<256; i++)
 977       {
 978         UTF32 *src_ptr = &src;
 979         UTF8  *dest_ptr = &codeset->table[i].utf8[1];
 980
 981         src = mapTable[i];
 982
 983         codeset->table[i].code = i;
 984         codeset->table[i].ucs4 = src;
 985         CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
 986         *dest_ptr = 0;
 987         codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
 988       }
 989
 990       memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
 991       qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
 992
 993       AddTail((struct List *)csList, (struct Node *)&codeset->node);
 994     }
 995   }
 996   while(TRUE);
 997   #endif
 998
 999   #if defined(__MORPHOS__)
1000   {
1001     struct Library *KeymapBase;
1002     struct Library *LocaleBase;
1003
1004     if((KeymapBase = OpenLibrary("keymap.library", 51)) != NULL)
1005     {
1006       if((LocaleBase = OpenLibrary("locale.library", 51)) != NULL)
1007       {
1008         struct KeyMap *keymap = AskKeyMapDefault();
1009         CONST_STRPTR name = GetKeyMapCodepage(keymap);
1010
1011         if(name != NULL && keymap != NULL) // Legacy keymaps dont have codepage or Unicode mappings
1012         {
1013           D(DBF_STARTUP, "loading charset '%s' from keymap.library...", name);
1014
1015           if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) != NULL)
1016           {
1017              codeset->name             = mystrdup(name);
1018              codeset->alt_name         = NULL;
1019              codeset->characterization = mystrdup(name);  // No more information available
1020              codeset->read_only        = 0;
1021
1022              for(i=0; i<256; i++)
1023              {
1024                UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1025                LONG rc;
1026
1027                codeset->table[i].code = i;
1028                codeset->table[i].ucs4 = src = ToUCS4(i, keymap);
1029                rc = ConvertUCS4ToUTF8((CONST_WSTRPTR)&src, dest_ptr, 1);
1030                dest_ptr[rc] = 0;
1031                codeset->table[i].utf8[0] = rc;
1032              }
1033
1034              memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1035             qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1036
1037              AddTail((struct List *)csList, (struct Node *)&codeset->node);
1038           }
1039           else
1040             goto end;
1041         }
1042
1043         CloseLibrary(LocaleBase);
1044       }
1045
1046       CloseLibrary(KeymapBase);
1047     }
1048   }
1049   #endif
1050
1051   D(DBF_STARTUP, "loading charsets from Libs:Charsets...");
1052
1053   // we try to walk to the LIBS:Charsets directory on our own and readin our
1054   // own charset tables
1055   codesetsScanDir(csList, "LIBS:Charsets");
1056
1057   //
1058   // now we go and initialize our internally supported codesets but only if
1059   // we have not already loaded a charset with the same name
1060   //
1061   D(DBF_STARTUP, "initializing internal charsets...");
1062
1063   // ISO-8859-1 + EURO
1064   if(codesetsFind(csList, "ISO-8859-1 + Euro") == NULL)
1065   {
1066     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1067       goto end;
1068
1069     codeset->name             = mystrdup("ISO-8859-1 + Euro");
1070     codeset->alt_name         = NULL;
1071     codeset->characterization = mystrdup("West European (with EURO)");
1072     codeset->read_only        = 1;
1073     for(i = 0; i<256; i++)
1074     {
1075       UTF32 *src_ptr = &src;
1076       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1077
1078       if(i==164)
1079         src = 0x20AC; /* the EURO sign */
1080       else
1081         src = i;
1082
1083       codeset->table[i].code = i;
1084       codeset->table[i].ucs4 = src;
1085       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1086       *dest_ptr = 0;
1087       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1088     }
1089     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1090     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1091     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1092   }
1093
1094   // ISO-8859-1
1095   if(codesetsFind(csList, "ISO-8859-1") == NULL)
1096   {
1097     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1098       goto end;
1099
1100     codeset->name             = mystrdup("ISO-8859-1");
1101     codeset->alt_name         = mystrdup("ISO8859-1");
1102     codeset->characterization = mystrdup("West European");
1103     codeset->read_only        = 0;
1104     for(i = 0; i<256; i++)
1105     {
1106       UTF32 *src_ptr = &src;
1107       UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1108
1109       src = i;
1110
1111       codeset->table[i].code = i;
1112       codeset->table[i].ucs4 = src;
1113       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1114       *dest_ptr = 0;
1115       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1116     }
1117     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1118     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1119     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1120   }
1121
1122   // ISO-8859-2
1123   if(codesetsFind(csList, "ISO-8859-2") == NULL)
1124   {
1125     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1126       goto end;
1127
1128     codeset->name             = mystrdup("ISO-8859-2");
1129     codeset->alt_name         = mystrdup("ISO8859-2");
1130     codeset->characterization = mystrdup("Central/East European");
1131     codeset->read_only        = 0;
1132     for(i = 0; i<256; i++)
1133     {
1134       UTF32 *src_ptr = &src;
1135       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1136
1137       if(i<0xa0)
1138         src = i;
1139       else
1140         src = iso_8859_2_to_ucs4[i-0xa0];
1141
1142       codeset->table[i].code = i;
1143       codeset->table[i].ucs4 = src;
1144       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr,dest_ptr+6, CSF_StrictConversion);
1145       *dest_ptr = 0;
1146       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1147     }
1148     memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1149     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1150     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1151   }
1152
1153   // ISO-8859-3
1154   if(codesetsFind(csList, "ISO-8859-3") == NULL)
1155   {
1156     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1157       goto end;
1158
1159     codeset->name             = mystrdup("ISO-8859-3");
1160     codeset->alt_name         = mystrdup("ISO8859-3");
1161     codeset->characterization = mystrdup("South European");
1162     codeset->read_only        = 0;
1163     for(i = 0; i<256; i++)
1164     {
1165       UTF32 *src_ptr = &src;
1166       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1167
1168       if(i<0xa0)
1169         src = i;
1170       else
1171         src = iso_8859_3_to_ucs4[i-0xa0];
1172
1173       codeset->table[i].code = i;
1174       codeset->table[i].ucs4 = src;
1175       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1176       *dest_ptr = 0;
1177       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1178     }
1179     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1180     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1181     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1182   }
1183
1184   // ISO-8859-4
1185   if(codesetsFind(csList, "ISO-8859-4") == NULL)
1186   {
1187     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1188       goto end;
1189
1190     codeset->name             = mystrdup("ISO-8859-4");
1191     codeset->alt_name         = mystrdup("ISO8859-4");
1192     codeset->characterization = mystrdup("North European");
1193     codeset->read_only        = 0;
1194     for(i = 0; i<256; i++)
1195     {
1196       UTF32 *src_ptr = &src;
1197       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1198
1199       if(i<0xa0)
1200         src = i;
1201       else
1202         src = iso_8859_4_to_ucs4[i-0xa0];
1203
1204       codeset->table[i].code = i;
1205       codeset->table[i].ucs4 = src;
1206       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1207       *dest_ptr = 0;
1208       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1209     }
1210     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1211     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1212     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1213   }
1214
1215   // ISO-8859-5
1216   if(codesetsFind(csList, "ISO-8859-5") == NULL)
1217   {
1218     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1219       goto end;
1220
1221     codeset->name             = mystrdup("ISO-8859-5");
1222     codeset->alt_name         = mystrdup("ISO8859-5");
1223     codeset->characterization = mystrdup("Slavic languages");
1224     codeset->read_only        = 0;
1225     for(i = 0; i<256; i++)
1226     {
1227       UTF32 *src_ptr = &src;
1228       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1229
1230       if(i<0xa0)
1231         src = i;
1232       else
1233         src = iso_8859_5_to_ucs4[i-0xa0];
1234
1235       codeset->table[i].code = i;
1236       codeset->table[i].ucs4 = src;
1237       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1238       *dest_ptr = 0;
1239       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1240     }
1241     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1242     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1243     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1244   }
1245
1246   // ISO-8859-9
1247   if(codesetsFind(csList, "ISO-8859-9") == NULL)
1248   {
1249     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1250       goto end;
1251
1252     codeset->name             = mystrdup("ISO-8859-9");
1253     codeset->alt_name         = mystrdup("ISO8859-9");
1254     codeset->characterization = mystrdup("Turkish");
1255     codeset->read_only        = 0;
1256     for(i = 0; i<256; i++)
1257     {
1258       UTF32 *src_ptr = &src;
1259       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1260
1261       if(i<0xa0)
1262         src = i;
1263       else
1264         src = iso_8859_9_to_ucs4[i-0xa0];
1265
1266       codeset->table[i].code = i;
1267       codeset->table[i].ucs4 = src;
1268       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1269       *dest_ptr = 0;
1270       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1271     }
1272     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1273     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1274     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1275   }
1276
1277   // ISO-8859-15
1278   if(codesetsFind(csList, "ISO-8859-15") == NULL)
1279   {
1280     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1281       goto end;
1282
1283     codeset->name             = mystrdup("ISO-8859-15");
1284     codeset->alt_name         = mystrdup("ISO8859-15");
1285     codeset->characterization = mystrdup("West European II");
1286     codeset->read_only        = 0;
1287     for(i = 0; i<256; i++)
1288     {
1289       UTF32 *src_ptr = &src;
1290       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1291
1292       if(i<0xa0)
1293         src = i;
1294       else
1295         src = iso_8859_15_to_ucs4[i-0xa0];
1296
1297       codeset->table[i].code = i;
1298       codeset->table[i].ucs4 = src;
1299       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1300       *dest_ptr = 0;
1301       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1302     }
1303     memcpy(codeset->table_sorted,codeset->table,sizeof (codeset->table));
1304     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1305     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1306   }
1307
1308   // ISO-8859-16
1309   if(codesetsFind(csList, "ISO-8859-16") == NULL)
1310   {
1311     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1312       goto end;
1313
1314     codeset->name             = mystrdup("ISO-8859-16");
1315     codeset->alt_name         = mystrdup("ISO8869-16");
1316     codeset->characterization = mystrdup("South-Eastern European");
1317     codeset->read_only        = 0;
1318     for(i=0;i<256;i++)
1319     {
1320       UTF32 *src_ptr = &src;
1321       UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1322
1323       if(i < 0xa0)
1324         src = i;
1325       else
1326         src = iso_8859_16_to_ucs4[i-0xa0];
1327
1328       codeset->table[i].code = i;
1329       codeset->table[i].ucs4 = src;
1330       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1331       *dest_ptr = 0;
1332       codeset->table[i].utf8[0] = (ULONG)dest_ptr - (ULONG)&codeset->table[i].utf8[1];
1333     }
1334     memcpy(codeset->table_sorted, codeset->table, sizeof(codeset->table));
1335     qsort(codeset->table_sorted, 256, sizeof(codeset->table[0]), (int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1336     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1337   }
1338
1339   // KOI8-R
1340   if(codesetsFind(csList, "KOI8-R") == NULL)
1341   {
1342     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1343       goto end;
1344
1345     codeset->name               = mystrdup("KOI8-R");
1346     codeset->alt_name           = mystrdup("KOI8R");
1347     codeset->characterization   = mystrdup("Russian");
1348     codeset->read_only          = 0;
1349     for(i = 0; i<256; i++)
1350     {
1351       UTF32 *src_ptr = &src;
1352       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1353
1354       if(i<0x80)
1355         src = i;
1356       else
1357         src = koi8r_to_ucs4[i-0x80];
1358
1359       codeset->table[i].code = i;
1360       codeset->table[i].ucs4 = src;
1361       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1362       *dest_ptr = 0;
1363       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1364     }
1365     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1366     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1367     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1368   }
1369
1370   // AmigaPL
1371   if(codesetsFind(csList, "AmigaPL") == NULL)
1372   {
1373     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1374       goto end;
1375
1376     codeset->name             = mystrdup("AmigaPL");
1377     codeset->alt_name         = mystrdup("AmiPL");
1378     codeset->characterization = mystrdup("Polish (Amiga)");
1379     codeset->read_only        = 1;
1380     for(i=0; i<256; i++)
1381     {
1382       UTF32 *src_ptr = &src;
1383       UTF8  *dest_ptr = &codeset->table[i].utf8[1];
1384
1385       if(i<0xa0)
1386         src = i;
1387       else
1388         src = amigapl_to_ucs4[i-0xa0];
1389
1390       codeset->table[i].code = i;
1391       codeset->table[i].ucs4 = src;
1392       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr,src_ptr+1,&dest_ptr,dest_ptr+6,CSF_StrictConversion);
1393       *dest_ptr = 0;
1394       codeset->table[i].utf8[0] = (ULONG)dest_ptr-(ULONG)&codeset->table[i].utf8[1];
1395     }
1396     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1397     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1,const void *arg2))codesetsCmpUnicode);
1398     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1399   }
1400
1401   // Amiga-1251
1402   if(codesetsFind(csList, "Amiga-1251") == NULL)
1403   {
1404     if((codeset = allocVecPooled(CodesetsBase->pool, sizeof(struct codeset))) == NULL)
1405       goto end;
1406
1407     codeset->name             = mystrdup("Amiga-1251");
1408     codeset->alt_name         = mystrdup("Ami1251");
1409     codeset->characterization = mystrdup("Cyrillic (Amiga)");
1410     codeset->read_only        = 1;
1411     for(i=0; i<256; i++)
1412     {
1413       UTF32 *src_ptr = &src;
1414       UTF8 *dest_ptr = &codeset->table[i].utf8[1];
1415
1416       if(i < 0xa0)
1417         src = i;
1418       else
1419         src = amiga1251_to_ucs4[i-0xa0];
1420
1421       codeset->table[i].code = i;
1422       codeset->table[i].ucs4 = src;
1423       CodesetsConvertUTF32toUTF8((const UTF32 **)&src_ptr, src_ptr+1, &dest_ptr, dest_ptr+6, CSF_StrictConversion);
1424       *dest_ptr = 0;
1425       codeset->table[i].utf8[0] = (char*)dest_ptr - (char*)&codeset->table[i].utf8[1];
1426     }
1427     memcpy(codeset->table_sorted,codeset->table,sizeof(codeset->table));
1428     qsort(codeset->table_sorted,256,sizeof(codeset->table[0]),(int (*)(const void *arg1, const void *arg2))codesetsCmpUnicode);
1429     AddTail((struct List *)csList, (struct Node *)&codeset->node);
1430   }
1431
1432 end:
1433   ReleaseSemaphore(&CodesetsBase->poolSem);
1434
1435   RETURN(codeset != 0);
1436   return codeset != NULL;
1437 }
1438
1439 ///
1440 /// codesetsCleanup()
1441 // Cleanup the memory for the codeset
1442 void
1443 codesetsCleanup(struct codesetList *csList)
1444 {
1445   struct codeset *code;
1446
1447   ENTER();
1448
1449   while((code = (struct codeset *)RemHead((struct List *)csList)))
1450   {
1451     if(code->name) freeArbitrateVecPooled(code->name);
1452     if(code->alt_name) freeArbitrateVecPooled(code->alt_name);
1453     if(code->characterization) freeArbitrateVecPooled(code->characterization);
1454
1455     freeArbitrateVecPooled(code);
1456   }
1457
1458   LEAVE();
1459 }
1460
1461 ///
1462 /// codesetsFind()
1463 // Returns the given codeset.
1464 struct codeset *
1465 codesetsFind(struct codesetList *csList, const char *name)
1466 {
1467   struct codeset *res = NULL;
1468
1469   ENTER();
1470
1471   if(name && *name)
1472   {
1473     struct codeset *mstate, *succ;
1474     char *matchedName = matchCodesetAlias(name);
1475
1476     if(matchedName != NULL)
1477       name = matchedName;
1478
1479     for(mstate = (struct codeset *)csList->list.mlh_Head; (succ = (struct codeset *)mstate->node.mln_Succ); mstate = succ)
1480     {
1481       if(stricmp(name, mstate->name) == 0 ||
1482         (mstate->alt_name != NULL && stricmp(name, mstate->alt_name) == 0))
1483       {
1484         // break out
1485         break;
1486       }
1487     }
1488
1489     if(succ)
1490       res = mstate;
1491   }
1492
1493   RETURN(res);
1494   return res;
1495 }
1496 ///
1497 /// codesetsFindBest()
1498 // Returns the best codeset for the given text
1499 static struct codeset *
1500 codesetsFindBest(struct TagItem *attrs, ULONG csFamily, STRPTR text, int text_len, int *error_ptr)
1501 {
1502   struct codeset *best_codeset = NULL;
1503   int best_errors = text_len;
1504   BOOL found = FALSE;
1505
1506   ENTER();
1507
1508   // in case the user specified the codeset family as a
1509   // cyrillic one we go and do our cyrillic specific analysis first
1510   if(csFamily == CSV_CodesetFamily_Cyrillic)
1511   {
1512     #define NUM_CYRILLIC 3
1513
1514     struct CodesetSearch
1515     {
1516       const char *name;
1517       const char *data;
1518     };
1519
1520     struct CodesetSearch search[NUM_CYRILLIC];
1521     unsigned char *p;
1522     unsigned char *tp;
1523     int ctr[NUM_CYRILLIC];
1524     int Nmax;
1525     int NGlob = 1;
1526     int max;
1527     int gr = 0;
1528     int lr = 0;
1529
1530     search[0].name = "windows-1251";
1531     search[0].data = cp1251_data;
1532     search[1].name = "IBM866";
1533     search[1].data = cp866_data;
1534     search[2].name = "KOI8-R";
1535     search[2].data = koi8r_data;
1536
1537     memset(&ctr, 0, sizeof(ctr));
1538
1539     tp = (unsigned char *)text;
1540
1541     do
1542     {
1543       int n;
1544       int mid = max = -466725766; // TODO: what's the magic behind this constant?
1545       Nmax = 0;
1546
1547       for(n=0; n < NUM_CYRILLIC; n++)
1548       {
1549         unsigned char la = 0;
1550         unsigned char *tptr = (unsigned char *)search[n].data;
1551
1552         p = tp;
1553
1554         do
1555         {
1556           unsigned char lb = (*p++) ^ 128;
1557
1558           if(!((la | lb) & 128))
1559             ctr[n] += (signed char)tptr[(la << 7) + lb];
1560
1561           la = lb;
1562         }
1563         while(*p);
1564
1565         if(max < ctr[n])
1566         {
1567           mid = max;
1568           max = ctr[n];
1569           Nmax = n+1;
1570         }
1571       }
1572
1573       tp = p;
1574       if((max >= 500) && ((max-mid) >= 1000))
1575       {
1576         lr = gr = 1;
1577         NGlob = Nmax;
1578       }
1579     }
1580     while((*p) && (!gr));
1581
1582     if(gr || ((!(*p)) && lr))
1583       Nmax = NGlob;
1584
1585     // if our analysis found something, we go and try
1586     // to find the corresponding codeset in out codeset list
1587     if(max != 0)
1588     {
1589       struct TagItem *tstate = attrs;
1590       struct TagItem *tag;
1591
1592       D(DBF_STARTUP, "identified text as '%s", search[Nmax-1].name);
1593
1594       // now we walk through our taglist and check if the user
1595       // supplied
1596       while((tag = NextTagItem(&tstate)))
1597       {
1598         if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1599         {
1600           struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1601
1602           if((best_codeset = codesetsFind(csList, search[Nmax-1].name)) != NULL)
1603             break;
1604         }
1605       }
1606
1607       // if we still haven't found the matching codeset
1608       // we search the internal list
1609       if(best_codeset == NULL)
1610         best_codeset = codesetsFind(&CodesetsBase->codesets, search[Nmax-1].name);
1611
1612       best_errors = 0;
1613
1614       found = TRUE;
1615     }
1616   }
1617
1618   // if we haven't found the best codeset (through the cyrillic analysis
1619   // we go and do the dumb latin search in our codesetlist
1620   if(found == FALSE)
1621   {
1622     struct TagItem *tstate = attrs;
1623     struct TagItem *tag;
1624     BOOL lastIteration = FALSE;
1625
1626     while((tag = NextTagItem(&tstate)) || (lastIteration = TRUE))
1627     {
1628       if(lastIteration == TRUE || (tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0))
1629       {
1630         struct codesetList *csList = (lastIteration ? &CodesetsBase->codesets : (struct codesetList *)tag->ti_Data);
1631         struct codeset *codeset = (struct codeset *)csList->list.mlh_Head;
1632
1633         // the following identification/detection routines is NOT really smart.
1634         // we just see how each UTF8 string is the representation of each char
1635         // in our source text and then check if they are valid or not. As said,
1636         // not very smart, but we don't have anything better right now :(
1637
1638         while(codeset)
1639         {
1640           if(!codeset->read_only && codeset != CodesetsBase->utf8Codeset)
1641           {
1642             char *text_ptr = text;
1643             int i;
1644             int errors = 0;
1645
1646             for(i=0; i < text_len; i++)
1647             {
1648               unsigned char c = *text_ptr++;
1649
1650               if(c)
1651               {
1652                 struct single_convert *f = &codeset->table[c];
1653
1654                 if(f->utf8[0] == 0 || f->utf8[1] == 0x00)
1655                   errors++;
1656               }
1657               else
1658                 break;
1659             }
1660
1661             D(DBF_STARTUP, "tried to identify text as '%s' text with %ld of %ld errors", codeset->name, errors, text_len);
1662
1663             if(errors < best_errors)
1664             {
1665               best_codeset = codeset;
1666               best_errors = errors;
1667             }
1668
1669             if(best_errors == 0)
1670               break;
1671           }
1672
1673           codeset = (struct codeset *)codeset->node.mln_Succ;
1674         }
1675
1676         if(lastIteration)
1677           break;
1678       }
1679     }
1680   }
1681
1682   if(error_ptr)
1683     *error_ptr = best_errors;
1684
1685   RETURN(best_codeset);
1686   return best_codeset;
1687 }
1688 ///
1689
1690 /**************************************************************************/
1691
1692 /// CodesetsSupportedA()
1693 #ifdef __AROS__
1694 AROS_LH1(STRPTR *, CodesetsSupportedA,
1695     AROS_LHA(struct TagItem *, attrs, A0),
1696     struct LibraryHeader *, library, 15, Codesets
1697 )
1698 {
1699     AROS_LIBFUNC_INIT
1700 #else
1701 STRPTR *LIBFUNC
1702 CodesetsSupportedA(REG(a0, UNUSED struct TagItem * attrs))
1703 {
1704 #endif
1705   STRPTR *array = NULL;
1706   struct TagItem *tstate = attrs;
1707   struct TagItem *tag;
1708   int numCodesets;
1709
1710   ENTER();
1711
1712   // first we need to check how many codesets our supplied
1713   // lists carry.
1714   numCodesets = countCodesets(&CodesetsBase->codesets);
1715   while((tag = NextTagItem(&tstate)))
1716   {
1717     if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1718       numCodesets += countCodesets((struct codesetList *)tag->ti_Data);
1719   }
1720
1721   // now that we know how many codesets we have in our lists we
1722   // can put their names into our string arrays
1723   if(numCodesets > 0)
1724   {
1725     if((array = allocArbitrateVecPooled((numCodesets+1)*sizeof(STRPTR))))
1726     {
1727       struct codeset *code;
1728       struct codeset *succ;
1729       int i=0;
1730
1731       // reset the tstate
1732       tstate = attrs;
1733
1734       ObtainSemaphoreShared(&CodesetsBase->libSem);
1735
1736       // first we walk through the internal codesets list and
1737       // add the names
1738       for(code = (struct codeset *)CodesetsBase->codesets.list.mlh_Head; (succ = (struct codeset *)code->node.mln_Succ); code = succ, i++)
1739         array[i] = code->name;
1740
1741       // then we also iterate through our private codesets list
1742       while((tag = NextTagItem(&tstate)))
1743       {
1744         if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1745         {
1746           struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1747
1748           for(code = (struct codeset *)csList->list.mlh_Head; (succ = (struct codeset *)code->node.mln_Succ); code = succ, i++)
1749             array[i] = code->name;
1750         }
1751       }
1752
1753       array[i] = NULL;
1754
1755       ReleaseSemaphore(&CodesetsBase->libSem);
1756     }
1757   }
1758
1759   RETURN(array);
1760   return array;
1761 #ifdef __AROS__
1762     AROS_LIBFUNC_EXIT
1763 #endif
1764 }
1765
1766 #ifndef __AROS__
1767 LIBSTUB(CodesetsSupportedA, STRPTR*, REG(a0, struct TagItem *attrs))
1768 {
1769   #ifdef __MORPHOS__
1770   return CodesetsSupportedA((struct TagItem *)REG_A0);
1771   #else
1772   return CodesetsSupportedA(attrs);
1773   #endif
1774 }
1775 #endif
1776
#if defined(__amigaos4__)
// Varargs flavour of CodesetsSupportedA(): the variable arguments are
// handed over as a taglist pointer.
LIBSTUBVA(CodesetsSupported, STRPTR*, ...)
{
  VA_LIST ap;
  STRPTR *names;

  // self is not declared in this block; presumably LIBSTUBVA provides it
  VA_START(ap, self);
  names = CodesetsSupportedA(VA_ARG(ap, struct TagItem *));
  VA_END(ap);

  return names;
}
#endif
1790
1791 ///
1792 /// CodesetsFreeA()
1793 #ifdef __AROS__
1794 AROS_LH2(void, CodesetsFreeA,
1795     AROS_LHA(APTR, obj, A0),
1796     AROS_LHA(struct TagItem *, attrs, A1),
1797     struct LibraryHeader *, library, 14, Codesets
1798 )
1799 {
1800     AROS_LIBFUNC_INIT
1801 #else
1802 void LIBFUNC
1803 CodesetsFreeA(REG(a0, APTR obj),
1804               REG(a1, UNUSED struct TagItem *attrs))
1805 {
1806 #endif
1807   ENTER();
1808
1809   if(obj)
1810     freeArbitrateVecPooled(obj);
1811
1812   LEAVE();
1813 #ifdef __AROS__
1814     AROS_LIBFUNC_EXIT
1815 #endif
1816 }
1817
1818 #ifndef __AROS__
1819 LIBSTUB(CodesetsFreeA, void, REG(a0, APTR obj), REG(a1, struct TagItem *attrs))
1820 {
1821   #ifdef __MORPHOS__
1822   return CodesetsFreeA((APTR)REG_A0,(struct TagItem *)REG_A1);
1823   #else
1824   return CodesetsFreeA(obj, attrs);
1825   #endif
1826 }
1827 #endif
1828
#if defined(__amigaos4__)
// Varargs flavour of CodesetsFreeA(): the arguments following obj are
// handed over as a taglist pointer.
LIBSTUBVA(CodesetsFree, void, REG(a0, APTR obj), ...)
{
  VA_LIST ap;

  VA_START(ap, obj);
  CodesetsFreeA(obj, VA_ARG(ap, struct TagItem *));
  VA_END(ap);
}
#endif
1839
1840 ///
1841 /// CodesetsSetDefaultA()
1842 #ifdef __AROS__
1843 AROS_LH2(struct codeset *, CodesetsSetDefaultA,
1844     AROS_LHA(STRPTR, name, A0),
1845     AROS_LHA(struct TagItem *, attrs, A1),
1846     struct LibraryHeader *, library, 13, Codesets
1847 )
1848 {
1849     AROS_LIBFUNC_INIT
1850 #else
1851 struct codeset *LIBFUNC
1852 CodesetsSetDefaultA(REG(a0, STRPTR name),
1853                     REG(a1, struct TagItem *attrs))
1854 {
1855 #endif
1856   struct codeset *codeset;
1857
1858   ENTER();
1859
1860   ObtainSemaphoreShared(&CodesetsBase->libSem);
1861
1862   if((codeset = codesetsFind(&CodesetsBase->codesets,name)))
1863   {
1864     ULONG flags;
1865
1866     flags = GVF_SAVE_VAR | (GetTagData(CSA_Save,FALSE,attrs) ? GVF_GLOBAL_ONLY : 0);
1867
1868     SetVar("codeset_default",codeset->name,strlen(codeset->name),flags);
1869   }
1870
1871   ReleaseSemaphore(&CodesetsBase->libSem);
1872
1873   RETURN(codeset);
1874   return codeset;
1875 #ifdef __AROS__
1876     AROS_LIBFUNC_EXIT
1877 #endif
1878 }
1879
1880 #ifndef __AROS__
1881 LIBSTUB(CodesetsSetDefaultA, struct codeset *, REG(a0, STRPTR name), REG(a1, struct TagItem *attrs))
1882 {
1883   #ifdef __MORPHOS__
1884   return CodesetsSetDefaultA((STRPTR)REG_A0,(struct TagItem *)REG_A1);
1885   #else
1886   return CodesetsSetDefaultA(name, attrs);
1887   #endif
1888 }
1889 #endif
1890
#if defined(__amigaos4__)
// Varargs flavour of CodesetsSetDefaultA(): the arguments following name
// are handed over as a taglist pointer.
LIBSTUBVA(CodesetsSetDefault, struct codeset *, REG(a0, STRPTR name), ...)
{
  VA_LIST ap;
  struct codeset *result;

  VA_START(ap, name);
  result = CodesetsSetDefaultA(name, VA_ARG(ap, struct TagItem *));
  VA_END(ap);

  return result;
}
#endif
1904
1905 ///
1906 /// CodesetsFindA()
1907 #ifdef __AROS__
1908 AROS_LH2(struct codeset *, CodesetsFindA,
1909     AROS_LHA(STRPTR, name, A0),
1910     AROS_LHA(struct TagItem *, attrs, A1),
1911     struct LibraryHeader *, library, 16, Codesets
1912 )
1913 {
1914     AROS_LIBFUNC_INIT
1915 #else
1916 struct codeset *LIBFUNC
1917 CodesetsFindA(REG(a0, STRPTR name), REG(a1, struct TagItem *attrs))
1918 {
1919 #endif
1920   struct codeset *codeset = NULL;
1921
1922   ENTER();
1923
1924   ObtainSemaphoreShared(&CodesetsBase->libSem);
1925
1926   // if no name pointer was supplied we have to return
1927   // the default codeset only.
1928   if(name != NULL)
1929   {
1930     // we first walk through our internal list and check if we
1931     // can find the requested codeset
1932     codeset = codesetsFind(&CodesetsBase->codesets, name);
1933
1934     if(codeset == NULL && attrs != NULL)
1935     {
1936       struct TagItem *tstate = attrs;
1937       struct TagItem *tag;
1938
1939       // now we walk through our taglist and check if the user
1940       // supplied
1941       while((tag = NextTagItem(&tstate)))
1942       {
1943         if(tag->ti_Tag == CSA_CodesetList && tag->ti_Data != 0)
1944         {
1945           struct codesetList *csList = (struct codesetList *)tag->ti_Data;
1946
1947           if((codeset = codesetsFind(csList, name)) != NULL)
1948             break;
1949         }
1950       }
1951     }
1952   }
1953
1954   // check if we found something or not.
1955   if(codeset == NULL && (attrs == NULL || GetTagData(CSA_FallbackToDefault, TRUE, attrs)))
1956     codeset = defaultCodeset(FALSE);
1957
1958   ReleaseSemaphore(&CodesetsBase->libSem);
1959
1960   RETURN(codeset);
1961   return codeset;
1962 #ifdef __AROS__
1963     AROS_LIBFUNC_EXIT
1964 #endif
1965 }
1966
1967 #ifndef __AROS__
1968 LIBSTUB(CodesetsFindA, struct codeset *, REG(a0, STRPTR name), REG(a1, struct TagItem *attrs))
1969 {
1970   #ifdef __MORPHOS__
1971   return CodesetsFindA((STRPTR)REG_A0,(struct TagItem *)REG_A1);
1972   #else
1973   return CodesetsFindA(name, attrs);
1974   #endif
1975 }
1976 #endif
1977
#if defined(__amigaos4__)
// Varargs flavour of CodesetsFindA(): the arguments following name are
// handed over as a taglist pointer.
LIBSTUBVA(CodesetsFind, struct codeset *, REG(a0, STRPTR name), ...)
{
  VA_LIST ap;
  struct codeset *result;

  VA_START(ap, name);
  result = CodesetsFindA(name, VA_ARG(ap, struct TagItem *));
  VA_END(ap);

  return result;
}
#endif
1991 ///
1992 /// CodesetsFindBestA()
1993 #ifdef __AROS__
1994 AROS_LH1(struct codeset *, CodesetsFindBestA,
1995     AROS_LHA(struct TagItem *, attrs, A0),
1996     struct LibraryHeader *, library, 17, Codesets
1997 )
1998 {
1999     AROS_LIBFUNC_INIT
2000 #else
2001 struct codeset *LIBFUNC
2002 CodesetsFindBestA(REG(a0, struct TagItem *attrs))
2003 {
2004 #endif
2005   struct codeset *codeset = NULL;
2006
2007   ENTER();
2008
2009   ObtainSemaphoreShared(&CodesetsBase->libSem);
2010
2011   if(attrs)
2012   {
2013     char *text = (char *)GetTagData(CSA_Source, 0, attrs);
2014     ULONG text_len = GetTagData(CSA_SourceLen, text != NULL ? strlen(text) : 0, attrs);
2015
2016     if(text != NULL && text_len > 0)
2017     {
2018       int numErrors = 0;
2019       ULONG csFamily = GetTagData(CSA_CodesetFamily, CSV_CodesetFamily_Latin, attrs);
2020       int *error_ptr = (int *)GetTagData(CSA_ErrPtr, 0, attrs);
2021       BOOL defaultFallBack = GetTagData(CSA_FallbackToDefault, FALSE, attrs);
2022
2023       codeset = codesetsFindBest(attrs, csFamily, text, text_len, &numErrors);
2024
2025       if(error_ptr != NULL)
2026         *error_ptr = numErrors;
2027
2028       // if we still haven't got the codeset we fallback to the default
2029       if(codeset == NULL && defaultFallBack == TRUE)
2030         codeset = defaultCodeset(FALSE);
2031     }
2032   }
2033
2034   ReleaseSemaphore(&CodesetsBase->libSem);
2035
2036   RETURN(codeset);
2037   return codeset;
2038 #ifdef __AROS__
2039     AROS_LIBFUNC_EXIT
2040 #endif
2041 }
2042
2043 #ifndef __AROS__
2044 LIBSTUB(CodesetsFindBestA, struct codeset *, REG(a0, struct TagItem *attrs))
2045 {
2046   #ifdef __MORPHOS__
2047   return CodesetsFindBestA((struct TagItem *)REG_A0);
2048   #else
2049   return CodesetsFindBestA(attrs);
2050   #endif
2051 }
2052 #endif
2053
#if defined(__amigaos4__)
// Varargs flavour of CodesetsFindBestA(): the variable arguments are
// handed over as a taglist pointer.
LIBSTUBVA(CodesetsFindBest, struct codeset *, ...)
{
  VA_LIST ap;
  struct codeset *result;

  // self is not declared in this block; presumably LIBSTUBVA provides it
  VA_START(ap, self);
  result = CodesetsFindBestA(VA_ARG(ap, struct TagItem *));
  VA_END(ap);

  return result;
}
#endif
2067 ///
2068 /// CodesetsUTF8Len()
2069 // Returns the number of characters a utf8 string has. This is not
2070 // identically with the size of memory is required to hold the string.
2071 #ifdef __AROS__
2072 AROS_LH1(ULONG, CodesetsUTF8Len,
2073     AROS_LHA(const UTF8 *, str, A0),
2074     struct LibraryHeader *, library, 18, Codesets
2075 )
2076 {
2077     AROS_LIBFUNC_INIT
2078 #else
2079 ULONG LIBFUNC
2080 CodesetsUTF8Len(REG(a0, UTF8 *str))
2081 {
2082 #endif
2083   int           len;
2084   unsigned char c;
2085
2086   ENTER();
2087
2088   if(!str)
2089     return 0;
2090
2091   len = 0;
2092
2093   while((c = *str++))
2094   {
2095     len++;
2096     str += trailingBytesForUTF8[c];
2097   }
2098
2099   RETURN((ULONG)len);
2100   return (ULONG)len;
2101 #ifdef __AROS__
2102     AROS_LIBFUNC_EXIT
2103 #endif
2104 }
2105
2106 #ifndef __AROS__
2107 LIBSTUB(CodesetsUTF8Len, ULONG, REG(a0, UTF8* str))
2108 {
2109   #ifdef __MORPHOS__
2110   return CodesetsUTF8Len((UTF8 *)REG_A0);
2111   #else
2112   return CodesetsUTF8Len(str);
2113   #endif
2114 }
2115 #endif
2116
2117 ///
2118 /// CodesetsStrLenA()
2119 #ifdef __AROS__
2120 AROS_LH2(ULONG, CodesetsStrLenA,
2121     AROS_LHA(STRPTR, str, A0),
2122     AROS_LHA(struct TagItem *, attrs, A1),
2123     struct LibraryHeader *, library, 23, Codesets
2124 )
2125 {
2126     AROS_LIBFUNC_INIT
2127 #else
2128 ULONG LIBFUNC
2129 CodesetsStrLenA(REG(a0, STRPTR str),
2130                 REG(a1, struct TagItem *attrs))
2131 {
2132 #endif
2133   struct codeset *codeset;
2134   int            len, res;
2135   STRPTR         src;
2136   UBYTE          c;
2137
2138   ENTER();
2139
2140   if(!str)
2141     return 0;
2142
2143   if(!(codeset = (struct codeset *)GetTagData(CSA_SourceCodeset, 0, attrs)))
2144     codeset = defaultCodeset(TRUE);
2145
2146   len = GetTagData(CSA_SourceLen, strlen(str), attrs);
2147
2148   src = str;
2149   res = 0;
2150
2151   while(((c = *src++) && (len--)))
2152     res += codeset->table[c].utf8[0];
2153
2154   RETURN((ULONG)res);
2155   return (ULONG)res;
2156 #ifdef __AROS__
2157     AROS_LIBFUNC_EXIT
2158 #endif
2159 }
2160
2161 #ifndef __AROS__
2162 LIBSTUB(CodesetsStrLenA, ULONG, REG(a0, STRPTR str),
2163                                 REG(a1, struct TagItem *attrs))
2164 {
2165   #ifdef __MORPHOS__
2166   return CodesetsStrLenA((STRPTR)REG_A0,(struct TagItem *)REG_A1);
2167   #else
2168   return CodesetsStrLenA(str, attrs);
2169   #endif
2170 }
2171 #endif
2172
#if defined(__amigaos4__)
// Varargs flavour of CodesetsStrLenA(): the arguments following str are
// handed over as a taglist pointer.
LIBSTUBVA(CodesetsStrLen, ULONG, REG(a0, STRPTR str), ...)
{
  VA_LIST ap;
  ULONG length;

  VA_START(ap, str);
  length = CodesetsStrLenA(str, VA_ARG(ap, struct TagItem *));
  VA_END(ap);

  return length;
}
#endif
2186 ///
2187 /// CodesetsUTF8ToStrA()
// Converts an UTF8 string (CSA_Source, CSA_SourceLen) to the codeset given
// by CSA_DestCodeset (default: defaultCodeset(TRUE)).
//
// The result is delivered in one of two ways:
//  - CSA_DestHook: the converted text is collected in a local buffer and
//    handed to the hook in chunks (state CSV_Translating) followed by one
//    final call (state CSV_End). The function itself returns NULL then.
//  - otherwise the text is written to CSA_Dest or to a buffer allocated
//    from CSA_Pool (optionally protected by CSA_PoolSem) or from the
//    library's own pool. The NUL terminated buffer is returned.
//
// Characters without a representation in the destination codeset become
// '?' and are counted; the counter is stored in the int CSA_ErrPtr points
// to. CSA_MapForeignChars and CSA_MapForeignCharsHook allow to substitute
// such characters by replacement strings. CSA_DestLenPtr receives the
// number of converted source characters (n).
//
// NULL is also returned if there is no source text or if an allocation
// failed.
#ifdef __AROS__
AROS_LH1(STRPTR, CodesetsUTF8ToStrA,
    AROS_LHA(struct TagItem *, attrs, A0),
    struct LibraryHeader *, library, 19, Codesets
)
{
    AROS_LIBFUNC_INIT
#else
STRPTR LIBFUNC
CodesetsUTF8ToStrA(REG(a0, struct TagItem *attrs))
{
#endif
  UTF8 *src;
  ULONG srcLen;
  ULONG *destLenPtr;
  ULONG n = 0; // counts the iterations of the conversion loop
  STRPTR dest = NULL;

  ENTER();

  if((src = (UTF8 *)GetTagData(CSA_Source, (ULONG)NULL, attrs)) != NULL &&
     (srcLen = GetTagData(CSA_SourceLen, src != NULL ? strlen((char *)src) : 0, attrs)) > 0)
  {
    struct convertMsg msg;
    struct codeset *codeset;
    struct Hook *destHook;
    struct Hook *mapForeignCharsHook;
    char buf[256];
    STRPTR destIter = NULL;
    STRPTR b = NULL;
    ULONG destLen = 0;
    int i = 0;
    unsigned char *s = src;
    unsigned char *e = (src+srcLen);
    int numConvErrors = 0;
    int *numConvErrorsPtr;
    BOOL mapForeignChars;
    APTR pool = NULL;
    struct SignalSemaphore *sem = NULL;

    // get some more optional attributes
    destHook = (struct Hook *)GetTagData(CSA_DestHook, (ULONG)NULL, attrs);
    destLen = GetTagData(CSA_DestLen, 0, attrs);
    numConvErrorsPtr = (int *)GetTagData(CSA_ErrPtr, (ULONG)NULL, attrs);
    mapForeignChars = (BOOL)GetTagData(CSA_MapForeignChars, FALSE, attrs);
    mapForeignCharsHook = (struct Hook *)GetTagData(CSA_MapForeignCharsHook, (ULONG)NULL, attrs);

    // first we make sure we allocate enough memory
    // for our destination buffer
    if(destHook != NULL)
    {
      // with a hook the output goes through buf, so the chunk size
      // is forced into the range 16..sizeof(buf)
      if(destLen < 16 || destLen > sizeof(buf))
        destLen = sizeof(buf);

      msg.state = CSV_Translating;
      b = buf;
      i = 0;
    }
    else
    {
      // in case the user wants us to dynamically generate the
      // destination buffer we do it right now
      if((dest = (STRPTR)GetTagData(CSA_Dest, (ULONG)NULL, attrs)) == NULL ||
         GetTagData(CSA_AllocIfNeeded, TRUE, attrs) != FALSE)
      {
        ULONG len = 0;

        // calculate the destLen
        // (one per lead byte, the trailing bytes are skipped)
        while(s < e)
        {
          unsigned char c = *s++;

          len++;
          s += trailingBytesForUTF8[c];
        }

        // allocate if there is no buffer or if the supplied one is too small
        if(dest == NULL || (destLen < len+1))
        {
          if((pool = (APTR)GetTagData(CSA_Pool, (ULONG)NULL, attrs)) != NULL)
          {
            if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, (ULONG)NULL, attrs)) != NULL)
              ObtainSemaphore(sem);

            // allocate the destination buffer
            dest = allocVecPooled(pool, len+1);

            if(sem != NULL)
              ReleaseSemaphore(sem);
          }
          else
            dest = allocArbitrateVecPooled(len+1);

          destLen = len+1;
        }

        if(dest == NULL)
        {
          RETURN(NULL);
          return NULL;
        }
      }

      destIter = dest;
    }

    // get the destination codeset pointer
    if((codeset = (struct codeset *)GetTagData(CSA_DestCodeset, (ULONG)NULL, attrs)) == NULL)
      codeset = defaultCodeset(TRUE);

    // now we convert the src string to the
    // destination buffer.
    for(s=src;;n++)
    {
      // without a hook we stop as soon as only the room
      // for the terminating NUL byte is left
      if(destHook == NULL && n >= destLen-1)
        break;

      // convert until we reach the end of the
      // source buffer.
      if(s < e)
      {
        unsigned char c = *s;
        unsigned char d = '?';
        const char *repstr = NULL;
        int replen = 0;

        // check if the char is a >7bit char
        if(c > 127)
        {
          struct single_convert *f;
          int lenAdd = trailingBytesForUTF8[c];
          int lenStr = lenAdd+1;
          unsigned char *src = s; // shadows the outer src on purpose of the lookup below

          do
          {
            // start each iteration with "no replacement found yet"
            repstr = NULL;
            replen = 0;

            // search in the UTF8 conversion table of the current charset if
            // we have a replacement character for the char sequence starting at s
            BIN_SEARCH(codeset->table_sorted, 0, 255, strncmp((char *)src, (char *)codeset->table_sorted[m].utf8+1, lenStr), f);

            if(f != NULL)
            {
              // direct hit: d carries the converted char and the negative
              // replen keeps the error handling below from touching it
              d = f->code;
              replen = -1;

              break;
            }
            else
            {
              // the analysed char sequence (s) is not convertable to a
              // single visible char replacement, so we normally have to put
              // a ? sign as a "unknown char" sign at the very position.
              //
              // For convienence we, however, allow users to replace these
              // UTF8 characters with char sequences that "looklike" the
              // original char.
              if(mapForeignChars == TRUE)
                replen = mapUTF8toASCII(&repstr, src, lenStr);

              // call the hook only, if the internal table yielded no suitable
              // replacement
              if(replen == 0 && mapForeignCharsHook != NULL)
              {
                struct replaceMsg rmsg;

                rmsg.dst = (char **)&repstr;
                rmsg.src = src;
                rmsg.srclen = lenStr;
                replen = CallHookPkt(mapForeignCharsHook, &rmsg, NULL);
              }

              if(replen < 0)
              {
                D(DBF_UTF, "got UTF8 replacement (%ld)", replen);

                // stay in the loop as long as one replacement function delivers
                // further UTF8 replacement sequences
                src = (unsigned char *)repstr;
              }
              else if(replen == 0)
              {
                D(DBF_UTF, "found no ASCII replacement for UTF8 string (%ld)", replen);
                repstr = NULL;
              }
              else
                D(DBF_UTF, "got replacement string '%s' (%ld)", repstr ? repstr : "<null>", replen);
            }
          }
          while(replen < 0);

          // neither a table entry nor a replacement string: emit '?'
          // and count the conversion error
          if(repstr == NULL || replen == 0)
          {
            if(replen >= 0)
            {
              d = '?';
              numConvErrors++;
            }
          }

          // skip the trailing bytes, the lead byte is skipped by the s++ below
          s += lenAdd;
        }
        else
          d = c;

        if(destHook != NULL)
        {
          if(replen > 1)
          {
            // copy the replacement string byte by byte and flush buf
            // to the hook whenever destLen-1 bytes have piled up
            while(replen > 0)
            {
              *b++ = *repstr;
              repstr++;
              i++;
              replen--;

              if(i%(destLen-1)==0)
              {
                *b = '\0';
                msg.len = i;
                CallHookPkt(destHook, &msg, buf);

                b  = buf;
                *b = '\0';
                i  = 0;
              }
            }
          }
          else
          {
            *b++ = replen > 0 ? *repstr : d;
            i++;
          }

          // NOTE(review): i may be 0 here if the loop above has just
          // flushed buf, in which case the hook seems to be called once
          // more with an empty chunk - confirm that this is acceptable.
          if(i%(destLen-1)==0)
          {
            *b = '\0';
            msg.len = i;
            CallHookPkt(destHook, &msg, buf);

            b  = buf;
            *b = '\0';
            i  = 0;
          }
        }
        else
        {
          if(replen > 1)
          {
            // a replacement string needs replen-1 bytes more than the
            // single byte that was reserved for this character
            ULONG destPos = destIter-dest;

            // NOTE(review): dest may still be the buffer supplied through
            // CSA_Dest at this point - confirm that passing it to the
            // realloc functions is intended. The previous pointer is also
            // overwritten before the result is checked.
            if(pool != NULL)
            {
              if(sem != NULL)
                ObtainSemaphore(sem);

              // allocate the destination buffer
              dest = reallocVecPooled(pool, dest, destLen, destLen+replen-1);

              if(sem != NULL)
                ReleaseSemaphore(sem);
            }
            else
              dest = reallocArbitrateVecPooled(dest, destLen, destLen+replen-1);

            if(dest == NULL)
            {
              RETURN(NULL);
              return NULL;
            }

            // the buffer may have moved, so restore the write position
            destIter = dest+destPos;
            memcpy(destIter, repstr, replen);

            // adjust our loop pointer and destination length
            destIter += replen;
            destLen += replen-1;
          }
          else if(replen == 1)
            *destIter++ = *repstr;
          else
            *destIter++ = d;
        }

        s++;
      }
      else
        break;
    }

    if(destHook != NULL)
    {
      // hand over whatever is left in buf together with the end marker
      msg.state = CSV_End;
      msg.len   = i;
      *b        = '\0';
      CallHookPkt(destHook,&msg,buf);
    }
    else
      *destIter = '\0';

    // let us write the number of conversion errors
    // to the proper variable pointer, if wanted
    if(numConvErrorsPtr != NULL)
      *numConvErrorsPtr = numConvErrors;
  }

  // put the final length of our destination buffer
  // into the destLenPtr
  // NOTE(review): n counts loop iterations, i.e. converted source
  // characters. It looks smaller than the number of bytes written when
  // replacement strings were inserted - confirm what callers expect.
  if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, (ULONG)NULL, attrs)) != NULL)
    *destLenPtr = n;

  RETURN(dest);
  return dest;

#ifdef __AROS__
    AROS_LIBFUNC_EXIT
#endif
}
2512
2513 #ifndef __AROS__
2514 LIBSTUB(CodesetsUTF8ToStrA, STRPTR, REG(a0, struct TagItem *attrs))
2515 {
2516   #ifdef __MORPHOS__
2517   return CodesetsUTF8ToStrA((struct TagItem *)REG_A0);
2518   #else
2519   return CodesetsUTF8ToStrA(attrs);
2520   #endif
2521 }
2522 #endif
2523
#if defined(__amigaos4__)
// Varargs flavour of CodesetsUTF8ToStrA(): the variable arguments are
// handed over as a taglist pointer.
LIBSTUBVA(CodesetsUTF8ToStr, STRPTR, ...)
{
  VA_LIST ap;
  STRPTR converted;

  // self is not declared in this block; presumably LIBSTUBVA provides it
  VA_START(ap, self);
  converted = CodesetsUTF8ToStrA(VA_ARG(ap, struct TagItem *));
  VA_END(ap);

  return converted;
}
#endif
2537
2538 ///
2539 /// CodesetsUTF8CreateA()
2540 // Converts a string and a charset to an UTF8. Returns the UTF8.
2541 // If a destination hook is supplied always return 0.
2542 // If from is NULL, it returns NULL and doesn't call the hook.
2543 #ifdef __AROS__
2544 AROS_LH1(UTF8 *, CodesetsUTF8CreateA,
2545     AROS_LHA(struct TagItem *, attrs, A0),
2546     struct LibraryHeader *, library, 20, Codesets
2547 )
2548 {
2549     AROS_LIBFUNC_INIT
2550 #else
2551 UTF8 *LIBFUNC
2552 CodesetsUTF8CreateA(REG(a0, struct TagItem *attrs))
2553 {
2554 #endif
2555   UTF8   *from;
2556   UTF8   *dest;
2557   ULONG  fromLen, *destLenPtr;
2558   ULONG  n;
2559
2560   ENTER();
2561
2562   dest = NULL;
2563   n    = 0;
2564
2565   from = (UTF8*)GetTagData(CSA_Source, 0, attrs);
2566   fromLen = GetTagData(CSA_SourceLen, from != NULL ? strlen((char *)from) : 0, attrs);
2567
2568   if(from != NULL && fromLen != 0)
2569   {
2570     struct convertMsg       msg;
2571     struct codeset *codeset;
2572     struct Hook    *hook;
2573     ULONG          destLen;
2574     int            i = 0;
2575     UBYTE          buf[256];
2576     UBYTE          *src, *destPtr = NULL, *b = NULL, c;
2577
2578     if((codeset = (struct codeset *)GetTagData(CSA_SourceCodeset, 0, attrs)) == NULL)
2579       codeset = defaultCodeset(TRUE);
2580
2581     hook    = (struct Hook *)GetTagData(CSA_DestHook, 0, attrs);
2582     destLen = GetTagData(CSA_DestLen,0,attrs);
2583
2584     if(hook != NULL)
2585     {
2586       if(destLen<16 || destLen>sizeof(buf))
2587         destLen = sizeof(buf);
2588
2589       msg.state = CSV_Translating;
2590       b = buf;
2591       i = 0;
2592     }
2593     else
2594     {
2595       if((dest = (UTF8*)GetTagData(CSA_Dest, 0, attrs)) != NULL ||
2596         GetTagData(CSA_AllocIfNeeded,TRUE,attrs))
2597       {
2598         ULONG len, flen;
2599
2600         flen = fromLen;
2601         len  = 0;
2602         src  = from;
2603
2604         while(((c = *src++) && (flen--)))
2605           len += codeset->table[c].utf8[0];
2606
2607         if(dest == NULL || (destLen<len+1))
2608         {
2609           APTR                   pool;
2610           struct SignalSemaphore *sem;
2611
2612           if((pool = (APTR)GetTagData(CSA_Pool, 0, attrs)) != NULL)
2613           {
2614             if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, 0, attrs)) != NULL)
2615               ObtainSemaphore(sem);
2616
2617             // allocate the destination buffer
2618             dest = allocVecPooled(pool,len+1);
2619
2620             if(sem != NULL)
2621               ReleaseSemaphore(sem);
2622           }
2623           else
2624             dest = allocArbitrateVecPooled(len+1);
2625
2626           destLen  = len;
2627         }
2628
2629         if(dest == NULL)
2630         {
2631           RETURN(NULL);
2632           return NULL;
2633         }
2634       }
2635
2636       destPtr = (UBYTE*)dest;
2637     }
2638
2639     for(src = from; fromLen && (c = *src); src++, fromLen--)
2640     {
2641       UTF8* utf8_seq;
2642
2643       for(utf8_seq = &codeset->table[c].utf8[1]; (c = *utf8_seq); utf8_seq++)
2644       {
2645         if(hook != NULL)
2646         {
2647           *b++ = c;
2648           i++;
2649
2650           if(i%(destLen-1)==0)
2651           {
2652             *b = 0;
2653             msg.len = i;
2654             CallHookPkt(hook,&msg,buf);
2655
2656             b  = buf;
2657             *b = 0;
2658             i  = 0;
2659           }
2660         }
2661         else
2662         {
2663           if(n>=destLen)
2664             break;
2665
2666           *destPtr++ = c;
2667         }
2668
2669         n++;
2670       }
2671     }
2672
2673     if(hook != NULL)
2674     {
2675       msg.state = CSV_End;
2676       msg.len   = i;
2677       *b = 0;
2678       CallHookPkt(hook,&msg,buf);
2679     }
2680     else
2681     {
2682       *destPtr = 0;
2683     }
2684   }
2685
2686   if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, 0, attrs)))
2687     *destLenPtr = n;
2688
2689   RETURN(dest);
2690   return dest;
2691 #ifdef __AROS__
2692     AROS_LIBFUNC_EXIT
2693 #endif
2694 }
2695
2696 #ifndef __AROS__
2697 LIBSTUB(CodesetsUTF8CreateA, UTF8*, REG(a0, struct TagItem *attrs))
2698 {
2699   #ifdef __MORPHOS__
2700   return CodesetsUTF8CreateA((struct TagItem *)REG_A0);
2701   #else
2702   return CodesetsUTF8CreateA(attrs);
2703   #endif
2704 }
2705 #endif
2706
#ifdef __amigaos4__
// Varargs entry point for CodesetsUTF8CreateA() (AmigaOS4 builds only).
// A struct TagItem pointer is fetched from the variable argument list and
// forwarded; the result of CodesetsUTF8CreateA() is returned as is.
// NOTE(review): 'self' is not declared here - presumably provided by the
// LIBSTUBVA macro.
LIBSTUBVA(CodesetsUTF8Create, UTF8*, ...)
{
  VA_LIST ap;
  struct TagItem *tagList;
  UTF8 *utf8;

  VA_START(ap, self);
  tagList = VA_ARG(ap, struct TagItem *);
  utf8 = CodesetsUTF8CreateA(tagList);
  VA_END(ap);

  return utf8;
}
#endif
2720
2721 ///
2722 /// CodesetsIsValidUTF8()
2723 #define GOOD_UCS(c) \
2724      ((c) >= 160 && ((c) & ~0x3ff) != 0xd800 && \
2725       (c) != 0xfeff && (c) != 0xfffe && (c) != 0xffff)
2726
2727 #ifdef __AROS__
2728 AROS_LH1(BOOL, CodesetsIsValidUTF8,
2729     AROS_LHA(STRPTR, s, A0),
2730     struct LibraryHeader *, library, 24, Codesets
2731 )
2732 {
2733     AROS_LIBFUNC_INIT
2734 #else
2735 BOOL LIBFUNC
2736 CodesetsIsValidUTF8(REG(a0, STRPTR s))
2737 {
2738 #endif
2739   STRPTR t = s;
2740   int n;
2741
2742   ENTER();
2743
2744   while((n = parseUtf8(&t)))
2745   {
2746     if(!GOOD_UCS(n))
2747     {
2748       RETURN(FALSE);
2749       return FALSE;
2750     }
2751   }
2752
2753   RETURN(TRUE);
2754   return TRUE;
2755 #ifdef __AROS__
2756     AROS_LIBFUNC_EXIT
2757 #endif
2758 }
2759
2760 #ifndef __AROS__
2761 LIBSTUB(CodesetsIsValidUTF8, BOOL, REG(a0, STRPTR s))
2762 {
2763   #ifdef __MORPHOS__
2764   return CodesetsIsValidUTF8((STRPTR)REG_A0);
2765   #else
2766   return CodesetsIsValidUTF8(s);
2767   #endif
2768 }
2769 #endif
2770
2771 ///
2772 /// CodesetsConvertStrA()
2773 // Converts a given string from one source Codeset to a given destination
2774 // codeset and returns the convert string
2775 #ifdef __AROS__
2776 AROS_LH1(STRPTR, CodesetsConvertStrA,
2777     AROS_LHA(struct TagItem *, attrs, A0),
2778     struct LibraryHeader *, library, 26, Codesets
2779 )
2780 {
2781     AROS_LIBFUNC_INIT
2782 #else
2783 STRPTR LIBFUNC
2784 CodesetsConvertStrA(REG(a0, struct TagItem *attrs))
2785 {
2786 #endif
2787   STRPTR srcStr = NULL;
2788   STRPTR dstStr = NULL;
2789   ULONG srcLen = 0;
2790   ULONG dstLen = 0;
2791
2792   ENTER();
2793
2794   // get the ptr to the src string we want to convert
2795   // from the source codeset to the dest codeset.
2796   srcStr = (STRPTR)GetTagData(CSA_Source, (ULONG)NULL, attrs);
2797   srcLen = GetTagData(CSA_SourceLen, srcStr != NULL ? strlen(srcStr) : 0, attrs);
2798
2799   if(srcStr != NULL && srcLen > 0)
2800   {
2801     struct codeset *srcCodeset;
2802     struct codeset *dstCodeset;
2803
2804     // get the pointer to the codeset in which the src string is encoded
2805     if((srcCodeset = (struct codeset *)GetTagData(CSA_SourceCodeset, (ULONG)NULL, attrs)) == NULL)
2806       srcCodeset = defaultCodeset(TRUE);
2807
2808     // get the pointer to the codeset in which the dst string should be encoded
2809     if((dstCodeset = (struct codeset *)GetTagData(CSA_DestCodeset, (ULONG)NULL, attrs)) == NULL)
2810       dstCodeset = defaultCodeset(TRUE);
2811
2812     D(DBF_UTF, "srcCodeset: '%s' dstCodeset: '%s'", srcCodeset->name, dstCodeset->name);
2813
2814     // check that the user didn't supplied the very same codeset
2815     // or otherwise a conversion is not required.
2816     if(srcCodeset != NULL && dstCodeset != NULL && srcCodeset != dstCodeset)
2817     {
2818       BOOL utf8Create = FALSE;
2819       BOOL strCreate = FALSE;
2820       UTF8 *utf8str;
2821       ULONG utf8strLen = 0;
2822       ULONG *destLenPtr = NULL;
2823       BOOL mapForeignChars;
2824       struct Hook *mapForeignCharsHook;
2825
2826       mapForeignChars = (BOOL)GetTagData(CSA_MapForeignChars, FALSE, attrs);
2827       mapForeignCharsHook = (struct Hook *)GetTagData(CSA_MapForeignCharsHook, (ULONG)NULL, attrs);
2828
2829       // if the source codeset is UTF-8 we don't have to use the UTF8Create()
2830       // function and can directly call the UTF8ToStr() function
2831       if(srcCodeset != CodesetsBase->utf8Codeset)
2832       {
2833         struct TagItem tags[] = { { CSA_SourceCodeset,  (ULONG)srcCodeset  },
2834                                   { CSA_Source,         (ULONG)srcStr      },
2835                                   { CSA_SourceLen,      srcLen             },
2836                                   { CSA_DestLenPtr,     (ULONG)&utf8strLen },
2837                                   { TAG_DONE,           0                  } };
2838
2839         utf8str = CodesetsUTF8CreateA((struct TagItem *)&tags[0]);
2840
2841         utf8Create = TRUE;
2842       }
2843       else
2844       {
2845         utf8str = (UTF8 *)srcStr;
2846         utf8strLen = srcLen;
2847       }
2848
2849       // in case the destination codeset is UTF-8 we don't have to actually
2850       // use the UTF8ToStr() function and can immediately return our
2851       // UTF8 string
2852       if(utf8str != NULL && utf8strLen > 0 && dstCodeset != CodesetsBase->utf8Codeset)
2853       {
2854         struct TagItem tags[] = { { CSA_DestCodeset,          (ULONG)dstCodeset          },
2855                                   { CSA_Source,               (ULONG)utf8str             },
2856                                   { CSA_SourceLen,            utf8strLen                 },
2857                                   { CSA_DestLenPtr,           (ULONG)&dstLen             },
2858                                   { CSA_MapForeignChars,      mapForeignChars            },
2859                                   { CSA_MapForeignCharsHook,  (ULONG)mapForeignCharsHook },
2860                                   { TAG_DONE,                 0                          } };
2861
2862         dstStr = CodesetsUTF8ToStrA((struct TagItem *)&tags[0]);
2863
2864         strCreate = TRUE;
2865       }
2866       else
2867       {
2868         dstStr = (STRPTR)utf8str;
2869         dstLen = utf8strLen;
2870       }
2871
2872       D(DBF_UTF, "srcStr: %lx srcLen: %ld dstStr: %lx dstLen: %ld utf8create: %ld strCreate: %ld", srcStr, srcLen,
2873                                                                                                    dstStr, dstLen,
2874                                                                                                    utf8Create,
2875                                                                                                    strCreate);
2876
2877       // if everything was successfull we can go and finalize everything
2878       if(dstStr != NULL && utf8str != NULL)
2879       {
2880         // as the conversion was a two way pass we have to either free the
2881         // memory of the utf8 string or not
2882         if(utf8Create == TRUE && strCreate == TRUE)
2883           CodesetsFreeA(utf8str, NULL);
2884
2885         // if the user wants to be informed abour the length
2886         // of our destination string we store the length now in the supplied ptr.
2887         if((destLenPtr = (ULONG *)GetTagData(CSA_DestLenPtr, (ULONG)NULL, attrs)) != NULL)
2888           *destLenPtr = dstLen;
2889
2890         D(DBF_UTF, "successfully converted string with len %ld", dstLen);
2891       }
2892       else
2893       {
2894         W(DBF_ALWAYS, "an error occurred while trying to convert a string");
2895
2896         // free all memory in case the conversion didn't work out
2897         if(utf8Create == TRUE && utf8str != NULL)
2898           CodesetsFreeA(utf8str, NULL);
2899
2900         if(strCreate == TRUE && dstStr != NULL)
2901           CodesetsFreeA(dstStr, NULL);
2902
2903         dstStr = NULL;
2904       }
2905     }
2906   }
2907
2908   RETURN(dstStr);
2909   return dstStr;
2910 #ifdef __AROS__
2911     AROS_LIBFUNC_EXIT
2912 #endif
2913 }
2914
2915 #ifndef __AROS__
2916 LIBSTUB(CodesetsConvertStrA, STRPTR, REG(a0, struct TagItem *attrs))
2917 {
2918   #ifdef __MORPHOS__
2919   return CodesetsConvertStrA((struct TagItem *)REG_A0);
2920   #else
2921   return CodesetsConvertStrA(attrs);
2922   #endif
2923 }
2924 #endif
2925
#ifdef __amigaos4__
// Varargs entry point for CodesetsConvertStrA() (AmigaOS4 builds only).
// A struct TagItem pointer is fetched from the variable argument list and
// forwarded; the converted string is returned as is.
// NOTE(review): 'self' is not declared here - presumably provided by the
// LIBSTUBVA macro.
LIBSTUBVA(CodesetsConvertStr, STRPTR, ...)
{
  VA_LIST ap;
  struct TagItem *tagList;
  STRPTR converted;

  VA_START(ap, self);
  tagList = VA_ARG(ap, struct TagItem *);
  converted = CodesetsConvertStrA(tagList);
  VA_END(ap);

  return converted;
}
#endif
2939
2940 ///
2941 /// CodesetsFreeVecPooledA()
2942 #ifdef __AROS__
2943 AROS_LH3(void, CodesetsFreeVecPooledA,
2944     AROS_LHA(APTR, pool, A0),
2945     AROS_LHA(APTR, mem, A1),
2946     AROS_LHA(struct TagItem *, attrs, A2),
2947     struct LibraryHeader *, library, 25, Codesets
2948 )
2949 {
2950     AROS_LIBFUNC_INIT
2951 #else
2952 void LIBFUNC
2953 CodesetsFreeVecPooledA(REG(a0, APTR pool),
2954                        REG(a1, APTR mem),
2955                        REG(a2, struct TagItem *attrs))
2956 {
2957 #endif
2958   ENTER();
2959
2960   if(pool && mem)
2961   {
2962     struct SignalSemaphore *sem;
2963
2964     if((sem = (struct SignalSemaphore *)GetTagData(CSA_PoolSem, 0, attrs)))
2965       ObtainSemaphore(sem);
2966
2967     freeVecPooled(pool,mem);
2968
2969     if(sem)
2970       ReleaseSemaphore(sem);
2971   }
2972
2973   LEAVE();
2974 #ifdef __AROS__
2975     AROS_LIBFUNC_EXIT
2976 #endif
2977 }
2978
2979 #ifndef __AROS__
2980 LIBSTUB(CodesetsFreeVecPooledA, void, REG(a0, APTR pool),
2981                                       REG(a1, APTR mem),
2982                                       REG(a2, struct TagItem *attrs))
2983 {
2984   #ifdef __MORPHOS__
2985   return CodesetsFreeVecPooledA((APTR)REG_A0,(APTR)REG_A1,(struct TagItem *)REG_A2);
2986   #else
2987   return CodesetsFreeVecPooledA(pool, mem, attrs);
2988   #endif
2989 }
2990 #endif
2991
#ifdef __amigaos4__
// Varargs entry point for CodesetsFreeVecPooledA() (AmigaOS4 builds only).
// The tag list pointer is fetched from the variable arguments following
// 'mem' and forwarded together with pool and mem.
LIBSTUBVA(CodesetsFreeVecPooled, void, REG(a0, APTR pool),
                                       REG(a1, APTR mem), ...)
{
  VA_LIST ap;
  struct TagItem *tagList;

  VA_START(ap, mem);
  tagList = VA_ARG(ap, struct TagItem *);
  CodesetsFreeVecPooledA(pool, mem, tagList);
  VA_END(ap);
}
#endif
3003 ///
3004 /// CodesetsListCreateA()
3005 #ifdef __AROS__
3006 AROS_LH1(struct codesetList *, CodesetsListCreateA,
3007     AROS_LHA(struct TagItem *, attrs, A0),
3008     struct LibraryHeader *, library, 27, Codesets
3009 )
3010 {
3011     AROS_LIBFUNC_INIT
3012 #else
3013 struct codesetList *LIBFUNC
3014 CodesetsListCreateA(REG(a0, struct TagItem *attrs))
3015 {
3016 #endif
3017   struct codesetList *csList = NULL;
3018
3019   ENTER();
3020
3021   ObtainSemaphore(&CodesetsBase->poolSem);
3022
3023   // no matter what, we create a codesets list we will return to the user
3024   if((csList = allocVecPooled(CodesetsBase->pool, sizeof(struct codesetList))))
3025   {
3026     BOOL scanProgDir = TRUE;
3027     struct TagItem *tstate = attrs;
3028     struct TagItem *tag;
3029
3030     // initialize the new private codeset list and put it into a separate list
3031     NewList((struct List *)csList);
3032
3033     // first we get the path of the directory from which we go
3034     // and scan for charset tables from
3035     while((tag = NextTagItem(&tstate)))
3036     {
3037       switch(tag->ti_Tag)
3038       {
3039         case CSA_CodesetDir:
3040         {
3041           codesetsScanDir(csList, (STRPTR)tag->ti_Data);
3042
3043           scanProgDir = FALSE;
3044         }
3045         break;
3046
3047         case CSA_CodesetFile:
3048         {
3049           codesetsReadTable(csList, (STRPTR)tag->ti_Data);
3050
3051           scanProgDir = FALSE;
3052         }
3053         break;
3054
3055         case CSA_SourceCodeset:
3056         {
3057           struct codeset *cs = (struct codeset *)tag->ti_Data;
3058
3059           AddTail((struct List *)csList, (struct Node *)&cs->node);
3060
3061           scanProgDir = FALSE;
3062         }
3063         break;
3064       }
3065     }
3066
3067     // in case the user also wants us to scan PROGDIR:
3068     // we do so
3069     if(scanProgDir == TRUE)
3070       codesetsScanDir(csList, "PROGDIR:Charsets");
3071   }
3072
3073   ReleaseSemaphore(&CodesetsBase->poolSem);
3074
3075   RETURN(csList);
3076   return csList;
3077 #ifdef __AROS__
3078     AROS_LIBFUNC_EXIT
3079 #endif
3080 }
3081
3082 #ifndef __AROS__
3083 LIBSTUB(CodesetsListCreateA, struct codesetList *, REG(a0, struct TagItem *attrs))
3084 {
3085   #ifdef __MORPHOS__
3086   return CodesetsListCreateA((struct TagItem *)REG_A0);
3087   #else
3088   return CodesetsListCreateA(attrs);
3089   #endif
3090 }
3091 #endif
3092
#ifdef __amigaos4__
// Varargs entry point for CodesetsListCreateA() (AmigaOS4 builds only).
// A struct TagItem pointer is fetched from the variable argument list and
// forwarded; the created list is returned as is.
// NOTE(review): 'self' is not declared here - presumably provided by the
// LIBSTUBVA macro.
LIBSTUBVA(CodesetsListCreate, struct codesetList *, ...)
{
  VA_LIST ap;
  struct TagItem *tagList;
  struct codesetList *list;

  VA_START(ap, self);
  tagList = VA_ARG(ap, struct TagItem *);
  list = CodesetsListCreateA(tagList);
  VA_END(ap);

  return list;
}
#endif
3106
3107 ///
3108 /// CodesetsListDeleteA()
3109 #ifdef __AROS__
3110 AROS_LH1(BOOL, CodesetsListDeleteA,
3111     AROS_LHA(struct TagItem *, attrs, A0),
3112     struct LibraryHeader *, library, 28, Codesets
3113 )
3114 {
3115     AROS_LIBFUNC_INIT
3116 #else
3117 BOOL LIBFUNC
3118 CodesetsListDeleteA(REG(a0, struct TagItem *attrs))
3119 {
3120 #endif
3121   BOOL result = FALSE;
3122   ENTER();
3123
3124   ObtainSemaphore(&CodesetsBase->poolSem);
3125
3126   if(attrs != NULL)
3127   {
3128     BOOL freeCodesets;
3129     struct TagItem *tstate = attrs;
3130     struct TagItem *tag;
3131
3132     // check if the caller wants us also to free the codesets
3133     freeCodesets = (BOOL)GetTagData(CSA_FreeCodesets, TRUE, attrs);
3134
3135     // now we iterate through or tagItems and see what the
3136     // user wants to remove from the list
3137     while((tag = NextTagItem(&tstate)))
3138     {
3139       switch(tag->ti_Tag)
3140       {
3141         case CSA_CodesetList:
3142         {
3143           struct codesetList *csList = (struct codesetList *)tag->ti_Data;
3144
3145           if(csList)
3146           {
3147             // cleanup the codesets within the list
3148             if(freeCodesets)
3149               codesetsCleanup(csList);
3150
3151             // then free the list itself
3152             freeArbitrateVecPooled(csList);
3153
3154             result = TRUE;
3155           }
3156         }
3157       }
3158     }
3159   }
3160
3161   ReleaseSemaphore(&CodesetsBase->poolSem);
3162
3163   RETURN(result);
3164   return result;
3165 #ifdef __AROS__
3166     AROS_LIBFUNC_EXIT
3167 #endif
3168 }
3169
3170 #ifndef __AROS__
3171 LIBSTUB(CodesetsListDeleteA, BOOL, REG(a0, struct TagItem *attrs))
3172 {
3173   #ifdef __MORPHOS__
3174   return CodesetsListDeleteA((struct TagItem *)REG_A0);
3175   #else
3176   return CodesetsListDeleteA(attrs);
3177   #endif
3178 }
3179 #endif
3180
#ifdef __amigaos4__
// Varargs entry point for CodesetsListDeleteA() (AmigaOS4 builds only).
// A struct TagItem pointer is fetched from the variable argument list and
// forwarded; the result of CodesetsListDeleteA() is returned as is.
// NOTE(review): 'self' is not declared here - presumably provided by the
// LIBSTUBVA macro.
LIBSTUBVA(CodesetsListDelete, BOOL, ...)
{
  VA_LIST ap;
  struct TagItem *tagList;
  BOOL deleted;

  VA_START(ap, self);
  tagList = VA_ARG(ap, struct TagItem *);
  deleted = CodesetsListDeleteA(tagList);
  VA_END(ap);

  return deleted;
}
#endif
3194
3195 ///
3196 /// CodesetsListAddA()
3197 #ifdef __AROS__
3198 AROS_LH2(BOOL, CodesetsListAddA,
3199     AROS_LHA(struct codesetList *, csList, A0),
3200     AROS_LHA(struct TagItem *, attrs, A1),
3201     struct LibraryHeader *, library, 29, Codesets
3202 )
3203 {
3204     AROS_LIBFUNC_INIT
3205 #else
3206 BOOL LIBFUNC
3207 CodesetsListAddA(REG(a0, struct codesetList *csList),
3208                  REG(a1, struct TagItem *attrs))
3209 {
3210 #endif
3211   BOOL result = FALSE;
3212   ENTER();
3213
3214   ObtainSemaphore(&CodesetsBase->poolSem);
3215
3216   if(csList != NULL && attrs != NULL)
3217   {
3218     struct TagItem *tstate = attrs;
3219     struct TagItem *tag;
3220
3221     // now we iterate through or tagItems and see if the user
3222     // wants to scan a whole directory or just adds a file.
3223     while((tag = NextTagItem(&tstate)))
3224     {
3225       switch(tag->ti_Tag)
3226       {
3227         case CSA_CodesetDir:
3228         {
3229           codesetsScanDir(csList, (STRPTR)tag->ti_Data);
3230           result = TRUE;
3231         }
3232         break;
3233
3234         case CSA_CodesetFile:
3235         {
3236           codesetsReadTable(csList, (STRPTR)tag->ti_Data);
3237           result = TRUE;
3238         }
3239         break;
3240
3241         case CSA_SourceCodeset:
3242         {
3243           struct codeset *cs = (struct codeset *)tag->ti_Data;
3244
3245           AddTail((struct List *)csList, (struct Node *)&cs->node);
3246           result = TRUE;
3247         }
3248         break;
3249       }
3250     }
3251   }
3252
3253   ReleaseSemaphore(&CodesetsBase->poolSem);
3254
3255   RETURN(result);
3256   return result;
3257 #ifdef __AROS__
3258     AROS_LIBFUNC_EXIT
3259 #endif
3260 }
3261
3262 #ifndef __AROS__
3263 LIBSTUB(CodesetsListAddA, BOOL, REG(a0, struct codesetList *csList), REG(a1, struct TagItem *attrs))
3264 {
3265   #ifdef __MORPHOS__
3266   return CodesetsListAddA((struct codesetList *)REG_A0, (struct TagItem *)REG_A1);
3267   #else
3268   return CodesetsListAddA(csList, attrs);
3269   #endif
3270 }
3271 #endif
3272
#ifdef __amigaos4__
// Varargs entry point for CodesetsListAddA() (AmigaOS4 builds only).
// The tag list pointer is fetched from the variable arguments following
// 'csList' and forwarded together with the list.
LIBSTUBVA(CodesetsListAdd, BOOL, struct codesetList *csList, ...)
{
  VA_LIST ap;
  struct TagItem *tagList;
  BOOL added;

  VA_START(ap, csList);
  tagList = VA_ARG(ap, struct TagItem *);
  added = CodesetsListAddA(csList, tagList);
  VA_END(ap);

  return added;
}
#endif
3286
3287 ///
3288 /// CodesetsListRemoveA()
3289 #ifdef __AROS__
3290 AROS_LH1(BOOL, CodesetsListRemoveA,
3291     AROS_LHA(struct TagItem *, attrs, A0),
3292     struct LibraryHeader *, library, 30, Codesets
3293 )
3294 {
3295     AROS_LIBFUNC_INIT
3296 #else
3297 BOOL LIBFUNC
3298 CodesetsListRemoveA(REG(a0, struct TagItem *attrs))
3299 {
3300 #endif
3301   BOOL result = FALSE;
3302   ENTER();
3303
3304   ObtainSemaphore(&CodesetsBase->poolSem);
3305
3306   if(attrs != NULL)
3307   {
3308     BOOL freeCodesets;
3309     struct TagItem *tstate = attrs;
3310     struct TagItem *tag;
3311
3312     // check if the caller wants us also to free the codesets
3313     freeCodesets = (BOOL)GetTagData(CSA_FreeCodesets, TRUE, attrs);
3314
3315     // now we iterate through or tagItems and see what the
3316     // user wants to remove from the list
3317     while((tag = NextTagItem(&tstate)))
3318     {
3319       switch(tag->ti_Tag)
3320       {
3321         case CSA_SourceCodeset:
3322         {
3323           struct codeset *cs = (struct codeset *)tag->ti_Data;
3324
3325           if(cs)
3326           {
3327             struct MinNode *mstate = &cs->node;
3328
3329             // before we actually remove the node from its list, we
3330             // have to make sure it isn't part of our internal codesets list
3331             while(mstate->mln_Succ)
3332               mstate = mstate->mln_Succ;
3333
3334             if(mstate != CodesetsBase->codesets.list.mlh_Tail)
3335             {
3336               Remove((struct Node *)&cs->node);
3337
3338               // free all codesets data if requested.
3339               if(freeCodesets == TRUE)
3340               {
3341                 if(cs->name)             freeArbitrateVecPooled(cs->name);
3342                 if(cs->alt_name)         freeArbitrateVecPooled(cs->alt_name);
3343                 if(cs->characterization) freeArbitrateVecPooled(cs->characterization);
3344
3345                 freeArbitrateVecPooled(cs);
3346               }
3347
3348               result = TRUE;
3349             }
3350             else
3351               W(DBF_ALWAYS, "user tried to remove an internal codesets!");
3352           }
3353         }
3354         break;
3355       }
3356     }
3357   }
3358
3359   ReleaseSemaphore(&CodesetsBase->poolSem);
3360
3361   RETURN(result);
3362   return result;
3363 #ifdef __AROS__
3364     AROS_LIBFUNC_EXIT
3365 #endif
3366 }
3367
3368 #ifndef __AROS__
3369 LIBSTUB(CodesetsListRemoveA, BOOL, REG(a0, struct TagItem *attrs))
3370 {
3371   #ifdef __MORPHOS__
3372   return CodesetsListRemoveA((struct TagItem *)REG_A0);
3373   #else
3374   return CodesetsListRemoveA(attrs);
3375   #endif
3376 }
3377 #endif
3378
#ifdef __amigaos4__
// Varargs entry point for CodesetsListRemoveA() (AmigaOS4 builds only).
// A struct TagItem pointer is fetched from the variable argument list and
// forwarded; the result of CodesetsListRemoveA() is returned as is.
// NOTE(review): 'self' is not declared here - presumably provided by the
// LIBSTUBVA macro.
LIBSTUBVA(CodesetsListRemove, BOOL, ...)
{
  VA_LIST ap;
  struct TagItem *tagList;
  BOOL removed;

  VA_START(ap, self);
  tagList = VA_ARG(ap, struct TagItem *);
  removed = CodesetsListRemoveA(tagList);
  VA_END(ap);

  return removed;
}
#endif
3392
3393 ///
3394
3395 /**************************************************************************/