sal/textenc/tencinfo.c

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*************************************************************************
   3  *
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * Copyright 2000, 2010 Oracle and/or its affiliates.
   7  *
   8  * OpenOffice.org - a multi-platform office productivity suite
   9  *
  10  * This file is part of OpenOffice.org.
  11  *
  12  * OpenOffice.org is free software: you can redistribute it and/or modify
  13  * it under the terms of the GNU Lesser General Public License version 3
  14  * only, as published by the Free Software Foundation.
  15  *
  16  * OpenOffice.org is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19  * GNU Lesser General Public License version 3 for more details
  20  * (a copy is included in the LICENSE file that accompanied this code).
  21  *
  22  * You should have received a copy of the GNU Lesser General Public License
  23  * version 3 along with OpenOffice.org.  If not, see
  24  * <http://www.openoffice.org/license.html>
  25  * for a copy of the LGPLv3 License.
  26  *
  27  ************************************************************************/
  28
  29 #include "rtl/tencinfo.h"
  30 #include "gettextencodingdata.h"
  31 #include "tenchelp.h"
  32
  33 #ifndef _RTL_ALLOC_H
  34 #include "rtl/alloc.h"
  35 #endif
  36
  37 #ifndef INCLUDED_STDDEF_H
  38 #include <stddef.h>
  39 #define INCLUDED_STDDEF_H
  40 #endif
  41 #ifndef INCLUDED_STRING_H
  42 #include <string.h>
  43 #define INCLUDED_STRING_H
  44 #endif
  45
  46 sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)
  47 {
  48     return (sal_Bool)
  49         (nEncoding > RTL_TEXTENCODING_DONTKNOW
  50          && (nEncoding <= RTL_TEXTENCODING_ADOBE_DINGBATS)
  51              /* always update this! */
  52          && nEncoding != 9); /* RTL_TEXTENCODING_SYSTEM */
  53 }
  54
  55 /* ======================================================================= */
  56
  57 static void Impl_toAsciiLower( const sal_Char* pName, sal_Char* pBuf )
  58 {
  59     while ( *pName )
  60     {
  61         /* A-Z */
  62         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
  63             *pBuf = (*pName)+0x20;  /* toAsciiLower */
  64         else
  65             *pBuf = *pName;
  66
  67         pBuf++;
  68         pName++;
  69     }
  70
  71     *pBuf = '\0';
  72 }
  73
  74 /* ----------------------------------------------------------------------- */
  75
  76 static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const sal_Char* pName, sal_Char* pBuf )
  77 {
  78     while ( *pName )
  79     {
  80         /* A-Z */
  81         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
  82         {
  83             *pBuf = (*pName)+0x20;  /* toAsciiLower */
  84             pBuf++;
  85         }
  86         /* a-z, 0-9 */
  87         else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) ||
  88                   ((*pName >= 0x30) && (*pName <= 0x39)) )
  89         {
  90             *pBuf = *pName;
  91             pBuf++;
  92         }
  93
  94         pName++;
  95     }
  96
  97     *pBuf = '\0';
  98 }
  99
 100 /* ----------------------------------------------------------------------- */
 101
 102 /* pMatchStr must match with all characters in pCompStr */
 103 static sal_Bool Impl_matchString( const sal_Char* pCompStr, const sal_Char* pMatchStr )
 104 {
 105     /* We test only for end in MatchStr, because the last 0 character from */
 106     /* pCompStr is unequal a character in MatchStr, so the loop terminates */
 107     while ( *pMatchStr )
 108     {
 109         if ( *pCompStr != *pMatchStr )
 110             return sal_False;
 111
 112         pCompStr++;
 113         pMatchStr++;
 114     }
 115
 116     return sal_True;
 117 }
 118
 119 /* ======================================================================= */
 120
 121 typedef struct
 122 {
 123     const sal_Char*             mpCharsetStr;
 124     rtl_TextEncoding            meTextEncoding;
 125 } ImplStrCharsetDef;
 126
 127 typedef struct
 128 {
 129     const sal_Char*             mpCharsetStr;
 130     const ImplStrCharsetDef*    mpSecondPartTab;
 131 } ImplStrFirstPartCharsetDef;
 132
 133 /* ======================================================================= */
 134
 135 sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo )
 136 {
 137     const ImplTextEncodingData* pData;
 138
 139     pData = Impl_getTextEncodingData( eTextEncoding );
 140     if ( !pData )
 141     {
 142         /* HACK: For not implemented encoding, because not all
 143            calls handle the errors */
 144         if ( pEncInfo->StructSize < 5 )
 145             return sal_False;
 146         pEncInfo->MinimumCharSize = 1;
 147
 148         if ( pEncInfo->StructSize < 6 )
 149             return sal_True;
 150         pEncInfo->MaximumCharSize = 1;
 151
 152         if ( pEncInfo->StructSize < 7 )
 153             return sal_True;
 154         pEncInfo->AverageCharSize = 1;
 155
 156         if ( pEncInfo->StructSize < 12 )
 157             return sal_True;
 158         pEncInfo->Flags = 0;
 159
 160         return sal_False;
 161     }
 162
 163     if ( pEncInfo->StructSize < 5 )
 164         return sal_False;
 165     pEncInfo->MinimumCharSize = pData->mnMinCharSize;
 166
 167     if ( pEncInfo->StructSize < 6 )
 168         return sal_True;
 169     pEncInfo->MaximumCharSize = pData->mnMaxCharSize;
 170
 171     if ( pEncInfo->StructSize < 7 )
 172         return sal_True;
 173     pEncInfo->AverageCharSize = pData->mnAveCharSize;
 174
 175     if ( pEncInfo->StructSize < 12 )
 176         return sal_True;
 177     pEncInfo->Flags = pData->mnInfoFlags;
 178
 179     return sal_True;
 180 }
 181
 182 /* ======================================================================= */
 183
 184 rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset )
 185 {
 186     rtl_TextEncoding eTextEncoding;
 187
 188     switch ( nWinCharset )
 189     {
 190         case 0:     eTextEncoding = RTL_TEXTENCODING_MS_1252; break;    /* ANSI_CHARSET */
 191         case 2:     eTextEncoding = RTL_TEXTENCODING_SYMBOL; break;     /* SYMBOL_CHARSET */
 192         case 77:    eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */
 193         case 128:   eTextEncoding = RTL_TEXTENCODING_MS_932; break;     /* SHIFTJIS_CHARSET */
 194         case 129:   eTextEncoding = RTL_TEXTENCODING_MS_949; break;     /* HANGEUL_CHARSET */
 195         case 130:   eTextEncoding = RTL_TEXTENCODING_MS_1361; break;    /* JOHAB_CHARSET */
 196         case 134:   eTextEncoding = RTL_TEXTENCODING_MS_936; break;     /* GB2312_CHARSET */
 197         case 136:   eTextEncoding = RTL_TEXTENCODING_MS_950; break;     /* CHINESEBIG5_CHARSET */
 198         case 161:   eTextEncoding = RTL_TEXTENCODING_MS_1253; break;    /* GREEK_CHARSET */
 199         case 162:   eTextEncoding = RTL_TEXTENCODING_MS_1254; break;    /* TURKISH_CHARSET */
 200         case 163:   eTextEncoding = RTL_TEXTENCODING_MS_1258; break;    /* VIETNAMESE_CHARSET !!! */
 201         case 177:   eTextEncoding = RTL_TEXTENCODING_MS_1255; break;    /* HEBREW_CHARSET */
 202         case 178:   eTextEncoding = RTL_TEXTENCODING_MS_1256; break;    /* ARABIC_CHARSET */
 203         case 186:   eTextEncoding = RTL_TEXTENCODING_MS_1257; break;    /* BALTIC_CHARSET */
 204         case 204:   eTextEncoding = RTL_TEXTENCODING_MS_1251; break;    /* RUSSIAN_CHARSET */
 205         case 222:   eTextEncoding = RTL_TEXTENCODING_MS_874; break;     /* THAI_CHARSET */
 206         case 238:   eTextEncoding = RTL_TEXTENCODING_MS_1250; break;    /* EASTEUROPE_CHARSET */
 207         case 255:   eTextEncoding = RTL_TEXTENCODING_IBM_850; break;    /* OEM_CHARSET */
 208         default:    eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break;
 209     };
 210
 211     return eTextEncoding;
 212 }
 213
 214 /* ----------------------------------------------------------------------- */
 215
 216 rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const sal_Char* pUnixCharset )
 217 {
 218     /* See <ftp://ftp.x.org/pub/DOCS/registry>, section 14 ("Font Charset
 219      * (Registry and Encoding) Names").
 220      */
 221
 222     /* All Identifiers in the tables are lower case The function search */
 223     /* for the first matching string in the tables. */
 224     /* Sort order: unique (first 14, than 1), important */
 225
 226     static ImplStrCharsetDef const aUnixCharsetISOTab[] =
 227     {
 228         { "15", RTL_TEXTENCODING_ISO_8859_15 },
 229         { "14", RTL_TEXTENCODING_ISO_8859_14 },
 230         { "13", RTL_TEXTENCODING_ISO_8859_13 },
 231         { "11", RTL_TEXTENCODING_TIS_620 },
 232         { "10", RTL_TEXTENCODING_ISO_8859_10 },
 233         { "1", RTL_TEXTENCODING_ISO_8859_1 },
 234         { "2", RTL_TEXTENCODING_ISO_8859_2 },
 235         { "3", RTL_TEXTENCODING_ISO_8859_3 },
 236         { "4", RTL_TEXTENCODING_ISO_8859_4 },
 237         { "5", RTL_TEXTENCODING_ISO_8859_5 },
 238         { "6", RTL_TEXTENCODING_ISO_8859_6 },
 239         { "7", RTL_TEXTENCODING_ISO_8859_7 },
 240         { "8", RTL_TEXTENCODING_ISO_8859_8 },
 241         { "9", RTL_TEXTENCODING_ISO_8859_9 },
 242         { NULL, RTL_TEXTENCODING_DONTKNOW }
 243     };
 244
 245     static ImplStrCharsetDef const aUnixCharsetADOBETab[] =
 246     {
 247         { "fontspecific", RTL_TEXTENCODING_SYMBOL },
 248         { NULL, RTL_TEXTENCODING_DONTKNOW }
 249     };
 250
 251     static ImplStrCharsetDef const aUnixCharsetMSTab[] =
 252     {
 253         { "1252", RTL_TEXTENCODING_MS_1252 },
 254         { "1250", RTL_TEXTENCODING_MS_1250 },
 255         { "1251", RTL_TEXTENCODING_MS_1251 },
 256         { "1253", RTL_TEXTENCODING_MS_1253 },
 257         { "1254", RTL_TEXTENCODING_MS_1254 },
 258         { "1255", RTL_TEXTENCODING_MS_1255 },
 259         { "1256", RTL_TEXTENCODING_MS_1256 },
 260         { "1257", RTL_TEXTENCODING_MS_1257 },
 261         { "1258", RTL_TEXTENCODING_MS_1258 },
 262         { "932", RTL_TEXTENCODING_MS_932 },
 263         { "936", RTL_TEXTENCODING_MS_936 },
 264         { "949", RTL_TEXTENCODING_MS_949 },
 265         { "950", RTL_TEXTENCODING_MS_950 },
 266         { "1361", RTL_TEXTENCODING_MS_1361 },
 267         { "cp1252", RTL_TEXTENCODING_MS_1252 },
 268         { "cp1250", RTL_TEXTENCODING_MS_1250 },
 269         { "cp1251", RTL_TEXTENCODING_MS_1251 },
 270         { "cp1253", RTL_TEXTENCODING_MS_1253 },
 271         { "cp1254", RTL_TEXTENCODING_MS_1254 },
 272         { "cp1255", RTL_TEXTENCODING_MS_1255 },
 273         { "cp1256", RTL_TEXTENCODING_MS_1256 },
 274         { "cp1257", RTL_TEXTENCODING_MS_1257 },
 275         { "cp1258", RTL_TEXTENCODING_MS_1258 },
 276         { "cp932", RTL_TEXTENCODING_MS_932 },
 277         { "cp936", RTL_TEXTENCODING_MS_936 },
 278         { "cp949", RTL_TEXTENCODING_MS_949 },
 279         { "cp950", RTL_TEXTENCODING_MS_950 },
 280         { "cp1361", RTL_TEXTENCODING_MS_1361 },
 281         { NULL, RTL_TEXTENCODING_DONTKNOW }
 282     };
 283
 284     static ImplStrCharsetDef const aUnixCharsetIBMTab[] =
 285     {
 286         { "437", RTL_TEXTENCODING_IBM_437 },
 287         { "850", RTL_TEXTENCODING_IBM_850 },
 288         { "860", RTL_TEXTENCODING_IBM_860 },
 289         { "861", RTL_TEXTENCODING_IBM_861 },
 290         { "863", RTL_TEXTENCODING_IBM_863 },
 291         { "865", RTL_TEXTENCODING_IBM_865 },
 292         { "737", RTL_TEXTENCODING_IBM_737 },
 293         { "775", RTL_TEXTENCODING_IBM_775 },
 294         { "852", RTL_TEXTENCODING_IBM_852 },
 295         { "855", RTL_TEXTENCODING_IBM_855 },
 296         { "857", RTL_TEXTENCODING_IBM_857 },
 297         { "862", RTL_TEXTENCODING_IBM_862 },
 298         { "864", RTL_TEXTENCODING_IBM_864 },
 299         { "866", RTL_TEXTENCODING_IBM_866 },
 300         { "869", RTL_TEXTENCODING_IBM_869 },
 301         { "874", RTL_TEXTENCODING_MS_874 },
 302         { "1004", RTL_TEXTENCODING_MS_1252 },
 303         { "65400", RTL_TEXTENCODING_SYMBOL },
 304         { NULL, RTL_TEXTENCODING_DONTKNOW }
 305     };
 306
 307     static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] =
 308     {
 309         { "r", RTL_TEXTENCODING_KOI8_R },
 310         { "u", RTL_TEXTENCODING_KOI8_U },
 311         { NULL, RTL_TEXTENCODING_DONTKNOW }
 312     };
 313
 314     static ImplStrCharsetDef aUnixCharsetJISX0208Tab[] =
 315     {
 316         { NULL, RTL_TEXTENCODING_JIS_X_0208 }
 317     };
 318
 319     static ImplStrCharsetDef aUnixCharsetJISX0201Tab[] =
 320     {
 321         { NULL, RTL_TEXTENCODING_JIS_X_0201 }
 322     };
 323
 324     static ImplStrCharsetDef aUnixCharsetJISX0212Tab[] =
 325     {
 326         { NULL, RTL_TEXTENCODING_JIS_X_0212 }
 327     };
 328
 329     static ImplStrCharsetDef aUnixCharsetGBTab[] =
 330     {
 331         { NULL, RTL_TEXTENCODING_GB_2312 }
 332     };
 333
 334     static ImplStrCharsetDef aUnixCharsetGBKTab[] =
 335     {
 336         { NULL, RTL_TEXTENCODING_GBK }
 337     };
 338
 339     static ImplStrCharsetDef aUnixCharsetBIG5Tab[] =
 340     {
 341         { NULL, RTL_TEXTENCODING_BIG5 }
 342     };
 343
 344     static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] =
 345     {
 346         { NULL, RTL_TEXTENCODING_EUC_KR }
 347     };
 348
 349     static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] =
 350     {
 351         { NULL, RTL_TEXTENCODING_MS_1361 }
 352     };
 353
 354     static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] =
 355     {
 356         { NULL, RTL_TEXTENCODING_UNICODE }
 357     };
 358
 359     static ImplStrCharsetDef const aUnixCharsetUNICODETab[] =
 360     {
 361 /* Currently every Unicode Encoding is for us Unicode */
 362 /*        { "fontspecific", RTL_TEXTENCODING_UNICODE }, */
 363         { NULL, RTL_TEXTENCODING_UNICODE }
 364     };
 365
 366     static ImplStrCharsetDef const aUnixCharsetSymbolTab[] =
 367     {
 368         { NULL, RTL_TEXTENCODING_SYMBOL }
 369     };
 370
 371     /* See <http://cvs.freedesktop.org/xorg/xc/fonts/encodings/iso8859-11.enc?
 372        rev=1.1.1.1>: */
 373     static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] =
 374     {
 375         { "0", RTL_TEXTENCODING_TIS_620 },
 376         { "2529", RTL_TEXTENCODING_TIS_620 },
 377         { "2533", RTL_TEXTENCODING_TIS_620 },
 378         { NULL, RTL_TEXTENCODING_DONTKNOW }
 379     };
 380     static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] =
 381     {
 382         { "1", RTL_TEXTENCODING_TIS_620 },
 383         { NULL, RTL_TEXTENCODING_DONTKNOW }
 384     };
 385     static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] =
 386     {
 387         { "0", RTL_TEXTENCODING_TIS_620 },
 388         { "1", RTL_TEXTENCODING_TIS_620 },
 389         { NULL, RTL_TEXTENCODING_DONTKNOW }
 390     };
 391
 392     static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] =
 393     {
 394         { "iso8859", aUnixCharsetISOTab },
 395         { "adobe", aUnixCharsetADOBETab },
 396         { "ansi", aUnixCharsetMSTab },
 397         { "microsoft", aUnixCharsetMSTab },
 398         { "ibm", aUnixCharsetIBMTab },
 399         { "koi8", aUnixCharsetKOI8Tab },
 400         { "jisx0208", aUnixCharsetJISX0208Tab },
 401         { "jisx0208.1983", aUnixCharsetJISX0208Tab },
 402         { "jisx0201", aUnixCharsetJISX0201Tab },
 403         { "jisx0201.1976", aUnixCharsetJISX0201Tab },
 404         { "jisx0212", aUnixCharsetJISX0212Tab },
 405         { "jisx0212.1990", aUnixCharsetJISX0212Tab },
 406         { "gb2312", aUnixCharsetGBTab },
 407         { "gbk", aUnixCharsetGBKTab },
 408         { "big5", aUnixCharsetBIG5Tab },
 409         { "iso10646", aUnixCharsetISO10646Tab },
 410 /*      { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */
 411         { "sunolcursor", aUnixCharsetSymbolTab },
 412         { "sunolglyph", aUnixCharsetSymbolTab },
 413         { "iso10646", aUnixCharsetUNICODETab },
 414         { "ksc5601.1987", aUnixCharsetKSC56011987Tab },
 415         { "ksc5601.1992", aUnixCharsetKSC56011992Tab },
 416         { "tis620.2529", aUnixCharsetTIS6202529Tab },
 417         { "tis620.2533", aUnixCharsetTIS6202533Tab },
 418         { "tis620", aUnixCharsetTIS620Tab },
 419 /*        { "sunudcja.1997",  },        */
 420 /*        { "sunudcko.1997",  },        */
 421 /*        { "sunudczh.1997",  },        */
 422 /*        { "sunudczhtw.1997",  },      */
 423         { NULL, NULL }
 424     };
 425
 426     rtl_TextEncoding    eEncoding = RTL_TEXTENCODING_DONTKNOW;
 427     sal_Char*           pBuf;
 428     sal_Char*           pTempBuf;
 429     sal_uInt32          nBufLen = strlen( pUnixCharset )+1;
 430     const sal_Char*     pFirstPart;
 431     const sal_Char*     pSecondPart;
 432
 433     /* Alloc Buffer and map to lower case */
 434     pBuf = (char*)rtl_allocateMemory( nBufLen );
 435     Impl_toAsciiLower( pUnixCharset, pBuf );
 436
 437     /* Search FirstPart */
 438     pFirstPart = pBuf;
 439     pSecondPart = NULL;
 440     pTempBuf = pBuf;
 441     while ( *pTempBuf )
 442     {
 443         if ( *pTempBuf == '-' )
 444         {
 445             *pTempBuf = '\0';
 446             pSecondPart = pTempBuf+1;
 447             break;
 448         }
 449
 450         pTempBuf++;
 451     }
 452
 453     /* Parttrenner gefunden */
 454     if ( pSecondPart )
 455     {
 456         /* Search for the part tab */
 457         const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab;
 458         while ( pFirstPartData->mpCharsetStr )
 459         {
 460             if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) )
 461             {
 462                 /* Search for the charset in the second part tab */
 463                 const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab;
 464                 while ( pData->mpCharsetStr )
 465                 {
 466                     if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) )
 467                     {
 468                         eEncoding = pData->meTextEncoding;
 469                         break;
 470                     }
 471
 472                     pData++;
 473                 }
 474
 475                 /* use default encoding for first part */
 476                 eEncoding = pData->meTextEncoding;
 477                 break;
 478             }
 479
 480             pFirstPartData++;
 481         }
 482     }
 483
 484     rtl_freeMemory( pBuf );
 485
 486     return eEncoding;
 487 }
 488
 489 /* ----------------------------------------------------------------------- */
 490
 491 rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const sal_Char* pMimeCharset )
 492 {
 493     /* All Identifiers are in lower case and contain only alphanumeric */
 494     /* characters. The function search for the first equal string in */
 495     /* the table. In this table are only the most used mime types. */
 496     /* Sort order: important */
 497     static ImplStrCharsetDef const aVIPMimeCharsetTab[] =
 498     {
 499         { "usascii", RTL_TEXTENCODING_ASCII_US },
 500         { "utf8", RTL_TEXTENCODING_UTF8 },
 501         { "utf7", RTL_TEXTENCODING_UTF7 },
 502         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
 503         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
 504         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
 505         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
 506         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
 507         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
 508         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
 509         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
 510         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
 511         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
 512         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
 513         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
 514         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
 515         { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP },
 516         { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP },
 517         { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN },
 518         { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN },
 519         { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR },
 520         { "eucjp", RTL_TEXTENCODING_EUC_JP },
 521         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
 522         { "mskanji", RTL_TEXTENCODING_MS_932 },
 523         { "gb2312", RTL_TEXTENCODING_GB_2312 },
 524         { "cngb", RTL_TEXTENCODING_GB_2312 },
 525         { "big5", RTL_TEXTENCODING_BIG5 },
 526         { "cnbig5", RTL_TEXTENCODING_BIG5 },
 527         { "cngb12345", RTL_TEXTENCODING_GBT_12345 },
 528         { "euckr", RTL_TEXTENCODING_EUC_KR },
 529         { "koi8r", RTL_TEXTENCODING_KOI8_R },
 530         { "windows1252", RTL_TEXTENCODING_MS_1252 },
 531         { "windows1250", RTL_TEXTENCODING_MS_1250 },
 532         { "windows1251", RTL_TEXTENCODING_MS_1251 },
 533         { "windows1253", RTL_TEXTENCODING_MS_1253 },
 534         { "windows1254", RTL_TEXTENCODING_MS_1254 },
 535         { "windows1255", RTL_TEXTENCODING_MS_1255 },
 536         { "windows1256", RTL_TEXTENCODING_MS_1256 },
 537         { "windows1257", RTL_TEXTENCODING_MS_1257 },
 538         { "windows1258", RTL_TEXTENCODING_MS_1258 },
 539         { NULL, RTL_TEXTENCODING_DONTKNOW }
 540     };
 541
 542     /* All Identifiers are in lower case and contain only alphanumeric */
 543     /* characters. The function search for the first matching string in */
 544     /* the table. */
 545     /* Sort order: unique (first iso885914, than iso88591), important */
 546     static ImplStrCharsetDef const aMimeCharsetTab[] =
 547     {
 548         { "unicode11utf7", RTL_TEXTENCODING_UTF7 },
 549         { "caunicode11utf7", RTL_TEXTENCODING_UTF7 },
 550         { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 },
 551         { "iso88591win", RTL_TEXTENCODING_MS_1252 },
 552         { "iso88592win", RTL_TEXTENCODING_MS_1250 },
 553         { "iso88599win", RTL_TEXTENCODING_MS_1254 },
 554         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
 555         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
 556         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
 557         { "iso885911", RTL_TEXTENCODING_TIS_620 },
 558             /* This is no official MIME character set name, but it might be in
 559                use in Thailand. */
 560         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
 561         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
 562         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
 563         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
 564         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
 565         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
 566         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
 567         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
 568         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
 569         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
 570         { "isoir100", RTL_TEXTENCODING_ISO_8859_1 },
 571         { "latin1", RTL_TEXTENCODING_ISO_8859_1 },
 572         { "l1", RTL_TEXTENCODING_ISO_8859_1 },
 573         { "cp819", RTL_TEXTENCODING_ISO_8859_1 },
 574         { "ibm819", RTL_TEXTENCODING_ISO_8859_1 },
 575         { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 },
 576         { "isoir101", RTL_TEXTENCODING_ISO_8859_2 },
 577         { "latin2", RTL_TEXTENCODING_ISO_8859_2 },
 578         { "l2", RTL_TEXTENCODING_ISO_8859_2 },
 579         { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 },
 580         { "isoir109", RTL_TEXTENCODING_ISO_8859_3 },
 581         { "latin3", RTL_TEXTENCODING_ISO_8859_3 },
 582         { "l3", RTL_TEXTENCODING_ISO_8859_3 },
 583         { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 },
 584         { "isoir110", RTL_TEXTENCODING_ISO_8859_4 },
 585         { "latin4", RTL_TEXTENCODING_ISO_8859_4 },
 586         { "l4", RTL_TEXTENCODING_ISO_8859_4 },
 587         { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 },
 588         { "isoir144", RTL_TEXTENCODING_ISO_8859_5 },
 589         { "cyrillicasian", RTL_TEXTENCODING_PT154 },
 590         { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 },
 591         { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 },
 592         { "isoir127", RTL_TEXTENCODING_ISO_8859_6 },
 593         { "arabic", RTL_TEXTENCODING_ISO_8859_6 },
 594         { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 },
 595         { "ecma114", RTL_TEXTENCODING_ISO_8859_6 },
 596         { "asmo708", RTL_TEXTENCODING_ISO_8859_6 },
 597         { "isoir126", RTL_TEXTENCODING_ISO_8859_7 },
 598         { "greek", RTL_TEXTENCODING_ISO_8859_7 },
 599         { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 },
 600         { "elot928", RTL_TEXTENCODING_ISO_8859_7 },
 601         { "ecma118", RTL_TEXTENCODING_ISO_8859_7 },
 602         { "isoir138", RTL_TEXTENCODING_ISO_8859_8 },
 603         { "hebrew", RTL_TEXTENCODING_ISO_8859_8 },
 604         { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 },
 605         { "isoir148", RTL_TEXTENCODING_ISO_8859_9 },
 606         { "latin5", RTL_TEXTENCODING_ISO_8859_9 },
 607         { "l5", RTL_TEXTENCODING_ISO_8859_9 },
 608         { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 },
 609         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
 610         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
 611         { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 },
 612         { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 },
 613         { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 },
 614         { "iso10646us", RTL_TEXTENCODING_ASCII_US },
 615         { "iso646irv", RTL_TEXTENCODING_ASCII_US },
 616         { "cskoi8r", RTL_TEXTENCODING_KOI8_R },
 617         { "ibm437", RTL_TEXTENCODING_IBM_437 },
 618         { "cp437", RTL_TEXTENCODING_IBM_437 },
 619         { "437", RTL_TEXTENCODING_IBM_437 },
 620         { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 },
 621         { "ansix34", RTL_TEXTENCODING_ASCII_US },
 622         { "ibm367", RTL_TEXTENCODING_ASCII_US },
 623         { "cp367", RTL_TEXTENCODING_ASCII_US },
 624         { "csascii", RTL_TEXTENCODING_ASCII_US },
 625         { "ibm775", RTL_TEXTENCODING_IBM_775 },
 626         { "cp775", RTL_TEXTENCODING_IBM_775 },
 627         { "cspc775baltic", RTL_TEXTENCODING_IBM_775 },
 628         { "ibm850", RTL_TEXTENCODING_IBM_850 },
 629         { "cp850", RTL_TEXTENCODING_IBM_850 },
 630         { "850", RTL_TEXTENCODING_IBM_850 },
 631         { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 },
 632 /*        { "ibm851", RTL_TEXTENCODING_IBM_851 }, */
 633 /*        { "cp851", RTL_TEXTENCODING_IBM_851 }, */
 634 /*        { "851", RTL_TEXTENCODING_IBM_851 }, */
 635 /*        { "csibm851", RTL_TEXTENCODING_IBM_851 }, */
 636         { "ibm852", RTL_TEXTENCODING_IBM_852 },
 637         { "cp852", RTL_TEXTENCODING_IBM_852 },
 638         { "852", RTL_TEXTENCODING_IBM_852 },
 639         { "cspcp852", RTL_TEXTENCODING_IBM_852 },
 640         { "ibm855", RTL_TEXTENCODING_IBM_855 },
 641         { "cp855", RTL_TEXTENCODING_IBM_855 },
 642         { "855", RTL_TEXTENCODING_IBM_855 },
 643         { "csibm855", RTL_TEXTENCODING_IBM_855 },
 644         { "ibm857", RTL_TEXTENCODING_IBM_857 },
 645         { "cp857", RTL_TEXTENCODING_IBM_857 },
 646         { "857", RTL_TEXTENCODING_IBM_857 },
 647         { "csibm857", RTL_TEXTENCODING_IBM_857 },
 648         { "ibm860", RTL_TEXTENCODING_IBM_860 },
 649         { "cp860", RTL_TEXTENCODING_IBM_860 },
 650         { "860", RTL_TEXTENCODING_IBM_860 },
 651         { "csibm860", RTL_TEXTENCODING_IBM_860 },
 652         { "ibm861", RTL_TEXTENCODING_IBM_861 },
 653         { "cp861", RTL_TEXTENCODING_IBM_861 },
 654         { "861", RTL_TEXTENCODING_IBM_861 },
 655         { "csis", RTL_TEXTENCODING_IBM_861 },
 656         { "csibm861", RTL_TEXTENCODING_IBM_861 },
 657         { "ibm862", RTL_TEXTENCODING_IBM_862 },
 658         { "cp862", RTL_TEXTENCODING_IBM_862 },
 659         { "862", RTL_TEXTENCODING_IBM_862 },
 660         { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 },
 661         { "ibm863", RTL_TEXTENCODING_IBM_863 },
 662         { "cp863", RTL_TEXTENCODING_IBM_863 },
 663         { "863", RTL_TEXTENCODING_IBM_863 },
 664         { "csibm863", RTL_TEXTENCODING_IBM_863 },
 665         { "ibm864", RTL_TEXTENCODING_IBM_864 },
 666         { "cp864", RTL_TEXTENCODING_IBM_864 },
 667         { "864", RTL_TEXTENCODING_IBM_864 },
 668         { "csibm864", RTL_TEXTENCODING_IBM_864 },
 669         { "ibm865", RTL_TEXTENCODING_IBM_865 },
 670         { "cp865", RTL_TEXTENCODING_IBM_865 },
 671         { "865", RTL_TEXTENCODING_IBM_865 },
 672         { "csibm865", RTL_TEXTENCODING_IBM_865 },
 673         { "ibm866", RTL_TEXTENCODING_IBM_866 },
 674         { "cp866", RTL_TEXTENCODING_IBM_866 },
 675         { "866", RTL_TEXTENCODING_IBM_866 },
 676         { "csibm866", RTL_TEXTENCODING_IBM_866 },
 677 /*        { "ibm868", RTL_TEXTENCODING_IBM_868 }, */
 678 /*        { "cp868", RTL_TEXTENCODING_IBM_868 }, */
 679 /*        { "cpar", RTL_TEXTENCODING_IBM_868 }, */
 680 /*        { "csibm868", RTL_TEXTENCODING_IBM_868 }, */
 681         { "ibm869", RTL_TEXTENCODING_IBM_869 },
 682         { "cp869", RTL_TEXTENCODING_IBM_869 },
 683         { "869", RTL_TEXTENCODING_IBM_869 },
 684         { "cpgr", RTL_TEXTENCODING_IBM_869 },
 685         { "csibm869", RTL_TEXTENCODING_IBM_869 },
 686         { "ibm869", RTL_TEXTENCODING_IBM_869 },
 687         { "cp869", RTL_TEXTENCODING_IBM_869 },
 688         { "869", RTL_TEXTENCODING_IBM_869 },
 689         { "cpgr", RTL_TEXTENCODING_IBM_869 },
 690         { "csibm869", RTL_TEXTENCODING_IBM_869 },
 691         { "mac", RTL_TEXTENCODING_APPLE_ROMAN },
 692         { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN },
 693         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
 694         { "mskanji", RTL_TEXTENCODING_MS_932 },
 695         { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS },
 696         { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
 697         { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 },
 698         { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
 699         { "isoir86", RTL_TEXTENCODING_JIS_X_0208 },
 700         { "x0208", RTL_TEXTENCODING_JIS_X_0208 },
 701         { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 },
 702         { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 },
 703         { "x0201", RTL_TEXTENCODING_JIS_X_0201 },
 704         { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
 705         { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
 706         { "isoir159", RTL_TEXTENCODING_JIS_X_0208 },
 707         { "x0212", RTL_TEXTENCODING_JIS_X_0212 },
 708         { "isoir6", RTL_TEXTENCODING_ASCII_US },
 709         { "xsjis", RTL_TEXTENCODING_SHIFT_JIS },
 710         { "sjis", RTL_TEXTENCODING_SHIFT_JIS },
 711         { "ascii", RTL_TEXTENCODING_ASCII_US },
 712         { "us", RTL_TEXTENCODING_ASCII_US },
 713         { "gb180302000", RTL_TEXTENCODING_GB_18030 },
 714             /* This is no actual MIME character set name, it is only in here
 715                for backwards compatibility (before "GB18030" was officially
 716                registered with IANA, this code contained some guesses of what
 717                would become official names for GB18030). */
 718         { "gb18030", RTL_TEXTENCODING_GB_18030 },
 719         { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS },
 720         { "tis620", RTL_TEXTENCODING_TIS_620 },
 721         { "gbk", RTL_TEXTENCODING_GBK },
 722         { "cp936", RTL_TEXTENCODING_GBK },
 723         { "ms936", RTL_TEXTENCODING_GBK },
 724         { "windows936", RTL_TEXTENCODING_GBK },
 725         { "cp874", RTL_TEXTENCODING_MS_874 },
 726             /* This is no official MIME character set name, but it might be in
 727                use in Thailand. */
 728         { "ms874", RTL_TEXTENCODING_MS_874 },
 729             /* This is no official MIME character set name, but it might be in
 730                use in Thailand. */
 731         { "windows874", RTL_TEXTENCODING_MS_874 },
 732             /* This is no official MIME character set name, but it might be in
 733                use in Thailand. */
 734         { "koi8u", RTL_TEXTENCODING_KOI8_U },
 735         { "cpis", RTL_TEXTENCODING_IBM_861 },
 736         { "ksc56011987", RTL_TEXTENCODING_MS_949 },
 737         { "isoir149", RTL_TEXTENCODING_MS_949 },
 738         { "ksc56011989", RTL_TEXTENCODING_MS_949 },
 739         { "ksc5601", RTL_TEXTENCODING_MS_949 },
 740         { "korean", RTL_TEXTENCODING_MS_949 },
 741         { "csksc56011987", RTL_TEXTENCODING_MS_949 },
 742             /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as
 743                this character set identifier seems to be prominently used by MS
 744                to stand for KS C 5601 plus MS-949 extensions */
 745         { "latin9", RTL_TEXTENCODING_ISO_8859_15 },
 746         { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
 747         { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
 748         { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL },
 749         { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL },
 750         { "ptcp154", RTL_TEXTENCODING_PT154 },
 751         { "csptcp154", RTL_TEXTENCODING_PT154 },
 752         { "pt154", RTL_TEXTENCODING_PT154 },
 753         { "cp154", RTL_TEXTENCODING_PT154 },
 754         { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI },
 755             /* This is not an official MIME character set name, but is in use by
 756                various windows APIs. */
 757         { NULL, RTL_TEXTENCODING_DONTKNOW }
 758     };
 759
 760     rtl_TextEncoding            eEncoding = RTL_TEXTENCODING_DONTKNOW;
 761     sal_Char*                   pBuf;
 762     const ImplStrCharsetDef*    pData = aVIPMimeCharsetTab;
 763     sal_uInt32                  nBufLen = strlen( pMimeCharset )+1;
 764
 765     /* Alloc Buffer and map to lower case and remove non alphanumeric chars */
 766     pBuf = (char*)rtl_allocateMemory( nBufLen );
 767     Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf );
 768
 769     /* Search for equal in the VIP table */
 770     while ( pData->mpCharsetStr )
 771     {
 772         if ( strcmp( pBuf, pData->mpCharsetStr ) == 0 )
 773         {
 774             eEncoding = pData->meTextEncoding;
 775             break;
 776         }
 777
 778         pData++;
 779     }
 780
 781     /* Search for matching in the mime table */
 782     if ( eEncoding == RTL_TEXTENCODING_DONTKNOW )
 783     {
 784         pData = aMimeCharsetTab;
 785         while ( pData->mpCharsetStr )
 786         {
 787             if ( Impl_matchString( pBuf, pData->mpCharsetStr ) )
 788             {
 789                 eEncoding = pData->meTextEncoding;
 790                 break;
 791             }
 792
 793             pData++;
 794         }
 795     }
 796
 797     rtl_freeMemory( pBuf );
 798
 799     return eEncoding;
 800 }
 801
 802 /* ======================================================================= */
 803
 804 sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
 805 {
 806     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
 807     if ( pData )
 808         return pData->mnBestWindowsCharset;
 809     else
 810         return 1;
 811 }
 812
 813 /* ----------------------------------------------------------------------- */
 814
 815 const sal_Char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding  )
 816 {
 817     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
 818     if ( pData )
 819         return (sal_Char const *) pData->mpBestUnixCharset;
 820     else if( eTextEncoding == RTL_TEXTENCODING_UNICODE )
 821         return (sal_Char const *) "iso10646-1";
 822     else
 823         return 0;
 824 }
 825
 826 /* ----------------------------------------------------------------------- */
 827
 828 char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding
 829                                                              nEncoding)
 830 {
 831     ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding);
 832     return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ?
 833                p->mpBestMimeCharset : NULL;
 834 }
 835
 836 const sal_Char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
 837 {
 838     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
 839     if ( pData )
 840         return (sal_Char const *) pData->mpBestMimeCharset;
 841     else
 842         return 0;
 843 }
 844
/* The following two functions are based on
   <http://www.sharmahd.com/tm/codepages.html>,
   <http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset4.asp>,
   and <http://www.iana.org/assignments/character-sets>.
 */
 849
 850 rtl_TextEncoding SAL_CALL
 851 rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)
 852 {
 853     switch (nCodePage)
 854     {
 855     case 437: return RTL_TEXTENCODING_IBM_437;
 856     case 708: return RTL_TEXTENCODING_ISO_8859_6;
 857     case 737: return RTL_TEXTENCODING_IBM_737;
 858     case 775: return RTL_TEXTENCODING_IBM_775;
 859     case 850: return RTL_TEXTENCODING_IBM_850;
 860     case 852: return RTL_TEXTENCODING_IBM_852;
 861     case 855: return RTL_TEXTENCODING_IBM_855;
 862     case 857: return RTL_TEXTENCODING_IBM_857;
 863     case 860: return RTL_TEXTENCODING_IBM_860;
 864     case 861: return RTL_TEXTENCODING_IBM_861;
 865     case 862: return RTL_TEXTENCODING_IBM_862;
 866     case 863: return RTL_TEXTENCODING_IBM_863;
 867     case 864: return RTL_TEXTENCODING_IBM_864;
 868     case 865: return RTL_TEXTENCODING_IBM_865;
 869     case 866: return RTL_TEXTENCODING_IBM_866;
 870     case 869: return RTL_TEXTENCODING_IBM_869;
 871     case 874: return RTL_TEXTENCODING_MS_874;
 872     case 932: return RTL_TEXTENCODING_MS_932;
 873     case 936: return RTL_TEXTENCODING_MS_936;
 874     case 949: return RTL_TEXTENCODING_MS_949;
 875     case 950: return RTL_TEXTENCODING_MS_950;
 876     case 1250: return RTL_TEXTENCODING_MS_1250;
 877     case 1251: return RTL_TEXTENCODING_MS_1251;
 878     case 1252: return RTL_TEXTENCODING_MS_1252;
 879     case 1253: return RTL_TEXTENCODING_MS_1253;
 880     case 1254: return RTL_TEXTENCODING_MS_1254;
 881     case 1255: return RTL_TEXTENCODING_MS_1255;
 882     case 1256: return RTL_TEXTENCODING_MS_1256;
 883     case 1257: return RTL_TEXTENCODING_MS_1257;
 884     case 1258: return RTL_TEXTENCODING_MS_1258;
 885     case 1361: return RTL_TEXTENCODING_MS_1361;
 886     case 10000: return RTL_TEXTENCODING_APPLE_ROMAN;
 887     case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE;
 888     case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD;
 889     case 10003: return RTL_TEXTENCODING_APPLE_KOREAN;
 890     case 10004: return RTL_TEXTENCODING_APPLE_ARABIC;
 891     case 10005: return RTL_TEXTENCODING_APPLE_HEBREW;
 892     case 10006: return RTL_TEXTENCODING_APPLE_GREEK;
 893     case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC;
 894     case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP;
 895     case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN;
 896     case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN;
 897     case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO;
 898     case 10079: return RTL_TEXTENCODING_APPLE_ICELAND;
 899     case 10081: return RTL_TEXTENCODING_APPLE_TURKISH;
 900     case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN;
 901     case 20127: return RTL_TEXTENCODING_ASCII_US;
 902     case 20866: return RTL_TEXTENCODING_KOI8_R;
 903     case 21866: return RTL_TEXTENCODING_KOI8_U;
 904     case 28591: return RTL_TEXTENCODING_ISO_8859_1;
 905     case 28592: return RTL_TEXTENCODING_ISO_8859_2;
 906     case 28593: return RTL_TEXTENCODING_ISO_8859_3;
 907     case 28594: return RTL_TEXTENCODING_ISO_8859_4;
 908     case 28595: return RTL_TEXTENCODING_ISO_8859_5;
 909     case 28596: return RTL_TEXTENCODING_ISO_8859_6;
 910     case 28597: return RTL_TEXTENCODING_ISO_8859_7;
 911     case 28598: return RTL_TEXTENCODING_ISO_8859_8;
 912     case 28599: return RTL_TEXTENCODING_ISO_8859_9;
 913     case 28605: return RTL_TEXTENCODING_ISO_8859_15;
 914     case 50220: return RTL_TEXTENCODING_ISO_2022_JP;
 915     case 50225: return RTL_TEXTENCODING_ISO_2022_KR;
 916     case 51932: return RTL_TEXTENCODING_EUC_JP;
 917     case 51936: return RTL_TEXTENCODING_EUC_CN;
 918     case 51949: return RTL_TEXTENCODING_EUC_KR;
 919     case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI;
 920     case 65000: return RTL_TEXTENCODING_UTF7;
 921     case 65001: return RTL_TEXTENCODING_UTF8;
 922     default: return RTL_TEXTENCODING_DONTKNOW;
 923     }
 924 }
 925
 926 sal_uInt32 SAL_CALL
 927 rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)
 928 {
 929     switch (nEncoding)
 930     {
 931     case RTL_TEXTENCODING_IBM_437: return 437;
 932  /* case RTL_TEXTENCODING_ISO_8859_6: return 708; */
 933     case RTL_TEXTENCODING_IBM_737: return 737;
 934     case RTL_TEXTENCODING_IBM_775: return 775;
 935     case RTL_TEXTENCODING_IBM_850: return 850;
 936     case RTL_TEXTENCODING_IBM_852: return 852;
 937     case RTL_TEXTENCODING_IBM_855: return 855;
 938     case RTL_TEXTENCODING_IBM_857: return 857;
 939     case RTL_TEXTENCODING_IBM_860: return 860;
 940     case RTL_TEXTENCODING_IBM_861: return 861;
 941     case RTL_TEXTENCODING_IBM_862: return 862;
 942     case RTL_TEXTENCODING_IBM_863: return 863;
 943     case RTL_TEXTENCODING_IBM_864: return 864;
 944     case RTL_TEXTENCODING_IBM_865: return 865;
 945     case RTL_TEXTENCODING_IBM_866: return 866;
 946     case RTL_TEXTENCODING_IBM_869: return 869;
 947     case RTL_TEXTENCODING_MS_874: return 874;
 948     case RTL_TEXTENCODING_MS_932: return 932;
 949     case RTL_TEXTENCODING_MS_936: return 936;
 950     case RTL_TEXTENCODING_MS_949: return 949;
 951     case RTL_TEXTENCODING_MS_950: return 950;
 952     case RTL_TEXTENCODING_MS_1250: return 1250;
 953     case RTL_TEXTENCODING_MS_1251: return 1251;
 954     case RTL_TEXTENCODING_MS_1252: return 1252;
 955     case RTL_TEXTENCODING_MS_1253: return 1253;
 956     case RTL_TEXTENCODING_MS_1254: return 1254;
 957     case RTL_TEXTENCODING_MS_1255: return 1255;
 958     case RTL_TEXTENCODING_MS_1256: return 1256;
 959     case RTL_TEXTENCODING_MS_1257: return 1257;
 960     case RTL_TEXTENCODING_MS_1258: return 1258;
 961     case RTL_TEXTENCODING_MS_1361: return 1361;
 962     case RTL_TEXTENCODING_APPLE_ROMAN: return 10000;
 963     case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001;
 964     case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002;
 965     case RTL_TEXTENCODING_APPLE_KOREAN: return 10003;
 966     case RTL_TEXTENCODING_APPLE_ARABIC: return 10004;
 967     case RTL_TEXTENCODING_APPLE_HEBREW: return 10005;
 968     case RTL_TEXTENCODING_APPLE_GREEK: return 10006;
 969     case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007;
 970     case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008;
 971     case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010;
 972     case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017;
 973     case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029;
 974     case RTL_TEXTENCODING_APPLE_ICELAND: return 10079;
 975     case RTL_TEXTENCODING_APPLE_TURKISH: return 10081;
 976     case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082;
 977     case RTL_TEXTENCODING_ASCII_US: return 20127;
 978     case RTL_TEXTENCODING_KOI8_R: return 20866;
 979     case RTL_TEXTENCODING_KOI8_U: return 21866;
 980     case RTL_TEXTENCODING_ISO_8859_1: return 28591;
 981     case RTL_TEXTENCODING_ISO_8859_2: return 28592;
 982     case RTL_TEXTENCODING_ISO_8859_3: return 28593;
 983     case RTL_TEXTENCODING_ISO_8859_4: return 28594;
 984     case RTL_TEXTENCODING_ISO_8859_5: return 28595;
 985     case RTL_TEXTENCODING_ISO_8859_6: return 28596;
 986     case RTL_TEXTENCODING_ISO_8859_7: return 28597;
 987     case RTL_TEXTENCODING_ISO_8859_8: return 28598;
 988     case RTL_TEXTENCODING_ISO_8859_9: return 28599;
 989     case RTL_TEXTENCODING_ISO_8859_15: return 28605;
 990     case RTL_TEXTENCODING_ISO_2022_JP: return 50220;
 991     case RTL_TEXTENCODING_ISO_2022_KR: return 50225;
 992     case RTL_TEXTENCODING_EUC_JP: return 51932;
 993     case RTL_TEXTENCODING_EUC_CN: return 51936;
 994     case RTL_TEXTENCODING_EUC_KR: return 51949;
 995     case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002;
 996     case RTL_TEXTENCODING_UTF7: return 65000;
 997     case RTL_TEXTENCODING_UTF8: return 65001;
 998     default: return 0;
 999     }
1000 }
1001
1002 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */