libanjuta/anjuta-encodings.c

   1 /*
   2  * anjuta-encodings.c
   3  * This file is part of anjuta
   4  *
   5  * Copyright (C) 2002-2005 Paolo Maggi
   6  *
   7  * This program is free software; you can redistribute it and/or modify
   8  * it under the terms of the GNU General Public License as published by
   9  * the Free Software Foundation; either version 2 of the License, or
  10  * (at your option) any later version.
  11  *
  12  * This program is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  * GNU General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU General Public License
  18  * along with this program; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20  * Boston, MA 02110-1301, USA.
  21  */
  22
  23 /*
  24  * Modified by the anjuta Team, 2002-2005. See the AUTHORS file for a
  25  * list of people on the anjuta Team.
  26  * See the ChangeLog files for a list of changes.
  27  *
  28  * $Id$
  29  */
  30
  31 #ifdef HAVE_CONFIG_H
  32 #include <config.h>
  33 #endif
  34
  35 #include <string.h>
  36
  37 #include <glib/gi18n.h>
  38
  39 #include "anjuta-encodings.h"
  40
  41
  42 struct _AnjutaEncoding
  43 {
  44         gint   index;
  45         gchar *charset;
  46         gchar *name;
  47 };
  48
  49 /*
  50  * The original versions of the following tables are taken from profterm
  51  *
  52  * Copyright (C) 2002 Red Hat, Inc.
  53  */
  54
  55 typedef enum
  56 {
  57
  58   ANJUTA_ENCODING_ISO_8859_1,
  59   ANJUTA_ENCODING_ISO_8859_2,
  60   ANJUTA_ENCODING_ISO_8859_3,
  61   ANJUTA_ENCODING_ISO_8859_4,
  62   ANJUTA_ENCODING_ISO_8859_5,
  63   ANJUTA_ENCODING_ISO_8859_6,
  64   ANJUTA_ENCODING_ISO_8859_7,
  65   ANJUTA_ENCODING_ISO_8859_8,
  66   ANJUTA_ENCODING_ISO_8859_8_I,
  67   ANJUTA_ENCODING_ISO_8859_9,
  68   ANJUTA_ENCODING_ISO_8859_10,
  69   ANJUTA_ENCODING_ISO_8859_13,
  70   ANJUTA_ENCODING_ISO_8859_14,
  71   ANJUTA_ENCODING_ISO_8859_15,
  72   ANJUTA_ENCODING_ISO_8859_16,
  73
  74   ANJUTA_ENCODING_UTF_7,
  75   ANJUTA_ENCODING_UTF_16,
  76   ANJUTA_ENCODING_UCS_2,
  77   ANJUTA_ENCODING_UCS_4,
  78
  79   ANJUTA_ENCODING_ARMSCII_8,
  80   ANJUTA_ENCODING_BIG5,
  81   ANJUTA_ENCODING_BIG5_HKSCS,
  82   ANJUTA_ENCODING_CP_866,
  83
  84   ANJUTA_ENCODING_EUC_JP,
  85   ANJUTA_ENCODING_EUC_JP_MS,
  86   ANJUTA_ENCODING_CP932,
  87   ANJUTA_ENCODING_EUC_KR,
  88   ANJUTA_ENCODING_EUC_TW,
  89
  90   ANJUTA_ENCODING_GB18030,
  91   ANJUTA_ENCODING_GB2312,
  92   ANJUTA_ENCODING_GBK,
  93   ANJUTA_ENCODING_GEOSTD8,
  94   ANJUTA_ENCODING_HZ,
  95
  96   ANJUTA_ENCODING_IBM_850,
  97   ANJUTA_ENCODING_IBM_852,
  98   ANJUTA_ENCODING_IBM_855,
  99   ANJUTA_ENCODING_IBM_857,
 100   ANJUTA_ENCODING_IBM_862,
 101   ANJUTA_ENCODING_IBM_864,
 102
 103   ANJUTA_ENCODING_ISO_2022_JP,
 104   ANJUTA_ENCODING_ISO_2022_KR,
 105   ANJUTA_ENCODING_ISO_IR_111,
 106   ANJUTA_ENCODING_JOHAB,
 107   ANJUTA_ENCODING_KOI8_R,
 108   ANJUTA_ENCODING_KOI8__R,
 109   ANJUTA_ENCODING_KOI8_U,
 110
 111   ANJUTA_ENCODING_SHIFT_JIS,
 112   ANJUTA_ENCODING_TCVN,
 113   ANJUTA_ENCODING_TIS_620,
 114   ANJUTA_ENCODING_UHC,
 115   ANJUTA_ENCODING_VISCII,
 116
 117   ANJUTA_ENCODING_WINDOWS_1250,
 118   ANJUTA_ENCODING_WINDOWS_1251,
 119   ANJUTA_ENCODING_WINDOWS_1252,
 120   ANJUTA_ENCODING_WINDOWS_1253,
 121   ANJUTA_ENCODING_WINDOWS_1254,
 122   ANJUTA_ENCODING_WINDOWS_1255,
 123   ANJUTA_ENCODING_WINDOWS_1256,
 124   ANJUTA_ENCODING_WINDOWS_1257,
 125   ANJUTA_ENCODING_WINDOWS_1258,
 126
 127   ANJUTA_ENCODING_LAST,
 128
 129   ANJUTA_ENCODING_UTF_8,
 130   ANJUTA_ENCODING_UNKNOWN
 131
 132 } AnjutaEncodingIndex;
 133
 134 static AnjutaEncoding utf8_encoding =
 135         { ANJUTA_ENCODING_UTF_8,
 136           "UTF-8",
 137           N_("Unicode")
 138         };
 139
 140 /* initialized in anjuta_encoding_lazy_init() */
 141 static AnjutaEncoding unknown_encoding =
 142         { ANJUTA_ENCODING_UNKNOWN,
 143           NULL,
 144           NULL
 145         };
 146
 147
 148 static AnjutaEncoding encodings [] = {
 149
 150   { ANJUTA_ENCODING_ISO_8859_1,
 151     "ISO-8859-1", N_("Western") },
 152   { ANJUTA_ENCODING_ISO_8859_2,
 153    "ISO-8859-2", N_("Central European") },
 154   { ANJUTA_ENCODING_ISO_8859_3,
 155     "ISO-8859-3", N_("South European") },
 156   { ANJUTA_ENCODING_ISO_8859_4,
 157     "ISO-8859-4", N_("Baltic") },
 158   { ANJUTA_ENCODING_ISO_8859_5,
 159     "ISO-8859-5", N_("Cyrillic") },
 160   { ANJUTA_ENCODING_ISO_8859_6,
 161     "ISO-8859-6", N_("Arabic") },
 162   { ANJUTA_ENCODING_ISO_8859_7,
 163     "ISO-8859-7", N_("Greek") },
 164   { ANJUTA_ENCODING_ISO_8859_8,
 165     "ISO-8859-8", N_("Hebrew Visual") },
 166   { ANJUTA_ENCODING_ISO_8859_8_I,
 167     "ISO-8859-8-I", N_("Hebrew") },
 168   { ANJUTA_ENCODING_ISO_8859_9,
 169     "ISO-8859-9", N_("Turkish") },
 170   { ANJUTA_ENCODING_ISO_8859_10,
 171     "ISO-8859-10", N_("Nordic") },
 172   { ANJUTA_ENCODING_ISO_8859_13,
 173     "ISO-8859-13", N_("Baltic") },
 174   { ANJUTA_ENCODING_ISO_8859_14,
 175     "ISO-8859-14", N_("Celtic") },
 176   { ANJUTA_ENCODING_ISO_8859_15,
 177     "ISO-8859-15", N_("Western") },
 178   { ANJUTA_ENCODING_ISO_8859_16,
 179     "ISO-8859-16", N_("Romanian") },
 180
 181   { ANJUTA_ENCODING_UTF_7,
 182     "UTF-7", N_("Unicode") },
 183   { ANJUTA_ENCODING_UTF_16,
 184     "UTF-16", N_("Unicode") },
 185   { ANJUTA_ENCODING_UCS_2,
 186     "UCS-2", N_("Unicode") },
 187   { ANJUTA_ENCODING_UCS_4,
 188     "UCS-4", N_("Unicode") },
 189
 190   { ANJUTA_ENCODING_ARMSCII_8,
 191     "ARMSCII-8", N_("Armenian") },
 192   { ANJUTA_ENCODING_BIG5,
 193     "BIG5", N_("Chinese Traditional") },
 194   { ANJUTA_ENCODING_BIG5_HKSCS,
 195     "BIG5-HKSCS", N_("Chinese Traditional") },
 196   { ANJUTA_ENCODING_CP_866,
 197     "CP866", N_("Cyrillic/Russian") },
 198
 199   { ANJUTA_ENCODING_EUC_JP,
 200     "EUC-JP", N_("Japanese") },
 201   { ANJUTA_ENCODING_EUC_JP_MS,
 202     "EUC-JP-MS", N_("Japanese") },
 203   { ANJUTA_ENCODING_CP932,
 204     "CP932", N_("Japanese") },
 205
 206   { ANJUTA_ENCODING_EUC_KR,
 207     "EUC-KR", N_("Korean") },
 208   { ANJUTA_ENCODING_EUC_TW,
 209     "EUC-TW", N_("Chinese Traditional") },
 210
 211   { ANJUTA_ENCODING_GB18030,
 212     "GB18030", N_("Chinese Simplified") },
 213   { ANJUTA_ENCODING_GB2312,
 214     "GB2312", N_("Chinese Simplified") },
 215   { ANJUTA_ENCODING_GBK,
 216     "GBK", N_("Chinese Simplified") },
 217   { ANJUTA_ENCODING_GEOSTD8,
 218     "GEORGIAN-ACADEMY", N_("Georgian") }, /* FIXME GEOSTD8 ? */
 219   { ANJUTA_ENCODING_HZ,
 220     "HZ", N_("Chinese Simplified") },
 221
 222   { ANJUTA_ENCODING_IBM_850,
 223     "IBM850", N_("Western") },
 224   { ANJUTA_ENCODING_IBM_852,
 225     "IBM852", N_("Central European") },
 226   { ANJUTA_ENCODING_IBM_855,
 227     "IBM855", N_("Cyrillic") },
 228   { ANJUTA_ENCODING_IBM_857,
 229     "IBM857", N_("Turkish") },
 230   { ANJUTA_ENCODING_IBM_862,
 231     "IBM862", N_("Hebrew") },
 232   { ANJUTA_ENCODING_IBM_864,
 233     "IBM864", N_("Arabic") },
 234
 235   { ANJUTA_ENCODING_ISO_2022_JP,
 236     "ISO-2022-JP", N_("Japanese") },
 237   { ANJUTA_ENCODING_ISO_2022_KR,
 238     "ISO-2022-KR", N_("Korean") },
 239   { ANJUTA_ENCODING_ISO_IR_111,
 240     "ISO-IR-111", N_("Cyrillic") },
 241   { ANJUTA_ENCODING_JOHAB,
 242     "JOHAB", N_("Korean") },
 243   { ANJUTA_ENCODING_KOI8_R,
 244     "KOI8R", N_("Cyrillic") },
 245   { ANJUTA_ENCODING_KOI8__R,
 246     "KOI8-R", N_("Cyrillic") },
 247   { ANJUTA_ENCODING_KOI8_U,
 248     "KOI8U", N_("Cyrillic/Ukrainian") },
 249
 250   { ANJUTA_ENCODING_SHIFT_JIS,
 251     "SHIFT_JIS", N_("Japanese") },
 252   { ANJUTA_ENCODING_TCVN,
 253     "TCVN", N_("Vietnamese") },
 254   { ANJUTA_ENCODING_TIS_620,
 255     "TIS-620", N_("Thai") },
 256   { ANJUTA_ENCODING_UHC,
 257     "UHC", N_("Korean") },
 258   { ANJUTA_ENCODING_VISCII,
 259     "VISCII", N_("Vietnamese") },
 260
 261   { ANJUTA_ENCODING_WINDOWS_1250,
 262     "WINDOWS-1250", N_("Central European") },
 263   { ANJUTA_ENCODING_WINDOWS_1251,
 264     "WINDOWS-1251", N_("Cyrillic") },
 265   { ANJUTA_ENCODING_WINDOWS_1252,
 266     "WINDOWS-1252", N_("Western") },
 267   { ANJUTA_ENCODING_WINDOWS_1253,
 268     "WINDOWS-1253", N_("Greek") },
 269   { ANJUTA_ENCODING_WINDOWS_1254,
 270     "WINDOWS-1254", N_("Turkish") },
 271   { ANJUTA_ENCODING_WINDOWS_1255,
 272     "WINDOWS-1255", N_("Hebrew") },
 273   { ANJUTA_ENCODING_WINDOWS_1256,
 274     "WINDOWS-1256", N_("Arabic") },
 275   { ANJUTA_ENCODING_WINDOWS_1257,
 276     "WINDOWS-1257", N_("Baltic") },
 277   { ANJUTA_ENCODING_WINDOWS_1258,
 278     "WINDOWS-1258", N_("Vietnamese") }
 279 };
 280
 281 static void
 282 anjuta_encoding_lazy_init (void)
 283 {
 284         static gboolean initialized = FALSE;
 285         gint i;
 286         const gchar *locale_charset;
 287
 288         if (initialized)
 289                 return;
 290
 291         i = 0;
 292         while (i < ANJUTA_ENCODING_LAST)
 293         {
 294                 g_return_if_fail (encodings[i].index == i);
 295
 296                 /* Translate the names */
 297                 encodings[i].name = _(encodings[i].name);
 298
 299                 ++i;
 300         }
 301
 302         utf8_encoding.name = _(utf8_encoding.name);
 303
 304         if (g_get_charset (&locale_charset) == FALSE)
 305         {
 306                 unknown_encoding.charset = g_strdup (locale_charset);
 307         }
 308
 309         initialized = TRUE;
 310 }
 311
 312 const AnjutaEncoding *
 313 anjuta_encoding_get_from_charset (const gchar *charset)
 314 {
 315         gint i;
 316
 317         g_return_val_if_fail (charset != NULL, NULL);
 318
 319         anjuta_encoding_lazy_init ();
 320
 321         if (charset == NULL)
 322                 return NULL;
 323
 324         if (g_ascii_strcasecmp (charset, "UTF-8") == 0)
 325                 return anjuta_encoding_get_utf8 ();
 326
 327         i = 0;
 328         while (i < ANJUTA_ENCODING_LAST)
 329         {
 330                 if (g_ascii_strcasecmp (charset, encodings[i].charset) == 0)
 331                         return &encodings[i];
 332
 333                 ++i;
 334         }
 335
 336         if (unknown_encoding.charset != NULL)
 337         {
 338                 if (g_ascii_strcasecmp (charset, unknown_encoding.charset) == 0)
 339                         return &unknown_encoding;
 340         }
 341
 342         return NULL;
 343 }
 344
 345 const AnjutaEncoding *
 346 anjuta_encoding_get_from_index (gint index)
 347 {
 348         g_return_val_if_fail (index >= 0, NULL);
 349
 350         if (index >= ANJUTA_ENCODING_LAST)
 351                 return NULL;
 352
 353         anjuta_encoding_lazy_init ();
 354
 355         return &encodings [index];
 356 }
 357
 358 const AnjutaEncoding *
 359 anjuta_encoding_get_utf8 (void)
 360 {
 361         anjuta_encoding_lazy_init ();
 362
 363         return &utf8_encoding;
 364 }
 365
 366 const AnjutaEncoding *
 367 anjuta_encoding_get_current (void)
 368 {
 369         static gboolean initialized = FALSE;
 370         static const AnjutaEncoding *locale_encoding = NULL;
 371
 372         const gchar *locale_charset;
 373
 374         anjuta_encoding_lazy_init ();
 375
 376         if (initialized != FALSE)
 377                 return locale_encoding;
 378
 379         if (g_get_charset (&locale_charset) == FALSE)
 380         {
 381                 g_return_val_if_fail (locale_charset != NULL, &utf8_encoding);
 382
 383                 locale_encoding = anjuta_encoding_get_from_charset (locale_charset);
 384         }
 385         else
 386         {
 387                 locale_encoding = &utf8_encoding;
 388         }
 389
 390         if (locale_encoding == NULL)
 391         {
 392                 locale_encoding = &unknown_encoding;
 393         }
 394
 395         g_return_val_if_fail (locale_encoding != NULL, NULL);
 396
 397         initialized = TRUE;
 398
 399         return locale_encoding;
 400 }
 401
 402 gchar *
 403 anjuta_encoding_to_string (const AnjutaEncoding* enc)
 404 {
 405         g_return_val_if_fail (enc != NULL, NULL);
 406
 407         anjuta_encoding_lazy_init ();
 408
 409         g_return_val_if_fail (enc->charset != NULL, NULL);
 410
 411         if (enc->name != NULL)
 412                 return g_strdup_printf ("%s (%s)", enc->name, enc->charset);
 413         else
 414         {
 415                 if (g_ascii_strcasecmp (enc->charset, "ANSI_X3.4-1968") == 0)
 416                         return g_strdup_printf ("US-ASCII (%s)", enc->charset);
 417                 else
 418                         return g_strdup (enc->charset);
 419         }
 420 }
 421
 422 const gchar *
 423 anjuta_encoding_get_charset (const AnjutaEncoding* enc)
 424 {
 425         g_return_val_if_fail (enc != NULL, NULL);
 426
 427         anjuta_encoding_lazy_init ();
 428
 429         g_return_val_if_fail (enc->charset != NULL, NULL);
 430
 431         return enc->charset;
 432 }
 433
 434 const gchar *
 435 anjuta_encoding_get_name (const AnjutaEncoding* enc)
 436 {
 437         g_return_val_if_fail (enc != NULL, NULL);
 438
 439         anjuta_encoding_lazy_init ();
 440
 441         return (enc->name == NULL) ? _("Unknown") : enc->name;
 442 }
 443
 444 /**
 445  * anjuta_encoding_get_type:
 446  *
 447  * Retrieves the GType object which is associated with the
 448  * #AnjutaEncoding class.
 449  *
 450  * Return value: the GType associated with #AnjutaEncoding.
 451  **/
 452 GType
 453 anjuta_encoding_get_type (void)
 454 {
 455         static GType our_type = 0;
 456
 457         if (!our_type)
 458                 our_type = g_boxed_type_register_static (
 459                         "AnjutaEncoding",
 460                         (GBoxedCopyFunc) anjuta_encoding_copy,
 461                         (GBoxedFreeFunc) anjuta_encoding_free);
 462
 463         return our_type;
 464 }
 465
 466 /**
 467  * anjuta_encoding_copy:
 468  * @enc: a #AnjutaEncoding.
 469  *
 470  * Makes a copy of the given encoding.
 471  * This function is used by language bindings.
 472  *
 473  * Return value: a new #AnjutaEncoding.
 474  **/
 475 AnjutaEncoding *
 476 anjuta_encoding_copy (const AnjutaEncoding *enc)
 477 {
 478         AnjutaEncoding *new_enc;
 479
 480         g_return_val_if_fail (enc != NULL, NULL);
 481
 482         new_enc = g_new0 (AnjutaEncoding, 1);
 483         *new_enc = *enc;
 484
 485         return new_enc;
 486 }
 487
 488
 489 /**
 490  * anjuta_encoding_free:
 491  * @enc: a #AnjutaEncoding.
 492  *
 493  * Frees the resources allocated by the given encoding.
 494  * This function is used by language bindings.
 495  **/
 496 void
 497 anjuta_encoding_free (AnjutaEncoding *enc)
 498 {
 499         g_return_if_fail (enc != NULL);
 500
 501         g_free (enc);
 502 }
 503
 504
 505
 506