/*
 * This file is part of anjuta
 *
 * Copyright (C) 2002-2005 Paolo Maggi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */

/*
 * Modified by the anjuta Team, 2002-2005. See the AUTHORS file for a
 * list of people on the anjuta Team.
 * See the ChangeLog files for a list of changes.
 */
37 #include <glib/gi18n.h>
39 #include "anjuta-encodings.h"
42 struct _AnjutaEncoding
/*
 * The original versions of the following tables are taken from profterm
 *
 * Copyright (C) 2002 Red Hat, Inc.
 */
/*
 * Index of every encoding descriptor.
 *
 * The enumerators that precede ANJUTA_ENCODING_LAST must appear in exactly
 * the same order as the entries of the encodings[] table, because
 * anjuta_encoding_lazy_init() verifies that encodings[i].index == i.
 * ANJUTA_ENCODING_LAST is therefore also the number of table entries.
 *
 * ANJUTA_ENCODING_UTF_8 and ANJUTA_ENCODING_UNKNOWN come after
 * ANJUTA_ENCODING_LAST: their descriptors are stand-alone variables and
 * are not part of the table.
 */
typedef enum
{
	ANJUTA_ENCODING_ISO_8859_1,
	ANJUTA_ENCODING_ISO_8859_2,
	ANJUTA_ENCODING_ISO_8859_3,
	ANJUTA_ENCODING_ISO_8859_4,
	ANJUTA_ENCODING_ISO_8859_5,
	ANJUTA_ENCODING_ISO_8859_6,
	ANJUTA_ENCODING_ISO_8859_7,
	ANJUTA_ENCODING_ISO_8859_8,
	ANJUTA_ENCODING_ISO_8859_8_I,
	ANJUTA_ENCODING_ISO_8859_9,
	ANJUTA_ENCODING_ISO_8859_10,
	ANJUTA_ENCODING_ISO_8859_13,
	ANJUTA_ENCODING_ISO_8859_14,
	ANJUTA_ENCODING_ISO_8859_15,
	ANJUTA_ENCODING_ISO_8859_16,

	ANJUTA_ENCODING_UTF_7,
	ANJUTA_ENCODING_UTF_16,
	ANJUTA_ENCODING_UCS_2,
	ANJUTA_ENCODING_UCS_4,

	ANJUTA_ENCODING_ARMSCII_8,
	ANJUTA_ENCODING_BIG5,
	ANJUTA_ENCODING_BIG5_HKSCS,
	ANJUTA_ENCODING_CP_866,

	ANJUTA_ENCODING_EUC_JP,
	ANJUTA_ENCODING_EUC_JP_MS,
	ANJUTA_ENCODING_CP932,
	ANJUTA_ENCODING_EUC_KR,
	ANJUTA_ENCODING_EUC_TW,

	ANJUTA_ENCODING_GB18030,
	ANJUTA_ENCODING_GB2312,
	ANJUTA_ENCODING_GBK,
	ANJUTA_ENCODING_GEOSTD8,
	ANJUTA_ENCODING_HZ,

	ANJUTA_ENCODING_IBM_850,
	ANJUTA_ENCODING_IBM_852,
	ANJUTA_ENCODING_IBM_855,
	ANJUTA_ENCODING_IBM_857,
	ANJUTA_ENCODING_IBM_862,
	ANJUTA_ENCODING_IBM_864,

	ANJUTA_ENCODING_ISO_2022_JP,
	ANJUTA_ENCODING_ISO_2022_KR,
	ANJUTA_ENCODING_ISO_IR_111,
	ANJUTA_ENCODING_JOHAB,
	ANJUTA_ENCODING_KOI8_R,
	ANJUTA_ENCODING_KOI8__R,
	ANJUTA_ENCODING_KOI8_U,

	ANJUTA_ENCODING_SHIFT_JIS,
	ANJUTA_ENCODING_TCVN,
	ANJUTA_ENCODING_TIS_620,
	ANJUTA_ENCODING_UHC,
	ANJUTA_ENCODING_VISCII,

	ANJUTA_ENCODING_WINDOWS_1250,
	ANJUTA_ENCODING_WINDOWS_1251,
	ANJUTA_ENCODING_WINDOWS_1252,
	ANJUTA_ENCODING_WINDOWS_1253,
	ANJUTA_ENCODING_WINDOWS_1254,
	ANJUTA_ENCODING_WINDOWS_1255,
	ANJUTA_ENCODING_WINDOWS_1256,
	ANJUTA_ENCODING_WINDOWS_1257,
	ANJUTA_ENCODING_WINDOWS_1258,

	/* Number of entries in encodings[]; not a real encoding. */
	ANJUTA_ENCODING_LAST,

	ANJUTA_ENCODING_UTF_8,
	ANJUTA_ENCODING_UNKNOWN

} AnjutaEncodingIndex;
134 static AnjutaEncoding utf8_encoding
=
135 { ANJUTA_ENCODING_UTF_8
,
140 /* initialized in anjuta_encoding_lazy_init() */
141 static AnjutaEncoding unknown_encoding
=
142 { ANJUTA_ENCODING_UNKNOWN
,
148 static AnjutaEncoding encodings
[] = {
150 { ANJUTA_ENCODING_ISO_8859_1
,
151 "ISO-8859-1", N_("Western") },
152 { ANJUTA_ENCODING_ISO_8859_2
,
153 "ISO-8859-2", N_("Central European") },
154 { ANJUTA_ENCODING_ISO_8859_3
,
155 "ISO-8859-3", N_("South European") },
156 { ANJUTA_ENCODING_ISO_8859_4
,
157 "ISO-8859-4", N_("Baltic") },
158 { ANJUTA_ENCODING_ISO_8859_5
,
159 "ISO-8859-5", N_("Cyrillic") },
160 { ANJUTA_ENCODING_ISO_8859_6
,
161 "ISO-8859-6", N_("Arabic") },
162 { ANJUTA_ENCODING_ISO_8859_7
,
163 "ISO-8859-7", N_("Greek") },
164 { ANJUTA_ENCODING_ISO_8859_8
,
165 "ISO-8859-8", N_("Hebrew Visual") },
166 { ANJUTA_ENCODING_ISO_8859_8_I
,
167 "ISO-8859-8-I", N_("Hebrew") },
168 { ANJUTA_ENCODING_ISO_8859_9
,
169 "ISO-8859-9", N_("Turkish") },
170 { ANJUTA_ENCODING_ISO_8859_10
,
171 "ISO-8859-10", N_("Nordic") },
172 { ANJUTA_ENCODING_ISO_8859_13
,
173 "ISO-8859-13", N_("Baltic") },
174 { ANJUTA_ENCODING_ISO_8859_14
,
175 "ISO-8859-14", N_("Celtic") },
176 { ANJUTA_ENCODING_ISO_8859_15
,
177 "ISO-8859-15", N_("Western") },
178 { ANJUTA_ENCODING_ISO_8859_16
,
179 "ISO-8859-16", N_("Romanian") },
181 { ANJUTA_ENCODING_UTF_7
,
182 "UTF-7", N_("Unicode") },
183 { ANJUTA_ENCODING_UTF_16
,
184 "UTF-16", N_("Unicode") },
185 { ANJUTA_ENCODING_UCS_2
,
186 "UCS-2", N_("Unicode") },
187 { ANJUTA_ENCODING_UCS_4
,
188 "UCS-4", N_("Unicode") },
190 { ANJUTA_ENCODING_ARMSCII_8
,
191 "ARMSCII-8", N_("Armenian") },
192 { ANJUTA_ENCODING_BIG5
,
193 "BIG5", N_("Chinese Traditional") },
194 { ANJUTA_ENCODING_BIG5_HKSCS
,
195 "BIG5-HKSCS", N_("Chinese Traditional") },
196 { ANJUTA_ENCODING_CP_866
,
197 "CP866", N_("Cyrillic/Russian") },
199 { ANJUTA_ENCODING_EUC_JP
,
200 "EUC-JP", N_("Japanese") },
201 { ANJUTA_ENCODING_EUC_JP_MS
,
202 "EUC-JP-MS", N_("Japanese") },
203 { ANJUTA_ENCODING_CP932
,
204 "CP932", N_("Japanese") },
206 { ANJUTA_ENCODING_EUC_KR
,
207 "EUC-KR", N_("Korean") },
208 { ANJUTA_ENCODING_EUC_TW
,
209 "EUC-TW", N_("Chinese Traditional") },
211 { ANJUTA_ENCODING_GB18030
,
212 "GB18030", N_("Chinese Simplified") },
213 { ANJUTA_ENCODING_GB2312
,
214 "GB2312", N_("Chinese Simplified") },
215 { ANJUTA_ENCODING_GBK
,
216 "GBK", N_("Chinese Simplified") },
217 { ANJUTA_ENCODING_GEOSTD8
,
218 "GEORGIAN-ACADEMY", N_("Georgian") }, /* FIXME GEOSTD8 ? */
219 { ANJUTA_ENCODING_HZ
,
220 "HZ", N_("Chinese Simplified") },
222 { ANJUTA_ENCODING_IBM_850
,
223 "IBM850", N_("Western") },
224 { ANJUTA_ENCODING_IBM_852
,
225 "IBM852", N_("Central European") },
226 { ANJUTA_ENCODING_IBM_855
,
227 "IBM855", N_("Cyrillic") },
228 { ANJUTA_ENCODING_IBM_857
,
229 "IBM857", N_("Turkish") },
230 { ANJUTA_ENCODING_IBM_862
,
231 "IBM862", N_("Hebrew") },
232 { ANJUTA_ENCODING_IBM_864
,
233 "IBM864", N_("Arabic") },
235 { ANJUTA_ENCODING_ISO_2022_JP
,
236 "ISO-2022-JP", N_("Japanese") },
237 { ANJUTA_ENCODING_ISO_2022_KR
,
238 "ISO-2022-KR", N_("Korean") },
239 { ANJUTA_ENCODING_ISO_IR_111
,
240 "ISO-IR-111", N_("Cyrillic") },
241 { ANJUTA_ENCODING_JOHAB
,
242 "JOHAB", N_("Korean") },
243 { ANJUTA_ENCODING_KOI8_R
,
244 "KOI8R", N_("Cyrillic") },
245 { ANJUTA_ENCODING_KOI8__R
,
246 "KOI8-R", N_("Cyrillic") },
247 { ANJUTA_ENCODING_KOI8_U
,
248 "KOI8U", N_("Cyrillic/Ukrainian") },
250 { ANJUTA_ENCODING_SHIFT_JIS
,
251 "SHIFT_JIS", N_("Japanese") },
252 { ANJUTA_ENCODING_TCVN
,
253 "TCVN", N_("Vietnamese") },
254 { ANJUTA_ENCODING_TIS_620
,
255 "TIS-620", N_("Thai") },
256 { ANJUTA_ENCODING_UHC
,
257 "UHC", N_("Korean") },
258 { ANJUTA_ENCODING_VISCII
,
259 "VISCII", N_("Vietnamese") },
261 { ANJUTA_ENCODING_WINDOWS_1250
,
262 "WINDOWS-1250", N_("Central European") },
263 { ANJUTA_ENCODING_WINDOWS_1251
,
264 "WINDOWS-1251", N_("Cyrillic") },
265 { ANJUTA_ENCODING_WINDOWS_1252
,
266 "WINDOWS-1252", N_("Western") },
267 { ANJUTA_ENCODING_WINDOWS_1253
,
268 "WINDOWS-1253", N_("Greek") },
269 { ANJUTA_ENCODING_WINDOWS_1254
,
270 "WINDOWS-1254", N_("Turkish") },
271 { ANJUTA_ENCODING_WINDOWS_1255
,
272 "WINDOWS-1255", N_("Hebrew") },
273 { ANJUTA_ENCODING_WINDOWS_1256
,
274 "WINDOWS-1256", N_("Arabic") },
275 { ANJUTA_ENCODING_WINDOWS_1257
,
276 "WINDOWS-1257", N_("Baltic") },
277 { ANJUTA_ENCODING_WINDOWS_1258
,
278 "WINDOWS-1258", N_("Vietnamese") }
282 anjuta_encoding_lazy_init (void)
284 static gboolean initialized
= FALSE
;
286 const gchar
*locale_charset
;
292 while (i
< ANJUTA_ENCODING_LAST
)
294 g_return_if_fail (encodings
[i
].index
== i
);
296 /* Translate the names */
297 encodings
[i
].name
= _(encodings
[i
].name
);
302 utf8_encoding
.name
= _(utf8_encoding
.name
);
304 if (g_get_charset (&locale_charset
) == FALSE
)
306 unknown_encoding
.charset
= g_strdup (locale_charset
);
312 const AnjutaEncoding
*
313 anjuta_encoding_get_from_charset (const gchar
*charset
)
317 g_return_val_if_fail (charset
!= NULL
, NULL
);
319 anjuta_encoding_lazy_init ();
324 if (g_ascii_strcasecmp (charset
, "UTF-8") == 0)
325 return anjuta_encoding_get_utf8 ();
328 while (i
< ANJUTA_ENCODING_LAST
)
330 if (g_ascii_strcasecmp (charset
, encodings
[i
].charset
) == 0)
331 return &encodings
[i
];
336 if (unknown_encoding
.charset
!= NULL
)
338 if (g_ascii_strcasecmp (charset
, unknown_encoding
.charset
) == 0)
339 return &unknown_encoding
;
345 const AnjutaEncoding
*
346 anjuta_encoding_get_from_index (gint index
)
348 g_return_val_if_fail (index
>= 0, NULL
);
350 if (index
>= ANJUTA_ENCODING_LAST
)
353 anjuta_encoding_lazy_init ();
355 return &encodings
[index
];
358 const AnjutaEncoding
*
359 anjuta_encoding_get_utf8 (void)
361 anjuta_encoding_lazy_init ();
363 return &utf8_encoding
;
366 const AnjutaEncoding
*
367 anjuta_encoding_get_current (void)
369 static gboolean initialized
= FALSE
;
370 static const AnjutaEncoding
*locale_encoding
= NULL
;
372 const gchar
*locale_charset
;
374 anjuta_encoding_lazy_init ();
376 if (initialized
!= FALSE
)
377 return locale_encoding
;
379 if (g_get_charset (&locale_charset
) == FALSE
)
381 g_return_val_if_fail (locale_charset
!= NULL
, &utf8_encoding
);
383 locale_encoding
= anjuta_encoding_get_from_charset (locale_charset
);
387 locale_encoding
= &utf8_encoding
;
390 if (locale_encoding
== NULL
)
392 locale_encoding
= &unknown_encoding
;
395 g_return_val_if_fail (locale_encoding
!= NULL
, NULL
);
399 return locale_encoding
;
403 anjuta_encoding_to_string (const AnjutaEncoding
* enc
)
405 g_return_val_if_fail (enc
!= NULL
, NULL
);
407 anjuta_encoding_lazy_init ();
409 g_return_val_if_fail (enc
->charset
!= NULL
, NULL
);
411 if (enc
->name
!= NULL
)
412 return g_strdup_printf ("%s (%s)", enc
->name
, enc
->charset
);
415 if (g_ascii_strcasecmp (enc
->charset
, "ANSI_X3.4-1968") == 0)
416 return g_strdup_printf ("US-ASCII (%s)", enc
->charset
);
418 return g_strdup (enc
->charset
);
423 anjuta_encoding_get_charset (const AnjutaEncoding
* enc
)
425 g_return_val_if_fail (enc
!= NULL
, NULL
);
427 anjuta_encoding_lazy_init ();
429 g_return_val_if_fail (enc
->charset
!= NULL
, NULL
);
435 anjuta_encoding_get_name (const AnjutaEncoding
* enc
)
437 g_return_val_if_fail (enc
!= NULL
, NULL
);
439 anjuta_encoding_lazy_init ();
441 /* Translator: Unknown refers to a character encoding like UTF8,
443 return (enc
->name
== NULL
) ? _("Unknown") : enc
->name
;
447 * anjuta_encoding_get_type:
449 * Retrieves the GType object which is associated with the
450 * #AnjutaEncoding class.
452 * Return value: the GType associated with #AnjutaEncoding.
455 anjuta_encoding_get_type (void)
457 static GType our_type
= 0;
460 our_type
= g_boxed_type_register_static (
462 (GBoxedCopyFunc
) anjuta_encoding_copy
,
463 (GBoxedFreeFunc
) anjuta_encoding_free
);
469 * anjuta_encoding_copy:
470 * @enc: a #AnjutaEncoding.
472 * Makes a copy of the given encoding.
473 * This function is used by language bindings.
475 * Return value: a new #AnjutaEncoding.
478 anjuta_encoding_copy (const AnjutaEncoding
*enc
)
480 AnjutaEncoding
*new_enc
;
482 g_return_val_if_fail (enc
!= NULL
, NULL
);
484 new_enc
= g_new0 (AnjutaEncoding
, 1);
492 * anjuta_encoding_free:
493 * @enc: a #AnjutaEncoding.
495 * Frees the resources allocated by the given encoding.
496 * This function is used by language bindings.
499 anjuta_encoding_free (AnjutaEncoding
*enc
)
501 g_return_if_fail (enc
!= NULL
);