2 * Copyright (C) 2001-2006 Bastien Nocera <hadess@hadess.net>
4 * encoding list copied from gnome-terminal/encoding.c
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * The Totem project hereby grant permission for non-gpl compatible GStreamer
21 * plugins to be used and distributed together with GStreamer and Totem. This
22 * permission are above and beyond the permissions granted by the GPL license
23 * Totem is covered by.
25 * Monday 7th February 2005: Christian Schaller: Add exception clause.
26 * See license_change file for details.
31 #include <glib/gi18n-lib.h>
32 #include "totem-subtitle-encoding.h"
/*
 * Index constants for the subtitle encoding table, one per supported charset,
 * listed in the same language/script grouping as the rows of encodings[].
 * SUBTITLE_ENCODING_CURRENT_LOCALE comes first; SUBTITLE_ENCODING_LAST is the
 * terminating sentinel that the code below uses as the loop bound over
 * encodings[] and asserts to be equal to the number of table rows.
 *
 * NOTE(review): encodings[] has a SUBTITLE_ENCODING_HZ row between GBK and
 * BIG5 - confirm the matching constant is declared here at the same position,
 * because subtitle_encoding_init() asserts encodings[i].index == i.
 */
37 SUBTITLE_ENCODING_CURRENT_LOCALE
,
39 SUBTITLE_ENCODING_ISO_8859_6
,
40 SUBTITLE_ENCODING_IBM_864
,
41 SUBTITLE_ENCODING_MAC_ARABIC
,
42 SUBTITLE_ENCODING_WINDOWS_1256
,
44 SUBTITLE_ENCODING_ARMSCII_8
,
46 SUBTITLE_ENCODING_ISO_8859_4
,
47 SUBTITLE_ENCODING_ISO_8859_13
,
48 SUBTITLE_ENCODING_WINDOWS_1257
,
50 SUBTITLE_ENCODING_ISO_8859_14
,
52 SUBTITLE_ENCODING_ISO_8859_2
,
53 SUBTITLE_ENCODING_IBM_852
,
54 SUBTITLE_ENCODING_MAC_CE
,
55 SUBTITLE_ENCODING_WINDOWS_1250
,
57 SUBTITLE_ENCODING_GB18030
,
58 SUBTITLE_ENCODING_GB2312
,
59 SUBTITLE_ENCODING_GBK
,
62 SUBTITLE_ENCODING_BIG5
,
63 SUBTITLE_ENCODING_BIG5_HKSCS
,
64 SUBTITLE_ENCODING_EUC_TW
,
66 SUBTITLE_ENCODING_MAC_CROATIAN
,
68 SUBTITLE_ENCODING_ISO_8859_5
,
69 SUBTITLE_ENCODING_IBM_855
,
70 SUBTITLE_ENCODING_ISO_IR_111
,
71 SUBTITLE_ENCODING_KOI8_R
,
72 SUBTITLE_ENCODING_MAC_CYRILLIC
,
73 SUBTITLE_ENCODING_WINDOWS_1251
,
75 SUBTITLE_ENCODING_CP_866
,
77 SUBTITLE_ENCODING_MAC_UKRAINIAN
,
78 SUBTITLE_ENCODING_KOI8_U
,
80 SUBTITLE_ENCODING_GEOSTD8
,
82 SUBTITLE_ENCODING_ISO_8859_7
,
83 SUBTITLE_ENCODING_MAC_GREEK
,
84 SUBTITLE_ENCODING_WINDOWS_1253
,
86 SUBTITLE_ENCODING_MAC_GUJARATI
,
88 SUBTITLE_ENCODING_MAC_GURMUKHI
,
90 SUBTITLE_ENCODING_ISO_8859_8_I
,
91 SUBTITLE_ENCODING_IBM_862
,
92 SUBTITLE_ENCODING_MAC_HEBREW
,
93 SUBTITLE_ENCODING_WINDOWS_1255
,
95 SUBTITLE_ENCODING_ISO_8859_8
,
97 SUBTITLE_ENCODING_MAC_DEVANAGARI
,
99 SUBTITLE_ENCODING_MAC_ICELANDIC
,
101 SUBTITLE_ENCODING_EUC_JP
,
102 SUBTITLE_ENCODING_ISO_2022_JP
,
103 SUBTITLE_ENCODING_SHIFT_JIS
,
105 SUBTITLE_ENCODING_EUC_KR
,
106 SUBTITLE_ENCODING_ISO_2022_KR
,
107 SUBTITLE_ENCODING_JOHAB
,
108 SUBTITLE_ENCODING_UHC
,
110 SUBTITLE_ENCODING_ISO_8859_10
,
112 SUBTITLE_ENCODING_MAC_FARSI
,
114 SUBTITLE_ENCODING_ISO_8859_16
,
115 SUBTITLE_ENCODING_MAC_ROMANIAN
,
117 SUBTITLE_ENCODING_ISO_8859_3
,
119 SUBTITLE_ENCODING_TIS_620
,
121 SUBTITLE_ENCODING_ISO_8859_9
,
122 SUBTITLE_ENCODING_IBM_857
,
123 SUBTITLE_ENCODING_MAC_TURKISH
,
124 SUBTITLE_ENCODING_WINDOWS_1254
,
126 SUBTITLE_ENCODING_UTF_7
,
127 SUBTITLE_ENCODING_UTF_8
,
128 SUBTITLE_ENCODING_UTF_16
,
129 SUBTITLE_ENCODING_UCS_2
,
130 SUBTITLE_ENCODING_UCS_4
,
132 SUBTITLE_ENCODING_ISO_8859_1
,
133 SUBTITLE_ENCODING_ISO_8859_15
,
134 SUBTITLE_ENCODING_IBM_850
,
135 SUBTITLE_ENCODING_MAC_ROMAN
,
136 SUBTITLE_ENCODING_WINDOWS_1252
,
138 SUBTITLE_ENCODING_TCVN
,
139 SUBTITLE_ENCODING_VISCII
,
140 SUBTITLE_ENCODING_WINDOWS_1258
,
/* Sentinel: the number of encodings, not an encoding itself. */
142 SUBTITLE_ENCODING_LAST
143 } SubtitleEncodingIndex
;
/*
 * Table of known subtitle encodings. Each row is initialised positionally with
 * four values: a SubtitleEncodingIndex constant, a boolean flag, the charset
 * name that is later handed to g_convert(), and a translatable language/script
 * label marked with N_().
 *
 * NOTE(review): presumably the four values map to the .index, .valid, .charset
 * and .name members used further down - confirm against the SubtitleEncoding
 * definition.
 *
 * Rows must stay in enum order (subtitle_encoding_init() asserts
 * encodings[i].index == i), and rows sharing a label must stay adjacent
 * because subtitle_encoding_create_store() starts a new group whenever the
 * label differs from the previous row's.
 *
 * The table is deliberately not const: subtitle_encoding_init() fills in the
 * current-locale charset and overwrites .valid and .name at run time.
 */
155 static SubtitleEncoding encodings
[] = {
/* The charset is NULL here; g_get_charset() supplies it during init. */
157 {SUBTITLE_ENCODING_CURRENT_LOCALE
, TRUE
,
158 NULL
, N_("Current Locale")},
160 {SUBTITLE_ENCODING_ISO_8859_6
, FALSE
,
161 "ISO-8859-6", N_("Arabic")},
162 {SUBTITLE_ENCODING_IBM_864
, FALSE
,
163 "IBM864", N_("Arabic")},
164 {SUBTITLE_ENCODING_MAC_ARABIC
, FALSE
,
165 "MAC_ARABIC", N_("Arabic")},
166 {SUBTITLE_ENCODING_WINDOWS_1256
, FALSE
,
167 "WINDOWS-1256", N_("Arabic")},
169 {SUBTITLE_ENCODING_ARMSCII_8
, FALSE
,
170 "ARMSCII-8", N_("Armenian")},
172 {SUBTITLE_ENCODING_ISO_8859_4
, FALSE
,
173 "ISO-8859-4", N_("Baltic")},
174 {SUBTITLE_ENCODING_ISO_8859_13
, FALSE
,
175 "ISO-8859-13", N_("Baltic")},
176 {SUBTITLE_ENCODING_WINDOWS_1257
, FALSE
,
177 "WINDOWS-1257", N_("Baltic")},
179 {SUBTITLE_ENCODING_ISO_8859_14
, FALSE
,
180 "ISO-8859-14", N_("Celtic")},
182 {SUBTITLE_ENCODING_ISO_8859_2
, FALSE
,
183 "ISO-8859-2", N_("Central European")},
184 {SUBTITLE_ENCODING_IBM_852
, FALSE
,
185 "IBM852", N_("Central European")},
186 {SUBTITLE_ENCODING_MAC_CE
, FALSE
,
187 "MAC_CE", N_("Central European")},
188 {SUBTITLE_ENCODING_WINDOWS_1250
, FALSE
,
189 "WINDOWS-1250", N_("Central European")},
191 {SUBTITLE_ENCODING_GB18030
, FALSE
,
192 "GB18030", N_("Chinese Simplified")},
193 {SUBTITLE_ENCODING_GB2312
, FALSE
,
194 "GB2312", N_("Chinese Simplified")},
195 {SUBTITLE_ENCODING_GBK
, FALSE
,
196 "GBK", N_("Chinese Simplified")},
197 {SUBTITLE_ENCODING_HZ
, FALSE
,
198 "HZ", N_("Chinese Simplified")},
200 {SUBTITLE_ENCODING_BIG5
, FALSE
,
201 "BIG5", N_("Chinese Traditional")},
202 {SUBTITLE_ENCODING_BIG5_HKSCS
, FALSE
,
203 "BIG5-HKSCS", N_("Chinese Traditional")},
204 {SUBTITLE_ENCODING_EUC_TW
, FALSE
,
205 "EUC-TW", N_("Chinese Traditional")},
207 {SUBTITLE_ENCODING_MAC_CROATIAN
, FALSE
,
208 "MAC_CROATIAN", N_("Croatian")},
210 {SUBTITLE_ENCODING_ISO_8859_5
, FALSE
,
211 "ISO-8859-5", N_("Cyrillic")},
212 {SUBTITLE_ENCODING_IBM_855
, FALSE
,
213 "IBM855", N_("Cyrillic")},
214 {SUBTITLE_ENCODING_ISO_IR_111
, FALSE
,
215 "ISO-IR-111", N_("Cyrillic")},
216 {SUBTITLE_ENCODING_KOI8_R
, FALSE
,
217 "KOI8-R", N_("Cyrillic")},
218 {SUBTITLE_ENCODING_MAC_CYRILLIC
, FALSE
,
219 "MAC-CYRILLIC", N_("Cyrillic")},
220 {SUBTITLE_ENCODING_WINDOWS_1251
, FALSE
,
221 "WINDOWS-1251", N_("Cyrillic")},
223 {SUBTITLE_ENCODING_CP_866
, FALSE
,
224 "CP866", N_("Cyrillic/Russian")},
226 {SUBTITLE_ENCODING_MAC_UKRAINIAN
, FALSE
,
227 "MAC_UKRAINIAN", N_("Cyrillic/Ukrainian")},
228 {SUBTITLE_ENCODING_KOI8_U
, FALSE
,
229 "KOI8-U", N_("Cyrillic/Ukrainian")},
231 {SUBTITLE_ENCODING_GEOSTD8
, FALSE
,
232 "GEORGIAN-PS", N_("Georgian")},
234 {SUBTITLE_ENCODING_ISO_8859_7
, FALSE
,
235 "ISO-8859-7", N_("Greek")},
236 {SUBTITLE_ENCODING_MAC_GREEK
, FALSE
,
237 "MAC_GREEK", N_("Greek")},
238 {SUBTITLE_ENCODING_WINDOWS_1253
, FALSE
,
239 "WINDOWS-1253", N_("Greek")},
241 {SUBTITLE_ENCODING_MAC_GUJARATI
, FALSE
,
242 "MAC_GUJARATI", N_("Gujarati")},
244 {SUBTITLE_ENCODING_MAC_GURMUKHI
, FALSE
,
245 "MAC_GURMUKHI", N_("Gurmukhi")},
247 {SUBTITLE_ENCODING_ISO_8859_8_I
, FALSE
,
248 "ISO-8859-8-I", N_("Hebrew")},
249 {SUBTITLE_ENCODING_IBM_862
, FALSE
,
250 "IBM862", N_("Hebrew")},
251 {SUBTITLE_ENCODING_MAC_HEBREW
, FALSE
,
252 "MAC_HEBREW", N_("Hebrew")},
253 {SUBTITLE_ENCODING_WINDOWS_1255
, FALSE
,
254 "WINDOWS-1255", N_("Hebrew")},
256 {SUBTITLE_ENCODING_ISO_8859_8
, FALSE
,
257 "ISO-8859-8", N_("Hebrew Visual")},
259 {SUBTITLE_ENCODING_MAC_DEVANAGARI
, FALSE
,
260 "MAC_DEVANAGARI", N_("Hindi")},
262 {SUBTITLE_ENCODING_MAC_ICELANDIC
, FALSE
,
263 "MAC_ICELANDIC", N_("Icelandic")},
265 {SUBTITLE_ENCODING_EUC_JP
, FALSE
,
266 "EUC-JP", N_("Japanese")},
267 {SUBTITLE_ENCODING_ISO_2022_JP
, FALSE
,
268 "ISO2022JP", N_("Japanese")},
269 {SUBTITLE_ENCODING_SHIFT_JIS
, FALSE
,
270 "SHIFT-JIS", N_("Japanese")},
272 {SUBTITLE_ENCODING_EUC_KR
, FALSE
,
273 "EUC-KR", N_("Korean")},
274 {SUBTITLE_ENCODING_ISO_2022_KR
, FALSE
,
275 "ISO2022KR", N_("Korean")},
276 {SUBTITLE_ENCODING_JOHAB
, FALSE
,
277 "JOHAB", N_("Korean")},
278 {SUBTITLE_ENCODING_UHC
, FALSE
,
279 "UHC", N_("Korean")},
281 {SUBTITLE_ENCODING_ISO_8859_10
, FALSE
,
282 "ISO-8859-10", N_("Nordic")},
284 {SUBTITLE_ENCODING_MAC_FARSI
, FALSE
,
285 "MAC_FARSI", N_("Persian")},
287 {SUBTITLE_ENCODING_ISO_8859_16
, FALSE
,
288 "ISO-8859-16", N_("Romanian")},
289 {SUBTITLE_ENCODING_MAC_ROMANIAN
, FALSE
,
290 "MAC_ROMANIAN", N_("Romanian")},
292 {SUBTITLE_ENCODING_ISO_8859_3
, FALSE
,
293 "ISO-8859-3", N_("South European")},
295 {SUBTITLE_ENCODING_TIS_620
, FALSE
,
296 "TIS-620", N_("Thai")},
298 {SUBTITLE_ENCODING_ISO_8859_9
, FALSE
,
299 "ISO-8859-9", N_("Turkish")},
300 {SUBTITLE_ENCODING_IBM_857
, FALSE
,
301 "IBM857", N_("Turkish")},
302 {SUBTITLE_ENCODING_MAC_TURKISH
, FALSE
,
303 "MAC_TURKISH", N_("Turkish")},
304 {SUBTITLE_ENCODING_WINDOWS_1254
, FALSE
,
305 "WINDOWS-1254", N_("Turkish")},
307 {SUBTITLE_ENCODING_UTF_7
, FALSE
,
308 "UTF-7", N_("Unicode")},
309 {SUBTITLE_ENCODING_UTF_8
, FALSE
,
310 "UTF-8", N_("Unicode")},
311 {SUBTITLE_ENCODING_UTF_16
, FALSE
,
312 "UTF-16", N_("Unicode")},
313 {SUBTITLE_ENCODING_UCS_2
, FALSE
,
314 "UCS-2", N_("Unicode")},
315 {SUBTITLE_ENCODING_UCS_4
, FALSE
,
316 "UCS-4", N_("Unicode")},
318 {SUBTITLE_ENCODING_ISO_8859_1
, FALSE
,
319 "ISO-8859-1", N_("Western")},
320 {SUBTITLE_ENCODING_ISO_8859_15
, FALSE
,
321 "ISO-8859-15", N_("Western")},
322 {SUBTITLE_ENCODING_IBM_850
, FALSE
,
323 "IBM850", N_("Western")},
324 {SUBTITLE_ENCODING_MAC_ROMAN
, FALSE
,
325 "MAC_ROMAN", N_("Western")},
326 {SUBTITLE_ENCODING_WINDOWS_1252
, FALSE
,
327 "WINDOWS-1252", N_("Western")},
329 {SUBTITLE_ENCODING_TCVN
, FALSE
,
330 "TCVN", N_("Vietnamese")},
331 {SUBTITLE_ENCODING_VISCII
, FALSE
,
332 "VISCII", N_("Vietnamese")},
333 {SUBTITLE_ENCODING_WINDOWS_1258
, FALSE
,
334 "WINDOWS-1258", N_("Vietnamese")}
/*
 * Look up the encodings[] entry whose charset matches @charset, ignoring case
 * (strcasecmp). The regular entries are searched first, starting at index 1;
 * the current-locale entry is compared afterwards. A match yields a pointer
 * into encodings[].
 *
 * NOTE(review): encodings[SUBTITLE_ENCODING_CURRENT_LOCALE].charset is NULL in
 * the static table and is only filled in by subtitle_encoding_init(), so this
 * presumably must not run before initialisation - confirm.
 */
337 static const SubtitleEncoding
*
338 find_encoding_by_charset (const char *charset
)
342 i
= 1; /* skip current locale */
343 while (i
< SUBTITLE_ENCODING_LAST
) {
344 if (strcasecmp (charset
, encodings
[i
].charset
) == 0)
345 return &encodings
[i
];
/* No regular entry matched: try the current locale's charset last. */
350 if (strcasecmp (charset
,
351 encodings
[SUBTITLE_ENCODING_CURRENT_LOCALE
].charset
) == 0)
352 return &encodings
[SUBTITLE_ENCODING_CURRENT_LOCALE
];
/*
 * Prepare the encodings[] table for use:
 *  - store the current locale's charset (g_get_charset) in the
 *    SUBTITLE_ENCODING_CURRENT_LOCALE entry;
 *  - assert that the table length equals SUBTITLE_ENCODING_LAST and that each
 *    entry's index equals its position;
 *  - replace each name with its translation;
 *  - pass a printable-ASCII sample through g_convert() with the entry's
 *    charset as both source and target, and use the outcome to set .valid.
 */
358 subtitle_encoding_init (void)
361 gsize bytes_read
, bytes_written
;
/* Room for the 95 printable ASCII characters plus the terminating NUL. */
363 gchar ascii_sample
[96];
/* Fill in the charset of the current-locale entry, which is NULL in the static table. */
365 g_get_charset ((const char **)
366 &encodings
[SUBTITLE_ENCODING_CURRENT_LOCALE
].charset
);
/* The table and the enum must describe the same number of encodings. */
368 g_assert (G_N_ELEMENTS (encodings
) == SUBTITLE_ENCODING_LAST
);
370 /* Initialize the sample text with all of the printing ASCII characters
371 * from space (32) to the tilde (126), 95 in all. */
372 for (i
= 0; i
< (int) sizeof (ascii_sample
); i
++)
373 ascii_sample
[i
] = i
+ 32;
/* The loop also wrote the final slot (value 127); replace it with the terminator. */
375 ascii_sample
[sizeof (ascii_sample
) - 1] = '\0';
378 while (i
< SUBTITLE_ENCODING_LAST
) {
/* Each entry must sit at the position named by its index member. */
382 g_assert (encodings
[i
].index
== i
);
384 /* Translate the names */
385 encodings
[i
].name
= _(encodings
[i
].name
);
387 /* Test that the encoding is a proper superset of ASCII (which naive
388 * apps are going to use anyway) by attempting to validate the text
389 * using the current encoding. This also flushes out any encodings
390 * which the underlying GIConv implementation can't support.
392 converted
= g_convert (ascii_sample
, sizeof (ascii_sample
) - 1,
393 encodings
[i
].charset
, encodings
[i
].charset
,
394 &bytes_read
, &bytes_written
, NULL
);
396 /* The encoding is only valid if ASCII passes through cleanly. */
/* The current-locale entry is marked valid without consulting the conversion result. */
397 if (i
== SUBTITLE_ENCODING_CURRENT_LOCALE
)
398 encodings
[i
].valid
= TRUE
;
/* Validity test: every sample byte was read, a result was returned, and it equals the input. */
401 (bytes_read
== (sizeof (ascii_sample
) - 1)) &&
402 (converted
!= NULL
) && (strcmp (converted
, ascii_sample
) == 0);
404 #ifdef DEBUG_ENCODINGS
405 if (!encodings
[i
].valid
) {
406 g_print ("Rejecting encoding %s as invalid:\n", encodings
[i
].charset
);
407 g_print (" input \"%s\"\n", ascii_sample
);
408 g_print (" output \"%s\"\n\n", converted
? converted
: "(null)");
412 /* Discard the converted string. */
/*
 * Map a charset name to an encodings[] index, using
 * find_encoding_by_charset() for the lookup.
 * SUBTITLE_ENCODING_CURRENT_LOCALE is the fallback value.
 *
 * NOTE(review): the fallback is presumably taken when the lookup finds no
 * entry - confirm.
 */
420 subtitle_encoding_get_index (const char *charset
)
422 const SubtitleEncoding
*e
;
424 e
= find_encoding_by_charset (charset
);
428 return SUBTITLE_ENCODING_CURRENT_LOCALE
;
/*
 * Select the encodings[] entry for @index_. An index at or beyond
 * SUBTITLE_ENCODING_LAST, an index below SUBTITLE_ENCODING_CURRENT_LOCALE, or
 * an entry that is not marked valid all select the current-locale entry
 * instead.
 *
 * NOTE(review): presumably the selected entry's charset string is what gets
 * returned, as the function name suggests - confirm.
 */
432 subtitle_encoding_get_charset (int index_
)
434 const SubtitleEncoding
*e
;
/* Reject out-of-range indices before encodings[index_] is touched. */
436 if (index_
>= SUBTITLE_ENCODING_LAST
)
437 e
= &encodings
[SUBTITLE_ENCODING_CURRENT_LOCALE
];
438 else if (index_
< SUBTITLE_ENCODING_CURRENT_LOCALE
)
439 e
= &encodings
[SUBTITLE_ENCODING_CURRENT_LOCALE
];
/* In range, but the charset did not pass the check in subtitle_encoding_init(). */
440 else if (!encodings
[index_
].valid
)
441 e
= &encodings
[SUBTITLE_ENCODING_CURRENT_LOCALE
];
443 e
= &encodings
[index_
];
/*
 * Sort callback for the encoding tree store (installed through
 * gtk_tree_sortable_set_default_sort_func): reads the NAME_COL string of rows
 * @a and @b and compares them bytewise with strcmp(). @data is not used in
 * the code visible here.
 *
 * NOTE(review): gtk_tree_model_get() hands back copies of string columns -
 * confirm str_a and str_b are freed once compared.
 */
454 compare (GtkTreeModel
* model
, GtkTreeIter
* a
, GtkTreeIter
* b
, gpointer data
)
456 gchar
*str_a
, *str_b
;
459 gtk_tree_model_get (model
, a
, NAME_COL
, &str_a
, -1);
460 gtk_tree_model_get (model
, b
, NAME_COL
, &str_b
, -1);
462 result
= strcmp (str_a
, str_b
);
/*
 * Cell data function for the combo box renderer. Sets the cell's "sensitive"
 * property to the negation of "row has children", so rows with children (the
 * per-language group rows) are insensitive while leaf rows (the individual
 * encodings) are sensitive.
 */
471 is_encoding_sensitive (GtkCellLayout
* cell_layout
,
472 GtkCellRenderer
* cell
,
473 GtkTreeModel
* tree_model
, GtkTreeIter
* iter
, gpointer data
)
478 sensitive
= !gtk_tree_model_iter_has_child (tree_model
, iter
);
479 g_object_set (cell
, "sensitive", sensitive
, NULL
);
/*
 * Build the two-level tree model shown in the encoding combo box. The store
 * has an int column and a string column (written through INDEX_COL and
 * NAME_COL). Every valid entry of encodings[] becomes a child row labelled
 * "<name> (<charset>)" that carries the entry's index; children are grouped
 * under one parent row per distinct name, and parent rows get -1 in INDEX_COL.
 * Rows are sorted by NAME_COL ascending, with compare() installed as the
 * default sort function. Returns the store as a GtkTreeModel.
 */
482 static GtkTreeModel
*
483 subtitle_encoding_create_store (void)
486 const gchar
*lastlang
= "";
487 GtkTreeIter iter
, iter2
;
491 store
= gtk_tree_store_new (2, G_TYPE_INT
, G_TYPE_STRING
);
493 for (i
= 0; i
< SUBTITLE_ENCODING_LAST
; i
++) {
494 if (encodings
[i
].valid
) {
/* A different name starts a new group; this relies on equal names being adjacent in encodings[]. */
495 if (strcmp (lastlang
, encodings
[i
].name
)) {
496 lastlang
= encodings
[i
].name
;
497 gtk_tree_store_append (store
, &iter
, NULL
);
498 gtk_tree_store_set (store
, &iter
, INDEX_COL
,
499 -1, NAME_COL
, lastlang
, -1);
/* NOTE(review): label is heap-allocated by g_strdup_printf() - confirm it is freed after the row is set. */
501 label
= g_strdup_printf("%s (%s)", lastlang
, encodings
[i
].charset
);
502 gtk_tree_store_append (store
, &iter2
, &iter
);
503 gtk_tree_store_set (store
, &iter2
, INDEX_COL
,
504 encodings
[i
].index
, NAME_COL
, label
, -1);
508 gtk_tree_sortable_set_default_sort_func (GTK_TREE_SORTABLE (store
),
509 compare
, NULL
, NULL
);
510 gtk_tree_sortable_set_sort_column_id (GTK_TREE_SORTABLE (store
),
511 NAME_COL
, GTK_SORT_ASCENDING
);
512 return GTK_TREE_MODEL (store
);
/*
 * Attach a text cell renderer to @combo: the renderer is packed at the start
 * with expand set to TRUE, its "text" attribute is bound to NAME_COL, and
 * is_encoding_sensitive() is installed as its cell data function (with no
 * user data and no destroy notify).
 */
516 subtitle_encoding_combo_render (GtkComboBox
* combo
)
518 GtkCellRenderer
*renderer
;
520 renderer
= gtk_cell_renderer_text_new ();
521 gtk_cell_layout_pack_start (GTK_CELL_LAYOUT (combo
), renderer
, TRUE
);
522 gtk_cell_layout_set_attributes (GTK_CELL_LAYOUT (combo
), renderer
,
523 "text", NAME_COL
, NULL
);
524 gtk_cell_layout_set_cell_data_func (GTK_CELL_LAYOUT (combo
),
525 renderer
, is_encoding_sensitive
, NULL
, NULL
);
/*
 * Return the result of subtitle_encoding_get_charset() for the row currently
 * active in @combo; the row's INDEX_COL value is read from the combo's model
 * and used as the index.
 *
 * NOTE(review): the value index_ holds when no row is active is not visible
 * here - confirm it is initialised before the final call.
 */
529 totem_subtitle_encoding_get_selected (GtkComboBox
* combo
)
535 model
= gtk_combo_box_get_model (combo
);
536 if (gtk_combo_box_get_active_iter (combo
, &iter
)) {
537 gtk_tree_model_get (model
, &iter
, INDEX_COL
, &index_
, -1);
541 return subtitle_encoding_get_charset (index_
);
/*
 * Make the row for @encoding the active row of @combo. @encoding must not be
 * NULL (enforced with g_return_if_fail). The charset name is mapped to a table
 * index with subtitle_encoding_get_index(); the model is then walked, reading
 * INDEX_COL from the children of each top-level row that has children, and
 * finally iter2 is made the active iter.
 *
 * NOTE(review): the comparison against index_ and the loop exits are not
 * visible here - confirm iter2 is always valid when
 * gtk_combo_box_set_active_iter() is reached (empty model, or no child with a
 * matching index).
 */
545 totem_subtitle_encoding_set (GtkComboBox
* combo
, const char *encoding
)
548 GtkTreeIter iter
, iter2
;
551 g_return_if_fail (encoding
!= NULL
);
553 model
= gtk_combo_box_get_model (combo
);
554 index_
= subtitle_encoding_get_index (encoding
);
/* NOTE(review): the return value is ignored here; confirm the model can never be empty at this point. */
555 gtk_tree_model_get_iter_first (model
, &iter
);
/* Only rows with children (language groups) hold encodings. */
557 if (!gtk_tree_model_iter_has_child (model
, &iter
))
559 if (!gtk_tree_model_iter_children (model
, &iter2
, &iter
))
562 gtk_tree_model_get (model
, &iter2
, INDEX_COL
, &i
, -1);
565 } while (gtk_tree_model_iter_next (model
, &iter2
));
568 } while (gtk_tree_model_iter_next (model
, &iter
));
569 gtk_combo_box_set_active_iter (combo
, &iter2
);
/*
 * Set up @combo as a subtitle-encoding chooser: initialise the encoding table,
 * build the tree model, install it on the combo box, release this function's
 * reference to the model, and add the text renderer.
 */
573 totem_subtitle_encoding_init (GtkComboBox
*combo
)
576 subtitle_encoding_init ();
577 model
= subtitle_encoding_create_store ();
578 gtk_combo_box_set_model (combo
, model
);
/* Drop the reference obtained from subtitle_encoding_create_store(). */
579 g_object_unref (model
);
580 subtitle_encoding_combo_render (combo
);
584 * vim: sw=2 ts=8 cindent noai bs=2