# git-gui encoding support
# Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
# (Copied from gitk, commit fd8ccbec4f0161)

# This list of encoding names and aliases is distilled from
# http://www.iana.org/assignments/character-sets.
# Not all of them are supported by Tcl.
9 { ANSI_X3.4
-1968 iso-ir-6 ANSI_X3.4
-1986 ISO_646.irv
:1991 ASCII
10 ISO646-US US-ASCII us IBM367 cp367 csASCII
}
11 { ISO-10646-UTF-1 csISO10646UTF1
}
12 { ISO_646.basic
:1983 ref csISO646basic1983
}
13 { INVARIANT csINVARIANT
}
14 { ISO_646.irv
:1983 iso-ir-2 irv csISO2IntlRefVersion
}
15 { BS_4730 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom
}
16 { NATS-SEFI iso-ir-8-1 csNATSSEFI
}
17 { NATS-SEFI-ADD iso-ir-8-2 csNATSSEFIADD
}
18 { NATS-DANO iso-ir-9-1 csNATSDANO
}
19 { NATS-DANO-ADD iso-ir-9-2 csNATSDANOADD
}
20 { SEN_850200_B iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish
}
21 { SEN_850200_C iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames
}
22 { KS_C_5601-1987 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987
}
23 { ISO-2022-KR csISO2022KR
}
25 { ISO-2022-JP csISO2022JP
}
26 { ISO-2022-JP-2 csISO2022JP2
}
27 { JIS_C6220-1969-jp JIS_C6220-1969 iso-ir-13 katakana x0201-7
29 { JIS_C6220-1969-ro iso-ir-14 jp ISO646-JP csISO14JISC6220ro
}
30 { IT iso-ir-15 ISO646-IT csISO15Italian
}
31 { PT iso-ir-16 ISO646-PT csISO16Portuguese
}
32 { ES iso-ir-17 ISO646-ES csISO17Spanish
}
33 { greek7-old iso-ir-18 csISO18Greek7Old
}
34 { latin-greek iso-ir-19 csISO19LatinGreek
}
35 { DIN_66003 iso-ir-21 de ISO646-DE csISO21German
}
36 { NF_Z_62-010_
(1973) iso-ir-25 ISO646-FR1 csISO25French
}
37 { Latin-greek-1 iso-ir-27 csISO27LatinGreek1
}
38 { ISO_5427 iso-ir-37 csISO5427Cyrillic
}
39 { JIS_C6226-1978 iso-ir-42 csISO42JISC62261978
}
40 { BS_viewdata iso-ir-47 csISO47BSViewdata
}
41 { INIS iso-ir-49 csISO49INIS
}
42 { INIS-8 iso-ir-50 csISO50INIS8
}
43 { INIS-cyrillic iso-ir-51 csISO51INISCyrillic
}
44 { ISO_5427
:1981 iso-ir-54 ISO5427Cyrillic1981
}
45 { ISO_5428
:1980 iso-ir-55 csISO5428Greek
}
46 { GB_1988-80 iso-ir-57 cn ISO646-CN csISO57GB1988
}
47 { GB_2312-80 iso-ir-58 chinese csISO58GB231280
}
48 { NS_4551-1 iso-ir-60 ISO646-NO no csISO60DanishNorwegian
50 { NS_4551-2 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2
}
51 { NF_Z_62-010 iso-ir-69 ISO646-FR fr csISO69French
}
52 { videotex-suppl iso-ir-70 csISO70VideotexSupp1
}
53 { PT2 iso-ir-84 ISO646-PT2 csISO84Portuguese2
}
54 { ES2 iso-ir-85 ISO646-ES2 csISO85Spanish2
}
55 { MSZ_7795.3 iso-ir-86 ISO646-HU hu csISO86Hungarian
}
56 { JIS_C6226-1983 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208
}
57 { greek7 iso-ir-88 csISO88Greek7
}
58 { ASMO_449 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449
}
60 { JIS_C6229-1984-a iso-ir-91 jp-ocr-a csISO91JISC62291984a
}
61 { JIS_C6229-1984-b iso-ir-92 ISO646-JP-OCR-B jp-ocr-b
62 csISO92JISC62991984b
}
63 { JIS_C6229-1984-b-add iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd
}
64 { JIS_C6229-1984-hand iso-ir-94 jp-ocr-hand csISO94JIS62291984hand
}
65 { JIS_C6229-1984-hand-add iso-ir-95 jp-ocr-hand-add
66 csISO95JIS62291984handadd
}
67 { JIS_C6229-1984-kana iso-ir-96 csISO96JISC62291984kana
}
68 { ISO_2033-1983 iso-ir-98 e13b csISO2033
}
69 { ANSI_X3.110
-1983 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS
}
70 { ISO_8859-1
:1987 iso-ir-100 ISO_8859-1 ISO-8859-1 latin1 l1 IBM819
72 { ISO_8859-2
:1987 iso-ir-101 ISO_8859-2 ISO-8859-2 latin2 l2 csISOLatin2
}
73 { T
.61-7bit iso-ir-102 csISO102T617bit
}
74 { T
.61-8bit T
.61 iso-ir-103 csISO103T618bit
}
75 { ISO_8859-3
:1988 iso-ir-109 ISO_8859-3 ISO-8859-3 latin3 l3 csISOLatin3
}
76 { ISO_8859-4
:1988 iso-ir-110 ISO_8859-4 ISO-8859-4 latin4 l4 csISOLatin4
}
77 { ECMA-cyrillic iso-ir-111 KOI8-E csISO111ECMACyrillic
}
78 { CSA_Z243.4
-1985-1 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1
}
79 { CSA_Z243.4
-1985-2 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2
}
80 { CSA_Z243.4
-1985-gr iso-ir-123 csISO123CSAZ24341985gr
}
81 { ISO_8859-6
:1987 iso-ir-127 ISO_8859-6 ISO-8859-6 ECMA-114 ASMO-708
82 arabic csISOLatinArabic
}
83 { ISO_8859-6-E csISO88596E ISO-8859-6-E
}
84 { ISO_8859-6-I csISO88596I ISO-8859-6-I
}
85 { ISO_8859-7
:1987 iso-ir-126 ISO_8859-7 ISO-8859-7 ELOT_928 ECMA-118
86 greek greek8 csISOLatinGreek
}
87 { T
.101-G2 iso-ir-128 csISO128T101G2
}
88 { ISO_8859-8
:1988 iso-ir-138 ISO_8859-8 ISO-8859-8 hebrew
90 { ISO_8859-8-E csISO88598E ISO-8859-8-E
}
91 { ISO_8859-8-I csISO88598I ISO-8859-8-I
}
92 { CSN_369103 iso-ir-139 csISO139CSN369103
}
93 { JUS_I.B1.002 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002
}
94 { ISO_6937-2-add iso-ir-142 csISOTextComm
}
95 { IEC_P27-1 iso-ir-143 csISO143IECP271
}
96 { ISO_8859-5
:1988 iso-ir-144 ISO_8859-5 ISO-8859-5 cyrillic
98 { JUS_I.B1.003
-serb iso-ir-146 serbian csISO146Serbian
}
99 { JUS_I.B1.003
-mac macedonian iso-ir-147 csISO147Macedonian
}
100 { ISO_8859-9
:1989 iso-ir-148 ISO_8859-9 ISO-8859-9 latin5 l5 csISOLatin5
}
101 { greek-ccitt iso-ir-150 csISO150 csISO150GreekCCITT
}
102 { NC_NC00-10
:81 cuba iso-ir-151 ISO646-CU csISO151Cuba
}
103 { ISO_6937-2-25 iso-ir-152 csISO6937Add
}
104 { GOST_19768-74 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874
}
105 { ISO_8859-supp iso-ir-154 latin1-2-5 csISO8859Supp
}
106 { ISO_10367-box iso-ir-155 csISO10367Box
}
107 { ISO-8859-10 iso-ir-157 l6 ISO_8859-10
:1992 csISOLatin6 latin6
}
108 { latin-lap lap iso-ir-158 csISO158Lap
}
109 { JIS_X0212-1990 x0212 iso-ir-159 csISO159JISX02121990
}
110 { DS_2089 DS2089 ISO646-DK dk csISO646Danish
}
113 { JIS_X0201 X0201 csHalfWidthKatakana
}
114 { KSC5636 ISO646-KR csKSC5636
}
115 { ISO-10646-UCS-2 csUnicode
}
116 { ISO-10646-UCS-4 csUCS4
}
117 { DEC-MCS dec csDECMCS
}
118 { hp-roman8 roman8 r8 csHPRoman8
}
119 { macintosh mac csMacintosh
}
120 { IBM037 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl
122 { IBM038 EBCDIC-INT cp038 csIBM038
}
123 { IBM273 CP273 csIBM273
}
124 { IBM274 EBCDIC-BE CP274 csIBM274
}
125 { IBM275 EBCDIC-BR cp275 csIBM275
}
126 { IBM277 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277
}
127 { IBM278 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278
}
128 { IBM280 CP280 ebcdic-cp-it csIBM280
}
129 { IBM281 EBCDIC-JP-E cp281 csIBM281
}
130 { IBM284 CP284 ebcdic-cp-es csIBM284
}
131 { IBM285 CP285 ebcdic-cp-gb csIBM285
}
132 { IBM290 cp290 EBCDIC-JP-kana csIBM290
}
133 { IBM297 cp297 ebcdic-cp-fr csIBM297
}
134 { IBM420 cp420 ebcdic-cp-ar1 csIBM420
}
135 { IBM423 cp423 ebcdic-cp-gr csIBM423
}
136 { IBM424 cp424 ebcdic-cp-he csIBM424
}
137 { IBM437 cp437
437 csPC8CodePage437
}
138 { IBM500 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500
}
139 { IBM775 cp775 csPC775Baltic
}
140 { IBM850 cp850
850 csPC850Multilingual
}
141 { IBM851 cp851
851 csIBM851
}
142 { IBM852 cp852
852 csPCp852
}
143 { IBM855 cp855
855 csIBM855
}
144 { IBM857 cp857
857 csIBM857
}
145 { IBM860 cp860
860 csIBM860
}
146 { IBM861 cp861
861 cp-is csIBM861
}
147 { IBM862 cp862
862 csPC862LatinHebrew
}
148 { IBM863 cp863
863 csIBM863
}
149 { IBM864 cp864 csIBM864
}
150 { IBM865 cp865
865 csIBM865
}
151 { IBM866 cp866
866 csIBM866
}
152 { IBM868 CP868 cp-ar csIBM868
}
153 { IBM869 cp869
869 cp-gr csIBM869
}
154 { IBM870 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870
}
155 { IBM871 CP871 ebcdic-cp-is csIBM871
}
156 { IBM880 cp880 EBCDIC-Cyrillic csIBM880
}
157 { IBM891 cp891 csIBM891
}
158 { IBM903 cp903 csIBM903
}
159 { IBM904 cp904
904 csIBBM904
}
160 { IBM905 CP905 ebcdic-cp-tr csIBM905
}
161 { IBM918 CP918 ebcdic-cp-ar2 csIBM918
}
162 { IBM1026 CP1026 csIBM1026
}
163 { EBCDIC-AT-DE csIBMEBCDICATDE
}
164 { EBCDIC-AT-DE-A csEBCDICATDEA
}
165 { EBCDIC-CA-FR csEBCDICCAFR
}
166 { EBCDIC-DK-NO csEBCDICDKNO
}
167 { EBCDIC-DK-NO-A csEBCDICDKNOA
}
168 { EBCDIC-FI-SE csEBCDICFISE
}
169 { EBCDIC-FI-SE-A csEBCDICFISEA
}
170 { EBCDIC-FR csEBCDICFR
}
171 { EBCDIC-IT csEBCDICIT
}
172 { EBCDIC-PT csEBCDICPT
}
173 { EBCDIC-ES csEBCDICES
}
174 { EBCDIC-ES-A csEBCDICESA
}
175 { EBCDIC-ES-S csEBCDICESS
}
176 { EBCDIC-UK csEBCDICUK
}
177 { EBCDIC-US csEBCDICUS
}
178 { UNKNOWN-8BIT csUnknown8BiT
}
179 { MNEMONIC csMnemonic
}
184 { IBM00858 CCSID00858 CP00858 PC-Multilingual-850
+euro
}
185 { IBM00924 CCSID00924 CP00924 ebcdic-Latin9--euro
}
186 { IBM01140 CCSID01140 CP01140 ebcdic-us-37
+euro
}
187 { IBM01141 CCSID01141 CP01141 ebcdic-de-273
+euro
}
188 { IBM01142 CCSID01142 CP01142 ebcdic-dk-277
+euro ebcdic-no-277
+euro
}
189 { IBM01143 CCSID01143 CP01143 ebcdic-fi-278
+euro ebcdic-se-278
+euro
}
190 { IBM01144 CCSID01144 CP01144 ebcdic-it-280
+euro
}
191 { IBM01145 CCSID01145 CP01145 ebcdic-es-284
+euro
}
192 { IBM01146 CCSID01146 CP01146 ebcdic-gb-285
+euro
}
193 { IBM01147 CCSID01147 CP01147 ebcdic-fr-297
+euro
}
194 { IBM01148 CCSID01148 CP01148 ebcdic-international-500
+euro
}
195 { IBM01149 CCSID01149 CP01149 ebcdic-is-871
+euro
}
197 { PTCP154 csPTCP154 PT154 CP154 Cyrillic-Asian
}
198 { Amiga-1251 Ami1251 Amiga1251 Ami-1251
}
199 { UNICODE-1-1 csUnicode11
}
202 { UNICODE-1-1-UTF-7 csUnicode11UTF7
}
203 { ISO-8859-14 iso-ir-199 ISO_8859-14
:1998 ISO_8859-14 latin8 iso-celtic
205 { ISO-8859-15 ISO_8859-15 Latin-9
}
206 { ISO-8859-16 iso-ir-226 ISO_8859-16
:2001 ISO_8859-16 latin10 l10
}
207 { GBK CP936 MS936 windows-936
}
208 { JIS_Encoding csJISEncoding
}
209 { Shift_JIS MS_Kanji csShiftJIS ShiftJIS Shift-JIS
}
210 { Extended_UNIX_Code_Packed_Format_for_Japanese csEUCPkdFmtJapanese
212 { Extended_UNIX_Code_Fixed_Width_for_Japanese csEUCFixWidJapanese
}
213 { ISO-10646-UCS-Basic csUnicodeASCII
}
214 { ISO-10646-Unicode-Latin1 csUnicodeLatin1 ISO-10646
}
215 { ISO-Unicode-IBM-1261 csUnicodeIBM1261
}
216 { ISO-Unicode-IBM-1268 csUnicodeIBM1268
}
217 { ISO-Unicode-IBM-1276 csUnicodeIBM1276
}
218 { ISO-Unicode-IBM-1264 csUnicodeIBM1264
}
219 { ISO-Unicode-IBM-1265 csUnicodeIBM1265
}
220 { ISO-8859-1-Windows-3.0
-Latin-1 csWindows30Latin1
}
221 { ISO-8859-1-Windows-3.1
-Latin-1 csWindows31Latin1
}
222 { ISO-8859-2-Windows-Latin-2 csWindows31Latin2
}
223 { ISO-8859-9-Windows-Latin-5 csWindows31Latin5
}
224 { Adobe-Standard-Encoding csAdobeStandardEncoding
}
225 { Ventura-US csVenturaUS
}
226 { Ventura-International csVenturaInternational
}
227 { PC8-Danish-Norwegian csPC8DanishNorwegian
}
228 { PC8-Turkish csPC8Turkish
}
229 { IBM-Symbols csIBMSymbols
}
230 { IBM-Thai csIBMThai
}
231 { HP-Legal csHPLegal
}
232 { HP-Pi-font csHPPiFont
}
233 { HP-Math8 csHPMath8
}
234 { Adobe-Symbol-Encoding csHPPSMath
}
235 { HP-DeskTop csHPDesktop
}
236 { Ventura-Math csVenturaMath
}
237 { Microsoft-Publishing csMicrosoftPublishing
}
238 { Windows-31J csWindows31J
}
# encoding_groups --
#	Nested list describing how encodings are grouped in the encoding menu.
#	do_build_encoding_menu iterates over the top-level elements and hands
#	each one to build_encoding_submenu, which reads element 0 as a widget
#	name suffix, element 1 as a translatable title, and every further
#	element as a subset: a quoted display label followed by encoding names.
# NOTE(review): in this copy no group id/title elements are visible and the
#	closing double braces have no visible opener, so the text looks
#	truncated; compare against a clean copy before editing entries.
243 set encoding_groups
{
246 {"Western" ISO-8859-1
}}
248 {"Western" ISO-8859-15 CP-437 CP-850 MacRoman CP-1252 Windows-1252
}
249 {"Celtic" ISO-8859-14
}
250 {"Greek" ISO-8859-14 ISO-8859-7 CP-737 CP-869 MacGreek CP-1253 Windows-1253
}
251 {"Icelandic" MacIceland MacIcelandic CP-861
}
252 {"Nordic" ISO-8859-10 CP-865
}
253 {"Portuguese" CP-860
}
254 {"South European" ISO-8859-3
}}
256 {"Baltic" CP-775 ISO-8859-4 ISO-8859-13 CP-1257 Windows-1257
}
257 {"Central European" CP-852 ISO-8859-2 MacCE CP-1250 Windows-1250
}
258 {"Croatian" MacCroatian
}
259 {"Cyrillic" CP-855 ISO-8859-5 ISO-IR-111 KOI8-R MacCyrillic CP-1251 Windows-1251
}
261 {"Ukrainian" KOI8-U MacUkraine MacUkrainian
}
262 {"Romanian" ISO-8859-16 MacRomania MacRomanian
}}
265 {"Chinese Simplified" GB2312 GB1988 GB12345 GB2312-RAW GBK EUC-CN GB18030 HZ ISO-2022-CN
}
266 {"Chinese Traditional" Big5 Big5-HKSCS EUC-TW CP-950
}
267 {"Japanese" EUC-JP ISO-2022-JP Shift-JIS JIS-0212 JIS-0208 JIS-0201 CP-932 MacJapan
}
268 {"Korean" EUC-KR UHC JOHAB ISO-2022-KR CP-949 KSC5601
}}
270 {"Armenian" ARMSCII-8
}
272 {"Thai" TIS-620 ISO-8859-11 CP-874 Windows-874 MacThai
}
273 {"Turkish" CP-857 CP857 ISO-8859-9 MacTurkish CP-1254 Windows-1254
}
274 {"Vietnamese" TCVN VISCII VPS CP-1258 Windows-1258
}
275 {"Hindi" MacDevanagari
}
276 {"Gujarati" MacGujarati
}
277 {"Gurmukhi" MacGurmukhi
}}
279 {"Arabic" ISO-8859-6 Windows-1256 CP-1256 CP-864 MacArabic
}
281 {"Hebrew" ISO-8859-8-I Windows-1255 CP-1255 ISO-8859-8 CP-862 MacHebrew
}}
285 {"Legacy" CP-863 EBCDIC
}
286 {"Symbol" Symbol Dingbats MacDingbats MacCentEuro
}}
# build_encoding_table --
#	Fills the global array encoding_lookup_table, keyed by lower-cased
#	encoding name. Takes no arguments.
#	Visible steps: pair every name from [encoding names] with its
#	lower-cased form, sort the pairs on the lower-cased form, build a
#	parallel list of just the lower-cased names, search that list for
#	alias names taken from encoding_aliases, and store the matching Tcl
#	name as the value for the alias. Finally every Tcl encoding is
#	entered under its own lower-cased name.
# NOTE(review): this copy appears to be missing lines. There is no visible
#	initialisation of names/lnames, no inner loop binding item inside the
#	encoding_aliases loop, no guard on a failed lsearch, and no closing
#	braces. Verify against a clean copy.
289 proc build_encoding_table
{} {
290 global encoding_aliases encoding_lookup_table
292 # Prepare the lookup list; cannot use lsort -nocase because
293 # of compatibility issues with older Tcl (e.g. in msysgit)
295 foreach item
[encoding names
] {
296 lappend names
[list [string tolower
$item] $item]
# Order the pairs by element 0, the lower-cased name, so the parallel
# list built next can be searched with lsearch -sorted.
298 set names
[lsort -ascii -index 0 $names]
299 # neither can we use lsearch -index
301 foreach item
$names {
302 lappend lnames
[lindex $item 0]
305 foreach grp
$encoding_aliases {
308 set i
[lsearch -sorted -ascii $lnames \
309 [string tolower
$item]]
311 set target
[lindex $names $i 1]
315 if {$target eq
{}} continue
317 set encoding_lookup_table
([string tolower
$item]) $target
# Enter each Tcl encoding under its own lower-cased name; this runs after
# the alias entries were stored.
321 foreach item
$names {
322 set encoding_lookup_table
([lindex $item 0]) [lindex $item 1]
# tcl_encoding --
#	Resolves the encoding name enc through the global array
#	encoding_lookup_table, comparing case-insensitively: enc is
#	lower-cased before the lookup. When the lower-cased name is not a key,
#	a regsub removes a single "-" or "_" that follows a leading
#	iso, cp, ibm or jis prefix and writes the result to encx.
#	Returns the stored table value when the key exists.
# NOTE(review): the bodies of the first two branches and the fall-through
#	return are not visible in this copy. Presumably the table is built on
#	first use and encx replaces enc; confirm against a clean copy.
326 proc tcl_encoding
{enc
} {
327 global encoding_lookup_table
331 if {![info exists encoding_lookup_table
]} {
334 set enc
[string tolower
$enc]
335 if {![info exists encoding_lookup_table
($enc)]} {
336 # look for "isonnn" instead of "iso-nnn" or "iso_nnn"
337 if {[regsub {^
(iso|cp|ibm|jis
)[-_]} $enc {\1} encx
]} {
341 if {[info exists encoding_lookup_table
($enc)]} {
342 return $encoding_lookup_table($enc)
# force_path_encoding --
#	Normalises enc with tcl_encoding and then updates two globals:
#	the scalar last_encoding_override and the array element
#	path_encoding_overrides(path).
#	Visible operations: both variables are unset inside catch, so a
#	missing variable is not an error, and both are assigned the
#	normalised encoding.
# NOTE(review): the conditions that choose between the unset pair and the
#	set pair are not visible in this copy. Presumably the unsets apply
#	when the encoding does not resolve; confirm against a clean copy.
348 proc force_path_encoding
{path enc
} {
349 global path_encoding_overrides last_encoding_override
351 set enc
[tcl_encoding
$enc]
353 catch { unset last_encoding_override
}
354 catch { unset path_encoding_overrides
($path) }
356 set last_encoding_override
$enc
358 set path_encoding_overrides
($path) $enc
# get_path_encoding --
#	Works out which encoding applies to path. Sources visible in the code:
#	  - last_encoding_override, when that global exists;
#	  - the gui.encoding configuration value passed through tcl_encoding;
#	  - [encoding system], when the result so far is the empty string;
#	  - path_encoding_overrides(path), when that array element exists;
#	  - the "encoding" attribute obtained via gitattr, with tcl_enc as the
#	    default, passed through tcl_encoding.
#	The last two are stored in enc2.
# NOTE(review): the else keywords, the step that merges enc2 into tcl_enc
#	and the return statement are not visible in this copy; confirm the
#	precedence against a clean copy.
363 proc get_path_encoding
{path
} {
364 global path_encoding_overrides last_encoding_override
366 if {[info exists last_encoding_override
]} {
367 set tcl_enc
$last_encoding_override
369 set tcl_enc
[tcl_encoding
[get_config gui.
encoding]]
371 if {$tcl_enc eq
{}} {
372 set tcl_enc
[encoding system
]
375 if {[info exists path_encoding_overrides
($path)]} {
376 set enc2
$path_encoding_overrides($path)
378 set enc2
[tcl_encoding
[gitattr
$path encoding $tcl_enc]]
# build_encoding_submenu --
#	Adds the entries for one encoding group under the menu parent.
#	  parent  command name of the menu receiving the cascade entry
#	  grp     list: element 0 is appended to parent as the submenu path
#	          suffix, element 1 is a title passed through mc, and each
#	          remaining element is a subset whose first item is a label
#	          (passed through mc) followed by encoding names
#	  cmd     command prefix; the resolved Tcl encoding name is appended
#	          as one list element to form each entry's -command
#	Encodings for which tcl_encoding returns the empty string are skipped.
#	Each resolved name is appended to the global used_encodings.
# NOTE(review): submenu creation and most of the add commands are not
#	visible in this copy, and the closing braces are absent.
387 proc build_encoding_submenu
{parent grp cmd
} {
388 global used_encodings
390 set mid
[lindex $grp 0]
391 set gname
[mc
[lindex $grp 1]]
394 foreach subset
[lrange $grp 2 end
] {
395 set name
[mc
[lindex $subset 0]]
397 foreach enc
[lrange $subset 1 end
] {
398 set tcl_enc
[tcl_encoding
$enc]
399 if {$tcl_enc eq
{}} continue
405 set smenu
"$parent.$mid"
407 $parent add cascade
\
414 set lbl
"$name ($enc)"
420 -command [concat $cmd [list $tcl_enc]]
422 lappend used_encodings
$tcl_enc
# popup_btn_menu --
#	Posts the menu m as a popup at the current mouse pointer position.
#	  m  path of the menu to post
#	  b  widget whose screen is used to query the pointer coordinates
proc popup_btn_menu {m b} {
	tk_popup $m [winfo pointerx $b] [winfo pointery $b]
}
# build_encoding_menu --
#	Defers population of the encoding menu until it is first posted by
#	installing do_build_encoding_menu as the menu's -postcommand.
#	  emenu  command name of the menu to configure
#	  cmd    command prefix forwarded unchanged to do_build_encoding_menu
#	  nodef  flag forwarded unchanged to do_build_encoding_menu
#	         (defaults to 0)
proc build_encoding_menu {emenu cmd {nodef 0}} {
	# Build the callback with list so that cmd stays a single argument
	# even when it contains spaces.
	$emenu configure -postcommand \
		[list do_build_encoding_menu $emenu $cmd $nodef]
}
# do_build_encoding_menu --
#	Populates the encoding menu emenu. build_encoding_menu installs this
#	proc as the menu's -postcommand; the first visible action here resets
#	that option to the empty string.
#	  emenu  command name of the menu to fill
#	  cmd    command prefix; an encoding name is appended as one list
#	         element to form each entry's -command
#	  nodef  defaults to 0; its use is not visible in this copy
#	Visible entries: one labelled "Default" whose command receives the
#	empty string, and one labelled "System (%s)" whose command receives
#	[encoding system]. used_encodings is then reset to a list holding
#	only "identity", each element of encoding_groups is passed to
#	build_encoding_submenu, and every name from [encoding names] that is
#	not in used_encodings is collected into a final "Other" group.
# NOTE(review): the heads of the add commands, any branch on nodef and the
#	closing braces are not visible in this copy; confirm against a clean
#	copy.
436 proc do_build_encoding_menu
{emenu cmd
{nodef
0}} {
437 global used_encodings encoding_groups
439 $emenu configure
-postcommand {}
443 -label [mc
"Default"] \
444 -command [concat $cmd [list {}]]
446 set sysenc
[encoding system
]
448 -label [mc
"System (%s)" $sysenc] \
449 -command [concat $cmd [list $sysenc]]
452 set used_encodings
[list identity
]
454 foreach grp
$encoding_groups {
455 build_encoding_submenu
$emenu $grp $cmd
458 # Add unclassified encodings
459 set unused_grp
[list [mc Other
]]
460 foreach enc
[encoding names
] {
461 if {[lsearch -exact $used_encodings $enc] < 0} {
462 lappend unused_grp
$enc
465 build_encoding_submenu
$emenu [list other
[mc Other
] $unused_grp] $cmd