(* Copyright 2005 b8_bavard, INRIA *)
(*
    This file is part of mldonkey.

    mldonkey is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    mldonkey is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with mldonkey; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
23 | ANSI_X3_4_1968
| ANSI_X3_4_1986
| ASCII
| CP367
| IBM367
| ISO_IR_6
| ISO646_US
| ISO_646_IRV_1991
| US
| US_ASCII
| CSASCII
25 | ISO_10646_UCS_2
| UCS_2
| CSUNICODE
26 | UCS_2BE
| UNICODE_1_1
| UNICODEBIG
| CSUNICODE11
27 | UCS_2LE
| UNICODELITTLE
28 | ISO_10646_UCS_4
| UCS_4
| CSUCS4
37 | UNICODE_1_1_UTF_7
| UTF_7
| CSUNICODE11UTF7
44 | CP819
| IBM819
| ISO_8859_1
| ISO_IR_100
| ISO8859_1
| ISO_8859_1_1987
| L1
| LATIN1
| CSISOLATIN1
45 | ISO_8859_2
| ISO_IR_101
| ISO8859_2
| ISO_8859_2_1987
| L2
| LATIN2
| CSISOLATIN2
46 | ISO_8859_3
| ISO_IR_109
| ISO8859_3
| ISO_8859_3_1988
| L3
| LATIN3
| CSISOLATIN3
47 | ISO_8859_4
| ISO_IR_110
| ISO8859_4
| ISO_8859_4_1988
| L4
| LATIN4
| CSISOLATIN4
48 | CYRILLIC
| ISO_8859_5
| ISO_IR_144
| ISO8859_5
| ISO_8859_5_1988
| CSISOLATINCYRILLIC
49 | ARABIC
| ASMO_708
| ECMA_114
| ISO_8859_6
| ISO_IR_127
| ISO8859_6
| ISO_8859_6_1987
| CSISOLATINARABIC
50 | ECMA_118
| ELOT_928
| GREEK
| GREEK8
| ISO_8859_7
| ISO_IR_126
| ISO8859_7
| ISO_8859_7_1987
| CSISOLATINGREEK
51 | HEBREW
| ISO_8859_8
| ISO_IR_138
| ISO8859_8
| ISO_8859_8_1988
| CSISOLATINHEBREW
52 | ISO_8859_9
| ISO_IR_148
| ISO8859_9
| ISO_8859_9_1989
| L5
| LATIN5
| CSISOLATIN5
53 | ISO_8859_10
| ISO_IR_157
| ISO8859_10
| ISO_8859_10_1992
| L6
| LATIN6
| CSISOLATIN6
54 | ISO_8859_13
| ISO_IR_179
| ISO8859_13
| L7
| LATIN7
55 | ISO_8859_14
| ISO_CELTIC
| ISO8859_14
| ISO_IR_199
| ISO_8859_14_1998
| L8
| LATIN8
56 | ISO_8859_15
| ISO_IR_203
| ISO8859_15
| ISO_8859_15_1998
57 | ISO_8859_16
| ISO_IR_226
| ISO8859_16
| ISO_8859_16_2000
61 | CP1250
| MS_EE
| WINDOWS_1250
62 | CP1251
| MS_CYRL
| WINDOWS_1251
63 | CP1252
| MS_ANSI
| WINDOWS_1252
64 | CP1253
| MS_GREEK
| WINDOWS_1253
65 | CP1254
| MS_TURK
| WINDOWS_1254
66 | CP1255
| MS_HEBR
| WINDOWS_1255
67 | CP1256
| MS_ARAB
| WINDOWS_1256
68 | CP1257
| WINBALTRIM
| WINDOWS_1257
69 | CP1258
| WINDOWS_1258
70 | I_850
| CP850
| IBM850
| CSPC850MULTILINGUAL
71 | I_862
| CP862
| IBM862
| CSPC862LATINHEBREW
72 | I_866
| CP866
| IBM866
| CSIBM866
73 | MAC
| MACINTOSH
| MACROMAN
| CSMACINTOSH
85 | HP_ROMAN8
| R8
| ROMAN8
| CSHPROMAN8
93 | ISO_IR_166
| TIS_620
| TIS620
| TIS620_0
| TIS620_2529_1
| TIS620_2533_0
| TIS620_2533_1
95 | VISCII
| VISCII1_1_1
| CSVISCII
96 | TCVN
| TCVN_5712
| TCVN5712_1
| TCVN5712_1_1993
97 | ISO_IR_14
| ISO646_JP
| JIS_C6220_1969_RO
| JP
| CSISO14JISC6220RO
98 | JISX0201_1976
| JIS_X0201
| X0201
| CSHALFWIDTHKATAKANA
99 | ISO_IR_87
| JIS0208
| JIS_C6226_1983
| JIS_X0208
| JIS_X0208_1983
| JIS_X0208_1990
| X0208
| CSISO87JISX0208
100 | ISO_IR_159
| JIS_X0212
| JIS_X0212_1990
| JIS_X0212_1990_0
| X0212
| CSISO159JISX02121990
101 | CN
| GB_1988_80
| ISO_IR_57
| ISO646_CN
| CSISO57GB1988
102 | CHINESE
| GB_2312_80
| ISO_IR_58
| CSISO58GB231280
103 | CN_GB_ISOIR165
| ISO_IR_165
104 | ISO_IR_149
| KOREAN
| KSC_5601
| KS_C_5601_1987
| KS_C_5601_1989
| CSKSC56011987
105 | EUC_JP
| EUCJP
| EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE
| CSEUCPKDFMTJAPANESE
106 | MS_KANJI
| SHIFT_JIS
| SJIS
| CSSHIFTJIS
108 | ISO_2022_JP
| CSISO2022JP
110 | ISO_2022_JP_2
| CSISO2022JP2
111 | CN_GB
| EUC_CN
| EUCCN
| GB2312
| CSGB2312
114 | ISO_2022_CN
| CSISO2022CN
117 | EUC_TW
| EUCTW
| CSEUCTW
118 | BIG_5
| BIG_FIVE
| BIG5
| BIGFIVE
| CN_BIG5
| CSBIG5
120 | BIG5_HKSCS
| BIG5HKSCS
121 | EUC_KR
| EUCKR
| CSEUCKR
124 | ISO_2022_KR
| CSISO2022KR
125 | I_437
| CP437
| IBM437
| CSPC8CODEPAGE437
127 | CP775
| IBM775
| CSPC775BALTIC
128 | I_852
| CP852
| IBM852
| CSPCP852
130 | I_855
| CP855
| IBM855
| CSIBM855
131 | I_857
| CP857
| IBM857
| CSIBM857
133 | I_860
| CP860
| IBM860
| CSIBM860
134 | I_861
| CP_IS
| CP861
| IBM861
| CSIBM861
135 | I_863
| CP863
| IBM863
| CSIBM863
136 | CP864
| IBM864
| CSIBM864
137 | I_865
| CP865
| IBM865
| CSIBM865
138 | I_869
| CP_GR
| CP869
| IBM869
| CSIBM869
(** [charset_from_string s] maps the string [s] to a [charset] value.
    @return [ASCII] if nothing matches *)
val charset_from_string : string -> charset
(** [charset_to_string c] returns a string for the charset [c].
    NOTE(review): presumably a name that [charset_from_string] maps back
    to [c] -- confirm against the implementation. *)
val charset_to_string : charset -> string
(** [convert ~from_charset ~to_charset s] converts the string [s] from
    [from_charset] to [to_charset].
    @raise CharsetError if the string [s] is not entirely convertible. *)
val convert : from_charset:charset -> to_charset:charset -> string -> string
(** [safe_convert enc s] converts [s] from encoding [enc] to UTF-8.
    Returns the unmodified string if the conversion fails. *)
val safe_convert : string -> string -> string
(** [is_utf8 s]
    returns [true] if [s] is valid UTF-8, otherwise returns [false].
    Other functions assume strings are valid UTF-8, so it is prudent
    to test their validity for strings from untrusted origins. *)
val is_utf8 : string -> bool
(** [utf8_get s n]
    returns the [n]-th Unicode character of [s].
    The call requires O(n) time. *)
val utf8_get : string -> int -> uchar
(** [utf8_length s]
    returns the number of Unicode characters contained in [s]. *)
val utf8_length : string -> int
(** [utf8_nth s n] locates the [n]-th UTF-8 character of [s];
    [s] must be a valid UTF-8 string. O(n).
    @return the index of the [n]-th UTF-8 character in [s]. *)
val utf8_nth : string -> int -> int
(** [add_uchar buf u]
    adds one Unicode character [u] to the buffer [buf]. *)
val add_uchar : Buffer.t -> uchar -> unit
(** Locale dependent conversions *)

(** [to_utf8 s]
    Converts the input string [s] to UTF-8 (locale dependent conversion). *)
val to_utf8 : string -> string
(** [to_locale s]
    Converts the input string [s] to the encoding of the current locale. *)
val to_locale : string -> string
(** Default language, as a string.
    NOTE(review): how this value is determined (and its exact format) is
    not visible in this interface -- confirm against the implementation. *)
val default_language : string
(** String describing the locale.
    NOTE(review): presumably the charset/encoding name of the current
    locale -- confirm against the implementation. *)
val locale_string : string
(** Mutable boolean flag.
    NOTE(review): presumably switches charset conversion on or off for the
    functions of this module -- confirm which functions consult it. *)
val conversion_enabled : bool ref