* src/pmc/multisub.pmc:
[parrot.git] / src / charset.c
blobe707a23cf403434d4de8ca51ab86500f9fb7725c
1 /*
2 Copyright (C) 2004-2007, The Perl Foundation.
3 $Id$
5 =head1 NAME
7 src/charset.c - global charset functions
9 =head1 DESCRIPTION
11 These are Parrot's generic charset handling functions
15 #define PARROT_NO_EXTERN_CHARSET_PTRS
16 #include "parrot/parrot.h"
18 #include "encodings/fixed_8.h"
19 #include "encodings/utf8.h"
20 #include "encodings/utf16.h"
21 #include "encodings/ucs2.h"
23 #include "charset/ascii.h"
24 #include "charset/binary.h"
25 #include "charset/iso-8859-1.h"
26 #include "charset/unicode.h"
28 CHARSET *Parrot_iso_8859_1_charset_ptr;
29 CHARSET *Parrot_binary_charset_ptr;
30 CHARSET *Parrot_default_charset_ptr;
31 CHARSET *Parrot_unicode_charset_ptr;
32 CHARSET *Parrot_ascii_charset_ptr;
35 * all registered charsets are collected in one global structure
38 typedef struct To_converter {
39 NOTNULL(CHARSET *to);
40 NOTNULL(charset_converter_t func);
41 } To_converter;
43 typedef struct One_charset {
44 NOTNULL(CHARSET *charset);
45 STRING *name;
46 int n_converters;
47 To_converter *to_converters;
48 } One_charset;
50 typedef struct All_charsets {
51 int n_charsets;
52 One_charset *set;
53 } All_charsets;
55 static All_charsets *all_charsets;
57 /* HEADERIZER HFILE: include/parrot/charset.h */
59 /* HEADERIZER BEGIN: static */
61 static INTVAL register_charset( PARROT_INTERP,
62 NOTNULL(const char *charsetname),
63 NOTNULL(CHARSET *charset) )
64 __attribute__nonnull__(1)
65 __attribute__nonnull__(2)
66 __attribute__nonnull__(3);
68 static void register_static_converters( PARROT_INTERP )
69 __attribute__nonnull__(1);
71 /* HEADERIZER END: static */
73 PARROT_API
74 PARROT_CAN_RETURN_NULL
75 PARROT_MALLOC
76 CHARSET *
77 Parrot_new_charset(SHIM_INTERP)
79 return mem_allocate_typed(CHARSET);
82 PARROT_API
83 void
84 Parrot_charsets_encodings_deinit(SHIM_INTERP)
86 const int n = all_charsets->n_charsets;
87 int i;
89 for (i = 0; i < n; ++i) {
90 if (all_charsets->set[i].n_converters)
91 mem_sys_free(all_charsets->set[i].to_converters);
92 mem_sys_free(all_charsets->set[i].charset);
94 mem_sys_free(all_charsets->set);
95 mem_sys_free(all_charsets);
96 all_charsets = NULL;
97 parrot_deinit_encodings();
100 PARROT_API
101 PARROT_CAN_RETURN_NULL
102 PARROT_WARN_UNUSED_RESULT
103 const CHARSET *
104 Parrot_find_charset(SHIM_INTERP, NOTNULL(const char *charsetname))
106 int i;
107 const int n = all_charsets->n_charsets;
109 for (i = 0; i < n; ++i) {
110 if (strcmp(all_charsets->set[i].charset->name, charsetname) == 0) {
111 return all_charsets->set[i].charset;
114 return NULL;
117 PARROT_API
118 PARROT_CAN_RETURN_NULL
119 PARROT_WARN_UNUSED_RESULT
120 CHARSET *
121 Parrot_load_charset(PARROT_INTERP, NOTNULL(const char *charsetname))
123 UNUSED(charsetname);
125 real_exception(interp, NULL, UNIMPLEMENTED, "Can't load charsets yet");
126 return NULL; /* placeholder return until this is implemented */
131 FUNCDOC: Parrot_charset_number
133 Return the number of the charset or -1 if not found.
137 PARROT_API
138 PARROT_WARN_UNUSED_RESULT
139 INTVAL
140 Parrot_charset_number(PARROT_INTERP, NOTNULL(STRING *charsetname))
142 const int n = all_charsets->n_charsets;
143 int i;
145 for (i = 0; i < n; ++i) {
146 if (!string_equal(interp, all_charsets->set[i].name, charsetname))
147 return i;
149 return -1;
154 FUNCDOC: Parrot_charset_number_of_str
156 Return the number of the charset of the given string or -1 if not found.
160 PARROT_API
161 PARROT_WARN_UNUSED_RESULT
162 INTVAL
163 Parrot_charset_number_of_str(SHIM_INTERP, NOTNULL(STRING *src))
165 int i;
166 const int n = all_charsets->n_charsets;
168 for (i = 0; i < n; ++i) {
169 if (src->charset == all_charsets->set[i].charset)
170 return i;
172 return -1;
175 PARROT_API
176 PARROT_CAN_RETURN_NULL
177 PARROT_WARN_UNUSED_RESULT
178 STRING*
179 Parrot_charset_name(SHIM_INTERP, INTVAL number_of_charset)
181 if (number_of_charset >= all_charsets->n_charsets)
182 return NULL;
183 return all_charsets->set[number_of_charset].name;
186 PARROT_API
187 PARROT_CAN_RETURN_NULL
188 PARROT_WARN_UNUSED_RESULT
189 const CHARSET *
190 Parrot_get_charset(SHIM_INTERP, INTVAL number_of_charset)
192 if (number_of_charset >= all_charsets->n_charsets)
193 return NULL;
194 return all_charsets->set[number_of_charset].charset;
197 PARROT_API
198 PARROT_CAN_RETURN_NULL
199 PARROT_WARN_UNUSED_RESULT
200 const char *
201 Parrot_charset_c_name(SHIM_INTERP, INTVAL number_of_charset)
203 if (number_of_charset >= all_charsets->n_charsets)
204 return NULL;
205 return all_charsets->set[number_of_charset].charset->name;
208 static INTVAL
209 register_charset(PARROT_INTERP, NOTNULL(const char *charsetname),
210 NOTNULL(CHARSET *charset))
212 const int n = all_charsets->n_charsets;
213 int i;
215 for (i = 0; i < n; ++i) {
216 if (strcmp(all_charsets->set[i].charset->name, charsetname) == 0)
217 return 0;
220 * TODO
221 * this needs either a LOCK or we just forbid dynamic
222 * loading of charsets from inside threads
224 if (!n)
225 all_charsets->set = mem_allocate_typed(One_charset);
226 else
227 all_charsets->set = (One_charset *)mem_sys_realloc(all_charsets->set,
228 (n + 1) * sizeof (One_charset));
229 all_charsets->n_charsets++;
230 all_charsets->set[n].charset = charset;
231 all_charsets->set[n].name = const_string(interp, charsetname);
232 all_charsets->set[n].n_converters = 0;
234 return 1;
237 static void
238 register_static_converters(PARROT_INTERP)
240 Parrot_register_charset_converter(interp,
241 Parrot_iso_8859_1_charset_ptr, Parrot_ascii_charset_ptr,
242 charset_cvt_iso_8859_1_to_ascii);
243 Parrot_register_charset_converter(interp,
244 Parrot_iso_8859_1_charset_ptr, Parrot_binary_charset_ptr,
245 charset_cvt_ascii_to_binary);
247 Parrot_register_charset_converter(interp,
248 Parrot_ascii_charset_ptr, Parrot_binary_charset_ptr,
249 charset_cvt_ascii_to_binary);
250 Parrot_register_charset_converter(interp,
251 Parrot_ascii_charset_ptr, Parrot_iso_8859_1_charset_ptr,
252 charset_cvt_ascii_to_iso_8859_1);
255 PARROT_API
256 INTVAL
257 Parrot_register_charset(PARROT_INTERP, NOTNULL(const char *charsetname),
258 NOTNULL(CHARSET *charset))
260 if (!all_charsets) {
261 all_charsets = mem_allocate_typed(All_charsets);
262 all_charsets->n_charsets = 0;
263 all_charsets->set = NULL;
265 if (strcmp("binary", charsetname) == 0) {
266 Parrot_binary_charset_ptr = charset;
267 return register_charset(interp, charsetname, charset);
269 if (strcmp("iso-8859-1", charsetname) == 0) {
270 Parrot_iso_8859_1_charset_ptr = charset;
271 return register_charset(interp, charsetname, charset);
273 if (strcmp("unicode", charsetname) == 0) {
274 Parrot_unicode_charset_ptr = charset;
275 return register_charset(interp, charsetname, charset);
277 if (strcmp("ascii", charsetname) == 0) {
278 if (!Parrot_default_charset_ptr) {
279 Parrot_default_charset_ptr = charset;
281 Parrot_ascii_charset_ptr = charset;
282 return register_charset(interp, charsetname, charset);
284 return 0;
287 PARROT_API
288 void
289 Parrot_charsets_encodings_init(PARROT_INTERP)
291 /* the order is crucial here:
292 * 1) encodings, default = fixed_8
293 * 2) charsets default = ascii
295 Parrot_encoding_fixed_8_init(interp);
296 Parrot_encoding_utf8_init(interp);
297 Parrot_encoding_ucs2_init(interp);
298 Parrot_encoding_utf16_init(interp);
300 Parrot_charset_ascii_init(interp);
301 Parrot_charset_iso_8859_1_init(interp);
302 Parrot_charset_binary_init(interp);
303 Parrot_charset_unicode_init(interp);
306 * now encoding strings don't have a charset yet - set default
308 parrot_init_encodings_2();
310 * now install charset converters
312 register_static_converters(interp);
315 PARROT_API
316 INTVAL
317 Parrot_make_default_charset(SHIM_INTERP, SHIM(const char *charsetname),
318 NOTNULL(CHARSET *charset))
320 Parrot_default_charset_ptr = charset;
321 return 1;
324 PARROT_API
325 PARROT_WARN_UNUSED_RESULT
326 PARROT_CAN_RETURN_NULL
327 const CHARSET *
328 Parrot_default_charset(SHIM_INTERP)
330 return Parrot_default_charset_ptr;
334 PARROT_API
335 PARROT_WARN_UNUSED_RESULT
336 PARROT_CAN_RETURN_NULL
337 charset_converter_t
338 Parrot_find_charset_converter(SHIM_INTERP,
339 NOTNULL(const CHARSET *lhs), NOTNULL(const CHARSET *rhs))
341 int i;
342 const int n = all_charsets->n_charsets;
344 for (i = 0; i < n; ++i) {
345 if (lhs == all_charsets->set[i].charset) {
346 One_charset * const left = all_charsets->set + i;
347 const int nc = left->n_converters;
348 int j;
350 for (j = 0; j < nc; ++j) {
351 if (left->to_converters[j].to == rhs)
352 return left->to_converters[j].func;
356 return NULL;
359 PARROT_API
360 void
361 Parrot_register_charset_converter(SHIM_INTERP,
362 NOTNULL(const CHARSET *lhs), NOTNULL(CHARSET *rhs),
363 NOTNULL(charset_converter_t func))
365 const int n = all_charsets->n_charsets;
366 int i;
368 for (i = 0; i < n; ++i) {
369 if (lhs == all_charsets->set[i].charset) {
370 One_charset * const left = all_charsets->set + i;
371 const int nc = left->n_converters++;
373 if (nc) {
374 left->to_converters = (To_converter *)mem_sys_realloc(
375 left->to_converters, sizeof (To_converter) * (nc + 1));
377 else
378 left->to_converters = (To_converter *)mem_sys_allocate(sizeof (To_converter));
379 left->to_converters[nc].to = rhs;
380 left->to_converters[nc].func = func;
386 * Local variables:
387 * c-file-style: "parrot"
388 * End:
389 * vim: expandtab shiftwidth=4: