2 Copyright (C) 2004-2010, Parrot Foundation.
7 src/string/encoding.c - global encoding functions
11 These are Parrot's generic encoding-handling functions.
19 #include "parrot/encoding.h"
21 STR_VTABLE
*Parrot_default_encoding_ptr
= NULL
;
23 static STR_VTABLE
**encodings
;
24 static int n_encodings
;
25 /* for backwards compatibility */
26 static STRING
*unicode_str
;
27 static STRING
*fixed_8_str
;
29 /* HEADERIZER HFILE: include/parrot/encoding.h */
31 /* HEADERIZER BEGIN: static */
32 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
34 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
35 /* HEADERIZER END: static */
40 =item C<void Parrot_deinit_encodings(PARROT_INTERP)>
42 Deinitialize encodings and free all memory used by them.
49 Parrot_deinit_encodings(PARROT_INTERP
)
51 ASSERT_ARGS(Parrot_deinit_encodings
)
53 mem_gc_free(interp
, encodings
);
60 =item C<STR_VTABLE * Parrot_new_encoding(PARROT_INTERP)>
62 Allocates the memory for a new string vtable from the system.
70 PARROT_CANNOT_RETURN_NULL
72 Parrot_new_encoding(PARROT_INTERP
)
74 ASSERT_ARGS(Parrot_new_encoding
)
75 return mem_gc_allocate_typed(interp
, STR_VTABLE
);
80 =item C<const STR_VTABLE * Parrot_find_encoding(PARROT_INTERP, const char
83 Finds an encoding with the name C<encodingname>. Returns the encoding
84 if it is successfully found, or NULL otherwise. For backwards compatibility, the name C<unicode> is accepted as an alias for the utf8 encoding.
92 PARROT_WARN_UNUSED_RESULT
93 PARROT_CAN_RETURN_NULL
95 Parrot_find_encoding(SHIM_INTERP
, ARGIN(const char *encodingname
))
97 ASSERT_ARGS(Parrot_find_encoding
)
98 const int n
= n_encodings
;
101 for (i
= 0; i
< n
; ++i
)
102 if (STREQ(encodings
[i
]->name
, encodingname
))
105 /* backwards compatibility */
106 if (strcmp(encodingname
, "unicode") == 0)
107 return Parrot_utf8_encoding_ptr
;
114 =item C<const STR_VTABLE * Parrot_load_encoding(PARROT_INTERP, const char
117 Loads an encoding. Currently throws an exception because we cannot load
118 encodings. See https://trac.parrot.org/parrot/wiki/StringsTasklist.
124 /* Yep, this needs to be a char * parameter -- it's tough to load in
125 encodings and such for strings if we can't be sure we've got enough
126 info set up to actually build strings...
128 Also remember to use PARROT_WARN_UNUSED_RESULT and
129 PARROT_CANNOT_RETURN_NULL when this actually works.
133 PARROT_DOES_NOT_RETURN
134 PARROT_CANNOT_RETURN_NULL
136 Parrot_load_encoding(PARROT_INTERP
, ARGIN(const char *encodingname
))
138 ASSERT_ARGS(Parrot_load_encoding
)
139 UNUSED(encodingname
);
140 Parrot_ex_throw_from_c_args(interp
, NULL
, EXCEPTION_UNIMPLEMENTED
,
141 "Can't load encodings yet");
146 =item C<INTVAL Parrot_encoding_number(PARROT_INTERP, const STRING
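149 Return the number of the encoding or -1 if not found. For backwards compatibility, the legacy names C<unicode> and C<fixed_8> are looked up as C<utf8> and C<ascii>, respectively.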
157 PARROT_WARN_UNUSED_RESULT
159 Parrot_encoding_number(PARROT_INTERP
, ARGIN(const STRING
*encodingname
))
161 ASSERT_ARGS(Parrot_encoding_number
)
162 const int n
= n_encodings
;
165 for (i
= 0; i
< n
; ++i
) {
166 if (Parrot_str_equal(interp
, encodings
[i
]->name_str
, encodingname
))
170 /* backwards compatibility */
171 if (Parrot_str_equal(interp
, encodingname
, unicode_str
)) {
172 for (i
= 0; i
< n
; ++i
) {
173 if (STREQ(encodings
[i
]->name
, "utf8"))
177 else if (Parrot_str_equal(interp
, encodingname
, fixed_8_str
)) {
178 for (i
= 0; i
< n
; ++i
) {
179 if (STREQ(encodings
[i
]->name
, "ascii"))
189 =item C<INTVAL Parrot_encoding_number_of_str(PARROT_INTERP, const STRING *src)>
191 Return the number of the encoding of the given string or -1 if not found.
193 This could be converted to a macro.
201 PARROT_WARN_UNUSED_RESULT
203 Parrot_encoding_number_of_str(SHIM_INTERP
, ARGIN(const STRING
*src
))
205 ASSERT_ARGS(Parrot_encoding_number_of_str
)
207 return src
->encoding
->num
;
212 =item C<STRING* Parrot_encoding_name(PARROT_INTERP, INTVAL number_of_encoding)>
214 Returns the name of a character encoding based on the INTVAL index
215 C<number_of_encoding> into the C<encodings> array of registered encodings.
217 This could be converted to a macro.
225 PARROT_WARN_UNUSED_RESULT
226 PARROT_CAN_RETURN_NULL
228 Parrot_encoding_name(SHIM_INTERP
, INTVAL number_of_encoding
)
230 ASSERT_ARGS(Parrot_encoding_name
)
231 if (number_of_encoding
>= n_encodings
||
232 number_of_encoding
< 0)
234 return encodings
[number_of_encoding
]->name_str
;
239 =item C<const STR_VTABLE* Parrot_get_encoding(PARROT_INTERP, INTVAL
242 Returns the encoding given by the INTVAL index C<number_of_encoding>.
250 PARROT_WARN_UNUSED_RESULT
251 PARROT_CAN_RETURN_NULL
253 Parrot_get_encoding(SHIM_INTERP
, INTVAL number_of_encoding
)
255 ASSERT_ARGS(Parrot_get_encoding
)
256 if (number_of_encoding
>= n_encodings
||
257 number_of_encoding
< 0)
259 return encodings
[number_of_encoding
];
264 =item C<const char * Parrot_encoding_c_name(PARROT_INTERP, INTVAL
267 Returns the NUL-terminated C string representation of the encoding's name
268 for the index given by C<number_of_encoding>.
276 PARROT_WARN_UNUSED_RESULT
277 PARROT_CAN_RETURN_NULL
279 Parrot_encoding_c_name(SHIM_INTERP
, INTVAL number_of_encoding
)
281 ASSERT_ARGS(Parrot_encoding_c_name
)
282 if (number_of_encoding
>= n_encodings
||
283 number_of_encoding
< 0)
285 return encodings
[number_of_encoding
]->name
;
290 =item C<void Parrot_str_internal_register_encoding_names(PARROT_INTERP)>
292 Helper function for initializing the STRING names of the encodings. We can't create
293 the STRING names until the default encodings are already initialized,
294 so the name generation is split into a second init stage. This stage also creates the STRINGs for the legacy names C<unicode> and C<fixed_8>.
302 Parrot_str_internal_register_encoding_names(PARROT_INTERP
)
304 ASSERT_ARGS(Parrot_str_internal_register_encoding_names
)
306 for (n
= 0; n
< n_encodings
; ++n
)
307 encodings
[n
]->name_str
=
308 Parrot_str_new_constant(interp
, encodings
[n
]->name
);
309 unicode_str
= Parrot_str_new_constant(interp
, "unicode");
310 fixed_8_str
= Parrot_str_new_constant(interp
, "fixed_8");
315 =item C<INTVAL Parrot_register_encoding(PARROT_INTERP, STR_VTABLE *encoding)>
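317 Registers the character encoding C<encoding>, which is identified by its C<name> field; that name is compared against the names of the encodings already registered.
318 The built-in encodings registered this way by C<Parrot_encodings_init> are ascii, latin1, binary, utf8, utf16, ucs2 and ucs4.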
326 Parrot_register_encoding(PARROT_INTERP
, ARGIN(STR_VTABLE
*encoding
))
328 ASSERT_ARGS(Parrot_register_encoding
)
332 for (i
= 0; i
< n_encodings
; ++i
) {
333 if (STREQ(encodings
[i
]->name
, encoding
->name
))
338 encodings
= mem_gc_allocate_zeroed_typed(interp
, STR_VTABLE
*);
340 encodings
= mem_gc_realloc_n_typed_zeroed(interp
,
341 encodings
, n
+ 1, n
, STR_VTABLE
*);
344 encodings
[n
] = encoding
;
352 =item C<void Parrot_encodings_init(PARROT_INTERP)>
354 Creates the initial encodings.
362 Parrot_encodings_init(PARROT_INTERP
)
364 ASSERT_ARGS(Parrot_encodings_init
)
366 Parrot_register_encoding(interp
, Parrot_ascii_encoding_ptr
);
367 Parrot_register_encoding(interp
, Parrot_latin1_encoding_ptr
);
368 Parrot_register_encoding(interp
, Parrot_binary_encoding_ptr
);
369 Parrot_register_encoding(interp
, Parrot_utf8_encoding_ptr
);
370 Parrot_register_encoding(interp
, Parrot_utf16_encoding_ptr
);
371 Parrot_register_encoding(interp
, Parrot_ucs2_encoding_ptr
);
372 Parrot_register_encoding(interp
, Parrot_ucs4_encoding_ptr
);
374 Parrot_default_encoding_ptr
= Parrot_ascii_encoding_ptr
;
376 /* Now that the plugins are registered, we can create STRING
378 Parrot_str_internal_register_encoding_names(interp
);
383 =item C<INTVAL Parrot_make_default_encoding(PARROT_INTERP, const char
384 *encodingname, STR_VTABLE *encoding)>
386 Sets the default encoding to C<encoding>. The C<encodingname> argument is not used.
394 Parrot_make_default_encoding(SHIM_INTERP
, SHIM(const char *encodingname
),
395 ARGIN(STR_VTABLE
*encoding
))
397 ASSERT_ARGS(Parrot_make_default_encoding
)
398 Parrot_default_encoding_ptr
= encoding
;
404 =item C<const STR_VTABLE * Parrot_default_encoding(PARROT_INTERP)>
406 Gets the default encoding.
414 PARROT_WARN_UNUSED_RESULT
415 PARROT_CANNOT_RETURN_NULL
417 Parrot_default_encoding(SHIM_INTERP
)
419 ASSERT_ARGS(Parrot_default_encoding
)
420 return Parrot_default_encoding_ptr
;
426 * c-file-style: "parrot"
428 * vim: expandtab shiftwidth=4: