2 Copyright (C) 2006-2009, Parrot Foundation.
7 src/string/string_primitives.c - String Primitives
11 This file collects together all the functions that call into the ICU
22 /* HEADERIZER HFILE: include/parrot/string_primitives.h */
24 #include "parrot/parrot.h"
26 # include <unicode/ucnv.h>
27 # include <unicode/utypes.h>
28 # include <unicode/uchar.h>
29 # include <unicode/ustring.h>
36 =item C<void string_set_data_directory(PARROT_INTERP, const char *dir)>
38 Set the directory where ICU finds its data files (encodings, locales,
47 string_set_data_directory(PARROT_INTERP
, ARGIN(const char *dir
))
49 ASSERT_ARGS(string_set_data_directory
)
51 u_setDataDirectory(dir
);
53 /* Since u_setDataDirectory doesn't have a result code, we'll spot
54 check that everything is okay by making sure that '9' had decimal
55 value 9. Using 57 rather than '9' so that the encoding of this
56 source code file isn't an issue.... (Don't want to get bitten by
59 if (!u_isdigit(57) || (u_charDigitValue(57) != 9))
60 Parrot_ex_throw_from_c_args(interp
, NULL
, EXCEPTION_ICU_ERROR
,
61 "string_set_data_directory: ICU data files not found"
62 "(apparently) for directory [%s]", dir
);
66 Parrot_ex_throw_from_c_args(interp
, NULL
, EXCEPTION_ICU_ERROR
,
67 "string_set_data_directory: parrot compiled without ICU support");
73 =item C<Parrot_UInt4 string_unescape_one(PARROT_INTERP, UINTVAL *offset, const
76 Unescape a single character. We assume that we're at the start of a
77 sequence, right after the \.
/*
 * string_unescape_one: decode a single escape sequence found in the bytes
 * of string, starting at byte index *offset, and produce its code point.
 * *offset is advanced as bytes are consumed (visible in the braced loop).
 *
 * NOTE(review): this listing is lossy. The return type, braces, case labels
 * and a number of statements are not visible, so the comments below describe
 * only the statements that are shown and hedge everything else.
 */
85 string_unescape_one(PARROT_INTERP
, ARGMOD(UINTVAL
*offset
),
86 ARGIN(const STRING
*string
))
88 ASSERT_ARGS(string_unescape_one
)
/* Digit counter used by the fixed-width loops further down. */
90 UINTVAL charcount
= 0;
/* Byte length of the input; bounds the scan in the braced form. */
91 const UINTVAL len
= Parrot_str_byte_length(interp
, string
);
/* The string storage viewed as unsigned bytes. */
92 const unsigned char * const buf
= (unsigned char *)string
->strstart
;
94 /* Well, not right now */
/* Begin with the byte at the current offset. */
95 UINTVAL codepoint
= buf
[*offset
];
/* First hex digit: its value is assigned to workchar.
   NOTE(review): the case label selecting this branch is not visible. */
100 codepoint
= buf
[*offset
];
101 if (codepoint
>= '0' && codepoint
<= '9') {
102 workchar
= codepoint
- '0';
104 else if (codepoint
>= 'a' && codepoint
<= 'f') {
105 workchar
= codepoint
- 'a' + 10;
107 else if (codepoint
>= 'A' && codepoint
<= 'F') {
108 workchar
= codepoint
- 'A' + 10;
/* An opening brace in place of a digit selects the braced form. */
110 else if (codepoint
== '{') {
/* Visit at most 8 bytes and never run past the end of the input;
   every pass advances *offset as well as the local counter. */
114 for (i
= 0; i
< 8 && *offset
< len
; ++i
, ++*offset
) {
115 codepoint
= buf
[*offset
];
/* A closing brace ends the braced form. */
116 if (codepoint
== '}') {
/* Otherwise a hex digit contributes its value to workchar.
   NOTE(review): any scaling of workchar between digits is not visible. */
121 if (codepoint
>= '0' && codepoint
<= '9') {
122 workchar
+= codepoint
- '0';
124 else if (codepoint
>= 'a' && codepoint
<= 'f') {
125 workchar
+= codepoint
- 'a' + 10;
127 else if (codepoint
>= 'A' && codepoint
<= 'F') {
128 workchar
+= codepoint
- 'A' + 10;
/* Any other byte inside the braces is rejected. */
131 Parrot_ex_throw_from_c_args(interp
, NULL
,
132 EXCEPTION_UNIMPLEMENTED
,
133 "Illegal escape sequence inside {}");
/* Presumably reached when the loop ends without a closing brace; the
   guarding condition is not visible -- TODO confirm. */
137 Parrot_ex_throw_from_c_args(interp
, NULL
,
138 EXCEPTION_UNIMPLEMENTED
,
139 "Illegal escape sequence no '}'");
/* Presumably the final else of the first-digit chain: neither a hex digit
   nor an opening brace -- TODO confirm. */
142 Parrot_ex_throw_from_c_args(interp
, NULL
, EXCEPTION_UNIMPLEMENTED
,
143 "Illegal escape sequence in");
/* Next byte: when it is a hex digit its value is added to workchar. */
149 codepoint
= buf
[*offset
];
150 if (codepoint
>= '0' && codepoint
<= '9') {
151 workchar
+= codepoint
- '0';
153 else if (codepoint
>= 'a' && codepoint
<= 'f') {
154 workchar
+= codepoint
- 'a' + 10;
156 else if (codepoint
>= 'A' && codepoint
<= 'F') {
157 workchar
+= codepoint
- 'A' + 10;
/* Letter form: upper-case A through Z map to the values 1 through 26.
   NOTE(review): presumably the control-character escape; the case label
   is not visible. */
169 codepoint
= buf
[*offset
];
170 if (codepoint
>= 'A' && codepoint
<= 'Z') {
171 workchar
= codepoint
- 'A' + 1;
/* Presumably the else branch: bytes outside A through Z are rejected. */
174 Parrot_ex_throw_from_c_args(interp
, NULL
, EXCEPTION_UNIMPLEMENTED
,
175 "Illegal escape sequence");
/* Fixed-width form with 4 hex digits (the messages call it the uxxx
   escape); each valid digit adds its value to workchar. */
182 for (charcount
= 0; charcount
< 4; charcount
++) {
185 codepoint
= buf
[*offset
];
186 if (codepoint
>= '0' && codepoint
<= '9') {
187 workchar
+= codepoint
- '0';
189 else if (codepoint
>= 'a' && codepoint
<= 'f') {
190 workchar
+= codepoint
- 'a' + 10;
192 else if (codepoint
>= 'A' && codepoint
<= 'F') {
193 workchar
+= codepoint
- 'A' + 10;
/* A byte that is not a hex digit is rejected. */
196 Parrot_ex_throw_from_c_args(interp
, NULL
,
197 EXCEPTION_UNIMPLEMENTED
,
198 "Illegal escape sequence in uxxx escape");
/* Presumably raised when the input ends before 4 digits were read; the
   guarding condition is not visible -- TODO confirm. */
202 Parrot_ex_throw_from_c_args(interp
, NULL
,
203 EXCEPTION_UNIMPLEMENTED
,
204 "Illegal escape sequence in uxxx escape - too short");
/* Fixed-width form with 8 hex digits (the Uxxx escape, going by the first
   message in this loop). */
212 for (charcount
= 0; charcount
< 8; charcount
++) {
215 codepoint
= buf
[*offset
];
216 if (codepoint
>= '0' && codepoint
<= '9') {
217 workchar
+= codepoint
- '0';
219 else if (codepoint
>= 'a' && codepoint
<= 'f') {
220 workchar
+= codepoint
- 'a' + 10;
222 else if (codepoint
>= 'A' && codepoint
<= 'F') {
223 workchar
+= codepoint
- 'A' + 10;
/* A byte that is not a hex digit is rejected. */
226 Parrot_ex_throw_from_c_args(interp
, NULL
,
227 EXCEPTION_UNIMPLEMENTED
,
228 "Illegal escape sequence in Uxxx escape");
/* NOTE(review): the message below names the uxxx escape although this is
   the 8-digit loop whose other message names Uxxx. It looks like a
   copy/paste slip; confirm and correct the text in a separate change. */
232 Parrot_ex_throw_from_c_args(interp
, NULL
,
233 EXCEPTION_UNIMPLEMENTED
,
234 "Illegal escape sequence in uxxx escape - too short");
/* Digit form: the current digit seeds workchar.
   NOTE(review): presumably the octal escape, since the following checks
   accept only the digits 0 through 7; the case labels are not visible. */
248 workchar
= codepoint
- '0';
/* Second digit, accepted only in the range 0 through 7. */
251 codepoint
= buf
[*offset
];
252 if (codepoint
>= '0' && codepoint
<= '7') {
253 workchar
+= codepoint
- '0';
/* Third digit, same range check. */
265 codepoint
= buf
[*offset
];
266 if (codepoint
>= '0' && codepoint
<= '7') {
267 workchar
+= codepoint
- '0';
/* Fallback: hand back the byte itself. */
299 return codepoint
; /* any not special return the char */
307 =head2 Character Property Functions
311 =item C<INTVAL Parrot_char_digit_value(PARROT_INTERP, UINTVAL character)>
313 Returns the decimal digit value of the specified character if it is a decimal
314 digit character. If not, then -1 is returned.
316 Note that as currently written, C<Parrot_char_digit_value()> can
317 correctly return the decimal digit value of characters for which
318 C<Parrot_char_is_digit()> returns false.
325 PARROT_CONST_FUNCTION
327 Parrot_char_digit_value(SHIM_INTERP
, UINTVAL character
)
329 ASSERT_ARGS(Parrot_char_digit_value
)
331 return u_charDigitValue(character
);
333 if ((character
>= 0x30) && (character
<= 0x39))
334 return character
- 0x30;
341 =item C<char * str_dup_remove_quotes(const char *old)>
343 Duplicates a C string (minus the wrapping quotes). Similar to strdup(),
344 except it dies if it runs out of memory.
352 PARROT_CANNOT_RETURN_NULL
354 str_dup_remove_quotes(ARGIN(const char *old
))
356 ASSERT_ARGS(str_dup_remove_quotes
)
357 const size_t oldlen
= strlen(old
) + 1;
359 /* 2 for the beginning and ending quote chars */
360 const size_t newlen
= oldlen
- 2;
361 char * const copy
= (char *)mem_internal_allocate(newlen
);
363 memcpy(copy
, old
+ 1, newlen
);
364 copy
[newlen
- 1] = 0;
377 =item F<include/parrot/string_primitives.h>
379 =item F<include/parrot/string.h>
381 =item F<src/string.c>
392 * c-file-style: "parrot"
394 * vim: expandtab shiftwidth=4: