fix codetest failure - ASSERT_ARGS does not have a ; after and
[parrot.git] / src / string / primitives.c
blob77a64b58a5b0e4ec411ffb2ac908f1e79b0778b5
1 /*
2 Copyright (C) 2006-2009, Parrot Foundation.
3 $Id$
5 =head1 NAME
7 src/string/primitives.c - String Primitives
9 =head1 DESCRIPTION
11 This file collects together all the functions that call into the ICU
12 API.
14 =head2 Functions
16 =over 4
18 =cut
22 /* HEADERIZER HFILE: include/parrot/string_primitives.h */
24 #include "parrot/parrot.h"
25 #if PARROT_HAS_ICU
26 # include <unicode/ucnv.h>
27 # include <unicode/utypes.h>
28 # include <unicode/uchar.h>
29 # include <unicode/ustring.h>
30 #else
31 # include <ctype.h>
32 #endif
36 =item C<void string_set_data_directory(PARROT_INTERP, const char *dir)>
38 Set the directory where ICU finds its data files (encodings, locales,
39 etc.).
41 =cut
45 PARROT_EXPORT
46 void
47 string_set_data_directory(PARROT_INTERP, ARGIN(const char *dir))
49 ASSERT_ARGS(string_set_data_directory)
50 #if PARROT_HAS_ICU
51 u_setDataDirectory(dir);
53 /* Since u_setDataDirectory doesn't have a result code, we'll spot
54 check that everything is okay by making sure that '9' had decimal
55 value 9. Using 57 rather than '9' so that the encoding of this
56 source code file isn't an issue.... (Don't want to get bitten by
57 EBCDIC.) */
59 if (!u_isdigit(57) || (u_charDigitValue(57) != 9))
60 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ICU_ERROR,
61 "string_set_data_directory: ICU data files not found"
62 "(apparently) for directory [%s]", dir);
63 #else
64 UNUSED(dir);
66 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_ICU_ERROR,
67 "string_set_data_directory: parrot compiled without ICU support");
68 #endif
73 =item C<Parrot_UInt4 string_unescape_one(PARROT_INTERP, UINTVAL *offset, const
74 STRING *string)>
76 Unescape a single character. We assume that we're at the start of a
77 sequence, right after the \.
79 =cut
83 PARROT_EXPORT
84 Parrot_UInt4
85 string_unescape_one(PARROT_INTERP, ARGMOD(UINTVAL *offset),
86 ARGIN(const STRING *string))
88 ASSERT_ARGS(string_unescape_one)
89 UINTVAL workchar = 0;
90 UINTVAL charcount = 0;
91 const UINTVAL len = Parrot_str_byte_length(interp, string);
92 const unsigned char * const buf = (unsigned char *)string->strstart;
94 /* Well, not right now */
95 UINTVAL codepoint = buf[*offset];
96 ++*offset;
98 switch (codepoint) {
99 case 'x':
100 codepoint = buf[*offset];
101 if (codepoint >= '0' && codepoint <= '9') {
102 workchar = codepoint - '0';
104 else if (codepoint >= 'a' && codepoint <= 'f') {
105 workchar = codepoint - 'a' + 10;
107 else if (codepoint >= 'A' && codepoint <= 'F') {
108 workchar = codepoint - 'A' + 10;
110 else if (codepoint == '{') {
111 int i;
112 ++*offset;
113 workchar = 0;
114 for (i = 0; i < 8 && *offset < len; ++i, ++*offset) {
115 codepoint = buf[*offset];
116 if (codepoint == '}') {
117 ++*offset;
118 return workchar;
120 workchar *= 16;
121 if (codepoint >= '0' && codepoint <= '9') {
122 workchar += codepoint - '0';
124 else if (codepoint >= 'a' && codepoint <= 'f') {
125 workchar += codepoint - 'a' + 10;
127 else if (codepoint >= 'A' && codepoint <= 'F') {
128 workchar += codepoint - 'A' + 10;
130 else {
131 Parrot_ex_throw_from_c_args(interp, NULL,
132 EXCEPTION_UNIMPLEMENTED,
133 "Illegal escape sequence inside {}");
136 if (*offset == len)
137 Parrot_ex_throw_from_c_args(interp, NULL,
138 EXCEPTION_UNIMPLEMENTED,
139 "Illegal escape sequence no '}'");
141 else {
142 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
143 "Illegal escape sequence in");
146 ++*offset;
147 if (*offset < len) {
148 workchar *= 16;
149 codepoint = buf[*offset];
150 if (codepoint >= '0' && codepoint <= '9') {
151 workchar += codepoint - '0';
153 else if (codepoint >= 'a' && codepoint <= 'f') {
154 workchar += codepoint - 'a' + 10;
156 else if (codepoint >= 'A' && codepoint <= 'F') {
157 workchar += codepoint - 'A' + 10;
159 else {
160 return workchar;
163 else {
164 return workchar;
166 ++*offset;
167 return workchar;
168 case 'c':
169 codepoint = buf[*offset];
170 if (codepoint >= 'A' && codepoint <= 'Z') {
171 workchar = codepoint - 'A' + 1;
173 else {
174 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
175 "Illegal escape sequence");
178 ++*offset;
179 return workchar;
180 case 'u':
181 workchar = 0;
182 for (charcount = 0; charcount < 4; charcount++) {
183 if (*offset < len) {
184 workchar *= 16;
185 codepoint = buf[*offset];
186 if (codepoint >= '0' && codepoint <= '9') {
187 workchar += codepoint - '0';
189 else if (codepoint >= 'a' && codepoint <= 'f') {
190 workchar += codepoint - 'a' + 10;
192 else if (codepoint >= 'A' && codepoint <= 'F') {
193 workchar += codepoint - 'A' + 10;
195 else {
196 Parrot_ex_throw_from_c_args(interp, NULL,
197 EXCEPTION_UNIMPLEMENTED,
198 "Illegal escape sequence in uxxx escape");
201 else {
202 Parrot_ex_throw_from_c_args(interp, NULL,
203 EXCEPTION_UNIMPLEMENTED,
204 "Illegal escape sequence in uxxx escape - too short");
207 ++*offset;
209 return workchar;
210 case 'U':
211 workchar = 0;
212 for (charcount = 0; charcount < 8; charcount++) {
213 if (*offset < len) {
214 workchar *= 16;
215 codepoint = buf[*offset];
216 if (codepoint >= '0' && codepoint <= '9') {
217 workchar += codepoint - '0';
219 else if (codepoint >= 'a' && codepoint <= 'f') {
220 workchar += codepoint - 'a' + 10;
222 else if (codepoint >= 'A' && codepoint <= 'F') {
223 workchar += codepoint - 'A' + 10;
225 else {
226 Parrot_ex_throw_from_c_args(interp, NULL,
227 EXCEPTION_UNIMPLEMENTED,
228 "Illegal escape sequence in Uxxx escape");
231 else {
232 Parrot_ex_throw_from_c_args(interp, NULL,
233 EXCEPTION_UNIMPLEMENTED,
234 "Illegal escape sequence in uxxx escape - too short");
237 ++*offset;
239 return workchar;
240 case '0':
241 case '1':
242 case '2':
243 case '3':
244 case '4':
245 case '5':
246 case '6':
247 case '7':
248 workchar = codepoint - '0';
249 if (*offset < len) {
250 workchar *= 8;
251 codepoint = buf[*offset];
252 if (codepoint >= '0' && codepoint <= '7') {
253 workchar += codepoint - '0';
255 else {
256 return workchar;
259 else {
260 return workchar;
262 ++*offset;
263 if (*offset < len) {
264 workchar *= 8;
265 codepoint = buf[*offset];
266 if (codepoint >= '0' && codepoint <= '7') {
267 workchar += codepoint - '0';
269 else {
270 return workchar;
273 else {
274 return workchar;
276 ++*offset;
277 return workchar;
278 case 'a':
279 return 7; /* bell */
280 case 'b':
281 return 8; /* bs */
282 case 't':
283 return 9;
284 case 'n':
285 return 10;
286 case 'v':
287 return 11;
288 case 'f':
289 return 12;
290 case 'r':
291 return 13;
292 case 'e':
293 return 27;
294 case 92: /* \ */
295 return 92;
296 case '"':
297 return '"';
298 default:
299 return codepoint; /* any not special return the char */
305 =back
307 =head2 Character Property Functions
309 =over 4
311 =item C<INTVAL Parrot_char_digit_value(PARROT_INTERP, UINTVAL character)>
313 Returns the decimal digit value of the specified character if it is a decimal
314 digit character. If not, then -1 is returned.
316 Note that as currently written, C<Parrot_char_digit_value()> can
317 correctly return the decimal digit value of characters for which
318 C<Parrot_char_is_digit()> returns false.
320 =cut
324 PARROT_EXPORT
325 PARROT_CONST_FUNCTION
326 INTVAL
327 Parrot_char_digit_value(SHIM_INTERP, UINTVAL character)
329 ASSERT_ARGS(Parrot_char_digit_value)
330 #if PARROT_HAS_ICU
331 return u_charDigitValue(character);
332 #else
333 if ((character >= 0x30) && (character <= 0x39))
334 return character - 0x30;
335 return -1;
336 #endif
341 =item C<char * str_dup_remove_quotes(const char *old)>
343 Duplicates a C string (minus the wrapping quotes). Similar to strdup(),
344 except it dies if it runs out of memory.
346 =cut
350 PARROT_EXPORT
351 PARROT_MALLOC
352 PARROT_CANNOT_RETURN_NULL
353 char *
354 str_dup_remove_quotes(ARGIN(const char *old))
356 ASSERT_ARGS(str_dup_remove_quotes)
357 const size_t oldlen = strlen(old) + 1;
359 /* 2 for the beginning and ending quote chars */
360 const size_t newlen = oldlen - 2;
361 char * const copy = (char *)mem_internal_allocate(newlen);
363 memcpy(copy, old + 1, newlen);
364 copy[newlen - 1] = 0;
366 return copy;
371 =back
373 =head1 SEE ALSO
375 =over 4
377 =item F<include/parrot/string_primitives.h>
379 =item F<include/parrot/string.h>
381 =item F<src/string.c>
383 =back
385 =cut
391 * Local variables:
392 * c-file-style: "parrot"
393 * End:
394 * vim: expandtab shiftwidth=4: