2 Copyright (C) 2003-2008, The Perl Foundation.
7 src/pmc/string.pmc - String PMC Class
11 C<String> extends C<scalar> to provide a string for languages that want a
12 C<string> type without going to an S register. It acts as a wrapper for the
13 functions in F<src/string.c>.
23 #include "parrot/parrot.h"
25 pmclass String extends scalar provides string provides scalar {
31 Initializes the string.
38 PMC_str_val(SELF) = string_make_empty(INTERP, enum_stringrep_one, 0);
39 PObj_custom_mark_SET(SELF);
44 =item C<PMC *new_from_string(STRING *rep, INTVAL flags)>
46 Class method to construct a String from the string representation C<rep>.
51 VTABLE PMC *new_from_string(STRING *rep, INTVAL flags) {
53 const INTVAL type = SELF->vtable->base_type;
55 if (flags & PObj_constant_FLAG)
56 res = constant_pmc_new(INTERP, type);
58 res = pmc_new(INTERP, type);
60 PMC_str_val(res) = rep;
68 Marks the string as live.
75 if (PMC_str_val(SELF))
76 pobject_lives(INTERP, (PObj *)PMC_str_val(SELF));
83 Creates a copy of the string.
90 PMC * const dest = pmc_new_noinit(INTERP, SELF->vtable->base_type);
91 PObj_custom_mark_SET(dest);
92 PMC_str_val(dest) = string_copy(INTERP, VTABLE_get_string(INTERP, SELF));
98 =item C<INTVAL get_integer()>
100 Returns the integer representation of the string.
106 VTABLE INTVAL get_integer() {
107 STRING * const s = (STRING *)VTABLE_get_string(INTERP, SELF);
108 return string_to_int(INTERP, s);
113 =item C<FLOATVAL get_number()>
115 Returns the floating-point representation of the string.
121 VTABLE FLOATVAL get_number() {
122 STRING * const s = (STRING*) VTABLE_get_string(INTERP, SELF);
123 return string_to_num(INTERP, s);
128 =item C<PMC *get_bignum()>
130 Returns the big number (C<BigInt>) representation of the string.
136 VTABLE PMC *get_bignum() {
137 STRING * const s = VTABLE_get_string(INTERP, SELF);
138 PMC *ret = pmc_new(INTERP, enum_class_BigInt);
139 VTABLE_set_string_native(INTERP, ret, s);
145 =item C<STRING *get_string()>
147 Returns a copy of the string, or NULL if no string is set.
153 VTABLE STRING *get_string() {
154 STRING * const s = PMC_str_val(SELF);
155 return s ? string_copy(INTERP, s) : NULL;
160 =item C<INTVAL get_bool()>
162 Returns the boolean value of the string.
168 VTABLE INTVAL get_bool() {
169 STRING * const s = VTABLE_get_string(INTERP, SELF);
170 return string_bool(INTERP, s);
175 =item C<VOID set_integer_native(INTVAL value)>
177 =item C<VOID set_bool(INTVAL value)>
179 Sets the value of the string to the integer C<value>.
185 VTABLE void set_integer_native(INTVAL value) {
186 SELF.set_string_native(string_from_int(INTERP, value));
189 VTABLE void set_bool(INTVAL value) {
190 SELF.set_string_native(string_from_int(INTERP, value));
195 =item C<VOID set_number_native(FLOATVAL value)>
197 Sets the value of the string to the floating-point C<value>.
203 VTABLE void set_number_native(FLOATVAL value) {
204 SELF.set_string_native(string_from_num(INTERP, value));
210 =item C<VOID set_string_native(STRING *value)>
212 Sets the value of the string to that of the specified C<string>.
218 VTABLE void set_string_native(STRING *value) {
219 /* Only allow constant PMCs to embed constant strings */
220 if (PObj_constant_TEST(SELF) && !PObj_constant_TEST(value)) {
221 const char *copy = string_to_cstring(INTERP, value);
222 value = const_string(INTERP, copy);
224 PMC_str_val(SELF) = value;
229 =item C<VOID assign_string_native(STRING *value)>
231 Sets the value of the string to a copy of the specified C<string>.
237 VTABLE void assign_string_native(STRING *value) {
238 PMC_str_val(SELF) = string_set(INTERP, PMC_str_val(SELF), value);
243 =item C<VOID set_string_same(PMC *value)>
245 Sets the value of the string to the value of
246 the specified C<String> PMC.
252 VTABLE void set_string_same(PMC *value) {
254 string_set(INTERP, PMC_str_val(SELF), PMC_str_val(value));
259 =item C<VOID set_pmc(PMC *value)>
261 Sets the value of the string to the string value of
262 the specified C<PMC>.
267 VTABLE void set_pmc(PMC *value) {
268 SELF.set_string_native(VTABLE_get_string(INTERP, value));
273 =item C<PMC *bitwise_ors(PMC *value, PMC *dest)>
275 =item C<PMC *bitwise_ors_str(STRING *value, PMC *dest)>
277 =item C<PMC *bitwise_ands(PMC *value, PMC *dest)>
279 =item C<PMC *bitwise_ands_str(STRING *value, PMC *dest)>
281 =item C<PMC *bitwise_xors(PMC *value, PMC *dest)>
283 =item C<PMC *bitwise_xors_str(STRING *value, PMC *dest)>
285 =item C<PMC *bitwise_nots(PMC *dest)>
287 These functions perform bitwise operations on entire
288 strings, and place the result in C<dest>.
290 =item C<void i_bitwise_ors(PMC *value)>
292 =item C<void i_bitwise_ors_str(STRING *value)>
294 =item C<void i_bitwise_ands(PMC *value)>
296 =item C<void i_bitwise_ands_str(STRING *value)>
298 =item C<void i_bitwise_xors(PMC *value)>
300 =item C<void i_bitwise_xors_str(STRING *value)>
302 =item C<void i_bitwise_nots()>
304 These functions perform bitwise operations on entire
310 VTABLE PMC *bitwise_ors(PMC *value, PMC *dest) {
311 STRING * const s = VTABLE_get_string(INTERP, SELF);
312 STRING * const v = VTABLE_get_string(INTERP, value);
315 dest = pmc_new(INTERP, SELF->vtable->base_type);
317 VTABLE_set_string_native(INTERP, dest,
318 string_bitwise_or(INTERP, s, v, NULL));
323 VTABLE PMC *bitwise_ors_str(STRING *value, PMC *dest) {
324 STRING * const s = VTABLE_get_string(INTERP, SELF);
327 dest = pmc_new(INTERP, SELF->vtable->base_type);
329 VTABLE_set_string_native(INTERP, dest,
330 string_bitwise_or(INTERP, s, value, NULL));
335 VTABLE void i_bitwise_ors(PMC *value) {
336 STRING *s = VTABLE_get_string(INTERP, SELF);
337 STRING * const v = VTABLE_get_string(INTERP, value);
338 SELF.set_string_native(string_bitwise_or(INTERP, s, v, &s));
341 VTABLE void i_bitwise_ors_str(STRING *value) {
342 STRING *s = VTABLE_get_string(INTERP, SELF);
343 SELF.set_string_native(string_bitwise_or(INTERP, s, value, &s));
346 VTABLE PMC *bitwise_ands(PMC *value, PMC *dest) {
347 STRING * const s = VTABLE_get_string(INTERP, SELF);
348 STRING * const v = VTABLE_get_string(INTERP, value);
351 dest = pmc_new(INTERP, SELF->vtable->base_type);
353 VTABLE_set_string_native(INTERP, dest,
354 string_bitwise_and(INTERP, s, v, NULL));
359 VTABLE PMC *bitwise_ands_str(STRING *value, PMC *dest) {
360 STRING * const s = VTABLE_get_string(INTERP, SELF);
363 dest = pmc_new(INTERP, SELF->vtable->base_type);
365 VTABLE_set_string_native(INTERP, dest,
366 string_bitwise_and(INTERP, s, value, NULL));
372 VTABLE void i_bitwise_ands(PMC *value) {
373 STRING *s = VTABLE_get_string(INTERP, SELF);
374 STRING * const v = VTABLE_get_string(INTERP, value);
375 SELF.set_string_native(string_bitwise_and(INTERP, s, v, &s));
378 VTABLE void i_bitwise_ands_str(STRING *value) {
379 STRING *s = VTABLE_get_string(INTERP, SELF);
380 SELF.set_string_native(string_bitwise_and(INTERP, s, value, &s));
383 VTABLE PMC *bitwise_xors(PMC *value, PMC *dest) {
384 STRING * const s = VTABLE_get_string(INTERP, SELF);
385 STRING * const v = VTABLE_get_string(INTERP, value);
388 dest = pmc_new(INTERP, SELF->vtable->base_type);
390 VTABLE_set_string_native(INTERP, dest,
391 string_bitwise_xor(INTERP, s, v, NULL));
396 VTABLE PMC *bitwise_xors_str(STRING *value, PMC *dest) {
397 STRING * const s = VTABLE_get_string(INTERP, SELF);
400 dest = pmc_new(INTERP, SELF->vtable->base_type);
402 VTABLE_set_string_native(INTERP, dest,
403 string_bitwise_xor(INTERP, s, value, NULL));
408 VTABLE void i_bitwise_xors(PMC *value) {
409 STRING *s = VTABLE_get_string(INTERP, SELF);
410 STRING * const v = VTABLE_get_string(INTERP, value);
411 SELF.set_string_native(string_bitwise_xor(INTERP, s, v, &s));
414 VTABLE void i_bitwise_xors_str(STRING *value) {
415 STRING *s = VTABLE_get_string(INTERP, SELF);
416 SELF.set_string_native(string_bitwise_xor(INTERP, s, value, &s));
419 VTABLE PMC *bitwise_nots(PMC *dest) {
420 STRING * const s = VTABLE_get_string(INTERP, SELF);
423 dest = pmc_new(INTERP, SELF->vtable->base_type);
425 VTABLE_set_string_native(INTERP, dest,
426 string_bitwise_not(INTERP, s, NULL));
431 VTABLE void i_bitwise_nots() {
432 STRING *s = VTABLE_get_string(INTERP, SELF);
433 VTABLE_set_string_native(
434 INTERP, SELF, string_bitwise_not(INTERP, s, &s));
439 =item C<INTVAL is_equal(PMC *value)>
441 Compares the string with C<value>; returns true if
447 VTABLE INTVAL is_equal(PMC *value) {
448 STRING * const s = VTABLE_get_string(INTERP, SELF);
449 STRING * const v = VTABLE_get_string(INTERP, value);
450 return (INTVAL)(0 == string_equal(INTERP, s, v));
455 =item C<INTVAL is_equal_num(PMC *value)>
457 Compares the numerical value of the string with that of
458 C<value>; returns true if they match.
464 VTABLE INTVAL is_equal_num(PMC *value) {
465 const FLOATVAL sf = string_to_num(INTERP, VTABLE_get_string(INTERP, SELF));
466 const FLOATVAL vf = VTABLE_get_number(INTERP, value);
467 return (INTVAL)(sf == vf);
472 =item C<INTVAL is_equal_string(PMC *value)>
474 Compares the string with C<value>; returns true if they match.
480 VTABLE INTVAL is_equal_string(PMC *value) {
481 STRING * const s = VTABLE_get_string(INTERP, SELF);
482 STRING * const v = VTABLE_get_string(INTERP, value);
483 return string_equal(INTERP, s, v) == 0;
488 =item C<INTVAL is_same(PMC *value)>
490 Compares the string in this PMC with the one in the C<value> PMC.
491 Returns true if this PMC and the one in C<value> are of the same PMC
492 class and their strings are aliases of the same internal string.
494 (this can only happen if you use the set_string_native method)
499 VTABLE INTVAL is_same(PMC *value) {
500 const STRING * const s = VTABLE_get_string(INTERP, SELF);
501 const STRING * const v = VTABLE_get_string(INTERP, value);
502 return (INTVAL)(value->vtable == SELF->vtable && s == v);
507 =item C<INTVAL cmp(PMC *value)>
509 Compares the string with C<value>; returns -1 if the
510 string is smaller, 0 if they are equal, and 1 if C<value>
516 VTABLE INTVAL cmp(PMC *value) {
517 STRING * const s = VTABLE_get_string(INTERP, SELF);
518 STRING * const v = VTABLE_get_string(INTERP, value);
519 return string_compare(INTERP, s, v);
524 =item C<INTVAL cmp_num(PMC *value)>
526 Compares the numerical value of the string with that of
527 C<value>; returns -1 if the string is smaller, 0 if they
528 are equal, and 1 if C<value> is smaller.
533 VTABLE INTVAL cmp_num(PMC *value) {
534 const FLOATVAL sf = string_to_num(INTERP, VTABLE_get_string(INTERP, SELF));
535 const FLOATVAL vf = VTABLE_get_number(INTERP, value);
548 =item C<INTVAL cmp_string(PMC *value)>
550 Compares the string with C<value>; returns -1 if the
551 string is smaller, 0 if they are equal, and 1 if C<value>
557 VTABLE INTVAL cmp_string(PMC *value) {
558 STRING * const s = VTABLE_get_string(INTERP, SELF);
559 STRING * const v = VTABLE_get_string(INTERP, value);
560 return string_compare(INTERP, s, v);
565 =item C<void substr(INTVAL offset, INTVAL length, PMC *dest)>
567 Extracts the substring starting at C<offset>, with size
568 C<length>, and places it in C<dest>.
573 VTABLE void substr(INTVAL offset, INTVAL length, PMC *dest) {
574 STRING * const s = VTABLE_get_string(INTERP, SELF);
575 STRING * const s2 = string_substr(INTERP, s, offset, length, NULL, 0);
576 VTABLE_set_string_native(INTERP, dest, s2);
581 =item C<STRING *substr_str(INTVAL offset, INTVAL length)>
583 Extracts the substring starting at C<offset>, with size
584 C<length>, and returns it.
589 VTABLE STRING *substr_str(INTVAL offset, INTVAL length) {
590 STRING * const s = VTABLE_get_string(INTERP, SELF);
591 return string_substr(INTERP, s, offset, length, NULL, 0);
596 =item C<INTVAL exists_keyed(PMC *key)>
598 Returns true if the C<key>'th character in the string exists. Negative
599 numbers count from the end.
605 VTABLE INTVAL exists_keyed(PMC *key) {
606 const INTVAL n = string_length(INTERP, VTABLE_get_string(INTERP, SELF));
607 const INTVAL k = VTABLE_get_integer(INTERP, key);
608 return (INTVAL)((k>=0 && k<=n) || (k<0 && -k<=n));
613 =item C<STRING *get_string_keyed(PMC *key)>
615 Returns the C<key>'th character in the string. Negative numbers count
618 =item C<INTVAL get_integer_keyed(PMC *key)>
620 Returns the integer value (ord) at C<*key>.
622 =item C<void set_string_keyed(PMC *key, STRING *value)>
624 Replace the string at C<key> with C<value>.
626 =item C<void set_integer_keyed(PMC *key, INTVAL value)>
628 Replace the string at C<key> with the chr of C<value>.
634 VTABLE STRING *get_string_keyed(PMC *key) {
635 STRING * const s = PMC_str_val(SELF);
636 const INTVAL k = key_integer(INTERP, key);
637 return string_substr(INTERP, s, k, 1, NULL, 0);
640 VTABLE INTVAL get_integer_keyed(PMC *key) {
641 STRING * const s = PMC_str_val(SELF);
642 return string_ord(INTERP, s, key_integer(INTERP, key));
645 void set_string_keyed(PMC *key, STRING * const value) {
646 STRING * const s = PMC_str_val(SELF);
647 const INTVAL len = string_length(INTERP, value);
648 string_replace(INTERP, s, key_integer(INTERP, key), len, value, NULL);
651 VTABLE void set_integer_keyed(PMC *key, INTVAL value) {
652 STRING * const s = PMC_str_val(SELF);
653 STRING * const c = string_chr(INTERP, (UINTVAL) value);
654 string_replace(INTERP, s, key_integer(INTERP, key), 1, c, NULL);
658 =item C<void replace(STRING *orig, STRING *_new)>
660 Replace every occurrence of C<orig> with C<_new>.
666 METHOD replace(STRING *orig, STRING *_new) {
667 const INTVAL old_len = string_length(INTERP, orig);
668 const INTVAL new_len = string_length(INTERP, _new);
669 STRING * const s = VTABLE_get_string(INTERP, SELF);
672 while (-1 != (i = string_str_index(INTERP, s, orig, i))) {
673 (void)string_replace(INTERP, s, i, old_len, _new, NULL);
677 VTABLE_set_string_native(INTERP, SELF, s);
681 =item C<PMC *to_int(INTVAL base)>
683 Return the integer equivalent of SELF, which is assumed to be a C<base>
684 digit string. The String is assumed to be in an ascii-compatible encoding.
685 The String is considered to be unsigned, and no I<+> or I<-> chars are
686 processed. C<base> has to be within [2..36].
688 If any chars in the String aren't converted, an exception is thrown.
690 TODO Currently overflow to BigInt is B<not> handled nor detected.
696 METHOD to_int(INTVAL base) {
697 PMC *result = pmc_new(INTERP, Parrot_get_ctx_HLL_type(interp,
698 enum_class_Integer));
700 /* TODO verify encoding */
701 const STRING *me = VTABLE_get_string(INTERP, SELF);
702 const char *start = me->strstart;
703 const char * const end = start + me->bufused;
706 if (base < 2 || base > 36)
707 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_OPERATION,
708 "invalid conversion to int - bad base %d", base);
710 while (start < end) {
711 const unsigned char c = *start;
714 if (isdigit((unsigned char)c))
716 else if (c >= 'a' && c <= 'z')
718 else if (c >= 'A' && c <= 'Z')
731 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_OPERATION,
732 "invalid conversion to int - bad char %c", *start);
734 VTABLE_set_integer_native(INTERP, result, i);
741 =head2 Iterator Interface
745 =item C<PMC *slice(PMC *key, INTVAL f)>
747 Return a new iterator for the slice PMC C<key> if f==0.
749 Return a new pythonic slice if f == 1.
751 =item C<PMC *get_iter()>
753 Return a new iterator for this string.
755 =item C<INTVAL elements()>
757 Return length of the string.
764 VTABLE INTVAL elements() {
765 return string_length(INTERP, VTABLE_get_string(INTERP, SELF));
768 VTABLE PMC *slice(PMC *key, INTVAL f) {
770 STRING *name = CONST_STRING(interp, "set_key");
771 PMC * const iter = pmc_new_init(INTERP, enum_class_Iterator, SELF);
772 Parrot_PCCINVOKE(interp, iter, name, "P->", key);
776 Parrot_ex_throw_from_c_args(INTERP, NULL, EXCEPTION_INVALID_OPERATION,
777 "String: Unknown slice type");
780 VTABLE PMC *get_iter() {
781 STRING *name = CONST_STRING(interp, "set_key");
782 PMC * const iter = pmc_new_init(INTERP, enum_class_Iterator, SELF);
783 PMC * const key = pmc_new(INTERP, enum_class_Key);
785 Parrot_PCCINVOKE(interp, iter, name, "P->", key);
786 PObj_get_FLAGS(key) |= KEY_integer_FLAG;
787 PMC_int_val(key) = 0;
789 if (!string_length(INTERP, VTABLE_get_string(INTERP, SELF)))
790 PMC_int_val(key) = -1;
799 =head2 Freeze/thaw Interface
803 =item C<void freeze(visit_info *info)>
805 Used to archive the string.
810 VTABLE void freeze(visit_info *info) {
811 IMAGE_IO * const io = info->image_io;
813 VTABLE_push_string(INTERP, io, VTABLE_get_string(INTERP, SELF));
818 =item C<void thaw(visit_info *info)>
820 Used to unarchive the string.
825 VTABLE void thaw(visit_info *info) {
826 IMAGE_IO * const io = info->image_io;
828 if (info->extra_flags == EXTRA_IS_NULL)
829 SELF.set_string_native(VTABLE_shift_string(INTERP, io));
839 =item C<PMC *lower()>
848 STRING * const s = string_downcase(INTERP,
849 VTABLE_get_string(INTERP, SELF));
856 =item C<void trans(STRING *src, PMC *table)>
858 Translate ascii string C<src> with entries from C<table>.
864 METHOD trans(STRING *src, PMC *table) {
869 const INTVAL len = string_length(interp, src);
874 if (src->charset != Parrot_ascii_charset_ptr)
875 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
876 "Can't translate non-ascii");
878 p = (unsigned char *)src->strstart;
879 /* TODO verify trans table */
881 tr_data = PMC_data_typed(table, INTVAL *); /* XXX */
883 for (i = 0; i < len; ++i, ++p) {
884 const unsigned char ch = (unsigned char)tr_data[*p];
892 =item C<void reverse(STRING *src)>
894 Reverse the ascii STRING C<src> in place.
900 METHOD reverse(STRING *src) {
903 INTVAL len = string_length(interp, src);
908 if (src->charset != Parrot_ascii_charset_ptr)
909 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
910 "Can't reverse non-ascii");
912 p = (unsigned char *)src->strstart;
914 for (i = 0, --len; i < len; ++i, --len) {
915 const unsigned char ch = p[len];
924 =item C<INTEGER is_integer(STRING *src)>
926 Checks if the ascii STRING C<src> is just an integer.
932 METHOD is_integer(STRING *src) {
935 const INTVAL len = string_length(interp, src);
940 if (src->charset != Parrot_ascii_charset_ptr)
941 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
942 "Can't is_integer non-ascii");
945 p = (unsigned char *)src->strstart;
947 if (p[i] == '-' || p[i] == '+' ||
948 (p[i] >= '0' && p[i] <= '9')) {
949 for (i = 1; i < len; ++i)
950 if (p[i] < '0' || p[i] > '9')
959 VTABLE PMC *share_ro() {
960 PMC * const ret = SUPER();
962 /* prevent wrong garbage collection */
963 PObj_is_shared_SET(PMC_str_val(SELF));
969 =item C<INTEGER reverse_index(STRING *substring, INTVAL start)>
971 Find last occurrence of C<substring>, but not after the C<start> position.
977 METHOD reverse_index(STRING *substring, INTVAL start) {
978 STRING * const src = VTABLE_get_string(INTERP, SELF);
985 len = string_length(interp, src);
989 if (start >= (INTVAL)len)
992 if (!string_length(interp, substring))
995 res = CHARSET_RINDEX(INTERP, src, substring, start);
1011 * c-file-style: "parrot"
1013 * vim: expandtab shiftwidth=4: