8 string.ops - String Opcodes
12 Operations that work on strings, whether constructing, modifying
15 When making changes to any ops file, run C<make bootstrap-ops> to regenerate
16 all generated ops files.
23 =item B<ord>(out INT, in STR)
25 The codepoint in the current character set of the first character of string $2
26 is returned in integer $1.
27 If $2 is empty, an exception is thrown.
29 =item B<ord>(out INT, in STR, in INT)
31 The codepoint in the current character set of the character at integer index $3
32 of string $2 is returned in integer $1.
33 If $2 is empty, an exception is thrown.
34 If $3 is greater than the length of $2, an exception is thrown.
35 If $3 is less than zero but greater than the negative of the length of $2,
36 counts backwards through $2, such that -1 is the last character,
37 -2 is the second-to-last character, and so on.
38 If $3 is less than the negative of the length of $2, an exception is thrown.
42 inline op ord(out INT, in STR) :base_core {
43 $1 = string_ord(interp, $2, 0);
46 inline op ord(out INT, in STR, in INT) :base_core {
47 $1 = string_ord(interp, $2, $3);
51 =item B<chr>(out STR, in INT)
53 The character specified by codepoint integer $2 is returned in string $1.
57 inline op chr(out STR, in INT) :base_core {
58 STRING * const s = string_chr(interp, (UINTVAL)$2);
63 =item B<chopn>(out STR, in STR, in INT)
65 Remove the number of characters given by integer $3 from the tail of string $2,
66 and return the characters not chopped in string $1.
67 If $3 is negative, cut the string after -$3 characters.
71 inline op chopn(out STR, in STR, in INT) :base_core {
72 $1 = Parrot_str_chopn(interp, $2, $3);
76 =item B<concat>(inout STR, in STR)
78 =item B<concat>(invar PMC, in STR)
80 =item B<concat>(invar PMC, invar PMC)
82 Modify string $1 in place, appending string $2.
84 =item B<concat>(out STR, in STR, in STR)
86 =item B<concat>(invar PMC, invar PMC, in STR)
88 =item B<concat>(invar PMC, invar PMC, invar PMC)
90 Append string $3 to string $2 and place the result into string $1.
94 inline op concat(inout STR, in STR) :base_mem :deprecated {
95 $1 = Parrot_str_concat(interp, $1, $2);
98 inline op concat(invar PMC, invar PMC) :base_core {
99 VTABLE_i_concatenate(interp, $1, $2);
102 inline op concat(invar PMC, in STR) :base_core {
103 VTABLE_i_concatenate_str(interp, $1, $2);
106 inline op concat(out STR, in STR, in STR) :base_mem {
107 $1 = Parrot_str_concat(interp, $2, $3);
110 inline op concat(invar PMC, invar PMC, in STR) :base_core {
111 $1 = VTABLE_concatenate_str(interp, $2, $3, $1);
114 inline op concat(invar PMC, invar PMC, invar PMC) :base_core {
115 $1 = VTABLE_concatenate(interp, $2, $3, $1);
118 =item B<repeat>(out STR, in STR, in INT)
120 =item B<repeat>(invar PMC, invar PMC, in INT)
122 =item B<repeat>(invar PMC, invar PMC, invar PMC)
124 Repeat string $2 integer $3 times and return result in string $1.
125 The C<PMC> versions are MMD operations.
129 inline op repeat(out STR, in STR, in INT) :base_mem {
131 opcode_t *handler = Parrot_ex_throw_from_op_args(interp, NULL,
132 EXCEPTION_NEG_REPEAT,
133 "Cannot repeat with negative arg");
134 goto ADDRESS(handler);
136 $1 = Parrot_str_repeat(interp, $2, (UINTVAL)$3);
139 inline op repeat(invar PMC, invar PMC, in INT) :base_core {
141 opcode_t *handler = Parrot_ex_throw_from_op_args(interp, NULL,
142 EXCEPTION_NEG_REPEAT,
143 "Cannot repeat with negative arg");
144 goto ADDRESS(handler);
146 $1 = VTABLE_repeat_int(interp, $2, $3, $1);
149 inline op repeat(invar PMC, invar PMC, invar PMC) :base_core {
150 if (VTABLE_get_integer(interp, $3) < 0) {
151 opcode_t *handler = Parrot_ex_throw_from_op_args(interp, NULL,
152 EXCEPTION_NEG_REPEAT,
153 "Cannot repeat with negative arg");
154 goto ADDRESS(handler);
156 $1 = VTABLE_repeat(interp, $2, $3, $1);
159 =item B<repeat>(invar PMC, in INT)
161 =item B<repeat>(invar PMC, invar PMC)
163 Repeat $1 in place $2 times, leaving the result in $1.
164 The C<PMC> versions are MMD operations.
168 inline op repeat(invar PMC, in INT) :base_core {
169 VTABLE_i_repeat_int(interp, $1, $2);
172 inline op repeat(invar PMC, invar PMC) :base_core {
173 VTABLE_i_repeat(interp, $1, $2);
177 =item B<length>(out INT, in STR)
179 Calculate the length (in characters) of string $2 and return as integer $1.
180 If $2 is NULL or zero length, zero is returned.
182 =item B<bytelength>(out INT, in STR)
184 Calculate the length (in bytes) of string $2 and return as integer $1.
185 If $2 is NULL or zero length, zero is returned.
189 inline op length(out INT, in STR) :base_mem {
190 $1 = Parrot_str_length(interp, $2);
193 inline op bytelength(out INT, in STR) :base_mem {
194 $1 = Parrot_str_byte_length(interp, $2);
198 =item B<pin>(inout STR)
200 Make the memory in string $1 immobile. This memory will I<not> be moved
201 by the Garbage Collector, and may be safely passed to external libraries.
202 (Well, as long as they don't free it.) Pinning a string will move the contents.
204 $1 should be unpinned if it is used after pinning is no longer necessary.
208 op pin(inout STR) :base_mem {
209 Parrot_str_pin(interp, $1);
213 =item B<unpin>(inout STR)
215 Make the memory in string $1 movable again.
216 This will make the memory in $1 move.
220 op unpin(inout STR) :base_mem {
221 Parrot_str_unpin(interp, $1);
225 =item B<substr>(out STR, in STR, in INT)
227 =item B<substr>(out STR, in STR, in INT, in INT)
229 =item B<substr>(out STR, invar PMC, in INT, in INT)
231 Set $1 to the portion of $2 starting at (zero-based) character position
232 $3 and having length $4. If no length ($4) is provided, it is equivalent to
233 passing in the length of $2.
235 =item B<replace>(out STR, in STR, in INT, in INT, in STR)
237 Replace part of $2 starting from $3 of length $4 with $5. If the length of $5 is
238 different from the length specified in $4, then $2 will grow or shrink
239 accordingly. If $3 is one character position larger than the length of
240 $2, then $5 is appended to $2 (and the empty string is returned);
241 this is essentially the same as
245 Finally, if $3 is negative, then it is taken to count backwards from
246 the end of the string (i.e. an offset of -1 corresponds to the last
249 New $1 string returned.
253 inline op substr(out STR, in STR, in INT) :base_core {
254 const INTVAL len = Parrot_str_byte_length(interp, $2);
255 $1 = Parrot_str_substr(interp, $2, $3, len);
258 inline op substr(out STR, in STR, in INT, in INT) :base_core {
259 $1 = Parrot_str_substr(interp, $2, $3, $4);
262 inline op substr(out STR, invar PMC, in INT, in INT) :base_core {
263 $1 = VTABLE_substr_str(interp, $2, $3, $4);
266 inline op replace(out STR, in STR, in INT, in INT, in STR) :base_core {
267 $1 = Parrot_str_replace(interp, $2, $3, $4, $5);
271 =item B<index>(out INT, in STR, in STR)
273 =item B<index>(out INT, in STR, in STR, in INT)
275 The B<index> function searches for a substring within target string, but
276 without the wildcard-like behavior of a full regular-expression pattern match.
277 It returns the position of the first occurrence of substring $3
278 in target string $2 at or after zero-based position $4.
279 If $4 is omitted, B<index> starts searching from the beginning of the string.
280 The returned position is zero-based.
281 If the string is null, or the substring is not found or is null,
282 B<index> returns "-1".
286 inline op index(out INT, in STR, in STR) :base_core {
287 $1 = ($2 && $3) ? Parrot_str_find_index(interp, $2, $3, 0) : -1;
290 inline op index(out INT, in STR, in STR, in INT) :base_core {
291 $1 = ($2 && $3) ? Parrot_str_find_index(interp, $2, $3, $4) : -1;
295 =item B<sprintf>(out STR, in STR, invar PMC)
297 =item B<sprintf>(out PMC, invar PMC, invar PMC)
299 Sets $1 to the result of calling C<Parrot_psprintf> with the
300 given format ($2) and arguments ($3, which should be an ordered
303 The result is quite similar to using the system C<sprintf>, but is
304 protected against buffer overflows and the like. There are some
305 differences, especially concerning sizes (which are largely ignored);
306 see F<misc.c> for details.
310 inline op sprintf(out STR, in STR, invar PMC) :base_core {
311 $1=Parrot_psprintf(interp, $2, $3);
314 inline op sprintf(out PMC, invar PMC, invar PMC) :base_core {
315 VTABLE_set_string_native(interp, $1,
316 Parrot_psprintf(interp, VTABLE_get_string(interp, $2), $3));
320 =item B<new>(out STR)
322 =item B<new>(out STR, in INT)
324 Allocate a new empty string of length $2 (optional).
326 XXX: Do these ops make sense with immutable strings?
330 inline op new(out STR) :base_mem {
331 $1 = Parrot_str_new_noinit(interp, 0);
334 inline op new(out STR, in INT) :base_mem {
335 $1 = Parrot_str_new_noinit(interp, $2);
339 =item B<stringinfo>(out INT, in STR, in INT)
341 Extract some information about string $2 and store it in $1.
342 If a null string is passed, $1 is always set to 0.
343 If an invalid $3 is passed, an exception is thrown.
344 Possible values for $3 are:
348 =item 1 The location of the string buffer header.
350 =item 2 The location of the start of the string.
352 =item 3 The length of the string buffer (in bytes).
354 =item 4 The flags attached to the string (if any).
356 =item 5 The amount of the string buffer used (in bytes).
358 =item 6 The length of the string (in characters).
364 inline op stringinfo(out INT, in STR, in INT) :base_core {
369 case STRINGINFO_HEADER:
370 $1 = PTR2UINTVAL($2);
372 case STRINGINFO_STRSTART:
373 $1 = PTR2UINTVAL($2->strstart);
375 case STRINGINFO_BUFLEN:
376 $1 = Buffer_buflen($2);
378 case STRINGINFO_FLAGS:
379 $1 = PObj_get_FLAGS($2);
381 case STRINGINFO_BUFUSED:
384 case STRINGINFO_STRLEN:
389 opcode_t *handler = Parrot_ex_throw_from_op_args(interp, NULL,
390 EXCEPTION_INVALID_OPERATION,
391 "stringinfo: unknown info type: %d", $3);
392 goto ADDRESS(handler);
399 =item B<upcase>(out STR, in STR)
401 Uppercase $2 and put the result in $1.
405 inline op upcase(out STR, in STR) :base_core {
406 $1 = Parrot_str_upcase(interp, $2);
409 =item B<downcase>(out STR, in STR)
411 Downcase $2 and put the result in $1.
415 inline op downcase(out STR, in STR) :base_core {
416 $1 = Parrot_str_downcase(interp, $2);
419 =item B<titlecase>(out STR, in STR)
421 Titlecase $2 and put the result in $1.
425 inline op titlecase(out STR, in STR) :base_core {
426 $1 = Parrot_str_titlecase(interp, $2);
430 =item B<join>(out STR, in STR, invar PMC)
432 Create a new string $1 by joining array elements from array $3
435 =item B<split>(out PMC, in STR, in STR)
437 Create a new Array PMC $1 by splitting the string $3 into pieces
438 delimited by the string $2. If $2 does not appear in $3, then return $3
439 as the sole element of the Array PMC. Will return empty strings for
440 delimiters at the beginning and end of $3
442 Note: the string $2 is just a string. If you want a perl-ish split
443 on regular expression, use C<PGE::Util>'s split from the standard library.
447 op join(out STR, in STR, invar PMC) :base_core {
448 $1 = Parrot_str_join(interp, $2, $3);
451 op split(out PMC, in STR, in STR) :base_core {
452 $1 = Parrot_str_split(interp, $2, $3);
456 =item B<charset>(out INT, in STR)
458 Return the charset number $1 of string $2.
460 =item B<charsetname>(out STR, in INT)
462 Return the name $1 of charset number $2.
463 If charset number $2 is not found, name $1 is set to null.
465 =item B<find_charset>(out INT, in STR)
467 Return the charset number of the charset named $2. If the charset doesn't
468 exist, throw an exception.
470 =item B<trans_charset>(out STR, in STR, in INT)
472 Create a string $1 from $2 with the specified charset.
474 Both functions may throw an exception on information loss.
478 op charset(out INT, in STR) :base_core {
479 $1 = Parrot_encoding_number_of_str(interp, $2);
482 op charsetname(out STR, in INT) :base_core {
483 $1 = Parrot_encoding_name(interp, $2);
486 op find_charset(out INT, in STR) :base_core {
487 const INTVAL n = Parrot_encoding_number(interp, $2);
489 opcode_t *handler = Parrot_ex_throw_from_op_args(interp, NULL,
490 EXCEPTION_INVALID_CHARTYPE,
491 "charset '%Ss' not found", $2);
492 goto ADDRESS(handler);
497 op trans_charset(out STR, in STR, in INT) {
498 $1 = Parrot_str_change_charset(interp, $2, $3);
502 =item B<encoding>(out INT, in STR)
504 Return the encoding number $1 of string $2.
506 =item B<encodingname>(out STR, in INT)
508 Return the name $1 of encoding number $2.
509 If encoding number $2 is not found, name $1 is set to null.
511 =item B<find_encoding>(out INT, in STR)
513 Return the encoding number of the encoding named $2. If the encoding doesn't
514 exist, throw an exception.
516 =item B<trans_encoding>(out STR, in STR, in INT)
518 Create a string $1 from $2 with the specified encoding.
520 Both functions may throw an exception on information loss.
524 op encoding(out INT, in STR) :base_core {
525 $1 = Parrot_encoding_number_of_str(interp, $2);
528 op encodingname(out STR, in INT) :base_core {
529 $1 = Parrot_encoding_name(interp, $2);
532 op find_encoding(out INT, in STR) :base_core {
533 const INTVAL n = Parrot_encoding_number(interp, $2);
535 opcode_t *handler = Parrot_ex_throw_from_op_args(interp, NULL,
536 EXCEPTION_INVALID_ENCODING,
537 "encoding '%Ss' not found", $2);
538 goto ADDRESS(handler);
543 op trans_encoding(out STR, in STR, in INT) {
544 $1 = Parrot_str_change_encoding(interp, $2, $3);
548 =item B<is_cclass>(out INT, in INT, in STR, in INT)
550 Set $1 to 1 if the codepoint of $3 at position $4 is in
551 the character class(es) given by $2.
555 inline op is_cclass(out INT, in INT, in STR, in INT) {
556 $1 = Parrot_str_is_cclass(interp, $2, $3, $4);
560 =item B<find_cclass>(out INT, in INT, in STR, in INT, in INT)
562 Set $1 to the offset of the first codepoint matching
563 the character class(es) given by $2 in string $3, starting
564 at offset $4 for up to $5 codepoints. If no matching
565 character is found, set $1 to (offset + count).
569 inline op find_cclass(out INT, in INT, in STR, in INT, in INT) {
570 $1 = Parrot_str_find_cclass(interp, $2, $3, $4, $5);
574 =item B<find_not_cclass>(out INT, in INT, in STR, in INT, in INT)
576 Set $1 to the offset of the first codepoint not matching
577 the character class(es) given by $2 in string $3, starting
578 at offset $4 for up to $5 codepoints. If the substring
579 consists entirely of matching characters, set $1 to (offset + count).
583 inline op find_not_cclass(out INT, in INT, in STR, in INT, in INT) {
584 $1 = Parrot_str_find_not_cclass(interp, $2, $3, $4, $5);
588 =item B<escape>(out STR, invar STR)
590 Escape all non-ASCII chars to backslashed escape sequences. A
591 string with charset I<ascii> is created as result.
593 =item B<compose>(out STR, in STR)
595 Compose (normalize) a string.
599 op escape(out STR, invar STR) {
600 $1 = Parrot_str_escape(interp, $2);
603 op compose(out STR, in STR) {
604 $1 = Parrot_str_compose(interp, $2);
612 Copyright (C) 2001-2010, Parrot Foundation.
616 This program is free software. It is subject to the same license
617 as the Parrot interpreter itself.
623 * c-file-style: "parrot"
625 * vim: expandtab shiftwidth=4: