1 # Copyright (C) 2006-2008, The Perl Foundation.
6 runtime/wmlsstring.pir - WMLScript String library
10 This library contains a set of string functions. A string is an array of
11 characters. Each of the characters has an index. The first character in a
12 string has an index zero (0). The length of the string is the number of
13 characters in the array.
15 The user of the String library can specify a special separator by which
16 elements in a string can be separated. These elements can be accessed
17 by specifying the separator and the element index. The first element in a
18 string has an index zero (0). Each occurrence of the separator in the string
19 separates two elements (no escaping of separators is allowed).
21 A white space character is one of the following characters:
25 =item * TAB : Horizontal Tabulation
27 =item * VT : Vertical Tabulation
29 =item * FF : Form Feed
33 =item * LF : Line Feed
35 =item * CR : Carriage Return
39 See "WMLScript Standard Libraries Specification", section 9 "String".
46 .HLL 'WMLScript', 'wmls_group'
# Library table: each `.const .Sub` line binds one of the anonymous subs
# defined later in this file to a local name, and the following store puts
# it into $P0 under an integer key.
# NOTE(review): the integer keys presumably match the function ids of the
# WMLScript String library (spec section 9) -- confirm against the spec.
52 .const .Sub _string_length = '_string_length'
53 $P0[0] = _string_length
54 .const .Sub _string_isEmpty = '_string_isEmpty'
55 $P0[1] = _string_isEmpty
56 .const .Sub _string_charAt = '_string_charAt'
57 $P0[2] = _string_charAt
58 .const .Sub _string_subString = '_string_subString'
59 $P0[3] = _string_subString
60 .const .Sub _string_find = '_string_find'
62 .const .Sub _string_replace = '_string_replace'
63 $P0[5] = _string_replace
64 .const .Sub _string_elements = '_string_elements'
65 $P0[6] = _string_elements
66 .const .Sub _string_elementAt = '_string_elementAt'
67 $P0[7] = _string_elementAt
68 .const .Sub _string_removeAt = '_string_removeAt'
69 $P0[8] = _string_removeAt
70 .const .Sub _string_replaceAt = '_string_replaceAt'
71 $P0[9] = _string_replaceAt
72 .const .Sub _string_insertAt = '_string_insertAt'
73 $P0[10] = _string_insertAt
74 .const .Sub _string_squeeze = '_string_squeeze'
75 $P0[11] = _string_squeeze
76 .const .Sub _string_trim = '_string_trim'
77 $P0[12] = _string_trim
78 .const .Sub _string_compare = '_string_compare'
79 $P0[13] = _string_compare
80 .const .Sub _string_toString = '_string_toString'
81 $P0[14] = _string_toString
82 .const .Sub _string_format = '_string_format'
83 $P0[15] = _string_format
89 =head2 C<length(string)>
93 Returns the length (number of characters) of the given string.
# Implements String.length(string) as described in the POD above.
# The result is built in `res` as either a WmlsInteger or a WmlsInvalid.
105 .sub '_string_length' :anon
# $I0 becomes true when the argument is a WmlsInvalid.
108 $I0 = isa str, 'WmlsInvalid'
# Result object for the length value.
112 new res, 'WmlsInteger'
# Result object for the invalid case
# (presumably reached when the isa test above succeeds -- TODO confirm).
116 new res, 'WmlsInvalid'
121 =head2 C<isEmpty(string)>
125 Returns a boolean true if the string length is zero and boolean false
# Implements String.isEmpty(string) as described in the POD above.
# The result is built in `res` as either a WmlsBoolean or a WmlsInvalid.
138 .sub '_string_isEmpty' :anon
# $I0 becomes true when the argument is a WmlsInvalid.
141 $I0 = isa str, 'WmlsInvalid'
# Result object for the true/false answer.
145 new res, 'WmlsBoolean'
# Result object for the invalid case
# (presumably reached when the isa test above succeeds -- TODO confirm).
152 new res, 'WmlsInvalid'
157 =head2 C<charAt(string, index)>
161 Returns a new string of length one containing the character at the specified
162 index of the given string.
164 If the index is of type floating-point, Float.int() is first used to calculate the
165 actual integer index.
171 index = Number (the index of the character to be returned)
179 If index is out of range then an empty string (C<"">) is returned.
# Implements String.charAt(string, index) as described in the POD above.
# The result is built in `res` as either a WmlsString or a WmlsInvalid.
183 .sub '_string_charAt' :anon
# $I0 becomes true when the string argument is a WmlsInvalid.
187 $I0 = isa str, 'WmlsInvalid'
# The index is type-tested twice: once for WmlsInteger, once for WmlsFloat
# (the POD says a float index is converted with Float.int()).
189 $I0 = isa index_, 'WmlsInteger'
191 $I0 = isa index_, 'WmlsFloat'
198 new res, 'WmlsString'
# Skip the extraction when $I2 >= $I1
# (presumably index >= string length -- TODO confirm register roles).
200 if $I2 >= $I1 goto L3
# Take exactly one character of $S1 starting at offset $I2.
201 $S0 = substr $S1, $I2, 1
# Result object for the invalid case.
206 new res, 'WmlsInvalid'
211 =head2 C<subString(string, startIndex, length)>
215 Returns a new string that is a substring of the given string. The substring
216 begins at the specified startIndex and its length (number of characters) is
217 the given length. If the startIndex is less than 0 then 0 is used for the
218 startIndex. If the length is larger than the remaining number of characters in
219 the string, the length is replaced with the number of remaining characters.
221 If the startIndex or the length is of type floating-point, Float.int() is first used to
222 calculate the actual integer value.
228 startIndex = Number (the beginning index, inclusive)
230 length = Number (the length of the substring)
238 If startIndex is larger than the last index an empty string (C<"">) is returned.
240 If length <= 0 an empty string (C<"">) is returned.
# Implements String.subString(string, startIndex, length) as described in the
# POD above. The result is built in `res` as a WmlsString or a WmlsInvalid.
244 .sub '_string_subString' :anon
246 .param pmc startIndex
# $I0 becomes true when the string argument is a WmlsInvalid.
249 $I0 = isa String, 'WmlsInvalid'
# startIndex and Length are each type-tested for WmlsInteger and WmlsFloat
# (the POD says float values are converted with Float.int()).
251 $I0 = isa startIndex, 'WmlsInteger'
253 $I0 = isa startIndex, 'WmlsFloat'
257 $I0 = isa Length, 'WmlsInteger'
259 $I0 = isa Length, 'WmlsFloat'
269 new res, 'WmlsString'
# Skip the extraction when $I2 >= $I1
# (presumably start index >= string length -- TODO confirm register roles).
270 if $I2 >= $I1 goto L5
# Take $I3 characters of $S1 starting at offset $I2.
273 $S0 = substr $S1, $I2, $I3
# Result object for the invalid case.
278 new res, 'WmlsInvalid'
283 =head2 C<find(string, subString)>
287 Returns the index of the first character in the string that matches the
288 requested subString. If no match is found integer value -1 is returned.
290 Two strings are defined to match when they are identical. Characters with
291 multiple possible representations match only if they have the same
292 representation in both strings. No case folding is performed.
306 If subString is an empty string (C<"">), an invalid value is returned.
# Implements String.find(string, subString) as described in the POD above.
# The result is built in `res` as either a WmlsInteger or a WmlsInvalid.
310 .sub '_string_find' :anon
# Both arguments are tested for WmlsInvalid.
314 $I0 = isa String, 'WmlsInvalid'
316 $I0 = isa subString, 'WmlsInvalid'
# Result object for the index (or -1 when nothing matches, per the POD).
323 new res, 'WmlsInteger'
# Result object for the invalid case.
327 new res, 'WmlsInvalid'
332 =head2 C<replace(string, oldSubString, newSubString)>
336 Returns a new string resulting from replacing all occurrences of
337 oldSubString in this string with newSubString.
339 Two strings are defined to match when they are identical. Characters with
340 multiple possible representations match only if they have the same
341 representation in both strings. No case folding is performed.
347 oldSubString = String
349 newSubString = String
357 If oldSubString is an empty string an C<invalid> value is returned.
# Implements String.replace(string, oldSubString, newSubString) as described
# in the POD above. The result is built in `res` as a WmlsString or a
# WmlsInvalid.
361 .sub '_string_replace' :anon
363 .param pmc oldSubString
364 .param pmc newSubString
# All three arguments are tested for WmlsInvalid.
366 $I0 = isa String, 'WmlsInvalid'
368 $I0 = isa oldSubString, 'WmlsInvalid'
370 $I0 = isa newSubString, 'WmlsInvalid'
# Result object for the rewritten string.
379 new res, 'WmlsString'
# Result object for the invalid case.
383 new res, 'WmlsInvalid'
388 =head2 C<elements(string, separator)>
392 Returns the number of elements in the given string separated by the given
393 separator. Empty string ("") is a valid element (thus, this function can never
394 return a value that is less than or equal to zero).
400 separator = String (the first character of the string used as separator)
408 Returns C<invalid> if the separator is an empty string.
# Implements String.elements(string, separator) as described in the POD above.
# The result is built in `res` as either a WmlsInteger or a WmlsInvalid.
412 .sub '_string_elements' :anon
# Both arguments are tested for WmlsInvalid.
416 $I0 = isa str, 'WmlsInvalid'
418 $I0 = isa separator, 'WmlsInvalid'
# Keep only the first character of $S2
# (presumably the separator text; the POD says only its first character counts).
424 $S2 = substr $S2, 0, 1
# Result object for the element count.
425 new res, 'WmlsInteger'
# Result object for the invalid case.
436 new res, 'WmlsInvalid'
441 =head2 C<elementAt(string, index, separator)>
445 Searches string for the index'th element, elements being separated by separator,
446 and returns the corresponding element. If the index is less than 0 then the first
447 element is returned. If the index is larger than the number of elements then
448 the last element is returned. If the string is an empty string then an empty
451 If the index is of type floating-point, Float.int() is first used to calculate the
458 index = Number (the index of the element to be returned)
460 separator = String (the first character of the string used as separator)
468 Returns C<invalid> if the separator is an empty string.
# Implements String.elementAt(string, index, separator) as described in the
# POD above. The result is built in `res` as a WmlsString or a WmlsInvalid.
472 .sub '_string_elementAt' :anon
# $I0 becomes true when the string argument is a WmlsInvalid.
477 $I0 = isa str, 'WmlsInvalid'
# The index is type-tested for WmlsInteger and for WmlsFloat.
479 $I0 = isa index_, 'WmlsInteger'
481 $I0 = isa index_, 'WmlsFloat'
485 $I0 = isa separator, 'WmlsInvalid'
# Keep only the first character of $S3
# (presumably the separator text -- TODO confirm).
495 $S3 = substr $S3, 0, 1
# Result object for the selected element.
496 new res, 'WmlsString'
# Result object for the invalid case.
510 new res, 'WmlsInvalid'
515 =head2 C<removeAt(string, index, separator)>
519 Returns a new string where the element and the corresponding separator (if
520 existing) with the given index are removed from the given string. If the index
521 is less than 0 then the first element is removed. If the index is larger than the
522 number of elements then the last element is removed. If the string is empty,
523 the function returns a new empty string.
525 If the index is of type floating-point, Float.int() is first used to calculate the
532 index = Number (the index of the element to be deleted)
534 separator = String (the first character of the string used as separator)
542 Returns C<invalid> if the separator is an empty string.
# Implements String.removeAt(string, index, separator) as described in the
# POD above. The result is built in `res` as a WmlsString or a WmlsInvalid.
546 .sub '_string_removeAt' :anon
# $I0 becomes true when the string argument is a WmlsInvalid.
551 $I0 = isa str, 'WmlsInvalid'
# The index is type-tested for WmlsInteger and for WmlsFloat.
553 $I0 = isa index_, 'WmlsInteger'
555 $I0 = isa index_, 'WmlsFloat'
559 $I0 = isa separator, 'WmlsInvalid'
# Keep only the first character of $S3
# (presumably the separator text -- TODO confirm).
569 $S3 = substr $S3, 0, 1
570 new res, 'WmlsString'
# NOTE(review): the roles of $I0..$I4 in the two branches below are not
# documented; a short register legend here would help the next reader.
# Branch to L7 once $I1 is no longer below $I4.
586 unless $I1 < $I4 goto L7
# Branch to L8 when $I0 equals $I2.
589 if $I0 == $I2 goto L8
# Result object for the invalid case.
599 new res, 'WmlsInvalid'
604 =head2 C<replaceAt(string, element, index, separator)>
608 Returns a string with the current element at the specified index replaced with
609 the given element. If the index is less than 0 then the first element is
610 replaced. If the index is larger than the number of elements then the last
611 element is replaced. If the string is empty, the function returns a new string
612 with the given element.
614 If the index is of type floating-point, Float.int() is first used to calculate the
623 index = Number (the index of the element to be replaced)
625 separator = String (the first character of the string used as separator)
633 Returns C<invalid> if the separator is an empty string.
# Implements String.replaceAt(string, element, index, separator) as described
# in the POD above. The result is built in `res` as a WmlsString or a
# WmlsInvalid.
637 .sub '_string_replaceAt' :anon
# The string and the element are tested for WmlsInvalid.
643 $I0 = isa str, 'WmlsInvalid'
645 $I0 = isa element, 'WmlsInvalid'
# The index is type-tested for WmlsInteger and for WmlsFloat.
647 $I0 = isa index_, 'WmlsInteger'
649 $I0 = isa index_, 'WmlsFloat'
653 $I0 = isa separator, 'WmlsInvalid'
# Keep only the first character of $S4
# (presumably the separator text -- TODO confirm).
664 $S4 = substr $S4, 0, 1
# Result object for the rewritten string.
665 new res, 'WmlsString'
# Result object for the invalid case.
681 new res, 'WmlsInvalid'
686 =head2 C<insertAt(string, element, index, separator)>
690 Returns a string with the element and the corresponding separator (if
691 needed) inserted at the specified element index of the original string. If the
692 index is less than 0 then 0 is used as the index. If the index is larger than the
693 number of elements then the element is appended at the end of the string. If
694 the string is empty, the function returns a new string with the given element.
696 If the index is of type floating-point, Float.int() is first used to calculate the
701 string = String (original string)
703 element = String (element to be inserted)
705 index = Number (the index of the element to be added)
707 separator = String (the first character of the string used as separator)
715 Returns C<invalid> if the separator is an empty string.
# Implements String.insertAt(string, element, index, separator) as described
# in the POD above. The result is built in `res` as a WmlsString or a
# WmlsInvalid.
719 .sub '_string_insertAt' :anon
# The string and the element are tested for WmlsInvalid.
725 $I0 = isa str, 'WmlsInvalid'
727 $I0 = isa element, 'WmlsInvalid'
# The index is type-tested for WmlsInteger and for WmlsFloat.
729 $I0 = isa index_, 'WmlsInteger'
731 $I0 = isa index_, 'WmlsFloat'
735 $I0 = isa separator, 'WmlsInvalid'
# Keep only the first character of $S4
# (presumably the separator text -- TODO confirm).
746 $S4 = substr $S4, 0, 1
747 new res, 'WmlsString'
# NOTE(review): the roles of $I0, $I1, $I3 and $I5 in the branches below are
# not documented; a short register legend here would help the next reader.
# Branch to L5 when $I3 does not exceed $I5.
755 if $I3 <= $I5 goto L5
# Branch to L7 once $I0 is no longer below $I5.
764 unless $I0 < $I5 goto L7
# Branch to L8 when $I1 differs from $I3.
765 if $I1 != $I3 goto L8
# Result object for the invalid case.
779 new res, 'WmlsInvalid'
784 =head2 C<squeeze(string)>
788 Returns a string where all consecutive series of white spaces within the
789 string are reduced to single inter-word space.
801 .include 'cclass.pasm'
# Branch to L2 once idx is no longer below $I1
# (presumably $I1 is the length of s -- TODO confirm).
811 unless idx < $I1 goto L2
# $I0 = 1 when the character of s at offset idx is in the whitespace class.
812 $I0 = is_cclass .CCLASS_WHITESPACE, s, idx
# Copy the single character at idx.
814 $S0 = substr s, idx, 1
# Same whitespace test, repeated later in the sub
# (presumably while skipping a run of whitespace -- TODO confirm).
822 $I0 = is_cclass .CCLASS_WHITESPACE, s, idx
# Implements String.squeeze(string) as described in the POD above.
# The result is built in `res` as either a WmlsString or a WmlsInvalid.
829 .sub '_string_squeeze' :anon
# $I0 becomes true when the argument is a WmlsInvalid.
832 $I0 = isa str, 'WmlsInvalid'
# Result object for the squeezed text.
834 new res, 'WmlsString'
# Result object for the invalid case.
840 new res, 'WmlsInvalid'
845 =head2 C<trim(string)>
849 Returns a string where all trailing and leading white spaces in the given
850 string have been trimmed.
# $I0 = 1 when the character of s at offset $I1 is in the whitespace class
# (presumably the scan from the front -- TODO confirm).
867 $I0 = is_cclass .CCLASS_WHITESPACE, s, $I1
# Same test at offset $I2 (presumably the scan from the back -- TODO confirm).
875 $I0 = is_cclass .CCLASS_WHITESPACE, s, $I2
# substr takes (offset, length): $I1 is the start offset and $I2 is used as
# the length. NOTE(review): confirm $I2 has been converted from an end
# offset to a length before this line.
879 res = substr s, $I1, $I2
# Implements String.trim(string) as described in the POD above.
# The result is built in `res` as either a WmlsString or a WmlsInvalid.
883 .sub '_string_trim' :anon
# $I0 becomes true when the argument is a WmlsInvalid.
886 $I0 = isa str, 'WmlsInvalid'
# Result object for the trimmed text.
888 new res, 'WmlsString'
# Result object for the invalid case.
894 new res, 'WmlsInvalid'
899 =head2 C<compare(string1, string2)>
903 The return value indicates the lexicographic relation of string1 to string2. The
904 relation is based on the relation of the character codes in the native
905 character set. The return value is -1 if string1 is less than string2, 0 if string1
906 is identical to string2 or 1 if string1 is greater than string2.
# Implements String.compare(string1, string2) as described in the POD above.
# The result is built in `res` as either a WmlsInteger or a WmlsInvalid.
920 .sub '_string_compare' :anon
# Both arguments are tested for WmlsInvalid.
924 $I0 = isa string1, 'WmlsInvalid'
926 $I0 = isa string2, 'WmlsInvalid'
# Result object for the -1 / 0 / 1 answer.
930 new res, 'WmlsInteger'
# String comparison: control falls through only when $S1 sorts before $S2
# (presumably the -1 case -- TODO confirm).
931 if $S1 >= $S2 goto L2
# Result object for the invalid case.
942 new res, 'WmlsInvalid'
947 =head2 C<toString(value)>
951 Returns a string representation of the given value. This function performs
952 exactly the same conversions as supported by the [WMLScript] language
953 (automatic conversion from boolean, integer and floating-point values to
954 strings) except that an C<invalid> value returns the string C<"invalid">.
# Implements String.toString(value) as described in the POD above.
# No WmlsInvalid result object is created in the visible code, which matches
# the POD: an invalid value is rendered as the string "invalid".
966 .sub '_string_toString' :anon
# The result is always a WmlsString.
970 new res, 'WmlsString'
976 =head2 C<format(format, value)>
980 Converts the given value to a string by using the given formatting provided as
981 a format string. The format string can contain only one format specifier,
982 which can be located anywhere inside the string. If more than one is
983 specified, only the first one (leftmost) is used and the remaining specifiers
984 are replaced by an empty string. The format specifier has the following form:
986 % [width] [.precision] type
988 The C<width> argument is a nonnegative decimal integer controlling the
989 minimum number of characters printed. If the number of characters in the
990 output value is less than the specified width, blanks are added to the left until
991 the minimum width is reached. The C<width> argument never causes the
992 value to be truncated. If the number of characters in the output value is
993 greater than the specified width, or if width is not given, all characters of the
994 value are printed (subject to the precision argument).
996 The C<precision> argument specifies a nonnegative decimal integer,
997 preceded by a period (.), which can be used to set the precision of the output
998 value. The interpretation of this value depends on the given C<type>:
1004 Specifies the minimum number of digits to be printed. If the number
1005 of digits in the value is less than precision, the output value is
1006 padded on the left with zeroes. The value is not truncated when the
1007 number of digits exceeds precision. Default precision is 1. If
1008 precision is specified as 0 and the value to be converted is 0, the
1009 result is an empty string.
1013 Specifies the number of digits after the decimal point. If a decimal
1014 point appears, at least one digit appears before it. The value is
1015 rounded to the appropriate number of digits. Default precision is 6; if
1016 precision is 0 or if the period (.) appears without a number following
1017 it, no decimal point is printed.
1021 Specifies the maximum number of characters to be printed. By
1022 default, all characters are printed.
1026 Unlike the C<width> argument, the C<precision> argument can cause either
1027 truncation of the output value or rounding of a floating-point value.
1029 The C<type> argument is the only required format argument; it appears after
1030 any optional format fields. The type character determines whether the given
1031 value is interpreted as integer, floating-point or string. If the value argument
1032 is of a different type than is specified by the type argument, it is converted
1033 according to WMLScript standard automatic conversion rules, with the
1034 addition that if value is of type floating-point and type is B<d>, Float.int() is called
1035 to convert the value. The supported C<type> arguments are:
1041 Integer: The output value has the form [-]dddd, where dddd is one
1042 or more decimal digits.
1046 Floating-point: The output value has the form [-]dddd.dddd, where
1047 dddd is one or more decimal digits. The number of digits before the
1048 decimal point depends on the magnitude of the number and the
1049 number of digits after the decimal point depends on the requested
1050 precision. When the number of digits after the decimal point in the value
1051 is less than the precision, the digit 0 should be appended to fill the columns
1052 (e.g. the result of String.format("%2.3f", 1.2) will be
1057 String: Characters are printed up to the end of the string or until the
1058 precision value is reached. When the width is larger than
1059 precision, the width should be ignored.
1063 A literal percent character (%) may be included in the format string by
1064 preceding it with another percent character (%%).
1066 MINIMALIST IMPLEMENTATION
1080 Illegal format specifier results in an C<invalid> return value.
# Implements String.format(format, value) as described in the POD above
# (the POD labels this a minimalist implementation).
# The result is built in `res` as either a WmlsString or a WmlsInvalid.
1084 .sub '_string_format' :anon
# $I0 becomes true when the format argument is a WmlsInvalid.
1088 $I0 = isa format, 'WmlsInvalid'
# Delegate the formatting to Parrot's sprintf: $S0 is the format string and
# $P0 is the aggregate holding the value(s) to format.
1095 $S1 = sprintf $S0, $P0
# Result object for the formatted text.
1096 new res, 'WmlsString'
# Result object for the invalid case.
1100 new res, 'WmlsInvalid'
1116 # vim: expandtab shiftwidth=4 ft=pir: