1 From: Tony Balinski <ajbj@free.fr>
2 Subject: Extending the split() macro built-in
4 This patch extends split()'s functionality. It allows limited splitting,
5 where only a certain number of elements should be retrieved, and also
6 allows the dropping of the last element found if it is empty.
8 The limited count avoids the perhaps unnecessary overhead of generating a
9 large array if only the first few elements are to be used. The dropping of
10 the empty last element allows simple reconstruction after simple splits,
11 very useful for lines. For example, given:
13 a = get_range(0, $text_length)
14 lines = split(a, "\n", "lastnotnull")
16 for (i = 0; i < lines[]; i++)
19 assuming that all lines are '\n' terminated, b == a at the end. Otherwise
20 we have to resort to something like:
22 a = get_range(0, $text_length)
23 lines = split(a, "\n")
26 for (i = 0; i < lines[]; i++) {
31 to make a == b at the end, which is trickier (albeit more general).
35 source/macro.c | 102 ++++++++++++++++++++++++++++++++++++++++++++-------------
36 1 file changed, 80 insertions(+), 22 deletions(-)
38 diff --quilt old/source/macro.c new/source/macro.c
39 --- old/source/macro.c
40 +++ new/source/macro.c
41 @@ -3883,18 +3883,43 @@ static int stringCompareMS(WindowInfo *w
42 result->val.n = compareResult;
47 -** This function is intended to split strings into an array of substrings
48 -** Importatnt note: It should always return at least one entry with key 0
49 -** split("", ",") result[0] = ""
50 -** split("1,2", ",") result[0] = "1" result[1] = "2"
51 -** split("1,2,", ",") result[0] = "1" result[1] = "2" result[2] = ""
53 -** This behavior is specifically important when used to break up
55 +** This function is intended to split strings into an array of substrings.
57 +** array = split(string, separator[, searchType][, count][, "lastnotnull"])
59 +** Mandatory arguments:
60 +** string: string to split,
61 +** separator: separator string or pattern marking where to split
62 +** Optional arguments:
63 +** searchType: separator search type (default is "literal") to use to find
64 +** occurrences of separator in string.
65 +** count: maximum number of pieces in the returned array (default is
66 +** infinite, must be greater than zero); if smaller than or equal to
67 +** the number of separators found in string, the last piece will
68 +** contain the remainder of the string to split (a count of 1 produces
69 +** a single result in the returned array, equal to the original
71 +** keyword "lastnotnull": if present, this stops an empty string being
72 +** returned in the last entry of the array if the string to split ends
73 +** with the separator. This has the effect of returning an empty array
74 +** if the string to split is originally empty. Otherwise, the returned
75 +** array will always contain at least one element.
77 +** Important note: It should always return at least one entry with key 0
78 +** unless "lastnotnull" is present.
81 +** split("", ",") result[0] = ""
82 +** split(",", ",") result[0] = "" result[1] = ""
83 +** split("1,2", ",") result[0] = "1" result[1] = "2"
84 +** split("1,2,", ",") result[0] = "1" result[1] = "2" result[2] = ""
86 +** This behavior is specifically important when used to break up
87 +** array sub-scripts (unless "lastnotnull" is present)
90 static int splitMS(WindowInfo *window, DataValue *argList, int nArgs,
91 DataValue *result, char **errMsg)
93 @@ -3903,12 +3928,17 @@ static int splitMS(WindowInfo *window, D
94 int searchType, beginPos, foundStart, foundEnd, strLength, lastEnd;
95 int found, elementEnd, indexNum;
96 char indexStr[TYPE_INT_STR_SIZE(int)], *allocIndexStr;
101 + int haveSearchType = False;
102 + int haveCount = False;
104 + int lastnotnull = False;
105 + int haveLastnotnull = False;
107 + if (nArgs < 2 || nArgs > 4) {
108 return(wrongNArgsErr(errMsg));
110 if (!readStringArg(argList[0], &sourceStr, stringStorage[0], errMsg)) {
111 *errMsg = "first argument must be a string: %s";
113 @@ -3923,20 +3953,44 @@ static int splitMS(WindowInfo *window, D
115 if (splitStr == NULL) {
116 *errMsg = "second argument must be a non-empty string: %s";
119 - if (nArgs > 2 && readStringArg(argList[2], &typeSplitStr, stringStorage[2], errMsg)) {
120 - if (!StringToSearchType(typeSplitStr, &searchType)) {
122 + /* get the search type and maximum element count */
123 + searchType = SEARCH_LITERAL;
124 + for (indexNum = 2; indexNum < nArgs; indexNum++) {
125 + if (!readStringArg(argList[indexNum], &typeSplitStr,
126 + stringStorage[indexNum], errMsg)) {
127 + *errMsg = "non-scalar arguments not allowed: %s";
130 + if (strcmp(typeSplitStr, "lastnotnull") == 0) {
131 + lastnotnull = True;
132 + if (haveLastnotnull) {
133 + *errMsg = "\"lastnotnull\" specified more than once: %s";
136 + } else if (StringToSearchType(typeSplitStr, &searchType)) {
137 + if (haveSearchType) {
138 + *errMsg = "split search type supplied more than once: %s";
141 + haveSearchType = True;
142 + } else if (!haveCount &&
143 + readIntArg(argList[indexNum], &count, errMsg)) {
146 + *errMsg = "split maximum count must be greater than 0: %s";
150 *errMsg = "unrecognized argument to %s";
155 - searchType = SEARCH_LITERAL;
159 result->tag = ARRAY_TAG;
160 result->val.arrayPtr = ArrayNew();
164 @@ -3949,13 +4003,17 @@ static int splitMS(WindowInfo *window, D
165 if (!allocIndexStr) {
166 *errMsg = "array element failed to allocate key: %s";
169 strcpy(allocIndexStr, indexStr);
170 - found = SearchString(sourceStr, splitStr, SEARCH_FORWARD, searchType,
171 - False, beginPos, &foundStart, &foundEnd,
172 - NULL, NULL, GetWindowDelimiters(window));
173 + if (haveCount && --count == 0) {
176 + found = SearchString(sourceStr, splitStr, SEARCH_FORWARD,
177 + searchType, False, beginPos, &foundStart, &foundEnd,
178 + NULL, NULL, GetWindowDelimiters(window));
180 elementEnd = found ? foundStart : strLength;
181 elementLen = elementEnd - lastEnd;
182 element.tag = STRING_TAG;
183 if (!AllocNStringNCpy(&element.val.str, &sourceStr[lastEnd], elementLen)) {
184 *errMsg = "failed to allocate element value: %s";
185 @@ -3985,12 +4043,12 @@ static int splitMS(WindowInfo *window, D
186 *errMsg = "array element failed to allocate key: %s";
189 strcpy(allocIndexStr, indexStr);
190 element.tag = STRING_TAG;
191 - if (lastEnd == strLength) {
192 - /* The pattern mathed the end of the string. Add an empty chunk. */
193 + if (lastEnd == strLength && !lastnotnull) {
194 + /* The pattern matched the end of the string. Add an empty chunk. */
195 element.val.str.rep = PERM_ALLOC_STR("");
196 element.val.str.len = 0;
198 if (!ArrayInsert(result, allocIndexStr, &element)) {
199 M_ARRAY_INSERT_FAILURE();
200 @@ -4017,11 +4075,11 @@ static int splitMS(WindowInfo *window, D
201 verify whether the pattern also matches the end of the string,
202 and add an empty chunk in case it does. */
203 found = SearchString(sourceStr, splitStr, SEARCH_FORWARD,
204 searchType, False, strLength, &foundStart, &foundEnd,
205 NULL, NULL, GetWindowDelimiters(window));
207 + if (found && !lastnotnull) {
209 sprintf(indexStr, "%d", indexNum);
210 allocIndexStr = AllocString(strlen(indexStr) + 1);
211 if (!allocIndexStr) {
212 *errMsg = "array element failed to allocate key: %s";