ajbj patch round-up
[nedit-bw.git] / FastConcat3.diff
blob2d5580e71b76c0b75a1ca305df423f8c0a72aa79
1 From: Tony Balinski <ajbj@free.fr>
2 Subject: Perform faster macro concatenation
4 Available as a patch:
6 http://sourceforge.net/tracker/index.php?func=detail&aid=971477&group_id=11005&atid=311005
7 [ 971477 ] Perform faster macro concatenation
8 FastConcat.diff 2004-06-11 15:55
10 Previously, concatenation was treated as a binary operator, taking two values
11 from the stack and concatenating them. Where an expression involved more than
12 one concatenation, each application of the binary operator created a temporary
13 string, requiring a separate allocation that would only be discarded later.
14 To make matters worse, concatenation would rescan
15 each string to determine its length using strlen().
17 This modification changes the way the concatenation operator is handled.
18 Instead of being treated as a binary operator, it is handled as a list operator,
19 rather like the comma in an argument list. Thus a concatenation expression
20 is treated as a counted sequence of subexpressions. The count is coded into
21 the "bytecode" to tell the concat() function (in interpret.c) how many
22 expression values to pull off the stack. The function then exploits the
23 fact that string DataValue structures now include the strings' lengths to
24 work out the total allocation length required for the whole result. (This
25 is based on the previous implementation of makeArrayKeyFromArgs().)
27 I have also added helper functions longAsStr() and lenLongAsStr() to generate
28 a string representation of a number, or just to work out its length. This
29 allows us to avoid calling sprintf() with "%d" all the time. The new
30 AllocStringOfNumber() exploits this, and is used in various places in
31 interpret.c. Note that longAsStr() returns the string value in a static buffer
32 which must be used/copied immediately to avoid a second call overwriting
33 its content.
35 ---
37 source/interpret.c | 228 ++++++++++++++++++++++++++++++++++++++---------------
38 source/interpret.h | 1
39 source/parse.y | 19 +++-
40 3 files changed, 182 insertions(+), 66 deletions(-)
42 diff --quilt old/source/interpret.c new/source/interpret.c
43 --- old/source/interpret.c
44 +++ new/source/interpret.c
45 @@ -83,10 +83,12 @@ static void restoreContext(RestartData *
46 #define OP(name, fn) static int fn(void);
47 #include "ops.h"
48 #undef OP
49 static int returnValOrNone(int valOnStack);
51 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
52 + int leaveParams);
53 static void freeSymbolTable(Symbol *symTab);
54 static int errCheck(const char *s);
55 static int execError(const char *s1, const char *s2);
56 static rbTreeNode *arrayEmptyAllocator(void);
57 static rbTreeNode *arrayAllocateNode(rbTreeNode *src);
58 @@ -841,10 +843,64 @@ Symbol *PromoteToGlobal(Symbol *sym)
60 return sym;
64 +** Convert a long value to its decimal string representation, returned in a
65 +** static string.
66 +*/
67 +const char *longAsStr(long val)
69 + static const char digits[] = "0123456789";
70 + static char res[TYPE_INT_STR_SIZE(val) + 1];
71 + char *pos = &res[TYPE_INT_STR_SIZE(val)];
73 + /* the string is built backwards, so start by null terminating it */
74 + *pos = 0;
76 + if (val >= 0) {
77 + /* do-while loop will deal with the val == 0 case */
78 + do {
79 + /* we can use the modulo (%) operator here */
80 + *--pos = digits[val % 10];
81 + val /= 10;
82 + } while (val != 0);
83 + }
84 + else {
85 + /* we don't use the modulo (%) operator since its behaviour with
86 + negative numbers is implementation-defined in C89 (defined from C99) */
87 + do {
88 + long val10 = val / 10;
89 + *--pos = digits[(10 * val10) - val];
90 + val = val10;
91 + } while (val != 0);
92 + *--pos = '-';
93 + }
94 + return pos;
97 +/*
98 +** Calculate the length of the decimal string representation of a long value.
99 +*/
100 +int lenLongAsStr(long val)
102 + int len = 0;
104 + if (val < 0) {
105 + len++; /* for leading '-' */
108 + /* do-while loop will deal with the val == 0 case */
109 + do {
110 + len++;
111 + val /= 10;
112 + } while (val != 0);
114 + return len;
118 ** Allocate memory for a string, and keep track of it, such that it
119 ** can be recovered later using GarbageCollectStrings. (A linked list
120 ** of pointers is maintained by threading through the memory behind
121 ** the returned pointers). Length does not include the terminating null
122 ** character, so to allocate space for a string of strlen == n, you must
123 @@ -931,10 +987,16 @@ int AllocNStringNCpy(NString *string, co
124 char *AllocStringCpy(const char *s)
126 return AllocStringNCpy(s, s ? strlen(s) : 0);
129 +/* Allocate a string holding the decimal value of the number */
130 +char *AllocStringOfNumber(long val)
132 + return AllocStringCpy(longAsStr(val));
136 * Allocate a new NString buffer, containing a copy of the given string.
137 * The length is set to the length of the string and resulting string is
138 * guaranteed to be \0-terminated.
140 @@ -1170,12 +1232,11 @@ static void freeSymbolTable(Symbol *symT
141 do { \
142 char *__str; \
143 if (dataVal.tag == STRING_TAG) { \
144 __str = dataVal.val.str.rep; \
145 } else if (dataVal.tag == INT_TAG) { \
146 - __str = AllocString(TYPE_INT_STR_SIZE(int)); \
147 - sprintf(__str, "%d", dataVal.val.n); \
148 + __str = AllocStringOfNumber(dataVal.val.n); \
149 } else { \
150 return(execError("incompatible type in string context: %s", \
151 tagToStr(dataVal.tag))); \
153 string = __str; \
154 @@ -1331,13 +1392,12 @@ static int pushArgVal(void)
156 POP_INT(argNum);
157 --argNum;
158 nArgs = FP_GET_ARG_COUNT(FrameP);
159 if (argNum >= nArgs || argNum < 0) {
160 - char argStr[TYPE_INT_STR_SIZE(argNum)];
161 - sprintf(argStr, "%d", argNum + 1);
162 - return execError("referenced undefined argument: $args[%s]", argStr);
163 + return execError("referenced undefined argument: $args[%s]",
164 + longAsStr(argNum + 1));
166 PUSH(FP_GET_ARG_N(FrameP, argNum));
167 return STAT_OK;
170 @@ -1363,15 +1423,13 @@ static int pushArgArray(void)
171 if (resultArray->tag != ARRAY_TAG) {
172 resultArray->tag = ARRAY_TAG;
173 resultArray->val.arrayPtr = ArrayNew();
175 for (argNum = 0; argNum < nArgs; ++argNum) {
176 - char intStr[TYPE_INT_STR_SIZE(argNum)];
178 - sprintf(intStr, "%d", argNum + 1);
179 argVal = FP_GET_ARG_N(FrameP, argNum);
180 - if (!ArrayInsert(resultArray, AllocStringCpy(intStr), &argVal)) {
181 + if (!ArrayInsert(resultArray, AllocStringOfNumber(argNum + 1),
182 + &argVal)) {
183 return(execError("array insertion failure", NULL));
187 PUSH(*resultArray);
188 @@ -1957,30 +2015,106 @@ static int power(void)
189 PUSH_INT(n3);
190 return errCheck("exponentiation");
194 -** concatenate two top items on the stack
195 -** Before: TheStack-> str2, str1, next, ...
196 -** After: TheStack-> result, next, ...
197 +** A helper routine used in concat() and makeArrayKeyFromArgs().
198 +** Concatenate a number of values from the stack and return the result as a
199 +** character pointer in *result and its length as the return value, or less
200 +** than zero on failure. If a divider is specified, add it between each of the
201 +** stack elements. The stack elements are popped from the stack if leaveParams
202 +** is false.
204 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
205 + int leaveParams)
207 + DataValue value;
208 + char *res = NULL;
209 + char *pos;
210 + int len;
211 + int sepLen;
212 + int i;
214 + *result = NULL;
216 + if (sep == NULL) {
217 + sep = "";
219 + sepLen = strlen(sep);
221 + /* evaluate total length (upper limit) */
222 + len = sepLen * (nVals - 1);
223 + for (i = nVals - 1; i >= 0; --i) {
224 + PEEK(value, i);
225 + if (value.tag == INT_TAG) {
226 + len += lenLongAsStr(value.val.n);
228 + else if (value.tag == STRING_TAG) {
229 + len += value.val.str.len;
231 + else {
232 + return -1; /* invalid type */
236 + /* allocate the string */
237 + res = AllocString(len + 1);
238 + pos = res;
239 + /* write everything into the result */
240 + for (i = nVals - 1; i >= 0; --i) {
241 + PEEK(value, i);
242 + if (value.tag == INT_TAG) {
243 + pos += strlen(strcpy(pos, longAsStr(value.val.n)));
245 + else { /* value.tag == STRING_TAG */
246 + strcpy(pos, value.val.str.rep);
247 + pos += value.val.str.len;
249 + if (i && sepLen) {
250 + strcpy(pos, sep);
251 + pos += sepLen;
255 + /* remove the source expression values */
256 + if (!leaveParams) {
257 + while (nVals--) {
258 + POP(value);
262 + /* now return the results */
263 + *result = res;
264 + return pos - res;
268 +** concatenate a number of strings and push the result onto the stack
270 +** Before: Prog-> [nExpr], next, ...
271 +** TheStack-> exprValN, ... exprVal1, next, ...
272 +** After: Prog-> nExpr, [next], ...
273 +** TheStack-> concatResult, next, ...
275 static int concat(void)
277 - char *s1, *s2, *out;
278 - int len1, len2;
279 + char *out;
280 + int len;
282 - DISASM_RT(PC-1, 1);
283 - STACKDUMP(2, 3);
284 + int nExpr;
286 + nExpr = PC->value;
287 + PC++;
289 - POP_STRING(s2);
290 - POP_STRING(s1);
291 - len1 = strlen(s1);
292 - len2 = strlen(s2);
293 - out = AllocString(len1 + len2 + 1);
294 - strncpy(out, s1, len1);
295 - strcpy(&out[len1], s2);
296 - PUSH_STRING(out, len1 + len2);
297 + DISASM_RT(PC-2, 2);
298 + STACKDUMP(nExpr, 3);
300 + len = concatenateNwithSep(nExpr, "", &out, False);
301 + if (len < 0) {
302 + return(execError("can only concatenate with string or integer", NULL));
304 + PUSH_STRING(out, len);
305 return STAT_OK;
309 ** Call a subroutine or function (user defined or built-in). Args are the
310 @@ -2282,49 +2416,15 @@ int ArrayCopy(DataValue *dstArray, DataV
311 ** I really need to optimize the size approximation rather than assuming
312 ** a worst case size for every integer argument
314 static int makeArrayKeyFromArgs(int nArgs, char **keyString, int leaveParams)
316 - DataValue tmpVal;
317 - int sepLen = strlen(ARRAY_DIM_SEP);
318 - int keyLength = 0;
319 - int i;
320 + int len;
322 - keyLength = sepLen * (nArgs - 1);
323 - for (i = nArgs - 1; i >= 0; --i) {
324 - PEEK(tmpVal, i);
325 - if (tmpVal.tag == INT_TAG) {
326 - keyLength += TYPE_INT_STR_SIZE(tmpVal.val.n);
328 - else if (tmpVal.tag == STRING_TAG) {
329 - keyLength += tmpVal.val.str.len;
331 - else {
332 - return(execError("can only index array with string or int.", NULL));
335 - *keyString = AllocString(keyLength + 1);
336 - (*keyString)[0] = 0;
337 - for (i = nArgs - 1; i >= 0; --i) {
338 - if (i != nArgs - 1) {
339 - strcat(*keyString, ARRAY_DIM_SEP);
341 - PEEK(tmpVal, i);
342 - if (tmpVal.tag == INT_TAG) {
343 - sprintf(&((*keyString)[strlen(*keyString)]), "%d", tmpVal.val.n);
345 - else if (tmpVal.tag == STRING_TAG) {
346 - strcat(*keyString, tmpVal.val.str.rep);
348 - else {
349 - return(execError("can only index array with string or int.", NULL));
352 - if (!leaveParams) {
353 - for (i = nArgs - 1; i >= 0; --i) {
354 - POP(tmpVal);
356 + len = concatenateNwithSep(nArgs, ARRAY_DIM_SEP, keyString, leaveParams);
357 + if (len < 0) {
358 + return(execError("can only index array with string or int.", NULL));
360 return(STAT_OK);
364 @@ -3106,10 +3206,14 @@ static void disasm(Inst *inst, int nInst
365 j == OP_BRANCH_NEVER || j == OP_BRANCH_TRUE) {
366 printf("to=(%d) %p", inst[i+1].value,
367 &inst[i+1] + inst[i+1].value);
368 ++i;
370 + else if (j == OP_CONCAT) {
371 + printf("nExpr=%d", inst[i+1].value);
372 + ++i;
374 else if (j == OP_SUBR_CALL) {
375 printf("%s (%d arg)", inst[i+1].sym->name, inst[i+2].value);
376 i += 2;
378 else if (j == OP_BEGIN_ARRAY_ITER) {
379 diff --quilt old/source/interpret.h new/source/interpret.h
380 --- old/source/interpret.h
381 +++ new/source/interpret.h
382 @@ -170,10 +170,11 @@ int ContinueMacro(RestartData *continuat
383 void RunMacroAsSubrCall(Program *prog);
384 void PreemptMacro(void);
385 char *AllocString(int length);
386 char *AllocStringNCpy(const char *s, int length);
387 char *AllocStringCpy(const char *s);
388 +char *AllocStringOfNumber(long val);
389 int AllocNString(NString *string, int length);
390 int AllocNStringNCpy(NString *string, const char *s, int length);
391 int AllocNStringCpy(NString *string, const char *s);
392 void GarbageCollectStrings(void);
393 void FreeRestartData(RestartData *context);
394 diff --quilt old/source/parse.y new/source/parse.y
395 --- old/source/parse.y
396 +++ new/source/parse.y
397 @@ -88,11 +88,11 @@ static int AllowDefine;
398 %token <sym> STRING SYMBOL
399 %token <num> NUMBER
400 %token DELETE ARG_LOOKUP
401 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
402 %token <acc> DEFINE
403 -%type <num> arglist
404 +%type <num> arglist catlist
405 %type <inst> cond comastmts for while else and or arrayexpr
406 %type <sym> evalsym
408 %nonassoc IF_NO_ELSE
409 %nonassoc ELSE
410 @@ -352,13 +352,24 @@ arglist: /* nothing */ {
412 | arglist ',' expr {
413 $$ = $1 + 1;
416 -expr: numexpr %prec CONCAT
417 - | expr numexpr %prec CONCAT {
418 - ADD_OP(OP_CONCAT);
420 +/* string concatenation lists */
421 +catlist: numexpr %prec CONCAT {
422 + $$ = 1;
424 + | catlist numexpr %prec CONCAT {
425 + $$ = $1 + 1;
429 +expr: catlist {
430 + if ($1 > 1) {
431 + ADD_OP(OP_CONCAT); ADD_IMMED($1);
435 initarraylv: SYMBOL {
436 ADD_OP(OP_PUSH_ARRAY_SYM); ADD_SYM($1); ADD_IMMED(1);