let all function types return NO_TAG
[nedit-bw.git] / FastConcat.diff
blob81a20a0117e0e5bf214c79f192a83db0fa412c28
1 Perform faster macro concatenation
3 Available as a patch:
5 http://sourceforge.net/tracker/index.php?func=detail&aid=971477&group_id=11005&atid=311005
6 [ 971477 ] Perform faster macro concatenation
7 FastConcat.diff 2004-06-11 15:55
9 Previously, concatenation was treated as a binary operator, taking two values
10 from the stack and concatenating them. Where an expression involved more than
11 one concatenation, every application of the binary operator allocated its own
12 temporary string, and each intermediate result was simply discarded later.
13 To make matters worse, every concatenation rescanned
14 both of its operand strings with strlen() to determine their lengths.
16 This modification changes the way the concatenation operator is handled.
17 Instead of treating it as a binary operator, the parser treats it as a list operator,
18 rather like the comma in an argument list. Thus a concatenation expression
19 is treated as a counted sequence of subexpressions. The count is coded into
20 the "bytecode" to tell the concat() function (in interpret.c) how many
21 expression values to pull off the stack. The function then exploits the
22 fact that string DataValue structures now include the strings' lengths to
23 work out the total allocation length required for the whole result. (This
24 is based on the previous implementation of makeArrayKeyFromArgs().)
26 I have also added helper functions longAsStr() (static) and lenLongAsStr() to generate
27 the decimal string representation of a number, or just to work out its length. This
28 allows us to avoid calling sprintf() with "%d" all the time. The new
29 AllocStringOfNumber() exploits this, and is used in various places in
30 interpret.c. Note that longAsStr() returns the string value in a static buffer
31 which must be used/copied immediately to avoid a second call overwriting
32 its content.
34 ---
36 source/interpret.c | 243 +++++++++++++++++++++++++++++++++++++++--------------
37 source/interpret.h | 5 -
38 source/parse.y | 15 ++-
39 3 files changed, 193 insertions(+), 70 deletions(-)
41 diff --quilt old/source/interpret.c new/source/interpret.c
42 --- old/source/interpret.c
43 +++ new/source/interpret.c
44 @@ -109,10 +109,12 @@ static int bitAnd(void);
45 static int bitOr(void);
46 static int and(void);
47 static int or(void);
48 static int not(void);
49 static int power(void);
50 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
51 + int leaveParams);
52 static int concat(void);
53 static int assign(void);
54 static int callSubroutine(void);
55 static int fetchRetVal(void);
56 static int branch(void);
57 @@ -787,10 +789,72 @@ Symbol *PromoteToGlobal(Symbol *sym)
58 return s;
59 return InstallSymbol(sym->name, GLOBAL_SYM, noValue);
63 +** Convert a long value to its decimal string representation, returned in a
64 +** static string.
65 +*/
66 +static const char *longAsStr(long val, int *len)
68 + static const char digits[] = "0123456789";
69 + static char res[TYPE_INT_STR_SIZE(val) + 1];
70 + long val10;
71 + char *pos = &res[TYPE_INT_STR_SIZE(val)];
72 + char *end = pos;
74 + /* the string is built backwards, so start by null terminating it */
75 + *pos = 0;
77 + if (val >= 0) {
78 + /* do-while loop will deal with the val == 0 case */
79 + do {
80 + /* we can use the modulo (%) operator here */
81 + *--pos = digits[val % 10];
82 + val /= 10;
83 + } while (val != 0);
84 + }
85 + else {
86 + /* we don't use the modulo (%) operator since its behaviour with
87 + negative numbers is implementation-defined before C99 */
88 + do {
89 + val10 = val / 10;
90 + *--pos = digits[(10 * val10) - val];
91 + val = val10;
92 + } while (val != 0);
93 + *--pos = '-';
94 + }
96 + if (len) {
97 + *len = (int)(end - pos);
98 + }
100 + return pos;
104 +** Calculate the length of the decimal string representation of a long value.
106 +int lenLongAsStr(long val)
108 + long val10;
109 + int len = 0;
111 + if (val < 0) {
112 + len++; /* for leading '-' */
115 + /* do-while loop will deal with the val == 0 case */
116 + do {
117 + len++;
118 + val /= 10;
119 + } while (val != 0);
121 + return len;
125 ** Allocate memory for a string, and keep track of it, such that it
126 ** can be recovered later using GarbageCollectStrings. (A linked list
127 ** of pointers is maintained by threading through the memory behind
128 ** the returned pointers). Length does not include the terminating null
129 ** character, so to allocate space for a string of strlen == n, you must
130 @@ -877,10 +941,18 @@ int AllocNStringNCpy(NString *string, co
131 char *AllocStringCpy(const char *s)
133 return AllocStringNCpy(s, s ? strlen(s) : 0);
136 +/* Allocate a string holding the decimal value of the number */
137 +char *AllocStringOfNumber(long val)
139 + int len;
140 + const char *s = longAsStr(val, &len);
141 + return AllocStringNCpy(s, len);
145 * Allocate a new NString buffer, containing a copy of the given string.
146 * The length is set to the length of the string and resulting string is
147 * guaranteed to be \0-terminated.
149 @@ -1075,21 +1147,19 @@ static void freeSymbolTable(Symbol *symT
150 #define POP_STRING(string) \
151 if (StackP == TheStack) \
152 return execError(StackUnderflowMsg, ""); \
153 --StackP; \
154 if (StackP->tag == INT_TAG) { \
155 - string = AllocString(TYPE_INT_STR_SIZE(int)); \
156 - sprintf(string, "%d", StackP->val.n); \
157 + string = AllocStringOfNumber(StackP->val.n); \
158 } else if (StackP->tag == STRING_TAG) \
159 string = StackP->val.str.rep; \
160 else \
161 return(execError("can't convert array to string", NULL));
163 #define PEEK_STRING(string, peekIndex) \
164 if ((StackP - peekIndex - 1)->tag == INT_TAG) { \
165 - string = AllocString(TYPE_INT_STR_SIZE(int)); \
166 - sprintf(string, "%d", (StackP - peekIndex - 1)->val.n); \
167 + string = AllocStringOfNumber((StackP - peekIndex - 1)->val.n); \
169 else if ((StackP - peekIndex - 1)->tag == STRING_TAG) { \
170 string = (StackP - peekIndex - 1)->val.str.rep; \
172 else { \
173 @@ -1208,13 +1278,12 @@ static int pushArgVal(void)
175 POP_INT(argNum)
176 --argNum;
177 nArgs = FP_GET_ARG_COUNT(FrameP);
178 if (argNum >= nArgs || argNum < 0) {
179 - char argStr[TYPE_INT_STR_SIZE(argNum)];
180 - sprintf(argStr, "%d", argNum + 1);
181 - return execError("referenced undefined argument: $args[%s]", argStr);
182 + return execError("referenced undefined argument: $args[%s]",
183 + longAsStr(argNum + 1, NULL));
185 PUSH(FP_GET_ARG_N(FrameP, argNum));
186 return STAT_OK;
189 @@ -1240,15 +1309,13 @@ static int pushArgArray(void)
190 if (resultArray->tag != ARRAY_TAG) {
191 resultArray->tag = ARRAY_TAG;
192 resultArray->val.arrayPtr = ArrayNew();
194 for (argNum = 0; argNum < nArgs; ++argNum) {
195 - char intStr[TYPE_INT_STR_SIZE(argNum)];
197 - sprintf(intStr, "%d", argNum + 1);
198 argVal = FP_GET_ARG_N(FrameP, argNum);
199 - if (!ArrayInsert(resultArray, AllocStringCpy(intStr), &argVal)) {
200 + if (!ArrayInsert(resultArray, AllocStringOfNumber(argNum + 1),
201 + &argVal)) {
202 return(execError("array insertion failure", NULL));
206 PUSH(*resultArray);
207 @@ -1839,30 +1906,107 @@ static int power(void)
208 PUSH_INT(n3)
209 return errCheck("exponentiation");
213 -** concatenate two top items on the stack
214 -** Before: TheStack-> str2, str1, next, ...
215 -** After: TheStack-> result, next, ...
216 +** A helper routine used in concat(), and makeArrayKeyFromArgs().
217 +** Concatenate a number of values from the stack and return the result as a
218 +** character pointer in *result and its length as the return value, or less
219 +** than zero on failure. If a divider is specified, add it between each of the
220 +** stack elements. The stack elements are popped from the stack if leaveParams
221 +** is false.
223 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
224 + int leaveParams)
226 + int errNum;
227 + DataValue value;
228 + char *res = NULL;
229 + char *pos;
230 + int len;
231 + int sepLen;
232 + int i;
234 + *result = NULL;
236 + if (sep == NULL) {
237 + sep = "";
239 + sepLen = strlen(sep);
241 + /* evaluate total length (upper limit) */
242 + len = sepLen * (nVals - 1);
243 + for (i = nVals - 1; i >= 0; --i) {
244 + PEEK(value, i)
245 + if (value.tag == INT_TAG) {
246 + len += lenLongAsStr(value.val.n);
248 + else if (value.tag == STRING_TAG) {
249 + len += value.val.str.len;
251 + else {
252 + return -1; /* invalid type */
256 + /* allocate the string */
257 + res = AllocString(len + 1);
258 + pos = res;
259 + /* write everything into the result */
260 + for (i = nVals - 1; i >= 0; --i) {
261 + PEEK(value, i)
262 + if (value.tag == INT_TAG) {
263 + int l;
264 + strcpy(pos, longAsStr(value.val.n, &l));
265 + pos += l;
267 + else { /* value.tag == STRING_TAG */
268 + strcpy(pos, value.val.str.rep);
269 + pos += value.val.str.len;
271 + if (i && sepLen) {
272 + strcpy(pos, sep);
273 + pos += sepLen;
276 + /* remove the source expression values */
277 + if (!leaveParams) {
278 + while (nVals--) {
279 + POP(value)
283 + /* now return the results */
284 + *result = res;
285 + return pos - res;
289 +** concatenate a number of strings and push the result onto the stack
291 +** Before: Prog-> [nExpr], next, ...
292 +** TheStack-> exprValN, ... exprVal1, next, ...
293 +** After: Prog-> nExpr, [next], ...
294 +** TheStack-> concatResult, next, ...
296 static int concat(void)
298 - char *s1, *s2, *out;
299 - int len1, len2;
300 + char *out;
301 + int len;
302 + int nExpr;
304 - DISASM_RT(PC-1, 1);
305 - STACKDUMP(2, 3);
306 + nExpr = PC->value;
307 + PC++;
309 - POP_STRING(s2)
310 - POP_STRING(s1)
311 - len1 = strlen(s1);
312 - len2 = strlen(s2);
313 - out = AllocString(len1 + len2 + 1);
314 - strncpy(out, s1, len1);
315 - strcpy(&out[len1], s2);
316 - PUSH_STRING(out, len1 + len2)
317 + DISASM_RT(PC-2, 2);
318 + STACKDUMP(nExpr, 3);
320 + len = concatenateNwithSep(nExpr, "", &out, False);
321 + if (len < 0) {
322 + return(execError("can only concatenate with string or integer", NULL));
324 + PUSH_STRING(out, len)
325 return STAT_OK;
329 ** Call a subroutine or function (user defined or built-in). Args are the
330 @@ -2190,50 +2334,17 @@ int ArrayCopy(DataValue *dstArray, DataV
331 ** I really need to optimize the size approximation rather than assuming
332 ** a worst case size for every integer argument
334 static int makeArrayKeyFromArgs(int nArgs, char **keyString, int leaveParams)
336 - DataValue tmpVal;
337 - int sepLen = strlen(ARRAY_DIM_SEP);
338 - int keyLength = 0;
339 - int i;
340 + int len;
342 - keyLength = sepLen * (nArgs - 1);
343 - for (i = nArgs - 1; i >= 0; --i) {
344 - PEEK(tmpVal, i)
345 - if (tmpVal.tag == INT_TAG) {
346 - keyLength += TYPE_INT_STR_SIZE(tmpVal.val.n);
348 - else if (tmpVal.tag == STRING_TAG) {
349 - keyLength += tmpVal.val.str.len;
351 - else {
352 - return(execError("can only index array with string or int.", NULL));
355 - *keyString = AllocString(keyLength + 1);
356 - (*keyString)[0] = 0;
357 - for (i = nArgs - 1; i >= 0; --i) {
358 - if (i != nArgs - 1) {
359 - strcat(*keyString, ARRAY_DIM_SEP);
361 - PEEK(tmpVal, i)
362 - if (tmpVal.tag == INT_TAG) {
363 - sprintf(&((*keyString)[strlen(*keyString)]), "%d", tmpVal.val.n);
365 - else if (tmpVal.tag == STRING_TAG) {
366 - strcat(*keyString, tmpVal.val.str.rep);
368 - else {
369 - return(execError("can only index array with string or int.", NULL));
372 - if (!leaveParams) {
373 - for (i = nArgs - 1; i >= 0; --i) {
374 - POP(tmpVal)
376 + len = concatenateNwithSep(nArgs, ARRAY_DIM_SEP, keyString, leaveParams);
377 + if (len < 0) {
378 + return(execError("can only index array with string or int.", NULL));
381 return(STAT_OK);
385 ** allocate an empty array node, this is used as the root node and never
386 @@ -2983,10 +3094,14 @@ static void disasm(Inst *inst, int nInst
387 j == OP_BRANCH_NEVER || j == OP_BRANCH_TRUE) {
388 printf("to=(%d) %p", inst[i+1].value,
389 &inst[i+1] + inst[i+1].value);
390 ++i;
392 + else if (j == OP_CONCAT) {
393 + printd("nExpr=%d", inst[i+1].value);
394 + ++i;
396 else if (j == OP_SUBR_CALL) {
397 printf("%s (%d arg)", inst[i+1].sym->name, inst[i+2].value);
398 i += 2;
400 else if (j == OP_BEGIN_ARRAY_ITER) {
401 diff --quilt old/source/interpret.h new/source/interpret.h
402 --- old/source/interpret.h
403 +++ new/source/interpret.h
404 @@ -38,19 +38,19 @@
405 events passed to action routines. Tells
406 them that they were called from a macro */
408 enum symTypes {CONST_SYM, GLOBAL_SYM, LOCAL_SYM, ARG_SYM, PROC_VALUE_SYM,
409 C_FUNCTION_SYM, MACRO_FUNCTION_SYM, ACTION_ROUTINE_SYM};
410 -#define N_OPS 43
412 enum operations {OP_RETURN_NO_VAL, OP_RETURN, OP_PUSH_SYM, OP_DUP, OP_ADD,
413 OP_SUB, OP_MUL, OP_DIV, OP_MOD, OP_NEGATE, OP_INCR, OP_DECR, OP_GT, OP_LT,
414 OP_GE, OP_LE, OP_EQ, OP_NE, OP_BIT_AND, OP_BIT_OR, OP_AND, OP_OR, OP_NOT,
415 OP_POWER, OP_CONCAT, OP_ASSIGN, OP_SUBR_CALL, OP_FETCH_RET_VAL, OP_BRANCH,
416 OP_BRANCH_TRUE, OP_BRANCH_FALSE, OP_BRANCH_NEVER, OP_ARRAY_REF,
417 OP_ARRAY_ASSIGN, OP_BEGIN_ARRAY_ITER, OP_ARRAY_ITER, OP_IN_ARRAY,
418 OP_ARRAY_DELETE, OP_PUSH_ARRAY_SYM, OP_ARRAY_REF_ASSIGN_SETUP, OP_PUSH_ARG,
419 - OP_PUSH_ARG_COUNT, OP_PUSH_ARG_ARRAY};
420 + OP_PUSH_ARG_COUNT, OP_PUSH_ARG_ARRAY, N_OPS};
422 enum typeTags {NO_TAG, INT_TAG, STRING_TAG, ARRAY_TAG};
424 enum execReturnCodes {MACRO_TIME_LIMIT, MACRO_PREEMPT, MACRO_DONE, MACRO_ERROR};
426 @@ -160,10 +160,11 @@ int ContinueMacro(RestartData *continuat
427 void RunMacroAsSubrCall(Program *prog);
428 void PreemptMacro(void);
429 char *AllocString(int length);
430 char *AllocStringNCpy(const char *s, int length);
431 char *AllocStringCpy(const char *s);
432 +char *AllocStringOfNumber(long val);
433 int AllocNString(NString *string, int length);
434 int AllocNStringNCpy(NString *string, const char *s, int length);
435 int AllocNStringCpy(NString *string, const char *s);
436 void GarbageCollectStrings(void);
437 void FreeRestartData(RestartData *context);
438 diff --quilt old/source/parse.y new/source/parse.y
439 --- old/source/parse.y
440 +++ new/source/parse.y
441 @@ -60,11 +60,11 @@ extern Inst **LoopStackPtr; /* to fill
442 int nArgs;
444 %token <sym> NUMBER STRING SYMBOL
445 %token DELETE ARG_LOOKUP
446 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
447 -%type <nArgs> arglist
448 +%type <nArgs> arglist catlist
449 %type <inst> cond comastmts for while else and or arrayexpr
450 %type <sym> evalsym
452 %nonassoc IF_NO_ELSE
453 %nonassoc ELSE
454 @@ -281,13 +281,20 @@ arglist: /* nothing */ {
456 | arglist ',' expr {
457 $$ = $1 + 1;
460 -expr: numexpr %prec CONCAT
461 - | expr numexpr %prec CONCAT {
462 - ADD_OP(OP_CONCAT);
463 +catlist: numexpr %prec CONCAT {
464 + $$ = 1;
466 + | catlist numexpr %prec CONCAT {
467 + $$ = $1 + 1;
469 +expr: catlist {
470 + if ($1 > 1) {
471 + ADD_OP(OP_CONCAT); ADD_IMMED($1);
475 initarraylv: SYMBOL {
476 ADD_OP(OP_PUSH_ARRAY_SYM); ADD_SYM($1); ADD_IMMED(1);