1 Perform faster macro concatenation
5 http://sourceforge.net/tracker/index.php?func=detail&aid=971477&group_id=11005&atid=311005
6 [ 971477 ] Perform faster macro concatenation
7 FastConcat.diff 2004-06-11 15:55
9 Previously, concatenation was treated as a binary operator, taking two values
10 from the stack and concatenating them. Where an expression involved more than
11 one concatenation, each application of the binary operator created a temporary
12 string, requiring a separate allocation, which would only be discarded later.
13 To make matters worse, each concatenation would rescan its operand strings
14 using strlen() to determine their lengths.
16 This modification changes the way the concatenation operator is handled.
17 Instead of being treated as a binary operator, it is now handled as a list
18 operator, rather like the comma in an argument list. Thus a concatenation
19 expression is treated as a counted sequence of subexpressions. The count is
20 coded into the "bytecode" to tell the concat() function (in interpret.c) how
21 many expression values to pull off the stack. The function then exploits the
22 fact that string DataValue structures now include the strings' lengths to
23 work out the total allocation length required for the whole result. (This
24 is based on the previous implementation of makeArrayKeyFromArgs().)
26 I have also added static functions longAsStr() and lenLongAsStr() to generate
27 a string representation of a number, or just to work out its length. This
28 allows us to avoid calling sprintf() with "%d" all the time. The new
29 AllocStringOfNumber() exploits this, and is used in various places in
30 interpret.c. Note that longAsStr() returns the string value in a static buffer,
31 which must be used/copied immediately to avoid a second call overwriting it.
36 source/interpret.c | 243 +++++++++++++++++++++++++++++++++++++++--------------
37 source/interpret.h | 5 -
38 source/parse.y | 15 ++-
39 3 files changed, 193 insertions(+), 70 deletions(-)
41 diff --quilt old/source/interpret.c new/source/interpret.c
42 --- old/source/interpret.c
43 +++ new/source/interpret.c
44 @@ -109,10 +109,12 @@ static int bitAnd(void);
45 static int bitOr(void);
49 static int power(void);
50 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
52 static int concat(void);
53 static int assign(void);
54 static int callSubroutine(void);
55 static int fetchRetVal(void);
56 static int branch(void);
57 @@ -787,10 +789,72 @@ Symbol *PromoteToGlobal(Symbol *sym)
59 return InstallSymbol(sym->name, GLOBAL_SYM, noValue);
63 +** Convert a long value to its decimal string representation, returned in a
66 +static const char *longAsStr(long val, int *len)
68 + static const char digits[] = "0123456789";
69 + static char res[TYPE_INT_STR_SIZE(val) + 1];
71 + char *pos = &res[TYPE_INT_STR_SIZE(val)];
74 + /* the string is built backwards, so start by null terminating it */
78 + /* do-while loop will deal with the val == 0 case */
80 + /* we can use the modulo (%) operator here */
81 + *--pos = digits[val % 10];
86 + /* we don't use the modulo (%) operator since its behaviour with
87 + negative numbers is undefined by the C standards */
90 + *--pos = digits[(10 * val10) - val];
97 + *len = (int)(end - pos);
104 +** Calculate the length of the decimal string representation of a long value.
106 +int lenLongAsStr(long val)
112 + len++; /* for leading '-' */
115 + /* do-while loop will deal with the val == 0 case */
119 + } while (val != 0);
125 ** Allocate memory for a string, and keep track of it, such that it
126 ** can be recovered later using GarbageCollectStrings. (A linked list
127 ** of pointers is maintained by threading through the memory behind
128 ** the returned pointers). Length does not include the terminating null
129 ** character, so to allocate space for a string of strlen == n, you must
130 @@ -877,10 +941,18 @@ int AllocNStringNCpy(NString *string, co
131 char *AllocStringCpy(const char *s)
133 return AllocStringNCpy(s, s ? strlen(s) : 0);
136 +/* Allocate a string holding the decimal value of the number */
137 +char *AllocStringOfNumber(long val)
140 + const char *s = longAsStr(val, &len);
141 + return AllocStringNCpy(s, len);
145 * Allocate a new NString buffer, containing a copy of the given string.
146 * The length is set to the length of the string and resulting string is
147 * guaranteed to be \0-terminated.
149 @@ -1075,21 +1147,19 @@ static void freeSymbolTable(Symbol *symT
150 #define POP_STRING(string) \
151 if (StackP == TheStack) \
152 return execError(StackUnderflowMsg, ""); \
154 if (StackP->tag == INT_TAG) { \
155 - string = AllocString(TYPE_INT_STR_SIZE(int)); \
156 - sprintf(string, "%d", StackP->val.n); \
157 + string = AllocStringOfNumber(StackP->val.n); \
158 } else if (StackP->tag == STRING_TAG) \
159 string = StackP->val.str.rep; \
161 return(execError("can't convert array to string", NULL));
163 #define PEEK_STRING(string, peekIndex) \
164 if ((StackP - peekIndex - 1)->tag == INT_TAG) { \
165 - string = AllocString(TYPE_INT_STR_SIZE(int)); \
166 - sprintf(string, "%d", (StackP - peekIndex - 1)->val.n); \
167 + string = AllocStringOfNumber((StackP - peekIndex - 1)->val.n); \
169 else if ((StackP - peekIndex - 1)->tag == STRING_TAG) { \
170 string = (StackP - peekIndex - 1)->val.str.rep; \
173 @@ -1208,13 +1278,12 @@ static int pushArgVal(void)
177 nArgs = FP_GET_ARG_COUNT(FrameP);
178 if (argNum >= nArgs || argNum < 0) {
179 - char argStr[TYPE_INT_STR_SIZE(argNum)];
180 - sprintf(argStr, "%d", argNum + 1);
181 - return execError("referenced undefined argument: $args[%s]", argStr);
182 + return execError("referenced undefined argument: $args[%s]",
183 + longAsStr(argNum + 1, NULL));
185 PUSH(FP_GET_ARG_N(FrameP, argNum));
189 @@ -1240,15 +1309,13 @@ static int pushArgArray(void)
190 if (resultArray->tag != ARRAY_TAG) {
191 resultArray->tag = ARRAY_TAG;
192 resultArray->val.arrayPtr = ArrayNew();
194 for (argNum = 0; argNum < nArgs; ++argNum) {
195 - char intStr[TYPE_INT_STR_SIZE(argNum)];
197 - sprintf(intStr, "%d", argNum + 1);
198 argVal = FP_GET_ARG_N(FrameP, argNum);
199 - if (!ArrayInsert(resultArray, AllocStringCpy(intStr), &argVal)) {
200 + if (!ArrayInsert(resultArray, AllocStringOfNumber(argNum + 1),
202 return(execError("array insertion failure", NULL));
207 @@ -1839,30 +1906,107 @@ static int power(void)
209 return errCheck("exponentiation");
213 -** concatenate two top items on the stack
214 -** Before: TheStack-> str2, str1, next, ...
215 -** After: TheStack-> result, next, ...
216 +** A helper routine used in concat(), and makeArrayKeyFromArgs().
217 +** Concatenate a number of values from the stack and return the result as a
218 +** character pointer in *result and its length as the return value, or less
219 +** than zero on failure. If a divider is specified, add it between each of the
220 +** stack elements. The stack elements are popped from the stack if leaveParams
223 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
239 + sepLen = strlen(sep);
241 + /* evaluate total length (upper limit) */
242 + len = sepLen * (nVals - 1);
243 + for (i = nVals - 1; i >= 0; --i) {
245 + if (value.tag == INT_TAG) {
246 + len += lenLongAsStr(value.val.n);
248 + else if (value.tag == STRING_TAG) {
249 + len += value.val.str.len;
252 + return -1; /* invalid type */
256 + /* allocate the string */
257 + res = AllocString(len + 1);
259 + /* write everything into the result */
260 + for (i = nVals - 1; i >= 0; --i) {
262 + if (value.tag == INT_TAG) {
264 + strcpy(pos, longAsStr(value.val.n, &l));
267 + else { /* value.tag == STRING_TAG */
268 + strcpy(pos, value.val.str.rep);
269 + pos += value.val.str.len;
276 + /* remove the source expression values */
277 + if (!leaveParams) {
283 + /* now return the results */
289 +** concatenate a number of strings and push the result onto the stack
291 +** Before: Prog-> [nExpr], next, ...
292 +** TheStack-> exprValN, ... exprVal1, next, ...
293 +** After: Prog-> nExpr, [next], ...
294 +** TheStack-> concatResult, next, ...
296 static int concat(void)
298 - char *s1, *s2, *out;
304 - DISASM_RT(PC-1, 1);
313 - out = AllocString(len1 + len2 + 1);
314 - strncpy(out, s1, len1);
315 - strcpy(&out[len1], s2);
316 - PUSH_STRING(out, len1 + len2)
317 + DISASM_RT(PC-2, 2);
318 + STACKDUMP(nExpr, 3);
320 + len = concatenateNwithSep(nExpr, "", &out, False);
322 + return(execError("can only concatenate with string or integer", NULL));
324 + PUSH_STRING(out, len)
329 ** Call a subroutine or function (user defined or built-in). Args are the
330 @@ -2190,50 +2334,17 @@ int ArrayCopy(DataValue *dstArray, DataV
331 ** I really need to optimize the size approximation rather than assuming
332 ** a worst case size for every integer argument
334 static int makeArrayKeyFromArgs(int nArgs, char **keyString, int leaveParams)
337 - int sepLen = strlen(ARRAY_DIM_SEP);
342 - keyLength = sepLen * (nArgs - 1);
343 - for (i = nArgs - 1; i >= 0; --i) {
345 - if (tmpVal.tag == INT_TAG) {
346 - keyLength += TYPE_INT_STR_SIZE(tmpVal.val.n);
348 - else if (tmpVal.tag == STRING_TAG) {
349 - keyLength += tmpVal.val.str.len;
352 - return(execError("can only index array with string or int.", NULL));
355 - *keyString = AllocString(keyLength + 1);
356 - (*keyString)[0] = 0;
357 - for (i = nArgs - 1; i >= 0; --i) {
358 - if (i != nArgs - 1) {
359 - strcat(*keyString, ARRAY_DIM_SEP);
362 - if (tmpVal.tag == INT_TAG) {
363 - sprintf(&((*keyString)[strlen(*keyString)]), "%d", tmpVal.val.n);
365 - else if (tmpVal.tag == STRING_TAG) {
366 - strcat(*keyString, tmpVal.val.str.rep);
369 - return(execError("can only index array with string or int.", NULL));
372 - if (!leaveParams) {
373 - for (i = nArgs - 1; i >= 0; --i) {
376 + len = concatenateNwithSep(nArgs, ARRAY_DIM_SEP, keyString, leaveParams);
378 + return(execError("can only index array with string or int.", NULL));
385 ** allocate an empty array node, this is used as the root node and never
386 @@ -2983,10 +3094,14 @@ static void disasm(Inst *inst, int nInst
387 j == OP_BRANCH_NEVER || j == OP_BRANCH_TRUE) {
388 printf("to=(%d) %p", inst[i+1].value,
389 &inst[i+1] + inst[i+1].value);
392 + else if (j == OP_CONCAT) {
393 + printd("nExpr=%d", inst[i+1].value);
396 else if (j == OP_SUBR_CALL) {
397 printf("%s (%d arg)", inst[i+1].sym->name, inst[i+2].value);
400 else if (j == OP_BEGIN_ARRAY_ITER) {
401 diff --quilt old/source/interpret.h new/source/interpret.h
402 --- old/source/interpret.h
403 +++ new/source/interpret.h
405 events passed to action routines. Tells
406 them that they were called from a macro */
408 enum symTypes {CONST_SYM, GLOBAL_SYM, LOCAL_SYM, ARG_SYM, PROC_VALUE_SYM,
409 C_FUNCTION_SYM, MACRO_FUNCTION_SYM, ACTION_ROUTINE_SYM};
412 enum operations {OP_RETURN_NO_VAL, OP_RETURN, OP_PUSH_SYM, OP_DUP, OP_ADD,
413 OP_SUB, OP_MUL, OP_DIV, OP_MOD, OP_NEGATE, OP_INCR, OP_DECR, OP_GT, OP_LT,
414 OP_GE, OP_LE, OP_EQ, OP_NE, OP_BIT_AND, OP_BIT_OR, OP_AND, OP_OR, OP_NOT,
415 OP_POWER, OP_CONCAT, OP_ASSIGN, OP_SUBR_CALL, OP_FETCH_RET_VAL, OP_BRANCH,
416 OP_BRANCH_TRUE, OP_BRANCH_FALSE, OP_BRANCH_NEVER, OP_ARRAY_REF,
417 OP_ARRAY_ASSIGN, OP_BEGIN_ARRAY_ITER, OP_ARRAY_ITER, OP_IN_ARRAY,
418 OP_ARRAY_DELETE, OP_PUSH_ARRAY_SYM, OP_ARRAY_REF_ASSIGN_SETUP, OP_PUSH_ARG,
419 - OP_PUSH_ARG_COUNT, OP_PUSH_ARG_ARRAY};
420 + OP_PUSH_ARG_COUNT, OP_PUSH_ARG_ARRAY, N_OPS};
422 enum typeTags {NO_TAG, INT_TAG, STRING_TAG, ARRAY_TAG};
424 enum execReturnCodes {MACRO_TIME_LIMIT, MACRO_PREEMPT, MACRO_DONE, MACRO_ERROR};
426 @@ -160,10 +160,11 @@ int ContinueMacro(RestartData *continuat
427 void RunMacroAsSubrCall(Program *prog);
428 void PreemptMacro(void);
429 char *AllocString(int length);
430 char *AllocStringNCpy(const char *s, int length);
431 char *AllocStringCpy(const char *s);
432 +char *AllocStringOfNumber(long val);
433 int AllocNString(NString *string, int length);
434 int AllocNStringNCpy(NString *string, const char *s, int length);
435 int AllocNStringCpy(NString *string, const char *s);
436 void GarbageCollectStrings(void);
437 void FreeRestartData(RestartData *context);
438 diff --quilt old/source/parse.y new/source/parse.y
439 --- old/source/parse.y
440 +++ new/source/parse.y
441 @@ -60,11 +60,11 @@ extern Inst **LoopStackPtr; /* to fill
444 %token <sym> NUMBER STRING SYMBOL
445 %token DELETE ARG_LOOKUP
446 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
447 -%type <nArgs> arglist
448 +%type <nArgs> arglist catlist
449 %type <inst> cond comastmts for while else and or arrayexpr
454 @@ -281,13 +281,20 @@ arglist: /* nothing */ {
460 -expr: numexpr %prec CONCAT
461 - | expr numexpr %prec CONCAT {
463 +catlist: numexpr %prec CONCAT {
466 + | catlist numexpr %prec CONCAT {
471 + ADD_OP(OP_CONCAT); ADD_IMMED($1);
475 initarraylv: SYMBOL {
476 ADD_OP(OP_PUSH_ARRAY_SYM); ADD_SYM($1); ADD_IMMED(1);