fix hex regex in SH
[nedit-bw.git] / FastConcat3.diff
blob9a05b0e64812013134a4bb742dcc33453db17ca8
1 Perform faster macro concatenation
3 Available as a patch:
5 http://sourceforge.net/tracker/index.php?func=detail&aid=971477&group_id=11005&atid=311005
6 [ 971477 ] Perform faster macro concatenation
7 FastConcat.diff 2004-06-11 15:55
9 Previously, concatenation was treated as a binary operator, taking two values
10 from the stack and concatenating them. Where an expression involves more than
11 one concatenation, each application of the binary operator created a
12 temporary string, requiring a separate allocation, only for it to be
13 discarded later. To make matters worse, concatenation would rescan
14 each string to determine its length using strlen().
16 This modification changes the way the concatenation operator is handled.
17 Instead of treating it as a binary operator, the parser sees it as a list operator,
18 rather like the comma in an argument list. Thus a concatenation expression
19 is treated as a counted sequence of subexpressions. The count is coded into
20 the "bytecode" to tell the concat() function (in interpret.c) how many
21 expression values to pull off the stack. The function then exploits the
22 fact that string DataValue structures now include the strings' lengths to
23 work out the total allocation length required for the whole result. (This
24 is based on the previous implementation of makeArrayKeyFromArgs().)
26 I have also added static functions longAsStr() and lenLongAsStr() to generate
27 a string representation of a number, or just to work out its length. This
28 allows us to avoid calling sprintf() with "%d" all the time. The new
29 AllocStringOfNumber() exploits this, and is used in various places in
30 interpret.c. Note that longAsStr() returns the string value in a static buffer
31 which must be used/copied immediately to avoid a second call overwriting
32 its content.
34 ---
36 source/interpret.c | 231 ++++++++++++++++++++++++++++++++++++++---------------
37 source/interpret.h | 5 -
38 source/parse.y | 15 ++-
39 3 files changed, 181 insertions(+), 70 deletions(-)
41 diff --quilt old/source/interpret.c new/source/interpret.c
42 --- old/source/interpret.c
43 +++ new/source/interpret.c
44 @@ -109,10 +109,12 @@ static int bitAnd(void);
45 static int bitOr(void);
46 static int and(void);
47 static int or(void);
48 static int not(void);
49 static int power(void);
50 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
51 + int leaveParams);
52 static int concat(void);
53 static int assign(void);
54 static int callSubroutine(void);
55 static int fetchRetVal(void);
56 static int branch(void);
57 @@ -784,10 +786,64 @@ Symbol *PromoteToGlobal(Symbol *sym)
58 return s;
59 return InstallSymbol(sym->name, GLOBAL_SYM, noValue);
63 +** Convert a long value to its decimal string representation, returned in a
64 +** static string.
65 +*/
66 +const char *longAsStr(long val)
68 + static const char digits[] = "0123456789";
69 + static char res[TYPE_INT_STR_SIZE(val) + 1];
70 + char *pos = &res[TYPE_INT_STR_SIZE(val)];
72 + /* the string is built backwards, so start by null terminating it */
73 + *pos = 0;
75 + if (val >= 0) {
76 + /* do-while loop will deal with the val == 0 case */
77 + do {
78 + /* we can use the modulo (%) operator here */
79 + *--pos = digits[val % 10];
80 + val /= 10;
81 + } while (val != 0);
82 + }
83 + else {
84 + /* we don't use the modulo (%) operator since its behaviour with
85 + negative numbers is undefined by the C standards */
86 + do {
87 + long val10 = val / 10;
88 + *--pos = digits[(10 * val10) - val];
89 + val = val10;
90 + } while (val != 0);
91 + *--pos = '-';
92 + }
93 + return pos;
96 +/*
97 +** Calculate the length of the decimal string representation of a long value.
98 +*/
99 +int lenLongAsStr(long val)
101 + int len = 0;
103 + if (val < 0) {
104 + len++; /* for leading '-' */
107 + /* do-while loop will deal with the val == 0 case */
108 + do {
109 + len++;
110 + val /= 10;
111 + } while (val != 0);
113 + return len;
117 ** Allocate memory for a string, and keep track of it, such that it
118 ** can be recovered later using GarbageCollectStrings. (A linked list
119 ** of pointers is maintained by threading through the memory behind
120 ** the returned pointers). Length does not include the terminating null
121 ** character, so to allocate space for a string of strlen == n, you must
122 @@ -874,10 +930,16 @@ int AllocNStringNCpy(NString *string, co
123 char *AllocStringCpy(const char *s)
125 return AllocStringNCpy(s, s ? strlen(s) : 0);
128 +/* Allocate a string holding the decimal value of the number */
129 +char *AllocStringOfNumber(long val)
131 + return AllocStringCpy(longAsStr(val));
135 * Allocate a new NString buffer, containing a copy of the given string.
136 * The length is set to the length of the string and resulting string is
137 * guaranteed to be \0-terminated.
139 @@ -1071,21 +1133,19 @@ static void freeSymbolTable(Symbol *symT
140 #define POP_STRING(string) \
141 if (StackP == TheStack) \
142 return execError(StackUnderflowMsg, ""); \
143 --StackP; \
144 if (StackP->tag == INT_TAG) { \
145 - string = AllocString(TYPE_INT_STR_SIZE(int)); \
146 - sprintf(string, "%d", StackP->val.n); \
147 + string = AllocStringOfNumber(StackP->val.n); \
148 } else if (StackP->tag == STRING_TAG) \
149 string = StackP->val.str.rep; \
150 else \
151 return(execError("can't convert array to string", NULL));
153 #define PEEK_STRING(string, peekIndex) \
154 if ((StackP - peekIndex - 1)->tag == INT_TAG) { \
155 - string = AllocString(TYPE_INT_STR_SIZE(int)); \
156 - sprintf(string, "%d", (StackP - peekIndex - 1)->val.n); \
157 + string = AllocStringOfNumber((StackP - peekIndex - 1)->val.n); \
159 else if ((StackP - peekIndex - 1)->tag == STRING_TAG) { \
160 string = (StackP - peekIndex - 1)->val.str.rep; \
162 else { \
163 @@ -1200,13 +1260,12 @@ static int pushArgVal(void)
165 POP_INT(argNum)
166 --argNum;
167 nArgs = FP_GET_ARG_COUNT(FrameP);
168 if (argNum >= nArgs || argNum < 0) {
169 - char argStr[TYPE_INT_STR_SIZE(argNum)];
170 - sprintf(argStr, "%d", argNum + 1);
171 - return execError("referenced undefined argument: $args[%s]", argStr);
172 + return execError("referenced undefined argument: $args[%s]",
173 + longAsStr(argNum + 1));
175 PUSH(FP_GET_ARG_N(FrameP, argNum));
176 return STAT_OK;
179 @@ -1232,15 +1291,13 @@ static int pushArgArray(void)
180 if (resultArray->tag != ARRAY_TAG) {
181 resultArray->tag = ARRAY_TAG;
182 resultArray->val.arrayPtr = ArrayNew();
184 for (argNum = 0; argNum < nArgs; ++argNum) {
185 - char intStr[TYPE_INT_STR_SIZE(argNum)];
187 - sprintf(intStr, "%d", argNum + 1);
188 argVal = FP_GET_ARG_N(FrameP, argNum);
189 - if (!ArrayInsert(resultArray, AllocStringCpy(intStr), &argVal)) {
190 + if (!ArrayInsert(resultArray, AllocStringOfNumber(argNum + 1),
191 + &argVal)) {
192 return(execError("array insertion failure", NULL));
196 PUSH(*resultArray);
197 @@ -1829,30 +1886,106 @@ static int power(void)
198 PUSH_INT(n3)
199 return errCheck("exponentiation");
203 -** concatenate two top items on the stack
204 -** Before: TheStack-> str2, str1, next, ...
205 -** After: TheStack-> result, next, ...
206 +** A helper routine used in concat(), and makeArrayKeyFromArgs().
207 +** Concatenate a number of values from the stack and return the result as a
208 +** character pointer in *result and its length as the return value, or less
209 +** than zero on failure. If a divider is specified, add it between each of the
210 +** stack elements. The stack elements are popped from the stack if leaveParams
211 +** is false.
213 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
214 + int leaveParams)
216 + DataValue value;
217 + char *res = NULL;
218 + char *pos;
219 + int len;
220 + int sepLen;
221 + int i;
223 + *result = NULL;
225 + if (sep == NULL) {
226 + sep = "";
228 + sepLen = strlen(sep);
230 + /* evaluate total length (upper limit) */
231 + len = sepLen * (nVals - 1);
232 + for (i = nVals - 1; i >= 0; --i) {
233 + PEEK(value, i)
234 + if (value.tag == INT_TAG) {
235 + len += lenLongAsStr(value.val.n);
237 + else if (value.tag == STRING_TAG) {
238 + len += value.val.str.len;
240 + else {
241 + return -1; /* invalid type */
245 + /* allocate the string */
246 + res = AllocString(len + 1);
247 + pos = res;
248 + /* write everything into the result */
249 + for (i = nVals - 1; i >= 0; --i) {
250 + PEEK(value, i)
251 + if (value.tag == INT_TAG) {
252 + pos += strlen(strcpy(pos, longAsStr(value.val.n)));
254 + else { /* value.tag == STRING_TAG */
255 + strcpy(pos, value.val.str.rep);
256 + pos += value.val.str.len;
258 + if (i && sepLen) {
259 + strcpy(pos, sep);
260 + pos += sepLen;
264 + /* remove the source expression values */
265 + if (!leaveParams) {
266 + while (nVals--) {
267 + POP(value)
271 + /* now return the results */
272 + *result = res;
273 + return pos - res;
277 +** concatenate a number of strings and push the result onto the stack
279 +** Before: Prog-> [nExpr], next, ...
280 +** TheStack-> exprValN, ... exprVal1, next, ...
281 +** After: Prog-> nExpr, [next], ...
282 +** TheStack-> concatResult, next, ...
284 static int concat(void)
286 - char *s1, *s2, *out;
287 - int len1, len2;
288 + char *out;
289 + int len;
291 - DISASM_RT(PC-1, 1);
292 - STACKDUMP(2, 3);
293 + int nExpr;
295 + nExpr = PC->value;
296 + PC++;
298 + DISASM_RT(PC-2, 2);
299 + STACKDUMP(nExpr, 3);
301 - POP_STRING(s2)
302 - POP_STRING(s1)
303 - len1 = strlen(s1);
304 - len2 = strlen(s2);
305 - out = AllocString(len1 + len2 + 1);
306 - strncpy(out, s1, len1);
307 - strcpy(&out[len1], s2);
308 - PUSH_STRING(out, len1 + len2)
309 + len = concatenateNwithSep(nExpr, "", &out, False);
310 + if (len < 0) {
311 + return(execError("can only concatenate with string or integer", NULL));
313 + PUSH_STRING(out, len)
314 return STAT_OK;
318 ** Call a subroutine or function (user defined or built-in). Args are the
319 @@ -2180,49 +2313,15 @@ int ArrayCopy(DataValue *dstArray, DataV
320 ** I really need to optimize the size approximation rather than assuming
321 ** a worst case size for every integer argument
323 static int makeArrayKeyFromArgs(int nArgs, char **keyString, int leaveParams)
325 - DataValue tmpVal;
326 - int sepLen = strlen(ARRAY_DIM_SEP);
327 - int keyLength = 0;
328 - int i;
329 + int len;
331 - keyLength = sepLen * (nArgs - 1);
332 - for (i = nArgs - 1; i >= 0; --i) {
333 - PEEK(tmpVal, i)
334 - if (tmpVal.tag == INT_TAG) {
335 - keyLength += TYPE_INT_STR_SIZE(tmpVal.val.n);
337 - else if (tmpVal.tag == STRING_TAG) {
338 - keyLength += tmpVal.val.str.len;
340 - else {
341 - return(execError("can only index array with string or int.", NULL));
344 - *keyString = AllocString(keyLength + 1);
345 - (*keyString)[0] = 0;
346 - for (i = nArgs - 1; i >= 0; --i) {
347 - if (i != nArgs - 1) {
348 - strcat(*keyString, ARRAY_DIM_SEP);
350 - PEEK(tmpVal, i)
351 - if (tmpVal.tag == INT_TAG) {
352 - sprintf(&((*keyString)[strlen(*keyString)]), "%d", tmpVal.val.n);
354 - else if (tmpVal.tag == STRING_TAG) {
355 - strcat(*keyString, tmpVal.val.str.rep);
357 - else {
358 - return(execError("can only index array with string or int.", NULL));
361 - if (!leaveParams) {
362 - for (i = nArgs - 1; i >= 0; --i) {
363 - POP(tmpVal)
365 + len = concatenateNwithSep(nArgs, ARRAY_DIM_SEP, keyString, leaveParams);
366 + if (len < 0) {
367 + return(execError("can only index array with string or int.", NULL));
369 return(STAT_OK);
373 @@ -2949,10 +3048,14 @@ static void disasm(Inst *inst, int nInst
374 j == OP_BRANCH_NEVER || j == OP_BRANCH_TRUE) {
375 printf("to=(%d) %p", inst[i+1].value,
376 &inst[i+1] + inst[i+1].value);
377 ++i;
379 + else if (j == OP_CONCAT) {
380 + printd("nExpr=%d", inst[i+1].value);
381 + ++i;
383 else if (j == OP_SUBR_CALL) {
384 printf("%s (%d arg)", inst[i+1].sym->name, inst[i+2].value);
385 i += 2;
387 else if (j == OP_BEGIN_ARRAY_ITER) {
388 diff --quilt old/source/interpret.h new/source/interpret.h
389 --- old/source/interpret.h
390 +++ new/source/interpret.h
391 @@ -38,19 +38,19 @@
392 events passed to action routines. Tells
393 them that they were called from a macro */
395 enum symTypes {CONST_SYM, GLOBAL_SYM, LOCAL_SYM, ARG_SYM, PROC_VALUE_SYM,
396 C_FUNCTION_SYM, MACRO_FUNCTION_SYM, ACTION_ROUTINE_SYM};
397 -#define N_OPS 43
399 enum operations {OP_RETURN_NO_VAL, OP_RETURN, OP_PUSH_SYM, OP_DUP, OP_ADD,
400 OP_SUB, OP_MUL, OP_DIV, OP_MOD, OP_NEGATE, OP_INCR, OP_DECR, OP_GT, OP_LT,
401 OP_GE, OP_LE, OP_EQ, OP_NE, OP_BIT_AND, OP_BIT_OR, OP_AND, OP_OR, OP_NOT,
402 OP_POWER, OP_CONCAT, OP_ASSIGN, OP_SUBR_CALL, OP_FETCH_RET_VAL, OP_BRANCH,
403 OP_BRANCH_TRUE, OP_BRANCH_FALSE, OP_BRANCH_NEVER, OP_ARRAY_REF,
404 OP_ARRAY_ASSIGN, OP_BEGIN_ARRAY_ITER, OP_ARRAY_ITER, OP_IN_ARRAY,
405 OP_ARRAY_DELETE, OP_PUSH_ARRAY_SYM, OP_ARRAY_REF_ASSIGN_SETUP, OP_PUSH_ARG,
406 - OP_PUSH_ARG_COUNT, OP_PUSH_ARG_ARRAY};
407 + OP_PUSH_ARG_COUNT, OP_PUSH_ARG_ARRAY, N_OPS};
409 enum typeTags {NO_TAG, INT_TAG, STRING_TAG, ARRAY_TAG};
411 enum execReturnCodes {MACRO_TIME_LIMIT, MACRO_PREEMPT, MACRO_DONE, MACRO_ERROR};
413 @@ -160,10 +160,11 @@ int ContinueMacro(RestartData *continuat
414 void RunMacroAsSubrCall(Program *prog);
415 void PreemptMacro(void);
416 char *AllocString(int length);
417 char *AllocStringNCpy(const char *s, int length);
418 char *AllocStringCpy(const char *s);
419 +char *AllocStringOfNumber(long val);
420 int AllocNString(NString *string, int length);
421 int AllocNStringNCpy(NString *string, const char *s, int length);
422 int AllocNStringCpy(NString *string, const char *s);
423 void GarbageCollectStrings(void);
424 void FreeRestartData(RestartData *context);
425 diff --quilt old/source/parse.y new/source/parse.y
426 --- old/source/parse.y
427 +++ new/source/parse.y
428 @@ -60,11 +60,11 @@ extern Inst **LoopStackPtr; /* to fill
429 int nArgs;
431 %token <sym> NUMBER STRING SYMBOL
432 %token DELETE ARG_LOOKUP
433 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
434 -%type <nArgs> arglist
435 +%type <nArgs> arglist catlist
436 %type <inst> cond comastmts for while else and or arrayexpr
437 %type <sym> evalsym
439 %nonassoc IF_NO_ELSE
440 %nonassoc ELSE
441 @@ -281,13 +281,20 @@ arglist: /* nothing */ {
443 | arglist ',' expr {
444 $$ = $1 + 1;
447 -expr: numexpr %prec CONCAT
448 - | expr numexpr %prec CONCAT {
449 - ADD_OP(OP_CONCAT);
450 +catlist: numexpr %prec CONCAT {
451 + $$ = 1;
453 + | catlist numexpr %prec CONCAT {
454 + $$ = $1 + 1;
456 +expr: catlist {
457 + if ($1 > 1) {
458 + ADD_OP(OP_CONCAT); ADD_IMMED($1);
462 initarraylv: SYMBOL {
463 ADD_OP(OP_PUSH_ARRAY_SYM); ADD_SYM($1); ADD_IMMED(1);