final step to context based macro interpreter
[nedit-bw.git] / FastConcat3.diff
blobbab8b7b4e9413b80ef3834a27318bd6ab5546242
1 From: Tony Balinski <ajbj@free.fr>
2 Subject: Perform faster macro concatenation
4 Available as a patch:
6 http://sourceforge.net/tracker/index.php?func=detail&aid=971477&group_id=11005&atid=311005
7 [ 971477 ] Perform faster macro concatenation
8 FastConcat.diff 2004-06-11 15:55
10 Previously, concatenation was treated as a binary operator, taking two values
11 from the stack and concatenating them. Where an expression involved more than
12 one concatenation, each application of the binary operator created a temporary
13 string, requiring a separate allocation that would only be discarded later.
14 To make matters worse, each concatenation would rescan its operand strings
15 with strlen() to determine their lengths.
17 This modification changes the way the concatenation operator is handled.
18 Instead of a binary operator, concatenation is now treated as a list operator,
19 rather like the comma in an argument list. Thus a concatenation expression
20 is treated as a counted sequence of subexpressions. The count is coded into
21 the "bytecode" to tell the concat() function (in interpret.c) how many
22 expression values to pull off the stack. The function then exploits the
23 fact that string DataValue structures now include the strings' lengths to
24 work out the total allocation length required for the whole result. (This
25 is based on the previous implementation of makeArrayKeyFromArgs().)
27 I have also added the functions longAsStr() and lenLongAsStr(), declared in
28 interpret.h, to generate the decimal string representation of a number, or
29 just to work out its length. This allows us to avoid calling sprintf() with
30 "%d" all the time. The new AllocStringOfNumber() exploits this, and is used
31 in various places in interpret.c. Note that longAsStr() returns the string in
32 a static buffer, which must be used or copied immediately, before a second
33 call overwrites its content.
35 ---
37 source/interpret.c | 228 ++++++++++++++++++++++++++++++++++++++---------------
38 source/interpret.h | 3
39 source/parse.y | 19 +++-
40 3 files changed, 184 insertions(+), 66 deletions(-)
42 diff --quilt old/source/interpret.c new/source/interpret.c
43 --- old/source/interpret.c
44 +++ new/source/interpret.c
45 @@ -85,6 +85,8 @@ static void restoreContext(RestartData *
46 #undef OP
47 static int returnValOrNone(int valOnStack);
49 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
50 + int leaveParams);
51 static void freeSymbolTable(Symbol *symTab);
52 static int errCheck(const char *s);
53 static int execError(const char *s1, const char *s2);
54 @@ -843,6 +845,60 @@ Symbol *PromoteToGlobal(Symbol *sym)
58 +** Convert a long value to its decimal string representation, returned in a
59 +** static string.
60 +*/
61 +const char *longAsStr(long val)
63 + static const char digits[] = "0123456789";
64 + static char res[TYPE_INT_STR_SIZE(val) + 1];
65 + char *pos = &res[TYPE_INT_STR_SIZE(val)];
67 + /* the string is built backwards, so start by null terminating it */
68 + *pos = 0;
70 + if (val >= 0) {
71 + /* do-while loop will deal with the val == 0 case */
72 + do {
73 + /* we can use the modulo (%) operator here */
74 + *--pos = digits[val % 10];
75 + val /= 10;
76 + } while (val != 0);
77 + }
78 + else {
79 + /* we don't use the modulo (%) operator since its behaviour with
80 + negative numbers is undefined by the C standards */
81 + do {
82 + long val10 = val / 10;
83 + *--pos = digits[(10 * val10) - val];
84 + val = val10;
85 + } while (val != 0);
86 + *--pos = '-';
87 + }
88 + return pos;
91 +/*
92 +** Calculate the length of the decimal string representation of a long value.
93 +*/
94 +int lenLongAsStr(long val)
96 + int len = 0;
98 + if (val < 0) {
99 + len++; /* for leading '-' */
102 + /* do-while loop will deal with the val == 0 case */
103 + do {
104 + len++;
105 + val /= 10;
106 + } while (val != 0);
108 + return len;
112 ** Allocate memory for a string, and keep track of it, such that it
113 ** can be recovered later using GarbageCollectStrings. (A linked list
114 ** of pointers is maintained by threading through the memory behind
115 @@ -933,6 +989,12 @@ char *AllocStringCpy(const char *s)
116 return AllocStringNCpy(s, s ? strlen(s) : 0);
119 +/* Allocate a string holding the decimal value of the number */
120 +char *AllocStringOfNumber(long val)
122 + return AllocStringCpy(longAsStr(val));
126 * Allocate a new NString buffer, containing a copy of the given string.
127 * The length is set to the length of the string and resulting string is
128 @@ -1172,8 +1234,7 @@ static void freeSymbolTable(Symbol *symT
129 if (dataVal.tag == STRING_TAG) { \
130 __str = dataVal.val.str.rep; \
131 } else if (dataVal.tag == INT_TAG) { \
132 - __str = AllocString(TYPE_INT_STR_SIZE(int)); \
133 - sprintf(__str, "%d", dataVal.val.n); \
134 + __str = AllocStringOfNumber(dataVal.val.n); \
135 } else { \
136 return(execError("incompatible type in string context: %s", \
137 tagToStr(dataVal.tag))); \
138 @@ -1333,9 +1394,8 @@ static int pushArgVal(void)
139 --argNum;
140 nArgs = FP_GET_ARG_COUNT(FrameP);
141 if (argNum >= nArgs || argNum < 0) {
142 - char argStr[TYPE_INT_STR_SIZE(argNum)];
143 - sprintf(argStr, "%d", argNum + 1);
144 - return execError("referenced undefined argument: $args[%s]", argStr);
145 + return execError("referenced undefined argument: $args[%s]",
146 + longAsStr(argNum + 1));
148 PUSH(FP_GET_ARG_N(FrameP, argNum));
149 return STAT_OK;
150 @@ -1365,11 +1425,9 @@ static int pushArgArray(void)
151 resultArray->val.arrayPtr = ArrayNew();
153 for (argNum = 0; argNum < nArgs; ++argNum) {
154 - char intStr[TYPE_INT_STR_SIZE(argNum)];
156 - sprintf(intStr, "%d", argNum + 1);
157 argVal = FP_GET_ARG_N(FrameP, argNum);
158 - if (!ArrayInsert(resultArray, AllocStringCpy(intStr), &argVal)) {
159 + if (!ArrayInsert(resultArray, AllocStringOfNumber(argNum + 1),
160 + &argVal)) {
161 return(execError("array insertion failure", NULL));
164 @@ -1959,26 +2017,102 @@ static int power(void)
168 -** concatenate two top items on the stack
169 -** Before: TheStack-> str2, str1, next, ...
170 -** After: TheStack-> result, next, ...
171 +** A helper routine used in concat(), and makeArrayKeyFromArgs().
172 +** Concatenate a number of values from the stack and return the result as a
173 +** character pointer in *result and its length as the return value, or less
174 +** than zero on failure. If a divider is specified, add it between each of the
175 +** stack elements. The stack elements are popped from the stack if leaveParams
176 +** is false.
178 +static int concatenateNwithSep(int nVals, const char *sep, char **result,
179 + int leaveParams)
181 + DataValue value;
182 + char *res = NULL;
183 + char *pos;
184 + int len;
185 + int sepLen;
186 + int i;
188 + *result = NULL;
190 + if (sep == NULL) {
191 + sep = "";
193 + sepLen = strlen(sep);
195 + /* evaluate total length (upper limit) */
196 + len = sepLen * (nVals - 1);
197 + for (i = nVals - 1; i >= 0; --i) {
198 + PEEK(value, i);
199 + if (value.tag == INT_TAG) {
200 + len += lenLongAsStr(value.val.n);
202 + else if (value.tag == STRING_TAG) {
203 + len += value.val.str.len;
205 + else {
206 + return -1; /* invalid type */
210 + /* allocate the string */
211 + res = AllocString(len + 1);
212 + pos = res;
213 + /* write everything into the result */
214 + for (i = nVals - 1; i >= 0; --i) {
215 + PEEK(value, i);
216 + if (value.tag == INT_TAG) {
217 + pos += strlen(strcpy(pos, longAsStr(value.val.n)));
219 + else { /* value.tag == STRING_TAG */
220 + strcpy(pos, value.val.str.rep);
221 + pos += value.val.str.len;
223 + if (i && sepLen) {
224 + strcpy(pos, sep);
225 + pos += sepLen;
229 + /* remove the source expression values */
230 + if (!leaveParams) {
231 + while (nVals--) {
232 + POP(value);
236 + /* now return the results */
237 + *result = res;
238 + return pos - res;
242 +** concatenate a number of strings and push the result onto the stack
244 +** Before: Prog-> [nExpr], next, ...
245 +** TheStack-> exprValN, ... exprVal1, next, ...
246 +** After: Prog-> nExpr, [next], ...
247 +** TheStack-> concatResult, next, ...
249 static int concat(void)
251 - char *s1, *s2, *out;
252 - int len1, len2;
253 + char *out;
254 + int len;
256 - DISASM_RT(PC-1, 1);
257 - STACKDUMP(2, 3);
258 + int nExpr;
260 + nExpr = PC->value;
261 + PC++;
263 - POP_STRING(s2);
264 - POP_STRING(s1);
265 - len1 = strlen(s1);
266 - len2 = strlen(s2);
267 - out = AllocString(len1 + len2 + 1);
268 - strncpy(out, s1, len1);
269 - strcpy(&out[len1], s2);
270 - PUSH_STRING(out, len1 + len2);
271 + DISASM_RT(PC-2, 2);
272 + STACKDUMP(nExpr, 3);
274 + len = concatenateNwithSep(nExpr, "", &out, False);
275 + if (len < 0) {
276 + return(execError("can only concatenate with string or integer", NULL));
278 + PUSH_STRING(out, len);
279 return STAT_OK;
282 @@ -2284,45 +2418,11 @@ int ArrayCopy(DataValue *dstArray, DataV
284 static int makeArrayKeyFromArgs(int nArgs, char **keyString, int leaveParams)
286 - DataValue tmpVal;
287 - int sepLen = strlen(ARRAY_DIM_SEP);
288 - int keyLength = 0;
289 - int i;
290 + int len;
292 - keyLength = sepLen * (nArgs - 1);
293 - for (i = nArgs - 1; i >= 0; --i) {
294 - PEEK(tmpVal, i);
295 - if (tmpVal.tag == INT_TAG) {
296 - keyLength += TYPE_INT_STR_SIZE(tmpVal.val.n);
298 - else if (tmpVal.tag == STRING_TAG) {
299 - keyLength += tmpVal.val.str.len;
301 - else {
302 - return(execError("can only index array with string or int.", NULL));
305 - *keyString = AllocString(keyLength + 1);
306 - (*keyString)[0] = 0;
307 - for (i = nArgs - 1; i >= 0; --i) {
308 - if (i != nArgs - 1) {
309 - strcat(*keyString, ARRAY_DIM_SEP);
311 - PEEK(tmpVal, i);
312 - if (tmpVal.tag == INT_TAG) {
313 - sprintf(&((*keyString)[strlen(*keyString)]), "%d", tmpVal.val.n);
315 - else if (tmpVal.tag == STRING_TAG) {
316 - strcat(*keyString, tmpVal.val.str.rep);
318 - else {
319 - return(execError("can only index array with string or int.", NULL));
322 - if (!leaveParams) {
323 - for (i = nArgs - 1; i >= 0; --i) {
324 - POP(tmpVal);
326 + len = concatenateNwithSep(nArgs, ARRAY_DIM_SEP, keyString, leaveParams);
327 + if (len < 0) {
328 + return(execError("can only index array with string or int.", NULL));
330 return(STAT_OK);
332 @@ -3108,6 +3208,10 @@ static void disasm(Inst *inst, int nInst
333 &inst[i+1] + inst[i+1].value);
334 ++i;
336 + else if (j == OP_CONCAT) {
337 + printf("nExpr=%d", inst[i+1].value);
338 + ++i;
340 else if (j == OP_SUBR_CALL) {
341 printf("%s (%d arg)", inst[i+1].sym->name, inst[i+2].value);
342 i += 2;
343 diff --quilt old/source/interpret.h new/source/interpret.h
344 --- old/source/interpret.h
345 +++ new/source/interpret.h
346 @@ -172,6 +172,7 @@ void PreemptMacro(void);
347 char *AllocString(int length);
348 char *AllocStringNCpy(const char *s, int length);
349 char *AllocStringCpy(const char *s);
350 +char *AllocStringOfNumber(long val);
351 int AllocNString(NString *string, int length);
352 int AllocNStringNCpy(NString *string, const char *s, int length);
353 int AllocNStringCpy(NString *string, const char *s);
354 @@ -185,5 +186,7 @@ WindowInfo *MacroFocusWindow(void);
355 void SetMacroFocusWindow(WindowInfo *window);
356 /* function used for implicit conversion from string to number */
357 int StringToNum(const char *string, int *number);
358 +const char *longAsStr(long val);
359 +int lenLongAsStr(long val);
361 #endif /* NEDIT_INTERPRET_H_INCLUDED */
362 diff --quilt old/source/parse.y new/source/parse.y
363 --- old/source/parse.y
364 +++ new/source/parse.y
365 @@ -90,7 +90,7 @@ static int AllowDefine;
366 %token DELETE ARG_LOOKUP
367 %token IF WHILE ELSE FOR BREAK CONTINUE RETURN
368 %token <acc> DEFINE
369 -%type <num> arglist
370 +%type <num> arglist catlist
371 %type <inst> cond comastmts for while else and or arrayexpr
372 %type <sym> evalsym
374 @@ -354,9 +354,20 @@ arglist: /* nothing */ {
375 $$ = $1 + 1;
378 -expr: numexpr %prec CONCAT
379 - | expr numexpr %prec CONCAT {
380 - ADD_OP(OP_CONCAT);
382 +/* string concatenation lists */
383 +catlist: numexpr %prec CONCAT {
384 + $$ = 1;
386 + | catlist numexpr %prec CONCAT {
387 + $$ = $1 + 1;
391 +expr: catlist {
392 + if ($1 > 1) {
393 + ADD_OP(OP_CONCAT); ADD_IMMED($1);
397 initarraylv: SYMBOL {