compare each dimension, not numeric/non-numeric parts
[nedit-bw.git] / multi-dim-key-iterator.patch
blobec833dd635e50bdaa4d28c77fb184bf4a81424ea
1 Subject: multi-dimensional array iterator
3 To iterate over multi-dimensional keys from an array, use this:
5 for ([k1, ..., kN] in array)
6 { ... }
8 This iterates only over keys with exactly N dimensions, if any. Each such
9 key is split up and its parts are assigned to the symbols k1, ..., kN.
11 The key/value iteration is also supported:
13 for ([k1, ..., kN] = val in array)
14 { ... }
17 ---
19 doc/help.etx | 15 +++
20 source/interpret.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++++++++
21 source/ops.h | 1
22 source/parse.y | 95 ++++++++++++++++++++++
23 4 files changed, 333 insertions(+), 1 deletion(-)
25 diff --quilt old/source/interpret.c new/source/interpret.c
26 --- old/source/interpret.c
27 +++ new/source/interpret.c
28 @@ -3545,10 +3545,201 @@ static int arrayIter(void)
29 PC = branchAddr;
31 return(STAT_OK);
34 +static int countDim(const char *key)
36 + int nDims = 0;
37 + size_t seplen = strlen(ARRAY_DIM_SEP);
38 + const char *sep = key;
40 + do {
41 + nDims++;
42 + sep = strstr(sep, ARRAY_DIM_SEP);
43 + if (sep)
44 + sep += seplen;
45 + } while (sep);
47 + return nDims;
50 +static Boolean splitKey(const char *key, DataValue **keys)
52 + int nDims = 0;
53 + size_t seplen = strlen(ARRAY_DIM_SEP);
54 + size_t keylen;
55 + const char *sep = key;
56 + const char *nextsep = key;
58 + do {
59 + nextsep = strstr(sep, ARRAY_DIM_SEP);
60 + if (nextsep) {
61 + keylen = nextsep - sep;
62 + } else {
63 + keylen = strlen(sep);
64 + }
66 + keys[nDims]->tag = STRING_TAG;
67 + if (!AllocNStringNCpy(&keys[nDims]->val.str, sep, keylen)) {
68 + return False;
69 + }
71 + nDims++;
72 + sep = nextsep;
73 + if (sep)
74 + sep += seplen;
75 + } while (sep);
77 + return True;
80 +/*
81 +** copy the key parts (and the value, if requested) to the symbols if the node
82 +** is still valid (a bad node is marked by a color of -1), then move the
83 +** iterator to the next node; this allows iterators to progress even if you
84 +** delete any node in the array except the item just after the current key
85 +**
86 +** Before: Prog-> iter, ARRAY_MULTI_ITER, [withVal], nDims, iterVarKey1, ..., iterVarKeynDims, (, iterVarVal), iter, endLoopBranch, next, ...
87 +** TheStack-> [next], ...
88 +** After: Prog-> iter, ARRAY_MULTI_ITER, withVal, nDims, iterVarKey1, ..., iterVarKeynDims, (, iterVarVal), iter, endLoopBranch, [next], ...
89 +** TheStack-> [next], ... (unchanged)
90 +** Where:
91 +** iter is a symbol which gives the position of the iterator value in
92 +** the stack frame (set up by OP_BEGIN_ARRAY_ITER); that value refers
93 +** to the array and a position within it
94 +** iterVarKey1, ..., iterVarKeynDims are the programmer-visible symbols which
95 +** will take the parts of the current key
96 +** iterVarVal is the programmer-visible symbol which will take the current
97 +** entry value (only if withVal is true)
98 +** endLoopBranch is the instruction offset to the instruction following the
99 +** loop (measured from itself)
100 +** arrayVal is the data value holding the array in question
101 +** The return-to-start-of-loop branch (at the end of the loop) should address
102 +** the ARRAY_MULTI_ITER instruction
104 +static int arrayMultiIter(void)
106 + Symbol *iterator;
107 + Symbol **keySyms;
108 + Symbol *valSym;
109 + DataValue *iteratorValPtr;
110 + DataValue **keyValPtrs;
111 + DataValue *valPtr;
112 + SparseArrayEntry *thisEntry;
113 + Inst *branchAddr;
114 + int withVal;
115 + int nDims, d;
116 + Boolean keyFound = False;
118 + DISASM_RT(PC-1, 4);
119 + STACKDUMP(0, 4);
121 + withVal = PC->value;
122 + PC++;
123 + nDims = PC->value;
124 + PC++;
126 + keySyms = malloc(nDims * sizeof(*keySyms));
127 + keyValPtrs = malloc(nDims * sizeof(*keyValPtrs));
129 + for (d = 0; d < nDims; d++) {
130 + keySyms[d] = PC->sym;
131 + PC++;
134 + if (withVal) {
135 + valSym = PC->sym;
136 + PC++;
139 + iterator = PC->sym;
140 + PC++;
141 + branchAddr = PC + PC->value;
142 + PC++;
144 + for (d = 0; d < nDims; d++) {
145 + if (keySyms[d]->type == LOCAL_SYM) {
146 + keyValPtrs[d] = &FP_GET_SYM_VAL(FrameP, keySyms[d]);
148 + else if (keySyms[d]->type == GLOBAL_SYM) {
149 + keyValPtrs[d] = &(keySyms[d]->value);
151 + else {
152 + const char *name = keySyms[d]->name;
153 + free(keySyms);
154 + free(keyValPtrs);
155 + return(execError("can't assign to: %s", name));
157 + keyValPtrs[d]->tag = NO_TAG;
160 + if (withVal) {
161 + if (valSym->type == LOCAL_SYM) {
162 + valPtr = &FP_GET_SYM_VAL(FrameP, valSym);
164 + else if (valSym->type == GLOBAL_SYM) {
165 + valPtr = &(valSym->value);
167 + else {
168 + free(keySyms);
169 + free(keyValPtrs);
170 + return(execError("can't assign to: %s", valSym->name));
172 + valPtr->tag = NO_TAG;
175 + if (iterator->type == LOCAL_SYM) {
176 + iteratorValPtr = &FP_GET_SYM_VAL(FrameP, iterator);
178 + else {
179 + free(keySyms);
180 + free(keyValPtrs);
181 + return(execError("bad temporary iterator: %s", iterator->name));
184 + thisEntry = iteratorValPtr->val.arrayPtr;
185 + while (thisEntry && thisEntry->nodePtrs.color != -1) {
186 + /* check if this is a nDims key */
187 + int thisDim = countDim(thisEntry->key);
189 + if (nDims != thisDim) {
190 + /* try next */
191 + thisEntry = arrayIterateNext(thisEntry);
192 + continue;
195 + keyFound = True;
197 + /* set keys */
198 + if (!splitKey(thisEntry->key, keyValPtrs)) {
199 + free(keySyms);
200 + free(keyValPtrs);
201 + return(execError("can't split key: %s", thisEntry->key));
204 + if (withVal) {
205 + /* set value */
206 + *valPtr = thisEntry->value;
209 + /* advance iterator */
210 + thisEntry = arrayIterateNext(thisEntry);
211 + break;
213 + iteratorValPtr->val.arrayPtr = thisEntry;
215 + if (!keyFound && (!thisEntry || thisEntry->nodePtrs.color == -1)) {
216 + PC = branchAddr;
219 + free(keySyms);
220 + free(keyValPtrs);
222 + return(STAT_OK);
226 ** determine if a key or keys exists in an array
227 ** if the left argument is a string or integer a single check is performed
228 ** if the key exists, 1 is pushed onto the stack, otherwise 0
229 ** if the left argument is an array 1 is pushed onto the stack if every key
230 @@ -4035,10 +4226,42 @@ static void disasmInternal(Inst *inst, i
231 inst[i+5].value,
232 &inst[i+5] + inst[i+5].value);
233 i += 5;
236 + else if (j == OP_ARRAY_MULTI_ITER) {
237 + int nDim = inst[i+2].value, d;
238 + if (!inst[i+1].value) {
239 + /* without val */
240 + printd(" [");
241 + for (d = 0; d < nDim; d++) {
242 + printd("%s%s",
243 + d ? "," : "",
244 + inst[i+3+d].sym->name);
246 + printd("] = %s++ end-loop=(%+d) %8p",
247 + inst[i+nDim+3].sym->name,
248 + inst[i+nDim+4].value,
249 + &inst[i+nDim+4] + inst[i+nDim+4].value);
250 + i += nDim+4;
252 + else {
253 + /* with val */
254 + printd(" [");
255 + for (d = 0; d < nDim; d++) {
256 + printd("%s%s",
257 + d ? "," : "",
258 + inst[i+3+d].sym->name);
260 + printd("]=%s = %s++ end-loop=(%+d) %8p",
261 + inst[i+nDim+3].sym->name,
262 + inst[i+nDim+4].sym->name,
263 + inst[i+nDim+5].value,
264 + &inst[i+nDim+5] + inst[i+nDim+5].value);
265 + i += nDim+5;
268 else if (j == OP_ARRAY_REF ||
269 j == OP_ARRAY_DELETE ||
270 j == OP_ARRAY_ASSIGN ||
271 j == OP_ANONARRAY_INDEX_VAL ||
272 j == OP_NAMED_ARG1 ||
273 diff --quilt old/source/parse.y new/source/parse.y
274 --- old/source/parse.y
275 +++ new/source/parse.y
276 @@ -81,13 +81,26 @@ static int AllowDefine;
278 static int nextSymIsField = 0;
279 /* set to 1 when we don't want a full symbol, just a name (string) for a
280 field name following a '.' */
282 +#define SYMBLK_SIZE ((64 - sizeof(void *)) / sizeof(void *))
284 +typedef struct SymBlkTag {
285 + Symbol *syms[SYMBLK_SIZE];
286 + struct SymBlkTag *next;
287 +} SymBlk;
289 +typedef struct SymListTag {
290 + SymBlk *head, *tail;
291 + int nSyms;
292 +} SymList;
296 %union {
297 + SymList symlist;
298 Symbol *sym;
299 Inst *inst;
300 int num;
301 enum operations oper;
302 AccumulatorData *acc;
303 @@ -99,10 +112,11 @@ static int nextSymIsField = 0;
304 %token <acc> DEFINE
305 %type <num> arrlist arrentry
306 %type <num> arglistopt arglist catlist fnarglsopt fnarglist fnarg
307 %type <inst> cond comastmts comastmtlst for while do else and or arrayexpr mark
308 %type <sym> evalsym
309 +%type <symlist> symlist
310 %type <oper> operassign incrdecr
311 %token <oper> '=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
312 %token <oper> INCR DECR
314 %nonassoc IF_NO_ELSE
315 @@ -248,10 +262,65 @@ stmt: ';' blank
316 ADD_OP(OP_BRANCH);
317 ADD_BR_OFF($9+2);
318 SET_BR_OFF($9+7, GetPC());
319 FillLoopAddrs(GetPC(), $9+2);
321 + | for '(' blank '[' symlist ']' IN blank arrayexpr blank ')' {
322 + Symbol *iterSym = InstallIteratorSymbol();
323 + int i = 0;
324 + ADD_OP(OP_BEGIN_ARRAY_ITER);
325 + ADD_SYM(iterSym);
326 + ADD_OP(OP_ARRAY_MULTI_ITER);
327 + ADD_IMMED(0); /* without val symbol */
328 + ADD_IMMED($5.nSyms);
329 + do {
330 + ADD_SYM($5.head->syms[i % SYMBLK_SIZE]);
331 + i++;
332 + if (i % SYMBLK_SIZE == 0 || i == $5.nSyms) {
333 + SymBlk *nextsymblk = $5.head->next;
334 + free($5.head);
335 + $5.head = nextsymblk;
337 + } while (i < $5.nSyms);
338 + $5.tail = $5.head;
339 + ADD_SYM(iterSym);
340 + ADD_BR_OFF(0);
342 + blank block {
343 + ADD_OP(OP_BRANCH);
344 + ADD_BR_OFF($9+2);
345 + SET_BR_OFF($9+6+$5.nSyms, GetPC());
346 + FillLoopAddrs(GetPC(), $9+2);
348 + | for '(' blank '[' symlist ']' KEYVAL SYMBOL IN blank arrayexpr blank ')' {
349 + Symbol *iterSym = InstallIteratorSymbol();
350 + int i = 0;
351 + ADD_OP(OP_BEGIN_ARRAY_ITER);
352 + ADD_SYM(iterSym);
353 + ADD_OP(OP_ARRAY_MULTI_ITER);
354 + ADD_IMMED(1); /* with val symbol */
355 + ADD_IMMED($5.nSyms);
356 + do {
357 + ADD_SYM($5.head->syms[i % SYMBLK_SIZE]);
358 + i++;
359 + if (i % SYMBLK_SIZE == 0 || i == $5.nSyms) {
360 + SymBlk *nextsymblk = $5.head->next;
361 + free($5.head);
362 + $5.head = nextsymblk;
364 + } while (i < $5.nSyms);
365 + $5.tail = $5.head;
366 + ADD_SYM($8);
367 + ADD_SYM(iterSym);
368 + ADD_BR_OFF(0);
370 + blank block {
371 + ADD_OP(OP_BRANCH);
372 + ADD_BR_OFF($11+2);
373 + SET_BR_OFF($11+7+$5.nSyms, GetPC());
374 + FillLoopAddrs(GetPC(), $11+2);
376 | BREAK stmtend blank {
377 ADD_OP(OP_BRANCH); ADD_BR_OFF(0);
378 if (AddBreakAddr(GetPC()-1)) {
379 yyerror("break outside loop"); YYERROR;
381 @@ -362,10 +431,36 @@ arglistopt: blank { $$ = 0; }
383 arglist: blank expr blank { $$ = 1; }
384 | arglist ',' blank expr blank { $$ = $1 + 1; }
387 +/* SYMBOL list for multi dimension array iterator */
388 +symlist: blank SYMBOL blank {
389 + SymBlk *symblk = malloc(sizeof(*symblk));
390 + symblk->syms[0] = $2;
391 + symblk->next = NULL;
392 + $$.head = $$.tail = symblk;
393 + $$.nSyms = 1;
395 + | symlist ',' blank SYMBOL blank {
396 + /* copy all from $1 to $$, and clear $1 */
397 + $$.head = $1.head;
398 + $$.tail = $1.tail;
399 + $$.nSyms = $1.nSyms;
400 + $1.head = $1.tail = NULL;
401 + /* alloc new SymBlk, if current tail is full */
402 + if ($$.nSyms % SYMBLK_SIZE == 0) {
403 + SymBlk *symblk = malloc(sizeof(*symblk));
404 + symblk->next = NULL;
405 + $$.tail->next = symblk;
406 + $$.tail = symblk;
408 + $$.tail->syms[$$.nSyms % SYMBLK_SIZE] = $4;
409 + $$.nSyms++;
413 /* string concatenation lists */
414 catlist: numexpr %prec CONCAT { $$ = 1; }
415 | catlist numexpr %prec CONCAT { $$ = $1 + 1; }
418 diff --quilt old/doc/help.etx new/doc/help.etx
419 --- old/doc/help.etx
420 +++ new/doc/help.etx
421 @@ -2334,11 +2334,24 @@ Macro Language
422 looking for $sub_sep in the key.
424 If you want to use the 'in' keyword, use this syntax:
426 if ([1,2] in myArray)
427 - {..}
428 + <body>
430 + There is also a special array iterator for multi-dimensional arrays:
432 + for ([dim1Key, ..., dimNKey] in myArray)
433 + <body>
435 + This iterates only over the keys that have exactly N dimensions; the parts
436 + of each such key are assigned to the symbols dim1Key, ..., dimNKey.
438 + The value associated with each multi-dimensional key can be fetched as well:
440 + for ([dim1Key, ..., dimNKey] = theVal in myArray)
441 + <body>
443 Note that if an array contains a value that is itself an array, you can
444 apply the index operator more than once. For example
446 subarray["a"] = "value"
447 diff --quilt old/source/ops.h new/source/ops.h
448 --- old/source/ops.h
449 +++ new/source/ops.h
450 @@ -38,10 +38,11 @@ OP(BRANCH_FALSE, branchFalse)
451 OP(BRANCH_NEVER, branchNever) /* off */ /* */
452 OP(ARRAY_REF, arrayRef) /* N */ /* pop(kN..k1,a), push(a[k1..kN]) */
453 OP(ARRAY_ASSIGN, arrayAssign) /* N */ /* pop(v,kN..k1,a), a[k1..kN]=v */
454 OP(BEGIN_ARRAY_ITER, beginArrayIter) /* it */ /* pop(a), it=a.begin */
455 OP(ARRAY_ITER, arrayIter) /*w,k[,v],it,pc*/ /* it?(k.v=it.k,(w?v.v=it.v:),it++):PC=pc */
456 +OP(ARRAY_MULTI_ITER, arrayMultiIter) /*w,N,k1..kN[,v],it,pc*/ /* while it: (if dim(it.k)==N: ([k1..kN].v=it.k,(w?v.v=it.v:),break), it++), if notfound: PC=pc */
457 OP(IN_ARRAY, inArray) /* pop(a,k), push(a[k]?1:0) */
458 OP(ARRAY_INDEX, arrayIndex) /* N */ /* pop(kN..k1), push([kN..k1]) */
459 OP(ARRAY_DELETE, deleteArrayElement) /*N*/ /* N>0 ? (pop(kN..k1,a), del(a[k])) : (pop(a), delall(a)) */
460 OP(PUSH_ARRAY_SYM, pushArraySymVal) /*s,i*/ /* if i: s.v=ary()), push(s.v) */
461 OP(ARRAY_REF_ASSIGN_SETUP, arrayRefAndAssignSetup) /*op,N*/ /* pop(v,kN..a), a[k1..kN] op= v */