1 Subject: multi-dimensional array iterator
3 To iterate over multi-dimensional keys from an array, use this:
5 for ([k1, ..., kN] in array)
8 This iterates only over the keys that have exactly N dimensions, if any. Each
9 multi-dimensional key is split up and its parts are assigned to k1, ..., kN.
11 The key/value iteration is also supported:
13 for ([k1, ..., kN] = val in array)
20 source/interpret.c | 223 +++++++++++++++++++++++++++++++++++++++++++++++++++++
22 source/parse.y | 95 ++++++++++++++++++++++
23 4 files changed, 333 insertions(+), 1 deletion(-)
25 diff --quilt old/source/interpret.c new/source/interpret.c
26 --- old/source/interpret.c
27 +++ new/source/interpret.c
28 @@ -3545,10 +3545,201 @@ static int arrayIter(void)
34 +static int countDim(const char *key)
37 + size_t seplen = strlen(ARRAY_DIM_SEP);
38 + const char *sep = key;
42 + sep = strstr(sep, ARRAY_DIM_SEP);
50 +static Boolean splitKey(const char *key, DataValue **keys)
53 + size_t seplen = strlen(ARRAY_DIM_SEP);
55 + const char *sep = key;
56 + const char *nextsep = key;
59 + nextsep = strstr(sep, ARRAY_DIM_SEP);
61 + keylen = nextsep - sep;
63 + keylen = strlen(sep);
66 + keys[nDims]->tag = STRING_TAG;
67 + if (!AllocNStringNCpy(&keys[nDims]->val.str, sep, keylen)) {
81 +** copy key and value to the symbols if the node is still valid (an invalid
82 +** node is marked by a color of -1), then move the iterator to the next node;
83 +** this allows iterators to progress even if you delete any node in the array
84 +** except the item just after the current key
86 +** Before: Prog-> iter, ARRAY_MULTI_ITER, [withVal], nDims, iterVarKey1, ..., iterVarKeynDims, (, iterVarVal), iter, endLoopBranch, next, ...
87 +** TheStack-> [next], ...
88 +** After: Prog-> iter, ARRAY_MULTI_ITER, withVal, nDims, iterVarKey1, ..., iterVarKeynDims, (, iterVarVal), iter, endLoopBranch, [next], ...
89 +** TheStack-> [next], ... (unchanged)
91 +** iter is a symbol which gives the position of the iterator value in
92 +** the stack frame (set up by OP_BEGIN_ARRAY_ITER); that value refers
93 +** to the array and a position within it
94 +** iterVarKey1, ..., iterVarKeynDims are the programmer-visible symbols which
95 +** will take the components of the current key
96 +** iterVarVal is the programmer-visible symbol which will take the current
97 +** entry value (only if withVal is true)
98 +** endLoopBranch is the instruction offset to the instruction following the
99 +** loop (measured from itself)
100 +** arrayVal is the data value holding the array in question
101 +** The return-to-start-of-loop branch (at the end of the loop) should address
102 +** the ARRAY_MULTI_ITER instruction
104 +static int arrayMultiIter(void)
109 + DataValue *iteratorValPtr;
110 + DataValue **keyValPtrs;
112 + SparseArrayEntry *thisEntry;
116 + Boolean keyFound = False;
118 + DISASM_RT(PC-1, 4);
121 + withVal = PC->value;
126 + keySyms = malloc(nDims * sizeof(*keySyms));
127 + keyValPtrs = malloc(nDims * sizeof(*keyValPtrs));
129 + for (d = 0; d < nDims; d++) {
130 + keySyms[d] = PC->sym;
139 + iterator = PC->sym;
141 + branchAddr = PC + PC->value;
144 + for (d = 0; d < nDims; d++) {
145 + if (keySyms[d]->type == LOCAL_SYM) {
146 + keyValPtrs[d] = &FP_GET_SYM_VAL(FrameP, keySyms[d]);
148 + else if (keySyms[d]->type == GLOBAL_SYM) {
149 + keyValPtrs[d] = &(keySyms[d]->value);
152 + const char *name = keySyms[d]->name;
155 + return(execError("can't assign to: %s", name));
157 + keyValPtrs[d]->tag = NO_TAG;
161 + if (valSym->type == LOCAL_SYM) {
162 + valPtr = &FP_GET_SYM_VAL(FrameP, valSym);
164 + else if (valSym->type == GLOBAL_SYM) {
165 + valPtr = &(valSym->value);
170 + return(execError("can't assign to: %s", valSym->name));
172 + valPtr->tag = NO_TAG;
175 + if (iterator->type == LOCAL_SYM) {
176 + iteratorValPtr = &FP_GET_SYM_VAL(FrameP, iterator);
181 + return(execError("bad temporary iterator: %s", iterator->name));
184 + thisEntry = iteratorValPtr->val.arrayPtr;
185 + while (thisEntry && thisEntry->nodePtrs.color != -1) {
186 + /* check if this is a nDims key */
187 + int thisDim = countDim(thisEntry->key);
189 + if (nDims != thisDim) {
191 + thisEntry = arrayIterateNext(thisEntry);
198 + if (!splitKey(thisEntry->key, keyValPtrs)) {
201 + return(execError("can't split key: %s", thisEntry->key));
206 + *valPtr = thisEntry->value;
209 + /* advance iterator */
210 + thisEntry = arrayIterateNext(thisEntry);
213 + iteratorValPtr->val.arrayPtr = thisEntry;
215 + if (!keyFound && (!thisEntry || thisEntry->nodePtrs.color == -1)) {
226 ** determine if a key or keys exists in an array
227 ** if the left argument is a string or integer a single check is performed
228 ** if the key exists, 1 is pushed onto the stack, otherwise 0
229 ** if the left argument is an array 1 is pushed onto the stack if every key
230 @@ -4035,10 +4226,42 @@ static void disasmInternal(Inst *inst, i
232 &inst[i+5] + inst[i+5].value);
236 + else if (j == OP_ARRAY_MULTI_ITER) {
237 + int nDim = inst[i+2].value, d;
238 + if (!inst[i+1].value) {
241 + for (d = 0; d < nDim; d++) {
244 + inst[i+3+d].sym->name);
246 + printd("] = %s++ end-loop=(%+d) %8p",
247 + inst[i+nDim+3].sym->name,
248 + inst[i+nDim+4].value,
249 + &inst[i+nDim+4] + inst[i+nDim+4].value);
255 + for (d = 0; d < nDim; d++) {
258 + inst[i+3+d].sym->name);
260 + printd("]=%s = %s++ end-loop=(%+d) %8p",
261 + inst[i+nDim+3].sym->name,
262 + inst[i+nDim+4].sym->name,
263 + inst[i+nDim+5].value,
264 + &inst[i+nDim+5] + inst[i+nDim+5].value);
268 else if (j == OP_ARRAY_REF ||
269 j == OP_ARRAY_DELETE ||
270 j == OP_ARRAY_ASSIGN ||
271 j == OP_ANONARRAY_INDEX_VAL ||
272 j == OP_NAMED_ARG1 ||
273 diff --quilt old/source/parse.y new/source/parse.y
274 --- old/source/parse.y
275 +++ new/source/parse.y
276 @@ -81,13 +81,26 @@ static int AllowDefine;
278 static int nextSymIsField = 0;
279 /* set to 1 when we don't want a full symbol, just a name (string) for a
280 field name following a '.' */
282 +#define SYMBLK_SIZE ((64 - sizeof(void *)) / sizeof(void *))
284 +typedef struct SymBlkTag {
285 + Symbol *syms[SYMBLK_SIZE];
286 + struct SymBlkTag *next;
289 +typedef struct SymListTag {
290 + SymBlk *head, *tail;
301 enum operations oper;
302 AccumulatorData *acc;
303 @@ -99,10 +112,11 @@ static int nextSymIsField = 0;
305 %type <num> arrlist arrentry
306 %type <num> arglistopt arglist catlist fnarglsopt fnarglist fnarg
307 %type <inst> cond comastmts comastmtlst for while do else and or arrayexpr mark
309 +%type <symlist> symlist
310 %type <oper> operassign incrdecr
311 %token <oper> '=' ADDEQ SUBEQ MULEQ DIVEQ MODEQ ANDEQ OREQ
312 %token <oper> INCR DECR
315 @@ -248,10 +262,65 @@ stmt: ';' blank
318 SET_BR_OFF($9+7, GetPC());
319 FillLoopAddrs(GetPC(), $9+2);
321 + | for '(' blank '[' symlist ']' IN blank arrayexpr blank ')' {
322 + Symbol *iterSym = InstallIteratorSymbol();
324 + ADD_OP(OP_BEGIN_ARRAY_ITER);
326 + ADD_OP(OP_ARRAY_MULTI_ITER);
327 + ADD_IMMED(0); /* without val symbol */
328 + ADD_IMMED($5.nSyms);
330 + ADD_SYM($5.head->syms[i % SYMBLK_SIZE]);
332 + if (i % SYMBLK_SIZE == 0 || i == $5.nSyms) {
333 + SymBlk *nextsymblk = $5.head->next;
335 + $5.head = nextsymblk;
337 + } while (i < $5.nSyms);
345 + SET_BR_OFF($9+6+$5.nSyms, GetPC());
346 + FillLoopAddrs(GetPC(), $9+2);
348 + | for '(' blank '[' symlist ']' KEYVAL SYMBOL IN blank arrayexpr blank ')' {
349 + Symbol *iterSym = InstallIteratorSymbol();
351 + ADD_OP(OP_BEGIN_ARRAY_ITER);
353 + ADD_OP(OP_ARRAY_MULTI_ITER);
354 + ADD_IMMED(1); /* with val symbol */
355 + ADD_IMMED($5.nSyms);
357 + ADD_SYM($5.head->syms[i % SYMBLK_SIZE]);
359 + if (i % SYMBLK_SIZE == 0 || i == $5.nSyms) {
360 + SymBlk *nextsymblk = $5.head->next;
362 + $5.head = nextsymblk;
364 + } while (i < $5.nSyms);
373 + SET_BR_OFF($11+7+$5.nSyms, GetPC());
374 + FillLoopAddrs(GetPC(), $11+2);
376 | BREAK stmtend blank {
377 ADD_OP(OP_BRANCH); ADD_BR_OFF(0);
378 if (AddBreakAddr(GetPC()-1)) {
379 yyerror("break outside loop"); YYERROR;
381 @@ -362,10 +431,36 @@ arglistopt: blank { $$ = 0; }
383 arglist: blank expr blank { $$ = 1; }
384 | arglist ',' blank expr blank { $$ = $1 + 1; }
387 +/* SYMBOL list for multi dimension array iterator */
388 +symlist: blank SYMBOL blank {
389 + SymBlk *symblk = malloc(sizeof(*symblk));
390 + symblk->syms[0] = $2;
391 + symblk->next = NULL;
392 + $$.head = $$.tail = symblk;
395 + | symlist ',' blank SYMBOL blank {
396 + /* copy all from $1 to $$, and clear $1 */
399 + $$.nSyms = $1.nSyms;
400 + $1.head = $1.tail = NULL;
401 + /* alloc new SymBlk, if current tail is full */
402 + if ($$.nSyms % SYMBLK_SIZE == 0) {
403 + SymBlk *symblk = malloc(sizeof(*symblk));
404 + symblk->next = NULL;
405 + $$.tail->next = symblk;
408 + $$.tail->syms[$$.nSyms % SYMBLK_SIZE] = $4;
413 /* string concatenation lists */
414 catlist: numexpr %prec CONCAT { $$ = 1; }
415 | catlist numexpr %prec CONCAT { $$ = $1 + 1; }
418 diff --quilt old/doc/help.etx new/doc/help.etx
421 @@ -2334,11 +2334,24 @@ Macro Language
422 looking for $sub_sep in the key.
424 If you want to use the 'in' keyword, use this syntax:
426 if ([1,2] in myArray)
430 + There is also a special array iterator for multi-dimensional arrays:
432 + for ([dim1Key, ..., dimNKey] in myArray)
435 + This iterates only over the keys with exactly N dimensions; the parts of
436 + each such key are assigned to the symbols dim1Key, ..., dimNKey.
438 + The value associated with each multi-dimensional key is available too:
440 + for ([dim1Key, ..., dimNKey] = theVal in myArray)
443 Note that if an array contains a value that is itself an array, you can
444 apply the index operator more than once. For example
446 subarray["a"] = "value"
447 diff --quilt old/source/ops.h new/source/ops.h
450 @@ -38,10 +38,11 @@ OP(BRANCH_FALSE, branchFalse)
451 OP(BRANCH_NEVER, branchNever) /* off */ /* */
452 OP(ARRAY_REF, arrayRef) /* N */ /* pop(kN..k1,a), push(a[k1..kN]) */
453 OP(ARRAY_ASSIGN, arrayAssign) /* N */ /* pop(v,kN..k1,a), a[k1..kN]=v */
454 OP(BEGIN_ARRAY_ITER, beginArrayIter) /* it */ /* pop(a), it=a.begin */
455 OP(ARRAY_ITER, arrayIter) /*w,k[,v],it,pc*/ /* it?(k.v=it.k,(w?v.v=it.v:),it++):PC=pc */
456 +OP(ARRAY_MULTI_ITER, arrayMultiIter) /*w,N,k1..kN[,v],it,pc*/ /* while it: (if dim(it.k)==N: ([k1..kN].v=it.k,(w?v.v=it.v:),break), it++), if notfound: PC=pc */
457 OP(IN_ARRAY, inArray) /* pop(a,k), push(a[k]?1:0) */
458 OP(ARRAY_INDEX, arrayIndex) /* N */ /* pop(kN..k1), push([kN..k1]) */
459 OP(ARRAY_DELETE, deleteArrayElement) /*N*/ /* N>0 ? (pop(kN..k1,a), del(a[k])) : (pop(a), delall(a)) */
460 OP(PUSH_ARRAY_SYM, pushArraySymVal) /*s,i*/ /* if i: s.v=ary()), push(s.v) */
461 OP(ARRAY_REF_ASSIGN_SETUP, arrayRefAndAssignSetup) /*op,N*/ /* pop(v,kN..a), a[k1..kN] op= v */