beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / luapeg / lpeg.c
blob30f54522de51edea1d032a04b61674f135c7bcb4
1 #include "lpeg.h"
3 /*
4 ** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $
5 ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
6 */
8 /* #include <ctype.h> */
9 /* #include <limits.h> */
10 /* #include <stdio.h> */
13 /* #include "lptypes.h" */
14 /* #include "lpprint.h" */
15 /* #include "lpcode.h" */
18 #if defined(LPEG_DEBUG)
21 ** {======================================================
22 ** Printing patterns (for debugging)
23 ** =======================================================
27 void printcharset (const byte *st) {
28 int i;
29 printf("[");
30 for (i = 0; i <= UCHAR_MAX; i++) {
31 int first = i;
32 while (testchar(st, i) && i <= UCHAR_MAX) i++;
33 if (i - 1 == first) /* unary range? */
34 printf("(%02x)", first);
35 else if (i - 1 > first) /* non-empty range? */
36 printf("(%02x-%02x)", first, i - 1);
38 printf("]");
42 static void printcapkind (int kind) {
43 const char *const modes[] = {
44 "close", "position", "constant", "backref",
45 "argument", "simple", "table", "function",
46 "query", "string", "num", "substitution", "fold",
47 "runtime", "group"};
48 printf("%s", modes[kind]);
52 static void printjmp (const Instruction *op, const Instruction *p) {
53 printf("-> %d", (int)(p + (p + 1)->offset - op));
57 void printinst (const Instruction *op, const Instruction *p) {
58 const char *const names[] = {
59 "any", "char", "set",
60 "testany", "testchar", "testset",
61 "span", "behind",
62 "ret", "end",
63 "choice", "jmp", "call", "open_call",
64 "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
65 "fullcapture", "opencapture", "closecapture", "closeruntime"
67 printf("%02ld: %s ", (long)(p - op), names[p->i.code]);
68 switch ((Opcode)p->i.code) {
69 case IChar: {
70 printf("'%c'", p->i.aux);
71 break;
73 case ITestChar: {
74 printf("'%c'", p->i.aux); printjmp(op, p);
75 break;
77 case IFullCapture: {
78 printcapkind(getkind(p));
79 printf(" (size = %d) (idx = %d)", getoff(p), p->i.key);
80 break;
82 case IOpenCapture: {
83 printcapkind(getkind(p));
84 printf(" (idx = %d)", p->i.key);
85 break;
87 case ISet: {
88 printcharset((p+1)->buff);
89 break;
91 case ITestSet: {
92 printcharset((p+2)->buff); printjmp(op, p);
93 break;
95 case ISpan: {
96 printcharset((p+1)->buff);
97 break;
99 case IOpenCall: {
100 printf("-> %d", (p + 1)->offset);
101 break;
103 case IBehind: {
104 printf("%d", p->i.aux);
105 break;
107 case IJmp: case ICall: case ICommit: case IChoice:
108 case IPartialCommit: case IBackCommit: case ITestAny: {
109 printjmp(op, p);
110 break;
112 default: break;
114 printf("\n");
118 void printpatt (Instruction *p, int n) {
119 Instruction *op = p;
120 while (p < op + n) {
121 printinst(op, p);
122 p += sizei(p);
127 #if defined(LPEG_DEBUG)
128 static void printcap (Capture *cap) {
129 printcapkind(cap->kind);
130 printf(" (idx: %d - size: %d) -> %p\n", cap->idx, cap->siz, cap->s);
134 void printcaplist (Capture *cap, Capture *limit) {
135 printf(">======\n");
136 for (; cap->s && (limit == NULL || cap < limit); cap++)
137 printcap(cap);
138 printf("=======\n");
140 #endif
142 /* }====================================================== */
146 ** {======================================================
147 ** Printing trees (for debugging)
148 ** =======================================================
151 static const char *tagnames[] = {
152 "char", "set", "any",
153 "true", "false",
154 "rep",
155 "seq", "choice",
156 "not", "and",
157 "call", "opencall", "rule", "grammar",
158 "behind",
159 "capture", "run-time"
163 void printtree (TTree *tree, int ident) {
164 int i;
165 for (i = 0; i < ident; i++) printf(" ");
166 printf("%s", tagnames[tree->tag]);
167 switch (tree->tag) {
168 case TChar: {
169 int c = tree->u.n;
170 if (isprint(c))
171 printf(" '%c'\n", c);
172 else
173 printf(" (%02X)\n", c);
174 break;
176 case TSet: {
177 printcharset(treebuffer(tree));
178 printf("\n");
179 break;
181 case TOpenCall: case TCall: {
182 printf(" key: %d\n", tree->key);
183 break;
185 case TBehind: {
186 printf(" %d\n", tree->u.n);
187 printtree(sib1(tree), ident + 2);
188 break;
190 case TCapture: {
191 printf(" cap: %d key: %d n: %d\n", tree->cap, tree->key, tree->u.n);
192 printtree(sib1(tree), ident + 2);
193 break;
195 case TRule: {
196 printf(" n: %d key: %d\n", tree->cap, tree->key);
197 printtree(sib1(tree), ident + 2);
198 break; /* do not print next rule as a sibling */
200 case TGrammar: {
201 TTree *rule = sib1(tree);
202 printf(" %d\n", tree->u.n); /* number of rules */
203 for (i = 0; i < tree->u.n; i++) {
204 printtree(rule, ident + 2);
205 rule = sib2(rule);
207 assert(rule->tag == TTrue); /* sentinel */
208 break;
210 default: {
211 int sibs = numsiblings[tree->tag];
212 printf("\n");
213 if (sibs >= 1) {
214 printtree(sib1(tree), ident + 2);
215 if (sibs >= 2)
216 printtree(sib2(tree), ident + 2);
218 break;
224 void printktable (lua_State *L, int idx) {
225 int n, i;
226 lua_getuservalue(L, idx);
227 if (lua_isnil(L, -1)) /* no ktable? */
228 return;
229 n = lua_rawlen(L, -1);
230 printf("[");
231 for (i = 1; i <= n; i++) {
232 printf("%d = ", i);
233 lua_rawgeti(L, -1, i);
234 if (lua_isstring(L, -1))
235 printf("%s ", lua_tostring(L, -1));
236 else
237 printf("%s ", lua_typename(L, lua_type(L, -1)));
238 lua_pop(L, 1);
240 printf("]\n");
241 /* leave ktable at the stack */
244 /* }====================================================== */
246 #endif
249 ** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $
250 ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
253 /* #include <limits.h> */
254 /* #include <string.h> */
257 /* #include "lua.h" */
258 /* #include "lauxlib.h" */
260 /* #include "lpcap.h" */
261 /* #include "lptypes.h" */
262 /* #include "lpvm.h" */
263 /* #include "lpprint.h" */
266 /* initial size for call/backtrack stack */
267 #if !defined(INITBACK)
268 #define INITBACK MAXBACK
269 #endif
272 #define getoffset(p) (((p) + 1)->offset)
274 static const Instruction giveup = {{IGiveup, 0, 0}};
278 ** {======================================================
279 ** Virtual Machine
280 ** =======================================================
284 typedef struct Stack {
285 const char *s; /* saved position (or NULL for calls) */
286 const Instruction *p; /* next instruction */
287 int caplevel;
288 } Stack;
291 #define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
295 ** Double the size of the array of captures
297 static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) {
298 Capture *newc;
299 if (captop >= INT_MAX/((int)sizeof(Capture) * 2))
300 luaL_error(L, "too many captures");
301 newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture));
302 memcpy(newc, cap, captop * sizeof(Capture));
303 lua_replace(L, caplistidx(ptop));
304 return newc;
309 ** Double the size of the stack
311 static Stack *doublestack (lua_State *L, Stack **stacklimit, int ptop) {
312 Stack *stack = getstackbase(L, ptop);
313 Stack *newstack;
314 int n = *stacklimit - stack; /* current stack size */
315 int max, newn;
316 lua_getfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
317 max = lua_tointeger(L, -1); /* maximum allowed size */
318 lua_pop(L, 1);
319 if (n >= max) /* already at maximum size? */
320 luaL_error(L, "backtrack stack overflow (current limit is %d)", max);
321 newn = 2 * n; /* new size */
322 if (newn > max) newn = max;
323 newstack = (Stack *)lua_newuserdata(L, newn * sizeof(Stack));
324 memcpy(newstack, stack, n * sizeof(Stack));
325 lua_replace(L, stackidx(ptop));
326 *stacklimit = newstack + newn;
327 return newstack + n; /* return next position */
332 ** Interpret the result of a dynamic capture: false -> fail;
333 ** true -> keep current position; number -> next position.
334 ** Return new subject position. 'fr' is stack index where
335 ** is the result; 'curr' is current subject position; 'limit'
336 ** is subject's size.
338 static int resdyncaptures (lua_State *L, int fr, int curr, int limit) {
339 lua_Integer res;
340 if (!lua_toboolean(L, fr)) { /* false value? */
341 lua_settop(L, fr - 1); /* remove results */
342 return -1; /* and fail */
344 else if (lua_isboolean(L, fr)) /* true? */
345 res = curr; /* keep current position */
346 else {
347 res = lua_tointeger(L, fr) - 1; /* new position */
348 if (res < curr || res > limit)
349 luaL_error(L, "invalid position returned by match-time capture");
351 lua_remove(L, fr); /* remove first result (offset) */
352 return res;
357 ** Add capture values returned by a dynamic capture to the capture list
358 ** 'base', nested inside a group capture. 'fd' indexes the first capture
359 ** value, 'n' is the number of values (at least 1).
361 static void adddyncaptures (const char *s, Capture *base, int n, int fd) {
362 int i;
363 /* Cgroup capture is already there */
364 assert(base[0].kind == Cgroup && base[0].siz == 0);
365 base[0].idx = 0; /* make it an anonymous group */
366 for (i = 1; i <= n; i++) { /* add runtime captures */
367 base[i].kind = Cruntime;
368 base[i].siz = 1; /* mark it as closed */
369 base[i].idx = fd + i - 1; /* stack index of capture value */
370 base[i].s = s;
372 base[i].kind = Cclose; /* close group */
373 base[i].siz = 1;
374 base[i].s = s;
379 ** Remove dynamic captures from the Lua stack (called in case of failure)
381 static int removedyncap (lua_State *L, Capture *capture,
382 int level, int last) {
383 int id = finddyncap(capture + level, capture + last); /* index of 1st cap. */
384 int top = lua_gettop(L);
385 if (id == 0) return 0; /* no dynamic captures? */
386 lua_settop(L, id - 1); /* remove captures */
387 return top - id + 1; /* number of values removed */
392 ** Opcode interpreter
394 const char *match (lua_State *L, const char *o, const char *s, const char *e,
395 Instruction *op, Capture *capture, int ptop) {
396 Stack stackbase[INITBACK];
397 Stack *stacklimit = stackbase + INITBACK;
398 Stack *stack = stackbase; /* point to first empty slot in stack */
399 int capsize = INITCAPSIZE;
400 int captop = 0; /* point to first empty slot in captures */
401 int ndyncap = 0; /* number of dynamic captures (in Lua stack) */
402 const Instruction *p = op; /* current instruction */
403 stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++;
404 lua_pushlightuserdata(L, stackbase);
405 for (;;) {
406 #if defined(DEBUG)
407 printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
408 s, stack - getstackbase(L, ptop), ndyncap, captop);
409 printinst(op, p);
410 printcaplist(capture, capture + captop);
411 #endif
412 assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop);
413 switch ((Opcode)p->i.code) {
414 case IEnd: {
415 assert(stack == getstackbase(L, ptop) + 1);
416 capture[captop].kind = Cclose;
417 capture[captop].s = NULL;
418 return s;
420 case IGiveup: {
421 assert(stack == getstackbase(L, ptop));
422 return NULL;
424 case IRet: {
425 assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL);
426 p = (--stack)->p;
427 continue;
429 case IAny: {
430 if (s < e) { p++; s++; }
431 else goto fail;
432 continue;
434 case ITestAny: {
435 if (s < e) p += 2;
436 else p += getoffset(p);
437 continue;
439 case IChar: {
440 if ((byte)*s == p->i.aux && s < e) { p++; s++; }
441 else goto fail;
442 continue;
444 case ITestChar: {
445 if ((byte)*s == p->i.aux && s < e) p += 2;
446 else p += getoffset(p);
447 continue;
449 case ISet: {
450 int c = (byte)*s;
451 if (testchar((p+1)->buff, c) && s < e)
452 { p += CHARSETINSTSIZE; s++; }
453 else goto fail;
454 continue;
456 case ITestSet: {
457 int c = (byte)*s;
458 if (testchar((p + 2)->buff, c) && s < e)
459 p += 1 + CHARSETINSTSIZE;
460 else p += getoffset(p);
461 continue;
463 case IBehind: {
464 int n = p->i.aux;
465 if (n > s - o) goto fail;
466 s -= n; p++;
467 continue;
469 case ISpan: {
470 for (; s < e; s++) {
471 int c = (byte)*s;
472 if (!testchar((p+1)->buff, c)) break;
474 p += CHARSETINSTSIZE;
475 continue;
477 case IJmp: {
478 p += getoffset(p);
479 continue;
481 case IChoice: {
482 if (stack == stacklimit)
483 stack = doublestack(L, &stacklimit, ptop);
484 stack->p = p + getoffset(p);
485 stack->s = s;
486 stack->caplevel = captop;
487 stack++;
488 p += 2;
489 continue;
491 case ICall: {
492 if (stack == stacklimit)
493 stack = doublestack(L, &stacklimit, ptop);
494 stack->s = NULL;
495 stack->p = p + 2; /* save return address */
496 stack++;
497 p += getoffset(p);
498 continue;
500 case ICommit: {
501 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
502 stack--;
503 p += getoffset(p);
504 continue;
506 case IPartialCommit: {
507 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
508 (stack - 1)->s = s;
509 (stack - 1)->caplevel = captop;
510 p += getoffset(p);
511 continue;
513 case IBackCommit: {
514 assert(stack > getstackbase(L, ptop) && (stack - 1)->s != NULL);
515 s = (--stack)->s;
516 captop = stack->caplevel;
517 p += getoffset(p);
518 continue;
520 case IFailTwice:
521 assert(stack > getstackbase(L, ptop));
522 stack--;
523 /* go through */
524 case IFail:
525 fail: { /* pattern failed: try to backtrack */
526 do { /* remove pending calls */
527 assert(stack > getstackbase(L, ptop));
528 s = (--stack)->s;
529 } while (s == NULL);
530 if (ndyncap > 0) /* is there matchtime captures? */
531 ndyncap -= removedyncap(L, capture, stack->caplevel, captop);
532 captop = stack->caplevel;
533 p = stack->p;
534 continue;
536 case ICloseRunTime: {
537 CapState cs;
538 int rem, res, n;
539 int fr = lua_gettop(L) + 1; /* stack index of first result */
540 cs.s = o; cs.L = L; cs.ocap = capture; cs.ptop = ptop;
541 n = runtimecap(&cs, capture + captop, s, &rem); /* call function */
542 captop -= n; /* remove nested captures */
543 fr -= rem; /* 'rem' items were popped from Lua stack */
544 res = resdyncaptures(L, fr, s - o, e - o); /* get result */
545 if (res == -1) /* fail? */
546 goto fail;
547 s = o + res; /* else update current position */
548 n = lua_gettop(L) - fr + 1; /* number of new captures */
549 ndyncap += n - rem; /* update number of dynamic captures */
550 if (n > 0) { /* any new capture? */
551 if ((captop += n + 2) >= capsize) {
552 capture = doublecap(L, capture, captop, ptop);
553 capsize = 2 * captop;
555 /* add new captures to 'capture' list */
556 adddyncaptures(s, capture + captop - n - 2, n, fr);
558 p++;
559 continue;
561 case ICloseCapture: {
562 const char *s1 = s;
563 assert(captop > 0);
564 /* if possible, turn capture into a full capture */
565 if (capture[captop - 1].siz == 0 &&
566 s1 - capture[captop - 1].s < UCHAR_MAX) {
567 capture[captop - 1].siz = s1 - capture[captop - 1].s + 1;
568 p++;
569 continue;
571 else {
572 capture[captop].siz = 1; /* mark entry as closed */
573 capture[captop].s = s;
574 goto pushcapture;
577 case IOpenCapture:
578 capture[captop].siz = 0; /* mark entry as open */
579 capture[captop].s = s;
580 goto pushcapture;
581 case IFullCapture:
582 capture[captop].siz = getoff(p) + 1; /* save capture size */
583 capture[captop].s = s - getoff(p);
584 /* goto pushcapture; */
585 pushcapture: {
586 capture[captop].idx = p->i.key;
587 capture[captop].kind = getkind(p);
588 if (++captop >= capsize) {
589 capture = doublecap(L, capture, captop, ptop);
590 capsize = 2 * captop;
592 p++;
593 continue;
595 default: assert(0); return NULL;
600 /* }====================================================== */
604 ** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $
605 ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
608 /* #include <limits.h> */
611 /* #include "lua.h" */
612 /* #include "lauxlib.h" */
614 /* #include "lptypes.h" */
615 /* #include "lpcode.h" */
618 /* signals a "no-instruction */
619 #define NOINST -1
623 static const Charset fullset_ =
624 {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
625 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
626 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
627 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}};
629 static const Charset *fullset = &fullset_;
632 ** {======================================================
633 ** Analysis and some optimizations
634 ** =======================================================
638 ** Check whether a charset is empty (returns IFail), singleton (IChar),
639 ** full (IAny), or none of those (ISet). When singleton, '*c' returns
640 ** which character it is. (When generic set, the set was the input,
641 ** so there is no need to return it.)
643 static Opcode charsettype (const byte *cs, int *c) {
644 int count = 0; /* number of characters in the set */
645 int i;
646 int candidate = -1; /* candidate position for the singleton char */
647 for (i = 0; i < CHARSETSIZE; i++) { /* for each byte */
648 int b = cs[i];
649 if (b == 0) { /* is byte empty? */
650 if (count > 1) /* was set neither empty nor singleton? */
651 return ISet; /* neither full nor empty nor singleton */
652 /* else set is still empty or singleton */
654 else if (b == 0xFF) { /* is byte full? */
655 if (count < (i * BITSPERCHAR)) /* was set not full? */
656 return ISet; /* neither full nor empty nor singleton */
657 else count += BITSPERCHAR; /* set is still full */
659 else if ((b & (b - 1)) == 0) { /* has byte only one bit? */
660 if (count > 0) /* was set not empty? */
661 return ISet; /* neither full nor empty nor singleton */
662 else { /* set has only one char till now; track it */
663 count++;
664 candidate = i;
667 else return ISet; /* byte is neither empty, full, nor singleton */
669 switch (count) {
670 case 0: return IFail; /* empty set */
671 case 1: { /* singleton; find character bit inside byte */
672 int b = cs[candidate];
673 *c = candidate * BITSPERCHAR;
674 if ((b & 0xF0) != 0) { *c += 4; b >>= 4; }
675 if ((b & 0x0C) != 0) { *c += 2; b >>= 2; }
676 if ((b & 0x02) != 0) { *c += 1; }
677 return IChar;
679 default: {
680 assert(count == CHARSETSIZE * BITSPERCHAR); /* full set */
681 return IAny;
688 ** A few basic operations on Charsets
690 static void cs_complement (Charset *cs) {
691 loopset(i, cs->cs[i] = ~cs->cs[i]);
694 static int cs_equal (const byte *cs1, const byte *cs2) {
695 loopset(i, if (cs1[i] != cs2[i]) return 0);
696 return 1;
699 static int cs_disjoint (const Charset *cs1, const Charset *cs2) {
700 loopset(i, if ((cs1->cs[i] & cs2->cs[i]) != 0) return 0;)
701 return 1;
706 ** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
707 ** charset and return 1; else return 0.
709 int tocharset (TTree *tree, Charset *cs) {
710 switch (tree->tag) {
711 case TSet: { /* copy set */
712 loopset(i, cs->cs[i] = treebuffer(tree)[i]);
713 return 1;
715 case TChar: { /* only one char */
716 assert(0 <= tree->u.n && tree->u.n <= UCHAR_MAX);
717 loopset(i, cs->cs[i] = 0); /* erase all chars */
718 setchar(cs->cs, tree->u.n); /* add that one */
719 return 1;
721 case TAny: {
722 loopset(i, cs->cs[i] = 0xFF); /* add all characters to the set */
723 return 1;
725 default: return 0;
731 ** Check whether a pattern tree has captures
733 int hascaptures (TTree *tree) {
734 tailcall:
735 switch (tree->tag) {
736 case TCapture: case TRunTime:
737 return 1;
738 case TCall:
739 tree = sib2(tree); goto tailcall; /* return hascaptures(sib2(tree)); */
740 case TOpenCall: assert(0);
741 default: {
742 switch (numsiblings[tree->tag]) {
743 case 1: /* return hascaptures(sib1(tree)); */
744 tree = sib1(tree); goto tailcall;
745 case 2:
746 if (hascaptures(sib1(tree))) return 1;
747 /* else return hascaptures(sib2(tree)); */
748 tree = sib2(tree); goto tailcall;
749 default: assert(numsiblings[tree->tag] == 0); return 0;
757 ** Checks how a pattern behaves regarding the empty string,
758 ** in one of two different ways:
759 ** A pattern is *nullable* if it can match without consuming any character;
760 ** A pattern is *nofail* if it never fails for any string
761 ** (including the empty string).
762 ** The difference is only for predicates and run-time captures;
763 ** for other patterns, the two properties are equivalent.
764 ** (With predicates, &'a' is nullable but not nofail. Of course,
765 ** nofail => nullable.)
766 ** These functions are all convervative in the following way:
767 ** p is nullable => nullable(p)
768 ** nofail(p) => p cannot fail
769 ** The function assumes that TOpenCall is not nullable;
770 ** this will be checked again when the grammar is fixed.
771 ** Run-time captures can do whatever they want, so the result
772 ** is conservative.
774 int checkaux (TTree *tree, int pred) {
775 tailcall:
776 switch (tree->tag) {
777 case TChar: case TSet: case TAny:
778 case TFalse: case TOpenCall:
779 return 0; /* not nullable */
780 case TRep: case TTrue:
781 return 1; /* no fail */
782 case TNot: case TBehind: /* can match empty, but can fail */
783 if (pred == PEnofail) return 0;
784 else return 1; /* PEnullable */
785 case TAnd: /* can match empty; fail iff body does */
786 if (pred == PEnullable) return 1;
787 /* else return checkaux(sib1(tree), pred); */
788 tree = sib1(tree); goto tailcall;
789 case TRunTime: /* can fail; match empty iff body does */
790 if (pred == PEnofail) return 0;
791 /* else return checkaux(sib1(tree), pred); */
792 tree = sib1(tree); goto tailcall;
793 case TSeq:
794 if (!checkaux(sib1(tree), pred)) return 0;
795 /* else return checkaux(sib2(tree), pred); */
796 tree = sib2(tree); goto tailcall;
797 case TChoice:
798 if (checkaux(sib2(tree), pred)) return 1;
799 /* else return checkaux(sib1(tree), pred); */
800 tree = sib1(tree); goto tailcall;
801 case TCapture: case TGrammar: case TRule:
802 /* return checkaux(sib1(tree), pred); */
803 tree = sib1(tree); goto tailcall;
804 case TCall: /* return checkaux(sib2(tree), pred); */
805 tree = sib2(tree); goto tailcall;
806 default: assert(0); return 0;
812 ** number of characters to match a pattern (or -1 if variable)
813 ** ('count' avoids infinite loops for grammars)
815 int fixedlenx (TTree *tree, int count, int len) {
816 tailcall:
817 switch (tree->tag) {
818 case TChar: case TSet: case TAny:
819 return len + 1;
820 case TFalse: case TTrue: case TNot: case TAnd: case TBehind:
821 return len;
822 case TRep: case TRunTime: case TOpenCall:
823 return -1;
824 case TCapture: case TRule: case TGrammar:
825 /* return fixedlenx(sib1(tree), count); */
826 tree = sib1(tree); goto tailcall;
827 case TCall:
828 if (count++ >= MAXRULES)
829 return -1; /* may be a loop */
830 /* else return fixedlenx(sib2(tree), count); */
831 tree = sib2(tree); goto tailcall;
832 case TSeq: {
833 len = fixedlenx(sib1(tree), count, len);
834 if (len < 0) return -1;
835 /* else return fixedlenx(sib2(tree), count, len); */
836 tree = sib2(tree); goto tailcall;
838 case TChoice: {
839 int n1, n2;
840 n1 = fixedlenx(sib1(tree), count, len);
841 if (n1 < 0) return -1;
842 n2 = fixedlenx(sib2(tree), count, len);
843 if (n1 == n2) return n1;
844 else return -1;
846 default: assert(0); return 0;
852 ** Computes the 'first set' of a pattern.
853 ** The result is a conservative aproximation:
854 ** match p ax -> x (for some x) ==> a belongs to first(p)
855 ** or
856 ** a not in first(p) ==> match p ax -> fail (for all x)
858 ** The set 'follow' is the first set of what follows the
859 ** pattern (full set if nothing follows it).
861 ** The function returns 0 when this resulting set can be used for
862 ** test instructions that avoid the pattern altogether.
863 ** A non-zero return can happen for two reasons:
864 ** 1) match p '' -> '' ==> return has bit 1 set
865 ** (tests cannot be used because they would always fail for an empty input);
866 ** 2) there is a match-time capture ==> return has bit 2 set
867 ** (optimizations should not bypass match-time captures).
869 static int getfirst (TTree *tree, const Charset *follow, Charset *firstset) {
870 tailcall:
871 switch (tree->tag) {
872 case TChar: case TSet: case TAny: {
873 tocharset(tree, firstset);
874 return 0;
876 case TTrue: {
877 loopset(i, firstset->cs[i] = follow->cs[i]);
878 return 1; /* accepts the empty string */
880 case TFalse: {
881 loopset(i, firstset->cs[i] = 0);
882 return 0;
884 case TChoice: {
885 Charset csaux;
886 int e1 = getfirst(sib1(tree), follow, firstset);
887 int e2 = getfirst(sib2(tree), follow, &csaux);
888 loopset(i, firstset->cs[i] |= csaux.cs[i]);
889 return e1 | e2;
891 case TSeq: {
892 if (!nullable(sib1(tree))) {
893 /* when p1 is not nullable, p2 has nothing to contribute;
894 return getfirst(sib1(tree), fullset, firstset); */
895 tree = sib1(tree); follow = fullset; goto tailcall;
897 else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
898 Charset csaux;
899 int e2 = getfirst(sib2(tree), follow, &csaux);
900 int e1 = getfirst(sib1(tree), &csaux, firstset);
901 if (e1 == 0) return 0; /* 'e1' ensures that first can be used */
902 else if ((e1 | e2) & 2) /* one of the children has a matchtime? */
903 return 2; /* pattern has a matchtime capture */
904 else return e2; /* else depends on 'e2' */
907 case TRep: {
908 getfirst(sib1(tree), follow, firstset);
909 loopset(i, firstset->cs[i] |= follow->cs[i]);
910 return 1; /* accept the empty string */
912 case TCapture: case TGrammar: case TRule: {
913 /* return getfirst(sib1(tree), follow, firstset); */
914 tree = sib1(tree); goto tailcall;
916 case TRunTime: { /* function invalidates any follow info. */
917 int e = getfirst(sib1(tree), fullset, firstset);
918 if (e) return 2; /* function is not "protected"? */
919 else return 0; /* pattern inside capture ensures first can be used */
921 case TCall: {
922 /* return getfirst(sib2(tree), follow, firstset); */
923 tree = sib2(tree); goto tailcall;
925 case TAnd: {
926 int e = getfirst(sib1(tree), follow, firstset);
927 loopset(i, firstset->cs[i] &= follow->cs[i]);
928 return e;
930 case TNot: {
931 if (tocharset(sib1(tree), firstset)) {
932 cs_complement(firstset);
933 return 1;
935 /* else go through */
937 case TBehind: { /* instruction gives no new information */
938 /* call 'getfirst' only to check for math-time captures */
939 int e = getfirst(sib1(tree), follow, firstset);
940 loopset(i, firstset->cs[i] = follow->cs[i]); /* uses follow */
941 return e | 1; /* always can accept the empty string */
943 default: assert(0); return 0;
949 ** If 'headfail(tree)' true, then 'tree' can fail only depending on the
950 ** next character of the subject.
952 static int headfail (TTree *tree) {
953 tailcall:
954 switch (tree->tag) {
955 case TChar: case TSet: case TAny: case TFalse:
956 return 1;
957 case TTrue: case TRep: case TRunTime: case TNot:
958 case TBehind:
959 return 0;
960 case TCapture: case TGrammar: case TRule: case TAnd:
961 tree = sib1(tree); goto tailcall; /* return headfail(sib1(tree)); */
962 case TCall:
963 tree = sib2(tree); goto tailcall; /* return headfail(sib2(tree)); */
964 case TSeq:
965 if (!nofail(sib2(tree))) return 0;
966 /* else return headfail(sib1(tree)); */
967 tree = sib1(tree); goto tailcall;
968 case TChoice:
969 if (!headfail(sib1(tree))) return 0;
970 /* else return headfail(sib2(tree)); */
971 tree = sib2(tree); goto tailcall;
972 default: assert(0); return 0;
978 ** Check whether the code generation for the given tree can benefit
979 ** from a follow set (to avoid computing the follow set when it is
980 ** not needed)
982 static int needfollow (TTree *tree) {
983 tailcall:
984 switch (tree->tag) {
985 case TChar: case TSet: case TAny:
986 case TFalse: case TTrue: case TAnd: case TNot:
987 case TRunTime: case TGrammar: case TCall: case TBehind:
988 return 0;
989 case TChoice: case TRep:
990 return 1;
991 case TCapture:
992 tree = sib1(tree); goto tailcall;
993 case TSeq:
994 tree = sib2(tree); goto tailcall;
995 default: assert(0); return 0;
999 /* }====================================================== */
1004 ** {======================================================
1005 ** Code generation
1006 ** =======================================================
1011 ** size of an instruction
1013 int sizei (const Instruction *i) {
1014 switch((Opcode)i->i.code) {
1015 case ISet: case ISpan: return CHARSETINSTSIZE;
1016 case ITestSet: return CHARSETINSTSIZE + 1;
1017 case ITestChar: case ITestAny: case IChoice: case IJmp: case ICall:
1018 case IOpenCall: case ICommit: case IPartialCommit: case IBackCommit:
1019 return 2;
1020 default: return 1;
1026 ** state for the compiler
1028 typedef struct CompileState {
1029 Pattern *p; /* pattern being compiled */
1030 int ncode; /* next position in p->code to be filled */
1031 lua_State *L;
1032 } CompileState;
1036 ** code generation is recursive; 'opt' indicates that the code is being
1037 ** generated as the last thing inside an optional pattern (so, if that
1038 ** code is optional too, it can reuse the 'IChoice' already in place for
1039 ** the outer pattern). 'tt' points to a previous test protecting this
1040 ** code (or NOINST). 'fl' is the follow set of the pattern.
1042 static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
1043 const Charset *fl);
1046 void realloccode (lua_State *L, Pattern *p, int nsize) {
1047 void *ud;
1048 lua_Alloc f = lua_getallocf(L, &ud);
1049 void *newblock = f(ud, p->code, p->codesize * sizeof(Instruction),
1050 nsize * sizeof(Instruction));
1051 if (newblock == NULL && nsize > 0)
1052 luaL_error(L, "not enough memory");
1053 p->code = (Instruction *)newblock;
1054 p->codesize = nsize;
1058 static int nextinstruction (CompileState *compst) {
1059 int size = compst->p->codesize;
1060 if (compst->ncode >= size)
1061 realloccode(compst->L, compst->p, size * 2);
1062 return compst->ncode++;
1066 #define getinstr(cs,i) ((cs)->p->code[i])
1069 static int addinstruction (CompileState *compst, Opcode op, int aux) {
1070 int i = nextinstruction(compst);
1071 getinstr(compst, i).i.code = op;
1072 getinstr(compst, i).i.aux = aux;
1073 return i;
1078 ** Add an instruction followed by space for an offset (to be set later)
1080 static int addoffsetinst (CompileState *compst, Opcode op) {
1081 int i = addinstruction(compst, op, 0); /* instruction */
1082 addinstruction(compst, (Opcode)0, 0); /* open space for offset */
1083 assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2);
1084 return i;
1089 ** Set the offset of an instruction
1091 static void setoffset (CompileState *compst, int instruction, int offset) {
1092 getinstr(compst, instruction + 1).offset = offset;
1097 ** Add a capture instruction:
1098 ** 'op' is the capture instruction; 'cap' the capture kind;
1099 ** 'key' the key into ktable; 'aux' is the optional capture offset
1102 static int addinstcap (CompileState *compst, Opcode op, int cap, int key,
1103 int aux) {
1104 int i = addinstruction(compst, op, joinkindoff(cap, aux));
1105 getinstr(compst, i).i.key = key;
1106 return i;
1110 #define gethere(compst) ((compst)->ncode)
1112 #define target(code,i) ((i) + code[i + 1].offset)
1116 ** Patch 'instruction' to jump to 'target'
1118 static void jumptothere (CompileState *compst, int instruction, int target) {
1119 if (instruction >= 0)
1120 setoffset(compst, instruction, target - instruction);
1125 ** Patch 'instruction' to jump to current position
1127 static void jumptohere (CompileState *compst, int instruction) {
1128 jumptothere(compst, instruction, gethere(compst));
1133 ** Code an IChar instruction, or IAny if there is an equivalent
1134 ** test dominating it
1136 static void codechar (CompileState *compst, int c, int tt) {
1137 if (tt >= 0 && getinstr(compst, tt).i.code == ITestChar &&
1138 getinstr(compst, tt).i.aux == c)
1139 addinstruction(compst, IAny, 0);
1140 else
1141 addinstruction(compst, IChar, c);
1146 ** Add a charset posfix to an instruction
1148 static void addcharset (CompileState *compst, const byte *cs) {
1149 int p = gethere(compst);
1150 int i;
1151 for (i = 0; i < (int)CHARSETINSTSIZE - 1; i++)
1152 nextinstruction(compst); /* space for buffer */
1153 /* fill buffer with charset */
1154 loopset(j, getinstr(compst, p).buff[j] = cs[j]);
1159 ** code a char set, optimizing unit sets for IChar, "complete"
1160 ** sets for IAny, and empty sets for IFail; also use an IAny
1161 ** when instruction is dominated by an equivalent test.
1163 static void codecharset (CompileState *compst, const byte *cs, int tt) {
1164 int c = 0; /* (=) to avoid warnings */
1165 Opcode op = charsettype(cs, &c);
1166 switch (op) {
1167 case IChar: codechar(compst, c, tt); break;
1168 case ISet: { /* non-trivial set? */
1169 if (tt >= 0 && getinstr(compst, tt).i.code == ITestSet &&
1170 cs_equal(cs, getinstr(compst, tt + 2).buff))
1171 addinstruction(compst, IAny, 0);
1172 else {
1173 addinstruction(compst, ISet, 0);
1174 addcharset(compst, cs);
1176 break;
1178 default: addinstruction(compst, op, c); break;
1184 ** code a test set, optimizing unit sets for ITestChar, "complete"
1185 ** sets for ITestAny, and empty sets for IJmp (always fails).
1186 ** 'e' is true iff test should accept the empty string. (Test
1187 ** instructions in the current VM never accept the empty string.)
1189 static int codetestset (CompileState *compst, Charset *cs, int e) {
1190 if (e) return NOINST; /* no test */
1191 else {
1192 int c = 0;
1193 Opcode op = charsettype(cs->cs, &c);
1194 switch (op) {
1195 case IFail: return addoffsetinst(compst, IJmp); /* always jump */
1196 case IAny: return addoffsetinst(compst, ITestAny);
1197 case IChar: {
1198 int i = addoffsetinst(compst, ITestChar);
1199 getinstr(compst, i).i.aux = c;
1200 return i;
1202 case ISet: {
1203 int i = addoffsetinst(compst, ITestSet);
1204 addcharset(compst, cs->cs);
1205 return i;
1207 default: assert(0); return 0;
1214 ** Find the final destination of a sequence of jumps
1216 static int finaltarget (Instruction *code, int i) {
1217 while (code[i].i.code == IJmp)
1218 i = target(code, i);
1219 return i;
1224 ** final label (after traversing any jumps)
1226 static int finallabel (Instruction *code, int i) {
1227 return finaltarget(code, target(code, i));
1232 ** <behind(p)> == behind n; <p> (where n = fixedlen(p))
1234 static void codebehind (CompileState *compst, TTree *tree) {
1235 if (tree->u.n > 0)
1236 addinstruction(compst, IBehind, tree->u.n);
1237 codegen(compst, sib1(tree), 0, NOINST, fullset);
1242 ** Choice; optimizations:
1243 ** - when p1 is headfail or
1244 ** when first(p1) and first(p2) are disjoint, than
1245 ** a character not in first(p1) cannot go to p1, and a character
1246 ** in first(p1) cannot go to p2 (at it is not in first(p2)).
1247 ** (The optimization is not valid if p1 accepts the empty string,
1248 ** as then there is no character at all...)
1249 ** - when p2 is empty and opt is true; a IPartialCommit can reuse
1250 ** the Choice already active in the stack.
1252 static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
1253 const Charset *fl) {
1254 int emptyp2 = (p2->tag == TTrue);
1255 Charset cs1, cs2;
1256 int e1 = getfirst(p1, fullset, &cs1);
1257 if (headfail(p1) ||
1258 (!e1 && (getfirst(p2, fl, &cs2), cs_disjoint(&cs1, &cs2)))) {
1259 /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
1260 int test = codetestset(compst, &cs1, 0);
1261 int jmp = NOINST;
1262 codegen(compst, p1, 0, test, fl);
1263 if (!emptyp2)
1264 jmp = addoffsetinst(compst, IJmp);
1265 jumptohere(compst, test);
1266 codegen(compst, p2, opt, NOINST, fl);
1267 jumptohere(compst, jmp);
1269 else if (opt && emptyp2) {
1270 /* p1? == IPartialCommit; p1 */
1271 jumptohere(compst, addoffsetinst(compst, IPartialCommit));
1272 codegen(compst, p1, 1, NOINST, fullset);
1274 else {
1275 /* <p1 / p2> ==
1276 test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
1277 int pcommit;
1278 int test = codetestset(compst, &cs1, e1);
1279 int pchoice = addoffsetinst(compst, IChoice);
1280 codegen(compst, p1, emptyp2, test, fullset);
1281 pcommit = addoffsetinst(compst, ICommit);
1282 jumptohere(compst, pchoice);
1283 jumptohere(compst, test);
1284 codegen(compst, p2, opt, NOINST, fl);
1285 jumptohere(compst, pcommit);
1291 ** And predicate
1292 ** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
1293 ** (valid only when 'p' has no captures)
1295 static void codeand (CompileState *compst, TTree *tree, int tt) {
1296 int n = fixedlen(tree);
1297 if (n >= 0 && n <= MAXBEHIND && !hascaptures(tree)) {
1298 codegen(compst, tree, 0, tt, fullset);
1299 if (n > 0)
1300 addinstruction(compst, IBehind, n);
1302 else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
1303 int pcommit;
1304 int pchoice = addoffsetinst(compst, IChoice);
1305 codegen(compst, tree, 0, tt, fullset);
1306 pcommit = addoffsetinst(compst, IBackCommit);
1307 jumptohere(compst, pchoice);
1308 addinstruction(compst, IFail, 0);
1309 jumptohere(compst, pcommit);
1315 ** Captures: if pattern has fixed (and not too big) length, use
1316 ** a single IFullCapture instruction after the match; otherwise,
1317 ** enclose the pattern with OpenCapture - CloseCapture.
1319 static void codecapture (CompileState *compst, TTree *tree, int tt,
1320 const Charset *fl) {
1321 int len = fixedlen(sib1(tree));
1322 if (len >= 0 && len <= MAXOFF && !hascaptures(sib1(tree))) {
1323 codegen(compst, sib1(tree), 0, tt, fl);
1324 addinstcap(compst, IFullCapture, tree->cap, tree->key, len);
1326 else {
1327 addinstcap(compst, IOpenCapture, tree->cap, tree->key, 0);
1328 codegen(compst, sib1(tree), 0, tt, fl);
1329 addinstcap(compst, ICloseCapture, Cclose, 0, 0);
1334 static void coderuntime (CompileState *compst, TTree *tree, int tt) {
1335 addinstcap(compst, IOpenCapture, Cgroup, tree->key, 0);
1336 codegen(compst, sib1(tree), 0, tt, fullset);
1337 addinstcap(compst, ICloseRunTime, Cclose, 0, 0);
1342 ** Repetion; optimizations:
1343 ** When pattern is a charset, can use special instruction ISpan.
1344 ** When pattern is head fail, or if it starts with characters that
1345 ** are disjoint from what follows the repetions, a simple test
1346 ** is enough (a fail inside the repetition would backtrack to fail
1347 ** again in the following pattern, so there is no need for a choice).
1348 ** When 'opt' is true, the repetion can reuse the Choice already
1349 ** active in the stack.
1351 static void coderep (CompileState *compst, TTree *tree, int opt,
1352 const Charset *fl) {
1353 Charset st;
1354 if (tocharset(tree, &st)) {
1355 addinstruction(compst, ISpan, 0);
1356 addcharset(compst, st.cs);
1358 else {
1359 int e1 = getfirst(tree, fullset, &st);
1360 if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) {
1361 /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
1362 int jmp;
1363 int test = codetestset(compst, &st, 0);
1364 codegen(compst, tree, 0, test, fullset);
1365 jmp = addoffsetinst(compst, IJmp);
1366 jumptohere(compst, test);
1367 jumptothere(compst, jmp, test);
1369 else {
1370 /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */
1371 /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */
1372 int commit, l2;
1373 int test = codetestset(compst, &st, e1);
1374 int pchoice = NOINST;
1375 if (opt)
1376 jumptohere(compst, addoffsetinst(compst, IPartialCommit));
1377 else
1378 pchoice = addoffsetinst(compst, IChoice);
1379 l2 = gethere(compst);
1380 codegen(compst, tree, 0, NOINST, fullset);
1381 commit = addoffsetinst(compst, IPartialCommit);
1382 jumptothere(compst, commit, l2);
1383 jumptohere(compst, pchoice);
1384 jumptohere(compst, test);
1391 ** Not predicate; optimizations:
1392 ** In any case, if first test fails, 'not' succeeds, so it can jump to
1393 ** the end. If pattern is headfail, that is all (it cannot fail
1394 ** in other parts); this case includes 'not' of simple sets. Otherwise,
1395 ** use the default code (a choice plus a failtwice).
1397 static void codenot (CompileState *compst, TTree *tree) {
1398 Charset st;
1399 int e = getfirst(tree, fullset, &st);
1400 int test = codetestset(compst, &st, e);
1401 if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */
1402 addinstruction(compst, IFail, 0);
1403 else {
1404 /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */
1405 int pchoice = addoffsetinst(compst, IChoice);
1406 codegen(compst, tree, 0, NOINST, fullset);
1407 addinstruction(compst, IFailTwice, 0);
1408 jumptohere(compst, pchoice);
1410 jumptohere(compst, test);
1415 ** change open calls to calls, using list 'positions' to find
1416 ** correct offsets; also optimize tail calls
1418 static void correctcalls (CompileState *compst, int *positions,
1419 int from, int to) {
1420 int i;
1421 Instruction *code = compst->p->code;
1422 for (i = from; i < to; i += sizei(&code[i])) {
1423 if (code[i].i.code == IOpenCall) {
1424 int n = code[i].i.key; /* rule number */
1425 int rule = positions[n]; /* rule position */
1426 assert(rule == from || code[rule - 1].i.code == IRet);
1427 if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */
1428 code[i].i.code = IJmp; /* tail call */
1429 else
1430 code[i].i.code = ICall;
1431 jumptothere(compst, i, rule); /* call jumps to respective rule */
1434 assert(i == to);
1439 ** Code for a grammar:
1440 ** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
1442 static void codegrammar (CompileState *compst, TTree *grammar) {
1443 int positions[MAXRULES];
1444 int rulenumber = 0;
1445 TTree *rule;
1446 int firstcall = addoffsetinst(compst, ICall); /* call initial rule */
1447 int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */
1448 int start = gethere(compst); /* here starts the initial rule */
1449 jumptohere(compst, firstcall);
1450 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
1451 positions[rulenumber++] = gethere(compst); /* save rule position */
1452 codegen(compst, sib1(rule), 0, NOINST, fullset); /* code rule */
1453 addinstruction(compst, IRet, 0);
1455 assert(rule->tag == TTrue);
1456 jumptohere(compst, jumptoend);
1457 correctcalls(compst, positions, start, gethere(compst));
1461 static void codecall (CompileState *compst, TTree *call) {
1462 int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */
1463 getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */
1464 assert(sib2(call)->tag == TRule);
1469 ** Code first child of a sequence
1470 ** (second child is called in-place to allow tail call)
1471 ** Return 'tt' for second child
1473 static int codeseq1 (CompileState *compst, TTree *p1, TTree *p2,
1474 int tt, const Charset *fl) {
1475 if (needfollow(p1)) {
1476 Charset fl1;
1477 getfirst(p2, fl, &fl1); /* p1 follow is p2 first */
1478 codegen(compst, p1, 0, tt, &fl1);
1480 else /* use 'fullset' as follow */
1481 codegen(compst, p1, 0, tt, fullset);
1482 if (fixedlen(p1) != 0) /* can 'p1' consume anything? */
1483 return NOINST; /* invalidate test */
1484 else return tt; /* else 'tt' still protects sib2 */
1489 ** Main code-generation function: dispatch to auxiliar functions
1490 ** according to kind of tree. ('needfollow' should return true
1491 ** only for consructions that use 'fl'.)
1493 static void codegen (CompileState *compst, TTree *tree, int opt, int tt,
1494 const Charset *fl) {
1495 tailcall:
1496 switch (tree->tag) {
1497 case TChar: codechar(compst, tree->u.n, tt); break;
1498 case TAny: addinstruction(compst, IAny, 0); break;
1499 case TSet: codecharset(compst, treebuffer(tree), tt); break;
1500 case TTrue: break;
1501 case TFalse: addinstruction(compst, IFail, 0); break;
1502 case TChoice: codechoice(compst, sib1(tree), sib2(tree), opt, fl); break;
1503 case TRep: coderep(compst, sib1(tree), opt, fl); break;
1504 case TBehind: codebehind(compst, tree); break;
1505 case TNot: codenot(compst, sib1(tree)); break;
1506 case TAnd: codeand(compst, sib1(tree), tt); break;
1507 case TCapture: codecapture(compst, tree, tt, fl); break;
1508 case TRunTime: coderuntime(compst, tree, tt); break;
1509 case TGrammar: codegrammar(compst, tree); break;
1510 case TCall: codecall(compst, tree); break;
1511 case TSeq: {
1512 tt = codeseq1(compst, sib1(tree), sib2(tree), tt, fl); /* code 'p1' */
1513 /* codegen(compst, p2, opt, tt, fl); */
1514 tree = sib2(tree); goto tailcall;
1516 default: assert(0);
1522 ** Optimize jumps and other jump-like instructions.
1523 ** * Update labels of instructions with labels to their final
1524 ** destinations (e.g., choice L1; ... L1: jmp L2: becomes
1525 ** choice L2)
1526 ** * Jumps to other instructions that do jumps become those
1527 ** instructions (e.g., jump to return becomes a return; jump
1528 ** to commit becomes a commit)
1530 static void peephole (CompileState *compst) {
1531 Instruction *code = compst->p->code;
1532 int i;
1533 for (i = 0; i < compst->ncode; i += sizei(&code[i])) {
1534 redo:
1535 switch (code[i].i.code) {
1536 case IChoice: case ICall: case ICommit: case IPartialCommit:
1537 case IBackCommit: case ITestChar: case ITestSet:
1538 case ITestAny: { /* instructions with labels */
1539 jumptothere(compst, i, finallabel(code, i)); /* optimize label */
1540 break;
1542 case IJmp: {
1543 int ft = finaltarget(code, i);
1544 switch (code[ft].i.code) { /* jumping to what? */
1545 case IRet: case IFail: case IFailTwice:
1546 case IEnd: { /* instructions with unconditional implicit jumps */
1547 code[i] = code[ft]; /* jump becomes that instruction */
1548 code[i + 1].i.code = IAny; /* 'no-op' for target position */
1549 break;
1551 case ICommit: case IPartialCommit:
1552 case IBackCommit: { /* inst. with unconditional explicit jumps */
1553 int fft = finallabel(code, ft);
1554 code[i] = code[ft]; /* jump becomes that instruction... */
1555 jumptothere(compst, i, fft); /* but must correct its offset */
1556 goto redo; /* reoptimize its label */
1558 default: {
1559 jumptothere(compst, i, ft); /* optimize label */
1560 break;
1563 break;
1565 default: break;
1568 assert(code[i - 1].i.code == IEnd);
1573 ** Compile a pattern
1575 Instruction *compile (lua_State *L, Pattern *p) {
1576 CompileState compst;
1577 compst.p = p; compst.ncode = 0; compst.L = L;
1578 realloccode(L, p, 2); /* minimum initial size */
1579 codegen(&compst, p->tree, 0, NOINST, fullset);
1580 addinstruction(&compst, IEnd, 0);
1581 realloccode(L, p, compst.ncode); /* set final size */
1582 peephole(&compst);
1583 return p->code;
1587 /* }====================================================== */
1592 ** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $
1593 ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
1596 /* #include "lua.h" */
1597 /* #include "lauxlib.h" */
1599 /* #include "lpcap.h" */
1600 /* #include "lptypes.h" */
1603 #define captype(cap) ((cap)->kind)
1605 #define isclosecap(cap) (captype(cap) == Cclose)
1607 #define closeaddr(c) ((c)->s + (c)->siz - 1)
1609 #define isfullcap(cap) ((cap)->siz != 0)
1611 #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
1613 #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx)
1618 ** Put at the cache for Lua values the value indexed by 'v' in ktable
1619 ** of the running pattern (if it is not there yet); returns its index.
1621 static int updatecache (CapState *cs, int v) {
1622 int idx = cs->ptop + 1; /* stack index of cache for Lua values */
1623 if (v != cs->valuecached) { /* not there? */
1624 getfromktable(cs, v); /* get value from 'ktable' */
1625 lua_replace(cs->L, idx); /* put it at reserved stack position */
1626 cs->valuecached = v; /* keep track of what is there */
1628 return idx;
1632 static int pushcapture (CapState *cs);
1636 ** Goes back in a list of captures looking for an open capture
1637 ** corresponding to a close
1639 static Capture *findopen (Capture *cap) {
1640 int n = 0; /* number of closes waiting an open */
1641 for (;;) {
1642 cap--;
1643 if (isclosecap(cap)) n++; /* one more open to skip */
1644 else if (!isfullcap(cap))
1645 if (n-- == 0) return cap;
1651 ** Go to the next capture
1653 static void nextcap (CapState *cs) {
1654 Capture *cap = cs->cap;
1655 if (!isfullcap(cap)) { /* not a single capture? */
1656 int n = 0; /* number of opens waiting a close */
1657 for (;;) { /* look for corresponding close */
1658 cap++;
1659 if (isclosecap(cap)) {
1660 if (n-- == 0) break;
1662 else if (!isfullcap(cap)) n++;
1665 cs->cap = cap + 1; /* + 1 to skip last close (or entire single capture) */
1670 ** Push on the Lua stack all values generated by nested captures inside
1671 ** the current capture. Returns number of values pushed. 'addextra'
1672 ** makes it push the entire match after all captured values. The
1673 ** entire match is pushed also if there are no other nested values,
1674 ** so the function never returns zero.
1676 static int pushnestedvalues (CapState *cs, int addextra) {
1677 Capture *co = cs->cap;
1678 if (isfullcap(cs->cap++)) { /* no nested captures? */
1679 lua_pushlstring(cs->L, co->s, co->siz - 1); /* push whole match */
1680 return 1; /* that is it */
1682 else {
1683 int n = 0;
1684 while (!isclosecap(cs->cap)) /* repeat for all nested patterns */
1685 n += pushcapture(cs);
1686 if (addextra || n == 0) { /* need extra? */
1687 lua_pushlstring(cs->L, co->s, cs->cap->s - co->s); /* push whole match */
1688 n++;
1690 cs->cap++; /* skip close entry */
1691 return n;
1697 ** Push only the first value generated by nested captures
1699 static void pushonenestedvalue (CapState *cs) {
1700 int n = pushnestedvalues(cs, 0);
1701 if (n > 1)
1702 lua_pop(cs->L, n - 1); /* pop extra values */
1707 ** Try to find a named group capture with the name given at the top of
1708 ** the stack; goes backward from 'cap'.
1710 static Capture *findback (CapState *cs, Capture *cap) {
1711 lua_State *L = cs->L;
1712 while (cap-- > cs->ocap) { /* repeat until end of list */
1713 if (isclosecap(cap))
1714 cap = findopen(cap); /* skip nested captures */
1715 else if (!isfullcap(cap))
1716 continue; /* opening an enclosing capture: skip and get previous */
1717 if (captype(cap) == Cgroup) {
1718 getfromktable(cs, cap->idx); /* get group name */
1719 if (lp_equal(L, -2, -1)) { /* right group? */
1720 lua_pop(L, 2); /* remove reference name and group name */
1721 return cap;
1723 else lua_pop(L, 1); /* remove group name */
1726 luaL_error(L, "back reference '%s' not found", lua_tostring(L, -1));
1727 return NULL; /* to avoid warnings */
1732 ** Back-reference capture. Return number of values pushed.
1734 static int backrefcap (CapState *cs) {
1735 int n;
1736 Capture *curr = cs->cap;
1737 pushluaval(cs); /* reference name */
1738 cs->cap = findback(cs, curr); /* find corresponding group */
1739 n = pushnestedvalues(cs, 0); /* push group's values */
1740 cs->cap = curr + 1;
1741 return n;
1746 ** Table capture: creates a new table and populates it with nested
1747 ** captures.
1749 static int tablecap (CapState *cs) {
1750 lua_State *L = cs->L;
1751 int n = 0;
1752 lua_newtable(L);
1753 if (isfullcap(cs->cap++))
1754 return 1; /* table is empty */
1755 while (!isclosecap(cs->cap)) {
1756 if (captype(cs->cap) == Cgroup && cs->cap->idx != 0) { /* named group? */
1757 pushluaval(cs); /* push group name */
1758 pushonenestedvalue(cs);
1759 lua_settable(L, -3);
1761 else { /* not a named group */
1762 int i;
1763 int k = pushcapture(cs);
1764 for (i = k; i > 0; i--) /* store all values into table */
1765 lua_rawseti(L, -(i + 1), n + i);
1766 n += k;
1769 cs->cap++; /* skip close entry */
1770 return 1; /* number of values pushed (only the table) */
1775 ** Table-query capture
1777 static int querycap (CapState *cs) {
1778 int idx = cs->cap->idx;
1779 pushonenestedvalue(cs); /* get nested capture */
1780 lua_gettable(cs->L, updatecache(cs, idx)); /* query cap. value at table */
1781 if (!lua_isnil(cs->L, -1))
1782 return 1;
1783 else { /* no value */
1784 lua_pop(cs->L, 1); /* remove nil */
1785 return 0;
1791 ** Fold capture
1793 static int foldcap (CapState *cs) {
1794 int n;
1795 lua_State *L = cs->L;
1796 int idx = cs->cap->idx;
1797 if (isfullcap(cs->cap++) || /* no nested captures? */
1798 isclosecap(cs->cap) || /* no nested captures (large subject)? */
1799 (n = pushcapture(cs)) == 0) /* nested captures with no values? */
1800 return luaL_error(L, "no initial value for fold capture");
1801 if (n > 1)
1802 lua_pop(L, n - 1); /* leave only one result for accumulator */
1803 while (!isclosecap(cs->cap)) {
1804 lua_pushvalue(L, updatecache(cs, idx)); /* get folding function */
1805 lua_insert(L, -2); /* put it before accumulator */
1806 n = pushcapture(cs); /* get next capture's values */
1807 lua_call(L, n + 1, 1); /* call folding function */
1809 cs->cap++; /* skip close entry */
1810 return 1; /* only accumulator left on the stack */
1815 ** Function capture
1817 static int functioncap (CapState *cs) {
1818 int n;
1819 int top = lua_gettop(cs->L);
1820 pushluaval(cs); /* push function */
1821 n = pushnestedvalues(cs, 0); /* push nested captures */
1822 lua_call(cs->L, n, LUA_MULTRET); /* call function */
1823 return lua_gettop(cs->L) - top; /* return function's results */
1828 ** Select capture
1830 static int numcap (CapState *cs) {
1831 int idx = cs->cap->idx; /* value to select */
1832 if (idx == 0) { /* no values? */
1833 nextcap(cs); /* skip entire capture */
1834 return 0; /* no value produced */
1836 else {
1837 int n = pushnestedvalues(cs, 0);
1838 if (n < idx) /* invalid index? */
1839 return luaL_error(cs->L, "no capture '%d'", idx);
1840 else {
1841 lua_pushvalue(cs->L, -(n - idx + 1)); /* get selected capture */
1842 lua_replace(cs->L, -(n + 1)); /* put it in place of 1st capture */
1843 lua_pop(cs->L, n - 1); /* remove other captures */
1844 return 1;
1851 ** Return the stack index of the first runtime capture in the given
1852 ** list of captures (or zero if no runtime captures)
1854 int finddyncap (Capture *cap, Capture *last) {
1855 for (; cap < last; cap++) {
1856 if (cap->kind == Cruntime)
1857 return cap->idx; /* stack position of first capture */
1859 return 0; /* no dynamic captures in this segment */
1864 ** Calls a runtime capture. Returns number of captures removed by
1865 ** the call, including the initial Cgroup. (Captures to be added are
1866 ** on the Lua stack.)
1868 int runtimecap (CapState *cs, Capture *close, const char *s, int *rem) {
1869 int n, id;
1870 lua_State *L = cs->L;
1871 int otop = lua_gettop(L);
1872 Capture *open = findopen(close);
1873 assert(captype(open) == Cgroup);
1874 id = finddyncap(open, close); /* get first dynamic capture argument */
1875 close->kind = Cclose; /* closes the group */
1876 close->s = s;
1877 cs->cap = open; cs->valuecached = 0; /* prepare capture state */
1878 luaL_checkstack(L, 4, "too many runtime captures");
1879 pushluaval(cs); /* push function to be called */
1880 lua_pushvalue(L, SUBJIDX); /* push original subject */
1881 lua_pushinteger(L, s - cs->s + 1); /* push current position */
1882 n = pushnestedvalues(cs, 0); /* push nested captures */
1883 lua_call(L, n + 2, LUA_MULTRET); /* call dynamic function */
1884 if (id > 0) { /* are there old dynamic captures to be removed? */
1885 int i;
1886 for (i = id; i <= otop; i++)
1887 lua_remove(L, id); /* remove old dynamic captures */
1888 *rem = otop - id + 1; /* total number of dynamic captures removed */
1890 else
1891 *rem = 0; /* no dynamic captures removed */
1892 return close - open; /* number of captures of all kinds removed */
1897 ** Auxiliary structure for substitution and string captures: keep
1898 ** information about nested captures for future use, avoiding to push
1899 ** string results into Lua
1901 typedef struct StrAux {
1902 int isstring; /* whether capture is a string */
1903 union {
1904 Capture *cp; /* if not a string, respective capture */
1905 struct { /* if it is a string... */
1906 const char *s; /* ... starts here */
1907 const char *e; /* ... ends here */
1908 } s;
1909 } u;
1910 } StrAux;
1912 #define MAXSTRCAPS 10
1915 ** Collect values from current capture into array 'cps'. Current
1916 ** capture must be Cstring (first call) or Csimple (recursive calls).
1917 ** (In first call, fills %0 with whole match for Cstring.)
1918 ** Returns number of elements in the array that were filled.
1920 static int getstrcaps (CapState *cs, StrAux *cps, int n) {
1921 int k = n++;
1922 cps[k].isstring = 1; /* get string value */
1923 cps[k].u.s.s = cs->cap->s; /* starts here */
1924 if (!isfullcap(cs->cap++)) { /* nested captures? */
1925 while (!isclosecap(cs->cap)) { /* traverse them */
1926 if (n >= MAXSTRCAPS) /* too many captures? */
1927 nextcap(cs); /* skip extra captures (will not need them) */
1928 else if (captype(cs->cap) == Csimple) /* string? */
1929 n = getstrcaps(cs, cps, n); /* put info. into array */
1930 else {
1931 cps[n].isstring = 0; /* not a string */
1932 cps[n].u.cp = cs->cap; /* keep original capture */
1933 nextcap(cs);
1934 n++;
1937 cs->cap++; /* skip close */
1939 cps[k].u.s.e = closeaddr(cs->cap - 1); /* ends here */
1940 return n;
1945 ** add next capture value (which should be a string) to buffer 'b'
1947 static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
1951 ** String capture: add result to buffer 'b' (instead of pushing
1952 ** it into the stack)
1954 static void stringcap (luaL_Buffer *b, CapState *cs) {
1955 StrAux cps[MAXSTRCAPS];
1956 int n;
1957 size_t len, i;
1958 const char *fmt; /* format string */
1959 fmt = lua_tolstring(cs->L, updatecache(cs, cs->cap->idx), &len);
1960 n = getstrcaps(cs, cps, 0) - 1; /* collect nested captures */
1961 for (i = 0; i < len; i++) { /* traverse them */
1962 if (fmt[i] != '%') /* not an escape? */
1963 luaL_addchar(b, fmt[i]); /* add it to buffer */
1964 else if (fmt[++i] < '0' || fmt[i] > '9') /* not followed by a digit? */
1965 luaL_addchar(b, fmt[i]); /* add to buffer */
1966 else {
1967 int l = fmt[i] - '0'; /* capture index */
1968 if (l > n)
1969 luaL_error(cs->L, "invalid capture index (%d)", l);
1970 else if (cps[l].isstring)
1971 luaL_addlstring(b, cps[l].u.s.s, cps[l].u.s.e - cps[l].u.s.s);
1972 else {
1973 Capture *curr = cs->cap;
1974 cs->cap = cps[l].u.cp; /* go back to evaluate that nested capture */
1975 if (!addonestring(b, cs, "capture"))
1976 luaL_error(cs->L, "no values in capture index %d", l);
1977 cs->cap = curr; /* continue from where it stopped */
1985 ** Substitution capture: add result to buffer 'b'
1987 static void substcap (luaL_Buffer *b, CapState *cs) {
1988 const char *curr = cs->cap->s;
1989 if (isfullcap(cs->cap)) /* no nested captures? */
1990 luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
1991 else {
1992 cs->cap++; /* skip open entry */
1993 while (!isclosecap(cs->cap)) { /* traverse nested captures */
1994 const char *next = cs->cap->s;
1995 luaL_addlstring(b, curr, next - curr); /* add text up to capture */
1996 if (addonestring(b, cs, "replacement"))
1997 curr = closeaddr(cs->cap - 1); /* continue after match */
1998 else /* no capture value */
1999 curr = next; /* keep original text in final result */
2001 luaL_addlstring(b, curr, cs->cap->s - curr); /* add last piece of text */
2003 cs->cap++; /* go to next capture */
2008 ** Evaluates a capture and adds its first value to buffer 'b'; returns
2009 ** whether there was a value
2011 static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
2012 switch (captype(cs->cap)) {
2013 case Cstring:
2014 stringcap(b, cs); /* add capture directly to buffer */
2015 return 1;
2016 case Csubst:
2017 substcap(b, cs); /* add capture directly to buffer */
2018 return 1;
2019 default: {
2020 lua_State *L = cs->L;
2021 int n = pushcapture(cs);
2022 if (n > 0) {
2023 if (n > 1) lua_pop(L, n - 1); /* only one result */
2024 if (!lua_isstring(L, -1))
2025 luaL_error(L, "invalid %s value (a %s)", what, luaL_typename(L, -1));
2026 luaL_addvalue(b);
2028 return n;
2035 ** Push all values of the current capture into the stack; returns
2036 ** number of values pushed
2038 static int pushcapture (CapState *cs) {
2039 lua_State *L = cs->L;
2040 luaL_checkstack(L, 4, "too many captures");
2041 switch (captype(cs->cap)) {
2042 case Cposition: {
2043 lua_pushinteger(L, cs->cap->s - cs->s + 1);
2044 cs->cap++;
2045 return 1;
2047 case Cconst: {
2048 pushluaval(cs);
2049 cs->cap++;
2050 return 1;
2052 case Carg: {
2053 int arg = (cs->cap++)->idx;
2054 if (arg + FIXEDARGS > cs->ptop)
2055 return luaL_error(L, "reference to absent extra argument #%d", arg);
2056 lua_pushvalue(L, arg + FIXEDARGS);
2057 return 1;
2059 case Csimple: {
2060 int k = pushnestedvalues(cs, 1);
2061 lua_insert(L, -k); /* make whole match be first result */
2062 return k;
2064 case Cruntime: {
2065 lua_pushvalue(L, (cs->cap++)->idx); /* value is in the stack */
2066 return 1;
2068 case Cstring: {
2069 luaL_Buffer b;
2070 luaL_buffinit(L, &b);
2071 stringcap(&b, cs);
2072 luaL_pushresult(&b);
2073 return 1;
2075 case Csubst: {
2076 luaL_Buffer b;
2077 luaL_buffinit(L, &b);
2078 substcap(&b, cs);
2079 luaL_pushresult(&b);
2080 return 1;
2082 case Cgroup: {
2083 if (cs->cap->idx == 0) /* anonymous group? */
2084 return pushnestedvalues(cs, 0); /* add all nested values */
2085 else { /* named group: add no values */
2086 nextcap(cs); /* skip capture */
2087 return 0;
2090 case Cbackref: return backrefcap(cs);
2091 case Ctable: return tablecap(cs);
2092 case Cfunction: return functioncap(cs);
2093 case Cnum: return numcap(cs);
2094 case Cquery: return querycap(cs);
2095 case Cfold: return foldcap(cs);
2096 default: assert(0); return 0;
2102 ** Prepare a CapState structure and traverse the entire list of
2103 ** captures in the stack pushing its results. 's' is the subject
2104 ** string, 'r' is the final position of the match, and 'ptop'
2105 ** the index in the stack where some useful values were pushed.
2106 ** Returns the number of results pushed. (If the list produces no
2107 ** results, push the final position of the match.)
2109 int getcaptures (lua_State *L, const char *s, const char *r, int ptop) {
2110 Capture *capture = (Capture *)lua_touserdata(L, caplistidx(ptop));
2111 int n = 0;
2112 if (!isclosecap(capture)) { /* is there any capture? */
2113 CapState cs;
2114 cs.ocap = cs.cap = capture; cs.L = L;
2115 cs.s = s; cs.valuecached = 0; cs.ptop = ptop;
2116 do { /* collect their values */
2117 n += pushcapture(&cs);
2118 } while (!isclosecap(cs.cap));
2120 if (n == 0) { /* no capture values? */
2121 lua_pushinteger(L, r - s + 1); /* return only end position */
2122 n = 1;
2124 return n;
2129 ** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $
2130 ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license)
2133 /* #include <ctype.h> */
2134 /* #include <limits.h> */
2135 /* #include <string.h> */
2138 /* #include "lua.h" */
2139 /* #include "lauxlib.h" */
2141 /* #include "lptypes.h" */
2142 /* #include "lpcap.h" */
2143 /* #include "lpcode.h" */
2144 /* #include "lpprint.h" */
2145 /* #include "lptree.h" */
2148 /* number of siblings for each tree */
2149 const byte numsiblings[] = {
2150 0, 0, 0, /* char, set, any */
2151 0, 0, /* true, false */
2152 1, /* rep */
2153 2, 2, /* seq, choice */
2154 1, 1, /* not, and */
2155 0, 0, 2, 1, /* call, opencall, rule, grammar */
2156 1, /* behind */
2157 1, 1 /* capture, runtime capture */
2161 static TTree *newgrammar (lua_State *L, int arg);
2165 ** returns a reasonable name for value at index 'idx' on the stack
2167 static const char *val2str (lua_State *L, int idx) {
2168 const char *k = lua_tostring(L, idx);
2169 if (k != NULL)
2170 return lua_pushfstring(L, "%s", k);
2171 else
2172 return lua_pushfstring(L, "(a %s)", luaL_typename(L, idx));
2177 ** Fix a TOpenCall into a TCall node, using table 'postable' to
2178 ** translate a key to its rule address in the tree. Raises an
2179 ** error if key does not exist.
2181 static void fixonecall (lua_State *L, int postable, TTree *g, TTree *t) {
2182 int n;
2183 lua_rawgeti(L, -1, t->key); /* get rule's name */
2184 lua_gettable(L, postable); /* query name in position table */
2185 n = lua_tonumber(L, -1); /* get (absolute) position */
2186 lua_pop(L, 1); /* remove position */
2187 if (n == 0) { /* no position? */
2188 lua_rawgeti(L, -1, t->key); /* get rule's name again */
2189 luaL_error(L, "rule '%s' undefined in given grammar", val2str(L, -1));
2191 t->tag = TCall;
2192 t->u.ps = n - (t - g); /* position relative to node */
2193 assert(sib2(t)->tag == TRule);
2194 sib2(t)->key = t->key;
2199 ** Transform left associative constructions into right
2200 ** associative ones, for sequence and choice; that is:
2201 ** (t11 + t12) + t2 => t11 + (t12 + t2)
2202 ** (t11 * t12) * t2 => t11 * (t12 * t2)
2203 ** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
2205 static void correctassociativity (TTree *tree) {
2206 TTree *t1 = sib1(tree);
2207 assert(tree->tag == TChoice || tree->tag == TSeq);
2208 while (t1->tag == tree->tag) {
2209 int n1size = tree->u.ps - 1; /* t1 == Op t11 t12 */
2210 int n11size = t1->u.ps - 1;
2211 int n12size = n1size - n11size - 1;
2212 memmove(sib1(tree), sib1(t1), n11size * sizeof(TTree)); /* move t11 */
2213 tree->u.ps = n11size + 1;
2214 sib2(tree)->tag = tree->tag;
2215 sib2(tree)->u.ps = n12size + 1;
2221 ** Make final adjustments in a tree. Fix open calls in tree 't',
2222 ** making them refer to their respective rules or raising appropriate
2223 ** errors (if not inside a grammar). Correct associativity of associative
2224 ** constructions (making them right associative). Assume that tree's
2225 ** ktable is at the top of the stack (for error messages).
2227 static void finalfix (lua_State *L, int postable, TTree *g, TTree *t) {
2228 tailcall:
2229 switch (t->tag) {
2230 case TGrammar: /* subgrammars were already fixed */
2231 return;
2232 case TOpenCall: {
2233 if (g != NULL) /* inside a grammar? */
2234 fixonecall(L, postable, g, t);
2235 else { /* open call outside grammar */
2236 lua_rawgeti(L, -1, t->key);
2237 luaL_error(L, "rule '%s' used outside a grammar", val2str(L, -1));
2239 break;
2241 case TSeq: case TChoice:
2242 correctassociativity(t);
2243 break;
2245 switch (numsiblings[t->tag]) {
2246 case 1: /* finalfix(L, postable, g, sib1(t)); */
2247 t = sib1(t); goto tailcall;
2248 case 2:
2249 finalfix(L, postable, g, sib1(t));
2250 t = sib2(t); goto tailcall; /* finalfix(L, postable, g, sib2(t)); */
2251 default: assert(numsiblings[t->tag] == 0); break;
2258 ** {===================================================================
2259 ** KTable manipulation
2261 ** - The ktable of a pattern 'p' can be shared by other patterns that
2262 ** contain 'p' and no other constants. Because of this sharing, we
2263 ** should not add elements to a 'ktable' unless it was freshly created
2264 ** for the new pattern.
2266 ** - The maximum index in a ktable is USHRT_MAX, because trees and
2267 ** patterns use unsigned shorts to store those indices.
2268 ** ====================================================================
2272 ** Create a new 'ktable' to the pattern at the top of the stack.
2274 static void newktable (lua_State *L, int n) {
2275 lua_createtable(L, n, 0); /* create a fresh table */
2276 lua_setuservalue(L, -2); /* set it as 'ktable' for pattern */
2281 ** Add element 'idx' to 'ktable' of pattern at the top of the stack;
2282 ** Return index of new element.
2283 ** If new element is nil, does not add it to table (as it would be
2284 ** useless) and returns 0, as ktable[0] is always nil.
2286 static int addtoktable (lua_State *L, int idx) {
2287 if (lua_isnil(L, idx)) /* nil value? */
2288 return 0;
2289 else {
2290 int n;
2291 lua_getuservalue(L, -1); /* get ktable from pattern */
2292 n = lua_rawlen(L, -1);
2293 if (n >= USHRT_MAX)
2294 luaL_error(L, "too many Lua values in pattern");
2295 lua_pushvalue(L, idx); /* element to be added */
2296 lua_rawseti(L, -2, ++n);
2297 lua_pop(L, 1); /* remove 'ktable' */
2298 return n;
2304 ** Return the number of elements in the ktable at 'idx'.
2305 ** In Lua 5.2/5.3, default "environment" for patterns is nil, not
2306 ** a table. Treat it as an empty table. In Lua 5.1, assumes that
2307 ** the environment has no numeric indices (len == 0)
2309 static int ktablelen (lua_State *L, int idx) {
2310 if (!lua_istable(L, idx)) return 0;
2311 else return lua_rawlen(L, idx);
2316 ** Concatentate the contents of table 'idx1' into table 'idx2'.
2317 ** (Assume that both indices are negative.)
2318 ** Return the original length of table 'idx2' (or 0, if no
2319 ** element was added, as there is no need to correct any index).
2321 static int concattable (lua_State *L, int idx1, int idx2) {
2322 int i;
2323 int n1 = ktablelen(L, idx1);
2324 int n2 = ktablelen(L, idx2);
2325 if (n1 + n2 > USHRT_MAX)
2326 luaL_error(L, "too many Lua values in pattern");
2327 if (n1 == 0) return 0; /* nothing to correct */
2328 for (i = 1; i <= n1; i++) {
2329 lua_rawgeti(L, idx1, i);
2330 lua_rawseti(L, idx2 - 1, n2 + i); /* correct 'idx2' */
2332 return n2;
2337 ** When joining 'ktables', constants from one of the subpatterns must
2338 ** be renumbered; 'correctkeys' corrects their indices (adding 'n'
2339 ** to each of them)
2341 static void correctkeys (TTree *tree, int n) {
2342 if (n == 0) return; /* no correction? */
2343 tailcall:
2344 switch (tree->tag) {
2345 case TOpenCall: case TCall: case TRunTime: case TRule: {
2346 if (tree->key > 0)
2347 tree->key += n;
2348 break;
2350 case TCapture: {
2351 if (tree->key > 0 && tree->cap != Carg && tree->cap != Cnum)
2352 tree->key += n;
2353 break;
2355 default: break;
2357 switch (numsiblings[tree->tag]) {
2358 case 1: /* correctkeys(sib1(tree), n); */
2359 tree = sib1(tree); goto tailcall;
2360 case 2:
2361 correctkeys(sib1(tree), n);
2362 tree = sib2(tree); goto tailcall; /* correctkeys(sib2(tree), n); */
2363 default: assert(numsiblings[tree->tag] == 0); break;
2369 ** Join the ktables from p1 and p2 the ktable for the new pattern at the
2370 ** top of the stack, reusing them when possible.
2372 static void joinktables (lua_State *L, int p1, TTree *t2, int p2) {
2373 int n1, n2;
2374 lua_getuservalue(L, p1); /* get ktables */
2375 lua_getuservalue(L, p2);
2376 n1 = ktablelen(L, -2);
2377 n2 = ktablelen(L, -1);
2378 if (n1 == 0 && n2 == 0) /* are both tables empty? */
2379 lua_pop(L, 2); /* nothing to be done; pop tables */
2380 else if (n2 == 0 || lp_equal(L, -2, -1)) { /* 2nd table empty or equal? */
2381 lua_pop(L, 1); /* pop 2nd table */
2382 lua_setuservalue(L, -2); /* set 1st ktable into new pattern */
2384 else if (n1 == 0) { /* first table is empty? */
2385 lua_setuservalue(L, -3); /* set 2nd table into new pattern */
2386 lua_pop(L, 1); /* pop 1st table */
2388 else {
2389 lua_createtable(L, n1 + n2, 0); /* create ktable for new pattern */
2390 /* stack: new p; ktable p1; ktable p2; new ktable */
2391 concattable(L, -3, -1); /* from p1 into new ktable */
2392 concattable(L, -2, -1); /* from p2 into new ktable */
2393 lua_setuservalue(L, -4); /* new ktable becomes 'p' environment */
2394 lua_pop(L, 2); /* pop other ktables */
2395 correctkeys(t2, n1); /* correction for indices from p2 */
2401 ** copy 'ktable' of element 'idx' to new tree (on top of stack)
2403 static void copyktable (lua_State *L, int idx) {
2404 lua_getuservalue(L, idx);
2405 lua_setuservalue(L, -2);
2410 ** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable'
2411 ** from tree at the top of the stack, and correct corresponding
2412 ** tree.
2414 static void mergektable (lua_State *L, int idx, TTree *stree) {
2415 int n;
2416 lua_getuservalue(L, -1); /* get ktables */
2417 lua_getuservalue(L, idx);
2418 n = concattable(L, -1, -2);
2419 lua_pop(L, 2); /* remove both ktables */
2420 correctkeys(stree, n);
2425 ** Create a new 'ktable' to the pattern at the top of the stack, adding
2426 ** all elements from pattern 'p' (if not 0) plus element 'idx' to it.
2427 ** Return index of new element.
2429 static int addtonewktable (lua_State *L, int p, int idx) {
2430 newktable(L, 1);
2431 if (p)
2432 mergektable(L, p, NULL);
2433 return addtoktable(L, idx);
2436 /* }====================================================== */
2440 ** {======================================================
2441 ** Tree generation
2442 ** =======================================================
2446 ** In 5.2, could use 'luaL_testudata'...
2448 static int testpattern (lua_State *L, int idx) {
2449 if (lua_touserdata(L, idx)) { /* value is a userdata? */
2450 if (lua_getmetatable(L, idx)) { /* does it have a metatable? */
2451 luaL_getmetatable(L, PATTERN_T);
2452 if (lua_rawequal(L, -1, -2)) { /* does it have the correct mt? */
2453 lua_pop(L, 2); /* remove both metatables */
2454 return 1;
2458 return 0;
2462 static Pattern *getpattern (lua_State *L, int idx) {
2463 return (Pattern *)luaL_checkudata(L, idx, PATTERN_T);
2467 static int getsize (lua_State *L, int idx) {
2468 return (lua_rawlen(L, idx) - sizeof(Pattern)) / sizeof(TTree) + 1;
2472 static TTree *gettree (lua_State *L, int idx, int *len) {
2473 Pattern *p = getpattern(L, idx);
2474 if (len)
2475 *len = getsize(L, idx);
2476 return p->tree;
2481 ** create a pattern. Set its uservalue (the 'ktable') equal to its
2482 ** metatable. (It could be any empty sequence; the metatable is at
2483 ** hand here, so we use it.)
2485 static TTree *newtree (lua_State *L, int len) {
2486 size_t size = (len - 1) * sizeof(TTree) + sizeof(Pattern);
2487 Pattern *p = (Pattern *)lua_newuserdata(L, size);
2488 luaL_getmetatable(L, PATTERN_T);
2489 lua_pushvalue(L, -1);
2490 lua_setuservalue(L, -3);
2491 lua_setmetatable(L, -2);
2492 p->code = NULL; p->codesize = 0;
2493 return p->tree;
2497 static TTree *newleaf (lua_State *L, int tag) {
2498 TTree *tree = newtree(L, 1);
2499 tree->tag = tag;
2500 return tree;
2504 static TTree *newcharset (lua_State *L) {
2505 TTree *tree = newtree(L, bytes2slots(CHARSETSIZE) + 1);
2506 tree->tag = TSet;
2507 loopset(i, treebuffer(tree)[i] = 0);
2508 return tree;
2513 ** add to tree a sequence where first sibling is 'sib' (with size
2514 ** 'sibsize'); returns position for second sibling
2516 static TTree *seqaux (TTree *tree, TTree *sib, int sibsize) {
2517 tree->tag = TSeq; tree->u.ps = sibsize + 1;
2518 memcpy(sib1(tree), sib, sibsize * sizeof(TTree));
2519 return sib2(tree);
2524 ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got
2525 ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
2526 ** must build a sequence of sequence of sequence...)
2528 static void fillseq (TTree *tree, int tag, int n, const char *s) {
2529 int i;
2530 for (i = 0; i < n - 1; i++) { /* initial n-1 copies of Seq tag; Seq ... */
2531 tree->tag = TSeq; tree->u.ps = 2;
2532 sib1(tree)->tag = tag;
2533 sib1(tree)->u.n = s ? (byte)s[i] : 0;
2534 tree = sib2(tree);
2536 tree->tag = tag; /* last one does not need TSeq */
2537 tree->u.n = s ? (byte)s[i] : 0;
2542 ** Numbers as patterns:
2543 ** 0 == true (always match); n == TAny repeated 'n' times;
2544 ** -n == not (TAny repeated 'n' times)
2546 static TTree *numtree (lua_State *L, int n) {
2547 if (n == 0)
2548 return newleaf(L, TTrue);
2549 else {
2550 TTree *tree, *nd;
2551 if (n > 0)
2552 tree = nd = newtree(L, 2 * n - 1);
2553 else { /* negative: code it as !(-n) */
2554 n = -n;
2555 tree = newtree(L, 2 * n);
2556 tree->tag = TNot;
2557 nd = sib1(tree);
2559 fillseq(nd, TAny, n, NULL); /* sequence of 'n' any's */
2560 return tree;
2566 ** Convert value at index 'idx' to a pattern
2568 static TTree *getpatt (lua_State *L, int idx, int *len) {
2569 TTree *tree;
2570 switch (lua_type(L, idx)) {
2571 case LUA_TSTRING: {
2572 size_t slen;
2573 const char *s = lua_tolstring(L, idx, &slen); /* get string */
2574 if (slen == 0) /* empty? */
2575 tree = newleaf(L, TTrue); /* always match */
2576 else {
2577 tree = newtree(L, 2 * (slen - 1) + 1);
2578 fillseq(tree, TChar, slen, s); /* sequence of 'slen' chars */
2580 break;
2582 case LUA_TNUMBER: {
2583 int n = lua_tointeger(L, idx);
2584 tree = numtree(L, n);
2585 break;
2587 case LUA_TBOOLEAN: {
2588 tree = (lua_toboolean(L, idx) ? newleaf(L, TTrue) : newleaf(L, TFalse));
2589 break;
2591 case LUA_TTABLE: {
2592 tree = newgrammar(L, idx);
2593 break;
2595 case LUA_TFUNCTION: {
2596 tree = newtree(L, 2);
2597 tree->tag = TRunTime;
2598 tree->key = addtonewktable(L, 0, idx);
2599 sib1(tree)->tag = TTrue;
2600 break;
2602 default: {
2603 return gettree(L, idx, len);
2606 lua_replace(L, idx); /* put new tree into 'idx' slot */
2607 if (len)
2608 *len = getsize(L, idx);
2609 return tree;
2614 ** create a new tree, whith a new root and one sibling.
2615 ** Sibling must be on the Lua stack, at index 1.
2617 static TTree *newroot1sib (lua_State *L, int tag) {
2618 int s1;
2619 TTree *tree1 = getpatt(L, 1, &s1);
2620 TTree *tree = newtree(L, 1 + s1); /* create new tree */
2621 tree->tag = tag;
2622 memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
2623 copyktable(L, 1);
2624 return tree;
2629 ** create a new tree, whith a new root and 2 siblings.
2630 ** Siblings must be on the Lua stack, first one at index 1.
2632 static TTree *newroot2sib (lua_State *L, int tag) {
2633 int s1, s2;
2634 TTree *tree1 = getpatt(L, 1, &s1);
2635 TTree *tree2 = getpatt(L, 2, &s2);
2636 TTree *tree = newtree(L, 1 + s1 + s2); /* create new tree */
2637 tree->tag = tag;
2638 tree->u.ps = 1 + s1;
2639 memcpy(sib1(tree), tree1, s1 * sizeof(TTree));
2640 memcpy(sib2(tree), tree2, s2 * sizeof(TTree));
2641 joinktables(L, 1, sib2(tree), 2);
2642 return tree;
2646 static int lp_P (lua_State *L) {
2647 luaL_checkany(L, 1);
2648 getpatt(L, 1, NULL);
2649 lua_settop(L, 1);
2650 return 1;
2655 ** sequence operator; optimizations:
2656 ** false x => false, x true => x, true x => x
2657 ** (cannot do x . false => false because x may have runtime captures)
2659 static int lp_seq (lua_State *L) {
2660 TTree *tree1 = getpatt(L, 1, NULL);
2661 TTree *tree2 = getpatt(L, 2, NULL);
2662 if (tree1->tag == TFalse || tree2->tag == TTrue)
2663 lua_pushvalue(L, 1); /* false . x == false, x . true = x */
2664 else if (tree1->tag == TTrue)
2665 lua_pushvalue(L, 2); /* true . x = x */
2666 else
2667 newroot2sib(L, TSeq);
2668 return 1;
2673 ** choice operator; optimizations:
2674 ** charset / charset => charset
2675 ** true / x => true, x / false => x, false / x => x
2676 ** (x / true is not equivalent to true)
2678 static int lp_choice (lua_State *L) {
2679 Charset st1, st2;
2680 TTree *t1 = getpatt(L, 1, NULL);
2681 TTree *t2 = getpatt(L, 2, NULL);
2682 if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
2683 TTree *t = newcharset(L);
2684 loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]);
2686 else if (nofail(t1) || t2->tag == TFalse)
2687 lua_pushvalue(L, 1); /* true / x => true, x / false => x */
2688 else if (t1->tag == TFalse)
2689 lua_pushvalue(L, 2); /* false / x => x */
2690 else
2691 newroot2sib(L, TChoice);
2692 return 1;
2697 ** p^n
2699 static int lp_star (lua_State *L) {
2700 int size1;
2701 int n = (int)luaL_checkinteger(L, 2);
2702 TTree *tree1 = getpatt(L, 1, &size1);
2703 if (n >= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */
2704 TTree *tree = newtree(L, (n + 1) * (size1 + 1));
2705 if (nullable(tree1))
2706 luaL_error(L, "loop body may accept empty string");
2707 while (n--) /* repeat 'n' times */
2708 tree = seqaux(tree, tree1, size1);
2709 tree->tag = TRep;
2710 memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
2712 else { /* choice (seq tree1 ... choice tree1 true ...) true */
2713 TTree *tree;
2714 n = -n;
2715 /* size = (choice + seq + tree1 + true) * n, but the last has no seq */
2716 tree = newtree(L, n * (size1 + 3) - 1);
2717 for (; n > 1; n--) { /* repeat (n - 1) times */
2718 tree->tag = TChoice; tree->u.ps = n * (size1 + 3) - 2;
2719 sib2(tree)->tag = TTrue;
2720 tree = sib1(tree);
2721 tree = seqaux(tree, tree1, size1);
2723 tree->tag = TChoice; tree->u.ps = size1 + 1;
2724 sib2(tree)->tag = TTrue;
2725 memcpy(sib1(tree), tree1, size1 * sizeof(TTree));
2727 copyktable(L, 1);
2728 return 1;
2733 ** #p == &p
2735 static int lp_and (lua_State *L) {
2736 newroot1sib(L, TAnd);
2737 return 1;
2742 ** -p == !p
2744 static int lp_not (lua_State *L) {
2745 newroot1sib(L, TNot);
2746 return 1;
2751 ** [t1 - t2] == Seq (Not t2) t1
2752 ** If t1 and t2 are charsets, make their difference.
2754 static int lp_sub (lua_State *L) {
2755 Charset st1, st2;
2756 int s1, s2;
2757 TTree *t1 = getpatt(L, 1, &s1);
2758 TTree *t2 = getpatt(L, 2, &s2);
2759 if (tocharset(t1, &st1) && tocharset(t2, &st2)) {
2760 TTree *t = newcharset(L);
2761 loopset(i, treebuffer(t)[i] = st1.cs[i] & ~st2.cs[i]);
2763 else {
2764 TTree *tree = newtree(L, 2 + s1 + s2);
2765 tree->tag = TSeq; /* sequence of... */
2766 tree->u.ps = 2 + s2;
2767 sib1(tree)->tag = TNot; /* ...not... */
2768 memcpy(sib1(sib1(tree)), t2, s2 * sizeof(TTree)); /* ...t2 */
2769 memcpy(sib2(tree), t1, s1 * sizeof(TTree)); /* ... and t1 */
2770 joinktables(L, 1, sib1(tree), 2);
2772 return 1;
2776 static int lp_set (lua_State *L) {
2777 size_t l;
2778 const char *s = luaL_checklstring(L, 1, &l);
2779 TTree *tree = newcharset(L);
2780 while (l--) {
2781 setchar(treebuffer(tree), (byte)(*s));
2782 s++;
2784 return 1;
2788 static int lp_range (lua_State *L) {
2789 int arg;
2790 int top = lua_gettop(L);
2791 TTree *tree = newcharset(L);
2792 for (arg = 1; arg <= top; arg++) {
2793 int c;
2794 size_t l;
2795 const char *r = luaL_checklstring(L, arg, &l);
2796 luaL_argcheck(L, l == 2, arg, "range must have two characters");
2797 for (c = (byte)r[0]; c <= (byte)r[1]; c++)
2798 setchar(treebuffer(tree), c);
2800 return 1;
2805 ** Look-behind predicate
2807 static int lp_behind (lua_State *L) {
2808 TTree *tree;
2809 TTree *tree1 = getpatt(L, 1, NULL);
2810 int n = fixedlen(tree1);
2811 luaL_argcheck(L, n >= 0, 1, "pattern may not have fixed length");
2812 luaL_argcheck(L, !hascaptures(tree1), 1, "pattern have captures");
2813 luaL_argcheck(L, n <= MAXBEHIND, 1, "pattern too long to look behind");
2814 tree = newroot1sib(L, TBehind);
2815 tree->u.n = n;
2816 return 1;
2821 ** Create a non-terminal
2823 static int lp_V (lua_State *L) {
2824 TTree *tree = newleaf(L, TOpenCall);
2825 luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
2826 tree->key = addtonewktable(L, 0, 1);
2827 return 1;
2832 ** Create a tree for a non-empty capture, with a body and
2833 ** optionally with an associated Lua value (at index 'labelidx' in the
2834 ** stack)
2836 static int capture_aux (lua_State *L, int cap, int labelidx) {
2837 TTree *tree = newroot1sib(L, TCapture);
2838 tree->cap = cap;
2839 tree->key = (labelidx == 0) ? 0 : addtonewktable(L, 1, labelidx);
2840 return 1;
2845 ** Fill a tree with an empty capture, using an empty (TTrue) sibling.
2847 static TTree *auxemptycap (TTree *tree, int cap) {
2848 tree->tag = TCapture;
2849 tree->cap = cap;
2850 sib1(tree)->tag = TTrue;
2851 return tree;
2856 ** Create a tree for an empty capture
2858 static TTree *newemptycap (lua_State *L, int cap) {
2859 return auxemptycap(newtree(L, 2), cap);
2864 ** Create a tree for an empty capture with an associated Lua value
2866 static TTree *newemptycapkey (lua_State *L, int cap, int idx) {
2867 TTree *tree = auxemptycap(newtree(L, 2), cap);
2868 tree->key = addtonewktable(L, 0, idx);
2869 return tree;
2874 ** Captures with syntax p / v
2875 ** (function capture, query capture, string capture, or number capture)
2877 static int lp_divcapture (lua_State *L) {
2878 switch (lua_type(L, 2)) {
2879 case LUA_TFUNCTION: return capture_aux(L, Cfunction, 2);
2880 case LUA_TTABLE: return capture_aux(L, Cquery, 2);
2881 case LUA_TSTRING: return capture_aux(L, Cstring, 2);
2882 case LUA_TNUMBER: {
2883 int n = lua_tointeger(L, 2);
2884 TTree *tree = newroot1sib(L, TCapture);
2885 luaL_argcheck(L, 0 <= n && n <= SHRT_MAX, 1, "invalid number");
2886 tree->cap = Cnum;
2887 tree->key = n;
2888 return 1;
2890 default: return luaL_argerror(L, 2, "invalid replacement value");
2895 static int lp_substcapture (lua_State *L) {
2896 return capture_aux(L, Csubst, 0);
2900 static int lp_tablecapture (lua_State *L) {
2901 return capture_aux(L, Ctable, 0);
2905 static int lp_groupcapture (lua_State *L) {
2906 if (lua_isnoneornil(L, 2))
2907 return capture_aux(L, Cgroup, 0);
2908 else
2909 return capture_aux(L, Cgroup, 2);
2913 static int lp_foldcapture (lua_State *L) {
2914 luaL_checktype(L, 2, LUA_TFUNCTION);
2915 return capture_aux(L, Cfold, 2);
2919 static int lp_simplecapture (lua_State *L) {
2920 return capture_aux(L, Csimple, 0);
2924 static int lp_poscapture (lua_State *L) {
2925 newemptycap(L, Cposition);
2926 return 1;
2930 static int lp_argcapture (lua_State *L) {
2931 int n = (int)luaL_checkinteger(L, 1);
2932 TTree *tree = newemptycap(L, Carg);
2933 tree->key = n;
2934 luaL_argcheck(L, 0 < n && n <= SHRT_MAX, 1, "invalid argument index");
2935 return 1;
2939 static int lp_backref (lua_State *L) {
2940 luaL_checkany(L, 1);
2941 newemptycapkey(L, Cbackref, 1);
2942 return 1;
2947 ** Constant capture
2949 static int lp_constcapture (lua_State *L) {
2950 int i;
2951 int n = lua_gettop(L); /* number of values */
2952 if (n == 0) /* no values? */
2953 newleaf(L, TTrue); /* no capture */
2954 else if (n == 1)
2955 newemptycapkey(L, Cconst, 1); /* single constant capture */
2956 else { /* create a group capture with all values */
2957 TTree *tree = newtree(L, 1 + 3 * (n - 1) + 2);
2958 newktable(L, n); /* create a 'ktable' for new tree */
2959 tree->tag = TCapture;
2960 tree->cap = Cgroup;
2961 tree->key = 0;
2962 tree = sib1(tree);
2963 for (i = 1; i <= n - 1; i++) {
2964 tree->tag = TSeq;
2965 tree->u.ps = 3; /* skip TCapture and its sibling */
2966 auxemptycap(sib1(tree), Cconst);
2967 sib1(tree)->key = addtoktable(L, i);
2968 tree = sib2(tree);
2970 auxemptycap(tree, Cconst);
2971 tree->key = addtoktable(L, i);
2973 return 1;
2977 static int lp_matchtime (lua_State *L) {
2978 TTree *tree;
2979 luaL_checktype(L, 2, LUA_TFUNCTION);
2980 tree = newroot1sib(L, TRunTime);
2981 tree->key = addtonewktable(L, 1, 2);
2982 return 1;
2985 /* }====================================================== */
2989 ** {======================================================
2990 ** Grammar - Tree generation
2991 ** =======================================================
2995 ** push on the stack the index and the pattern for the
2996 ** initial rule of grammar at index 'arg' in the stack;
2997 ** also add that index into position table.
2999 static void getfirstrule (lua_State *L, int arg, int postab) {
3000 lua_rawgeti(L, arg, 1); /* access first element */
3001 if (lua_isstring(L, -1)) { /* is it the name of initial rule? */
3002 lua_pushvalue(L, -1); /* duplicate it to use as key */
3003 lua_gettable(L, arg); /* get associated rule */
3005 else {
3006 lua_pushinteger(L, 1); /* key for initial rule */
3007 lua_insert(L, -2); /* put it before rule */
3009 if (!testpattern(L, -1)) { /* initial rule not a pattern? */
3010 if (lua_isnil(L, -1))
3011 luaL_error(L, "grammar has no initial rule");
3012 else
3013 luaL_error(L, "initial rule '%s' is not a pattern", lua_tostring(L, -2));
3015 lua_pushvalue(L, -2); /* push key */
3016 lua_pushinteger(L, 1); /* push rule position (after TGrammar) */
3017 lua_settable(L, postab); /* insert pair at position table */
3021 ** traverse grammar at index 'arg', pushing all its keys and patterns
3022 ** into the stack. Create a new table (before all pairs key-pattern) to
3023 ** collect all keys and their associated positions in the final tree
3024 ** (the "position table").
3025 ** Return the number of rules and (in 'totalsize') the total size
3026 ** for the new tree.
3028 static int collectrules (lua_State *L, int arg, int *totalsize) {
3029 int n = 1; /* to count number of rules */
3030 int postab = lua_gettop(L) + 1; /* index of position table */
3031 int size; /* accumulator for total size */
3032 lua_newtable(L); /* create position table */
3033 getfirstrule(L, arg, postab);
3034 size = 2 + getsize(L, postab + 2); /* TGrammar + TRule + rule */
3035 lua_pushnil(L); /* prepare to traverse grammar table */
3036 while (lua_next(L, arg) != 0) {
3037 if (lua_tonumber(L, -2) == 1 ||
3038 lp_equal(L, -2, postab + 1)) { /* initial rule? */
3039 lua_pop(L, 1); /* remove value (keep key for lua_next) */
3040 continue;
3042 if (!testpattern(L, -1)) /* value is not a pattern? */
3043 luaL_error(L, "rule '%s' is not a pattern", val2str(L, -2));
3044 luaL_checkstack(L, LUA_MINSTACK, "grammar has too many rules");
3045 lua_pushvalue(L, -2); /* push key (to insert into position table) */
3046 lua_pushinteger(L, size);
3047 lua_settable(L, postab);
3048 size += 1 + getsize(L, -1); /* update size */
3049 lua_pushvalue(L, -2); /* push key (for next lua_next) */
3050 n++;
3052 *totalsize = size + 1; /* TTrue to finish list of rules */
3053 return n;
3057 static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
3058 int i;
3059 TTree *nd = sib1(grammar); /* auxiliary pointer to traverse the tree */
3060 for (i = 0; i < n; i++) { /* add each rule into new tree */
3061 int ridx = frule + 2*i + 1; /* index of i-th rule */
3062 int rulesize;
3063 TTree *rn = gettree(L, ridx, &rulesize);
3064 nd->tag = TRule;
3065 nd->key = 0;
3066 nd->cap = i; /* rule number */
3067 nd->u.ps = rulesize + 1; /* point to next rule */
3068 memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */
3069 mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */
3070 nd = sib2(nd); /* move to next rule */
3072 nd->tag = TTrue; /* finish list of rules */
3077 ** Check whether a tree has potential infinite loops
3079 static int checkloops (TTree *tree) {
3080 tailcall:
3081 if (tree->tag == TRep && nullable(sib1(tree)))
3082 return 1;
3083 else if (tree->tag == TGrammar)
3084 return 0; /* sub-grammars already checked */
3085 else {
3086 switch (numsiblings[tree->tag]) {
3087 case 1: /* return checkloops(sib1(tree)); */
3088 tree = sib1(tree); goto tailcall;
3089 case 2:
3090 if (checkloops(sib1(tree))) return 1;
3091 /* else return checkloops(sib2(tree)); */
3092 tree = sib2(tree); goto tailcall;
3093 default: assert(numsiblings[tree->tag] == 0); return 0;
3099 static int verifyerror (lua_State *L, int *passed, int npassed) {
3100 int i, j;
3101 for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */
3102 for (j = i - 1; j >= 0; j--) {
3103 if (passed[i] == passed[j]) {
3104 lua_rawgeti(L, -1, passed[i]); /* get rule's key */
3105 return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1));
3109 return luaL_error(L, "too many left calls in grammar");
3114 ** Check whether a rule can be left recursive; raise an error in that
3115 ** case; otherwise return 1 iff pattern is nullable.
3116 ** The return value is used to check sequences, where the second pattern
3117 ** is only relevant if the first is nullable.
3118 ** Parameter 'nb' works as an accumulator, to allow tail calls in
3119 ** choices. ('nb' true makes function returns true.)
3120 ** Assume ktable at the top of the stack.
3122 static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
3123 int nb) {
3124 tailcall:
3125 switch (tree->tag) {
3126 case TChar: case TSet: case TAny:
3127 case TFalse:
3128 return nb; /* cannot pass from here */
3129 case TTrue:
3130 case TBehind: /* look-behind cannot have calls */
3131 return 1;
3132 case TNot: case TAnd: case TRep:
3133 /* return verifyrule(L, sib1(tree), passed, npassed, 1); */
3134 tree = sib1(tree); nb = 1; goto tailcall;
3135 case TCapture: case TRunTime:
3136 /* return verifyrule(L, sib1(tree), passed, npassed, nb); */
3137 tree = sib1(tree); goto tailcall;
3138 case TCall:
3139 /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
3140 tree = sib2(tree); goto tailcall;
3141 case TSeq: /* only check 2nd child if first is nb */
3142 if (!verifyrule(L, sib1(tree), passed, npassed, 0))
3143 return nb;
3144 /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */
3145 tree = sib2(tree); goto tailcall;
3146 case TChoice: /* must check both children */
3147 nb = verifyrule(L, sib1(tree), passed, npassed, nb);
3148 /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
3149 tree = sib2(tree); goto tailcall;
3150 case TRule:
3151 if (npassed >= MAXRULES)
3152 return verifyerror(L, passed, npassed);
3153 else {
3154 passed[npassed++] = tree->key;
3155 /* return verifyrule(L, sib1(tree), passed, npassed); */
3156 tree = sib1(tree); goto tailcall;
3158 case TGrammar:
3159 return nullable(tree); /* sub-grammar cannot be left recursive */
3160 default: assert(0); return 0;
3165 static void verifygrammar (lua_State *L, TTree *grammar) {
3166 int passed[MAXRULES];
3167 TTree *rule;
3168 /* check left-recursive rules */
3169 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
3170 if (rule->key == 0) continue; /* unused rule */
3171 verifyrule(L, sib1(rule), passed, 0, 0);
3173 assert(rule->tag == TTrue);
3174 /* check infinite loops inside rules */
3175 for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
3176 if (rule->key == 0) continue; /* unused rule */
3177 if (checkloops(sib1(rule))) {
3178 lua_rawgeti(L, -1, rule->key); /* get rule's key */
3179 luaL_error(L, "empty loop in rule '%s'", val2str(L, -1));
3182 assert(rule->tag == TTrue);
3187 ** Give a name for the initial rule if it is not referenced
3189 static void initialrulename (lua_State *L, TTree *grammar, int frule) {
3190 if (sib1(grammar)->key == 0) { /* initial rule is not referenced? */
3191 int n = lua_rawlen(L, -1) + 1; /* index for name */
3192 lua_pushvalue(L, frule); /* rule's name */
3193 lua_rawseti(L, -2, n); /* ktable was on the top of the stack */
3194 sib1(grammar)->key = n;
3199 static TTree *newgrammar (lua_State *L, int arg) {
3200 int treesize;
3201 int frule = lua_gettop(L) + 2; /* position of first rule's key */
3202 int n = collectrules(L, arg, &treesize);
3203 TTree *g = newtree(L, treesize);
3204 luaL_argcheck(L, n <= MAXRULES, arg, "grammar has too many rules");
3205 g->tag = TGrammar; g->u.n = n;
3206 lua_newtable(L); /* create 'ktable' */
3207 lua_setuservalue(L, -2);
3208 buildgrammar(L, g, frule, n);
3209 lua_getuservalue(L, -1); /* get 'ktable' for new tree */
3210 finalfix(L, frule - 1, g, sib1(g));
3211 initialrulename(L, g, frule);
3212 verifygrammar(L, g);
3213 lua_pop(L, 1); /* remove 'ktable' */
3214 lua_insert(L, -(n * 2 + 2)); /* move new table to proper position */
3215 lua_pop(L, n * 2 + 1); /* remove position table + rule pairs */
3216 return g; /* new table at the top of the stack */
3219 /* }====================================================== */
3222 static Instruction *prepcompile (lua_State *L, Pattern *p, int idx) {
3223 lua_getuservalue(L, idx); /* push 'ktable' (may be used by 'finalfix') */
3224 finalfix(L, 0, NULL, p->tree);
3225 lua_pop(L, 1); /* remove 'ktable' */
3226 return compile(L, p);
3230 static int lp_printtree (lua_State *L) {
3231 TTree *tree = getpatt(L, 1, NULL);
3232 int c = lua_toboolean(L, 2);
3233 if (c) {
3234 lua_getuservalue(L, 1); /* push 'ktable' (may be used by 'finalfix') */
3235 finalfix(L, 0, NULL, tree);
3236 lua_pop(L, 1); /* remove 'ktable' */
3238 printktable(L, 1);
3239 printtree(tree, 0);
3240 return 0;
3244 static int lp_printcode (lua_State *L) {
3245 Pattern *p = getpattern(L, 1);
3246 printktable(L, 1);
3247 if (p->code == NULL) /* not compiled yet? */
3248 prepcompile(L, p, 1);
3249 printpatt(p->code, p->codesize);
3250 return 0;
3255 ** Get the initial position for the match, interpreting negative
3256 ** values from the end of the subject
3258 static size_t initposition (lua_State *L, size_t len) {
3259 lua_Integer ii = luaL_optinteger(L, 3, 1);
3260 if (ii > 0) { /* positive index? */
3261 if ((size_t)ii <= len) /* inside the string? */
3262 return (size_t)ii - 1; /* return it (corrected to 0-base) */
3263 else return len; /* crop at the end */
3265 else { /* negative index */
3266 if ((size_t)(-ii) <= len) /* inside the string? */
3267 return len - ((size_t)(-ii)); /* return position from the end */
3268 else return 0; /* crop at the beginning */
3274 ** Main match function
3276 static int lp_match (lua_State *L) {
3277 Capture capture[INITCAPSIZE];
3278 const char *r;
3279 size_t l;
3280 Pattern *p = (getpatt(L, 1, NULL), getpattern(L, 1));
3281 Instruction *code = (p->code != NULL) ? p->code : prepcompile(L, p, 1);
3282 const char *s = luaL_checklstring(L, SUBJIDX, &l);
3283 size_t i = initposition(L, l);
3284 int ptop = lua_gettop(L);
3285 lua_pushnil(L); /* initialize subscache */
3286 lua_pushlightuserdata(L, capture); /* initialize caplistidx */
3287 lua_getuservalue(L, 1); /* initialize penvidx */
3288 r = match(L, s, s + i, s + l, code, capture, ptop);
3289 if (r == NULL) {
3290 lua_pushnil(L);
3291 return 1;
3293 return getcaptures(L, s, r, ptop);
3299 ** {======================================================
3300 ** Library creation and functions not related to matching
3301 ** =======================================================
3304 /* maximum limit for stack size */
3305 #define MAXLIM (INT_MAX / 100)
3307 static int lp_setmax (lua_State *L) {
3308 lua_Integer lim = luaL_checkinteger(L, 1);
3309 luaL_argcheck(L, 0 < lim && lim <= MAXLIM, 1, "out of range");
3310 lua_settop(L, 1);
3311 lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
3312 return 0;
3316 static int lp_version (lua_State *L) {
3317 lua_pushstring(L, VERSION);
3318 return 1;
3322 static int lp_type (lua_State *L) {
3323 if (testpattern(L, 1))
3324 lua_pushliteral(L, "pattern");
3325 else
3326 lua_pushnil(L);
3327 return 1;
3331 int lp_gc (lua_State *L) {
3332 Pattern *p = getpattern(L, 1);
3333 realloccode(L, p, 0); /* delete code block */
3334 return 0;
3338 static void createcat (lua_State *L, const char *catname, int (catf) (int)) {
3339 TTree *t = newcharset(L);
3340 int i;
3341 for (i = 0; i <= UCHAR_MAX; i++)
3342 if (catf(i)) setchar(treebuffer(t), i);
3343 lua_setfield(L, -2, catname);
3347 static int lp_locale (lua_State *L) {
3348 if (lua_isnoneornil(L, 1)) {
3349 lua_settop(L, 0);
3350 lua_createtable(L, 0, 12);
3352 else {
3353 luaL_checktype(L, 1, LUA_TTABLE);
3354 lua_settop(L, 1);
3356 createcat(L, "alnum", isalnum);
3357 createcat(L, "alpha", isalpha);
3358 createcat(L, "cntrl", iscntrl);
3359 createcat(L, "digit", isdigit);
3360 createcat(L, "graph", isgraph);
3361 createcat(L, "lower", islower);
3362 createcat(L, "print", isprint);
3363 createcat(L, "punct", ispunct);
3364 createcat(L, "space", isspace);
3365 createcat(L, "upper", isupper);
3366 createcat(L, "xdigit", isxdigit);
3367 return 1;
3371 static struct luaL_Reg pattreg[] = {
3372 {"ptree", lp_printtree},
3373 {"pcode", lp_printcode},
3374 {"match", lp_match},
3375 {"B", lp_behind},
3376 {"V", lp_V},
3377 {"C", lp_simplecapture},
3378 {"Cc", lp_constcapture},
3379 {"Cmt", lp_matchtime},
3380 {"Cb", lp_backref},
3381 {"Carg", lp_argcapture},
3382 {"Cp", lp_poscapture},
3383 {"Cs", lp_substcapture},
3384 {"Ct", lp_tablecapture},
3385 {"Cf", lp_foldcapture},
3386 {"Cg", lp_groupcapture},
3387 {"P", lp_P},
3388 {"S", lp_set},
3389 {"R", lp_range},
3390 {"locale", lp_locale},
3391 {"version", lp_version},
3392 {"setmaxstack", lp_setmax},
3393 {"type", lp_type},
3394 {NULL, NULL}
3398 static struct luaL_Reg metareg[] = {
3399 {"__mul", lp_seq},
3400 {"__add", lp_choice},
3401 {"__pow", lp_star},
3402 {"__gc", lp_gc},
3403 {"__len", lp_and},
3404 {"__div", lp_divcapture},
3405 {"__unm", lp_not},
3406 {"__sub", lp_sub},
3407 {NULL, NULL}
3411 int luaopen_lpeg (lua_State *L);
3412 int luaopen_lpeg (lua_State *L) {
3413 luaL_newmetatable(L, PATTERN_T);
3414 lua_pushnumber(L, MAXBACK); /* initialize maximum backtracking */
3415 lua_setfield(L, LUA_REGISTRYINDEX, MAXSTACKIDX);
3416 luaL_setfuncs(L, metareg, 0);
3417 luaL_newlib(L, pattreg);
3418 lua_pushvalue(L, -1);
3419 lua_setfield(L, -3, "__index");
3420 return 1;
3423 /* }====================================================== */