4 ** $Id: lpprint.c,v 1.9 2015/06/15 16:09:57 roberto Exp $
5 ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
8 /* #include <ctype.h> */
9 /* #include <limits.h> */
10 /* #include <stdio.h> */
13 /* #include "lptypes.h" */
14 /* #include "lpprint.h" */
15 /* #include "lpcode.h" */
18 #if defined(LPEG_DEBUG)
21 ** {======================================================
22 ** Printing patterns (for debugging)
23 ** =======================================================
27 void printcharset (const byte
*st
) {
30 for (i
= 0; i
<= UCHAR_MAX
; i
++) {
32 while (testchar(st
, i
) && i
<= UCHAR_MAX
) i
++;
33 if (i
- 1 == first
) /* unary range? */
34 printf("(%02x)", first
);
35 else if (i
- 1 > first
) /* non-empty range? */
36 printf("(%02x-%02x)", first
, i
- 1);
42 static void printcapkind (int kind
) {
43 const char *const modes
[] = {
44 "close", "position", "constant", "backref",
45 "argument", "simple", "table", "function",
46 "query", "string", "num", "substitution", "fold",
48 printf("%s", modes
[kind
]);
52 static void printjmp (const Instruction
*op
, const Instruction
*p
) {
53 printf("-> %d", (int)(p
+ (p
+ 1)->offset
- op
));
57 void printinst (const Instruction
*op
, const Instruction
*p
) {
58 const char *const names
[] = {
60 "testany", "testchar", "testset",
63 "choice", "jmp", "call", "open_call",
64 "commit", "partial_commit", "back_commit", "failtwice", "fail", "giveup",
65 "fullcapture", "opencapture", "closecapture", "closeruntime"
67 printf("%02ld: %s ", (long)(p
- op
), names
[p
->i
.code
]);
68 switch ((Opcode
)p
->i
.code
) {
70 printf("'%c'", p
->i
.aux
);
74 printf("'%c'", p
->i
.aux
); printjmp(op
, p
);
78 printcapkind(getkind(p
));
79 printf(" (size = %d) (idx = %d)", getoff(p
), p
->i
.key
);
83 printcapkind(getkind(p
));
84 printf(" (idx = %d)", p
->i
.key
);
88 printcharset((p
+1)->buff
);
92 printcharset((p
+2)->buff
); printjmp(op
, p
);
96 printcharset((p
+1)->buff
);
100 printf("-> %d", (p
+ 1)->offset
);
104 printf("%d", p
->i
.aux
);
107 case IJmp
: case ICall
: case ICommit
: case IChoice
:
108 case IPartialCommit
: case IBackCommit
: case ITestAny
: {
118 void printpatt (Instruction
*p
, int n
) {
127 #if defined(LPEG_DEBUG)
128 static void printcap (Capture
*cap
) {
129 printcapkind(cap
->kind
);
130 printf(" (idx: %d - size: %d) -> %p\n", cap
->idx
, cap
->siz
, cap
->s
);
134 void printcaplist (Capture
*cap
, Capture
*limit
) {
136 for (; cap
->s
&& (limit
== NULL
|| cap
< limit
); cap
++)
142 /* }====================================================== */
146 ** {======================================================
147 ** Printing trees (for debugging)
148 ** =======================================================
151 static const char *tagnames
[] = {
152 "char", "set", "any",
157 "call", "opencall", "rule", "grammar",
159 "capture", "run-time"
163 void printtree (TTree
*tree
, int ident
) {
165 for (i
= 0; i
< ident
; i
++) printf(" ");
166 printf("%s", tagnames
[tree
->tag
]);
171 printf(" '%c'\n", c
);
173 printf(" (%02X)\n", c
);
177 printcharset(treebuffer(tree
));
181 case TOpenCall
: case TCall
: {
182 printf(" key: %d\n", tree
->key
);
186 printf(" %d\n", tree
->u
.n
);
187 printtree(sib1(tree
), ident
+ 2);
191 printf(" cap: %d key: %d n: %d\n", tree
->cap
, tree
->key
, tree
->u
.n
);
192 printtree(sib1(tree
), ident
+ 2);
196 printf(" n: %d key: %d\n", tree
->cap
, tree
->key
);
197 printtree(sib1(tree
), ident
+ 2);
198 break; /* do not print next rule as a sibling */
201 TTree
*rule
= sib1(tree
);
202 printf(" %d\n", tree
->u
.n
); /* number of rules */
203 for (i
= 0; i
< tree
->u
.n
; i
++) {
204 printtree(rule
, ident
+ 2);
207 assert(rule
->tag
== TTrue
); /* sentinel */
211 int sibs
= numsiblings
[tree
->tag
];
214 printtree(sib1(tree
), ident
+ 2);
216 printtree(sib2(tree
), ident
+ 2);
224 void printktable (lua_State
*L
, int idx
) {
226 lua_getuservalue(L
, idx
);
227 if (lua_isnil(L
, -1)) /* no ktable? */
229 n
= lua_rawlen(L
, -1);
231 for (i
= 1; i
<= n
; i
++) {
233 lua_rawgeti(L
, -1, i
);
234 if (lua_isstring(L
, -1))
235 printf("%s ", lua_tostring(L
, -1));
237 printf("%s ", lua_typename(L
, lua_type(L
, -1)));
241 /* leave ktable at the stack */
244 /* }====================================================== */
249 ** $Id: lpvm.c,v 1.6 2015/09/28 17:01:25 roberto Exp $
250 ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
253 /* #include <limits.h> */
254 /* #include <string.h> */
257 /* #include "lua.h" */
258 /* #include "lauxlib.h" */
260 /* #include "lpcap.h" */
261 /* #include "lptypes.h" */
262 /* #include "lpvm.h" */
263 /* #include "lpprint.h" */
266 /* initial size for call/backtrack stack */
267 #if !defined(INITBACK)
268 #define INITBACK MAXBACK
272 #define getoffset(p) (((p) + 1)->offset)
274 static const Instruction giveup
= {{IGiveup
, 0, 0}};
278 ** {======================================================
280 ** =======================================================
284 typedef struct Stack
{
285 const char *s
; /* saved position (or NULL for calls) */
286 const Instruction
*p
; /* next instruction */
291 #define getstackbase(L, ptop) ((Stack *)lua_touserdata(L, stackidx(ptop)))
295 ** Double the size of the array of captures
297 static Capture
*doublecap (lua_State
*L
, Capture
*cap
, int captop
, int ptop
) {
299 if (captop
>= INT_MAX
/((int)sizeof(Capture
) * 2))
300 luaL_error(L
, "too many captures");
301 newc
= (Capture
*)lua_newuserdata(L
, captop
* 2 * sizeof(Capture
));
302 memcpy(newc
, cap
, captop
* sizeof(Capture
));
303 lua_replace(L
, caplistidx(ptop
));
309 ** Double the size of the stack
311 static Stack
*doublestack (lua_State
*L
, Stack
**stacklimit
, int ptop
) {
312 Stack
*stack
= getstackbase(L
, ptop
);
314 int n
= *stacklimit
- stack
; /* current stack size */
316 lua_getfield(L
, LUA_REGISTRYINDEX
, MAXSTACKIDX
);
317 max
= lua_tointeger(L
, -1); /* maximum allowed size */
319 if (n
>= max
) /* already at maximum size? */
320 luaL_error(L
, "backtrack stack overflow (current limit is %d)", max
);
321 newn
= 2 * n
; /* new size */
322 if (newn
> max
) newn
= max
;
323 newstack
= (Stack
*)lua_newuserdata(L
, newn
* sizeof(Stack
));
324 memcpy(newstack
, stack
, n
* sizeof(Stack
));
325 lua_replace(L
, stackidx(ptop
));
326 *stacklimit
= newstack
+ newn
;
327 return newstack
+ n
; /* return next position */
332 ** Interpret the result of a dynamic capture: false -> fail;
333 ** true -> keep current position; number -> next position.
334 ** Return new subject position. 'fr' is stack index where
335 ** is the result; 'curr' is current subject position; 'limit'
336 ** is subject's size.
338 static int resdyncaptures (lua_State
*L
, int fr
, int curr
, int limit
) {
340 if (!lua_toboolean(L
, fr
)) { /* false value? */
341 lua_settop(L
, fr
- 1); /* remove results */
342 return -1; /* and fail */
344 else if (lua_isboolean(L
, fr
)) /* true? */
345 res
= curr
; /* keep current position */
347 res
= lua_tointeger(L
, fr
) - 1; /* new position */
348 if (res
< curr
|| res
> limit
)
349 luaL_error(L
, "invalid position returned by match-time capture");
351 lua_remove(L
, fr
); /* remove first result (offset) */
357 ** Add capture values returned by a dynamic capture to the capture list
358 ** 'base', nested inside a group capture. 'fd' indexes the first capture
359 ** value, 'n' is the number of values (at least 1).
361 static void adddyncaptures (const char *s
, Capture
*base
, int n
, int fd
) {
363 /* Cgroup capture is already there */
364 assert(base
[0].kind
== Cgroup
&& base
[0].siz
== 0);
365 base
[0].idx
= 0; /* make it an anonymous group */
366 for (i
= 1; i
<= n
; i
++) { /* add runtime captures */
367 base
[i
].kind
= Cruntime
;
368 base
[i
].siz
= 1; /* mark it as closed */
369 base
[i
].idx
= fd
+ i
- 1; /* stack index of capture value */
372 base
[i
].kind
= Cclose
; /* close group */
379 ** Remove dynamic captures from the Lua stack (called in case of failure)
381 static int removedyncap (lua_State
*L
, Capture
*capture
,
382 int level
, int last
) {
383 int id
= finddyncap(capture
+ level
, capture
+ last
); /* index of 1st cap. */
384 int top
= lua_gettop(L
);
385 if (id
== 0) return 0; /* no dynamic captures? */
386 lua_settop(L
, id
- 1); /* remove captures */
387 return top
- id
+ 1; /* number of values removed */
392 ** Opcode interpreter
394 const char *match (lua_State
*L
, const char *o
, const char *s
, const char *e
,
395 Instruction
*op
, Capture
*capture
, int ptop
) {
396 Stack stackbase
[INITBACK
];
397 Stack
*stacklimit
= stackbase
+ INITBACK
;
398 Stack
*stack
= stackbase
; /* point to first empty slot in stack */
399 int capsize
= INITCAPSIZE
;
400 int captop
= 0; /* point to first empty slot in captures */
401 int ndyncap
= 0; /* number of dynamic captures (in Lua stack) */
402 const Instruction
*p
= op
; /* current instruction */
403 stack
->p
= &giveup
; stack
->s
= s
; stack
->caplevel
= 0; stack
++;
404 lua_pushlightuserdata(L
, stackbase
);
407 printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ",
408 s
, stack
- getstackbase(L
, ptop
), ndyncap
, captop
);
410 printcaplist(capture
, capture
+ captop
);
412 assert(stackidx(ptop
) + ndyncap
== lua_gettop(L
) && ndyncap
<= captop
);
413 switch ((Opcode
)p
->i
.code
) {
415 assert(stack
== getstackbase(L
, ptop
) + 1);
416 capture
[captop
].kind
= Cclose
;
417 capture
[captop
].s
= NULL
;
421 assert(stack
== getstackbase(L
, ptop
));
425 assert(stack
> getstackbase(L
, ptop
) && (stack
- 1)->s
== NULL
);
430 if (s
< e
) { p
++; s
++; }
436 else p
+= getoffset(p
);
440 if ((byte
)*s
== p
->i
.aux
&& s
< e
) { p
++; s
++; }
445 if ((byte
)*s
== p
->i
.aux
&& s
< e
) p
+= 2;
446 else p
+= getoffset(p
);
451 if (testchar((p
+1)->buff
, c
) && s
< e
)
452 { p
+= CHARSETINSTSIZE
; s
++; }
458 if (testchar((p
+ 2)->buff
, c
) && s
< e
)
459 p
+= 1 + CHARSETINSTSIZE
;
460 else p
+= getoffset(p
);
465 if (n
> s
- o
) goto fail
;
472 if (!testchar((p
+1)->buff
, c
)) break;
474 p
+= CHARSETINSTSIZE
;
482 if (stack
== stacklimit
)
483 stack
= doublestack(L
, &stacklimit
, ptop
);
484 stack
->p
= p
+ getoffset(p
);
486 stack
->caplevel
= captop
;
492 if (stack
== stacklimit
)
493 stack
= doublestack(L
, &stacklimit
, ptop
);
495 stack
->p
= p
+ 2; /* save return address */
501 assert(stack
> getstackbase(L
, ptop
) && (stack
- 1)->s
!= NULL
);
506 case IPartialCommit
: {
507 assert(stack
> getstackbase(L
, ptop
) && (stack
- 1)->s
!= NULL
);
509 (stack
- 1)->caplevel
= captop
;
514 assert(stack
> getstackbase(L
, ptop
) && (stack
- 1)->s
!= NULL
);
516 captop
= stack
->caplevel
;
521 assert(stack
> getstackbase(L
, ptop
));
525 fail
: { /* pattern failed: try to backtrack */
526 do { /* remove pending calls */
527 assert(stack
> getstackbase(L
, ptop
));
530 if (ndyncap
> 0) /* is there matchtime captures? */
531 ndyncap
-= removedyncap(L
, capture
, stack
->caplevel
, captop
);
532 captop
= stack
->caplevel
;
536 case ICloseRunTime
: {
539 int fr
= lua_gettop(L
) + 1; /* stack index of first result */
540 cs
.s
= o
; cs
.L
= L
; cs
.ocap
= capture
; cs
.ptop
= ptop
;
541 n
= runtimecap(&cs
, capture
+ captop
, s
, &rem
); /* call function */
542 captop
-= n
; /* remove nested captures */
543 fr
-= rem
; /* 'rem' items were popped from Lua stack */
544 res
= resdyncaptures(L
, fr
, s
- o
, e
- o
); /* get result */
545 if (res
== -1) /* fail? */
547 s
= o
+ res
; /* else update current position */
548 n
= lua_gettop(L
) - fr
+ 1; /* number of new captures */
549 ndyncap
+= n
- rem
; /* update number of dynamic captures */
550 if (n
> 0) { /* any new capture? */
551 if ((captop
+= n
+ 2) >= capsize
) {
552 capture
= doublecap(L
, capture
, captop
, ptop
);
553 capsize
= 2 * captop
;
555 /* add new captures to 'capture' list */
556 adddyncaptures(s
, capture
+ captop
- n
- 2, n
, fr
);
561 case ICloseCapture
: {
564 /* if possible, turn capture into a full capture */
565 if (capture
[captop
- 1].siz
== 0 &&
566 s1
- capture
[captop
- 1].s
< UCHAR_MAX
) {
567 capture
[captop
- 1].siz
= s1
- capture
[captop
- 1].s
+ 1;
572 capture
[captop
].siz
= 1; /* mark entry as closed */
573 capture
[captop
].s
= s
;
578 capture
[captop
].siz
= 0; /* mark entry as open */
579 capture
[captop
].s
= s
;
582 capture
[captop
].siz
= getoff(p
) + 1; /* save capture size */
583 capture
[captop
].s
= s
- getoff(p
);
584 /* goto pushcapture; */
586 capture
[captop
].idx
= p
->i
.key
;
587 capture
[captop
].kind
= getkind(p
);
588 if (++captop
>= capsize
) {
589 capture
= doublecap(L
, capture
, captop
, ptop
);
590 capsize
= 2 * captop
;
595 default: assert(0); return NULL
;
600 /* }====================================================== */
604 ** $Id: lpcode.c,v 1.23 2015/06/12 18:36:47 roberto Exp $
605 ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
608 /* #include <limits.h> */
611 /* #include "lua.h" */
612 /* #include "lauxlib.h" */
614 /* #include "lptypes.h" */
615 /* #include "lpcode.h" */
618 /* signals a "no-instruction */
623 static const Charset fullset_
=
624 {{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
625 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
626 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
627 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}};
629 static const Charset
*fullset
= &fullset_
;
632 ** {======================================================
633 ** Analysis and some optimizations
634 ** =======================================================
638 ** Check whether a charset is empty (returns IFail), singleton (IChar),
639 ** full (IAny), or none of those (ISet). When singleton, '*c' returns
640 ** which character it is. (When generic set, the set was the input,
641 ** so there is no need to return it.)
643 static Opcode
charsettype (const byte
*cs
, int *c
) {
644 int count
= 0; /* number of characters in the set */
646 int candidate
= -1; /* candidate position for the singleton char */
647 for (i
= 0; i
< CHARSETSIZE
; i
++) { /* for each byte */
649 if (b
== 0) { /* is byte empty? */
650 if (count
> 1) /* was set neither empty nor singleton? */
651 return ISet
; /* neither full nor empty nor singleton */
652 /* else set is still empty or singleton */
654 else if (b
== 0xFF) { /* is byte full? */
655 if (count
< (i
* BITSPERCHAR
)) /* was set not full? */
656 return ISet
; /* neither full nor empty nor singleton */
657 else count
+= BITSPERCHAR
; /* set is still full */
659 else if ((b
& (b
- 1)) == 0) { /* has byte only one bit? */
660 if (count
> 0) /* was set not empty? */
661 return ISet
; /* neither full nor empty nor singleton */
662 else { /* set has only one char till now; track it */
667 else return ISet
; /* byte is neither empty, full, nor singleton */
670 case 0: return IFail
; /* empty set */
671 case 1: { /* singleton; find character bit inside byte */
672 int b
= cs
[candidate
];
673 *c
= candidate
* BITSPERCHAR
;
674 if ((b
& 0xF0) != 0) { *c
+= 4; b
>>= 4; }
675 if ((b
& 0x0C) != 0) { *c
+= 2; b
>>= 2; }
676 if ((b
& 0x02) != 0) { *c
+= 1; }
680 assert(count
== CHARSETSIZE
* BITSPERCHAR
); /* full set */
688 ** A few basic operations on Charsets
690 static void cs_complement (Charset
*cs
) {
691 loopset(i
, cs
->cs
[i
] = ~cs
->cs
[i
]);
694 static int cs_equal (const byte
*cs1
, const byte
*cs2
) {
695 loopset(i
, if (cs1
[i
] != cs2
[i
]) return 0);
699 static int cs_disjoint (const Charset
*cs1
, const Charset
*cs2
) {
700 loopset(i
, if ((cs1
->cs
[i
] & cs2
->cs
[i
]) != 0) return 0;)
706 ** If 'tree' is a 'char' pattern (TSet, TChar, TAny), convert it into a
707 ** charset and return 1; else return 0.
709 int tocharset (TTree
*tree
, Charset
*cs
) {
711 case TSet
: { /* copy set */
712 loopset(i
, cs
->cs
[i
] = treebuffer(tree
)[i
]);
715 case TChar
: { /* only one char */
716 assert(0 <= tree
->u
.n
&& tree
->u
.n
<= UCHAR_MAX
);
717 loopset(i
, cs
->cs
[i
] = 0); /* erase all chars */
718 setchar(cs
->cs
, tree
->u
.n
); /* add that one */
722 loopset(i
, cs
->cs
[i
] = 0xFF); /* add all characters to the set */
731 ** Check whether a pattern tree has captures
733 int hascaptures (TTree
*tree
) {
736 case TCapture
: case TRunTime
:
739 tree
= sib2(tree
); goto tailcall
; /* return hascaptures(sib2(tree)); */
740 case TOpenCall
: assert(0);
742 switch (numsiblings
[tree
->tag
]) {
743 case 1: /* return hascaptures(sib1(tree)); */
744 tree
= sib1(tree
); goto tailcall
;
746 if (hascaptures(sib1(tree
))) return 1;
747 /* else return hascaptures(sib2(tree)); */
748 tree
= sib2(tree
); goto tailcall
;
749 default: assert(numsiblings
[tree
->tag
] == 0); return 0;
757 ** Checks how a pattern behaves regarding the empty string,
758 ** in one of two different ways:
759 ** A pattern is *nullable* if it can match without consuming any character;
760 ** A pattern is *nofail* if it never fails for any string
761 ** (including the empty string).
762 ** The difference is only for predicates and run-time captures;
763 ** for other patterns, the two properties are equivalent.
764 ** (With predicates, &'a' is nullable but not nofail. Of course,
765 ** nofail => nullable.)
766 ** These functions are all convervative in the following way:
767 ** p is nullable => nullable(p)
768 ** nofail(p) => p cannot fail
769 ** The function assumes that TOpenCall is not nullable;
770 ** this will be checked again when the grammar is fixed.
771 ** Run-time captures can do whatever they want, so the result
774 int checkaux (TTree
*tree
, int pred
) {
777 case TChar
: case TSet
: case TAny
:
778 case TFalse
: case TOpenCall
:
779 return 0; /* not nullable */
780 case TRep
: case TTrue
:
781 return 1; /* no fail */
782 case TNot
: case TBehind
: /* can match empty, but can fail */
783 if (pred
== PEnofail
) return 0;
784 else return 1; /* PEnullable */
785 case TAnd
: /* can match empty; fail iff body does */
786 if (pred
== PEnullable
) return 1;
787 /* else return checkaux(sib1(tree), pred); */
788 tree
= sib1(tree
); goto tailcall
;
789 case TRunTime
: /* can fail; match empty iff body does */
790 if (pred
== PEnofail
) return 0;
791 /* else return checkaux(sib1(tree), pred); */
792 tree
= sib1(tree
); goto tailcall
;
794 if (!checkaux(sib1(tree
), pred
)) return 0;
795 /* else return checkaux(sib2(tree), pred); */
796 tree
= sib2(tree
); goto tailcall
;
798 if (checkaux(sib2(tree
), pred
)) return 1;
799 /* else return checkaux(sib1(tree), pred); */
800 tree
= sib1(tree
); goto tailcall
;
801 case TCapture
: case TGrammar
: case TRule
:
802 /* return checkaux(sib1(tree), pred); */
803 tree
= sib1(tree
); goto tailcall
;
804 case TCall
: /* return checkaux(sib2(tree), pred); */
805 tree
= sib2(tree
); goto tailcall
;
806 default: assert(0); return 0;
812 ** number of characters to match a pattern (or -1 if variable)
813 ** ('count' avoids infinite loops for grammars)
815 int fixedlenx (TTree
*tree
, int count
, int len
) {
818 case TChar
: case TSet
: case TAny
:
820 case TFalse
: case TTrue
: case TNot
: case TAnd
: case TBehind
:
822 case TRep
: case TRunTime
: case TOpenCall
:
824 case TCapture
: case TRule
: case TGrammar
:
825 /* return fixedlenx(sib1(tree), count); */
826 tree
= sib1(tree
); goto tailcall
;
828 if (count
++ >= MAXRULES
)
829 return -1; /* may be a loop */
830 /* else return fixedlenx(sib2(tree), count); */
831 tree
= sib2(tree
); goto tailcall
;
833 len
= fixedlenx(sib1(tree
), count
, len
);
834 if (len
< 0) return -1;
835 /* else return fixedlenx(sib2(tree), count, len); */
836 tree
= sib2(tree
); goto tailcall
;
840 n1
= fixedlenx(sib1(tree
), count
, len
);
841 if (n1
< 0) return -1;
842 n2
= fixedlenx(sib2(tree
), count
, len
);
843 if (n1
== n2
) return n1
;
846 default: assert(0); return 0;
852 ** Computes the 'first set' of a pattern.
853 ** The result is a conservative aproximation:
854 ** match p ax -> x (for some x) ==> a belongs to first(p)
856 ** a not in first(p) ==> match p ax -> fail (for all x)
858 ** The set 'follow' is the first set of what follows the
859 ** pattern (full set if nothing follows it).
861 ** The function returns 0 when this resulting set can be used for
862 ** test instructions that avoid the pattern altogether.
863 ** A non-zero return can happen for two reasons:
864 ** 1) match p '' -> '' ==> return has bit 1 set
865 ** (tests cannot be used because they would always fail for an empty input);
866 ** 2) there is a match-time capture ==> return has bit 2 set
867 ** (optimizations should not bypass match-time captures).
869 static int getfirst (TTree
*tree
, const Charset
*follow
, Charset
*firstset
) {
872 case TChar
: case TSet
: case TAny
: {
873 tocharset(tree
, firstset
);
877 loopset(i
, firstset
->cs
[i
] = follow
->cs
[i
]);
878 return 1; /* accepts the empty string */
881 loopset(i
, firstset
->cs
[i
] = 0);
886 int e1
= getfirst(sib1(tree
), follow
, firstset
);
887 int e2
= getfirst(sib2(tree
), follow
, &csaux
);
888 loopset(i
, firstset
->cs
[i
] |= csaux
.cs
[i
]);
892 if (!nullable(sib1(tree
))) {
893 /* when p1 is not nullable, p2 has nothing to contribute;
894 return getfirst(sib1(tree), fullset, firstset); */
895 tree
= sib1(tree
); follow
= fullset
; goto tailcall
;
897 else { /* FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) */
899 int e2
= getfirst(sib2(tree
), follow
, &csaux
);
900 int e1
= getfirst(sib1(tree
), &csaux
, firstset
);
901 if (e1
== 0) return 0; /* 'e1' ensures that first can be used */
902 else if ((e1
| e2
) & 2) /* one of the children has a matchtime? */
903 return 2; /* pattern has a matchtime capture */
904 else return e2
; /* else depends on 'e2' */
908 getfirst(sib1(tree
), follow
, firstset
);
909 loopset(i
, firstset
->cs
[i
] |= follow
->cs
[i
]);
910 return 1; /* accept the empty string */
912 case TCapture
: case TGrammar
: case TRule
: {
913 /* return getfirst(sib1(tree), follow, firstset); */
914 tree
= sib1(tree
); goto tailcall
;
916 case TRunTime
: { /* function invalidates any follow info. */
917 int e
= getfirst(sib1(tree
), fullset
, firstset
);
918 if (e
) return 2; /* function is not "protected"? */
919 else return 0; /* pattern inside capture ensures first can be used */
922 /* return getfirst(sib2(tree), follow, firstset); */
923 tree
= sib2(tree
); goto tailcall
;
926 int e
= getfirst(sib1(tree
), follow
, firstset
);
927 loopset(i
, firstset
->cs
[i
] &= follow
->cs
[i
]);
931 if (tocharset(sib1(tree
), firstset
)) {
932 cs_complement(firstset
);
935 /* else go through */
937 case TBehind
: { /* instruction gives no new information */
938 /* call 'getfirst' only to check for math-time captures */
939 int e
= getfirst(sib1(tree
), follow
, firstset
);
940 loopset(i
, firstset
->cs
[i
] = follow
->cs
[i
]); /* uses follow */
941 return e
| 1; /* always can accept the empty string */
943 default: assert(0); return 0;
949 ** If 'headfail(tree)' true, then 'tree' can fail only depending on the
950 ** next character of the subject.
952 static int headfail (TTree
*tree
) {
955 case TChar
: case TSet
: case TAny
: case TFalse
:
957 case TTrue
: case TRep
: case TRunTime
: case TNot
:
960 case TCapture
: case TGrammar
: case TRule
: case TAnd
:
961 tree
= sib1(tree
); goto tailcall
; /* return headfail(sib1(tree)); */
963 tree
= sib2(tree
); goto tailcall
; /* return headfail(sib2(tree)); */
965 if (!nofail(sib2(tree
))) return 0;
966 /* else return headfail(sib1(tree)); */
967 tree
= sib1(tree
); goto tailcall
;
969 if (!headfail(sib1(tree
))) return 0;
970 /* else return headfail(sib2(tree)); */
971 tree
= sib2(tree
); goto tailcall
;
972 default: assert(0); return 0;
978 ** Check whether the code generation for the given tree can benefit
979 ** from a follow set (to avoid computing the follow set when it is
982 static int needfollow (TTree
*tree
) {
985 case TChar
: case TSet
: case TAny
:
986 case TFalse
: case TTrue
: case TAnd
: case TNot
:
987 case TRunTime
: case TGrammar
: case TCall
: case TBehind
:
989 case TChoice
: case TRep
:
992 tree
= sib1(tree
); goto tailcall
;
994 tree
= sib2(tree
); goto tailcall
;
995 default: assert(0); return 0;
999 /* }====================================================== */
1004 ** {======================================================
1006 ** =======================================================
1011 ** size of an instruction
1013 int sizei (const Instruction
*i
) {
1014 switch((Opcode
)i
->i
.code
) {
1015 case ISet
: case ISpan
: return CHARSETINSTSIZE
;
1016 case ITestSet
: return CHARSETINSTSIZE
+ 1;
1017 case ITestChar
: case ITestAny
: case IChoice
: case IJmp
: case ICall
:
1018 case IOpenCall
: case ICommit
: case IPartialCommit
: case IBackCommit
:
1026 ** state for the compiler
1028 typedef struct CompileState
{
1029 Pattern
*p
; /* pattern being compiled */
1030 int ncode
; /* next position in p->code to be filled */
1036 ** code generation is recursive; 'opt' indicates that the code is being
1037 ** generated as the last thing inside an optional pattern (so, if that
1038 ** code is optional too, it can reuse the 'IChoice' already in place for
1039 ** the outer pattern). 'tt' points to a previous test protecting this
1040 ** code (or NOINST). 'fl' is the follow set of the pattern.
1042 static void codegen (CompileState
*compst
, TTree
*tree
, int opt
, int tt
,
1046 void realloccode (lua_State
*L
, Pattern
*p
, int nsize
) {
1048 lua_Alloc f
= lua_getallocf(L
, &ud
);
1049 void *newblock
= f(ud
, p
->code
, p
->codesize
* sizeof(Instruction
),
1050 nsize
* sizeof(Instruction
));
1051 if (newblock
== NULL
&& nsize
> 0)
1052 luaL_error(L
, "not enough memory");
1053 p
->code
= (Instruction
*)newblock
;
1054 p
->codesize
= nsize
;
1058 static int nextinstruction (CompileState
*compst
) {
1059 int size
= compst
->p
->codesize
;
1060 if (compst
->ncode
>= size
)
1061 realloccode(compst
->L
, compst
->p
, size
* 2);
1062 return compst
->ncode
++;
1066 #define getinstr(cs,i) ((cs)->p->code[i])
1069 static int addinstruction (CompileState
*compst
, Opcode op
, int aux
) {
1070 int i
= nextinstruction(compst
);
1071 getinstr(compst
, i
).i
.code
= op
;
1072 getinstr(compst
, i
).i
.aux
= aux
;
1078 ** Add an instruction followed by space for an offset (to be set later)
1080 static int addoffsetinst (CompileState
*compst
, Opcode op
) {
1081 int i
= addinstruction(compst
, op
, 0); /* instruction */
1082 addinstruction(compst
, (Opcode
)0, 0); /* open space for offset */
1083 assert(op
== ITestSet
|| sizei(&getinstr(compst
, i
)) == 2);
1089 ** Set the offset of an instruction
1091 static void setoffset (CompileState
*compst
, int instruction
, int offset
) {
1092 getinstr(compst
, instruction
+ 1).offset
= offset
;
1097 ** Add a capture instruction:
1098 ** 'op' is the capture instruction; 'cap' the capture kind;
1099 ** 'key' the key into ktable; 'aux' is the optional capture offset
1102 static int addinstcap (CompileState
*compst
, Opcode op
, int cap
, int key
,
1104 int i
= addinstruction(compst
, op
, joinkindoff(cap
, aux
));
1105 getinstr(compst
, i
).i
.key
= key
;
1110 #define gethere(compst) ((compst)->ncode)
1112 #define target(code,i) ((i) + code[i + 1].offset)
1116 ** Patch 'instruction' to jump to 'target'
1118 static void jumptothere (CompileState
*compst
, int instruction
, int target
) {
1119 if (instruction
>= 0)
1120 setoffset(compst
, instruction
, target
- instruction
);
1125 ** Patch 'instruction' to jump to current position
1127 static void jumptohere (CompileState
*compst
, int instruction
) {
1128 jumptothere(compst
, instruction
, gethere(compst
));
1133 ** Code an IChar instruction, or IAny if there is an equivalent
1134 ** test dominating it
1136 static void codechar (CompileState
*compst
, int c
, int tt
) {
1137 if (tt
>= 0 && getinstr(compst
, tt
).i
.code
== ITestChar
&&
1138 getinstr(compst
, tt
).i
.aux
== c
)
1139 addinstruction(compst
, IAny
, 0);
1141 addinstruction(compst
, IChar
, c
);
1146 ** Add a charset posfix to an instruction
1148 static void addcharset (CompileState
*compst
, const byte
*cs
) {
1149 int p
= gethere(compst
);
1151 for (i
= 0; i
< (int)CHARSETINSTSIZE
- 1; i
++)
1152 nextinstruction(compst
); /* space for buffer */
1153 /* fill buffer with charset */
1154 loopset(j
, getinstr(compst
, p
).buff
[j
] = cs
[j
]);
1159 ** code a char set, optimizing unit sets for IChar, "complete"
1160 ** sets for IAny, and empty sets for IFail; also use an IAny
1161 ** when instruction is dominated by an equivalent test.
1163 static void codecharset (CompileState
*compst
, const byte
*cs
, int tt
) {
1164 int c
= 0; /* (=) to avoid warnings */
1165 Opcode op
= charsettype(cs
, &c
);
1167 case IChar
: codechar(compst
, c
, tt
); break;
1168 case ISet
: { /* non-trivial set? */
1169 if (tt
>= 0 && getinstr(compst
, tt
).i
.code
== ITestSet
&&
1170 cs_equal(cs
, getinstr(compst
, tt
+ 2).buff
))
1171 addinstruction(compst
, IAny
, 0);
1173 addinstruction(compst
, ISet
, 0);
1174 addcharset(compst
, cs
);
1178 default: addinstruction(compst
, op
, c
); break;
1184 ** code a test set, optimizing unit sets for ITestChar, "complete"
1185 ** sets for ITestAny, and empty sets for IJmp (always fails).
1186 ** 'e' is true iff test should accept the empty string. (Test
1187 ** instructions in the current VM never accept the empty string.)
1189 static int codetestset (CompileState
*compst
, Charset
*cs
, int e
) {
1190 if (e
) return NOINST
; /* no test */
1193 Opcode op
= charsettype(cs
->cs
, &c
);
1195 case IFail
: return addoffsetinst(compst
, IJmp
); /* always jump */
1196 case IAny
: return addoffsetinst(compst
, ITestAny
);
1198 int i
= addoffsetinst(compst
, ITestChar
);
1199 getinstr(compst
, i
).i
.aux
= c
;
1203 int i
= addoffsetinst(compst
, ITestSet
);
1204 addcharset(compst
, cs
->cs
);
1207 default: assert(0); return 0;
1214 ** Find the final destination of a sequence of jumps
1216 static int finaltarget (Instruction
*code
, int i
) {
1217 while (code
[i
].i
.code
== IJmp
)
1218 i
= target(code
, i
);
1224 ** final label (after traversing any jumps)
1226 static int finallabel (Instruction
*code
, int i
) {
1227 return finaltarget(code
, target(code
, i
));
1232 ** <behind(p)> == behind n; <p> (where n = fixedlen(p))
1234 static void codebehind (CompileState
*compst
, TTree
*tree
) {
1236 addinstruction(compst
, IBehind
, tree
->u
.n
);
1237 codegen(compst
, sib1(tree
), 0, NOINST
, fullset
);
1242 ** Choice; optimizations:
1243 ** - when p1 is headfail or
1244 ** when first(p1) and first(p2) are disjoint, than
1245 ** a character not in first(p1) cannot go to p1, and a character
1246 ** in first(p1) cannot go to p2 (at it is not in first(p2)).
1247 ** (The optimization is not valid if p1 accepts the empty string,
1248 ** as then there is no character at all...)
1249 ** - when p2 is empty and opt is true; a IPartialCommit can reuse
1250 ** the Choice already active in the stack.
1252 static void codechoice (CompileState
*compst
, TTree
*p1
, TTree
*p2
, int opt
,
1253 const Charset
*fl
) {
1254 int emptyp2
= (p2
->tag
== TTrue
);
1256 int e1
= getfirst(p1
, fullset
, &cs1
);
1258 (!e1
&& (getfirst(p2
, fl
, &cs2
), cs_disjoint(&cs1
, &cs2
)))) {
1259 /* <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: */
1260 int test
= codetestset(compst
, &cs1
, 0);
1262 codegen(compst
, p1
, 0, test
, fl
);
1264 jmp
= addoffsetinst(compst
, IJmp
);
1265 jumptohere(compst
, test
);
1266 codegen(compst
, p2
, opt
, NOINST
, fl
);
1267 jumptohere(compst
, jmp
);
1269 else if (opt
&& emptyp2
) {
1270 /* p1? == IPartialCommit; p1 */
1271 jumptohere(compst
, addoffsetinst(compst
, IPartialCommit
));
1272 codegen(compst
, p1
, 1, NOINST
, fullset
);
1276 test(first(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
1278 int test
= codetestset(compst
, &cs1
, e1
);
1279 int pchoice
= addoffsetinst(compst
, IChoice
);
1280 codegen(compst
, p1
, emptyp2
, test
, fullset
);
1281 pcommit
= addoffsetinst(compst
, ICommit
);
1282 jumptohere(compst
, pchoice
);
1283 jumptohere(compst
, test
);
1284 codegen(compst
, p2
, opt
, NOINST
, fl
);
1285 jumptohere(compst
, pcommit
);
1292 ** optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
1293 ** (valid only when 'p' has no captures)
1295 static void codeand (CompileState
*compst
, TTree
*tree
, int tt
) {
1296 int n
= fixedlen(tree
);
1297 if (n
>= 0 && n
<= MAXBEHIND
&& !hascaptures(tree
)) {
1298 codegen(compst
, tree
, 0, tt
, fullset
);
1300 addinstruction(compst
, IBehind
, n
);
1302 else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
1304 int pchoice
= addoffsetinst(compst
, IChoice
);
1305 codegen(compst
, tree
, 0, tt
, fullset
);
1306 pcommit
= addoffsetinst(compst
, IBackCommit
);
1307 jumptohere(compst
, pchoice
);
1308 addinstruction(compst
, IFail
, 0);
1309 jumptohere(compst
, pcommit
);
1315 ** Captures: if pattern has fixed (and not too big) length, use
1316 ** a single IFullCapture instruction after the match; otherwise,
1317 ** enclose the pattern with OpenCapture - CloseCapture.
1319 static void codecapture (CompileState
*compst
, TTree
*tree
, int tt
,
1320 const Charset
*fl
) {
1321 int len
= fixedlen(sib1(tree
));
1322 if (len
>= 0 && len
<= MAXOFF
&& !hascaptures(sib1(tree
))) {
1323 codegen(compst
, sib1(tree
), 0, tt
, fl
);
1324 addinstcap(compst
, IFullCapture
, tree
->cap
, tree
->key
, len
);
1327 addinstcap(compst
, IOpenCapture
, tree
->cap
, tree
->key
, 0);
1328 codegen(compst
, sib1(tree
), 0, tt
, fl
);
1329 addinstcap(compst
, ICloseCapture
, Cclose
, 0, 0);
1334 static void coderuntime (CompileState
*compst
, TTree
*tree
, int tt
) {
1335 addinstcap(compst
, IOpenCapture
, Cgroup
, tree
->key
, 0);
1336 codegen(compst
, sib1(tree
), 0, tt
, fullset
);
1337 addinstcap(compst
, ICloseRunTime
, Cclose
, 0, 0);
1342 ** Repetion; optimizations:
1343 ** When pattern is a charset, can use special instruction ISpan.
1344 ** When pattern is head fail, or if it starts with characters that
1345 ** are disjoint from what follows the repetions, a simple test
1346 ** is enough (a fail inside the repetition would backtrack to fail
1347 ** again in the following pattern, so there is no need for a choice).
1348 ** When 'opt' is true, the repetion can reuse the Choice already
1349 ** active in the stack.
1351 static void coderep (CompileState
*compst
, TTree
*tree
, int opt
,
1352 const Charset
*fl
) {
1354 if (tocharset(tree
, &st
)) {
1355 addinstruction(compst
, ISpan
, 0);
1356 addcharset(compst
, st
.cs
);
1359 int e1
= getfirst(tree
, fullset
, &st
);
1360 if (headfail(tree
) || (!e1
&& cs_disjoint(&st
, fl
))) {
1361 /* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
1363 int test
= codetestset(compst
, &st
, 0);
1364 codegen(compst
, tree
, 0, test
, fullset
);
1365 jmp
= addoffsetinst(compst
, IJmp
);
1366 jumptohere(compst
, test
);
1367 jumptothere(compst
, jmp
, test
);
1370 /* test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: */
1371 /* or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; */
1373 int test
= codetestset(compst
, &st
, e1
);
1374 int pchoice
= NOINST
;
1376 jumptohere(compst
, addoffsetinst(compst
, IPartialCommit
));
1378 pchoice
= addoffsetinst(compst
, IChoice
);
1379 l2
= gethere(compst
);
1380 codegen(compst
, tree
, 0, NOINST
, fullset
);
1381 commit
= addoffsetinst(compst
, IPartialCommit
);
1382 jumptothere(compst
, commit
, l2
);
1383 jumptohere(compst
, pchoice
);
1384 jumptohere(compst
, test
);
1391 ** Not predicate; optimizations:
1392 ** In any case, if first test fails, 'not' succeeds, so it can jump to
1393 ** the end. If pattern is headfail, that is all (it cannot fail
1394 ** in other parts); this case includes 'not' of simple sets. Otherwise,
1395 ** use the default code (a choice plus a failtwice).
1397 static void codenot (CompileState
*compst
, TTree
*tree
) {
1399 int e
= getfirst(tree
, fullset
, &st
);
1400 int test
= codetestset(compst
, &st
, e
);
1401 if (headfail(tree
)) /* test (fail(p1)) -> L1; fail; L1: */
1402 addinstruction(compst
, IFail
, 0);
1404 /* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */
1405 int pchoice
= addoffsetinst(compst
, IChoice
);
1406 codegen(compst
, tree
, 0, NOINST
, fullset
);
1407 addinstruction(compst
, IFailTwice
, 0);
1408 jumptohere(compst
, pchoice
);
1410 jumptohere(compst
, test
);
1415 ** change open calls to calls, using list 'positions' to find
1416 ** correct offsets; also optimize tail calls
1418 static void correctcalls (CompileState
*compst
, int *positions
,
1421 Instruction
*code
= compst
->p
->code
;
1422 for (i
= from
; i
< to
; i
+= sizei(&code
[i
])) {
1423 if (code
[i
].i
.code
== IOpenCall
) {
1424 int n
= code
[i
].i
.key
; /* rule number */
1425 int rule
= positions
[n
]; /* rule position */
1426 assert(rule
== from
|| code
[rule
- 1].i
.code
== IRet
);
1427 if (code
[finaltarget(code
, i
+ 2)].i
.code
== IRet
) /* call; ret ? */
1428 code
[i
].i
.code
= IJmp
; /* tail call */
1430 code
[i
].i
.code
= ICall
;
1431 jumptothere(compst
, i
, rule
); /* call jumps to respective rule */
1439 ** Code for a grammar:
1440 ** call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
1442 static void codegrammar (CompileState
*compst
, TTree
*grammar
) {
1443 int positions
[MAXRULES
];
1446 int firstcall
= addoffsetinst(compst
, ICall
); /* call initial rule */
1447 int jumptoend
= addoffsetinst(compst
, IJmp
); /* jump to the end */
1448 int start
= gethere(compst
); /* here starts the initial rule */
1449 jumptohere(compst
, firstcall
);
1450 for (rule
= sib1(grammar
); rule
->tag
== TRule
; rule
= sib2(rule
)) {
1451 positions
[rulenumber
++] = gethere(compst
); /* save rule position */
1452 codegen(compst
, sib1(rule
), 0, NOINST
, fullset
); /* code rule */
1453 addinstruction(compst
, IRet
, 0);
1455 assert(rule
->tag
== TTrue
);
1456 jumptohere(compst
, jumptoend
);
1457 correctcalls(compst
, positions
, start
, gethere(compst
));
1461 static void codecall (CompileState
*compst
, TTree
*call
) {
1462 int c
= addoffsetinst(compst
, IOpenCall
); /* to be corrected later */
1463 getinstr(compst
, c
).i
.key
= sib2(call
)->cap
; /* rule number */
1464 assert(sib2(call
)->tag
== TRule
);
1469 ** Code first child of a sequence
1470 ** (second child is called in-place to allow tail call)
1471 ** Return 'tt' for second child
1473 static int codeseq1 (CompileState
*compst
, TTree
*p1
, TTree
*p2
,
1474 int tt
, const Charset
*fl
) {
1475 if (needfollow(p1
)) {
1477 getfirst(p2
, fl
, &fl1
); /* p1 follow is p2 first */
1478 codegen(compst
, p1
, 0, tt
, &fl1
);
1480 else /* use 'fullset' as follow */
1481 codegen(compst
, p1
, 0, tt
, fullset
);
1482 if (fixedlen(p1
) != 0) /* can 'p1' consume anything? */
1483 return NOINST
; /* invalidate test */
1484 else return tt
; /* else 'tt' still protects sib2 */
1489 ** Main code-generation function: dispatch to auxiliar functions
1490 ** according to kind of tree. ('needfollow' should return true
1491 ** only for consructions that use 'fl'.)
1493 static void codegen (CompileState
*compst
, TTree
*tree
, int opt
, int tt
,
1494 const Charset
*fl
) {
1496 switch (tree
->tag
) {
1497 case TChar
: codechar(compst
, tree
->u
.n
, tt
); break;
1498 case TAny
: addinstruction(compst
, IAny
, 0); break;
1499 case TSet
: codecharset(compst
, treebuffer(tree
), tt
); break;
1501 case TFalse
: addinstruction(compst
, IFail
, 0); break;
1502 case TChoice
: codechoice(compst
, sib1(tree
), sib2(tree
), opt
, fl
); break;
1503 case TRep
: coderep(compst
, sib1(tree
), opt
, fl
); break;
1504 case TBehind
: codebehind(compst
, tree
); break;
1505 case TNot
: codenot(compst
, sib1(tree
)); break;
1506 case TAnd
: codeand(compst
, sib1(tree
), tt
); break;
1507 case TCapture
: codecapture(compst
, tree
, tt
, fl
); break;
1508 case TRunTime
: coderuntime(compst
, tree
, tt
); break;
1509 case TGrammar
: codegrammar(compst
, tree
); break;
1510 case TCall
: codecall(compst
, tree
); break;
1512 tt
= codeseq1(compst
, sib1(tree
), sib2(tree
), tt
, fl
); /* code 'p1' */
1513 /* codegen(compst, p2, opt, tt, fl); */
1514 tree
= sib2(tree
); goto tailcall
;
1522 ** Optimize jumps and other jump-like instructions.
1523 ** * Update labels of instructions with labels to their final
1524 ** destinations (e.g., choice L1; ... L1: jmp L2: becomes
1526 ** * Jumps to other instructions that do jumps become those
1527 ** instructions (e.g., jump to return becomes a return; jump
1528 ** to commit becomes a commit)
1530 static void peephole (CompileState
*compst
) {
1531 Instruction
*code
= compst
->p
->code
;
1533 for (i
= 0; i
< compst
->ncode
; i
+= sizei(&code
[i
])) {
1535 switch (code
[i
].i
.code
) {
1536 case IChoice
: case ICall
: case ICommit
: case IPartialCommit
:
1537 case IBackCommit
: case ITestChar
: case ITestSet
:
1538 case ITestAny
: { /* instructions with labels */
1539 jumptothere(compst
, i
, finallabel(code
, i
)); /* optimize label */
1543 int ft
= finaltarget(code
, i
);
1544 switch (code
[ft
].i
.code
) { /* jumping to what? */
1545 case IRet
: case IFail
: case IFailTwice
:
1546 case IEnd
: { /* instructions with unconditional implicit jumps */
1547 code
[i
] = code
[ft
]; /* jump becomes that instruction */
1548 code
[i
+ 1].i
.code
= IAny
; /* 'no-op' for target position */
1551 case ICommit
: case IPartialCommit
:
1552 case IBackCommit
: { /* inst. with unconditional explicit jumps */
1553 int fft
= finallabel(code
, ft
);
1554 code
[i
] = code
[ft
]; /* jump becomes that instruction... */
1555 jumptothere(compst
, i
, fft
); /* but must correct its offset */
1556 goto redo
; /* reoptimize its label */
1559 jumptothere(compst
, i
, ft
); /* optimize label */
1568 assert(code
[i
- 1].i
.code
== IEnd
);
1573 ** Compile a pattern
1575 Instruction
*compile (lua_State
*L
, Pattern
*p
) {
1576 CompileState compst
;
1577 compst
.p
= p
; compst
.ncode
= 0; compst
.L
= L
;
1578 realloccode(L
, p
, 2); /* minimum initial size */
1579 codegen(&compst
, p
->tree
, 0, NOINST
, fullset
);
1580 addinstruction(&compst
, IEnd
, 0);
1581 realloccode(L
, p
, compst
.ncode
); /* set final size */
1587 /* }====================================================== */
1592 ** $Id: lpcap.c,v 1.6 2015/06/15 16:09:57 roberto Exp $
1593 ** Copyright 2007, Lua.org & PUC-Rio (see 'lpeg.html' for license)
1596 /* #include "lua.h" */
1597 /* #include "lauxlib.h" */
1599 /* #include "lpcap.h" */
1600 /* #include "lptypes.h" */
1603 #define captype(cap) ((cap)->kind)
1605 #define isclosecap(cap) (captype(cap) == Cclose)
1607 #define closeaddr(c) ((c)->s + (c)->siz - 1)
1609 #define isfullcap(cap) ((cap)->siz != 0)
1611 #define getfromktable(cs,v) lua_rawgeti((cs)->L, ktableidx((cs)->ptop), v)
1613 #define pushluaval(cs) getfromktable(cs, (cs)->cap->idx)
1618 ** Put at the cache for Lua values the value indexed by 'v' in ktable
1619 ** of the running pattern (if it is not there yet); returns its index.
1621 static int updatecache (CapState
*cs
, int v
) {
1622 int idx
= cs
->ptop
+ 1; /* stack index of cache for Lua values */
1623 if (v
!= cs
->valuecached
) { /* not there? */
1624 getfromktable(cs
, v
); /* get value from 'ktable' */
1625 lua_replace(cs
->L
, idx
); /* put it at reserved stack position */
1626 cs
->valuecached
= v
; /* keep track of what is there */
1632 static int pushcapture (CapState
*cs
);
1636 ** Goes back in a list of captures looking for an open capture
1637 ** corresponding to a close
1639 static Capture
*findopen (Capture
*cap
) {
1640 int n
= 0; /* number of closes waiting an open */
1643 if (isclosecap(cap
)) n
++; /* one more open to skip */
1644 else if (!isfullcap(cap
))
1645 if (n
-- == 0) return cap
;
1651 ** Go to the next capture
1653 static void nextcap (CapState
*cs
) {
1654 Capture
*cap
= cs
->cap
;
1655 if (!isfullcap(cap
)) { /* not a single capture? */
1656 int n
= 0; /* number of opens waiting a close */
1657 for (;;) { /* look for corresponding close */
1659 if (isclosecap(cap
)) {
1660 if (n
-- == 0) break;
1662 else if (!isfullcap(cap
)) n
++;
1665 cs
->cap
= cap
+ 1; /* + 1 to skip last close (or entire single capture) */
1670 ** Push on the Lua stack all values generated by nested captures inside
1671 ** the current capture. Returns number of values pushed. 'addextra'
1672 ** makes it push the entire match after all captured values. The
1673 ** entire match is pushed also if there are no other nested values,
1674 ** so the function never returns zero.
1676 static int pushnestedvalues (CapState
*cs
, int addextra
) {
1677 Capture
*co
= cs
->cap
;
1678 if (isfullcap(cs
->cap
++)) { /* no nested captures? */
1679 lua_pushlstring(cs
->L
, co
->s
, co
->siz
- 1); /* push whole match */
1680 return 1; /* that is it */
1684 while (!isclosecap(cs
->cap
)) /* repeat for all nested patterns */
1685 n
+= pushcapture(cs
);
1686 if (addextra
|| n
== 0) { /* need extra? */
1687 lua_pushlstring(cs
->L
, co
->s
, cs
->cap
->s
- co
->s
); /* push whole match */
1690 cs
->cap
++; /* skip close entry */
1697 ** Push only the first value generated by nested captures
1699 static void pushonenestedvalue (CapState
*cs
) {
1700 int n
= pushnestedvalues(cs
, 0);
1702 lua_pop(cs
->L
, n
- 1); /* pop extra values */
1707 ** Try to find a named group capture with the name given at the top of
1708 ** the stack; goes backward from 'cap'.
1710 static Capture
*findback (CapState
*cs
, Capture
*cap
) {
1711 lua_State
*L
= cs
->L
;
1712 while (cap
-- > cs
->ocap
) { /* repeat until end of list */
1713 if (isclosecap(cap
))
1714 cap
= findopen(cap
); /* skip nested captures */
1715 else if (!isfullcap(cap
))
1716 continue; /* opening an enclosing capture: skip and get previous */
1717 if (captype(cap
) == Cgroup
) {
1718 getfromktable(cs
, cap
->idx
); /* get group name */
1719 if (lp_equal(L
, -2, -1)) { /* right group? */
1720 lua_pop(L
, 2); /* remove reference name and group name */
1723 else lua_pop(L
, 1); /* remove group name */
1726 luaL_error(L
, "back reference '%s' not found", lua_tostring(L
, -1));
1727 return NULL
; /* to avoid warnings */
1732 ** Back-reference capture. Return number of values pushed.
1734 static int backrefcap (CapState
*cs
) {
1736 Capture
*curr
= cs
->cap
;
1737 pushluaval(cs
); /* reference name */
1738 cs
->cap
= findback(cs
, curr
); /* find corresponding group */
1739 n
= pushnestedvalues(cs
, 0); /* push group's values */
1746 ** Table capture: creates a new table and populates it with nested
1749 static int tablecap (CapState
*cs
) {
1750 lua_State
*L
= cs
->L
;
1753 if (isfullcap(cs
->cap
++))
1754 return 1; /* table is empty */
1755 while (!isclosecap(cs
->cap
)) {
1756 if (captype(cs
->cap
) == Cgroup
&& cs
->cap
->idx
!= 0) { /* named group? */
1757 pushluaval(cs
); /* push group name */
1758 pushonenestedvalue(cs
);
1759 lua_settable(L
, -3);
1761 else { /* not a named group */
1763 int k
= pushcapture(cs
);
1764 for (i
= k
; i
> 0; i
--) /* store all values into table */
1765 lua_rawseti(L
, -(i
+ 1), n
+ i
);
1769 cs
->cap
++; /* skip close entry */
1770 return 1; /* number of values pushed (only the table) */
1775 ** Table-query capture
1777 static int querycap (CapState
*cs
) {
1778 int idx
= cs
->cap
->idx
;
1779 pushonenestedvalue(cs
); /* get nested capture */
1780 lua_gettable(cs
->L
, updatecache(cs
, idx
)); /* query cap. value at table */
1781 if (!lua_isnil(cs
->L
, -1))
1783 else { /* no value */
1784 lua_pop(cs
->L
, 1); /* remove nil */
1793 static int foldcap (CapState
*cs
) {
1795 lua_State
*L
= cs
->L
;
1796 int idx
= cs
->cap
->idx
;
1797 if (isfullcap(cs
->cap
++) || /* no nested captures? */
1798 isclosecap(cs
->cap
) || /* no nested captures (large subject)? */
1799 (n
= pushcapture(cs
)) == 0) /* nested captures with no values? */
1800 return luaL_error(L
, "no initial value for fold capture");
1802 lua_pop(L
, n
- 1); /* leave only one result for accumulator */
1803 while (!isclosecap(cs
->cap
)) {
1804 lua_pushvalue(L
, updatecache(cs
, idx
)); /* get folding function */
1805 lua_insert(L
, -2); /* put it before accumulator */
1806 n
= pushcapture(cs
); /* get next capture's values */
1807 lua_call(L
, n
+ 1, 1); /* call folding function */
1809 cs
->cap
++; /* skip close entry */
1810 return 1; /* only accumulator left on the stack */
1817 static int functioncap (CapState
*cs
) {
1819 int top
= lua_gettop(cs
->L
);
1820 pushluaval(cs
); /* push function */
1821 n
= pushnestedvalues(cs
, 0); /* push nested captures */
1822 lua_call(cs
->L
, n
, LUA_MULTRET
); /* call function */
1823 return lua_gettop(cs
->L
) - top
; /* return function's results */
1830 static int numcap (CapState
*cs
) {
1831 int idx
= cs
->cap
->idx
; /* value to select */
1832 if (idx
== 0) { /* no values? */
1833 nextcap(cs
); /* skip entire capture */
1834 return 0; /* no value produced */
1837 int n
= pushnestedvalues(cs
, 0);
1838 if (n
< idx
) /* invalid index? */
1839 return luaL_error(cs
->L
, "no capture '%d'", idx
);
1841 lua_pushvalue(cs
->L
, -(n
- idx
+ 1)); /* get selected capture */
1842 lua_replace(cs
->L
, -(n
+ 1)); /* put it in place of 1st capture */
1843 lua_pop(cs
->L
, n
- 1); /* remove other captures */
1851 ** Return the stack index of the first runtime capture in the given
1852 ** list of captures (or zero if no runtime captures)
1854 int finddyncap (Capture
*cap
, Capture
*last
) {
1855 for (; cap
< last
; cap
++) {
1856 if (cap
->kind
== Cruntime
)
1857 return cap
->idx
; /* stack position of first capture */
1859 return 0; /* no dynamic captures in this segment */
1864 ** Calls a runtime capture. Returns number of captures removed by
1865 ** the call, including the initial Cgroup. (Captures to be added are
1866 ** on the Lua stack.)
1868 int runtimecap (CapState
*cs
, Capture
*close
, const char *s
, int *rem
) {
1870 lua_State
*L
= cs
->L
;
1871 int otop
= lua_gettop(L
);
1872 Capture
*open
= findopen(close
);
1873 assert(captype(open
) == Cgroup
);
1874 id
= finddyncap(open
, close
); /* get first dynamic capture argument */
1875 close
->kind
= Cclose
; /* closes the group */
1877 cs
->cap
= open
; cs
->valuecached
= 0; /* prepare capture state */
1878 luaL_checkstack(L
, 4, "too many runtime captures");
1879 pushluaval(cs
); /* push function to be called */
1880 lua_pushvalue(L
, SUBJIDX
); /* push original subject */
1881 lua_pushinteger(L
, s
- cs
->s
+ 1); /* push current position */
1882 n
= pushnestedvalues(cs
, 0); /* push nested captures */
1883 lua_call(L
, n
+ 2, LUA_MULTRET
); /* call dynamic function */
1884 if (id
> 0) { /* are there old dynamic captures to be removed? */
1886 for (i
= id
; i
<= otop
; i
++)
1887 lua_remove(L
, id
); /* remove old dynamic captures */
1888 *rem
= otop
- id
+ 1; /* total number of dynamic captures removed */
1891 *rem
= 0; /* no dynamic captures removed */
1892 return close
- open
; /* number of captures of all kinds removed */
1897 ** Auxiliary structure for substitution and string captures: keep
1898 ** information about nested captures for future use, avoiding to push
1899 ** string results into Lua
1901 typedef struct StrAux
{
1902 int isstring
; /* whether capture is a string */
1904 Capture
*cp
; /* if not a string, respective capture */
1905 struct { /* if it is a string... */
1906 const char *s
; /* ... starts here */
1907 const char *e
; /* ... ends here */
1912 #define MAXSTRCAPS 10
1915 ** Collect values from current capture into array 'cps'. Current
1916 ** capture must be Cstring (first call) or Csimple (recursive calls).
1917 ** (In first call, fills %0 with whole match for Cstring.)
1918 ** Returns number of elements in the array that were filled.
1920 static int getstrcaps (CapState
*cs
, StrAux
*cps
, int n
) {
1922 cps
[k
].isstring
= 1; /* get string value */
1923 cps
[k
].u
.s
.s
= cs
->cap
->s
; /* starts here */
1924 if (!isfullcap(cs
->cap
++)) { /* nested captures? */
1925 while (!isclosecap(cs
->cap
)) { /* traverse them */
1926 if (n
>= MAXSTRCAPS
) /* too many captures? */
1927 nextcap(cs
); /* skip extra captures (will not need them) */
1928 else if (captype(cs
->cap
) == Csimple
) /* string? */
1929 n
= getstrcaps(cs
, cps
, n
); /* put info. into array */
1931 cps
[n
].isstring
= 0; /* not a string */
1932 cps
[n
].u
.cp
= cs
->cap
; /* keep original capture */
1937 cs
->cap
++; /* skip close */
1939 cps
[k
].u
.s
.e
= closeaddr(cs
->cap
- 1); /* ends here */
1945 ** add next capture value (which should be a string) to buffer 'b'
1947 static int addonestring (luaL_Buffer
*b
, CapState
*cs
, const char *what
);
1951 ** String capture: add result to buffer 'b' (instead of pushing
1952 ** it into the stack)
1954 static void stringcap (luaL_Buffer
*b
, CapState
*cs
) {
1955 StrAux cps
[MAXSTRCAPS
];
1958 const char *fmt
; /* format string */
1959 fmt
= lua_tolstring(cs
->L
, updatecache(cs
, cs
->cap
->idx
), &len
);
1960 n
= getstrcaps(cs
, cps
, 0) - 1; /* collect nested captures */
1961 for (i
= 0; i
< len
; i
++) { /* traverse them */
1962 if (fmt
[i
] != '%') /* not an escape? */
1963 luaL_addchar(b
, fmt
[i
]); /* add it to buffer */
1964 else if (fmt
[++i
] < '0' || fmt
[i
] > '9') /* not followed by a digit? */
1965 luaL_addchar(b
, fmt
[i
]); /* add to buffer */
1967 int l
= fmt
[i
] - '0'; /* capture index */
1969 luaL_error(cs
->L
, "invalid capture index (%d)", l
);
1970 else if (cps
[l
].isstring
)
1971 luaL_addlstring(b
, cps
[l
].u
.s
.s
, cps
[l
].u
.s
.e
- cps
[l
].u
.s
.s
);
1973 Capture
*curr
= cs
->cap
;
1974 cs
->cap
= cps
[l
].u
.cp
; /* go back to evaluate that nested capture */
1975 if (!addonestring(b
, cs
, "capture"))
1976 luaL_error(cs
->L
, "no values in capture index %d", l
);
1977 cs
->cap
= curr
; /* continue from where it stopped */
1985 ** Substitution capture: add result to buffer 'b'
1987 static void substcap (luaL_Buffer
*b
, CapState
*cs
) {
1988 const char *curr
= cs
->cap
->s
;
1989 if (isfullcap(cs
->cap
)) /* no nested captures? */
1990 luaL_addlstring(b
, curr
, cs
->cap
->siz
- 1); /* keep original text */
1992 cs
->cap
++; /* skip open entry */
1993 while (!isclosecap(cs
->cap
)) { /* traverse nested captures */
1994 const char *next
= cs
->cap
->s
;
1995 luaL_addlstring(b
, curr
, next
- curr
); /* add text up to capture */
1996 if (addonestring(b
, cs
, "replacement"))
1997 curr
= closeaddr(cs
->cap
- 1); /* continue after match */
1998 else /* no capture value */
1999 curr
= next
; /* keep original text in final result */
2001 luaL_addlstring(b
, curr
, cs
->cap
->s
- curr
); /* add last piece of text */
2003 cs
->cap
++; /* go to next capture */
2008 ** Evaluates a capture and adds its first value to buffer 'b'; returns
2009 ** whether there was a value
2011 static int addonestring (luaL_Buffer
*b
, CapState
*cs
, const char *what
) {
2012 switch (captype(cs
->cap
)) {
2014 stringcap(b
, cs
); /* add capture directly to buffer */
2017 substcap(b
, cs
); /* add capture directly to buffer */
2020 lua_State
*L
= cs
->L
;
2021 int n
= pushcapture(cs
);
2023 if (n
> 1) lua_pop(L
, n
- 1); /* only one result */
2024 if (!lua_isstring(L
, -1))
2025 luaL_error(L
, "invalid %s value (a %s)", what
, luaL_typename(L
, -1));
2035 ** Push all values of the current capture into the stack; returns
2036 ** number of values pushed
2038 static int pushcapture (CapState
*cs
) {
2039 lua_State
*L
= cs
->L
;
2040 luaL_checkstack(L
, 4, "too many captures");
2041 switch (captype(cs
->cap
)) {
2043 lua_pushinteger(L
, cs
->cap
->s
- cs
->s
+ 1);
2053 int arg
= (cs
->cap
++)->idx
;
2054 if (arg
+ FIXEDARGS
> cs
->ptop
)
2055 return luaL_error(L
, "reference to absent extra argument #%d", arg
);
2056 lua_pushvalue(L
, arg
+ FIXEDARGS
);
2060 int k
= pushnestedvalues(cs
, 1);
2061 lua_insert(L
, -k
); /* make whole match be first result */
2065 lua_pushvalue(L
, (cs
->cap
++)->idx
); /* value is in the stack */
2070 luaL_buffinit(L
, &b
);
2072 luaL_pushresult(&b
);
2077 luaL_buffinit(L
, &b
);
2079 luaL_pushresult(&b
);
2083 if (cs
->cap
->idx
== 0) /* anonymous group? */
2084 return pushnestedvalues(cs
, 0); /* add all nested values */
2085 else { /* named group: add no values */
2086 nextcap(cs
); /* skip capture */
2090 case Cbackref
: return backrefcap(cs
);
2091 case Ctable
: return tablecap(cs
);
2092 case Cfunction
: return functioncap(cs
);
2093 case Cnum
: return numcap(cs
);
2094 case Cquery
: return querycap(cs
);
2095 case Cfold
: return foldcap(cs
);
2096 default: assert(0); return 0;
2102 ** Prepare a CapState structure and traverse the entire list of
2103 ** captures in the stack pushing its results. 's' is the subject
2104 ** string, 'r' is the final position of the match, and 'ptop'
2105 ** the index in the stack where some useful values were pushed.
2106 ** Returns the number of results pushed. (If the list produces no
2107 ** results, push the final position of the match.)
2109 int getcaptures (lua_State
*L
, const char *s
, const char *r
, int ptop
) {
2110 Capture
*capture
= (Capture
*)lua_touserdata(L
, caplistidx(ptop
));
2112 if (!isclosecap(capture
)) { /* is there any capture? */
2114 cs
.ocap
= cs
.cap
= capture
; cs
.L
= L
;
2115 cs
.s
= s
; cs
.valuecached
= 0; cs
.ptop
= ptop
;
2116 do { /* collect their values */
2117 n
+= pushcapture(&cs
);
2118 } while (!isclosecap(cs
.cap
));
2120 if (n
== 0) { /* no capture values? */
2121 lua_pushinteger(L
, r
- s
+ 1); /* return only end position */
2129 ** $Id: lptree.c,v 1.21 2015/09/28 17:01:25 roberto Exp $
2130 ** Copyright 2013, Lua.org & PUC-Rio (see 'lpeg.html' for license)
2133 /* #include <ctype.h> */
2134 /* #include <limits.h> */
2135 /* #include <string.h> */
2138 /* #include "lua.h" */
2139 /* #include "lauxlib.h" */
2141 /* #include "lptypes.h" */
2142 /* #include "lpcap.h" */
2143 /* #include "lpcode.h" */
2144 /* #include "lpprint.h" */
2145 /* #include "lptree.h" */
2148 /* number of siblings for each tree */
2149 const byte numsiblings
[] = {
2150 0, 0, 0, /* char, set, any */
2151 0, 0, /* true, false */
2153 2, 2, /* seq, choice */
2154 1, 1, /* not, and */
2155 0, 0, 2, 1, /* call, opencall, rule, grammar */
2157 1, 1 /* capture, runtime capture */
2161 static TTree
*newgrammar (lua_State
*L
, int arg
);
2165 ** returns a reasonable name for value at index 'idx' on the stack
2167 static const char *val2str (lua_State
*L
, int idx
) {
2168 const char *k
= lua_tostring(L
, idx
);
2170 return lua_pushfstring(L
, "%s", k
);
2172 return lua_pushfstring(L
, "(a %s)", luaL_typename(L
, idx
));
2177 ** Fix a TOpenCall into a TCall node, using table 'postable' to
2178 ** translate a key to its rule address in the tree. Raises an
2179 ** error if key does not exist.
2181 static void fixonecall (lua_State
*L
, int postable
, TTree
*g
, TTree
*t
) {
2183 lua_rawgeti(L
, -1, t
->key
); /* get rule's name */
2184 lua_gettable(L
, postable
); /* query name in position table */
2185 n
= lua_tonumber(L
, -1); /* get (absolute) position */
2186 lua_pop(L
, 1); /* remove position */
2187 if (n
== 0) { /* no position? */
2188 lua_rawgeti(L
, -1, t
->key
); /* get rule's name again */
2189 luaL_error(L
, "rule '%s' undefined in given grammar", val2str(L
, -1));
2192 t
->u
.ps
= n
- (t
- g
); /* position relative to node */
2193 assert(sib2(t
)->tag
== TRule
);
2194 sib2(t
)->key
= t
->key
;
2199 ** Transform left associative constructions into right
2200 ** associative ones, for sequence and choice; that is:
2201 ** (t11 + t12) + t2 => t11 + (t12 + t2)
2202 ** (t11 * t12) * t2 => t11 * (t12 * t2)
2203 ** (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
2205 static void correctassociativity (TTree
*tree
) {
2206 TTree
*t1
= sib1(tree
);
2207 assert(tree
->tag
== TChoice
|| tree
->tag
== TSeq
);
2208 while (t1
->tag
== tree
->tag
) {
2209 int n1size
= tree
->u
.ps
- 1; /* t1 == Op t11 t12 */
2210 int n11size
= t1
->u
.ps
- 1;
2211 int n12size
= n1size
- n11size
- 1;
2212 memmove(sib1(tree
), sib1(t1
), n11size
* sizeof(TTree
)); /* move t11 */
2213 tree
->u
.ps
= n11size
+ 1;
2214 sib2(tree
)->tag
= tree
->tag
;
2215 sib2(tree
)->u
.ps
= n12size
+ 1;
2221 ** Make final adjustments in a tree. Fix open calls in tree 't',
2222 ** making them refer to their respective rules or raising appropriate
2223 ** errors (if not inside a grammar). Correct associativity of associative
2224 ** constructions (making them right associative). Assume that tree's
2225 ** ktable is at the top of the stack (for error messages).
2227 static void finalfix (lua_State
*L
, int postable
, TTree
*g
, TTree
*t
) {
2230 case TGrammar
: /* subgrammars were already fixed */
2233 if (g
!= NULL
) /* inside a grammar? */
2234 fixonecall(L
, postable
, g
, t
);
2235 else { /* open call outside grammar */
2236 lua_rawgeti(L
, -1, t
->key
);
2237 luaL_error(L
, "rule '%s' used outside a grammar", val2str(L
, -1));
2241 case TSeq
: case TChoice
:
2242 correctassociativity(t
);
2245 switch (numsiblings
[t
->tag
]) {
2246 case 1: /* finalfix(L, postable, g, sib1(t)); */
2247 t
= sib1(t
); goto tailcall
;
2249 finalfix(L
, postable
, g
, sib1(t
));
2250 t
= sib2(t
); goto tailcall
; /* finalfix(L, postable, g, sib2(t)); */
2251 default: assert(numsiblings
[t
->tag
] == 0); break;
2258 ** {===================================================================
2259 ** KTable manipulation
2261 ** - The ktable of a pattern 'p' can be shared by other patterns that
2262 ** contain 'p' and no other constants. Because of this sharing, we
2263 ** should not add elements to a 'ktable' unless it was freshly created
2264 ** for the new pattern.
2266 ** - The maximum index in a ktable is USHRT_MAX, because trees and
2267 ** patterns use unsigned shorts to store those indices.
2268 ** ====================================================================
2272 ** Create a new 'ktable' to the pattern at the top of the stack.
2274 static void newktable (lua_State
*L
, int n
) {
2275 lua_createtable(L
, n
, 0); /* create a fresh table */
2276 lua_setuservalue(L
, -2); /* set it as 'ktable' for pattern */
2281 ** Add element 'idx' to 'ktable' of pattern at the top of the stack;
2282 ** Return index of new element.
2283 ** If new element is nil, does not add it to table (as it would be
2284 ** useless) and returns 0, as ktable[0] is always nil.
2286 static int addtoktable (lua_State
*L
, int idx
) {
2287 if (lua_isnil(L
, idx
)) /* nil value? */
2291 lua_getuservalue(L
, -1); /* get ktable from pattern */
2292 n
= lua_rawlen(L
, -1);
2294 luaL_error(L
, "too many Lua values in pattern");
2295 lua_pushvalue(L
, idx
); /* element to be added */
2296 lua_rawseti(L
, -2, ++n
);
2297 lua_pop(L
, 1); /* remove 'ktable' */
2304 ** Return the number of elements in the ktable at 'idx'.
2305 ** In Lua 5.2/5.3, default "environment" for patterns is nil, not
2306 ** a table. Treat it as an empty table. In Lua 5.1, assumes that
2307 ** the environment has no numeric indices (len == 0)
2309 static int ktablelen (lua_State
*L
, int idx
) {
2310 if (!lua_istable(L
, idx
)) return 0;
2311 else return lua_rawlen(L
, idx
);
2316 ** Concatentate the contents of table 'idx1' into table 'idx2'.
2317 ** (Assume that both indices are negative.)
2318 ** Return the original length of table 'idx2' (or 0, if no
2319 ** element was added, as there is no need to correct any index).
2321 static int concattable (lua_State
*L
, int idx1
, int idx2
) {
2323 int n1
= ktablelen(L
, idx1
);
2324 int n2
= ktablelen(L
, idx2
);
2325 if (n1
+ n2
> USHRT_MAX
)
2326 luaL_error(L
, "too many Lua values in pattern");
2327 if (n1
== 0) return 0; /* nothing to correct */
2328 for (i
= 1; i
<= n1
; i
++) {
2329 lua_rawgeti(L
, idx1
, i
);
2330 lua_rawseti(L
, idx2
- 1, n2
+ i
); /* correct 'idx2' */
2337 ** When joining 'ktables', constants from one of the subpatterns must
2338 ** be renumbered; 'correctkeys' corrects their indices (adding 'n'
2341 static void correctkeys (TTree
*tree
, int n
) {
2342 if (n
== 0) return; /* no correction? */
2344 switch (tree
->tag
) {
2345 case TOpenCall
: case TCall
: case TRunTime
: case TRule
: {
2351 if (tree
->key
> 0 && tree
->cap
!= Carg
&& tree
->cap
!= Cnum
)
2357 switch (numsiblings
[tree
->tag
]) {
2358 case 1: /* correctkeys(sib1(tree), n); */
2359 tree
= sib1(tree
); goto tailcall
;
2361 correctkeys(sib1(tree
), n
);
2362 tree
= sib2(tree
); goto tailcall
; /* correctkeys(sib2(tree), n); */
2363 default: assert(numsiblings
[tree
->tag
] == 0); break;
2369 ** Join the ktables from p1 and p2 the ktable for the new pattern at the
2370 ** top of the stack, reusing them when possible.
2372 static void joinktables (lua_State
*L
, int p1
, TTree
*t2
, int p2
) {
2374 lua_getuservalue(L
, p1
); /* get ktables */
2375 lua_getuservalue(L
, p2
);
2376 n1
= ktablelen(L
, -2);
2377 n2
= ktablelen(L
, -1);
2378 if (n1
== 0 && n2
== 0) /* are both tables empty? */
2379 lua_pop(L
, 2); /* nothing to be done; pop tables */
2380 else if (n2
== 0 || lp_equal(L
, -2, -1)) { /* 2nd table empty or equal? */
2381 lua_pop(L
, 1); /* pop 2nd table */
2382 lua_setuservalue(L
, -2); /* set 1st ktable into new pattern */
2384 else if (n1
== 0) { /* first table is empty? */
2385 lua_setuservalue(L
, -3); /* set 2nd table into new pattern */
2386 lua_pop(L
, 1); /* pop 1st table */
2389 lua_createtable(L
, n1
+ n2
, 0); /* create ktable for new pattern */
2390 /* stack: new p; ktable p1; ktable p2; new ktable */
2391 concattable(L
, -3, -1); /* from p1 into new ktable */
2392 concattable(L
, -2, -1); /* from p2 into new ktable */
2393 lua_setuservalue(L
, -4); /* new ktable becomes 'p' environment */
2394 lua_pop(L
, 2); /* pop other ktables */
2395 correctkeys(t2
, n1
); /* correction for indices from p2 */
2401 ** copy 'ktable' of element 'idx' to new tree (on top of stack)
2403 static void copyktable (lua_State
*L
, int idx
) {
2404 lua_getuservalue(L
, idx
);
2405 lua_setuservalue(L
, -2);
2410 ** merge 'ktable' from 'stree' at stack index 'idx' into 'ktable'
2411 ** from tree at the top of the stack, and correct corresponding
2414 static void mergektable (lua_State
*L
, int idx
, TTree
*stree
) {
2416 lua_getuservalue(L
, -1); /* get ktables */
2417 lua_getuservalue(L
, idx
);
2418 n
= concattable(L
, -1, -2);
2419 lua_pop(L
, 2); /* remove both ktables */
2420 correctkeys(stree
, n
);
2425 ** Create a new 'ktable' to the pattern at the top of the stack, adding
2426 ** all elements from pattern 'p' (if not 0) plus element 'idx' to it.
2427 ** Return index of new element.
2429 static int addtonewktable (lua_State
*L
, int p
, int idx
) {
2432 mergektable(L
, p
, NULL
);
2433 return addtoktable(L
, idx
);
2436 /* }====================================================== */
2440 ** {======================================================
2442 ** =======================================================
2446 ** In 5.2, could use 'luaL_testudata'...
2448 static int testpattern (lua_State
*L
, int idx
) {
2449 if (lua_touserdata(L
, idx
)) { /* value is a userdata? */
2450 if (lua_getmetatable(L
, idx
)) { /* does it have a metatable? */
2451 luaL_getmetatable(L
, PATTERN_T
);
2452 if (lua_rawequal(L
, -1, -2)) { /* does it have the correct mt? */
2453 lua_pop(L
, 2); /* remove both metatables */
2462 static Pattern
*getpattern (lua_State
*L
, int idx
) {
2463 return (Pattern
*)luaL_checkudata(L
, idx
, PATTERN_T
);
2467 static int getsize (lua_State
*L
, int idx
) {
2468 return (lua_rawlen(L
, idx
) - sizeof(Pattern
)) / sizeof(TTree
) + 1;
2472 static TTree
*gettree (lua_State
*L
, int idx
, int *len
) {
2473 Pattern
*p
= getpattern(L
, idx
);
2475 *len
= getsize(L
, idx
);
2481 ** create a pattern. Set its uservalue (the 'ktable') equal to its
2482 ** metatable. (It could be any empty sequence; the metatable is at
2483 ** hand here, so we use it.)
2485 static TTree
*newtree (lua_State
*L
, int len
) {
2486 size_t size
= (len
- 1) * sizeof(TTree
) + sizeof(Pattern
);
2487 Pattern
*p
= (Pattern
*)lua_newuserdata(L
, size
);
2488 luaL_getmetatable(L
, PATTERN_T
);
2489 lua_pushvalue(L
, -1);
2490 lua_setuservalue(L
, -3);
2491 lua_setmetatable(L
, -2);
2492 p
->code
= NULL
; p
->codesize
= 0;
2497 static TTree
*newleaf (lua_State
*L
, int tag
) {
2498 TTree
*tree
= newtree(L
, 1);
2504 static TTree
*newcharset (lua_State
*L
) {
2505 TTree
*tree
= newtree(L
, bytes2slots(CHARSETSIZE
) + 1);
2507 loopset(i
, treebuffer(tree
)[i
] = 0);
2513 ** add to tree a sequence where first sibling is 'sib' (with size
2514 ** 'sibsize'); returns position for second sibling
2516 static TTree
*seqaux (TTree
*tree
, TTree
*sib
, int sibsize
) {
2517 tree
->tag
= TSeq
; tree
->u
.ps
= sibsize
+ 1;
2518 memcpy(sib1(tree
), sib
, sibsize
* sizeof(TTree
));
2524 ** Build a sequence of 'n' nodes, each with tag 'tag' and 'u.n' got
2525 ** from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
2526 ** must build a sequence of sequence of sequence...)
2528 static void fillseq (TTree
*tree
, int tag
, int n
, const char *s
) {
2530 for (i
= 0; i
< n
- 1; i
++) { /* initial n-1 copies of Seq tag; Seq ... */
2531 tree
->tag
= TSeq
; tree
->u
.ps
= 2;
2532 sib1(tree
)->tag
= tag
;
2533 sib1(tree
)->u
.n
= s
? (byte
)s
[i
] : 0;
2536 tree
->tag
= tag
; /* last one does not need TSeq */
2537 tree
->u
.n
= s
? (byte
)s
[i
] : 0;
2542 ** Numbers as patterns:
2543 ** 0 == true (always match); n == TAny repeated 'n' times;
2544 ** -n == not (TAny repeated 'n' times)
2546 static TTree
*numtree (lua_State
*L
, int n
) {
2548 return newleaf(L
, TTrue
);
2552 tree
= nd
= newtree(L
, 2 * n
- 1);
2553 else { /* negative: code it as !(-n) */
2555 tree
= newtree(L
, 2 * n
);
2559 fillseq(nd
, TAny
, n
, NULL
); /* sequence of 'n' any's */
2566 ** Convert value at index 'idx' to a pattern
2568 static TTree
*getpatt (lua_State
*L
, int idx
, int *len
) {
2570 switch (lua_type(L
, idx
)) {
2573 const char *s
= lua_tolstring(L
, idx
, &slen
); /* get string */
2574 if (slen
== 0) /* empty? */
2575 tree
= newleaf(L
, TTrue
); /* always match */
2577 tree
= newtree(L
, 2 * (slen
- 1) + 1);
2578 fillseq(tree
, TChar
, slen
, s
); /* sequence of 'slen' chars */
2583 int n
= lua_tointeger(L
, idx
);
2584 tree
= numtree(L
, n
);
2587 case LUA_TBOOLEAN
: {
2588 tree
= (lua_toboolean(L
, idx
) ? newleaf(L
, TTrue
) : newleaf(L
, TFalse
));
2592 tree
= newgrammar(L
, idx
);
2595 case LUA_TFUNCTION
: {
2596 tree
= newtree(L
, 2);
2597 tree
->tag
= TRunTime
;
2598 tree
->key
= addtonewktable(L
, 0, idx
);
2599 sib1(tree
)->tag
= TTrue
;
2603 return gettree(L
, idx
, len
);
2606 lua_replace(L
, idx
); /* put new tree into 'idx' slot */
2608 *len
= getsize(L
, idx
);
2614 ** create a new tree, whith a new root and one sibling.
2615 ** Sibling must be on the Lua stack, at index 1.
2617 static TTree
*newroot1sib (lua_State
*L
, int tag
) {
2619 TTree
*tree1
= getpatt(L
, 1, &s1
);
2620 TTree
*tree
= newtree(L
, 1 + s1
); /* create new tree */
2622 memcpy(sib1(tree
), tree1
, s1
* sizeof(TTree
));
2629 ** create a new tree, whith a new root and 2 siblings.
2630 ** Siblings must be on the Lua stack, first one at index 1.
2632 static TTree
*newroot2sib (lua_State
*L
, int tag
) {
2634 TTree
*tree1
= getpatt(L
, 1, &s1
);
2635 TTree
*tree2
= getpatt(L
, 2, &s2
);
2636 TTree
*tree
= newtree(L
, 1 + s1
+ s2
); /* create new tree */
2638 tree
->u
.ps
= 1 + s1
;
2639 memcpy(sib1(tree
), tree1
, s1
* sizeof(TTree
));
2640 memcpy(sib2(tree
), tree2
, s2
* sizeof(TTree
));
2641 joinktables(L
, 1, sib2(tree
), 2);
2646 static int lp_P (lua_State
*L
) {
2647 luaL_checkany(L
, 1);
2648 getpatt(L
, 1, NULL
);
2655 ** sequence operator; optimizations:
2656 ** false x => false, x true => x, true x => x
2657 ** (cannot do x . false => false because x may have runtime captures)
2659 static int lp_seq (lua_State
*L
) {
2660 TTree
*tree1
= getpatt(L
, 1, NULL
);
2661 TTree
*tree2
= getpatt(L
, 2, NULL
);
2662 if (tree1
->tag
== TFalse
|| tree2
->tag
== TTrue
)
2663 lua_pushvalue(L
, 1); /* false . x == false, x . true = x */
2664 else if (tree1
->tag
== TTrue
)
2665 lua_pushvalue(L
, 2); /* true . x = x */
2667 newroot2sib(L
, TSeq
);
2673 ** choice operator; optimizations:
2674 ** charset / charset => charset
2675 ** true / x => true, x / false => x, false / x => x
2676 ** (x / true is not equivalent to true)
2678 static int lp_choice (lua_State
*L
) {
2680 TTree
*t1
= getpatt(L
, 1, NULL
);
2681 TTree
*t2
= getpatt(L
, 2, NULL
);
2682 if (tocharset(t1
, &st1
) && tocharset(t2
, &st2
)) {
2683 TTree
*t
= newcharset(L
);
2684 loopset(i
, treebuffer(t
)[i
] = st1
.cs
[i
] | st2
.cs
[i
]);
2686 else if (nofail(t1
) || t2
->tag
== TFalse
)
2687 lua_pushvalue(L
, 1); /* true / x => true, x / false => x */
2688 else if (t1
->tag
== TFalse
)
2689 lua_pushvalue(L
, 2); /* false / x => x */
2691 newroot2sib(L
, TChoice
);
2699 static int lp_star (lua_State
*L
) {
2701 int n
= (int)luaL_checkinteger(L
, 2);
2702 TTree
*tree1
= getpatt(L
, 1, &size1
);
2703 if (n
>= 0) { /* seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) */
2704 TTree
*tree
= newtree(L
, (n
+ 1) * (size1
+ 1));
2705 if (nullable(tree1
))
2706 luaL_error(L
, "loop body may accept empty string");
2707 while (n
--) /* repeat 'n' times */
2708 tree
= seqaux(tree
, tree1
, size1
);
2710 memcpy(sib1(tree
), tree1
, size1
* sizeof(TTree
));
2712 else { /* choice (seq tree1 ... choice tree1 true ...) true */
2715 /* size = (choice + seq + tree1 + true) * n, but the last has no seq */
2716 tree
= newtree(L
, n
* (size1
+ 3) - 1);
2717 for (; n
> 1; n
--) { /* repeat (n - 1) times */
2718 tree
->tag
= TChoice
; tree
->u
.ps
= n
* (size1
+ 3) - 2;
2719 sib2(tree
)->tag
= TTrue
;
2721 tree
= seqaux(tree
, tree1
, size1
);
2723 tree
->tag
= TChoice
; tree
->u
.ps
= size1
+ 1;
2724 sib2(tree
)->tag
= TTrue
;
2725 memcpy(sib1(tree
), tree1
, size1
* sizeof(TTree
));
2735 static int lp_and (lua_State
*L
) {
2736 newroot1sib(L
, TAnd
);
2744 static int lp_not (lua_State
*L
) {
2745 newroot1sib(L
, TNot
);
2751 ** [t1 - t2] == Seq (Not t2) t1
2752 ** If t1 and t2 are charsets, make their difference.
2754 static int lp_sub (lua_State
*L
) {
2757 TTree
*t1
= getpatt(L
, 1, &s1
);
2758 TTree
*t2
= getpatt(L
, 2, &s2
);
2759 if (tocharset(t1
, &st1
) && tocharset(t2
, &st2
)) {
2760 TTree
*t
= newcharset(L
);
2761 loopset(i
, treebuffer(t
)[i
] = st1
.cs
[i
] & ~st2
.cs
[i
]);
2764 TTree
*tree
= newtree(L
, 2 + s1
+ s2
);
2765 tree
->tag
= TSeq
; /* sequence of... */
2766 tree
->u
.ps
= 2 + s2
;
2767 sib1(tree
)->tag
= TNot
; /* ...not... */
2768 memcpy(sib1(sib1(tree
)), t2
, s2
* sizeof(TTree
)); /* ...t2 */
2769 memcpy(sib2(tree
), t1
, s1
* sizeof(TTree
)); /* ... and t1 */
2770 joinktables(L
, 1, sib1(tree
), 2);
2776 static int lp_set (lua_State
*L
) {
2778 const char *s
= luaL_checklstring(L
, 1, &l
);
2779 TTree
*tree
= newcharset(L
);
2781 setchar(treebuffer(tree
), (byte
)(*s
));
2788 static int lp_range (lua_State
*L
) {
2790 int top
= lua_gettop(L
);
2791 TTree
*tree
= newcharset(L
);
2792 for (arg
= 1; arg
<= top
; arg
++) {
2795 const char *r
= luaL_checklstring(L
, arg
, &l
);
2796 luaL_argcheck(L
, l
== 2, arg
, "range must have two characters");
2797 for (c
= (byte
)r
[0]; c
<= (byte
)r
[1]; c
++)
2798 setchar(treebuffer(tree
), c
);
2805 ** Look-behind predicate
2807 static int lp_behind (lua_State
*L
) {
2809 TTree
*tree1
= getpatt(L
, 1, NULL
);
2810 int n
= fixedlen(tree1
);
2811 luaL_argcheck(L
, n
>= 0, 1, "pattern may not have fixed length");
2812 luaL_argcheck(L
, !hascaptures(tree1
), 1, "pattern have captures");
2813 luaL_argcheck(L
, n
<= MAXBEHIND
, 1, "pattern too long to look behind");
2814 tree
= newroot1sib(L
, TBehind
);
2821 ** Create a non-terminal
2823 static int lp_V (lua_State
*L
) {
2824 TTree
*tree
= newleaf(L
, TOpenCall
);
2825 luaL_argcheck(L
, !lua_isnoneornil(L
, 1), 1, "non-nil value expected");
2826 tree
->key
= addtonewktable(L
, 0, 1);
2832 ** Create a tree for a non-empty capture, with a body and
2833 ** optionally with an associated Lua value (at index 'labelidx' in the
2836 static int capture_aux (lua_State
*L
, int cap
, int labelidx
) {
2837 TTree
*tree
= newroot1sib(L
, TCapture
);
2839 tree
->key
= (labelidx
== 0) ? 0 : addtonewktable(L
, 1, labelidx
);
2845 ** Fill a tree with an empty capture, using an empty (TTrue) sibling.
2847 static TTree
*auxemptycap (TTree
*tree
, int cap
) {
2848 tree
->tag
= TCapture
;
2850 sib1(tree
)->tag
= TTrue
;
2856 ** Create a tree for an empty capture
2858 static TTree
*newemptycap (lua_State
*L
, int cap
) {
2859 return auxemptycap(newtree(L
, 2), cap
);
2864 ** Create a tree for an empty capture with an associated Lua value
2866 static TTree
*newemptycapkey (lua_State
*L
, int cap
, int idx
) {
2867 TTree
*tree
= auxemptycap(newtree(L
, 2), cap
);
2868 tree
->key
= addtonewktable(L
, 0, idx
);
2874 ** Captures with syntax p / v
2875 ** (function capture, query capture, string capture, or number capture)
2877 static int lp_divcapture (lua_State
*L
) {
2878 switch (lua_type(L
, 2)) {
2879 case LUA_TFUNCTION
: return capture_aux(L
, Cfunction
, 2);
2880 case LUA_TTABLE
: return capture_aux(L
, Cquery
, 2);
2881 case LUA_TSTRING
: return capture_aux(L
, Cstring
, 2);
2883 int n
= lua_tointeger(L
, 2);
2884 TTree
*tree
= newroot1sib(L
, TCapture
);
2885 luaL_argcheck(L
, 0 <= n
&& n
<= SHRT_MAX
, 1, "invalid number");
2890 default: return luaL_argerror(L
, 2, "invalid replacement value");
2895 static int lp_substcapture (lua_State
*L
) {
2896 return capture_aux(L
, Csubst
, 0);
2900 static int lp_tablecapture (lua_State
*L
) {
2901 return capture_aux(L
, Ctable
, 0);
2905 static int lp_groupcapture (lua_State
*L
) {
2906 if (lua_isnoneornil(L
, 2))
2907 return capture_aux(L
, Cgroup
, 0);
2909 return capture_aux(L
, Cgroup
, 2);
2913 static int lp_foldcapture (lua_State
*L
) {
2914 luaL_checktype(L
, 2, LUA_TFUNCTION
);
2915 return capture_aux(L
, Cfold
, 2);
2919 static int lp_simplecapture (lua_State
*L
) {
2920 return capture_aux(L
, Csimple
, 0);
2924 static int lp_poscapture (lua_State
*L
) {
2925 newemptycap(L
, Cposition
);
2930 static int lp_argcapture (lua_State
*L
) {
2931 int n
= (int)luaL_checkinteger(L
, 1);
2932 TTree
*tree
= newemptycap(L
, Carg
);
2934 luaL_argcheck(L
, 0 < n
&& n
<= SHRT_MAX
, 1, "invalid argument index");
2939 static int lp_backref (lua_State
*L
) {
2940 luaL_checkany(L
, 1);
2941 newemptycapkey(L
, Cbackref
, 1);
2949 static int lp_constcapture (lua_State
*L
) {
2951 int n
= lua_gettop(L
); /* number of values */
2952 if (n
== 0) /* no values? */
2953 newleaf(L
, TTrue
); /* no capture */
2955 newemptycapkey(L
, Cconst
, 1); /* single constant capture */
2956 else { /* create a group capture with all values */
2957 TTree
*tree
= newtree(L
, 1 + 3 * (n
- 1) + 2);
2958 newktable(L
, n
); /* create a 'ktable' for new tree */
2959 tree
->tag
= TCapture
;
2963 for (i
= 1; i
<= n
- 1; i
++) {
2965 tree
->u
.ps
= 3; /* skip TCapture and its sibling */
2966 auxemptycap(sib1(tree
), Cconst
);
2967 sib1(tree
)->key
= addtoktable(L
, i
);
2970 auxemptycap(tree
, Cconst
);
2971 tree
->key
= addtoktable(L
, i
);
2977 static int lp_matchtime (lua_State
*L
) {
2979 luaL_checktype(L
, 2, LUA_TFUNCTION
);
2980 tree
= newroot1sib(L
, TRunTime
);
2981 tree
->key
= addtonewktable(L
, 1, 2);
2985 /* }====================================================== */
2989 ** {======================================================
2990 ** Grammar - Tree generation
2991 ** =======================================================
2995 ** push on the stack the index and the pattern for the
2996 ** initial rule of grammar at index 'arg' in the stack;
2997 ** also add that index into position table.
2999 static void getfirstrule (lua_State
*L
, int arg
, int postab
) {
3000 lua_rawgeti(L
, arg
, 1); /* access first element */
3001 if (lua_isstring(L
, -1)) { /* is it the name of initial rule? */
3002 lua_pushvalue(L
, -1); /* duplicate it to use as key */
3003 lua_gettable(L
, arg
); /* get associated rule */
3006 lua_pushinteger(L
, 1); /* key for initial rule */
3007 lua_insert(L
, -2); /* put it before rule */
3009 if (!testpattern(L
, -1)) { /* initial rule not a pattern? */
3010 if (lua_isnil(L
, -1))
3011 luaL_error(L
, "grammar has no initial rule");
3013 luaL_error(L
, "initial rule '%s' is not a pattern", lua_tostring(L
, -2));
3015 lua_pushvalue(L
, -2); /* push key */
3016 lua_pushinteger(L
, 1); /* push rule position (after TGrammar) */
3017 lua_settable(L
, postab
); /* insert pair at position table */
3021 ** traverse grammar at index 'arg', pushing all its keys and patterns
3022 ** into the stack. Create a new table (before all pairs key-pattern) to
3023 ** collect all keys and their associated positions in the final tree
3024 ** (the "position table").
3025 ** Return the number of rules and (in 'totalsize') the total size
3026 ** for the new tree.
3028 static int collectrules (lua_State
*L
, int arg
, int *totalsize
) {
3029 int n
= 1; /* to count number of rules */
3030 int postab
= lua_gettop(L
) + 1; /* index of position table */
3031 int size
; /* accumulator for total size */
3032 lua_newtable(L
); /* create position table */
3033 getfirstrule(L
, arg
, postab
);
3034 size
= 2 + getsize(L
, postab
+ 2); /* TGrammar + TRule + rule */
3035 lua_pushnil(L
); /* prepare to traverse grammar table */
3036 while (lua_next(L
, arg
) != 0) {
3037 if (lua_tonumber(L
, -2) == 1 ||
3038 lp_equal(L
, -2, postab
+ 1)) { /* initial rule? */
3039 lua_pop(L
, 1); /* remove value (keep key for lua_next) */
3042 if (!testpattern(L
, -1)) /* value is not a pattern? */
3043 luaL_error(L
, "rule '%s' is not a pattern", val2str(L
, -2));
3044 luaL_checkstack(L
, LUA_MINSTACK
, "grammar has too many rules");
3045 lua_pushvalue(L
, -2); /* push key (to insert into position table) */
3046 lua_pushinteger(L
, size
);
3047 lua_settable(L
, postab
);
3048 size
+= 1 + getsize(L
, -1); /* update size */
3049 lua_pushvalue(L
, -2); /* push key (for next lua_next) */
3052 *totalsize
= size
+ 1; /* TTrue to finish list of rules */
3057 static void buildgrammar (lua_State
*L
, TTree
*grammar
, int frule
, int n
) {
3059 TTree
*nd
= sib1(grammar
); /* auxiliary pointer to traverse the tree */
3060 for (i
= 0; i
< n
; i
++) { /* add each rule into new tree */
3061 int ridx
= frule
+ 2*i
+ 1; /* index of i-th rule */
3063 TTree
*rn
= gettree(L
, ridx
, &rulesize
);
3066 nd
->cap
= i
; /* rule number */
3067 nd
->u
.ps
= rulesize
+ 1; /* point to next rule */
3068 memcpy(sib1(nd
), rn
, rulesize
* sizeof(TTree
)); /* copy rule */
3069 mergektable(L
, ridx
, sib1(nd
)); /* merge its ktable into new one */
3070 nd
= sib2(nd
); /* move to next rule */
3072 nd
->tag
= TTrue
; /* finish list of rules */
3077 ** Check whether a tree has potential infinite loops
3079 static int checkloops (TTree
*tree
) {
3081 if (tree
->tag
== TRep
&& nullable(sib1(tree
)))
3083 else if (tree
->tag
== TGrammar
)
3084 return 0; /* sub-grammars already checked */
3086 switch (numsiblings
[tree
->tag
]) {
3087 case 1: /* return checkloops(sib1(tree)); */
3088 tree
= sib1(tree
); goto tailcall
;
3090 if (checkloops(sib1(tree
))) return 1;
3091 /* else return checkloops(sib2(tree)); */
3092 tree
= sib2(tree
); goto tailcall
;
3093 default: assert(numsiblings
[tree
->tag
] == 0); return 0;
3099 static int verifyerror (lua_State
*L
, int *passed
, int npassed
) {
3101 for (i
= npassed
- 1; i
>= 0; i
--) { /* search for a repetition */
3102 for (j
= i
- 1; j
>= 0; j
--) {
3103 if (passed
[i
] == passed
[j
]) {
3104 lua_rawgeti(L
, -1, passed
[i
]); /* get rule's key */
3105 return luaL_error(L
, "rule '%s' may be left recursive", val2str(L
, -1));
3109 return luaL_error(L
, "too many left calls in grammar");
3114 ** Check whether a rule can be left recursive; raise an error in that
3115 ** case; otherwise return 1 iff pattern is nullable.
3116 ** The return value is used to check sequences, where the second pattern
3117 ** is only relevant if the first is nullable.
3118 ** Parameter 'nb' works as an accumulator, to allow tail calls in
3119 ** choices. ('nb' true makes function returns true.)
3120 ** Assume ktable at the top of the stack.
3122 static int verifyrule (lua_State
*L
, TTree
*tree
, int *passed
, int npassed
,
3125 switch (tree
->tag
) {
3126 case TChar
: case TSet
: case TAny
:
3128 return nb
; /* cannot pass from here */
3130 case TBehind
: /* look-behind cannot have calls */
3132 case TNot
: case TAnd
: case TRep
:
3133 /* return verifyrule(L, sib1(tree), passed, npassed, 1); */
3134 tree
= sib1(tree
); nb
= 1; goto tailcall
;
3135 case TCapture
: case TRunTime
:
3136 /* return verifyrule(L, sib1(tree), passed, npassed, nb); */
3137 tree
= sib1(tree
); goto tailcall
;
3139 /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
3140 tree
= sib2(tree
); goto tailcall
;
3141 case TSeq
: /* only check 2nd child if first is nb */
3142 if (!verifyrule(L
, sib1(tree
), passed
, npassed
, 0))
3144 /* else return verifyrule(L, sib2(tree), passed, npassed, nb); */
3145 tree
= sib2(tree
); goto tailcall
;
3146 case TChoice
: /* must check both children */
3147 nb
= verifyrule(L
, sib1(tree
), passed
, npassed
, nb
);
3148 /* return verifyrule(L, sib2(tree), passed, npassed, nb); */
3149 tree
= sib2(tree
); goto tailcall
;
3151 if (npassed
>= MAXRULES
)
3152 return verifyerror(L
, passed
, npassed
);
3154 passed
[npassed
++] = tree
->key
;
3155 /* return verifyrule(L, sib1(tree), passed, npassed); */
3156 tree
= sib1(tree
); goto tailcall
;
3159 return nullable(tree
); /* sub-grammar cannot be left recursive */
3160 default: assert(0); return 0;
3165 static void verifygrammar (lua_State
*L
, TTree
*grammar
) {
3166 int passed
[MAXRULES
];
3168 /* check left-recursive rules */
3169 for (rule
= sib1(grammar
); rule
->tag
== TRule
; rule
= sib2(rule
)) {
3170 if (rule
->key
== 0) continue; /* unused rule */
3171 verifyrule(L
, sib1(rule
), passed
, 0, 0);
3173 assert(rule
->tag
== TTrue
);
3174 /* check infinite loops inside rules */
3175 for (rule
= sib1(grammar
); rule
->tag
== TRule
; rule
= sib2(rule
)) {
3176 if (rule
->key
== 0) continue; /* unused rule */
3177 if (checkloops(sib1(rule
))) {
3178 lua_rawgeti(L
, -1, rule
->key
); /* get rule's key */
3179 luaL_error(L
, "empty loop in rule '%s'", val2str(L
, -1));
3182 assert(rule
->tag
== TTrue
);
3187 ** Give a name for the initial rule if it is not referenced
3189 static void initialrulename (lua_State
*L
, TTree
*grammar
, int frule
) {
3190 if (sib1(grammar
)->key
== 0) { /* initial rule is not referenced? */
3191 int n
= lua_rawlen(L
, -1) + 1; /* index for name */
3192 lua_pushvalue(L
, frule
); /* rule's name */
3193 lua_rawseti(L
, -2, n
); /* ktable was on the top of the stack */
3194 sib1(grammar
)->key
= n
;
3199 static TTree
*newgrammar (lua_State
*L
, int arg
) {
3201 int frule
= lua_gettop(L
) + 2; /* position of first rule's key */
3202 int n
= collectrules(L
, arg
, &treesize
);
3203 TTree
*g
= newtree(L
, treesize
);
3204 luaL_argcheck(L
, n
<= MAXRULES
, arg
, "grammar has too many rules");
3205 g
->tag
= TGrammar
; g
->u
.n
= n
;
3206 lua_newtable(L
); /* create 'ktable' */
3207 lua_setuservalue(L
, -2);
3208 buildgrammar(L
, g
, frule
, n
);
3209 lua_getuservalue(L
, -1); /* get 'ktable' for new tree */
3210 finalfix(L
, frule
- 1, g
, sib1(g
));
3211 initialrulename(L
, g
, frule
);
3212 verifygrammar(L
, g
);
3213 lua_pop(L
, 1); /* remove 'ktable' */
3214 lua_insert(L
, -(n
* 2 + 2)); /* move new table to proper position */
3215 lua_pop(L
, n
* 2 + 1); /* remove position table + rule pairs */
3216 return g
; /* new table at the top of the stack */
3219 /* }====================================================== */
3222 static Instruction
*prepcompile (lua_State
*L
, Pattern
*p
, int idx
) {
3223 lua_getuservalue(L
, idx
); /* push 'ktable' (may be used by 'finalfix') */
3224 finalfix(L
, 0, NULL
, p
->tree
);
3225 lua_pop(L
, 1); /* remove 'ktable' */
3226 return compile(L
, p
);
3230 static int lp_printtree (lua_State
*L
) {
3231 TTree
*tree
= getpatt(L
, 1, NULL
);
3232 int c
= lua_toboolean(L
, 2);
3234 lua_getuservalue(L
, 1); /* push 'ktable' (may be used by 'finalfix') */
3235 finalfix(L
, 0, NULL
, tree
);
3236 lua_pop(L
, 1); /* remove 'ktable' */
3244 static int lp_printcode (lua_State
*L
) {
3245 Pattern
*p
= getpattern(L
, 1);
3247 if (p
->code
== NULL
) /* not compiled yet? */
3248 prepcompile(L
, p
, 1);
3249 printpatt(p
->code
, p
->codesize
);
3255 ** Get the initial position for the match, interpreting negative
3256 ** values from the end of the subject
3258 static size_t initposition (lua_State
*L
, size_t len
) {
3259 lua_Integer ii
= luaL_optinteger(L
, 3, 1);
3260 if (ii
> 0) { /* positive index? */
3261 if ((size_t)ii
<= len
) /* inside the string? */
3262 return (size_t)ii
- 1; /* return it (corrected to 0-base) */
3263 else return len
; /* crop at the end */
3265 else { /* negative index */
3266 if ((size_t)(-ii
) <= len
) /* inside the string? */
3267 return len
- ((size_t)(-ii
)); /* return position from the end */
3268 else return 0; /* crop at the beginning */
3274 ** Main match function
3276 static int lp_match (lua_State
*L
) {
3277 Capture capture
[INITCAPSIZE
];
3280 Pattern
*p
= (getpatt(L
, 1, NULL
), getpattern(L
, 1));
3281 Instruction
*code
= (p
->code
!= NULL
) ? p
->code
: prepcompile(L
, p
, 1);
3282 const char *s
= luaL_checklstring(L
, SUBJIDX
, &l
);
3283 size_t i
= initposition(L
, l
);
3284 int ptop
= lua_gettop(L
);
3285 lua_pushnil(L
); /* initialize subscache */
3286 lua_pushlightuserdata(L
, capture
); /* initialize caplistidx */
3287 lua_getuservalue(L
, 1); /* initialize penvidx */
3288 r
= match(L
, s
, s
+ i
, s
+ l
, code
, capture
, ptop
);
3293 return getcaptures(L
, s
, r
, ptop
);
3299 ** {======================================================
3300 ** Library creation and functions not related to matching
3301 ** =======================================================
3304 /* maximum limit for stack size */
3305 #define MAXLIM (INT_MAX / 100)
3307 static int lp_setmax (lua_State
*L
) {
3308 lua_Integer lim
= luaL_checkinteger(L
, 1);
3309 luaL_argcheck(L
, 0 < lim
&& lim
<= MAXLIM
, 1, "out of range");
3311 lua_setfield(L
, LUA_REGISTRYINDEX
, MAXSTACKIDX
);
3316 static int lp_version (lua_State
*L
) {
3317 lua_pushstring(L
, VERSION
);
3322 static int lp_type (lua_State
*L
) {
3323 if (testpattern(L
, 1))
3324 lua_pushliteral(L
, "pattern");
3331 int lp_gc (lua_State
*L
) {
3332 Pattern
*p
= getpattern(L
, 1);
3333 realloccode(L
, p
, 0); /* delete code block */
3338 static void createcat (lua_State
*L
, const char *catname
, int (catf
) (int)) {
3339 TTree
*t
= newcharset(L
);
3341 for (i
= 0; i
<= UCHAR_MAX
; i
++)
3342 if (catf(i
)) setchar(treebuffer(t
), i
);
3343 lua_setfield(L
, -2, catname
);
3347 static int lp_locale (lua_State
*L
) {
3348 if (lua_isnoneornil(L
, 1)) {
3350 lua_createtable(L
, 0, 12);
3353 luaL_checktype(L
, 1, LUA_TTABLE
);
3356 createcat(L
, "alnum", isalnum
);
3357 createcat(L
, "alpha", isalpha
);
3358 createcat(L
, "cntrl", iscntrl
);
3359 createcat(L
, "digit", isdigit
);
3360 createcat(L
, "graph", isgraph
);
3361 createcat(L
, "lower", islower
);
3362 createcat(L
, "print", isprint
);
3363 createcat(L
, "punct", ispunct
);
3364 createcat(L
, "space", isspace
);
3365 createcat(L
, "upper", isupper
);
3366 createcat(L
, "xdigit", isxdigit
);
3371 static struct luaL_Reg pattreg
[] = {
3372 {"ptree", lp_printtree
},
3373 {"pcode", lp_printcode
},
3374 {"match", lp_match
},
3377 {"C", lp_simplecapture
},
3378 {"Cc", lp_constcapture
},
3379 {"Cmt", lp_matchtime
},
3381 {"Carg", lp_argcapture
},
3382 {"Cp", lp_poscapture
},
3383 {"Cs", lp_substcapture
},
3384 {"Ct", lp_tablecapture
},
3385 {"Cf", lp_foldcapture
},
3386 {"Cg", lp_groupcapture
},
3390 {"locale", lp_locale
},
3391 {"version", lp_version
},
3392 {"setmaxstack", lp_setmax
},
3398 static struct luaL_Reg metareg
[] = {
3400 {"__add", lp_choice
},
3404 {"__div", lp_divcapture
},
3411 int luaopen_lpeg (lua_State
*L
);
3412 int luaopen_lpeg (lua_State
*L
) {
3413 luaL_newmetatable(L
, PATTERN_T
);
3414 lua_pushnumber(L
, MAXBACK
); /* initialize maximum backtracking */
3415 lua_setfield(L
, LUA_REGISTRYINDEX
, MAXSTACKIDX
);
3416 luaL_setfuncs(L
, metareg
, 0);
3417 luaL_newlib(L
, pattreg
);
3418 lua_pushvalue(L
, -1);
3419 lua_setfield(L
, -3, "__index");
3423 /* }====================================================== */