/*
** beta-0.89.2
** [luatex.git] / source / texk / web2c / luatexdir / luapeg / lpeg.h
** blob f722b8871748d3e455a76adb8ec7693b83186d47
*/
/*
** $Id: lptypes.h,v 1.14 2015/09/28 17:17:41 roberto Exp $
** LPeg - PEG pattern matching for Lua
** Copyright 2007-2015, Lua.org & PUC-Rio  (see 'lpeg.html' for license)
** written by Roberto Ierusalimschy
*/
/*
** "Amalgamated" version for Lua(jit)TeX written by Scarso Luigi.
*/
13 #if !defined(lptypes_h)
14 #define lptypes_h
17 #if !defined(LPEG_DEBUG)
18 #define NDEBUG
19 #endif
21 #include <assert.h>
22 #include <limits.h>
23 /* added */
24 #include <ctype.h>
25 #include <stdio.h>
26 #include <string.h>
29 #include "lua.h"
30 #include "lauxlib.h"
/* version string of this LPeg release */
#define VERSION         "1.0.0"


/* string identifiers; how each one is used (e.g. as a metatable or
   registry name) is not visible in this header */
#define PATTERN_T       "lpeg-pattern"
#define MAXSTACKIDX     "lpeg-maxstack"
/*
** compatibility with Lua 5.1
*/
#if (LUA_VERSION_NUM == 501)

#define lp_equal        lua_equal

#define lua_getuservalue        lua_getfenv
#define lua_setuservalue        lua_setfenv

#define lua_rawlen              lua_objlen

#define luaL_setfuncs(L,f,n)    luaL_register(L,NULL,f)
#define luaL_newlib(L,f)        luaL_register(L,"lpeg",f)

#endif


/* when the 5.1 branch above did not define it, equality is tested
   through lua_compare with LUA_OPEQ */
#if !defined(lp_equal)
#define lp_equal(L,idx1,idx2)  lua_compare(L,(idx1),(idx2),LUA_OPEQ)
#endif
/* default maximum size for call/backtrack stack
   (can be overridden by defining MAXBACK beforehand) */
#if !defined(MAXBACK)
#define MAXBACK         400
#endif


/* maximum number of rules in a grammar
   (can be overridden by defining MAXRULES beforehand) */
#if !defined(MAXRULES)
#define MAXRULES        1000
#endif
/* initial size for capture's list */
#define INITCAPSIZE     32


/* index, on Lua stack, for subject */
#define SUBJIDX         2

/* number of fixed arguments to 'match' (before capture arguments) */
#define FIXEDARGS       3

/*
** The three macros below give Lua stack slots relative to 'ptop'
** (the index of the last argument to 'match').
*/

/* index, on Lua stack, for capture list */
#define caplistidx(ptop)        ((ptop) + 2)

/* index, on Lua stack, for pattern's ktable */
#define ktableidx(ptop)         ((ptop) + 3)

/* index, on Lua stack, for backtracking stack */
#define stackidx(ptop)          ((ptop) + 4)
typedef unsigned char byte;


#define BITSPERCHAR             8

/* number of bytes needed to hold one bit per 'unsigned char' value */
#define CHARSETSIZE             ((UCHAR_MAX/BITSPERCHAR) + 1)


/* a set of characters, stored as a bit array */
typedef struct Charset {
  byte cs[CHARSETSIZE];
} Charset;


/* execute 'b' once for each byte index 'v' (0 .. CHARSETSIZE - 1);
   'v' is declared by the macro itself */
#define loopset(v,b)    { int v; for (v = 0; v < CHARSETSIZE; v++) {b;} }
/* access to charset (the bytes stored right after tree node 't') */
#define treebuffer(t)      ((byte *)((t) + 1))

/* number of slots needed for 'n' bytes */
#define bytes2slots(n)  (((n) - 1) / sizeof(TTree) + 1)

/* set 'b' bit in charset 'cs' */
#define setchar(cs,b)   ((cs)[(b) >> 3] |= (1 << ((b) & 7)))
/*
** in capture instructions, 'kind' of capture and its offset are
** packed in field 'aux', 4 bits for each
*/
#define getkind(op)             ((op)->i.aux & 0xF)
#define getoff(op)              (((op)->i.aux >> 4) & 0xF)
#define joinkindoff(k,o)        ((k) | ((o) << 4))

#define MAXOFF          0xF
#define MAXAUX          0xFF


/* maximum number of bytes to look behind */
#define MAXBEHIND       MAXAUX


/* maximum size (in elements) for a pattern */
#define MAXPATTSIZE     (SHRT_MAX - 10)


/* size (in elements) for an instruction plus extra l bytes */
#define instsize(l)  (((l) + sizeof(Instruction) - 1)/sizeof(Instruction) + 1)


/* size (in elements) for a ISet instruction */
#define CHARSETINSTSIZE         instsize(CHARSETSIZE)

/* size (in elements) for a IFunc instruction */
#define funcinstsize(p)         ((p)->i.aux + 2)


/* nonzero when bit 'c' is set in charset 'st' */
#define testchar(st,c)  (((int)(st)[((c) >> 3)] & (1 << ((c) & 7))))
158 #endif
161 ** $Id: lpcap.h,v 1.2 2015/02/27 17:13:17 roberto Exp $
164 #if !defined(lpcap_h)
165 #define lpcap_h
168 /*#include "lptypes.h"*/
/* kinds of captures
   (15 enumerators, values 0..14, so each fits the 4-bit 'kind' field
   read by 'getkind') */
typedef enum CapKind {
  Cclose,
  Cposition,
  Cconst,
  Cbackref,
  Carg,
  Csimple,
  Ctable,
  Cfunction,
  Cquery,
  Cstring,
  Cnum,
  Csubst,
  Cfold,
  Cruntime,
  Cgroup
} CapKind;
178 typedef struct Capture {
179 const char *s; /* subject position */
180 unsigned short idx; /* extra info (group name, arg index, etc.) */
181 byte kind; /* kind of capture */
182 byte siz; /* size of full capture + 1 (0 = not a full capture) */
183 } Capture;
186 typedef struct CapState {
187 Capture *cap; /* current capture */
188 Capture *ocap; /* (original) capture list */
189 lua_State *L;
190 int ptop; /* index of last argument to 'match' */
191 const char *s; /* original string */
192 int valuecached; /* value stored in cache slot */
193 } CapState;
196 int runtimecap (CapState *cs, Capture *close, const char *s, int *rem);
197 int getcaptures (lua_State *L, const char *s, const char *r, int ptop);
198 int finddyncap (Capture *cap, Capture *last);
200 #endif
204 ** $Id: lptree.h,v 1.2 2013/03/24 13:51:12 roberto Exp $
207 #if !defined(lptree_h)
208 #define lptree_h
211 /*#include "lptypes.h" */
/*
** types of trees
*/
typedef enum TTag {
  TChar = 0, TSet, TAny,  /* standard PEG elements */
  TTrue, TFalse,
  TRep,
  TSeq, TChoice,
  TNot, TAnd,
  TCall,
  TOpenCall,
  TRule,  /* sib1 is rule's pattern, sib2 is 'next' rule */
  TGrammar,  /* sib1 is initial (and first) rule */
  TBehind,  /* match behind */
  TCapture,  /* regular capture */
  TRunTime  /* run-time capture */
} TTag;
232 /* number of siblings for each tree */
233 extern const byte numsiblings[];
237 ** Tree trees
238 ** The first sibling of a tree (if there is one) is immediately after
239 ** the tree. A reference to a second sibling (ps) is its position
240 ** relative to the position of the tree itself. A key in ktable
241 ** uses the (unique) address of the original tree that created that
242 ** entry. NULL means no data.
244 typedef struct TTree {
245 byte tag;
246 byte cap; /* kind of capture (if it is a capture) */
247 unsigned short key; /* key in ktable for Lua data (0 if no key) */
248 union {
249 int ps; /* occasional second sibling */
250 int n; /* occasional counter */
251 } u;
252 } TTree;
256 ** A complete pattern has its tree plus, if already compiled,
257 ** its corresponding code
259 typedef struct Pattern {
260 union Instruction *code;
261 int codesize;
262 TTree tree[1];
263 } Pattern;
266 /* number of siblings for each tree */
267 extern const byte numsiblings[];
/* access to siblings: the first one is the node right after 't';
   the second one is 'u.ps' nodes away from 't' */
#define sib1(t)         ((t) + 1)
#define sib2(t)         ((t) + (t)->u.ps)
278 #endif
282 ** $Id: lpvm.h,v 1.3 2014/02/21 13:06:41 roberto Exp $
285 #if !defined(lpvm_h)
286 #define lpvm_h
288 /*#include "lpcap.h"*/
/* Virtual Machine's instructions */
typedef enum Opcode {
  IAny,  /* if no char, fail */
  IChar,  /* if char != aux, fail */
  ISet,  /* if char not in buff, fail */
  ITestAny,  /* if no char, jump to 'offset' */
  ITestChar,  /* if char != aux, jump to 'offset' */
  ITestSet,  /* if char not in buff, jump to 'offset' */
  ISpan,  /* read a span of chars in buff */
  IBehind,  /* walk back 'aux' characters (fail if not possible) */
  IRet,  /* return from a rule */
  IEnd,  /* end of pattern */
  IChoice,  /* stack a choice; next fail will jump to 'offset' */
  IJmp,  /* jump to 'offset' */
  ICall,  /* call rule at 'offset' */
  IOpenCall,  /* call rule number 'key' (must be closed to a ICall) */
  ICommit,  /* pop choice and jump to 'offset' */
  IPartialCommit,  /* update top choice to current position and jump */
  IBackCommit,  /* "fails" but jump to its own 'offset' */
  IFailTwice,  /* pop one choice and then fail */
  IFail,  /* go back to saved state on choice and jump to saved offset */
  IGiveup,  /* internal use */
  IFullCapture,  /* complete capture of last 'off' chars */
  IOpenCapture,  /* start a capture */
  ICloseCapture,
  ICloseRunTime
} Opcode;
321 typedef union Instruction {
322 struct Inst {
323 byte code;
324 byte aux;
325 short key;
326 } i;
327 int offset;
328 byte buff[1];
329 } Instruction;
332 void printpatt (Instruction *p, int n);
333 const char *match (lua_State *L, const char *o, const char *s, const char *e,
334 Instruction *op, Capture *capture, int ptop);
337 #endif
342 ** $Id: lpcode.h,v 1.7 2015/06/12 18:24:45 roberto Exp $
345 #if !defined(lpcode_h)
346 #define lpcode_h
348 /*#include "lua.h"*/
350 /*#include "lptypes.h"*/
351 /*#include "lptree.h"*/
352 /*#include "lpvm.h"*/
354 int tocharset (TTree *tree, Charset *cs);
355 int checkaux (TTree *tree, int pred);
356 int fixedlenx (TTree *tree, int count, int len);
357 int hascaptures (TTree *tree);
358 int lp_gc (lua_State *L);
359 Instruction *compile (lua_State *L, Pattern *p);
360 void realloccode (lua_State *L, Pattern *p, int nsize);
361 int sizei (const Instruction *i);
/* values passed as the 'pred' argument of 'checkaux' */
#define PEnullable      0
#define PEnofail        1

/*
** nofail(t) implies that 't' cannot fail with any input
*/
#define nofail(t)       checkaux(t, PEnofail)

/*
** (not nullable(t)) implies 't' cannot match without consuming
** something
*/
#define nullable(t)     checkaux(t, PEnullable)

/* 'fixedlenx' called with both 'count' and 'len' set to 0 */
#define fixedlen(t)     fixedlenx(t, 0, 0)
382 #endif
386 ** $Id: lpprint.h,v 1.2 2015/06/12 18:18:08 roberto Exp $
390 #if !defined(lpprint_h)
391 #define lpprint_h
394 /* #include "lptree.h" */
395 /* #include "lpvm.h" */
/*
** Debug printing helpers.  With LPEG_DEBUG they are declared as real
** functions; otherwise 'printktable', 'printtree' and 'printpatt'
** become macros that raise a Lua error through luaL_error.
** Note that the 'printtree' and 'printpatt' macros use an identifier
** 'L' that is not one of their parameters, so it must be in scope
** wherever they are expanded.
*/
#if defined(LPEG_DEBUG)

void printpatt (Instruction *p, int n);
void printtree (TTree *tree, int ident);
void printktable (lua_State *L, int idx);
void printcharset (const byte *st);
void printcaplist (Capture *cap, Capture *limit);
void printinst (const Instruction *op, const Instruction *p);

#else

#define printktable(L,idx)  \
	luaL_error(L, "function only implemented in debug mode")
#define printtree(tree,i)  \
	luaL_error(L, "function only implemented in debug mode")
#define printpatt(p,n)  \
	luaL_error(L, "function only implemented in debug mode")

#endif
419 #endif