Notes on version 6.3.
[ragel.git] / ragel / parsetree.h
blob66a4d68431bc1e9150e2c2b1e2d55a046c7ab02c
1 /*
2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
3 */
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #ifndef _PARSETREE_H
23 #define _PARSETREE_H
25 #include "ragel.h"
26 #include "avlmap.h"
27 #include "bstmap.h"
28 #include "vector.h"
29 #include "dlist.h"
31 struct NameInst;
33 /* Types of builtin machines. */
34 enum BuiltinMachine
36 BT_Any,
37 BT_Ascii,
38 BT_Extend,
39 BT_Alpha,
40 BT_Digit,
41 BT_Alnum,
42 BT_Lower,
43 BT_Upper,
44 BT_Cntrl,
45 BT_Graph,
46 BT_Print,
47 BT_Punct,
48 BT_Space,
49 BT_Xdigit,
50 BT_Lambda,
51 BT_Empty
55 struct ParseData;
57 /* Leaf type. */
58 struct Literal;
60 /* Tree nodes. */
62 struct Term;
63 struct FactorWithAug;
64 struct FactorWithRep;
65 struct FactorWithNeg;
66 struct Factor;
67 struct Expression;
68 struct Join;
69 struct JoinOrLm;
70 struct LongestMatch;
71 struct LongestMatchPart;
72 struct LmPartList;
73 struct Range;
75 /* Type of augmentation. Describes locations in the machine. */
76 enum AugType
78 /* Transition actions/priorities. */
79 at_start,
80 at_all,
81 at_finish,
82 at_leave,
84 /* Global error actions. */
85 at_start_gbl_error,
86 at_all_gbl_error,
87 at_final_gbl_error,
88 at_not_start_gbl_error,
89 at_not_final_gbl_error,
90 at_middle_gbl_error,
92 /* Local error actions. */
93 at_start_local_error,
94 at_all_local_error,
95 at_final_local_error,
96 at_not_start_local_error,
97 at_not_final_local_error,
98 at_middle_local_error,
100 /* To State Action embedding. */
101 at_start_to_state,
102 at_all_to_state,
103 at_final_to_state,
104 at_not_start_to_state,
105 at_not_final_to_state,
106 at_middle_to_state,
108 /* From State Action embedding. */
109 at_start_from_state,
110 at_all_from_state,
111 at_final_from_state,
112 at_not_start_from_state,
113 at_not_final_from_state,
114 at_middle_from_state,
116 /* EOF Action embedding. */
117 at_start_eof,
118 at_all_eof,
119 at_final_eof,
120 at_not_start_eof,
121 at_not_final_eof,
122 at_middle_eof
125 /* IMPORTANT: These must follow the same order as the state augs in AugType
126 * since we will be using this to compose AugType. */
127 enum StateAugType
129 sat_start = 0,
130 sat_all,
131 sat_final,
132 sat_not_start,
133 sat_not_final,
134 sat_middle
137 struct Action;
138 struct PriorDesc;
139 struct RegExpr;
140 struct ReItem;
141 struct ReOrBlock;
142 struct ReOrItem;
143 struct ExplicitMachine;
144 struct InlineItem;
145 struct InlineList;
147 /* Reference to a named state. */
148 typedef Vector<char*> NameRef;
149 typedef Vector<NameRef*> NameRefList;
150 typedef Vector<NameInst*> NameTargList;
152 /* Structure for storing location of epsilon transitons. */
153 struct EpsilonLink
155 EpsilonLink( const InputLoc &loc, NameRef &target )
156 : loc(loc), target(target) { }
158 InputLoc loc;
159 NameRef target;
162 struct Label
164 Label( const InputLoc &loc, char *data )
165 : loc(loc), data(data) { }
167 InputLoc loc;
168 char *data;
171 /* Structrue represents an action assigned to some FactorWithAug node. The
172 * factor with aug will keep an array of these. */
173 struct ParserAction
175 ParserAction( const InputLoc &loc, AugType type, int localErrKey, Action *action )
176 : loc(loc), type(type), localErrKey(localErrKey), action(action) { }
178 InputLoc loc;
179 AugType type;
180 int localErrKey;
181 Action *action;
184 struct ConditionTest
186 ConditionTest( const InputLoc &loc, AugType type, Action *action, bool sense ) :
187 loc(loc), type(type), action(action), sense(sense) { }
189 InputLoc loc;
190 AugType type;
191 Action *action;
192 bool sense;
195 struct Token
197 char *data;
198 int length;
199 InputLoc loc;
201 void append( const Token &other );
202 void set( const char *str, int len );
205 char *prepareLitString( const InputLoc &loc, char *src, long length,
206 long &resLen, bool &caseInsensitive );
208 /* Store the value and type of a priority augmentation. */
209 struct PriorityAug
211 PriorityAug( AugType type, int priorKey, int priorValue ) :
212 type(type), priorKey(priorKey), priorValue(priorValue) { }
214 AugType type;
215 int priorKey;
216 int priorValue;
220 * A Variable Definition
222 struct VarDef
224 VarDef( const char *name, JoinOrLm *joinOrLm )
225 : name(name), joinOrLm(joinOrLm), isExport(false) { }
227 /* Parse tree traversal. */
228 FsmAp *walk( ParseData *pd );
229 void makeNameTree( const InputLoc &loc, ParseData *pd );
230 void resolveNameRefs( ParseData *pd );
232 const char *name;
233 JoinOrLm *joinOrLm;
234 bool isExport;
239 * LongestMatch
241 * Wherever possible the item match will execute on the character. If not
242 * possible the item match will execute on a lookahead character and either
243 * hold the current char (if one away) or backup.
245 * How to handle the problem of backing up over a buffer break?
247 * Don't want to use pending out transitions for embedding item match because
248 * the role of item match action is different: it may sometimes match on the
249 * final transition, or may match on a lookahead character.
251 * Don't want to invent a new operator just for this. So just trail action
252 * after machine, this means we can only use literal actions.
254 * The item action may
256 * What states of the machine will be final. The item actions that wrap around
257 * on the last character will go straight to the start state.
259 * Some transitions will be lookahead transitions, they will hold the current
260 * character. Crossing them with regular transitions must be restricted
261 * because it does not make sense. The transition cannot simultaneously hold
262 * and consume the current character.
264 struct LongestMatchPart
266 LongestMatchPart( Join *join, Action *action,
267 InputLoc &semiLoc, int longestMatchId )
269 join(join), action(action), semiLoc(semiLoc),
270 longestMatchId(longestMatchId), inLmSelect(false) { }
272 InputLoc getLoc();
274 Join *join;
275 Action *action;
276 InputLoc semiLoc;
278 Action *setActId;
279 Action *actOnLast;
280 Action *actOnNext;
281 Action *actLagBehind;
282 int longestMatchId;
283 bool inLmSelect;
284 LongestMatch *longestMatch;
286 LongestMatchPart *prev, *next;
289 /* Declare a new type so that ptreetypes.h need not include dlist.h. */
290 struct LmPartList : DList<LongestMatchPart> {};
292 struct LongestMatch
294 /* Construct with a list of joins */
295 LongestMatch( const InputLoc &loc, LmPartList *longestMatchList ) :
296 loc(loc), longestMatchList(longestMatchList), name(0),
297 lmSwitchHandlesError(false) { }
299 /* Tree traversal. */
300 FsmAp *walk( ParseData *pd );
301 void makeNameTree( ParseData *pd );
302 void resolveNameRefs( ParseData *pd );
303 void transferScannerLeavingActions( FsmAp *graph );
304 void runLongestMatch( ParseData *pd, FsmAp *graph );
305 Action *newAction( ParseData *pd, const InputLoc &loc, const char *name,
306 InlineList *inlineList );
307 void makeActions( ParseData *pd );
308 void findName( ParseData *pd );
309 void restart( FsmAp *graph, TransAp *trans );
311 InputLoc loc;
312 LmPartList *longestMatchList;
313 const char *name;
315 Action *lmActSelect;
316 bool lmSwitchHandlesError;
318 LongestMatch *next, *prev;
322 /* List of Expressions. */
323 typedef DList<Expression> ExprList;
325 struct JoinOrLm
327 enum Type {
328 JoinType,
329 LongestMatchType
332 JoinOrLm( Join *join ) :
333 join(join), longestMatch(0), type(JoinType) {}
334 JoinOrLm( LongestMatch *longestMatch ) :
335 join(0), longestMatch(longestMatch), type(LongestMatchType) {}
337 FsmAp *walk( ParseData *pd );
338 void makeNameTree( ParseData *pd );
339 void resolveNameRefs( ParseData *pd );
341 Join *join;
342 LongestMatch *longestMatch;
343 Type type;
347 * Join
349 struct Join
351 /* Construct with the first expression. */
352 Join( Expression *expr );
353 Join( const InputLoc &loc, Expression *expr );
355 /* Tree traversal. */
356 FsmAp *walk( ParseData *pd );
357 FsmAp *walkJoin( ParseData *pd );
358 void makeNameTree( ParseData *pd );
359 void resolveNameRefs( ParseData *pd );
361 /* Data. */
362 InputLoc loc;
363 ExprList exprList;
367 * Expression
369 struct Expression
371 enum Type {
372 OrType,
373 IntersectType,
374 SubtractType,
375 StrongSubtractType,
376 TermType,
377 BuiltinType
380 /* Construct with an expression on the left and a term on the right. */
381 Expression( Expression *expression, Term *term, Type type ) :
382 expression(expression), term(term),
383 builtin(builtin), type(type), prev(this), next(this) { }
385 /* Construct with only a term. */
386 Expression( Term *term ) :
387 expression(0), term(term), builtin(builtin),
388 type(TermType) , prev(this), next(this) { }
390 /* Construct with a builtin type. */
391 Expression( BuiltinMachine builtin ) :
392 expression(0), term(0), builtin(builtin),
393 type(BuiltinType), prev(this), next(this) { }
395 ~Expression();
397 /* Tree traversal. */
398 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
399 void makeNameTree( ParseData *pd );
400 void resolveNameRefs( ParseData *pd );
402 /* Node data. */
403 Expression *expression;
404 Term *term;
405 BuiltinMachine builtin;
406 Type type;
408 Expression *prev, *next;
412 * Term
414 struct Term
416 enum Type {
417 ConcatType,
418 RightStartType,
419 RightFinishType,
420 LeftType,
421 FactorWithAugType
424 Term( Term *term, FactorWithAug *factorWithAug ) :
425 term(term), factorWithAug(factorWithAug), type(ConcatType) { }
427 Term( Term *term, FactorWithAug *factorWithAug, Type type ) :
428 term(term), factorWithAug(factorWithAug), type(type) { }
430 Term( FactorWithAug *factorWithAug ) :
431 term(0), factorWithAug(factorWithAug), type(FactorWithAugType) { }
433 ~Term();
435 FsmAp *walk( ParseData *pd, bool lastInSeq = true );
436 void makeNameTree( ParseData *pd );
437 void resolveNameRefs( ParseData *pd );
439 Term *term;
440 FactorWithAug *factorWithAug;
441 Type type;
443 /* Priority descriptor for RightFinish type. */
444 PriorDesc priorDescs[2];
448 /* Third level of precedence. Augmenting nodes with actions and priorities. */
449 struct FactorWithAug
451 FactorWithAug( FactorWithRep *factorWithRep ) :
452 priorDescs(0), factorWithRep(factorWithRep) { }
453 ~FactorWithAug();
455 /* Tree traversal. */
456 FsmAp *walk( ParseData *pd );
457 void makeNameTree( ParseData *pd );
458 void resolveNameRefs( ParseData *pd );
460 void assignActions( ParseData *pd, FsmAp *graph, int *actionOrd );
461 void assignPriorities( FsmAp *graph, int *priorOrd );
463 void assignConditions( FsmAp *graph );
465 /* Actions and priorities assigned to the factor node. */
466 Vector<ParserAction> actions;
467 Vector<PriorityAug> priorityAugs;
468 PriorDesc *priorDescs;
469 Vector<Label> labels;
470 Vector<EpsilonLink> epsilonLinks;
471 Vector<ConditionTest> conditions;
473 FactorWithRep *factorWithRep;
476 /* Fourth level of precedence. Trailing unary operators. Provide kleen star,
477 * optional and plus. */
478 struct FactorWithRep
480 enum Type {
481 StarType,
482 StarStarType,
483 OptionalType,
484 PlusType,
485 ExactType,
486 MaxType,
487 MinType,
488 RangeType,
489 FactorWithNegType
492 FactorWithRep( const InputLoc &loc, FactorWithRep *factorWithRep,
493 int lowerRep, int upperRep, Type type ) :
494 loc(loc), factorWithRep(factorWithRep),
495 factorWithNeg(0), lowerRep(lowerRep),
496 upperRep(upperRep), type(type) { }
498 FactorWithRep( FactorWithNeg *factorWithNeg )
499 : factorWithNeg(factorWithNeg), type(FactorWithNegType) { }
501 ~FactorWithRep();
503 /* Tree traversal. */
504 FsmAp *walk( ParseData *pd );
505 void makeNameTree( ParseData *pd );
506 void resolveNameRefs( ParseData *pd );
508 InputLoc loc;
509 FactorWithRep *factorWithRep;
510 FactorWithNeg *factorWithNeg;
511 int lowerRep, upperRep;
512 Type type;
514 /* Priority descriptor for StarStar type. */
515 PriorDesc priorDescs[2];
518 /* Fifth level of precedence. Provides Negation. */
519 struct FactorWithNeg
521 enum Type {
522 NegateType,
523 CharNegateType,
524 FactorType
527 FactorWithNeg( const InputLoc &loc, FactorWithNeg *factorWithNeg, Type type) :
528 loc(loc), factorWithNeg(factorWithNeg), factor(0), type(type) { }
530 FactorWithNeg( Factor *factor ) :
531 factorWithNeg(0), factor(factor), type(FactorType) { }
533 ~FactorWithNeg();
535 /* Tree traversal. */
536 FsmAp *walk( ParseData *pd );
537 void makeNameTree( ParseData *pd );
538 void resolveNameRefs( ParseData *pd );
540 InputLoc loc;
541 FactorWithNeg *factorWithNeg;
542 Factor *factor;
543 Type type;
547 * Factor
549 struct Factor
551 /* Language elements a factor node can be. */
552 enum Type {
553 LiteralType,
554 RangeType,
555 OrExprType,
556 RegExprType,
557 ReferenceType,
558 ParenType,
559 LongestMatchType,
562 /* Construct with a literal fsm. */
563 Factor( Literal *literal ) :
564 literal(literal), type(LiteralType) { }
566 /* Construct with a range. */
567 Factor( Range *range ) :
568 range(range), type(RangeType) { }
570 /* Construct with the or part of a regular expression. */
571 Factor( ReItem *reItem ) :
572 reItem(reItem), type(OrExprType) { }
574 /* Construct with a regular expression. */
575 Factor( RegExpr *regExpr ) :
576 regExpr(regExpr), type(RegExprType) { }
578 /* Construct with a reference to a var def. */
579 Factor( const InputLoc &loc, VarDef *varDef ) :
580 loc(loc), varDef(varDef), type(ReferenceType) {}
582 /* Construct with a parenthesized join. */
583 Factor( Join *join ) :
584 join(join), type(ParenType) {}
586 /* Construct with a longest match operator. */
587 Factor( LongestMatch *longestMatch ) :
588 longestMatch(longestMatch), type(LongestMatchType) {}
590 /* Cleanup. */
591 ~Factor();
593 /* Tree traversal. */
594 FsmAp *walk( ParseData *pd );
595 void makeNameTree( ParseData *pd );
596 void resolveNameRefs( ParseData *pd );
598 InputLoc loc;
599 Literal *literal;
600 Range *range;
601 ReItem *reItem;
602 RegExpr *regExpr;
603 VarDef *varDef;
604 Join *join;
605 LongestMatch *longestMatch;
606 int lower, upper;
607 Type type;
610 /* A range machine. Only ever composed of two literals. */
611 struct Range
613 Range( Literal *lowerLit, Literal *upperLit )
614 : lowerLit(lowerLit), upperLit(upperLit) { }
616 ~Range();
617 FsmAp *walk( ParseData *pd );
619 Literal *lowerLit;
620 Literal *upperLit;
623 /* Some literal machine. Can be a number or literal string. */
624 struct Literal
626 enum LiteralType { Number, LitString };
628 Literal( const Token &token, LiteralType type )
629 : token(token), type(type) { }
631 FsmAp *walk( ParseData *pd );
633 Token token;
634 LiteralType type;
637 /* Regular expression. */
638 struct RegExpr
640 enum RegExpType { RecurseItem, Empty };
642 /* Constructors. */
643 RegExpr() :
644 type(Empty), caseInsensitive(false) { }
645 RegExpr(RegExpr *regExpr, ReItem *item) :
646 regExpr(regExpr), item(item),
647 type(RecurseItem), caseInsensitive(false) { }
649 ~RegExpr();
650 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
652 RegExpr *regExpr;
653 ReItem *item;
654 RegExpType type;
655 bool caseInsensitive;
658 /* An item in a regular expression. */
659 struct ReItem
661 enum ReItemType { Data, Dot, OrBlock, NegOrBlock };
663 ReItem( const InputLoc &loc, const Token &token )
664 : loc(loc), token(token), star(false), type(Data) { }
665 ReItem( const InputLoc &loc, ReItemType type )
666 : loc(loc), star(false), type(type) { }
667 ReItem( const InputLoc &loc, ReOrBlock *orBlock, ReItemType type )
668 : loc(loc), orBlock(orBlock), star(false), type(type) { }
670 ~ReItem();
671 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
673 InputLoc loc;
674 Token token;
675 ReOrBlock *orBlock;
676 bool star;
677 ReItemType type;
680 /* An or block item. */
681 struct ReOrBlock
683 enum ReOrBlockType { RecurseItem, Empty };
685 /* Constructors. */
686 ReOrBlock()
687 : type(Empty) { }
688 ReOrBlock(ReOrBlock *orBlock, ReOrItem *item)
689 : orBlock(orBlock), item(item), type(RecurseItem) { }
691 ~ReOrBlock();
692 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
694 ReOrBlock *orBlock;
695 ReOrItem *item;
696 ReOrBlockType type;
699 /* An item in an or block. */
700 struct ReOrItem
702 enum ReOrItemType { Data, Range };
704 ReOrItem( const InputLoc &loc, const Token &token )
705 : loc(loc), token(token), type(Data) {}
706 ReOrItem( const InputLoc &loc, char lower, char upper )
707 : loc(loc), lower(lower), upper(upper), type(Range) { }
709 FsmAp *walk( ParseData *pd, RegExpr *rootRegex );
711 InputLoc loc;
712 Token token;
713 char lower;
714 char upper;
715 ReOrItemType type;
720 * Inline code tree
722 struct InlineList;
723 struct InlineItem
725 enum Type
727 Text, Goto, Call, Next, GotoExpr, CallExpr, NextExpr, Ret, PChar,
728 Char, Hold, Curs, Targs, Entry, Exec, LmSwitch, LmSetActId,
729 LmSetTokEnd, LmOnLast, LmOnNext, LmOnLagBehind, LmInitAct,
730 LmInitTokStart, LmSetTokStart, Break
733 InlineItem( const InputLoc &loc, char *data, Type type ) :
734 loc(loc), data(data), nameRef(0), children(0), type(type) { }
736 InlineItem( const InputLoc &loc, NameRef *nameRef, Type type ) :
737 loc(loc), data(0), nameRef(nameRef), children(0), type(type) { }
739 InlineItem( const InputLoc &loc, LongestMatch *longestMatch,
740 LongestMatchPart *longestMatchPart, Type type ) : loc(loc), data(0),
741 nameRef(0), children(0), longestMatch(longestMatch),
742 longestMatchPart(longestMatchPart), type(type) { }
744 InlineItem( const InputLoc &loc, NameInst *nameTarg, Type type ) :
745 loc(loc), data(0), nameRef(0), nameTarg(nameTarg), children(0),
746 type(type) { }
748 InlineItem( const InputLoc &loc, Type type ) :
749 loc(loc), data(0), nameRef(0), children(0), type(type) { }
751 InputLoc loc;
752 char *data;
753 NameRef *nameRef;
754 NameInst *nameTarg;
755 InlineList *children;
756 LongestMatch *longestMatch;
757 LongestMatchPart *longestMatchPart;
758 Type type;
760 InlineItem *prev, *next;
763 /* Normally this would be atypedef, but that would entail including DList from
764 * ptreetypes, which should be just typedef forwards. */
765 struct InlineList : public DList<InlineItem> { };
769 #endif /* _PARSETREE_H */