Notes on version 6.3.
[ragel.git] / ragel / parsedata.h
blobfc2b9c0c7772f56b7a0bc9bfdc6b1761de55072e
1 /*
2 * Copyright 2001-2006 Adrian Thurston <thurston@cs.queensu.ca>
3 */
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #ifndef _PARSEDATA_H
23 #define _PARSEDATA_H
25 #include <iostream>
26 #include <limits.h>
27 #include "avlmap.h"
28 #include "bstmap.h"
29 #include "vector.h"
30 #include "dlist.h"
31 #include "fsmgraph.h"
32 #include "compare.h"
33 #include "vector.h"
34 #include "common.h"
35 #include "parsetree.h"
37 /* Forwards. */
38 using std::ostream;
40 struct VarDef;
41 struct Join;
42 struct Expression;
43 struct Term;
44 struct FactorWithAug;
45 struct FactorWithLabel;
46 struct FactorWithRep;
47 struct FactorWithNeg;
48 struct Factor;
49 struct Literal;
50 struct Range;
51 struct RegExpr;
52 struct ReItem;
53 struct ReOrBlock;
54 struct ReOrItem;
55 struct LongestMatch;
56 typedef DList<LongestMatch> LmList;
59 /* Graph dictionary. */
60 struct GraphDictEl
62 public AvlTreeEl<GraphDictEl>,
63 public DListEl<GraphDictEl>
65 GraphDictEl( const char *k )
66 : key(k), value(0), isInstance(false) { }
67 GraphDictEl( const char *k, VarDef *value )
68 : key(k), value(value), isInstance(false) { }
70 const char *getKey() { return key; }
72 const char *key;
73 VarDef *value;
74 bool isInstance;
76 /* Location info of graph definition. Points to variable name of assignment. */
77 InputLoc loc;
80 typedef AvlTree<GraphDictEl, const char*, CmpStr> GraphDict;
81 typedef DList<GraphDictEl> GraphList;
83 /* Priority name dictionary. */
84 typedef AvlMapEl<char*, int> PriorDictEl;
85 typedef AvlMap<char*, int, CmpStr> PriorDict;
87 /* Local error name dictionary. */
88 typedef AvlMapEl<const char*, int> LocalErrDictEl;
89 typedef AvlMap<const char*, int, CmpStr> LocalErrDict;
91 /* Tree of instantiated names. */
92 typedef BstMapEl<const char*, NameInst*> NameMapEl;
93 typedef BstMap<const char*, NameInst*, CmpStr> NameMap;
94 typedef Vector<NameInst*> NameVect;
95 typedef BstSet<NameInst*> NameSet;
97 /* Node in the tree of instantiated names. */
98 struct NameInst
100 NameInst( const InputLoc &loc, NameInst *parent, const char *name, int id, bool isLabel ) :
101 loc(loc), parent(parent), name(name), id(id), isLabel(isLabel),
102 isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {}
104 InputLoc loc;
106 /* Keep parent pointers in the name tree to retrieve
107 * fully qulified names. */
108 NameInst *parent;
110 const char *name;
111 int id;
112 bool isLabel;
113 bool isLongestMatch;
115 int numRefs;
116 int numUses;
118 /* Names underneath us, excludes anonymous names. */
119 NameMap children;
121 /* All names underneath us in order of appearance. */
122 NameVect childVect;
124 /* Join scopes need an implicit "final" target. */
125 NameInst *start, *final;
127 /* During a fsm generation walk, lists the names that are referenced by
128 * epsilon operations in the current scope. After the link is made by the
129 * epsilon reference and the join operation is complete, the label can
130 * have its refcount decremented. Once there are no more references the
131 * entry point can be removed from the fsm returned. */
132 NameVect referencedNames;
134 /* Pointers for the name search queue. */
135 NameInst *prev, *next;
137 /* Check if this name inst or any name inst below is referenced. */
138 bool anyRefsRec();
141 typedef DList<NameInst> NameInstList;
143 /* Stack frame used in walking the name tree. */
144 struct NameFrame
146 NameInst *prevNameInst;
147 int prevNameChild;
148 NameInst *prevLocalScope;
151 /* Class to collect information about the machine during the
152 * parse of input. */
153 struct ParseData
155 /* Create a new parse data object. This is done at the beginning of every
156 * fsm specification. */
157 ParseData( char *fileName, char *sectionName, const InputLoc &sectionLoc );
158 ~ParseData();
161 * Setting up the graph dict.
164 /* Initialize a graph dict with the basic fsms. */
165 void initGraphDict();
166 void createBuiltin( const char *name, BuiltinMachine builtin );
168 /* Make a name id in the current name instantiation scope if it is not
169 * already there. */
170 NameInst *addNameInst( const InputLoc &loc, const char *data, bool isLabel );
171 void makeRootNames();
172 void makeNameTree( GraphDictEl *gdNode );
173 void makeExportsNameTree();
174 void fillNameIndex( NameInst *from );
175 void printNameTree();
177 /* Increments the usage count on entry names. Names that are no longer
178 * needed will have their entry points unset. */
179 void unsetObsoleteEntries( FsmAp *graph );
181 /* Resove name references in action code and epsilon transitions. */
182 NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly );
183 void resolveFrom( NameSet &result, NameInst *refFrom,
184 const NameRef &nameRef, int namePos );
185 NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action );
186 void resolveNameRefs( InlineList *inlineList, Action *action );
187 void resolveActionNameRefs();
189 /* Set the alphabet type. If type types are not valid returns false. */
190 bool setAlphType( const InputLoc &loc, char *s1, char *s2 );
191 bool setAlphType( const InputLoc &loc, char *s1 );
193 /* Override one of the variables ragel uses. */
194 bool setVariable( char *var, InlineList *inlineList );
196 /* Unique actions. */
197 void removeDups( ActionTable &actionTable );
198 void removeActionDups( FsmAp *graph );
200 /* Dumping the name instantiation tree. */
201 void printNameInst( NameInst *nameInst, int level );
203 /* Make the graph from a graph dict node. Does minimization. */
204 FsmAp *makeInstance( GraphDictEl *gdNode );
205 FsmAp *makeSpecific( GraphDictEl *gdNode );
206 FsmAp *makeAll();
208 /* Checking the contents of actions. */
209 void checkAction( Action *action );
210 void checkInlineList( Action *act, InlineList *inlineList );
212 void analyzeAction( Action *action, InlineList *inlineList );
213 void analyzeGraph( FsmAp *graph );
214 void makeExports();
216 void prepareMachineGen( GraphDictEl *graphDictEl );
217 void prepareMachineGenTBWrapped( GraphDictEl *graphDictEl );
218 void generateXML( ostream &out );
219 FsmAp *sectionGraph;
220 bool generatingSectionSubset;
222 void initKeyOps();
225 * Data collected during the parse.
228 /* Dictionary of graphs. Both instances and non-instances go here. */
229 GraphDict graphDict;
231 /* The list of instances. */
232 GraphList instanceList;
234 /* Dictionary of actions. Lets actions be defined and then referenced. */
235 ActionDict actionDict;
237 /* Dictionary of named priorities. */
238 PriorDict priorDict;
240 /* Dictionary of named local errors. */
241 LocalErrDict localErrDict;
243 /* List of actions. Will be pasted into a switch statement. */
244 ActionList actionList;
246 /* The id of the next priority name and label. */
247 int nextPriorKey, nextLocalErrKey, nextNameId, nextCondId;
249 /* The default priority number key for a machine. This is active during
250 * the parse of the rhs of a machine assignment. */
251 int curDefPriorKey;
253 int curDefLocalErrKey;
255 /* Alphabet type. */
256 HostType *userAlphType;
257 bool alphTypeSet;
258 InputLoc alphTypeLoc;
260 /* Element type and get key expression. */
261 InlineList *getKeyExpr;
262 InlineList *accessExpr;
264 /* Stack management */
265 InlineList *prePushExpr;
266 InlineList *postPopExpr;
268 /* Overriding variables. */
269 InlineList *pExpr;
270 InlineList *peExpr;
271 InlineList *eofExpr;
272 InlineList *csExpr;
273 InlineList *topExpr;
274 InlineList *stackExpr;
275 InlineList *actExpr;
276 InlineList *tokstartExpr;
277 InlineList *tokendExpr;
278 InlineList *dataExpr;
280 /* The alphabet range. */
281 char *lowerNum, *upperNum;
282 Key lowKey, highKey;
283 InputLoc rangeLowLoc, rangeHighLoc;
285 /* The name of the file the fsm is from, and the spec name. */
286 char *fileName;
287 char *sectionName;
288 InputLoc sectionLoc;
290 /* Counting the action and priority ordering. */
291 int curActionOrd;
292 int curPriorOrd;
294 /* Root of the name tree. One root is for the instantiated machines. The
295 * other root is for exported definitions. */
296 NameInst *rootName;
297 NameInst *exportsRootName;
299 /* Name tree walking. */
300 NameInst *curNameInst;
301 int curNameChild;
303 /* The place where resolved epsilon transitions go. These cannot go into
304 * the parse tree because a single epsilon op can resolve more than once
305 * to different nameInsts if the machine it's in is used more than once. */
306 NameVect epsilonResolvedLinks;
307 int nextEpsilonResolvedLink;
309 /* Root of the name tree used for doing local name searches. */
310 NameInst *localNameScope;
312 void setLmInRetLoc( InlineList *inlineList );
313 void initLongestMatchData();
314 void setLongestMatchData( FsmAp *graph );
315 void initNameWalk();
316 void initExportsNameWalk();
317 NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
318 NameFrame enterNameScope( bool isLocal, int numScopes );
319 void popNameScope( const NameFrame &frame );
320 void resetNameScope( const NameFrame &frame );
322 /* Make name ids to name inst pointers. */
323 NameInst **nameIndex;
325 /* Counter for assigning ids to longest match items. */
326 int nextLongestMatchId;
327 bool lmRequiresErrorState;
329 /* List of all longest match parse tree items. */
330 LmList lmList;
332 Action *newAction( const char *name, InlineList *inlineList );
334 Action *initTokStart;
335 int initTokStartOrd;
337 Action *setTokStart;
338 int setTokStartOrd;
340 Action *initActId;
341 int initActIdOrd;
343 Action *setTokEnd;
344 int setTokEndOrd;
346 void beginProcessing()
348 ::condData = &thisCondData;
349 ::keyOps = &thisKeyOps;
352 CondData thisCondData;
353 KeyOps thisKeyOps;
355 ExportList exportList;
358 void afterOpMinimize( FsmAp *fsm, bool lastInSeq = true );
359 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd );
360 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd );
361 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd );
362 Key makeFsmKeyChar( char c, ParseData *pd );
363 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd );
364 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
365 bool caseInsensitive, ParseData *pd );
366 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd );
367 FsmAp *dotFsm( ParseData *pd );
368 FsmAp *dotStarFsm( ParseData *pd );
370 void errorStateLabels( const NameSet &locations );
372 /* Data used by the parser specific to the current file. Supports the include
373 * system, since a new parser is executed for each included file. */
374 struct InputData
376 InputData( char *fileName, char *includeSpec, char *includeTo ) :
377 pd(0), sectionName(0), defaultParseData(0),
378 first_line(1), first_column(1),
379 last_line(1), last_column(0),
380 fileName(fileName), includeSpec(includeSpec),
381 includeTo(includeTo), active(true)
384 /* For collecting a name references. */
385 NameRef nameRef;
386 NameRefList nameRefList;
388 /* The parse data. For each fsm spec, the parser collects things that it parses
389 * in data structures in here. */
390 ParseData *pd;
392 char *sectionName;
393 ParseData *defaultParseData;
395 int first_line;
396 int first_column;
397 int last_line;
398 int last_column;
400 char *fileName;
402 /* If this is an included file, this contains the specification to search
403 * for. IncludeTo will contain the spec name that does the includng. */
404 char *includeSpec;
405 char *includeTo;
407 bool active;
408 InputLoc sectionLoc;
411 struct Parser;
413 typedef AvlMap<char*, Parser *, CmpStr> ParserDict;
414 typedef AvlMapEl<char*, Parser *> ParserDictEl;
416 extern ParserDict parserDict;
419 #endif /* _PARSEDATA_H */