2002-02-06 Aldy Hernandez <aldyh@redhat.com>
[official-gcc.git] / gcc / ada / scans.ads
blobbcb12e1c93a37a072b6b57abe02049e34d922099
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- S C A N S --
6 -- --
7 -- S p e c --
8 -- --
9 -- $Revision: 1.3 $
10 -- --
11 -- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
12 -- --
13 -- GNAT is free software; you can redistribute it and/or modify it under --
14 -- terms of the GNU General Public License as published by the Free Soft- --
15 -- ware Foundation; either version 2, or (at your option) any later ver- --
16 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
17 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
18 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
19 -- for more details. You should have received a copy of the GNU General --
20 -- Public License distributed with GNAT; see file COPYING. If not, write --
21 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
22 -- MA 02111-1307, USA. --
23 -- --
24 -- As a special exception, if other files instantiate generics from this --
25 -- unit, or you link this unit with other files to produce an executable, --
26 -- this unit does not by itself cause the resulting executable to be --
27 -- covered by the GNU General Public License. This exception does not --
28 -- however invalidate any other reasons why the executable file might be --
29 -- covered by the GNU Public License. --
30 -- --
31 -- GNAT was originally developed by the GNAT team at New York University. --
32 -- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
33 -- --
34 ------------------------------------------------------------------------------
36 with Types; use Types;
38 package Scans is
40 -- The scanner maintains a current state in the global variables defined
41 -- in this package. The call to the Scan routine advances this state to
42 -- the next token. The state is initialized by the call to one of the
43 -- initialization routines in Sinput.
45 -- The following type is used to identify token types returned by Scan.
46 -- The class column in this table indicates the token classes which
47 -- apply to the token, as defined by subsequent subtype declarations.
-- Those subtypes are contiguous ranges of this enumeration, so the
-- order of the literals below is significant.
49 -- Note: the coding in SCN depends on the fact that the first entry in
50 -- this type declaration is *not* for a reserved word. For details on
51 -- why there is this requirement, see Scn.Initialize_Scanner.
53 type Token_Type is (
55 -- Token name Token type Class(es)
57 Tok_Integer_Literal, -- numeric lit Literal, Lit_Or_Name
59 Tok_Real_Literal, -- numeric lit Literal, Lit_Or_Name
61 Tok_String_Literal, -- string lit Literal, Lit_Or_Name
63 Tok_Char_Literal, -- char lit Name, Literal, Lit_Or_Name
65 Tok_Operator_Symbol, -- op symbol Name, Literal, Lit_Or_Name, Desig
67 Tok_Identifier, -- identifier Name, Lit_Or_Name, Desig
69 Tok_Double_Asterisk, -- **
71 Tok_Ampersand, -- & Binary_Addop
72 Tok_Minus, -- - Binary_Addop, Unary_Addop
73 Tok_Plus, -- + Binary_Addop, Unary_Addop
75 Tok_Asterisk, -- * Mulop
76 Tok_Mod, -- MOD Mulop
77 Tok_Rem, -- REM Mulop
78 Tok_Slash, -- / Mulop
80 Tok_New, -- NEW
82 Tok_Abs, -- ABS
83 Tok_Others, -- OTHERS
84 Tok_Null, -- NULL
86 Tok_Dot, -- . Namext
87 Tok_Apostrophe, -- ' Namext
89 Tok_Left_Paren, -- ( Namext, Consk
91 Tok_Delta, -- DELTA Atkwd, Sterm, Consk
92 Tok_Digits, -- DIGITS Atkwd, Sterm, Consk
93 Tok_Range, -- RANGE Atkwd, Sterm, Consk
95 Tok_Right_Paren, -- ) Sterm
96 Tok_Comma, -- , Sterm
98 Tok_And, -- AND Logop, Sterm
99 Tok_Or, -- OR Logop, Sterm
100 Tok_Xor, -- XOR Logop, Sterm
102 Tok_Less, -- < Relop, Sterm
103 Tok_Equal, -- = Relop, Sterm
104 Tok_Greater, -- > Relop, Sterm
105 Tok_Not_Equal, -- /= Relop, Sterm
106 Tok_Greater_Equal, -- >= Relop, Sterm
107 Tok_Less_Equal, -- <= Relop, Sterm
109 Tok_In, -- IN Relop, Sterm
110 Tok_Not, -- NOT Relop, Sterm
112 Tok_Box, -- <> Relop, Eterm, Sterm
-- NOTE(review): Tok_Box is listed under Eterm above, but the range of
-- Token_Class_Eterm starts at Tok_Colon_Equal, so Tok_Box is not a
-- member of that subtype as declared. Confirm which one is intended.
113 Tok_Colon_Equal, -- := Eterm, Sterm
114 Tok_Colon, -- : Eterm, Sterm
115 Tok_Greater_Greater, -- >> Eterm, Sterm
117 Tok_Abstract, -- ABSTRACT Eterm, Sterm
118 Tok_Access, -- ACCESS Eterm, Sterm
119 Tok_Aliased, -- ALIASED Eterm, Sterm
120 Tok_All, -- ALL Eterm, Sterm
121 Tok_Array, -- ARRAY Eterm, Sterm
122 Tok_At, -- AT Eterm, Sterm
123 Tok_Body, -- BODY Eterm, Sterm
124 Tok_Constant, -- CONSTANT Eterm, Sterm
125 Tok_Do, -- DO Eterm, Sterm
126 Tok_Is, -- IS Eterm, Sterm
127 Tok_Limited, -- LIMITED Eterm, Sterm
128 Tok_Of, -- OF Eterm, Sterm
129 Tok_Out, -- OUT Eterm, Sterm
130 Tok_Record, -- RECORD Eterm, Sterm
131 Tok_Renames, -- RENAMES Eterm, Sterm
132 Tok_Reverse, -- REVERSE Eterm, Sterm
133 Tok_Tagged, -- TAGGED Eterm, Sterm
134 Tok_Then, -- THEN Eterm, Sterm
136 Tok_Less_Less, -- << Eterm, Sterm, After_SM
138 Tok_Abort, -- ABORT Eterm, Sterm, After_SM
139 Tok_Accept, -- ACCEPT Eterm, Sterm, After_SM
140 Tok_Case, -- CASE Eterm, Sterm, After_SM
141 Tok_Delay, -- DELAY Eterm, Sterm, After_SM
142 Tok_Else, -- ELSE Eterm, Sterm, After_SM
143 Tok_Elsif, -- ELSIF Eterm, Sterm, After_SM
144 Tok_End, -- END Eterm, Sterm, After_SM
145 Tok_Exception, -- EXCEPTION Eterm, Sterm, After_SM
146 Tok_Exit, -- EXIT Eterm, Sterm, After_SM
147 Tok_Goto, -- GOTO Eterm, Sterm, After_SM
148 Tok_If, -- IF Eterm, Sterm, After_SM
149 Tok_Pragma, -- PRAGMA Eterm, Sterm, After_SM
150 Tok_Raise, -- RAISE Eterm, Sterm, After_SM
151 Tok_Requeue, -- REQUEUE Eterm, Sterm, After_SM
152 Tok_Return, -- RETURN Eterm, Sterm, After_SM
153 Tok_Select, -- SELECT Eterm, Sterm, After_SM
154 Tok_Terminate, -- TERMINATE Eterm, Sterm, After_SM
155 Tok_Until, -- UNTIL Eterm, Sterm, After_SM
156 Tok_When, -- WHEN Eterm, Sterm, After_SM
158 Tok_Begin, -- BEGIN Eterm, Sterm, After_SM, Labeled_Stmt
159 Tok_Declare, -- DECLARE Eterm, Sterm, After_SM, Labeled_Stmt
160 Tok_For, -- FOR Eterm, Sterm, After_SM, Labeled_Stmt
161 Tok_Loop, -- LOOP Eterm, Sterm, After_SM, Labeled_Stmt
162 Tok_While, -- WHILE Eterm, Sterm, After_SM, Labeled_Stmt
164 Tok_Entry, -- ENTRY Eterm, Sterm, Declk, Deckn, After_SM
165 Tok_Protected, -- PROTECTED Eterm, Sterm, Declk, Deckn, After_SM
166 Tok_Task, -- TASK Eterm, Sterm, Declk, Deckn, After_SM
167 Tok_Type, -- TYPE Eterm, Sterm, Declk, Deckn, After_SM
168 Tok_Subtype, -- SUBTYPE Eterm, Sterm, Declk, Deckn, After_SM
169 Tok_Use, -- USE Eterm, Sterm, Declk, Deckn, After_SM
171 Tok_Function, -- FUNCTION Eterm, Sterm, Cunit, Declk, After_SM
172 Tok_Generic, -- GENERIC Eterm, Sterm, Cunit, Declk, After_SM
173 Tok_Package, -- PACKAGE Eterm, Sterm, Cunit, Declk, After_SM
174 Tok_Procedure, -- PROCEDURE Eterm, Sterm, Cunit, Declk, After_SM
176 Tok_Private, -- PRIVATE Eterm, Sterm, Cunit, After_SM
177 Tok_With, -- WITH Eterm, Sterm, Cunit, After_SM
178 Tok_Separate, -- SEPARATE Eterm, Sterm, Cunit, After_SM
180 Tok_EOF, -- End of file Eterm, Sterm, Cterm, After_SM
182 Tok_Semicolon, -- ; Eterm, Sterm, Cterm
184 Tok_Arrow, -- => Sterm, Cterm, Chtok
186 Tok_Vertical_Bar, -- | Cterm, Sterm, Chtok
188 Tok_Dot_Dot, -- .. Sterm, Chtok
190 -- The following three entries are used only when scanning
191 -- project files.
193 Tok_Project,
194 Tok_Extends,
195 Tok_External,
197 No_Token);
198 -- No_Token is used for initializing Token values to indicate that
199 -- no value has been set yet.
201 -- Note: in the RM, operator symbol is a special case of string literal.
202 -- We distinguish at the lexical level in this compiler, since there are
203 -- many syntactic situations in which only an operator symbol is allowed.
205 -- The following subtype declarations group the token types into classes.
206 -- These are used for class tests in the parser.
-- Every class is a contiguous range of Token_Type, so membership is
-- determined by the declaration order of the enumeration literals.
208 subtype Token_Class_Numeric_Literal is
209 Token_Type range Tok_Integer_Literal .. Tok_Real_Literal;
210 -- Numeric literal (integer or real)
212 subtype Token_Class_Literal is
213 Token_Type range Tok_Integer_Literal .. Tok_Operator_Symbol;
214 -- Literal (numeric, string and character literals, operator symbol)
216 subtype Token_Class_Lit_Or_Name is
217 Token_Type range Tok_Integer_Literal .. Tok_Identifier;
-- Literal or name (any token of Token_Class_Literal, or an identifier)
219 subtype Token_Class_Binary_Addop is
220 Token_Type range Tok_Ampersand .. Tok_Plus;
221 -- Binary adding operator (& + -)
223 subtype Token_Class_Unary_Addop is
224 Token_Type range Tok_Minus .. Tok_Plus;
225 -- Unary adding operator (+ -)
227 subtype Token_Class_Mulop is
228 Token_Type range Tok_Asterisk .. Tok_Slash;
229 -- Multiplying operator (* mod rem /)
231 subtype Token_Class_Logop is
232 Token_Type range Tok_And .. Tok_Xor;
233 -- Logical operator (and, or, xor)
235 subtype Token_Class_Relop is
236 Token_Type range Tok_Less .. Tok_Box;
237 -- Relational operator (= /= < <= > >= not, in plus <> to catch misuse
238 -- of Pascal style not equal operator).
240 subtype Token_Class_Name is
241 Token_Type range Tok_Char_Literal .. Tok_Identifier;
242 -- First token of name (4.1),
243 -- (identifier, char literal, operator symbol)
245 subtype Token_Class_Desig is
246 Token_Type range Tok_Operator_Symbol .. Tok_Identifier;
247 -- Token which can be a Designator (identifier, operator symbol)
249 subtype Token_Class_Namext is
250 Token_Type range Tok_Dot .. Tok_Left_Paren;
251 -- Name extension tokens. These are tokens which can appear immediately
252 -- after a name to extend it recursively (period, quote, left paren)
254 subtype Token_Class_Consk is
255 Token_Type range Tok_Left_Paren .. Tok_Range;
256 -- Keywords which can start constraint
257 -- (left paren, delta, digits, range)
259 subtype Token_Class_Eterm is
260 Token_Type range Tok_Colon_Equal .. Tok_Semicolon;
261 -- Expression terminators. These tokens can never appear within a simple
262 -- expression. This is used for error recovery purposes (if we encounter
263 -- an error in an expression, we simply scan to the next Eterm token).
265 subtype Token_Class_Sterm is
266 Token_Type range Tok_Delta .. Tok_Dot_Dot;
267 -- Simple_Expression terminators. A Simple_Expression must be followed
268 -- by a token in this class, or an error message is issued complaining
269 -- about a missing binary operator.
271 subtype Token_Class_Atkwd is
272 Token_Type range Tok_Delta .. Tok_Range;
273 -- Attribute keywords. This class includes keywords which can be used
274 -- as an Attribute_Designator, namely DELTA, DIGITS and RANGE.
276 subtype Token_Class_Cterm is
277 Token_Type range Tok_EOF .. Tok_Vertical_Bar;
278 -- Choice terminators. These tokens terminate a choice. This is used for
279 -- error recovery purposes (if we encounter an error in a Choice, we
280 -- simply scan to the next Cterm token).
282 subtype Token_Class_Chtok is
283 Token_Type range Tok_Arrow .. Tok_Dot_Dot;
284 -- Choice tokens. These tokens signal a choice when used in an Aggregate
-- (=> | ..)
286 subtype Token_Class_Cunit is
287 Token_Type range Tok_Function .. Tok_Separate;
288 -- Tokens which can begin a compilation unit
-- (function, generic, package, procedure, private, with, separate)
290 subtype Token_Class_Declk is
291 Token_Type range Tok_Entry .. Tok_Procedure;
292 -- Keywords which start a declaration
294 subtype Token_Class_Deckn is
295 Token_Type range Tok_Entry .. Tok_Use;
296 -- Keywords which start a declaration but can't start a compilation unit
298 subtype Token_Class_After_SM is
299 Token_Type range Tok_Less_Less .. Tok_EOF;
300 -- Tokens which always, or almost always, appear after a semicolon. Used
301 -- in the Resync_Past_Semicolon routine to avoid gobbling up stuff when
302 -- a semicolon is missing. Of significance only for error recovery.
304 subtype Token_Class_Labeled_Stmt is
305 Token_Type range Tok_Begin .. Tok_While;
306 -- Tokens which start labeled statements
-- (begin, declare, for, loop, while)
308 type Token_Flag_Array is array (Token_Type) of Boolean;
-- One Boolean flag for each Token_Type value
309 Is_Reserved_Keyword : constant Token_Flag_Array := Token_Flag_Array'(
310 Tok_Mod .. Tok_Rem => True,
311 Tok_New .. Tok_Null => True,
312 Tok_Delta .. Tok_Range => True,
313 Tok_And .. Tok_Xor => True,
314 Tok_In .. Tok_Not => True,
315 Tok_Abstract .. Tok_Then => True,
316 Tok_Abort .. Tok_Separate => True,
317 others => False);
318 -- Flag array used to test for reserved word. The ranges above rely on
-- the declaration order of Token_Type. Every token not covered by one
-- of the ranges is False; this includes the literal, identifier and
-- punctuation tokens, the project-file tokens Tok_Project, Tok_Extends
-- and Tok_External, and No_Token.
320 --------------------------
321 -- Scan State Variables --
322 --------------------------
324 -- Note: these variables can only be referenced during the parsing of a
325 -- file. Reference to any of them from Sem or the expander is wrong.
-- Only Token_Node, Token_Name and Prev_Token are given default initial
-- values in this spec; the other variables are declared without one.
327 Scan_Ptr : Source_Ptr;
328 -- Current scan pointer location. After a call to Scan, this points
329 -- just past the end of the token just scanned.
331 Token : Token_Type;
332 -- Type of current token
334 Token_Ptr : Source_Ptr;
335 -- Pointer to first character of current token
337 Current_Line_Start : Source_Ptr;
338 -- Pointer to first character of line containing current token
340 Start_Column : Column_Number;
341 -- Starting column number (zero origin) of the first non-blank character
342 -- on the line containing the current token. This is used for error
343 -- recovery circuits which depend on looking at the column line up.
345 Checksum : Word;
346 -- Used to accumulate a CRC representing the tokens in the source
347 -- file being compiled. This CRC includes only program tokens, and
348 -- excludes comments.
350 First_Non_Blank_Location : Source_Ptr;
351 -- Location of first non-blank character on the line containing the
352 -- current token (i.e. the location of the character whose column number
353 -- is stored in Start_Column).
355 Token_Node : Node_Id := Empty;
356 -- Node table Id for the current token. This is set only if the current
357 -- token is one for which the scanner constructs a node (i.e. it is an
358 -- identifier, operator symbol, or literal). For other token types,
359 -- Token_Node is undefined.
361 Token_Name : Name_Id := No_Name;
362 -- For identifiers, this is set to the Name_Id of the identifier scanned.
363 -- For all other tokens, Token_Name is set to Error_Name. Note that it
364 -- would be possible for the caller to extract this information from
365 -- Token_Node. We set Token_Name separately for two reasons. First it
366 -- allows a quicker test for a specific identifier. Second, it allows
367 -- a version of the parser to be built that does not build tree nodes,
368 -- usable as a syntax checker.
370 Prev_Token : Token_Type := No_Token;
371 -- Type of previous token
373 Prev_Token_Ptr : Source_Ptr;
374 -- Pointer to first character of previous token
376 Version_To_Be_Found : Boolean;
377 -- This flag is True if the scanner is still looking for an RCS version
378 -- number in a comment. Normally it is initialized to False so that this
379 -- circuit is not activated. If the -dv switch is set, then this flag is
380 -- initialized to True, and then reset when the version number is found.
381 -- We do things this way to minimize the impact on comment scanning.
-- Note that the declaration itself carries no initial value, so the
-- initialization described above must be done elsewhere.
383 --------------------------------------------------------
384 -- Procedures for Saving and Restoring the Scan State --
385 --------------------------------------------------------
387 -- The following procedures can be used to save and restore the entire
388 -- scan state. They are used in cases where it is necessary to back up
389 -- the scan during the parse.
391 type Saved_Scan_State is private;
392 -- Used for saving and restoring the scan state. The type is private, so
-- clients can only hold values of it and pass them to the two procedures
-- below.
394 procedure Save_Scan_State (Saved_State : out Saved_Scan_State);
395 pragma Inline (Save_Scan_State);
396 -- Saves the current scan state for possible later restoration. Note that
397 -- there is no harm in saving the state and then never restoring it.
-- The saved state is returned in Saved_State (mode out).
399 procedure Restore_Scan_State (Saved_State : in Saved_Scan_State);
400 pragma Inline (Restore_Scan_State);
401 -- Restores a scan state saved by a call to Save_Scan_State.
402 -- The saved scan state must refer to the current source file.
-- Saved_State is the value previously obtained from Save_Scan_State.
404 private
405 type Saved_Scan_State is record
406 Save_Scan_Ptr : Source_Ptr;
407 Save_Token : Token_Type;
408 Save_Token_Ptr : Source_Ptr;
409 Save_Current_Line_Start : Source_Ptr;
410 Save_Start_Column : Column_Number;
411 Save_Checksum : Word;
412 Save_First_Non_Blank_Location : Source_Ptr;
413 Save_Token_Node : Node_Id;
414 Save_Token_Name : Name_Id;
415 Save_Prev_Token : Token_Type;
416 Save_Prev_Token_Ptr : Source_Ptr;
417 end record;
-- Full view of Saved_Scan_State. There is one Save_xxx component, of the
-- same type, for each of the scan state variables Scan_Ptr, Token,
-- Token_Ptr, Current_Line_Start, Start_Column, Checksum,
-- First_Non_Blank_Location, Token_Node, Token_Name, Prev_Token and
-- Prev_Token_Ptr. Version_To_Be_Found has no component here.
-- NOTE(review): presumably each component holds a copy of the
-- like-named variable; confirm against the package body.
419 end Scans;