1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
10 -- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
12 -- GNAT is free software; you can redistribute it and/or modify it under --
13 -- terms of the GNU General Public License as published by the Free Soft- --
14 -- ware Foundation; either version 2, or (at your option) any later ver- --
15 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
16 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
17 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
18 -- for more details. You should have received a copy of the GNU General --
19 -- Public License distributed with GNAT; see file COPYING. If not, write --
20 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
21 -- MA 02111-1307, USA. --
23 -- As a special exception, if other files instantiate generics from this --
24 -- unit, or you link this unit with other files to produce an executable, --
25 -- this unit does not by itself cause the resulting executable to be --
26 -- covered by the GNU General Public License. This exception does not --
27 -- however invalidate any other reasons why the executable file might be --
28 -- covered by the GNU Public License. --
30 -- GNAT was originally developed by the GNAT team at New York University. --
31 -- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
33 ------------------------------------------------------------------------------
35 with Types
; use Types
;
39 -- The scanner maintains a current state in the global variables defined
40 -- in this package. The call to the Scan routine advances this state to
41 -- the next token. The state is initialized by the call to one of the
42 -- initialization routines in Sinput.
44 -- The following type is used to identify token types returned by Scan.
45 -- The class column in this table indicates the token classes which
46 -- apply to the token, as defined by subsequent subtype declarations.
48 -- Note: the coding in SCN depends on the fact that the first entry in
49 -- this type declaration is *not* for a reserved word. For details on
50 -- why there is this requirement, see Scn.Initialize_Scanner.
54 -- Token name Token type Class(es)
56 Tok_Integer_Literal
, -- numeric lit Literal, Lit_Or_Name
58 Tok_Real_Literal
, -- numeric lit Literal, Lit_Or_Name
60 Tok_String_Literal
, -- string lit Literal, Lit_Or_Name
62 Tok_Char_Literal
, -- char lit Name, Literal, Lit_Or_Name
64 Tok_Operator_Symbol
, -- op symbol Name, Literal, Lit_Or_Name, Desig
66 Tok_Identifier
, -- identifier Name, Lit_Or_Name, Desig
68 Tok_Double_Asterisk
, -- **
70 Tok_Ampersand
, -- & Binary_Addop
71 Tok_Minus
, -- - Binary_Addop, Unary_Addop
72 Tok_Plus
, -- + Binary_Addop, Unary_Addop
74 Tok_Asterisk
, -- * Mulop
86 Tok_Apostrophe
, -- ' Namext
88 Tok_Left_Paren
, -- ( Namext, Consk
90 Tok_Delta
, -- DELTA Atkwd, Sterm, Consk
91 Tok_Digits
, -- DIGITS Atkwd, Sterm, Consk
92 Tok_Range
, -- RANGE Atkwd, Sterm, Consk
94 Tok_Right_Paren
, -- ) Sterm
97 Tok_And
, -- AND Logop, Sterm
98 Tok_Or
, -- OR Logop, Sterm
99 Tok_Xor
, -- XOR Logop, Sterm
101 Tok_Less
, -- < Relop, Sterm
102 Tok_Equal
, -- = Relop, Sterm
103 Tok_Greater
, -- > Relop, Sterm
104 Tok_Not_Equal
, -- /= Relop, Sterm
105 Tok_Greater_Equal
, -- >= Relop, Sterm
106 Tok_Less_Equal
, -- <= Relop, Sterm
108 Tok_In
, -- IN Relop, Sterm
109 Tok_Not
, -- NOT Relop, Sterm
111 Tok_Box
, -- <> Relop, Eterm, Sterm
112 Tok_Colon_Equal
, -- := Eterm, Sterm
113 Tok_Colon
, -- : Eterm, Sterm
114 Tok_Greater_Greater
, -- >> Eterm, Sterm
116 Tok_Abstract
, -- ABSTRACT Eterm, Sterm
117 Tok_Access
, -- ACCESS Eterm, Sterm
118 Tok_Aliased
, -- ALIASED Eterm, Sterm
119 Tok_All
, -- ALL Eterm, Sterm
120 Tok_Array
, -- ARRAY Eterm, Sterm
121 Tok_At
, -- AT Eterm, Sterm
122 Tok_Body
, -- BODY Eterm, Sterm
123 Tok_Constant
, -- CONSTANT Eterm, Sterm
124 Tok_Do
, -- DO Eterm, Sterm
125 Tok_Is
, -- IS Eterm, Sterm
126 Tok_Limited
, -- LIMITED Eterm, Sterm
127 Tok_Of
, -- OF Eterm, Sterm
128 Tok_Out
, -- OUT Eterm, Sterm
129 Tok_Record
, -- RECORD Eterm, Sterm
130 Tok_Renames
, -- RENAMES Eterm, Sterm
131 Tok_Reverse
, -- REVERSE Eterm, Sterm
132 Tok_Tagged
, -- TAGGED Eterm, Sterm
133 Tok_Then
, -- THEN Eterm, Sterm
135 Tok_Less_Less
, -- << Eterm, Sterm, After_SM
137 Tok_Abort
, -- ABORT Eterm, Sterm, After_SM
138 Tok_Accept
, -- ACCEPT Eterm, Sterm, After_SM
139 Tok_Case
, -- CASE Eterm, Sterm, After_SM
140 Tok_Delay
, -- DELAY Eterm, Sterm, After_SM
141 Tok_Else
, -- ELSE Eterm, Sterm, After_SM
142 Tok_Elsif
, -- ELSIF Eterm, Sterm, After_SM
143 Tok_End
, -- END Eterm, Sterm, After_SM
144 Tok_Exception
, -- EXCEPTION Eterm, Sterm, After_SM
145 Tok_Exit
, -- EXIT Eterm, Sterm, After_SM
146 Tok_Goto
, -- GOTO Eterm, Sterm, After_SM
147 Tok_If
, -- IF Eterm, Sterm, After_SM
148 Tok_Pragma
, -- PRAGMA Eterm, Sterm, After_SM
149 Tok_Raise
, -- RAISE Eterm, Sterm, After_SM
150 Tok_Requeue
, -- REQUEUE Eterm, Sterm, After_SM
151 Tok_Return
, -- RETURN Eterm, Sterm, After_SM
152 Tok_Select
, -- SELECT Eterm, Sterm, After_SM
153 Tok_Terminate
, -- TERMINATE Eterm, Sterm, After_SM
154 Tok_Until
, -- UNTIL Eterm, Sterm, After_SM
155 Tok_When
, -- WHEN Eterm, Sterm, After_SM
157 Tok_Begin
, -- BEGIN Eterm, Sterm, After_SM, Labeled_Stmt
158 Tok_Declare
, -- DECLARE Eterm, Sterm, After_SM, Labeled_Stmt
159 Tok_For
, -- FOR Eterm, Sterm, After_SM, Labeled_Stmt
160 Tok_Loop
, -- LOOP Eterm, Sterm, After_SM, Labeled_Stmt
161 Tok_While
, -- WHILE Eterm, Sterm, After_SM, Labeled_Stmt
163 Tok_Entry
, -- ENTRY Eterm, Sterm, Declk, Deckn, After_SM
164 Tok_Protected
, -- PROTECTED Eterm, Sterm, Declk, Deckn, After_SM
165 Tok_Task
, -- TASK Eterm, Sterm, Declk, Deckn, After_SM
166 Tok_Type
, -- TYPE Eterm, Sterm, Declk, Deckn, After_SM
167 Tok_Subtype
, -- SUBTYPE Eterm, Sterm, Declk, Deckn, After_SM
168 Tok_Use
, -- USE Eterm, Sterm, Declk, Deckn, After_SM
170 Tok_Function
, -- FUNCTION Eterm, Sterm, Cunit, Declk, After_SM
171 Tok_Generic
, -- GENERIC Eterm, Sterm, Cunit, Declk, After_SM
172 Tok_Package
, -- PACKAGE Eterm, Sterm, Cunit, Declk, After_SM
173 Tok_Procedure
, -- PROCEDURE Eterm, Sterm, Cunit, Declk, After_SM
175 Tok_Private
, -- PRIVATE Eterm, Sterm, Cunit, After_SM
176 Tok_With
, -- WITH Eterm, Sterm, Cunit, After_SM
177 Tok_Separate
, -- SEPARATE Eterm, Sterm, Cunit, After_SM
179 Tok_EOF
, -- End of file Eterm, Sterm, Cterm, After_SM
181 Tok_Semicolon
, -- ; Eterm, Sterm, Cterm
183 Tok_Arrow
, -- => Sterm, Cterm, Chtok
185 Tok_Vertical_Bar
, -- | Cterm, Sterm, Chtok
187 Tok_Dot_Dot
, -- .. Sterm, Chtok
189 -- The following three entries are used only when scanning
197 -- No_Token is used for initializing Token values to indicate that
198 -- no value has been set yet.
200 -- Note: in the RM, operator symbol is a special case of string literal.
201 -- We distinguish at the lexical level in this compiler, since there are
202 -- many syntactic situations in which only an operator symbol is allowed.
204 -- The following subtype declarations group the token types into classes.
205 -- These are used for class tests in the parser.
subtype Token_Class_Numeric_Literal is Token_Type
  range Tok_Integer_Literal .. Tok_Real_Literal;
-- Numeric literal tokens (integer and real literals)
subtype Token_Class_Literal is Token_Type
  range Tok_Integer_Literal .. Tok_Operator_Symbol;
-- Literal tokens: every token from Tok_Integer_Literal up to and
-- including Tok_Operator_Symbol
subtype Token_Class_Lit_Or_Name is Token_Type
  range Tok_Integer_Literal .. Tok_Identifier;
-- Tokens that are a literal or a name: every token from
-- Tok_Integer_Literal up to and including Tok_Identifier
subtype Token_Class_Binary_Addop is Token_Type
  range Tok_Ampersand .. Tok_Plus;
-- Binary adding operators (& - +)
subtype Token_Class_Unary_Addop is Token_Type
  range Tok_Minus .. Tok_Plus;
-- Unary adding operators (- +)
subtype Token_Class_Mulop is Token_Type
  range Tok_Asterisk .. Tok_Slash;
-- Multiplying operators
subtype Token_Class_Logop is Token_Type
  range Tok_And .. Tok_Xor;
-- Logical operators (AND, OR, XOR)
subtype Token_Class_Relop is Token_Type
  range Tok_Less .. Tok_Box;
-- Relational operators (< = > /= >= <=) together with IN and NOT. The
-- range also takes in <> so that misuse of the Pascal style not equal
-- operator can be caught.
subtype Token_Class_Name is Token_Type
  range Tok_Char_Literal .. Tok_Identifier;
-- Tokens that can be the first token of a name (4.1): character
-- literal, operator symbol, identifier
subtype Token_Class_Desig is Token_Type
  range Tok_Operator_Symbol .. Tok_Identifier;
-- Tokens that can be a Designator (operator symbol, identifier)
subtype Token_Class_Namext is Token_Type
  range Tok_Dot .. Tok_Left_Paren;
-- Name extension tokens, i.e. tokens that can appear immediately after
-- a name and extend it recursively (period, quote, left paren)
subtype Token_Class_Consk is Token_Type
  range Tok_Left_Paren .. Tok_Range;
-- Tokens which can start a constraint
-- (left paren, DELTA, DIGITS, RANGE)
subtype Token_Class_Eterm is Token_Type
  range Tok_Colon_Equal .. Tok_Semicolon;
-- Expression terminators: tokens that can never appear within a simple
-- expression. Used for error recovery: when an error is encountered in
-- an expression, the scan simply skips to the next Eterm token.
-- NOTE(review): the token table lists Tok_Box under Eterm, but Tok_Box
-- precedes Tok_Colon_Equal and so is outside this range; confirm which
-- of the two is intended.
subtype Token_Class_Sterm is Token_Type
  range Tok_Delta .. Tok_Dot_Dot;
-- Simple_Expression terminators. The token following a
-- Simple_Expression must belong to this class; otherwise an error
-- message complaining about a missing binary operator is issued.
subtype Token_Class_Atkwd is Token_Type
  range Tok_Delta .. Tok_Range;
-- Attribute keywords, i.e. the keywords that can be used as an
-- Attribute_Designator: DELTA, DIGITS and RANGE
subtype Token_Class_Cterm is Token_Type
  range Tok_EOF .. Tok_Vertical_Bar;
-- Choice terminators: tokens that terminate a choice. Used for error
-- recovery: when an error is encountered in a Choice, the scan simply
-- skips to the next Cterm token.
subtype Token_Class_Chtok is Token_Type
  range Tok_Arrow .. Tok_Dot_Dot;
-- Choice tokens: tokens which signal a choice when used in an Aggregate
subtype Token_Class_Cunit is Token_Type
  range Tok_Function .. Tok_Separate;
-- Tokens with which a compilation unit can begin
subtype Token_Class_Declk is Token_Type
  range Tok_Entry .. Tok_Procedure;
-- Keywords with which a declaration starts
subtype Token_Class_Deckn is Token_Type
  range Tok_Entry .. Tok_Use;
-- Keywords which start a declaration but cannot start a compilation unit
subtype Token_Class_After_SM is Token_Type
  range Tok_Less_Less .. Tok_EOF;
-- Tokens which always, or almost always, follow a semicolon. The
-- Resync_Past_Semicolon routine uses this class to avoid gobbling up
-- stuff when a semicolon is missing. Significant only for error
-- recovery.
subtype Token_Class_Labeled_Stmt is Token_Type
  range Tok_Begin .. Tok_While;
-- Tokens with which a labeled statement starts
type Token_Flag_Array is array (Token_Type) of Boolean;
-- One Boolean flag per Token_Type value
308 Is_Reserved_Keyword
: constant Token_Flag_Array
:= Token_Flag_Array
'(
309 Tok_Mod .. Tok_Rem => True,
310 Tok_New .. Tok_Null => True,
311 Tok_Delta .. Tok_Range => True,
312 Tok_And .. Tok_Xor => True,
313 Tok_In .. Tok_Not => True,
314 Tok_Abstract .. Tok_Then => True,
315 Tok_Abort .. Tok_Separate => True,
317 -- Flag array used to test for reserved words. Each token range listed
-- in the aggregate is flagged True.
-- NOTE(review): no closing parenthesis and no choice covering the
-- remaining tokens is visible after the last range in this view; confirm
-- against the complete file that the aggregate is properly terminated.
319 --------------------------
320 -- Scan State Variables --
321 --------------------------
323 -- Note: these variables can only be referenced during the parsing of a
324 -- file. Reference to any of them from Sem or the expander is wrong.
326 Scan_Ptr : Source_Ptr;
327 -- Current scan pointer location. After a call to Scan, this points
328 -- just past the end of the token just scanned.
331 -- Type of current token
-- NOTE(review): the declaration this comment describes is not visible
-- here; confirm against the complete file.
333 Token_Ptr : Source_Ptr;
334 -- Pointer to first character of current token
336 Current_Line_Start : Source_Ptr;
337 -- Pointer to first character of line containing current token
339 Start_Column : Column_Number;
340 -- Starting column number (zero origin) of the first non-blank character
341 -- on the line containing the current token. This is used for error
342 -- recovery circuits which depend on looking at the column line up.
345 -- Used to accumulate a CRC representing the tokens in the source
346 -- file being compiled. This CRC includes only program tokens, and
347 -- excludes comments.
-- NOTE(review): the declaration this comment describes is not visible
-- here; confirm against the complete file.
349 First_Non_Blank_Location : Source_Ptr;
350 -- Location of first non-blank character on the line containing the
351 -- current token (i.e. the location of the character whose column number
352 -- is stored in Start_Column).
354 Token_Node : Node_Id := Empty;
355 -- Node table Id for the current token. This is set only if the current
356 -- token is one for which the scanner constructs a node (i.e. it is an
357 -- identifier, operator symbol, or literal). For other token types,
358 -- Token_Node is undefined.
360 Token_Name : Name_Id := No_Name;
361 -- For identifiers, this is set to the Name_Id of the identifier scanned.
362 -- For all other tokens, Token_Name is set to Error_Name. Note that it
363 -- would be possible for the caller to extract this information from
364 -- Token_Node. We set Token_Name separately for two reasons. First it
365 -- allows a quicker test for a specific identifier. Second, it allows
366 -- a version of the parser to be built that does not build tree nodes,
367 -- usable as a syntax checker.
-- The declaration itself gives Token_Name the initial value No_Name.
369 Prev_Token : Token_Type := No_Token;
370 -- Type of previous token
372 Prev_Token_Ptr : Source_Ptr;
373 -- Pointer to first character of previous token
375 Version_To_Be_Found : Boolean;
376 -- This flag is True if the scanner is still looking for an RCS version
377 -- number in a comment. Normally it is initialized to False so that this
378 -- circuit is not activated. If the -dv switch is set, then this flag is
379 -- initialized to True, and then reset when the version number is found.
380 -- We do things this way to minimize the impact on comment scanning.
-- NOTE(review): the declaration carries no initial value, so the
-- initialization described above presumably happens elsewhere; confirm.
382 --------------------------------------------------------
383 -- Procedures for Saving and Restoring the Scan State --
384 --------------------------------------------------------
386 -- The following procedures can be used to save and restore the entire
387 -- scan state. They are used in cases where it is necessary to back up
388 -- the scan during the parse.
type Saved_Scan_State is private;
-- Holds a saved copy of the scan state, written by Save_Scan_State and
-- read back by Restore_Scan_State
procedure Save_Scan_State
  (Saved_State : out Saved_Scan_State);
pragma Inline (Save_Scan_State);
-- Records the current scan state in Saved_State so that it can be
-- restored later if required. Saving a state that is never restored
-- does no harm.
procedure Restore_Scan_State
  (Saved_State : in Saved_Scan_State);
pragma Inline (Restore_Scan_State);
-- Reinstates a scan state previously captured by Save_Scan_State. The
-- saved state must refer to the current source file.
404 type Saved_Scan_State is record
405 Save_Scan_Ptr : Source_Ptr;
406 Save_Token : Token_Type;
407 Save_Token_Ptr : Source_Ptr;
408 Save_Current_Line_Start : Source_Ptr;
409 Save_Start_Column : Column_Number;
410 Save_Checksum : Word;
411 Save_First_Non_Blank_Location : Source_Ptr;
412 Save_Token_Node : Node_Id;
413 Save_Token_Name : Name_Id;
414 Save_Prev_Token : Token_Type;
415 Save_Prev_Token_Ptr : Source_Ptr;