PR rtl-optimization/82913
[official-gcc.git] / gcc / ada / scans.ads
blobfaa06f2087dd4dd89e21f931875e4ea2c25df63a
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- S C A N S --
6 -- --
7 -- S p e c --
8 -- --
9 -- Copyright (C) 1992-2017, Free Software Foundation, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 3, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. --
17 -- --
18 -- As a special exception under Section 7 of GPL version 3, you are granted --
19 -- additional permissions described in the GCC Runtime Library Exception, --
20 -- version 3.1, as published by the Free Software Foundation. --
21 -- --
22 -- You should have received a copy of the GNU General Public License and --
23 -- a copy of the GCC Runtime Library Exception along with this program; --
24 -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
25 -- <http://www.gnu.org/licenses/>. --
26 -- --
27 -- GNAT was originally developed by the GNAT team at New York University. --
28 -- Extensive contributions were provided by Ada Core Technologies Inc. --
29 -- --
30 ------------------------------------------------------------------------------
32 with Namet; use Namet;
33 with Types; use Types;
34 with Uintp; use Uintp;
35 with Urealp; use Urealp;
37 package Scans is
39 -- The scanner maintains a current state in the global variables defined
40 -- in this package. The call to the Scan routine advances this state to
41 -- the next token. The state is initialized by the call to one of the
42 -- initialization routines in Sinput.
44 -- The following type is used to identify token types returned by Scan.
45 -- The class column in this table indicates the token classes which
46 -- apply to the token, as defined by subsequent subtype declarations.
48 type Token_Type is (
-- Note: the order of the literals below is significant. The Token_Class_xxx
-- subtypes declared later in this package are contiguous ranges over this
-- enumeration, so moving a literal changes its class membership.
50 -- Token name Token type Class(es)
52 Tok_Integer_Literal, -- numeric lit Literal, Lit_Or_Name
54 Tok_Real_Literal, -- numeric lit Literal, Lit_Or_Name
56 Tok_String_Literal, -- string lit Literal, Lit_Or_Name
58 Tok_Char_Literal, -- char lit Name, Literal, Lit_Or_Name
60 Tok_Operator_Symbol, -- op symbol Name, Literal, Lit_Or_Name, Desig
62 Tok_Identifier, -- identifier Name, Lit_Or_Name, Desig
64 Tok_At_Sign, -- @ AI12-0125-3 : target name Name, Desig
66 Tok_Double_Asterisk, -- **
68 Tok_Ampersand, -- & Binary_Addop
69 Tok_Minus, -- - Binary_Addop, Unary_Addop
70 Tok_Plus, -- + Binary_Addop, Unary_Addop
72 Tok_Asterisk, -- * Mulop
73 Tok_Mod, -- MOD Mulop
74 Tok_Rem, -- REM Mulop
75 Tok_Slash, -- / Mulop
77 Tok_New, -- NEW
79 Tok_Abs, -- ABS
80 Tok_Others, -- OTHERS
81 Tok_Null, -- NULL
83 -- Note: Tok_Raise is in no categories now, it used to be Cterm, Eterm,
84 -- After_SM, but now that Ada 2012 has added raise expressions, the
85 -- raise token can appear anywhere. Note in particular that Tok_Raise
86 -- being in Eterm stopped the parser from recognizing "return raise
87 -- exception-name". This degrades error recovery slightly, and perhaps
88 -- we could do better, but not worth the effort.
90 Tok_Raise, -- RAISE
92 Tok_Dot, -- . Namext
93 Tok_Apostrophe, -- ' Namext
95 Tok_Left_Paren, -- ( Namext, Consk
97 Tok_Delta, -- DELTA Atkwd, Sterm, Consk
98 Tok_Digits, -- DIGITS Atkwd, Sterm, Consk
99 Tok_Range, -- RANGE Atkwd, Sterm, Consk
101 Tok_Right_Paren, -- ) Sterm
102 Tok_Comma, -- , Sterm
104 Tok_And, -- AND Logop, Sterm
105 Tok_Or, -- OR Logop, Sterm
106 Tok_Xor, -- XOR Logop, Sterm
108 Tok_Less, -- < Relop, Sterm
109 Tok_Equal, -- = Relop, Sterm
110 Tok_Greater, -- > Relop, Sterm
111 Tok_Not_Equal, -- /= Relop, Sterm
112 Tok_Greater_Equal, -- >= Relop, Sterm
113 Tok_Less_Equal, -- <= Relop, Sterm
115 Tok_In, -- IN Relop, Sterm
116 Tok_Not, -- NOT Relop, Sterm
118 Tok_Box, -- <> Relop, Sterm
119 Tok_Colon_Equal, -- := Eterm, Sterm
120 Tok_Colon, -- : Eterm, Sterm
121 Tok_Greater_Greater, -- >> Eterm, Sterm
123 Tok_Abstract, -- ABSTRACT Eterm, Sterm
124 Tok_Access, -- ACCESS Eterm, Sterm
125 Tok_Aliased, -- ALIASED Eterm, Sterm
126 Tok_All, -- ALL Eterm, Sterm
127 Tok_Array, -- ARRAY Eterm, Sterm
128 Tok_At, -- AT Eterm, Sterm
129 Tok_Body, -- BODY Eterm, Sterm
130 Tok_Constant, -- CONSTANT Eterm, Sterm
131 Tok_Do, -- DO Eterm, Sterm
132 Tok_Is, -- IS Eterm, Sterm
133 Tok_Interface, -- INTERFACE Eterm, Sterm
134 Tok_Limited, -- LIMITED Eterm, Sterm
135 Tok_Of, -- OF Eterm, Sterm
136 Tok_Out, -- OUT Eterm, Sterm
137 Tok_Record, -- RECORD Eterm, Sterm
138 Tok_Renames, -- RENAMES Eterm, Sterm
139 Tok_Reverse, -- REVERSE Eterm, Sterm
140 Tok_Some, -- SOME Eterm, Sterm
141 Tok_Tagged, -- TAGGED Eterm, Sterm
142 Tok_Then, -- THEN Eterm, Sterm
144 Tok_Less_Less, -- << Eterm, Sterm, After_SM
146 Tok_Abort, -- ABORT Eterm, Sterm, After_SM
147 Tok_Accept, -- ACCEPT Eterm, Sterm, After_SM
148 Tok_Case, -- CASE Eterm, Sterm, After_SM
149 Tok_Delay, -- DELAY Eterm, Sterm, After_SM
150 Tok_Else, -- ELSE Eterm, Sterm, After_SM
151 Tok_Elsif, -- ELSIF Eterm, Sterm, After_SM
152 Tok_End, -- END Eterm, Sterm, After_SM
153 Tok_Exception, -- EXCEPTION Eterm, Sterm, After_SM
154 Tok_Exit, -- EXIT Eterm, Sterm, After_SM
155 Tok_Goto, -- GOTO Eterm, Sterm, After_SM
156 Tok_If, -- IF Eterm, Sterm, After_SM
157 Tok_Pragma, -- PRAGMA Eterm, Sterm, After_SM
158 Tok_Requeue, -- REQUEUE Eterm, Sterm, After_SM
159 Tok_Return, -- RETURN Eterm, Sterm, After_SM
160 Tok_Select, -- SELECT Eterm, Sterm, After_SM
161 Tok_Terminate, -- TERMINATE Eterm, Sterm, After_SM
162 Tok_Until, -- UNTIL Eterm, Sterm, After_SM
163 Tok_When, -- WHEN Eterm, Sterm, After_SM
165 Tok_Begin, -- BEGIN Eterm, Sterm, After_SM, Labeled_Stmt
166 Tok_Declare, -- DECLARE Eterm, Sterm, After_SM, Labeled_Stmt
167 Tok_For, -- FOR Eterm, Sterm, After_SM, Labeled_Stmt
168 Tok_Loop, -- LOOP Eterm, Sterm, After_SM, Labeled_Stmt
169 Tok_While, -- WHILE Eterm, Sterm, After_SM, Labeled_Stmt
171 Tok_Entry, -- ENTRY Eterm, Sterm, Declk, Deckn, After_SM
172 Tok_Protected, -- PROTECTED Eterm, Sterm, Declk, Deckn, After_SM
173 Tok_Task, -- TASK Eterm, Sterm, Declk, Deckn, After_SM
174 Tok_Type, -- TYPE Eterm, Sterm, Declk, Deckn, After_SM
175 Tok_Subtype, -- SUBTYPE Eterm, Sterm, Declk, Deckn, After_SM
176 Tok_Overriding, -- OVERRIDING Eterm, Sterm, Declk, Deckn, After_SM
177 Tok_Synchronized, -- SYNCHRONIZED Eterm, Sterm, Declk, Deckn, After_SM
178 Tok_Use, -- USE Eterm, Sterm, Declk, Deckn, After_SM
180 Tok_Function, -- FUNCTION Eterm, Sterm, Cunit, Declk, After_SM
181 Tok_Generic, -- GENERIC Eterm, Sterm, Cunit, Declk, After_SM
182 Tok_Package, -- PACKAGE Eterm, Sterm, Cunit, Declk, After_SM
183 Tok_Procedure, -- PROCEDURE Eterm, Sterm, Cunit, Declk, After_SM
185 Tok_Private, -- PRIVATE Eterm, Sterm, Cunit, After_SM
186 Tok_With, -- WITH Eterm, Sterm, Cunit, After_SM
187 Tok_Separate, -- SEPARATE Eterm, Sterm, Cunit, After_SM
189 Tok_EOF, -- End of file Eterm, Sterm, Cterm, After_SM
191 Tok_Semicolon, -- ; Eterm, Sterm, Cterm
193 Tok_Arrow, -- => Sterm, Cterm, Chtok
195 Tok_Vertical_Bar, -- | Cterm, Sterm, Chtok
197 Tok_Dot_Dot, -- .. Sterm, Chtok
199 Tok_Project,
200 Tok_Extends,
201 Tok_External,
202 Tok_External_As_List,
203 -- These four entries represent keywords for the project file language
204 -- and can be returned only in the case of scanning project files.
206 Tok_Comment,
207 -- This entry is used when scanning project files (where it represents
208 -- an entire comment), and in preprocessing with the -C switch set
209 -- (where it represents just the "--" of a comment). For the project
210 -- file case, the text of the comment is stored in Comment_Id.
212 Tok_End_Of_Line,
213 -- Represents an end of line. Not used during normal compilation scans
214 -- where end of line is ignored. Active for preprocessor scanning and
215 -- also when scanning project files (where it is needed because of ???)
217 Tok_Special,
218 -- AI12-0125-03 : target name as abbreviation for LHS
220 -- Otherwise used only in preprocessor scanning (to represent one of
221 -- the characters '#', '$', '?', '@', '`', '\', '^', '~', or '_'). The
222 -- character value itself is stored in Scans.Special_Character.
224 Tok_SPARK_Hide,
225 -- HIDE directive in SPARK
227 No_Token);
228 -- No_Token is used for initializing Token values to indicate that
229 -- no value has been set yet.
231 function Keyword_Name (Token : Token_Type) return Name_Id;
232 -- Given a token that is a reserved word, return the corresponding Name_Id
233 -- in lower case. E.g. Keyword_Name (Tok_Begin) = Name_Find ("begin").
234 -- It is an error to pass any other kind of token.
-- NOTE(review): "reserved word" presumably means a token for which
-- Is_Reserved_Keyword is True; confirm against the body.
236 -- Note: in the RM, operator symbol is a special case of string literal.
237 -- We distinguish at the lexical level in this compiler, since there are
238 -- many syntactic situations in which only an operator symbol is allowed.
240 -- The following subtype declarations group the token types into classes.
241 -- These are used for class tests in the parser.
243 subtype Token_Class_Numeric_Literal is
244 Token_Type range Tok_Integer_Literal .. Tok_Real_Literal;
245 -- Numeric literal
247 subtype Token_Class_Literal is
248 Token_Type range Tok_Integer_Literal .. Tok_Operator_Symbol;
249 -- Literal
251 subtype Token_Class_Lit_Or_Name is
252 Token_Type range Tok_Integer_Literal .. Tok_Identifier;
-- Literal or first token of a name (any literal, operator symbol or
-- identifier). Note that Tok_At_Sign is not included in this range.
254 subtype Token_Class_Binary_Addop is
255 Token_Type range Tok_Ampersand .. Tok_Plus;
256 -- Binary adding operator (& + -)
258 subtype Token_Class_Unary_Addop is
259 Token_Type range Tok_Minus .. Tok_Plus;
260 -- Unary adding operator (+ -)
262 subtype Token_Class_Mulop is
263 Token_Type range Tok_Asterisk .. Tok_Slash;
264 -- Multiplying operator
266 subtype Token_Class_Logop is
267 Token_Type range Tok_And .. Tok_Xor;
268 -- Logical operator (and, or, xor)
270 subtype Token_Class_Relop is
271 Token_Type range Tok_Less .. Tok_Box;
272 -- Relational operator (= /= < <= > >= not, in plus <> to catch misuse
273 -- of Pascal style not equal operator).
275 subtype Token_Class_Name is
276 Token_Type range Tok_Char_Literal .. Tok_At_Sign;
277 -- First token of name (4.1),
278 -- (char literal, operator symbol, identifier, '@')
279 -- Includes '@' after Ada2012 corrigendum.
281 subtype Token_Class_Desig is
282 Token_Type range Tok_Operator_Symbol .. Tok_At_Sign;
283 -- Token which can be a Designator (identifier, operator symbol). The
-- range as declared also covers Tok_At_Sign.
285 subtype Token_Class_Namext is
286 Token_Type range Tok_Dot .. Tok_Left_Paren;
287 -- Name extension tokens. These are tokens which can appear immediately
288 -- after a name to extend it recursively (period, quote, left paren)
290 subtype Token_Class_Consk is
291 Token_Type range Tok_Left_Paren .. Tok_Range;
292 -- Keywords which can start constraint
293 -- (left paren, delta, digits, range)
295 subtype Token_Class_Eterm is
296 Token_Type range Tok_Colon_Equal .. Tok_Semicolon;
297 -- Expression terminators. These tokens can never appear within a simple
298 -- expression. This is used for error recovery purposes (if we encounter
299 -- an error in an expression, we simply scan to the next Eterm token).
301 subtype Token_Class_Sterm is
302 Token_Type range Tok_Delta .. Tok_Dot_Dot;
303 -- Simple_Expression terminators. A Simple_Expression must be followed
304 -- by a token in this class, or an error message is issued complaining
305 -- about a missing binary operator.
307 subtype Token_Class_Atkwd is
308 Token_Type range Tok_Delta .. Tok_Range;
309 -- Attribute keywords. This class includes keywords which can be used
310 -- as an Attribute_Designator, namely DELTA, DIGITS and RANGE
312 subtype Token_Class_Cterm is
313 Token_Type range Tok_EOF .. Tok_Vertical_Bar;
314 -- Choice terminators. These tokens terminate a choice. This is used for
315 -- error recovery purposes (if we encounter an error in a Choice, we
316 -- simply scan to the next Cterm token).
318 subtype Token_Class_Chtok is
319 Token_Type range Tok_Arrow .. Tok_Dot_Dot;
320 -- Choice tokens. These tokens signal a choice when used in an Aggregate
322 subtype Token_Class_Cunit is
323 Token_Type range Tok_Function .. Tok_Separate;
324 -- Tokens which can begin a compilation unit
326 subtype Token_Class_Declk is
327 Token_Type range Tok_Entry .. Tok_Procedure;
328 -- Keywords which start a declaration
330 subtype Token_Class_Deckn is
331 Token_Type range Tok_Entry .. Tok_Use;
332 -- Keywords which start a declaration but can't start a compilation unit
334 subtype Token_Class_After_SM is
335 Token_Type range Tok_Less_Less .. Tok_EOF;
336 -- Tokens which always, or almost always, appear after a semicolon. Used
337 -- in the Resync_Past_Semicolon routine to avoid gobbling up stuff when
338 -- a semicolon is missing. Of significance only for error recovery.
340 subtype Token_Class_Labeled_Stmt is
341 Token_Type range Tok_Begin .. Tok_While;
342 -- Tokens which start labeled statements
344 type Token_Flag_Array is array (Token_Type) of Boolean;
-- One Boolean flag per Token_Type value
345 Is_Reserved_Keyword : constant Token_Flag_Array :=
346 Token_Flag_Array'
347 (Tok_Mod .. Tok_Rem => True,
348 Tok_New .. Tok_Raise => True,
349 Tok_Delta .. Tok_Range => True,
350 Tok_And .. Tok_Xor => True,
351 Tok_In .. Tok_Not => True,
352 Tok_Abstract .. Tok_Then => True,
353 Tok_Abort .. Tok_Separate => True,
354 others => False);
355 -- Flag array used to test for reserved word. Each range above covers a
-- run of consecutive Token_Type literals that are all Ada reserved words;
-- punctuation, literals, identifiers, the project file tokens and the
-- special scanner tokens fall under "others" and are flagged False.
-- The second range runs through Tok_Raise (the literal immediately after
-- Tok_Null): RAISE is a reserved word, but Tok_Raise belongs to no token
-- class, so it has to be covered here explicitly by the range bound.
357 procedure Initialize_Ada_Keywords;
358 -- Set up Token_Type values in Names table entries for Ada reserved
359 -- words. This ignores Ada_Version (the set-up is the same whatever
360 -- language version is selected); Ada_Version is taken into account in
-- Snames.Is_Keyword_Name.
362 --------------------------
363 -- Scan State Variables --
364 --------------------------
366 -- Note: these variables can only be referenced during the parsing of a
367 -- file. Reference to any of them from Sem or the expander is wrong.
369 -- These variables are initialized as required by Scn.Initialize_Scanner,
370 -- and should not be referenced before such a call. However, there are
371 -- situations in which these variables are saved and restored, and this
372 -- may happen before the first Initialize_Scanner call, resulting in the
373 -- assignment of invalid values. To avoid this, and allow building with
374 -- the -gnatVa switch, we initialize some variables to known valid values.
376 Scan_Ptr : Source_Ptr := No_Location; -- init for -gnatVa
377 -- Current scan pointer location. After a call to Scan, this points
378 -- just past the end of the token just scanned.
380 Token : Token_Type := No_Token; -- init for -gnatVa
381 -- Type of current token
383 Token_Ptr : Source_Ptr := No_Location; -- init for -gnatVa
384 -- Pointer to first character of current token
386 Current_Line_Start : Source_Ptr := No_Location; -- init for -gnatVa
387 -- Pointer to first character of line containing current token
389 Start_Column : Column_Number := No_Column_Number; -- init for -gnatVa
390 -- Starting column number (zero origin) of the first non-blank character
391 -- on the line containing the current token. This is used for error
392 -- recovery circuits which depend on looking at the column line up.
394 Type_Token_Location : Source_Ptr := No_Location; -- init for -gnatVa
395 -- Within a type declaration, gives the location of the TYPE keyword that
396 -- opened the type declaration. Used in checking the end column of a record
397 -- declaration, which can line up either with the TYPE keyword, or with the
398 -- start of the line containing the RECORD keyword.
400 Checksum : Word := 0; -- init for -gnatVa
401 -- Used to accumulate a CRC representing the tokens in the source
402 -- file being compiled. This CRC includes only program tokens, and
403 -- excludes comments.
405 Limited_Checksum : Word := 0;
406 -- Used to accumulate a CRC representing significant tokens in the
407 -- limited view of a package, i.e. visible type names and related
408 -- tagged indicators.
410 First_Non_Blank_Location : Source_Ptr := No_Location; -- init for -gnatVa
411 -- Location of first non-blank character on the line containing the
412 -- current token (i.e. the location of the character whose column number
413 -- is stored in Start_Column).
415 Token_Node : Node_Id := Empty;
416 -- Node table Id for the current token. This is set only if the current
417 -- token is one for which the scanner constructs a node (i.e. it is an
418 -- identifier, operator symbol, or literal). For other token types,
419 -- Token_Node is undefined.
421 Token_Name : Name_Id := No_Name;
422 -- For identifiers, this is set to the Name_Id of the identifier scanned.
423 -- For all other tokens, Token_Name is set to Error_Name. Note that it
424 -- would be possible for the caller to extract this information from
425 -- Token_Node. We set Token_Name separately for two reasons. First it
426 -- allows a quicker test for a specific identifier. Second, it allows
427 -- a version of the parser to be built that does not build tree nodes,
428 -- usable as a syntax checker.
430 Prev_Token : Token_Type := No_Token;
431 -- Type of previous token
433 Prev_Token_Ptr : Source_Ptr;
434 -- Pointer to first character of previous token
436 Version_To_Be_Found : Boolean;
437 -- This flag is True if the scanner is still looking for an RCS version
438 -- number in a comment. Normally it is initialized to False so that this
439 -- circuit is not activated. If the -dv switch is set, then this flag is
440 -- initialized to True, and then reset when the version number is found.
441 -- We do things this way to minimize the impact on comment scanning.
-- Note that the declaration itself supplies no default value, so the
-- initialization described above has to be done elsewhere.
443 Character_Code : Char_Code;
444 -- Valid only when Token is Tok_Char_Literal. Contains the value of the
445 -- scanned literal.
447 Real_Literal_Value : Ureal;
448 -- Valid only when Token is Tok_Real_Literal, contains the value of the
449 -- scanned literal.
451 Int_Literal_Value : Uint;
452 -- Valid only when Token = Tok_Integer_Literal, contains the value of the
453 -- scanned literal.
455 Based_Literal_Uses_Colon : Boolean;
456 -- Valid only when Token = Tok_Integer_Literal or Tok_Real_Literal. Set
457 -- True only for the case of a based literal using ':' instead of '#'.
459 String_Literal_Id : String_Id;
460 -- Valid only when Token = Tok_String_Literal or Tok_Operator_Symbol.
461 -- Contains the Id for currently scanned string value.
463 Wide_Character_Found : Boolean := False;
464 -- Valid only when Token = Tok_String_Literal. Set True if wide character
465 -- found (i.e. a character that does not fit in Character, but fits in
466 -- Wide_Wide_Character).
468 Wide_Wide_Character_Found : Boolean := False;
469 -- Valid only when Token = Tok_String_Literal. Set True if wide wide
470 -- character found (i.e. a character that does not fit in Character or
471 -- Wide_Character).
473 Special_Character : Character;
474 -- AI12-0125-03 : '@' as target name is handled elsewhere.
475 -- Valid only when Token = Tok_Special. Contains one of the characters
476 -- '#', '$', '?', '`', '\', '^', '~', or '_'.
478 -- Why only this set? What about wide characters???
480 Comment_Id : Name_Id := No_Name;
481 -- Valid only when Token = Tok_Comment. Store the string that follows
482 -- the "--" of a comment when scanning project files.
484 -- Is it really right for this to be a Name rather than a String, what
485 -- about the case of Wide_Wide_Characters???
487 Inside_Depends : Boolean := False;
488 -- True while parsing the argument of a Depends or Refined_Depends pragma
489 -- or aspect. Used to allow/require nonstandard style rules for =>+ with
490 -- -gnatyt.
492 Inside_If_Expression : Nat := 0;
493 -- This is a counter that is set non-zero while scanning out an if
494 -- expression (incremented on entry, decremented on exit). It is used to
495 -- disconnect format checks that normally apply to keywords THEN, ELSE etc.
497 Inside_Pragma : Boolean := False;
498 -- True within a pragma. Used to avoid complaining about reserved words
499 -- within pragmas (see Scan_Reserved_Identifier).
501 --------------------------------------------------------
502 -- Procedures for Saving and Restoring the Scan State --
503 --------------------------------------------------------
505 -- The following procedures can be used to save and restore the entire
506 -- scan state. They are used in cases where it is necessary to back up
507 -- the scan during the parse.
509 type Saved_Scan_State is private;
510 -- Used for saving and restoring the scan state. Clients cannot inspect
-- the contents; values are only produced by Save_Scan_State and consumed
-- by Restore_Scan_State.
512 procedure Save_Scan_State (Saved_State : out Saved_Scan_State);
513 pragma Inline (Save_Scan_State);
514 -- Saves the current scan state for possible later restoration. Note that
515 -- there is no harm in saving the state and then never restoring it.
517 procedure Restore_Scan_State (Saved_State : Saved_Scan_State);
518 pragma Inline (Restore_Scan_State);
519 -- Restores a scan state saved by a call to Save_Scan_State.
520 -- The saved scan state must refer to the current source file.
522 private
523 type Saved_Scan_State is record
-- Full view of the private type. Each Save_xxx component has the same
-- type as the scan state variable xxx in the visible part and presumably
-- holds its saved value (confirm against the package body). Scan state
-- variables with no matching component here (for example
-- Type_Token_Location, Limited_Checksum and the literal value variables)
-- are not part of the saved state.
524 Save_Scan_Ptr : Source_Ptr;
525 Save_Token : Token_Type;
526 Save_Token_Ptr : Source_Ptr;
527 Save_Current_Line_Start : Source_Ptr;
528 Save_Start_Column : Column_Number;
529 Save_Checksum : Word;
530 Save_First_Non_Blank_Location : Source_Ptr;
531 Save_Token_Node : Node_Id;
532 Save_Token_Name : Name_Id;
533 Save_Prev_Token : Token_Type;
534 Save_Prev_Token_Ptr : Source_Ptr;
535 end record;
537 end Scans;