gcc/ada/scans.ads

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                S C A N S                                 --
   6 --                                                                          --
   7 --                                 S p e c                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2009, Free Software Foundation, Inc.         --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
  17 --                                                                          --
  18 -- As a special exception under Section 7 of GPL version 3, you are granted --
  19 -- additional permissions described in the GCC Runtime Library Exception,   --
  20 -- version 3.1, as published by the Free Software Foundation.               --
  21 --                                                                          --
  22 -- You should have received a copy of the GNU General Public License and    --
  23 -- a copy of the GCC Runtime Library Exception along with this program;     --
  24 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
  25 -- <http://www.gnu.org/licenses/>.                                          --
  26 --                                                                          --
  27 -- GNAT was originally developed  by the GNAT team at  New York University. --
  28 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  29 --                                                                          --
  30 ------------------------------------------------------------------------------
  31
  32 with Namet;  use Namet;
  33 with Types;  use Types;
  34 with Uintp;  use Uintp;
  35 with Urealp; use Urealp;
  36
  37 package Scans is
  38
  39 --  The scanner maintains a current state in the global variables defined
  40 --  in this package. The call to the Scan routine advances this state to
  41 --  the next token. The state is initialized by the call to one of the
  42 --  initialization routines in Sinput.
  43
  44    --  The following type is used to identify token types returned by Scan.
  45    --  The class column in this table indicates the token classes which
  46    --  apply to the token, as defined by subsequent subtype declarations.
  47
  48    --  Note: Namet.Is_Keyword_Name depends on the fact that the first entry in
  49    --  this type declaration is *not* for a reserved word. For details on why
  50    --  there is this requirement, see Scans.Initialize_Ada_Keywords.
  51
  52    type Token_Type is (
  53
  54       --  Token name          Token type   Class(es)
  55
  56       Tok_Integer_Literal, -- numeric lit  Literal, Lit_Or_Name
  57
  58       Tok_Real_Literal,    -- numeric lit  Literal, Lit_Or_Name
  59
  60       Tok_String_Literal,  -- string lit   Literal, Lit_Or_Name
  61
  62       Tok_Char_Literal,    -- char lit     Name, Literal, Lit_Or_Name
  63
  64       Tok_Operator_Symbol, -- op symbol    Name, Literal, Lit_Or_Name, Desig
  65
  66       Tok_Identifier,      -- identifier   Name, Lit_Or_Name, Desig
  67
  68       Tok_Double_Asterisk, -- **
  69
  70       Tok_Ampersand,       -- &            Binary_Addop
  71       Tok_Minus,           -- -            Binary_Addop, Unary_Addop
  72       Tok_Plus,            -- +            Binary_Addop, Unary_Addop
  73
  74       Tok_Asterisk,        -- *            Mulop
  75       Tok_Mod,             -- MOD          Mulop
  76       Tok_Rem,             -- REM          Mulop
  77       Tok_Slash,           -- /            Mulop
  78
  79       Tok_New,             -- NEW
  80
  81       Tok_Abs,             -- ABS
  82       Tok_Others,          -- OTHERS
  83       Tok_Null,            -- NULL
  84
  85       Tok_Dot,             -- .            Namext
  86       Tok_Apostrophe,      -- '            Namext
  87
  88       Tok_Left_Paren,      -- (            Namext, Consk
  89
  90       Tok_Delta,           -- DELTA        Atkwd, Sterm, Consk
  91       Tok_Digits,          -- DIGITS       Atkwd, Sterm, Consk
  92       Tok_Range,           -- RANGE        Atkwd, Sterm, Consk
  93
  94       Tok_Right_Paren,     -- )            Sterm
  95       Tok_Comma,           -- ,            Sterm
  96
  97       Tok_And,             -- AND          Logop, Sterm
  98       Tok_Or,              -- OR           Logop, Sterm
  99       Tok_Xor,             -- XOR          Logop, Sterm
 100
 101       Tok_Less,            -- <            Relop, Sterm
 102       Tok_Equal,           -- =            Relop, Sterm
 103       Tok_Greater,         -- >            Relop, Sterm
 104       Tok_Not_Equal,       -- /=           Relop, Sterm
 105       Tok_Greater_Equal,   -- >=           Relop, Sterm
 106       Tok_Less_Equal,      -- <=           Relop, Sterm
 107
 108       Tok_In,              -- IN           Relop, Sterm
 109       Tok_Not,             -- NOT          Relop, Sterm
 110
 111       Tok_Box,             -- <>           Relop, Sterm
 112       Tok_Colon_Equal,     -- :=           Eterm, Sterm
 113       Tok_Colon,           -- :            Eterm, Sterm
 114       Tok_Greater_Greater, -- >>           Eterm, Sterm
 115
 116       Tok_Abstract,        -- ABSTRACT     Eterm, Sterm
 117       Tok_Access,          -- ACCESS       Eterm, Sterm
 118       Tok_Aliased,         -- ALIASED      Eterm, Sterm
 119       Tok_All,             -- ALL          Eterm, Sterm
 120       Tok_Array,           -- ARRAY        Eterm, Sterm
 121       Tok_At,              -- AT           Eterm, Sterm
 122       Tok_Body,            -- BODY         Eterm, Sterm
 123       Tok_Constant,        -- CONSTANT     Eterm, Sterm
 124       Tok_Do,              -- DO           Eterm, Sterm
 125       Tok_Is,              -- IS           Eterm, Sterm
 126       Tok_Interface,       -- INTERFACE    Eterm, Sterm
 127       Tok_Limited,         -- LIMITED      Eterm, Sterm
 128       Tok_Of,              -- OF           Eterm, Sterm
 129       Tok_Out,             -- OUT          Eterm, Sterm
 130       Tok_Record,          -- RECORD       Eterm, Sterm
 131       Tok_Renames,         -- RENAMES      Eterm, Sterm
 132       Tok_Reverse,         -- REVERSE      Eterm, Sterm
 133       Tok_Tagged,          -- TAGGED       Eterm, Sterm
 134       Tok_Then,            -- THEN         Eterm, Sterm
 135
 136       Tok_Less_Less,       -- <<           Eterm, Sterm, After_SM
 137
 138       Tok_Abort,           -- ABORT        Eterm, Sterm, After_SM
 139       Tok_Accept,          -- ACCEPT       Eterm, Sterm, After_SM
 140       Tok_Case,            -- CASE         Eterm, Sterm, After_SM
 141       Tok_Delay,           -- DELAY        Eterm, Sterm, After_SM
 142       Tok_Else,            -- ELSE         Eterm, Sterm, After_SM
 143       Tok_Elsif,           -- ELSIF        Eterm, Sterm, After_SM
 144       Tok_End,             -- END          Eterm, Sterm, After_SM
 145       Tok_Exception,       -- EXCEPTION    Eterm, Sterm, After_SM
 146       Tok_Exit,            -- EXIT         Eterm, Sterm, After_SM
 147       Tok_Goto,            -- GOTO         Eterm, Sterm, After_SM
 148       Tok_If,              -- IF           Eterm, Sterm, After_SM
 149       Tok_Pragma,          -- PRAGMA       Eterm, Sterm, After_SM
 150       Tok_Raise,           -- RAISE        Eterm, Sterm, After_SM
 151       Tok_Requeue,         -- REQUEUE      Eterm, Sterm, After_SM
 152       Tok_Return,          -- RETURN       Eterm, Sterm, After_SM
 153       Tok_Select,          -- SELECT       Eterm, Sterm, After_SM
 154       Tok_Terminate,       -- TERMINATE    Eterm, Sterm, After_SM
 155       Tok_Until,           -- UNTIL        Eterm, Sterm, After_SM
 156       Tok_When,            -- WHEN         Eterm, Sterm, After_SM
 157
 158       Tok_Begin,           -- BEGIN        Eterm, Sterm, After_SM, Labeled_Stmt
 159       Tok_Declare,         -- DECLARE      Eterm, Sterm, After_SM, Labeled_Stmt
 160       Tok_For,             -- FOR          Eterm, Sterm, After_SM, Labeled_Stmt
 161       Tok_Loop,            -- LOOP         Eterm, Sterm, After_SM, Labeled_Stmt
 162       Tok_While,           -- WHILE        Eterm, Sterm, After_SM, Labeled_Stmt
 163
 164       Tok_Entry,           -- ENTRY        Eterm, Sterm, Declk, Deckn, After_SM
 165       Tok_Protected,       -- PROTECTED    Eterm, Sterm, Declk, Deckn, After_SM
 166       Tok_Task,            -- TASK         Eterm, Sterm, Declk, Deckn, After_SM
 167       Tok_Type,            -- TYPE         Eterm, Sterm, Declk, Deckn, After_SM
 168       Tok_Subtype,         -- SUBTYPE      Eterm, Sterm, Declk, Deckn, After_SM
 169       Tok_Overriding,      -- OVERRIDING   Eterm, Sterm, Declk, Deckn, After_SM
 170       Tok_Synchronized,    -- SYNCHRONIZED Eterm, Sterm, Declk, Deckn, After_SM
 171       Tok_Use,             -- USE          Eterm, Sterm, Declk, Deckn, After_SM
 172
 173       Tok_Function,        -- FUNCTION     Eterm, Sterm, Cunit, Declk, After_SM
 174       Tok_Generic,         -- GENERIC      Eterm, Sterm, Cunit, Declk, After_SM
 175       Tok_Package,         -- PACKAGE      Eterm, Sterm, Cunit, Declk, After_SM
 176       Tok_Procedure,       -- PROCEDURE    Eterm, Sterm, Cunit, Declk, After_SM
 177
 178       Tok_Private,         -- PRIVATE      Eterm, Sterm, Cunit, After_SM
 179       Tok_With,            -- WITH         Eterm, Sterm, Cunit, After_SM
 180       Tok_Separate,        -- SEPARATE     Eterm, Sterm, Cunit, After_SM
 181
 182       Tok_EOF,             -- End of file  Eterm, Sterm, Cterm, After_SM
 183
 184       Tok_Semicolon,       -- ;            Eterm, Sterm, Cterm
 185
 186       Tok_Arrow,           -- =>           Sterm, Cterm, Chtok
 187
 188       Tok_Vertical_Bar,    -- |            Cterm, Sterm, Chtok
 189
 190       Tok_Dot_Dot,         -- ..           Sterm, Chtok
 191
 192       Tok_Project,
 193       Tok_Extends,
 194       Tok_External,
 195       --  These three entries represent keywords for the project file language
 196       --  and can be returned only in the case of scanning project files.
 197
 198       Tok_Comment,
 199       --  This entry is used when scanning project files (where it represents
 200       --  an entire comment), and in preprocessing with the -C switch set
 201       --  (where it represents just the "--" of a comment). For the project
 202       --  file case, the text of the comment is stored in Scans.Comment_Id.
 203
 204       Tok_End_Of_Line,
 205       --  Represents an end of line. Not used during normal compilation scans
 206       --  where end of line is ignored. Active for preprocessor scanning and
 207       --  also when scanning project files (where it is needed because of ???)
 208
 209       Tok_Special,
 210       --  Used only in preprocessor scanning (to represent one of the
 211       --  characters '#', '$', '?', '@', '`', '\', '^', '~', or '_'). The
 212       --  character value itself is stored in Scans.Special_Character.
 213
 214       No_Token);
 215       --  No_Token is used for initializing Token values to indicate that
 216       --  no value has been set yet.
 217
 218    --  Note: in the RM, operator symbol is a special case of string literal.
 219    --  We distinguish at the lexical level in this compiler, since there are
 220    --  many syntactic situations in which only an operator symbol is allowed.
 221
 222    --  The following subtype declarations group the token types into classes.
 223    --  These are used for class tests in the parser.
 224
 225    subtype Token_Class_Numeric_Literal is
 226      Token_Type range Tok_Integer_Literal .. Tok_Real_Literal;
 227    --  Numeric literal
 228
 229    subtype Token_Class_Literal is
 230      Token_Type range Tok_Integer_Literal .. Tok_Operator_Symbol;
 231    --  Literal
 232
 233    subtype Token_Class_Lit_Or_Name is
 234      Token_Type range Tok_Integer_Literal .. Tok_Identifier;
 235
 236    subtype Token_Class_Binary_Addop is
 237      Token_Type range Tok_Ampersand .. Tok_Plus;
 238    --  Binary adding operator (& + -)
 239
 240    subtype Token_Class_Unary_Addop is
 241      Token_Type range Tok_Minus .. Tok_Plus;
 242    --  Unary adding operator (+ -)
 243
 244    subtype Token_Class_Mulop is
 245      Token_Type range Tok_Asterisk .. Tok_Slash;
 246    --  Multiplying operator
 247
 248    subtype Token_Class_Logop is
 249      Token_Type range Tok_And .. Tok_Xor;
 250    --  Logical operator (and, or, xor)
 251
 252    subtype Token_Class_Relop is
 253      Token_Type range Tok_Less .. Tok_Box;
 254    --  Relational operator (= /= < <= > >=, not, in, plus <> to catch misuse
 255    --  of Pascal style not equal operator).
 256
 257    subtype Token_Class_Name is
 258      Token_Type range Tok_Char_Literal .. Tok_Identifier;
 259    --  First token of name (4.1),
 260    --    (identifier, char literal, operator symbol)
 261
 262    subtype Token_Class_Desig is
 263      Token_Type range Tok_Operator_Symbol .. Tok_Identifier;
 264    --  Token which can be a Designator (identifier, operator symbol)
 265
 266    subtype Token_Class_Namext is
 267      Token_Type range Tok_Dot .. Tok_Left_Paren;
 268    --  Name extension tokens. These are tokens which can appear immediately
 269    --  after a name to extend it recursively (period, quote, left paren)
 270
 271    subtype Token_Class_Consk is
 272      Token_Type range Tok_Left_Paren .. Tok_Range;
 273    --  Tokens which can start a constraint
 274    --    (left paren, delta, digits, range)
 275
 276    subtype Token_Class_Eterm is
 277      Token_Type range Tok_Colon_Equal .. Tok_Semicolon;
 278    --  Expression terminators. These tokens can never appear within a simple
 279    --  expression. This is used for error recovery purposes (if we encounter
 280    --  an error in an expression, we simply scan to the next Eterm token).
 281
 282    subtype Token_Class_Sterm is
 283      Token_Type range Tok_Delta .. Tok_Dot_Dot;
 284    --  Simple_Expression terminators. A Simple_Expression must be followed
 285    --  by a token in this class, or an error message is issued complaining
 286    --  about a missing binary operator.
 287
 288    subtype Token_Class_Atkwd is
 289      Token_Type range Tok_Delta .. Tok_Range;
 290    --  Attribute keywords. This class includes keywords which can be used
 291    --  as an Attribute_Designator, namely DELTA, DIGITS and RANGE
 292
 293    subtype Token_Class_Cterm is
 294      Token_Type range Tok_EOF .. Tok_Vertical_Bar;
 295    --  Choice terminators. These tokens terminate a choice. This is used for
 296    --  error recovery purposes (if we encounter an error in a Choice, we
 297    --  simply scan to the next Cterm token).
 298
 299    subtype Token_Class_Chtok is
 300      Token_Type range Tok_Arrow .. Tok_Dot_Dot;
 301    --  Choice tokens. These tokens signal a choice when used in an Aggregate
 302
 303    subtype Token_Class_Cunit is
 304      Token_Type range Tok_Function .. Tok_Separate;
 305    --  Tokens which can begin a compilation unit
 306
 307    subtype Token_Class_Declk is
 308      Token_Type range Tok_Entry .. Tok_Procedure;
 309    --  Keywords which start a declaration
 310
 311    subtype Token_Class_Deckn is
 312      Token_Type range Tok_Entry .. Tok_Use;
 313    --  Keywords which start a declaration but can't start a compilation unit
 314
 315    subtype Token_Class_After_SM is
 316      Token_Type range Tok_Less_Less .. Tok_EOF;
 317    --  Tokens which always, or almost always, appear after a semicolon. Used
 318    --  in the Resync_Past_Semicolon routine to avoid gobbling up stuff when
 319    --  a semicolon is missing. Of significance only for error recovery.
 320
 321    subtype Token_Class_Labeled_Stmt is
 322      Token_Type range Tok_Begin .. Tok_While;
 323    --  Tokens which start labeled statements
 324
 325    type Token_Flag_Array is array (Token_Type) of Boolean;
 326    Is_Reserved_Keyword : constant Token_Flag_Array :=
 327                            Token_Flag_Array'
 328                              (Tok_Mod      .. Tok_Rem      => True,
 329                               Tok_New      .. Tok_Null     => True,
 330                               Tok_Delta    .. Tok_Range    => True,
 331                               Tok_And      .. Tok_Xor      => True,
 332                               Tok_In       .. Tok_Not      => True,
 333                               Tok_Abstract .. Tok_Then     => True,
 334                               Tok_Abort    .. Tok_Separate => True,
 335                               others                       => False);
 336    --  Flag array, indexed by Token_Type, used to test for a reserved word
 337
 338    procedure Initialize_Ada_Keywords;
 339    --  Set up Token_Type values in Names table entries for Ada reserved words
 340
 341    --------------------------
 342    -- Scan State Variables --
 343    --------------------------
 344
 345    --  Note: these variables can only be referenced during the parsing of a
 346    --  file. Reference to any of them from Sem or the expander is wrong.
 347
 348    --  These variables are initialized as required by Scn.Initialize_Scanner,
 349    --  and should not be referenced before such a call. However, there are
 350    --  situations in which these variables are saved and restored, and this
 351    --  may happen before the first Initialize_Scanner call, resulting in the
 352    --  assignment of invalid values. To avoid this, and allow building with
 353    --  the -gnatVa switch, we initialize some variables to known valid values.
 354
 355    Scan_Ptr : Source_Ptr := No_Location; -- init for -gnatVa
 356    --  Current scan pointer location. After a call to Scan, this points
 357    --  just past the end of the token just scanned.
 358
 359    Token : Token_Type := No_Token; -- init for -gnatVa
 360    --  Type of current token
 361
 362    Token_Ptr : Source_Ptr := No_Location; -- init for -gnatVa
 363    --  Pointer to first character of current token
 364
 365    Current_Line_Start : Source_Ptr := No_Location; -- init for -gnatVa
 366    --  Pointer to first character of line containing current token
 367
 368    Start_Column : Column_Number := No_Column_Number; -- init for -gnatVa
 369    --  Starting column number (zero origin) of the first non-blank character
 370    --  on the line containing the current token. This is used for error
 371    --  recovery circuits which depend on looking at the column line up.
 372
 373    Type_Token_Location : Source_Ptr := No_Location; -- init for -gnatVa
 374    --  Within a type declaration, gives the location of the TYPE keyword that
 375    --  opened the type declaration. Used in checking the end column of a record
 376    --  declaration, which can line up either with the TYPE keyword, or with the
 377    --  start of the line containing the RECORD keyword.
 378
 379    Checksum : Word := 0; -- init for -gnatVa
 380    --  Used to accumulate a CRC representing the tokens in the source
 381    --  file being compiled. This CRC includes only program tokens, and
 382    --  excludes comments.
 383
 384    First_Non_Blank_Location : Source_Ptr := No_Location; -- init for -gnatVa
 385    --  Location of first non-blank character on the line containing the
 386    --  current token (i.e. the location of the character whose column number
 387    --  is stored in Start_Column).
 388
 389    Token_Node : Node_Id := Empty;
 390    --  Node table Id for the current token. This is set only if the current
 391    --  token is one for which the scanner constructs a node (i.e. it is an
 392    --  identifier, operator symbol, or literal). For other token types,
 393    --  Token_Node is undefined.
 394
 395    Token_Name : Name_Id := No_Name;
 396    --  For identifiers, this is set to the Name_Id of the identifier scanned.
 397    --  For all other tokens, Token_Name is set to Error_Name. Note that it
 398    --  would be possible for the caller to extract this information from
 399    --  Token_Node. We set Token_Name separately for two reasons. First it
 400    --  allows a quicker test for a specific identifier. Second, it allows
 401    --  a version of the parser to be built that does not build tree nodes,
 402    --  usable as a syntax checker.
 403
 404    Prev_Token : Token_Type := No_Token;
 405    --  Type of previous token
 406
 407    Prev_Token_Ptr : Source_Ptr := No_Location; -- init for -gnatVa
 408    --  Pointer to first character of previous token
 409
 410    Version_To_Be_Found : Boolean;
 411    --  This flag is True if the scanner is still looking for an RCS version
 412    --  number in a comment. Normally it is initialized to False so that this
 413    --  circuit is not activated. If the -dv switch is set, then this flag is
 414    --  initialized to True, and then reset when the version number is found.
 415    --  We do things this way to minimize the impact on comment scanning.
 416
 417    Character_Code : Char_Code;
 418    --  Valid only when Token is Tok_Char_Literal
 419
 420    Real_Literal_Value : Ureal;
 421    --  Valid only when Token is Tok_Real_Literal
 422
 423    Int_Literal_Value : Uint;
 424    --  Valid only when Token = Tok_Integer_Literal
 425
 426    String_Literal_Id : String_Id;
 427    --  Id for currently scanned string value.
 428    --  Valid only when Token = Tok_String_Literal or Tok_Operator_Symbol.
 429
 430    Wide_Character_Found : Boolean := False;
 431    --  Set True if wide character found.
 432    --  Valid only when Token = Tok_String_Literal.
 433
 434    Special_Character : Character;
 435    --  Valid only when Token = Tok_Special. Returns one of the characters
 436    --  '#', '$', '?', '@', '`', '\', '^', '~', or '_'.
 437    --
 438    --  Why only this set? What about wide characters???
 439
 440    Comment_Id : Name_Id := No_Name;
 441    --  Valid only when Token = Tok_Comment. Store the string that follows
 442    --  the "--" of a comment when scanning project files.
 443    --
 444    --  Is it really right for this to be a Name rather than a String, what
 445    --  about the case of Wide_Wide_Characters???
 446
 447    Inside_Conditional_Expression : Nat := 0;
 448    --  This is a counter that is set non-zero while scanning out a conditional
 449    --  expression (incremented on entry, decremented on exit). It is used to
 450    --  disconnect format checks that normally apply to keywords THEN, ELSE etc.
 451
 452    --------------------------------------------------------
 453    -- Procedures for Saving and Restoring the Scan State --
 454    --------------------------------------------------------
 455
 456    --  The following procedures can be used to save and restore the entire
 457    --  scan state. They are used in cases where it is necessary to back up
 458    --  the scan during the parse.
 459
 460    type Saved_Scan_State is private;
 461    --  Used for saving and restoring the scan state
 462
 463    procedure Save_Scan_State (Saved_State : out Saved_Scan_State);
 464    pragma Inline (Save_Scan_State);
 465    --  Saves the current scan state for possible later restoration. Note that
 466    --  there is no harm in saving the state and then never restoring it.
 467
 468    procedure Restore_Scan_State (Saved_State : Saved_Scan_State);
 469    pragma Inline (Restore_Scan_State);
 470    --  Restores a scan state saved by a call to Save_Scan_State.
 471    --  The saved scan state must refer to the current source file.
 472
 473 private
 474    type Saved_Scan_State is record
 475       Save_Scan_Ptr                 : Source_Ptr;
 476       Save_Token                    : Token_Type;
 477       Save_Token_Ptr                : Source_Ptr;
 478       Save_Current_Line_Start       : Source_Ptr;
 479       Save_Start_Column             : Column_Number;
 480       Save_Checksum                 : Word;
 481       Save_First_Non_Blank_Location : Source_Ptr;
 482       Save_Token_Node               : Node_Id;
 483       Save_Token_Name               : Name_Id;
 484       Save_Prev_Token               : Token_Type;
 485       Save_Prev_Token_Ptr           : Source_Ptr;
 486    end record;
 487
 488 end Scans;