gcc/ada/scn.adb

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                  S C N                                   --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --                            $Revision: 1.3 $
  10 --                                                                          --
  11 --          Copyright (C) 1992-2001 Free Software Foundation, Inc.          --
  12 --                                                                          --
  13 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  14 -- terms of the  GNU General Public License as published  by the Free Soft- --
  15 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
  16 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  17 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  18 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  19 -- for  more details.  You should have  received  a copy of the GNU General --
  20 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
  21 -- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
  22 -- MA 02111-1307, USA.                                                      --
  23 --                                                                          --
  24 -- GNAT was originally developed  by the GNAT team at  New York University. --
  25 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  26 --                                                                          --
  27 ------------------------------------------------------------------------------
  28
  29 with Atree;    use Atree;
  30 with Csets;    use Csets;
  31 with Errout;   use Errout;
  32 with Hostparm; use Hostparm;
  33 with Namet;    use Namet;
  34 with Opt;      use Opt;
  35 with Scans;    use Scans;
  36 with Sinput;   use Sinput;
  37 with Sinfo;    use Sinfo;
  38 with Snames;   use Snames;
  39 with Style;
  40 with Widechar; use Widechar;
  41
  42 with System.CRC32;
  43 with System.WCh_Con; use System.WCh_Con;
  44
  45 package body Scn is
  46
  47    use ASCII;
  48    --  Make control characters visible
  49
  50    Used_As_Identifier : array (Token_Type) of Boolean;
  51    --  Flags set True if a given keyword is used as an identifier (used to
  52    --  make sure that we only post an error message for incorrect use of a
  53    --  keyword as an identifier once for a given keyword).
  54
  55    -----------------------
  56    -- Local Subprograms --
  57    -----------------------
  58
  59    procedure Accumulate_Checksum (C : Character);
  60    pragma Inline (Accumulate_Checksum);
  61    --  This routine accumulates the checksum given character C. During the
  62    --  scanning of a source file, this routine is called with every character
  63    --  in the source, excluding blanks, and all control characters (except
  64    --  that ESC is included in the checksum). Upper case letters not in string
  65    --  literals are folded by the caller. See Sinput spec for the documentation
  66    --  of the checksum algorithm. Note: checksum values are only used if we
  67    --  generate code, so it is not necessary to worry about making the right
  68    --  sequence of calls in any error situation.
  69
  70    procedure Accumulate_Checksum (C : Char_Code);
  71    pragma Inline (Accumulate_Checksum);
  72    --  This version is identical, except that the argument, C, is a character
  73    --  code value instead of a character. This is used when wide characters
  74    --  are scanned. We use the character code rather than the ASCII characters
  75    --  so that the checksum is independent of wide character encoding method.
  76
  77    procedure Initialize_Checksum;
  78    pragma Inline (Initialize_Checksum);
  79    --  Initialize checksum value
  80
  81    procedure Check_End_Of_Line;
  82    --  Called when end of line encountered. Checks that line is not
  83    --  too long, and that other style checks for the end of line are met.
  84
  85    function Determine_License return License_Type;
  86    --  Scan header of file and check that it has an appropriate GNAT-style
  87    --  header with a proper license statement. Returns GPL, Unrestricted,
  88    --  or Modified_GPL depending on header. If none of these, returns Unknown.
  89
  90    function Double_Char_Token (C : Character) return Boolean;
  91    --  This function is used for double character tokens like := or <>. It
  92    --  checks if the character following Source (Scan_Ptr) is C, and if so
  93    --  bumps Scan_Ptr past the pair of characters and returns True. A space
  94    --  between the two characters is also recognized with an appropriate
  95    --  error message being issued. If C is not present, False is returned.
  96    --  Note that Double_Char_Token can only be used for tokens defined in
  97    --  the Ada syntax (it's use for error cases like && is not appropriate
  98    --  since we do not want a junk message for a case like &-space-&).
  99
 100    procedure Error_Illegal_Character;
 101    --  Give illegal character error, Scan_Ptr points to character. On return,
 102    --  Scan_Ptr is bumped past the illegal character.
 103
 104    procedure Error_Illegal_Wide_Character;
 105    --  Give illegal wide character message. On return, Scan_Ptr is bumped
 106    --  past the illegal character, which may still leave us pointing to
 107    --  junk, not much we can do if the escape sequence is messed up!
 108
 109    procedure Error_Long_Line;
 110    --  Signal error of excessively long line
 111
 112    procedure Error_No_Double_Underline;
 113    --  Signal error of double underline character
 114
 115    procedure Nlit;
 116    --  This is the procedure for scanning out numeric literals. On entry,
 117    --  Scan_Ptr points to the digit that starts the numeric literal (the
 118    --  checksum for this character has not been accumulated yet). On return
 119    --  Scan_Ptr points past the last character of the numeric literal, Token
 120    --  and Token_Node are set appropriately, and the checksum is updated.
 121
 122    function Set_Start_Column return Column_Number;
 123    --  This routine is called with Scan_Ptr pointing to the first character
 124    --  of a line. On exit, Scan_Ptr is advanced to the first non-blank
 125    --  character of this line (or to the terminating format effector if the
 126    --  line contains no non-blank characters), and the returned result is the
 127    --  column number of this non-blank character (zero origin), which is the
 128    --  value to be stored in the Start_Column scan variable.
 129
 130    procedure Slit;
 131    --  This is the procedure for scanning out string literals. On entry,
 132    --  Scan_Ptr points to the opening string quote (the checksum for this
 133    --  character has not been accumulated yet). On return Scan_Ptr points
 134    --  past the closing quote of the string literal, Token and Token_Node
 135    --  are set appropriately, and the checksum is upated.
 136
 137    -------------------------
 138    -- Accumulate_Checksum --
 139    -------------------------
 140
 141    procedure Accumulate_Checksum (C : Character) is
 142    begin
 143       System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
 144    end Accumulate_Checksum;
 145
 146    procedure Accumulate_Checksum (C : Char_Code) is
 147    begin
 148       Accumulate_Checksum (Character'Val (C / 256));
 149       Accumulate_Checksum (Character'Val (C mod 256));
 150    end Accumulate_Checksum;
 151
 152    -----------------------
 153    -- Check_End_Of_Line --
 154    -----------------------
 155
 156    procedure Check_End_Of_Line is
 157       Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
 158
 159    begin
 160       if Len > Hostparm.Max_Line_Length then
 161          Error_Long_Line;
 162
 163       elsif Style_Check then
 164          Style.Check_Line_Terminator (Len);
 165       end if;
 166    end Check_End_Of_Line;
 167
 168    -----------------------
 169    -- Determine_License --
 170    -----------------------
 171
 172    function Determine_License return License_Type is
 173       GPL_Found : Boolean := False;
 174
 175       function Contains (S : String) return Boolean;
 176       --  See if current comment contains successive non-blank characters
 177       --  matching the contents of S. If so leave Scan_Ptr unchanged and
 178       --  return True, otherwise leave Scan_Ptr unchanged and return False.
 179
 180       procedure Skip_EOL;
 181       --  Skip to line terminator character
 182
 183       --------------
 184       -- Contains --
 185       --------------
 186
 187       function Contains (S : String) return Boolean is
 188          CP : Natural;
 189          SP : Source_Ptr;
 190          SS : Source_Ptr;
 191
 192       begin
 193          SP := Scan_Ptr;
 194          while Source (SP) /= CR and then Source (SP) /= LF loop
 195             if Source (SP) = S (S'First) then
 196                SS := SP;
 197                CP := S'First;
 198
 199                loop
 200                   SS := SS + 1;
 201                   CP := CP + 1;
 202
 203                   if CP > S'Last then
 204                      return True;
 205                   end if;
 206
 207                   while Source (SS) = ' ' loop
 208                      SS := SS + 1;
 209                   end loop;
 210
 211                   exit when Source (SS) /= S (CP);
 212                end loop;
 213             end if;
 214
 215             SP := SP + 1;
 216          end loop;
 217
 218          return False;
 219       end Contains;
 220
 221       --------------
 222       -- Skip_EOL --
 223       --------------
 224
 225       procedure Skip_EOL is
 226       begin
 227          while Source (Scan_Ptr) /= CR
 228            and then Source (Scan_Ptr) /= LF
 229          loop
 230             Scan_Ptr := Scan_Ptr + 1;
 231          end loop;
 232       end Skip_EOL;
 233
 234    --  Start of processing for Determine_License
 235
 236    begin
 237       loop
 238          if Source (Scan_Ptr) /= '-'
 239            or else Source (Scan_Ptr + 1) /= '-'
 240          then
 241             if GPL_Found then
 242                return GPL;
 243             else
 244                return Unknown;
 245             end if;
 246
 247          elsif Contains ("Asaspecialexception") then
 248             if GPL_Found then
 249                return Modified_GPL;
 250             end if;
 251
 252          elsif Contains ("GNUGeneralPublicLicense") then
 253             GPL_Found := True;
 254
 255          elsif
 256              Contains
 257                ("ThisspecificationisadaptedfromtheAdaSemanticInterface")
 258            or else
 259              Contains
 260               ("ThisspecificationisderivedfromtheAdaReferenceManual")
 261          then
 262             return Unrestricted;
 263          end if;
 264
 265          Skip_EOL;
 266
 267          Check_End_Of_Line;
 268
 269          declare
 270             Physical : Boolean;
 271
 272          begin
 273             Skip_Line_Terminators (Scan_Ptr, Physical);
 274
 275             --  If we are at start of physical line, update scan pointers
 276             --  to reflect the start of the new line.
 277
 278             if Physical then
 279                Current_Line_Start       := Scan_Ptr;
 280                Start_Column             := Set_Start_Column;
 281                First_Non_Blank_Location := Scan_Ptr;
 282             end if;
 283          end;
 284       end loop;
 285    end Determine_License;
 286
 287    ----------------------------
 288    -- Determine_Token_Casing --
 289    ----------------------------
 290
 291    function Determine_Token_Casing return Casing_Type is
 292    begin
 293       return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
 294    end Determine_Token_Casing;
 295
 296    -----------------------
 297    -- Double_Char_Token --
 298    -----------------------
 299
 300    function Double_Char_Token (C : Character) return Boolean is
 301    begin
 302       if Source (Scan_Ptr + 1) = C then
 303          Accumulate_Checksum (C);
 304          Scan_Ptr := Scan_Ptr + 2;
 305          return True;
 306
 307       elsif Source (Scan_Ptr + 1) = ' '
 308         and then Source (Scan_Ptr + 2) = C
 309       then
 310          Scan_Ptr := Scan_Ptr + 1;
 311          Error_Msg_S ("no space allowed here");
 312          Scan_Ptr := Scan_Ptr + 2;
 313          return True;
 314
 315       else
 316          return False;
 317       end if;
 318    end Double_Char_Token;
 319
 320    -----------------------------
 321    -- Error_Illegal_Character --
 322    -----------------------------
 323
 324    procedure Error_Illegal_Character is
 325    begin
 326       Error_Msg_S ("illegal character");
 327       Scan_Ptr := Scan_Ptr + 1;
 328    end Error_Illegal_Character;
 329
 330    ----------------------------------
 331    -- Error_Illegal_Wide_Character --
 332    ----------------------------------
 333
 334    procedure Error_Illegal_Wide_Character is
 335    begin
 336       if OpenVMS then
 337          Error_Msg_S
 338            ("illegal wide character, check " &
 339             "'/'W'I'D'E'_'C'H'A'R'A'C'T'E'R'_'E'N'C'O'D'I'N'G qualifer");
 340       else
 341          Error_Msg_S
 342            ("illegal wide character, check -gnatW switch");
 343       end if;
 344
 345       Scan_Ptr := Scan_Ptr + 1;
 346    end Error_Illegal_Wide_Character;
 347
 348    ---------------------
 349    -- Error_Long_Line --
 350    ---------------------
 351
 352    procedure Error_Long_Line is
 353    begin
 354       Error_Msg
 355         ("this line is too long",
 356          Current_Line_Start + Hostparm.Max_Line_Length);
 357    end Error_Long_Line;
 358
 359    -------------------------------
 360    -- Error_No_Double_Underline --
 361    -------------------------------
 362
 363    procedure Error_No_Double_Underline is
 364    begin
 365       Error_Msg_S ("two consecutive underlines not permitted");
 366    end Error_No_Double_Underline;
 367
 368    -------------------------
 369    -- Initialize_Checksum --
 370    -------------------------
 371
 372    procedure Initialize_Checksum is
 373    begin
 374       System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
 375    end Initialize_Checksum;
 376
 377    ------------------------
 378    -- Initialize_Scanner --
 379    ------------------------
 380
 381    procedure Initialize_Scanner
 382      (Unit  : Unit_Number_Type;
 383       Index : Source_File_Index)
 384    is
 385       GNAT_Hedr : constant Text_Buffer (1 .. 78) := (others => '-');
 386
 387    begin
 388       --  Set up Token_Type values in Names Table entries for reserved keywords
 389       --  We use the Pos value of the Token_Type value. Note we are relying on
 390       --  the fact that Token_Type'Val (0) is not a reserved word!
 391
 392       Set_Name_Table_Byte (Name_Abort,      Token_Type'Pos (Tok_Abort));
 393       Set_Name_Table_Byte (Name_Abs,        Token_Type'Pos (Tok_Abs));
 394       Set_Name_Table_Byte (Name_Abstract,   Token_Type'Pos (Tok_Abstract));
 395       Set_Name_Table_Byte (Name_Accept,     Token_Type'Pos (Tok_Accept));
 396       Set_Name_Table_Byte (Name_Access,     Token_Type'Pos (Tok_Access));
 397       Set_Name_Table_Byte (Name_And,        Token_Type'Pos (Tok_And));
 398       Set_Name_Table_Byte (Name_Aliased,    Token_Type'Pos (Tok_Aliased));
 399       Set_Name_Table_Byte (Name_All,        Token_Type'Pos (Tok_All));
 400       Set_Name_Table_Byte (Name_Array,      Token_Type'Pos (Tok_Array));
 401       Set_Name_Table_Byte (Name_At,         Token_Type'Pos (Tok_At));
 402       Set_Name_Table_Byte (Name_Begin,      Token_Type'Pos (Tok_Begin));
 403       Set_Name_Table_Byte (Name_Body,       Token_Type'Pos (Tok_Body));
 404       Set_Name_Table_Byte (Name_Case,       Token_Type'Pos (Tok_Case));
 405       Set_Name_Table_Byte (Name_Constant,   Token_Type'Pos (Tok_Constant));
 406       Set_Name_Table_Byte (Name_Declare,    Token_Type'Pos (Tok_Declare));
 407       Set_Name_Table_Byte (Name_Delay,      Token_Type'Pos (Tok_Delay));
 408       Set_Name_Table_Byte (Name_Delta,      Token_Type'Pos (Tok_Delta));
 409       Set_Name_Table_Byte (Name_Digits,     Token_Type'Pos (Tok_Digits));
 410       Set_Name_Table_Byte (Name_Do,         Token_Type'Pos (Tok_Do));
 411       Set_Name_Table_Byte (Name_Else,       Token_Type'Pos (Tok_Else));
 412       Set_Name_Table_Byte (Name_Elsif,      Token_Type'Pos (Tok_Elsif));
 413       Set_Name_Table_Byte (Name_End,        Token_Type'Pos (Tok_End));
 414       Set_Name_Table_Byte (Name_Entry,      Token_Type'Pos (Tok_Entry));
 415       Set_Name_Table_Byte (Name_Exception,  Token_Type'Pos (Tok_Exception));
 416       Set_Name_Table_Byte (Name_Exit,       Token_Type'Pos (Tok_Exit));
 417       Set_Name_Table_Byte (Name_For,        Token_Type'Pos (Tok_For));
 418       Set_Name_Table_Byte (Name_Function,   Token_Type'Pos (Tok_Function));
 419       Set_Name_Table_Byte (Name_Generic,    Token_Type'Pos (Tok_Generic));
 420       Set_Name_Table_Byte (Name_Goto,       Token_Type'Pos (Tok_Goto));
 421       Set_Name_Table_Byte (Name_If,         Token_Type'Pos (Tok_If));
 422       Set_Name_Table_Byte (Name_In,         Token_Type'Pos (Tok_In));
 423       Set_Name_Table_Byte (Name_Is,         Token_Type'Pos (Tok_Is));
 424       Set_Name_Table_Byte (Name_Limited,    Token_Type'Pos (Tok_Limited));
 425       Set_Name_Table_Byte (Name_Loop,       Token_Type'Pos (Tok_Loop));
 426       Set_Name_Table_Byte (Name_Mod,        Token_Type'Pos (Tok_Mod));
 427       Set_Name_Table_Byte (Name_New,        Token_Type'Pos (Tok_New));
 428       Set_Name_Table_Byte (Name_Not,        Token_Type'Pos (Tok_Not));
 429       Set_Name_Table_Byte (Name_Null,       Token_Type'Pos (Tok_Null));
 430       Set_Name_Table_Byte (Name_Of,         Token_Type'Pos (Tok_Of));
 431       Set_Name_Table_Byte (Name_Or,         Token_Type'Pos (Tok_Or));
 432       Set_Name_Table_Byte (Name_Others,     Token_Type'Pos (Tok_Others));
 433       Set_Name_Table_Byte (Name_Out,        Token_Type'Pos (Tok_Out));
 434       Set_Name_Table_Byte (Name_Package,    Token_Type'Pos (Tok_Package));
 435       Set_Name_Table_Byte (Name_Pragma,     Token_Type'Pos (Tok_Pragma));
 436       Set_Name_Table_Byte (Name_Private,    Token_Type'Pos (Tok_Private));
 437       Set_Name_Table_Byte (Name_Procedure,  Token_Type'Pos (Tok_Procedure));
 438       Set_Name_Table_Byte (Name_Protected,  Token_Type'Pos (Tok_Protected));
 439       Set_Name_Table_Byte (Name_Raise,      Token_Type'Pos (Tok_Raise));
 440       Set_Name_Table_Byte (Name_Range,      Token_Type'Pos (Tok_Range));
 441       Set_Name_Table_Byte (Name_Record,     Token_Type'Pos (Tok_Record));
 442       Set_Name_Table_Byte (Name_Rem,        Token_Type'Pos (Tok_Rem));
 443       Set_Name_Table_Byte (Name_Renames,    Token_Type'Pos (Tok_Renames));
 444       Set_Name_Table_Byte (Name_Requeue,    Token_Type'Pos (Tok_Requeue));
 445       Set_Name_Table_Byte (Name_Return,     Token_Type'Pos (Tok_Return));
 446       Set_Name_Table_Byte (Name_Reverse,    Token_Type'Pos (Tok_Reverse));
 447       Set_Name_Table_Byte (Name_Select,     Token_Type'Pos (Tok_Select));
 448       Set_Name_Table_Byte (Name_Separate,   Token_Type'Pos (Tok_Separate));
 449       Set_Name_Table_Byte (Name_Subtype,    Token_Type'Pos (Tok_Subtype));
 450       Set_Name_Table_Byte (Name_Tagged,     Token_Type'Pos (Tok_Tagged));
 451       Set_Name_Table_Byte (Name_Task,       Token_Type'Pos (Tok_Task));
 452       Set_Name_Table_Byte (Name_Terminate,  Token_Type'Pos (Tok_Terminate));
 453       Set_Name_Table_Byte (Name_Then,       Token_Type'Pos (Tok_Then));
 454       Set_Name_Table_Byte (Name_Type,       Token_Type'Pos (Tok_Type));
 455       Set_Name_Table_Byte (Name_Until,      Token_Type'Pos (Tok_Until));
 456       Set_Name_Table_Byte (Name_Use,        Token_Type'Pos (Tok_Use));
 457       Set_Name_Table_Byte (Name_When,       Token_Type'Pos (Tok_When));
 458       Set_Name_Table_Byte (Name_While,      Token_Type'Pos (Tok_While));
 459       Set_Name_Table_Byte (Name_With,       Token_Type'Pos (Tok_With));
 460       Set_Name_Table_Byte (Name_Xor,        Token_Type'Pos (Tok_Xor));
 461
 462       --  Initialize scan control variables
 463
 464       Current_Source_File       := Index;
 465       Source                    := Source_Text (Current_Source_File);
 466       Current_Source_Unit       := Unit;
 467       Scan_Ptr                  := Source_First (Current_Source_File);
 468       Token                     := No_Token;
 469       Token_Ptr                 := Scan_Ptr;
 470       Current_Line_Start        := Scan_Ptr;
 471       Token_Node                := Empty;
 472       Token_Name                := No_Name;
 473       Start_Column              := Set_Start_Column;
 474       First_Non_Blank_Location  := Scan_Ptr;
 475
 476       Initialize_Checksum;
 477
 478       --  Set default for Comes_From_Source. All nodes built now until we
 479       --  reenter the analyzer will have Comes_From_Source set to True
 480
 481       Set_Comes_From_Source_Default (True);
 482
 483       --  Check license if GNAT type header possibly present
 484
 485       if Source_Last (Index) - Scan_Ptr > 80
 486         and then Source (Scan_Ptr .. Scan_Ptr + 77) = GNAT_Hedr
 487       then
 488          Set_License (Current_Source_File, Determine_License);
 489       end if;
 490
 491       --  Scan initial token (note this initializes Prev_Token, Prev_Token_Ptr)
 492
 493       Scan;
 494
 495       --  Clear flags for reserved words used as identifiers
 496
 497       for J in Token_Type loop
 498          Used_As_Identifier (J) := False;
 499       end loop;
 500
 501    end Initialize_Scanner;
 502
 503    ----------
 504    -- Nlit --
 505    ----------
 506
 507    procedure Nlit is separate;
 508
 509    ----------
 510    -- Scan --
 511    ----------
 512
 513    procedure Scan is
 514    begin
 515       Prev_Token := Token;
 516       Prev_Token_Ptr := Token_Ptr;
 517       Token_Name := Error_Name;
 518
 519       --  The following loop runs more than once only if a format effector
 520       --  (tab, vertical tab, form  feed, line feed, carriage return) is
 521       --  encountered and skipped, or some error situation, such as an
 522       --  illegal character, is encountered.
 523
 524       loop
 525          --  Skip past blanks, loop is opened up for speed
 526
 527          while Source (Scan_Ptr) = ' ' loop
 528
 529             if Source (Scan_Ptr + 1) /= ' ' then
 530                Scan_Ptr := Scan_Ptr + 1;
 531                exit;
 532             end if;
 533
 534             if Source (Scan_Ptr + 2) /= ' ' then
 535                Scan_Ptr := Scan_Ptr + 2;
 536                exit;
 537             end if;
 538
 539             if Source (Scan_Ptr + 3) /= ' ' then
 540                Scan_Ptr := Scan_Ptr + 3;
 541                exit;
 542             end if;
 543
 544             if Source (Scan_Ptr + 4) /= ' ' then
 545                Scan_Ptr := Scan_Ptr + 4;
 546                exit;
 547             end if;
 548
 549             if Source (Scan_Ptr + 5) /= ' ' then
 550                Scan_Ptr := Scan_Ptr + 5;
 551                exit;
 552             end if;
 553
 554             if Source (Scan_Ptr + 6) /= ' ' then
 555                Scan_Ptr := Scan_Ptr + 6;
 556                exit;
 557             end if;
 558
 559             if Source (Scan_Ptr + 7) /= ' ' then
 560                Scan_Ptr := Scan_Ptr + 7;
 561                exit;
 562             end if;
 563
 564             Scan_Ptr := Scan_Ptr + 8;
 565          end loop;
 566
 567          --  We are now at a non-blank character, which is the first character
 568          --  of the token we will scan, and hence the value of Token_Ptr.
 569
 570          Token_Ptr := Scan_Ptr;
 571
 572          --  Here begins the main case statement which transfers control on
 573          --  the basis of the non-blank character we have encountered.
 574
 575          case Source (Scan_Ptr) is
 576
 577          --  Line terminator characters
 578
 579          when CR | LF | FF | VT => Line_Terminator_Case : begin
 580
 581             --  Check line too long
 582
 583             Check_End_Of_Line;
 584
 585             declare
 586                Physical : Boolean;
 587
 588             begin
 589                Skip_Line_Terminators (Scan_Ptr, Physical);
 590
 591                --  If we are at start of physical line, update scan pointers
 592                --  to reflect the start of the new line.
 593
 594                if Physical then
 595                   Current_Line_Start       := Scan_Ptr;
 596                   Start_Column             := Set_Start_Column;
 597                   First_Non_Blank_Location := Scan_Ptr;
 598                end if;
 599             end;
 600          end Line_Terminator_Case;
 601
 602          --  Horizontal tab, just skip past it
 603
 604          when HT =>
 605             if Style_Check then Style.Check_HT; end if;
 606             Scan_Ptr := Scan_Ptr + 1;
 607
 608          --  End of file character, treated as an end of file only if it
 609          --  is the last character in the buffer, otherwise it is ignored.
 610
 611          when EOF =>
 612             if Scan_Ptr = Source_Last (Current_Source_File) then
 613                Check_End_Of_Line;
 614                Token := Tok_EOF;
 615                return;
 616
 617             else
 618                Scan_Ptr := Scan_Ptr + 1;
 619             end if;
 620
 621          --  Ampersand
 622
 623          when '&' =>
 624             Accumulate_Checksum ('&');
 625
 626             if Source (Scan_Ptr + 1) = '&' then
 627                Error_Msg_S ("'&'& should be `AND THEN`");
 628                Scan_Ptr := Scan_Ptr + 2;
 629                Token := Tok_And;
 630                return;
 631
 632             else
 633                Scan_Ptr := Scan_Ptr + 1;
 634                Token := Tok_Ampersand;
 635                return;
 636             end if;
 637
 638          --  Asterisk (can be multiplication operator or double asterisk
 639          --  which is the exponentiation compound delimtier).
 640
 641          when '*' =>
 642             Accumulate_Checksum ('*');
 643
 644             if Source (Scan_Ptr + 1) = '*' then
 645                Accumulate_Checksum ('*');
 646                Scan_Ptr := Scan_Ptr + 2;
 647                Token := Tok_Double_Asterisk;
 648                return;
 649
 650             else
 651                Scan_Ptr := Scan_Ptr + 1;
 652                Token := Tok_Asterisk;
 653                return;
 654             end if;
 655
 656          --  Colon, which can either be an isolated colon, or part of an
 657          --  assignment compound delimiter.
 658
 659          when ':' =>
 660             Accumulate_Checksum (':');
 661
 662             if Double_Char_Token ('=') then
 663                Token := Tok_Colon_Equal;
 664                if Style_Check then Style.Check_Colon_Equal; end if;
 665                return;
 666
 667             elsif Source (Scan_Ptr + 1) = '-'
 668               and then Source (Scan_Ptr + 2) /= '-'
 669             then
 670                Token := Tok_Colon_Equal;
 671                Error_Msg (":- should be :=", Scan_Ptr);
 672                Scan_Ptr := Scan_Ptr + 2;
 673                return;
 674
 675             else
 676                Scan_Ptr := Scan_Ptr + 1;
 677                Token := Tok_Colon;
 678                if Style_Check then Style.Check_Colon; end if;
 679                return;
 680             end if;
 681
 682          --  Left parenthesis
 683
 684          when '(' =>
 685             Accumulate_Checksum ('(');
 686             Scan_Ptr := Scan_Ptr + 1;
 687             Token := Tok_Left_Paren;
 688             if Style_Check then Style.Check_Left_Paren; end if;
 689             return;
 690
 691          --  Left bracket
 692
 693          when '[' =>
 694             if Source (Scan_Ptr + 1) = '"' then
 695                Name_Len := 0;
 696                goto Scan_Identifier;
 697
 698             else
 699                Error_Msg_S ("illegal character, replaced by ""(""");
 700                Scan_Ptr := Scan_Ptr + 1;
 701                Token := Tok_Left_Paren;
 702                return;
 703             end if;
 704
 705          --  Left brace
 706
 707          when '{' =>
 708             Error_Msg_S ("illegal character, replaced by ""(""");
 709             Scan_Ptr := Scan_Ptr + 1;
 710             Token := Tok_Left_Paren;
 711             return;
 712
 713          --  Comma
 714
 715          when ',' =>
 716             Accumulate_Checksum (',');
 717             Scan_Ptr := Scan_Ptr + 1;
 718             Token := Tok_Comma;
 719             if Style_Check then Style.Check_Comma; end if;
 720             return;
 721
 722          --  Dot, which is either an isolated period, or part of a double
 723          --  dot compound delimiter sequence. We also check for the case of
 724          --  a digit following the period, to give a better error message.
 725
 726          when '.' =>
 727             Accumulate_Checksum ('.');
 728
 729             if Double_Char_Token ('.') then
 730                Token := Tok_Dot_Dot;
 731                if Style_Check then Style.Check_Dot_Dot; end if;
 732                return;
 733
 734             elsif Source (Scan_Ptr + 1) in '0' .. '9' then
 735                Error_Msg_S ("numeric literal cannot start with point");
 736                Scan_Ptr := Scan_Ptr + 1;
 737
 738             else
 739                Scan_Ptr := Scan_Ptr + 1;
 740                Token := Tok_Dot;
 741                return;
 742             end if;
 743
 744          --  Equal, which can either be an equality operator, or part of the
 745          --  arrow (=>) compound delimiter.
 746
 747          when '=' =>
 748             Accumulate_Checksum ('=');
 749
 750             if Double_Char_Token ('>') then
 751                Token := Tok_Arrow;
 752                if Style_Check then Style.Check_Arrow; end if;
 753                return;
 754
 755             elsif Source (Scan_Ptr + 1) = '=' then
 756                Error_Msg_S ("== should be =");
 757                Scan_Ptr := Scan_Ptr + 1;
 758             end if;
 759
 760             Scan_Ptr := Scan_Ptr + 1;
 761             Token := Tok_Equal;
 762             return;
 763
 764          --  Greater than, which can be a greater than operator, greater than
 765          --  or equal operator, or first character of a right label bracket.
 766
 767          when '>' =>
 768             Accumulate_Checksum ('>');
 769
 770             if Double_Char_Token ('=') then
 771                Token := Tok_Greater_Equal;
 772                return;
 773
 774             elsif Double_Char_Token ('>') then
 775                Token := Tok_Greater_Greater;
 776                return;
 777
 778             else
 779                Scan_Ptr := Scan_Ptr + 1;
 780                Token := Tok_Greater;
 781                return;
 782             end if;
 783
 784          --  Less than, which can be a less than operator, less than or equal
 785          --  operator, or the first character of a left label bracket, or the
 786          --  first character of a box (<>) compound delimiter.
 787
 788          when '<' =>
 789             Accumulate_Checksum ('<');
 790
 791             if Double_Char_Token ('=') then
 792                Token := Tok_Less_Equal;
 793                return;
 794
 795             elsif Double_Char_Token ('>') then
 796                Token := Tok_Box;
 797                if Style_Check then Style.Check_Box; end if;
 798                return;
 799
 800             elsif Double_Char_Token ('<') then
 801                Token := Tok_Less_Less;
 802                return;
 803
 804             else
 805                Scan_Ptr := Scan_Ptr + 1;
 806                Token := Tok_Less;
 807                return;
 808             end if;
 809
 810          --  Minus, which is either a subtraction operator, or the first
 811          --  character of double minus starting a comment
 812
 813          when '-' => Minus_Case : begin
 814             if Source (Scan_Ptr + 1) = '>' then
 815                Error_Msg_S ("invalid token");
 816                Scan_Ptr := Scan_Ptr + 2;
 817                Token := Tok_Arrow;
 818                return;
 819
 820             elsif Source (Scan_Ptr + 1) /= '-' then
 821                Accumulate_Checksum ('-');
 822                Scan_Ptr := Scan_Ptr + 1;
 823                Token := Tok_Minus;
 824                return;
 825
 826             --  Comment
 827
 828             else -- Source (Scan_Ptr + 1) = '-' then
 829                if Style_Check then Style.Check_Comment; end if;
 830                Scan_Ptr := Scan_Ptr + 2;
 831
 832                --  Loop to scan comment (this loop runs more than once only if
 833                --  a horizontal tab or other non-graphic character is scanned)
 834
 835                loop
 836                   --  Scan to non graphic character (opened up for speed)
 837
 838                   loop
 839                      exit when Source (Scan_Ptr) not in Graphic_Character;
 840                      Scan_Ptr := Scan_Ptr + 1;
 841                      exit when Source (Scan_Ptr) not in Graphic_Character;
 842                      Scan_Ptr := Scan_Ptr + 1;
 843                      exit when Source (Scan_Ptr) not in Graphic_Character;
 844                      Scan_Ptr := Scan_Ptr + 1;
 845                      exit when Source (Scan_Ptr) not in Graphic_Character;
 846                      Scan_Ptr := Scan_Ptr + 1;
 847                      exit when Source (Scan_Ptr) not in Graphic_Character;
 848                      Scan_Ptr := Scan_Ptr + 1;
 849                   end loop;
 850
 851                   --  Keep going if horizontal tab
 852
 853                   if Source (Scan_Ptr) = HT then
 854                      if Style_Check then Style.Check_HT; end if;
 855                      Scan_Ptr := Scan_Ptr + 1;
 856
 857                   --  Terminate scan of comment if line terminator
 858
 859                   elsif Source (Scan_Ptr) in Line_Terminator then
 860                      exit;
 861
 862                   --  Terminate scan of comment if end of file encountered
 863                   --  (embedded EOF character or real last character in file)
 864
 865                   elsif Source (Scan_Ptr) = EOF then
 866                      exit;
 867
 868                   --  Keep going if character in 80-FF range, or is ESC. These
 869                   --  characters are allowed in comments by RM-2.1(1), 2.7(2).
 870                   --  They are allowed even in Ada 83 mode according to the
 871                   --  approved AI. ESC was added to the AI in June 93.
 872
 873                   elsif Source (Scan_Ptr) in Upper_Half_Character
 874                     or else Source (Scan_Ptr) = ESC
 875                   then
 876                      Scan_Ptr := Scan_Ptr + 1;
 877
 878                   --  Otherwise we have an illegal comment character
 879
 880                   else
 881                      Error_Illegal_Character;
 882                   end if;
 883
 884                end loop;
 885
 886                --  Note that we do NOT execute a return here, instead we fall
 887                --  through to reexecute the scan loop to look for a token.
 888
 889             end if;
 890          end Minus_Case;
 891
 892          --  Double quote or percent starting a string literal
 893
 894          when '"' | '%' =>
 895             Slit;
 896             return;
 897
 898          --  Apostrophe. This can either be the start of a character literal,
 899          --  or an isolated apostrophe used in a qualified expression or an
 900          --  attribute. We treat it as a character literal if it does not
 901          --  follow a right parenthesis, identifier, the keyword ALL or
 902          --  a literal. This means that we correctly treat constructs like:
 903
 904          --    A := CHARACTER'('A');
 905
 906          --  Note that RM-2.2(7) does not require a separator between
 907          --  "CHARACTER" and "'" in the above.
 908
 909          when ''' => Char_Literal_Case : declare
 910             Code : Char_Code;
 911             Err  : Boolean;
 912
 913          begin
 914             Accumulate_Checksum (''');
 915             Scan_Ptr := Scan_Ptr + 1;
 916
 917             --  Here is where we make the test to distinguish the cases. Treat
 918             --  as apostrophe if previous token is an identifier, right paren
 919             --  or the reserved word "all" (latter case as in A.all'Address)
 920             --  Also treat it as apostrophe after a literal (this catches
 921             --  some legitimate cases, like A."abs"'Address, and also gives
 922             --  better error behavior for impossible cases like 123'xxx).
 923
 924             if Prev_Token = Tok_Identifier
 925                or else Prev_Token = Tok_Right_Paren
 926                or else Prev_Token = Tok_All
 927                or else Prev_Token in Token_Class_Literal
 928             then
 929                Token := Tok_Apostrophe;
 930                return;
 931
 932             --  Otherwise the apostrophe starts a character literal
 933
 934             else
 935                --  Case of wide character literal with ESC or [ encoding
 936
 937                if (Source (Scan_Ptr) = ESC
 938                      and then
 939                     Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
 940                  or else
 941                    (Source (Scan_Ptr) in Upper_Half_Character
 942                      and then
 943                     Upper_Half_Encoding)
 944                  or else
 945                    (Source (Scan_Ptr) = '['
 946                      and then
 947                     Source (Scan_Ptr + 1) = '"')
 948                then
 949                   Scan_Wide (Source, Scan_Ptr, Code, Err);
 950                   Accumulate_Checksum (Code);
 951
 952                   if Err then
 953                      Error_Illegal_Wide_Character;
 954                   end if;
 955
 956                   if Source (Scan_Ptr) /= ''' then
 957                      Error_Msg_S ("missing apostrophe");
 958                   else
 959                      Scan_Ptr := Scan_Ptr + 1;
 960                   end if;
 961
 962                --  If we do not find a closing quote in the expected place then
 963                --  assume that we have a misguided attempt at a string literal.
 964
 965                --  However, if previous token is RANGE, then we return an
 966                --  apostrophe instead since this gives better error recovery
 967
 968                elsif Source (Scan_Ptr + 1) /= ''' then
 969
 970                   if Prev_Token = Tok_Range then
 971                      Token := Tok_Apostrophe;
 972                      return;
 973
 974                   else
 975                      Scan_Ptr := Scan_Ptr - 1;
 976                      Error_Msg_S
 977                        ("strings are delimited by double quote character");
 978                      Scn.Slit;
 979                      return;
 980                   end if;
 981
 982                --  Otherwise we have a (non-wide) character literal
 983
 984                else
 985                   Accumulate_Checksum (Source (Scan_Ptr));
 986
 987                   if Source (Scan_Ptr) not in Graphic_Character then
 988                      if Source (Scan_Ptr) in Upper_Half_Character then
 989                         if Ada_83 then
 990                            Error_Illegal_Character;
 991                         end if;
 992
 993                      else
 994                         Error_Illegal_Character;
 995                      end if;
 996                   end if;
 997
 998                   Code := Get_Char_Code (Source (Scan_Ptr));
 999                   Scan_Ptr := Scan_Ptr + 2;
1000                end if;
1001
1002                --  Fall through here with Scan_Ptr updated past the closing
1003                --  quote, and Code set to the Char_Code value for the literal
1004
1005                Accumulate_Checksum (''');
1006                Token := Tok_Char_Literal;
1007                Token_Node := New_Node (N_Character_Literal, Token_Ptr);
1008                Set_Char_Literal_Value (Token_Node, Code);
1009                Set_Character_Literal_Name (Code);
1010                Token_Name := Name_Find;
1011                Set_Chars (Token_Node, Token_Name);
1012                return;
1013             end if;
1014          end Char_Literal_Case;
1015
1016          --  Right parenthesis
1017
1018          when ')' =>
1019             Accumulate_Checksum (')');
1020             Scan_Ptr := Scan_Ptr + 1;
1021             Token := Tok_Right_Paren;
1022             if Style_Check then Style.Check_Right_Paren; end if;
1023             return;
1024
1025          --  Right bracket or right brace, treated as right paren
1026
1027          when ']' | '}' =>
1028             Error_Msg_S ("illegal character, replaced by "")""");
1029             Scan_Ptr := Scan_Ptr + 1;
1030             Token := Tok_Right_Paren;
1031             return;
1032
1033          --  Slash (can be division operator or first character of not equal)
1034
1035          when '/' =>
1036             Accumulate_Checksum ('/');
1037
1038             if Double_Char_Token ('=') then
1039                Token := Tok_Not_Equal;
1040                return;
1041             else
1042                Scan_Ptr := Scan_Ptr + 1;
1043                Token := Tok_Slash;
1044                return;
1045             end if;
1046
1047          --  Semicolon
1048
1049          when ';' =>
1050             Accumulate_Checksum (';');
1051             Scan_Ptr := Scan_Ptr + 1;
1052             Token := Tok_Semicolon;
1053             if Style_Check then Style.Check_Semicolon; end if;
1054             return;
1055
1056          --  Vertical bar
1057
1058          when '|' => Vertical_Bar_Case : begin
1059             Accumulate_Checksum ('|');
1060
1061             --  Special check for || to give nice message
1062
1063             if Source (Scan_Ptr + 1) = '|' then
1064                Error_Msg_S ("""||"" should be `OR ELSE`");
1065                Scan_Ptr := Scan_Ptr + 2;
1066                Token := Tok_Or;
1067                return;
1068
1069             else
1070                Scan_Ptr := Scan_Ptr + 1;
1071                Token := Tok_Vertical_Bar;
1072                if Style_Check then Style.Check_Vertical_Bar; end if;
1073                return;
1074             end if;
1075          end Vertical_Bar_Case;
1076
1077          --  Exclamation, replacement character for vertical bar
1078
1079          when '!' => Exclamation_Case : begin
1080             Accumulate_Checksum ('!');
1081
1082             if Source (Scan_Ptr + 1) = '=' then
1083                Error_Msg_S ("'!= should be /=");
1084                Scan_Ptr := Scan_Ptr + 2;
1085                Token := Tok_Not_Equal;
1086                return;
1087
1088             else
1089                Scan_Ptr := Scan_Ptr + 1;
1090                Token := Tok_Vertical_Bar;
1091                return;
1092             end if;
1093
1094          end Exclamation_Case;
1095
1096          --  Plus
1097
1098          when '+' => Plus_Case : begin
1099             Accumulate_Checksum ('+');
1100             Scan_Ptr := Scan_Ptr + 1;
1101             Token := Tok_Plus;
1102             return;
1103          end Plus_Case;
1104
1105          --  Digits starting a numeric literal
1106
1107          when '0' .. '9' =>
1108             Nlit;
1109
1110             if Identifier_Char (Source (Scan_Ptr)) then
1111                Error_Msg_S
1112                  ("delimiter required between literal and identifier");
1113             end if;
1114
1115             return;
1116
1117          --  Lower case letters
1118
1119          when 'a' .. 'z' =>
1120             Name_Len := 1;
1121             Name_Buffer (1) := Source (Scan_Ptr);
1122             Accumulate_Checksum (Name_Buffer (1));
1123             Scan_Ptr := Scan_Ptr + 1;
1124             goto Scan_Identifier;
1125
1126          --  Upper case letters
1127
1128          when 'A' .. 'Z' =>
1129             Name_Len := 1;
1130             Name_Buffer (1) :=
1131               Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1132             Accumulate_Checksum (Name_Buffer (1));
1133             Scan_Ptr := Scan_Ptr + 1;
1134             goto Scan_Identifier;
1135
1136          --  Underline character
1137
1138          when '_' =>
1139             Error_Msg_S ("identifier cannot start with underline");
1140             Name_Len := 1;
1141             Name_Buffer (1) := '_';
1142             Scan_Ptr := Scan_Ptr + 1;
1143             goto Scan_Identifier;
1144
1145          --  Space (not possible, because we scanned past blanks)
1146
1147          when ' ' =>
1148             raise Program_Error;
1149
1150          --  Characters in top half of ASCII 8-bit chart
1151
1152          when Upper_Half_Character =>
1153
1154             --  Wide character case. Note that Scan_Identifier will issue
1155             --  an appropriate message if wide characters are not allowed
1156             --  in identifiers.
1157
1158             if Upper_Half_Encoding then
1159                Name_Len := 0;
1160                goto Scan_Identifier;
1161
1162             --  Otherwise we have OK Latin-1 character
1163
1164             else
1165                --  Upper half characters may possibly be identifier letters
1166                --  but can never be digits, so Identifier_Character can be
1167                --  used to test for a valid start of identifier character.
1168
1169                if Identifier_Char (Source (Scan_Ptr)) then
1170                   Name_Len := 0;
1171                   goto Scan_Identifier;
1172                else
1173                   Error_Illegal_Character;
1174                end if;
1175             end if;
1176
1177          when ESC =>
1178
1179             --  ESC character, possible start of identifier if wide characters
1180             --  using ESC encoding are allowed in identifiers, which we can
1181             --  tell by looking at the Identifier_Char flag for ESC, which is
1182             --  only true if these conditions are met.
1183
1184             if Identifier_Char (ESC) then
1185                Name_Len := 0;
1186                goto Scan_Identifier;
1187             else
1188                Error_Illegal_Wide_Character;
1189             end if;
1190
1191          --  Invalid control characters
1192
1193          when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | SO  |
1194               SI  | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1195               EM  | FS  | GS  | RS  | US  | DEL
1196          =>
1197             Error_Illegal_Character;
1198
1199          --  Invalid graphic characters
1200
1201          when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1202             Error_Illegal_Character;
1203
1204          --  End switch on non-blank character
1205
1206          end case;
1207
1208       --  End loop past format effectors. The exit from this loop is by
1209       --  executing a return statement following completion of token scan
1210       --  (control never falls out of this loop to the code which follows)
1211
1212       end loop;
1213
1214       --  Identifier scanning routine. On entry, some initial characters
1215       --  of the identifier may have already been stored in Name_Buffer.
1216       --  If so, Name_Len has the number of characters stored. otherwise
1217       --  Name_Len is set to zero on entry.
1218
1219       <<Scan_Identifier>>
1220
1221          --  This loop scans as fast as possible past lower half letters
1222          --  and digits, which we expect to be the most common characters.
1223
1224          loop
1225             if Source (Scan_Ptr) in 'a' .. 'z'
1226               or else Source (Scan_Ptr) in '0' .. '9'
1227             then
1228                Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
1229                Accumulate_Checksum (Source (Scan_Ptr));
1230
1231             elsif Source (Scan_Ptr) in 'A' .. 'Z' then
1232                Name_Buffer (Name_Len + 1) :=
1233                  Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1234                Accumulate_Checksum (Name_Buffer (Name_Len + 1));
1235             else
1236                exit;
1237             end if;
1238
1239             --  Open out the loop a couple of times for speed
1240
1241             if Source (Scan_Ptr + 1) in 'a' .. 'z'
1242               or else Source (Scan_Ptr + 1) in '0' .. '9'
1243             then
1244                Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
1245                Accumulate_Checksum (Source (Scan_Ptr + 1));
1246
1247             elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
1248                Name_Buffer (Name_Len + 2) :=
1249                  Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
1250                Accumulate_Checksum (Name_Buffer (Name_Len + 2));
1251
1252             else
1253                Scan_Ptr := Scan_Ptr + 1;
1254                Name_Len := Name_Len + 1;
1255                exit;
1256             end if;
1257
1258             if Source (Scan_Ptr + 2) in 'a' .. 'z'
1259               or else Source (Scan_Ptr + 2) in '0' .. '9'
1260             then
1261                Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
1262                Accumulate_Checksum (Source (Scan_Ptr + 2));
1263
1264             elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
1265                Name_Buffer (Name_Len + 3) :=
1266                  Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
1267                Accumulate_Checksum (Name_Buffer (Name_Len + 3));
1268             else
1269                Scan_Ptr := Scan_Ptr + 2;
1270                Name_Len := Name_Len + 2;
1271                exit;
1272             end if;
1273
1274             if Source (Scan_Ptr + 3) in 'a' .. 'z'
1275               or else Source (Scan_Ptr + 3) in '0' .. '9'
1276             then
1277                Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
1278                Accumulate_Checksum (Source (Scan_Ptr + 3));
1279
1280             elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
1281                Name_Buffer (Name_Len + 4) :=
1282                  Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
1283                Accumulate_Checksum (Name_Buffer (Name_Len + 4));
1284
1285             else
1286                Scan_Ptr := Scan_Ptr + 3;
1287                Name_Len := Name_Len + 3;
1288                exit;
1289             end if;
1290
1291             Scan_Ptr := Scan_Ptr + 4;
1292             Name_Len := Name_Len + 4;
1293          end loop;
1294
1295          --  If we fall through, then we have encountered either an underline
1296          --  character, or an extended identifier character (i.e. one from the
1297          --  upper half), or a wide character, or an identifier terminator.
1298          --  The initial test speeds us up in the most common case where we
1299          --  have an identifier terminator. Note that ESC is an identifier
1300          --  character only if a wide character encoding method that uses
1301          --  ESC encoding is active, so if we find an ESC character we know
1302          --  that we have a wide character.
1303
1304          if Identifier_Char (Source (Scan_Ptr)) then
1305
1306             --  Case of underline, check for error cases of double underline,
1307             --  and for a trailing underline character
1308
1309             if Source (Scan_Ptr) = '_' then
1310                Accumulate_Checksum ('_');
1311                Name_Len := Name_Len + 1;
1312                Name_Buffer (Name_Len) := '_';
1313
1314                if Identifier_Char (Source (Scan_Ptr + 1)) then
1315                   Scan_Ptr := Scan_Ptr + 1;
1316
1317                   if Source (Scan_Ptr) = '_' then
1318                      Error_No_Double_Underline;
1319                   end if;
1320
1321                else
1322                   Error_Msg_S ("identifier cannot end with underline");
1323                   Scan_Ptr := Scan_Ptr + 1;
1324                end if;
1325
1326                goto Scan_Identifier;
1327
1328             --  Upper half character
1329
1330             elsif Source (Scan_Ptr) in Upper_Half_Character
1331               and then not Upper_Half_Encoding
1332             then
1333                Accumulate_Checksum (Source (Scan_Ptr));
1334                Store_Encoded_Character
1335                  (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
1336                Scan_Ptr := Scan_Ptr + 1;
1337                goto Scan_Identifier;
1338
1339             --  Left bracket not followed by a quote terminates an identifier.
1340             --  This is an error, but we don't want to give a junk error msg
1341             --  about wide characters in this case!
1342
1343             elsif Source (Scan_Ptr) = '['
1344               and then Source (Scan_Ptr + 1) /= '"'
1345             then
1346                null;
1347
1348             --  We know we have a wide character encoding here (the current
1349             --  character is either ESC, left bracket, or an upper half
1350             --  character depending on the encoding method).
1351
1352             else
1353                --  Scan out the wide character and insert the appropriate
1354                --  encoding into the name table entry for the identifier.
1355
1356                declare
1357                   Sptr : constant Source_Ptr := Scan_Ptr;
1358                   Code : Char_Code;
1359                   Err  : Boolean;
1360
1361                begin
1362                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1363                   Accumulate_Checksum (Code);
1364
1365                   if Err then
1366                      Error_Illegal_Wide_Character;
1367                   else
1368                      Store_Encoded_Character (Code);
1369                   end if;
1370
1371                   --  Make sure we are allowing wide characters in identifiers.
1372                   --  Note that we allow wide character notation for an OK
1373                   --  identifier character. This in particular allows bracket
1374                   --  or other notation to be used for upper half letters.
1375
1376                   if Identifier_Character_Set /= 'w'
1377                     and then
1378                       (not In_Character_Range (Code)
1379                          or else
1380                        not Identifier_Char (Get_Character (Code)))
1381                   then
1382                      Error_Msg
1383                        ("wide character not allowed in identifier", Sptr);
1384                   end if;
1385                end;
1386
1387                goto Scan_Identifier;
1388             end if;
1389          end if;
1390
1391          --  Scan of identifier is complete. The identifier is stored in
1392          --  Name_Buffer, and Scan_Ptr points past the last character.
1393
1394          Token_Name := Name_Find;
1395
1396          --  Here is where we check if it was a keyword
1397
1398          if Get_Name_Table_Byte (Token_Name) /= 0
1399            and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
1400          then
1401             Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
1402
1403             --  Deal with possible style check for non-lower case keyword,
1404             --  but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
1405             --  for this purpose if they appear as attribute designators.
1406             --  Actually we only check the first character for speed.
1407
1408             if Style_Check
1409               and then Source (Token_Ptr) <= 'Z'
1410               and then (Prev_Token /= Tok_Apostrophe
1411                           or else
1412                             (Token /= Tok_Access
1413                                and then Token /= Tok_Delta
1414                                and then Token /= Tok_Digits
1415                                and then Token /= Tok_Range))
1416             then
1417                Style.Non_Lower_Case_Keyword;
1418             end if;
1419
1420             --  We must reset Token_Name since this is not an identifier
1421             --  and if we leave Token_Name set, the parser gets confused
1422             --  because it thinks it is dealing with an identifier instead
1423             --  of the corresponding keyword.
1424
1425             Token_Name := No_Name;
1426             return;
1427
1428          --  It is an identifier after all
1429
1430          else
1431             Token_Node := New_Node (N_Identifier, Token_Ptr);
1432             Set_Chars (Token_Node, Token_Name);
1433             Token := Tok_Identifier;
1434             return;
1435          end if;
1436    end Scan;
1437
1438    ---------------------
1439    -- Scan_First_Char --
1440    ---------------------
1441
1442    function Scan_First_Char return Source_Ptr is
1443       Ptr : Source_Ptr := Current_Line_Start;
1444
1445    begin
1446       loop
1447          if Source (Ptr) = ' ' then
1448             Ptr := Ptr + 1;
1449
1450          elsif Source (Ptr) = HT then
1451             if Style_Check then Style.Check_HT; end if;
1452             Ptr := Ptr + 1;
1453
1454          else
1455             return Ptr;
1456          end if;
1457       end loop;
1458    end Scan_First_Char;
1459
1460    ------------------------------
1461    -- Scan_Reserved_Identifier --
1462    ------------------------------
1463
1464    procedure Scan_Reserved_Identifier (Force_Msg : Boolean) is
1465       Token_Chars : constant String := Token_Type'Image (Token);
1466
1467    begin
1468       --  We have in Token_Chars the image of the Token name, i.e. Tok_xxx.
1469       --  This code extracts the xxx and makes an identifier out of it.
1470
1471       Name_Len := 0;
1472
1473       for J in 5 .. Token_Chars'Length loop
1474          Name_Len := Name_Len + 1;
1475          Name_Buffer (Name_Len) := Fold_Lower (Token_Chars (J));
1476       end loop;
1477
1478       Token_Name := Name_Find;
1479
1480       if not Used_As_Identifier (Token) or else Force_Msg then
1481          Error_Msg_Name_1 := Token_Name;
1482          Error_Msg_SC ("reserved word* cannot be used as identifier!");
1483          Used_As_Identifier (Token) := True;
1484       end if;
1485
1486       Token := Tok_Identifier;
1487       Token_Node := New_Node (N_Identifier, Token_Ptr);
1488       Set_Chars (Token_Node, Token_Name);
1489    end Scan_Reserved_Identifier;
1490
1491    ----------------------
1492    -- Set_Start_Column --
1493    ----------------------
1494
1495    --  Note: it seems at first glance a little expensive to compute this value
1496    --  for every source line (since it is certainly not used for all source
1497    --  lines). On the other hand, it doesn't take much more work to skip past
1498    --  the initial white space on the line counting the columns than it would
1499    --  to scan past the white space using the standard scanning circuits.
1500
1501    function Set_Start_Column return Column_Number is
1502       Start_Column : Column_Number := 0;
1503
1504    begin
1505       --  Outer loop scans past horizontal tab characters
1506
1507       Tabs_Loop : loop
1508
1509          --  Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
1510          --  past the blanks and adjusting Start_Column to account for them.
1511
1512          Blanks_Loop : loop
1513             if Source (Scan_Ptr) = ' ' then
1514                if Source (Scan_Ptr + 1) = ' ' then
1515                   if Source (Scan_Ptr + 2) = ' ' then
1516                      if Source (Scan_Ptr + 3) = ' ' then
1517                         if Source (Scan_Ptr + 4) = ' ' then
1518                            if Source (Scan_Ptr + 5) = ' ' then
1519                               if Source (Scan_Ptr + 6) = ' ' then
1520                                  Scan_Ptr := Scan_Ptr + 7;
1521                                  Start_Column := Start_Column + 7;
1522                               else
1523                                  Scan_Ptr := Scan_Ptr + 6;
1524                                  Start_Column := Start_Column + 6;
1525                                  exit Blanks_Loop;
1526                               end if;
1527                            else
1528                               Scan_Ptr := Scan_Ptr + 5;
1529                               Start_Column := Start_Column + 5;
1530                               exit Blanks_Loop;
1531                            end if;
1532                         else
1533                            Scan_Ptr := Scan_Ptr + 4;
1534                            Start_Column := Start_Column + 4;
1535                            exit Blanks_Loop;
1536                         end if;
1537                      else
1538                         Scan_Ptr := Scan_Ptr + 3;
1539                         Start_Column := Start_Column + 3;
1540                         exit Blanks_Loop;
1541                      end if;
1542                   else
1543                      Scan_Ptr := Scan_Ptr + 2;
1544                      Start_Column := Start_Column + 2;
1545                      exit Blanks_Loop;
1546                   end if;
1547                else
1548                   Scan_Ptr := Scan_Ptr + 1;
1549                   Start_Column := Start_Column + 1;
1550                   exit Blanks_Loop;
1551                end if;
1552             else
1553                exit Blanks_Loop;
1554             end if;
1555          end loop Blanks_Loop;
1556
1557          --  Outer loop keeps going only if a horizontal tab follows
1558
1559          if Source (Scan_Ptr) = HT then
1560             if Style_Check then Style.Check_HT; end if;
1561             Scan_Ptr := Scan_Ptr + 1;
1562             Start_Column := (Start_Column / 8) * 8 + 8;
1563          else
1564             exit Tabs_Loop;
1565          end if;
1566
1567       end loop Tabs_Loop;
1568
1569       return Start_Column;
1570    end Set_Start_Column;
1571
1572    ----------
1573    -- Slit --
1574    ----------
1575
1576    procedure Slit is separate;
1577
1578 end Scn;