gcc/ada/scn.adb

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                  S C N                                   --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2002 Free Software Foundation, Inc.          --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  17 -- for  more details.  You should have  received  a copy of the GNU General --
  18 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
  19 -- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
  20 -- MA 02111-1307, USA.                                                      --
  21 --                                                                          --
  22 -- GNAT was originally developed  by the GNAT team at  New York University. --
  23 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  24 --                                                                          --
  25 ------------------------------------------------------------------------------
  26
  27 with Atree;    use Atree;
  28 with Csets;    use Csets;
  29 with Errout;   use Errout;
  30 with Hostparm; use Hostparm;
  31 with Namet;    use Namet;
  32 with Opt;      use Opt;
  33 with Scans;    use Scans;
  34 with Sinput;   use Sinput;
  35 with Sinfo;    use Sinfo;
  36 with Snames;   use Snames;
  37 with Style;
  38 with Widechar; use Widechar;
  39
  40 with System.CRC32;
  41 with System.WCh_Con; use System.WCh_Con;
  42
  43 package body Scn is
  44
  45    use ASCII;
  46    --  Make control characters visible
  47
  48    Used_As_Identifier : array (Token_Type) of Boolean;
  49    --  Flags set True if a given keyword is used as an identifier (used to
  50    --  make sure that we only post an error message for incorrect use of a
  51    --  keyword as an identifier once for a given keyword).
  52
  53    -----------------------
  54    -- Local Subprograms --
  55    -----------------------
  56
  57    procedure Accumulate_Checksum (C : Character);
  58    pragma Inline (Accumulate_Checksum);
  59    --  This routine accumulates the checksum given character C. During the
  60    --  scanning of a source file, this routine is called with every character
  61    --  in the source, excluding blanks, and all control characters (except
  62    --  that ESC is included in the checksum). Upper case letters not in string
  63    --  literals are folded by the caller. See Sinput spec for the documentation
  64    --  of the checksum algorithm. Note: checksum values are only used if we
  65    --  generate code, so it is not necessary to worry about making the right
  66    --  sequence of calls in any error situation.
  67
  68    procedure Accumulate_Checksum (C : Char_Code);
  69    pragma Inline (Accumulate_Checksum);
  70    --  This version is identical, except that the argument, C, is a character
  71    --  code value instead of a character. This is used when wide characters
  72    --  are scanned. We use the character code rather than the ASCII characters
  73    --  so that the checksum is independent of wide character encoding method.
  74
  75    procedure Initialize_Checksum;
  76    pragma Inline (Initialize_Checksum);
  77    --  Initialize checksum value
  78
  79    procedure Check_End_Of_Line;
  80    --  Called when end of line encountered. Checks that line is not
  81    --  too long, and that other style checks for the end of line are met.
  82
  83    function Determine_License return License_Type;
  84    --  Scan header of file and check that it has an appropriate GNAT-style
  85    --  header with a proper license statement. Returns GPL, Unrestricted,
  86    --  or Modified_GPL depending on header. If none of these, returns Unknown.
  87
  88    function Double_Char_Token (C : Character) return Boolean;
  89    --  This function is used for double character tokens like := or <>. It
  90    --  checks if the character following Source (Scan_Ptr) is C, and if so
  91    --  bumps Scan_Ptr past the pair of characters and returns True. A space
  92    --  between the two characters is also recognized with an appropriate
  93    --  error message being issued. If C is not present, False is returned.
  94    --  Note that Double_Char_Token can only be used for tokens defined in
  95    --  the Ada syntax (it's use for error cases like && is not appropriate
  96    --  since we do not want a junk message for a case like &-space-&).
  97
  98    procedure Error_Illegal_Character;
  99    --  Give illegal character error, Scan_Ptr points to character. On return,
 100    --  Scan_Ptr is bumped past the illegal character.
 101
 102    procedure Error_Illegal_Wide_Character;
 103    --  Give illegal wide character message. On return, Scan_Ptr is bumped
 104    --  past the illegal character, which may still leave us pointing to
 105    --  junk, not much we can do if the escape sequence is messed up!
 106
 107    procedure Error_Long_Line;
 108    --  Signal error of excessively long line
 109
 110    procedure Error_No_Double_Underline;
 111    --  Signal error of double underline character
 112
 113    procedure Nlit;
 114    --  This is the procedure for scanning out numeric literals. On entry,
 115    --  Scan_Ptr points to the digit that starts the numeric literal (the
 116    --  checksum for this character has not been accumulated yet). On return
 117    --  Scan_Ptr points past the last character of the numeric literal, Token
 118    --  and Token_Node are set appropriately, and the checksum is updated.
 119
 120    function Set_Start_Column return Column_Number;
 121    --  This routine is called with Scan_Ptr pointing to the first character
 122    --  of a line. On exit, Scan_Ptr is advanced to the first non-blank
 123    --  character of this line (or to the terminating format effector if the
 124    --  line contains no non-blank characters), and the returned result is the
 125    --  column number of this non-blank character (zero origin), which is the
 126    --  value to be stored in the Start_Column scan variable.
 127
 128    procedure Slit;
 129    --  This is the procedure for scanning out string literals. On entry,
 130    --  Scan_Ptr points to the opening string quote (the checksum for this
 131    --  character has not been accumulated yet). On return Scan_Ptr points
 132    --  past the closing quote of the string literal, Token and Token_Node
 133    --  are set appropriately, and the checksum is upated.
 134
 135    -------------------------
 136    -- Accumulate_Checksum --
 137    -------------------------
 138
 139    procedure Accumulate_Checksum (C : Character) is
 140    begin
 141       System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
 142    end Accumulate_Checksum;
 143
 144    procedure Accumulate_Checksum (C : Char_Code) is
 145    begin
 146       Accumulate_Checksum (Character'Val (C / 256));
 147       Accumulate_Checksum (Character'Val (C mod 256));
 148    end Accumulate_Checksum;
 149
 150    -----------------------
 151    -- Check_End_Of_Line --
 152    -----------------------
 153
 154    procedure Check_End_Of_Line is
 155       Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
 156
 157    begin
 158       if Len > Hostparm.Max_Line_Length then
 159          Error_Long_Line;
 160
 161       elsif Style_Check then
 162          Style.Check_Line_Terminator (Len);
 163       end if;
 164    end Check_End_Of_Line;
 165
 166    -----------------------
 167    -- Determine_License --
 168    -----------------------
 169
 170    function Determine_License return License_Type is
 171       GPL_Found : Boolean := False;
 172
 173       function Contains (S : String) return Boolean;
 174       --  See if current comment contains successive non-blank characters
 175       --  matching the contents of S. If so leave Scan_Ptr unchanged and
 176       --  return True, otherwise leave Scan_Ptr unchanged and return False.
 177
 178       procedure Skip_EOL;
 179       --  Skip to line terminator character
 180
 181       --------------
 182       -- Contains --
 183       --------------
 184
 185       function Contains (S : String) return Boolean is
 186          CP : Natural;
 187          SP : Source_Ptr;
 188          SS : Source_Ptr;
 189
 190       begin
 191          SP := Scan_Ptr;
 192          while Source (SP) /= CR and then Source (SP) /= LF loop
 193             if Source (SP) = S (S'First) then
 194                SS := SP;
 195                CP := S'First;
 196
 197                loop
 198                   SS := SS + 1;
 199                   CP := CP + 1;
 200
 201                   if CP > S'Last then
 202                      return True;
 203                   end if;
 204
 205                   while Source (SS) = ' ' loop
 206                      SS := SS + 1;
 207                   end loop;
 208
 209                   exit when Source (SS) /= S (CP);
 210                end loop;
 211             end if;
 212
 213             SP := SP + 1;
 214          end loop;
 215
 216          return False;
 217       end Contains;
 218
 219       --------------
 220       -- Skip_EOL --
 221       --------------
 222
 223       procedure Skip_EOL is
 224       begin
 225          while Source (Scan_Ptr) /= CR
 226            and then Source (Scan_Ptr) /= LF
 227          loop
 228             Scan_Ptr := Scan_Ptr + 1;
 229          end loop;
 230       end Skip_EOL;
 231
 232    --  Start of processing for Determine_License
 233
 234    begin
 235       loop
 236          if Source (Scan_Ptr) /= '-'
 237            or else Source (Scan_Ptr + 1) /= '-'
 238          then
 239             if GPL_Found then
 240                return GPL;
 241             else
 242                return Unknown;
 243             end if;
 244
 245          elsif Contains ("Asaspecialexception") then
 246             if GPL_Found then
 247                return Modified_GPL;
 248             end if;
 249
 250          elsif Contains ("GNUGeneralPublicLicense") then
 251             GPL_Found := True;
 252
 253          elsif
 254              Contains
 255                ("ThisspecificationisadaptedfromtheAdaSemanticInterface")
 256            or else
 257              Contains
 258               ("ThisspecificationisderivedfromtheAdaReferenceManual")
 259          then
 260             return Unrestricted;
 261          end if;
 262
 263          Skip_EOL;
 264
 265          Check_End_Of_Line;
 266
 267          declare
 268             Physical : Boolean;
 269
 270          begin
 271             Skip_Line_Terminators (Scan_Ptr, Physical);
 272
 273             --  If we are at start of physical line, update scan pointers
 274             --  to reflect the start of the new line.
 275
 276             if Physical then
 277                Current_Line_Start       := Scan_Ptr;
 278                Start_Column             := Set_Start_Column;
 279                First_Non_Blank_Location := Scan_Ptr;
 280             end if;
 281          end;
 282       end loop;
 283    end Determine_License;
 284
 285    ----------------------------
 286    -- Determine_Token_Casing --
 287    ----------------------------
 288
 289    function Determine_Token_Casing return Casing_Type is
 290    begin
 291       return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
 292    end Determine_Token_Casing;
 293
 294    -----------------------
 295    -- Double_Char_Token --
 296    -----------------------
 297
 298    function Double_Char_Token (C : Character) return Boolean is
 299    begin
 300       if Source (Scan_Ptr + 1) = C then
 301          Accumulate_Checksum (C);
 302          Scan_Ptr := Scan_Ptr + 2;
 303          return True;
 304
 305       elsif Source (Scan_Ptr + 1) = ' '
 306         and then Source (Scan_Ptr + 2) = C
 307       then
 308          Scan_Ptr := Scan_Ptr + 1;
 309          Error_Msg_S ("no space allowed here");
 310          Scan_Ptr := Scan_Ptr + 2;
 311          return True;
 312
 313       else
 314          return False;
 315       end if;
 316    end Double_Char_Token;
 317
 318    -----------------------------
 319    -- Error_Illegal_Character --
 320    -----------------------------
 321
 322    procedure Error_Illegal_Character is
 323    begin
 324       Error_Msg_S ("illegal character");
 325       Scan_Ptr := Scan_Ptr + 1;
 326    end Error_Illegal_Character;
 327
 328    ----------------------------------
 329    -- Error_Illegal_Wide_Character --
 330    ----------------------------------
 331
 332    procedure Error_Illegal_Wide_Character is
 333    begin
 334       if OpenVMS then
 335          Error_Msg_S
 336            ("illegal wide character, check " &
 337             "'/'W'I'D'E'_'C'H'A'R'A'C'T'E'R'_'E'N'C'O'D'I'N'G qualifer");
 338       else
 339          Error_Msg_S
 340            ("illegal wide character, check -gnatW switch");
 341       end if;
 342
 343       Scan_Ptr := Scan_Ptr + 1;
 344    end Error_Illegal_Wide_Character;
 345
 346    ---------------------
 347    -- Error_Long_Line --
 348    ---------------------
 349
 350    procedure Error_Long_Line is
 351    begin
 352       Error_Msg
 353         ("this line is too long",
 354          Current_Line_Start + Hostparm.Max_Line_Length);
 355    end Error_Long_Line;
 356
 357    -------------------------------
 358    -- Error_No_Double_Underline --
 359    -------------------------------
 360
 361    procedure Error_No_Double_Underline is
 362    begin
 363       Error_Msg_S ("two consecutive underlines not permitted");
 364    end Error_No_Double_Underline;
 365
 366    -------------------------
 367    -- Initialize_Checksum --
 368    -------------------------
 369
 370    procedure Initialize_Checksum is
 371    begin
 372       System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
 373    end Initialize_Checksum;
 374
 375    ------------------------
 376    -- Initialize_Scanner --
 377    ------------------------
 378
 379    procedure Initialize_Scanner
 380      (Unit  : Unit_Number_Type;
 381       Index : Source_File_Index)
 382    is
 383       GNAT_Hedr : constant Text_Buffer (1 .. 78) := (others => '-');
 384
 385    begin
 386       --  Set up Token_Type values in Names Table entries for reserved keywords
 387       --  We use the Pos value of the Token_Type value. Note we are relying on
 388       --  the fact that Token_Type'Val (0) is not a reserved word!
 389
 390       Set_Name_Table_Byte (Name_Abort,      Token_Type'Pos (Tok_Abort));
 391       Set_Name_Table_Byte (Name_Abs,        Token_Type'Pos (Tok_Abs));
 392       Set_Name_Table_Byte (Name_Abstract,   Token_Type'Pos (Tok_Abstract));
 393       Set_Name_Table_Byte (Name_Accept,     Token_Type'Pos (Tok_Accept));
 394       Set_Name_Table_Byte (Name_Access,     Token_Type'Pos (Tok_Access));
 395       Set_Name_Table_Byte (Name_And,        Token_Type'Pos (Tok_And));
 396       Set_Name_Table_Byte (Name_Aliased,    Token_Type'Pos (Tok_Aliased));
 397       Set_Name_Table_Byte (Name_All,        Token_Type'Pos (Tok_All));
 398       Set_Name_Table_Byte (Name_Array,      Token_Type'Pos (Tok_Array));
 399       Set_Name_Table_Byte (Name_At,         Token_Type'Pos (Tok_At));
 400       Set_Name_Table_Byte (Name_Begin,      Token_Type'Pos (Tok_Begin));
 401       Set_Name_Table_Byte (Name_Body,       Token_Type'Pos (Tok_Body));
 402       Set_Name_Table_Byte (Name_Case,       Token_Type'Pos (Tok_Case));
 403       Set_Name_Table_Byte (Name_Constant,   Token_Type'Pos (Tok_Constant));
 404       Set_Name_Table_Byte (Name_Declare,    Token_Type'Pos (Tok_Declare));
 405       Set_Name_Table_Byte (Name_Delay,      Token_Type'Pos (Tok_Delay));
 406       Set_Name_Table_Byte (Name_Delta,      Token_Type'Pos (Tok_Delta));
 407       Set_Name_Table_Byte (Name_Digits,     Token_Type'Pos (Tok_Digits));
 408       Set_Name_Table_Byte (Name_Do,         Token_Type'Pos (Tok_Do));
 409       Set_Name_Table_Byte (Name_Else,       Token_Type'Pos (Tok_Else));
 410       Set_Name_Table_Byte (Name_Elsif,      Token_Type'Pos (Tok_Elsif));
 411       Set_Name_Table_Byte (Name_End,        Token_Type'Pos (Tok_End));
 412       Set_Name_Table_Byte (Name_Entry,      Token_Type'Pos (Tok_Entry));
 413       Set_Name_Table_Byte (Name_Exception,  Token_Type'Pos (Tok_Exception));
 414       Set_Name_Table_Byte (Name_Exit,       Token_Type'Pos (Tok_Exit));
 415       Set_Name_Table_Byte (Name_For,        Token_Type'Pos (Tok_For));
 416       Set_Name_Table_Byte (Name_Function,   Token_Type'Pos (Tok_Function));
 417       Set_Name_Table_Byte (Name_Generic,    Token_Type'Pos (Tok_Generic));
 418       Set_Name_Table_Byte (Name_Goto,       Token_Type'Pos (Tok_Goto));
 419       Set_Name_Table_Byte (Name_If,         Token_Type'Pos (Tok_If));
 420       Set_Name_Table_Byte (Name_In,         Token_Type'Pos (Tok_In));
 421       Set_Name_Table_Byte (Name_Is,         Token_Type'Pos (Tok_Is));
 422       Set_Name_Table_Byte (Name_Limited,    Token_Type'Pos (Tok_Limited));
 423       Set_Name_Table_Byte (Name_Loop,       Token_Type'Pos (Tok_Loop));
 424       Set_Name_Table_Byte (Name_Mod,        Token_Type'Pos (Tok_Mod));
 425       Set_Name_Table_Byte (Name_New,        Token_Type'Pos (Tok_New));
 426       Set_Name_Table_Byte (Name_Not,        Token_Type'Pos (Tok_Not));
 427       Set_Name_Table_Byte (Name_Null,       Token_Type'Pos (Tok_Null));
 428       Set_Name_Table_Byte (Name_Of,         Token_Type'Pos (Tok_Of));
 429       Set_Name_Table_Byte (Name_Or,         Token_Type'Pos (Tok_Or));
 430       Set_Name_Table_Byte (Name_Others,     Token_Type'Pos (Tok_Others));
 431       Set_Name_Table_Byte (Name_Out,        Token_Type'Pos (Tok_Out));
 432       Set_Name_Table_Byte (Name_Package,    Token_Type'Pos (Tok_Package));
 433       Set_Name_Table_Byte (Name_Pragma,     Token_Type'Pos (Tok_Pragma));
 434       Set_Name_Table_Byte (Name_Private,    Token_Type'Pos (Tok_Private));
 435       Set_Name_Table_Byte (Name_Procedure,  Token_Type'Pos (Tok_Procedure));
 436       Set_Name_Table_Byte (Name_Protected,  Token_Type'Pos (Tok_Protected));
 437       Set_Name_Table_Byte (Name_Raise,      Token_Type'Pos (Tok_Raise));
 438       Set_Name_Table_Byte (Name_Range,      Token_Type'Pos (Tok_Range));
 439       Set_Name_Table_Byte (Name_Record,     Token_Type'Pos (Tok_Record));
 440       Set_Name_Table_Byte (Name_Rem,        Token_Type'Pos (Tok_Rem));
 441       Set_Name_Table_Byte (Name_Renames,    Token_Type'Pos (Tok_Renames));
 442       Set_Name_Table_Byte (Name_Requeue,    Token_Type'Pos (Tok_Requeue));
 443       Set_Name_Table_Byte (Name_Return,     Token_Type'Pos (Tok_Return));
 444       Set_Name_Table_Byte (Name_Reverse,    Token_Type'Pos (Tok_Reverse));
 445       Set_Name_Table_Byte (Name_Select,     Token_Type'Pos (Tok_Select));
 446       Set_Name_Table_Byte (Name_Separate,   Token_Type'Pos (Tok_Separate));
 447       Set_Name_Table_Byte (Name_Subtype,    Token_Type'Pos (Tok_Subtype));
 448       Set_Name_Table_Byte (Name_Tagged,     Token_Type'Pos (Tok_Tagged));
 449       Set_Name_Table_Byte (Name_Task,       Token_Type'Pos (Tok_Task));
 450       Set_Name_Table_Byte (Name_Terminate,  Token_Type'Pos (Tok_Terminate));
 451       Set_Name_Table_Byte (Name_Then,       Token_Type'Pos (Tok_Then));
 452       Set_Name_Table_Byte (Name_Type,       Token_Type'Pos (Tok_Type));
 453       Set_Name_Table_Byte (Name_Until,      Token_Type'Pos (Tok_Until));
 454       Set_Name_Table_Byte (Name_Use,        Token_Type'Pos (Tok_Use));
 455       Set_Name_Table_Byte (Name_When,       Token_Type'Pos (Tok_When));
 456       Set_Name_Table_Byte (Name_While,      Token_Type'Pos (Tok_While));
 457       Set_Name_Table_Byte (Name_With,       Token_Type'Pos (Tok_With));
 458       Set_Name_Table_Byte (Name_Xor,        Token_Type'Pos (Tok_Xor));
 459
 460       --  Initialize scan control variables
 461
 462       Current_Source_File       := Index;
 463       Source                    := Source_Text (Current_Source_File);
 464       Current_Source_Unit       := Unit;
 465       Scan_Ptr                  := Source_First (Current_Source_File);
 466       Token                     := No_Token;
 467       Token_Ptr                 := Scan_Ptr;
 468       Current_Line_Start        := Scan_Ptr;
 469       Token_Node                := Empty;
 470       Token_Name                := No_Name;
 471       Start_Column              := Set_Start_Column;
 472       First_Non_Blank_Location  := Scan_Ptr;
 473
 474       Initialize_Checksum;
 475
 476       --  Set default for Comes_From_Source. All nodes built now until we
 477       --  reenter the analyzer will have Comes_From_Source set to True
 478
 479       Set_Comes_From_Source_Default (True);
 480
 481       --  Check license if GNAT type header possibly present
 482
 483       if Source_Last (Index) - Scan_Ptr > 80
 484         and then Source (Scan_Ptr .. Scan_Ptr + 77) = GNAT_Hedr
 485       then
 486          Set_License (Current_Source_File, Determine_License);
 487       end if;
 488
 489       --  Scan initial token (note this initializes Prev_Token, Prev_Token_Ptr)
 490
 491       Scan;
 492
 493       --  Clear flags for reserved words used as identifiers
 494
 495       for J in Token_Type loop
 496          Used_As_Identifier (J) := False;
 497       end loop;
 498
 499    end Initialize_Scanner;
 500
 501    ----------
 502    -- Nlit --
 503    ----------
 504
 505    procedure Nlit is separate;
 506
 507    ----------
 508    -- Scan --
 509    ----------
 510
 511    procedure Scan is
 512    begin
 513       Prev_Token := Token;
 514       Prev_Token_Ptr := Token_Ptr;
 515       Token_Name := Error_Name;
 516
 517       --  The following loop runs more than once only if a format effector
 518       --  (tab, vertical tab, form  feed, line feed, carriage return) is
 519       --  encountered and skipped, or some error situation, such as an
 520       --  illegal character, is encountered.
 521
 522       loop
 523          --  Skip past blanks, loop is opened up for speed
 524
 525          while Source (Scan_Ptr) = ' ' loop
 526
 527             if Source (Scan_Ptr + 1) /= ' ' then
 528                Scan_Ptr := Scan_Ptr + 1;
 529                exit;
 530             end if;
 531
 532             if Source (Scan_Ptr + 2) /= ' ' then
 533                Scan_Ptr := Scan_Ptr + 2;
 534                exit;
 535             end if;
 536
 537             if Source (Scan_Ptr + 3) /= ' ' then
 538                Scan_Ptr := Scan_Ptr + 3;
 539                exit;
 540             end if;
 541
 542             if Source (Scan_Ptr + 4) /= ' ' then
 543                Scan_Ptr := Scan_Ptr + 4;
 544                exit;
 545             end if;
 546
 547             if Source (Scan_Ptr + 5) /= ' ' then
 548                Scan_Ptr := Scan_Ptr + 5;
 549                exit;
 550             end if;
 551
 552             if Source (Scan_Ptr + 6) /= ' ' then
 553                Scan_Ptr := Scan_Ptr + 6;
 554                exit;
 555             end if;
 556
 557             if Source (Scan_Ptr + 7) /= ' ' then
 558                Scan_Ptr := Scan_Ptr + 7;
 559                exit;
 560             end if;
 561
 562             Scan_Ptr := Scan_Ptr + 8;
 563          end loop;
 564
 565          --  We are now at a non-blank character, which is the first character
 566          --  of the token we will scan, and hence the value of Token_Ptr.
 567
 568          Token_Ptr := Scan_Ptr;
 569
 570          --  Here begins the main case statement which transfers control on
 571          --  the basis of the non-blank character we have encountered.
 572
 573          case Source (Scan_Ptr) is
 574
 575          --  Line terminator characters
 576
 577          when CR | LF | FF | VT => Line_Terminator_Case : begin
 578
 579             --  Check line too long
 580
 581             Check_End_Of_Line;
 582
 583             declare
 584                Physical : Boolean;
 585
 586             begin
 587                Skip_Line_Terminators (Scan_Ptr, Physical);
 588
 589                --  If we are at start of physical line, update scan pointers
 590                --  to reflect the start of the new line.
 591
 592                if Physical then
 593                   Current_Line_Start       := Scan_Ptr;
 594                   Start_Column             := Set_Start_Column;
 595                   First_Non_Blank_Location := Scan_Ptr;
 596                end if;
 597             end;
 598          end Line_Terminator_Case;
 599
 600          --  Horizontal tab, just skip past it
 601
 602          when HT =>
 603             if Style_Check then Style.Check_HT; end if;
 604             Scan_Ptr := Scan_Ptr + 1;
 605
 606          --  End of file character, treated as an end of file only if it
 607          --  is the last character in the buffer, otherwise it is ignored.
 608
 609          when EOF =>
 610             if Scan_Ptr = Source_Last (Current_Source_File) then
 611                Check_End_Of_Line;
 612                Token := Tok_EOF;
 613                return;
 614
 615             else
 616                Scan_Ptr := Scan_Ptr + 1;
 617             end if;
 618
 619          --  Ampersand
 620
 621          when '&' =>
 622             Accumulate_Checksum ('&');
 623
 624             if Source (Scan_Ptr + 1) = '&' then
 625                Error_Msg_S ("'&'& should be `AND THEN`");
 626                Scan_Ptr := Scan_Ptr + 2;
 627                Token := Tok_And;
 628                return;
 629
 630             else
 631                Scan_Ptr := Scan_Ptr + 1;
 632                Token := Tok_Ampersand;
 633                return;
 634             end if;
 635
 636          --  Asterisk (can be multiplication operator or double asterisk
 637          --  which is the exponentiation compound delimtier).
 638
 639          when '*' =>
 640             Accumulate_Checksum ('*');
 641
 642             if Source (Scan_Ptr + 1) = '*' then
 643                Accumulate_Checksum ('*');
 644                Scan_Ptr := Scan_Ptr + 2;
 645                Token := Tok_Double_Asterisk;
 646                return;
 647
 648             else
 649                Scan_Ptr := Scan_Ptr + 1;
 650                Token := Tok_Asterisk;
 651                return;
 652             end if;
 653
 654          --  Colon, which can either be an isolated colon, or part of an
 655          --  assignment compound delimiter.
 656
 657          when ':' =>
 658             Accumulate_Checksum (':');
 659
 660             if Double_Char_Token ('=') then
 661                Token := Tok_Colon_Equal;
 662                if Style_Check then Style.Check_Colon_Equal; end if;
 663                return;
 664
 665             elsif Source (Scan_Ptr + 1) = '-'
 666               and then Source (Scan_Ptr + 2) /= '-'
 667             then
 668                Token := Tok_Colon_Equal;
 669                Error_Msg (":- should be :=", Scan_Ptr);
 670                Scan_Ptr := Scan_Ptr + 2;
 671                return;
 672
 673             else
 674                Scan_Ptr := Scan_Ptr + 1;
 675                Token := Tok_Colon;
 676                if Style_Check then Style.Check_Colon; end if;
 677                return;
 678             end if;
 679
 680          --  Left parenthesis
 681
 682          when '(' =>
 683             Accumulate_Checksum ('(');
 684             Scan_Ptr := Scan_Ptr + 1;
 685             Token := Tok_Left_Paren;
 686             if Style_Check then Style.Check_Left_Paren; end if;
 687             return;
 688
 689          --  Left bracket
 690
 691          when '[' =>
 692             if Source (Scan_Ptr + 1) = '"' then
 693                Name_Len := 0;
 694                goto Scan_Identifier;
 695
 696             else
 697                Error_Msg_S ("illegal character, replaced by ""(""");
 698                Scan_Ptr := Scan_Ptr + 1;
 699                Token := Tok_Left_Paren;
 700                return;
 701             end if;
 702
 703          --  Left brace
 704
 705          when '{' =>
 706             Error_Msg_S ("illegal character, replaced by ""(""");
 707             Scan_Ptr := Scan_Ptr + 1;
 708             Token := Tok_Left_Paren;
 709             return;
 710
 711          --  Comma
 712
 713          when ',' =>
 714             Accumulate_Checksum (',');
 715             Scan_Ptr := Scan_Ptr + 1;
 716             Token := Tok_Comma;
 717             if Style_Check then Style.Check_Comma; end if;
 718             return;
 719
 720          --  Dot, which is either an isolated period, or part of a double
 721          --  dot compound delimiter sequence. We also check for the case of
 722          --  a digit following the period, to give a better error message.
 723
 724          when '.' =>
 725             Accumulate_Checksum ('.');
 726
 727             if Double_Char_Token ('.') then
 728                Token := Tok_Dot_Dot;
 729                if Style_Check then Style.Check_Dot_Dot; end if;
 730                return;
 731
 732             elsif Source (Scan_Ptr + 1) in '0' .. '9' then
 733                Error_Msg_S ("numeric literal cannot start with point");
 734                Scan_Ptr := Scan_Ptr + 1;
 735
 736             else
 737                Scan_Ptr := Scan_Ptr + 1;
 738                Token := Tok_Dot;
 739                return;
 740             end if;
 741
 742          --  Equal, which can either be an equality operator, or part of the
 743          --  arrow (=>) compound delimiter.
 744
 745          when '=' =>
 746             Accumulate_Checksum ('=');
 747
 748             if Double_Char_Token ('>') then
 749                Token := Tok_Arrow;
 750                if Style_Check then Style.Check_Arrow; end if;
 751                return;
 752
 753             elsif Source (Scan_Ptr + 1) = '=' then
 754                Error_Msg_S ("== should be =");
 755                Scan_Ptr := Scan_Ptr + 1;
 756             end if;
 757
 758             Scan_Ptr := Scan_Ptr + 1;
 759             Token := Tok_Equal;
 760             return;
 761
 762          --  Greater than, which can be a greater than operator, greater than
 763          --  or equal operator, or first character of a right label bracket.
 764
 765          when '>' =>
 766             Accumulate_Checksum ('>');
 767
 768             if Double_Char_Token ('=') then
 769                Token := Tok_Greater_Equal;
 770                return;
 771
 772             elsif Double_Char_Token ('>') then
 773                Token := Tok_Greater_Greater;
 774                return;
 775
 776             else
 777                Scan_Ptr := Scan_Ptr + 1;
 778                Token := Tok_Greater;
 779                return;
 780             end if;
 781
 782          --  Less than, which can be a less than operator, less than or equal
 783          --  operator, or the first character of a left label bracket, or the
 784          --  first character of a box (<>) compound delimiter.
 785
 786          when '<' =>
 787             Accumulate_Checksum ('<');
 788
 789             if Double_Char_Token ('=') then
 790                Token := Tok_Less_Equal;
 791                return;
 792
 793             elsif Double_Char_Token ('>') then
 794                Token := Tok_Box;
 795                if Style_Check then Style.Check_Box; end if;
 796                return;
 797
 798             elsif Double_Char_Token ('<') then
 799                Token := Tok_Less_Less;
 800                return;
 801
 802             else
 803                Scan_Ptr := Scan_Ptr + 1;
 804                Token := Tok_Less;
 805                return;
 806             end if;
 807
 808          --  Minus, which is either a subtraction operator, or the first
 809          --  character of double minus starting a comment
 810
 811          when '-' => Minus_Case : begin
 812             if Source (Scan_Ptr + 1) = '>' then
 813                Error_Msg_S ("invalid token");
 814                Scan_Ptr := Scan_Ptr + 2;
 815                Token := Tok_Arrow;
 816                return;
 817
 818             elsif Source (Scan_Ptr + 1) /= '-' then
 819                Accumulate_Checksum ('-');
 820                Scan_Ptr := Scan_Ptr + 1;
 821                Token := Tok_Minus;
 822                return;
 823
 824             --  Comment
 825
 826             else -- Source (Scan_Ptr + 1) = '-' then
 827                if Style_Check then Style.Check_Comment; end if;
 828                Scan_Ptr := Scan_Ptr + 2;
 829
 830                --  Loop to scan comment (this loop runs more than once only if
 831                --  a horizontal tab or other non-graphic character is scanned)
 832
 833                loop
 834                   --  Scan to non graphic character (opened up for speed)
 835
 836                   loop
 837                      exit when Source (Scan_Ptr) not in Graphic_Character;
 838                      Scan_Ptr := Scan_Ptr + 1;
 839                      exit when Source (Scan_Ptr) not in Graphic_Character;
 840                      Scan_Ptr := Scan_Ptr + 1;
 841                      exit when Source (Scan_Ptr) not in Graphic_Character;
 842                      Scan_Ptr := Scan_Ptr + 1;
 843                      exit when Source (Scan_Ptr) not in Graphic_Character;
 844                      Scan_Ptr := Scan_Ptr + 1;
 845                      exit when Source (Scan_Ptr) not in Graphic_Character;
 846                      Scan_Ptr := Scan_Ptr + 1;
 847                   end loop;
 848
 849                   --  Keep going if horizontal tab
 850
 851                   if Source (Scan_Ptr) = HT then
 852                      if Style_Check then Style.Check_HT; end if;
 853                      Scan_Ptr := Scan_Ptr + 1;
 854
 855                   --  Terminate scan of comment if line terminator
 856
 857                   elsif Source (Scan_Ptr) in Line_Terminator then
 858                      exit;
 859
 860                   --  Terminate scan of comment if end of file encountered
 861                   --  (embedded EOF character or real last character in file)
 862
 863                   elsif Source (Scan_Ptr) = EOF then
 864                      exit;
 865
 866                   --  Keep going if character in 80-FF range, or is ESC. These
 867                   --  characters are allowed in comments by RM-2.1(1), 2.7(2).
 868                   --  They are allowed even in Ada 83 mode according to the
 869                   --  approved AI. ESC was added to the AI in June 93.
 870
 871                   elsif Source (Scan_Ptr) in Upper_Half_Character
 872                     or else Source (Scan_Ptr) = ESC
 873                   then
 874                      Scan_Ptr := Scan_Ptr + 1;
 875
 876                   --  Otherwise we have an illegal comment character
 877
 878                   else
 879                      Error_Illegal_Character;
 880                   end if;
 881
 882                end loop;
 883
 884                --  Note that we do NOT execute a return here, instead we fall
 885                --  through to reexecute the scan loop to look for a token.
 886
 887             end if;
 888          end Minus_Case;
 889
 890          --  Double quote or percent starting a string literal
 891
 892          when '"' | '%' =>
 893             Slit;
 894             return;
 895
 896          --  Apostrophe. This can either be the start of a character literal,
 897          --  or an isolated apostrophe used in a qualified expression or an
 898          --  attribute. We treat it as a character literal if it does not
 899          --  follow a right parenthesis, identifier, the keyword ALL or
 900          --  a literal. This means that we correctly treat constructs like:
 901
 902          --    A := CHARACTER'('A');
 903
 904          --  Note that RM-2.2(7) does not require a separator between
 905          --  "CHARACTER" and "'" in the above.
 906
 907          when ''' => Char_Literal_Case : declare
 908             Code : Char_Code;
 909             Err  : Boolean;
 910
 911          begin
 912             Accumulate_Checksum (''');
 913             Scan_Ptr := Scan_Ptr + 1;
 914
 915             --  Here is where we make the test to distinguish the cases. Treat
 916             --  as apostrophe if previous token is an identifier, right paren
 917             --  or the reserved word "all" (latter case as in A.all'Address)
 918             --  Also treat it as apostrophe after a literal (this catches
 919             --  some legitimate cases, like A."abs"'Address, and also gives
 920             --  better error behavior for impossible cases like 123'xxx).
 921
 922             if Prev_Token = Tok_Identifier
 923                or else Prev_Token = Tok_Right_Paren
 924                or else Prev_Token = Tok_All
 925                or else Prev_Token in Token_Class_Literal
 926             then
 927                Token := Tok_Apostrophe;
 928                return;
 929
 930             --  Otherwise the apostrophe starts a character literal
 931
 932             else
 933                --  Case of wide character literal with ESC or [ encoding
 934
 935                if (Source (Scan_Ptr) = ESC
 936                      and then
 937                     Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
 938                  or else
 939                    (Source (Scan_Ptr) in Upper_Half_Character
 940                      and then
 941                     Upper_Half_Encoding)
 942                  or else
 943                    (Source (Scan_Ptr) = '['
 944                      and then
 945                     Source (Scan_Ptr + 1) = '"')
 946                then
 947                   Scan_Wide (Source, Scan_Ptr, Code, Err);
 948                   Accumulate_Checksum (Code);
 949
 950                   if Err then
 951                      Error_Illegal_Wide_Character;
 952                   end if;
 953
 954                   if Source (Scan_Ptr) /= ''' then
 955                      Error_Msg_S ("missing apostrophe");
 956                   else
 957                      Scan_Ptr := Scan_Ptr + 1;
 958                   end if;
 959
 960                --  If we do not find a closing quote in the expected place then
 961                --  assume that we have a misguided attempt at a string literal.
 962
 963                --  However, if previous token is RANGE, then we return an
 964                --  apostrophe instead since this gives better error recovery
 965
 966                elsif Source (Scan_Ptr + 1) /= ''' then
 967
 968                   if Prev_Token = Tok_Range then
 969                      Token := Tok_Apostrophe;
 970                      return;
 971
 972                   else
 973                      Scan_Ptr := Scan_Ptr - 1;
 974                      Error_Msg_S
 975                        ("strings are delimited by double quote character");
 976                      Scn.Slit;
 977                      return;
 978                   end if;
 979
 980                --  Otherwise we have a (non-wide) character literal
 981
 982                else
 983                   Accumulate_Checksum (Source (Scan_Ptr));
 984
 985                   if Source (Scan_Ptr) not in Graphic_Character then
 986                      if Source (Scan_Ptr) in Upper_Half_Character then
 987                         if Ada_83 then
 988                            Error_Illegal_Character;
 989                         end if;
 990
 991                      else
 992                         Error_Illegal_Character;
 993                      end if;
 994                   end if;
 995
 996                   Code := Get_Char_Code (Source (Scan_Ptr));
 997                   Scan_Ptr := Scan_Ptr + 2;
 998                end if;
 999
1000                --  Fall through here with Scan_Ptr updated past the closing
1001                --  quote, and Code set to the Char_Code value for the literal
1002
1003                Accumulate_Checksum (''');
1004                Token := Tok_Char_Literal;
1005                Token_Node := New_Node (N_Character_Literal, Token_Ptr);
1006                Set_Char_Literal_Value (Token_Node, Code);
1007                Set_Character_Literal_Name (Code);
1008                Token_Name := Name_Find;
1009                Set_Chars (Token_Node, Token_Name);
1010                return;
1011             end if;
1012          end Char_Literal_Case;
1013
1014          --  Right parenthesis
1015
1016          when ')' =>
1017             Accumulate_Checksum (')');
1018             Scan_Ptr := Scan_Ptr + 1;
1019             Token := Tok_Right_Paren;
1020             if Style_Check then Style.Check_Right_Paren; end if;
1021             return;
1022
1023          --  Right bracket or right brace, treated as right paren
1024
1025          when ']' | '}' =>
1026             Error_Msg_S ("illegal character, replaced by "")""");
1027             Scan_Ptr := Scan_Ptr + 1;
1028             Token := Tok_Right_Paren;
1029             return;
1030
1031          --  Slash (can be division operator or first character of not equal)
1032
1033          when '/' =>
1034             Accumulate_Checksum ('/');
1035
1036             if Double_Char_Token ('=') then
1037                Token := Tok_Not_Equal;
1038                return;
1039             else
1040                Scan_Ptr := Scan_Ptr + 1;
1041                Token := Tok_Slash;
1042                return;
1043             end if;
1044
1045          --  Semicolon
1046
1047          when ';' =>
1048             Accumulate_Checksum (';');
1049             Scan_Ptr := Scan_Ptr + 1;
1050             Token := Tok_Semicolon;
1051             if Style_Check then Style.Check_Semicolon; end if;
1052             return;
1053
1054          --  Vertical bar
1055
1056          when '|' => Vertical_Bar_Case : begin
1057             Accumulate_Checksum ('|');
1058
1059             --  Special check for || to give nice message
1060
1061             if Source (Scan_Ptr + 1) = '|' then
1062                Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1063                Scan_Ptr := Scan_Ptr + 2;
1064                Token := Tok_Or;
1065                return;
1066
1067             else
1068                Scan_Ptr := Scan_Ptr + 1;
1069                Token := Tok_Vertical_Bar;
1070                if Style_Check then Style.Check_Vertical_Bar; end if;
1071                return;
1072             end if;
1073          end Vertical_Bar_Case;
1074
1075          --  Exclamation, replacement character for vertical bar
1076
1077          when '!' => Exclamation_Case : begin
1078             Accumulate_Checksum ('!');
1079
1080             if Source (Scan_Ptr + 1) = '=' then
1081                Error_Msg_S ("'!= should be /=");
1082                Scan_Ptr := Scan_Ptr + 2;
1083                Token := Tok_Not_Equal;
1084                return;
1085
1086             else
1087                Scan_Ptr := Scan_Ptr + 1;
1088                Token := Tok_Vertical_Bar;
1089                return;
1090             end if;
1091
1092          end Exclamation_Case;
1093
1094          --  Plus
1095
1096          when '+' => Plus_Case : begin
1097             Accumulate_Checksum ('+');
1098             Scan_Ptr := Scan_Ptr + 1;
1099             Token := Tok_Plus;
1100             return;
1101          end Plus_Case;
1102
1103          --  Digits starting a numeric literal
1104
1105          when '0' .. '9' =>
1106             Nlit;
1107
1108             if Identifier_Char (Source (Scan_Ptr)) then
1109                Error_Msg_S
1110                  ("delimiter required between literal and identifier");
1111             end if;
1112
1113             return;
1114
1115          --  Lower case letters
1116
1117          when 'a' .. 'z' =>
1118             Name_Len := 1;
1119             Name_Buffer (1) := Source (Scan_Ptr);
1120             Accumulate_Checksum (Name_Buffer (1));
1121             Scan_Ptr := Scan_Ptr + 1;
1122             goto Scan_Identifier;
1123
1124          --  Upper case letters
1125
1126          when 'A' .. 'Z' =>
1127             Name_Len := 1;
1128             Name_Buffer (1) :=
1129               Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1130             Accumulate_Checksum (Name_Buffer (1));
1131             Scan_Ptr := Scan_Ptr + 1;
1132             goto Scan_Identifier;
1133
1134          --  Underline character
1135
1136          when '_' =>
1137             Error_Msg_S ("identifier cannot start with underline");
1138             Name_Len := 1;
1139             Name_Buffer (1) := '_';
1140             Scan_Ptr := Scan_Ptr + 1;
1141             goto Scan_Identifier;
1142
1143          --  Space (not possible, because we scanned past blanks)
1144
1145          when ' ' =>
1146             raise Program_Error;
1147
1148          --  Characters in top half of ASCII 8-bit chart
1149
1150          when Upper_Half_Character =>
1151
1152             --  Wide character case. Note that Scan_Identifier will issue
1153             --  an appropriate message if wide characters are not allowed
1154             --  in identifiers.
1155
1156             if Upper_Half_Encoding then
1157                Name_Len := 0;
1158                goto Scan_Identifier;
1159
1160             --  Otherwise we have OK Latin-1 character
1161
1162             else
1163                --  Upper half characters may possibly be identifier letters
1164                --  but can never be digits, so Identifier_Char can be used
1165                --  to test for a valid start of identifier character.
1166
1167                if Identifier_Char (Source (Scan_Ptr)) then
1168                   Name_Len := 0;
1169                   goto Scan_Identifier;
1170                else
1171                   Error_Illegal_Character;
1172                end if;
1173             end if;
1174
1175          when ESC =>
1176
1177             --  ESC character, possible start of identifier if wide characters
1178             --  using ESC encoding are allowed in identifiers, which we can
1179             --  tell by looking at the Identifier_Char flag for ESC, which is
1180             --  only true if these conditions are met.
1181
1182             if Identifier_Char (ESC) then
1183                Name_Len := 0;
1184                goto Scan_Identifier;
1185             else
1186                Error_Illegal_Wide_Character;
1187             end if;
1188
1189          --  Invalid control characters
1190
1191          when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | SO  |
1192               SI  | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1193               EM  | FS  | GS  | RS  | US  | DEL
1194          =>
1195             Error_Illegal_Character;
1196
1197          --  Invalid graphic characters
1198
1199          when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1200             Error_Illegal_Character;
1201
1202          --  End switch on non-blank character
1203
1204          end case;
1205
1206       --  End loop past format effectors. The exit from this loop is by
1207       --  executing a return statement following completion of token scan
1208       --  (control never falls out of this loop to the code which follows)
1209
1210       end loop;
1211
1212       --  Identifier scanning routine. On entry, some initial characters
1213       --  of the identifier may have already been stored in Name_Buffer.
1214       --  If so, Name_Len has the number of characters stored. otherwise
1215       --  Name_Len is set to zero on entry.
1216
1217       <<Scan_Identifier>>
1218
1219          --  This loop scans as fast as possible past lower half letters
1220          --  and digits, which we expect to be the most common characters.
1221
1222          loop
1223             if Source (Scan_Ptr) in 'a' .. 'z'
1224               or else Source (Scan_Ptr) in '0' .. '9'
1225             then
1226                Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
1227                Accumulate_Checksum (Source (Scan_Ptr));
1228
1229             elsif Source (Scan_Ptr) in 'A' .. 'Z' then
1230                Name_Buffer (Name_Len + 1) :=
1231                  Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1232                Accumulate_Checksum (Name_Buffer (Name_Len + 1));
1233             else
1234                exit;
1235             end if;
1236
1237             --  Open out the loop a couple of times for speed
1238
1239             if Source (Scan_Ptr + 1) in 'a' .. 'z'
1240               or else Source (Scan_Ptr + 1) in '0' .. '9'
1241             then
1242                Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
1243                Accumulate_Checksum (Source (Scan_Ptr + 1));
1244
1245             elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
1246                Name_Buffer (Name_Len + 2) :=
1247                  Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
1248                Accumulate_Checksum (Name_Buffer (Name_Len + 2));
1249
1250             else
1251                Scan_Ptr := Scan_Ptr + 1;
1252                Name_Len := Name_Len + 1;
1253                exit;
1254             end if;
1255
1256             if Source (Scan_Ptr + 2) in 'a' .. 'z'
1257               or else Source (Scan_Ptr + 2) in '0' .. '9'
1258             then
1259                Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
1260                Accumulate_Checksum (Source (Scan_Ptr + 2));
1261
1262             elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
1263                Name_Buffer (Name_Len + 3) :=
1264                  Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
1265                Accumulate_Checksum (Name_Buffer (Name_Len + 3));
1266             else
1267                Scan_Ptr := Scan_Ptr + 2;
1268                Name_Len := Name_Len + 2;
1269                exit;
1270             end if;
1271
1272             if Source (Scan_Ptr + 3) in 'a' .. 'z'
1273               or else Source (Scan_Ptr + 3) in '0' .. '9'
1274             then
1275                Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
1276                Accumulate_Checksum (Source (Scan_Ptr + 3));
1277
1278             elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
1279                Name_Buffer (Name_Len + 4) :=
1280                  Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
1281                Accumulate_Checksum (Name_Buffer (Name_Len + 4));
1282
1283             else
1284                Scan_Ptr := Scan_Ptr + 3;
1285                Name_Len := Name_Len + 3;
1286                exit;
1287             end if;
1288
1289             Scan_Ptr := Scan_Ptr + 4;
1290             Name_Len := Name_Len + 4;
1291          end loop;
1292
1293          --  If we fall through, then we have encountered either an underline
1294          --  character, or an extended identifier character (i.e. one from the
1295          --  upper half), or a wide character, or an identifier terminator.
1296          --  The initial test speeds us up in the most common case where we
1297          --  have an identifier terminator. Note that ESC is an identifier
1298          --  character only if a wide character encoding method that uses
1299          --  ESC encoding is active, so if we find an ESC character we know
1300          --  that we have a wide character.
1301
1302          if Identifier_Char (Source (Scan_Ptr)) then
1303
1304             --  Case of underline, check for error cases of double underline,
1305             --  and for a trailing underline character
1306
1307             if Source (Scan_Ptr) = '_' then
1308                Accumulate_Checksum ('_');
1309                Name_Len := Name_Len + 1;
1310                Name_Buffer (Name_Len) := '_';
1311
1312                if Identifier_Char (Source (Scan_Ptr + 1)) then
1313                   Scan_Ptr := Scan_Ptr + 1;
1314
1315                   if Source (Scan_Ptr) = '_' then
1316                      Error_No_Double_Underline;
1317                   end if;
1318
1319                else
1320                   Error_Msg_S ("identifier cannot end with underline");
1321                   Scan_Ptr := Scan_Ptr + 1;
1322                end if;
1323
1324                goto Scan_Identifier;
1325
1326             --  Upper half character
1327
1328             elsif Source (Scan_Ptr) in Upper_Half_Character
1329               and then not Upper_Half_Encoding
1330             then
1331                Accumulate_Checksum (Source (Scan_Ptr));
1332                Store_Encoded_Character
1333                  (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
1334                Scan_Ptr := Scan_Ptr + 1;
1335                goto Scan_Identifier;
1336
1337             --  Left bracket not followed by a quote terminates an identifier.
1338             --  This is an error, but we don't want to give a junk error msg
1339             --  about wide characters in this case!
1340
1341             elsif Source (Scan_Ptr) = '['
1342               and then Source (Scan_Ptr + 1) /= '"'
1343             then
1344                null;
1345
1346             --  We know we have a wide character encoding here (the current
1347             --  character is either ESC, left bracket, or an upper half
1348             --  character depending on the encoding method).
1349
1350             else
1351                --  Scan out the wide character and insert the appropriate
1352                --  encoding into the name table entry for the identifier.
1353
1354                declare
1355                   Sptr : constant Source_Ptr := Scan_Ptr;
1356                   Code : Char_Code;
1357                   Err  : Boolean;
1358                   Chr  : Character;
1359
1360                begin
1361                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1362
1363                   --  If error, signal error
1364
1365                   if Err then
1366                      Error_Illegal_Wide_Character;
1367
1368                   --  If the character scanned is a normal identifier
1369                   --  character, then we treat it that way.
1370
1371                   elsif In_Character_Range (Code)
1372                     and then Identifier_Char (Get_Character (Code))
1373                   then
1374                      Chr := Get_Character (Code);
1375                      Accumulate_Checksum (Chr);
1376                      Store_Encoded_Character
1377                        (Get_Char_Code (Fold_Lower (Chr)));
1378
1379                   --  Character is not normal identifier character, store
1380                   --  it in encoded form.
1381
1382                   else
1383                      Accumulate_Checksum (Code);
1384                      Store_Encoded_Character (Code);
1385
1386                      --  Make sure we are allowing wide characters in
1387                      --  identifiers. Note that we allow wide character
1388                      --  notation for an OK identifier character. This
1389                      --  in particular allows bracket or other notation
1390                      --  to be used for upper half letters.
1391
1392                      if Identifier_Character_Set /= 'w' then
1393                         Error_Msg
1394                           ("wide character not allowed in identifier", Sptr);
1395                      end if;
1396                   end if;
1397                end;
1398
1399                goto Scan_Identifier;
1400             end if;
1401          end if;
1402
1403          --  Scan of identifier is complete. The identifier is stored in
1404          --  Name_Buffer, and Scan_Ptr points past the last character.
1405
1406          Token_Name := Name_Find;
1407
1408          --  Here is where we check if it was a keyword
1409
1410          if Get_Name_Table_Byte (Token_Name) /= 0
1411            and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
1412          then
1413             Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
1414
1415             --  Deal with possible style check for non-lower case keyword,
1416             --  but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
1417             --  for this purpose if they appear as attribute designators.
1418             --  Actually we only check the first character for speed.
1419
1420             if Style_Check
1421               and then Source (Token_Ptr) <= 'Z'
1422               and then (Prev_Token /= Tok_Apostrophe
1423                           or else
1424                             (Token /= Tok_Access
1425                                and then Token /= Tok_Delta
1426                                and then Token /= Tok_Digits
1427                                and then Token /= Tok_Range))
1428             then
1429                Style.Non_Lower_Case_Keyword;
1430             end if;
1431
1432             --  We must reset Token_Name since this is not an identifier
1433             --  and if we leave Token_Name set, the parser gets confused
1434             --  because it thinks it is dealing with an identifier instead
1435             --  of the corresponding keyword.
1436
1437             Token_Name := No_Name;
1438             return;
1439
1440          --  It is an identifier after all
1441
1442          else
1443             Token_Node := New_Node (N_Identifier, Token_Ptr);
1444             Set_Chars (Token_Node, Token_Name);
1445             Token := Tok_Identifier;
1446             return;
1447          end if;
1448    end Scan;
1449
1450    ---------------------
1451    -- Scan_First_Char --
1452    ---------------------
1453
1454    function Scan_First_Char return Source_Ptr is
1455       Ptr : Source_Ptr := Current_Line_Start;
1456
1457    begin
1458       loop
1459          if Source (Ptr) = ' ' then
1460             Ptr := Ptr + 1;
1461
1462          elsif Source (Ptr) = HT then
1463             if Style_Check then Style.Check_HT; end if;
1464             Ptr := Ptr + 1;
1465
1466          else
1467             return Ptr;
1468          end if;
1469       end loop;
1470    end Scan_First_Char;
1471
1472    ------------------------------
1473    -- Scan_Reserved_Identifier --
1474    ------------------------------
1475
1476    procedure Scan_Reserved_Identifier (Force_Msg : Boolean) is
1477       Token_Chars : constant String := Token_Type'Image (Token);
1478
1479    begin
1480       --  We have in Token_Chars the image of the Token name, i.e. Tok_xxx.
1481       --  This code extracts the xxx and makes an identifier out of it.
1482
1483       Name_Len := 0;
1484
1485       for J in 5 .. Token_Chars'Length loop
1486          Name_Len := Name_Len + 1;
1487          Name_Buffer (Name_Len) := Fold_Lower (Token_Chars (J));
1488       end loop;
1489
1490       Token_Name := Name_Find;
1491
1492       if not Used_As_Identifier (Token) or else Force_Msg then
1493          Error_Msg_Name_1 := Token_Name;
1494          Error_Msg_SC ("reserved word* cannot be used as identifier!");
1495          Used_As_Identifier (Token) := True;
1496       end if;
1497
1498       Token := Tok_Identifier;
1499       Token_Node := New_Node (N_Identifier, Token_Ptr);
1500       Set_Chars (Token_Node, Token_Name);
1501    end Scan_Reserved_Identifier;
1502
1503    ----------------------
1504    -- Set_Start_Column --
1505    ----------------------
1506
1507    --  Note: it seems at first glance a little expensive to compute this value
1508    --  for every source line (since it is certainly not used for all source
1509    --  lines). On the other hand, it doesn't take much more work to skip past
1510    --  the initial white space on the line counting the columns than it would
1511    --  to scan past the white space using the standard scanning circuits.
1512
1513    function Set_Start_Column return Column_Number is
1514       Start_Column : Column_Number := 0;
1515
1516    begin
1517       --  Outer loop scans past horizontal tab characters
1518
1519       Tabs_Loop : loop
1520
1521          --  Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
1522          --  past the blanks and adjusting Start_Column to account for them.
1523
1524          Blanks_Loop : loop
1525             if Source (Scan_Ptr) = ' ' then
1526                if Source (Scan_Ptr + 1) = ' ' then
1527                   if Source (Scan_Ptr + 2) = ' ' then
1528                      if Source (Scan_Ptr + 3) = ' ' then
1529                         if Source (Scan_Ptr + 4) = ' ' then
1530                            if Source (Scan_Ptr + 5) = ' ' then
1531                               if Source (Scan_Ptr + 6) = ' ' then
1532                                  Scan_Ptr := Scan_Ptr + 7;
1533                                  Start_Column := Start_Column + 7;
1534                               else
1535                                  Scan_Ptr := Scan_Ptr + 6;
1536                                  Start_Column := Start_Column + 6;
1537                                  exit Blanks_Loop;
1538                               end if;
1539                            else
1540                               Scan_Ptr := Scan_Ptr + 5;
1541                               Start_Column := Start_Column + 5;
1542                               exit Blanks_Loop;
1543                            end if;
1544                         else
1545                            Scan_Ptr := Scan_Ptr + 4;
1546                            Start_Column := Start_Column + 4;
1547                            exit Blanks_Loop;
1548                         end if;
1549                      else
1550                         Scan_Ptr := Scan_Ptr + 3;
1551                         Start_Column := Start_Column + 3;
1552                         exit Blanks_Loop;
1553                      end if;
1554                   else
1555                      Scan_Ptr := Scan_Ptr + 2;
1556                      Start_Column := Start_Column + 2;
1557                      exit Blanks_Loop;
1558                   end if;
1559                else
1560                   Scan_Ptr := Scan_Ptr + 1;
1561                   Start_Column := Start_Column + 1;
1562                   exit Blanks_Loop;
1563                end if;
1564             else
1565                exit Blanks_Loop;
1566             end if;
1567          end loop Blanks_Loop;
1568
1569          --  Outer loop keeps going only if a horizontal tab follows
1570
1571          if Source (Scan_Ptr) = HT then
1572             if Style_Check then Style.Check_HT; end if;
1573             Scan_Ptr := Scan_Ptr + 1;
1574             Start_Column := (Start_Column / 8) * 8 + 8;
1575          else
1576             exit Tabs_Loop;
1577          end if;
1578
1579       end loop Tabs_Loop;
1580
1581       return Start_Column;
1582    end Set_Start_Column;
1583
1584    ----------
1585    -- Slit --
1586    ----------
1587
1588    procedure Slit is separate;
1589
1590 end Scn;