gcc/ada/scng.adb

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                 S C N G                                  --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2005 Free Software Foundation, Inc.          --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  17 -- for  more details.  You should have  received  a copy of the GNU General --
  18 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
  19 -- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
  20 -- MA 02111-1307, USA.                                                      --
  21 --                                                                          --
  22 -- GNAT was originally developed  by the GNAT team at  New York University. --
  23 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  24 --                                                                          --
  25 ------------------------------------------------------------------------------
  26
  27 with Csets;    use Csets;
  28 with Err_Vars; use Err_Vars;
  29 with Namet;    use Namet;
  30 with Opt;      use Opt;
  31 with Scans;    use Scans;
  32 with Sinput;   use Sinput;
  33 with Snames;   use Snames;
  34 with Stringt;  use Stringt;
  35 with Stylesw;  use Stylesw;
  36 with Uintp;    use Uintp;
  37 with Urealp;   use Urealp;
  38 with Widechar; use Widechar;
  39
  40 with System.CRC32;
  41 with System.WCh_Con; use System.WCh_Con;
  42
  43 with GNAT.UTF_32; use GNAT.UTF_32;
  44
  45 package body Scng is
  46
  47    use ASCII;
  48    --  Make control characters visible
  49
  50    Special_Characters : array (Character) of Boolean := (others => False);
  51    --  For characters that are Special token, the value is True
  52
  53    Comment_Is_Token : Boolean := False;
  54    --  True if comments are tokens
  55
  56    End_Of_Line_Is_Token : Boolean := False;
  57    --  True if End_Of_Line is a token
  58
  59    -----------------------
  60    -- Local Subprograms --
  61    -----------------------
  62
  63    procedure Accumulate_Token_Checksum;
  64    pragma Inline (Accumulate_Token_Checksum);
  65
  66    procedure Accumulate_Checksum (C : Character);
  67    pragma Inline (Accumulate_Checksum);
  68    --  This routine accumulates the checksum given character C. During the
  69    --  scanning of a source file, this routine is called with every character
  70    --  in the source, excluding blanks, and all control characters (except
  71    --  that ESC is included in the checksum). Upper case letters not in string
  72    --  literals are folded by the caller. See Sinput spec for the documentation
  73    --  of the checksum algorithm. Note: checksum values are only used if we
  74    --  generate code, so it is not necessary to worry about making the right
  75    --  sequence of calls in any error situation.
  76
  77    procedure Accumulate_Checksum (C : Char_Code);
  78    pragma Inline (Accumulate_Checksum);
  79    --  This version is identical, except that the argument, C, is a character
  80    --  code value instead of a character. This is used when wide characters
  81    --  are scanned. We use the character code rather than the ASCII characters
  82    --  so that the checksum is independent of wide character encoding method.
  83
  84    procedure Initialize_Checksum;
  85    pragma Inline (Initialize_Checksum);
  86    --  Initialize checksum value
  87
  88    -------------------------
  89    -- Accumulate_Checksum --
  90    -------------------------
  91
  92    procedure Accumulate_Checksum (C : Character) is
  93    begin
  94       System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
  95    end Accumulate_Checksum;
  96
  97    procedure Accumulate_Checksum (C : Char_Code) is
  98    begin
  99       if C > 16#FFFF# then
 100          Accumulate_Checksum (Character'Val (C / 2 ** 16));
 101          Accumulate_Checksum (Character'Val ((C / 256) mod 256));
 102       else
 103          Accumulate_Checksum (Character'Val (C / 256));
 104       end if;
 105
 106       Accumulate_Checksum (Character'Val (C mod 256));
 107    end Accumulate_Checksum;
 108
 109    -------------------------------
 110    -- Accumulate_Token_Checksum --
 111    -------------------------------
 112
 113    procedure Accumulate_Token_Checksum is
 114    begin
 115       System.CRC32.Update
 116         (System.CRC32.CRC32 (Checksum),
 117          Character'Val (Token_Type'Pos (Token)));
 118    end Accumulate_Token_Checksum;
 119
 120    ----------------------------
 121    -- Determine_Token_Casing --
 122    ----------------------------
 123
 124    function Determine_Token_Casing return Casing_Type is
 125    begin
 126       return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
 127    end Determine_Token_Casing;
 128
 129    -------------------------
 130    -- Initialize_Checksum --
 131    -------------------------
 132
 133    procedure Initialize_Checksum is
 134    begin
 135       System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
 136    end Initialize_Checksum;
 137
 138    ------------------------
 139    -- Initialize_Scanner --
 140    ------------------------
 141
 142    procedure Initialize_Scanner
 143      (Unit  : Unit_Number_Type;
 144       Index : Source_File_Index)
 145    is
 146       procedure Set_Reserved (N : Name_Id; T : Token_Type);
 147       pragma Inline (Set_Reserved);
 148       --  Set given name as a reserved keyword (T is the corresponding token)
 149
 150       -------------
 151       -- Set_NTB --
 152       -------------
 153
 154       procedure Set_Reserved (N : Name_Id; T : Token_Type) is
 155       begin
 156          --  Set up Token_Type values in Names Table entries for reserved
 157          --  keywords We use the Pos value of the Token_Type value. Note we
 158          --  rely on the fact that Token_Type'Val (0) is not a reserved word!
 159
 160          Set_Name_Table_Byte (N, Token_Type'Pos (T));
 161       end Set_Reserved;
 162
 163    --  Start of processing for Initialize_Scanner
 164
 165    begin
 166       --  Establish reserved words
 167
 168       Set_Reserved (Name_Abort,     Tok_Abort);
 169       Set_Reserved (Name_Abs,       Tok_Abs);
 170       Set_Reserved (Name_Abstract,  Tok_Abstract);
 171       Set_Reserved (Name_Accept,    Tok_Accept);
 172       Set_Reserved (Name_Access,    Tok_Access);
 173       Set_Reserved (Name_And,       Tok_And);
 174       Set_Reserved (Name_Aliased,   Tok_Aliased);
 175       Set_Reserved (Name_All,       Tok_All);
 176       Set_Reserved (Name_Array,     Tok_Array);
 177       Set_Reserved (Name_At,        Tok_At);
 178       Set_Reserved (Name_Begin,     Tok_Begin);
 179       Set_Reserved (Name_Body,      Tok_Body);
 180       Set_Reserved (Name_Case,      Tok_Case);
 181       Set_Reserved (Name_Constant,  Tok_Constant);
 182       Set_Reserved (Name_Declare,   Tok_Declare);
 183       Set_Reserved (Name_Delay,     Tok_Delay);
 184       Set_Reserved (Name_Delta,     Tok_Delta);
 185       Set_Reserved (Name_Digits,    Tok_Digits);
 186       Set_Reserved (Name_Do,        Tok_Do);
 187       Set_Reserved (Name_Else,      Tok_Else);
 188       Set_Reserved (Name_Elsif,     Tok_Elsif);
 189       Set_Reserved (Name_End,       Tok_End);
 190       Set_Reserved (Name_Entry,     Tok_Entry);
 191       Set_Reserved (Name_Exception, Tok_Exception);
 192       Set_Reserved (Name_Exit,      Tok_Exit);
 193       Set_Reserved (Name_For,       Tok_For);
 194       Set_Reserved (Name_Function,  Tok_Function);
 195       Set_Reserved (Name_Generic,   Tok_Generic);
 196       Set_Reserved (Name_Goto,      Tok_Goto);
 197       Set_Reserved (Name_If,        Tok_If);
 198       Set_Reserved (Name_In,        Tok_In);
 199       Set_Reserved (Name_Is,        Tok_Is);
 200       Set_Reserved (Name_Limited,   Tok_Limited);
 201       Set_Reserved (Name_Loop,      Tok_Loop);
 202       Set_Reserved (Name_Mod,       Tok_Mod);
 203       Set_Reserved (Name_New,       Tok_New);
 204       Set_Reserved (Name_Not,       Tok_Not);
 205       Set_Reserved (Name_Null,      Tok_Null);
 206       Set_Reserved (Name_Of,        Tok_Of);
 207       Set_Reserved (Name_Or,        Tok_Or);
 208       Set_Reserved (Name_Others,    Tok_Others);
 209       Set_Reserved (Name_Out,       Tok_Out);
 210       Set_Reserved (Name_Package,   Tok_Package);
 211       Set_Reserved (Name_Pragma,    Tok_Pragma);
 212       Set_Reserved (Name_Private,   Tok_Private);
 213       Set_Reserved (Name_Procedure, Tok_Procedure);
 214       Set_Reserved (Name_Protected, Tok_Protected);
 215       Set_Reserved (Name_Raise,     Tok_Raise);
 216       Set_Reserved (Name_Range,     Tok_Range);
 217       Set_Reserved (Name_Record,    Tok_Record);
 218       Set_Reserved (Name_Rem,       Tok_Rem);
 219       Set_Reserved (Name_Renames,   Tok_Renames);
 220       Set_Reserved (Name_Requeue,   Tok_Requeue);
 221       Set_Reserved (Name_Return,    Tok_Return);
 222       Set_Reserved (Name_Reverse,   Tok_Reverse);
 223       Set_Reserved (Name_Select,    Tok_Select);
 224       Set_Reserved (Name_Separate,  Tok_Separate);
 225       Set_Reserved (Name_Subtype,   Tok_Subtype);
 226       Set_Reserved (Name_Tagged,    Tok_Tagged);
 227       Set_Reserved (Name_Task,      Tok_Task);
 228       Set_Reserved (Name_Terminate, Tok_Terminate);
 229       Set_Reserved (Name_Then,      Tok_Then);
 230       Set_Reserved (Name_Type,      Tok_Type);
 231       Set_Reserved (Name_Until,     Tok_Until);
 232       Set_Reserved (Name_Use,       Tok_Use);
 233       Set_Reserved (Name_When,      Tok_When);
 234       Set_Reserved (Name_While,     Tok_While);
 235       Set_Reserved (Name_With,      Tok_With);
 236       Set_Reserved (Name_Xor,       Tok_Xor);
 237
 238       --  Ada 2005 reserved words
 239
 240       Set_Reserved (Name_Interface,     Tok_Interface);
 241       Set_Reserved (Name_Overriding,    Tok_Overriding);
 242       Set_Reserved (Name_Synchronized,  Tok_Synchronized);
 243
 244       --  Initialize scan control variables
 245
 246       Current_Source_File       := Index;
 247       Source                    := Source_Text (Current_Source_File);
 248       Current_Source_Unit       := Unit;
 249       Scan_Ptr                  := Source_First (Current_Source_File);
 250       Token                     := No_Token;
 251       Token_Ptr                 := Scan_Ptr;
 252       Current_Line_Start        := Scan_Ptr;
 253       Token_Node                := Empty;
 254       Token_Name                := No_Name;
 255       Start_Column              := Set_Start_Column;
 256       First_Non_Blank_Location  := Scan_Ptr;
 257
 258       Initialize_Checksum;
 259
 260       --  Do not call Scan, otherwise the License stuff does not work in Scn
 261
 262    end Initialize_Scanner;
 263
 264    ------------------------------
 265    -- Reset_Special_Characters --
 266    ------------------------------
 267
 268    procedure Reset_Special_Characters is
 269    begin
 270       Special_Characters := (others => False);
 271    end Reset_Special_Characters;
 272
 273    ----------
 274    -- Scan --
 275    ----------
 276
 277    procedure Scan is
 278
 279       Start_Of_Comment : Source_Ptr;
 280       --  Record start of comment position
 281
 282       Underline_Found : Boolean;
 283       --  During scanning of an identifier, set to True if last character
 284       --  scanned was an underline or other punctuation character. This
 285       --  is used to flag the error of two underlines/punctuations in a
 286       --  row or ending an identifier with a underline/punctuation. Here
 287       --  punctuation means any UTF_32 character in the Unicode category
 288       --  Punctuation,Connector.
 289
 290       Wptr : Source_Ptr;
 291       --  Used to remember start of last wide character scanned
 292
 293       procedure Check_End_Of_Line;
 294       --  Called when end of line encountered. Checks that line is not too
 295       --  long, and that other style checks for the end of line are met.
 296
 297       function Double_Char_Token (C : Character) return Boolean;
 298       --  This function is used for double character tokens like := or <>. It
 299       --  checks if the character following Source (Scan_Ptr) is C, and if so
 300       --  bumps Scan_Ptr past the pair of characters and returns True. A space
 301       --  between the two characters is also recognized with an appropriate
 302       --  error message being issued. If C is not present, False is returned.
 303       --  Note that Double_Char_Token can only be used for tokens defined in
 304       --  the Ada syntax (it's use for error cases like && is not appropriate
 305       --  since we do not want a junk message for a case like &-space-&).
 306
 307       procedure Error_Illegal_Character;
 308       --  Give illegal character error, Scan_Ptr points to character. On
 309       --  return, Scan_Ptr is bumped past the illegal character.
 310
 311       procedure Error_Illegal_Wide_Character;
 312       --  Give illegal wide character message. On return, Scan_Ptr is bumped
 313       --  past the illegal character, which may still leave us pointing to
 314       --  junk, not much we can do if the escape sequence is messed up!
 315
 316       procedure Error_Long_Line;
 317       --  Signal error of excessively long line
 318
 319       procedure Error_No_Double_Underline;
 320       --  Signal error of two underline or punctuation characters in a row.
 321       --  Called with Scan_Ptr pointing to second underline/punctuation char.
 322
 323       procedure Nlit;
 324       --  This is the procedure for scanning out numeric literals. On entry,
 325       --  Scan_Ptr points to the digit that starts the numeric literal (the
 326       --  checksum for this character has not been accumulated yet). On return
 327       --  Scan_Ptr points past the last character of the numeric literal, Token
 328       --  and Token_Node are set appropriately, and the checksum is updated.
 329
 330       procedure Slit;
 331       --  This is the procedure for scanning out string literals. On entry,
 332       --  Scan_Ptr points to the opening string quote (the checksum for this
 333       --  character has not been accumulated yet). On return Scan_Ptr points
 334       --  past the closing quote of the string literal, Token and Token_Node
 335       --  are set appropriately, and the checksum is upated.
 336
 337       -----------------------
 338       -- Check_End_Of_Line --
 339       -----------------------
 340
 341       procedure Check_End_Of_Line is
 342          Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
 343
 344       begin
 345          if Style_Check then
 346             Style.Check_Line_Terminator (Len);
 347          end if;
 348
 349          --  Deal with checking maximum line length
 350
 351          if Style_Check and Style_Check_Max_Line_Length then
 352             Style.Check_Line_Max_Length (Len);
 353
 354          --  If style checking is inactive, check maximum line length against
 355          --  standard value. Note that we take this from Opt.Max_Line_Length
 356          --  rather than Hostparm.Max_Line_Length because we do not want to
 357          --  impose any limit during scanning of configuration pragma files,
 358          --  and Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length)
 359          --  is reset to Column_Number'Max during scanning of such files.
 360
 361          elsif Len > Opt.Max_Line_Length then
 362             Error_Long_Line;
 363          end if;
 364       end Check_End_Of_Line;
 365
 366       -----------------------
 367       -- Double_Char_Token --
 368       -----------------------
 369
 370       function Double_Char_Token (C : Character) return Boolean is
 371       begin
 372          if Source (Scan_Ptr + 1) = C then
 373             Accumulate_Checksum (C);
 374             Scan_Ptr := Scan_Ptr + 2;
 375             return True;
 376
 377          elsif Source (Scan_Ptr + 1) = ' '
 378            and then Source (Scan_Ptr + 2) = C
 379          then
 380             Scan_Ptr := Scan_Ptr + 1;
 381             Error_Msg_S ("no space allowed here");
 382             Scan_Ptr := Scan_Ptr + 2;
 383             return True;
 384
 385          else
 386             return False;
 387          end if;
 388       end Double_Char_Token;
 389
 390       -----------------------------
 391       -- Error_Illegal_Character --
 392       -----------------------------
 393
 394       procedure Error_Illegal_Character is
 395       begin
 396          Error_Msg_S ("illegal character");
 397          Scan_Ptr := Scan_Ptr + 1;
 398       end Error_Illegal_Character;
 399
 400       ----------------------------------
 401       -- Error_Illegal_Wide_Character --
 402       ----------------------------------
 403
 404       procedure Error_Illegal_Wide_Character is
 405       begin
 406          Error_Msg ("illegal wide character", Wptr);
 407       end Error_Illegal_Wide_Character;
 408
 409       ---------------------
 410       -- Error_Long_Line --
 411       ---------------------
 412
 413       procedure Error_Long_Line is
 414       begin
 415          Error_Msg
 416            ("this line is too long",
 417             Current_Line_Start + Source_Ptr (Opt.Max_Line_Length));
 418       end Error_Long_Line;
 419
 420       -------------------------------
 421       -- Error_No_Double_Underline --
 422       -------------------------------
 423
 424       procedure Error_No_Double_Underline is
 425       begin
 426          Underline_Found := False;
 427
 428          --  There are four cases, and we special case the messages
 429
 430          if Source (Scan_Ptr) = '_' then
 431             if Source (Scan_Ptr - 1) = '_' then
 432                Error_Msg_S
 433                  ("two consecutive underlines not permitted");
 434             else
 435                Error_Msg_S
 436                  ("underline cannot follow punctuation character");
 437             end if;
 438
 439          else
 440             if Source (Scan_Ptr - 1) = '_' then
 441                Error_Msg_S
 442                  ("punctuation character cannot follow underline");
 443             else
 444                Error_Msg_S
 445                  ("two consecutive punctuation characters not permitted");
 446             end if;
 447          end if;
 448       end Error_No_Double_Underline;
 449
 450       ----------
 451       -- Nlit --
 452       ----------
 453
 454       procedure Nlit is
 455
 456          C : Character;
 457          --  Current source program character
 458
 459          Base_Char : Character;
 460          --  Either # or : (character at start of based number)
 461
 462          Base : Int;
 463          --  Value of base
 464
 465          UI_Base : Uint;
 466          --  Value of base in Uint format
 467
 468          UI_Int_Value : Uint;
 469          --  Value of integer scanned by Scan_Integer in Uint format
 470
 471          UI_Num_Value : Uint;
 472          --  Value of integer in numeric value being scanned
 473
 474          Scale : Int;
 475          --  Scale value for real literal
 476
 477          UI_Scale : Uint;
 478          --  Scale in Uint format
 479
 480          Exponent_Is_Negative : Boolean;
 481          --  Set true for negative exponent
 482
 483          Extended_Digit_Value : Int;
 484          --  Extended digit value
 485
 486          Point_Scanned : Boolean;
 487          --  Flag for decimal point scanned in numeric literal
 488
 489          -----------------------
 490          -- Local Subprograms --
 491          -----------------------
 492
 493          procedure Error_Digit_Expected;
 494          --  Signal error of bad digit, Scan_Ptr points to the location at
 495          --  which the digit was expected on input, and is unchanged on return.
 496
 497          procedure Scan_Integer;
 498          --  Procedure to scan integer literal. On entry, Scan_Ptr points to a
 499          --  digit, on exit Scan_Ptr points past the last character of the
 500          --  integer.
 501          --
 502          --  For each digit encountered, UI_Int_Value is multiplied by 10, and
 503          --  the value of the digit added to the result. In addition, the
 504          --  value in Scale is decremented by one for each actual digit
 505          --  scanned.
 506
 507          --------------------------
 508          -- Error_Digit_Expected --
 509          --------------------------
 510
 511          procedure Error_Digit_Expected is
 512          begin
 513             Error_Msg_S ("digit expected");
 514          end Error_Digit_Expected;
 515
 516          ------------------
 517          -- Scan_Integer --
 518          ------------------
 519
 520          procedure Scan_Integer is
 521             C : Character;
 522             --  Next character scanned
 523
 524          begin
 525             C := Source (Scan_Ptr);
 526
 527             --  Loop through digits (allowing underlines)
 528
 529             loop
 530                Accumulate_Checksum (C);
 531                UI_Int_Value :=
 532                  UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
 533                Scan_Ptr := Scan_Ptr + 1;
 534                Scale := Scale - 1;
 535                C := Source (Scan_Ptr);
 536
 537                --  Case of underline encountered
 538
 539                if C = '_' then
 540
 541                   --  We do not accumulate the '_' in the checksum, so that
 542                   --  1_234 is equivalent to 1234, and does not trigger
 543                   --  compilation for "minimal recompilation" (gnatmake -m).
 544
 545                   loop
 546                      Scan_Ptr := Scan_Ptr + 1;
 547                      C := Source (Scan_Ptr);
 548                      exit when C /= '_';
 549                      Error_No_Double_Underline;
 550                   end loop;
 551
 552                   if C not in '0' .. '9' then
 553                      Error_Digit_Expected;
 554                      exit;
 555                   end if;
 556
 557                else
 558                   exit when C not in '0' .. '9';
 559                end if;
 560             end loop;
 561          end Scan_Integer;
 562
 563       --  Start of Processing for Nlit
 564
 565       begin
 566          Base := 10;
 567          UI_Base := Uint_10;
 568          UI_Int_Value := Uint_0;
 569          Scale := 0;
 570          Scan_Integer;
 571          Scale := 0;
 572          Point_Scanned := False;
 573          UI_Num_Value := UI_Int_Value;
 574
 575          --  Various possibilities now for continuing the literal are period,
 576          --  E/e (for exponent), or :/# (for based literal).
 577
 578          Scale := 0;
 579          C := Source (Scan_Ptr);
 580
 581          if C = '.' then
 582
 583             --  Scan out point, but do not scan past .. which is a range
 584             --  sequence, and must not be eaten up scanning a numeric literal.
 585
 586             while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
 587                Accumulate_Checksum ('.');
 588
 589                if Point_Scanned then
 590                   Error_Msg_S ("duplicate point ignored");
 591                end if;
 592
 593                Point_Scanned := True;
 594                Scan_Ptr := Scan_Ptr + 1;
 595                C := Source (Scan_Ptr);
 596
 597                if C not in '0' .. '9' then
 598                   Error_Msg
 599                     ("real literal cannot end with point", Scan_Ptr - 1);
 600                else
 601                   Scan_Integer;
 602                   UI_Num_Value := UI_Int_Value;
 603                end if;
 604             end loop;
 605
 606          --  Based literal case. The base is the value we already scanned.
 607          --  In the case of colon, we insist that the following character
 608          --  is indeed an extended digit or a period. This catches a number
 609          --  of common errors, as well as catching the well known tricky
 610          --  bug otherwise arising from "x : integer range 1 .. 10:= 6;"
 611
 612          elsif C = '#'
 613            or else (C = ':' and then
 614                       (Source (Scan_Ptr + 1) = '.'
 615                          or else
 616                        Source (Scan_Ptr + 1) in '0' .. '9'
 617                          or else
 618                        Source (Scan_Ptr + 1) in 'A' .. 'Z'
 619                          or else
 620                        Source (Scan_Ptr + 1) in 'a' .. 'z'))
 621          then
 622             if C = ':' then
 623                Obsolescent_Check (Scan_Ptr);
 624
 625                if Warn_On_Obsolescent_Feature then
 626                   Error_Msg_S
 627                     ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
 628                   Error_Msg_S
 629                     ("\use ""'#"" instead?");
 630                end if;
 631             end if;
 632
 633             Accumulate_Checksum (C);
 634             Base_Char := C;
 635             UI_Base := UI_Int_Value;
 636
 637             if UI_Base < 2 or else UI_Base > 16 then
 638                Error_Msg_SC ("base not 2-16");
 639                UI_Base := Uint_16;
 640             end if;
 641
 642             Base := UI_To_Int (UI_Base);
 643             Scan_Ptr := Scan_Ptr + 1;
 644
 645             --  Scan out extended integer [. integer]
 646
 647             C := Source (Scan_Ptr);
 648             UI_Int_Value := Uint_0;
 649             Scale := 0;
 650
 651             loop
 652                if C in '0' .. '9' then
 653                   Accumulate_Checksum (C);
 654                   Extended_Digit_Value :=
 655                     Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
 656
 657                elsif C in 'A' .. 'F' then
 658                   Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
 659                   Extended_Digit_Value :=
 660                     Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
 661
 662                elsif C in 'a' .. 'f' then
 663                   Accumulate_Checksum (C);
 664                   Extended_Digit_Value :=
 665                     Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
 666
 667                else
 668                   Error_Msg_S ("extended digit expected");
 669                   exit;
 670                end if;
 671
 672                if Extended_Digit_Value >= Base then
 673                   Error_Msg_S ("digit '>= base");
 674                end if;
 675
 676                UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
 677                Scale := Scale - 1;
 678                Scan_Ptr := Scan_Ptr + 1;
 679                C := Source (Scan_Ptr);
 680
 681                if C = '_' then
 682                   loop
 683                      Accumulate_Checksum ('_');
 684                      Scan_Ptr := Scan_Ptr + 1;
 685                      C := Source (Scan_Ptr);
 686                      exit when C /= '_';
 687                      Error_No_Double_Underline;
 688                   end loop;
 689
 690                elsif C = '.' then
 691                   Accumulate_Checksum ('.');
 692
 693                   if Point_Scanned then
 694                      Error_Msg_S ("duplicate point ignored");
 695                   end if;
 696
 697                   Scan_Ptr := Scan_Ptr + 1;
 698                   C := Source (Scan_Ptr);
 699                   Point_Scanned := True;
 700                   Scale := 0;
 701
 702                elsif C = Base_Char then
 703                   Accumulate_Checksum (C);
 704                   Scan_Ptr := Scan_Ptr + 1;
 705                   exit;
 706
 707                elsif C = '#' or else C = ':' then
 708                   Error_Msg_S ("based number delimiters must match");
 709                   Scan_Ptr := Scan_Ptr + 1;
 710                   exit;
 711
 712                elsif not Identifier_Char (C) then
 713                   if Base_Char = '#' then
 714                      Error_Msg_S ("missing '#");
 715                   else
 716                      Error_Msg_S ("missing ':");
 717                   end if;
 718
 719                   exit;
 720                end if;
 721
 722             end loop;
 723
 724             UI_Num_Value := UI_Int_Value;
 725          end if;
 726
 727          --  Scan out exponent
 728
 729          if not Point_Scanned then
 730             Scale := 0;
 731             UI_Scale := Uint_0;
 732          else
 733             UI_Scale := UI_From_Int (Scale);
 734          end if;
 735
 736          if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
 737             Accumulate_Checksum ('e');
 738             Scan_Ptr := Scan_Ptr + 1;
 739             Exponent_Is_Negative := False;
 740
 741             if Source (Scan_Ptr) = '+' then
 742                Accumulate_Checksum ('+');
 743                Scan_Ptr := Scan_Ptr + 1;
 744
 745             elsif Source (Scan_Ptr) = '-' then
 746                Accumulate_Checksum ('-');
 747
 748                if not Point_Scanned then
 749                   Error_Msg_S
 750                     ("negative exponent not allowed for integer literal");
 751                else
 752                   Exponent_Is_Negative := True;
 753                end if;
 754
 755                Scan_Ptr := Scan_Ptr + 1;
 756             end if;
 757
 758             UI_Int_Value := Uint_0;
 759
 760             if Source (Scan_Ptr) in '0' .. '9' then
 761                Scan_Integer;
 762             else
 763                Error_Digit_Expected;
 764             end if;
 765
 766             if Exponent_Is_Negative then
 767                UI_Scale := UI_Scale - UI_Int_Value;
 768             else
 769                UI_Scale := UI_Scale + UI_Int_Value;
 770             end if;
 771          end if;
 772
 773          --  Case of real literal to be returned
 774
 775          if Point_Scanned then
 776             Token := Tok_Real_Literal;
 777             Real_Literal_Value :=
 778               UR_From_Components (
 779                                   Num   => UI_Num_Value,
 780                                   Den   => -UI_Scale,
 781                                   Rbase => Base);
 782
 783          --  Case of integer literal to be returned
 784
 785          else
 786             Token := Tok_Integer_Literal;
 787
 788             if UI_Scale = 0 then
 789                Int_Literal_Value := UI_Num_Value;
 790
 791             --  Avoid doing possibly expensive calculations in cases like
 792             --  parsing 163E800_000# when semantics will not be done anyway.
 793             --  This is especially useful when parsing garbled input.
 794
 795             elsif Operating_Mode /= Check_Syntax
 796               and then (Serious_Errors_Detected = 0 or else Try_Semantics)
 797             then
 798                Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
 799
 800             else
 801                Int_Literal_Value := No_Uint;
 802             end if;
 803          end if;
 804
 805          Accumulate_Token_Checksum;
 806
 807          return;
 808       end Nlit;
 809
 810       ----------
 811       -- Slit --
 812       ----------
 813
 814       procedure Slit is
 815
 816          Delimiter : Character;
 817          --  Delimiter (first character of string)
 818
 819          C : Character;
 820          --  Current source program character
 821
 822          Code : Char_Code;
 823          --  Current character code value
 824
 825          Err : Boolean;
 826          --  Error flag for Scan_Wide call
 827
 828          procedure Error_Bad_String_Char;
 829          --  Signal bad character in string/character literal. On entry
 830          --  Scan_Ptr points to the improper character encountered during the
 831          --  scan. Scan_Ptr is not modified, so it still points to the bad
 832          --  character on return.
 833
 834          procedure Error_Unterminated_String;
 835          --  Procedure called if a line terminator character is encountered
 836          --  during scanning a string, meaning that the string is not properly
 837          --  terminated.
 838
 839          procedure Set_String;
 840          --  Procedure used to distinguish between string and operator symbol.
 841          --  On entry the string has been scanned out, and its characters
 842          --  start at Token_Ptr and end one character before Scan_Ptr. On exit
 843          --  Token is set to Tok_String_Literal or Tok_Operator_Symbol as
 844          --  appropriate, and Token_Node is appropriately initialized. In
 845          --  addition, in the operator symbol case, Token_Name is
 846          --  appropriately set.
 847
 848          ---------------------------
 849          -- Error_Bad_String_Char --
 850          ---------------------------
 851
 852          procedure Error_Bad_String_Char is
 853             C : constant Character := Source (Scan_Ptr);
 854
 855          begin
 856             if C = HT then
 857                Error_Msg_S ("horizontal tab not allowed in string");
 858
 859             elsif C = VT or else C = FF then
 860                Error_Msg_S ("format effector not allowed in string");
 861
 862             elsif C in Upper_Half_Character then
 863                Error_Msg_S ("(Ada 83) upper half character not allowed");
 864
 865             else
 866                Error_Msg_S ("control character not allowed in string");
 867             end if;
 868          end Error_Bad_String_Char;
 869
 870          -------------------------------
 871          -- Error_Unterminated_String --
 872          -------------------------------
 873
 874          procedure Error_Unterminated_String is
 875          begin
 876             --  An interesting little refinement. Consider the following
 877             --  examples:
 878
 879             --     A := "this is an unterminated string;
 880             --     A := "this is an unterminated string &
 881             --     P(A, "this is a parameter that didn't get terminated);
 882
 883             --  We fiddle a little to do slightly better placement in these
 884             --  cases also if there is white space at the end of the line we
 885             --  place the flag at the start of this white space, not at the
 886             --  end. Note that we only have to test for blanks, since tabs
 887             --  aren't allowed in strings in the first place and would have
 888             --  caused an error message.
 889
 890             --  Two more cases that we treat specially are:
 891
 892             --     A := "this string uses the wrong terminator'
 893             --     A := "this string uses the wrong terminator' &
 894
 895             --  In these cases we give a different error message as well
 896
 897             --  We actually reposition the scan pointer to the point where we
 898             --  place the flag in these cases, since it seems a better bet on
 899             --  the original intention.
 900
 901             while Source (Scan_Ptr - 1) = ' '
 902               or else Source (Scan_Ptr - 1) = '&'
 903             loop
 904                Scan_Ptr := Scan_Ptr - 1;
 905                Unstore_String_Char;
 906             end loop;
 907
 908             --  Check for case of incorrect string terminator, but single quote
 909             --  is not considered incorrect if the opening terminator misused
 910             --  a single quote (error message already given).
 911
 912             if Delimiter /= '''
 913               and then Source (Scan_Ptr - 1) = '''
 914             then
 915                Unstore_String_Char;
 916                Error_Msg
 917                  ("incorrect string terminator character", Scan_Ptr - 1);
 918                return;
 919             end if;
 920
 921             if Source (Scan_Ptr - 1) = ';' then
 922                Scan_Ptr := Scan_Ptr - 1;
 923                Unstore_String_Char;
 924
 925                if Source (Scan_Ptr - 1) = ')' then
 926                   Scan_Ptr := Scan_Ptr - 1;
 927                   Unstore_String_Char;
 928                end if;
 929             end if;
 930
 931             Error_Msg_S ("missing string quote");
 932          end Error_Unterminated_String;
 933
 934          ----------------
 935          -- Set_String --
 936          ----------------
 937
 938          procedure Set_String is
 939             Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
 940             C1   : Character;
 941             C2   : Character;
 942             C3   : Character;
 943
 944          begin
 945             --  Token_Name is currently set to Error_Name. The following
 946             --  section of code resets Token_Name to the proper Name_Op_xx
 947             --  value if the string is a valid operator symbol, otherwise it is
 948             --  left set to Error_Name.
 949
 950             if Slen = 1 then
 951                C1 := Source (Token_Ptr + 1);
 952
 953                case C1 is
 954                   when '=' =>
 955                      Token_Name := Name_Op_Eq;
 956
 957                   when '>' =>
 958                      Token_Name := Name_Op_Gt;
 959
 960                   when '<' =>
 961                      Token_Name := Name_Op_Lt;
 962
 963                   when '+' =>
 964                      Token_Name := Name_Op_Add;
 965
 966                   when '-' =>
 967                      Token_Name := Name_Op_Subtract;
 968
 969                   when '&' =>
 970                      Token_Name := Name_Op_Concat;
 971
 972                   when '*' =>
 973                      Token_Name := Name_Op_Multiply;
 974
 975                   when '/' =>
 976                      Token_Name := Name_Op_Divide;
 977
 978                   when others =>
 979                      null;
 980                end case;
 981
 982             elsif Slen = 2 then
 983                C1 := Source (Token_Ptr + 1);
 984                C2 := Source (Token_Ptr + 2);
 985
 986                if C1 = '*' and then C2 = '*' then
 987                   Token_Name := Name_Op_Expon;
 988
 989                elsif C2 = '=' then
 990
 991                   if C1 = '/' then
 992                      Token_Name := Name_Op_Ne;
 993                   elsif C1 = '<' then
 994                      Token_Name := Name_Op_Le;
 995                   elsif C1 = '>' then
 996                      Token_Name := Name_Op_Ge;
 997                   end if;
 998
 999                elsif (C1 = 'O' or else C1 = 'o') and then    -- OR
1000                  (C2 = 'R' or else C2 = 'r')
1001                then
1002                   Token_Name := Name_Op_Or;
1003                end if;
1004
1005             elsif Slen = 3 then
1006                C1 := Source (Token_Ptr + 1);
1007                C2 := Source (Token_Ptr + 2);
1008                C3 := Source (Token_Ptr + 3);
1009
1010                if (C1 = 'A' or else C1 = 'a') and then       -- AND
1011                  (C2 = 'N' or else C2 = 'n') and then
1012                  (C3 = 'D' or else C3 = 'd')
1013                then
1014                   Token_Name := Name_Op_And;
1015
1016                elsif (C1 = 'A' or else C1 = 'a') and then    -- ABS
1017                  (C2 = 'B' or else C2 = 'b') and then
1018                  (C3 = 'S' or else C3 = 's')
1019                then
1020                   Token_Name := Name_Op_Abs;
1021
1022                elsif (C1 = 'M' or else C1 = 'm') and then    -- MOD
1023                  (C2 = 'O' or else C2 = 'o') and then
1024                  (C3 = 'D' or else C3 = 'd')
1025                then
1026                   Token_Name := Name_Op_Mod;
1027
1028                elsif (C1 = 'N' or else C1 = 'n') and then    -- NOT
1029                  (C2 = 'O' or else C2 = 'o') and then
1030                  (C3 = 'T' or else C3 = 't')
1031                then
1032                   Token_Name := Name_Op_Not;
1033
1034                elsif (C1 = 'R' or else C1 = 'r') and then    -- REM
1035                  (C2 = 'E' or else C2 = 'e') and then
1036                  (C3 = 'M' or else C3 = 'm')
1037                then
1038                   Token_Name := Name_Op_Rem;
1039
1040                elsif (C1 = 'X' or else C1 = 'x') and then    -- XOR
1041                  (C2 = 'O' or else C2 = 'o') and then
1042                  (C3 = 'R' or else C3 = 'r')
1043                then
1044                   Token_Name := Name_Op_Xor;
1045                end if;
1046
1047             end if;
1048
1049             --  If it is an operator symbol, then Token_Name is set. If it is
1050             --  some other string value, then Token_Name still contains
1051             --  Error_Name.
1052
1053             if Token_Name = Error_Name then
1054                Token := Tok_String_Literal;
1055
1056             else
1057                Token := Tok_Operator_Symbol;
1058             end if;
1059          end Set_String;
1060
1061       --  Start of processing for Slit
1062
1063       begin
1064          --  On entry, Scan_Ptr points to the opening character of the string
1065          --  which is either a percent, double quote, or apostrophe (single
1066          --  quote). The latter case is an error detected by the character
1067          --  literal circuit.
1068
1069          Delimiter := Source (Scan_Ptr);
1070          Accumulate_Checksum (Delimiter);
1071          Start_String;
1072          Scan_Ptr := Scan_Ptr + 1;
1073
1074          --  Loop to scan out characters of string literal
1075
1076          loop
1077             C := Source (Scan_Ptr);
1078
1079             if C = Delimiter then
1080                Accumulate_Checksum (C);
1081                Scan_Ptr := Scan_Ptr + 1;
1082                exit when Source (Scan_Ptr) /= Delimiter;
1083                Code := Get_Char_Code (C);
1084                Accumulate_Checksum (C);
1085                Scan_Ptr := Scan_Ptr + 1;
1086
1087             else
1088                if C = '"' and then Delimiter = '%' then
1089                   Error_Msg_S
1090                     ("quote not allowed in percent delimited string");
1091                   Code := Get_Char_Code (C);
1092                   Scan_Ptr := Scan_Ptr + 1;
1093
1094                elsif (C = ESC
1095                         and then Wide_Character_Encoding_Method
1096                                    in WC_ESC_Encoding_Method)
1097                  or else (C in Upper_Half_Character
1098                             and then Upper_Half_Encoding)
1099                  or else (C = '['
1100                             and then Source (Scan_Ptr + 1) = '"'
1101                             and then Identifier_Char (Source (Scan_Ptr + 2)))
1102                then
1103                   Wptr := Scan_Ptr;
1104                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1105
1106                   if Err then
1107                      Error_Illegal_Wide_Character;
1108                      Code := Get_Char_Code (' ');
1109                   end if;
1110
1111                   Accumulate_Checksum (Code);
1112
1113                   if Ada_Version >= Ada_05
1114                     and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1115                   then
1116                      Error_Msg
1117                        ("(Ada 2005) non-graphic character not permitted " &
1118                         "in string literal", Wptr);
1119                   end if;
1120
1121                else
1122                   Accumulate_Checksum (C);
1123
1124                   if C not in Graphic_Character then
1125                      if C in Line_Terminator then
1126                         Error_Unterminated_String;
1127                         exit;
1128
1129                      elsif C in Upper_Half_Character then
1130                         if Ada_Version = Ada_83 then
1131                            Error_Bad_String_Char;
1132                         end if;
1133
1134                      else
1135                         Error_Bad_String_Char;
1136                      end if;
1137                   end if;
1138
1139                   Code := Get_Char_Code (C);
1140                   Scan_Ptr := Scan_Ptr + 1;
1141                end if;
1142             end if;
1143
1144             Store_String_Char (Code);
1145
1146             if not In_Character_Range (Code) then
1147                Wide_Character_Found := True;
1148             end if;
1149          end loop;
1150
1151          String_Literal_Id := End_String;
1152          Set_String;
1153          return;
1154       end Slit;
1155
1156    --  Start of processing for Scan
1157
1158    begin
1159       Prev_Token := Token;
1160       Prev_Token_Ptr := Token_Ptr;
1161       Token_Name := Error_Name;
1162
1163       --  The following loop runs more than once only if a format effector
1164       --  (tab, vertical tab, form  feed, line feed, carriage return) is
1165       --  encountered and skipped, or some error situation, such as an
1166       --  illegal character, is encountered.
1167
1168       <<Scan_Next_Character>>
1169
1170       loop
1171          --  Skip past blanks, loop is opened up for speed
1172
1173          while Source (Scan_Ptr) = ' ' loop
1174             if Source (Scan_Ptr + 1) /= ' ' then
1175                Scan_Ptr := Scan_Ptr + 1;
1176                exit;
1177             end if;
1178
1179             if Source (Scan_Ptr + 2) /= ' ' then
1180                Scan_Ptr := Scan_Ptr + 2;
1181                exit;
1182             end if;
1183
1184             if Source (Scan_Ptr + 3) /= ' ' then
1185                Scan_Ptr := Scan_Ptr + 3;
1186                exit;
1187             end if;
1188
1189             if Source (Scan_Ptr + 4) /= ' ' then
1190                Scan_Ptr := Scan_Ptr + 4;
1191                exit;
1192             end if;
1193
1194             if Source (Scan_Ptr + 5) /= ' ' then
1195                Scan_Ptr := Scan_Ptr + 5;
1196                exit;
1197             end if;
1198
1199             if Source (Scan_Ptr + 6) /= ' ' then
1200                Scan_Ptr := Scan_Ptr + 6;
1201                exit;
1202             end if;
1203
1204             if Source (Scan_Ptr + 7) /= ' ' then
1205                Scan_Ptr := Scan_Ptr + 7;
1206                exit;
1207             end if;
1208
1209             Scan_Ptr := Scan_Ptr + 8;
1210          end loop;
1211
1212          --  We are now at a non-blank character, which is the first character
1213          --  of the token we will scan, and hence the value of Token_Ptr.
1214
1215          Token_Ptr := Scan_Ptr;
1216
1217          --  Here begins the main case statement which transfers control on the
1218          --  basis of the non-blank character we have encountered.
1219
1220          case Source (Scan_Ptr) is
1221
1222          --  Line terminator characters
1223
1224          when CR | LF | FF | VT =>
1225             goto Scan_Line_Terminator;
1226
1227          --  Horizontal tab, just skip past it
1228
1229          when HT =>
1230             if Style_Check then Style.Check_HT; end if;
1231             Scan_Ptr := Scan_Ptr + 1;
1232
1233          --  End of file character, treated as an end of file only if it is
1234          --  the last character in the buffer, otherwise it is ignored.
1235
1236          when EOF =>
1237             if Scan_Ptr = Source_Last (Current_Source_File) then
1238                Check_End_Of_Line;
1239                Token := Tok_EOF;
1240                return;
1241             else
1242                Scan_Ptr := Scan_Ptr + 1;
1243             end if;
1244
1245          --  Ampersand
1246
1247          when '&' =>
1248             Accumulate_Checksum ('&');
1249
1250             if Source (Scan_Ptr + 1) = '&' then
1251                Error_Msg_S ("'&'& should be `AND THEN`");
1252                Scan_Ptr := Scan_Ptr + 2;
1253                Token := Tok_And;
1254                return;
1255
1256             else
1257                Scan_Ptr := Scan_Ptr + 1;
1258                Token := Tok_Ampersand;
1259                return;
1260             end if;
1261
1262          --  Asterisk (can be multiplication operator or double asterisk which
1263          --  is the exponentiation compound delimiter).
1264
1265          when '*' =>
1266             Accumulate_Checksum ('*');
1267
1268             if Source (Scan_Ptr + 1) = '*' then
1269                Accumulate_Checksum ('*');
1270                Scan_Ptr := Scan_Ptr + 2;
1271                Token := Tok_Double_Asterisk;
1272                return;
1273
1274             else
1275                Scan_Ptr := Scan_Ptr + 1;
1276                Token := Tok_Asterisk;
1277                return;
1278             end if;
1279
1280          --  Colon, which can either be an isolated colon, or part of an
1281          --  assignment compound delimiter.
1282
1283          when ':' =>
1284             Accumulate_Checksum (':');
1285
1286             if Double_Char_Token ('=') then
1287                Token := Tok_Colon_Equal;
1288                if Style_Check then Style.Check_Colon_Equal; end if;
1289                return;
1290
1291             elsif Source (Scan_Ptr + 1) = '-'
1292               and then Source (Scan_Ptr + 2) /= '-'
1293             then
1294                Token := Tok_Colon_Equal;
1295                Error_Msg (":- should be :=", Scan_Ptr);
1296                Scan_Ptr := Scan_Ptr + 2;
1297                return;
1298
1299             else
1300                Scan_Ptr := Scan_Ptr + 1;
1301                Token := Tok_Colon;
1302                if Style_Check then Style.Check_Colon; end if;
1303                return;
1304             end if;
1305
1306          --  Left parenthesis
1307
1308          when '(' =>
1309             Accumulate_Checksum ('(');
1310             Scan_Ptr := Scan_Ptr + 1;
1311             Token := Tok_Left_Paren;
1312             if Style_Check then Style.Check_Left_Paren; end if;
1313             return;
1314
1315          --  Left bracket
1316
1317          when '[' =>
1318             if Source (Scan_Ptr + 1) = '"' then
1319                goto Scan_Wide_Character;
1320
1321             else
1322                Error_Msg_S ("illegal character, replaced by ""(""");
1323                Scan_Ptr := Scan_Ptr + 1;
1324                Token := Tok_Left_Paren;
1325                return;
1326             end if;
1327
1328          --  Left brace
1329
1330          when '{' =>
1331             Error_Msg_S ("illegal character, replaced by ""(""");
1332             Scan_Ptr := Scan_Ptr + 1;
1333             Token := Tok_Left_Paren;
1334             return;
1335
1336          --  Comma
1337
1338          when ',' =>
1339             Accumulate_Checksum (',');
1340             Scan_Ptr := Scan_Ptr + 1;
1341             Token := Tok_Comma;
1342             if Style_Check then Style.Check_Comma; end if;
1343             return;
1344
1345          --  Dot, which is either an isolated period, or part of a double dot
1346          --  compound delimiter sequence. We also check for the case of a
1347          --  digit following the period, to give a better error message.
1348
1349          when '.' =>
1350             Accumulate_Checksum ('.');
1351
1352             if Double_Char_Token ('.') then
1353                Token := Tok_Dot_Dot;
1354                if Style_Check then Style.Check_Dot_Dot; end if;
1355                return;
1356
1357             elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1358                Error_Msg_S ("numeric literal cannot start with point");
1359                Scan_Ptr := Scan_Ptr + 1;
1360
1361             else
1362                Scan_Ptr := Scan_Ptr + 1;
1363                Token := Tok_Dot;
1364                return;
1365             end if;
1366
1367          --  Equal, which can either be an equality operator, or part of the
1368          --  arrow (=>) compound delimiter.
1369
1370          when '=' =>
1371             Accumulate_Checksum ('=');
1372
1373             if Double_Char_Token ('>') then
1374                Token := Tok_Arrow;
1375                if Style_Check then Style.Check_Arrow; end if;
1376                return;
1377
1378             elsif Source (Scan_Ptr + 1) = '=' then
1379                Error_Msg_S ("== should be =");
1380                Scan_Ptr := Scan_Ptr + 1;
1381             end if;
1382
1383             Scan_Ptr := Scan_Ptr + 1;
1384             Token := Tok_Equal;
1385             return;
1386
1387          --  Greater than, which can be a greater than operator, greater than
1388          --  or equal operator, or first character of a right label bracket.
1389
1390          when '>' =>
1391             Accumulate_Checksum ('>');
1392
1393             if Double_Char_Token ('=') then
1394                Token := Tok_Greater_Equal;
1395                return;
1396
1397             elsif Double_Char_Token ('>') then
1398                Token := Tok_Greater_Greater;
1399                return;
1400
1401             else
1402                Scan_Ptr := Scan_Ptr + 1;
1403                Token := Tok_Greater;
1404                return;
1405             end if;
1406
1407          --  Less than, which can be a less than operator, less than or equal
1408          --  operator, or the first character of a left label bracket, or the
1409          --  first character of a box (<>) compound delimiter.
1410
1411          when '<' =>
1412             Accumulate_Checksum ('<');
1413
1414             if Double_Char_Token ('=') then
1415                Token := Tok_Less_Equal;
1416                return;
1417
1418             elsif Double_Char_Token ('>') then
1419                Token := Tok_Box;
1420                if Style_Check then Style.Check_Box; end if;
1421                return;
1422
1423             elsif Double_Char_Token ('<') then
1424                Token := Tok_Less_Less;
1425                return;
1426
1427             else
1428                Scan_Ptr := Scan_Ptr + 1;
1429                Token := Tok_Less;
1430                return;
1431             end if;
1432
1433          --  Minus, which is either a subtraction operator, or the first
1434          --  character of double minus starting a comment
1435
1436          when '-' => Minus_Case : begin
1437             if Source (Scan_Ptr + 1) = '>' then
1438                Error_Msg_S ("invalid token");
1439                Scan_Ptr := Scan_Ptr + 2;
1440                Token := Tok_Arrow;
1441                return;
1442
1443             elsif Source (Scan_Ptr + 1) /= '-' then
1444                Accumulate_Checksum ('-');
1445                Scan_Ptr := Scan_Ptr + 1;
1446                Token := Tok_Minus;
1447                return;
1448
1449             --  Comment
1450
1451             else -- Source (Scan_Ptr + 1) = '-' then
1452                if Style_Check then Style.Check_Comment; end if;
1453                Scan_Ptr := Scan_Ptr + 2;
1454                Start_Of_Comment := Scan_Ptr;
1455
1456                --  Loop to scan comment (this loop runs more than once only if
1457                --  a horizontal tab or other non-graphic character is scanned)
1458
1459                loop
1460                   --  Scan to non graphic character (opened up for speed)
1461
1462                   --  Note that we just eat left brackets, which means that
1463                   --  bracket notation cannot be used for end of line
1464                   --  characters in comments. This seems a reasonable choice,
1465                   --  since no one would ever use brackets notation in a real
1466                   --  program in this situation, and if we allow brackets
1467                   --  notation, we forbid some valid comments which contain a
1468                   --  brackets sequence that happens to match an end of line
1469                   --  character.
1470
1471                   loop
1472                      exit when Source (Scan_Ptr) not in Graphic_Character;
1473                      Scan_Ptr := Scan_Ptr + 1;
1474                      exit when Source (Scan_Ptr) not in Graphic_Character;
1475                      Scan_Ptr := Scan_Ptr + 1;
1476                      exit when Source (Scan_Ptr) not in Graphic_Character;
1477                      Scan_Ptr := Scan_Ptr + 1;
1478                      exit when Source (Scan_Ptr) not in Graphic_Character;
1479                      Scan_Ptr := Scan_Ptr + 1;
1480                      exit when Source (Scan_Ptr) not in Graphic_Character;
1481                      Scan_Ptr := Scan_Ptr + 1;
1482                   end loop;
1483
1484                   --  Keep going if horizontal tab
1485
1486                   if Source (Scan_Ptr) = HT then
1487                      if Style_Check then Style.Check_HT; end if;
1488                      Scan_Ptr := Scan_Ptr + 1;
1489
1490                   --  Terminate scan of comment if line terminator
1491
1492                   elsif Source (Scan_Ptr) in Line_Terminator then
1493                      exit;
1494
1495                   --  Terminate scan of comment if end of file encountered
1496                   --  (embedded EOF character or real last character in file)
1497
1498                   elsif Source (Scan_Ptr) = EOF then
1499                      exit;
1500
1501                   --  If we have a wide character, we have to scan it out,
1502                   --  because it might be a legitimate line terminator
1503
1504                   elsif (Source (Scan_Ptr) = ESC
1505                            and then Identifier_Char (ESC))
1506                     or else
1507                          (Source (Scan_Ptr) in Upper_Half_Character
1508                             and then Upper_Half_Encoding)
1509                   then
1510                      declare
1511                         Wptr : constant Source_Ptr := Scan_Ptr;
1512                         Code : Char_Code;
1513                         Err  : Boolean;
1514
1515                      begin
1516                         Scan_Wide (Source, Scan_Ptr, Code, Err);
1517
1518                         --  If not well formed wide character, then just skip
1519                         --  past it and ignore it.
1520
1521                         if Err then
1522                            Scan_Ptr := Wptr + 1;
1523
1524                         --  If UTF_32 terminator, terminate comment scan
1525
1526                         elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1527                            Scan_Ptr := Wptr;
1528                            exit;
1529                         end if;
1530                      end;
1531
1532                   --  Keep going if character in 80-FF range, or is ESC. These
1533                   --  characters are allowed in comments by RM-2.1(1), 2.7(2).
1534                   --  They are allowed even in Ada 83 mode according to the
1535                   --  approved AI. ESC was added to the AI in June 93.
1536
1537                   elsif Source (Scan_Ptr) in Upper_Half_Character
1538                      or else Source (Scan_Ptr) = ESC
1539                   then
1540                      Scan_Ptr := Scan_Ptr + 1;
1541
1542                   --  Otherwise we have an illegal comment character
1543
1544                   else
1545                      Error_Illegal_Character;
1546                   end if;
1547                end loop;
1548
1549                --  Note that, except when comments are tokens, we do NOT
1550                --  execute a return here, instead we fall through to reexecute
1551                --  the scan loop to look for a token.
1552
1553                if Comment_Is_Token then
1554                   Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1555                   Name_Buffer (1 .. Name_Len) :=
1556                     String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1557                   Comment_Id := Name_Find;
1558                   Token := Tok_Comment;
1559                   return;
1560                end if;
1561             end if;
1562          end Minus_Case;
1563
1564          --  Double quote starting a string literal
1565
1566          when '"' =>
1567             Slit;
1568             Post_Scan;
1569             return;
1570
1571          --  Percent starting a string literal
1572
1573          when '%' =>
1574             Obsolescent_Check (Token_Ptr);
1575
1576             if Warn_On_Obsolescent_Feature then
1577                Error_Msg_S
1578                  ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1579                Error_Msg_S
1580                  ("\use """""" instead?");
1581             end if;
1582
1583             Slit;
1584             Post_Scan;
1585             return;
1586
1587          --  Apostrophe. This can either be the start of a character literal,
1588          --  or an isolated apostrophe used in a qualified expression or an
1589          --  attribute. We treat it as a character literal if it does not
1590          --  follow a right parenthesis, identifier, the keyword ALL or
1591          --  a literal. This means that we correctly treat constructs like:
1592
1593          --    A := CHARACTER'('A');
1594
1595          --  Note that RM-2.2(7) does not require a separator between
1596          --  "CHARACTER" and "'" in the above.
1597
1598          when ''' => Char_Literal_Case : declare
1599             Code : Char_Code;
1600             Err  : Boolean;
1601
1602          begin
1603             Accumulate_Checksum (''');
1604             Scan_Ptr := Scan_Ptr + 1;
1605
1606             --  Here is where we make the test to distinguish the cases. Treat
1607             --  as apostrophe if previous token is an identifier, right paren
1608             --  or the reserved word "all" (latter case as in A.all'Address)
1609             --  (or the reserved word "project" in project files). Also treat
1610             --  it as apostrophe after a literal (this catches some legitimate
1611             --  cases, like A."abs"'Address, and also gives better error
1612             --  behavior for impossible cases like 123'xxx).
1613
1614             if Prev_Token = Tok_Identifier
1615                or else Prev_Token = Tok_Right_Paren
1616                or else Prev_Token = Tok_All
1617                or else Prev_Token = Tok_Project
1618                or else Prev_Token in Token_Class_Literal
1619             then
1620                Token := Tok_Apostrophe;
1621                if Style_Check then Style.Check_Apostrophe; end if;
1622                return;
1623
1624             --  Otherwise the apostrophe starts a character literal
1625
1626             else
1627                --  Case of wide character literal
1628
1629                if (Source (Scan_Ptr) = ESC
1630                      and then
1631                     Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1632                  or else
1633                    (Source (Scan_Ptr) in Upper_Half_Character
1634                      and then
1635                     Upper_Half_Encoding)
1636                  or else
1637                    (Source (Scan_Ptr) = '['
1638                      and then
1639                     Source (Scan_Ptr + 1) = '"')
1640                then
1641                   Wptr := Scan_Ptr;
1642                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1643                   Accumulate_Checksum (Code);
1644
1645                   if Err then
1646                      Error_Illegal_Wide_Character;
1647                      Code := Character'Pos (' ');
1648
1649                   elsif Ada_Version >= Ada_05
1650                     and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1651                   then
1652                      Error_Msg
1653                        ("(Ada 2005) non-graphic character not permitted " &
1654                         "in character literal", Wptr);
1655                   end if;
1656
1657                   if Source (Scan_Ptr) /= ''' then
1658                      Error_Msg_S ("missing apostrophe");
1659                   else
1660                      Scan_Ptr := Scan_Ptr + 1;
1661                   end if;
1662
1663                --  If we do not find a closing quote in the expected place then
1664                --  assume that we have a misguided attempt at a string literal.
1665
1666                --  However, if previous token is RANGE, then we return an
1667                --  apostrophe instead since this gives better error recovery
1668
1669                elsif Source (Scan_Ptr + 1) /= ''' then
1670                   if Prev_Token = Tok_Range then
1671                      Token := Tok_Apostrophe;
1672                      return;
1673
1674                   else
1675                      Scan_Ptr := Scan_Ptr - 1;
1676                      Error_Msg_S
1677                        ("strings are delimited by double quote character");
1678                      Slit;
1679                      Post_Scan;
1680                      return;
1681                   end if;
1682
1683                --  Otherwise we have a (non-wide) character literal
1684
1685                else
1686                   Accumulate_Checksum (Source (Scan_Ptr));
1687
1688                   if Source (Scan_Ptr) not in Graphic_Character then
1689                      if Source (Scan_Ptr) in Upper_Half_Character then
1690                         if Ada_Version = Ada_83 then
1691                            Error_Illegal_Character;
1692                         end if;
1693
1694                      else
1695                         Error_Illegal_Character;
1696                      end if;
1697                   end if;
1698
1699                   Code := Get_Char_Code (Source (Scan_Ptr));
1700                   Scan_Ptr := Scan_Ptr + 2;
1701                end if;
1702
1703                --  Fall through here with Scan_Ptr updated past the closing
1704                --  quote, and Code set to the Char_Code value for the literal
1705
1706                Accumulate_Checksum (''');
1707                Token := Tok_Char_Literal;
1708                Set_Character_Literal_Name (Code);
1709                Token_Name := Name_Find;
1710                Character_Code := Code;
1711                Post_Scan;
1712                return;
1713             end if;
1714          end Char_Literal_Case;
1715
1716          --  Right parenthesis
1717
1718          when ')' =>
1719             Accumulate_Checksum (')');
1720             Scan_Ptr := Scan_Ptr + 1;
1721             Token := Tok_Right_Paren;
1722             if Style_Check then Style.Check_Right_Paren; end if;
1723             return;
1724
1725          --  Right bracket or right brace, treated as right paren
1726
1727          when ']' | '}' =>
1728             Error_Msg_S ("illegal character, replaced by "")""");
1729             Scan_Ptr := Scan_Ptr + 1;
1730             Token := Tok_Right_Paren;
1731             return;
1732
1733          --  Slash (can be division operator or first character of not equal)
1734
1735          when '/' =>
1736             Accumulate_Checksum ('/');
1737
1738             if Double_Char_Token ('=') then
1739                Token := Tok_Not_Equal;
1740                return;
1741             else
1742                Scan_Ptr := Scan_Ptr + 1;
1743                Token := Tok_Slash;
1744                return;
1745             end if;
1746
1747          --  Semicolon
1748
1749          when ';' =>
1750             Accumulate_Checksum (';');
1751             Scan_Ptr := Scan_Ptr + 1;
1752             Token := Tok_Semicolon;
1753             if Style_Check then Style.Check_Semicolon; end if;
1754             return;
1755
1756          --  Vertical bar
1757
1758          when '|' => Vertical_Bar_Case : begin
1759             Accumulate_Checksum ('|');
1760
1761             --  Special check for || to give nice message
1762
1763             if Source (Scan_Ptr + 1) = '|' then
1764                Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1765                Scan_Ptr := Scan_Ptr + 2;
1766                Token := Tok_Or;
1767                return;
1768
1769             else
1770                Scan_Ptr := Scan_Ptr + 1;
1771                Token := Tok_Vertical_Bar;
1772                if Style_Check then Style.Check_Vertical_Bar; end if;
1773                return;
1774             end if;
1775          end Vertical_Bar_Case;
1776
1777          --  Exclamation, replacement character for vertical bar
1778
1779          when '!' => Exclamation_Case : begin
1780             Accumulate_Checksum ('!');
1781             Obsolescent_Check (Token_Ptr);
1782
1783             if Warn_On_Obsolescent_Feature then
1784                Error_Msg_S
1785                  ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1786                Error_Msg_S
1787                  ("\use ""'|"" instead?");
1788             end if;
1789
1790             if Source (Scan_Ptr + 1) = '=' then
1791                Error_Msg_S ("'!= should be /=");
1792                Scan_Ptr := Scan_Ptr + 2;
1793                Token := Tok_Not_Equal;
1794                return;
1795
1796             else
1797                Scan_Ptr := Scan_Ptr + 1;
1798                Token := Tok_Vertical_Bar;
1799                return;
1800             end if;
1801          end Exclamation_Case;
1802
1803          --  Plus
1804
1805          when '+' => Plus_Case : begin
1806             Accumulate_Checksum ('+');
1807             Scan_Ptr := Scan_Ptr + 1;
1808             Token := Tok_Plus;
1809             return;
1810          end Plus_Case;
1811
1812          --  Digits starting a numeric literal
1813
1814          when '0' .. '9' =>
1815             Nlit;
1816
1817             if Identifier_Char (Source (Scan_Ptr)) then
1818                Error_Msg_S
1819                  ("delimiter required between literal and identifier");
1820             end if;
1821             Post_Scan;
1822             return;
1823
1824          --  Lower case letters
1825
1826          when 'a' .. 'z' =>
1827             Name_Len := 1;
1828             Underline_Found := False;
1829             Name_Buffer (1) := Source (Scan_Ptr);
1830             Accumulate_Checksum (Name_Buffer (1));
1831             Scan_Ptr := Scan_Ptr + 1;
1832             goto Scan_Identifier;
1833
1834          --  Upper case letters
1835
1836          when 'A' .. 'Z' =>
1837             Name_Len := 1;
1838             Underline_Found := False;
1839             Name_Buffer (1) :=
1840               Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1841             Accumulate_Checksum (Name_Buffer (1));
1842             Scan_Ptr := Scan_Ptr + 1;
1843             goto Scan_Identifier;
1844
1845          --  Underline character
1846
1847          when '_' =>
1848             if Special_Characters ('_') then
1849                Token_Ptr := Scan_Ptr;
1850                Scan_Ptr := Scan_Ptr + 1;
1851                Token := Tok_Special;
1852                Special_Character := '_';
1853                return;
1854             end if;
1855
1856             Error_Msg_S ("identifier cannot start with underline");
1857             Name_Len := 1;
1858             Name_Buffer (1) := '_';
1859             Scan_Ptr := Scan_Ptr + 1;
1860             Underline_Found := False;
1861             goto Scan_Identifier;
1862
1863          --  Space (not possible, because we scanned past blanks)
1864
1865          when ' ' =>
1866             raise Program_Error;
1867
1868          --  Characters in top half of ASCII 8-bit chart
1869
1870          when Upper_Half_Character =>
1871
1872             --  Wide character case
1873
1874             if Upper_Half_Encoding then
1875                goto Scan_Wide_Character;
1876
1877             --  Otherwise we have OK Latin-1 character
1878
1879             else
1880                --  Upper half characters may possibly be identifier letters
1881                --  but can never be digits, so Identifier_Char can be used to
1882                --  test for a valid start of identifier character.
1883
1884                if Identifier_Char (Source (Scan_Ptr)) then
1885                   Name_Len := 0;
1886                   Underline_Found := False;
1887                   goto Scan_Identifier;
1888                else
1889                   Error_Illegal_Character;
1890                end if;
1891             end if;
1892
1893          when ESC =>
1894
1895             --  ESC character, possible start of identifier if wide characters
1896             --  using ESC encoding are allowed in identifiers, which we can
1897             --  tell by looking at the Identifier_Char flag for ESC, which is
1898             --  only true if these conditions are met. In Ada 2005 mode, may
1899             --  also be valid UTF_32 space or line terminator character.
1900
1901             if Identifier_Char (ESC) then
1902                Name_Len := 0;
1903                goto Scan_Wide_Character;
1904             else
1905                Error_Illegal_Character;
1906             end if;
1907
1908          --  Invalid control characters
1909
1910          when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | ASCII.SO |
1911               SI  | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1912               EM  | FS  | GS  | RS  | US  | DEL
1913          =>
1914             Error_Illegal_Character;
1915
1916          --  Invalid graphic characters
1917
1918          when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1919
1920             --  If Set_Special_Character has been called for this character,
1921             --  set Scans.Special_Character and return a Special token.
1922
1923             if Special_Characters (Source (Scan_Ptr)) then
1924                Token_Ptr := Scan_Ptr;
1925                Token := Tok_Special;
1926                Special_Character := Source (Scan_Ptr);
1927                Scan_Ptr := Scan_Ptr + 1;
1928                return;
1929
1930             --  Otherwise, this is an illegal character
1931
1932             else
1933                Error_Illegal_Character;
1934             end if;
1935
1936          --  End switch on non-blank character
1937
1938          end case;
1939
1940       --  End loop past format effectors. The exit from this loop is by
1941       --  executing a return statement following completion of token scan
1942       --  (control never falls out of this loop to the code which follows)
1943
1944       end loop;
1945
1946       --  Wide_Character scanning routine. On entry we have encountered the
1947       --  initial character of a wide character sequence.
1948
1949       <<Scan_Wide_Character>>
1950
1951          declare
1952             Code : Char_Code;
1953             Cat  : Category;
1954             Err  : Boolean;
1955
1956          begin
1957             Wptr := Scan_Ptr;
1958             Scan_Wide (Source, Scan_Ptr, Code, Err);
1959
1960             --  If bad wide character, signal error and continue scan
1961
1962             if Err then
1963                Error_Illegal_Wide_Character;
1964                goto Scan_Next_Character;
1965             end if;
1966
1967             Cat := Get_Category (UTF_32 (Code));
1968
1969             --  If OK letter, reset scan ptr and go scan identifier
1970
1971             if Is_UTF_32_Letter (Cat) then
1972                Scan_Ptr := Wptr;
1973                Name_Len := 0;
1974                Underline_Found := False;
1975                goto Scan_Identifier;
1976
1977             --  If OK wide space, ignore and keep scanning (we do not include
1978             --  any ignored spaces in checksum)
1979
1980             elsif Is_UTF_32_Space (Cat) then
1981                goto Scan_Next_Character;
1982
1983             --  If OK wide line terminator, terminate current line
1984
1985             elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1986                Scan_Ptr := Wptr;
1987                goto Scan_Line_Terminator;
1988
1989             --  Punctuation is an error (at start of identifier)
1990
1991             elsif Is_UTF_32_Punctuation (Cat) then
1992                Error_Msg
1993                  ("identifier cannot start with punctuation", Wptr);
1994                Scan_Ptr := Wptr;
1995                Name_Len := 0;
1996                Underline_Found := False;
1997                goto Scan_Identifier;
1998
1999             --  Mark character is an error (at start of identifer)
2000
2001             elsif Is_UTF_32_Mark (Cat) then
2002                Error_Msg
2003                  ("identifier cannot start with mark character", Wptr);
2004                Scan_Ptr := Wptr;
2005                Name_Len := 0;
2006                Underline_Found := False;
2007                goto Scan_Identifier;
2008
2009             --  Other format character is an error (at start of identifer)
2010
2011             elsif Is_UTF_32_Other (Cat) then
2012                Error_Msg
2013                  ("identifier cannot start with other format character", Wptr);
2014                Scan_Ptr := Wptr;
2015                Name_Len := 0;
2016                Underline_Found := False;
2017                goto Scan_Identifier;
2018
2019             --  Extended digit character is an error. Could be bad start of
2020             --  identifier or bad literal. Not worth doing too much to try to
2021             --  distinguish these cases, but we will do a little bit.
2022
2023             elsif Is_UTF_32_Digit (Cat) then
2024                Error_Msg
2025                  ("identifier cannot start with digit character", Wptr);
2026                Scan_Ptr := Wptr;
2027                Name_Len := 0;
2028                Underline_Found := False;
2029                goto Scan_Identifier;
2030
2031             --  All other wide characters are illegal here
2032
2033             else
2034                Error_Illegal_Wide_Character;
2035                goto Scan_Next_Character;
2036             end if;
2037          end;
2038
2039       --  Routine to scan line terminator. On entry Scan_Ptr points to a
2040       --  character which is one of FF,LR,CR,VT, or one of the wide characters
2041       --  that is treated as a line termiantor.
2042
2043       <<Scan_Line_Terminator>>
2044
2045          --  Check line too long
2046
2047          Check_End_Of_Line;
2048
2049          --  Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2050          --  a physical line.
2051
2052          if End_Of_Line_Is_Token then
2053             Token_Ptr := Scan_Ptr;
2054          end if;
2055
2056          declare
2057             Physical : Boolean;
2058
2059          begin
2060             Skip_Line_Terminators (Scan_Ptr, Physical);
2061
2062             --  If we are at start of physical line, update scan pointers to
2063             --  reflect the start of the new line.
2064
2065             if Physical then
2066                Current_Line_Start       := Scan_Ptr;
2067                Start_Column             := Set_Start_Column;
2068                First_Non_Blank_Location := Scan_Ptr;
2069
2070                --  If End_Of_Line is a token, we return it as it is a
2071                --  physical line.
2072
2073                if End_Of_Line_Is_Token then
2074                   Token := Tok_End_Of_Line;
2075                   return;
2076                end if;
2077             end if;
2078          end;
2079
2080          goto Scan_Next_Character;
2081
2082       --  Identifier scanning routine. On entry, some initial characters of
2083       --  the identifier may have already been stored in Name_Buffer. If so,
2084       --  Name_Len has the number of characters stored. otherwise Name_Len is
2085       --  set to zero on entry. Underline_Found is also set False on entry.
2086
2087       <<Scan_Identifier>>
2088
2089          --  This loop scans as fast as possible past lower half letters and
2090          --  digits, which we expect to be the most common characters.
2091
2092          loop
2093             if Source (Scan_Ptr) in 'a' .. 'z'
2094               or else Source (Scan_Ptr) in '0' .. '9'
2095             then
2096                Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2097                Accumulate_Checksum (Source (Scan_Ptr));
2098
2099             elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2100                Name_Buffer (Name_Len + 1) :=
2101                  Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2102                Accumulate_Checksum (Name_Buffer (Name_Len + 1));
2103
2104             else
2105                exit;
2106             end if;
2107
2108             Underline_Found := False;
2109             Scan_Ptr := Scan_Ptr + 1;
2110             Name_Len := Name_Len + 1;
2111          end loop;
2112
2113          --  If we fall through, then we have encountered either an underline
2114          --  character, or an extended identifier character (i.e. one from the
2115          --  upper half), or a wide character, or an identifier terminator. The
2116          --  initial test speeds us up in the most common case where we have
2117          --  an identifier terminator. Note that ESC is an identifier character
2118          --  only if a wide character encoding method that uses ESC encoding
2119          --  is active, so if we find an ESC character we know that we have a
2120          --  wide character.
2121
2122          if Identifier_Char (Source (Scan_Ptr)) then
2123
2124             --  Case of underline
2125
2126             if Source (Scan_Ptr) = '_' then
2127                Accumulate_Checksum ('_');
2128
2129                if Underline_Found then
2130                   Error_No_Double_Underline;
2131                else
2132                   Underline_Found := True;
2133                   Name_Len := Name_Len + 1;
2134                   Name_Buffer (Name_Len) := '_';
2135                end if;
2136
2137                Scan_Ptr := Scan_Ptr + 1;
2138                goto Scan_Identifier;
2139
2140             --  Upper half character
2141
2142             elsif Source (Scan_Ptr) in Upper_Half_Character
2143               and then not Upper_Half_Encoding
2144             then
2145                Accumulate_Checksum (Source (Scan_Ptr));
2146                Store_Encoded_Character
2147                  (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2148                Scan_Ptr := Scan_Ptr + 1;
2149                Underline_Found := False;
2150                goto Scan_Identifier;
2151
2152             --  Left bracket not followed by a quote terminates an identifier.
2153             --  This is an error, but we don't want to give a junk error msg
2154             --  about wide characters in this case!
2155
2156             elsif Source (Scan_Ptr) = '['
2157               and then Source (Scan_Ptr + 1) /= '"'
2158             then
2159                null;
2160
2161             --  We know we have a wide character encoding here (the current
2162             --  character is either ESC, left bracket, or an upper half
2163             --  character depending on the encoding method).
2164
2165             else
2166                --  Scan out the wide character and insert the appropriate
2167                --  encoding into the name table entry for the identifier.
2168
2169                declare
2170                   Code : Char_Code;
2171                   Err  : Boolean;
2172                   Chr  : Character;
2173                   Cat  : Category;
2174
2175                begin
2176                   Wptr := Scan_Ptr;
2177                   Scan_Wide (Source, Scan_Ptr, Code, Err);
2178
2179                   --  If error, signal error
2180
2181                   if Err then
2182                      Error_Illegal_Wide_Character;
2183
2184                   --  If the character scanned is a normal identifier
2185                   --  character, then we treat it that way.
2186
2187                   elsif In_Character_Range (Code)
2188                     and then Identifier_Char (Get_Character (Code))
2189                   then
2190                      Chr := Get_Character (Code);
2191                      Accumulate_Checksum (Chr);
2192                      Store_Encoded_Character
2193                        (Get_Char_Code (Fold_Lower (Chr)));
2194                      Underline_Found := False;
2195
2196                   --  Here if not a normal identifier character
2197
2198                   else
2199                      --  Make sure we are allowing wide characters in
2200                      --  identifiers. Note that we allow wide character
2201                      --  notation for an OK identifier character. This in
2202                      --  particular allows bracket or other notation to be
2203                      --  used for upper half letters.
2204
2205                      --  Wide characters are always allowed in Ada 2005
2206
2207                      if Identifier_Character_Set /= 'w'
2208                        and then Ada_Version < Ada_05
2209                      then
2210                         Error_Msg
2211                        ("wide character not allowed in identifier", Wptr);
2212                      end if;
2213
2214                      Cat := Get_Category (UTF_32 (Code));
2215
2216                      --  If OK letter, store it folding to upper case. Note
2217                      --  that we include the folded letter in the checksum.
2218
2219                      if Is_UTF_32_Letter (Cat) then
2220                         Code :=
2221                           Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
2222                         Accumulate_Checksum (Code);
2223                         Store_Encoded_Character (Code);
2224                         Underline_Found := False;
2225
2226                      --  If OK extended digit or mark, then store it
2227
2228                      elsif Is_UTF_32_Digit (Cat)
2229                        or else Is_UTF_32_Mark (Cat)
2230                      then
2231                         Accumulate_Checksum (Code);
2232                         Store_Encoded_Character (Code);
2233                         Underline_Found := False;
2234
2235                      --  Wide punctuation is also stored, but counts as an
2236                      --  underline character for error checking purposes.
2237
2238                      elsif Is_UTF_32_Punctuation (Cat) then
2239                         Accumulate_Checksum (Code);
2240
2241                         if Underline_Found then
2242                            declare
2243                               Cend : constant Source_Ptr := Scan_Ptr;
2244                            begin
2245                               Scan_Ptr := Wptr;
2246                               Error_No_Double_Underline;
2247                               Scan_Ptr := Cend;
2248                            end;
2249
2250                         else
2251                            Store_Encoded_Character (Code);
2252                            Underline_Found := True;
2253                         end if;
2254
2255                      --  Wide character in Unicode cateogory "Other, Format"
2256                      --  is accepted in an identifier, but is ignored and not
2257                      --  stored. It seems reasonable to exclude it from the
2258                      --  checksum.
2259
2260                      elsif Is_UTF_32_Other (Cat) then
2261                         null;
2262
2263                      --  Wide character in category Separator,Space terminates
2264
2265                      elsif Is_UTF_32_Space (Cat) then
2266                         goto Scan_Identifier_Complete;
2267
2268                      --  Any other wide character is not acceptable
2269
2270                      else
2271                         Error_Msg
2272                           ("invalid wide character in identifier", Wptr);
2273                      end if;
2274                   end if;
2275
2276                   goto Scan_Identifier;
2277                end;
2278             end if;
2279          end if;
2280
2281       --  Scan of identifier is complete. The identifier is stored in
2282       --  Name_Buffer, and Scan_Ptr points past the last character.
2283
2284       <<Scan_Identifier_Complete>>
2285          Token_Name := Name_Find;
2286
2287          --  Check for identifier ending with underline or punctuation char
2288
2289          if Underline_Found then
2290             Underline_Found := False;
2291
2292             if Source (Scan_Ptr - 1) = '_' then
2293                Error_Msg
2294                  ("identifier cannot end with underline", Scan_Ptr - 1);
2295             else
2296                Error_Msg
2297                  ("identifier cannot end with punctuation character", Wptr);
2298             end if;
2299          end if;
2300
2301          --  Here is where we check if it was a keyword
2302
2303          if Get_Name_Table_Byte (Token_Name) /= 0
2304            and then (Ada_Version >= Ada_95
2305                        or else Token_Name not in Ada_95_Reserved_Words)
2306            and then (Ada_Version >= Ada_05
2307                        or else Token_Name not in Ada_2005_Reserved_Words)
2308          then
2309             Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2310
2311             --  Deal with possible style check for non-lower case keyword, but
2312             --  we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2313             --  this purpose if they appear as attribute designators. Actually
2314             --  we only check the first character for speed.
2315
2316             --  Ada 2005 (AI-284): Do not apply the style check in case of
2317             --  "pragma Interface"
2318
2319             if Style_Check
2320               and then Source (Token_Ptr) <= 'Z'
2321               and then (Prev_Token /= Tok_Apostrophe
2322                           or else
2323                             (Token /= Tok_Access
2324                                and then Token /= Tok_Delta
2325                                and then Token /= Tok_Digits
2326                                and then Token /= Tok_Range))
2327               and then (Token /= Tok_Interface
2328                           or else
2329                             (Token = Tok_Interface
2330                                and then Prev_Token /= Tok_Pragma))
2331             then
2332                Style.Non_Lower_Case_Keyword;
2333             end if;
2334
2335             --  We must reset Token_Name since this is not an identifier and
2336             --  if we leave Token_Name set, the parser gets confused because
2337             --  it thinks it is dealing with an identifier instead of the
2338             --  corresponding keyword.
2339
2340             Token_Name := No_Name;
2341             Accumulate_Token_Checksum;
2342             return;
2343
2344          --  It is an identifier after all
2345
2346          else
2347             Token := Tok_Identifier;
2348             Accumulate_Token_Checksum;
2349             Post_Scan;
2350             return;
2351          end if;
2352    end Scan;
2353
2354    --------------------------
2355    -- Set_Comment_As_Token --
2356    --------------------------
2357
2358    procedure Set_Comment_As_Token (Value : Boolean) is
2359    begin
2360       Comment_Is_Token := Value;
2361    end Set_Comment_As_Token;
2362
2363    ------------------------------
2364    -- Set_End_Of_Line_As_Token --
2365    ------------------------------
2366
2367    procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2368    begin
2369       End_Of_Line_Is_Token := Value;
2370    end Set_End_Of_Line_As_Token;
2371
2372    ---------------------------
2373    -- Set_Special_Character --
2374    ---------------------------
2375
2376    procedure Set_Special_Character (C : Character) is
2377    begin
2378       case C is
2379          when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
2380             Special_Characters (C) := True;
2381
2382          when others =>
2383             null;
2384       end case;
2385    end Set_Special_Character;
2386
2387    ----------------------
2388    -- Set_Start_Column --
2389    ----------------------
2390
2391    --  Note: it seems at first glance a little expensive to compute this value
2392    --  for every source line (since it is certainly not used for all source
2393    --  lines). On the other hand, it doesn't take much more work to skip past
2394    --  the initial white space on the line counting the columns than it would
2395    --  to scan past the white space using the standard scanning circuits.
2396
2397    function Set_Start_Column return Column_Number is
2398       Start_Column : Column_Number := 0;
2399
2400    begin
2401       --  Outer loop scans past horizontal tab characters
2402
2403       Tabs_Loop : loop
2404
2405          --  Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
2406          --  past the blanks and adjusting Start_Column to account for them.
2407
2408          Blanks_Loop : loop
2409             if Source (Scan_Ptr) = ' ' then
2410                if Source (Scan_Ptr + 1) = ' ' then
2411                   if Source (Scan_Ptr + 2) = ' ' then
2412                      if Source (Scan_Ptr + 3) = ' ' then
2413                         if Source (Scan_Ptr + 4) = ' ' then
2414                            if Source (Scan_Ptr + 5) = ' ' then
2415                               if Source (Scan_Ptr + 6) = ' ' then
2416                                  Scan_Ptr := Scan_Ptr + 7;
2417                                  Start_Column := Start_Column + 7;
2418                               else
2419                                  Scan_Ptr := Scan_Ptr + 6;
2420                                  Start_Column := Start_Column + 6;
2421                                  exit Blanks_Loop;
2422                               end if;
2423                            else
2424                               Scan_Ptr := Scan_Ptr + 5;
2425                               Start_Column := Start_Column + 5;
2426                               exit Blanks_Loop;
2427                            end if;
2428                         else
2429                            Scan_Ptr := Scan_Ptr + 4;
2430                            Start_Column := Start_Column + 4;
2431                            exit Blanks_Loop;
2432                         end if;
2433                      else
2434                         Scan_Ptr := Scan_Ptr + 3;
2435                         Start_Column := Start_Column + 3;
2436                         exit Blanks_Loop;
2437                      end if;
2438                   else
2439                      Scan_Ptr := Scan_Ptr + 2;
2440                      Start_Column := Start_Column + 2;
2441                      exit Blanks_Loop;
2442                   end if;
2443                else
2444                   Scan_Ptr := Scan_Ptr + 1;
2445                   Start_Column := Start_Column + 1;
2446                   exit Blanks_Loop;
2447                end if;
2448             else
2449                exit Blanks_Loop;
2450             end if;
2451          end loop Blanks_Loop;
2452
2453          --  Outer loop keeps going only if a horizontal tab follows
2454
2455          if Source (Scan_Ptr) = HT then
2456             if Style_Check then Style.Check_HT; end if;
2457             Scan_Ptr := Scan_Ptr + 1;
2458             Start_Column := (Start_Column / 8) * 8 + 8;
2459          else
2460             exit Tabs_Loop;
2461          end if;
2462
2463       end loop Tabs_Loop;
2464
2465       return Start_Column;
2466    end Set_Start_Column;
2467
2468 end Scng;