gcc/ada/scng.adb

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                 S C N G                                  --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2005 Free Software Foundation, Inc.          --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  17 -- for  more details.  You should have  received  a copy of the GNU General --
  18 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
  19 -- to  the  Free Software Foundation,  51  Franklin  Street,  Fifth  Floor, --
  20 -- Boston, MA 02110-1301, USA.                                              --
  21 --                                                                          --
  22 -- GNAT was originally developed  by the GNAT team at  New York University. --
  23 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  24 --                                                                          --
  25 ------------------------------------------------------------------------------
  26
  27 with Csets;    use Csets;
  28 with Err_Vars; use Err_Vars;
  29 with Hostparm; use Hostparm;
  30 with Namet;    use Namet;
  31 with Opt;      use Opt;
  32 with Scans;    use Scans;
  33 with Sinput;   use Sinput;
  34 with Snames;   use Snames;
  35 with Stringt;  use Stringt;
  36 with Stylesw;  use Stylesw;
  37 with Uintp;    use Uintp;
  38 with Urealp;   use Urealp;
  39 with Widechar; use Widechar;
  40
  41 with System.CRC32;
  42 with System.WCh_Con; use System.WCh_Con;
  43
  44 with GNAT.UTF_32; use GNAT.UTF_32;
  45
  46 package body Scng is
  47
  48    use ASCII;
  49    --  Make control characters visible
  50
  51    Special_Characters : array (Character) of Boolean := (others => False);
  52    --  For characters that are Special tokens, the value is True
  53
  54    Comment_Is_Token : Boolean := False;
  55    --  True if comments are tokens
  56
  57    End_Of_Line_Is_Token : Boolean := False;
  58    --  True if End_Of_Line is a token
  59
  60    -----------------------
  61    -- Local Subprograms --
  62    -----------------------
  63
  64    procedure Accumulate_Token_Checksum;
  65    pragma Inline (Accumulate_Token_Checksum);
        --  This routine accumulates the current value of Token into the
        --  checksum, using the character whose position number is
        --  Token_Type'Pos (Token).
  66
  67    procedure Accumulate_Checksum (C : Character);
  68    pragma Inline (Accumulate_Checksum);
  69    --  This routine accumulates the checksum given character C. During the
  70    --  scanning of a source file, this routine is called with every character
  71    --  in the source, excluding blanks, and all control characters (except
  72    --  that ESC is included in the checksum). Upper case letters not in string
  73    --  literals are folded by the caller. See Sinput spec for the documentation
  74    --  of the checksum algorithm. Note: checksum values are only used if we
  75    --  generate code, so it is not necessary to worry about making the right
  76    --  sequence of calls in any error situation.
  77
  78    procedure Accumulate_Checksum (C : Char_Code);
  79    pragma Inline (Accumulate_Checksum);
  80    --  This version is identical, except that the argument, C, is a character
  81    --  code value instead of a character. This is used when wide characters
  82    --  are scanned. We use the character code rather than the ASCII characters
  83    --  so that the checksum is independent of wide character encoding method.
  84
  85    procedure Initialize_Checksum;
  86    pragma Inline (Initialize_Checksum);
  87    --  Initialize checksum value
  88
  89    -------------------------
  90    -- Accumulate_Checksum --
  91    -------------------------
  92
  93    procedure Accumulate_Checksum (C : Character) is
        --  Character version: updates the running checksum with the single
        --  character C. Checksum is converted to System.CRC32.CRC32 for the
        --  call to System.CRC32.Update.
  94    begin
  95       System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
  96    end Accumulate_Checksum;
  97
  98    procedure Accumulate_Checksum (C : Char_Code) is
        --  Char_Code version: the code is fed to the Character version one
        --  byte at a time, most significant byte first. Codes that fit in 16
        --  bits contribute two bytes, larger codes contribute four bytes.
  99    begin
 100       if C <= 16#FFFF# then
 101          Accumulate_Checksum (Character'Val (C / 16#100#));
 102       else
 103          Accumulate_Checksum (Character'Val (C / 16#100_0000#));
 104          Accumulate_Checksum (Character'Val ((C / 16#1_0000#) mod 16#100#));
 105          Accumulate_Checksum (Character'Val ((C / 16#100#) mod 16#100#));
 106       end if;
 107
 108       Accumulate_Checksum (Character'Val (C mod 16#100#));
 109    end Accumulate_Checksum;
 110
 111    -------------------------------
 112    -- Accumulate_Token_Checksum --
 113    -------------------------------
 114
 115    procedure Accumulate_Token_Checksum is
 116       Code : constant Character := Character'Val (Token_Type'Pos (Token));
        --  Character whose position number is that of the current Token
 117
 118    begin
 119       System.CRC32.Update (System.CRC32.CRC32 (Checksum), Code);
 120    end Accumulate_Token_Checksum;
 121
 122    ----------------------------
 123    -- Determine_Token_Casing --
 124    ----------------------------
 125
 126    function Determine_Token_Casing return Casing_Type is
        --  Returns the result of Determine_Casing applied to the slice of
        --  Source running from Token_Ptr up to, but not including, Scan_Ptr.
 127    begin
 128       return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
 129    end Determine_Token_Casing;
 130
 131    -------------------------
 132    -- Initialize_Checksum --
 133    -------------------------
 134
 135    procedure Initialize_Checksum is
        --  Sets Checksum to its starting value by calling
        --  System.CRC32.Initialize on it (viewed as a System.CRC32.CRC32).
 136    begin
 137       System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
 138    end Initialize_Checksum;
 139
 140    ------------------------
 141    -- Initialize_Scanner --
 142    ------------------------
 143
 144    procedure Initialize_Scanner
 145      (Unit  : Unit_Number_Type;
 146       Index : Source_File_Index)
 147    is
        --  Marks the reserved words in the names table, then resets the scan
        --  control variables so that scanning starts at the first character
        --  of source file Index, with Unit recorded as the current source
        --  unit. The checksum and the wide character byte count are reset.
        --  No token is scanned here.
 148       procedure Set_Reserved (N : Name_Id; T : Token_Type);
 149       pragma Inline (Set_Reserved);
 150       --  Set given name as a reserved keyword (T is the corresponding token)
 151
 152       ------------------
 153       -- Set_Reserved --
 154       ------------------
 155
 156       procedure Set_Reserved (N : Name_Id; T : Token_Type) is
 157       begin
 158          --  Set up Token_Type values in Names Table entries for reserved
 159          --  keywords. We use the Pos value of the Token_Type value. Note we
 160          --  rely on the fact that Token_Type'Val (0) is not a reserved word!
 161
 162          Set_Name_Table_Byte (N, Token_Type'Pos (T));
 163       end Set_Reserved;
 164
 165    --  Start of processing for Initialize_Scanner
 166
 167    begin
 168       --  Establish reserved words
 169
 170       Set_Reserved (Name_Abort,     Tok_Abort);
 171       Set_Reserved (Name_Abs,       Tok_Abs);
 172       Set_Reserved (Name_Abstract,  Tok_Abstract);
 173       Set_Reserved (Name_Accept,    Tok_Accept);
 174       Set_Reserved (Name_Access,    Tok_Access);
 175       Set_Reserved (Name_And,       Tok_And);
 176       Set_Reserved (Name_Aliased,   Tok_Aliased);
 177       Set_Reserved (Name_All,       Tok_All);
 178       Set_Reserved (Name_Array,     Tok_Array);
 179       Set_Reserved (Name_At,        Tok_At);
 180       Set_Reserved (Name_Begin,     Tok_Begin);
 181       Set_Reserved (Name_Body,      Tok_Body);
 182       Set_Reserved (Name_Case,      Tok_Case);
 183       Set_Reserved (Name_Constant,  Tok_Constant);
 184       Set_Reserved (Name_Declare,   Tok_Declare);
 185       Set_Reserved (Name_Delay,     Tok_Delay);
 186       Set_Reserved (Name_Delta,     Tok_Delta);
 187       Set_Reserved (Name_Digits,    Tok_Digits);
 188       Set_Reserved (Name_Do,        Tok_Do);
 189       Set_Reserved (Name_Else,      Tok_Else);
 190       Set_Reserved (Name_Elsif,     Tok_Elsif);
 191       Set_Reserved (Name_End,       Tok_End);
 192       Set_Reserved (Name_Entry,     Tok_Entry);
 193       Set_Reserved (Name_Exception, Tok_Exception);
 194       Set_Reserved (Name_Exit,      Tok_Exit);
 195       Set_Reserved (Name_For,       Tok_For);
 196       Set_Reserved (Name_Function,  Tok_Function);
 197       Set_Reserved (Name_Generic,   Tok_Generic);
 198       Set_Reserved (Name_Goto,      Tok_Goto);
 199       Set_Reserved (Name_If,        Tok_If);
 200       Set_Reserved (Name_In,        Tok_In);
 201       Set_Reserved (Name_Is,        Tok_Is);
 202       Set_Reserved (Name_Limited,   Tok_Limited);
 203       Set_Reserved (Name_Loop,      Tok_Loop);
 204       Set_Reserved (Name_Mod,       Tok_Mod);
 205       Set_Reserved (Name_New,       Tok_New);
 206       Set_Reserved (Name_Not,       Tok_Not);
 207       Set_Reserved (Name_Null,      Tok_Null);
 208       Set_Reserved (Name_Of,        Tok_Of);
 209       Set_Reserved (Name_Or,        Tok_Or);
 210       Set_Reserved (Name_Others,    Tok_Others);
 211       Set_Reserved (Name_Out,       Tok_Out);
 212       Set_Reserved (Name_Package,   Tok_Package);
 213       Set_Reserved (Name_Pragma,    Tok_Pragma);
 214       Set_Reserved (Name_Private,   Tok_Private);
 215       Set_Reserved (Name_Procedure, Tok_Procedure);
 216       Set_Reserved (Name_Protected, Tok_Protected);
 217       Set_Reserved (Name_Raise,     Tok_Raise);
 218       Set_Reserved (Name_Range,     Tok_Range);
 219       Set_Reserved (Name_Record,    Tok_Record);
 220       Set_Reserved (Name_Rem,       Tok_Rem);
 221       Set_Reserved (Name_Renames,   Tok_Renames);
 222       Set_Reserved (Name_Requeue,   Tok_Requeue);
 223       Set_Reserved (Name_Return,    Tok_Return);
 224       Set_Reserved (Name_Reverse,   Tok_Reverse);
 225       Set_Reserved (Name_Select,    Tok_Select);
 226       Set_Reserved (Name_Separate,  Tok_Separate);
 227       Set_Reserved (Name_Subtype,   Tok_Subtype);
 228       Set_Reserved (Name_Tagged,    Tok_Tagged);
 229       Set_Reserved (Name_Task,      Tok_Task);
 230       Set_Reserved (Name_Terminate, Tok_Terminate);
 231       Set_Reserved (Name_Then,      Tok_Then);
 232       Set_Reserved (Name_Type,      Tok_Type);
 233       Set_Reserved (Name_Until,     Tok_Until);
 234       Set_Reserved (Name_Use,       Tok_Use);
 235       Set_Reserved (Name_When,      Tok_When);
 236       Set_Reserved (Name_While,     Tok_While);
 237       Set_Reserved (Name_With,      Tok_With);
 238       Set_Reserved (Name_Xor,       Tok_Xor);
 239
 240       --  Ada 2005 reserved words
 241
 242       Set_Reserved (Name_Interface,     Tok_Interface);
 243       Set_Reserved (Name_Overriding,    Tok_Overriding);
 244       Set_Reserved (Name_Synchronized,  Tok_Synchronized);
 245
 246       --  Initialize scan control variables
 247
 248       Current_Source_File       := Index;
 249       Source                    := Source_Text (Current_Source_File);
 250       Current_Source_Unit       := Unit;
 251       Scan_Ptr                  := Source_First (Current_Source_File);
 252       Token                     := No_Token;
 253       Token_Ptr                 := Scan_Ptr;
 254       Current_Line_Start        := Scan_Ptr;
 255       Token_Node                := Empty;
 256       Token_Name                := No_Name;
 257       Start_Column              := Set_Start_Column;
 258       First_Non_Blank_Location  := Scan_Ptr;
 259
 260       Initialize_Checksum;
 261       Wide_Char_Byte_Count := 0;
 262
 263       --  Do not call Scan, otherwise the License stuff does not work in Scn
 264
 265    end Initialize_Scanner;
 266
 267    ------------------------------
 268    -- Reset_Special_Characters --
 269    ------------------------------
 270
 271    procedure Reset_Special_Characters is
        --  Clears every entry of Special_Characters, so that no character is
        --  marked as a Special token.
 272    begin
 273       Special_Characters := (others => False);
 274    end Reset_Special_Characters;
 275
 276    ----------
 277    -- Scan --
 278    ----------
 279
 280    procedure Scan is
 281
 282       Start_Of_Comment : Source_Ptr;
 283       --  Record start of comment position
 284
 285       Underline_Found : Boolean;
 286       --  During scanning of an identifier, set to True if last character
 287       --  scanned was an underline or other punctuation character. This
 288       --  is used to flag the error of two underlines/punctuations in a
 289       --  row or ending an identifier with an underline/punctuation. Here
 290       --  punctuation means any UTF_32 character in the Unicode category
 291       --  Punctuation,Connector.
 292
 293       Wptr : Source_Ptr;
 294       --  Used to remember start of last wide character scanned
 295
 296       procedure Check_End_Of_Line;
 297       --  Called when end of line encountered. Checks that line is not too
 298       --  long, and that other style checks for the end of line are met.
 299
 300       function Double_Char_Token (C : Character) return Boolean;
 301       --  This function is used for double character tokens like := or <>. It
 302       --  checks if the character following Source (Scan_Ptr) is C, and if so
 303       --  bumps Scan_Ptr past the pair of characters and returns True. A space
 304       --  between the two characters is also recognized with an appropriate
 305       --  error message being issued. If C is not present, False is returned.
 306       --  Note that Double_Char_Token can only be used for tokens defined in
 307       --  the Ada syntax (its use for error cases like && is not appropriate
 308       --  since we do not want a junk message for a case like &-space-&).
 309
 310       procedure Error_Illegal_Character;
 311       --  Give illegal character error, Scan_Ptr points to character. On
 312       --  return, Scan_Ptr is bumped past the illegal character.
 313
 314       procedure Error_Illegal_Wide_Character;
 315       --  Give illegal wide character message. On return, Scan_Ptr is bumped
 316       --  past the illegal character, which may still leave us pointing to
 317       --  junk, not much we can do if the escape sequence is messed up!
 318
 319       procedure Error_Long_Line;
 320       --  Signal error of excessively long line
 321
 322       procedure Error_No_Double_Underline;
 323       --  Signal error of two underline or punctuation characters in a row.
 324       --  Called with Scan_Ptr pointing to second underline/punctuation char.
 325
 326       procedure Nlit;
 327       --  This is the procedure for scanning out numeric literals. On entry,
 328       --  Scan_Ptr points to the digit that starts the numeric literal (the
 329       --  checksum for this character has not been accumulated yet). On return
 330       --  Scan_Ptr points past the last character of the numeric literal, Token
 331       --  and Token_Node are set appropriately, and the checksum is updated.
 332
 333       procedure Slit;
 334       --  This is the procedure for scanning out string literals. On entry,
 335       --  Scan_Ptr points to the opening string quote (the checksum for this
 336       --  character has not been accumulated yet). On return Scan_Ptr points
 337       --  past the closing quote of the string literal, Token and Token_Node
 338       --  are set appropriately, and the checksum is updated.
 339
 340       -----------------------
 341       -- Check_End_Of_Line --
 342       -----------------------
 343
 344       procedure Check_End_Of_Line is
 345          Len : constant Int :=
 346                  Int (Scan_Ptr) -
 347                  Int (Current_Line_Start) -
 348                  Wide_Char_Byte_Count;
              --  Distance from the start of the current line to Scan_Ptr, less
              --  the value accumulated in Wide_Char_Byte_Count for this line.
 349
 350       begin
 351          if Style_Check then
 352             Style.Check_Line_Terminator (Len);
 353          end if;
 354
 355          --  Deal with checking maximum line length
 356
 357          if Style_Check and Style_Check_Max_Line_Length then
 358             Style.Check_Line_Max_Length (Len);
 359
 360          --  Otherwise (no style checking, or no style check on line length),
 361          --  check the line length against the standard maximum value.
 362
 363          elsif Len > Max_Line_Length then
 364             Error_Long_Line;
 365          end if;
 366
 367          --  Reset wide character byte count for next line
 368
 369          Wide_Char_Byte_Count := 0;
 370       end Check_End_Of_Line;
 371
 372       -----------------------
 373       -- Double_Char_Token --
 374       -----------------------
 375
 376       function Double_Char_Token (C : Character) return Boolean is
 377          Next_Char : constant Character := Source (Scan_Ptr + 1);
              --  Character immediately following the one at Scan_Ptr
 378
 379       begin
              --  Normal case: C follows directly. Only this case contributes
              --  C to the checksum.
 380          if Next_Char = C then
 381             Accumulate_Checksum (C);
 382             Scan_Ptr := Scan_Ptr + 2;
 383             return True;
 384          end if;
 385
              --  Not a double character token unless it is the pair split by
              --  exactly one space.
 386          if Next_Char /= ' ' or else Source (Scan_Ptr + 2) /= C then
 387             return False;
 388          end if;
 389
              --  Split pair: post the error on the space, then step past the
              --  space and C (three characters consumed in total).
 390          Scan_Ptr := Scan_Ptr + 1;
 391          Error_Msg_S ("no space allowed here");
 392          Scan_Ptr := Scan_Ptr + 2;
 393          return True;
 394       end Double_Char_Token;
 395
 396       -----------------------------
 397       -- Error_Illegal_Character --
 398       -----------------------------
 399
 400       procedure Error_Illegal_Character is
           --  Posts the message while Scan_Ptr still designates the offending
           --  character, and only then advances Scan_Ptr by one.
 401       begin
 402          Error_Msg_S ("illegal character");
 403          Scan_Ptr := Scan_Ptr + 1;
 404       end Error_Illegal_Character;
 405
 406       ----------------------------------
 407       -- Error_Illegal_Wide_Character --
 408       ----------------------------------
 409
 410       procedure Error_Illegal_Wide_Character is
           --  Posts the message at Wptr, the remembered start of the last
           --  wide character scanned.
           --  NOTE(review): this body does not itself modify Scan_Ptr, although
           --  the declaration comment says Scan_Ptr is bumped on return;
           --  presumably the caller has already advanced it - confirm.
 411       begin
 412          Error_Msg ("illegal wide character", Wptr);
 413       end Error_Illegal_Wide_Character;
 414
 415       ---------------------
 416       -- Error_Long_Line --
 417       ---------------------
 418
 419       procedure Error_Long_Line is
           --  The message is posted at the position Max_Line_Length characters
           --  beyond the start of the current line, not at Scan_Ptr.
 420       begin
 421          Error_Msg
 422            ("this line is too long",
 423             Current_Line_Start + Source_Ptr (Max_Line_Length));
 424       end Error_Long_Line;
 425
 426       -------------------------------
 427       -- Error_No_Double_Underline --
 428       -------------------------------
 429
 430       procedure Error_No_Double_Underline is
 431          This_Is_Underline : constant Boolean := Source (Scan_Ptr) = '_';
 432          Prev_Is_Underline : constant Boolean := Source (Scan_Ptr - 1) = '_';
 433
 434       begin
 435          Underline_Found := False;
 436
 437          --  The message is chosen from four variants, according to whether
 438          --  the character at Scan_Ptr and the one just before it are
 439          --  underlines (any non-underline is reported as punctuation).
 440
 441          if This_Is_Underline and then Prev_Is_Underline then
 442             Error_Msg_S ("two consecutive underlines not permitted");
 443
 444          elsif This_Is_Underline then
 445             Error_Msg_S ("underline cannot follow punctuation character");
 446
 447          elsif Prev_Is_Underline then
 448             Error_Msg_S ("punctuation character cannot follow underline");
 449
 450          else
 451             Error_Msg_S
 452               ("two consecutive punctuation characters not permitted");
 453          end if;
 454       end Error_No_Double_Underline;
 455
 456       ----------
 457       -- Nlit --
 458       ----------
 459
 460       procedure Nlit is
 461
 462          C : Character;
 463          --  Current source program character
 464
 465          Base_Char : Character;
 466          --  Either # or : (character at start of based number)
 467
 468          Base : Int;
 469          --  Value of base
 470
 471          UI_Base : Uint;
 472          --  Value of base in Uint format
 473
 474          UI_Int_Value : Uint;
 475          --  Value of integer scanned by Scan_Integer in Uint format
 476
 477          UI_Num_Value : Uint;
 478          --  Value of integer in numeric value being scanned
 479
 480          Scale : Int;
 481          --  Scale value for real literal
 482
 483          UI_Scale : Uint;
 484          --  Scale in Uint format
 485
 486          Exponent_Is_Negative : Boolean;
 487          --  Set true for negative exponent
 488
 489          Extended_Digit_Value : Int;
 490          --  Extended digit value
 491
 492          Point_Scanned : Boolean;
 493          --  Flag for decimal point scanned in numeric literal
 494
 495          -----------------------
 496          -- Local Subprograms --
 497          -----------------------
 498
 499          procedure Error_Digit_Expected;
 500          --  Signal error of bad digit, Scan_Ptr points to the location at
 501          --  which the digit was expected on input, and is unchanged on return.
 502
 503          procedure Scan_Integer;
 504          --  Procedure to scan integer literal. On entry, Scan_Ptr points to a
 505          --  digit, on exit Scan_Ptr points past the last character of the
 506          --  integer.
 507          --
 508          --  For each digit encountered, UI_Int_Value is multiplied by 10, and
 509          --  the value of the digit added to the result. In addition, the
 510          --  value in Scale is decremented by one for each actual digit
 511          --  scanned.
 512
 513          --------------------------
 514          -- Error_Digit_Expected --
 515          --------------------------
 516
 517          procedure Error_Digit_Expected is
 518          begin
 519             Error_Msg_S ("digit expected");
 520          end Error_Digit_Expected;
 521
 522          ------------------
 523          -- Scan_Integer --
 524          ------------------
 525
 526          procedure Scan_Integer is
 527             C : Character;
 528             --  Next character scanned
                 --  Note that this hides the variable C of Nlit, which is
                 --  therefore not updated by a call to Scan_Integer.
 529
 530          begin
 531             C := Source (Scan_Ptr);
 532
 533             --  Loop through digits (allowing underlines)
 534
 535             loop
 536                Accumulate_Checksum (C);
 537                UI_Int_Value :=
 538                  UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
 539                Scan_Ptr := Scan_Ptr + 1;
 540                Scale := Scale - 1;
 541                C := Source (Scan_Ptr);
 542
 543                --  Case of underline encountered
 544
 545                if C = '_' then
 546
 547                   --  We do not accumulate the '_' in the checksum, so that
 548                   --  1_234 is equivalent to 1234, and does not trigger
 549                   --  compilation for "minimal recompilation" (gnatmake -m).
 550
 551                   loop
 552                      Scan_Ptr := Scan_Ptr + 1;
 553                      C := Source (Scan_Ptr);
 554                      exit when C /= '_';
 555                      Error_No_Double_Underline;
 556                   end loop;
 557
                      --  An underline must be followed by a digit. If it is
                      --  not, report it and stop with Scan_Ptr left on the
                      --  offending character.
 558                   if C not in '0' .. '9' then
 559                      Error_Digit_Expected;
 560                      exit;
 561                   end if;
 562
 563                else
 564                   exit when C not in '0' .. '9';
 565                end if;
 566             end loop;
 567          end Scan_Integer;
 568
 569       --  Start of Processing for Nlit
 570
 571       begin
              --  Scan the leading integer. It is either the whole part of a
              --  decimal literal or the base of a based literal.
 572          Base := 10;
 573          UI_Base := Uint_10;
 574          UI_Int_Value := Uint_0;
 575          Scale := 0;
 576          Scan_Integer;
              --  Discard the digit count that Scan_Integer left in Scale, so
              --  that only digits scanned after this point are counted.
 577          Scale := 0;
 578          Point_Scanned := False;
 579          UI_Num_Value := UI_Int_Value;
 580
 581          --  Various possibilities now for continuing the literal are period,
 582          --  E/e (for exponent), or :/# (for based literal).
 583
 584          Scale := 0;
 585          C := Source (Scan_Ptr);
 586
 587          if C = '.' then
 588
 589             --  Scan out point, but do not scan past .. which is a range
 590             --  sequence, and must not be eaten up scanning a numeric literal.
 591
 592             while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
 593                Accumulate_Checksum ('.');
 594
 595                if Point_Scanned then
 596                   Error_Msg_S ("duplicate point ignored");
 597                end if;
 598
 599                Point_Scanned := True;
 600                Scan_Ptr := Scan_Ptr + 1;
 601                C := Source (Scan_Ptr);
 602
 603                if C not in '0' .. '9' then
 604                   Error_Msg
 605                     ("real literal cannot end with point", Scan_Ptr - 1);
 606                else
                      --  UI_Int_Value still holds the digits scanned before
                      --  the point, so the fraction digits are appended to
                      --  them while Scale counts the fraction digits.
 607                   Scan_Integer;
 608                   UI_Num_Value := UI_Int_Value;
 609                end if;
 610             end loop;
 611
 612          --  Based literal case. The base is the value we already scanned.
 613          --  In the case of colon, we insist that the following character
 614          --  is indeed an extended digit or a period. This catches a number
 615          --  of common errors, as well as catching the well known tricky
 616          --  bug otherwise arising from "x : integer range 1 .. 10:= 6;"
 617
 618          elsif C = '#'
 619            or else (C = ':' and then
 620                       (Source (Scan_Ptr + 1) = '.'
 621                          or else
 622                        Source (Scan_Ptr + 1) in '0' .. '9'
 623                          or else
 624                        Source (Scan_Ptr + 1) in 'A' .. 'Z'
 625                          or else
 626                        Source (Scan_Ptr + 1) in 'a' .. 'z'))
 627          then
 628             if C = ':' then
 629                Obsolescent_Check (Scan_Ptr);
 630
 631                if Warn_On_Obsolescent_Feature then
 632                   Error_Msg_S
 633                     ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
 634                   Error_Msg_S
 635                     ("\use ""'#"" instead?");
 636                end if;
 637             end if;
 638
 639             Accumulate_Checksum (C);
 640             Base_Char := C;
 641             UI_Base := UI_Int_Value;
 642
                 --  An out of range base is reported, and scanning then
                 --  continues as if the base were 16.
 643             if UI_Base < 2 or else UI_Base > 16 then
 644                Error_Msg_SC ("base not 2-16");
 645                UI_Base := Uint_16;
 646             end if;
 647
 648             Base := UI_To_Int (UI_Base);
 649             Scan_Ptr := Scan_Ptr + 1;
 650
 651             --  Scan out extended integer [. integer]
 652
 653             C := Source (Scan_Ptr);
 654             UI_Int_Value := Uint_0;
 655             Scale := 0;
 656
 657             loop
 658                if C in '0' .. '9' then
 659                   Accumulate_Checksum (C);
 660                   Extended_Digit_Value :=
 661                     Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
 662
 663                elsif C in 'A' .. 'F' then
                      --  Adding 32 maps 'A' .. 'F' to 'a' .. 'f', so the
                      --  checksum does not depend on the case of the digit.
 664                   Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
 665                   Extended_Digit_Value :=
 666                     Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
 667
 668                elsif C in 'a' .. 'f' then
 669                   Accumulate_Checksum (C);
 670                   Extended_Digit_Value :=
 671                     Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
 672
 673                else
 674                   Error_Msg_S ("extended digit expected");
 675                   exit;
 676                end if;
 677
                   --  A digit that is too large for the base is reported but
                   --  still accumulated into the value below.
 678                if Extended_Digit_Value >= Base then
 679                   Error_Msg_S ("digit '>= base");
 680                end if;
 681
 682                UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
 683                Scale := Scale - 1;
 684                Scan_Ptr := Scan_Ptr + 1;
 685                C := Source (Scan_Ptr);
 686
 687                if C = '_' then
 688                   loop
 689                      Accumulate_Checksum ('_');
 690                      Scan_Ptr := Scan_Ptr + 1;
 691                      C := Source (Scan_Ptr);
 692                      exit when C /= '_';
 693                      Error_No_Double_Underline;
 694                   end loop;
 695
 696                elsif C = '.' then
 697                   Accumulate_Checksum ('.');
 698
 699                   if Point_Scanned then
 700                      Error_Msg_S ("duplicate point ignored");
 701                   end if;
 702
 703                   Scan_Ptr := Scan_Ptr + 1;
 704                   C := Source (Scan_Ptr);
 705                   Point_Scanned := True;
                      --  Restart the count so that Scale reflects only the
                      --  extended digits that follow the point.
 706                   Scale := 0;
 707
 708                elsif C = Base_Char then
 709                   Accumulate_Checksum (C);
 710                   Scan_Ptr := Scan_Ptr + 1;
 711                   exit;
 712
 713                elsif C = '#' or else C = ':' then
 714                   Error_Msg_S ("based number delimiters must match");
 715                   Scan_Ptr := Scan_Ptr + 1;
 716                   exit;
 717
 718                elsif not Identifier_Char (C) then
 719                   if Base_Char = '#' then
 720                      Error_Msg_S ("missing '#");
 721                   else
 722                      Error_Msg_S ("missing ':");
 723                   end if;
 724
 725                   exit;
 726                end if;
 727
 728             end loop;
 729
 730             UI_Num_Value := UI_Int_Value;
 731          end if;
 732
 733          --  Scan out exponent
 734
 735          if not Point_Scanned then
 736             Scale := 0;
 737             UI_Scale := Uint_0;
 738          else
 739             UI_Scale := UI_From_Int (Scale);
 740          end if;
 741
 742          if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
                 --  The exponent letter always enters the checksum as a lower
                 --  case 'e', whichever case appears in the source.
 743             Accumulate_Checksum ('e');
 744             Scan_Ptr := Scan_Ptr + 1;
 745             Exponent_Is_Negative := False;
 746
 747             if Source (Scan_Ptr) = '+' then
 748                Accumulate_Checksum ('+');
 749                Scan_Ptr := Scan_Ptr + 1;
 750
 751             elsif Source (Scan_Ptr) = '-' then
 752                Accumulate_Checksum ('-');
 753
                   --  For an integer literal the minus sign is reported and
                   --  skipped, and the exponent is then treated as positive.
 754                if not Point_Scanned then
 755                   Error_Msg_S
 756                     ("negative exponent not allowed for integer literal");
 757                else
 758                   Exponent_Is_Negative := True;
 759                end if;
 760
 761                Scan_Ptr := Scan_Ptr + 1;
 762             end if;
 763
 764             UI_Int_Value := Uint_0;
 765
 766             if Source (Scan_Ptr) in '0' .. '9' then
 767                Scan_Integer;
 768             else
 769                Error_Digit_Expected;
 770             end if;
 771
 772             if Exponent_Is_Negative then
 773                UI_Scale := UI_Scale - UI_Int_Value;
 774             else
 775                UI_Scale := UI_Scale + UI_Int_Value;
 776             end if;
 777          end if;
 778
 779          --  Case of real literal to be returned
 780
 781          if Point_Scanned then
 782             Token := Tok_Real_Literal;
                 --  NOTE(review): UI_Scale is passed negated as Den; presumably
                 --  Den is the power of Rbase dividing Num - confirm against
                 --  the Urealp spec.
 783             Real_Literal_Value :=
 784               UR_From_Components (
 785                                   Num   => UI_Num_Value,
 786                                   Den   => -UI_Scale,
 787                                   Rbase => Base);
 788
 789          --  Case of integer literal to be returned
 790
 791          else
 792             Token := Tok_Integer_Literal;
 793
 794             if UI_Scale = 0 then
 795                Int_Literal_Value := UI_Num_Value;
 796
 797             --  Avoid doing possibly expensive calculations in cases like
 798             --  parsing 163E800_000# when semantics will not be done anyway.
 799             --  This is especially useful when parsing garbled input.
 800
 801             elsif Operating_Mode /= Check_Syntax
 802               and then (Serious_Errors_Detected = 0 or else Try_Semantics)
 803             then
 804                Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
 805
 806             else
 807                Int_Literal_Value := No_Uint;
 808             end if;
 809          end if;
 810
 811          Accumulate_Token_Checksum;
 812
 813          return;
 814       end Nlit;
 815
 816       ----------
 817       -- Slit --
 818       ----------
 819
 820       procedure Slit is
 821
 822          Delimiter : Character;
 823          --  Delimiter (first character of string)
 824
 825          C : Character;
 826          --  Current source program character
 827
 828          Code : Char_Code;
 829          --  Current character code value
 830
 831          Err : Boolean;
 832          --  Error flag for Scan_Wide call
 833
 834          procedure Error_Bad_String_Char;
 835          --  Signal bad character in string/character literal. On entry
 836          --  Scan_Ptr points to the improper character encountered during the
 837          --  scan. Scan_Ptr is not modified, so it still points to the bad
 838          --  character on return.
 839
 840          procedure Error_Unterminated_String;
 841          --  Procedure called if a line terminator character is encountered
 842          --  during scanning a string, meaning that the string is not properly
 843          --  terminated.
 844
 845          procedure Set_String;
 846          --  Procedure used to distinguish between string and operator symbol.
 847          --  On entry the string has been scanned out, and its characters
 848          --  start at Token_Ptr and end one character before Scan_Ptr. On exit
 849          --  Token is set to Tok_String_Literal or Tok_Operator_Symbol as
 850          --  appropriate and, in the operator symbol case, Token_Name is set
 851          --  to the corresponding operator name. Token_Node is not set here
 852          --  (presumably this is left to Post_Scan in the caller).
 853
 854          ---------------------------
 855          -- Error_Bad_String_Char --
 856          ---------------------------
 857
 858          procedure Error_Bad_String_Char is
 859          begin
 860             --  Pick the message from the offending character (left in place)
 861             case Source (Scan_Ptr) is
 862                when HT =>
 863                   Error_Msg_S ("horizontal tab not allowed in string");
 864
 865                when VT | FF =>
 866                   Error_Msg_S ("format effector not allowed in string");
 867
 868                when Upper_Half_Character =>
 869                   Error_Msg_S ("(Ada 83) upper half character not allowed");
 870
 871                when others =>
 872                   Error_Msg_S ("control character not allowed in string");
 873             end case;
 874          end Error_Bad_String_Char;
 875
 876          -------------------------------
 877          -- Error_Unterminated_String --
 878          -------------------------------
 879
 880          procedure Error_Unterminated_String is
 881
 882             procedure Back_Up;
 883             --  Back Scan_Ptr up one character, unstoring that character
 884
 885             -------------
 886             -- Back_Up --
 887             -------------
 888
 889             procedure Back_Up is
 890             begin
 891                Scan_Ptr := Scan_Ptr - 1;
 892                Unstore_String_Char;
 893             end Back_Up;
 894
 895          --  Start of processing for Error_Unterminated_String
 896
 897          begin
 898             --  The flag is not simply put at the end of the line. Consider:
 899
 900             --     A := "this is an unterminated string;
 901             --     A := "this is an unterminated string &
 902             --     P(A, "this is a parameter that didn't get terminated);
 903             --     A := "this string uses the wrong terminator'
 904             --     A := "this string uses the wrong terminator' &
 905
 906             --  Trailing blanks and ampersands are backed over first (tabs need
 907             --  no test, since they are not allowed in strings and would have
 908             --  been flagged already). A trailing single quote then gets its
 909             --  own message. Otherwise a trailing semicolon, and a right paren
 910             --  just before it, are backed over as well. Scan_Ptr is left at
 911             --  the backed up position, the better bet on what was intended.
 912
 913             loop
 914                exit when Source (Scan_Ptr - 1) /= ' '
 915                  and then Source (Scan_Ptr - 1) /= '&';
 916                Back_Up;
 917             end loop;
 918
 919             --  A single quote is a wrong terminator only if the string was
 920             --  not itself opened with one (that error is already reported)
 921
 922             if Delimiter /= ''' and then Source (Scan_Ptr - 1) = ''' then
 923                Unstore_String_Char;
 924                Error_Msg
 925                  ("incorrect string terminator character", Scan_Ptr - 1);
 926
 927             else
 928                if Source (Scan_Ptr - 1) = ';' then
 929                   Back_Up;
 930
 931                   if Source (Scan_Ptr - 1) = ')' then
 932                      Back_Up;
 933                   end if;
 934                end if;
 935
 936                Error_Msg_S ("missing string quote");
 937             end if;
 938          end Error_Unterminated_String;
 939
 940          ----------------
 941          -- Set_String --
 942          ----------------
 943
 944          procedure Set_String is
 945             Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
 946             --  Source length of the token, not counting the two delimiters
 947
 948             function Is_Word (Upper : String; Lower : String) return Boolean;
 949             --  Upper and Lower are the same word in upper and lower case, with
 950             --  identical bounds. Returns True if each source character that
 951             --  follows the opening delimiter equals the corresponding
 952             --  character of Upper or of Lower, i.e. if the string spells the
 953             --  word in any mixture of cases. Only to be called when Slen
 954             --  equals the length of the word.
 955
 956             -------------
 957             -- Is_Word --
 958             -------------
 959
 960             function Is_Word (Upper : String; Lower : String) return Boolean is
 961                Ptr : Source_Ptr := Token_Ptr;
 962
 963             begin
 964                --  Ptr starts on the opening delimiter, so it is bumped before
 965                --  each comparison.
 966
 967                for J in Upper'Range loop
 968                   Ptr := Ptr + 1;
 969
 970                   if Source (Ptr) /= Upper (J)
 971                     and then Source (Ptr) /= Lower (J)
 972                   then
 973                      return False;
 974                   end if;
 975                end loop;
 976
 977                return True;
 978             end Is_Word;
 979
 980          --  Start of processing for Set_String
 981
 982          begin
 983             --  On entry Token_Name holds Error_Name. It is overwritten below
 984             --  with the matching Name_Op_xx value when the string spells an
 985             --  operator symbol, and is left untouched for any other string.
 986
 987             case Slen is
 988
 989                --  Single character operators
 990
 991                when 1 =>
 992                   case Source (Token_Ptr + 1) is
 993                      when '='    => Token_Name := Name_Op_Eq;
 994                      when '>'    => Token_Name := Name_Op_Gt;
 995                      when '<'    => Token_Name := Name_Op_Lt;
 996                      when '+'    => Token_Name := Name_Op_Add;
 997                      when '-'    => Token_Name := Name_Op_Subtract;
 998                      when '&'    => Token_Name := Name_Op_Concat;
 999                      when '*'    => Token_Name := Name_Op_Multiply;
1000                      when '/'    => Token_Name := Name_Op_Divide;
1001                      when others => null;
1002                   end case;
1003
1004                --  Two character operators
1005
1006                when 2 =>
1007                   declare
1008                      C1 : constant Character := Source (Token_Ptr + 1);
1009                      C2 : constant Character := Source (Token_Ptr + 2);
1010
1011                   begin
1012                      if C2 = '=' then
1013                         case C1 is
1014                            when '/'    => Token_Name := Name_Op_Ne;
1015                            when '<'    => Token_Name := Name_Op_Le;
1016                            when '>'    => Token_Name := Name_Op_Ge;
1017                            when others => null;
1018                         end case;
1019
1020                      elsif C1 = '*' and then C2 = '*' then
1021                         Token_Name := Name_Op_Expon;
1022
1023                      elsif Is_Word ("OR", "or") then
1024                         Token_Name := Name_Op_Or;
1025                      end if;
1026                   end;
1027
1028                --  Three character operators, all of them reserved words
1029
1030                when 3 =>
1031                   if Is_Word ("AND", "and") then
1032                      Token_Name := Name_Op_And;
1033
1034                   elsif Is_Word ("ABS", "abs") then
1035                      Token_Name := Name_Op_Abs;
1036
1037                   elsif Is_Word ("MOD", "mod") then
1038                      Token_Name := Name_Op_Mod;
1039
1040                   elsif Is_Word ("NOT", "not") then
1041                      Token_Name := Name_Op_Not;
1042
1043                   elsif Is_Word ("REM", "rem") then
1044                      Token_Name := Name_Op_Rem;
1045
1046                   elsif Is_Word ("XOR", "xor") then
1047                      Token_Name := Name_Op_Xor;
1048                   end if;
1049
1050                --  No other length is tested against any operator symbol
1051
1052                when others =>
1053                   null;
1054             end case;
1055
1056             --  Token_Name still being Error_Name at this point means that no
1057             --  operator symbol was recognized, so this is a plain string.
1058
1059             if Token_Name /= Error_Name then
1060                Token := Tok_Operator_Symbol;
1061
1062             else
1063                Token := Tok_String_Literal;
1064             end if;
1065          end Set_String;
1066
1067       --  Start of processing for Slit
1068
1069       begin
1070          --  On entry, Scan_Ptr points to the opening character of the string
1071          --  which is either a percent, double quote, or apostrophe (single
1072          --  quote). The latter case is an error detected by the character
1073          --  literal circuit.
              --  That opening character is remembered in Delimiter: it is the
              --  character that ends the literal, and that may appear doubled
              --  inside it.
1074
1075          Delimiter := Source (Scan_Ptr);
1076          Accumulate_Checksum (Delimiter);
1077          Start_String;
1078          Scan_Ptr := Scan_Ptr + 1;
1079
1080          --  Loop to scan out characters of string literal
              --  Each iteration handles one element of the literal and leaves
              --  its code in Code, which is handed to Store_String_Char at the
              --  bottom of the loop. The two exit statements leave the loop
              --  without storing anything.
1081
1082          loop
1083             C := Source (Scan_Ptr);
1084
                 --  Delimiter seen: if the next character is not a delimiter as
                 --  well, the literal ends here. Otherwise the pair is a doubled
                 --  delimiter, stored as a single delimiter character.

1085             if C = Delimiter then
1086                Accumulate_Checksum (C);
1087                Scan_Ptr := Scan_Ptr + 1;
1088                exit when Source (Scan_Ptr) /= Delimiter;
1089                Code := Get_Char_Code (C);
1090                Accumulate_Checksum (C);
1091                Scan_Ptr := Scan_Ptr + 1;
1092
1093             else
                    --  A double quote inside a percent delimited string is
                    --  flagged, but it is still stored as a string character.
                    --  Note that this branch does not call Accumulate_Checksum.

1094                if C = '"' and then Delimiter = '%' then
1095                   Error_Msg_S
1096                     ("quote not allowed in percent delimited string");
1097                   Code := Get_Char_Code (C);
1098                   Scan_Ptr := Scan_Ptr + 1;
1099
                    --  Possible start of a wide character sequence: ESC when an
                    --  ESC encoding method is in use, an upper half character
                    --  when upper half encoding is in use, or a left bracket
                    --  followed by a double quote and an identifier character.

1100                elsif (C = ESC
1101                         and then Wide_Character_Encoding_Method
1102                                    in WC_ESC_Encoding_Method)
1103                  or else (C in Upper_Half_Character
1104                             and then Upper_Half_Encoding)
1105                  or else (C = '['
1106                             and then Source (Scan_Ptr + 1) = '"'
1107                             and then Identifier_Char (Source (Scan_Ptr + 2)))
1108                then
                       --  Wptr records where the sequence starts, it is used
                       --  below to place the Ada 2005 message. Scan_Ptr is
                       --  handed to Scan_Wide and is not otherwise advanced in
                       --  this branch (presumably Scan_Wide moves it past the
                       --  sequence, to be confirmed against its spec).

1109                   Wptr := Scan_Ptr;
1110                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1111
                       --  A malformed sequence is reported, and a blank is
                       --  stored in its place.

1112                   if Err then
1113                      Error_Illegal_Wide_Character;
1114                      Code := Get_Char_Code (' ');
1115                   end if;
1116
1117                   Accumulate_Checksum (Code);
1118
1119                   --  In Ada 95 mode we allow any wide characters in a string
1120                   --  but in Ada 2005, the set of characters allowed has been
1121                   --  restricted to graphic characters.
1122
1123                   if Ada_Version >= Ada_05
1124                     and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1125                   then
1126                      Error_Msg
1127                        ("(Ada 2005) non-graphic character not permitted " &
1128                         "in string literal", Wptr);
1129                   end if;
1130
                    --  Ordinary single character

1131                else
1132                   Accumulate_Checksum (C);
1133
                       --  Non-graphic characters: a line terminator means that
                       --  the closing delimiter is missing and ends the scan. An
                       --  upper half character is flagged only in Ada 83 mode,
                       --  anything else is flagged unconditionally. Except for
                       --  the line terminator, the character is stored anyway.

1134                   if C not in Graphic_Character then
1135                      if C in Line_Terminator then
1136                         Error_Unterminated_String;
1137                         exit;
1138
1139                      elsif C in Upper_Half_Character then
1140                         if Ada_Version = Ada_83 then
1141                            Error_Bad_String_Char;
1142                         end if;
1143
1144                      else
1145                         Error_Bad_String_Char;
1146                      end if;
1147                   end if;
1148
1149                   Code := Get_Char_Code (C);
1150                   Scan_Ptr := Scan_Ptr + 1;
1151                end if;
1152             end if;
1153
                 --  Store the element scanned by this iteration, and note if its
                 --  code is outside the range accepted by In_Character_Range.

1154             Store_String_Char (Code);
1155
1156             if not In_Character_Range (Code) then
1157                Wide_Character_Found := True;
1158             end if;
1159          end loop;
1160
              --  Record the result of End_String for the stored characters, then
              --  let Set_String choose between string literal and operator symbol

1161          String_Literal_Id := End_String;
1162          Set_String;
1163          return;
1164       end Slit;
1165
1166    --  Start of processing for Scan
1167
1168    begin
1169       Prev_Token := Token;
1170       Prev_Token_Ptr := Token_Ptr;
1171       Token_Name := Error_Name;
1172
1173       --  The following loop runs more than once only if a format effector
1174       --  (tab, vertical tab, form feed, line feed, carriage return) is
1175       --  encountered and skipped, or some error situation, such as an
1176       --  illegal character, is encountered.
1177
1178       <<Scan_Next_Character>>
1179
1180       loop
1181          --  Skip past blanks, loop is opened up for speed
1182
1183          while Source (Scan_Ptr) = ' ' loop
1184             if Source (Scan_Ptr + 1) /= ' ' then
1185                Scan_Ptr := Scan_Ptr + 1;
1186                exit;
1187             end if;
1188
1189             if Source (Scan_Ptr + 2) /= ' ' then
1190                Scan_Ptr := Scan_Ptr + 2;
1191                exit;
1192             end if;
1193
1194             if Source (Scan_Ptr + 3) /= ' ' then
1195                Scan_Ptr := Scan_Ptr + 3;
1196                exit;
1197             end if;
1198
1199             if Source (Scan_Ptr + 4) /= ' ' then
1200                Scan_Ptr := Scan_Ptr + 4;
1201                exit;
1202             end if;
1203
1204             if Source (Scan_Ptr + 5) /= ' ' then
1205                Scan_Ptr := Scan_Ptr + 5;
1206                exit;
1207             end if;
1208
1209             if Source (Scan_Ptr + 6) /= ' ' then
1210                Scan_Ptr := Scan_Ptr + 6;
1211                exit;
1212             end if;
1213
1214             if Source (Scan_Ptr + 7) /= ' ' then
1215                Scan_Ptr := Scan_Ptr + 7;
1216                exit;
1217             end if;
1218
1219             Scan_Ptr := Scan_Ptr + 8;
1220          end loop;
1221
1222          --  We are now at a non-blank character, which is the first character
1223          --  of the token we will scan, and hence the value of Token_Ptr.
1224
1225          Token_Ptr := Scan_Ptr;
1226
1227          --  Here begins the main case statement which transfers control on the
1228          --  basis of the non-blank character we have encountered.
1229
1230          case Source (Scan_Ptr) is
1231
1232          --  Line terminator characters
1233
1234          when CR | LF | FF | VT =>
1235             goto Scan_Line_Terminator;
1236
1237          --  Horizontal tab, just skip past it
1238
1239          when HT =>
1240             if Style_Check then Style.Check_HT; end if;
1241             Scan_Ptr := Scan_Ptr + 1;
1242
1243          --  End of file character, treated as an end of file only if it is
1244          --  the last character in the buffer, otherwise it is ignored.
1245
1246          when EOF =>
1247             if Scan_Ptr = Source_Last (Current_Source_File) then
1248                Check_End_Of_Line;
1249                if Style_Check then Style.Check_EOF; end if;
1250                Token := Tok_EOF;
1251                return;
1252             else
1253                Scan_Ptr := Scan_Ptr + 1;
1254             end if;
1255
1256          --  Ampersand
1257
1258          when '&' =>
1259             Accumulate_Checksum ('&');
1260
1261             if Source (Scan_Ptr + 1) = '&' then
1262                Error_Msg_S ("'&'& should be `AND THEN`");
1263                Scan_Ptr := Scan_Ptr + 2;
1264                Token := Tok_And;
1265                return;
1266
1267             else
1268                Scan_Ptr := Scan_Ptr + 1;
1269                Token := Tok_Ampersand;
1270                return;
1271             end if;
1272
1273          --  Asterisk (can be multiplication operator or double asterisk which
1274          --  is the exponentiation compound delimiter).
1275
1276          when '*' =>
1277             Accumulate_Checksum ('*');
1278
1279             if Source (Scan_Ptr + 1) = '*' then
1280                Accumulate_Checksum ('*');
1281                Scan_Ptr := Scan_Ptr + 2;
1282                Token := Tok_Double_Asterisk;
1283                return;
1284
1285             else
1286                Scan_Ptr := Scan_Ptr + 1;
1287                Token := Tok_Asterisk;
1288                return;
1289             end if;
1290
1291          --  Colon, which can either be an isolated colon, or part of an
1292          --  assignment compound delimiter.
1293
1294          when ':' =>
1295             Accumulate_Checksum (':');
1296
1297             if Double_Char_Token ('=') then
1298                Token := Tok_Colon_Equal;
1299                if Style_Check then Style.Check_Colon_Equal; end if;
1300                return;
1301
1302             elsif Source (Scan_Ptr + 1) = '-'
1303               and then Source (Scan_Ptr + 2) /= '-'
1304             then
1305                Token := Tok_Colon_Equal;
1306                Error_Msg (":- should be :=", Scan_Ptr);
1307                Scan_Ptr := Scan_Ptr + 2;
1308                return;
1309
1310             else
1311                Scan_Ptr := Scan_Ptr + 1;
1312                Token := Tok_Colon;
1313                if Style_Check then Style.Check_Colon; end if;
1314                return;
1315             end if;
1316
1317          --  Left parenthesis
1318
1319          when '(' =>
1320             Accumulate_Checksum ('(');
1321             Scan_Ptr := Scan_Ptr + 1;
1322             Token := Tok_Left_Paren;
1323             if Style_Check then Style.Check_Left_Paren; end if;
1324             return;
1325
1326          --  Left bracket
1327
1328          when '[' =>
1329             if Source (Scan_Ptr + 1) = '"' then
1330                goto Scan_Wide_Character;
1331
1332             else
1333                Error_Msg_S ("illegal character, replaced by ""(""");
1334                Scan_Ptr := Scan_Ptr + 1;
1335                Token := Tok_Left_Paren;
1336                return;
1337             end if;
1338
1339          --  Left brace
1340
1341          when '{' =>
1342             Error_Msg_S ("illegal character, replaced by ""(""");
1343             Scan_Ptr := Scan_Ptr + 1;
1344             Token := Tok_Left_Paren;
1345             return;
1346
1347          --  Comma
1348
1349          when ',' =>
1350             Accumulate_Checksum (',');
1351             Scan_Ptr := Scan_Ptr + 1;
1352             Token := Tok_Comma;
1353             if Style_Check then Style.Check_Comma; end if;
1354             return;
1355
1356          --  Dot, which is either an isolated period, or part of a double dot
1357          --  compound delimiter sequence. We also check for the case of a
1358          --  digit following the period, to give a better error message.
1359
1360          when '.' =>
1361             Accumulate_Checksum ('.');
1362
1363             if Double_Char_Token ('.') then
1364                Token := Tok_Dot_Dot;
1365                if Style_Check then Style.Check_Dot_Dot; end if;
1366                return;
1367
1368             elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1369                Error_Msg_S ("numeric literal cannot start with point");
1370                Scan_Ptr := Scan_Ptr + 1;
1371
1372             else
1373                Scan_Ptr := Scan_Ptr + 1;
1374                Token := Tok_Dot;
1375                return;
1376             end if;
1377
1378          --  Equal, which can either be an equality operator, or part of the
1379          --  arrow (=>) compound delimiter.
1380
1381          when '=' =>
1382             Accumulate_Checksum ('=');
1383
1384             if Double_Char_Token ('>') then
1385                Token := Tok_Arrow;
1386                if Style_Check then Style.Check_Arrow; end if;
1387                return;
1388
1389             elsif Source (Scan_Ptr + 1) = '=' then
1390                Error_Msg_S ("== should be =");
1391                Scan_Ptr := Scan_Ptr + 1;
1392             end if;
1393
1394             Scan_Ptr := Scan_Ptr + 1;
1395             Token := Tok_Equal;
1396             return;
1397
1398          --  Greater than, which can be a greater than operator, greater than
1399          --  or equal operator, or first character of a right label bracket.
1400
1401          when '>' =>
1402             Accumulate_Checksum ('>');
1403
1404             if Double_Char_Token ('=') then
1405                Token := Tok_Greater_Equal;
1406                return;
1407
1408             elsif Double_Char_Token ('>') then
1409                Token := Tok_Greater_Greater;
1410                return;
1411
1412             else
1413                Scan_Ptr := Scan_Ptr + 1;
1414                Token := Tok_Greater;
1415                return;
1416             end if;
1417
1418          --  Less than, which can be a less than operator, less than or equal
1419          --  operator, or the first character of a left label bracket, or the
1420          --  first character of a box (<>) compound delimiter.
1421
1422          when '<' =>
1423             Accumulate_Checksum ('<');
1424
1425             if Double_Char_Token ('=') then
1426                Token := Tok_Less_Equal;
1427                return;
1428
1429             elsif Double_Char_Token ('>') then
1430                Token := Tok_Box;
1431                if Style_Check then Style.Check_Box; end if;
1432                return;
1433
1434             elsif Double_Char_Token ('<') then
1435                Token := Tok_Less_Less;
1436                return;
1437
1438             else
1439                Scan_Ptr := Scan_Ptr + 1;
1440                Token := Tok_Less;
1441                return;
1442             end if;
1443
1444          --  Minus, which is either a subtraction operator, or the first
1445          --  character of double minus starting a comment
1446
1447          when '-' => Minus_Case : begin
1448             if Source (Scan_Ptr + 1) = '>' then
1449                Error_Msg_S ("invalid token");
1450                Scan_Ptr := Scan_Ptr + 2;
1451                Token := Tok_Arrow;
1452                return;
1453
1454             elsif Source (Scan_Ptr + 1) /= '-' then
1455                Accumulate_Checksum ('-');
1456                Scan_Ptr := Scan_Ptr + 1;
1457                Token := Tok_Minus;
1458                return;
1459
1460             --  Comment
1461
1462             else -- Source (Scan_Ptr + 1) = '-' then
1463                if Style_Check then Style.Check_Comment; end if;
1464                Scan_Ptr := Scan_Ptr + 2;
1465
1466                --  If we are in preprocessor mode with Replace_In_Comments set,
1467                --  then we return the "--" as a token on its own.
1468
1469                if Replace_In_Comments then
1470                   Token := Tok_Comment;
1471                   return;
1472                end if;
1473
1474                --  Otherwise scan out the comment
1475
1476                Start_Of_Comment := Scan_Ptr;
1477
1478                --  Loop to scan comment (this loop runs more than once only if
1479                --  a horizontal tab or other non-graphic character is scanned)
1480
1481                loop
1482                   --  Scan to non graphic character (opened up for speed)
1483
1484                   --  Note that we just eat left brackets, which means that
1485                   --  bracket notation cannot be used for end of line
1486                   --  characters in comments. This seems a reasonable choice,
1487                   --  since no one would ever use brackets notation in a real
1488                   --  program in this situation, and if we allow brackets
1489                   --  notation, we forbid some valid comments which contain a
1490                   --  brackets sequence that happens to match an end of line
1491                   --  character.
1492
1493                   loop
1494                      exit when Source (Scan_Ptr) not in Graphic_Character;
1495                      Scan_Ptr := Scan_Ptr + 1;
1496                      exit when Source (Scan_Ptr) not in Graphic_Character;
1497                      Scan_Ptr := Scan_Ptr + 1;
1498                      exit when Source (Scan_Ptr) not in Graphic_Character;
1499                      Scan_Ptr := Scan_Ptr + 1;
1500                      exit when Source (Scan_Ptr) not in Graphic_Character;
1501                      Scan_Ptr := Scan_Ptr + 1;
1502                      exit when Source (Scan_Ptr) not in Graphic_Character;
1503                      Scan_Ptr := Scan_Ptr + 1;
1504                   end loop;
1505
1506                   --  Keep going if horizontal tab
1507
1508                   if Source (Scan_Ptr) = HT then
1509                      if Style_Check then Style.Check_HT; end if;
1510                      Scan_Ptr := Scan_Ptr + 1;
1511
1512                   --  Terminate scan of comment if line terminator
1513
1514                   elsif Source (Scan_Ptr) in Line_Terminator then
1515                      exit;
1516
1517                   --  Terminate scan of comment if end of file encountered
1518                   --  (embedded EOF character or real last character in file)
1519
1520                   elsif Source (Scan_Ptr) = EOF then
1521                      exit;
1522
1523                   --  If we have a wide character, we have to scan it out,
1524                   --  because it might be a legitimate line terminator
1525
1526                   elsif (Source (Scan_Ptr) = ESC
1527                            and then Identifier_Char (ESC))
1528                     or else
1529                          (Source (Scan_Ptr) in Upper_Half_Character
1530                             and then Upper_Half_Encoding)
1531                   then
1532                      declare
1533                         Wptr : constant Source_Ptr := Scan_Ptr;
1534                         Code : Char_Code;
1535                         Err  : Boolean;
1536
1537                      begin
1538                         Scan_Wide (Source, Scan_Ptr, Code, Err);
1539
1540                         --  If not well formed wide character, then just skip
1541                         --  past it and ignore it.
1542
1543                         if Err then
1544                            Scan_Ptr := Wptr + 1;
1545
1546                         --  If UTF_32 terminator, terminate comment scan
1547
1548                         elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1549                            Scan_Ptr := Wptr;
1550                            exit;
1551                         end if;
1552                      end;
1553
1554                   --  Keep going if character in 80-FF range, or is ESC. These
1555                   --  characters are allowed in comments by RM-2.1(1), 2.7(2).
1556                   --  They are allowed even in Ada 83 mode according to the
1557                   --  approved AI. ESC was added to the AI in June 93.
1558
1559                   elsif Source (Scan_Ptr) in Upper_Half_Character
1560                      or else Source (Scan_Ptr) = ESC
1561                   then
1562                      Scan_Ptr := Scan_Ptr + 1;
1563
1564                   --  Otherwise we have an illegal comment character
1565
1566                   else
1567                      Error_Illegal_Character;
1568                   end if;
1569                end loop;
1570
1571                --  Note that, except when comments are tokens, we do NOT
1572                --  execute a return here, instead we fall through to reexecute
1573                --  the scan loop to look for a token.
1574
1575                if Comment_Is_Token then
1576                   Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1577                   Name_Buffer (1 .. Name_Len) :=
1578                     String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1579                   Comment_Id := Name_Find;
1580                   Token := Tok_Comment;
1581                   return;
1582                end if;
1583             end if;
1584          end Minus_Case;
1585
1586          --  Double quote starting a string literal
1587
1588          when '"' =>
1589             Slit;
1590             Post_Scan;
1591             return;
1592
1593          --  Percent starting a string literal
1594
1595          when '%' =>
1596             Obsolescent_Check (Token_Ptr);
1597
1598             if Warn_On_Obsolescent_Feature then
1599                Error_Msg_S
1600                  ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1601                Error_Msg_S
1602                  ("\use """""" instead?");
1603             end if;
1604
1605             Slit;
1606             Post_Scan;
1607             return;
1608
1609          --  Apostrophe. This can either be the start of a character literal,
1610          --  or an isolated apostrophe used in a qualified expression or an
1611          --  attribute. We treat it as a character literal if it does not
1612          --  follow a right parenthesis, identifier, the keyword ALL or
1613          --  a literal. This means that we correctly treat constructs like:
1614
1615          --    A := CHARACTER'('A');
1616
1617          --  Note that RM-2.2(7) does not require a separator between
1618          --  "CHARACTER" and "'" in the above.
1619
1620          when ''' => Char_Literal_Case : declare
1621             Code : Char_Code;
1622             Err  : Boolean;
1623
1624          begin
1625             Accumulate_Checksum (''');
1626             Scan_Ptr := Scan_Ptr + 1;
1627
1628             --  Here is where we make the test to distinguish the cases. Treat
1629             --  as apostrophe if previous token is an identifier, right paren
1630             --  or the reserved word "all" (latter case as in A.all'Address)
1631             --  (or the reserved word "project" in project files). Also treat
1632             --  it as apostrophe after a literal (this catches some legitimate
1633             --  cases, like A."abs"'Address, and also gives better error
1634             --  behavior for impossible cases like 123'xxx).
1635
1636             if Prev_Token = Tok_Identifier
1637                or else Prev_Token = Tok_Right_Paren
1638                or else Prev_Token = Tok_All
1639                or else Prev_Token = Tok_Project
1640                or else Prev_Token in Token_Class_Literal
1641             then
1642                Token := Tok_Apostrophe;
1643                if Style_Check then Style.Check_Apostrophe; end if;
1644                return;
1645
1646             --  Otherwise the apostrophe starts a character literal
1647
1648             else
1649                --  Case of wide character literal
1650
1651                if (Source (Scan_Ptr) = ESC
1652                      and then
1653                     Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1654                  or else
1655                    (Source (Scan_Ptr) in Upper_Half_Character
1656                      and then
1657                     Upper_Half_Encoding)
1658                  or else
1659                    (Source (Scan_Ptr) = '['
1660                      and then
1661                     Source (Scan_Ptr + 1) = '"')
1662                then
1663                   Wptr := Scan_Ptr;
1664                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1665                   Accumulate_Checksum (Code);
1666
1667                   if Err then
1668                      Error_Illegal_Wide_Character;
1669                         Code := Character'Pos (' ');
1670
1671                   --  In Ada 95 mode we allow any wide character in a character
1672                   --  literal, but in Ada 2005, the set of characters allowed
1673                   --  is restricted to graphic characters.
1674
1675                   elsif Ada_Version >= Ada_05
1676                     and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1677                   then
1678                      Error_Msg
1679                        ("(Ada 2005) non-graphic character not permitted " &
1680                         "in character literal", Wptr);
1681                   end if;
1682
1683                   if Source (Scan_Ptr) /= ''' then
1684                      Error_Msg_S ("missing apostrophe");
1685                   else
1686                      Scan_Ptr := Scan_Ptr + 1;
1687                   end if;
1688
1689                --  If we do not find a closing quote in the expected place then
1690                --  assume that we have a misguided attempt at a string literal.
1691
1692                --  However, if previous token is RANGE, then we return an
1693                --  apostrophe instead since this gives better error recovery
1694
1695                elsif Source (Scan_Ptr + 1) /= ''' then
1696                   if Prev_Token = Tok_Range then
1697                      Token := Tok_Apostrophe;
1698                      return;
1699
1700                   else
1701                      Scan_Ptr := Scan_Ptr - 1;
1702                      Error_Msg_S
1703                        ("strings are delimited by double quote character");
1704                      Slit;
1705                      Post_Scan;
1706                      return;
1707                   end if;
1708
1709                --  Otherwise we have a (non-wide) character literal
1710
1711                else
1712                   Accumulate_Checksum (Source (Scan_Ptr));
1713
1714                   if Source (Scan_Ptr) not in Graphic_Character then
1715                      if Source (Scan_Ptr) in Upper_Half_Character then
1716                         if Ada_Version = Ada_83 then
1717                            Error_Illegal_Character;
1718                         end if;
1719
1720                      else
1721                         Error_Illegal_Character;
1722                      end if;
1723                   end if;
1724
1725                   Code := Get_Char_Code (Source (Scan_Ptr));
1726                   Scan_Ptr := Scan_Ptr + 2;
1727                end if;
1728
1729                --  Fall through here with Scan_Ptr updated past the closing
1730                --  quote, and Code set to the Char_Code value for the literal
1731
1732                Accumulate_Checksum (''');
1733                Token := Tok_Char_Literal;
1734                Set_Character_Literal_Name (Code);
1735                Token_Name := Name_Find;
1736                Character_Code := Code;
1737                Post_Scan;
1738                return;
1739             end if;
1740          end Char_Literal_Case;
1741
1742          --  Right parenthesis
1743
1744          when ')' =>
1745             Accumulate_Checksum (')');
1746             Scan_Ptr := Scan_Ptr + 1;
1747             Token := Tok_Right_Paren;
1748             if Style_Check then Style.Check_Right_Paren; end if;
1749             return;
1750
1751          --  Right bracket or right brace, treated as right paren
1752
1753          when ']' | '}' =>
1754             Error_Msg_S ("illegal character, replaced by "")""");
1755             Scan_Ptr := Scan_Ptr + 1;
1756             Token := Tok_Right_Paren;
1757             return;
1758
1759          --  Slash (can be division operator or first character of not equal)
1760
1761          when '/' =>
1762             Accumulate_Checksum ('/');
1763
1764             if Double_Char_Token ('=') then
1765                Token := Tok_Not_Equal;
1766                return;
1767             else
1768                Scan_Ptr := Scan_Ptr + 1;
1769                Token := Tok_Slash;
1770                return;
1771             end if;
1772
1773          --  Semicolon
1774
1775          when ';' =>
1776             Accumulate_Checksum (';');
1777             Scan_Ptr := Scan_Ptr + 1;
1778             Token := Tok_Semicolon;
1779             if Style_Check then Style.Check_Semicolon; end if;
1780             return;
1781
1782          --  Vertical bar
1783
1784          when '|' => Vertical_Bar_Case : begin
1785             Accumulate_Checksum ('|');
1786
1787             --  Special check for || to give nice message
1788
1789             if Source (Scan_Ptr + 1) = '|' then
1790                Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1791                Scan_Ptr := Scan_Ptr + 2;
1792                Token := Tok_Or;
1793                return;
1794
1795             else
1796                Scan_Ptr := Scan_Ptr + 1;
1797                Token := Tok_Vertical_Bar;
1798                if Style_Check then Style.Check_Vertical_Bar; end if;
1799                return;
1800             end if;
1801          end Vertical_Bar_Case;
1802
1803          --  Exclamation, replacement character for vertical bar
1804
1805          when '!' => Exclamation_Case : begin
1806             Accumulate_Checksum ('!');
1807             Obsolescent_Check (Token_Ptr);
1808
1809             if Warn_On_Obsolescent_Feature then
1810                Error_Msg_S
1811                  ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1812                Error_Msg_S
1813                  ("\use ""'|"" instead?");
1814             end if;
1815
1816             if Source (Scan_Ptr + 1) = '=' then
1817                Error_Msg_S ("'!= should be /=");
1818                Scan_Ptr := Scan_Ptr + 2;
1819                Token := Tok_Not_Equal;
1820                return;
1821
1822             else
1823                Scan_Ptr := Scan_Ptr + 1;
1824                Token := Tok_Vertical_Bar;
1825                return;
1826             end if;
1827          end Exclamation_Case;
1828
1829          --  Plus
1830
1831          when '+' => Plus_Case : begin
1832             Accumulate_Checksum ('+');
1833             Scan_Ptr := Scan_Ptr + 1;
1834             Token := Tok_Plus;
1835             return;
1836          end Plus_Case;
1837
1838          --  Digits starting a numeric literal
1839
1840          when '0' .. '9' =>
1841             Nlit;
1842
1843             if Identifier_Char (Source (Scan_Ptr)) then
1844                Error_Msg_S
1845                  ("delimiter required between literal and identifier");
1846             end if;
1847             Post_Scan;
1848             return;
1849
1850          --  Lower case letters
1851
1852          when 'a' .. 'z' =>
1853             Name_Len := 1;
1854             Underline_Found := False;
1855             Name_Buffer (1) := Source (Scan_Ptr);
1856             Accumulate_Checksum (Name_Buffer (1));
1857             Scan_Ptr := Scan_Ptr + 1;
1858             goto Scan_Identifier;
1859
1860          --  Upper case letters
1861
1862          when 'A' .. 'Z' =>
1863             Name_Len := 1;
1864             Underline_Found := False;
1865             Name_Buffer (1) :=
1866               Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1867             Accumulate_Checksum (Name_Buffer (1));
1868             Scan_Ptr := Scan_Ptr + 1;
1869             goto Scan_Identifier;
1870
1871          --  Underline character
1872
1873          when '_' =>
1874             if Special_Characters ('_') then
1875                Token_Ptr := Scan_Ptr;
1876                Scan_Ptr := Scan_Ptr + 1;
1877                Token := Tok_Special;
1878                Special_Character := '_';
1879                return;
1880             end if;
1881
1882             Error_Msg_S ("identifier cannot start with underline");
1883             Name_Len := 1;
1884             Name_Buffer (1) := '_';
1885             Scan_Ptr := Scan_Ptr + 1;
1886             Underline_Found := False;
1887             goto Scan_Identifier;
1888
1889          --  Space (not possible, because we scanned past blanks)
1890
1891          when ' ' =>
1892             raise Program_Error;
1893
1894          --  Characters in top half of ASCII 8-bit chart
1895
1896          when Upper_Half_Character =>
1897
1898             --  Wide character case
1899
1900             if Upper_Half_Encoding then
1901                goto Scan_Wide_Character;
1902
1903             --  Otherwise we have OK Latin-1 character
1904
1905             else
1906                --  Upper half characters may possibly be identifier letters
1907                --  but can never be digits, so Identifier_Char can be used to
1908                --  test for a valid start of identifier character.
1909
1910                if Identifier_Char (Source (Scan_Ptr)) then
1911                   Name_Len := 0;
1912                   Underline_Found := False;
1913                   goto Scan_Identifier;
1914                else
1915                   Error_Illegal_Character;
1916                end if;
1917             end if;
1918
1919          when ESC =>
1920
1921             --  ESC character, possible start of identifier if wide characters
1922             --  using ESC encoding are allowed in identifiers, which we can
1923             --  tell by looking at the Identifier_Char flag for ESC, which is
1924             --  only true if these conditions are met. In Ada 2005 mode, may
1925             --  also be valid UTF_32 space or line terminator character.
1926
1927             if Identifier_Char (ESC) then
1928                Name_Len := 0;
1929                goto Scan_Wide_Character;
1930             else
1931                Error_Illegal_Character;
1932             end if;
1933
1934          --  Invalid control characters
1935
1936          when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | ASCII.SO |
1937               SI  | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1938               EM  | FS  | GS  | RS  | US  | DEL
1939          =>
1940             Error_Illegal_Character;
1941
1942          --  Invalid graphic characters
1943
1944          when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1945
1946             --  If Set_Special_Character has been called for this character,
1947             --  set Scans.Special_Character and return a Special token.
1948
1949             if Special_Characters (Source (Scan_Ptr)) then
1950                Token_Ptr := Scan_Ptr;
1951                Token := Tok_Special;
1952                Special_Character := Source (Scan_Ptr);
1953                Scan_Ptr := Scan_Ptr + 1;
1954                return;
1955
1956             --  Otherwise, this is an illegal character
1957
1958             else
1959                Error_Illegal_Character;
1960             end if;
1961
1962          --  End switch on non-blank character
1963
1964          end case;
1965
1966       --  End loop past format effectors. The exit from this loop is by
1967       --  executing a return statement following completion of token scan
1968       --  (control never falls out of this loop to the code which follows)
1969
1970       end loop;
1971
1972       --  Wide_Character scanning routine. On entry we have encountered the
1973       --  initial character of a wide character sequence.
1974
1975       <<Scan_Wide_Character>>
1976
1977          declare
1978             Code : Char_Code;
1979             Cat  : Category;
1980             Err  : Boolean;
1981
1982          begin
1983             Wptr := Scan_Ptr;
1984             Scan_Wide (Source, Scan_Ptr, Code, Err);
1985
1986             --  If bad wide character, signal error and continue scan
1987
1988             if Err then
1989                Error_Illegal_Wide_Character;
1990                goto Scan_Next_Character;
1991             end if;
1992
1993             Cat := Get_Category (UTF_32 (Code));
1994
1995             --  If OK letter, reset scan ptr and go scan identifier
1996
1997             if Is_UTF_32_Letter (Cat) then
1998                Scan_Ptr := Wptr;
1999                Name_Len := 0;
2000                Underline_Found := False;
2001                goto Scan_Identifier;
2002
2003             --  If OK wide space, ignore and keep scanning (we do not include
2004             --  any ignored spaces in checksum)
2005
2006             elsif Is_UTF_32_Space (Cat) then
2007                goto Scan_Next_Character;
2008
2009             --  If OK wide line terminator, terminate current line
2010
2011             elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
2012                Scan_Ptr := Wptr;
2013                goto Scan_Line_Terminator;
2014
2015             --  Punctuation is an error (at start of identifier)
2016
2017             elsif Is_UTF_32_Punctuation (Cat) then
2018                Error_Msg
2019                  ("identifier cannot start with punctuation", Wptr);
2020                Scan_Ptr := Wptr;
2021                Name_Len := 0;
2022                Underline_Found := False;
2023                goto Scan_Identifier;
2024
2025             --  Mark character is an error (at start of identifier)
2026
2027             elsif Is_UTF_32_Mark (Cat) then
2028                Error_Msg
2029                  ("identifier cannot start with mark character", Wptr);
2030                Scan_Ptr := Wptr;
2031                Name_Len := 0;
2032                Underline_Found := False;
2033                goto Scan_Identifier;
2034
2035             --  Other format character is an error (at start of identifier)
2036
2037             elsif Is_UTF_32_Other (Cat) then
2038                Error_Msg
2039                  ("identifier cannot start with other format character", Wptr);
2040                Scan_Ptr := Wptr;
2041                Name_Len := 0;
2042                Underline_Found := False;
2043                goto Scan_Identifier;
2044
2045             --  Extended digit character is an error. Could be bad start of
2046             --  identifier or bad literal. Not worth doing too much to try to
2047             --  distinguish these cases, but we will do a little bit.
2048
2049             elsif Is_UTF_32_Digit (Cat) then
2050                Error_Msg
2051                  ("identifier cannot start with digit character", Wptr);
2052                Scan_Ptr := Wptr;
2053                Name_Len := 0;
2054                Underline_Found := False;
2055                goto Scan_Identifier;
2056
2057             --  All other wide characters are illegal here
2058
2059             else
2060                Error_Illegal_Wide_Character;
2061                goto Scan_Next_Character;
2062             end if;
2063          end;
2064
2065       --  Routine to scan line terminator. On entry Scan_Ptr points to a
2066       --  character which is one of FF, LF, CR, VT, or one of the wide
2067       --  characters that is treated as a line terminator.
2068
2069       <<Scan_Line_Terminator>>
2070
2071          --  Check line too long
2072
2073          Check_End_Of_Line;
2074
2075          --  Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2076          --  a physical line.
2077
2078          if End_Of_Line_Is_Token then
2079             Token_Ptr := Scan_Ptr;
2080          end if;
2081
2082          declare
2083             Physical : Boolean;
2084
2085          begin
2086             Skip_Line_Terminators (Scan_Ptr, Physical);
2087
2088             --  If we are at start of physical line, update scan pointers to
2089             --  reflect the start of the new line.
2090
2091             if Physical then
2092                Current_Line_Start       := Scan_Ptr;
2093                Start_Column             := Set_Start_Column;
2094                First_Non_Blank_Location := Scan_Ptr;
2095
2096                --  If End_Of_Line is a token, we return it as it is a
2097                --  physical line.
2098
2099                if End_Of_Line_Is_Token then
2100                   Token := Tok_End_Of_Line;
2101                   return;
2102                end if;
2103             end if;
2104          end;
2105
2106          goto Scan_Next_Character;
2107
2108       --  Identifier scanning routine. On entry, some initial characters of
2109       --  the identifier may have already been stored in Name_Buffer. If so,
2110       --  Name_Len has the number of characters stored. Otherwise Name_Len is
2111       --  set to zero on entry. Underline_Found is also set False on entry.
2112
2113       <<Scan_Identifier>>
2114
2115          --  This loop scans as fast as possible past lower half letters and
2116          --  digits, which we expect to be the most common characters.
2117
2118          loop
2119             if Source (Scan_Ptr) in 'a' .. 'z'
2120               or else Source (Scan_Ptr) in '0' .. '9'
2121             then
2122                Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2123                Accumulate_Checksum (Source (Scan_Ptr));
2124
2125             elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2126                Name_Buffer (Name_Len + 1) :=
2127                  Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2128                Accumulate_Checksum (Name_Buffer (Name_Len + 1));
2129
2130             else
2131                exit;
2132             end if;
2133
2134             Underline_Found := False;
2135             Scan_Ptr := Scan_Ptr + 1;
2136             Name_Len := Name_Len + 1;
2137          end loop;
2138
2139          --  If we fall through, then we have encountered either an underline
2140          --  character, or an extended identifier character (i.e. one from the
2141          --  upper half), or a wide character, or an identifier terminator. The
2142          --  initial test speeds us up in the most common case where we have
2143          --  an identifier terminator. Note that ESC is an identifier character
2144          --  only if a wide character encoding method that uses ESC encoding
2145          --  is active, so if we find an ESC character we know that we have a
2146          --  wide character.
2147
2148          if Identifier_Char (Source (Scan_Ptr)) then
2149
2150             --  Case of underline
2151
2152             if Source (Scan_Ptr) = '_' then
2153                Accumulate_Checksum ('_');
2154
2155                if Underline_Found then
2156                   Error_No_Double_Underline;
2157                else
2158                   Underline_Found := True;
2159                   Name_Len := Name_Len + 1;
2160                   Name_Buffer (Name_Len) := '_';
2161                end if;
2162
2163                Scan_Ptr := Scan_Ptr + 1;
2164                goto Scan_Identifier;
2165
2166             --  Upper half character
2167
2168             elsif Source (Scan_Ptr) in Upper_Half_Character
2169               and then not Upper_Half_Encoding
2170             then
2171                Accumulate_Checksum (Source (Scan_Ptr));
2172                Store_Encoded_Character
2173                  (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2174                Scan_Ptr := Scan_Ptr + 1;
2175                Underline_Found := False;
2176                goto Scan_Identifier;
2177
2178             --  Left bracket not followed by a quote terminates an identifier.
2179             --  This is an error, but we don't want to give a junk error msg
2180             --  about wide characters in this case!
2181
2182             elsif Source (Scan_Ptr) = '['
2183               and then Source (Scan_Ptr + 1) /= '"'
2184             then
2185                null;
2186
2187             --  We know we have a wide character encoding here (the current
2188             --  character is either ESC, left bracket, or an upper half
2189             --  character depending on the encoding method).
2190
2191             else
2192                --  Scan out the wide character and insert the appropriate
2193                --  encoding into the name table entry for the identifier.
2194
2195                declare
2196                   Code : Char_Code;
2197                   Err  : Boolean;
2198                   Chr  : Character;
2199                   Cat  : Category;
2200
2201                begin
2202                   Wptr := Scan_Ptr;
2203                   Scan_Wide (Source, Scan_Ptr, Code, Err);
2204
2205                   --  If error, signal error
2206
2207                   if Err then
2208                      Error_Illegal_Wide_Character;
2209
2210                   --  If the character scanned is a normal identifier
2211                   --  character, then we treat it that way.
2212
2213                   elsif In_Character_Range (Code)
2214                     and then Identifier_Char (Get_Character (Code))
2215                   then
2216                      Chr := Get_Character (Code);
2217                      Accumulate_Checksum (Chr);
2218                      Store_Encoded_Character
2219                        (Get_Char_Code (Fold_Lower (Chr)));
2220                      Underline_Found := False;
2221
2222                   --  Here if not a normal identifier character
2223
2224                   else
2225                      --  Make sure we are allowing wide characters in
2226                      --  identifiers. Note that we allow wide character
2227                      --  notation for an OK identifier character. This in
2228                      --  particular allows bracket or other notation to be
2229                      --  used for upper half letters.
2230
2231                      --  Wide characters are always allowed in Ada 2005
2232
2233                      if Identifier_Character_Set /= 'w'
2234                        and then Ada_Version < Ada_05
2235                      then
2236                         Error_Msg
2237                        ("wide character not allowed in identifier", Wptr);
2238                      end if;
2239
2240                      Cat := Get_Category (UTF_32 (Code));
2241
2242                      --  If OK letter, store it folding to upper case. Note
2243                      --  that we include the folded letter in the checksum.
2244
2245                      if Is_UTF_32_Letter (Cat) then
2246                         Code :=
2247                           Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
2248                         Accumulate_Checksum (Code);
2249                         Store_Encoded_Character (Code);
2250                         Underline_Found := False;
2251
2252                      --  If OK extended digit or mark, then store it
2253
2254                      elsif Is_UTF_32_Digit (Cat)
2255                        or else Is_UTF_32_Mark (Cat)
2256                      then
2257                         Accumulate_Checksum (Code);
2258                         Store_Encoded_Character (Code);
2259                         Underline_Found := False;
2260
2261                      --  Wide punctuation is also stored, but counts as an
2262                      --  underline character for error checking purposes.
2263
2264                      elsif Is_UTF_32_Punctuation (Cat) then
2265                         Accumulate_Checksum (Code);
2266
2267                         if Underline_Found then
2268                            declare
2269                               Cend : constant Source_Ptr := Scan_Ptr;
2270                            begin
2271                               Scan_Ptr := Wptr;
2272                               Error_No_Double_Underline;
2273                               Scan_Ptr := Cend;
2274                            end;
2275
2276                         else
2277                            Store_Encoded_Character (Code);
2278                            Underline_Found := True;
2279                         end if;
2280
2281                      --  Wide character in Unicode category "Other, Format"
2282                      --  is accepted in an identifier, but is ignored and not
2283                      --  stored. It seems reasonable to exclude it from the
2284                      --  checksum.
2285
2286                      --  Note that it is correct (see AI-395) to simply strip
2287                      --  other format characters, before testing for double
2288                      --  underlines, or for reserved words.
2289
2290                      elsif Is_UTF_32_Other (Cat) then
2291                         null;
2292
2293                      --  Wide character in category Separator,Space terminates
2294
2295                      elsif Is_UTF_32_Space (Cat) then
2296                         goto Scan_Identifier_Complete;
2297
2298                      --  Any other wide character is not acceptable
2299
2300                      else
2301                         Error_Msg
2302                           ("invalid wide character in identifier", Wptr);
2303                      end if;
2304                   end if;
2305
2306                   goto Scan_Identifier;
2307                end;
2308             end if;
2309          end if;
2310
2311       --  Scan of identifier is complete. The identifier is stored in
2312       --  Name_Buffer, and Scan_Ptr points past the last character.
2313
2314       <<Scan_Identifier_Complete>>
2315          Token_Name := Name_Find;
2316
2317          --  Check for identifier ending with underline or punctuation char
2318
2319          if Underline_Found then
2320             Underline_Found := False;
2321
2322             if Source (Scan_Ptr - 1) = '_' then
2323                Error_Msg
2324                  ("identifier cannot end with underline", Scan_Ptr - 1);
2325             else
2326                Error_Msg
2327                  ("identifier cannot end with punctuation character", Wptr);
2328             end if;
2329          end if;
2330
2331          --  Here is where we check if it was a keyword
2332
2333          if Get_Name_Table_Byte (Token_Name) /= 0
2334            and then (Ada_Version >= Ada_95
2335                        or else Token_Name not in Ada_95_Reserved_Words)
2336            and then (Ada_Version >= Ada_05
2337                        or else Token_Name not in Ada_2005_Reserved_Words)
2338          then
2339             Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2340
2341             --  Deal with possible style check for non-lower case keyword, but
2342             --  we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2343             --  this purpose if they appear as attribute designators. Actually
2344             --  we only check the first character for speed.
2345
2346             --  Ada 2005 (AI-284): Do not apply the style check in case of
2347             --  "pragma Interface"
2348
2349             --  Ada 2005 (AI-340): Do not apply the style check in case of
2350             --  MOD attribute.
2351
2352             if Style_Check
2353               and then Source (Token_Ptr) <= 'Z'
2354               and then (Prev_Token /= Tok_Apostrophe
2355                           or else
2356                             (Token /= Tok_Access and then
2357                              Token /= Tok_Delta  and then
2358                              Token /= Tok_Digits and then
2359                              Token /= Tok_Mod    and then
2360                              Token /= Tok_Range))
2361               and then (Token /= Tok_Interface
2362                           or else
2363                             (Token = Tok_Interface
2364                                and then Prev_Token /= Tok_Pragma))
2365             then
2366                Style.Non_Lower_Case_Keyword;
2367             end if;
2368
2369             --  We must reset Token_Name since this is not an identifier and
2370             --  if we leave Token_Name set, the parser gets confused because
2371             --  it thinks it is dealing with an identifier instead of the
2372             --  corresponding keyword.
2373
2374             Token_Name := No_Name;
2375             Accumulate_Token_Checksum;
2376             return;
2377
2378          --  It is an identifier after all
2379
2380          else
2381             Token := Tok_Identifier;
2382             Accumulate_Token_Checksum;
2383             Post_Scan;
2384             return;
2385          end if;
2386    end Scan;
2387
2388    --------------------------
2389    -- Set_Comment_As_Token --
2390    --------------------------
2391
2392    procedure Set_Comment_As_Token (Value : Boolean) is
2393    begin
           --  Record the caller's choice in the Comment_Is_Token flag. The flag
           --  is declared outside this procedure; presumably it is what makes
           --  the comment-scanning code in Scan hand back Tok_Comment rather
           --  than skipping the comment, but that test is not visible here
           --  (to be confirmed against the Minus_Case code in Scan).

2394       Comment_Is_Token := Value;
2395    end Set_Comment_As_Token;
2396
2397    ------------------------------
2398    -- Set_End_Of_Line_As_Token --
2399    ------------------------------
2400
2401    procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2402    begin
           --  Record the caller's choice in the End_Of_Line_Is_Token flag. Scan
           --  reads this flag in its line terminator handling: when it is True,
           --  Token_Ptr is set to the terminator position and Tok_End_Of_Line
           --  is returned once Skip_Line_Terminators reports a physical line.

2403       End_Of_Line_Is_Token := Value;
2404    end Set_End_Of_Line_As_Token;
2405
2406    ---------------------------
2407    -- Set_Special_Character --
2408    ---------------------------
2409
2410    procedure Set_Special_Character (C : Character) is
2411    begin
           --  Only the characters listed below can be registered as special.
           --  These are the characters that Scan looks up in Special_Characters
           --  (underline, and the graphic characters that are otherwise
           --  illegal); when the entry is True, Scan returns Tok_Special with
           --  Special_Character set to the character instead of reporting an
           --  error. This procedure only ever sets entries to True.

2412       case C is
2413          when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
2414             Special_Characters (C) := True;
2415
              --  Any other character is silently ignored: no flag is set and
              --  nothing is reported back to the caller.

2416          when others =>
2417             null;
2418       end case;
2419    end Set_Special_Character;
2420
2421    ----------------------
2422    -- Set_Start_Column --
2423    ----------------------
2424
2425    --  Note: it seems at first glance a little expensive to compute this value
2426    --  for every source line (since it is certainly not used for all source
2427    --  lines). On the other hand, it doesn't take much more work to skip past
2428    --  the initial white space on the line counting the columns than it would
2429    --  to scan past the white space using the standard scanning circuits.
2430
2431    function Set_Start_Column return Column_Number is
2432       Column : Column_Number := 0;
2433       --  Column reached so far; the first column counts as zero
2434
2435    begin
2436       --  Each pass of the outer loop skips one run of blanks and then at
2437       --  most one horizontal tab following that run.
2438
2439       White_Space : loop
2440
2441          --  Skip a run of blanks. The tests are unrolled so that as many as
2442          --  seven blanks are consumed with a single update of Scan_Ptr and
2443          --  Column. A character is examined only once every character
2444          --  before it in the run has been found to be a blank.
2445
2446          Blank_Run : loop
2447             if Source (Scan_Ptr) /= ' ' then
2448                exit Blank_Run;
2449
2450             elsif Source (Scan_Ptr + 1) /= ' ' then
2451                Scan_Ptr := Scan_Ptr + 1;
2452                Column := Column + 1;
2453                exit Blank_Run;
2454
2455             elsif Source (Scan_Ptr + 2) /= ' ' then
2456                Scan_Ptr := Scan_Ptr + 2;
2457                Column := Column + 2;
2458                exit Blank_Run;
2459
2460             elsif Source (Scan_Ptr + 3) /= ' ' then
2461                Scan_Ptr := Scan_Ptr + 3;
2462                Column := Column + 3;
2463                exit Blank_Run;
2464
2465             elsif Source (Scan_Ptr + 4) /= ' ' then
2466                Scan_Ptr := Scan_Ptr + 4;
2467                Column := Column + 4;
2468                exit Blank_Run;
2469
2470             elsif Source (Scan_Ptr + 5) /= ' ' then
2471                Scan_Ptr := Scan_Ptr + 5;
2472                Column := Column + 5;
2473                exit Blank_Run;
2474
2475             elsif Source (Scan_Ptr + 6) /= ' ' then
2476                Scan_Ptr := Scan_Ptr + 6;
2477                Column := Column + 6;
2478                exit Blank_Run;
2479
2480             else
2481                Scan_Ptr := Scan_Ptr + 7;
2482                Column := Column + 7;
2483             end if;
2484          end loop Blank_Run;
2485
2486          --  A horizontal tab moves Column up to the next multiple of eight
2487          --  above it; anything else ends the scan with Scan_Ptr on it.
2488
2489          exit White_Space when Source (Scan_Ptr) /= HT;
2490
2491          if Style_Check then
2492             Style.Check_HT;
2493          end if;
2494
2495          Scan_Ptr := Scan_Ptr + 1;
2496          Column := (Column / 8) * 8 + 8;
2497       end loop White_Space;
2498
2499       return Column;
2500    end Set_Start_Column;
2501
2502 end Scng;