gcc/ada/scng.adb

   1 ------------------------------------------------------------------------------
   2 --                                                                          --
   3 --                         GNAT COMPILER COMPONENTS                         --
   4 --                                                                          --
   5 --                                 S C N G                                  --
   6 --                                                                          --
   7 --                                 B o d y                                  --
   8 --                                                                          --
   9 --          Copyright (C) 1992-2006, Free Software Foundation, Inc.         --
  10 --                                                                          --
  11 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
  12 -- terms of the  GNU General Public License as published  by the Free Soft- --
  13 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
  14 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
  15 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
  16 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
  17 -- for  more details.  You should have  received  a copy of the GNU General --
  18 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
  19 -- to  the  Free Software Foundation,  51  Franklin  Street,  Fifth  Floor, --
  20 -- Boston, MA 02110-1301, USA.                                              --
  21 --                                                                          --
  22 -- GNAT was originally developed  by the GNAT team at  New York University. --
  23 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
  24 --                                                                          --
  25 ------------------------------------------------------------------------------
  26
  27 with Csets;    use Csets;
  28 with Err_Vars; use Err_Vars;
  29 with Hostparm; use Hostparm;
  30 with Namet;    use Namet;
  31 with Opt;      use Opt;
  32 with Scans;    use Scans;
  33 with Sinput;   use Sinput;
  34 with Snames;   use Snames;
  35 with Stringt;  use Stringt;
  36 with Stylesw;  use Stylesw;
  37 with Uintp;    use Uintp;
  38 with Urealp;   use Urealp;
  39 with Widechar; use Widechar;
  40
  41 with System.CRC32;
  42 with System.WCh_Con; use System.WCh_Con;
  43
  44 with GNAT.UTF_32; use GNAT.UTF_32;
  45
  46 package body Scng is
  47
  48    use ASCII;
  49    --  Make control characters visible
  50
  51    Special_Characters : array (Character) of Boolean := (others => False);
  52    --  For characters that are Special token, the value is True
  53
  54    Comment_Is_Token : Boolean := False;
  55    --  True if comments are tokens
  56
  57    End_Of_Line_Is_Token : Boolean := False;
  58    --  True if End_Of_Line is a token
  59
  60    -----------------------
  61    -- Local Subprograms --
  62    -----------------------
  63
  64    procedure Accumulate_Token_Checksum;
  65    pragma Inline (Accumulate_Token_Checksum);
        --  Accumulates into the checksum the position number of the current
        --  value of Token within Token_Type, converted to a Character. The
        --  kind of the token is what is accumulated here, not its text.
  66
  67    procedure Accumulate_Checksum (C : Character);
  68    pragma Inline (Accumulate_Checksum);
  69    --  This routine accumulates the checksum given character C. During the
  70    --  scanning of a source file, this routine is called with every character
  71    --  in the source, excluding blanks, and all control characters (except
  72    --  that ESC is included in the checksum). Upper case letters not in string
  73    --  literals are folded by the caller. See Sinput spec for the documentation
  74    --  of the checksum algorithm. Note: checksum values are only used if we
  75    --  generate code, so it is not necessary to worry about making the right
  76    --  sequence of calls in any error situation.
  77
  78    procedure Accumulate_Checksum (C : Char_Code);
  79    pragma Inline (Accumulate_Checksum);
  80    --  This version is identical, except that the argument, C, is a character
  81    --  code value instead of a character. This is used when wide characters
  82    --  are scanned. We use the character code rather than the ASCII characters
  83    --  so that the checksum is independent of wide character encoding method.
        --  The code is passed to the Character version one byte at a time, most
        --  significant byte first: two bytes if C <= 16#FFFF#, four otherwise.
  84
  85    procedure Initialize_Checksum;
  86    pragma Inline (Initialize_Checksum);
  87    --  Initialize checksum value (calls System.CRC32.Initialize on Checksum)
  88
  89    -------------------------
  90    -- Accumulate_Checksum --
  91    -------------------------
  92
  93    procedure Accumulate_Checksum (C : Character) is
  94    begin
           --  Checksum is handed to System.CRC32.Update through a conversion to
           --  the type System.CRC32.CRC32, together with the character to add.

  95       System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
  96    end Accumulate_Checksum;
  97
  98    procedure Accumulate_Checksum (C : Char_Code) is
  99       Hi : constant Char_Code := C / 16#1_0000#;
 100       Lo : constant Char_Code := C mod 16#1_0000#;
 101    begin
 102       --  Most significant byte first; Hi is skipped for codes <= 16#FFFF#
 103       if Hi /= 0 then
 104          Accumulate_Checksum (Character'Val (Hi / 256));
 105          Accumulate_Checksum (Character'Val (Hi mod 256));
 106       end if;
 107       Accumulate_Checksum (Character'Val (Lo / 256));
 108       Accumulate_Checksum (Character'Val (Lo mod 256));
 109    end Accumulate_Checksum;
 110
 111    -------------------------------
 112    -- Accumulate_Token_Checksum --
 113    -------------------------------
 114
 115    procedure Accumulate_Token_Checksum is
 116    begin
           --  The value added to the checksum is the position number of Token
           --  within Token_Type, converted to a Character.
           --  NOTE(review): Character'Val raises Constraint_Error for a position
           --  above 255, so this assumes Token_Type has at most 256 values.

 117       System.CRC32.Update
 118         (System.CRC32.CRC32 (Checksum),
 119          Character'Val (Token_Type'Pos (Token)));
 120    end Accumulate_Token_Checksum;
 121
 122    ----------------------------
 123    -- Determine_Token_Casing --
 124    ----------------------------
 125
 126    function Determine_Token_Casing return Casing_Type is
 127    begin
           --  The argument is the slice of Source that starts at Token_Ptr and
           --  ends one character before Scan_Ptr; the result of Determine_Casing
           --  for that slice is returned unchanged.

 128       return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
 129    end Determine_Token_Casing;
 130
 131    -------------------------
 132    -- Initialize_Checksum --
 133    -------------------------
 134
 135    procedure Initialize_Checksum is
 136    begin
           --  Checksum is handed to System.CRC32.Initialize through a conversion
           --  to the type System.CRC32.CRC32.

 137       System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
 138    end Initialize_Checksum;
 139
 140    ------------------------
 141    -- Initialize_Scanner --
 142    ------------------------
 143
 144    procedure Initialize_Scanner (Index : Source_File_Index) is
 145    begin
 146       --  Establish reserved words
 147
 148       Scans.Initialize_Ada_Keywords;
 149
 150       --  Initialize scan control variables. The source file given by Index
 151       --  becomes the current one, and the scan pointer, the token pointer and
           --  the line start are all placed at the first location of that file.
           --  No token has been scanned yet, hence No_Token, Empty and No_Name.

 152       Current_Source_File       := Index;
 153       Source                    := Source_Text (Current_Source_File);
 154       Scan_Ptr                  := Source_First (Current_Source_File);
 155       Token                     := No_Token;
 156       Token_Ptr                 := Scan_Ptr;
 157       Current_Line_Start        := Scan_Ptr;
 158       Token_Node                := Empty;
 159       Token_Name                := No_Name;

           --  NOTE(review): Set_Start_Column is not defined in this part of the
           --  file. If it moves Scan_Ptr, then First_Non_Blank_Location, which
           --  is assigned after the call, differs from Token_Ptr and
           --  Current_Line_Start, which are assigned before it. Do not reorder
           --  these assignments without checking.

 160       Start_Column              := Set_Start_Column;
 161       First_Non_Blank_Location  := Scan_Ptr;
 162
 163       Initialize_Checksum;
 164       Wide_Char_Byte_Count := 0;
 165
 166       --  Do not call Scan, otherwise the License stuff does not work in Scn
 167
 168    end Initialize_Scanner;
 169
 170    ------------------------------
 171    -- Reset_Special_Characters --
 172    ------------------------------
 173
 174    procedure Reset_Special_Characters is
 175    begin
           --  Every entry of the table goes back to False, which is also the
           --  value the table is given in its declaration.

 176       Special_Characters := (others => False);
 177    end Reset_Special_Characters;
 178
 179    ----------
 180    -- Scan --
 181    ----------
 182
 183    procedure Scan is
 184
 185       Start_Of_Comment : Source_Ptr;
 186       --  Record start of comment position
 187
 188       Underline_Found : Boolean;
 189       --  During scanning of an identifier, set to True if last character
 190       --  scanned was an underline or other punctuation character. This
 191       --  is used to flag the error of two underlines/punctuations in a
 192       --  row or ending an identifier with an underline/punctuation. Here
 193       --  punctuation means any UTF_32 character in the Unicode category
 194       --  Punctuation,Connector.
 195
 196       Wptr : Source_Ptr;
 197       --  Used to remember start of last wide character scanned
 198
 199       procedure Check_End_Of_Line;
 200       --  Called when end of line encountered. Checks that line is not too
 201       --  long, and that other style checks for the end of line are met.
 202
 203       function Double_Char_Token (C : Character) return Boolean;
 204       --  This function is used for double character tokens like := or <>. It
 205       --  checks if the character following Source (Scan_Ptr) is C, and if so
 206       --  bumps Scan_Ptr past the pair of characters and returns True. A space
 207       --  between the two characters is also recognized with an appropriate
 208       --  error message being issued. If C is not present, False is returned.
 209       --  Note that Double_Char_Token can only be used for tokens defined in
 210       --  the Ada syntax (its use for error cases like && is not appropriate
 211       --  since we do not want a junk message for a case like &-space-&).
 212
 213       procedure Error_Illegal_Character;
 214       --  Give illegal character error, Scan_Ptr points to character. On
 215       --  return, Scan_Ptr is bumped past the illegal character.
 216
 217       procedure Error_Illegal_Wide_Character;
 218       --  Give illegal wide character message. On return, Scan_Ptr is bumped
 219       --  past the illegal character, which may still leave us pointing to
 220       --  junk, not much we can do if the escape sequence is messed up!
 221
 222       procedure Error_Long_Line;
 223       --  Signal error of excessively long line
 224
 225       procedure Error_No_Double_Underline;
 226       --  Signal error of two underline or punctuation characters in a row.
 227       --  Called with Scan_Ptr pointing to second underline/punctuation char.
 228
 229       procedure Nlit;
 230       --  This is the procedure for scanning out numeric literals. On entry,
 231       --  Scan_Ptr points to the digit that starts the numeric literal (the
 232       --  checksum for this character has not been accumulated yet). On return
 233       --  Scan_Ptr points past the last character of the numeric literal, Token
 234       --  and Token_Node are set appropriately, and the checksum is updated.
 235
 236       procedure Slit;
 237       --  This is the procedure for scanning out string literals. On entry,
 238       --  Scan_Ptr points to the opening string quote (the checksum for this
 239       --  character has not been accumulated yet). On return Scan_Ptr points
 240       --  past the closing quote of the string literal, Token and Token_Node
 241       --  are set appropriately, and the checksum is updated.
 242
 243       -----------------------
 244       -- Check_End_Of_Line --
 245       -----------------------
 246
 247       procedure Check_End_Of_Line is
 248          Raw_Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
 249          --  Distance from the start of the line to the scan pointer
 250
 251          Len : constant Int := Raw_Len - Wide_Char_Byte_Count;
 252          --  Line length after discounting Wide_Char_Byte_Count
 253
 254       begin
 255          if Style_Check then
 256             Style.Check_Line_Terminator (Len);
 257          end if;
 258
 259          --  The default limit Max_Line_Length applies unless both style
 260          --  switches are set, in which case the length is handed to the
 261          --  style checker instead.
 262
 263          if not Style_Check or not Style_Check_Max_Line_Length then
 264             if Len > Max_Line_Length then
 265                Error_Long_Line;
 266             end if;
 267          else
 268             Style.Check_Line_Max_Length (Len);
 269          end if;
 270
 271          --  The wide character byte count restarts with each line
 272          Wide_Char_Byte_Count := 0;
 273       end Check_End_Of_Line;
 274
 275       -----------------------
 276       -- Double_Char_Token --
 277       -----------------------
 278
 279       function Double_Char_Token (C : Character) return Boolean is
 280          Next_Char : constant Character := Source (Scan_Ptr + 1);
 281       begin
 282          if Next_Char = C then
 283             Accumulate_Checksum (C);
 284             Scan_Ptr := Scan_Ptr + 2;
 285             return True;
 286          end if;
 287
 288          --  Anything but a single blank followed by C is not a match
 289          if Next_Char /= ' ' or else Source (Scan_Ptr + 2) /= C then
 290             return False;
 291          end if;
 292
 293          Scan_Ptr := Scan_Ptr + 1;     --  Scan_Ptr is now at the blank
 294          Error_Msg_S ("no space allowed here");
 295          Scan_Ptr := Scan_Ptr + 2;
 296          return True;
 297       end Double_Char_Token;
 298
 299       -----------------------------
 300       -- Error_Illegal_Character --
 301       -----------------------------
 302
 303       procedure Error_Illegal_Character is
 304       begin
              --  The message is issued while Scan_Ptr still designates the bad
              --  character; only afterwards is Scan_Ptr stepped past it.

 305          Error_Msg_S ("illegal character");
 306          Scan_Ptr := Scan_Ptr + 1;
 307       end Error_Illegal_Character;
 308
 309       ----------------------------------
 310       -- Error_Illegal_Wide_Character --
 311       ----------------------------------
 312
 313       procedure Error_Illegal_Wide_Character is
 314       begin
              --  The message is posted at Wptr, the start of the last wide
              --  character scanned.
              --  NOTE(review): the declaration of this procedure states that
              --  Scan_Ptr is bumped past the illegal character on return, but
              --  this body does not modify Scan_Ptr. Confirm that every caller
              --  has already advanced Scan_Ptr before calling.

 315          Error_Msg ("illegal wide character", Wptr);
 316       end Error_Illegal_Wide_Character;
 317
 318       ---------------------
 319       -- Error_Long_Line --
 320       ---------------------
 321
 322       procedure Error_Long_Line is
 323       begin
              --  The message is posted Max_Line_Length positions beyond
              --  Current_Line_Start.
              --  NOTE(review): Check_End_Of_Line discounts Wide_Char_Byte_Count
              --  when it measures the line; the location computed here does not.

 324          Error_Msg
 325            ("this line is too long",
 326             Current_Line_Start + Source_Ptr (Max_Line_Length));
 327       end Error_Long_Line;
 328
 329       -------------------------------
 330       -- Error_No_Double_Underline --
 331       -------------------------------
 332
 333       procedure Error_No_Double_Underline is
 334          This_Is_Underline : constant Boolean := Source (Scan_Ptr) = '_';
 335          Prev_Is_Underline : constant Boolean := Source (Scan_Ptr - 1) = '_';
 336          --  Whether the current and the preceding source characters are
 337          --  underlines; a character that is not is reported as punctuation.
 338
 339       begin
 340          Underline_Found := False;
 341
 342          --  One message for each of the four combinations
 343
 344          if This_Is_Underline and then Prev_Is_Underline then
 345             Error_Msg_S ("two consecutive underlines not permitted");
 346
 347          elsif This_Is_Underline then
 348             Error_Msg_S ("underline cannot follow punctuation character");
 349
 350          elsif Prev_Is_Underline then
 351             Error_Msg_S ("punctuation character cannot follow underline");
 352
 353          else
 354             Error_Msg_S
 355               ("two consecutive punctuation characters not permitted");
 356          end if;
 357       end Error_No_Double_Underline;
 358
 359       ----------
 360       -- Nlit --
 361       ----------
 362
 363       procedure Nlit is
              --  Scans one numeric literal, decimal or based, with optional
              --  fraction and exponent. On exit Token is Tok_Integer_Literal or
              --  Tok_Real_Literal and Int_Literal_Value or Real_Literal_Value
              --  is set accordingly.
 364
 365          C : Character;
 366          --  Current source program character
 367
 368          Base_Char : Character;
 369          --  Either # or : (character at start of based number)
 370
 371          Base : Int;
 372          --  Value of base
 373
 374          UI_Base : Uint;
 375          --  Value of base in Uint format
 376
 377          UI_Int_Value : Uint;
 378          --  Value of integer scanned by Scan_Integer in Uint format
 379
 380          UI_Num_Value : Uint;
 381          --  Value of integer in numeric value being scanned
 382
 383          Scale : Int;
 384          --  Scale value for real literal
 385
 386          UI_Scale : Uint;
 387          --  Scale in Uint format
 388
 389          Exponent_Is_Negative : Boolean;
 390          --  Set true for negative exponent
 391
 392          Extended_Digit_Value : Int;
 393          --  Extended digit value
 394
 395          Point_Scanned : Boolean;
 396          --  Flag for decimal point scanned in numeric literal
 397
 398          -----------------------
 399          -- Local Subprograms --
 400          -----------------------
 401
 402          procedure Error_Digit_Expected;
 403          --  Signal error of bad digit, Scan_Ptr points to the location at
 404          --  which the digit was expected on input, and is unchanged on return.
 405
 406          procedure Scan_Integer;
 407          --  Procedure to scan integer literal. On entry, Scan_Ptr points to a
 408          --  digit, on exit Scan_Ptr points past the last character of the
 409          --  integer.
 410          --
 411          --  For each digit encountered, UI_Int_Value is multiplied by 10, and
 412          --  the value of the digit added to the result. In addition, the
 413          --  value in Scale is decremented by one for each actual digit
 414          --  scanned.
              --
              --  Neither UI_Int_Value nor Scale is reset by Scan_Integer itself;
              --  the caller decides what they contain on entry.
 415
 416          --------------------------
 417          -- Error_Digit_Expected --
 418          --------------------------
 419
 420          procedure Error_Digit_Expected is
 421          begin
 422             Error_Msg_S ("digit expected");
 423          end Error_Digit_Expected;
 424
 425          ------------------
 426          -- Scan_Integer --
 427          ------------------
 428
 429          procedure Scan_Integer is
 430             C : Character;
 431             --  Next character scanned
 432
 433          begin
 434             C := Source (Scan_Ptr);
 435
 436             --  Loop through digits (allowing underlines)
 437
 438             loop
 439                Accumulate_Checksum (C);
 440                UI_Int_Value :=
 441                  UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
 442                Scan_Ptr := Scan_Ptr + 1;
 443                Scale := Scale - 1;
 444                C := Source (Scan_Ptr);
 445
 446                --  Case of underline encountered
 447
 448                if C = '_' then
 449
 450                   --  We do not accumulate the '_' in the checksum, so that
 451                   --  1_234 is equivalent to 1234, and does not trigger
 452                   --  compilation for "minimal recompilation" (gnatmake -m).
 453
                      --  Each underline after the first one in a run is
                      --  reported and skipped.

 454                   loop
 455                      Scan_Ptr := Scan_Ptr + 1;
 456                      C := Source (Scan_Ptr);
 457                      exit when C /= '_';
 458                      Error_No_Double_Underline;
 459                   end loop;
 460
                      --  An underline must be followed by a digit; if it is
                      --  not, scanning stops with Scan_Ptr at the offending
                      --  character.

 461                   if C not in '0' .. '9' then
 462                      Error_Digit_Expected;
 463                      exit;
 464                   end if;
 465
 466                else
 467                   exit when C not in '0' .. '9';
 468                end if;
 469             end loop;
 470          end Scan_Integer;
 471
 472       --  Start of Processing for Nlit
 473
 474       begin
 475          Base := 10;
 476          UI_Base := Uint_10;
 477          UI_Int_Value := Uint_0;
 478          Scale := 0;
 479          Scan_Integer;

              --  Scan_Integer has decremented Scale once per digit. That count
              --  is of no use for the integer part, so Scale is cleared again.

 480          Scale := 0;
 481          Point_Scanned := False;
 482          UI_Num_Value := UI_Int_Value;
 483
 484          --  Various possibilities now for continuing the literal are period,
 485          --  E/e (for exponent), or :/# (for based literal).
 486
              --  Scale is already zero here, so the assignment below changes
              --  nothing; digits scanned after a point are counted from zero.

 487          Scale := 0;
 488          C := Source (Scan_Ptr);
 489
 490          if C = '.' then
 491
 492             --  Scan out point, but do not scan past .. which is a range
 493             --  sequence, and must not be eaten up scanning a numeric literal.
 494
 495             while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
 496                Accumulate_Checksum ('.');
 497
 498                if Point_Scanned then
 499                   Error_Msg_S ("duplicate point ignored");
 500                end if;
 501
 502                Point_Scanned := True;
 503                Scan_Ptr := Scan_Ptr + 1;
 504                C := Source (Scan_Ptr);
 505
 506                if C not in '0' .. '9' then
 507                   Error_Msg
 508                     ("real literal cannot end with point", Scan_Ptr - 1);
 509                else
                      --  UI_Int_Value was not reset after the integer part, so
                      --  after this call it holds all digits scanned so far as
                      --  one number, and Scale holds minus the number of digits
                      --  scanned after the point.

 510                   Scan_Integer;
 511                   UI_Num_Value := UI_Int_Value;
 512                end if;
 513             end loop;
 514
 515          --  Based literal case. The base is the value we already scanned.
 516          --  In the case of colon, we insist that the following character
 517          --  is indeed an extended digit or a period. This catches a number
 518          --  of common errors, as well as catching the well known tricky
 519          --  bug otherwise arising from "x : integer range 1 .. 10:= 6;"
 520
 521          elsif C = '#'
 522            or else (C = ':' and then
 523                       (Source (Scan_Ptr + 1) = '.'
 524                          or else
 525                        Source (Scan_Ptr + 1) in '0' .. '9'
 526                          or else
 527                        Source (Scan_Ptr + 1) in 'A' .. 'Z'
 528                          or else
 529                        Source (Scan_Ptr + 1) in 'a' .. 'z'))
 530          then
 531             if C = ':' then
 532                Obsolescent_Check (Scan_Ptr);
 533
 534                if Warn_On_Obsolescent_Feature then
 535                   Error_Msg_S
 536                     ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
 537                   Error_Msg_S
 538                     ("\use ""'#"" instead?");
 539                end if;
 540             end if;
 541
 542             Accumulate_Checksum (C);
 543             Base_Char := C;
 544             UI_Base := UI_Int_Value;
 545
                 --  After the error, scanning goes on with base 16, the base
                 --  that accepts every extended digit.

 546             if UI_Base < 2 or else UI_Base > 16 then
 547                Error_Msg_SC ("base not 2-16");
 548                UI_Base := Uint_16;
 549             end if;
 550
 551             Base := UI_To_Int (UI_Base);
 552             Scan_Ptr := Scan_Ptr + 1;
 553
 554             --  Scan out extended integer [. integer]
 555
 556             C := Source (Scan_Ptr);
 557             UI_Int_Value := Uint_0;
 558             Scale := 0;
 559
 560             loop
 561                if C in '0' .. '9' then
 562                   Accumulate_Checksum (C);
 563                   Extended_Digit_Value :=
 564                     Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
 565
                   --  Adding 32 to the position maps 'A' .. 'F' to 'a' .. 'f',
                   --  so the checksum does not depend on the case of the digit.

 566                elsif C in 'A' .. 'F' then
 567                   Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
 568                   Extended_Digit_Value :=
 569                     Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
 570
 571                elsif C in 'a' .. 'f' then
 572                   Accumulate_Checksum (C);
 573                   Extended_Digit_Value :=
 574                     Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
 575
 576                else
 577                   Error_Msg_S ("extended digit expected");
 578                   exit;
 579                end if;
 580
                   --  A digit that is too large for the base is reported, but
                   --  it is still included in the value computed below.

 581                if Extended_Digit_Value >= Base then
 582                   Error_Msg_S ("digit '>= base");
 583                end if;
 584
 585                UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
 586                Scale := Scale - 1;
 587                Scan_Ptr := Scan_Ptr + 1;
 588                C := Source (Scan_Ptr);
 589
                   --  NOTE(review): unlike Scan_Integer, the underlines of a
                   --  based literal are accumulated in the checksum. Confirm
                   --  whether this difference is intended.

 590                if C = '_' then
 591                   loop
 592                      Accumulate_Checksum ('_');
 593                      Scan_Ptr := Scan_Ptr + 1;
 594                      C := Source (Scan_Ptr);
 595                      exit when C /= '_';
 596                      Error_No_Double_Underline;
 597                   end loop;
 598
 599                elsif C = '.' then
 600                   Accumulate_Checksum ('.');
 601
 602                   if Point_Scanned then
 603                      Error_Msg_S ("duplicate point ignored");
 604                   end if;
 605
 606                   Scan_Ptr := Scan_Ptr + 1;
 607                   C := Source (Scan_Ptr);
 608                   Point_Scanned := True;

                      --  Digits after the point are counted from zero

 609                   Scale := 0;
 610
 611                elsif C = Base_Char then
 612                   Accumulate_Checksum (C);
 613                   Scan_Ptr := Scan_Ptr + 1;
 614                   exit;
 615
 616                elsif C = '#' or else C = ':' then
 617                   Error_Msg_S ("based number delimiters must match");
 618                   Scan_Ptr := Scan_Ptr + 1;
 619                   exit;
 620
                   --  A character for which Identifier_Char is True is left
                   --  for the next iteration, which either takes it as an
                   --  extended digit or rejects it.

 621                elsif not Identifier_Char (C) then
 622                   if Base_Char = '#' then
 623                      Error_Msg_S ("missing '#");
 624                   else
 625                      Error_Msg_S ("missing ':");
 626                   end if;
 627
 628                   exit;
 629                end if;
 630
 631             end loop;
 632
 633             UI_Num_Value := UI_Int_Value;
 634          end if;
 635
 636          --  Scan out exponent
 637
              --  Without a point the digit count held in Scale is discarded,
              --  so that UI_Scale starts from zero for an integer literal.

 638          if not Point_Scanned then
 639             Scale := 0;
 640             UI_Scale := Uint_0;
 641          else
 642             UI_Scale := UI_From_Int (Scale);
 643          end if;
 644
 645          if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
 646             Accumulate_Checksum ('e');
 647             Scan_Ptr := Scan_Ptr + 1;
 648             Exponent_Is_Negative := False;
 649
 650             if Source (Scan_Ptr) = '+' then
 651                Accumulate_Checksum ('+');
 652                Scan_Ptr := Scan_Ptr + 1;
 653
 654             elsif Source (Scan_Ptr) = '-' then
 655                Accumulate_Checksum ('-');
 656
                   --  For an integer literal the error is given and
                   --  Exponent_Is_Negative stays False, so the exponent is
                   --  then applied as if it were positive.

 657                if not Point_Scanned then
 658                   Error_Msg_S
 659                     ("negative exponent not allowed for integer literal");
 660                else
 661                   Exponent_Is_Negative := True;
 662                end if;
 663
 664                Scan_Ptr := Scan_Ptr + 1;
 665             end if;
 666
 667             UI_Int_Value := Uint_0;
 668
 669             if Source (Scan_Ptr) in '0' .. '9' then
 670                Scan_Integer;
 671             else
 672                Error_Digit_Expected;
 673             end if;
 674
 675             if Exponent_Is_Negative then
 676                UI_Scale := UI_Scale - UI_Int_Value;
 677             else
 678                UI_Scale := UI_Scale + UI_Int_Value;
 679             end if;
 680          end if;
 681
 682          --  Case of real literal to be returned
 683
              --  Den is the negated scale, i.e. the number of digits after the
              --  point minus the signed exponent.
              --  NOTE(review): presumably the value built by UR_From_Components
              --  is Num / Rbase ** Den; confirm against the Urealp spec.

 684          if Point_Scanned then
 685             Token := Tok_Real_Literal;
 686             Real_Literal_Value :=
 687               UR_From_Components (
 688                                   Num   => UI_Num_Value,
 689                                   Den   => -UI_Scale,
 690                                   Rbase => Base);
 691
 692          --  Case of integer literal to be returned
 693
 694          else
 695             Token := Tok_Integer_Literal;
 696
 697             if UI_Scale = 0 then
 698                Int_Literal_Value := UI_Num_Value;
 699
 700             --  Avoid doing possibly expensive calculations in cases like
 701             --  parsing 163E800_000# when semantics will not be done anyway.
 702             --  This is especially useful when parsing garbled input.
 703
 704             elsif Operating_Mode /= Check_Syntax
 705               and then (Serious_Errors_Detected = 0 or else Try_Semantics)
 706             then
 707                Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
 708
 709             else
 710                Int_Literal_Value := No_Uint;
 711             end if;
 712          end if;
 713
 714          Accumulate_Token_Checksum;
 715
              --  The return below is redundant, the end of Nlit follows

 716          return;
 717       end Nlit;
 718
 719       ----------
 720       -- Slit --
 721       ----------
 722
 723       procedure Slit is
 724
 725          Delimiter : Character;
 726          --  Delimiter (first character of string)
 727
 728          C : Character;
 729          --  Current source program character
 730
 731          Code : Char_Code;
 732          --  Current character code value
 733
 734          Err : Boolean;
 735          --  Error flag for Scan_Wide call
 736
 737          procedure Error_Bad_String_Char;
 738          --  Signal bad character in string/character literal. On entry
 739          --  Scan_Ptr points to the improper character encountered during the
 740          --  scan. Scan_Ptr is not modified, so it still points to the bad
 741          --  character on return.
 742
 743          procedure Error_Unterminated_String;
 744          --  Procedure called if a line terminator character is encountered
 745          --  during scanning a string, meaning that the string is not properly
 746          --  terminated.
 747
 748          procedure Set_String;
 749          --  Procedure used to distinguish between string and operator symbol.
 750          --  On entry the string has been scanned out, and its characters
 751          --  start at Token_Ptr and end one character before Scan_Ptr. On exit
 752          --  Token is set to Tok_String_Literal or Tok_Operator_Symbol as
 753          --  appropriate, and Token_Node is appropriately initialized. In
 754          --  addition, in the operator symbol case, Token_Name is
 755          --  appropriately set.
 756
 757          ---------------------------
 758          -- Error_Bad_String_Char --
 759          ---------------------------
 760
 761          procedure Error_Bad_String_Char is
 762             Bad_Char : constant Character := Source (Scan_Ptr);
 763          begin
 764             --  Tabs and the other format effectors get dedicated messages
 765             if Bad_Char = HT then
 766                Error_Msg_S ("horizontal tab not allowed in string");
 767             elsif Bad_Char = VT or else Bad_Char = FF then
 768                Error_Msg_S ("format effector not allowed in string");
 769
 770             --  What is left is either an upper half character, or else it
 771             --  is reported as a control character.
 772             elsif Bad_Char not in Upper_Half_Character then
 773                Error_Msg_S ("control character not allowed in string");
 774             else
 775                Error_Msg_S ("(Ada 83) upper half character not allowed");
 776             end if;
 777          end Error_Bad_String_Char;
 778
 779          -------------------------------
 780          -- Error_Unterminated_String --
 781          -------------------------------
 782
 783          procedure Error_Unterminated_String is
 784             function Prev_Char return Character;
 785             --  Source character located just before Scan_Ptr
 786
 787             procedure Back_Up;
 788             --  Decrement Scan_Ptr by one and call Unstore_String_Char
 789
 790             procedure Back_Up is
 791             begin
 792                Scan_Ptr := Scan_Ptr - 1;
 793                Unstore_String_Char;
 794             end Back_Up;
 795
 796             function Prev_Char return Character is
 797             begin
 798                return Source (Scan_Ptr - 1);
 799             end Prev_Char;
 800
 801          begin
 802             --  The flag is placed where the string most plausibly was meant
 803             --  to end, and the scan pointer is moved back to that point:
 804
 805             --     A := "this is an unterminated string;
 806             --     A := "this is an unterminated string &
 807             --     P(A, "this is a parameter that didn't get terminated);
 808
 809             --  Trailing blanks and ampersands are given back first. Only
 810             --  blanks need to be tested for, since tabs are not allowed in
 811             --  strings and would already have caused an error message.
 812
 813             loop
 814                exit when Prev_Char /= ' ' and then Prev_Char /= '&';
 815                Back_Up;
 816             end loop;
 817
 818             --  A single quote found here means that the wrong terminator
 819             --  was used, as in
 820             --     A := "this string uses the wrong terminator'
 821             --  unless the string was itself opened with a single quote, in
 822             --  which case an error message has been given already.
 823
 824             if Delimiter /= ''' and then Prev_Char = ''' then
 825                Unstore_String_Char;
 826                Error_Msg
 827                  ("incorrect string terminator character", Scan_Ptr - 1);
 828                return;
 829             end if;
 830
 831             --  Give back a final semicolon, and a right paren before it
 832             if Prev_Char = ';' then
 833                Back_Up;
 834
 835                if Prev_Char = ')' then
 836                   Back_Up;
 837                end if;
 838             end if;
 839
 840             Error_Msg_S ("missing string quote");
 841          end Error_Unterminated_String;
 842
 843          ----------------
 844          -- Set_String --
 845          ----------------
 846
 847          procedure Set_String is
                 --  Classifies the string literal just scanned. If the source
                 --  text between the delimiters spells one of the operator
                 --  symbols tested below (letters in either case), Token_Name is
                 --  set to the corresponding Name_Op_xx value and Token to
                 --  Tok_Operator_Symbol. Otherwise Token_Name is left unchanged
                 --  and Token is set to Tok_String_Literal.

                 --  Slen is the number of source characters between the two
                 --  delimiters. This count assumes Token_Ptr designates the
                 --  opening delimiter and Scan_Ptr the position just past the
                 --  closing one, i.e. a properly terminated literal.

 848             Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
 849             C1   : Character;
 850             C2   : Character;
 851             C3   : Character;
 852
 853          begin
 854             --  Token_Name is currently set to Error_Name. The following
 855             --  section of code resets Token_Name to the proper Name_Op_xx
 856             --  value if the string is a valid operator symbol, otherwise it is
 857             --  left set to Error_Name.
 858
                 --  The tests below look at the raw source characters following
                 --  the opening delimiter, not at the stored string value.

                 --  One character operator symbols

 859             if Slen = 1 then
 860                C1 := Source (Token_Ptr + 1);
 861
 862                case C1 is
 863                   when '=' =>
 864                      Token_Name := Name_Op_Eq;
 865
 866                   when '>' =>
 867                      Token_Name := Name_Op_Gt;
 868
 869                   when '<' =>
 870                      Token_Name := Name_Op_Lt;
 871
 872                   when '+' =>
 873                      Token_Name := Name_Op_Add;
 874
 875                   when '-' =>
 876                      Token_Name := Name_Op_Subtract;
 877
 878                   when '&' =>
 879                      Token_Name := Name_Op_Concat;
 880
 881                   when '*' =>
 882                      Token_Name := Name_Op_Multiply;
 883
 884                   when '/' =>
 885                      Token_Name := Name_Op_Divide;
 886
 887                   when others =>
 888                      null;
 889                end case;
 890
                 --  Two character operator symbols: "**", "/=", "<=", ">=", "or"

 891             elsif Slen = 2 then
 892                C1 := Source (Token_Ptr + 1);
 893                C2 := Source (Token_Ptr + 2);
 894
 895                if C1 = '*' and then C2 = '*' then
 896                   Token_Name := Name_Op_Expon;
 897
 898                elsif C2 = '=' then
 899
                       --  Any other character before the '=' is not an operator
                       --  symbol, so Token_Name is left unchanged in that case.

 900                   if C1 = '/' then
 901                      Token_Name := Name_Op_Ne;
 902                   elsif C1 = '<' then
 903                      Token_Name := Name_Op_Le;
 904                   elsif C1 = '>' then
 905                      Token_Name := Name_Op_Ge;
 906                   end if;
 907
 908                elsif (C1 = 'O' or else C1 = 'o') and then    -- OR
 909                  (C2 = 'R' or else C2 = 'r')
 910                then
 911                   Token_Name := Name_Op_Or;
 912                end if;
 913
                 --  Three character operator symbols, all of them reserved words.
                 --  Each letter is accepted in upper or lower case.

 914             elsif Slen = 3 then
 915                C1 := Source (Token_Ptr + 1);
 916                C2 := Source (Token_Ptr + 2);
 917                C3 := Source (Token_Ptr + 3);
 918
 919                if (C1 = 'A' or else C1 = 'a') and then       -- AND
 920                  (C2 = 'N' or else C2 = 'n') and then
 921                  (C3 = 'D' or else C3 = 'd')
 922                then
 923                   Token_Name := Name_Op_And;
 924
 925                elsif (C1 = 'A' or else C1 = 'a') and then    -- ABS
 926                  (C2 = 'B' or else C2 = 'b') and then
 927                  (C3 = 'S' or else C3 = 's')
 928                then
 929                   Token_Name := Name_Op_Abs;
 930
 931                elsif (C1 = 'M' or else C1 = 'm') and then    -- MOD
 932                  (C2 = 'O' or else C2 = 'o') and then
 933                  (C3 = 'D' or else C3 = 'd')
 934                then
 935                   Token_Name := Name_Op_Mod;
 936
 937                elsif (C1 = 'N' or else C1 = 'n') and then    -- NOT
 938                  (C2 = 'O' or else C2 = 'o') and then
 939                  (C3 = 'T' or else C3 = 't')
 940                then
 941                   Token_Name := Name_Op_Not;
 942
 943                elsif (C1 = 'R' or else C1 = 'r') and then    -- REM
 944                  (C2 = 'E' or else C2 = 'e') and then
 945                  (C3 = 'M' or else C3 = 'm')
 946                then
 947                   Token_Name := Name_Op_Rem;
 948
 949                elsif (C1 = 'X' or else C1 = 'x') and then    -- XOR
 950                  (C2 = 'O' or else C2 = 'o') and then
 951                  (C3 = 'R' or else C3 = 'r')
 952                then
 953                   Token_Name := Name_Op_Xor;
 954                end if;
 955
 956             end if;
 957
 958             --  If it is an operator symbol, then Token_Name is set. If it is
 959             --  some other string value, then Token_Name still contains
 960             --  Error_Name.
 961
 962             if Token_Name = Error_Name then
 963                Token := Tok_String_Literal;
 964
 965             else
 966                Token := Tok_Operator_Symbol;
 967             end if;
 968          end Set_String;
 969
 970       --  Start of processing for Slit
 971
 972       begin
 973          --  On entry, Scan_Ptr points to the opening character of the string
 974          --  which is either a percent, double quote, or apostrophe (single
 975          --  quote). The latter case is an error detected by the character
 976          --  literal circuit.
 977
              --  Remember which character opened the literal; the same character
              --  is required to close it. C, Code, Err, Wptr and Delimiter are
              --  declared outside this statement part.

 978          Delimiter := Source (Scan_Ptr);
 979          Accumulate_Checksum (Delimiter);
 980          Start_String;
 981          Scan_Ptr := Scan_Ptr + 1;
 982
 983          --  Loop to scan out characters of string literal
 984
 985          loop
 986             C := Source (Scan_Ptr);
 987
                 --  A delimiter not followed by a second delimiter ends the
                 --  literal (exit with Scan_Ptr just past it). A doubled
                 --  delimiter is stored as one occurrence of the delimiter
                 --  character, and both source characters go into the checksum.

 988             if C = Delimiter then
 989                Accumulate_Checksum (C);
 990                Scan_Ptr := Scan_Ptr + 1;
 991                exit when Source (Scan_Ptr) /= Delimiter;
 992                Code := Get_Char_Code (C);
 993                Accumulate_Checksum (C);
 994                Scan_Ptr := Scan_Ptr + 1;
 995
 996             else
                    --  A double quote inside a percent delimited string is
                    --  flagged, but still stored as an ordinary character. No
                    --  checksum is accumulated for it on this path.

 997                if C = '"' and then Delimiter = '%' then
 998                   Error_Msg_S
 999                     ("quote not allowed in percent delimited string");
1000                   Code := Get_Char_Code (C);
1001                   Scan_Ptr := Scan_Ptr + 1;
1002
                    --  Start of a wide character sequence: ESC under an ESC
                    --  based encoding method, an upper half character when
                    --  Upper_Half_Encoding is set, or a left bracket followed
                    --  by a double quote and a character for which
                    --  Identifier_Char is True (brackets notation).

1003                elsif (C = ESC
1004                         and then Wide_Character_Encoding_Method
1005                                    in WC_ESC_Encoding_Method)
1006                  or else (C in Upper_Half_Character
1007                             and then Upper_Half_Encoding)
1008                  or else (C = '['
1009                             and then Source (Scan_Ptr + 1) = '"'
1010                             and then Identifier_Char (Source (Scan_Ptr + 2)))
1011                then
                       --  Save the start of the sequence for error placement.
                       --  Scan_Wide is passed Scan_Ptr and presumably advances
                       --  it past the sequence -- confirm against its spec.

1012                   Wptr := Scan_Ptr;
1013                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1014
                       --  A malformed sequence is diagnosed and replaced by a
                       --  blank in the stored string.

1015                   if Err then
1016                      Error_Illegal_Wide_Character;
1017                      Code := Get_Char_Code (' ');
1018                   end if;
1019
1020                   Accumulate_Checksum (Code);
1021
1022                   --  In Ada 95 mode we allow any wide characters in a string
1023                   --  but in Ada 2005, the set of characters allowed has been
1024                   --  restricted to graphic characters.
1025
1026                   if Ada_Version >= Ada_05
1027                     and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1028                   then
1029                      Error_Msg
1030                        ("(Ada 2005) non-graphic character not permitted " &
1031                         "in string literal", Wptr);
1032                   end if;
1033
                    --  Ordinary single character case

1034                else
1035                   Accumulate_Checksum (C);
1036
1037                   if C not in Graphic_Character then
                          --  A line terminator means the closing delimiter is
                          --  missing. The terminator itself is neither stored
                          --  nor skipped; the loop is simply abandoned.

1038                      if C in Line_Terminator then
1039                         Error_Unterminated_String;
1040                         exit;
1041
                          --  Upper half characters are rejected only in Ada 83
                          --  mode

1042                      elsif C in Upper_Half_Character then
1043                         if Ada_Version = Ada_83 then
1044                            Error_Bad_String_Char;
1045                         end if;
1046
1047                      else
1048                         Error_Bad_String_Char;
1049                      end if;
1050                   end if;
1051
                       --  The character is stored even if it was diagnosed above

1052                   Code := Get_Char_Code (C);
1053                   Scan_Ptr := Scan_Ptr + 1;
1054                end if;
1055             end if;
1056
                 --  Reached for every character that is part of the literal, with
                 --  Code set by one of the branches above.

1057             Store_String_Char (Code);
1058
1059             if not In_Character_Range (Code) then
1060                Wide_Character_Found := True;
1061             end if;
1062          end loop;
1063
              --  Complete the stored string, then let Set_String decide whether
              --  the token is a string literal or an operator symbol.

1064          String_Literal_Id := End_String;
1065          Set_String;
1066          return;
1067       end Slit;
1068
1069    --  Start of processing for Scan
1070
1071    begin
1072       Prev_Token := Token;
1073       Prev_Token_Ptr := Token_Ptr;
1074       Token_Name := Error_Name;
1075
1076       --  The following loop runs more than once only if a format effector
1077       --  (tab, vertical tab, form feed, line feed, carriage return) is
1078       --  encountered and skipped, or some error situation, such as an
1079       --  illegal character, is encountered.
1080
1081       <<Scan_Next_Character>>
1082
1083       loop
1084          --  Skip past blanks, loop is opened up for speed
1085
1086          while Source (Scan_Ptr) = ' ' loop
1087             if Source (Scan_Ptr + 1) /= ' ' then
1088                Scan_Ptr := Scan_Ptr + 1;
1089                exit;
1090             end if;
1091
1092             if Source (Scan_Ptr + 2) /= ' ' then
1093                Scan_Ptr := Scan_Ptr + 2;
1094                exit;
1095             end if;
1096
1097             if Source (Scan_Ptr + 3) /= ' ' then
1098                Scan_Ptr := Scan_Ptr + 3;
1099                exit;
1100             end if;
1101
1102             if Source (Scan_Ptr + 4) /= ' ' then
1103                Scan_Ptr := Scan_Ptr + 4;
1104                exit;
1105             end if;
1106
1107             if Source (Scan_Ptr + 5) /= ' ' then
1108                Scan_Ptr := Scan_Ptr + 5;
1109                exit;
1110             end if;
1111
1112             if Source (Scan_Ptr + 6) /= ' ' then
1113                Scan_Ptr := Scan_Ptr + 6;
1114                exit;
1115             end if;
1116
1117             if Source (Scan_Ptr + 7) /= ' ' then
1118                Scan_Ptr := Scan_Ptr + 7;
1119                exit;
1120             end if;
1121
1122             Scan_Ptr := Scan_Ptr + 8;
1123          end loop;
1124
1125          --  We are now at a non-blank character, which is the first character
1126          --  of the token we will scan, and hence the value of Token_Ptr.
1127
1128          Token_Ptr := Scan_Ptr;
1129
1130          --  Here begins the main case statement which transfers control on the
1131          --  basis of the non-blank character we have encountered.
1132
1133          case Source (Scan_Ptr) is
1134
1135          --  Line terminator characters
1136
1137          when CR | LF | FF | VT =>
1138             goto Scan_Line_Terminator;
1139
1140          --  Horizontal tab, just skip past it
1141
1142          when HT =>
1143             if Style_Check then Style.Check_HT; end if;
1144             Scan_Ptr := Scan_Ptr + 1;
1145
1146          --  End of file character, treated as an end of file only if it is
1147          --  the last character in the buffer, otherwise it is ignored.
1148
1149          when EOF =>
1150             if Scan_Ptr = Source_Last (Current_Source_File) then
1151                Check_End_Of_Line;
1152                if Style_Check then Style.Check_EOF; end if;
1153                Token := Tok_EOF;
1154                return;
1155             else
1156                Scan_Ptr := Scan_Ptr + 1;
1157             end if;
1158
1159          --  Ampersand
1160
1161          when '&' =>
1162             Accumulate_Checksum ('&');
1163
1164             if Source (Scan_Ptr + 1) = '&' then
1165                Error_Msg_S ("'&'& should be `AND THEN`");
1166                Scan_Ptr := Scan_Ptr + 2;
1167                Token := Tok_And;
1168                return;
1169
1170             else
1171                Scan_Ptr := Scan_Ptr + 1;
1172                Token := Tok_Ampersand;
1173                return;
1174             end if;
1175
1176          --  Asterisk (can be multiplication operator or double asterisk which
1177          --  is the exponentiation compound delimiter).
1178
1179          when '*' =>
1180             Accumulate_Checksum ('*');
1181
1182             if Source (Scan_Ptr + 1) = '*' then
1183                Accumulate_Checksum ('*');
1184                Scan_Ptr := Scan_Ptr + 2;
1185                Token := Tok_Double_Asterisk;
1186                return;
1187
1188             else
1189                Scan_Ptr := Scan_Ptr + 1;
1190                Token := Tok_Asterisk;
1191                return;
1192             end if;
1193
1194          --  Colon, which can either be an isolated colon, or part of an
1195          --  assignment compound delimiter.
1196
1197          when ':' =>
1198             Accumulate_Checksum (':');
1199
1200             if Double_Char_Token ('=') then
1201                Token := Tok_Colon_Equal;
1202                if Style_Check then Style.Check_Colon_Equal; end if;
1203                return;
1204
1205             elsif Source (Scan_Ptr + 1) = '-'
1206               and then Source (Scan_Ptr + 2) /= '-'
1207             then
1208                Token := Tok_Colon_Equal;
1209                Error_Msg (":- should be :=", Scan_Ptr);
1210                Scan_Ptr := Scan_Ptr + 2;
1211                return;
1212
1213             else
1214                Scan_Ptr := Scan_Ptr + 1;
1215                Token := Tok_Colon;
1216                if Style_Check then Style.Check_Colon; end if;
1217                return;
1218             end if;
1219
1220          --  Left parenthesis
1221
1222          when '(' =>
1223             Accumulate_Checksum ('(');
1224             Scan_Ptr := Scan_Ptr + 1;
1225             Token := Tok_Left_Paren;
1226             if Style_Check then Style.Check_Left_Paren; end if;
1227             return;
1228
1229          --  Left bracket
1230
1231          when '[' =>
1232             if Source (Scan_Ptr + 1) = '"' then
1233                goto Scan_Wide_Character;
1234
1235             else
1236                Error_Msg_S ("illegal character, replaced by ""(""");
1237                Scan_Ptr := Scan_Ptr + 1;
1238                Token := Tok_Left_Paren;
1239                return;
1240             end if;
1241
1242          --  Left brace
1243
1244          when '{' =>
1245             Error_Msg_S ("illegal character, replaced by ""(""");
1246             Scan_Ptr := Scan_Ptr + 1;
1247             Token := Tok_Left_Paren;
1248             return;
1249
1250          --  Comma
1251
1252          when ',' =>
1253             Accumulate_Checksum (',');
1254             Scan_Ptr := Scan_Ptr + 1;
1255             Token := Tok_Comma;
1256             if Style_Check then Style.Check_Comma; end if;
1257             return;
1258
1259          --  Dot, which is either an isolated period, or part of a double dot
1260          --  compound delimiter sequence. We also check for the case of a
1261          --  digit following the period, to give a better error message.
1262
1263          when '.' =>
1264             Accumulate_Checksum ('.');
1265
1266             if Double_Char_Token ('.') then
1267                Token := Tok_Dot_Dot;
1268                if Style_Check then Style.Check_Dot_Dot; end if;
1269                return;
1270
1271             elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1272                Error_Msg_S ("numeric literal cannot start with point");
1273                Scan_Ptr := Scan_Ptr + 1;
1274
1275             else
1276                Scan_Ptr := Scan_Ptr + 1;
1277                Token := Tok_Dot;
1278                return;
1279             end if;
1280
1281          --  Equal, which can either be an equality operator, or part of the
1282          --  arrow (=>) compound delimiter.
1283
1284          when '=' =>
1285             Accumulate_Checksum ('=');
1286
1287             if Double_Char_Token ('>') then
1288                Token := Tok_Arrow;
1289                if Style_Check then Style.Check_Arrow; end if;
1290                return;
1291
1292             elsif Source (Scan_Ptr + 1) = '=' then
1293                Error_Msg_S ("== should be =");
1294                Scan_Ptr := Scan_Ptr + 1;
1295             end if;
1296
1297             Scan_Ptr := Scan_Ptr + 1;
1298             Token := Tok_Equal;
1299             return;
1300
1301          --  Greater than, which can be a greater than operator, greater than
1302          --  or equal operator, or first character of a right label bracket.
1303
1304          when '>' =>
1305             Accumulate_Checksum ('>');
1306
1307             if Double_Char_Token ('=') then
1308                Token := Tok_Greater_Equal;
1309                return;
1310
1311             elsif Double_Char_Token ('>') then
1312                Token := Tok_Greater_Greater;
1313                return;
1314
1315             else
1316                Scan_Ptr := Scan_Ptr + 1;
1317                Token := Tok_Greater;
1318                return;
1319             end if;
1320
1321          --  Less than, which can be a less than operator, less than or equal
1322          --  operator, or the first character of a left label bracket, or the
1323          --  first character of a box (<>) compound delimiter.
1324
1325          when '<' =>
1326             Accumulate_Checksum ('<');
1327
1328             if Double_Char_Token ('=') then
1329                Token := Tok_Less_Equal;
1330                return;
1331
1332             elsif Double_Char_Token ('>') then
1333                Token := Tok_Box;
1334                if Style_Check then Style.Check_Box; end if;
1335                return;
1336
1337             elsif Double_Char_Token ('<') then
1338                Token := Tok_Less_Less;
1339                return;
1340
1341             else
1342                Scan_Ptr := Scan_Ptr + 1;
1343                Token := Tok_Less;
1344                return;
1345             end if;
1346
1347          --  Minus, which is either a subtraction operator, or the first
1348          --  character of double minus starting a comment
1349
1350          when '-' => Minus_Case : begin
                 --  Three possibilities are distinguished by the character that
                 --  follows the minus sign: '>' (erroneous "->", returned as an
                 --  arrow), anything other than '-' (minus token), or a second
                 --  '-' (comment).

1351             if Source (Scan_Ptr + 1) = '>' then
1352                Error_Msg_S ("invalid token");
1353                Scan_Ptr := Scan_Ptr + 2;
1354                Token := Tok_Arrow;
1355                return;
1356
1357             elsif Source (Scan_Ptr + 1) /= '-' then
1358                Accumulate_Checksum ('-');
1359                Scan_Ptr := Scan_Ptr + 1;
1360                Token := Tok_Minus;
1361                return;
1362
1363             --  Comment
1364
1365             else -- Source (Scan_Ptr + 1) = '-' then
1366                if Style_Check then Style.Check_Comment; end if;
1367                Scan_Ptr := Scan_Ptr + 2;
1368
1369                --  If we are in preprocessor mode with Replace_In_Comments set,
1370                --  then we return the "--" as a token on its own.
1371
1372                if Replace_In_Comments then
1373                   Token := Tok_Comment;
1374                   return;
1375                end if;
1376
1377                --  Otherwise scan out the comment
1378
                    --  Start_Of_Comment designates the first character after the
                    --  two hyphens. Nothing in the comment is added to the
                    --  checksum.

1379                Start_Of_Comment := Scan_Ptr;
1380
1381                --  Loop to scan comment (this loop runs more than once only if
1382                --  a horizontal tab or other non-graphic character is scanned)
1383
1384                loop
1385                   --  Scan to non graphic character (opened up for speed)
1386
1387                   --  Note that we just eat left brackets, which means that
1388                   --  bracket notation cannot be used for end of line
1389                   --  characters in comments. This seems a reasonable choice,
1390                   --  since no one would ever use brackets notation in a real
1391                   --  program in this situation, and if we allow brackets
1392                   --  notation, we forbid some valid comments which contain a
1393                   --  brackets sequence that happens to match an end of line
1394                   --  character.
1395
1396                   loop
1397                      exit when Source (Scan_Ptr) not in Graphic_Character;
1398                      Scan_Ptr := Scan_Ptr + 1;
1399                      exit when Source (Scan_Ptr) not in Graphic_Character;
1400                      Scan_Ptr := Scan_Ptr + 1;
1401                      exit when Source (Scan_Ptr) not in Graphic_Character;
1402                      Scan_Ptr := Scan_Ptr + 1;
1403                      exit when Source (Scan_Ptr) not in Graphic_Character;
1404                      Scan_Ptr := Scan_Ptr + 1;
1405                      exit when Source (Scan_Ptr) not in Graphic_Character;
1406                      Scan_Ptr := Scan_Ptr + 1;
1407                   end loop;
1408
1409                   --  Keep going if horizontal tab
1410
1411                   if Source (Scan_Ptr) = HT then
1412                      if Style_Check then Style.Check_HT; end if;
1413                      Scan_Ptr := Scan_Ptr + 1;
1414
1415                   --  Terminate scan of comment if line terminator
1416
1417                   elsif Source (Scan_Ptr) in Line_Terminator then
1418                      exit;
1419
1420                   --  Terminate scan of comment if end of file encountered
1421                   --  (embedded EOF character or real last character in file)
1422
1423                   elsif Source (Scan_Ptr) = EOF then
1424                      exit;
1425
1426                   --  If we have a wide character, we have to scan it out,
1427                   --  because it might be a legitimate line terminator
1428
1429                   elsif (Source (Scan_Ptr) = ESC
1430                            and then Identifier_Char (ESC))
1431                     or else
1432                          (Source (Scan_Ptr) in Upper_Half_Character
1433                             and then Upper_Half_Encoding)
1434                   then
1435                      declare
1436                         Wptr : constant Source_Ptr := Scan_Ptr;
1437                         Code : Char_Code;
1438                         Err  : Boolean;
1439
1440                      begin
1441                         Scan_Wide (Source, Scan_Ptr, Code, Err);
1442
1443                         --  If not well formed wide character, then just skip
1444                         --  past it and ignore it.
1445
1446                         if Err then
1447                            Scan_Ptr := Wptr + 1;
1448
1449                         --  If UTF_32 terminator, terminate comment scan
1450
                             --  Scan_Ptr is put back on the first character of
                             --  the sequence, so the terminator is not consumed
                             --  here and is not part of the comment text.

1451                         elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1452                            Scan_Ptr := Wptr;
1453                            exit;
1454                         end if;
1455                      end;
1456
1457                   --  Keep going if character in 80-FF range, or is ESC. These
1458                   --  characters are allowed in comments by RM-2.1(1), 2.7(2).
1459                   --  They are allowed even in Ada 83 mode according to the
1460                   --  approved AI. ESC was added to the AI in June 93.
1461
1462                   elsif Source (Scan_Ptr) in Upper_Half_Character
1463                      or else Source (Scan_Ptr) = ESC
1464                   then
1465                      Scan_Ptr := Scan_Ptr + 1;
1466
1467                   --  Otherwise we have an illegal comment character
1468
                       --  NOTE(review): Scan_Ptr is not advanced in this branch,
                       --  so Error_Illegal_Character is presumably responsible
                       --  for skipping the offending character -- confirm.

1469                   else
1470                      Error_Illegal_Character;
1471                   end if;
1472                end loop;
1473
1474                --  Note that, except when comments are tokens, we do NOT
1475                --  execute a return here, instead we fall through to reexecute
1476                --  the scan loop to look for a token.
1477
                    --  The comment text copied to Name_Buffer runs from just
                    --  after the "--" up to, but not including, the character
                    --  that ended the scan.

1478                if Comment_Is_Token then
1479                   Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1480                   Name_Buffer (1 .. Name_Len) :=
1481                     String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1482                   Comment_Id := Name_Find;
1483                   Token := Tok_Comment;
1484                   return;
1485                end if;
1486             end if;
1487          end Minus_Case;
1488
1489          --  Double quote starting a string literal
1490
1491          when '"' =>
1492             Slit;
1493             Post_Scan;
1494             return;
1495
1496          --  Percent starting a string literal
1497
1498          when '%' =>
1499             Obsolescent_Check (Token_Ptr);
1500
1501             if Warn_On_Obsolescent_Feature then
1502                Error_Msg_S
1503                  ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1504                Error_Msg_S
1505                  ("\use """""" instead?");
1506             end if;
1507
1508             Slit;
1509             Post_Scan;
1510             return;
1511
1512          --  Apostrophe. This can either be the start of a character literal,
1513          --  or an isolated apostrophe used in a qualified expression or an
1514          --  attribute. We treat it as a character literal if it does not
1515          --  follow a right parenthesis, identifier, the keyword ALL or
1516          --  a literal. This means that we correctly treat constructs like:
1517
1518          --    A := CHARACTER'('A');
1519
1520          --  Note that RM-2.2(7) does not require a separator between
1521          --  "CHARACTER" and "'" in the above.
1522
1523          when ''' => Char_Literal_Case : declare
                 --  Code receives the Char_Code of the character literal. Err is
                 --  the error indication returned by Scan_Wide.

1524             Code : Char_Code;
1525             Err  : Boolean;
1526
1527          begin
                 --  The apostrophe is added to the checksum and skipped before
                 --  deciding what it is, so on every path below Scan_Ptr starts
                 --  out designating the character after the apostrophe.

1528             Accumulate_Checksum (''');
1529             Scan_Ptr := Scan_Ptr + 1;
1530
1531             --  Here is where we make the test to distinguish the cases. Treat
1532             --  as apostrophe if previous token is an identifier, right paren
1533             --  or the reserved word "all" (latter case as in A.all'Address)
1534             --  (or the reserved word "project" in project files). Also treat
1535             --  it as apostrophe after a literal (this catches some legitimate
1536             --  cases, like A."abs"'Address, and also gives better error
1537             --  behavior for impossible cases like 123'xxx).
1538
1539             if Prev_Token = Tok_Identifier
1540                or else Prev_Token = Tok_Right_Paren
1541                or else Prev_Token = Tok_All
1542                or else Prev_Token = Tok_Project
1543                or else Prev_Token in Token_Class_Literal
1544             then
1545                Token := Tok_Apostrophe;
1546                if Style_Check then Style.Check_Apostrophe; end if;
1547                return;
1548
1549             --  Otherwise the apostrophe starts a character literal
1550
1551             else
1552                --  Case of wide character literal
1553
1554                if (Source (Scan_Ptr) = ESC
1555                      and then
1556                     Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1557                  or else
1558                    (Source (Scan_Ptr) in Upper_Half_Character
1559                      and then
1560                     Upper_Half_Encoding)
1561                  or else
1562                    (Source (Scan_Ptr) = '['
1563                      and then
1564                     Source (Scan_Ptr + 1) = '"')
1565                then
                       --  Wptr is not declared in this block; it records the
                       --  start of the sequence for error placement. Note that
                       --  the checksum is updated with Code before Err is
                       --  tested.

1566                   Wptr := Scan_Ptr;
1567                   Scan_Wide (Source, Scan_Ptr, Code, Err);
1568                   Accumulate_Checksum (Code);
1569
                       --  A malformed sequence is diagnosed and the literal is
                       --  given the value of a blank.

1570                   if Err then
1571                      Error_Illegal_Wide_Character;
1572                         Code := Character'Pos (' ');
1573
1574                   --  In Ada 95 mode we allow any wide character in a character
1575                   --  literal, but in Ada 2005, the set of characters allowed
1576                   --  is restricted to graphic characters.
1577
1578                   elsif Ada_Version >= Ada_05
1579                     and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1580                   then
1581                      Error_Msg
1582                        ("(Ada 2005) non-graphic character not permitted " &
1583                         "in character literal", Wptr);
1584                   end if;
1585
                       --  Skip the closing apostrophe if it is present. If it is
                       --  missing, Scan_Ptr is left where Scan_Wide put it and
                       --  the character literal token is still returned below.

1586                   if Source (Scan_Ptr) /= ''' then
1587                      Error_Msg_S ("missing apostrophe");
1588                   else
1589                      Scan_Ptr := Scan_Ptr + 1;
1590                   end if;
1591
1592                --  If we do not find a closing quote in the expected place then
1593                --  assume that we have a misguided attempt at a string literal.
1594
1595                --  However, if previous token is RANGE, then we return an
1596                --  apostrophe instead since this gives better error recovery
1597
1598                elsif Source (Scan_Ptr + 1) /= ''' then
1599                   if Prev_Token = Tok_Range then
1600                      Token := Tok_Apostrophe;
1601                      return;
1602
1603                   else
                          --  Back up to the apostrophe, since Slit takes the
                          --  character at Scan_Ptr as the opening delimiter.

1604                      Scan_Ptr := Scan_Ptr - 1;
1605                      Error_Msg_S
1606                        ("strings are delimited by double quote character");
1607                      Slit;
1608                      Post_Scan;
1609                      return;
1610                   end if;
1611
1612                --  Otherwise we have a (non-wide) character literal
1613
1614                else
1615                   Accumulate_Checksum (Source (Scan_Ptr));
1616
                       --  Non-graphic characters are illegal, except that upper
                       --  half characters are rejected only in Ada 83 mode.

1617                   if Source (Scan_Ptr) not in Graphic_Character then
1618                      if Source (Scan_Ptr) in Upper_Half_Character then
1619                         if Ada_Version = Ada_83 then
1620                            Error_Illegal_Character;
1621                         end if;
1622
1623                      else
1624                         Error_Illegal_Character;
1625                      end if;
1626                   end if;
1627
                       --  Skip both the character and the closing apostrophe,
                       --  which the test on Source (Scan_Ptr + 1) above has
                       --  shown to be present.

1628                   Code := Get_Char_Code (Source (Scan_Ptr));
1629                   Scan_Ptr := Scan_Ptr + 2;
1630                end if;
1631
1632                --  Fall through here with Scan_Ptr updated past the closing
1633                --  quote, and Code set to the Char_Code value for the literal
1634
1635                Accumulate_Checksum (''');
1636                Token := Tok_Char_Literal;
1637                Set_Character_Literal_Name (Code);
1638                Token_Name := Name_Find;
1639                Character_Code := Code;
1640                Post_Scan;
1641                return;
1642             end if;
1643          end Char_Literal_Case;
1644
1645          --  Right parenthesis
1646
1647          when ')' =>
1648             Accumulate_Checksum (')');
1649             Scan_Ptr := Scan_Ptr + 1;
1650             Token := Tok_Right_Paren;
1651             if Style_Check then Style.Check_Right_Paren; end if;
1652             return;
1653
1654          --  Right bracket or right brace, treated as right paren
1655
1656          when ']' | '}' =>
1657             Error_Msg_S ("illegal character, replaced by "")""");
1658             Scan_Ptr := Scan_Ptr + 1;
1659             Token := Tok_Right_Paren;
1660             return;
1661
1662          --  Slash (can be division operator or first character of not equal)
1663
1664          when '/' =>
1665             Accumulate_Checksum ('/');
1666
1667             if Double_Char_Token ('=') then
1668                Token := Tok_Not_Equal;
1669                return;
1670             else
1671                Scan_Ptr := Scan_Ptr + 1;
1672                Token := Tok_Slash;
1673                return;
1674             end if;
1675
1676          --  Semicolon
1677
1678          when ';' =>
1679             Accumulate_Checksum (';');
1680             Scan_Ptr := Scan_Ptr + 1;
1681             Token := Tok_Semicolon;
1682             if Style_Check then Style.Check_Semicolon; end if;
1683             return;
1684
1685          --  Vertical bar
1686
1687          when '|' => Vertical_Bar_Case : begin
1688             Accumulate_Checksum ('|');
1689
1690             --  Special check for || to give nice message
1691
1692             if Source (Scan_Ptr + 1) = '|' then
1693                Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1694                Scan_Ptr := Scan_Ptr + 2;
1695                Token := Tok_Or;
1696                return;
1697
1698             else
1699                Scan_Ptr := Scan_Ptr + 1;
1700                Token := Tok_Vertical_Bar;
1701                if Style_Check then Style.Check_Vertical_Bar; end if;
1702                return;
1703             end if;
1704          end Vertical_Bar_Case;
1705
1706          --  Exclamation, replacement character for vertical bar
1707
1708          when '!' => Exclamation_Case : begin
1709             Accumulate_Checksum ('!');
1710             Obsolescent_Check (Token_Ptr);
1711
1712             if Warn_On_Obsolescent_Feature then
1713                Error_Msg_S
1714                  ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1715                Error_Msg_S
1716                  ("\use ""'|"" instead?");
1717             end if;
1718
1719             if Source (Scan_Ptr + 1) = '=' then
1720                Error_Msg_S ("'!= should be /=");
1721                Scan_Ptr := Scan_Ptr + 2;
1722                Token := Tok_Not_Equal;
1723                return;
1724
1725             else
1726                Scan_Ptr := Scan_Ptr + 1;
1727                Token := Tok_Vertical_Bar;
1728                return;
1729             end if;
1730          end Exclamation_Case;
1731
1732          --  Plus
1733
1734          when '+' => Plus_Case : begin
1735             Accumulate_Checksum ('+');
1736             Scan_Ptr := Scan_Ptr + 1;
1737             Token := Tok_Plus;
1738             return;
1739          end Plus_Case;
1740
1741          --  Digits starting a numeric literal
1742
1743          when '0' .. '9' =>
1744             Nlit;
1745
1746             if Identifier_Char (Source (Scan_Ptr)) then
1747                Error_Msg_S
1748                  ("delimiter required between literal and identifier");
1749             end if;
1750             Post_Scan;
1751             return;
1752
1753          --  Lower case letters
1754
1755          when 'a' .. 'z' =>
1756             Name_Len := 1;
1757             Underline_Found := False;
1758             Name_Buffer (1) := Source (Scan_Ptr);
1759             Accumulate_Checksum (Name_Buffer (1));
1760             Scan_Ptr := Scan_Ptr + 1;
1761             goto Scan_Identifier;
1762
1763          --  Upper case letters
1764
1765          when 'A' .. 'Z' =>
1766             Name_Len := 1;
1767             Underline_Found := False;
1768             Name_Buffer (1) :=
1769               Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1770             Accumulate_Checksum (Name_Buffer (1));
1771             Scan_Ptr := Scan_Ptr + 1;
1772             goto Scan_Identifier;
1773
1774          --  Underline character
1775
1776          when '_' =>
1777             if Special_Characters ('_') then
1778                Token_Ptr := Scan_Ptr;
1779                Scan_Ptr := Scan_Ptr + 1;
1780                Token := Tok_Special;
1781                Special_Character := '_';
1782                return;
1783             end if;
1784
1785             Error_Msg_S ("identifier cannot start with underline");
1786             Name_Len := 1;
1787             Name_Buffer (1) := '_';
1788             Scan_Ptr := Scan_Ptr + 1;
1789             Underline_Found := False;
1790             goto Scan_Identifier;
1791
1792          --  Space (not possible, because we scanned past blanks)
1793
1794          when ' ' =>
1795             raise Program_Error;
1796
1797          --  Characters in top half of ASCII 8-bit chart
1798
1799          when Upper_Half_Character =>
1800
1801             --  Wide character case
1802
1803             if Upper_Half_Encoding then
1804                goto Scan_Wide_Character;
1805
1806             --  Otherwise we have OK Latin-1 character
1807
1808             else
1809                --  Upper half characters may possibly be identifier letters
1810                --  but can never be digits, so Identifier_Char can be used to
1811                --  test for a valid start of identifier character.
1812
1813                if Identifier_Char (Source (Scan_Ptr)) then
1814                   Name_Len := 0;
1815                   Underline_Found := False;
1816                   goto Scan_Identifier;
1817                else
1818                   Error_Illegal_Character;
1819                end if;
1820             end if;
1821
1822          when ESC =>
1823
1824             --  ESC character, possible start of identifier if wide characters
1825             --  using ESC encoding are allowed in identifiers, which we can
1826             --  tell by looking at the Identifier_Char flag for ESC, which is
1827             --  only true if these conditions are met. In Ada 2005 mode, may
1828             --  also be valid UTF_32 space or line terminator character.
1829
1830             if Identifier_Char (ESC) then
1831                Name_Len := 0;
1832                goto Scan_Wide_Character;
1833             else
1834                Error_Illegal_Character;
1835             end if;
1836
1837          --  Invalid control characters
1838
1839          when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS  | ASCII.SO |
1840               SI  | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1841               EM  | FS  | GS  | RS  | US  | DEL
1842          =>
1843             Error_Illegal_Character;
1844
1845          --  Invalid graphic characters
1846
1847          when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1848
1849             --  If Set_Special_Character has been called for this character,
1850             --  set Scans.Special_Character and return a Special token.
1851
1852             if Special_Characters (Source (Scan_Ptr)) then
1853                Token_Ptr := Scan_Ptr;
1854                Token := Tok_Special;
1855                Special_Character := Source (Scan_Ptr);
1856                Scan_Ptr := Scan_Ptr + 1;
1857                return;
1858
1859             --  Otherwise, this is an illegal character
1860
1861             else
1862                Error_Illegal_Character;
1863             end if;
1864
1865          --  End switch on non-blank character
1866
1867          end case;
1868
1869       --  End loop past format effectors. The exit from this loop is by
1870       --  executing a return statement following completion of token scan
1871       --  (control never falls out of this loop to the code which follows)
1872
1873       end loop;
1874
1875       --  Wide_Character scanning routine. On entry we have encountered the
1876       --  initial character of a wide character sequence.
1877
1878       <<Scan_Wide_Character>>
1879
1880          declare
1881             Code : Char_Code;
1882             Cat  : Category;
1883             Err  : Boolean;
1884
1885          begin
1886             Wptr := Scan_Ptr;
1887             Scan_Wide (Source, Scan_Ptr, Code, Err);
1888
1889             --  If bad wide character, signal error and continue scan
1890
1891             if Err then
1892                Error_Illegal_Wide_Character;
1893                goto Scan_Next_Character;
1894             end if;
1895
1896             Cat := Get_Category (UTF_32 (Code));
1897
1898             --  If OK letter, reset scan ptr and go scan identifier
1899
1900             if Is_UTF_32_Letter (Cat) then
1901                Scan_Ptr := Wptr;
1902                Name_Len := 0;
1903                Underline_Found := False;
1904                goto Scan_Identifier;
1905
1906             --  If OK wide space, ignore and keep scanning (we do not include
1907             --  any ignored spaces in checksum)
1908
1909             elsif Is_UTF_32_Space (Cat) then
1910                goto Scan_Next_Character;
1911
1912             --  If OK wide line terminator, terminate current line
1913
1914             elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1915                Scan_Ptr := Wptr;
1916                goto Scan_Line_Terminator;
1917
1918             --  Punctuation is an error (at start of identifier)
1919
1920             elsif Is_UTF_32_Punctuation (Cat) then
1921                Error_Msg
1922                  ("identifier cannot start with punctuation", Wptr);
1923                Scan_Ptr := Wptr;
1924                Name_Len := 0;
1925                Underline_Found := False;
1926                goto Scan_Identifier;
1927
1928             --  Mark character is an error (at start of identifier)
1929
1930             elsif Is_UTF_32_Mark (Cat) then
1931                Error_Msg
1932                  ("identifier cannot start with mark character", Wptr);
1933                Scan_Ptr := Wptr;
1934                Name_Len := 0;
1935                Underline_Found := False;
1936                goto Scan_Identifier;
1937
1938             --  Other format character is an error (at start of identifier)
1939
1940             elsif Is_UTF_32_Other (Cat) then
1941                Error_Msg
1942                  ("identifier cannot start with other format character", Wptr);
1943                Scan_Ptr := Wptr;
1944                Name_Len := 0;
1945                Underline_Found := False;
1946                goto Scan_Identifier;
1947
1948             --  Extended digit character is an error. Could be bad start of
1949             --  identifier or bad literal. Not worth doing too much to try to
1950             --  distinguish these cases, but we will do a little bit.
1951
1952             elsif Is_UTF_32_Digit (Cat) then
1953                Error_Msg
1954                  ("identifier cannot start with digit character", Wptr);
1955                Scan_Ptr := Wptr;
1956                Name_Len := 0;
1957                Underline_Found := False;
1958                goto Scan_Identifier;
1959
1960             --  All other wide characters are illegal here
1961
1962             else
1963                Error_Illegal_Wide_Character;
1964                goto Scan_Next_Character;
1965             end if;
1966          end;
1967
1968       --  Routine to scan line terminator. On entry Scan_Ptr points to a
1969       --  character which is one of FF, LF, CR, VT, or one of the wide
1970       --  characters that are treated as a line terminator.
1971
1972       <<Scan_Line_Terminator>>
1973
1974          --  Check line too long
1975
1976          Check_End_Of_Line;
1977
1978          --  Set Token_Ptr, if End_Of_Line is a token, for the case when it is
1979          --  a physical line.
1980
1981          if End_Of_Line_Is_Token then
1982             Token_Ptr := Scan_Ptr;
1983          end if;
1984
1985          declare
1986             Physical : Boolean;
1987
1988          begin
1989             Skip_Line_Terminators (Scan_Ptr, Physical);
1990
1991             --  If we are at start of physical line, update scan pointers to
1992             --  reflect the start of the new line.
1993
1994             if Physical then
1995                Current_Line_Start       := Scan_Ptr;
1996                Start_Column             := Set_Start_Column;
1997                First_Non_Blank_Location := Scan_Ptr;
1998
1999                --  If End_Of_Line is a token, we return it as it is a
2000                --  physical line.
2001
2002                if End_Of_Line_Is_Token then
2003                   Token := Tok_End_Of_Line;
2004                   return;
2005                end if;
2006             end if;
2007          end;
2008
2009          goto Scan_Next_Character;
2010
2011       --  Identifier scanning routine. On entry, some initial characters of
2012       --  the identifier may have already been stored in Name_Buffer. If so,
2013       --  Name_Len has the number of characters stored. Otherwise Name_Len is
2014       --  set to zero on entry. Underline_Found is also set False on entry.
2015
2016       <<Scan_Identifier>>
2017
2018          --  This loop scans as fast as possible past lower half letters and
2019          --  digits, which we expect to be the most common characters.
2020
2021          loop
2022             if Source (Scan_Ptr) in 'a' .. 'z'
2023               or else Source (Scan_Ptr) in '0' .. '9'
2024             then
2025                Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2026                Accumulate_Checksum (Source (Scan_Ptr));
2027
2028             elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2029                Name_Buffer (Name_Len + 1) :=
2030                  Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2031                Accumulate_Checksum (Name_Buffer (Name_Len + 1));
2032
2033             else
2034                exit;
2035             end if;
2036
2037             Underline_Found := False;
2038             Scan_Ptr := Scan_Ptr + 1;
2039             Name_Len := Name_Len + 1;
2040          end loop;
2041
2042          --  If we fall through, then we have encountered either an underline
2043          --  character, or an extended identifier character (i.e. one from the
2044          --  upper half), or a wide character, or an identifier terminator. The
2045          --  initial test speeds us up in the most common case where we have
2046          --  an identifier terminator. Note that ESC is an identifier character
2047          --  only if a wide character encoding method that uses ESC encoding
2048          --  is active, so if we find an ESC character we know that we have a
2049          --  wide character.
2050
2051          if Identifier_Char (Source (Scan_Ptr)) then
2052
2053             --  Case of underline
2054
2055             if Source (Scan_Ptr) = '_' then
2056                Accumulate_Checksum ('_');
2057
2058                if Underline_Found then
2059                   Error_No_Double_Underline;
2060                else
2061                   Underline_Found := True;
2062                   Name_Len := Name_Len + 1;
2063                   Name_Buffer (Name_Len) := '_';
2064                end if;
2065
2066                Scan_Ptr := Scan_Ptr + 1;
2067                goto Scan_Identifier;
2068
2069             --  Upper half character
2070
2071             elsif Source (Scan_Ptr) in Upper_Half_Character
2072               and then not Upper_Half_Encoding
2073             then
2074                Accumulate_Checksum (Source (Scan_Ptr));
2075                Store_Encoded_Character
2076                  (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2077                Scan_Ptr := Scan_Ptr + 1;
2078                Underline_Found := False;
2079                goto Scan_Identifier;
2080
2081             --  Left bracket not followed by a quote terminates an identifier.
2082             --  This is an error, but we don't want to give a junk error msg
2083             --  about wide characters in this case!
2084
2085             elsif Source (Scan_Ptr) = '['
2086               and then Source (Scan_Ptr + 1) /= '"'
2087             then
2088                null;
2089
2090             --  We know we have a wide character encoding here (the current
2091             --  character is either ESC, left bracket, or an upper half
2092             --  character depending on the encoding method).
2093
2094             else
2095                --  Scan out the wide character and insert the appropriate
2096                --  encoding into the name table entry for the identifier.
2097
2098                declare
2099                   Code : Char_Code;
2100                   Err  : Boolean;
2101                   Chr  : Character;
2102                   Cat  : Category;
2103
2104                begin
2105                   Wptr := Scan_Ptr;
2106                   Scan_Wide (Source, Scan_Ptr, Code, Err);
2107
2108                   --  If error, signal error
2109
2110                   if Err then
2111                      Error_Illegal_Wide_Character;
2112
2113                   --  If the character scanned is a normal identifier
2114                   --  character, then we treat it that way.
2115
2116                   elsif In_Character_Range (Code)
2117                     and then Identifier_Char (Get_Character (Code))
2118                   then
2119                      Chr := Get_Character (Code);
2120                      Accumulate_Checksum (Chr);
2121                      Store_Encoded_Character
2122                        (Get_Char_Code (Fold_Lower (Chr)));
2123                      Underline_Found := False;
2124
2125                   --  Here if not a normal identifier character
2126
2127                   else
2128                      --  Make sure we are allowing wide characters in
2129                      --  identifiers. Note that we allow wide character
2130                      --  notation for an OK identifier character. This in
2131                      --  particular allows bracket or other notation to be
2132                      --  used for upper half letters.
2133
2134                      --  Wide characters are always allowed in Ada 2005
2135
2136                      if Identifier_Character_Set /= 'w'
2137                        and then Ada_Version < Ada_05
2138                      then
2139                         Error_Msg
2140                        ("wide character not allowed in identifier", Wptr);
2141                      end if;
2142
2143                      Cat := Get_Category (UTF_32 (Code));
2144
2145                      --  If OK letter, store it folding to upper case. Note
2146                      --  that we include the folded letter in the checksum.
2147
2148                      if Is_UTF_32_Letter (Cat) then
2149                         Code :=
2150                           Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
2151                         Accumulate_Checksum (Code);
2152                         Store_Encoded_Character (Code);
2153                         Underline_Found := False;
2154
2155                      --  If OK extended digit or mark, then store it
2156
2157                      elsif Is_UTF_32_Digit (Cat)
2158                        or else Is_UTF_32_Mark (Cat)
2159                      then
2160                         Accumulate_Checksum (Code);
2161                         Store_Encoded_Character (Code);
2162                         Underline_Found := False;
2163
2164                      --  Wide punctuation is also stored, but counts as an
2165                      --  underline character for error checking purposes.
2166
2167                      elsif Is_UTF_32_Punctuation (Cat) then
2168                         Accumulate_Checksum (Code);
2169
2170                         if Underline_Found then
2171                            declare
2172                               Cend : constant Source_Ptr := Scan_Ptr;
2173                            begin
2174                               Scan_Ptr := Wptr;
2175                               Error_No_Double_Underline;
2176                               Scan_Ptr := Cend;
2177                            end;
2178
2179                         else
2180                            Store_Encoded_Character (Code);
2181                            Underline_Found := True;
2182                         end if;
2183
2184                      --  Wide character in Unicode category "Other, Format"
2185                      --  is accepted in an identifier, but is ignored and not
2186                      --  stored. It seems reasonable to exclude it from the
2187                      --  checksum.
2188
2189                      --  Note that it is correct (see AI-395) to simply strip
2190                      --  other format characters, before testing for double
2191                      --  underlines, or for reserved words.
2192
2193                      elsif Is_UTF_32_Other (Cat) then
2194                         null;
2195
2196                      --  Wide character in category Separator,Space terminates
2197
2198                      elsif Is_UTF_32_Space (Cat) then
2199                         goto Scan_Identifier_Complete;
2200
2201                      --  Any other wide character is not acceptable
2202
2203                      else
2204                         Error_Msg
2205                           ("invalid wide character in identifier", Wptr);
2206                      end if;
2207                   end if;
2208
2209                   goto Scan_Identifier;
2210                end;
2211             end if;
2212          end if;
2213
2214       --  Scan of identifier is complete. The identifier is stored in
2215       --  Name_Buffer, and Scan_Ptr points past the last character.
2216
2217       <<Scan_Identifier_Complete>>
2218          Token_Name := Name_Find;
2219
2220          --  Check for identifier ending with underline or punctuation char
2221
2222          if Underline_Found then
2223             Underline_Found := False;
2224
2225             if Source (Scan_Ptr - 1) = '_' then
2226                Error_Msg
2227                  ("identifier cannot end with underline", Scan_Ptr - 1);
2228             else
2229                Error_Msg
2230                  ("identifier cannot end with punctuation character", Wptr);
2231             end if;
2232          end if;
2233
2234          --  Here is where we check if it was a keyword
2235
2236          if Is_Keyword_Name (Token_Name) then
2237             Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2238
2239             --  Deal with possible style check for non-lower case keyword, but
2240             --  we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2241             --  this purpose if they appear as attribute designators. Actually
2242             --  we only check the first character for speed.
2243
2244             --  Ada 2005 (AI-284): Do not apply the style check in case of
2245             --  "pragma Interface"
2246
2247             --  Ada 2005 (AI-340): Do not apply the style check in case of
2248             --  MOD attribute.
2249
2250             if Style_Check
2251               and then Source (Token_Ptr) <= 'Z'
2252               and then (Prev_Token /= Tok_Apostrophe
2253                           or else
2254                             (Token /= Tok_Access and then
2255                              Token /= Tok_Delta  and then
2256                              Token /= Tok_Digits and then
2257                              Token /= Tok_Mod    and then
2258                              Token /= Tok_Range))
2259               and then (Token /= Tok_Interface
2260                           or else
2261                             (Token = Tok_Interface
2262                                and then Prev_Token /= Tok_Pragma))
2263             then
2264                Style.Non_Lower_Case_Keyword;
2265             end if;
2266
2267             --  We must reset Token_Name since this is not an identifier and
2268             --  if we leave Token_Name set, the parser gets confused because
2269             --  it thinks it is dealing with an identifier instead of the
2270             --  corresponding keyword.
2271
2272             Token_Name := No_Name;
2273             Accumulate_Token_Checksum;
2274             return;
2275
2276          --  It is an identifier after all
2277
2278          else
2279             Token := Tok_Identifier;
2280             Accumulate_Token_Checksum;
2281             Post_Scan;
2282             return;
2283          end if;
2284    end Scan;
2285
2286    --------------------------
2287    -- Set_Comment_As_Token --
2288    --------------------------
2289
2290    procedure Set_Comment_As_Token (Value : Boolean) is
2291    begin
           --  Record the requested setting in Comment_Is_Token; no other state
           --  is changed here. NOTE(review): presumably Scan returns comments as
           --  tokens when this flag is True, but the code that reads the flag is
           --  not visible from here - confirm.

2292       Comment_Is_Token := Value;
2293    end Set_Comment_As_Token;
2294
2295    ------------------------------
2296    -- Set_End_Of_Line_As_Token --
2297    ------------------------------
2298
2299    procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2300    begin
           --  Record the requested setting in End_Of_Line_Is_Token. Scan tests
           --  this flag when it handles a line terminator: if it is True, Scan
           --  sets Token_Ptr to the terminator and, when a physical line end
           --  has been skipped, returns Tok_End_Of_Line.

2301       End_Of_Line_Is_Token := Value;
2302    end Set_End_Of_Line_As_Token;
2303
2304    ---------------------------
2305    -- Set_Special_Character --
2306    ---------------------------
2307
2308    procedure Set_Special_Character (C : Character) is
2309    begin
2310       --  Only this fixed set of characters can be marked; others are ignored
2311       if C = '#' or else C = '$' or else C = '_' or else C = '?'
2312         or else C = '@' or else C = '`' or else C = '\' or else C = '^'
2313         or else C = '~'
2314       then
2315          Special_Characters (C) := True;
2316       end if;
2317    end Set_Special_Character;
2318
2319    ----------------------
2320    -- Set_Start_Column --
2321    ----------------------
2322
2323    --  Note: it seems at first glance a little expensive to compute this value
2324    --  for every source line (since it is certainly not used for all source
2325    --  lines). On the other hand, it doesn't take much more work to skip past
2326    --  the initial white space on the line counting the columns than it would
2327    --  to scan past the white space using the standard scanning circuits.
2328
2329    function Set_Start_Column return Column_Number is
2330       Col : Column_Number := 0;
2331       --  Column reached so far, counted from zero at the entry value of
2332       --  Scan_Ptr; this is the function result.
2333
2334    begin
2335       --  Each pass of the outer loop consumes one run of blanks (which may
2336       --  be empty) and then at most one horizontal tab.
2337
2338       Tabs_Loop : loop
2339
2340          --  The blank run is skipped with a hand unrolled test chain that
2341          --  looks at up to seven characters per iteration. A character is
2342          --  examined only if all those before it were blanks. Every arm
2343          --  except the last has met a non-blank character and so ends the
2344          --  run; the last arm has consumed seven blanks and goes round again.
2345
2346          Blanks_Loop : loop
2347             if Source (Scan_Ptr) /= ' ' then
2348                exit Blanks_Loop;
2349
2350             elsif Source (Scan_Ptr + 1) /= ' ' then
2351                Scan_Ptr := Scan_Ptr + 1;
2352                Col := Col + 1;
2353                exit Blanks_Loop;
2354
2355             elsif Source (Scan_Ptr + 2) /= ' ' then
2356                Scan_Ptr := Scan_Ptr + 2;
2357                Col := Col + 2;
2358                exit Blanks_Loop;
2359
2360             elsif Source (Scan_Ptr + 3) /= ' ' then
2361                Scan_Ptr := Scan_Ptr + 3;
2362                Col := Col + 3;
2363                exit Blanks_Loop;
2364
2365             elsif Source (Scan_Ptr + 4) /= ' ' then
2366                Scan_Ptr := Scan_Ptr + 4;
2367                Col := Col + 4;
2368                exit Blanks_Loop;
2369
2370             elsif Source (Scan_Ptr + 5) /= ' ' then
2371                Scan_Ptr := Scan_Ptr + 5;
2372                Col := Col + 5;
2373                exit Blanks_Loop;
2374
2375             elsif Source (Scan_Ptr + 6) /= ' ' then
2376                Scan_Ptr := Scan_Ptr + 6;
2377                Col := Col + 6;
2378                exit Blanks_Loop;
2379
2380             else
2381                Scan_Ptr := Scan_Ptr + 7;
2382                Col := Col + 7;
2383             end if;
2384          end loop Blanks_Loop;
2385
2386          --  Anything other than a horizontal tab ends the scan
2387
2388          exit Tabs_Loop when Source (Scan_Ptr) /= HT;
2389
2390          --  Step over the tab: the column moves up to the next multiple of 8
2391
2392          if Style_Check then Style.Check_HT; end if;
2393          Scan_Ptr := Scan_Ptr + 1;
2394          Col := (Col / 8) * 8 + 8;
2395       end loop Tabs_Loop;
2396
2397       return Col;
2398    end Set_Start_Column;
2399
2400 end Scng;