* gimplify.c (find_single_pointer_decl_1): New static function.
[official-gcc.git] / gcc / ada / scng.adb
blob9d3483e82515b0a007d1bf23beb9eef8398ccd43
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- S C N G --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1992-2005 Free Software Foundation, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
21 -- --
22 -- GNAT was originally developed by the GNAT team at New York University. --
23 -- Extensive contributions were provided by Ada Core Technologies Inc. --
24 -- --
25 ------------------------------------------------------------------------------
27 with Csets; use Csets;
28 with Err_Vars; use Err_Vars;
29 with Namet; use Namet;
30 with Opt; use Opt;
31 with Scans; use Scans;
32 with Sinput; use Sinput;
33 with Snames; use Snames;
34 with Stringt; use Stringt;
35 with Stylesw; use Stylesw;
36 with Uintp; use Uintp;
37 with Urealp; use Urealp;
38 with Widechar; use Widechar;
40 with System.CRC32;
41 with System.WCh_Con; use System.WCh_Con;
43 with GNAT.UTF_32; use GNAT.UTF_32;
45 package body Scng is
47 use ASCII;
48 -- Make control characters visible
50 Special_Characters : array (Character) of Boolean := (others => False);
51 -- For characters that are Special token, the value is True
53 Comment_Is_Token : Boolean := False;
54 -- True if comments are tokens
56 End_Of_Line_Is_Token : Boolean := False;
57 -- True if End_Of_Line is a token
59 -----------------------
60 -- Local Subprograms --
61 -----------------------
63 procedure Accumulate_Token_Checksum;
64 pragma Inline (Accumulate_Token_Checksum);
-- This routine accumulates into the checksum a single character whose
-- position number is Token_Type'Pos of the current value of Token.
66 procedure Accumulate_Checksum (C : Character);
67 pragma Inline (Accumulate_Checksum);
68 -- This routine accumulates the checksum given character C. During the
69 -- scanning of a source file, this routine is called with every character
70 -- in the source, excluding blanks, and all control characters (except
71 -- that ESC is included in the checksum). Upper case letters not in string
72 -- literals are folded by the caller. See Sinput spec for the documentation
73 -- of the checksum algorithm. Note: checksum values are only used if we
74 -- generate code, so it is not necessary to worry about making the right
75 -- sequence of calls in any error situation.
77 procedure Accumulate_Checksum (C : Char_Code);
78 pragma Inline (Accumulate_Checksum);
79 -- This version is identical, except that the argument, C, is a character
80 -- code value instead of a character. This is used when wide characters
81 -- are scanned. We use the character code rather than the ASCII characters
82 -- so that the checksum is independent of wide character encoding method.
84 procedure Initialize_Checksum;
85 pragma Inline (Initialize_Checksum);
86 -- Initialize checksum value
88 -------------------------
89 -- Accumulate_Checksum --
90 -------------------------
procedure Accumulate_Checksum (C : Character) is
begin
   --  Fold the single character C into the running CRC kept in Checksum

   System.CRC32.Update
     (System.CRC32.CRC32 (Checksum),
      C);
end Accumulate_Checksum;
procedure Accumulate_Checksum (C : Char_Code) is
begin
   --  The code is fed to the character version one byte at a time, high
   --  order bytes first. A code that fits in 16 bits contributes two
   --  bytes in total, a larger code contributes four.

   if C <= 16#FFFF# then
      Accumulate_Checksum (Character'Val (C / 256));

   else
      Accumulate_Checksum (Character'Val (C / 2 ** 24));
      Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256));
      Accumulate_Checksum (Character'Val ((C / 256) mod 256));
   end if;

   --  The low order byte is accumulated last in both cases

   Accumulate_Checksum (Character'Val (C mod 256));
end Accumulate_Checksum;
110 -------------------------------
111 -- Accumulate_Token_Checksum --
112 -------------------------------
procedure Accumulate_Token_Checksum is
   Token_Char : constant Character :=
                  Character'Val (Token_Type'Pos (Token));
   --  Character whose position number matches that of the current token

begin
   System.CRC32.Update (System.CRC32.CRC32 (Checksum), Token_Char);
end Accumulate_Token_Checksum;
121 ----------------------------
122 -- Determine_Token_Casing --
123 ----------------------------
function Determine_Token_Casing return Casing_Type is
   Slice_Last : constant Source_Ptr := Scan_Ptr - 1;
   --  Upper bound of the slice examined: the character just before Scan_Ptr

begin
   --  The casing is determined from the source text Token_Ptr .. Slice_Last

   return Determine_Casing (Source (Token_Ptr .. Slice_Last));
end Determine_Token_Casing;
130 -------------------------
131 -- Initialize_Checksum --
132 -------------------------
procedure Initialize_Checksum is
begin
   --  Put the CRC kept in Checksum into its initial state

   System.CRC32.Initialize
     (System.CRC32.CRC32 (Checksum));
end Initialize_Checksum;
139 ------------------------
140 -- Initialize_Scanner --
141 ------------------------
procedure Initialize_Scanner
  (Unit  : Unit_Number_Type;
   Index : Source_File_Index)
is
   --  Prepare the scanner for the source file designated by Index, which
   --  belongs to unit Unit. The reserved words are entered in the names
   --  table, the scan control variables are set to the start of the source
   --  text, and the checksum is initialized. No token is scanned here.

   procedure Set_Reserved (N : Name_Id; T : Token_Type);
   pragma Inline (Set_Reserved);
   --  Set given name as a reserved keyword (T is the corresponding token)

   ------------------
   -- Set_Reserved --
   ------------------

   procedure Set_Reserved (N : Name_Id; T : Token_Type) is
   begin
      --  Set up Token_Type values in Names Table entries for reserved
      --  keywords. We use the Pos value of the Token_Type value. Note we
      --  rely on the fact that Token_Type'Val (0) is not a reserved word!

      Set_Name_Table_Byte (N, Token_Type'Pos (T));
   end Set_Reserved;

--  Start of processing for Initialize_Scanner

begin
   --  Establish reserved words

   Set_Reserved (Name_Abort, Tok_Abort);
   Set_Reserved (Name_Abs, Tok_Abs);
   Set_Reserved (Name_Abstract, Tok_Abstract);
   Set_Reserved (Name_Accept, Tok_Accept);
   Set_Reserved (Name_Access, Tok_Access);
   Set_Reserved (Name_And, Tok_And);
   Set_Reserved (Name_Aliased, Tok_Aliased);
   Set_Reserved (Name_All, Tok_All);
   Set_Reserved (Name_Array, Tok_Array);
   Set_Reserved (Name_At, Tok_At);
   Set_Reserved (Name_Begin, Tok_Begin);
   Set_Reserved (Name_Body, Tok_Body);
   Set_Reserved (Name_Case, Tok_Case);
   Set_Reserved (Name_Constant, Tok_Constant);
   Set_Reserved (Name_Declare, Tok_Declare);
   Set_Reserved (Name_Delay, Tok_Delay);
   Set_Reserved (Name_Delta, Tok_Delta);
   Set_Reserved (Name_Digits, Tok_Digits);
   Set_Reserved (Name_Do, Tok_Do);
   Set_Reserved (Name_Else, Tok_Else);
   Set_Reserved (Name_Elsif, Tok_Elsif);
   Set_Reserved (Name_End, Tok_End);
   Set_Reserved (Name_Entry, Tok_Entry);
   Set_Reserved (Name_Exception, Tok_Exception);
   Set_Reserved (Name_Exit, Tok_Exit);
   Set_Reserved (Name_For, Tok_For);
   Set_Reserved (Name_Function, Tok_Function);
   Set_Reserved (Name_Generic, Tok_Generic);
   Set_Reserved (Name_Goto, Tok_Goto);
   Set_Reserved (Name_If, Tok_If);
   Set_Reserved (Name_In, Tok_In);
   Set_Reserved (Name_Is, Tok_Is);
   Set_Reserved (Name_Limited, Tok_Limited);
   Set_Reserved (Name_Loop, Tok_Loop);
   Set_Reserved (Name_Mod, Tok_Mod);
   Set_Reserved (Name_New, Tok_New);
   Set_Reserved (Name_Not, Tok_Not);
   Set_Reserved (Name_Null, Tok_Null);
   Set_Reserved (Name_Of, Tok_Of);
   Set_Reserved (Name_Or, Tok_Or);
   Set_Reserved (Name_Others, Tok_Others);
   Set_Reserved (Name_Out, Tok_Out);
   Set_Reserved (Name_Package, Tok_Package);
   Set_Reserved (Name_Pragma, Tok_Pragma);
   Set_Reserved (Name_Private, Tok_Private);
   Set_Reserved (Name_Procedure, Tok_Procedure);
   Set_Reserved (Name_Protected, Tok_Protected);
   Set_Reserved (Name_Raise, Tok_Raise);
   Set_Reserved (Name_Range, Tok_Range);
   Set_Reserved (Name_Record, Tok_Record);
   Set_Reserved (Name_Rem, Tok_Rem);
   Set_Reserved (Name_Renames, Tok_Renames);
   Set_Reserved (Name_Requeue, Tok_Requeue);
   Set_Reserved (Name_Return, Tok_Return);
   Set_Reserved (Name_Reverse, Tok_Reverse);
   Set_Reserved (Name_Select, Tok_Select);
   Set_Reserved (Name_Separate, Tok_Separate);
   Set_Reserved (Name_Subtype, Tok_Subtype);
   Set_Reserved (Name_Tagged, Tok_Tagged);
   Set_Reserved (Name_Task, Tok_Task);
   Set_Reserved (Name_Terminate, Tok_Terminate);
   Set_Reserved (Name_Then, Tok_Then);
   Set_Reserved (Name_Type, Tok_Type);
   Set_Reserved (Name_Until, Tok_Until);
   Set_Reserved (Name_Use, Tok_Use);
   Set_Reserved (Name_When, Tok_When);
   Set_Reserved (Name_While, Tok_While);
   Set_Reserved (Name_With, Tok_With);
   Set_Reserved (Name_Xor, Tok_Xor);

   --  Ada 2005 reserved words

   Set_Reserved (Name_Interface, Tok_Interface);
   Set_Reserved (Name_Overriding, Tok_Overriding);
   Set_Reserved (Name_Synchronized, Tok_Synchronized);

   --  Initialize scan control variables

   Current_Source_File      := Index;
   Source                   := Source_Text (Current_Source_File);
   Current_Source_Unit      := Unit;
   Scan_Ptr                 := Source_First (Current_Source_File);
   Token                    := No_Token;
   Token_Ptr                := Scan_Ptr;
   Current_Line_Start       := Scan_Ptr;
   Token_Node               := Empty;
   Token_Name               := No_Name;
   Start_Column             := Set_Start_Column;
   First_Non_Blank_Location := Scan_Ptr;

   Initialize_Checksum;
   Wide_Char_Byte_Count := 0;

   --  Do not call Scan, otherwise the License stuff does not work in Scn

end Initialize_Scanner;
266 ------------------------------
267 -- Reset_Special_Characters --
268 ------------------------------
procedure Reset_Special_Characters is
begin
   --  Clear the flag of every character, one entry at a time

   for Ch in Special_Characters'Range loop
      Special_Characters (Ch) := False;
   end loop;
end Reset_Special_Characters;
275 ----------
276 -- Scan --
277 ----------
279 procedure Scan is
281 Start_Of_Comment : Source_Ptr;
282 -- Record start of comment position
284 Underline_Found : Boolean;
285 -- During scanning of an identifier, set to True if last character
286 -- scanned was an underline or other punctuation character. This
287 -- is used to flag the error of two underlines/punctuations in a
288 -- row or ending an identifier with an underline/punctuation. Here
289 -- punctuation means any UTF_32 character in the Unicode category
290 -- Punctuation,Connector.
292 Wptr : Source_Ptr;
293 -- Used to remember start of last wide character scanned
295 procedure Check_End_Of_Line;
296 -- Called when end of line encountered. Checks that line is not too
297 -- long, and that other style checks for the end of line are met.
299 function Double_Char_Token (C : Character) return Boolean;
300 -- This function is used for double character tokens like := or <>. It
301 -- checks if the character following Source (Scan_Ptr) is C, and if so
302 -- bumps Scan_Ptr past the pair of characters and returns True. A space
303 -- between the two characters is also recognized with an appropriate
304 -- error message being issued. If C is not present, False is returned.
305 -- Note that Double_Char_Token can only be used for tokens defined in
306 -- the Ada syntax (its use for error cases like && is not appropriate
307 -- since we do not want a junk message for a case like &-space-&).
309 procedure Error_Illegal_Character;
310 -- Give illegal character error, Scan_Ptr points to character. On
311 -- return, Scan_Ptr is bumped past the illegal character.
313 procedure Error_Illegal_Wide_Character;
314 -- Give illegal wide character message, posted at Wptr. Scan_Ptr is not
315 -- changed here (presumably the caller has already advanced it), which may
316 -- still leave us pointing to junk, not much we can do if the escape sequence is messed up!
318 procedure Error_Long_Line;
319 -- Signal error of excessively long line
321 procedure Error_No_Double_Underline;
322 -- Signal error of two underline or punctuation characters in a row.
323 -- Called with Scan_Ptr pointing to second underline/punctuation char.
325 procedure Nlit;
326 -- This is the procedure for scanning out numeric literals. On entry,
327 -- Scan_Ptr points to the digit that starts the numeric literal (the
328 -- checksum for this character has not been accumulated yet). On return
329 -- Scan_Ptr points past the last character of the numeric literal, Token
330 -- and Token_Node are set appropriately, and the checksum is updated.
332 procedure Slit;
333 -- This is the procedure for scanning out string literals. On entry,
334 -- Scan_Ptr points to the opening string quote (the checksum for this
335 -- character has not been accumulated yet). On return Scan_Ptr points
336 -- past the closing quote of the string literal, Token and Token_Node
337 -- are set appropriately, and the checksum is updated.
339 -----------------------
340 -- Check_End_Of_Line --
341 -----------------------
procedure Check_End_Of_Line is
   Line_Length : constant Int :=
                   Int (Scan_Ptr) -
                   Int (Current_Line_Start) -
                   Wide_Char_Byte_Count;
   --  Distance from the line start to Scan_Ptr, less the wide character
   --  byte count accumulated for this line.

begin
   --  Line terminator style check

   if Style_Check then
      Style.Check_Line_Terminator (Line_Length);
   end if;

   --  Maximum line length is checked by the style checker when that
   --  style check is enabled.

   if Style_Check and then Style_Check_Max_Line_Length then
      Style.Check_Line_Max_Length (Line_Length);

   --  Otherwise the length is compared with the standard value. Note that
   --  we take this from Opt.Max_Line_Length rather than
   --  Hostparm.Max_Line_Length because we do not want to impose any limit
   --  during scanning of configuration pragma files, and
   --  Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length) is
   --  reset to Column_Number'Max during scanning of such files.

   elsif Line_Length > Opt.Max_Line_Length then
      Error_Long_Line;
   end if;

   --  The wide character byte count starts afresh on the next line

   Wide_Char_Byte_Count := 0;
end Check_End_Of_Line;
375 -----------------------
376 -- Double_Char_Token --
377 -----------------------
function Double_Char_Token (C : Character) return Boolean is
   Next_Char : constant Character := Source (Scan_Ptr + 1);
   --  Character immediately following the one at Scan_Ptr

begin
   --  Normal case, C follows directly: accumulate it and skip the pair

   if Next_Char = C then
      Accumulate_Checksum (C);
      Scan_Ptr := Scan_Ptr + 2;
      return True;
   end if;

   --  Error case of a single blank separating the two characters. The
   --  message is posted on the blank, then the blank and C are skipped.

   if Next_Char = ' ' and then Source (Scan_Ptr + 2) = C then
      Scan_Ptr := Scan_Ptr + 1;
      Error_Msg_S ("no space allowed here");
      Scan_Ptr := Scan_Ptr + 2;
      return True;
   end if;

   --  C is not present, Scan_Ptr is left unchanged

   return False;
end Double_Char_Token;
399 -----------------------------
400 -- Error_Illegal_Character --
401 -----------------------------
procedure Error_Illegal_Character is
begin
   --  Post the message, then step over the offending character

   Error_Msg_S ("illegal character");
   Scan_Ptr := Scan_Ptr + 1;
end Error_Illegal_Character;
409 ----------------------------------
410 -- Error_Illegal_Wide_Character --
411 ----------------------------------
procedure Error_Illegal_Wide_Character is
begin
   --  The message is posted at Wptr, the remembered start of the last
   --  wide character scanned.

   Error_Msg
     ("illegal wide character",
      Wptr);
end Error_Illegal_Wide_Character;
418 ---------------------
419 -- Error_Long_Line --
420 ---------------------
procedure Error_Long_Line is
   Flag_Loc : constant Source_Ptr :=
                Current_Line_Start + Source_Ptr (Opt.Max_Line_Length);
   --  Location Opt.Max_Line_Length characters past the start of the line

begin
   Error_Msg ("this line is too long", Flag_Loc);
end Error_Long_Line;
429 -------------------------------
430 -- Error_No_Double_Underline --
431 -------------------------------
procedure Error_No_Double_Underline is
begin
   Underline_Found := False;

   --  The message depends on whether the current character (at Scan_Ptr)
   --  and the one before it are underlines or other punctuation, which
   --  gives four combinations.

   if Source (Scan_Ptr) = '_'
     and then Source (Scan_Ptr - 1) = '_'
   then
      Error_Msg_S
        ("two consecutive underlines not permitted");

   elsif Source (Scan_Ptr) = '_' then
      Error_Msg_S
        ("underline cannot follow punctuation character");

   elsif Source (Scan_Ptr - 1) = '_' then
      Error_Msg_S
        ("punctuation character cannot follow underline");

   else
      Error_Msg_S
        ("two consecutive punctuation characters not permitted");
   end if;
end Error_No_Double_Underline;
459 ----------
460 -- Nlit --
461 ----------
procedure Nlit is

   C : Character;
   --  Current source program character

   Base_Char : Character;
   --  Either # or : (character at start of based number)

   Base : Int;
   --  Value of base

   UI_Base : Uint;
   --  Value of base in Uint format

   UI_Int_Value : Uint;
   --  Value of integer scanned by Scan_Integer in Uint format

   UI_Num_Value : Uint;
   --  Value of integer in numeric value being scanned

   Scale : Int;
   --  Scale value for real literal

   UI_Scale : Uint;
   --  Scale in Uint format

   Exponent_Is_Negative : Boolean;
   --  Set true for negative exponent

   Extended_Digit_Value : Int;
   --  Extended digit value

   Point_Scanned : Boolean;
   --  Flag for decimal point scanned in numeric literal

   -----------------------
   -- Local Subprograms --
   -----------------------

   procedure Error_Digit_Expected;
   --  Signal error of bad digit, Scan_Ptr points to the location at
   --  which the digit was expected on input, and is unchanged on return.

   procedure Scan_Integer;
   --  Procedure to scan integer literal. On entry, Scan_Ptr points to a
   --  digit, on exit Scan_Ptr points past the last character of the
   --  integer.
   --
   --  For each digit encountered, UI_Int_Value is multiplied by 10, and
   --  the value of the digit added to the result. In addition, the
   --  value in Scale is decremented by one for each actual digit
   --  scanned.

   --------------------------
   -- Error_Digit_Expected --
   --------------------------

   procedure Error_Digit_Expected is
   begin
      Error_Msg_S ("digit expected");
   end Error_Digit_Expected;

   ------------------
   -- Scan_Integer --
   ------------------

   procedure Scan_Integer is
      Ch : Character := Source (Scan_Ptr);
      --  Character currently being examined

   begin
      --  Each iteration consumes one digit, plus any underlines after it

      Digit_Loop : loop
         Accumulate_Checksum (Ch);
         UI_Int_Value :=
           UI_Int_Value * 10 + (Character'Pos (Ch) - Character'Pos ('0'));
         Scan_Ptr := Scan_Ptr + 1;
         Scale := Scale - 1;
         Ch := Source (Scan_Ptr);

         if Ch /= '_' then
            exit Digit_Loop when Ch not in '0' .. '9';

         --  Case of underline encountered

         else
            --  We do not accumulate the '_' in the checksum, so that
            --  1_234 is equivalent to 1234, and does not trigger
            --  compilation for "minimal recompilation" (gnatmake -m).

            loop
               Scan_Ptr := Scan_Ptr + 1;
               Ch := Source (Scan_Ptr);
               exit when Ch /= '_';
               Error_No_Double_Underline;
            end loop;

            if Ch not in '0' .. '9' then
               Error_Digit_Expected;
               exit Digit_Loop;
            end if;
         end if;
      end loop Digit_Loop;
   end Scan_Integer;

--  Start of Processing for Nlit

begin
   Base := 10;
   UI_Base := Uint_10;
   UI_Int_Value := Uint_0;

   --  Scale must have a value before Scan_Integer decrements it. The
   --  count for the leading integer is not wanted, so it is cleared again.

   Scale := 0;
   Scan_Integer;
   Scale := 0;

   Point_Scanned := False;
   UI_Num_Value := UI_Int_Value;

   --  Various possibilities now for continuing the literal are period,
   --  E/e (for exponent), or :/# (for based literal).

   C := Source (Scan_Ptr);

   if C = '.' then

      --  Scan out point, but do not scan past .. which is a range
      --  sequence, and must not be eaten up scanning a numeric literal.

      while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
         Accumulate_Checksum ('.');

         if Point_Scanned then
            Error_Msg_S ("duplicate point ignored");
         end if;

         Point_Scanned := True;
         Scan_Ptr := Scan_Ptr + 1;
         C := Source (Scan_Ptr);

         if C in '0' .. '9' then
            Scan_Integer;
            UI_Num_Value := UI_Int_Value;
         else
            Error_Msg
              ("real literal cannot end with point", Scan_Ptr - 1);
         end if;
      end loop;

   --  Based literal case. The base is the value we already scanned.
   --  In the case of colon, we insist that the following character
   --  is indeed an extended digit or a period. This catches a number
   --  of common errors, as well as catching the well known tricky
   --  bug otherwise arising from "x : integer range 1 .. 10:= 6;"

   elsif C = '#'
     or else (C = ':' and then
                (Source (Scan_Ptr + 1) = '.'
                   or else
                 Source (Scan_Ptr + 1) in '0' .. '9'
                   or else
                 Source (Scan_Ptr + 1) in 'A' .. 'Z'
                   or else
                 Source (Scan_Ptr + 1) in 'a' .. 'z'))
   then
      if C = ':' then
         Obsolescent_Check (Scan_Ptr);

         if Warn_On_Obsolescent_Feature then
            Error_Msg_S
              ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
            Error_Msg_S
              ("\use ""'#"" instead?");
         end if;
      end if;

      Accumulate_Checksum (C);
      Base_Char := C;
      UI_Base := UI_Int_Value;

      if UI_Base < 2 or else UI_Base > 16 then
         Error_Msg_SC ("base not 2-16");
         UI_Base := Uint_16;
      end if;

      Base := UI_To_Int (UI_Base);
      Scan_Ptr := Scan_Ptr + 1;

      --  Scan out extended integer [. integer]

      C := Source (Scan_Ptr);
      UI_Int_Value := Uint_0;
      Scale := 0;

      loop
         --  Classify the extended digit. An upper case letter is
         --  accumulated in the checksum as the character 32 positions
         --  higher.

         case C is
            when '0' .. '9' =>
               Accumulate_Checksum (C);
               Extended_Digit_Value :=
                 Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));

            when 'A' .. 'F' =>
               Accumulate_Checksum
                 (Character'Val (Character'Pos (C) + 32));
               Extended_Digit_Value :=
                 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;

            when 'a' .. 'f' =>
               Accumulate_Checksum (C);
               Extended_Digit_Value :=
                 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;

            when others =>
               Error_Msg_S ("extended digit expected");
               exit;
         end case;

         if Extended_Digit_Value >= Base then
            Error_Msg_S ("digit '>= base");
         end if;

         UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
         Scale := Scale - 1;
         Scan_Ptr := Scan_Ptr + 1;
         C := Source (Scan_Ptr);

         --  Now see what follows the extended digit

         if C = '_' then
            loop
               Accumulate_Checksum ('_');
               Scan_Ptr := Scan_Ptr + 1;
               C := Source (Scan_Ptr);
               exit when C /= '_';
               Error_No_Double_Underline;
            end loop;

         elsif C = '.' then
            Accumulate_Checksum ('.');

            if Point_Scanned then
               Error_Msg_S ("duplicate point ignored");
            end if;

            Scan_Ptr := Scan_Ptr + 1;
            C := Source (Scan_Ptr);
            Point_Scanned := True;
            Scale := 0;

         elsif C = Base_Char then
            Accumulate_Checksum (C);
            Scan_Ptr := Scan_Ptr + 1;
            exit;

         elsif C = '#' or else C = ':' then
            Error_Msg_S ("based number delimiters must match");
            Scan_Ptr := Scan_Ptr + 1;
            exit;

         elsif not Identifier_Char (C) then
            if Base_Char = '#' then
               Error_Msg_S ("missing '#");
            else
               Error_Msg_S ("missing ':");
            end if;

            exit;
         end if;

      end loop;

      UI_Num_Value := UI_Int_Value;
   end if;

   --  Scan out exponent

   if Point_Scanned then
      UI_Scale := UI_From_Int (Scale);
   else
      Scale := 0;
      UI_Scale := Uint_0;
   end if;

   C := Source (Scan_Ptr);

   if C = 'e' or else C = 'E' then
      Accumulate_Checksum ('e');
      Scan_Ptr := Scan_Ptr + 1;
      Exponent_Is_Negative := False;

      --  Optional sign of the exponent

      case Source (Scan_Ptr) is
         when '+' =>
            Accumulate_Checksum ('+');
            Scan_Ptr := Scan_Ptr + 1;

         when '-' =>
            Accumulate_Checksum ('-');

            if Point_Scanned then
               Exponent_Is_Negative := True;
            else
               Error_Msg_S
                 ("negative exponent not allowed for integer literal");
            end if;

            Scan_Ptr := Scan_Ptr + 1;

         when others =>
            null;
      end case;

      UI_Int_Value := Uint_0;

      if Source (Scan_Ptr) in '0' .. '9' then
         Scan_Integer;
      else
         Error_Digit_Expected;
      end if;

      if Exponent_Is_Negative then
         UI_Scale := UI_Scale - UI_Int_Value;
      else
         UI_Scale := UI_Scale + UI_Int_Value;
      end if;
   end if;

   --  Case of real literal to be returned

   if Point_Scanned then
      Token := Tok_Real_Literal;
      Real_Literal_Value :=
        UR_From_Components
          (Num   => UI_Num_Value,
           Den   => -UI_Scale,
           Rbase => Base);

   --  Case of integer literal to be returned

   else
      Token := Tok_Integer_Literal;

      if UI_Scale = 0 then
         Int_Literal_Value := UI_Num_Value;

      --  Avoid doing possibly expensive calculations in cases like
      --  parsing 163E800_000# when semantics will not be done anyway.
      --  This is especially useful when parsing garbled input.

      elsif Operating_Mode /= Check_Syntax
        and then (Serious_Errors_Detected = 0 or else Try_Semantics)
      then
         Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;

      else
         Int_Literal_Value := No_Uint;
      end if;
   end if;

   Accumulate_Token_Checksum;
end Nlit;
819 ----------
820 -- Slit --
821 ----------
procedure Slit is

   Delimiter : Character;
   --  Delimiter (first character of string)

   C : Character;
   --  Current source program character

   Code : Char_Code;
   --  Current character code value

   Err : Boolean;
   --  Error flag for Scan_Wide call

   procedure Error_Bad_String_Char;
   --  Signal bad character in string/character literal. On entry
   --  Scan_Ptr points to the improper character encountered during the
   --  scan. Scan_Ptr is not modified, so it still points to the bad
   --  character on return.

   procedure Error_Unterminated_String;
   --  Procedure called if a line terminator character is encountered
   --  during scanning a string, meaning that the string is not properly
   --  terminated.

   procedure Set_String;
   --  Procedure used to distinguish between string and operator symbol.
   --  On entry the string has been scanned out, and its characters
   --  start at Token_Ptr and end one character before Scan_Ptr. On exit
   --  Token is set to Tok_String_Literal or Tok_Operator_Symbol as
   --  appropriate, and Token_Node is appropriately initialized. In
   --  addition, in the operator symbol case, Token_Name is
   --  appropriately set.

   ---------------------------
   -- Error_Bad_String_Char --
   ---------------------------

   procedure Error_Bad_String_Char is
      Bad_Char : constant Character := Source (Scan_Ptr);

   begin
      --  The wording of the message depends on the kind of bad character

      if Bad_Char = HT then
         Error_Msg_S ("horizontal tab not allowed in string");

      elsif Bad_Char = VT or else Bad_Char = FF then
         Error_Msg_S ("format effector not allowed in string");

      elsif Bad_Char in Upper_Half_Character then
         Error_Msg_S ("(Ada 83) upper half character not allowed");

      else
         Error_Msg_S ("control character not allowed in string");
      end if;
   end Error_Bad_String_Char;

   -------------------------------
   -- Error_Unterminated_String --
   -------------------------------

   procedure Error_Unterminated_String is
   begin
      --  An interesting little refinement. Consider the following
      --  examples:

      --     A := "this is an unterminated string;
      --     A := "this is an unterminated string &
      --     P(A, "this is a parameter that didn't get terminated);

      --  We fiddle a little to do slightly better placement in these
      --  cases also if there is white space at the end of the line we
      --  place the flag at the start of this white space, not at the
      --  end. Note that we only have to test for blanks, since tabs
      --  aren't allowed in strings in the first place and would have
      --  caused an error message.

      --  Two more cases that we treat specially are:

      --     A := "this string uses the wrong terminator'
      --     A := "this string uses the wrong terminator' &

      --  In these cases we give a different error message as well

      --  We actually reposition the scan pointer to the point where we
      --  place the flag in these cases, since it seems a better bet on
      --  the original intention.

      while Source (Scan_Ptr - 1) = ' '
        or else Source (Scan_Ptr - 1) = '&'
      loop
         Scan_Ptr := Scan_Ptr - 1;
         Unstore_String_Char;
      end loop;

      --  Check for case of incorrect string terminator, but single quote
      --  is not considered incorrect if the opening terminator misused
      --  a single quote (error message already given).

      if Delimiter /= '''
        and then Source (Scan_Ptr - 1) = '''
      then
         Unstore_String_Char;
         Error_Msg
           ("incorrect string terminator character", Scan_Ptr - 1);
         return;
      end if;

      --  Back up over a trailing semicolon, and over a right paren that
      --  immediately precedes that semicolon.

      if Source (Scan_Ptr - 1) = ';' then
         Scan_Ptr := Scan_Ptr - 1;
         Unstore_String_Char;

         if Source (Scan_Ptr - 1) = ')' then
            Scan_Ptr := Scan_Ptr - 1;
            Unstore_String_Char;
         end if;
      end if;

      Error_Msg_S ("missing string quote");
   end Error_Unterminated_String;

   ----------------
   -- Set_String --
   ----------------

   procedure Set_String is
      Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
      --  Length of the string, not counting the two delimiters

      C1 : Character;
      C2 : Character;
      C3 : Character;
      --  First, second and third characters of the string

      function Is_Letter (Ch : Character; Upper : Character) return Boolean;
      --  Upper is an upper case letter. Returns True if Ch is Upper or
      --  the character 32 positions above it (its lower case form).

      ---------------
      -- Is_Letter --
      ---------------

      function Is_Letter (Ch : Character; Upper : Character) return Boolean is
      begin
         return Ch = Upper
           or else Ch = Character'Val (Character'Pos (Upper) + 32);
      end Is_Letter;

   begin
      --  Token_Name is currently set to Error_Name. The following
      --  section of code resets Token_Name to the proper Name_Op_xx
      --  value if the string is a valid operator symbol, otherwise it is
      --  left set to Error_Name.

      if Slen = 1 then
         C1 := Source (Token_Ptr + 1);

         case C1 is
            when '=' =>
               Token_Name := Name_Op_Eq;

            when '>' =>
               Token_Name := Name_Op_Gt;

            when '<' =>
               Token_Name := Name_Op_Lt;

            when '+' =>
               Token_Name := Name_Op_Add;

            when '-' =>
               Token_Name := Name_Op_Subtract;

            when '&' =>
               Token_Name := Name_Op_Concat;

            when '*' =>
               Token_Name := Name_Op_Multiply;

            when '/' =>
               Token_Name := Name_Op_Divide;

            when others =>
               null;
         end case;

      elsif Slen = 2 then
         C1 := Source (Token_Ptr + 1);
         C2 := Source (Token_Ptr + 2);

         if C1 = '*' and then C2 = '*' then
            Token_Name := Name_Op_Expon;

         elsif C2 = '=' then
            case C1 is
               when '/' =>
                  Token_Name := Name_Op_Ne;

               when '<' =>
                  Token_Name := Name_Op_Le;

               when '>' =>
                  Token_Name := Name_Op_Ge;

               when others =>
                  null;
            end case;

         elsif Is_Letter (C1, 'O') and then Is_Letter (C2, 'R') then
            Token_Name := Name_Op_Or;
         end if;

      elsif Slen = 3 then
         C1 := Source (Token_Ptr + 1);
         C2 := Source (Token_Ptr + 2);
         C3 := Source (Token_Ptr + 3);

         if Is_Letter (C1, 'A')                            --  AND
           and then Is_Letter (C2, 'N')
           and then Is_Letter (C3, 'D')
         then
            Token_Name := Name_Op_And;

         elsif Is_Letter (C1, 'A')                         --  ABS
           and then Is_Letter (C2, 'B')
           and then Is_Letter (C3, 'S')
         then
            Token_Name := Name_Op_Abs;

         elsif Is_Letter (C1, 'M')                         --  MOD
           and then Is_Letter (C2, 'O')
           and then Is_Letter (C3, 'D')
         then
            Token_Name := Name_Op_Mod;

         elsif Is_Letter (C1, 'N')                         --  NOT
           and then Is_Letter (C2, 'O')
           and then Is_Letter (C3, 'T')
         then
            Token_Name := Name_Op_Not;

         elsif Is_Letter (C1, 'R')                         --  REM
           and then Is_Letter (C2, 'E')
           and then Is_Letter (C3, 'M')
         then
            Token_Name := Name_Op_Rem;

         elsif Is_Letter (C1, 'X')                         --  XOR
           and then Is_Letter (C2, 'O')
           and then Is_Letter (C3, 'R')
         then
            Token_Name := Name_Op_Xor;
         end if;

      end if;

      --  If it is an operator symbol, then Token_Name is set. If it is
      --  some other string value, then Token_Name still contains
      --  Error_Name.

      if Token_Name /= Error_Name then
         Token := Tok_Operator_Symbol;
      else
         Token := Tok_String_Literal;
      end if;
   end Set_String;

--  Start of processing for Slit

begin
   --  On entry, Scan_Ptr points to the opening character of the string
   --  which is either a percent, double quote, or apostrophe (single
   --  quote). The latter case is an error detected by the character
   --  literal circuit.

   Delimiter := Source (Scan_Ptr);
   Accumulate_Checksum (Delimiter);
   Start_String;
   Scan_Ptr := Scan_Ptr + 1;

   --  Loop to scan out characters of string literal

   loop
      C := Source (Scan_Ptr);

      --  Delimiter: either the end of the string, or the first of a
      --  doubled pair standing for one delimiter character in the string.

      if C = Delimiter then
         Accumulate_Checksum (C);
         Scan_Ptr := Scan_Ptr + 1;
         exit when Source (Scan_Ptr) /= Delimiter;
         Code := Get_Char_Code (C);
         Accumulate_Checksum (C);
         Scan_Ptr := Scan_Ptr + 1;

      elsif C = '"' and then Delimiter = '%' then
         Error_Msg_S
           ("quote not allowed in percent delimited string");
         Code := Get_Char_Code (C);
         Scan_Ptr := Scan_Ptr + 1;

      --  Start of a wide character sequence

      elsif (C = ESC
               and then Wide_Character_Encoding_Method
                          in WC_ESC_Encoding_Method)
        or else (C in Upper_Half_Character
                   and then Upper_Half_Encoding)
        or else (C = '['
                   and then Source (Scan_Ptr + 1) = '"'
                   and then Identifier_Char (Source (Scan_Ptr + 2)))
      then
         Wptr := Scan_Ptr;
         Scan_Wide (Source, Scan_Ptr, Code, Err);

         if Err then
            Error_Illegal_Wide_Character;
            Code := Get_Char_Code (' ');
         end if;

         Accumulate_Checksum (Code);

         --  In Ada 95 mode we allow any wide characters in a string
         --  but in Ada 2005, the set of characters allowed has been
         --  restricted to graphic characters.

         if Ada_Version >= Ada_05
           and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
         then
            Error_Msg
              ("(Ada 2005) non-graphic character not permitted " &
               "in string literal", Wptr);
         end if;

      --  Any other character

      else
         Accumulate_Checksum (C);

         if C not in Graphic_Character then
            if C in Line_Terminator then
               Error_Unterminated_String;
               exit;

            --  An upper half character is flagged only in Ada 83 mode,
            --  any other non-graphic character is always flagged.

            elsif C not in Upper_Half_Character
              or else Ada_Version = Ada_83
            then
               Error_Bad_String_Char;
            end if;
         end if;

         Code := Get_Char_Code (C);
         Scan_Ptr := Scan_Ptr + 1;
      end if;

      Store_String_Char (Code);

      if not In_Character_Range (Code) then
         Wide_Character_Found := True;
      end if;
   end loop;

   String_Literal_Id := End_String;
   Set_String;
end Slit;
1169 -- Start of processing for Scan
1171 begin
1172 Prev_Token := Token;
1173 Prev_Token_Ptr := Token_Ptr;
1174 Token_Name := Error_Name;
1176 -- The following loop runs more than once only if a format effector
1177 -- (tab, vertical tab, form feed, line feed, carriage return) is
1178 -- encountered and skipped, or some error situation, such as an
1179 -- illegal character, is encountered.
1181 <<Scan_Next_Character>>
1183 loop
1184 -- Skip past blanks, loop is opened up for speed
1186 while Source (Scan_Ptr) = ' ' loop
1187 if Source (Scan_Ptr + 1) /= ' ' then
1188 Scan_Ptr := Scan_Ptr + 1;
1189 exit;
1190 end if;
1192 if Source (Scan_Ptr + 2) /= ' ' then
1193 Scan_Ptr := Scan_Ptr + 2;
1194 exit;
1195 end if;
1197 if Source (Scan_Ptr + 3) /= ' ' then
1198 Scan_Ptr := Scan_Ptr + 3;
1199 exit;
1200 end if;
1202 if Source (Scan_Ptr + 4) /= ' ' then
1203 Scan_Ptr := Scan_Ptr + 4;
1204 exit;
1205 end if;
1207 if Source (Scan_Ptr + 5) /= ' ' then
1208 Scan_Ptr := Scan_Ptr + 5;
1209 exit;
1210 end if;
1212 if Source (Scan_Ptr + 6) /= ' ' then
1213 Scan_Ptr := Scan_Ptr + 6;
1214 exit;
1215 end if;
1217 if Source (Scan_Ptr + 7) /= ' ' then
1218 Scan_Ptr := Scan_Ptr + 7;
1219 exit;
1220 end if;
1222 Scan_Ptr := Scan_Ptr + 8;
1223 end loop;
1225 -- We are now at a non-blank character, which is the first character
1226 -- of the token we will scan, and hence the value of Token_Ptr.
1228 Token_Ptr := Scan_Ptr;
1230 -- Here begins the main case statement which transfers control on the
1231 -- basis of the non-blank character we have encountered.
1233 case Source (Scan_Ptr) is
1235 -- Line terminator characters
1237 when CR | LF | FF | VT =>
1238 goto Scan_Line_Terminator;
1240 -- Horizontal tab, just skip past it
1242 when HT =>
1243 if Style_Check then Style.Check_HT; end if;
1244 Scan_Ptr := Scan_Ptr + 1;
1246 -- End of file character, treated as an end of file only if it is
1247 -- the last character in the buffer, otherwise it is ignored.
1249 when EOF =>
1250 if Scan_Ptr = Source_Last (Current_Source_File) then
1251 Check_End_Of_Line;
1252 if Style_Check then Style.Check_EOF; end if;
1253 Token := Tok_EOF;
1254 return;
1255 else
1256 Scan_Ptr := Scan_Ptr + 1;
1257 end if;
1259 -- Ampersand
1261 when '&' =>
1262 Accumulate_Checksum ('&');
1264 if Source (Scan_Ptr + 1) = '&' then
1265 Error_Msg_S ("'&'& should be `AND THEN`");
1266 Scan_Ptr := Scan_Ptr + 2;
1267 Token := Tok_And;
1268 return;
1270 else
1271 Scan_Ptr := Scan_Ptr + 1;
1272 Token := Tok_Ampersand;
1273 return;
1274 end if;
1276 -- Asterisk (can be multiplication operator or double asterisk which
1277 -- is the exponentiation compound delimiter).
1279 when '*' =>
1280 Accumulate_Checksum ('*');
1282 if Source (Scan_Ptr + 1) = '*' then
1283 Accumulate_Checksum ('*');
1284 Scan_Ptr := Scan_Ptr + 2;
1285 Token := Tok_Double_Asterisk;
1286 return;
1288 else
1289 Scan_Ptr := Scan_Ptr + 1;
1290 Token := Tok_Asterisk;
1291 return;
1292 end if;
1294 -- Colon, which can either be an isolated colon, or part of an
1295 -- assignment compound delimiter.
1297 when ':' =>
1298 Accumulate_Checksum (':');
1300 if Double_Char_Token ('=') then
1301 Token := Tok_Colon_Equal;
1302 if Style_Check then Style.Check_Colon_Equal; end if;
1303 return;
1305 elsif Source (Scan_Ptr + 1) = '-'
1306 and then Source (Scan_Ptr + 2) /= '-'
1307 then
1308 Token := Tok_Colon_Equal;
1309 Error_Msg (":- should be :=", Scan_Ptr);
1310 Scan_Ptr := Scan_Ptr + 2;
1311 return;
1313 else
1314 Scan_Ptr := Scan_Ptr + 1;
1315 Token := Tok_Colon;
1316 if Style_Check then Style.Check_Colon; end if;
1317 return;
1318 end if;
1320 -- Left parenthesis
1322 when '(' =>
1323 Accumulate_Checksum ('(');
1324 Scan_Ptr := Scan_Ptr + 1;
1325 Token := Tok_Left_Paren;
1326 if Style_Check then Style.Check_Left_Paren; end if;
1327 return;
1329 -- Left bracket
1331 when '[' =>
1332 if Source (Scan_Ptr + 1) = '"' then
1333 goto Scan_Wide_Character;
1335 else
1336 Error_Msg_S ("illegal character, replaced by ""(""");
1337 Scan_Ptr := Scan_Ptr + 1;
1338 Token := Tok_Left_Paren;
1339 return;
1340 end if;
1342 -- Left brace
1344 when '{' =>
1345 Error_Msg_S ("illegal character, replaced by ""(""");
1346 Scan_Ptr := Scan_Ptr + 1;
1347 Token := Tok_Left_Paren;
1348 return;
1350 -- Comma
1352 when ',' =>
1353 Accumulate_Checksum (',');
1354 Scan_Ptr := Scan_Ptr + 1;
1355 Token := Tok_Comma;
1356 if Style_Check then Style.Check_Comma; end if;
1357 return;
1359 -- Dot, which is either an isolated period, or part of a double dot
1360 -- compound delimiter sequence. We also check for the case of a
1361 -- digit following the period, to give a better error message.
1363 when '.' =>
1364 Accumulate_Checksum ('.');
1366 if Double_Char_Token ('.') then
1367 Token := Tok_Dot_Dot;
1368 if Style_Check then Style.Check_Dot_Dot; end if;
1369 return;
1371 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1372 Error_Msg_S ("numeric literal cannot start with point");
1373 Scan_Ptr := Scan_Ptr + 1;
1375 else
1376 Scan_Ptr := Scan_Ptr + 1;
1377 Token := Tok_Dot;
1378 return;
1379 end if;
1381 -- Equal, which can either be an equality operator, or part of the
1382 -- arrow (=>) compound delimiter.
1384 when '=' =>
1385 Accumulate_Checksum ('=');
1387 if Double_Char_Token ('>') then
1388 Token := Tok_Arrow;
1389 if Style_Check then Style.Check_Arrow; end if;
1390 return;
1392 elsif Source (Scan_Ptr + 1) = '=' then
1393 Error_Msg_S ("== should be =");
1394 Scan_Ptr := Scan_Ptr + 1;
1395 end if;
1397 Scan_Ptr := Scan_Ptr + 1;
1398 Token := Tok_Equal;
1399 return;
1401 -- Greater than, which can be a greater than operator, greater than
1402 -- or equal operator, or first character of a right label bracket.
1404 when '>' =>
1405 Accumulate_Checksum ('>');
1407 if Double_Char_Token ('=') then
1408 Token := Tok_Greater_Equal;
1409 return;
1411 elsif Double_Char_Token ('>') then
1412 Token := Tok_Greater_Greater;
1413 return;
1415 else
1416 Scan_Ptr := Scan_Ptr + 1;
1417 Token := Tok_Greater;
1418 return;
1419 end if;
1421 -- Less than, which can be a less than operator, less than or equal
1422 -- operator, or the first character of a left label bracket, or the
1423 -- first character of a box (<>) compound delimiter.
1425 when '<' =>
1426 Accumulate_Checksum ('<');
1428 if Double_Char_Token ('=') then
1429 Token := Tok_Less_Equal;
1430 return;
1432 elsif Double_Char_Token ('>') then
1433 Token := Tok_Box;
1434 if Style_Check then Style.Check_Box; end if;
1435 return;
1437 elsif Double_Char_Token ('<') then
1438 Token := Tok_Less_Less;
1439 return;
1441 else
1442 Scan_Ptr := Scan_Ptr + 1;
1443 Token := Tok_Less;
1444 return;
1445 end if;
1447 -- Minus, which is either a subtraction operator, or the first
1448 -- character of double minus starting a comment
1450 when '-' => Minus_Case : begin
1451 if Source (Scan_Ptr + 1) = '>' then
1452 Error_Msg_S ("invalid token");
1453 Scan_Ptr := Scan_Ptr + 2;
1454 Token := Tok_Arrow;
1455 return;
1457 elsif Source (Scan_Ptr + 1) /= '-' then
1458 Accumulate_Checksum ('-');
1459 Scan_Ptr := Scan_Ptr + 1;
1460 Token := Tok_Minus;
1461 return;
1463 -- Comment
1465 else -- Source (Scan_Ptr + 1) = '-' then
1466 if Style_Check then Style.Check_Comment; end if;
1467 Scan_Ptr := Scan_Ptr + 2;
1468 Start_Of_Comment := Scan_Ptr;
1470 -- Loop to scan comment (this loop runs more than once only if
1471 -- a horizontal tab or other non-graphic character is scanned)
1473 loop
1474 -- Scan to non graphic character (opened up for speed)
1476 -- Note that we just eat left brackets, which means that
1477 -- bracket notation cannot be used for end of line
1478 -- characters in comments. This seems a reasonable choice,
1479 -- since no one would ever use brackets notation in a real
1480 -- program in this situation, and if we allow brackets
1481 -- notation, we forbid some valid comments which contain a
1482 -- brackets sequence that happens to match an end of line
1483 -- character.
1485 loop
1486 exit when Source (Scan_Ptr) not in Graphic_Character;
1487 Scan_Ptr := Scan_Ptr + 1;
1488 exit when Source (Scan_Ptr) not in Graphic_Character;
1489 Scan_Ptr := Scan_Ptr + 1;
1490 exit when Source (Scan_Ptr) not in Graphic_Character;
1491 Scan_Ptr := Scan_Ptr + 1;
1492 exit when Source (Scan_Ptr) not in Graphic_Character;
1493 Scan_Ptr := Scan_Ptr + 1;
1494 exit when Source (Scan_Ptr) not in Graphic_Character;
1495 Scan_Ptr := Scan_Ptr + 1;
1496 end loop;
1498 -- Keep going if horizontal tab
1500 if Source (Scan_Ptr) = HT then
1501 if Style_Check then Style.Check_HT; end if;
1502 Scan_Ptr := Scan_Ptr + 1;
1504 -- Terminate scan of comment if line terminator
1506 elsif Source (Scan_Ptr) in Line_Terminator then
1507 exit;
1509 -- Terminate scan of comment if end of file encountered
1510 -- (embedded EOF character or real last character in file)
1512 elsif Source (Scan_Ptr) = EOF then
1513 exit;
1515 -- If we have a wide character, we have to scan it out,
1516 -- because it might be a legitimate line terminator
1518 elsif (Source (Scan_Ptr) = ESC
1519 and then Identifier_Char (ESC))
1520 or else
1521 (Source (Scan_Ptr) in Upper_Half_Character
1522 and then Upper_Half_Encoding)
1523 then
1524 declare
1525 Wptr : constant Source_Ptr := Scan_Ptr;
1526 Code : Char_Code;
1527 Err : Boolean;
1529 begin
1530 Scan_Wide (Source, Scan_Ptr, Code, Err);
1532 -- If not well formed wide character, then just skip
1533 -- past it and ignore it.
1535 if Err then
1536 Scan_Ptr := Wptr + 1;
1538 -- If UTF_32 terminator, terminate comment scan
1540 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1541 Scan_Ptr := Wptr;
1542 exit;
1543 end if;
1544 end;
1546 -- Keep going if character in 80-FF range, or is ESC. These
1547 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1548 -- They are allowed even in Ada 83 mode according to the
1549 -- approved AI. ESC was added to the AI in June 93.
1551 elsif Source (Scan_Ptr) in Upper_Half_Character
1552 or else Source (Scan_Ptr) = ESC
1553 then
1554 Scan_Ptr := Scan_Ptr + 1;
1556 -- Otherwise we have an illegal comment character
1558 else
1559 Error_Illegal_Character;
1560 end if;
1561 end loop;
1563 -- Note that, except when comments are tokens, we do NOT
1564 -- execute a return here, instead we fall through to reexecute
1565 -- the scan loop to look for a token.
1567 if Comment_Is_Token then
1568 Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1569 Name_Buffer (1 .. Name_Len) :=
1570 String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1571 Comment_Id := Name_Find;
1572 Token := Tok_Comment;
1573 return;
1574 end if;
1575 end if;
1576 end Minus_Case;
1578 -- Double quote starting a string literal
1580 when '"' =>
1581 Slit;
1582 Post_Scan;
1583 return;
1585 -- Percent starting a string literal
1587 when '%' =>
1588 Obsolescent_Check (Token_Ptr);
1590 if Warn_On_Obsolescent_Feature then
1591 Error_Msg_S
1592 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1593 Error_Msg_S
1594 ("\use """""" instead?");
1595 end if;
1597 Slit;
1598 Post_Scan;
1599 return;
1601 -- Apostrophe. This can either be the start of a character literal,
1602 -- or an isolated apostrophe used in a qualified expression or an
1603 -- attribute. We treat it as a character literal if it does not
1604 -- follow a right parenthesis, identifier, the keyword ALL or
1605 -- a literal. This means that we correctly treat constructs like:
1607 -- A := CHARACTER'('A');
1609 -- Note that RM-2.2(7) does not require a separator between
1610 -- "CHARACTER" and "'" in the above.
1612 when ''' => Char_Literal_Case : declare
1613 Code : Char_Code;
1614 Err : Boolean;
1616 begin
1617 Accumulate_Checksum (''');
1618 Scan_Ptr := Scan_Ptr + 1;
1620 -- Here is where we make the test to distinguish the cases. Treat
1621 -- as apostrophe if previous token is an identifier, right paren
1622 -- or the reserved word "all" (latter case as in A.all'Address)
1623 -- (or the reserved word "project" in project files). Also treat
1624 -- it as apostrophe after a literal (this catches some legitimate
1625 -- cases, like A."abs"'Address, and also gives better error
1626 -- behavior for impossible cases like 123'xxx).
1628 if Prev_Token = Tok_Identifier
1629 or else Prev_Token = Tok_Right_Paren
1630 or else Prev_Token = Tok_All
1631 or else Prev_Token = Tok_Project
1632 or else Prev_Token in Token_Class_Literal
1633 then
1634 Token := Tok_Apostrophe;
1635 if Style_Check then Style.Check_Apostrophe; end if;
1636 return;
1638 -- Otherwise the apostrophe starts a character literal
1640 else
1641 -- Case of wide character literal
1643 if (Source (Scan_Ptr) = ESC
1644 and then
1645 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1646 or else
1647 (Source (Scan_Ptr) in Upper_Half_Character
1648 and then
1649 Upper_Half_Encoding)
1650 or else
1651 (Source (Scan_Ptr) = '['
1652 and then
1653 Source (Scan_Ptr + 1) = '"')
1654 then
1655 Wptr := Scan_Ptr;
1656 Scan_Wide (Source, Scan_Ptr, Code, Err);
1657 Accumulate_Checksum (Code);
1659 if Err then
1660 Error_Illegal_Wide_Character;
1661 Code := Character'Pos (' ');
1663 -- In Ada 95 mode we allow any wide character in a character
1664 -- literal, but in Ada 2005, the set of characters allowed
1665 -- is restricted to graphic characters.
1667 elsif Ada_Version >= Ada_05
1668 and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1669 then
1670 Error_Msg
1671 ("(Ada 2005) non-graphic character not permitted " &
1672 "in character literal", Wptr);
1673 end if;
1675 if Source (Scan_Ptr) /= ''' then
1676 Error_Msg_S ("missing apostrophe");
1677 else
1678 Scan_Ptr := Scan_Ptr + 1;
1679 end if;
1681 -- If we do not find a closing quote in the expected place then
1682 -- assume that we have a misguided attempt at a string literal.
1684 -- However, if previous token is RANGE, then we return an
1685 -- apostrophe instead since this gives better error recovery
1687 elsif Source (Scan_Ptr + 1) /= ''' then
1688 if Prev_Token = Tok_Range then
1689 Token := Tok_Apostrophe;
1690 return;
1692 else
1693 Scan_Ptr := Scan_Ptr - 1;
1694 Error_Msg_S
1695 ("strings are delimited by double quote character");
1696 Slit;
1697 Post_Scan;
1698 return;
1699 end if;
1701 -- Otherwise we have a (non-wide) character literal
1703 else
1704 Accumulate_Checksum (Source (Scan_Ptr));
1706 if Source (Scan_Ptr) not in Graphic_Character then
1707 if Source (Scan_Ptr) in Upper_Half_Character then
1708 if Ada_Version = Ada_83 then
1709 Error_Illegal_Character;
1710 end if;
1712 else
1713 Error_Illegal_Character;
1714 end if;
1715 end if;
1717 Code := Get_Char_Code (Source (Scan_Ptr));
1718 Scan_Ptr := Scan_Ptr + 2;
1719 end if;
1721 -- Fall through here with Scan_Ptr updated past the closing
1722 -- quote, and Code set to the Char_Code value for the literal
1724 Accumulate_Checksum (''');
1725 Token := Tok_Char_Literal;
1726 Set_Character_Literal_Name (Code);
1727 Token_Name := Name_Find;
1728 Character_Code := Code;
1729 Post_Scan;
1730 return;
1731 end if;
1732 end Char_Literal_Case;
1734 -- Right parenthesis
1736 when ')' =>
1737 Accumulate_Checksum (')');
1738 Scan_Ptr := Scan_Ptr + 1;
1739 Token := Tok_Right_Paren;
1740 if Style_Check then Style.Check_Right_Paren; end if;
1741 return;
1743 -- Right bracket or right brace, treated as right paren
1745 when ']' | '}' =>
1746 Error_Msg_S ("illegal character, replaced by "")""");
1747 Scan_Ptr := Scan_Ptr + 1;
1748 Token := Tok_Right_Paren;
1749 return;
1751 -- Slash (can be division operator or first character of not equal)
1753 when '/' =>
1754 Accumulate_Checksum ('/');
1756 if Double_Char_Token ('=') then
1757 Token := Tok_Not_Equal;
1758 return;
1759 else
1760 Scan_Ptr := Scan_Ptr + 1;
1761 Token := Tok_Slash;
1762 return;
1763 end if;
1765 -- Semicolon
1767 when ';' =>
1768 Accumulate_Checksum (';');
1769 Scan_Ptr := Scan_Ptr + 1;
1770 Token := Tok_Semicolon;
1771 if Style_Check then Style.Check_Semicolon; end if;
1772 return;
1774 -- Vertical bar
1776 when '|' => Vertical_Bar_Case : begin
1777 Accumulate_Checksum ('|');
1779 -- Special check for || to give nice message
1781 if Source (Scan_Ptr + 1) = '|' then
1782 Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1783 Scan_Ptr := Scan_Ptr + 2;
1784 Token := Tok_Or;
1785 return;
1787 else
1788 Scan_Ptr := Scan_Ptr + 1;
1789 Token := Tok_Vertical_Bar;
1790 if Style_Check then Style.Check_Vertical_Bar; end if;
1791 return;
1792 end if;
1793 end Vertical_Bar_Case;
1795 -- Exclamation, replacement character for vertical bar
1797 when '!' => Exclamation_Case : begin
1798 Accumulate_Checksum ('!');
1799 Obsolescent_Check (Token_Ptr);
1801 if Warn_On_Obsolescent_Feature then
1802 Error_Msg_S
1803 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1804 Error_Msg_S
1805 ("\use ""'|"" instead?");
1806 end if;
1808 if Source (Scan_Ptr + 1) = '=' then
1809 Error_Msg_S ("'!= should be /=");
1810 Scan_Ptr := Scan_Ptr + 2;
1811 Token := Tok_Not_Equal;
1812 return;
1814 else
1815 Scan_Ptr := Scan_Ptr + 1;
1816 Token := Tok_Vertical_Bar;
1817 return;
1818 end if;
1819 end Exclamation_Case;
1821 -- Plus
1823 when '+' => Plus_Case : begin
1824 Accumulate_Checksum ('+');
1825 Scan_Ptr := Scan_Ptr + 1;
1826 Token := Tok_Plus;
1827 return;
1828 end Plus_Case;
1830 -- Digits starting a numeric literal
1832 when '0' .. '9' =>
1833 Nlit;
1835 if Identifier_Char (Source (Scan_Ptr)) then
1836 Error_Msg_S
1837 ("delimiter required between literal and identifier");
1838 end if;
1839 Post_Scan;
1840 return;
1842 -- Lower case letters
1844 when 'a' .. 'z' =>
1845 Name_Len := 1;
1846 Underline_Found := False;
1847 Name_Buffer (1) := Source (Scan_Ptr);
1848 Accumulate_Checksum (Name_Buffer (1));
1849 Scan_Ptr := Scan_Ptr + 1;
1850 goto Scan_Identifier;
1852 -- Upper case letters
1854 when 'A' .. 'Z' =>
1855 Name_Len := 1;
1856 Underline_Found := False;
1857 Name_Buffer (1) :=
1858 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1859 Accumulate_Checksum (Name_Buffer (1));
1860 Scan_Ptr := Scan_Ptr + 1;
1861 goto Scan_Identifier;
1863 -- Underline character
1865 when '_' =>
1866 if Special_Characters ('_') then
1867 Token_Ptr := Scan_Ptr;
1868 Scan_Ptr := Scan_Ptr + 1;
1869 Token := Tok_Special;
1870 Special_Character := '_';
1871 return;
1872 end if;
1874 Error_Msg_S ("identifier cannot start with underline");
1875 Name_Len := 1;
1876 Name_Buffer (1) := '_';
1877 Scan_Ptr := Scan_Ptr + 1;
1878 Underline_Found := False;
1879 goto Scan_Identifier;
1881 -- Space (not possible, because we scanned past blanks)
1883 when ' ' =>
1884 raise Program_Error;
1886 -- Characters in top half of ASCII 8-bit chart
1888 when Upper_Half_Character =>
1890 -- Wide character case
1892 if Upper_Half_Encoding then
1893 goto Scan_Wide_Character;
1895 -- Otherwise we have OK Latin-1 character
1897 else
1898 -- Upper half characters may possibly be identifier letters
1899 -- but can never be digits, so Identifier_Char can be used to
1900 -- test for a valid start of identifier character.
1902 if Identifier_Char (Source (Scan_Ptr)) then
1903 Name_Len := 0;
1904 Underline_Found := False;
1905 goto Scan_Identifier;
1906 else
1907 Error_Illegal_Character;
1908 end if;
1909 end if;
1911 when ESC =>
1913 -- ESC character, possible start of identifier if wide characters
1914 -- using ESC encoding are allowed in identifiers, which we can
1915 -- tell by looking at the Identifier_Char flag for ESC, which is
1916 -- only true if these conditions are met. In Ada 2005 mode, may
1917 -- also be valid UTF_32 space or line terminator character.
1919 if Identifier_Char (ESC) then
1920 Name_Len := 0;
1921 goto Scan_Wide_Character;
1922 else
1923 Error_Illegal_Character;
1924 end if;
1926 -- Invalid control characters
1928 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | ASCII.SO |
1929 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1930 EM | FS | GS | RS | US | DEL
1932 Error_Illegal_Character;
1934 -- Invalid graphic characters
1936 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1938 -- If Set_Special_Character has been called for this character,
1939 -- set Scans.Special_Character and return a Special token.
1941 if Special_Characters (Source (Scan_Ptr)) then
1942 Token_Ptr := Scan_Ptr;
1943 Token := Tok_Special;
1944 Special_Character := Source (Scan_Ptr);
1945 Scan_Ptr := Scan_Ptr + 1;
1946 return;
1948 -- Otherwise, this is an illegal character
1950 else
1951 Error_Illegal_Character;
1952 end if;
1954 -- End switch on non-blank character
1956 end case;
1958 -- End loop past format effectors. The exit from this loop is by
1959 -- executing a return statement following completion of token scan
1960 -- (control never falls out of this loop to the code which follows)
1962 end loop;
1964 -- Wide_Character scanning routine. On entry we have encountered the
1965 -- initial character of a wide character sequence.
1967 <<Scan_Wide_Character>>
1969 declare
1970 Code : Char_Code;
1971 Cat : Category;
1972 Err : Boolean;
1974 begin
1975 Wptr := Scan_Ptr;
1976 Scan_Wide (Source, Scan_Ptr, Code, Err);
1978 -- If bad wide character, signal error and continue scan
1980 if Err then
1981 Error_Illegal_Wide_Character;
1982 goto Scan_Next_Character;
1983 end if;
1985 Cat := Get_Category (UTF_32 (Code));
1987 -- If OK letter, reset scan ptr and go scan identifier
1989 if Is_UTF_32_Letter (Cat) then
1990 Scan_Ptr := Wptr;
1991 Name_Len := 0;
1992 Underline_Found := False;
1993 goto Scan_Identifier;
1995 -- If OK wide space, ignore and keep scanning (we do not include
1996 -- any ignored spaces in checksum)
1998 elsif Is_UTF_32_Space (Cat) then
1999 goto Scan_Next_Character;
2001 -- If OK wide line terminator, terminate current line
2003 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
2004 Scan_Ptr := Wptr;
2005 goto Scan_Line_Terminator;
2007 -- Punctuation is an error (at start of identifier)
2009 elsif Is_UTF_32_Punctuation (Cat) then
2010 Error_Msg
2011 ("identifier cannot start with punctuation", Wptr);
2012 Scan_Ptr := Wptr;
2013 Name_Len := 0;
2014 Underline_Found := False;
2015 goto Scan_Identifier;
2017 -- Mark character is an error (at start of identifier)
2019 elsif Is_UTF_32_Mark (Cat) then
2020 Error_Msg
2021 ("identifier cannot start with mark character", Wptr);
2022 Scan_Ptr := Wptr;
2023 Name_Len := 0;
2024 Underline_Found := False;
2025 goto Scan_Identifier;
2027 -- Other format character is an error (at start of identifier)
2029 elsif Is_UTF_32_Other (Cat) then
2030 Error_Msg
2031 ("identifier cannot start with other format character", Wptr);
2032 Scan_Ptr := Wptr;
2033 Name_Len := 0;
2034 Underline_Found := False;
2035 goto Scan_Identifier;
2037 -- Extended digit character is an error. Could be bad start of
2038 -- identifier or bad literal. Not worth doing too much to try to
2039 -- distinguish these cases, but we will do a little bit.
2041 elsif Is_UTF_32_Digit (Cat) then
2042 Error_Msg
2043 ("identifier cannot start with digit character", Wptr);
2044 Scan_Ptr := Wptr;
2045 Name_Len := 0;
2046 Underline_Found := False;
2047 goto Scan_Identifier;
2049 -- All other wide characters are illegal here
2051 else
2052 Error_Illegal_Wide_Character;
2053 goto Scan_Next_Character;
2054 end if;
2055 end;
2057 -- Routine to scan line terminator. On entry Scan_Ptr points to a
2058 -- character which is one of FF, LF, CR, VT, or one of the wide characters
2059 -- that is treated as a line terminator.
2061 <<Scan_Line_Terminator>>
2063 -- Check line too long
2065 Check_End_Of_Line;
2067 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2068 -- a physical line.
2070 if End_Of_Line_Is_Token then
2071 Token_Ptr := Scan_Ptr;
2072 end if;
2074 declare
2075 Physical : Boolean;
2077 begin
2078 Skip_Line_Terminators (Scan_Ptr, Physical);
2080 -- If we are at start of physical line, update scan pointers to
2081 -- reflect the start of the new line.
2083 if Physical then
2084 Current_Line_Start := Scan_Ptr;
2085 Start_Column := Set_Start_Column;
2086 First_Non_Blank_Location := Scan_Ptr;
2088 -- If End_Of_Line is a token, we return it as it is a
2089 -- physical line.
2091 if End_Of_Line_Is_Token then
2092 Token := Tok_End_Of_Line;
2093 return;
2094 end if;
2095 end if;
2096 end;
2098 goto Scan_Next_Character;
2100 -- Identifier scanning routine. On entry, some initial characters of
2101 -- the identifier may have already been stored in Name_Buffer. If so,
2102 -- Name_Len has the number of characters stored. Otherwise Name_Len is
2103 -- set to zero on entry. Underline_Found is also set False on entry.
2105 <<Scan_Identifier>>
2107 -- This loop scans as fast as possible past lower half letters and
2108 -- digits, which we expect to be the most common characters.
2110 loop
2111 if Source (Scan_Ptr) in 'a' .. 'z'
2112 or else Source (Scan_Ptr) in '0' .. '9'
2113 then
2114 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2115 Accumulate_Checksum (Source (Scan_Ptr));
2117 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2118 Name_Buffer (Name_Len + 1) :=
2119 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2120 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
2122 else
2123 exit;
2124 end if;
2126 Underline_Found := False;
2127 Scan_Ptr := Scan_Ptr + 1;
2128 Name_Len := Name_Len + 1;
2129 end loop;
2131 -- If we fall through, then we have encountered either an underline
2132 -- character, or an extended identifier character (i.e. one from the
2133 -- upper half), or a wide character, or an identifier terminator. The
2134 -- initial test speeds us up in the most common case where we have
2135 -- an identifier terminator. Note that ESC is an identifier character
2136 -- only if a wide character encoding method that uses ESC encoding
2137 -- is active, so if we find an ESC character we know that we have a
2138 -- wide character.
2140 if Identifier_Char (Source (Scan_Ptr)) then
2142 -- Case of underline
2144 if Source (Scan_Ptr) = '_' then
2145 Accumulate_Checksum ('_');
2147 if Underline_Found then
2148 Error_No_Double_Underline;
2149 else
2150 Underline_Found := True;
2151 Name_Len := Name_Len + 1;
2152 Name_Buffer (Name_Len) := '_';
2153 end if;
2155 Scan_Ptr := Scan_Ptr + 1;
2156 goto Scan_Identifier;
2158 -- Upper half character
2160 elsif Source (Scan_Ptr) in Upper_Half_Character
2161 and then not Upper_Half_Encoding
2162 then
2163 Accumulate_Checksum (Source (Scan_Ptr));
2164 Store_Encoded_Character
2165 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2166 Scan_Ptr := Scan_Ptr + 1;
2167 Underline_Found := False;
2168 goto Scan_Identifier;
2170 -- Left bracket not followed by a quote terminates an identifier.
2171 -- This is an error, but we don't want to give a junk error msg
2172 -- about wide characters in this case!
2174 elsif Source (Scan_Ptr) = '['
2175 and then Source (Scan_Ptr + 1) /= '"'
2176 then
2177 null;
2179 -- We know we have a wide character encoding here (the current
2180 -- character is either ESC, left bracket, or an upper half
2181 -- character depending on the encoding method).
2183 else
2184 -- Scan out the wide character and insert the appropriate
2185 -- encoding into the name table entry for the identifier.
2187 declare
2188 Code : Char_Code;
2189 Err : Boolean;
2190 Chr : Character;
2191 Cat : Category;
2193 begin
2194 Wptr := Scan_Ptr;
2195 Scan_Wide (Source, Scan_Ptr, Code, Err);
2197 -- If error, signal error
2199 if Err then
2200 Error_Illegal_Wide_Character;
2202 -- If the character scanned is a normal identifier
2203 -- character, then we treat it that way.
2205 elsif In_Character_Range (Code)
2206 and then Identifier_Char (Get_Character (Code))
2207 then
2208 Chr := Get_Character (Code);
2209 Accumulate_Checksum (Chr);
2210 Store_Encoded_Character
2211 (Get_Char_Code (Fold_Lower (Chr)));
2212 Underline_Found := False;
2214 -- Here if not a normal identifier character
2216 else
2217 -- Make sure we are allowing wide characters in
2218 -- identifiers. Note that we allow wide character
2219 -- notation for an OK identifier character. This in
2220 -- particular allows bracket or other notation to be
2221 -- used for upper half letters.
2223 -- Wide characters are always allowed in Ada 2005
2225 if Identifier_Character_Set /= 'w'
2226 and then Ada_Version < Ada_05
2227 then
2228 Error_Msg
2229 ("wide character not allowed in identifier", Wptr);
2230 end if;
2232 Cat := Get_Category (UTF_32 (Code));
2234 -- If OK letter, store it folding to upper case. Note
2235 -- that we include the folded letter in the checksum.
2237 if Is_UTF_32_Letter (Cat) then
2238 Code :=
2239 Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
2240 Accumulate_Checksum (Code);
2241 Store_Encoded_Character (Code);
2242 Underline_Found := False;
2244 -- If OK extended digit or mark, then store it
2246 elsif Is_UTF_32_Digit (Cat)
2247 or else Is_UTF_32_Mark (Cat)
2248 then
2249 Accumulate_Checksum (Code);
2250 Store_Encoded_Character (Code);
2251 Underline_Found := False;
2253 -- Wide punctuation is also stored, but counts as an
2254 -- underline character for error checking purposes.
2256 elsif Is_UTF_32_Punctuation (Cat) then
2257 Accumulate_Checksum (Code);
2259 if Underline_Found then
2260 declare
2261 Cend : constant Source_Ptr := Scan_Ptr;
2262 begin
2263 Scan_Ptr := Wptr;
2264 Error_No_Double_Underline;
2265 Scan_Ptr := Cend;
2266 end;
2268 else
2269 Store_Encoded_Character (Code);
2270 Underline_Found := True;
2271 end if;
2273 -- Wide character in Unicode category "Other, Format"
2274 -- is accepted in an identifier, but is ignored and not
2275 -- stored. It seems reasonable to exclude it from the
2276 -- checksum.
2278 -- Note that it is correct (see AI-395) to simply strip
2279 -- other format characters, before testing for double
2280 -- underlines, or for reserved words.
2282 elsif Is_UTF_32_Other (Cat) then
2283 null;
2285 -- Wide character in category Separator,Space terminates
2287 elsif Is_UTF_32_Space (Cat) then
2288 goto Scan_Identifier_Complete;
2290 -- Any other wide character is not acceptable
2292 else
2293 Error_Msg
2294 ("invalid wide character in identifier", Wptr);
2295 end if;
2296 end if;
2298 goto Scan_Identifier;
2299 end;
2300 end if;
2301 end if;
2303 -- Scan of identifier is complete. The identifier is stored in
2304 -- Name_Buffer, and Scan_Ptr points past the last character.
2306 <<Scan_Identifier_Complete>>
2307 Token_Name := Name_Find;
2309 -- Check for identifier ending with underline or punctuation char
2311 if Underline_Found then
2312 Underline_Found := False;
2314 if Source (Scan_Ptr - 1) = '_' then
2315 Error_Msg
2316 ("identifier cannot end with underline", Scan_Ptr - 1);
2317 else
2318 Error_Msg
2319 ("identifier cannot end with punctuation character", Wptr);
2320 end if;
2321 end if;
2323 -- Here is where we check if it was a keyword
2325 if Get_Name_Table_Byte (Token_Name) /= 0
2326 and then (Ada_Version >= Ada_95
2327 or else Token_Name not in Ada_95_Reserved_Words)
2328 and then (Ada_Version >= Ada_05
2329 or else Token_Name not in Ada_2005_Reserved_Words)
2330 then
2331 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2333 -- Deal with possible style check for non-lower case keyword, but
2334 -- we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2335 -- this purpose if they appear as attribute designators. Actually
2336 -- we only check the first character for speed.
2338 -- Ada 2005 (AI-284): Do not apply the style check in case of
2339 -- "pragma Interface"
2341 -- Ada 2005 (AI-340): Do not apply the style check in case of
2342 -- MOD attribute.
2344 if Style_Check
2345 and then Source (Token_Ptr) <= 'Z'
2346 and then (Prev_Token /= Tok_Apostrophe
2347 or else
2348 (Token /= Tok_Access and then
2349 Token /= Tok_Delta and then
2350 Token /= Tok_Digits and then
2351 Token /= Tok_Mod and then
2352 Token /= Tok_Range))
2353 and then (Token /= Tok_Interface
2354 or else
2355 (Token = Tok_Interface
2356 and then Prev_Token /= Tok_Pragma))
2357 then
2358 Style.Non_Lower_Case_Keyword;
2359 end if;
2361 -- We must reset Token_Name since this is not an identifier and
2362 -- if we leave Token_Name set, the parser gets confused because
2363 -- it thinks it is dealing with an identifier instead of the
2364 -- corresponding keyword.
2366 Token_Name := No_Name;
2367 Accumulate_Token_Checksum;
2368 return;
2370 -- It is an identifier after all
2372 else
2373 Token := Tok_Identifier;
2374 Accumulate_Token_Checksum;
2375 Post_Scan;
2376 return;
2377 end if;
2378 end Scan;
2380 --------------------------
2381 -- Set_Comment_As_Token --
2382 --------------------------
procedure Set_Comment_As_Token (Value : Boolean) is
begin
   --  Store the requested setting in Comment_Is_Token. Nothing else is
   --  done here; the flag is consulted elsewhere in this package.

   Comment_Is_Token := Value;
end Set_Comment_As_Token;
2389 ------------------------------
2390 -- Set_End_Of_Line_As_Token --
2391 ------------------------------
procedure Set_End_Of_Line_As_Token (Value : Boolean) is
begin
   --  Store the requested setting in End_Of_Line_Is_Token. Nothing else
   --  is done here; the flag is consulted elsewhere in this package.

   End_Of_Line_Is_Token := Value;
end Set_End_Of_Line_As_Token;
2398 ---------------------------
2399 -- Set_Special_Character --
2400 ---------------------------
procedure Set_Special_Character (C : Character) is
begin
   --  Only the nine characters tested below may be marked in the
   --  Special_Characters table. A call with any other character leaves
   --  the table untouched and reports nothing.

   if C = '#'
     or else C = '$'
     or else C = '_'
     or else C = '?'
     or else C = '@'
     or else C = '`'
     or else C = '\'
     or else C = '^'
     or else C = '~'
   then
      Special_Characters (C) := True;
   end if;
end Set_Special_Character;
2413 ----------------------
2414 -- Set_Start_Column --
2415 ----------------------
2417 -- Note: it seems at first glance a little expensive to compute this value
2418 -- for every source line (since it is certainly not used for all source
2419 -- lines). On the other hand, it doesn't take much more work to skip past
2420 -- the initial white space on the line counting the columns than it would
2421 -- to scan past the white space using the standard scanning circuits.
function Set_Start_Column return Column_Number is
   Start_Column : Column_Number := 0;
   --  Column count accumulated while skipping leading white space. This
   --  is the value returned to the caller.

begin
   --  Outer loop scans past horizontal tab characters

   Tabs_Loop : loop

      --  Inner loop scans past blanks as fast as possible, bumping
      --  Scan_Ptr past the blanks and adjusting Start_Column to account
      --  for them. Up to seven characters are examined per iteration.
      --  The tests are ordered so that Source (Scan_Ptr + K) is read
      --  only when the K preceding characters are all blanks, and the
      --  loop is left as soon as a non-blank character is found.

      Blanks_Loop : loop
         if Source (Scan_Ptr) /= ' ' then
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 1) /= ' ' then
            Scan_Ptr := Scan_Ptr + 1;
            Start_Column := Start_Column + 1;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 2) /= ' ' then
            Scan_Ptr := Scan_Ptr + 2;
            Start_Column := Start_Column + 2;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 3) /= ' ' then
            Scan_Ptr := Scan_Ptr + 3;
            Start_Column := Start_Column + 3;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 4) /= ' ' then
            Scan_Ptr := Scan_Ptr + 4;
            Start_Column := Start_Column + 4;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 5) /= ' ' then
            Scan_Ptr := Scan_Ptr + 5;
            Start_Column := Start_Column + 5;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 6) /= ' ' then
            Scan_Ptr := Scan_Ptr + 6;
            Start_Column := Start_Column + 6;
            exit Blanks_Loop;

         --  Seven blanks in a row: consume them all and go round again

         else
            Scan_Ptr := Scan_Ptr + 7;
            Start_Column := Start_Column + 7;
         end if;
      end loop Blanks_Loop;

      --  Outer loop keeps going only if a horizontal tab follows

      if Source (Scan_Ptr) = HT then
         if Style_Check then
            Style.Check_HT;
         end if;

         Scan_Ptr := Scan_Ptr + 1;

         --  Round the column up to the next multiple of 8

         Start_Column := (Start_Column / 8) * 8 + 8;

      else
         exit Tabs_Loop;
      end if;
   end loop Tabs_Loop;

   return Start_Column;

--  Start_Column grows without any bound check above, so a line starting
--  with an absurdly long run of blanks or tabs can push it outside the
--  range of Column_Number (declared elsewhere; presumably a bounded
--  integer type). If checks are enabled this raises Constraint_Error,
--  and the failed assignment leaves Start_Column at its last valid
--  value. The exact column is of no interest for such a line, so that
--  value is returned rather than letting the exception escape from the
--  scanner. Note that this handler also covers a Constraint_Error raised
--  by any other statement of this function.

exception
   when Constraint_Error =>
      return Start_Column;
end Set_Start_Column;
2494 end Scng;