Merge from the pain train
[official-gcc.git] / gcc / ada / scng.adb
blob158524df989a9381f9e1111e852882331e9a3dd7
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- S C N G --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1992-2005 Free Software Foundation, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
20 -- MA 02111-1307, USA. --
21 -- --
22 -- GNAT was originally developed by the GNAT team at New York University. --
23 -- Extensive contributions were provided by Ada Core Technologies Inc. --
24 -- --
25 ------------------------------------------------------------------------------
27 with Csets; use Csets;
28 with Err_Vars; use Err_Vars;
29 with Namet; use Namet;
30 with Opt; use Opt;
31 with Scans; use Scans;
32 with Sinput; use Sinput;
33 with Snames; use Snames;
34 with Stringt; use Stringt;
35 with Stylesw; use Stylesw;
36 with Uintp; use Uintp;
37 with Urealp; use Urealp;
38 with Widechar; use Widechar;
40 with System.CRC32;
41 with System.WCh_Con; use System.WCh_Con;
43 package body Scng is
45 use ASCII;
46 -- Make control characters visible
48 Special_Characters : array (Character) of Boolean := (others => False);
49 -- The value is True for each character that is a special token
51 Comment_Is_Token : Boolean := False;
52 -- True if comments are tokens
54 End_Of_Line_Is_Token : Boolean := False;
55 -- True if End_Of_Line is a token
57 -----------------------
58 -- Local Subprograms --
59 -----------------------
61 procedure Accumulate_Token_Checksum;
62 pragma Inline (Accumulate_Token_Checksum);
64 procedure Accumulate_Checksum (C : Character);
65 pragma Inline (Accumulate_Checksum);
66 -- This routine accumulates the checksum given character C. During the
67 -- scanning of a source file, this routine is called with every character
68 -- in the source, excluding blanks, and all control characters (except
69 -- that ESC is included in the checksum). Upper case letters not in string
70 -- literals are folded by the caller. See Sinput spec for the documentation
71 -- of the checksum algorithm. Note: checksum values are only used if we
72 -- generate code, so it is not necessary to worry about making the right
73 -- sequence of calls in any error situation.
75 procedure Accumulate_Checksum (C : Char_Code);
76 pragma Inline (Accumulate_Checksum);
77 -- This version is identical, except that the argument, C, is a character
78 -- code value instead of a character. This is used when wide characters
79 -- are scanned. We use the character code rather than the ASCII characters
80 -- so that the checksum is independent of wide character encoding method.
82 procedure Initialize_Checksum;
83 pragma Inline (Initialize_Checksum);
84 -- Initialize checksum value
86 -------------------------
87 -- Accumulate_Checksum --
88 -------------------------
procedure Accumulate_Checksum (C : Character) is
   package CRC renames System.CRC32;
   --  Local shorthand for the CRC-32 support package

begin
   --  Fold C into the running checksum. Checksum is passed through a
   --  conversion to the CRC32 type so that Update can modify it in place.

   CRC.Update (CRC.CRC32 (Checksum), C);
end Accumulate_Checksum;
procedure Accumulate_Checksum (C : Char_Code) is
begin
   --  The character code is fed to the checksum one byte at a time, most
   --  significant byte first, using the Character version of this routine.
   --  Every value given to Character'Val must lie in 0 .. 255, so each
   --  byte other than the leading one is reduced mod 256, and the leading
   --  byte is chosen so that it cannot exceed 255.

   if C > 16#FF_FFFF# then

      --  Code needs four bytes. Using C / 2 ** 16 as the leading byte here
      --  would give a value above 255, which is not a valid position for
      --  type Character, so the top byte is split off separately.
      --  NOTE(review): assumes Char_Code'Last < 2 ** 32 so that
      --  C / 2 ** 24 fits in one byte -- confirm against the declaration
      --  of Char_Code.

      Accumulate_Checksum (Character'Val (C / 2 ** 24));
      Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256));
      Accumulate_Checksum (Character'Val ((C / 256) mod 256));

   elsif C > 16#FFFF# then

      --  Code needs three bytes. The byte sequence is the same one that
      --  was always produced for this range, so checksum values for such
      --  codes do not change.

      Accumulate_Checksum (Character'Val (C / 2 ** 16));
      Accumulate_Checksum (Character'Val ((C / 256) mod 256));

   else
      --  Code fits in two bytes

      Accumulate_Checksum (Character'Val (C / 256));
   end if;

   --  The low order byte is accumulated in all cases

   Accumulate_Checksum (Character'Val (C mod 256));
end Accumulate_Checksum;
107 -------------------------------
108 -- Accumulate_Token_Checksum --
109 -------------------------------
procedure Accumulate_Token_Checksum is
   Token_Code : constant Character :=
                  Character'Val (Token_Type'Pos (Token));
   --  Position number of the current token, expressed as a character so
   --  that it can be passed to the CRC update routine.

begin
   System.CRC32.Update (System.CRC32.CRC32 (Checksum), Token_Code);
end Accumulate_Token_Checksum;
118 ----------------------------
119 -- Determine_Token_Casing --
120 ----------------------------
function Determine_Token_Casing return Casing_Type is
   Last_Char : constant Source_Ptr := Scan_Ptr - 1;
   --  Location of the last character of the token text; Scan_Ptr itself
   --  is excluded from the slice examined below.

begin
   --  The casing is that of the source text from Token_Ptr to Last_Char

   return Determine_Casing (Source (Token_Ptr .. Last_Char));
end Determine_Token_Casing;
127 -------------------------
128 -- Initialize_Checksum --
129 -------------------------
procedure Initialize_Checksum is
   package CRC renames System.CRC32;
   --  Local shorthand for the CRC-32 support package

begin
   --  Set Checksum (viewed as a CRC32 value) to its initial state

   CRC.Initialize (CRC.CRC32 (Checksum));
end Initialize_Checksum;
136 ------------------------
137 -- Initialize_Scanner --
138 ------------------------
procedure Initialize_Scanner
  (Unit  : Unit_Number_Type;
   Index : Source_File_Index)
is
   --  Prepare the scanner for a new source file. Unit is recorded as the
   --  current source unit and Index as the current source file. The
   --  reserved words are registered in the names table, the scan control
   --  variables are reset to the start of the file, and the checksum is
   --  initialized. No token is scanned by this routine.

   procedure Set_Reserved (N : Name_Id; T : Token_Type);
   pragma Inline (Set_Reserved);
   --  Set given name as a reserved keyword (T is the corresponding token)

   ------------------
   -- Set_Reserved --
   ------------------

   procedure Set_Reserved (N : Name_Id; T : Token_Type) is
   begin
      --  Set up Token_Type values in Names Table entries for reserved
      --  keywords. We use the Pos value of the Token_Type value. Note we
      --  rely on the fact that Token_Type'Val (0) is not a reserved word!

      Set_Name_Table_Byte (N, Token_Type'Pos (T));
   end Set_Reserved;

--  Start of processing for Initialize_Scanner

begin
   --  Establish reserved words

   Set_Reserved (Name_Abort,     Tok_Abort);
   Set_Reserved (Name_Abs,       Tok_Abs);
   Set_Reserved (Name_Abstract,  Tok_Abstract);
   Set_Reserved (Name_Accept,    Tok_Accept);
   Set_Reserved (Name_Access,    Tok_Access);
   Set_Reserved (Name_And,       Tok_And);
   Set_Reserved (Name_Aliased,   Tok_Aliased);
   Set_Reserved (Name_All,       Tok_All);
   Set_Reserved (Name_Array,     Tok_Array);
   Set_Reserved (Name_At,        Tok_At);
   Set_Reserved (Name_Begin,     Tok_Begin);
   Set_Reserved (Name_Body,      Tok_Body);
   Set_Reserved (Name_Case,      Tok_Case);
   Set_Reserved (Name_Constant,  Tok_Constant);
   Set_Reserved (Name_Declare,   Tok_Declare);
   Set_Reserved (Name_Delay,     Tok_Delay);
   Set_Reserved (Name_Delta,     Tok_Delta);
   Set_Reserved (Name_Digits,    Tok_Digits);
   Set_Reserved (Name_Do,        Tok_Do);
   Set_Reserved (Name_Else,      Tok_Else);
   Set_Reserved (Name_Elsif,     Tok_Elsif);
   Set_Reserved (Name_End,       Tok_End);
   Set_Reserved (Name_Entry,     Tok_Entry);
   Set_Reserved (Name_Exception, Tok_Exception);
   Set_Reserved (Name_Exit,      Tok_Exit);
   Set_Reserved (Name_For,       Tok_For);
   Set_Reserved (Name_Function,  Tok_Function);
   Set_Reserved (Name_Generic,   Tok_Generic);
   Set_Reserved (Name_Goto,      Tok_Goto);
   Set_Reserved (Name_If,        Tok_If);
   Set_Reserved (Name_In,        Tok_In);
   Set_Reserved (Name_Is,        Tok_Is);
   Set_Reserved (Name_Limited,   Tok_Limited);
   Set_Reserved (Name_Loop,      Tok_Loop);
   Set_Reserved (Name_Mod,       Tok_Mod);
   Set_Reserved (Name_New,       Tok_New);
   Set_Reserved (Name_Not,       Tok_Not);
   Set_Reserved (Name_Null,      Tok_Null);
   Set_Reserved (Name_Of,        Tok_Of);
   Set_Reserved (Name_Or,        Tok_Or);
   Set_Reserved (Name_Others,    Tok_Others);
   Set_Reserved (Name_Out,       Tok_Out);
   Set_Reserved (Name_Package,   Tok_Package);
   Set_Reserved (Name_Pragma,    Tok_Pragma);
   Set_Reserved (Name_Private,   Tok_Private);
   Set_Reserved (Name_Procedure, Tok_Procedure);
   Set_Reserved (Name_Protected, Tok_Protected);
   Set_Reserved (Name_Raise,     Tok_Raise);
   Set_Reserved (Name_Range,     Tok_Range);
   Set_Reserved (Name_Record,    Tok_Record);
   Set_Reserved (Name_Rem,       Tok_Rem);
   Set_Reserved (Name_Renames,   Tok_Renames);
   Set_Reserved (Name_Requeue,   Tok_Requeue);
   Set_Reserved (Name_Return,    Tok_Return);
   Set_Reserved (Name_Reverse,   Tok_Reverse);
   Set_Reserved (Name_Select,    Tok_Select);
   Set_Reserved (Name_Separate,  Tok_Separate);
   Set_Reserved (Name_Subtype,   Tok_Subtype);
   Set_Reserved (Name_Tagged,    Tok_Tagged);
   Set_Reserved (Name_Task,      Tok_Task);
   Set_Reserved (Name_Terminate, Tok_Terminate);
   Set_Reserved (Name_Then,      Tok_Then);
   Set_Reserved (Name_Type,      Tok_Type);
   Set_Reserved (Name_Until,     Tok_Until);
   Set_Reserved (Name_Use,       Tok_Use);
   Set_Reserved (Name_When,      Tok_When);
   Set_Reserved (Name_While,     Tok_While);
   Set_Reserved (Name_With,      Tok_With);
   Set_Reserved (Name_Xor,       Tok_Xor);

   --  Ada 2005 reserved words

   Set_Reserved (Name_Interface,    Tok_Interface);
   Set_Reserved (Name_Overriding,   Tok_Overriding);
   Set_Reserved (Name_Synchronized, Tok_Synchronized);

   --  Initialize scan control variables

   Current_Source_File      := Index;
   Source                   := Source_Text (Current_Source_File);
   Current_Source_Unit      := Unit;
   Scan_Ptr                 := Source_First (Current_Source_File);
   Token                    := No_Token;
   Token_Ptr                := Scan_Ptr;
   Current_Line_Start       := Scan_Ptr;
   Token_Node               := Empty;
   Token_Name               := No_Name;
   Start_Column             := Set_Start_Column;
   First_Non_Blank_Location := Scan_Ptr;

   Initialize_Checksum;

   --  Do not call Scan, otherwise the License stuff does not work in Scn

end Initialize_Scanner;
262 ------------------------------
263 -- Reset_Special_Characters --
264 ------------------------------
procedure Reset_Special_Characters is
begin
   --  Clear the special token flag of every character in turn

   for Ch in Special_Characters'Range loop
      Special_Characters (Ch) := False;
   end loop;
end Reset_Special_Characters;
271 ----------
272 -- Scan --
273 ----------
275 procedure Scan is
277 Start_Of_Comment : Source_Ptr;
278 -- Record start of comment position
280 Underline_Found : Boolean;
281 -- During scanning of an identifier, set to True if last character
282 -- scanned was an underline or other punctuation character. This
283 -- is used to flag the error of two underlines/punctuations in a
284 -- row or ending an identifier with an underline/punctuation. Here
285 -- punctuation means any UTF_32 character in the Unicode category
286 -- Punctuation,Connector.
288 Wptr : Source_Ptr;
289 -- Used to remember start of last wide character scanned
291 procedure Check_End_Of_Line;
292 -- Called when end of line encountered. Checks that line is not too
293 -- long, and that other style checks for the end of line are met.
295 function Double_Char_Token (C : Character) return Boolean;
296 -- This function is used for double character tokens like := or <>. It
297 -- checks if the character following Source (Scan_Ptr) is C, and if so
298 -- bumps Scan_Ptr past the pair of characters and returns True. A space
299 -- between the two characters is also recognized with an appropriate
300 -- error message being issued. If C is not present, False is returned.
301 -- Note that Double_Char_Token can only be used for tokens defined in
302 -- the Ada syntax (its use for error cases like && is not appropriate
303 -- since we do not want a junk message for a case like &-space-&).
305 procedure Error_Illegal_Character;
306 -- Give illegal character error, Scan_Ptr points to character. On
307 -- return, Scan_Ptr is bumped past the illegal character.
309 procedure Error_Illegal_Wide_Character;
310 -- Give illegal wide character message. On return, Scan_Ptr is bumped
311 -- past the illegal character, which may still leave us pointing to
312 -- junk, not much we can do if the escape sequence is messed up!
314 procedure Error_Long_Line;
315 -- Signal error of excessively long line
317 procedure Error_No_Double_Underline;
318 -- Signal error of two underline or punctuation characters in a row.
319 -- Called with Scan_Ptr pointing to second underline/punctuation char.
321 procedure Nlit;
322 -- This is the procedure for scanning out numeric literals. On entry,
323 -- Scan_Ptr points to the digit that starts the numeric literal (the
324 -- checksum for this character has not been accumulated yet). On return
325 -- Scan_Ptr points past the last character of the numeric literal, Token
326 -- and Token_Node are set appropriately, and the checksum is updated.
328 procedure Slit;
329 -- This is the procedure for scanning out string literals. On entry,
330 -- Scan_Ptr points to the opening string quote (the checksum for this
331 -- character has not been accumulated yet). On return Scan_Ptr points
332 -- past the closing quote of the string literal, Token and Token_Node
333 -- are set appropriately, and the checksum is updated.
335 -----------------------
336 -- Check_End_Of_Line --
337 -----------------------
procedure Check_End_Of_Line is
   Line_Length : constant Int :=
                   Int (Scan_Ptr) - Int (Current_Line_Start);
   --  Number of characters between the start of the current line and
   --  the scan pointer.

begin
   --  When the style check for maximum line length is active, the whole
   --  end of line check is delegated to the style checker.

   if Style_Check and Style_Check_Max_Line_Length then
      Style.Check_Line_Terminator (Line_Length);
      return;
   end if;

   --  Otherwise check the length against the standard maximum. Note that
   --  we take this from Opt.Max_Line_Length rather than
   --  Hostparm.Max_Line_Length because we do not want to impose any limit
   --  during scanning of configuration pragma files, and
   --  Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length) is
   --  reset to Column_Number'Max during scanning of such files.

   if Line_Length > Opt.Max_Line_Length then
      Error_Long_Line;
   end if;
end Check_End_Of_Line;
358 -----------------------
359 -- Double_Char_Token --
360 -----------------------
function Double_Char_Token (C : Character) return Boolean is
   Next_Char : constant Character := Source (Scan_Ptr + 1);
   --  Character immediately following the one at Scan_Ptr

begin
   --  Normal case, C follows directly: checksum it and skip the pair

   if Next_Char = C then
      Accumulate_Checksum (C);
      Scan_Ptr := Scan_Ptr + 2;
      return True;
   end if;

   --  Case of a single blank between the two characters. The error flag
   --  is placed on the blank, and the scan then resumes after C. The
   --  character two positions on is examined only if a blank was found.

   if Next_Char = ' ' and then Source (Scan_Ptr + 2) = C then
      Scan_Ptr := Scan_Ptr + 1;
      Error_Msg_S ("no space allowed here");
      Scan_Ptr := Scan_Ptr + 2;
      return True;
   end if;

   --  C is not present, Scan_Ptr is left unchanged

   return False;
end Double_Char_Token;
382 -----------------------------
383 -- Error_Illegal_Character --
384 -----------------------------
procedure Error_Illegal_Character is
begin
   --  Post the message at the current scan location, which is where the
   --  offending character is

   Error_Msg_S ("illegal character");

   --  Step over the offending character

   Scan_Ptr := Scan_Ptr + 1;
end Error_Illegal_Character;
392 ----------------------------------
393 -- Error_Illegal_Wide_Character --
394 ----------------------------------
procedure Error_Illegal_Wide_Character is
   Flag_Loc : constant Source_Ptr := Wptr;
   --  Location recorded as the start of the last wide character scanned

begin
   --  Scan_Ptr is not modified by this routine

   Error_Msg ("illegal wide character", Flag_Loc);
end Error_Illegal_Wide_Character;
401 ---------------------
402 -- Error_Long_Line --
403 ---------------------
procedure Error_Long_Line is
   Flag_Loc : constant Source_Ptr :=
                Current_Line_Start + Source_Ptr (Opt.Max_Line_Length);
   --  Location Opt.Max_Line_Length characters past the start of the
   --  current line, which is where the message is posted.

begin
   Error_Msg ("this line is too long", Flag_Loc);
end Error_Long_Line;
412 -------------------------------
413 -- Error_No_Double_Underline --
414 -------------------------------
procedure Error_No_Double_Underline is
   Cur_Is_Underline : constant Boolean := Source (Scan_Ptr) = '_';
   --  True if the character at the scan pointer is an underline

   Prev_Is_Underline : constant Boolean := Source (Scan_Ptr - 1) = '_';
   --  True if the character before the scan pointer is an underline

begin
   Underline_Found := False;

   --  The message depends on which of the two adjacent characters are
   --  underlines; any character that is not an underline is reported as
   --  a punctuation character.

   if Cur_Is_Underline and then Prev_Is_Underline then
      Error_Msg_S
        ("two consecutive underlines not permitted");

   elsif Cur_Is_Underline then
      Error_Msg_S
        ("underline cannot follow punctuation character");

   elsif Prev_Is_Underline then
      Error_Msg_S
        ("punctuation character cannot follow underline");

   else
      Error_Msg_S
        ("two consecutive punctuation characters not permitted");
   end if;
end Error_No_Double_Underline;
442 ----------
443 -- Nlit --
444 ----------
procedure Nlit is

   Cur : Character;
   --  Source character currently being examined

   Delim : Character;
   --  Delimiter that opened a based number, either # or :

   Base : Int;
   --  Value of base

   UI_Base : Uint;
   --  Value of base in Uint format

   UI_Int_Value : Uint;
   --  Value accumulated by Scan_Integer (and by the based digit loop)

   UI_Num_Value : Uint;
   --  Value of the digits of the numeric literal being scanned

   Scale : Int;
   --  Scale value for real literal

   UI_Scale : Uint;
   --  Scale in Uint format

   Neg_Exponent : Boolean;
   --  Set True if a negative exponent is accepted

   Digit_Val : Int;
   --  Value of the extended digit just scanned in a based number

   Point_Scanned : Boolean;
   --  Set True once a point has been scanned in the literal

   -----------------------
   -- Local Subprograms --
   -----------------------

   function Colon_Opens_Based_Literal return Boolean;
   --  Tests the character following the one at Scan_Ptr. Returns True if
   --  it is a period, a digit or an upper or lower case letter.

   procedure Error_Digit_Expected;
   --  Signal error of bad digit. Scan_Ptr points to the location at which
   --  the digit was expected, and is unchanged on return.

   procedure Scan_Integer;
   --  Scan a run of decimal digits. On entry Scan_Ptr points to a digit,
   --  on exit it points past the last character of the integer. For each
   --  digit, UI_Int_Value is multiplied by 10 and the digit value added,
   --  and Scale is decremented by one.

   -------------------------------
   -- Colon_Opens_Based_Literal --
   -------------------------------

   function Colon_Opens_Based_Literal return Boolean is
   begin
      case Source (Scan_Ptr + 1) is
         when '.' | '0' .. '9' | 'A' .. 'Z' | 'a' .. 'z' =>
            return True;

         when others =>
            return False;
      end case;
   end Colon_Opens_Based_Literal;

   --------------------------
   -- Error_Digit_Expected --
   --------------------------

   procedure Error_Digit_Expected is
   begin
      Error_Msg_S ("digit expected");
   end Error_Digit_Expected;

   ------------------
   -- Scan_Integer --
   ------------------

   procedure Scan_Integer is
      Ch : Character := Source (Scan_Ptr);
      --  Character being scanned, initially the first digit

   begin
      --  Each iteration consumes one digit, then deals with whatever
      --  follows it (underlines are allowed between digits).

      loop
         Accumulate_Checksum (Ch);
         UI_Int_Value :=
           UI_Int_Value * 10 + (Character'Pos (Ch) - Character'Pos ('0'));
         Scan_Ptr := Scan_Ptr + 1;
         Scale := Scale - 1;
         Ch := Source (Scan_Ptr);

         if Ch /= '_' then
            exit when Ch not in '0' .. '9';

         else
            --  Underline case. We do not accumulate the '_' in the
            --  checksum, so that 1_234 is equivalent to 1234, and does
            --  not trigger compilation for "minimal recompilation"
            --  (gnatmake -m). Each additional underline is an error.

            loop
               Scan_Ptr := Scan_Ptr + 1;
               Ch := Source (Scan_Ptr);
               exit when Ch /= '_';
               Error_No_Double_Underline;
            end loop;

            --  A digit must follow the underline(s)

            if Ch not in '0' .. '9' then
               Error_Digit_Expected;
               exit;
            end if;
         end if;
      end loop;
   end Scan_Integer;

--  Start of processing for Nlit

begin
   Base          := 10;
   UI_Base       := Uint_10;
   UI_Int_Value  := Uint_0;
   Point_Scanned := False;

   --  Scan the leading integer. Scale is given a value first because
   --  Scan_Integer decrements it; the count for these digits is not
   --  wanted and is discarded straight afterwards.

   Scale := 0;
   Scan_Integer;
   Scale := 0;
   UI_Num_Value := UI_Int_Value;

   --  Various possibilities now for continuing the literal are period,
   --  E/e (for exponent), or :/# (for based literal).

   Cur := Source (Scan_Ptr);

   if Cur = '.' then

      --  Scan out point, but do not scan past .. which is a range
      --  sequence, and must not be eaten up scanning a numeric literal.

      while Cur = '.' and then Source (Scan_Ptr + 1) /= '.' loop
         Accumulate_Checksum ('.');

         if Point_Scanned then
            Error_Msg_S ("duplicate point ignored");
         end if;

         Point_Scanned := True;
         Scan_Ptr := Scan_Ptr + 1;
         Cur := Source (Scan_Ptr);

         if Cur in '0' .. '9' then
            Scan_Integer;
            UI_Num_Value := UI_Int_Value;
         else
            Error_Msg
              ("real literal cannot end with point", Scan_Ptr - 1);
         end if;
      end loop;

   --  Based literal case. The base is the value we already scanned.
   --  In the case of colon, we insist that the following character
   --  is indeed an extended digit or a period. This catches a number
   --  of common errors, as well as catching the well known tricky
   --  bug otherwise arising from "x : integer range 1 .. 10:= 6;"

   elsif Cur = '#'
     or else (Cur = ':' and then Colon_Opens_Based_Literal)
   then
      if Cur = ':' then
         Obsolescent_Check (Scan_Ptr);

         if Warn_On_Obsolescent_Feature then
            Error_Msg_S
              ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
            Error_Msg_S
              ("\use ""'#"" instead?");
         end if;
      end if;

      Accumulate_Checksum (Cur);
      Delim   := Cur;
      UI_Base := UI_Int_Value;

      --  An out of range base is diagnosed and then treated as 16

      if UI_Base < 2 or else UI_Base > 16 then
         Error_Msg_SC ("base not 2-16");
         UI_Base := Uint_16;
      end if;

      Base := UI_To_Int (UI_Base);
      Scan_Ptr := Scan_Ptr + 1;

      --  Scan out extended integer [. integer]

      Cur := Source (Scan_Ptr);
      UI_Int_Value := Uint_0;
      Scale := 0;

      loop
         --  Get the value of the extended digit. Upper case letters are
         --  accumulated in the checksum as the corresponding lower case
         --  letter (position number plus 32).

         case Cur is
            when '0' .. '9' =>
               Accumulate_Checksum (Cur);
               Digit_Val :=
                 Int'(Character'Pos (Cur)) - Int'(Character'Pos ('0'));

            when 'A' .. 'F' =>
               Accumulate_Checksum
                 (Character'Val (Character'Pos (Cur) + 32));
               Digit_Val :=
                 Int'(Character'Pos (Cur)) - Int'(Character'Pos ('A')) + 10;

            when 'a' .. 'f' =>
               Accumulate_Checksum (Cur);
               Digit_Val :=
                 Int'(Character'Pos (Cur)) - Int'(Character'Pos ('a')) + 10;

            when others =>
               Error_Msg_S ("extended digit expected");
               exit;
         end case;

         if Digit_Val >= Base then
            Error_Msg_S ("digit '>= base");
         end if;

         UI_Int_Value := UI_Int_Value * UI_Base + Digit_Val;
         Scale := Scale - 1;
         Scan_Ptr := Scan_Ptr + 1;
         Cur := Source (Scan_Ptr);

         --  Now see what follows the digit

         if Cur = '_' then

            --  Underline(s), which in this loop are included in the
            --  checksum; each additional underline is an error.

            loop
               Accumulate_Checksum ('_');
               Scan_Ptr := Scan_Ptr + 1;
               Cur := Source (Scan_Ptr);
               exit when Cur /= '_';
               Error_No_Double_Underline;
            end loop;

         elsif Cur = '.' then

            --  Point, digits from here on count towards the scale

            Accumulate_Checksum ('.');

            if Point_Scanned then
               Error_Msg_S ("duplicate point ignored");
            end if;

            Scan_Ptr := Scan_Ptr + 1;
            Cur := Source (Scan_Ptr);
            Point_Scanned := True;
            Scale := 0;

         elsif Cur = Delim then

            --  Matching closing delimiter

            Accumulate_Checksum (Cur);
            Scan_Ptr := Scan_Ptr + 1;
            exit;

         elsif Cur = '#' or else Cur = ':' then

            --  Closing delimiter of the other kind

            Error_Msg_S ("based number delimiters must match");
            Scan_Ptr := Scan_Ptr + 1;
            exit;

         elsif not Identifier_Char (Cur) then

            --  Anything that is not an identifier character ends the
            --  number with the closing delimiter missing.

            if Delim = '#' then
               Error_Msg_S ("missing '#");
            else
               Error_Msg_S ("missing ':");
            end if;

            exit;
         end if;

      end loop;

      UI_Num_Value := UI_Int_Value;
   end if;

   --  Scan out exponent. The scale counted so far is meaningful only if
   --  a point was scanned.

   if Point_Scanned then
      UI_Scale := UI_From_Int (Scale);
   else
      Scale := 0;
      UI_Scale := Uint_0;
   end if;

   if Source (Scan_Ptr) = 'E' or else Source (Scan_Ptr) = 'e' then

      --  The exponent letter is always accumulated as lower case

      Accumulate_Checksum ('e');
      Scan_Ptr := Scan_Ptr + 1;
      Neg_Exponent := False;

      --  Deal with optional sign

      case Source (Scan_Ptr) is
         when '+' =>
            Accumulate_Checksum ('+');
            Scan_Ptr := Scan_Ptr + 1;

         when '-' =>
            Accumulate_Checksum ('-');

            --  The message is posted before the sign is skipped, so it
            --  is flagged on the sign itself.

            if Point_Scanned then
               Neg_Exponent := True;
            else
               Error_Msg_S
                 ("negative exponent not allowed for integer literal");
            end if;

            Scan_Ptr := Scan_Ptr + 1;

         when others =>
            null;
      end case;

      --  Scan the exponent digits and apply them to the scale

      UI_Int_Value := Uint_0;

      if Source (Scan_Ptr) in '0' .. '9' then
         Scan_Integer;
      else
         Error_Digit_Expected;
      end if;

      if Neg_Exponent then
         UI_Scale := UI_Scale - UI_Int_Value;
      else
         UI_Scale := UI_Scale + UI_Int_Value;
      end if;
   end if;

   if Point_Scanned then

      --  Case of real literal to be returned

      Token := Tok_Real_Literal;
      Real_Literal_Value :=
        UR_From_Components
          (Num   => UI_Num_Value,
           Den   => -UI_Scale,
           Rbase => Base);

   else
      --  Case of integer literal to be returned

      Token := Tok_Integer_Literal;

      if UI_Scale = 0 then
         Int_Literal_Value := UI_Num_Value;

      --  Avoid doing possibly expensive calculations in cases like
      --  parsing 163E800_000# when semantics will not be done anyway.
      --  This is especially useful when parsing garbled input.

      elsif Operating_Mode /= Check_Syntax
        and then (Serious_Errors_Detected = 0 or else Try_Semantics)
      then
         Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;

      else
         Int_Literal_Value := No_Uint;
      end if;
   end if;

   Accumulate_Token_Checksum;
end Nlit;
802 ----------
803 -- Slit --
804 ----------
procedure Slit is

   Quote : Character;
   --  Delimiter (first character of string)

   Ch : Character;
   --  Source character currently being examined

   Code : Char_Code;
   --  Character code value to be stored for the current character

   Wide_Err : Boolean;
   --  Error flag returned by Scan_Wide

   function Starts_Wide_Sequence return Boolean;
   --  Tests whether Ch, the character at Scan_Ptr, is to be handed to
   --  Scan_Wide. This is the case for ESC when the wide character
   --  encoding method is one of the ESC methods, for an upper half
   --  character when upper half encoding is in use, and for a left
   --  bracket followed by a quote and then an identifier character.

   procedure Error_Bad_String_Char;
   --  Signal bad character in string/character literal. On entry
   --  Scan_Ptr points to the improper character encountered during the
   --  scan. Scan_Ptr is not modified, so it still points to the bad
   --  character on return.

   procedure Error_Unterminated_String;
   --  Called if a line terminator character is encountered during
   --  scanning a string, meaning that the string is not properly
   --  terminated.

   procedure Set_String;
   --  Distinguishes between string and operator symbol. On entry the
   --  string has been scanned out, and its characters start at Token_Ptr
   --  and end one character before Scan_Ptr. On exit Token is set to
   --  Tok_String_Literal or Tok_Operator_Symbol, and in the operator
   --  symbol case Token_Name is set to the name of the operator.

   ---------------------------
   -- Error_Bad_String_Char --
   ---------------------------

   procedure Error_Bad_String_Char is
      Bad : constant Character := Source (Scan_Ptr);
      --  The offending character

   begin
      if Bad = HT then
         Error_Msg_S ("horizontal tab not allowed in string");

      elsif Bad = VT or else Bad = FF then
         Error_Msg_S ("format effector not allowed in string");

      elsif Bad in Upper_Half_Character then
         Error_Msg_S ("(Ada 83) upper half character not allowed");

      else
         Error_Msg_S ("control character not allowed in string");
      end if;
   end Error_Bad_String_Char;

   -------------------------------
   -- Error_Unterminated_String --
   -------------------------------

   procedure Error_Unterminated_String is
   begin
      --  An interesting little refinement. Consider the following
      --  examples:

      --     A := "this is an unterminated string;
      --     A := "this is an unterminated string &
      --     P(A, "this is a parameter that didn't get terminated);

      --  We fiddle a little to do slightly better placement in these
      --  cases. Also, if there is white space at the end of the line we
      --  place the flag at the start of this white space, not at the
      --  end. Note that we only have to test for blanks, since tabs
      --  aren't allowed in strings in the first place and would have
      --  caused an error message.

      --  Two more cases that we treat specially are:

      --     A := "this string uses the wrong terminator'
      --     A := "this string uses the wrong terminator' &

      --  In these cases we give a different error message as well

      --  We actually reposition the scan pointer to the point where we
      --  place the flag in these cases, since it seems a better bet on
      --  the original intention.

      --  Back up over trailing blanks and ampersands, removing each one
      --  from the stored string.

      loop
         exit when Source (Scan_Ptr - 1) /= ' '
           and then Source (Scan_Ptr - 1) /= '&';

         Scan_Ptr := Scan_Ptr - 1;
         Unstore_String_Char;
      end loop;

      --  Check for case of incorrect string terminator, but single quote
      --  is not considered incorrect if the opening terminator misused
      --  a single quote (error message already given).

      if Quote /= '''
        and then Source (Scan_Ptr - 1) = '''
      then
         Unstore_String_Char;
         Error_Msg
           ("incorrect string terminator character", Scan_Ptr - 1);
         return;
      end if;

      --  Back up over a trailing semicolon, and then over a right paren
      --  that immediately precedes it.

      if Source (Scan_Ptr - 1) = ';' then
         Scan_Ptr := Scan_Ptr - 1;
         Unstore_String_Char;

         if Source (Scan_Ptr - 1) = ')' then
            Scan_Ptr := Scan_Ptr - 1;
            Unstore_String_Char;
         end if;
      end if;

      Error_Msg_S ("missing string quote");
   end Error_Unterminated_String;

   ----------------
   -- Set_String --
   ----------------

   procedure Set_String is
      Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
      --  Length of the text between the two delimiters

      function Matches (Ch : Character; Upper : Character) return Boolean;
      --  Upper is an upper case letter. Returns True if Ch is that letter
      --  or the character whose position number is 32 higher (the
      --  corresponding lower case letter).

      -------------
      -- Matches --
      -------------

      function Matches (Ch : Character; Upper : Character) return Boolean is
      begin
         return Ch = Upper
           or else Ch = Character'Val (Character'Pos (Upper) + 32);
      end Matches;

   --  Start of processing for Set_String

   begin
      --  Token_Name is currently set to Error_Name. The following
      --  section of code resets Token_Name to the proper Name_Op_xx
      --  value if the string is a valid operator symbol, otherwise it is
      --  left set to Error_Name.

      case Slen is

         --  Single character operators

         when 1 =>
            case Source (Token_Ptr + 1) is
               when '=' =>
                  Token_Name := Name_Op_Eq;

               when '>' =>
                  Token_Name := Name_Op_Gt;

               when '<' =>
                  Token_Name := Name_Op_Lt;

               when '+' =>
                  Token_Name := Name_Op_Add;

               when '-' =>
                  Token_Name := Name_Op_Subtract;

               when '&' =>
                  Token_Name := Name_Op_Concat;

               when '*' =>
                  Token_Name := Name_Op_Multiply;

               when '/' =>
                  Token_Name := Name_Op_Divide;

               when others =>
                  null;
            end case;

         --  Two character operators

         when 2 =>
            declare
               C1 : constant Character := Source (Token_Ptr + 1);
               C2 : constant Character := Source (Token_Ptr + 2);

            begin
               if C1 = '*' and then C2 = '*' then
                  Token_Name := Name_Op_Expon;

               elsif C2 = '=' then
                  case C1 is
                     when '/' =>
                        Token_Name := Name_Op_Ne;

                     when '<' =>
                        Token_Name := Name_Op_Le;

                     when '>' =>
                        Token_Name := Name_Op_Ge;

                     when others =>
                        null;
                  end case;

               elsif Matches (C1, 'O') and then Matches (C2, 'R') then
                  Token_Name := Name_Op_Or;
               end if;
            end;

         --  Three character operators, all of which are words

         when 3 =>
            declare
               C1 : constant Character := Source (Token_Ptr + 1);
               C2 : constant Character := Source (Token_Ptr + 2);
               C3 : constant Character := Source (Token_Ptr + 3);

            begin
               if Matches (C1, 'A')
                 and then Matches (C2, 'N')
                 and then Matches (C3, 'D')
               then
                  Token_Name := Name_Op_And;

               elsif Matches (C1, 'A')
                 and then Matches (C2, 'B')
                 and then Matches (C3, 'S')
               then
                  Token_Name := Name_Op_Abs;

               elsif Matches (C1, 'M')
                 and then Matches (C2, 'O')
                 and then Matches (C3, 'D')
               then
                  Token_Name := Name_Op_Mod;

               elsif Matches (C1, 'N')
                 and then Matches (C2, 'O')
                 and then Matches (C3, 'T')
               then
                  Token_Name := Name_Op_Not;

               elsif Matches (C1, 'R')
                 and then Matches (C2, 'E')
                 and then Matches (C3, 'M')
               then
                  Token_Name := Name_Op_Rem;

               elsif Matches (C1, 'X')
                 and then Matches (C2, 'O')
                 and then Matches (C3, 'R')
               then
                  Token_Name := Name_Op_Xor;
               end if;
            end;

         when others =>
            null;
      end case;

      --  If it is an operator symbol, then Token_Name is set. If it is
      --  some other string value, then Token_Name still contains
      --  Error_Name.

      if Token_Name /= Error_Name then
         Token := Tok_Operator_Symbol;
      else
         Token := Tok_String_Literal;
      end if;
   end Set_String;

   --------------------------
   -- Starts_Wide_Sequence --
   --------------------------

   function Starts_Wide_Sequence return Boolean is
   begin
      return (Ch = ESC
                and then Wide_Character_Encoding_Method
                           in WC_ESC_Encoding_Method)
        or else (Ch in Upper_Half_Character
                   and then Upper_Half_Encoding)
        or else (Ch = '['
                   and then Source (Scan_Ptr + 1) = '"'
                   and then Identifier_Char (Source (Scan_Ptr + 2)));
   end Starts_Wide_Sequence;

--  Start of processing for Slit

begin
   --  On entry, Scan_Ptr points to the opening character of the string
   --  which is either a percent, double quote, or apostrophe (single
   --  quote). The latter case is an error detected by the character
   --  literal circuit.

   Quote := Source (Scan_Ptr);
   Accumulate_Checksum (Quote);
   Start_String;
   Scan_Ptr := Scan_Ptr + 1;

   --  Loop to scan out characters of string literal. Each iteration
   --  that does not exit leaves in Code the value to be stored.

   loop
      Ch := Source (Scan_Ptr);

      if Ch = Quote then

         --  A single delimiter ends the string. A doubled delimiter
         --  stands for one delimiter character within the string.

         Accumulate_Checksum (Ch);
         Scan_Ptr := Scan_Ptr + 1;
         exit when Source (Scan_Ptr) /= Quote;
         Code := Get_Char_Code (Ch);
         Accumulate_Checksum (Ch);
         Scan_Ptr := Scan_Ptr + 1;

      elsif Ch = '"' and then Quote = '%' then
         Error_Msg_S
           ("quote not allowed in percent delimited string");
         Code := Get_Char_Code (Ch);
         Scan_Ptr := Scan_Ptr + 1;

      elsif Starts_Wide_Sequence then

         --  Wide character case. Wptr remembers where the sequence
         --  starts for use in error messages. If the sequence is bad, a
         --  blank is stored in its place.

         Wptr := Scan_Ptr;
         Scan_Wide (Source, Scan_Ptr, Code, Wide_Err);

         if Wide_Err then
            Error_Illegal_Wide_Character;
            Code := Get_Char_Code (' ');
         end if;

         Accumulate_Checksum (Code);

         if Ada_Version >= Ada_05
           and then Is_UTF_32_Non_Graphic (Code)
         then
            Error_Msg
              ("(Ada 2005) non-graphic character not permitted " &
               "in string literal", Wptr);
         end if;

      else
         --  Ordinary character case

         Accumulate_Checksum (Ch);

         if Ch in Graphic_Character then
            null;

         elsif Ch in Line_Terminator then
            Error_Unterminated_String;
            exit;

         elsif Ch in Upper_Half_Character then

            --  Upper half characters are rejected only in Ada 83 mode

            if Ada_Version = Ada_83 then
               Error_Bad_String_Char;
            end if;

         else
            Error_Bad_String_Char;
         end if;

         Code := Get_Char_Code (Ch);
         Scan_Ptr := Scan_Ptr + 1;
      end if;

      Store_String_Char (Code);

      if not In_Character_Range (Code) then
         Wide_Character_Found := True;
      end if;
   end loop;

   String_Literal_Id := End_String;
   Set_String;
end Slit;
1148 -- Start of processing for Scan
1150 begin
1151 Prev_Token := Token;
1152 Prev_Token_Ptr := Token_Ptr;
1153 Token_Name := Error_Name;
1155 -- The following loop runs more than once only if a format effector
1156 -- (tab, vertical tab, form feed, line feed, carriage return) is
1157 -- encountered and skipped, or some error situation, such as an
1158 -- illegal character, is encountered.
1160 <<Scan_Next_Character>>
1162 loop
1163 -- Skip past blanks, loop is opened up for speed
1165 while Source (Scan_Ptr) = ' ' loop
1166 if Source (Scan_Ptr + 1) /= ' ' then
1167 Scan_Ptr := Scan_Ptr + 1;
1168 exit;
1169 end if;
1171 if Source (Scan_Ptr + 2) /= ' ' then
1172 Scan_Ptr := Scan_Ptr + 2;
1173 exit;
1174 end if;
1176 if Source (Scan_Ptr + 3) /= ' ' then
1177 Scan_Ptr := Scan_Ptr + 3;
1178 exit;
1179 end if;
1181 if Source (Scan_Ptr + 4) /= ' ' then
1182 Scan_Ptr := Scan_Ptr + 4;
1183 exit;
1184 end if;
1186 if Source (Scan_Ptr + 5) /= ' ' then
1187 Scan_Ptr := Scan_Ptr + 5;
1188 exit;
1189 end if;
1191 if Source (Scan_Ptr + 6) /= ' ' then
1192 Scan_Ptr := Scan_Ptr + 6;
1193 exit;
1194 end if;
1196 if Source (Scan_Ptr + 7) /= ' ' then
1197 Scan_Ptr := Scan_Ptr + 7;
1198 exit;
1199 end if;
1201 Scan_Ptr := Scan_Ptr + 8;
1202 end loop;
1204 -- We are now at a non-blank character, which is the first character
1205 -- of the token we will scan, and hence the value of Token_Ptr.
1207 Token_Ptr := Scan_Ptr;
1209 -- Here begins the main case statement which transfers control on the
1210 -- basis of the non-blank character we have encountered.
1212 case Source (Scan_Ptr) is
1214 -- Line terminator characters
1216 when CR | LF | FF | VT =>
1217 goto Scan_Line_Terminator;
1219 -- Horizontal tab, just skip past it
1221 when HT =>
1222 if Style_Check then Style.Check_HT; end if;
1223 Scan_Ptr := Scan_Ptr + 1;
1225 -- End of file character, treated as an end of file only if it is
1226 -- the last character in the buffer, otherwise it is ignored.
1228 when EOF =>
1229 if Scan_Ptr = Source_Last (Current_Source_File) then
1230 Check_End_Of_Line;
1231 Token := Tok_EOF;
1232 return;
1233 else
1234 Scan_Ptr := Scan_Ptr + 1;
1235 end if;
1237 -- Ampersand
1239 when '&' =>
1240 Accumulate_Checksum ('&');
1242 if Source (Scan_Ptr + 1) = '&' then
1243 Error_Msg_S ("'&'& should be `AND THEN`");
1244 Scan_Ptr := Scan_Ptr + 2;
1245 Token := Tok_And;
1246 return;
1248 else
1249 Scan_Ptr := Scan_Ptr + 1;
1250 Token := Tok_Ampersand;
1251 return;
1252 end if;
1254 -- Asterisk (can be multiplication operator or double asterisk which
1255 -- is the exponentiation compound delimiter).
1257 when '*' =>
1258 Accumulate_Checksum ('*');
1260 if Source (Scan_Ptr + 1) = '*' then
1261 Accumulate_Checksum ('*');
1262 Scan_Ptr := Scan_Ptr + 2;
1263 Token := Tok_Double_Asterisk;
1264 return;
1266 else
1267 Scan_Ptr := Scan_Ptr + 1;
1268 Token := Tok_Asterisk;
1269 return;
1270 end if;
1272 -- Colon, which can either be an isolated colon, or part of an
1273 -- assignment compound delimiter.
1275 when ':' =>
1276 Accumulate_Checksum (':');
1278 if Double_Char_Token ('=') then
1279 Token := Tok_Colon_Equal;
1280 if Style_Check then Style.Check_Colon_Equal; end if;
1281 return;
1283 elsif Source (Scan_Ptr + 1) = '-'
1284 and then Source (Scan_Ptr + 2) /= '-'
1285 then
1286 Token := Tok_Colon_Equal;
1287 Error_Msg (":- should be :=", Scan_Ptr);
1288 Scan_Ptr := Scan_Ptr + 2;
1289 return;
1291 else
1292 Scan_Ptr := Scan_Ptr + 1;
1293 Token := Tok_Colon;
1294 if Style_Check then Style.Check_Colon; end if;
1295 return;
1296 end if;
1298 -- Left parenthesis
1300 when '(' =>
1301 Accumulate_Checksum ('(');
1302 Scan_Ptr := Scan_Ptr + 1;
1303 Token := Tok_Left_Paren;
1304 if Style_Check then Style.Check_Left_Paren; end if;
1305 return;
1307 -- Left bracket
1309 when '[' =>
1310 if Source (Scan_Ptr + 1) = '"' then
1311 goto Scan_Wide_Character;
1313 else
1314 Error_Msg_S ("illegal character, replaced by ""(""");
1315 Scan_Ptr := Scan_Ptr + 1;
1316 Token := Tok_Left_Paren;
1317 return;
1318 end if;
1320 -- Left brace
1322 when '{' =>
1323 Error_Msg_S ("illegal character, replaced by ""(""");
1324 Scan_Ptr := Scan_Ptr + 1;
1325 Token := Tok_Left_Paren;
1326 return;
1328 -- Comma
1330 when ',' =>
1331 Accumulate_Checksum (',');
1332 Scan_Ptr := Scan_Ptr + 1;
1333 Token := Tok_Comma;
1334 if Style_Check then Style.Check_Comma; end if;
1335 return;
1337 -- Dot, which is either an isolated period, or part of a double dot
1338 -- compound delimiter sequence. We also check for the case of a
1339 -- digit following the period, to give a better error message.
1341 when '.' =>
1342 Accumulate_Checksum ('.');
1344 if Double_Char_Token ('.') then
1345 Token := Tok_Dot_Dot;
1346 if Style_Check then Style.Check_Dot_Dot; end if;
1347 return;
1349 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1350 Error_Msg_S ("numeric literal cannot start with point");
1351 Scan_Ptr := Scan_Ptr + 1;
1353 else
1354 Scan_Ptr := Scan_Ptr + 1;
1355 Token := Tok_Dot;
1356 return;
1357 end if;
1359 -- Equal, which can either be an equality operator, or part of the
1360 -- arrow (=>) compound delimiter.
1362 when '=' =>
1363 Accumulate_Checksum ('=');
1365 if Double_Char_Token ('>') then
1366 Token := Tok_Arrow;
1367 if Style_Check then Style.Check_Arrow; end if;
1368 return;
1370 elsif Source (Scan_Ptr + 1) = '=' then
1371 Error_Msg_S ("== should be =");
1372 Scan_Ptr := Scan_Ptr + 1;
1373 end if;
1375 Scan_Ptr := Scan_Ptr + 1;
1376 Token := Tok_Equal;
1377 return;
1379 -- Greater than, which can be a greater than operator, greater than
1380 -- or equal operator, or first character of a right label bracket.
1382 when '>' =>
1383 Accumulate_Checksum ('>');
1385 if Double_Char_Token ('=') then
1386 Token := Tok_Greater_Equal;
1387 return;
1389 elsif Double_Char_Token ('>') then
1390 Token := Tok_Greater_Greater;
1391 return;
1393 else
1394 Scan_Ptr := Scan_Ptr + 1;
1395 Token := Tok_Greater;
1396 return;
1397 end if;
1399 -- Less than, which can be a less than operator, less than or equal
1400 -- operator, or the first character of a left label bracket, or the
1401 -- first character of a box (<>) compound delimiter.
1403 when '<' =>
1404 Accumulate_Checksum ('<');
1406 if Double_Char_Token ('=') then
1407 Token := Tok_Less_Equal;
1408 return;
1410 elsif Double_Char_Token ('>') then
1411 Token := Tok_Box;
1412 if Style_Check then Style.Check_Box; end if;
1413 return;
1415 elsif Double_Char_Token ('<') then
1416 Token := Tok_Less_Less;
1417 return;
1419 else
1420 Scan_Ptr := Scan_Ptr + 1;
1421 Token := Tok_Less;
1422 return;
1423 end if;
1425 -- Minus, which is either a subtraction operator, or the first
1426 -- character of double minus starting a comment
1428 when '-' => Minus_Case : begin
1429 if Source (Scan_Ptr + 1) = '>' then
1430 Error_Msg_S ("invalid token");
1431 Scan_Ptr := Scan_Ptr + 2;
1432 Token := Tok_Arrow;
1433 return;
1435 elsif Source (Scan_Ptr + 1) /= '-' then
1436 Accumulate_Checksum ('-');
1437 Scan_Ptr := Scan_Ptr + 1;
1438 Token := Tok_Minus;
1439 return;
1441 -- Comment
1443 else -- Source (Scan_Ptr + 1) = '-' then
1444 if Style_Check then Style.Check_Comment; end if;
1445 Scan_Ptr := Scan_Ptr + 2;
1446 Start_Of_Comment := Scan_Ptr;
1448 -- Loop to scan comment (this loop runs more than once only if
1449 -- a horizontal tab or other non-graphic character is scanned)
1451 loop
1452 -- Scan to non graphic character (opened up for speed)
1454 -- Note that we just eat left brackets, which means that
1455 -- bracket notation cannot be used for end of line
1456 -- characters in comments. This seems a reasonable choice,
1457 -- since no one would ever use brackets notation in a real
1458 -- program in this situation, and if we allow brackets
1459 -- notation, we forbid some valid comments which contain a
1460 -- brackets sequence that happens to match an end of line
1461 -- character.
1463 loop
1464 exit when Source (Scan_Ptr) not in Graphic_Character;
1465 Scan_Ptr := Scan_Ptr + 1;
1466 exit when Source (Scan_Ptr) not in Graphic_Character;
1467 Scan_Ptr := Scan_Ptr + 1;
1468 exit when Source (Scan_Ptr) not in Graphic_Character;
1469 Scan_Ptr := Scan_Ptr + 1;
1470 exit when Source (Scan_Ptr) not in Graphic_Character;
1471 Scan_Ptr := Scan_Ptr + 1;
1472 exit when Source (Scan_Ptr) not in Graphic_Character;
1473 Scan_Ptr := Scan_Ptr + 1;
1474 end loop;
1476 -- Keep going if horizontal tab
1478 if Source (Scan_Ptr) = HT then
1479 if Style_Check then Style.Check_HT; end if;
1480 Scan_Ptr := Scan_Ptr + 1;
1482 -- Terminate scan of comment if line terminator
1484 elsif Source (Scan_Ptr) in Line_Terminator then
1485 exit;
1487 -- Terminate scan of comment if end of file encountered
1488 -- (embedded EOF character or real last character in file)
1490 elsif Source (Scan_Ptr) = EOF then
1491 exit;
1493 -- If we have a wide character, we have to scan it out,
1494 -- because it might be a legitimate line terminator
1496 elsif (Source (Scan_Ptr) = ESC
1497 and then Identifier_Char (ESC))
1498 or else
1499 (Source (Scan_Ptr) in Upper_Half_Character
1500 and then Upper_Half_Encoding)
1501 then
1502 declare
1503 Wptr : constant Source_Ptr := Scan_Ptr;
1504 Code : Char_Code;
1505 Err : Boolean;
1507 begin
1508 Scan_Wide (Source, Scan_Ptr, Code, Err);
1510 -- If not well formed wide character, then just skip
1511 -- past it and ignore it.
1513 if Err then
1514 Scan_Ptr := Wptr + 1;
1516 -- If UTF_32 terminator, terminate comment scan
1518 elsif Is_UTF_32_Line_Terminator (Code) then
1519 Scan_Ptr := Wptr;
1520 exit;
1521 end if;
1522 end;
1524 -- Keep going if character in 80-FF range, or is ESC. These
1525 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1526 -- They are allowed even in Ada 83 mode according to the
1527 -- approved AI. ESC was added to the AI in June 93.
1529 elsif Source (Scan_Ptr) in Upper_Half_Character
1530 or else Source (Scan_Ptr) = ESC
1531 then
1532 Scan_Ptr := Scan_Ptr + 1;
1534 -- Otherwise we have an illegal comment character
1536 else
1537 Error_Illegal_Character;
1538 end if;
1539 end loop;
1541 -- Note that, except when comments are tokens, we do NOT
1542 -- execute a return here, instead we fall through to reexecute
1543 -- the scan loop to look for a token.
1545 if Comment_Is_Token then
1546 Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1547 Name_Buffer (1 .. Name_Len) :=
1548 String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1549 Comment_Id := Name_Find;
1550 Token := Tok_Comment;
1551 return;
1552 end if;
1553 end if;
1554 end Minus_Case;
1556 -- Double quote starting a string literal
1558 when '"' =>
1559 Slit;
1560 Post_Scan;
1561 return;
1563 -- Percent starting a string literal
1565 when '%' =>
1566 Obsolescent_Check (Token_Ptr);
1568 if Warn_On_Obsolescent_Feature then
1569 Error_Msg_S
1570 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1571 Error_Msg_S
1572 ("\use """""" instead?");
1573 end if;
1575 Slit;
1576 Post_Scan;
1577 return;
1579 -- Apostrophe. This can either be the start of a character literal,
1580 -- or an isolated apostrophe used in a qualified expression or an
1581 -- attribute. We treat it as a character literal if it does not
1582 -- follow a right parenthesis, identifier, the keyword ALL or
1583 -- a literal. This means that we correctly treat constructs like:
1585 -- A := CHARACTER'('A');
1587 -- Note that RM-2.2(7) does not require a separator between
1588 -- "CHARACTER" and "'" in the above.
1590 when ''' => Char_Literal_Case : declare
1591 Code : Char_Code;
1592 Err : Boolean;
1594 begin
1595 Accumulate_Checksum (''');
1596 Scan_Ptr := Scan_Ptr + 1;
1598 -- Here is where we make the test to distinguish the cases. Treat
1599 -- as apostrophe if previous token is an identifier, right paren
1600 -- or the reserved word "all" (latter case as in A.all'Address)
1601 -- (or the reserved word "project" in project files). Also treat
1602 -- it as apostrophe after a literal (this catches some legitimate
1603 -- cases, like A."abs"'Address, and also gives better error
1604 -- behavior for impossible cases like 123'xxx).
1606 if Prev_Token = Tok_Identifier
1607 or else Prev_Token = Tok_Right_Paren
1608 or else Prev_Token = Tok_All
1609 or else Prev_Token = Tok_Project
1610 or else Prev_Token in Token_Class_Literal
1611 then
1612 Token := Tok_Apostrophe;
1613 if Style_Check then Style.Check_Apostrophe; end if;
1614 return;
1616 -- Otherwise the apostrophe starts a character literal
1618 else
1619 -- Case of wide character literal
1621 if (Source (Scan_Ptr) = ESC
1622 and then
1623 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1624 or else
1625 (Source (Scan_Ptr) in Upper_Half_Character
1626 and then
1627 Upper_Half_Encoding)
1628 or else
1629 (Source (Scan_Ptr) = '['
1630 and then
1631 Source (Scan_Ptr + 1) = '"')
1632 then
1633 Wptr := Scan_Ptr;
1634 Scan_Wide (Source, Scan_Ptr, Code, Err);
1635 Accumulate_Checksum (Code);
1637 if Err then
1638 Error_Illegal_Wide_Character;
1639 Code := Character'Pos (' ');
1641 elsif Ada_Version >= Ada_05
1642 and then Is_UTF_32_Non_Graphic (Code)
1643 then
1644 Error_Msg
1645 ("(Ada 2005) non-graphic character not permitted " &
1646 "in character literal", Wptr);
1647 end if;
1649 if Source (Scan_Ptr) /= ''' then
1650 Error_Msg_S ("missing apostrophe");
1651 else
1652 Scan_Ptr := Scan_Ptr + 1;
1653 end if;
1655 -- If we do not find a closing quote in the expected place then
1656 -- assume that we have a misguided attempt at a string literal.
1658 -- However, if previous token is RANGE, then we return an
1659 -- apostrophe instead since this gives better error recovery
1661 elsif Source (Scan_Ptr + 1) /= ''' then
1662 if Prev_Token = Tok_Range then
1663 Token := Tok_Apostrophe;
1664 return;
1666 else
1667 Scan_Ptr := Scan_Ptr - 1;
1668 Error_Msg_S
1669 ("strings are delimited by double quote character");
1670 Slit;
1671 Post_Scan;
1672 return;
1673 end if;
1675 -- Otherwise we have a (non-wide) character literal
1677 else
1678 Accumulate_Checksum (Source (Scan_Ptr));
1680 if Source (Scan_Ptr) not in Graphic_Character then
1681 if Source (Scan_Ptr) in Upper_Half_Character then
1682 if Ada_Version = Ada_83 then
1683 Error_Illegal_Character;
1684 end if;
1686 else
1687 Error_Illegal_Character;
1688 end if;
1689 end if;
1691 Code := Get_Char_Code (Source (Scan_Ptr));
1692 Scan_Ptr := Scan_Ptr + 2;
1693 end if;
1695 -- Fall through here with Scan_Ptr updated past the closing
1696 -- quote, and Code set to the Char_Code value for the literal
1698 Accumulate_Checksum (''');
1699 Token := Tok_Char_Literal;
1700 Set_Character_Literal_Name (Code);
1701 Token_Name := Name_Find;
1702 Character_Code := Code;
1703 Post_Scan;
1704 return;
1705 end if;
1706 end Char_Literal_Case;
1708 -- Right parenthesis
1710 when ')' =>
1711 Accumulate_Checksum (')');
1712 Scan_Ptr := Scan_Ptr + 1;
1713 Token := Tok_Right_Paren;
1714 if Style_Check then Style.Check_Right_Paren; end if;
1715 return;
1717 -- Right bracket or right brace, treated as right paren
1719 when ']' | '}' =>
1720 Error_Msg_S ("illegal character, replaced by "")""");
1721 Scan_Ptr := Scan_Ptr + 1;
1722 Token := Tok_Right_Paren;
1723 return;
1725 -- Slash (can be division operator or first character of not equal)
1727 when '/' =>
1728 Accumulate_Checksum ('/');
1730 if Double_Char_Token ('=') then
1731 Token := Tok_Not_Equal;
1732 return;
1733 else
1734 Scan_Ptr := Scan_Ptr + 1;
1735 Token := Tok_Slash;
1736 return;
1737 end if;
1739 -- Semicolon
1741 when ';' =>
1742 Accumulate_Checksum (';');
1743 Scan_Ptr := Scan_Ptr + 1;
1744 Token := Tok_Semicolon;
1745 if Style_Check then Style.Check_Semicolon; end if;
1746 return;
1748 -- Vertical bar
1750 when '|' => Vertical_Bar_Case : begin
1751 Accumulate_Checksum ('|');
1753 -- Special check for || to give nice message
1755 if Source (Scan_Ptr + 1) = '|' then
1756 Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1757 Scan_Ptr := Scan_Ptr + 2;
1758 Token := Tok_Or;
1759 return;
1761 else
1762 Scan_Ptr := Scan_Ptr + 1;
1763 Token := Tok_Vertical_Bar;
1764 if Style_Check then Style.Check_Vertical_Bar; end if;
1765 return;
1766 end if;
1767 end Vertical_Bar_Case;
1769 -- Exclamation, replacement character for vertical bar
1771 when '!' => Exclamation_Case : begin
1772 Accumulate_Checksum ('!');
1773 Obsolescent_Check (Token_Ptr);
1775 if Warn_On_Obsolescent_Feature then
1776 Error_Msg_S
1777 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1778 Error_Msg_S
1779 ("\use ""'|"" instead?");
1780 end if;
1782 if Source (Scan_Ptr + 1) = '=' then
1783 Error_Msg_S ("'!= should be /=");
1784 Scan_Ptr := Scan_Ptr + 2;
1785 Token := Tok_Not_Equal;
1786 return;
1788 else
1789 Scan_Ptr := Scan_Ptr + 1;
1790 Token := Tok_Vertical_Bar;
1791 return;
1792 end if;
1793 end Exclamation_Case;
1795 -- Plus
1797 when '+' => Plus_Case : begin
1798 Accumulate_Checksum ('+');
1799 Scan_Ptr := Scan_Ptr + 1;
1800 Token := Tok_Plus;
1801 return;
1802 end Plus_Case;
1804 -- Digits starting a numeric literal
1806 when '0' .. '9' =>
1807 Nlit;
1809 if Identifier_Char (Source (Scan_Ptr)) then
1810 Error_Msg_S
1811 ("delimiter required between literal and identifier");
1812 end if;
1813 Post_Scan;
1814 return;
1816 -- Lower case letters
1818 when 'a' .. 'z' =>
1819 Name_Len := 1;
1820 Underline_Found := False;
1821 Name_Buffer (1) := Source (Scan_Ptr);
1822 Accumulate_Checksum (Name_Buffer (1));
1823 Scan_Ptr := Scan_Ptr + 1;
1824 goto Scan_Identifier;
1826 -- Upper case letters
1828 when 'A' .. 'Z' =>
1829 Name_Len := 1;
1830 Underline_Found := False;
1831 Name_Buffer (1) :=
1832 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1833 Accumulate_Checksum (Name_Buffer (1));
1834 Scan_Ptr := Scan_Ptr + 1;
1835 goto Scan_Identifier;
1837 -- Underline character
1839 when '_' =>
1840 if Special_Characters ('_') then
1841 Token_Ptr := Scan_Ptr;
1842 Scan_Ptr := Scan_Ptr + 1;
1843 Token := Tok_Special;
1844 Special_Character := '_';
1845 return;
1846 end if;
1848 Error_Msg_S ("identifier cannot start with underline");
1849 Name_Len := 1;
1850 Name_Buffer (1) := '_';
1851 Scan_Ptr := Scan_Ptr + 1;
1852 Underline_Found := False;
1853 goto Scan_Identifier;
1855 -- Space (not possible, because we scanned past blanks)
1857 when ' ' =>
1858 raise Program_Error;
1860 -- Characters in top half of ASCII 8-bit chart
1862 when Upper_Half_Character =>
1864 -- Wide character case
1866 if Upper_Half_Encoding then
1867 goto Scan_Wide_Character;
1869 -- Otherwise we have OK Latin-1 character
1871 else
1872 -- Upper half characters may possibly be identifier letters
1873 -- but can never be digits, so Identifier_Char can be used to
1874 -- test for a valid start of identifier character.
1876 if Identifier_Char (Source (Scan_Ptr)) then
1877 Name_Len := 0;
1878 Underline_Found := False;
1879 goto Scan_Identifier;
1880 else
1881 Error_Illegal_Character;
1882 end if;
1883 end if;
1885 when ESC =>
1887 -- ESC character, possible start of identifier if wide characters
1888 -- using ESC encoding are allowed in identifiers, which we can
1889 -- tell by looking at the Identifier_Char flag for ESC, which is
1890 -- only true if these conditions are met. In Ada 2005 mode, may
1891 -- also be valid UTF_32 space or line terminator character.
1893 if Identifier_Char (ESC) then
1894 Name_Len := 0;
1895 goto Scan_Wide_Character;
1896 else
1897 Error_Illegal_Character;
1898 end if;
1900 -- Invalid control characters
1902 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
1903 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1904 EM | FS | GS | RS | US | DEL
1906 Error_Illegal_Character;
1908 -- Invalid graphic characters
1910 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1912 -- If Set_Special_Character has been called for this character,
1913 -- set Scans.Special_Character and return a Special token.
1915 if Special_Characters (Source (Scan_Ptr)) then
1916 Token_Ptr := Scan_Ptr;
1917 Token := Tok_Special;
1918 Special_Character := Source (Scan_Ptr);
1919 Scan_Ptr := Scan_Ptr + 1;
1920 return;
1922 -- Otherwise, this is an illegal character
1924 else
1925 Error_Illegal_Character;
1926 end if;
1928 -- End switch on non-blank character
1930 end case;
1932 -- End loop past format effectors. The exit from this loop is by
1933 -- executing a return statement following completion of token scan
1934 -- (control never falls out of this loop to the code which follows)
1936 end loop;
1938 -- Wide_Character scanning routine. On entry we have encountered the
1939 -- initial character of a wide character sequence.
1941 <<Scan_Wide_Character>>
1943 declare
1944 Code : Char_Code;
1945 Err : Boolean;
1947 begin
1948 Wptr := Scan_Ptr;
1949 Scan_Wide (Source, Scan_Ptr, Code, Err);
1951 -- If bad wide character, signal error and continue scan
1953 if Err then
1954 Error_Illegal_Wide_Character;
1955 goto Scan_Next_Character;
1957 -- If OK letter, reset scan ptr and go scan identifier
1959 elsif Is_UTF_32_Letter (Code) then
1960 Scan_Ptr := Wptr;
1961 Name_Len := 0;
1962 Underline_Found := False;
1963 goto Scan_Identifier;
1965 -- If OK wide space, ignore and keep scanning (we do not include
1966 -- any ignored spaces in checksum)
1968 elsif Is_UTF_32_Space (Code) then
1969 goto Scan_Next_Character;
1971 -- If OK wide line terminator, terminate current line
1973 elsif Is_UTF_32_Line_Terminator (Code) then
1974 Scan_Ptr := Wptr;
1975 goto Scan_Line_Terminator;
1977 -- Punctuation is an error (at start of identifier)
1979 elsif Is_UTF_32_Punctuation (Code) then
1980 Error_Msg
1981 ("identifier cannot start with punctuation", Wptr);
1982 Scan_Ptr := Wptr;
1983 Name_Len := 0;
1984 Underline_Found := False;
1985 goto Scan_Identifier;
1987 -- Mark character is an error (at start of identifier)
1989 elsif Is_UTF_32_Mark (Code) then
1990 Error_Msg
1991 ("identifier cannot start with mark character", Wptr);
1992 Scan_Ptr := Wptr;
1993 Name_Len := 0;
1994 Underline_Found := False;
1995 goto Scan_Identifier;
1997 -- Other format character is an error (at start of identifier)
1999 elsif Is_UTF_32_Other (Code) then
2000 Error_Msg
2001 ("identifier cannot start with other format character", Wptr);
2002 Scan_Ptr := Wptr;
2003 Name_Len := 0;
2004 Underline_Found := False;
2005 goto Scan_Identifier;
2007 -- Extended digit character is an error. Could be bad start of
2008 -- identifier or bad literal. Not worth doing too much to try to
2009 -- distinguish these cases, but we will do a little bit.
2011 elsif Is_UTF_32_Digit (Code) then
2012 Error_Msg
2013 ("identifier cannot start with digit character", Wptr);
2014 Scan_Ptr := Wptr;
2015 Name_Len := 0;
2016 Underline_Found := False;
2017 goto Scan_Identifier;
2019 -- All other wide characters are illegal here
2021 else
2022 Error_Illegal_Wide_Character;
2023 goto Scan_Next_Character;
2024 end if;
2025 end;
2027 -- Routine to scan line terminator. On entry Scan_Ptr points to a
2028 -- character which is one of FF, LF, CR, VT, or one of the wide characters
2029 -- that is treated as a line terminator.
2031 <<Scan_Line_Terminator>>
2033 -- Check line too long
2035 Check_End_Of_Line;
2037 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2038 -- a physical line.
2040 if End_Of_Line_Is_Token then
2041 Token_Ptr := Scan_Ptr;
2042 end if;
2044 declare
2045 Physical : Boolean;
2047 begin
2048 Skip_Line_Terminators (Scan_Ptr, Physical);
2050 -- If we are at start of physical line, update scan pointers to
2051 -- reflect the start of the new line.
2053 if Physical then
2054 Current_Line_Start := Scan_Ptr;
2055 Start_Column := Set_Start_Column;
2056 First_Non_Blank_Location := Scan_Ptr;
2058 -- If End_Of_Line is a token, we return it as it is a
2059 -- physical line.
2061 if End_Of_Line_Is_Token then
2062 Token := Tok_End_Of_Line;
2063 return;
2064 end if;
2065 end if;
2066 end;
2068 goto Scan_Next_Character;
2070 -- Identifier scanning routine. On entry, some initial characters of
2071 -- the identifier may have already been stored in Name_Buffer. If so,
2072 -- Name_Len has the number of characters stored. Otherwise Name_Len is
2073 -- set to zero on entry. Underline_Found is also set False on entry.
2075 <<Scan_Identifier>>
2077 -- This loop scans as fast as possible past lower half letters and
2078 -- digits, which we expect to be the most common characters.
2080 loop
2081 if Source (Scan_Ptr) in 'a' .. 'z'
2082 or else Source (Scan_Ptr) in '0' .. '9'
2083 then
2084 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2085 Accumulate_Checksum (Source (Scan_Ptr));
2087 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2088 Name_Buffer (Name_Len + 1) :=
2089 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2090 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
2092 else
2093 exit;
2094 end if;
2096 Underline_Found := False;
2097 Scan_Ptr := Scan_Ptr + 1;
2098 Name_Len := Name_Len + 1;
2099 end loop;
2101 -- If we fall through, then we have encountered either an underline
2102 -- character, or an extended identifier character (i.e. one from the
2103 -- upper half), or a wide character, or an identifier terminator. The
2104 -- initial test speeds us up in the most common case where we have
2105 -- an identifier terminator. Note that ESC is an identifier character
2106 -- only if a wide character encoding method that uses ESC encoding
2107 -- is active, so if we find an ESC character we know that we have a
2108 -- wide character.
2110 if Identifier_Char (Source (Scan_Ptr)) then
2112 -- Case of underline
2114 if Source (Scan_Ptr) = '_' then
2115 Accumulate_Checksum ('_');
2117 if Underline_Found then
2118 Error_No_Double_Underline;
2119 else
2120 Underline_Found := True;
2121 Name_Len := Name_Len + 1;
2122 Name_Buffer (Name_Len) := '_';
2123 end if;
2125 Scan_Ptr := Scan_Ptr + 1;
2126 goto Scan_Identifier;
2128 -- Upper half character
2130 elsif Source (Scan_Ptr) in Upper_Half_Character
2131 and then not Upper_Half_Encoding
2132 then
2133 Accumulate_Checksum (Source (Scan_Ptr));
2134 Store_Encoded_Character
2135 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2136 Scan_Ptr := Scan_Ptr + 1;
2137 Underline_Found := False;
2138 goto Scan_Identifier;
2140 -- Left bracket not followed by a quote terminates an identifier.
2141 -- This is an error, but we don't want to give a junk error msg
2142 -- about wide characters in this case!
2144 elsif Source (Scan_Ptr) = '['
2145 and then Source (Scan_Ptr + 1) /= '"'
2146 then
2147 null;
2149 -- We know we have a wide character encoding here (the current
2150 -- character is either ESC, left bracket, or an upper half
2151 -- character depending on the encoding method).
2153 else
2154 -- Scan out the wide character and insert the appropriate
2155 -- encoding into the name table entry for the identifier.
2157 declare
2158 Code : Char_Code;
2159 Err : Boolean;
2160 Chr : Character;
2162 begin
2163 Wptr := Scan_Ptr;
2164 Scan_Wide (Source, Scan_Ptr, Code, Err);
2166 -- If error, signal error
2168 if Err then
2169 Error_Illegal_Wide_Character;
2171 -- If the character scanned is a normal identifier
2172 -- character, then we treat it that way.
2174 elsif In_Character_Range (Code)
2175 and then Identifier_Char (Get_Character (Code))
2176 then
2177 Chr := Get_Character (Code);
2178 Accumulate_Checksum (Chr);
2179 Store_Encoded_Character
2180 (Get_Char_Code (Fold_Lower (Chr)));
2181 Underline_Found := False;
2183 -- Here if not a normal identifier character
2185 else
2186 -- Make sure we are allowing wide characters in
2187 -- identifiers. Note that we allow wide character
2188 -- notation for an OK identifier character. This in
2189 -- particular allows bracket or other notation to be
2190 -- used for upper half letters.
2192 -- Wide characters are always allowed in Ada 2005
2194 if Identifier_Character_Set /= 'w'
2195 and then Ada_Version < Ada_05
2196 then
2197 Error_Msg
2198 ("wide character not allowed in identifier", Wptr);
2199 end if;
2201 -- If OK letter, store it folding to upper case. Note
2202 -- that we include the folded letter in the checksum.
2204 if Is_UTF_32_Letter (Code) then
2205 Code := UTF_32_To_Upper_Case (Code);
2206 Accumulate_Checksum (Code);
2207 Store_Encoded_Character (Code);
2208 Underline_Found := False;
2210 -- If OK extended digit or mark, then store it
2212 elsif Is_UTF_32_Digit (Code)
2213 or else Is_UTF_32_Mark (Code)
2214 then
2215 Accumulate_Checksum (Code);
2216 Store_Encoded_Character (Code);
2217 Underline_Found := False;
2219 -- Wide punctuation is also stored, but counts as an
2220 -- underline character for error checking purposes.
2222 elsif Is_UTF_32_Punctuation (Code) then
2223 Accumulate_Checksum (Code);
2225 if Underline_Found then
2226 declare
2227 Cend : constant Source_Ptr := Scan_Ptr;
2228 begin
2229 Scan_Ptr := Wptr;
2230 Error_No_Double_Underline;
2231 Scan_Ptr := Cend;
2232 end;
2234 else
2235 Store_Encoded_Character (Code);
2236 Underline_Found := True;
2237 end if;
2239 -- Wide character in Unicode category "Other, Format"
2240 -- is accepted in an identifier, but is ignored and not
2241 -- stored. It seems reasonable to exclude it from the
2242 -- checksum.
2244 elsif Is_UTF_32_Other (Code) then
2245 null;
2247 -- Wide character in category Separator,Space terminates
2249 elsif Is_UTF_32_Space (Code) then
2250 goto Scan_Identifier_Complete;
2252 -- Any other wide character is not acceptable
2254 else
2255 Error_Msg
2256 ("invalid wide character in identifier", Wptr);
2257 end if;
2258 end if;
2260 goto Scan_Identifier;
2261 end;
2262 end if;
2263 end if;
2265 -- Scan of identifier is complete. The identifier is stored in
2266 -- Name_Buffer, and Scan_Ptr points past the last character.
2268 <<Scan_Identifier_Complete>>
2269 Token_Name := Name_Find;
2271 -- Check for identifier ending with underline or punctuation char
2273 if Underline_Found then
2274 Underline_Found := False;
2276 if Source (Scan_Ptr - 1) = '_' then
2277 Error_Msg
2278 ("identifier cannot end with underline", Scan_Ptr - 1);
2279 else
2280 Error_Msg
2281 ("identifier cannot end with punctuation character", Wptr);
2282 end if;
2283 end if;
2285 -- Here is where we check if it was a keyword
2287 if Get_Name_Table_Byte (Token_Name) /= 0
2288 and then (Ada_Version >= Ada_95
2289 or else Token_Name not in Ada_95_Reserved_Words)
2290 and then (Ada_Version >= Ada_05
2291 or else Token_Name not in Ada_2005_Reserved_Words)
2292 then
2293 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2295 -- Deal with possible style check for non-lower case keyword, but
2296 -- we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2297 -- this purpose if they appear as attribute designators. Actually
2298 -- we only check the first character for speed.
2300 -- Ada 2005 (AI-284): Do not apply the style check in case of
2301 -- "pragma Interface"
2303 if Style_Check
2304 and then Source (Token_Ptr) <= 'Z'
2305 and then (Prev_Token /= Tok_Apostrophe
2306 or else
2307 (Token /= Tok_Access
2308 and then Token /= Tok_Delta
2309 and then Token /= Tok_Digits
2310 and then Token /= Tok_Range))
2311 and then (Token /= Tok_Interface
2312 or else
2313 (Token = Tok_Interface
2314 and then Prev_Token /= Tok_Pragma))
2315 then
2316 Style.Non_Lower_Case_Keyword;
2317 end if;
2319 -- We must reset Token_Name since this is not an identifier and
2320 -- if we leave Token_Name set, the parser gets confused because
2321 -- it thinks it is dealing with an identifier instead of the
2322 -- corresponding keyword.
2324 Token_Name := No_Name;
2325 Accumulate_Token_Checksum;
2326 return;
2328 -- It is an identifier after all
2330 else
2331 Token := Tok_Identifier;
2332 Accumulate_Token_Checksum;
2333 Post_Scan;
2334 return;
2335 end if;
2336 end Scan;
2338 --------------------------
2339 -- Set_Comment_As_Token --
2340 --------------------------
procedure Set_Comment_As_Token (Value : Boolean) is
begin
   --  Store the caller's setting in the package-level flag
   --  Comment_Is_Token (which is initialized to False).

   Comment_Is_Token := Value;
end Set_Comment_As_Token;
2347 ------------------------------
2348 -- Set_End_Of_Line_As_Token --
2349 ------------------------------
procedure Set_End_Of_Line_As_Token (Value : Boolean) is
begin
   --  Store the caller's setting in the package-level flag
   --  End_Of_Line_Is_Token.

   End_Of_Line_Is_Token := Value;
end Set_End_Of_Line_As_Token;
2356 ---------------------------
2357 -- Set_Special_Character --
2358 ---------------------------
procedure Set_Special_Character (C : Character) is

   Is_Eligible : constant Boolean :=
     C = '#'
       or else C = '$'
       or else C = '_'
       or else C = '?'
       or else C = '@'
       or else C = '`'
       or else C = '\'
       or else C = '^'
       or else C = '~';
   --  True only for the characters that may be flagged as special

begin
   --  Flag C in Special_Characters when it is one of the eligible
   --  characters. A request for any other character is silently ignored.

   if Is_Eligible then
      Special_Characters (C) := True;
   end if;
end Set_Special_Character;
2371 ----------------------
2372 -- Set_Start_Column --
2373 ----------------------
2375 -- Note: it seems at first glance a little expensive to compute this value
2376 -- for every source line (since it is certainly not used for all source
2377 -- lines). On the other hand, it doesn't take much more work to skip past
2378 -- the initial white space on the line counting the columns than it would
2379 -- to scan past the white space using the standard scanning circuits.
function Set_Start_Column return Column_Number is
   Start_Column : Column_Number := 0;
   --  Column count accumulated so far for the leading white space

begin
   --  Skips the blanks and horizontal tabs at Scan_Ptr, leaving Scan_Ptr on
   --  the first character that is neither, and returns the column count
   --  accumulated while doing so. A blank counts as one column; a tab
   --  advances to the next multiple of eight.

   --  Outer loop scans past horizontal tab characters

   Tabs_Loop : loop

      --  Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
      --  past the blanks and adjusting Start_Column to account for them.
      --  The scan is unrolled to look at up to seven characters per
      --  iteration. Each test is reached only if all preceding characters
      --  were blanks, so the count applied is the number of blanks seen.

      Blanks_Loop : loop
         if Source (Scan_Ptr) /= ' ' then
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 1) /= ' ' then
            Scan_Ptr := Scan_Ptr + 1;
            Start_Column := Start_Column + 1;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 2) /= ' ' then
            Scan_Ptr := Scan_Ptr + 2;
            Start_Column := Start_Column + 2;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 3) /= ' ' then
            Scan_Ptr := Scan_Ptr + 3;
            Start_Column := Start_Column + 3;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 4) /= ' ' then
            Scan_Ptr := Scan_Ptr + 4;
            Start_Column := Start_Column + 4;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 5) /= ' ' then
            Scan_Ptr := Scan_Ptr + 5;
            Start_Column := Start_Column + 5;
            exit Blanks_Loop;

         elsif Source (Scan_Ptr + 6) /= ' ' then
            Scan_Ptr := Scan_Ptr + 6;
            Start_Column := Start_Column + 6;
            exit Blanks_Loop;

         --  Seven blanks in a row: consume them all and keep scanning

         else
            Scan_Ptr := Scan_Ptr + 7;
            Start_Column := Start_Column + 7;
         end if;
      end loop Blanks_Loop;

      --  Outer loop keeps going only if a horizontal tab follows

      if Source (Scan_Ptr) = HT then
         if Style_Check then
            Style.Check_HT;
         end if;

         Scan_Ptr := Scan_Ptr + 1;

         --  Advance to the next multiple of eight columns

         Start_Column := (Start_Column / 8) * 8 + 8;
      else
         exit Tabs_Loop;
      end if;
   end loop Tabs_Loop;

   return Start_Column;

--  A Constraint_Error can be raised here if checks are enabled and the line
--  starts with so many blanks or tabs that the count no longer fits in
--  Column_Number. The exact column is of no interest for such a line, so
--  return the last value that was successfully stored (a failed assignment
--  leaves Start_Column unchanged) rather than letting the exception
--  propagate out of the scanner.

exception
   when Constraint_Error =>
      return Start_Column;
end Set_Start_Column;
2452 end Scng;