2003-11-27 Guilhem Lavaux <guilhem@kaffe.org>
[official-gcc.git] / gcc / ada / scng.adb
blob369a6acc944ed9ae7bbd78e733e9e567716bbfff
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- S C N G --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1992-2003 Free Software Foundation, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
20 -- MA 02111-1307, USA. --
21 -- --
22 -- GNAT was originally developed by the GNAT team at New York University. --
23 -- Extensive contributions were provided by Ada Core Technologies Inc. --
24 -- --
25 ------------------------------------------------------------------------------
27 with Csets; use Csets;
28 with Err_Vars; use Err_Vars;
29 with Hostparm; use Hostparm;
30 with Namet; use Namet;
31 with Opt; use Opt;
32 with Scans; use Scans;
33 with Sinput; use Sinput;
34 with Snames; use Snames;
35 with Stringt; use Stringt;
36 with Stylesw; use Stylesw;
37 with Uintp; use Uintp;
38 with Urealp; use Urealp;
39 with Widechar; use Widechar;
41 with System.CRC32;
42 with System.WCh_Con; use System.WCh_Con;
44 package body Scng is
46 use ASCII;
47 -- Make control characters visible
49 Special_Characters : array (Character) of Boolean := (others => False);
50 -- For characters that are Special token, the value is True
52 End_Of_Line_Is_Token : Boolean := False;
53 -- True if End_Of_Line is a token
55 -----------------------
56 -- Local Subprograms --
57 -----------------------
59 procedure Accumulate_Checksum (C : Character);
60 pragma Inline (Accumulate_Checksum);
61 -- This routine accumulates the checksum given character C. During the
62 -- scanning of a source file, this routine is called with every character
63 -- in the source, excluding blanks, and all control characters (except
64 -- that ESC is included in the checksum). Upper case letters not in string
65 -- literals are folded by the caller. See Sinput spec for the documentation
66 -- of the checksum algorithm. Note: checksum values are only used if we
67 -- generate code, so it is not necessary to worry about making the right
68 -- sequence of calls in any error situation.
70 procedure Accumulate_Checksum (C : Char_Code);
71 pragma Inline (Accumulate_Checksum);
72 -- This version is identical, except that the argument, C, is a character
73 -- code value instead of a character. This is used when wide characters
74 -- are scanned. We use the character code rather than the ASCII characters
75 -- so that the checksum is independent of wide character encoding method.
77 procedure Initialize_Checksum;
78 pragma Inline (Initialize_Checksum);
79 -- Initialize checksum value
81 -------------------------
82 -- Accumulate_Checksum --
83 -------------------------
85 procedure Accumulate_Checksum (C : Character) is
86 begin
87 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
88 end Accumulate_Checksum;
90 procedure Accumulate_Checksum (C : Char_Code) is
91 begin
92 Accumulate_Checksum (Character'Val (C / 256));
93 Accumulate_Checksum (Character'Val (C mod 256));
94 end Accumulate_Checksum;
96 ----------------------------
97 -- Determine_Token_Casing --
98 ----------------------------
100 function Determine_Token_Casing return Casing_Type is
101 begin
102 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
103 end Determine_Token_Casing;
105 -------------------------
106 -- Initialize_Checksum --
107 -------------------------
109 procedure Initialize_Checksum is
110 begin
111 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
112 end Initialize_Checksum;
114 ------------------------
115 -- Initialize_Scanner --
116 ------------------------
118 procedure Initialize_Scanner
119 (Unit : Unit_Number_Type;
120 Index : Source_File_Index)
122 begin
123 -- Set up Token_Type values in Names Table entries for reserved keywords
124 -- We use the Pos value of the Token_Type value. Note we are relying on
125 -- the fact that Token_Type'Val (0) is not a reserved word!
127 Set_Name_Table_Byte (Name_Abort, Token_Type'Pos (Tok_Abort));
128 Set_Name_Table_Byte (Name_Abs, Token_Type'Pos (Tok_Abs));
129 Set_Name_Table_Byte (Name_Abstract, Token_Type'Pos (Tok_Abstract));
130 Set_Name_Table_Byte (Name_Accept, Token_Type'Pos (Tok_Accept));
131 Set_Name_Table_Byte (Name_Access, Token_Type'Pos (Tok_Access));
132 Set_Name_Table_Byte (Name_And, Token_Type'Pos (Tok_And));
133 Set_Name_Table_Byte (Name_Aliased, Token_Type'Pos (Tok_Aliased));
134 Set_Name_Table_Byte (Name_All, Token_Type'Pos (Tok_All));
135 Set_Name_Table_Byte (Name_Array, Token_Type'Pos (Tok_Array));
136 Set_Name_Table_Byte (Name_At, Token_Type'Pos (Tok_At));
137 Set_Name_Table_Byte (Name_Begin, Token_Type'Pos (Tok_Begin));
138 Set_Name_Table_Byte (Name_Body, Token_Type'Pos (Tok_Body));
139 Set_Name_Table_Byte (Name_Case, Token_Type'Pos (Tok_Case));
140 Set_Name_Table_Byte (Name_Constant, Token_Type'Pos (Tok_Constant));
141 Set_Name_Table_Byte (Name_Declare, Token_Type'Pos (Tok_Declare));
142 Set_Name_Table_Byte (Name_Delay, Token_Type'Pos (Tok_Delay));
143 Set_Name_Table_Byte (Name_Delta, Token_Type'Pos (Tok_Delta));
144 Set_Name_Table_Byte (Name_Digits, Token_Type'Pos (Tok_Digits));
145 Set_Name_Table_Byte (Name_Do, Token_Type'Pos (Tok_Do));
146 Set_Name_Table_Byte (Name_Else, Token_Type'Pos (Tok_Else));
147 Set_Name_Table_Byte (Name_Elsif, Token_Type'Pos (Tok_Elsif));
148 Set_Name_Table_Byte (Name_End, Token_Type'Pos (Tok_End));
149 Set_Name_Table_Byte (Name_Entry, Token_Type'Pos (Tok_Entry));
150 Set_Name_Table_Byte (Name_Exception, Token_Type'Pos (Tok_Exception));
151 Set_Name_Table_Byte (Name_Exit, Token_Type'Pos (Tok_Exit));
152 Set_Name_Table_Byte (Name_For, Token_Type'Pos (Tok_For));
153 Set_Name_Table_Byte (Name_Function, Token_Type'Pos (Tok_Function));
154 Set_Name_Table_Byte (Name_Generic, Token_Type'Pos (Tok_Generic));
155 Set_Name_Table_Byte (Name_Goto, Token_Type'Pos (Tok_Goto));
156 Set_Name_Table_Byte (Name_If, Token_Type'Pos (Tok_If));
157 Set_Name_Table_Byte (Name_In, Token_Type'Pos (Tok_In));
158 Set_Name_Table_Byte (Name_Is, Token_Type'Pos (Tok_Is));
159 Set_Name_Table_Byte (Name_Limited, Token_Type'Pos (Tok_Limited));
160 Set_Name_Table_Byte (Name_Loop, Token_Type'Pos (Tok_Loop));
161 Set_Name_Table_Byte (Name_Mod, Token_Type'Pos (Tok_Mod));
162 Set_Name_Table_Byte (Name_New, Token_Type'Pos (Tok_New));
163 Set_Name_Table_Byte (Name_Not, Token_Type'Pos (Tok_Not));
164 Set_Name_Table_Byte (Name_Null, Token_Type'Pos (Tok_Null));
165 Set_Name_Table_Byte (Name_Of, Token_Type'Pos (Tok_Of));
166 Set_Name_Table_Byte (Name_Or, Token_Type'Pos (Tok_Or));
167 Set_Name_Table_Byte (Name_Others, Token_Type'Pos (Tok_Others));
168 Set_Name_Table_Byte (Name_Out, Token_Type'Pos (Tok_Out));
169 Set_Name_Table_Byte (Name_Package, Token_Type'Pos (Tok_Package));
170 Set_Name_Table_Byte (Name_Pragma, Token_Type'Pos (Tok_Pragma));
171 Set_Name_Table_Byte (Name_Private, Token_Type'Pos (Tok_Private));
172 Set_Name_Table_Byte (Name_Procedure, Token_Type'Pos (Tok_Procedure));
173 Set_Name_Table_Byte (Name_Protected, Token_Type'Pos (Tok_Protected));
174 Set_Name_Table_Byte (Name_Raise, Token_Type'Pos (Tok_Raise));
175 Set_Name_Table_Byte (Name_Range, Token_Type'Pos (Tok_Range));
176 Set_Name_Table_Byte (Name_Record, Token_Type'Pos (Tok_Record));
177 Set_Name_Table_Byte (Name_Rem, Token_Type'Pos (Tok_Rem));
178 Set_Name_Table_Byte (Name_Renames, Token_Type'Pos (Tok_Renames));
179 Set_Name_Table_Byte (Name_Requeue, Token_Type'Pos (Tok_Requeue));
180 Set_Name_Table_Byte (Name_Return, Token_Type'Pos (Tok_Return));
181 Set_Name_Table_Byte (Name_Reverse, Token_Type'Pos (Tok_Reverse));
182 Set_Name_Table_Byte (Name_Select, Token_Type'Pos (Tok_Select));
183 Set_Name_Table_Byte (Name_Separate, Token_Type'Pos (Tok_Separate));
184 Set_Name_Table_Byte (Name_Subtype, Token_Type'Pos (Tok_Subtype));
185 Set_Name_Table_Byte (Name_Tagged, Token_Type'Pos (Tok_Tagged));
186 Set_Name_Table_Byte (Name_Task, Token_Type'Pos (Tok_Task));
187 Set_Name_Table_Byte (Name_Terminate, Token_Type'Pos (Tok_Terminate));
188 Set_Name_Table_Byte (Name_Then, Token_Type'Pos (Tok_Then));
189 Set_Name_Table_Byte (Name_Type, Token_Type'Pos (Tok_Type));
190 Set_Name_Table_Byte (Name_Until, Token_Type'Pos (Tok_Until));
191 Set_Name_Table_Byte (Name_Use, Token_Type'Pos (Tok_Use));
192 Set_Name_Table_Byte (Name_When, Token_Type'Pos (Tok_When));
193 Set_Name_Table_Byte (Name_While, Token_Type'Pos (Tok_While));
194 Set_Name_Table_Byte (Name_With, Token_Type'Pos (Tok_With));
195 Set_Name_Table_Byte (Name_Xor, Token_Type'Pos (Tok_Xor));
197 -- Initialize scan control variables
199 Current_Source_File := Index;
200 Source := Source_Text (Current_Source_File);
201 Current_Source_Unit := Unit;
202 Scan_Ptr := Source_First (Current_Source_File);
203 Token := No_Token;
204 Token_Ptr := Scan_Ptr;
205 Current_Line_Start := Scan_Ptr;
206 Token_Node := Empty;
207 Token_Name := No_Name;
208 Start_Column := Set_Start_Column;
209 First_Non_Blank_Location := Scan_Ptr;
211 Initialize_Checksum;
213 -- Do not call Scan, otherwise the License stuff does not work in Scn.
215 end Initialize_Scanner;
217 ------------------------------
218 -- Reset_Special_Characters --
219 ------------------------------
221 procedure Reset_Special_Characters is
222 begin
223 Special_Characters := (others => False);
224 end Reset_Special_Characters;
226 ----------
227 -- Scan --
228 ----------
230 procedure Scan is
232 procedure Check_End_Of_Line;
233 -- Called when end of line encountered. Checks that line is not
234 -- too long, and that other style checks for the end of line are met.
236 function Double_Char_Token (C : Character) return Boolean;
237 -- This function is used for double character tokens like := or <>. It
238 -- checks if the character following Source (Scan_Ptr) is C, and if so
239 -- bumps Scan_Ptr past the pair of characters and returns True. A space
240 -- between the two characters is also recognized with an appropriate
241 -- error message being issued. If C is not present, False is returned.
242 -- Note that Double_Char_Token can only be used for tokens defined in
243 -- the Ada syntax (it's use for error cases like && is not appropriate
244 -- since we do not want a junk message for a case like &-space-&).
246 procedure Error_Illegal_Character;
247 -- Give illegal character error, Scan_Ptr points to character.
248 -- On return, Scan_Ptr is bumped past the illegal character.
250 procedure Error_Illegal_Wide_Character;
251 -- Give illegal wide character message. On return, Scan_Ptr is bumped
252 -- past the illegal character, which may still leave us pointing to
253 -- junk, not much we can do if the escape sequence is messed up!
255 procedure Error_Long_Line;
256 -- Signal error of excessively long line
258 procedure Error_No_Double_Underline;
259 -- Signal error of double underline character
261 procedure Nlit;
262 -- This is the procedure for scanning out numeric literals. On entry,
263 -- Scan_Ptr points to the digit that starts the numeric literal (the
264 -- checksum for this character has not been accumulated yet). On return
265 -- Scan_Ptr points past the last character of the numeric literal, Token
266 -- and Token_Node are set appropriately, and the checksum is updated.
268 procedure Slit;
269 -- This is the procedure for scanning out string literals. On entry,
270 -- Scan_Ptr points to the opening string quote (the checksum for this
271 -- character has not been accumulated yet). On return Scan_Ptr points
272 -- past the closing quote of the string literal, Token and Token_Node
273 -- are set appropriately, and the checksum is upated.
275 -----------------------
276 -- Check_End_Of_Line --
277 -----------------------
279 procedure Check_End_Of_Line is
280 Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
282 begin
283 if Style_Check and Style_Check_Max_Line_Length then
284 Style.Check_Line_Terminator (Len);
286 elsif Len > Hostparm.Max_Line_Length then
287 Error_Long_Line;
288 end if;
289 end Check_End_Of_Line;
291 -----------------------
292 -- Double_Char_Token --
293 -----------------------
295 function Double_Char_Token (C : Character) return Boolean is
296 begin
297 if Source (Scan_Ptr + 1) = C then
298 Accumulate_Checksum (C);
299 Scan_Ptr := Scan_Ptr + 2;
300 return True;
302 elsif Source (Scan_Ptr + 1) = ' '
303 and then Source (Scan_Ptr + 2) = C
304 then
305 Scan_Ptr := Scan_Ptr + 1;
306 Error_Msg_S ("no space allowed here");
307 Scan_Ptr := Scan_Ptr + 2;
308 return True;
310 else
311 return False;
312 end if;
313 end Double_Char_Token;
315 -----------------------------
316 -- Error_Illegal_Character --
317 -----------------------------
319 procedure Error_Illegal_Character is
320 begin
321 Error_Msg_S ("illegal character");
322 Scan_Ptr := Scan_Ptr + 1;
323 end Error_Illegal_Character;
325 ----------------------------------
326 -- Error_Illegal_Wide_Character --
327 ----------------------------------
329 procedure Error_Illegal_Wide_Character is
330 begin
331 if OpenVMS then
332 Error_Msg_S
333 ("illegal wide character, check " &
334 "'/'W'I'D'E'_'C'H'A'R'A'C'T'E'R'_'E'N'C'O'D'I'N'G qualifier");
335 else
336 Error_Msg_S
337 ("illegal wide character, check -gnatW switch");
338 end if;
340 Scan_Ptr := Scan_Ptr + 1;
341 end Error_Illegal_Wide_Character;
343 ---------------------
344 -- Error_Long_Line --
345 ---------------------
347 procedure Error_Long_Line is
348 begin
349 Error_Msg
350 ("this line is too long",
351 Current_Line_Start + Hostparm.Max_Line_Length);
352 end Error_Long_Line;
354 -------------------------------
355 -- Error_No_Double_Underline --
356 -------------------------------
358 procedure Error_No_Double_Underline is
359 begin
360 Error_Msg_S ("two consecutive underlines not permitted");
361 end Error_No_Double_Underline;
363 ----------
364 -- Nlit --
365 ----------
367 procedure Nlit is
369 C : Character;
370 -- Current source program character
372 Base_Char : Character;
373 -- Either # or : (character at start of based number)
375 Base : Int;
376 -- Value of base
378 UI_Base : Uint;
379 -- Value of base in Uint format
381 UI_Int_Value : Uint;
382 -- Value of integer scanned by Scan_Integer in Uint format
384 UI_Num_Value : Uint;
385 -- Value of integer in numeric value being scanned
387 Scale : Int;
388 -- Scale value for real literal
390 UI_Scale : Uint;
391 -- Scale in Uint format
393 Exponent_Is_Negative : Boolean;
394 -- Set true for negative exponent
396 Extended_Digit_Value : Int;
397 -- Extended digit value
399 Point_Scanned : Boolean;
400 -- Flag for decimal point scanned in numeric literal
402 -----------------------
403 -- Local Subprograms --
404 -----------------------
406 procedure Error_Digit_Expected;
407 -- Signal error of bad digit, Scan_Ptr points to the location at
408 -- which the digit was expected on input, and is unchanged on return.
410 procedure Scan_Integer;
411 -- Procedure to scan integer literal. On entry, Scan_Ptr points to
412 -- a digit, on exit Scan_Ptr points past the last character of
413 -- the integer.
414 -- For each digit encountered, UI_Int_Value is multiplied by 10,
415 -- and the value of the digit added to the result. In addition,
416 -- the value in Scale is decremented by one for each actual digit
417 -- scanned.
419 --------------------------
420 -- Error_Digit_Expected --
421 --------------------------
423 procedure Error_Digit_Expected is
424 begin
425 Error_Msg_S ("digit expected");
426 end Error_Digit_Expected;
428 -------------------
429 -- Scan_Integer --
430 -------------------
432 procedure Scan_Integer is
433 C : Character;
434 -- Next character scanned
436 begin
437 C := Source (Scan_Ptr);
439 -- Loop through digits (allowing underlines)
441 loop
442 Accumulate_Checksum (C);
443 UI_Int_Value :=
444 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
445 Scan_Ptr := Scan_Ptr + 1;
446 Scale := Scale - 1;
447 C := Source (Scan_Ptr);
449 if C = '_' then
450 Accumulate_Checksum ('_');
452 loop
453 Scan_Ptr := Scan_Ptr + 1;
454 C := Source (Scan_Ptr);
455 exit when C /= '_';
456 Error_No_Double_Underline;
457 end loop;
459 if C not in '0' .. '9' then
460 Error_Digit_Expected;
461 exit;
462 end if;
464 else
465 exit when C not in '0' .. '9';
466 end if;
467 end loop;
469 end Scan_Integer;
471 ----------------------------------
472 -- Start of Processing for Nlit --
473 ----------------------------------
475 begin
476 Base := 10;
477 UI_Base := Uint_10;
478 UI_Int_Value := Uint_0;
479 Scale := 0;
480 Scan_Integer;
481 Scale := 0;
482 Point_Scanned := False;
483 UI_Num_Value := UI_Int_Value;
485 -- Various possibilities now for continuing the literal are
486 -- period, E/e (for exponent), or :/# (for based literal).
488 Scale := 0;
489 C := Source (Scan_Ptr);
491 if C = '.' then
493 -- Scan out point, but do not scan past .. which is a range
494 -- sequence, and must not be eaten up scanning a numeric literal.
496 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
497 Accumulate_Checksum ('.');
499 if Point_Scanned then
500 Error_Msg_S ("duplicate point ignored");
501 end if;
503 Point_Scanned := True;
504 Scan_Ptr := Scan_Ptr + 1;
505 C := Source (Scan_Ptr);
507 if C not in '0' .. '9' then
508 Error_Msg
509 ("real literal cannot end with point", Scan_Ptr - 1);
510 else
511 Scan_Integer;
512 UI_Num_Value := UI_Int_Value;
513 end if;
514 end loop;
516 -- Based literal case. The base is the value we already scanned.
517 -- In the case of colon, we insist that the following character
518 -- is indeed an extended digit or a period. This catches a number
519 -- of common errors, as well as catching the well known tricky
520 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;"
522 elsif C = '#'
523 or else (C = ':' and then
524 (Source (Scan_Ptr + 1) = '.'
525 or else
526 Source (Scan_Ptr + 1) in '0' .. '9'
527 or else
528 Source (Scan_Ptr + 1) in 'A' .. 'Z'
529 or else
530 Source (Scan_Ptr + 1) in 'a' .. 'z'))
531 then
532 if C = ':' and then Warn_On_Obsolescent_Feature then
533 Error_Msg_S
534 ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
535 Error_Msg_S
536 ("\use ""'#"" instead?");
537 end if;
539 Accumulate_Checksum (C);
540 Base_Char := C;
541 UI_Base := UI_Int_Value;
543 if UI_Base < 2 or else UI_Base > 16 then
544 Error_Msg_SC ("base not 2-16");
545 UI_Base := Uint_16;
546 end if;
548 Base := UI_To_Int (UI_Base);
549 Scan_Ptr := Scan_Ptr + 1;
551 -- Scan out extended integer [. integer]
553 C := Source (Scan_Ptr);
554 UI_Int_Value := Uint_0;
555 Scale := 0;
557 loop
558 if C in '0' .. '9' then
559 Accumulate_Checksum (C);
560 Extended_Digit_Value :=
561 Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
563 elsif C in 'A' .. 'F' then
564 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
565 Extended_Digit_Value :=
566 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
568 elsif C in 'a' .. 'f' then
569 Accumulate_Checksum (C);
570 Extended_Digit_Value :=
571 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
573 else
574 Error_Msg_S ("extended digit expected");
575 exit;
576 end if;
578 if Extended_Digit_Value >= Base then
579 Error_Msg_S ("digit '>= base");
580 end if;
582 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
583 Scale := Scale - 1;
584 Scan_Ptr := Scan_Ptr + 1;
585 C := Source (Scan_Ptr);
587 if C = '_' then
588 loop
589 Accumulate_Checksum ('_');
590 Scan_Ptr := Scan_Ptr + 1;
591 C := Source (Scan_Ptr);
592 exit when C /= '_';
593 Error_No_Double_Underline;
594 end loop;
596 elsif C = '.' then
597 Accumulate_Checksum ('.');
599 if Point_Scanned then
600 Error_Msg_S ("duplicate point ignored");
601 end if;
603 Scan_Ptr := Scan_Ptr + 1;
604 C := Source (Scan_Ptr);
605 Point_Scanned := True;
606 Scale := 0;
608 elsif C = Base_Char then
609 Accumulate_Checksum (C);
610 Scan_Ptr := Scan_Ptr + 1;
611 exit;
613 elsif C = '#' or else C = ':' then
614 Error_Msg_S ("based number delimiters must match");
615 Scan_Ptr := Scan_Ptr + 1;
616 exit;
618 elsif not Identifier_Char (C) then
619 if Base_Char = '#' then
620 Error_Msg_S ("missing '#");
621 else
622 Error_Msg_S ("missing ':");
623 end if;
625 exit;
626 end if;
628 end loop;
630 UI_Num_Value := UI_Int_Value;
631 end if;
633 -- Scan out exponent
635 if not Point_Scanned then
636 Scale := 0;
637 UI_Scale := Uint_0;
638 else
639 UI_Scale := UI_From_Int (Scale);
640 end if;
642 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
643 Accumulate_Checksum ('e');
644 Scan_Ptr := Scan_Ptr + 1;
645 Exponent_Is_Negative := False;
647 if Source (Scan_Ptr) = '+' then
648 Accumulate_Checksum ('+');
649 Scan_Ptr := Scan_Ptr + 1;
651 elsif Source (Scan_Ptr) = '-' then
652 Accumulate_Checksum ('-');
654 if not Point_Scanned then
655 Error_Msg_S
656 ("negative exponent not allowed for integer literal");
657 else
658 Exponent_Is_Negative := True;
659 end if;
661 Scan_Ptr := Scan_Ptr + 1;
662 end if;
664 UI_Int_Value := Uint_0;
666 if Source (Scan_Ptr) in '0' .. '9' then
667 Scan_Integer;
668 else
669 Error_Digit_Expected;
670 end if;
672 if Exponent_Is_Negative then
673 UI_Scale := UI_Scale - UI_Int_Value;
674 else
675 UI_Scale := UI_Scale + UI_Int_Value;
676 end if;
677 end if;
679 -- Case of real literal to be returned
681 if Point_Scanned then
682 Token := Tok_Real_Literal;
683 Real_Literal_Value :=
684 UR_From_Components (
685 Num => UI_Num_Value,
686 Den => -UI_Scale,
687 Rbase => Base);
689 -- Case of integer literal to be returned
691 else
692 Token := Tok_Integer_Literal;
694 if UI_Scale = 0 then
695 Int_Literal_Value := UI_Num_Value;
697 -- Avoid doing possibly expensive calculations in cases like
698 -- parsing 163E800_000# when semantics will not be done anyway.
699 -- This is especially useful when parsing garbled input.
701 elsif Operating_Mode /= Check_Syntax
702 and then (Serious_Errors_Detected = 0 or else Try_Semantics)
703 then
704 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
706 else
707 Int_Literal_Value := No_Uint;
709 end if;
711 end if;
713 return;
715 end Nlit;
717 ----------
718 -- Slit --
719 ----------
721 procedure Slit is
723 Delimiter : Character;
724 -- Delimiter (first character of string)
726 C : Character;
727 -- Current source program character
729 Code : Char_Code;
730 -- Current character code value
732 Err : Boolean;
733 -- Error flag for Scan_Wide call
735 procedure Error_Bad_String_Char;
736 -- Signal bad character in string/character literal. On entry
737 -- Scan_Ptr points to the improper character encountered during
738 -- the scan. Scan_Ptr is not modified, so it still points to the bad
739 -- character on return.
741 procedure Error_Unterminated_String;
742 -- Procedure called if a line terminator character is encountered
743 -- during scanning a string, meaning that the string is not properly
744 -- terminated.
746 procedure Set_String;
747 -- Procedure used to distinguish between string and operator symbol.
748 -- On entry the string has been scanned out, and its characters start
749 -- at Token_Ptr and end one character before Scan_Ptr. On exit Token
750 -- is set to Tok_String_Literal or Tok_Operator_Symbol as
751 -- appropriate, and Token_Node is appropriately initialized.
752 -- In addition, in the operator symbol case, Token_Name is
753 -- appropriately set.
755 ---------------------------
756 -- Error_Bad_String_Char --
757 ---------------------------
759 procedure Error_Bad_String_Char is
760 C : constant Character := Source (Scan_Ptr);
762 begin
763 if C = HT then
764 Error_Msg_S ("horizontal tab not allowed in string");
766 elsif C = VT or else C = FF then
767 Error_Msg_S ("format effector not allowed in string");
769 elsif C in Upper_Half_Character then
770 Error_Msg_S ("(Ada 83) upper half character not allowed");
772 else
773 Error_Msg_S ("control character not allowed in string");
774 end if;
775 end Error_Bad_String_Char;
777 -------------------------------
778 -- Error_Unterminated_String --
779 -------------------------------
781 procedure Error_Unterminated_String is
782 begin
783 -- An interesting little refinement. Consider the following
784 -- examples:
786 -- A := "this is an unterminated string;
787 -- A := "this is an unterminated string &
788 -- P(A, "this is a parameter that didn't get terminated);
790 -- We fiddle a little to do slightly better placement in these
791 -- cases also if there is white space at the end of the line we
792 -- place the flag at the start of this white space, not at the
793 -- end. Note that we only have to test for blanks, since tabs
794 -- aren't allowed in strings in the first place and would have
795 -- caused an error message.
797 -- Two more cases that we treat specially are:
799 -- A := "this string uses the wrong terminator'
800 -- A := "this string uses the wrong terminator' &
802 -- In these cases we give a different error message as well
804 -- We actually reposition the scan pointer to the point where we
805 -- place the flag in these cases, since it seems a better bet on
806 -- the original intention.
808 while Source (Scan_Ptr - 1) = ' '
809 or else Source (Scan_Ptr - 1) = '&'
810 loop
811 Scan_Ptr := Scan_Ptr - 1;
812 Unstore_String_Char;
813 end loop;
815 -- Check for case of incorrect string terminator, but single quote
816 -- is not considered incorrect if the opening terminator misused
817 -- a single quote (error message already given).
819 if Delimiter /= '''
820 and then Source (Scan_Ptr - 1) = '''
821 then
822 Unstore_String_Char;
823 Error_Msg
824 ("incorrect string terminator character", Scan_Ptr - 1);
825 return;
826 end if;
828 if Source (Scan_Ptr - 1) = ';' then
829 Scan_Ptr := Scan_Ptr - 1;
830 Unstore_String_Char;
832 if Source (Scan_Ptr - 1) = ')' then
833 Scan_Ptr := Scan_Ptr - 1;
834 Unstore_String_Char;
835 end if;
836 end if;
838 Error_Msg_S ("missing string quote");
839 end Error_Unterminated_String;
841 ----------------
842 -- Set_String --
843 ----------------
845 procedure Set_String is
846 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
847 C1 : Character;
848 C2 : Character;
849 C3 : Character;
851 begin
852 -- Token_Name is currently set to Error_Name. The following
853 -- section of code resets Token_Name to the proper Name_Op_xx
854 -- value if the string is a valid operator symbol, otherwise it is
855 -- left set to Error_Name.
857 if Slen = 1 then
858 C1 := Source (Token_Ptr + 1);
860 case C1 is
861 when '=' =>
862 Token_Name := Name_Op_Eq;
864 when '>' =>
865 Token_Name := Name_Op_Gt;
867 when '<' =>
868 Token_Name := Name_Op_Lt;
870 when '+' =>
871 Token_Name := Name_Op_Add;
873 when '-' =>
874 Token_Name := Name_Op_Subtract;
876 when '&' =>
877 Token_Name := Name_Op_Concat;
879 when '*' =>
880 Token_Name := Name_Op_Multiply;
882 when '/' =>
883 Token_Name := Name_Op_Divide;
885 when others =>
886 null;
887 end case;
889 elsif Slen = 2 then
890 C1 := Source (Token_Ptr + 1);
891 C2 := Source (Token_Ptr + 2);
893 if C1 = '*' and then C2 = '*' then
894 Token_Name := Name_Op_Expon;
896 elsif C2 = '=' then
898 if C1 = '/' then
899 Token_Name := Name_Op_Ne;
900 elsif C1 = '<' then
901 Token_Name := Name_Op_Le;
902 elsif C1 = '>' then
903 Token_Name := Name_Op_Ge;
904 end if;
906 elsif (C1 = 'O' or else C1 = 'o') and then -- OR
907 (C2 = 'R' or else C2 = 'r')
908 then
909 Token_Name := Name_Op_Or;
910 end if;
912 elsif Slen = 3 then
913 C1 := Source (Token_Ptr + 1);
914 C2 := Source (Token_Ptr + 2);
915 C3 := Source (Token_Ptr + 3);
917 if (C1 = 'A' or else C1 = 'a') and then -- AND
918 (C2 = 'N' or else C2 = 'n') and then
919 (C3 = 'D' or else C3 = 'd')
920 then
921 Token_Name := Name_Op_And;
923 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS
924 (C2 = 'B' or else C2 = 'b') and then
925 (C3 = 'S' or else C3 = 's')
926 then
927 Token_Name := Name_Op_Abs;
929 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD
930 (C2 = 'O' or else C2 = 'o') and then
931 (C3 = 'D' or else C3 = 'd')
932 then
933 Token_Name := Name_Op_Mod;
935 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT
936 (C2 = 'O' or else C2 = 'o') and then
937 (C3 = 'T' or else C3 = 't')
938 then
939 Token_Name := Name_Op_Not;
941 elsif (C1 = 'R' or else C1 = 'r') and then -- REM
942 (C2 = 'E' or else C2 = 'e') and then
943 (C3 = 'M' or else C3 = 'm')
944 then
945 Token_Name := Name_Op_Rem;
947 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR
948 (C2 = 'O' or else C2 = 'o') and then
949 (C3 = 'R' or else C3 = 'r')
950 then
951 Token_Name := Name_Op_Xor;
952 end if;
954 end if;
956 -- If it is an operator symbol, then Token_Name is set.
957 -- If it is some other string value, then Token_Name still
958 -- contains Error_Name.
960 if Token_Name = Error_Name then
961 Token := Tok_String_Literal;
963 else
964 Token := Tok_Operator_Symbol;
965 end if;
967 end Set_String;
969 ----------
970 -- Slit --
971 ----------
973 begin
974 -- On entry, Scan_Ptr points to the opening character of the string
975 -- which is either a percent, double quote, or apostrophe
976 -- (single quote). The latter case is an error detected by
977 -- the character literal circuit.
979 Delimiter := Source (Scan_Ptr);
980 Accumulate_Checksum (Delimiter);
981 Start_String;
982 Scan_Ptr := Scan_Ptr + 1;
984 -- Loop to scan out characters of string literal
986 loop
987 C := Source (Scan_Ptr);
989 if C = Delimiter then
990 Accumulate_Checksum (C);
991 Scan_Ptr := Scan_Ptr + 1;
992 exit when Source (Scan_Ptr) /= Delimiter;
993 Code := Get_Char_Code (C);
994 Accumulate_Checksum (C);
995 Scan_Ptr := Scan_Ptr + 1;
997 else
998 if C = '"' and then Delimiter = '%' then
999 Error_Msg_S
1000 ("quote not allowed in percent delimited string");
1001 Code := Get_Char_Code (C);
1002 Scan_Ptr := Scan_Ptr + 1;
1004 elsif (C = ESC
1005 and then
1006 Wide_Character_Encoding_Method
1007 in WC_ESC_Encoding_Method)
1008 or else
1009 (C in Upper_Half_Character
1010 and then
1011 Upper_Half_Encoding)
1012 or else
1013 (C = '['
1014 and then
1015 Source (Scan_Ptr + 1) = '"'
1016 and then
1017 Identifier_Char (Source (Scan_Ptr + 2)))
1018 then
1019 Scan_Wide (Source, Scan_Ptr, Code, Err);
1020 Accumulate_Checksum (Code);
1022 if Err then
1023 Error_Illegal_Wide_Character;
1024 Code := Get_Char_Code (' ');
1025 end if;
1027 else
1028 Accumulate_Checksum (C);
1030 if C not in Graphic_Character then
1031 if C in Line_Terminator then
1032 Error_Unterminated_String;
1033 exit;
1035 elsif C in Upper_Half_Character then
1036 if Ada_83 then
1037 Error_Bad_String_Char;
1038 end if;
1040 else
1041 Error_Bad_String_Char;
1042 end if;
1043 end if;
1045 Code := Get_Char_Code (C);
1046 Scan_Ptr := Scan_Ptr + 1;
1047 end if;
1048 end if;
1050 Store_String_Char (Code);
1052 if not In_Character_Range (Code) then
1053 Wide_Character_Found := True;
1054 end if;
1055 end loop;
1057 String_Literal_Id := End_String;
1058 Set_String;
1059 return;
1061 end Slit;
1063 -- Start of body of Scan
1065 begin
1066 Prev_Token := Token;
1067 Prev_Token_Ptr := Token_Ptr;
1068 Token_Name := Error_Name;
1070 -- The following loop runs more than once only if a format effector
1071 -- (tab, vertical tab, form feed, line feed, carriage return) is
1072 -- encountered and skipped, or some error situation, such as an
1073 -- illegal character, is encountered.
1075 loop
1076 -- Skip past blanks, loop is opened up for speed
1078 while Source (Scan_Ptr) = ' ' loop
1080 if Source (Scan_Ptr + 1) /= ' ' then
1081 Scan_Ptr := Scan_Ptr + 1;
1082 exit;
1083 end if;
1085 if Source (Scan_Ptr + 2) /= ' ' then
1086 Scan_Ptr := Scan_Ptr + 2;
1087 exit;
1088 end if;
1090 if Source (Scan_Ptr + 3) /= ' ' then
1091 Scan_Ptr := Scan_Ptr + 3;
1092 exit;
1093 end if;
1095 if Source (Scan_Ptr + 4) /= ' ' then
1096 Scan_Ptr := Scan_Ptr + 4;
1097 exit;
1098 end if;
1100 if Source (Scan_Ptr + 5) /= ' ' then
1101 Scan_Ptr := Scan_Ptr + 5;
1102 exit;
1103 end if;
1105 if Source (Scan_Ptr + 6) /= ' ' then
1106 Scan_Ptr := Scan_Ptr + 6;
1107 exit;
1108 end if;
1110 if Source (Scan_Ptr + 7) /= ' ' then
1111 Scan_Ptr := Scan_Ptr + 7;
1112 exit;
1113 end if;
1115 Scan_Ptr := Scan_Ptr + 8;
1116 end loop;
1118 -- We are now at a non-blank character, which is the first character
1119 -- of the token we will scan, and hence the value of Token_Ptr.
1121 Token_Ptr := Scan_Ptr;
1123 -- Here begins the main case statement which transfers control on
1124 -- the basis of the non-blank character we have encountered.
1126 case Source (Scan_Ptr) is
1128 -- Line terminator characters
1130 when CR | LF | FF | VT => Line_Terminator_Case : begin
1132 -- Check line too long
1134 Check_End_Of_Line;
1136 -- Set Token_Ptr, if End_Of_Line is a token, for the case when
1137 -- it is a physical line.
1139 if End_Of_Line_Is_Token then
1140 Token_Ptr := Scan_Ptr;
1141 end if;
1143 declare
1144 Physical : Boolean;
1146 begin
1147 Skip_Line_Terminators (Scan_Ptr, Physical);
1149 -- If we are at start of physical line, update scan pointers
1150 -- to reflect the start of the new line.
1152 if Physical then
1153 Current_Line_Start := Scan_Ptr;
1154 Start_Column := Set_Start_Column;
1155 First_Non_Blank_Location := Scan_Ptr;
1157 -- If End_Of_Line is a token, we return it as it is
1158 -- a physical line.
1160 if End_Of_Line_Is_Token then
1161 Token := Tok_End_Of_Line;
1162 return;
1163 end if;
1164 end if;
1165 end;
1166 end Line_Terminator_Case;
1168 -- Horizontal tab, just skip past it
1170 when HT =>
1171 if Style_Check then Style.Check_HT; end if;
1172 Scan_Ptr := Scan_Ptr + 1;
1174 -- End of file character, treated as an end of file only if it
1175 -- is the last character in the buffer, otherwise it is ignored.
1177 when EOF =>
1178 if Scan_Ptr = Source_Last (Current_Source_File) then
1179 Check_End_Of_Line;
1180 Token := Tok_EOF;
1181 return;
1183 else
1184 Scan_Ptr := Scan_Ptr + 1;
1185 end if;
1187 -- Ampersand
1189 when '&' =>
1190 Accumulate_Checksum ('&');
1192 if Source (Scan_Ptr + 1) = '&' then
1193 Error_Msg_S ("'&'& should be `AND THEN`");
1194 Scan_Ptr := Scan_Ptr + 2;
1195 Token := Tok_And;
1196 return;
1198 else
1199 Scan_Ptr := Scan_Ptr + 1;
1200 Token := Tok_Ampersand;
1201 return;
1202 end if;
1204 -- Asterisk (can be multiplication operator or double asterisk
1205 -- which is the exponentiation compound delimiter).
1207 when '*' =>
1208 Accumulate_Checksum ('*');
1210 if Source (Scan_Ptr + 1) = '*' then
1211 Accumulate_Checksum ('*');
1212 Scan_Ptr := Scan_Ptr + 2;
1213 Token := Tok_Double_Asterisk;
1214 return;
1216 else
1217 Scan_Ptr := Scan_Ptr + 1;
1218 Token := Tok_Asterisk;
1219 return;
1220 end if;
1222 -- Colon, which can either be an isolated colon, or part of an
1223 -- assignment compound delimiter.
1225 when ':' =>
1226 Accumulate_Checksum (':');
1228 if Double_Char_Token ('=') then
1229 Token := Tok_Colon_Equal;
1230 if Style_Check then Style.Check_Colon_Equal; end if;
1231 return;
1233 elsif Source (Scan_Ptr + 1) = '-'
1234 and then Source (Scan_Ptr + 2) /= '-'
1235 then
1236 Token := Tok_Colon_Equal;
1237 Error_Msg (":- should be :=", Scan_Ptr);
1238 Scan_Ptr := Scan_Ptr + 2;
1239 return;
1241 else
1242 Scan_Ptr := Scan_Ptr + 1;
1243 Token := Tok_Colon;
1244 if Style_Check then Style.Check_Colon; end if;
1245 return;
1246 end if;
1248 -- Left parenthesis
1250 when '(' =>
1251 Accumulate_Checksum ('(');
1252 Scan_Ptr := Scan_Ptr + 1;
1253 Token := Tok_Left_Paren;
1254 if Style_Check then Style.Check_Left_Paren; end if;
1255 return;
1257 -- Left bracket
1259 when '[' =>
1260 if Source (Scan_Ptr + 1) = '"' then
1261 Name_Len := 0;
1262 goto Scan_Identifier;
1264 else
1265 Error_Msg_S ("illegal character, replaced by ""(""");
1266 Scan_Ptr := Scan_Ptr + 1;
1267 Token := Tok_Left_Paren;
1268 return;
1269 end if;
1271 -- Left brace
1273 when '{' =>
1274 Error_Msg_S ("illegal character, replaced by ""(""");
1275 Scan_Ptr := Scan_Ptr + 1;
1276 Token := Tok_Left_Paren;
1277 return;
1279 -- Comma
1281 when ',' =>
1282 Accumulate_Checksum (',');
1283 Scan_Ptr := Scan_Ptr + 1;
1284 Token := Tok_Comma;
1285 if Style_Check then Style.Check_Comma; end if;
1286 return;
1288 -- Dot, which is either an isolated period, or part of a double
1289 -- dot compound delimiter sequence. We also check for the case of
1290 -- a digit following the period, to give a better error message.
1292 when '.' =>
1293 Accumulate_Checksum ('.');
1295 if Double_Char_Token ('.') then
1296 Token := Tok_Dot_Dot;
1297 if Style_Check then Style.Check_Dot_Dot; end if;
1298 return;
1300 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1301 Error_Msg_S ("numeric literal cannot start with point");
1302 Scan_Ptr := Scan_Ptr + 1;
1304 else
1305 Scan_Ptr := Scan_Ptr + 1;
1306 Token := Tok_Dot;
1307 return;
1308 end if;
1310 -- Equal, which can either be an equality operator, or part of the
1311 -- arrow (=>) compound delimiter.
1313 when '=' =>
1314 Accumulate_Checksum ('=');
1316 if Double_Char_Token ('>') then
1317 Token := Tok_Arrow;
1318 if Style_Check then Style.Check_Arrow; end if;
1319 return;
1321 elsif Source (Scan_Ptr + 1) = '=' then
1322 Error_Msg_S ("== should be =");
1323 Scan_Ptr := Scan_Ptr + 1;
1324 end if;
1326 Scan_Ptr := Scan_Ptr + 1;
1327 Token := Tok_Equal;
1328 return;
1330 -- Greater than, which can be a greater than operator, greater than
1331 -- or equal operator, or first character of a right label bracket.
1333 when '>' =>
1334 Accumulate_Checksum ('>');
1336 if Double_Char_Token ('=') then
1337 Token := Tok_Greater_Equal;
1338 return;
1340 elsif Double_Char_Token ('>') then
1341 Token := Tok_Greater_Greater;
1342 return;
1344 else
1345 Scan_Ptr := Scan_Ptr + 1;
1346 Token := Tok_Greater;
1347 return;
1348 end if;
1350 -- Less than, which can be a less than operator, less than or equal
1351 -- operator, or the first character of a left label bracket, or the
1352 -- first character of a box (<>) compound delimiter.
1354 when '<' =>
1355 Accumulate_Checksum ('<');
1357 if Double_Char_Token ('=') then
1358 Token := Tok_Less_Equal;
1359 return;
1361 elsif Double_Char_Token ('>') then
1362 Token := Tok_Box;
1363 if Style_Check then Style.Check_Box; end if;
1364 return;
1366 elsif Double_Char_Token ('<') then
1367 Token := Tok_Less_Less;
1368 return;
1370 else
1371 Scan_Ptr := Scan_Ptr + 1;
1372 Token := Tok_Less;
1373 return;
1374 end if;
1376 -- Minus, which is either a subtraction operator, or the first
1377 -- character of double minus starting a comment
1379 when '-' => Minus_Case : begin
1380 if Source (Scan_Ptr + 1) = '>' then
1381 Error_Msg_S ("invalid token");
1382 Scan_Ptr := Scan_Ptr + 2;
1383 Token := Tok_Arrow;
1384 return;
1386 elsif Source (Scan_Ptr + 1) /= '-' then
1387 Accumulate_Checksum ('-');
1388 Scan_Ptr := Scan_Ptr + 1;
1389 Token := Tok_Minus;
1390 return;
1392 -- Comment
1394 else -- Source (Scan_Ptr + 1) = '-' then
1395 if Style_Check then Style.Check_Comment; end if;
1396 Scan_Ptr := Scan_Ptr + 2;
1398 -- Loop to scan comment (this loop runs more than once only if
1399 -- a horizontal tab or other non-graphic character is scanned)
1401 loop
1402 -- Scan to non graphic character (opened up for speed)
1404 loop
1405 exit when Source (Scan_Ptr) not in Graphic_Character;
1406 Scan_Ptr := Scan_Ptr + 1;
1407 exit when Source (Scan_Ptr) not in Graphic_Character;
1408 Scan_Ptr := Scan_Ptr + 1;
1409 exit when Source (Scan_Ptr) not in Graphic_Character;
1410 Scan_Ptr := Scan_Ptr + 1;
1411 exit when Source (Scan_Ptr) not in Graphic_Character;
1412 Scan_Ptr := Scan_Ptr + 1;
1413 exit when Source (Scan_Ptr) not in Graphic_Character;
1414 Scan_Ptr := Scan_Ptr + 1;
1415 end loop;
1417 -- Keep going if horizontal tab
1419 if Source (Scan_Ptr) = HT then
1420 if Style_Check then Style.Check_HT; end if;
1421 Scan_Ptr := Scan_Ptr + 1;
1423 -- Terminate scan of comment if line terminator
1425 elsif Source (Scan_Ptr) in Line_Terminator then
1426 exit;
1428 -- Terminate scan of comment if end of file encountered
1429 -- (embedded EOF character or real last character in file)
1431 elsif Source (Scan_Ptr) = EOF then
1432 exit;
1434 -- Keep going if character in 80-FF range, or is ESC. These
1435 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1436 -- They are allowed even in Ada 83 mode according to the
1437 -- approved AI. ESC was added to the AI in June 93.
1439 elsif Source (Scan_Ptr) in Upper_Half_Character
1440 or else Source (Scan_Ptr) = ESC
1441 then
1442 Scan_Ptr := Scan_Ptr + 1;
1444 -- Otherwise we have an illegal comment character
1446 else
1447 Error_Illegal_Character;
1448 end if;
1450 end loop;
1452 -- Note that we do NOT execute a return here, instead we fall
1453 -- through to reexecute the scan loop to look for a token.
1455 end if;
1456 end Minus_Case;
1458 -- Double quote starting a string literal
1460 when '"' =>
1461 Slit;
1462 Post_Scan;
1463 return;
1465 -- Percent starting a string literal
1467 when '%' =>
1468 if Warn_On_Obsolescent_Feature then
1469 Error_Msg_S
1470 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1471 Error_Msg_S
1472 ("\use """""" instead?");
1473 end if;
1475 Slit;
1476 Post_Scan;
1477 return;
1479 -- Apostrophe. This can either be the start of a character literal,
1480 -- or an isolated apostrophe used in a qualified expression or an
1481 -- attribute. We treat it as a character literal if it does not
1482 -- follow a right parenthesis, identifier, the keyword ALL or
1483 -- a literal. This means that we correctly treat constructs like:
1485 -- A := CHARACTER'('A');
1487 -- Note that RM-2.2(7) does not require a separator between
1488 -- "CHARACTER" and "'" in the above.
1490 when ''' => Char_Literal_Case : declare
1491 Code : Char_Code;
1492 Err : Boolean;
1494 begin
1495 Accumulate_Checksum (''');
1496 Scan_Ptr := Scan_Ptr + 1;
1498 -- Here is where we make the test to distinguish the cases. Treat
1499 -- as apostrophe if previous token is an identifier, right paren
1500 -- or the reserved word "all" (latter case as in A.all'Address)
1501 -- (or the reserved word "project" in project files).
1502 -- Also treat it as apostrophe after a literal (this catches
1503 -- some legitimate cases, like A."abs"'Address, and also gives
1504 -- better error behavior for impossible cases like 123'xxx).
1506 if Prev_Token = Tok_Identifier
1507 or else Prev_Token = Tok_Right_Paren
1508 or else Prev_Token = Tok_All
1509 or else Prev_Token = Tok_Project
1510 or else Prev_Token in Token_Class_Literal
1511 then
1512 Token := Tok_Apostrophe;
1513 if Style_Check then Style.Check_Apostrophe; end if;
1514 return;
1516 -- Otherwise the apostrophe starts a character literal
1518 else
1519 -- Case of wide character literal with ESC or [ encoding
1521 if (Source (Scan_Ptr) = ESC
1522 and then
1523 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1524 or else
1525 (Source (Scan_Ptr) in Upper_Half_Character
1526 and then
1527 Upper_Half_Encoding)
1528 or else
1529 (Source (Scan_Ptr) = '['
1530 and then
1531 Source (Scan_Ptr + 1) = '"')
1532 then
1533 Scan_Wide (Source, Scan_Ptr, Code, Err);
1534 Accumulate_Checksum (Code);
1536 if Err then
1537 Error_Illegal_Wide_Character;
1538 end if;
1540 if Source (Scan_Ptr) /= ''' then
1541 Error_Msg_S ("missing apostrophe");
1542 else
1543 Scan_Ptr := Scan_Ptr + 1;
1544 end if;
1546 -- If we do not find a closing quote in the expected place then
1547 -- assume that we have a misguided attempt at a string literal.
1549 -- However, if previous token is RANGE, then we return an
1550 -- apostrophe instead since this gives better error recovery
1552 elsif Source (Scan_Ptr + 1) /= ''' then
1554 if Prev_Token = Tok_Range then
1555 Token := Tok_Apostrophe;
1556 return;
1558 else
1559 Scan_Ptr := Scan_Ptr - 1;
1560 Error_Msg_S
1561 ("strings are delimited by double quote character");
1562 Slit;
1563 Post_Scan;
1564 return;
1565 end if;
1567 -- Otherwise we have a (non-wide) character literal
1569 else
1570 Accumulate_Checksum (Source (Scan_Ptr));
1572 if Source (Scan_Ptr) not in Graphic_Character then
1573 if Source (Scan_Ptr) in Upper_Half_Character then
1574 if Ada_83 then
1575 Error_Illegal_Character;
1576 end if;
1578 else
1579 Error_Illegal_Character;
1580 end if;
1581 end if;
1583 Code := Get_Char_Code (Source (Scan_Ptr));
1584 Scan_Ptr := Scan_Ptr + 2;
1585 end if;
1587 -- Fall through here with Scan_Ptr updated past the closing
1588 -- quote, and Code set to the Char_Code value for the literal
1590 Accumulate_Checksum (''');
1591 Token := Tok_Char_Literal;
1592 Set_Character_Literal_Name (Code);
1593 Token_Name := Name_Find;
1594 Character_Code := Code;
1595 Post_Scan;
1596 return;
1597 end if;
1598 end Char_Literal_Case;
1600 -- Right parenthesis
1602 when ')' =>
1603 Accumulate_Checksum (')');
1604 Scan_Ptr := Scan_Ptr + 1;
1605 Token := Tok_Right_Paren;
1606 if Style_Check then Style.Check_Right_Paren; end if;
1607 return;
1609 -- Right bracket or right brace, treated as right paren
1611 when ']' | '}' =>
1612 Error_Msg_S ("illegal character, replaced by "")""");
1613 Scan_Ptr := Scan_Ptr + 1;
1614 Token := Tok_Right_Paren;
1615 return;
1617 -- Slash (can be division operator or first character of not equal)
1619 when '/' =>
1620 Accumulate_Checksum ('/');
1622 if Double_Char_Token ('=') then
1623 Token := Tok_Not_Equal;
1624 return;
1625 else
1626 Scan_Ptr := Scan_Ptr + 1;
1627 Token := Tok_Slash;
1628 return;
1629 end if;
1631 -- Semicolon
1633 when ';' =>
1634 Accumulate_Checksum (';');
1635 Scan_Ptr := Scan_Ptr + 1;
1636 Token := Tok_Semicolon;
1637 if Style_Check then Style.Check_Semicolon; end if;
1638 return;
1640 -- Vertical bar
1642 when '|' => Vertical_Bar_Case : begin
1643 Accumulate_Checksum ('|');
1645 -- Special check for || to give nice message
1647 if Source (Scan_Ptr + 1) = '|' then
1648 Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1649 Scan_Ptr := Scan_Ptr + 2;
1650 Token := Tok_Or;
1651 return;
1653 else
1654 Scan_Ptr := Scan_Ptr + 1;
1655 Token := Tok_Vertical_Bar;
1656 if Style_Check then Style.Check_Vertical_Bar; end if;
1657 return;
1658 end if;
1659 end Vertical_Bar_Case;
1661 -- Exclamation, replacement character for vertical bar
1663 when '!' => Exclamation_Case : begin
1664 Accumulate_Checksum ('!');
1666 if Warn_On_Obsolescent_Feature then
1667 Error_Msg_S
1668 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1669 Error_Msg_S
1670 ("\use ""'|"" instead?");
1671 end if;
1673 if Source (Scan_Ptr + 1) = '=' then
1674 Error_Msg_S ("'!= should be /=");
1675 Scan_Ptr := Scan_Ptr + 2;
1676 Token := Tok_Not_Equal;
1677 return;
1679 else
1680 Scan_Ptr := Scan_Ptr + 1;
1681 Token := Tok_Vertical_Bar;
1682 return;
1683 end if;
1685 end Exclamation_Case;
1687 -- Plus
1689 when '+' => Plus_Case : begin
1690 Accumulate_Checksum ('+');
1691 Scan_Ptr := Scan_Ptr + 1;
1692 Token := Tok_Plus;
1693 return;
1694 end Plus_Case;
1696 -- Digits starting a numeric literal
1698 when '0' .. '9' =>
1699 Nlit;
1701 if Identifier_Char (Source (Scan_Ptr)) then
1702 Error_Msg_S
1703 ("delimiter required between literal and identifier");
1704 end if;
1705 Post_Scan;
1706 return;
1708 -- Lower case letters
1710 when 'a' .. 'z' =>
1711 Name_Len := 1;
1712 Name_Buffer (1) := Source (Scan_Ptr);
1713 Accumulate_Checksum (Name_Buffer (1));
1714 Scan_Ptr := Scan_Ptr + 1;
1715 goto Scan_Identifier;
1717 -- Upper case letters
1719 when 'A' .. 'Z' =>
1720 Name_Len := 1;
1721 Name_Buffer (1) :=
1722 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1723 Accumulate_Checksum (Name_Buffer (1));
1724 Scan_Ptr := Scan_Ptr + 1;
1725 goto Scan_Identifier;
1727 -- Underline character
1729 when '_' =>
1730 if Special_Characters ('_') then
1731 Token_Ptr := Scan_Ptr;
1732 Scan_Ptr := Scan_Ptr + 1;
1733 Token := Tok_Special;
1734 Special_Character := '_';
1735 return;
1736 end if;
1738 Error_Msg_S ("identifier cannot start with underline");
1739 Name_Len := 1;
1740 Name_Buffer (1) := '_';
1741 Scan_Ptr := Scan_Ptr + 1;
1742 goto Scan_Identifier;
1744 -- Space (not possible, because we scanned past blanks)
1746 when ' ' =>
1747 raise Program_Error;
1749 -- Characters in top half of ASCII 8-bit chart
1751 when Upper_Half_Character =>
1753 -- Wide character case. Note that Scan_Identifier will issue
1754 -- an appropriate message if wide characters are not allowed
1755 -- in identifiers.
1757 if Upper_Half_Encoding then
1758 Name_Len := 0;
1759 goto Scan_Identifier;
1761 -- Otherwise we have OK Latin-1 character
1763 else
1764 -- Upper half characters may possibly be identifier letters
1765 -- but can never be digits, so Identifier_Char can be used
1766 -- to test for a valid start of identifier character.
1768 if Identifier_Char (Source (Scan_Ptr)) then
1769 Name_Len := 0;
1770 goto Scan_Identifier;
1771 else
1772 Error_Illegal_Character;
1773 end if;
1774 end if;
1776 when ESC =>
1778 -- ESC character, possible start of identifier if wide characters
1779 -- using ESC encoding are allowed in identifiers, which we can
1780 -- tell by looking at the Identifier_Char flag for ESC, which is
1781 -- only true if these conditions are met.
1783 if Identifier_Char (ESC) then
1784 Name_Len := 0;
1785 goto Scan_Identifier;
1786 else
1787 Error_Illegal_Wide_Character;
1788 end if;
1790 -- Invalid control characters
1792 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
1793 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1794 EM | FS | GS | RS | US | DEL
1796 Error_Illegal_Character;
1798 -- Invalid graphic characters
1800 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1801 -- If Set_Special_Character has been called for this character,
1802 -- set Scans.Special_Character and return a Special token.
1804 if Special_Characters (Source (Scan_Ptr)) then
1805 Token_Ptr := Scan_Ptr;
1806 Token := Tok_Special;
1807 Special_Character := Source (Scan_Ptr);
1808 Scan_Ptr := Scan_Ptr + 1;
1809 return;
1811 -- otherwise, this is an illegal character
1813 else
1814 Error_Illegal_Character;
1815 end if;
1817 -- End switch on non-blank character
1819 end case;
1821 -- End loop past format effectors. The exit from this loop is by
1822 -- executing a return statement following completion of token scan
1823 -- (control never falls out of this loop to the code which follows)
1825 end loop;
1827 -- Identifier scanning routine. On entry, some initial characters
1828 -- of the identifier may have already been stored in Name_Buffer.
1829 -- If so, Name_Len has the number of characters stored. otherwise
1830 -- Name_Len is set to zero on entry.
1832 <<Scan_Identifier>>
1834 -- This loop scans as fast as possible past lower half letters
1835 -- and digits, which we expect to be the most common characters.
1837 loop
1838 if Source (Scan_Ptr) in 'a' .. 'z'
1839 or else Source (Scan_Ptr) in '0' .. '9'
1840 then
1841 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
1842 Accumulate_Checksum (Source (Scan_Ptr));
1844 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
1845 Name_Buffer (Name_Len + 1) :=
1846 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1847 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
1848 else
1849 exit;
1850 end if;
1852 -- Open out the loop a couple of times for speed
1854 if Source (Scan_Ptr + 1) in 'a' .. 'z'
1855 or else Source (Scan_Ptr + 1) in '0' .. '9'
1856 then
1857 Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
1858 Accumulate_Checksum (Source (Scan_Ptr + 1));
1860 elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
1861 Name_Buffer (Name_Len + 2) :=
1862 Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
1863 Accumulate_Checksum (Name_Buffer (Name_Len + 2));
1865 else
1866 Scan_Ptr := Scan_Ptr + 1;
1867 Name_Len := Name_Len + 1;
1868 exit;
1869 end if;
1871 if Source (Scan_Ptr + 2) in 'a' .. 'z'
1872 or else Source (Scan_Ptr + 2) in '0' .. '9'
1873 then
1874 Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
1875 Accumulate_Checksum (Source (Scan_Ptr + 2));
1877 elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
1878 Name_Buffer (Name_Len + 3) :=
1879 Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
1880 Accumulate_Checksum (Name_Buffer (Name_Len + 3));
1881 else
1882 Scan_Ptr := Scan_Ptr + 2;
1883 Name_Len := Name_Len + 2;
1884 exit;
1885 end if;
1887 if Source (Scan_Ptr + 3) in 'a' .. 'z'
1888 or else Source (Scan_Ptr + 3) in '0' .. '9'
1889 then
1890 Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
1891 Accumulate_Checksum (Source (Scan_Ptr + 3));
1893 elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
1894 Name_Buffer (Name_Len + 4) :=
1895 Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
1896 Accumulate_Checksum (Name_Buffer (Name_Len + 4));
1898 else
1899 Scan_Ptr := Scan_Ptr + 3;
1900 Name_Len := Name_Len + 3;
1901 exit;
1902 end if;
1904 Scan_Ptr := Scan_Ptr + 4;
1905 Name_Len := Name_Len + 4;
1906 end loop;
1908 -- If we fall through, then we have encountered either an underline
1909 -- character, or an extended identifier character (i.e. one from the
1910 -- upper half), or a wide character, or an identifier terminator.
1911 -- The initial test speeds us up in the most common case where we
1912 -- have an identifier terminator. Note that ESC is an identifier
1913 -- character only if a wide character encoding method that uses
1914 -- ESC encoding is active, so if we find an ESC character we know
1915 -- that we have a wide character.
1917 if Identifier_Char (Source (Scan_Ptr)) then
1919 -- Case of underline
1921 if Source (Scan_Ptr) = '_' then
1922 Accumulate_Checksum ('_');
1924 -- Check error case of identifier ending with underscore
1925 -- In this case we ignore the underscore and do not store it.
1927 if not Identifier_Char (Source (Scan_Ptr + 1)) then
1928 Error_Msg_S ("identifier cannot end with underline");
1929 Scan_Ptr := Scan_Ptr + 1;
1931 -- Check error case of two underscores. In this case we do
1932 -- not store the first underscore (we will store the second)
1934 elsif Source (Scan_Ptr + 1) = '_' then
1935 Error_No_Double_Underline;
1937 -- Normal case of legal underscore
1939 else
1940 Name_Len := Name_Len + 1;
1941 Name_Buffer (Name_Len) := '_';
1942 end if;
1944 Scan_Ptr := Scan_Ptr + 1;
1945 goto Scan_Identifier;
1947 -- Upper half character
1949 elsif Source (Scan_Ptr) in Upper_Half_Character
1950 and then not Upper_Half_Encoding
1951 then
1952 Accumulate_Checksum (Source (Scan_Ptr));
1953 Store_Encoded_Character
1954 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
1955 Scan_Ptr := Scan_Ptr + 1;
1956 goto Scan_Identifier;
1958 -- Left bracket not followed by a quote terminates an identifier.
1959 -- This is an error, but we don't want to give a junk error msg
1960 -- about wide characters in this case!
1962 elsif Source (Scan_Ptr) = '['
1963 and then Source (Scan_Ptr + 1) /= '"'
1964 then
1965 null;
1967 -- We know we have a wide character encoding here (the current
1968 -- character is either ESC, left bracket, or an upper half
1969 -- character depending on the encoding method).
1971 else
1972 -- Scan out the wide character and insert the appropriate
1973 -- encoding into the name table entry for the identifier.
1975 declare
1976 Sptr : constant Source_Ptr := Scan_Ptr;
1977 Code : Char_Code;
1978 Err : Boolean;
1979 Chr : Character;
1981 begin
1982 Scan_Wide (Source, Scan_Ptr, Code, Err);
1984 -- If error, signal error
1986 if Err then
1987 Error_Illegal_Wide_Character;
1989 -- If the character scanned is a normal identifier
1990 -- character, then we treat it that way.
1992 elsif In_Character_Range (Code)
1993 and then Identifier_Char (Get_Character (Code))
1994 then
1995 Chr := Get_Character (Code);
1996 Accumulate_Checksum (Chr);
1997 Store_Encoded_Character
1998 (Get_Char_Code (Fold_Lower (Chr)));
2000 -- Character is not normal identifier character, store
2001 -- it in encoded form.
2003 else
2004 Accumulate_Checksum (Code);
2005 Store_Encoded_Character (Code);
2007 -- Make sure we are allowing wide characters in
2008 -- identifiers. Note that we allow wide character
2009 -- notation for an OK identifier character. This
2010 -- in particular allows bracket or other notation
2011 -- to be used for upper half letters.
2013 if Identifier_Character_Set /= 'w' then
2014 Error_Msg
2015 ("wide character not allowed in identifier", Sptr);
2016 end if;
2017 end if;
2018 end;
2020 goto Scan_Identifier;
2021 end if;
2022 end if;
2024 -- Scan of identifier is complete. The identifier is stored in
2025 -- Name_Buffer, and Scan_Ptr points past the last character.
2027 Token_Name := Name_Find;
2029 -- Here is where we check if it was a keyword
2031 if Get_Name_Table_Byte (Token_Name) /= 0
2032 and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
2033 then
2034 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2036 -- Deal with possible style check for non-lower case keyword,
2037 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
2038 -- for this purpose if they appear as attribute designators.
2039 -- Actually we only check the first character for speed.
2041 if Style_Check
2042 and then Source (Token_Ptr) <= 'Z'
2043 and then (Prev_Token /= Tok_Apostrophe
2044 or else
2045 (Token /= Tok_Access
2046 and then Token /= Tok_Delta
2047 and then Token /= Tok_Digits
2048 and then Token /= Tok_Range))
2049 then
2050 Style.Non_Lower_Case_Keyword;
2051 end if;
2053 -- We must reset Token_Name since this is not an identifier
2054 -- and if we leave Token_Name set, the parser gets confused
2055 -- because it thinks it is dealing with an identifier instead
2056 -- of the corresponding keyword.
2058 Token_Name := No_Name;
2059 return;
2061 -- It is an identifier after all
2063 else
2064 Token := Tok_Identifier;
2065 Post_Scan;
2066 return;
2067 end if;
2068 end Scan;
2070 ------------------------------
2071 -- Set_End_Of_Line_As_Token --
2072 ------------------------------
2074 procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2075 begin
2076 End_Of_Line_Is_Token := Value;
2077 end Set_End_Of_Line_As_Token;
2079 ---------------------------
2080 -- Set_Special_Character --
2081 ---------------------------
2083 procedure Set_Special_Character (C : Character) is
2084 begin
2085 case C is
2086 when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
2087 Special_Characters (C) := True;
2089 when others =>
2090 null;
2091 end case;
2092 end Set_Special_Character;
2094 ----------------------
2095 -- Set_Start_Column --
2096 ----------------------
2098 -- Note: it seems at first glance a little expensive to compute this value
2099 -- for every source line (since it is certainly not used for all source
2100 -- lines). On the other hand, it doesn't take much more work to skip past
2101 -- the initial white space on the line counting the columns than it would
2102 -- to scan past the white space using the standard scanning circuits.
2104 function Set_Start_Column return Column_Number is
2105 Start_Column : Column_Number := 0;
2107 begin
2108 -- Outer loop scans past horizontal tab characters
2110 Tabs_Loop : loop
2112 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
2113 -- past the blanks and adjusting Start_Column to account for them.
2115 Blanks_Loop : loop
2116 if Source (Scan_Ptr) = ' ' then
2117 if Source (Scan_Ptr + 1) = ' ' then
2118 if Source (Scan_Ptr + 2) = ' ' then
2119 if Source (Scan_Ptr + 3) = ' ' then
2120 if Source (Scan_Ptr + 4) = ' ' then
2121 if Source (Scan_Ptr + 5) = ' ' then
2122 if Source (Scan_Ptr + 6) = ' ' then
2123 Scan_Ptr := Scan_Ptr + 7;
2124 Start_Column := Start_Column + 7;
2125 else
2126 Scan_Ptr := Scan_Ptr + 6;
2127 Start_Column := Start_Column + 6;
2128 exit Blanks_Loop;
2129 end if;
2130 else
2131 Scan_Ptr := Scan_Ptr + 5;
2132 Start_Column := Start_Column + 5;
2133 exit Blanks_Loop;
2134 end if;
2135 else
2136 Scan_Ptr := Scan_Ptr + 4;
2137 Start_Column := Start_Column + 4;
2138 exit Blanks_Loop;
2139 end if;
2140 else
2141 Scan_Ptr := Scan_Ptr + 3;
2142 Start_Column := Start_Column + 3;
2143 exit Blanks_Loop;
2144 end if;
2145 else
2146 Scan_Ptr := Scan_Ptr + 2;
2147 Start_Column := Start_Column + 2;
2148 exit Blanks_Loop;
2149 end if;
2150 else
2151 Scan_Ptr := Scan_Ptr + 1;
2152 Start_Column := Start_Column + 1;
2153 exit Blanks_Loop;
2154 end if;
2155 else
2156 exit Blanks_Loop;
2157 end if;
2158 end loop Blanks_Loop;
2160 -- Outer loop keeps going only if a horizontal tab follows
2162 if Source (Scan_Ptr) = HT then
2163 if Style_Check then Style.Check_HT; end if;
2164 Scan_Ptr := Scan_Ptr + 1;
2165 Start_Column := (Start_Column / 8) * 8 + 8;
2166 else
2167 exit Tabs_Loop;
2168 end if;
2170 end loop Tabs_Loop;
2172 return Start_Column;
2173 end Set_Start_Column;
2175 end Scng;