2003-12-26 Guilhem Lavaux <guilhem@kaffe.org>
[official-gcc.git] / gcc / ada / scng.adb
blobcb46bf189ee90138f42a5218de764dcbfeb73df7
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- S C N G --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1992-2003 Free Software Foundation, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
20 -- MA 02111-1307, USA. --
21 -- --
22 -- GNAT was originally developed by the GNAT team at New York University. --
23 -- Extensive contributions were provided by Ada Core Technologies Inc. --
24 -- --
25 ------------------------------------------------------------------------------
27 with Csets; use Csets;
28 with Err_Vars; use Err_Vars;
29 with Hostparm; use Hostparm;
30 with Namet; use Namet;
31 with Opt; use Opt;
32 with Scans; use Scans;
33 with Sinput; use Sinput;
34 with Snames; use Snames;
35 with Stringt; use Stringt;
36 with Stylesw; use Stylesw;
37 with Uintp; use Uintp;
38 with Urealp; use Urealp;
39 with Widechar; use Widechar;
41 with System.CRC32;
42 with System.WCh_Con; use System.WCh_Con;
44 package body Scng is
46 use ASCII;
47 -- Make control characters visible
49 Special_Characters : array (Character) of Boolean := (others => False);
50 -- For characters that are Special token, the value is True
52 Comment_Is_Token : Boolean := False;
53 -- True if comments are tokens
55 End_Of_Line_Is_Token : Boolean := False;
56 -- True if End_Of_Line is a token
58 -----------------------
59 -- Local Subprograms --
60 -----------------------
62 procedure Accumulate_Checksum (C : Character);
63 pragma Inline (Accumulate_Checksum);
64 -- This routine accumulates the checksum given character C. During the
65 -- scanning of a source file, this routine is called with every character
66 -- in the source, excluding blanks, and all control characters (except
67 -- that ESC is included in the checksum). Upper case letters not in string
68 -- literals are folded by the caller. See Sinput spec for the documentation
69 -- of the checksum algorithm. Note: checksum values are only used if we
70 -- generate code, so it is not necessary to worry about making the right
71 -- sequence of calls in any error situation.
73 procedure Accumulate_Checksum (C : Char_Code);
74 pragma Inline (Accumulate_Checksum);
75 -- This version is identical, except that the argument, C, is a character
76 -- code value instead of a character. This is used when wide characters
77 -- are scanned. We use the character code rather than the ASCII characters
78 -- so that the checksum is independent of wide character encoding method.
80 procedure Initialize_Checksum;
81 pragma Inline (Initialize_Checksum);
82 -- Initialize checksum value
84 -------------------------
85 -- Accumulate_Checksum --
86 -------------------------
88 procedure Accumulate_Checksum (C : Character) is
89 begin
90 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
91 end Accumulate_Checksum;
93 procedure Accumulate_Checksum (C : Char_Code) is
94 begin
95 Accumulate_Checksum (Character'Val (C / 256));
96 Accumulate_Checksum (Character'Val (C mod 256));
97 end Accumulate_Checksum;
99 ----------------------------
100 -- Determine_Token_Casing --
101 ----------------------------
103 function Determine_Token_Casing return Casing_Type is
104 begin
105 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
106 end Determine_Token_Casing;
108 -------------------------
109 -- Initialize_Checksum --
110 -------------------------
112 procedure Initialize_Checksum is
113 begin
114 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
115 end Initialize_Checksum;
117 ------------------------
118 -- Initialize_Scanner --
119 ------------------------
121 procedure Initialize_Scanner
122 (Unit : Unit_Number_Type;
123 Index : Source_File_Index)
125 begin
126 -- Set up Token_Type values in Names Table entries for reserved keywords
127 -- We use the Pos value of the Token_Type value. Note we are relying on
128 -- the fact that Token_Type'Val (0) is not a reserved word!
130 Set_Name_Table_Byte (Name_Abort, Token_Type'Pos (Tok_Abort));
131 Set_Name_Table_Byte (Name_Abs, Token_Type'Pos (Tok_Abs));
132 Set_Name_Table_Byte (Name_Abstract, Token_Type'Pos (Tok_Abstract));
133 Set_Name_Table_Byte (Name_Accept, Token_Type'Pos (Tok_Accept));
134 Set_Name_Table_Byte (Name_Access, Token_Type'Pos (Tok_Access));
135 Set_Name_Table_Byte (Name_And, Token_Type'Pos (Tok_And));
136 Set_Name_Table_Byte (Name_Aliased, Token_Type'Pos (Tok_Aliased));
137 Set_Name_Table_Byte (Name_All, Token_Type'Pos (Tok_All));
138 Set_Name_Table_Byte (Name_Array, Token_Type'Pos (Tok_Array));
139 Set_Name_Table_Byte (Name_At, Token_Type'Pos (Tok_At));
140 Set_Name_Table_Byte (Name_Begin, Token_Type'Pos (Tok_Begin));
141 Set_Name_Table_Byte (Name_Body, Token_Type'Pos (Tok_Body));
142 Set_Name_Table_Byte (Name_Case, Token_Type'Pos (Tok_Case));
143 Set_Name_Table_Byte (Name_Constant, Token_Type'Pos (Tok_Constant));
144 Set_Name_Table_Byte (Name_Declare, Token_Type'Pos (Tok_Declare));
145 Set_Name_Table_Byte (Name_Delay, Token_Type'Pos (Tok_Delay));
146 Set_Name_Table_Byte (Name_Delta, Token_Type'Pos (Tok_Delta));
147 Set_Name_Table_Byte (Name_Digits, Token_Type'Pos (Tok_Digits));
148 Set_Name_Table_Byte (Name_Do, Token_Type'Pos (Tok_Do));
149 Set_Name_Table_Byte (Name_Else, Token_Type'Pos (Tok_Else));
150 Set_Name_Table_Byte (Name_Elsif, Token_Type'Pos (Tok_Elsif));
151 Set_Name_Table_Byte (Name_End, Token_Type'Pos (Tok_End));
152 Set_Name_Table_Byte (Name_Entry, Token_Type'Pos (Tok_Entry));
153 Set_Name_Table_Byte (Name_Exception, Token_Type'Pos (Tok_Exception));
154 Set_Name_Table_Byte (Name_Exit, Token_Type'Pos (Tok_Exit));
155 Set_Name_Table_Byte (Name_For, Token_Type'Pos (Tok_For));
156 Set_Name_Table_Byte (Name_Function, Token_Type'Pos (Tok_Function));
157 Set_Name_Table_Byte (Name_Generic, Token_Type'Pos (Tok_Generic));
158 Set_Name_Table_Byte (Name_Goto, Token_Type'Pos (Tok_Goto));
159 Set_Name_Table_Byte (Name_If, Token_Type'Pos (Tok_If));
160 Set_Name_Table_Byte (Name_In, Token_Type'Pos (Tok_In));
161 Set_Name_Table_Byte (Name_Is, Token_Type'Pos (Tok_Is));
162 Set_Name_Table_Byte (Name_Limited, Token_Type'Pos (Tok_Limited));
163 Set_Name_Table_Byte (Name_Loop, Token_Type'Pos (Tok_Loop));
164 Set_Name_Table_Byte (Name_Mod, Token_Type'Pos (Tok_Mod));
165 Set_Name_Table_Byte (Name_New, Token_Type'Pos (Tok_New));
166 Set_Name_Table_Byte (Name_Not, Token_Type'Pos (Tok_Not));
167 Set_Name_Table_Byte (Name_Null, Token_Type'Pos (Tok_Null));
168 Set_Name_Table_Byte (Name_Of, Token_Type'Pos (Tok_Of));
169 Set_Name_Table_Byte (Name_Or, Token_Type'Pos (Tok_Or));
170 Set_Name_Table_Byte (Name_Others, Token_Type'Pos (Tok_Others));
171 Set_Name_Table_Byte (Name_Out, Token_Type'Pos (Tok_Out));
172 Set_Name_Table_Byte (Name_Package, Token_Type'Pos (Tok_Package));
173 Set_Name_Table_Byte (Name_Pragma, Token_Type'Pos (Tok_Pragma));
174 Set_Name_Table_Byte (Name_Private, Token_Type'Pos (Tok_Private));
175 Set_Name_Table_Byte (Name_Procedure, Token_Type'Pos (Tok_Procedure));
176 Set_Name_Table_Byte (Name_Protected, Token_Type'Pos (Tok_Protected));
177 Set_Name_Table_Byte (Name_Raise, Token_Type'Pos (Tok_Raise));
178 Set_Name_Table_Byte (Name_Range, Token_Type'Pos (Tok_Range));
179 Set_Name_Table_Byte (Name_Record, Token_Type'Pos (Tok_Record));
180 Set_Name_Table_Byte (Name_Rem, Token_Type'Pos (Tok_Rem));
181 Set_Name_Table_Byte (Name_Renames, Token_Type'Pos (Tok_Renames));
182 Set_Name_Table_Byte (Name_Requeue, Token_Type'Pos (Tok_Requeue));
183 Set_Name_Table_Byte (Name_Return, Token_Type'Pos (Tok_Return));
184 Set_Name_Table_Byte (Name_Reverse, Token_Type'Pos (Tok_Reverse));
185 Set_Name_Table_Byte (Name_Select, Token_Type'Pos (Tok_Select));
186 Set_Name_Table_Byte (Name_Separate, Token_Type'Pos (Tok_Separate));
187 Set_Name_Table_Byte (Name_Subtype, Token_Type'Pos (Tok_Subtype));
188 Set_Name_Table_Byte (Name_Tagged, Token_Type'Pos (Tok_Tagged));
189 Set_Name_Table_Byte (Name_Task, Token_Type'Pos (Tok_Task));
190 Set_Name_Table_Byte (Name_Terminate, Token_Type'Pos (Tok_Terminate));
191 Set_Name_Table_Byte (Name_Then, Token_Type'Pos (Tok_Then));
192 Set_Name_Table_Byte (Name_Type, Token_Type'Pos (Tok_Type));
193 Set_Name_Table_Byte (Name_Until, Token_Type'Pos (Tok_Until));
194 Set_Name_Table_Byte (Name_Use, Token_Type'Pos (Tok_Use));
195 Set_Name_Table_Byte (Name_When, Token_Type'Pos (Tok_When));
196 Set_Name_Table_Byte (Name_While, Token_Type'Pos (Tok_While));
197 Set_Name_Table_Byte (Name_With, Token_Type'Pos (Tok_With));
198 Set_Name_Table_Byte (Name_Xor, Token_Type'Pos (Tok_Xor));
200 -- Initialize scan control variables
202 Current_Source_File := Index;
203 Source := Source_Text (Current_Source_File);
204 Current_Source_Unit := Unit;
205 Scan_Ptr := Source_First (Current_Source_File);
206 Token := No_Token;
207 Token_Ptr := Scan_Ptr;
208 Current_Line_Start := Scan_Ptr;
209 Token_Node := Empty;
210 Token_Name := No_Name;
211 Start_Column := Set_Start_Column;
212 First_Non_Blank_Location := Scan_Ptr;
214 Initialize_Checksum;
216 -- Do not call Scan, otherwise the License stuff does not work in Scn.
218 end Initialize_Scanner;
220 ------------------------------
221 -- Reset_Special_Characters --
222 ------------------------------
224 procedure Reset_Special_Characters is
225 begin
226 Special_Characters := (others => False);
227 end Reset_Special_Characters;
229 ----------
230 -- Scan --
231 ----------
233 procedure Scan is
235 Start_Of_Comment : Source_Ptr;
237 procedure Check_End_Of_Line;
238 -- Called when end of line encountered. Checks that line is not
239 -- too long, and that other style checks for the end of line are met.
241 function Double_Char_Token (C : Character) return Boolean;
242 -- This function is used for double character tokens like := or <>. It
243 -- checks if the character following Source (Scan_Ptr) is C, and if so
244 -- bumps Scan_Ptr past the pair of characters and returns True. A space
245 -- between the two characters is also recognized with an appropriate
246 -- error message being issued. If C is not present, False is returned.
247 -- Note that Double_Char_Token can only be used for tokens defined in
248 -- the Ada syntax (it's use for error cases like && is not appropriate
249 -- since we do not want a junk message for a case like &-space-&).
251 procedure Error_Illegal_Character;
252 -- Give illegal character error, Scan_Ptr points to character.
253 -- On return, Scan_Ptr is bumped past the illegal character.
255 procedure Error_Illegal_Wide_Character;
256 -- Give illegal wide character message. On return, Scan_Ptr is bumped
257 -- past the illegal character, which may still leave us pointing to
258 -- junk, not much we can do if the escape sequence is messed up!
260 procedure Error_Long_Line;
261 -- Signal error of excessively long line
263 procedure Error_No_Double_Underline;
264 -- Signal error of double underline character
266 procedure Nlit;
267 -- This is the procedure for scanning out numeric literals. On entry,
268 -- Scan_Ptr points to the digit that starts the numeric literal (the
269 -- checksum for this character has not been accumulated yet). On return
270 -- Scan_Ptr points past the last character of the numeric literal, Token
271 -- and Token_Node are set appropriately, and the checksum is updated.
273 procedure Slit;
274 -- This is the procedure for scanning out string literals. On entry,
275 -- Scan_Ptr points to the opening string quote (the checksum for this
276 -- character has not been accumulated yet). On return Scan_Ptr points
277 -- past the closing quote of the string literal, Token and Token_Node
278 -- are set appropriately, and the checksum is upated.
280 -----------------------
281 -- Check_End_Of_Line --
282 -----------------------
284 procedure Check_End_Of_Line is
285 Len : constant Int := Int (Scan_Ptr) - Int (Current_Line_Start);
287 begin
288 if Style_Check and Style_Check_Max_Line_Length then
289 Style.Check_Line_Terminator (Len);
291 elsif Len > Hostparm.Max_Line_Length then
292 Error_Long_Line;
293 end if;
294 end Check_End_Of_Line;
296 -----------------------
297 -- Double_Char_Token --
298 -----------------------
300 function Double_Char_Token (C : Character) return Boolean is
301 begin
302 if Source (Scan_Ptr + 1) = C then
303 Accumulate_Checksum (C);
304 Scan_Ptr := Scan_Ptr + 2;
305 return True;
307 elsif Source (Scan_Ptr + 1) = ' '
308 and then Source (Scan_Ptr + 2) = C
309 then
310 Scan_Ptr := Scan_Ptr + 1;
311 Error_Msg_S ("no space allowed here");
312 Scan_Ptr := Scan_Ptr + 2;
313 return True;
315 else
316 return False;
317 end if;
318 end Double_Char_Token;
320 -----------------------------
321 -- Error_Illegal_Character --
322 -----------------------------
324 procedure Error_Illegal_Character is
325 begin
326 Error_Msg_S ("illegal character");
327 Scan_Ptr := Scan_Ptr + 1;
328 end Error_Illegal_Character;
330 ----------------------------------
331 -- Error_Illegal_Wide_Character --
332 ----------------------------------
334 procedure Error_Illegal_Wide_Character is
335 begin
336 if OpenVMS then
337 Error_Msg_S
338 ("illegal wide character, check " &
339 "'/'W'I'D'E'_'C'H'A'R'A'C'T'E'R'_'E'N'C'O'D'I'N'G qualifier");
340 else
341 Error_Msg_S
342 ("illegal wide character, check -gnatW switch");
343 end if;
345 Scan_Ptr := Scan_Ptr + 1;
346 end Error_Illegal_Wide_Character;
348 ---------------------
349 -- Error_Long_Line --
350 ---------------------
352 procedure Error_Long_Line is
353 begin
354 Error_Msg
355 ("this line is too long",
356 Current_Line_Start + Hostparm.Max_Line_Length);
357 end Error_Long_Line;
359 -------------------------------
360 -- Error_No_Double_Underline --
361 -------------------------------
363 procedure Error_No_Double_Underline is
364 begin
365 Error_Msg_S ("two consecutive underlines not permitted");
366 end Error_No_Double_Underline;
368 ----------
369 -- Nlit --
370 ----------
372 procedure Nlit is
374 C : Character;
375 -- Current source program character
377 Base_Char : Character;
378 -- Either # or : (character at start of based number)
380 Base : Int;
381 -- Value of base
383 UI_Base : Uint;
384 -- Value of base in Uint format
386 UI_Int_Value : Uint;
387 -- Value of integer scanned by Scan_Integer in Uint format
389 UI_Num_Value : Uint;
390 -- Value of integer in numeric value being scanned
392 Scale : Int;
393 -- Scale value for real literal
395 UI_Scale : Uint;
396 -- Scale in Uint format
398 Exponent_Is_Negative : Boolean;
399 -- Set true for negative exponent
401 Extended_Digit_Value : Int;
402 -- Extended digit value
404 Point_Scanned : Boolean;
405 -- Flag for decimal point scanned in numeric literal
407 -----------------------
408 -- Local Subprograms --
409 -----------------------
411 procedure Error_Digit_Expected;
412 -- Signal error of bad digit, Scan_Ptr points to the location at
413 -- which the digit was expected on input, and is unchanged on return.
415 procedure Scan_Integer;
416 -- Procedure to scan integer literal. On entry, Scan_Ptr points to
417 -- a digit, on exit Scan_Ptr points past the last character of
418 -- the integer.
419 -- For each digit encountered, UI_Int_Value is multiplied by 10,
420 -- and the value of the digit added to the result. In addition,
421 -- the value in Scale is decremented by one for each actual digit
422 -- scanned.
424 --------------------------
425 -- Error_Digit_Expected --
426 --------------------------
428 procedure Error_Digit_Expected is
429 begin
430 Error_Msg_S ("digit expected");
431 end Error_Digit_Expected;
433 -------------------
434 -- Scan_Integer --
435 -------------------
437 procedure Scan_Integer is
438 C : Character;
439 -- Next character scanned
441 begin
442 C := Source (Scan_Ptr);
444 -- Loop through digits (allowing underlines)
446 loop
447 Accumulate_Checksum (C);
448 UI_Int_Value :=
449 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
450 Scan_Ptr := Scan_Ptr + 1;
451 Scale := Scale - 1;
452 C := Source (Scan_Ptr);
454 if C = '_' then
455 Accumulate_Checksum ('_');
457 loop
458 Scan_Ptr := Scan_Ptr + 1;
459 C := Source (Scan_Ptr);
460 exit when C /= '_';
461 Error_No_Double_Underline;
462 end loop;
464 if C not in '0' .. '9' then
465 Error_Digit_Expected;
466 exit;
467 end if;
469 else
470 exit when C not in '0' .. '9';
471 end if;
472 end loop;
474 end Scan_Integer;
476 ----------------------------------
477 -- Start of Processing for Nlit --
478 ----------------------------------
480 begin
481 Base := 10;
482 UI_Base := Uint_10;
483 UI_Int_Value := Uint_0;
484 Scale := 0;
485 Scan_Integer;
486 Scale := 0;
487 Point_Scanned := False;
488 UI_Num_Value := UI_Int_Value;
490 -- Various possibilities now for continuing the literal are
491 -- period, E/e (for exponent), or :/# (for based literal).
493 Scale := 0;
494 C := Source (Scan_Ptr);
496 if C = '.' then
498 -- Scan out point, but do not scan past .. which is a range
499 -- sequence, and must not be eaten up scanning a numeric literal.
501 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
502 Accumulate_Checksum ('.');
504 if Point_Scanned then
505 Error_Msg_S ("duplicate point ignored");
506 end if;
508 Point_Scanned := True;
509 Scan_Ptr := Scan_Ptr + 1;
510 C := Source (Scan_Ptr);
512 if C not in '0' .. '9' then
513 Error_Msg
514 ("real literal cannot end with point", Scan_Ptr - 1);
515 else
516 Scan_Integer;
517 UI_Num_Value := UI_Int_Value;
518 end if;
519 end loop;
521 -- Based literal case. The base is the value we already scanned.
522 -- In the case of colon, we insist that the following character
523 -- is indeed an extended digit or a period. This catches a number
524 -- of common errors, as well as catching the well known tricky
525 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;"
527 elsif C = '#'
528 or else (C = ':' and then
529 (Source (Scan_Ptr + 1) = '.'
530 or else
531 Source (Scan_Ptr + 1) in '0' .. '9'
532 or else
533 Source (Scan_Ptr + 1) in 'A' .. 'Z'
534 or else
535 Source (Scan_Ptr + 1) in 'a' .. 'z'))
536 then
537 if C = ':' and then Warn_On_Obsolescent_Feature then
538 Error_Msg_S
539 ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
540 Error_Msg_S
541 ("\use ""'#"" instead?");
542 end if;
544 Accumulate_Checksum (C);
545 Base_Char := C;
546 UI_Base := UI_Int_Value;
548 if UI_Base < 2 or else UI_Base > 16 then
549 Error_Msg_SC ("base not 2-16");
550 UI_Base := Uint_16;
551 end if;
553 Base := UI_To_Int (UI_Base);
554 Scan_Ptr := Scan_Ptr + 1;
556 -- Scan out extended integer [. integer]
558 C := Source (Scan_Ptr);
559 UI_Int_Value := Uint_0;
560 Scale := 0;
562 loop
563 if C in '0' .. '9' then
564 Accumulate_Checksum (C);
565 Extended_Digit_Value :=
566 Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
568 elsif C in 'A' .. 'F' then
569 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
570 Extended_Digit_Value :=
571 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
573 elsif C in 'a' .. 'f' then
574 Accumulate_Checksum (C);
575 Extended_Digit_Value :=
576 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
578 else
579 Error_Msg_S ("extended digit expected");
580 exit;
581 end if;
583 if Extended_Digit_Value >= Base then
584 Error_Msg_S ("digit '>= base");
585 end if;
587 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
588 Scale := Scale - 1;
589 Scan_Ptr := Scan_Ptr + 1;
590 C := Source (Scan_Ptr);
592 if C = '_' then
593 loop
594 Accumulate_Checksum ('_');
595 Scan_Ptr := Scan_Ptr + 1;
596 C := Source (Scan_Ptr);
597 exit when C /= '_';
598 Error_No_Double_Underline;
599 end loop;
601 elsif C = '.' then
602 Accumulate_Checksum ('.');
604 if Point_Scanned then
605 Error_Msg_S ("duplicate point ignored");
606 end if;
608 Scan_Ptr := Scan_Ptr + 1;
609 C := Source (Scan_Ptr);
610 Point_Scanned := True;
611 Scale := 0;
613 elsif C = Base_Char then
614 Accumulate_Checksum (C);
615 Scan_Ptr := Scan_Ptr + 1;
616 exit;
618 elsif C = '#' or else C = ':' then
619 Error_Msg_S ("based number delimiters must match");
620 Scan_Ptr := Scan_Ptr + 1;
621 exit;
623 elsif not Identifier_Char (C) then
624 if Base_Char = '#' then
625 Error_Msg_S ("missing '#");
626 else
627 Error_Msg_S ("missing ':");
628 end if;
630 exit;
631 end if;
633 end loop;
635 UI_Num_Value := UI_Int_Value;
636 end if;
638 -- Scan out exponent
640 if not Point_Scanned then
641 Scale := 0;
642 UI_Scale := Uint_0;
643 else
644 UI_Scale := UI_From_Int (Scale);
645 end if;
647 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
648 Accumulate_Checksum ('e');
649 Scan_Ptr := Scan_Ptr + 1;
650 Exponent_Is_Negative := False;
652 if Source (Scan_Ptr) = '+' then
653 Accumulate_Checksum ('+');
654 Scan_Ptr := Scan_Ptr + 1;
656 elsif Source (Scan_Ptr) = '-' then
657 Accumulate_Checksum ('-');
659 if not Point_Scanned then
660 Error_Msg_S
661 ("negative exponent not allowed for integer literal");
662 else
663 Exponent_Is_Negative := True;
664 end if;
666 Scan_Ptr := Scan_Ptr + 1;
667 end if;
669 UI_Int_Value := Uint_0;
671 if Source (Scan_Ptr) in '0' .. '9' then
672 Scan_Integer;
673 else
674 Error_Digit_Expected;
675 end if;
677 if Exponent_Is_Negative then
678 UI_Scale := UI_Scale - UI_Int_Value;
679 else
680 UI_Scale := UI_Scale + UI_Int_Value;
681 end if;
682 end if;
684 -- Case of real literal to be returned
686 if Point_Scanned then
687 Token := Tok_Real_Literal;
688 Real_Literal_Value :=
689 UR_From_Components (
690 Num => UI_Num_Value,
691 Den => -UI_Scale,
692 Rbase => Base);
694 -- Case of integer literal to be returned
696 else
697 Token := Tok_Integer_Literal;
699 if UI_Scale = 0 then
700 Int_Literal_Value := UI_Num_Value;
702 -- Avoid doing possibly expensive calculations in cases like
703 -- parsing 163E800_000# when semantics will not be done anyway.
704 -- This is especially useful when parsing garbled input.
706 elsif Operating_Mode /= Check_Syntax
707 and then (Serious_Errors_Detected = 0 or else Try_Semantics)
708 then
709 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
711 else
712 Int_Literal_Value := No_Uint;
714 end if;
716 end if;
718 return;
720 end Nlit;
722 ----------
723 -- Slit --
724 ----------
726 procedure Slit is
728 Delimiter : Character;
729 -- Delimiter (first character of string)
731 C : Character;
732 -- Current source program character
734 Code : Char_Code;
735 -- Current character code value
737 Err : Boolean;
738 -- Error flag for Scan_Wide call
740 procedure Error_Bad_String_Char;
741 -- Signal bad character in string/character literal. On entry
742 -- Scan_Ptr points to the improper character encountered during
743 -- the scan. Scan_Ptr is not modified, so it still points to the bad
744 -- character on return.
746 procedure Error_Unterminated_String;
747 -- Procedure called if a line terminator character is encountered
748 -- during scanning a string, meaning that the string is not properly
749 -- terminated.
751 procedure Set_String;
752 -- Procedure used to distinguish between string and operator symbol.
753 -- On entry the string has been scanned out, and its characters start
754 -- at Token_Ptr and end one character before Scan_Ptr. On exit Token
755 -- is set to Tok_String_Literal or Tok_Operator_Symbol as
756 -- appropriate, and Token_Node is appropriately initialized.
757 -- In addition, in the operator symbol case, Token_Name is
758 -- appropriately set.
760 ---------------------------
761 -- Error_Bad_String_Char --
762 ---------------------------
764 procedure Error_Bad_String_Char is
765 C : constant Character := Source (Scan_Ptr);
767 begin
768 if C = HT then
769 Error_Msg_S ("horizontal tab not allowed in string");
771 elsif C = VT or else C = FF then
772 Error_Msg_S ("format effector not allowed in string");
774 elsif C in Upper_Half_Character then
775 Error_Msg_S ("(Ada 83) upper half character not allowed");
777 else
778 Error_Msg_S ("control character not allowed in string");
779 end if;
780 end Error_Bad_String_Char;
782 -------------------------------
783 -- Error_Unterminated_String --
784 -------------------------------
786 procedure Error_Unterminated_String is
787 begin
788 -- An interesting little refinement. Consider the following
789 -- examples:
791 -- A := "this is an unterminated string;
792 -- A := "this is an unterminated string &
793 -- P(A, "this is a parameter that didn't get terminated);
795 -- We fiddle a little to do slightly better placement in these
796 -- cases also if there is white space at the end of the line we
797 -- place the flag at the start of this white space, not at the
798 -- end. Note that we only have to test for blanks, since tabs
799 -- aren't allowed in strings in the first place and would have
800 -- caused an error message.
802 -- Two more cases that we treat specially are:
804 -- A := "this string uses the wrong terminator'
805 -- A := "this string uses the wrong terminator' &
807 -- In these cases we give a different error message as well
809 -- We actually reposition the scan pointer to the point where we
810 -- place the flag in these cases, since it seems a better bet on
811 -- the original intention.
813 while Source (Scan_Ptr - 1) = ' '
814 or else Source (Scan_Ptr - 1) = '&'
815 loop
816 Scan_Ptr := Scan_Ptr - 1;
817 Unstore_String_Char;
818 end loop;
820 -- Check for case of incorrect string terminator, but single quote
821 -- is not considered incorrect if the opening terminator misused
822 -- a single quote (error message already given).
824 if Delimiter /= '''
825 and then Source (Scan_Ptr - 1) = '''
826 then
827 Unstore_String_Char;
828 Error_Msg
829 ("incorrect string terminator character", Scan_Ptr - 1);
830 return;
831 end if;
833 if Source (Scan_Ptr - 1) = ';' then
834 Scan_Ptr := Scan_Ptr - 1;
835 Unstore_String_Char;
837 if Source (Scan_Ptr - 1) = ')' then
838 Scan_Ptr := Scan_Ptr - 1;
839 Unstore_String_Char;
840 end if;
841 end if;
843 Error_Msg_S ("missing string quote");
844 end Error_Unterminated_String;
846 ----------------
847 -- Set_String --
848 ----------------
850 procedure Set_String is
851 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
852 C1 : Character;
853 C2 : Character;
854 C3 : Character;
856 begin
857 -- Token_Name is currently set to Error_Name. The following
858 -- section of code resets Token_Name to the proper Name_Op_xx
859 -- value if the string is a valid operator symbol, otherwise it is
860 -- left set to Error_Name.
862 if Slen = 1 then
863 C1 := Source (Token_Ptr + 1);
865 case C1 is
866 when '=' =>
867 Token_Name := Name_Op_Eq;
869 when '>' =>
870 Token_Name := Name_Op_Gt;
872 when '<' =>
873 Token_Name := Name_Op_Lt;
875 when '+' =>
876 Token_Name := Name_Op_Add;
878 when '-' =>
879 Token_Name := Name_Op_Subtract;
881 when '&' =>
882 Token_Name := Name_Op_Concat;
884 when '*' =>
885 Token_Name := Name_Op_Multiply;
887 when '/' =>
888 Token_Name := Name_Op_Divide;
890 when others =>
891 null;
892 end case;
894 elsif Slen = 2 then
895 C1 := Source (Token_Ptr + 1);
896 C2 := Source (Token_Ptr + 2);
898 if C1 = '*' and then C2 = '*' then
899 Token_Name := Name_Op_Expon;
901 elsif C2 = '=' then
903 if C1 = '/' then
904 Token_Name := Name_Op_Ne;
905 elsif C1 = '<' then
906 Token_Name := Name_Op_Le;
907 elsif C1 = '>' then
908 Token_Name := Name_Op_Ge;
909 end if;
911 elsif (C1 = 'O' or else C1 = 'o') and then -- OR
912 (C2 = 'R' or else C2 = 'r')
913 then
914 Token_Name := Name_Op_Or;
915 end if;
917 elsif Slen = 3 then
918 C1 := Source (Token_Ptr + 1);
919 C2 := Source (Token_Ptr + 2);
920 C3 := Source (Token_Ptr + 3);
922 if (C1 = 'A' or else C1 = 'a') and then -- AND
923 (C2 = 'N' or else C2 = 'n') and then
924 (C3 = 'D' or else C3 = 'd')
925 then
926 Token_Name := Name_Op_And;
928 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS
929 (C2 = 'B' or else C2 = 'b') and then
930 (C3 = 'S' or else C3 = 's')
931 then
932 Token_Name := Name_Op_Abs;
934 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD
935 (C2 = 'O' or else C2 = 'o') and then
936 (C3 = 'D' or else C3 = 'd')
937 then
938 Token_Name := Name_Op_Mod;
940 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT
941 (C2 = 'O' or else C2 = 'o') and then
942 (C3 = 'T' or else C3 = 't')
943 then
944 Token_Name := Name_Op_Not;
946 elsif (C1 = 'R' or else C1 = 'r') and then -- REM
947 (C2 = 'E' or else C2 = 'e') and then
948 (C3 = 'M' or else C3 = 'm')
949 then
950 Token_Name := Name_Op_Rem;
952 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR
953 (C2 = 'O' or else C2 = 'o') and then
954 (C3 = 'R' or else C3 = 'r')
955 then
956 Token_Name := Name_Op_Xor;
957 end if;
959 end if;
961 -- If it is an operator symbol, then Token_Name is set.
962 -- If it is some other string value, then Token_Name still
963 -- contains Error_Name.
965 if Token_Name = Error_Name then
966 Token := Tok_String_Literal;
968 else
969 Token := Tok_Operator_Symbol;
970 end if;
972 end Set_String;
974 ----------
975 -- Slit --
976 ----------
978 begin
979 -- On entry, Scan_Ptr points to the opening character of the string
980 -- which is either a percent, double quote, or apostrophe
981 -- (single quote). The latter case is an error detected by
982 -- the character literal circuit.
984 Delimiter := Source (Scan_Ptr);
985 Accumulate_Checksum (Delimiter);
986 Start_String;
987 Scan_Ptr := Scan_Ptr + 1;
989 -- Loop to scan out characters of string literal
991 loop
992 C := Source (Scan_Ptr);
994 if C = Delimiter then
995 Accumulate_Checksum (C);
996 Scan_Ptr := Scan_Ptr + 1;
997 exit when Source (Scan_Ptr) /= Delimiter;
998 Code := Get_Char_Code (C);
999 Accumulate_Checksum (C);
1000 Scan_Ptr := Scan_Ptr + 1;
1002 else
1003 if C = '"' and then Delimiter = '%' then
1004 Error_Msg_S
1005 ("quote not allowed in percent delimited string");
1006 Code := Get_Char_Code (C);
1007 Scan_Ptr := Scan_Ptr + 1;
1009 elsif (C = ESC
1010 and then
1011 Wide_Character_Encoding_Method
1012 in WC_ESC_Encoding_Method)
1013 or else
1014 (C in Upper_Half_Character
1015 and then
1016 Upper_Half_Encoding)
1017 or else
1018 (C = '['
1019 and then
1020 Source (Scan_Ptr + 1) = '"'
1021 and then
1022 Identifier_Char (Source (Scan_Ptr + 2)))
1023 then
1024 Scan_Wide (Source, Scan_Ptr, Code, Err);
1025 Accumulate_Checksum (Code);
1027 if Err then
1028 Error_Illegal_Wide_Character;
1029 Code := Get_Char_Code (' ');
1030 end if;
1032 else
1033 Accumulate_Checksum (C);
1035 if C not in Graphic_Character then
1036 if C in Line_Terminator then
1037 Error_Unterminated_String;
1038 exit;
1040 elsif C in Upper_Half_Character then
1041 if Ada_83 then
1042 Error_Bad_String_Char;
1043 end if;
1045 else
1046 Error_Bad_String_Char;
1047 end if;
1048 end if;
1050 Code := Get_Char_Code (C);
1051 Scan_Ptr := Scan_Ptr + 1;
1052 end if;
1053 end if;
1055 Store_String_Char (Code);
1057 if not In_Character_Range (Code) then
1058 Wide_Character_Found := True;
1059 end if;
1060 end loop;
1062 String_Literal_Id := End_String;
1063 Set_String;
1064 return;
1066 end Slit;
1068 -- Start of body of Scan
1070 begin
1071 Prev_Token := Token;
1072 Prev_Token_Ptr := Token_Ptr;
1073 Token_Name := Error_Name;
1075 -- The following loop runs more than once only if a format effector
1076 -- (tab, vertical tab, form feed, line feed, carriage return) is
1077 -- encountered and skipped, or some error situation, such as an
1078 -- illegal character, is encountered.
1080 loop
1081 -- Skip past blanks, loop is opened up for speed
1083 while Source (Scan_Ptr) = ' ' loop
1085 if Source (Scan_Ptr + 1) /= ' ' then
1086 Scan_Ptr := Scan_Ptr + 1;
1087 exit;
1088 end if;
1090 if Source (Scan_Ptr + 2) /= ' ' then
1091 Scan_Ptr := Scan_Ptr + 2;
1092 exit;
1093 end if;
1095 if Source (Scan_Ptr + 3) /= ' ' then
1096 Scan_Ptr := Scan_Ptr + 3;
1097 exit;
1098 end if;
1100 if Source (Scan_Ptr + 4) /= ' ' then
1101 Scan_Ptr := Scan_Ptr + 4;
1102 exit;
1103 end if;
1105 if Source (Scan_Ptr + 5) /= ' ' then
1106 Scan_Ptr := Scan_Ptr + 5;
1107 exit;
1108 end if;
1110 if Source (Scan_Ptr + 6) /= ' ' then
1111 Scan_Ptr := Scan_Ptr + 6;
1112 exit;
1113 end if;
1115 if Source (Scan_Ptr + 7) /= ' ' then
1116 Scan_Ptr := Scan_Ptr + 7;
1117 exit;
1118 end if;
1120 Scan_Ptr := Scan_Ptr + 8;
1121 end loop;
1123 -- We are now at a non-blank character, which is the first character
1124 -- of the token we will scan, and hence the value of Token_Ptr.
1126 Token_Ptr := Scan_Ptr;
1128 -- Here begins the main case statement which transfers control on
1129 -- the basis of the non-blank character we have encountered.
1131 case Source (Scan_Ptr) is
1133 -- Line terminator characters
1135 when CR | LF | FF | VT => Line_Terminator_Case : begin
1137 -- Check line too long
1139 Check_End_Of_Line;
1141 -- Set Token_Ptr, if End_Of_Line is a token, for the case when
1142 -- it is a physical line.
1144 if End_Of_Line_Is_Token then
1145 Token_Ptr := Scan_Ptr;
1146 end if;
1148 declare
1149 Physical : Boolean;
1151 begin
1152 Skip_Line_Terminators (Scan_Ptr, Physical);
1154 -- If we are at start of physical line, update scan pointers
1155 -- to reflect the start of the new line.
1157 if Physical then
1158 Current_Line_Start := Scan_Ptr;
1159 Start_Column := Set_Start_Column;
1160 First_Non_Blank_Location := Scan_Ptr;
1162 -- If End_Of_Line is a token, we return it as it is
1163 -- a physical line.
1165 if End_Of_Line_Is_Token then
1166 Token := Tok_End_Of_Line;
1167 return;
1168 end if;
1169 end if;
1170 end;
1171 end Line_Terminator_Case;
1173 -- Horizontal tab, just skip past it
1175 when HT =>
1176 if Style_Check then Style.Check_HT; end if;
1177 Scan_Ptr := Scan_Ptr + 1;
1179 -- End of file character, treated as an end of file only if it
1180 -- is the last character in the buffer, otherwise it is ignored.
1182 when EOF =>
1183 if Scan_Ptr = Source_Last (Current_Source_File) then
1184 Check_End_Of_Line;
1185 Token := Tok_EOF;
1186 return;
1188 else
1189 Scan_Ptr := Scan_Ptr + 1;
1190 end if;
1192 -- Ampersand
1194 when '&' =>
1195 Accumulate_Checksum ('&');
1197 if Source (Scan_Ptr + 1) = '&' then
1198 Error_Msg_S ("'&'& should be `AND THEN`");
1199 Scan_Ptr := Scan_Ptr + 2;
1200 Token := Tok_And;
1201 return;
1203 else
1204 Scan_Ptr := Scan_Ptr + 1;
1205 Token := Tok_Ampersand;
1206 return;
1207 end if;
1209 -- Asterisk (can be multiplication operator or double asterisk
1210 -- which is the exponentiation compound delimiter).
1212 when '*' =>
1213 Accumulate_Checksum ('*');
1215 if Source (Scan_Ptr + 1) = '*' then
1216 Accumulate_Checksum ('*');
1217 Scan_Ptr := Scan_Ptr + 2;
1218 Token := Tok_Double_Asterisk;
1219 return;
1221 else
1222 Scan_Ptr := Scan_Ptr + 1;
1223 Token := Tok_Asterisk;
1224 return;
1225 end if;
1227 -- Colon, which can either be an isolated colon, or part of an
1228 -- assignment compound delimiter.
1230 when ':' =>
1231 Accumulate_Checksum (':');
1233 if Double_Char_Token ('=') then
1234 Token := Tok_Colon_Equal;
1235 if Style_Check then Style.Check_Colon_Equal; end if;
1236 return;
1238 elsif Source (Scan_Ptr + 1) = '-'
1239 and then Source (Scan_Ptr + 2) /= '-'
1240 then
1241 Token := Tok_Colon_Equal;
1242 Error_Msg (":- should be :=", Scan_Ptr);
1243 Scan_Ptr := Scan_Ptr + 2;
1244 return;
1246 else
1247 Scan_Ptr := Scan_Ptr + 1;
1248 Token := Tok_Colon;
1249 if Style_Check then Style.Check_Colon; end if;
1250 return;
1251 end if;
1253 -- Left parenthesis
1255 when '(' =>
1256 Accumulate_Checksum ('(');
1257 Scan_Ptr := Scan_Ptr + 1;
1258 Token := Tok_Left_Paren;
1259 if Style_Check then Style.Check_Left_Paren; end if;
1260 return;
1262 -- Left bracket
1264 when '[' =>
1265 if Source (Scan_Ptr + 1) = '"' then
1266 Name_Len := 0;
1267 goto Scan_Identifier;
1269 else
1270 Error_Msg_S ("illegal character, replaced by ""(""");
1271 Scan_Ptr := Scan_Ptr + 1;
1272 Token := Tok_Left_Paren;
1273 return;
1274 end if;
1276 -- Left brace
1278 when '{' =>
1279 Error_Msg_S ("illegal character, replaced by ""(""");
1280 Scan_Ptr := Scan_Ptr + 1;
1281 Token := Tok_Left_Paren;
1282 return;
1284 -- Comma
1286 when ',' =>
1287 Accumulate_Checksum (',');
1288 Scan_Ptr := Scan_Ptr + 1;
1289 Token := Tok_Comma;
1290 if Style_Check then Style.Check_Comma; end if;
1291 return;
1293 -- Dot, which is either an isolated period, or part of a double
1294 -- dot compound delimiter sequence. We also check for the case of
1295 -- a digit following the period, to give a better error message.
1297 when '.' =>
1298 Accumulate_Checksum ('.');
1300 if Double_Char_Token ('.') then
1301 Token := Tok_Dot_Dot;
1302 if Style_Check then Style.Check_Dot_Dot; end if;
1303 return;
1305 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1306 Error_Msg_S ("numeric literal cannot start with point");
1307 Scan_Ptr := Scan_Ptr + 1;
1309 else
1310 Scan_Ptr := Scan_Ptr + 1;
1311 Token := Tok_Dot;
1312 return;
1313 end if;
1315 -- Equal, which can either be an equality operator, or part of the
1316 -- arrow (=>) compound delimiter.
1318 when '=' =>
1319 Accumulate_Checksum ('=');
1321 if Double_Char_Token ('>') then
1322 Token := Tok_Arrow;
1323 if Style_Check then Style.Check_Arrow; end if;
1324 return;
1326 elsif Source (Scan_Ptr + 1) = '=' then
1327 Error_Msg_S ("== should be =");
1328 Scan_Ptr := Scan_Ptr + 1;
1329 end if;
1331 Scan_Ptr := Scan_Ptr + 1;
1332 Token := Tok_Equal;
1333 return;
1335 -- Greater than, which can be a greater than operator, greater than
1336 -- or equal operator, or first character of a right label bracket.
1338 when '>' =>
1339 Accumulate_Checksum ('>');
1341 if Double_Char_Token ('=') then
1342 Token := Tok_Greater_Equal;
1343 return;
1345 elsif Double_Char_Token ('>') then
1346 Token := Tok_Greater_Greater;
1347 return;
1349 else
1350 Scan_Ptr := Scan_Ptr + 1;
1351 Token := Tok_Greater;
1352 return;
1353 end if;
1355 -- Less than, which can be a less than operator, less than or equal
1356 -- operator, or the first character of a left label bracket, or the
1357 -- first character of a box (<>) compound delimiter.
1359 when '<' =>
1360 Accumulate_Checksum ('<');
1362 if Double_Char_Token ('=') then
1363 Token := Tok_Less_Equal;
1364 return;
1366 elsif Double_Char_Token ('>') then
1367 Token := Tok_Box;
1368 if Style_Check then Style.Check_Box; end if;
1369 return;
1371 elsif Double_Char_Token ('<') then
1372 Token := Tok_Less_Less;
1373 return;
1375 else
1376 Scan_Ptr := Scan_Ptr + 1;
1377 Token := Tok_Less;
1378 return;
1379 end if;
1381 -- Minus, which is either a subtraction operator, or the first
1382 -- character of double minus starting a comment
1384 when '-' => Minus_Case : begin
1385 if Source (Scan_Ptr + 1) = '>' then
1386 Error_Msg_S ("invalid token");
1387 Scan_Ptr := Scan_Ptr + 2;
1388 Token := Tok_Arrow;
1389 return;
1391 elsif Source (Scan_Ptr + 1) /= '-' then
1392 Accumulate_Checksum ('-');
1393 Scan_Ptr := Scan_Ptr + 1;
1394 Token := Tok_Minus;
1395 return;
1397 -- Comment
1399 else -- Source (Scan_Ptr + 1) = '-' then
1400 if Style_Check then Style.Check_Comment; end if;
1401 Scan_Ptr := Scan_Ptr + 2;
1402 Start_Of_Comment := Scan_Ptr;
1404 -- Loop to scan comment (this loop runs more than once only if
1405 -- a horizontal tab or other non-graphic character is scanned)
1407 loop
1408 -- Scan to non graphic character (opened up for speed)
1410 loop
1411 exit when Source (Scan_Ptr) not in Graphic_Character;
1412 Scan_Ptr := Scan_Ptr + 1;
1413 exit when Source (Scan_Ptr) not in Graphic_Character;
1414 Scan_Ptr := Scan_Ptr + 1;
1415 exit when Source (Scan_Ptr) not in Graphic_Character;
1416 Scan_Ptr := Scan_Ptr + 1;
1417 exit when Source (Scan_Ptr) not in Graphic_Character;
1418 Scan_Ptr := Scan_Ptr + 1;
1419 exit when Source (Scan_Ptr) not in Graphic_Character;
1420 Scan_Ptr := Scan_Ptr + 1;
1421 end loop;
1423 -- Keep going if horizontal tab
1425 if Source (Scan_Ptr) = HT then
1426 if Style_Check then Style.Check_HT; end if;
1427 Scan_Ptr := Scan_Ptr + 1;
1429 -- Terminate scan of comment if line terminator
1431 elsif Source (Scan_Ptr) in Line_Terminator then
1432 exit;
1434 -- Terminate scan of comment if end of file encountered
1435 -- (embedded EOF character or real last character in file)
1437 elsif Source (Scan_Ptr) = EOF then
1438 exit;
1440 -- Keep going if character in 80-FF range, or is ESC. These
1441 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1442 -- They are allowed even in Ada 83 mode according to the
1443 -- approved AI. ESC was added to the AI in June 93.
1445 elsif Source (Scan_Ptr) in Upper_Half_Character
1446 or else Source (Scan_Ptr) = ESC
1447 then
1448 Scan_Ptr := Scan_Ptr + 1;
1450 -- Otherwise we have an illegal comment character
1452 else
1453 Error_Illegal_Character;
1454 end if;
1456 end loop;
1458 -- Note that, except when comments are tokens, we do NOT
1459 -- execute a return here, instead we fall through to reexecute
1460 -- the scan loop to look for a token.
1462 if Comment_Is_Token then
1463 Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1464 Name_Buffer (1 .. Name_Len) :=
1465 String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1466 Comment_Id := Name_Find;
1467 Token := Tok_Comment;
1468 return;
1469 end if;
1470 end if;
1471 end Minus_Case;
1473 -- Double quote starting a string literal
1475 when '"' =>
1476 Slit;
1477 Post_Scan;
1478 return;
1480 -- Percent starting a string literal
1482 when '%' =>
1483 if Warn_On_Obsolescent_Feature then
1484 Error_Msg_S
1485 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1486 Error_Msg_S
1487 ("\use """""" instead?");
1488 end if;
1490 Slit;
1491 Post_Scan;
1492 return;
1494 -- Apostrophe. This can either be the start of a character literal,
1495 -- or an isolated apostrophe used in a qualified expression or an
1496 -- attribute. We treat it as a character literal if it does not
1497 -- follow a right parenthesis, identifier, the keyword ALL or
1498 -- a literal. This means that we correctly treat constructs like:
1500 -- A := CHARACTER'('A');
1502 -- Note that RM-2.2(7) does not require a separator between
1503 -- "CHARACTER" and "'" in the above.
1505 when ''' => Char_Literal_Case : declare
1506 Code : Char_Code;
1507 Err : Boolean;
1509 begin
1510 Accumulate_Checksum (''');
1511 Scan_Ptr := Scan_Ptr + 1;
1513 -- Here is where we make the test to distinguish the cases. Treat
1514 -- as apostrophe if previous token is an identifier, right paren
1515 -- or the reserved word "all" (latter case as in A.all'Address)
1516 -- (or the reserved word "project" in project files).
1517 -- Also treat it as apostrophe after a literal (this catches
1518 -- some legitimate cases, like A."abs"'Address, and also gives
1519 -- better error behavior for impossible cases like 123'xxx).
1521 if Prev_Token = Tok_Identifier
1522 or else Prev_Token = Tok_Right_Paren
1523 or else Prev_Token = Tok_All
1524 or else Prev_Token = Tok_Project
1525 or else Prev_Token in Token_Class_Literal
1526 then
1527 Token := Tok_Apostrophe;
1528 if Style_Check then Style.Check_Apostrophe; end if;
1529 return;
1531 -- Otherwise the apostrophe starts a character literal
1533 else
1534 -- Case of wide character literal with ESC or [ encoding
1536 if (Source (Scan_Ptr) = ESC
1537 and then
1538 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1539 or else
1540 (Source (Scan_Ptr) in Upper_Half_Character
1541 and then
1542 Upper_Half_Encoding)
1543 or else
1544 (Source (Scan_Ptr) = '['
1545 and then
1546 Source (Scan_Ptr + 1) = '"')
1547 then
1548 Scan_Wide (Source, Scan_Ptr, Code, Err);
1549 Accumulate_Checksum (Code);
1551 if Err then
1552 Error_Illegal_Wide_Character;
1553 end if;
1555 if Source (Scan_Ptr) /= ''' then
1556 Error_Msg_S ("missing apostrophe");
1557 else
1558 Scan_Ptr := Scan_Ptr + 1;
1559 end if;
1561 -- If we do not find a closing quote in the expected place then
1562 -- assume that we have a misguided attempt at a string literal.
1564 -- However, if previous token is RANGE, then we return an
1565 -- apostrophe instead since this gives better error recovery
1567 elsif Source (Scan_Ptr + 1) /= ''' then
1569 if Prev_Token = Tok_Range then
1570 Token := Tok_Apostrophe;
1571 return;
1573 else
1574 Scan_Ptr := Scan_Ptr - 1;
1575 Error_Msg_S
1576 ("strings are delimited by double quote character");
1577 Slit;
1578 Post_Scan;
1579 return;
1580 end if;
1582 -- Otherwise we have a (non-wide) character literal
1584 else
1585 Accumulate_Checksum (Source (Scan_Ptr));
1587 if Source (Scan_Ptr) not in Graphic_Character then
1588 if Source (Scan_Ptr) in Upper_Half_Character then
1589 if Ada_83 then
1590 Error_Illegal_Character;
1591 end if;
1593 else
1594 Error_Illegal_Character;
1595 end if;
1596 end if;
1598 Code := Get_Char_Code (Source (Scan_Ptr));
1599 Scan_Ptr := Scan_Ptr + 2;
1600 end if;
1602 -- Fall through here with Scan_Ptr updated past the closing
1603 -- quote, and Code set to the Char_Code value for the literal
1605 Accumulate_Checksum (''');
1606 Token := Tok_Char_Literal;
1607 Set_Character_Literal_Name (Code);
1608 Token_Name := Name_Find;
1609 Character_Code := Code;
1610 Post_Scan;
1611 return;
1612 end if;
1613 end Char_Literal_Case;
1615 -- Right parenthesis
1617 when ')' =>
1618 Accumulate_Checksum (')');
1619 Scan_Ptr := Scan_Ptr + 1;
1620 Token := Tok_Right_Paren;
1621 if Style_Check then Style.Check_Right_Paren; end if;
1622 return;
1624 -- Right bracket or right brace, treated as right paren
1626 when ']' | '}' =>
1627 Error_Msg_S ("illegal character, replaced by "")""");
1628 Scan_Ptr := Scan_Ptr + 1;
1629 Token := Tok_Right_Paren;
1630 return;
1632 -- Slash (can be division operator or first character of not equal)
1634 when '/' =>
1635 Accumulate_Checksum ('/');
1637 if Double_Char_Token ('=') then
1638 Token := Tok_Not_Equal;
1639 return;
1640 else
1641 Scan_Ptr := Scan_Ptr + 1;
1642 Token := Tok_Slash;
1643 return;
1644 end if;
1646 -- Semicolon
1648 when ';' =>
1649 Accumulate_Checksum (';');
1650 Scan_Ptr := Scan_Ptr + 1;
1651 Token := Tok_Semicolon;
1652 if Style_Check then Style.Check_Semicolon; end if;
1653 return;
1655 -- Vertical bar
1657 when '|' => Vertical_Bar_Case : begin
1658 Accumulate_Checksum ('|');
1660 -- Special check for || to give nice message
1662 if Source (Scan_Ptr + 1) = '|' then
1663 Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1664 Scan_Ptr := Scan_Ptr + 2;
1665 Token := Tok_Or;
1666 return;
1668 else
1669 Scan_Ptr := Scan_Ptr + 1;
1670 Token := Tok_Vertical_Bar;
1671 if Style_Check then Style.Check_Vertical_Bar; end if;
1672 return;
1673 end if;
1674 end Vertical_Bar_Case;
1676 -- Exclamation, replacement character for vertical bar
1678 when '!' => Exclamation_Case : begin
1679 Accumulate_Checksum ('!');
1681 if Warn_On_Obsolescent_Feature then
1682 Error_Msg_S
1683 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1684 Error_Msg_S
1685 ("\use ""'|"" instead?");
1686 end if;
1688 if Source (Scan_Ptr + 1) = '=' then
1689 Error_Msg_S ("'!= should be /=");
1690 Scan_Ptr := Scan_Ptr + 2;
1691 Token := Tok_Not_Equal;
1692 return;
1694 else
1695 Scan_Ptr := Scan_Ptr + 1;
1696 Token := Tok_Vertical_Bar;
1697 return;
1698 end if;
1700 end Exclamation_Case;
1702 -- Plus
1704 when '+' => Plus_Case : begin
1705 Accumulate_Checksum ('+');
1706 Scan_Ptr := Scan_Ptr + 1;
1707 Token := Tok_Plus;
1708 return;
1709 end Plus_Case;
1711 -- Digits starting a numeric literal
1713 when '0' .. '9' =>
1714 Nlit;
1716 if Identifier_Char (Source (Scan_Ptr)) then
1717 Error_Msg_S
1718 ("delimiter required between literal and identifier");
1719 end if;
1720 Post_Scan;
1721 return;
1723 -- Lower case letters
1725 when 'a' .. 'z' =>
1726 Name_Len := 1;
1727 Name_Buffer (1) := Source (Scan_Ptr);
1728 Accumulate_Checksum (Name_Buffer (1));
1729 Scan_Ptr := Scan_Ptr + 1;
1730 goto Scan_Identifier;
1732 -- Upper case letters
1734 when 'A' .. 'Z' =>
1735 Name_Len := 1;
1736 Name_Buffer (1) :=
1737 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1738 Accumulate_Checksum (Name_Buffer (1));
1739 Scan_Ptr := Scan_Ptr + 1;
1740 goto Scan_Identifier;
1742 -- Underline character
1744 when '_' =>
1745 if Special_Characters ('_') then
1746 Token_Ptr := Scan_Ptr;
1747 Scan_Ptr := Scan_Ptr + 1;
1748 Token := Tok_Special;
1749 Special_Character := '_';
1750 return;
1751 end if;
1753 Error_Msg_S ("identifier cannot start with underline");
1754 Name_Len := 1;
1755 Name_Buffer (1) := '_';
1756 Scan_Ptr := Scan_Ptr + 1;
1757 goto Scan_Identifier;
1759 -- Space (not possible, because we scanned past blanks)
1761 when ' ' =>
1762 raise Program_Error;
1764 -- Characters in top half of ASCII 8-bit chart
1766 when Upper_Half_Character =>
1768 -- Wide character case. Note that Scan_Identifier will issue
1769 -- an appropriate message if wide characters are not allowed
1770 -- in identifiers.
1772 if Upper_Half_Encoding then
1773 Name_Len := 0;
1774 goto Scan_Identifier;
1776 -- Otherwise we have OK Latin-1 character
1778 else
1779 -- Upper half characters may possibly be identifier letters
1780 -- but can never be digits, so Identifier_Char can be used
1781 -- to test for a valid start of identifier character.
1783 if Identifier_Char (Source (Scan_Ptr)) then
1784 Name_Len := 0;
1785 goto Scan_Identifier;
1786 else
1787 Error_Illegal_Character;
1788 end if;
1789 end if;
1791 when ESC =>
1793 -- ESC character, possible start of identifier if wide characters
1794 -- using ESC encoding are allowed in identifiers, which we can
1795 -- tell by looking at the Identifier_Char flag for ESC, which is
1796 -- only true if these conditions are met.
1798 if Identifier_Char (ESC) then
1799 Name_Len := 0;
1800 goto Scan_Identifier;
1801 else
1802 Error_Illegal_Wide_Character;
1803 end if;
1805 -- Invalid control characters
1807 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
1808 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1809 EM | FS | GS | RS | US | DEL
1811 Error_Illegal_Character;
1813 -- Invalid graphic characters
1815 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1816 -- If Set_Special_Character has been called for this character,
1817 -- set Scans.Special_Character and return a Special token.
1819 if Special_Characters (Source (Scan_Ptr)) then
1820 Token_Ptr := Scan_Ptr;
1821 Token := Tok_Special;
1822 Special_Character := Source (Scan_Ptr);
1823 Scan_Ptr := Scan_Ptr + 1;
1824 return;
1826 -- otherwise, this is an illegal character
1828 else
1829 Error_Illegal_Character;
1830 end if;
1832 -- End switch on non-blank character
1834 end case;
1836 -- End loop past format effectors. The exit from this loop is by
1837 -- executing a return statement following completion of token scan
1838 -- (control never falls out of this loop to the code which follows)
1840 end loop;
1842 -- Identifier scanning routine. On entry, some initial characters
1843 -- of the identifier may have already been stored in Name_Buffer.
1844 -- If so, Name_Len has the number of characters stored. otherwise
1845 -- Name_Len is set to zero on entry.
1847 <<Scan_Identifier>>
1849 -- This loop scans as fast as possible past lower half letters
1850 -- and digits, which we expect to be the most common characters.
1852 loop
1853 if Source (Scan_Ptr) in 'a' .. 'z'
1854 or else Source (Scan_Ptr) in '0' .. '9'
1855 then
1856 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
1857 Accumulate_Checksum (Source (Scan_Ptr));
1859 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
1860 Name_Buffer (Name_Len + 1) :=
1861 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1862 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
1863 else
1864 exit;
1865 end if;
1867 -- Open out the loop a couple of times for speed
1869 if Source (Scan_Ptr + 1) in 'a' .. 'z'
1870 or else Source (Scan_Ptr + 1) in '0' .. '9'
1871 then
1872 Name_Buffer (Name_Len + 2) := Source (Scan_Ptr + 1);
1873 Accumulate_Checksum (Source (Scan_Ptr + 1));
1875 elsif Source (Scan_Ptr + 1) in 'A' .. 'Z' then
1876 Name_Buffer (Name_Len + 2) :=
1877 Character'Val (Character'Pos (Source (Scan_Ptr + 1)) + 32);
1878 Accumulate_Checksum (Name_Buffer (Name_Len + 2));
1880 else
1881 Scan_Ptr := Scan_Ptr + 1;
1882 Name_Len := Name_Len + 1;
1883 exit;
1884 end if;
1886 if Source (Scan_Ptr + 2) in 'a' .. 'z'
1887 or else Source (Scan_Ptr + 2) in '0' .. '9'
1888 then
1889 Name_Buffer (Name_Len + 3) := Source (Scan_Ptr + 2);
1890 Accumulate_Checksum (Source (Scan_Ptr + 2));
1892 elsif Source (Scan_Ptr + 2) in 'A' .. 'Z' then
1893 Name_Buffer (Name_Len + 3) :=
1894 Character'Val (Character'Pos (Source (Scan_Ptr + 2)) + 32);
1895 Accumulate_Checksum (Name_Buffer (Name_Len + 3));
1896 else
1897 Scan_Ptr := Scan_Ptr + 2;
1898 Name_Len := Name_Len + 2;
1899 exit;
1900 end if;
1902 if Source (Scan_Ptr + 3) in 'a' .. 'z'
1903 or else Source (Scan_Ptr + 3) in '0' .. '9'
1904 then
1905 Name_Buffer (Name_Len + 4) := Source (Scan_Ptr + 3);
1906 Accumulate_Checksum (Source (Scan_Ptr + 3));
1908 elsif Source (Scan_Ptr + 3) in 'A' .. 'Z' then
1909 Name_Buffer (Name_Len + 4) :=
1910 Character'Val (Character'Pos (Source (Scan_Ptr + 3)) + 32);
1911 Accumulate_Checksum (Name_Buffer (Name_Len + 4));
1913 else
1914 Scan_Ptr := Scan_Ptr + 3;
1915 Name_Len := Name_Len + 3;
1916 exit;
1917 end if;
1919 Scan_Ptr := Scan_Ptr + 4;
1920 Name_Len := Name_Len + 4;
1921 end loop;
1923 -- If we fall through, then we have encountered either an underline
1924 -- character, or an extended identifier character (i.e. one from the
1925 -- upper half), or a wide character, or an identifier terminator.
1926 -- The initial test speeds us up in the most common case where we
1927 -- have an identifier terminator. Note that ESC is an identifier
1928 -- character only if a wide character encoding method that uses
1929 -- ESC encoding is active, so if we find an ESC character we know
1930 -- that we have a wide character.
1932 if Identifier_Char (Source (Scan_Ptr)) then
1934 -- Case of underline
1936 if Source (Scan_Ptr) = '_' then
1937 Accumulate_Checksum ('_');
1939 -- Check error case of identifier ending with underscore
1940 -- In this case we ignore the underscore and do not store it.
1942 if not Identifier_Char (Source (Scan_Ptr + 1)) then
1943 Error_Msg_S ("identifier cannot end with underline");
1944 Scan_Ptr := Scan_Ptr + 1;
1946 -- Check error case of two underscores. In this case we do
1947 -- not store the first underscore (we will store the second)
1949 elsif Source (Scan_Ptr + 1) = '_' then
1950 Error_No_Double_Underline;
1952 -- Normal case of legal underscore
1954 else
1955 Name_Len := Name_Len + 1;
1956 Name_Buffer (Name_Len) := '_';
1957 end if;
1959 Scan_Ptr := Scan_Ptr + 1;
1960 goto Scan_Identifier;
1962 -- Upper half character
1964 elsif Source (Scan_Ptr) in Upper_Half_Character
1965 and then not Upper_Half_Encoding
1966 then
1967 Accumulate_Checksum (Source (Scan_Ptr));
1968 Store_Encoded_Character
1969 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
1970 Scan_Ptr := Scan_Ptr + 1;
1971 goto Scan_Identifier;
1973 -- Left bracket not followed by a quote terminates an identifier.
1974 -- This is an error, but we don't want to give a junk error msg
1975 -- about wide characters in this case!
1977 elsif Source (Scan_Ptr) = '['
1978 and then Source (Scan_Ptr + 1) /= '"'
1979 then
1980 null;
1982 -- We know we have a wide character encoding here (the current
1983 -- character is either ESC, left bracket, or an upper half
1984 -- character depending on the encoding method).
1986 else
1987 -- Scan out the wide character and insert the appropriate
1988 -- encoding into the name table entry for the identifier.
1990 declare
1991 Sptr : constant Source_Ptr := Scan_Ptr;
1992 Code : Char_Code;
1993 Err : Boolean;
1994 Chr : Character;
1996 begin
1997 Scan_Wide (Source, Scan_Ptr, Code, Err);
1999 -- If error, signal error
2001 if Err then
2002 Error_Illegal_Wide_Character;
2004 -- If the character scanned is a normal identifier
2005 -- character, then we treat it that way.
2007 elsif In_Character_Range (Code)
2008 and then Identifier_Char (Get_Character (Code))
2009 then
2010 Chr := Get_Character (Code);
2011 Accumulate_Checksum (Chr);
2012 Store_Encoded_Character
2013 (Get_Char_Code (Fold_Lower (Chr)));
2015 -- Character is not normal identifier character, store
2016 -- it in encoded form.
2018 else
2019 Accumulate_Checksum (Code);
2020 Store_Encoded_Character (Code);
2022 -- Make sure we are allowing wide characters in
2023 -- identifiers. Note that we allow wide character
2024 -- notation for an OK identifier character. This
2025 -- in particular allows bracket or other notation
2026 -- to be used for upper half letters.
2028 if Identifier_Character_Set /= 'w' then
2029 Error_Msg
2030 ("wide character not allowed in identifier", Sptr);
2031 end if;
2032 end if;
2033 end;
2035 goto Scan_Identifier;
2036 end if;
2037 end if;
2039 -- Scan of identifier is complete. The identifier is stored in
2040 -- Name_Buffer, and Scan_Ptr points past the last character.
2042 Token_Name := Name_Find;
2044 -- Here is where we check if it was a keyword
2046 if Get_Name_Table_Byte (Token_Name) /= 0
2047 and then (Ada_95 or else Token_Name not in Ada_95_Reserved_Words)
2048 then
2049 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2051 -- Deal with possible style check for non-lower case keyword,
2052 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
2053 -- for this purpose if they appear as attribute designators.
2054 -- Actually we only check the first character for speed.
2056 if Style_Check
2057 and then Source (Token_Ptr) <= 'Z'
2058 and then (Prev_Token /= Tok_Apostrophe
2059 or else
2060 (Token /= Tok_Access
2061 and then Token /= Tok_Delta
2062 and then Token /= Tok_Digits
2063 and then Token /= Tok_Range))
2064 then
2065 Style.Non_Lower_Case_Keyword;
2066 end if;
2068 -- We must reset Token_Name since this is not an identifier
2069 -- and if we leave Token_Name set, the parser gets confused
2070 -- because it thinks it is dealing with an identifier instead
2071 -- of the corresponding keyword.
2073 Token_Name := No_Name;
2074 return;
2076 -- It is an identifier after all
2078 else
2079 Token := Tok_Identifier;
2080 Post_Scan;
2081 return;
2082 end if;
2083 end Scan;
2084 --------------------------
2085 -- Set_Comment_As_Token --
2086 --------------------------
2088 procedure Set_Comment_As_Token (Value : Boolean) is
2089 begin
2090 Comment_Is_Token := Value;
2091 end Set_Comment_As_Token;
2093 ------------------------------
2094 -- Set_End_Of_Line_As_Token --
2095 ------------------------------
2097 procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2098 begin
2099 End_Of_Line_Is_Token := Value;
2100 end Set_End_Of_Line_As_Token;
2102 ---------------------------
2103 -- Set_Special_Character --
2104 ---------------------------
2106 procedure Set_Special_Character (C : Character) is
2107 begin
2108 case C is
2109 when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
2110 Special_Characters (C) := True;
2112 when others =>
2113 null;
2114 end case;
2115 end Set_Special_Character;
2117 ----------------------
2118 -- Set_Start_Column --
2119 ----------------------
2121 -- Note: it seems at first glance a little expensive to compute this value
2122 -- for every source line (since it is certainly not used for all source
2123 -- lines). On the other hand, it doesn't take much more work to skip past
2124 -- the initial white space on the line counting the columns than it would
2125 -- to scan past the white space using the standard scanning circuits.
2127 function Set_Start_Column return Column_Number is
2128 Start_Column : Column_Number := 0;
2130 begin
2131 -- Outer loop scans past horizontal tab characters
2133 Tabs_Loop : loop
2135 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
2136 -- past the blanks and adjusting Start_Column to account for them.
2138 Blanks_Loop : loop
2139 if Source (Scan_Ptr) = ' ' then
2140 if Source (Scan_Ptr + 1) = ' ' then
2141 if Source (Scan_Ptr + 2) = ' ' then
2142 if Source (Scan_Ptr + 3) = ' ' then
2143 if Source (Scan_Ptr + 4) = ' ' then
2144 if Source (Scan_Ptr + 5) = ' ' then
2145 if Source (Scan_Ptr + 6) = ' ' then
2146 Scan_Ptr := Scan_Ptr + 7;
2147 Start_Column := Start_Column + 7;
2148 else
2149 Scan_Ptr := Scan_Ptr + 6;
2150 Start_Column := Start_Column + 6;
2151 exit Blanks_Loop;
2152 end if;
2153 else
2154 Scan_Ptr := Scan_Ptr + 5;
2155 Start_Column := Start_Column + 5;
2156 exit Blanks_Loop;
2157 end if;
2158 else
2159 Scan_Ptr := Scan_Ptr + 4;
2160 Start_Column := Start_Column + 4;
2161 exit Blanks_Loop;
2162 end if;
2163 else
2164 Scan_Ptr := Scan_Ptr + 3;
2165 Start_Column := Start_Column + 3;
2166 exit Blanks_Loop;
2167 end if;
2168 else
2169 Scan_Ptr := Scan_Ptr + 2;
2170 Start_Column := Start_Column + 2;
2171 exit Blanks_Loop;
2172 end if;
2173 else
2174 Scan_Ptr := Scan_Ptr + 1;
2175 Start_Column := Start_Column + 1;
2176 exit Blanks_Loop;
2177 end if;
2178 else
2179 exit Blanks_Loop;
2180 end if;
2181 end loop Blanks_Loop;
2183 -- Outer loop keeps going only if a horizontal tab follows
2185 if Source (Scan_Ptr) = HT then
2186 if Style_Check then Style.Check_HT; end if;
2187 Scan_Ptr := Scan_Ptr + 1;
2188 Start_Column := (Start_Column / 8) * 8 + 8;
2189 else
2190 exit Tabs_Loop;
2191 end if;
2193 end loop Tabs_Loop;
2195 return Start_Column;
2196 end Set_Start_Column;
2198 end Scng;