* gcc.c (getenv_spec_function): New function.
[official-gcc.git] / gcc / ada / scng.adb
blobc4fdd86fbcf823903dd1caf8395725dd37755416
1 ------------------------------------------------------------------------------
2 -- --
3 -- GNAT COMPILER COMPONENTS --
4 -- --
5 -- S C N G --
6 -- --
7 -- B o d y --
8 -- --
9 -- Copyright (C) 1992-2006, Free Software Foundation, Inc. --
10 -- --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 51 Franklin Street, Fifth Floor, --
20 -- Boston, MA 02110-1301, USA. --
21 -- --
22 -- GNAT was originally developed by the GNAT team at New York University. --
23 -- Extensive contributions were provided by Ada Core Technologies Inc. --
24 -- --
25 ------------------------------------------------------------------------------
27 with Csets; use Csets;
28 with Err_Vars; use Err_Vars;
29 with Hostparm; use Hostparm;
30 with Namet; use Namet;
31 with Opt; use Opt;
32 with Scans; use Scans;
33 with Sinput; use Sinput;
34 with Snames; use Snames;
35 with Stringt; use Stringt;
36 with Stylesw; use Stylesw;
37 with Uintp; use Uintp;
38 with Urealp; use Urealp;
39 with Widechar; use Widechar;
41 with System.CRC32;
42 with System.WCh_Con; use System.WCh_Con;
44 with GNAT.UTF_32; use GNAT.UTF_32;
46 package body Scng is
48 use ASCII;
49 -- Make control characters visible
51 Special_Characters : array (Character) of Boolean := (others => False);
52 -- For characters that are Special token, the value is True
54 Comment_Is_Token : Boolean := False;
55 -- True if comments are tokens
57 End_Of_Line_Is_Token : Boolean := False;
58 -- True if End_Of_Line is a token
60 -----------------------
61 -- Local Subprograms --
62 -----------------------
64 procedure Accumulate_Token_Checksum;
65 pragma Inline (Accumulate_Token_Checksum);
67 procedure Accumulate_Checksum (C : Character);
68 pragma Inline (Accumulate_Checksum);
69 -- This routine accumulates the checksum given character C. During the
70 -- scanning of a source file, this routine is called with every character
71 -- in the source, excluding blanks, and all control characters (except
72 -- that ESC is included in the checksum). Upper case letters not in string
73 -- literals are folded by the caller. See Sinput spec for the documentation
74 -- of the checksum algorithm. Note: checksum values are only used if we
75 -- generate code, so it is not necessary to worry about making the right
76 -- sequence of calls in any error situation.
78 procedure Accumulate_Checksum (C : Char_Code);
79 pragma Inline (Accumulate_Checksum);
80 -- This version is identical, except that the argument, C, is a character
81 -- code value instead of a character. This is used when wide characters
82 -- are scanned. We use the character code rather than the ASCII characters
83 -- so that the checksum is independent of wide character encoding method.
85 procedure Initialize_Checksum;
86 pragma Inline (Initialize_Checksum);
87 -- Initialize checksum value
89 -------------------------
90 -- Accumulate_Checksum --
91 -------------------------
93 procedure Accumulate_Checksum (C : Character) is
94 begin
95 System.CRC32.Update (System.CRC32.CRC32 (Checksum), C);
96 end Accumulate_Checksum;
98 procedure Accumulate_Checksum (C : Char_Code) is
99 begin
100 if C > 16#FFFF# then
101 Accumulate_Checksum (Character'Val (C / 2 ** 24));
102 Accumulate_Checksum (Character'Val ((C / 2 ** 16) mod 256));
103 Accumulate_Checksum (Character'Val ((C / 256) mod 256));
104 else
105 Accumulate_Checksum (Character'Val (C / 256));
106 end if;
108 Accumulate_Checksum (Character'Val (C mod 256));
109 end Accumulate_Checksum;
111 -------------------------------
112 -- Accumulate_Token_Checksum --
113 -------------------------------
115 procedure Accumulate_Token_Checksum is
116 begin
117 System.CRC32.Update
118 (System.CRC32.CRC32 (Checksum),
119 Character'Val (Token_Type'Pos (Token)));
120 end Accumulate_Token_Checksum;
122 ----------------------------
123 -- Determine_Token_Casing --
124 ----------------------------
126 function Determine_Token_Casing return Casing_Type is
127 begin
128 return Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));
129 end Determine_Token_Casing;
131 -------------------------
132 -- Initialize_Checksum --
133 -------------------------
135 procedure Initialize_Checksum is
136 begin
137 System.CRC32.Initialize (System.CRC32.CRC32 (Checksum));
138 end Initialize_Checksum;
140 ------------------------
141 -- Initialize_Scanner --
142 ------------------------
144 procedure Initialize_Scanner (Index : Source_File_Index) is
145 begin
146 -- Establish reserved words
148 Scans.Initialize_Ada_Keywords;
150 -- Initialize scan control variables
152 Current_Source_File := Index;
153 Source := Source_Text (Current_Source_File);
154 Scan_Ptr := Source_First (Current_Source_File);
155 Token := No_Token;
156 Token_Ptr := Scan_Ptr;
157 Current_Line_Start := Scan_Ptr;
158 Token_Node := Empty;
159 Token_Name := No_Name;
160 Start_Column := Set_Start_Column;
161 First_Non_Blank_Location := Scan_Ptr;
163 Initialize_Checksum;
164 Wide_Char_Byte_Count := 0;
166 -- Do not call Scan, otherwise the License stuff does not work in Scn
168 end Initialize_Scanner;
170 ------------------------------
171 -- Reset_Special_Characters --
172 ------------------------------
174 procedure Reset_Special_Characters is
175 begin
176 Special_Characters := (others => False);
177 end Reset_Special_Characters;
179 ----------
180 -- Scan --
181 ----------
183 procedure Scan is
185 Start_Of_Comment : Source_Ptr;
186 -- Record start of comment position
188 Underline_Found : Boolean;
189 -- During scanning of an identifier, set to True if last character
190 -- scanned was an underline or other punctuation character. This
191 -- is used to flag the error of two underlines/punctuations in a
192 -- row or ending an identifier with a underline/punctuation. Here
193 -- punctuation means any UTF_32 character in the Unicode category
194 -- Punctuation,Connector.
196 Wptr : Source_Ptr;
197 -- Used to remember start of last wide character scanned
199 procedure Check_End_Of_Line;
200 -- Called when end of line encountered. Checks that line is not too
201 -- long, and that other style checks for the end of line are met.
203 function Double_Char_Token (C : Character) return Boolean;
204 -- This function is used for double character tokens like := or <>. It
205 -- checks if the character following Source (Scan_Ptr) is C, and if so
206 -- bumps Scan_Ptr past the pair of characters and returns True. A space
207 -- between the two characters is also recognized with an appropriate
208 -- error message being issued. If C is not present, False is returned.
209 -- Note that Double_Char_Token can only be used for tokens defined in
210 -- the Ada syntax (it's use for error cases like && is not appropriate
211 -- since we do not want a junk message for a case like &-space-&).
213 procedure Error_Illegal_Character;
214 -- Give illegal character error, Scan_Ptr points to character. On
215 -- return, Scan_Ptr is bumped past the illegal character.
217 procedure Error_Illegal_Wide_Character;
218 -- Give illegal wide character message. On return, Scan_Ptr is bumped
219 -- past the illegal character, which may still leave us pointing to
220 -- junk, not much we can do if the escape sequence is messed up!
222 procedure Error_Long_Line;
223 -- Signal error of excessively long line
225 procedure Error_No_Double_Underline;
226 -- Signal error of two underline or punctuation characters in a row.
227 -- Called with Scan_Ptr pointing to second underline/punctuation char.
229 procedure Nlit;
230 -- This is the procedure for scanning out numeric literals. On entry,
231 -- Scan_Ptr points to the digit that starts the numeric literal (the
232 -- checksum for this character has not been accumulated yet). On return
233 -- Scan_Ptr points past the last character of the numeric literal, Token
234 -- and Token_Node are set appropriately, and the checksum is updated.
236 procedure Slit;
237 -- This is the procedure for scanning out string literals. On entry,
238 -- Scan_Ptr points to the opening string quote (the checksum for this
239 -- character has not been accumulated yet). On return Scan_Ptr points
240 -- past the closing quote of the string literal, Token and Token_Node
241 -- are set appropriately, and the checksum is upated.
243 -----------------------
244 -- Check_End_Of_Line --
245 -----------------------
247 procedure Check_End_Of_Line is
248 Len : constant Int :=
249 Int (Scan_Ptr) -
250 Int (Current_Line_Start) -
251 Wide_Char_Byte_Count;
253 begin
254 if Style_Check then
255 Style.Check_Line_Terminator (Len);
256 end if;
258 -- Deal with checking maximum line length
260 if Style_Check and Style_Check_Max_Line_Length then
261 Style.Check_Line_Max_Length (Len);
263 -- If style checking is inactive, check maximum line length against
264 -- standard value.
266 elsif Len > Max_Line_Length then
267 Error_Long_Line;
268 end if;
270 -- Reset wide character byte count for next line
272 Wide_Char_Byte_Count := 0;
273 end Check_End_Of_Line;
275 -----------------------
276 -- Double_Char_Token --
277 -----------------------
279 function Double_Char_Token (C : Character) return Boolean is
280 begin
281 if Source (Scan_Ptr + 1) = C then
282 Accumulate_Checksum (C);
283 Scan_Ptr := Scan_Ptr + 2;
284 return True;
286 elsif Source (Scan_Ptr + 1) = ' '
287 and then Source (Scan_Ptr + 2) = C
288 then
289 Scan_Ptr := Scan_Ptr + 1;
290 Error_Msg_S ("no space allowed here");
291 Scan_Ptr := Scan_Ptr + 2;
292 return True;
294 else
295 return False;
296 end if;
297 end Double_Char_Token;
299 -----------------------------
300 -- Error_Illegal_Character --
301 -----------------------------
303 procedure Error_Illegal_Character is
304 begin
305 Error_Msg_S ("illegal character");
306 Scan_Ptr := Scan_Ptr + 1;
307 end Error_Illegal_Character;
309 ----------------------------------
310 -- Error_Illegal_Wide_Character --
311 ----------------------------------
313 procedure Error_Illegal_Wide_Character is
314 begin
315 Error_Msg ("illegal wide character", Wptr);
316 end Error_Illegal_Wide_Character;
318 ---------------------
319 -- Error_Long_Line --
320 ---------------------
322 procedure Error_Long_Line is
323 begin
324 Error_Msg
325 ("this line is too long",
326 Current_Line_Start + Source_Ptr (Max_Line_Length));
327 end Error_Long_Line;
329 -------------------------------
330 -- Error_No_Double_Underline --
331 -------------------------------
333 procedure Error_No_Double_Underline is
334 begin
335 Underline_Found := False;
337 -- There are four cases, and we special case the messages
339 if Source (Scan_Ptr) = '_' then
340 if Source (Scan_Ptr - 1) = '_' then
341 Error_Msg_S
342 ("two consecutive underlines not permitted");
343 else
344 Error_Msg_S
345 ("underline cannot follow punctuation character");
346 end if;
348 else
349 if Source (Scan_Ptr - 1) = '_' then
350 Error_Msg_S
351 ("punctuation character cannot follow underline");
352 else
353 Error_Msg_S
354 ("two consecutive punctuation characters not permitted");
355 end if;
356 end if;
357 end Error_No_Double_Underline;
359 ----------
360 -- Nlit --
361 ----------
363 procedure Nlit is
365 C : Character;
366 -- Current source program character
368 Base_Char : Character;
369 -- Either # or : (character at start of based number)
371 Base : Int;
372 -- Value of base
374 UI_Base : Uint;
375 -- Value of base in Uint format
377 UI_Int_Value : Uint;
378 -- Value of integer scanned by Scan_Integer in Uint format
380 UI_Num_Value : Uint;
381 -- Value of integer in numeric value being scanned
383 Scale : Int;
384 -- Scale value for real literal
386 UI_Scale : Uint;
387 -- Scale in Uint format
389 Exponent_Is_Negative : Boolean;
390 -- Set true for negative exponent
392 Extended_Digit_Value : Int;
393 -- Extended digit value
395 Point_Scanned : Boolean;
396 -- Flag for decimal point scanned in numeric literal
398 -----------------------
399 -- Local Subprograms --
400 -----------------------
402 procedure Error_Digit_Expected;
403 -- Signal error of bad digit, Scan_Ptr points to the location at
404 -- which the digit was expected on input, and is unchanged on return.
406 procedure Scan_Integer;
407 -- Procedure to scan integer literal. On entry, Scan_Ptr points to a
408 -- digit, on exit Scan_Ptr points past the last character of the
409 -- integer.
411 -- For each digit encountered, UI_Int_Value is multiplied by 10, and
412 -- the value of the digit added to the result. In addition, the
413 -- value in Scale is decremented by one for each actual digit
414 -- scanned.
416 --------------------------
417 -- Error_Digit_Expected --
418 --------------------------
420 procedure Error_Digit_Expected is
421 begin
422 Error_Msg_S ("digit expected");
423 end Error_Digit_Expected;
425 ------------------
426 -- Scan_Integer --
427 ------------------
429 procedure Scan_Integer is
430 C : Character;
431 -- Next character scanned
433 begin
434 C := Source (Scan_Ptr);
436 -- Loop through digits (allowing underlines)
438 loop
439 Accumulate_Checksum (C);
440 UI_Int_Value :=
441 UI_Int_Value * 10 + (Character'Pos (C) - Character'Pos ('0'));
442 Scan_Ptr := Scan_Ptr + 1;
443 Scale := Scale - 1;
444 C := Source (Scan_Ptr);
446 -- Case of underline encountered
448 if C = '_' then
450 -- We do not accumulate the '_' in the checksum, so that
451 -- 1_234 is equivalent to 1234, and does not trigger
452 -- compilation for "minimal recompilation" (gnatmake -m).
454 loop
455 Scan_Ptr := Scan_Ptr + 1;
456 C := Source (Scan_Ptr);
457 exit when C /= '_';
458 Error_No_Double_Underline;
459 end loop;
461 if C not in '0' .. '9' then
462 Error_Digit_Expected;
463 exit;
464 end if;
466 else
467 exit when C not in '0' .. '9';
468 end if;
469 end loop;
470 end Scan_Integer;
472 -- Start of Processing for Nlit
474 begin
475 Base := 10;
476 UI_Base := Uint_10;
477 UI_Int_Value := Uint_0;
478 Scale := 0;
479 Scan_Integer;
480 Point_Scanned := False;
481 UI_Num_Value := UI_Int_Value;
483 -- Various possibilities now for continuing the literal are period,
484 -- E/e (for exponent), or :/# (for based literal).
486 Scale := 0;
487 C := Source (Scan_Ptr);
489 if C = '.' then
491 -- Scan out point, but do not scan past .. which is a range
492 -- sequence, and must not be eaten up scanning a numeric literal.
494 while C = '.' and then Source (Scan_Ptr + 1) /= '.' loop
495 Accumulate_Checksum ('.');
497 if Point_Scanned then
498 Error_Msg_S ("duplicate point ignored");
499 end if;
501 Point_Scanned := True;
502 Scan_Ptr := Scan_Ptr + 1;
503 C := Source (Scan_Ptr);
505 if C not in '0' .. '9' then
506 Error_Msg
507 ("real literal cannot end with point", Scan_Ptr - 1);
508 else
509 Scan_Integer;
510 UI_Num_Value := UI_Int_Value;
511 end if;
512 end loop;
514 -- Based literal case. The base is the value we already scanned.
515 -- In the case of colon, we insist that the following character
516 -- is indeed an extended digit or a period. This catches a number
517 -- of common errors, as well as catching the well known tricky
518 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;"
520 elsif C = '#'
521 or else (C = ':' and then
522 (Source (Scan_Ptr + 1) = '.'
523 or else
524 Source (Scan_Ptr + 1) in '0' .. '9'
525 or else
526 Source (Scan_Ptr + 1) in 'A' .. 'Z'
527 or else
528 Source (Scan_Ptr + 1) in 'a' .. 'z'))
529 then
530 if C = ':' then
531 Obsolescent_Check (Scan_Ptr);
533 if Warn_On_Obsolescent_Feature then
534 Error_Msg_S
535 ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
536 Error_Msg_S
537 ("\use ""'#"" instead?");
538 end if;
539 end if;
541 Accumulate_Checksum (C);
542 Base_Char := C;
543 UI_Base := UI_Int_Value;
545 if UI_Base < 2 or else UI_Base > 16 then
546 Error_Msg_SC ("base not 2-16");
547 UI_Base := Uint_16;
548 end if;
550 Base := UI_To_Int (UI_Base);
551 Scan_Ptr := Scan_Ptr + 1;
553 -- Scan out extended integer [. integer]
555 C := Source (Scan_Ptr);
556 UI_Int_Value := Uint_0;
557 Scale := 0;
559 loop
560 if C in '0' .. '9' then
561 Accumulate_Checksum (C);
562 Extended_Digit_Value :=
563 Int'(Character'Pos (C)) - Int'(Character'Pos ('0'));
565 elsif C in 'A' .. 'F' then
566 Accumulate_Checksum (Character'Val (Character'Pos (C) + 32));
567 Extended_Digit_Value :=
568 Int'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
570 elsif C in 'a' .. 'f' then
571 Accumulate_Checksum (C);
572 Extended_Digit_Value :=
573 Int'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
575 else
576 Error_Msg_S ("extended digit expected");
577 exit;
578 end if;
580 if Extended_Digit_Value >= Base then
581 Error_Msg_S ("digit '>= base");
582 end if;
584 UI_Int_Value := UI_Int_Value * UI_Base + Extended_Digit_Value;
585 Scale := Scale - 1;
586 Scan_Ptr := Scan_Ptr + 1;
587 C := Source (Scan_Ptr);
589 if C = '_' then
590 loop
591 Accumulate_Checksum ('_');
592 Scan_Ptr := Scan_Ptr + 1;
593 C := Source (Scan_Ptr);
594 exit when C /= '_';
595 Error_No_Double_Underline;
596 end loop;
598 elsif C = '.' then
599 Accumulate_Checksum ('.');
601 if Point_Scanned then
602 Error_Msg_S ("duplicate point ignored");
603 end if;
605 Scan_Ptr := Scan_Ptr + 1;
606 C := Source (Scan_Ptr);
607 Point_Scanned := True;
608 Scale := 0;
610 elsif C = Base_Char then
611 Accumulate_Checksum (C);
612 Scan_Ptr := Scan_Ptr + 1;
613 exit;
615 elsif C = '#' or else C = ':' then
616 Error_Msg_S ("based number delimiters must match");
617 Scan_Ptr := Scan_Ptr + 1;
618 exit;
620 elsif not Identifier_Char (C) then
621 if Base_Char = '#' then
622 Error_Msg_S ("missing '#");
623 else
624 Error_Msg_S ("missing ':");
625 end if;
627 exit;
628 end if;
630 end loop;
632 UI_Num_Value := UI_Int_Value;
633 end if;
635 -- Scan out exponent
637 if not Point_Scanned then
638 Scale := 0;
639 UI_Scale := Uint_0;
640 else
641 UI_Scale := UI_From_Int (Scale);
642 end if;
644 if Source (Scan_Ptr) = 'e' or else Source (Scan_Ptr) = 'E' then
645 Accumulate_Checksum ('e');
646 Scan_Ptr := Scan_Ptr + 1;
647 Exponent_Is_Negative := False;
649 if Source (Scan_Ptr) = '+' then
650 Accumulate_Checksum ('+');
651 Scan_Ptr := Scan_Ptr + 1;
653 elsif Source (Scan_Ptr) = '-' then
654 Accumulate_Checksum ('-');
656 if not Point_Scanned then
657 Error_Msg_S
658 ("negative exponent not allowed for integer literal");
659 else
660 Exponent_Is_Negative := True;
661 end if;
663 Scan_Ptr := Scan_Ptr + 1;
664 end if;
666 UI_Int_Value := Uint_0;
668 if Source (Scan_Ptr) in '0' .. '9' then
669 Scan_Integer;
670 else
671 Error_Digit_Expected;
672 end if;
674 if Exponent_Is_Negative then
675 UI_Scale := UI_Scale - UI_Int_Value;
676 else
677 UI_Scale := UI_Scale + UI_Int_Value;
678 end if;
679 end if;
681 -- Case of real literal to be returned
683 if Point_Scanned then
684 Token := Tok_Real_Literal;
685 Real_Literal_Value :=
686 UR_From_Components (
687 Num => UI_Num_Value,
688 Den => -UI_Scale,
689 Rbase => Base);
691 -- Case of integer literal to be returned
693 else
694 Token := Tok_Integer_Literal;
696 if UI_Scale = 0 then
697 Int_Literal_Value := UI_Num_Value;
699 -- Avoid doing possibly expensive calculations in cases like
700 -- parsing 163E800_000# when semantics will not be done anyway.
701 -- This is especially useful when parsing garbled input.
703 elsif Operating_Mode /= Check_Syntax
704 and then (Serious_Errors_Detected = 0 or else Try_Semantics)
705 then
706 Int_Literal_Value := UI_Num_Value * UI_Base ** UI_Scale;
708 else
709 Int_Literal_Value := No_Uint;
710 end if;
711 end if;
713 Accumulate_Token_Checksum;
715 return;
716 end Nlit;
718 ----------
719 -- Slit --
720 ----------
722 procedure Slit is
724 Delimiter : Character;
725 -- Delimiter (first character of string)
727 C : Character;
728 -- Current source program character
730 Code : Char_Code;
731 -- Current character code value
733 Err : Boolean;
734 -- Error flag for Scan_Wide call
736 procedure Error_Bad_String_Char;
737 -- Signal bad character in string/character literal. On entry
738 -- Scan_Ptr points to the improper character encountered during the
739 -- scan. Scan_Ptr is not modified, so it still points to the bad
740 -- character on return.
742 procedure Error_Unterminated_String;
743 -- Procedure called if a line terminator character is encountered
744 -- during scanning a string, meaning that the string is not properly
745 -- terminated.
747 procedure Set_String;
748 -- Procedure used to distinguish between string and operator symbol.
749 -- On entry the string has been scanned out, and its characters
750 -- start at Token_Ptr and end one character before Scan_Ptr. On exit
751 -- Token is set to Tok_String_Literal or Tok_Operator_Symbol as
752 -- appropriate, and Token_Node is appropriately initialized. In
753 -- addition, in the operator symbol case, Token_Name is
754 -- appropriately set.
756 ---------------------------
757 -- Error_Bad_String_Char --
758 ---------------------------
760 procedure Error_Bad_String_Char is
761 C : constant Character := Source (Scan_Ptr);
763 begin
764 if C = HT then
765 Error_Msg_S ("horizontal tab not allowed in string");
767 elsif C = VT or else C = FF then
768 Error_Msg_S ("format effector not allowed in string");
770 elsif C in Upper_Half_Character then
771 Error_Msg_S ("(Ada 83) upper half character not allowed");
773 else
774 Error_Msg_S ("control character not allowed in string");
775 end if;
776 end Error_Bad_String_Char;
778 -------------------------------
779 -- Error_Unterminated_String --
780 -------------------------------
782 procedure Error_Unterminated_String is
783 begin
784 -- An interesting little refinement. Consider the following
785 -- examples:
787 -- A := "this is an unterminated string;
788 -- A := "this is an unterminated string &
789 -- P(A, "this is a parameter that didn't get terminated);
791 -- We fiddle a little to do slightly better placement in these
792 -- cases also if there is white space at the end of the line we
793 -- place the flag at the start of this white space, not at the
794 -- end. Note that we only have to test for blanks, since tabs
795 -- aren't allowed in strings in the first place and would have
796 -- caused an error message.
798 -- Two more cases that we treat specially are:
800 -- A := "this string uses the wrong terminator'
801 -- A := "this string uses the wrong terminator' &
803 -- In these cases we give a different error message as well
805 -- We actually reposition the scan pointer to the point where we
806 -- place the flag in these cases, since it seems a better bet on
807 -- the original intention.
809 while Source (Scan_Ptr - 1) = ' '
810 or else Source (Scan_Ptr - 1) = '&'
811 loop
812 Scan_Ptr := Scan_Ptr - 1;
813 Unstore_String_Char;
814 end loop;
816 -- Check for case of incorrect string terminator, but single quote
817 -- is not considered incorrect if the opening terminator misused
818 -- a single quote (error message already given).
820 if Delimiter /= '''
821 and then Source (Scan_Ptr - 1) = '''
822 then
823 Unstore_String_Char;
824 Error_Msg
825 ("incorrect string terminator character", Scan_Ptr - 1);
826 return;
827 end if;
829 if Source (Scan_Ptr - 1) = ';' then
830 Scan_Ptr := Scan_Ptr - 1;
831 Unstore_String_Char;
833 if Source (Scan_Ptr - 1) = ')' then
834 Scan_Ptr := Scan_Ptr - 1;
835 Unstore_String_Char;
836 end if;
837 end if;
839 Error_Msg_S ("missing string quote");
840 end Error_Unterminated_String;
842 ----------------
843 -- Set_String --
844 ----------------
846 procedure Set_String is
847 Slen : constant Int := Int (Scan_Ptr - Token_Ptr - 2);
848 C1 : Character;
849 C2 : Character;
850 C3 : Character;
852 begin
853 -- Token_Name is currently set to Error_Name. The following
854 -- section of code resets Token_Name to the proper Name_Op_xx
855 -- value if the string is a valid operator symbol, otherwise it is
856 -- left set to Error_Name.
858 if Slen = 1 then
859 C1 := Source (Token_Ptr + 1);
861 case C1 is
862 when '=' =>
863 Token_Name := Name_Op_Eq;
865 when '>' =>
866 Token_Name := Name_Op_Gt;
868 when '<' =>
869 Token_Name := Name_Op_Lt;
871 when '+' =>
872 Token_Name := Name_Op_Add;
874 when '-' =>
875 Token_Name := Name_Op_Subtract;
877 when '&' =>
878 Token_Name := Name_Op_Concat;
880 when '*' =>
881 Token_Name := Name_Op_Multiply;
883 when '/' =>
884 Token_Name := Name_Op_Divide;
886 when others =>
887 null;
888 end case;
890 elsif Slen = 2 then
891 C1 := Source (Token_Ptr + 1);
892 C2 := Source (Token_Ptr + 2);
894 if C1 = '*' and then C2 = '*' then
895 Token_Name := Name_Op_Expon;
897 elsif C2 = '=' then
899 if C1 = '/' then
900 Token_Name := Name_Op_Ne;
901 elsif C1 = '<' then
902 Token_Name := Name_Op_Le;
903 elsif C1 = '>' then
904 Token_Name := Name_Op_Ge;
905 end if;
907 elsif (C1 = 'O' or else C1 = 'o') and then -- OR
908 (C2 = 'R' or else C2 = 'r')
909 then
910 Token_Name := Name_Op_Or;
911 end if;
913 elsif Slen = 3 then
914 C1 := Source (Token_Ptr + 1);
915 C2 := Source (Token_Ptr + 2);
916 C3 := Source (Token_Ptr + 3);
918 if (C1 = 'A' or else C1 = 'a') and then -- AND
919 (C2 = 'N' or else C2 = 'n') and then
920 (C3 = 'D' or else C3 = 'd')
921 then
922 Token_Name := Name_Op_And;
924 elsif (C1 = 'A' or else C1 = 'a') and then -- ABS
925 (C2 = 'B' or else C2 = 'b') and then
926 (C3 = 'S' or else C3 = 's')
927 then
928 Token_Name := Name_Op_Abs;
930 elsif (C1 = 'M' or else C1 = 'm') and then -- MOD
931 (C2 = 'O' or else C2 = 'o') and then
932 (C3 = 'D' or else C3 = 'd')
933 then
934 Token_Name := Name_Op_Mod;
936 elsif (C1 = 'N' or else C1 = 'n') and then -- NOT
937 (C2 = 'O' or else C2 = 'o') and then
938 (C3 = 'T' or else C3 = 't')
939 then
940 Token_Name := Name_Op_Not;
942 elsif (C1 = 'R' or else C1 = 'r') and then -- REM
943 (C2 = 'E' or else C2 = 'e') and then
944 (C3 = 'M' or else C3 = 'm')
945 then
946 Token_Name := Name_Op_Rem;
948 elsif (C1 = 'X' or else C1 = 'x') and then -- XOR
949 (C2 = 'O' or else C2 = 'o') and then
950 (C3 = 'R' or else C3 = 'r')
951 then
952 Token_Name := Name_Op_Xor;
953 end if;
955 end if;
957 -- If it is an operator symbol, then Token_Name is set. If it is
958 -- some other string value, then Token_Name still contains
959 -- Error_Name.
961 if Token_Name = Error_Name then
962 Token := Tok_String_Literal;
964 else
965 Token := Tok_Operator_Symbol;
966 end if;
967 end Set_String;
969 -- Start of processing for Slit
971 begin
972 -- On entry, Scan_Ptr points to the opening character of the string
973 -- which is either a percent, double quote, or apostrophe (single
974 -- quote). The latter case is an error detected by the character
975 -- literal circuit.
977 Delimiter := Source (Scan_Ptr);
978 Accumulate_Checksum (Delimiter);
979 Start_String;
980 Scan_Ptr := Scan_Ptr + 1;
982 -- Loop to scan out characters of string literal
984 loop
985 C := Source (Scan_Ptr);
987 if C = Delimiter then
988 Accumulate_Checksum (C);
989 Scan_Ptr := Scan_Ptr + 1;
990 exit when Source (Scan_Ptr) /= Delimiter;
991 Code := Get_Char_Code (C);
992 Accumulate_Checksum (C);
993 Scan_Ptr := Scan_Ptr + 1;
995 else
996 if C = '"' and then Delimiter = '%' then
997 Error_Msg_S
998 ("quote not allowed in percent delimited string");
999 Code := Get_Char_Code (C);
1000 Scan_Ptr := Scan_Ptr + 1;
1002 elsif (C = ESC
1003 and then Wide_Character_Encoding_Method
1004 in WC_ESC_Encoding_Method)
1005 or else (C in Upper_Half_Character
1006 and then Upper_Half_Encoding)
1007 or else (C = '['
1008 and then Source (Scan_Ptr + 1) = '"'
1009 and then Identifier_Char (Source (Scan_Ptr + 2)))
1010 then
1011 Wptr := Scan_Ptr;
1012 Scan_Wide (Source, Scan_Ptr, Code, Err);
1014 if Err then
1015 Error_Illegal_Wide_Character;
1016 Code := Get_Char_Code (' ');
1017 end if;
1019 Accumulate_Checksum (Code);
1021 -- In Ada 95 mode we allow any wide characters in a string
1022 -- but in Ada 2005, the set of characters allowed has been
1023 -- restricted to graphic characters.
1025 if Ada_Version >= Ada_05
1026 and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1027 then
1028 Error_Msg
1029 ("(Ada 2005) non-graphic character not permitted " &
1030 "in string literal", Wptr);
1031 end if;
1033 else
1034 Accumulate_Checksum (C);
1036 if C not in Graphic_Character then
1037 if C in Line_Terminator then
1038 Error_Unterminated_String;
1039 exit;
1041 elsif C in Upper_Half_Character then
1042 if Ada_Version = Ada_83 then
1043 Error_Bad_String_Char;
1044 end if;
1046 else
1047 Error_Bad_String_Char;
1048 end if;
1049 end if;
1051 Code := Get_Char_Code (C);
1052 Scan_Ptr := Scan_Ptr + 1;
1053 end if;
1054 end if;
1056 Store_String_Char (Code);
1058 if not In_Character_Range (Code) then
1059 Wide_Character_Found := True;
1060 end if;
1061 end loop;
1063 String_Literal_Id := End_String;
1064 Set_String;
1065 return;
1066 end Slit;
1068 -- Start of processing for Scan
1070 begin
1071 Prev_Token := Token;
1072 Prev_Token_Ptr := Token_Ptr;
1073 Token_Name := Error_Name;
1075 -- The following loop runs more than once only if a format effector
1076 -- (tab, vertical tab, form feed, line feed, carriage return) is
1077 -- encountered and skipped, or some error situation, such as an
1078 -- illegal character, is encountered.
1080 <<Scan_Next_Character>>
1082 loop
1083 -- Skip past blanks, loop is opened up for speed
1085 while Source (Scan_Ptr) = ' ' loop
1086 if Source (Scan_Ptr + 1) /= ' ' then
1087 Scan_Ptr := Scan_Ptr + 1;
1088 exit;
1089 end if;
1091 if Source (Scan_Ptr + 2) /= ' ' then
1092 Scan_Ptr := Scan_Ptr + 2;
1093 exit;
1094 end if;
1096 if Source (Scan_Ptr + 3) /= ' ' then
1097 Scan_Ptr := Scan_Ptr + 3;
1098 exit;
1099 end if;
1101 if Source (Scan_Ptr + 4) /= ' ' then
1102 Scan_Ptr := Scan_Ptr + 4;
1103 exit;
1104 end if;
1106 if Source (Scan_Ptr + 5) /= ' ' then
1107 Scan_Ptr := Scan_Ptr + 5;
1108 exit;
1109 end if;
1111 if Source (Scan_Ptr + 6) /= ' ' then
1112 Scan_Ptr := Scan_Ptr + 6;
1113 exit;
1114 end if;
1116 if Source (Scan_Ptr + 7) /= ' ' then
1117 Scan_Ptr := Scan_Ptr + 7;
1118 exit;
1119 end if;
1121 Scan_Ptr := Scan_Ptr + 8;
1122 end loop;
1124 -- We are now at a non-blank character, which is the first character
1125 -- of the token we will scan, and hence the value of Token_Ptr.
1127 Token_Ptr := Scan_Ptr;
1129 -- Here begins the main case statement which transfers control on the
1130 -- basis of the non-blank character we have encountered.
1132 case Source (Scan_Ptr) is
1134 -- Line terminator characters
1136 when CR | LF | FF | VT =>
1137 goto Scan_Line_Terminator;
1139 -- Horizontal tab, just skip past it
1141 when HT =>
1142 if Style_Check then Style.Check_HT; end if;
1143 Scan_Ptr := Scan_Ptr + 1;
1145 -- End of file character, treated as an end of file only if it is
1146 -- the last character in the buffer, otherwise it is ignored.
1148 when EOF =>
1149 if Scan_Ptr = Source_Last (Current_Source_File) then
1150 Check_End_Of_Line;
1151 if Style_Check then Style.Check_EOF; end if;
1152 Token := Tok_EOF;
1153 return;
1154 else
1155 Scan_Ptr := Scan_Ptr + 1;
1156 end if;
1158 -- Ampersand
1160 when '&' =>
1161 Accumulate_Checksum ('&');
1163 if Source (Scan_Ptr + 1) = '&' then
1164 Error_Msg_S ("'&'& should be `AND THEN`");
1165 Scan_Ptr := Scan_Ptr + 2;
1166 Token := Tok_And;
1167 return;
1169 else
1170 Scan_Ptr := Scan_Ptr + 1;
1171 Token := Tok_Ampersand;
1172 return;
1173 end if;
1175 -- Asterisk (can be multiplication operator or double asterisk which
1176 -- is the exponentiation compound delimiter).
1178 when '*' =>
1179 Accumulate_Checksum ('*');
1181 if Source (Scan_Ptr + 1) = '*' then
1182 Accumulate_Checksum ('*');
1183 Scan_Ptr := Scan_Ptr + 2;
1184 Token := Tok_Double_Asterisk;
1185 return;
1187 else
1188 Scan_Ptr := Scan_Ptr + 1;
1189 Token := Tok_Asterisk;
1190 return;
1191 end if;
1193 -- Colon, which can either be an isolated colon, or part of an
1194 -- assignment compound delimiter.
1196 when ':' =>
1197 Accumulate_Checksum (':');
1199 if Double_Char_Token ('=') then
1200 Token := Tok_Colon_Equal;
1201 if Style_Check then Style.Check_Colon_Equal; end if;
1202 return;
1204 elsif Source (Scan_Ptr + 1) = '-'
1205 and then Source (Scan_Ptr + 2) /= '-'
1206 then
1207 Token := Tok_Colon_Equal;
1208 Error_Msg (":- should be :=", Scan_Ptr);
1209 Scan_Ptr := Scan_Ptr + 2;
1210 return;
1212 else
1213 Scan_Ptr := Scan_Ptr + 1;
1214 Token := Tok_Colon;
1215 if Style_Check then Style.Check_Colon; end if;
1216 return;
1217 end if;
1219 -- Left parenthesis
1221 when '(' =>
1222 Accumulate_Checksum ('(');
1223 Scan_Ptr := Scan_Ptr + 1;
1224 Token := Tok_Left_Paren;
1225 if Style_Check then Style.Check_Left_Paren; end if;
1226 return;
1228 -- Left bracket
1230 when '[' =>
1231 if Source (Scan_Ptr + 1) = '"' then
1232 goto Scan_Wide_Character;
1234 else
1235 Error_Msg_S ("illegal character, replaced by ""(""");
1236 Scan_Ptr := Scan_Ptr + 1;
1237 Token := Tok_Left_Paren;
1238 return;
1239 end if;
1241 -- Left brace
1243 when '{' =>
1244 Error_Msg_S ("illegal character, replaced by ""(""");
1245 Scan_Ptr := Scan_Ptr + 1;
1246 Token := Tok_Left_Paren;
1247 return;
1249 -- Comma
1251 when ',' =>
1252 Accumulate_Checksum (',');
1253 Scan_Ptr := Scan_Ptr + 1;
1254 Token := Tok_Comma;
1255 if Style_Check then Style.Check_Comma; end if;
1256 return;
1258 -- Dot, which is either an isolated period, or part of a double dot
1259 -- compound delimiter sequence. We also check for the case of a
1260 -- digit following the period, to give a better error message.
1262 when '.' =>
1263 Accumulate_Checksum ('.');
1265 if Double_Char_Token ('.') then
1266 Token := Tok_Dot_Dot;
1267 if Style_Check then Style.Check_Dot_Dot; end if;
1268 return;
1270 elsif Source (Scan_Ptr + 1) in '0' .. '9' then
1271 Error_Msg_S ("numeric literal cannot start with point");
1272 Scan_Ptr := Scan_Ptr + 1;
1274 else
1275 Scan_Ptr := Scan_Ptr + 1;
1276 Token := Tok_Dot;
1277 return;
1278 end if;
1280 -- Equal, which can either be an equality operator, or part of the
1281 -- arrow (=>) compound delimiter.
1283 when '=' =>
1284 Accumulate_Checksum ('=');
1286 if Double_Char_Token ('>') then
1287 Token := Tok_Arrow;
1288 if Style_Check then Style.Check_Arrow; end if;
1289 return;
1291 elsif Source (Scan_Ptr + 1) = '=' then
1292 Error_Msg_S ("== should be =");
1293 Scan_Ptr := Scan_Ptr + 1;
1294 end if;
1296 Scan_Ptr := Scan_Ptr + 1;
1297 Token := Tok_Equal;
1298 return;
1300 -- Greater than, which can be a greater than operator, greater than
1301 -- or equal operator, or first character of a right label bracket.
1303 when '>' =>
1304 Accumulate_Checksum ('>');
1306 if Double_Char_Token ('=') then
1307 Token := Tok_Greater_Equal;
1308 return;
1310 elsif Double_Char_Token ('>') then
1311 Token := Tok_Greater_Greater;
1312 return;
1314 else
1315 Scan_Ptr := Scan_Ptr + 1;
1316 Token := Tok_Greater;
1317 return;
1318 end if;
1320 -- Less than, which can be a less than operator, less than or equal
1321 -- operator, or the first character of a left label bracket, or the
1322 -- first character of a box (<>) compound delimiter.
1324 when '<' =>
1325 Accumulate_Checksum ('<');
1327 if Double_Char_Token ('=') then
1328 Token := Tok_Less_Equal;
1329 return;
1331 elsif Double_Char_Token ('>') then
1332 Token := Tok_Box;
1333 if Style_Check then Style.Check_Box; end if;
1334 return;
1336 elsif Double_Char_Token ('<') then
1337 Token := Tok_Less_Less;
1338 return;
1340 else
1341 Scan_Ptr := Scan_Ptr + 1;
1342 Token := Tok_Less;
1343 return;
1344 end if;
1346 -- Minus, which is either a subtraction operator, or the first
1347 -- character of double minus starting a comment
1349 when '-' => Minus_Case : begin
1350 if Source (Scan_Ptr + 1) = '>' then
1351 Error_Msg_S ("invalid token");
1352 Scan_Ptr := Scan_Ptr + 2;
1353 Token := Tok_Arrow;
1354 return;
1356 elsif Source (Scan_Ptr + 1) /= '-' then
1357 Accumulate_Checksum ('-');
1358 Scan_Ptr := Scan_Ptr + 1;
1359 Token := Tok_Minus;
1360 return;
1362 -- Comment
1364 else -- Source (Scan_Ptr + 1) = '-' then
1365 if Style_Check then Style.Check_Comment; end if;
1366 Scan_Ptr := Scan_Ptr + 2;
1368 -- If we are in preprocessor mode with Replace_In_Comments set,
1369 -- then we return the "--" as a token on its own.
1371 if Replace_In_Comments then
1372 Token := Tok_Comment;
1373 return;
1374 end if;
1376 -- Otherwise scan out the comment
1378 Start_Of_Comment := Scan_Ptr;
1380 -- Loop to scan comment (this loop runs more than once only if
1381 -- a horizontal tab or other non-graphic character is scanned)
1383 loop
1384 -- Scan to non graphic character (opened up for speed)
1386 -- Note that we just eat left brackets, which means that
1387 -- bracket notation cannot be used for end of line
1388 -- characters in comments. This seems a reasonable choice,
1389 -- since no one would ever use brackets notation in a real
1390 -- program in this situation, and if we allow brackets
1391 -- notation, we forbid some valid comments which contain a
1392 -- brackets sequence that happens to match an end of line
1393 -- character.
1395 loop
1396 exit when Source (Scan_Ptr) not in Graphic_Character;
1397 Scan_Ptr := Scan_Ptr + 1;
1398 exit when Source (Scan_Ptr) not in Graphic_Character;
1399 Scan_Ptr := Scan_Ptr + 1;
1400 exit when Source (Scan_Ptr) not in Graphic_Character;
1401 Scan_Ptr := Scan_Ptr + 1;
1402 exit when Source (Scan_Ptr) not in Graphic_Character;
1403 Scan_Ptr := Scan_Ptr + 1;
1404 exit when Source (Scan_Ptr) not in Graphic_Character;
1405 Scan_Ptr := Scan_Ptr + 1;
1406 end loop;
1408 -- Keep going if horizontal tab
1410 if Source (Scan_Ptr) = HT then
1411 if Style_Check then Style.Check_HT; end if;
1412 Scan_Ptr := Scan_Ptr + 1;
1414 -- Terminate scan of comment if line terminator
1416 elsif Source (Scan_Ptr) in Line_Terminator then
1417 exit;
1419 -- Terminate scan of comment if end of file encountered
1420 -- (embedded EOF character or real last character in file)
1422 elsif Source (Scan_Ptr) = EOF then
1423 exit;
1425 -- If we have a wide character, we have to scan it out,
1426 -- because it might be a legitimate line terminator
1428 elsif (Source (Scan_Ptr) = ESC
1429 and then Identifier_Char (ESC))
1430 or else
1431 (Source (Scan_Ptr) in Upper_Half_Character
1432 and then Upper_Half_Encoding)
1433 then
1434 declare
1435 Wptr : constant Source_Ptr := Scan_Ptr;
1436 Code : Char_Code;
1437 Err : Boolean;
1439 begin
1440 Scan_Wide (Source, Scan_Ptr, Code, Err);
1442 -- If not well formed wide character, then just skip
1443 -- past it and ignore it.
1445 if Err then
1446 Scan_Ptr := Wptr + 1;
1448 -- If UTF_32 terminator, terminate comment scan
1450 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1451 Scan_Ptr := Wptr;
1452 exit;
1453 end if;
1454 end;
1456 -- Keep going if character in 80-FF range, or is ESC. These
1457 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1458 -- They are allowed even in Ada 83 mode according to the
1459 -- approved AI. ESC was added to the AI in June 93.
1461 elsif Source (Scan_Ptr) in Upper_Half_Character
1462 or else Source (Scan_Ptr) = ESC
1463 then
1464 Scan_Ptr := Scan_Ptr + 1;
1466 -- Otherwise we have an illegal comment character
1468 else
1469 Error_Illegal_Character;
1470 end if;
1471 end loop;
1473 -- Note that, except when comments are tokens, we do NOT
1474 -- execute a return here, instead we fall through to reexecute
1475 -- the scan loop to look for a token.
1477 if Comment_Is_Token then
1478 Name_Len := Integer (Scan_Ptr - Start_Of_Comment);
1479 Name_Buffer (1 .. Name_Len) :=
1480 String (Source (Start_Of_Comment .. Scan_Ptr - 1));
1481 Comment_Id := Name_Find;
1482 Token := Tok_Comment;
1483 return;
1484 end if;
1485 end if;
1486 end Minus_Case;
1488 -- Double quote starting a string literal
1490 when '"' =>
1491 Slit;
1492 Post_Scan;
1493 return;
1495 -- Percent starting a string literal
1497 when '%' =>
1498 Obsolescent_Check (Token_Ptr);
1500 if Warn_On_Obsolescent_Feature then
1501 Error_Msg_S
1502 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1503 Error_Msg_S
1504 ("\use """""" instead?");
1505 end if;
1507 Slit;
1508 Post_Scan;
1509 return;
1511 -- Apostrophe. This can either be the start of a character literal,
1512 -- or an isolated apostrophe used in a qualified expression or an
1513 -- attribute. We treat it as a character literal if it does not
1514 -- follow a right parenthesis, identifier, the keyword ALL or
1515 -- a literal. This means that we correctly treat constructs like:
1517 -- A := CHARACTER'('A');
1519 -- Note that RM-2.2(7) does not require a separator between
1520 -- "CHARACTER" and "'" in the above.
1522 when ''' => Char_Literal_Case : declare
1523 Code : Char_Code;
1524 Err : Boolean;
1526 begin
1527 Accumulate_Checksum (''');
1528 Scan_Ptr := Scan_Ptr + 1;
1530 -- Here is where we make the test to distinguish the cases. Treat
1531 -- as apostrophe if previous token is an identifier, right paren
1532 -- or the reserved word "all" (latter case as in A.all'Address)
1533 -- (or the reserved word "project" in project files). Also treat
1534 -- it as apostrophe after a literal (this catches some legitimate
1535 -- cases, like A."abs"'Address, and also gives better error
1536 -- behavior for impossible cases like 123'xxx).
1538 if Prev_Token = Tok_Identifier
1539 or else Prev_Token = Tok_Right_Paren
1540 or else Prev_Token = Tok_All
1541 or else Prev_Token = Tok_Project
1542 or else Prev_Token in Token_Class_Literal
1543 then
1544 Token := Tok_Apostrophe;
1545 if Style_Check then Style.Check_Apostrophe; end if;
1546 return;
1548 -- Otherwise the apostrophe starts a character literal
1550 else
1551 -- Case of wide character literal
1553 if (Source (Scan_Ptr) = ESC
1554 and then
1555 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
1556 or else
1557 (Source (Scan_Ptr) in Upper_Half_Character
1558 and then
1559 Upper_Half_Encoding)
1560 or else
1561 (Source (Scan_Ptr) = '['
1562 and then
1563 Source (Scan_Ptr + 1) = '"')
1564 then
1565 Wptr := Scan_Ptr;
1566 Scan_Wide (Source, Scan_Ptr, Code, Err);
1567 Accumulate_Checksum (Code);
1569 if Err then
1570 Error_Illegal_Wide_Character;
1571 Code := Character'Pos (' ');
1573 -- In Ada 95 mode we allow any wide character in a character
1574 -- literal, but in Ada 2005, the set of characters allowed
1575 -- is restricted to graphic characters.
1577 elsif Ada_Version >= Ada_05
1578 and then Is_UTF_32_Non_Graphic (UTF_32 (Code))
1579 then
1580 Error_Msg
1581 ("(Ada 2005) non-graphic character not permitted " &
1582 "in character literal", Wptr);
1583 end if;
1585 if Source (Scan_Ptr) /= ''' then
1586 Error_Msg_S ("missing apostrophe");
1587 else
1588 Scan_Ptr := Scan_Ptr + 1;
1589 end if;
1591 -- If we do not find a closing quote in the expected place then
1592 -- assume that we have a misguided attempt at a string literal.
1594 -- However, if previous token is RANGE, then we return an
1595 -- apostrophe instead since this gives better error recovery
1597 elsif Source (Scan_Ptr + 1) /= ''' then
1598 if Prev_Token = Tok_Range then
1599 Token := Tok_Apostrophe;
1600 return;
1602 else
1603 Scan_Ptr := Scan_Ptr - 1;
1604 Error_Msg_S
1605 ("strings are delimited by double quote character");
1606 Slit;
1607 Post_Scan;
1608 return;
1609 end if;
1611 -- Otherwise we have a (non-wide) character literal
1613 else
1614 Accumulate_Checksum (Source (Scan_Ptr));
1616 if Source (Scan_Ptr) not in Graphic_Character then
1617 if Source (Scan_Ptr) in Upper_Half_Character then
1618 if Ada_Version = Ada_83 then
1619 Error_Illegal_Character;
1620 end if;
1622 else
1623 Error_Illegal_Character;
1624 end if;
1625 end if;
1627 Code := Get_Char_Code (Source (Scan_Ptr));
1628 Scan_Ptr := Scan_Ptr + 2;
1629 end if;
1631 -- Fall through here with Scan_Ptr updated past the closing
1632 -- quote, and Code set to the Char_Code value for the literal
1634 Accumulate_Checksum (''');
1635 Token := Tok_Char_Literal;
1636 Set_Character_Literal_Name (Code);
1637 Token_Name := Name_Find;
1638 Character_Code := Code;
1639 Post_Scan;
1640 return;
1641 end if;
1642 end Char_Literal_Case;
1644 -- Right parenthesis
1646 when ')' =>
1647 Accumulate_Checksum (')');
1648 Scan_Ptr := Scan_Ptr + 1;
1649 Token := Tok_Right_Paren;
1650 if Style_Check then Style.Check_Right_Paren; end if;
1651 return;
1653 -- Right bracket or right brace, treated as right paren
1655 when ']' | '}' =>
1656 Error_Msg_S ("illegal character, replaced by "")""");
1657 Scan_Ptr := Scan_Ptr + 1;
1658 Token := Tok_Right_Paren;
1659 return;
1661 -- Slash (can be division operator or first character of not equal)
1663 when '/' =>
1664 Accumulate_Checksum ('/');
1666 if Double_Char_Token ('=') then
1667 Token := Tok_Not_Equal;
1668 return;
1669 else
1670 Scan_Ptr := Scan_Ptr + 1;
1671 Token := Tok_Slash;
1672 return;
1673 end if;
1675 -- Semicolon
1677 when ';' =>
1678 Accumulate_Checksum (';');
1679 Scan_Ptr := Scan_Ptr + 1;
1680 Token := Tok_Semicolon;
1681 if Style_Check then Style.Check_Semicolon; end if;
1682 return;
1684 -- Vertical bar
1686 when '|' => Vertical_Bar_Case : begin
1687 Accumulate_Checksum ('|');
1689 -- Special check for || to give nice message
1691 if Source (Scan_Ptr + 1) = '|' then
1692 Error_Msg_S ("""'|'|"" should be `OR ELSE`");
1693 Scan_Ptr := Scan_Ptr + 2;
1694 Token := Tok_Or;
1695 return;
1697 else
1698 Scan_Ptr := Scan_Ptr + 1;
1699 Token := Tok_Vertical_Bar;
1700 if Style_Check then Style.Check_Vertical_Bar; end if;
1701 return;
1702 end if;
1703 end Vertical_Bar_Case;
1705 -- Exclamation, replacement character for vertical bar
1707 when '!' => Exclamation_Case : begin
1708 Accumulate_Checksum ('!');
1709 Obsolescent_Check (Token_Ptr);
1711 if Warn_On_Obsolescent_Feature then
1712 Error_Msg_S
1713 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1714 Error_Msg_S
1715 ("\use ""'|"" instead?");
1716 end if;
1718 if Source (Scan_Ptr + 1) = '=' then
1719 Error_Msg_S ("'!= should be /=");
1720 Scan_Ptr := Scan_Ptr + 2;
1721 Token := Tok_Not_Equal;
1722 return;
1724 else
1725 Scan_Ptr := Scan_Ptr + 1;
1726 Token := Tok_Vertical_Bar;
1727 return;
1728 end if;
1729 end Exclamation_Case;
1731 -- Plus
1733 when '+' => Plus_Case : begin
1734 Accumulate_Checksum ('+');
1735 Scan_Ptr := Scan_Ptr + 1;
1736 Token := Tok_Plus;
1737 return;
1738 end Plus_Case;
1740 -- Digits starting a numeric literal
1742 when '0' .. '9' =>
1744 -- First a bit of a scan ahead to see if we have a case of an
1745 -- identifier starting with a digit (remembering exponent case).
1747 declare
1748 C : constant Character := Source (Scan_Ptr + 1);
1750 begin
1751 -- OK literal if digit followed by digit or underscore
1753 if C in '0' .. '9' or else C = '_' then
1754 null;
1756 -- OK literal if digit not followed by identifier char
1758 elsif not Identifier_Char (C) then
1759 null;
1761 -- OK literal if digit followed by e/E followed by digit/sign.
1762 -- We also allow underscore after the E, which is an error, but
1763 -- better handled by Nlit than deciding this is an identifier.
1765 elsif (C = 'e' or else C = 'E')
1766 and then (Source (Scan_Ptr + 2) in '0' .. '9'
1767 or else Source (Scan_Ptr + 2) = '+'
1768 or else Source (Scan_Ptr + 2) = '-'
1769 or else Source (Scan_Ptr + 2) = '_')
1770 then
1771 null;
1773 -- Here we have what really looks like an identifier that
1774 -- starts with a digit, so give error msg.
1776 else
1777 Error_Msg_S ("identifier may not start with digit");
1778 Name_Len := 1;
1779 Underline_Found := False;
1780 Name_Buffer (1) := Source (Scan_Ptr);
1781 Accumulate_Checksum (Name_Buffer (1));
1782 Scan_Ptr := Scan_Ptr + 1;
1783 goto Scan_Identifier;
1784 end if;
1785 end;
1787 -- Here we have an OK integer literal
1789 Nlit;
1791 if Identifier_Char (Source (Scan_Ptr)) then
1792 Error_Msg_S
1793 ("delimiter required between literal and identifier");
1794 end if;
1796 Post_Scan;
1797 return;
1799 -- Lower case letters
1801 when 'a' .. 'z' =>
1802 Name_Len := 1;
1803 Underline_Found := False;
1804 Name_Buffer (1) := Source (Scan_Ptr);
1805 Accumulate_Checksum (Name_Buffer (1));
1806 Scan_Ptr := Scan_Ptr + 1;
1807 goto Scan_Identifier;
1809 -- Upper case letters
1811 when 'A' .. 'Z' =>
1812 Name_Len := 1;
1813 Underline_Found := False;
1814 Name_Buffer (1) :=
1815 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
1816 Accumulate_Checksum (Name_Buffer (1));
1817 Scan_Ptr := Scan_Ptr + 1;
1818 goto Scan_Identifier;
1820 -- Underline character
1822 when '_' =>
1823 if Special_Characters ('_') then
1824 Token_Ptr := Scan_Ptr;
1825 Scan_Ptr := Scan_Ptr + 1;
1826 Token := Tok_Special;
1827 Special_Character := '_';
1828 return;
1829 end if;
1831 Error_Msg_S ("identifier cannot start with underline");
1832 Name_Len := 1;
1833 Name_Buffer (1) := '_';
1834 Scan_Ptr := Scan_Ptr + 1;
1835 Underline_Found := False;
1836 goto Scan_Identifier;
1838 -- Space (not possible, because we scanned past blanks)
1840 when ' ' =>
1841 raise Program_Error;
1843 -- Characters in top half of ASCII 8-bit chart
1845 when Upper_Half_Character =>
1847 -- Wide character case
1849 if Upper_Half_Encoding then
1850 goto Scan_Wide_Character;
1852 -- Otherwise we have OK Latin-1 character
1854 else
1855 -- Upper half characters may possibly be identifier letters
1856 -- but can never be digits, so Identifier_Char can be used to
1857 -- test for a valid start of identifier character.
1859 if Identifier_Char (Source (Scan_Ptr)) then
1860 Name_Len := 0;
1861 Underline_Found := False;
1862 goto Scan_Identifier;
1863 else
1864 Error_Illegal_Character;
1865 end if;
1866 end if;
1868 when ESC =>
1870 -- ESC character, possible start of identifier if wide characters
1871 -- using ESC encoding are allowed in identifiers, which we can
1872 -- tell by looking at the Identifier_Char flag for ESC, which is
1873 -- only true if these conditions are met. In Ada 2005 mode, may
1874 -- also be valid UTF_32 space or line terminator character.
1876 if Identifier_Char (ESC) then
1877 Name_Len := 0;
1878 goto Scan_Wide_Character;
1879 else
1880 Error_Illegal_Character;
1881 end if;
1883 -- Invalid control characters
1885 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | ASCII.SO |
1886 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1887 EM | FS | GS | RS | US | DEL
1889 Error_Illegal_Character;
1891 -- Invalid graphic characters
1893 when '#' | '$' | '?' | '@' | '`' | '\' | '^' | '~' =>
1895 -- If Set_Special_Character has been called for this character,
1896 -- set Scans.Special_Character and return a Special token.
1898 if Special_Characters (Source (Scan_Ptr)) then
1899 Token_Ptr := Scan_Ptr;
1900 Token := Tok_Special;
1901 Special_Character := Source (Scan_Ptr);
1902 Scan_Ptr := Scan_Ptr + 1;
1903 return;
1905 -- Otherwise, this is an illegal character
1907 else
1908 Error_Illegal_Character;
1909 end if;
1911 -- End switch on non-blank character
1913 end case;
1915 -- End loop past format effectors. The exit from this loop is by
1916 -- executing a return statement following completion of token scan
1917 -- (control never falls out of this loop to the code which follows)
1919 end loop;
1921 -- Wide_Character scanning routine. On entry we have encountered the
1922 -- initial character of a wide character sequence.
1924 <<Scan_Wide_Character>>
1926 declare
1927 Code : Char_Code;
1928 Cat : Category;
1929 Err : Boolean;
1931 begin
1932 Wptr := Scan_Ptr;
1933 Scan_Wide (Source, Scan_Ptr, Code, Err);
1935 -- If bad wide character, signal error and continue scan
1937 if Err then
1938 Error_Illegal_Wide_Character;
1939 goto Scan_Next_Character;
1940 end if;
1942 Cat := Get_Category (UTF_32 (Code));
1944 -- If OK letter, reset scan ptr and go scan identifier
1946 if Is_UTF_32_Letter (Cat) then
1947 Scan_Ptr := Wptr;
1948 Name_Len := 0;
1949 Underline_Found := False;
1950 goto Scan_Identifier;
1952 -- If OK wide space, ignore and keep scanning (we do not include
1953 -- any ignored spaces in checksum)
1955 elsif Is_UTF_32_Space (Cat) then
1956 goto Scan_Next_Character;
1958 -- If OK wide line terminator, terminate current line
1960 elsif Is_UTF_32_Line_Terminator (UTF_32 (Code)) then
1961 Scan_Ptr := Wptr;
1962 goto Scan_Line_Terminator;
1964 -- Punctuation is an error (at start of identifier)
1966 elsif Is_UTF_32_Punctuation (Cat) then
1967 Error_Msg
1968 ("identifier cannot start with punctuation", Wptr);
1969 Scan_Ptr := Wptr;
1970 Name_Len := 0;
1971 Underline_Found := False;
1972 goto Scan_Identifier;
1974 -- Mark character is an error (at start of identifer)
1976 elsif Is_UTF_32_Mark (Cat) then
1977 Error_Msg
1978 ("identifier cannot start with mark character", Wptr);
1979 Scan_Ptr := Wptr;
1980 Name_Len := 0;
1981 Underline_Found := False;
1982 goto Scan_Identifier;
1984 -- Other format character is an error (at start of identifer)
1986 elsif Is_UTF_32_Other (Cat) then
1987 Error_Msg
1988 ("identifier cannot start with other format character", Wptr);
1989 Scan_Ptr := Wptr;
1990 Name_Len := 0;
1991 Underline_Found := False;
1992 goto Scan_Identifier;
1994 -- Extended digit character is an error. Could be bad start of
1995 -- identifier or bad literal. Not worth doing too much to try to
1996 -- distinguish these cases, but we will do a little bit.
1998 elsif Is_UTF_32_Digit (Cat) then
1999 Error_Msg
2000 ("identifier cannot start with digit character", Wptr);
2001 Scan_Ptr := Wptr;
2002 Name_Len := 0;
2003 Underline_Found := False;
2004 goto Scan_Identifier;
2006 -- All other wide characters are illegal here
2008 else
2009 Error_Illegal_Wide_Character;
2010 goto Scan_Next_Character;
2011 end if;
2012 end;
2014 -- Routine to scan line terminator. On entry Scan_Ptr points to a
2015 -- character which is one of FF,LR,CR,VT, or one of the wide characters
2016 -- that is treated as a line termiantor.
2018 <<Scan_Line_Terminator>>
2020 -- Check line too long
2022 Check_End_Of_Line;
2024 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2025 -- a physical line.
2027 if End_Of_Line_Is_Token then
2028 Token_Ptr := Scan_Ptr;
2029 end if;
2031 declare
2032 Physical : Boolean;
2034 begin
2035 Skip_Line_Terminators (Scan_Ptr, Physical);
2037 -- If we are at start of physical line, update scan pointers to
2038 -- reflect the start of the new line.
2040 if Physical then
2041 Current_Line_Start := Scan_Ptr;
2042 Start_Column := Set_Start_Column;
2043 First_Non_Blank_Location := Scan_Ptr;
2045 -- If End_Of_Line is a token, we return it as it is a
2046 -- physical line.
2048 if End_Of_Line_Is_Token then
2049 Token := Tok_End_Of_Line;
2050 return;
2051 end if;
2052 end if;
2053 end;
2055 goto Scan_Next_Character;
2057 -- Identifier scanning routine. On entry, some initial characters of
2058 -- the identifier may have already been stored in Name_Buffer. If so,
2059 -- Name_Len has the number of characters stored. otherwise Name_Len is
2060 -- set to zero on entry. Underline_Found is also set False on entry.
2062 <<Scan_Identifier>>
2064 -- This loop scans as fast as possible past lower half letters and
2065 -- digits, which we expect to be the most common characters.
2067 loop
2068 if Source (Scan_Ptr) in 'a' .. 'z'
2069 or else Source (Scan_Ptr) in '0' .. '9'
2070 then
2071 Name_Buffer (Name_Len + 1) := Source (Scan_Ptr);
2072 Accumulate_Checksum (Source (Scan_Ptr));
2074 elsif Source (Scan_Ptr) in 'A' .. 'Z' then
2075 Name_Buffer (Name_Len + 1) :=
2076 Character'Val (Character'Pos (Source (Scan_Ptr)) + 32);
2077 Accumulate_Checksum (Name_Buffer (Name_Len + 1));
2079 else
2080 exit;
2081 end if;
2083 Underline_Found := False;
2084 Scan_Ptr := Scan_Ptr + 1;
2085 Name_Len := Name_Len + 1;
2086 end loop;
2088 -- If we fall through, then we have encountered either an underline
2089 -- character, or an extended identifier character (i.e. one from the
2090 -- upper half), or a wide character, or an identifier terminator. The
2091 -- initial test speeds us up in the most common case where we have
2092 -- an identifier terminator. Note that ESC is an identifier character
2093 -- only if a wide character encoding method that uses ESC encoding
2094 -- is active, so if we find an ESC character we know that we have a
2095 -- wide character.
2097 if Identifier_Char (Source (Scan_Ptr)) then
2099 -- Case of underline
2101 if Source (Scan_Ptr) = '_' then
2102 Accumulate_Checksum ('_');
2104 if Underline_Found then
2105 Error_No_Double_Underline;
2106 else
2107 Underline_Found := True;
2108 Name_Len := Name_Len + 1;
2109 Name_Buffer (Name_Len) := '_';
2110 end if;
2112 Scan_Ptr := Scan_Ptr + 1;
2113 goto Scan_Identifier;
2115 -- Upper half character
2117 elsif Source (Scan_Ptr) in Upper_Half_Character
2118 and then not Upper_Half_Encoding
2119 then
2120 Accumulate_Checksum (Source (Scan_Ptr));
2121 Store_Encoded_Character
2122 (Get_Char_Code (Fold_Lower (Source (Scan_Ptr))));
2123 Scan_Ptr := Scan_Ptr + 1;
2124 Underline_Found := False;
2125 goto Scan_Identifier;
2127 -- Left bracket not followed by a quote terminates an identifier.
2128 -- This is an error, but we don't want to give a junk error msg
2129 -- about wide characters in this case!
2131 elsif Source (Scan_Ptr) = '['
2132 and then Source (Scan_Ptr + 1) /= '"'
2133 then
2134 null;
2136 -- We know we have a wide character encoding here (the current
2137 -- character is either ESC, left bracket, or an upper half
2138 -- character depending on the encoding method).
2140 else
2141 -- Scan out the wide character and insert the appropriate
2142 -- encoding into the name table entry for the identifier.
2144 declare
2145 Code : Char_Code;
2146 Err : Boolean;
2147 Chr : Character;
2148 Cat : Category;
2150 begin
2151 Wptr := Scan_Ptr;
2152 Scan_Wide (Source, Scan_Ptr, Code, Err);
2154 -- If error, signal error
2156 if Err then
2157 Error_Illegal_Wide_Character;
2159 -- If the character scanned is a normal identifier
2160 -- character, then we treat it that way.
2162 elsif In_Character_Range (Code)
2163 and then Identifier_Char (Get_Character (Code))
2164 then
2165 Chr := Get_Character (Code);
2166 Accumulate_Checksum (Chr);
2167 Store_Encoded_Character
2168 (Get_Char_Code (Fold_Lower (Chr)));
2169 Underline_Found := False;
2171 -- Here if not a normal identifier character
2173 else
2174 -- Make sure we are allowing wide characters in
2175 -- identifiers. Note that we allow wide character
2176 -- notation for an OK identifier character. This in
2177 -- particular allows bracket or other notation to be
2178 -- used for upper half letters.
2180 -- Wide characters are always allowed in Ada 2005
2182 if Identifier_Character_Set /= 'w'
2183 and then Ada_Version < Ada_05
2184 then
2185 Error_Msg
2186 ("wide character not allowed in identifier", Wptr);
2187 end if;
2189 Cat := Get_Category (UTF_32 (Code));
2191 -- If OK letter, store it folding to upper case. Note
2192 -- that we include the folded letter in the checksum.
2194 if Is_UTF_32_Letter (Cat) then
2195 Code :=
2196 Char_Code (UTF_32_To_Upper_Case (UTF_32 (Code)));
2197 Accumulate_Checksum (Code);
2198 Store_Encoded_Character (Code);
2199 Underline_Found := False;
2201 -- If OK extended digit or mark, then store it
2203 elsif Is_UTF_32_Digit (Cat)
2204 or else Is_UTF_32_Mark (Cat)
2205 then
2206 Accumulate_Checksum (Code);
2207 Store_Encoded_Character (Code);
2208 Underline_Found := False;
2210 -- Wide punctuation is also stored, but counts as an
2211 -- underline character for error checking purposes.
2213 elsif Is_UTF_32_Punctuation (Cat) then
2214 Accumulate_Checksum (Code);
2216 if Underline_Found then
2217 declare
2218 Cend : constant Source_Ptr := Scan_Ptr;
2219 begin
2220 Scan_Ptr := Wptr;
2221 Error_No_Double_Underline;
2222 Scan_Ptr := Cend;
2223 end;
2225 else
2226 Store_Encoded_Character (Code);
2227 Underline_Found := True;
2228 end if;
2230 -- Wide character in Unicode cateogory "Other, Format"
2231 -- is accepted in an identifier, but is ignored and not
2232 -- stored. It seems reasonable to exclude it from the
2233 -- checksum.
2235 -- Note that it is correct (see AI-395) to simply strip
2236 -- other format characters, before testing for double
2237 -- underlines, or for reserved words).
2239 elsif Is_UTF_32_Other (Cat) then
2240 null;
2242 -- Wide character in category Separator,Space terminates
2244 elsif Is_UTF_32_Space (Cat) then
2245 goto Scan_Identifier_Complete;
2247 -- Any other wide character is not acceptable
2249 else
2250 Error_Msg
2251 ("invalid wide character in identifier", Wptr);
2252 end if;
2253 end if;
2255 goto Scan_Identifier;
2256 end;
2257 end if;
2258 end if;
2260 -- Scan of identifier is complete. The identifier is stored in
2261 -- Name_Buffer, and Scan_Ptr points past the last character.
2263 <<Scan_Identifier_Complete>>
2264 Token_Name := Name_Find;
2266 -- Check for identifier ending with underline or punctuation char
2268 if Underline_Found then
2269 Underline_Found := False;
2271 if Source (Scan_Ptr - 1) = '_' then
2272 Error_Msg
2273 ("identifier cannot end with underline", Scan_Ptr - 1);
2274 else
2275 Error_Msg
2276 ("identifier cannot end with punctuation character", Wptr);
2277 end if;
2278 end if;
2280 -- Here is where we check if it was a keyword
2282 if Is_Keyword_Name (Token_Name) then
2283 Token := Token_Type'Val (Get_Name_Table_Byte (Token_Name));
2285 -- Deal with possible style check for non-lower case keyword, but
2286 -- we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2287 -- this purpose if they appear as attribute designators. Actually
2288 -- we only check the first character for speed.
2290 -- Ada 2005 (AI-284): Do not apply the style check in case of
2291 -- "pragma Interface"
2293 -- Ada 2005 (AI-340): Do not apply the style check in case of
2294 -- MOD attribute.
2296 if Style_Check
2297 and then Source (Token_Ptr) <= 'Z'
2298 and then (Prev_Token /= Tok_Apostrophe
2299 or else
2300 (Token /= Tok_Access and then
2301 Token /= Tok_Delta and then
2302 Token /= Tok_Digits and then
2303 Token /= Tok_Mod and then
2304 Token /= Tok_Range))
2305 and then (Token /= Tok_Interface
2306 or else
2307 (Token = Tok_Interface
2308 and then Prev_Token /= Tok_Pragma))
2309 then
2310 Style.Non_Lower_Case_Keyword;
2311 end if;
2313 -- We must reset Token_Name since this is not an identifier and
2314 -- if we leave Token_Name set, the parser gets confused because
2315 -- it thinks it is dealing with an identifier instead of the
2316 -- corresponding keyword.
2318 Token_Name := No_Name;
2319 Accumulate_Token_Checksum;
2320 return;
2322 -- It is an identifier after all
2324 else
2325 Token := Tok_Identifier;
2326 Accumulate_Token_Checksum;
2327 Post_Scan;
2328 return;
2329 end if;
2330 end Scan;
2332 --------------------------
2333 -- Set_Comment_As_Token --
2334 --------------------------
2336 procedure Set_Comment_As_Token (Value : Boolean) is
2337 begin
2338 Comment_Is_Token := Value;
2339 end Set_Comment_As_Token;
2341 ------------------------------
2342 -- Set_End_Of_Line_As_Token --
2343 ------------------------------
2345 procedure Set_End_Of_Line_As_Token (Value : Boolean) is
2346 begin
2347 End_Of_Line_Is_Token := Value;
2348 end Set_End_Of_Line_As_Token;
2350 ---------------------------
2351 -- Set_Special_Character --
2352 ---------------------------
2354 procedure Set_Special_Character (C : Character) is
2355 begin
2356 case C is
2357 when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
2358 Special_Characters (C) := True;
2360 when others =>
2361 null;
2362 end case;
2363 end Set_Special_Character;
2365 ----------------------
2366 -- Set_Start_Column --
2367 ----------------------
2369 -- Note: it seems at first glance a little expensive to compute this value
2370 -- for every source line (since it is certainly not used for all source
2371 -- lines). On the other hand, it doesn't take much more work to skip past
2372 -- the initial white space on the line counting the columns than it would
2373 -- to scan past the white space using the standard scanning circuits.
2375 function Set_Start_Column return Column_Number is
2376 Start_Column : Column_Number := 0;
2378 begin
2379 -- Outer loop scans past horizontal tab characters
2381 Tabs_Loop : loop
2383 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
2384 -- past the blanks and adjusting Start_Column to account for them.
2386 Blanks_Loop : loop
2387 if Source (Scan_Ptr) = ' ' then
2388 if Source (Scan_Ptr + 1) = ' ' then
2389 if Source (Scan_Ptr + 2) = ' ' then
2390 if Source (Scan_Ptr + 3) = ' ' then
2391 if Source (Scan_Ptr + 4) = ' ' then
2392 if Source (Scan_Ptr + 5) = ' ' then
2393 if Source (Scan_Ptr + 6) = ' ' then
2394 Scan_Ptr := Scan_Ptr + 7;
2395 Start_Column := Start_Column + 7;
2396 else
2397 Scan_Ptr := Scan_Ptr + 6;
2398 Start_Column := Start_Column + 6;
2399 exit Blanks_Loop;
2400 end if;
2401 else
2402 Scan_Ptr := Scan_Ptr + 5;
2403 Start_Column := Start_Column + 5;
2404 exit Blanks_Loop;
2405 end if;
2406 else
2407 Scan_Ptr := Scan_Ptr + 4;
2408 Start_Column := Start_Column + 4;
2409 exit Blanks_Loop;
2410 end if;
2411 else
2412 Scan_Ptr := Scan_Ptr + 3;
2413 Start_Column := Start_Column + 3;
2414 exit Blanks_Loop;
2415 end if;
2416 else
2417 Scan_Ptr := Scan_Ptr + 2;
2418 Start_Column := Start_Column + 2;
2419 exit Blanks_Loop;
2420 end if;
2421 else
2422 Scan_Ptr := Scan_Ptr + 1;
2423 Start_Column := Start_Column + 1;
2424 exit Blanks_Loop;
2425 end if;
2426 else
2427 exit Blanks_Loop;
2428 end if;
2429 end loop Blanks_Loop;
2431 -- Outer loop keeps going only if a horizontal tab follows
2433 if Source (Scan_Ptr) = HT then
2434 if Style_Check then Style.Check_HT; end if;
2435 Scan_Ptr := Scan_Ptr + 1;
2436 Start_Column := (Start_Column / 8) * 8 + 8;
2437 else
2438 exit Tabs_Loop;
2439 end if;
2441 end loop Tabs_Loop;
2443 return Start_Column;
2444 end Set_Start_Column;
2446 end Scng;