1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
9 -- Copyright (C) 1992-2005 Free Software Foundation, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
20 -- MA 02111-1307, USA. --
22 -- GNAT was originally developed by the GNAT team at New York University. --
23 -- Extensive contributions were provided by Ada Core Technologies Inc. --
25 ------------------------------------------------------------------------------
27 with Csets
; use Csets
;
28 with Err_Vars
; use Err_Vars
;
29 with Namet
; use Namet
;
31 with Scans
; use Scans
;
32 with Sinput
; use Sinput
;
33 with Snames
; use Snames
;
34 with Stringt
; use Stringt
;
35 with Stylesw
; use Stylesw
;
36 with Uintp
; use Uintp
;
37 with Urealp
; use Urealp
;
38 with Widechar
; use Widechar
;
41 with System
.WCh_Con
; use System
.WCh_Con
;
43 with GNAT
.UTF_32
; use GNAT
.UTF_32
;
48 -- Make control characters visible
50 Special_Characters
: array (Character) of Boolean := (others => False);
51 -- For characters that are Special token, the value is True
53 Comment_Is_Token
: Boolean := False;
54 -- True if comments are tokens
56 End_Of_Line_Is_Token
: Boolean := False;
57 -- True if End_Of_Line is a token
59 -----------------------
60 -- Local Subprograms --
61 -----------------------
63 procedure Accumulate_Token_Checksum
;
64 pragma Inline
(Accumulate_Token_Checksum
);
66 procedure Accumulate_Checksum
(C
: Character);
67 pragma Inline
(Accumulate_Checksum
);
68 -- This routine accumulates the checksum given character C. During the
69 -- scanning of a source file, this routine is called with every character
70 -- in the source, excluding blanks, and all control characters (except
71 -- that ESC is included in the checksum). Upper case letters not in string
72 -- literals are folded by the caller. See Sinput spec for the documentation
73 -- of the checksum algorithm. Note: checksum values are only used if we
74 -- generate code, so it is not necessary to worry about making the right
75 -- sequence of calls in any error situation.
77 procedure Accumulate_Checksum
(C
: Char_Code
);
78 pragma Inline
(Accumulate_Checksum
);
79 -- This version is identical, except that the argument, C, is a character
80 -- code value instead of a character. This is used when wide characters
81 -- are scanned. We use the character code rather than the ASCII characters
82 -- so that the checksum is independent of wide character encoding method.
84 procedure Initialize_Checksum
;
85 pragma Inline
(Initialize_Checksum
);
86 -- Initialize checksum value
88 -------------------------
89 -- Accumulate_Checksum --
90 -------------------------
92 procedure Accumulate_Checksum
(C
: Character) is
94 System
.CRC32
.Update
(System
.CRC32
.CRC32
(Checksum
), C
);
95 end Accumulate_Checksum
;
97 procedure Accumulate_Checksum
(C
: Char_Code
) is
100 Accumulate_Checksum
(Character'Val (C
/ 2 ** 16));
101 Accumulate_Checksum
(Character'Val ((C
/ 256) mod 256));
103 Accumulate_Checksum
(Character'Val (C
/ 256));
106 Accumulate_Checksum
(Character'Val (C
mod 256));
107 end Accumulate_Checksum
;
109 -------------------------------
110 -- Accumulate_Token_Checksum --
111 -------------------------------
113 procedure Accumulate_Token_Checksum
is
116 (System
.CRC32
.CRC32
(Checksum
),
117 Character'Val (Token_Type
'Pos (Token
)));
118 end Accumulate_Token_Checksum
;
120 ----------------------------
121 -- Determine_Token_Casing --
122 ----------------------------
124 function Determine_Token_Casing
return Casing_Type
is
126 return Determine_Casing
(Source
(Token_Ptr
.. Scan_Ptr
- 1));
127 end Determine_Token_Casing
;
129 -------------------------
130 -- Initialize_Checksum --
131 -------------------------
133 procedure Initialize_Checksum
is
135 System
.CRC32
.Initialize
(System
.CRC32
.CRC32
(Checksum
));
136 end Initialize_Checksum
;
138 ------------------------
139 -- Initialize_Scanner --
140 ------------------------
142 procedure Initialize_Scanner
143 (Unit
: Unit_Number_Type
;
144 Index
: Source_File_Index
)
146 procedure Set_Reserved
(N
: Name_Id
; T
: Token_Type
);
147 pragma Inline
(Set_Reserved
);
148 -- Set given name as a reserved keyword (T is the corresponding token)
154 procedure Set_Reserved
(N
: Name_Id
; T
: Token_Type
) is
156 -- Set up Token_Type values in Names Table entries for reserved
157 -- keywords We use the Pos value of the Token_Type value. Note we
158 -- rely on the fact that Token_Type'Val (0) is not a reserved word!
160 Set_Name_Table_Byte
(N
, Token_Type
'Pos (T
));
163 -- Start of processing for Initialize_Scanner
166 -- Establish reserved words
168 Set_Reserved
(Name_Abort
, Tok_Abort
);
169 Set_Reserved
(Name_Abs
, Tok_Abs
);
170 Set_Reserved
(Name_Abstract
, Tok_Abstract
);
171 Set_Reserved
(Name_Accept
, Tok_Accept
);
172 Set_Reserved
(Name_Access
, Tok_Access
);
173 Set_Reserved
(Name_And
, Tok_And
);
174 Set_Reserved
(Name_Aliased
, Tok_Aliased
);
175 Set_Reserved
(Name_All
, Tok_All
);
176 Set_Reserved
(Name_Array
, Tok_Array
);
177 Set_Reserved
(Name_At
, Tok_At
);
178 Set_Reserved
(Name_Begin
, Tok_Begin
);
179 Set_Reserved
(Name_Body
, Tok_Body
);
180 Set_Reserved
(Name_Case
, Tok_Case
);
181 Set_Reserved
(Name_Constant
, Tok_Constant
);
182 Set_Reserved
(Name_Declare
, Tok_Declare
);
183 Set_Reserved
(Name_Delay
, Tok_Delay
);
184 Set_Reserved
(Name_Delta
, Tok_Delta
);
185 Set_Reserved
(Name_Digits
, Tok_Digits
);
186 Set_Reserved
(Name_Do
, Tok_Do
);
187 Set_Reserved
(Name_Else
, Tok_Else
);
188 Set_Reserved
(Name_Elsif
, Tok_Elsif
);
189 Set_Reserved
(Name_End
, Tok_End
);
190 Set_Reserved
(Name_Entry
, Tok_Entry
);
191 Set_Reserved
(Name_Exception
, Tok_Exception
);
192 Set_Reserved
(Name_Exit
, Tok_Exit
);
193 Set_Reserved
(Name_For
, Tok_For
);
194 Set_Reserved
(Name_Function
, Tok_Function
);
195 Set_Reserved
(Name_Generic
, Tok_Generic
);
196 Set_Reserved
(Name_Goto
, Tok_Goto
);
197 Set_Reserved
(Name_If
, Tok_If
);
198 Set_Reserved
(Name_In
, Tok_In
);
199 Set_Reserved
(Name_Is
, Tok_Is
);
200 Set_Reserved
(Name_Limited
, Tok_Limited
);
201 Set_Reserved
(Name_Loop
, Tok_Loop
);
202 Set_Reserved
(Name_Mod
, Tok_Mod
);
203 Set_Reserved
(Name_New
, Tok_New
);
204 Set_Reserved
(Name_Not
, Tok_Not
);
205 Set_Reserved
(Name_Null
, Tok_Null
);
206 Set_Reserved
(Name_Of
, Tok_Of
);
207 Set_Reserved
(Name_Or
, Tok_Or
);
208 Set_Reserved
(Name_Others
, Tok_Others
);
209 Set_Reserved
(Name_Out
, Tok_Out
);
210 Set_Reserved
(Name_Package
, Tok_Package
);
211 Set_Reserved
(Name_Pragma
, Tok_Pragma
);
212 Set_Reserved
(Name_Private
, Tok_Private
);
213 Set_Reserved
(Name_Procedure
, Tok_Procedure
);
214 Set_Reserved
(Name_Protected
, Tok_Protected
);
215 Set_Reserved
(Name_Raise
, Tok_Raise
);
216 Set_Reserved
(Name_Range
, Tok_Range
);
217 Set_Reserved
(Name_Record
, Tok_Record
);
218 Set_Reserved
(Name_Rem
, Tok_Rem
);
219 Set_Reserved
(Name_Renames
, Tok_Renames
);
220 Set_Reserved
(Name_Requeue
, Tok_Requeue
);
221 Set_Reserved
(Name_Return
, Tok_Return
);
222 Set_Reserved
(Name_Reverse
, Tok_Reverse
);
223 Set_Reserved
(Name_Select
, Tok_Select
);
224 Set_Reserved
(Name_Separate
, Tok_Separate
);
225 Set_Reserved
(Name_Subtype
, Tok_Subtype
);
226 Set_Reserved
(Name_Tagged
, Tok_Tagged
);
227 Set_Reserved
(Name_Task
, Tok_Task
);
228 Set_Reserved
(Name_Terminate
, Tok_Terminate
);
229 Set_Reserved
(Name_Then
, Tok_Then
);
230 Set_Reserved
(Name_Type
, Tok_Type
);
231 Set_Reserved
(Name_Until
, Tok_Until
);
232 Set_Reserved
(Name_Use
, Tok_Use
);
233 Set_Reserved
(Name_When
, Tok_When
);
234 Set_Reserved
(Name_While
, Tok_While
);
235 Set_Reserved
(Name_With
, Tok_With
);
236 Set_Reserved
(Name_Xor
, Tok_Xor
);
238 -- Ada 2005 reserved words
240 Set_Reserved
(Name_Interface
, Tok_Interface
);
241 Set_Reserved
(Name_Overriding
, Tok_Overriding
);
242 Set_Reserved
(Name_Synchronized
, Tok_Synchronized
);
244 -- Initialize scan control variables
246 Current_Source_File
:= Index
;
247 Source
:= Source_Text
(Current_Source_File
);
248 Current_Source_Unit
:= Unit
;
249 Scan_Ptr
:= Source_First
(Current_Source_File
);
251 Token_Ptr
:= Scan_Ptr
;
252 Current_Line_Start
:= Scan_Ptr
;
254 Token_Name
:= No_Name
;
255 Start_Column
:= Set_Start_Column
;
256 First_Non_Blank_Location
:= Scan_Ptr
;
260 -- Do not call Scan, otherwise the License stuff does not work in Scn
262 end Initialize_Scanner
;
264 ------------------------------
265 -- Reset_Special_Characters --
266 ------------------------------
268 procedure Reset_Special_Characters
is
270 Special_Characters
:= (others => False);
271 end Reset_Special_Characters
;
279 Start_Of_Comment
: Source_Ptr
;
280 -- Record start of comment position
282 Underline_Found
: Boolean;
283 -- During scanning of an identifier, set to True if last character
284 -- scanned was an underline or other punctuation character. This
285 -- is used to flag the error of two underlines/punctuations in a
286 -- row or ending an identifier with a underline/punctuation. Here
287 -- punctuation means any UTF_32 character in the Unicode category
288 -- Punctuation,Connector.
291 -- Used to remember start of last wide character scanned
293 procedure Check_End_Of_Line
;
294 -- Called when end of line encountered. Checks that line is not too
295 -- long, and that other style checks for the end of line are met.
297 function Double_Char_Token
(C
: Character) return Boolean;
298 -- This function is used for double character tokens like := or <>. It
299 -- checks if the character following Source (Scan_Ptr) is C, and if so
300 -- bumps Scan_Ptr past the pair of characters and returns True. A space
301 -- between the two characters is also recognized with an appropriate
302 -- error message being issued. If C is not present, False is returned.
303 -- Note that Double_Char_Token can only be used for tokens defined in
304 -- the Ada syntax (it's use for error cases like && is not appropriate
305 -- since we do not want a junk message for a case like &-space-&).
307 procedure Error_Illegal_Character
;
308 -- Give illegal character error, Scan_Ptr points to character. On
309 -- return, Scan_Ptr is bumped past the illegal character.
311 procedure Error_Illegal_Wide_Character
;
312 -- Give illegal wide character message. On return, Scan_Ptr is bumped
313 -- past the illegal character, which may still leave us pointing to
314 -- junk, not much we can do if the escape sequence is messed up!
316 procedure Error_Long_Line
;
317 -- Signal error of excessively long line
319 procedure Error_No_Double_Underline
;
320 -- Signal error of two underline or punctuation characters in a row.
321 -- Called with Scan_Ptr pointing to second underline/punctuation char.
324 -- This is the procedure for scanning out numeric literals. On entry,
325 -- Scan_Ptr points to the digit that starts the numeric literal (the
326 -- checksum for this character has not been accumulated yet). On return
327 -- Scan_Ptr points past the last character of the numeric literal, Token
328 -- and Token_Node are set appropriately, and the checksum is updated.
331 -- This is the procedure for scanning out string literals. On entry,
332 -- Scan_Ptr points to the opening string quote (the checksum for this
333 -- character has not been accumulated yet). On return Scan_Ptr points
334 -- past the closing quote of the string literal, Token and Token_Node
335 -- are set appropriately, and the checksum is upated.
337 -----------------------
338 -- Check_End_Of_Line --
339 -----------------------
341 procedure Check_End_Of_Line
is
342 Len
: constant Int
:= Int
(Scan_Ptr
) - Int
(Current_Line_Start
);
346 Style
.Check_Line_Terminator
(Len
);
349 -- Deal with checking maximum line length
351 if Style_Check
and Style_Check_Max_Line_Length
then
352 Style
.Check_Line_Max_Length
(Len
);
354 -- If style checking is inactive, check maximum line length against
355 -- standard value. Note that we take this from Opt.Max_Line_Length
356 -- rather than Hostparm.Max_Line_Length because we do not want to
357 -- impose any limit during scanning of configuration pragma files,
358 -- and Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length)
359 -- is reset to Column_Number'Max during scanning of such files.
361 elsif Len
> Opt
.Max_Line_Length
then
364 end Check_End_Of_Line
;
366 -----------------------
367 -- Double_Char_Token --
368 -----------------------
370 function Double_Char_Token
(C
: Character) return Boolean is
372 if Source
(Scan_Ptr
+ 1) = C
then
373 Accumulate_Checksum
(C
);
374 Scan_Ptr
:= Scan_Ptr
+ 2;
377 elsif Source
(Scan_Ptr
+ 1) = ' '
378 and then Source
(Scan_Ptr
+ 2) = C
380 Scan_Ptr
:= Scan_Ptr
+ 1;
381 Error_Msg_S
("no space allowed here");
382 Scan_Ptr
:= Scan_Ptr
+ 2;
388 end Double_Char_Token
;
390 -----------------------------
391 -- Error_Illegal_Character --
392 -----------------------------
394 procedure Error_Illegal_Character
is
396 Error_Msg_S
("illegal character");
397 Scan_Ptr
:= Scan_Ptr
+ 1;
398 end Error_Illegal_Character
;
400 ----------------------------------
401 -- Error_Illegal_Wide_Character --
402 ----------------------------------
404 procedure Error_Illegal_Wide_Character
is
406 Error_Msg
("illegal wide character", Wptr
);
407 end Error_Illegal_Wide_Character
;
409 ---------------------
410 -- Error_Long_Line --
411 ---------------------
413 procedure Error_Long_Line
is
416 ("this line is too long",
417 Current_Line_Start
+ Source_Ptr
(Opt
.Max_Line_Length
));
420 -------------------------------
421 -- Error_No_Double_Underline --
422 -------------------------------
424 procedure Error_No_Double_Underline
is
426 Underline_Found
:= False;
428 -- There are four cases, and we special case the messages
430 if Source
(Scan_Ptr
) = '_' then
431 if Source
(Scan_Ptr
- 1) = '_' then
433 ("two consecutive underlines not permitted");
436 ("underline cannot follow punctuation character");
440 if Source
(Scan_Ptr
- 1) = '_' then
442 ("punctuation character cannot follow underline");
445 ("two consecutive punctuation characters not permitted");
448 end Error_No_Double_Underline
;
457 -- Current source program character
459 Base_Char
: Character;
460 -- Either # or : (character at start of based number)
466 -- Value of base in Uint format
469 -- Value of integer scanned by Scan_Integer in Uint format
472 -- Value of integer in numeric value being scanned
475 -- Scale value for real literal
478 -- Scale in Uint format
480 Exponent_Is_Negative
: Boolean;
481 -- Set true for negative exponent
483 Extended_Digit_Value
: Int
;
484 -- Extended digit value
486 Point_Scanned
: Boolean;
487 -- Flag for decimal point scanned in numeric literal
489 -----------------------
490 -- Local Subprograms --
491 -----------------------
493 procedure Error_Digit_Expected
;
494 -- Signal error of bad digit, Scan_Ptr points to the location at
495 -- which the digit was expected on input, and is unchanged on return.
497 procedure Scan_Integer
;
498 -- Procedure to scan integer literal. On entry, Scan_Ptr points to a
499 -- digit, on exit Scan_Ptr points past the last character of the
502 -- For each digit encountered, UI_Int_Value is multiplied by 10, and
503 -- the value of the digit added to the result. In addition, the
504 -- value in Scale is decremented by one for each actual digit
507 --------------------------
508 -- Error_Digit_Expected --
509 --------------------------
511 procedure Error_Digit_Expected
is
513 Error_Msg_S
("digit expected");
514 end Error_Digit_Expected
;
520 procedure Scan_Integer
is
522 -- Next character scanned
525 C
:= Source
(Scan_Ptr
);
527 -- Loop through digits (allowing underlines)
530 Accumulate_Checksum
(C
);
532 UI_Int_Value
* 10 + (Character'Pos (C
) - Character'Pos ('0'));
533 Scan_Ptr
:= Scan_Ptr
+ 1;
535 C
:= Source
(Scan_Ptr
);
537 -- Case of underline encountered
541 -- We do not accumulate the '_' in the checksum, so that
542 -- 1_234 is equivalent to 1234, and does not trigger
543 -- compilation for "minimal recompilation" (gnatmake -m).
546 Scan_Ptr
:= Scan_Ptr
+ 1;
547 C
:= Source
(Scan_Ptr
);
549 Error_No_Double_Underline
;
552 if C
not in '0' .. '9' then
553 Error_Digit_Expected
;
558 exit when C
not in '0' .. '9';
563 -- Start of Processing for Nlit
568 UI_Int_Value
:= Uint_0
;
572 Point_Scanned
:= False;
573 UI_Num_Value
:= UI_Int_Value
;
575 -- Various possibilities now for continuing the literal are period,
576 -- E/e (for exponent), or :/# (for based literal).
579 C
:= Source
(Scan_Ptr
);
583 -- Scan out point, but do not scan past .. which is a range
584 -- sequence, and must not be eaten up scanning a numeric literal.
586 while C
= '.' and then Source
(Scan_Ptr
+ 1) /= '.' loop
587 Accumulate_Checksum
('.');
589 if Point_Scanned
then
590 Error_Msg_S
("duplicate point ignored");
593 Point_Scanned
:= True;
594 Scan_Ptr
:= Scan_Ptr
+ 1;
595 C
:= Source
(Scan_Ptr
);
597 if C
not in '0' .. '9' then
599 ("real literal cannot end with point", Scan_Ptr
- 1);
602 UI_Num_Value
:= UI_Int_Value
;
606 -- Based literal case. The base is the value we already scanned.
607 -- In the case of colon, we insist that the following character
608 -- is indeed an extended digit or a period. This catches a number
609 -- of common errors, as well as catching the well known tricky
610 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;"
613 or else (C
= ':' and then
614 (Source
(Scan_Ptr
+ 1) = '.'
616 Source
(Scan_Ptr
+ 1) in '0' .. '9'
618 Source
(Scan_Ptr
+ 1) in 'A' .. 'Z'
620 Source
(Scan_Ptr
+ 1) in 'a' .. 'z'))
623 Obsolescent_Check
(Scan_Ptr
);
625 if Warn_On_Obsolescent_Feature
then
627 ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
629 ("\use ""'#"" instead?");
633 Accumulate_Checksum
(C
);
635 UI_Base
:= UI_Int_Value
;
637 if UI_Base
< 2 or else UI_Base
> 16 then
638 Error_Msg_SC
("base not 2-16");
642 Base
:= UI_To_Int
(UI_Base
);
643 Scan_Ptr
:= Scan_Ptr
+ 1;
645 -- Scan out extended integer [. integer]
647 C
:= Source
(Scan_Ptr
);
648 UI_Int_Value
:= Uint_0
;
652 if C
in '0' .. '9' then
653 Accumulate_Checksum
(C
);
654 Extended_Digit_Value
:=
655 Int
'(Character'Pos (C)) - Int'(Character'Pos ('0'));
657 elsif C
in 'A' .. 'F' then
658 Accumulate_Checksum
(Character'Val (Character'Pos (C
) + 32));
659 Extended_Digit_Value
:=
660 Int
'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
662 elsif C
in 'a' .. 'f' then
663 Accumulate_Checksum
(C
);
664 Extended_Digit_Value
:=
665 Int
'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
668 Error_Msg_S
("extended digit expected");
672 if Extended_Digit_Value
>= Base
then
673 Error_Msg_S
("digit '>= base");
676 UI_Int_Value
:= UI_Int_Value
* UI_Base
+ Extended_Digit_Value
;
678 Scan_Ptr
:= Scan_Ptr
+ 1;
679 C
:= Source
(Scan_Ptr
);
683 Accumulate_Checksum
('_');
684 Scan_Ptr
:= Scan_Ptr
+ 1;
685 C
:= Source
(Scan_Ptr
);
687 Error_No_Double_Underline
;
691 Accumulate_Checksum
('.');
693 if Point_Scanned
then
694 Error_Msg_S
("duplicate point ignored");
697 Scan_Ptr
:= Scan_Ptr
+ 1;
698 C
:= Source
(Scan_Ptr
);
699 Point_Scanned
:= True;
702 elsif C
= Base_Char
then
703 Accumulate_Checksum
(C
);
704 Scan_Ptr
:= Scan_Ptr
+ 1;
707 elsif C
= '#' or else C
= ':' then
708 Error_Msg_S
("based number delimiters must match");
709 Scan_Ptr
:= Scan_Ptr
+ 1;
712 elsif not Identifier_Char
(C
) then
713 if Base_Char
= '#' then
714 Error_Msg_S
("missing '#");
716 Error_Msg_S
("missing ':");
724 UI_Num_Value
:= UI_Int_Value
;
729 if not Point_Scanned
then
733 UI_Scale
:= UI_From_Int
(Scale
);
736 if Source
(Scan_Ptr
) = 'e' or else Source
(Scan_Ptr
) = 'E' then
737 Accumulate_Checksum
('e');
738 Scan_Ptr
:= Scan_Ptr
+ 1;
739 Exponent_Is_Negative
:= False;
741 if Source
(Scan_Ptr
) = '+' then
742 Accumulate_Checksum
('+');
743 Scan_Ptr
:= Scan_Ptr
+ 1;
745 elsif Source
(Scan_Ptr
) = '-' then
746 Accumulate_Checksum
('-');
748 if not Point_Scanned
then
750 ("negative exponent not allowed for integer literal");
752 Exponent_Is_Negative
:= True;
755 Scan_Ptr
:= Scan_Ptr
+ 1;
758 UI_Int_Value
:= Uint_0
;
760 if Source
(Scan_Ptr
) in '0' .. '9' then
763 Error_Digit_Expected
;
766 if Exponent_Is_Negative
then
767 UI_Scale
:= UI_Scale
- UI_Int_Value
;
769 UI_Scale
:= UI_Scale
+ UI_Int_Value
;
773 -- Case of real literal to be returned
775 if Point_Scanned
then
776 Token
:= Tok_Real_Literal
;
777 Real_Literal_Value
:=
783 -- Case of integer literal to be returned
786 Token
:= Tok_Integer_Literal
;
789 Int_Literal_Value
:= UI_Num_Value
;
791 -- Avoid doing possibly expensive calculations in cases like
792 -- parsing 163E800_000# when semantics will not be done anyway.
793 -- This is especially useful when parsing garbled input.
795 elsif Operating_Mode
/= Check_Syntax
796 and then (Serious_Errors_Detected
= 0 or else Try_Semantics
)
798 Int_Literal_Value
:= UI_Num_Value
* UI_Base
** UI_Scale
;
801 Int_Literal_Value
:= No_Uint
;
805 Accumulate_Token_Checksum
;
816 Delimiter
: Character;
817 -- Delimiter (first character of string)
820 -- Current source program character
823 -- Current character code value
826 -- Error flag for Scan_Wide call
828 procedure Error_Bad_String_Char
;
829 -- Signal bad character in string/character literal. On entry
830 -- Scan_Ptr points to the improper character encountered during the
831 -- scan. Scan_Ptr is not modified, so it still points to the bad
832 -- character on return.
834 procedure Error_Unterminated_String
;
835 -- Procedure called if a line terminator character is encountered
836 -- during scanning a string, meaning that the string is not properly
839 procedure Set_String
;
840 -- Procedure used to distinguish between string and operator symbol.
841 -- On entry the string has been scanned out, and its characters
842 -- start at Token_Ptr and end one character before Scan_Ptr. On exit
843 -- Token is set to Tok_String_Literal or Tok_Operator_Symbol as
844 -- appropriate, and Token_Node is appropriately initialized. In
845 -- addition, in the operator symbol case, Token_Name is
846 -- appropriately set.
848 ---------------------------
849 -- Error_Bad_String_Char --
850 ---------------------------
852 procedure Error_Bad_String_Char
is
853 C
: constant Character := Source
(Scan_Ptr
);
857 Error_Msg_S
("horizontal tab not allowed in string");
859 elsif C
= VT
or else C
= FF
then
860 Error_Msg_S
("format effector not allowed in string");
862 elsif C
in Upper_Half_Character
then
863 Error_Msg_S
("(Ada 83) upper half character not allowed");
866 Error_Msg_S
("control character not allowed in string");
868 end Error_Bad_String_Char
;
870 -------------------------------
871 -- Error_Unterminated_String --
872 -------------------------------
874 procedure Error_Unterminated_String
is
876 -- An interesting little refinement. Consider the following
879 -- A := "this is an unterminated string;
880 -- A := "this is an unterminated string &
881 -- P(A, "this is a parameter that didn't get terminated);
883 -- We fiddle a little to do slightly better placement in these
884 -- cases also if there is white space at the end of the line we
885 -- place the flag at the start of this white space, not at the
886 -- end. Note that we only have to test for blanks, since tabs
887 -- aren't allowed in strings in the first place and would have
888 -- caused an error message.
890 -- Two more cases that we treat specially are:
892 -- A := "this string uses the wrong terminator'
893 -- A := "this string uses the wrong terminator' &
895 -- In these cases we give a different error message as well
897 -- We actually reposition the scan pointer to the point where we
898 -- place the flag in these cases, since it seems a better bet on
899 -- the original intention.
901 while Source
(Scan_Ptr
- 1) = ' '
902 or else Source
(Scan_Ptr
- 1) = '&'
904 Scan_Ptr
:= Scan_Ptr
- 1;
908 -- Check for case of incorrect string terminator, but single quote
909 -- is not considered incorrect if the opening terminator misused
910 -- a single quote (error message already given).
913 and then Source
(Scan_Ptr
- 1) = '''
917 ("incorrect string terminator character", Scan_Ptr
- 1);
921 if Source
(Scan_Ptr
- 1) = ';' then
922 Scan_Ptr
:= Scan_Ptr
- 1;
925 if Source
(Scan_Ptr
- 1) = ')' then
926 Scan_Ptr
:= Scan_Ptr
- 1;
931 Error_Msg_S
("missing string quote");
932 end Error_Unterminated_String
;
938 procedure Set_String
is
939 Slen
: constant Int
:= Int
(Scan_Ptr
- Token_Ptr
- 2);
945 -- Token_Name is currently set to Error_Name. The following
946 -- section of code resets Token_Name to the proper Name_Op_xx
947 -- value if the string is a valid operator symbol, otherwise it is
948 -- left set to Error_Name.
951 C1
:= Source
(Token_Ptr
+ 1);
955 Token_Name
:= Name_Op_Eq
;
958 Token_Name
:= Name_Op_Gt
;
961 Token_Name
:= Name_Op_Lt
;
964 Token_Name
:= Name_Op_Add
;
967 Token_Name
:= Name_Op_Subtract
;
970 Token_Name
:= Name_Op_Concat
;
973 Token_Name
:= Name_Op_Multiply
;
976 Token_Name
:= Name_Op_Divide
;
983 C1
:= Source
(Token_Ptr
+ 1);
984 C2
:= Source
(Token_Ptr
+ 2);
986 if C1
= '*' and then C2
= '*' then
987 Token_Name
:= Name_Op_Expon
;
992 Token_Name
:= Name_Op_Ne
;
994 Token_Name
:= Name_Op_Le
;
996 Token_Name
:= Name_Op_Ge
;
999 elsif (C1
= 'O' or else C1
= 'o') and then -- OR
1000 (C2
= 'R' or else C2
= 'r')
1002 Token_Name
:= Name_Op_Or
;
1006 C1
:= Source
(Token_Ptr
+ 1);
1007 C2
:= Source
(Token_Ptr
+ 2);
1008 C3
:= Source
(Token_Ptr
+ 3);
1010 if (C1
= 'A' or else C1
= 'a') and then -- AND
1011 (C2
= 'N' or else C2
= 'n') and then
1012 (C3
= 'D' or else C3
= 'd')
1014 Token_Name
:= Name_Op_And
;
1016 elsif (C1
= 'A' or else C1
= 'a') and then -- ABS
1017 (C2
= 'B' or else C2
= 'b') and then
1018 (C3
= 'S' or else C3
= 's')
1020 Token_Name
:= Name_Op_Abs
;
1022 elsif (C1
= 'M' or else C1
= 'm') and then -- MOD
1023 (C2
= 'O' or else C2
= 'o') and then
1024 (C3
= 'D' or else C3
= 'd')
1026 Token_Name
:= Name_Op_Mod
;
1028 elsif (C1
= 'N' or else C1
= 'n') and then -- NOT
1029 (C2
= 'O' or else C2
= 'o') and then
1030 (C3
= 'T' or else C3
= 't')
1032 Token_Name
:= Name_Op_Not
;
1034 elsif (C1
= 'R' or else C1
= 'r') and then -- REM
1035 (C2
= 'E' or else C2
= 'e') and then
1036 (C3
= 'M' or else C3
= 'm')
1038 Token_Name
:= Name_Op_Rem
;
1040 elsif (C1
= 'X' or else C1
= 'x') and then -- XOR
1041 (C2
= 'O' or else C2
= 'o') and then
1042 (C3
= 'R' or else C3
= 'r')
1044 Token_Name
:= Name_Op_Xor
;
1049 -- If it is an operator symbol, then Token_Name is set. If it is
1050 -- some other string value, then Token_Name still contains
1053 if Token_Name
= Error_Name
then
1054 Token
:= Tok_String_Literal
;
1057 Token
:= Tok_Operator_Symbol
;
1061 -- Start of processing for Slit
1064 -- On entry, Scan_Ptr points to the opening character of the string
1065 -- which is either a percent, double quote, or apostrophe (single
1066 -- quote). The latter case is an error detected by the character
1069 Delimiter
:= Source
(Scan_Ptr
);
1070 Accumulate_Checksum
(Delimiter
);
1072 Scan_Ptr
:= Scan_Ptr
+ 1;
1074 -- Loop to scan out characters of string literal
1077 C
:= Source
(Scan_Ptr
);
1079 if C
= Delimiter
then
1080 Accumulate_Checksum
(C
);
1081 Scan_Ptr
:= Scan_Ptr
+ 1;
1082 exit when Source
(Scan_Ptr
) /= Delimiter
;
1083 Code
:= Get_Char_Code
(C
);
1084 Accumulate_Checksum
(C
);
1085 Scan_Ptr
:= Scan_Ptr
+ 1;
1088 if C
= '"' and then Delimiter
= '%' then
1090 ("quote not allowed in percent delimited string");
1091 Code
:= Get_Char_Code
(C
);
1092 Scan_Ptr
:= Scan_Ptr
+ 1;
1095 and then Wide_Character_Encoding_Method
1096 in WC_ESC_Encoding_Method
)
1097 or else (C
in Upper_Half_Character
1098 and then Upper_Half_Encoding
)
1100 and then Source
(Scan_Ptr
+ 1) = '"'
1101 and then Identifier_Char
(Source
(Scan_Ptr
+ 2)))
1104 Scan_Wide
(Source
, Scan_Ptr
, Code
, Err
);
1107 Error_Illegal_Wide_Character
;
1108 Code
:= Get_Char_Code
(' ');
1111 Accumulate_Checksum
(Code
);
1113 if Ada_Version
>= Ada_05
1114 and then Is_UTF_32_Non_Graphic
(UTF_32
(Code
))
1117 ("(Ada 2005) non-graphic character not permitted " &
1118 "in string literal", Wptr
);
1122 Accumulate_Checksum
(C
);
1124 if C
not in Graphic_Character
then
1125 if C
in Line_Terminator
then
1126 Error_Unterminated_String
;
1129 elsif C
in Upper_Half_Character
then
1130 if Ada_Version
= Ada_83
then
1131 Error_Bad_String_Char
;
1135 Error_Bad_String_Char
;
1139 Code
:= Get_Char_Code
(C
);
1140 Scan_Ptr
:= Scan_Ptr
+ 1;
1144 Store_String_Char
(Code
);
1146 if not In_Character_Range
(Code
) then
1147 Wide_Character_Found
:= True;
1151 String_Literal_Id
:= End_String
;
1156 -- Start of processing for Scan
1159 Prev_Token
:= Token
;
1160 Prev_Token_Ptr
:= Token_Ptr
;
1161 Token_Name
:= Error_Name
;
1163 -- The following loop runs more than once only if a format effector
1164 -- (tab, vertical tab, form feed, line feed, carriage return) is
1165 -- encountered and skipped, or some error situation, such as an
1166 -- illegal character, is encountered.
1168 <<Scan_Next_Character
>>
1171 -- Skip past blanks, loop is opened up for speed
1173 while Source
(Scan_Ptr
) = ' ' loop
1174 if Source
(Scan_Ptr
+ 1) /= ' ' then
1175 Scan_Ptr
:= Scan_Ptr
+ 1;
1179 if Source
(Scan_Ptr
+ 2) /= ' ' then
1180 Scan_Ptr
:= Scan_Ptr
+ 2;
1184 if Source
(Scan_Ptr
+ 3) /= ' ' then
1185 Scan_Ptr
:= Scan_Ptr
+ 3;
1189 if Source
(Scan_Ptr
+ 4) /= ' ' then
1190 Scan_Ptr
:= Scan_Ptr
+ 4;
1194 if Source
(Scan_Ptr
+ 5) /= ' ' then
1195 Scan_Ptr
:= Scan_Ptr
+ 5;
1199 if Source
(Scan_Ptr
+ 6) /= ' ' then
1200 Scan_Ptr
:= Scan_Ptr
+ 6;
1204 if Source
(Scan_Ptr
+ 7) /= ' ' then
1205 Scan_Ptr
:= Scan_Ptr
+ 7;
1209 Scan_Ptr
:= Scan_Ptr
+ 8;
1212 -- We are now at a non-blank character, which is the first character
1213 -- of the token we will scan, and hence the value of Token_Ptr.
1215 Token_Ptr
:= Scan_Ptr
;
1217 -- Here begins the main case statement which transfers control on the
1218 -- basis of the non-blank character we have encountered.
1220 case Source
(Scan_Ptr
) is
1222 -- Line terminator characters
1224 when CR | LF | FF | VT
=>
1225 goto Scan_Line_Terminator
;
1227 -- Horizontal tab, just skip past it
1230 if Style_Check
then Style
.Check_HT
; end if;
1231 Scan_Ptr
:= Scan_Ptr
+ 1;
1233 -- End of file character, treated as an end of file only if it is
1234 -- the last character in the buffer, otherwise it is ignored.
1237 if Scan_Ptr
= Source_Last
(Current_Source_File
) then
1242 Scan_Ptr
:= Scan_Ptr
+ 1;
1248 Accumulate_Checksum
('&');
1250 if Source
(Scan_Ptr
+ 1) = '&' then
1251 Error_Msg_S
("'&'& should be `AND THEN`");
1252 Scan_Ptr
:= Scan_Ptr
+ 2;
1257 Scan_Ptr
:= Scan_Ptr
+ 1;
1258 Token
:= Tok_Ampersand
;
1262 -- Asterisk (can be multiplication operator or double asterisk which
1263 -- is the exponentiation compound delimiter).
1266 Accumulate_Checksum
('*');
1268 if Source
(Scan_Ptr
+ 1) = '*' then
1269 Accumulate_Checksum
('*');
1270 Scan_Ptr
:= Scan_Ptr
+ 2;
1271 Token
:= Tok_Double_Asterisk
;
1275 Scan_Ptr
:= Scan_Ptr
+ 1;
1276 Token
:= Tok_Asterisk
;
1280 -- Colon, which can either be an isolated colon, or part of an
1281 -- assignment compound delimiter.
1284 Accumulate_Checksum
(':');
1286 if Double_Char_Token
('=') then
1287 Token
:= Tok_Colon_Equal
;
1288 if Style_Check
then Style
.Check_Colon_Equal
; end if;
1291 elsif Source
(Scan_Ptr
+ 1) = '-'
1292 and then Source
(Scan_Ptr
+ 2) /= '-'
1294 Token
:= Tok_Colon_Equal
;
1295 Error_Msg
(":- should be :=", Scan_Ptr
);
1296 Scan_Ptr
:= Scan_Ptr
+ 2;
1300 Scan_Ptr
:= Scan_Ptr
+ 1;
1302 if Style_Check
then Style
.Check_Colon
; end if;
1309 Accumulate_Checksum
('(');
1310 Scan_Ptr
:= Scan_Ptr
+ 1;
1311 Token
:= Tok_Left_Paren
;
1312 if Style_Check
then Style
.Check_Left_Paren
; end if;
1318 if Source
(Scan_Ptr
+ 1) = '"' then
1319 goto Scan_Wide_Character
;
1322 Error_Msg_S
("illegal character, replaced by ""(""");
1323 Scan_Ptr
:= Scan_Ptr
+ 1;
1324 Token
:= Tok_Left_Paren
;
1331 Error_Msg_S
("illegal character, replaced by ""(""");
1332 Scan_Ptr
:= Scan_Ptr
+ 1;
1333 Token
:= Tok_Left_Paren
;
1339 Accumulate_Checksum
(',');
1340 Scan_Ptr
:= Scan_Ptr
+ 1;
1342 if Style_Check
then Style
.Check_Comma
; end if;
1345 -- Dot, which is either an isolated period, or part of a double dot
1346 -- compound delimiter sequence. We also check for the case of a
1347 -- digit following the period, to give a better error message.
1350 Accumulate_Checksum
('.');
1352 if Double_Char_Token
('.') then
1353 Token
:= Tok_Dot_Dot
;
1354 if Style_Check
then Style
.Check_Dot_Dot
; end if;
1357 elsif Source
(Scan_Ptr
+ 1) in '0' .. '9' then
1358 Error_Msg_S
("numeric literal cannot start with point");
1359 Scan_Ptr
:= Scan_Ptr
+ 1;
1362 Scan_Ptr
:= Scan_Ptr
+ 1;
1367 -- Equal, which can either be an equality operator, or part of the
1368 -- arrow (=>) compound delimiter.
1371 Accumulate_Checksum
('=');
1373 if Double_Char_Token
('>') then
1375 if Style_Check
then Style
.Check_Arrow
; end if;
1378 elsif Source
(Scan_Ptr
+ 1) = '=' then
1379 Error_Msg_S
("== should be =");
1380 Scan_Ptr
:= Scan_Ptr
+ 1;
1383 Scan_Ptr
:= Scan_Ptr
+ 1;
1387 -- Greater than, which can be a greater than operator, greater than
1388 -- or equal operator, or first character of a right label bracket.
1391 Accumulate_Checksum
('>');
1393 if Double_Char_Token
('=') then
1394 Token
:= Tok_Greater_Equal
;
1397 elsif Double_Char_Token
('>') then
1398 Token
:= Tok_Greater_Greater
;
1402 Scan_Ptr
:= Scan_Ptr
+ 1;
1403 Token
:= Tok_Greater
;
1407 -- Less than, which can be a less than operator, less than or equal
1408 -- operator, or the first character of a left label bracket, or the
1409 -- first character of a box (<>) compound delimiter.
1412 Accumulate_Checksum
('<');
1414 if Double_Char_Token
('=') then
1415 Token
:= Tok_Less_Equal
;
1418 elsif Double_Char_Token
('>') then
1420 if Style_Check
then Style
.Check_Box
; end if;
1423 elsif Double_Char_Token
('<') then
1424 Token
:= Tok_Less_Less
;
1428 Scan_Ptr
:= Scan_Ptr
+ 1;
1433 -- Minus, which is either a subtraction operator, or the first
1434 -- character of double minus starting a comment
1436 when '-' => Minus_Case
: begin
1437 if Source
(Scan_Ptr
+ 1) = '>' then
1438 Error_Msg_S
("invalid token");
1439 Scan_Ptr
:= Scan_Ptr
+ 2;
1443 elsif Source
(Scan_Ptr
+ 1) /= '-' then
1444 Accumulate_Checksum
('-');
1445 Scan_Ptr
:= Scan_Ptr
+ 1;
1451 else -- Source (Scan_Ptr + 1) = '-' then
1452 if Style_Check
then Style
.Check_Comment
; end if;
1453 Scan_Ptr
:= Scan_Ptr
+ 2;
1454 Start_Of_Comment
:= Scan_Ptr
;
1456 -- Loop to scan comment (this loop runs more than once only if
1457 -- a horizontal tab or other non-graphic character is scanned)
1460 -- Scan to non graphic character (opened up for speed)
1462 -- Note that we just eat left brackets, which means that
1463 -- bracket notation cannot be used for end of line
1464 -- characters in comments. This seems a reasonable choice,
1465 -- since no one would ever use brackets notation in a real
1466 -- program in this situation, and if we allow brackets
1467 -- notation, we forbid some valid comments which contain a
1468 -- brackets sequence that happens to match an end of line
1472 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1473 Scan_Ptr
:= Scan_Ptr
+ 1;
1474 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1475 Scan_Ptr
:= Scan_Ptr
+ 1;
1476 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1477 Scan_Ptr
:= Scan_Ptr
+ 1;
1478 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1479 Scan_Ptr
:= Scan_Ptr
+ 1;
1480 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1481 Scan_Ptr
:= Scan_Ptr
+ 1;
1484 -- Keep going if horizontal tab
1486 if Source
(Scan_Ptr
) = HT
then
1487 if Style_Check
then Style
.Check_HT
; end if;
1488 Scan_Ptr
:= Scan_Ptr
+ 1;
1490 -- Terminate scan of comment if line terminator
1492 elsif Source
(Scan_Ptr
) in Line_Terminator
then
1495 -- Terminate scan of comment if end of file encountered
1496 -- (embedded EOF character or real last character in file)
1498 elsif Source
(Scan_Ptr
) = EOF
then
1501 -- If we have a wide character, we have to scan it out,
1502 -- because it might be a legitimate line terminator
1504 elsif (Source
(Scan_Ptr
) = ESC
1505 and then Identifier_Char
(ESC
))
1507 (Source
(Scan_Ptr
) in Upper_Half_Character
1508 and then Upper_Half_Encoding
)
1511 Wptr
: constant Source_Ptr
:= Scan_Ptr
;
1516 Scan_Wide
(Source
, Scan_Ptr
, Code
, Err
);
1518 -- If not well formed wide character, then just skip
1519 -- past it and ignore it.
1522 Scan_Ptr
:= Wptr
+ 1;
1524 -- If UTF_32 terminator, terminate comment scan
1526 elsif Is_UTF_32_Line_Terminator
(UTF_32
(Code
)) then
1532 -- Keep going if character in 80-FF range, or is ESC. These
1533 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1534 -- They are allowed even in Ada 83 mode according to the
1535 -- approved AI. ESC was added to the AI in June 93.
1537 elsif Source
(Scan_Ptr
) in Upper_Half_Character
1538 or else Source
(Scan_Ptr
) = ESC
1540 Scan_Ptr
:= Scan_Ptr
+ 1;
1542 -- Otherwise we have an illegal comment character
1545 Error_Illegal_Character
;
1549 -- Note that, except when comments are tokens, we do NOT
1550 -- execute a return here, instead we fall through to reexecute
1551 -- the scan loop to look for a token.
1553 if Comment_Is_Token
then
1554 Name_Len
:= Integer (Scan_Ptr
- Start_Of_Comment
);
1555 Name_Buffer
(1 .. Name_Len
) :=
1556 String (Source
(Start_Of_Comment
.. Scan_Ptr
- 1));
1557 Comment_Id
:= Name_Find
;
1558 Token
:= Tok_Comment
;
1564 -- Double quote starting a string literal
1571 -- Percent starting a string literal
1574 Obsolescent_Check
(Token_Ptr
);
1576 if Warn_On_Obsolescent_Feature
then
1578 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1580 ("\use """""" instead?");
1587 -- Apostrophe. This can either be the start of a character literal,
1588 -- or an isolated apostrophe used in a qualified expression or an
1589 -- attribute. We treat it as a character literal if it does not
1590 -- follow a right parenthesis, identifier, the keyword ALL or
1591 -- a literal. This means that we correctly treat constructs like:
1593 -- A := CHARACTER'('A');
1595 -- Note that RM-2.2(7) does not require a separator between
1596 -- "CHARACTER" and "'" in the above.
1598 when ''' => Char_Literal_Case
: declare
1603 Accumulate_Checksum
(''');
1604 Scan_Ptr
:= Scan_Ptr
+ 1;
1606 -- Here is where we make the test to distinguish the cases. Treat
1607 -- as apostrophe if previous token is an identifier, right paren
1608 -- or the reserved word "all" (latter case as in A.all'Address)
1609 -- (or the reserved word "project" in project files). Also treat
1610 -- it as apostrophe after a literal (this catches some legitimate
1611 -- cases, like A."abs"'Address, and also gives better error
1612 -- behavior for impossible cases like 123'xxx).
1614 if Prev_Token
= Tok_Identifier
1615 or else Prev_Token
= Tok_Right_Paren
1616 or else Prev_Token
= Tok_All
1617 or else Prev_Token
= Tok_Project
1618 or else Prev_Token
in Token_Class_Literal
1620 Token
:= Tok_Apostrophe
;
1621 if Style_Check
then Style
.Check_Apostrophe
; end if;
1624 -- Otherwise the apostrophe starts a character literal
1627 -- Case of wide character literal
1629 if (Source
(Scan_Ptr
) = ESC
1631 Wide_Character_Encoding_Method
in WC_ESC_Encoding_Method
)
1633 (Source
(Scan_Ptr
) in Upper_Half_Character
1635 Upper_Half_Encoding
)
1637 (Source
(Scan_Ptr
) = '['
1639 Source
(Scan_Ptr
+ 1) = '"')
1642 Scan_Wide
(Source
, Scan_Ptr
, Code
, Err
);
1643 Accumulate_Checksum
(Code
);
1646 Error_Illegal_Wide_Character
;
1647 Code
:= Character'Pos (' ');
1649 elsif Ada_Version
>= Ada_05
1650 and then Is_UTF_32_Non_Graphic
(UTF_32
(Code
))
1653 ("(Ada 2005) non-graphic character not permitted " &
1654 "in character literal", Wptr
);
1657 if Source
(Scan_Ptr
) /= ''' then
1658 Error_Msg_S
("missing apostrophe");
1660 Scan_Ptr
:= Scan_Ptr
+ 1;
1663 -- If we do not find a closing quote in the expected place then
1664 -- assume that we have a misguided attempt at a string literal.
1666 -- However, if previous token is RANGE, then we return an
1667 -- apostrophe instead since this gives better error recovery
1669 elsif Source
(Scan_Ptr
+ 1) /= ''' then
1670 if Prev_Token
= Tok_Range
then
1671 Token
:= Tok_Apostrophe
;
1675 Scan_Ptr
:= Scan_Ptr
- 1;
1677 ("strings are delimited by double quote character");
1683 -- Otherwise we have a (non-wide) character literal
1686 Accumulate_Checksum
(Source
(Scan_Ptr
));
1688 if Source
(Scan_Ptr
) not in Graphic_Character
then
1689 if Source
(Scan_Ptr
) in Upper_Half_Character
then
1690 if Ada_Version
= Ada_83
then
1691 Error_Illegal_Character
;
1695 Error_Illegal_Character
;
1699 Code
:= Get_Char_Code
(Source
(Scan_Ptr
));
1700 Scan_Ptr
:= Scan_Ptr
+ 2;
1703 -- Fall through here with Scan_Ptr updated past the closing
1704 -- quote, and Code set to the Char_Code value for the literal
1706 Accumulate_Checksum
(''');
1707 Token
:= Tok_Char_Literal
;
1708 Set_Character_Literal_Name
(Code
);
1709 Token_Name
:= Name_Find
;
1710 Character_Code
:= Code
;
1714 end Char_Literal_Case
;
1716 -- Right parenthesis
1719 Accumulate_Checksum
(')');
1720 Scan_Ptr
:= Scan_Ptr
+ 1;
1721 Token
:= Tok_Right_Paren
;
1722 if Style_Check
then Style
.Check_Right_Paren
; end if;
1725 -- Right bracket or right brace, treated as right paren
1728 Error_Msg_S
("illegal character, replaced by "")""");
1729 Scan_Ptr
:= Scan_Ptr
+ 1;
1730 Token
:= Tok_Right_Paren
;
1733 -- Slash (can be division operator or first character of not equal)
1736 Accumulate_Checksum
('/');
1738 if Double_Char_Token
('=') then
1739 Token
:= Tok_Not_Equal
;
1742 Scan_Ptr
:= Scan_Ptr
+ 1;
1750 Accumulate_Checksum
(';');
1751 Scan_Ptr
:= Scan_Ptr
+ 1;
1752 Token
:= Tok_Semicolon
;
1753 if Style_Check
then Style
.Check_Semicolon
; end if;
1758 when '|' => Vertical_Bar_Case
: begin
1759 Accumulate_Checksum
('|');
1761 -- Special check for || to give nice message
1763 if Source
(Scan_Ptr
+ 1) = '|' then
1764 Error_Msg_S
("""'|'|"" should be `OR ELSE`");
1765 Scan_Ptr
:= Scan_Ptr
+ 2;
1770 Scan_Ptr
:= Scan_Ptr
+ 1;
1771 Token
:= Tok_Vertical_Bar
;
1772 if Style_Check
then Style
.Check_Vertical_Bar
; end if;
1775 end Vertical_Bar_Case
;
1777 -- Exclamation, replacement character for vertical bar
1779 when '!' => Exclamation_Case
: begin
1780 Accumulate_Checksum
('!');
1781 Obsolescent_Check
(Token_Ptr
);
1783 if Warn_On_Obsolescent_Feature
then
1785 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1787 ("\use ""'|"" instead?");
1790 if Source
(Scan_Ptr
+ 1) = '=' then
1791 Error_Msg_S
("'!= should be /=");
1792 Scan_Ptr
:= Scan_Ptr
+ 2;
1793 Token
:= Tok_Not_Equal
;
1797 Scan_Ptr
:= Scan_Ptr
+ 1;
1798 Token
:= Tok_Vertical_Bar
;
1801 end Exclamation_Case
;
1805 when '+' => Plus_Case
: begin
1806 Accumulate_Checksum
('+');
1807 Scan_Ptr
:= Scan_Ptr
+ 1;
1812 -- Digits starting a numeric literal
1817 if Identifier_Char
(Source
(Scan_Ptr
)) then
1819 ("delimiter required between literal and identifier");
1824 -- Lower case letters
1828 Underline_Found
:= False;
1829 Name_Buffer
(1) := Source
(Scan_Ptr
);
1830 Accumulate_Checksum
(Name_Buffer
(1));
1831 Scan_Ptr
:= Scan_Ptr
+ 1;
1832 goto Scan_Identifier
;
1834 -- Upper case letters
1838 Underline_Found
:= False;
1840 Character'Val (Character'Pos (Source
(Scan_Ptr
)) + 32);
1841 Accumulate_Checksum
(Name_Buffer
(1));
1842 Scan_Ptr
:= Scan_Ptr
+ 1;
1843 goto Scan_Identifier
;
1845 -- Underline character
1848 if Special_Characters
('_') then
1849 Token_Ptr
:= Scan_Ptr
;
1850 Scan_Ptr
:= Scan_Ptr
+ 1;
1851 Token
:= Tok_Special
;
1852 Special_Character
:= '_';
1856 Error_Msg_S
("identifier cannot start with underline");
1858 Name_Buffer
(1) := '_';
1859 Scan_Ptr
:= Scan_Ptr
+ 1;
1860 Underline_Found
:= False;
1861 goto Scan_Identifier
;
1863 -- Space (not possible, because we scanned past blanks)
1866 raise Program_Error
;
1868 -- Characters in top half of ASCII 8-bit chart
1870 when Upper_Half_Character
=>
1872 -- Wide character case
1874 if Upper_Half_Encoding
then
1875 goto Scan_Wide_Character
;
1877 -- Otherwise we have OK Latin-1 character
1880 -- Upper half characters may possibly be identifier letters
1881 -- but can never be digits, so Identifier_Char can be used to
1882 -- test for a valid start of identifier character.
1884 if Identifier_Char
(Source
(Scan_Ptr
)) then
1886 Underline_Found
:= False;
1887 goto Scan_Identifier
;
1889 Error_Illegal_Character
;
1895 -- ESC character, possible start of identifier if wide characters
1896 -- using ESC encoding are allowed in identifiers, which we can
1897 -- tell by looking at the Identifier_Char flag for ESC, which is
1898 -- only true if these conditions are met. In Ada 2005 mode, may
1899 -- also be valid UTF_32 space or line terminator character.
1901 if Identifier_Char
(ESC
) then
1903 goto Scan_Wide_Character
;
1905 Error_Illegal_Character
;
1908 -- Invalid control characters
1910 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | ASCII
.SO |
1911 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1912 EM | FS | GS | RS | US | DEL
1914 Error_Illegal_Character
;
1916 -- Invalid graphic characters
1918 when '#' |
'$' |
'?' |
'@' |
'`' |
'\' |
'^' |
'~' =>
1920 -- If Set_Special_Character has been called for this character,
1921 -- set Scans.Special_Character and return a Special token.
1923 if Special_Characters
(Source
(Scan_Ptr
)) then
1924 Token_Ptr
:= Scan_Ptr
;
1925 Token
:= Tok_Special
;
1926 Special_Character
:= Source
(Scan_Ptr
);
1927 Scan_Ptr
:= Scan_Ptr
+ 1;
1930 -- Otherwise, this is an illegal character
1933 Error_Illegal_Character
;
1936 -- End switch on non-blank character
1940 -- End loop past format effectors. The exit from this loop is by
1941 -- executing a return statement following completion of token scan
1942 -- (control never falls out of this loop to the code which follows)
1946 -- Wide_Character scanning routine. On entry we have encountered the
1947 -- initial character of a wide character sequence.
1949 <<Scan_Wide_Character
>>
1958 Scan_Wide
(Source
, Scan_Ptr
, Code
, Err
);
1960 -- If bad wide character, signal error and continue scan
1963 Error_Illegal_Wide_Character
;
1964 goto Scan_Next_Character
;
1967 Cat
:= Get_Category
(UTF_32
(Code
));
1969 -- If OK letter, reset scan ptr and go scan identifier
1971 if Is_UTF_32_Letter
(Cat
) then
1974 Underline_Found
:= False;
1975 goto Scan_Identifier
;
1977 -- If OK wide space, ignore and keep scanning (we do not include
1978 -- any ignored spaces in checksum)
1980 elsif Is_UTF_32_Space
(Cat
) then
1981 goto Scan_Next_Character
;
1983 -- If OK wide line terminator, terminate current line
1985 elsif Is_UTF_32_Line_Terminator
(UTF_32
(Code
)) then
1987 goto Scan_Line_Terminator
;
1989 -- Punctuation is an error (at start of identifier)
1991 elsif Is_UTF_32_Punctuation
(Cat
) then
1993 ("identifier cannot start with punctuation", Wptr
);
1996 Underline_Found
:= False;
1997 goto Scan_Identifier
;
1999 -- Mark character is an error (at start of identifer)
2001 elsif Is_UTF_32_Mark
(Cat
) then
2003 ("identifier cannot start with mark character", Wptr
);
2006 Underline_Found
:= False;
2007 goto Scan_Identifier
;
2009 -- Other format character is an error (at start of identifer)
2011 elsif Is_UTF_32_Other
(Cat
) then
2013 ("identifier cannot start with other format character", Wptr
);
2016 Underline_Found
:= False;
2017 goto Scan_Identifier
;
2019 -- Extended digit character is an error. Could be bad start of
2020 -- identifier or bad literal. Not worth doing too much to try to
2021 -- distinguish these cases, but we will do a little bit.
2023 elsif Is_UTF_32_Digit
(Cat
) then
2025 ("identifier cannot start with digit character", Wptr
);
2028 Underline_Found
:= False;
2029 goto Scan_Identifier
;
2031 -- All other wide characters are illegal here
2034 Error_Illegal_Wide_Character
;
2035 goto Scan_Next_Character
;
2039 -- Routine to scan line terminator. On entry Scan_Ptr points to a
2040 -- character which is one of FF,LR,CR,VT, or one of the wide characters
2041 -- that is treated as a line termiantor.
2043 <<Scan_Line_Terminator
>>
2045 -- Check line too long
2049 -- Set Token_Ptr, if End_Of_Line is a token, for the case when it is
2052 if End_Of_Line_Is_Token
then
2053 Token_Ptr
:= Scan_Ptr
;
2060 Skip_Line_Terminators
(Scan_Ptr
, Physical
);
2062 -- If we are at start of physical line, update scan pointers to
2063 -- reflect the start of the new line.
2066 Current_Line_Start
:= Scan_Ptr
;
2067 Start_Column
:= Set_Start_Column
;
2068 First_Non_Blank_Location
:= Scan_Ptr
;
2070 -- If End_Of_Line is a token, we return it as it is a
2073 if End_Of_Line_Is_Token
then
2074 Token
:= Tok_End_Of_Line
;
2080 goto Scan_Next_Character
;
2082 -- Identifier scanning routine. On entry, some initial characters of
2083 -- the identifier may have already been stored in Name_Buffer. If so,
2084 -- Name_Len has the number of characters stored. otherwise Name_Len is
2085 -- set to zero on entry. Underline_Found is also set False on entry.
2089 -- This loop scans as fast as possible past lower half letters and
2090 -- digits, which we expect to be the most common characters.
2093 if Source
(Scan_Ptr
) in 'a' .. 'z'
2094 or else Source
(Scan_Ptr
) in '0' .. '9'
2096 Name_Buffer
(Name_Len
+ 1) := Source
(Scan_Ptr
);
2097 Accumulate_Checksum
(Source
(Scan_Ptr
));
2099 elsif Source
(Scan_Ptr
) in 'A' .. 'Z' then
2100 Name_Buffer
(Name_Len
+ 1) :=
2101 Character'Val (Character'Pos (Source
(Scan_Ptr
)) + 32);
2102 Accumulate_Checksum
(Name_Buffer
(Name_Len
+ 1));
2108 Underline_Found
:= False;
2109 Scan_Ptr
:= Scan_Ptr
+ 1;
2110 Name_Len
:= Name_Len
+ 1;
2113 -- If we fall through, then we have encountered either an underline
2114 -- character, or an extended identifier character (i.e. one from the
2115 -- upper half), or a wide character, or an identifier terminator. The
2116 -- initial test speeds us up in the most common case where we have
2117 -- an identifier terminator. Note that ESC is an identifier character
2118 -- only if a wide character encoding method that uses ESC encoding
2119 -- is active, so if we find an ESC character we know that we have a
2122 if Identifier_Char
(Source
(Scan_Ptr
)) then
2124 -- Case of underline
2126 if Source
(Scan_Ptr
) = '_' then
2127 Accumulate_Checksum
('_');
2129 if Underline_Found
then
2130 Error_No_Double_Underline
;
2132 Underline_Found
:= True;
2133 Name_Len
:= Name_Len
+ 1;
2134 Name_Buffer
(Name_Len
) := '_';
2137 Scan_Ptr
:= Scan_Ptr
+ 1;
2138 goto Scan_Identifier
;
2140 -- Upper half character
2142 elsif Source
(Scan_Ptr
) in Upper_Half_Character
2143 and then not Upper_Half_Encoding
2145 Accumulate_Checksum
(Source
(Scan_Ptr
));
2146 Store_Encoded_Character
2147 (Get_Char_Code
(Fold_Lower
(Source
(Scan_Ptr
))));
2148 Scan_Ptr
:= Scan_Ptr
+ 1;
2149 Underline_Found
:= False;
2150 goto Scan_Identifier
;
2152 -- Left bracket not followed by a quote terminates an identifier.
2153 -- This is an error, but we don't want to give a junk error msg
2154 -- about wide characters in this case!
2156 elsif Source
(Scan_Ptr
) = '['
2157 and then Source
(Scan_Ptr
+ 1) /= '"'
2161 -- We know we have a wide character encoding here (the current
2162 -- character is either ESC, left bracket, or an upper half
2163 -- character depending on the encoding method).
2166 -- Scan out the wide character and insert the appropriate
2167 -- encoding into the name table entry for the identifier.
2177 Scan_Wide
(Source
, Scan_Ptr
, Code
, Err
);
2179 -- If error, signal error
2182 Error_Illegal_Wide_Character
;
2184 -- If the character scanned is a normal identifier
2185 -- character, then we treat it that way.
2187 elsif In_Character_Range
(Code
)
2188 and then Identifier_Char
(Get_Character
(Code
))
2190 Chr
:= Get_Character
(Code
);
2191 Accumulate_Checksum
(Chr
);
2192 Store_Encoded_Character
2193 (Get_Char_Code
(Fold_Lower
(Chr
)));
2194 Underline_Found
:= False;
2196 -- Here if not a normal identifier character
2199 -- Make sure we are allowing wide characters in
2200 -- identifiers. Note that we allow wide character
2201 -- notation for an OK identifier character. This in
2202 -- particular allows bracket or other notation to be
2203 -- used for upper half letters.
2205 -- Wide characters are always allowed in Ada 2005
2207 if Identifier_Character_Set
/= 'w'
2208 and then Ada_Version
< Ada_05
2211 ("wide character not allowed in identifier", Wptr
);
2214 Cat
:= Get_Category
(UTF_32
(Code
));
2216 -- If OK letter, store it folding to upper case. Note
2217 -- that we include the folded letter in the checksum.
2219 if Is_UTF_32_Letter
(Cat
) then
2221 Char_Code
(UTF_32_To_Upper_Case
(UTF_32
(Code
)));
2222 Accumulate_Checksum
(Code
);
2223 Store_Encoded_Character
(Code
);
2224 Underline_Found
:= False;
2226 -- If OK extended digit or mark, then store it
2228 elsif Is_UTF_32_Digit
(Cat
)
2229 or else Is_UTF_32_Mark
(Cat
)
2231 Accumulate_Checksum
(Code
);
2232 Store_Encoded_Character
(Code
);
2233 Underline_Found
:= False;
2235 -- Wide punctuation is also stored, but counts as an
2236 -- underline character for error checking purposes.
2238 elsif Is_UTF_32_Punctuation
(Cat
) then
2239 Accumulate_Checksum
(Code
);
2241 if Underline_Found
then
2243 Cend
: constant Source_Ptr
:= Scan_Ptr
;
2246 Error_No_Double_Underline
;
2251 Store_Encoded_Character
(Code
);
2252 Underline_Found
:= True;
2255 -- Wide character in Unicode cateogory "Other, Format"
2256 -- is accepted in an identifier, but is ignored and not
2257 -- stored. It seems reasonable to exclude it from the
2260 elsif Is_UTF_32_Other
(Cat
) then
2263 -- Wide character in category Separator,Space terminates
2265 elsif Is_UTF_32_Space
(Cat
) then
2266 goto Scan_Identifier_Complete
;
2268 -- Any other wide character is not acceptable
2272 ("invalid wide character in identifier", Wptr
);
2276 goto Scan_Identifier
;
2281 -- Scan of identifier is complete. The identifier is stored in
2282 -- Name_Buffer, and Scan_Ptr points past the last character.
2284 <<Scan_Identifier_Complete
>>
2285 Token_Name
:= Name_Find
;
2287 -- Check for identifier ending with underline or punctuation char
2289 if Underline_Found
then
2290 Underline_Found
:= False;
2292 if Source
(Scan_Ptr
- 1) = '_' then
2294 ("identifier cannot end with underline", Scan_Ptr
- 1);
2297 ("identifier cannot end with punctuation character", Wptr
);
2301 -- Here is where we check if it was a keyword
2303 if Get_Name_Table_Byte
(Token_Name
) /= 0
2304 and then (Ada_Version
>= Ada_95
2305 or else Token_Name
not in Ada_95_Reserved_Words
)
2306 and then (Ada_Version
>= Ada_05
2307 or else Token_Name
not in Ada_2005_Reserved_Words
)
2309 Token
:= Token_Type
'Val (Get_Name_Table_Byte
(Token_Name
));
2311 -- Deal with possible style check for non-lower case keyword, but
2312 -- we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords for
2313 -- this purpose if they appear as attribute designators. Actually
2314 -- we only check the first character for speed.
2316 -- Ada 2005 (AI-284): Do not apply the style check in case of
2317 -- "pragma Interface"
2320 and then Source
(Token_Ptr
) <= 'Z'
2321 and then (Prev_Token
/= Tok_Apostrophe
2323 (Token
/= Tok_Access
2324 and then Token
/= Tok_Delta
2325 and then Token
/= Tok_Digits
2326 and then Token
/= Tok_Range
))
2327 and then (Token
/= Tok_Interface
2329 (Token
= Tok_Interface
2330 and then Prev_Token
/= Tok_Pragma
))
2332 Style
.Non_Lower_Case_Keyword
;
2335 -- We must reset Token_Name since this is not an identifier and
2336 -- if we leave Token_Name set, the parser gets confused because
2337 -- it thinks it is dealing with an identifier instead of the
2338 -- corresponding keyword.
2340 Token_Name
:= No_Name
;
2341 Accumulate_Token_Checksum
;
2344 -- It is an identifier after all
2347 Token
:= Tok_Identifier
;
2348 Accumulate_Token_Checksum
;
2354 --------------------------
2355 -- Set_Comment_As_Token --
2356 --------------------------
2358 procedure Set_Comment_As_Token
(Value
: Boolean) is
2360 Comment_Is_Token
:= Value
;
2361 end Set_Comment_As_Token
;
2363 ------------------------------
2364 -- Set_End_Of_Line_As_Token --
2365 ------------------------------
2367 procedure Set_End_Of_Line_As_Token
(Value
: Boolean) is
2369 End_Of_Line_Is_Token
:= Value
;
2370 end Set_End_Of_Line_As_Token
;
2372 ---------------------------
2373 -- Set_Special_Character --
2374 ---------------------------
2376 procedure Set_Special_Character
(C
: Character) is
2379 when '#' |
'$' |
'_' |
'?' |
'@' |
'`' |
'\' |
'^' |
'~' =>
2380 Special_Characters
(C
) := True;
2385 end Set_Special_Character
;
2387 ----------------------
2388 -- Set_Start_Column --
2389 ----------------------
2391 -- Note: it seems at first glance a little expensive to compute this value
2392 -- for every source line (since it is certainly not used for all source
2393 -- lines). On the other hand, it doesn't take much more work to skip past
2394 -- the initial white space on the line counting the columns than it would
2395 -- to scan past the white space using the standard scanning circuits.
2397 function Set_Start_Column
return Column_Number
is
2398 Start_Column
: Column_Number
:= 0;
2401 -- Outer loop scans past horizontal tab characters
2405 -- Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
2406 -- past the blanks and adjusting Start_Column to account for them.
2409 if Source
(Scan_Ptr
) = ' ' then
2410 if Source
(Scan_Ptr
+ 1) = ' ' then
2411 if Source
(Scan_Ptr
+ 2) = ' ' then
2412 if Source
(Scan_Ptr
+ 3) = ' ' then
2413 if Source
(Scan_Ptr
+ 4) = ' ' then
2414 if Source
(Scan_Ptr
+ 5) = ' ' then
2415 if Source
(Scan_Ptr
+ 6) = ' ' then
2416 Scan_Ptr
:= Scan_Ptr
+ 7;
2417 Start_Column
:= Start_Column
+ 7;
2419 Scan_Ptr
:= Scan_Ptr
+ 6;
2420 Start_Column
:= Start_Column
+ 6;
2424 Scan_Ptr
:= Scan_Ptr
+ 5;
2425 Start_Column
:= Start_Column
+ 5;
2429 Scan_Ptr
:= Scan_Ptr
+ 4;
2430 Start_Column
:= Start_Column
+ 4;
2434 Scan_Ptr
:= Scan_Ptr
+ 3;
2435 Start_Column
:= Start_Column
+ 3;
2439 Scan_Ptr
:= Scan_Ptr
+ 2;
2440 Start_Column
:= Start_Column
+ 2;
2444 Scan_Ptr
:= Scan_Ptr
+ 1;
2445 Start_Column
:= Start_Column
+ 1;
2451 end loop Blanks_Loop
;
2453 -- Outer loop keeps going only if a horizontal tab follows
2455 if Source
(Scan_Ptr
) = HT
then
2456 if Style_Check
then Style
.Check_HT
; end if;
2457 Scan_Ptr
:= Scan_Ptr
+ 1;
2458 Start_Column
:= (Start_Column
/ 8) * 8 + 8;
2465 return Start_Column
;
2466 end Set_Start_Column
;