1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
9 -- Copyright (C) 1992-2004 Free Software Foundation, Inc. --
11 -- GNAT is free software; you can redistribute it and/or modify it under --
12 -- terms of the GNU General Public License as published by the Free Soft- --
13 -- ware Foundation; either version 2, or (at your option) any later ver- --
14 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
15 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
16 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
17 -- for more details. You should have received a copy of the GNU General --
18 -- Public License distributed with GNAT; see file COPYING. If not, write --
19 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
20 -- MA 02111-1307, USA. --
22 -- GNAT was originally developed by the GNAT team at New York University. --
23 -- Extensive contributions were provided by Ada Core Technologies Inc. --
25 ------------------------------------------------------------------------------
27 with Csets
; use Csets
;
28 with Err_Vars
; use Err_Vars
;
29 with Namet
; use Namet
;
31 with Scans
; use Scans
;
32 with Sinput
; use Sinput
;
33 with Snames
; use Snames
;
34 with Stringt
; use Stringt
;
35 with Stylesw
; use Stylesw
;
36 with Uintp
; use Uintp
;
37 with Urealp
; use Urealp
;
38 with Widechar
; use Widechar
;
41 with System
.WCh_Con
; use System
.WCh_Con
;
46 -- Make control characters visible
48 Special_Characters
: array (Character) of Boolean := (others => False);
49 -- For characters that are Special tokens, the value is True
51 Comment_Is_Token
: Boolean := False;
52 -- True if comments are tokens
54 End_Of_Line_Is_Token
: Boolean := False;
55 -- True if End_Of_Line is a token
57 -----------------------
58 -- Local Subprograms --
59 -----------------------
61 procedure Accumulate_Token_Checksum
;
62 pragma Inline
(Accumulate_Token_Checksum
);
64 procedure Accumulate_Checksum
(C
: Character);
65 pragma Inline
(Accumulate_Checksum
);
66 -- This routine accumulates the checksum given character C. During the
67 -- scanning of a source file, this routine is called with every character
68 -- in the source, excluding blanks, and all control characters (except
69 -- that ESC is included in the checksum). Upper case letters not in string
70 -- literals are folded by the caller. See Sinput spec for the documentation
71 -- of the checksum algorithm. Note: checksum values are only used if we
72 -- generate code, so it is not necessary to worry about making the right
73 -- sequence of calls in any error situation.
75 procedure Accumulate_Checksum
(C
: Char_Code
);
76 pragma Inline
(Accumulate_Checksum
);
77 -- This version is identical, except that the argument, C, is a character
78 -- code value instead of a character. This is used when wide characters
79 -- are scanned. We use the character code rather than the ASCII characters
80 -- so that the checksum is independent of wide character encoding method.
82 procedure Initialize_Checksum
;
83 pragma Inline
(Initialize_Checksum
);
84 -- Initialize checksum value
86 -------------------------
87 -- Accumulate_Checksum --
88 -------------------------
procedure Accumulate_Checksum (C : Character) is
begin
   -- Fold the single character C into the running checksum. Checksum is
   -- converted to System.CRC32.CRC32 for the call to the CRC update routine.

   System.CRC32.Update
     (System.CRC32.CRC32 (Checksum),
      C);
end Accumulate_Checksum;
procedure Accumulate_Checksum (C : Char_Code) is
   -- The character code is fed to the checksum as two characters:
   -- the quotient by 256 first, then the remainder.

   High_Part : constant Character := Character'Val (C / 256);
   Low_Part  : constant Character := Character'Val (C mod 256);

begin
   Accumulate_Checksum (High_Part);
   Accumulate_Checksum (Low_Part);
end Accumulate_Checksum;
101 -------------------------------
102 -- Accumulate_Token_Checksum --
103 -------------------------------
-- Accumulate_Token_Checksum takes no parameters. The visible argument list
-- pairs Checksum (converted to System.CRC32.CRC32) with the character whose
-- code is Token_Type'Pos (Token), i.e. the position number of the value
-- currently held in Token.
-- NOTE(review): the embedded numbering jumps from 105 to 108, so the "begin"
-- keyword and the name of the routine receiving these arguments are not
-- present in this view. Presumably System.CRC32.Update, as used by
-- Accumulate_Checksum; confirm against the pristine file.
105 procedure Accumulate_Token_Checksum
is
108 (System
.CRC32
.CRC32
(Checksum
),
109 Character'Val (Token_Type
'Pos (Token
)));
110 end Accumulate_Token_Checksum
;
112 ----------------------------
113 -- Determine_Token_Casing --
114 ----------------------------
function Determine_Token_Casing return Casing_Type is
   -- The text examined is the slice of Source running from Token_Ptr up to
   -- the character immediately before Scan_Ptr.

   Result : constant Casing_Type :=
              Determine_Casing (Source (Token_Ptr .. Scan_Ptr - 1));

begin
   return Result;
end Determine_Token_Casing;
121 -------------------------
122 -- Initialize_Checksum --
123 -------------------------
procedure Initialize_Checksum is
begin
   -- Hand Checksum, converted to System.CRC32.CRC32, to the CRC
   -- initialization routine.

   System.CRC32.Initialize
     (System.CRC32.CRC32 (Checksum));
end Initialize_Checksum;
130 ------------------------
131 -- Initialize_Scanner --
132 ------------------------
-- Initialize_Scanner prepares the scanner state for a source file.
--   Unit  : value stored into Current_Source_Unit
--   Index : value stored into Current_Source_File; it selects the text
--           buffer (Source, via Source_Text) and the initial Scan_Ptr
--           (via Source_First)
-- The routine first records, for every reserved word, the position number
-- of its token in the Names Table entry of that word, and then resets the
-- scan control variables.
-- NOTE(review): the embedded numbering of this listing has gaps inside this
-- routine (137-138, 219, 222, 226-228, 230). The "is"/"begin" lines are not
-- visible and further statements may be missing from this view; compare
-- with the pristine file before relying on the statement list below.
134 procedure Initialize_Scanner
135 (Unit
: Unit_Number_Type
;
136 Index
: Source_File_Index
)
139 -- Set up Token_Type values in Names Table entries for reserved keywords.
140 -- We use the Pos value of the Token_Type value. Note we are relying on
141 -- the fact that Token_Type'Val (0) is not a reserved word!
143 Set_Name_Table_Byte
(Name_Abort
, Token_Type
'Pos (Tok_Abort
));
144 Set_Name_Table_Byte
(Name_Abs
, Token_Type
'Pos (Tok_Abs
));
145 Set_Name_Table_Byte
(Name_Abstract
, Token_Type
'Pos (Tok_Abstract
));
146 Set_Name_Table_Byte
(Name_Accept
, Token_Type
'Pos (Tok_Accept
));
147 Set_Name_Table_Byte
(Name_Access
, Token_Type
'Pos (Tok_Access
));
148 Set_Name_Table_Byte
(Name_And
, Token_Type
'Pos (Tok_And
));
149 Set_Name_Table_Byte
(Name_Aliased
, Token_Type
'Pos (Tok_Aliased
));
150 Set_Name_Table_Byte
(Name_All
, Token_Type
'Pos (Tok_All
));
151 Set_Name_Table_Byte
(Name_Array
, Token_Type
'Pos (Tok_Array
));
152 Set_Name_Table_Byte
(Name_At
, Token_Type
'Pos (Tok_At
));
153 Set_Name_Table_Byte
(Name_Begin
, Token_Type
'Pos (Tok_Begin
));
154 Set_Name_Table_Byte
(Name_Body
, Token_Type
'Pos (Tok_Body
));
155 Set_Name_Table_Byte
(Name_Case
, Token_Type
'Pos (Tok_Case
));
156 Set_Name_Table_Byte
(Name_Constant
, Token_Type
'Pos (Tok_Constant
));
157 Set_Name_Table_Byte
(Name_Declare
, Token_Type
'Pos (Tok_Declare
));
158 Set_Name_Table_Byte
(Name_Delay
, Token_Type
'Pos (Tok_Delay
));
159 Set_Name_Table_Byte
(Name_Delta
, Token_Type
'Pos (Tok_Delta
));
160 Set_Name_Table_Byte
(Name_Digits
, Token_Type
'Pos (Tok_Digits
));
161 Set_Name_Table_Byte
(Name_Do
, Token_Type
'Pos (Tok_Do
));
162 Set_Name_Table_Byte
(Name_Else
, Token_Type
'Pos (Tok_Else
));
163 Set_Name_Table_Byte
(Name_Elsif
, Token_Type
'Pos (Tok_Elsif
));
164 Set_Name_Table_Byte
(Name_End
, Token_Type
'Pos (Tok_End
));
165 Set_Name_Table_Byte
(Name_Entry
, Token_Type
'Pos (Tok_Entry
));
166 Set_Name_Table_Byte
(Name_Exception
, Token_Type
'Pos (Tok_Exception
));
167 Set_Name_Table_Byte
(Name_Exit
, Token_Type
'Pos (Tok_Exit
));
168 Set_Name_Table_Byte
(Name_For
, Token_Type
'Pos (Tok_For
));
169 Set_Name_Table_Byte
(Name_Function
, Token_Type
'Pos (Tok_Function
));
170 Set_Name_Table_Byte
(Name_Generic
, Token_Type
'Pos (Tok_Generic
));
171 Set_Name_Table_Byte
(Name_Goto
, Token_Type
'Pos (Tok_Goto
));
172 Set_Name_Table_Byte
(Name_If
, Token_Type
'Pos (Tok_If
));
173 Set_Name_Table_Byte
(Name_In
, Token_Type
'Pos (Tok_In
));
174 Set_Name_Table_Byte
(Name_Is
, Token_Type
'Pos (Tok_Is
));
175 Set_Name_Table_Byte
(Name_Limited
, Token_Type
'Pos (Tok_Limited
));
176 Set_Name_Table_Byte
(Name_Loop
, Token_Type
'Pos (Tok_Loop
));
177 Set_Name_Table_Byte
(Name_Mod
, Token_Type
'Pos (Tok_Mod
));
178 Set_Name_Table_Byte
(Name_New
, Token_Type
'Pos (Tok_New
));
179 Set_Name_Table_Byte
(Name_Not
, Token_Type
'Pos (Tok_Not
));
180 Set_Name_Table_Byte
(Name_Null
, Token_Type
'Pos (Tok_Null
));
181 Set_Name_Table_Byte
(Name_Of
, Token_Type
'Pos (Tok_Of
));
182 Set_Name_Table_Byte
(Name_Or
, Token_Type
'Pos (Tok_Or
));
183 Set_Name_Table_Byte
(Name_Others
, Token_Type
'Pos (Tok_Others
));
184 Set_Name_Table_Byte
(Name_Out
, Token_Type
'Pos (Tok_Out
));
185 Set_Name_Table_Byte
(Name_Package
, Token_Type
'Pos (Tok_Package
));
186 Set_Name_Table_Byte
(Name_Pragma
, Token_Type
'Pos (Tok_Pragma
));
187 Set_Name_Table_Byte
(Name_Private
, Token_Type
'Pos (Tok_Private
));
188 Set_Name_Table_Byte
(Name_Procedure
, Token_Type
'Pos (Tok_Procedure
));
189 Set_Name_Table_Byte
(Name_Protected
, Token_Type
'Pos (Tok_Protected
));
190 Set_Name_Table_Byte
(Name_Raise
, Token_Type
'Pos (Tok_Raise
));
191 Set_Name_Table_Byte
(Name_Range
, Token_Type
'Pos (Tok_Range
));
192 Set_Name_Table_Byte
(Name_Record
, Token_Type
'Pos (Tok_Record
));
193 Set_Name_Table_Byte
(Name_Rem
, Token_Type
'Pos (Tok_Rem
));
194 Set_Name_Table_Byte
(Name_Renames
, Token_Type
'Pos (Tok_Renames
));
195 Set_Name_Table_Byte
(Name_Requeue
, Token_Type
'Pos (Tok_Requeue
));
196 Set_Name_Table_Byte
(Name_Return
, Token_Type
'Pos (Tok_Return
));
197 Set_Name_Table_Byte
(Name_Reverse
, Token_Type
'Pos (Tok_Reverse
));
198 Set_Name_Table_Byte
(Name_Select
, Token_Type
'Pos (Tok_Select
));
199 Set_Name_Table_Byte
(Name_Separate
, Token_Type
'Pos (Tok_Separate
));
200 Set_Name_Table_Byte
(Name_Subtype
, Token_Type
'Pos (Tok_Subtype
));
201 Set_Name_Table_Byte
(Name_Tagged
, Token_Type
'Pos (Tok_Tagged
));
202 Set_Name_Table_Byte
(Name_Task
, Token_Type
'Pos (Tok_Task
));
203 Set_Name_Table_Byte
(Name_Terminate
, Token_Type
'Pos (Tok_Terminate
));
204 Set_Name_Table_Byte
(Name_Then
, Token_Type
'Pos (Tok_Then
));
205 Set_Name_Table_Byte
(Name_Type
, Token_Type
'Pos (Tok_Type
));
206 Set_Name_Table_Byte
(Name_Until
, Token_Type
'Pos (Tok_Until
));
207 Set_Name_Table_Byte
(Name_Use
, Token_Type
'Pos (Tok_Use
));
208 Set_Name_Table_Byte
(Name_When
, Token_Type
'Pos (Tok_When
));
209 Set_Name_Table_Byte
(Name_While
, Token_Type
'Pos (Tok_While
));
210 Set_Name_Table_Byte
(Name_With
, Token_Type
'Pos (Tok_With
));
211 Set_Name_Table_Byte
(Name_Xor
, Token_Type
'Pos (Tok_Xor
));
213 -- Initialize scan control variables
-- Source and Scan_Ptr are both derived from Current_Source_File, which is
-- therefore assigned first. Token_Ptr, Current_Line_Start and
-- First_Non_Blank_Location all start out equal to the initial Scan_Ptr.
215 Current_Source_File
:= Index
;
216 Source
:= Source_Text
(Current_Source_File
);
217 Current_Source_Unit
:= Unit
;
218 Scan_Ptr
:= Source_First
(Current_Source_File
);
220 Token_Ptr
:= Scan_Ptr
;
221 Current_Line_Start
:= Scan_Ptr
;
223 Token_Name
:= No_Name
;
224 Start_Column
:= Set_Start_Column
;
225 First_Non_Blank_Location
:= Scan_Ptr
;
229 -- Do not call Scan, otherwise the License stuff does not work in Scn
231 end Initialize_Scanner
;
233 ------------------------------
234 -- Reset_Special_Characters --
235 ------------------------------
procedure Reset_Special_Characters is
begin
   -- Clear the Special flag of every character, one entry at a time

   for Ch in Special_Characters'Range loop
      Special_Characters (Ch) := False;
   end loop;
end Reset_Special_Characters;
248 Start_Of_Comment
: Source_Ptr
;
250 procedure Check_End_Of_Line
;
251 -- Called when end of line encountered. Checks that line is not
252 -- too long, and that other style checks for the end of line are met.
254 function Double_Char_Token
(C
: Character) return Boolean;
255 -- This function is used for double character tokens like := or <>. It
256 -- checks if the character following Source (Scan_Ptr) is C, and if so
257 -- bumps Scan_Ptr past the pair of characters and returns True. A space
258 -- between the two characters is also recognized with an appropriate
259 -- error message being issued. If C is not present, False is returned.
260 -- Note that Double_Char_Token can only be used for tokens defined in
261 -- the Ada syntax (its use for error cases like && is not appropriate
262 -- since we do not want a junk message for a case like &-space-&).
264 procedure Error_Illegal_Character
;
265 -- Give illegal character error, Scan_Ptr points to character.
266 -- On return, Scan_Ptr is bumped past the illegal character.
268 procedure Error_Illegal_Wide_Character
;
269 -- Give illegal wide character message. On return, Scan_Ptr is bumped
270 -- past the illegal character, which may still leave us pointing to
271 -- junk, not much we can do if the escape sequence is messed up!
273 procedure Error_Long_Line
;
274 -- Signal error of excessively long line
276 procedure Error_No_Double_Underline
;
277 -- Signal error of double underline character
280 -- This is the procedure for scanning out numeric literals. On entry,
281 -- Scan_Ptr points to the digit that starts the numeric literal (the
282 -- checksum for this character has not been accumulated yet). On return
283 -- Scan_Ptr points past the last character of the numeric literal, Token
284 -- and Token_Node are set appropriately, and the checksum is updated.
287 -- This is the procedure for scanning out string literals. On entry,
288 -- Scan_Ptr points to the opening string quote (the checksum for this
289 -- character has not been accumulated yet). On return Scan_Ptr points
290 -- past the closing quote of the string literal, Token and Token_Node
291 -- are set appropriately, and the checksum is updated.
293 -----------------------
294 -- Check_End_Of_Line --
295 -----------------------
-- Check_End_Of_Line performs the end-of-line checks described at its
-- declaration. Len is the distance from Current_Line_Start to Scan_Ptr,
-- computed in type Int.
-- NOTE(review): the embedded numbering has gaps here (299-300, 312-313).
-- The "begin" line, the statement executed when the line is too long
-- (presumably a call to Error_Long_Line), and the closing "end if" are not
-- visible in this view; confirm against the pristine file.
297 procedure Check_End_Of_Line
is
298 Len
: constant Int
:= Int
(Scan_Ptr
) - Int
(Current_Line_Start
);
-- When both style checking and the style maximum-line-length check are on,
-- the length is passed to the style checker
301 if Style_Check
and Style_Check_Max_Line_Length
then
302 Style
.Check_Line_Terminator
(Len
);
304 -- If style checking is inactive, check maximum line length against
305 -- standard value. Note that we take this from Opt.Max_Line_Length
306 -- rather than Hostparm.Max_Line_Length because we do not want to
307 -- impose any limit during scanning of configuration pragma files,
308 -- and Opt.Max_Line_Length (normally set to Hostparm.Max_Line_Length)
309 -- is reset to Column_Number'Max during scanning of such files.
311 elsif Len
> Opt
.Max_Line_Length
then
314 end Check_End_Of_Line
;
316 -----------------------
317 -- Double_Char_Token --
318 -----------------------
-- Double_Char_Token recognizes the second character, C, of a two-character
-- token whose first character is at Source (Scan_Ptr).
--   C       : the character expected immediately after Source (Scan_Ptr)
--   returns : Boolean; per the declaration comment, True when the pair
--             (possibly separated by one blank) is present, else False
-- NOTE(review): the embedded numbering has gaps here (321, 325-326, 329,
-- 333-337). The "begin", the return statements, the "then" of the elsif and
-- the else part are not visible in this view; confirm against the pristine
-- file.
320 function Double_Char_Token
(C
: Character) return Boolean is
-- Normal case: C follows directly. It is added to the checksum and
-- Scan_Ptr is moved past both characters.
322 if Source
(Scan_Ptr
+ 1) = C
then
323 Accumulate_Checksum
(C
);
324 Scan_Ptr
:= Scan_Ptr
+ 2;
-- Error case: exactly one blank separates the two characters. Scan_Ptr is
-- first moved onto the blank so that the message is issued with Scan_Ptr
-- there, then moved past the blank and C (three positions in total). C is
-- not added to the checksum on this path.
327 elsif Source
(Scan_Ptr
+ 1) = ' '
328 and then Source
(Scan_Ptr
+ 2) = C
330 Scan_Ptr
:= Scan_Ptr
+ 1;
331 Error_Msg_S
("no space allowed here");
332 Scan_Ptr
:= Scan_Ptr
+ 2;
338 end Double_Char_Token
;
340 -----------------------------
341 -- Error_Illegal_Character --
342 -----------------------------
procedure Error_Illegal_Character is
begin
   -- The message is issued while Scan_Ptr still designates the offending
   -- character; only afterwards is Scan_Ptr stepped past it.

   Error_Msg_S ("illegal character");

   Scan_Ptr := Scan_Ptr + 1;
end Error_Illegal_Character;
350 ----------------------------------
351 -- Error_Illegal_Wide_Character --
352 ----------------------------------
procedure Error_Illegal_Wide_Character is
begin
   -- Report first, then step Scan_Ptr forward by a single position. Only
   -- one position is skipped, whatever the length of the bad sequence.

   Error_Msg_S ("illegal wide character, check -gnatW switch");

   Scan_Ptr := Scan_Ptr + 1;
end Error_Illegal_Wide_Character;
360 ---------------------
361 -- Error_Long_Line --
362 ---------------------
-- Error_Long_Line reports an excessively long line. The visible argument
-- list consists of the message text and a source location equal to
-- Current_Line_Start plus Opt.Max_Line_Length (converted to Source_Ptr).
-- NOTE(review): the embedded numbering jumps from 364 to 367 and stops at
-- 368. The "begin", the name of the routine receiving these arguments
-- (presumably an error-message routine taking a message and a location)
-- and the closing "end Error_Long_Line;" are not visible in this view;
-- confirm against the pristine file.
364 procedure Error_Long_Line
is
367 ("this line is too long",
368 Current_Line_Start
+ Source_Ptr
(Opt
.Max_Line_Length
));
371 -------------------------------
372 -- Error_No_Double_Underline --
373 -------------------------------
procedure Error_No_Double_Underline is
begin
   -- Issue the diagnostic only; Scan_Ptr is left untouched here

   Error_Msg_S
     ("two consecutive underlines not permitted");
end Error_No_Double_Underline;
387 -- Current source program character
389 Base_Char
: Character;
390 -- Either # or : (character at start of based number)
396 -- Value of base in Uint format
399 -- Value of integer scanned by Scan_Integer in Uint format
402 -- Value of integer in numeric value being scanned
405 -- Scale value for real literal
408 -- Scale in Uint format
410 Exponent_Is_Negative
: Boolean;
411 -- Set true for negative exponent
413 Extended_Digit_Value
: Int
;
414 -- Extended digit value
416 Point_Scanned
: Boolean;
417 -- Flag for decimal point scanned in numeric literal
419 -----------------------
420 -- Local Subprograms --
421 -----------------------
423 procedure Error_Digit_Expected
;
424 -- Signal error of bad digit, Scan_Ptr points to the location at
425 -- which the digit was expected on input, and is unchanged on return.
427 procedure Scan_Integer
;
428 -- Procedure to scan integer literal. On entry, Scan_Ptr points to
429 -- a digit, on exit Scan_Ptr points past the last character of
432 -- For each digit encountered, UI_Int_Value is multiplied by 10,
433 -- and the value of the digit added to the result. In addition,
434 -- the value in Scale is decremented by one for each actual digit
437 --------------------------
438 -- Error_Digit_Expected --
439 --------------------------
procedure Error_Digit_Expected is
begin
   -- Issue the diagnostic only; Scan_Ptr is not modified by this routine

   Error_Msg_S
     ("digit expected");
end Error_Digit_Expected;
450 procedure Scan_Integer
is
452 -- Next character scanned
455 C
:= Source
(Scan_Ptr
);
457 -- Loop through digits (allowing underlines)
460 Accumulate_Checksum
(C
);
462 UI_Int_Value
* 10 + (Character'Pos (C
) - Character'Pos ('0'));
463 Scan_Ptr
:= Scan_Ptr
+ 1;
465 C
:= Source
(Scan_Ptr
);
469 -- We do not accumulate the '_' in the checksum, so that
470 -- 1_234 is equivalent to 1234, and does not trigger
471 -- compilation for "minimal recompilation" (gnatmake -m).
474 Scan_Ptr
:= Scan_Ptr
+ 1;
475 C
:= Source
(Scan_Ptr
);
477 Error_No_Double_Underline
;
480 if C
not in '0' .. '9' then
481 Error_Digit_Expected
;
486 exit when C
not in '0' .. '9';
492 ----------------------------------
493 -- Start of Processing for Nlit --
494 ----------------------------------
499 UI_Int_Value
:= Uint_0
;
503 Point_Scanned
:= False;
504 UI_Num_Value
:= UI_Int_Value
;
506 -- Various possibilities now for continuing the literal are
507 -- period, E/e (for exponent), or :/# (for based literal).
510 C
:= Source
(Scan_Ptr
);
514 -- Scan out point, but do not scan past .. which is a range
515 -- sequence, and must not be eaten up scanning a numeric literal.
517 while C
= '.' and then Source
(Scan_Ptr
+ 1) /= '.' loop
518 Accumulate_Checksum
('.');
520 if Point_Scanned
then
521 Error_Msg_S
("duplicate point ignored");
524 Point_Scanned
:= True;
525 Scan_Ptr
:= Scan_Ptr
+ 1;
526 C
:= Source
(Scan_Ptr
);
528 if C
not in '0' .. '9' then
530 ("real literal cannot end with point", Scan_Ptr
- 1);
533 UI_Num_Value
:= UI_Int_Value
;
537 -- Based literal case. The base is the value we already scanned.
538 -- In the case of colon, we insist that the following character
539 -- is indeed an extended digit or a period. This catches a number
540 -- of common errors, as well as catching the well known tricky
541 -- bug otherwise arising from "x : integer range 1 .. 10:= 6;"
544 or else (C
= ':' and then
545 (Source
(Scan_Ptr
+ 1) = '.'
547 Source
(Scan_Ptr
+ 1) in '0' .. '9'
549 Source
(Scan_Ptr
+ 1) in 'A' .. 'Z'
551 Source
(Scan_Ptr
+ 1) in 'a' .. 'z'))
554 Obsolescent_Check
(Scan_Ptr
);
556 if Warn_On_Obsolescent_Feature
then
558 ("use of "":"" is an obsolescent feature ('R'M 'J.2(3))?");
560 ("\use ""'#"" instead?");
565 Accumulate_Checksum
(C
);
567 UI_Base
:= UI_Int_Value
;
569 if UI_Base
< 2 or else UI_Base
> 16 then
570 Error_Msg_SC
("base not 2-16");
574 Base
:= UI_To_Int
(UI_Base
);
575 Scan_Ptr
:= Scan_Ptr
+ 1;
577 -- Scan out extended integer [. integer]
579 C
:= Source
(Scan_Ptr
);
580 UI_Int_Value
:= Uint_0
;
584 if C
in '0' .. '9' then
585 Accumulate_Checksum
(C
);
586 Extended_Digit_Value
:=
587 Int
'(Character'Pos (C)) - Int'(Character'Pos ('0'));
589 elsif C
in 'A' .. 'F' then
590 Accumulate_Checksum
(Character'Val (Character'Pos (C
) + 32));
591 Extended_Digit_Value
:=
592 Int
'(Character'Pos (C)) - Int'(Character'Pos ('A')) + 10;
594 elsif C
in 'a' .. 'f' then
595 Accumulate_Checksum
(C
);
596 Extended_Digit_Value
:=
597 Int
'(Character'Pos (C)) - Int'(Character'Pos ('a')) + 10;
600 Error_Msg_S
("extended digit expected");
604 if Extended_Digit_Value
>= Base
then
605 Error_Msg_S
("digit '>= base");
608 UI_Int_Value
:= UI_Int_Value
* UI_Base
+ Extended_Digit_Value
;
610 Scan_Ptr
:= Scan_Ptr
+ 1;
611 C
:= Source
(Scan_Ptr
);
615 Accumulate_Checksum
('_');
616 Scan_Ptr
:= Scan_Ptr
+ 1;
617 C
:= Source
(Scan_Ptr
);
619 Error_No_Double_Underline
;
623 Accumulate_Checksum
('.');
625 if Point_Scanned
then
626 Error_Msg_S
("duplicate point ignored");
629 Scan_Ptr
:= Scan_Ptr
+ 1;
630 C
:= Source
(Scan_Ptr
);
631 Point_Scanned
:= True;
634 elsif C
= Base_Char
then
635 Accumulate_Checksum
(C
);
636 Scan_Ptr
:= Scan_Ptr
+ 1;
639 elsif C
= '#' or else C
= ':' then
640 Error_Msg_S
("based number delimiters must match");
641 Scan_Ptr
:= Scan_Ptr
+ 1;
644 elsif not Identifier_Char
(C
) then
645 if Base_Char
= '#' then
646 Error_Msg_S
("missing '#");
648 Error_Msg_S
("missing ':");
656 UI_Num_Value
:= UI_Int_Value
;
661 if not Point_Scanned
then
665 UI_Scale
:= UI_From_Int
(Scale
);
668 if Source
(Scan_Ptr
) = 'e' or else Source
(Scan_Ptr
) = 'E' then
669 Accumulate_Checksum
('e');
670 Scan_Ptr
:= Scan_Ptr
+ 1;
671 Exponent_Is_Negative
:= False;
673 if Source
(Scan_Ptr
) = '+' then
674 Accumulate_Checksum
('+');
675 Scan_Ptr
:= Scan_Ptr
+ 1;
677 elsif Source
(Scan_Ptr
) = '-' then
678 Accumulate_Checksum
('-');
680 if not Point_Scanned
then
682 ("negative exponent not allowed for integer literal");
684 Exponent_Is_Negative
:= True;
687 Scan_Ptr
:= Scan_Ptr
+ 1;
690 UI_Int_Value
:= Uint_0
;
692 if Source
(Scan_Ptr
) in '0' .. '9' then
695 Error_Digit_Expected
;
698 if Exponent_Is_Negative
then
699 UI_Scale
:= UI_Scale
- UI_Int_Value
;
701 UI_Scale
:= UI_Scale
+ UI_Int_Value
;
705 -- Case of real literal to be returned
707 if Point_Scanned
then
708 Token
:= Tok_Real_Literal
;
709 Real_Literal_Value
:=
715 -- Case of integer literal to be returned
718 Token
:= Tok_Integer_Literal
;
721 Int_Literal_Value
:= UI_Num_Value
;
723 -- Avoid doing possibly expensive calculations in cases like
724 -- parsing 163E800_000# when semantics will not be done anyway.
725 -- This is especially useful when parsing garbled input.
727 elsif Operating_Mode
/= Check_Syntax
728 and then (Serious_Errors_Detected
= 0 or else Try_Semantics
)
730 Int_Literal_Value
:= UI_Num_Value
* UI_Base
** UI_Scale
;
733 Int_Literal_Value
:= No_Uint
;
739 Accumulate_Token_Checksum
;
751 Delimiter
: Character;
752 -- Delimiter (first character of string)
755 -- Current source program character
758 -- Current character code value
761 -- Error flag for Scan_Wide call
763 procedure Error_Bad_String_Char
;
764 -- Signal bad character in string/character literal. On entry
765 -- Scan_Ptr points to the improper character encountered during
766 -- the scan. Scan_Ptr is not modified, so it still points to the bad
767 -- character on return.
769 procedure Error_Unterminated_String
;
770 -- Procedure called if a line terminator character is encountered
771 -- during scanning a string, meaning that the string is not properly
774 procedure Set_String
;
775 -- Procedure used to distinguish between string and operator symbol.
776 -- On entry the string has been scanned out, and its characters start
777 -- at Token_Ptr and end one character before Scan_Ptr. On exit Token
778 -- is set to Tok_String_Literal or Tok_Operator_Symbol as
779 -- appropriate, and Token_Node is appropriately initialized.
780 -- In addition, in the operator symbol case, Token_Name is
781 -- appropriately set.
783 ---------------------------
784 -- Error_Bad_String_Char --
785 ---------------------------
-- Error_Bad_String_Char issues one of four messages according to the
-- character C found at Source (Scan_Ptr). Scan_Ptr itself is not assigned
-- anywhere in the visible text of this routine.
-- NOTE(review): the embedded numbering has gaps here (789-791, 799-800,
-- 802). The "begin", the opening "if" that guards the horizontal tab
-- message (presumably a test of C against HT), the final "else" and the
-- "end if" are not visible in this view; confirm against the pristine file.
787 procedure Error_Bad_String_Char
is
788 C
: constant Character := Source
(Scan_Ptr
);
792 Error_Msg_S
("horizontal tab not allowed in string");
-- Vertical tab and form feed share one message
794 elsif C
= VT
or else C
= FF
then
795 Error_Msg_S
("format effector not allowed in string");
797 elsif C
in Upper_Half_Character
then
798 Error_Msg_S
("(Ada 83) upper half character not allowed");
-- Remaining case: message for any other character reaching this routine
801 Error_Msg_S
("control character not allowed in string");
803 end Error_Bad_String_Char
;
805 -------------------------------
806 -- Error_Unterminated_String --
807 -------------------------------
-- Error_Unterminated_String reports a string that reaches the end of the
-- line without a closing quote. Before issuing the message it may move
-- Scan_Ptr backwards, as explained in the comments below.
-- NOTE(review): the embedded numbering has many gaps in this routine (for
-- example 810, 838, 840-842, 846-847, 849-851, 853-855). The "begin", the
-- "loop"/"end loop" and "if"/"end if" framing, the first operand of the
-- condition ending at line 848 and the name of the routine called at line
-- 852 are not visible in this view; confirm against the pristine file.
809 procedure Error_Unterminated_String
is
811 -- An interesting little refinement. Consider the following
814 -- A := "this is an unterminated string;
815 -- A := "this is an unterminated string &
816 -- P(A, "this is a parameter that didn't get terminated);
818 -- We fiddle a little to do slightly better placement in these
819 -- cases. Also, if there is white space at the end of the line, we
820 -- place the flag at the start of this white space, not at the
821 -- end. Note that we only have to test for blanks, since tabs
822 -- aren't allowed in strings in the first place and would have
823 -- caused an error message.
825 -- Two more cases that we treat specially are:
827 -- A := "this string uses the wrong terminator'
828 -- A := "this string uses the wrong terminator' &
830 -- In these cases we give a different error message as well
832 -- We actually reposition the scan pointer to the point where we
833 -- place the flag in these cases, since it seems a better bet on
834 -- the original intention.
-- Back up over trailing blanks and ampersands
836 while Source
(Scan_Ptr
- 1) = ' '
837 or else Source
(Scan_Ptr
- 1) = '&'
839 Scan_Ptr
:= Scan_Ptr
- 1;
843 -- Check for case of incorrect string terminator, but single quote
844 -- is not considered incorrect if the opening terminator misused
845 -- a single quote (error message already given).
848 and then Source
(Scan_Ptr
- 1) = '''
852 ("incorrect string terminator character", Scan_Ptr
- 1);
-- Back up over a trailing semicolon, then over a trailing right
-- parenthesis, so that the flag lands where the quote was probably meant
856 if Source
(Scan_Ptr
- 1) = ';' then
857 Scan_Ptr
:= Scan_Ptr
- 1;
860 if Source
(Scan_Ptr
- 1) = ')' then
861 Scan_Ptr
:= Scan_Ptr
- 1;
866 Error_Msg_S
("missing string quote");
867 end Error_Unterminated_String
;
873 procedure Set_String
is
874 Slen
: constant Int
:= Int
(Scan_Ptr
- Token_Ptr
- 2);
880 -- Token_Name is currently set to Error_Name. The following
881 -- section of code resets Token_Name to the proper Name_Op_xx
882 -- value if the string is a valid operator symbol, otherwise it is
883 -- left set to Error_Name.
886 C1
:= Source
(Token_Ptr
+ 1);
890 Token_Name
:= Name_Op_Eq
;
893 Token_Name
:= Name_Op_Gt
;
896 Token_Name
:= Name_Op_Lt
;
899 Token_Name
:= Name_Op_Add
;
902 Token_Name
:= Name_Op_Subtract
;
905 Token_Name
:= Name_Op_Concat
;
908 Token_Name
:= Name_Op_Multiply
;
911 Token_Name
:= Name_Op_Divide
;
918 C1
:= Source
(Token_Ptr
+ 1);
919 C2
:= Source
(Token_Ptr
+ 2);
921 if C1
= '*' and then C2
= '*' then
922 Token_Name
:= Name_Op_Expon
;
927 Token_Name
:= Name_Op_Ne
;
929 Token_Name
:= Name_Op_Le
;
931 Token_Name
:= Name_Op_Ge
;
934 elsif (C1
= 'O' or else C1
= 'o') and then -- OR
935 (C2
= 'R' or else C2
= 'r')
937 Token_Name
:= Name_Op_Or
;
941 C1
:= Source
(Token_Ptr
+ 1);
942 C2
:= Source
(Token_Ptr
+ 2);
943 C3
:= Source
(Token_Ptr
+ 3);
945 if (C1
= 'A' or else C1
= 'a') and then -- AND
946 (C2
= 'N' or else C2
= 'n') and then
947 (C3
= 'D' or else C3
= 'd')
949 Token_Name
:= Name_Op_And
;
951 elsif (C1
= 'A' or else C1
= 'a') and then -- ABS
952 (C2
= 'B' or else C2
= 'b') and then
953 (C3
= 'S' or else C3
= 's')
955 Token_Name
:= Name_Op_Abs
;
957 elsif (C1
= 'M' or else C1
= 'm') and then -- MOD
958 (C2
= 'O' or else C2
= 'o') and then
959 (C3
= 'D' or else C3
= 'd')
961 Token_Name
:= Name_Op_Mod
;
963 elsif (C1
= 'N' or else C1
= 'n') and then -- NOT
964 (C2
= 'O' or else C2
= 'o') and then
965 (C3
= 'T' or else C3
= 't')
967 Token_Name
:= Name_Op_Not
;
969 elsif (C1
= 'R' or else C1
= 'r') and then -- REM
970 (C2
= 'E' or else C2
= 'e') and then
971 (C3
= 'M' or else C3
= 'm')
973 Token_Name
:= Name_Op_Rem
;
975 elsif (C1
= 'X' or else C1
= 'x') and then -- XOR
976 (C2
= 'O' or else C2
= 'o') and then
977 (C3
= 'R' or else C3
= 'r')
979 Token_Name
:= Name_Op_Xor
;
984 -- If it is an operator symbol, then Token_Name is set.
985 -- If it is some other string value, then Token_Name still
986 -- contains Error_Name.
988 if Token_Name
= Error_Name
then
989 Token
:= Tok_String_Literal
;
992 Token
:= Tok_Operator_Symbol
;
1002 -- On entry, Scan_Ptr points to the opening character of the string
1003 -- which is either a percent, double quote, or apostrophe
1004 -- (single quote). The latter case is an error detected by
1005 -- the character literal circuit.
1007 Delimiter
:= Source
(Scan_Ptr
);
1008 Accumulate_Checksum
(Delimiter
);
1010 Scan_Ptr
:= Scan_Ptr
+ 1;
1012 -- Loop to scan out characters of string literal
1015 C
:= Source
(Scan_Ptr
);
1017 if C
= Delimiter
then
1018 Accumulate_Checksum
(C
);
1019 Scan_Ptr
:= Scan_Ptr
+ 1;
1020 exit when Source
(Scan_Ptr
) /= Delimiter
;
1021 Code
:= Get_Char_Code
(C
);
1022 Accumulate_Checksum
(C
);
1023 Scan_Ptr
:= Scan_Ptr
+ 1;
1026 if C
= '"' and then Delimiter
= '%' then
1028 ("quote not allowed in percent delimited string");
1029 Code
:= Get_Char_Code
(C
);
1030 Scan_Ptr
:= Scan_Ptr
+ 1;
1034 Wide_Character_Encoding_Method
1035 in WC_ESC_Encoding_Method
)
1037 (C
in Upper_Half_Character
1039 Upper_Half_Encoding
)
1043 Source
(Scan_Ptr
+ 1) = '"'
1045 Identifier_Char
(Source
(Scan_Ptr
+ 2)))
1047 Scan_Wide
(Source
, Scan_Ptr
, Code
, Err
);
1048 Accumulate_Checksum
(Code
);
1051 Error_Illegal_Wide_Character
;
1052 Code
:= Get_Char_Code
(' ');
1056 Accumulate_Checksum
(C
);
1058 if C
not in Graphic_Character
then
1059 if C
in Line_Terminator
then
1060 Error_Unterminated_String
;
1063 elsif C
in Upper_Half_Character
then
1064 if Ada_Version
= Ada_83
then
1065 Error_Bad_String_Char
;
1069 Error_Bad_String_Char
;
1073 Code
:= Get_Char_Code
(C
);
1074 Scan_Ptr
:= Scan_Ptr
+ 1;
1078 Store_String_Char
(Code
);
1080 if not In_Character_Range
(Code
) then
1081 Wide_Character_Found
:= True;
1085 String_Literal_Id
:= End_String
;
1091 -- Start of body of Scan
1094 Prev_Token
:= Token
;
1095 Prev_Token_Ptr
:= Token_Ptr
;
1096 Token_Name
:= Error_Name
;
1098 -- The following loop runs more than once only if a format effector
1099 -- (tab, vertical tab, form feed, line feed, carriage return) is
1100 -- encountered and skipped, or some error situation, such as an
1101 -- illegal character, is encountered.
1104 -- Skip past blanks, loop is opened up for speed
1106 while Source
(Scan_Ptr
) = ' ' loop
1108 if Source
(Scan_Ptr
+ 1) /= ' ' then
1109 Scan_Ptr
:= Scan_Ptr
+ 1;
1113 if Source
(Scan_Ptr
+ 2) /= ' ' then
1114 Scan_Ptr
:= Scan_Ptr
+ 2;
1118 if Source
(Scan_Ptr
+ 3) /= ' ' then
1119 Scan_Ptr
:= Scan_Ptr
+ 3;
1123 if Source
(Scan_Ptr
+ 4) /= ' ' then
1124 Scan_Ptr
:= Scan_Ptr
+ 4;
1128 if Source
(Scan_Ptr
+ 5) /= ' ' then
1129 Scan_Ptr
:= Scan_Ptr
+ 5;
1133 if Source
(Scan_Ptr
+ 6) /= ' ' then
1134 Scan_Ptr
:= Scan_Ptr
+ 6;
1138 if Source
(Scan_Ptr
+ 7) /= ' ' then
1139 Scan_Ptr
:= Scan_Ptr
+ 7;
1143 Scan_Ptr
:= Scan_Ptr
+ 8;
1146 -- We are now at a non-blank character, which is the first character
1147 -- of the token we will scan, and hence the value of Token_Ptr.
1149 Token_Ptr
:= Scan_Ptr
;
1151 -- Here begins the main case statement which transfers control on
1152 -- the basis of the non-blank character we have encountered.
1154 case Source
(Scan_Ptr
) is
1156 -- Line terminator characters
1158 when CR | LF | FF | VT
=> Line_Terminator_Case
: begin
1160 -- Check line too long
1164 -- Set Token_Ptr, if End_Of_Line is a token, for the case when
1165 -- it is a physical line.
1167 if End_Of_Line_Is_Token
then
1168 Token_Ptr
:= Scan_Ptr
;
1175 Skip_Line_Terminators
(Scan_Ptr
, Physical
);
1177 -- If we are at start of physical line, update scan pointers
1178 -- to reflect the start of the new line.
1181 Current_Line_Start
:= Scan_Ptr
;
1182 Start_Column
:= Set_Start_Column
;
1183 First_Non_Blank_Location
:= Scan_Ptr
;
1185 -- If End_Of_Line is a token, we return it as it is
1188 if End_Of_Line_Is_Token
then
1189 Token
:= Tok_End_Of_Line
;
1194 end Line_Terminator_Case
;
1196 -- Horizontal tab, just skip past it
1199 if Style_Check
then Style
.Check_HT
; end if;
1200 Scan_Ptr
:= Scan_Ptr
+ 1;
1202 -- End of file character, treated as an end of file only if it
1203 -- is the last character in the buffer, otherwise it is ignored.
1206 if Scan_Ptr
= Source_Last
(Current_Source_File
) then
1212 Scan_Ptr
:= Scan_Ptr
+ 1;
1218 Accumulate_Checksum
('&');
1220 if Source
(Scan_Ptr
+ 1) = '&' then
1221 Error_Msg_S
("'&'& should be `AND THEN`");
1222 Scan_Ptr
:= Scan_Ptr
+ 2;
1227 Scan_Ptr
:= Scan_Ptr
+ 1;
1228 Token
:= Tok_Ampersand
;
1232 -- Asterisk (can be multiplication operator or double asterisk
1233 -- which is the exponentiation compound delimiter).
1236 Accumulate_Checksum
('*');
1238 if Source
(Scan_Ptr
+ 1) = '*' then
1239 Accumulate_Checksum
('*');
1240 Scan_Ptr
:= Scan_Ptr
+ 2;
1241 Token
:= Tok_Double_Asterisk
;
1245 Scan_Ptr
:= Scan_Ptr
+ 1;
1246 Token
:= Tok_Asterisk
;
1250 -- Colon, which can either be an isolated colon, or part of an
1251 -- assignment compound delimiter.
1254 Accumulate_Checksum
(':');
1256 if Double_Char_Token
('=') then
1257 Token
:= Tok_Colon_Equal
;
1258 if Style_Check
then Style
.Check_Colon_Equal
; end if;
1261 elsif Source
(Scan_Ptr
+ 1) = '-'
1262 and then Source
(Scan_Ptr
+ 2) /= '-'
1264 Token
:= Tok_Colon_Equal
;
1265 Error_Msg
(":- should be :=", Scan_Ptr
);
1266 Scan_Ptr
:= Scan_Ptr
+ 2;
1270 Scan_Ptr
:= Scan_Ptr
+ 1;
1272 if Style_Check
then Style
.Check_Colon
; end if;
1279 Accumulate_Checksum
('(');
1280 Scan_Ptr
:= Scan_Ptr
+ 1;
1281 Token
:= Tok_Left_Paren
;
1282 if Style_Check
then Style
.Check_Left_Paren
; end if;
1288 if Source
(Scan_Ptr
+ 1) = '"' then
1290 goto Scan_Identifier
;
1293 Error_Msg_S
("illegal character, replaced by ""(""");
1294 Scan_Ptr
:= Scan_Ptr
+ 1;
1295 Token
:= Tok_Left_Paren
;
1302 Error_Msg_S
("illegal character, replaced by ""(""");
1303 Scan_Ptr
:= Scan_Ptr
+ 1;
1304 Token
:= Tok_Left_Paren
;
1310 Accumulate_Checksum
(',');
1311 Scan_Ptr
:= Scan_Ptr
+ 1;
1313 if Style_Check
then Style
.Check_Comma
; end if;
1316 -- Dot, which is either an isolated period, or part of a double
1317 -- dot compound delimiter sequence. We also check for the case of
1318 -- a digit following the period, to give a better error message.
1321 Accumulate_Checksum
('.');
1323 if Double_Char_Token
('.') then
1324 Token
:= Tok_Dot_Dot
;
1325 if Style_Check
then Style
.Check_Dot_Dot
; end if;
1328 elsif Source
(Scan_Ptr
+ 1) in '0' .. '9' then
1329 Error_Msg_S
("numeric literal cannot start with point");
1330 Scan_Ptr
:= Scan_Ptr
+ 1;
1333 Scan_Ptr
:= Scan_Ptr
+ 1;
1338 -- Equal, which can either be an equality operator, or part of the
1339 -- arrow (=>) compound delimiter.
1342 Accumulate_Checksum
('=');
1344 if Double_Char_Token
('>') then
1346 if Style_Check
then Style
.Check_Arrow
; end if;
1349 elsif Source
(Scan_Ptr
+ 1) = '=' then
1350 Error_Msg_S
("== should be =");
1351 Scan_Ptr
:= Scan_Ptr
+ 1;
1354 Scan_Ptr
:= Scan_Ptr
+ 1;
1358 -- Greater than, which can be a greater than operator, greater than
1359 -- or equal operator, or first character of a right label bracket.
1362 Accumulate_Checksum
('>');
1364 if Double_Char_Token
('=') then
1365 Token
:= Tok_Greater_Equal
;
1368 elsif Double_Char_Token
('>') then
1369 Token
:= Tok_Greater_Greater
;
1373 Scan_Ptr
:= Scan_Ptr
+ 1;
1374 Token
:= Tok_Greater
;
1378 -- Less than, which can be a less than operator, less than or equal
1379 -- operator, or the first character of a left label bracket, or the
1380 -- first character of a box (<>) compound delimiter.
1383 Accumulate_Checksum
('<');
1385 if Double_Char_Token
('=') then
1386 Token
:= Tok_Less_Equal
;
1389 elsif Double_Char_Token
('>') then
1391 if Style_Check
then Style
.Check_Box
; end if;
1394 elsif Double_Char_Token
('<') then
1395 Token
:= Tok_Less_Less
;
1399 Scan_Ptr
:= Scan_Ptr
+ 1;
1404 -- Minus, which is either a subtraction operator, or the first
1405 -- character of double minus starting a comment
1407 when '-' => Minus_Case
: begin
1408 if Source
(Scan_Ptr
+ 1) = '>' then
1409 Error_Msg_S
("invalid token");
1410 Scan_Ptr
:= Scan_Ptr
+ 2;
1414 elsif Source
(Scan_Ptr
+ 1) /= '-' then
1415 Accumulate_Checksum
('-');
1416 Scan_Ptr
:= Scan_Ptr
+ 1;
1422 else -- Source (Scan_Ptr + 1) = '-' then
1423 if Style_Check
then Style
.Check_Comment
; end if;
1424 Scan_Ptr
:= Scan_Ptr
+ 2;
1425 Start_Of_Comment
:= Scan_Ptr
;
1427 -- Loop to scan comment (this loop runs more than once only if
1428 -- a horizontal tab or other non-graphic character is scanned)
1431 -- Scan to non graphic character (opened up for speed)
1434 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1435 Scan_Ptr
:= Scan_Ptr
+ 1;
1436 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1437 Scan_Ptr
:= Scan_Ptr
+ 1;
1438 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1439 Scan_Ptr
:= Scan_Ptr
+ 1;
1440 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1441 Scan_Ptr
:= Scan_Ptr
+ 1;
1442 exit when Source
(Scan_Ptr
) not in Graphic_Character
;
1443 Scan_Ptr
:= Scan_Ptr
+ 1;
1446 -- Keep going if horizontal tab
1448 if Source
(Scan_Ptr
) = HT
then
1449 if Style_Check
then Style
.Check_HT
; end if;
1450 Scan_Ptr
:= Scan_Ptr
+ 1;
1452 -- Terminate scan of comment if line terminator
1454 elsif Source
(Scan_Ptr
) in Line_Terminator
then
1457 -- Terminate scan of comment if end of file encountered
1458 -- (embedded EOF character or real last character in file)
1460 elsif Source
(Scan_Ptr
) = EOF
then
1463 -- Keep going if character in 80-FF range, or is ESC. These
1464 -- characters are allowed in comments by RM-2.1(1), 2.7(2).
1465 -- They are allowed even in Ada 83 mode according to the
1466 -- approved AI. ESC was added to the AI in June 93.
1468 elsif Source
(Scan_Ptr
) in Upper_Half_Character
1469 or else Source
(Scan_Ptr
) = ESC
1471 Scan_Ptr
:= Scan_Ptr
+ 1;
1473 -- Otherwise we have an illegal comment character
1476 Error_Illegal_Character
;
1481 -- Note that, except when comments are tokens, we do NOT
1482 -- execute a return here, instead we fall through to reexecute
1483 -- the scan loop to look for a token.
1485 if Comment_Is_Token
then
1486 Name_Len
:= Integer (Scan_Ptr
- Start_Of_Comment
);
1487 Name_Buffer
(1 .. Name_Len
) :=
1488 String (Source
(Start_Of_Comment
.. Scan_Ptr
- 1));
1489 Comment_Id
:= Name_Find
;
1490 Token
:= Tok_Comment
;
1496 -- Double quote starting a string literal
1503 -- Percent starting a string literal
1506 Obsolescent_Check
(Token_Ptr
);
1508 if Warn_On_Obsolescent_Feature
then
1510 ("use of ""'%"" is an obsolescent feature ('R'M 'J.2(4))?");
1512 ("\use """""" instead?");
1519 -- Apostrophe. This can either be the start of a character literal,
1520 -- or an isolated apostrophe used in a qualified expression or an
1521 -- attribute. We treat it as a character literal if it does not
1522 -- follow a right parenthesis, identifier, the keyword ALL or
1523 -- a literal. This means that we correctly treat constructs like:
1525 -- A := CHARACTER'('A');
1527 -- Note that RM-2.2(7) does not require a separator between
1528 -- "CHARACTER" and "'" in the above.
1530 when ''' => Char_Literal_Case
: declare
1535 Accumulate_Checksum
(''');
1536 Scan_Ptr
:= Scan_Ptr
+ 1;
1538 -- Here is where we make the test to distinguish the cases. Treat
1539 -- as apostrophe if previous token is an identifier, right paren
1540 -- or the reserved word "all" (latter case as in A.all'Address)
1541 -- (or the reserved word "project" in project files).
1542 -- Also treat it as apostrophe after a literal (this catches
1543 -- some legitimate cases, like A."abs"'Address, and also gives
1544 -- better error behavior for impossible cases like 123'xxx).
1546 if Prev_Token
= Tok_Identifier
1547 or else Prev_Token
= Tok_Right_Paren
1548 or else Prev_Token
= Tok_All
1549 or else Prev_Token
= Tok_Project
1550 or else Prev_Token
in Token_Class_Literal
1552 Token
:= Tok_Apostrophe
;
1553 if Style_Check
then Style
.Check_Apostrophe
; end if;
1556 -- Otherwise the apostrophe starts a character literal
1559 -- Case of wide character literal with ESC or [ encoding
1561 if (Source
(Scan_Ptr
) = ESC
1563 Wide_Character_Encoding_Method
in WC_ESC_Encoding_Method
)
1565 (Source
(Scan_Ptr
) in Upper_Half_Character
1567 Upper_Half_Encoding
)
1569 (Source
(Scan_Ptr
) = '['
1571 Source
(Scan_Ptr
+ 1) = '"')
1573 Scan_Wide
(Source
, Scan_Ptr
, Code
, Err
);
1574 Accumulate_Checksum
(Code
);
1577 Error_Illegal_Wide_Character
;
1580 if Source
(Scan_Ptr
) /= ''' then
1581 Error_Msg_S
("missing apostrophe");
1583 Scan_Ptr
:= Scan_Ptr
+ 1;
1586 -- If we do not find a closing quote in the expected place then
1587 -- assume that we have a misguided attempt at a string literal.
1589 -- However, if previous token is RANGE, then we return an
1590 -- apostrophe instead since this gives better error recovery
1592 elsif Source
(Scan_Ptr
+ 1) /= ''' then
1594 if Prev_Token
= Tok_Range
then
1595 Token
:= Tok_Apostrophe
;
1599 Scan_Ptr
:= Scan_Ptr
- 1;
1601 ("strings are delimited by double quote character");
1607 -- Otherwise we have a (non-wide) character literal
1610 Accumulate_Checksum
(Source
(Scan_Ptr
));
1612 if Source
(Scan_Ptr
) not in Graphic_Character
then
1613 if Source
(Scan_Ptr
) in Upper_Half_Character
then
1614 if Ada_Version
= Ada_83
then
1615 Error_Illegal_Character
;
1619 Error_Illegal_Character
;
1623 Code
:= Get_Char_Code
(Source
(Scan_Ptr
));
1624 Scan_Ptr
:= Scan_Ptr
+ 2;
1627 -- Fall through here with Scan_Ptr updated past the closing
1628 -- quote, and Code set to the Char_Code value for the literal
1630 Accumulate_Checksum
(''');
1631 Token
:= Tok_Char_Literal
;
1632 Set_Character_Literal_Name
(Code
);
1633 Token_Name
:= Name_Find
;
1634 Character_Code
:= Code
;
1638 end Char_Literal_Case
;
1640 -- Right parenthesis
1643 Accumulate_Checksum
(')');
1644 Scan_Ptr
:= Scan_Ptr
+ 1;
1645 Token
:= Tok_Right_Paren
;
1646 if Style_Check
then Style
.Check_Right_Paren
; end if;
1649 -- Right bracket or right brace, treated as right paren
1652 Error_Msg_S
("illegal character, replaced by "")""");
1653 Scan_Ptr
:= Scan_Ptr
+ 1;
1654 Token
:= Tok_Right_Paren
;
1657 -- Slash (can be division operator or first character of not equal)
1660 Accumulate_Checksum
('/');
1662 if Double_Char_Token
('=') then
1663 Token
:= Tok_Not_Equal
;
1666 Scan_Ptr
:= Scan_Ptr
+ 1;
1674 Accumulate_Checksum
(';');
1675 Scan_Ptr
:= Scan_Ptr
+ 1;
1676 Token
:= Tok_Semicolon
;
1677 if Style_Check
then Style
.Check_Semicolon
; end if;
1682 when '|' => Vertical_Bar_Case
: begin
1683 Accumulate_Checksum
('|');
1685 -- Special check for || to give nice message
1687 if Source
(Scan_Ptr
+ 1) = '|' then
1688 Error_Msg_S
("""'|'|"" should be `OR ELSE`");
1689 Scan_Ptr
:= Scan_Ptr
+ 2;
1694 Scan_Ptr
:= Scan_Ptr
+ 1;
1695 Token
:= Tok_Vertical_Bar
;
1696 if Style_Check
then Style
.Check_Vertical_Bar
; end if;
1699 end Vertical_Bar_Case
;
1701 -- Exclamation, replacement character for vertical bar
1703 when '!' => Exclamation_Case
: begin
1704 Accumulate_Checksum
('!');
1705 Obsolescent_Check
(Token_Ptr
);
1707 if Warn_On_Obsolescent_Feature
then
1709 ("use of ""'!"" is an obsolescent feature ('R'M 'J.2(2))?");
1711 ("\use ""'|"" instead?");
1714 if Source
(Scan_Ptr
+ 1) = '=' then
1715 Error_Msg_S
("'!= should be /=");
1716 Scan_Ptr
:= Scan_Ptr
+ 2;
1717 Token
:= Tok_Not_Equal
;
1721 Scan_Ptr
:= Scan_Ptr
+ 1;
1722 Token
:= Tok_Vertical_Bar
;
1726 end Exclamation_Case
;
1730 when '+' => Plus_Case
: begin
1731 Accumulate_Checksum
('+');
1732 Scan_Ptr
:= Scan_Ptr
+ 1;
1737 -- Digits starting a numeric literal
1742 if Identifier_Char
(Source
(Scan_Ptr
)) then
1744 ("delimiter required between literal and identifier");
1749 -- Lower case letters
1753 Name_Buffer
(1) := Source
(Scan_Ptr
);
1754 Accumulate_Checksum
(Name_Buffer
(1));
1755 Scan_Ptr
:= Scan_Ptr
+ 1;
1756 goto Scan_Identifier
;
1758 -- Upper case letters
1763 Character'Val (Character'Pos (Source
(Scan_Ptr
)) + 32);
1764 Accumulate_Checksum
(Name_Buffer
(1));
1765 Scan_Ptr
:= Scan_Ptr
+ 1;
1766 goto Scan_Identifier
;
1768 -- Underline character
1771 if Special_Characters
('_') then
1772 Token_Ptr
:= Scan_Ptr
;
1773 Scan_Ptr
:= Scan_Ptr
+ 1;
1774 Token
:= Tok_Special
;
1775 Special_Character
:= '_';
1779 Error_Msg_S
("identifier cannot start with underline");
1781 Name_Buffer
(1) := '_';
1782 Scan_Ptr
:= Scan_Ptr
+ 1;
1783 goto Scan_Identifier
;
1785 -- Space (not possible, because we scanned past blanks)
1788 raise Program_Error
;
1790 -- Characters in top half of ASCII 8-bit chart
1792 when Upper_Half_Character
=>
1794 -- Wide character case. Note that Scan_Identifier will issue
1795 -- an appropriate message if wide characters are not allowed
1798 if Upper_Half_Encoding
then
1800 goto Scan_Identifier
;
1802 -- Otherwise we have OK Latin-1 character
1805 -- Upper half characters may possibly be identifier letters
1806 -- but can never be digits, so Identifier_Char can be used
1807 -- to test for a valid start of identifier character.
1809 if Identifier_Char
(Source
(Scan_Ptr
)) then
1811 goto Scan_Identifier
;
1813 Error_Illegal_Character
;
1819 -- ESC character, possible start of identifier if wide characters
1820 -- using ESC encoding are allowed in identifiers, which we can
1821 -- tell by looking at the Identifier_Char flag for ESC, which is
1822 -- only true if these conditions are met.
1824 if Identifier_Char
(ESC
) then
1826 goto Scan_Identifier
;
1828 Error_Illegal_Wide_Character
;
1831 -- Invalid control characters
1833 when NUL | SOH | STX | ETX | EOT | ENQ | ACK | BEL | BS | SO |
1834 SI | DLE | DC1 | DC2 | DC3 | DC4 | NAK | SYN | ETB | CAN |
1835 EM | FS | GS | RS | US | DEL
1837 Error_Illegal_Character
;
1839 -- Invalid graphic characters
1841 when '#' |
'$' |
'?' |
'@' |
'`' |
'\' |
'^' |
'~' =>
1842 -- If Set_Special_Character has been called for this character,
1843 -- set Scans.Special_Character and return a Special token.
1845 if Special_Characters
(Source
(Scan_Ptr
)) then
1846 Token_Ptr
:= Scan_Ptr
;
1847 Token
:= Tok_Special
;
1848 Special_Character
:= Source
(Scan_Ptr
);
1849 Scan_Ptr
:= Scan_Ptr
+ 1;
1852 -- Otherwise, this is an illegal character
1855 Error_Illegal_Character
;
1858 -- End switch on non-blank character
1862 -- End loop past format effectors. The exit from this loop is by
1863 -- executing a return statement following completion of token scan
1864 -- (control never falls out of this loop to the code which follows)
1868 -- Identifier scanning routine. On entry, some initial characters
1869 -- of the identifier may have already been stored in Name_Buffer.
1870 -- If so, Name_Len has the number of characters stored. Otherwise
1871 -- Name_Len is set to zero on entry.
1875 -- This loop scans as fast as possible past lower half letters
1876 -- and digits, which we expect to be the most common characters.
1879 if Source
(Scan_Ptr
) in 'a' .. 'z'
1880 or else Source
(Scan_Ptr
) in '0' .. '9'
1882 Name_Buffer
(Name_Len
+ 1) := Source
(Scan_Ptr
);
1883 Accumulate_Checksum
(Source
(Scan_Ptr
));
1885 elsif Source
(Scan_Ptr
) in 'A' .. 'Z' then
1886 Name_Buffer
(Name_Len
+ 1) :=
1887 Character'Val (Character'Pos (Source
(Scan_Ptr
)) + 32);
1888 Accumulate_Checksum
(Name_Buffer
(Name_Len
+ 1));
1893 -- Open out the loop a couple of times for speed
1895 if Source
(Scan_Ptr
+ 1) in 'a' .. 'z'
1896 or else Source
(Scan_Ptr
+ 1) in '0' .. '9'
1898 Name_Buffer
(Name_Len
+ 2) := Source
(Scan_Ptr
+ 1);
1899 Accumulate_Checksum
(Source
(Scan_Ptr
+ 1));
1901 elsif Source
(Scan_Ptr
+ 1) in 'A' .. 'Z' then
1902 Name_Buffer
(Name_Len
+ 2) :=
1903 Character'Val (Character'Pos (Source
(Scan_Ptr
+ 1)) + 32);
1904 Accumulate_Checksum
(Name_Buffer
(Name_Len
+ 2));
1907 Scan_Ptr
:= Scan_Ptr
+ 1;
1908 Name_Len
:= Name_Len
+ 1;
1912 if Source
(Scan_Ptr
+ 2) in 'a' .. 'z'
1913 or else Source
(Scan_Ptr
+ 2) in '0' .. '9'
1915 Name_Buffer
(Name_Len
+ 3) := Source
(Scan_Ptr
+ 2);
1916 Accumulate_Checksum
(Source
(Scan_Ptr
+ 2));
1918 elsif Source
(Scan_Ptr
+ 2) in 'A' .. 'Z' then
1919 Name_Buffer
(Name_Len
+ 3) :=
1920 Character'Val (Character'Pos (Source
(Scan_Ptr
+ 2)) + 32);
1921 Accumulate_Checksum
(Name_Buffer
(Name_Len
+ 3));
1923 Scan_Ptr
:= Scan_Ptr
+ 2;
1924 Name_Len
:= Name_Len
+ 2;
1928 if Source
(Scan_Ptr
+ 3) in 'a' .. 'z'
1929 or else Source
(Scan_Ptr
+ 3) in '0' .. '9'
1931 Name_Buffer
(Name_Len
+ 4) := Source
(Scan_Ptr
+ 3);
1932 Accumulate_Checksum
(Source
(Scan_Ptr
+ 3));
1934 elsif Source
(Scan_Ptr
+ 3) in 'A' .. 'Z' then
1935 Name_Buffer
(Name_Len
+ 4) :=
1936 Character'Val (Character'Pos (Source
(Scan_Ptr
+ 3)) + 32);
1937 Accumulate_Checksum
(Name_Buffer
(Name_Len
+ 4));
1940 Scan_Ptr
:= Scan_Ptr
+ 3;
1941 Name_Len
:= Name_Len
+ 3;
1945 Scan_Ptr
:= Scan_Ptr
+ 4;
1946 Name_Len
:= Name_Len
+ 4;
1949 -- If we fall through, then we have encountered either an underline
1950 -- character, or an extended identifier character (i.e. one from the
1951 -- upper half), or a wide character, or an identifier terminator.
1952 -- The initial test speeds us up in the most common case where we
1953 -- have an identifier terminator. Note that ESC is an identifier
1954 -- character only if a wide character encoding method that uses
1955 -- ESC encoding is active, so if we find an ESC character we know
1956 -- that we have a wide character.
1958 if Identifier_Char
(Source
(Scan_Ptr
)) then
1960 -- Case of underline
1962 if Source
(Scan_Ptr
) = '_' then
1963 Accumulate_Checksum
('_');
1965 -- Check error case of identifier ending with underscore
1966 -- In this case we ignore the underscore and do not store it.
1968 if not Identifier_Char
(Source
(Scan_Ptr
+ 1)) then
1969 Error_Msg_S
("identifier cannot end with underline");
1970 Scan_Ptr
:= Scan_Ptr
+ 1;
1972 -- Check error case of two underscores. In this case we do
1973 -- not store the first underscore (we will store the second)
1975 elsif Source
(Scan_Ptr
+ 1) = '_' then
1976 Error_No_Double_Underline
;
1978 -- Normal case of legal underscore
1981 Name_Len
:= Name_Len
+ 1;
1982 Name_Buffer
(Name_Len
) := '_';
1985 Scan_Ptr
:= Scan_Ptr
+ 1;
1986 goto Scan_Identifier
;
1988 -- Upper half character
1990 elsif Source
(Scan_Ptr
) in Upper_Half_Character
1991 and then not Upper_Half_Encoding
1993 Accumulate_Checksum
(Source
(Scan_Ptr
));
1994 Store_Encoded_Character
1995 (Get_Char_Code
(Fold_Lower
(Source
(Scan_Ptr
))));
1996 Scan_Ptr
:= Scan_Ptr
+ 1;
1997 goto Scan_Identifier
;
1999 -- Left bracket not followed by a quote terminates an identifier.
2000 -- This is an error, but we don't want to give a junk error msg
2001 -- about wide characters in this case!
2003 elsif Source
(Scan_Ptr
) = '['
2004 and then Source
(Scan_Ptr
+ 1) /= '"'
2008 -- We know we have a wide character encoding here (the current
2009 -- character is either ESC, left bracket, or an upper half
2010 -- character depending on the encoding method).
2013 -- Scan out the wide character and insert the appropriate
2014 -- encoding into the name table entry for the identifier.
2017 Sptr
: constant Source_Ptr
:= Scan_Ptr
;
2023 Scan_Wide
(Source
, Scan_Ptr
, Code
, Err
);
2025 -- If error, signal error
2028 Error_Illegal_Wide_Character
;
2030 -- If the character scanned is a normal identifier
2031 -- character, then we treat it that way.
2033 elsif In_Character_Range
(Code
)
2034 and then Identifier_Char
(Get_Character
(Code
))
2036 Chr
:= Get_Character
(Code
);
2037 Accumulate_Checksum
(Chr
);
2038 Store_Encoded_Character
2039 (Get_Char_Code
(Fold_Lower
(Chr
)));
2041 -- Character is not normal identifier character, store
2042 -- it in encoded form.
2045 Accumulate_Checksum
(Code
);
2046 Store_Encoded_Character
(Code
);
2048 -- Make sure we are allowing wide characters in
2049 -- identifiers. Note that we allow wide character
2050 -- notation for an OK identifier character. This
2051 -- in particular allows bracket or other notation
2052 -- to be used for upper half letters.
2054 -- Wide characters are always allowed in Ada 2005
2056 if Identifier_Character_Set
/= 'w'
2057 and then Ada_Version
< Ada_05
2060 ("wide character not allowed in identifier", Sptr
);
2065 goto Scan_Identifier
;
2069 -- Scan of identifier is complete. The identifier is stored in
2070 -- Name_Buffer, and Scan_Ptr points past the last character.
2072 Token_Name
:= Name_Find
;
2074 -- Here is where we check if it was a keyword
2076 if Get_Name_Table_Byte
(Token_Name
) /= 0
2077 and then (Ada_Version
>= Ada_95
2078 or else Token_Name
not in Ada_95_Reserved_Words
)
2080 Token
:= Token_Type
'Val (Get_Name_Table_Byte
(Token_Name
));
2082 -- Deal with possible style check for non-lower case keyword,
2083 -- but we don't treat ACCESS, DELTA, DIGITS, RANGE as keywords
2084 -- for this purpose if they appear as attribute designators.
2085 -- Actually we only check the first character for speed.
2088 and then Source
(Token_Ptr
) <= 'Z'
2089 and then (Prev_Token
/= Tok_Apostrophe
2091 (Token
/= Tok_Access
2092 and then Token
/= Tok_Delta
2093 and then Token
/= Tok_Digits
2094 and then Token
/= Tok_Range
))
2096 Style
.Non_Lower_Case_Keyword
;
2099 -- We must reset Token_Name since this is not an identifier
2100 -- and if we leave Token_Name set, the parser gets confused
2101 -- because it thinks it is dealing with an identifier instead
2102 -- of the corresponding keyword.
2104 Token_Name
:= No_Name
;
2105 Accumulate_Token_Checksum
;
2108 -- It is an identifier after all
2111 Token
:= Tok_Identifier
;
2112 Accumulate_Token_Checksum
;
2118 --------------------------
2119 -- Set_Comment_As_Token --
2120 --------------------------
procedure Set_Comment_As_Token (Value : Boolean) is
begin
   --  Record the requested mode in the package-level flag. The comment
   --  scanning code in Scan tests this flag: when it is True, the text
   --  of a scanned comment is entered in the name table and the token
   --  Tok_Comment is set; when it is False, the comment is skipped and
   --  scanning continues with the next token.

   Comment_Is_Token := Value;
end Set_Comment_As_Token;
2127 ------------------------------
2128 -- Set_End_Of_Line_As_Token --
2129 ------------------------------
procedure Set_End_Of_Line_As_Token (Value : Boolean) is
begin
   --  Record the requested mode in the package-level flag. The line
   --  terminator case in Scan tests this flag: when it is True, Token_Ptr
   --  is set to the position of the terminator and the token is set to
   --  Tok_End_Of_Line.

   End_Of_Line_Is_Token := Value;
end Set_End_Of_Line_As_Token;
2136 ---------------------------
2137 -- Set_Special_Character --
2138 ---------------------------
procedure Set_Special_Character (C : Character) is
begin
   --  Only the characters listed below can be registered as special
   --  characters. For a registered character, Scan sets the token to
   --  Tok_Special and stores the character in Special_Character instead
   --  of reporting an illegal character (or, for underline, instead of
   --  starting an identifier).

   case C is
      when '#' | '$' | '_' | '?' | '@' | '`' | '\' | '^' | '~' =>
         Special_Characters (C) := True;

      --  Any other character is ignored: the table is left unchanged

      when others =>
         null;
   end case;
end Set_Special_Character;
2151 ----------------------
2152 -- Set_Start_Column --
2153 ----------------------
2155 -- Note: it seems at first glance a little expensive to compute this value
2156 -- for every source line (since it is certainly not used for all source
2157 -- lines). On the other hand, it doesn't take much more work to skip past
2158 -- the initial white space on the line counting the columns than it would
2159 -- to scan past the white space using the standard scanning circuits.
function Set_Start_Column return Column_Number is
   Start_Column : Column_Number := 0;
   --  Zero-origin column reached so far. This is the value returned.

begin
   --  On entry Scan_Ptr is expected to point to the first character of a
   --  line. On exit Scan_Ptr has been advanced past all leading blanks and
   --  horizontal tabs, and the result is the column of the character that
   --  Scan_Ptr then designates, counting each blank as one column and each
   --  tab as an advance to the next multiple of eight.

   --  Outer loop scans past horizontal tab characters

   Tabs_Loop : loop

      --  Inner loop scans past blanks as fast as possible, bumping Scan_Ptr
      --  past the blanks and adjusting Start_Column to account for them.
      --  The test is opened out seven times; the loop repeats only when
      --  seven consecutive blanks have been found.

      Blanks_Loop : loop
         if Source (Scan_Ptr) = ' ' then
            if Source (Scan_Ptr + 1) = ' ' then
               if Source (Scan_Ptr + 2) = ' ' then
                  if Source (Scan_Ptr + 3) = ' ' then
                     if Source (Scan_Ptr + 4) = ' ' then
                        if Source (Scan_Ptr + 5) = ' ' then
                           if Source (Scan_Ptr + 6) = ' ' then
                              Scan_Ptr := Scan_Ptr + 7;
                              Start_Column := Start_Column + 7;
                           else
                              Scan_Ptr := Scan_Ptr + 6;
                              Start_Column := Start_Column + 6;
                              exit Blanks_Loop;
                           end if;
                        else
                           Scan_Ptr := Scan_Ptr + 5;
                           Start_Column := Start_Column + 5;
                           exit Blanks_Loop;
                        end if;
                     else
                        Scan_Ptr := Scan_Ptr + 4;
                        Start_Column := Start_Column + 4;
                        exit Blanks_Loop;
                     end if;
                  else
                     Scan_Ptr := Scan_Ptr + 3;
                     Start_Column := Start_Column + 3;
                     exit Blanks_Loop;
                  end if;
               else
                  Scan_Ptr := Scan_Ptr + 2;
                  Start_Column := Start_Column + 2;
                  exit Blanks_Loop;
               end if;
            else
               Scan_Ptr := Scan_Ptr + 1;
               Start_Column := Start_Column + 1;
               exit Blanks_Loop;
            end if;
         else
            exit Blanks_Loop;
         end if;
      end loop Blanks_Loop;

      --  Outer loop keeps going only if a horizontal tab follows. A tab
      --  moves the column to the next multiple of eight.

      if Source (Scan_Ptr) = HT then
         if Style_Check then Style.Check_HT; end if;
         Scan_Ptr := Scan_Ptr + 1;
         Start_Column := (Start_Column / 8) * 8 + 8;
      else
         exit Tabs_Loop;
      end if;
   end loop Tabs_Loop;

   return Start_Column;
end Set_Start_Column;