1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequences or UTF-8 encoded characters
28 and returns a token for everything found except comments, white space
29 and line terminators. When necessary, it also fills the java_lval
30 (yylval) union. It's implemented to be called by a re-entrant parser.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
40 extern struct obstack
*expression_obstack
;
43 /* Function declaration */
44 static int java_lineterminator
PARAMS ((unicode_t
));
45 static char *java_sprint_unicode
PARAMS ((struct java_line
*, int));
46 static void java_unicode_2_utf8
PARAMS ((unicode_t
));
47 static void java_lex_error
PARAMS ((const char *, int));
49 static int java_is_eol
PARAMS ((FILE *, int));
50 static tree build_wfl_node
PARAMS ((tree
));
52 static void java_store_unicode
PARAMS ((struct java_line
*, unicode_t
, int));
53 static unicode_t java_parse_escape_sequence
PARAMS ((void));
54 static int java_letter_or_digit_p
PARAMS ((unicode_t
));
55 static int java_parse_doc_section
PARAMS ((unicode_t
));
56 static void java_parse_end_comment
PARAMS ((unicode_t
));
57 static unicode_t java_get_unicode
PARAMS ((void));
58 static unicode_t java_read_unicode
PARAMS ((int, int *));
59 static void java_store_unicode
PARAMS ((struct java_line
*, unicode_t
, int));
60 static unicode_t java_read_char
PARAMS ((void));
61 static void java_allocate_new_line
PARAMS ((void));
62 static void java_unget_unicode
PARAMS ((void));
63 static unicode_t java_sneak_unicode
PARAMS ((void));
69 int java_lang_imported
= 0;
72 java_lang_id
= get_identifier ("java.lang");
73 if (!java_lang_cloneable
)
74 java_lang_cloneable
= get_identifier ("java.lang.Cloneable");
76 if (!java_lang_imported
)
78 tree node
= build_tree_list
79 (build_expr_wfl (java_lang_id
, NULL
, 0, 0), NULL_TREE
);
80 read_import_dir (TREE_PURPOSE (node
));
81 TREE_CHAIN (node
) = ctxp
->import_demand_list
;
82 ctxp
->import_demand_list
= node
;
83 java_lang_imported
= 1;
87 wfl_operator
= build_expr_wfl (NULL_TREE
, ctxp
->filename
, 0, 0);
89 label_id
= get_identifier ("$L");
91 wfl_append
= build_expr_wfl (get_identifier ("append"), NULL
, 0, 0);
92 if (!wfl_string_buffer
)
94 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL
, 0, 0);
96 wfl_to_string
= build_expr_wfl (get_identifier ("toString"), NULL
, 0, 0);
98 ctxp
->static_initialized
= ctxp
->non_static_initialized
=
99 ctxp
->incomplete_class
= NULL_TREE
;
101 bzero ((PTR
) ctxp
->modifier_ctx
, 11*sizeof (ctxp
->modifier_ctx
[0]));
102 bzero ((PTR
) current_jcf
, sizeof (JCF
));
103 ctxp
->current_parsed_class
= NULL
;
104 ctxp
->package
= NULL_TREE
;
107 ctxp
->filename
= input_filename
;
108 ctxp
->lineno
= lineno
= 0;
111 ctxp
->unget_utf8_value
= 0;
112 ctxp
->minus_seen
= 0;
113 ctxp
->java_error_flag
= 0;
/* Format character I of LINE as text for use in a diagnostic.
   A character that was written as a Unicode escape
   (line->unicode_escape_p[i] is set), or whose value is above 128, is
   rendered through the "\\u%04x" sprintf format.  The remaining visible
   statement stores the character itself in buffer[0]; the `else' line
   and the return statement are not visible in this excerpt.
   The text is built in a function-local static buffer, so each call
   overwrites the result of the previous one.
   NOTE(review): the `> 128' test lets the value 128 (0x80), which is
   outside the ASCII range, take the plain-character path -- confirm
   whether `> 127' was intended.  */
117 java_sprint_unicode (line
, i
)
118 struct java_line
*line
;
121 static char buffer
[10];
122 if (line
->unicode_escape_p
[i
] || line
->line
[i
] > 128)
123 sprintf (buffer
, "\\u%04x", line
->line
[i
]);
126 buffer
[0] = line
->line
[i
];
133 java_sneak_unicode ()
135 return (ctxp
->c_line
->line
[ctxp
->c_line
->current
]);
139 java_unget_unicode ()
141 if (!ctxp
->c_line
->current
)
142 fatal ("can't unget unicode - java_unget_unicode");
143 ctxp
->c_line
->current
--;
144 ctxp
->c_line
->char_col
-= JAVA_COLUMN_DELTA (0);
148 java_allocate_new_line ()
150 unicode_t ahead
= (ctxp
->c_line
? ctxp
->c_line
->ahead
[0] : '\0');
151 char ahead_escape_p
= (ctxp
->c_line
?
152 ctxp
->c_line
->unicode_escape_ahead_p
: 0);
154 if (ctxp
->c_line
&& !ctxp
->c_line
->white_space_only
)
158 free (ctxp
->p_line
->unicode_escape_p
);
159 free (ctxp
->p_line
->line
);
162 ctxp
->p_line
= ctxp
->c_line
;
163 ctxp
->c_line
= NULL
; /* Reallocated */
168 ctxp
->c_line
= (struct java_line
*)xmalloc (sizeof (struct java_line
));
169 ctxp
->c_line
->max
= JAVA_LINE_MAX
;
170 ctxp
->c_line
->line
= (unicode_t
*)xmalloc
171 (sizeof (unicode_t
)*ctxp
->c_line
->max
);
172 ctxp
->c_line
->unicode_escape_p
=
173 (char *)xmalloc (sizeof (char)*ctxp
->c_line
->max
);
174 ctxp
->c_line
->white_space_only
= 0;
177 ctxp
->c_line
->line
[0] = ctxp
->c_line
->size
= 0;
178 ctxp
->c_line
->char_col
= ctxp
->c_line
->current
= 0;
181 ctxp
->c_line
->line
[ctxp
->c_line
->size
] = ahead
;
182 ctxp
->c_line
->unicode_escape_p
[ctxp
->c_line
->size
] = ahead_escape_p
;
183 ctxp
->c_line
->size
++;
185 ctxp
->c_line
->ahead
[0] = 0;
186 ctxp
->c_line
->unicode_escape_ahead_p
= 0;
187 ctxp
->c_line
->lineno
= ++lineno
;
188 ctxp
->c_line
->white_space_only
= 1;
191 #define BAD_UTF8_VALUE 0xFFFE
199 if (ctxp
->unget_utf8_value
)
201 int to_return
= ctxp
->unget_utf8_value
;
202 ctxp
->unget_utf8_value
= 0;
214 if ((c
& 0xe0) == 0xc0)
217 if ((c1
& 0xc0) == 0x80)
218 return (unicode_t
)(((c
&0x1f) << 6) + (c1
& 0x3f));
221 else if ((c
& 0xf0) == 0xe0)
224 if ((c1
& 0xc0) == 0x80)
227 if ((c2
& 0xc0) == 0x80)
228 return (unicode_t
)(((c
& 0xf) << 12) +
229 (( c1
& 0x3f) << 6) + (c2
& 0x3f));
236 /* We looked for a UTF8 multi-byte sequence (since we saw an initial
237 byte with the high bit set), but found invalid bytes instead.
238 If the most recent byte was ASCII (and not EOF), we should
239 unget it, in case it was a comment terminator or other delimiter. */
242 return BAD_UTF8_VALUE
;
/* Append character C to line L and record UNICODE_ESCAPE_P for it in
   the parallel l->unicode_escape_p array, then advance l->size.
   When the line is full (l->size == l->max), both arrays are first
   grown by JAVA_LINE_MAX entries with xrealloc.  */
247 java_store_unicode (l
, c
, unicode_escape_p
)
250 int unicode_escape_p
;
/* No room left: enlarge the character array and the escape-flag array
   together so that they always have the same capacity.  */
252 if (l
->size
== l
->max
)
254 l
->max
+= JAVA_LINE_MAX
;
255 l
->line
= (unicode_t
*) xrealloc (l
->line
, sizeof (unicode_t
)*l
->max
);
256 l
->unicode_escape_p
= (char *) xrealloc (l
->unicode_escape_p
,
257 sizeof (char)*l
->max
);
/* Store the character and its flag at the same index; the index is
   incremented only by the second store.  */
259 l
->line
[l
->size
] = c
;
260 l
->unicode_escape_p
[l
->size
++] = unicode_escape_p
;
264 java_read_unicode (term_context
, unicode_escape_p
)
266 int *unicode_escape_p
;
271 c
= java_read_char ();
272 *unicode_escape_p
= 0;
275 return ((term_context
? c
:
276 java_lineterminator (c
) ? '\n' : (unicode_t
)c
));
278 /* Count the number of preceding '\' */
279 for (base
= ftell (finput
), i
= base
-2; c
== '\\';)
281 fseek (finput
, i
--, SEEK_SET
);
282 c
= java_read_char (); /* Will fail if reading utf8 stream. FIXME */
284 fseek (finput
, base
, SEEK_SET
);
285 if ((base
-i
-3)%2 == 0) /* If odd number of \ seen */
287 c
= java_read_char ();
290 unsigned short unicode
= 0;
292 /* Next should be 4 hex digits, otherwise it's an error.
293 The hex value is converted into the unicode, pushed into
294 the Unicode stream. */
295 for (shift
= 12; shift
>= 0; shift
-= 4)
297 if ((c
= java_read_char ()) == UEOF
)
299 if (c
>= '0' && c
<= '9')
300 unicode
|= (unicode_t
)((c
-'0') << shift
);
301 else if ((c
>= 'a' && c
<= 'f') || (c
>= 'A' && c
<= 'F'))
302 unicode
|= (unicode_t
)((10+(c
| 0x20)-'a') << shift
);
305 ("Non hex digit in Unicode escape sequence", 0);
307 *unicode_escape_p
= 1;
308 return (term_context
? unicode
:
309 (java_lineterminator (c
) ? '\n' : unicode
));
311 ctxp
->unget_utf8_value
= c
;
313 return (unicode_t
)'\\';
319 /* It's time to read a line when... */
320 if (!ctxp
->c_line
|| ctxp
->c_line
->current
== ctxp
->c_line
->size
)
323 java_allocate_new_line ();
324 if (ctxp
->c_line
->line
[0] != '\n')
327 int unicode_escape_p
;
328 c
= java_read_unicode (0, &unicode_escape_p
);
329 java_store_unicode (ctxp
->c_line
, c
, unicode_escape_p
);
330 if (ctxp
->c_line
->white_space_only
331 && !JAVA_WHITE_SPACE_P (c
) && c
!='\n')
332 ctxp
->c_line
->white_space_only
= 0;
333 if ((c
== '\n') || (c
== UEOF
))
337 ctxp
->c_line
->char_col
+= JAVA_COLUMN_DELTA (0);
338 JAVA_LEX_CHAR (ctxp
->c_line
->line
[ctxp
->c_line
->current
]);
339 return ctxp
->c_line
->line
[ctxp
->c_line
->current
++];
/* Examine C as the possible start of a line terminator.
   When C is '\n' or '\r', the next character is read with
   java_read_unicode and a term_context argument of 1, which makes
   java_read_unicode return the character without calling back into
   this function.  If that character is the complementary terminator
   ('\r' after '\n', or '\n' after '\r') it is consumed as part of the
   same terminator; otherwise it is saved, with its Unicode-escape flag,
   in ctxp->c_line->ahead[0] and ctxp->c_line->unicode_escape_ahead_p,
   from where java_allocate_new_line copies it into the next line.
   The return statements are not visible in this excerpt.  */
343 java_lineterminator (c
)
346 int unicode_escape_p
;
347 if (c
== '\n') /* LF */
349 if ((c
= java_read_unicode (1, &unicode_escape_p
)) != '\r')
351 ctxp
->c_line
->ahead
[0] = c
;
352 ctxp
->c_line
->unicode_escape_ahead_p
= unicode_escape_p
;
356 else if (c
== '\r') /* CR */
358 if ((c
= java_read_unicode (1, &unicode_escape_p
)) != '\n')
360 ctxp
->c_line
->ahead
[0] = c
;
361 ctxp
->c_line
->unicode_escape_ahead_p
= unicode_escape_p
;
369 /* Parse the end of a C style comment.
370 * C is the first character following the '/' and '*'. */
372 java_parse_end_comment (c
)
376 for ( ;; c
= java_get_unicode ())
381 java_lex_error ("Comment not terminated at end of input", 0);
383 switch (c
= java_get_unicode ())
386 java_lex_error ("Comment not terminated at end of input", 0);
389 case '*': /* reparse only '*' */
390 java_unget_unicode ();
396 /* Parse the documentation section. Keywords must be at the beginning
397 of a documentation comment line (ignoring white space and any `*'
398 character). Parsed keyword(s): @DEPRECATED. */
401 java_parse_doc_section (c
)
404 int valid_tag
= 0, seen_star
= 0;
406 while (JAVA_WHITE_SPACE_P (c
) || (c
== '*') || c
== '\n')
418 c
= java_get_unicode();
422 java_lex_error ("Comment not terminated at end of input", 0);
424 if (seen_star
&& (c
== '/'))
425 return 1; /* Goto step1 in caller */
427 /* We're parsing @deprecated */
428 if (valid_tag
&& (c
== '@'))
433 while (tag_index
< 10 && c
!= UEOF
&& c
!= ' ' && c
!= '\n')
435 c
= java_get_unicode ();
436 tag
[tag_index
++] = c
;
440 java_lex_error ("Comment not terminated at end of input", 0);
441 tag
[tag_index
] = '\0';
443 if (!strcmp (tag
, "deprecated"))
444 ctxp
->deprecated
= 1;
446 java_unget_unicode ();
450 /* This function is to be used only by JAVA_ID_CHAR_P (); otherwise it
451 will return a wrong result. */
453 java_letter_or_digit_p (c
)
456 return _JAVA_LETTER_OR_DIGIT_P (c
);
/* Parse the remainder of an escape sequence and return the character
   it denotes.  The next character is fetched with java_get_unicode and
   dispatched on.  The simple escapes return fixed ASCII codes: 0x8
   (backspace), 0x9 (horizontal tab), 0xa (line feed), 0xc (form feed),
   0xd (carriage return), 0x22 (double quote), 0x27 (single quote) and
   0x5c (backslash); the `case' labels selecting them are not visible
   in this excerpt.  A digit starts a numeric escape of up to three
   digits.  On an out-of-range numeric escape or an unrecognized
   character, an error is reported through java_lex_error and
   JAVA_CHAR_ERROR is returned.  */
460 java_parse_escape_sequence ()
465 switch (c
= java_get_unicode ())
468 return (unicode_t
)0x8;
470 return (unicode_t
)0x9;
472 return (unicode_t
)0xa;
474 return (unicode_t
)0xc;
476 return (unicode_t
)0xd;
478 return (unicode_t
)0x22;
480 return (unicode_t
)0x27;
482 return (unicode_t
)0x5c;
483 case '0': case '1': case '2': case '3': case '4':
484 case '5': case '6': case '7': case '8': case '9':
487 int octal_escape_index
= 0;
/* Collect at most three digits.
   NOTE(review): both the case labels above and RANGE (c, '0', '9')
   accept '8' and '9', which are not octal digits; their values are
   then OR-ed into char_lit below as if they were.  Confirm whether
   the upper bound should be '7'.  */
489 for (; octal_escape_index
< 3 && RANGE (c
, '0', '9');
490 c
= java_get_unicode ())
491 octal_escape
[octal_escape_index
++] = c
;
/* The loop read one character past the escape; push it back.  */
493 java_unget_unicode ();
/* A three-digit escape whose first digit is above '3' would exceed
   0377, so it is rejected.  */
495 if ((octal_escape_index
== 3) && (octal_escape
[0] > '3'))
497 java_lex_error ("Literal octal escape out of range", 0);
498 return JAVA_CHAR_ERROR
;
/* Assemble the value three bits per digit, most significant digit
   first.  */
503 for (char_lit
=0, i
= 0, shift
= 3*(octal_escape_index
-1);
504 i
< octal_escape_index
; i
++, shift
-= 3)
505 char_lit
|= (octal_escape
[i
] - '0') << shift
;
512 return '\n'; /* ULT, caught later as a specific error */
514 java_lex_error ("Illegal character in escape sequence", 0);
515 return JAVA_CHAR_ERROR
;
519 /* Isolate the code which may raise an arithmetic exception in its
528 int number_beginning
;
531 static void java_perform_atof
PARAMS ((PTR
));
534 java_perform_atof (av
)
537 struct jpa_args
*a
= (struct jpa_args
*)av
;
538 YYSTYPE
*java_lval
= a
->java_lval
;
539 int number_beginning
= a
->number_beginning
;
540 REAL_VALUE_TYPE value
;
541 tree type
= (a
->fflag
? FLOAT_TYPE_NODE
: DOUBLE_TYPE_NODE
);
543 SET_REAL_VALUE_ATOF (value
,
544 REAL_VALUE_ATOF (a
->literal_token
, TYPE_MODE (type
)));
546 if (REAL_VALUE_ISINF (value
)
547 || REAL_VALUE_ISNAN (value
))
549 JAVA_FLOAT_RANGE_ERROR ((a
->fflag
? "float" : "double"));
553 SET_LVAL_NODE_TYPE (build_real (type
, value
), type
);
557 static int yylex
PARAMS ((YYSTYPE
*));
567 unicode_t c
, first_unicode
;
568 int ascii_index
, all_ascii
;
571 /* Translation of the Unicode escape in the raw stream of Unicode
572 characters. Takes care of line terminator. */
574 /* Skip white spaces: SP, TAB and FF or ULT */
575 for (c
= java_get_unicode ();
576 c
== '\n' || JAVA_WHITE_SPACE_P (c
); c
= java_get_unicode ())
579 ctxp
->elc
.line
= ctxp
->c_line
->lineno
;
580 ctxp
->elc
.col
= ctxp
->c_line
->char_col
-2;
583 ctxp
->elc
.col
= (ctxp
->elc
.col
< 0 ? 0 : ctxp
->elc
.col
);
585 if (c
== 0x1a) /* CTRL-Z */
587 if ((c
= java_get_unicode ()) == UEOF
)
588 return 0; /* Ok here */
590 java_unget_unicode (); /* Caught latter at the end the function */
592 /* Handle EOF here */
593 if (c
== UEOF
) /* Should probably do something here... */
596 /* Take care of eventual comments. */
599 switch (c
= java_get_unicode ())
604 c
= java_get_unicode ();
606 java_lex_error ("Comment not terminated at end of input", 0);
607 if (c
== '\n') /* ULT */
613 if ((c
= java_get_unicode ()) == '*')
615 if ((c
= java_get_unicode ()) == '/')
616 goto step1
; /* Empy documentation comment */
617 else if (java_parse_doc_section (c
))
621 java_parse_end_comment ((c
= java_get_unicode ()));
625 java_unget_unicode ();
631 ctxp
->elc
.line
= ctxp
->c_line
->lineno
;
632 ctxp
->elc
.prev_col
= ctxp
->elc
.col
;
633 ctxp
->elc
.col
= ctxp
->c_line
->char_col
- JAVA_COLUMN_DELTA (-1);
634 if (ctxp
->elc
.col
< 0)
635 fatal ("ctxp->elc.col < 0 - java_lex");
637 /* Numeric literals */
638 if (JAVA_ASCII_DIGIT (c
) || (c
== '.'))
640 /* This section of code is borrowed from gcc/c-lex.c */
641 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
642 int parts
[TOTAL_PARTS
];
643 HOST_WIDE_INT high
, low
;
644 /* End borrowed section */
645 char literal_token
[256];
646 int literal_index
= 0, radix
= 10, long_suffix
= 0, overflow
= 0, bytes
;
649 int number_beginning
= ctxp
->c_line
->current
;
652 /* We might have a . separator instead of a FP like .[0-9]* */
655 unicode_t peep
= java_sneak_unicode ();
657 if (!JAVA_ASCII_DIGIT (peep
))
660 BUILD_OPERATOR (DOT_TK
);
664 for (i
= 0; i
< TOTAL_PARTS
; i
++)
669 c
= java_get_unicode ();
670 if (c
== 'x' || c
== 'X')
673 c
= java_get_unicode ();
675 else if (JAVA_ASCII_DIGIT (c
))
679 /* Push the '.' back and prepare for a FP parsing... */
680 java_unget_unicode ();
685 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
686 JAVA_LEX_LIT ("0", 10);
690 SET_LVAL_NODE (long_zero_node
);
693 SET_LVAL_NODE (float_zero_node
);
696 SET_LVAL_NODE (double_zero_node
);
699 java_unget_unicode ();
700 SET_LVAL_NODE (integer_zero_node
);
705 /* Parse the first part of the literal, until we find something
706 which is not a number. */
707 while ((radix
== 10 && JAVA_ASCII_DIGIT (c
)) ||
708 (radix
== 16 && JAVA_ASCII_HEXDIGIT (c
)) ||
709 (radix
== 8 && JAVA_ASCII_OCTDIGIT (c
)))
711 /* We store in a string (in case it turns out to be a FP) and in
712 PARTS if we have to process an integer literal. */
713 int numeric
= (RANGE (c
, '0', '9') ? c
-'0' : 10 +(c
|0x20)-'a');
716 literal_token
[literal_index
++] = c
;
717 /* This section of code is borrowed from gcc/c-lex.c */
718 for (count
= 0; count
< TOTAL_PARTS
; count
++)
720 parts
[count
] *= radix
;
723 parts
[count
] += (parts
[count
-1] >> HOST_BITS_PER_CHAR
);
724 parts
[count
-1] &= (1 << HOST_BITS_PER_CHAR
) - 1;
729 if (parts
[TOTAL_PARTS
-1] != 0)
731 /* End borrowed section. */
732 c
= java_get_unicode ();
735 /* If we have something from the FP char set but not a digit, parse
737 if (JAVA_ASCII_FPCHAR (c
) && !JAVA_ASCII_DIGIT (c
))
740 int seen_digit
= (literal_index
? 1 : 0);
741 int seen_exponent
= 0;
742 int fflag
= 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
743 double unless specified. */
745 java_lex_error ("Can't express non-decimal FP literal", 0);
754 literal_token
[literal_index
++ ] = c
;
755 c
= java_get_unicode ();
758 java_lex_error ("Invalid character in FP literal", 0);
761 if (c
== 'e' || c
== 'E')
765 /* {E,e} must have seen at least one digit */
767 java_lex_error ("Invalid FP literal", 0);
771 literal_token
[literal_index
++] = c
;
772 c
= java_get_unicode ();
775 java_lex_error ("Invalid character in FP literal", 0);
777 if ( c
== 'f' || c
== 'F' || c
== 'd' || c
== 'D')
779 fflag
= ((c
== 'd') || (c
== 'D')) ? 0 : 1;
780 stage
= 4; /* So we fall through */
783 if ((c
=='-' || c
=='+') && stage
== 2)
786 literal_token
[literal_index
++] = c
;
787 c
= java_get_unicode ();
790 if ((stage
== 0 && JAVA_ASCII_FPCHAR (c
)) ||
791 (stage
== 1 && JAVA_ASCII_FPCHAR (c
) && !(c
== '.')) ||
792 (stage
== 2 && (JAVA_ASCII_DIGIT (c
) || JAVA_FP_PM (c
))) ||
793 (stage
== 3 && JAVA_ASCII_DIGIT (c
)))
795 if (JAVA_ASCII_DIGIT (c
))
797 literal_token
[literal_index
++ ] = c
;
798 c
= java_get_unicode ();
805 if (stage
!= 4) /* Don't push back fF/dD */
806 java_unget_unicode ();
808 /* An exponent (if any) must have seen a digit. */
809 if (seen_exponent
&& !seen_digit
)
810 java_lex_error ("Invalid FP literal", 0);
812 literal_token
[literal_index
] = '\0';
813 JAVA_LEX_LIT (literal_token
, radix
);
816 a
.literal_token
= literal_token
;
818 a
.java_lval
= java_lval
;
819 a
.number_beginning
= number_beginning
;
820 if (do_float_handler (java_perform_atof
, (PTR
) &a
))
823 JAVA_FLOAT_RANGE_ERROR ((fflag
? "float" : "double"));
829 } /* JAVA_ASCCI_FPCHAR (c) */
831 /* Here we get back to converting the integral literal. */
832 if (c
== 'L' || c
== 'l')
834 else if (radix
== 16 && JAVA_ASCII_LETTER (c
))
835 java_lex_error ("Digit out of range in hexadecimal literal", 0);
836 else if (radix
== 8 && JAVA_ASCII_DIGIT (c
))
837 java_lex_error ("Digit out of range in octal literal", 0);
838 else if (radix
== 16 && !literal_index
)
839 java_lex_error ("No digit specified for hexadecimal literal", 0);
841 java_unget_unicode ();
843 #ifdef JAVA_LEX_DEBUG
844 literal_token
[literal_index
] = '\0'; /* So JAVA_LEX_LIT is safe. */
845 JAVA_LEX_LIT (literal_token
, radix
);
847 /* This section of code is borrowed from gcc/c-lex.c */
850 bytes
= GET_TYPE_PRECISION (long_type_node
);
851 for (i
= bytes
; i
< TOTAL_PARTS
; i
++)
859 for (i
= 0; i
< HOST_BITS_PER_WIDE_INT
/ HOST_BITS_PER_CHAR
; i
++)
861 high
|= ((HOST_WIDE_INT
) parts
[i
+ (HOST_BITS_PER_WIDE_INT
862 / HOST_BITS_PER_CHAR
)]
863 << (i
* HOST_BITS_PER_CHAR
));
864 low
|= (HOST_WIDE_INT
) parts
[i
] << (i
* HOST_BITS_PER_CHAR
);
866 /* End borrowed section. */
871 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
872 9223372036854775807L is the biggest `long' literal that can be
873 expressed using a 10 radix. For other radixes, everything that
874 fits within 64 bits is OK. */
875 int hb
= (high
>> 31);
876 if (overflow
|| (hb
&& low
&& radix
== 10) ||
877 (hb
&& high
& 0x7fffffff && radix
== 10) ||
878 (hb
&& !(high
& 0x7fffffff) && !ctxp
->minus_seen
&& radix
== 10))
879 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
883 /* 2147483648 is valid if operand of a '-'. Otherwise,
884 2147483647 is the biggest `int' literal that can be
885 expressed using a 10 radix. For other radixes, everything
886 that fits within 32 bits is OK. As all literals are
887 signed, we sign extend here. */
888 int hb
= (low
>> 31) & 0x1;
889 if (overflow
|| high
|| (hb
&& low
& 0x7fffffff && radix
== 10) ||
890 (hb
&& !(low
& 0x7fffffff) && !ctxp
->minus_seen
&& radix
== 10))
891 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
894 ctxp
->minus_seen
= 0;
895 SET_LVAL_NODE_TYPE (build_int_2 (low
, high
),
896 (long_suffix
? long_type_node
: int_type_node
));
900 ctxp
->minus_seen
= 0;
901 /* Character literals */
905 if ((c
= java_get_unicode ()) == '\\')
906 char_lit
= java_parse_escape_sequence ();
910 c
= java_get_unicode ();
912 if ((c
== '\n') || (c
== UEOF
))
913 java_lex_error ("Character literal not terminated at end of line", 0);
915 java_lex_error ("Syntax error in character literal", 0);
917 if (c
== JAVA_CHAR_ERROR
)
918 char_lit
= 0; /* We silently convert it to zero */
920 JAVA_LEX_CHAR_LIT (char_lit
);
921 SET_LVAL_NODE_TYPE (build_int_2 (char_lit
, 0), char_type_node
);
925 /* String literals */
931 for (no_error
= 1, c
= java_get_unicode ();
932 c
!= '"' && c
!= '\n'; c
= java_get_unicode ())
935 c
= java_parse_escape_sequence ();
936 no_error
&= (c
!= JAVA_CHAR_ERROR
? 1 : 0);
937 java_unicode_2_utf8 (c
);
939 if (c
== '\n' || c
== UEOF
) /* ULT */
941 lineno
--; /* Refer to the line the terminator was seen */
942 java_lex_error ("String not terminated at end of line.", 0);
946 obstack_1grow (&temporary_obstack
, '\0');
947 string
= obstack_finish (&temporary_obstack
);
949 if (!no_error
|| (c
!= '"'))
950 java_lval
->node
= error_mark_node
; /* Requires futher testing FIXME */
953 tree s
= make_node (STRING_CST
);
954 TREE_STRING_LENGTH (s
) = strlen (string
);
955 TREE_STRING_POINTER (s
) =
956 obstack_alloc (expression_obstack
, TREE_STRING_LENGTH (s
)+1);
957 strcpy (TREE_STRING_POINTER (s
), string
);
961 return STRING_LIT_TK
;
969 BUILD_OPERATOR (OP_TK
);
975 if (ctxp
->ccb_indent
== 1)
976 ctxp
->first_ccb_indent1
= lineno
;
978 BUILD_OPERATOR (OCB_TK
);
982 if (ctxp
->ccb_indent
== 1)
983 ctxp
->last_ccb_indent1
= lineno
;
984 BUILD_OPERATOR (CCB_TK
);
987 BUILD_OPERATOR (OSB_TK
);
999 BUILD_OPERATOR (DOT_TK
);
1000 /* return DOT_TK; */
1007 if ((c
= java_get_unicode ()) == '=')
1009 BUILD_OPERATOR (EQ_TK
);
1013 /* Equals is used in two different locations. In the
1014 variable_declarator: rule, it has to be seen as '=' as opposed
1015 to being seen as an ordinary assignment operator in
1016 assignment_operators: rule. */
1017 java_unget_unicode ();
1018 BUILD_OPERATOR (ASSIGN_TK
);
1022 switch ((c
= java_get_unicode ()))
1025 BUILD_OPERATOR (GTE_TK
);
1027 switch ((c
= java_get_unicode ()))
1030 if ((c
= java_get_unicode ()) == '=')
1032 BUILD_OPERATOR2 (ZRS_ASSIGN_TK
);
1036 java_unget_unicode ();
1037 BUILD_OPERATOR (ZRS_TK
);
1040 BUILD_OPERATOR2 (SRS_ASSIGN_TK
);
1042 java_unget_unicode ();
1043 BUILD_OPERATOR (SRS_TK
);
1046 java_unget_unicode ();
1047 BUILD_OPERATOR (GT_TK
);
1051 switch ((c
= java_get_unicode ()))
1054 BUILD_OPERATOR (LTE_TK
);
1056 if ((c
= java_get_unicode ()) == '=')
1058 BUILD_OPERATOR2 (LS_ASSIGN_TK
);
1062 java_unget_unicode ();
1063 BUILD_OPERATOR (LS_TK
);
1066 java_unget_unicode ();
1067 BUILD_OPERATOR (LT_TK
);
1071 switch ((c
= java_get_unicode ()))
1074 BUILD_OPERATOR (BOOL_AND_TK
);
1076 BUILD_OPERATOR2 (AND_ASSIGN_TK
);
1078 java_unget_unicode ();
1079 BUILD_OPERATOR (AND_TK
);
1083 switch ((c
= java_get_unicode ()))
1086 BUILD_OPERATOR (BOOL_OR_TK
);
1088 BUILD_OPERATOR2 (OR_ASSIGN_TK
);
1090 java_unget_unicode ();
1091 BUILD_OPERATOR (OR_TK
);
1095 switch ((c
= java_get_unicode ()))
1098 BUILD_OPERATOR (INCR_TK
);
1100 BUILD_OPERATOR2 (PLUS_ASSIGN_TK
);
1102 java_unget_unicode ();
1103 BUILD_OPERATOR (PLUS_TK
);
1107 switch ((c
= java_get_unicode ()))
1110 BUILD_OPERATOR (DECR_TK
);
1112 BUILD_OPERATOR2 (MINUS_ASSIGN_TK
);
1114 java_unget_unicode ();
1115 ctxp
->minus_seen
= 1;
1116 BUILD_OPERATOR (MINUS_TK
);
1120 if ((c
= java_get_unicode ()) == '=')
1122 BUILD_OPERATOR2 (MULT_ASSIGN_TK
);
1126 java_unget_unicode ();
1127 BUILD_OPERATOR (MULT_TK
);
1131 if ((c
= java_get_unicode ()) == '=')
1133 BUILD_OPERATOR2 (DIV_ASSIGN_TK
);
1137 java_unget_unicode ();
1138 BUILD_OPERATOR (DIV_TK
);
1142 if ((c
= java_get_unicode ()) == '=')
1144 BUILD_OPERATOR2 (XOR_ASSIGN_TK
);
1148 java_unget_unicode ();
1149 BUILD_OPERATOR (XOR_TK
);
1153 if ((c
= java_get_unicode ()) == '=')
1155 BUILD_OPERATOR2 (REM_ASSIGN_TK
);
1159 java_unget_unicode ();
1160 BUILD_OPERATOR (REM_TK
);
1164 if ((c
= java_get_unicode()) == '=')
1166 BUILD_OPERATOR (NEQ_TK
);
1170 java_unget_unicode ();
1171 BUILD_OPERATOR (NEG_TK
);
1176 BUILD_OPERATOR (REL_QM_TK
);
1179 BUILD_OPERATOR (REL_CL_TK
);
1181 BUILD_OPERATOR (NOT_TK
);
1184 /* Keyword, boolean literal or null literal */
1185 for (first_unicode
= c
, all_ascii
= 1, ascii_index
= 0;
1186 JAVA_ID_CHAR_P (c
); c
= java_get_unicode ())
1188 java_unicode_2_utf8 (c
);
1189 if (all_ascii
&& c
>= 128)
1194 obstack_1grow (&temporary_obstack
, '\0');
1195 string
= obstack_finish (&temporary_obstack
);
1196 java_unget_unicode ();
1198 /* If we have something all ascii, we consider a keyword, a boolean
1199 literal, a null literal or an all ASCII identifier. Otherwise,
1200 this is an identifier (possibly not respecting formation rule). */
1203 struct java_keyword
*kw
;
1204 if ((kw
=java_keyword (string
, ascii_index
)))
1206 JAVA_LEX_KW (string
);
1209 case PUBLIC_TK
: case PROTECTED_TK
: case STATIC_TK
:
1210 case ABSTRACT_TK
: case FINAL_TK
: case NATIVE_TK
:
1211 case SYNCHRONIZED_TK
: case TRANSIENT_TK
: case VOLATILE_TK
:
1213 SET_MODIFIER_CTX (kw
->token
);
1216 SET_LVAL_NODE (float_type_node
);
1219 SET_LVAL_NODE (double_type_node
);
1222 SET_LVAL_NODE (boolean_type_node
);
1225 SET_LVAL_NODE (byte_type_node
);
1228 SET_LVAL_NODE (short_type_node
);
1231 SET_LVAL_NODE (int_type_node
);
1234 SET_LVAL_NODE (long_type_node
);
1237 SET_LVAL_NODE (char_type_node
);
1240 /* Keyword based literals */
1243 SET_LVAL_NODE ((kw
->token
== TRUE_TK
?
1244 boolean_true_node
: boolean_false_node
));
1247 SET_LVAL_NODE (null_pointer_node
);
1250 /* Some keyword we want to retain information on the location
1263 BUILD_OPERATOR (kw
->token
);
1271 /* We may have an ID here */
1272 if (JAVA_ID_CHAR_P(first_unicode
) && !JAVA_DIGIT_P (first_unicode
))
1274 JAVA_LEX_ID (string
);
1275 java_lval
->node
= BUILD_ID_WFL (GET_IDENTIFIER (string
));
1279 /* Everything else is an invalid character in the input */
1281 char lex_error_buffer
[128];
1282 sprintf (lex_error_buffer
, "Invalid character '%s' in input",
1283 java_sprint_unicode (ctxp
->c_line
, ctxp
->c_line
->current
));
1284 java_lex_error (lex_error_buffer
, 1);
/* Append the UTF-8 encoding of UNICODE to temporary_obstack, one byte
   at a time with obstack_1grow:
     0x01-0x7f            one byte, the value itself;
     0x80-0x7ff, and 0    two bytes, 0xc0 | top 5 bits, 0x80 | low 6 bits;
     anything else        three bytes, 0xe0 | top 4 bits,
                          0x80 | middle 6 bits, 0x80 | low 6 bits.
   The value 0 deliberately takes the two-byte form (0xc0 0x80), so no
   zero byte is ever emitted for a character; the callers in this file
   append their own '\0' and measure the result with strlen.
   Assumes RANGE (x, lo, hi) tests an inclusive range -- TODO confirm
   against its definition, which is not in this file.  */
1290 java_unicode_2_utf8 (unicode
)
1293 if (RANGE (unicode
, 0x01, 0x7f))
1294 obstack_1grow (&temporary_obstack
, (char)unicode
);
1295 else if (RANGE (unicode
, 0x80, 0x7ff) || unicode
== 0)
1297 obstack_1grow (&temporary_obstack
,
1298 (unsigned char)(0xc0 | ((0x7c0 & unicode
) >> 6)));
1299 obstack_1grow (&temporary_obstack
,
1300 (unsigned char)(0x80 | (unicode
& 0x3f)));
1302 else /* Range 0x800-0xffff */
/* In the two shifted expressions below `>>' binds tighter than `|',
   so the masked bits are shifted before the marker bits are OR-ed in.  */
1304 obstack_1grow (&temporary_obstack
,
1305 (unsigned char)(0xe0 | (unicode
& 0xf000) >> 12));
1306 obstack_1grow (&temporary_obstack
,
1307 (unsigned char)(0x80 | (unicode
& 0x0fc0) >> 6));
1308 obstack_1grow (&temporary_obstack
,
1309 (unsigned char)(0x80 | (unicode
& 0x003f)));
1315 build_wfl_node (node
)
1318 return build_expr_wfl (node
, ctxp
->filename
, ctxp
->elc
.line
, ctxp
->elc
.col
);
1323 java_lex_error (msg
, forward
)
1324 const char *msg ATTRIBUTE_UNUSED
;
1325 int forward ATTRIBUTE_UNUSED
;
1328 ctxp
->elc
.line
= ctxp
->c_line
->lineno
;
1329 ctxp
->elc
.col
= ctxp
->c_line
->char_col
-1+forward
;
1331 /* Might be caught in the middle of some error report */
1332 ctxp
->java_error_flag
= 0;
1349 if (next
!= '\n' && next
!= EOF
)
1361 java_get_line_col (filename
, line
, col
)
1362 char *filename ATTRIBUTE_UNUSED
;
1363 int line ATTRIBUTE_UNUSED
, col ATTRIBUTE_UNUSED
;
1368 /* Dumb implementation. Doesn't try to cache or optimize things. */
1369 /* First line of the file is line 1, first column is 1 */
1371 /* COL == -1 means, at the CR/LF in LINE */
1372 /* COL == -2 means, at the first non space char in LINE */
1375 int c
, ccol
, cline
= 1;
1376 int current_line_col
= 0;
1377 int first_non_space
= 0;
1380 if (!(fp
= fopen (filename
, "r")))
1381 fatal ("Can't open file - java_display_line_col");
1383 while (cline
!= line
)
1388 static char msg
[] = "<<file too short - unexpected EOF>>";
1389 obstack_grow (&temporary_obstack
, msg
, sizeof(msg
)-1);
1392 if (java_is_eol (fp
, c
))
1396 /* Gather the chars of the current line in a buffer */
1400 if (c
< 0 || java_is_eol (fp
, c
))
1402 if (!first_non_space
&& !JAVA_WHITE_SPACE_P (c
))
1403 first_non_space
= current_line_col
;
1404 obstack_1grow (&temporary_obstack
, c
);
1409 obstack_1grow (&temporary_obstack
, '\n');
1413 col
= current_line_col
;
1414 first_non_space
= 0;
1417 col
= first_non_space
;
1419 first_non_space
= 0;
1421 /* Place the '^' at the right position */
1422 base
= obstack_base (&temporary_obstack
);
1423 for (ccol
= 1; ccol
<= col
; ccol
++)
1425 /* Compute \t when reaching first_non_space */
1426 char c
= (first_non_space
?
1427 (base
[ccol
-1] == '\t' ? '\t' : ' ') : ' ');
1428 obstack_1grow (&temporary_obstack
, c
);
1430 obstack_grow0 (&temporary_obstack
, "^", 1);
1433 return obstack_finish (&temporary_obstack
);