* config/i386/i386.md (*sinxf2): Rename to *sinxf2_i387.
[official-gcc.git] / gcc / java / lex.c
blob730c1447fbd8fb47d0e0e7fdb09a6549a05e76c6
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
3 Free Software Foundation, Inc.
4 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA.
23 Java and all Java-based marks are trademarks or registered trademarks
24 of Sun Microsystems, Inc. in the United States and other countries.
25 The Free Software Foundation is independent of Sun Microsystems, Inc. */
27 /* It defines java_lex (yylex) that reads a Java ASCII source file
28 possibly containing Unicode escape sequence or utf8 encoded
29 characters and returns a token for everything found but comments,
30 white spaces and line terminators. When necessary, it also fills
31 the java_lval (yylval) union. It's implemented to be called by a
32 re-entrant parser generated by Bison.
34 The lexical analysis conforms to the Java grammar described in "The
35 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
36 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
38 #include "keyword.h"
39 #include "flags.h"
40 #include "chartables.h"
41 #ifndef JC1_LITE
42 #include "timevar.h"
43 #endif
45 /* Function declarations. */
46 static char *java_sprint_unicode (int);
47 static void java_unicode_2_utf8 (unicode_t);
48 static void java_lex_error (const char *, int);
49 #ifndef JC1_LITE
50 static int do_java_lex (YYSTYPE *);
51 static int java_lex (YYSTYPE *);
52 static int java_is_eol (FILE *, int);
53 static tree build_wfl_node (tree);
54 #endif
55 static int java_parse_escape_sequence (void);
56 static int java_start_char_p (unicode_t);
57 static int java_part_char_p (unicode_t);
58 static int java_space_char_p (unicode_t);
59 static void java_parse_doc_section (int);
60 static void java_parse_end_comment (int);
61 static int java_read_char (java_lexer *);
62 static int java_get_unicode (void);
63 static int java_peek_unicode (void);
64 static void java_next_unicode (void);
65 static int java_read_unicode (java_lexer *, int *);
66 #ifndef JC1_LITE
67 static int utf8_cmp (const unsigned char *, int, const char *);
68 #endif
70 java_lexer *java_new_lexer (FILE *, const char *);
71 #ifndef JC1_LITE
72 static void error_if_numeric_overflow (tree);
73 #endif
75 #ifdef HAVE_ICONV
76 /* This is nonzero if we have initialized `need_byteswap'. */
77 static int byteswap_init = 0;
79 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
80 big-endian order -- not native endian order. We handle this by
81 doing a conversion once at startup and seeing what happens. This
82 flag holds the results of this determination. */
83 static int need_byteswap = 0;
84 #endif
86 void
87 java_init_lex (FILE *finput, const char *encoding)
89 #ifndef JC1_LITE
90 int java_lang_imported = 0;
92 if (!java_lang_id)
93 java_lang_id = get_identifier ("java.lang");
94 if (!inst_id)
95 inst_id = get_identifier ("inst$");
96 if (!wpv_id)
97 wpv_id = get_identifier ("write_parm_value$");
99 if (!java_lang_imported)
101 tree node = build_tree_list (build_unknown_wfl (java_lang_id),
102 NULL_TREE);
103 read_import_dir (TREE_PURPOSE (node));
104 TREE_CHAIN (node) = ctxp->import_demand_list;
105 ctxp->import_demand_list = node;
106 java_lang_imported = 1;
109 if (!wfl_operator)
111 #ifndef JC1_LITE
112 #ifdef USE_MAPPED_LOCATION
113 wfl_operator = build_expr_wfl (NULL_TREE, input_location);
114 #else
115 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
116 #endif
117 #endif
119 if (!label_id)
120 label_id = get_identifier ("$L");
121 if (!wfl_append)
122 wfl_append = build_unknown_wfl (get_identifier ("append"));
123 if (!wfl_string_buffer)
124 wfl_string_buffer =
125 build_unknown_wfl (get_identifier (flag_emit_class_files
126 ? "java.lang.StringBuffer"
127 : "gnu.gcj.runtime.StringBuffer"));
128 if (!wfl_to_string)
129 wfl_to_string = build_unknown_wfl (get_identifier ("toString"));
131 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
132 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
134 memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
135 ctxp->current_parsed_class = NULL;
136 ctxp->package = NULL_TREE;
137 #endif
139 #ifndef JC1_LITE
140 ctxp->save_location = input_location;
141 #endif
142 ctxp->java_error_flag = 0;
143 ctxp->lexer = java_new_lexer (finput, encoding);
/* Return a printable spelling of character C: the character itself when
   it is printable ASCII (' ' through '~'), otherwise a backslash-u escape
   followed by at least four lowercase hexadecimal digits.

   The result lives in a static buffer that is overwritten by the next
   call, so callers must use or copy it before calling again.  */

static char *
java_sprint_unicode (int c)
{
  /* C is an int, so a negative or otherwise out-of-range value prints as
     up to eight hex digits: 2 + 8 + 1 = 11 bytes.  The buffer is sized
     for that and the write is bounded anyway.  */
  static char buffer [16];
  if (c < ' ' || c >= 127)
    snprintf (buffer, sizeof (buffer), "\\u%04x", (unsigned int) c);
  else
    {
      buffer [0] = c;
      buffer [1] = '\0';
    }
  return buffer;
}
160 /* Create a new lexer object. */
162 java_lexer *
163 java_new_lexer (FILE *finput, const char *encoding)
165 java_lexer *lex = XNEW (java_lexer);
166 int enc_error = 0;
168 lex->finput = finput;
169 lex->bs_count = 0;
170 lex->unget_value = 0;
171 lex->next_unicode = 0;
172 lex->avail_unicode = 0;
173 lex->next_columns = 1;
174 lex->encoding = encoding;
175 lex->position.line = 1;
176 lex->position.col = 1;
177 #ifndef JC1_LITE
178 #ifdef USE_MAPPED_LOCATION
179 input_location
180 = linemap_line_start (&line_table, 1, 120);
181 #else
182 input_line = 1;
183 #endif
184 #endif
186 #ifdef HAVE_ICONV
187 lex->handle = iconv_open ("UCS-2", encoding);
188 if (lex->handle != (iconv_t) -1)
190 lex->first = -1;
191 lex->last = -1;
192 lex->out_first = -1;
193 lex->out_last = -1;
194 lex->read_anything = 0;
195 lex->use_fallback = 0;
197 /* Work around broken iconv() implementations by doing checking at
198 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
199 then all UCS-2 encoders will be broken. Perhaps not a valid
200 assumption. */
201 if (! byteswap_init)
203 iconv_t handle;
205 byteswap_init = 1;
207 handle = iconv_open ("UCS-2", "UTF-8");
208 if (handle != (iconv_t) -1)
210 unicode_t result;
211 unsigned char in[3];
212 char *inp, *outp;
213 size_t inc, outc, r;
215 /* This is the UTF-8 encoding of \ufeff. */
216 in[0] = 0xef;
217 in[1] = 0xbb;
218 in[2] = 0xbf;
220 inp = (char *) in;
221 inc = 3;
222 outp = (char *) &result;
223 outc = 2;
225 r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
226 &outp, &outc);
227 iconv_close (handle);
228 /* Conversion must be complete for us to use the result. */
229 if (r != (size_t) -1 && inc == 0 && outc == 0)
230 need_byteswap = (result != 0xfeff);
234 lex->byte_swap = need_byteswap;
236 else
237 #endif /* HAVE_ICONV */
239 /* If iconv failed, use the internal decoder if the default
240 encoding was requested. This code is used on platforms where
241 iconv exists but is insufficient for our needs. For
242 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
244 On Solaris the default encoding, as returned by nl_langinfo(),
245 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
246 understand that. We work around that by pretending
247 `646' to be the same as UTF-8. */
248 if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
249 enc_error = 1;
250 #ifdef HAVE_ICONV
251 else
253 lex->use_fallback = 1;
254 lex->encoding = "UTF-8";
256 #endif /* HAVE_ICONV */
259 if (enc_error)
260 fatal_error ("unknown encoding: %qs\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n%<--encoding=UTF-8%> option", encoding);
262 return lex;
265 void
266 java_destroy_lexer (java_lexer *lex)
268 #ifdef HAVE_ICONV
269 if (! lex->use_fallback)
270 iconv_close (lex->handle);
271 #endif
272 free (lex);
275 static int
276 java_read_char (java_lexer *lex)
278 #ifdef HAVE_ICONV
279 if (! lex->use_fallback)
281 size_t ir, inbytesleft, in_save, out_count, out_save;
282 char *inp, *outp;
283 unicode_t result;
285 /* If there is data which has already been converted, use it. */
286 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
288 lex->out_first = 0;
289 lex->out_last = 0;
291 while (1)
293 /* See if we need to read more data. If FIRST == 0 then
294 the previous conversion attempt ended in the middle of
295 a character at the end of the buffer. Otherwise we
296 only have to read if the buffer is empty. */
297 if (lex->first == 0 || lex->first >= lex->last)
299 int r;
301 if (lex->first >= lex->last)
303 lex->first = 0;
304 lex->last = 0;
306 if (feof (lex->finput))
307 return UEOF;
308 r = fread (&lex->buffer[lex->last], 1,
309 sizeof (lex->buffer) - lex->last,
310 lex->finput);
311 lex->last += r;
314 inbytesleft = lex->last - lex->first;
315 out_count = sizeof (lex->out_buffer) - lex->out_last;
317 if (inbytesleft == 0)
319 /* We've tried to read and there is nothing left. */
320 return UEOF;
323 in_save = inbytesleft;
324 out_save = out_count;
325 inp = &lex->buffer[lex->first];
326 outp = (char *) &lex->out_buffer[lex->out_last];
327 ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
328 &inbytesleft, &outp, &out_count);
330 /* If we haven't read any bytes, then look to see if we
331 have read a BOM. */
332 if (! lex->read_anything && out_save - out_count >= 2)
334 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
335 if (uc == 0xfeff)
337 lex->byte_swap = 0;
338 lex->out_first += 2;
340 else if (uc == 0xfffe)
342 lex->byte_swap = 1;
343 lex->out_first += 2;
345 lex->read_anything = 1;
348 if (lex->byte_swap)
350 unsigned int i;
351 for (i = 0; i < out_save - out_count; i += 2)
353 char t = lex->out_buffer[lex->out_last + i];
354 lex->out_buffer[lex->out_last + i]
355 = lex->out_buffer[lex->out_last + i + 1];
356 lex->out_buffer[lex->out_last + i + 1] = t;
360 lex->first += in_save - inbytesleft;
361 lex->out_last += out_save - out_count;
363 /* If we converted anything at all, move along. */
364 if (out_count != out_save)
365 break;
367 if (ir == (size_t) -1)
369 if (errno == EINVAL)
371 /* This is ok. This means that the end of our buffer
372 is in the middle of a character sequence. We just
373 move the valid part of the buffer to the beginning
374 to force a read. */
375 memmove (&lex->buffer[0], &lex->buffer[lex->first],
376 lex->last - lex->first);
377 lex->last -= lex->first;
378 lex->first = 0;
380 else
382 /* A more serious error. */
383 char buffer[128];
384 sprintf (buffer,
385 "Unrecognized character for encoding '%s'",
386 lex->encoding);
387 java_lex_error (buffer, 0);
388 return UEOF;
394 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
396 /* Don't have any data. */
397 return UEOF;
400 /* Success. */
401 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
402 lex->out_first += 2;
403 return result;
405 else
406 #endif /* HAVE_ICONV */
408 int c, c1, c2;
409 c = getc (lex->finput);
411 if (c == EOF)
412 return UEOF;
413 if (c < 128)
414 return (unicode_t) c;
415 else
417 if ((c & 0xe0) == 0xc0)
419 c1 = getc (lex->finput);
420 if ((c1 & 0xc0) == 0x80)
422 unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
423 /* Check for valid 2-byte characters. We explicitly
424 allow \0 because this encoding is common in the
425 Java world. */
426 if (r == 0 || (r >= 0x80 && r <= 0x7ff))
427 return r;
430 else if ((c & 0xf0) == 0xe0)
432 c1 = getc (lex->finput);
433 if ((c1 & 0xc0) == 0x80)
435 c2 = getc (lex->finput);
436 if ((c2 & 0xc0) == 0x80)
438 unicode_t r = (unicode_t)(((c & 0xf) << 12) +
439 (( c1 & 0x3f) << 6)
440 + (c2 & 0x3f));
441 /* Check for valid 3-byte characters.
442 Don't allow surrogate, \ufffe or \uffff. */
443 if (IN_RANGE (r, 0x800, 0xffff)
444 && ! IN_RANGE (r, 0xd800, 0xdfff)
445 && r != 0xfffe && r != 0xffff)
446 return r;
451 /* We simply don't support invalid characters. We also
452 don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
453 cannot be valid Java characters. */
454 java_lex_error ("malformed UTF-8 character", 0);
458 /* We only get here on error. */
459 return UEOF;
462 static int
463 java_read_unicode (java_lexer *lex, int *unicode_escape_p)
465 int c;
467 if (lex->unget_value)
469 c = lex->unget_value;
470 lex->unget_value = 0;
472 else
473 c = java_read_char (lex);
475 *unicode_escape_p = 0;
477 if (c != '\\')
479 lex->bs_count = 0;
480 return c;
483 ++lex->bs_count;
484 if ((lex->bs_count) % 2 == 1)
486 /* Odd number of \ seen. */
487 c = java_read_char (lex);
488 if (c == 'u')
490 unicode_t unicode = 0;
491 int shift = 12;
493 /* Recognize any number of `u's in \u. */
494 while ((c = java_read_char (lex)) == 'u')
497 shift = 12;
500 if (c == UEOF)
502 java_lex_error ("prematurely terminated \\u sequence", 0);
503 return UEOF;
506 if (hex_p (c))
507 unicode |= (unicode_t)(hex_value (c) << shift);
508 else
510 java_lex_error ("non-hex digit in \\u sequence", 0);
511 break;
514 c = java_read_char (lex);
515 shift -= 4;
517 while (shift >= 0);
519 if (c != UEOF)
520 lex->unget_value = c;
522 lex->bs_count = 0;
523 *unicode_escape_p = 1;
524 return unicode;
526 lex->unget_value = c;
528 return (unicode_t) '\\';
/* Consume and return the next Unicode character, after Unicode escape
   handling.  This is java_peek_unicode followed by java_next_unicode, so
   the current position ends up just past the returned character.  */

static int
java_get_unicode (void)
{
  const int ch = java_peek_unicode ();

  java_next_unicode ();
  return ch;
}
542 /* Return the next Unicode character (post-Unicode-escape-handling).
543 Do not move the current position, which remains just before
544 the returned character. */
546 static int
547 java_peek_unicode (void)
549 int unicode_escape_p;
550 java_lexer *lex = ctxp->lexer;
551 int next;
553 if (lex->avail_unicode)
554 return lex->next_unicode;
556 next = java_read_unicode (lex, &unicode_escape_p);
558 if (next == '\r')
560 /* We have to read ahead to see if we got \r\n.
561 In that case we return a single line terminator. */
562 int dummy;
563 next = java_read_unicode (lex, &dummy);
564 if (next != '\n' && next != UEOF)
565 lex->unget_value = next;
566 /* In either case we must return a newline. */
567 next = '\n';
570 lex->next_unicode = next;
571 lex->avail_unicode = 1;
573 if (next == UEOF)
575 lex->next_columns = 0;
576 return next;
579 if (next == '\n')
581 lex->next_columns = 1 - lex->position.col;
583 else if (next == '\t')
585 int cur_col = lex->position.col;
586 lex->next_columns = ((cur_col + 7) & ~7) + 1 - cur_col;
589 else
591 lex->next_columns = 1;
593 if (unicode_escape_p)
594 lex->next_columns = 6;
595 return next;
598 /* Move forward one Unicode character (post-Unicode-escape-handling).
599 Only allowed after java_peek_unicode. The combination java_peek_unicode
600 followed by java_next_unicode is equivalent to java_get_unicode. */
602 static void java_next_unicode (void)
604 struct java_lexer *lex = ctxp->lexer;
605 lex->position.col += lex->next_columns;
606 if (lex->next_unicode == '\n')
608 lex->position.line++;
609 #ifndef JC1_LITE
610 #ifdef USE_MAPPED_LOCATION
611 input_location
612 = linemap_line_start (&line_table, lex->position.line, 120);
613 #else
614 input_line = lex->position.line;
615 #endif
616 #endif
618 lex->avail_unicode = 0;
#if 0
/* The inverse of java_next_unicode: make the character that was just
   consumed available again and move the column back by the amount it had
   advanced.  It is an internal error to call this while a character is
   already pending.

   Not currently used, but could be if it would be cleaner or faster.
   java_peek_unicode == java_get_unicode + java_unget_unicode.
   java_get_unicode == java_peek_unicode + java_next_unicode.  */

static void java_unget_unicode ()
{
  struct java_lexer *lexer = ctxp->lexer;

  if (lexer->avail_unicode)
    fatal_error ("internal error - bad unget");
  lexer->position.col -= lexer->next_columns;
  lexer->avail_unicode = 1;
}
#endif
637 /* Parse the end of a C style comment.
638 * C is the first character following the '/' and '*'. */
639 static void
640 java_parse_end_comment (int c)
642 for ( ;; c = java_get_unicode ())
644 switch (c)
646 case UEOF:
647 java_lex_error ("Comment not terminated at end of input", 0);
648 return;
649 case '*':
650 switch (c = java_peek_unicode ())
652 case UEOF:
653 java_lex_error ("Comment not terminated at end of input", 0);
654 return;
655 case '/':
656 java_next_unicode ();
657 return;
658 case '*': /* Reparse only '*'. */
665 /* Parse the documentation section. Keywords must be at the beginning
666 of a documentation comment line (ignoring white space and any `*'
667 character). Parsed keyword(s): @DEPRECATED. */
669 static void
670 java_parse_doc_section (int c)
672 int last_was_star;
674 /* We reset this here, because only the most recent doc comment
675 applies to the following declaration. */
676 ctxp->deprecated = 0;
678 /* We loop over all the lines of the comment. We'll eventually exit
679 if we hit EOF prematurely, or when we see the comment
680 terminator. */
681 while (1)
683 /* These first steps need only be done if we're still looking
684 for the deprecated tag. If we've already seen it, we might
685 as well skip looking for it again. */
686 if (! ctxp->deprecated)
688 /* Skip whitespace and '*'s. We must also check for the end
689 of the comment here. */
690 while (JAVA_WHITE_SPACE_P (c) || c == '*')
692 last_was_star = (c == '*');
693 c = java_get_unicode ();
694 if (last_was_star && c == '/')
696 /* We just saw the comment terminator. */
697 return;
701 if (c == UEOF)
702 goto eof;
704 if (c == '@')
706 const char *deprecated = "@deprecated";
707 int i;
709 for (i = 0; deprecated[i]; ++i)
711 if (c != deprecated[i])
712 break;
713 /* We write the code in this way, with the
714 update at the end, so that after the loop
715 we're left with the next character in C. */
716 c = java_get_unicode ();
719 if (c == UEOF)
720 goto eof;
722 /* @deprecated must be followed by a space or newline.
723 We also allow a '*' in case it appears just before
724 the end of a comment. In this position only we also
725 must allow any Unicode space character. */
726 if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
728 if (! deprecated[i])
729 ctxp->deprecated = 1;
734 /* We've examined the relevant content from this line. Now we
735 skip the remaining characters and start over with the next
736 line. We also check for end of comment here. */
737 while (c != '\n' && c != UEOF)
739 last_was_star = (c == '*');
740 c = java_get_unicode ();
741 if (last_was_star && c == '/')
742 return;
745 if (c == UEOF)
746 goto eof;
747 /* We have to advance past the \n. */
748 c = java_get_unicode ();
749 if (c == UEOF)
750 goto eof;
753 eof:
754 java_lex_error ("Comment not terminated at end of input", 0);
757 /* Return true if C is a valid start character for a Java identifier.
758 This is only called if C >= 128 -- smaller values are handled
759 inline. However, this function handles all values anyway. */
760 static int
761 java_start_char_p (unicode_t c)
763 unsigned int hi = c / 256;
764 const char *const page = type_table[hi];
765 unsigned long val = (unsigned long) page;
766 int flags;
768 if ((val & ~ LETTER_MASK) != 0)
769 flags = page[c & 255];
770 else
771 flags = val;
773 return flags & LETTER_START;
776 /* Return true if C is a valid part character for a Java identifier.
777 This is only called if C >= 128 -- smaller values are handled
778 inline. However, this function handles all values anyway. */
779 static int
780 java_part_char_p (unicode_t c)
782 unsigned int hi = c / 256;
783 const char *const page = type_table[hi];
784 unsigned long val = (unsigned long) page;
785 int flags;
787 if ((val & ~ LETTER_MASK) != 0)
788 flags = page[c & 255];
789 else
790 flags = val;
792 return flags & LETTER_PART;
795 /* Return true if C is whitespace. */
796 static int
797 java_space_char_p (unicode_t c)
799 unsigned int hi = c / 256;
800 const char *const page = type_table[hi];
801 unsigned long val = (unsigned long) page;
802 int flags;
804 if ((val & ~ LETTER_MASK) != 0)
805 flags = page[c & 255];
806 else
807 flags = val;
809 return flags & LETTER_SPACE;
812 static int
813 java_parse_escape_sequence (void)
815 int c;
817 switch (c = java_get_unicode ())
819 case 'b':
820 return (unicode_t)0x8;
821 case 't':
822 return (unicode_t)0x9;
823 case 'n':
824 return (unicode_t)0xa;
825 case 'f':
826 return (unicode_t)0xc;
827 case 'r':
828 return (unicode_t)0xd;
829 case '"':
830 return (unicode_t)0x22;
831 case '\'':
832 return (unicode_t)0x27;
833 case '\\':
834 return (unicode_t)0x5c;
835 case '0': case '1': case '2': case '3': case '4':
836 case '5': case '6': case '7':
838 int more = 3;
839 unicode_t char_lit = 0;
841 if (c > '3')
843 /* According to the grammar, `\477' has a well-defined
844 meaning -- it is `\47' followed by `7'. */
845 --more;
847 char_lit = 0;
848 for (;;)
850 char_lit = 8 * char_lit + c - '0';
851 if (--more == 0)
852 break;
853 c = java_peek_unicode ();
854 if (! RANGE (c, '0', '7'))
855 break;
856 java_next_unicode ();
859 return char_lit;
861 default:
862 java_lex_error ("Invalid character in escape sequence", -1);
863 return JAVA_CHAR_ERROR;
867 #ifndef JC1_LITE
868 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
870 /* Subroutine of java_lex: converts floating-point literals to tree
871 nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
872 store the result. FFLAG indicates whether the literal was tagged
873 with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
874 is the line number on which to report any error. */
876 static void java_perform_atof (YYSTYPE *, char *, int, int);
878 static void
879 java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
880 int number_beginning)
882 REAL_VALUE_TYPE value;
883 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
885 SET_REAL_VALUE_ATOF (value,
886 REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
888 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
890 JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
891 value = DCONST0;
893 else if (IS_ZERO (value))
895 /* We check to see if the value is really 0 or if we've found an
896 underflow. We do this in the most primitive imaginable way. */
897 int really_zero = 1;
898 char *p = literal_token;
899 if (*p == '-')
900 ++p;
901 while (*p && *p != 'e' && *p != 'E')
903 if (*p != '0' && *p != '.')
905 really_zero = 0;
906 break;
908 ++p;
910 if (! really_zero)
912 int save_col = ctxp->lexer->position.col;
913 ctxp->lexer->position.col = number_beginning;
914 java_lex_error ("Floating point literal underflow", 0);
915 ctxp->lexer->position.col = save_col;
919 SET_LVAL_NODE (build_real (type, value));
921 #endif
923 static int yylex (YYSTYPE *);
925 static int
926 #ifdef JC1_LITE
927 yylex (YYSTYPE *java_lval)
928 #else
929 do_java_lex (YYSTYPE *java_lval)
930 #endif
932 int c;
933 char *string;
935 /* Translation of the Unicode escape in the raw stream of Unicode
936 characters. Takes care of line terminator. */
937 step1:
938 /* Skip white spaces: SP, TAB and FF or ULT. */
939 for (;;)
941 c = java_peek_unicode ();
942 if (c != '\n' && ! JAVA_WHITE_SPACE_P (c))
943 break;
944 java_next_unicode ();
947 /* Handle EOF here. */
948 if (c == UEOF) /* Should probably do something here... */
949 return 0;
951 #ifndef JC1_LITE
952 #ifdef USE_MAPPED_LOCATION
953 LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table,
954 ctxp->lexer->position.col);
955 #else
956 ctxp->lexer->token_start = ctxp->lexer->position;
957 #endif
958 #endif
960 /* Numeric literals. */
961 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
963 /* This section of code is borrowed from gcc/c-lex.c. */
964 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
965 int parts[TOTAL_PARTS];
966 HOST_WIDE_INT high, low;
967 /* End borrowed section. */
969 #define MAX_TOKEN_LEN 256
970 char literal_token [MAX_TOKEN_LEN + 1];
971 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
972 int found_hex_digits = 0, found_non_octal_digits = -1;
973 int i;
974 #ifndef JC1_LITE
975 int number_beginning = ctxp->lexer->position.col;
976 tree value;
977 #endif
979 for (i = 0; i < TOTAL_PARTS; i++)
980 parts [i] = 0;
982 if (c == '0')
984 java_next_unicode ();
985 c = java_peek_unicode ();
986 if (c == 'x' || c == 'X')
988 radix = 16;
989 java_next_unicode ();
990 c = java_peek_unicode ();
992 else if (JAVA_ASCII_DIGIT (c))
994 literal_token [literal_index++] = '0';
995 radix = 8;
997 else if (c == '.' || c == 'e' || c =='E')
999 literal_token [literal_index++] = '0';
1000 /* Handle C during floating-point parsing. */
1002 else
1004 /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
1005 switch (c)
1007 case 'L': case 'l':
1008 java_next_unicode ();
1009 SET_LVAL_NODE (long_zero_node);
1010 return (INT_LIT_TK);
1011 case 'f': case 'F':
1012 java_next_unicode ();
1013 SET_LVAL_NODE (float_zero_node);
1014 return (FP_LIT_TK);
1015 case 'd': case 'D':
1016 java_next_unicode ();
1017 SET_LVAL_NODE (double_zero_node);
1018 return (FP_LIT_TK);
1019 default:
1020 SET_LVAL_NODE (integer_zero_node);
1021 return (INT_LIT_TK);
1026 /* Terminate LITERAL_TOKEN in case we bail out on large tokens. */
1027 literal_token [MAX_TOKEN_LEN] = '\0';
1029 /* Parse the first part of the literal, until we find something
1030 which is not a number. */
1031 while ((radix == 16 ? JAVA_ASCII_HEXDIGIT (c) : JAVA_ASCII_DIGIT (c))
1032 && literal_index < MAX_TOKEN_LEN)
1034 /* We store in a string (in case it turns out to be a FP) and in
1035 PARTS if we have to process a integer literal. */
1036 int numeric = hex_value (c);
1037 int count;
1039 /* Remember when we find a valid hexadecimal digit. */
1040 if (radix == 16)
1041 found_hex_digits = 1;
1042 /* Remember when we find an invalid octal digit. */
1043 else if (radix == 8 && numeric >= 8 && found_non_octal_digits < 0)
1044 found_non_octal_digits = literal_index;
1046 literal_token [literal_index++] = c;
1047 /* This section of code is borrowed from gcc/c-lex.c. */
1048 for (count = 0; count < TOTAL_PARTS; count++)
1050 parts[count] *= radix;
1051 if (count)
1053 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1054 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1056 else
1057 parts[0] += numeric;
1059 if (parts [TOTAL_PARTS-1] != 0)
1060 overflow = 1;
1061 /* End borrowed section. */
1062 java_next_unicode ();
1063 c = java_peek_unicode ();
1066 /* If we have something from the FP char set but not a digit, parse
1067 a FP literal. */
1068 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1070 /* stage==0: seen digits only
1071 * stage==1: seen '.'
1072 * stage==2: seen 'e' or 'E'.
1073 * stage==3: seen '+' or '-' after 'e' or 'E'.
1074 * stage==4: seen type suffix ('f'/'F'/'d'/'D')
1076 int stage = 0;
1077 int seen_digit = (literal_index ? 1 : 0);
1078 int seen_exponent = 0;
1079 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1080 double unless specified. */
1082 /* It is ok if the radix is 8 because this just means we've
1083 seen a leading `0'. However, radix==16 is invalid. */
1084 if (radix == 16)
1085 java_lex_error ("Can't express non-decimal FP literal", 0);
1086 radix = 10;
1088 for (; literal_index < MAX_TOKEN_LEN;)
1090 if (c == '.')
1092 if (stage < 1)
1094 stage = 1;
1095 literal_token [literal_index++ ] = c;
1096 java_next_unicode ();
1097 c = java_peek_unicode ();
1098 if (literal_index == 1 && !JAVA_ASCII_DIGIT (c))
1099 BUILD_OPERATOR (DOT_TK);
1101 else
1102 java_lex_error ("Invalid character in FP literal", 0);
1105 if ((c == 'e' || c == 'E') && literal_index < MAX_TOKEN_LEN)
1107 if (stage < 2)
1109 /* {E,e} must have seen at least a digit. */
1110 if (!seen_digit)
1111 java_lex_error
1112 ("Invalid FP literal, mantissa must have digit", 0);
1113 seen_digit = 0;
1114 seen_exponent = 1;
1115 stage = 2;
1116 literal_token [literal_index++] = c;
1117 java_next_unicode ();
1118 c = java_peek_unicode ();
1120 else
1121 java_lex_error ("Invalid character in FP literal", 0);
1123 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1125 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1126 stage = 4; /* So we fall through. */
1129 if ((c=='-' || c =='+') && stage == 2
1130 && literal_index < MAX_TOKEN_LEN)
1132 stage = 3;
1133 literal_token [literal_index++] = c;
1134 java_next_unicode ();
1135 c = java_peek_unicode ();
1138 if (((stage == 0 && JAVA_ASCII_FPCHAR (c))
1139 || (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.'))
1140 || (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c)))
1141 || (stage == 3 && JAVA_ASCII_DIGIT (c)))
1142 && literal_index < MAX_TOKEN_LEN)
1144 if (JAVA_ASCII_DIGIT (c))
1145 seen_digit = 1;
1146 if (stage == 2)
1147 stage = 3;
1148 literal_token [literal_index++ ] = c;
1149 java_next_unicode ();
1150 c = java_peek_unicode ();
1152 else if (literal_index < MAX_TOKEN_LEN)
1154 if (stage == 4) /* Don't push back fF/dD. */
1155 java_next_unicode ();
1157 /* An exponent (if any) must have seen a digit. */
1158 if (seen_exponent && !seen_digit)
1159 java_lex_error
1160 ("Invalid FP literal, exponent must have digit", 0);
1162 literal_token [literal_index] = '\0';
1164 #ifndef JC1_LITE
1165 java_perform_atof (java_lval, literal_token,
1166 fflag, number_beginning);
1167 #endif
1168 return FP_LIT_TK;
1171 } /* JAVA_ASCII_FPCHAR (c) */
1173 /* Here we get back to converting the integral literal. */
1174 if (radix == 16 && ! found_hex_digits)
1175 java_lex_error
1176 ("0x must be followed by at least one hexadecimal digit", 0);
1177 else if (radix == 8 && found_non_octal_digits >= 0)
1179 int back = literal_index - found_non_octal_digits;
1180 ctxp->lexer->position.col -= back;
1181 java_lex_error ("Octal literal contains digit out of range", 0);
1182 ctxp->lexer->position.col += back;
1184 else if (c == 'L' || c == 'l')
1186 java_next_unicode ();
1187 long_suffix = 1;
1190 /* This section of code is borrowed from gcc/c-lex.c. */
1191 if (!overflow)
1193 bytes = GET_TYPE_PRECISION (long_type_node);
1194 for (i = bytes; i < TOTAL_PARTS; i++)
1195 if (parts [i])
1197 overflow = 1;
1198 break;
1201 high = low = 0;
1202 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1204 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1205 / HOST_BITS_PER_CHAR)]
1206 << (i * HOST_BITS_PER_CHAR));
1207 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1209 /* End borrowed section. */
1211 #ifndef JC1_LITE
1212 /* Range checking. */
1213 /* Temporarily set type to unsigned. */
1214 value = build_int_cst_wide (long_suffix
1215 ? unsigned_long_type_node
1216 : unsigned_int_type_node, low, high);
1217 SET_LVAL_NODE (value);
1219 /* For base 10 numbers, only values up to the highest value
1220 (plus one) can be written. For instance, only ints up to
1221 2147483648 can be written. The special case of the largest
1222 negative value is handled elsewhere. For other bases, any
1223 number can be represented. */
1224 if (overflow || (radix == 10
1225 && tree_int_cst_lt (long_suffix
1226 ? decimal_long_max
1227 : decimal_int_max,
1228 value)))
1230 if (long_suffix)
1231 JAVA_RANGE_ERROR ("Numeric overflow for 'long' literal");
1232 else
1233 JAVA_RANGE_ERROR ("Numeric overflow for 'int' literal");
1236 /* Sign extend the value. */
1237 value = build_int_cst_wide_type (long_suffix ? long_type_node
1238 : int_type_node, low, high);
1240 if (radix != 10)
1242 value = copy_node (value);
1243 JAVA_NOT_RADIX10_FLAG (value) = 1;
1246 SET_LVAL_NODE (value);
1247 #endif
1248 return INT_LIT_TK;
1251 /* We may have an ID here. */
1252 if (JAVA_START_CHAR_P (c))
1254 int ascii_index = 0, all_ascii = 1;
1256 /* Keyword, boolean literal or null literal. */
1257 while (c != UEOF && JAVA_PART_CHAR_P (c))
1259 java_unicode_2_utf8 (c);
1260 if (c >= 128)
1261 all_ascii = 0;
1262 java_next_unicode ();
1263 ascii_index++;
1264 c = java_peek_unicode ();
1267 obstack_1grow (&temporary_obstack, '\0');
1268 string = obstack_finish (&temporary_obstack);
1270 /* If we have something all ascii, we consider a keyword, a boolean
1271 literal, a null literal or an all ASCII identifier. Otherwise,
1272 this is an identifier (possibly not respecting formation rule). */
1273 if (all_ascii)
1275 const struct java_keyword *kw;
1276 if ((kw=java_keyword (string, ascii_index)))
1278 switch (kw->token)
1280 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1281 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1282 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1283 case PRIVATE_TK: case STRICT_TK:
1284 SET_MODIFIER_CTX (kw->token);
1285 return MODIFIER_TK;
1286 case FLOAT_TK:
1287 SET_LVAL_NODE (float_type_node);
1288 return FP_TK;
1289 case DOUBLE_TK:
1290 SET_LVAL_NODE (double_type_node);
1291 return FP_TK;
1292 case BOOLEAN_TK:
1293 SET_LVAL_NODE (boolean_type_node);
1294 return BOOLEAN_TK;
1295 case BYTE_TK:
1296 SET_LVAL_NODE (byte_type_node);
1297 return INTEGRAL_TK;
1298 case SHORT_TK:
1299 SET_LVAL_NODE (short_type_node);
1300 return INTEGRAL_TK;
1301 case INT_TK:
1302 SET_LVAL_NODE (int_type_node);
1303 return INTEGRAL_TK;
1304 case LONG_TK:
1305 SET_LVAL_NODE (long_type_node);
1306 return INTEGRAL_TK;
1307 case CHAR_TK:
1308 SET_LVAL_NODE (char_type_node);
1309 return INTEGRAL_TK;
1311 /* Keyword based literals. */
1312 case TRUE_TK:
1313 case FALSE_TK:
1314 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1315 boolean_true_node : boolean_false_node));
1316 return BOOL_LIT_TK;
1317 case NULL_TK:
1318 SET_LVAL_NODE (null_pointer_node);
1319 return NULL_TK;
1321 case ASSERT_TK:
1322 if (flag_assert)
1324 BUILD_OPERATOR (kw->token);
1325 return kw->token;
1327 else
1328 break;
1330 /* Some keyword we want to retain information on the location
1331 they where found. */
1332 case CASE_TK:
1333 case DEFAULT_TK:
1334 case SUPER_TK:
1335 case THIS_TK:
1336 case RETURN_TK:
1337 case BREAK_TK:
1338 case CONTINUE_TK:
1339 case TRY_TK:
1340 case CATCH_TK:
1341 case THROW_TK:
1342 case INSTANCEOF_TK:
1343 BUILD_OPERATOR (kw->token);
1345 default:
1346 return kw->token;
1351 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1352 return ID_TK;
1355 java_next_unicode ();
1357 /* Character literals. */
1358 if (c == '\'')
1360 int char_lit;
1362 if ((c = java_get_unicode ()) == '\\')
1363 char_lit = java_parse_escape_sequence ();
1364 else
1366 if (c == '\n' || c == '\'')
1367 java_lex_error ("Invalid character literal", 0);
1368 char_lit = c;
1371 c = java_get_unicode ();
1373 if ((c == '\n') || (c == UEOF))
1374 java_lex_error ("Character literal not terminated at end of line", 0);
1375 if (c != '\'')
1376 java_lex_error ("Syntax error in character literal", 0);
1378 if (char_lit == JAVA_CHAR_ERROR)
1379 char_lit = 0; /* We silently convert it to zero. */
1381 SET_LVAL_NODE (build_int_cst (char_type_node, char_lit));
1382 return CHAR_LIT_TK;
1385 /* String literals. */
1386 if (c == '"')
1388 int no_error = 1;
1389 char *string;
1391 for (;;)
1393 c = java_peek_unicode ();
1394 if (c == '\n' || c == UEOF) /* ULT. */
1396 java_lex_error ("String not terminated at end of line", 0);
1397 break;
1399 java_next_unicode ();
1400 if (c == '"')
1401 break;
1402 if (c == '\\')
1403 c = java_parse_escape_sequence ();
1404 if (c == JAVA_CHAR_ERROR)
1406 no_error = 0;
1407 c = 0; /* We silently convert it to zero. */
1409 java_unicode_2_utf8 (c);
1412 obstack_1grow (&temporary_obstack, '\0');
1413 string = obstack_finish (&temporary_obstack);
1414 #ifndef JC1_LITE
1415 if (!no_error || (c != '"'))
1416 java_lval->node = error_mark_node; /* FIXME: Requires further
1417 testing. */
1418 else
1419 java_lval->node = build_string (strlen (string), string);
1420 #endif
1421 obstack_free (&temporary_obstack, string);
1422 return STRING_LIT_TK;
1425 switch (c)
1427 case '/':
1428 /* Check for comment. */
1429 switch (c = java_peek_unicode ())
1431 case '/':
1432 java_next_unicode ();
1433 for (;;)
1435 c = java_get_unicode ();
1436 if (c == UEOF)
1438 /* It is ok to end a `//' comment with EOF, unless
1439 we're being pedantic. */
1440 if (pedantic)
1441 java_lex_error ("Comment not terminated at end of input",
1443 return 0;
1445 if (c == '\n') /* ULT */
1446 goto step1;
1448 break;
1450 case '*':
1451 java_next_unicode ();
1452 if ((c = java_get_unicode ()) == '*')
1454 c = java_get_unicode ();
1455 if (c == '/')
1457 /* Empty documentation comment. We have to reset
1458 the deprecation marker as only the most recent
1459 doc comment applies. */
1460 ctxp->deprecated = 0;
1462 else
1463 java_parse_doc_section (c);
1465 else
1466 java_parse_end_comment ((c = java_get_unicode ()));
1467 goto step1;
1468 break;
1470 case '=':
1471 java_next_unicode ();
1472 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1474 default:
1475 BUILD_OPERATOR (DIV_TK);
1478 case '(':
1479 BUILD_OPERATOR (OP_TK);
1480 case ')':
1481 return CP_TK;
1482 case '{':
1483 #ifndef JC1_LITE
1484 java_lval->operator.token = OCB_TK;
1485 java_lval->operator.location = BUILD_LOCATION();
1486 #ifdef USE_MAPPED_LOCATION
1487 if (ctxp->ccb_indent == 1)
1488 ctxp->first_ccb_indent1 = input_location;
1489 #else
1490 if (ctxp->ccb_indent == 1)
1491 ctxp->first_ccb_indent1 = input_line;
1492 #endif
1493 #endif
1494 ctxp->ccb_indent++;
1495 return OCB_TK;
1496 case '}':
1497 ctxp->ccb_indent--;
1498 #ifndef JC1_LITE
1499 java_lval->operator.token = CCB_TK;
1500 java_lval->operator.location = BUILD_LOCATION();
1501 #ifdef USE_MAPPED_LOCATION
1502 if (ctxp->ccb_indent == 1)
1503 ctxp->last_ccb_indent1 = input_location;
1504 #else
1505 if (ctxp->ccb_indent == 1)
1506 ctxp->last_ccb_indent1 = input_line;
1507 #endif
1508 #endif
1509 return CCB_TK;
1510 case '[':
1511 BUILD_OPERATOR (OSB_TK);
1512 case ']':
1513 return CSB_TK;
1514 case ';':
1515 return SC_TK;
1516 case ',':
1517 return C_TK;
1518 case '.':
1519 BUILD_OPERATOR (DOT_TK);
1521 /* Operators. */
1522 case '=':
1523 c = java_peek_unicode ();
1524 if (c == '=')
1526 java_next_unicode ();
1527 BUILD_OPERATOR (EQ_TK);
1529 else
1531 /* Equals is used in two different locations. In the
1532 variable_declarator: rule, it has to be seen as '=' as opposed
1533 to being seen as an ordinary assignment operator in
1534 assignment_operators: rule. */
1535 BUILD_OPERATOR (ASSIGN_TK);
1538 case '>':
1539 switch ((c = java_peek_unicode ()))
1541 case '=':
1542 java_next_unicode ();
1543 BUILD_OPERATOR (GTE_TK);
1544 case '>':
1545 java_next_unicode ();
1546 switch ((c = java_peek_unicode ()))
1548 case '>':
1549 java_next_unicode ();
1550 c = java_peek_unicode ();
1551 if (c == '=')
1553 java_next_unicode ();
1554 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1556 else
1558 BUILD_OPERATOR (ZRS_TK);
1560 case '=':
1561 java_next_unicode ();
1562 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1563 default:
1564 BUILD_OPERATOR (SRS_TK);
1566 default:
1567 BUILD_OPERATOR (GT_TK);
1570 case '<':
1571 switch ((c = java_peek_unicode ()))
1573 case '=':
1574 java_next_unicode ();
1575 BUILD_OPERATOR (LTE_TK);
1576 case '<':
1577 java_next_unicode ();
1578 if ((c = java_peek_unicode ()) == '=')
1580 java_next_unicode ();
1581 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1583 else
1585 BUILD_OPERATOR (LS_TK);
1587 default:
1588 BUILD_OPERATOR (LT_TK);
1591 case '&':
1592 switch ((c = java_peek_unicode ()))
1594 case '&':
1595 java_next_unicode ();
1596 BUILD_OPERATOR (BOOL_AND_TK);
1597 case '=':
1598 java_next_unicode ();
1599 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1600 default:
1601 BUILD_OPERATOR (AND_TK);
1604 case '|':
1605 switch ((c = java_peek_unicode ()))
1607 case '|':
1608 java_next_unicode ();
1609 BUILD_OPERATOR (BOOL_OR_TK);
1610 case '=':
1611 java_next_unicode ();
1612 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1613 default:
1614 BUILD_OPERATOR (OR_TK);
1617 case '+':
1618 switch ((c = java_peek_unicode ()))
1620 case '+':
1621 java_next_unicode ();
1622 BUILD_OPERATOR (INCR_TK);
1623 case '=':
1624 java_next_unicode ();
1625 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1626 default:
1627 BUILD_OPERATOR (PLUS_TK);
1630 case '-':
1631 switch ((c = java_peek_unicode ()))
1633 case '-':
1634 java_next_unicode ();
1635 BUILD_OPERATOR (DECR_TK);
1636 case '=':
1637 java_next_unicode ();
1638 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1639 default:
1640 BUILD_OPERATOR (MINUS_TK);
1643 case '*':
1644 if ((c = java_peek_unicode ()) == '=')
1646 java_next_unicode ();
1647 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1649 else
1651 BUILD_OPERATOR (MULT_TK);
1654 case '^':
1655 if ((c = java_peek_unicode ()) == '=')
1657 java_next_unicode ();
1658 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1660 else
1662 BUILD_OPERATOR (XOR_TK);
1665 case '%':
1666 if ((c = java_peek_unicode ()) == '=')
1668 java_next_unicode ();
1669 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1671 else
1673 BUILD_OPERATOR (REM_TK);
1676 case '!':
1677 if ((c = java_peek_unicode()) == '=')
1679 java_next_unicode ();
1680 BUILD_OPERATOR (NEQ_TK);
1682 else
1684 BUILD_OPERATOR (NEG_TK);
1687 case '?':
1688 BUILD_OPERATOR (REL_QM_TK);
1689 case ':':
1690 BUILD_OPERATOR (REL_CL_TK);
1691 case '~':
1692 BUILD_OPERATOR (NOT_TK);
1695 if (c == 0x1a) /* CTRL-Z. */
1697 if ((c = java_peek_unicode ()) == UEOF)
1698 return 0; /* Ok here. */
1701 /* Everything else is an invalid character in the input. */
1703 char lex_error_buffer [128];
1704 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1705 java_sprint_unicode (c));
1706 java_lex_error (lex_error_buffer, -1);
1708 return 0;
1711 #ifndef JC1_LITE
1713 /* The exported interface to the lexer. */
1714 static int
1715 java_lex (YYSTYPE *java_lval)
1717 int r;
1719 timevar_push (TV_LEX);
1720 r = do_java_lex (java_lval);
1721 timevar_pop (TV_LEX);
1722 return r;
1725 /* This is called by the parser to see if an error should be generated
1726 due to numeric overflow. This function only handles the particular
1727 case of the largest negative value, and is only called in the case
1728 where this value is not preceded by `-'. */
1729 static void
1730 error_if_numeric_overflow (tree value)
1732 if (TREE_CODE (value) == INTEGER_CST
1733 && !JAVA_NOT_RADIX10_FLAG (value)
1734 && tree_int_cst_sgn (value) < 0)
1736 if (TREE_TYPE (value) == long_type_node)
1737 java_lex_error ("Numeric overflow for 'long' literal", 0);
1738 else
1739 java_lex_error ("Numeric overflow for 'int' literal", 0);
1743 #endif /* JC1_LITE */
1745 static void
1746 java_unicode_2_utf8 (unicode_t unicode)
1748 if (RANGE (unicode, 0x01, 0x7f))
1749 obstack_1grow (&temporary_obstack, (char)unicode);
1750 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1752 obstack_1grow (&temporary_obstack,
1753 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1754 obstack_1grow (&temporary_obstack,
1755 (unsigned char)(0x80 | (unicode & 0x3f)));
1757 else /* Range 0x800-0xffff. */
1759 obstack_1grow (&temporary_obstack,
1760 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1761 obstack_1grow (&temporary_obstack,
1762 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1763 obstack_1grow (&temporary_obstack,
1764 (unsigned char)(0x80 | (unicode & 0x003f)));
1768 #ifndef JC1_LITE
1769 static tree
1770 build_wfl_node (tree node)
1772 #ifdef USE_MAPPED_LOCATION
1773 node = build_expr_wfl (node, input_location);
1774 #else
1775 node = build_expr_wfl (node, ctxp->filename,
1776 ctxp->lexer->token_start.line,
1777 ctxp->lexer->token_start.col);
1778 #endif
1779 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1780 TREE_TYPE (node) = NULL_TREE;
1781 return node;
1783 #endif
1785 static void
1786 java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
1788 #ifndef JC1_LITE
1789 int col = (ctxp->lexer->position.col
1790 + forward * ctxp->lexer->next_columns);
1791 #if USE_MAPPED_LOCATION
1792 source_location save_location = input_location;
1793 LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table, col);
1795 /* Might be caught in the middle of some error report. */
1796 ctxp->java_error_flag = 0;
1797 java_error (NULL);
1798 java_error (msg);
1799 input_location = save_location;
1800 #else
1801 java_lc save = ctxp->lexer->token_start;
1802 ctxp->lexer->token_start.line = ctxp->lexer->position.line;
1803 ctxp->lexer->token_start.col = col;
1805 /* Might be caught in the middle of some error report. */
1806 ctxp->java_error_flag = 0;
1807 java_error (NULL);
1808 java_error (msg);
1809 ctxp->lexer->token_start = save;
1810 #endif
1811 #endif
1814 #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, ends a line, and 0
   otherwise.  A '\n' ends a line by itself.  A '\r' also ends a line;
   in that case one more character is read from FP and is pushed back
   unless it is '\n' (so that "\r\n" counts as one terminator) or EOF.  */
static int
java_is_eol (FILE *fp, int c)
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int lookahead = getc (fp);

      if (lookahead != '\n' && lookahead != EOF)
        ungetc (lookahead, fp);
      return 1;
    }

  return 0;
}
1832 #endif
1834 char *
1835 java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
1836 int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
1838 #ifdef JC1_LITE
1839 return 0;
1840 #else
1841 /* Dumb implementation. Doesn't try to cache or optimize things. */
1842 /* First line of the file is line 1, first column is 1. */
1844 /* COL == -1 means, at the CR/LF in LINE. */
1845 /* COL == -2 means, at the first non space char in LINE. */
1847 FILE *fp;
1848 int c, ccol, cline = 1;
1849 int current_line_col = 0;
1850 int first_non_space = 0;
1851 char *base;
1853 if (!(fp = fopen (filename, "r")))
1854 fatal_error ("can't open %s: %m", filename);
1856 while (cline != line)
1858 c = getc (fp);
1859 if (c == EOF)
1861 static const char msg[] = "<<file too short - unexpected EOF>>";
1862 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1863 goto have_line;
1865 if (java_is_eol (fp, c))
1866 cline++;
1869 /* Gather the chars of the current line in a buffer. */
1870 for (;;)
1872 c = getc (fp);
1873 if (c < 0 || java_is_eol (fp, c))
1874 break;
1875 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1876 first_non_space = current_line_col;
1877 obstack_1grow (&temporary_obstack, c);
1878 current_line_col++;
1880 have_line:
1882 obstack_1grow (&temporary_obstack, '\n');
1884 if (col == -1)
1886 col = current_line_col;
1887 first_non_space = 0;
1889 else if (col == -2)
1890 col = first_non_space;
1891 else
1892 first_non_space = 0;
1894 /* Place the '^' a the right position. */
1895 base = obstack_base (&temporary_obstack);
1896 for (col += 2, ccol = 0; ccol < col; ccol++)
1898 /* Compute \t when reaching first_non_space. */
1899 char c = (first_non_space ?
1900 (base [ccol] == '\t' ? '\t' : ' ') : ' ');
1901 obstack_1grow (&temporary_obstack, c);
1903 obstack_grow0 (&temporary_obstack, "^", 1);
1905 fclose (fp);
1906 return obstack_finish (&temporary_obstack);
1907 #endif
1910 #ifndef JC1_LITE
/* Compare the LENGTH bytes starting at STR against the NUL-terminated
   string NAME, one character of NAME at a time.

   Returns the difference between the first pair of characters that
   disagree.  If every character of NAME matched, returns 0 when STR was
   consumed exactly up to its end and 1 when input is left over.

   NOTE(review): UTF8_GET is defined elsewhere; it is presumed to decode
   one character from STR, advance STR past it, and yield a negative
   value once STR has reached the limit -- confirm against its
   definition.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; ++expected)
    {
      int ch = UTF8_GET (str, limit);
      int diff = ch - *expected;

      if (diff != 0)
        return diff;
    }

  return str != limit;
}
/* The C++ keywords, including the `__'-prefixed spellings.  The table
   must stay sorted in ascending byte order: cxx_keyword_p looks names
   up in it with a binary search.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__",
  "__imag", "__imag__", "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__", "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl",
  "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast",
  "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
2037 /* Return true if NAME is a C++ keyword. */
2040 cxx_keyword_p (const char *name, int length)
2042 int last = ARRAY_SIZE (cxx_keywords);
2043 int first = 0;
2044 int mid = (last + first) / 2;
2045 int old = -1;
2047 for (mid = (last + first) / 2;
2048 mid != old;
2049 old = mid, mid = (last + first) / 2)
2051 int kwl = strlen (cxx_keywords[mid]);
2052 int min_length = kwl > length ? length : kwl;
2053 int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
2055 if (r == 0)
2057 int i;
2058 /* We've found a match if all the remaining characters are `$'. */
2059 for (i = min_length; i < length && name[i] == '$'; ++i)
2061 if (i == length)
2062 return 1;
2063 r = 1;
2066 if (r < 0)
2067 last = mid;
2068 else
2069 first = mid;
2071 return 0;
2073 #endif /* JC1_LITE */