2005-01-22 Thomas Koenig <Thomas.Koenig@online.de>
[official-gcc.git] / gcc / java / lex.c
blob712ffc2c589ad65a4a97439b78af857980b4c89b
/* Language lexer for the GNU compiler for the Java(TM) language.
   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004
   Free Software Foundation, Inc.
   Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.

Java and all Java-based marks are trademarks or registered trademarks
of Sun Microsystems, Inc. in the United States and other countries.
The Free Software Foundation is independent of Sun Microsystems, Inc. */
/* It defines java_lex (yylex) that reads a Java ASCII source file
   possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found but comments,
   white spaces and line terminators. When necessary, it also fills
   the java_lval (yylval) union. It's implemented to be called by a
   re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
38 #include "keyword.h"
39 #include "flags.h"
40 #include "chartables.h"
41 #ifndef JC1_LITE
42 #include "timevar.h"
43 #endif
45 /* Function declarations. */
46 static char *java_sprint_unicode (int);
47 static void java_unicode_2_utf8 (unicode_t);
48 static void java_lex_error (const char *, int);
49 #ifndef JC1_LITE
50 static int do_java_lex (YYSTYPE *);
51 static int java_lex (YYSTYPE *);
52 static int java_is_eol (FILE *, int);
53 static tree build_wfl_node (tree);
54 #endif
55 static int java_parse_escape_sequence (void);
56 static int java_start_char_p (unicode_t);
57 static int java_part_char_p (unicode_t);
58 static int java_space_char_p (unicode_t);
59 static void java_parse_doc_section (int);
60 static void java_parse_end_comment (int);
61 static int java_read_char (java_lexer *);
62 static int java_get_unicode (void);
63 static int java_peek_unicode (void);
64 static void java_next_unicode (void);
65 static int java_read_unicode (java_lexer *, int *);
66 #ifndef JC1_LITE
67 static int utf8_cmp (const unsigned char *, int, const char *);
68 #endif
70 java_lexer *java_new_lexer (FILE *, const char *);
71 #ifndef JC1_LITE
72 static void error_if_numeric_overflow (tree);
73 #endif
75 #ifdef HAVE_ICONV
76 /* This is nonzero if we have initialized `need_byteswap'. */
77 static int byteswap_init = 0;
79 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
80 big-endian order -- not native endian order. We handle this by
81 doing a conversion once at startup and seeing what happens. This
82 flag holds the results of this determination. */
83 static int need_byteswap = 0;
84 #endif
86 void
87 java_init_lex (FILE *finput, const char *encoding)
89 #ifndef JC1_LITE
90 int java_lang_imported = 0;
92 if (!java_lang_id)
93 java_lang_id = get_identifier ("java.lang");
94 if (!inst_id)
95 inst_id = get_identifier ("inst$");
96 if (!wpv_id)
97 wpv_id = get_identifier ("write_parm_value$");
99 if (!java_lang_imported)
101 tree node = build_tree_list (build_unknown_wfl (java_lang_id),
102 NULL_TREE);
103 read_import_dir (TREE_PURPOSE (node));
104 TREE_CHAIN (node) = ctxp->import_demand_list;
105 ctxp->import_demand_list = node;
106 java_lang_imported = 1;
109 if (!wfl_operator)
111 #ifdef USE_MAPPED_LOCATION
112 wfl_operator = build_expr_wfl (NULL_TREE, input_location);
113 #else
114 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
115 #endif
117 if (!label_id)
118 label_id = get_identifier ("$L");
119 if (!wfl_append)
120 wfl_append = build_unknown_wfl (get_identifier ("append"));
121 if (!wfl_string_buffer)
122 wfl_string_buffer =
123 build_unknown_wfl (get_identifier (flag_emit_class_files
124 ? "java.lang.StringBuffer"
125 : "gnu.gcj.runtime.StringBuffer"));
126 if (!wfl_to_string)
127 wfl_to_string = build_unknown_wfl (get_identifier ("toString"));
129 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
130 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
132 memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
133 ctxp->current_parsed_class = NULL;
134 ctxp->package = NULL_TREE;
135 #endif
137 ctxp->save_location = input_location;
138 ctxp->java_error_flag = 0;
139 ctxp->lexer = java_new_lexer (finput, encoding);
/* Return a printable form of the character C: the character itself
   when it is printable ASCII (' ' through '~'), otherwise a \uXXXX
   style escape. The result lives in a static buffer that is
   overwritten by the next call. */

static char *
java_sprint_unicode (int c)
{
  /* Room for "\u", every hex digit an unsigned int can produce, and
     the terminating NUL. A fixed 10 bytes overflowed as soon as C
     needed eight digits, which is the case for any negative value. */
  static char buffer [2 + 2 * sizeof (unsigned int) + 1];

  if (c < ' ' || c >= 127)
    sprintf (buffer, "\\u%04x", (unsigned int) c);
  else
    {
      buffer [0] = c;
      buffer [1] = '\0';
    }
  return buffer;
}
156 /* Create a new lexer object. */
158 java_lexer *
159 java_new_lexer (FILE *finput, const char *encoding)
161 java_lexer *lex = xmalloc (sizeof (java_lexer));
162 int enc_error = 0;
164 lex->finput = finput;
165 lex->bs_count = 0;
166 lex->unget_value = 0;
167 lex->next_unicode = 0;
168 lex->avail_unicode = 0;
169 lex->next_columns = 1;
170 lex->encoding = encoding;
171 lex->position.line = 1;
172 lex->position.col = 1;
173 #ifndef JC1_LITE
174 #ifdef USE_MAPPED_LOCATION
175 input_location
176 = linemap_line_start (&line_table, 1, 120);
177 #else
178 input_line = 1;
179 #endif
180 #endif
182 #ifdef HAVE_ICONV
183 lex->handle = iconv_open ("UCS-2", encoding);
184 if (lex->handle != (iconv_t) -1)
186 lex->first = -1;
187 lex->last = -1;
188 lex->out_first = -1;
189 lex->out_last = -1;
190 lex->read_anything = 0;
191 lex->use_fallback = 0;
193 /* Work around broken iconv() implementations by doing checking at
194 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
195 then all UCS-2 encoders will be broken. Perhaps not a valid
196 assumption. */
197 if (! byteswap_init)
199 iconv_t handle;
201 byteswap_init = 1;
203 handle = iconv_open ("UCS-2", "UTF-8");
204 if (handle != (iconv_t) -1)
206 unicode_t result;
207 unsigned char in[3];
208 char *inp, *outp;
209 size_t inc, outc, r;
211 /* This is the UTF-8 encoding of \ufeff. */
212 in[0] = 0xef;
213 in[1] = 0xbb;
214 in[2] = 0xbf;
216 inp = (char *) in;
217 inc = 3;
218 outp = (char *) &result;
219 outc = 2;
221 r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
222 &outp, &outc);
223 iconv_close (handle);
224 /* Conversion must be complete for us to use the result. */
225 if (r != (size_t) -1 && inc == 0 && outc == 0)
226 need_byteswap = (result != 0xfeff);
230 lex->byte_swap = need_byteswap;
232 else
233 #endif /* HAVE_ICONV */
235 /* If iconv failed, use the internal decoder if the default
236 encoding was requested. This code is used on platforms where
237 iconv exists but is insufficient for our needs. For
238 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
240 On Solaris the default encoding, as returned by nl_langinfo(),
241 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
242 understand that. We work around that by pretending
243 `646' to be the same as UTF-8. */
244 if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
245 enc_error = 1;
246 #ifdef HAVE_ICONV
247 else
249 lex->use_fallback = 1;
250 lex->encoding = "UTF-8";
252 #endif /* HAVE_ICONV */
255 if (enc_error)
256 fatal_error ("unknown encoding: %qs\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n%<--encoding=UTF-8%> option", encoding);
258 return lex;
261 void
262 java_destroy_lexer (java_lexer *lex)
264 #ifdef HAVE_ICONV
265 if (! lex->use_fallback)
266 iconv_close (lex->handle);
267 #endif
268 free (lex);
271 static int
272 java_read_char (java_lexer *lex)
274 #ifdef HAVE_ICONV
275 if (! lex->use_fallback)
277 size_t ir, inbytesleft, in_save, out_count, out_save;
278 char *inp, *outp;
279 unicode_t result;
281 /* If there is data which has already been converted, use it. */
282 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
284 lex->out_first = 0;
285 lex->out_last = 0;
287 while (1)
289 /* See if we need to read more data. If FIRST == 0 then
290 the previous conversion attempt ended in the middle of
291 a character at the end of the buffer. Otherwise we
292 only have to read if the buffer is empty. */
293 if (lex->first == 0 || lex->first >= lex->last)
295 int r;
297 if (lex->first >= lex->last)
299 lex->first = 0;
300 lex->last = 0;
302 if (feof (lex->finput))
303 return UEOF;
304 r = fread (&lex->buffer[lex->last], 1,
305 sizeof (lex->buffer) - lex->last,
306 lex->finput);
307 lex->last += r;
310 inbytesleft = lex->last - lex->first;
311 out_count = sizeof (lex->out_buffer) - lex->out_last;
313 if (inbytesleft == 0)
315 /* We've tried to read and there is nothing left. */
316 return UEOF;
319 in_save = inbytesleft;
320 out_save = out_count;
321 inp = &lex->buffer[lex->first];
322 outp = (char *) &lex->out_buffer[lex->out_last];
323 ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
324 &inbytesleft, &outp, &out_count);
326 /* If we haven't read any bytes, then look to see if we
327 have read a BOM. */
328 if (! lex->read_anything && out_save - out_count >= 2)
330 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
331 if (uc == 0xfeff)
333 lex->byte_swap = 0;
334 lex->out_first += 2;
336 else if (uc == 0xfffe)
338 lex->byte_swap = 1;
339 lex->out_first += 2;
341 lex->read_anything = 1;
344 if (lex->byte_swap)
346 unsigned int i;
347 for (i = 0; i < out_save - out_count; i += 2)
349 char t = lex->out_buffer[lex->out_last + i];
350 lex->out_buffer[lex->out_last + i]
351 = lex->out_buffer[lex->out_last + i + 1];
352 lex->out_buffer[lex->out_last + i + 1] = t;
356 lex->first += in_save - inbytesleft;
357 lex->out_last += out_save - out_count;
359 /* If we converted anything at all, move along. */
360 if (out_count != out_save)
361 break;
363 if (ir == (size_t) -1)
365 if (errno == EINVAL)
367 /* This is ok. This means that the end of our buffer
368 is in the middle of a character sequence. We just
369 move the valid part of the buffer to the beginning
370 to force a read. */
371 memmove (&lex->buffer[0], &lex->buffer[lex->first],
372 lex->last - lex->first);
373 lex->last -= lex->first;
374 lex->first = 0;
376 else
378 /* A more serious error. */
379 char buffer[128];
380 sprintf (buffer,
381 "Unrecognized character for encoding '%s'",
382 lex->encoding);
383 java_lex_error (buffer, 0);
384 return UEOF;
390 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
392 /* Don't have any data. */
393 return UEOF;
396 /* Success. */
397 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
398 lex->out_first += 2;
399 return result;
401 else
402 #endif /* HAVE_ICONV */
404 int c, c1, c2;
405 c = getc (lex->finput);
407 if (c == EOF)
408 return UEOF;
409 if (c < 128)
410 return (unicode_t) c;
411 else
413 if ((c & 0xe0) == 0xc0)
415 c1 = getc (lex->finput);
416 if ((c1 & 0xc0) == 0x80)
418 unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
419 /* Check for valid 2-byte characters. We explicitly
420 allow \0 because this encoding is common in the
421 Java world. */
422 if (r == 0 || (r >= 0x80 && r <= 0x7ff))
423 return r;
426 else if ((c & 0xf0) == 0xe0)
428 c1 = getc (lex->finput);
429 if ((c1 & 0xc0) == 0x80)
431 c2 = getc (lex->finput);
432 if ((c2 & 0xc0) == 0x80)
434 unicode_t r = (unicode_t)(((c & 0xf) << 12) +
435 (( c1 & 0x3f) << 6)
436 + (c2 & 0x3f));
437 /* Check for valid 3-byte characters.
438 Don't allow surrogate, \ufffe or \uffff. */
439 if (IN_RANGE (r, 0x800, 0xffff)
440 && ! IN_RANGE (r, 0xd800, 0xdfff)
441 && r != 0xfffe && r != 0xffff)
442 return r;
447 /* We simply don't support invalid characters. We also
448 don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
449 cannot be valid Java characters. */
450 java_lex_error ("malformed UTF-8 character", 0);
454 /* We only get here on error. */
455 return UEOF;
458 static int
459 java_read_unicode (java_lexer *lex, int *unicode_escape_p)
461 int c;
463 if (lex->unget_value)
465 c = lex->unget_value;
466 lex->unget_value = 0;
468 else
469 c = java_read_char (lex);
471 *unicode_escape_p = 0;
473 if (c != '\\')
475 lex->bs_count = 0;
476 return c;
479 ++lex->bs_count;
480 if ((lex->bs_count) % 2 == 1)
482 /* Odd number of \ seen. */
483 c = java_read_char (lex);
484 if (c == 'u')
486 unicode_t unicode = 0;
487 int shift = 12;
489 /* Recognize any number of `u's in \u. */
490 while ((c = java_read_char (lex)) == 'u')
493 shift = 12;
496 if (c == UEOF)
498 java_lex_error ("prematurely terminated \\u sequence", 0);
499 return UEOF;
502 if (hex_p (c))
503 unicode |= (unicode_t)(hex_value (c) << shift);
504 else
506 java_lex_error ("non-hex digit in \\u sequence", 0);
507 break;
510 c = java_read_char (lex);
511 shift -= 4;
513 while (shift >= 0);
515 if (c != UEOF)
516 lex->unget_value = c;
518 lex->bs_count = 0;
519 *unicode_escape_p = 1;
520 return unicode;
522 lex->unget_value = c;
524 return (unicode_t) '\\';
/* Get the next Unicode character (post-Unicode-escape-handling).
   Move the current position to just after returned character. */

static int
java_get_unicode (void)
{
  int next = java_peek_unicode ();
  java_next_unicode ();
  return next;
}
538 /* Return the next Unicode character (post-Unicode-escape-handling).
539 Do not move the current position, which remains just before
540 the returned character. */
542 static int
543 java_peek_unicode (void)
545 int unicode_escape_p;
546 java_lexer *lex = ctxp->lexer;
547 int next;
549 if (lex->avail_unicode)
550 return lex->next_unicode;
552 next = java_read_unicode (lex, &unicode_escape_p);
554 if (next == '\r')
556 /* We have to read ahead to see if we got \r\n.
557 In that case we return a single line terminator. */
558 int dummy;
559 next = java_read_unicode (lex, &dummy);
560 if (next != '\n' && next != UEOF)
561 lex->unget_value = next;
562 /* In either case we must return a newline. */
563 next = '\n';
566 lex->next_unicode = next;
567 lex->avail_unicode = 1;
569 if (next == UEOF)
571 lex->next_columns = 0;
572 return next;
575 if (next == '\n')
577 lex->next_columns = 1 - lex->position.col;
579 else if (next == '\t')
581 int cur_col = lex->position.col;
582 lex->next_columns = ((cur_col + 7) & ~7) + 1 - cur_col;
585 else
587 lex->next_columns = 1;
589 if (unicode_escape_p)
590 lex->next_columns = 6;
591 return next;
594 /* Move forward one Unicode character (post-Unicode-escape-handling).
595 Only allowed after java_peek_unicode. The combination java_peek_unicode
596 followed by java_next_unicode is equivalent to java_get_unicode. */
598 static void java_next_unicode (void)
600 struct java_lexer *lex = ctxp->lexer;
601 lex->position.col += lex->next_columns;
602 if (lex->next_unicode == '\n')
604 lex->position.line++;
605 #ifndef JC1_LITE
606 #ifdef USE_MAPPED_LOCATION
607 input_location
608 = linemap_line_start (&line_table, lex->position.line, 120);
609 #else
610 input_line = lex->position.line;
611 #endif
612 #endif
614 lex->avail_unicode = 0;
#if 0
/* The inverse of java_next_unicode.
   Not currently used, but could be if it would be cleaner or faster.
   java_peek_unicode == java_get_unicode + java_unget_unicode.
   java_get_unicode == java_peek_unicode + java_next_unicode.
*/
static void java_unget_unicode (void)
{
  struct java_lexer *lex = ctxp->lexer;
  /* Only one character of push-back is available. */
  if (lex->avail_unicode)
    fatal_error ("internal error - bad unget");
  lex->avail_unicode = 1;
  lex->position.col -= lex->next_columns;
}
#endif
633 /* Parse the end of a C style comment.
634 * C is the first character following the '/' and '*'. */
635 static void
636 java_parse_end_comment (int c)
638 for ( ;; c = java_get_unicode ())
640 switch (c)
642 case UEOF:
643 java_lex_error ("Comment not terminated at end of input", 0);
644 return;
645 case '*':
646 switch (c = java_peek_unicode ())
648 case UEOF:
649 java_lex_error ("Comment not terminated at end of input", 0);
650 return;
651 case '/':
652 java_next_unicode ();
653 return;
654 case '*': /* Reparse only '*'. */
661 /* Parse the documentation section. Keywords must be at the beginning
662 of a documentation comment line (ignoring white space and any `*'
663 character). Parsed keyword(s): @DEPRECATED. */
665 static void
666 java_parse_doc_section (int c)
668 int last_was_star;
670 /* We reset this here, because only the most recent doc comment
671 applies to the following declaration. */
672 ctxp->deprecated = 0;
674 /* We loop over all the lines of the comment. We'll eventually exit
675 if we hit EOF prematurely, or when we see the comment
676 terminator. */
677 while (1)
679 /* These first steps need only be done if we're still looking
680 for the deprecated tag. If we've already seen it, we might
681 as well skip looking for it again. */
682 if (! ctxp->deprecated)
684 /* Skip whitespace and '*'s. We must also check for the end
685 of the comment here. */
686 while (JAVA_WHITE_SPACE_P (c) || c == '*')
688 last_was_star = (c == '*');
689 c = java_get_unicode ();
690 if (last_was_star && c == '/')
692 /* We just saw the comment terminator. */
693 return;
697 if (c == UEOF)
698 goto eof;
700 if (c == '@')
702 const char *deprecated = "@deprecated";
703 int i;
705 for (i = 0; deprecated[i]; ++i)
707 if (c != deprecated[i])
708 break;
709 /* We write the code in this way, with the
710 update at the end, so that after the loop
711 we're left with the next character in C. */
712 c = java_get_unicode ();
715 if (c == UEOF)
716 goto eof;
718 /* @deprecated must be followed by a space or newline.
719 We also allow a '*' in case it appears just before
720 the end of a comment. In this position only we also
721 must allow any Unicode space character. */
722 if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
724 if (! deprecated[i])
725 ctxp->deprecated = 1;
730 /* We've examined the relevant content from this line. Now we
731 skip the remaining characters and start over with the next
732 line. We also check for end of comment here. */
733 while (c != '\n' && c != UEOF)
735 last_was_star = (c == '*');
736 c = java_get_unicode ();
737 if (last_was_star && c == '/')
738 return;
741 if (c == UEOF)
742 goto eof;
743 /* We have to advance past the \n. */
744 c = java_get_unicode ();
745 if (c == UEOF)
746 goto eof;
749 eof:
750 java_lex_error ("Comment not terminated at end of input", 0);
753 /* Return true if C is a valid start character for a Java identifier.
754 This is only called if C >= 128 -- smaller values are handled
755 inline. However, this function handles all values anyway. */
756 static int
757 java_start_char_p (unicode_t c)
759 unsigned int hi = c / 256;
760 const char *const page = type_table[hi];
761 unsigned long val = (unsigned long) page;
762 int flags;
764 if ((val & ~ LETTER_MASK) != 0)
765 flags = page[c & 255];
766 else
767 flags = val;
769 return flags & LETTER_START;
772 /* Return true if C is a valid part character for a Java identifier.
773 This is only called if C >= 128 -- smaller values are handled
774 inline. However, this function handles all values anyway. */
775 static int
776 java_part_char_p (unicode_t c)
778 unsigned int hi = c / 256;
779 const char *const page = type_table[hi];
780 unsigned long val = (unsigned long) page;
781 int flags;
783 if ((val & ~ LETTER_MASK) != 0)
784 flags = page[c & 255];
785 else
786 flags = val;
788 return flags & LETTER_PART;
791 /* Return true if C is whitespace. */
792 static int
793 java_space_char_p (unicode_t c)
795 unsigned int hi = c / 256;
796 const char *const page = type_table[hi];
797 unsigned long val = (unsigned long) page;
798 int flags;
800 if ((val & ~ LETTER_MASK) != 0)
801 flags = page[c & 255];
802 else
803 flags = val;
805 return flags & LETTER_SPACE;
808 static int
809 java_parse_escape_sequence (void)
811 int c;
813 switch (c = java_get_unicode ())
815 case 'b':
816 return (unicode_t)0x8;
817 case 't':
818 return (unicode_t)0x9;
819 case 'n':
820 return (unicode_t)0xa;
821 case 'f':
822 return (unicode_t)0xc;
823 case 'r':
824 return (unicode_t)0xd;
825 case '"':
826 return (unicode_t)0x22;
827 case '\'':
828 return (unicode_t)0x27;
829 case '\\':
830 return (unicode_t)0x5c;
831 case '0': case '1': case '2': case '3': case '4':
832 case '5': case '6': case '7':
834 int more = 3;
835 unicode_t char_lit = 0;
837 if (c > '3')
839 /* According to the grammar, `\477' has a well-defined
840 meaning -- it is `\47' followed by `7'. */
841 --more;
843 char_lit = 0;
844 for (;;)
846 char_lit = 8 * char_lit + c - '0';
847 if (--more == 0)
848 break;
849 c = java_peek_unicode ();
850 if (! RANGE (c, '0', '7'))
851 break;
852 java_next_unicode ();
855 return char_lit;
857 default:
858 java_lex_error ("Invalid character in escape sequence", -1);
859 return JAVA_CHAR_ERROR;
863 #ifndef JC1_LITE
864 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
866 /* Subroutine of java_lex: converts floating-point literals to tree
867 nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
868 store the result. FFLAG indicates whether the literal was tagged
869 with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
870 is the line number on which to report any error. */
872 static void java_perform_atof (YYSTYPE *, char *, int, int);
874 static void
875 java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
876 int number_beginning)
878 REAL_VALUE_TYPE value;
879 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
881 SET_REAL_VALUE_ATOF (value,
882 REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
884 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
886 JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
887 value = DCONST0;
889 else if (IS_ZERO (value))
891 /* We check to see if the value is really 0 or if we've found an
892 underflow. We do this in the most primitive imaginable way. */
893 int really_zero = 1;
894 char *p = literal_token;
895 if (*p == '-')
896 ++p;
897 while (*p && *p != 'e' && *p != 'E')
899 if (*p != '0' && *p != '.')
901 really_zero = 0;
902 break;
904 ++p;
906 if (! really_zero)
908 int save_col = ctxp->lexer->position.col;
909 ctxp->lexer->position.col = number_beginning;
910 java_lex_error ("Floating point literal underflow", 0);
911 ctxp->lexer->position.col = save_col;
915 SET_LVAL_NODE (build_real (type, value));
917 #endif
919 static int yylex (YYSTYPE *);
921 static int
922 #ifdef JC1_LITE
923 yylex (YYSTYPE *java_lval)
924 #else
925 do_java_lex (YYSTYPE *java_lval)
926 #endif
928 int c;
929 char *string;
931 /* Translation of the Unicode escape in the raw stream of Unicode
932 characters. Takes care of line terminator. */
933 step1:
934 /* Skip white spaces: SP, TAB and FF or ULT. */
935 for (;;)
937 c = java_peek_unicode ();
938 if (c != '\n' && ! JAVA_WHITE_SPACE_P (c))
939 break;
940 java_next_unicode ();
943 /* Handle EOF here. */
944 if (c == UEOF) /* Should probably do something here... */
945 return 0;
947 #ifndef JC1_LITE
948 #ifdef USE_MAPPED_LOCATION
949 LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table,
950 ctxp->lexer->position.col);
951 #else
952 ctxp->lexer->token_start = ctxp->lexer->position;
953 #endif
954 #endif
956 /* Numeric literals. */
957 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
959 /* This section of code is borrowed from gcc/c-lex.c. */
960 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
961 int parts[TOTAL_PARTS];
962 HOST_WIDE_INT high, low;
963 /* End borrowed section. */
964 char literal_token [256];
965 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
966 int found_hex_digits = 0, found_non_octal_digits = -1;
967 int i;
968 #ifndef JC1_LITE
969 int number_beginning = ctxp->lexer->position.col;
970 tree value;
971 #endif
973 for (i = 0; i < TOTAL_PARTS; i++)
974 parts [i] = 0;
976 if (c == '0')
978 java_next_unicode ();
979 c = java_peek_unicode ();
980 if (c == 'x' || c == 'X')
982 radix = 16;
983 java_next_unicode ();
984 c = java_peek_unicode ();
986 else if (JAVA_ASCII_DIGIT (c))
988 literal_token [literal_index++] = '0';
989 radix = 8;
991 else if (c == '.' || c == 'e' || c =='E')
993 literal_token [literal_index++] = '0';
994 /* Handle C during floating-point parsing. */
996 else
998 /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
999 switch (c)
1001 case 'L': case 'l':
1002 java_next_unicode ();
1003 SET_LVAL_NODE (long_zero_node);
1004 return (INT_LIT_TK);
1005 case 'f': case 'F':
1006 java_next_unicode ();
1007 SET_LVAL_NODE (float_zero_node);
1008 return (FP_LIT_TK);
1009 case 'd': case 'D':
1010 java_next_unicode ();
1011 SET_LVAL_NODE (double_zero_node);
1012 return (FP_LIT_TK);
1013 default:
1014 SET_LVAL_NODE (integer_zero_node);
1015 return (INT_LIT_TK);
1019 /* Parse the first part of the literal, until we find something
1020 which is not a number. */
1021 while (radix == 16 ? JAVA_ASCII_HEXDIGIT (c) : JAVA_ASCII_DIGIT (c))
1023 /* We store in a string (in case it turns out to be a FP) and in
1024 PARTS if we have to process a integer literal. */
1025 int numeric = hex_value (c);
1026 int count;
1028 /* Remember when we find a valid hexadecimal digit. */
1029 if (radix == 16)
1030 found_hex_digits = 1;
1031 /* Remember when we find an invalid octal digit. */
1032 else if (radix == 8 && numeric >= 8 && found_non_octal_digits < 0)
1033 found_non_octal_digits = literal_index;
1035 literal_token [literal_index++] = c;
1036 /* This section of code if borrowed from gcc/c-lex.c. */
1037 for (count = 0; count < TOTAL_PARTS; count++)
1039 parts[count] *= radix;
1040 if (count)
1042 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1043 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1045 else
1046 parts[0] += numeric;
1048 if (parts [TOTAL_PARTS-1] != 0)
1049 overflow = 1;
1050 /* End borrowed section. */
1051 java_next_unicode ();
1052 c = java_peek_unicode ();
1055 /* If we have something from the FP char set but not a digit, parse
1056 a FP literal. */
1057 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1059 /* stage==0: seen digits only
1060 * stage==1: seen '.'
1061 * stage==2: seen 'e' or 'E'.
1062 * stage==3: seen '+' or '-' after 'e' or 'E'.
1063 * stage==4: seen type suffix ('f'/'F'/'d'/'D')
1065 int stage = 0;
1066 int seen_digit = (literal_index ? 1 : 0);
1067 int seen_exponent = 0;
1068 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1069 double unless specified. */
1071 /* It is ok if the radix is 8 because this just means we've
1072 seen a leading `0'. However, radix==16 is invalid. */
1073 if (radix == 16)
1074 java_lex_error ("Can't express non-decimal FP literal", 0);
1075 radix = 10;
1077 for (;;)
1079 if (c == '.')
1081 if (stage < 1)
1083 stage = 1;
1084 literal_token [literal_index++ ] = c;
1085 java_next_unicode ();
1086 c = java_peek_unicode ();
1087 if (literal_index == 1 && !JAVA_ASCII_DIGIT (c))
1088 BUILD_OPERATOR (DOT_TK);
1090 else
1091 java_lex_error ("Invalid character in FP literal", 0);
1094 if (c == 'e' || c == 'E')
1096 if (stage < 2)
1098 /* {E,e} must have seen at least a digit. */
1099 if (!seen_digit)
1100 java_lex_error
1101 ("Invalid FP literal, mantissa must have digit", 0);
1102 seen_digit = 0;
1103 seen_exponent = 1;
1104 stage = 2;
1105 literal_token [literal_index++] = c;
1106 java_next_unicode ();
1107 c = java_peek_unicode ();
1109 else
1110 java_lex_error ("Invalid character in FP literal", 0);
1112 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1114 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1115 stage = 4; /* So we fall through. */
1118 if ((c=='-' || c =='+') && stage == 2)
1120 stage = 3;
1121 literal_token [literal_index++] = c;
1122 java_next_unicode ();
1123 c = java_peek_unicode ();
1126 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1127 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1128 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1129 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1131 if (JAVA_ASCII_DIGIT (c))
1132 seen_digit = 1;
1133 if (stage == 2)
1134 stage = 3;
1135 literal_token [literal_index++ ] = c;
1136 java_next_unicode ();
1137 c = java_peek_unicode ();
1139 else
1141 if (stage == 4) /* Don't push back fF/dD. */
1142 java_next_unicode ();
1144 /* An exponent (if any) must have seen a digit. */
1145 if (seen_exponent && !seen_digit)
1146 java_lex_error
1147 ("Invalid FP literal, exponent must have digit", 0);
1149 literal_token [literal_index] = '\0';
1151 #ifndef JC1_LITE
1152 java_perform_atof (java_lval, literal_token,
1153 fflag, number_beginning);
1154 #endif
1155 return FP_LIT_TK;
1158 } /* JAVA_ASCII_FPCHAR (c) */
1160 /* Here we get back to converting the integral literal. */
1161 if (radix == 16 && ! found_hex_digits)
1162 java_lex_error
1163 ("0x must be followed by at least one hexadecimal digit", 0);
1164 else if (radix == 8 && found_non_octal_digits >= 0)
1166 int back = literal_index - found_non_octal_digits;
1167 ctxp->lexer->position.col -= back;
1168 java_lex_error ("Octal literal contains digit out of range", 0);
1169 ctxp->lexer->position.col += back;
1171 else if (c == 'L' || c == 'l')
1173 java_next_unicode ();
1174 long_suffix = 1;
1177 /* This section of code is borrowed from gcc/c-lex.c. */
1178 if (!overflow)
1180 bytes = GET_TYPE_PRECISION (long_type_node);
1181 for (i = bytes; i < TOTAL_PARTS; i++)
1182 if (parts [i])
1184 overflow = 1;
1185 break;
1188 high = low = 0;
1189 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1191 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1192 / HOST_BITS_PER_CHAR)]
1193 << (i * HOST_BITS_PER_CHAR));
1194 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1196 /* End borrowed section. */
1198 #ifndef JC1_LITE
1199 /* Range checking. */
1200 /* Temporarily set type to unsigned. */
1201 value = build_int_cst_wide (long_suffix
1202 ? unsigned_long_type_node
1203 : unsigned_int_type_node, low, high);
1204 SET_LVAL_NODE (value);
1206 /* For base 10 numbers, only values up to the highest value
1207 (plus one) can be written. For instance, only ints up to
1208 2147483648 can be written. The special case of the largest
1209 negative value is handled elsewhere. For other bases, any
1210 number can be represented. */
1211 if (overflow || (radix == 10
1212 && tree_int_cst_lt (long_suffix
1213 ? decimal_long_max
1214 : decimal_int_max,
1215 value)))
1217 if (long_suffix)
1218 JAVA_RANGE_ERROR ("Numeric overflow for 'long' literal");
1219 else
1220 JAVA_RANGE_ERROR ("Numeric overflow for 'int' literal");
1223 /* Sign extend the value. */
1224 value = build_int_cst_wide (long_suffix ? long_type_node : int_type_node,
1225 low, high);
1226 value = force_fit_type (value, 0, false, false);
1228 if (radix != 10)
1230 value = copy_node (value);
1231 JAVA_NOT_RADIX10_FLAG (value) = 1;
1234 SET_LVAL_NODE (value);
1235 #endif
1236 return INT_LIT_TK;
1239 /* We may have an ID here. */
1240 if (JAVA_START_CHAR_P (c))
1242 int ascii_index = 0, all_ascii = 1;
1244 /* Keyword, boolean literal or null literal. */
1245 while (c != UEOF && JAVA_PART_CHAR_P (c))
1247 java_unicode_2_utf8 (c);
1248 if (c >= 128)
1249 all_ascii = 0;
1250 java_next_unicode ();
1251 ascii_index++;
1252 c = java_peek_unicode ();
1255 obstack_1grow (&temporary_obstack, '\0');
1256 string = obstack_finish (&temporary_obstack);
1258 /* If we have something all ascii, we consider a keyword, a boolean
1259 literal, a null literal or an all ASCII identifier. Otherwise,
1260 this is an identifier (possibly not respecting formation rule). */
1261 if (all_ascii)
1263 const struct java_keyword *kw;
1264 if ((kw=java_keyword (string, ascii_index)))
1266 switch (kw->token)
1268 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1269 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1270 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1271 case PRIVATE_TK: case STRICT_TK:
1272 SET_MODIFIER_CTX (kw->token);
1273 return MODIFIER_TK;
1274 case FLOAT_TK:
1275 SET_LVAL_NODE (float_type_node);
1276 return FP_TK;
1277 case DOUBLE_TK:
1278 SET_LVAL_NODE (double_type_node);
1279 return FP_TK;
1280 case BOOLEAN_TK:
1281 SET_LVAL_NODE (boolean_type_node);
1282 return BOOLEAN_TK;
1283 case BYTE_TK:
1284 SET_LVAL_NODE (byte_type_node);
1285 return INTEGRAL_TK;
1286 case SHORT_TK:
1287 SET_LVAL_NODE (short_type_node);
1288 return INTEGRAL_TK;
1289 case INT_TK:
1290 SET_LVAL_NODE (int_type_node);
1291 return INTEGRAL_TK;
1292 case LONG_TK:
1293 SET_LVAL_NODE (long_type_node);
1294 return INTEGRAL_TK;
1295 case CHAR_TK:
1296 SET_LVAL_NODE (char_type_node);
1297 return INTEGRAL_TK;
1299 /* Keyword based literals. */
1300 case TRUE_TK:
1301 case FALSE_TK:
1302 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1303 boolean_true_node : boolean_false_node));
1304 return BOOL_LIT_TK;
1305 case NULL_TK:
1306 SET_LVAL_NODE (null_pointer_node);
1307 return NULL_TK;
1309 case ASSERT_TK:
1310 if (flag_assert)
1312 BUILD_OPERATOR (kw->token);
1313 return kw->token;
1315 else
1316 break;
1318 /* Some keyword we want to retain information on the location
1319 they where found. */
1320 case CASE_TK:
1321 case DEFAULT_TK:
1322 case SUPER_TK:
1323 case THIS_TK:
1324 case RETURN_TK:
1325 case BREAK_TK:
1326 case CONTINUE_TK:
1327 case TRY_TK:
1328 case CATCH_TK:
1329 case THROW_TK:
1330 case INSTANCEOF_TK:
1331 BUILD_OPERATOR (kw->token);
1333 default:
1334 return kw->token;
1339 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1340 return ID_TK;
1343 java_next_unicode ();
1345 /* Character literals. */
1346 if (c == '\'')
1348 int char_lit;
1350 if ((c = java_get_unicode ()) == '\\')
1351 char_lit = java_parse_escape_sequence ();
1352 else
1354 if (c == '\n' || c == '\'')
1355 java_lex_error ("Invalid character literal", 0);
1356 char_lit = c;
1359 c = java_get_unicode ();
1361 if ((c == '\n') || (c == UEOF))
1362 java_lex_error ("Character literal not terminated at end of line", 0);
1363 if (c != '\'')
1364 java_lex_error ("Syntax error in character literal", 0);
1366 if (char_lit == JAVA_CHAR_ERROR)
1367 char_lit = 0; /* We silently convert it to zero. */
1369 SET_LVAL_NODE (build_int_cst (char_type_node, char_lit));
1370 return CHAR_LIT_TK;
1373 /* String literals. */
1374 if (c == '"')
1376 int no_error = 1;
1377 char *string;
1379 for (;;)
1381 c = java_peek_unicode ();
1382 if (c == '\n' || c == UEOF) /* ULT. */
1384 java_lex_error ("String not terminated at end of line", 0);
1385 break;
1387 java_next_unicode ();
1388 if (c == '"')
1389 break;
1390 if (c == '\\')
1391 c = java_parse_escape_sequence ();
1392 if (c == JAVA_CHAR_ERROR)
1394 no_error = 0;
1395 c = 0; /* We silently convert it to zero. */
1397 java_unicode_2_utf8 (c);
1400 obstack_1grow (&temporary_obstack, '\0');
1401 string = obstack_finish (&temporary_obstack);
1402 #ifndef JC1_LITE
1403 if (!no_error || (c != '"'))
1404 java_lval->node = error_mark_node; /* FIXME: Requires further
1405 testing. */
1406 else
1407 java_lval->node = build_string (strlen (string), string);
1408 #endif
1409 obstack_free (&temporary_obstack, string);
1410 return STRING_LIT_TK;
1413 switch (c)
1415 case '/':
1416 /* Check for comment. */
1417 switch (c = java_peek_unicode ())
1419 case '/':
1420 java_next_unicode ();
1421 for (;;)
1423 c = java_get_unicode ();
1424 if (c == UEOF)
1426 /* It is ok to end a `//' comment with EOF, unless
1427 we're being pedantic. */
1428 if (pedantic)
1429 java_lex_error ("Comment not terminated at end of input",
1431 return 0;
1433 if (c == '\n') /* ULT */
1434 goto step1;
1436 break;
1438 case '*':
1439 java_next_unicode ();
1440 if ((c = java_get_unicode ()) == '*')
1442 c = java_get_unicode ();
1443 if (c == '/')
1445 /* Empty documentation comment. We have to reset
1446 the deprecation marker as only the most recent
1447 doc comment applies. */
1448 ctxp->deprecated = 0;
1450 else
1451 java_parse_doc_section (c);
1453 else
1454 java_parse_end_comment ((c = java_get_unicode ()));
1455 goto step1;
1456 break;
1458 case '=':
1459 java_next_unicode ();
1460 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1462 default:
1463 BUILD_OPERATOR (DIV_TK);
1466 case '(':
1467 BUILD_OPERATOR (OP_TK);
1468 case ')':
1469 return CP_TK;
1470 case '{':
1471 #ifndef JC1_LITE
1472 java_lval->operator.token = OCB_TK;
1473 java_lval->operator.location = BUILD_LOCATION();
1474 #endif
1475 #ifdef USE_MAPPED_LOCATION
1476 if (ctxp->ccb_indent == 1)
1477 ctxp->first_ccb_indent1 = input_location;
1478 #else
1479 if (ctxp->ccb_indent == 1)
1480 ctxp->first_ccb_indent1 = input_line;
1481 #endif
1482 ctxp->ccb_indent++;
1483 return OCB_TK;
1484 case '}':
1485 #ifndef JC1_LITE
1486 java_lval->operator.token = CCB_TK;
1487 java_lval->operator.location = BUILD_LOCATION();
1488 #endif
1489 ctxp->ccb_indent--;
1490 #ifdef USE_MAPPED_LOCATION
1491 if (ctxp->ccb_indent == 1)
1492 ctxp->last_ccb_indent1 = input_location;
1493 #else
1494 if (ctxp->ccb_indent == 1)
1495 ctxp->last_ccb_indent1 = input_line;
1496 #endif
1497 return CCB_TK;
1498 case '[':
1499 BUILD_OPERATOR (OSB_TK);
1500 case ']':
1501 return CSB_TK;
1502 case ';':
1503 return SC_TK;
1504 case ',':
1505 return C_TK;
1506 case '.':
1507 BUILD_OPERATOR (DOT_TK);
1509 /* Operators. */
1510 case '=':
1511 c = java_peek_unicode ();
1512 if (c == '=')
1514 java_next_unicode ();
1515 BUILD_OPERATOR (EQ_TK);
1517 else
1519 /* Equals is used in two different locations. In the
1520 variable_declarator: rule, it has to be seen as '=' as opposed
1521 to being seen as an ordinary assignment operator in
1522 assignment_operators: rule. */
1523 BUILD_OPERATOR (ASSIGN_TK);
1526 case '>':
1527 switch ((c = java_peek_unicode ()))
1529 case '=':
1530 java_next_unicode ();
1531 BUILD_OPERATOR (GTE_TK);
1532 case '>':
1533 java_next_unicode ();
1534 switch ((c = java_peek_unicode ()))
1536 case '>':
1537 java_next_unicode ();
1538 c = java_peek_unicode ();
1539 if (c == '=')
1541 java_next_unicode ();
1542 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1544 else
1546 BUILD_OPERATOR (ZRS_TK);
1548 case '=':
1549 java_next_unicode ();
1550 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1551 default:
1552 BUILD_OPERATOR (SRS_TK);
1554 default:
1555 BUILD_OPERATOR (GT_TK);
1558 case '<':
1559 switch ((c = java_peek_unicode ()))
1561 case '=':
1562 java_next_unicode ();
1563 BUILD_OPERATOR (LTE_TK);
1564 case '<':
1565 java_next_unicode ();
1566 if ((c = java_peek_unicode ()) == '=')
1568 java_next_unicode ();
1569 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1571 else
1573 BUILD_OPERATOR (LS_TK);
1575 default:
1576 BUILD_OPERATOR (LT_TK);
1579 case '&':
1580 switch ((c = java_peek_unicode ()))
1582 case '&':
1583 java_next_unicode ();
1584 BUILD_OPERATOR (BOOL_AND_TK);
1585 case '=':
1586 java_next_unicode ();
1587 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1588 default:
1589 BUILD_OPERATOR (AND_TK);
1592 case '|':
1593 switch ((c = java_peek_unicode ()))
1595 case '|':
1596 java_next_unicode ();
1597 BUILD_OPERATOR (BOOL_OR_TK);
1598 case '=':
1599 java_next_unicode ();
1600 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1601 default:
1602 BUILD_OPERATOR (OR_TK);
1605 case '+':
1606 switch ((c = java_peek_unicode ()))
1608 case '+':
1609 java_next_unicode ();
1610 BUILD_OPERATOR (INCR_TK);
1611 case '=':
1612 java_next_unicode ();
1613 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1614 default:
1615 BUILD_OPERATOR (PLUS_TK);
1618 case '-':
1619 switch ((c = java_peek_unicode ()))
1621 case '-':
1622 java_next_unicode ();
1623 BUILD_OPERATOR (DECR_TK);
1624 case '=':
1625 java_next_unicode ();
1626 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1627 default:
1628 BUILD_OPERATOR (MINUS_TK);
1631 case '*':
1632 if ((c = java_peek_unicode ()) == '=')
1634 java_next_unicode ();
1635 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1637 else
1639 BUILD_OPERATOR (MULT_TK);
1642 case '^':
1643 if ((c = java_peek_unicode ()) == '=')
1645 java_next_unicode ();
1646 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1648 else
1650 BUILD_OPERATOR (XOR_TK);
1653 case '%':
1654 if ((c = java_peek_unicode ()) == '=')
1656 java_next_unicode ();
1657 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1659 else
1661 BUILD_OPERATOR (REM_TK);
1664 case '!':
1665 if ((c = java_peek_unicode()) == '=')
1667 java_next_unicode ();
1668 BUILD_OPERATOR (NEQ_TK);
1670 else
1672 BUILD_OPERATOR (NEG_TK);
1675 case '?':
1676 BUILD_OPERATOR (REL_QM_TK);
1677 case ':':
1678 BUILD_OPERATOR (REL_CL_TK);
1679 case '~':
1680 BUILD_OPERATOR (NOT_TK);
1683 if (c == 0x1a) /* CTRL-Z. */
1685 if ((c = java_peek_unicode ()) == UEOF)
1686 return 0; /* Ok here. */
1689 /* Everything else is an invalid character in the input. */
1691 char lex_error_buffer [128];
1692 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1693 java_sprint_unicode (c));
1694 java_lex_error (lex_error_buffer, -1);
1696 return 0;
1699 #ifndef JC1_LITE
1701 /* The exported interface to the lexer. */
1702 static int
1703 java_lex (YYSTYPE *java_lval)
1705 int r;
1707 timevar_push (TV_LEX);
1708 r = do_java_lex (java_lval);
1709 timevar_pop (TV_LEX);
1710 return r;
1713 /* This is called by the parser to see if an error should be generated
1714 due to numeric overflow. This function only handles the particular
1715 case of the largest negative value, and is only called in the case
1716 where this value is not preceded by `-'. */
1717 static void
1718 error_if_numeric_overflow (tree value)
1720 if (TREE_CODE (value) == INTEGER_CST
1721 && !JAVA_NOT_RADIX10_FLAG (value)
1722 && tree_int_cst_sgn (value) < 0)
1724 if (TREE_TYPE (value) == long_type_node)
1725 java_lex_error ("Numeric overflow for 'long' literal", 0);
1726 else
1727 java_lex_error ("Numeric overflow for 'int' literal", 0);
1731 #endif /* JC1_LITE */
1733 static void
1734 java_unicode_2_utf8 (unicode_t unicode)
1736 if (RANGE (unicode, 0x01, 0x7f))
1737 obstack_1grow (&temporary_obstack, (char)unicode);
1738 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1740 obstack_1grow (&temporary_obstack,
1741 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1742 obstack_1grow (&temporary_obstack,
1743 (unsigned char)(0x80 | (unicode & 0x3f)));
1745 else /* Range 0x800-0xffff. */
1747 obstack_1grow (&temporary_obstack,
1748 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1749 obstack_1grow (&temporary_obstack,
1750 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1751 obstack_1grow (&temporary_obstack,
1752 (unsigned char)(0x80 | (unicode & 0x003f)));
1756 #ifndef JC1_LITE
1757 static tree
1758 build_wfl_node (tree node)
1760 #ifdef USE_MAPPED_LOCATION
1761 node = build_expr_wfl (node, input_location);
1762 #else
1763 node = build_expr_wfl (node, ctxp->filename,
1764 ctxp->lexer->token_start.line,
1765 ctxp->lexer->token_start.col);
1766 #endif
1767 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1768 TREE_TYPE (node) = NULL_TREE;
1769 return node;
1771 #endif
1773 static void
1774 java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
1776 #ifndef JC1_LITE
1777 int col = (ctxp->lexer->position.col
1778 + forward * ctxp->lexer->next_columns);
1779 #if USE_MAPPED_LOCATION
1780 source_location save_location = input_location;
1781 LINEMAP_POSITION_FOR_COLUMN (input_location, &line_table, col);
1783 /* Might be caught in the middle of some error report. */
1784 ctxp->java_error_flag = 0;
1785 java_error (NULL);
1786 java_error (msg);
1787 input_location = save_location;
1788 #else
1789 java_lc save = ctxp->lexer->token_start;
1790 ctxp->lexer->token_start.line = ctxp->lexer->position.line;
1791 ctxp->lexer->token_start.col = col;
1793 /* Might be caught in the middle of some error report. */
1794 ctxp->java_error_flag = 0;
1795 java_error (NULL);
1796 java_error (msg);
1797 ctxp->lexer->token_start = save;
1798 #endif
1799 #endif
1802 #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, is a line terminator
   ('\n' or '\r'), and 0 otherwise.

   For '\r' one more character is read from FP: a following '\n' is
   swallowed so that "\r\n" counts as a single terminator; any other
   character is pushed back onto FP.  EOF is never pushed back.  */
static int
java_is_eol (FILE *fp, int c)
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int lookahead = getc (fp);

      if (lookahead != '\n' && lookahead != EOF)
        ungetc (lookahead, fp);
      return 1;
    }

  return 0;
}
1820 #endif
1822 char *
1823 java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
1824 int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
1826 #ifdef JC1_LITE
1827 return 0;
1828 #else
1829 /* Dumb implementation. Doesn't try to cache or optimize things. */
1830 /* First line of the file is line 1, first column is 1. */
1832 /* COL == -1 means, at the CR/LF in LINE. */
1833 /* COL == -2 means, at the first non space char in LINE. */
1835 FILE *fp;
1836 int c, ccol, cline = 1;
1837 int current_line_col = 0;
1838 int first_non_space = 0;
1839 char *base;
1841 if (!(fp = fopen (filename, "r")))
1842 fatal_error ("can't open %s: %m", filename);
1844 while (cline != line)
1846 c = getc (fp);
1847 if (c == EOF)
1849 static const char msg[] = "<<file too short - unexpected EOF>>";
1850 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1851 goto have_line;
1853 if (java_is_eol (fp, c))
1854 cline++;
1857 /* Gather the chars of the current line in a buffer. */
1858 for (;;)
1860 c = getc (fp);
1861 if (c < 0 || java_is_eol (fp, c))
1862 break;
1863 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1864 first_non_space = current_line_col;
1865 obstack_1grow (&temporary_obstack, c);
1866 current_line_col++;
1868 have_line:
1870 obstack_1grow (&temporary_obstack, '\n');
1872 if (col == -1)
1874 col = current_line_col;
1875 first_non_space = 0;
1877 else if (col == -2)
1878 col = first_non_space;
1879 else
1880 first_non_space = 0;
1882 /* Place the '^' a the right position. */
1883 base = obstack_base (&temporary_obstack);
1884 for (col += 2, ccol = 0; ccol < col; ccol++)
1886 /* Compute \t when reaching first_non_space. */
1887 char c = (first_non_space ?
1888 (base [ccol] == '\t' ? '\t' : ' ') : ' ');
1889 obstack_1grow (&temporary_obstack, c);
1891 obstack_grow0 (&temporary_obstack, "^", 1);
1893 fclose (fp);
1894 return obstack_finish (&temporary_obstack);
1895 #endif
1898 #ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text at STR with the
   NUL-terminated string NAME, one character of NAME at a time.

   At the first character that differs, the difference (character
   from STR minus character from NAME) is returned.  If every
   character of NAME matched, the result is 0 when STR was consumed
   exactly up to its end and 1 when part of STR is left over.

   UTF8_GET is expected to advance STR as it reads; the final
   end-of-input test relies on that.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *p;

  for (p = name; *p; ++p)
    {
      int ch = UTF8_GET (str, limit);

      if (ch != *p)
        return ch - *p;
    }

  if (str == limit)
    return 0;
  return 1;
}
/* A sorted list of all C++ keywords.  cxx_keyword_p runs a binary
   search over this table, so entries must stay in ascending byte
   order.  They are grouped by first letter purely for layout.  */

static const char *const cxx_keywords[] =
{
  /* Spellings that start with an underscore.  */
  "_Complex",
  "__alignof", "__alignof__",
  "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__",
  "__const", "__const__",
  "__extension__",
  "__imag", "__imag__",
  "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__",
  "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",

  /* Everything else, alphabetically.  */
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl", "const", "const_cast",
  "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast", "struct", "switch",
  "template", "this", "throw", "true", "try", "typedef", "typeid",
  "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
2025 /* Return true if NAME is a C++ keyword. */
2028 cxx_keyword_p (const char *name, int length)
2030 int last = ARRAY_SIZE (cxx_keywords);
2031 int first = 0;
2032 int mid = (last + first) / 2;
2033 int old = -1;
2035 for (mid = (last + first) / 2;
2036 mid != old;
2037 old = mid, mid = (last + first) / 2)
2039 int kwl = strlen (cxx_keywords[mid]);
2040 int min_length = kwl > length ? length : kwl;
2041 int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
2043 if (r == 0)
2045 int i;
2046 /* We've found a match if all the remaining characters are `$'. */
2047 for (i = min_length; i < length && name[i] == '$'; ++i)
2049 if (i == length)
2050 return 1;
2051 r = 1;
2054 if (r < 0)
2055 last = mid;
2056 else
2057 first = mid;
2059 return 0;
2061 #endif /* JC1_LITE */