boehm.c (set_bit): Improve type safety wrt unsignedness.
[official-gcc.git] / gcc / java / lex.c
blob77e38f898485707443b1cdc41d1500e4cb43e1c4
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
3 Free Software Foundation, Inc.
4 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA.
23 Java and all Java-based marks are trademarks or registered trademarks
24 of Sun Microsystems, Inc. in the United States and other countries.
25 The Free Software Foundation is independent of Sun Microsystems, Inc. */
27 /* It defines java_lex (yylex) that reads a Java ASCII source file
28 possibly containing Unicode escape sequence or utf8 encoded
29 characters and returns a token for everything found but comments,
30 white spaces and line terminators. When necessary, it also fills
31 the java_lval (yylval) union. It's implemented to be called by a
32 re-entrant parser generated by Bison.
34 The lexical analysis conforms to the Java grammar described in "The
35 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
36 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
38 #include "keyword.h"
39 #include "flags.h"
40 #include "chartables.h"
41 #ifndef JC1_LITE
42 #include "timevar.h"
43 #endif
45 /* Function declarations. */
46 static char *java_sprint_unicode (struct java_line *, int);
47 static void java_unicode_2_utf8 (unicode_t);
48 static void java_lex_error (const char *, int);
49 #ifndef JC1_LITE
50 static int do_java_lex (YYSTYPE *);
51 static int java_lex (YYSTYPE *);
52 static int java_is_eol (FILE *, int);
53 static tree build_wfl_node (tree);
54 #endif
55 static void java_store_unicode (struct java_line *, unicode_t, int);
56 static int java_parse_escape_sequence (void);
57 static int java_start_char_p (unicode_t);
58 static int java_part_char_p (unicode_t);
59 static int java_space_char_p (unicode_t);
60 static void java_parse_doc_section (int);
61 static void java_parse_end_comment (int);
62 static int java_get_unicode (void);
63 static int java_read_unicode (java_lexer *, int *);
64 static int java_read_unicode_collapsing_terminators (java_lexer *, int *);
65 static void java_store_unicode (struct java_line *, unicode_t, int);
66 static int java_read_char (java_lexer *);
67 static void java_allocate_new_line (void);
68 static void java_unget_unicode (void);
69 static unicode_t java_sneak_unicode (void);
70 #ifndef JC1_LITE
71 static int utf8_cmp (const unsigned char *, int, const char *);
72 #endif
74 java_lexer *java_new_lexer (FILE *, const char *);
75 #ifndef JC1_LITE
76 static void error_if_numeric_overflow (tree);
77 #endif
79 #ifdef HAVE_ICONV
80 /* This is nonzero if we have initialized `need_byteswap'. */
81 static int byteswap_init = 0;
83 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
84 big-endian order -- not native endian order. We handle this by
85 doing a conversion once at startup and seeing what happens. This
86 flag holds the results of this determination. */
87 static int need_byteswap = 0;
88 #endif
90 void
91 java_init_lex (FILE *finput, const char *encoding)
93 #ifndef JC1_LITE
94 int java_lang_imported = 0;
96 if (!java_lang_id)
97 java_lang_id = get_identifier ("java.lang");
98 if (!inst_id)
99 inst_id = get_identifier ("inst$");
100 if (!wpv_id)
101 wpv_id = get_identifier ("write_parm_value$");
103 if (!java_lang_imported)
105 tree node = build_tree_list
106 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
107 read_import_dir (TREE_PURPOSE (node));
108 TREE_CHAIN (node) = ctxp->import_demand_list;
109 ctxp->import_demand_list = node;
110 java_lang_imported = 1;
113 if (!wfl_operator)
114 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
115 if (!label_id)
116 label_id = get_identifier ("$L");
117 if (!wfl_append)
118 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
119 if (!wfl_string_buffer)
120 wfl_string_buffer =
121 build_expr_wfl (get_identifier (flag_emit_class_files
122 ? "java.lang.StringBuffer"
123 : "gnu.gcj.runtime.StringBuffer"),
124 NULL, 0, 0);
125 if (!wfl_to_string)
126 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
128 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
129 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
131 memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
132 current_jcf = ggc_alloc_cleared (sizeof (JCF));
133 ctxp->current_parsed_class = NULL;
134 ctxp->package = NULL_TREE;
135 #endif
137 ctxp->filename = input_filename;
138 ctxp->lineno = input_line = 0;
139 ctxp->p_line = NULL;
140 ctxp->c_line = NULL;
141 ctxp->java_error_flag = 0;
142 ctxp->lexer = java_new_lexer (finput, encoding);
145 static char *
146 java_sprint_unicode (struct java_line *line, int i)
148 static char buffer [10];
149 if (line->unicode_escape_p [i] || line->line [i] > 128)
150 sprintf (buffer, "\\u%04x", line->line [i]);
151 else
153 buffer [0] = line->line [i];
154 buffer [1] = '\0';
156 return buffer;
159 static unicode_t
160 java_sneak_unicode (void)
162 return (ctxp->c_line->line [ctxp->c_line->current]);
165 static void
166 java_unget_unicode (void)
168 if (!ctxp->c_line->current)
169 /* Can't unget unicode. */
170 abort ();
172 ctxp->c_line->current--;
173 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
176 static void
177 java_allocate_new_line (void)
179 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
180 char ahead_escape_p = (ctxp->c_line ?
181 ctxp->c_line->unicode_escape_ahead_p : 0);
183 if (ctxp->c_line && !ctxp->c_line->white_space_only)
185 if (ctxp->p_line)
187 free (ctxp->p_line->unicode_escape_p);
188 free (ctxp->p_line->line);
189 free (ctxp->p_line);
191 ctxp->p_line = ctxp->c_line;
192 ctxp->c_line = NULL; /* Reallocated. */
195 if (!ctxp->c_line)
197 ctxp->c_line = xmalloc (sizeof (struct java_line));
198 ctxp->c_line->max = JAVA_LINE_MAX;
199 ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
200 ctxp->c_line->unicode_escape_p =
201 xmalloc (sizeof (char)*ctxp->c_line->max);
202 ctxp->c_line->white_space_only = 0;
205 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
206 ctxp->c_line->char_col = ctxp->c_line->current = 0;
207 if (ahead)
209 ctxp->c_line->line [ctxp->c_line->size] = ahead;
210 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
211 ctxp->c_line->size++;
213 ctxp->c_line->ahead [0] = 0;
214 ctxp->c_line->unicode_escape_ahead_p = 0;
215 ctxp->c_line->lineno = ++input_line;
216 ctxp->c_line->white_space_only = 1;
219 /* Create a new lexer object. */
221 java_lexer *
222 java_new_lexer (FILE *finput, const char *encoding)
224 java_lexer *lex = xmalloc (sizeof (java_lexer));
225 int enc_error = 0;
227 lex->finput = finput;
228 lex->bs_count = 0;
229 lex->unget_value = 0;
230 lex->hit_eof = 0;
231 lex->encoding = encoding;
233 #ifdef HAVE_ICONV
234 lex->handle = iconv_open ("UCS-2", encoding);
235 if (lex->handle != (iconv_t) -1)
237 lex->first = -1;
238 lex->last = -1;
239 lex->out_first = -1;
240 lex->out_last = -1;
241 lex->read_anything = 0;
242 lex->use_fallback = 0;
244 /* Work around broken iconv() implementations by doing checking at
245 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
246 then all UCS-2 encoders will be broken. Perhaps not a valid
247 assumption. */
248 if (! byteswap_init)
250 iconv_t handle;
252 byteswap_init = 1;
254 handle = iconv_open ("UCS-2", "UTF-8");
255 if (handle != (iconv_t) -1)
257 unicode_t result;
258 unsigned char in[3];
259 char *inp, *outp;
260 size_t inc, outc, r;
262 /* This is the UTF-8 encoding of \ufeff. */
263 in[0] = 0xef;
264 in[1] = 0xbb;
265 in[2] = 0xbf;
267 inp = (char *) in;
268 inc = 3;
269 outp = (char *) &result;
270 outc = 2;
272 r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
273 &outp, &outc);
274 iconv_close (handle);
275 /* Conversion must be complete for us to use the result. */
276 if (r != (size_t) -1 && inc == 0 && outc == 0)
277 need_byteswap = (result != 0xfeff);
281 lex->byte_swap = need_byteswap;
283 else
284 #endif /* HAVE_ICONV */
286 /* If iconv failed, use the internal decoder if the default
287 encoding was requested. This code is used on platforms where
288 iconv exists but is insufficient for our needs. For
289 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
291 On Solaris the default encoding, as returned by nl_langinfo(),
292 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
293 understand that. We work around that by pretending
294 `646' to be the same as UTF-8. */
295 if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
296 enc_error = 1;
297 #ifdef HAVE_ICONV
298 else
300 lex->use_fallback = 1;
301 lex->encoding = "UTF-8";
303 #endif /* HAVE_ICONV */
306 if (enc_error)
307 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
309 return lex;
312 void
313 java_destroy_lexer (java_lexer *lex)
315 #ifdef HAVE_ICONV
316 if (! lex->use_fallback)
317 iconv_close (lex->handle);
318 #endif
319 free (lex);
322 static int
323 java_read_char (java_lexer *lex)
325 if (lex->unget_value)
327 unicode_t r = lex->unget_value;
328 lex->unget_value = 0;
329 return r;
332 #ifdef HAVE_ICONV
333 if (! lex->use_fallback)
335 size_t ir, inbytesleft, in_save, out_count, out_save;
336 char *inp, *outp;
337 unicode_t result;
339 /* If there is data which has already been converted, use it. */
340 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
342 lex->out_first = 0;
343 lex->out_last = 0;
345 while (1)
347 /* See if we need to read more data. If FIRST == 0 then
348 the previous conversion attempt ended in the middle of
349 a character at the end of the buffer. Otherwise we
350 only have to read if the buffer is empty. */
351 if (lex->first == 0 || lex->first >= lex->last)
353 int r;
355 if (lex->first >= lex->last)
357 lex->first = 0;
358 lex->last = 0;
360 if (feof (lex->finput))
361 return UEOF;
362 r = fread (&lex->buffer[lex->last], 1,
363 sizeof (lex->buffer) - lex->last,
364 lex->finput);
365 lex->last += r;
368 inbytesleft = lex->last - lex->first;
369 out_count = sizeof (lex->out_buffer) - lex->out_last;
371 if (inbytesleft == 0)
373 /* We've tried to read and there is nothing left. */
374 return UEOF;
377 in_save = inbytesleft;
378 out_save = out_count;
379 inp = &lex->buffer[lex->first];
380 outp = (char *) &lex->out_buffer[lex->out_last];
381 ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
382 &inbytesleft, &outp, &out_count);
384 /* If we haven't read any bytes, then look to see if we
385 have read a BOM. */
386 if (! lex->read_anything && out_save - out_count >= 2)
388 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
389 if (uc == 0xfeff)
391 lex->byte_swap = 0;
392 lex->out_first += 2;
394 else if (uc == 0xfffe)
396 lex->byte_swap = 1;
397 lex->out_first += 2;
399 lex->read_anything = 1;
402 if (lex->byte_swap)
404 unsigned int i;
405 for (i = 0; i < out_save - out_count; i += 2)
407 char t = lex->out_buffer[lex->out_last + i];
408 lex->out_buffer[lex->out_last + i]
409 = lex->out_buffer[lex->out_last + i + 1];
410 lex->out_buffer[lex->out_last + i + 1] = t;
414 lex->first += in_save - inbytesleft;
415 lex->out_last += out_save - out_count;
417 /* If we converted anything at all, move along. */
418 if (out_count != out_save)
419 break;
421 if (ir == (size_t) -1)
423 if (errno == EINVAL)
425 /* This is ok. This means that the end of our buffer
426 is in the middle of a character sequence. We just
427 move the valid part of the buffer to the beginning
428 to force a read. */
429 memmove (&lex->buffer[0], &lex->buffer[lex->first],
430 lex->last - lex->first);
431 lex->last -= lex->first;
432 lex->first = 0;
434 else
436 /* A more serious error. */
437 char buffer[128];
438 sprintf (buffer,
439 "Unrecognized character for encoding '%s'",
440 lex->encoding);
441 java_lex_error (buffer, 0);
442 return UEOF;
448 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
450 /* Don't have any data. */
451 return UEOF;
454 /* Success. */
455 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
456 lex->out_first += 2;
457 return result;
459 else
460 #endif /* HAVE_ICONV */
462 int c, c1, c2;
463 c = getc (lex->finput);
465 if (c == EOF)
466 return UEOF;
467 if (c < 128)
468 return (unicode_t) c;
469 else
471 if ((c & 0xe0) == 0xc0)
473 c1 = getc (lex->finput);
474 if ((c1 & 0xc0) == 0x80)
476 unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
477 /* Check for valid 2-byte characters. We explicitly
478 allow \0 because this encoding is common in the
479 Java world. */
480 if (r == 0 || (r >= 0x80 && r <= 0x7ff))
481 return r;
484 else if ((c & 0xf0) == 0xe0)
486 c1 = getc (lex->finput);
487 if ((c1 & 0xc0) == 0x80)
489 c2 = getc (lex->finput);
490 if ((c2 & 0xc0) == 0x80)
492 unicode_t r = (unicode_t)(((c & 0xf) << 12) +
493 (( c1 & 0x3f) << 6)
494 + (c2 & 0x3f));
495 /* Check for valid 3-byte characters.
496 Don't allow surrogate, \ufffe or \uffff. */
497 if (IN_RANGE (r, 0x800, 0xffff)
498 && ! IN_RANGE (r, 0xd800, 0xdfff)
499 && r != 0xfffe && r != 0xffff)
500 return r;
505 /* We simply don't support invalid characters. We also
506 don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
507 cannot be valid Java characters. */
508 java_lex_error ("malformed UTF-8 character", 0);
512 /* We only get here on error. */
513 return UEOF;
516 static void
517 java_store_unicode (struct java_line *l, unicode_t c, int unicode_escape_p)
519 if (l->size == l->max)
521 l->max += JAVA_LINE_MAX;
522 l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
523 l->unicode_escape_p = xrealloc (l->unicode_escape_p,
524 sizeof (char)*l->max);
526 l->line [l->size] = c;
527 l->unicode_escape_p [l->size++] = unicode_escape_p;
530 static int
531 java_read_unicode (java_lexer *lex, int *unicode_escape_p)
533 int c;
535 c = java_read_char (lex);
536 *unicode_escape_p = 0;
538 if (c != '\\')
540 lex->bs_count = 0;
541 return c;
544 ++lex->bs_count;
545 if ((lex->bs_count) % 2 == 1)
547 /* Odd number of \ seen. */
548 c = java_read_char (lex);
549 if (c == 'u')
551 unicode_t unicode = 0;
552 int shift = 12;
554 /* Recognize any number of `u's in \u. */
555 while ((c = java_read_char (lex)) == 'u')
558 shift = 12;
561 if (c == UEOF)
563 java_lex_error ("prematurely terminated \\u sequence", 0);
564 return UEOF;
567 if (hex_p (c))
568 unicode |= (unicode_t)(hex_value (c) << shift);
569 else
571 java_lex_error ("non-hex digit in \\u sequence", 0);
572 break;
575 c = java_read_char (lex);
576 shift -= 4;
578 while (shift >= 0);
580 if (c != UEOF)
581 lex->unget_value = c;
583 lex->bs_count = 0;
584 *unicode_escape_p = 1;
585 return unicode;
587 lex->unget_value = c;
589 return (unicode_t) '\\';
592 static int
593 java_read_unicode_collapsing_terminators (java_lexer *lex,
594 int *unicode_escape_p)
596 int c = java_read_unicode (lex, unicode_escape_p);
598 if (c == '\r')
600 /* We have to read ahead to see if we got \r\n. In that case we
601 return a single line terminator. */
602 int dummy;
603 c = java_read_unicode (lex, &dummy);
604 if (c != '\n' && c != UEOF)
605 lex->unget_value = c;
606 /* In either case we must return a newline. */
607 c = '\n';
610 return c;
613 static int
614 java_get_unicode (void)
616 /* It's time to read a line when... */
617 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
619 int c;
620 int found_chars = 0;
622 if (ctxp->lexer->hit_eof)
623 return UEOF;
625 java_allocate_new_line ();
626 if (ctxp->c_line->line[0] != '\n')
628 for (;;)
630 int unicode_escape_p;
631 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
632 &unicode_escape_p);
633 if (c != UEOF)
635 found_chars = 1;
636 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
637 if (ctxp->c_line->white_space_only
638 && !JAVA_WHITE_SPACE_P (c)
639 && c != '\n')
640 ctxp->c_line->white_space_only = 0;
642 if ((c == '\n') || (c == UEOF))
643 break;
646 if (c == UEOF && ! found_chars)
648 ctxp->lexer->hit_eof = 1;
649 return UEOF;
653 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
654 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
655 return ctxp->c_line->line [ctxp->c_line->current++];
658 /* Parse the end of a C style comment.
659 * C is the first character following the '/' and '*'. */
660 static void
661 java_parse_end_comment (int c)
663 for ( ;; c = java_get_unicode ())
665 switch (c)
667 case UEOF:
668 java_lex_error ("Comment not terminated at end of input", 0);
669 return;
670 case '*':
671 switch (c = java_get_unicode ())
673 case UEOF:
674 java_lex_error ("Comment not terminated at end of input", 0);
675 return;
676 case '/':
677 return;
678 case '*': /* Reparse only '*'. */
679 java_unget_unicode ();
685 /* Parse the documentation section. Keywords must be at the beginning
686 of a documentation comment line (ignoring white space and any `*'
687 character). Parsed keyword(s): @DEPRECATED. */
689 static void
690 java_parse_doc_section (int c)
692 int last_was_star;
694 /* We reset this here, because only the most recent doc comment
695 applies to the following declaration. */
696 ctxp->deprecated = 0;
698 /* We loop over all the lines of the comment. We'll eventually exit
699 if we hit EOF prematurely, or when we see the comment
700 terminator. */
701 while (1)
703 /* These first steps need only be done if we're still looking
704 for the deprecated tag. If we've already seen it, we might
705 as well skip looking for it again. */
706 if (! ctxp->deprecated)
708 /* Skip whitespace and '*'s. We must also check for the end
709 of the comment here. */
710 while (JAVA_WHITE_SPACE_P (c) || c == '*')
712 last_was_star = (c == '*');
713 c = java_get_unicode ();
714 if (last_was_star && c == '/')
716 /* We just saw the comment terminator. */
717 return;
721 if (c == UEOF)
722 goto eof;
724 if (c == '@')
726 const char *deprecated = "@deprecated";
727 int i;
729 for (i = 0; deprecated[i]; ++i)
731 if (c != deprecated[i])
732 break;
733 /* We write the code in this way, with the
734 update at the end, so that after the loop
735 we're left with the next character in C. */
736 c = java_get_unicode ();
739 if (c == UEOF)
740 goto eof;
742 /* @deprecated must be followed by a space or newline.
743 We also allow a '*' in case it appears just before
744 the end of a comment. In this position only we also
745 must allow any Unicode space character. */
746 if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
748 if (! deprecated[i])
749 ctxp->deprecated = 1;
754 /* We've examined the relevant content from this line. Now we
755 skip the remaining characters and start over with the next
756 line. We also check for end of comment here. */
757 while (c != '\n' && c != UEOF)
759 last_was_star = (c == '*');
760 c = java_get_unicode ();
761 if (last_was_star && c == '/')
762 return;
765 if (c == UEOF)
766 goto eof;
767 /* We have to advance past the \n. */
768 c = java_get_unicode ();
769 if (c == UEOF)
770 goto eof;
773 eof:
774 java_lex_error ("Comment not terminated at end of input", 0);
777 /* Return true if C is a valid start character for a Java identifier.
778 This is only called if C >= 128 -- smaller values are handled
779 inline. However, this function handles all values anyway. */
780 static int
781 java_start_char_p (unicode_t c)
783 unsigned int hi = c / 256;
784 const char *const page = type_table[hi];
785 unsigned long val = (unsigned long) page;
786 int flags;
788 if ((val & ~ LETTER_MASK) != 0)
789 flags = page[c & 255];
790 else
791 flags = val;
793 return flags & LETTER_START;
796 /* Return true if C is a valid part character for a Java identifier.
797 This is only called if C >= 128 -- smaller values are handled
798 inline. However, this function handles all values anyway. */
799 static int
800 java_part_char_p (unicode_t c)
802 unsigned int hi = c / 256;
803 const char *const page = type_table[hi];
804 unsigned long val = (unsigned long) page;
805 int flags;
807 if ((val & ~ LETTER_MASK) != 0)
808 flags = page[c & 255];
809 else
810 flags = val;
812 return flags & LETTER_PART;
815 /* Return true if C is whitespace. */
816 static int
817 java_space_char_p (unicode_t c)
819 unsigned int hi = c / 256;
820 const char *const page = type_table[hi];
821 unsigned long val = (unsigned long) page;
822 int flags;
824 if ((val & ~ LETTER_MASK) != 0)
825 flags = page[c & 255];
826 else
827 flags = val;
829 return flags & LETTER_SPACE;
832 static int
833 java_parse_escape_sequence (void)
835 unicode_t char_lit;
836 int c;
838 switch (c = java_get_unicode ())
840 case 'b':
841 return (unicode_t)0x8;
842 case 't':
843 return (unicode_t)0x9;
844 case 'n':
845 return (unicode_t)0xa;
846 case 'f':
847 return (unicode_t)0xc;
848 case 'r':
849 return (unicode_t)0xd;
850 case '"':
851 return (unicode_t)0x22;
852 case '\'':
853 return (unicode_t)0x27;
854 case '\\':
855 return (unicode_t)0x5c;
856 case '0': case '1': case '2': case '3': case '4':
857 case '5': case '6': case '7':
859 int octal_escape[3];
860 int octal_escape_index = 0;
861 int max = 3;
862 int i, shift;
864 for (; octal_escape_index < max && RANGE (c, '0', '7');
865 c = java_get_unicode ())
867 if (octal_escape_index == 0 && c > '3')
869 /* According to the grammar, `\477' has a well-defined
870 meaning -- it is `\47' followed by `7'. */
871 --max;
873 octal_escape [octal_escape_index++] = c;
876 java_unget_unicode ();
878 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
879 i < octal_escape_index; i++, shift -= 3)
880 char_lit |= (octal_escape [i] - '0') << shift;
882 return char_lit;
884 default:
885 java_lex_error ("Invalid character in escape sequence", 0);
886 return JAVA_CHAR_ERROR;
890 #ifndef JC1_LITE
891 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
893 /* Subroutine of java_lex: converts floating-point literals to tree
894 nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
895 store the result. FFLAG indicates whether the literal was tagged
896 with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
897 is the line number on which to report any error. */
899 static void java_perform_atof (YYSTYPE *, char *, int, int);
901 static void
902 java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
903 int number_beginning)
905 REAL_VALUE_TYPE value;
906 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
908 SET_REAL_VALUE_ATOF (value,
909 REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
911 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
913 JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
914 value = DCONST0;
916 else if (IS_ZERO (value))
918 /* We check to see if the value is really 0 or if we've found an
919 underflow. We do this in the most primitive imaginable way. */
920 int really_zero = 1;
921 char *p = literal_token;
922 if (*p == '-')
923 ++p;
924 while (*p && *p != 'e' && *p != 'E')
926 if (*p != '0' && *p != '.')
928 really_zero = 0;
929 break;
931 ++p;
933 if (! really_zero)
935 int i = ctxp->c_line->current;
936 ctxp->c_line->current = number_beginning;
937 java_lex_error ("Floating point literal underflow", 0);
938 ctxp->c_line->current = i;
942 SET_LVAL_NODE_TYPE (build_real (type, value), type);
944 #endif
946 static int yylex (YYSTYPE *);
948 static int
949 #ifdef JC1_LITE
950 yylex (YYSTYPE *java_lval)
951 #else
952 do_java_lex (YYSTYPE *java_lval)
953 #endif
955 int c;
956 unicode_t first_unicode;
957 int ascii_index, all_ascii;
958 char *string;
960 /* Translation of the Unicode escape in the raw stream of Unicode
961 characters. Takes care of line terminator. */
962 step1:
963 /* Skip white spaces: SP, TAB and FF or ULT. */
964 for (c = java_get_unicode ();
965 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
966 if (c == '\n')
968 ctxp->elc.line = ctxp->c_line->lineno;
969 ctxp->elc.col = ctxp->c_line->char_col-2;
972 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
974 if (c == 0x1a) /* CTRL-Z. */
976 if ((c = java_get_unicode ()) == UEOF)
977 return 0; /* Ok here. */
978 else
979 java_unget_unicode (); /* Caught later, at the end of the
980 function. */
982 /* Handle EOF here. */
983 if (c == UEOF) /* Should probably do something here... */
984 return 0;
986 /* Take care of eventual comments. */
987 if (c == '/')
989 switch (c = java_get_unicode ())
991 case '/':
992 for (;;)
994 c = java_get_unicode ();
995 if (c == UEOF)
997 /* It is ok to end a `//' comment with EOF, unless
998 we're being pedantic. */
999 if (pedantic)
1000 java_lex_error ("Comment not terminated at end of input",
1002 return 0;
1004 if (c == '\n') /* ULT */
1005 goto step1;
1007 break;
1009 case '*':
1010 if ((c = java_get_unicode ()) == '*')
1012 c = java_get_unicode ();
1013 if (c == '/')
1015 /* Empty documentation comment. We have to reset
1016 the deprecation marker as only the most recent
1017 doc comment applies. */
1018 ctxp->deprecated = 0;
1020 else
1021 java_parse_doc_section (c);
1023 else
1024 java_parse_end_comment ((c = java_get_unicode ()));
1025 goto step1;
1026 break;
1027 default:
1028 java_unget_unicode ();
1029 c = '/';
1030 break;
1034 ctxp->elc.line = ctxp->c_line->lineno;
1035 ctxp->elc.prev_col = ctxp->elc.col;
1036 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
1037 if (ctxp->elc.col < 0)
1038 abort ();
1040 /* Numeric literals. */
1041 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
1043 /* This section of code is borrowed from gcc/c-lex.c. */
1044 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
1045 int parts[TOTAL_PARTS];
1046 HOST_WIDE_INT high, low;
1047 /* End borrowed section. */
1048 char literal_token [256];
1049 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
1050 int found_hex_digits = 0, found_non_octal_digits = 0;
1051 int i;
1052 #ifndef JC1_LITE
1053 int number_beginning = ctxp->c_line->current;
1054 tree value;
1055 #endif
1057 /* We might have a . separator instead of a FP like .[0-9]*. */
1058 if (c == '.')
1060 unicode_t peep = java_sneak_unicode ();
1062 if (!JAVA_ASCII_DIGIT (peep))
1064 JAVA_LEX_SEP('.');
1065 BUILD_OPERATOR (DOT_TK);
1069 for (i = 0; i < TOTAL_PARTS; i++)
1070 parts [i] = 0;
1072 if (c == '0')
1074 c = java_get_unicode ();
1075 if (c == 'x' || c == 'X')
1077 radix = 16;
1078 c = java_get_unicode ();
1080 else if (JAVA_ASCII_DIGIT (c))
1081 radix = 8;
1082 else if (c == '.' || c == 'e' || c =='E')
1084 /* Push the '.', 'e', or 'E' back and prepare for a FP
1085 parsing... */
1086 java_unget_unicode ();
1087 c = '0';
1089 else
1091 /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
1092 JAVA_LEX_LIT ("0", 10);
1093 switch (c)
1095 case 'L': case 'l':
1096 SET_LVAL_NODE (long_zero_node);
1097 return (INT_LIT_TK);
1098 case 'f': case 'F':
1099 SET_LVAL_NODE (float_zero_node);
1100 return (FP_LIT_TK);
1101 case 'd': case 'D':
1102 SET_LVAL_NODE (double_zero_node);
1103 return (FP_LIT_TK);
1104 default:
1105 java_unget_unicode ();
1106 SET_LVAL_NODE (integer_zero_node);
1107 return (INT_LIT_TK);
1111 /* Parse the first part of the literal, until we find something
1112 which is not a number. */
1113 while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1114 JAVA_ASCII_DIGIT (c))
1116 /* We store in a string (in case it turns out to be a FP) and in
1117 PARTS if we have to process a integer literal. */
1118 int numeric = hex_value (c);
1119 int count;
1121 /* Remember when we find a valid hexadecimal digit. */
1122 if (radix == 16)
1123 found_hex_digits = 1;
1124 /* Remember when we find an invalid octal digit. */
1125 else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1126 found_non_octal_digits = 1;
1128 literal_token [literal_index++] = c;
1129 /* This section of code if borrowed from gcc/c-lex.c. */
1130 for (count = 0; count < TOTAL_PARTS; count++)
1132 parts[count] *= radix;
1133 if (count)
1135 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1136 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1138 else
1139 parts[0] += numeric;
1141 if (parts [TOTAL_PARTS-1] != 0)
1142 overflow = 1;
1143 /* End borrowed section. */
1144 c = java_get_unicode ();
1147 /* If we have something from the FP char set but not a digit, parse
1148 a FP literal. */
1149 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1151 int stage = 0;
1152 int seen_digit = (literal_index ? 1 : 0);
1153 int seen_exponent = 0;
1154 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1155 double unless specified. */
1157 /* It is ok if the radix is 8 because this just means we've
1158 seen a leading `0'. However, radix==16 is invalid. */
1159 if (radix == 16)
1160 java_lex_error ("Can't express non-decimal FP literal", 0);
1161 radix = 10;
1163 for (;;)
1165 if (c == '.')
1167 if (stage < 1)
1169 stage = 1;
1170 literal_token [literal_index++ ] = c;
1171 c = java_get_unicode ();
1173 else
1174 java_lex_error ("Invalid character in FP literal", 0);
1177 if (c == 'e' || c == 'E')
1179 if (stage < 2)
1181 /* {E,e} must have seen at least a digit. */
1182 if (!seen_digit)
1183 java_lex_error
1184 ("Invalid FP literal, mantissa must have digit", 0);
1185 seen_digit = 0;
1186 seen_exponent = 1;
1187 stage = 2;
1188 literal_token [literal_index++] = c;
1189 c = java_get_unicode ();
1191 else
1192 java_lex_error ("Invalid character in FP literal", 0);
1194 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1196 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1197 stage = 4; /* So we fall through. */
1200 if ((c=='-' || c =='+') && stage == 2)
1202 stage = 3;
1203 literal_token [literal_index++] = c;
1204 c = java_get_unicode ();
1207 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1208 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1209 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1210 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1212 if (JAVA_ASCII_DIGIT (c))
1213 seen_digit = 1;
1214 if (stage == 2)
1215 stage = 3;
1216 literal_token [literal_index++ ] = c;
1217 c = java_get_unicode ();
1219 else
1221 if (stage != 4) /* Don't push back fF/dD. */
1222 java_unget_unicode ();
1224 /* An exponent (if any) must have seen a digit. */
1225 if (seen_exponent && !seen_digit)
1226 java_lex_error
1227 ("Invalid FP literal, exponent must have digit", 0);
1229 literal_token [literal_index] = '\0';
1230 JAVA_LEX_LIT (literal_token, radix);
1232 #ifndef JC1_LITE
1233 java_perform_atof (java_lval, literal_token,
1234 fflag, number_beginning);
1235 #endif
1236 return FP_LIT_TK;
1239 } /* JAVA_ASCII_FPCHAR (c) */
1241 /* Here we get back to converting the integral literal. */
1242 if (radix == 16 && ! found_hex_digits)
1243 java_lex_error
1244 ("0x must be followed by at least one hexadecimal digit", 0);
1245 else if (radix == 8 && found_non_octal_digits)
1246 java_lex_error ("Octal literal contains digit out of range", 0);
1247 else if (c == 'L' || c == 'l')
1248 long_suffix = 1;
1249 else
1250 java_unget_unicode ();
1252 #ifdef JAVA_LEX_DEBUG
1253 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1254 JAVA_LEX_LIT (literal_token, radix);
1255 #endif
1256 /* This section of code is borrowed from gcc/c-lex.c. */
1257 if (!overflow)
1259 bytes = GET_TYPE_PRECISION (long_type_node);
1260 for (i = bytes; i < TOTAL_PARTS; i++)
1261 if (parts [i])
1263 overflow = 1;
1264 break;
1267 high = low = 0;
1268 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1270 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1271 / HOST_BITS_PER_CHAR)]
1272 << (i * HOST_BITS_PER_CHAR));
1273 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1275 /* End borrowed section. */
1277 #ifndef JC1_LITE
1278 /* Range checking. */
1279 value = build_int_2 (low, high);
1280 /* Temporarily set type to unsigned. */
1281 SET_LVAL_NODE_TYPE (value, (long_suffix
1282 ? unsigned_long_type_node
1283 : unsigned_int_type_node));
1285 /* For base 10 numbers, only values up to the highest value
1286 (plus one) can be written. For instance, only ints up to
1287 2147483648 can be written. The special case of the largest
1288 negative value is handled elsewhere. For other bases, any
1289 number can be represented. */
1290 if (overflow || (radix == 10
1291 && tree_int_cst_lt (long_suffix
1292 ? decimal_long_max
1293 : decimal_int_max,
1294 value)))
1296 if (long_suffix)
1297 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1298 else
1299 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1302 /* Sign extend the value. */
1303 SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1304 force_fit_type (value, 0);
1305 JAVA_RADIX10_FLAG (value) = radix == 10;
1306 #else
1307 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1308 long_suffix ? long_type_node : int_type_node);
1309 #endif
1310 return INT_LIT_TK;
1313 /* Character literals. */
1314 if (c == '\'')
1316 int char_lit;
1317 if ((c = java_get_unicode ()) == '\\')
1318 char_lit = java_parse_escape_sequence ();
1319 else
1321 if (c == '\n' || c == '\'')
1322 java_lex_error ("Invalid character literal", 0);
1323 char_lit = c;
1326 c = java_get_unicode ();
1328 if ((c == '\n') || (c == UEOF))
1329 java_lex_error ("Character literal not terminated at end of line", 0);
1330 if (c != '\'')
1331 java_lex_error ("Syntax error in character literal", 0);
1333 if (char_lit == JAVA_CHAR_ERROR)
1334 char_lit = 0; /* We silently convert it to zero. */
1336 JAVA_LEX_CHAR_LIT (char_lit);
1337 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1338 return CHAR_LIT_TK;
1341 /* String literals. */
1342 if (c == '"')
1344 int no_error;
1345 char *string;
1347 for (no_error = 1, c = java_get_unicode ();
1348 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1350 if (c == '\\')
1351 c = java_parse_escape_sequence ();
1352 if (c == JAVA_CHAR_ERROR)
1354 no_error = 0;
1355 c = 0; /* We silently convert it to zero. */
1357 java_unicode_2_utf8 (c);
1359 if (c == '\n' || c == UEOF) /* ULT. */
1361 input_line--; /* Refer to the line where the terminator was seen. */
1362 java_lex_error ("String not terminated at end of line", 0);
1363 input_line++;
1366 obstack_1grow (&temporary_obstack, '\0');
1367 string = obstack_finish (&temporary_obstack);
1368 #ifndef JC1_LITE
1369 if (!no_error || (c != '"'))
1370 java_lval->node = error_mark_node; /* FIXME: Requires further
1371 testing. */
1372 else
1373 java_lval->node = build_string (strlen (string), string);
1374 #endif
1375 obstack_free (&temporary_obstack, string);
1376 return STRING_LIT_TK;
1379 /* Separator. */
1380 switch (c)
1382 case '(':
1383 JAVA_LEX_SEP (c);
1384 BUILD_OPERATOR (OP_TK);
1385 case ')':
1386 JAVA_LEX_SEP (c);
1387 return CP_TK;
1388 case '{':
1389 JAVA_LEX_SEP (c);
1390 if (ctxp->ccb_indent == 1)
1391 ctxp->first_ccb_indent1 = input_line;
1392 ctxp->ccb_indent++;
1393 BUILD_OPERATOR (OCB_TK);
1394 case '}':
1395 JAVA_LEX_SEP (c);
1396 ctxp->ccb_indent--;
1397 if (ctxp->ccb_indent == 1)
1398 ctxp->last_ccb_indent1 = input_line;
1399 BUILD_OPERATOR (CCB_TK);
1400 case '[':
1401 JAVA_LEX_SEP (c);
1402 BUILD_OPERATOR (OSB_TK);
1403 case ']':
1404 JAVA_LEX_SEP (c);
1405 return CSB_TK;
1406 case ';':
1407 JAVA_LEX_SEP (c);
1408 return SC_TK;
1409 case ',':
1410 JAVA_LEX_SEP (c);
1411 return C_TK;
1412 case '.':
1413 JAVA_LEX_SEP (c);
1414 BUILD_OPERATOR (DOT_TK);
1415 /* return DOT_TK; */
1418 /* Operators. */
1419 switch (c)
1421 case '=':
1422 if ((c = java_get_unicode ()) == '=')
1424 BUILD_OPERATOR (EQ_TK);
1426 else
1428 /* Equals is used in two different locations. In the
1429 variable_declarator: rule, it has to be seen as '=' as opposed
1430 to being seen as an ordinary assignment operator in
1431 assignment_operators: rule. */
1432 java_unget_unicode ();
1433 BUILD_OPERATOR (ASSIGN_TK);
1436 case '>':
1437 switch ((c = java_get_unicode ()))
1439 case '=':
1440 BUILD_OPERATOR (GTE_TK);
1441 case '>':
1442 switch ((c = java_get_unicode ()))
1444 case '>':
1445 if ((c = java_get_unicode ()) == '=')
1447 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1449 else
1451 java_unget_unicode ();
1452 BUILD_OPERATOR (ZRS_TK);
1454 case '=':
1455 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1456 default:
1457 java_unget_unicode ();
1458 BUILD_OPERATOR (SRS_TK);
1460 default:
1461 java_unget_unicode ();
1462 BUILD_OPERATOR (GT_TK);
1465 case '<':
1466 switch ((c = java_get_unicode ()))
1468 case '=':
1469 BUILD_OPERATOR (LTE_TK);
1470 case '<':
1471 if ((c = java_get_unicode ()) == '=')
1473 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1475 else
1477 java_unget_unicode ();
1478 BUILD_OPERATOR (LS_TK);
1480 default:
1481 java_unget_unicode ();
1482 BUILD_OPERATOR (LT_TK);
1485 case '&':
1486 switch ((c = java_get_unicode ()))
1488 case '&':
1489 BUILD_OPERATOR (BOOL_AND_TK);
1490 case '=':
1491 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1492 default:
1493 java_unget_unicode ();
1494 BUILD_OPERATOR (AND_TK);
1497 case '|':
1498 switch ((c = java_get_unicode ()))
1500 case '|':
1501 BUILD_OPERATOR (BOOL_OR_TK);
1502 case '=':
1503 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1504 default:
1505 java_unget_unicode ();
1506 BUILD_OPERATOR (OR_TK);
1509 case '+':
1510 switch ((c = java_get_unicode ()))
1512 case '+':
1513 BUILD_OPERATOR (INCR_TK);
1514 case '=':
1515 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1516 default:
1517 java_unget_unicode ();
1518 BUILD_OPERATOR (PLUS_TK);
1521 case '-':
1522 switch ((c = java_get_unicode ()))
1524 case '-':
1525 BUILD_OPERATOR (DECR_TK);
1526 case '=':
1527 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1528 default:
1529 java_unget_unicode ();
1530 BUILD_OPERATOR (MINUS_TK);
1533 case '*':
1534 if ((c = java_get_unicode ()) == '=')
1536 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1538 else
1540 java_unget_unicode ();
1541 BUILD_OPERATOR (MULT_TK);
1544 case '/':
1545 if ((c = java_get_unicode ()) == '=')
1547 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1549 else
1551 java_unget_unicode ();
1552 BUILD_OPERATOR (DIV_TK);
1555 case '^':
1556 if ((c = java_get_unicode ()) == '=')
1558 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1560 else
1562 java_unget_unicode ();
1563 BUILD_OPERATOR (XOR_TK);
1566 case '%':
1567 if ((c = java_get_unicode ()) == '=')
1569 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1571 else
1573 java_unget_unicode ();
1574 BUILD_OPERATOR (REM_TK);
1577 case '!':
1578 if ((c = java_get_unicode()) == '=')
1580 BUILD_OPERATOR (NEQ_TK);
1582 else
1584 java_unget_unicode ();
1585 BUILD_OPERATOR (NEG_TK);
1588 case '?':
1589 JAVA_LEX_OP ("?");
1590 BUILD_OPERATOR (REL_QM_TK);
1591 case ':':
1592 JAVA_LEX_OP (":");
1593 BUILD_OPERATOR (REL_CL_TK);
1594 case '~':
1595 BUILD_OPERATOR (NOT_TK);
1598 /* Keyword, boolean literal or null literal. */
1599 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1600 c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1602 java_unicode_2_utf8 (c);
1603 if (all_ascii && c >= 128)
1604 all_ascii = 0;
1605 ascii_index++;
1608 obstack_1grow (&temporary_obstack, '\0');
1609 string = obstack_finish (&temporary_obstack);
1610 if (c != UEOF)
1611 java_unget_unicode ();
1613 /* If we have something all ascii, we consider a keyword, a boolean
1614 literal, a null literal or an all ASCII identifier. Otherwise,
1615 this is an identifier (possibly not respecting formation rule). */
1616 if (all_ascii)
1618 const struct java_keyword *kw;
1619 if ((kw=java_keyword (string, ascii_index)))
1621 JAVA_LEX_KW (string);
1622 switch (kw->token)
1624 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1625 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1626 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1627 case PRIVATE_TK: case STRICT_TK:
1628 SET_MODIFIER_CTX (kw->token);
1629 return MODIFIER_TK;
1630 case FLOAT_TK:
1631 SET_LVAL_NODE (float_type_node);
1632 return FP_TK;
1633 case DOUBLE_TK:
1634 SET_LVAL_NODE (double_type_node);
1635 return FP_TK;
1636 case BOOLEAN_TK:
1637 SET_LVAL_NODE (boolean_type_node);
1638 return BOOLEAN_TK;
1639 case BYTE_TK:
1640 SET_LVAL_NODE (byte_type_node);
1641 return INTEGRAL_TK;
1642 case SHORT_TK:
1643 SET_LVAL_NODE (short_type_node);
1644 return INTEGRAL_TK;
1645 case INT_TK:
1646 SET_LVAL_NODE (int_type_node);
1647 return INTEGRAL_TK;
1648 case LONG_TK:
1649 SET_LVAL_NODE (long_type_node);
1650 return INTEGRAL_TK;
1651 case CHAR_TK:
1652 SET_LVAL_NODE (char_type_node);
1653 return INTEGRAL_TK;
1655 /* Keyword based literals. */
1656 case TRUE_TK:
1657 case FALSE_TK:
1658 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1659 boolean_true_node : boolean_false_node));
1660 return BOOL_LIT_TK;
1661 case NULL_TK:
1662 SET_LVAL_NODE (null_pointer_node);
1663 return NULL_TK;
1665 case ASSERT_TK:
1666 if (flag_assert)
1668 BUILD_OPERATOR (kw->token);
1669 return kw->token;
1671 else
1672 break;
1674 /* Some keyword we want to retain information on the location
1675 they where found. */
1676 case CASE_TK:
1677 case DEFAULT_TK:
1678 case SUPER_TK:
1679 case THIS_TK:
1680 case RETURN_TK:
1681 case BREAK_TK:
1682 case CONTINUE_TK:
1683 case TRY_TK:
1684 case CATCH_TK:
1685 case THROW_TK:
1686 case INSTANCEOF_TK:
1687 BUILD_OPERATOR (kw->token);
1689 default:
1690 return kw->token;
1695 /* We may have an ID here. */
1696 if (JAVA_START_CHAR_P (first_unicode))
1698 JAVA_LEX_ID (string);
1699 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1700 return ID_TK;
1703 /* Everything else is an invalid character in the input. */
1705 char lex_error_buffer [128];
1706 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1707 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1708 java_lex_error (lex_error_buffer, 1);
1710 return 0;
1713 #ifndef JC1_LITE
1715 /* The exported interface to the lexer. */
1716 static int
1717 java_lex (YYSTYPE *java_lval)
1719 int r;
1721 timevar_push (TV_LEX);
1722 r = do_java_lex (java_lval);
1723 timevar_pop (TV_LEX);
1724 return r;
1727 /* This is called by the parser to see if an error should be generated
1728 due to numeric overflow. This function only handles the particular
1729 case of the largest negative value, and is only called in the case
1730 where this value is not preceded by `-'. */
1731 static void
1732 error_if_numeric_overflow (tree value)
1734 if (TREE_CODE (value) == INTEGER_CST
1735 && JAVA_RADIX10_FLAG (value)
1736 && tree_int_cst_sgn (value) < 0)
1738 if (TREE_TYPE (value) == long_type_node)
1739 java_lex_error ("Numeric overflow for `long' literal", 0);
1740 else
1741 java_lex_error ("Numeric overflow for `int' literal", 0);
1745 #endif /* JC1_LITE */
1747 static void
1748 java_unicode_2_utf8 (unicode_t unicode)
1750 if (RANGE (unicode, 0x01, 0x7f))
1751 obstack_1grow (&temporary_obstack, (char)unicode);
1752 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1754 obstack_1grow (&temporary_obstack,
1755 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1756 obstack_1grow (&temporary_obstack,
1757 (unsigned char)(0x80 | (unicode & 0x3f)));
1759 else /* Range 0x800-0xffff. */
1761 obstack_1grow (&temporary_obstack,
1762 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1763 obstack_1grow (&temporary_obstack,
1764 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1765 obstack_1grow (&temporary_obstack,
1766 (unsigned char)(0x80 | (unicode & 0x003f)));
1770 #ifndef JC1_LITE
1771 static tree
1772 build_wfl_node (tree node)
1774 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1775 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1776 TREE_TYPE (node) = NULL_TREE;
1777 return node;
1779 #endif
1781 static void
1782 java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
1784 #ifndef JC1_LITE
1785 ctxp->elc.line = ctxp->c_line->lineno;
1786 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1788 /* Might be caught in the middle of some error report. */
1789 ctxp->java_error_flag = 0;
1790 java_error (NULL);
1791 java_error (msg);
1792 #endif
1795 #ifndef JC1_LITE
/* Tell whether C, a character just read from FP, terminates a line.
   Returns 1 for '\n' and for '\r', 0 for any other value.

   After a '\r' one more character is read from FP.  A '\n' is consumed,
   so that CR LF counts as a single terminator; any other character is
   pushed back onto FP; EOF is left alone.  */
static int
java_is_eol (FILE *fp, int c)
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int following = getc (fp);

      if (following != '\n' && following != EOF)
	ungetc (following, fp);
      return 1;
    }

  return 0;
}
1813 #endif
1815 char *
1816 java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
1817 int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
1819 #ifdef JC1_LITE
1820 return 0;
1821 #else
1822 /* Dumb implementation. Doesn't try to cache or optimize things. */
1823 /* First line of the file is line 1, first column is 1. */
1825 /* COL == -1 means, at the CR/LF in LINE. */
1826 /* COL == -2 means, at the first non space char in LINE. */
1828 FILE *fp;
1829 int c, ccol, cline = 1;
1830 int current_line_col = 0;
1831 int first_non_space = 0;
1832 char *base;
1834 if (!(fp = fopen (filename, "r")))
1835 fatal_error ("can't open %s: %m", filename);
1837 while (cline != line)
1839 c = getc (fp);
1840 if (c == EOF)
1842 static const char msg[] = "<<file too short - unexpected EOF>>";
1843 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1844 goto have_line;
1846 if (java_is_eol (fp, c))
1847 cline++;
1850 /* Gather the chars of the current line in a buffer. */
1851 for (;;)
1853 c = getc (fp);
1854 if (c < 0 || java_is_eol (fp, c))
1855 break;
1856 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1857 first_non_space = current_line_col;
1858 obstack_1grow (&temporary_obstack, c);
1859 current_line_col++;
1861 have_line:
1863 obstack_1grow (&temporary_obstack, '\n');
1865 if (col == -1)
1867 col = current_line_col;
1868 first_non_space = 0;
1870 else if (col == -2)
1871 col = first_non_space;
1872 else
1873 first_non_space = 0;
1875 /* Place the '^' a the right position. */
1876 base = obstack_base (&temporary_obstack);
1877 for (ccol = 1; ccol <= col+3; ccol++)
1879 /* Compute \t when reaching first_non_space. */
1880 char c = (first_non_space ?
1881 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1882 obstack_1grow (&temporary_obstack, c);
1884 obstack_grow0 (&temporary_obstack, "^", 1);
1886 fclose (fp);
1887 return obstack_finish (&temporary_obstack);
1888 #endif
1891 #ifndef JC1_LITE
/* Compare the LENGTH bytes starting at STR, read one character at a
   time with UTF8_GET, against the NUL-terminated string NAME.

   At the first character that differs from NAME the difference between
   the two is returned.  If every character of NAME matched, the result
   is 0 when STR has reached the end of its LENGTH bytes and 1 when
   something is left over.  UTF8_GET is expected to advance STR as it
   reads; the final test against the limit relies on that.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected; ++expected)
    {
      int ch = UTF8_GET (str, limit);

      if (ch != *expected)
	return ch - *expected;
    }

  if (str == limit)
    return 0;
  return 1;
}
/* All C++ keywords, sorted in plain byte order.  cxx_keyword_p does a
   binary search on this table, so new entries must keep it sorted.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__", "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__",
  "__imag", "__imag__", "__inline", "__inline__",
  "__label__", "__null",
  "__real", "__real__", "__restrict", "__restrict__",
  "__signed", "__signed__", "__typeof", "__typeof__",
  "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl",
  "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast",
  "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
2018 /* Return true if NAME is a C++ keyword. */
2021 cxx_keyword_p (const char *name, int length)
2023 int last = ARRAY_SIZE (cxx_keywords);
2024 int first = 0;
2025 int mid = (last + first) / 2;
2026 int old = -1;
2028 for (mid = (last + first) / 2;
2029 mid != old;
2030 old = mid, mid = (last + first) / 2)
2032 int kwl = strlen (cxx_keywords[mid]);
2033 int min_length = kwl > length ? length : kwl;
2034 int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
2036 if (r == 0)
2038 int i;
2039 /* We've found a match if all the remaining characters are `$'. */
2040 for (i = min_length; i < length && name[i] == '$'; ++i)
2042 if (i == length)
2043 return 1;
2044 r = 1;
2047 if (r < 0)
2048 last = mid;
2049 else
2050 first = mid;
2052 return 0;
2054 #endif /* JC1_LITE */