2002-03-31 Segher Boessenkool <segher@koffie.nl>
[official-gcc.git] / gcc / java / lex.c
blobd18aa1b9954f39de5f8f0a6edc4a82a70577996b
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
3 Free Software Foundation, Inc.
4 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA.
23 Java and all Java-based marks are trademarks or registered trademarks
24 of Sun Microsystems, Inc. in the United States and other countries.
25 The Free Software Foundation is independent of Sun Microsystems, Inc. */
27 /* It defines java_lex (yylex) that reads a Java ASCII source file
28 possibly containing Unicode escape sequence or utf8 encoded
29 characters and returns a token for everything found but comments,
30 white spaces and line terminators. When necessary, it also fills
31 the java_lval (yylval) union. It's implemented to be called by a
32 re-entrant parser generated by Bison.
34 The lexical analysis conforms to the Java grammar described in "The
35 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
36 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
38 #include "keyword.h"
39 #include "flags.h"
40 #include "chartables.h"
41 #ifndef JC1_LITE
42 #include "timevar.h"
43 #endif
45 /* Function declarations. */
46 static char *java_sprint_unicode (struct java_line *, int);
47 static void java_unicode_2_utf8 (unicode_t);
48 static void java_lex_error (const char *, int);
49 #ifndef JC1_LITE
50 static int do_java_lex (YYSTYPE *);
51 static int java_lex (YYSTYPE *);
52 static int java_is_eol (FILE *, int);
53 static tree build_wfl_node (tree);
54 #endif
55 static void java_store_unicode (struct java_line *, unicode_t, int);
56 static int java_parse_escape_sequence (void);
57 static int java_start_char_p (unicode_t);
58 static int java_part_char_p (unicode_t);
59 static int java_space_char_p (unicode_t);
60 static void java_parse_doc_section (int);
61 static void java_parse_end_comment (int);
62 static int java_get_unicode (void);
63 static int java_read_unicode (java_lexer *, int *);
64 static int java_read_unicode_collapsing_terminators (java_lexer *, int *);
65 static void java_store_unicode (struct java_line *, unicode_t, int);
66 static int java_read_char (java_lexer *);
67 static void java_allocate_new_line (void);
68 static void java_unget_unicode (void);
69 static unicode_t java_sneak_unicode (void);
70 #ifndef JC1_LITE
71 static int utf8_cmp (const unsigned char *, int, const char *);
72 #endif
74 java_lexer *java_new_lexer (FILE *, const char *);
75 #ifndef JC1_LITE
76 static void error_if_numeric_overflow (tree);
77 #endif
79 #ifdef HAVE_ICONV
80 /* This is nonzero if we have initialized `need_byteswap'. */
81 static int byteswap_init = 0;
83 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
84 big-endian order -- not native endian order. We handle this by
85 doing a conversion once at startup and seeing what happens. This
86 flag holds the results of this determination. */
87 static int need_byteswap = 0;
88 #endif
90 void
91 java_init_lex (FILE *finput, const char *encoding)
93 #ifndef JC1_LITE
94 int java_lang_imported = 0;
96 if (!java_lang_id)
97 java_lang_id = get_identifier ("java.lang");
98 if (!inst_id)
99 inst_id = get_identifier ("inst$");
100 if (!wpv_id)
101 wpv_id = get_identifier ("write_parm_value$");
103 if (!java_lang_imported)
105 tree node = build_tree_list
106 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
107 read_import_dir (TREE_PURPOSE (node));
108 TREE_CHAIN (node) = ctxp->import_demand_list;
109 ctxp->import_demand_list = node;
110 java_lang_imported = 1;
113 if (!wfl_operator)
114 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
115 if (!label_id)
116 label_id = get_identifier ("$L");
117 if (!wfl_append)
118 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
119 if (!wfl_string_buffer)
120 wfl_string_buffer =
121 build_expr_wfl (get_identifier (flag_emit_class_files
122 ? "java.lang.StringBuffer"
123 : "gnu.gcj.runtime.StringBuffer"),
124 NULL, 0, 0);
125 if (!wfl_to_string)
126 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
128 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
129 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
131 memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
132 current_jcf = ggc_alloc_cleared (sizeof (JCF));
133 ctxp->current_parsed_class = NULL;
134 ctxp->package = NULL_TREE;
135 #endif
137 ctxp->filename = input_filename;
138 ctxp->lineno = lineno = 0;
139 ctxp->p_line = NULL;
140 ctxp->c_line = NULL;
141 ctxp->java_error_flag = 0;
142 ctxp->lexer = java_new_lexer (finput, encoding);
145 static char *
146 java_sprint_unicode (struct java_line *line, int i)
148 static char buffer [10];
149 if (line->unicode_escape_p [i] || line->line [i] > 128)
150 sprintf (buffer, "\\u%04x", line->line [i]);
151 else
153 buffer [0] = line->line [i];
154 buffer [1] = '\0';
156 return buffer;
159 static unicode_t
160 java_sneak_unicode (void)
162 return (ctxp->c_line->line [ctxp->c_line->current]);
165 static void
166 java_unget_unicode (void)
168 if (!ctxp->c_line->current)
169 /* Can't unget unicode. */
170 abort ();
172 ctxp->c_line->current--;
173 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
176 static void
177 java_allocate_new_line (void)
179 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
180 char ahead_escape_p = (ctxp->c_line ?
181 ctxp->c_line->unicode_escape_ahead_p : 0);
183 if (ctxp->c_line && !ctxp->c_line->white_space_only)
185 if (ctxp->p_line)
187 free (ctxp->p_line->unicode_escape_p);
188 free (ctxp->p_line->line);
189 free (ctxp->p_line);
191 ctxp->p_line = ctxp->c_line;
192 ctxp->c_line = NULL; /* Reallocated. */
195 if (!ctxp->c_line)
197 ctxp->c_line = xmalloc (sizeof (struct java_line));
198 ctxp->c_line->max = JAVA_LINE_MAX;
199 ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
200 ctxp->c_line->unicode_escape_p =
201 xmalloc (sizeof (char)*ctxp->c_line->max);
202 ctxp->c_line->white_space_only = 0;
205 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
206 ctxp->c_line->char_col = ctxp->c_line->current = 0;
207 if (ahead)
209 ctxp->c_line->line [ctxp->c_line->size] = ahead;
210 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
211 ctxp->c_line->size++;
213 ctxp->c_line->ahead [0] = 0;
214 ctxp->c_line->unicode_escape_ahead_p = 0;
215 ctxp->c_line->lineno = ++lineno;
216 ctxp->c_line->white_space_only = 1;
219 /* Create a new lexer object. */
221 java_lexer *
222 java_new_lexer (FILE *finput, const char *encoding)
224 java_lexer *lex = xmalloc (sizeof (java_lexer));
225 int enc_error = 0;
227 lex->finput = finput;
228 lex->bs_count = 0;
229 lex->unget_value = 0;
230 lex->hit_eof = 0;
232 #ifdef HAVE_ICONV
233 lex->handle = iconv_open ("UCS-2", encoding);
234 if (lex->handle != (iconv_t) -1)
236 lex->first = -1;
237 lex->last = -1;
238 lex->out_first = -1;
239 lex->out_last = -1;
240 lex->read_anything = 0;
241 lex->use_fallback = 0;
243 /* Work around broken iconv() implementations by doing checking at
244 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
245 then all UCS-2 encoders will be broken. Perhaps not a valid
246 assumption. */
247 if (! byteswap_init)
249 iconv_t handle;
251 byteswap_init = 1;
253 handle = iconv_open ("UCS-2", "UTF-8");
254 if (handle != (iconv_t) -1)
256 unicode_t result;
257 unsigned char in[3];
258 char *inp, *outp;
259 size_t inc, outc, r;
261 /* This is the UTF-8 encoding of \ufeff. */
262 in[0] = 0xef;
263 in[1] = 0xbb;
264 in[2] = 0xbf;
266 inp = in;
267 inc = 3;
268 outp = (char *) &result;
269 outc = 2;
271 r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
272 &outp, &outc);
273 iconv_close (handle);
274 /* Conversion must be complete for us to use the result. */
275 if (r != (size_t) -1 && inc == 0 && outc == 0)
276 need_byteswap = (result != 0xfeff);
280 lex->byte_swap = need_byteswap;
282 else
283 #endif /* HAVE_ICONV */
285 /* If iconv failed, use the internal decoder if the default
286 encoding was requested. This code is used on platforms where
287 iconv exists but is insufficient for our needs. For
288 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
290 On Solaris the default encoding, as returned by nl_langinfo(),
291 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
292 understand that. We work around that by pretending
293 `646' to be the same as UTF-8. */
294 if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
295 enc_error = 1;
296 #ifdef HAVE_ICONV
297 else
298 lex->use_fallback = 1;
299 #endif /* HAVE_ICONV */
302 if (enc_error)
303 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
305 return lex;
308 void
309 java_destroy_lexer (java_lexer *lex)
311 #ifdef HAVE_ICONV
312 if (! lex->use_fallback)
313 iconv_close (lex->handle);
314 #endif
315 free (lex);
318 static int
319 java_read_char (java_lexer *lex)
321 if (lex->unget_value)
323 unicode_t r = lex->unget_value;
324 lex->unget_value = 0;
325 return r;
328 #ifdef HAVE_ICONV
329 if (! lex->use_fallback)
331 size_t ir, inbytesleft, in_save, out_count, out_save;
332 char *inp, *outp;
333 unicode_t result;
335 /* If there is data which has already been converted, use it. */
336 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
338 lex->out_first = 0;
339 lex->out_last = 0;
341 while (1)
343 /* See if we need to read more data. If FIRST == 0 then
344 the previous conversion attempt ended in the middle of
345 a character at the end of the buffer. Otherwise we
346 only have to read if the buffer is empty. */
347 if (lex->first == 0 || lex->first >= lex->last)
349 int r;
351 if (lex->first >= lex->last)
353 lex->first = 0;
354 lex->last = 0;
356 if (feof (lex->finput))
357 return UEOF;
358 r = fread (&lex->buffer[lex->last], 1,
359 sizeof (lex->buffer) - lex->last,
360 lex->finput);
361 lex->last += r;
364 inbytesleft = lex->last - lex->first;
365 out_count = sizeof (lex->out_buffer) - lex->out_last;
367 if (inbytesleft == 0)
369 /* We've tried to read and there is nothing left. */
370 return UEOF;
373 in_save = inbytesleft;
374 out_save = out_count;
375 inp = &lex->buffer[lex->first];
376 outp = &lex->out_buffer[lex->out_last];
377 ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
378 &inbytesleft, &outp, &out_count);
380 /* If we haven't read any bytes, then look to see if we
381 have read a BOM. */
382 if (! lex->read_anything && out_save - out_count >= 2)
384 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
385 if (uc == 0xfeff)
387 lex->byte_swap = 0;
388 lex->out_first += 2;
390 else if (uc == 0xfffe)
392 lex->byte_swap = 1;
393 lex->out_first += 2;
395 lex->read_anything = 1;
398 if (lex->byte_swap)
400 unsigned int i;
401 for (i = 0; i < out_save - out_count; i += 2)
403 char t = lex->out_buffer[lex->out_last + i];
404 lex->out_buffer[lex->out_last + i]
405 = lex->out_buffer[lex->out_last + i + 1];
406 lex->out_buffer[lex->out_last + i + 1] = t;
410 lex->first += in_save - inbytesleft;
411 lex->out_last += out_save - out_count;
413 /* If we converted anything at all, move along. */
414 if (out_count != out_save)
415 break;
417 if (ir == (size_t) -1)
419 if (errno == EINVAL)
421 /* This is ok. This means that the end of our buffer
422 is in the middle of a character sequence. We just
423 move the valid part of the buffer to the beginning
424 to force a read. */
425 memmove (&lex->buffer[0], &lex->buffer[lex->first],
426 lex->last - lex->first);
427 lex->last -= lex->first;
428 lex->first = 0;
430 else
432 /* A more serious error. */
433 java_lex_error ("unrecognized character in input stream",
435 return UEOF;
441 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
443 /* Don't have any data. */
444 return UEOF;
447 /* Success. */
448 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
449 lex->out_first += 2;
450 return result;
452 else
453 #endif /* HAVE_ICONV */
455 int c, c1, c2;
456 c = getc (lex->finput);
458 if (c == EOF)
459 return UEOF;
460 if (c < 128)
461 return (unicode_t) c;
462 else
464 if ((c & 0xe0) == 0xc0)
466 c1 = getc (lex->finput);
467 if ((c1 & 0xc0) == 0x80)
469 unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
470 /* Check for valid 2-byte characters. We explicitly
471 allow \0 because this encoding is common in the
472 Java world. */
473 if (r == 0 || (r >= 0x80 && r <= 0x7ff))
474 return r;
477 else if ((c & 0xf0) == 0xe0)
479 c1 = getc (lex->finput);
480 if ((c1 & 0xc0) == 0x80)
482 c2 = getc (lex->finput);
483 if ((c2 & 0xc0) == 0x80)
485 unicode_t r = (unicode_t)(((c & 0xf) << 12) +
486 (( c1 & 0x3f) << 6)
487 + (c2 & 0x3f));
488 /* Check for valid 3-byte characters.
489 Don't allow surrogate, \ufffe or \uffff. */
490 if (IN_RANGE (r, 0x800, 0xffff)
491 && ! IN_RANGE (r, 0xd800, 0xdfff)
492 && r != 0xfffe && r != 0xffff)
493 return r;
498 /* We simply don't support invalid characters. We also
499 don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
500 cannot be valid Java characters. */
501 java_lex_error ("malformed UTF-8 character", 0);
505 /* We only get here on error. */
506 return UEOF;
509 static void
510 java_store_unicode (struct java_line *l, unicode_t c, int unicode_escape_p)
512 if (l->size == l->max)
514 l->max += JAVA_LINE_MAX;
515 l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
516 l->unicode_escape_p = xrealloc (l->unicode_escape_p,
517 sizeof (char)*l->max);
519 l->line [l->size] = c;
520 l->unicode_escape_p [l->size++] = unicode_escape_p;
523 static int
524 java_read_unicode (java_lexer *lex, int *unicode_escape_p)
526 int c;
528 c = java_read_char (lex);
529 *unicode_escape_p = 0;
531 if (c != '\\')
533 lex->bs_count = 0;
534 return c;
537 ++lex->bs_count;
538 if ((lex->bs_count) % 2 == 1)
540 /* Odd number of \ seen. */
541 c = java_read_char (lex);
542 if (c == 'u')
544 unicode_t unicode = 0;
545 int shift = 12;
547 /* Recognize any number of `u's in \u. */
548 while ((c = java_read_char (lex)) == 'u')
551 shift = 12;
554 if (c == UEOF)
556 java_lex_error ("prematurely terminated \\u sequence", 0);
557 return UEOF;
560 if (hex_p (c))
561 unicode |= (unicode_t)(hex_value (c) << shift);
562 else
564 java_lex_error ("non-hex digit in \\u sequence", 0);
565 break;
568 c = java_read_char (lex);
569 shift -= 4;
571 while (shift >= 0);
573 if (c != UEOF)
574 lex->unget_value = c;
576 lex->bs_count = 0;
577 *unicode_escape_p = 1;
578 return unicode;
580 lex->unget_value = c;
582 return (unicode_t) '\\';
585 static int
586 java_read_unicode_collapsing_terminators (java_lexer *lex,
587 int *unicode_escape_p)
589 int c = java_read_unicode (lex, unicode_escape_p);
591 if (c == '\r')
593 /* We have to read ahead to see if we got \r\n. In that case we
594 return a single line terminator. */
595 int dummy;
596 c = java_read_unicode (lex, &dummy);
597 if (c != '\n' && c != UEOF)
598 lex->unget_value = c;
599 /* In either case we must return a newline. */
600 c = '\n';
603 return c;
606 static int
607 java_get_unicode (void)
609 /* It's time to read a line when... */
610 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
612 int c;
613 int found_chars = 0;
615 if (ctxp->lexer->hit_eof)
616 return UEOF;
618 java_allocate_new_line ();
619 if (ctxp->c_line->line[0] != '\n')
621 for (;;)
623 int unicode_escape_p;
624 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
625 &unicode_escape_p);
626 if (c != UEOF)
628 found_chars = 1;
629 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
630 if (ctxp->c_line->white_space_only
631 && !JAVA_WHITE_SPACE_P (c)
632 && c != '\n')
633 ctxp->c_line->white_space_only = 0;
635 if ((c == '\n') || (c == UEOF))
636 break;
639 if (c == UEOF && ! found_chars)
641 ctxp->lexer->hit_eof = 1;
642 return UEOF;
646 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
647 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
648 return ctxp->c_line->line [ctxp->c_line->current++];
651 /* Parse the end of a C style comment.
652 * C is the first character following the '/' and '*'. */
653 static void
654 java_parse_end_comment (int c)
656 for ( ;; c = java_get_unicode ())
658 switch (c)
660 case UEOF:
661 java_lex_error ("Comment not terminated at end of input", 0);
662 return;
663 case '*':
664 switch (c = java_get_unicode ())
666 case UEOF:
667 java_lex_error ("Comment not terminated at end of input", 0);
668 return;
669 case '/':
670 return;
671 case '*': /* Reparse only '*'. */
672 java_unget_unicode ();
678 /* Parse the documentation section. Keywords must be at the beginning
679 of a documentation comment line (ignoring white space and any `*'
680 character). Parsed keyword(s): @DEPRECATED. */
682 static void
683 java_parse_doc_section (int c)
685 int last_was_star;
687 /* We reset this here, because only the most recent doc comment
688 applies to the following declaration. */
689 ctxp->deprecated = 0;
691 /* We loop over all the lines of the comment. We'll eventually exit
692 if we hit EOF prematurely, or when we see the comment
693 terminator. */
694 while (1)
696 /* These first steps need only be done if we're still looking
697 for the deprecated tag. If we've already seen it, we might
698 as well skip looking for it again. */
699 if (! ctxp->deprecated)
701 /* Skip whitespace and '*'s. We must also check for the end
702 of the comment here. */
703 while (JAVA_WHITE_SPACE_P (c) || c == '*')
705 last_was_star = (c == '*');
706 c = java_get_unicode ();
707 if (last_was_star && c == '/')
709 /* We just saw the comment terminator. */
710 return;
714 if (c == UEOF)
715 goto eof;
717 if (c == '@')
719 const char *deprecated = "@deprecated";
720 int i;
722 for (i = 0; deprecated[i]; ++i)
724 if (c != deprecated[i])
725 break;
726 /* We write the code in this way, with the
727 update at the end, so that after the loop
728 we're left with the next character in C. */
729 c = java_get_unicode ();
732 if (c == UEOF)
733 goto eof;
735 /* @deprecated must be followed by a space or newline.
736 We also allow a '*' in case it appears just before
737 the end of a comment. In this position only we also
738 must allow any Unicode space character. */
739 if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
741 if (! deprecated[i])
742 ctxp->deprecated = 1;
747 /* We've examined the relevant content from this line. Now we
748 skip the remaining characters and start over with the next
749 line. We also check for end of comment here. */
750 while (c != '\n' && c != UEOF)
752 last_was_star = (c == '*');
753 c = java_get_unicode ();
754 if (last_was_star && c == '/')
755 return;
758 if (c == UEOF)
759 goto eof;
760 /* We have to advance past the \n. */
761 c = java_get_unicode ();
762 if (c == UEOF)
763 goto eof;
766 eof:
767 java_lex_error ("Comment not terminated at end of input", 0);
770 /* Return true if C is a valid start character for a Java identifier.
771 This is only called if C >= 128 -- smaller values are handled
772 inline. However, this function handles all values anyway. */
773 static int
774 java_start_char_p (unicode_t c)
776 unsigned int hi = c / 256;
777 const char *const page = type_table[hi];
778 unsigned long val = (unsigned long) page;
779 int flags;
781 if ((val & ~ LETTER_MASK) != 0)
782 flags = page[c & 255];
783 else
784 flags = val;
786 return flags & LETTER_START;
789 /* Return true if C is a valid part character for a Java identifier.
790 This is only called if C >= 128 -- smaller values are handled
791 inline. However, this function handles all values anyway. */
792 static int
793 java_part_char_p (unicode_t c)
795 unsigned int hi = c / 256;
796 const char *const page = type_table[hi];
797 unsigned long val = (unsigned long) page;
798 int flags;
800 if ((val & ~ LETTER_MASK) != 0)
801 flags = page[c & 255];
802 else
803 flags = val;
805 return flags & LETTER_PART;
808 /* Return true if C is whitespace. */
809 static int
810 java_space_char_p (unicode_t c)
812 unsigned int hi = c / 256;
813 const char *const page = type_table[hi];
814 unsigned long val = (unsigned long) page;
815 int flags;
817 if ((val & ~ LETTER_MASK) != 0)
818 flags = page[c & 255];
819 else
820 flags = val;
822 return flags & LETTER_SPACE;
825 static int
826 java_parse_escape_sequence (void)
828 unicode_t char_lit;
829 int c;
831 switch (c = java_get_unicode ())
833 case 'b':
834 return (unicode_t)0x8;
835 case 't':
836 return (unicode_t)0x9;
837 case 'n':
838 return (unicode_t)0xa;
839 case 'f':
840 return (unicode_t)0xc;
841 case 'r':
842 return (unicode_t)0xd;
843 case '"':
844 return (unicode_t)0x22;
845 case '\'':
846 return (unicode_t)0x27;
847 case '\\':
848 return (unicode_t)0x5c;
849 case '0': case '1': case '2': case '3': case '4':
850 case '5': case '6': case '7':
852 int octal_escape[3];
853 int octal_escape_index = 0;
854 int max = 3;
855 int i, shift;
857 for (; octal_escape_index < max && RANGE (c, '0', '7');
858 c = java_get_unicode ())
860 if (octal_escape_index == 0 && c > '3')
862 /* According to the grammar, `\477' has a well-defined
863 meaning -- it is `\47' followed by `7'. */
864 --max;
866 octal_escape [octal_escape_index++] = c;
869 java_unget_unicode ();
871 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
872 i < octal_escape_index; i++, shift -= 3)
873 char_lit |= (octal_escape [i] - '0') << shift;
875 return char_lit;
877 default:
878 java_lex_error ("Invalid character in escape sequence", 0);
879 return JAVA_CHAR_ERROR;
883 #ifndef JC1_LITE
884 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
886 /* Subroutine of java_lex: converts floating-point literals to tree
887 nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
888 store the result. FFLAG indicates whether the literal was tagged
889 with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
890 is the line number on which to report any error. */
892 static void java_perform_atof (YYSTYPE *, char *, int, int);
894 static void
895 java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
896 int number_beginning)
898 REAL_VALUE_TYPE value;
899 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
901 SET_REAL_VALUE_ATOF (value,
902 REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
904 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
906 JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
907 value = DCONST0;
909 else if (IS_ZERO (value))
911 /* We check to see if the value is really 0 or if we've found an
912 underflow. We do this in the most primitive imaginable way. */
913 int really_zero = 1;
914 char *p = literal_token;
915 if (*p == '-')
916 ++p;
917 while (*p && *p != 'e' && *p != 'E')
919 if (*p != '0' && *p != '.')
921 really_zero = 0;
922 break;
924 ++p;
926 if (! really_zero)
928 int i = ctxp->c_line->current;
929 ctxp->c_line->current = number_beginning;
930 java_lex_error ("Floating point literal underflow", 0);
931 ctxp->c_line->current = i;
935 SET_LVAL_NODE_TYPE (build_real (type, value), type);
937 #endif
939 static int yylex (YYSTYPE *);
941 static int
942 #ifdef JC1_LITE
943 yylex (YYSTYPE *java_lval)
944 #else
945 do_java_lex (YYSTYPE *java_lval)
946 #endif
948 int c;
949 unicode_t first_unicode;
950 int ascii_index, all_ascii;
951 char *string;
953 /* Translation of the Unicode escape in the raw stream of Unicode
954 characters. Takes care of line terminator. */
955 step1:
956 /* Skip white spaces: SP, TAB and FF or ULT. */
957 for (c = java_get_unicode ();
958 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
959 if (c == '\n')
961 ctxp->elc.line = ctxp->c_line->lineno;
962 ctxp->elc.col = ctxp->c_line->char_col-2;
965 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
967 if (c == 0x1a) /* CTRL-Z. */
969 if ((c = java_get_unicode ()) == UEOF)
970 return 0; /* Ok here. */
971 else
972 java_unget_unicode (); /* Caught later, at the end of the
973 function. */
975 /* Handle EOF here. */
976 if (c == UEOF) /* Should probably do something here... */
977 return 0;
979 /* Take care of eventual comments. */
980 if (c == '/')
982 switch (c = java_get_unicode ())
984 case '/':
985 for (;;)
987 c = java_get_unicode ();
988 if (c == UEOF)
990 /* It is ok to end a `//' comment with EOF, unless
991 we're being pedantic. */
992 if (pedantic)
993 java_lex_error ("Comment not terminated at end of input",
995 return 0;
997 if (c == '\n') /* ULT */
998 goto step1;
1000 break;
1002 case '*':
1003 if ((c = java_get_unicode ()) == '*')
1005 c = java_get_unicode ();
1006 if (c == '/')
1008 /* Empty documentation comment. We have to reset
1009 the deprecation marker as only the most recent
1010 doc comment applies. */
1011 ctxp->deprecated = 0;
1013 else
1014 java_parse_doc_section (c);
1016 else
1017 java_parse_end_comment ((c = java_get_unicode ()));
1018 goto step1;
1019 break;
1020 default:
1021 java_unget_unicode ();
1022 c = '/';
1023 break;
1027 ctxp->elc.line = ctxp->c_line->lineno;
1028 ctxp->elc.prev_col = ctxp->elc.col;
1029 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
1030 if (ctxp->elc.col < 0)
1031 abort ();
1033 /* Numeric literals. */
1034 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
1036 /* This section of code is borrowed from gcc/c-lex.c. */
1037 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
1038 int parts[TOTAL_PARTS];
1039 HOST_WIDE_INT high, low;
1040 /* End borrowed section. */
1041 char literal_token [256];
1042 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
1043 int found_hex_digits = 0, found_non_octal_digits = 0;
1044 int i;
1045 #ifndef JC1_LITE
1046 int number_beginning = ctxp->c_line->current;
1047 tree value;
1048 #endif
1050 /* We might have a . separator instead of a FP like .[0-9]*. */
1051 if (c == '.')
1053 unicode_t peep = java_sneak_unicode ();
1055 if (!JAVA_ASCII_DIGIT (peep))
1057 JAVA_LEX_SEP('.');
1058 BUILD_OPERATOR (DOT_TK);
1062 for (i = 0; i < TOTAL_PARTS; i++)
1063 parts [i] = 0;
1065 if (c == '0')
1067 c = java_get_unicode ();
1068 if (c == 'x' || c == 'X')
1070 radix = 16;
1071 c = java_get_unicode ();
1073 else if (JAVA_ASCII_DIGIT (c))
1074 radix = 8;
1075 else if (c == '.' || c == 'e' || c =='E')
1077 /* Push the '.', 'e', or 'E' back and prepare for a FP
1078 parsing... */
1079 java_unget_unicode ();
1080 c = '0';
1082 else
1084 /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
1085 JAVA_LEX_LIT ("0", 10);
1086 switch (c)
1088 case 'L': case 'l':
1089 SET_LVAL_NODE (long_zero_node);
1090 return (INT_LIT_TK);
1091 case 'f': case 'F':
1092 SET_LVAL_NODE (float_zero_node);
1093 return (FP_LIT_TK);
1094 case 'd': case 'D':
1095 SET_LVAL_NODE (double_zero_node);
1096 return (FP_LIT_TK);
1097 default:
1098 java_unget_unicode ();
1099 SET_LVAL_NODE (integer_zero_node);
1100 return (INT_LIT_TK);
1104 /* Parse the first part of the literal, until we find something
1105 which is not a number. */
1106 while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1107 JAVA_ASCII_DIGIT (c))
1109 /* We store in a string (in case it turns out to be a FP) and in
1110 PARTS if we have to process a integer literal. */
1111 int numeric = hex_value (c);
1112 int count;
1114 /* Remember when we find a valid hexadecimal digit. */
1115 if (radix == 16)
1116 found_hex_digits = 1;
1117 /* Remember when we find an invalid octal digit. */
1118 else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1119 found_non_octal_digits = 1;
1121 literal_token [literal_index++] = c;
1122 /* This section of code if borrowed from gcc/c-lex.c. */
1123 for (count = 0; count < TOTAL_PARTS; count++)
1125 parts[count] *= radix;
1126 if (count)
1128 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1129 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1131 else
1132 parts[0] += numeric;
1134 if (parts [TOTAL_PARTS-1] != 0)
1135 overflow = 1;
1136 /* End borrowed section. */
1137 c = java_get_unicode ();
1140 /* If we have something from the FP char set but not a digit, parse
1141 a FP literal. */
1142 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1144 int stage = 0;
1145 int seen_digit = (literal_index ? 1 : 0);
1146 int seen_exponent = 0;
1147 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1148 double unless specified. */
1150 /* It is ok if the radix is 8 because this just means we've
1151 seen a leading `0'. However, radix==16 is invalid. */
1152 if (radix == 16)
1153 java_lex_error ("Can't express non-decimal FP literal", 0);
1154 radix = 10;
1156 for (;;)
1158 if (c == '.')
1160 if (stage < 1)
1162 stage = 1;
1163 literal_token [literal_index++ ] = c;
1164 c = java_get_unicode ();
1166 else
1167 java_lex_error ("Invalid character in FP literal", 0);
1170 if (c == 'e' || c == 'E')
1172 if (stage < 2)
1174 /* {E,e} must have seen at least a digit. */
1175 if (!seen_digit)
1176 java_lex_error
1177 ("Invalid FP literal, mantissa must have digit", 0);
1178 seen_digit = 0;
1179 seen_exponent = 1;
1180 stage = 2;
1181 literal_token [literal_index++] = c;
1182 c = java_get_unicode ();
1184 else
1185 java_lex_error ("Invalid character in FP literal", 0);
1187 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1189 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1190 stage = 4; /* So we fall through. */
1193 if ((c=='-' || c =='+') && stage == 2)
1195 stage = 3;
1196 literal_token [literal_index++] = c;
1197 c = java_get_unicode ();
1200 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1201 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1202 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1203 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1205 if (JAVA_ASCII_DIGIT (c))
1206 seen_digit = 1;
1207 if (stage == 2)
1208 stage = 3;
1209 literal_token [literal_index++ ] = c;
1210 c = java_get_unicode ();
1212 else
1214 if (stage != 4) /* Don't push back fF/dD. */
1215 java_unget_unicode ();
1217 /* An exponent (if any) must have seen a digit. */
1218 if (seen_exponent && !seen_digit)
1219 java_lex_error
1220 ("Invalid FP literal, exponent must have digit", 0);
1222 literal_token [literal_index] = '\0';
1223 JAVA_LEX_LIT (literal_token, radix);
1225 #ifndef JC1_LITE
1226 java_perform_atof (java_lval, literal_token,
1227 fflag, number_beginning);
1228 #endif
1229 return FP_LIT_TK;
1232 } /* JAVA_ASCII_FPCHAR (c) */
1234 /* Here we get back to converting the integral literal. */
1235 if (radix == 16 && ! found_hex_digits)
1236 java_lex_error
1237 ("0x must be followed by at least one hexadecimal digit", 0);
1238 else if (radix == 8 && found_non_octal_digits)
1239 java_lex_error ("Octal literal contains digit out of range", 0);
1240 else if (c == 'L' || c == 'l')
1241 long_suffix = 1;
1242 else
1243 java_unget_unicode ();
1245 #ifdef JAVA_LEX_DEBUG
1246 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1247 JAVA_LEX_LIT (literal_token, radix);
1248 #endif
1249 /* This section of code is borrowed from gcc/c-lex.c. */
1250 if (!overflow)
1252 bytes = GET_TYPE_PRECISION (long_type_node);
1253 for (i = bytes; i < TOTAL_PARTS; i++)
1254 if (parts [i])
1256 overflow = 1;
1257 break;
1260 high = low = 0;
1261 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1263 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1264 / HOST_BITS_PER_CHAR)]
1265 << (i * HOST_BITS_PER_CHAR));
1266 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1268 /* End borrowed section. */
1270 #ifndef JC1_LITE
1271 /* Range checking. */
1272 value = build_int_2 (low, high);
1273 /* Temporarily set type to unsigned. */
1274 SET_LVAL_NODE_TYPE (value, (long_suffix
1275 ? unsigned_long_type_node
1276 : unsigned_int_type_node));
1278 /* For base 10 numbers, only values up to the highest value
1279 (plus one) can be written. For instance, only ints up to
1280 2147483648 can be written. The special case of the largest
1281 negative value is handled elsewhere. For other bases, any
1282 number can be represented. */
1283 if (overflow || (radix == 10
1284 && tree_int_cst_lt (long_suffix
1285 ? decimal_long_max
1286 : decimal_int_max,
1287 value)))
1289 if (long_suffix)
1290 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1291 else
1292 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1295 /* Sign extend the value. */
1296 SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1297 force_fit_type (value, 0);
1298 JAVA_RADIX10_FLAG (value) = radix == 10;
1299 #else
1300 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1301 long_suffix ? long_type_node : int_type_node);
1302 #endif
1303 return INT_LIT_TK;
1306 /* Character literals. */
1307 if (c == '\'')
1309 int char_lit;
1310 if ((c = java_get_unicode ()) == '\\')
1311 char_lit = java_parse_escape_sequence ();
1312 else
1314 if (c == '\n' || c == '\'')
1315 java_lex_error ("Invalid character literal", 0);
1316 char_lit = c;
1319 c = java_get_unicode ();
1321 if ((c == '\n') || (c == UEOF))
1322 java_lex_error ("Character literal not terminated at end of line", 0);
1323 if (c != '\'')
1324 java_lex_error ("Syntax error in character literal", 0);
1326 if (char_lit == JAVA_CHAR_ERROR)
1327 char_lit = 0; /* We silently convert it to zero. */
1329 JAVA_LEX_CHAR_LIT (char_lit);
1330 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1331 return CHAR_LIT_TK;
1334 /* String literals. */
1335 if (c == '"')
1337 int no_error;
1338 char *string;
1340 for (no_error = 1, c = java_get_unicode ();
1341 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1343 if (c == '\\')
1344 c = java_parse_escape_sequence ();
1345 if (c == JAVA_CHAR_ERROR)
1347 no_error = 0;
1348 c = 0; /* We silently convert it to zero. */
1350 java_unicode_2_utf8 (c);
1352 if (c == '\n' || c == UEOF) /* ULT. */
1354 lineno--; /* Refer to the line where the terminator was seen. */
1355 java_lex_error ("String not terminated at end of line", 0);
1356 lineno++;
1359 obstack_1grow (&temporary_obstack, '\0');
1360 string = obstack_finish (&temporary_obstack);
1361 #ifndef JC1_LITE
1362 if (!no_error || (c != '"'))
1363 java_lval->node = error_mark_node; /* FIXME: Requires futher
1364 testing. */
1365 else
1366 java_lval->node = build_string (strlen (string), string);
1367 #endif
1368 obstack_free (&temporary_obstack, string);
1369 return STRING_LIT_TK;
1372 /* Separator. */
1373 switch (c)
1375 case '(':
1376 JAVA_LEX_SEP (c);
1377 BUILD_OPERATOR (OP_TK);
1378 case ')':
1379 JAVA_LEX_SEP (c);
1380 return CP_TK;
1381 case '{':
1382 JAVA_LEX_SEP (c);
1383 if (ctxp->ccb_indent == 1)
1384 ctxp->first_ccb_indent1 = lineno;
1385 ctxp->ccb_indent++;
1386 BUILD_OPERATOR (OCB_TK);
1387 case '}':
1388 JAVA_LEX_SEP (c);
1389 ctxp->ccb_indent--;
1390 if (ctxp->ccb_indent == 1)
1391 ctxp->last_ccb_indent1 = lineno;
1392 BUILD_OPERATOR (CCB_TK);
1393 case '[':
1394 JAVA_LEX_SEP (c);
1395 BUILD_OPERATOR (OSB_TK);
1396 case ']':
1397 JAVA_LEX_SEP (c);
1398 return CSB_TK;
1399 case ';':
1400 JAVA_LEX_SEP (c);
1401 return SC_TK;
1402 case ',':
1403 JAVA_LEX_SEP (c);
1404 return C_TK;
1405 case '.':
1406 JAVA_LEX_SEP (c);
1407 BUILD_OPERATOR (DOT_TK);
1408 /* return DOT_TK; */
1411 /* Operators. */
1412 switch (c)
1414 case '=':
1415 if ((c = java_get_unicode ()) == '=')
1417 BUILD_OPERATOR (EQ_TK);
1419 else
1421 /* Equals is used in two different locations. In the
1422 variable_declarator: rule, it has to be seen as '=' as opposed
1423 to being seen as an ordinary assignment operator in
1424 assignment_operators: rule. */
1425 java_unget_unicode ();
1426 BUILD_OPERATOR (ASSIGN_TK);
1429 case '>':
1430 switch ((c = java_get_unicode ()))
1432 case '=':
1433 BUILD_OPERATOR (GTE_TK);
1434 case '>':
1435 switch ((c = java_get_unicode ()))
1437 case '>':
1438 if ((c = java_get_unicode ()) == '=')
1440 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1442 else
1444 java_unget_unicode ();
1445 BUILD_OPERATOR (ZRS_TK);
1447 case '=':
1448 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1449 default:
1450 java_unget_unicode ();
1451 BUILD_OPERATOR (SRS_TK);
1453 default:
1454 java_unget_unicode ();
1455 BUILD_OPERATOR (GT_TK);
1458 case '<':
1459 switch ((c = java_get_unicode ()))
1461 case '=':
1462 BUILD_OPERATOR (LTE_TK);
1463 case '<':
1464 if ((c = java_get_unicode ()) == '=')
1466 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1468 else
1470 java_unget_unicode ();
1471 BUILD_OPERATOR (LS_TK);
1473 default:
1474 java_unget_unicode ();
1475 BUILD_OPERATOR (LT_TK);
1478 case '&':
1479 switch ((c = java_get_unicode ()))
1481 case '&':
1482 BUILD_OPERATOR (BOOL_AND_TK);
1483 case '=':
1484 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1485 default:
1486 java_unget_unicode ();
1487 BUILD_OPERATOR (AND_TK);
1490 case '|':
1491 switch ((c = java_get_unicode ()))
1493 case '|':
1494 BUILD_OPERATOR (BOOL_OR_TK);
1495 case '=':
1496 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1497 default:
1498 java_unget_unicode ();
1499 BUILD_OPERATOR (OR_TK);
1502 case '+':
1503 switch ((c = java_get_unicode ()))
1505 case '+':
1506 BUILD_OPERATOR (INCR_TK);
1507 case '=':
1508 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1509 default:
1510 java_unget_unicode ();
1511 BUILD_OPERATOR (PLUS_TK);
1514 case '-':
1515 switch ((c = java_get_unicode ()))
1517 case '-':
1518 BUILD_OPERATOR (DECR_TK);
1519 case '=':
1520 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1521 default:
1522 java_unget_unicode ();
1523 BUILD_OPERATOR (MINUS_TK);
1526 case '*':
1527 if ((c = java_get_unicode ()) == '=')
1529 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1531 else
1533 java_unget_unicode ();
1534 BUILD_OPERATOR (MULT_TK);
1537 case '/':
1538 if ((c = java_get_unicode ()) == '=')
1540 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1542 else
1544 java_unget_unicode ();
1545 BUILD_OPERATOR (DIV_TK);
1548 case '^':
1549 if ((c = java_get_unicode ()) == '=')
1551 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1553 else
1555 java_unget_unicode ();
1556 BUILD_OPERATOR (XOR_TK);
1559 case '%':
1560 if ((c = java_get_unicode ()) == '=')
1562 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1564 else
1566 java_unget_unicode ();
1567 BUILD_OPERATOR (REM_TK);
1570 case '!':
1571 if ((c = java_get_unicode()) == '=')
1573 BUILD_OPERATOR (NEQ_TK);
1575 else
1577 java_unget_unicode ();
1578 BUILD_OPERATOR (NEG_TK);
1581 case '?':
1582 JAVA_LEX_OP ("?");
1583 BUILD_OPERATOR (REL_QM_TK);
1584 case ':':
1585 JAVA_LEX_OP (":");
1586 BUILD_OPERATOR (REL_CL_TK);
1587 case '~':
1588 BUILD_OPERATOR (NOT_TK);
1591 /* Keyword, boolean literal or null literal. */
1592 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1593 c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1595 java_unicode_2_utf8 (c);
1596 if (all_ascii && c >= 128)
1597 all_ascii = 0;
1598 ascii_index++;
1601 obstack_1grow (&temporary_obstack, '\0');
1602 string = obstack_finish (&temporary_obstack);
1603 if (c != UEOF)
1604 java_unget_unicode ();
1606 /* If we have something all ascii, we consider a keyword, a boolean
1607 literal, a null literal or an all ASCII identifier. Otherwise,
1608 this is an identifier (possibly not respecting formation rule). */
1609 if (all_ascii)
1611 const struct java_keyword *kw;
1612 if ((kw=java_keyword (string, ascii_index)))
1614 JAVA_LEX_KW (string);
1615 switch (kw->token)
1617 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1618 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1619 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1620 case PRIVATE_TK: case STRICT_TK:
1621 SET_MODIFIER_CTX (kw->token);
1622 return MODIFIER_TK;
1623 case FLOAT_TK:
1624 SET_LVAL_NODE (float_type_node);
1625 return FP_TK;
1626 case DOUBLE_TK:
1627 SET_LVAL_NODE (double_type_node);
1628 return FP_TK;
1629 case BOOLEAN_TK:
1630 SET_LVAL_NODE (boolean_type_node);
1631 return BOOLEAN_TK;
1632 case BYTE_TK:
1633 SET_LVAL_NODE (byte_type_node);
1634 return INTEGRAL_TK;
1635 case SHORT_TK:
1636 SET_LVAL_NODE (short_type_node);
1637 return INTEGRAL_TK;
1638 case INT_TK:
1639 SET_LVAL_NODE (int_type_node);
1640 return INTEGRAL_TK;
1641 case LONG_TK:
1642 SET_LVAL_NODE (long_type_node);
1643 return INTEGRAL_TK;
1644 case CHAR_TK:
1645 SET_LVAL_NODE (char_type_node);
1646 return INTEGRAL_TK;
1648 /* Keyword based literals. */
1649 case TRUE_TK:
1650 case FALSE_TK:
1651 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1652 boolean_true_node : boolean_false_node));
1653 return BOOL_LIT_TK;
1654 case NULL_TK:
1655 SET_LVAL_NODE (null_pointer_node);
1656 return NULL_TK;
1658 case ASSERT_TK:
1659 if (flag_assert)
1661 BUILD_OPERATOR (kw->token);
1662 return kw->token;
1664 else
1665 break;
1667 /* Some keyword we want to retain information on the location
1668 they where found. */
1669 case CASE_TK:
1670 case DEFAULT_TK:
1671 case SUPER_TK:
1672 case THIS_TK:
1673 case RETURN_TK:
1674 case BREAK_TK:
1675 case CONTINUE_TK:
1676 case TRY_TK:
1677 case CATCH_TK:
1678 case THROW_TK:
1679 case INSTANCEOF_TK:
1680 BUILD_OPERATOR (kw->token);
1682 default:
1683 return kw->token;
1688 /* We may have an ID here. */
1689 if (JAVA_START_CHAR_P (first_unicode))
1691 JAVA_LEX_ID (string);
1692 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1693 return ID_TK;
1696 /* Everything else is an invalid character in the input. */
1698 char lex_error_buffer [128];
1699 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1700 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1701 java_lex_error (lex_error_buffer, 1);
1703 return 0;
1706 #ifndef JC1_LITE
1708 /* The exported interface to the lexer. */
1709 static int
1710 java_lex (YYSTYPE *java_lval)
1712 int r;
1714 timevar_push (TV_LEX);
1715 r = do_java_lex (java_lval);
1716 timevar_pop (TV_LEX);
1717 return r;
1720 /* This is called by the parser to see if an error should be generated
1721 due to numeric overflow. This function only handles the particular
1722 case of the largest negative value, and is only called in the case
1723 where this value is not preceded by `-'. */
1724 static void
1725 error_if_numeric_overflow (tree value)
1727 if (TREE_CODE (value) == INTEGER_CST
1728 && JAVA_RADIX10_FLAG (value)
1729 && tree_int_cst_sgn (value) < 0)
1731 if (TREE_TYPE (value) == long_type_node)
1732 java_lex_error ("Numeric overflow for `long' literal", 0);
1733 else
1734 java_lex_error ("Numeric overflow for `int' literal", 0);
1738 #endif /* JC1_LITE */
1740 static void
1741 java_unicode_2_utf8 (unicode_t unicode)
1743 if (RANGE (unicode, 0x01, 0x7f))
1744 obstack_1grow (&temporary_obstack, (char)unicode);
1745 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1747 obstack_1grow (&temporary_obstack,
1748 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1749 obstack_1grow (&temporary_obstack,
1750 (unsigned char)(0x80 | (unicode & 0x3f)));
1752 else /* Range 0x800-0xffff. */
1754 obstack_1grow (&temporary_obstack,
1755 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1756 obstack_1grow (&temporary_obstack,
1757 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1758 obstack_1grow (&temporary_obstack,
1759 (unsigned char)(0x80 | (unicode & 0x003f)));
1763 #ifndef JC1_LITE
1764 static tree
1765 build_wfl_node (tree node)
1767 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1768 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1769 TREE_TYPE (node) = NULL_TREE;
1770 return node;
1772 #endif
1774 static void
1775 java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
1777 #ifndef JC1_LITE
1778 ctxp->elc.line = ctxp->c_line->lineno;
1779 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1781 /* Might be caught in the middle of some error report. */
1782 ctxp->java_error_flag = 0;
1783 java_error (NULL);
1784 java_error (msg);
1785 #endif
#ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, terminates a line, and
   0 otherwise.

   Both '\n' and '\r' count as terminators.  For '\r' the next character
   is read as well: a following '\n' is consumed, so "\r\n" counts as a
   single line end; any other character is pushed back with ungetc.
   FP is not touched unless C is '\r'.  */
static int
java_is_eol (FILE *fp, int c)
{
  int next;

  switch (c)
    {
    case '\r':
      next = getc (fp);
      if (next != '\n' && next != EOF)
        ungetc (next, fp);
      return 1;
    case '\n':
      return 1;
    default:
      return 0;
    }
}
#endif
1808 char *
1809 java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
1810 int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
1812 #ifdef JC1_LITE
1813 return 0;
1814 #else
1815 /* Dumb implementation. Doesn't try to cache or optimize things. */
1816 /* First line of the file is line 1, first column is 1. */
1818 /* COL == -1 means, at the CR/LF in LINE. */
1819 /* COL == -2 means, at the first non space char in LINE. */
1821 FILE *fp;
1822 int c, ccol, cline = 1;
1823 int current_line_col = 0;
1824 int first_non_space = 0;
1825 char *base;
1827 if (!(fp = fopen (filename, "r")))
1828 fatal_io_error ("can't open %s", filename);
1830 while (cline != line)
1832 c = getc (fp);
1833 if (c == EOF)
1835 static const char msg[] = "<<file too short - unexpected EOF>>";
1836 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1837 goto have_line;
1839 if (java_is_eol (fp, c))
1840 cline++;
1843 /* Gather the chars of the current line in a buffer. */
1844 for (;;)
1846 c = getc (fp);
1847 if (c < 0 || java_is_eol (fp, c))
1848 break;
1849 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1850 first_non_space = current_line_col;
1851 obstack_1grow (&temporary_obstack, c);
1852 current_line_col++;
1854 have_line:
1856 obstack_1grow (&temporary_obstack, '\n');
1858 if (col == -1)
1860 col = current_line_col;
1861 first_non_space = 0;
1863 else if (col == -2)
1864 col = first_non_space;
1865 else
1866 first_non_space = 0;
1868 /* Place the '^' a the right position. */
1869 base = obstack_base (&temporary_obstack);
1870 for (ccol = 1; ccol <= col+3; ccol++)
1872 /* Compute \t when reaching first_non_space. */
1873 char c = (first_non_space ?
1874 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1875 obstack_1grow (&temporary_obstack, c);
1877 obstack_grow0 (&temporary_obstack, "^", 1);
1879 fclose (fp);
1880 return obstack_finish (&temporary_obstack);
1881 #endif
1884 #ifndef JC1_LITE
/* Compare the LENGTH bytes at STR, read one character at a time with
   UTF8_GET, against the NUL-terminated string NAME.

   Returns the difference between the first pair of characters that do
   not match.  If every character of NAME matched, returns 0 when STR
   was consumed exactly up to its limit, and 1 otherwise.
   NOTE(review): UTF8_GET is defined elsewhere; it presumably advances
   STR and yields a negative value once the limit is reached -- confirm.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  int i;

  for (i = 0; name[i]; ++i)
    {
      int ch = UTF8_GET (str, limit);
      if (ch != name[i])
        return ch - name[i];
    }

  return str == limit ? 0 : 1;
}
/* A sorted list of all C++ keywords.  The entries are in ascending
   byte order; cxx_keyword_p searches this table by bisection, so any
   addition must keep that order.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof",
  "__alignof__",
  "__asm",
  "__asm__",
  "__attribute",
  "__attribute__",
  "__builtin_va_arg",
  "__complex",
  "__complex__",
  "__const",
  "__const__",
  "__extension__",
  "__imag",
  "__imag__",
  "__inline",
  "__inline__",
  "__label__",
  "__null",
  "__real",
  "__real__",
  "__restrict",
  "__restrict__",
  "__signed",
  "__signed__",
  "__typeof",
  "__typeof__",
  "__volatile",
  "__volatile__",
  "and",
  "and_eq",
  "asm",
  "auto",
  "bitand",
  "bitor",
  "bool",
  "break",
  "case",
  "catch",
  "char",
  "class",
  "compl",
  "const",
  "const_cast",
  "continue",
  "default",
  "delete",
  "do",
  "double",
  "dynamic_cast",
  "else",
  "enum",
  "explicit",
  "export",
  "extern",
  "false",
  "float",
  "for",
  "friend",
  "goto",
  "if",
  "inline",
  "int",
  "long",
  "mutable",
  "namespace",
  "new",
  "not",
  "not_eq",
  "operator",
  "or",
  "or_eq",
  "private",
  "protected",
  "public",
  "register",
  "reinterpret_cast",
  "return",
  "short",
  "signed",
  "sizeof",
  "static",
  "static_cast",
  "struct",
  "switch",
  "template",
  "this",
  "throw",
  "true",
  "try",
  "typedef",
  "typeid",
  "typename",
  "typeof",
  "union",
  "unsigned",
  "using",
  "virtual",
  "void",
  "volatile",
  "wchar_t",
  "while",
  "xor",
  "xor_eq"
};
2011 /* Return true if NAME is a C++ keyword. */
2014 cxx_keyword_p (const char *name, int length)
2016 int last = ARRAY_SIZE (cxx_keywords);
2017 int first = 0;
2018 int mid = (last + first) / 2;
2019 int old = -1;
2021 for (mid = (last + first) / 2;
2022 mid != old;
2023 old = mid, mid = (last + first) / 2)
2025 int kwl = strlen (cxx_keywords[mid]);
2026 int min_length = kwl > length ? length : kwl;
2027 int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
2029 if (r == 0)
2031 int i;
2032 /* We've found a match if all the remaining characters are `$'. */
2033 for (i = min_length; i < length && name[i] == '$'; ++i)
2035 if (i == length)
2036 return 1;
2037 r = 1;
2040 if (r < 0)
2041 last = mid;
2042 else
2043 first = mid;
2045 return 0;
2047 #endif /* JC1_LITE */