* config/arm/symbian.h (STARTFILE_SPEC): Remove crt*.o.
[official-gcc.git] / gcc / java / lex.c
blobaa3efcc06704512495bcc1eb143bdb32744ae619
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
3 Free Software Foundation, Inc.
4 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA.
23 Java and all Java-based marks are trademarks or registered trademarks
24 of Sun Microsystems, Inc. in the United States and other countries.
25 The Free Software Foundation is independent of Sun Microsystems, Inc. */
/* It defines java_lex (yylex), which reads a Java ASCII source file
   possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found except comments,
   white space and line terminators.  When necessary, it also fills
   the java_lval (yylval) union.  It's implemented to be called by a
   re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
38 #include "keyword.h"
39 #include "flags.h"
40 #include "chartables.h"
41 #ifndef JC1_LITE
42 #include "timevar.h"
43 #endif
45 /* Function declarations. */
46 static char *java_sprint_unicode (struct java_line *, int);
47 static void java_unicode_2_utf8 (unicode_t);
48 static void java_lex_error (const char *, int);
49 #ifndef JC1_LITE
50 static int do_java_lex (YYSTYPE *);
51 static int java_lex (YYSTYPE *);
52 static int java_is_eol (FILE *, int);
53 static tree build_wfl_node (tree);
54 #endif
55 static void java_store_unicode (struct java_line *, unicode_t, int);
56 static int java_parse_escape_sequence (void);
57 static int java_start_char_p (unicode_t);
58 static int java_part_char_p (unicode_t);
59 static int java_space_char_p (unicode_t);
60 static void java_parse_doc_section (int);
61 static void java_parse_end_comment (int);
62 static int java_get_unicode (void);
63 static int java_read_unicode (java_lexer *, int *);
64 static int java_read_unicode_collapsing_terminators (java_lexer *, int *);
65 static void java_store_unicode (struct java_line *, unicode_t, int);
66 static int java_read_char (java_lexer *);
67 static void java_allocate_new_line (void);
68 static void java_unget_unicode (void);
69 static unicode_t java_sneak_unicode (void);
70 #ifndef JC1_LITE
71 static int utf8_cmp (const unsigned char *, int, const char *);
72 #endif
74 java_lexer *java_new_lexer (FILE *, const char *);
75 #ifndef JC1_LITE
76 static void error_if_numeric_overflow (tree);
77 #endif
#ifdef HAVE_ICONV
/* Becomes nonzero once the byte-order probe that computes
   `need_byteswap' has been attempted.  Starts out zero, like every
   object with static storage duration.  */
static int byteswap_init;

/* Some versions of iconv() (e.g., glibc 2.1.3) return UCS-2 in
   big-endian order rather than native order.  A single test
   conversion is done at startup; this flag records whether its
   output came back swapped.  */
static int need_byteswap;
#endif
90 void
91 java_init_lex (FILE *finput, const char *encoding)
93 #ifndef JC1_LITE
94 int java_lang_imported = 0;
96 if (!java_lang_id)
97 java_lang_id = get_identifier ("java.lang");
98 if (!inst_id)
99 inst_id = get_identifier ("inst$");
100 if (!wpv_id)
101 wpv_id = get_identifier ("write_parm_value$");
103 if (!java_lang_imported)
105 tree node = build_tree_list
106 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
107 read_import_dir (TREE_PURPOSE (node));
108 TREE_CHAIN (node) = ctxp->import_demand_list;
109 ctxp->import_demand_list = node;
110 java_lang_imported = 1;
113 if (!wfl_operator)
114 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
115 if (!label_id)
116 label_id = get_identifier ("$L");
117 if (!wfl_append)
118 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
119 if (!wfl_string_buffer)
120 wfl_string_buffer =
121 build_expr_wfl (get_identifier (flag_emit_class_files
122 ? "java.lang.StringBuffer"
123 : "gnu.gcj.runtime.StringBuffer"),
124 NULL, 0, 0);
125 if (!wfl_to_string)
126 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
128 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
129 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
131 memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
132 current_jcf = ggc_alloc_cleared (sizeof (JCF));
133 ctxp->current_parsed_class = NULL;
134 ctxp->package = NULL_TREE;
135 #endif
137 ctxp->filename = input_filename;
138 ctxp->lineno = input_line = 0;
139 ctxp->p_line = NULL;
140 ctxp->c_line = NULL;
141 ctxp->java_error_flag = 0;
142 ctxp->lexer = java_new_lexer (finput, encoding);
145 static char *
146 java_sprint_unicode (struct java_line *line, int i)
148 static char buffer [10];
149 if (line->unicode_escape_p [i] || line->line [i] > 128)
150 sprintf (buffer, "\\u%04x", line->line [i]);
151 else
153 buffer [0] = line->line [i];
154 buffer [1] = '\0';
156 return buffer;
159 static unicode_t
160 java_sneak_unicode (void)
162 return (ctxp->c_line->line [ctxp->c_line->current]);
165 static void
166 java_unget_unicode (void)
168 if (!ctxp->c_line->current)
169 /* Can't unget unicode. */
170 abort ();
172 ctxp->c_line->current--;
173 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
176 static void
177 java_allocate_new_line (void)
179 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
180 char ahead_escape_p = (ctxp->c_line ?
181 ctxp->c_line->unicode_escape_ahead_p : 0);
183 if (ctxp->c_line && !ctxp->c_line->white_space_only)
185 if (ctxp->p_line)
187 free (ctxp->p_line->unicode_escape_p);
188 free (ctxp->p_line->line);
189 free (ctxp->p_line);
191 ctxp->p_line = ctxp->c_line;
192 ctxp->c_line = NULL; /* Reallocated. */
195 if (!ctxp->c_line)
197 ctxp->c_line = xmalloc (sizeof (struct java_line));
198 ctxp->c_line->max = JAVA_LINE_MAX;
199 ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
200 ctxp->c_line->unicode_escape_p =
201 xmalloc (sizeof (char)*ctxp->c_line->max);
202 ctxp->c_line->white_space_only = 0;
205 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
206 ctxp->c_line->char_col = ctxp->c_line->current = 0;
207 if (ahead)
209 ctxp->c_line->line [ctxp->c_line->size] = ahead;
210 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
211 ctxp->c_line->size++;
213 ctxp->c_line->ahead [0] = 0;
214 ctxp->c_line->unicode_escape_ahead_p = 0;
215 ctxp->c_line->lineno = ++input_line;
216 ctxp->c_line->white_space_only = 1;
219 /* Create a new lexer object. */
221 java_lexer *
222 java_new_lexer (FILE *finput, const char *encoding)
224 java_lexer *lex = xmalloc (sizeof (java_lexer));
225 int enc_error = 0;
227 lex->finput = finput;
228 lex->bs_count = 0;
229 lex->unget_value = 0;
230 lex->hit_eof = 0;
231 lex->encoding = encoding;
233 #ifdef HAVE_ICONV
234 lex->handle = iconv_open ("UCS-2", encoding);
235 if (lex->handle != (iconv_t) -1)
237 lex->first = -1;
238 lex->last = -1;
239 lex->out_first = -1;
240 lex->out_last = -1;
241 lex->read_anything = 0;
242 lex->use_fallback = 0;
244 /* Work around broken iconv() implementations by doing checking at
245 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
246 then all UCS-2 encoders will be broken. Perhaps not a valid
247 assumption. */
248 if (! byteswap_init)
250 iconv_t handle;
252 byteswap_init = 1;
254 handle = iconv_open ("UCS-2", "UTF-8");
255 if (handle != (iconv_t) -1)
257 unicode_t result;
258 unsigned char in[3];
259 char *inp, *outp;
260 size_t inc, outc, r;
262 /* This is the UTF-8 encoding of \ufeff. */
263 in[0] = 0xef;
264 in[1] = 0xbb;
265 in[2] = 0xbf;
267 inp = (char *) in;
268 inc = 3;
269 outp = (char *) &result;
270 outc = 2;
272 r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
273 &outp, &outc);
274 iconv_close (handle);
275 /* Conversion must be complete for us to use the result. */
276 if (r != (size_t) -1 && inc == 0 && outc == 0)
277 need_byteswap = (result != 0xfeff);
281 lex->byte_swap = need_byteswap;
283 else
284 #endif /* HAVE_ICONV */
286 /* If iconv failed, use the internal decoder if the default
287 encoding was requested. This code is used on platforms where
288 iconv exists but is insufficient for our needs. For
289 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
291 On Solaris the default encoding, as returned by nl_langinfo(),
292 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
293 understand that. We work around that by pretending
294 `646' to be the same as UTF-8. */
295 if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
296 enc_error = 1;
297 #ifdef HAVE_ICONV
298 else
300 lex->use_fallback = 1;
301 lex->encoding = "UTF-8";
303 #endif /* HAVE_ICONV */
306 if (enc_error)
307 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
309 return lex;
312 void
313 java_destroy_lexer (java_lexer *lex)
315 #ifdef HAVE_ICONV
316 if (! lex->use_fallback)
317 iconv_close (lex->handle);
318 #endif
319 free (lex);
322 static int
323 java_read_char (java_lexer *lex)
325 if (lex->unget_value)
327 unicode_t r = lex->unget_value;
328 lex->unget_value = 0;
329 return r;
332 #ifdef HAVE_ICONV
333 if (! lex->use_fallback)
335 size_t ir, inbytesleft, in_save, out_count, out_save;
336 char *inp, *outp;
337 unicode_t result;
339 /* If there is data which has already been converted, use it. */
340 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
342 lex->out_first = 0;
343 lex->out_last = 0;
345 while (1)
347 /* See if we need to read more data. If FIRST == 0 then
348 the previous conversion attempt ended in the middle of
349 a character at the end of the buffer. Otherwise we
350 only have to read if the buffer is empty. */
351 if (lex->first == 0 || lex->first >= lex->last)
353 int r;
355 if (lex->first >= lex->last)
357 lex->first = 0;
358 lex->last = 0;
360 if (feof (lex->finput))
361 return UEOF;
362 r = fread (&lex->buffer[lex->last], 1,
363 sizeof (lex->buffer) - lex->last,
364 lex->finput);
365 lex->last += r;
368 inbytesleft = lex->last - lex->first;
369 out_count = sizeof (lex->out_buffer) - lex->out_last;
371 if (inbytesleft == 0)
373 /* We've tried to read and there is nothing left. */
374 return UEOF;
377 in_save = inbytesleft;
378 out_save = out_count;
379 inp = &lex->buffer[lex->first];
380 outp = (char *) &lex->out_buffer[lex->out_last];
381 ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
382 &inbytesleft, &outp, &out_count);
384 /* If we haven't read any bytes, then look to see if we
385 have read a BOM. */
386 if (! lex->read_anything && out_save - out_count >= 2)
388 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
389 if (uc == 0xfeff)
391 lex->byte_swap = 0;
392 lex->out_first += 2;
394 else if (uc == 0xfffe)
396 lex->byte_swap = 1;
397 lex->out_first += 2;
399 lex->read_anything = 1;
402 if (lex->byte_swap)
404 unsigned int i;
405 for (i = 0; i < out_save - out_count; i += 2)
407 char t = lex->out_buffer[lex->out_last + i];
408 lex->out_buffer[lex->out_last + i]
409 = lex->out_buffer[lex->out_last + i + 1];
410 lex->out_buffer[lex->out_last + i + 1] = t;
414 lex->first += in_save - inbytesleft;
415 lex->out_last += out_save - out_count;
417 /* If we converted anything at all, move along. */
418 if (out_count != out_save)
419 break;
421 if (ir == (size_t) -1)
423 if (errno == EINVAL)
425 /* This is ok. This means that the end of our buffer
426 is in the middle of a character sequence. We just
427 move the valid part of the buffer to the beginning
428 to force a read. */
429 memmove (&lex->buffer[0], &lex->buffer[lex->first],
430 lex->last - lex->first);
431 lex->last -= lex->first;
432 lex->first = 0;
434 else
436 /* A more serious error. */
437 char buffer[128];
438 sprintf (buffer,
439 "Unrecognized character for encoding '%s'",
440 lex->encoding);
441 java_lex_error (buffer, 0);
442 return UEOF;
448 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
450 /* Don't have any data. */
451 return UEOF;
454 /* Success. */
455 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
456 lex->out_first += 2;
457 return result;
459 else
460 #endif /* HAVE_ICONV */
462 int c, c1, c2;
463 c = getc (lex->finput);
465 if (c == EOF)
466 return UEOF;
467 if (c < 128)
468 return (unicode_t) c;
469 else
471 if ((c & 0xe0) == 0xc0)
473 c1 = getc (lex->finput);
474 if ((c1 & 0xc0) == 0x80)
476 unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
477 /* Check for valid 2-byte characters. We explicitly
478 allow \0 because this encoding is common in the
479 Java world. */
480 if (r == 0 || (r >= 0x80 && r <= 0x7ff))
481 return r;
484 else if ((c & 0xf0) == 0xe0)
486 c1 = getc (lex->finput);
487 if ((c1 & 0xc0) == 0x80)
489 c2 = getc (lex->finput);
490 if ((c2 & 0xc0) == 0x80)
492 unicode_t r = (unicode_t)(((c & 0xf) << 12) +
493 (( c1 & 0x3f) << 6)
494 + (c2 & 0x3f));
495 /* Check for valid 3-byte characters.
496 Don't allow surrogate, \ufffe or \uffff. */
497 if (IN_RANGE (r, 0x800, 0xffff)
498 && ! IN_RANGE (r, 0xd800, 0xdfff)
499 && r != 0xfffe && r != 0xffff)
500 return r;
505 /* We simply don't support invalid characters. We also
506 don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
507 cannot be valid Java characters. */
508 java_lex_error ("malformed UTF-8 character", 0);
512 /* We only get here on error. */
513 return UEOF;
516 static void
517 java_store_unicode (struct java_line *l, unicode_t c, int unicode_escape_p)
519 if (l->size == l->max)
521 l->max += JAVA_LINE_MAX;
522 l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
523 l->unicode_escape_p = xrealloc (l->unicode_escape_p,
524 sizeof (char)*l->max);
526 l->line [l->size] = c;
527 l->unicode_escape_p [l->size++] = unicode_escape_p;
530 static int
531 java_read_unicode (java_lexer *lex, int *unicode_escape_p)
533 int c;
535 c = java_read_char (lex);
536 *unicode_escape_p = 0;
538 if (c != '\\')
540 lex->bs_count = 0;
541 return c;
544 ++lex->bs_count;
545 if ((lex->bs_count) % 2 == 1)
547 /* Odd number of \ seen. */
548 c = java_read_char (lex);
549 if (c == 'u')
551 unicode_t unicode = 0;
552 int shift = 12;
554 /* Recognize any number of `u's in \u. */
555 while ((c = java_read_char (lex)) == 'u')
558 shift = 12;
561 if (c == UEOF)
563 java_lex_error ("prematurely terminated \\u sequence", 0);
564 return UEOF;
567 if (hex_p (c))
568 unicode |= (unicode_t)(hex_value (c) << shift);
569 else
571 java_lex_error ("non-hex digit in \\u sequence", 0);
572 break;
575 c = java_read_char (lex);
576 shift -= 4;
578 while (shift >= 0);
580 if (c != UEOF)
581 lex->unget_value = c;
583 lex->bs_count = 0;
584 *unicode_escape_p = 1;
585 return unicode;
587 lex->unget_value = c;
589 return (unicode_t) '\\';
592 static int
593 java_read_unicode_collapsing_terminators (java_lexer *lex,
594 int *unicode_escape_p)
596 int c = java_read_unicode (lex, unicode_escape_p);
598 if (c == '\r')
600 /* We have to read ahead to see if we got \r\n. In that case we
601 return a single line terminator. */
602 int dummy;
603 c = java_read_unicode (lex, &dummy);
604 if (c != '\n' && c != UEOF)
605 lex->unget_value = c;
606 /* In either case we must return a newline. */
607 c = '\n';
610 return c;
613 static int
614 java_get_unicode (void)
616 /* It's time to read a line when... */
617 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
619 int c;
620 int found_chars = 0;
622 if (ctxp->lexer->hit_eof)
623 return UEOF;
625 java_allocate_new_line ();
626 if (ctxp->c_line->line[0] != '\n')
628 for (;;)
630 int unicode_escape_p;
631 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
632 &unicode_escape_p);
633 if (c != UEOF)
635 found_chars = 1;
636 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
637 if (ctxp->c_line->white_space_only
638 && !JAVA_WHITE_SPACE_P (c)
639 && c != '\n')
640 ctxp->c_line->white_space_only = 0;
642 if ((c == '\n') || (c == UEOF))
643 break;
646 if (c == UEOF && ! found_chars)
648 ctxp->lexer->hit_eof = 1;
649 return UEOF;
653 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
654 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
655 return ctxp->c_line->line [ctxp->c_line->current++];
658 /* Parse the end of a C style comment.
659 * C is the first character following the '/' and '*'. */
660 static void
661 java_parse_end_comment (int c)
663 for ( ;; c = java_get_unicode ())
665 switch (c)
667 case UEOF:
668 java_lex_error ("Comment not terminated at end of input", 0);
669 return;
670 case '*':
671 switch (c = java_get_unicode ())
673 case UEOF:
674 java_lex_error ("Comment not terminated at end of input", 0);
675 return;
676 case '/':
677 return;
678 case '*': /* Reparse only '*'. */
679 java_unget_unicode ();
685 /* Parse the documentation section. Keywords must be at the beginning
686 of a documentation comment line (ignoring white space and any `*'
687 character). Parsed keyword(s): @DEPRECATED. */
689 static void
690 java_parse_doc_section (int c)
692 int last_was_star;
694 /* We reset this here, because only the most recent doc comment
695 applies to the following declaration. */
696 ctxp->deprecated = 0;
698 /* We loop over all the lines of the comment. We'll eventually exit
699 if we hit EOF prematurely, or when we see the comment
700 terminator. */
701 while (1)
703 /* These first steps need only be done if we're still looking
704 for the deprecated tag. If we've already seen it, we might
705 as well skip looking for it again. */
706 if (! ctxp->deprecated)
708 /* Skip whitespace and '*'s. We must also check for the end
709 of the comment here. */
710 while (JAVA_WHITE_SPACE_P (c) || c == '*')
712 last_was_star = (c == '*');
713 c = java_get_unicode ();
714 if (last_was_star && c == '/')
716 /* We just saw the comment terminator. */
717 return;
721 if (c == UEOF)
722 goto eof;
724 if (c == '@')
726 const char *deprecated = "@deprecated";
727 int i;
729 for (i = 0; deprecated[i]; ++i)
731 if (c != deprecated[i])
732 break;
733 /* We write the code in this way, with the
734 update at the end, so that after the loop
735 we're left with the next character in C. */
736 c = java_get_unicode ();
739 if (c == UEOF)
740 goto eof;
742 /* @deprecated must be followed by a space or newline.
743 We also allow a '*' in case it appears just before
744 the end of a comment. In this position only we also
745 must allow any Unicode space character. */
746 if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
748 if (! deprecated[i])
749 ctxp->deprecated = 1;
754 /* We've examined the relevant content from this line. Now we
755 skip the remaining characters and start over with the next
756 line. We also check for end of comment here. */
757 while (c != '\n' && c != UEOF)
759 last_was_star = (c == '*');
760 c = java_get_unicode ();
761 if (last_was_star && c == '/')
762 return;
765 if (c == UEOF)
766 goto eof;
767 /* We have to advance past the \n. */
768 c = java_get_unicode ();
769 if (c == UEOF)
770 goto eof;
773 eof:
774 java_lex_error ("Comment not terminated at end of input", 0);
777 /* Return true if C is a valid start character for a Java identifier.
778 This is only called if C >= 128 -- smaller values are handled
779 inline. However, this function handles all values anyway. */
780 static int
781 java_start_char_p (unicode_t c)
783 unsigned int hi = c / 256;
784 const char *const page = type_table[hi];
785 unsigned long val = (unsigned long) page;
786 int flags;
788 if ((val & ~ LETTER_MASK) != 0)
789 flags = page[c & 255];
790 else
791 flags = val;
793 return flags & LETTER_START;
796 /* Return true if C is a valid part character for a Java identifier.
797 This is only called if C >= 128 -- smaller values are handled
798 inline. However, this function handles all values anyway. */
799 static int
800 java_part_char_p (unicode_t c)
802 unsigned int hi = c / 256;
803 const char *const page = type_table[hi];
804 unsigned long val = (unsigned long) page;
805 int flags;
807 if ((val & ~ LETTER_MASK) != 0)
808 flags = page[c & 255];
809 else
810 flags = val;
812 return flags & LETTER_PART;
815 /* Return true if C is whitespace. */
816 static int
817 java_space_char_p (unicode_t c)
819 unsigned int hi = c / 256;
820 const char *const page = type_table[hi];
821 unsigned long val = (unsigned long) page;
822 int flags;
824 if ((val & ~ LETTER_MASK) != 0)
825 flags = page[c & 255];
826 else
827 flags = val;
829 return flags & LETTER_SPACE;
832 static int
833 java_parse_escape_sequence (void)
835 unicode_t char_lit;
836 int c;
838 switch (c = java_get_unicode ())
840 case 'b':
841 return (unicode_t)0x8;
842 case 't':
843 return (unicode_t)0x9;
844 case 'n':
845 return (unicode_t)0xa;
846 case 'f':
847 return (unicode_t)0xc;
848 case 'r':
849 return (unicode_t)0xd;
850 case '"':
851 return (unicode_t)0x22;
852 case '\'':
853 return (unicode_t)0x27;
854 case '\\':
855 return (unicode_t)0x5c;
856 case '0': case '1': case '2': case '3': case '4':
857 case '5': case '6': case '7':
859 int octal_escape[3];
860 int octal_escape_index = 0;
861 int max = 3;
862 int i, shift;
864 for (; octal_escape_index < max && RANGE (c, '0', '7');
865 c = java_get_unicode ())
867 if (octal_escape_index == 0 && c > '3')
869 /* According to the grammar, `\477' has a well-defined
870 meaning -- it is `\47' followed by `7'. */
871 --max;
873 octal_escape [octal_escape_index++] = c;
876 java_unget_unicode ();
878 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
879 i < octal_escape_index; i++, shift -= 3)
880 char_lit |= (octal_escape [i] - '0') << shift;
882 return char_lit;
884 default:
885 java_lex_error ("Invalid character in escape sequence", 0);
886 return JAVA_CHAR_ERROR;
890 #ifndef JC1_LITE
891 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
893 /* Subroutine of java_lex: converts floating-point literals to tree
894 nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
895 store the result. FFLAG indicates whether the literal was tagged
896 with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
897 is the line number on which to report any error. */
899 static void java_perform_atof (YYSTYPE *, char *, int, int);
901 static void
902 java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
903 int number_beginning)
905 REAL_VALUE_TYPE value;
906 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
908 SET_REAL_VALUE_ATOF (value,
909 REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
911 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
913 JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
914 value = DCONST0;
916 else if (IS_ZERO (value))
918 /* We check to see if the value is really 0 or if we've found an
919 underflow. We do this in the most primitive imaginable way. */
920 int really_zero = 1;
921 char *p = literal_token;
922 if (*p == '-')
923 ++p;
924 while (*p && *p != 'e' && *p != 'E')
926 if (*p != '0' && *p != '.')
928 really_zero = 0;
929 break;
931 ++p;
933 if (! really_zero)
935 int i = ctxp->c_line->current;
936 ctxp->c_line->current = number_beginning;
937 java_lex_error ("Floating point literal underflow", 0);
938 ctxp->c_line->current = i;
942 SET_LVAL_NODE (build_real (type, value));
944 #endif
946 static int yylex (YYSTYPE *);
948 static int
949 #ifdef JC1_LITE
950 yylex (YYSTYPE *java_lval)
951 #else
952 do_java_lex (YYSTYPE *java_lval)
953 #endif
955 int c;
956 unicode_t first_unicode;
957 int ascii_index, all_ascii;
958 char *string;
960 /* Translation of the Unicode escape in the raw stream of Unicode
961 characters. Takes care of line terminator. */
962 step1:
963 /* Skip white spaces: SP, TAB and FF or ULT. */
964 for (c = java_get_unicode ();
965 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
966 if (c == '\n')
968 ctxp->elc.line = ctxp->c_line->lineno;
969 ctxp->elc.col = ctxp->c_line->char_col-2;
972 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
974 if (c == 0x1a) /* CTRL-Z. */
976 if ((c = java_get_unicode ()) == UEOF)
977 return 0; /* Ok here. */
978 else
979 java_unget_unicode (); /* Caught later, at the end of the
980 function. */
982 /* Handle EOF here. */
983 if (c == UEOF) /* Should probably do something here... */
984 return 0;
986 /* Take care of eventual comments. */
987 if (c == '/')
989 switch (c = java_get_unicode ())
991 case '/':
992 for (;;)
994 c = java_get_unicode ();
995 if (c == UEOF)
997 /* It is ok to end a `//' comment with EOF, unless
998 we're being pedantic. */
999 if (pedantic)
1000 java_lex_error ("Comment not terminated at end of input",
1002 return 0;
1004 if (c == '\n') /* ULT */
1005 goto step1;
1007 break;
1009 case '*':
1010 if ((c = java_get_unicode ()) == '*')
1012 c = java_get_unicode ();
1013 if (c == '/')
1015 /* Empty documentation comment. We have to reset
1016 the deprecation marker as only the most recent
1017 doc comment applies. */
1018 ctxp->deprecated = 0;
1020 else
1021 java_parse_doc_section (c);
1023 else
1024 java_parse_end_comment ((c = java_get_unicode ()));
1025 goto step1;
1026 break;
1027 default:
1028 java_unget_unicode ();
1029 c = '/';
1030 break;
1034 ctxp->elc.line = ctxp->c_line->lineno;
1035 ctxp->elc.prev_col = ctxp->elc.col;
1036 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
1037 if (ctxp->elc.col < 0)
1038 abort ();
1040 /* Numeric literals. */
1041 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
1043 /* This section of code is borrowed from gcc/c-lex.c. */
1044 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
1045 int parts[TOTAL_PARTS];
1046 HOST_WIDE_INT high, low;
1047 /* End borrowed section. */
1048 char literal_token [256];
1049 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
1050 int found_hex_digits = 0, found_non_octal_digits = 0;
1051 int i;
1052 #ifndef JC1_LITE
1053 int number_beginning = ctxp->c_line->current;
1054 tree value;
1055 #endif
1057 /* We might have a . separator instead of a FP like .[0-9]*. */
1058 if (c == '.')
1060 unicode_t peep = java_sneak_unicode ();
1062 if (!JAVA_ASCII_DIGIT (peep))
1064 JAVA_LEX_SEP('.');
1065 BUILD_OPERATOR (DOT_TK);
1069 for (i = 0; i < TOTAL_PARTS; i++)
1070 parts [i] = 0;
1072 if (c == '0')
1074 c = java_get_unicode ();
1075 if (c == 'x' || c == 'X')
1077 radix = 16;
1078 c = java_get_unicode ();
1080 else if (JAVA_ASCII_DIGIT (c))
1081 radix = 8;
1082 else if (c == '.' || c == 'e' || c =='E')
1084 /* Push the '.', 'e', or 'E' back and prepare for a FP
1085 parsing... */
1086 java_unget_unicode ();
1087 c = '0';
1089 else
1091 /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
1092 JAVA_LEX_LIT ("0", 10);
1093 switch (c)
1095 case 'L': case 'l':
1096 SET_LVAL_NODE (long_zero_node);
1097 return (INT_LIT_TK);
1098 case 'f': case 'F':
1099 SET_LVAL_NODE (float_zero_node);
1100 return (FP_LIT_TK);
1101 case 'd': case 'D':
1102 SET_LVAL_NODE (double_zero_node);
1103 return (FP_LIT_TK);
1104 default:
1105 java_unget_unicode ();
1106 SET_LVAL_NODE (integer_zero_node);
1107 return (INT_LIT_TK);
1111 /* Parse the first part of the literal, until we find something
1112 which is not a number. */
1113 while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1114 JAVA_ASCII_DIGIT (c))
1116 /* We store in a string (in case it turns out to be a FP) and in
1117 PARTS if we have to process a integer literal. */
1118 int numeric = hex_value (c);
1119 int count;
1121 /* Remember when we find a valid hexadecimal digit. */
1122 if (radix == 16)
1123 found_hex_digits = 1;
1124 /* Remember when we find an invalid octal digit. */
1125 else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1126 found_non_octal_digits = 1;
1128 literal_token [literal_index++] = c;
1129 /* This section of code if borrowed from gcc/c-lex.c. */
1130 for (count = 0; count < TOTAL_PARTS; count++)
1132 parts[count] *= radix;
1133 if (count)
1135 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1136 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1138 else
1139 parts[0] += numeric;
1141 if (parts [TOTAL_PARTS-1] != 0)
1142 overflow = 1;
1143 /* End borrowed section. */
1144 c = java_get_unicode ();
1147 /* If we have something from the FP char set but not a digit, parse
1148 a FP literal. */
1149 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1151 int stage = 0;
1152 int seen_digit = (literal_index ? 1 : 0);
1153 int seen_exponent = 0;
1154 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1155 double unless specified. */
1157 /* It is ok if the radix is 8 because this just means we've
1158 seen a leading `0'. However, radix==16 is invalid. */
1159 if (radix == 16)
1160 java_lex_error ("Can't express non-decimal FP literal", 0);
1161 radix = 10;
1163 for (;;)
1165 if (c == '.')
1167 if (stage < 1)
1169 stage = 1;
1170 literal_token [literal_index++ ] = c;
1171 c = java_get_unicode ();
1173 else
1174 java_lex_error ("Invalid character in FP literal", 0);
1177 if (c == 'e' || c == 'E')
1179 if (stage < 2)
1181 /* {E,e} must have seen at least a digit. */
1182 if (!seen_digit)
1183 java_lex_error
1184 ("Invalid FP literal, mantissa must have digit", 0);
1185 seen_digit = 0;
1186 seen_exponent = 1;
1187 stage = 2;
1188 literal_token [literal_index++] = c;
1189 c = java_get_unicode ();
1191 else
1192 java_lex_error ("Invalid character in FP literal", 0);
1194 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1196 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1197 stage = 4; /* So we fall through. */
1200 if ((c=='-' || c =='+') && stage == 2)
1202 stage = 3;
1203 literal_token [literal_index++] = c;
1204 c = java_get_unicode ();
1207 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1208 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1209 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1210 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1212 if (JAVA_ASCII_DIGIT (c))
1213 seen_digit = 1;
1214 if (stage == 2)
1215 stage = 3;
1216 literal_token [literal_index++ ] = c;
1217 c = java_get_unicode ();
1219 else
1221 if (stage != 4) /* Don't push back fF/dD. */
1222 java_unget_unicode ();
1224 /* An exponent (if any) must have seen a digit. */
1225 if (seen_exponent && !seen_digit)
1226 java_lex_error
1227 ("Invalid FP literal, exponent must have digit", 0);
1229 literal_token [literal_index] = '\0';
1230 JAVA_LEX_LIT (literal_token, radix);
1232 #ifndef JC1_LITE
1233 java_perform_atof (java_lval, literal_token,
1234 fflag, number_beginning);
1235 #endif
1236 return FP_LIT_TK;
1239 } /* JAVA_ASCII_FPCHAR (c) */
1241 /* Here we get back to converting the integral literal. */
1242 if (radix == 16 && ! found_hex_digits)
1243 java_lex_error
1244 ("0x must be followed by at least one hexadecimal digit", 0);
1245 else if (radix == 8 && found_non_octal_digits)
1246 java_lex_error ("Octal literal contains digit out of range", 0);
1247 else if (c == 'L' || c == 'l')
1248 long_suffix = 1;
1249 else
1250 java_unget_unicode ();
1252 #ifdef JAVA_LEX_DEBUG
1253 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1254 JAVA_LEX_LIT (literal_token, radix);
1255 #endif
1256 /* This section of code is borrowed from gcc/c-lex.c. */
1257 if (!overflow)
1259 bytes = GET_TYPE_PRECISION (long_type_node);
1260 for (i = bytes; i < TOTAL_PARTS; i++)
1261 if (parts [i])
1263 overflow = 1;
1264 break;
1267 high = low = 0;
1268 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1270 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1271 / HOST_BITS_PER_CHAR)]
1272 << (i * HOST_BITS_PER_CHAR));
1273 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1275 /* End borrowed section. */
1277 #ifndef JC1_LITE
1278 /* Range checking. */
1279 /* Temporarily set type to unsigned. */
1280 value = build_int_cst_wide (long_suffix
1281 ? unsigned_long_type_node
1282 : unsigned_int_type_node, low, high);
1283 SET_LVAL_NODE (value);
1285 /* For base 10 numbers, only values up to the highest value
1286 (plus one) can be written. For instance, only ints up to
1287 2147483648 can be written. The special case of the largest
1288 negative value is handled elsewhere. For other bases, any
1289 number can be represented. */
1290 if (overflow || (radix == 10
1291 && tree_int_cst_lt (long_suffix
1292 ? decimal_long_max
1293 : decimal_int_max,
1294 value)))
1296 if (long_suffix)
1297 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1298 else
1299 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1302 /* Sign extend the value. */
1303 value = build_int_cst_wide (long_suffix ? long_type_node : int_type_node,
1304 low, high);
1305 value = force_fit_type (value, 0, false, false);
1307 if (radix != 10)
1309 value = copy_node (value);
1310 JAVA_NOT_RADIX10_FLAG (value) = 1;
1313 SET_LVAL_NODE (value);
1314 #endif
1315 return INT_LIT_TK;
1318 /* Character literals. */
1319 if (c == '\'')
1321 int char_lit;
1323 if ((c = java_get_unicode ()) == '\\')
1324 char_lit = java_parse_escape_sequence ();
1325 else
1327 if (c == '\n' || c == '\'')
1328 java_lex_error ("Invalid character literal", 0);
1329 char_lit = c;
1332 c = java_get_unicode ();
1334 if ((c == '\n') || (c == UEOF))
1335 java_lex_error ("Character literal not terminated at end of line", 0);
1336 if (c != '\'')
1337 java_lex_error ("Syntax error in character literal", 0);
1339 if (char_lit == JAVA_CHAR_ERROR)
1340 char_lit = 0; /* We silently convert it to zero. */
1342 JAVA_LEX_CHAR_LIT (char_lit);
1343 SET_LVAL_NODE (build_int_cst (char_type_node, char_lit));
1344 return CHAR_LIT_TK;
1347 /* String literals. */
1348 if (c == '"')
1350 int no_error;
1351 char *string;
1353 for (no_error = 1, c = java_get_unicode ();
1354 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1356 if (c == '\\')
1357 c = java_parse_escape_sequence ();
1358 if (c == JAVA_CHAR_ERROR)
1360 no_error = 0;
1361 c = 0; /* We silently convert it to zero. */
1363 java_unicode_2_utf8 (c);
1365 if (c == '\n' || c == UEOF) /* ULT. */
1367 input_line--; /* Refer to the line where the terminator was seen. */
1368 java_lex_error ("String not terminated at end of line", 0);
1369 input_line++;
1372 obstack_1grow (&temporary_obstack, '\0');
1373 string = obstack_finish (&temporary_obstack);
1374 #ifndef JC1_LITE
1375 if (!no_error || (c != '"'))
1376 java_lval->node = error_mark_node; /* FIXME: Requires further
1377 testing. */
1378 else
1379 java_lval->node = build_string (strlen (string), string);
1380 #endif
1381 obstack_free (&temporary_obstack, string);
1382 return STRING_LIT_TK;
1385 /* Separator. */
1386 switch (c)
1388 case '(':
1389 JAVA_LEX_SEP (c);
1390 BUILD_OPERATOR (OP_TK);
1391 case ')':
1392 JAVA_LEX_SEP (c);
1393 return CP_TK;
1394 case '{':
1395 JAVA_LEX_SEP (c);
1396 if (ctxp->ccb_indent == 1)
1397 ctxp->first_ccb_indent1 = input_line;
1398 ctxp->ccb_indent++;
1399 BUILD_OPERATOR (OCB_TK);
1400 case '}':
1401 JAVA_LEX_SEP (c);
1402 ctxp->ccb_indent--;
1403 if (ctxp->ccb_indent == 1)
1404 ctxp->last_ccb_indent1 = input_line;
1405 BUILD_OPERATOR (CCB_TK);
1406 case '[':
1407 JAVA_LEX_SEP (c);
1408 BUILD_OPERATOR (OSB_TK);
1409 case ']':
1410 JAVA_LEX_SEP (c);
1411 return CSB_TK;
1412 case ';':
1413 JAVA_LEX_SEP (c);
1414 return SC_TK;
1415 case ',':
1416 JAVA_LEX_SEP (c);
1417 return C_TK;
1418 case '.':
1419 JAVA_LEX_SEP (c);
1420 BUILD_OPERATOR (DOT_TK);
1421 /* return DOT_TK; */
1424 /* Operators. */
1425 switch (c)
1427 case '=':
1428 if ((c = java_get_unicode ()) == '=')
1430 BUILD_OPERATOR (EQ_TK);
1432 else
1434 /* Equals is used in two different locations. In the
1435 variable_declarator: rule, it has to be seen as '=' as opposed
1436 to being seen as an ordinary assignment operator in
1437 assignment_operators: rule. */
1438 java_unget_unicode ();
1439 BUILD_OPERATOR (ASSIGN_TK);
1442 case '>':
1443 switch ((c = java_get_unicode ()))
1445 case '=':
1446 BUILD_OPERATOR (GTE_TK);
1447 case '>':
1448 switch ((c = java_get_unicode ()))
1450 case '>':
1451 if ((c = java_get_unicode ()) == '=')
1453 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1455 else
1457 java_unget_unicode ();
1458 BUILD_OPERATOR (ZRS_TK);
1460 case '=':
1461 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1462 default:
1463 java_unget_unicode ();
1464 BUILD_OPERATOR (SRS_TK);
1466 default:
1467 java_unget_unicode ();
1468 BUILD_OPERATOR (GT_TK);
1471 case '<':
1472 switch ((c = java_get_unicode ()))
1474 case '=':
1475 BUILD_OPERATOR (LTE_TK);
1476 case '<':
1477 if ((c = java_get_unicode ()) == '=')
1479 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1481 else
1483 java_unget_unicode ();
1484 BUILD_OPERATOR (LS_TK);
1486 default:
1487 java_unget_unicode ();
1488 BUILD_OPERATOR (LT_TK);
1491 case '&':
1492 switch ((c = java_get_unicode ()))
1494 case '&':
1495 BUILD_OPERATOR (BOOL_AND_TK);
1496 case '=':
1497 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1498 default:
1499 java_unget_unicode ();
1500 BUILD_OPERATOR (AND_TK);
1503 case '|':
1504 switch ((c = java_get_unicode ()))
1506 case '|':
1507 BUILD_OPERATOR (BOOL_OR_TK);
1508 case '=':
1509 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1510 default:
1511 java_unget_unicode ();
1512 BUILD_OPERATOR (OR_TK);
1515 case '+':
1516 switch ((c = java_get_unicode ()))
1518 case '+':
1519 BUILD_OPERATOR (INCR_TK);
1520 case '=':
1521 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1522 default:
1523 java_unget_unicode ();
1524 BUILD_OPERATOR (PLUS_TK);
1527 case '-':
1528 switch ((c = java_get_unicode ()))
1530 case '-':
1531 BUILD_OPERATOR (DECR_TK);
1532 case '=':
1533 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1534 default:
1535 java_unget_unicode ();
1536 BUILD_OPERATOR (MINUS_TK);
1539 case '*':
1540 if ((c = java_get_unicode ()) == '=')
1542 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1544 else
1546 java_unget_unicode ();
1547 BUILD_OPERATOR (MULT_TK);
1550 case '/':
1551 if ((c = java_get_unicode ()) == '=')
1553 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1555 else
1557 java_unget_unicode ();
1558 BUILD_OPERATOR (DIV_TK);
1561 case '^':
1562 if ((c = java_get_unicode ()) == '=')
1564 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1566 else
1568 java_unget_unicode ();
1569 BUILD_OPERATOR (XOR_TK);
1572 case '%':
1573 if ((c = java_get_unicode ()) == '=')
1575 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1577 else
1579 java_unget_unicode ();
1580 BUILD_OPERATOR (REM_TK);
1583 case '!':
1584 if ((c = java_get_unicode()) == '=')
1586 BUILD_OPERATOR (NEQ_TK);
1588 else
1590 java_unget_unicode ();
1591 BUILD_OPERATOR (NEG_TK);
1594 case '?':
1595 JAVA_LEX_OP ("?");
1596 BUILD_OPERATOR (REL_QM_TK);
1597 case ':':
1598 JAVA_LEX_OP (":");
1599 BUILD_OPERATOR (REL_CL_TK);
1600 case '~':
1601 BUILD_OPERATOR (NOT_TK);
1604 /* Keyword, boolean literal or null literal. */
1605 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1606 c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1608 java_unicode_2_utf8 (c);
1609 if (all_ascii && c >= 128)
1610 all_ascii = 0;
1611 ascii_index++;
1614 obstack_1grow (&temporary_obstack, '\0');
1615 string = obstack_finish (&temporary_obstack);
1616 if (c != UEOF)
1617 java_unget_unicode ();
1619 /* If we have something all ascii, we consider a keyword, a boolean
1620 literal, a null literal or an all ASCII identifier. Otherwise,
1621 this is an identifier (possibly not respecting formation rule). */
1622 if (all_ascii)
1624 const struct java_keyword *kw;
1625 if ((kw=java_keyword (string, ascii_index)))
1627 JAVA_LEX_KW (string);
1628 switch (kw->token)
1630 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1631 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1632 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1633 case PRIVATE_TK: case STRICT_TK:
1634 SET_MODIFIER_CTX (kw->token);
1635 return MODIFIER_TK;
1636 case FLOAT_TK:
1637 SET_LVAL_NODE (float_type_node);
1638 return FP_TK;
1639 case DOUBLE_TK:
1640 SET_LVAL_NODE (double_type_node);
1641 return FP_TK;
1642 case BOOLEAN_TK:
1643 SET_LVAL_NODE (boolean_type_node);
1644 return BOOLEAN_TK;
1645 case BYTE_TK:
1646 SET_LVAL_NODE (byte_type_node);
1647 return INTEGRAL_TK;
1648 case SHORT_TK:
1649 SET_LVAL_NODE (short_type_node);
1650 return INTEGRAL_TK;
1651 case INT_TK:
1652 SET_LVAL_NODE (int_type_node);
1653 return INTEGRAL_TK;
1654 case LONG_TK:
1655 SET_LVAL_NODE (long_type_node);
1656 return INTEGRAL_TK;
1657 case CHAR_TK:
1658 SET_LVAL_NODE (char_type_node);
1659 return INTEGRAL_TK;
1661 /* Keyword based literals. */
1662 case TRUE_TK:
1663 case FALSE_TK:
1664 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1665 boolean_true_node : boolean_false_node));
1666 return BOOL_LIT_TK;
1667 case NULL_TK:
1668 SET_LVAL_NODE (null_pointer_node);
1669 return NULL_TK;
1671 case ASSERT_TK:
1672 if (flag_assert)
1674 BUILD_OPERATOR (kw->token);
1675 return kw->token;
1677 else
1678 break;
1680 /* For some keywords we want to retain information on the location
1681 where they were found. */
1682 case CASE_TK:
1683 case DEFAULT_TK:
1684 case SUPER_TK:
1685 case THIS_TK:
1686 case RETURN_TK:
1687 case BREAK_TK:
1688 case CONTINUE_TK:
1689 case TRY_TK:
1690 case CATCH_TK:
1691 case THROW_TK:
1692 case INSTANCEOF_TK:
1693 BUILD_OPERATOR (kw->token);
1695 default:
1696 return kw->token;
1701 /* We may have an ID here. */
1702 if (JAVA_START_CHAR_P (first_unicode))
1704 JAVA_LEX_ID (string);
1705 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1706 return ID_TK;
1709 /* Everything else is an invalid character in the input. */
1711 char lex_error_buffer [128];
1712 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1713 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1714 java_lex_error (lex_error_buffer, 1);
1716 return 0;
1719 #ifndef JC1_LITE
1721 /* The exported interface to the lexer. */
1722 static int
1723 java_lex (YYSTYPE *java_lval)
1725 int r;
1727 timevar_push (TV_LEX);
1728 r = do_java_lex (java_lval);
1729 timevar_pop (TV_LEX);
1730 return r;
1733 /* This is called by the parser to see if an error should be generated
1734 due to numeric overflow. This function only handles the particular
1735 case of the largest negative value, and is only called in the case
1736 where this value is not preceded by `-'. */
1737 static void
1738 error_if_numeric_overflow (tree value)
1740 if (TREE_CODE (value) == INTEGER_CST
1741 && !JAVA_NOT_RADIX10_FLAG (value)
1742 && tree_int_cst_sgn (value) < 0)
1744 if (TREE_TYPE (value) == long_type_node)
1745 java_lex_error ("Numeric overflow for `long' literal", 0);
1746 else
1747 java_lex_error ("Numeric overflow for `int' literal", 0);
1751 #endif /* JC1_LITE */
1753 static void
1754 java_unicode_2_utf8 (unicode_t unicode)
1756 if (RANGE (unicode, 0x01, 0x7f))
1757 obstack_1grow (&temporary_obstack, (char)unicode);
1758 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1760 obstack_1grow (&temporary_obstack,
1761 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1762 obstack_1grow (&temporary_obstack,
1763 (unsigned char)(0x80 | (unicode & 0x3f)));
1765 else /* Range 0x800-0xffff. */
1767 obstack_1grow (&temporary_obstack,
1768 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1769 obstack_1grow (&temporary_obstack,
1770 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1771 obstack_1grow (&temporary_obstack,
1772 (unsigned char)(0x80 | (unicode & 0x003f)));
1776 #ifndef JC1_LITE
1777 static tree
1778 build_wfl_node (tree node)
1780 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1781 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1782 TREE_TYPE (node) = NULL_TREE;
1783 return node;
1785 #endif
1787 static void
1788 java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
1790 #ifndef JC1_LITE
1791 ctxp->elc.line = ctxp->c_line->lineno;
1792 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1794 /* Might be caught in the middle of some error report. */
1795 ctxp->java_error_flag = 0;
1796 java_error (NULL);
1797 java_error (msg);
1798 #endif
1801 #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, is a line terminator
   and 0 otherwise.  When C is a carriage return, one more character
   is read from FP: a following line feed is consumed so that CR LF
   counts as one terminator; any other character is pushed back (end
   of file is simply left alone).  */
static int
java_is_eol (FILE *fp, int c)
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int following = getc (fp);

      if (following != '\n' && following != EOF)
	ungetc (following, fp);
      return 1;
    }

  return 0;
}
1819 #endif
1821 char *
1822 java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
1823 int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
1825 #ifdef JC1_LITE
1826 return 0;
1827 #else
1828 /* Dumb implementation. Doesn't try to cache or optimize things. */
1829 /* First line of the file is line 1, first column is 1. */
1831 /* COL == -1 means, at the CR/LF in LINE. */
1832 /* COL == -2 means, at the first non space char in LINE. */
1834 FILE *fp;
1835 int c, ccol, cline = 1;
1836 int current_line_col = 0;
1837 int first_non_space = 0;
1838 char *base;
1840 if (!(fp = fopen (filename, "r")))
1841 fatal_error ("can't open %s: %m", filename);
1843 while (cline != line)
1845 c = getc (fp);
1846 if (c == EOF)
1848 static const char msg[] = "<<file too short - unexpected EOF>>";
1849 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1850 goto have_line;
1852 if (java_is_eol (fp, c))
1853 cline++;
1856 /* Gather the chars of the current line in a buffer. */
1857 for (;;)
1859 c = getc (fp);
1860 if (c < 0 || java_is_eol (fp, c))
1861 break;
1862 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1863 first_non_space = current_line_col;
1864 obstack_1grow (&temporary_obstack, c);
1865 current_line_col++;
1867 have_line:
1869 obstack_1grow (&temporary_obstack, '\n');
1871 if (col == -1)
1873 col = current_line_col;
1874 first_non_space = 0;
1876 else if (col == -2)
1877 col = first_non_space;
1878 else
1879 first_non_space = 0;
1881 /* Place the '^' a the right position. */
1882 base = obstack_base (&temporary_obstack);
1883 for (ccol = 1; ccol <= col+3; ccol++)
1885 /* Compute \t when reaching first_non_space. */
1886 char c = (first_non_space ?
1887 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1888 obstack_1grow (&temporary_obstack, c);
1890 obstack_grow0 (&temporary_obstack, "^", 1);
1892 fclose (fp);
1893 return obstack_finish (&temporary_obstack);
1894 #endif
1897 #ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text at STR against the
   NUL-terminated string NAME, one character of NAME at a time.

   Each step fetches a character from STR with UTF8_GET (defined
   elsewhere; it is expected to advance STR towards the end of the
   input -- confirm against its definition).  At the first
   difference the fetched character minus the NAME character is
   returned.  If all of NAME matched, the result is 0 when STR
   has reached exactly the end of its LENGTH bytes and 1 otherwise.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *limit = str + length;
  const char *p;

  for (p = name; *p != '\0'; ++p)
    {
      int ch = UTF8_GET (str, limit);
      int diff = ch - *p;

      if (diff != 0)
	return diff;
    }

  if (str == limit)
    return 0;
  return 1;
}
/* A sorted list of all C++ keywords.  cxx_keyword_p bisects this
   table, so the entries must be kept in ascending order.  Entries
   are grouped here by their leading characters.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__",
  "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__",
  "__imag", "__imag__", "__inline", "__inline__",
  "__label__",
  "__null",
  "__real", "__real__", "__restrict", "__restrict__",
  "__signed", "__signed__",
  "__typeof", "__typeof__",
  "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl",
  "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast", "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
2024 /* Return true if NAME is a C++ keyword. */
2027 cxx_keyword_p (const char *name, int length)
2029 int last = ARRAY_SIZE (cxx_keywords);
2030 int first = 0;
2031 int mid = (last + first) / 2;
2032 int old = -1;
2034 for (mid = (last + first) / 2;
2035 mid != old;
2036 old = mid, mid = (last + first) / 2)
2038 int kwl = strlen (cxx_keywords[mid]);
2039 int min_length = kwl > length ? length : kwl;
2040 int r = utf8_cmp ((const unsigned char *) name, min_length, cxx_keywords[mid]);
2042 if (r == 0)
2044 int i;
2045 /* We've found a match if all the remaining characters are `$'. */
2046 for (i = min_length; i < length && name[i] == '$'; ++i)
2048 if (i == length)
2049 return 1;
2050 r = 1;
2053 if (r < 0)
2054 last = mid;
2055 else
2056 first = mid;
2058 return 0;
2060 #endif /* JC1_LITE */