FSF GCC merge 02/23/03
[official-gcc.git] / gcc / java / lex.c
blob370217eb96165a3b4ea1d8fdfb50392d4a923425
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
3 Free Software Foundation, Inc.
4 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to
20 the Free Software Foundation, 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA.
23 Java and all Java-based marks are trademarks or registered trademarks
24 of Sun Microsystems, Inc. in the United States and other countries.
25 The Free Software Foundation is independent of Sun Microsystems, Inc. */
/* It defines java_lex (yylex) that reads a Java ASCII source file
   possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found except comments,
   white space and line terminators.  When necessary, it also fills
   the java_lval (yylval) union.  It's implemented to be called by a
   re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification.  J. Gosling, B. Joy, G. Steele.
   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
38 #include "keyword.h"
39 #include "flags.h"
40 #include "chartables.h"
42 /* Function declarations. */
43 static char *java_sprint_unicode (struct java_line *, int);
44 static void java_unicode_2_utf8 (unicode_t);
45 static void java_lex_error (const char *, int);
46 #ifndef JC1_LITE
47 static int java_is_eol (FILE *, int);
48 static tree build_wfl_node (tree);
49 #endif
50 static void java_store_unicode (struct java_line *, unicode_t, int);
51 static int java_parse_escape_sequence (void);
52 static int java_start_char_p (unicode_t);
53 static int java_part_char_p (unicode_t);
54 static int java_parse_doc_section (int);
55 static void java_parse_end_comment (int);
56 static int java_get_unicode (void);
57 static int java_read_unicode (java_lexer *, int *);
58 static int java_read_unicode_collapsing_terminators (java_lexer *, int *);
59 static void java_store_unicode (struct java_line *, unicode_t, int);
60 static int java_read_char (java_lexer *);
61 static void java_allocate_new_line (void);
62 static void java_unget_unicode (void);
63 static unicode_t java_sneak_unicode (void);
64 #ifndef JC1_LITE
65 static int utf8_cmp (const unsigned char *, int, const char *);
66 #endif
68 java_lexer *java_new_lexer (FILE *, const char *);
69 #ifndef JC1_LITE
70 static void error_if_numeric_overflow (tree);
71 #endif
73 #ifdef HAVE_ICONV
74 /* This is nonzero if we have initialized `need_byteswap'. */
75 static int byteswap_init = 0;
77 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
78 big-endian order -- not native endian order. We handle this by
79 doing a conversion once at startup and seeing what happens. This
80 flag holds the results of this determination. */
81 static int need_byteswap = 0;
82 #endif
84 void
85 java_init_lex (FILE *finput, const char *encoding)
87 #ifndef JC1_LITE
88 int java_lang_imported = 0;
90 if (!java_lang_id)
91 java_lang_id = get_identifier ("java.lang");
92 if (!inst_id)
93 inst_id = get_identifier ("inst$");
94 if (!wpv_id)
95 wpv_id = get_identifier ("write_parm_value$");
97 if (!java_lang_imported)
99 tree node = build_tree_list
100 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
101 read_import_dir (TREE_PURPOSE (node));
102 TREE_CHAIN (node) = ctxp->import_demand_list;
103 ctxp->import_demand_list = node;
104 java_lang_imported = 1;
107 if (!wfl_operator)
108 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
109 if (!label_id)
110 label_id = get_identifier ("$L");
111 if (!wfl_append)
112 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
113 if (!wfl_string_buffer)
114 wfl_string_buffer =
115 build_expr_wfl (get_identifier (flag_emit_class_files
116 ? "java.lang.StringBuffer"
117 : "gnu.gcj.runtime.StringBuffer"),
118 NULL, 0, 0);
119 if (!wfl_to_string)
120 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
122 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
123 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
125 memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
126 current_jcf = ggc_alloc_cleared (sizeof (JCF));
127 ctxp->current_parsed_class = NULL;
128 ctxp->package = NULL_TREE;
129 #endif
131 ctxp->filename = input_filename;
132 ctxp->lineno = lineno = 0;
133 ctxp->p_line = NULL;
134 ctxp->c_line = NULL;
135 ctxp->java_error_flag = 0;
136 ctxp->lexer = java_new_lexer (finput, encoding);
139 static char *
140 java_sprint_unicode (struct java_line *line, int i)
142 static char buffer [10];
143 if (line->unicode_escape_p [i] || line->line [i] > 128)
144 sprintf (buffer, "\\u%04x", line->line [i]);
145 else
147 buffer [0] = line->line [i];
148 buffer [1] = '\0';
150 return buffer;
153 static unicode_t
154 java_sneak_unicode (void)
156 return (ctxp->c_line->line [ctxp->c_line->current]);
159 static void
160 java_unget_unicode (void)
162 if (!ctxp->c_line->current)
163 /* Can't unget unicode. */
164 abort ();
166 ctxp->c_line->current--;
167 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
170 static void
171 java_allocate_new_line (void)
173 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
174 char ahead_escape_p = (ctxp->c_line ?
175 ctxp->c_line->unicode_escape_ahead_p : 0);
177 if (ctxp->c_line && !ctxp->c_line->white_space_only)
179 if (ctxp->p_line)
181 free (ctxp->p_line->unicode_escape_p);
182 free (ctxp->p_line->line);
183 free (ctxp->p_line);
185 ctxp->p_line = ctxp->c_line;
186 ctxp->c_line = NULL; /* Reallocated. */
189 if (!ctxp->c_line)
191 ctxp->c_line = xmalloc (sizeof (struct java_line));
192 ctxp->c_line->max = JAVA_LINE_MAX;
193 ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
194 ctxp->c_line->unicode_escape_p =
195 xmalloc (sizeof (char)*ctxp->c_line->max);
196 ctxp->c_line->white_space_only = 0;
199 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
200 ctxp->c_line->char_col = ctxp->c_line->current = 0;
201 if (ahead)
203 ctxp->c_line->line [ctxp->c_line->size] = ahead;
204 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
205 ctxp->c_line->size++;
207 ctxp->c_line->ahead [0] = 0;
208 ctxp->c_line->unicode_escape_ahead_p = 0;
209 ctxp->c_line->lineno = ++lineno;
210 ctxp->c_line->white_space_only = 1;
213 /* Create a new lexer object. */
215 java_lexer *
216 java_new_lexer (FILE *finput, const char *encoding)
218 java_lexer *lex = xmalloc (sizeof (java_lexer));
219 int enc_error = 0;
221 lex->finput = finput;
222 lex->bs_count = 0;
223 lex->unget_value = 0;
224 lex->hit_eof = 0;
226 #ifdef HAVE_ICONV
227 lex->handle = iconv_open ("UCS-2", encoding);
228 if (lex->handle != (iconv_t) -1)
230 lex->first = -1;
231 lex->last = -1;
232 lex->out_first = -1;
233 lex->out_last = -1;
234 lex->read_anything = 0;
235 lex->use_fallback = 0;
237 /* Work around broken iconv() implementations by doing checking at
238 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
239 then all UCS-2 encoders will be broken. Perhaps not a valid
240 assumption. */
241 if (! byteswap_init)
243 iconv_t handle;
245 byteswap_init = 1;
247 handle = iconv_open ("UCS-2", "UTF-8");
248 if (handle != (iconv_t) -1)
250 unicode_t result;
251 unsigned char in[3];
252 char *inp, *outp;
253 size_t inc, outc, r;
255 /* This is the UTF-8 encoding of \ufeff. */
256 in[0] = 0xef;
257 in[1] = 0xbb;
258 in[2] = 0xbf;
260 inp = in;
261 inc = 3;
262 outp = (char *) &result;
263 outc = 2;
265 r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
266 &outp, &outc);
267 iconv_close (handle);
268 /* Conversion must be complete for us to use the result. */
269 if (r != (size_t) -1 && inc == 0 && outc == 0)
270 need_byteswap = (result != 0xfeff);
274 lex->byte_swap = need_byteswap;
276 else
277 #endif /* HAVE_ICONV */
279 /* If iconv failed, use the internal decoder if the default
280 encoding was requested. This code is used on platforms where
281 iconv exists but is insufficient for our needs. For
282 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
284 On Solaris the default encoding, as returned by nl_langinfo(),
285 is `646' (aka ASCII), but the Solaris iconv_open() doesn't
286 understand that. We work around that by pretending
287 `646' to be the same as UTF-8. */
288 if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
289 enc_error = 1;
290 #ifdef HAVE_ICONV
291 else
292 lex->use_fallback = 1;
293 #endif /* HAVE_ICONV */
296 if (enc_error)
297 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
299 return lex;
302 void
303 java_destroy_lexer (java_lexer *lex)
305 #ifdef HAVE_ICONV
306 if (! lex->use_fallback)
307 iconv_close (lex->handle);
308 #endif
309 free (lex);
312 static int
313 java_read_char (java_lexer *lex)
315 if (lex->unget_value)
317 unicode_t r = lex->unget_value;
318 lex->unget_value = 0;
319 return r;
322 #ifdef HAVE_ICONV
323 if (! lex->use_fallback)
325 size_t ir, inbytesleft, in_save, out_count, out_save;
326 char *inp, *outp;
327 unicode_t result;
329 /* If there is data which has already been converted, use it. */
330 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
332 lex->out_first = 0;
333 lex->out_last = 0;
335 while (1)
337 /* See if we need to read more data. If FIRST == 0 then
338 the previous conversion attempt ended in the middle of
339 a character at the end of the buffer. Otherwise we
340 only have to read if the buffer is empty. */
341 if (lex->first == 0 || lex->first >= lex->last)
343 int r;
345 if (lex->first >= lex->last)
347 lex->first = 0;
348 lex->last = 0;
350 if (feof (lex->finput))
351 return UEOF;
352 r = fread (&lex->buffer[lex->last], 1,
353 sizeof (lex->buffer) - lex->last,
354 lex->finput);
355 lex->last += r;
358 inbytesleft = lex->last - lex->first;
359 out_count = sizeof (lex->out_buffer) - lex->out_last;
361 if (inbytesleft == 0)
363 /* We've tried to read and there is nothing left. */
364 return UEOF;
367 in_save = inbytesleft;
368 out_save = out_count;
369 inp = &lex->buffer[lex->first];
370 outp = &lex->out_buffer[lex->out_last];
371 ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
372 &inbytesleft, &outp, &out_count);
374 /* If we haven't read any bytes, then look to see if we
375 have read a BOM. */
376 if (! lex->read_anything && out_save - out_count >= 2)
378 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
379 if (uc == 0xfeff)
381 lex->byte_swap = 0;
382 lex->out_first += 2;
384 else if (uc == 0xfffe)
386 lex->byte_swap = 1;
387 lex->out_first += 2;
389 lex->read_anything = 1;
392 if (lex->byte_swap)
394 unsigned int i;
395 for (i = 0; i < out_save - out_count; i += 2)
397 char t = lex->out_buffer[lex->out_last + i];
398 lex->out_buffer[lex->out_last + i]
399 = lex->out_buffer[lex->out_last + i + 1];
400 lex->out_buffer[lex->out_last + i + 1] = t;
404 lex->first += in_save - inbytesleft;
405 lex->out_last += out_save - out_count;
407 /* If we converted anything at all, move along. */
408 if (out_count != out_save)
409 break;
411 if (ir == (size_t) -1)
413 if (errno == EINVAL)
415 /* This is ok. This means that the end of our buffer
416 is in the middle of a character sequence. We just
417 move the valid part of the buffer to the beginning
418 to force a read. */
419 memmove (&lex->buffer[0], &lex->buffer[lex->first],
420 lex->last - lex->first);
421 lex->last -= lex->first;
422 lex->first = 0;
424 else
426 /* A more serious error. */
427 java_lex_error ("unrecognized character in input stream",
429 return UEOF;
435 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
437 /* Don't have any data. */
438 return UEOF;
441 /* Success. */
442 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
443 lex->out_first += 2;
444 return result;
446 else
447 #endif /* HAVE_ICONV */
449 int c, c1, c2;
450 c = getc (lex->finput);
452 if (c == EOF)
453 return UEOF;
454 if (c < 128)
455 return (unicode_t) c;
456 else
458 if ((c & 0xe0) == 0xc0)
460 c1 = getc (lex->finput);
461 if ((c1 & 0xc0) == 0x80)
463 unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
464 /* Check for valid 2-byte characters. We explicitly
465 allow \0 because this encoding is common in the
466 Java world. */
467 if (r == 0 || (r >= 0x80 && r <= 0x7ff))
468 return r;
471 else if ((c & 0xf0) == 0xe0)
473 c1 = getc (lex->finput);
474 if ((c1 & 0xc0) == 0x80)
476 c2 = getc (lex->finput);
477 if ((c2 & 0xc0) == 0x80)
479 unicode_t r = (unicode_t)(((c & 0xf) << 12) +
480 (( c1 & 0x3f) << 6)
481 + (c2 & 0x3f));
482 /* Check for valid 3-byte characters.
483 Don't allow surrogate, \ufffe or \uffff. */
484 if (IN_RANGE (r, 0x800, 0xffff)
485 && ! IN_RANGE (r, 0xd800, 0xdfff)
486 && r != 0xfffe && r != 0xffff)
487 return r;
492 /* We simply don't support invalid characters. We also
493 don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
494 cannot be valid Java characters. */
495 java_lex_error ("malformed UTF-8 character", 0);
499 /* We only get here on error. */
500 return UEOF;
503 static void
504 java_store_unicode (struct java_line *l, unicode_t c, int unicode_escape_p)
506 if (l->size == l->max)
508 l->max += JAVA_LINE_MAX;
509 l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
510 l->unicode_escape_p = xrealloc (l->unicode_escape_p,
511 sizeof (char)*l->max);
513 l->line [l->size] = c;
514 l->unicode_escape_p [l->size++] = unicode_escape_p;
517 static int
518 java_read_unicode (java_lexer *lex, int *unicode_escape_p)
520 int c;
522 c = java_read_char (lex);
523 *unicode_escape_p = 0;
525 if (c != '\\')
527 lex->bs_count = 0;
528 return c;
531 ++lex->bs_count;
532 if ((lex->bs_count) % 2 == 1)
534 /* Odd number of \ seen. */
535 c = java_read_char (lex);
536 if (c == 'u')
538 unicode_t unicode = 0;
539 int shift = 12;
541 /* Recognize any number of `u's in \u. */
542 while ((c = java_read_char (lex)) == 'u')
545 shift = 12;
548 if (c == UEOF)
550 java_lex_error ("prematurely terminated \\u sequence", 0);
551 return UEOF;
554 if (hex_p (c))
555 unicode |= (unicode_t)(hex_value (c) << shift);
556 else
558 java_lex_error ("non-hex digit in \\u sequence", 0);
559 break;
562 c = java_read_char (lex);
563 shift -= 4;
565 while (shift >= 0);
567 if (c != UEOF)
568 lex->unget_value = c;
570 lex->bs_count = 0;
571 *unicode_escape_p = 1;
572 return unicode;
574 lex->unget_value = c;
576 return (unicode_t) '\\';
579 static int
580 java_read_unicode_collapsing_terminators (java_lexer *lex,
581 int *unicode_escape_p)
583 int c = java_read_unicode (lex, unicode_escape_p);
585 if (c == '\r')
587 /* We have to read ahead to see if we got \r\n. In that case we
588 return a single line terminator. */
589 int dummy;
590 c = java_read_unicode (lex, &dummy);
591 if (c != '\n' && c != UEOF)
592 lex->unget_value = c;
593 /* In either case we must return a newline. */
594 c = '\n';
597 return c;
600 static int
601 java_get_unicode (void)
603 /* It's time to read a line when... */
604 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
606 int c;
607 int found_chars = 0;
609 if (ctxp->lexer->hit_eof)
610 return UEOF;
612 java_allocate_new_line ();
613 if (ctxp->c_line->line[0] != '\n')
615 for (;;)
617 int unicode_escape_p;
618 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
619 &unicode_escape_p);
620 if (c != UEOF)
622 found_chars = 1;
623 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
624 if (ctxp->c_line->white_space_only
625 && !JAVA_WHITE_SPACE_P (c)
626 && c != '\n')
627 ctxp->c_line->white_space_only = 0;
629 if ((c == '\n') || (c == UEOF))
630 break;
633 if (c == UEOF && ! found_chars)
635 ctxp->lexer->hit_eof = 1;
636 return UEOF;
640 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
641 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
642 return ctxp->c_line->line [ctxp->c_line->current++];
645 /* Parse the end of a C style comment.
646 * C is the first character following the '/' and '*'. */
647 static void
648 java_parse_end_comment (int c)
650 for ( ;; c = java_get_unicode ())
652 switch (c)
654 case UEOF:
655 java_lex_error ("Comment not terminated at end of input", 0);
656 return;
657 case '*':
658 switch (c = java_get_unicode ())
660 case UEOF:
661 java_lex_error ("Comment not terminated at end of input", 0);
662 return;
663 case '/':
664 return;
665 case '*': /* Reparse only '*'. */
666 java_unget_unicode ();
672 /* Parse the documentation section. Keywords must be at the beginning
673 of a documentation comment line (ignoring white space and any `*'
674 character). Parsed keyword(s): @DEPRECATED. */
676 static int
677 java_parse_doc_section (int c)
679 int valid_tag = 0, seen_star = 0;
681 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
683 switch (c)
685 case '*':
686 seen_star = 1;
687 break;
688 case '\n': /* ULT */
689 valid_tag = 1;
690 default:
691 seen_star = 0;
693 c = java_get_unicode();
696 if (c == UEOF)
697 java_lex_error ("Comment not terminated at end of input", 0);
699 if (seen_star && (c == '/'))
700 return 1; /* Goto step1 in caller. */
702 /* We're parsing `@deprecated'. */
703 if (valid_tag && (c == '@'))
705 char tag [11];
706 int tag_index = 0;
708 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
710 c = java_get_unicode ();
711 tag [tag_index++] = c;
714 if (c == UEOF)
715 java_lex_error ("Comment not terminated at end of input", 0);
716 tag [tag_index] = '\0';
718 if (!strcmp (tag, "deprecated"))
719 ctxp->deprecated = 1;
721 java_unget_unicode ();
722 return 0;
725 /* Return true if C is a valid start character for a Java identifier.
726 This is only called if C >= 128 -- smaller values are handled
727 inline. However, this function handles all values anyway. */
728 static int
729 java_start_char_p (unicode_t c)
731 unsigned int hi = c / 256;
732 const char *const page = type_table[hi];
733 unsigned long val = (unsigned long) page;
734 int flags;
736 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
737 flags = page[c & 255];
738 else
739 flags = val;
741 return flags & LETTER_START;
744 /* Return true if C is a valid part character for a Java identifier.
745 This is only called if C >= 128 -- smaller values are handled
746 inline. However, this function handles all values anyway. */
747 static int
748 java_part_char_p (unicode_t c)
750 unsigned int hi = c / 256;
751 const char *const page = type_table[hi];
752 unsigned long val = (unsigned long) page;
753 int flags;
755 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
756 flags = page[c & 255];
757 else
758 flags = val;
760 return flags & LETTER_PART;
763 static int
764 java_parse_escape_sequence (void)
766 unicode_t char_lit;
767 int c;
769 switch (c = java_get_unicode ())
771 case 'b':
772 return (unicode_t)0x8;
773 case 't':
774 return (unicode_t)0x9;
775 case 'n':
776 return (unicode_t)0xa;
777 case 'f':
778 return (unicode_t)0xc;
779 case 'r':
780 return (unicode_t)0xd;
781 case '"':
782 return (unicode_t)0x22;
783 case '\'':
784 return (unicode_t)0x27;
785 case '\\':
786 return (unicode_t)0x5c;
787 case '0': case '1': case '2': case '3': case '4':
788 case '5': case '6': case '7':
790 int octal_escape[3];
791 int octal_escape_index = 0;
792 int max = 3;
793 int i, shift;
795 for (; octal_escape_index < max && RANGE (c, '0', '7');
796 c = java_get_unicode ())
798 if (octal_escape_index == 0 && c > '3')
800 /* According to the grammar, `\477' has a well-defined
801 meaning -- it is `\47' followed by `7'. */
802 --max;
804 octal_escape [octal_escape_index++] = c;
807 java_unget_unicode ();
809 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
810 i < octal_escape_index; i++, shift -= 3)
811 char_lit |= (octal_escape [i] - '0') << shift;
813 return char_lit;
815 default:
816 java_lex_error ("Invalid character in escape sequence", 0);
817 return JAVA_CHAR_ERROR;
821 #ifndef JC1_LITE
822 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
824 /* Subroutine of java_lex: converts floating-point literals to tree
825 nodes. LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
826 store the result. FFLAG indicates whether the literal was tagged
827 with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
828 is the line number on which to report any error. */
830 static void java_perform_atof (YYSTYPE *, char *, int, int);
832 static void
833 java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
834 int number_beginning)
836 REAL_VALUE_TYPE value;
837 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
839 SET_REAL_VALUE_ATOF (value,
840 REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
842 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
844 JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
845 value = DCONST0;
847 else if (IS_ZERO (value))
849 /* We check to see if the value is really 0 or if we've found an
850 underflow. We do this in the most primitive imaginable way. */
851 int really_zero = 1;
852 char *p = literal_token;
853 if (*p == '-')
854 ++p;
855 while (*p && *p != 'e' && *p != 'E')
857 if (*p != '0' && *p != '.')
859 really_zero = 0;
860 break;
862 ++p;
864 if (! really_zero)
866 int i = ctxp->c_line->current;
867 ctxp->c_line->current = number_beginning;
868 java_lex_error ("Floating point literal underflow", 0);
869 ctxp->c_line->current = i;
873 SET_LVAL_NODE_TYPE (build_real (type, value), type);
875 #endif
877 static int yylex (YYSTYPE *);
879 static int
880 #ifdef JC1_LITE
881 yylex (YYSTYPE *java_lval)
882 #else
883 java_lex (YYSTYPE *java_lval)
884 #endif
886 int c;
887 unicode_t first_unicode;
888 int ascii_index, all_ascii;
889 char *string;
891 /* Translation of the Unicode escape in the raw stream of Unicode
892 characters. Takes care of line terminator. */
893 step1:
894 /* Skip white spaces: SP, TAB and FF or ULT. */
895 for (c = java_get_unicode ();
896 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
897 if (c == '\n')
899 ctxp->elc.line = ctxp->c_line->lineno;
900 ctxp->elc.col = ctxp->c_line->char_col-2;
903 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
905 if (c == 0x1a) /* CTRL-Z. */
907 if ((c = java_get_unicode ()) == UEOF)
908 return 0; /* Ok here. */
909 else
910 java_unget_unicode (); /* Caught later, at the end of the
911 function. */
913 /* Handle EOF here. */
914 if (c == UEOF) /* Should probably do something here... */
915 return 0;
917 /* Take care of eventual comments. */
918 if (c == '/')
920 switch (c = java_get_unicode ())
922 case '/':
923 for (;;)
925 c = java_get_unicode ();
926 if (c == UEOF)
928 /* It is ok to end a `//' comment with EOF, unless
929 we're being pedantic. */
930 if (pedantic)
931 java_lex_error ("Comment not terminated at end of input",
933 return 0;
935 if (c == '\n') /* ULT */
936 goto step1;
938 break;
940 case '*':
941 if ((c = java_get_unicode ()) == '*')
943 if ((c = java_get_unicode ()) == '/')
944 goto step1; /* Empty documentation comment. */
945 else if (java_parse_doc_section (c))
946 goto step1;
949 java_parse_end_comment ((c = java_get_unicode ()));
950 goto step1;
951 break;
952 default:
953 java_unget_unicode ();
954 c = '/';
955 break;
959 ctxp->elc.line = ctxp->c_line->lineno;
960 ctxp->elc.prev_col = ctxp->elc.col;
961 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
962 if (ctxp->elc.col < 0)
963 abort ();
965 /* Numeric literals. */
966 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
968 /* This section of code is borrowed from gcc/c-lex.c. */
969 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
970 int parts[TOTAL_PARTS];
971 HOST_WIDE_INT high, low;
972 /* End borrowed section. */
973 char literal_token [256];
974 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
975 int found_hex_digits = 0, found_non_octal_digits = 0;
976 int i;
977 #ifndef JC1_LITE
978 int number_beginning = ctxp->c_line->current;
979 tree value;
980 #endif
982 /* We might have a . separator instead of a FP like .[0-9]*. */
983 if (c == '.')
985 unicode_t peep = java_sneak_unicode ();
987 if (!JAVA_ASCII_DIGIT (peep))
989 JAVA_LEX_SEP('.');
990 BUILD_OPERATOR (DOT_TK);
994 for (i = 0; i < TOTAL_PARTS; i++)
995 parts [i] = 0;
997 if (c == '0')
999 c = java_get_unicode ();
1000 if (c == 'x' || c == 'X')
1002 radix = 16;
1003 c = java_get_unicode ();
1005 else if (JAVA_ASCII_DIGIT (c))
1006 radix = 8;
1007 else if (c == '.' || c == 'e' || c =='E')
1009 /* Push the '.', 'e', or 'E' back and prepare for a FP
1010 parsing... */
1011 java_unget_unicode ();
1012 c = '0';
1014 else
1016 /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}. */
1017 JAVA_LEX_LIT ("0", 10);
1018 switch (c)
1020 case 'L': case 'l':
1021 SET_LVAL_NODE (long_zero_node);
1022 return (INT_LIT_TK);
1023 case 'f': case 'F':
1024 SET_LVAL_NODE (float_zero_node);
1025 return (FP_LIT_TK);
1026 case 'd': case 'D':
1027 SET_LVAL_NODE (double_zero_node);
1028 return (FP_LIT_TK);
1029 default:
1030 java_unget_unicode ();
1031 SET_LVAL_NODE (integer_zero_node);
1032 return (INT_LIT_TK);
1036 /* Parse the first part of the literal, until we find something
1037 which is not a number. */
1038 while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1039 JAVA_ASCII_DIGIT (c))
1041 /* We store in a string (in case it turns out to be a FP) and in
1042 PARTS if we have to process a integer literal. */
1043 int numeric = hex_value (c);
1044 int count;
1046 /* Remember when we find a valid hexadecimal digit. */
1047 if (radix == 16)
1048 found_hex_digits = 1;
1049 /* Remember when we find an invalid octal digit. */
1050 else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1051 found_non_octal_digits = 1;
1053 literal_token [literal_index++] = c;
1054 /* This section of code if borrowed from gcc/c-lex.c. */
1055 for (count = 0; count < TOTAL_PARTS; count++)
1057 parts[count] *= radix;
1058 if (count)
1060 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1061 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1063 else
1064 parts[0] += numeric;
1066 if (parts [TOTAL_PARTS-1] != 0)
1067 overflow = 1;
1068 /* End borrowed section. */
1069 c = java_get_unicode ();
1072 /* If we have something from the FP char set but not a digit, parse
1073 a FP literal. */
1074 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1076 int stage = 0;
1077 int seen_digit = (literal_index ? 1 : 0);
1078 int seen_exponent = 0;
1079 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1080 double unless specified. */
1082 /* It is ok if the radix is 8 because this just means we've
1083 seen a leading `0'. However, radix==16 is invalid. */
1084 if (radix == 16)
1085 java_lex_error ("Can't express non-decimal FP literal", 0);
1086 radix = 10;
1088 for (;;)
1090 if (c == '.')
1092 if (stage < 1)
1094 stage = 1;
1095 literal_token [literal_index++ ] = c;
1096 c = java_get_unicode ();
1098 else
1099 java_lex_error ("Invalid character in FP literal", 0);
1102 if (c == 'e' || c == 'E')
1104 if (stage < 2)
1106 /* {E,e} must have seen at least a digit. */
1107 if (!seen_digit)
1108 java_lex_error
1109 ("Invalid FP literal, mantissa must have digit", 0);
1110 seen_digit = 0;
1111 seen_exponent = 1;
1112 stage = 2;
1113 literal_token [literal_index++] = c;
1114 c = java_get_unicode ();
1116 else
1117 java_lex_error ("Invalid character in FP literal", 0);
1119 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1121 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1122 stage = 4; /* So we fall through. */
1125 if ((c=='-' || c =='+') && stage == 2)
1127 stage = 3;
1128 literal_token [literal_index++] = c;
1129 c = java_get_unicode ();
1132 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1133 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1134 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1135 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1137 if (JAVA_ASCII_DIGIT (c))
1138 seen_digit = 1;
1139 if (stage == 2)
1140 stage = 3;
1141 literal_token [literal_index++ ] = c;
1142 c = java_get_unicode ();
1144 else
1146 if (stage != 4) /* Don't push back fF/dD. */
1147 java_unget_unicode ();
1149 /* An exponent (if any) must have seen a digit. */
1150 if (seen_exponent && !seen_digit)
1151 java_lex_error
1152 ("Invalid FP literal, exponent must have digit", 0);
1154 literal_token [literal_index] = '\0';
1155 JAVA_LEX_LIT (literal_token, radix);
1157 #ifndef JC1_LITE
1158 java_perform_atof (java_lval, literal_token,
1159 fflag, number_beginning);
1160 #endif
1161 return FP_LIT_TK;
1164 } /* JAVA_ASCII_FPCHAR (c) */
1166 /* Here we get back to converting the integral literal. */
1167 if (radix == 16 && ! found_hex_digits)
1168 java_lex_error
1169 ("0x must be followed by at least one hexadecimal digit", 0);
1170 else if (radix == 8 && found_non_octal_digits)
1171 java_lex_error ("Octal literal contains digit out of range", 0);
1172 else if (c == 'L' || c == 'l')
1173 long_suffix = 1;
1174 else
1175 java_unget_unicode ();
1177 #ifdef JAVA_LEX_DEBUG
1178 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1179 JAVA_LEX_LIT (literal_token, radix);
1180 #endif
1181 /* This section of code is borrowed from gcc/c-lex.c. */
1182 if (!overflow)
1184 bytes = GET_TYPE_PRECISION (long_type_node);
1185 for (i = bytes; i < TOTAL_PARTS; i++)
1186 if (parts [i])
1188 overflow = 1;
1189 break;
1192 high = low = 0;
1193 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1195 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1196 / HOST_BITS_PER_CHAR)]
1197 << (i * HOST_BITS_PER_CHAR));
1198 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1200 /* End borrowed section. */
1202 #ifndef JC1_LITE
1203 /* Range checking. */
1204 value = build_int_2 (low, high);
1205 /* Temporarily set type to unsigned. */
1206 SET_LVAL_NODE_TYPE (value, (long_suffix
1207 ? unsigned_long_type_node
1208 : unsigned_int_type_node));
1210 /* For base 10 numbers, only values up to the highest value
1211 (plus one) can be written. For instance, only ints up to
1212 2147483648 can be written. The special case of the largest
1213 negative value is handled elsewhere. For other bases, any
1214 number can be represented. */
1215 if (overflow || (radix == 10
1216 && tree_int_cst_lt (long_suffix
1217 ? decimal_long_max
1218 : decimal_int_max,
1219 value)))
1221 if (long_suffix)
1222 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1223 else
1224 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1227 /* Sign extend the value. */
1228 SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1229 force_fit_type (value, 0);
1230 JAVA_RADIX10_FLAG (value) = radix == 10;
1231 #else
1232 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1233 long_suffix ? long_type_node : int_type_node);
1234 #endif
1235 return INT_LIT_TK;
1238 /* Character literals. */
1239 if (c == '\'')
1241 int char_lit;
1242 if ((c = java_get_unicode ()) == '\\')
1243 char_lit = java_parse_escape_sequence ();
1244 else
1246 if (c == '\n' || c == '\'')
1247 java_lex_error ("Invalid character literal", 0);
1248 char_lit = c;
1251 c = java_get_unicode ();
1253 if ((c == '\n') || (c == UEOF))
1254 java_lex_error ("Character literal not terminated at end of line", 0);
1255 if (c != '\'')
1256 java_lex_error ("Syntax error in character literal", 0);
1258 if (char_lit == JAVA_CHAR_ERROR)
1259 char_lit = 0; /* We silently convert it to zero. */
1261 JAVA_LEX_CHAR_LIT (char_lit);
1262 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1263 return CHAR_LIT_TK;
1266 /* String literals. */
1267 if (c == '"')
1269 int no_error;
1270 char *string;
1272 for (no_error = 1, c = java_get_unicode ();
1273 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1275 if (c == '\\')
1276 c = java_parse_escape_sequence ();
1277 if (c == JAVA_CHAR_ERROR)
1279 no_error = 0;
1280 c = 0; /* We silently convert it to zero. */
1282 java_unicode_2_utf8 (c);
1284 if (c == '\n' || c == UEOF) /* ULT. */
1286 lineno--; /* Refer to the line where the terminator was seen. */
1287 java_lex_error ("String not terminated at end of line", 0);
1288 lineno++;
1291 obstack_1grow (&temporary_obstack, '\0');
1292 string = obstack_finish (&temporary_obstack);
1293 #ifndef JC1_LITE
1294 if (!no_error || (c != '"'))
1295 java_lval->node = error_mark_node; /* FIXME: Requires futher
1296 testing. */
1297 else
1298 java_lval->node = build_string (strlen (string), string);
1299 #endif
1300 obstack_free (&temporary_obstack, string);
1301 return STRING_LIT_TK;
1304 /* Separator. */
1305 switch (c)
1307 case '(':
1308 JAVA_LEX_SEP (c);
1309 BUILD_OPERATOR (OP_TK);
1310 case ')':
1311 JAVA_LEX_SEP (c);
1312 return CP_TK;
1313 case '{':
1314 JAVA_LEX_SEP (c);
1315 if (ctxp->ccb_indent == 1)
1316 ctxp->first_ccb_indent1 = lineno;
1317 ctxp->ccb_indent++;
1318 BUILD_OPERATOR (OCB_TK);
1319 case '}':
1320 JAVA_LEX_SEP (c);
1321 ctxp->ccb_indent--;
1322 if (ctxp->ccb_indent == 1)
1323 ctxp->last_ccb_indent1 = lineno;
1324 BUILD_OPERATOR (CCB_TK);
1325 case '[':
1326 JAVA_LEX_SEP (c);
1327 BUILD_OPERATOR (OSB_TK);
1328 case ']':
1329 JAVA_LEX_SEP (c);
1330 return CSB_TK;
1331 case ';':
1332 JAVA_LEX_SEP (c);
1333 return SC_TK;
1334 case ',':
1335 JAVA_LEX_SEP (c);
1336 return C_TK;
1337 case '.':
1338 JAVA_LEX_SEP (c);
1339 BUILD_OPERATOR (DOT_TK);
1340 /* return DOT_TK; */
1343 /* Operators. */
1344 switch (c)
1346 case '=':
1347 if ((c = java_get_unicode ()) == '=')
1349 BUILD_OPERATOR (EQ_TK);
1351 else
1353 /* Equals is used in two different locations. In the
1354 variable_declarator: rule, it has to be seen as '=' as opposed
1355 to being seen as an ordinary assignment operator in
1356 assignment_operators: rule. */
1357 java_unget_unicode ();
1358 BUILD_OPERATOR (ASSIGN_TK);
1361 case '>':
1362 switch ((c = java_get_unicode ()))
1364 case '=':
1365 BUILD_OPERATOR (GTE_TK);
1366 case '>':
1367 switch ((c = java_get_unicode ()))
1369 case '>':
1370 if ((c = java_get_unicode ()) == '=')
1372 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1374 else
1376 java_unget_unicode ();
1377 BUILD_OPERATOR (ZRS_TK);
1379 case '=':
1380 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1381 default:
1382 java_unget_unicode ();
1383 BUILD_OPERATOR (SRS_TK);
1385 default:
1386 java_unget_unicode ();
1387 BUILD_OPERATOR (GT_TK);
1390 case '<':
1391 switch ((c = java_get_unicode ()))
1393 case '=':
1394 BUILD_OPERATOR (LTE_TK);
1395 case '<':
1396 if ((c = java_get_unicode ()) == '=')
1398 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1400 else
1402 java_unget_unicode ();
1403 BUILD_OPERATOR (LS_TK);
1405 default:
1406 java_unget_unicode ();
1407 BUILD_OPERATOR (LT_TK);
1410 case '&':
1411 switch ((c = java_get_unicode ()))
1413 case '&':
1414 BUILD_OPERATOR (BOOL_AND_TK);
1415 case '=':
1416 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1417 default:
1418 java_unget_unicode ();
1419 BUILD_OPERATOR (AND_TK);
1422 case '|':
1423 switch ((c = java_get_unicode ()))
1425 case '|':
1426 BUILD_OPERATOR (BOOL_OR_TK);
1427 case '=':
1428 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1429 default:
1430 java_unget_unicode ();
1431 BUILD_OPERATOR (OR_TK);
1434 case '+':
1435 switch ((c = java_get_unicode ()))
1437 case '+':
1438 BUILD_OPERATOR (INCR_TK);
1439 case '=':
1440 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1441 default:
1442 java_unget_unicode ();
1443 BUILD_OPERATOR (PLUS_TK);
1446 case '-':
1447 switch ((c = java_get_unicode ()))
1449 case '-':
1450 BUILD_OPERATOR (DECR_TK);
1451 case '=':
1452 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1453 default:
1454 java_unget_unicode ();
1455 BUILD_OPERATOR (MINUS_TK);
1458 case '*':
1459 if ((c = java_get_unicode ()) == '=')
1461 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1463 else
1465 java_unget_unicode ();
1466 BUILD_OPERATOR (MULT_TK);
1469 case '/':
1470 if ((c = java_get_unicode ()) == '=')
1472 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1474 else
1476 java_unget_unicode ();
1477 BUILD_OPERATOR (DIV_TK);
1480 case '^':
1481 if ((c = java_get_unicode ()) == '=')
1483 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1485 else
1487 java_unget_unicode ();
1488 BUILD_OPERATOR (XOR_TK);
1491 case '%':
1492 if ((c = java_get_unicode ()) == '=')
1494 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1496 else
1498 java_unget_unicode ();
1499 BUILD_OPERATOR (REM_TK);
1502 case '!':
1503 if ((c = java_get_unicode()) == '=')
1505 BUILD_OPERATOR (NEQ_TK);
1507 else
1509 java_unget_unicode ();
1510 BUILD_OPERATOR (NEG_TK);
1513 case '?':
1514 JAVA_LEX_OP ("?");
1515 BUILD_OPERATOR (REL_QM_TK);
1516 case ':':
1517 JAVA_LEX_OP (":");
1518 BUILD_OPERATOR (REL_CL_TK);
1519 case '~':
1520 BUILD_OPERATOR (NOT_TK);
1523 /* Keyword, boolean literal or null literal. */
1524 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1525 c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1527 java_unicode_2_utf8 (c);
1528 if (all_ascii && c >= 128)
1529 all_ascii = 0;
1530 ascii_index++;
1533 obstack_1grow (&temporary_obstack, '\0');
1534 string = obstack_finish (&temporary_obstack);
1535 if (c != UEOF)
1536 java_unget_unicode ();
1538 /* If we have something all ascii, we consider a keyword, a boolean
1539 literal, a null literal or an all ASCII identifier. Otherwise,
1540 this is an identifier (possibly not respecting formation rule). */
1541 if (all_ascii)
1543 const struct java_keyword *kw;
1544 if ((kw=java_keyword (string, ascii_index)))
1546 JAVA_LEX_KW (string);
1547 switch (kw->token)
1549 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1550 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1551 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1552 case PRIVATE_TK: case STRICT_TK:
1553 SET_MODIFIER_CTX (kw->token);
1554 return MODIFIER_TK;
1555 case FLOAT_TK:
1556 SET_LVAL_NODE (float_type_node);
1557 return FP_TK;
1558 case DOUBLE_TK:
1559 SET_LVAL_NODE (double_type_node);
1560 return FP_TK;
1561 case BOOLEAN_TK:
1562 SET_LVAL_NODE (boolean_type_node);
1563 return BOOLEAN_TK;
1564 case BYTE_TK:
1565 SET_LVAL_NODE (byte_type_node);
1566 return INTEGRAL_TK;
1567 case SHORT_TK:
1568 SET_LVAL_NODE (short_type_node);
1569 return INTEGRAL_TK;
1570 case INT_TK:
1571 SET_LVAL_NODE (int_type_node);
1572 return INTEGRAL_TK;
1573 case LONG_TK:
1574 SET_LVAL_NODE (long_type_node);
1575 return INTEGRAL_TK;
1576 case CHAR_TK:
1577 SET_LVAL_NODE (char_type_node);
1578 return INTEGRAL_TK;
1580 /* Keyword based literals. */
1581 case TRUE_TK:
1582 case FALSE_TK:
1583 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1584 boolean_true_node : boolean_false_node));
1585 return BOOL_LIT_TK;
1586 case NULL_TK:
1587 SET_LVAL_NODE (null_pointer_node);
1588 return NULL_TK;
1590 case ASSERT_TK:
1591 if (flag_assert)
1593 BUILD_OPERATOR (kw->token);
1594 return kw->token;
1596 else
1597 break;
1599 /* Some keyword we want to retain information on the location
1600 they where found. */
1601 case CASE_TK:
1602 case DEFAULT_TK:
1603 case SUPER_TK:
1604 case THIS_TK:
1605 case RETURN_TK:
1606 case BREAK_TK:
1607 case CONTINUE_TK:
1608 case TRY_TK:
1609 case CATCH_TK:
1610 case THROW_TK:
1611 case INSTANCEOF_TK:
1612 BUILD_OPERATOR (kw->token);
1614 default:
1615 return kw->token;
1620 /* We may have an ID here. */
1621 if (JAVA_START_CHAR_P (first_unicode))
1623 JAVA_LEX_ID (string);
1624 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1625 return ID_TK;
1628 /* Everything else is an invalid character in the input. */
1630 char lex_error_buffer [128];
1631 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1632 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1633 java_lex_error (lex_error_buffer, 1);
1635 return 0;
1638 #ifndef JC1_LITE
1639 /* This is called by the parser to see if an error should be generated
1640 due to numeric overflow. This function only handles the particular
1641 case of the largest negative value, and is only called in the case
1642 where this value is not preceded by `-'. */
1643 static void
1644 error_if_numeric_overflow (tree value)
1646 if (TREE_CODE (value) == INTEGER_CST
1647 && JAVA_RADIX10_FLAG (value)
1648 && tree_int_cst_sgn (value) < 0)
1650 if (TREE_TYPE (value) == long_type_node)
1651 java_lex_error ("Numeric overflow for `long' literal", 0);
1652 else
1653 java_lex_error ("Numeric overflow for `int' literal", 0);
1656 #endif /* JC1_LITE */
1658 static void
1659 java_unicode_2_utf8 (unicode_t unicode)
1661 if (RANGE (unicode, 0x01, 0x7f))
1662 obstack_1grow (&temporary_obstack, (char)unicode);
1663 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1665 obstack_1grow (&temporary_obstack,
1666 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1667 obstack_1grow (&temporary_obstack,
1668 (unsigned char)(0x80 | (unicode & 0x3f)));
1670 else /* Range 0x800-0xffff. */
1672 obstack_1grow (&temporary_obstack,
1673 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1674 obstack_1grow (&temporary_obstack,
1675 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1676 obstack_1grow (&temporary_obstack,
1677 (unsigned char)(0x80 | (unicode & 0x003f)));
1681 #ifndef JC1_LITE
1682 static tree
1683 build_wfl_node (tree node)
1685 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1686 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1687 TREE_TYPE (node) = NULL_TREE;
1688 return node;
1690 #endif
1692 static void
1693 java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
1695 #ifndef JC1_LITE
1696 ctxp->elc.line = ctxp->c_line->lineno;
1697 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1699 /* Might be caught in the middle of some error report. */
1700 ctxp->java_error_flag = 0;
1701 java_error (NULL);
1702 java_error (msg);
1703 #endif
1706 #ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, terminates a line,
   and 0 otherwise.  Both '\n' and '\r' terminate a line.  After a
   '\r', a directly following '\n' is consumed from FP so that the
   pair counts as a single terminator; any other following character
   is pushed back onto FP.  */
static int
java_is_eol (FILE *fp, int c)
{
  int lookahead;

  if (c == '\n')
    return 1;
  if (c != '\r')
    return 0;

  /* C is a carriage return: look one character ahead.  */
  lookahead = getc (fp);
  if (lookahead != '\n' && lookahead != EOF)
    ungetc (lookahead, fp);
  return 1;
}
1724 #endif
1726 char *
1727 java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
1728 int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
1730 #ifdef JC1_LITE
1731 return 0;
1732 #else
1733 /* Dumb implementation. Doesn't try to cache or optimize things. */
1734 /* First line of the file is line 1, first column is 1. */
1736 /* COL == -1 means, at the CR/LF in LINE. */
1737 /* COL == -2 means, at the first non space char in LINE. */
1739 FILE *fp;
1740 int c, ccol, cline = 1;
1741 int current_line_col = 0;
1742 int first_non_space = 0;
1743 char *base;
1745 if (!(fp = fopen (filename, "r")))
1746 fatal_io_error ("can't open %s", filename);
1748 while (cline != line)
1750 c = getc (fp);
1751 if (c == EOF)
1753 static const char msg[] = "<<file too short - unexpected EOF>>";
1754 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1755 goto have_line;
1757 if (java_is_eol (fp, c))
1758 cline++;
1761 /* Gather the chars of the current line in a buffer. */
1762 for (;;)
1764 c = getc (fp);
1765 if (c < 0 || java_is_eol (fp, c))
1766 break;
1767 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1768 first_non_space = current_line_col;
1769 obstack_1grow (&temporary_obstack, c);
1770 current_line_col++;
1772 have_line:
1774 obstack_1grow (&temporary_obstack, '\n');
1776 if (col == -1)
1778 col = current_line_col;
1779 first_non_space = 0;
1781 else if (col == -2)
1782 col = first_non_space;
1783 else
1784 first_non_space = 0;
1786 /* Place the '^' a the right position. */
1787 base = obstack_base (&temporary_obstack);
1788 for (ccol = 1; ccol <= col+3; ccol++)
1790 /* Compute \t when reaching first_non_space. */
1791 char c = (first_non_space ?
1792 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1793 obstack_1grow (&temporary_obstack, c);
1795 obstack_grow0 (&temporary_obstack, "^", 1);
1797 fclose (fp);
1798 return obstack_finish (&temporary_obstack);
1799 #endif
1802 #ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text starting at STR with the
   NUL terminated string NAME, one character of NAME at a time.

   On the first mismatch, return the difference between the character
   decoded from STR and the character of NAME.  If every character of
   NAME matched, return 0 when STR was consumed exactly up to its
   limit and 1 otherwise.

   NOTE(review): this relies on UTF8_GET advancing STR and on its
   result at the limit never being equal to a character of NAME --
   confirm against the definition of UTF8_GET.  */
static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *const limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; ++expected)
    {
      const int ch = UTF8_GET (str, limit);
      const int delta = ch - *expected;

      if (delta != 0)
        return delta;
    }

  if (str == limit)
    return 0;
  return 1;
}
/* All C++ keywords, including the GNU `__' spellings.  The table must
   stay sorted in byte order because cxx_keyword_p searches it by
   bisection.  Entries are grouped below by their leading
   character.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__", "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__", "__imag", "__imag__", "__inline", "__inline__",
  "__label__", "__null", "__real", "__real__",
  "__restrict", "__restrict__", "__signed", "__signed__",
  "__typeof", "__typeof__", "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class",
  "compl", "const", "const_cast", "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static",
  "static_cast", "struct", "switch",
  "template", "this", "throw", "true", "try",
  "typedef", "typeid", "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
1929 /* Return true if NAME is a C++ keyword. */
1932 cxx_keyword_p (const char *name, int length)
1934 int last = ARRAY_SIZE (cxx_keywords);
1935 int first = 0;
1936 int mid = (last + first) / 2;
1937 int old = -1;
1939 for (mid = (last + first) / 2;
1940 mid != old;
1941 old = mid, mid = (last + first) / 2)
1943 int kwl = strlen (cxx_keywords[mid]);
1944 int min_length = kwl > length ? length : kwl;
1945 int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1947 if (r == 0)
1949 int i;
1950 /* We've found a match if all the remaining characters are `$'. */
1951 for (i = min_length; i < length && name[i] == '$'; ++i)
1953 if (i == length)
1954 return 1;
1955 r = 1;
1958 if (r < 0)
1959 last = mid;
1960 else
1961 first = mid;
1963 return 0;
1965 #endif /* JC1_LITE */