* config/mips/mips.h (SUBTARGET_CPP_SIZE_SPEC): Remove duplicate
[official-gcc.git] / gcc / java / lex.c
blob17a7e1b62f0486c4eb933f8af6947915388ffd3a
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
/* This file defines java_lex (yylex), which reads a Java ASCII source
   file possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found except comments,
   white space and line terminators.  When necessary, it also fills in
   the java_lval (yylval) union.  It is implemented to be called by a
   re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification", J. Gosling, B. Joy, G. Steele,
   Addison Wesley 1996
   (http://java.sun.com/docs/books/jls/html/3.doc.html).  */
37 #include "keyword.h"
38 #include "flags.h"
39 #include "chartables.h"
41 /* Function declaration */
42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
44 static void java_lex_error PARAMS ((const char *, int));
45 #ifndef JC1_LITE
46 static int java_is_eol PARAMS ((FILE *, int));
47 static tree build_wfl_node PARAMS ((tree));
48 #endif
49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50 static int java_parse_escape_sequence PARAMS ((void));
51 static int java_start_char_p PARAMS ((unicode_t));
52 static int java_part_char_p PARAMS ((unicode_t));
53 static int java_parse_doc_section PARAMS ((int));
54 static void java_parse_end_comment PARAMS ((int));
55 static int java_get_unicode PARAMS ((void));
56 static int java_read_unicode PARAMS ((java_lexer *, int *));
57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
58 int *));
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static int java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
64 #ifndef JC1_LITE
65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
66 #endif
68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
69 #ifndef JC1_LITE
70 static void error_if_numeric_overflow PARAMS ((tree));
71 #endif
#ifdef HAVE_ICONV
/* Nonzero once `need_byteswap' below has been determined; the probe
   in java_new_lexer runs only while this is still zero.  */
static int byteswap_init = 0;

/* Some iconv() implementations (e.g., glibc 2.1.3) hand back UCS-2 in
   big-endian order instead of native order.  A single trial
   conversion is performed the first time a lexer is created, and its
   outcome is recorded here: nonzero means the two bytes of every
   converted character have to be exchanged.  */
static int need_byteswap = 0;
#endif
84 void
85 java_init_lex (finput, encoding)
86 FILE *finput;
87 const char *encoding;
89 #ifndef JC1_LITE
90 int java_lang_imported = 0;
92 if (!java_lang_id)
93 java_lang_id = get_identifier ("java.lang");
94 if (!java_lang_cloneable)
95 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
96 if (!java_io_serializable)
97 java_io_serializable = get_identifier ("java.io.Serializable");
98 if (!inst_id)
99 inst_id = get_identifier ("inst$");
100 if (!wpv_id)
101 wpv_id = get_identifier ("write_parm_value$");
103 if (!java_lang_imported)
105 tree node = build_tree_list
106 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
107 read_import_dir (TREE_PURPOSE (node));
108 TREE_CHAIN (node) = ctxp->import_demand_list;
109 ctxp->import_demand_list = node;
110 java_lang_imported = 1;
113 if (!wfl_operator)
114 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
115 if (!label_id)
116 label_id = get_identifier ("$L");
117 if (!wfl_append)
118 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
119 if (!wfl_string_buffer)
120 wfl_string_buffer =
121 build_expr_wfl (get_identifier (flag_emit_class_files
122 ? "java.lang.StringBuffer"
123 : "gnu.gcj.runtime.StringBuffer"),
124 NULL, 0, 0);
125 if (!wfl_to_string)
126 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
128 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
129 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
131 memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
132 memset ((PTR) current_jcf, 0, sizeof (JCF));
133 ctxp->current_parsed_class = NULL;
134 ctxp->package = NULL_TREE;
135 #endif
137 ctxp->filename = input_filename;
138 ctxp->lineno = lineno = 0;
139 ctxp->p_line = NULL;
140 ctxp->c_line = NULL;
141 ctxp->java_error_flag = 0;
142 ctxp->lexer = java_new_lexer (finput, encoding);
145 static char *
146 java_sprint_unicode (line, i)
147 struct java_line *line;
148 int i;
150 static char buffer [10];
151 if (line->unicode_escape_p [i] || line->line [i] > 128)
152 sprintf (buffer, "\\u%04x", line->line [i]);
153 else
155 buffer [0] = line->line [i];
156 buffer [1] = '\0';
158 return buffer;
161 static unicode_t
162 java_sneak_unicode ()
164 return (ctxp->c_line->line [ctxp->c_line->current]);
167 static void
168 java_unget_unicode ()
170 if (!ctxp->c_line->current)
171 /* Can't unget unicode. */
172 abort ();
174 ctxp->c_line->current--;
175 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
178 static void
179 java_allocate_new_line ()
181 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
182 char ahead_escape_p = (ctxp->c_line ?
183 ctxp->c_line->unicode_escape_ahead_p : 0);
185 if (ctxp->c_line && !ctxp->c_line->white_space_only)
187 if (ctxp->p_line)
189 free (ctxp->p_line->unicode_escape_p);
190 free (ctxp->p_line->line);
191 free (ctxp->p_line);
193 ctxp->p_line = ctxp->c_line;
194 ctxp->c_line = NULL; /* Reallocated */
197 if (!ctxp->c_line)
199 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
200 ctxp->c_line->max = JAVA_LINE_MAX;
201 ctxp->c_line->line = (unicode_t *)xmalloc
202 (sizeof (unicode_t)*ctxp->c_line->max);
203 ctxp->c_line->unicode_escape_p =
204 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
205 ctxp->c_line->white_space_only = 0;
208 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
209 ctxp->c_line->char_col = ctxp->c_line->current = 0;
210 if (ahead)
212 ctxp->c_line->line [ctxp->c_line->size] = ahead;
213 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
214 ctxp->c_line->size++;
216 ctxp->c_line->ahead [0] = 0;
217 ctxp->c_line->unicode_escape_ahead_p = 0;
218 ctxp->c_line->lineno = ++lineno;
219 ctxp->c_line->white_space_only = 1;
222 /* Create a new lexer object. */
224 java_lexer *
225 java_new_lexer (finput, encoding)
226 FILE *finput;
227 const char *encoding;
229 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
230 int enc_error = 0;
232 lex->finput = finput;
233 lex->bs_count = 0;
234 lex->unget_value = 0;
235 lex->hit_eof = 0;
237 #ifdef HAVE_ICONV
238 lex->handle = iconv_open ("UCS-2", encoding);
239 if (lex->handle != (iconv_t) -1)
241 lex->first = -1;
242 lex->last = -1;
243 lex->out_first = -1;
244 lex->out_last = -1;
245 lex->read_anything = 0;
246 lex->use_fallback = 0;
248 /* Work around broken iconv() implementations by doing checking at
249 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
250 then all UCS-2 encoders will be broken. Perhaps not a valid
251 assumption. */
252 if (! byteswap_init)
254 iconv_t handle;
256 byteswap_init = 1;
258 handle = iconv_open ("UCS-2", "UTF-8");
259 if (handle != (iconv_t) -1)
261 unicode_t result;
262 unsigned char in[3];
263 char *inp, *outp;
264 size_t inc, outc, r;
266 /* This is the UTF-8 encoding of \ufeff. */
267 in[0] = 0xef;
268 in[1] = 0xbb;
269 in[2] = 0xbf;
271 inp = in;
272 inc = 3;
273 outp = (char *) &result;
274 outc = 2;
276 r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
277 &outp, &outc);
278 iconv_close (handle);
279 /* Conversion must be complete for us to use the result. */
280 if (r != (size_t) -1 && inc == 0 && outc == 0)
281 need_byteswap = (result != 0xfeff);
285 lex->byte_swap = need_byteswap;
287 else
288 #endif /* HAVE_ICONV */
290 /* If iconv failed, use the internal decoder if the default
291 encoding was requested. This code is used on platforms where
292 iconv exists but is insufficient for our needs. For
293 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2. */
294 if (strcmp (encoding, DEFAULT_ENCODING))
295 enc_error = 1;
296 #ifdef HAVE_ICONV
297 else
298 lex->use_fallback = 1;
299 #endif /* HAVE_ICONV */
302 if (enc_error)
303 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
305 return lex;
308 void
309 java_destroy_lexer (lex)
310 java_lexer *lex;
312 #ifdef HAVE_ICONV
313 if (! lex->use_fallback)
314 iconv_close (lex->handle);
315 #endif
316 free (lex);
319 static int
320 java_read_char (lex)
321 java_lexer *lex;
323 if (lex->unget_value)
325 unicode_t r = lex->unget_value;
326 lex->unget_value = 0;
327 return r;
330 #ifdef HAVE_ICONV
331 if (! lex->use_fallback)
333 size_t ir, inbytesleft, in_save, out_count, out_save;
334 char *inp, *outp;
335 unicode_t result;
337 /* If there is data which has already been converted, use it. */
338 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
340 lex->out_first = 0;
341 lex->out_last = 0;
343 while (1)
345 /* See if we need to read more data. If FIRST == 0 then
346 the previous conversion attempt ended in the middle of
347 a character at the end of the buffer. Otherwise we
348 only have to read if the buffer is empty. */
349 if (lex->first == 0 || lex->first >= lex->last)
351 int r;
353 if (lex->first >= lex->last)
355 lex->first = 0;
356 lex->last = 0;
358 if (feof (lex->finput))
359 return UEOF;
360 r = fread (&lex->buffer[lex->last], 1,
361 sizeof (lex->buffer) - lex->last,
362 lex->finput);
363 lex->last += r;
366 inbytesleft = lex->last - lex->first;
367 out_count = sizeof (lex->out_buffer) - lex->out_last;
369 if (inbytesleft == 0)
371 /* We've tried to read and there is nothing left. */
372 return UEOF;
375 in_save = inbytesleft;
376 out_save = out_count;
377 inp = &lex->buffer[lex->first];
378 outp = &lex->out_buffer[lex->out_last];
379 ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
380 &inbytesleft, &outp, &out_count);
382 /* If we haven't read any bytes, then look to see if we
383 have read a BOM. */
384 if (! lex->read_anything && out_save - out_count >= 2)
386 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
387 if (uc == 0xfeff)
389 lex->byte_swap = 0;
390 lex->out_first += 2;
392 else if (uc == 0xfffe)
394 lex->byte_swap = 1;
395 lex->out_first += 2;
397 lex->read_anything = 1;
400 if (lex->byte_swap)
402 unsigned int i;
403 for (i = 0; i < out_save - out_count; i += 2)
405 char t = lex->out_buffer[lex->out_last + i];
406 lex->out_buffer[lex->out_last + i]
407 = lex->out_buffer[lex->out_last + i + 1];
408 lex->out_buffer[lex->out_last + i + 1] = t;
412 lex->first += in_save - inbytesleft;
413 lex->out_last += out_save - out_count;
415 /* If we converted anything at all, move along. */
416 if (out_count != out_save)
417 break;
419 if (ir == (size_t) -1)
421 if (errno == EINVAL)
423 /* This is ok. This means that the end of our buffer
424 is in the middle of a character sequence. We just
425 move the valid part of the buffer to the beginning
426 to force a read. */
427 memmove (&lex->buffer[0], &lex->buffer[lex->first],
428 lex->last - lex->first);
429 lex->last -= lex->first;
430 lex->first = 0;
432 else
434 /* A more serious error. */
435 java_lex_error ("unrecognized character in input stream",
437 return UEOF;
443 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
445 /* Don't have any data. */
446 return UEOF;
449 /* Success. */
450 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
451 lex->out_first += 2;
452 return result;
454 else
455 #endif /* HAVE_ICONV */
457 int c, c1, c2;
458 c = getc (lex->finput);
460 if (c == EOF)
461 return UEOF;
462 if (c < 128)
463 return (unicode_t) c;
464 else
466 if ((c & 0xe0) == 0xc0)
468 c1 = getc (lex->finput);
469 if ((c1 & 0xc0) == 0x80)
471 unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
472 /* Check for valid 2-byte characters. We explicitly
473 allow \0 because this encoding is common in the
474 Java world. */
475 if (r == 0 || (r >= 0x80 && r <= 0x7ff))
476 return r;
479 else if ((c & 0xf0) == 0xe0)
481 c1 = getc (lex->finput);
482 if ((c1 & 0xc0) == 0x80)
484 c2 = getc (lex->finput);
485 if ((c2 & 0xc0) == 0x80)
487 unicode_t r = (unicode_t)(((c & 0xf) << 12) +
488 (( c1 & 0x3f) << 6)
489 + (c2 & 0x3f));
490 /* Check for valid 3-byte characters.
491 Don't allow surrogate, \ufffe or \uffff. */
492 if (r >= 0x800 && r <= 0xffff
493 && ! (r >= 0xd800 && r <= 0xdfff)
494 && r != 0xfffe && r != 0xffff)
495 return r;
500 /* We simply don't support invalid characters. We also
501 don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
502 cannot be valid Java characters. */
503 java_lex_error ("malformed UTF-8 character", 0);
507 /* We only get here on error. */
508 return UEOF;
511 static void
512 java_store_unicode (l, c, unicode_escape_p)
513 struct java_line *l;
514 unicode_t c;
515 int unicode_escape_p;
517 if (l->size == l->max)
519 l->max += JAVA_LINE_MAX;
520 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
521 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
522 sizeof (char)*l->max);
524 l->line [l->size] = c;
525 l->unicode_escape_p [l->size++] = unicode_escape_p;
528 static int
529 java_read_unicode (lex, unicode_escape_p)
530 java_lexer *lex;
531 int *unicode_escape_p;
533 int c;
535 c = java_read_char (lex);
536 *unicode_escape_p = 0;
538 if (c != '\\')
540 lex->bs_count = 0;
541 return c;
544 ++lex->bs_count;
545 if ((lex->bs_count) % 2 == 1)
547 /* Odd number of \ seen. */
548 c = java_read_char (lex);
549 if (c == 'u')
551 unicode_t unicode = 0;
552 int shift = 12;
554 /* Recognize any number of `u's in \u. */
555 while ((c = java_read_char (lex)) == 'u')
558 /* Unget the most recent character as it is not a `u'. */
559 if (c == UEOF)
560 return UEOF;
561 lex->unget_value = c;
563 /* Next should be 4 hex digits, otherwise it's an error.
564 The hex value is converted into the unicode, pushed into
565 the Unicode stream. */
566 for (shift = 12; shift >= 0; shift -= 4)
568 if ((c = java_read_char (lex)) == UEOF)
569 return UEOF;
570 if (hex_p (c))
571 unicode |= (unicode_t)(hex_value (c) << shift);
572 else
573 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
575 lex->bs_count = 0;
576 *unicode_escape_p = 1;
577 return unicode;
579 lex->unget_value = c;
581 return (unicode_t) '\\';
584 static int
585 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
586 java_lexer *lex;
587 int *unicode_escape_p;
589 int c = java_read_unicode (lex, unicode_escape_p);
591 if (c == '\r')
593 /* We have to read ahead to see if we got \r\n. In that case we
594 return a single line terminator. */
595 int dummy;
596 c = java_read_unicode (lex, &dummy);
597 if (c != '\n')
598 lex->unget_value = c;
599 /* In either case we must return a newline. */
600 c = '\n';
603 return c;
606 static int
607 java_get_unicode ()
609 /* It's time to read a line when... */
610 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
612 int c;
613 int found_chars = 0;
615 if (ctxp->lexer->hit_eof)
616 return UEOF;
618 java_allocate_new_line ();
619 if (ctxp->c_line->line[0] != '\n')
621 for (;;)
623 int unicode_escape_p;
624 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
625 &unicode_escape_p);
626 if (c != UEOF)
628 found_chars = 1;
629 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
630 if (ctxp->c_line->white_space_only
631 && !JAVA_WHITE_SPACE_P (c)
632 && c != '\n')
633 ctxp->c_line->white_space_only = 0;
635 if ((c == '\n') || (c == UEOF))
636 break;
639 if (c == UEOF && ! found_chars)
641 ctxp->lexer->hit_eof = 1;
642 return UEOF;
646 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
647 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
648 return ctxp->c_line->line [ctxp->c_line->current++];
651 /* Parse the end of a C style comment.
652 * C is the first character following the '/' and '*'. */
653 static void
654 java_parse_end_comment (c)
655 int c;
657 for ( ;; c = java_get_unicode ())
659 switch (c)
661 case UEOF:
662 java_lex_error ("Comment not terminated at end of input", 0);
663 return;
664 case '*':
665 switch (c = java_get_unicode ())
667 case UEOF:
668 java_lex_error ("Comment not terminated at end of input", 0);
669 return;
670 case '/':
671 return;
672 case '*': /* reparse only '*' */
673 java_unget_unicode ();
679 /* Parse the documentation section. Keywords must be at the beginning
680 of a documentation comment line (ignoring white space and any `*'
681 character). Parsed keyword(s): @DEPRECATED. */
683 static int
684 java_parse_doc_section (c)
685 int c;
687 int valid_tag = 0, seen_star = 0;
689 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
691 switch (c)
693 case '*':
694 seen_star = 1;
695 break;
696 case '\n': /* ULT */
697 valid_tag = 1;
698 default:
699 seen_star = 0;
701 c = java_get_unicode();
704 if (c == UEOF)
705 java_lex_error ("Comment not terminated at end of input", 0);
707 if (seen_star && (c == '/'))
708 return 1; /* Goto step1 in caller */
710 /* We're parsing @deprecated */
711 if (valid_tag && (c == '@'))
713 char tag [11];
714 int tag_index = 0;
716 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
718 c = java_get_unicode ();
719 tag [tag_index++] = c;
722 if (c == UEOF)
723 java_lex_error ("Comment not terminated at end of input", 0);
724 tag [tag_index] = '\0';
726 if (!strcmp (tag, "deprecated"))
727 ctxp->deprecated = 1;
729 java_unget_unicode ();
730 return 0;
733 /* Return true if C is a valid start character for a Java identifier.
734 This is only called if C >= 128 -- smaller values are handled
735 inline. However, this function handles all values anyway. */
736 static int
737 java_start_char_p (c)
738 unicode_t c;
740 unsigned int hi = c / 256;
741 const char *const page = type_table[hi];
742 unsigned long val = (unsigned long) page;
743 int flags;
745 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
746 flags = page[c & 255];
747 else
748 flags = val;
750 return flags & LETTER_START;
753 /* Return true if C is a valid part character for a Java identifier.
754 This is only called if C >= 128 -- smaller values are handled
755 inline. However, this function handles all values anyway. */
756 static int
757 java_part_char_p (c)
758 unicode_t c;
760 unsigned int hi = c / 256;
761 const char *const page = type_table[hi];
762 unsigned long val = (unsigned long) page;
763 int flags;
765 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
766 flags = page[c & 255];
767 else
768 flags = val;
770 return flags & LETTER_PART;
773 static int
774 java_parse_escape_sequence ()
776 unicode_t char_lit;
777 int c;
779 switch (c = java_get_unicode ())
781 case 'b':
782 return (unicode_t)0x8;
783 case 't':
784 return (unicode_t)0x9;
785 case 'n':
786 return (unicode_t)0xa;
787 case 'f':
788 return (unicode_t)0xc;
789 case 'r':
790 return (unicode_t)0xd;
791 case '"':
792 return (unicode_t)0x22;
793 case '\'':
794 return (unicode_t)0x27;
795 case '\\':
796 return (unicode_t)0x5c;
797 case '0': case '1': case '2': case '3': case '4':
798 case '5': case '6': case '7':
800 int octal_escape[3];
801 int octal_escape_index = 0;
802 int max = 3;
803 int i, shift;
805 for (; octal_escape_index < max && RANGE (c, '0', '7');
806 c = java_get_unicode ())
808 if (octal_escape_index == 0 && c > '3')
810 /* According to the grammar, `\477' has a well-defined
811 meaning -- it is `\47' followed by `7'. */
812 --max;
814 octal_escape [octal_escape_index++] = c;
817 java_unget_unicode ();
819 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
820 i < octal_escape_index; i++, shift -= 3)
821 char_lit |= (octal_escape [i] - '0') << shift;
823 return char_lit;
825 default:
826 java_lex_error ("Invalid character in escape sequence", 0);
827 return JAVA_CHAR_ERROR;
831 /* Isolate the code which may raise an arithmetic exception in its
832 own function. */
834 #ifndef JC1_LITE
835 struct jpa_args
837 YYSTYPE *java_lval;
838 char *literal_token;
839 int fflag;
840 int number_beginning;
843 #define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
845 static void java_perform_atof PARAMS ((PTR));
847 static void
848 java_perform_atof (av)
849 PTR av;
851 struct jpa_args *a = (struct jpa_args *)av;
852 YYSTYPE *java_lval = a->java_lval;
853 int number_beginning = a->number_beginning;
854 REAL_VALUE_TYPE value;
855 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
857 SET_REAL_VALUE_ATOF (value,
858 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
860 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
862 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
863 value = DCONST0;
865 else if (IS_ZERO (value))
867 /* We check to see if the value is really 0 or if we've found an
868 underflow. We do this in the most primitive imaginable way. */
869 int really_zero = 1;
870 char *p = a->literal_token;
871 if (*p == '-')
872 ++p;
873 while (*p && *p != 'e' && *p != 'E')
875 if (*p != '0' && *p != '.')
877 really_zero = 0;
878 break;
880 ++p;
882 if (! really_zero)
884 int i = ctxp->c_line->current;
885 ctxp->c_line->current = number_beginning;
886 java_lex_error ("Floating point literal underflow", 0);
887 ctxp->c_line->current = i;
891 SET_LVAL_NODE_TYPE (build_real (type, value), type);
893 #endif
895 static int yylex PARAMS ((YYSTYPE *));
897 static int
898 #ifdef JC1_LITE
899 yylex (java_lval)
900 #else
901 java_lex (java_lval)
902 #endif
903 YYSTYPE *java_lval;
905 int c;
906 unicode_t first_unicode;
907 int ascii_index, all_ascii;
908 char *string;
910 /* Translation of the Unicode escape in the raw stream of Unicode
911 characters. Takes care of line terminator. */
912 step1:
913 /* Skip white spaces: SP, TAB and FF or ULT */
914 for (c = java_get_unicode ();
915 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
916 if (c == '\n')
918 ctxp->elc.line = ctxp->c_line->lineno;
919 ctxp->elc.col = ctxp->c_line->char_col-2;
922 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
924 if (c == 0x1a) /* CTRL-Z */
926 if ((c = java_get_unicode ()) == UEOF)
927 return 0; /* Ok here */
928 else
929 java_unget_unicode (); /* Caught later, at the end of the function */
931 /* Handle EOF here */
932 if (c == UEOF) /* Should probably do something here... */
933 return 0;
935 /* Take care of eventual comments. */
936 if (c == '/')
938 switch (c = java_get_unicode ())
940 case '/':
941 for (;;)
943 c = java_get_unicode ();
944 if (c == UEOF)
946 /* It is ok to end a `//' comment with EOF, unless
947 we're being pedantic. */
948 if (pedantic)
949 java_lex_error ("Comment not terminated at end of input",
951 return 0;
953 if (c == '\n') /* ULT */
954 goto step1;
956 break;
958 case '*':
959 if ((c = java_get_unicode ()) == '*')
961 if ((c = java_get_unicode ()) == '/')
962 goto step1; /* Empy documentation comment */
963 else if (java_parse_doc_section (c))
964 goto step1;
967 java_parse_end_comment ((c = java_get_unicode ()));
968 goto step1;
969 break;
970 default:
971 java_unget_unicode ();
972 c = '/';
973 break;
977 ctxp->elc.line = ctxp->c_line->lineno;
978 ctxp->elc.prev_col = ctxp->elc.col;
979 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
980 if (ctxp->elc.col < 0)
981 abort ();
983 /* Numeric literals */
984 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
986 /* This section of code is borrowed from gcc/c-lex.c */
987 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
988 int parts[TOTAL_PARTS];
989 HOST_WIDE_INT high, low;
990 /* End borrowed section */
991 char literal_token [256];
992 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
993 int found_hex_digits = 0;
994 int i;
995 #ifndef JC1_LITE
996 int number_beginning = ctxp->c_line->current;
997 tree value;
998 #endif
1000 /* We might have a . separator instead of a FP like .[0-9]* */
1001 if (c == '.')
1003 unicode_t peep = java_sneak_unicode ();
1005 if (!JAVA_ASCII_DIGIT (peep))
1007 JAVA_LEX_SEP('.');
1008 BUILD_OPERATOR (DOT_TK);
1012 for (i = 0; i < TOTAL_PARTS; i++)
1013 parts [i] = 0;
1015 if (c == '0')
1017 c = java_get_unicode ();
1018 if (c == 'x' || c == 'X')
1020 radix = 16;
1021 c = java_get_unicode ();
1023 else if (JAVA_ASCII_DIGIT (c))
1024 radix = 8;
1025 else if (c == '.')
1027 /* Push the '.' back and prepare for a FP parsing... */
1028 java_unget_unicode ();
1029 c = '0';
1031 else
1033 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
1034 JAVA_LEX_LIT ("0", 10);
1035 switch (c)
1037 case 'L': case 'l':
1038 SET_LVAL_NODE (long_zero_node);
1039 return (INT_LIT_TK);
1040 case 'f': case 'F':
1041 SET_LVAL_NODE (float_zero_node);
1042 return (FP_LIT_TK);
1043 case 'd': case 'D':
1044 SET_LVAL_NODE (double_zero_node);
1045 return (FP_LIT_TK);
1046 default:
1047 java_unget_unicode ();
1048 SET_LVAL_NODE (integer_zero_node);
1049 return (INT_LIT_TK);
1053 /* Parse the first part of the literal, until we find something
1054 which is not a number. */
1055 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
1056 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1057 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
1059 /* We store in a string (in case it turns out to be a FP) and in
1060 PARTS if we have to process a integer literal. */
1061 int numeric = hex_value (c);
1062 int count;
1064 /* Remember when we find a valid hexadecimal digit */
1065 if (radix == 16)
1066 found_hex_digits = 1;
1068 literal_token [literal_index++] = c;
1069 /* This section of code if borrowed from gcc/c-lex.c */
1070 for (count = 0; count < TOTAL_PARTS; count++)
1072 parts[count] *= radix;
1073 if (count)
1075 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1076 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1078 else
1079 parts[0] += numeric;
1081 if (parts [TOTAL_PARTS-1] != 0)
1082 overflow = 1;
1083 /* End borrowed section. */
1084 c = java_get_unicode ();
1087 /* If we have something from the FP char set but not a digit, parse
1088 a FP literal. */
1089 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1091 int stage = 0;
1092 int seen_digit = (literal_index ? 1 : 0);
1093 int seen_exponent = 0;
1094 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1095 double unless specified. */
1097 /* It is ok if the radix is 8 because this just means we've
1098 seen a leading `0'. However, radix==16 is invalid. */
1099 if (radix == 16)
1100 java_lex_error ("Can't express non-decimal FP literal", 0);
1101 radix = 10;
1103 for (;;)
1105 if (c == '.')
1107 if (stage < 1)
1109 stage = 1;
1110 literal_token [literal_index++ ] = c;
1111 c = java_get_unicode ();
1113 else
1114 java_lex_error ("Invalid character in FP literal", 0);
1117 if (c == 'e' || c == 'E')
1119 if (stage < 2)
1121 /* {E,e} must have seen at list a digit */
1122 if (!seen_digit)
1123 java_lex_error ("Invalid FP literal", 0);
1124 seen_digit = 0;
1125 seen_exponent = 1;
1126 stage = 2;
1127 literal_token [literal_index++] = c;
1128 c = java_get_unicode ();
1130 else
1131 java_lex_error ("Invalid character in FP literal", 0);
1133 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1135 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1136 stage = 4; /* So we fall through */
1139 if ((c=='-' || c =='+') && stage == 2)
1141 stage = 3;
1142 literal_token [literal_index++] = c;
1143 c = java_get_unicode ();
1146 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1147 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1148 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1149 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1151 if (JAVA_ASCII_DIGIT (c))
1152 seen_digit = 1;
1153 literal_token [literal_index++ ] = c;
1154 c = java_get_unicode ();
1156 else
1158 #ifndef JC1_LITE
1159 struct jpa_args a;
1160 #endif
1161 if (stage != 4) /* Don't push back fF/dD */
1162 java_unget_unicode ();
1164 /* An exponent (if any) must have seen a digit. */
1165 if (seen_exponent && !seen_digit)
1166 java_lex_error ("Invalid FP literal", 0);
1168 literal_token [literal_index] = '\0';
1169 JAVA_LEX_LIT (literal_token, radix);
1171 #ifndef JC1_LITE
1172 a.literal_token = literal_token;
1173 a.fflag = fflag;
1174 a.java_lval = java_lval;
1175 a.number_beginning = number_beginning;
1176 if (do_float_handler (java_perform_atof, (PTR) &a))
1177 return FP_LIT_TK;
1179 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
1180 #else
1181 return FP_LIT_TK;
1182 #endif
1185 } /* JAVA_ASCCI_FPCHAR (c) */
1187 if (radix == 16 && ! found_hex_digits)
1188 java_lex_error
1189 ("0x must be followed by at least one hexadecimal digit", 0);
1191 /* Here we get back to converting the integral literal. */
1192 if (c == 'L' || c == 'l')
1193 long_suffix = 1;
1194 else if (radix == 16 && JAVA_ASCII_LETTER (c))
1195 java_lex_error ("Digit out of range in hexadecimal literal", 0);
1196 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
1197 java_lex_error ("Digit out of range in octal literal", 0);
1198 else if (radix == 16 && !literal_index)
1199 java_lex_error ("No digit specified for hexadecimal literal", 0);
1200 else
1201 java_unget_unicode ();
1203 #ifdef JAVA_LEX_DEBUG
1204 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1205 JAVA_LEX_LIT (literal_token, radix);
1206 #endif
1207 /* This section of code is borrowed from gcc/c-lex.c */
1208 if (!overflow)
1210 bytes = GET_TYPE_PRECISION (long_type_node);
1211 for (i = bytes; i < TOTAL_PARTS; i++)
1212 if (parts [i])
1214 overflow = 1;
1215 break;
1218 high = low = 0;
1219 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1221 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1222 / HOST_BITS_PER_CHAR)]
1223 << (i * HOST_BITS_PER_CHAR));
1224 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1226 /* End borrowed section. */
1228 /* Range checking */
1229 if (long_suffix)
1231 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1232 9223372036854775807L is the biggest `long' literal that can be
1233 expressed using a 10 radix. For other radixes, everything that
1234 fits withing 64 bits is OK. */
1235 int hb = (high >> 31);
1236 if (overflow || (hb && low && radix == 10)
1237 || (hb && high & 0x7fffffff && radix == 10))
1238 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1240 else
1242 /* 2147483648 is valid if operand of a '-'. Otherwise,
1243 2147483647 is the biggest `int' literal that can be
1244 expressed using a 10 radix. For other radixes, everything
1245 that fits within 32 bits is OK. As all literals are
1246 signed, we sign extend here. */
1247 int hb = (low >> 31) & 0x1;
1248 if (overflow || high || (hb && low & 0x7fffffff && radix == 10))
1249 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1250 high = -hb;
1252 #ifndef JC1_LITE
1253 value = build_int_2 (low, high);
1254 JAVA_RADIX10_FLAG (value) = radix == 10;
1255 SET_LVAL_NODE_TYPE (value, long_suffix ? long_type_node : int_type_node);
1256 #else
1257 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1258 long_suffix ? long_type_node : int_type_node);
1259 #endif
1260 return INT_LIT_TK;
1263 /* Character literals */
1264 if (c == '\'')
1266 int char_lit;
1267 if ((c = java_get_unicode ()) == '\\')
1268 char_lit = java_parse_escape_sequence ();
1269 else
1271 if (c == '\n' || c == '\'')
1272 java_lex_error ("Invalid character literal", 0);
1273 char_lit = c;
1276 c = java_get_unicode ();
1278 if ((c == '\n') || (c == UEOF))
1279 java_lex_error ("Character literal not terminated at end of line", 0);
1280 if (c != '\'')
1281 java_lex_error ("Syntax error in character literal", 0);
1283 if (char_lit == JAVA_CHAR_ERROR)
1284 char_lit = 0; /* We silently convert it to zero */
1286 JAVA_LEX_CHAR_LIT (char_lit);
1287 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1288 return CHAR_LIT_TK;
1291 /* String literals */
1292 if (c == '"')
1294 int no_error;
1295 char *string;
1297 for (no_error = 1, c = java_get_unicode ();
1298 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1300 if (c == '\\')
1301 c = java_parse_escape_sequence ();
1302 if (c == JAVA_CHAR_ERROR)
1304 no_error = 0;
1305 c = 0; /* We silently convert it to zero. */
1307 java_unicode_2_utf8 (c);
1309 if (c == '\n' || c == UEOF) /* ULT */
1311 lineno--; /* Refer to the line the terminator was seen */
1312 java_lex_error ("String not terminated at end of line", 0);
1313 lineno++;
1316 obstack_1grow (&temporary_obstack, '\0');
1317 string = obstack_finish (&temporary_obstack);
1318 #ifndef JC1_LITE
1319 if (!no_error || (c != '"'))
1320 java_lval->node = error_mark_node; /* Requires futher testing FIXME */
1321 else
1322 java_lval->node = build_string (strlen (string), string);
1323 #endif
1324 obstack_free (&temporary_obstack, string);
1325 return STRING_LIT_TK;
1328 /* Separator */
1329 switch (c)
1331 case '(':
1332 JAVA_LEX_SEP (c);
1333 BUILD_OPERATOR (OP_TK);
1334 case ')':
1335 JAVA_LEX_SEP (c);
1336 return CP_TK;
1337 case '{':
1338 JAVA_LEX_SEP (c);
1339 if (ctxp->ccb_indent == 1)
1340 ctxp->first_ccb_indent1 = lineno;
1341 ctxp->ccb_indent++;
1342 BUILD_OPERATOR (OCB_TK);
1343 case '}':
1344 JAVA_LEX_SEP (c);
1345 ctxp->ccb_indent--;
1346 if (ctxp->ccb_indent == 1)
1347 ctxp->last_ccb_indent1 = lineno;
1348 BUILD_OPERATOR (CCB_TK);
1349 case '[':
1350 JAVA_LEX_SEP (c);
1351 BUILD_OPERATOR (OSB_TK);
1352 case ']':
1353 JAVA_LEX_SEP (c);
1354 return CSB_TK;
1355 case ';':
1356 JAVA_LEX_SEP (c);
1357 return SC_TK;
1358 case ',':
1359 JAVA_LEX_SEP (c);
1360 return C_TK;
1361 case '.':
1362 JAVA_LEX_SEP (c);
1363 BUILD_OPERATOR (DOT_TK);
1364 /* return DOT_TK; */
1367 /* Operators */
1368 switch (c)
1370 case '=':
1371 if ((c = java_get_unicode ()) == '=')
1373 BUILD_OPERATOR (EQ_TK);
1375 else
1377 /* Equals is used in two different locations. In the
1378 variable_declarator: rule, it has to be seen as '=' as opposed
1379 to being seen as an ordinary assignment operator in
1380 assignment_operators: rule. */
1381 java_unget_unicode ();
1382 BUILD_OPERATOR (ASSIGN_TK);
1385 case '>':
1386 switch ((c = java_get_unicode ()))
1388 case '=':
1389 BUILD_OPERATOR (GTE_TK);
1390 case '>':
1391 switch ((c = java_get_unicode ()))
1393 case '>':
1394 if ((c = java_get_unicode ()) == '=')
1396 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1398 else
1400 java_unget_unicode ();
1401 BUILD_OPERATOR (ZRS_TK);
1403 case '=':
1404 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1405 default:
1406 java_unget_unicode ();
1407 BUILD_OPERATOR (SRS_TK);
1409 default:
1410 java_unget_unicode ();
1411 BUILD_OPERATOR (GT_TK);
1414 case '<':
1415 switch ((c = java_get_unicode ()))
1417 case '=':
1418 BUILD_OPERATOR (LTE_TK);
1419 case '<':
1420 if ((c = java_get_unicode ()) == '=')
1422 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1424 else
1426 java_unget_unicode ();
1427 BUILD_OPERATOR (LS_TK);
1429 default:
1430 java_unget_unicode ();
1431 BUILD_OPERATOR (LT_TK);
1434 case '&':
1435 switch ((c = java_get_unicode ()))
1437 case '&':
1438 BUILD_OPERATOR (BOOL_AND_TK);
1439 case '=':
1440 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1441 default:
1442 java_unget_unicode ();
1443 BUILD_OPERATOR (AND_TK);
1446 case '|':
1447 switch ((c = java_get_unicode ()))
1449 case '|':
1450 BUILD_OPERATOR (BOOL_OR_TK);
1451 case '=':
1452 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1453 default:
1454 java_unget_unicode ();
1455 BUILD_OPERATOR (OR_TK);
1458 case '+':
1459 switch ((c = java_get_unicode ()))
1461 case '+':
1462 BUILD_OPERATOR (INCR_TK);
1463 case '=':
1464 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1465 default:
1466 java_unget_unicode ();
1467 BUILD_OPERATOR (PLUS_TK);
1470 case '-':
1471 switch ((c = java_get_unicode ()))
1473 case '-':
1474 BUILD_OPERATOR (DECR_TK);
1475 case '=':
1476 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1477 default:
1478 java_unget_unicode ();
1479 BUILD_OPERATOR (MINUS_TK);
1482 case '*':
1483 if ((c = java_get_unicode ()) == '=')
1485 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1487 else
1489 java_unget_unicode ();
1490 BUILD_OPERATOR (MULT_TK);
1493 case '/':
1494 if ((c = java_get_unicode ()) == '=')
1496 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1498 else
1500 java_unget_unicode ();
1501 BUILD_OPERATOR (DIV_TK);
1504 case '^':
1505 if ((c = java_get_unicode ()) == '=')
1507 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1509 else
1511 java_unget_unicode ();
1512 BUILD_OPERATOR (XOR_TK);
1515 case '%':
1516 if ((c = java_get_unicode ()) == '=')
1518 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1520 else
1522 java_unget_unicode ();
1523 BUILD_OPERATOR (REM_TK);
1526 case '!':
1527 if ((c = java_get_unicode()) == '=')
1529 BUILD_OPERATOR (NEQ_TK);
1531 else
1533 java_unget_unicode ();
1534 BUILD_OPERATOR (NEG_TK);
1537 case '?':
1538 JAVA_LEX_OP ("?");
1539 BUILD_OPERATOR (REL_QM_TK);
1540 case ':':
1541 JAVA_LEX_OP (":");
1542 BUILD_OPERATOR (REL_CL_TK);
1543 case '~':
1544 BUILD_OPERATOR (NOT_TK);
1547 /* Keyword, boolean literal or null literal */
1548 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1549 JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1551 java_unicode_2_utf8 (c);
1552 if (all_ascii && c >= 128)
1553 all_ascii = 0;
1554 ascii_index++;
1557 obstack_1grow (&temporary_obstack, '\0');
1558 string = obstack_finish (&temporary_obstack);
1559 java_unget_unicode ();
1561 /* If we have something all ascii, we consider a keyword, a boolean
1562 literal, a null literal or an all ASCII identifier. Otherwise,
1563 this is an identifier (possibly not respecting formation rule). */
1564 if (all_ascii)
1566 const struct java_keyword *kw;
1567 if ((kw=java_keyword (string, ascii_index)))
1569 JAVA_LEX_KW (string);
1570 switch (kw->token)
1572 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1573 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1574 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1575 case PRIVATE_TK: case STRICT_TK:
1576 SET_MODIFIER_CTX (kw->token);
1577 return MODIFIER_TK;
1578 case FLOAT_TK:
1579 SET_LVAL_NODE (float_type_node);
1580 return FP_TK;
1581 case DOUBLE_TK:
1582 SET_LVAL_NODE (double_type_node);
1583 return FP_TK;
1584 case BOOLEAN_TK:
1585 SET_LVAL_NODE (boolean_type_node);
1586 return BOOLEAN_TK;
1587 case BYTE_TK:
1588 SET_LVAL_NODE (byte_type_node);
1589 return INTEGRAL_TK;
1590 case SHORT_TK:
1591 SET_LVAL_NODE (short_type_node);
1592 return INTEGRAL_TK;
1593 case INT_TK:
1594 SET_LVAL_NODE (int_type_node);
1595 return INTEGRAL_TK;
1596 case LONG_TK:
1597 SET_LVAL_NODE (long_type_node);
1598 return INTEGRAL_TK;
1599 case CHAR_TK:
1600 SET_LVAL_NODE (char_type_node);
1601 return INTEGRAL_TK;
1603 /* Keyword based literals */
1604 case TRUE_TK:
1605 case FALSE_TK:
1606 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1607 boolean_true_node : boolean_false_node));
1608 return BOOL_LIT_TK;
1609 case NULL_TK:
1610 SET_LVAL_NODE (null_pointer_node);
1611 return NULL_TK;
1613 /* Some keyword we want to retain information on the location
1614 they where found */
1615 case CASE_TK:
1616 case DEFAULT_TK:
1617 case SUPER_TK:
1618 case THIS_TK:
1619 case RETURN_TK:
1620 case BREAK_TK:
1621 case CONTINUE_TK:
1622 case TRY_TK:
1623 case CATCH_TK:
1624 case THROW_TK:
1625 case INSTANCEOF_TK:
1626 BUILD_OPERATOR (kw->token);
1628 default:
1629 return kw->token;
1634 /* We may have an ID here */
1635 if (JAVA_START_CHAR_P (first_unicode))
1637 JAVA_LEX_ID (string);
1638 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1639 return ID_TK;
1642 /* Everything else is an invalid character in the input */
1644 char lex_error_buffer [128];
1645 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1646 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1647 java_lex_error (lex_error_buffer, 1);
1649 return 0;
1652 #ifndef JC1_LITE
1653 /* This is called by the parser to see if an error should be generated
1654 due to numeric overflow. This function only handles the particular
1655 case of the largest negative value, and is only called in the case
1656 where this value is not preceded by `-'. */
1657 static void
1658 error_if_numeric_overflow (value)
1659 tree value;
1661 if (TREE_CODE (value) == INTEGER_CST && JAVA_RADIX10_FLAG (value))
1663 unsigned HOST_WIDE_INT lo, hi;
1665 lo = TREE_INT_CST_LOW (value);
1666 hi = TREE_INT_CST_HIGH (value);
1667 if (TREE_TYPE (value) == long_type_node)
1669 int hb = (hi >> 31);
1670 if (hb && !(hi & 0x7fffffff))
1671 java_lex_error ("Numeric overflow for `long' literal", 0);
1673 else
1675 int hb = (lo >> 31) & 0x1;
1676 if (hb && !(lo & 0x7fffffff))
1677 java_lex_error ("Numeric overflow for `int' literal", 0);
1681 #endif /* JC1_LITE */
1683 static void
1684 java_unicode_2_utf8 (unicode)
1685 unicode_t unicode;
1687 if (RANGE (unicode, 0x01, 0x7f))
1688 obstack_1grow (&temporary_obstack, (char)unicode);
1689 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1691 obstack_1grow (&temporary_obstack,
1692 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1693 obstack_1grow (&temporary_obstack,
1694 (unsigned char)(0x80 | (unicode & 0x3f)));
1696 else /* Range 0x800-0xffff */
1698 obstack_1grow (&temporary_obstack,
1699 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1700 obstack_1grow (&temporary_obstack,
1701 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1702 obstack_1grow (&temporary_obstack,
1703 (unsigned char)(0x80 | (unicode & 0x003f)));
1707 #ifndef JC1_LITE
1708 static tree
1709 build_wfl_node (node)
1710 tree node;
1712 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1713 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1714 TREE_TYPE (node) = NULL_TREE;
1715 return node;
1717 #endif
1719 static void
1720 java_lex_error (msg, forward)
1721 const char *msg ATTRIBUTE_UNUSED;
1722 int forward ATTRIBUTE_UNUSED;
1724 #ifndef JC1_LITE
1725 ctxp->elc.line = ctxp->c_line->lineno;
1726 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1728 /* Might be caught in the middle of some error report */
1729 ctxp->java_error_flag = 0;
1730 java_error (NULL);
1731 java_error (msg);
1732 #endif
#ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, terminates a line and
   0 otherwise.  Both `\n' and `\r' terminate a line.  After a `\r'
   one more character is read from FP: a `\n' is consumed so that
   `\r\n' counts as a single terminator, anything else but EOF is
   pushed back.  */
static int
java_is_eol (fp, c)
     FILE *fp;
     int c;
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int lookahead = getc (fp);

      /* Swallow the LF of a CR-LF pair; give back everything else.  */
      if (lookahead != EOF && lookahead != '\n')
        ungetc (lookahead, fp);
      return 1;
    }

  return 0;
}
#endif
1757 char *
1758 java_get_line_col (filename, line, col)
1759 const char *filename ATTRIBUTE_UNUSED;
1760 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1762 #ifdef JC1_LITE
1763 return 0;
1764 #else
1765 /* Dumb implementation. Doesn't try to cache or optimize things. */
1766 /* First line of the file is line 1, first column is 1 */
1768 /* COL == -1 means, at the CR/LF in LINE */
1769 /* COL == -2 means, at the first non space char in LINE */
1771 FILE *fp;
1772 int c, ccol, cline = 1;
1773 int current_line_col = 0;
1774 int first_non_space = 0;
1775 char *base;
1777 if (!(fp = fopen (filename, "r")))
1778 fatal_io_error ("can't open %s", filename);
1780 while (cline != line)
1782 c = getc (fp);
1783 if (c == EOF)
1785 static const char msg[] = "<<file too short - unexpected EOF>>";
1786 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1787 goto have_line;
1789 if (java_is_eol (fp, c))
1790 cline++;
1793 /* Gather the chars of the current line in a buffer */
1794 for (;;)
1796 c = getc (fp);
1797 if (c < 0 || java_is_eol (fp, c))
1798 break;
1799 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1800 first_non_space = current_line_col;
1801 obstack_1grow (&temporary_obstack, c);
1802 current_line_col++;
1804 have_line:
1806 obstack_1grow (&temporary_obstack, '\n');
1808 if (col == -1)
1810 col = current_line_col;
1811 first_non_space = 0;
1813 else if (col == -2)
1814 col = first_non_space;
1815 else
1816 first_non_space = 0;
1818 /* Place the '^' a the right position */
1819 base = obstack_base (&temporary_obstack);
1820 for (ccol = 1; ccol <= col+3; ccol++)
1822 /* Compute \t when reaching first_non_space */
1823 char c = (first_non_space ?
1824 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1825 obstack_1grow (&temporary_obstack, c);
1827 obstack_grow0 (&temporary_obstack, "^", 1);
1829 fclose (fp);
1830 return obstack_finish (&temporary_obstack);
1831 #endif
1834 #ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text at STR with the
   NUL-terminated string NAME, one UTF8_GET result per character of
   NAME.  On the first mismatch return the value obtained from STR
   minus the character of NAME.  When all of NAME matched, return 0
   if STR was consumed up to its end and 1 if something is left.
   UTF8_GET is expected to advance STR; the final comparison against
   the limit relies on it.  */
static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *limit = str + length;
  const char *p;

  for (p = name; *p != '\0'; p++)
    {
      int ch = UTF8_GET (str, limit);
      int diff = ch - *p;

      if (diff != 0)
        return diff;
    }

  return str != limit;
}
/* All C++ keywords, in ascending byte order.  The table is searched
   by bisection in cxx_keyword_p, so new entries must be inserted at
   their sorted position.  Entries are laid out one leading character
   per row.  */

static const char *const cxx_keywords[] =
{
  "_Complex",
  "__alignof", "__alignof__", "__asm", "__asm__",
  "__attribute", "__attribute__", "__builtin_va_arg",
  "__complex", "__complex__", "__const", "__const__",
  "__extension__", "__imag", "__imag__", "__inline", "__inline__",
  "__label__", "__null", "__real", "__real__",
  "__restrict", "__restrict__", "__signed", "__signed__",
  "__typeof", "__typeof__", "__volatile", "__volatile__",
  "and", "and_eq", "asm", "auto",
  "bitand", "bitor", "bool", "break",
  "case", "catch", "char", "class", "compl", "const", "const_cast",
  "continue",
  "default", "delete", "do", "double", "dynamic_cast",
  "else", "enum", "explicit", "export", "extern",
  "false", "float", "for", "friend",
  "goto",
  "if", "inline", "int",
  "long",
  "mutable",
  "namespace", "new", "not", "not_eq",
  "operator", "or", "or_eq",
  "private", "protected", "public",
  "register", "reinterpret_cast", "return",
  "short", "signed", "sizeof", "static", "static_cast", "struct", "switch",
  "template", "this", "throw", "true", "try", "typedef", "typeid",
  "typename", "typeof",
  "union", "unsigned", "using",
  "virtual", "void", "volatile",
  "wchar_t", "while",
  "xor", "xor_eq"
};
1964 /* Return true if NAME is a C++ keyword. */
1967 cxx_keyword_p (name, length)
1968 const char *name;
1969 int length;
1971 int last = ARRAY_SIZE (cxx_keywords);
1972 int first = 0;
1973 int mid = (last + first) / 2;
1974 int old = -1;
1976 for (mid = (last + first) / 2;
1977 mid != old;
1978 old = mid, mid = (last + first) / 2)
1980 int kwl = strlen (cxx_keywords[mid]);
1981 int min_length = kwl > length ? length : kwl;
1982 int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1984 if (r == 0)
1986 int i;
1987 /* We've found a match if all the remaining characters are
1988 `$'. */
1989 for (i = min_length; i < length && name[i] == '$'; ++i)
1991 if (i == length)
1992 return 1;
1993 r = 1;
1996 if (r < 0)
1997 last = mid;
1998 else
1999 first = mid;
2001 return 0;
2003 #endif /* JC1_LITE */