Fix incorrect note handling.
[official-gcc.git] / gcc / java / lex.c
blob28a73e3874b4d515920aefb2b9ba6ab6857f95c6
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequence or utf8 encoded
28 characters and returns a token for everything found but comments,
29 white spaces and line terminators. When necessary, it also fills
30 the java_lval (yylval) union. It's implemented to be called by a
31 re-entrant parser generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
37 #include "keyword.h"
38 #include "flags.h"
39 #include "chartables.h"
41 /* Function declaration */
42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
44 static void java_lex_error PARAMS ((const char *, int));
45 #ifndef JC1_LITE
46 static int java_is_eol PARAMS ((FILE *, int));
47 static tree build_wfl_node PARAMS ((tree));
48 #endif
49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
50 static int java_parse_escape_sequence PARAMS ((void));
51 static int java_start_char_p PARAMS ((unicode_t));
52 static int java_part_char_p PARAMS ((unicode_t));
53 static int java_parse_doc_section PARAMS ((int));
54 static void java_parse_end_comment PARAMS ((int));
55 static int java_get_unicode PARAMS ((void));
56 static int java_read_unicode PARAMS ((java_lexer *, int *));
57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
58 int *));
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static int java_read_char PARAMS ((java_lexer *));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
64 #ifndef JC1_LITE
65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
66 #endif
68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
70 #ifdef HAVE_ICONV
71 /* This is nonzero if we have initialized `need_byteswap'. */
72 static int byteswap_init = 0;
74 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
75 big-endian order -- not native endian order. We handle this by
76 doing a conversion once at startup and seeing what happens. This
77 flag holds the results of this determination. */
78 static int need_byteswap = 0;
79 #endif
81 void
82 java_init_lex (finput, encoding)
83 FILE *finput;
84 const char *encoding;
86 #ifndef JC1_LITE
87 int java_lang_imported = 0;
89 if (!java_lang_id)
90 java_lang_id = get_identifier ("java.lang");
91 if (!java_lang_cloneable)
92 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
93 if (!java_io_serializable)
94 java_io_serializable = get_identifier ("java.io.Serializable");
95 if (!inst_id)
96 inst_id = get_identifier ("inst$");
97 if (!wpv_id)
98 wpv_id = get_identifier ("write_parm_value$");
100 if (!java_lang_imported)
102 tree node = build_tree_list
103 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
104 read_import_dir (TREE_PURPOSE (node));
105 TREE_CHAIN (node) = ctxp->import_demand_list;
106 ctxp->import_demand_list = node;
107 java_lang_imported = 1;
110 if (!wfl_operator)
111 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
112 if (!label_id)
113 label_id = get_identifier ("$L");
114 if (!wfl_append)
115 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
116 if (!wfl_string_buffer)
117 wfl_string_buffer =
118 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
119 if (!wfl_to_string)
120 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
122 CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
123 CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
125 memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
126 memset ((PTR) current_jcf, 0, sizeof (JCF));
127 ctxp->current_parsed_class = NULL;
128 ctxp->package = NULL_TREE;
129 #endif
131 ctxp->filename = input_filename;
132 ctxp->lineno = lineno = 0;
133 ctxp->p_line = NULL;
134 ctxp->c_line = NULL;
135 ctxp->minus_seen = 0;
136 ctxp->java_error_flag = 0;
137 ctxp->lexer = java_new_lexer (finput, encoding);
140 static char *
141 java_sprint_unicode (line, i)
142 struct java_line *line;
143 int i;
145 static char buffer [10];
146 if (line->unicode_escape_p [i] || line->line [i] > 128)
147 sprintf (buffer, "\\u%04x", line->line [i]);
148 else
150 buffer [0] = line->line [i];
151 buffer [1] = '\0';
153 return buffer;
156 static unicode_t
157 java_sneak_unicode ()
159 return (ctxp->c_line->line [ctxp->c_line->current]);
162 static void
163 java_unget_unicode ()
165 if (!ctxp->c_line->current)
166 /* Can't unget unicode. */
167 abort ();
169 ctxp->c_line->current--;
170 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
173 static void
174 java_allocate_new_line ()
176 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
177 char ahead_escape_p = (ctxp->c_line ?
178 ctxp->c_line->unicode_escape_ahead_p : 0);
180 if (ctxp->c_line && !ctxp->c_line->white_space_only)
182 if (ctxp->p_line)
184 free (ctxp->p_line->unicode_escape_p);
185 free (ctxp->p_line->line);
186 free (ctxp->p_line);
188 ctxp->p_line = ctxp->c_line;
189 ctxp->c_line = NULL; /* Reallocated */
192 if (!ctxp->c_line)
194 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
195 ctxp->c_line->max = JAVA_LINE_MAX;
196 ctxp->c_line->line = (unicode_t *)xmalloc
197 (sizeof (unicode_t)*ctxp->c_line->max);
198 ctxp->c_line->unicode_escape_p =
199 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
200 ctxp->c_line->white_space_only = 0;
203 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
204 ctxp->c_line->char_col = ctxp->c_line->current = 0;
205 if (ahead)
207 ctxp->c_line->line [ctxp->c_line->size] = ahead;
208 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
209 ctxp->c_line->size++;
211 ctxp->c_line->ahead [0] = 0;
212 ctxp->c_line->unicode_escape_ahead_p = 0;
213 ctxp->c_line->lineno = ++lineno;
214 ctxp->c_line->white_space_only = 1;
217 /* Create a new lexer object. */
219 java_lexer *
220 java_new_lexer (finput, encoding)
221 FILE *finput;
222 const char *encoding;
224 java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
225 int enc_error = 0;
227 lex->finput = finput;
228 lex->bs_count = 0;
229 lex->unget_value = 0;
230 lex->hit_eof = 0;
232 #ifdef HAVE_ICONV
233 lex->handle = iconv_open ("UCS-2", encoding);
234 if (lex->handle != (iconv_t) -1)
236 lex->first = -1;
237 lex->last = -1;
238 lex->out_first = -1;
239 lex->out_last = -1;
240 lex->read_anything = 0;
241 lex->use_fallback = 0;
243 /* Work around broken iconv() implementations by doing checking at
244 runtime. We assume that if the UTF-8 => UCS-2 encoder is broken,
245 then all UCS-2 encoders will be broken. Perhaps not a valid
246 assumption. */
247 if (! byteswap_init)
249 iconv_t handle;
251 byteswap_init = 1;
253 handle = iconv_open ("UCS-2", "UTF-8");
254 if (handle != (iconv_t) -1)
256 unicode_t result;
257 unsigned char in[3];
258 char *inp, *outp;
259 size_t inc, outc, r;
261 /* This is the UTF-8 encoding of \ufeff. */
262 in[0] = 0xef;
263 in[1] = 0xbb;
264 in[2] = 0xbf;
266 inp = in;
267 inc = 3;
268 outp = (char *) &result;
269 outc = 2;
271 r = iconv (handle, (const char **) &inp, &inc, &outp, &outc);
272 iconv_close (handle);
273 /* Conversion must be complete for us to use the result. */
274 if (r != (size_t) -1 && inc == 0 && outc == 0)
275 need_byteswap = (result != 0xfeff);
279 lex->byte_swap = need_byteswap;
281 else
282 #endif /* HAVE_ICONV */
284 /* If iconv failed, use the internal decoder if the default
285 encoding was requested. This code is used on platforms where
286 iconv exists but is insufficient for our needs. For
287 instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2. */
288 if (strcmp (encoding, DEFAULT_ENCODING))
289 enc_error = 1;
290 #ifdef HAVE_ICONV
291 else
292 lex->use_fallback = 1;
293 #endif /* HAVE_ICONV */
296 if (enc_error)
297 fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation. If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option.", encoding);
299 return lex;
302 void
303 java_destroy_lexer (lex)
304 java_lexer *lex;
306 #ifdef HAVE_ICONV
307 if (! lex->use_fallback)
308 iconv_close (lex->handle);
309 #endif
310 free (lex);
313 static int
314 java_read_char (lex)
315 java_lexer *lex;
317 if (lex->unget_value)
319 unicode_t r = lex->unget_value;
320 lex->unget_value = 0;
321 return r;
324 #ifdef HAVE_ICONV
325 if (! lex->use_fallback)
327 size_t ir, inbytesleft, in_save, out_count, out_save;
328 char *inp, *outp;
329 unicode_t result;
331 /* If there is data which has already been converted, use it. */
332 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
334 lex->out_first = 0;
335 lex->out_last = 0;
337 while (1)
339 /* See if we need to read more data. If FIRST == 0 then
340 the previous conversion attempt ended in the middle of
341 a character at the end of the buffer. Otherwise we
342 only have to read if the buffer is empty. */
343 if (lex->first == 0 || lex->first >= lex->last)
345 int r;
347 if (lex->first >= lex->last)
349 lex->first = 0;
350 lex->last = 0;
352 if (feof (lex->finput))
353 return UEOF;
354 r = fread (&lex->buffer[lex->last], 1,
355 sizeof (lex->buffer) - lex->last,
356 lex->finput);
357 lex->last += r;
360 inbytesleft = lex->last - lex->first;
361 out_count = sizeof (lex->out_buffer) - lex->out_last;
363 if (inbytesleft == 0)
365 /* We've tried to read and there is nothing left. */
366 return UEOF;
369 in_save = inbytesleft;
370 out_save = out_count;
371 inp = &lex->buffer[lex->first];
372 outp = &lex->out_buffer[lex->out_last];
373 ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
374 &outp, &out_count);
376 /* If we haven't read any bytes, then look to see if we
377 have read a BOM. */
378 if (! lex->read_anything && out_save - out_count >= 2)
380 unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
381 if (uc == 0xfeff)
383 lex->byte_swap = 0;
384 lex->out_first += 2;
386 else if (uc == 0xfffe)
388 lex->byte_swap = 1;
389 lex->out_first += 2;
391 lex->read_anything = 1;
394 if (lex->byte_swap)
396 unsigned int i;
397 for (i = 0; i < out_save - out_count; i += 2)
399 char t = lex->out_buffer[lex->out_last + i];
400 lex->out_buffer[lex->out_last + i]
401 = lex->out_buffer[lex->out_last + i + 1];
402 lex->out_buffer[lex->out_last + i + 1] = t;
406 lex->first += in_save - inbytesleft;
407 lex->out_last += out_save - out_count;
409 /* If we converted anything at all, move along. */
410 if (out_count != out_save)
411 break;
413 if (ir == (size_t) -1)
415 if (errno == EINVAL)
417 /* This is ok. This means that the end of our buffer
418 is in the middle of a character sequence. We just
419 move the valid part of the buffer to the beginning
420 to force a read. */
421 memmove (&lex->buffer[0], &lex->buffer[lex->first],
422 lex->last - lex->first);
423 lex->last -= lex->first;
424 lex->first = 0;
426 else
428 /* A more serious error. */
429 java_lex_error ("unrecognized character in input stream",
431 return UEOF;
437 if (lex->out_first == -1 || lex->out_first >= lex->out_last)
439 /* Don't have any data. */
440 return UEOF;
443 /* Success. */
444 result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
445 lex->out_first += 2;
446 return result;
448 else
449 #endif /* HAVE_ICONV */
451 int c, c1, c2;
452 c = getc (lex->finput);
454 if (c == EOF)
455 return UEOF;
456 if (c < 128)
457 return (unicode_t)c;
458 else
460 if ((c & 0xe0) == 0xc0)
462 c1 = getc (lex->finput);
463 if ((c1 & 0xc0) == 0x80)
464 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
465 c = c1;
467 else if ((c & 0xf0) == 0xe0)
469 c1 = getc (lex->finput);
470 if ((c1 & 0xc0) == 0x80)
472 c2 = getc (lex->finput);
473 if ((c2 & 0xc0) == 0x80)
474 return (unicode_t)(((c & 0xf) << 12) +
475 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
476 else
477 c = c2;
479 else
480 c = c1;
483 /* We simply don't support invalid characters. */
484 java_lex_error ("malformed UTF-8 character", 0);
488 /* We only get here on error. */
489 return UEOF;
492 static void
493 java_store_unicode (l, c, unicode_escape_p)
494 struct java_line *l;
495 unicode_t c;
496 int unicode_escape_p;
498 if (l->size == l->max)
500 l->max += JAVA_LINE_MAX;
501 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
502 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
503 sizeof (char)*l->max);
505 l->line [l->size] = c;
506 l->unicode_escape_p [l->size++] = unicode_escape_p;
509 static int
510 java_read_unicode (lex, unicode_escape_p)
511 java_lexer *lex;
512 int *unicode_escape_p;
514 int c;
516 c = java_read_char (lex);
517 *unicode_escape_p = 0;
519 if (c != '\\')
521 lex->bs_count = 0;
522 return c;
525 ++lex->bs_count;
526 if ((lex->bs_count) % 2 == 1)
528 /* Odd number of \ seen. */
529 c = java_read_char (lex);
530 if (c == 'u')
532 unicode_t unicode = 0;
533 int shift = 12;
535 /* Recognize any number of `u's in \u. */
536 while ((c = java_read_char (lex)) == 'u')
539 /* Unget the most recent character as it is not a `u'. */
540 if (c == UEOF)
541 return UEOF;
542 lex->unget_value = c;
544 /* Next should be 4 hex digits, otherwise it's an error.
545 The hex value is converted into the unicode, pushed into
546 the Unicode stream. */
547 for (shift = 12; shift >= 0; shift -= 4)
549 if ((c = java_read_char (lex)) == UEOF)
550 return UEOF;
551 if (c >= '0' && c <= '9')
552 unicode |= (unicode_t)((c-'0') << shift);
553 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
554 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
555 else
556 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
558 lex->bs_count = 0;
559 *unicode_escape_p = 1;
560 return unicode;
562 lex->unget_value = c;
564 return (unicode_t) '\\';
567 static int
568 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
569 java_lexer *lex;
570 int *unicode_escape_p;
572 int c = java_read_unicode (lex, unicode_escape_p);
574 if (c == '\r')
576 /* We have to read ahead to see if we got \r\n. In that case we
577 return a single line terminator. */
578 int dummy;
579 c = java_read_unicode (lex, &dummy);
580 if (c != '\n')
581 lex->unget_value = c;
582 /* In either case we must return a newline. */
583 c = '\n';
586 return c;
589 static int
590 java_get_unicode ()
592 /* It's time to read a line when... */
593 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
595 int c;
596 int found_chars = 0;
598 if (ctxp->lexer->hit_eof)
599 return UEOF;
601 java_allocate_new_line ();
602 if (ctxp->c_line->line[0] != '\n')
604 for (;;)
606 int unicode_escape_p;
607 c = java_read_unicode_collapsing_terminators (ctxp->lexer,
608 &unicode_escape_p);
609 if (c != UEOF)
611 found_chars = 1;
612 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
613 if (ctxp->c_line->white_space_only
614 && !JAVA_WHITE_SPACE_P (c)
615 && c != '\n')
616 ctxp->c_line->white_space_only = 0;
618 if ((c == '\n') || (c == UEOF))
619 break;
622 if (c == UEOF && ! found_chars)
624 ctxp->lexer->hit_eof = 1;
625 return UEOF;
629 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
630 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
631 return ctxp->c_line->line [ctxp->c_line->current++];
634 /* Parse the end of a C style comment.
635 * C is the first character following the '/' and '*'. */
636 static void
637 java_parse_end_comment (c)
638 int c;
640 for ( ;; c = java_get_unicode ())
642 switch (c)
644 case UEOF:
645 java_lex_error ("Comment not terminated at end of input", 0);
646 return;
647 case '*':
648 switch (c = java_get_unicode ())
650 case UEOF:
651 java_lex_error ("Comment not terminated at end of input", 0);
652 return;
653 case '/':
654 return;
655 case '*': /* reparse only '*' */
656 java_unget_unicode ();
662 /* Parse the documentation section. Keywords must be at the beginning
663 of a documentation comment line (ignoring white space and any `*'
664 character). Parsed keyword(s): @DEPRECATED. */
666 static int
667 java_parse_doc_section (c)
668 int c;
670 int valid_tag = 0, seen_star = 0;
672 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
674 switch (c)
676 case '*':
677 seen_star = 1;
678 break;
679 case '\n': /* ULT */
680 valid_tag = 1;
681 default:
682 seen_star = 0;
684 c = java_get_unicode();
687 if (c == UEOF)
688 java_lex_error ("Comment not terminated at end of input", 0);
690 if (seen_star && (c == '/'))
691 return 1; /* Goto step1 in caller */
693 /* We're parsing @deprecated */
694 if (valid_tag && (c == '@'))
696 char tag [11];
697 int tag_index = 0;
699 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
701 c = java_get_unicode ();
702 tag [tag_index++] = c;
705 if (c == UEOF)
706 java_lex_error ("Comment not terminated at end of input", 0);
707 tag [tag_index] = '\0';
709 if (!strcmp (tag, "deprecated"))
710 ctxp->deprecated = 1;
712 java_unget_unicode ();
713 return 0;
716 /* Return true if C is a valid start character for a Java identifier.
717 This is only called if C >= 128 -- smaller values are handled
718 inline. However, this function handles all values anyway. */
719 static int
720 java_start_char_p (c)
721 unicode_t c;
723 unsigned int hi = c / 256;
724 char *page = type_table[hi];
725 unsigned long val = (unsigned long) page;
726 int flags;
728 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
729 flags = page[c & 255];
730 else
731 flags = val;
733 return flags & LETTER_START;
736 /* Return true if C is a valid part character for a Java identifier.
737 This is only called if C >= 128 -- smaller values are handled
738 inline. However, this function handles all values anyway. */
739 static int
740 java_part_char_p (c)
741 unicode_t c;
743 unsigned int hi = c / 256;
744 char *page = type_table[hi];
745 unsigned long val = (unsigned long) page;
746 int flags;
748 if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
749 flags = page[c & 255];
750 else
751 flags = val;
753 return flags & LETTER_PART;
756 static int
757 java_parse_escape_sequence ()
759 unicode_t char_lit;
760 int c;
762 switch (c = java_get_unicode ())
764 case 'b':
765 return (unicode_t)0x8;
766 case 't':
767 return (unicode_t)0x9;
768 case 'n':
769 return (unicode_t)0xa;
770 case 'f':
771 return (unicode_t)0xc;
772 case 'r':
773 return (unicode_t)0xd;
774 case '"':
775 return (unicode_t)0x22;
776 case '\'':
777 return (unicode_t)0x27;
778 case '\\':
779 return (unicode_t)0x5c;
780 case '0': case '1': case '2': case '3': case '4':
781 case '5': case '6': case '7':
783 int octal_escape[3];
784 int octal_escape_index = 0;
785 int max = 3;
786 int i, shift;
788 for (; octal_escape_index < max && RANGE (c, '0', '7');
789 c = java_get_unicode ())
791 if (octal_escape_index == 0 && c > '3')
793 /* According to the grammar, `\477' has a well-defined
794 meaning -- it is `\47' followed by `7'. */
795 --max;
797 octal_escape [octal_escape_index++] = c;
800 java_unget_unicode ();
802 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
803 i < octal_escape_index; i++, shift -= 3)
804 char_lit |= (octal_escape [i] - '0') << shift;
806 return char_lit;
808 default:
809 java_lex_error ("Invalid character in escape sequence", 0);
810 return JAVA_CHAR_ERROR;
814 /* Isolate the code which may raise an arithmetic exception in its
815 own function. */
817 #ifndef JC1_LITE
818 struct jpa_args
820 YYSTYPE *java_lval;
821 char *literal_token;
822 int fflag;
823 int number_beginning;
826 #ifdef REAL_ARITHMETIC
827 #define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
828 #else
829 #define IS_ZERO(X) ((X) == 0)
830 #endif
832 static void java_perform_atof PARAMS ((PTR));
834 static void
835 java_perform_atof (av)
836 PTR av;
838 struct jpa_args *a = (struct jpa_args *)av;
839 YYSTYPE *java_lval = a->java_lval;
840 int number_beginning = a->number_beginning;
841 REAL_VALUE_TYPE value;
842 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
844 SET_REAL_VALUE_ATOF (value,
845 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
847 if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
849 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
850 value = DCONST0;
852 else if (IS_ZERO (value))
854 /* We check to see if the value is really 0 or if we've found an
855 underflow. We do this in the most primitive imaginable way. */
856 int really_zero = 1;
857 char *p = a->literal_token;
858 if (*p == '-')
859 ++p;
860 while (*p && *p != 'e' && *p != 'E')
862 if (*p != '0' && *p != '.')
864 really_zero = 0;
865 break;
867 ++p;
869 if (! really_zero)
871 int i = ctxp->c_line->current;
872 ctxp->c_line->current = number_beginning;
873 java_lex_error ("Floating point literal underflow", 0);
874 ctxp->c_line->current = i;
878 SET_LVAL_NODE_TYPE (build_real (type, value), type);
880 #endif
882 static int yylex PARAMS ((YYSTYPE *));
884 static int
885 #ifdef JC1_LITE
886 yylex (java_lval)
887 #else
888 java_lex (java_lval)
889 #endif
890 YYSTYPE *java_lval;
892 int c;
893 unicode_t first_unicode;
894 int ascii_index, all_ascii;
895 char *string;
897 /* Translation of the Unicode escape in the raw stream of Unicode
898 characters. Takes care of line terminator. */
899 step1:
900 /* Skip white spaces: SP, TAB and FF or ULT */
901 for (c = java_get_unicode ();
902 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
903 if (c == '\n')
905 ctxp->elc.line = ctxp->c_line->lineno;
906 ctxp->elc.col = ctxp->c_line->char_col-2;
909 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
911 if (c == 0x1a) /* CTRL-Z */
913 if ((c = java_get_unicode ()) == UEOF)
914 return 0; /* Ok here */
915 else
916 java_unget_unicode (); /* Caught later, at the end of the function */
918 /* Handle EOF here */
919 if (c == UEOF) /* Should probably do something here... */
920 return 0;
922 /* Take care of eventual comments. */
923 if (c == '/')
925 switch (c = java_get_unicode ())
927 case '/':
928 for (;;)
930 c = java_get_unicode ();
931 if (c == UEOF)
933 /* It is ok to end a `//' comment with EOF, unless
934 we're being pedantic. */
935 if (pedantic)
936 java_lex_error ("Comment not terminated at end of input",
938 return 0;
940 if (c == '\n') /* ULT */
941 goto step1;
943 break;
945 case '*':
946 if ((c = java_get_unicode ()) == '*')
948 if ((c = java_get_unicode ()) == '/')
949 goto step1; /* Empy documentation comment */
950 else if (java_parse_doc_section (c))
951 goto step1;
954 java_parse_end_comment ((c = java_get_unicode ()));
955 goto step1;
956 break;
957 default:
958 java_unget_unicode ();
959 c = '/';
960 break;
964 ctxp->elc.line = ctxp->c_line->lineno;
965 ctxp->elc.prev_col = ctxp->elc.col;
966 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
967 if (ctxp->elc.col < 0)
968 abort ();
970 /* Numeric literals */
971 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
973 /* This section of code is borrowed from gcc/c-lex.c */
974 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
975 int parts[TOTAL_PARTS];
976 HOST_WIDE_INT high, low;
977 /* End borrowed section */
978 char literal_token [256];
979 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
980 int i;
981 #ifndef JC1_LITE
982 int number_beginning = ctxp->c_line->current;
983 #endif
985 /* We might have a . separator instead of a FP like .[0-9]* */
986 if (c == '.')
988 unicode_t peep = java_sneak_unicode ();
990 if (!JAVA_ASCII_DIGIT (peep))
992 JAVA_LEX_SEP('.');
993 BUILD_OPERATOR (DOT_TK);
997 for (i = 0; i < TOTAL_PARTS; i++)
998 parts [i] = 0;
1000 if (c == '0')
1002 c = java_get_unicode ();
1003 if (c == 'x' || c == 'X')
1005 radix = 16;
1006 c = java_get_unicode ();
1008 else if (JAVA_ASCII_DIGIT (c))
1009 radix = 8;
1010 else if (c == '.')
1012 /* Push the '.' back and prepare for a FP parsing... */
1013 java_unget_unicode ();
1014 c = '0';
1016 else
1018 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
1019 JAVA_LEX_LIT ("0", 10);
1020 switch (c)
1022 case 'L': case 'l':
1023 SET_LVAL_NODE (long_zero_node);
1024 return (INT_LIT_TK);
1025 case 'f': case 'F':
1026 SET_LVAL_NODE (float_zero_node);
1027 return (FP_LIT_TK);
1028 case 'd': case 'D':
1029 SET_LVAL_NODE (double_zero_node);
1030 return (FP_LIT_TK);
1031 default:
1032 java_unget_unicode ();
1033 SET_LVAL_NODE (integer_zero_node);
1034 return (INT_LIT_TK);
1038 /* Parse the first part of the literal, until we find something
1039 which is not a number. */
1040 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
1041 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1042 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
1044 /* We store in a string (in case it turns out to be a FP) and in
1045 PARTS if we have to process a integer literal. */
1046 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
1047 int count;
1049 literal_token [literal_index++] = c;
1050 /* This section of code if borrowed from gcc/c-lex.c */
1051 for (count = 0; count < TOTAL_PARTS; count++)
1053 parts[count] *= radix;
1054 if (count)
1056 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
1057 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1059 else
1060 parts[0] += numeric;
1062 if (parts [TOTAL_PARTS-1] != 0)
1063 overflow = 1;
1064 /* End borrowed section. */
1065 c = java_get_unicode ();
1068 /* If we have something from the FP char set but not a digit, parse
1069 a FP literal. */
1070 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1072 int stage = 0;
1073 int seen_digit = (literal_index ? 1 : 0);
1074 int seen_exponent = 0;
1075 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
1076 double unless specified. */
1078 /* It is ok if the radix is 8 because this just means we've
1079 seen a leading `0'. However, radix==16 is invalid. */
1080 if (radix == 16)
1081 java_lex_error ("Can't express non-decimal FP literal", 0);
1082 radix = 10;
1084 for (;;)
1086 if (c == '.')
1088 if (stage < 1)
1090 stage = 1;
1091 literal_token [literal_index++ ] = c;
1092 c = java_get_unicode ();
1094 else
1095 java_lex_error ("Invalid character in FP literal", 0);
1098 if (c == 'e' || c == 'E')
1100 if (stage < 2)
1102 /* {E,e} must have seen at list a digit */
1103 if (!seen_digit)
1104 java_lex_error ("Invalid FP literal", 0);
1105 seen_digit = 0;
1106 seen_exponent = 1;
1107 stage = 2;
1108 literal_token [literal_index++] = c;
1109 c = java_get_unicode ();
1111 else
1112 java_lex_error ("Invalid character in FP literal", 0);
1114 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1116 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1117 stage = 4; /* So we fall through */
1120 if ((c=='-' || c =='+') && stage == 2)
1122 stage = 3;
1123 literal_token [literal_index++] = c;
1124 c = java_get_unicode ();
1127 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1128 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1129 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1130 (stage == 3 && JAVA_ASCII_DIGIT (c)))
1132 if (JAVA_ASCII_DIGIT (c))
1133 seen_digit = 1;
1134 literal_token [literal_index++ ] = c;
1135 c = java_get_unicode ();
1137 else
1139 #ifndef JC1_LITE
1140 struct jpa_args a;
1141 #endif
1142 if (stage != 4) /* Don't push back fF/dD */
1143 java_unget_unicode ();
1145 /* An exponent (if any) must have seen a digit. */
1146 if (seen_exponent && !seen_digit)
1147 java_lex_error ("Invalid FP literal", 0);
1149 literal_token [literal_index] = '\0';
1150 JAVA_LEX_LIT (literal_token, radix);
1152 #ifndef JC1_LITE
1153 a.literal_token = literal_token;
1154 a.fflag = fflag;
1155 a.java_lval = java_lval;
1156 a.number_beginning = number_beginning;
1157 if (do_float_handler (java_perform_atof, (PTR) &a))
1158 return FP_LIT_TK;
1160 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
1161 #else
1162 return FP_LIT_TK;
1163 #endif
1166 } /* JAVA_ASCCI_FPCHAR (c) */
1168 /* Here we get back to converting the integral literal. */
1169 if (c == 'L' || c == 'l')
1170 long_suffix = 1;
1171 else if (radix == 16 && JAVA_ASCII_LETTER (c))
1172 java_lex_error ("Digit out of range in hexadecimal literal", 0);
1173 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
1174 java_lex_error ("Digit out of range in octal literal", 0);
1175 else if (radix == 16 && !literal_index)
1176 java_lex_error ("No digit specified for hexadecimal literal", 0);
1177 else
1178 java_unget_unicode ();
1180 #ifdef JAVA_LEX_DEBUG
1181 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1182 JAVA_LEX_LIT (literal_token, radix);
1183 #endif
1184 /* This section of code is borrowed from gcc/c-lex.c */
1185 if (!overflow)
1187 bytes = GET_TYPE_PRECISION (long_type_node);
1188 for (i = bytes; i < TOTAL_PARTS; i++)
1189 if (parts [i])
1191 overflow = 1;
1192 break;
1195 high = low = 0;
1196 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1198 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1199 / HOST_BITS_PER_CHAR)]
1200 << (i * HOST_BITS_PER_CHAR));
1201 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1203 /* End borrowed section. */
1205 /* Range checking */
1206 if (long_suffix)
1208 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1209 9223372036854775807L is the biggest `long' literal that can be
1210 expressed using a 10 radix. For other radixes, everything that
1211 fits withing 64 bits is OK. */
1212 int hb = (high >> 31);
1213 if (overflow || (hb && low && radix == 10) ||
1214 (hb && high & 0x7fffffff && radix == 10) ||
1215 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1216 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1218 else
1220 /* 2147483648 is valid if operand of a '-'. Otherwise,
1221 2147483647 is the biggest `int' literal that can be
1222 expressed using a 10 radix. For other radixes, everything
1223 that fits within 32 bits is OK. As all literals are
1224 signed, we sign extend here. */
1225 int hb = (low >> 31) & 0x1;
1226 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1227 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1228 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1229 high = -hb;
1231 ctxp->minus_seen = 0;
1232 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1233 (long_suffix ? long_type_node : int_type_node));
1234 return INT_LIT_TK;
1237 ctxp->minus_seen = 0;
1239 /* Character literals */
1240 if (c == '\'')
1242 int char_lit;
1243 if ((c = java_get_unicode ()) == '\\')
1244 char_lit = java_parse_escape_sequence ();
1245 else
1247 if (c == '\n' || c == '\'')
1248 java_lex_error ("Invalid character literal", 0);
1249 char_lit = c;
1252 c = java_get_unicode ();
1254 if ((c == '\n') || (c == UEOF))
1255 java_lex_error ("Character literal not terminated at end of line", 0);
1256 if (c != '\'')
1257 java_lex_error ("Syntax error in character literal", 0);
1259 if (char_lit == JAVA_CHAR_ERROR)
1260 char_lit = 0; /* We silently convert it to zero */
1262 JAVA_LEX_CHAR_LIT (char_lit);
1263 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1264 return CHAR_LIT_TK;
1267 /* String literals */
1268 if (c == '"')
1270 int no_error;
1271 char *string;
1273 for (no_error = 1, c = java_get_unicode ();
1274 c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1276 if (c == '\\')
1277 c = java_parse_escape_sequence ();
1278 if (c == JAVA_CHAR_ERROR)
1280 no_error = 0;
1281 c = 0; /* We silently convert it to zero. */
1283 java_unicode_2_utf8 (c);
1285 if (c == '\n' || c == UEOF) /* ULT */
1287 lineno--; /* Refer to the line the terminator was seen */
1288 java_lex_error ("String not terminated at end of line.", 0);
1289 lineno++;
1292 obstack_1grow (&temporary_obstack, '\0');
1293 string = obstack_finish (&temporary_obstack);
1294 #ifndef JC1_LITE
1295 if (!no_error || (c != '"'))
1296 java_lval->node = error_mark_node; /* Requires futher testing FIXME */
1297 else
1298 java_lval->node = build_string (strlen (string), string);
1299 #endif
1300 obstack_free (&temporary_obstack, string);
1301 return STRING_LIT_TK;
1304 /* Separator */
1305 switch (c)
1307 case '(':
1308 JAVA_LEX_SEP (c);
1309 BUILD_OPERATOR (OP_TK);
1310 case ')':
1311 JAVA_LEX_SEP (c);
1312 return CP_TK;
1313 case '{':
1314 JAVA_LEX_SEP (c);
1315 if (ctxp->ccb_indent == 1)
1316 ctxp->first_ccb_indent1 = lineno;
1317 ctxp->ccb_indent++;
1318 BUILD_OPERATOR (OCB_TK);
1319 case '}':
1320 JAVA_LEX_SEP (c);
1321 ctxp->ccb_indent--;
1322 if (ctxp->ccb_indent == 1)
1323 ctxp->last_ccb_indent1 = lineno;
1324 BUILD_OPERATOR (CCB_TK);
1325 case '[':
1326 JAVA_LEX_SEP (c);
1327 BUILD_OPERATOR (OSB_TK);
1328 case ']':
1329 JAVA_LEX_SEP (c);
1330 return CSB_TK;
1331 case ';':
1332 JAVA_LEX_SEP (c);
1333 return SC_TK;
1334 case ',':
1335 JAVA_LEX_SEP (c);
1336 return C_TK;
1337 case '.':
1338 JAVA_LEX_SEP (c);
1339 BUILD_OPERATOR (DOT_TK);
1340 /* return DOT_TK; */
1343 /* Operators */
1344 switch (c)
1346 case '=':
1347 if ((c = java_get_unicode ()) == '=')
1349 BUILD_OPERATOR (EQ_TK);
1351 else
1353 /* Equals is used in two different locations. In the
1354 variable_declarator: rule, it has to be seen as '=' as opposed
1355 to being seen as an ordinary assignment operator in
1356 assignment_operators: rule. */
1357 java_unget_unicode ();
1358 BUILD_OPERATOR (ASSIGN_TK);
1361 case '>':
1362 switch ((c = java_get_unicode ()))
1364 case '=':
1365 BUILD_OPERATOR (GTE_TK);
1366 case '>':
1367 switch ((c = java_get_unicode ()))
1369 case '>':
1370 if ((c = java_get_unicode ()) == '=')
1372 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1374 else
1376 java_unget_unicode ();
1377 BUILD_OPERATOR (ZRS_TK);
1379 case '=':
1380 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1381 default:
1382 java_unget_unicode ();
1383 BUILD_OPERATOR (SRS_TK);
1385 default:
1386 java_unget_unicode ();
1387 BUILD_OPERATOR (GT_TK);
1390 case '<':
1391 switch ((c = java_get_unicode ()))
1393 case '=':
1394 BUILD_OPERATOR (LTE_TK);
1395 case '<':
1396 if ((c = java_get_unicode ()) == '=')
1398 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1400 else
1402 java_unget_unicode ();
1403 BUILD_OPERATOR (LS_TK);
1405 default:
1406 java_unget_unicode ();
1407 BUILD_OPERATOR (LT_TK);
1410 case '&':
1411 switch ((c = java_get_unicode ()))
1413 case '&':
1414 BUILD_OPERATOR (BOOL_AND_TK);
1415 case '=':
1416 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1417 default:
1418 java_unget_unicode ();
1419 BUILD_OPERATOR (AND_TK);
1422 case '|':
1423 switch ((c = java_get_unicode ()))
1425 case '|':
1426 BUILD_OPERATOR (BOOL_OR_TK);
1427 case '=':
1428 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1429 default:
1430 java_unget_unicode ();
1431 BUILD_OPERATOR (OR_TK);
1434 case '+':
1435 switch ((c = java_get_unicode ()))
1437 case '+':
1438 BUILD_OPERATOR (INCR_TK);
1439 case '=':
1440 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1441 default:
1442 java_unget_unicode ();
1443 BUILD_OPERATOR (PLUS_TK);
1446 case '-':
1447 switch ((c = java_get_unicode ()))
1449 case '-':
1450 BUILD_OPERATOR (DECR_TK);
1451 case '=':
1452 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1453 default:
1454 java_unget_unicode ();
1455 ctxp->minus_seen = 1;
1456 BUILD_OPERATOR (MINUS_TK);
1459 case '*':
1460 if ((c = java_get_unicode ()) == '=')
1462 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1464 else
1466 java_unget_unicode ();
1467 BUILD_OPERATOR (MULT_TK);
1470 case '/':
1471 if ((c = java_get_unicode ()) == '=')
1473 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1475 else
1477 java_unget_unicode ();
1478 BUILD_OPERATOR (DIV_TK);
1481 case '^':
1482 if ((c = java_get_unicode ()) == '=')
1484 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1486 else
1488 java_unget_unicode ();
1489 BUILD_OPERATOR (XOR_TK);
1492 case '%':
1493 if ((c = java_get_unicode ()) == '=')
1495 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1497 else
1499 java_unget_unicode ();
1500 BUILD_OPERATOR (REM_TK);
1503 case '!':
1504 if ((c = java_get_unicode()) == '=')
1506 BUILD_OPERATOR (NEQ_TK);
1508 else
1510 java_unget_unicode ();
1511 BUILD_OPERATOR (NEG_TK);
1514 case '?':
1515 JAVA_LEX_OP ("?");
1516 BUILD_OPERATOR (REL_QM_TK);
1517 case ':':
1518 JAVA_LEX_OP (":");
1519 BUILD_OPERATOR (REL_CL_TK);
1520 case '~':
1521 BUILD_OPERATOR (NOT_TK);
1524 /* Keyword, boolean literal or null literal */
1525 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1526 JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1528 java_unicode_2_utf8 (c);
1529 if (all_ascii && c >= 128)
1530 all_ascii = 0;
1531 ascii_index++;
1534 obstack_1grow (&temporary_obstack, '\0');
1535 string = obstack_finish (&temporary_obstack);
1536 java_unget_unicode ();
1538 /* If we have something all ascii, we consider a keyword, a boolean
1539 literal, a null literal or an all ASCII identifier. Otherwise,
1540 this is an identifier (possibly not respecting formation rule). */
1541 if (all_ascii)
1543 struct java_keyword *kw;
1544 if ((kw=java_keyword (string, ascii_index)))
1546 JAVA_LEX_KW (string);
1547 switch (kw->token)
1549 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1550 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1551 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1552 case PRIVATE_TK: case STRICT_TK:
1553 SET_MODIFIER_CTX (kw->token);
1554 return MODIFIER_TK;
1555 case FLOAT_TK:
1556 SET_LVAL_NODE (float_type_node);
1557 return FP_TK;
1558 case DOUBLE_TK:
1559 SET_LVAL_NODE (double_type_node);
1560 return FP_TK;
1561 case BOOLEAN_TK:
1562 SET_LVAL_NODE (boolean_type_node);
1563 return BOOLEAN_TK;
1564 case BYTE_TK:
1565 SET_LVAL_NODE (byte_type_node);
1566 return INTEGRAL_TK;
1567 case SHORT_TK:
1568 SET_LVAL_NODE (short_type_node);
1569 return INTEGRAL_TK;
1570 case INT_TK:
1571 SET_LVAL_NODE (int_type_node);
1572 return INTEGRAL_TK;
1573 case LONG_TK:
1574 SET_LVAL_NODE (long_type_node);
1575 return INTEGRAL_TK;
1576 case CHAR_TK:
1577 SET_LVAL_NODE (char_type_node);
1578 return INTEGRAL_TK;
1580 /* Keyword based literals */
1581 case TRUE_TK:
1582 case FALSE_TK:
1583 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1584 boolean_true_node : boolean_false_node));
1585 return BOOL_LIT_TK;
1586 case NULL_TK:
1587 SET_LVAL_NODE (null_pointer_node);
1588 return NULL_TK;
1590 /* For some keywords we want to retain information on the location
1591 where they were found. */
1592 case CASE_TK:
1593 case DEFAULT_TK:
1594 case SUPER_TK:
1595 case THIS_TK:
1596 case RETURN_TK:
1597 case BREAK_TK:
1598 case CONTINUE_TK:
1599 case TRY_TK:
1600 case CATCH_TK:
1601 case THROW_TK:
1602 case INSTANCEOF_TK:
1603 BUILD_OPERATOR (kw->token);
1605 default:
1606 return kw->token;
1611 /* We may have an ID here */
1612 if (JAVA_START_CHAR_P (first_unicode))
1614 JAVA_LEX_ID (string);
1615 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1616 return ID_TK;
1619 /* Everything else is an invalid character in the input */
1621 char lex_error_buffer [128];
1622 sprintf (lex_error_buffer, "Invalid character `%s' in input",
1623 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1624 java_lex_error (lex_error_buffer, 1);
1626 return 0;
1629 static void
1630 java_unicode_2_utf8 (unicode)
1631 unicode_t unicode;
1633 if (RANGE (unicode, 0x01, 0x7f))
1634 obstack_1grow (&temporary_obstack, (char)unicode);
1635 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1637 obstack_1grow (&temporary_obstack,
1638 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1639 obstack_1grow (&temporary_obstack,
1640 (unsigned char)(0x80 | (unicode & 0x3f)));
1642 else /* Range 0x800-0xffff */
1644 obstack_1grow (&temporary_obstack,
1645 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1646 obstack_1grow (&temporary_obstack,
1647 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1648 obstack_1grow (&temporary_obstack,
1649 (unsigned char)(0x80 | (unicode & 0x003f)));
1653 #ifndef JC1_LITE
1654 static tree
1655 build_wfl_node (node)
1656 tree node;
1658 node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1659 /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1660 TREE_TYPE (node) = NULL_TREE;
1661 return node;
1663 #endif
1665 static void
1666 java_lex_error (msg, forward)
1667 const char *msg ATTRIBUTE_UNUSED;
1668 int forward ATTRIBUTE_UNUSED;
1670 #ifndef JC1_LITE
1671 ctxp->elc.line = ctxp->c_line->lineno;
1672 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1674 /* Might be caught in the middle of some error report */
1675 ctxp->java_error_flag = 0;
1676 java_error (NULL);
1677 java_error (msg);
1678 #endif
#ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, is a line terminator
   and 0 otherwise.  When C is a carriage return, one more character
   is read from FP: a following line feed is consumed so that CR LF
   counts as a single terminator, anything else but EOF is pushed
   back.  */

static int
java_is_eol (fp, c)
     FILE *fp;
     int c;
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int following = getc (fp);

      if (following != '\n' && following != EOF)
        ungetc (following, fp);
      return 1;
    }

  return 0;
}
#endif
1703 char *
1704 java_get_line_col (filename, line, col)
1705 const char *filename ATTRIBUTE_UNUSED;
1706 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1708 #ifdef JC1_LITE
1709 return 0;
1710 #else
1711 /* Dumb implementation. Doesn't try to cache or optimize things. */
1712 /* First line of the file is line 1, first column is 1 */
1714 /* COL == -1 means, at the CR/LF in LINE */
1715 /* COL == -2 means, at the first non space char in LINE */
1717 FILE *fp;
1718 int c, ccol, cline = 1;
1719 int current_line_col = 0;
1720 int first_non_space = 0;
1721 char *base;
1723 if (!(fp = fopen (filename, "r")))
1724 fatal_io_error ("can't open %s", filename);
1726 while (cline != line)
1728 c = getc (fp);
1729 if (c == EOF)
1731 static char msg[] = "<<file too short - unexpected EOF>>";
1732 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1733 goto have_line;
1735 if (java_is_eol (fp, c))
1736 cline++;
1739 /* Gather the chars of the current line in a buffer */
1740 for (;;)
1742 c = getc (fp);
1743 if (c < 0 || java_is_eol (fp, c))
1744 break;
1745 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1746 first_non_space = current_line_col;
1747 obstack_1grow (&temporary_obstack, c);
1748 current_line_col++;
1750 have_line:
1752 obstack_1grow (&temporary_obstack, '\n');
1754 if (col == -1)
1756 col = current_line_col;
1757 first_non_space = 0;
1759 else if (col == -2)
1760 col = first_non_space;
1761 else
1762 first_non_space = 0;
1764 /* Place the '^' a the right position */
1765 base = obstack_base (&temporary_obstack);
1766 for (ccol = 1; ccol <= col+3; ccol++)
1768 /* Compute \t when reaching first_non_space */
1769 char c = (first_non_space ?
1770 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1771 obstack_1grow (&temporary_obstack, c);
1773 obstack_grow0 (&temporary_obstack, "^", 1);
1775 fclose (fp);
1776 return obstack_finish (&temporary_obstack);
1777 #endif
1780 #ifndef JC1_LITE
/* Compare the LENGTH bytes starting at STR with the NUL terminated
   string NAME.  Characters are fetched from STR with UTF8_GET and
   compared with the chars of NAME one by one; the difference of the
   first mismatching pair is returned.  When NAME is exhausted without
   a mismatch, return 0 if STR has been consumed up to its limit and 1
   otherwise.  UTF8_GET is expected to advance STR as it reads.  */

static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *limit = str + length;
  const char *expected;

  for (expected = name; *expected; expected++)
    {
      int ch = UTF8_GET (str, limit);
      int delta = ch - *expected;

      if (delta != 0)
        return delta;
    }

  if (str == limit)
    return 0;
  return 1;
}
1800 /* A sorted list of all C++ keywords. */
1802 static const char *cxx_keywords[] =
1804 "_Complex",
1805 "__alignof",
1806 "__alignof__",
1807 "__asm",
1808 "__asm__",
1809 "__attribute",
1810 "__attribute__",
1811 "__builtin_va_arg",
1812 "__complex",
1813 "__complex__",
1814 "__const",
1815 "__const__",
1816 "__extension__",
1817 "__imag",
1818 "__imag__",
1819 "__inline",
1820 "__inline__",
1821 "__label__",
1822 "__null",
1823 "__real",
1824 "__real__",
1825 "__restrict",
1826 "__restrict__",
1827 "__signed",
1828 "__signed__",
1829 "__typeof",
1830 "__typeof__",
1831 "__volatile",
1832 "__volatile__",
1833 "asm",
1834 "and",
1835 "and_eq",
1836 "auto",
1837 "bitand",
1838 "bitor",
1839 "bool",
1840 "break",
1841 "case",
1842 "catch",
1843 "char",
1844 "class",
1845 "compl",
1846 "const",
1847 "const_cast",
1848 "continue",
1849 "default",
1850 "delete",
1851 "do",
1852 "double",
1853 "dynamic_cast",
1854 "else",
1855 "enum",
1856 "explicit",
1857 "export",
1858 "extern",
1859 "false",
1860 "float",
1861 "for",
1862 "friend",
1863 "goto",
1864 "if",
1865 "inline",
1866 "int",
1867 "long",
1868 "mutable",
1869 "namespace",
1870 "new",
1871 "not",
1872 "not_eq",
1873 "operator",
1874 "or",
1875 "or_eq",
1876 "private",
1877 "protected",
1878 "public",
1879 "register",
1880 "reinterpret_cast",
1881 "return",
1882 "short",
1883 "signed",
1884 "sizeof",
1885 "static",
1886 "static_cast",
1887 "struct",
1888 "switch",
1889 "template",
1890 "this",
1891 "throw",
1892 "true",
1893 "try",
1894 "typedef",
1895 "typename",
1896 "typeid",
1897 "typeof",
1898 "union",
1899 "unsigned",
1900 "using",
1901 "virtual",
1902 "void",
1903 "volatile",
1904 "wchar_t",
1905 "while",
1906 "xor",
1907 "xor_eq"
1910 /* Return true if NAME is a C++ keyword. */
1913 cxx_keyword_p (name, length)
1914 const char *name;
1915 int length;
1917 int last = ARRAY_SIZE (cxx_keywords);
1918 int first = 0;
1919 int mid = (last + first) / 2;
1920 int old = -1;
1922 for (mid = (last + first) / 2;
1923 mid != old;
1924 old = mid, mid = (last + first) / 2)
1926 int kwl = strlen (cxx_keywords[mid]);
1927 int min_length = kwl > length ? length : kwl;
1928 int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1930 if (r == 0)
1932 int i;
1933 /* We've found a match if all the remaining characters are
1934 `$'. */
1935 for (i = min_length; i < length && name[i] == '$'; ++i)
1937 if (i == length)
1938 return 1;
1939 r = 1;
1942 if (r < 0)
1943 last = mid;
1944 else
1945 first = mid;
1947 return 0;
1949 #endif /* JC1_LITE */