* rtl.h (rtunion_def): Constify member `rtstr'.
[official-gcc.git] / gcc / java / lex.c
blob311f697457f93f064d7944ef7b125abaf3aa4c9f
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequence or utf8 encoded characters
28 and returns a token for everything found but comments, white spaces
29 and line terminators. When necessary, it also fills the java_lval
30 (yylval) union. It's implemented to be called by a re-entrant parser
31 generated by Bison.
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
37 #include "keyword.h"
39 #ifndef JC1_LITE
40 extern struct obstack *expression_obstack;
41 #endif
43 /* Function declaration */
44 static int java_lineterminator PARAMS ((unicode_t));
45 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
46 static void java_unicode_2_utf8 PARAMS ((unicode_t));
47 static void java_lex_error PARAMS ((const char *, int));
48 #ifndef JC1_LITE
49 static int java_is_eol PARAMS ((FILE *, int));
50 static tree build_wfl_node PARAMS ((tree));
51 #endif
52 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
53 static unicode_t java_parse_escape_sequence PARAMS ((void));
54 static int java_letter_or_digit_p PARAMS ((unicode_t));
55 static int java_parse_doc_section PARAMS ((unicode_t));
56 static void java_parse_end_comment PARAMS ((unicode_t));
57 static unicode_t java_get_unicode PARAMS ((void));
58 static unicode_t java_read_unicode PARAMS ((int, int *));
59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
60 static unicode_t java_read_char PARAMS ((void));
61 static void java_allocate_new_line PARAMS ((void));
62 static void java_unget_unicode PARAMS ((void));
63 static unicode_t java_sneak_unicode PARAMS ((void));
65 void
66 java_init_lex ()
68 #ifndef JC1_LITE
69 int java_lang_imported = 0;
71 if (!java_lang_id)
72 java_lang_id = get_identifier ("java.lang");
73 if (!java_lang_cloneable)
74 java_lang_cloneable = get_identifier ("java.lang.Cloneable");
76 if (!java_lang_imported)
78 tree node = build_tree_list
79 (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
80 read_import_dir (TREE_PURPOSE (node));
81 TREE_CHAIN (node) = ctxp->import_demand_list;
82 ctxp->import_demand_list = node;
83 java_lang_imported = 1;
86 if (!wfl_operator)
87 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
88 if (!label_id)
89 label_id = get_identifier ("$L");
90 if (!wfl_append)
91 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
92 if (!wfl_string_buffer)
93 wfl_string_buffer =
94 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
95 if (!wfl_to_string)
96 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
98 ctxp->static_initialized = ctxp->non_static_initialized =
99 ctxp->incomplete_class = NULL_TREE;
101 bzero ((PTR) ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
102 bzero ((PTR) current_jcf, sizeof (JCF));
103 ctxp->current_parsed_class = NULL;
104 ctxp->package = NULL_TREE;
105 #endif
107 ctxp->filename = input_filename;
108 ctxp->lineno = lineno = 0;
109 ctxp->p_line = NULL;
110 ctxp->c_line = NULL;
111 ctxp->unget_utf8_value = 0;
112 ctxp->minus_seen = 0;
113 ctxp->java_error_flag = 0;
116 static char *
117 java_sprint_unicode (line, i)
118 struct java_line *line;
119 int i;
121 static char buffer [10];
122 if (line->unicode_escape_p [i] || line->line [i] > 128)
123 sprintf (buffer, "\\u%04x", line->line [i]);
124 else
126 buffer [0] = line->line [i];
127 buffer [1] = '\0';
129 return buffer;
132 static unicode_t
133 java_sneak_unicode ()
135 return (ctxp->c_line->line [ctxp->c_line->current]);
138 static void
139 java_unget_unicode ()
141 if (!ctxp->c_line->current)
142 fatal ("can't unget unicode - java_unget_unicode");
143 ctxp->c_line->current--;
144 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
147 static void
148 java_allocate_new_line ()
150 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
151 char ahead_escape_p = (ctxp->c_line ?
152 ctxp->c_line->unicode_escape_ahead_p : 0);
154 if (ctxp->c_line && !ctxp->c_line->white_space_only)
156 if (ctxp->p_line)
158 free (ctxp->p_line->unicode_escape_p);
159 free (ctxp->p_line->line);
160 free (ctxp->p_line);
162 ctxp->p_line = ctxp->c_line;
163 ctxp->c_line = NULL; /* Reallocated */
166 if (!ctxp->c_line)
168 ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
169 ctxp->c_line->max = JAVA_LINE_MAX;
170 ctxp->c_line->line = (unicode_t *)xmalloc
171 (sizeof (unicode_t)*ctxp->c_line->max);
172 ctxp->c_line->unicode_escape_p =
173 (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
174 ctxp->c_line->white_space_only = 0;
177 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
178 ctxp->c_line->char_col = ctxp->c_line->current = 0;
179 if (ahead)
181 ctxp->c_line->line [ctxp->c_line->size] = ahead;
182 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
183 ctxp->c_line->size++;
185 ctxp->c_line->ahead [0] = 0;
186 ctxp->c_line->unicode_escape_ahead_p = 0;
187 ctxp->c_line->lineno = ++lineno;
188 ctxp->c_line->white_space_only = 1;
191 #define BAD_UTF8_VALUE 0xFFFE
193 static unicode_t
194 java_read_char ()
196 int c;
197 int c1, c2;
199 if (ctxp->unget_utf8_value)
201 int to_return = ctxp->unget_utf8_value;
202 ctxp->unget_utf8_value = 0;
203 return (to_return);
206 c = GETC ();
208 if (c < 128)
209 return (unicode_t)c;
210 if (c == EOF)
211 return UEOF;
212 else
214 if ((c & 0xe0) == 0xc0)
216 c1 = GETC ();
217 if ((c1 & 0xc0) == 0x80)
218 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
219 c = c1;
221 else if ((c & 0xf0) == 0xe0)
223 c1 = GETC ();
224 if ((c1 & 0xc0) == 0x80)
226 c2 = GETC ();
227 if ((c2 & 0xc0) == 0x80)
228 return (unicode_t)(((c & 0xf) << 12) +
229 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
230 else
231 c = c2;
233 else
234 c = c1;
236 /* We looked for a UTF8 multi-byte sequence (since we saw an initial
237 byte with the high bit set), but found invalid bytes instead.
238 If the most recent byte was Ascii (and not EOF), we should
239 unget it, in case it was a comment terminator or other delimitor. */
240 if ((c & 0x80) == 0)
241 UNGETC (c);
242 return BAD_UTF8_VALUE;
246 static void
247 java_store_unicode (l, c, unicode_escape_p)
248 struct java_line *l;
249 unicode_t c;
250 int unicode_escape_p;
252 if (l->size == l->max)
254 l->max += JAVA_LINE_MAX;
255 l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
256 l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
257 sizeof (char)*l->max);
259 l->line [l->size] = c;
260 l->unicode_escape_p [l->size++] = unicode_escape_p;
263 static unicode_t
264 java_read_unicode (term_context, unicode_escape_p)
265 int term_context;
266 int *unicode_escape_p;
268 unicode_t c;
269 long i, base;
271 c = java_read_char ();
272 *unicode_escape_p = 0;
274 if (c != '\\')
275 return ((term_context ? c :
276 java_lineterminator (c) ? '\n' : (unicode_t)c));
278 /* Count the number of preceeding '\' */
279 for (base = ftell (finput), i = base-2; c == '\\';)
281 fseek (finput, i--, SEEK_SET);
282 c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */
284 fseek (finput, base, SEEK_SET);
285 if ((base-i-3)%2 == 0) /* If odd number of \ seen */
287 c = java_read_char ();
288 if (c == 'u')
290 unsigned short unicode = 0;
291 int shift = 12;
292 /* Next should be 4 hex digits, otherwise it's an error.
293 The hex value is converted into the unicode, pushed into
294 the Unicode stream. */
295 for (shift = 12; shift >= 0; shift -= 4)
297 if ((c = java_read_char ()) == UEOF)
298 return UEOF;
299 if (c >= '0' && c <= '9')
300 unicode |= (unicode_t)((c-'0') << shift);
301 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
302 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
303 else
304 java_lex_error
305 ("Non hex digit in Unicode escape sequence", 0);
307 *unicode_escape_p = 1;
308 return (term_context ? unicode :
309 (java_lineterminator (c) ? '\n' : unicode));
311 ctxp->unget_utf8_value = c;
313 return (unicode_t)'\\';
316 static unicode_t
317 java_get_unicode ()
319 /* It's time to read a line when... */
320 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
322 unicode_t c;
323 java_allocate_new_line ();
324 if (ctxp->c_line->line[0] != '\n')
325 for (;;)
327 int unicode_escape_p;
328 c = java_read_unicode (0, &unicode_escape_p);
329 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
330 if (ctxp->c_line->white_space_only
331 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
332 ctxp->c_line->white_space_only = 0;
333 if ((c == '\n') || (c == UEOF))
334 break;
337 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
338 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
339 return ctxp->c_line->line [ctxp->c_line->current++];
342 static int
343 java_lineterminator (c)
344 unicode_t c;
346 int unicode_escape_p;
347 if (c == '\n') /* CR */
349 if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r')
351 ctxp->c_line->ahead [0] = c;
352 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
354 return 1;
356 else if (c == '\r') /* LF */
358 if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n')
360 ctxp->c_line->ahead [0] = c;
361 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
363 return 1;
365 else
366 return 0;
369 /* Parse the end of a C style comment.
370 * C is the first character following the '/' and '*'. */
371 static void
372 java_parse_end_comment (c)
373 unicode_t c;
376 for ( ;; c = java_get_unicode ())
378 switch (c)
380 case UEOF:
381 java_lex_error ("Comment not terminated at end of input", 0);
382 case '*':
383 switch (c = java_get_unicode ())
385 case UEOF:
386 java_lex_error ("Comment not terminated at end of input", 0);
387 case '/':
388 return;
389 case '*': /* reparse only '*' */
390 java_unget_unicode ();
396 /* Parse the documentation section. Keywords must be at the beginning
397 of a documentation comment line (ignoring white space and any `*'
398 character). Parsed keyword(s): @DEPRECATED. */
400 static int
401 java_parse_doc_section (c)
402 unicode_t c;
404 int valid_tag = 0, seen_star = 0;
406 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
408 switch (c)
410 case '*':
411 seen_star = 1;
412 break;
413 case '\n': /* ULT */
414 valid_tag = 1;
415 default:
416 seen_star = 0;
418 c = java_get_unicode();
421 if (c == UEOF)
422 java_lex_error ("Comment not terminated at end of input", 0);
424 if (seen_star && (c == '/'))
425 return 1; /* Goto step1 in caller */
427 /* We're parsing @deprecated */
428 if (valid_tag && (c == '@'))
430 char tag [11];
431 int tag_index = 0;
433 while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
435 c = java_get_unicode ();
436 tag [tag_index++] = c;
439 if (c == UEOF)
440 java_lex_error ("Comment not terminated at end of input", 0);
441 tag [tag_index] = '\0';
443 if (!strcmp (tag, "deprecated"))
444 ctxp->deprecated = 1;
446 java_unget_unicode ();
447 return 0;
450 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
451 will return a wrong result. */
452 static int
453 java_letter_or_digit_p (c)
454 unicode_t c;
456 return _JAVA_LETTER_OR_DIGIT_P (c);
459 static unicode_t
460 java_parse_escape_sequence ()
462 unicode_t char_lit;
463 unicode_t c;
465 switch (c = java_get_unicode ())
467 case 'b':
468 return (unicode_t)0x8;
469 case 't':
470 return (unicode_t)0x9;
471 case 'n':
472 return (unicode_t)0xa;
473 case 'f':
474 return (unicode_t)0xc;
475 case 'r':
476 return (unicode_t)0xd;
477 case '"':
478 return (unicode_t)0x22;
479 case '\'':
480 return (unicode_t)0x27;
481 case '\\':
482 return (unicode_t)0x5c;
483 case '0': case '1': case '2': case '3': case '4':
484 case '5': case '6': case '7': case '8': case '9':
486 int octal_escape[3];
487 int octal_escape_index = 0;
489 for (; octal_escape_index < 3 && RANGE (c, '0', '9');
490 c = java_get_unicode ())
491 octal_escape [octal_escape_index++] = c;
493 java_unget_unicode ();
495 if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
497 java_lex_error ("Literal octal escape out of range", 0);
498 return JAVA_CHAR_ERROR;
500 else
502 int i, shift;
503 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
504 i < octal_escape_index; i++, shift -= 3)
505 char_lit |= (octal_escape [i] - '0') << shift;
507 return (char_lit);
509 break;
511 case '\n':
512 return '\n'; /* ULT, caught latter as a specific error */
513 default:
514 java_lex_error ("Illegal character in escape sequence", 0);
515 return JAVA_CHAR_ERROR;
519 /* Isolate the code which may raise an arithmetic exception in its
520 own function. */
522 #ifndef JC1_LITE
523 struct jpa_args
525 YYSTYPE *java_lval;
526 char *literal_token;
527 int fflag;
528 int number_beginning;
531 static void java_perform_atof PARAMS ((PTR));
533 static void
534 java_perform_atof (av)
535 PTR av;
537 struct jpa_args *a = (struct jpa_args *)av;
538 YYSTYPE *java_lval = a->java_lval;
539 int number_beginning = a->number_beginning;
540 REAL_VALUE_TYPE value;
541 tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
543 SET_REAL_VALUE_ATOF (value,
544 REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));
546 if (REAL_VALUE_ISINF (value)
547 || REAL_VALUE_ISNAN (value))
549 JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
550 value = DCONST0;
553 SET_LVAL_NODE_TYPE (build_real (type, value), type);
555 #endif
557 static int yylex PARAMS ((YYSTYPE *));
559 static int
560 #ifdef JC1_LITE
561 yylex (java_lval)
562 #else
563 java_lex (java_lval)
564 #endif
565 YYSTYPE *java_lval;
567 unicode_t c, first_unicode;
568 int ascii_index, all_ascii;
569 char *string;
571 /* Translation of the Unicode escape in the raw stream of Unicode
572 characters. Takes care of line terminator. */
573 step1:
574 /* Skip white spaces: SP, TAB and FF or ULT */
575 for (c = java_get_unicode ();
576 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
577 if (c == '\n')
579 ctxp->elc.line = ctxp->c_line->lineno;
580 ctxp->elc.col = ctxp->c_line->char_col-2;
583 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
585 if (c == 0x1a) /* CTRL-Z */
587 if ((c = java_get_unicode ()) == UEOF)
588 return 0; /* Ok here */
589 else
590 java_unget_unicode (); /* Caught latter at the end the function */
592 /* Handle EOF here */
593 if (c == UEOF) /* Should probably do something here... */
594 return 0;
596 /* Take care of eventual comments. */
597 if (c == '/')
599 switch (c = java_get_unicode ())
601 case '/':
602 for (;;)
604 c = java_get_unicode ();
605 if (c == UEOF)
606 java_lex_error ("Comment not terminated at end of input", 0);
607 if (c == '\n') /* ULT */
608 goto step1;
610 break;
612 case '*':
613 if ((c = java_get_unicode ()) == '*')
615 if ((c = java_get_unicode ()) == '/')
616 goto step1; /* Empy documentation comment */
617 else if (java_parse_doc_section (c))
618 goto step1;
621 java_parse_end_comment ((c = java_get_unicode ()));
622 goto step1;
623 break;
624 default:
625 java_unget_unicode ();
626 c = '/';
627 break;
631 ctxp->elc.line = ctxp->c_line->lineno;
632 ctxp->elc.prev_col = ctxp->elc.col;
633 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
634 if (ctxp->elc.col < 0)
635 fatal ("ctxp->elc.col < 0 - java_lex");
637 /* Numeric literals */
638 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
640 /* This section of code is borrowed from gcc/c-lex.c */
641 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
642 int parts[TOTAL_PARTS];
643 HOST_WIDE_INT high, low;
644 /* End borrowed section */
645 char literal_token [256];
646 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
647 int i;
648 #ifndef JC1_LITE
649 int number_beginning = ctxp->c_line->current;
650 #endif
652 /* We might have a . separator instead of a FP like .[0-9]* */
653 if (c == '.')
655 unicode_t peep = java_sneak_unicode ();
657 if (!JAVA_ASCII_DIGIT (peep))
659 JAVA_LEX_SEP('.');
660 BUILD_OPERATOR (DOT_TK);
664 for (i = 0; i < TOTAL_PARTS; i++)
665 parts [i] = 0;
667 if (c == '0')
669 c = java_get_unicode ();
670 if (c == 'x' || c == 'X')
672 radix = 16;
673 c = java_get_unicode ();
675 else if (JAVA_ASCII_DIGIT (c))
676 radix = 8;
677 else if (c == '.')
679 /* Push the '.' back and prepare for a FP parsing... */
680 java_unget_unicode ();
681 c = '0';
683 else
685 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
686 JAVA_LEX_LIT ("0", 10);
687 switch (c)
689 case 'L': case 'l':
690 SET_LVAL_NODE (long_zero_node);
691 return (INT_LIT_TK);
692 case 'f': case 'F':
693 SET_LVAL_NODE (float_zero_node);
694 return (FP_LIT_TK);
695 case 'd': case 'D':
696 SET_LVAL_NODE (double_zero_node);
697 return (FP_LIT_TK);
698 default:
699 java_unget_unicode ();
700 SET_LVAL_NODE (integer_zero_node);
701 return (INT_LIT_TK);
705 /* Parse the first part of the literal, until we find something
706 which is not a number. */
707 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
708 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
709 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
711 /* We store in a string (in case it turns out to be a FP) and in
712 PARTS if we have to process a integer literal. */
713 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
714 int count;
716 literal_token [literal_index++] = c;
717 /* This section of code if borrowed from gcc/c-lex.c */
718 for (count = 0; count < TOTAL_PARTS; count++)
720 parts[count] *= radix;
721 if (count)
723 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
724 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
726 else
727 parts[0] += numeric;
729 if (parts [TOTAL_PARTS-1] != 0)
730 overflow = 1;
731 /* End borrowed section. */
732 c = java_get_unicode ();
735 /* If we have something from the FP char set but not a digit, parse
736 a FP literal. */
737 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
739 int stage = 0;
740 int seen_digit = (literal_index ? 1 : 0);
741 int seen_exponent = 0;
742 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
743 double unless specified. */
744 if (radix != 10)
745 java_lex_error ("Can't express non-decimal FP literal", 0);
747 for (;;)
749 if (c == '.')
751 if (stage < 1)
753 stage = 1;
754 literal_token [literal_index++ ] = c;
755 c = java_get_unicode ();
757 else
758 java_lex_error ("Invalid character in FP literal", 0);
761 if (c == 'e' || c == 'E')
763 if (stage < 2)
765 /* {E,e} must have seen at list a digit */
766 if (!seen_digit)
767 java_lex_error ("Invalid FP literal", 0);
768 seen_digit = 0;
769 seen_exponent = 1;
770 stage = 2;
771 literal_token [literal_index++] = c;
772 c = java_get_unicode ();
774 else
775 java_lex_error ("Invalid character in FP literal", 0);
777 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
779 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
780 stage = 4; /* So we fall through */
783 if ((c=='-' || c =='+') && stage == 2)
785 stage = 3;
786 literal_token [literal_index++] = c;
787 c = java_get_unicode ();
790 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
791 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
792 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
793 (stage == 3 && JAVA_ASCII_DIGIT (c)))
795 if (JAVA_ASCII_DIGIT (c))
796 seen_digit = 1;
797 literal_token [literal_index++ ] = c;
798 c = java_get_unicode ();
800 else
802 #ifndef JC1_LITE
803 struct jpa_args a;
804 #endif
805 if (stage != 4) /* Don't push back fF/dD */
806 java_unget_unicode ();
808 /* An exponent (if any) must have seen a digit. */
809 if (seen_exponent && !seen_digit)
810 java_lex_error ("Invalid FP literal", 0);
812 literal_token [literal_index] = '\0';
813 JAVA_LEX_LIT (literal_token, radix);
815 #ifndef JC1_LITE
816 a.literal_token = literal_token;
817 a.fflag = fflag;
818 a.java_lval = java_lval;
819 a.number_beginning = number_beginning;
820 if (do_float_handler (java_perform_atof, (PTR) &a))
821 return FP_LIT_TK;
823 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
824 #else
825 return FP_LIT_TK;
826 #endif
829 } /* JAVA_ASCCI_FPCHAR (c) */
831 /* Here we get back to converting the integral literal. */
832 if (c == 'L' || c == 'l')
833 long_suffix = 1;
834 else if (radix == 16 && JAVA_ASCII_LETTER (c))
835 java_lex_error ("Digit out of range in hexadecimal literal", 0);
836 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
837 java_lex_error ("Digit out of range in octal literal", 0);
838 else if (radix == 16 && !literal_index)
839 java_lex_error ("No digit specified for hexadecimal literal", 0);
840 else
841 java_unget_unicode ();
843 #ifdef JAVA_LEX_DEBUG
844 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
845 JAVA_LEX_LIT (literal_token, radix);
846 #endif
847 /* This section of code is borrowed from gcc/c-lex.c */
848 if (!overflow)
850 bytes = GET_TYPE_PRECISION (long_type_node);
851 for (i = bytes; i < TOTAL_PARTS; i++)
852 if (parts [i])
854 overflow = 1;
855 break;
858 high = low = 0;
859 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
861 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
862 / HOST_BITS_PER_CHAR)]
863 << (i * HOST_BITS_PER_CHAR));
864 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
866 /* End borrowed section. */
868 /* Range checking */
869 if (long_suffix)
871 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
872 9223372036854775807L is the biggest `long' literal that can be
873 expressed using a 10 radix. For other radixes, everything that
874 fits withing 64 bits is OK. */
875 int hb = (high >> 31);
876 if (overflow || (hb && low && radix == 10) ||
877 (hb && high & 0x7fffffff && radix == 10) ||
878 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
879 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
881 else
883 /* 2147483648 is valid if operand of a '-'. Otherwise,
884 2147483647 is the biggest `int' literal that can be
885 expressed using a 10 radix. For other radixes, everything
886 that fits within 32 bits is OK. As all literals are
887 signed, we sign extend here. */
888 int hb = (low >> 31) & 0x1;
889 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
890 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
891 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
892 high = -hb;
894 ctxp->minus_seen = 0;
895 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
896 (long_suffix ? long_type_node : int_type_node));
897 return INT_LIT_TK;
900 ctxp->minus_seen = 0;
901 /* Character literals */
902 if (c == '\'')
904 unicode_t char_lit;
905 if ((c = java_get_unicode ()) == '\\')
906 char_lit = java_parse_escape_sequence ();
907 else
908 char_lit = c;
910 c = java_get_unicode ();
912 if ((c == '\n') || (c == UEOF))
913 java_lex_error ("Character literal not terminated at end of line", 0);
914 if (c != '\'')
915 java_lex_error ("Syntax error in character literal", 0);
917 if (c == JAVA_CHAR_ERROR)
918 char_lit = 0; /* We silently convert it to zero */
920 JAVA_LEX_CHAR_LIT (char_lit);
921 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
922 return CHAR_LIT_TK;
925 /* String literals */
926 if (c == '"')
928 int no_error;
929 char *string;
931 for (no_error = 1, c = java_get_unicode ();
932 c != '"' && c != '\n'; c = java_get_unicode ())
934 if (c == '\\')
935 c = java_parse_escape_sequence ();
936 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
937 java_unicode_2_utf8 (c);
939 if (c == '\n' || c == UEOF) /* ULT */
941 lineno--; /* Refer to the line the terminator was seen */
942 java_lex_error ("String not terminated at end of line.", 0);
943 lineno++;
946 obstack_1grow (&temporary_obstack, '\0');
947 string = obstack_finish (&temporary_obstack);
948 #ifndef JC1_LITE
949 if (!no_error || (c != '"'))
950 java_lval->node = error_mark_node; /* Requires futher testing FIXME */
951 else
953 tree s = make_node (STRING_CST);
954 TREE_STRING_LENGTH (s) = strlen (string);
955 TREE_STRING_POINTER (s) =
956 obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
957 strcpy (TREE_STRING_POINTER (s), string);
958 java_lval->node = s;
960 #endif
961 return STRING_LIT_TK;
964 /* Separator */
965 switch (c)
967 case '(':
968 JAVA_LEX_SEP (c);
969 BUILD_OPERATOR (OP_TK);
970 case ')':
971 JAVA_LEX_SEP (c);
972 return CP_TK;
973 case '{':
974 JAVA_LEX_SEP (c);
975 if (ctxp->ccb_indent == 1)
976 ctxp->first_ccb_indent1 = lineno;
977 ctxp->ccb_indent++;
978 BUILD_OPERATOR (OCB_TK);
979 case '}':
980 JAVA_LEX_SEP (c);
981 ctxp->ccb_indent--;
982 if (ctxp->ccb_indent == 1)
983 ctxp->last_ccb_indent1 = lineno;
984 BUILD_OPERATOR (CCB_TK);
985 case '[':
986 JAVA_LEX_SEP (c);
987 BUILD_OPERATOR (OSB_TK);
988 case ']':
989 JAVA_LEX_SEP (c);
990 return CSB_TK;
991 case ';':
992 JAVA_LEX_SEP (c);
993 return SC_TK;
994 case ',':
995 JAVA_LEX_SEP (c);
996 return C_TK;
997 case '.':
998 JAVA_LEX_SEP (c);
999 BUILD_OPERATOR (DOT_TK);
1000 /* return DOT_TK; */
1003 /* Operators */
1004 switch (c)
1006 case '=':
1007 if ((c = java_get_unicode ()) == '=')
1009 BUILD_OPERATOR (EQ_TK);
1011 else
1013 /* Equals is used in two different locations. In the
1014 variable_declarator: rule, it has to be seen as '=' as opposed
1015 to being seen as an ordinary assignment operator in
1016 assignment_operators: rule. */
1017 java_unget_unicode ();
1018 BUILD_OPERATOR (ASSIGN_TK);
1021 case '>':
1022 switch ((c = java_get_unicode ()))
1024 case '=':
1025 BUILD_OPERATOR (GTE_TK);
1026 case '>':
1027 switch ((c = java_get_unicode ()))
1029 case '>':
1030 if ((c = java_get_unicode ()) == '=')
1032 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1034 else
1036 java_unget_unicode ();
1037 BUILD_OPERATOR (ZRS_TK);
1039 case '=':
1040 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1041 default:
1042 java_unget_unicode ();
1043 BUILD_OPERATOR (SRS_TK);
1045 default:
1046 java_unget_unicode ();
1047 BUILD_OPERATOR (GT_TK);
1050 case '<':
1051 switch ((c = java_get_unicode ()))
1053 case '=':
1054 BUILD_OPERATOR (LTE_TK);
1055 case '<':
1056 if ((c = java_get_unicode ()) == '=')
1058 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1060 else
1062 java_unget_unicode ();
1063 BUILD_OPERATOR (LS_TK);
1065 default:
1066 java_unget_unicode ();
1067 BUILD_OPERATOR (LT_TK);
1070 case '&':
1071 switch ((c = java_get_unicode ()))
1073 case '&':
1074 BUILD_OPERATOR (BOOL_AND_TK);
1075 case '=':
1076 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1077 default:
1078 java_unget_unicode ();
1079 BUILD_OPERATOR (AND_TK);
1082 case '|':
1083 switch ((c = java_get_unicode ()))
1085 case '|':
1086 BUILD_OPERATOR (BOOL_OR_TK);
1087 case '=':
1088 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1089 default:
1090 java_unget_unicode ();
1091 BUILD_OPERATOR (OR_TK);
1094 case '+':
1095 switch ((c = java_get_unicode ()))
1097 case '+':
1098 BUILD_OPERATOR (INCR_TK);
1099 case '=':
1100 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1101 default:
1102 java_unget_unicode ();
1103 BUILD_OPERATOR (PLUS_TK);
1106 case '-':
1107 switch ((c = java_get_unicode ()))
1109 case '-':
1110 BUILD_OPERATOR (DECR_TK);
1111 case '=':
1112 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1113 default:
1114 java_unget_unicode ();
1115 ctxp->minus_seen = 1;
1116 BUILD_OPERATOR (MINUS_TK);
1119 case '*':
1120 if ((c = java_get_unicode ()) == '=')
1122 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1124 else
1126 java_unget_unicode ();
1127 BUILD_OPERATOR (MULT_TK);
1130 case '/':
1131 if ((c = java_get_unicode ()) == '=')
1133 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1135 else
1137 java_unget_unicode ();
1138 BUILD_OPERATOR (DIV_TK);
1141 case '^':
1142 if ((c = java_get_unicode ()) == '=')
1144 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1146 else
1148 java_unget_unicode ();
1149 BUILD_OPERATOR (XOR_TK);
1152 case '%':
1153 if ((c = java_get_unicode ()) == '=')
1155 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1157 else
1159 java_unget_unicode ();
1160 BUILD_OPERATOR (REM_TK);
1163 case '!':
1164 if ((c = java_get_unicode()) == '=')
1166 BUILD_OPERATOR (NEQ_TK);
1168 else
1170 java_unget_unicode ();
1171 BUILD_OPERATOR (NEG_TK);
1174 case '?':
1175 JAVA_LEX_OP ("?");
1176 BUILD_OPERATOR (REL_QM_TK);
1177 case ':':
1178 JAVA_LEX_OP (":");
1179 BUILD_OPERATOR (REL_CL_TK);
1180 case '~':
1181 BUILD_OPERATOR (NOT_TK);
1184 /* Keyword, boolean literal or null literal */
1185 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1186 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1188 java_unicode_2_utf8 (c);
1189 if (all_ascii && c >= 128)
1190 all_ascii = 0;
1191 ascii_index++;
1194 obstack_1grow (&temporary_obstack, '\0');
1195 string = obstack_finish (&temporary_obstack);
1196 java_unget_unicode ();
1198 /* If we have something all ascii, we consider a keyword, a boolean
1199 literal, a null literal or an all ASCII identifier. Otherwise,
1200 this is an identifier (possibly not respecting formation rule). */
1201 if (all_ascii)
1203 struct java_keyword *kw;
1204 if ((kw=java_keyword (string, ascii_index)))
1206 JAVA_LEX_KW (string);
1207 switch (kw->token)
1209 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1210 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1211 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1212 case PRIVATE_TK:
1213 SET_MODIFIER_CTX (kw->token);
1214 return MODIFIER_TK;
1215 case FLOAT_TK:
1216 SET_LVAL_NODE (float_type_node);
1217 return FP_TK;
1218 case DOUBLE_TK:
1219 SET_LVAL_NODE (double_type_node);
1220 return FP_TK;
1221 case BOOLEAN_TK:
1222 SET_LVAL_NODE (boolean_type_node);
1223 return BOOLEAN_TK;
1224 case BYTE_TK:
1225 SET_LVAL_NODE (byte_type_node);
1226 return INTEGRAL_TK;
1227 case SHORT_TK:
1228 SET_LVAL_NODE (short_type_node);
1229 return INTEGRAL_TK;
1230 case INT_TK:
1231 SET_LVAL_NODE (int_type_node);
1232 return INTEGRAL_TK;
1233 case LONG_TK:
1234 SET_LVAL_NODE (long_type_node);
1235 return INTEGRAL_TK;
1236 case CHAR_TK:
1237 SET_LVAL_NODE (char_type_node);
1238 return INTEGRAL_TK;
1240 /* Keyword based literals */
1241 case TRUE_TK:
1242 case FALSE_TK:
1243 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1244 boolean_true_node : boolean_false_node));
1245 return BOOL_LIT_TK;
1246 case NULL_TK:
1247 SET_LVAL_NODE (null_pointer_node);
1248 return NULL_TK;
1250 /* Some keyword we want to retain information on the location
1251 they where found */
1252 case CASE_TK:
1253 case DEFAULT_TK:
1254 case SUPER_TK:
1255 case THIS_TK:
1256 case RETURN_TK:
1257 case BREAK_TK:
1258 case CONTINUE_TK:
1259 case TRY_TK:
1260 case CATCH_TK:
1261 case THROW_TK:
1262 case INSTANCEOF_TK:
1263 BUILD_OPERATOR (kw->token);
1265 default:
1266 return kw->token;
1271 /* We may have and ID here */
1272 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1274 JAVA_LEX_ID (string);
1275 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1276 return ID_TK;
1279 /* Everything else is an invalid character in the input */
1281 char lex_error_buffer [128];
1282 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1283 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1284 java_lex_error (lex_error_buffer, 1);
1286 return 0;
1289 static void
1290 java_unicode_2_utf8 (unicode)
1291 unicode_t unicode;
1293 if (RANGE (unicode, 0x01, 0x7f))
1294 obstack_1grow (&temporary_obstack, (char)unicode);
1295 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1297 obstack_1grow (&temporary_obstack,
1298 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1299 obstack_1grow (&temporary_obstack,
1300 (unsigned char)(0x80 | (unicode & 0x3f)));
1302 else /* Range 0x800-0xffff */
1304 obstack_1grow (&temporary_obstack,
1305 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1306 obstack_1grow (&temporary_obstack,
1307 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1308 obstack_1grow (&temporary_obstack,
1309 (unsigned char)(0x80 | (unicode & 0x003f)));
1313 #ifndef JC1_LITE
1314 static tree
1315 build_wfl_node (node)
1316 tree node;
1318 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1320 #endif
1322 static void
1323 java_lex_error (msg, forward)
1324 const char *msg ATTRIBUTE_UNUSED;
1325 int forward ATTRIBUTE_UNUSED;
1327 #ifndef JC1_LITE
1328 ctxp->elc.line = ctxp->c_line->lineno;
1329 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1331 /* Might be caught in the middle of some error report */
1332 ctxp->java_error_flag = 0;
1333 java_error (NULL);
1334 java_error (msg);
1335 #endif
#ifndef JC1_LITE
/* Return 1 if C, a character just read from FP, is an end of line,
   0 otherwise.  For a '\r', one more character is read: a following
   '\n' is consumed as part of the same end of line, anything else
   (except EOF) is pushed back on FP.  */

static int
java_is_eol (fp, c)
     FILE *fp;
     int c;
{
  int next;

  switch (c)
    {
    case '\r':
      next = getc (fp);
      if (next != '\n' && next != EOF)
        ungetc (next, fp);
      return 1;
    case '\n':
      return 1;
    default:
      return 0;
    }
}
#endif
1360 char *
1361 java_get_line_col (filename, line, col)
1362 char *filename ATTRIBUTE_UNUSED;
1363 int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1365 #ifdef JC1_LITE
1366 return 0;
1367 #else
1368 /* Dumb implementation. Doesn't try to cache or optimize things. */
1369 /* First line of the file is line 1, first column is 1 */
1371 /* COL == -1 means, at the CR/LF in LINE */
1372 /* COL == -2 means, at the first non space char in LINE */
1374 FILE *fp;
1375 int c, ccol, cline = 1;
1376 int current_line_col = 0;
1377 int first_non_space = 0;
1378 char *base;
1380 if (!(fp = fopen (filename, "r")))
1381 fatal ("Can't open file - java_display_line_col");
1383 while (cline != line)
1385 c = getc (fp);
1386 if (c < 0)
1388 static char msg[] = "<<file too short - unexpected EOF>>";
1389 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1390 goto have_line;
1392 if (java_is_eol (fp, c))
1393 cline++;
1396 /* Gather the chars of the current line in a buffer */
1397 for (;;)
1399 c = getc (fp);
1400 if (c < 0 || java_is_eol (fp, c))
1401 break;
1402 if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1403 first_non_space = current_line_col;
1404 obstack_1grow (&temporary_obstack, c);
1405 current_line_col++;
1407 have_line:
1409 obstack_1grow (&temporary_obstack, '\n');
1411 if (col == -1)
1413 col = current_line_col;
1414 first_non_space = 0;
1416 else if (col == -2)
1417 col = first_non_space;
1418 else
1419 first_non_space = 0;
1421 /* Place the '^' a the right position */
1422 base = obstack_base (&temporary_obstack);
1423 for (ccol = 1; ccol <= col; ccol++)
1425 /* Compute \t when reaching first_non_space */
1426 char c = (first_non_space ?
1427 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1428 obstack_1grow (&temporary_obstack, c);
1430 obstack_grow0 (&temporary_obstack, "^", 1);
1432 fclose (fp);
1433 return obstack_finish (&temporary_obstack);
1434 #endif