[official-gcc.git] / gcc / java / lex.c
bloba54688b7f4428c50e11e4a2a85c0ed97866d2630
1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
/* It defines java_lex (yylex) that reads a Java ASCII source file
   possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found except comments,
   white space and line terminators.  When necessary, it also fills the
   java_lval (yylval) union.  It's implemented to be called by a
   re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html)  */
37 #include <stdio.h>
38 #include <string.h>
39 #include <setjmp.h>
41 #ifdef JAVA_LEX_DEBUG
42 #include <ctype.h>
43 #endif
45 #ifdef inline /* javaop.h redefines inline as static */
46 #undef inline
47 #endif
48 #include "keyword.h"
50 #ifndef SEEK_SET
51 #include <unistd.h>
52 #endif
54 #ifndef JC1_LITE
55 extern struct obstack *expression_obstack;
56 #endif
58 void
59 java_init_lex ()
61 int java_lang_imported = 0;
63 #ifndef JC1_LITE
64 if (!java_lang_imported)
66 tree node = build_tree_list
67 (build_expr_wfl (get_identifier ("java.lang"), NULL, 0, 0), NULL_TREE);
68 read_import_dir (TREE_PURPOSE (node));
69 TREE_CHAIN (node) = ctxp->import_demand_list;
70 ctxp->import_demand_list = node;
71 java_lang_imported = 1;
74 if (!wfl_operator)
75 wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
76 if (!label_id)
77 label_id = get_identifier ("$L");
78 if (!wfl_append)
79 wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
80 if (!wfl_string_buffer)
81 wfl_string_buffer =
82 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
83 if (!wfl_to_string)
84 wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
86 ctxp->static_initialized = ctxp->non_static_initialized =
87 ctxp->incomplete_class = NULL_TREE;
89 bzero (ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
90 classpath = NULL;
91 bzero (current_jcf, sizeof (JCF));
92 ctxp->current_parsed_class = NULL;
93 ctxp->package = NULL_TREE;
94 #endif
96 ctxp->filename = input_filename;
97 ctxp->lineno = lineno = 0;
98 ctxp->p_line = NULL;
99 ctxp->c_line = NULL;
100 ctxp->unget_utf8_value = 0;
101 ctxp->minus_seen = 0;
102 ctxp->java_error_flag = 0;
105 static char *
106 java_sprint_unicode (line, i)
107 struct java_line *line;
108 int i;
110 static char buffer [10];
111 if (line->unicode_escape_p [i] || line->line [i] > 128)
112 sprintf (buffer, "\\u%04x", line->line [i]);
113 else
115 buffer [0] = line->line [i];
116 buffer [1] = '\0';
118 return buffer;
121 static unicode_t
122 java_sneak_unicode ()
124 return (ctxp->c_line->line [ctxp->c_line->current]);
127 static void
128 java_unget_unicode (c)
129 unicode_t c;
131 if (!ctxp->c_line->current)
132 fatal ("can't unget unicode - java_unget_unicode");
133 ctxp->c_line->current--;
134 ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
137 void
138 java_allocate_new_line ()
140 int i;
141 unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
142 char ahead_escape_p = (ctxp->c_line ?
143 ctxp->c_line->unicode_escape_ahead_p : 0);
145 if (ctxp->c_line && !ctxp->c_line->white_space_only)
147 if (ctxp->p_line)
149 free (ctxp->p_line->unicode_escape_p);
150 free (ctxp->p_line->line);
151 free (ctxp->p_line);
153 ctxp->p_line = ctxp->c_line;
154 ctxp->c_line = NULL; /* Reallocated */
157 if (!ctxp->c_line)
159 ctxp->c_line = (struct java_line *)malloc (sizeof (struct java_line));
160 ctxp->c_line->max = JAVA_LINE_MAX;
161 ctxp->c_line->line = (unicode_t *)malloc
162 (sizeof (unicode_t)*ctxp->c_line->max);
163 ctxp->c_line->unicode_escape_p =
164 (char *)malloc (sizeof (char)*ctxp->c_line->max);
165 ctxp->c_line->white_space_only = 0;
168 ctxp->c_line->line [0] = ctxp->c_line->size = 0;
169 ctxp->c_line->char_col = ctxp->c_line->current = 0;
170 if (ahead)
172 ctxp->c_line->line [ctxp->c_line->size] = ahead;
173 ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
174 ctxp->c_line->size++;
176 ctxp->c_line->ahead [0] = 0;
177 ctxp->c_line->unicode_escape_ahead_p = 0;
178 ctxp->c_line->lineno = ++lineno;
179 ctxp->c_line->white_space_only = 1;
182 static unicode_t
183 java_read_char ()
185 int c;
186 int c1, c2;
188 if (ctxp->unget_utf8_value)
190 int to_return = ctxp->unget_utf8_value;
191 ctxp->unget_utf8_value = 0;
192 return (to_return);
195 c = GETC ();
197 if (c < 128)
198 return (unicode_t)c;
199 if (c == EOF)
200 return UEOF;
201 else
203 if (c & 0xe0 == 0xc0)
205 c1 = GETC ();
206 if (c1 & 0xc0 == 0x80)
207 return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
209 else if (c & 0xf0 == 0xe0)
211 c1 = GETC ();
212 if (c1 & 0xc0 == 0x80)
214 c2 = GETC ();
215 if (c2 & 0xc0 == 0x80)
216 return (unicode_t)(((c & 0xf) << 12) +
217 (( c1 & 0x3f) << 6) + (c2 & 0x3f));
220 java_lex_error ("Bad utf8 encoding", 0);
224 static void
225 java_store_unicode (l, c, unicode_escape_p)
226 struct java_line *l;
227 unicode_t c;
228 int unicode_escape_p;
230 if (l->size == l->max)
232 l->max += JAVA_LINE_MAX;
233 l->line = (unicode_t *)realloc (l->line, sizeof (unicode_t)*l->max);
234 l->unicode_escape_p = (char *)realloc (l->unicode_escape_p,
235 sizeof (char)*l->max);
237 l->line [l->size] = c;
238 l->unicode_escape_p [l->size++] = unicode_escape_p;
241 static unicode_t
242 java_read_unicode (term_context, unicode_escape_p)
243 int term_context;
244 int *unicode_escape_p;
246 unicode_t c;
247 long i, base;
249 c = java_read_char ();
250 *unicode_escape_p = 0;
252 if (c != '\\')
253 return ((term_context ? c :
254 java_lineterminator (c) ? '\n' : (unicode_t)c));
256 /* Count the number of preceeding '\' */
257 for (base = ftell (finput), i = base-2; c == '\\';)
259 fseek (finput, i--, SEEK_SET);
260 c = java_read_char (); /* Will fail if reading utf8 stream. FIXME */
262 fseek (finput, base, SEEK_SET);
263 if ((base-i-3)%2 == 0) /* If odd number of \ seen */
265 c = java_read_char ();
266 if (c == 'u')
268 unsigned short unicode = 0;
269 int shift = 12;
270 /* Next should be 4 hex digits, otherwise it's an error.
271 The hex value is converted into the unicode, pushed into
272 the Unicode stream. */
273 for (shift = 12; shift >= 0; shift -= 4)
275 if ((c = java_read_char ()) == UEOF)
276 return UEOF;
277 if (c >= '0' && c <= '9')
278 unicode |= (unicode_t)((c-'0') << shift);
279 else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
280 unicode |= (unicode_t)(10+(c | 0x20)-'a' << shift);
281 else
282 java_lex_error
283 ("Non hex digit in Unicode escape sequence", 0);
285 *unicode_escape_p = 1;
286 return (term_context ? unicode :
287 (java_lineterminator (c) ? '\n' : unicode));
289 UNGETC (c);
291 return (unicode_t)'\\';
294 static unicode_t
295 java_get_unicode ()
297 /* It's time to read a line when... */
298 if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
300 unicode_t c;
301 java_allocate_new_line ();
302 if (ctxp->c_line->line[0] != '\n')
303 for (;;)
305 int unicode_escape_p;
306 c = java_read_unicode (0, &unicode_escape_p);
307 java_store_unicode (ctxp->c_line, c, unicode_escape_p);
308 if (ctxp->c_line->white_space_only
309 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
310 ctxp->c_line->white_space_only = 0;
311 if ((c == '\n') || (c == UEOF))
312 break;
315 ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
316 JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
317 return ctxp->c_line->line [ctxp->c_line->current++];
320 static int
321 java_lineterminator (c)
322 unicode_t c;
324 int unicode_escape_p;
325 if (c == '\n') /* CR */
327 if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r')
329 ctxp->c_line->ahead [0] = c;
330 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
332 return 1;
334 else if (c == '\r') /* LF */
336 if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n')
338 ctxp->c_line->ahead [0] = c;
339 ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
341 return 1;
343 else
344 return 0;
347 /* Parse the end of a C style comment */
348 static void
349 java_parse_end_comment ()
351 unicode_t c;
353 for (c = java_get_unicode ();; c = java_get_unicode ())
355 switch (c)
357 case UEOF:
358 java_lex_error ("Comment not terminated at end of input", 0);
359 case '*':
360 switch (c = java_get_unicode ())
362 case UEOF:
363 java_lex_error ("Comment not terminated at end of input", 0);
364 case '/':
365 return;
366 case '*': /* reparse only '*' */
367 java_unget_unicode (c);
373 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
374 will return a wrong result. */
375 static int
376 java_letter_or_digit_p (c)
377 unicode_t c;
379 return _JAVA_LETTER_OR_DIGIT_P (c);
382 static unicode_t
383 java_parse_escape_sequence ()
385 unicode_t char_lit;
386 unicode_t c;
388 switch (c = java_get_unicode ())
390 case 'b':
391 return (unicode_t)0x8;
392 case 't':
393 return (unicode_t)0x9;
394 case 'n':
395 return (unicode_t)0xa;
396 case 'f':
397 return (unicode_t)0xc;
398 case 'r':
399 return (unicode_t)0xd;
400 case '"':
401 return (unicode_t)0x22;
402 case '\'':
403 return (unicode_t)0x27;
404 case '\\':
405 return (unicode_t)0x5c;
406 case '0': case '1': case '2': case '3': case '4':
407 case '5': case '6': case '7': case '8': case '9':
409 int octal_escape[3];
410 int octal_escape_index = 0;
412 for (; octal_escape_index < 3 && RANGE (c, '0', '9');
413 c = java_get_unicode ())
414 octal_escape [octal_escape_index++] = c;
416 java_unget_unicode (c);
418 if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
420 java_lex_error ("Literal octal escape out of range", 0);
421 return JAVA_CHAR_ERROR;
423 else
425 int i, shift;
426 for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
427 i < octal_escape_index; i++, shift -= 3)
428 char_lit |= (octal_escape [i] - '0') << shift;
430 return (char_lit);
432 break;
434 case '\n':
435 return '\n'; /* ULT, caught latter as a specific error */
436 default:
437 java_lex_error ("Illegal character in escape sequence", 0);
438 return JAVA_CHAR_ERROR;
443 #ifdef JC1_LITE
444 yylex (java_lval)
445 #else
446 java_lex (java_lval)
447 #endif
448 YYSTYPE *java_lval;
450 unicode_t c, first_unicode;
451 int line_terminator;
452 int ascii_index, all_ascii;
453 char *string;
455 /* Translation of the Unicode escape in the raw stream of Unicode
456 characters. Takes care of line terminator. */
457 step1:
458 /* Skip white spaces: SP, TAB and FF or ULT */
459 for (c = java_get_unicode ();
460 c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
461 if (c == '\n')
463 ctxp->elc.line = ctxp->c_line->lineno;
464 ctxp->elc.col = ctxp->c_line->char_col-2;
467 ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
469 if (c == 0x1a) /* CTRL-Z */
471 if ((c = java_get_unicode ()) == UEOF)
472 return 0; /* Ok here */
473 else
474 java_unget_unicode (c); /* Caught latter at the end the function */
476 /* Handle EOF here */
477 if (c == UEOF) /* Should probably do something here... */
478 return 0;
480 /* Take care of eventual comments. */
481 if (c == '/')
483 switch (c = java_get_unicode ())
485 case '/':
486 for (c = java_get_unicode ();;c = java_get_unicode ())
488 if (c == UEOF)
489 java_lex_error ("Comment not terminated at end of input", 0);
490 if (c == '\n') /* ULT */
491 goto step1;
493 break;
495 case '*':
496 if ((c = java_get_unicode ()) == '*')
498 if ((c = java_get_unicode ()) == '/')
499 goto step1; /* Empy documentation comment */
501 else
502 /* Parsing the documentation section. We're looking
503 for the @depracated pseudo keyword. the @deprecated
504 tag must be at the beginning of a doc comment line
505 (ignoring white space and any * character) */
508 int valid_tag = 0, seen_star;
510 while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
512 switch (c)
514 case '*':
515 seen_star = 1;
516 break;
517 case '\n': /* ULT */
518 valid_tag = 1;
519 break;
520 default:
521 seen_star = 0;
523 c = java_get_unicode();
526 if (c == UEOF)
527 java_lex_error
528 ("Comment not terminated at end of input", 0);
530 if (seen_star && (c == '/'))
531 goto step1; /* End of documentation */
533 if (valid_tag && (c == '@'))
535 char deprecated [10];
536 int deprecated_index = 0;
538 for (deprecated_index = 0, c = java_get_unicode ();
539 deprecated_index < 10 && c != UEOF;
540 c = java_get_unicode ())
541 deprecated [deprecated_index++] = c;
543 if (c == UEOF)
544 java_lex_error
545 ("Comment not terminated at end of input", 0);
547 java_unget_unicode (c);
548 deprecated [deprecated_index] = '\0';
549 if (!strcmp (deprecated, "deprecated"))
551 /* Set global flag to be checked by class. FIXME */
552 warning ("deprecated implementation found");
557 else
558 java_unget_unicode (c);
560 java_parse_end_comment ();
561 goto step1;
562 break;
563 default:
564 java_unget_unicode (c);
565 c = '/';
566 break;
570 ctxp->elc.line = ctxp->c_line->lineno;
571 ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
572 if (ctxp->elc.col < 0)
573 fatal ("ctxp->elc.col < 0 - java_lex");
575 /* Numeric literals */
576 if (JAVA_ASCII_DIGIT (c) || (c == '.'))
578 unicode_t peep;
579 /* This section of code is borrowed from gcc/c-lex.c */
580 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
581 int parts[TOTAL_PARTS];
582 HOST_WIDE_INT high, low;
583 /* End borrowed section */
584 char literal_token [256];
585 int literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
586 int i;
587 int number_beginning = ctxp->c_line->current;
589 /* We might have a . separator instead of a FP like .[0-9]* */
590 if (c == '.')
592 unicode_t peep = java_sneak_unicode ();
594 if (!JAVA_ASCII_DIGIT (peep))
596 JAVA_LEX_SEP('.');
597 BUILD_OPERATOR (DOT_TK);
601 for (i = 0; i < TOTAL_PARTS; i++)
602 parts [i] = 0;
604 if (c == '0')
606 c = java_get_unicode ();
607 if (c == 'x' || c == 'X')
609 radix = 16;
610 c = java_get_unicode ();
612 else if (JAVA_ASCII_DIGIT (c))
613 radix = 8;
614 else if (c == '.')
616 /* Push the '.' back and prepare for a FP parsing... */
617 java_unget_unicode (c);
618 c = '0';
620 else
622 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
623 JAVA_LEX_LIT ("0", 10);
624 switch (c)
626 case 'L': case 'l':
627 SET_LVAL_NODE_TYPE (integer_zero_node, long_type_node);
628 return (INT_LIT_TK);
629 case 'f': case 'F':
630 SET_LVAL_NODE_TYPE (build_real (float_type_node, dconst0),
631 float_type_node);
632 return (FP_LIT_TK);
633 case 'd': case 'D':
634 SET_LVAL_NODE_TYPE (build_real (double_type_node, dconst0),
635 double_type_node);
636 return (FP_LIT_TK);
637 default:
638 java_unget_unicode (c);
639 SET_LVAL_NODE_TYPE (integer_zero_node, int_type_node);
640 return (INT_LIT_TK);
644 /* Parse the first part of the literal, until we find something
645 which is not a number. */
646 while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
647 (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
648 (radix == 8 && JAVA_ASCII_OCTDIGIT (c)))
650 /* We store in a string (in case it turns out to be a FP) and in
651 PARTS if we have to process a integer literal. */
652 int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
653 int count;
655 literal_token [literal_index++] = c;
656 /* This section of code if borrowed from gcc/c-lex.c */
657 for (count = 0; count < TOTAL_PARTS; count++)
659 parts[count] *= radix;
660 if (count)
662 parts[count] += (parts[count-1] >> HOST_BITS_PER_CHAR);
663 parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
665 else
666 parts[0] += numeric;
668 if (parts [TOTAL_PARTS-1] != 0)
669 overflow = 1;
670 /* End borrowed section. */
671 c = java_get_unicode ();
674 /* If we have something from the FP char set but not a digit, parse
675 a FP literal. */
676 if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
678 int stage = 0;
679 int seen_digit = (literal_index ? 1 : 0);
680 int seen_exponent = 0;
681 int fflag = 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
682 double unless specified. */
683 if (radix != 10)
684 java_lex_error ("Can't express non-decimal FP literal", 0);
686 for (;;)
688 if (c == '.')
690 if (stage < 1)
692 stage = 1;
693 literal_token [literal_index++ ] = c;
694 c = java_get_unicode ();
696 else
697 java_lex_error ("Invalid character in FP literal", 0);
700 if (c == 'e' || c == 'E')
702 if (stage < 2)
704 /* {E,e} must have seen at list a digit */
705 if (!seen_digit)
706 java_lex_error ("Invalid FP literal", 0);
707 seen_digit = 0;
708 seen_exponent = 1;
709 stage = 2;
710 literal_token [literal_index++] = c;
711 c = java_get_unicode ();
713 else
714 java_lex_error ("Invalid character in FP literal", 0);
716 if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
718 fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
719 stage = 4; /* So we fall through */
722 if ((c=='-' || c =='+') && stage < 3)
724 stage = 3;
725 literal_token [literal_index++] = c;
726 c = java_get_unicode ();
729 if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
730 (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
731 (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
732 (stage == 3 && JAVA_ASCII_DIGIT (c)))
734 if (JAVA_ASCII_DIGIT (c))
735 seen_digit = 1;
736 literal_token [literal_index++ ] = c;
737 c = java_get_unicode ();
739 else
741 jmp_buf handler;
742 REAL_VALUE_TYPE value;
743 #ifndef JC1_LITE
744 tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
745 #endif
747 if (stage != 4) /* Don't push back fF/dD */
748 java_unget_unicode (c);
750 /* An exponent (if any) must have seen a digit. */
751 if (seen_exponent && !seen_digit)
752 java_lex_error ("Invalid FP literal", 0);
754 literal_token [literal_index] = '\0';
755 JAVA_LEX_LIT (literal_token, radix);
757 if (setjmp (handler))
759 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
760 value = DCONST0;
762 else
764 SET_FLOAT_HANDLER (handler);
765 SET_REAL_VALUE_ATOF
766 (value, REAL_VALUE_ATOF (literal_token,
767 TYPE_MODE (type)));
769 if (REAL_VALUE_ISINF (value))
770 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
772 if (REAL_VALUE_ISNAN (value))
773 JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
775 SET_LVAL_NODE_TYPE (build_real (type, value), type);
776 SET_FLOAT_HANDLER (NULL_PTR);
777 return FP_LIT_TK;
781 } /* JAVA_ASCCI_FPCHAR (c) */
783 /* Here we get back to converting the integral literal. */
784 if (c == 'L' || c == 'l')
785 long_suffix = 1;
786 else if (radix == 16 && JAVA_ASCII_LETTER (c))
787 java_lex_error ("Digit out of range in hexadecimal literal", 0);
788 else if (radix == 8 && JAVA_ASCII_DIGIT (c))
789 java_lex_error ("Digit out of range in octal literal", 0);
790 else if (radix == 16 && !literal_index)
791 java_lex_error ("No digit specified for hexadecimal literal", 0);
792 else
793 java_unget_unicode (c);
795 #ifdef JAVA_LEX_DEBUG
796 literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
797 JAVA_LEX_LIT (literal_token, radix);
798 #endif
799 /* This section of code is borrowed from gcc/c-lex.c */
800 if (!overflow)
802 bytes = GET_TYPE_PRECISION (long_type_node);
803 for (i = bytes; i < TOTAL_PARTS; i++)
804 if (parts [i])
806 overflow = 1;
807 break;
810 high = low = 0;
811 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
813 high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
814 / HOST_BITS_PER_CHAR)]
815 << (i * HOST_BITS_PER_CHAR));
816 low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
818 /* End borrowed section. */
820 /* Range checking */
821 if (long_suffix)
823 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
824 9223372036854775807L is the biggest `long' literal that can be
825 expressed using a 10 radix. For other radixes, everything that
826 fits withing 64 bits is OK. */
827 int hb = (high >> 31);
828 if (overflow || (hb && low && radix == 10) ||
829 (hb && high & 0x7fffffff && radix == 10) ||
830 (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
831 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
833 else
835 /* 2147483648 is valid if operand of a '-'. Otherwise,
836 2147483647 is the biggest `int' literal that can be
837 expressed using a 10 radix. For other radixes, everything
838 that fits within 32 bits is OK. */
839 int hb = (low >> 31) & 0x1;
840 if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
841 (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
842 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
844 ctxp->minus_seen = 0;
845 SET_LVAL_NODE_TYPE (build_int_2 (low, high),
846 (long_suffix ? long_type_node : int_type_node));
847 return INT_LIT_TK;
850 ctxp->minus_seen = 0;
851 /* Character literals */
852 if (c == '\'')
854 unicode_t char_lit;
855 if ((c = java_get_unicode ()) == '\\')
856 char_lit = java_parse_escape_sequence ();
857 else
858 char_lit = c;
860 c = java_get_unicode ();
862 if ((c == '\n') || (c == UEOF))
863 java_lex_error ("Character literal not terminated at end of line", 0);
864 if (c != '\'')
865 java_lex_error ("Syntax error in character literal", 0);
867 if (c == JAVA_CHAR_ERROR)
868 char_lit = 0; /* We silently convert it to zero */
870 JAVA_LEX_CHAR_LIT (char_lit);
871 SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
872 return CHAR_LIT_TK;
875 /* String literals */
876 if (c == '"')
878 int no_error;
879 char *string;
881 for (no_error = 1, c = java_get_unicode ();
882 c != '"' && c != '\n'; c = java_get_unicode ())
884 if (c == '\\')
885 c = java_parse_escape_sequence ();
886 no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
887 if (c)
888 java_unicode_2_utf8 (c);
890 if (c == '\n' || c == UEOF) /* ULT */
892 lineno--; /* Refer to the line the terminator was seen */
893 java_lex_error ("String not terminated at end of line.", 0);
894 lineno++;
897 obstack_1grow (&temporary_obstack, '\0');
898 string = obstack_finish (&temporary_obstack);
899 #ifndef JC1_LITE
900 if (!no_error || (c != '"'))
901 java_lval->node = error_mark_node; /* Requires futher testing FIXME */
902 else
904 tree s = make_node (STRING_CST);
905 TREE_STRING_LENGTH (s) = strlen (string);
906 TREE_STRING_POINTER (s) =
907 obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
908 strcpy (TREE_STRING_POINTER (s), string);
909 java_lval->node = s;
911 #endif
912 return STRING_LIT_TK;
915 /* Separator */
916 switch (c)
918 case '(':
919 JAVA_LEX_SEP (c);
920 BUILD_OPERATOR (OP_TK);
921 case ')':
922 JAVA_LEX_SEP (c);
923 return CP_TK;
924 case '{':
925 JAVA_LEX_SEP (c);
926 if (ctxp->ccb_indent == 1)
927 ctxp->first_ccb_indent1 = lineno;
928 ctxp->ccb_indent++;
929 return OCB_TK;
930 case '}':
931 JAVA_LEX_SEP (c);
932 ctxp->ccb_indent--;
933 if (ctxp->ccb_indent == 1)
934 ctxp->last_ccb_indent1 = lineno;
935 return CCB_TK;
936 case '[':
937 JAVA_LEX_SEP (c);
938 BUILD_OPERATOR (OSB_TK);
939 case ']':
940 JAVA_LEX_SEP (c);
941 return CSB_TK;
942 case ';':
943 JAVA_LEX_SEP (c);
944 return SC_TK;
945 case ',':
946 JAVA_LEX_SEP (c);
947 return C_TK;
948 case '.':
949 JAVA_LEX_SEP (c);
950 BUILD_OPERATOR (DOT_TK);
951 /* return DOT_TK; */
954 /* Operators */
955 switch (c)
957 case '=':
958 if ((c = java_get_unicode ()) == '=')
960 BUILD_OPERATOR (EQ_TK);
962 else
964 /* Equals is used in two different locations. In the
965 variable_declarator: rule, it has to be seen as '=' as opposed
966 to being seen as an ordinary assignment operator in
967 assignment_operators: rule. */
968 java_unget_unicode (c);
969 BUILD_OPERATOR (ASSIGN_TK);
972 case '>':
973 switch ((c = java_get_unicode ()))
975 case '=':
976 BUILD_OPERATOR (GTE_TK);
977 case '>':
978 switch ((c = java_get_unicode ()))
980 case '>':
981 if ((c = java_get_unicode ()) == '=')
983 BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
985 else
987 java_unget_unicode (c);
988 BUILD_OPERATOR (ZRS_TK);
990 case '=':
991 BUILD_OPERATOR2 (SRS_ASSIGN_TK);
992 default:
993 java_unget_unicode (c);
994 BUILD_OPERATOR (SRS_TK);
996 default:
997 java_unget_unicode (c);
998 BUILD_OPERATOR (GT_TK);
1001 case '<':
1002 switch ((c = java_get_unicode ()))
1004 case '=':
1005 BUILD_OPERATOR (LTE_TK);
1006 case '<':
1007 if ((c = java_get_unicode ()) == '=')
1009 BUILD_OPERATOR2 (LS_ASSIGN_TK);
1011 else
1013 java_unget_unicode (c);
1014 BUILD_OPERATOR (LS_TK);
1016 default:
1017 java_unget_unicode (c);
1018 BUILD_OPERATOR (LT_TK);
1021 case '&':
1022 switch ((c = java_get_unicode ()))
1024 case '&':
1025 BUILD_OPERATOR (BOOL_AND_TK);
1026 case '=':
1027 BUILD_OPERATOR2 (AND_ASSIGN_TK);
1028 default:
1029 java_unget_unicode (c);
1030 BUILD_OPERATOR (AND_TK);
1033 case '|':
1034 switch ((c = java_get_unicode ()))
1036 case '|':
1037 BUILD_OPERATOR (BOOL_OR_TK);
1038 case '=':
1039 BUILD_OPERATOR2 (OR_ASSIGN_TK);
1040 default:
1041 java_unget_unicode (c);
1042 BUILD_OPERATOR (OR_TK);
1045 case '+':
1046 switch ((c = java_get_unicode ()))
1048 case '+':
1049 BUILD_OPERATOR (INCR_TK);
1050 case '=':
1051 BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1052 default:
1053 java_unget_unicode (c);
1054 BUILD_OPERATOR (PLUS_TK);
1057 case '-':
1058 switch ((c = java_get_unicode ()))
1060 case '-':
1061 BUILD_OPERATOR (DECR_TK);
1062 case '=':
1063 BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1064 default:
1065 java_unget_unicode (c);
1066 ctxp->minus_seen = 1;
1067 BUILD_OPERATOR (MINUS_TK);
1070 case '*':
1071 if ((c = java_get_unicode ()) == '=')
1073 BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1075 else
1077 java_unget_unicode (c);
1078 BUILD_OPERATOR (MULT_TK);
1081 case '/':
1082 if ((c = java_get_unicode ()) == '=')
1084 BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1086 else
1088 java_unget_unicode (c);
1089 BUILD_OPERATOR (DIV_TK);
1092 case '^':
1093 if ((c = java_get_unicode ()) == '=')
1095 BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1097 else
1099 java_unget_unicode (c);
1100 BUILD_OPERATOR (XOR_TK);
1103 case '%':
1104 if ((c = java_get_unicode ()) == '=')
1106 BUILD_OPERATOR2 (REM_ASSIGN_TK);
1108 else
1110 java_unget_unicode (c);
1111 BUILD_OPERATOR (REM_TK);
1114 case '!':
1115 if ((c = java_get_unicode()) == '=')
1117 BUILD_OPERATOR (NEQ_TK);
1119 else
1121 java_unget_unicode (c);
1122 BUILD_OPERATOR (NEG_TK);
1125 case '?':
1126 JAVA_LEX_OP ("?");
1127 BUILD_OPERATOR (REL_QM_TK);
1128 case ':':
1129 JAVA_LEX_OP (":");
1130 BUILD_OPERATOR (REL_CL_TK);
1131 case '~':
1132 BUILD_OPERATOR (NOT_TK);
1135 /* Keyword, boolean literal or null literal */
1136 for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1137 JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1139 java_unicode_2_utf8 (c);
1140 if (all_ascii && c >= 128)
1141 all_ascii = 0;
1142 ascii_index++;
1145 obstack_1grow (&temporary_obstack, '\0');
1146 string = obstack_finish (&temporary_obstack);
1147 java_unget_unicode (c);
1149 /* If we have something all ascii, we consider a keyword, a boolean
1150 literal, a null literal or an all ASCII identifier. Otherwise,
1151 this is an identifier (possibly not respecting formation rule). */
1152 if (all_ascii)
1154 struct java_keyword *kw;
1155 if ((kw=java_keyword (string, ascii_index)))
1157 JAVA_LEX_KW (string);
1158 switch (kw->token)
1160 case PUBLIC_TK: case PROTECTED_TK: case STATIC_TK:
1161 case ABSTRACT_TK: case FINAL_TK: case NATIVE_TK:
1162 case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1163 case PRIVATE_TK:
1164 SET_MODIFIER_CTX (kw->token);
1165 return MODIFIER_TK;
1166 case FLOAT_TK:
1167 SET_LVAL_NODE (float_type_node);
1168 return FP_TK;
1169 case DOUBLE_TK:
1170 SET_LVAL_NODE (double_type_node);
1171 return FP_TK;
1172 case BOOLEAN_TK:
1173 SET_LVAL_NODE (boolean_type_node);
1174 return BOOLEAN_TK;
1175 case BYTE_TK:
1176 SET_LVAL_NODE (byte_type_node);
1177 return INTEGRAL_TK;
1178 case SHORT_TK:
1179 SET_LVAL_NODE (short_type_node);
1180 return INTEGRAL_TK;
1181 case INT_TK:
1182 SET_LVAL_NODE (int_type_node);
1183 return INTEGRAL_TK;
1184 case LONG_TK:
1185 SET_LVAL_NODE (long_type_node);
1186 return INTEGRAL_TK;
1187 case CHAR_TK:
1188 SET_LVAL_NODE (char_type_node);
1189 return INTEGRAL_TK;
1191 /* Keyword based literals */
1192 case TRUE_TK:
1193 case FALSE_TK:
1194 SET_LVAL_NODE ((kw->token == TRUE_TK ?
1195 boolean_true_node : boolean_false_node));
1196 return BOOL_LIT_TK;
1197 case NULL_TK:
1198 SET_LVAL_NODE (null_pointer_node);
1199 return NULL_TK;
1201 /* Some keyword we want to retain information on the location
1202 they where found */
1203 case CASE_TK:
1204 case DEFAULT_TK:
1205 case SUPER_TK:
1206 case THIS_TK:
1207 case RETURN_TK:
1208 case BREAK_TK:
1209 case CONTINUE_TK:
1210 case TRY_TK:
1211 case CATCH_TK:
1212 BUILD_OPERATOR (kw->token);
1214 default:
1215 return kw->token;
1220 /* We may have and ID here */
1221 if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1223 JAVA_LEX_ID (string);
1224 java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1225 return ID_TK;
1228 /* Everything else is an invalid character in the input */
1230 char lex_error_buffer [128];
1231 sprintf (lex_error_buffer, "Invalid character '%s' in input",
1232 java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1233 java_lex_error (lex_error_buffer, 1);
1235 return 0;
1238 static void
1239 java_unicode_2_utf8 (unicode)
1240 unicode_t unicode;
1242 if (RANGE (unicode, 0x01, 0x7f))
1243 obstack_1grow (&temporary_obstack, (char)unicode);
1244 else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1246 obstack_1grow (&temporary_obstack,
1247 (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1248 obstack_1grow (&temporary_obstack,
1249 (unsigned char)(0x80 | (unicode & 0x3f)));
1251 else /* Range 0x800-0xffff */
1253 obstack_1grow (&temporary_obstack,
1254 (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1255 obstack_1grow (&temporary_obstack,
1256 (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1257 obstack_1grow (&temporary_obstack,
1258 (unsigned char)(0x80 | (unicode & 0x003f) >> 12));
1262 #ifndef JC1_LITE
1263 static tree
1264 build_wfl_node (node)
1265 tree node;
1267 return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1269 #endif
1271 static void
1272 java_lex_error (msg, forward)
1273 char *msg;
1274 int forward;
1276 #ifndef JC1_LITE
1277 ctxp->elc.line = ctxp->c_line->lineno;
1278 ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1280 /* Might be caught in the middle of some error report */
1281 ctxp->java_error_flag = 0;
1282 java_error (NULL);
1283 java_error (msg);
1284 #endif
/* Return 1 if the character C, just read from FP, ends a line and 0
   otherwise.  A line feed and a carriage return each end a line; when
   one is immediately followed by the other, the second character is
   consumed as well so that the pair counts as a single line end, which
   is how java_lineterminator counts lines while lexing.  Any other
   character following C is pushed back on FP.  */

static int
java_is_eol (fp, c)
     FILE *fp;
     int c;
{
  int partner, next;

  switch (c)
    {
    case '\n':
      partner = '\r';
      break;
    case '\r':
      partner = '\n';
      break;
    default:
      return 0;
    }

  next = getc (fp);
  if (next != partner && next != EOF)
    ungetc (next, fp);
  return 1;
}
1307 char *
1308 java_get_line_col (filename, line, col)
1309 char *filename;
1310 int line, col;
1312 #ifdef JC1_LITE
1313 return 0;
1314 #else
1315 /* Dumb implementation. Doesn't try to cache or optimize things. */
1316 /* First line of the file is line 1, first column is 1 */
1318 /* COL <= 0 means, at the CR/LF in LINE */
1320 FILE *fp;
1321 int c, ccol, cline = 1;
1322 int current_line_col = 0;
1324 if (!(fp = fopen (filename, "r")))
1325 fatal ("Can't open file - java_display_line_col");
1327 while (cline != line)
1329 c = getc (fp);
1330 if (c < 0)
1332 static char msg[] = "<<file too short - unexpected EOF>>";
1333 obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1334 goto have_line;
1336 if (java_is_eol (fp, c))
1337 cline++;
1340 /* Gather the chars of the current line in a buffer */
1341 for (;;)
1343 c = getc (fp);
1344 if (c < 0 || java_is_eol (fp, c))
1345 break;
1346 obstack_1grow (&temporary_obstack, c);
1347 current_line_col++;
1349 have_line:
1351 obstack_1grow (&temporary_obstack, '\n');
1353 if (col < 0)
1354 col = current_line_col;
1356 /* Place the '^' a the right position */
1357 for (ccol = 1; ccol <= col; ccol++)
1358 obstack_1grow (&temporary_obstack, ' ');
1359 obstack_grow0 (&temporary_obstack, "^", 1);
1361 fclose (fp);
1362 return obstack_finish (&temporary_obstack);
1363 #endif