1 /* Language lexer for the GNU compiler for the Java(TM) language.
2 Copyright (C) 1997, 1998 Free Software Foundation, Inc.
3 Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
5 This file is part of GNU CC.
7 GNU CC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU CC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU CC; see the file COPYING. If not, write to
19 the Free Software Foundation, 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA.
22 Java and all Java-based marks are trademarks or registered trademarks
23 of Sun Microsystems, Inc. in the United States and other countries.
24 The Free Software Foundation is independent of Sun Microsystems, Inc. */
26 /* It defines java_lex (yylex) that reads a Java ASCII source file
27 possibly containing Unicode escape sequences or utf8 encoded characters
28 and returns a token for everything found but comments, white spaces
29 and line terminators. When necessary, it also fills the java_lval
30 (yylval) union. It's implemented to be called by a re-entrant parser
33 The lexical analysis conforms to the Java grammar described in "The
34 Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
35 Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
45 #ifdef inline /* javaop.h redefines inline as static */
55 extern struct obstack
*expression_obstack
;
61 int java_lang_imported
= 0;
64 if (!java_lang_imported
)
66 tree node
= build_tree_list
67 (build_expr_wfl (get_identifier ("java.lang"), NULL
, 0, 0), NULL_TREE
);
68 read_import_dir (TREE_PURPOSE (node
));
69 TREE_CHAIN (node
) = ctxp
->import_demand_list
;
70 ctxp
->import_demand_list
= node
;
71 java_lang_imported
= 1;
75 wfl_operator
= build_expr_wfl (NULL_TREE
, ctxp
->filename
, 0, 0);
77 label_id
= get_identifier ("$L");
79 wfl_append
= build_expr_wfl (get_identifier ("append"), NULL
, 0, 0);
80 if (!wfl_string_buffer
)
82 build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL
, 0, 0);
84 wfl_to_string
= build_expr_wfl (get_identifier ("toString"), NULL
, 0, 0);
86 ctxp
->static_initialized
= ctxp
->non_static_initialized
=
87 ctxp
->incomplete_class
= NULL_TREE
;
89 bzero (ctxp
->modifier_ctx
, 11*sizeof (ctxp
->modifier_ctx
[0]));
91 bzero (current_jcf
, sizeof (JCF
));
92 ctxp
->current_parsed_class
= NULL
;
93 ctxp
->package
= NULL_TREE
;
96 ctxp
->filename
= input_filename
;
97 ctxp
->lineno
= lineno
= 0;
100 ctxp
->unget_utf8_value
= 0;
101 ctxp
->minus_seen
= 0;
102 ctxp
->java_error_flag
= 0;
/* Render character I of LINE as text in a 10-byte static buffer.  A
   character flagged in unicode_escape_p[], or whose value is greater than
   128, is formatted as a backslash-u escape with four hex digits; any
   other character is stored as-is in buffer[0].
   NOTE(review): the return statement and the termination of the
   single-character case are not visible in this excerpt.  The > 128 test
   leaves the value 128 itself on the raw path -- confirm whether > 127
   was intended.  */
106 java_sprint_unicode (line
, i
)
107 struct java_line
*line
;
110 static char buffer
[10];
111 if (line
->unicode_escape_p
[i
] || line
->line
[i
] > 128)
112 sprintf (buffer
, "\\u%04x", line
->line
[i
]);
115 buffer
[0] = line
->line
[i
];
/* Peek at the character stored at the current read position of the
   current line (ctxp->c_line).  The position itself is not modified.  */
122 java_sneak_unicode ()
124 return (ctxp
->c_line
->line
[ctxp
->c_line
->current
]);
/* Step the read position of the current line back by one character and
   reduce char_col by JAVA_COLUMN_DELTA (0).  Calls fatal if the read
   position is already 0.
   NOTE(review): the parameter C is not used by any statement visible in
   this excerpt.  */
128 java_unget_unicode (c
)
131 if (!ctxp
->c_line
->current
)
132 fatal ("can't unget unicode - java_unget_unicode");
133 ctxp
->c_line
->current
--;
134 ctxp
->c_line
->char_col
-= JAVA_COLUMN_DELTA (0);
/* Install a fresh, empty java_line as ctxp->c_line.  The look-ahead
   character of the outgoing line and its escape flag are captured first
   (0 when there is no current line) so they can be carried over.
   NOTE(review): the braces and some of the conditions of this function
   are not visible in this excerpt, so the exact nesting of the statements
   below cannot be confirmed from here.  */
138 java_allocate_new_line ()
141 unicode_t ahead
= (ctxp
->c_line
? ctxp
->c_line
->ahead
[0] : '\0');
142 char ahead_escape_p
= (ctxp
->c_line
?
143 ctxp
->c_line
->unicode_escape_ahead_p
: 0);
145 if (ctxp
->c_line
&& !ctxp
->c_line
->white_space_only
)
/* Release the buffers of the previous line, then let the current line
   take its place as ctxp->p_line.  */
149 free (ctxp
->p_line
->unicode_escape_p
);
150 free (ctxp
->p_line
->line
);
153 ctxp
->p_line
= ctxp
->c_line
;
154 ctxp
->c_line
= NULL
; /* Reallocated */
/* NOTE(review): none of the three malloc results below is checked for
   NULL in the visible lines before being dereferenced.  */
159 ctxp
->c_line
= (struct java_line
*)malloc (sizeof (struct java_line
));
160 ctxp
->c_line
->max
= JAVA_LINE_MAX
;
161 ctxp
->c_line
->line
= (unicode_t
*)malloc
162 (sizeof (unicode_t
)*ctxp
->c_line
->max
);
163 ctxp
->c_line
->unicode_escape_p
=
164 (char *)malloc (sizeof (char)*ctxp
->c_line
->max
);
165 ctxp
->c_line
->white_space_only
= 0;
/* Reset the line to empty: size, read position and column all start at 0.  */
168 ctxp
->c_line
->line
[0] = ctxp
->c_line
->size
= 0;
169 ctxp
->c_line
->char_col
= ctxp
->c_line
->current
= 0;
/* Store the carried-over look-ahead character and its escape flag as the
   next element of the new line.  NOTE(review): the guarding condition,
   if any, is not visible here.  */
172 ctxp
->c_line
->line
[ctxp
->c_line
->size
] = ahead
;
173 ctxp
->c_line
->unicode_escape_p
[ctxp
->c_line
->size
] = ahead_escape_p
;
174 ctxp
->c_line
->size
++;
/* Clear the look-ahead slot, advance the global line counter and mark the
   line as containing only white space so far.  */
176 ctxp
->c_line
->ahead
[0] = 0;
177 ctxp
->c_line
->unicode_escape_ahead_p
= 0;
178 ctxp
->c_line
->lineno
= ++lineno
;
179 ctxp
->c_line
->white_space_only
= 1;
/* NOTE(review): fragment of a UTF-8 reader; the function header is not
   visible in this excerpt (presumably java_read_char -- confirm).
   A pending ctxp->unget_utf8_value is picked up first and the field is
   cleared.  Otherwise the lead byte C selects a two-byte or three-byte
   sequence whose continuation bytes C1 and C2 are combined into a
   unicode_t.  */
188 if (ctxp
->unget_utf8_value
)
190 int to_return
= ctxp
->unget_utf8_value
;
191 ctxp
->unget_utf8_value
= 0;
/* NOTE(review): in C, == binds tighter than &, so each test below parses
   as x & (mask == value), that is x & 0, which is always false.  As
   written neither multi-byte branch can be entered.  The intended tests
   are presumably (c & 0xe0) == 0xc0, (c1 & 0xc0) == 0x80,
   (c & 0xf0) == 0xe0 and (c2 & 0xc0) == 0x80.  */
203 if (c
& 0xe0 == 0xc0)
206 if (c1
& 0xc0 == 0x80)
207 return (unicode_t
)(((c
&0x1f) << 6) + (c1
& 0x3f));
209 else if (c
& 0xf0 == 0xe0)
212 if (c1
& 0xc0 == 0x80)
215 if (c2
& 0xc0 == 0x80)
216 return (unicode_t
)(((c
& 0xf) << 12) +
217 (( c1
& 0x3f) << 6) + (c2
& 0x3f));
220 java_lex_error ("Bad utf8 encoding", 0);
/* Append character C and its UNICODE_ESCAPE_P flag to line L.  When the
   line is full (size == max) the capacity is first raised by
   JAVA_LINE_MAX and both parallel arrays are reallocated to match.  */
225 java_store_unicode (l
, c
, unicode_escape_p
)
228 int unicode_escape_p
;
230 if (l
->size
== l
->max
)
232 l
->max
+= JAVA_LINE_MAX
;
/* NOTE(review): both realloc results are assigned straight back to the
   pointers and used below without a NULL check in the visible lines.  */
233 l
->line
= (unicode_t
*)realloc (l
->line
, sizeof (unicode_t
)*l
->max
);
234 l
->unicode_escape_p
= (char *)realloc (l
->unicode_escape_p
,
235 sizeof (char)*l
->max
);
/* Store the character and its flag at index size, then bump size.  */
237 l
->line
[l
->size
] = c
;
238 l
->unicode_escape_p
[l
->size
++] = unicode_escape_p
;
/* Read the next character of the input through java_read_char.
   *UNICODE_ESCAPE_P is cleared on entry and set to 1 once four hex digits
   of a Unicode escape have been decoded.  When TERM_CONTEXT is zero the
   character is passed to java_lineterminator and '\n' is returned if that
   call reports a terminator.
   NOTE(review): several lines of this function (declarations, braces and
   some conditions) are not visible in this excerpt.  */
242 java_read_unicode (term_context
, unicode_escape_p
)
244 int *unicode_escape_p
;
249 c
= java_read_char ();
250 *unicode_escape_p
= 0;
253 return ((term_context
? c
:
254 java_lineterminator (c
) ? '\n' : (unicode_t
)c
));
256 /* Count the number of preceding '\'.  The scan seeks backwards from
   the current file offset (BASE) and re-reads one character at a time;
   the offset is restored to BASE afterwards.  The ftell and fseek results
   are not checked.  */
257 for (base
= ftell (finput
), i
= base
-2; c
== '\\';)
259 fseek (finput
, i
--, SEEK_SET
);
260 c
= java_read_char (); /* Will fail if reading utf8 stream. FIXME */
262 fseek (finput
, base
, SEEK_SET
);
263 if ((base
-i
-3)%2 == 0) /* If odd number of \ seen */
265 c
= java_read_char ();
268 unsigned short unicode
= 0;
270 /* Next should be 4 hex digits, otherwise it's an error.
271 The hex value is converted into the unicode, pushed into
272 the Unicode stream. */
273 for (shift
= 12; shift
>= 0; shift
-= 4)
275 if ((c
= java_read_char ()) == UEOF
)
277 if (c
>= '0' && c
<= '9')
278 unicode
|= (unicode_t
)((c
-'0') << shift
);
279 else if ((c
>= 'a' && c
<= 'f') || (c
>= 'A' && c
<= 'F'))
/* The additive operators bind tighter than <<, so the whole digit value
   (10 + folded letter - 'a') is what gets shifted.  */
280 unicode
|= (unicode_t
)(10+(c
| 0x20)-'a' << shift
);
283 ("Non hex digit in Unicode escape sequence", 0);
285 *unicode_escape_p
= 1;
/* NOTE(review): in the visible lines C still holds the last hex digit
   read by the loop above, so the call below tests that digit rather than
   the decoded value in UNICODE -- confirm whether
   java_lineterminator (unicode) was intended.  */
286 return (term_context
? unicode
:
287 (java_lineterminator (c
) ? '\n' : unicode
));
291 return (unicode_t
)'\\';
297 /* It's time to read a line when... */
298 if (!ctxp
->c_line
|| ctxp
->c_line
->current
== ctxp
->c_line
->size
)
301 java_allocate_new_line ();
302 if (ctxp
->c_line
->line
[0] != '\n')
305 int unicode_escape_p
;
306 c
= java_read_unicode (0, &unicode_escape_p
);
307 java_store_unicode (ctxp
->c_line
, c
, unicode_escape_p
);
308 if (ctxp
->c_line
->white_space_only
309 && !JAVA_WHITE_SPACE_P (c
) && c
!='\n')
310 ctxp
->c_line
->white_space_only
= 0;
311 if ((c
== '\n') || (c
== UEOF
))
315 ctxp
->c_line
->char_col
+= JAVA_COLUMN_DELTA (0);
316 JAVA_LEX_CHAR (ctxp
->c_line
->line
[ctxp
->c_line
->current
]);
317 return ctxp
->c_line
->line
[ctxp
->c_line
->current
++];
/* Examine C as the possible start of a line terminator.  After a '\n'
   the following character is read with java_read_unicode; unless it is
   '\r' it is saved in the look-ahead slot of the current line together
   with its escape flag.  The '\r' case mirrors this, looking for a
   following '\n'.
   NOTE(review): the return statements are not visible in this excerpt.  */
321 java_lineterminator (c
)
324 int unicode_escape_p
;
325 if (c
== '\n') /* LF */
327 if ((c
= java_read_unicode (1, &unicode_escape_p
)) != '\r')
329 ctxp
->c_line
->ahead
[0] = c
;
330 ctxp
->c_line
->unicode_escape_ahead_p
= unicode_escape_p
;
334 else if (c
== '\r') /* CR */
336 if ((c
= java_read_unicode (1, &unicode_escape_p
)) != '\n')
338 ctxp
->c_line
->ahead
[0] = c
;
339 ctxp
->c_line
->unicode_escape_ahead_p
= unicode_escape_p
;
347 /* Parse the end of a C style comment */
349 java_parse_end_comment ()
353 for (c
= java_get_unicode ();; c
= java_get_unicode ())
358 java_lex_error ("Comment not terminated at end of input", 0);
360 switch (c
= java_get_unicode ())
363 java_lex_error ("Comment not terminated at end of input", 0);
366 case '*': /* reparse only '*' */
367 java_unget_unicode (c
);
373 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
374 will return a wrong result. */
376 java_letter_or_digit_p (c
)
379 return _JAVA_LETTER_OR_DIGIT_P (c
);
/* Translate the escape sequence that follows a backslash.  The next
   character is fetched with java_get_unicode and dispatched through a
   switch; the simple escapes return fixed code points (0x8, 0x9, 0xa,
   0xc, 0xd, 0x22, 0x27, 0x5c), digit escapes are accumulated into
   char_lit, and unrecognised characters report an error and return
   JAVA_CHAR_ERROR.
   NOTE(review): most case labels, the declarations and the braces are not
   visible in this excerpt.  */
383 java_parse_escape_sequence ()
388 switch (c
= java_get_unicode ())
391 return (unicode_t
)0x8;
393 return (unicode_t
)0x9;
395 return (unicode_t
)0xa;
397 return (unicode_t
)0xc;
399 return (unicode_t
)0xd;
401 return (unicode_t
)0x22;
403 return (unicode_t
)0x27;
405 return (unicode_t
)0x5c;
/* NOTE(review): the labels below and the RANGE test in the loop accept
   the digits 8 and 9, which are not octal digits.  Each digit contributes
   (digit - '0') << shift, so an 8 or 9 sets a bit that belongs to the
   neighbouring three-bit group -- confirm whether '0' to '7' was
   intended.  */
406 case '0': case '1': case '2': case '3': case '4':
407 case '5': case '6': case '7': case '8': case '9':
410 int octal_escape_index
= 0;
/* Collect at most three digit characters.  */
412 for (; octal_escape_index
< 3 && RANGE (c
, '0', '9');
413 c
= java_get_unicode ())
414 octal_escape
[octal_escape_index
++] = c
;
/* Give back the first character that is not part of the escape.  */
416 java_unget_unicode (c
);
/* A three-digit escape whose first digit is above '3' is rejected.  */
418 if ((octal_escape_index
== 3) && (octal_escape
[0] > '3'))
420 java_lex_error ("Literal octal escape out of range", 0);
421 return JAVA_CHAR_ERROR
;
/* Fold the collected digits into char_lit, three bits per digit, most
   significant digit first.  */
426 for (char_lit
=0, i
= 0, shift
= 3*(octal_escape_index
-1);
427 i
< octal_escape_index
; i
++, shift
-= 3)
428 char_lit
|= (octal_escape
[i
] - '0') << shift
;
435 return '\n'; /* ULT, caught later as a specific error */
437 java_lex_error ("Illegal character in escape sequence", 0);
438 return JAVA_CHAR_ERROR
;
450 unicode_t c
, first_unicode
;
452 int ascii_index
, all_ascii
;
455 /* Translation of the Unicode escape in the raw stream of Unicode
456 characters. Takes care of line terminator. */
458 /* Skip white spaces: SP, TAB and FF or ULT */
459 for (c
= java_get_unicode ();
460 c
== '\n' || JAVA_WHITE_SPACE_P (c
); c
= java_get_unicode ())
463 ctxp
->elc
.line
= ctxp
->c_line
->lineno
;
464 ctxp
->elc
.col
= ctxp
->c_line
->char_col
-2;
467 ctxp
->elc
.col
= (ctxp
->elc
.col
< 0 ? 0 : ctxp
->elc
.col
);
469 if (c
== 0x1a) /* CTRL-Z */
471 if ((c
= java_get_unicode ()) == UEOF
)
472 return 0; /* Ok here */
474 java_unget_unicode (c
); /* Caught latter at the end the function */
476 /* Handle EOF here */
477 if (c
== UEOF
) /* Should probably do something here... */
480 /* Take care of eventual comments. */
483 switch (c
= java_get_unicode ())
486 for (c
= java_get_unicode ();;c
= java_get_unicode ())
489 java_lex_error ("Comment not terminated at end of input", 0);
490 if (c
== '\n') /* ULT */
496 if ((c
= java_get_unicode ()) == '*')
498 if ((c
= java_get_unicode ()) == '/')
499 goto step1
; /* Empy documentation comment */
502 /* Parsing the documentation section. We're looking
503 for the @deprecated pseudo keyword. The @deprecated
504 tag must be at the beginning of a doc comment line
505 (ignoring white space and any * character) */
508 int valid_tag
= 0, seen_star
;
510 while (JAVA_WHITE_SPACE_P (c
) || (c
== '*') || c
== '\n')
523 c
= java_get_unicode();
528 ("Comment not terminated at end of input", 0);
530 if (seen_star
&& (c
== '/'))
531 goto step1
; /* End of documentation */
533 if (valid_tag
&& (c
== '@'))
535 char deprecated
[10];
536 int deprecated_index
= 0;
538 for (deprecated_index
= 0, c
= java_get_unicode ();
539 deprecated_index
< 10 && c
!= UEOF
;
540 c
= java_get_unicode ())
541 deprecated
[deprecated_index
++] = c
;
545 ("Comment not terminated at end of input", 0);
547 java_unget_unicode (c
);
548 deprecated
[deprecated_index
] = '\0';
549 if (!strcmp (deprecated
, "deprecated"))
551 /* Set global flag to be checked by class. FIXME */
552 warning ("deprecated implementation found");
558 java_unget_unicode (c
);
560 java_parse_end_comment ();
564 java_unget_unicode (c
);
570 ctxp
->elc
.line
= ctxp
->c_line
->lineno
;
571 ctxp
->elc
.col
= ctxp
->c_line
->char_col
- JAVA_COLUMN_DELTA (-1);
572 if (ctxp
->elc
.col
< 0)
573 fatal ("ctxp->elc.col < 0 - java_lex");
575 /* Numeric literals */
576 if (JAVA_ASCII_DIGIT (c
) || (c
== '.'))
579 /* This section of code is borrowed from gcc/c-lex.c */
580 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
581 int parts
[TOTAL_PARTS
];
582 HOST_WIDE_INT high
, low
;
583 /* End borrowed section */
584 char literal_token
[256];
585 int literal_index
= 0, radix
= 10, long_suffix
= 0, overflow
= 0, bytes
;
587 int number_beginning
= ctxp
->c_line
->current
;
589 /* We might have a . separator instead of a FP like .[0-9]* */
592 unicode_t peep
= java_sneak_unicode ();
594 if (!JAVA_ASCII_DIGIT (peep
))
597 BUILD_OPERATOR (DOT_TK
);
601 for (i
= 0; i
< TOTAL_PARTS
; i
++)
606 c
= java_get_unicode ();
607 if (c
== 'x' || c
== 'X')
610 c
= java_get_unicode ();
612 else if (JAVA_ASCII_DIGIT (c
))
616 /* Push the '.' back and prepare for a FP parsing... */
617 java_unget_unicode (c
);
622 /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
623 JAVA_LEX_LIT ("0", 10);
627 SET_LVAL_NODE_TYPE (integer_zero_node
, long_type_node
);
630 SET_LVAL_NODE_TYPE (build_real (float_type_node
, dconst0
),
634 SET_LVAL_NODE_TYPE (build_real (double_type_node
, dconst0
),
638 java_unget_unicode (c
);
639 SET_LVAL_NODE_TYPE (integer_zero_node
, int_type_node
);
644 /* Parse the first part of the literal, until we find something
645 which is not a number. */
646 while ((radix
== 10 && JAVA_ASCII_DIGIT (c
)) ||
647 (radix
== 16 && JAVA_ASCII_HEXDIGIT (c
)) ||
648 (radix
== 8 && JAVA_ASCII_OCTDIGIT (c
)))
650 /* We store in a string (in case it turns out to be a FP) and in
651 PARTS if we have to process a integer literal. */
652 int numeric
= (RANGE (c
, '0', '9') ? c
-'0' : 10 +(c
|0x20)-'a');
655 literal_token
[literal_index
++] = c
;
656 /* This section of code is borrowed from gcc/c-lex.c */
657 for (count
= 0; count
< TOTAL_PARTS
; count
++)
659 parts
[count
] *= radix
;
662 parts
[count
] += (parts
[count
-1] >> HOST_BITS_PER_CHAR
);
663 parts
[count
-1] &= (1 << HOST_BITS_PER_CHAR
) - 1;
668 if (parts
[TOTAL_PARTS
-1] != 0)
670 /* End borrowed section. */
671 c
= java_get_unicode ();
674 /* If we have something from the FP char set but not a digit, parse
676 if (JAVA_ASCII_FPCHAR (c
) && !JAVA_ASCII_DIGIT (c
))
679 int seen_digit
= (literal_index
? 1 : 0);
680 int seen_exponent
= 0;
681 int fflag
= 0; /* 1 for {f,F}, 0 for {d,D}. FP literal are
682 double unless specified. */
684 java_lex_error ("Can't express non-decimal FP literal", 0);
693 literal_token
[literal_index
++ ] = c
;
694 c
= java_get_unicode ();
697 java_lex_error ("Invalid character in FP literal", 0);
700 if (c
== 'e' || c
== 'E')
704 /* {E,e} must have seen at least a digit */
706 java_lex_error ("Invalid FP literal", 0);
710 literal_token
[literal_index
++] = c
;
711 c
= java_get_unicode ();
714 java_lex_error ("Invalid character in FP literal", 0);
716 if ( c
== 'f' || c
== 'F' || c
== 'd' || c
== 'D')
718 fflag
= ((c
== 'd') || (c
== 'D')) ? 0 : 1;
719 stage
= 4; /* So we fall through */
722 if ((c
=='-' || c
=='+') && stage
< 3)
725 literal_token
[literal_index
++] = c
;
726 c
= java_get_unicode ();
729 if ((stage
== 0 && JAVA_ASCII_FPCHAR (c
)) ||
730 (stage
== 1 && JAVA_ASCII_FPCHAR (c
) && !(c
== '.')) ||
731 (stage
== 2 && (JAVA_ASCII_DIGIT (c
) || JAVA_FP_PM (c
))) ||
732 (stage
== 3 && JAVA_ASCII_DIGIT (c
)))
734 if (JAVA_ASCII_DIGIT (c
))
736 literal_token
[literal_index
++ ] = c
;
737 c
= java_get_unicode ();
742 REAL_VALUE_TYPE value
;
744 tree type
= (fflag
? FLOAT_TYPE_NODE
: DOUBLE_TYPE_NODE
);
747 if (stage
!= 4) /* Don't push back fF/dD */
748 java_unget_unicode (c
);
750 /* An exponent (if any) must have seen a digit. */
751 if (seen_exponent
&& !seen_digit
)
752 java_lex_error ("Invalid FP literal", 0);
754 literal_token
[literal_index
] = '\0';
755 JAVA_LEX_LIT (literal_token
, radix
);
757 if (setjmp (handler
))
759 JAVA_FLOAT_RANGE_ERROR ((fflag
? "float" : "double"));
764 SET_FLOAT_HANDLER (handler
);
766 (value
, REAL_VALUE_ATOF (literal_token
,
769 if (REAL_VALUE_ISINF (value
))
770 JAVA_FLOAT_RANGE_ERROR ((fflag
? "float" : "double"));
772 if (REAL_VALUE_ISNAN (value
))
773 JAVA_FLOAT_RANGE_ERROR ((fflag
? "float" : "double"));
775 SET_LVAL_NODE_TYPE (build_real (type
, value
), type
);
776 SET_FLOAT_HANDLER (NULL_PTR
);
781 } /* JAVA_ASCCI_FPCHAR (c) */
783 /* Here we get back to converting the integral literal. */
784 if (c
== 'L' || c
== 'l')
786 else if (radix
== 16 && JAVA_ASCII_LETTER (c
))
787 java_lex_error ("Digit out of range in hexadecimal literal", 0);
788 else if (radix
== 8 && JAVA_ASCII_DIGIT (c
))
789 java_lex_error ("Digit out of range in octal literal", 0);
790 else if (radix
== 16 && !literal_index
)
791 java_lex_error ("No digit specified for hexadecimal literal", 0);
793 java_unget_unicode (c
);
795 #ifdef JAVA_LEX_DEBUG
796 literal_token
[literal_index
] = '\0'; /* So JAVA_LEX_LIT is safe. */
797 JAVA_LEX_LIT (literal_token
, radix
);
799 /* This section of code is borrowed from gcc/c-lex.c */
802 bytes
= GET_TYPE_PRECISION (long_type_node
);
803 for (i
= bytes
; i
< TOTAL_PARTS
; i
++)
811 for (i
= 0; i
< HOST_BITS_PER_WIDE_INT
/ HOST_BITS_PER_CHAR
; i
++)
813 high
|= ((HOST_WIDE_INT
) parts
[i
+ (HOST_BITS_PER_WIDE_INT
814 / HOST_BITS_PER_CHAR
)]
815 << (i
* HOST_BITS_PER_CHAR
));
816 low
|= (HOST_WIDE_INT
) parts
[i
] << (i
* HOST_BITS_PER_CHAR
);
818 /* End borrowed section. */
823 /* 9223372036854775808L is valid if operand of a '-'. Otherwise
824 9223372036854775807L is the biggest `long' literal that can be
825 expressed using a 10 radix. For other radixes, everything that
826 fits within 64 bits is OK. */
827 int hb
= (high
>> 31);
828 if (overflow
|| (hb
&& low
&& radix
== 10) ||
829 (hb
&& high
& 0x7fffffff && radix
== 10) ||
830 (hb
&& !(high
& 0x7fffffff) && !ctxp
->minus_seen
&& radix
== 10))
831 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
835 /* 2147483648 is valid if operand of a '-'. Otherwise,
836 2147483647 is the biggest `int' literal that can be
837 expressed using a 10 radix. For other radixes, everything
838 that fits within 32 bits is OK. */
839 int hb
= (low
>> 31) & 0x1;
840 if (overflow
|| high
|| (hb
&& low
& 0x7fffffff && radix
== 10) ||
841 (hb
&& !(low
& 0x7fffffff) && !ctxp
->minus_seen
&& radix
== 10))
842 JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
844 ctxp
->minus_seen
= 0;
845 SET_LVAL_NODE_TYPE (build_int_2 (low
, high
),
846 (long_suffix
? long_type_node
: int_type_node
));
850 ctxp
->minus_seen
= 0;
851 /* Character literals */
855 if ((c
= java_get_unicode ()) == '\\')
856 char_lit
= java_parse_escape_sequence ();
860 c
= java_get_unicode ();
862 if ((c
== '\n') || (c
== UEOF
))
863 java_lex_error ("Character literal not terminated at end of line", 0);
865 java_lex_error ("Syntax error in character literal", 0);
867 if (c
== JAVA_CHAR_ERROR
)
868 char_lit
= 0; /* We silently convert it to zero */
870 JAVA_LEX_CHAR_LIT (char_lit
);
871 SET_LVAL_NODE_TYPE (build_int_2 (char_lit
, 0), char_type_node
);
875 /* String literals */
881 for (no_error
= 1, c
= java_get_unicode ();
882 c
!= '"' && c
!= '\n'; c
= java_get_unicode ())
885 c
= java_parse_escape_sequence ();
886 no_error
&= (c
!= JAVA_CHAR_ERROR
? 1 : 0);
888 java_unicode_2_utf8 (c
);
890 if (c
== '\n' || c
== UEOF
) /* ULT */
892 lineno
--; /* Refer to the line the terminator was seen */
893 java_lex_error ("String not terminated at end of line.", 0);
897 obstack_1grow (&temporary_obstack
, '\0');
898 string
= obstack_finish (&temporary_obstack
);
900 if (!no_error
|| (c
!= '"'))
901 java_lval
->node
= error_mark_node
; /* Requires futher testing FIXME */
904 tree s
= make_node (STRING_CST
);
905 TREE_STRING_LENGTH (s
) = strlen (string
);
906 TREE_STRING_POINTER (s
) =
907 obstack_alloc (expression_obstack
, TREE_STRING_LENGTH (s
)+1);
908 strcpy (TREE_STRING_POINTER (s
), string
);
912 return STRING_LIT_TK
;
920 BUILD_OPERATOR (OP_TK
);
926 if (ctxp
->ccb_indent
== 1)
927 ctxp
->first_ccb_indent1
= lineno
;
933 if (ctxp
->ccb_indent
== 1)
934 ctxp
->last_ccb_indent1
= lineno
;
938 BUILD_OPERATOR (OSB_TK
);
950 BUILD_OPERATOR (DOT_TK
);
958 if ((c
= java_get_unicode ()) == '=')
960 BUILD_OPERATOR (EQ_TK
);
964 /* Equals is used in two different locations. In the
965 variable_declarator: rule, it has to be seen as '=' as opposed
966 to being seen as an ordinary assignment operator in
967 assignment_operators: rule. */
968 java_unget_unicode (c
);
969 BUILD_OPERATOR (ASSIGN_TK
);
973 switch ((c
= java_get_unicode ()))
976 BUILD_OPERATOR (GTE_TK
);
978 switch ((c
= java_get_unicode ()))
981 if ((c
= java_get_unicode ()) == '=')
983 BUILD_OPERATOR2 (ZRS_ASSIGN_TK
);
987 java_unget_unicode (c
);
988 BUILD_OPERATOR (ZRS_TK
);
991 BUILD_OPERATOR2 (SRS_ASSIGN_TK
);
993 java_unget_unicode (c
);
994 BUILD_OPERATOR (SRS_TK
);
997 java_unget_unicode (c
);
998 BUILD_OPERATOR (GT_TK
);
1002 switch ((c
= java_get_unicode ()))
1005 BUILD_OPERATOR (LTE_TK
);
1007 if ((c
= java_get_unicode ()) == '=')
1009 BUILD_OPERATOR2 (LS_ASSIGN_TK
);
1013 java_unget_unicode (c
);
1014 BUILD_OPERATOR (LS_TK
);
1017 java_unget_unicode (c
);
1018 BUILD_OPERATOR (LT_TK
);
1022 switch ((c
= java_get_unicode ()))
1025 BUILD_OPERATOR (BOOL_AND_TK
);
1027 BUILD_OPERATOR2 (AND_ASSIGN_TK
);
1029 java_unget_unicode (c
);
1030 BUILD_OPERATOR (AND_TK
);
1034 switch ((c
= java_get_unicode ()))
1037 BUILD_OPERATOR (BOOL_OR_TK
);
1039 BUILD_OPERATOR2 (OR_ASSIGN_TK
);
1041 java_unget_unicode (c
);
1042 BUILD_OPERATOR (OR_TK
);
1046 switch ((c
= java_get_unicode ()))
1049 BUILD_OPERATOR (INCR_TK
);
1051 BUILD_OPERATOR2 (PLUS_ASSIGN_TK
);
1053 java_unget_unicode (c
);
1054 BUILD_OPERATOR (PLUS_TK
);
1058 switch ((c
= java_get_unicode ()))
1061 BUILD_OPERATOR (DECR_TK
);
1063 BUILD_OPERATOR2 (MINUS_ASSIGN_TK
);
1065 java_unget_unicode (c
);
1066 ctxp
->minus_seen
= 1;
1067 BUILD_OPERATOR (MINUS_TK
);
1071 if ((c
= java_get_unicode ()) == '=')
1073 BUILD_OPERATOR2 (MULT_ASSIGN_TK
);
1077 java_unget_unicode (c
);
1078 BUILD_OPERATOR (MULT_TK
);
1082 if ((c
= java_get_unicode ()) == '=')
1084 BUILD_OPERATOR2 (DIV_ASSIGN_TK
);
1088 java_unget_unicode (c
);
1089 BUILD_OPERATOR (DIV_TK
);
1093 if ((c
= java_get_unicode ()) == '=')
1095 BUILD_OPERATOR2 (XOR_ASSIGN_TK
);
1099 java_unget_unicode (c
);
1100 BUILD_OPERATOR (XOR_TK
);
1104 if ((c
= java_get_unicode ()) == '=')
1106 BUILD_OPERATOR2 (REM_ASSIGN_TK
);
1110 java_unget_unicode (c
);
1111 BUILD_OPERATOR (REM_TK
);
1115 if ((c
= java_get_unicode()) == '=')
1117 BUILD_OPERATOR (NEQ_TK
);
1121 java_unget_unicode (c
);
1122 BUILD_OPERATOR (NEG_TK
);
1127 BUILD_OPERATOR (REL_QM_TK
);
1130 BUILD_OPERATOR (REL_CL_TK
);
1132 BUILD_OPERATOR (NOT_TK
);
1135 /* Keyword, boolean literal or null literal */
1136 for (first_unicode
= c
, all_ascii
= 1, ascii_index
= 0;
1137 JAVA_ID_CHAR_P (c
); c
= java_get_unicode ())
1139 java_unicode_2_utf8 (c
);
1140 if (all_ascii
&& c
>= 128)
1145 obstack_1grow (&temporary_obstack
, '\0');
1146 string
= obstack_finish (&temporary_obstack
);
1147 java_unget_unicode (c
);
1149 /* If we have something all ascii, we consider a keyword, a boolean
1150 literal, a null literal or an all ASCII identifier. Otherwise,
1151 this is an identifier (possibly not respecting formation rule). */
1154 struct java_keyword
*kw
;
1155 if ((kw
=java_keyword (string
, ascii_index
)))
1157 JAVA_LEX_KW (string
);
1160 case PUBLIC_TK
: case PROTECTED_TK
: case STATIC_TK
:
1161 case ABSTRACT_TK
: case FINAL_TK
: case NATIVE_TK
:
1162 case SYNCHRONIZED_TK
: case TRANSIENT_TK
: case VOLATILE_TK
:
1164 SET_MODIFIER_CTX (kw
->token
);
1167 SET_LVAL_NODE (float_type_node
);
1170 SET_LVAL_NODE (double_type_node
);
1173 SET_LVAL_NODE (boolean_type_node
);
1176 SET_LVAL_NODE (byte_type_node
);
1179 SET_LVAL_NODE (short_type_node
);
1182 SET_LVAL_NODE (int_type_node
);
1185 SET_LVAL_NODE (long_type_node
);
1188 SET_LVAL_NODE (char_type_node
);
1191 /* Keyword based literals */
1194 SET_LVAL_NODE ((kw
->token
== TRUE_TK
?
1195 boolean_true_node
: boolean_false_node
));
1198 SET_LVAL_NODE (null_pointer_node
);
1201 /* Some keyword we want to retain information on the location
1212 BUILD_OPERATOR (kw
->token
);
1220 /* We may have an ID here */
1221 if (JAVA_ID_CHAR_P(first_unicode
) && !JAVA_DIGIT_P (first_unicode
))
1223 JAVA_LEX_ID (string
);
1224 java_lval
->node
= BUILD_ID_WFL (GET_IDENTIFIER (string
));
1228 /* Everything else is an invalid character in the input */
1230 char lex_error_buffer
[128];
1231 sprintf (lex_error_buffer
, "Invalid character '%s' in input",
1232 java_sprint_unicode (ctxp
->c_line
, ctxp
->c_line
->current
));
1233 java_lex_error (lex_error_buffer
, 1);
/* Append the UTF-8 encoding of UNICODE to temporary_obstack, one byte at
   a time with obstack_1grow.  Values 0x01-0x7f are written as a single
   byte.  Values 0x80-0x7ff, and also the value 0, are written as two
   bytes.  Everything else is written as three bytes.  */
1239 java_unicode_2_utf8 (unicode
)
1242 if (RANGE (unicode
, 0x01, 0x7f))
1243 obstack_1grow (&temporary_obstack
, (char)unicode
);
1244 else if (RANGE (unicode
, 0x80, 0x7ff) || unicode
== 0)
/* Lead byte carries bits 6-10, trailing byte carries bits 0-5.  */
1246 obstack_1grow (&temporary_obstack
,
1247 (unsigned char)(0xc0 | ((0x7c0 & unicode
) >> 6)));
1248 obstack_1grow (&temporary_obstack
,
1249 (unsigned char)(0x80 | (unicode
& 0x3f)));
1251 else /* Range 0x800-0xffff */
1253 obstack_1grow (&temporary_obstack
,
1254 (unsigned char)(0xe0 | (unicode
& 0xf000) >> 12));
1255 obstack_1grow (&temporary_obstack
,
1256 (unsigned char)(0x80 | (unicode
& 0x0fc0) >> 6));
/* NOTE(review): (unicode & 0x003f) >> 12 is always 0, so the third byte
   emitted below is always 0x80 and the low six bits of UNICODE are lost.
   The two-byte branch above uses (unicode & 0x3f) unshifted for the same
   bits -- presumably the >> 12 here is a mistake; confirm and fix.  */
1257 obstack_1grow (&temporary_obstack
,
1258 (unsigned char)(0x80 | (unicode
& 0x003f) >> 12));
/* Wrap NODE by calling build_expr_wfl with the current file name and the
   line and column recorded in ctxp->elc, and return the result.  */
1264 build_wfl_node (node
)
1267 return build_expr_wfl (node
, ctxp
->filename
, ctxp
->elc
.line
, ctxp
->elc
.col
);
/* Record the location of a lexical error in ctxp->elc: the line is the
   current line number and the column is char_col - 1 shifted by FORWARD.
   The error flag in ctxp is then reset.
   NOTE(review): the statement that actually reports MSG is not visible
   in this excerpt.  */
1272 java_lex_error (msg
, forward
)
1277 ctxp
->elc
.line
= ctxp
->c_line
->lineno
;
1278 ctxp
->elc
.col
= ctxp
->c_line
->char_col
-1+forward
;
1280 /* Might be caught in the middle of some error report */
1281 ctxp
->java_error_flag
= 0;
1297 if (next
!= '\r' && next
!= EOF
)
/* Build, on temporary_obstack, a two-line excerpt for diagnostics: the
   text of line LINE of file FILENAME followed by a line of spaces ending
   in a '^' marker placed according to COL.  The finished obstack object is
   returned.
   NOTE(review): the reads from the file, the braces and any fclose call
   are not visible in this excerpt -- confirm the stream is closed.  */
1308 java_get_line_col (filename
, line
, col
)
1315 /* Dumb implementation. Doesn't try to cache or optimize things. */
1316 /* First line of the file is line 1, first column is 1 */
1318 /* COL <= 0 means, at the CR/LF in LINE */
1321 int c
, ccol
, cline
= 1;
1322 int current_line_col
= 0;
/* NOTE(review): the message below names java_display_line_col although
   this function is java_get_line_col.  */
1324 if (!(fp
= fopen (filename
, "r")))
1325 fatal ("Can't open file - java_display_line_col");
/* Skip forward until the requested line is reached; running out of input
   first puts a placeholder message on the obstack instead.  */
1327 while (cline
!= line
)
1332 static char msg
[] = "<<file too short - unexpected EOF>>";
1333 obstack_grow (&temporary_obstack
, msg
, sizeof(msg
)-1);
1336 if (java_is_eol (fp
, c
))
1340 /* Gather the chars of the current line in a buffer */
1344 if (c
< 0 || java_is_eol (fp
, c
))
1346 obstack_1grow (&temporary_obstack
, c
);
1351 obstack_1grow (&temporary_obstack
, '\n');
1354 col
= current_line_col
;
1356 /* Place the '^' at the right position */
1357 for (ccol
= 1; ccol
<= col
; ccol
++)
1358 obstack_1grow (&temporary_obstack
, ' ');
1359 obstack_grow0 (&temporary_obstack
, "^", 1);
1362 return obstack_finish (&temporary_obstack
);