gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.
  21
  22 Java and all Java-based marks are trademarks or registered trademarks
  23 of Sun Microsystems, Inc. in the United States and other countries.
  24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  25
/* It defines java_lex (yylex), which reads a Java ASCII source file
possibly containing Unicode escape sequences or UTF-8 encoded characters
and returns a token for everything found except comments, white space
and line terminators. When necessary, it also fills in the java_lval
(yylval) union. It is implemented to be called by a re-entrant parser
generated by Bison.

The lexical analysis conforms to the Java grammar described in "The
Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html)  */
  36
  37 #include <stdio.h>
  38 #include <string.h>
  39 #include <setjmp.h>
  40
  41 #ifdef JAVA_LEX_DEBUG
  42 #include <ctype.h>
  43 #endif
  44
  45 #ifdef inline                   /* javaop.h redefines inline as static */
  46 #undef inline
  47 #endif
  48 #include "keyword.h"
  49
  50 #ifndef SEEK_SET
  51 #include <unistd.h>
  52 #endif
  53
  54 #ifndef JC1_LITE
  55 extern struct obstack *expression_obstack;
  56 #endif
  57
  58 void
  59 java_init_lex ()
  60 {
  61   int java_lang_imported = 0;
  62
  63 #ifndef JC1_LITE
  64   if (!java_lang_imported)
  65     {
  66       tree node = build_tree_list
  67         (build_expr_wfl (get_identifier ("java.lang"), NULL, 0, 0), NULL_TREE);
  68       read_import_dir (TREE_PURPOSE (node));
  69       TREE_CHAIN (node) = ctxp->import_demand_list;
  70       ctxp->import_demand_list = node;
  71       java_lang_imported = 1;
  72     }
  73
  74   if (!wfl_operator)
  75     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
  76   if (!label_id)
  77     label_id = get_identifier ("$L");
  78   if (!wfl_append)
  79     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
  80   if (!wfl_string_buffer)
  81     wfl_string_buffer =
  82       build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
  83   if (!wfl_to_string)
  84     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
  85
  86   ctxp->static_initialized = ctxp->non_static_initialized =
  87     ctxp->incomplete_class = NULL_TREE;
  88
  89   bzero (ctxp->modifier_ctx, 11*sizeof (ctxp->modifier_ctx[0]));
  90   classpath = NULL;
  91   bzero (current_jcf, sizeof (JCF));
  92   ctxp->current_parsed_class = NULL;
  93   ctxp->package = NULL_TREE;
  94 #endif
  95
  96   ctxp->filename = input_filename;
  97   ctxp->lineno = lineno = 0;
  98   ctxp->p_line = NULL;
  99   ctxp->c_line = NULL;
 100   ctxp->unget_utf8_value = 0;
 101   ctxp->minus_seen = 0;
 102   ctxp->java_error_flag = 0;
 103 }
 104
 105 static char *
 106 java_sprint_unicode (line, i)
 107     struct java_line *line;
 108     int i;
 109 {
 110   static char buffer [10];
 111   if (line->unicode_escape_p [i] || line->line [i] > 128)
 112     sprintf (buffer, "\\u%04x", line->line [i]);
 113   else
 114     {
 115       buffer [0] = line->line [i];
 116       buffer [1] = '\0';
 117     }
 118   return buffer;
 119 }
 120
 121 static unicode_t
 122 java_sneak_unicode ()
 123 {
 124   return (ctxp->c_line->line [ctxp->c_line->current]);
 125 }
 126
 127 static void
 128 java_unget_unicode (c)
 129      unicode_t c;
 130 {
 131   if (!ctxp->c_line->current)
 132     fatal ("can't unget unicode - java_unget_unicode");
 133   ctxp->c_line->current--;
 134   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 135 }
 136
 137 void
 138 java_allocate_new_line ()
 139 {
 140   int i;
 141   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 142   char ahead_escape_p = (ctxp->c_line ?
 143                          ctxp->c_line->unicode_escape_ahead_p : 0);
 144
 145   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 146     {
 147       if (ctxp->p_line)
 148         {
 149           free (ctxp->p_line->unicode_escape_p);
 150           free (ctxp->p_line->line);
 151           free (ctxp->p_line);
 152         }
 153       ctxp->p_line = ctxp->c_line;
 154       ctxp->c_line = NULL;              /* Reallocated */
 155     }
 156
 157   if (!ctxp->c_line)
 158     {
 159       ctxp->c_line = (struct java_line *)malloc (sizeof (struct java_line));
 160       ctxp->c_line->max = JAVA_LINE_MAX;
 161       ctxp->c_line->line = (unicode_t *)malloc
 162           (sizeof (unicode_t)*ctxp->c_line->max);
 163       ctxp->c_line->unicode_escape_p =
 164           (char *)malloc (sizeof (char)*ctxp->c_line->max);
 165       ctxp->c_line->white_space_only = 0;
 166     }
 167
 168   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 169   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 170   if (ahead)
 171     {
 172       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 173       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 174       ctxp->c_line->size++;
 175     }
 176   ctxp->c_line->ahead [0] = 0;
 177   ctxp->c_line->unicode_escape_ahead_p = 0;
 178   ctxp->c_line->lineno = ++lineno;
 179   ctxp->c_line->white_space_only = 1;
 180 }
 181
 182 static unicode_t
 183 java_read_char ()
 184 {
 185   int c;
 186   int c1, c2;
 187
 188   if (ctxp->unget_utf8_value)
 189     {
 190       int to_return = ctxp->unget_utf8_value;
 191       ctxp->unget_utf8_value = 0;
 192       return (to_return);
 193     }
 194
 195   c = GETC ();
 196
 197   if (c < 128)
 198     return (unicode_t)c;
 199   if (c == EOF)
 200     return UEOF;
 201   else
 202     {
 203       if (c & 0xe0 == 0xc0)
 204         {
 205           c1 = GETC ();
 206           if (c1 & 0xc0 == 0x80)
 207             return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
 208         }
 209       else if (c & 0xf0 == 0xe0)
 210         {
 211           c1 = GETC ();
 212           if (c1 & 0xc0 == 0x80)
 213             {
 214               c2 = GETC ();
 215               if (c2 & 0xc0 == 0x80)
 216                 return (unicode_t)(((c & 0xf) << 12) +
 217                                    (( c1 & 0x3f) << 6) + (c2 & 0x3f));
 218             }
 219         }
 220       java_lex_error ("Bad utf8 encoding", 0);
 221     }
 222 }
 223
 224 static void
 225 java_store_unicode (l, c, unicode_escape_p)
 226     struct java_line *l;
 227     unicode_t c;
 228     int unicode_escape_p;
 229 {
 230   if (l->size == l->max)
 231     {
 232       l->max += JAVA_LINE_MAX;
 233       l->line = (unicode_t *)realloc (l->line, sizeof (unicode_t)*l->max);
 234       l->unicode_escape_p = (char *)realloc (l->unicode_escape_p,
 235                                              sizeof (char)*l->max);
 236     }
 237   l->line [l->size] = c;
 238   l->unicode_escape_p [l->size++] = unicode_escape_p;
 239 }
 240
 241 static unicode_t
 242 java_read_unicode (term_context, unicode_escape_p)
 243     int term_context;
 244     int *unicode_escape_p;
 245 {
 246   unicode_t c;
 247   long i, base;
 248
 249   c = java_read_char ();
 250   *unicode_escape_p = 0;
 251
 252   if (c != '\\')
 253     return ((term_context ? c :
 254              java_lineterminator (c) ? '\n' : (unicode_t)c));
 255
 256   /* Count the number of preceeding '\' */
 257   for (base = ftell (finput), i = base-2; c == '\\';)
 258     {
 259       fseek (finput, i--, SEEK_SET);
 260       c = java_read_char ();    /* Will fail if reading utf8 stream. FIXME */
 261     }
 262   fseek (finput, base, SEEK_SET);
 263   if ((base-i-3)%2 == 0)        /* If odd number of \ seen */
 264     {
 265       c = java_read_char ();
 266       if (c == 'u')
 267         {
 268           unsigned short unicode = 0;
 269           int shift = 12;
 270           /* Next should be 4 hex digits, otherwise it's an error.
 271              The hex value is converted into the unicode, pushed into
 272              the Unicode stream.  */
 273           for (shift = 12; shift >= 0; shift -= 4)
 274             {
 275               if ((c = java_read_char ()) == UEOF)
 276                 return UEOF;
 277               if (c >= '0' && c <= '9')
 278                 unicode |= (unicode_t)((c-'0') << shift);
 279               else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
 280                 unicode |= (unicode_t)(10+(c | 0x20)-'a' << shift);
 281               else
 282                   java_lex_error
 283                     ("Non hex digit in Unicode escape sequence", 0);
 284             }
 285           *unicode_escape_p = 1;
 286           return (term_context ? unicode :
 287                   (java_lineterminator (c) ? '\n' : unicode));
 288         }
 289       UNGETC (c);
 290     }
 291   return (unicode_t)'\\';
 292 }
 293
 294 static unicode_t
 295 java_get_unicode ()
 296 {
 297   /* It's time to read a line when... */
 298   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 299     {
 300       unicode_t c;
 301       java_allocate_new_line ();
 302       if (ctxp->c_line->line[0] != '\n')
 303         for (;;)
 304           {
 305             int unicode_escape_p;
 306             c = java_read_unicode (0, &unicode_escape_p);
 307             java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 308             if (ctxp->c_line->white_space_only
 309                 && !JAVA_WHITE_SPACE_P (c) && c!='\n')
 310               ctxp->c_line->white_space_only = 0;
 311             if ((c == '\n') || (c == UEOF))
 312               break;
 313           }
 314     }
 315   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 316   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 317   return ctxp->c_line->line [ctxp->c_line->current++];
 318 }
 319
 320 static int
 321 java_lineterminator (c)
 322      unicode_t c;
 323 {
 324   int unicode_escape_p;
 325   if (c == '\n')                /* CR */
 326     {
 327       if ((c = java_read_unicode (1, &unicode_escape_p)) != '\r')
 328         {
 329           ctxp->c_line->ahead [0] = c;
 330           ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
 331         }
 332       return 1;
 333     }
 334   else if (c == '\r')           /* LF */
 335     {
 336       if ((c = java_read_unicode (1, &unicode_escape_p)) != '\n')
 337         {
 338           ctxp->c_line->ahead [0] = c;
 339           ctxp->c_line->unicode_escape_ahead_p = unicode_escape_p;
 340         }
 341       return 1;
 342     }
 343   else
 344     return 0;
 345 }
 346
 347 /* Parse the end of a C style comment */
 348 static void
 349 java_parse_end_comment ()
 350 {
 351   unicode_t c;
 352
 353   for (c = java_get_unicode ();; c = java_get_unicode ())
 354     {
 355       switch (c)
 356         {
 357         case UEOF:
 358           java_lex_error ("Comment not terminated at end of input", 0);
 359         case '*':
 360           switch (c = java_get_unicode ())
 361             {
 362             case UEOF:
 363               java_lex_error ("Comment not terminated at end of input", 0);
 364             case '/':
 365               return;
 366             case '*':   /* reparse only '*' */
 367               java_unget_unicode (c);
 368             }
 369         }
 370     }
 371 }
 372
 373 /* This function to be used only by JAVA_ID_CHAR_P (), otherwise it
 374    will return a wrong result.  */
 375 static int
 376 java_letter_or_digit_p (c)
 377      unicode_t c;
 378 {
 379   return _JAVA_LETTER_OR_DIGIT_P (c);
 380 }
 381
 382 static unicode_t
 383 java_parse_escape_sequence ()
 384 {
 385   unicode_t char_lit;
 386   unicode_t c;
 387
 388   switch (c = java_get_unicode ())
 389     {
 390     case 'b':
 391       return (unicode_t)0x8;
 392     case 't':
 393       return (unicode_t)0x9;
 394     case 'n':
 395       return (unicode_t)0xa;
 396     case 'f':
 397       return (unicode_t)0xc;
 398     case 'r':
 399       return (unicode_t)0xd;
 400     case '"':
 401       return (unicode_t)0x22;
 402     case '\'':
 403       return (unicode_t)0x27;
 404     case '\\':
 405       return (unicode_t)0x5c;
 406     case '0': case '1': case '2': case '3': case '4':
 407     case '5': case '6': case '7': case '8': case '9':
 408       {
 409         int octal_escape[3];
 410         int octal_escape_index = 0;
 411
 412         for (; octal_escape_index < 3 && RANGE (c, '0', '9');
 413              c = java_get_unicode ())
 414           octal_escape [octal_escape_index++] = c;
 415
 416         java_unget_unicode (c);
 417
 418         if ((octal_escape_index == 3) && (octal_escape [0] > '3'))
 419           {
 420             java_lex_error ("Literal octal escape out of range", 0);
 421             return JAVA_CHAR_ERROR;
 422           }
 423         else
 424           {
 425             int i, shift;
 426             for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 427                  i < octal_escape_index; i++, shift -= 3)
 428               char_lit |= (octal_escape [i] - '0') << shift;
 429
 430             return (char_lit);
 431           }
 432         break;
 433       }
 434     case '\n':
 435       return '\n';              /* ULT, caught latter as a specific error */
 436     default:
 437       java_lex_error ("Illegal character in escape sequence", 0);
 438       return JAVA_CHAR_ERROR;
 439     }
 440 }
 441
 442 int
 443 #ifdef JC1_LITE
 444 yylex (java_lval)
 445 #else
 446 java_lex (java_lval)
 447 #endif
 448      YYSTYPE *java_lval;
 449 {
 450   unicode_t c, first_unicode;
 451   int line_terminator;
 452   int ascii_index, all_ascii;
 453   char *string;
 454
 455   /* Translation of the Unicode escape in the raw stream of Unicode
 456      characters. Takes care of line terminator.  */
 457  step1:
 458   /* Skip white spaces: SP, TAB and FF or ULT */
 459   for (c = java_get_unicode ();
 460        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 461     if (c == '\n')
 462       {
 463         ctxp->elc.line = ctxp->c_line->lineno;
 464         ctxp->elc.col  = ctxp->c_line->char_col-2;
 465       }
 466
 467   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 468
 469   if (c == 0x1a)                /* CTRL-Z */
 470     {
 471       if ((c = java_get_unicode ()) == UEOF)
 472         return 0;               /* Ok here */
 473       else
 474         java_unget_unicode (c); /* Caught latter at the end the function */
 475     }
 476   /* Handle EOF here */
 477   if (c == UEOF)        /* Should probably do something here... */
 478     return 0;
 479
 480   /* Take care of eventual comments.  */
 481   if (c == '/')
 482     {
 483       switch (c = java_get_unicode ())
 484         {
 485         case '/':
 486           for (c = java_get_unicode ();;c = java_get_unicode ())
 487             {
 488               if (c == UEOF)
 489                 java_lex_error ("Comment not terminated at end of input", 0);
 490               if (c == '\n')    /* ULT */
 491                 goto step1;
 492             }
 493           break;
 494
 495         case '*':
 496           if ((c = java_get_unicode ()) == '*')
 497             {
 498               if ((c = java_get_unicode ()) == '/')
 499                 goto step1;     /* Empy documentation comment  */
 500
 501               else
 502                 /* Parsing the documentation section. We're looking
 503                  for the @depracated pseudo keyword.  the @deprecated
 504                  tag must be at the beginning of a doc comment line
 505                  (ignoring white space and any * character)  */
 506
 507                 {
 508                   int valid_tag = 0, seen_star;
 509
 510                   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
 511                     {
 512                       switch (c)
 513                         {
 514                         case '*':
 515                           seen_star = 1;
 516                           break;
 517                         case '\n': /* ULT */
 518                           valid_tag = 1;
 519                           break;
 520                         default:
 521                           seen_star = 0;
 522                         }
 523                       c = java_get_unicode();
 524                     }
 525
 526                   if (c == UEOF)
 527                     java_lex_error
 528                       ("Comment not terminated at end of input", 0);
 529
 530                   if (seen_star && (c == '/'))
 531                     goto step1; /* End of documentation */
 532
 533                   if (valid_tag && (c == '@'))
 534                     {
 535                       char deprecated [10];
 536                       int  deprecated_index = 0;
 537
 538                       for (deprecated_index = 0, c = java_get_unicode ();
 539                            deprecated_index < 10 && c != UEOF;
 540                            c = java_get_unicode ())
 541                         deprecated [deprecated_index++] = c;
 542
 543                       if (c == UEOF)
 544                         java_lex_error
 545                           ("Comment not terminated at end of input", 0);
 546
 547                       java_unget_unicode (c);
 548                       deprecated [deprecated_index] = '\0';
 549                       if (!strcmp (deprecated, "deprecated"))
 550                         {
 551                           /* Set global flag to be checked by class. FIXME  */
 552                           warning ("deprecated implementation found");
 553                         }
 554                     }
 555                 }
 556             }
 557           else
 558             java_unget_unicode (c);
 559
 560           java_parse_end_comment ();
 561           goto step1;
 562           break;
 563         default:
 564           java_unget_unicode (c);
 565           c = '/';
 566           break;
 567         }
 568     }
 569
 570   ctxp->elc.line = ctxp->c_line->lineno;
 571   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
 572   if (ctxp->elc.col < 0)
 573     fatal ("ctxp->elc.col < 0 - java_lex");
 574
 575   /* Numeric literals */
 576   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
 577     {
 578       unicode_t peep;
 579       /* This section of code is borrowed from gcc/c-lex.c  */
 580 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
 581       int parts[TOTAL_PARTS];
 582       HOST_WIDE_INT high, low;
 583       /* End borrowed section  */
 584       char literal_token [256];
 585       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
 586       int  i;
 587       int  number_beginning = ctxp->c_line->current;
 588
 589       /* We might have a . separator instead of a FP like .[0-9]* */
 590       if (c == '.')
 591         {
 592           unicode_t peep = java_sneak_unicode ();
 593
 594           if (!JAVA_ASCII_DIGIT (peep))
 595             {
 596               JAVA_LEX_SEP('.');
 597               BUILD_OPERATOR (DOT_TK);
 598             }
 599         }
 600
 601       for (i = 0; i < TOTAL_PARTS; i++)
 602         parts [i] = 0;
 603
 604       if (c == '0')
 605         {
 606           c = java_get_unicode ();
 607           if (c == 'x' || c == 'X')
 608             {
 609               radix = 16;
 610               c = java_get_unicode ();
 611             }
 612           else if (JAVA_ASCII_DIGIT (c))
 613             radix = 8;
 614           else if (c == '.')
 615             {
 616               /* Push the '.' back and prepare for a FP parsing... */
 617               java_unget_unicode (c);
 618               c = '0';
 619             }
 620           else
 621             {
 622               /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
 623               JAVA_LEX_LIT ("0", 10);
 624               switch (c)
 625                 {
 626                 case 'L': case 'l':
 627                   SET_LVAL_NODE_TYPE (integer_zero_node, long_type_node);
 628                   return (INT_LIT_TK);
 629                 case 'f': case 'F':
 630                   SET_LVAL_NODE_TYPE (build_real (float_type_node, dconst0),
 631                                         float_type_node);
 632                   return (FP_LIT_TK);
 633                 case 'd': case 'D':
 634                   SET_LVAL_NODE_TYPE (build_real (double_type_node, dconst0),
 635                                         double_type_node);
 636                   return (FP_LIT_TK);
 637                 default:
 638                   java_unget_unicode (c);
 639                   SET_LVAL_NODE_TYPE (integer_zero_node, int_type_node);
 640                   return (INT_LIT_TK);
 641                 }
 642             }
 643         }
 644       /* Parse the first part of the literal, until we find something
 645          which is not a number.  */
 646       while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
 647              (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
 648              (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
 649         {
 650           /* We store in a string (in case it turns out to be a FP) and in
 651              PARTS if we have to process a integer literal.  */
 652           int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
 653           int count;
 654
 655           literal_token [literal_index++] = c;
 656           /* This section of code if borrowed from gcc/c-lex.c  */
 657           for (count = 0; count < TOTAL_PARTS; count++)
 658             {
 659               parts[count] *= radix;
 660               if (count)
 661                 {
 662                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
 663                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
 664                 }
 665               else
 666                 parts[0] += numeric;
 667             }
 668           if (parts [TOTAL_PARTS-1] != 0)
 669             overflow = 1;
 670           /* End borrowed section.  */
 671           c = java_get_unicode ();
 672         }
 673
 674       /* If we have something from the FP char set but not a digit, parse
 675          a FP literal.  */
 676       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
 677         {
 678           int stage = 0;
 679           int seen_digit = (literal_index ? 1 : 0);
 680           int seen_exponent = 0;
 681           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
 682                                    double unless specified. */
 683           if (radix != 10)
 684             java_lex_error ("Can't express non-decimal FP literal", 0);
 685
 686           for (;;)
 687             {
 688               if (c == '.')
 689                 {
 690                   if (stage < 1)
 691                     {
 692                       stage = 1;
 693                       literal_token [literal_index++ ] = c;
 694                       c = java_get_unicode ();
 695                     }
 696                   else
 697                     java_lex_error ("Invalid character in FP literal", 0);
 698                 }
 699
 700               if (c == 'e' || c == 'E')
 701                 {
 702                   if (stage < 2)
 703                     {
 704                       /* {E,e} must have seen at list a digit */
 705                       if (!seen_digit)
 706                         java_lex_error ("Invalid FP literal", 0);
 707                       seen_digit = 0;
 708                       seen_exponent = 1;
 709                       stage = 2;
 710                       literal_token [literal_index++] = c;
 711                       c = java_get_unicode ();
 712                     }
 713                   else
 714                     java_lex_error ("Invalid character in FP literal", 0);
 715                 }
 716               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
 717                 {
 718                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
 719                   stage = 4;    /* So we fall through */
 720                 }
 721
 722               if ((c=='-' || c =='+') && stage < 3)
 723                 {
 724                   stage = 3;
 725                   literal_token [literal_index++] = c;
 726                   c = java_get_unicode ();
 727                 }
 728
 729               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
 730                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
 731                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
 732                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
 733                 {
 734                   if (JAVA_ASCII_DIGIT (c))
 735                     seen_digit = 1;
 736                   literal_token [literal_index++ ] = c;
 737                   c = java_get_unicode ();
 738                 }
 739               else
 740                 {
 741                   jmp_buf handler;
 742                   REAL_VALUE_TYPE value;
 743 #ifndef JC1_LITE
 744                   tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 745 #endif
 746
 747                   if (stage != 4) /* Don't push back fF/dD */
 748                     java_unget_unicode (c);
 749
 750                   /* An exponent (if any) must have seen a digit.  */
 751                   if (seen_exponent && !seen_digit)
 752                     java_lex_error ("Invalid FP literal", 0);
 753
 754                   literal_token [literal_index] = '\0';
 755                   JAVA_LEX_LIT (literal_token, radix);
 756
 757                   if (setjmp (handler))
 758                     {
 759                       JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 760                       value = DCONST0;
 761                     }
 762                   else
 763                     {
 764                       SET_FLOAT_HANDLER (handler);
 765                       SET_REAL_VALUE_ATOF
 766                         (value, REAL_VALUE_ATOF (literal_token,
 767                                                  TYPE_MODE (type)));
 768
 769                       if (REAL_VALUE_ISINF (value))
 770                         JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 771
 772                       if (REAL_VALUE_ISNAN (value))
 773                         JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
 774
 775                       SET_LVAL_NODE_TYPE (build_real (type, value), type);
 776                       SET_FLOAT_HANDLER (NULL_PTR);
 777                       return FP_LIT_TK;
 778                     }
 779                 }
 780             }
 781         } /* JAVA_ASCCI_FPCHAR (c) */
 782
 783       /* Here we get back to converting the integral literal.  */
 784       if (c == 'L' || c == 'l')
 785         long_suffix = 1;
 786       else if (radix == 16 && JAVA_ASCII_LETTER (c))
 787         java_lex_error ("Digit out of range in hexadecimal literal", 0);
 788       else if (radix == 8  && JAVA_ASCII_DIGIT (c))
 789         java_lex_error ("Digit out of range in octal literal", 0);
 790       else if (radix == 16 && !literal_index)
 791         java_lex_error ("No digit specified for hexadecimal literal", 0);
 792       else
 793         java_unget_unicode (c);
 794
 795 #ifdef JAVA_LEX_DEBUG
 796       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
 797       JAVA_LEX_LIT (literal_token, radix);
 798 #endif
 799       /* This section of code is borrowed from gcc/c-lex.c  */
 800       if (!overflow)
 801         {
 802           bytes = GET_TYPE_PRECISION (long_type_node);
 803           for (i = bytes; i < TOTAL_PARTS; i++)
 804             if (parts [i])
 805               {
 806                 overflow = 1;
 807                 break;
 808               }
 809         }
 810       high = low = 0;
 811       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
 812         {
 813           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
 814                                               / HOST_BITS_PER_CHAR)]
 815                    << (i * HOST_BITS_PER_CHAR));
 816           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
 817         }
 818       /* End borrowed section.  */
 819
 820       /* Range checking */
 821       if (long_suffix)
 822         {
 823           /* 9223372036854775808L is valid if operand of a '-'. Otherwise
 824              9223372036854775807L is the biggest `long' literal that can be
 825              expressed using a 10 radix. For other radixes, everything that
 826              fits withing 64 bits is OK. */
 827           int hb = (high >> 31);
 828           if (overflow || (hb && low && radix == 10) ||
 829               (hb && high & 0x7fffffff && radix == 10) ||
 830               (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
 831             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
 832         }
 833       else
 834         {
 835           /* 2147483648 is valid if operand of a '-'. Otherwise,
 836              2147483647 is the biggest `int' literal that can be
 837              expressed using a 10 radix. For other radixes, everything
 838              that fits within 32 bits is OK. */
 839           int hb = (low >> 31) & 0x1;
 840           if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
 841               (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
 842             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
 843         }
 844       ctxp->minus_seen = 0;
 845       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
 846                           (long_suffix ? long_type_node : int_type_node));
 847       return INT_LIT_TK;
 848     }
 849
 850   ctxp->minus_seen = 0;
 851   /* Character literals */
 852   if (c == '\'')
 853     {
 854       unicode_t char_lit;
 855       if ((c = java_get_unicode ()) == '\\')
 856         char_lit = java_parse_escape_sequence ();
 857       else
 858         char_lit = c;
 859
 860       c = java_get_unicode ();
 861
 862       if ((c == '\n') || (c == UEOF))
 863         java_lex_error ("Character literal not terminated at end of line", 0);
 864       if (c != '\'')
 865         java_lex_error ("Syntax error in character literal", 0);
 866
 867       if (c == JAVA_CHAR_ERROR)
 868         char_lit = 0;           /* We silently convert it to zero */
 869
 870       JAVA_LEX_CHAR_LIT (char_lit);
 871       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
 872       return CHAR_LIT_TK;
 873     }
 874
 875   /* String literals */
 876   if (c == '"')
 877     {
 878       int no_error;
 879       char *string;
 880
 881       for (no_error = 1, c = java_get_unicode ();
 882            c != '"' && c != '\n'; c = java_get_unicode ())
 883         {
 884           if (c == '\\')
 885             c = java_parse_escape_sequence ();
 886           no_error &= (c != JAVA_CHAR_ERROR ? 1 : 0);
 887           if (c)
 888             java_unicode_2_utf8 (c);
 889         }
 890       if (c == '\n' || c == UEOF) /* ULT */
 891         {
 892           lineno--;             /* Refer to the line the terminator was seen */
 893           java_lex_error ("String not terminated at end of line.", 0);
 894           lineno++;
 895         }
 896
 897       obstack_1grow (&temporary_obstack, '\0');
 898       string = obstack_finish (&temporary_obstack);
 899 #ifndef JC1_LITE
 900       if (!no_error || (c != '"'))
 901         java_lval->node = error_mark_node; /* Requires futher testing FIXME */
 902       else
 903         {
 904           tree s = make_node (STRING_CST);
 905           TREE_STRING_LENGTH (s) = strlen (string);
 906           TREE_STRING_POINTER (s) =
 907             obstack_alloc (expression_obstack, TREE_STRING_LENGTH (s)+1);
 908           strcpy (TREE_STRING_POINTER (s), string);
 909           java_lval->node = s;
 910         }
 911 #endif
 912       return STRING_LIT_TK;
 913     }
 914
 915   /* Separator */
 916   switch (c)
 917     {
 918     case '(':
 919       JAVA_LEX_SEP (c);
 920       BUILD_OPERATOR (OP_TK);
 921     case ')':
 922       JAVA_LEX_SEP (c);
 923       return CP_TK;
 924     case '{':
 925       JAVA_LEX_SEP (c);
 926       if (ctxp->ccb_indent == 1)
 927         ctxp->first_ccb_indent1 = lineno;
 928       ctxp->ccb_indent++;
 929       return OCB_TK;
 930     case '}':
 931       JAVA_LEX_SEP (c);
 932       ctxp->ccb_indent--;
 933       if (ctxp->ccb_indent == 1)
 934         ctxp->last_ccb_indent1 = lineno;
 935       return CCB_TK;
 936     case '[':
 937       JAVA_LEX_SEP (c);
 938       BUILD_OPERATOR (OSB_TK);
 939     case ']':
 940       JAVA_LEX_SEP (c);
 941       return CSB_TK;
 942     case ';':
 943       JAVA_LEX_SEP (c);
 944       return SC_TK;
 945     case ',':
 946       JAVA_LEX_SEP (c);
 947       return C_TK;
 948     case '.':
 949       JAVA_LEX_SEP (c);
 950       BUILD_OPERATOR (DOT_TK);
 951       /*      return DOT_TK; */
 952     }
 953
 954   /* Operators */
 955   switch (c)
 956     {
 957     case '=':
 958       if ((c = java_get_unicode ()) == '=')
 959         {
 960           BUILD_OPERATOR (EQ_TK);
 961         }
 962       else
 963         {
 964           /* Equals is used in two different locations. In the
 965              variable_declarator: rule, it has to be seen as '=' as opposed
 966              to being seen as an ordinary assignment operator in
 967              assignment_operators: rule.  */
 968           java_unget_unicode (c);
 969           BUILD_OPERATOR (ASSIGN_TK);
 970         }
 971
 972     case '>':
 973       switch ((c = java_get_unicode ()))
 974         {
 975         case '=':
 976           BUILD_OPERATOR (GTE_TK);
 977         case '>':
 978           switch ((c = java_get_unicode ()))
 979             {
 980             case '>':
 981               if ((c = java_get_unicode ()) == '=')
 982                 {
 983                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
 984                 }
 985               else
 986                 {
 987                   java_unget_unicode (c);
 988                   BUILD_OPERATOR (ZRS_TK);
 989                 }
 990             case '=':
 991               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
 992             default:
 993               java_unget_unicode (c);
 994               BUILD_OPERATOR (SRS_TK);
 995             }
 996         default:
 997           java_unget_unicode (c);
 998           BUILD_OPERATOR (GT_TK);
 999         }
1000
1001     case '<':
1002       switch ((c = java_get_unicode ()))
1003         {
1004         case '=':
1005           BUILD_OPERATOR (LTE_TK);
1006         case '<':
1007           if ((c = java_get_unicode ()) == '=')
1008             {
1009               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1010             }
1011           else
1012             {
1013               java_unget_unicode (c);
1014               BUILD_OPERATOR (LS_TK);
1015             }
1016         default:
1017           java_unget_unicode (c);
1018           BUILD_OPERATOR (LT_TK);
1019         }
1020
1021     case '&':
1022       switch ((c = java_get_unicode ()))
1023         {
1024         case '&':
1025           BUILD_OPERATOR (BOOL_AND_TK);
1026         case '=':
1027           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1028         default:
1029           java_unget_unicode (c);
1030           BUILD_OPERATOR (AND_TK);
1031         }
1032
1033     case '|':
1034       switch ((c = java_get_unicode ()))
1035         {
1036         case '|':
1037           BUILD_OPERATOR (BOOL_OR_TK);
1038         case '=':
1039           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1040         default:
1041           java_unget_unicode (c);
1042           BUILD_OPERATOR (OR_TK);
1043         }
1044
1045     case '+':
1046       switch ((c = java_get_unicode ()))
1047         {
1048         case '+':
1049           BUILD_OPERATOR (INCR_TK);
1050         case '=':
1051           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1052         default:
1053           java_unget_unicode (c);
1054           BUILD_OPERATOR (PLUS_TK);
1055         }
1056
1057     case '-':
1058       switch ((c = java_get_unicode ()))
1059         {
1060         case '-':
1061           BUILD_OPERATOR (DECR_TK);
1062         case '=':
1063           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1064         default:
1065           java_unget_unicode (c);
1066           ctxp->minus_seen = 1;
1067           BUILD_OPERATOR (MINUS_TK);
1068         }
1069
1070     case '*':
1071       if ((c = java_get_unicode ()) == '=')
1072         {
1073           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1074         }
1075       else
1076         {
1077           java_unget_unicode (c);
1078           BUILD_OPERATOR (MULT_TK);
1079         }
1080
1081     case '/':
1082       if ((c = java_get_unicode ()) == '=')
1083         {
1084           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1085         }
1086       else
1087         {
1088           java_unget_unicode (c);
1089           BUILD_OPERATOR (DIV_TK);
1090         }
1091
1092     case '^':
1093       if ((c = java_get_unicode ()) == '=')
1094         {
1095           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1096         }
1097       else
1098         {
1099           java_unget_unicode (c);
1100           BUILD_OPERATOR (XOR_TK);
1101         }
1102
1103     case '%':
1104       if ((c = java_get_unicode ()) == '=')
1105         {
1106           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1107         }
1108       else
1109         {
1110           java_unget_unicode (c);
1111           BUILD_OPERATOR (REM_TK);
1112         }
1113
1114     case '!':
1115       if ((c = java_get_unicode()) == '=')
1116         {
1117           BUILD_OPERATOR (NEQ_TK);
1118         }
1119       else
1120         {
1121           java_unget_unicode (c);
1122           BUILD_OPERATOR (NEG_TK);
1123         }
1124
1125     case '?':
1126       JAVA_LEX_OP ("?");
1127       BUILD_OPERATOR (REL_QM_TK);
1128     case ':':
1129       JAVA_LEX_OP (":");
1130       BUILD_OPERATOR (REL_CL_TK);
1131     case '~':
1132       BUILD_OPERATOR (NOT_TK);
1133     }
1134
1135   /* Keyword, boolean literal or null literal */
1136   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1137        JAVA_ID_CHAR_P (c); c = java_get_unicode ())
1138     {
1139       java_unicode_2_utf8 (c);
1140       if (all_ascii && c >= 128)
1141         all_ascii = 0;
1142       ascii_index++;
1143     }
1144
1145   obstack_1grow (&temporary_obstack, '\0');
1146   string = obstack_finish (&temporary_obstack);
1147   java_unget_unicode (c);
1148
1149   /* If we have something all ascii, we consider a keyword, a boolean
1150      literal, a null literal or an all ASCII identifier.  Otherwise,
1151      this is an identifier (possibly not respecting formation rule).  */
1152   if (all_ascii)
1153     {
1154       struct java_keyword *kw;
1155       if ((kw=java_keyword (string, ascii_index)))
1156         {
1157           JAVA_LEX_KW (string);
1158           switch (kw->token)
1159             {
1160             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1161             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1162             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1163             case PRIVATE_TK:
1164               SET_MODIFIER_CTX (kw->token);
1165               return MODIFIER_TK;
1166             case FLOAT_TK:
1167               SET_LVAL_NODE (float_type_node);
1168               return FP_TK;
1169             case DOUBLE_TK:
1170               SET_LVAL_NODE (double_type_node);
1171               return FP_TK;
1172             case BOOLEAN_TK:
1173               SET_LVAL_NODE (boolean_type_node);
1174               return BOOLEAN_TK;
1175             case BYTE_TK:
1176               SET_LVAL_NODE (byte_type_node);
1177               return INTEGRAL_TK;
1178             case SHORT_TK:
1179               SET_LVAL_NODE (short_type_node);
1180               return INTEGRAL_TK;
1181             case INT_TK:
1182               SET_LVAL_NODE (int_type_node);
1183               return INTEGRAL_TK;
1184             case LONG_TK:
1185               SET_LVAL_NODE (long_type_node);
1186               return INTEGRAL_TK;
1187             case CHAR_TK:
1188               SET_LVAL_NODE (char_type_node);
1189               return INTEGRAL_TK;
1190
1191               /* Keyword based literals */
1192             case TRUE_TK:
1193             case FALSE_TK:
1194               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1195                               boolean_true_node : boolean_false_node));
1196               return BOOL_LIT_TK;
1197             case NULL_TK:
1198               SET_LVAL_NODE (null_pointer_node);
1199               return NULL_TK;
1200
 1201               /* Some keywords for which we want to retain information on
 1202                  the location where they were found */
1203             case CASE_TK:
1204             case DEFAULT_TK:
1205             case SUPER_TK:
1206             case THIS_TK:
1207             case RETURN_TK:
1208             case BREAK_TK:
1209             case CONTINUE_TK:
1210             case TRY_TK:
1211             case CATCH_TK:
1212               BUILD_OPERATOR (kw->token);
1213
1214             default:
1215               return kw->token;
1216             }
1217         }
1218     }
1219
 1220   /* We may have an ID here */
1221   if (JAVA_ID_CHAR_P(first_unicode) && !JAVA_DIGIT_P (first_unicode))
1222     {
1223       JAVA_LEX_ID (string);
1224       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1225       return ID_TK;
1226     }
1227
1228   /* Everything else is an invalid character in the input */
1229   {
1230     char lex_error_buffer [128];
1231     sprintf (lex_error_buffer, "Invalid character '%s' in input",
1232              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1233     java_lex_error (lex_error_buffer, 1);
1234   }
1235   return 0;
1236 }
1237
1238 static void
1239 java_unicode_2_utf8 (unicode)
1240     unicode_t unicode;
1241 {
1242   if (RANGE (unicode, 0x01, 0x7f))
1243     obstack_1grow (&temporary_obstack, (char)unicode);
1244   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1245     {
1246       obstack_1grow (&temporary_obstack,
1247                      (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1248       obstack_1grow (&temporary_obstack,
1249                      (unsigned char)(0x80 | (unicode & 0x3f)));
1250     }
1251   else                          /* Range 0x800-0xffff */
1252     {
1253       obstack_1grow (&temporary_obstack,
1254                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1255       obstack_1grow (&temporary_obstack,
1256                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1257       obstack_1grow (&temporary_obstack,
1258                      (unsigned char)(0x80 | (unicode & 0x003f) >> 12));
1259     }
1260 }
1261
1262 #ifndef JC1_LITE
1263 static tree
1264 build_wfl_node (node)
1265      tree node;
1266 {
1267   return build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1268 }
1269 #endif
1270
1271 static void
1272 java_lex_error (msg, forward)
1273      char *msg;
1274      int forward;
1275 {
1276 #ifndef JC1_LITE
1277   ctxp->elc.line = ctxp->c_line->lineno;
1278   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1279
1280   /* Might be caught in the middle of some error report */
1281   ctxp->java_error_flag = 0;
1282   java_error (NULL);
1283   java_error (msg);
1284 #endif
1285 }
1286
/* Return 1 if C, a character just read from FP, is a line terminator
   and 0 otherwise.

   The recognized terminators are CR, LF and the pair CR LF.  When C
   is a CR, one more character is read from FP: an LF is consumed so
   that the pair counts as a single terminator, anything else (except
   EOF) is pushed back with ungetc.  An LF is a terminator on its own
   and triggers no look-ahead.  */

static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  int next;
  switch (c)
    {
    case '\r':
      /* Swallow the LF of a CR LF pair so it is not counted twice.  */
      next = getc (fp);
      if (next != '\n' && next != EOF)
        ungetc (next, fp);
      return 1;
    case '\n':
      return 1;
    default:
      return 0;
    }
}
1306
1307 char *
1308 java_get_line_col (filename, line, col)
1309      char *filename;
1310      int line, col;
1311 {
1312 #ifdef JC1_LITE
1313   return 0;
1314 #else
1315   /* Dumb implementation. Doesn't try to cache or optimize things. */
1316   /* First line of the file is line 1, first column is 1 */
1317
1318   /* COL <= 0 means, at the CR/LF in LINE */
1319
1320   FILE *fp;
1321   int c, ccol, cline = 1;
1322   int current_line_col = 0;
1323
1324   if (!(fp = fopen (filename, "r")))
1325     fatal ("Can't open file - java_display_line_col");
1326
1327   while (cline != line)
1328     {
1329       c = getc (fp);
1330       if (c < 0)
1331         {
1332           static char msg[] = "<<file too short - unexpected EOF>>";
1333           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1334           goto have_line;
1335         }
1336       if (java_is_eol (fp, c))
1337         cline++;
1338     }
1339
1340   /* Gather the chars of the current line in a buffer */
1341   for (;;)
1342     {
1343       c = getc (fp);
1344       if (c < 0 || java_is_eol (fp, c))
1345         break;
1346       obstack_1grow (&temporary_obstack, c);
1347       current_line_col++;
1348     }
1349  have_line:
1350
1351   obstack_1grow (&temporary_obstack, '\n');
1352
1353   if (col < 0)
1354     col = current_line_col;
1355
1356   /* Place the '^' a the right position */
1357   for (ccol = 1; ccol <= col; ccol++)
1358     obstack_1grow (&temporary_obstack, ' ');
1359   obstack_grow0 (&temporary_obstack, "^", 1);
1360
1361   fclose (fp);
1362   return obstack_finish (&temporary_obstack);
1363 #endif
1364 }