gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
   3    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   4
   5 This file is part of GNU CC.
   6
   7 GNU CC is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU CC is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU CC; see the file COPYING.  If not, write to
  19 the Free Software Foundation, 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.
  21
  22 Java and all Java-based marks are trademarks or registered trademarks
  23 of Sun Microsystems, Inc. in the United States and other countries.
  24 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  25
  26 /* It defines java_lex (yylex) that reads a Java ASCII source file
  27    possibly containing Unicode escape sequence or utf8 encoded
  28    characters and returns a token for everything found but comments,
  29    white spaces and line terminators. When necessary, it also fills
  30    the java_lval (yylval) union. It's implemented to be called by a
  31    re-entrant parser generated by Bison.
  32
  33    The lexical analysis conforms to the Java grammar described in "The
  34    Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
  35    Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
  36
  37 #include "keyword.h"
  38 #include "flags.h"
  39 #include "chartables.h"
  40
  41 /* Function declaration  */
  42 static char *java_sprint_unicode PARAMS ((struct java_line *, int));
  43 static void java_unicode_2_utf8 PARAMS ((unicode_t));
  44 static void java_lex_error PARAMS ((const char *, int));
  45 #ifndef JC1_LITE
  46 static int java_is_eol PARAMS ((FILE *, int));
  47 static tree build_wfl_node PARAMS ((tree));
  48 #endif
  49 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  50 static int java_parse_escape_sequence PARAMS ((void));
  51 static int java_start_char_p PARAMS ((unicode_t));
  52 static int java_part_char_p PARAMS ((unicode_t));
  53 static int java_parse_doc_section PARAMS ((int));
  54 static void java_parse_end_comment PARAMS ((int));
  55 static int java_get_unicode PARAMS ((void));
  56 static int java_read_unicode PARAMS ((java_lexer *, int *));
  57 static int java_read_unicode_collapsing_terminators PARAMS ((java_lexer *,
  58                                                              int *));
  59 static void java_store_unicode PARAMS ((struct java_line *, unicode_t, int));
  60 static int java_read_char PARAMS ((java_lexer *));
  61 static void java_allocate_new_line PARAMS ((void));
  62 static void java_unget_unicode PARAMS ((void));
  63 static unicode_t java_sneak_unicode PARAMS ((void));
  64 #ifndef JC1_LITE
  65 static int utf8_cmp PARAMS ((const unsigned char *, int, const char *));
  66 #endif
  67
  68 java_lexer *java_new_lexer PARAMS ((FILE *, const char *));
  69
#ifdef HAVE_ICONV
/* This is nonzero if we have initialized `need_byteswap'.
   java_new_lexer sets it just before running the byte-order probe,
   so the probe is attempted at most once per process.  */
static int byteswap_init = 0;

/* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
   big-endian order -- not native endian order.  We handle this by
   doing a conversion once at startup and seeing what happens.  This
   flag holds the results of this determination: nonzero means the
   probe did not yield 0xfeff when read as a native unicode_t.  Each
   new lexer copies it into its own `byte_swap' field.  */
static int need_byteswap = 0;
#endif
  80
  81 void
  82 java_init_lex (finput, encoding)
  83      FILE *finput;
  84      const char *encoding;
  85 {
  86 #ifndef JC1_LITE
  87   int java_lang_imported = 0;
  88
  89   if (!java_lang_id)
  90     java_lang_id = get_identifier ("java.lang");
  91   if (!java_lang_cloneable)
  92     java_lang_cloneable = get_identifier ("java.lang.Cloneable");
  93   if (!java_io_serializable)
  94     java_io_serializable = get_identifier ("java.io.Serializable");
  95   if (!inst_id)
  96     inst_id = get_identifier ("inst$");
  97   if (!wpv_id)
  98     wpv_id = get_identifier ("write_parm_value$");
  99
 100   if (!java_lang_imported)
 101     {
 102       tree node = build_tree_list
 103         (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
 104       read_import_dir (TREE_PURPOSE (node));
 105       TREE_CHAIN (node) = ctxp->import_demand_list;
 106       ctxp->import_demand_list = node;
 107       java_lang_imported = 1;
 108     }
 109
 110   if (!wfl_operator)
 111     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
 112   if (!label_id)
 113     label_id = get_identifier ("$L");
 114   if (!wfl_append)
 115     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
 116   if (!wfl_string_buffer)
 117     wfl_string_buffer =
 118       build_expr_wfl (get_identifier ("java.lang.StringBuffer"), NULL, 0, 0);
 119   if (!wfl_to_string)
 120     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
 121
 122   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
 123     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
 124
 125   memset ((PTR) ctxp->modifier_ctx, 0, 11*sizeof (ctxp->modifier_ctx[0]));
 126   memset ((PTR) current_jcf, 0, sizeof (JCF));
 127   ctxp->current_parsed_class = NULL;
 128   ctxp->package = NULL_TREE;
 129 #endif
 130
 131   ctxp->filename = input_filename;
 132   ctxp->lineno = lineno = 0;
 133   ctxp->p_line = NULL;
 134   ctxp->c_line = NULL;
 135   ctxp->minus_seen = 0;
 136   ctxp->java_error_flag = 0;
 137   ctxp->lexer = java_new_lexer (finput, encoding);
 138 }
 139
 140 static char *
 141 java_sprint_unicode (line, i)
 142     struct java_line *line;
 143     int i;
 144 {
 145   static char buffer [10];
 146   if (line->unicode_escape_p [i] || line->line [i] > 128)
 147     sprintf (buffer, "\\u%04x", line->line [i]);
 148   else
 149     {
 150       buffer [0] = line->line [i];
 151       buffer [1] = '\0';
 152     }
 153   return buffer;
 154 }
 155
 156 static unicode_t
 157 java_sneak_unicode ()
 158 {
 159   return (ctxp->c_line->line [ctxp->c_line->current]);
 160 }
 161
 162 static void
 163 java_unget_unicode ()
 164 {
 165   if (!ctxp->c_line->current)
 166     /* Can't unget unicode.  */
 167     abort ();
 168
 169   ctxp->c_line->current--;
 170   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 171 }
 172
 173 static void
 174 java_allocate_new_line ()
 175 {
 176   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 177   char ahead_escape_p = (ctxp->c_line ?
 178                          ctxp->c_line->unicode_escape_ahead_p : 0);
 179
 180   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 181     {
 182       if (ctxp->p_line)
 183         {
 184           free (ctxp->p_line->unicode_escape_p);
 185           free (ctxp->p_line->line);
 186           free (ctxp->p_line);
 187         }
 188       ctxp->p_line = ctxp->c_line;
 189       ctxp->c_line = NULL;              /* Reallocated */
 190     }
 191
 192   if (!ctxp->c_line)
 193     {
 194       ctxp->c_line = (struct java_line *)xmalloc (sizeof (struct java_line));
 195       ctxp->c_line->max = JAVA_LINE_MAX;
 196       ctxp->c_line->line = (unicode_t *)xmalloc
 197         (sizeof (unicode_t)*ctxp->c_line->max);
 198       ctxp->c_line->unicode_escape_p =
 199           (char *)xmalloc (sizeof (char)*ctxp->c_line->max);
 200       ctxp->c_line->white_space_only = 0;
 201     }
 202
 203   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 204   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 205   if (ahead)
 206     {
 207       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 208       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 209       ctxp->c_line->size++;
 210     }
 211   ctxp->c_line->ahead [0] = 0;
 212   ctxp->c_line->unicode_escape_ahead_p = 0;
 213   ctxp->c_line->lineno = ++lineno;
 214   ctxp->c_line->white_space_only = 1;
 215 }
 216
 217 /* Create a new lexer object.  */
 218
 219 java_lexer *
 220 java_new_lexer (finput, encoding)
 221      FILE *finput;
 222      const char *encoding;
 223 {
 224   java_lexer *lex = (java_lexer *) xmalloc (sizeof (java_lexer));
 225   int enc_error = 0;
 226
 227   lex->finput = finput;
 228   lex->bs_count = 0;
 229   lex->unget_value = 0;
 230   lex->hit_eof = 0;
 231
 232 #ifdef HAVE_ICONV
 233   lex->handle = iconv_open ("UCS-2", encoding);
 234   if (lex->handle != (iconv_t) -1)
 235     {
 236       lex->first = -1;
 237       lex->last = -1;
 238       lex->out_first = -1;
 239       lex->out_last = -1;
 240       lex->read_anything = 0;
 241       lex->use_fallback = 0;
 242
 243       /* Work around broken iconv() implementations by doing checking at
 244          runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
 245          then all UCS-2 encoders will be broken.  Perhaps not a valid
 246          assumption.  */
 247       if (! byteswap_init)
 248         {
 249           iconv_t handle;
 250
 251           byteswap_init = 1;
 252
 253           handle = iconv_open ("UCS-2", "UTF-8");
 254           if (handle != (iconv_t) -1)
 255             {
 256               unicode_t result;
 257               unsigned char in[3];
 258               char *inp, *outp;
 259               size_t inc, outc, r;
 260
 261               /* This is the UTF-8 encoding of \ufeff.  */
 262               in[0] = 0xef;
 263               in[1] = 0xbb;
 264               in[2] = 0xbf;
 265
 266               inp = in;
 267               inc = 3;
 268               outp = (char *) &result;
 269               outc = 2;
 270
 271               r = iconv (handle, (const char **) &inp, &inc, &outp, &outc);
 272               iconv_close (handle);
 273               /* Conversion must be complete for us to use the result.  */
 274               if (r != (size_t) -1 && inc == 0 && outc == 0)
 275                 need_byteswap = (result != 0xfeff);
 276             }
 277         }
 278
 279       lex->byte_swap = need_byteswap;
 280     }
 281   else
 282 #endif /* HAVE_ICONV */
 283     {
 284       /* If iconv failed, use the internal decoder if the default
 285          encoding was requested.  This code is used on platforms where
 286          iconv exists but is insufficient for our needs.  For
 287          instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.  */
 288       if (strcmp (encoding, DEFAULT_ENCODING))
 289         enc_error = 1;
 290 #ifdef HAVE_ICONV
 291       else
 292         lex->use_fallback = 1;
 293 #endif /* HAVE_ICONV */
 294     }
 295
 296   if (enc_error)
 297     fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option.", encoding);
 298
 299   return lex;
 300 }
 301
 302 void
 303 java_destroy_lexer (lex)
 304      java_lexer *lex;
 305 {
 306 #ifdef HAVE_ICONV
 307   if (! lex->use_fallback)
 308     iconv_close (lex->handle);
 309 #endif
 310   free (lex);
 311 }
 312
/* Read one character from LEX and return it as a Unicode value, or
   return UEOF at end of input or after a decoding error has been
   reported.

   A character stored in LEX->unget_value is returned first.
   Otherwise, with HAVE_ICONV and no fallback requested, raw bytes are
   read into LEX->buffer, converted by iconv into LEX->out_buffer, and
   handed out two bytes at a time.  In all other cases the input is
   decoded by the built-in UTF-8 decoder, which handles sequences of
   one, two or three bytes.  */
static int
java_read_char (lex)
     java_lexer *lex;
{
  /* Zero doubles as "nothing pushed back", so a pushed-back value of
     zero is not delivered from here.  */
  if (lex->unget_value)
    {
      unicode_t r = lex->unget_value;
      lex->unget_value = 0;
      return r;
    }

#ifdef HAVE_ICONV
  if (! lex->use_fallback)
    {
      size_t ir, inbytesleft, in_save, out_count, out_save;
      char *inp, *outp;
      unicode_t result;

      /* If there is data which has already been converted, use it.
         Otherwise (the test below) the output buffer is empty and
         has to be refilled first.  */
      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
        {
          lex->out_first = 0;
          lex->out_last = 0;

          while (1)
            {
              /* See if we need to read more data.  If FIRST == 0 then
                 the previous conversion attempt ended in the middle of
                 a character at the end of the buffer.  Otherwise we
                 only have to read if the buffer is empty.  */
              if (lex->first == 0 || lex->first >= lex->last)
                {
                  /* NOTE(review): fread returns size_t; it is stored
                     in an int here.  */
                  int r;

                  if (lex->first >= lex->last)
                    {
                      lex->first = 0;
                      lex->last = 0;
                    }
                  if (feof (lex->finput))
                    return UEOF;
                  /* Append after any bytes kept from the last round.  */
                  r = fread (&lex->buffer[lex->last], 1,
                             sizeof (lex->buffer) - lex->last,
                             lex->finput);
                  lex->last += r;
                }

              inbytesleft = lex->last - lex->first;
              out_count = sizeof (lex->out_buffer) - lex->out_last;

              if (inbytesleft == 0)
                {
                  /* We've tried to read and there is nothing left.  */
                  return UEOF;
                }

              /* Keep the starting counts so that the amounts consumed
                 and produced can be computed after the call.  */
              in_save = inbytesleft;
              out_save = out_count;
              inp = &lex->buffer[lex->first];
              outp = &lex->out_buffer[lex->out_last];
              ir = iconv (lex->handle, (const char **) &inp, &inbytesleft,
                          &outp, &out_count);

              /* If we haven't read any bytes, then look to see if we
                 have read a BOM.  A BOM in either byte order is
                 skipped and overrides the byte_swap setting.
                 NOTE(review): out_buffer is read through a unicode_t
                 pointer here and below; this relies on the buffer
                 being suitably aligned.  */
              if (! lex->read_anything && out_save - out_count >= 2)
                {
                  unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
                  if (uc == 0xfeff)
                    {
                      lex->byte_swap = 0;
                      lex->out_first += 2;
                    }
                  else if (uc == 0xfffe)
                    {
                      lex->byte_swap = 1;
                      lex->out_first += 2;
                    }
                  lex->read_anything = 1;
                }

              /* Swap the two bytes of every unit just produced.  */
              if (lex->byte_swap)
                {
                  unsigned int i;
                  for (i = 0; i < out_save - out_count; i += 2)
                    {
                      char t = lex->out_buffer[lex->out_last + i];
                      lex->out_buffer[lex->out_last + i]
                        = lex->out_buffer[lex->out_last + i + 1];
                      lex->out_buffer[lex->out_last + i + 1] = t;
                    }
                }

              /* Account for what iconv consumed and produced.  */
              lex->first += in_save - inbytesleft;
              lex->out_last += out_save - out_count;

              /* If we converted anything at all, move along.  */
              if (out_count != out_save)
                break;

              if (ir == (size_t) -1)
                {
                  if (errno == EINVAL)
                    {
                      /* This is ok.  This means that the end of our buffer
                         is in the middle of a character sequence.  We just
                         move the valid part of the buffer to the beginning
                         to force a read.  */
                      memmove (&lex->buffer[0], &lex->buffer[lex->first],
                               lex->last - lex->first);
                      lex->last -= lex->first;
                      lex->first = 0;
                    }
                  else
                    {
                      /* A more serious error.  */
                      java_lex_error ("unrecognized character in input stream",
                                      0);
                      return UEOF;
                    }
                }
            }
        }

      if (lex->out_first == -1 || lex->out_first >= lex->out_last)
        {
          /* Don't have any data.  */
          return UEOF;
        }

      /* Success.  Hand out the next two-byte unit.  */
      result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
      lex->out_first += 2;
      return result;
    }
  else
#endif /* HAVE_ICONV */
    {
      /* Built-in UTF-8 decoder.  */
      int c, c1, c2;
      c = getc (lex->finput);

      if (c == EOF)
        return UEOF;
      if (c < 128)
        return (unicode_t)c;
      else
        {
          /* Lead byte 110xxxxx: one continuation byte follows.  */
          if ((c & 0xe0) == 0xc0)
            {
              c1 = getc (lex->finput);
              if ((c1 & 0xc0) == 0x80)
                return (unicode_t)(((c &0x1f) << 6) + (c1 & 0x3f));
              c = c1;
            }
          /* Lead byte 1110xxxx: two continuation bytes follow.  */
          else if ((c & 0xf0) == 0xe0)
            {
              c1 = getc (lex->finput);
              if ((c1 & 0xc0) == 0x80)
                {
                  c2 = getc (lex->finput);
                  if ((c2 & 0xc0) == 0x80)
                    return (unicode_t)(((c & 0xf) << 12) +
                                       (( c1 & 0x3f) << 6) + (c2 & 0x3f));
                  else
                    c = c2;
                }
              else
                c = c1;
            }

          /* We simply don't support invalid characters.  Any other
             lead byte, and any bad continuation byte, ends up here.  */
          java_lex_error ("malformed UTF-8 character", 0);
        }
    }

  /* We only get here on error.  */
  return UEOF;
}
 491
 492 static void
 493 java_store_unicode (l, c, unicode_escape_p)
 494     struct java_line *l;
 495     unicode_t c;
 496     int unicode_escape_p;
 497 {
 498   if (l->size == l->max)
 499     {
 500       l->max += JAVA_LINE_MAX;
 501       l->line = (unicode_t *) xrealloc (l->line, sizeof (unicode_t)*l->max);
 502       l->unicode_escape_p = (char *) xrealloc (l->unicode_escape_p,
 503                                                sizeof (char)*l->max);
 504     }
 505   l->line [l->size] = c;
 506   l->unicode_escape_p [l->size++] = unicode_escape_p;
 507 }
 508
 509 static int
 510 java_read_unicode (lex, unicode_escape_p)
 511      java_lexer *lex;
 512      int *unicode_escape_p;
 513 {
 514   int c;
 515
 516   c = java_read_char (lex);
 517   *unicode_escape_p = 0;
 518
 519   if (c != '\\')
 520     {
 521       lex->bs_count = 0;
 522       return c;
 523     }
 524
 525   ++lex->bs_count;
 526   if ((lex->bs_count) % 2 == 1)
 527     {
 528       /* Odd number of \ seen.  */
 529       c = java_read_char (lex);
 530       if (c == 'u')
 531         {
 532           unicode_t unicode = 0;
 533           int shift = 12;
 534
 535           /* Recognize any number of `u's in \u.  */
 536           while ((c = java_read_char (lex)) == 'u')
 537             ;
 538
 539           /* Unget the most recent character as it is not a `u'.  */
 540           if (c == UEOF)
 541             return UEOF;
 542           lex->unget_value = c;
 543
 544           /* Next should be 4 hex digits, otherwise it's an error.
 545              The hex value is converted into the unicode, pushed into
 546              the Unicode stream.  */
 547           for (shift = 12; shift >= 0; shift -= 4)
 548             {
 549               if ((c = java_read_char (lex)) == UEOF)
 550                 return UEOF;
 551               if (c >= '0' && c <= '9')
 552                 unicode |= (unicode_t)((c-'0') << shift);
 553               else if ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
 554                 unicode |= (unicode_t)((10+(c | 0x20)-'a') << shift);
 555               else
 556                 java_lex_error ("Non hex digit in Unicode escape sequence", 0);
 557             }
 558           lex->bs_count = 0;
 559           *unicode_escape_p = 1;
 560           return unicode;
 561         }
 562       lex->unget_value = c;
 563     }
 564   return (unicode_t) '\\';
 565 }
 566
 567 static int
 568 java_read_unicode_collapsing_terminators (lex, unicode_escape_p)
 569      java_lexer *lex;
 570      int *unicode_escape_p;
 571 {
 572   int c = java_read_unicode (lex, unicode_escape_p);
 573
 574   if (c == '\r')
 575     {
 576       /* We have to read ahead to see if we got \r\n.  In that case we
 577          return a single line terminator.  */
 578       int dummy;
 579       c = java_read_unicode (lex, &dummy);
 580       if (c != '\n')
 581         lex->unget_value = c;
 582       /* In either case we must return a newline.  */
 583       c = '\n';
 584     }
 585
 586   return c;
 587 }
 588
 589 static int
 590 java_get_unicode ()
 591 {
 592   /* It's time to read a line when... */
 593   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 594     {
 595       int c;
 596       int found_chars = 0;
 597
 598       if (ctxp->lexer->hit_eof)
 599         return UEOF;
 600
 601       java_allocate_new_line ();
 602       if (ctxp->c_line->line[0] != '\n')
 603         {
 604           for (;;)
 605             {
 606               int unicode_escape_p;
 607               c = java_read_unicode_collapsing_terminators (ctxp->lexer,
 608                                                             &unicode_escape_p);
 609               if (c != UEOF)
 610                 {
 611                   found_chars = 1;
 612                   java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 613                   if (ctxp->c_line->white_space_only
 614                       && !JAVA_WHITE_SPACE_P (c)
 615                       && c != '\n')
 616                     ctxp->c_line->white_space_only = 0;
 617                 }
 618               if ((c == '\n') || (c == UEOF))
 619                 break;
 620             }
 621
 622           if (c == UEOF && ! found_chars)
 623             {
 624               ctxp->lexer->hit_eof = 1;
 625               return UEOF;
 626             }
 627         }
 628     }
 629   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 630   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 631   return ctxp->c_line->line [ctxp->c_line->current++];
 632 }
 633
 634 /* Parse the end of a C style comment.
 635  * C is the first character following the '/' and '*'. */
 636 static void
 637 java_parse_end_comment (c)
 638      int c;
 639 {
 640   for ( ;; c = java_get_unicode ())
 641     {
 642       switch (c)
 643         {
 644         case UEOF:
 645           java_lex_error ("Comment not terminated at end of input", 0);
 646           return;
 647         case '*':
 648           switch (c = java_get_unicode ())
 649             {
 650             case UEOF:
 651               java_lex_error ("Comment not terminated at end of input", 0);
 652               return;
 653             case '/':
 654               return;
 655             case '*':   /* reparse only '*' */
 656               java_unget_unicode ();
 657             }
 658         }
 659     }
 660 }
 661
 662 /* Parse the documentation section. Keywords must be at the beginning
 663    of a documentation comment line (ignoring white space and any `*'
 664    character). Parsed keyword(s): @DEPRECATED.  */
 665
 666 static int
 667 java_parse_doc_section (c)
 668      int c;
 669 {
 670   int valid_tag = 0, seen_star = 0;
 671
 672   while (JAVA_WHITE_SPACE_P (c) || (c == '*') || c == '\n')
 673     {
 674       switch (c)
 675         {
 676         case '*':
 677           seen_star = 1;
 678           break;
 679         case '\n': /* ULT */
 680           valid_tag = 1;
 681         default:
 682           seen_star = 0;
 683         }
 684       c = java_get_unicode();
 685     }
 686
 687   if (c == UEOF)
 688     java_lex_error ("Comment not terminated at end of input", 0);
 689
 690   if (seen_star && (c == '/'))
 691     return 1;                   /* Goto step1 in caller */
 692
 693   /* We're parsing @deprecated */
 694   if (valid_tag && (c == '@'))
 695     {
 696       char tag [11];
 697       int  tag_index = 0;
 698
 699       while (tag_index < 10 && c != UEOF && c != ' ' && c != '\n')
 700         {
 701           c = java_get_unicode ();
 702           tag [tag_index++] = c;
 703         }
 704
 705       if (c == UEOF)
 706         java_lex_error ("Comment not terminated at end of input", 0);
 707       tag [tag_index] = '\0';
 708
 709       if (!strcmp (tag, "deprecated"))
 710         ctxp->deprecated = 1;
 711     }
 712   java_unget_unicode ();
 713   return 0;
 714 }
 715
 716 /* Return true if C is a valid start character for a Java identifier.
 717    This is only called if C >= 128 -- smaller values are handled
 718    inline.  However, this function handles all values anyway.  */
 719 static int
 720 java_start_char_p (c)
 721      unicode_t c;
 722 {
 723   unsigned int hi = c / 256;
 724   char *page = type_table[hi];
 725   unsigned long val = (unsigned long) page;
 726   int flags;
 727
 728   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
 729     flags = page[c & 255];
 730   else
 731     flags = val;
 732
 733   return flags & LETTER_START;
 734 }
 735
 736 /* Return true if C is a valid part character for a Java identifier.
 737    This is only called if C >= 128 -- smaller values are handled
 738    inline.  However, this function handles all values anyway.  */
 739 static int
 740 java_part_char_p (c)
 741      unicode_t c;
 742 {
 743   unsigned int hi = c / 256;
 744   char *page = type_table[hi];
 745   unsigned long val = (unsigned long) page;
 746   int flags;
 747
 748   if ((val & ~ (LETTER_PART | LETTER_START)) != 0)
 749     flags = page[c & 255];
 750   else
 751     flags = val;
 752
 753   return flags & LETTER_PART;
 754 }
 755
 756 static int
 757 java_parse_escape_sequence ()
 758 {
 759   unicode_t char_lit;
 760   int c;
 761
 762   switch (c = java_get_unicode ())
 763     {
 764     case 'b':
 765       return (unicode_t)0x8;
 766     case 't':
 767       return (unicode_t)0x9;
 768     case 'n':
 769       return (unicode_t)0xa;
 770     case 'f':
 771       return (unicode_t)0xc;
 772     case 'r':
 773       return (unicode_t)0xd;
 774     case '"':
 775       return (unicode_t)0x22;
 776     case '\'':
 777       return (unicode_t)0x27;
 778     case '\\':
 779       return (unicode_t)0x5c;
 780     case '0': case '1': case '2': case '3': case '4':
 781     case '5': case '6': case '7':
 782       {
 783         int octal_escape[3];
 784         int octal_escape_index = 0;
 785         int max = 3;
 786         int i, shift;
 787
 788         for (; octal_escape_index < max && RANGE (c, '0', '7');
 789              c = java_get_unicode ())
 790           {
 791             if (octal_escape_index == 0 && c > '3')
 792               {
 793                 /* According to the grammar, `\477' has a well-defined
 794                    meaning -- it is `\47' followed by `7'.  */
 795                 --max;
 796               }
 797             octal_escape [octal_escape_index++] = c;
 798           }
 799
 800         java_unget_unicode ();
 801
 802         for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 803              i < octal_escape_index; i++, shift -= 3)
 804           char_lit |= (octal_escape [i] - '0') << shift;
 805
 806         return char_lit;
 807       }
 808     default:
 809       java_lex_error ("Invalid character in escape sequence", 0);
 810       return JAVA_CHAR_ERROR;
 811     }
 812 }
 813
/* Isolate the code which may raise an arithmetic exception in its
   own function.  */

#ifndef JC1_LITE
/* Arguments handed to java_perform_atof through a single PTR.  */
struct jpa_args
{
  /* Semantic value of the token being built.  */
  YYSTYPE *java_lval;
  /* Text of the floating point literal to convert.  */
  char *literal_token;
  /* Nonzero selects FLOAT_TYPE_NODE, zero DOUBLE_TYPE_NODE.  */
  int fflag;
  /* Position in the current line where the literal starts; used to
     place the underflow diagnostic.  */
  int number_beginning;
};

/* True if the REAL_VALUE_TYPE X compares equal to zero.  */
#ifdef REAL_ARITHMETIC
#define IS_ZERO(X) (ereal_cmp (X, dconst0) == 0)
#else
#define IS_ZERO(X) ((X) == 0)
#endif

static void java_perform_atof   PARAMS ((PTR));

/* Convert the literal described by AV (a struct jpa_args *) to a
   real constant of the requested type.  An infinite or NaN result is
   reported as a range error and replaced by zero; a zero result
   obtained from a literal with a nonzero digit in its mantissa is
   reported as an underflow.

   NOTE(review): the locals `java_lval' and `number_beginning' are
   not referenced by name in the visible code, so they are presumably
   used by the macros invoked below -- keep their names.  */
static void
java_perform_atof (av)
     PTR av;
{
  struct jpa_args *a = (struct jpa_args *)av;
  YYSTYPE *java_lval = a->java_lval;
  int number_beginning = a->number_beginning;
  REAL_VALUE_TYPE value;
  tree type = (a->fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);

  SET_REAL_VALUE_ATOF (value,
                       REAL_VALUE_ATOF (a->literal_token, TYPE_MODE (type)));

  if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
    {
      JAVA_FLOAT_RANGE_ERROR ((a->fflag ? "float" : "double"));
      value = DCONST0;
    }
  else if (IS_ZERO (value))
    {
      /* We check to see if the value is really 0 or if we've found an
         underflow.  We do this in the most primitive imaginable way.  */
      int really_zero = 1;
      char *p = a->literal_token;
      if (*p == '-')
        ++p;
      /* Only the mantissa is scanned: stop at the exponent marker.  */
      while (*p && *p != 'e' && *p != 'E')
        {
          if (*p != '0' && *p != '.')
            {
              really_zero = 0;
              break;
            }
          ++p;
        }
      if (! really_zero)
        {
          /* Temporarily move the read position to the start of the
             literal while the error is reported.  */
          int i = ctxp->c_line->current;
          ctxp->c_line->current = number_beginning;
          java_lex_error ("Floating point literal underflow", 0);
          ctxp->c_line->current = i;
        }
    }

  SET_LVAL_NODE_TYPE (build_real (type, value), type);
}
#endif
 881
 882 static int yylex                PARAMS ((YYSTYPE *));
 883
     /* Lexer entry point: compiled as yylex when JC1_LITE is defined and
        as java_lex otherwise.  Reads characters through java_get_unicode,
        skips white space, line terminators and comments, and returns the
        code of the next token.  Token values are handed back through
        JAVA_LVAL, either directly (java_lval->node) or, presumably, by the
        SET_LVAL_* and BUILD_* macros whose definitions are outside this
        function.  Returns 0 at end of input and after reporting an
        invalid character.  */
 884 static int
 885 #ifdef JC1_LITE
 886 yylex (java_lval)
 887 #else
 888 java_lex (java_lval)
 889 #endif
 890      YYSTYPE *java_lval;
 891 {
 892   int c;
 893   unicode_t first_unicode;
 894   int ascii_index, all_ascii;
 895   char *string;
 896
 897   /* Translation of the Unicode escape in the raw stream of Unicode
 898      characters. Takes care of line terminator.  */
 899  step1:
 900   /* Skip white spaces: SP, TAB and FF or ULT */
 901   for (c = java_get_unicode ();
 902        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 903     if (c == '\n')
 904       {
 905         ctxp->elc.line = ctxp->c_line->lineno;
 906         ctxp->elc.col  = ctxp->c_line->char_col-2;
 907       }
 908
 909   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 910
       /* NOTE(review): when CTRL-Z is not followed by UEOF, C has already
          been overwritten with the following character, which is also
          pushed back below -- confirm this matches the intent stated in
          the comment on the unget call.  */
 911   if (c == 0x1a)                /* CTRL-Z */
 912     {
 913       if ((c = java_get_unicode ()) == UEOF)
 914         return 0;               /* Ok here */
 915       else
 916         java_unget_unicode ();  /* Caught later, at the end of the function */
 917     }
 918   /* Handle EOF here */
 919   if (c == UEOF)        /* Should probably do something here... */
 920     return 0;
 921
 922   /* Take care of eventual comments.  */
 923   if (c == '/')
 924     {
 925       switch (c = java_get_unicode ())
 926         {
 927         case '/':
 928           for (;;)
 929             {
 930               c = java_get_unicode ();
 931               if (c == UEOF)
 932                 {
 933                   /* It is ok to end a `//' comment with EOF, unless
 934                      we're being pedantic.  */
 935                   if (pedantic)
 936                     java_lex_error ("Comment not terminated at end of input",
 937                                     0);
 938                   return 0;
 939                 }
 940               if (c == '\n')    /* ULT */
 941                 goto step1;
 942             }
 943           break;
 944
 945         case '*':
 946           if ((c = java_get_unicode ()) == '*')
 947             {
 948               if ((c = java_get_unicode ()) == '/')
 949                 goto step1;     /* Empty documentation comment  */
 950               else if (java_parse_doc_section (c))
 951                 goto step1;
 952             }
 953
               /* Either this is not a documentation comment, or
                  java_parse_doc_section returned zero: read one more
                  character and let java_parse_end_comment take over.  */
 954           java_parse_end_comment ((c = java_get_unicode ()));
 955           goto step1;
 956           break;
 957         default:
 958           java_unget_unicode ();
 959           c = '/';
 960           break;
 961         }
 962     }
 963
 964   ctxp->elc.line = ctxp->c_line->lineno;
 965   ctxp->elc.prev_col = ctxp->elc.col;
 966   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
 967   if (ctxp->elc.col < 0)
 968     abort ();
 969
 970   /* Numeric literals */
 971   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
 972     {
 973       /* This section of code is borrowed from gcc/c-lex.c  */
 974 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
 975       int parts[TOTAL_PARTS];
 976       HOST_WIDE_INT high, low;
 977       /* End borrowed section  */
           /* NOTE(review): nothing in this function checks LITERAL_INDEX
              against the size of LITERAL_TOKEN before storing into it.  */
 978       char literal_token [256];
 979       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
 980       int  i;
 981 #ifndef JC1_LITE
 982       int  number_beginning = ctxp->c_line->current;
 983 #endif
 984
 985       /* We might have a . separator instead of a FP like .[0-9]* */
 986       if (c == '.')
 987         {
 988           unicode_t peep = java_sneak_unicode ();
 989
 990           if (!JAVA_ASCII_DIGIT (peep))
 991             {
 992               JAVA_LEX_SEP('.');
 993               BUILD_OPERATOR (DOT_TK);
 994             }
 995         }
 996
 997       for (i = 0; i < TOTAL_PARTS; i++)
 998         parts [i] = 0;
 999
1000       if (c == '0')
1001         {
1002           c = java_get_unicode ();
1003           if (c == 'x' || c == 'X')
1004             {
1005               radix = 16;
1006               c = java_get_unicode ();
1007             }
1008           else if (JAVA_ASCII_DIGIT (c))
1009             radix = 8;
1010           else if (c == '.')
1011             {
1012               /* Push the '.' back and prepare for a FP parsing... */
1013               java_unget_unicode ();
1014               c = '0';
1015             }
1016           else
1017             {
1018               /* We have a zero literal: 0, 0{f,F}, 0{d,D} */
1019               JAVA_LEX_LIT ("0", 10);
1020               switch (c)
1021                 {
1022                 case 'L': case 'l':
1023                   SET_LVAL_NODE (long_zero_node);
1024                   return (INT_LIT_TK);
1025                 case 'f': case 'F':
1026                   SET_LVAL_NODE (float_zero_node);
1027                   return (FP_LIT_TK);
1028                 case 'd': case 'D':
1029                   SET_LVAL_NODE (double_zero_node);
1030                   return (FP_LIT_TK);
1031                 default:
1032                   java_unget_unicode ();
1033                   SET_LVAL_NODE (integer_zero_node);
1034                   return (INT_LIT_TK);
1035                 }
1036             }
1037         }
1038       /* Parse the first part of the literal, until we find something
1039          which is not a number.  */
1040       while ((radix == 10 && JAVA_ASCII_DIGIT (c)) ||
1041              (radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1042              (radix == 8  && JAVA_ASCII_OCTDIGIT (c)))
1043         {
1044           /* We store in a string (in case it turns out to be a FP) and in
1045              PARTS if we have to process a integer literal.  */
1046           int numeric = (RANGE (c, '0', '9') ? c-'0' : 10 +(c|0x20)-'a');
1047           int count;
1048
1049           literal_token [literal_index++] = c;
1050           /* This section of code is borrowed from gcc/c-lex.c  */
1051           for (count = 0; count < TOTAL_PARTS; count++)
1052             {
1053               parts[count] *= radix;
1054               if (count)
1055                 {
1056                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
1057                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1058                 }
1059               else
1060                 parts[0] += numeric;
1061             }
1062           if (parts [TOTAL_PARTS-1] != 0)
1063             overflow = 1;
1064           /* End borrowed section.  */
1065           c = java_get_unicode ();
1066         }
1067
1068       /* If we have something from the FP char set but not a digit, parse
1069          a FP literal.  */
1070       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1071         {
1072           int stage = 0;
1073           int seen_digit = (literal_index ? 1 : 0);
1074           int seen_exponent = 0;
1075           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
1076                                    double unless specified. */
1077
1078           /* It is ok if the radix is 8 because this just means we've
1079              seen a leading `0'.  However, radix==16 is invalid.  */
1080           if (radix == 16)
1081             java_lex_error ("Can't express non-decimal FP literal", 0);
1082           radix = 10;
1083
1084           for (;;)
1085             {
1086               if (c == '.')
1087                 {
1088                   if (stage < 1)
1089                     {
1090                       stage = 1;
1091                       literal_token [literal_index++ ] = c;
1092                       c = java_get_unicode ();
1093                     }
1094                   else
1095                     java_lex_error ("Invalid character in FP literal", 0);
1096                 }
1097
1098               if (c == 'e' || c == 'E')
1099                 {
1100                   if (stage < 2)
1101                     {
1102                       /* {E,e} must have seen at least one digit */
1103                       if (!seen_digit)
1104                         java_lex_error ("Invalid FP literal", 0);
1105                       seen_digit = 0;
1106                       seen_exponent = 1;
1107                       stage = 2;
1108                       literal_token [literal_index++] = c;
1109                       c = java_get_unicode ();
1110                     }
1111                   else
1112                     java_lex_error ("Invalid character in FP literal", 0);
1113                 }
1114               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1115                 {
1116                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1117                   stage = 4;    /* So we fall through */
1118                 }
1119
1120               if ((c=='-' || c =='+') && stage == 2)
1121                 {
1122                   stage = 3;
1123                   literal_token [literal_index++] = c;
1124                   c = java_get_unicode ();
1125                 }
1126
1127               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1128                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1129                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1130                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
1131                 {
1132                   if (JAVA_ASCII_DIGIT (c))
1133                     seen_digit = 1;
1134                   literal_token [literal_index++ ] = c;
1135                   c = java_get_unicode ();
1136                 }
1137               else
1138                 {
1139 #ifndef JC1_LITE
1140                   struct jpa_args a;
1141 #endif
1142                   if (stage != 4) /* Don't push back fF/dD */
1143                     java_unget_unicode ();
1144
1145                   /* An exponent (if any) must have seen a digit.  */
1146                   if (seen_exponent && !seen_digit)
1147                     java_lex_error ("Invalid FP literal", 0);
1148
1149                   literal_token [literal_index] = '\0';
1150                   JAVA_LEX_LIT (literal_token, radix);
1151
1152 #ifndef JC1_LITE
1153                   a.literal_token = literal_token;
1154                   a.fflag = fflag;
1155                   a.java_lval = java_lval;
1156                   a.number_beginning = number_beginning;
1157                   if (do_float_handler (java_perform_atof, (PTR) &a))
1158                     return FP_LIT_TK;
1159
1160                   JAVA_FLOAT_RANGE_ERROR ((fflag ? "float" : "double"));
1161 #else
1162                   return FP_LIT_TK;
1163 #endif
1164                 }
1165             }
1166         } /* JAVA_ASCII_FPCHAR (c) */
1167
1168       /* Here we get back to converting the integral literal.  */
1169       if (c == 'L' || c == 'l')
1170         long_suffix = 1;
1171       else if (radix == 16 && JAVA_ASCII_LETTER (c))
1172         java_lex_error ("Digit out of range in hexadecimal literal", 0);
1173       else if (radix == 8  && JAVA_ASCII_DIGIT (c))
1174         java_lex_error ("Digit out of range in octal literal", 0);
1175       else if (radix == 16 && !literal_index)
1176         java_lex_error ("No digit specified for hexadecimal literal", 0);
1177       else
1178         java_unget_unicode ();
1179
1180 #ifdef JAVA_LEX_DEBUG
1181       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe. */
1182       JAVA_LEX_LIT (literal_token, radix);
1183 #endif
1184       /* This section of code is borrowed from gcc/c-lex.c  */
1185       if (!overflow)
1186         {
1187           bytes = GET_TYPE_PRECISION (long_type_node);
1188           for (i = bytes; i < TOTAL_PARTS; i++)
1189             if (parts [i])
1190               {
1191                 overflow = 1;
1192                 break;
1193               }
1194         }
1195       high = low = 0;
1196       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1197         {
1198           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1199                                               / HOST_BITS_PER_CHAR)]
1200                    << (i * HOST_BITS_PER_CHAR));
1201           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1202         }
1203       /* End borrowed section.  */
1204
1205       /* Range checking */
1206       if (long_suffix)
1207         {
1208           /* 9223372036854775808L is valid if operand of a '-'. Otherwise
1209              9223372036854775807L is the biggest `long' literal that can be
1210              expressed using a 10 radix. For other radixes, everything that
1211              fits within 64 bits is OK. */
1212           int hb = (high >> 31);
1213           if (overflow || (hb && low && radix == 10) ||
1214               (hb && high & 0x7fffffff && radix == 10) ||
1215               (hb && !(high & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1216             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1217         }
1218       else
1219         {
1220           /* 2147483648 is valid if operand of a '-'. Otherwise,
1221              2147483647 is the biggest `int' literal that can be
1222              expressed using a 10 radix. For other radixes, everything
1223              that fits within 32 bits is OK.  As all literals are
1224              signed, we sign extend here. */
1225           int hb = (low >> 31) & 0x1;
1226           if (overflow || high || (hb && low & 0x7fffffff && radix == 10) ||
1227               (hb && !(low & 0x7fffffff) && !ctxp->minus_seen && radix == 10))
1228             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1229           high = -hb;
1230         }
1231       ctxp->minus_seen = 0;
1232       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1233                           (long_suffix ? long_type_node : int_type_node));
1234       return INT_LIT_TK;
1235     }
1236
       /* Not a numeric literal: clear the flag consulted by the range
          checks above.  It is set again only by the lone `-' case of the
          operator switch below.  */
1237   ctxp->minus_seen = 0;
1238
1239   /* Character literals */
1240   if (c == '\'')
1241     {
1242       int char_lit;
1243       if ((c = java_get_unicode ()) == '\\')
1244         char_lit = java_parse_escape_sequence ();
1245       else
1246         {
1247           if (c == '\n' || c == '\'')
1248             java_lex_error ("Invalid character literal", 0);
1249           char_lit = c;
1250         }
1251
1252       c = java_get_unicode ();
1253
1254       if ((c == '\n') || (c == UEOF))
1255         java_lex_error ("Character literal not terminated at end of line", 0);
1256       if (c != '\'')
1257         java_lex_error ("Syntax error in character literal", 0);
1258
1259       if (char_lit == JAVA_CHAR_ERROR)
1260         char_lit = 0;           /* We silently convert it to zero */
1261
1262       JAVA_LEX_CHAR_LIT (char_lit);
1263       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1264       return CHAR_LIT_TK;
1265     }
1266
1267   /* String literals */
1268   if (c == '"')
1269     {
1270       int no_error;
           /* This declaration shadows the function-level STRING.  */
1271       char *string;
1272
1273       for (no_error = 1, c = java_get_unicode ();
1274            c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1275         {
1276           if (c == '\\')
1277             c = java_parse_escape_sequence ();
1278           if (c == JAVA_CHAR_ERROR)
1279             {
1280               no_error = 0;
1281               c = 0;            /* We silently convert it to zero.  */
1282             }
1283           java_unicode_2_utf8 (c);
1284         }
1285       if (c == '\n' || c == UEOF) /* ULT */
1286         {
1287           lineno--;             /* Refer to the line the terminator was seen */
1288           java_lex_error ("String not terminated at end of line.", 0);
1289           lineno++;
1290         }
1291
1292       obstack_1grow (&temporary_obstack, '\0');
1293       string = obstack_finish (&temporary_obstack);
1294 #ifndef JC1_LITE
1295       if (!no_error || (c != '"'))
1296         java_lval->node = error_mark_node; /* Requires further testing FIXME */
1297       else
1298         java_lval->node = build_string (strlen (string), string);
1299 #endif
1300       obstack_free (&temporary_obstack, string);
1301       return STRING_LIT_TK;
1302     }
1303
       /* NOTE(review): several cases in the two switches below end in
          BUILD_OPERATOR or BUILD_OPERATOR2 with no break; presumably those
          macros return from the function -- confirm against their
          definitions.  */
1304   /* Separator */
1305   switch (c)
1306     {
1307     case '(':
1308       JAVA_LEX_SEP (c);
1309       BUILD_OPERATOR (OP_TK);
1310     case ')':
1311       JAVA_LEX_SEP (c);
1312       return CP_TK;
1313     case '{':
1314       JAVA_LEX_SEP (c);
1315       if (ctxp->ccb_indent == 1)
1316         ctxp->first_ccb_indent1 = lineno;
1317       ctxp->ccb_indent++;
1318       BUILD_OPERATOR (OCB_TK);
1319     case '}':
1320       JAVA_LEX_SEP (c);
1321       ctxp->ccb_indent--;
1322       if (ctxp->ccb_indent == 1)
1323         ctxp->last_ccb_indent1 = lineno;
1324       BUILD_OPERATOR (CCB_TK);
1325     case '[':
1326       JAVA_LEX_SEP (c);
1327       BUILD_OPERATOR (OSB_TK);
1328     case ']':
1329       JAVA_LEX_SEP (c);
1330       return CSB_TK;
1331     case ';':
1332       JAVA_LEX_SEP (c);
1333       return SC_TK;
1334     case ',':
1335       JAVA_LEX_SEP (c);
1336       return C_TK;
1337     case '.':
1338       JAVA_LEX_SEP (c);
1339       BUILD_OPERATOR (DOT_TK);
1340       /*      return DOT_TK; */
1341     }
1342
1343   /* Operators */
1344   switch (c)
1345     {
1346     case '=':
1347       if ((c = java_get_unicode ()) == '=')
1348         {
1349           BUILD_OPERATOR (EQ_TK);
1350         }
1351       else
1352         {
1353           /* Equals is used in two different locations. In the
1354              variable_declarator: rule, it has to be seen as '=' as opposed
1355              to being seen as an ordinary assignment operator in
1356              assignment_operators: rule.  */
1357           java_unget_unicode ();
1358           BUILD_OPERATOR (ASSIGN_TK);
1359         }
1360
1361     case '>':
1362       switch ((c = java_get_unicode ()))
1363         {
1364         case '=':
1365           BUILD_OPERATOR (GTE_TK);
1366         case '>':
1367           switch ((c = java_get_unicode ()))
1368             {
1369             case '>':
1370               if ((c = java_get_unicode ()) == '=')
1371                 {
1372                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1373                 }
1374               else
1375                 {
1376                   java_unget_unicode ();
1377                   BUILD_OPERATOR (ZRS_TK);
1378                 }
1379             case '=':
1380               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1381             default:
1382               java_unget_unicode ();
1383               BUILD_OPERATOR (SRS_TK);
1384             }
1385         default:
1386           java_unget_unicode ();
1387           BUILD_OPERATOR (GT_TK);
1388         }
1389
1390     case '<':
1391       switch ((c = java_get_unicode ()))
1392         {
1393         case '=':
1394           BUILD_OPERATOR (LTE_TK);
1395         case '<':
1396           if ((c = java_get_unicode ()) == '=')
1397             {
1398               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1399             }
1400           else
1401             {
1402               java_unget_unicode ();
1403               BUILD_OPERATOR (LS_TK);
1404             }
1405         default:
1406           java_unget_unicode ();
1407           BUILD_OPERATOR (LT_TK);
1408         }
1409
1410     case '&':
1411       switch ((c = java_get_unicode ()))
1412         {
1413         case '&':
1414           BUILD_OPERATOR (BOOL_AND_TK);
1415         case '=':
1416           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1417         default:
1418           java_unget_unicode ();
1419           BUILD_OPERATOR (AND_TK);
1420         }
1421
1422     case '|':
1423       switch ((c = java_get_unicode ()))
1424         {
1425         case '|':
1426           BUILD_OPERATOR (BOOL_OR_TK);
1427         case '=':
1428           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1429         default:
1430           java_unget_unicode ();
1431           BUILD_OPERATOR (OR_TK);
1432         }
1433
1434     case '+':
1435       switch ((c = java_get_unicode ()))
1436         {
1437         case '+':
1438           BUILD_OPERATOR (INCR_TK);
1439         case '=':
1440           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1441         default:
1442           java_unget_unicode ();
1443           BUILD_OPERATOR (PLUS_TK);
1444         }
1445
1446     case '-':
1447       switch ((c = java_get_unicode ()))
1448         {
1449         case '-':
1450           BUILD_OPERATOR (DECR_TK);
1451         case '=':
1452           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1453         default:
1454           java_unget_unicode ();
1455           ctxp->minus_seen = 1;
1456           BUILD_OPERATOR (MINUS_TK);
1457         }
1458
1459     case '*':
1460       if ((c = java_get_unicode ()) == '=')
1461         {
1462           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1463         }
1464       else
1465         {
1466           java_unget_unicode ();
1467           BUILD_OPERATOR (MULT_TK);
1468         }
1469
1470     case '/':
1471       if ((c = java_get_unicode ()) == '=')
1472         {
1473           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1474         }
1475       else
1476         {
1477           java_unget_unicode ();
1478           BUILD_OPERATOR (DIV_TK);
1479         }
1480
1481     case '^':
1482       if ((c = java_get_unicode ()) == '=')
1483         {
1484           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1485         }
1486       else
1487         {
1488           java_unget_unicode ();
1489           BUILD_OPERATOR (XOR_TK);
1490         }
1491
1492     case '%':
1493       if ((c = java_get_unicode ()) == '=')
1494         {
1495           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1496         }
1497       else
1498         {
1499           java_unget_unicode ();
1500           BUILD_OPERATOR (REM_TK);
1501         }
1502
1503     case '!':
1504       if ((c = java_get_unicode()) == '=')
1505         {
1506           BUILD_OPERATOR (NEQ_TK);
1507         }
1508       else
1509         {
1510           java_unget_unicode ();
1511           BUILD_OPERATOR (NEG_TK);
1512         }
1513
1514     case '?':
1515       JAVA_LEX_OP ("?");
1516       BUILD_OPERATOR (REL_QM_TK);
1517     case ':':
1518       JAVA_LEX_OP (":");
1519       BUILD_OPERATOR (REL_CL_TK);
1520     case '~':
1521       BUILD_OPERATOR (NOT_TK);
1522     }
1523
1524   /* Keyword, boolean literal or null literal */
       /* Collect every character accepted by JAVA_PART_CHAR_P as UTF-8 on
          temporary_obstack; ASCII_INDEX counts characters, ALL_ASCII is
          cleared as soon as one of them is >= 128.  */
1525   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1526        JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1527     {
1528       java_unicode_2_utf8 (c);
1529       if (all_ascii && c >= 128)
1530         all_ascii = 0;
1531       ascii_index++;
1532     }
1533
1534   obstack_1grow (&temporary_obstack, '\0');
1535   string = obstack_finish (&temporary_obstack);
       /* Push back the character that ended the loop above.  */
1536   java_unget_unicode ();
1537
1538   /* If we have something all ascii, we consider a keyword, a boolean
1539      literal, a null literal or an all ASCII identifier.  Otherwise,
1540      this is an identifier (possibly not respecting formation rule).  */
1541   if (all_ascii)
1542     {
1543       struct java_keyword *kw;
1544       if ((kw=java_keyword (string, ascii_index)))
1545         {
1546           JAVA_LEX_KW (string);
1547           switch (kw->token)
1548             {
1549             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1550             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1551             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1552             case PRIVATE_TK:      case STRICT_TK:
1553               SET_MODIFIER_CTX (kw->token);
1554               return MODIFIER_TK;
1555             case FLOAT_TK:
1556               SET_LVAL_NODE (float_type_node);
1557               return FP_TK;
1558             case DOUBLE_TK:
1559               SET_LVAL_NODE (double_type_node);
1560               return FP_TK;
1561             case BOOLEAN_TK:
1562               SET_LVAL_NODE (boolean_type_node);
1563               return BOOLEAN_TK;
1564             case BYTE_TK:
1565               SET_LVAL_NODE (byte_type_node);
1566               return INTEGRAL_TK;
1567             case SHORT_TK:
1568               SET_LVAL_NODE (short_type_node);
1569               return INTEGRAL_TK;
1570             case INT_TK:
1571               SET_LVAL_NODE (int_type_node);
1572               return INTEGRAL_TK;
1573             case LONG_TK:
1574               SET_LVAL_NODE (long_type_node);
1575               return INTEGRAL_TK;
1576             case CHAR_TK:
1577               SET_LVAL_NODE (char_type_node);
1578               return INTEGRAL_TK;
1579
1580               /* Keyword based literals */
1581             case TRUE_TK:
1582             case FALSE_TK:
1583               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1584                               boolean_true_node : boolean_false_node));
1585               return BOOL_LIT_TK;
1586             case NULL_TK:
1587               SET_LVAL_NODE (null_pointer_node);
1588               return NULL_TK;
1589
1590               /* Some keyword we want to retain information on the location
1591                  they were found */
1592             case CASE_TK:
1593             case DEFAULT_TK:
1594             case SUPER_TK:
1595             case THIS_TK:
1596             case RETURN_TK:
1597             case BREAK_TK:
1598             case CONTINUE_TK:
1599             case TRY_TK:
1600             case CATCH_TK:
1601             case THROW_TK:
1602             case INSTANCEOF_TK:
1603               BUILD_OPERATOR (kw->token);
1604
1605             default:
1606               return kw->token;
1607             }
1608         }
1609     }
1610
1611   /* We may have an ID here */
1612   if (JAVA_START_CHAR_P (first_unicode))
1613     {
1614       JAVA_LEX_ID (string);
1615       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1616       return ID_TK;
1617     }
1618
1619   /* Everything else is an invalid character in the input */
1620   {
1621     char lex_error_buffer [128];
1622     sprintf (lex_error_buffer, "Invalid character `%s' in input",
1623              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1624     java_lex_error (lex_error_buffer, 1);
1625   }
1626   return 0;
1627 }
1628
1629 static void
1630 java_unicode_2_utf8 (unicode)
1631     unicode_t unicode;
1632 {
1633   if (RANGE (unicode, 0x01, 0x7f))
1634     obstack_1grow (&temporary_obstack, (char)unicode);
1635   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1636     {
1637       obstack_1grow (&temporary_obstack,
1638                      (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1639       obstack_1grow (&temporary_obstack,
1640                      (unsigned char)(0x80 | (unicode & 0x3f)));
1641     }
1642   else                          /* Range 0x800-0xffff */
1643     {
1644       obstack_1grow (&temporary_obstack,
1645                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1646       obstack_1grow (&temporary_obstack,
1647                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1648       obstack_1grow (&temporary_obstack,
1649                      (unsigned char)(0x80 | (unicode & 0x003f)));
1650     }
1651 }
1652
1653 #ifndef JC1_LITE
1654 static tree
1655 build_wfl_node (node)
1656      tree node;
1657 {
1658   node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1659   /* Prevent java_complete_lhs from short-circuiting node (if constant). */
1660   TREE_TYPE (node) = NULL_TREE;
1661   return node;
1662 }
1663 #endif
1664
1665 static void
1666 java_lex_error (msg, forward)
1667      const char *msg ATTRIBUTE_UNUSED;
1668      int forward ATTRIBUTE_UNUSED;
1669 {
1670 #ifndef JC1_LITE
1671   ctxp->elc.line = ctxp->c_line->lineno;
1672   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1673
1674   /* Might be caught in the middle of some error report */
1675   ctxp->java_error_flag = 0;
1676   java_error (NULL);
1677   java_error (msg);
1678 #endif
1679 }
1680
#ifndef JC1_LITE
/* Return 1 when C, a character just read from FP, ends a line, and 0
   otherwise.  A line ends with `\n' or `\r'.  After a `\r' one more
   character is read from FP: a following `\n' is swallowed so that
   "\r\n" counts as one terminator, EOF is left alone, and any other
   character is pushed back with ungetc.  */
static int
java_is_eol (fp, c)
  FILE *fp;
  int c;
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int peek = getc (fp);

      if (peek != EOF && peek != '\n')
        ungetc (peek, fp);
      return 1;
    }

  return 0;
}
#endif
1702
1703 char *
1704 java_get_line_col (filename, line, col)
1705      const char *filename ATTRIBUTE_UNUSED;
1706      int line ATTRIBUTE_UNUSED, col ATTRIBUTE_UNUSED;
1707 {
1708 #ifdef JC1_LITE
1709   return 0;
1710 #else
1711   /* Dumb implementation. Doesn't try to cache or optimize things. */
1712   /* First line of the file is line 1, first column is 1 */
1713
1714   /* COL == -1 means, at the CR/LF in LINE */
1715   /* COL == -2 means, at the first non space char in LINE */
1716
1717   FILE *fp;
1718   int c, ccol, cline = 1;
1719   int current_line_col = 0;
1720   int first_non_space = 0;
1721   char *base;
1722
1723   if (!(fp = fopen (filename, "r")))
1724     fatal_io_error ("can't open %s", filename);
1725
1726   while (cline != line)
1727     {
1728       c = getc (fp);
1729       if (c == EOF)
1730         {
1731           static char msg[] = "<<file too short - unexpected EOF>>";
1732           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1733           goto have_line;
1734         }
1735       if (java_is_eol (fp, c))
1736         cline++;
1737     }
1738
1739   /* Gather the chars of the current line in a buffer */
1740   for (;;)
1741     {
1742       c = getc (fp);
1743       if (c < 0 || java_is_eol (fp, c))
1744         break;
1745       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1746         first_non_space = current_line_col;
1747       obstack_1grow (&temporary_obstack, c);
1748       current_line_col++;
1749     }
1750  have_line:
1751
1752   obstack_1grow (&temporary_obstack, '\n');
1753
1754   if (col == -1)
1755     {
1756       col = current_line_col;
1757       first_non_space = 0;
1758     }
1759   else if (col == -2)
1760     col = first_non_space;
1761   else
1762     first_non_space = 0;
1763
1764   /* Place the '^' a the right position */
1765   base = obstack_base (&temporary_obstack);
1766   for (ccol = 1; ccol <= col+3; ccol++)
1767     {
1768       /* Compute \t when reaching first_non_space */
1769       char c = (first_non_space ?
1770                 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1771       obstack_1grow (&temporary_obstack, c);
1772     }
1773   obstack_grow0 (&temporary_obstack, "^", 1);
1774
1775   fclose (fp);
1776   return obstack_finish (&temporary_obstack);
1777 #endif
1778 }
1779
1780 #ifndef JC1_LITE
/* Compare the UTF-8 text STR, LENGTH bytes long, with the NUL
   terminated string NAME.  Characters are fetched from STR with
   UTF8_GET and compared one by one with the characters of NAME; at the
   first mismatch the difference between the two is returned.  If all
   of NAME matched, the result is 0 when STR was consumed exactly and 1
   when some of STR is left over.  */
static int
utf8_cmp (str, length, name)
     const unsigned char *str;
     int length;
     const char *name;
{
  const unsigned char *end = str + length;
  const char *expected;

  for (expected = name; *expected; ++expected)
    {
      /* UTF8_GET advances STR as it decodes.  */
      int decoded = UTF8_GET (str, end);
      int diff = decoded - *expected;

      if (diff != 0)
        return diff;
    }

  return str != end;
}
1799
/* A sorted list of all C++ keywords.  cxx_keyword_p looks names up in
   this table with a binary search, so the entries must stay in strict
   ascending byte order (the order strcmp gives); an entry that is out
   of place may never be found.  */

static const char *cxx_keywords[] =
{
  "_Complex",
  "__alignof",
  "__alignof__",
  "__asm",
  "__asm__",
  "__attribute",
  "__attribute__",
  "__builtin_va_arg",
  "__complex",
  "__complex__",
  "__const",
  "__const__",
  "__extension__",
  "__imag",
  "__imag__",
  "__inline",
  "__inline__",
  "__label__",
  "__null",
  "__real",
  "__real__",
  "__restrict",
  "__restrict__",
  "__signed",
  "__signed__",
  "__typeof",
  "__typeof__",
  "__volatile",
  "__volatile__",
  "and",
  "and_eq",
  "asm",
  "auto",
  "bitand",
  "bitor",
  "bool",
  "break",
  "case",
  "catch",
  "char",
  "class",
  "compl",
  "const",
  "const_cast",
  "continue",
  "default",
  "delete",
  "do",
  "double",
  "dynamic_cast",
  "else",
  "enum",
  "explicit",
  "export",
  "extern",
  "false",
  "float",
  "for",
  "friend",
  "goto",
  "if",
  "inline",
  "int",
  "long",
  "mutable",
  "namespace",
  "new",
  "not",
  "not_eq",
  "operator",
  "or",
  "or_eq",
  "private",
  "protected",
  "public",
  "register",
  "reinterpret_cast",
  "return",
  "short",
  "signed",
  "sizeof",
  "static",
  "static_cast",
  "struct",
  "switch",
  "template",
  "this",
  "throw",
  "true",
  "try",
  "typedef",
  "typeid",
  "typename",
  "typeof",
  "union",
  "unsigned",
  "using",
  "virtual",
  "void",
  "volatile",
  "wchar_t",
  "while",
  "xor",
  "xor_eq"
};
1909
1910 /* Return true if NAME is a C++ keyword.  */
1911
1912 int
1913 cxx_keyword_p (name, length)
1914      const char *name;
1915      int length;
1916 {
1917   int last = ARRAY_SIZE (cxx_keywords);
1918   int first = 0;
1919   int mid = (last + first) / 2;
1920   int old = -1;
1921
1922   for (mid = (last + first) / 2;
1923        mid != old;
1924        old = mid, mid = (last + first) / 2)
1925     {
1926       int kwl = strlen (cxx_keywords[mid]);
1927       int min_length = kwl > length ? length : kwl;
1928       int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
1929
1930       if (r == 0)
1931         {
1932           int i;
1933           /* We've found a match if all the remaining characters are
1934              `$'.  */
1935           for (i = min_length; i < length && name[i] == '$'; ++i)
1936             ;
1937           if (i == length)
1938             return 1;
1939           r = 1;
1940         }
1941
1942       if (r < 0)
1943         last = mid;
1944       else
1945         first = mid;
1946     }
1947   return 0;
1948 }
1949 #endif /* JC1_LITE */