gcc/java/lex.c

   1 /* Language lexer for the GNU compiler for the Java(TM) language.
   2    Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
   3    Free Software Foundation, Inc.
   4    Contributed by Alexandre Petit-Bianco (apbianco@cygnus.com)
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GCC is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING.  If not, write to
  20 the Free Software Foundation, 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.
  22
  23 Java and all Java-based marks are trademarks or registered trademarks
  24 of Sun Microsystems, Inc. in the United States and other countries.
  25 The Free Software Foundation is independent of Sun Microsystems, Inc.  */
  26
/* This file defines java_lex (yylex), which reads a Java ASCII source
   file possibly containing Unicode escape sequences or UTF-8 encoded
   characters and returns a token for everything found except
   comments, white space and line terminators.  When necessary, it
   also fills the java_lval (yylval) union.  It is implemented to be
   called by a re-entrant parser generated by Bison.

   The lexical analysis conforms to the Java grammar described in "The
   Java(TM) Language Specification. J. Gosling, B. Joy, G. Steele.
   Addison Wesley 1996" (http://java.sun.com/docs/books/jls/html/3.doc.html) */
  37
  38 #include "keyword.h"
  39 #include "flags.h"
  40 #include "chartables.h"
  41 #ifndef JC1_LITE
  42 #include "timevar.h"
  43 #endif
  44
  45 /* Function declarations.  */
  46 static char *java_sprint_unicode (struct java_line *, int);
  47 static void java_unicode_2_utf8 (unicode_t);
  48 static void java_lex_error (const char *, int);
  49 #ifndef JC1_LITE
  50 static int do_java_lex (YYSTYPE *);
  51 static int java_lex (YYSTYPE *);
  52 static int java_is_eol (FILE *, int);
  53 static tree build_wfl_node (tree);
  54 #endif
  55 static void java_store_unicode (struct java_line *, unicode_t, int);
  56 static int java_parse_escape_sequence (void);
  57 static int java_start_char_p (unicode_t);
  58 static int java_part_char_p (unicode_t);
  59 static int java_space_char_p (unicode_t);
  60 static void java_parse_doc_section (int);
  61 static void java_parse_end_comment (int);
  62 static int java_get_unicode (void);
  63 static int java_read_unicode (java_lexer *, int *);
  64 static int java_read_unicode_collapsing_terminators (java_lexer *, int *);
  65 static void java_store_unicode (struct java_line *, unicode_t, int);
  66 static int java_read_char (java_lexer *);
  67 static void java_allocate_new_line (void);
  68 static void java_unget_unicode (void);
  69 static unicode_t java_sneak_unicode (void);
  70 #ifndef JC1_LITE
  71 static int utf8_cmp (const unsigned char *, int, const char *);
  72 #endif
  73
  74 java_lexer *java_new_lexer (FILE *, const char *);
  75 #ifndef JC1_LITE
  76 static void error_if_numeric_overflow (tree);
  77 #endif
  78
  79 #ifdef HAVE_ICONV
  80 /* This is nonzero if we have initialized `need_byteswap'.  */
  81 static int byteswap_init = 0;
  82
  83 /* Some versions of iconv() (e.g., glibc 2.1.3) will return UCS-2 in
  84    big-endian order -- not native endian order.  We handle this by
  85    doing a conversion once at startup and seeing what happens.  This
  86    flag holds the results of this determination.  */
  87 static int need_byteswap = 0;
  88 #endif
  89
  90 void
  91 java_init_lex (FILE *finput, const char *encoding)
  92 {
  93 #ifndef JC1_LITE
  94   int java_lang_imported = 0;
  95
  96   if (!java_lang_id)
  97     java_lang_id = get_identifier ("java.lang");
  98   if (!inst_id)
  99     inst_id = get_identifier ("inst$");
 100   if (!wpv_id)
 101     wpv_id = get_identifier ("write_parm_value$");
 102
 103   if (!java_lang_imported)
 104     {
 105       tree node = build_tree_list
 106         (build_expr_wfl (java_lang_id, NULL, 0, 0), NULL_TREE);
 107       read_import_dir (TREE_PURPOSE (node));
 108       TREE_CHAIN (node) = ctxp->import_demand_list;
 109       ctxp->import_demand_list = node;
 110       java_lang_imported = 1;
 111     }
 112
 113   if (!wfl_operator)
 114     wfl_operator = build_expr_wfl (NULL_TREE, ctxp->filename, 0, 0);
 115   if (!label_id)
 116     label_id = get_identifier ("$L");
 117   if (!wfl_append)
 118     wfl_append = build_expr_wfl (get_identifier ("append"), NULL, 0, 0);
 119   if (!wfl_string_buffer)
 120     wfl_string_buffer =
 121       build_expr_wfl (get_identifier (flag_emit_class_files
 122                                       ? "java.lang.StringBuffer"
 123                                       : "gnu.gcj.runtime.StringBuffer"),
 124                       NULL, 0, 0);
 125   if (!wfl_to_string)
 126     wfl_to_string = build_expr_wfl (get_identifier ("toString"), NULL, 0, 0);
 127
 128   CPC_INITIALIZER_LIST (ctxp) = CPC_STATIC_INITIALIZER_LIST (ctxp) =
 129     CPC_INSTANCE_INITIALIZER_LIST (ctxp) = NULL_TREE;
 130
 131   memset (ctxp->modifier_ctx, 0, sizeof (ctxp->modifier_ctx));
 132   current_jcf = ggc_alloc_cleared (sizeof (JCF));
 133   ctxp->current_parsed_class = NULL;
 134   ctxp->package = NULL_TREE;
 135 #endif
 136
 137   ctxp->filename = input_filename;
 138   ctxp->lineno = lineno = 0;
 139   ctxp->p_line = NULL;
 140   ctxp->c_line = NULL;
 141   ctxp->java_error_flag = 0;
 142   ctxp->lexer = java_new_lexer (finput, encoding);
 143 }
 144
 145 static char *
 146 java_sprint_unicode (struct java_line *line, int i)
 147 {
 148   static char buffer [10];
 149   if (line->unicode_escape_p [i] || line->line [i] > 128)
 150     sprintf (buffer, "\\u%04x", line->line [i]);
 151   else
 152     {
 153       buffer [0] = line->line [i];
 154       buffer [1] = '\0';
 155     }
 156   return buffer;
 157 }
 158
 159 static unicode_t
 160 java_sneak_unicode (void)
 161 {
 162   return (ctxp->c_line->line [ctxp->c_line->current]);
 163 }
 164
 165 static void
 166 java_unget_unicode (void)
 167 {
 168   if (!ctxp->c_line->current)
 169     /* Can't unget unicode.  */
 170     abort ();
 171
 172   ctxp->c_line->current--;
 173   ctxp->c_line->char_col -= JAVA_COLUMN_DELTA (0);
 174 }
 175
 176 static void
 177 java_allocate_new_line (void)
 178 {
 179   unicode_t ahead = (ctxp->c_line ? ctxp->c_line->ahead[0] : '\0');
 180   char ahead_escape_p = (ctxp->c_line ?
 181                          ctxp->c_line->unicode_escape_ahead_p : 0);
 182
 183   if (ctxp->c_line && !ctxp->c_line->white_space_only)
 184     {
 185       if (ctxp->p_line)
 186         {
 187           free (ctxp->p_line->unicode_escape_p);
 188           free (ctxp->p_line->line);
 189           free (ctxp->p_line);
 190         }
 191       ctxp->p_line = ctxp->c_line;
 192       ctxp->c_line = NULL;              /* Reallocated.  */
 193     }
 194
 195   if (!ctxp->c_line)
 196     {
 197       ctxp->c_line = xmalloc (sizeof (struct java_line));
 198       ctxp->c_line->max = JAVA_LINE_MAX;
 199       ctxp->c_line->line = xmalloc (sizeof (unicode_t)*ctxp->c_line->max);
 200       ctxp->c_line->unicode_escape_p =
 201         xmalloc (sizeof (char)*ctxp->c_line->max);
 202       ctxp->c_line->white_space_only = 0;
 203     }
 204
 205   ctxp->c_line->line [0] = ctxp->c_line->size = 0;
 206   ctxp->c_line->char_col = ctxp->c_line->current = 0;
 207   if (ahead)
 208     {
 209       ctxp->c_line->line [ctxp->c_line->size] = ahead;
 210       ctxp->c_line->unicode_escape_p [ctxp->c_line->size] = ahead_escape_p;
 211       ctxp->c_line->size++;
 212     }
 213   ctxp->c_line->ahead [0] = 0;
 214   ctxp->c_line->unicode_escape_ahead_p = 0;
 215   ctxp->c_line->lineno = ++lineno;
 216   ctxp->c_line->white_space_only = 1;
 217 }
 218
 219 /* Create a new lexer object.  */
 220
 221 java_lexer *
 222 java_new_lexer (FILE *finput, const char *encoding)
 223 {
 224   java_lexer *lex = xmalloc (sizeof (java_lexer));
 225   int enc_error = 0;
 226
 227   lex->finput = finput;
 228   lex->bs_count = 0;
 229   lex->unget_value = 0;
 230   lex->hit_eof = 0;
 231
 232 #ifdef HAVE_ICONV
 233   lex->handle = iconv_open ("UCS-2", encoding);
 234   if (lex->handle != (iconv_t) -1)
 235     {
 236       lex->first = -1;
 237       lex->last = -1;
 238       lex->out_first = -1;
 239       lex->out_last = -1;
 240       lex->read_anything = 0;
 241       lex->use_fallback = 0;
 242
 243       /* Work around broken iconv() implementations by doing checking at
 244          runtime.  We assume that if the UTF-8 => UCS-2 encoder is broken,
 245          then all UCS-2 encoders will be broken.  Perhaps not a valid
 246          assumption.  */
 247       if (! byteswap_init)
 248         {
 249           iconv_t handle;
 250
 251           byteswap_init = 1;
 252
 253           handle = iconv_open ("UCS-2", "UTF-8");
 254           if (handle != (iconv_t) -1)
 255             {
 256               unicode_t result;
 257               unsigned char in[3];
 258               char *inp, *outp;
 259               size_t inc, outc, r;
 260
 261               /* This is the UTF-8 encoding of \ufeff.  */
 262               in[0] = 0xef;
 263               in[1] = 0xbb;
 264               in[2] = 0xbf;
 265
 266               inp = in;
 267               inc = 3;
 268               outp = (char *) &result;
 269               outc = 2;
 270
 271               r = iconv (handle, (ICONV_CONST char **) &inp, &inc,
 272                          &outp, &outc);
 273               iconv_close (handle);
 274               /* Conversion must be complete for us to use the result.  */
 275               if (r != (size_t) -1 && inc == 0 && outc == 0)
 276                 need_byteswap = (result != 0xfeff);
 277             }
 278         }
 279
 280       lex->byte_swap = need_byteswap;
 281     }
 282   else
 283 #endif /* HAVE_ICONV */
 284     {
 285       /* If iconv failed, use the internal decoder if the default
 286          encoding was requested.  This code is used on platforms where
 287          iconv exists but is insufficient for our needs.  For
 288          instance, on Solaris 2.5 iconv cannot handle UTF-8 or UCS-2.
 289
 290          On Solaris the default encoding, as returned by nl_langinfo(),
 291          is `646' (aka ASCII), but the Solaris iconv_open() doesn't
 292          understand that.  We work around that by pretending
 293          `646' to be the same as UTF-8.   */
 294       if (strcmp (encoding, DEFAULT_ENCODING) && strcmp (encoding, "646"))
 295         enc_error = 1;
 296 #ifdef HAVE_ICONV
 297       else
 298         lex->use_fallback = 1;
 299 #endif /* HAVE_ICONV */
 300     }
 301
 302   if (enc_error)
 303     fatal_error ("unknown encoding: `%s'\nThis might mean that your locale's encoding is not supported\nby your system's iconv(3) implementation.  If you aren't trying\nto use a particular encoding for your input file, try the\n`--encoding=UTF-8' option", encoding);
 304
 305   return lex;
 306 }
 307
 308 void
 309 java_destroy_lexer (java_lexer *lex)
 310 {
 311 #ifdef HAVE_ICONV
 312   if (! lex->use_fallback)
 313     iconv_close (lex->handle);
 314 #endif
 315   free (lex);
 316 }
 317
 318 static int
 319 java_read_char (java_lexer *lex)
 320 {
 321   if (lex->unget_value)
 322     {
 323       unicode_t r = lex->unget_value;
 324       lex->unget_value = 0;
 325       return r;
 326     }
 327
 328 #ifdef HAVE_ICONV
 329   if (! lex->use_fallback)
 330     {
 331       size_t ir, inbytesleft, in_save, out_count, out_save;
 332       char *inp, *outp;
 333       unicode_t result;
 334
 335       /* If there is data which has already been converted, use it.  */
 336       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 337         {
 338           lex->out_first = 0;
 339           lex->out_last = 0;
 340
 341           while (1)
 342             {
 343               /* See if we need to read more data.  If FIRST == 0 then
 344                  the previous conversion attempt ended in the middle of
 345                  a character at the end of the buffer.  Otherwise we
 346                  only have to read if the buffer is empty.  */
 347               if (lex->first == 0 || lex->first >= lex->last)
 348                 {
 349                   int r;
 350
 351                   if (lex->first >= lex->last)
 352                     {
 353                       lex->first = 0;
 354                       lex->last = 0;
 355                     }
 356                   if (feof (lex->finput))
 357                     return UEOF;
 358                   r = fread (&lex->buffer[lex->last], 1,
 359                              sizeof (lex->buffer) - lex->last,
 360                              lex->finput);
 361                   lex->last += r;
 362                 }
 363
 364               inbytesleft = lex->last - lex->first;
 365               out_count = sizeof (lex->out_buffer) - lex->out_last;
 366
 367               if (inbytesleft == 0)
 368                 {
 369                   /* We've tried to read and there is nothing left.  */
 370                   return UEOF;
 371                 }
 372
 373               in_save = inbytesleft;
 374               out_save = out_count;
 375               inp = &lex->buffer[lex->first];
 376               outp = &lex->out_buffer[lex->out_last];
 377               ir = iconv (lex->handle, (ICONV_CONST char **) &inp,
 378                           &inbytesleft, &outp, &out_count);
 379
 380               /* If we haven't read any bytes, then look to see if we
 381                  have read a BOM.  */
 382               if (! lex->read_anything && out_save - out_count >= 2)
 383                 {
 384                   unicode_t uc = * (unicode_t *) &lex->out_buffer[0];
 385                   if (uc == 0xfeff)
 386                     {
 387                       lex->byte_swap = 0;
 388                       lex->out_first += 2;
 389                     }
 390                   else if (uc == 0xfffe)
 391                     {
 392                       lex->byte_swap = 1;
 393                       lex->out_first += 2;
 394                     }
 395                   lex->read_anything = 1;
 396                 }
 397
 398               if (lex->byte_swap)
 399                 {
 400                   unsigned int i;
 401                   for (i = 0; i < out_save - out_count; i += 2)
 402                     {
 403                       char t = lex->out_buffer[lex->out_last + i];
 404                       lex->out_buffer[lex->out_last + i]
 405                         = lex->out_buffer[lex->out_last + i + 1];
 406                       lex->out_buffer[lex->out_last + i + 1] = t;
 407                     }
 408                 }
 409
 410               lex->first += in_save - inbytesleft;
 411               lex->out_last += out_save - out_count;
 412
 413               /* If we converted anything at all, move along.  */
 414               if (out_count != out_save)
 415                 break;
 416
 417               if (ir == (size_t) -1)
 418                 {
 419                   if (errno == EINVAL)
 420                     {
 421                       /* This is ok.  This means that the end of our buffer
 422                          is in the middle of a character sequence.  We just
 423                          move the valid part of the buffer to the beginning
 424                          to force a read.  */
 425                       memmove (&lex->buffer[0], &lex->buffer[lex->first],
 426                                lex->last - lex->first);
 427                       lex->last -= lex->first;
 428                       lex->first = 0;
 429                     }
 430                   else
 431                     {
 432                       /* A more serious error.  */
 433                       java_lex_error ("unrecognized character in input stream",
 434                                       0);
 435                       return UEOF;
 436                     }
 437                 }
 438             }
 439         }
 440
 441       if (lex->out_first == -1 || lex->out_first >= lex->out_last)
 442         {
 443           /* Don't have any data.  */
 444           return UEOF;
 445         }
 446
 447       /* Success.  */
 448       result = * ((unicode_t *) &lex->out_buffer[lex->out_first]);
 449       lex->out_first += 2;
 450       return result;
 451     }
 452   else
 453 #endif /* HAVE_ICONV */
 454     {
 455       int c, c1, c2;
 456       c = getc (lex->finput);
 457
 458       if (c == EOF)
 459         return UEOF;
 460       if (c < 128)
 461         return (unicode_t) c;
 462       else
 463         {
 464           if ((c & 0xe0) == 0xc0)
 465             {
 466               c1 = getc (lex->finput);
 467               if ((c1 & 0xc0) == 0x80)
 468                 {
 469                   unicode_t r = (unicode_t)(((c & 0x1f) << 6) + (c1 & 0x3f));
 470                   /* Check for valid 2-byte characters.  We explicitly
 471                      allow \0 because this encoding is common in the
 472                      Java world.  */
 473                   if (r == 0 || (r >= 0x80 && r <= 0x7ff))
 474                     return r;
 475                 }
 476             }
 477           else if ((c & 0xf0) == 0xe0)
 478             {
 479               c1 = getc (lex->finput);
 480               if ((c1 & 0xc0) == 0x80)
 481                 {
 482                   c2 = getc (lex->finput);
 483                   if ((c2 & 0xc0) == 0x80)
 484                     {
 485                       unicode_t r =  (unicode_t)(((c & 0xf) << 12) +
 486                                                  (( c1 & 0x3f) << 6)
 487                                                  + (c2 & 0x3f));
 488                       /* Check for valid 3-byte characters.
 489                          Don't allow surrogate, \ufffe or \uffff.  */
 490                       if (IN_RANGE (r, 0x800, 0xffff)
 491                           && ! IN_RANGE (r, 0xd800, 0xdfff)
 492                           && r != 0xfffe && r != 0xffff)
 493                         return r;
 494                     }
 495                 }
 496             }
 497
 498           /* We simply don't support invalid characters.  We also
 499              don't support 4-, 5-, or 6-byte UTF-8 sequences, as these
 500              cannot be valid Java characters.  */
 501           java_lex_error ("malformed UTF-8 character", 0);
 502         }
 503     }
 504
 505   /* We only get here on error.  */
 506   return UEOF;
 507 }
 508
 509 static void
 510 java_store_unicode (struct java_line *l, unicode_t c, int unicode_escape_p)
 511 {
 512   if (l->size == l->max)
 513     {
 514       l->max += JAVA_LINE_MAX;
 515       l->line = xrealloc (l->line, sizeof (unicode_t)*l->max);
 516       l->unicode_escape_p = xrealloc (l->unicode_escape_p,
 517                                       sizeof (char)*l->max);
 518     }
 519   l->line [l->size] = c;
 520   l->unicode_escape_p [l->size++] = unicode_escape_p;
 521 }
 522
 523 static int
 524 java_read_unicode (java_lexer *lex, int *unicode_escape_p)
 525 {
 526   int c;
 527
 528   c = java_read_char (lex);
 529   *unicode_escape_p = 0;
 530
 531   if (c != '\\')
 532     {
 533       lex->bs_count = 0;
 534       return c;
 535     }
 536
 537   ++lex->bs_count;
 538   if ((lex->bs_count) % 2 == 1)
 539     {
 540       /* Odd number of \ seen.  */
 541       c = java_read_char (lex);
 542       if (c == 'u')
 543         {
 544           unicode_t unicode = 0;
 545           int shift = 12;
 546
 547           /* Recognize any number of `u's in \u.  */
 548           while ((c = java_read_char (lex)) == 'u')
 549             ;
 550
 551           shift = 12;
 552           do
 553             {
 554               if (c == UEOF)
 555                 {
 556                   java_lex_error ("prematurely terminated \\u sequence", 0);
 557                   return UEOF;
 558                 }
 559
 560               if (hex_p (c))
 561                 unicode |= (unicode_t)(hex_value (c) << shift);
 562               else
 563                 {
 564                   java_lex_error ("non-hex digit in \\u sequence", 0);
 565                   break;
 566                 }
 567
 568               c = java_read_char (lex);
 569               shift -= 4;
 570             }
 571           while (shift >= 0);
 572
 573           if (c != UEOF)
 574             lex->unget_value = c;
 575
 576           lex->bs_count = 0;
 577           *unicode_escape_p = 1;
 578           return unicode;
 579         }
 580       lex->unget_value = c;
 581     }
 582   return (unicode_t) '\\';
 583 }
 584
 585 static int
 586 java_read_unicode_collapsing_terminators (java_lexer *lex,
 587                                           int *unicode_escape_p)
 588 {
 589   int c = java_read_unicode (lex, unicode_escape_p);
 590
 591   if (c == '\r')
 592     {
 593       /* We have to read ahead to see if we got \r\n.  In that case we
 594          return a single line terminator.  */
 595       int dummy;
 596       c = java_read_unicode (lex, &dummy);
 597       if (c != '\n' && c != UEOF)
 598         lex->unget_value = c;
 599       /* In either case we must return a newline.  */
 600       c = '\n';
 601     }
 602
 603   return c;
 604 }
 605
 606 static int
 607 java_get_unicode (void)
 608 {
 609   /* It's time to read a line when...  */
 610   if (!ctxp->c_line || ctxp->c_line->current == ctxp->c_line->size)
 611     {
 612       int c;
 613       int found_chars = 0;
 614
 615       if (ctxp->lexer->hit_eof)
 616         return UEOF;
 617
 618       java_allocate_new_line ();
 619       if (ctxp->c_line->line[0] != '\n')
 620         {
 621           for (;;)
 622             {
 623               int unicode_escape_p;
 624               c = java_read_unicode_collapsing_terminators (ctxp->lexer,
 625                                                             &unicode_escape_p);
 626               if (c != UEOF)
 627                 {
 628                   found_chars = 1;
 629                   java_store_unicode (ctxp->c_line, c, unicode_escape_p);
 630                   if (ctxp->c_line->white_space_only
 631                       && !JAVA_WHITE_SPACE_P (c)
 632                       && c != '\n')
 633                     ctxp->c_line->white_space_only = 0;
 634                 }
 635               if ((c == '\n') || (c == UEOF))
 636                 break;
 637             }
 638
 639           if (c == UEOF && ! found_chars)
 640             {
 641               ctxp->lexer->hit_eof = 1;
 642               return UEOF;
 643             }
 644         }
 645     }
 646   ctxp->c_line->char_col += JAVA_COLUMN_DELTA (0);
 647   JAVA_LEX_CHAR (ctxp->c_line->line [ctxp->c_line->current]);
 648   return ctxp->c_line->line [ctxp->c_line->current++];
 649 }
 650
 651 /* Parse the end of a C style comment.
 652  * C is the first character following the '/' and '*'.  */
 653 static void
 654 java_parse_end_comment (int c)
 655 {
 656   for ( ;; c = java_get_unicode ())
 657     {
 658       switch (c)
 659         {
 660         case UEOF:
 661           java_lex_error ("Comment not terminated at end of input", 0);
 662           return;
 663         case '*':
 664           switch (c = java_get_unicode ())
 665             {
 666             case UEOF:
 667               java_lex_error ("Comment not terminated at end of input", 0);
 668               return;
 669             case '/':
 670               return;
 671             case '*':   /* Reparse only '*'.  */
 672               java_unget_unicode ();
 673             }
 674         }
 675     }
 676 }
 677
 678 /* Parse the documentation section. Keywords must be at the beginning
 679    of a documentation comment line (ignoring white space and any `*'
 680    character). Parsed keyword(s): @DEPRECATED.  */
 681
 682 static void
 683 java_parse_doc_section (int c)
 684 {
 685   int last_was_star;
 686
 687   /* We reset this here, because only the most recent doc comment
 688      applies to the following declaration.  */
 689   ctxp->deprecated = 0;
 690
 691   /* We loop over all the lines of the comment.  We'll eventually exit
 692      if we hit EOF prematurely, or when we see the comment
 693      terminator.  */
 694   while (1)
 695     {
 696       /* These first steps need only be done if we're still looking
 697          for the deprecated tag.  If we've already seen it, we might
 698          as well skip looking for it again.  */
 699       if (! ctxp->deprecated)
 700         {
 701           /* Skip whitespace and '*'s.  We must also check for the end
 702              of the comment here.  */
 703           while (JAVA_WHITE_SPACE_P (c) || c == '*')
 704             {
 705               last_was_star = (c == '*');
 706               c = java_get_unicode ();
 707               if (last_was_star && c == '/')
 708                 {
 709                   /* We just saw the comment terminator.  */
 710                   return;
 711                 }
 712             }
 713
 714           if (c == UEOF)
 715             goto eof;
 716
 717           if (c == '@')
 718             {
 719               const char *deprecated = "@deprecated";
 720               int i;
 721
 722               for (i = 0; deprecated[i]; ++i)
 723                 {
 724                   if (c != deprecated[i])
 725                     break;
 726                   /* We write the code in this way, with the
 727                      update at the end, so that after the loop
 728                      we're left with the next character in C.  */
 729                   c = java_get_unicode ();
 730                 }
 731
 732               if (c == UEOF)
 733                 goto eof;
 734
 735               /* @deprecated must be followed by a space or newline.
 736                  We also allow a '*' in case it appears just before
 737                  the end of a comment.  In this position only we also
 738                  must allow any Unicode space character.  */
 739               if (c == ' ' || c == '\n' || c == '*' || java_space_char_p (c))
 740                 {
 741                   if (! deprecated[i])
 742                     ctxp->deprecated = 1;
 743                 }
 744             }
 745         }
 746
 747       /* We've examined the relevant content from this line.  Now we
 748          skip the remaining characters and start over with the next
 749          line.  We also check for end of comment here.  */
 750       while (c != '\n' && c != UEOF)
 751         {
 752           last_was_star = (c == '*');
 753           c = java_get_unicode ();
 754           if (last_was_star && c == '/')
 755             return;
 756         }
 757
 758       if (c == UEOF)
 759         goto eof;
 760       /* We have to advance past the \n.  */
 761       c = java_get_unicode ();
 762       if (c == UEOF)
 763         goto eof;
 764     }
 765
 766  eof:
 767   java_lex_error ("Comment not terminated at end of input", 0);
 768 }
 769
 770 /* Return true if C is a valid start character for a Java identifier.
 771    This is only called if C >= 128 -- smaller values are handled
 772    inline.  However, this function handles all values anyway.  */
 773 static int
 774 java_start_char_p (unicode_t c)
 775 {
 776   unsigned int hi = c / 256;
 777   const char *const page = type_table[hi];
 778   unsigned long val = (unsigned long) page;
 779   int flags;
 780
 781   if ((val & ~ LETTER_MASK) != 0)
 782     flags = page[c & 255];
 783   else
 784     flags = val;
 785
 786   return flags & LETTER_START;
 787 }
 788
 789 /* Return true if C is a valid part character for a Java identifier.
 790    This is only called if C >= 128 -- smaller values are handled
 791    inline.  However, this function handles all values anyway.  */
 792 static int
 793 java_part_char_p (unicode_t c)
 794 {
 795   unsigned int hi = c / 256;
 796   const char *const page = type_table[hi];
 797   unsigned long val = (unsigned long) page;
 798   int flags;
 799
 800   if ((val & ~ LETTER_MASK) != 0)
 801     flags = page[c & 255];
 802   else
 803     flags = val;
 804
 805   return flags & LETTER_PART;
 806 }
 807
 808 /* Return true if C is whitespace.  */
 809 static int
 810 java_space_char_p (unicode_t c)
 811 {
 812   unsigned int hi = c / 256;
 813   const char *const page = type_table[hi];
 814   unsigned long val = (unsigned long) page;
 815   int flags;
 816
 817   if ((val & ~ LETTER_MASK) != 0)
 818     flags = page[c & 255];
 819   else
 820     flags = val;
 821
 822   return flags & LETTER_SPACE;
 823 }
 824
 825 static int
 826 java_parse_escape_sequence (void)
 827 {
 828   unicode_t char_lit;
 829   int c;
 830
 831   switch (c = java_get_unicode ())
 832     {
 833     case 'b':
 834       return (unicode_t)0x8;
 835     case 't':
 836       return (unicode_t)0x9;
 837     case 'n':
 838       return (unicode_t)0xa;
 839     case 'f':
 840       return (unicode_t)0xc;
 841     case 'r':
 842       return (unicode_t)0xd;
 843     case '"':
 844       return (unicode_t)0x22;
 845     case '\'':
 846       return (unicode_t)0x27;
 847     case '\\':
 848       return (unicode_t)0x5c;
 849     case '0': case '1': case '2': case '3': case '4':
 850     case '5': case '6': case '7':
 851       {
 852         int octal_escape[3];
 853         int octal_escape_index = 0;
 854         int max = 3;
 855         int i, shift;
 856
 857         for (; octal_escape_index < max && RANGE (c, '0', '7');
 858              c = java_get_unicode ())
 859           {
 860             if (octal_escape_index == 0 && c > '3')
 861               {
 862                 /* According to the grammar, `\477' has a well-defined
 863                    meaning -- it is `\47' followed by `7'.  */
 864                 --max;
 865               }
 866             octal_escape [octal_escape_index++] = c;
 867           }
 868
 869         java_unget_unicode ();
 870
 871         for (char_lit=0, i = 0, shift = 3*(octal_escape_index-1);
 872              i < octal_escape_index; i++, shift -= 3)
 873           char_lit |= (octal_escape [i] - '0') << shift;
 874
 875         return char_lit;
 876       }
 877     default:
 878       java_lex_error ("Invalid character in escape sequence", 0);
 879       return JAVA_CHAR_ERROR;
 880     }
 881 }
 882
 883 #ifndef JC1_LITE
 884 #define IS_ZERO(X) REAL_VALUES_EQUAL (X, dconst0)
 885
 886 /* Subroutine of java_lex: converts floating-point literals to tree
 887    nodes.  LITERAL_TOKEN is the input literal, JAVA_LVAL is where to
 888    store the result.  FFLAG indicates whether the literal was tagged
 889    with an 'f', indicating it is of type 'float'; NUMBER_BEGINNING
 890    is the line number on which to report any error.  */
 891
 892 static void java_perform_atof (YYSTYPE *, char *, int, int);
 893
 894 static void
 895 java_perform_atof (YYSTYPE *java_lval, char *literal_token, int fflag,
 896                    int number_beginning)
 897 {
 898   REAL_VALUE_TYPE value;
 899   tree type = (fflag ? FLOAT_TYPE_NODE : DOUBLE_TYPE_NODE);
 900
 901   SET_REAL_VALUE_ATOF (value,
 902                        REAL_VALUE_ATOF (literal_token, TYPE_MODE (type)));
 903
 904   if (REAL_VALUE_ISINF (value) || REAL_VALUE_ISNAN (value))
 905     {
 906       JAVA_FLOAT_RANGE_ERROR (fflag ? "float" : "double");
 907       value = DCONST0;
 908     }
 909   else if (IS_ZERO (value))
 910     {
 911       /* We check to see if the value is really 0 or if we've found an
 912          underflow.  We do this in the most primitive imaginable way.  */
 913       int really_zero = 1;
 914       char *p = literal_token;
 915       if (*p == '-')
 916         ++p;
 917       while (*p && *p != 'e' && *p != 'E')
 918         {
 919           if (*p != '0' && *p != '.')
 920             {
 921               really_zero = 0;
 922               break;
 923             }
 924           ++p;
 925         }
 926       if (! really_zero)
 927         {
 928           int i = ctxp->c_line->current;
 929           ctxp->c_line->current = number_beginning;
 930           java_lex_error ("Floating point literal underflow", 0);
 931           ctxp->c_line->current = i;
 932         }
 933     }
 934
 935   SET_LVAL_NODE_TYPE (build_real (type, value), type);
 936 }
 937 #endif
 938
 939 static int yylex (YYSTYPE *);
 940
 941 static int
 942 #ifdef JC1_LITE
 943 yylex (YYSTYPE *java_lval)
 944 #else
 945 do_java_lex (YYSTYPE *java_lval)
 946 #endif
 947 {
 948   int c;
 949   unicode_t first_unicode;
 950   int ascii_index, all_ascii;
 951   char *string;
 952
 953   /* Translation of the Unicode escape in the raw stream of Unicode
 954      characters. Takes care of line terminator.  */
 955  step1:
 956   /* Skip white spaces: SP, TAB and FF or ULT.  */
 957   for (c = java_get_unicode ();
 958        c == '\n' || JAVA_WHITE_SPACE_P (c); c = java_get_unicode ())
 959     if (c == '\n')
 960       {
 961         ctxp->elc.line = ctxp->c_line->lineno;
 962         ctxp->elc.col  = ctxp->c_line->char_col-2;
 963       }
 964
 965   ctxp->elc.col = (ctxp->elc.col < 0 ? 0 : ctxp->elc.col);
 966
 967   if (c == 0x1a)                /* CTRL-Z.  */
 968     {
 969       if ((c = java_get_unicode ()) == UEOF)
 970         return 0;               /* Ok here.  */
 971       else
 972         java_unget_unicode ();  /* Caught later, at the end of the
 973                                    function.  */
 974     }
 975   /* Handle EOF here.  */
 976   if (c == UEOF)        /* Should probably do something here...  */
 977     return 0;
 978
 979   /* Take care of eventual comments.  */
 980   if (c == '/')
 981     {
 982       switch (c = java_get_unicode ())
 983         {
 984         case '/':
 985           for (;;)
 986             {
 987               c = java_get_unicode ();
 988               if (c == UEOF)
 989                 {
 990                   /* It is ok to end a `//' comment with EOF, unless
 991                      we're being pedantic.  */
 992                   if (pedantic)
 993                     java_lex_error ("Comment not terminated at end of input",
 994                                     0);
 995                   return 0;
 996                 }
 997               if (c == '\n')    /* ULT */
 998                 goto step1;
 999             }
1000           break;
1001
1002         case '*':
1003           if ((c = java_get_unicode ()) == '*')
1004             {
1005               c = java_get_unicode ();
1006               if (c == '/')
1007                 {
1008                   /* Empty documentation comment.  We have to reset
1009                      the deprecation marker as only the most recent
1010                      doc comment applies.  */
1011                   ctxp->deprecated = 0;
1012                 }
1013               else
1014                 java_parse_doc_section (c);
1015             }
1016           else
1017             java_parse_end_comment ((c = java_get_unicode ()));
1018           goto step1;
1019           break;
1020         default:
1021           java_unget_unicode ();
1022           c = '/';
1023           break;
1024         }
1025     }
1026
1027   ctxp->elc.line = ctxp->c_line->lineno;
1028   ctxp->elc.prev_col = ctxp->elc.col;
1029   ctxp->elc.col = ctxp->c_line->char_col - JAVA_COLUMN_DELTA (-1);
1030   if (ctxp->elc.col < 0)
1031     abort ();
1032
1033   /* Numeric literals.  */
1034   if (JAVA_ASCII_DIGIT (c) || (c == '.'))
1035     {
1036       /* This section of code is borrowed from gcc/c-lex.c.  */
1037 #define TOTAL_PARTS ((HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR) * 2 + 2)
1038       int parts[TOTAL_PARTS];
1039       HOST_WIDE_INT high, low;
1040       /* End borrowed section.  */
1041       char literal_token [256];
1042       int  literal_index = 0, radix = 10, long_suffix = 0, overflow = 0, bytes;
1043       int  found_hex_digits = 0, found_non_octal_digits = 0;
1044       int  i;
1045 #ifndef JC1_LITE
1046       int  number_beginning = ctxp->c_line->current;
1047       tree value;
1048 #endif
1049
1050       /* We might have a . separator instead of a FP like .[0-9]*.  */
1051       if (c == '.')
1052         {
1053           unicode_t peep = java_sneak_unicode ();
1054
1055           if (!JAVA_ASCII_DIGIT (peep))
1056             {
1057               JAVA_LEX_SEP('.');
1058               BUILD_OPERATOR (DOT_TK);
1059             }
1060         }
1061
1062       for (i = 0; i < TOTAL_PARTS; i++)
1063         parts [i] = 0;
1064
1065       if (c == '0')
1066         {
1067           c = java_get_unicode ();
1068           if (c == 'x' || c == 'X')
1069             {
1070               radix = 16;
1071               c = java_get_unicode ();
1072             }
1073           else if (JAVA_ASCII_DIGIT (c))
1074             radix = 8;
1075           else if (c == '.' || c == 'e' || c =='E')
1076             {
1077               /* Push the '.', 'e', or 'E' back and prepare for a FP
1078                  parsing...  */
1079               java_unget_unicode ();
1080               c = '0';
1081             }
1082           else
1083             {
1084               /* We have a zero literal: 0, 0{l,L}, 0{f,F}, 0{d,D}.  */
1085               JAVA_LEX_LIT ("0", 10);
1086               switch (c)
1087                 {
1088                 case 'L': case 'l':
1089                   SET_LVAL_NODE (long_zero_node);
1090                   return (INT_LIT_TK);
1091                 case 'f': case 'F':
1092                   SET_LVAL_NODE (float_zero_node);
1093                   return (FP_LIT_TK);
1094                 case 'd': case 'D':
1095                   SET_LVAL_NODE (double_zero_node);
1096                   return (FP_LIT_TK);
1097                 default:
1098                   java_unget_unicode ();
1099                   SET_LVAL_NODE (integer_zero_node);
1100                   return (INT_LIT_TK);
1101                 }
1102             }
1103         }
1104       /* Parse the first part of the literal, until we find something
1105          which is not a number.  */
1106       while ((radix == 16 && JAVA_ASCII_HEXDIGIT (c)) ||
1107              JAVA_ASCII_DIGIT (c))
1108         {
1109           /* We store in a string (in case it turns out to be a FP) and in
1110              PARTS if we have to process a integer literal.  */
1111           int numeric = hex_value (c);
1112           int count;
1113
1114           /* Remember when we find a valid hexadecimal digit.  */
1115           if (radix == 16)
1116             found_hex_digits = 1;
1117           /* Remember when we find an invalid octal digit.  */
1118           else if (radix == 8 && !JAVA_ASCII_OCTDIGIT (c))
1119             found_non_octal_digits = 1;
1120
1121           literal_token [literal_index++] = c;
1122           /* This section of code if borrowed from gcc/c-lex.c.  */
1123           for (count = 0; count < TOTAL_PARTS; count++)
1124             {
1125               parts[count] *= radix;
1126               if (count)
1127                 {
1128                   parts[count]   += (parts[count-1] >> HOST_BITS_PER_CHAR);
1129                   parts[count-1] &= (1 << HOST_BITS_PER_CHAR) - 1;
1130                 }
1131               else
1132                 parts[0] += numeric;
1133             }
1134           if (parts [TOTAL_PARTS-1] != 0)
1135             overflow = 1;
1136           /* End borrowed section.  */
1137           c = java_get_unicode ();
1138         }
1139
1140       /* If we have something from the FP char set but not a digit, parse
1141          a FP literal.  */
1142       if (JAVA_ASCII_FPCHAR (c) && !JAVA_ASCII_DIGIT (c))
1143         {
1144           int stage = 0;
1145           int seen_digit = (literal_index ? 1 : 0);
1146           int seen_exponent = 0;
1147           int fflag = 0;        /* 1 for {f,F}, 0 for {d,D}. FP literal are
1148                                    double unless specified.  */
1149
1150           /* It is ok if the radix is 8 because this just means we've
1151              seen a leading `0'.  However, radix==16 is invalid.  */
1152           if (radix == 16)
1153             java_lex_error ("Can't express non-decimal FP literal", 0);
1154           radix = 10;
1155
1156           for (;;)
1157             {
1158               if (c == '.')
1159                 {
1160                   if (stage < 1)
1161                     {
1162                       stage = 1;
1163                       literal_token [literal_index++ ] = c;
1164                       c = java_get_unicode ();
1165                     }
1166                   else
1167                     java_lex_error ("Invalid character in FP literal", 0);
1168                 }
1169
1170               if (c == 'e' || c == 'E')
1171                 {
1172                   if (stage < 2)
1173                     {
1174                       /* {E,e} must have seen at least a digit.  */
1175                       if (!seen_digit)
1176                         java_lex_error
1177                           ("Invalid FP literal, mantissa must have digit", 0);
1178                       seen_digit = 0;
1179                       seen_exponent = 1;
1180                       stage = 2;
1181                       literal_token [literal_index++] = c;
1182                       c = java_get_unicode ();
1183                     }
1184                   else
1185                     java_lex_error ("Invalid character in FP literal", 0);
1186                 }
1187               if ( c == 'f' || c == 'F' || c == 'd' || c == 'D')
1188                 {
1189                   fflag = ((c == 'd') || (c == 'D')) ? 0 : 1;
1190                   stage = 4;    /* So we fall through.  */
1191                 }
1192
1193               if ((c=='-' || c =='+') && stage == 2)
1194                 {
1195                   stage = 3;
1196                   literal_token [literal_index++] = c;
1197                   c = java_get_unicode ();
1198                 }
1199
1200               if ((stage == 0 && JAVA_ASCII_FPCHAR (c)) ||
1201                   (stage == 1 && JAVA_ASCII_FPCHAR (c) && !(c == '.')) ||
1202                   (stage == 2 && (JAVA_ASCII_DIGIT (c) || JAVA_FP_PM (c))) ||
1203                   (stage == 3 && JAVA_ASCII_DIGIT (c)))
1204                 {
1205                   if (JAVA_ASCII_DIGIT (c))
1206                     seen_digit = 1;
1207                   if (stage == 2)
1208                     stage = 3;
1209                   literal_token [literal_index++ ] = c;
1210                   c = java_get_unicode ();
1211                 }
1212               else
1213                 {
1214                   if (stage != 4) /* Don't push back fF/dD.  */
1215                     java_unget_unicode ();
1216
1217                   /* An exponent (if any) must have seen a digit.  */
1218                   if (seen_exponent && !seen_digit)
1219                     java_lex_error
1220                       ("Invalid FP literal, exponent must have digit", 0);
1221
1222                   literal_token [literal_index] = '\0';
1223                   JAVA_LEX_LIT (literal_token, radix);
1224
1225 #ifndef JC1_LITE
1226                   java_perform_atof (java_lval, literal_token,
1227                                      fflag, number_beginning);
1228 #endif
1229                   return FP_LIT_TK;
1230                 }
1231             }
1232         } /* JAVA_ASCII_FPCHAR (c) */
1233
1234       /* Here we get back to converting the integral literal.  */
1235       if (radix == 16 && ! found_hex_digits)
1236         java_lex_error
1237           ("0x must be followed by at least one hexadecimal digit", 0);
1238       else if (radix == 8 && found_non_octal_digits)
1239         java_lex_error ("Octal literal contains digit out of range", 0);
1240       else if (c == 'L' || c == 'l')
1241         long_suffix = 1;
1242       else
1243         java_unget_unicode ();
1244
1245 #ifdef JAVA_LEX_DEBUG
1246       literal_token [literal_index] = '\0'; /* So JAVA_LEX_LIT is safe.  */
1247       JAVA_LEX_LIT (literal_token, radix);
1248 #endif
1249       /* This section of code is borrowed from gcc/c-lex.c.  */
1250       if (!overflow)
1251         {
1252           bytes = GET_TYPE_PRECISION (long_type_node);
1253           for (i = bytes; i < TOTAL_PARTS; i++)
1254             if (parts [i])
1255               {
1256                 overflow = 1;
1257                 break;
1258               }
1259         }
1260       high = low = 0;
1261       for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++)
1262         {
1263           high |= ((HOST_WIDE_INT) parts[i + (HOST_BITS_PER_WIDE_INT
1264                                               / HOST_BITS_PER_CHAR)]
1265                    << (i * HOST_BITS_PER_CHAR));
1266           low |= (HOST_WIDE_INT) parts[i] << (i * HOST_BITS_PER_CHAR);
1267         }
1268       /* End borrowed section.  */
1269
1270 #ifndef JC1_LITE
1271       /* Range checking.  */
1272       value = build_int_2 (low, high);
1273       /* Temporarily set type to unsigned.  */
1274       SET_LVAL_NODE_TYPE (value, (long_suffix
1275                                   ? unsigned_long_type_node
1276                                   : unsigned_int_type_node));
1277
1278       /* For base 10 numbers, only values up to the highest value
1279          (plus one) can be written.  For instance, only ints up to
1280          2147483648 can be written.  The special case of the largest
1281          negative value is handled elsewhere.  For other bases, any
1282          number can be represented.  */
1283       if (overflow || (radix == 10
1284                        && tree_int_cst_lt (long_suffix
1285                                            ? decimal_long_max
1286                                            : decimal_int_max,
1287                                            value)))
1288         {
1289           if (long_suffix)
1290             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `long' literal");
1291           else
1292             JAVA_INTEGRAL_RANGE_ERROR ("Numeric overflow for `int' literal");
1293         }
1294
1295       /* Sign extend the value.  */
1296       SET_LVAL_NODE_TYPE (value, (long_suffix ? long_type_node : int_type_node));
1297       force_fit_type (value, 0);
1298       JAVA_RADIX10_FLAG (value) = radix == 10;
1299 #else
1300       SET_LVAL_NODE_TYPE (build_int_2 (low, high),
1301                           long_suffix ? long_type_node : int_type_node);
1302 #endif
1303       return INT_LIT_TK;
1304     }
1305
1306   /* Character literals.  */
1307   if (c == '\'')
1308     {
1309       int char_lit;
1310       if ((c = java_get_unicode ()) == '\\')
1311         char_lit = java_parse_escape_sequence ();
1312       else
1313         {
1314           if (c == '\n' || c == '\'')
1315             java_lex_error ("Invalid character literal", 0);
1316           char_lit = c;
1317         }
1318
1319       c = java_get_unicode ();
1320
1321       if ((c == '\n') || (c == UEOF))
1322         java_lex_error ("Character literal not terminated at end of line", 0);
1323       if (c != '\'')
1324         java_lex_error ("Syntax error in character literal", 0);
1325
1326       if (char_lit == JAVA_CHAR_ERROR)
1327         char_lit = 0;           /* We silently convert it to zero.  */
1328
1329       JAVA_LEX_CHAR_LIT (char_lit);
1330       SET_LVAL_NODE_TYPE (build_int_2 (char_lit, 0), char_type_node);
1331       return CHAR_LIT_TK;
1332     }
1333
1334   /* String literals.  */
1335   if (c == '"')
1336     {
1337       int no_error;
1338       char *string;
1339
1340       for (no_error = 1, c = java_get_unicode ();
1341            c != UEOF && c != '"' && c != '\n'; c = java_get_unicode ())
1342         {
1343           if (c == '\\')
1344             c = java_parse_escape_sequence ();
1345           if (c == JAVA_CHAR_ERROR)
1346             {
1347               no_error = 0;
1348               c = 0;            /* We silently convert it to zero.  */
1349             }
1350           java_unicode_2_utf8 (c);
1351         }
1352       if (c == '\n' || c == UEOF) /* ULT.  */
1353         {
1354           lineno--;     /* Refer to the line where the terminator was seen.  */
1355           java_lex_error ("String not terminated at end of line", 0);
1356           lineno++;
1357         }
1358
1359       obstack_1grow (&temporary_obstack, '\0');
1360       string = obstack_finish (&temporary_obstack);
1361 #ifndef JC1_LITE
1362       if (!no_error || (c != '"'))
1363         java_lval->node = error_mark_node; /* FIXME: Requires futher
1364                                               testing.  */
1365       else
1366         java_lval->node = build_string (strlen (string), string);
1367 #endif
1368       obstack_free (&temporary_obstack, string);
1369       return STRING_LIT_TK;
1370     }
1371
1372   /* Separator.  */
1373   switch (c)
1374     {
1375     case '(':
1376       JAVA_LEX_SEP (c);
1377       BUILD_OPERATOR (OP_TK);
1378     case ')':
1379       JAVA_LEX_SEP (c);
1380       return CP_TK;
1381     case '{':
1382       JAVA_LEX_SEP (c);
1383       if (ctxp->ccb_indent == 1)
1384         ctxp->first_ccb_indent1 = lineno;
1385       ctxp->ccb_indent++;
1386       BUILD_OPERATOR (OCB_TK);
1387     case '}':
1388       JAVA_LEX_SEP (c);
1389       ctxp->ccb_indent--;
1390       if (ctxp->ccb_indent == 1)
1391         ctxp->last_ccb_indent1 = lineno;
1392       BUILD_OPERATOR (CCB_TK);
1393     case '[':
1394       JAVA_LEX_SEP (c);
1395       BUILD_OPERATOR (OSB_TK);
1396     case ']':
1397       JAVA_LEX_SEP (c);
1398       return CSB_TK;
1399     case ';':
1400       JAVA_LEX_SEP (c);
1401       return SC_TK;
1402     case ',':
1403       JAVA_LEX_SEP (c);
1404       return C_TK;
1405     case '.':
1406       JAVA_LEX_SEP (c);
1407       BUILD_OPERATOR (DOT_TK);
1408       /*      return DOT_TK; */
1409     }
1410
1411   /* Operators.  */
1412   switch (c)
1413     {
1414     case '=':
1415       if ((c = java_get_unicode ()) == '=')
1416         {
1417           BUILD_OPERATOR (EQ_TK);
1418         }
1419       else
1420         {
1421           /* Equals is used in two different locations. In the
1422              variable_declarator: rule, it has to be seen as '=' as opposed
1423              to being seen as an ordinary assignment operator in
1424              assignment_operators: rule.  */
1425           java_unget_unicode ();
1426           BUILD_OPERATOR (ASSIGN_TK);
1427         }
1428
1429     case '>':
1430       switch ((c = java_get_unicode ()))
1431         {
1432         case '=':
1433           BUILD_OPERATOR (GTE_TK);
1434         case '>':
1435           switch ((c = java_get_unicode ()))
1436             {
1437             case '>':
1438               if ((c = java_get_unicode ()) == '=')
1439                 {
1440                   BUILD_OPERATOR2 (ZRS_ASSIGN_TK);
1441                 }
1442               else
1443                 {
1444                   java_unget_unicode ();
1445                   BUILD_OPERATOR (ZRS_TK);
1446                 }
1447             case '=':
1448               BUILD_OPERATOR2 (SRS_ASSIGN_TK);
1449             default:
1450               java_unget_unicode ();
1451               BUILD_OPERATOR (SRS_TK);
1452             }
1453         default:
1454           java_unget_unicode ();
1455           BUILD_OPERATOR (GT_TK);
1456         }
1457
1458     case '<':
1459       switch ((c = java_get_unicode ()))
1460         {
1461         case '=':
1462           BUILD_OPERATOR (LTE_TK);
1463         case '<':
1464           if ((c = java_get_unicode ()) == '=')
1465             {
1466               BUILD_OPERATOR2 (LS_ASSIGN_TK);
1467             }
1468           else
1469             {
1470               java_unget_unicode ();
1471               BUILD_OPERATOR (LS_TK);
1472             }
1473         default:
1474           java_unget_unicode ();
1475           BUILD_OPERATOR (LT_TK);
1476         }
1477
1478     case '&':
1479       switch ((c = java_get_unicode ()))
1480         {
1481         case '&':
1482           BUILD_OPERATOR (BOOL_AND_TK);
1483         case '=':
1484           BUILD_OPERATOR2 (AND_ASSIGN_TK);
1485         default:
1486           java_unget_unicode ();
1487           BUILD_OPERATOR (AND_TK);
1488         }
1489
1490     case '|':
1491       switch ((c = java_get_unicode ()))
1492         {
1493         case '|':
1494           BUILD_OPERATOR (BOOL_OR_TK);
1495         case '=':
1496           BUILD_OPERATOR2 (OR_ASSIGN_TK);
1497         default:
1498           java_unget_unicode ();
1499           BUILD_OPERATOR (OR_TK);
1500         }
1501
1502     case '+':
1503       switch ((c = java_get_unicode ()))
1504         {
1505         case '+':
1506           BUILD_OPERATOR (INCR_TK);
1507         case '=':
1508           BUILD_OPERATOR2 (PLUS_ASSIGN_TK);
1509         default:
1510           java_unget_unicode ();
1511           BUILD_OPERATOR (PLUS_TK);
1512         }
1513
1514     case '-':
1515       switch ((c = java_get_unicode ()))
1516         {
1517         case '-':
1518           BUILD_OPERATOR (DECR_TK);
1519         case '=':
1520           BUILD_OPERATOR2 (MINUS_ASSIGN_TK);
1521         default:
1522           java_unget_unicode ();
1523           BUILD_OPERATOR (MINUS_TK);
1524         }
1525
1526     case '*':
1527       if ((c = java_get_unicode ()) == '=')
1528         {
1529           BUILD_OPERATOR2 (MULT_ASSIGN_TK);
1530         }
1531       else
1532         {
1533           java_unget_unicode ();
1534           BUILD_OPERATOR (MULT_TK);
1535         }
1536
1537     case '/':
1538       if ((c = java_get_unicode ()) == '=')
1539         {
1540           BUILD_OPERATOR2 (DIV_ASSIGN_TK);
1541         }
1542       else
1543         {
1544           java_unget_unicode ();
1545           BUILD_OPERATOR (DIV_TK);
1546         }
1547
1548     case '^':
1549       if ((c = java_get_unicode ()) == '=')
1550         {
1551           BUILD_OPERATOR2 (XOR_ASSIGN_TK);
1552         }
1553       else
1554         {
1555           java_unget_unicode ();
1556           BUILD_OPERATOR (XOR_TK);
1557         }
1558
1559     case '%':
1560       if ((c = java_get_unicode ()) == '=')
1561         {
1562           BUILD_OPERATOR2 (REM_ASSIGN_TK);
1563         }
1564       else
1565         {
1566           java_unget_unicode ();
1567           BUILD_OPERATOR (REM_TK);
1568         }
1569
1570     case '!':
1571       if ((c = java_get_unicode()) == '=')
1572         {
1573           BUILD_OPERATOR (NEQ_TK);
1574         }
1575       else
1576         {
1577           java_unget_unicode ();
1578           BUILD_OPERATOR (NEG_TK);
1579         }
1580
1581     case '?':
1582       JAVA_LEX_OP ("?");
1583       BUILD_OPERATOR (REL_QM_TK);
1584     case ':':
1585       JAVA_LEX_OP (":");
1586       BUILD_OPERATOR (REL_CL_TK);
1587     case '~':
1588       BUILD_OPERATOR (NOT_TK);
1589     }
1590
1591   /* Keyword, boolean literal or null literal.  */
1592   for (first_unicode = c, all_ascii = 1, ascii_index = 0;
1593        c != UEOF && JAVA_PART_CHAR_P (c); c = java_get_unicode ())
1594     {
1595       java_unicode_2_utf8 (c);
1596       if (all_ascii && c >= 128)
1597         all_ascii = 0;
1598       ascii_index++;
1599     }
1600
1601   obstack_1grow (&temporary_obstack, '\0');
1602   string = obstack_finish (&temporary_obstack);
1603   if (c != UEOF)
1604     java_unget_unicode ();
1605
1606   /* If we have something all ascii, we consider a keyword, a boolean
1607      literal, a null literal or an all ASCII identifier.  Otherwise,
1608      this is an identifier (possibly not respecting formation rule).  */
1609   if (all_ascii)
1610     {
1611       const struct java_keyword *kw;
1612       if ((kw=java_keyword (string, ascii_index)))
1613         {
1614           JAVA_LEX_KW (string);
1615           switch (kw->token)
1616             {
1617             case PUBLIC_TK:       case PROTECTED_TK: case STATIC_TK:
1618             case ABSTRACT_TK:     case FINAL_TK:     case NATIVE_TK:
1619             case SYNCHRONIZED_TK: case TRANSIENT_TK: case VOLATILE_TK:
1620             case PRIVATE_TK:      case STRICT_TK:
1621               SET_MODIFIER_CTX (kw->token);
1622               return MODIFIER_TK;
1623             case FLOAT_TK:
1624               SET_LVAL_NODE (float_type_node);
1625               return FP_TK;
1626             case DOUBLE_TK:
1627               SET_LVAL_NODE (double_type_node);
1628               return FP_TK;
1629             case BOOLEAN_TK:
1630               SET_LVAL_NODE (boolean_type_node);
1631               return BOOLEAN_TK;
1632             case BYTE_TK:
1633               SET_LVAL_NODE (byte_type_node);
1634               return INTEGRAL_TK;
1635             case SHORT_TK:
1636               SET_LVAL_NODE (short_type_node);
1637               return INTEGRAL_TK;
1638             case INT_TK:
1639               SET_LVAL_NODE (int_type_node);
1640               return INTEGRAL_TK;
1641             case LONG_TK:
1642               SET_LVAL_NODE (long_type_node);
1643               return INTEGRAL_TK;
1644             case CHAR_TK:
1645               SET_LVAL_NODE (char_type_node);
1646               return INTEGRAL_TK;
1647
1648               /* Keyword based literals.  */
1649             case TRUE_TK:
1650             case FALSE_TK:
1651               SET_LVAL_NODE ((kw->token == TRUE_TK ?
1652                               boolean_true_node : boolean_false_node));
1653               return BOOL_LIT_TK;
1654             case NULL_TK:
1655               SET_LVAL_NODE (null_pointer_node);
1656               return NULL_TK;
1657
1658             case ASSERT_TK:
1659               if (flag_assert)
1660                 {
1661                   BUILD_OPERATOR (kw->token);
1662                   return kw->token;
1663                 }
1664               else
1665                 break;
1666
1667               /* Some keyword we want to retain information on the location
1668                  they where found.  */
1669             case CASE_TK:
1670             case DEFAULT_TK:
1671             case SUPER_TK:
1672             case THIS_TK:
1673             case RETURN_TK:
1674             case BREAK_TK:
1675             case CONTINUE_TK:
1676             case TRY_TK:
1677             case CATCH_TK:
1678             case THROW_TK:
1679             case INSTANCEOF_TK:
1680               BUILD_OPERATOR (kw->token);
1681
1682             default:
1683               return kw->token;
1684             }
1685         }
1686     }
1687
1688   /* We may have an ID here.  */
1689   if (JAVA_START_CHAR_P (first_unicode))
1690     {
1691       JAVA_LEX_ID (string);
1692       java_lval->node = BUILD_ID_WFL (GET_IDENTIFIER (string));
1693       return ID_TK;
1694     }
1695
1696   /* Everything else is an invalid character in the input.  */
1697   {
1698     char lex_error_buffer [128];
1699     sprintf (lex_error_buffer, "Invalid character `%s' in input",
1700              java_sprint_unicode (ctxp->c_line, ctxp->c_line->current));
1701     java_lex_error (lex_error_buffer, 1);
1702   }
1703   return 0;
1704 }
1705
1706 #ifndef JC1_LITE
1707
1708 /* The exported interface to the lexer.  */
1709 static int
1710 java_lex (YYSTYPE *java_lval)
1711 {
1712   int r;
1713
1714   timevar_push (TV_LEX);
1715   r = do_java_lex (java_lval);
1716   timevar_pop (TV_LEX);
1717   return r;
1718 }
1719
1720 /* This is called by the parser to see if an error should be generated
1721    due to numeric overflow.  This function only handles the particular
1722    case of the largest negative value, and is only called in the case
1723    where this value is not preceded by `-'.  */
1724 static void
1725 error_if_numeric_overflow (tree value)
1726 {
1727   if (TREE_CODE (value) == INTEGER_CST
1728       && JAVA_RADIX10_FLAG (value)
1729       && tree_int_cst_sgn (value) < 0)
1730     {
1731       if (TREE_TYPE (value) == long_type_node)
1732         java_lex_error ("Numeric overflow for `long' literal", 0);
1733       else
1734         java_lex_error ("Numeric overflow for `int' literal", 0);
1735     }
1736 }
1737
1738 #endif /* JC1_LITE */
1739
1740 static void
1741 java_unicode_2_utf8 (unicode_t unicode)
1742 {
1743   if (RANGE (unicode, 0x01, 0x7f))
1744     obstack_1grow (&temporary_obstack, (char)unicode);
1745   else if (RANGE (unicode, 0x80, 0x7ff) || unicode == 0)
1746     {
1747       obstack_1grow (&temporary_obstack,
1748                      (unsigned char)(0xc0 | ((0x7c0 & unicode) >> 6)));
1749       obstack_1grow (&temporary_obstack,
1750                      (unsigned char)(0x80 | (unicode & 0x3f)));
1751     }
1752   else                          /* Range 0x800-0xffff.  */
1753     {
1754       obstack_1grow (&temporary_obstack,
1755                      (unsigned char)(0xe0 | (unicode & 0xf000) >> 12));
1756       obstack_1grow (&temporary_obstack,
1757                      (unsigned char)(0x80 | (unicode & 0x0fc0) >> 6));
1758       obstack_1grow (&temporary_obstack,
1759                      (unsigned char)(0x80 | (unicode & 0x003f)));
1760     }
1761 }
1762
1763 #ifndef JC1_LITE
1764 static tree
1765 build_wfl_node (tree node)
1766 {
1767   node = build_expr_wfl (node, ctxp->filename, ctxp->elc.line, ctxp->elc.col);
1768   /* Prevent java_complete_lhs from short-circuiting node (if constant).  */
1769   TREE_TYPE (node) = NULL_TREE;
1770   return node;
1771 }
1772 #endif
1773
1774 static void
1775 java_lex_error (const char *msg ATTRIBUTE_UNUSED, int forward ATTRIBUTE_UNUSED)
1776 {
1777 #ifndef JC1_LITE
1778   ctxp->elc.line = ctxp->c_line->lineno;
1779   ctxp->elc.col = ctxp->c_line->char_col-1+forward;
1780
1781   /* Might be caught in the middle of some error report.  */
1782   ctxp->java_error_flag = 0;
1783   java_error (NULL);
1784   java_error (msg);
1785 #endif
1786 }
1787
1788 #ifndef JC1_LITE
/* Return 1 when C, a character just read from FP, ends a line and 0
   otherwise.  Both `\n' and `\r' count as line ends.  After a `\r' the
   next character is read from FP: a `\n' is consumed with it, any
   other character except EOF is pushed back.  */
static int
java_is_eol (FILE *fp, int c)
{
  if (c == '\n')
    return 1;

  if (c == '\r')
    {
      int following = getc (fp);

      if (following != '\n' && following != EOF)
        ungetc (following, fp);
      return 1;
    }

  return 0;
}
1806 #endif
1807
1808 char *
1809 java_get_line_col (const char *filename ATTRIBUTE_UNUSED,
1810                    int line ATTRIBUTE_UNUSED, int col ATTRIBUTE_UNUSED)
1811 {
1812 #ifdef JC1_LITE
1813   return 0;
1814 #else
1815   /* Dumb implementation. Doesn't try to cache or optimize things.  */
1816   /* First line of the file is line 1, first column is 1.  */
1817
1818   /* COL == -1 means, at the CR/LF in LINE.  */
1819   /* COL == -2 means, at the first non space char in LINE.  */
1820
1821   FILE *fp;
1822   int c, ccol, cline = 1;
1823   int current_line_col = 0;
1824   int first_non_space = 0;
1825   char *base;
1826
1827   if (!(fp = fopen (filename, "r")))
1828     fatal_io_error ("can't open %s", filename);
1829
1830   while (cline != line)
1831     {
1832       c = getc (fp);
1833       if (c == EOF)
1834         {
1835           static const char msg[] = "<<file too short - unexpected EOF>>";
1836           obstack_grow (&temporary_obstack, msg, sizeof(msg)-1);
1837           goto have_line;
1838         }
1839       if (java_is_eol (fp, c))
1840         cline++;
1841     }
1842
1843   /* Gather the chars of the current line in a buffer.  */
1844   for (;;)
1845     {
1846       c = getc (fp);
1847       if (c < 0 || java_is_eol (fp, c))
1848         break;
1849       if (!first_non_space && !JAVA_WHITE_SPACE_P (c))
1850         first_non_space = current_line_col;
1851       obstack_1grow (&temporary_obstack, c);
1852       current_line_col++;
1853     }
1854  have_line:
1855
1856   obstack_1grow (&temporary_obstack, '\n');
1857
1858   if (col == -1)
1859     {
1860       col = current_line_col;
1861       first_non_space = 0;
1862     }
1863   else if (col == -2)
1864     col = first_non_space;
1865   else
1866     first_non_space = 0;
1867
1868   /* Place the '^' a the right position.  */
1869   base = obstack_base (&temporary_obstack);
1870   for (ccol = 1; ccol <= col+3; ccol++)
1871     {
1872       /* Compute \t when reaching first_non_space.  */
1873       char c = (first_non_space ?
1874                 (base [ccol-1] == '\t' ? '\t' : ' ') : ' ');
1875       obstack_1grow (&temporary_obstack, c);
1876     }
1877   obstack_grow0 (&temporary_obstack, "^", 1);
1878
1879   fclose (fp);
1880   return obstack_finish (&temporary_obstack);
1881 #endif
1882 }
1883
1884 #ifndef JC1_LITE
/* Compare the LENGTH bytes of UTF-8 text starting at STR with the
   NUL-terminated string NAME, one decoded character at a time.

   Returns the difference between the first pair of characters that
   differ.  If every character of NAME matched, returns 0 when STR was
   consumed exactly up to its limit and 1 otherwise.

   NOTE(review): UTF8_GET is defined elsewhere; this code relies on it
   advancing STR past each character it decodes -- confirm.  */

static int
utf8_cmp (const unsigned char *str, int length, const char *name)
{
  const unsigned char *const limit = str + length;
  const char *expected;

  for (expected = name; *expected != '\0'; expected++)
    {
      int ch = UTF8_GET (str, limit);

      if (ch != *expected)
        return ch - *expected;
    }

  /* All of NAME matched; anything left over in STR makes it differ.  */
  return str != limit;
}
1900
1901 /* A sorted list of all C++ keywords.  */
1902
1903 static const char *const cxx_keywords[] =
1904 {
1905   "_Complex",
1906   "__alignof",
1907   "__alignof__",
1908   "__asm",
1909   "__asm__",
1910   "__attribute",
1911   "__attribute__",
1912   "__builtin_va_arg",
1913   "__complex",
1914   "__complex__",
1915   "__const",
1916   "__const__",
1917   "__extension__",
1918   "__imag",
1919   "__imag__",
1920   "__inline",
1921   "__inline__",
1922   "__label__",
1923   "__null",
1924   "__real",
1925   "__real__",
1926   "__restrict",
1927   "__restrict__",
1928   "__signed",
1929   "__signed__",
1930   "__typeof",
1931   "__typeof__",
1932   "__volatile",
1933   "__volatile__",
1934   "and",
1935   "and_eq",
1936   "asm",
1937   "auto",
1938   "bitand",
1939   "bitor",
1940   "bool",
1941   "break",
1942   "case",
1943   "catch",
1944   "char",
1945   "class",
1946   "compl",
1947   "const",
1948   "const_cast",
1949   "continue",
1950   "default",
1951   "delete",
1952   "do",
1953   "double",
1954   "dynamic_cast",
1955   "else",
1956   "enum",
1957   "explicit",
1958   "export",
1959   "extern",
1960   "false",
1961   "float",
1962   "for",
1963   "friend",
1964   "goto",
1965   "if",
1966   "inline",
1967   "int",
1968   "long",
1969   "mutable",
1970   "namespace",
1971   "new",
1972   "not",
1973   "not_eq",
1974   "operator",
1975   "or",
1976   "or_eq",
1977   "private",
1978   "protected",
1979   "public",
1980   "register",
1981   "reinterpret_cast",
1982   "return",
1983   "short",
1984   "signed",
1985   "sizeof",
1986   "static",
1987   "static_cast",
1988   "struct",
1989   "switch",
1990   "template",
1991   "this",
1992   "throw",
1993   "true",
1994   "try",
1995   "typedef",
1996   "typeid",
1997   "typename",
1998   "typeof",
1999   "union",
2000   "unsigned",
2001   "using",
2002   "virtual",
2003   "void",
2004   "volatile",
2005   "wchar_t",
2006   "while",
2007   "xor",
2008   "xor_eq"
2009 };
2010
2011 /* Return true if NAME is a C++ keyword.  */
2012
2013 int
2014 cxx_keyword_p (const char *name, int length)
2015 {
2016   int last = ARRAY_SIZE (cxx_keywords);
2017   int first = 0;
2018   int mid = (last + first) / 2;
2019   int old = -1;
2020
2021   for (mid = (last + first) / 2;
2022        mid != old;
2023        old = mid, mid = (last + first) / 2)
2024     {
2025       int kwl = strlen (cxx_keywords[mid]);
2026       int min_length = kwl > length ? length : kwl;
2027       int r = utf8_cmp (name, min_length, cxx_keywords[mid]);
2028
2029       if (r == 0)
2030         {
2031           int i;
2032           /* We've found a match if all the remaining characters are `$'.  */
2033           for (i = min_length; i < length && name[i] == '$'; ++i)
2034             ;
2035           if (i == length)
2036             return 1;
2037           r = 1;
2038         }
2039
2040       if (r < 0)
2041         last = mid;
2042       else
2043         first = mid;
2044     }
2045   return 0;
2046 }
2047 #endif /* JC1_LITE */