gas/app.c

   1 /* This is the Assembler Pre-Processor
   2    Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 1999
   3    Free Software Foundation, Inc.
   4
   5    This file is part of GAS, the GNU Assembler.
   6
   7    GAS is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    GAS is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GAS; see the file COPYING.  If not, write to the Free
  19    Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20    02111-1307, USA.  */
  21
  22 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  23 /* App, the assembler pre-processor.  This pre-processor strips out excess
  24    spaces, turns single-quoted characters into a decimal constant, and turns
  25    # <number> <filename> <garbage> into a .line <number>\n.file <filename>
  26    pair.  This needs better error-handling.  */
  27
  28 #include <stdio.h>
  29 #include "as.h"                 /* For BAD_CASE() only */
  30
  31 #if (__STDC__ != 1)
  32 #ifndef const
  33 #define const  /* empty */
  34 #endif
  35 #endif
  36
  37 /* Whether we are scrubbing in m68k MRI mode.  This is different from
  38    flag_m68k_mri, because the two flags will be affected by the .mri
  39    pseudo-op at different times.  Set by do_scrub_begin.  */
  40 static int scrub_m68k_mri;
  41
  42 /* The pseudo-op which switches in and out of MRI mode.  See the
  43    comment in do_scrub_chars.  */
  44 static const char mri_pseudo[] = ".mri 0";	/* The recognizer also accepts '1' where this has '0'.  */
  45
  46 #if defined TC_ARM && defined OBJ_ELF
  47 /* The pseudo-op for which we need to special-case `@' characters.
  48    See the comment in do_scrub_chars.  */
  49 static const char   symver_pseudo[] = ".symver";
  50 static const char * symver_state;	/* NULL, or the next character of symver_pseudo we expect.  */
  51 #endif
  52
  53 static char lex[256];	/* Class (a LEX_IS_* code, or 0) of each unsigned char value; filled in by do_scrub_begin.  */
  54 static const char symbol_chars[] =	/* Characters do_scrub_begin marks as LEX_IS_SYMBOL_COMPONENT.  */
  55 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  56
  57 #define LEX_IS_SYMBOL_COMPONENT         1	/* symbol_chars, tc_symbol_chars, and 128..255 */
  58 #define LEX_IS_WHITESPACE               2	/* space, tab, carriage return */
  59 #define LEX_IS_LINE_SEPARATOR           3	/* ';' by default, plus line_separator_chars */
  60 #define LEX_IS_COMMENT_START            4	/* tc_comment_chars */
  61 #define LEX_IS_LINE_COMMENT_START       5	/* line_comment_chars */
  62 #define LEX_IS_TWOCHAR_COMMENT_1ST      6	/* '/' when it has no other class; the value 7 is not used */
  63 #define LEX_IS_STRINGQUOTE              8	/* '"', or '\'' in MRI mode or with SINGLE_QUOTE_STRINGS */
  64 #define LEX_IS_COLON                    9	/* ':' */
  65 #define LEX_IS_NEWLINE                  10	/* '\n' */
  66 #define LEX_IS_ONECHAR_QUOTE            11	/* '\'' outside MRI mode, except on TC_HPPA */
  67 #ifdef TC_V850
  68 #define LEX_IS_DOUBLEDASH_1ST           12	/* '-' */
  69 #endif
  70 #ifdef TC_M32R
  71 #define LEX_IS_DOUBLEBAR_1ST            13	/* '|' */
  72 #endif
  73 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)	/* These macros index lex with C directly, so */
  74 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)	/* C must be in the range 0..255; the callers */
  75 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)	/* in do_scrub_chars test for EOF before */
  76 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)	/* using them.  */
  77 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  78 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  79
  80 static int process_escape PARAMS ((int));	/* Maps the character after a backslash to the character it denotes.  */
  81
  82 /* FIXME-soon: The entire lexer/parser thingy should be
  83    built statically at compile time rather than dynamically
  84    each and every time the assembler is run.  xoxorich. */
  85
  86 void
  87 do_scrub_begin (m68k_mri)
  88      int m68k_mri;
  89 {
  90   const char *p;
  91   int c;
  92
  93   scrub_m68k_mri = m68k_mri;
  94
  95   lex[' '] = LEX_IS_WHITESPACE;
  96   lex['\t'] = LEX_IS_WHITESPACE;
  97   lex['\r'] = LEX_IS_WHITESPACE;
  98   lex['\n'] = LEX_IS_NEWLINE;
  99   lex[';'] = LEX_IS_LINE_SEPARATOR;
 100   lex[':'] = LEX_IS_COLON;
 101
 102   if (! m68k_mri)
 103     {
 104       lex['"'] = LEX_IS_STRINGQUOTE;
 105
 106 #ifndef TC_HPPA
 107       lex['\''] = LEX_IS_ONECHAR_QUOTE;
 108 #endif
 109
 110 #ifdef SINGLE_QUOTE_STRINGS
 111       lex['\''] = LEX_IS_STRINGQUOTE;
 112 #endif
 113     }
 114
 115   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
 116      in state 5 of do_scrub_chars must be changed.  */
 117
 118   /* Note that these override the previous defaults, e.g. if ';' is a
 119      comment char, then it isn't a line separator.  */
 120   for (p = symbol_chars; *p; ++p)
 121     {
 122       lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 123     }                           /* declare symbol characters */
 124
 125   for (c = 128; c < 256; ++c)
 126     lex[c] = LEX_IS_SYMBOL_COMPONENT;
 127
 128 #ifdef tc_symbol_chars
 129   /* This macro permits the processor to specify all characters which
 130      may appears in an operand.  This will prevent the scrubber from
 131      discarding meaningful whitespace in certain cases.  The i386
 132      backend uses this to support prefixes, which can confuse the
 133      scrubber as to whether it is parsing operands or opcodes.  */
 134   for (p = tc_symbol_chars; *p; ++p)
 135     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 136 #endif
 137
 138   /* The m68k backend wants to be able to change comment_chars.  */
 139 #ifndef tc_comment_chars
 140 #define tc_comment_chars comment_chars
 141 #endif
 142   for (p = tc_comment_chars; *p; p++)
 143     {
 144       lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
 145     }                           /* declare comment chars */
 146
 147   for (p = line_comment_chars; *p; p++)
 148     {
 149       lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
 150     }                           /* declare line comment chars */
 151
 152   for (p = line_separator_chars; *p; p++)
 153     {
 154       lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
 155     }                           /* declare line separators */
 156
 157   /* Only allow slash-star comments if slash is not in use.
 158      FIXME: This isn't right.  We should always permit them.  */
 159   if (lex['/'] == 0)
 160     {
 161       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 162     }
 163
 164   if (m68k_mri)
 165     {
 166       lex['\''] = LEX_IS_STRINGQUOTE;
 167       lex[';'] = LEX_IS_COMMENT_START;
 168       lex['*'] = LEX_IS_LINE_COMMENT_START;
 169       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
 170          then it can't be used in an expression.  */
 171       lex['!'] = LEX_IS_LINE_COMMENT_START;
 172     }
 173
 174 #ifdef TC_V850
 175   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
 176 #endif
 177 #ifdef TC_M32R
 178   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
 179 #endif
 180 #ifdef TC_D30V
 181   /* must do this is we want VLIW instruction with "->" or "<-" */
 182   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
 183 #endif
 184 }                               /* do_scrub_begin() */
 185
 186 /* Saved state of the scrubber.  do_scrub_chars may return at any point, so everything it needs to resume lives here.  */
 187 static int state;	/* Current state of the do_scrub_chars state machine.  */
 188 static int old_state;	/* State to go to when state -1, -2 or 5 finishes.  */
 189 static char *out_string;	/* Next character to put out in state -1.  */
 190 static char out_buf[20];	/* Holds the decimal text generated for a one-character quote.  */
 191 static int add_newlines;	/* Incremented for each newline swallowed inside a comment or a continued string.  */
 192 static char *saved_input;	/* If not NULL, input that do_scrub_chars resumes from before asking for more.  */
 193 static int saved_input_len;	/* Number of bytes at saved_input.  */
 194 static char input_buffer[32 * 1024];	/* Buffer handed to the GET function to fill with raw input.  */
 195 static const char *mri_state;	/* NULL, or the next character of mri_pseudo we expect.  */
 196 static char mri_last_ch;	/* Last character matched by the .mri recognizer.  */
 197
 198 /* Data structure for saving the state of app across #include's.  Note that
 199    app is called asynchronously to the parsing of the .include's, so our
 200    state at the time .include is interpreted is completely unrelated.
 201    That's why we have to save it all.  app_push fills one of these in; app_pop restores from it and frees it.  */
 202
 203 struct app_save
 204   {
 205     int          state;	/* Each member holds the value of the file-scope variable of the same name.  */
 206     int          old_state;
 207     char *       out_string;
 208     char         out_buf[sizeof (out_buf)];	/* Sized from the file-scope out_buf.  */
 209     int          add_newlines;
 210     char *       saved_input;	/* Heap copy of the pending input, or NULL if there was none.  */
 211     int          saved_input_len;	/* Only set by app_push when saved_input is not NULL.  */
 212     int          scrub_m68k_mri;
 213     const char * mri_state;
 214     char         mri_last_ch;
 215 #if defined TC_ARM && defined OBJ_ELF
 216     const char * symver_state;
 217 #endif
 218   };
 219
 220 char *
 221 app_push ()
 222 {
 223   register struct app_save *saved;
 224
 225   saved = (struct app_save *) xmalloc (sizeof (*saved));
 226   saved->state = state;
 227   saved->old_state = old_state;
 228   saved->out_string = out_string;
 229   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 230   saved->add_newlines = add_newlines;
 231   if (saved_input == NULL)
 232     saved->saved_input = NULL;
 233   else
 234     {
 235       saved->saved_input = xmalloc (saved_input_len);
 236       memcpy (saved->saved_input, saved_input, saved_input_len);
 237       saved->saved_input_len = saved_input_len;
 238     }
 239   saved->scrub_m68k_mri = scrub_m68k_mri;
 240   saved->mri_state = mri_state;
 241   saved->mri_last_ch = mri_last_ch;
 242 #if defined TC_ARM && defined OBJ_ELF
 243   saved->symver_state = symver_state;
 244 #endif
 245
 246   /* do_scrub_begin() is not useful, just wastes time. */
 247
 248   state = 0;
 249   saved_input = NULL;
 250
 251   return (char *) saved;
 252 }
 253
 254 void
 255 app_pop (arg)
 256      char *arg;
 257 {
 258   register struct app_save *saved = (struct app_save *) arg;
 259
 260   /* There is no do_scrub_end (). */
 261   state = saved->state;
 262   old_state = saved->old_state;
 263   out_string = saved->out_string;
 264   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 265   add_newlines = saved->add_newlines;
 266   if (saved->saved_input == NULL)
 267     saved_input = NULL;
 268   else
 269     {
 270       assert (saved->saved_input_len <= (int) (sizeof input_buffer));
 271       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
 272       saved_input = input_buffer;
 273       saved_input_len = saved->saved_input_len;
 274       free (saved->saved_input);
 275     }
 276   scrub_m68k_mri = saved->scrub_m68k_mri;
 277   mri_state = saved->mri_state;
 278   mri_last_ch = saved->mri_last_ch;
 279 #if defined TC_ARM && defined OBJ_ELF
 280   symver_state = saved->symver_state;
 281 #endif
 282
 283   free (arg);
 284 }                               /* app_pop() */
 285
 286 /* @@ This assumes that \n &c are the same on host and target.  This is not
 287    necessarily true.  */
 288 static int
 289 process_escape (ch)
 290      int ch;
 291 {
 292   switch (ch)
 293     {
 294     case 'b':
 295       return '\b';
 296     case 'f':
 297       return '\f';
 298     case 'n':
 299       return '\n';
 300     case 'r':
 301       return '\r';
 302     case 't':
 303       return '\t';
 304     case '\'':
 305       return '\'';
 306     case '"':
 307       return '\"';
 308     default:
 309       return ch;
 310     }
 311 }
 312
 313 /* This function is called to process input characters.  The GET
 314    parameter is used to retrieve more input characters.  GET should
 315    set its parameter to point to a buffer, and return the length of
 316    the buffer; it should return 0 at end of file.  The scrubbed output
 317    characters are put into the buffer starting at TOSTART; the TOSTART
 318    buffer is TOLEN bytes in length.  The function returns the number
 319    of scrubbed characters put into TOSTART.  This will be TOLEN unless
 320    end of file was seen.  This function is arranged as a state
 321    machine, and saves its state so that it may return at any point.
 322    This is the way the old code used to work.  */
 323
 324 int
 325 do_scrub_chars (get, tostart, tolen)
 326      int (*get) PARAMS ((char *, int));
 327      char *tostart;
 328      int tolen;
 329 {
 330   char *to = tostart;
 331   char *toend = tostart + tolen;
 332   char *from;
 333   char *fromend;
 334   int fromlen;
 335   register int ch, ch2 = 0;
 336
 337   /*State 0: beginning of normal line
 338           1: After first whitespace on line (flush more white)
 339           2: After first non-white (opcode) on line (keep 1white)
 340           3: after second white on line (into operands) (flush white)
 341           4: after putting out a .line, put out digits
 342           5: parsing a string, then go to old-state
 343           6: putting out \ escape in a "d string.
 344           7: After putting out a .appfile, put out string.
 345           8: After putting out a .appfile string, flush until newline.
 346           9: After seeing symbol char in state 3 (keep 1white after symchar)
 347          10: After seeing whitespace in state 9 (keep white before symchar)
 348          11: After seeing a symbol character in state 0 (eg a label definition)
 349          -1: output string in out_string and go to the state in old_state
 350          -2: flush text until a '*' '/' is seen, then go to state old_state
 351 #ifdef TC_V850
 352          12: After seeing a dash, looking for a second dash as a start of comment.
 353 #endif
 354 #ifdef TC_M32R
 355          13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
 356 #endif
 357           */
 358
 359   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 360      constructs like ``.loc 1 20''.  This was turning into ``.loc
 361      120''.  States 9 and 10 ensure that a space is never dropped in
 362      between characters which could appear in an identifier.  Ian
 363      Taylor, ian@cygnus.com.
 364
 365      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
 366      correctly on the PA (and any other target where colons are optional).
 367      Jeff Law, law@cs.utah.edu.
 368
 369      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
 370      get squashed into "cmp r1,r2||trap#1", with the all important space
 371      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
 372
 373   /* This macro gets the next input character.  */
 374
 375 #define GET()                                                   \
 376   (from < fromend                                               \
 377    ? * (unsigned char *) (from++)                               \
 378    : (saved_input = NULL,                                       \
 379       fromlen = (*get) (input_buffer, sizeof input_buffer),     \
 380       from = input_buffer,                                      \
 381       fromend = from + fromlen,                                 \
 382       (fromlen == 0                                             \
 383        ? EOF                                                    \
 384        : * (unsigned char *) (from++))))
 385
 386   /* This macro pushes a character back on the input stream.  */
 387
 388 #define UNGET(uch) (*--from = (uch))
 389
 390   /* This macro puts a character into the output buffer.  If this
 391      character fills the output buffer, this macro jumps to the label
 392      TOFULL.  We use this rather ugly approach because we need to
 393      handle two different termination conditions: EOF on the input
 394      stream, and a full output buffer.  It would be simpler if we
 395      always read in the entire input stream before processing it, but
 396      I don't want to make such a significant change to the assembler's
 397      memory usage.  */
 398
 399 #define PUT(pch)                        \
 400   do                                    \
 401     {                                   \
 402       *to++ = (pch);                    \
 403       if (to >= toend)                  \
 404         goto tofull;                    \
 405     }                                   \
 406   while (0)
 407
 408   if (saved_input != NULL)
 409     {
 410       from = saved_input;
 411       fromend = from + saved_input_len;
 412     }
 413   else
 414     {
 415       fromlen = (*get) (input_buffer, sizeof input_buffer);
 416       if (fromlen == 0)
 417         return 0;
 418       from = input_buffer;
 419       fromend = from + fromlen;
 420     }
 421
 422   while (1)
 423     {
 424       /* The cases in this switch end with continue, in order to
 425          branch back to the top of this while loop and generate the
 426          next output character in the appropriate state.  */
 427       switch (state)
 428         {
 429         case -1:
 430           ch = *out_string++;
 431           if (*out_string == '\0')
 432             {
 433               state = old_state;
 434               old_state = 3;
 435             }
 436           PUT (ch);
 437           continue;
 438
 439         case -2:
 440           for (;;)
 441             {
 442               do
 443                 {
 444                   ch = GET ();
 445
 446                   if (ch == EOF)
 447                     {
 448                       as_warn (_("end of file in comment"));
 449                       goto fromeof;
 450                     }
 451
 452                   if (ch == '\n')
 453                     PUT ('\n');
 454                 }
 455               while (ch != '*');
 456
 457               while ((ch = GET ()) == '*')
 458                 ;
 459
 460               if (ch == EOF)
 461                 {
 462                   as_warn (_("end of file in comment"));
 463                   goto fromeof;
 464                 }
 465
 466               if (ch == '/')
 467                 break;
 468
 469               UNGET (ch);
 470             }
 471
 472           state = old_state;
 473           UNGET (' ');
 474           continue;
 475
 476         case 4:
 477           ch = GET ();
 478           if (ch == EOF)
 479             goto fromeof;
 480           else if (ch >= '0' && ch <= '9')
 481             PUT (ch);
 482           else
 483             {
 484               while (ch != EOF && IS_WHITESPACE (ch))
 485                 ch = GET ();
 486               if (ch == '"')
 487                 {
 488                   UNGET (ch);
 489                   if (scrub_m68k_mri)
 490                     out_string = "\n\tappfile ";
 491                   else
 492                     out_string = "\n\t.appfile ";
 493                   old_state = 7;
 494                   state = -1;
 495                   PUT (*out_string++);
 496                 }
 497               else
 498                 {
 499                   while (ch != EOF && ch != '\n')
 500                     ch = GET ();
 501                   state = 0;
 502                   PUT (ch);
 503                 }
 504             }
 505           continue;
 506
 507         case 5:
 508           /* We are going to copy everything up to a quote character,
 509              with special handling for a backslash.  We try to
 510              optimize the copying in the simple case without using the
 511              GET and PUT macros.  */
 512           {
 513             char *s;
 514             int len;
 515
 516             for (s = from; s < fromend; s++)
 517               {
 518                 ch = *s;
 519                 /* This condition must be changed if the type of any
 520                    other character can be LEX_IS_STRINGQUOTE.  */
 521                 if (ch == '\\'
 522                     || ch == '"'
 523                     || ch == '\''
 524                     || ch == '\n')
 525                   break;
 526               }
 527             len = s - from;
 528             if (len > toend - to)
 529               len = toend - to;
 530             if (len > 0)
 531               {
 532                 memcpy (to, from, len);
 533                 to += len;
 534                 from += len;
 535               }
 536           }
 537
 538           ch = GET ();
 539           if (ch == EOF)
 540             {
 541               as_warn (_("end of file in string: inserted '\"'"));
 542               state = old_state;
 543               UNGET ('\n');
 544               PUT ('"');
 545             }
 546           else if (lex[ch] == LEX_IS_STRINGQUOTE)
 547             {
 548               state = old_state;
 549               PUT (ch);
 550             }
 551 #ifndef NO_STRING_ESCAPES
 552           else if (ch == '\\')
 553             {
 554               state = 6;
 555               PUT (ch);
 556             }
 557 #endif
 558           else if (scrub_m68k_mri && ch == '\n')
 559             {
 560               /* Just quietly terminate the string.  This permits lines like
 561                    bne  label   loop if we haven't reach end yet
 562                  */
 563               state = old_state;
 564               UNGET (ch);
 565               PUT ('\'');
 566             }
 567           else
 568             {
 569               PUT (ch);
 570             }
 571           continue;
 572
 573         case 6:
 574           state = 5;
 575           ch = GET ();
 576           switch (ch)
 577             {
 578               /* Handle strings broken across lines, by turning '\n' into
 579                  '\\' and 'n'.  */
 580             case '\n':
 581               UNGET ('n');
 582               add_newlines++;
 583               PUT ('\\');
 584               continue;
 585
 586             case '"':
 587             case '\\':
 588             case 'b':
 589             case 'f':
 590             case 'n':
 591             case 'r':
 592             case 't':
 593             case 'v':
 594             case 'x':
 595             case 'X':
 596             case '0':
 597             case '1':
 598             case '2':
 599             case '3':
 600             case '4':
 601             case '5':
 602             case '6':
 603             case '7':
 604               break;
 605 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 606             default:
 607               as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
 608               break;
 609 #else  /* ONLY_STANDARD_ESCAPES */
 610             default:
 611               /* Accept \x as x for any x */
 612               break;
 613 #endif /* ONLY_STANDARD_ESCAPES */
 614
 615             case EOF:
 616               as_warn (_("End of file in string: '\"' inserted"));
 617               PUT ('"');
 618               continue;
 619             }
 620           PUT (ch);
 621           continue;
 622
 623         case 7:
 624           ch = GET ();
 625           state = 5;
 626           old_state = 8;
 627           if (ch == EOF)
 628             goto fromeof;
 629           PUT (ch);
 630           continue;
 631
 632         case 8:
 633           do
 634             ch = GET ();
 635           while (ch != '\n' && ch != EOF);
 636           if (ch == EOF)
 637             goto fromeof;
 638           state = 0;
 639           PUT (ch);
 640           continue;
 641         }
 642
 643       /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
 644
 645       /* flushchar: */
 646       ch = GET ();
 647
 648     recycle:
 649
 650 #if defined TC_ARM && defined OBJ_ELF
 651       /* We need to watch out for .symver directives.  See the comment later
 652          in this function.  */
 653       if (symver_state == NULL)
 654         {
 655           if ((state == 0 || state == 1) && ch == symver_pseudo[0])
 656             symver_state = symver_pseudo + 1;
 657         }
 658       else
 659         {
 660           /* We advance to the next state if we find the right
 661              character.  */
 662           if (ch != '\0' && (*symver_state == ch))
 663             ++symver_state;
 664           else if (*symver_state != '\0')
 665             /* We did not get the expected character, or we didn't
 666                get a valid terminating character after seeing the
 667                entire pseudo-op, so we must go back to the beginning.  */
 668             symver_state = NULL;
 669           else
 670             {
 671               /* We've read the entire pseudo-op.  If this is the end
 672                  of the line, go back to the beginning.  */
 673               if (IS_NEWLINE (ch))
 674                 symver_state = NULL;
 675             }
 676         }
 677 #endif /* TC_ARM && OBJ_ELF */
 678
 679 #ifdef TC_M68K
 680       /* We want to have pseudo-ops which control whether we are in
 681          MRI mode or not.  Unfortunately, since m68k MRI mode affects
 682          the scrubber, that means that we need a special purpose
 683          recognizer here.  */
 684       if (mri_state == NULL)
 685         {
 686           if ((state == 0 || state == 1)
 687               && ch == mri_pseudo[0])
 688             mri_state = mri_pseudo + 1;
 689         }
 690       else
 691         {
 692           /* We advance to the next state if we find the right
 693              character, or if we need a space character and we get any
 694              whitespace character, or if we need a '0' and we get a
 695              '1' (this is so that we only need one state to handle
 696              ``.mri 0'' and ``.mri 1'').  */
 697           if (ch != '\0'
 698               && (*mri_state == ch
 699                   || (*mri_state == ' '
 700                       && lex[ch] == LEX_IS_WHITESPACE)
 701                   || (*mri_state == '0'
 702                       && ch == '1')))
 703             {
 704               mri_last_ch = ch;
 705               ++mri_state;
 706             }
 707           else if (*mri_state != '\0'
 708                    || (lex[ch] != LEX_IS_WHITESPACE
 709                        && lex[ch] != LEX_IS_NEWLINE))
 710             {
 711               /* We did not get the expected character, or we didn't
 712                  get a valid terminating character after seeing the
 713                  entire pseudo-op, so we must go back to the
 714                  beginning.  */
 715               mri_state = NULL;
 716             }
 717           else
 718             {
 719               /* We've read the entire pseudo-op.  mri_last_ch is
 720                  either '0' or '1' indicating whether to enter or
 721                  leave MRI mode.  */
 722               do_scrub_begin (mri_last_ch == '1');
 723               mri_state = NULL;
 724
 725               /* We continue handling the character as usual.  The
 726                  main gas reader must also handle the .mri pseudo-op
 727                  to control expression parsing and the like.  */
 728             }
 729         }
 730 #endif
 731
 732       if (ch == EOF)
 733         {
 734           if (state != 0)
 735             {
 736               as_warn (_("end of file not at end of a line; newline inserted"));
 737               state = 0;
 738               PUT ('\n');
 739             }
 740           goto fromeof;
 741         }
 742
 743       switch (lex[ch])
 744         {
 745         case LEX_IS_WHITESPACE:
 746           do
 747             {
 748               ch = GET ();
 749             }
 750           while (ch != EOF && IS_WHITESPACE (ch));
 751           if (ch == EOF)
 752             goto fromeof;
 753
 754           if (state == 0)
 755             {
 756               /* Preserve a single whitespace character at the
 757                  beginning of a line.  */
 758               state = 1;
 759               UNGET (ch);
 760               PUT (' ');
 761               break;
 762             }
 763
 764           if (IS_COMMENT (ch)
 765               || ch == '/'
 766               || IS_LINE_SEPARATOR (ch))
 767             {
 768               if (scrub_m68k_mri)
 769                 {
 770                   /* In MRI mode, we keep these spaces.  */
 771                   UNGET (ch);
 772                   PUT (' ');
 773                   break;
 774                 }
 775               goto recycle;
 776             }
 777
 778           /* If we're in state 2 or 11, we've seen a non-white
 779              character followed by whitespace.  If the next character
 780              is ':', this is whitespace after a label name which we
 781              normally must ignore.  In MRI mode, though, spaces are
 782              not permitted between the label and the colon.  */
 783           if ((state == 2 || state == 11)
 784               && lex[ch] == LEX_IS_COLON
 785               && ! scrub_m68k_mri)
 786             {
 787               state = 1;
 788               PUT (ch);
 789               break;
 790             }
 791
 792           switch (state)
 793             {
 794             case 0:
 795               state++;
 796               goto recycle;     /* Punted leading sp */
 797             case 1:
 798               /* We can arrive here if we leave a leading whitespace
 799                  character at the beginning of a line.  */
 800               goto recycle;
 801             case 2:
 802               state = 3;
 803               if (to + 1 < toend)
 804                 {
 805                   /* Optimize common case by skipping UNGET/GET.  */
 806                   PUT (' ');    /* Sp after opco */
 807                   goto recycle;
 808                 }
 809               UNGET (ch);
 810               PUT (' ');
 811               break;
 812             case 3:
 813               if (scrub_m68k_mri)
 814                 {
 815                   /* In MRI mode, we keep these spaces.  */
 816                   UNGET (ch);
 817                   PUT (' ');
 818                   break;
 819                 }
 820               goto recycle;     /* Sp in operands */
 821             case 9:
 822             case 10:
 823               if (scrub_m68k_mri)
 824                 {
 825                   /* In MRI mode, we keep these spaces.  */
 826                   state = 3;
 827                   UNGET (ch);
 828                   PUT (' ');
 829                   break;
 830                 }
 831               state = 10;       /* Sp after symbol char */
 832               goto recycle;
 833             case 11:
 834               if (flag_m68k_mri
 835 #ifdef LABELS_WITHOUT_COLONS
 836                   || 1
 837 #endif
 838                   )
 839                 state = 1;
 840               else
 841                 {
 842                   /* We know that ch is not ':', since we tested that
 843                      case above.  Therefore this is not a label, so it
 844                      must be the opcode, and we've just seen the
 845                      whitespace after it.  */
 846                   state = 3;
 847                 }
 848               UNGET (ch);
 849               PUT (' ');        /* Sp after label definition.  */
 850               break;
 851             default:
 852               BAD_CASE (state);
 853             }
 854           break;
 855
 856         case LEX_IS_TWOCHAR_COMMENT_1ST:
 857           ch2 = GET ();
 858           if (ch2 == '*')
 859             {
 860               for (;;)
 861                 {
 862                   do
 863                     {
 864                       ch2 = GET ();
 865                       if (ch2 != EOF && IS_NEWLINE (ch2))
 866                         add_newlines++;
 867                     }
 868                   while (ch2 != EOF && ch2 != '*');
 869
 870                   while (ch2 == '*')
 871                     ch2 = GET ();
 872
 873                   if (ch2 == EOF || ch2 == '/')
 874                     break;
 875
 876                   /* This UNGET will ensure that we count newlines
 877                      correctly.  */
 878                   UNGET (ch2);
 879                 }
 880
 881               if (ch2 == EOF)
 882                 as_warn (_("end of file in multiline comment"));
 883
 884               ch = ' ';
 885               goto recycle;
 886             }
 887           else
 888             {
 889               if (ch2 != EOF)
 890                 UNGET (ch2);
 891               if (state == 9 || state == 10)
 892                 state = 3;
 893               PUT (ch);
 894             }
 895           break;
 896
 897         case LEX_IS_STRINGQUOTE:
 898           if (state == 10)
 899             {
 900               /* Preserve the whitespace in foo "bar" */
 901               UNGET (ch);
 902               state = 3;
 903               PUT (' ');
 904
 905               /* PUT didn't jump out.  We could just break, but we
 906                  know what will happen, so optimize a bit.  */
 907               ch = GET ();
 908               old_state = 3;
 909             }
 910           else if (state == 9)
 911             old_state = 3;
 912           else
 913             old_state = state;
 914           state = 5;
 915           PUT (ch);
 916           break;
 917
 918 #ifndef IEEE_STYLE
 919         case LEX_IS_ONECHAR_QUOTE:
 920           if (state == 10)
 921             {
 922               /* Preserve the whitespace in foo 'b' */
 923               UNGET (ch);
 924               state = 3;
 925               PUT (' ');
 926               break;
 927             }
 928           ch = GET ();
 929           if (ch == EOF)
 930             {
 931               as_warn (_("end of file after a one-character quote; \\0 inserted"));
 932               ch = 0;
 933             }
 934           if (ch == '\\')
 935             {
 936               ch = GET ();
 937               if (ch == EOF)
 938                 {
 939                   as_warn (_("end of file in escape character"));
 940                   ch = '\\';
 941                 }
 942               else
 943                 ch = process_escape (ch);
 944             }
 945           sprintf (out_buf, "%d", (int) (unsigned char) ch);
 946
 947           /* None of these 'x constants for us.  We want 'x'.  */
 948           if ((ch = GET ()) != '\'')
 949             {
 950 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 951               as_warn (_("Missing close quote: (assumed)"));
 952 #else
 953               if (ch != EOF)
 954                 UNGET (ch);
 955 #endif
 956             }
 957           if (strlen (out_buf) == 1)
 958             {
 959               PUT (out_buf[0]);
 960               break;
 961             }
 962           if (state == 9)
 963             old_state = 3;
 964           else
 965             old_state = state;
 966           state = -1;
 967           out_string = out_buf;
 968           PUT (*out_string++);
 969           break;
 970 #endif
 971
 972         case LEX_IS_COLON:
 973           if (state == 9 || state == 10)
 974             state = 3;
 975           else if (state != 3)
 976             state = 1;
 977           PUT (ch);
 978           break;
 979
 980         case LEX_IS_NEWLINE:
 981           /* Roll out a bunch of newlines from inside comments, etc.  */
 982           if (add_newlines)
 983             {
 984               --add_newlines;
 985               UNGET (ch);
 986             }
 987           /* fall thru into... */
 988
 989         case LEX_IS_LINE_SEPARATOR:
 990           state = 0;
 991           PUT (ch);
 992           break;
 993
 994 #ifdef TC_V850
 995         case LEX_IS_DOUBLEDASH_1ST:
 996           ch2 = GET();
 997           if (ch2 != '-')
 998             {
 999               UNGET (ch2);
1000               goto de_fault;
1001             }
1002           /* read and skip to end of line */
1003           do
1004             {
1005               ch = GET ();
1006             }
1007           while (ch != EOF && ch != '\n');
1008           if (ch == EOF)
1009             {
1010               as_warn (_("end of file in comment; newline inserted"));
1011             }
1012           state = 0;
1013           PUT ('\n');
1014           break;
1015 #endif
1016 #ifdef TC_M32R
1017         case LEX_IS_DOUBLEBAR_1ST:
1018           ch2 = GET();
1019           if (ch2 != '|')
1020             {
1021               UNGET (ch2);
1022               goto de_fault;
1023             }
1024           /* Reset back to state 1 and pretend that we are parsing a line from
1025              just after the first white space.  */
1026           state = 1;
1027           PUT ('|');
1028           PUT ('|');
1029           break;
1030 #endif
1031         case LEX_IS_LINE_COMMENT_START:
1032           /* FIXME-someday: The two character comment stuff was badly
1033              thought out.  On i386, we want '/' as line comment start
1034              AND we want C style comments.  hence this hack.  The
1035              whole lexical process should be reworked.  xoxorich.  */
1036           if (ch == '/')
1037             {
1038               ch2 = GET ();
1039               if (ch2 == '*')
1040                 {
1041                   old_state = 3;
1042                   state = -2;
1043                   break;
1044                 }
1045               else
1046                 {
1047                   UNGET (ch2);
1048                 }
1049             } /* bad hack */
1050
1051           if (state == 0 || state == 1) /* Only comment at start of line.  */
1052             {
1053               int startch;
1054
1055               startch = ch;
1056
1057               do
1058                 {
1059                   ch = GET ();
1060                 }
1061               while (ch != EOF && IS_WHITESPACE (ch));
1062               if (ch == EOF)
1063                 {
1064                   as_warn (_("end of file in comment; newline inserted"));
1065                   PUT ('\n');
1066                   break;
1067                 }
1068               if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1069                 {
1070                   /* Not a cpp line.  */
1071                   while (ch != EOF && !IS_NEWLINE (ch))
1072                     ch = GET ();
1073                   if (ch == EOF)
1074                     as_warn (_("EOF in Comment: Newline inserted"));
1075                   state = 0;
1076                   PUT ('\n');
1077                   break;
1078                 }
1079               /* Looks like `# 123 "filename"' from cpp.  */
1080               UNGET (ch);
1081               old_state = 4;
1082               state = -1;
1083               if (scrub_m68k_mri)
1084                 out_string = "\tappline ";
1085               else
1086                 out_string = "\t.appline ";
1087               PUT (*out_string++);
1088               break;
1089             }
1090
1091 #ifdef TC_D10V
1092           /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1093              Trap is the only short insn that has a first operand that is
1094              neither register nor label.
1095              We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1 .
1096              We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is already
1097              LEX_IS_LINE_COMMENT_START.  However, it is the only character in
1098              line_comment_chars for d10v, hence we can recognize it as such.  */
1099           /* An alternative approach would be to reset the state to 1 when
1100              we see '||', '<-' or '->', but that seems to be overkill.  */
1101           if (state == 10) PUT (' ');
1102 #endif
1103           /* We have a line comment character which is not at the
1104              start of a line.  If this is also a normal comment
1105              character, fall through.  Otherwise treat it as a default
1106              character.  */
1107           if (strchr (tc_comment_chars, ch) == NULL
1108               && (! scrub_m68k_mri
1109                   || (ch != '!' && ch != '*')))
1110             goto de_fault;
1111           if (scrub_m68k_mri
1112               && (ch == '!' || ch == '*' || ch == '#')
1113               && state != 1
1114               && state != 10)
1115             goto de_fault;
1116           /* Fall through.  */
1117         case LEX_IS_COMMENT_START:
1118 #if defined TC_ARM && defined OBJ_ELF
1119           /* On the ARM, `@' is the comment character.
1120              Unfortunately this is also a special character in ELF .symver
1121              directives (and .type, though we deal with those another way).  So
1122              we check if this line is such a directive, and treat the character
1123              as default if so.  This is a hack.  */
1124           if ((symver_state != NULL) && (*symver_state == 0))
1125             goto de_fault;
1126 #endif
1127           do
1128             {
1129               ch = GET ();
1130             }
1131           while (ch != EOF && !IS_NEWLINE (ch));
1132           if (ch == EOF)
1133             as_warn (_("end of file in comment; newline inserted"));
1134           state = 0;
1135           PUT ('\n');
1136           break;
1137
1138         case LEX_IS_SYMBOL_COMPONENT:
1139           if (state == 10)
1140             {
1141               /* This is a symbol character following another symbol
1142                  character, with whitespace in between.  We skipped
1143                  the whitespace earlier, so output it now.  */
1144               UNGET (ch);
1145               state = 3;
1146               PUT (' ');
1147               break;
1148             }
1149
1150           if (state == 3)
1151             state = 9;
1152
1153           /* This is a common case.  Quickly copy CH and all the
1154              following symbol component or normal characters.  */
1155           if (to + 1 < toend
1156               && mri_state == NULL
1157 #if defined TC_ARM && defined OBJ_ELF
1158               && symver_state == NULL
1159 #endif
1160               )
1161             {
1162               char *s;
1163               int len;
1164
1165               for (s = from; s < fromend; s++)
1166                 {
1167                   int type;
1168
1169                   ch2 = * (unsigned char *) s;
1170                   type = lex[ch2];
1171                   if (type != 0
1172                       && type != LEX_IS_SYMBOL_COMPONENT)
1173                     break;
1174                 }
1175               if (s > from)
1176                 {
1177                   /* Handle the last character normally, for
1178                      simplicity.  */
1179                   --s;
1180                 }
1181               len = s - from;
1182               if (len > (toend - to) - 1)
1183                 len = (toend - to) - 1;
1184               if (len > 0)
1185                 {
1186                   PUT (ch);
1187                   if (len > 8)
1188                     {
1189                       memcpy (to, from, len);
1190                       to += len;
1191                       from += len;
1192                     }
1193                   else
1194                     {
1195                       switch (len)
1196                         {
1197                         case 8: *to++ = *from++;
1198                         case 7: *to++ = *from++;
1199                         case 6: *to++ = *from++;
1200                         case 5: *to++ = *from++;
1201                         case 4: *to++ = *from++;
1202                         case 3: *to++ = *from++;
1203                         case 2: *to++ = *from++;
1204                         case 1: *to++ = *from++;
1205                         }
1206                     }
1207                   ch = GET ();
1208                 }
1209             }
1210
1211           /* Fall through.  */
1212         default:
1213         de_fault:
1214           /* Some relatively `normal' character.  */
1215           if (state == 0)
1216             {
1217               state = 11;       /* Now seeing label definition */
1218             }
1219           else if (state == 1)
1220             {
1221               state = 2;        /* Ditto */
1222             }
1223           else if (state == 9)
1224             {
1225               if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1226                 state = 3;
1227             }
1228           else if (state == 10)
1229             {
1230               state = 3;
1231             }
1232           PUT (ch);
1233           break;
1234         }
1235     }
1236
1237   /*NOTREACHED*/
1238
1239  fromeof:
1240   /* We have reached the end of the input.  */
1241   return to - tostart;
1242
1243  tofull:
1244   /* The output buffer is full.  Save any input we have not yet
1245      processed.  */
1246   if (fromend > from)
1247     {
1248       saved_input = from;
1249       saved_input_len = fromend - from;
1250     }
1251   else
1252     saved_input = NULL;
1253
1254   return to - tostart;
1255 }
1256
1257 /* end of app.c */