gas/app.c

   1 /* This is the Assembler Pre-Processor
   2    Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 1999
   3    Free Software Foundation, Inc.
   4
   5    This file is part of GAS, the GNU Assembler.
   6
   7    GAS is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    GAS is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with GAS; see the file COPYING.  If not, write to the Free
  19    Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20    02111-1307, USA.  */
  21
  22 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  23 /* App, the assembler pre-processor.  This pre-processor strips out excess
  24    spaces, turns single-quoted characters into a decimal constant, and turns
  25    # <number> <filename> <garbage> into a .line <number>\n.file <filename>
  26    pair.  This needs better error-handling.  */
  27
  28 #include <stdio.h>
  29 #include "as.h"                 /* For BAD_CASE() only */
  30
  31 #if (__STDC__ != 1)
     /* The compiler does not define __STDC__ as 1.  Presumably such a
        compiler may not accept the `const' qualifier, so unless `const'
        is already a macro it is defined to nothing here and simply
        disappears from the declarations in this file.  */
  32 #ifndef const
  33 #define const  /* empty */
  34 #endif
  35 #endif
  36
  37 /* Whether we are scrubbing in m68k MRI mode.  This is different from
  38    flag_m68k_mri, because the two flags will be affected by the .mri
  39    pseudo-op at different times.  */
  40 static int scrub_m68k_mri;
  41
  42 /* The pseudo-op which switches in and out of MRI mode.  See the
  43    comment in do_scrub_chars.  */
  44 static const char mri_pseudo[] = ".mri 0";
     /* do_scrub_chars matches the text above one character at a time:
        any whitespace character matches the blank, and '1' is accepted
        where the '0' is expected.  */
  45
  46 #if defined TC_ARM && defined OBJ_ELF
  47 /* The pseudo-op for which we need to special-case `@' characters.
  48    See the comment in do_scrub_chars.  */
  49 static const char   symver_pseudo[] = ".symver";
     /* Next character of symver_pseudo that do_scrub_chars expects, or
        NULL when no match is in progress.  */
  50 static const char * symver_state;
  51 #endif
  52
     /* Character classification table, indexed by unsigned character
        value.  Each entry is either 0 (no special meaning) or one of the
        LEX_IS_* codes; do_scrub_begin fills it in.  */
  53 static char lex[256];
     /* Characters that do_scrub_begin marks as LEX_IS_SYMBOL_COMPONENT.
        Loops that run later in do_scrub_begin (comment and separator
        characters) may override individual entries.  */
  54 static const char symbol_chars[] =
  55 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  56
  57 #define LEX_IS_SYMBOL_COMPONENT         1
     /* The LEX_IS_* values are the character classes stored in lex[].
        An entry of 0 means the character has no special meaning to the
        scrubber.  The value 7 is not used.
        LEX_IS_SYMBOL_COMPONENT: everything in symbol_chars, every
        character value from 128 to 255, and tc_symbol_chars if the
        target defines it.  */
  58 #define LEX_IS_WHITESPACE               2       /* Space, tab, carriage return.  */
  59 #define LEX_IS_LINE_SEPARATOR           3       /* ';' by default, plus line_separator_chars.  */
  60 #define LEX_IS_COMMENT_START            4       /* Characters listed in tc_comment_chars.  */
  61 #define LEX_IS_LINE_COMMENT_START       5       /* Characters listed in line_comment_chars.  */
  62 #define LEX_IS_TWOCHAR_COMMENT_1ST      6       /* '/', when it has no other class: may begin a slash-star comment.  */
  63 #define LEX_IS_STRINGQUOTE              8       /* Opens and closes a string.  */
  64 #define LEX_IS_COLON                    9       /* ':'.  */
  65 #define LEX_IS_NEWLINE                  10      /* '\n'.  */
  66 #define LEX_IS_ONECHAR_QUOTE            11      /* '\'' introducing a single quoted character.  */
  67 #ifdef TC_V850
  68 #define LEX_IS_DOUBLEDASH_1ST           12      /* '-': a second dash starts a comment.  */
  69 #endif
  70 #ifdef TC_M32R
  71 #define DOUBLEBAR_PARALLEL
  72 #endif
  73 #ifdef DOUBLEBAR_PARALLEL
  74 #define LEX_IS_DOUBLEBAR_1ST            13      /* '|': a second bar separates parallel expressions.  */
  75 #endif
     /* Class tests.  Each one indexes lex[] directly with C, so C must be
        a valid index; the callers visible in do_scrub_chars rule out EOF
        before using them.  */
  76 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  77 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  78 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  79 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  80 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  81 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  82
     /* Maps the character following a backslash to the character it
        denotes.  PARAMS comes from the project headers; presumably it
        hides the prototype from compilers that lack prototypes.  */
  83 static int process_escape PARAMS ((int));
  84
  85 /* FIXME-soon: The entire lexer/parser thingy should be
  86    built statically at compile time rather than dynamically
  87    each and every time the assembler is run.  xoxorich. */
  88
  89 void
  90 do_scrub_begin (m68k_mri)
  91      int m68k_mri;
  92 {
  93   const char *p;
  94   int c;
  95
  96   scrub_m68k_mri = m68k_mri;
  97
  98   lex[' '] = LEX_IS_WHITESPACE;
  99   lex['\t'] = LEX_IS_WHITESPACE;
 100   lex['\r'] = LEX_IS_WHITESPACE;
 101   lex['\n'] = LEX_IS_NEWLINE;
 102   lex[';'] = LEX_IS_LINE_SEPARATOR;
 103   lex[':'] = LEX_IS_COLON;
 104
 105   if (! m68k_mri)
 106     {
 107       lex['"'] = LEX_IS_STRINGQUOTE;
 108
 109 #ifndef TC_HPPA
 110       lex['\''] = LEX_IS_ONECHAR_QUOTE;
 111 #endif
 112
 113 #ifdef SINGLE_QUOTE_STRINGS
 114       lex['\''] = LEX_IS_STRINGQUOTE;
 115 #endif
 116     }
 117
 118   /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
 119      in state 5 of do_scrub_chars must be changed.  */
 120
 121   /* Note that these override the previous defaults, e.g. if ';' is a
 122      comment char, then it isn't a line separator.  */
 123   for (p = symbol_chars; *p; ++p)
 124     {
 125       lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 126     }                           /* declare symbol characters */
 127
 128   for (c = 128; c < 256; ++c)
 129     lex[c] = LEX_IS_SYMBOL_COMPONENT;
 130
 131 #ifdef tc_symbol_chars
 132   /* This macro permits the processor to specify all characters which
 133      may appears in an operand.  This will prevent the scrubber from
 134      discarding meaningful whitespace in certain cases.  The i386
 135      backend uses this to support prefixes, which can confuse the
 136      scrubber as to whether it is parsing operands or opcodes.  */
 137   for (p = tc_symbol_chars; *p; ++p)
 138     lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
 139 #endif
 140
 141   /* The m68k backend wants to be able to change comment_chars.  */
 142 #ifndef tc_comment_chars
 143 #define tc_comment_chars comment_chars
 144 #endif
 145   for (p = tc_comment_chars; *p; p++)
 146     {
 147       lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
 148     }                           /* declare comment chars */
 149
 150   for (p = line_comment_chars; *p; p++)
 151     {
 152       lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
 153     }                           /* declare line comment chars */
 154
 155   for (p = line_separator_chars; *p; p++)
 156     {
 157       lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
 158     }                           /* declare line separators */
 159
 160   /* Only allow slash-star comments if slash is not in use.
 161      FIXME: This isn't right.  We should always permit them.  */
 162   if (lex['/'] == 0)
 163     {
 164       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 165     }
 166
 167   if (m68k_mri)
 168     {
 169       lex['\''] = LEX_IS_STRINGQUOTE;
 170       lex[';'] = LEX_IS_COMMENT_START;
 171       lex['*'] = LEX_IS_LINE_COMMENT_START;
 172       /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
 173          then it can't be used in an expression.  */
 174       lex['!'] = LEX_IS_LINE_COMMENT_START;
 175     }
 176
 177 #ifdef TC_V850
 178   lex['-'] = LEX_IS_DOUBLEDASH_1ST;
 179 #endif
 180 #ifdef DOUBLEBAR_PARALLEL
 181   lex['|'] = LEX_IS_DOUBLEBAR_1ST;
 182 #endif
 183 #ifdef TC_D30V
 184   /* must do this is we want VLIW instruction with "->" or "<-" */
 185   lex['-'] = LEX_IS_SYMBOL_COMPONENT;
 186 #endif
 187 }                               /* do_scrub_begin() */
 188
 189 /* Saved state of the scrubber */
 190 static int state;                  /* Current state of the do_scrub_chars state machine.  */
 191 static int old_state;              /* State to resume once state -1, -2 or 5 has finished.  */
 192 static char *out_string;           /* Next character to emit while in state -1.  */
 193 static char out_buf[20];           /* Small scratch buffer; presumably text that out_string can
                                           point at (its use lies outside this excerpt).  */
 194 static int add_newlines;           /* Newlines swallowed inside a continued string or a
                                           slash-star comment, as counted by do_scrub_chars.  */
 195 static char *saved_input;          /* When non-NULL, the input do_scrub_chars resumes from on
                                           its next call instead of fetching more.  */
 196 static int saved_input_len;        /* Number of bytes available at saved_input.  */
 197 static char input_buffer[32 * 1024];       /* Buffer handed to the GET callback to be filled.  */
 198 static const char *mri_state;      /* Next expected character of mri_pseudo, or NULL when no
                                           match is in progress.  */
 199 static char mri_last_ch;           /* Last character matched against mri_pseudo; '0' or '1'
                                           once the whole pseudo-op has matched.  */
 200
 201 /* Data structure for saving the state of app across #include's.  Note that
 202    app is called asynchronously to the parsing of the .include's, so our
 203    state at the time .include is interpreted is completely unrelated.
 204    That's why we have to save it all.  */
 205
 206 struct app_save
     /* One snapshot of the file-scope scrubber variables.  app_push fills
        it in and app_pop copies it back and frees it.  Unless noted, each
        member holds the value of the variable with the same name.  */
 207   {
 208     int          state;
 209     int          old_state;
 210     char *       out_string;         /* The pointer itself, not a copy of the text.  */
 211     char         out_buf[sizeof (out_buf)];  /* Copy of the contents of out_buf.  */
 212     int          add_newlines;
 213     char *       saved_input;        /* xmalloc'd copy of the unread input, or NULL if none.  */
 214     int          saved_input_len;    /* Meaningful only when saved_input is non-NULL.  */
 215     int          scrub_m68k_mri;
 216     const char * mri_state;
 217     char         mri_last_ch;
 218 #if defined TC_ARM && defined OBJ_ELF
 219     const char * symver_state;
 220 #endif
 221   };
 222
 223 char *
 224 app_push ()
 225 {
 226   register struct app_save *saved;
 227
 228   saved = (struct app_save *) xmalloc (sizeof (*saved));
 229   saved->state = state;
 230   saved->old_state = old_state;
 231   saved->out_string = out_string;
 232   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 233   saved->add_newlines = add_newlines;
 234   if (saved_input == NULL)
 235     saved->saved_input = NULL;
 236   else
 237     {
 238       saved->saved_input = xmalloc (saved_input_len);
 239       memcpy (saved->saved_input, saved_input, saved_input_len);
 240       saved->saved_input_len = saved_input_len;
 241     }
 242   saved->scrub_m68k_mri = scrub_m68k_mri;
 243   saved->mri_state = mri_state;
 244   saved->mri_last_ch = mri_last_ch;
 245 #if defined TC_ARM && defined OBJ_ELF
 246   saved->symver_state = symver_state;
 247 #endif
 248
 249   /* do_scrub_begin() is not useful, just wastes time. */
 250
 251   state = 0;
 252   saved_input = NULL;
 253
 254   return (char *) saved;
 255 }
 256
 257 void
 258 app_pop (arg)
 259      char *arg;
 260 {
 261   register struct app_save *saved = (struct app_save *) arg;
 262
 263   /* There is no do_scrub_end (). */
 264   state = saved->state;
 265   old_state = saved->old_state;
 266   out_string = saved->out_string;
 267   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 268   add_newlines = saved->add_newlines;
 269   if (saved->saved_input == NULL)
 270     saved_input = NULL;
 271   else
 272     {
 273       assert (saved->saved_input_len <= (int) (sizeof input_buffer));
 274       memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
 275       saved_input = input_buffer;
 276       saved_input_len = saved->saved_input_len;
 277       free (saved->saved_input);
 278     }
 279   scrub_m68k_mri = saved->scrub_m68k_mri;
 280   mri_state = saved->mri_state;
 281   mri_last_ch = saved->mri_last_ch;
 282 #if defined TC_ARM && defined OBJ_ELF
 283   symver_state = saved->symver_state;
 284 #endif
 285
 286   free (arg);
 287 }                               /* app_pop() */
 288
 /* Return the character denoted by a backslash followed by CH.  Only
    b, f, n, r, t, the single quote and the double quote are translated;
    any other value of CH is returned unchanged.

    @@ This assumes that \n &c are the same on host and target.  This is
    not necessarily true.  */
 static int
 process_escape (ch)
      int ch;
 {
   /* escape_values[i] is the translation of escape_letters[i].  */
   static const char escape_letters[] = "bfnrt'\"";
   static const char escape_values[] = "\b\f\n\r\t'\"";
   int i;

   for (i = 0; escape_letters[i] != '\0'; i++)
     {
       if (ch == escape_letters[i])
         return escape_values[i];
     }

   return ch;
 }
 315
 316 /* This function is called to process input characters.  The GET
 317    parameter is used to retrieve more input characters.  GET should
 318    set its parameter to point to a buffer, and return the length of
 319    the buffer; it should return 0 at end of file.  The scrubbed output
 320    characters are put into the buffer starting at TOSTART; the TOSTART
 321    buffer is TOLEN bytes in length.  The function returns the number
 322    of scrubbed characters put into TOSTART.  This will be TOLEN unless
 323    end of file was seen.  This function is arranged as a state
 324    machine, and saves its state so that it may return at any point.
 325    This is the way the old code used to work.  */
 326
 327 int
 328 do_scrub_chars (get, tostart, tolen)
 329      int (*get) PARAMS ((char *, int));
 330      char *tostart;
 331      int tolen;
 332 {
 333   char *to = tostart;
 334   char *toend = tostart + tolen;
 335   char *from;
 336   char *fromend;
 337   int fromlen;
 338   register int ch, ch2 = 0;
 339
 340   /*State 0: beginning of normal line
 341           1: After first whitespace on line (flush more white)
 342           2: After first non-white (opcode) on line (keep 1white)
 343           3: after second white on line (into operands) (flush white)
 344           4: after putting out a .line, put out digits
 345           5: parsing a string, then go to old-state
 346           6: putting out \ escape in a "d string.
 347           7: After putting out a .appfile, put out string.
 348           8: After putting out a .appfile string, flush until newline.
 349           9: After seeing symbol char in state 3 (keep 1white after symchar)
 350          10: After seeing whitespace in state 9 (keep white before symchar)
 351          11: After seeing a symbol character in state 0 (eg a label definition)
 352          -1: output string in out_string and go to the state in old_state
 353          -2: flush text until a '*' '/' is seen, then go to state old_state
 354 #ifdef TC_V850
 355          12: After seeing a dash, looking for a second dash as a start of comment.
 356 #endif
 357 #ifdef DOUBLEBAR_PARALLEL
 358          13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
 359 #endif
 360           */
 361
 362   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 363      constructs like ``.loc 1 20''.  This was turning into ``.loc
 364      120''.  States 9 and 10 ensure that a space is never dropped in
 365      between characters which could appear in a identifier.  Ian
 366      Taylor, ian@cygnus.com.
 367
 368      I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
 369      correctly on the PA (and any other target where colons are optional).
 370      Jeff Law, law@cs.utah.edu.
 371
 372      I added state 13 so that something like "cmp r1, r2 || trap #1" does not
 373      get squashed into "cmp r1,r2||trap#1", with the all important space
 374      between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
 375
 376   /* This macro gets the next input character.  */
 377
 378 #define GET()                                                   \
 379   (from < fromend                                               \
 380    ? * (unsigned char *) (from++)                               \
 381    : (saved_input = NULL,                                       \
 382       fromlen = (*get) (input_buffer, sizeof input_buffer),     \
 383       from = input_buffer,                                      \
 384       fromend = from + fromlen,                                 \
 385       (fromlen == 0                                             \
 386        ? EOF                                                    \
 387        : * (unsigned char *) (from++))))
 388
 389   /* This macro pushes a character back on the input stream.  */
 390
 391 #define UNGET(uch) (*--from = (uch))
 392
 393   /* This macro puts a character into the output buffer.  If this
 394      character fills the output buffer, this macro jumps to the label
 395      TOFULL.  We use this rather ugly approach because we need to
 396      handle two different termination conditions: EOF on the input
 397      stream, and a full output buffer.  It would be simpler if we
 398      always read in the entire input stream before processing it, but
 399      I don't want to make such a significant change to the assembler's
 400      memory usage.  */
 401
 402 #define PUT(pch)                        \
 403   do                                    \
 404     {                                   \
 405       *to++ = (pch);                    \
 406       if (to >= toend)                  \
 407         goto tofull;                    \
 408     }                                   \
 409   while (0)
 410
 411   if (saved_input != NULL)
 412     {
 413       from = saved_input;
 414       fromend = from + saved_input_len;
 415     }
 416   else
 417     {
 418       fromlen = (*get) (input_buffer, sizeof input_buffer);
 419       if (fromlen == 0)
 420         return 0;
 421       from = input_buffer;
 422       fromend = from + fromlen;
 423     }
 424
 425   while (1)
 426     {
 427       /* The cases in this switch end with continue, in order to
 428          branch back to the top of this while loop and generate the
 429          next output character in the appropriate state.  */
 430       switch (state)
 431         {
 432         case -1:
 433           ch = *out_string++;
 434           if (*out_string == '\0')
 435             {
 436               state = old_state;
 437               old_state = 3;
 438             }
 439           PUT (ch);
 440           continue;
 441
 442         case -2:
 443           for (;;)
 444             {
 445               do
 446                 {
 447                   ch = GET ();
 448
 449                   if (ch == EOF)
 450                     {
 451                       as_warn (_("end of file in comment"));
 452                       goto fromeof;
 453                     }
 454
 455                   if (ch == '\n')
 456                     PUT ('\n');
 457                 }
 458               while (ch != '*');
 459
 460               while ((ch = GET ()) == '*')
 461                 ;
 462
 463               if (ch == EOF)
 464                 {
 465                   as_warn (_("end of file in comment"));
 466                   goto fromeof;
 467                 }
 468
 469               if (ch == '/')
 470                 break;
 471
 472               UNGET (ch);
 473             }
 474
 475           state = old_state;
 476           UNGET (' ');
 477           continue;
 478
 479         case 4:
 480           ch = GET ();
 481           if (ch == EOF)
 482             goto fromeof;
 483           else if (ch >= '0' && ch <= '9')
 484             PUT (ch);
 485           else
 486             {
 487               while (ch != EOF && IS_WHITESPACE (ch))
 488                 ch = GET ();
 489               if (ch == '"')
 490                 {
 491                   UNGET (ch);
 492                   if (scrub_m68k_mri)
 493                     out_string = "\n\tappfile ";
 494                   else
 495                     out_string = "\n\t.appfile ";
 496                   old_state = 7;
 497                   state = -1;
 498                   PUT (*out_string++);
 499                 }
 500               else
 501                 {
 502                   while (ch != EOF && ch != '\n')
 503                     ch = GET ();
 504                   state = 0;
 505                   PUT (ch);
 506                 }
 507             }
 508           continue;
 509
 510         case 5:
 511           /* We are going to copy everything up to a quote character,
 512              with special handling for a backslash.  We try to
 513              optimize the copying in the simple case without using the
 514              GET and PUT macros.  */
 515           {
 516             char *s;
 517             int len;
 518
 519             for (s = from; s < fromend; s++)
 520               {
 521                 ch = *s;
 522                 /* This condition must be changed if the type of any
 523                    other character can be LEX_IS_STRINGQUOTE.  */
 524                 if (ch == '\\'
 525                     || ch == '"'
 526                     || ch == '\''
 527                     || ch == '\n')
 528                   break;
 529               }
 530             len = s - from;
 531             if (len > toend - to)
 532               len = toend - to;
 533             if (len > 0)
 534               {
 535                 memcpy (to, from, len);
 536                 to += len;
 537                 from += len;
 538               }
 539           }
 540
 541           ch = GET ();
 542           if (ch == EOF)
 543             {
 544               as_warn (_("end of file in string: inserted '\"'"));
 545               state = old_state;
 546               UNGET ('\n');
 547               PUT ('"');
 548             }
 549           else if (lex[ch] == LEX_IS_STRINGQUOTE)
 550             {
 551               state = old_state;
 552               PUT (ch);
 553             }
 554 #ifndef NO_STRING_ESCAPES
 555           else if (ch == '\\')
 556             {
 557               state = 6;
 558               PUT (ch);
 559             }
 560 #endif
 561           else if (scrub_m68k_mri && ch == '\n')
 562             {
 563               /* Just quietly terminate the string.  This permits lines like
 564                    bne  label   loop if we haven't reach end yet
 565                  */
 566               state = old_state;
 567               UNGET (ch);
 568               PUT ('\'');
 569             }
 570           else
 571             {
 572               PUT (ch);
 573             }
 574           continue;
 575
 576         case 6:
 577           state = 5;
 578           ch = GET ();
 579           switch (ch)
 580             {
 581               /* Handle strings broken across lines, by turning '\n' into
 582                  '\\' and 'n'.  */
 583             case '\n':
 584               UNGET ('n');
 585               add_newlines++;
 586               PUT ('\\');
 587               continue;
 588
 589             case '"':
 590             case '\\':
 591             case 'b':
 592             case 'f':
 593             case 'n':
 594             case 'r':
 595             case 't':
 596             case 'v':
 597             case 'x':
 598             case 'X':
 599             case '0':
 600             case '1':
 601             case '2':
 602             case '3':
 603             case '4':
 604             case '5':
 605             case '6':
 606             case '7':
 607               break;
 608 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 609             default:
 610               as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
 611               break;
 612 #else  /* ONLY_STANDARD_ESCAPES */
 613             default:
 614               /* Accept \x as x for any x */
 615               break;
 616 #endif /* ONLY_STANDARD_ESCAPES */
 617
 618             case EOF:
 619               as_warn (_("End of file in string: '\"' inserted"));
 620               PUT ('"');
 621               continue;
 622             }
 623           PUT (ch);
 624           continue;
 625
 626         case 7:
 627           ch = GET ();
 628           state = 5;
 629           old_state = 8;
 630           if (ch == EOF)
 631             goto fromeof;
 632           PUT (ch);
 633           continue;
 634
 635         case 8:
 636           do
 637             ch = GET ();
 638           while (ch != '\n' && ch != EOF);
 639           if (ch == EOF)
 640             goto fromeof;
 641           state = 0;
 642           PUT (ch);
 643           continue;
 644         }
 645
 646       /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
 647
 648       /* flushchar: */
 649       ch = GET ();
 650
 651     recycle:
 652
 653 #if defined TC_ARM && defined OBJ_ELF
 654       /* We need to watch out for .symver directives.  See the comment later
 655          in this function.  */
 656       if (symver_state == NULL)
 657         {
 658           if ((state == 0 || state == 1) && ch == symver_pseudo[0])
 659             symver_state = symver_pseudo + 1;
 660         }
 661       else
 662         {
 663           /* We advance to the next state if we find the right
 664              character.  */
 665           if (ch != '\0' && (*symver_state == ch))
 666             ++symver_state;
 667           else if (*symver_state != '\0')
 668             /* We did not get the expected character, or we didn't
 669                get a valid terminating character after seeing the
 670                entire pseudo-op, so we must go back to the beginning.  */
 671             symver_state = NULL;
 672           else
 673             {
 674               /* We've read the entire pseudo-op.  If this is the end
 675                  of the line, go back to the beginning.  */
 676               if (IS_NEWLINE (ch))
 677                 symver_state = NULL;
 678             }
 679         }
 680 #endif /* TC_ARM && OBJ_ELF */
 681
 682 #ifdef TC_M68K
 683       /* We want to have pseudo-ops which control whether we are in
 684          MRI mode or not.  Unfortunately, since m68k MRI mode affects
 685          the scrubber, that means that we need a special purpose
 686          recognizer here.  */
 687       if (mri_state == NULL)
 688         {
 689           if ((state == 0 || state == 1)
 690               && ch == mri_pseudo[0])
 691             mri_state = mri_pseudo + 1;
 692         }
 693       else
 694         {
 695           /* We advance to the next state if we find the right
 696              character, or if we need a space character and we get any
 697              whitespace character, or if we need a '0' and we get a
 698              '1' (this is so that we only need one state to handle
 699              ``.mri 0'' and ``.mri 1'').  */
 700           if (ch != '\0'
 701               && (*mri_state == ch
 702                   || (*mri_state == ' '
 703                       && lex[ch] == LEX_IS_WHITESPACE)
 704                   || (*mri_state == '0'
 705                       && ch == '1')))
 706             {
 707               mri_last_ch = ch;
 708               ++mri_state;
 709             }
 710           else if (*mri_state != '\0'
 711                    || (lex[ch] != LEX_IS_WHITESPACE
 712                        && lex[ch] != LEX_IS_NEWLINE))
 713             {
 714               /* We did not get the expected character, or we didn't
 715                  get a valid terminating character after seeing the
 716                  entire pseudo-op, so we must go back to the
 717                  beginning.  */
 718               mri_state = NULL;
 719             }
 720           else
 721             {
 722               /* We've read the entire pseudo-op.  mri_last_ch is
 723                  either '0' or '1' indicating whether to enter or
 724                  leave MRI mode.  */
 725               do_scrub_begin (mri_last_ch == '1');
 726               mri_state = NULL;
 727
 728               /* We continue handling the character as usual.  The
 729                  main gas reader must also handle the .mri pseudo-op
 730                  to control expression parsing and the like.  */
 731             }
 732         }
 733 #endif
 734
 735       if (ch == EOF)
 736         {
 737           if (state != 0)
 738             {
 739               as_warn (_("end of file not at end of a line; newline inserted"));
 740               state = 0;
 741               PUT ('\n');
 742             }
 743           goto fromeof;
 744         }
 745
 746       switch (lex[ch])
 747         {
 748         case LEX_IS_WHITESPACE:
 749           do
 750             {
 751               ch = GET ();
 752             }
 753           while (ch != EOF && IS_WHITESPACE (ch));
 754           if (ch == EOF)
 755             goto fromeof;
 756
 757           if (state == 0)
 758             {
 759               /* Preserve a single whitespace character at the
 760                  beginning of a line.  */
 761               state = 1;
 762               UNGET (ch);
 763               PUT (' ');
 764               break;
 765             }
 766
 767 #ifdef KEEP_WHITE_AROUND_COLON
 768           if (lex[ch] == LEX_IS_COLON)
 769             {
 770               /* only keep this white if there's no white *after* the colon */
 771               ch2 = GET ();
 772               UNGET (ch2);
 773               if (!IS_WHITESPACE (ch2))
 774                 {
 775                   state = 9;
 776                   UNGET (ch);
 777                   PUT (' ');
 778                   break;
 779                 }
 780             }
 781 #endif
 782           if (IS_COMMENT (ch)
 783               || ch == '/'
 784               || IS_LINE_SEPARATOR (ch))
 785             {
 786               if (scrub_m68k_mri)
 787                 {
 788                   /* In MRI mode, we keep these spaces.  */
 789                   UNGET (ch);
 790                   PUT (' ');
 791                   break;
 792                 }
 793               goto recycle;
 794             }
 795
 796           /* If we're in state 2 or 11, we've seen a non-white
 797              character followed by whitespace.  If the next character
 798              is ':', this is whitespace after a label name which we
 799              normally must ignore.  In MRI mode, though, spaces are
 800              not permitted between the label and the colon.  */
 801           if ((state == 2 || state == 11)
 802               && lex[ch] == LEX_IS_COLON
 803               && ! scrub_m68k_mri)
 804             {
 805               state = 1;
 806               PUT (ch);
 807               break;
 808             }
 809
 810           switch (state)
 811             {
 812             case 0:
 813               state++;
 814               goto recycle;     /* Punted leading sp */
 815             case 1:
 816               /* We can arrive here if we leave a leading whitespace
 817                  character at the beginning of a line.  */
 818               goto recycle;
 819             case 2:
 820               state = 3;
 821               if (to + 1 < toend)
 822                 {
 823                   /* Optimize common case by skipping UNGET/GET.  */
 824                   PUT (' ');    /* Sp after opco */
 825                   goto recycle;
 826                 }
 827               UNGET (ch);
 828               PUT (' ');
 829               break;
 830             case 3:
 831               if (scrub_m68k_mri)
 832                 {
 833                   /* In MRI mode, we keep these spaces.  */
 834                   UNGET (ch);
 835                   PUT (' ');
 836                   break;
 837                 }
 838               goto recycle;     /* Sp in operands */
 839             case 9:
 840             case 10:
 841               if (scrub_m68k_mri)
 842                 {
 843                   /* In MRI mode, we keep these spaces.  */
 844                   state = 3;
 845                   UNGET (ch);
 846                   PUT (' ');
 847                   break;
 848                 }
 849               state = 10;       /* Sp after symbol char */
 850               goto recycle;
 851             case 11:
 852               if (flag_m68k_mri
 853 #ifdef LABELS_WITHOUT_COLONS
 854                   || 1
 855 #endif
 856                   )
 857                 state = 1;
 858               else
 859                 {
 860                   /* We know that ch is not ':', since we tested that
 861                      case above.  Therefore this is not a label, so it
 862                      must be the opcode, and we've just seen the
 863                      whitespace after it.  */
 864                   state = 3;
 865                 }
 866               UNGET (ch);
 867               PUT (' ');        /* Sp after label definition.  */
 868               break;
 869             default:
 870               BAD_CASE (state);
 871             }
 872           break;
 873
 874         case LEX_IS_TWOCHAR_COMMENT_1ST:
 875           ch2 = GET ();
 876           if (ch2 == '*')
 877             {
 878               for (;;)
 879                 {
 880                   do
 881                     {
 882                       ch2 = GET ();
 883                       if (ch2 != EOF && IS_NEWLINE (ch2))
 884                         add_newlines++;
 885                     }
 886                   while (ch2 != EOF && ch2 != '*');
 887
 888                   while (ch2 == '*')
 889                     ch2 = GET ();
 890
 891                   if (ch2 == EOF || ch2 == '/')
 892                     break;
 893
 894                   /* This UNGET will ensure that we count newlines
 895                      correctly.  */
 896                   UNGET (ch2);
 897                 }
 898
 899               if (ch2 == EOF)
 900                 as_warn (_("end of file in multiline comment"));
 901
 902               ch = ' ';
 903               goto recycle;
 904             }
 905           else
 906             {
 907               if (ch2 != EOF)
 908                 UNGET (ch2);
 909               if (state == 9 || state == 10)
 910                 state = 3;
 911               PUT (ch);
 912             }
 913           break;
 914
 915         case LEX_IS_STRINGQUOTE:
 916           if (state == 10)
 917             {
 918               /* Preserve the whitespace in foo "bar" */
 919               UNGET (ch);
 920               state = 3;
 921               PUT (' ');
 922
 923               /* PUT didn't jump out.  We could just break, but we
 924                  know what will happen, so optimize a bit.  */
 925               ch = GET ();
 926               old_state = 3;
 927             }
 928           else if (state == 9)
 929             old_state = 3;
 930           else
 931             old_state = state;
 932           state = 5;
 933           PUT (ch);
 934           break;
 935
 936 #ifndef IEEE_STYLE
 937         case LEX_IS_ONECHAR_QUOTE:
 938           if (state == 10)
 939             {
 940               /* Preserve the whitespace in foo 'b' */
 941               UNGET (ch);
 942               state = 3;
 943               PUT (' ');
 944               break;
 945             }
 946           ch = GET ();
 947           if (ch == EOF)
 948             {
 949               as_warn (_("end of file after a one-character quote; \\0 inserted"));
 950               ch = 0;
 951             }
 952           if (ch == '\\')
 953             {
 954               ch = GET ();
 955               if (ch == EOF)
 956                 {
 957                   as_warn (_("end of file in escape character"));
 958                   ch = '\\';
 959                 }
 960               else
 961                 ch = process_escape (ch);
 962             }
 963           sprintf (out_buf, "%d", (int) (unsigned char) ch);
 964
 965           /* None of these 'x constants for us.  We want 'x'.  */
 966           if ((ch = GET ()) != '\'')
 967             {
 968 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 969               as_warn (_("Missing close quote: (assumed)"));
 970 #else
 971               if (ch != EOF)
 972                 UNGET (ch);
 973 #endif
 974             }
 975           if (strlen (out_buf) == 1)
 976             {
 977               PUT (out_buf[0]);
 978               break;
 979             }
 980           if (state == 9)
 981             old_state = 3;
 982           else
 983             old_state = state;
 984           state = -1;
 985           out_string = out_buf;
 986           PUT (*out_string++);
 987           break;
 988 #endif
 989
 990         case LEX_IS_COLON:
 991 #ifdef KEEP_WHITE_AROUND_COLON
 992           state = 9;
 993 #else
 994           if (state == 9 || state == 10)
 995             state = 3;
 996           else if (state != 3)
 997             state = 1;
 998 #endif
 999           PUT (ch);
1000           break;
1001
1002         case LEX_IS_NEWLINE:
1003           /* Roll out a bunch of newlines from inside comments, etc.  */
1004           if (add_newlines)
1005             {
1006               --add_newlines;
1007               UNGET (ch);
1008             }
1009           /* fall thru into... */
1010
1011         case LEX_IS_LINE_SEPARATOR:
1012           state = 0;
1013           PUT (ch);
1014           break;
1015
1016 #ifdef TC_V850
1017         case LEX_IS_DOUBLEDASH_1ST:
1018           ch2 = GET();
1019           if (ch2 != '-')
1020             {
1021               UNGET (ch2);
1022               goto de_fault;
1023             }
1024           /* read and skip to end of line */
1025           do
1026             {
1027               ch = GET ();
1028             }
1029           while (ch != EOF && ch != '\n');
1030           if (ch == EOF)
1031             {
1032               as_warn (_("end of file in comment; newline inserted"));
1033             }
1034           state = 0;
1035           PUT ('\n');
1036           break;
1037 #endif
1038 #ifdef DOUBLEBAR_PARALLEL
1039         case LEX_IS_DOUBLEBAR_1ST:
1040           ch2 = GET();
1041           if (ch2 != '|')
1042             {
1043               UNGET (ch2);
1044               goto de_fault;
1045             }
1046           /* Reset back to state 1 and pretend that we are parsing a line from
1047              just after the first white space.  */
1048           state = 1;
1049           PUT ('|');
1050           PUT ('|');
1051           break;
1052 #endif
1053         case LEX_IS_LINE_COMMENT_START:
1054           /* FIXME-someday: The two character comment stuff was badly
1055              thought out.  On i386, we want '/' as line comment start
1056              AND we want C style comments.  hence this hack.  The
1057              whole lexical process should be reworked.  xoxorich.  */
1058           if (ch == '/')
1059             {
1060               ch2 = GET ();
1061               if (ch2 == '*')
1062                 {
1063                   old_state = 3;
1064                   state = -2;
1065                   break;
1066                 }
1067               else
1068                 {
1069                   UNGET (ch2);
1070                 }
1071             } /* bad hack */
1072
1073           if (state == 0 || state == 1) /* Only comment at start of line.  */
1074             {
1075               int startch;
1076
1077               startch = ch;
1078
1079               do
1080                 {
1081                   ch = GET ();
1082                 }
1083               while (ch != EOF && IS_WHITESPACE (ch));
1084               if (ch == EOF)
1085                 {
1086                   as_warn (_("end of file in comment; newline inserted"));
1087                   PUT ('\n');
1088                   break;
1089                 }
1090               if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1091                 {
1092                   /* Not a cpp line.  */
1093                   while (ch != EOF && !IS_NEWLINE (ch))
1094                     ch = GET ();
1095                   if (ch == EOF)
1096                     as_warn (_("EOF in Comment: Newline inserted"));
1097                   state = 0;
1098                   PUT ('\n');
1099                   break;
1100                 }
1101               /* Looks like `# 123 "filename"' from cpp.  */
1102               UNGET (ch);
1103               old_state = 4;
1104               state = -1;
1105               if (scrub_m68k_mri)
1106                 out_string = "\tappline ";
1107               else
1108                 out_string = "\t.appline ";
1109               PUT (*out_string++);
1110               break;
1111             }
1112
1113 #ifdef TC_D10V
1114           /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1115              Trap is the only short insn that has a first operand that is
1116              neither register nor label.
1117              We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1.
1118              We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is already
1119              LEX_IS_LINE_COMMENT_START.  However, it is the only character in
1120              line_comment_chars for d10v, hence we can recognize it as such.  */
1121           /* An alternative approach would be to reset the state to 1 when
1122              we see '||', '<-' or '->', but that seems to be overkill.  */
1123           if (state == 10) PUT (' ');
1124 #endif
1125           /* We have a line comment character which is not at the
1126              start of a line.  If this is also a normal comment
1127              character, fall through.  Otherwise treat it as a default
1128              character.  */
1129           if (strchr (tc_comment_chars, ch) == NULL
1130               && (! scrub_m68k_mri
1131                   || (ch != '!' && ch != '*')))
1132             goto de_fault;
1133           if (scrub_m68k_mri
1134               && (ch == '!' || ch == '*' || ch == '#')
1135               && state != 1
1136               && state != 10)
1137             goto de_fault;
1138           /* Fall through.  */
1139         case LEX_IS_COMMENT_START:
1140 #if defined TC_ARM && defined OBJ_ELF
1141           /* On the ARM, `@' is the comment character.
1142              Unfortunately this is also a special character in ELF .symver
1143              directives (and .type, though we deal with those another way).  So
1144              we check if this line is such a directive, and treat the character
1145              as default if so.  This is a hack.  */
1146           if ((symver_state != NULL) && (*symver_state == 0))
1147             goto de_fault;
1148 #endif
1149           do
1150             {
1151               ch = GET ();
1152             }
1153           while (ch != EOF && !IS_NEWLINE (ch));
1154           if (ch == EOF)
1155             as_warn (_("end of file in comment; newline inserted"));
1156           state = 0;
1157           PUT ('\n');
1158           break;
1159
1160         case LEX_IS_SYMBOL_COMPONENT:
1161           if (state == 10)
1162             {
1163               /* This is a symbol character following another symbol
1164                  character, with whitespace in between.  We skipped
1165                  the whitespace earlier, so output it now.  */
1166               UNGET (ch);
1167               state = 3;
1168               PUT (' ');
1169               break;
1170             }
1171
1172           if (state == 3)
1173             state = 9;
1174
1175           /* This is a common case.  Quickly copy CH and all the
1176              following symbol component or normal characters.  */
1177           if (to + 1 < toend
1178               && mri_state == NULL
1179 #if defined TC_ARM && defined OBJ_ELF
1180               && symver_state == NULL
1181 #endif
1182               )
1183             {
1184               char *s;
1185               int len;
1186
1187               for (s = from; s < fromend; s++)
1188                 {
1189                   int type;
1190
1191                   ch2 = * (unsigned char *) s;
1192                   type = lex[ch2];
1193                   if (type != 0
1194                       && type != LEX_IS_SYMBOL_COMPONENT)
1195                     break;
1196                 }
1197               if (s > from)
1198                 {
1199                   /* Handle the last character normally, for
1200                      simplicity.  */
1201                   --s;
1202                 }
1203               len = s - from;
1204               if (len > (toend - to) - 1)
1205                 len = (toend - to) - 1;
1206               if (len > 0)
1207                 {
1208                   PUT (ch);
1209                   if (len > 8)
1210                     {
1211                       memcpy (to, from, len);
1212                       to += len;
1213                       from += len;
1214                     }
1215                   else
1216                     {
1217                       switch (len)
1218                         {
1219                         case 8: *to++ = *from++;
1220                         case 7: *to++ = *from++;
1221                         case 6: *to++ = *from++;
1222                         case 5: *to++ = *from++;
1223                         case 4: *to++ = *from++;
1224                         case 3: *to++ = *from++;
1225                         case 2: *to++ = *from++;
1226                         case 1: *to++ = *from++;
1227                         }
1228                     }
1229                   ch = GET ();
1230                 }
1231             }
1232
1233           /* Fall through.  */
1234         default:
1235         de_fault:
1236           /* Some relatively `normal' character.  */
1237           if (state == 0)
1238             {
1239               state = 11;       /* Now seeing label definition */
1240             }
1241           else if (state == 1)
1242             {
1243               state = 2;        /* Ditto */
1244             }
1245           else if (state == 9)
1246             {
1247               if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1248                 state = 3;
1249             }
1250           else if (state == 10)
1251             {
1252               if (ch == '\\')
1253                 {
1254                   /* Special handling for backslash: a backslash may
1255                      be the beginning of a formal parameter (of a
1256                      macro) following another symbol character, with
1257                      whitespace in between.  If that is the case, we
1258                      output a space before the parameter.  Strictly
1259                      speaking, correct handling depends upon what the
1260                      macro parameter expands into; if the parameter
1261                      expands into something which does not start with
1262                      an operand character, then we don't want to keep
1263                      the space.  We don't have enough information to
1264                      make the right choice, so here we are making the
1265                      choice which is more likely to be correct.  */
1266                   PUT (' ');
1267                 }
1268
1269               state = 3;
1270             }
1271           PUT (ch);
1272           break;
1273         }
1274     }
1275
1276   /*NOTREACHED*/
1277
1278  fromeof:
1279   /* We have reached the end of the input.  */
1280   return to - tostart;
1281
1282  tofull:
1283   /* The output buffer is full.  Save any input we have not yet
1284      processed.  */
1285   if (fromend > from)
1286     {
1287       saved_input = from;
1288       saved_input_len = fromend - from;
1289     }
1290   else
1291     saved_input = NULL;
1292
1293   return to - tostart;
1294 }
1295
1296 /* end of app.c */