RELAX_RELOC_*: New definitions for both BFD_ASSEMBLER and !BFD_ASSEMBLER.

[deliverable/binutils-gdb.git] / gas / app.c
diff --git a/gas/app.c b/gas/app.c

index 97c5a72718cf669726f037fcc1779307c0e1ed4d..46273c2823b61861c96951ec5ca3a5d6395182d9 100644 (file)
--- a/gas/app.c
+++ b/gas/app.c
@@ -1,5 +1,5 @@
  /* This is the Assembler Pre-Processor
-   Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 1996
+   Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 2000
     Free Software Foundation, Inc.
  
     This file is part of GAS, the GNU Assembler.
@@ -15,8 +15,9 @@
     GNU General Public License for more details.
  
     You should have received a copy of the GNU General Public License
-   along with GAS; see the file COPYING.  If not, write to
-   the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+   along with GAS; see the file COPYING.  If not, write to the Free
+   Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+   02111-1307, USA.  */
  
  /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  /* App, the assembler pre-processor.  This pre-processor strips out excess
@@ -33,6 +34,26 @@
  #endif
  #endif
  
+#ifdef TC_M68K
+/* Whether we are scrubbing in m68k MRI mode.  This is different from
+   flag_m68k_mri, because the two flags will be affected by the .mri
+   pseudo-op at different times.  */
+static int scrub_m68k_mri;
+#else
+#define scrub_m68k_mri 0
+#endif
+
+/* The pseudo-op which switches in and out of MRI mode.  See the
+   comment in do_scrub_chars.  */
+static const char mri_pseudo[] = ".mri 0";
+
+#if defined TC_ARM && defined OBJ_ELF
+/* The pseudo-op for which we need to special-case `@' characters.
+   See the comment in do_scrub_chars.  */
+static const char   symver_pseudo[] = ".symver";
+static const char * symver_state;
+#endif
+
  static char lex[256];
  static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
@@ -43,11 +64,19 @@ static const char symbol_chars[] =
  #define LEX_IS_COMMENT_START           4
  #define LEX_IS_LINE_COMMENT_START      5
  #define        LEX_IS_TWOCHAR_COMMENT_1ST      6
-#define        LEX_IS_TWOCHAR_COMMENT_2ND      7
  #define        LEX_IS_STRINGQUOTE              8
  #define        LEX_IS_COLON                    9
  #define        LEX_IS_NEWLINE                  10
  #define        LEX_IS_ONECHAR_QUOTE            11
+#ifdef TC_V850
+#define LEX_IS_DOUBLEDASH_1ST          12
+#endif
+#ifdef TC_M32R
+#define DOUBLEBAR_PARALLEL
+#endif
+#ifdef DOUBLEBAR_PARALLEL
+#define LEX_IS_DOUBLEBAR_1ST           13
+#endif
  #define IS_SYMBOL_COMPONENT(c)         (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  #define IS_WHITESPACE(c)               (lex[c] == LEX_IS_WHITESPACE)
  #define IS_LINE_SEPARATOR(c)           (lex[c] == LEX_IS_LINE_SEPARATOR)
@@ -59,24 +88,31 @@ static int process_escape PARAMS ((int));
  
  /* FIXME-soon: The entire lexer/parser thingy should be
     built statically at compile time rather than dynamically
-   each and every time the assembler is run.  xoxorich. */
+   each and every time the assembler is run.  xoxorich.  */
  
-void 
-do_scrub_begin ()
+void
+do_scrub_begin (m68k_mri)
+     int m68k_mri ATTRIBUTE_UNUSED;
  {
    const char *p;
+  int c;
  
    lex[' '] = LEX_IS_WHITESPACE;
    lex['\t'] = LEX_IS_WHITESPACE;
+  lex['\r'] = LEX_IS_WHITESPACE;
    lex['\n'] = LEX_IS_NEWLINE;
-  lex[';'] = LEX_IS_LINE_SEPARATOR;
    lex[':'] = LEX_IS_COLON;
  
-  if (! flag_m68k_mri)
+#ifdef TC_M68K
+  scrub_m68k_mri = m68k_mri;
+
+  if (! m68k_mri)
+#endif
      {
        lex['"'] = LEX_IS_STRINGQUOTE;
  
-#ifndef TC_HPPA
+#if ! defined (TC_HPPA) && ! defined (TC_I370)
+      /* I370 uses single-quotes to delimit integer, float constants */
        lex['\''] = LEX_IS_ONECHAR_QUOTE;
  #endif
  
@@ -95,7 +131,24 @@ do_scrub_begin ()
        lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
      }                          /* declare symbol characters */
  
-  for (p = comment_chars; *p; p++)
+  for (c = 128; c < 256; ++c)
+    lex[c] = LEX_IS_SYMBOL_COMPONENT;
+
+#ifdef tc_symbol_chars
+  /* This macro permits the processor to specify all characters which
+     may appear in an operand.  This will prevent the scrubber from
+     discarding meaningful whitespace in certain cases.  The i386
+     backend uses this to support prefixes, which can confuse the
+     scrubber as to whether it is parsing operands or opcodes.  */
+  for (p = tc_symbol_chars; *p; ++p)
+    lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
+#endif
+
+  /* The m68k backend wants to be able to change comment_chars.  */
+#ifndef tc_comment_chars
+#define tc_comment_chars comment_chars
+#endif
+  for (p = tc_comment_chars; *p; p++)
      {
        lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
      }                          /* declare comment chars */
@@ -110,19 +163,15 @@ do_scrub_begin ()
        lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
      }                          /* declare line separators */
  
-  /* Only allow slash-star comments if slash is not in use */
+  /* Only allow slash-star comments if slash is not in use.
+     FIXME: This isn't right.  We should always permit them.  */
    if (lex['/'] == 0)
      {
        lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
      }
-  /* FIXME-soon.  This is a bad hack but otherwise, we can't do
-     c-style comments when '/' is a line comment char. xoxorich. */
-  if (lex['*'] == 0)
-    {
-      lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
-    }
  
-  if (flag_m68k_mri)
+#ifdef TC_M68K
+  if (m68k_mri)
      {
        lex['\''] = LEX_IS_STRINGQUOTE;
        lex[';'] = LEX_IS_COMMENT_START;
@@ -131,6 +180,18 @@ do_scrub_begin ()
           then it can't be used in an expression.  */
        lex['!'] = LEX_IS_LINE_COMMENT_START;
      }
+#endif
+
+#ifdef TC_V850
+  lex['-'] = LEX_IS_DOUBLEDASH_1ST;
+#endif
+#ifdef DOUBLEBAR_PARALLEL
+  lex['|'] = LEX_IS_DOUBLEBAR_1ST;
+#endif
+#ifdef TC_D30V
+  /* must do this if we want VLIW instructions with "->" or "<-" */
+  lex['-'] = LEX_IS_SYMBOL_COMPONENT;
+#endif
  }                              /* do_scrub_begin() */
  
  /* Saved state of the scrubber */
@@ -141,22 +202,32 @@ static char out_buf[20];
  static int add_newlines;
  static char *saved_input;
  static int saved_input_len;
+static char input_buffer[32 * 1024];
+static const char *mri_state;
+static char mri_last_ch;
  
  /* Data structure for saving the state of app across #include's.  Note that
     app is called asynchronously to the parsing of the .include's, so our
     state at the time .include is interpreted is completely unrelated.
     That's why we have to save it all.  */
  
-struct app_save
-  {
-    int state;
-    int old_state;
-    char *out_string;
-    char out_buf[sizeof (out_buf)];
-    int add_newlines;
-    char *saved_input;
-    int saved_input_len;
-  };
+struct app_save {
+  int          state;
+  int          old_state;
+  char *       out_string;
+  char         out_buf[sizeof (out_buf)];
+  int          add_newlines;
+  char *       saved_input;
+  int          saved_input_len;
+#ifdef TC_M68K
+  int          scrub_m68k_mri;
+#endif
+  const char * mri_state;
+  char         mri_last_ch;
+#if defined TC_ARM && defined OBJ_ELF
+  const char * symver_state;
+#endif
+};
  
  char *
  app_push ()
@@ -169,10 +240,24 @@ app_push ()
    saved->out_string = out_string;
    memcpy (saved->out_buf, out_buf, sizeof (out_buf));
    saved->add_newlines = add_newlines;
-  saved->saved_input = saved_input;
-  saved->saved_input_len = saved_input_len;
+  if (saved_input == NULL)
+    saved->saved_input = NULL;
+  else
+    {
+      saved->saved_input = xmalloc (saved_input_len);
+      memcpy (saved->saved_input, saved_input, saved_input_len);
+      saved->saved_input_len = saved_input_len;
+    }
+#ifdef TC_M68K
+  saved->scrub_m68k_mri = scrub_m68k_mri;
+#endif
+  saved->mri_state = mri_state;
+  saved->mri_last_ch = mri_last_ch;
+#if defined TC_ARM && defined OBJ_ELF
+  saved->symver_state = symver_state;
+#endif
  
-  /* do_scrub_begin() is not useful, just wastes time. */
+  /* do_scrub_begin() is not useful, just wastes time.  */
  
    state = 0;
    saved_input = NULL;
@@ -180,27 +265,43 @@ app_push ()
    return (char *) saved;
  }
  
-void 
+void
  app_pop (arg)
       char *arg;
  {
    register struct app_save *saved = (struct app_save *) arg;
  
-  /* There is no do_scrub_end (). */
+  /* There is no do_scrub_end ().  */
    state = saved->state;
    old_state = saved->old_state;
    out_string = saved->out_string;
    memcpy (out_buf, saved->out_buf, sizeof (out_buf));
    add_newlines = saved->add_newlines;
-  saved_input = saved->saved_input;
-  saved_input_len = saved->saved_input_len;
+  if (saved->saved_input == NULL)
+    saved_input = NULL;
+  else
+    {
+      assert (saved->saved_input_len <= (int) (sizeof input_buffer));
+      memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
+      saved_input = input_buffer;
+      saved_input_len = saved->saved_input_len;
+      free (saved->saved_input);
+    }
+#ifdef TC_M68K
+  scrub_m68k_mri = saved->scrub_m68k_mri;
+#endif
+  mri_state = saved->mri_state;
+  mri_last_ch = saved->mri_last_ch;
+#if defined TC_ARM && defined OBJ_ELF
+  symver_state = saved->symver_state;
+#endif
  
    free (arg);
  }                              /* app_pop() */
  
  /* @@ This assumes that \n &c are the same on host and target.  This is not
     necessarily true.  */
-static int 
+static int
  process_escape (ch)
       int ch;
  {
@@ -238,7 +339,7 @@ process_escape (ch)
  
  int
  do_scrub_chars (get, tostart, tolen)
-     int (*get) PARAMS ((char **));
+     int (*get) PARAMS ((char *, int));
       char *tostart;
       int tolen;
  {
@@ -248,7 +349,6 @@ do_scrub_chars (get, tostart, tolen)
    char *fromend;
    int fromlen;
    register int ch, ch2 = 0;
-  int not_cpp_line = 0;
  
    /*State 0: beginning of normal line
           1: After first whitespace on line (flush more white)
@@ -264,6 +364,12 @@ do_scrub_chars (get, tostart, tolen)
          11: After seeing a symbol character in state 0 (eg a label definition)
          -1: output string in out_string and go to the state in old_state
          -2: flush text until a '*' '/' is seen, then go to state old_state
+#ifdef TC_V850
+         12: After seeing a dash, looking for a second dash as a start of comment.
+#endif
+#ifdef DOUBLEBAR_PARALLEL
+        13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression separator.
+#endif
           */
  
    /* I added states 9 and 10 because the MIPS ECOFF assembler uses
@@ -274,23 +380,24 @@ do_scrub_chars (get, tostart, tolen)
  
       I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
       correctly on the PA (and any other target where colons are optional).
-     Jeff Law, law@cs.utah.edu.  */
+     Jeff Law, law@cs.utah.edu.
+
+     I added state 13 so that something like "cmp r1, r2 || trap #1" does not
+     get squashed into "cmp r1,r2||trap#1", with the all important space
+     between the 'trap' and the '#1' being eliminated.  nickc@cygnus.com  */
  
    /* This macro gets the next input character.  */
  
-#define GET()                          \
-  (from < fromend                      \
-   ? *from++                           \
-   : ((saved_input != NULL             \
-       ? (free (saved_input),          \
-         saved_input = NULL,           \
-         0)                            \
-       : 0),                           \
-      fromlen = (*get) (&from),                \
-      fromend = from + fromlen,                \
-      (fromlen == 0                    \
-       ? EOF                           \
-       : *from++)))
+#define GET()                                                  \
+  (from < fromend                                              \
+   ? * (unsigned char *) (from++)                              \
+   : (saved_input = NULL,                                      \
+      fromlen = (*get) (input_buffer, sizeof input_buffer),    \
+      from = input_buffer,                                     \
+      fromend = from + fromlen,                                        \
+      (fromlen == 0                                            \
+       ? EOF                                                   \
+       : * (unsigned char *) (from++))))
  
    /* This macro pushes a character back on the input stream.  */
  
@@ -321,9 +428,10 @@ do_scrub_chars (get, tostart, tolen)
      }
    else
      {
-      fromlen = (*get) (&from);
+      fromlen = (*get) (input_buffer, sizeof input_buffer);
        if (fromlen == 0)
         return 0;
+      from = input_buffer;
        fromend = from + fromlen;
      }
  
@@ -353,7 +461,7 @@ do_scrub_chars (get, tostart, tolen)
  
                   if (ch == EOF)
                     {
-                     as_warn ("end of file in comment");
+                     as_warn (_("end of file in comment"));
                       goto fromeof;
                     }
  
@@ -367,7 +475,7 @@ do_scrub_chars (get, tostart, tolen)
  
               if (ch == EOF)
                 {
-                 as_warn ("end of file in comment");
+                 as_warn (_("end of file in comment"));
                   goto fromeof;
                 }
  
@@ -378,7 +486,7 @@ do_scrub_chars (get, tostart, tolen)
             }
  
           state = old_state;
-         PUT (' ');
+         UNGET (' ');
           continue;
  
         case 4:
@@ -394,7 +502,7 @@ do_scrub_chars (get, tostart, tolen)
               if (ch == '"')
                 {
                   UNGET (ch);
-                 if (flag_m68k_mri)
+                 if (scrub_m68k_mri)
                     out_string = "\n\tappfile ";
                   else
                     out_string = "\n\t.appfile ";
@@ -446,7 +554,7 @@ do_scrub_chars (get, tostart, tolen)
           ch = GET ();
           if (ch == EOF)
             {
-             as_warn ("end of file in string: inserted '\"'");
+             as_warn (_("end of file in string: inserted '\"'"));
               state = old_state;
               UNGET ('\n');
               PUT ('"');
@@ -463,7 +571,7 @@ do_scrub_chars (get, tostart, tolen)
               PUT (ch);
             }
  #endif
-         else if (flag_m68k_mri && ch == '\n')
+         else if (scrub_m68k_mri && ch == '\n')
             {
               /* Just quietly terminate the string.  This permits lines like
                    bne  label   loop if we haven't reach end yet
@@ -512,7 +620,7 @@ do_scrub_chars (get, tostart, tolen)
               break;
  #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
             default:
-             as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
+             as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
               break;
  #else  /* ONLY_STANDARD_ESCAPES */
             default:
@@ -521,7 +629,7 @@ do_scrub_chars (get, tostart, tolen)
  #endif /* ONLY_STANDARD_ESCAPES */
  
             case EOF:
-             as_warn ("End of file in string: '\"' inserted");
+             as_warn (_("End of file in string: '\"' inserted"));
               PUT ('"');
               continue;
             }
@@ -552,12 +660,96 @@ do_scrub_chars (get, tostart, tolen)
  
        /* flushchar: */
        ch = GET ();
+
      recycle:
+
+#if defined TC_ARM && defined OBJ_ELF
+      /* We need to watch out for .symver directives.  See the comment later
+        in this function.  */
+      if (symver_state == NULL)
+       {
+         if ((state == 0 || state == 1) && ch == symver_pseudo[0])
+           symver_state = symver_pseudo + 1;
+       }
+      else
+       {
+         /* We advance to the next state if we find the right
+            character.  */
+         if (ch != '\0' && (*symver_state == ch))
+           ++symver_state;
+         else if (*symver_state != '\0')
+           /* We did not get the expected character, or we didn't
+              get a valid terminating character after seeing the
+              entire pseudo-op, so we must go back to the beginning.  */
+           symver_state = NULL;
+         else
+           {
+             /* We've read the entire pseudo-op.  If this is the end
+                of the line, go back to the beginning.  */
+             if (IS_NEWLINE (ch))
+               symver_state = NULL;
+           }
+       }
+#endif /* TC_ARM && OBJ_ELF */
+
+#ifdef TC_M68K
+      /* We want to have pseudo-ops which control whether we are in
+         MRI mode or not.  Unfortunately, since m68k MRI mode affects
+         the scrubber, that means that we need a special purpose
+         recognizer here.  */
+      if (mri_state == NULL)
+       {
+         if ((state == 0 || state == 1)
+             && ch == mri_pseudo[0])
+           mri_state = mri_pseudo + 1;
+       }
+      else
+       {
+         /* We advance to the next state if we find the right
+            character, or if we need a space character and we get any
+            whitespace character, or if we need a '0' and we get a
+            '1' (this is so that we only need one state to handle
+            ``.mri 0'' and ``.mri 1'').  */
+         if (ch != '\0'
+             && (*mri_state == ch
+                 || (*mri_state == ' '
+                     && lex[ch] == LEX_IS_WHITESPACE)
+                 || (*mri_state == '0'
+                     && ch == '1')))
+           {
+             mri_last_ch = ch;
+             ++mri_state;
+           }
+         else if (*mri_state != '\0'
+                  || (lex[ch] != LEX_IS_WHITESPACE
+                      && lex[ch] != LEX_IS_NEWLINE))
+           {
+             /* We did not get the expected character, or we didn't
+                get a valid terminating character after seeing the
+                entire pseudo-op, so we must go back to the
+                beginning.  */
+             mri_state = NULL;
+           }
+         else
+           {
+             /* We've read the entire pseudo-op.  mri_last_ch is
+                 either '0' or '1' indicating whether to enter or
+                 leave MRI mode.  */
+             do_scrub_begin (mri_last_ch == '1');
+             mri_state = NULL;
+
+             /* We continue handling the character as usual.  The
+                 main gas reader must also handle the .mri pseudo-op
+                 to control expression parsing and the like.  */
+           }
+       }
+#endif
+
        if (ch == EOF)
         {
           if (state != 0)
             {
-             as_warn ("end of file not at end of a line; newline inserted");
+             as_warn (_("end of file not at end of a line; newline inserted"));
               state = 0;
               PUT ('\n');
             }
@@ -585,14 +777,27 @@ do_scrub_chars (get, tostart, tolen)
               break;
             }
  
+#ifdef KEEP_WHITE_AROUND_COLON
+         if (lex[ch] == LEX_IS_COLON)
+           {
+             /* Only keep this white if there's no white *after* the
+                 colon.  */
+             ch2 = GET ();
+             UNGET (ch2);
+             if (!IS_WHITESPACE (ch2))
+               {
+                 state = 9;
+                 UNGET (ch);
+                 PUT (' ');
+                 break;
+               }
+           }
+#endif
           if (IS_COMMENT (ch)
               || ch == '/'
               || IS_LINE_SEPARATOR (ch))
             {
-             /* cpp never outputs a leading space before the #, so
-                try to avoid being confused.  */
-             not_cpp_line = 1;
-             if (flag_m68k_mri)
+             if (scrub_m68k_mri)
                 {
                   /* In MRI mode, we keep these spaces.  */
                   UNGET (ch);
@@ -609,7 +814,7 @@ do_scrub_chars (get, tostart, tolen)
              not permitted between the label and the colon.  */
           if ((state == 2 || state == 11)
               && lex[ch] == LEX_IS_COLON
-             && ! flag_m68k_mri)
+             && ! scrub_m68k_mri)
             {
               state = 1;
               PUT (ch);
@@ -637,7 +842,7 @@ do_scrub_chars (get, tostart, tolen)
               PUT (' ');
               break;
             case 3:
-             if (flag_m68k_mri)
+             if (scrub_m68k_mri)
                 {
                   /* In MRI mode, we keep these spaces.  */
                   UNGET (ch);
@@ -647,7 +852,7 @@ do_scrub_chars (get, tostart, tolen)
               goto recycle;     /* Sp in operands */
             case 9:
             case 10:
-             if (flag_m68k_mri)
+             if (scrub_m68k_mri)
                 {
                   /* In MRI mode, we keep these spaces.  */
                   state = 3;
@@ -658,7 +863,16 @@ do_scrub_chars (get, tostart, tolen)
               state = 10;       /* Sp after symbol char */
               goto recycle;
             case 11:
-             state = 1;
+             if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
+               state = 1;
+             else
+               {
+                 /* We know that ch is not ':', since we tested that
+                     case above.  Therefore this is not a label, so it
+                     must be the opcode, and we've just seen the
+                     whitespace after it.  */
+                 state = 3;
+               }
               UNGET (ch);
               PUT (' ');        /* Sp after label definition.  */
               break;
@@ -669,7 +883,7 @@ do_scrub_chars (get, tostart, tolen)
  
         case LEX_IS_TWOCHAR_COMMENT_1ST:
           ch2 = GET ();
-         if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
+         if (ch2 == '*')
             {
               for (;;)
                 {
@@ -679,26 +893,40 @@ do_scrub_chars (get, tostart, tolen)
                       if (ch2 != EOF && IS_NEWLINE (ch2))
                         add_newlines++;
                     }
-                 while (ch2 != EOF &&
-                        (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
+                 while (ch2 != EOF && ch2 != '*');
  
-                 while (ch2 != EOF &&
-                        (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
-                   {
-                     ch2 = GET ();
-                   }
+                 while (ch2 == '*')
+                   ch2 = GET ();
  
-                 if (ch2 == EOF
-                     || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
+                 if (ch2 == EOF || ch2 == '/')
                     break;
-                 UNGET (ch);
+
+                 /* This UNGET will ensure that we count newlines
+                     correctly.  */
+                 UNGET (ch2);
                 }
+
               if (ch2 == EOF)
-               as_warn ("end of file in multiline comment");
+               as_warn (_("end of file in multiline comment"));
  
               ch = ' ';
               goto recycle;
             }
+#ifdef DOUBLESLASH_LINE_COMMENTS
+         else if (ch2 == '/')
+           {
+             do
+               {
+                 ch = GET ();
+               }
+             while (ch != EOF && !IS_NEWLINE (ch));
+             if (ch == EOF)
+               as_warn ("end of file in comment; newline inserted");
+             state = 0;
+             PUT ('\n');
+             break;
+           }
+#endif
           else
             {
               if (ch2 != EOF)
@@ -743,7 +971,7 @@ do_scrub_chars (get, tostart, tolen)
           ch = GET ();
           if (ch == EOF)
             {
-             as_warn ("end of file after a one-character quote; \\0 inserted");
+             as_warn (_("end of file after a one-character quote; \\0 inserted"));
               ch = 0;
             }
           if (ch == '\\')
@@ -751,7 +979,7 @@ do_scrub_chars (get, tostart, tolen)
               ch = GET ();
               if (ch == EOF)
                 {
-                 as_warn ("end of file in escape character");
+                 as_warn (_("end of file in escape character"));
                   ch = '\\';
                 }
               else
@@ -763,7 +991,7 @@ do_scrub_chars (get, tostart, tolen)
           if ((ch = GET ()) != '\'')
             {
  #ifdef REQUIRE_CHAR_CLOSE_QUOTE
-             as_warn ("Missing close quote: (assumed)");
+             as_warn (_("Missing close quote: (assumed)"));
  #else
               if (ch != EOF)
                 UNGET (ch);
@@ -785,10 +1013,14 @@ do_scrub_chars (get, tostart, tolen)
  #endif
  
         case LEX_IS_COLON:
+#ifdef KEEP_WHITE_AROUND_COLON
+         state = 9;
+#else
           if (state == 9 || state == 10)
             state = 3;
           else if (state != 3)
             state = 1;
+#endif
           PUT (ch);
           break;
  
@@ -799,37 +1031,75 @@ do_scrub_chars (get, tostart, tolen)
               --add_newlines;
               UNGET (ch);
             }
-         /* fall thru into... */
+         /* Fall through.  */
  
         case LEX_IS_LINE_SEPARATOR:
           state = 0;
           PUT (ch);
           break;
  
+#ifdef TC_V850
+       case LEX_IS_DOUBLEDASH_1ST:
+         ch2 = GET ();
+         if (ch2 != '-')
+           {
+             UNGET (ch2);
+             goto de_fault;
+           }
+         /* Read and skip to end of line.  */
+         do
+           {
+             ch = GET ();
+           }
+         while (ch != EOF && ch != '\n');
+         if (ch == EOF)
+           {
+             as_warn (_("end of file in comment; newline inserted"));
+           }
+         state = 0;
+         PUT ('\n');
+         break;
+#endif
+#ifdef DOUBLEBAR_PARALLEL
+       case LEX_IS_DOUBLEBAR_1ST:
+         ch2 = GET ();
+         if (ch2 != '|')
+           {
+             UNGET (ch2);
+             goto de_fault;
+           }
+         /* Reset back to state 1 and pretend that we are parsing a line from
+            just after the first white space.  */
+         state = 1;
+         PUT ('|');
+         PUT ('|');
+         break;
+#endif
         case LEX_IS_LINE_COMMENT_START:
-         if (state == 0)       /* Only comment at start of line.  */
+         /* FIXME-someday: The two character comment stuff was badly
+            thought out.  On i386, we want '/' as line comment start
+            AND we want C style comments.  hence this hack.  The
+            whole lexical process should be reworked.  xoxorich.  */
+         if (ch == '/')
             {
-             /* FIXME-someday: The two character comment stuff was
-                badly thought out.  On i386, we want '/' as line
-                comment start AND we want C style comments.  hence
-                this hack.  The whole lexical process should be
-                reworked.  xoxorich.  */
-             if (ch == '/')
+             ch2 = GET ();
+             if (ch2 == '*')
                 {
-                 ch2 = GET ();
-                 if (ch2 == '*')
-                   {
-                     state = -2;
-                     break;
-                   }
-                 else
-                   {
-                     UNGET (ch2);
-                   }
-               } /* bad hack */
+                 old_state = 3;
+                 state = -2;
+                 break;
+               }
+             else
+               {
+                 UNGET (ch2);
+               }
+           } /* bad hack */
+
+         if (state == 0 || state == 1) /* Only comment at start of line.  */
+           {
+             int startch;
  
-             if (ch != '#')
-               not_cpp_line = 1;
+             startch = ch;
  
               do
                 {
@@ -838,26 +1108,26 @@ do_scrub_chars (get, tostart, tolen)
               while (ch != EOF && IS_WHITESPACE (ch));
               if (ch == EOF)
                 {
-                 as_warn ("end of file in comment; newline inserted");
+                 as_warn (_("end of file in comment; newline inserted"));
                   PUT ('\n');
                   break;
                 }
-             if (ch < '0' || ch > '9' || not_cpp_line)
+             if (ch < '0' || ch > '9' || state != 0 || startch != '#')
                 {
-                 /* Non-numerics:  Eat whole comment line */
+                 /* Not a cpp line.  */
                   while (ch != EOF && !IS_NEWLINE (ch))
                     ch = GET ();
                   if (ch == EOF)
-                   as_warn ("EOF in Comment: Newline inserted");
+                   as_warn (_("EOF in Comment: Newline inserted"));
                   state = 0;
                   PUT ('\n');
                   break;
                 }
-             /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
+             /* Looks like `# 123 "filename"' from cpp.  */
               UNGET (ch);
               old_state = 4;
               state = -1;
-             if (flag_m68k_mri)
+             if (scrub_m68k_mri)
                 out_string = "\tappline ";
               else
                 out_string = "\t.appline ";
@@ -865,28 +1135,55 @@ do_scrub_chars (get, tostart, tolen)
               break;
             }
  
+#ifdef TC_D10V
+         /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
+            Trap is the only short insn that has a first operand that is
+            neither register nor label.
+            We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1 .
+            We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
+            already LEX_IS_LINE_COMMENT_START.  However, it is the
+            only character in line_comment_chars for d10v, hence we
+            can recognize it as such.  */
+         /* An alternative approach would be to reset the state to 1 when
+            we see '||', '<-' or '->', but that seems to be overkill.  */
+         if (state == 10)
+           PUT (' ');
+#endif
           /* We have a line comment character which is not at the
              start of a line.  If this is also a normal comment
              character, fall through.  Otherwise treat it as a default
              character.  */
-         if (strchr (comment_chars, ch) == NULL
-             && (! flag_m68k_mri
+         if (strchr (tc_comment_chars, ch) == NULL
+             && (! scrub_m68k_mri
                   || (ch != '!' && ch != '*')))
             goto de_fault;
-         if (flag_m68k_mri
-             && (ch == '!' || ch == '*')
+         if (scrub_m68k_mri
+             && (ch == '!' || ch == '*' || ch == '#')
               && state != 1
               && state != 10)
             goto de_fault;
           /* Fall through.  */
         case LEX_IS_COMMENT_START:
+#if defined TC_ARM && defined OBJ_ELF
+         /* On the ARM, `@' is the comment character.
+            Unfortunately this is also a special character in ELF .symver
+            directives (and .type, though we deal with those another way).
+            So we check if this line is such a directive, and treat
+            the character as default if so.  This is a hack.  */
+         if ((symver_state != NULL) && (*symver_state == 0))
+           goto de_fault;
+#endif
+#ifdef WARN_COMMENTS
+         if (!found_comment)
+           as_where (&found_comment_file, &found_comment);
+#endif
           do
             {
               ch = GET ();
             }
           while (ch != EOF && !IS_NEWLINE (ch));
           if (ch == EOF)
-           as_warn ("end of file in comment; newline inserted");
+           as_warn (_("end of file in comment; newline inserted"));
           state = 0;
           PUT ('\n');
           break;
@@ -908,7 +1205,12 @@ do_scrub_chars (get, tostart, tolen)
  
           /* This is a common case.  Quickly copy CH and all the
               following symbol component or normal characters.  */
-         if (to + 1 < toend)
+         if (to + 1 < toend
+             && mri_state == NULL
+#if defined TC_ARM && defined OBJ_ELF
+             && symver_state == NULL
+#endif
+             )
             {
               char *s;
               int len;
@@ -917,7 +1219,7 @@ do_scrub_chars (get, tostart, tolen)
                 {
                   int type;
  
-                 ch2 = *s;
+                 ch2 = *(unsigned char *) s;
                   type = lex[ch2];
                   if (type != 0
                       && type != LEX_IS_SYMBOL_COMPONENT)
@@ -954,7 +1256,7 @@ do_scrub_chars (get, tostart, tolen)
                         case 2: *to++ = *from++;
                         case 1: *to++ = *from++;
                         }
-                   } 
+                   }
                   ch = GET ();
                 }
             }
@@ -978,6 +1280,23 @@ do_scrub_chars (get, tostart, tolen)
             }
           else if (state == 10)
             {
+             if (ch == '\\')
+               {
+                 /* Special handling for backslash: a backslash may
+                    be the beginning of a formal parameter (of a
+                    macro) following another symbol character, with
+                    whitespace in between.  If that is the case, we
+                    output a space before the parameter.  Strictly
+                    speaking, correct handling depends upon what the
+                    macro parameter expands into; if the parameter
+                    expands into something which does not start with
+                    an operand character, then we don't want to keep
+                    the space.  We don't have enough information to
+                    make the right choice, so here we are making the
+                    choice which is more likely to be correct.  */
+                 PUT (' ');
+               }
+
               state = 3;
             }
           PUT (ch);
@@ -996,23 +1315,12 @@ do_scrub_chars (get, tostart, tolen)
       processed.  */
    if (fromend > from)
      {
-      char *save;
-
-      save = (char *) xmalloc (fromend - from);
-      memcpy (save, from, fromend - from);
-      if (saved_input != NULL)
-       free (saved_input);
-      saved_input = save;
+      saved_input = from;
        saved_input_len = fromend - from;
      }
    else
-    {
-      if (saved_input != NULL)
-       {
-         free (saved_input);
-         saved_input = NULL;
-       }
-    }
+    saved_input = NULL;
+
    return to - tostart;
  }