[deliverable/binutils-gdb.git] / gas / app.c

/* This is the Assembler Pre-Processor
   Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> at the start of a line into a
   .appline <number>\n.appfile <filename> pair.  This needs better
   error-handling.
   */

#include <stdio.h>
#include "as.h"			/* For BAD_CASE() only */

#if (__STDC__ != 1) && !defined(const)
#define const			/* Nothing */
#endif

/* Character classification table, indexed by character value.  Each
   entry holds one of the LEX_IS_* codes below, or 0 for a character
   that has not been given a class.  Filled in by do_scrub_begin ().  */
static char lex[256];
/* Every character listed here is classified as LEX_IS_SYMBOL_COMPONENT
   by do_scrub_begin ().  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
/* Class tests.  The argument is used directly as an index into lex[],
   so it must lie in 0..255; in particular EOF has to be filtered out
   before any of these is applied.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich. */

/* Fill in the lex[] classification table.

   Built-in defaults are installed first; the symbol characters and the
   comment_chars, line_comment_chars and line_separator_chars tables
   are then applied in that order, each overriding whatever was stored
   before it.  Finally '/' and '*' become the two-character comment
   delimiters, but only if nothing above has claimed them.  */
void 
do_scrub_begin ()
{
  const char *cp;

  /* Built-in defaults.  */
  lex[':'] = LEX_IS_COLON;
  lex['"'] = LEX_IS_STRINGQUOTE;
  lex[';'] = LEX_IS_LINE_SEPARATOR;
  lex['\n'] = LEX_IS_NEWLINE;
  lex['\t'] = LEX_IS_WHITESPACE;
  lex[' '] = LEX_IS_WHITESPACE;

  /* A single quote either delimits strings, introduces a
     one-character constant, or (TC_HPPA without
     SINGLE_QUOTE_STRINGS) is left unclassified.  */
#ifdef SINGLE_QUOTE_STRINGS
  lex['\''] = LEX_IS_STRINGQUOTE;
#else
#ifndef TC_HPPA
  lex['\''] = LEX_IS_ONECHAR_QUOTE;
#endif
#endif

  /* The tables below override the defaults above, e.g. if ';' is a
     comment char, then it isn't a line separator.  */
  cp = symbol_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_SYMBOL_COMPONENT;

  cp = comment_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_COMMENT_START;

  cp = line_comment_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_LINE_COMMENT_START;

  cp = line_separator_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_LINE_SEPARATOR;

  /* Only allow slash-star comments if slash is not in use.  */
  if (!lex['/'])
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;

  /* FIXME-soon.  This is a bad hack but otherwise, we can't do
     c-style comments when '/' is a line comment char. xoxorich. */
  if (!lex['*'])
    lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
}				/* do_scrub_begin() */

/* Stream used by scrub_from_file () and scrub_to_file ().  */
FILE *scrub_file;

/* Character source backed by scrub_file: returns whatever getc ()
   returns for that stream, including EOF.  */
int 
scrub_from_file ()
{
  int next = getc (scrub_file);

  return next;
}

/* Push CH back onto scrub_file so that the next read returns it.  */
void 
scrub_to_file (ch)
     int ch;
{
  (void) ungetc (ch, scrub_file);
}				/* scrub_to_file() */

/* In-memory character source: scrub_string is the next character to
   hand out, scrub_last_string is one past the final character.  */
char *scrub_string;
char *scrub_last_string;

/* Return the next character of the in-memory text, or EOF once
   scrub_string has reached scrub_last_string.

   The character is returned as an unsigned char value, the way getc ()
   does it.  Returning the plain char would yield negative values for
   bytes >= 0x80 wherever char is signed: 0xFF would be mistaken for
   EOF and the rest would be used as negative indices into lex[].  */
int 
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push CH back in front of the current read position.

   EOF is ignored, matching ungetc ().  The scrubber hands EOF to its
   unget callback in a few places; storing it would plant a spurious
   0xFF byte in front of the text.  */
void 
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}				/* scrub_to_string() */

/* Saved state of the scrubber */
/* Current state of the do_scrub_next_char () state machine.  */
static int state;
/* State to resume once a temporary state (-1, -2 or 5) is finished.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
/* Holds the decimal text generated for a one-character quote.  */
static char out_buf[20];
/* Number of newlines swallowed (inside comments or continued strings)
   that still have to be replayed when the next newline is seen.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* One member per file-level scrubber variable, under the same name;
   filled in by app_push () and copied back by app_pop ().  */
struct app_save
  {
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };

/* Snapshot the scrubber.

   Copies the state machine variables and the current input source into
   a freshly xmalloc'ed struct app_save and returns it as an opaque
   char *.  The caller hands that pointer to app_pop () to restore the
   scrubber and release the storage.  The live variables themselves are
   left untouched.  */
char *
app_push ()
{
  struct app_save *snapshot;

  snapshot = (struct app_save *) xmalloc (sizeof (struct app_save));

  /* Where the input is coming from.  */
  snapshot->scrub_file = scrub_file;
  snapshot->scrub_string = scrub_string;
  snapshot->scrub_last_string = scrub_last_string;

  /* Where the state machine is.  */
  snapshot->state = state;
  snapshot->old_state = old_state;
  snapshot->add_newlines = add_newlines;
  snapshot->out_string = out_string;
  memcpy (snapshot->out_buf, out_buf, sizeof (snapshot->out_buf));

  /* do_scrub_begin() is not useful, just wastes time. */
  return (char *) snapshot;
}

/* Restore the scrubber from ARG, a snapshot previously returned by
   app_push (), and free the snapshot.  ARG must not be used again
   afterwards.  */
void 
app_pop (arg)
     char *arg;
{
  struct app_save *snapshot = (struct app_save *) arg;

  /* There is no do_scrub_end (). */

  /* Input source.  */
  scrub_file = snapshot->scrub_file;
  scrub_string = snapshot->scrub_string;
  scrub_last_string = snapshot->scrub_last_string;

  /* State machine.  */
  state = snapshot->state;
  old_state = snapshot->old_state;
  add_newlines = snapshot->add_newlines;
  out_string = snapshot->out_string;
  memcpy (out_buf, snapshot->out_buf, sizeof (out_buf));

  free (arg);
}				/* app_pop() */

/* Translate the character following a backslash into the character it
   denotes: b, f, n, r and t map to the corresponding control
   characters; every other character (quotes included) stands for
   itself.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
int 
process_escape (ch)
     char ch;
{
  static const char letters[] = "bfnrt";
  static const char controls[] = "\b\f\n\r\t";
  int i;

  for (i = 0; letters[i] != '\0'; i++)
    {
      if (ch == letters[i])
	return controls[i];
    }

  return ch;
}
/* Return the next character of the scrubbed input, or EOF once the
   input is exhausted.

   GET is called with no arguments and must return the next raw input
   character, or EOF.  UNGET is called with one character and must push
   it back so that the following GET returns it; it may be handed EOF.
   scrub_from_file / scrub_to_file and scrub_from_string /
   scrub_to_string are such pairs.

   The scrubber is a state machine.  Its state lives in the file-level
   variables state, old_state, out_string, out_buf and add_newlines, so
   each call continues where the previous one stopped.  */
int 
do_scrub_next_char (get, unget)
     int (*get) ();
     void (*unget) ();
{
  /*State 0: beginning of normal line
	  1: After first whitespace on line (flush more white)
	  2: After first non-white (opcode) on line (keep 1white)
	  3: after second white on line (into operands) (flush white)
	  4: after putting out a .appline, put out digits
	  5: parsing a string, then go to old-state
	  6: putting out \ escape in a "d string.
	  7: After putting out a .appfile, put out string.
	  8: After putting out a .appfile string, flush until newline.
	  9: After seeing symbol char in state 3 (keep 1white after symchar)
	 10: After seeing whitespace in state 9 (keep white before symchar)
	  -1: output string in out_string and go to the state in old_state
	  -2: flush text until a '*' '/' is seen, then go to state old_state
	  */

  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
     constructs like ``.loc 1 20''.  This was turning into ``.loc
     120''.  States 9 and 10 ensure that a space is never dropped in
     between characters which could appear in a identifier.  Ian
     Taylor, ian@cygnus.com.  */

  register int ch, ch2 = 0;

  switch (state)
    {
    case -1:
      /* Emit the pending string one character per call; switch back to
	 old_state together with the last character.  */
      ch = *out_string++;
      if (*out_string == 0)
	{
	  state = old_state;
	  old_state = 3;
	}
      return ch;

    case -2:
      /* Inside a slash-star comment that began at the start of a line.
	 Newlines are passed through so the line count stays right; the
	 comment itself is replaced by a single space.  */
      for (;;)
	{
	  do
	    {
	      ch = (*get) ();
	    }
	  while (ch != EOF && ch != '\n' && ch != '*');
	  if (ch == '\n' || ch == EOF)
	    return ch;

	  /* At this point, ch must be a '*' */
	  while ((ch = (*get) ()) == '*')
	    {
	      ;
	    }
	  if (ch == EOF || ch == '/')
	    break;
	  (*unget) (ch);
	}
      state = old_state;
      return ' ';

    case 4:
      /* Copy the line number digits, then look for a quoted file
	 name.  */
      ch = (*get) ();
      if (ch == EOF || (ch >= '0' && ch <= '9'))
	return ch;
      else
	{
	  while (ch != EOF && IS_WHITESPACE (ch))
	    ch = (*get) ();
	  if (ch == '"')
	    {
	      (*unget) (ch);
	      out_string = "\n.appfile ";
	      old_state = 7;
	      state = -1;
	      return *out_string++;
	    }
	  else
	    {
	      /* No file name: discard the rest of the line.  */
	      while (ch != EOF && ch != '\n')
		ch = (*get) ();
	      state = 0;
	      return ch;
	    }
	}

    case 5:
      ch = (*get) ();
      /* EOF has to be tested before ch is used to index lex[].  */
      if (ch == EOF)
	{
	  as_warn ("End of file in string: inserted '\"'");
	  state = old_state;
	  (*unget) ('\n');
	  return '"';
	}
      else if (lex[ch] == LEX_IS_STRINGQUOTE)
	{
	  state = old_state;
	  return ch;
	}
      else if (ch == '\\')
	{
	  state = 6;
	  return ch;
	}
      else
	{
	  return ch;
	}

    case 6:
      /* The backslash itself has already been emitted by state 5.  */
      state = 5;
      ch = (*get) ();
      switch (ch)
	{
	  /* Handle strings broken across lines, by turning '\n' into
	     '\\' and 'n'.  */
	case '\n':
	  (*unget) ('n');
	  add_newlines++;
	  return '\\';

	case '"':
	case '\\':
#ifdef TC_HPPA
	case 'x':	/* '\\x' introduces escaped sequences on the PA */
#endif
	case 'b':
	case 'f':
	case 'n':
	case 'r':
	case 't':
#ifdef BACKSLASH_V
	case 'v':
#endif /* BACKSLASH_V */
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	  break;
#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
	default:
	  as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	  break;
#else /* ONLY_STANDARD_ESCAPES */
	default:
	  /* Accept \x as x for any x */
	  break;
#endif /* ONLY_STANDARD_ESCAPES */

	case EOF:
	  as_warn ("End of file in string: '\"' inserted");
	  return '"';
	}
      return ch;

    case 7:
      /* Emit the opening quote of the file name, then copy the string
	 via state 5 and finish in state 8.  */
      ch = (*get) ();
      state = 5;
      old_state = 8;
      return ch;

    case 8:
      /* Discard everything up to the end of the line.  Stop at EOF as
	 well, otherwise a final line without a newline would never
	 terminate this loop.  */
      do
	ch = (*get) ();
      while (ch != EOF && ch != '\n');
      state = 0;
      return ch;
    }

  /* OK, we are somewhere in states 0 through 4 or 9 through 10 */

  /* flushchar: */
  ch = (*get) ();
recycle:
  if (ch == EOF)
    {
      if (state != 0)
	as_warn ("End of file not at end of a line: Newline inserted.");
      return ch;
    }

  switch (lex[ch])
    {
    case LEX_IS_WHITESPACE:
      /* Collapse the whole run of whitespace; ch ends up as the first
	 character after it.  */
      do
	ch = (*get) ();
      while (ch != EOF && IS_WHITESPACE (ch));
      if (ch == EOF)
	return ch;

      /* Whitespace in front of a comment or line separator is simply
	 dropped.  */
      if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
	{
	  goto recycle;
	}
#ifdef MRI
      (*unget) (ch);		/* Put back */
      return ' ';		/* Always return one space at start of line */
#endif

      /* If we're in state 2, we've seen a non-white
	 character followed by whitespace.  If the next
	 character is ':', this is whitespace after a label
	 name which we can ignore.  */
      if (state == 2 && lex[ch] == LEX_IS_COLON)
	{
	  state = 0;
	  return ch;
	}

      switch (state)
	{
	case 0:
	  state++;
	  goto recycle;		/* Punted leading sp */
	case 1:
	  BAD_CASE (state);	/* We can't get here */
	case 2:
	  state = 3;
	  (*unget) (ch);
	  return ' ';		/* Sp after opco */
	case 3:
	  goto recycle;		/* Sp in operands */
	case 9:
	case 10:
	  state = 10;		/* Sp after symbol char */
	  goto recycle;
	default:
	  BAD_CASE (state);
	}
      break;

    case LEX_IS_TWOCHAR_COMMENT_1ST:
      ch2 = (*get) ();
      if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	{
	  /* Skip the comment, counting the newlines inside it in
	     add_newlines so they can be replayed later.  */
	  for (;;)
	    {
	      do
		{
		  ch2 = (*get) ();
		  if (ch2 != EOF && IS_NEWLINE (ch2))
		    add_newlines++;
		}
	      while (ch2 != EOF &&
		     (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));

	      while (ch2 != EOF &&
		     (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
		{
		  ch2 = (*get) ();
		}

	      if (ch2 == EOF
		  || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
		break;
	      /* Not the end of the comment.  Push back the character
		 just read (not the opening '/') so that the loop above
		 sees it again and counts it if it is a newline.  */
	      (*unget) (ch2);
	    }
	  if (ch2 == EOF)
	    as_warn ("End of file in multiline comment");

	  /* The comment counts as a single blank.  */
	  ch = ' ';
	  goto recycle;
	}
      else
	{
	  if (ch2 != EOF)
	    (*unget) (ch2);
	  if (state == 9 || state == 10)
	    state = 3;
	  return ch;
	}
      break;

    case LEX_IS_STRINGQUOTE:
      if (state == 9 || state == 10)
	old_state = 3;
      else
	old_state = state;
      state = 5;
      return ch;
#ifndef MRI
#ifndef IEEE_STYLE
    case LEX_IS_ONECHAR_QUOTE:
      /* Replace 'x or 'x' by the decimal value of x.  */
      ch = (*get) ();
      if (ch == EOF)
	{
	  as_warn ("End-of-file after a one-character quote; \\000 inserted");
	  ch = 0;
	}
      if (ch == '\\')
	{
	  ch = (*get) ();
	  ch = process_escape (ch);
	}
      sprintf (out_buf, "%d", (int) (unsigned char) ch);


      /* None of these 'x constants for us.  We want 'x'.  */
      if ((ch = (*get) ()) != '\'')
	{
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
	  as_warn ("Missing close quote: (assumed)");
#else
	  (*unget) (ch);
#endif
	}
      /* A single digit can be returned directly; anything longer is
	 emitted through state -1.  */
      if (strlen (out_buf) == 1)
	{
	  return out_buf[0];
	}
      if (state == 9 || state == 10)
	old_state = 3;
      else
	old_state = state;
      state = -1;
      out_string = out_buf;
      return *out_string++;
#endif
#endif
    case LEX_IS_COLON:
      if (state == 9 || state == 10)
	state = 3;
      else if (state != 3)
	state = 0;
      return ch;

    case LEX_IS_NEWLINE:
      /* Roll out a bunch of newlines from inside comments, etc.  */
      if (add_newlines)
	{
	  --add_newlines;
	  (*unget) (ch);
	}
      /* fall thru into... */

    case LEX_IS_LINE_SEPARATOR:
      state = 0;
      return ch;

    case LEX_IS_LINE_COMMENT_START:
      if (state == 0)		/* Only comment at start of line.  */
	{
	  /* FIXME-someday: The two character comment stuff was badly
	     thought out.  On i386, we want '/' as line comment start
	     AND we want C style comments.  hence this hack.  The
	     whole lexical process should be reworked.  xoxorich.  */
	  if (ch == '/')
	    {
	      ch2 = (*get) ();
	      if (ch2 == '*')
		{
		  /* NOTE(review): old_state is not set here, so state -2
		     resumes in whatever old_state was left at -- confirm
		     that this is intended.  */
		  state = -2;
		  return (do_scrub_next_char (get, unget));
		}
	      else
		{
		  (*unget) (ch2);
		}
	    }			/* bad hack */

	  do
	    ch = (*get) ();
	  while (ch != EOF && IS_WHITESPACE (ch));
	  if (ch == EOF)
	    {
	      as_warn ("EOF in comment:  Newline inserted");
	      return '\n';
	    }
	  if (ch < '0' || ch > '9')
	    {
	      /* Non-numerics:  Eat whole comment line */
	      while (ch != EOF && !IS_NEWLINE (ch))
		ch = (*get) ();
	      if (ch == EOF)
		as_warn ("EOF in Comment: Newline inserted");
	      state = 0;
	      return '\n';
	    }
	  /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
	  (*unget) (ch);
	  old_state = 4;
	  state = -1;
	  out_string = ".appline ";
	  return *out_string++;
	}

      /* We have a line comment character which is not at the start of
	 a line.  If this is also a normal comment character, fall
	 through.  Otherwise treat it as a default character.  */
      if (strchr (comment_chars, ch) == NULL)
	goto de_fault;
      /* Fall through.  */
    case LEX_IS_COMMENT_START:
      /* Discard the rest of the line and emit just the newline.  */
      do
	ch = (*get) ();
      while (ch != EOF && !IS_NEWLINE (ch));
      if (ch == EOF)
	as_warn ("EOF in comment:  Newline inserted");
      state = 0;
      return '\n';

    case LEX_IS_SYMBOL_COMPONENT:
      if (state == 10)
	{
	  /* This is a symbol character following another symbol
	     character, with whitespace in between.  We skipped the
	     whitespace earlier, so output it now.  */
	  (*unget) (ch);
	  state = 3;
	  return ' ';
	}
      if (state == 3)
	state = 9;
      /* Fall through.  */
    default:
    de_fault:
      /* Some relatively `normal' character.  */
      if (state == 0)
	{
	  state = 2;		/* Now seeing opcode */
	  return ch;
	}
      else if (state == 1)
	{
	  state = 2;		/* Ditto */
	  return ch;
	}
      else if (state == 9)
	{
	  if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
	    state = 3;
	  return ch;
	}
      else if (state == 10)
	{
	  state = 3;
	  return ch;
	}
      else
	{
	  return ch;		/* Opcode or operands already */
	}
    }
  return -1;
}

#ifdef TEST

/* Stand-alone filter for trying out the scrubber: reads assembler text
   from stdin and writes the scrubbed text to stdout.

   The three tables below are read by do_scrub_begin () and are not
   defined anywhere else in this file (presumably the target back end
   provides them in a real build -- confirm), so the harness supplies
   its own.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* Read through the file-based callbacks; do_scrub_next_char ()
     takes a get/unget pair, not a stream.  */
  scrub_file = stdin;
  do_scrub_begin ();
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal stand-in for the assembler's warning routine: prints STR and
   a newline on stderr.  Any further arguments are ignored, so format
   directives in STR are printed literally.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif

/* end of app.c */
Commit	Line	Data
	1	/* This is the Assembler Pre-Processor
	2	Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
	3
	4	This file is part of GAS, the GNU Assembler.
	5
	6	GAS is free software; you can redistribute it and/or modify
	7	it under the terms of the GNU General Public License as published by
	8	the Free Software Foundation; either version 2, or (at your option)
	9	any later version.
	10
	11	GAS is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	GNU General Public License for more details.
	15
	16	You should have received a copy of the GNU General Public License
	17	along with GAS; see the file COPYING. If not, write to
	18	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
	19
	20	/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
	21	/* App, the assembler pre-processor. This pre-processor strips out excess
	22	spaces, turns single-quoted characters into a decimal constant, and turns
	23	# <number> <filename> <garbage> into a .line <number>\n.file <filename>
	24	pair. This needs better error-handling.
	25	*/
	26
	27	#include <stdio.h>
	28	#include "as.h" /* For BAD_CASE() only */
	29
	30	#if (__STDC__ != 1) && !defined(const)
	31	#define const /* Nothing */
	32	#endif
	33
	34	static char lex[256];
	35	static const char symbol_chars[] =
	36	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
	37
	38	#define LEX_IS_SYMBOL_COMPONENT 1
	39	#define LEX_IS_WHITESPACE 2
	40	#define LEX_IS_LINE_SEPARATOR 3
	41	#define LEX_IS_COMMENT_START 4
	42	#define LEX_IS_LINE_COMMENT_START 5
	43	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
	44	#define LEX_IS_TWOCHAR_COMMENT_2ND 7
	45	#define LEX_IS_STRINGQUOTE 8
	46	#define LEX_IS_COLON 9
	47	#define LEX_IS_NEWLINE 10
	48	#define LEX_IS_ONECHAR_QUOTE 11
	49	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
	50	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
	51	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
	52	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
	53	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
	54	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
	55
	56	/* FIXME-soon: The entire lexer/parser thingy should be
	57	built statically at compile time rather than dynamically
	58	each and every time the assembler is run. xoxorich. */
	59
	60	void
	61	do_scrub_begin ()
	62	{
	63	const char *p;
	64
	65	lex[' '] = LEX_IS_WHITESPACE;
	66	lex['\t'] = LEX_IS_WHITESPACE;
	67	lex['\n'] = LEX_IS_NEWLINE;
	68	lex[';'] = LEX_IS_LINE_SEPARATOR;
	69	lex['"'] = LEX_IS_STRINGQUOTE;
	70	#ifndef TC_HPPA
	71	lex['\''] = LEX_IS_ONECHAR_QUOTE;
	72	#endif
	73	lex[':'] = LEX_IS_COLON;
	74
	75
	76
	77	#ifdef SINGLE_QUOTE_STRINGS
	78	lex['\''] = LEX_IS_STRINGQUOTE;
	79	#endif
	80
	81	/* Note that these override the previous defaults, e.g. if ';'
	82
	83	is a comment char, then it isn't a line separator. */
	84	for (p = symbol_chars; *p; ++p)
	85	{
	86	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
	87	} /* declare symbol characters */
	88
	89	for (p = comment_chars; *p; p++)
	90	{
	91	lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
	92	} /* declare comment chars */
	93
	94	for (p = line_comment_chars; *p; p++)
	95	{
	96	lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
	97	} /* declare line comment chars */
	98
	99	for (p = line_separator_chars; *p; p++)
	100	{
	101	lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
	102	} /* declare line separators */
	103
	104	/* Only allow slash-star comments if slash is not in use */
	105	if (lex['/'] == 0)
	106	{
	107	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
	108	}
	109	/* FIXME-soon. This is a bad hack but otherwise, we
	110	can't do c-style comments when '/' is a line
	111	comment char. xoxorich. */
	112	if (lex['*'] == 0)
	113	{
	114	lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	115	}
	116	} /* do_scrub_begin() */
	117
	118	FILE *scrub_file;
	119
	120	int
	121	scrub_from_file ()
	122	{
	123	return getc (scrub_file);
	124	}
	125
	126	void
	127	scrub_to_file (ch)
	128	int ch;
	129	{
	130	ungetc (ch, scrub_file);
	131	} /* scrub_to_file() */
	132
	133	char *scrub_string;
	134	char *scrub_last_string;
	135
	136	int
	137	scrub_from_string ()
	138	{
	139	return scrub_string == scrub_last_string ? EOF : *scrub_string++;
	140	} /* scrub_from_string() */
	141
	142	void
	143	scrub_to_string (ch)
	144	int ch;
	145	{
	146	*--scrub_string = ch;
	147	} /* scrub_to_string() */
	148
	149	/* Saved state of the scrubber */
	150	static int state;
	151	static int old_state;
	152	static char *out_string;
	153	static char out_buf[20];
	154	static int add_newlines = 0;
	155
	156	/* Data structure for saving the state of app across #include's. Note that
	157	app is called asynchronously to the parsing of the .include's, so our
	158	state at the time .include is interpreted is completely unrelated.
	159	That's why we have to save it all. */
	160
	161	struct app_save
	162	{
	163	int state;
	164	int old_state;
	165	char *out_string;
	166	char out_buf[sizeof (out_buf)];
	167	int add_newlines;
	168	char *scrub_string;
	169	char *scrub_last_string;
	170	FILE *scrub_file;
	171	};
	172
	173	char *
	174	app_push ()
	175	{
	176	register struct app_save *saved;
	177
	178	saved = (struct app_save ) xmalloc (sizeof (saved));
	179	saved->state = state;
	180	saved->old_state = old_state;
	181	saved->out_string = out_string;
	182	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
	183	saved->add_newlines = add_newlines;
	184	saved->scrub_string = scrub_string;
	185	saved->scrub_last_string = scrub_last_string;
	186	saved->scrub_file = scrub_file;
	187
	188	/* do_scrub_begin() is not useful, just wastes time. */
	189	return (char *) saved;
	190	}
	191
	192	void
	193	app_pop (arg)
	194	char *arg;
	195	{
	196	register struct app_save saved = (struct app_save ) arg;
	197
	198	/* There is no do_scrub_end (). */
	199	state = saved->state;
	200	old_state = saved->old_state;
	201	out_string = saved->out_string;
	202	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
	203	add_newlines = saved->add_newlines;
	204	scrub_string = saved->scrub_string;
	205	scrub_last_string = saved->scrub_last_string;
	206	scrub_file = saved->scrub_file;
	207
	208	free (arg);
	209	} /* app_pop() */
	210
	211	/* @@ This assumes that \n &c are the same on host and target. This is not
	212	necessarily true. */
	213	int
	214	process_escape (ch)
	215	char ch;
	216	{
	217	switch (ch)
	218	{
	219	case 'b':
	220	return '\b';
	221	case 'f':
	222	return '\f';
	223	case 'n':
	224	return '\n';
	225	case 'r':
	226	return '\r';
	227	case 't':
	228	return '\t';
	229	case '\'':
	230	return '\'';
	231	case '"':
	232	return '\"';
	233	default:
	234	return ch;
	235	}
	236	}
	237	int
	238	do_scrub_next_char (get, unget)
	239	int (*get) ();
	240	void (*unget) ();
	241	{
	242	/*State 0: beginning of normal line
	243	1: After first whitespace on line (flush more white)
	244	2: After first non-white (opcode) on line (keep 1white)
	245	3: after second white on line (into operands) (flush white)
	246	4: after putting out a .line, put out digits
	247	5: parsing a string, then go to old-state
	248	6: putting out \ escape in a "d string.
	249	7: After putting out a .appfile, put out string.
	250	8: After putting out a .appfile string, flush until newline.
	251	9: After seeing symbol char in state 3 (keep 1white after symchar)
	252	10: After seeing whitespace in state 9 (keep white before symchar)
	253	-1: output string in out_string and go to the state in old_state
	254	-2: flush text until a '*' '/' is seen, then go to state old_state
	255	*/
	256
	257	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
	258	constructs like ``.loc 1 20''. This was turning into ``.loc
	259	120''. States 9 and 10 ensure that a space is never dropped in
	260	between characters which could appear in a identifier. Ian
	261	Taylor, ian@cygnus.com. */
	262
	263	register int ch, ch2 = 0;
	264
	265	switch (state)
	266	{
	267	case -1:
	268	ch = *out_string++;
	269	if (*out_string == 0)
	270	{
	271	state = old_state;
	272	old_state = 3;
	273	}
	274	return ch;
	275
	276	case -2:
	277	for (;;)
	278	{
	279	do
	280	{
	281	ch = (*get) ();
	282	}
	283	while (ch != EOF && ch != '\n' && ch != '*');
	284	if (ch == '\n' \|\| ch == EOF)
	285	return ch;
	286
	287	/* At this point, ch must be a '' /
	288	while ((ch = (get) ()) == '')
	289	{
	290	;
	291	}
	292	if (ch == EOF \|\| ch == '/')
	293	break;
	294	(*unget) (ch);
	295	}
	296	state = old_state;
	297	return ' ';
	298
	299	case 4:
	300	ch = (*get) ();
	301	if (ch == EOF \|\| (ch >= '0' && ch <= '9'))
	302	return ch;
	303	else
	304	{
	305	while (ch != EOF && IS_WHITESPACE (ch))
	306	ch = (*get) ();
	307	if (ch == '"')
	308	{
	309	(*unget) (ch);
	310	out_string = "\n.appfile ";
	311	old_state = 7;
	312	state = -1;
	313	return *out_string++;
	314	}
	315	else
	316	{
	317	while (ch != EOF && ch != '\n')
	318	ch = (*get) ();
	319	state = 0;
	320	return ch;
	321	}
	322	}
	323
	324	case 5:
	325	ch = (*get) ();
	326	if (lex[ch] == LEX_IS_STRINGQUOTE)
	327	{
	328	state = old_state;
	329	return ch;
	330	}
	331	else if (ch == '\\')
	332	{
	333	state = 6;
	334	return ch;
	335	}
	336	else if (ch == EOF)
	337	{
	338	as_warn ("End of file in string: inserted '\"'");
	339	state = old_state;
	340	(*unget) ('\n');
	341	return '"';
	342	}
	343	else
	344	{
	345	return ch;
	346	}
	347
	348	case 6:
	349	state = 5;
	350	ch = (*get) ();
	351	switch (ch)
	352	{
	353	/* Handle strings broken across lines, by turning '\n' into
	354	'\\' and 'n'. */
	355	case '\n':
	356	(*unget) ('n');
	357	add_newlines++;
	358	return '\\';
	359
	360	case '"':
	361	case '\\':
	362	#ifdef TC_HPPA
	363	case 'x': /* '\\x' introduces escaped sequences on the PA */
	364	#endif
	365	case 'b':
	366	case 'f':
	367	case 'n':
	368	case 'r':
	369	case 't':
	370	#ifdef BACKSLASH_V
	371	case 'v':
	372	#endif /* BACKSLASH_V */
	373	case '0':
	374	case '1':
	375	case '2':
	376	case '3':
	377	case '4':
	378	case '5':
	379	case '6':
	380	case '7':
	381	break;
	382	#if defined(IGNORE_NONSTANDARD_ESCAPES) \| defined(ONLY_STANDARD_ESCAPES)
	383	default:
	384	as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	385	break;
	386	#else /* ONLY_STANDARD_ESCAPES */
	387	default:
	388	/* Accept \x as x for any x */
	389	break;
	390	#endif /* ONLY_STANDARD_ESCAPES */
	391
	392	case EOF:
	393	as_warn ("End of file in string: '\"' inserted");
	394	return '"';
	395	}
	396	return ch;
	397
	398	case 7:
	399	ch = (*get) ();
	400	state = 5;
	401	old_state = 8;
	402	return ch;
	403
	404	case 8:
	405	do
	406	ch = (*get) ();
	407	while (ch != '\n');
	408	state = 0;
	409	return ch;
	410	}
	411
	412	/* OK, we are somewhere in states 0 through 4 or 9 through 10 */
	413
	414	/* flushchar: */
	415	ch = (*get) ();
	416	recycle:
	417	if (ch == EOF)
	418	{
	419	if (state != 0)
	420	as_warn ("End of file not at end of a line: Newline inserted.");
	421	return ch;
	422	}
	423
	424	switch (lex[ch])
	425	{
	426	case LEX_IS_WHITESPACE:
	427	do
	428	ch = (*get) ();
	429	while (ch != EOF && IS_WHITESPACE (ch));
	430	if (ch == EOF)
	431	return ch;
	432
	433	if (IS_COMMENT (ch) \|\| (state == 0 && IS_LINE_COMMENT (ch)) \|\| ch == '/' \|\| IS_LINE_SEPARATOR (ch))
	434	{
	435	goto recycle;
	436	}
	437	#ifdef MRI
	438	(unget) (ch); / Put back */
	439	return ' '; /* Always return one space at start of line */
	440	#endif
	441
	442	/* If we're in state 2, we've seen a non-white
	443	character followed by whitespace. If the next
	444	character is ':', this is whitespace after a label
	445	name which we can ignore. */
	446	if (state == 2 && lex[ch] == LEX_IS_COLON)
	447	{
	448	state = 0;
	449	return ch;
	450	}
	451
	452	switch (state)
	453	{
	454	case 0:
	455	state++;
	456	goto recycle; /* Punted leading sp */
	457	case 1:
	458	BAD_CASE (state); /* We can't get here */
	459	case 2:
	460	state = 3;
	461	(*unget) (ch);
	462	return ' '; /* Sp after opco */
	463	case 3:
	464	goto recycle; /* Sp in operands */
	465	case 9:
	466	case 10:
	467	state = 10; /* Sp after symbol char */
	468	goto recycle;
	469	default:
	470	BAD_CASE (state);
	471	}
	472	break;
	473
	474	case LEX_IS_TWOCHAR_COMMENT_1ST:
	475	ch2 = (*get) ();
	476	if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	477	{
	478	for (;;)
	479	{
	480	do
	481	{
	482	ch2 = (*get) ();
	483	if (ch2 != EOF && IS_NEWLINE (ch2))
	484	add_newlines++;
	485	}
	486	while (ch2 != EOF &&
	487	(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
	488
	489	while (ch2 != EOF &&
	490	(lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
	491	{
	492	ch2 = (*get) ();
	493	}
	494
	495	if (ch2 == EOF
	496	\|\| lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
	497	break;
	498	(*unget) (ch);
	499	}
	500	if (ch2 == EOF)
	501	as_warn ("End of file in multiline comment");
	502
	503	ch = ' ';
	504	goto recycle;
	505	}
	506	else
	507	{
	508	if (ch2 != EOF)
	509	(*unget) (ch2);
	510	if (state == 9 \|\| state == 10)
	511	state = 3;
	512	return ch;
	513	}
	514	break;
	515
	516	case LEX_IS_STRINGQUOTE:
	517	if (state == 9 \|\| state == 10)
	518	old_state = 3;
	519	else
	520	old_state = state;
	521	state = 5;
	522	return ch;
	523	#ifndef MRI
	524	#ifndef IEEE_STYLE
	525	case LEX_IS_ONECHAR_QUOTE:
	526	ch = (*get) ();
	527	if (ch == EOF)
	528	{
	529	as_warn ("End-of-file after a one-character quote; \\000 inserted");
	530	ch = 0;
	531	}
	532	if (ch == '\\')
	533	{
	534	ch = (*get) ();
	535	ch = process_escape (ch);
	536	}
	537	sprintf (out_buf, "%d", (int) (unsigned char) ch);
	538
	539
	540	/* None of these 'x constants for us. We want 'x'. */
	541	if ((ch = (*get) ()) != '\'')
	542	{
	543	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
	544	as_warn ("Missing close quote: (assumed)");
	545	#else
	546	(*unget) (ch);
	547	#endif
	548	}
	549	if (strlen (out_buf) == 1)
	550	{
	551	return out_buf[0];
	552	}
	553	if (state == 9 \|\| state == 10)
	554	old_state = 3;
	555	else
	556	old_state = state;
	557	state = -1;
	558	out_string = out_buf;
	559	return *out_string++;
	560	#endif
	561	#endif
	562	case LEX_IS_COLON:
	563	if (state == 9 \|\| state == 10)
	564	state = 3;
	565	else if (state != 3)
	566	state = 0;
	567	return ch;
	568
	569	case LEX_IS_NEWLINE:
	570	/* Roll out a bunch of newlines from inside comments, etc. */
	571	if (add_newlines)
	572	{
	573	--add_newlines;
	574	(*unget) (ch);
	575	}
	576	/* fall thru into... */
	577
	578	case LEX_IS_LINE_SEPARATOR:
	579	state = 0;
	580	return ch;
	581
	582	case LEX_IS_LINE_COMMENT_START:
	583	if (state == 0) /* Only comment at start of line. */
	584	{
	585	/* FIXME-someday: The two character comment stuff was badly
	586	thought out. On i386, we want '/' as line comment start
	587	AND we want C style comments. hence this hack. The
	588	whole lexical process should be reworked. xoxorich. */
	589	if (ch == '/')
	590	{
	591	ch2 = (*get) ();
	592	if (ch2 == '*')
	593	{
	594	state = -2;
	595	return (do_scrub_next_char (get, unget));
	596	}
	597	else
	598	{
	599	(*unget) (ch2);
	600	}
	601	} /* bad hack */
	602
	603	do
	604	ch = (*get) ();
	605	while (ch != EOF && IS_WHITESPACE (ch));
	606	if (ch == EOF)
	607	{
	608	as_warn ("EOF in comment: Newline inserted");
	609	return '\n';
	610	}
	611	if (ch < '0' \|\| ch > '9')
	612	{
	613	/* Non-numerics: Eat whole comment line */
	614	while (ch != EOF && !IS_NEWLINE (ch))
	615	ch = (*get) ();
	616	if (ch == EOF)
	617	as_warn ("EOF in Comment: Newline inserted");
	618	state = 0;
	619	return '\n';
	620	}
	621	/* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
	622	(*unget) (ch);
	623	old_state = 4;
	624	state = -1;
	625	out_string = ".appline ";
	626	return *out_string++;
	627	}
	628
	629	/* We have a line comment character which is not at the start of
	630	a line. If this is also a normal comment character, fall
	631	through. Otherwise treat it as a default character. */
	632	if (strchr (comment_chars, ch) == NULL)
	633	goto de_fault;
	634	/* Fall through. */
	635	case LEX_IS_COMMENT_START:
	636	do
	637	ch = (*get) ();
	638	while (ch != EOF && !IS_NEWLINE (ch));
	639	if (ch == EOF)
	640	as_warn ("EOF in comment: Newline inserted");
	641	state = 0;
	642	return '\n';
	643
	644	case LEX_IS_SYMBOL_COMPONENT:
	645	if (state == 10)
	646	{
	647	/* This is a symbol character following another symbol
	648	character, with whitespace in between. We skipped the
	649	whitespace earlier, so output it now. */
	650	(*unget) (ch);
	651	state = 3;
	652	return ' ';
	653	}
	654	if (state == 3)
	655	state = 9;
	656	/* Fall through. */
	657	default:
	658	de_fault:
	659	/* Some relatively `normal' character. */
	660	if (state == 0)
	661	{
	662	state = 2; /* Now seeing opcode */
	663	return ch;
	664	}
	665	else if (state == 1)
	666	{
	667	state = 2; /* Ditto */
	668	return ch;
	669	}
	670	else if (state == 9)
	671	{
	672	if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
	673	state = 3;
	674	return ch;
	675	}
	676	else if (state == 10)
	677	{
	678	state = 3;
	679	return ch;
	680	}
	681	else
	682	{
	683	return ch; /* Opcode or operands already */
	684	}
	685	}
	686	return -1;
	687	}
	688
	689	#ifdef TEST
	690
	691	const char comment_chars[] = "\|";
	692	const char line_comment_chars[] = "#";
	693
	694	main ()
	695	{
	696	int ch;
	697
	698	app_begin ();
	699	while ((ch = do_scrub_next_char (stdin)) != EOF)
	700	putc (ch, stdout);
	701	}
	702
	703	as_warn (str)
	704	char *str;
	705	{
	706	fputs (str, stderr);
	707	putc ('\n', stderr);
	708	}
	709
	710	#endif
	711
	712	/* end of app.c */