[deliverable/binutils-gdb.git] / gas / app.c

/* This is the Assembler Pre-Processor
   Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
   pair.  This needs better error-handling.
   */

#include <stdio.h>
#include "as.h"			/* For BAD_CASE() only */

#if (__STDC__ != 1) && !defined(const)
#define const			/* Nothing */
#endif

/* Character classification table, indexed by character code.  Each entry
   holds one of the LEX_IS_* values below, or 0 for a character with no
   special meaning to the scrubber.  It is filled in by do_scrub_begin ().  */
static char lex[256];

/* Characters that do_scrub_begin () marks as LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11

/* Class tests.  The argument is used directly as an index into lex[], so
   it must be in the range 0..255; in particular it must not be EOF.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich. */

/* Fill in the lex[] classification table.  The fixed defaults go in
   first; the symbol, comment, line-comment and line-separator character
   lists are then applied in that order, each one overriding whatever an
   earlier step stored for the same character.  */
void 
do_scrub_begin ()
{
  const char *cp;

  /* Built-in defaults.  */
  lex[' '] = LEX_IS_WHITESPACE;
  lex['\t'] = LEX_IS_WHITESPACE;
  lex['\n'] = LEX_IS_NEWLINE;
  lex[';'] = LEX_IS_LINE_SEPARATOR;
  lex['"'] = LEX_IS_STRINGQUOTE;
#ifndef TC_HPPA
  lex['\''] = LEX_IS_ONECHAR_QUOTE;
#endif
  lex[':'] = LEX_IS_COLON;

#ifdef SINGLE_QUOTE_STRINGS
  lex['\''] = LEX_IS_STRINGQUOTE;
#endif

  /* The lists below override the defaults above, e.g. if ';' is a
     comment char, then it isn't a line separator.  */

  /* Symbol characters.  */
  cp = symbol_chars;
  while (*cp != '\0')
    {
      lex[(unsigned char) *cp] = LEX_IS_SYMBOL_COMPONENT;
      cp++;
    }

  /* Comment characters.  */
  cp = comment_chars;
  while (*cp != '\0')
    {
      lex[(unsigned char) *cp] = LEX_IS_COMMENT_START;
      cp++;
    }

  /* Line comment characters.  */
  cp = line_comment_chars;
  while (*cp != '\0')
    {
      lex[(unsigned char) *cp] = LEX_IS_LINE_COMMENT_START;
      cp++;
    }

  /* Line separators.  */
  cp = line_separator_chars;
  while (*cp != '\0')
    {
      lex[(unsigned char) *cp] = LEX_IS_LINE_SEPARATOR;
      cp++;
    }

  /* Slash-star comments are only recognized through the table when
     nothing above has claimed '/'.  */
  if (!lex['/'])
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;

  /* FIXME-soon.  This is a bad hack but otherwise, we can't do c-style
     comments when '/' is a line comment char.  xoxorich.  */
  if (!lex['*'])
    lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
}				/* do_scrub_begin() */

/* Stream that scrub_from_file () reads and scrub_to_file () pushes
   back onto.  */
FILE *scrub_file;

/* "get" routine for file input: fetch the next character of scrub_file,
   or EOF, exactly as getc () reports it.  */
int 
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}

/* "unget" routine for file input: push CH back onto scrub_file.  The
   result of ungetc () is deliberately not examined.  */
void 
scrub_to_file (ch)
     int ch;
{
  (void) ungetc (ch, scrub_file);
}				/* scrub_to_file() */

/* Input window for scrubbing an in-memory string: scrub_string is the
   next character to hand out, scrub_last_string is one past the final
   character.  */
char *scrub_string;
char *scrub_last_string;

/* "get" routine for string input.  Returns the next character as a value
   in the range 0..255 and advances scrub_string, or returns EOF once
   scrub_string has reached scrub_last_string.

   The character is read through an unsigned char pointer so that, where
   plain char is signed, bytes with the top bit set are not returned as
   negative numbers.  Otherwise 0xFF would be indistinguishable from EOF
   and any other high byte would index lex[] with a negative subscript.  */
int 
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return *(unsigned char *) scrub_string++;
}				/* scrub_from_string() */

/* "unget" routine for string input: step scrub_string back by one
   character and store CH at the new position.  */
void 
scrub_to_string (ch)
     int ch;
{
  scrub_string--;
  *scrub_string = ch;
}				/* scrub_to_string() */

/* Saved state of the scrubber */

/* Current state of the do_scrub_next_char () state machine; the state
   numbers are listed at the top of that function.  */
static int state;

/* State to switch to when a temporary state (-1, -2 or 5) finishes.  */
static int old_state;

/* Next character to hand out while in state -1.  */
static char *out_string;

/* Holds the decimal text that a one-character quote ('x) is turned
   into.  */
static char out_buf[20];

/* Incremented for each newline swallowed inside a slash-star comment or
   a string continued across lines; the LEX_IS_NEWLINE case of
   do_scrub_next_char () uses it to emit extra newlines afterwards.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* One saved copy of every piece of scrubber state.  Filled in by
   app_push () and copied back by app_pop (); each member mirrors the
   file-level variable of the same name.  */
struct app_save
  {
    int state;			/* State machine state.  */
    int old_state;		/* State to return to.  */
    char *out_string;		/* Pending output pointer (state -1).  */
    char out_buf[sizeof (out_buf)];	/* Copy of the one-char-quote buffer.  */
    int add_newlines;		/* Newlines still owed to the output.  */
    char *scrub_string;		/* String input: next character.  */
    char *scrub_last_string;	/* String input: end of window.  */
    FILE *scrub_file;		/* File input stream.  */
  };

/* Take a snapshot of the scrubber's state (state machine variables,
   pending output, and both input sources) in a block obtained from
   xmalloc (), and return that block as a char *.  The caller hands the
   pointer back to app_pop () to restore the state and release the
   block.  lex[] is not saved, so do_scrub_begin () need not be rerun.  */
char *
app_push ()
{
  struct app_save *snapshot;

  snapshot = (struct app_save *) xmalloc (sizeof (struct app_save));

  /* Input sources.  */
  snapshot->scrub_file = scrub_file;
  snapshot->scrub_string = scrub_string;
  snapshot->scrub_last_string = scrub_last_string;

  /* State machine and pending output.  */
  snapshot->state = state;
  snapshot->old_state = old_state;
  snapshot->add_newlines = add_newlines;
  snapshot->out_string = out_string;
  memcpy (snapshot->out_buf, out_buf, sizeof (out_buf));

  return (char *) snapshot;
}

/* Restore the scrubber state from ARG, a block previously returned by
   app_push (), and then free that block.  ARG must not be used again
   afterwards.  There is no do_scrub_end () to call.  */
void 
app_pop (arg)
     char *arg;
{
  struct app_save *snapshot;

  snapshot = (struct app_save *) arg;

  /* Input sources.  */
  scrub_file = snapshot->scrub_file;
  scrub_string = snapshot->scrub_string;
  scrub_last_string = snapshot->scrub_last_string;

  /* State machine and pending output.  */
  state = snapshot->state;
  old_state = snapshot->old_state;
  add_newlines = snapshot->add_newlines;
  out_string = snapshot->out_string;
  memcpy (out_buf, snapshot->out_buf, sizeof (out_buf));

  free (arg);
}				/* app_pop() */

/* Translate CH, the character that followed a backslash, into the
   character the escape stands for.  The letters b, f, n, r and t map to
   the corresponding control characters; every other character,
   including the two quote characters, is returned as itself.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
int 
process_escape (ch)
     char ch;
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';
  if (ch == '\'')
    return '\'';
  if (ch == '"')
    return '\"';

  /* Not an escape we know: the character stands for itself.  */
  return ch;
}
/* Return the next character of scrubbed assembler input, or EOF.

   GET is called with no arguments to fetch the next raw input character
   and returns EOF at end of input; UNGET is called with one character to
   push it back so that the next GET returns it.  scrub_from_file /
   scrub_to_file and scrub_from_string / scrub_to_string are the two
   pairs defined in this file.

   All scanner state lives in the file-level variables state, old_state,
   out_string, out_buf and add_newlines, so the function carries on where
   the previous call left off.  lex[] must already have been set up by
   do_scrub_begin ().

   Characters returned by GET are used as indexes into lex[] and so must
   be EOF or in the range 0..255.  */
int 
do_scrub_next_char (get, unget)
     int (*get) ();
     void (*unget) ();
{
  /*State 0: beginning of normal line
	  1: After first whitespace on line (flush more white)
	  2: After first non-white (opcode) on line (keep 1white)
	  3: after second white on line (into operands) (flush white)
	  4: after putting out a .line, put out digits
	  5: parsing a string, then go to old-state
	  6: putting out \ escape in a "d string.
	  7: After putting out a .appfile, put out string.
	  8: After putting out a .appfile string, flush until newline.
	  9: After seeing symbol char in state 3 (keep 1white after symchar)
	 10: After seeing whitespace in state 9 (keep white before symchar)
	  -1: output string in out_string and go to the state in old_state
	  -2: flush text until a '*' '/' is seen, then go to state old_state
	  */

  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
     constructs like ``.loc 1 20''.  This was turning into ``.loc
     120''.  States 9 and 10 ensure that a space is never dropped in
     between characters which could appear in a identifier.  Ian
     Taylor, ian@cygnus.com.  */

  register int ch, ch2 = 0;

  switch (state)
    {
    case -1:
      /* Hand out the pending string one character per call; leave the
	 state once the last character has been returned.  */
      ch = *out_string++;
      if (*out_string == 0)
	{
	  state = old_state;
	  old_state = 3;
	}
      return ch;

    case -2:
      /* Inside a slash-star comment that began at the start of a line.
	 Newlines are passed through so line numbers stay right; the
	 closing star-slash yields a single space.  */
      for (;;)
	{
	  do
	    {
	      ch = (*get) ();
	    }
	  while (ch != EOF && ch != '\n' && ch != '*');
	  if (ch == '\n' || ch == EOF)
	    return ch;

	  /* At this point, ch must be a '*' */
	  while ((ch = (*get) ()) == '*')
	    {
	      ;
	    }
	  if (ch == EOF || ch == '/')
	    break;
	  (*unget) (ch);
	}
      state = old_state;
      return ' ';

    case 4:
      /* Copying the digits of a `# 123 "file"' line number.  */
      ch = (*get) ();
      if (ch == EOF || (ch >= '0' && ch <= '9'))
	return ch;
      else
	{
	  while (ch != EOF && IS_WHITESPACE (ch))
	    ch = (*get) ();
	  if (ch == '"')
	    {
	      (*unget) (ch);
	      out_string = "\n.appfile ";
	      old_state = 7;
	      state = -1;
	      return *out_string++;
	    }
	  else
	    {
	      /* No file name: discard the rest of the line.  */
	      while (ch != EOF && ch != '\n')
		ch = (*get) ();
	      state = 0;
	      return ch;
	    }
	}

    case 5:
      ch = (*get) ();
      /* EOF must be tested before CH is used to index lex[]; EOF is
	 outside the table.  */
      if (ch == EOF)
	{
	  as_warn ("End of file in string: inserted '\"'");
	  state = old_state;
	  (*unget) ('\n');
	  return '"';
	}
      else if (lex[ch] == LEX_IS_STRINGQUOTE)
	{
	  state = old_state;
	  return ch;
	}
      else if (ch == '\\')
	{
	  state = 6;
	  return ch;
	}
      else
	{
	  return ch;
	}

    case 6:
      state = 5;
      ch = (*get) ();
      switch (ch)
	{
	  /* Handle strings broken across lines, by turning '\n' into
	     '\\' and 'n'.  */
	case '\n':
	  (*unget) ('n');
	  add_newlines++;
	  return '\\';

	case '"':
	case '\\':
#ifdef TC_HPPA
	case 'x':	/* '\\x' introduces escaped sequences on the PA */
#endif
	case 'b':
	case 'f':
	case 'n':
	case 'r':
	case 't':
#ifdef BACKSLASH_V
	case 'v':
#endif /* BACKSLASH_V */
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	  break;
#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
	default:
	  as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	  break;
#else /* ONLY_STANDARD_ESCAPES */
	default:
	  /* Accept \x as x for any x */
	  break;
#endif /* ONLY_STANDARD_ESCAPES */

	case EOF:
	  as_warn ("End of file in string: '\"' inserted");
	  return '"';
	}
      return ch;

    case 7:
      /* Emit the opening quote of the .appfile name, then copy the
	 rest as an ordinary string.  */
      ch = (*get) ();
      state = 5;
      old_state = 8;
      return ch;

    case 8:
      /* Discard whatever follows the file name.  Stop at end of input
	 as well as at newline, so that a last line with no newline
	 cannot make this loop forever.  */
      do
	ch = (*get) ();
      while (ch != EOF && ch != '\n');
      state = 0;
      return ch;
    }

  /* OK, we are somewhere in states 0 through 4 or 9 through 10 */

  /* flushchar: */
  ch = (*get) ();
recycle:
  if (ch == EOF)
    {
      if (state != 0)
	as_warn ("End of file not at end of a line: Newline inserted.");
      return ch;
    }

  switch (lex[ch])
    {
    case LEX_IS_WHITESPACE:
      /* Collapse a run of whitespace, then decide from the state and
	 the character after it whether any space is emitted at all.  */
      do
	ch = (*get) ();
      while (ch != EOF && IS_WHITESPACE (ch));
      if (ch == EOF)
	return ch;

      if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
	{
	  goto recycle;
	}
#ifdef MRI
      (*unget) (ch);		/* Put back */
      return ' ';		/* Always return one space at start of line */
#endif

      /* If we're in state 2, we've seen a non-white
	 character followed by whitespace.  If the next
	 character is ':', this is whitespace after a label
	 name which we can ignore.  */
      if (state == 2 && lex[ch] == LEX_IS_COLON)
	{
	  state = 0;
	  return ch;
	}

      switch (state)
	{
	case 0:
	  state++;
	  goto recycle;		/* Punted leading sp */
	case 1:
	  BAD_CASE (state);	/* We can't get here */
	case 2:
	  state = 3;
	  (*unget) (ch);
	  return ' ';		/* Sp after opco */
	case 3:
	  goto recycle;		/* Sp in operands */
	case 9:
	case 10:
	  state = 10;		/* Sp after symbol char */
	  goto recycle;
	default:
	  BAD_CASE (state);
	}
      break;

    case LEX_IS_TWOCHAR_COMMENT_1ST:
      ch2 = (*get) ();
      if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	{
	  /* Slash-star comment: skip to the closing star-slash,
	     counting the newlines swallowed on the way.  */
	  for (;;)
	    {
	      do
		{
		  ch2 = (*get) ();
		  if (ch2 != EOF && IS_NEWLINE (ch2))
		    add_newlines++;
		}
	      while (ch2 != EOF &&
		     (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));

	      while (ch2 != EOF &&
		     (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
		{
		  ch2 = (*get) ();
		}

	      if (ch2 == EOF
		  || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
		break;

	      /* A run of stars not followed by a slash.  Push back the
		 character that ended the run (not the opening slash
		 still held in CH) so that the scan above sees it
		 again; if it is a newline it then gets counted.  */
	      (*unget) (ch2);
	    }
	  if (ch2 == EOF)
	    as_warn ("End of file in multiline comment");

	  ch = ' ';
	  goto recycle;
	}
      else
	{
	  if (ch2 != EOF)
	    (*unget) (ch2);
	  if (state == 9 || state == 10)
	    state = 3;
	  return ch;
	}
      break;

    case LEX_IS_STRINGQUOTE:
      if (state == 9 || state == 10)
	old_state = 3;
      else
	old_state = state;
      state = 5;
      return ch;
#ifndef MRI
#ifndef IEEE_STYLE
    case LEX_IS_ONECHAR_QUOTE:
      /* Turn 'x (optionally 'x') into the decimal value of x.  */
      ch = (*get) ();
      if (ch == EOF)
	{
	  as_warn ("End-of-file after a one-character quote; \\000 inserted");
	  ch = 0;
	}
      if (ch == '\\')
	{
	  ch = (*get) ();
	  ch = process_escape (ch);
	}
      sprintf (out_buf, "%d", (int) (unsigned char) ch);


      /* None of these 'x constants for us.  We want 'x'.  */
      if ((ch = (*get) ()) != '\'')
	{
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
	  as_warn ("Missing close quote: (assumed)");
#else
	  /* Never push EOF back: the string unget routine would store
	     it as if it were a real character.  */
	  if (ch != EOF)
	    (*unget) (ch);
#endif
	}
      if (strlen (out_buf) == 1)
	{
	  return out_buf[0];
	}
      if (state == 9 || state == 10)
	old_state = 3;
      else
	old_state = state;
      state = -1;
      out_string = out_buf;
      return *out_string++;
#endif
#endif
    case LEX_IS_COLON:
      if (state == 9 || state == 10)
	state = 3;
      else if (state != 3)
	state = 0;
      return ch;

    case LEX_IS_NEWLINE:
      /* Roll out a bunch of newlines from inside comments, etc.  */
      if (add_newlines)
	{
	  --add_newlines;
	  (*unget) (ch);
	}
      /* fall thru into... */

    case LEX_IS_LINE_SEPARATOR:
      state = 0;
      return ch;

    case LEX_IS_LINE_COMMENT_START:
      if (state == 0)		/* Only comment at start of line.  */
	{
	  /* FIXME-someday: The two character comment stuff was badly
	     thought out.  On i386, we want '/' as line comment start
	     AND we want C style comments.  hence this hack.  The
	     whole lexical process should be reworked.  xoxorich.  */
	  if (ch == '/')
	    {
	      ch2 = (*get) ();
	      if (ch2 == '*')
		{
		  state = -2;
		  return (do_scrub_next_char (get, unget));
		}
	      else
		{
		  /* As above, EOF is never pushed back.  */
		  if (ch2 != EOF)
		    (*unget) (ch2);
		}
	    }			/* bad hack */

	  do
	    ch = (*get) ();
	  while (ch != EOF && IS_WHITESPACE (ch));
	  if (ch == EOF)
	    {
	      as_warn ("EOF in comment:  Newline inserted");
	      return '\n';
	    }
	  if (ch < '0' || ch > '9')
	    {
	      /* Non-numerics:  Eat whole comment line */
	      while (ch != EOF && !IS_NEWLINE (ch))
		ch = (*get) ();
	      if (ch == EOF)
		as_warn ("EOF in Comment: Newline inserted");
	      state = 0;
	      return '\n';
	    }
	  /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
	  (*unget) (ch);
	  old_state = 4;
	  state = -1;
	  out_string = ".appline ";
	  return *out_string++;
	}

      /* We have a line comment character which is not at the start of
	 a line.  If this is also a normal comment character, fall
	 through.  Otherwise treat it as a default character.  */
      if (strchr (comment_chars, ch) == NULL)
	goto de_fault;
      /* Fall through.  */
    case LEX_IS_COMMENT_START:
      /* Discard the comment and return the newline that ends it.  */
      do
	ch = (*get) ();
      while (ch != EOF && !IS_NEWLINE (ch));
      if (ch == EOF)
	as_warn ("EOF in comment:  Newline inserted");
      state = 0;
      return '\n';

    case LEX_IS_SYMBOL_COMPONENT:
      if (state == 10)
	{
	  /* This is a symbol character following another symbol
	     character, with whitespace in between.  We skipped the
	     whitespace earlier, so output it now.  */
	  (*unget) (ch);
	  state = 3;
	  return ' ';
	}
      if (state == 3)
	state = 9;
      /* Fall through.  */
    default:
    de_fault:
      /* Some relatively `normal' character.  */
      if (state == 0)
	{
	  state = 2;		/* Now seeing opcode */
	  return ch;
	}
      else if (state == 1)
	{
	  state = 2;		/* Ditto */
	  return ch;
	}
      else if (state == 9)
	{
	  if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
	    state = 3;
	  return ch;
	}
      else if (state == 10)
	{
	  state = 3;
	  return ch;
	}
      else
	{
	  return ch;		/* Opcode or operands already */
	}
    }
  return -1;
}

#ifdef TEST

/* Stand-alone test harness: scrub standard input and copy the result to
   standard output.  The character lists read by do_scrub_begin () are
   defined here because nothing else supplies them in this build.
   NOTE(review): assumes as.h declares these arrays with the same type;
   confirm against the headers.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* Build the classification table, then read stdin through the file
     get/unget pair that do_scrub_next_char () expects.  */
  do_scrub_begin ();
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal replacement for the assembler's warning routine: print STR and
   a newline on standard error.  Any further arguments are ignored, so a
   format such as "%c" is printed literally.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif

/* end of app.c */
Commit	Line	Data
fecd2382	1	/* This is the Assembler Pre-Processor
58d4951d	2	Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
6efd877d	3
a39116f1	4	This file is part of GAS, the GNU Assembler.
6efd877d	5
a39116f1 RP	6	GAS is free software; you can redistribute it and/or modify
	7	it under the terms of the GNU General Public License as published by
	8	the Free Software Foundation; either version 2, or (at your option)
	9	any later version.
6efd877d	10
a39116f1 RP	11	GAS is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	GNU General Public License for more details.
6efd877d	15
a39116f1 RP	16	You should have received a copy of the GNU General Public License
	17	along with GAS; see the file COPYING. If not, write to
	18	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382	19
58d4951d	20	/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
fecd2382 RP	21	/* App, the assembler pre-processor. This pre-processor strips out excess
fecd2382 RP	22	spaces, turns single-quoted characters into a decimal constant, and turns
9a7d824a	23	# <number> <filename> <garbage> into a .line <number>\n.file <filename>
be06bdcd	24	pair. This needs better error-handling.
a39116f1	25	*/
fecd2382 RP	26
fecd2382 RP	27	#include <stdio.h>
6efd877d	28	#include "as.h" /* For BAD_CASE() only */
fecd2382	29
3340f7e5	30	#if (__STDC__ != 1) && !defined(const)
6efd877d	31	#define const /* Nothing */
fecd2382 RP	32	#endif
fecd2382 RP	33
6efd877d	34	static char lex[256];
6d331d71	35	static const char symbol_chars[] =
6efd877d	36	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
fecd2382 RP	37
	38	#define LEX_IS_SYMBOL_COMPONENT 1
	39	#define LEX_IS_WHITESPACE 2
	40	#define LEX_IS_LINE_SEPARATOR 3
	41	#define LEX_IS_COMMENT_START 4
	42	#define LEX_IS_LINE_COMMENT_START 5
	43	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
	44	#define LEX_IS_TWOCHAR_COMMENT_2ND 7
	45	#define LEX_IS_STRINGQUOTE 8
	46	#define LEX_IS_COLON 9
	47	#define LEX_IS_NEWLINE 10
	48	#define LEX_IS_ONECHAR_QUOTE 11
a39116f1 RP	49	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
	50	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
	51	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
	52	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
	53	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
	54	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
	55
	56	/* FIXME-soon: The entire lexer/parser thingy should be
	57	built statically at compile time rather than dynamically
	58	each and every time the assembler is run. xoxorich. */
fecd2382	59
6efd877d KR	60	void
	61	do_scrub_begin ()
	62	{
	63	const char *p;
	64
	65	lex[' '] = LEX_IS_WHITESPACE;
	66	lex['\t'] = LEX_IS_WHITESPACE;
	67	lex['\n'] = LEX_IS_NEWLINE;
	68	lex[';'] = LEX_IS_LINE_SEPARATOR;
	69	lex['"'] = LEX_IS_STRINGQUOTE;
58d4951d	70	#ifndef TC_HPPA
6efd877d	71	lex['\''] = LEX_IS_ONECHAR_QUOTE;
58d4951d	72	#endif
6efd877d	73	lex[':'] = LEX_IS_COLON;
7c2d4011	74
be06bdcd SC	75
	76
	77	#ifdef SINGLE_QUOTE_STRINGS
	78	lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011	79	#endif
be06bdcd	80
6efd877d	81	/* Note that these override the previous defaults, e.g. if ';'
be06bdcd	82
fecd2382	83	is a comment char, then it isn't a line separator. */
6efd877d KR	84	for (p = symbol_chars; *p; ++p)
6efd877d KR	85	{
58d4951d	86	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
6efd877d KR	87	} /* declare symbol characters */
6efd877d KR	88
6efd877d KR	89	for (p = comment_chars; *p; p++)
6efd877d KR	90	{
58d4951d	91	lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
6efd877d KR	92	} /* declare comment chars */
6efd877d KR	93
9a7d824a ILT	94	for (p = line_comment_chars; *p; p++)
9a7d824a ILT	95	{
58d4951d	96	lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
9a7d824a ILT	97	} /* declare line comment chars */
9a7d824a ILT	98
6efd877d KR	99	for (p = line_separator_chars; *p; p++)
6efd877d KR	100	{
58d4951d	101	lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
6efd877d KR	102	} /* declare line separators */
	103
	104	/* Only allow slash-star comments if slash is not in use */
	105	if (lex['/'] == 0)
	106	{
	107	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
	108	}
	109	/* FIXME-soon. This is a bad hack but otherwise, we
a39116f1 RP	110	can't do c-style comments when '/' is a line
a39116f1 RP	111	comment char. xoxorich. */
6efd877d KR	112	if (lex['*'] == 0)
	113	{
	114	lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	115	}
	116	} /* do_scrub_begin() */
fecd2382 RP	117
	118	FILE *scrub_file;
	119
6efd877d KR	120	int
	121	scrub_from_file ()
	122	{
	123	return getc (scrub_file);
fecd2382 RP	124	}
fecd2382 RP	125
6efd877d KR	126	void
	127	scrub_to_file (ch)
	128	int ch;
fecd2382	129	{
6efd877d KR	130	ungetc (ch, scrub_file);
6efd877d KR	131	} /* scrub_to_file() */
fecd2382 RP	132
	133	char *scrub_string;
	134	char *scrub_last_string;
	135
6efd877d KR	136	int
	137	scrub_from_string ()
	138	{
	139	return scrub_string == scrub_last_string ? EOF : *scrub_string++;
	140	} /* scrub_from_string() */
fecd2382	141
6efd877d KR	142	void
	143	scrub_to_string (ch)
	144	int ch;
fecd2382	145	{
6efd877d KR	146	*--scrub_string = ch;
6efd877d KR	147	} /* scrub_to_string() */
fecd2382 RP	148
	149	/* Saved state of the scrubber */
	150	static int state;
	151	static int old_state;
	152	static char *out_string;
	153	static char out_buf[20];
	154	static int add_newlines = 0;
	155
	156	/* Data structure for saving the state of app across #include's. Note that
	157	app is called asynchronously to the parsing of the .include's, so our
	158	state at the time .include is interpreted is completely unrelated.
	159	That's why we have to save it all. */
	160
6efd877d KR	161	struct app_save
	162	{
	163	int state;
	164	int old_state;
	165	char *out_string;
	166	char out_buf[sizeof (out_buf)];
	167	int add_newlines;
	168	char *scrub_string;
	169	char *scrub_last_string;
	170	FILE *scrub_file;
	171	};
	172
	173	char *
	174	app_push ()
	175	{
7c2d4011 SC	176	register struct app_save *saved;
7c2d4011 SC	177
6efd877d KR	178	saved = (struct app_save ) xmalloc (sizeof (saved));
	179	saved->state = state;
	180	saved->old_state = old_state;
	181	saved->out_string = out_string;
58d4951d	182	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
6efd877d KR	183	saved->add_newlines = add_newlines;
6efd877d KR	184	saved->scrub_string = scrub_string;
7c2d4011	185	saved->scrub_last_string = scrub_last_string;
6efd877d	186	saved->scrub_file = scrub_file;
7c2d4011 SC	187
7c2d4011 SC	188	/* do_scrub_begin() is not useful, just wastes time. */
6efd877d	189	return (char *) saved;
fecd2382 RP	190	}
fecd2382 RP	191
6efd877d KR	192	void
	193	app_pop (arg)
	194	char *arg;
fecd2382	195	{
6efd877d KR	196	register struct app_save saved = (struct app_save ) arg;
	197
	198	/* There is no do_scrub_end (). */
	199	state = saved->state;
	200	old_state = saved->old_state;
	201	out_string = saved->out_string;
58d4951d	202	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
6efd877d KR	203	add_newlines = saved->add_newlines;
	204	scrub_string = saved->scrub_string;
	205	scrub_last_string = saved->scrub_last_string;
	206	scrub_file = saved->scrub_file;
	207
	208	free (arg);
	209	} /* app_pop() */
	210
6d331d71 KR	211	/* @@ This assumes that \n &c are the same on host and target. This is not
6d331d71 KR	212	necessarily true. */
6efd877d KR	213	int
	214	process_escape (ch)
	215	char ch;
7c2d4011	216	{
6efd877d KR	217	switch (ch)
	218	{
	219	case 'b':
	220	return '\b';
	221	case 'f':
	222	return '\f';
	223	case 'n':
	224	return '\n';
	225	case 'r':
	226	return '\r';
	227	case 't':
	228	return '\t';
	229	case '\'':
	230	return '\'';
	231	case '"':
6d331d71	232	return '\"';
6efd877d KR	233	default:
	234	return ch;
	235	}
7c2d4011	236	}
6efd877d KR	237	int
	238	do_scrub_next_char (get, unget)
	239	int (*get) ();
	240	void (*unget) ();
fecd2382	241	{
6efd877d	242	/*State 0: beginning of normal line
a39116f1 RP	243	1: After first whitespace on line (flush more white)
	244	2: After first non-white (opcode) on line (keep 1white)
	245	3: after second white on line (into operands) (flush white)
	246	4: after putting out a .line, put out digits
	247	5: parsing a string, then go to old-state
	248	6: putting out \ escape in a "d string.
9a7d824a ILT	249	7: After putting out a .appfile, put out string.
9a7d824a ILT	250	8: After putting out a .appfile string, flush until newline.
f6a91cc0	251	9: After seeing symbol char in state 3 (keep 1white after symchar)
9a7d824a	252	10: After seeing whitespace in state 9 (keep white before symchar)
a39116f1 RP	253	-1: output string in out_string and go to the state in old_state
	254	-2: flush text until a '*' '/' is seen, then go to state old_state
	255	*/
6efd877d	256
9a7d824a ILT	257	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
	258	constructs like ``.loc 1 20''. This was turning into ``.loc
	259	120''. States 9 and 10 ensure that a space is never dropped in
	260	between characters which could appear in a identifier. Ian
	261	Taylor, ian@cygnus.com. */
f6a91cc0	262
6efd877d KR	263	register int ch, ch2 = 0;
	264
	265	switch (state)
	266	{
	267	case -1:
	268	ch = *out_string++;
	269	if (*out_string == 0)
	270	{
	271	state = old_state;
	272	old_state = 3;
	273	}
	274	return ch;
	275
	276	case -2:
	277	for (;;)
	278	{
	279	do
	280	{
	281	ch = (*get) ();
	282	}
	283	while (ch != EOF && ch != '\n' && ch != '*');
	284	if (ch == '\n' \|\| ch == EOF)
	285	return ch;
	286
	287	/* At this point, ch must be a '' /
	288	while ((ch = (get) ()) == '')
	289	{
	290	;
	291	}
	292	if (ch == EOF \|\| ch == '/')
	293	break;
	294	(*unget) (ch);
	295	}
	296	state = old_state;
	297	return ' ';
	298
	299	case 4:
	300	ch = (*get) ();
	301	if (ch == EOF \|\| (ch >= '0' && ch <= '9'))
	302	return ch;
	303	else
	304	{
	305	while (ch != EOF && IS_WHITESPACE (ch))
	306	ch = (*get) ();
	307	if (ch == '"')
	308	{
	309	(*unget) (ch);
9a7d824a	310	out_string = "\n.appfile ";
6efd877d KR	311	old_state = 7;
	312	state = -1;
	313	return *out_string++;
	314	}
	315	else
	316	{
	317	while (ch != EOF && ch != '\n')
	318	ch = (*get) ();
58d4951d	319	state = 0;
6efd877d KR	320	return ch;
	321	}
	322	}
	323
	324	case 5:
	325	ch = (*get) ();
	326	if (lex[ch] == LEX_IS_STRINGQUOTE)
	327	{
	328	state = old_state;
	329	return ch;
	330	}
	331	else if (ch == '\\')
	332	{
	333	state = 6;
	334	return ch;
	335	}
	336	else if (ch == EOF)
	337	{
	338	as_warn ("End of file in string: inserted '\"'");
	339	state = old_state;
	340	(*unget) ('\n');
	341	return '"';
	342	}
	343	else
	344	{
	345	return ch;
	346	}
	347
	348	case 6:
	349	state = 5;
	350	ch = (*get) ();
	351	switch (ch)
	352	{
6d331d71 KR	353	/* Handle strings broken across lines, by turning '\n' into
6d331d71 KR	354	'\\' and 'n'. */
6efd877d KR	355	case '\n':
	356	(*unget) ('n');
	357	add_newlines++;
	358	return '\\';
	359
	360	case '"':
	361	case '\\':
58d4951d ILT	362	#ifdef TC_HPPA
	363	case 'x': /* '\\x' introduces escaped sequences on the PA */
	364	#endif
6efd877d KR	365	case 'b':
	366	case 'f':
	367	case 'n':
	368	case 'r':
	369	case 't':
fecd2382	370	#ifdef BACKSLASH_V
6efd877d	371	case 'v':
fecd2382	372	#endif /* BACKSLASH_V */
6efd877d KR	373	case '0':
	374	case '1':
	375	case '2':
	376	case '3':
	377	case '4':
	378	case '5':
	379	case '6':
	380	case '7':
	381	break;
7c2d4011	382	#if defined(IGNORE_NONSTANDARD_ESCAPES) \| defined(ONLY_STANDARD_ESCAPES)
6efd877d KR	383	default:
	384	as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	385	break;
fecd2382	386	#else /* ONLY_STANDARD_ESCAPES */
6efd877d KR	387	default:
	388	/* Accept \x as x for any x */
	389	break;
fecd2382	390	#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011	391
6efd877d KR	392	case EOF:
	393	as_warn ("End of file in string: '\"' inserted");
	394	return '"';
	395	}
	396	return ch;
	397
	398	case 7:
	399	ch = (*get) ();
	400	state = 5;
	401	old_state = 8;
	402	return ch;
	403
	404	case 8:
	405	do
	406	ch = (*get) ();
	407	while (ch != '\n');
	408	state = 0;
	409	return ch;
	410	}
	411
9a7d824a	412	/* OK, we are somewhere in states 0 through 4 or 9 through 10 */
6efd877d KR	413
	414	/* flushchar: */
	415	ch = (*get) ();
	416	recycle:
	417	if (ch == EOF)
	418	{
	419	if (state != 0)
	420	as_warn ("End of file not at end of a line: Newline inserted.");
	421	return ch;
	422	}
	423
	424	switch (lex[ch])
	425	{
	426	case LEX_IS_WHITESPACE:
	427	do
	428	ch = (*get) ();
	429	while (ch != EOF && IS_WHITESPACE (ch));
	430	if (ch == EOF)
	431	return ch;
	432
	433	if (IS_COMMENT (ch) \|\| (state == 0 && IS_LINE_COMMENT (ch)) \|\| ch == '/' \|\| IS_LINE_SEPARATOR (ch))
	434	{
	435	goto recycle;
fecd2382	436	}
7c2d4011	437	#ifdef MRI
6efd877d KR	438	(unget) (ch); / Put back */
6efd877d KR	439	return ' '; /* Always return one space at start of line */
7c2d4011	440	#endif
6efd877d KR	441
6efd877d KR	442	/* If we're in state 2, we've seen a non-white
6d331d71 KR	443	character followed by whitespace. If the next
	444	character is ':', this is whitespace after a label
	445	name which we can ignore. */
6efd877d KR	446	if (state == 2 && lex[ch] == LEX_IS_COLON)
	447	{
	448	state = 0;
	449	return ch;
	450	}
	451
	452	switch (state)
	453	{
	454	case 0:
	455	state++;
	456	goto recycle; /* Punted leading sp */
	457	case 1:
	458	BAD_CASE (state); /* We can't get here */
	459	case 2:
f6a91cc0	460	state = 3;
6efd877d KR	461	(*unget) (ch);
	462	return ' '; /* Sp after opco */
	463	case 3:
	464	goto recycle; /* Sp in operands */
9a7d824a ILT	465	case 9:
	466	case 10:
	467	state = 10; /* Sp after symbol char */
	468	goto recycle;
6efd877d KR	469	default:
	470	BAD_CASE (state);
	471	}
	472	break;
	473
	474	case LEX_IS_TWOCHAR_COMMENT_1ST:
	475	ch2 = (*get) ();
	476	if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	477	{
	478	for (;;)
	479	{
	480	do
	481	{
	482	ch2 = (*get) ();
	483	if (ch2 != EOF && IS_NEWLINE (ch2))
	484	add_newlines++;
fecd2382	485	}
6efd877d KR	486	while (ch2 != EOF &&
	487	(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
	488
	489	while (ch2 != EOF &&
	490	(lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
	491	{
	492	ch2 = (*get) ();
fecd2382	493	}
6efd877d KR	494
	495	if (ch2 == EOF
	496	\|\| lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
fecd2382	497	break;
6efd877d KR	498	(*unget) (ch);
	499	}
	500	if (ch2 == EOF)
	501	as_warn ("End of file in multiline comment");
	502
	503	ch = ' ';
	504	goto recycle;
	505	}
	506	else
	507	{
	508	if (ch2 != EOF)
	509	(*unget) (ch2);
9a7d824a ILT	510	if (state == 9 \|\| state == 10)
9a7d824a ILT	511	state = 3;
6efd877d KR	512	return ch;
	513	}
	514	break;
	515
	516	case LEX_IS_STRINGQUOTE:
9a7d824a ILT	517	if (state == 9 \|\| state == 10)
	518	old_state = 3;
	519	else
	520	old_state = state;
6efd877d KR	521	state = 5;
	522	return ch;
	523	#ifndef MRI
a39116f1	524	#ifndef IEEE_STYLE
6efd877d KR	525	case LEX_IS_ONECHAR_QUOTE:
	526	ch = (*get) ();
	527	if (ch == EOF)
	528	{
	529	as_warn ("End-of-file after a one-character quote; \\000 inserted");
	530	ch = 0;
	531	}
	532	if (ch == '\\')
	533	{
	534	ch = (*get) ();
	535	ch = process_escape (ch);
	536	}
	537	sprintf (out_buf, "%d", (int) (unsigned char) ch);
7c2d4011	538
6efd877d	539
9a7d824a	540	/* None of these 'x constants for us. We want 'x'. */
6efd877d KR	541	if ((ch = (*get) ()) != '\'')
6efd877d KR	542	{
fecd2382	543	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
6efd877d	544	as_warn ("Missing close quote: (assumed)");
fecd2382	545	#else
6efd877d	546	(*unget) (ch);
fecd2382	547	#endif
6efd877d KR	548	}
	549	if (strlen (out_buf) == 1)
	550	{
	551	return out_buf[0];
	552	}
9a7d824a ILT	553	if (state == 9 \|\| state == 10)
	554	old_state = 3;
	555	else
	556	old_state = state;
6efd877d KR	557	state = -1;
	558	out_string = out_buf;
	559	return *out_string++;
7c2d4011	560	#endif
a39116f1	561	#endif
6efd877d	562	case LEX_IS_COLON:
9a7d824a ILT	563	if (state == 9 \|\| state == 10)
	564	state = 3;
	565	else if (state != 3)
6efd877d KR	566	state = 0;
	567	return ch;
	568
	569	case LEX_IS_NEWLINE:
	570	/* Roll out a bunch of newlines from inside comments, etc. */
	571	if (add_newlines)
	572	{
	573	--add_newlines;
	574	(*unget) (ch);
	575	}
	576	/* fall thru into... */
	577
	578	case LEX_IS_LINE_SEPARATOR:
	579	state = 0;
	580	return ch;
	581
	582	case LEX_IS_LINE_COMMENT_START:
9a7d824a	583	if (state == 0) /* Only comment at start of line. */
6efd877d	584	{
9a7d824a ILT	585	/* FIXME-someday: The two character comment stuff was badly
	586	thought out. On i386, we want '/' as line comment start
	587	AND we want C style comments. hence this hack. The
	588	whole lexical process should be reworked. xoxorich. */
	589	if (ch == '/')
f6a91cc0	590	{
9a7d824a ILT	591	ch2 = (*get) ();
	592	if (ch2 == '*')
	593	{
	594	state = -2;
	595	return (do_scrub_next_char (get, unget));
	596	}
	597	else
	598	{
	599	(*unget) (ch2);
	600	}
	601	} /* bad hack */
6efd877d	602
9a7d824a	603	do
6efd877d	604	ch = (*get) ();
9a7d824a	605	while (ch != EOF && IS_WHITESPACE (ch));
6efd877d	606	if (ch == EOF)
9a7d824a ILT	607	{
	608	as_warn ("EOF in comment: Newline inserted");
	609	return '\n';
	610	}
	611	if (ch < '0' \|\| ch > '9')
	612	{
	613	/* Non-numerics: Eat whole comment line */
	614	while (ch != EOF && !IS_NEWLINE (ch))
	615	ch = (*get) ();
	616	if (ch == EOF)
	617	as_warn ("EOF in Comment: Newline inserted");
	618	state = 0;
	619	return '\n';
	620	}
	621	/* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
	622	(*unget) (ch);
	623	old_state = 4;
	624	state = -1;
	625	out_string = ".appline ";
	626	return *out_string++;
6efd877d	627	}
6efd877d	628
9a7d824a ILT	629	/* We have a line comment character which is not at the start of
	630	a line. If this is also a normal comment character, fall
	631	through. Otherwise treat it as a default character. */
	632	if (strchr (comment_chars, ch) == NULL)
	633	goto de_fault;
	634	/* Fall through. */
6efd877d KR	635	case LEX_IS_COMMENT_START:
	636	do
	637	ch = (*get) ();
	638	while (ch != EOF && !IS_NEWLINE (ch));
	639	if (ch == EOF)
	640	as_warn ("EOF in comment: Newline inserted");
	641	state = 0;
	642	return '\n';
	643
f6a91cc0	644	case LEX_IS_SYMBOL_COMPONENT:
9a7d824a ILT	645	if (state == 10)
	646	{
	647	/* This is a symbol character following another symbol
	648	character, with whitespace in between. We skipped the
	649	whitespace earlier, so output it now. */
	650	(*unget) (ch);
	651	state = 3;
	652	return ' ';
	653	}
f6a91cc0 ILT	654	if (state == 3)
	655	state = 9;
	656	/* Fall through. */
6efd877d KR	657	default:
	658	de_fault:
	659	/* Some relatively `normal' character. */
	660	if (state == 0)
	661	{
	662	state = 2; /* Now seeing opcode */
	663	return ch;
fecd2382	664	}
6efd877d KR	665	else if (state == 1)
	666	{
	667	state = 2; /* Ditto */
	668	return ch;
	669	}
f6a91cc0 ILT	670	else if (state == 9)
	671	{
	672	if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
	673	state = 3;
	674	return ch;
	675	}
9a7d824a ILT	676	else if (state == 10)
	677	{
	678	state = 3;
	679	return ch;
	680	}
6efd877d KR	681	else
	682	{
	683	return ch; /* Opcode or operands already */
	684	}
	685	}
	686	return -1;
fecd2382 RP	687	}
	688
	689	#ifdef TEST
	690
6efd877d KR	691	const char comment_chars[] = "\|";
6efd877d KR	692	const char line_comment_chars[] = "#";
fecd2382	693
6efd877d	694	main ()
fecd2382	695	{
6efd877d KR	696	int ch;
	697
	698	app_begin ();
	699	while ((ch = do_scrub_next_char (stdin)) != EOF)
	700	putc (ch, stdout);
fecd2382 RP	701	}
fecd2382 RP	702
6efd877d KR	703	as_warn (str)
6efd877d KR	704	char *str;
fecd2382	705	{
6efd877d KR	706	fputs (str, stderr);
6efd877d KR	707	putc ('\n', stderr);
fecd2382	708	}
6efd877d	709
fecd2382 RP	710	#endif
fecd2382 RP	711
fecd2382	712	/* end of app.c */