[deliverable/binutils-gdb.git] / gas / app.c

/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
   
   Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   */
/* This is the Assembler Pre-Processor
   Copyright (C) 1987 Free Software Foundation, Inc.
   
   This file is part of GAS, the GNU Assembler.
   
   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.
   
   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
   This needs better error-handling.
   */

#include <stdio.h>
#include "as.h"		/* For BAD_CASE() only */

#if (__STDC__ != 1) && !defined(const)
#define const /* Nothing */
#endif

static char	lex [256];
static char	symbol_chars[] = 
    "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* These will go in BSS if not defined elsewhere, producing empty strings. */
extern const char comment_chars[];
extern const char line_comment_chars[];
extern const char line_separator_chars[];

#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich. */

/* Fill in the character-class table `lex' that do_scrub_next_char()
   switches on.  Fixed defaults are installed first; symbol_chars and the
   externally supplied line_comment_chars, comment_chars and
   line_separator_chars are applied afterwards, in that order, so a later
   class overrides an earlier one for the same character.  Every table
   index is converted to unsigned char so that a character with the high
   bit set cannot become a negative subscript where plain char is signed.  */
void do_scrub_begin() {
	const char *p;
	
	lex[' '] = LEX_IS_WHITESPACE;
	lex['\t'] = LEX_IS_WHITESPACE;
	lex['\n'] = LEX_IS_NEWLINE;
	lex[';'] = LEX_IS_LINE_SEPARATOR;
	lex['"'] = LEX_IS_STRINGQUOTE;
	lex['\''] = LEX_IS_ONECHAR_QUOTE;
	lex[':'] = LEX_IS_COLON;

#ifdef MRI
	/* With MRI defined a single quote delimits strings instead.  */
	lex['\''] = LEX_IS_STRINGQUOTE;
#endif
	/* Note that these override the previous defaults, e.g. if ';'
	   is a comment char, then it isn't a line separator.  */
	for (p = symbol_chars; *p; ++p) {
		lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
	} /* declare symbol characters */
	
	for (p = line_comment_chars; *p; p++) {
		lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
	} /* declare line comment chars */
	
	for (p = comment_chars; *p; p++) {
		lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
	} /* declare comment chars */
	
	for (p = line_separator_chars; *p; p++) {
		lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
	} /* declare line separators */
	
	/* Only allow slash-star comments if slash is not in use */
	if (lex['/'] == 0) {
		lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
	}
	/* FIXME-soon.  This is a bad hack but otherwise, we
	   can't do c-style comments when '/' is a line
	   comment char. xoxorich. */
	if (lex['*'] == 0) {
		lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	}
} /* do_scrub_begin() */

/* Stream that scrub_from_file() reads from and scrub_to_file() pushes
   characters back onto.  */
FILE *scrub_file;

/* Fetch one character from scrub_file.  The value is whatever getc()
   reports, including EOF.  */
int scrub_from_file() {
	int next_char;

	next_char = getc(scrub_file);
	return next_char;
}

/* Push CH back onto scrub_file with ungetc().  The result of ungetc() is
   deliberately not examined.  */
void scrub_to_file(ch)
int ch;
{
	(void) ungetc(ch, scrub_file);
} /* scrub_to_file() */

/* Cursor into the in-memory text being scrubbed: the next character that
   scrub_from_string() will hand out.  */
char *scrub_string;
/* End marker for that text; reaching it means end of input.  */
char *scrub_last_string;

/* Return the character at the cursor and advance, or EOF once the cursor
   has reached scrub_last_string.  The character is returned as an
   unsigned char value, the way getc() does, so that a byte with the high
   bit set can neither compare equal to EOF nor turn into a negative
   index when the caller looks it up in lex[].  */
int scrub_from_string() {
	if (scrub_string == scrub_last_string) {
		return EOF;
	}
	return (unsigned char) *scrub_string++;
} /* scrub_from_string() */

/* Step the scrub_string cursor back one position and store CH there,
   overwriting whatever that position held, so the next
   scrub_from_string() call reads CH.  */
void scrub_to_string(ch)
int ch;
{
	--scrub_string;
	*scrub_string = ch;
} /* scrub_to_string() */

/* Saved state of the scrubber */
/* Current state; do_scrub_next_char() switches on it at every call.  */
static int state;
/* State to return to once a temporary state (-1, -2, 5) has finished.  */
static int old_state;
/* In state -1: the next character of the canned text being emitted.  */
static char *out_string;
/* Holds the decimal text produced for a one-character quote; out_string
   is pointed at it while that text is being emitted.  */
static char out_buf[20];
/* Count of newlines swallowed inside slash-star comments and
   backslash-newline string continuations; each one later causes a real
   newline to be pushed back and returned a second time.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* Snapshot of every file-scope scrubber variable.  app_push() fills one
   in and app_pop() copies it back; each member mirrors the variable of
   the same name.  */
struct app_save {
	int state;
	int old_state;
	char *out_string;
	/* Same size as the live out_buf so it can be copied wholesale.  */
	char out_buf[sizeof (out_buf)];
	int add_newlines;
	char *scrub_string;
	char *scrub_last_string;
	FILE *scrub_file;
};

/* Snapshot the complete scrubber state into a freshly allocated
   struct app_save and return it as an opaque char *.  The caller hands
   the pointer back to app_pop(), which restores the state and frees the
   snapshot.  The live state itself is left untouched.  */
char *app_push() {
  register struct app_save *saved;

  /* NOTE(review): the result is used unchecked; presumably xmalloc()
     does not return on failure -- confirm.  */
  saved = (struct app_save *) xmalloc(sizeof (*saved));
  saved->state		= state;
  saved->old_state	= old_state;
  saved->out_string	= out_string;
  /* bcopy() takes (source, destination, length): the live buffer is the
     source and the snapshot the destination.  With the operands the
     other way round the uninitialised snapshot would overwrite out_buf.  */
  bcopy(out_buf, saved->out_buf, sizeof(out_buf));
  saved->add_newlines	= add_newlines;
  saved->scrub_string	= scrub_string;
  saved->scrub_last_string = scrub_last_string;
  saved->scrub_file	= scrub_file;

  /* do_scrub_begin() is not useful, just wastes time. */
  return (char *)saved;
}

/* Restore the scrubber state from ARG, a snapshot previously returned by
   app_push(), and then free the snapshot.  ARG must not be used again
   after this call.  */
void app_pop(arg)
char *arg;
{
	register struct app_save *saved = (struct app_save *)arg;
	
	/* There is no do_scrub_end (). */
	state		= saved->state;
	old_state	= saved->old_state;
	out_string	= saved->out_string;
	/* memcpy() takes (destination, source, length): the live buffer is
	   the destination here.  out_string may point into out_buf, so its
	   contents have to come back along with the pointer.  */
	memcpy(out_buf, saved->out_buf, sizeof (out_buf));
	add_newlines	= saved->add_newlines;
	scrub_string	= saved->scrub_string;
	scrub_last_string = saved->scrub_last_string;
	scrub_file	= saved->scrub_file;
	
	free (arg);
} /* app_pop() */

/* Map CH, the character that followed a backslash, to the character the
   escape sequence stands for.  Recognised letters are b, f, n, r and t;
   an escaped single or double quote stands for itself, as does any
   character that is not recognised.  */
int process_escape(ch)
char ch;
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      /* An escaped double quote is a double quote, not a single one.  */
      return '"';
    default:
      return ch;
    }
}
/* Return the next character of scrubbed input, or EOF.

   GET is called with no arguments and returns the next raw input
   character or EOF; UNGET is called with one character to push it back
   so that GET returns it again.  scrub_from_file/scrub_to_file and
   scrub_from_string/scrub_to_string are the pairs provided in this file.

   The function is a state machine driven by the file-scope variables
   state, old_state, out_string, out_buf and add_newlines; each call
   consumes as much raw input as it needs and yields one output
   character.  It collapses runs of whitespace, drops comments, rewrites
   one-character quotes as decimal numbers and rewrites a line comment
   that starts with a number into ".line <number>", optionally followed
   by ".app-file <string>".  */
int do_scrub_next_char(get,unget)
int (*get)();
void (*unget)();
{
	/*State 0: beginning of normal line
	  1: After first whitespace on line (flush more white)
	  2: After first non-white (opcode) on line (keep 1white)
	  3: after second white on line (into operands) (flush white)
	  4: after putting out a .line, put out digits
	  5: parsing a string, then go to old-state
	  6: putting out \ escape in a "d string.
	  7: After putting out a .app-file, put out string.
	  8: After putting out a .app-file string, flush until newline.
	  -1: output string in out_string and go to the state in old_state
	  -2: flush text until a '*' '/' is seen, then go to state old_state
	  */
	
	register int ch, ch2 = 0;
	
	switch (state) {
	case -1: 
		/* Emit the canned text one character per call; switch back
		   as soon as the terminating NUL comes up next.  */
		ch= *out_string++;
		if(*out_string==0) {
			state=old_state;
			old_state=3;
		}
		return ch;
		
	case -2:
		for(;;) {
			do {
				ch=(*get)();
			} while(ch!=EOF && ch!='\n' && ch!='*');
			/* Newlines inside the comment are passed through;
			   the state stays -2 so the flush resumes.  */
			if(ch=='\n' || ch==EOF)
			    return ch;
			
			/* At this point, ch must be a '*' */
			while ( (ch=(*get)()) == '*' ){
				;
			}
			if(ch==EOF || ch=='/')
			    break;
			(*unget)(ch);
		}
		state=old_state;
		return ' ';
		
	case 4:
		ch=(*get)();
		if(ch==EOF || (ch>='0' && ch<='9'))
		    return ch;
		else {
			while(ch!=EOF && IS_WHITESPACE(ch))
			    ch=(*get)();
			if(ch=='"') {
				(*unget)(ch);
				out_string="\n.app-file ";
				old_state=7;
				state= -1;
				return *out_string++;
			} else {
				while(ch!=EOF && ch!='\n')
				    ch=(*get)();
				/* The directive line is finished, so the
				   next line starts afresh.  Staying in
				   state 4 would treat every following line
				   as more of the line number and discard
				   it.  */
				state=0;
				return ch;
			}
		}
		
	case 5:
		ch=(*get)();
		/* EOF is tested first: it is not a valid index into lex[].  */
		if(ch==EOF) {
			as_warn("End of file in string: inserted '\"'");
 			state=old_state;
			(*unget)('\n');
			return '"';
		} else if(lex[ch]==LEX_IS_STRINGQUOTE) {
			state=old_state;
			return ch;
		} else if(ch=='\\') {
			state=6;
			return ch;
		} else {
			return ch;
		}
		
	case 6:
		state=5;
		ch=(*get)();
		switch(ch) {
			/* This is neat.  Turn "string
			   more string" into "string\n  more string"
			   */
		case '\n':
			(*unget)('n');
			add_newlines++;
			return '\\';
			
		case '"':
		case '\\':
		case 'b':
		case 'f':
		case 'n':
		case 'r':
		case 't':
#ifdef BACKSLASH_V
		case 'v':
#endif /* BACKSLASH_V */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			break;
#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
		default:
			as_warn("Unknown escape '\\%c' in string: Ignored",ch);
			break;
#else /* ONLY_STANDARD_ESCAPES */
		default:
			/* Accept \x as x for any x */
			break;
#endif /* ONLY_STANDARD_ESCAPES */
			
		case EOF:
			as_warn("End of file in string: '\"' inserted");
			return '"';
		}
		return ch;
		
	case 7:
		/* The opening quote of the file name was pushed back in
		   state 4; emit it and scan the rest as a string.  */
		ch=(*get)();
		state=5;
		old_state=8;
		return ch;
		
	case 8:
		/* Stop at EOF as well as at newline; otherwise a final line
		   without a newline would spin here forever.  */
		do ch= (*get)();
		while(ch!=EOF && ch!='\n');
		state=0;
		return ch;
	}

	/* OK, we are somewhere in states 0 through 4 */

/* flushchar: */
	ch=(*get)();
 recycle:
	if (ch == EOF) {
		if (state != 0)
			as_warn("End of file not at end of a line: Newline inserted.");
		return ch;
	}

	switch (lex[ch]) {
	case LEX_IS_WHITESPACE:
		do ch=(*get)();
		while(ch!=EOF && IS_WHITESPACE(ch));
		if(ch==EOF)
			return ch;

		/* Whitespace in front of a comment or a line separator is
		   dropped altogether.  */
		if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) {
			goto recycle;
		}
#ifdef MRI
		(*unget)(ch); /* Put back */
		return ' '; /* Always return one space at start of line */
#endif
		switch (state) {
		case 0:	state++; goto recycle;	/* Punted leading sp */
		case 1:          BAD_CASE(state); /* We can't get here */
		case 2: state++; (*unget)(ch); return ' ';  /* Sp after opco */
		case 3:		 goto recycle;	/* Sp in operands */
		default:	BAD_CASE(state);
		}
		break;
		
	case LEX_IS_TWOCHAR_COMMENT_1ST:
		ch2=(*get)();
		if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
			for(;;) {
				do {
					ch2=(*get)();
					if(ch2 != EOF && IS_NEWLINE(ch2))
						add_newlines++;
				} while(ch2!=EOF &&
					(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
				
				while (ch2!=EOF &&
				       (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
					ch2=(*get)();
				}
				
				if(ch2==EOF 
				  || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
					break;
				/* Push back the character that followed the
				   star(s), not the comment's opening
				   character, so that the loop above rereads
				   it and a newline there is still counted.  */
				(*unget)(ch2);
			}
			if(ch2==EOF)
				as_warn("End of file in multiline comment");

			/* The whole comment counts as a single blank.  */
			ch = ' ';
			goto recycle;
		} else {
			if(ch2!=EOF)
			    (*unget)(ch2);
			return ch;
		}
		break;
		
	case LEX_IS_STRINGQUOTE:
		old_state=state;
		state=5;
		return ch;
#ifndef MRI		
#ifndef IEEE_STYLE
	case LEX_IS_ONECHAR_QUOTE:
		ch=(*get)();
		if(ch==EOF) {
			/* The backslash is doubled so the message shows the
			   text \000 instead of being cut short by an
			   embedded NUL.  */
			as_warn("End-of-file after a one-character quote; \\000 inserted");
			ch=0;
		}
		if (ch == '\\') {
		  ch = (*get)();
		  ch = process_escape(ch);
		}
		sprintf(out_buf,"%d", (int)(unsigned char)ch);

		/* None of these 'x constants for us.  We want 'x'.
		 */
		if ( (ch=(*get)()) != '\'' ) {
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
			as_warn("Missing close quote: (assumed)");
#else
			/* EOF is not a character and must not be stored
			   back into the input.  */
			if(ch!=EOF)
				(*unget)(ch);
#endif
		}
		
		/* A one-digit result is returned directly.  State -1 only
		   stops once the NUL is the *next* character, so it would
		   emit the NUL and run past the end of the text.  */
		if(out_buf[1]==0)
			return out_buf[0];

		old_state=state;
		state= -1;
		out_string=out_buf;
		return *out_string++;
#endif
#endif
	case LEX_IS_COLON:
		/* Outside the operands a colon ends a label, so what follows
		   is treated like the start of a line.  */
		if(state!=3)
			state=0;
		return ch;

	case LEX_IS_NEWLINE:
		/* Roll out a bunch of newlines from inside comments, etc.  */
		if(add_newlines) {
			--add_newlines;
			(*unget)(ch);
		}
		/* fall thru into... */
		
	case LEX_IS_LINE_SEPARATOR:
		state=0;
		return ch;
		
	case LEX_IS_LINE_COMMENT_START:
		if (state != 0)		/* Not at start of line, act normal */
			goto de_fault;

 /* FIXME-someday: The two character comment stuff was badly
    thought out.  On i386, we want '/' as line comment start
    AND we want C style comments.  hence this hack.  The
    whole lexical process should be reworked.  xoxorich.  */

		if (ch == '/') {
			ch2 = (*get)();
			if (ch2 == '*') {
				/* NOTE(review): old_state is not set here,
				   so state -2 resumes in whatever state was
				   saved last -- confirm that is intended.  */
				state = -2;
				return(do_scrub_next_char(get, unget));
			}
			/* Only a character that was really read goes back.
			   For any other line-comment character nothing
			   was read, and pushing back the initial 0 would
			   make the line look non-numeric and lose the
			   `# 123 "file"' handling below.  */
			if (ch2 != EOF)
				(*unget)(ch2);
		} /* bad hack */
		
		do ch=(*get)();
		while(ch!=EOF && IS_WHITESPACE(ch));
		if(ch==EOF) {
			as_warn("EOF in comment:  Newline inserted");
			return '\n';
		}
		if(ch<'0' || ch>'9') {
			/* Non-numerics:  Eat whole comment line */
			while(ch!=EOF && !IS_NEWLINE(ch))
			    ch=(*get)();
			if(ch==EOF)
			    as_warn("EOF in Comment: Newline inserted");
			state=0;
			return '\n';
		}
		/* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
		(*unget)(ch);
		old_state=4;
		state= -1;
		out_string=".line ";
		return *out_string++;
		
	case LEX_IS_COMMENT_START:
		do ch=(*get)();
		while(ch!=EOF && !IS_NEWLINE(ch));
		if(ch==EOF)
		    as_warn("EOF in comment:  Newline inserted");
		state=0;
		return '\n';
		
	default:
	de_fault:
		/* Some relatively `normal' character.  */
		if(state==0) {
			state=2;	/* Now seeing opcode */
			return ch;
		} else if(state==1) {
			state=2;	/* Ditto */
			return ch;
		} else {
			return ch;	/* Opcode or operands already */
		}
	}
	return -1;
}

#ifdef TEST

/* Stand-alone harness, built only when TEST is defined: scrubs standard
   input and copies the result to standard output.  */

/* The character sets do_scrub_begin() reads.  They are defined with the
   same const qualification as their extern declarations earlier in the
   file, and line_separator_chars gets a definition too.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int main()
{
	int	ch;
	
	do_scrub_begin();
	/* Read through the file-based get/unget pair.  */
	scrub_file = stdin;
	while((ch=do_scrub_next_char(scrub_from_file,scrub_to_file))!=EOF)
	    putc(ch,stdout);
	return 0;
}

/* Minimal stand-in for the assembler's warning routine: prints STR and a
   newline on standard error.
   NOTE(review): STR is printed verbatim, so the "%c" in the unknown-escape
   warning is not expanded and the extra argument is ignored.  */
as_warn(str)
char *str;
{
	fputs(str,stderr);
	putc('\n',stderr);
}
#endif

/*
 * Local Variables:
 * comment-column: 0
 * fill-column: 131
 * End:
 */

/* end of app.c */
Commit	Line	Data
3340f7e5	1	/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
a39116f1 RP	2
	3	Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
	4	*/
fecd2382 RP	5	/* This is the Assembler Pre-Processor
fecd2382 RP	6	Copyright (C) 1987 Free Software Foundation, Inc.
a39116f1 RP	7
	8	This file is part of GAS, the GNU Assembler.
	9
	10	GAS is free software; you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
	12	the Free Software Foundation; either version 2, or (at your option)
	13	any later version.
	14
	15	GAS is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
	19
	20	You should have received a copy of the GNU General Public License
	21	along with GAS; see the file COPYING. If not, write to
	22	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382 RP	23
	24	/* App, the assembler pre-processor. This pre-processor strips out excess
	25	spaces, turns single-quoted characters into a decimal constant, and turns
	26	# <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
	27	This needs better error-handling.
a39116f1	28	*/
fecd2382 RP	29
	30	#include <stdio.h>
	31	#include "as.h" /* For BAD_CASE() only */
	32
3340f7e5	33	#if (__STDC__ != 1) && !defined(const)
fecd2382 RP	34	#define const /* Nothing */
	35	#endif
	36
	37	static char lex [256];
	38	static char symbol_chars[] =
a39116f1	39	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
fecd2382 RP	40
	41	/* These will go in BSS if not defined elsewhere, producing empty strings. */
	42	extern const char comment_chars[];
	43	extern const char line_comment_chars[];
	44	extern const char line_separator_chars[];
	45
	46	#define LEX_IS_SYMBOL_COMPONENT 1
	47	#define LEX_IS_WHITESPACE 2
	48	#define LEX_IS_LINE_SEPARATOR 3
	49	#define LEX_IS_COMMENT_START 4
	50	#define LEX_IS_LINE_COMMENT_START 5
	51	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
	52	#define LEX_IS_TWOCHAR_COMMENT_2ND 7
	53	#define LEX_IS_STRINGQUOTE 8
	54	#define LEX_IS_COLON 9
	55	#define LEX_IS_NEWLINE 10
	56	#define LEX_IS_ONECHAR_QUOTE 11
a39116f1 RP	57	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
	58	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
	59	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
	60	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
	61	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
	62	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
	63
	64	/* FIXME-soon: The entire lexer/parser thingy should be
	65	built statically at compile time rather than dynamically
	66	each and every time the assembler is run. xoxorich. */
fecd2382 RP	67
	68	void do_scrub_begin() {
	69	const char *p;
a39116f1 RP	70
	71	lex[' '] = LEX_IS_WHITESPACE;
	72	lex['\t'] = LEX_IS_WHITESPACE;
	73	lex['\n'] = LEX_IS_NEWLINE;
	74	lex[';'] = LEX_IS_LINE_SEPARATOR;
	75	lex['"'] = LEX_IS_STRINGQUOTE;
	76	lex['\''] = LEX_IS_ONECHAR_QUOTE;
	77	lex[':'] = LEX_IS_COLON;
7c2d4011 SC	78
	79	#ifdef MRI
	80	lex['\''] = LEX_IS_STRINGQUOTE;
	81	#endif
fecd2382 RP	82	/* Note that these override the previous defaults, e.g. if ';'
fecd2382 RP	83	is a comment char, then it isn't a line separator. */
a39116f1	84	for (p = symbol_chars; *p; ++p) {
fecd2382	85	lex[*p] = LEX_IS_SYMBOL_COMPONENT;
a39116f1 RP	86	} /* declare symbol characters */
	87
	88	for (p = line_comment_chars; *p; p++) {
fecd2382	89	lex[*p] = LEX_IS_LINE_COMMENT_START;
a39116f1 RP	90	} /* declare line comment chars */
	91
	92	for (p = comment_chars; *p; p++) {
	93	lex[*p] = LEX_IS_COMMENT_START;
	94	} /* declare comment chars */
	95
	96	for (p = line_separator_chars; *p; p++) {
fecd2382	97	lex[*p] = LEX_IS_LINE_SEPARATOR;
a39116f1 RP	98	} /* declare line separators */
a39116f1 RP	99
fecd2382 RP	100	/* Only allow slash-star comments if slash is not in use */
fecd2382 RP	101	if (lex['/'] == 0) {
a39116f1	102	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
fecd2382	103	}
a39116f1 RP	104	/* FIXME-soon. This is a bad hack but otherwise, we
	105	can't do c-style comments when '/' is a line
	106	comment char. xoxorich. */
	107	if (lex['*'] == 0) {
	108	lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	109	}
	110	} /* do_scrub_begin() */
fecd2382 RP	111
	112	FILE *scrub_file;
	113
	114	int scrub_from_file() {
	115	return getc(scrub_file);
	116	}
	117
	118	void scrub_to_file(ch)
	119	int ch;
	120	{
	121	ungetc(ch,scrub_file);
	122	} /* scrub_to_file() */
	123
	124	char *scrub_string;
	125	char *scrub_last_string;
	126
	127	int scrub_from_string() {
	128	return scrub_string == scrub_last_string ? EOF : *scrub_string++;
	129	} /* scrub_from_string() */
	130
	131	void scrub_to_string(ch)
	132	int ch;
	133	{
	134	*--scrub_string=ch;
	135	} /* scrub_to_string() */
	136
	137	/* Saved state of the scrubber */
	138	static int state;
	139	static int old_state;
	140	static char *out_string;
	141	static char out_buf[20];
	142	static int add_newlines = 0;
	143
	144	/* Data structure for saving the state of app across #include's. Note that
	145	app is called asynchronously to the parsing of the .include's, so our
	146	state at the time .include is interpreted is completely unrelated.
	147	That's why we have to save it all. */
	148
	149	struct app_save {
a39116f1 RP	150	int state;
	151	int old_state;
	152	char *out_string;
	153	char out_buf[sizeof (out_buf)];
	154	int add_newlines;
	155	char *scrub_string;
	156	char *scrub_last_string;
	157	FILE *scrub_file;
fecd2382 RP	158	};
	159
	160	char *app_push() {
7c2d4011 SC	161	register struct app_save *saved;
	162
	163	saved = (struct app_save ) xmalloc(sizeof (saved));
	164	saved->state = state;
	165	saved->old_state = old_state;
	166	saved->out_string = out_string;
	167	bcopy(saved->out_buf, out_buf, sizeof(out_buf));
	168	saved->add_newlines = add_newlines;
	169	saved->scrub_string = scrub_string;
	170	saved->scrub_last_string = scrub_last_string;
	171	saved->scrub_file = scrub_file;
	172
	173	/* do_scrub_begin() is not useful, just wastes time. */
	174	return (char *)saved;
fecd2382 RP	175	}
	176
	177	void app_pop(arg)
	178	char *arg;
	179	{
a39116f1 RP	180	register struct app_save saved = (struct app_save )arg;
	181
	182	/* There is no do_scrub_end (). */
	183	state = saved->state;
	184	old_state = saved->old_state;
	185	out_string = saved->out_string;
7c2d4011	186	memcpy(saved->out_buf, out_buf, sizeof (out_buf));
a39116f1 RP	187	add_newlines = saved->add_newlines;
	188	scrub_string = saved->scrub_string;
	189	scrub_last_string = saved->scrub_last_string;
	190	scrub_file = saved->scrub_file;
	191
	192	free (arg);
fecd2382 RP	193	} /* app_pop() */
fecd2382 RP	194
7c2d4011 SC	195	int process_escape(ch)
	196	char ch;
	197	{
	198	switch (ch)
	199	{
	200	case 'b':
	201	return '\b';
	202	case 'f':
	203	return '\f';
	204	case 'n':
	205	return '\n';
	206	case 'r':
	207	return '\r';
	208	case 't':
	209	return '\t';
	210	case '\'':
	211	return '\'';
	212	case '"':
	213	return '\'';
	214	default:
	215	return ch;
	216	}
	217	}
fecd2382 RP	218	int do_scrub_next_char(get,unget)
	219	int (*get)();
	220	void (*unget)();
	221	{
	222	/*State 0: beginning of normal line
a39116f1 RP	223	1: After first whitespace on line (flush more white)
	224	2: After first non-white (opcode) on line (keep 1white)
	225	3: after second white on line (into operands) (flush white)
	226	4: after putting out a .line, put out digits
	227	5: parsing a string, then go to old-state
	228	6: putting out \ escape in a "d string.
	229	7: After putting out a .app-file, put out string.
	230	8: After putting out a .app-file string, flush until newline.
	231	-1: output string in out_string and go to the state in old_state
	232	-2: flush text until a '*' '/' is seen, then go to state old_state
	233	*/
	234
	235	register int ch, ch2 = 0;
	236
fecd2382 RP	237	switch (state) {
	238	case -1:
	239	ch= *out_string++;
	240	if(*out_string==0) {
	241	state=old_state;
	242	old_state=3;
	243	}
	244	return ch;
a39116f1	245
fecd2382 RP	246	case -2:
	247	for(;;) {
	248	do {
	249	ch=(*get)();
	250	} while(ch!=EOF && ch!='\n' && ch!='*');
	251	if(ch=='\n' \|\| ch==EOF)
a39116f1 RP	252	return ch;
a39116f1 RP	253
fecd2382 RP	254	/* At this point, ch must be a '' /
	255	while ( (ch=(get)()) == '' ){
	256	;
	257	}
	258	if(ch==EOF \|\| ch=='/')
a39116f1	259	break;
fecd2382 RP	260	(*unget)(ch);
	261	}
	262	state=old_state;
	263	return ' ';
a39116f1	264
fecd2382 RP	265	case 4:
	266	ch=(*get)();
	267	if(ch==EOF \|\| (ch>='0' && ch<='9'))
a39116f1	268	return ch;
fecd2382 RP	269	else {
fecd2382 RP	270	while(ch!=EOF && IS_WHITESPACE(ch))
a39116f1	271	ch=(*get)();
fecd2382 RP	272	if(ch=='"') {
	273	(*unget)(ch);
	274	out_string="\n.app-file ";
	275	old_state=7;
	276	state= -1;
	277	return *out_string++;
	278	} else {
	279	while(ch!=EOF && ch!='\n')
a39116f1	280	ch=(*get)();
fecd2382 RP	281	return ch;
	282	}
	283	}
a39116f1	284
fecd2382 RP	285	case 5:
fecd2382 RP	286	ch=(*get)();
7c2d4011	287	if(lex[ch]==LEX_IS_STRINGQUOTE) {
fecd2382	288	state=old_state;
7c2d4011	289	return ch;
fecd2382 RP	290	} else if(ch=='\\') {
	291	state=6;
	292	return ch;
	293	} else if(ch==EOF) {
	294	as_warn("End of file in string: inserted '\"'");
	295	state=old_state;
	296	(*unget)('\n');
	297	return '"';
	298	} else {
	299	return ch;
	300	}
a39116f1	301
fecd2382 RP	302	case 6:
	303	state=5;
	304	ch=(*get)();
	305	switch(ch) {
	306	/* This is neet. Turn "string
	307	more string" into "string\n more string"
a39116f1	308	*/
fecd2382 RP	309	case '\n':
	310	(*unget)('n');
	311	add_newlines++;
	312	return '\\';
a39116f1	313
fecd2382 RP	314	case '"':
	315	case '\\':
	316	case 'b':
	317	case 'f':
	318	case 'n':
	319	case 'r':
	320	case 't':
	321	#ifdef BACKSLASH_V
	322	case 'v':
	323	#endif /* BACKSLASH_V */
	324	case '0':
	325	case '1':
	326	case '2':
	327	case '3':
	328	case '4':
	329	case '5':
	330	case '6':
	331	case '7':
	332	break;
7c2d4011	333	#if defined(IGNORE_NONSTANDARD_ESCAPES) \| defined(ONLY_STANDARD_ESCAPES)
fecd2382 RP	334	default:
	335	as_warn("Unknown escape '\\%c' in string: Ignored",ch);
	336	break;
	337	#else /* ONLY_STANDARD_ESCAPES */
	338	default:
	339	/* Accept \x as x for any x */
	340	break;
	341	#endif /* ONLY_STANDARD_ESCAPES */
a39116f1	342
fecd2382 RP	343	case EOF:
	344	as_warn("End of file in string: '\"' inserted");
	345	return '"';
	346	}
	347	return ch;
a39116f1	348
fecd2382 RP	349	case 7:
	350	ch=(*get)();
	351	state=5;
	352	old_state=8;
	353	return ch;
a39116f1	354
fecd2382 RP	355	case 8:
	356	do ch= (*get)();
	357	while(ch!='\n');
	358	state=0;
	359	return ch;
	360	}
7c2d4011	361
fecd2382	362	/* OK, we are somewhere in states 0 through 4 */
7c2d4011 SC	363
7c2d4011 SC	364	/* flushchar: */
fecd2382 RP	365	ch=(*get)();
	366	recycle:
	367	if (ch == EOF) {
	368	if (state != 0)
7c2d4011	369	as_warn("End of file not at end of a line: Newline inserted.");
fecd2382 RP	370	return ch;
fecd2382 RP	371	}
7c2d4011	372
fecd2382 RP	373	switch (lex[ch]) {
	374	case LEX_IS_WHITESPACE:
	375	do ch=(*get)();
	376	while(ch!=EOF && IS_WHITESPACE(ch));
	377	if(ch==EOF)
7c2d4011 SC	378	return ch;
7c2d4011 SC	379
fecd2382 RP	380	if(IS_COMMENT(ch) \|\| (state==0 && IS_LINE_COMMENT(ch)) \|\| ch=='/' \|\| IS_LINE_SEPARATOR(ch)) {
	381	goto recycle;
	382	}
7c2d4011 SC	383	#ifdef MRI
	384	(unget)(ch); / Put back */
	385	return ' '; /* Always return one space at start of line */
	386	#endif
fecd2382 RP	387	switch (state) {
	388	case 0: state++; goto recycle; /* Punted leading sp */
	389	case 1: BAD_CASE(state); /* We can't get here */
	390	case 2: state++; (unget)(ch); return ' '; / Sp after opco */
	391	case 3: goto recycle; /* Sp in operands */
	392	default: BAD_CASE(state);
	393	}
	394	break;
a39116f1	395
fecd2382 RP	396	case LEX_IS_TWOCHAR_COMMENT_1ST:
	397	ch2=(*get)();
	398	if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
	399	for(;;) {
	400	do {
	401	ch2=(*get)();
	402	if(ch2 != EOF && IS_NEWLINE(ch2))
7c2d4011	403	add_newlines++;
fecd2382	404	} while(ch2!=EOF &&
a39116f1 RP	405	(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
a39116f1 RP	406
fecd2382	407	while (ch2!=EOF &&
a39116f1	408	(lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
fecd2382 RP	409	ch2=(*get)();
fecd2382 RP	410	}
a39116f1	411
fecd2382	412	if(ch2==EOF
7c2d4011 SC	413	\|\| lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
7c2d4011 SC	414	break;
fecd2382 RP	415	(*unget)(ch);
	416	}
	417	if(ch2==EOF)
7c2d4011 SC	418	as_warn("End of file in multiline comment");
7c2d4011 SC	419
fecd2382 RP	420	ch = ' ';
	421	goto recycle;
	422	} else {
	423	if(ch2!=EOF)
a39116f1	424	(*unget)(ch2);
fecd2382 RP	425	return ch;
	426	}
	427	break;
a39116f1	428
fecd2382 RP	429	case LEX_IS_STRINGQUOTE:
	430	old_state=state;
	431	state=5;
	432	return ch;
7c2d4011	433	#ifndef MRI
a39116f1	434	#ifndef IEEE_STYLE
fecd2382 RP	435	case LEX_IS_ONECHAR_QUOTE:
	436	ch=(*get)();
	437	if(ch==EOF) {
	438	as_warn("End-of-file after a one-character quote; \000 inserted");
	439	ch=0;
	440	}
7c2d4011 SC	441	if (ch == '\\') {
	442	ch = (*get)();
	443	ch = process_escape(ch);
	444	}
fecd2382	445	sprintf(out_buf,"%d", (int)(unsigned char)ch);
7c2d4011	446
fecd2382 RP	447	/* None of these 'x constants for us. We want 'x'.
	448	*/
	449	if ( (ch=(*get)()) != '\'' ) {
	450	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
	451	as_warn("Missing close quote: (assumed)");
	452	#else
	453	(*unget)(ch);
	454	#endif
	455	}
a39116f1	456
fecd2382 RP	457	old_state=state;
	458	state= -1;
	459	out_string=out_buf;
	460	return *out_string++;
7c2d4011	461	#endif
a39116f1	462	#endif
fecd2382 RP	463	case LEX_IS_COLON:
fecd2382 RP	464	if(state!=3)
7c2d4011	465	state=0;
fecd2382	466	return ch;
7c2d4011	467
fecd2382 RP	468	case LEX_IS_NEWLINE:
	469	/* Roll out a bunch of newlines from inside comments, etc. */
	470	if(add_newlines) {
	471	--add_newlines;
	472	(*unget)(ch);
	473	}
	474	/* fall thru into... */
a39116f1	475
fecd2382 RP	476	case LEX_IS_LINE_SEPARATOR:
	477	state=0;
	478	return ch;
a39116f1	479
fecd2382 RP	480	case LEX_IS_LINE_COMMENT_START:
fecd2382 RP	481	if (state != 0) /* Not at start of line, act normal */
7c2d4011 SC	482	goto de_fault;
	483
	484	/* FIXME-someday: The two character comment stuff was badly
	485	thought out. On i386, we want '/' as line comment start
	486	AND we want C style comments. hence this hack. The
	487	whole lexical process should be reworked. xoxorich. */
	488
a39116f1 RP	489	if (ch == '/' && (ch2 = (get)()) == '') {
	490	state = -2;
	491	return(do_scrub_next_char(get, unget));
	492	} else {
	493	(*unget)(ch2);
	494	} /* bad hack */
	495
fecd2382 RP	496	do ch=(*get)();
	497	while(ch!=EOF && IS_WHITESPACE(ch));
	498	if(ch==EOF) {
	499	as_warn("EOF in comment: Newline inserted");
	500	return '\n';
	501	}
	502	if(ch<'0' \|\| ch>'9') {
	503	/* Non-numerics: Eat whole comment line */
	504	while(ch!=EOF && !IS_NEWLINE(ch))
a39116f1	505	ch=(*get)();
fecd2382	506	if(ch==EOF)
a39116f1	507	as_warn("EOF in Comment: Newline inserted");
fecd2382 RP	508	state=0;
	509	return '\n';
	510	}
	511	/* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
	512	(*unget)(ch);
	513	old_state=4;
	514	state= -1;
	515	out_string=".line ";
	516	return *out_string++;
a39116f1	517
fecd2382 RP	518	case LEX_IS_COMMENT_START:
	519	do ch=(*get)();
	520	while(ch!=EOF && !IS_NEWLINE(ch));
	521	if(ch==EOF)
a39116f1	522	as_warn("EOF in comment: Newline inserted");
fecd2382 RP	523	state=0;
fecd2382 RP	524	return '\n';
a39116f1	525
fecd2382 RP	526	default:
	527	de_fault:
	528	/* Some relatively `normal' character. */
	529	if(state==0) {
	530	state=2; /* Now seeing opcode */
	531	return ch;
	532	} else if(state==1) {
	533	state=2; /* Ditto */
	534	return ch;
	535	} else {
	536	return ch; /* Opcode or operands already */
	537	}
	538	}
	539	return -1;
	540	}
	541
	542	#ifdef TEST
	543
	544	char comment_chars[] = "\|";
	545	char line_comment_chars[] = "#";
	546
	547	main()
	548	{
	549	int ch;
a39116f1	550
fecd2382 RP	551	app_begin();
fecd2382 RP	552	while((ch=do_scrub_next_char(stdin))!=EOF)
a39116f1	553	putc(ch,stdout);
fecd2382 RP	554	}
	555
	556	as_warn(str)
	557	char *str;
	558	{
	559	fputs(str,stderr);
	560	putc('\n',stderr);
	561	}
	562	#endif
	563
	564	/*
	565	* Local Variables:
	566	* comment-column: 0
	567	* fill-column: 131
	568	* End:
	569	*/
	570
	571	/* end of app.c */