1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
8 This file is part of GAS, the GNU Assembler.
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
27 This needs better error-handling.
31 #include "as.h" /* For BAD_CASE() only */
34 #if (__STDC__ != 1) && !defined(const)
35 #define const /* Nothing */
39 static char symbol_chars
[] =
40 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
/* Character classes stored in the lex[] table, which is indexed by
   character.  The table is filled in by do_scrub_begin():
     LEX_IS_WHITESPACE           ' ' and '\t'
     LEX_IS_NEWLINE              '\n'
     LEX_IS_LINE_SEPARATOR       ';' and every char of line_separator_chars
     LEX_IS_STRINGQUOTE          '"'
     LEX_IS_ONECHAR_QUOTE        '\''
     LEX_IS_COLON                ':'
     LEX_IS_SYMBOL_COMPONENT     every char of symbol_chars
     LEX_IS_LINE_COMMENT_START   every char of line_comment_chars
     LEX_IS_COMMENT_START        every char of comment_chars
     LEX_IS_TWOCHAR_COMMENT_1ST  '/'  (first char of a slash-star comment)
     LEX_IS_TWOCHAR_COMMENT_2ND  '*'  (second char of a slash-star comment)
   Later assignments override earlier ones, so a character has exactly one
   class.

   The IS_*() macros look a character up in lex[] directly; callers test
   for EOF before using them, since EOF is not a valid index.  */
42 #define LEX_IS_SYMBOL_COMPONENT 1
43 #define LEX_IS_WHITESPACE 2
44 #define LEX_IS_LINE_SEPARATOR 3
45 #define LEX_IS_COMMENT_START 4
46 #define LEX_IS_LINE_COMMENT_START 5
47 #define LEX_IS_TWOCHAR_COMMENT_1ST 6
48 #define LEX_IS_TWOCHAR_COMMENT_2ND 7
49 #define LEX_IS_STRINGQUOTE 8
50 #define LEX_IS_COLON 9
51 #define LEX_IS_NEWLINE 10
52 #define LEX_IS_ONECHAR_QUOTE 11
53 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
54 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
55 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
56 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
57 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
58 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
60 /* FIXME-soon: The entire lexer/parser thingy should be
61 built statically at compile time rather than dynamically
62 each and every time the assembler is run. xoxorich. */
69 lex
[' '] = LEX_IS_WHITESPACE
;
70 lex
['\t'] = LEX_IS_WHITESPACE
;
71 lex
['\n'] = LEX_IS_NEWLINE
;
72 lex
[';'] = LEX_IS_LINE_SEPARATOR
;
73 lex
['"'] = LEX_IS_STRINGQUOTE
;
74 lex
['\''] = LEX_IS_ONECHAR_QUOTE
;
75 lex
[':'] = LEX_IS_COLON
;
78 lex
['\''] = LEX_IS_STRINGQUOTE
;
80 /* Note that these override the previous defaults, e.g. if ';'
81 is a comment char, then it isn't a line separator. */
82 for (p
= symbol_chars
; *p
; ++p
)
84 lex
[*p
] = LEX_IS_SYMBOL_COMPONENT
;
85 } /* declare symbol characters */
87 for (p
= line_comment_chars
; *p
; p
++)
89 lex
[*p
] = LEX_IS_LINE_COMMENT_START
;
90 } /* declare line comment chars */
92 for (p
= comment_chars
; *p
; p
++)
94 lex
[*p
] = LEX_IS_COMMENT_START
;
95 } /* declare comment chars */
97 for (p
= line_separator_chars
; *p
; p
++)
99 lex
[*p
] = LEX_IS_LINE_SEPARATOR
;
100 } /* declare line separators */
102 /* Only allow slash-star comments if slash is not in use */
105 lex
['/'] = LEX_IS_TWOCHAR_COMMENT_1ST
;
107 /* FIXME-soon. This is a bad hack but otherwise, we
108 can't do c-style comments when '/' is a line
109 comment char. xoxorich. */
112 lex
['*'] = LEX_IS_TWOCHAR_COMMENT_2ND
;
114 } /* do_scrub_begin() */
121 return getc (scrub_file
);
128 ungetc (ch
, scrub_file
);
129 } /* scrub_to_file() */
132 char *scrub_last_string
;
137 return scrub_string
== scrub_last_string
? EOF
: *scrub_string
++;
138 } /* scrub_from_string() */
144 *--scrub_string
= ch
;
145 } /* scrub_to_string() */
147 /* Saved state of the scrubber.  These file-scope variables carry the
   scrubber's progress from one call to the next, and they are among the
   values copied into and out of struct app_save.  */
149 static int old_state
; /* state to go to once pseudo-state -1 (emit out_string) or -2 (skip a comment) is finished */
150 static char *out_string
; /* next character of the text being emitted; set to a literal or to out_buf */
151 static char out_buf
[20]; /* scratch buffer; receives the decimal text of a one-character constant via sprintf */
152 static int add_newlines
= 0; /* NOTE(review): presumably the number of newlines swallowed inside comments/strings that still have to be emitted -- confirm against the code that uses it */
154 /* Data structure for saving the state of app across #include's. Note that
155 app is called asynchronously to the parsing of the .include's, so our
156 state at the time .include is interpreted is completely unrelated.
157 That's why we have to save it all. */
164 char out_buf
[sizeof (out_buf
)];
167 char *scrub_last_string
;
174 register struct app_save
*saved
;
176 saved
= (struct app_save
*) xmalloc (sizeof (*saved
));
177 saved
->state
= state
;
178 saved
->old_state
= old_state
;
179 saved
->out_string
= out_string
;
180 bcopy (saved
->out_buf
, out_buf
, sizeof (out_buf
));
181 saved
->add_newlines
= add_newlines
;
182 saved
->scrub_string
= scrub_string
;
183 saved
->scrub_last_string
= scrub_last_string
;
184 saved
->scrub_file
= scrub_file
;
186 /* do_scrub_begin() is not useful, just wastes time. */
187 return (char *) saved
;
194 register struct app_save
*saved
= (struct app_save
*) arg
;
196 /* There is no do_scrub_end (). */
197 state
= saved
->state
;
198 old_state
= saved
->old_state
;
199 out_string
= saved
->out_string
;
200 memcpy (saved
->out_buf
, out_buf
, sizeof (out_buf
));
201 add_newlines
= saved
->add_newlines
;
202 scrub_string
= saved
->scrub_string
;
203 scrub_last_string
= saved
->scrub_last_string
;
204 scrub_file
= saved
->scrub_file
;
234 do_scrub_next_char (get
, unget
)
238 /* State 0: beginning of normal line
239 1: After first whitespace on line (flush more white)
240 2: After first non-white (opcode) on line (keep 1 white)
241 3: after second white on line (into operands) (flush white)
242 4: after putting out a .line, put out digits
243 5: parsing a string, then go to old-state
244 6: putting out \ escape in a "d string.
245 7: After putting out a .app-file, put out string.
246 8: After putting out a .app-file string, flush until newline.
247 -1: output string in out_string and go to the state in old_state
248 -2: flush text until a '*' '/' is seen, then go to state old_state
251 register int ch
, ch2
= 0;
257 if (*out_string
== 0)
271 while (ch
!= EOF
&& ch
!= '\n' && ch
!= '*');
272 if (ch
== '\n' || ch
== EOF
)
275 /* At this point, ch must be a '*' */
276 while ((ch
= (*get
) ()) == '*')
280 if (ch
== EOF
|| ch
== '/')
289 if (ch
== EOF
|| (ch
>= '0' && ch
<= '9'))
293 while (ch
!= EOF
&& IS_WHITESPACE (ch
))
298 out_string
= "\n.app-file ";
301 return *out_string
++;
305 while (ch
!= EOF
&& ch
!= '\n')
313 if (lex
[ch
] == LEX_IS_STRINGQUOTE
)
325 as_warn ("End of file in string: inserted '\"'");
340 /* This is neat. Turn "string
341 more string" into "string\n more string"
357 #endif /* BACKSLASH_V */
367 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
369 as_warn ("Unknown escape '\\%c' in string: Ignored", ch
);
371 #else /* ONLY_STANDARD_ESCAPES */
373 /* Accept \x as x for any x */
375 #endif /* ONLY_STANDARD_ESCAPES */
378 as_warn ("End of file in string: '\"' inserted");
397 /* OK, we are somewhere in states 0 through 4 */
405 as_warn ("End of file not at end of a line: Newline inserted.");
411 case LEX_IS_WHITESPACE
:
414 while (ch
!= EOF
&& IS_WHITESPACE (ch
));
418 if (IS_COMMENT (ch
) || (state
== 0 && IS_LINE_COMMENT (ch
)) || ch
== '/' || IS_LINE_SEPARATOR (ch
))
423 (*unget
) (ch
); /* Put back */
424 return ' '; /* Always return one space at start of line */
427 /* If we're in state 2, we've seen a non-white
428 character followed by whitespace. If the next
429 character is ':', this is whitespace after a label
430 name which we can ignore. */
431 if (state
== 2 && lex
[ch
] == LEX_IS_COLON
)
441 goto recycle
; /* Punted leading sp */
443 BAD_CASE (state
); /* We can't get here */
447 return ' '; /* Sp after opco */
449 goto recycle
; /* Sp in operands */
455 case LEX_IS_TWOCHAR_COMMENT_1ST
:
457 if (ch2
!= EOF
&& lex
[ch2
] == LEX_IS_TWOCHAR_COMMENT_2ND
)
464 if (ch2
!= EOF
&& IS_NEWLINE (ch2
))
468 (lex
[ch2
] != LEX_IS_TWOCHAR_COMMENT_2ND
));
471 (lex
[ch2
] == LEX_IS_TWOCHAR_COMMENT_2ND
))
477 || lex
[ch2
] == LEX_IS_TWOCHAR_COMMENT_1ST
)
482 as_warn ("End of file in multiline comment");
495 case LEX_IS_STRINGQUOTE
:
501 case LEX_IS_ONECHAR_QUOTE
:
505 as_warn ("End-of-file after a one-character quote; \\000 inserted");
511 ch
= process_escape (ch
);
513 sprintf (out_buf
, "%d", (int) (unsigned char) ch
);
516 /* None of these 'x constants for us. We want 'x'.
518 if ((ch
= (*get
) ()) != '\'')
520 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
521 as_warn ("Missing close quote: (assumed)");
526 if (strlen (out_buf
) == 1)
532 out_string
= out_buf
;
533 return *out_string
++;
542 /* Roll out a bunch of newlines from inside comments, etc. */
548 /* fall thru into... */
550 case LEX_IS_LINE_SEPARATOR
:
554 case LEX_IS_LINE_COMMENT_START
:
555 if (state
!= 0) /* Not at start of line, act normal */
558 /* FIXME-someday: The two character comment stuff was badly
559 thought out. On i386, we want '/' as line comment start
560 AND we want C style comments. hence this hack. The
561 whole lexical process should be reworked. xoxorich. */
563 if (ch
== '/' && (ch2
= (*get
) ()) == '*')
566 return (do_scrub_next_char (get
, unget
));
575 while (ch
!= EOF
&& IS_WHITESPACE (ch
));
578 as_warn ("EOF in comment: Newline inserted");
581 if (ch
< '0' || ch
> '9')
583 /* Non-numerics: Eat whole comment line */
584 while (ch
!= EOF
&& !IS_NEWLINE (ch
))
587 as_warn ("EOF in Comment: Newline inserted");
591 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
595 out_string
= ".line ";
596 return *out_string
++;
598 case LEX_IS_COMMENT_START
:
601 while (ch
!= EOF
&& !IS_NEWLINE (ch
));
603 as_warn ("EOF in comment: Newline inserted");
609 /* Some relatively `normal' character. */
612 state
= 2; /* Now seeing opcode */
617 state
= 2; /* Ditto */
622 return ch
; /* Opcode or operands already */
630 const char comment_chars
[] = "|";
631 const char line_comment_chars
[] = "#";
638 while ((ch
= do_scrub_next_char (stdin
)) != EOF
)