* configure.in: Change i[34]86 to i[345]86.
[deliverable/binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
   pair.  This needs better error-handling.  */
26
27 #include <stdio.h>
28 #include "as.h" /* For BAD_CASE() only */
29
30 #if (__STDC__ != 1)
31 #ifndef const
32 #define const /* empty */
33 #endif
34 #endif
35
/* Classification table indexed by character code.  It starts out all
   zero; do_scrub_begin () stores one of the LEX_IS_* classes below in
   the entries for characters the scrubber treats specially.  */
static char lex[256];

/* Characters that do_scrub_begin () marks as symbol components.  */
static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  0 is left for "no special class".  */
enum
{
  LEX_IS_SYMBOL_COMPONENT = 1,
  LEX_IS_WHITESPACE = 2,
  LEX_IS_LINE_SEPARATOR = 3,
  LEX_IS_COMMENT_START = 4,
  LEX_IS_LINE_COMMENT_START = 5,
  LEX_IS_TWOCHAR_COMMENT_1ST = 6,
  LEX_IS_TWOCHAR_COMMENT_2ND = 7,
  LEX_IS_STRINGQUOTE = 8,
  LEX_IS_COLON = 9,
  LEX_IS_NEWLINE = 10,
  LEX_IS_ONECHAR_QUOTE = 11
};

/* Class predicates.  C indexes lex[] directly, so it must be a valid
   index (0..255); in particular it must not be EOF.  */
#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)       (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)   (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)          (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)     (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)          (lex[c] == LEX_IS_NEWLINE)
57
58 static int process_escape PARAMS ((int));
59
60 /* FIXME-soon: The entire lexer/parser thingy should be
61 built statically at compile time rather than dynamically
62 each and every time the assembler is run. xoxorich. */
63
64 void
65 do_scrub_begin ()
66 {
67 const char *p;
68
69 lex[' '] = LEX_IS_WHITESPACE;
70 lex['\t'] = LEX_IS_WHITESPACE;
71 lex['\n'] = LEX_IS_NEWLINE;
72 lex[';'] = LEX_IS_LINE_SEPARATOR;
73 lex['"'] = LEX_IS_STRINGQUOTE;
74 #ifndef TC_HPPA
75 lex['\''] = LEX_IS_ONECHAR_QUOTE;
76 #endif
77 lex[':'] = LEX_IS_COLON;
78
79
80
81 #ifdef SINGLE_QUOTE_STRINGS
82 lex['\''] = LEX_IS_STRINGQUOTE;
83 #endif
84
85 /* Note that these override the previous defaults, e.g. if ';'
86
87 is a comment char, then it isn't a line separator. */
88 for (p = symbol_chars; *p; ++p)
89 {
90 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
91 } /* declare symbol characters */
92
93 for (p = comment_chars; *p; p++)
94 {
95 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
96 } /* declare comment chars */
97
98 for (p = line_comment_chars; *p; p++)
99 {
100 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
101 } /* declare line comment chars */
102
103 for (p = line_separator_chars; *p; p++)
104 {
105 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
106 } /* declare line separators */
107
108 /* Only allow slash-star comments if slash is not in use */
109 if (lex['/'] == 0)
110 {
111 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
112 }
113 /* FIXME-soon. This is a bad hack but otherwise, we
114 can't do c-style comments when '/' is a line
115 comment char. xoxorich. */
116 if (lex['*'] == 0)
117 {
118 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
119 }
120 } /* do_scrub_begin() */
121
/* Stream read by scrub_from_file () and pushed back onto by
   scrub_to_file ().  */
FILE *scrub_file;

/* Fetch the next raw input character from scrub_file.  Returns whatever
   getc () returns, including EOF.  */
int
scrub_from_file ()
{
  int ch;

  ch = getc (scrub_file);
  return ch;
}
129
130 void
131 scrub_to_file (ch)
132 int ch;
133 {
134 ungetc (ch, scrub_file);
135 } /* scrub_to_file() */
136
/* In-memory input for the scrubber: scrub_string points at the next
   unread character and scrub_last_string one past the final character.  */
char *scrub_string;
char *scrub_last_string;

/* Fetch the next raw input character from the in-memory buffer.
   Returns the character as a non-negative value (0..UCHAR_MAX), or EOF
   once scrub_string has reached scrub_last_string.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  /* Convert through unsigned char, as getc () does.  A plain char may be
     signed, in which case bytes >= 0x80 would come back negative: 0xff
     would be mistaken for EOF and the others would index lex[] out of
     bounds.  */
  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push CH back so that the next scrub_from_string () returns it.  The
   character is stored just before the current read position, so there
   must be room in the buffer in front of scrub_string.  Pushing back
   EOF does nothing, matching ungetc () as used by scrub_to_file ().  */
void
scrub_to_string (ch)
     int ch;
{
  /* Storing EOF would plant a bogus byte in the buffer and move the
     read position backwards.  */
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}				/* scrub_to_string() */
152
153 /* Saved state of the scrubber */
154 static int state;
155 static int old_state;
156 static char *out_string;
157 static char out_buf[20];
158 static int add_newlines = 0;
159
160 /* Data structure for saving the state of app across #include's. Note that
161 app is called asynchronously to the parsing of the .include's, so our
162 state at the time .include is interpreted is completely unrelated.
163 That's why we have to save it all. */
164
165 struct app_save
166 {
167 int state;
168 int old_state;
169 char *out_string;
170 char out_buf[sizeof (out_buf)];
171 int add_newlines;
172 char *scrub_string;
173 char *scrub_last_string;
174 FILE *scrub_file;
175 };
176
177 char *
178 app_push ()
179 {
180 register struct app_save *saved;
181
182 saved = (struct app_save *) xmalloc (sizeof (*saved));
183 saved->state = state;
184 saved->old_state = old_state;
185 saved->out_string = out_string;
186 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
187 saved->add_newlines = add_newlines;
188 saved->scrub_string = scrub_string;
189 saved->scrub_last_string = scrub_last_string;
190 saved->scrub_file = scrub_file;
191
192 /* do_scrub_begin() is not useful, just wastes time. */
193 return (char *) saved;
194 }
195
196 void
197 app_pop (arg)
198 char *arg;
199 {
200 register struct app_save *saved = (struct app_save *) arg;
201
202 /* There is no do_scrub_end (). */
203 state = saved->state;
204 old_state = saved->old_state;
205 out_string = saved->out_string;
206 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
207 add_newlines = saved->add_newlines;
208 scrub_string = saved->scrub_string;
209 scrub_last_string = saved->scrub_last_string;
210 scrub_file = saved->scrub_file;
211
212 free (arg);
213 } /* app_pop() */
214
/* Translate CH, the character that followed a backslash, into the
   character the escape stands for.  Only b, f, n, r and t are mapped;
   the quote characters and anything else are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';

  /* '\'', '"' and every other character stand for themselves.  */
  return ch;
}
241 int
242 do_scrub_next_char (get, unget)
243 int (*get) ();
244 void (*unget) ();
245 {
246 /*State 0: beginning of normal line
247 1: After first whitespace on line (flush more white)
248 2: After first non-white (opcode) on line (keep 1white)
249 3: after second white on line (into operands) (flush white)
250 4: after putting out a .line, put out digits
251 5: parsing a string, then go to old-state
252 6: putting out \ escape in a "d string.
253 7: After putting out a .appfile, put out string.
254 8: After putting out a .appfile string, flush until newline.
255 9: After seeing symbol char in state 3 (keep 1white after symchar)
256 10: After seeing whitespace in state 9 (keep white before symchar)
257 -1: output string in out_string and go to the state in old_state
258 -2: flush text until a '*' '/' is seen, then go to state old_state
259 */
260
261 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
262 constructs like ``.loc 1 20''. This was turning into ``.loc
263 120''. States 9 and 10 ensure that a space is never dropped in
264 between characters which could appear in a identifier. Ian
265 Taylor, ian@cygnus.com. */
266
267 register int ch, ch2 = 0;
268 int not_cpp_line = 0;
269
270 switch (state)
271 {
272 case -1:
273 ch = *out_string++;
274 if (*out_string == 0)
275 {
276 state = old_state;
277 old_state = 3;
278 }
279 return ch;
280
281 case -2:
282 for (;;)
283 {
284 do
285 {
286 ch = (*get) ();
287 }
288 while (ch != EOF && ch != '\n' && ch != '*');
289 if (ch == '\n' || ch == EOF)
290 return ch;
291
292 /* At this point, ch must be a '*' */
293 while ((ch = (*get) ()) == '*')
294 {
295 ;
296 }
297 if (ch == EOF || ch == '/')
298 break;
299 (*unget) (ch);
300 }
301 state = old_state;
302 return ' ';
303
304 case 4:
305 ch = (*get) ();
306 if (ch == EOF || (ch >= '0' && ch <= '9'))
307 return ch;
308 else
309 {
310 while (ch != EOF && IS_WHITESPACE (ch))
311 ch = (*get) ();
312 if (ch == '"')
313 {
314 (*unget) (ch);
315 out_string = "\n\t.appfile ";
316 old_state = 7;
317 state = -1;
318 return *out_string++;
319 }
320 else
321 {
322 while (ch != EOF && ch != '\n')
323 ch = (*get) ();
324 state = 0;
325 return ch;
326 }
327 }
328
329 case 5:
330 ch = (*get) ();
331 if (lex[ch] == LEX_IS_STRINGQUOTE)
332 {
333 state = old_state;
334 return ch;
335 }
336 else if (ch == '\\')
337 {
338 state = 6;
339 return ch;
340 }
341 else if (ch == EOF)
342 {
343 as_warn ("End of file in string: inserted '\"'");
344 state = old_state;
345 (*unget) ('\n');
346 return '"';
347 }
348 else
349 {
350 return ch;
351 }
352
353 case 6:
354 state = 5;
355 ch = (*get) ();
356 switch (ch)
357 {
358 /* Handle strings broken across lines, by turning '\n' into
359 '\\' and 'n'. */
360 case '\n':
361 (*unget) ('n');
362 add_newlines++;
363 return '\\';
364
365 case '"':
366 case '\\':
367 case 'b':
368 case 'f':
369 case 'n':
370 case 'r':
371 case 't':
372 #ifdef BACKSLASH_V
373 case 'v':
374 #endif /* BACKSLASH_V */
375 case 'x':
376 case 'X':
377 case '0':
378 case '1':
379 case '2':
380 case '3':
381 case '4':
382 case '5':
383 case '6':
384 case '7':
385 break;
386 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
387 default:
388 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
389 break;
390 #else /* ONLY_STANDARD_ESCAPES */
391 default:
392 /* Accept \x as x for any x */
393 break;
394 #endif /* ONLY_STANDARD_ESCAPES */
395
396 case EOF:
397 as_warn ("End of file in string: '\"' inserted");
398 return '"';
399 }
400 return ch;
401
402 case 7:
403 ch = (*get) ();
404 state = 5;
405 old_state = 8;
406 return ch;
407
408 case 8:
409 do
410 ch = (*get) ();
411 while (ch != '\n');
412 state = 0;
413 return ch;
414 }
415
416 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
417
418 /* flushchar: */
419 ch = (*get) ();
420 recycle:
421 if (ch == EOF)
422 {
423 if (state != 0)
424 as_warn ("End of file not at end of a line: Newline inserted.");
425 return ch;
426 }
427
428 switch (lex[ch])
429 {
430 case LEX_IS_WHITESPACE:
431 do
432 /* Preserve a single whitespace character at the beginning of
433 a line. */
434 if (state == 0)
435 {
436 state = 1;
437 return ch;
438 }
439 else
440 ch = (*get) ();
441 while (ch != EOF && IS_WHITESPACE (ch));
442 if (ch == EOF)
443 return ch;
444
445 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
446 {
447 /* cpp never outputs a leading space before the #, so try to
448 avoid being confused. */
449 not_cpp_line = 1;
450 goto recycle;
451 }
452 #ifdef MRI
453 (*unget) (ch); /* Put back */
454 return ' '; /* Always return one space at start of line */
455 #endif
456
457 /* If we're in state 2, we've seen a non-white
458 character followed by whitespace. If the next
459 character is ':', this is whitespace after a label
460 name which we can ignore. */
461 if (state == 2 && lex[ch] == LEX_IS_COLON)
462 {
463 state = 0;
464 return ch;
465 }
466
467 switch (state)
468 {
469 case 0:
470 state++;
471 goto recycle; /* Punted leading sp */
472 case 1:
473 /* We can arrive here if we leave a leading whitespace character
474 at the beginning of a line. */
475 goto recycle;
476 case 2:
477 state = 3;
478 (*unget) (ch);
479 return ' '; /* Sp after opco */
480 case 3:
481 goto recycle; /* Sp in operands */
482 case 9:
483 case 10:
484 state = 10; /* Sp after symbol char */
485 goto recycle;
486 default:
487 BAD_CASE (state);
488 }
489 break;
490
491 case LEX_IS_TWOCHAR_COMMENT_1ST:
492 ch2 = (*get) ();
493 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
494 {
495 for (;;)
496 {
497 do
498 {
499 ch2 = (*get) ();
500 if (ch2 != EOF && IS_NEWLINE (ch2))
501 add_newlines++;
502 }
503 while (ch2 != EOF &&
504 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
505
506 while (ch2 != EOF &&
507 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
508 {
509 ch2 = (*get) ();
510 }
511
512 if (ch2 == EOF
513 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
514 break;
515 (*unget) (ch);
516 }
517 if (ch2 == EOF)
518 as_warn ("End of file in multiline comment");
519
520 ch = ' ';
521 goto recycle;
522 }
523 else
524 {
525 if (ch2 != EOF)
526 (*unget) (ch2);
527 if (state == 9 || state == 10)
528 state = 3;
529 return ch;
530 }
531 break;
532
533 case LEX_IS_STRINGQUOTE:
534 if (state == 9 || state == 10)
535 old_state = 3;
536 else
537 old_state = state;
538 state = 5;
539 return ch;
540 #ifndef MRI
541 #ifndef IEEE_STYLE
542 case LEX_IS_ONECHAR_QUOTE:
543 ch = (*get) ();
544 if (ch == EOF)
545 {
546 as_warn ("End-of-file after a one-character quote; \\000 inserted");
547 ch = 0;
548 }
549 if (ch == '\\')
550 {
551 ch = (*get) ();
552 ch = process_escape (ch);
553 }
554 sprintf (out_buf, "%d", (int) (unsigned char) ch);
555
556
557 /* None of these 'x constants for us. We want 'x'. */
558 if ((ch = (*get) ()) != '\'')
559 {
560 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
561 as_warn ("Missing close quote: (assumed)");
562 #else
563 (*unget) (ch);
564 #endif
565 }
566 if (strlen (out_buf) == 1)
567 {
568 return out_buf[0];
569 }
570 if (state == 9 || state == 10)
571 old_state = 3;
572 else
573 old_state = state;
574 state = -1;
575 out_string = out_buf;
576 return *out_string++;
577 #endif
578 #endif
579 case LEX_IS_COLON:
580 if (state == 9 || state == 10)
581 state = 3;
582 else if (state != 3)
583 state = 0;
584 return ch;
585
586 case LEX_IS_NEWLINE:
587 /* Roll out a bunch of newlines from inside comments, etc. */
588 if (add_newlines)
589 {
590 --add_newlines;
591 (*unget) (ch);
592 }
593 /* fall thru into... */
594
595 case LEX_IS_LINE_SEPARATOR:
596 state = 0;
597 return ch;
598
599 case LEX_IS_LINE_COMMENT_START:
600 if (state == 0) /* Only comment at start of line. */
601 {
602 /* FIXME-someday: The two character comment stuff was badly
603 thought out. On i386, we want '/' as line comment start
604 AND we want C style comments. hence this hack. The
605 whole lexical process should be reworked. xoxorich. */
606 if (ch == '/')
607 {
608 ch2 = (*get) ();
609 if (ch2 == '*')
610 {
611 state = -2;
612 return (do_scrub_next_char (get, unget));
613 }
614 else
615 {
616 (*unget) (ch2);
617 }
618 } /* bad hack */
619
620 if (ch != '#')
621 not_cpp_line = 1;
622
623 do
624 ch = (*get) ();
625 while (ch != EOF && IS_WHITESPACE (ch));
626 if (ch == EOF)
627 {
628 as_warn ("EOF in comment: Newline inserted");
629 return '\n';
630 }
631 if (ch < '0' || ch > '9' || not_cpp_line)
632 {
633 /* Non-numerics: Eat whole comment line */
634 while (ch != EOF && !IS_NEWLINE (ch))
635 ch = (*get) ();
636 if (ch == EOF)
637 as_warn ("EOF in Comment: Newline inserted");
638 state = 0;
639 return '\n';
640 }
641 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
642 (*unget) (ch);
643 old_state = 4;
644 state = -1;
645 out_string = "\t.appline ";
646 return *out_string++;
647 }
648
649 /* We have a line comment character which is not at the start of
650 a line. If this is also a normal comment character, fall
651 through. Otherwise treat it as a default character. */
652 if (strchr (comment_chars, ch) == NULL)
653 goto de_fault;
654 /* Fall through. */
655 case LEX_IS_COMMENT_START:
656 do
657 ch = (*get) ();
658 while (ch != EOF && !IS_NEWLINE (ch));
659 if (ch == EOF)
660 as_warn ("EOF in comment: Newline inserted");
661 state = 0;
662 return '\n';
663
664 case LEX_IS_SYMBOL_COMPONENT:
665 if (state == 10)
666 {
667 /* This is a symbol character following another symbol
668 character, with whitespace in between. We skipped the
669 whitespace earlier, so output it now. */
670 (*unget) (ch);
671 state = 3;
672 return ' ';
673 }
674 if (state == 3)
675 state = 9;
676 /* Fall through. */
677 default:
678 de_fault:
679 /* Some relatively `normal' character. */
680 if (state == 0)
681 {
682 state = 2; /* Now seeing opcode */
683 return ch;
684 }
685 else if (state == 1)
686 {
687 state = 2; /* Ditto */
688 return ch;
689 }
690 else if (state == 9)
691 {
692 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
693 state = 3;
694 return ch;
695 }
696 else if (state == 10)
697 {
698 state = 3;
699 return ch;
700 }
701 else
702 {
703 return ch; /* Opcode or operands already */
704 }
705 }
706 return -1;
707 }
708
#ifdef TEST

/* Stand-alone filter for exercising the scrubber: reads stdin and
   writes the scrubbed text to stdout.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () also walks line_separator_chars, so the stand-alone
   build has to define it too.  NOTE(review): assumes it is declared as
   a const char array like the two above -- confirm against as.h.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* Build the character class table before the first character is read.  */
  do_scrub_begin ();

  /* Scrub stdin through the file-based get/unget pair.  */
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal as_warn () for the stand-alone build: print STR and a newline
   on stderr.  Any further arguments are ignored, so a format such as
   "%c" is printed literally.  NOTE(review): the return type is left
   implicit as in the original; confirm it agrees with whatever as.h
   declares.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
731
732 /* end of app.c */
This page took 0.047287 seconds and 4 git commands to generate.