* gas/all/p2425.s: Insert a tab before assembler directives so
[deliverable/binutils-gdb.git] / gas / app.c
... / ...
CommitLineData
1/* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
21/* App, the assembler pre-processor. This pre-processor strips out excess
22 spaces, turns single-quoted characters into a decimal constant, and turns
23 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
24 pair. This needs better error-handling.
25 */
26
27#include <stdio.h>
28#include "as.h" /* For BAD_CASE() only */
29
30#if (__STDC__ != 1) && !defined(const)
31#define const /* Nothing */
32#endif
33
/* Lexical class of each input byte, indexed by the byte's value taken as
   an unsigned char.  Every entry starts out zero ("no special class");
   do_scrub_begin () fills the table in.  */
static char lex[256];

/* The characters do_scrub_begin () classifies as symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Lexical classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_TWOCHAR_COMMENT_2ND 7
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11

/* Classification predicates.  The argument is parenthesized and narrowed
   to unsigned char before it indexes lex[], so a plain (possibly signed)
   char holding a byte >= 0x80 can never produce a negative subscript.
   EOF is not a byte value: callers must test for it separately before
   using any of these.  */
#define LEX_CLASS(c) (lex[(unsigned char) (c)])
#define IS_SYMBOL_COMPONENT(c) (LEX_CLASS (c) == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (LEX_CLASS (c) == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (LEX_CLASS (c) == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) (LEX_CLASS (c) == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (LEX_CLASS (c) == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (LEX_CLASS (c) == LEX_IS_NEWLINE)
55
56/* FIXME-soon: The entire lexer/parser thingy should be
57 built statically at compile time rather than dynamically
58 each and every time the assembler is run. xoxorich. */
59
60void
61do_scrub_begin ()
62{
63 const char *p;
64
65 lex[' '] = LEX_IS_WHITESPACE;
66 lex['\t'] = LEX_IS_WHITESPACE;
67 lex['\n'] = LEX_IS_NEWLINE;
68 lex[';'] = LEX_IS_LINE_SEPARATOR;
69 lex['"'] = LEX_IS_STRINGQUOTE;
70#ifndef TC_HPPA
71 lex['\''] = LEX_IS_ONECHAR_QUOTE;
72#endif
73 lex[':'] = LEX_IS_COLON;
74
75
76
77#ifdef SINGLE_QUOTE_STRINGS
78 lex['\''] = LEX_IS_STRINGQUOTE;
79#endif
80
81 /* Note that these override the previous defaults, e.g. if ';'
82
83 is a comment char, then it isn't a line separator. */
84 for (p = symbol_chars; *p; ++p)
85 {
86 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
87 } /* declare symbol characters */
88
89 for (p = comment_chars; *p; p++)
90 {
91 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
92 } /* declare comment chars */
93
94 for (p = line_comment_chars; *p; p++)
95 {
96 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
97 } /* declare line comment chars */
98
99 for (p = line_separator_chars; *p; p++)
100 {
101 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
102 } /* declare line separators */
103
104 /* Only allow slash-star comments if slash is not in use */
105 if (lex['/'] == 0)
106 {
107 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
108 }
109 /* FIXME-soon. This is a bad hack but otherwise, we
110 can't do c-style comments when '/' is a line
111 comment char. xoxorich. */
112 if (lex['*'] == 0)
113 {
114 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
115 }
116} /* do_scrub_begin() */
117
/* The stream that scrub_from_file () reads.  */
FILE *scrub_file;

/* Character source for the scrubber when the input is a stdio stream:
   hand back whatever getc () yields for scrub_file, which is the next
   character or EOF.  */
int
scrub_from_file ()
{
  int next = getc (scrub_file);

  return next;
}
125
126void
127scrub_to_file (ch)
128 int ch;
129{
130 ungetc (ch, scrub_file);
131} /* scrub_to_file() */
132
/* In-memory scrubber input: scrub_string is the read position and
   scrub_last_string is the position at which reading stops.  */
char *scrub_string;
char *scrub_last_string;

/* Character source for the scrubber when the input is held in memory.
   Returns EOF once scrub_string has reached scrub_last_string; otherwise
   returns the next byte and advances.

   The byte is returned as an unsigned char converted to int, the same
   convention getc () follows.  Returning the plain char would, where
   char is signed, make a 0xFF byte indistinguishable from EOF and turn
   every other byte >= 0x80 into a negative value.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push-back routine paired with scrub_from_string (): step the read
   position back by one and store CH there, so that the next read
   returns it.

   EOF is not a character and is ignored, as ungetc () ignores it;
   storing it would move the read position and overwrite a byte of the
   buffer with a value that does not read back as EOF.  */
void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}				/* scrub_to_string() */
148
/* Scrubber state carried from one do_scrub_next_char () call to the
   next.  */

/* Current state number; the numbering is listed at the top of
   do_scrub_next_char ().  */
static int state;

/* State to switch to when the current string, block comment or
   out_string run is finished.  */
static int old_state;

/* Next character of the canned text being emitted while in state -1.  */
static char *out_string;

/* Scratch space for the decimal text of a one-character constant.  */
static char out_buf[20];

/* Count of newlines swallowed inside comments and continued strings
   that have not been handed back to the reader yet.  */
static int add_newlines = 0;

/* Everything app_push () has to stash and app_pop () has to restore
   around an #include.  App is called asynchronously to the parsing of
   the .include's, so the scrubber's state at the moment the .include is
   interpreted is completely unrelated to the include itself; that is
   why all of it is saved, the input source included.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
172
173char *
174app_push ()
175{
176 register struct app_save *saved;
177
178 saved = (struct app_save *) xmalloc (sizeof (*saved));
179 saved->state = state;
180 saved->old_state = old_state;
181 saved->out_string = out_string;
182 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
183 saved->add_newlines = add_newlines;
184 saved->scrub_string = scrub_string;
185 saved->scrub_last_string = scrub_last_string;
186 saved->scrub_file = scrub_file;
187
188 /* do_scrub_begin() is not useful, just wastes time. */
189 return (char *) saved;
190}
191
192void
193app_pop (arg)
194 char *arg;
195{
196 register struct app_save *saved = (struct app_save *) arg;
197
198 /* There is no do_scrub_end (). */
199 state = saved->state;
200 old_state = saved->old_state;
201 out_string = saved->out_string;
202 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
203 add_newlines = saved->add_newlines;
204 scrub_string = saved->scrub_string;
205 scrub_last_string = saved->scrub_last_string;
206 scrub_file = saved->scrub_file;
207
208 free (arg);
209} /* app_pop() */
210
/* Translate CH, the character that followed a backslash, into the
   character the escape stands for: 'b', 'f', 'n', 'r' and 't' become
   backspace, form feed, newline, carriage return and tab.  Any other
   character, the two quote characters included, stands for itself.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
int
process_escape (ch)
     char ch;
{
  static const char letters[] = { 'b', 'f', 'n', 'r', 't' };
  static const char meanings[] = { '\b', '\f', '\n', '\r', '\t' };
  unsigned int i;

  for (i = 0; i < sizeof (letters); i++)
    {
      if (ch == letters[i])
	return meanings[i];
    }

  return ch;
}
237int
238do_scrub_next_char (get, unget)
239 int (*get) ();
240 void (*unget) ();
241{
242 /*State 0: beginning of normal line
243 1: After first whitespace on line (flush more white)
244 2: After first non-white (opcode) on line (keep 1white)
245 3: after second white on line (into operands) (flush white)
246 4: after putting out a .line, put out digits
247 5: parsing a string, then go to old-state
248 6: putting out \ escape in a "d string.
249 7: After putting out a .appfile, put out string.
250 8: After putting out a .appfile string, flush until newline.
251 9: After seeing symbol char in state 3 (keep 1white after symchar)
252 10: After seeing whitespace in state 9 (keep white before symchar)
253 -1: output string in out_string and go to the state in old_state
254 -2: flush text until a '*' '/' is seen, then go to state old_state
255 */
256
257 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
258 constructs like ``.loc 1 20''. This was turning into ``.loc
259 120''. States 9 and 10 ensure that a space is never dropped in
260 between characters which could appear in a identifier. Ian
261 Taylor, ian@cygnus.com. */
262
263 register int ch, ch2 = 0;
264
265 switch (state)
266 {
267 case -1:
268 ch = *out_string++;
269 if (*out_string == 0)
270 {
271 state = old_state;
272 old_state = 3;
273 }
274 return ch;
275
276 case -2:
277 for (;;)
278 {
279 do
280 {
281 ch = (*get) ();
282 }
283 while (ch != EOF && ch != '\n' && ch != '*');
284 if (ch == '\n' || ch == EOF)
285 return ch;
286
287 /* At this point, ch must be a '*' */
288 while ((ch = (*get) ()) == '*')
289 {
290 ;
291 }
292 if (ch == EOF || ch == '/')
293 break;
294 (*unget) (ch);
295 }
296 state = old_state;
297 return ' ';
298
299 case 4:
300 ch = (*get) ();
301 if (ch == EOF || (ch >= '0' && ch <= '9'))
302 return ch;
303 else
304 {
305 while (ch != EOF && IS_WHITESPACE (ch))
306 ch = (*get) ();
307 if (ch == '"')
308 {
309 (*unget) (ch);
310 out_string = "\n.appfile ";
311 old_state = 7;
312 state = -1;
313 return *out_string++;
314 }
315 else
316 {
317 while (ch != EOF && ch != '\n')
318 ch = (*get) ();
319 state = 0;
320 return ch;
321 }
322 }
323
324 case 5:
325 ch = (*get) ();
326 if (lex[ch] == LEX_IS_STRINGQUOTE)
327 {
328 state = old_state;
329 return ch;
330 }
331 else if (ch == '\\')
332 {
333 state = 6;
334 return ch;
335 }
336 else if (ch == EOF)
337 {
338 as_warn ("End of file in string: inserted '\"'");
339 state = old_state;
340 (*unget) ('\n');
341 return '"';
342 }
343 else
344 {
345 return ch;
346 }
347
348 case 6:
349 state = 5;
350 ch = (*get) ();
351 switch (ch)
352 {
353 /* Handle strings broken across lines, by turning '\n' into
354 '\\' and 'n'. */
355 case '\n':
356 (*unget) ('n');
357 add_newlines++;
358 return '\\';
359
360 case '"':
361 case '\\':
362#ifdef TC_HPPA
363 case 'x': /* '\\x' introduces escaped sequences on the PA */
364#endif
365 case 'b':
366 case 'f':
367 case 'n':
368 case 'r':
369 case 't':
370#ifdef BACKSLASH_V
371 case 'v':
372#endif /* BACKSLASH_V */
373 case '0':
374 case '1':
375 case '2':
376 case '3':
377 case '4':
378 case '5':
379 case '6':
380 case '7':
381 break;
382#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
383 default:
384 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
385 break;
386#else /* ONLY_STANDARD_ESCAPES */
387 default:
388 /* Accept \x as x for any x */
389 break;
390#endif /* ONLY_STANDARD_ESCAPES */
391
392 case EOF:
393 as_warn ("End of file in string: '\"' inserted");
394 return '"';
395 }
396 return ch;
397
398 case 7:
399 ch = (*get) ();
400 state = 5;
401 old_state = 8;
402 return ch;
403
404 case 8:
405 do
406 ch = (*get) ();
407 while (ch != '\n');
408 state = 0;
409 return ch;
410 }
411
412 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
413
414 /* flushchar: */
415 ch = (*get) ();
416recycle:
417 if (ch == EOF)
418 {
419 if (state != 0)
420 as_warn ("End of file not at end of a line: Newline inserted.");
421 return ch;
422 }
423
424 switch (lex[ch])
425 {
426 case LEX_IS_WHITESPACE:
427 do
428 ch = (*get) ();
429 while (ch != EOF && IS_WHITESPACE (ch));
430 if (ch == EOF)
431 return ch;
432
433 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
434 {
435 goto recycle;
436 }
437#ifdef MRI
438 (*unget) (ch); /* Put back */
439 return ' '; /* Always return one space at start of line */
440#endif
441
442 /* If we're in state 2, we've seen a non-white
443 character followed by whitespace. If the next
444 character is ':', this is whitespace after a label
445 name which we can ignore. */
446 if (state == 2 && lex[ch] == LEX_IS_COLON)
447 {
448 state = 0;
449 return ch;
450 }
451
452 switch (state)
453 {
454 case 0:
455 state++;
456 goto recycle; /* Punted leading sp */
457 case 1:
458 BAD_CASE (state); /* We can't get here */
459 case 2:
460 state = 3;
461 (*unget) (ch);
462 return ' '; /* Sp after opco */
463 case 3:
464 goto recycle; /* Sp in operands */
465 case 9:
466 case 10:
467 state = 10; /* Sp after symbol char */
468 goto recycle;
469 default:
470 BAD_CASE (state);
471 }
472 break;
473
474 case LEX_IS_TWOCHAR_COMMENT_1ST:
475 ch2 = (*get) ();
476 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
477 {
478 for (;;)
479 {
480 do
481 {
482 ch2 = (*get) ();
483 if (ch2 != EOF && IS_NEWLINE (ch2))
484 add_newlines++;
485 }
486 while (ch2 != EOF &&
487 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
488
489 while (ch2 != EOF &&
490 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
491 {
492 ch2 = (*get) ();
493 }
494
495 if (ch2 == EOF
496 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
497 break;
498 (*unget) (ch);
499 }
500 if (ch2 == EOF)
501 as_warn ("End of file in multiline comment");
502
503 ch = ' ';
504 goto recycle;
505 }
506 else
507 {
508 if (ch2 != EOF)
509 (*unget) (ch2);
510 if (state == 9 || state == 10)
511 state = 3;
512 return ch;
513 }
514 break;
515
516 case LEX_IS_STRINGQUOTE:
517 if (state == 9 || state == 10)
518 old_state = 3;
519 else
520 old_state = state;
521 state = 5;
522 return ch;
523#ifndef MRI
524#ifndef IEEE_STYLE
525 case LEX_IS_ONECHAR_QUOTE:
526 ch = (*get) ();
527 if (ch == EOF)
528 {
529 as_warn ("End-of-file after a one-character quote; \\000 inserted");
530 ch = 0;
531 }
532 if (ch == '\\')
533 {
534 ch = (*get) ();
535 ch = process_escape (ch);
536 }
537 sprintf (out_buf, "%d", (int) (unsigned char) ch);
538
539
540 /* None of these 'x constants for us. We want 'x'. */
541 if ((ch = (*get) ()) != '\'')
542 {
543#ifdef REQUIRE_CHAR_CLOSE_QUOTE
544 as_warn ("Missing close quote: (assumed)");
545#else
546 (*unget) (ch);
547#endif
548 }
549 if (strlen (out_buf) == 1)
550 {
551 return out_buf[0];
552 }
553 if (state == 9 || state == 10)
554 old_state = 3;
555 else
556 old_state = state;
557 state = -1;
558 out_string = out_buf;
559 return *out_string++;
560#endif
561#endif
562 case LEX_IS_COLON:
563 if (state == 9 || state == 10)
564 state = 3;
565 else if (state != 3)
566 state = 0;
567 return ch;
568
569 case LEX_IS_NEWLINE:
570 /* Roll out a bunch of newlines from inside comments, etc. */
571 if (add_newlines)
572 {
573 --add_newlines;
574 (*unget) (ch);
575 }
576 /* fall thru into... */
577
578 case LEX_IS_LINE_SEPARATOR:
579 state = 0;
580 return ch;
581
582 case LEX_IS_LINE_COMMENT_START:
583 if (state == 0) /* Only comment at start of line. */
584 {
585 /* FIXME-someday: The two character comment stuff was badly
586 thought out. On i386, we want '/' as line comment start
587 AND we want C style comments. hence this hack. The
588 whole lexical process should be reworked. xoxorich. */
589 if (ch == '/')
590 {
591 ch2 = (*get) ();
592 if (ch2 == '*')
593 {
594 state = -2;
595 return (do_scrub_next_char (get, unget));
596 }
597 else
598 {
599 (*unget) (ch2);
600 }
601 } /* bad hack */
602
603 do
604 ch = (*get) ();
605 while (ch != EOF && IS_WHITESPACE (ch));
606 if (ch == EOF)
607 {
608 as_warn ("EOF in comment: Newline inserted");
609 return '\n';
610 }
611 if (ch < '0' || ch > '9')
612 {
613 /* Non-numerics: Eat whole comment line */
614 while (ch != EOF && !IS_NEWLINE (ch))
615 ch = (*get) ();
616 if (ch == EOF)
617 as_warn ("EOF in Comment: Newline inserted");
618 state = 0;
619 return '\n';
620 }
621 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
622 (*unget) (ch);
623 old_state = 4;
624 state = -1;
625 out_string = ".appline ";
626 return *out_string++;
627 }
628
629 /* We have a line comment character which is not at the start of
630 a line. If this is also a normal comment character, fall
631 through. Otherwise treat it as a default character. */
632 if (strchr (comment_chars, ch) == NULL)
633 goto de_fault;
634 /* Fall through. */
635 case LEX_IS_COMMENT_START:
636 do
637 ch = (*get) ();
638 while (ch != EOF && !IS_NEWLINE (ch));
639 if (ch == EOF)
640 as_warn ("EOF in comment: Newline inserted");
641 state = 0;
642 return '\n';
643
644 case LEX_IS_SYMBOL_COMPONENT:
645 if (state == 10)
646 {
647 /* This is a symbol character following another symbol
648 character, with whitespace in between. We skipped the
649 whitespace earlier, so output it now. */
650 (*unget) (ch);
651 state = 3;
652 return ' ';
653 }
654 if (state == 3)
655 state = 9;
656 /* Fall through. */
657 default:
658 de_fault:
659 /* Some relatively `normal' character. */
660 if (state == 0)
661 {
662 state = 2; /* Now seeing opcode */
663 return ch;
664 }
665 else if (state == 1)
666 {
667 state = 2; /* Ditto */
668 return ch;
669 }
670 else if (state == 9)
671 {
672 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
673 state = 3;
674 return ch;
675 }
676 else if (state == 10)
677 {
678 state = 3;
679 return ch;
680 }
681 else
682 {
683 return ch; /* Opcode or operands already */
684 }
685 }
686 return -1;
687}
688
#ifdef TEST

/* Stand-alone driver, built only when TEST is defined: scrub standard
   input and copy the result to standard output.  The character sets a
   target would normally provide are given fixed values here.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () walks this list as well; no extra separators.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  do_scrub_begin ();
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal stand-in for the assembler's warning routine: print STR and a
   newline on standard error.  STR is printed literally; any further
   arguments a caller passes for a format are not used.
   NOTE(review): may clash with a declaration of as_warn in as.h --
   confirm when building with TEST.  */
void
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
711
712/* end of app.c */
This page took 0.026064 seconds and 4 git commands to generate.