* gas/all/p2425.s: Insert a tab before assembler directives so
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
/* This is the Assembler Pre-Processor
   Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
fecd2382 19
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */

/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a
   .appline <number>\n.appfile <filename> pair.
   This needs better error-handling.  */
fecd2382
RP
26
27#include <stdio.h>
6efd877d 28#include "as.h" /* For BAD_CASE() only */
fecd2382 29
3340f7e5 30#if (__STDC__ != 1) && !defined(const)
6efd877d 31#define const /* Nothing */
fecd2382
RP
32#endif
33
/* Character classification table, indexed by character code.  An entry
   of 0 means "ordinary character"; the other values are the LEX_IS_*
   classes below.  The table is filled in by do_scrub_begin ().  */
static char lex[256];

/* Every character that do_scrub_begin () marks as
   LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Character classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_TWOCHAR_COMMENT_2ND 7
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11

/* Class tests.  The argument is used directly as an index into lex[],
   so it must be a valid index (0..255); in particular it must not be
   EOF.  */
#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich.  */
fecd2382 59
6efd877d
KR
60void
61do_scrub_begin ()
62{
63 const char *p;
64
65 lex[' '] = LEX_IS_WHITESPACE;
66 lex['\t'] = LEX_IS_WHITESPACE;
67 lex['\n'] = LEX_IS_NEWLINE;
68 lex[';'] = LEX_IS_LINE_SEPARATOR;
69 lex['"'] = LEX_IS_STRINGQUOTE;
58d4951d 70#ifndef TC_HPPA
6efd877d 71 lex['\''] = LEX_IS_ONECHAR_QUOTE;
58d4951d 72#endif
6efd877d 73 lex[':'] = LEX_IS_COLON;
7c2d4011 74
be06bdcd
SC
75
76
77#ifdef SINGLE_QUOTE_STRINGS
78 lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011 79#endif
be06bdcd 80
6efd877d 81 /* Note that these override the previous defaults, e.g. if ';'
be06bdcd 82
fecd2382 83 is a comment char, then it isn't a line separator. */
6efd877d
KR
84 for (p = symbol_chars; *p; ++p)
85 {
58d4951d 86 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
6efd877d
KR
87 } /* declare symbol characters */
88
6efd877d
KR
89 for (p = comment_chars; *p; p++)
90 {
58d4951d 91 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
6efd877d
KR
92 } /* declare comment chars */
93
9a7d824a
ILT
94 for (p = line_comment_chars; *p; p++)
95 {
58d4951d 96 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
9a7d824a
ILT
97 } /* declare line comment chars */
98
6efd877d
KR
99 for (p = line_separator_chars; *p; p++)
100 {
58d4951d 101 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
6efd877d
KR
102 } /* declare line separators */
103
104 /* Only allow slash-star comments if slash is not in use */
105 if (lex['/'] == 0)
106 {
107 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
108 }
109 /* FIXME-soon. This is a bad hack but otherwise, we
a39116f1
RP
110 can't do c-style comments when '/' is a line
111 comment char. xoxorich. */
6efd877d
KR
112 if (lex['*'] == 0)
113 {
114 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
115 }
116} /* do_scrub_begin() */
fecd2382
RP
117
/* Stream used by scrub_from_file () and scrub_to_file ().  */
FILE *scrub_file;

/* Input hook for do_scrub_next_char (): return the next character read
   from scrub_file with getc (), which is EOF at end of input.  */
int
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}
125
6efd877d
KR
126void
127scrub_to_file (ch)
128 int ch;
fecd2382 129{
6efd877d
KR
130 ungetc (ch, scrub_file);
131} /* scrub_to_file() */
fecd2382
RP
132
/* In-memory input for the scrubber: scrub_string is the next character
   to deliver, scrub_last_string is one past the final character.  */
char *scrub_string;
char *scrub_last_string;

/* Input hook for do_scrub_next_char (): return the next character of
   the in-memory buffer, or EOF once scrub_string has reached
   scrub_last_string.

   The character is returned as an unsigned char converted to int, the
   same convention getc () uses.  Returning a plain char would turn the
   byte 0xFF into -1 (indistinguishable from EOF) on hosts where char
   is signed, and would make other high bytes negative indexes into
   lex[].  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push-back hook for do_scrub_next_char (): step scrub_string back by
   one and store CH there, so the next scrub_from_string () call
   returns it.

   A request to push back EOF is ignored, as ungetc () does for files;
   storing it would overwrite a buffer byte with (char) -1 and cause a
   spurious character to be read afterwards.  */
void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}				/* scrub_to_string() */
fecd2382
RP
148
/* Saved state of the scrubber */

/* Current state of the do_scrub_next_char () state machine; the states
   are listed at the top of that function.  */
static int state;

/* State to resume once a temporary state (string, pending output,
   comment skipping) has finished.  */
static int old_state;

/* Next character of pending output delivered while in state -1.  */
static char *out_string;

/* Holds the decimal text generated for a single-quoted character
   constant; out_string points into it while that text is emitted.  */
static char out_buf[20];

/* Number of newlines that were swallowed (inside comments or
   continued strings) and still have to be emitted again when the next
   newline is seen.  */
static int add_newlines = 0;
155
/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.

   Each member is a copy of the file-scope variable of the same name;
   app_push () fills the structure in and app_pop () restores from it.  */

struct app_save
  {
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];	/* Same size as the global buffer.  */
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };
172
173char *
174app_push ()
175{
7c2d4011
SC
176 register struct app_save *saved;
177
6efd877d
KR
178 saved = (struct app_save *) xmalloc (sizeof (*saved));
179 saved->state = state;
180 saved->old_state = old_state;
181 saved->out_string = out_string;
58d4951d 182 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
6efd877d
KR
183 saved->add_newlines = add_newlines;
184 saved->scrub_string = scrub_string;
7c2d4011 185 saved->scrub_last_string = scrub_last_string;
6efd877d 186 saved->scrub_file = scrub_file;
7c2d4011
SC
187
188 /* do_scrub_begin() is not useful, just wastes time. */
6efd877d 189 return (char *) saved;
fecd2382
RP
190}
191
6efd877d
KR
192void
193app_pop (arg)
194 char *arg;
fecd2382 195{
6efd877d
KR
196 register struct app_save *saved = (struct app_save *) arg;
197
198 /* There is no do_scrub_end (). */
199 state = saved->state;
200 old_state = saved->old_state;
201 out_string = saved->out_string;
58d4951d 202 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
6efd877d
KR
203 add_newlines = saved->add_newlines;
204 scrub_string = saved->scrub_string;
205 scrub_last_string = saved->scrub_last_string;
206 scrub_file = saved->scrub_file;
207
208 free (arg);
209} /* app_pop() */
210
6d331d71
KR
/* Translate the character CH that followed a backslash into the
   character the escape stands for.  The letters b, f, n, r and t map to
   the matching control characters; a single or double quote, and any
   other character, is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is
   not necessarily true.  */
int
process_escape (ch)
     char ch;
{
  /* Pairs of { escape letter, resulting character }.  */
  static const char escapes[][2] =
  {
    {'b', '\b'},
    {'f', '\f'},
    {'n', '\n'},
    {'r', '\r'},
    {'t', '\t'},
    {'\'', '\''},
    {'"', '\"'}
  };
  unsigned int i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    {
      if (escapes[i][0] == ch)
	return escapes[i][1];
    }

  return ch;
}
6efd877d
KR
237int
238do_scrub_next_char (get, unget)
239 int (*get) ();
240 void (*unget) ();
fecd2382 241{
6efd877d 242 /*State 0: beginning of normal line
a39116f1
RP
243 1: After first whitespace on line (flush more white)
244 2: After first non-white (opcode) on line (keep 1white)
245 3: after second white on line (into operands) (flush white)
246 4: after putting out a .line, put out digits
247 5: parsing a string, then go to old-state
248 6: putting out \ escape in a "d string.
9a7d824a
ILT
249 7: After putting out a .appfile, put out string.
250 8: After putting out a .appfile string, flush until newline.
f6a91cc0 251 9: After seeing symbol char in state 3 (keep 1white after symchar)
9a7d824a 252 10: After seeing whitespace in state 9 (keep white before symchar)
a39116f1
RP
253 -1: output string in out_string and go to the state in old_state
254 -2: flush text until a '*' '/' is seen, then go to state old_state
255 */
6efd877d 256
9a7d824a
ILT
257 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
258 constructs like ``.loc 1 20''. This was turning into ``.loc
259 120''. States 9 and 10 ensure that a space is never dropped in
260 between characters which could appear in a identifier. Ian
261 Taylor, ian@cygnus.com. */
f6a91cc0 262
6efd877d
KR
263 register int ch, ch2 = 0;
264
265 switch (state)
266 {
267 case -1:
268 ch = *out_string++;
269 if (*out_string == 0)
270 {
271 state = old_state;
272 old_state = 3;
273 }
274 return ch;
275
276 case -2:
277 for (;;)
278 {
279 do
280 {
281 ch = (*get) ();
282 }
283 while (ch != EOF && ch != '\n' && ch != '*');
284 if (ch == '\n' || ch == EOF)
285 return ch;
286
287 /* At this point, ch must be a '*' */
288 while ((ch = (*get) ()) == '*')
289 {
290 ;
291 }
292 if (ch == EOF || ch == '/')
293 break;
294 (*unget) (ch);
295 }
296 state = old_state;
297 return ' ';
298
299 case 4:
300 ch = (*get) ();
301 if (ch == EOF || (ch >= '0' && ch <= '9'))
302 return ch;
303 else
304 {
305 while (ch != EOF && IS_WHITESPACE (ch))
306 ch = (*get) ();
307 if (ch == '"')
308 {
309 (*unget) (ch);
9a7d824a 310 out_string = "\n.appfile ";
6efd877d
KR
311 old_state = 7;
312 state = -1;
313 return *out_string++;
314 }
315 else
316 {
317 while (ch != EOF && ch != '\n')
318 ch = (*get) ();
58d4951d 319 state = 0;
6efd877d
KR
320 return ch;
321 }
322 }
323
324 case 5:
325 ch = (*get) ();
326 if (lex[ch] == LEX_IS_STRINGQUOTE)
327 {
328 state = old_state;
329 return ch;
330 }
331 else if (ch == '\\')
332 {
333 state = 6;
334 return ch;
335 }
336 else if (ch == EOF)
337 {
338 as_warn ("End of file in string: inserted '\"'");
339 state = old_state;
340 (*unget) ('\n');
341 return '"';
342 }
343 else
344 {
345 return ch;
346 }
347
348 case 6:
349 state = 5;
350 ch = (*get) ();
351 switch (ch)
352 {
6d331d71
KR
353 /* Handle strings broken across lines, by turning '\n' into
354 '\\' and 'n'. */
6efd877d
KR
355 case '\n':
356 (*unget) ('n');
357 add_newlines++;
358 return '\\';
359
360 case '"':
361 case '\\':
58d4951d
ILT
362#ifdef TC_HPPA
363 case 'x': /* '\\x' introduces escaped sequences on the PA */
364#endif
6efd877d
KR
365 case 'b':
366 case 'f':
367 case 'n':
368 case 'r':
369 case 't':
fecd2382 370#ifdef BACKSLASH_V
6efd877d 371 case 'v':
fecd2382 372#endif /* BACKSLASH_V */
6efd877d
KR
373 case '0':
374 case '1':
375 case '2':
376 case '3':
377 case '4':
378 case '5':
379 case '6':
380 case '7':
381 break;
7c2d4011 382#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
6efd877d
KR
383 default:
384 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
385 break;
fecd2382 386#else /* ONLY_STANDARD_ESCAPES */
6efd877d
KR
387 default:
388 /* Accept \x as x for any x */
389 break;
fecd2382 390#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011 391
6efd877d
KR
392 case EOF:
393 as_warn ("End of file in string: '\"' inserted");
394 return '"';
395 }
396 return ch;
397
398 case 7:
399 ch = (*get) ();
400 state = 5;
401 old_state = 8;
402 return ch;
403
404 case 8:
405 do
406 ch = (*get) ();
407 while (ch != '\n');
408 state = 0;
409 return ch;
410 }
411
9a7d824a 412 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
6efd877d
KR
413
414 /* flushchar: */
415 ch = (*get) ();
416recycle:
417 if (ch == EOF)
418 {
419 if (state != 0)
420 as_warn ("End of file not at end of a line: Newline inserted.");
421 return ch;
422 }
423
424 switch (lex[ch])
425 {
426 case LEX_IS_WHITESPACE:
427 do
428 ch = (*get) ();
429 while (ch != EOF && IS_WHITESPACE (ch));
430 if (ch == EOF)
431 return ch;
432
433 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
434 {
435 goto recycle;
fecd2382 436 }
7c2d4011 437#ifdef MRI
6efd877d
KR
438 (*unget) (ch); /* Put back */
439 return ' '; /* Always return one space at start of line */
7c2d4011 440#endif
6efd877d
KR
441
442 /* If we're in state 2, we've seen a non-white
6d331d71
KR
443 character followed by whitespace. If the next
444 character is ':', this is whitespace after a label
445 name which we can ignore. */
6efd877d
KR
446 if (state == 2 && lex[ch] == LEX_IS_COLON)
447 {
448 state = 0;
449 return ch;
450 }
451
452 switch (state)
453 {
454 case 0:
455 state++;
456 goto recycle; /* Punted leading sp */
457 case 1:
458 BAD_CASE (state); /* We can't get here */
459 case 2:
f6a91cc0 460 state = 3;
6efd877d
KR
461 (*unget) (ch);
462 return ' '; /* Sp after opco */
463 case 3:
464 goto recycle; /* Sp in operands */
9a7d824a
ILT
465 case 9:
466 case 10:
467 state = 10; /* Sp after symbol char */
468 goto recycle;
6efd877d
KR
469 default:
470 BAD_CASE (state);
471 }
472 break;
473
474 case LEX_IS_TWOCHAR_COMMENT_1ST:
475 ch2 = (*get) ();
476 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
477 {
478 for (;;)
479 {
480 do
481 {
482 ch2 = (*get) ();
483 if (ch2 != EOF && IS_NEWLINE (ch2))
484 add_newlines++;
fecd2382 485 }
6efd877d
KR
486 while (ch2 != EOF &&
487 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
488
489 while (ch2 != EOF &&
490 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
491 {
492 ch2 = (*get) ();
fecd2382 493 }
6efd877d
KR
494
495 if (ch2 == EOF
496 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
fecd2382 497 break;
6efd877d
KR
498 (*unget) (ch);
499 }
500 if (ch2 == EOF)
501 as_warn ("End of file in multiline comment");
502
503 ch = ' ';
504 goto recycle;
505 }
506 else
507 {
508 if (ch2 != EOF)
509 (*unget) (ch2);
9a7d824a
ILT
510 if (state == 9 || state == 10)
511 state = 3;
6efd877d
KR
512 return ch;
513 }
514 break;
515
516 case LEX_IS_STRINGQUOTE:
9a7d824a
ILT
517 if (state == 9 || state == 10)
518 old_state = 3;
519 else
520 old_state = state;
6efd877d
KR
521 state = 5;
522 return ch;
523#ifndef MRI
a39116f1 524#ifndef IEEE_STYLE
6efd877d
KR
525 case LEX_IS_ONECHAR_QUOTE:
526 ch = (*get) ();
527 if (ch == EOF)
528 {
529 as_warn ("End-of-file after a one-character quote; \\000 inserted");
530 ch = 0;
531 }
532 if (ch == '\\')
533 {
534 ch = (*get) ();
535 ch = process_escape (ch);
536 }
537 sprintf (out_buf, "%d", (int) (unsigned char) ch);
7c2d4011 538
6efd877d 539
9a7d824a 540 /* None of these 'x constants for us. We want 'x'. */
6efd877d
KR
541 if ((ch = (*get) ()) != '\'')
542 {
fecd2382 543#ifdef REQUIRE_CHAR_CLOSE_QUOTE
6efd877d 544 as_warn ("Missing close quote: (assumed)");
fecd2382 545#else
6efd877d 546 (*unget) (ch);
fecd2382 547#endif
6efd877d
KR
548 }
549 if (strlen (out_buf) == 1)
550 {
551 return out_buf[0];
552 }
9a7d824a
ILT
553 if (state == 9 || state == 10)
554 old_state = 3;
555 else
556 old_state = state;
6efd877d
KR
557 state = -1;
558 out_string = out_buf;
559 return *out_string++;
7c2d4011 560#endif
a39116f1 561#endif
6efd877d 562 case LEX_IS_COLON:
9a7d824a
ILT
563 if (state == 9 || state == 10)
564 state = 3;
565 else if (state != 3)
6efd877d
KR
566 state = 0;
567 return ch;
568
569 case LEX_IS_NEWLINE:
570 /* Roll out a bunch of newlines from inside comments, etc. */
571 if (add_newlines)
572 {
573 --add_newlines;
574 (*unget) (ch);
575 }
576 /* fall thru into... */
577
578 case LEX_IS_LINE_SEPARATOR:
579 state = 0;
580 return ch;
581
582 case LEX_IS_LINE_COMMENT_START:
9a7d824a 583 if (state == 0) /* Only comment at start of line. */
6efd877d 584 {
9a7d824a
ILT
585 /* FIXME-someday: The two character comment stuff was badly
586 thought out. On i386, we want '/' as line comment start
587 AND we want C style comments. hence this hack. The
588 whole lexical process should be reworked. xoxorich. */
589 if (ch == '/')
f6a91cc0 590 {
9a7d824a
ILT
591 ch2 = (*get) ();
592 if (ch2 == '*')
593 {
594 state = -2;
595 return (do_scrub_next_char (get, unget));
596 }
597 else
598 {
599 (*unget) (ch2);
600 }
601 } /* bad hack */
6efd877d 602
9a7d824a 603 do
6efd877d 604 ch = (*get) ();
9a7d824a 605 while (ch != EOF && IS_WHITESPACE (ch));
6efd877d 606 if (ch == EOF)
9a7d824a
ILT
607 {
608 as_warn ("EOF in comment: Newline inserted");
609 return '\n';
610 }
611 if (ch < '0' || ch > '9')
612 {
613 /* Non-numerics: Eat whole comment line */
614 while (ch != EOF && !IS_NEWLINE (ch))
615 ch = (*get) ();
616 if (ch == EOF)
617 as_warn ("EOF in Comment: Newline inserted");
618 state = 0;
619 return '\n';
620 }
621 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
622 (*unget) (ch);
623 old_state = 4;
624 state = -1;
625 out_string = ".appline ";
626 return *out_string++;
6efd877d 627 }
6efd877d 628
9a7d824a
ILT
629 /* We have a line comment character which is not at the start of
630 a line. If this is also a normal comment character, fall
631 through. Otherwise treat it as a default character. */
632 if (strchr (comment_chars, ch) == NULL)
633 goto de_fault;
634 /* Fall through. */
6efd877d
KR
635 case LEX_IS_COMMENT_START:
636 do
637 ch = (*get) ();
638 while (ch != EOF && !IS_NEWLINE (ch));
639 if (ch == EOF)
640 as_warn ("EOF in comment: Newline inserted");
641 state = 0;
642 return '\n';
643
f6a91cc0 644 case LEX_IS_SYMBOL_COMPONENT:
9a7d824a
ILT
645 if (state == 10)
646 {
647 /* This is a symbol character following another symbol
648 character, with whitespace in between. We skipped the
649 whitespace earlier, so output it now. */
650 (*unget) (ch);
651 state = 3;
652 return ' ';
653 }
f6a91cc0
ILT
654 if (state == 3)
655 state = 9;
656 /* Fall through. */
6efd877d
KR
657 default:
658 de_fault:
659 /* Some relatively `normal' character. */
660 if (state == 0)
661 {
662 state = 2; /* Now seeing opcode */
663 return ch;
fecd2382 664 }
6efd877d
KR
665 else if (state == 1)
666 {
667 state = 2; /* Ditto */
668 return ch;
669 }
f6a91cc0
ILT
670 else if (state == 9)
671 {
672 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
673 state = 3;
674 return ch;
675 }
9a7d824a
ILT
676 else if (state == 10)
677 {
678 state = 3;
679 return ch;
680 }
6efd877d
KR
681 else
682 {
683 return ch; /* Opcode or operands already */
684 }
685 }
686 return -1;
fecd2382
RP
687}
688
#ifdef TEST

/* Stand-alone test harness: scrub standard input to standard output.
   It supplies the comment tables and the as_warn () that the scrubber
   otherwise gets from the rest of the assembler.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () also reads this table; the harness declares no
   extra line separators.  */
const char line_separator_chars[] = "";

main ()
{
  int ch;

  /* Build the classification table, then scrub stdin through the
     file-based input hooks.  */
  do_scrub_begin ();
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal replacement for the assembler's warning routine: print STR
   followed by a newline on stderr.  STR is printed literally; any
   further arguments a caller passes are ignored.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
711
fecd2382 712/* end of app.c */
This page took 0.123536 seconds and 4 git commands to generate.