* config/tc-h8500.c (md_begin): Use a local variable when
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
3340f7e5 1/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
6efd877d 2
a39116f1
RP
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
4 */
fecd2382
RP
5/* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
6efd877d 7
a39116f1 8 This file is part of GAS, the GNU Assembler.
6efd877d 9
a39116f1
RP
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
6efd877d 14
a39116f1
RP
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
6efd877d 19
a39116f1
RP
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382
RP
23
24/* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
9a7d824a 26 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
be06bdcd 27 pair. This needs better error-handling.
a39116f1 28 */
fecd2382
RP
29
30#include <stdio.h>
6efd877d 31#include "as.h" /* For BAD_CASE() only */
fecd2382 32
3340f7e5 33#if (__STDC__ != 1) && !defined(const)
6efd877d 34#define const /* Nothing */
fecd2382
RP
35#endif
36
/* Lexical class of every possible input byte, indexed by the byte's
   unsigned value.  Entries left at 0 are ordinary characters.  The table
   is filled in at run time by do_scrub_begin ().  */
static char lex[256];

/* Characters that may appear inside a symbol.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Values stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_TWOCHAR_COMMENT_2ND 7
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11

/* Classification predicates.  The argument is parenthesised and converted
   to unsigned char, so an expression argument or a plain `char' holding a
   byte >= 0x80 cannot produce a negative index into lex[].  Callers must
   still test for EOF before using these.  */
#define IS_SYMBOL_COMPONENT(c) (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
58
59/* FIXME-soon: The entire lexer/parser thingy should be
60 built statically at compile time rather than dynamically
61 each and every time the assembler is run. xoxorich. */
fecd2382 62
6efd877d
KR
63void
64do_scrub_begin ()
65{
66 const char *p;
67
68 lex[' '] = LEX_IS_WHITESPACE;
69 lex['\t'] = LEX_IS_WHITESPACE;
70 lex['\n'] = LEX_IS_NEWLINE;
71 lex[';'] = LEX_IS_LINE_SEPARATOR;
72 lex['"'] = LEX_IS_STRINGQUOTE;
73 lex['\''] = LEX_IS_ONECHAR_QUOTE;
74 lex[':'] = LEX_IS_COLON;
7c2d4011 75
be06bdcd
SC
76
77
78#ifdef SINGLE_QUOTE_STRINGS
79 lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011 80#endif
be06bdcd 81
6efd877d 82 /* Note that these override the previous defaults, e.g. if ';'
be06bdcd 83
fecd2382 84 is a comment char, then it isn't a line separator. */
6efd877d
KR
85 for (p = symbol_chars; *p; ++p)
86 {
87 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
88 } /* declare symbol characters */
89
6efd877d
KR
90 for (p = comment_chars; *p; p++)
91 {
92 lex[*p] = LEX_IS_COMMENT_START;
93 } /* declare comment chars */
94
9a7d824a
ILT
95 for (p = line_comment_chars; *p; p++)
96 {
97 lex[*p] = LEX_IS_LINE_COMMENT_START;
98 } /* declare line comment chars */
99
6efd877d
KR
100 for (p = line_separator_chars; *p; p++)
101 {
102 lex[*p] = LEX_IS_LINE_SEPARATOR;
103 } /* declare line separators */
104
105 /* Only allow slash-star comments if slash is not in use */
106 if (lex['/'] == 0)
107 {
108 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
109 }
110 /* FIXME-soon. This is a bad hack but otherwise, we
a39116f1
RP
111 can't do c-style comments when '/' is a line
112 comment char. xoxorich. */
6efd877d
KR
113 if (lex['*'] == 0)
114 {
115 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
116 }
117} /* do_scrub_begin() */
fecd2382
RP
118
/* Stream used by the file-backed get/unget callbacks.  */
FILE *scrub_file;

/* `get' callback for do_scrub_next_char (): fetch the next character
   from scrub_file, or EOF.  */
int
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}
126
6efd877d
KR
127void
128scrub_to_file (ch)
129 int ch;
fecd2382 130{
6efd877d
KR
131 ungetc (ch, scrub_file);
132} /* scrub_to_file() */
fecd2382
RP
133
/* In-memory input for the string-backed get/unget callbacks:
   scrub_string is the read cursor and scrub_last_string is one past the
   last character to be read.  */
char *scrub_string;
char *scrub_last_string;

/* `get' callback for do_scrub_next_char (): return the next character of
   the string, or EOF once the cursor reaches scrub_last_string.

   The byte is returned as an unsigned char value, as getc () does.
   Returning the plain `char' made byte 0xFF equal EOF, and made every
   byte >= 0x80 a negative lex[] index, on signed-char hosts.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* `unget' callback for do_scrub_next_char (): step the cursor back and
   store CH there so that the next scrub_from_string () returns it.

   EOF is ignored, as ungetc () ignores it.  The scrubber can push back a
   lookahead that turned out to be EOF, and storing that would overwrite
   the last real character and then be read back as data.  */
void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}				/* scrub_to_string() */
fecd2382
RP
149
/* Saved state of the scrubber.  */
static int state;		/* Current state of do_scrub_next_char ().  */
static int old_state;		/* State to resume after states -1, -2 and 5.  */
static char *out_string;	/* Next character to emit while in state -1.  */
static char out_buf[20];	/* Holds the decimal text of a 'x constant.  */
static int add_newlines = 0;	/* Newlines swallowed and still owed to the output.  */

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof out_buf];	/* Same size as the file-scope out_buf.  */
  int add_newlines;
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
173
174char *
175app_push ()
176{
7c2d4011
SC
177 register struct app_save *saved;
178
6efd877d
KR
179 saved = (struct app_save *) xmalloc (sizeof (*saved));
180 saved->state = state;
181 saved->old_state = old_state;
182 saved->out_string = out_string;
6d331d71 183 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
6efd877d
KR
184 saved->add_newlines = add_newlines;
185 saved->scrub_string = scrub_string;
7c2d4011 186 saved->scrub_last_string = scrub_last_string;
6efd877d 187 saved->scrub_file = scrub_file;
7c2d4011
SC
188
189 /* do_scrub_begin() is not useful, just wastes time. */
6efd877d 190 return (char *) saved;
fecd2382
RP
191}
192
6efd877d
KR
193void
194app_pop (arg)
195 char *arg;
fecd2382 196{
6efd877d
KR
197 register struct app_save *saved = (struct app_save *) arg;
198
199 /* There is no do_scrub_end (). */
200 state = saved->state;
201 old_state = saved->old_state;
202 out_string = saved->out_string;
203 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
204 add_newlines = saved->add_newlines;
205 scrub_string = saved->scrub_string;
206 scrub_last_string = saved->scrub_last_string;
207 scrub_file = saved->scrub_file;
208
209 free (arg);
210} /* app_pop() */
211
6d331d71
KR
/* Translate the character that follows a backslash into the character
   the escape denotes.  Characters with no entry in the table below are
   returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */
int
process_escape (ch)
     char ch;
{
  /* Each row is { escape letter, character it stands for }.  */
  static const char escape_map[][2] =
  {
    {'b', '\b'},
    {'f', '\f'},
    {'n', '\n'},
    {'r', '\r'},
    {'t', '\t'},
    {'\'', '\''},
    {'"', '\"'}
  };
  unsigned int row;

  for (row = 0; row < sizeof (escape_map) / sizeof (escape_map[0]); row++)
    {
      if (escape_map[row][0] == ch)
	return escape_map[row][1];
    }

  return ch;
}
6efd877d
KR
238int
239do_scrub_next_char (get, unget)
240 int (*get) ();
241 void (*unget) ();
fecd2382 242{
6efd877d 243 /*State 0: beginning of normal line
a39116f1
RP
244 1: After first whitespace on line (flush more white)
245 2: After first non-white (opcode) on line (keep 1white)
246 3: after second white on line (into operands) (flush white)
247 4: after putting out a .line, put out digits
248 5: parsing a string, then go to old-state
249 6: putting out \ escape in a "d string.
9a7d824a
ILT
250 7: After putting out a .appfile, put out string.
251 8: After putting out a .appfile string, flush until newline.
f6a91cc0 252 9: After seeing symbol char in state 3 (keep 1white after symchar)
9a7d824a 253 10: After seeing whitespace in state 9 (keep white before symchar)
a39116f1
RP
254 -1: output string in out_string and go to the state in old_state
255 -2: flush text until a '*' '/' is seen, then go to state old_state
256 */
6efd877d 257
9a7d824a
ILT
258 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
259 constructs like ``.loc 1 20''. This was turning into ``.loc
260 120''. States 9 and 10 ensure that a space is never dropped in
261 between characters which could appear in a identifier. Ian
262 Taylor, ian@cygnus.com. */
f6a91cc0 263
6efd877d
KR
264 register int ch, ch2 = 0;
265
266 switch (state)
267 {
268 case -1:
269 ch = *out_string++;
270 if (*out_string == 0)
271 {
272 state = old_state;
273 old_state = 3;
274 }
275 return ch;
276
277 case -2:
278 for (;;)
279 {
280 do
281 {
282 ch = (*get) ();
283 }
284 while (ch != EOF && ch != '\n' && ch != '*');
285 if (ch == '\n' || ch == EOF)
286 return ch;
287
288 /* At this point, ch must be a '*' */
289 while ((ch = (*get) ()) == '*')
290 {
291 ;
292 }
293 if (ch == EOF || ch == '/')
294 break;
295 (*unget) (ch);
296 }
297 state = old_state;
298 return ' ';
299
300 case 4:
301 ch = (*get) ();
302 if (ch == EOF || (ch >= '0' && ch <= '9'))
303 return ch;
304 else
305 {
306 while (ch != EOF && IS_WHITESPACE (ch))
307 ch = (*get) ();
308 if (ch == '"')
309 {
310 (*unget) (ch);
9a7d824a 311 out_string = "\n.appfile ";
6efd877d
KR
312 old_state = 7;
313 state = -1;
314 return *out_string++;
315 }
316 else
317 {
318 while (ch != EOF && ch != '\n')
319 ch = (*get) ();
320 return ch;
321 }
322 }
323
324 case 5:
325 ch = (*get) ();
326 if (lex[ch] == LEX_IS_STRINGQUOTE)
327 {
328 state = old_state;
329 return ch;
330 }
331 else if (ch == '\\')
332 {
333 state = 6;
334 return ch;
335 }
336 else if (ch == EOF)
337 {
338 as_warn ("End of file in string: inserted '\"'");
339 state = old_state;
340 (*unget) ('\n');
341 return '"';
342 }
343 else
344 {
345 return ch;
346 }
347
348 case 6:
349 state = 5;
350 ch = (*get) ();
351 switch (ch)
352 {
6d331d71
KR
353 /* Handle strings broken across lines, by turning '\n' into
354 '\\' and 'n'. */
6efd877d
KR
355 case '\n':
356 (*unget) ('n');
357 add_newlines++;
358 return '\\';
359
360 case '"':
361 case '\\':
362 case 'b':
363 case 'f':
364 case 'n':
365 case 'r':
366 case 't':
fecd2382 367#ifdef BACKSLASH_V
6efd877d 368 case 'v':
fecd2382 369#endif /* BACKSLASH_V */
6efd877d
KR
370 case '0':
371 case '1':
372 case '2':
373 case '3':
374 case '4':
375 case '5':
376 case '6':
377 case '7':
378 break;
7c2d4011 379#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
6efd877d
KR
380 default:
381 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
382 break;
fecd2382 383#else /* ONLY_STANDARD_ESCAPES */
6efd877d
KR
384 default:
385 /* Accept \x as x for any x */
386 break;
fecd2382 387#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011 388
6efd877d
KR
389 case EOF:
390 as_warn ("End of file in string: '\"' inserted");
391 return '"';
392 }
393 return ch;
394
395 case 7:
396 ch = (*get) ();
397 state = 5;
398 old_state = 8;
399 return ch;
400
401 case 8:
402 do
403 ch = (*get) ();
404 while (ch != '\n');
405 state = 0;
406 return ch;
407 }
408
9a7d824a 409 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
6efd877d
KR
410
411 /* flushchar: */
412 ch = (*get) ();
413recycle:
414 if (ch == EOF)
415 {
416 if (state != 0)
417 as_warn ("End of file not at end of a line: Newline inserted.");
418 return ch;
419 }
420
421 switch (lex[ch])
422 {
423 case LEX_IS_WHITESPACE:
424 do
425 ch = (*get) ();
426 while (ch != EOF && IS_WHITESPACE (ch));
427 if (ch == EOF)
428 return ch;
429
430 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
431 {
432 goto recycle;
fecd2382 433 }
7c2d4011 434#ifdef MRI
6efd877d
KR
435 (*unget) (ch); /* Put back */
436 return ' '; /* Always return one space at start of line */
7c2d4011 437#endif
6efd877d
KR
438
439 /* If we're in state 2, we've seen a non-white
6d331d71
KR
440 character followed by whitespace. If the next
441 character is ':', this is whitespace after a label
442 name which we can ignore. */
6efd877d
KR
443 if (state == 2 && lex[ch] == LEX_IS_COLON)
444 {
445 state = 0;
446 return ch;
447 }
448
449 switch (state)
450 {
451 case 0:
452 state++;
453 goto recycle; /* Punted leading sp */
454 case 1:
455 BAD_CASE (state); /* We can't get here */
456 case 2:
f6a91cc0 457 state = 3;
6efd877d
KR
458 (*unget) (ch);
459 return ' '; /* Sp after opco */
460 case 3:
461 goto recycle; /* Sp in operands */
9a7d824a
ILT
462 case 9:
463 case 10:
464 state = 10; /* Sp after symbol char */
465 goto recycle;
6efd877d
KR
466 default:
467 BAD_CASE (state);
468 }
469 break;
470
471 case LEX_IS_TWOCHAR_COMMENT_1ST:
472 ch2 = (*get) ();
473 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
474 {
475 for (;;)
476 {
477 do
478 {
479 ch2 = (*get) ();
480 if (ch2 != EOF && IS_NEWLINE (ch2))
481 add_newlines++;
fecd2382 482 }
6efd877d
KR
483 while (ch2 != EOF &&
484 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
485
486 while (ch2 != EOF &&
487 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
488 {
489 ch2 = (*get) ();
fecd2382 490 }
6efd877d
KR
491
492 if (ch2 == EOF
493 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
fecd2382 494 break;
6efd877d
KR
495 (*unget) (ch);
496 }
497 if (ch2 == EOF)
498 as_warn ("End of file in multiline comment");
499
500 ch = ' ';
501 goto recycle;
502 }
503 else
504 {
505 if (ch2 != EOF)
506 (*unget) (ch2);
9a7d824a
ILT
507 if (state == 9 || state == 10)
508 state = 3;
6efd877d
KR
509 return ch;
510 }
511 break;
512
513 case LEX_IS_STRINGQUOTE:
9a7d824a
ILT
514 if (state == 9 || state == 10)
515 old_state = 3;
516 else
517 old_state = state;
6efd877d
KR
518 state = 5;
519 return ch;
520#ifndef MRI
a39116f1 521#ifndef IEEE_STYLE
6efd877d
KR
522 case LEX_IS_ONECHAR_QUOTE:
523 ch = (*get) ();
524 if (ch == EOF)
525 {
526 as_warn ("End-of-file after a one-character quote; \\000 inserted");
527 ch = 0;
528 }
529 if (ch == '\\')
530 {
531 ch = (*get) ();
532 ch = process_escape (ch);
533 }
534 sprintf (out_buf, "%d", (int) (unsigned char) ch);
7c2d4011 535
6efd877d 536
9a7d824a 537 /* None of these 'x constants for us. We want 'x'. */
6efd877d
KR
538 if ((ch = (*get) ()) != '\'')
539 {
fecd2382 540#ifdef REQUIRE_CHAR_CLOSE_QUOTE
6efd877d 541 as_warn ("Missing close quote: (assumed)");
fecd2382 542#else
6efd877d 543 (*unget) (ch);
fecd2382 544#endif
6efd877d
KR
545 }
546 if (strlen (out_buf) == 1)
547 {
548 return out_buf[0];
549 }
9a7d824a
ILT
550 if (state == 9 || state == 10)
551 old_state = 3;
552 else
553 old_state = state;
6efd877d
KR
554 state = -1;
555 out_string = out_buf;
556 return *out_string++;
7c2d4011 557#endif
a39116f1 558#endif
6efd877d 559 case LEX_IS_COLON:
9a7d824a
ILT
560 if (state == 9 || state == 10)
561 state = 3;
562 else if (state != 3)
6efd877d
KR
563 state = 0;
564 return ch;
565
566 case LEX_IS_NEWLINE:
567 /* Roll out a bunch of newlines from inside comments, etc. */
568 if (add_newlines)
569 {
570 --add_newlines;
571 (*unget) (ch);
572 }
573 /* fall thru into... */
574
575 case LEX_IS_LINE_SEPARATOR:
576 state = 0;
577 return ch;
578
579 case LEX_IS_LINE_COMMENT_START:
9a7d824a 580 if (state == 0) /* Only comment at start of line. */
6efd877d 581 {
9a7d824a
ILT
582 /* FIXME-someday: The two character comment stuff was badly
583 thought out. On i386, we want '/' as line comment start
584 AND we want C style comments. hence this hack. The
585 whole lexical process should be reworked. xoxorich. */
586 if (ch == '/')
f6a91cc0 587 {
9a7d824a
ILT
588 ch2 = (*get) ();
589 if (ch2 == '*')
590 {
591 state = -2;
592 return (do_scrub_next_char (get, unget));
593 }
594 else
595 {
596 (*unget) (ch2);
597 }
598 } /* bad hack */
6efd877d 599
9a7d824a 600 do
6efd877d 601 ch = (*get) ();
9a7d824a 602 while (ch != EOF && IS_WHITESPACE (ch));
6efd877d 603 if (ch == EOF)
9a7d824a
ILT
604 {
605 as_warn ("EOF in comment: Newline inserted");
606 return '\n';
607 }
608 if (ch < '0' || ch > '9')
609 {
610 /* Non-numerics: Eat whole comment line */
611 while (ch != EOF && !IS_NEWLINE (ch))
612 ch = (*get) ();
613 if (ch == EOF)
614 as_warn ("EOF in Comment: Newline inserted");
615 state = 0;
616 return '\n';
617 }
618 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
619 (*unget) (ch);
620 old_state = 4;
621 state = -1;
622 out_string = ".appline ";
623 return *out_string++;
6efd877d 624 }
6efd877d 625
9a7d824a
ILT
626 /* We have a line comment character which is not at the start of
627 a line. If this is also a normal comment character, fall
628 through. Otherwise treat it as a default character. */
629 if (strchr (comment_chars, ch) == NULL)
630 goto de_fault;
631 /* Fall through. */
6efd877d
KR
632 case LEX_IS_COMMENT_START:
633 do
634 ch = (*get) ();
635 while (ch != EOF && !IS_NEWLINE (ch));
636 if (ch == EOF)
637 as_warn ("EOF in comment: Newline inserted");
638 state = 0;
639 return '\n';
640
f6a91cc0 641 case LEX_IS_SYMBOL_COMPONENT:
9a7d824a
ILT
642 if (state == 10)
643 {
644 /* This is a symbol character following another symbol
645 character, with whitespace in between. We skipped the
646 whitespace earlier, so output it now. */
647 (*unget) (ch);
648 state = 3;
649 return ' ';
650 }
f6a91cc0
ILT
651 if (state == 3)
652 state = 9;
653 /* Fall through. */
6efd877d
KR
654 default:
655 de_fault:
656 /* Some relatively `normal' character. */
657 if (state == 0)
658 {
659 state = 2; /* Now seeing opcode */
660 return ch;
fecd2382 661 }
6efd877d
KR
662 else if (state == 1)
663 {
664 state = 2; /* Ditto */
665 return ch;
666 }
f6a91cc0
ILT
667 else if (state == 9)
668 {
669 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
670 state = 3;
671 return ch;
672 }
9a7d824a
ILT
673 else if (state == 10)
674 {
675 state = 3;
676 return ch;
677 }
6efd877d
KR
678 else
679 {
680 return ch; /* Opcode or operands already */
681 }
682 }
683 return -1;
fecd2382
RP
684}
685
#ifdef TEST

/* Stand-alone filter for exercising the scrubber: define the character
   lists that do_scrub_begin () reads, then copy stdin to stdout through
   do_scrub_next_char ().  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () also reads line_separator_chars.
   NOTE(review): assumes no other object linked into the TEST build
   defines it -- confirm.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* The table initializer in this file is do_scrub_begin (); there is no
     app_begin ().  */
  do_scrub_begin ();

  /* do_scrub_next_char () takes get/unget callbacks, not a stream, so
     route stdin through the file-backed pair.  */
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);

  return 0;
}

/* Minimal as_warn () replacement: print STR and a newline on stderr.
   The message is written verbatim; any `%' directives in it are not
   expanded.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
708
fecd2382 709/* end of app.c */
This page took 0.181322 seconds and 4 git commands to generate.