hppa configuration
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.

   Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   */
/* This is the Assembler Pre-Processor
   Copyright (C) 1987 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .line <number>\n.app-file <filename>
   pair.  This needs better error-handling.
   */
fecd2382
RP
29
30#include <stdio.h>
6efd877d 31#include "as.h" /* For BAD_CASE() only */
fecd2382 32
#if (__STDC__ != 1) && !defined(const)
#define const			/* Nothing */
#endif

/* Character classification table, indexed by unsigned character value.
   It is filled in by do_scrub_begin (); an entry of zero means that no
   special class has been assigned to that character.  */
static char lex[256];

/* Characters that may appear in a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Class codes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_TWOCHAR_COMMENT_2ND	7
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11

/* Classification predicates.  The argument is converted to unsigned char
   before indexing so that a negative (signed) character value cannot
   address memory in front of the table.  Callers are still expected to
   rule out EOF before using these.  */
#define IS_SYMBOL_COMPONENT(c)	(lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	(lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	(lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)		(lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	(lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)		(lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
58
59/* FIXME-soon: The entire lexer/parser thingy should be
60 built statically at compile time rather than dynamically
61 each and every time the assembler is run. xoxorich. */
fecd2382 62
6efd877d
KR
63void
64do_scrub_begin ()
65{
66 const char *p;
67
68 lex[' '] = LEX_IS_WHITESPACE;
69 lex['\t'] = LEX_IS_WHITESPACE;
70 lex['\n'] = LEX_IS_NEWLINE;
71 lex[';'] = LEX_IS_LINE_SEPARATOR;
72 lex['"'] = LEX_IS_STRINGQUOTE;
73 lex['\''] = LEX_IS_ONECHAR_QUOTE;
74 lex[':'] = LEX_IS_COLON;
7c2d4011 75
be06bdcd
SC
76
77
78#ifdef SINGLE_QUOTE_STRINGS
79 lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011 80#endif
be06bdcd 81
6efd877d 82 /* Note that these override the previous defaults, e.g. if ';'
be06bdcd 83
fecd2382 84 is a comment char, then it isn't a line separator. */
6efd877d
KR
85 for (p = symbol_chars; *p; ++p)
86 {
87 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
88 } /* declare symbol characters */
89
90 for (p = line_comment_chars; *p; p++)
91 {
92 lex[*p] = LEX_IS_LINE_COMMENT_START;
93 } /* declare line comment chars */
94
95 for (p = comment_chars; *p; p++)
96 {
97 lex[*p] = LEX_IS_COMMENT_START;
98 } /* declare comment chars */
99
100 for (p = line_separator_chars; *p; p++)
101 {
102 lex[*p] = LEX_IS_LINE_SEPARATOR;
103 } /* declare line separators */
104
105 /* Only allow slash-star comments if slash is not in use */
106 if (lex['/'] == 0)
107 {
108 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
109 }
110 /* FIXME-soon. This is a bad hack but otherwise, we
a39116f1
RP
111 can't do c-style comments when '/' is a line
112 comment char. xoxorich. */
6efd877d
KR
113 if (lex['*'] == 0)
114 {
115 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
116 }
117} /* do_scrub_begin() */

/* Stream used by scrub_from_file () and scrub_to_file ().  */
FILE *scrub_file;

/* "get" callback for file input: return the next character read from
   scrub_file, passing the getc () result (including EOF) straight
   through.  */
int
scrub_from_file ()
{
  return getc (scrub_file);
}

/* "unget" callback for file input: push CH back onto scrub_file with
   ungetc ().  */
void
scrub_to_file (ch)
     int ch;
{
  ungetc (ch, scrub_file);
}				/* scrub_to_file() */

/* In-memory input for the scrubber: scrub_string is the next character
   to hand out, scrub_last_string is one past the final character.  */
char *scrub_string;
char *scrub_last_string;

/* "get" callback for in-memory input: return the next character, or EOF
   once scrub_string has reached scrub_last_string.

   The character is returned as an unsigned char value, the same
   convention as getc ().  Returning a plain char would make a 0xFF byte
   indistinguishable from EOF on signed-char hosts, and would give the
   caller negative values to index its classification table with.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;
  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* "unget" callback for in-memory input: step scrub_string back by one
   and store CH there, so the next scrub_from_string () returns it.

   EOF is ignored, as ungetc () does; storing it would overwrite a real
   buffer byte with 0xFF.  */
void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;
  *--scrub_string = ch;
}				/* scrub_to_string() */

/* Saved state of the scrubber */
static int state;		/* Current state of do_scrub_next_char ().  */
static int old_state;		/* State to go back to after states -1, -2 and 5.  */
static char *out_string;	/* Next character of the text emitted in state -1.  */
static char out_buf[20];	/* Decimal text generated for a 'x constant.  */
static int add_newlines = 0;	/* Newlines swallowed inside comments and
				   continued strings that still have to be
				   emitted.  */

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

struct app_save
  {
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };
173
174char *
175app_push ()
176{
7c2d4011
SC
177 register struct app_save *saved;
178
6efd877d
KR
179 saved = (struct app_save *) xmalloc (sizeof (*saved));
180 saved->state = state;
181 saved->old_state = old_state;
182 saved->out_string = out_string;
6d331d71 183 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
6efd877d
KR
184 saved->add_newlines = add_newlines;
185 saved->scrub_string = scrub_string;
7c2d4011 186 saved->scrub_last_string = scrub_last_string;
6efd877d 187 saved->scrub_file = scrub_file;
7c2d4011
SC
188
189 /* do_scrub_begin() is not useful, just wastes time. */
6efd877d 190 return (char *) saved;
fecd2382
RP
191}
192
6efd877d
KR
193void
194app_pop (arg)
195 char *arg;
fecd2382 196{
6efd877d
KR
197 register struct app_save *saved = (struct app_save *) arg;
198
199 /* There is no do_scrub_end (). */
200 state = saved->state;
201 old_state = saved->old_state;
202 out_string = saved->out_string;
203 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
204 add_newlines = saved->add_newlines;
205 scrub_string = saved->scrub_string;
206 scrub_last_string = saved->scrub_last_string;
207 scrub_file = saved->scrub_file;
208
209 free (arg);
210} /* app_pop() */

/* @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

/* Translate the character that follows a backslash into the character
   the escape stands for.  The letters b, f, n, r and t map to the
   corresponding control characters; a single or double quote maps to
   itself; every other character is returned unchanged.  */
int
process_escape (ch)
     char ch;
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
6efd877d
KR
238int
239do_scrub_next_char (get, unget)
240 int (*get) ();
241 void (*unget) ();
fecd2382 242{
6efd877d 243 /*State 0: beginning of normal line
a39116f1
RP
244 1: After first whitespace on line (flush more white)
245 2: After first non-white (opcode) on line (keep 1white)
246 3: after second white on line (into operands) (flush white)
247 4: after putting out a .line, put out digits
248 5: parsing a string, then go to old-state
249 6: putting out \ escape in a "d string.
250 7: After putting out a .app-file, put out string.
251 8: After putting out a .app-file string, flush until newline.
f6a91cc0 252 9: After seeing symbol char in state 3 (keep 1white after symchar)
a39116f1
RP
253 -1: output string in out_string and go to the state in old_state
254 -2: flush text until a '*' '/' is seen, then go to state old_state
255 */
6efd877d 256
f6a91cc0
ILT
257 /* I added state 9 because the MIPS ECOFF assembler uses constructs
258 like ``.loc 1 20''. This was turning into ``.loc 120''. State 9
259 ensures that a space is never dropped immediately following a
260 character which could appear in a identifier. It is still
261 dropped following a comma, so this has no effect for most
262 assemblers. I hope. Ian Taylor, ian@cygnus.com. */
263
6efd877d
KR
264 register int ch, ch2 = 0;
265
266 switch (state)
267 {
268 case -1:
269 ch = *out_string++;
270 if (*out_string == 0)
271 {
272 state = old_state;
273 old_state = 3;
274 }
275 return ch;
276
277 case -2:
278 for (;;)
279 {
280 do
281 {
282 ch = (*get) ();
283 }
284 while (ch != EOF && ch != '\n' && ch != '*');
285 if (ch == '\n' || ch == EOF)
286 return ch;
287
288 /* At this point, ch must be a '*' */
289 while ((ch = (*get) ()) == '*')
290 {
291 ;
292 }
293 if (ch == EOF || ch == '/')
294 break;
295 (*unget) (ch);
296 }
297 state = old_state;
298 return ' ';
299
300 case 4:
301 ch = (*get) ();
302 if (ch == EOF || (ch >= '0' && ch <= '9'))
303 return ch;
304 else
305 {
306 while (ch != EOF && IS_WHITESPACE (ch))
307 ch = (*get) ();
308 if (ch == '"')
309 {
310 (*unget) (ch);
311 out_string = "\n.app-file ";
312 old_state = 7;
313 state = -1;
314 return *out_string++;
315 }
316 else
317 {
318 while (ch != EOF && ch != '\n')
319 ch = (*get) ();
320 return ch;
321 }
322 }
323
324 case 5:
325 ch = (*get) ();
326 if (lex[ch] == LEX_IS_STRINGQUOTE)
327 {
328 state = old_state;
329 return ch;
330 }
331 else if (ch == '\\')
332 {
333 state = 6;
334 return ch;
335 }
336 else if (ch == EOF)
337 {
338 as_warn ("End of file in string: inserted '\"'");
339 state = old_state;
340 (*unget) ('\n');
341 return '"';
342 }
343 else
344 {
345 return ch;
346 }
347
348 case 6:
349 state = 5;
350 ch = (*get) ();
351 switch (ch)
352 {
6d331d71
KR
353 /* Handle strings broken across lines, by turning '\n' into
354 '\\' and 'n'. */
6efd877d
KR
355 case '\n':
356 (*unget) ('n');
357 add_newlines++;
358 return '\\';
359
360 case '"':
361 case '\\':
362 case 'b':
363 case 'f':
364 case 'n':
365 case 'r':
366 case 't':
fecd2382 367#ifdef BACKSLASH_V
6efd877d 368 case 'v':
fecd2382 369#endif /* BACKSLASH_V */
6efd877d
KR
370 case '0':
371 case '1':
372 case '2':
373 case '3':
374 case '4':
375 case '5':
376 case '6':
377 case '7':
378 break;
7c2d4011 379#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
6efd877d
KR
380 default:
381 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
382 break;
fecd2382 383#else /* ONLY_STANDARD_ESCAPES */
6efd877d
KR
384 default:
385 /* Accept \x as x for any x */
386 break;
fecd2382 387#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011 388
6efd877d
KR
389 case EOF:
390 as_warn ("End of file in string: '\"' inserted");
391 return '"';
392 }
393 return ch;
394
395 case 7:
396 ch = (*get) ();
397 state = 5;
398 old_state = 8;
399 return ch;
400
401 case 8:
402 do
403 ch = (*get) ();
404 while (ch != '\n');
405 state = 0;
406 return ch;
407 }
408
f6a91cc0 409 /* OK, we are somewhere in states 0 through 4 or 9 */
6efd877d
KR
410
411 /* flushchar: */
412 ch = (*get) ();
413recycle:
414 if (ch == EOF)
415 {
416 if (state != 0)
417 as_warn ("End of file not at end of a line: Newline inserted.");
418 return ch;
419 }
420
421 switch (lex[ch])
422 {
423 case LEX_IS_WHITESPACE:
424 do
425 ch = (*get) ();
426 while (ch != EOF && IS_WHITESPACE (ch));
427 if (ch == EOF)
428 return ch;
429
430 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
431 {
432 goto recycle;
fecd2382 433 }
7c2d4011 434#ifdef MRI
6efd877d
KR
435 (*unget) (ch); /* Put back */
436 return ' '; /* Always return one space at start of line */
7c2d4011 437#endif
6efd877d
KR
438
439 /* If we're in state 2, we've seen a non-white
6d331d71
KR
440 character followed by whitespace. If the next
441 character is ':', this is whitespace after a label
442 name which we can ignore. */
6efd877d
KR
443 if (state == 2 && lex[ch] == LEX_IS_COLON)
444 {
445 state = 0;
446 return ch;
447 }
448
449 switch (state)
450 {
451 case 0:
452 state++;
453 goto recycle; /* Punted leading sp */
454 case 1:
455 BAD_CASE (state); /* We can't get here */
456 case 2:
f6a91cc0
ILT
457 case 9:
458 state = 3;
6efd877d
KR
459 (*unget) (ch);
460 return ' '; /* Sp after opco */
461 case 3:
462 goto recycle; /* Sp in operands */
463 default:
464 BAD_CASE (state);
465 }
466 break;
467
468 case LEX_IS_TWOCHAR_COMMENT_1ST:
469 ch2 = (*get) ();
470 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
471 {
472 for (;;)
473 {
474 do
475 {
476 ch2 = (*get) ();
477 if (ch2 != EOF && IS_NEWLINE (ch2))
478 add_newlines++;
fecd2382 479 }
6efd877d
KR
480 while (ch2 != EOF &&
481 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
482
483 while (ch2 != EOF &&
484 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
485 {
486 ch2 = (*get) ();
fecd2382 487 }
6efd877d
KR
488
489 if (ch2 == EOF
490 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
fecd2382 491 break;
6efd877d
KR
492 (*unget) (ch);
493 }
494 if (ch2 == EOF)
495 as_warn ("End of file in multiline comment");
496
497 ch = ' ';
498 goto recycle;
499 }
500 else
501 {
502 if (ch2 != EOF)
503 (*unget) (ch2);
504 return ch;
505 }
506 break;
507
508 case LEX_IS_STRINGQUOTE:
509 old_state = state;
510 state = 5;
511 return ch;
512#ifndef MRI
a39116f1 513#ifndef IEEE_STYLE
6efd877d
KR
514 case LEX_IS_ONECHAR_QUOTE:
515 ch = (*get) ();
516 if (ch == EOF)
517 {
518 as_warn ("End-of-file after a one-character quote; \\000 inserted");
519 ch = 0;
520 }
521 if (ch == '\\')
522 {
523 ch = (*get) ();
524 ch = process_escape (ch);
525 }
526 sprintf (out_buf, "%d", (int) (unsigned char) ch);
7c2d4011 527
6efd877d
KR
528
529 /* None of these 'x constants for us. We want 'x'.
fecd2382 530 */
6efd877d
KR
531 if ((ch = (*get) ()) != '\'')
532 {
fecd2382 533#ifdef REQUIRE_CHAR_CLOSE_QUOTE
6efd877d 534 as_warn ("Missing close quote: (assumed)");
fecd2382 535#else
6efd877d 536 (*unget) (ch);
fecd2382 537#endif
6efd877d
KR
538 }
539 if (strlen (out_buf) == 1)
540 {
541 return out_buf[0];
542 }
543 old_state = state;
544 state = -1;
545 out_string = out_buf;
546 return *out_string++;
7c2d4011 547#endif
a39116f1 548#endif
6efd877d
KR
549 case LEX_IS_COLON:
550 if (state != 3)
551 state = 0;
552 return ch;
553
554 case LEX_IS_NEWLINE:
555 /* Roll out a bunch of newlines from inside comments, etc. */
556 if (add_newlines)
557 {
558 --add_newlines;
559 (*unget) (ch);
560 }
561 /* fall thru into... */
562
563 case LEX_IS_LINE_SEPARATOR:
564 state = 0;
565 return ch;
566
567 case LEX_IS_LINE_COMMENT_START:
568 if (state != 0) /* Not at start of line, act normal */
569 goto de_fault;
570
571 /* FIXME-someday: The two character comment stuff was badly
f6a91cc0
ILT
572 thought out. On i386, we want '/' as line comment start AND
573 we want C style comments. hence this hack. The whole
574 lexical process should be reworked. xoxorich. */
7c2d4011 575
f6a91cc0 576 if (ch == '/')
6efd877d 577 {
f6a91cc0
ILT
578 ch2 = (*get) ();
579 if (ch2 == '*')
580 {
581 state = -2;
582 return (do_scrub_next_char (get, unget));
583 }
584 else
585 {
586 (*unget) (ch2);
587 }
6efd877d
KR
588 } /* bad hack */
589
590 do
591 ch = (*get) ();
592 while (ch != EOF && IS_WHITESPACE (ch));
593 if (ch == EOF)
594 {
595 as_warn ("EOF in comment: Newline inserted");
596 return '\n';
597 }
598 if (ch < '0' || ch > '9')
599 {
600 /* Non-numerics: Eat whole comment line */
601 while (ch != EOF && !IS_NEWLINE (ch))
602 ch = (*get) ();
603 if (ch == EOF)
604 as_warn ("EOF in Comment: Newline inserted");
605 state = 0;
606 return '\n';
607 }
608 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
609 (*unget) (ch);
610 old_state = 4;
611 state = -1;
612 out_string = ".line ";
613 return *out_string++;
614
615 case LEX_IS_COMMENT_START:
616 do
617 ch = (*get) ();
618 while (ch != EOF && !IS_NEWLINE (ch));
619 if (ch == EOF)
620 as_warn ("EOF in comment: Newline inserted");
621 state = 0;
622 return '\n';
623
f6a91cc0
ILT
624 case LEX_IS_SYMBOL_COMPONENT:
625 if (state == 3)
626 state = 9;
627 /* Fall through. */
6efd877d
KR
628 default:
629 de_fault:
630 /* Some relatively `normal' character. */
631 if (state == 0)
632 {
633 state = 2; /* Now seeing opcode */
634 return ch;
fecd2382 635 }
6efd877d
KR
636 else if (state == 1)
637 {
638 state = 2; /* Ditto */
639 return ch;
640 }
f6a91cc0
ILT
641 else if (state == 9)
642 {
643 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
644 state = 3;
645 return ch;
646 }
6efd877d
KR
647 else
648 {
649 return ch; /* Opcode or operands already */
650 }
651 }
652 return -1;
fecd2382
RP
653}

#ifdef TEST

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () also walks line_separator_chars.
   NOTE(review): assumed that nothing else defines it in a TEST build --
   confirm before relying on this definition.  */
const char line_separator_chars[] = "";

/* Stand-alone filter: scrub standard input and write the result to
   standard output.  */
int
main ()
{
  int ch;

  do_scrub_begin ();
  /* do_scrub_next_char () takes get/unget callbacks, not a stream; read
     stdin through the file callbacks defined in this file.  */
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Stand-in for as_warn () in the TEST build: print STR and a newline on
   stderr.  Any further arguments are ignored and STR is not treated as a
   format string.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif

/*
 * Local Variables:
 * comment-column: 0
 * fill-column: 131
 * End:
 */

/* end of app.c */
This page took 0.094464 seconds and 4 git commands to generate.