Ran "indent", for GNU coding style; some code & comments still need fixup.
[deliverable/binutils-gdb.git] / gas / app.c
1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
2
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
4 */
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
7
8 This file is part of GAS, the GNU Assembler.
9
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
23
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
27 This needs better error-handling.
28 */
29
30 #include <stdio.h>
31 #include "as.h" /* For BAD_CASE() only */
32 #include "read.h"
33
34 #if (__STDC__ != 1) && !defined(const)
35 #define const /* Nothing */
36 #endif
37
/* Character classification table.  It has one entry per unsigned
   character code; an entry of 0 means "no special lexical role", any
   other value is one of the LEX_IS_* codes below.  The table is filled
   in at run time by do_scrub_begin ().  */
static char lex[256];

/* Every character that may appear in a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Lexical classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_TWOCHAR_COMMENT_2ND 7
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11

/* Map a character value onto a valid lex[] subscript.  Plain `char' may
   be signed, so a character with the high bit set would otherwise index
   in front of the table.  */
#define LEX_INDEX(c) ((unsigned char) (c))

/* Classification predicates.  Callers that may hold EOF must test for
   it first: EOF is not a character and has no class of its own.  */
#define IS_SYMBOL_COMPONENT(c) (lex[LEX_INDEX (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (lex[LEX_INDEX (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (lex[LEX_INDEX (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) (lex[LEX_INDEX (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[LEX_INDEX (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[LEX_INDEX (c)] == LEX_IS_NEWLINE)
59
60 /* FIXME-soon: The entire lexer/parser thingy should be
61 built statically at compile time rather than dynamically
62 each and every time the assembler is run. xoxorich. */
63
64 void
65 do_scrub_begin ()
66 {
67 const char *p;
68
69 lex[' '] = LEX_IS_WHITESPACE;
70 lex['\t'] = LEX_IS_WHITESPACE;
71 lex['\n'] = LEX_IS_NEWLINE;
72 lex[';'] = LEX_IS_LINE_SEPARATOR;
73 lex['"'] = LEX_IS_STRINGQUOTE;
74 lex['\''] = LEX_IS_ONECHAR_QUOTE;
75 lex[':'] = LEX_IS_COLON;
76
77 #ifdef MRI
78 lex['\''] = LEX_IS_STRINGQUOTE;
79 #endif
80 /* Note that these override the previous defaults, e.g. if ';'
81 is a comment char, then it isn't a line separator. */
82 for (p = symbol_chars; *p; ++p)
83 {
84 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
85 } /* declare symbol characters */
86
87 for (p = line_comment_chars; *p; p++)
88 {
89 lex[*p] = LEX_IS_LINE_COMMENT_START;
90 } /* declare line comment chars */
91
92 for (p = comment_chars; *p; p++)
93 {
94 lex[*p] = LEX_IS_COMMENT_START;
95 } /* declare comment chars */
96
97 for (p = line_separator_chars; *p; p++)
98 {
99 lex[*p] = LEX_IS_LINE_SEPARATOR;
100 } /* declare line separators */
101
102 /* Only allow slash-star comments if slash is not in use */
103 if (lex['/'] == 0)
104 {
105 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
106 }
107 /* FIXME-soon. This is a bad hack but otherwise, we
108 can't do c-style comments when '/' is a line
109 comment char. xoxorich. */
110 if (lex['*'] == 0)
111 {
112 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
113 }
114 } /* do_scrub_begin() */
115
/* Stream that the file-based callbacks below read from and push back
   onto.  */
FILE *scrub_file;

/* Fetch the next character of scrub_file.  Returns whatever getc ()
   returns, i.e. the character as a non-negative value, or EOF.  The
   signature matches the `get' argument of do_scrub_next_char ().  */

int
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}
123
124 void
125 scrub_to_file (ch)
126 int ch;
127 {
128 ungetc (ch, scrub_file);
129 } /* scrub_to_file() */
130
/* In-memory input for the scrubber: scrub_string is the read position,
   scrub_last_string is one past the final character.  */
char *scrub_string;
char *scrub_last_string;

/* Fetch the next character of the in-memory input, or EOF when the read
   position has reached scrub_last_string.

   The character is returned as an `unsigned char' value, the same
   convention getc () uses.  Returning the plain `char' would make byte
   0xFF indistinguishable from EOF and would hand negative values to
   callers that use the result as a table subscript.  */

int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push CH back in front of the read position.  Like ungetc (), pushing
   back EOF is a no-op: EOF is not a character and must not be stored
   into the buffer.  */

void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}				/* scrub_to_string() */
146
/* State of the scrubber, kept between calls to do_scrub_next_char ().  */

/* Current state of the scrubbing state machine.  */
static int state;

/* State to return to once the current sub-task (string, canned output,
   comment) is finished.  */
static int old_state;

/* Next character of canned text still to be emitted (state -1).  */
static char *out_string;

/* Scratch space for the decimal text of a character constant.  */
static char out_buf[20];

/* Number of swallowed newlines that still have to be handed out.  */
static int add_newlines;

/* Snapshot of everything above plus the input source, taken by
   app_push () and restored by app_pop () around an #include.  app is
   called asynchronously to the parsing of the .include's, so the state
   at the time .include is interpreted is completely unrelated to the
   parser's; that is why all of it has to be saved.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof out_buf];
  int add_newlines;
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
170
171 char *
172 app_push ()
173 {
174 register struct app_save *saved;
175
176 saved = (struct app_save *) xmalloc (sizeof (*saved));
177 saved->state = state;
178 saved->old_state = old_state;
179 saved->out_string = out_string;
180 bcopy (saved->out_buf, out_buf, sizeof (out_buf));
181 saved->add_newlines = add_newlines;
182 saved->scrub_string = scrub_string;
183 saved->scrub_last_string = scrub_last_string;
184 saved->scrub_file = scrub_file;
185
186 /* do_scrub_begin() is not useful, just wastes time. */
187 return (char *) saved;
188 }
189
190 void
191 app_pop (arg)
192 char *arg;
193 {
194 register struct app_save *saved = (struct app_save *) arg;
195
196 /* There is no do_scrub_end (). */
197 state = saved->state;
198 old_state = saved->old_state;
199 out_string = saved->out_string;
200 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
201 add_newlines = saved->add_newlines;
202 scrub_string = saved->scrub_string;
203 scrub_last_string = saved->scrub_last_string;
204 scrub_file = saved->scrub_file;
205
206 free (arg);
207 } /* app_pop() */
208
/* Translate the character that follows a backslash into the character
   the escape sequence stands for.

   CH is the character after the backslash.  Returns the control
   character for b, f, n, r and t, the quote itself for \' and \", and
   CH unchanged for anything else.  */

int
process_escape (ch)
     char ch;
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      /* This used to return a single quote.  */
      return '"';
    default:
      return ch;
    }
}

233 int
234 do_scrub_next_char (get, unget)
235 int (*get) ();
236 void (*unget) ();
237 {
238 /*State 0: beginning of normal line
239 1: After first whitespace on line (flush more white)
240 2: After first non-white (opcode) on line (keep 1white)
241 3: after second white on line (into operands) (flush white)
242 4: after putting out a .line, put out digits
243 5: parsing a string, then go to old-state
244 6: putting out \ escape in a "d string.
245 7: After putting out a .app-file, put out string.
246 8: After putting out a .app-file string, flush until newline.
247 -1: output string in out_string and go to the state in old_state
248 -2: flush text until a '*' '/' is seen, then go to state old_state
249 */
250
251 register int ch, ch2 = 0;
252
253 switch (state)
254 {
255 case -1:
256 ch = *out_string++;
257 if (*out_string == 0)
258 {
259 state = old_state;
260 old_state = 3;
261 }
262 return ch;
263
264 case -2:
265 for (;;)
266 {
267 do
268 {
269 ch = (*get) ();
270 }
271 while (ch != EOF && ch != '\n' && ch != '*');
272 if (ch == '\n' || ch == EOF)
273 return ch;
274
275 /* At this point, ch must be a '*' */
276 while ((ch = (*get) ()) == '*')
277 {
278 ;
279 }
280 if (ch == EOF || ch == '/')
281 break;
282 (*unget) (ch);
283 }
284 state = old_state;
285 return ' ';
286
287 case 4:
288 ch = (*get) ();
289 if (ch == EOF || (ch >= '0' && ch <= '9'))
290 return ch;
291 else
292 {
293 while (ch != EOF && IS_WHITESPACE (ch))
294 ch = (*get) ();
295 if (ch == '"')
296 {
297 (*unget) (ch);
298 out_string = "\n.app-file ";
299 old_state = 7;
300 state = -1;
301 return *out_string++;
302 }
303 else
304 {
305 while (ch != EOF && ch != '\n')
306 ch = (*get) ();
307 return ch;
308 }
309 }
310
311 case 5:
312 ch = (*get) ();
313 if (lex[ch] == LEX_IS_STRINGQUOTE)
314 {
315 state = old_state;
316 return ch;
317 }
318 else if (ch == '\\')
319 {
320 state = 6;
321 return ch;
322 }
323 else if (ch == EOF)
324 {
325 as_warn ("End of file in string: inserted '\"'");
326 state = old_state;
327 (*unget) ('\n');
328 return '"';
329 }
330 else
331 {
332 return ch;
333 }
334
335 case 6:
336 state = 5;
337 ch = (*get) ();
338 switch (ch)
339 {
340 /* This is neet. Turn "string
341 more string" into "string\n more string"
342 */
343 case '\n':
344 (*unget) ('n');
345 add_newlines++;
346 return '\\';
347
348 case '"':
349 case '\\':
350 case 'b':
351 case 'f':
352 case 'n':
353 case 'r':
354 case 't':
355 #ifdef BACKSLASH_V
356 case 'v':
357 #endif /* BACKSLASH_V */
358 case '0':
359 case '1':
360 case '2':
361 case '3':
362 case '4':
363 case '5':
364 case '6':
365 case '7':
366 break;
367 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
368 default:
369 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
370 break;
371 #else /* ONLY_STANDARD_ESCAPES */
372 default:
373 /* Accept \x as x for any x */
374 break;
375 #endif /* ONLY_STANDARD_ESCAPES */
376
377 case EOF:
378 as_warn ("End of file in string: '\"' inserted");
379 return '"';
380 }
381 return ch;
382
383 case 7:
384 ch = (*get) ();
385 state = 5;
386 old_state = 8;
387 return ch;
388
389 case 8:
390 do
391 ch = (*get) ();
392 while (ch != '\n');
393 state = 0;
394 return ch;
395 }
396
397 /* OK, we are somewhere in states 0 through 4 */
398
399 /* flushchar: */
400 ch = (*get) ();
401 recycle:
402 if (ch == EOF)
403 {
404 if (state != 0)
405 as_warn ("End of file not at end of a line: Newline inserted.");
406 return ch;
407 }
408
409 switch (lex[ch])
410 {
411 case LEX_IS_WHITESPACE:
412 do
413 ch = (*get) ();
414 while (ch != EOF && IS_WHITESPACE (ch));
415 if (ch == EOF)
416 return ch;
417
418 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
419 {
420 goto recycle;
421 }
422 #ifdef MRI
423 (*unget) (ch); /* Put back */
424 return ' '; /* Always return one space at start of line */
425 #endif
426
427 /* If we're in state 2, we've seen a non-white
428 character followed by whitespace. If the next
429 character is ':', this is whitespace after a label
430 name which we can ignore. */
431 if (state == 2 && lex[ch] == LEX_IS_COLON)
432 {
433 state = 0;
434 return ch;
435 }
436
437 switch (state)
438 {
439 case 0:
440 state++;
441 goto recycle; /* Punted leading sp */
442 case 1:
443 BAD_CASE (state); /* We can't get here */
444 case 2:
445 state++;
446 (*unget) (ch);
447 return ' '; /* Sp after opco */
448 case 3:
449 goto recycle; /* Sp in operands */
450 default:
451 BAD_CASE (state);
452 }
453 break;
454
455 case LEX_IS_TWOCHAR_COMMENT_1ST:
456 ch2 = (*get) ();
457 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
458 {
459 for (;;)
460 {
461 do
462 {
463 ch2 = (*get) ();
464 if (ch2 != EOF && IS_NEWLINE (ch2))
465 add_newlines++;
466 }
467 while (ch2 != EOF &&
468 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
469
470 while (ch2 != EOF &&
471 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
472 {
473 ch2 = (*get) ();
474 }
475
476 if (ch2 == EOF
477 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
478 break;
479 (*unget) (ch);
480 }
481 if (ch2 == EOF)
482 as_warn ("End of file in multiline comment");
483
484 ch = ' ';
485 goto recycle;
486 }
487 else
488 {
489 if (ch2 != EOF)
490 (*unget) (ch2);
491 return ch;
492 }
493 break;
494
495 case LEX_IS_STRINGQUOTE:
496 old_state = state;
497 state = 5;
498 return ch;
499 #ifndef MRI
500 #ifndef IEEE_STYLE
501 case LEX_IS_ONECHAR_QUOTE:
502 ch = (*get) ();
503 if (ch == EOF)
504 {
505 as_warn ("End-of-file after a one-character quote; \\000 inserted");
506 ch = 0;
507 }
508 if (ch == '\\')
509 {
510 ch = (*get) ();
511 ch = process_escape (ch);
512 }
513 sprintf (out_buf, "%d", (int) (unsigned char) ch);
514
515
516 /* None of these 'x constants for us. We want 'x'.
517 */
518 if ((ch = (*get) ()) != '\'')
519 {
520 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
521 as_warn ("Missing close quote: (assumed)");
522 #else
523 (*unget) (ch);
524 #endif
525 }
526 if (strlen (out_buf) == 1)
527 {
528 return out_buf[0];
529 }
530 old_state = state;
531 state = -1;
532 out_string = out_buf;
533 return *out_string++;
534 #endif
535 #endif
536 case LEX_IS_COLON:
537 if (state != 3)
538 state = 0;
539 return ch;
540
541 case LEX_IS_NEWLINE:
542 /* Roll out a bunch of newlines from inside comments, etc. */
543 if (add_newlines)
544 {
545 --add_newlines;
546 (*unget) (ch);
547 }
548 /* fall thru into... */
549
550 case LEX_IS_LINE_SEPARATOR:
551 state = 0;
552 return ch;
553
554 case LEX_IS_LINE_COMMENT_START:
555 if (state != 0) /* Not at start of line, act normal */
556 goto de_fault;
557
558 /* FIXME-someday: The two character comment stuff was badly
559 thought out. On i386, we want '/' as line comment start
560 AND we want C style comments. hence this hack. The
561 whole lexical process should be reworked. xoxorich. */
562
563 if (ch == '/' && (ch2 = (*get) ()) == '*')
564 {
565 state = -2;
566 return (do_scrub_next_char (get, unget));
567 }
568 else
569 {
570 (*unget) (ch2);
571 } /* bad hack */
572
573 do
574 ch = (*get) ();
575 while (ch != EOF && IS_WHITESPACE (ch));
576 if (ch == EOF)
577 {
578 as_warn ("EOF in comment: Newline inserted");
579 return '\n';
580 }
581 if (ch < '0' || ch > '9')
582 {
583 /* Non-numerics: Eat whole comment line */
584 while (ch != EOF && !IS_NEWLINE (ch))
585 ch = (*get) ();
586 if (ch == EOF)
587 as_warn ("EOF in Comment: Newline inserted");
588 state = 0;
589 return '\n';
590 }
591 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
592 (*unget) (ch);
593 old_state = 4;
594 state = -1;
595 out_string = ".line ";
596 return *out_string++;
597
598 case LEX_IS_COMMENT_START:
599 do
600 ch = (*get) ();
601 while (ch != EOF && !IS_NEWLINE (ch));
602 if (ch == EOF)
603 as_warn ("EOF in comment: Newline inserted");
604 state = 0;
605 return '\n';
606
607 default:
608 de_fault:
609 /* Some relatively `normal' character. */
610 if (state == 0)
611 {
612 state = 2; /* Now seeing opcode */
613 return ch;
614 }
615 else if (state == 1)
616 {
617 state = 2; /* Ditto */
618 return ch;
619 }
620 else
621 {
622 return ch; /* Opcode or operands already */
623 }
624 }
625 return -1;
626 }
627
#ifdef TEST

/* Stand-alone test harness: scrub standard input and write the result
   to standard output.  */

/* Tables that do_scrub_begin () reads.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  do_scrub_begin ();
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal replacement for the assembler's warning routine: print STR
   and a newline on stderr.  Any further arguments are ignored, so a
   format directive in STR is printed literally.
   NOTE(review): the declaration is left exactly as it was; check it
   against the as_warn declaration in as.h before building with TEST.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
650
651 /*
652 * Local Variables:
653 * comment-column: 0
654 * fill-column: 131
655 * End:
656 */
657
658 /* end of app.c */
This page took 0.069851 seconds and 5 git commands to generate.