This commit was generated by cvs2svn to track changes on a CVS vendor
[deliverable/binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 1999
3 Free Software Foundation, Inc.
4
5 This file is part of GAS, the GNU Assembler.
6
7 GAS is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GAS is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GAS; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
23 /* App, the assembler pre-processor. This pre-processor strips out excess
24 spaces, turns single-quoted characters into a decimal constant, and turns
25 # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
26 pair. This needs better error-handling. */
27
28 #include <stdio.h>
29 #include "as.h" /* For BAD_CASE() only */
30
31 #if (__STDC__ != 1)
32 #ifndef const
33 #define const /* empty */
34 #endif
35 #endif
36
/* Nonzero while the scrubber is operating in m68k MRI mode.  The
   scrubber keeps its own copy instead of consulting flag_m68k_mri
   because the .mri pseudo-op reaches the scrubber and the main reader
   at different moments, so the two flags change at different times.  */
static int scrub_m68k_mri;

/* Template matched against the input to spot the pseudo-op that turns
   MRI mode on or off.  The trailing '0' also stands for '1'; see the
   recognizer in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";

#if defined TC_ARM && defined OBJ_ELF
/* Template for the pseudo-op on whose lines `@' must not be treated as
   a comment character, and the current position of the matcher within
   that template (NULL when no match is in progress).  See the
   recognizer in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";
static const char * symver_state;
#endif
52
/* Lexical class of every possible input byte; 0 means "ordinary
   character".  The table is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters that are always symbol constituents.  */
static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Lexical class codes stored in lex[].  Code 7 is not used.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif

/* Class predicates.  The argument is used directly as an index into
   lex[], so it must be in the range 0..255 (in particular, not EOF).  */
#define LEX_CLASS_IS(c, kind)	(lex[c] == kind)
#define IS_SYMBOL_COMPONENT(c)	LEX_CLASS_IS (c, LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	LEX_CLASS_IS (c, LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	LEX_CLASS_IS (c, LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)		LEX_CLASS_IS (c, LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	LEX_CLASS_IS (c, LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)		LEX_CLASS_IS (c, LEX_IS_NEWLINE)
82
83 static int process_escape PARAMS ((int));
84
85 /* FIXME-soon: The entire lexer/parser thingy should be
86 built statically at compile time rather than dynamically
87 each and every time the assembler is run. xoxorich. */
88
89 void
90 do_scrub_begin (m68k_mri)
91 int m68k_mri;
92 {
93 const char *p;
94 int c;
95
96 scrub_m68k_mri = m68k_mri;
97
98 lex[' '] = LEX_IS_WHITESPACE;
99 lex['\t'] = LEX_IS_WHITESPACE;
100 lex['\r'] = LEX_IS_WHITESPACE;
101 lex['\n'] = LEX_IS_NEWLINE;
102 lex[';'] = LEX_IS_LINE_SEPARATOR;
103 lex[':'] = LEX_IS_COLON;
104
105 if (! m68k_mri)
106 {
107 lex['"'] = LEX_IS_STRINGQUOTE;
108
109 #ifndef TC_HPPA
110 lex['\''] = LEX_IS_ONECHAR_QUOTE;
111 #endif
112
113 #ifdef SINGLE_QUOTE_STRINGS
114 lex['\''] = LEX_IS_STRINGQUOTE;
115 #endif
116 }
117
118 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
119 in state 5 of do_scrub_chars must be changed. */
120
121 /* Note that these override the previous defaults, e.g. if ';' is a
122 comment char, then it isn't a line separator. */
123 for (p = symbol_chars; *p; ++p)
124 {
125 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
126 } /* declare symbol characters */
127
128 for (c = 128; c < 256; ++c)
129 lex[c] = LEX_IS_SYMBOL_COMPONENT;
130
131 #ifdef tc_symbol_chars
132 /* This macro permits the processor to specify all characters which
133 may appears in an operand. This will prevent the scrubber from
134 discarding meaningful whitespace in certain cases. The i386
135 backend uses this to support prefixes, which can confuse the
136 scrubber as to whether it is parsing operands or opcodes. */
137 for (p = tc_symbol_chars; *p; ++p)
138 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139 #endif
140
141 /* The m68k backend wants to be able to change comment_chars. */
142 #ifndef tc_comment_chars
143 #define tc_comment_chars comment_chars
144 #endif
145 for (p = tc_comment_chars; *p; p++)
146 {
147 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
148 } /* declare comment chars */
149
150 for (p = line_comment_chars; *p; p++)
151 {
152 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
153 } /* declare line comment chars */
154
155 for (p = line_separator_chars; *p; p++)
156 {
157 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
158 } /* declare line separators */
159
160 /* Only allow slash-star comments if slash is not in use.
161 FIXME: This isn't right. We should always permit them. */
162 if (lex['/'] == 0)
163 {
164 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
165 }
166
167 if (m68k_mri)
168 {
169 lex['\''] = LEX_IS_STRINGQUOTE;
170 lex[';'] = LEX_IS_COMMENT_START;
171 lex['*'] = LEX_IS_LINE_COMMENT_START;
172 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
173 then it can't be used in an expression. */
174 lex['!'] = LEX_IS_LINE_COMMENT_START;
175 }
176
177 #ifdef TC_V850
178 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
179 #endif
180 #ifdef DOUBLEBAR_PARALLEL
181 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
182 #endif
183 #ifdef TC_D30V
184 /* must do this is we want VLIW instruction with "->" or "<-" */
185 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
186 #endif
187 } /* do_scrub_begin() */
188
/* State of the scrubber that must survive between calls to
   do_scrub_chars (and that app_push/app_pop save and restore).  */

/* Current state of the state machine, and the state to go back to
   once a temporary state (string, canned output, comment) is done.  */
static int state;
static int old_state;

/* Canned text being emitted in state -1, and scratch space in which
   such text is built (the decimal value of a character constant).  */
static char *out_string;
static char out_buf[20];

/* Number of newlines swallowed inside comments or strings that still
   have to be emitted.  */
static int add_newlines;

/* Input not yet consumed when the output buffer filled up (NULL when
   there is none) and its length.  */
static char *saved_input;
static int saved_input_len;

/* Buffer handed to the caller's GET function.  */
static char input_buffer[32 * 1024];

/* Progress of the .mri pseudo-op recognizer (NULL when idle) and the
   last character it accepted.  */
static const char *mri_state;
static char mri_last_ch;
200
201 /* Data structure for saving the state of app across #include's. Note that
202 app is called asynchronously to the parsing of the .include's, so our
203 state at the time .include is interpreted is completely unrelated.
204 That's why we have to save it all. */
205
206 struct app_save
207 {
208 int state;
209 int old_state;
210 char * out_string;
211 char out_buf[sizeof (out_buf)];
212 int add_newlines;
213 char * saved_input;
214 int saved_input_len;
215 int scrub_m68k_mri;
216 const char * mri_state;
217 char mri_last_ch;
218 #if defined TC_ARM && defined OBJ_ELF
219 const char * symver_state;
220 #endif
221 };
222
223 char *
224 app_push ()
225 {
226 register struct app_save *saved;
227
228 saved = (struct app_save *) xmalloc (sizeof (*saved));
229 saved->state = state;
230 saved->old_state = old_state;
231 saved->out_string = out_string;
232 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
233 saved->add_newlines = add_newlines;
234 if (saved_input == NULL)
235 saved->saved_input = NULL;
236 else
237 {
238 saved->saved_input = xmalloc (saved_input_len);
239 memcpy (saved->saved_input, saved_input, saved_input_len);
240 saved->saved_input_len = saved_input_len;
241 }
242 saved->scrub_m68k_mri = scrub_m68k_mri;
243 saved->mri_state = mri_state;
244 saved->mri_last_ch = mri_last_ch;
245 #if defined TC_ARM && defined OBJ_ELF
246 saved->symver_state = symver_state;
247 #endif
248
249 /* do_scrub_begin() is not useful, just wastes time. */
250
251 state = 0;
252 saved_input = NULL;
253
254 return (char *) saved;
255 }
256
257 void
258 app_pop (arg)
259 char *arg;
260 {
261 register struct app_save *saved = (struct app_save *) arg;
262
263 /* There is no do_scrub_end (). */
264 state = saved->state;
265 old_state = saved->old_state;
266 out_string = saved->out_string;
267 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
268 add_newlines = saved->add_newlines;
269 if (saved->saved_input == NULL)
270 saved_input = NULL;
271 else
272 {
273 assert (saved->saved_input_len <= (int) (sizeof input_buffer));
274 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
275 saved_input = input_buffer;
276 saved_input_len = saved->saved_input_len;
277 free (saved->saved_input);
278 }
279 scrub_m68k_mri = saved->scrub_m68k_mri;
280 mri_state = saved->mri_state;
281 mri_last_ch = saved->mri_last_ch;
282 #if defined TC_ARM && defined OBJ_ELF
283 symver_state = saved->symver_state;
284 #endif
285
286 free (arg);
287 } /* app_pop() */
288
/* Translate CH, the character following a backslash in a
   single-character constant, into the character the escape stands
   for.  Characters that are not recognized escapes are returned
   unchanged.

   @@ This assumes that \n &c are the same on host and target.  This
   is not necessarily true.  */

static int
process_escape (ch)
     int ch;
{
  /* Escape letters and, at the same index, the characters they
     denote.  */
  static const char letters[] = "bfnrt'\"";
  static const char meanings[] = "\b\f\n\r\t'\"";
  int i;

  for (i = 0; letters[i] != '\0'; i++)
    if (letters[i] == ch)
      return meanings[i];

  return ch;
}
315
316 /* This function is called to process input characters. The GET
317 parameter is used to retrieve more input characters. GET should
318 set its parameter to point to a buffer, and return the length of
319 the buffer; it should return 0 at end of file. The scrubbed output
320 characters are put into the buffer starting at TOSTART; the TOSTART
321 buffer is TOLEN bytes in length. The function returns the number
322 of scrubbed characters put into TOSTART. This will be TOLEN unless
323 end of file was seen. This function is arranged as a state
324 machine, and saves its state so that it may return at any point.
325 This is the way the old code used to work. */
326
327 int
328 do_scrub_chars (get, tostart, tolen)
329 int (*get) PARAMS ((char *, int));
330 char *tostart;
331 int tolen;
332 {
333 char *to = tostart;
334 char *toend = tostart + tolen;
335 char *from;
336 char *fromend;
337 int fromlen;
338 register int ch, ch2 = 0;
339
340 /*State 0: beginning of normal line
341 1: After first whitespace on line (flush more white)
342 2: After first non-white (opcode) on line (keep 1white)
343 3: after second white on line (into operands) (flush white)
344 4: after putting out a .line, put out digits
345 5: parsing a string, then go to old-state
346 6: putting out \ escape in a "d string.
347 7: After putting out a .appfile, put out string.
348 8: After putting out a .appfile string, flush until newline.
349 9: After seeing symbol char in state 3 (keep 1white after symchar)
350 10: After seeing whitespace in state 9 (keep white before symchar)
351 11: After seeing a symbol character in state 0 (eg a label definition)
352 -1: output string in out_string and go to the state in old_state
353 -2: flush text until a '*' '/' is seen, then go to state old_state
354 #ifdef TC_V850
355 12: After seeing a dash, looking for a second dash as a start of comment.
356 #endif
357 #ifdef DOUBLEBAR_PARALLEL
358 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator.
359 #endif
360 */
361
362 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
363 constructs like ``.loc 1 20''. This was turning into ``.loc
364 120''. States 9 and 10 ensure that a space is never dropped in
365 between characters which could appear in a identifier. Ian
366 Taylor, ian@cygnus.com.
367
368 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
369 correctly on the PA (and any other target where colons are optional).
370 Jeff Law, law@cs.utah.edu.
371
372 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
373 get squashed into "cmp r1,r2||trap#1", with the all important space
374 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
375
376 /* This macro gets the next input character. */
377
378 #define GET() \
379 (from < fromend \
380 ? * (unsigned char *) (from++) \
381 : (saved_input = NULL, \
382 fromlen = (*get) (input_buffer, sizeof input_buffer), \
383 from = input_buffer, \
384 fromend = from + fromlen, \
385 (fromlen == 0 \
386 ? EOF \
387 : * (unsigned char *) (from++))))
388
389 /* This macro pushes a character back on the input stream. */
390
391 #define UNGET(uch) (*--from = (uch))
392
393 /* This macro puts a character into the output buffer. If this
394 character fills the output buffer, this macro jumps to the label
395 TOFULL. We use this rather ugly approach because we need to
396 handle two different termination conditions: EOF on the input
397 stream, and a full output buffer. It would be simpler if we
398 always read in the entire input stream before processing it, but
399 I don't want to make such a significant change to the assembler's
400 memory usage. */
401
402 #define PUT(pch) \
403 do \
404 { \
405 *to++ = (pch); \
406 if (to >= toend) \
407 goto tofull; \
408 } \
409 while (0)
410
411 if (saved_input != NULL)
412 {
413 from = saved_input;
414 fromend = from + saved_input_len;
415 }
416 else
417 {
418 fromlen = (*get) (input_buffer, sizeof input_buffer);
419 if (fromlen == 0)
420 return 0;
421 from = input_buffer;
422 fromend = from + fromlen;
423 }
424
425 while (1)
426 {
427 /* The cases in this switch end with continue, in order to
428 branch back to the top of this while loop and generate the
429 next output character in the appropriate state. */
430 switch (state)
431 {
432 case -1:
433 ch = *out_string++;
434 if (*out_string == '\0')
435 {
436 state = old_state;
437 old_state = 3;
438 }
439 PUT (ch);
440 continue;
441
442 case -2:
443 for (;;)
444 {
445 do
446 {
447 ch = GET ();
448
449 if (ch == EOF)
450 {
451 as_warn (_("end of file in comment"));
452 goto fromeof;
453 }
454
455 if (ch == '\n')
456 PUT ('\n');
457 }
458 while (ch != '*');
459
460 while ((ch = GET ()) == '*')
461 ;
462
463 if (ch == EOF)
464 {
465 as_warn (_("end of file in comment"));
466 goto fromeof;
467 }
468
469 if (ch == '/')
470 break;
471
472 UNGET (ch);
473 }
474
475 state = old_state;
476 UNGET (' ');
477 continue;
478
479 case 4:
480 ch = GET ();
481 if (ch == EOF)
482 goto fromeof;
483 else if (ch >= '0' && ch <= '9')
484 PUT (ch);
485 else
486 {
487 while (ch != EOF && IS_WHITESPACE (ch))
488 ch = GET ();
489 if (ch == '"')
490 {
491 UNGET (ch);
492 if (scrub_m68k_mri)
493 out_string = "\n\tappfile ";
494 else
495 out_string = "\n\t.appfile ";
496 old_state = 7;
497 state = -1;
498 PUT (*out_string++);
499 }
500 else
501 {
502 while (ch != EOF && ch != '\n')
503 ch = GET ();
504 state = 0;
505 PUT (ch);
506 }
507 }
508 continue;
509
510 case 5:
511 /* We are going to copy everything up to a quote character,
512 with special handling for a backslash. We try to
513 optimize the copying in the simple case without using the
514 GET and PUT macros. */
515 {
516 char *s;
517 int len;
518
519 for (s = from; s < fromend; s++)
520 {
521 ch = *s;
522 /* This condition must be changed if the type of any
523 other character can be LEX_IS_STRINGQUOTE. */
524 if (ch == '\\'
525 || ch == '"'
526 || ch == '\''
527 || ch == '\n')
528 break;
529 }
530 len = s - from;
531 if (len > toend - to)
532 len = toend - to;
533 if (len > 0)
534 {
535 memcpy (to, from, len);
536 to += len;
537 from += len;
538 }
539 }
540
541 ch = GET ();
542 if (ch == EOF)
543 {
544 as_warn (_("end of file in string: inserted '\"'"));
545 state = old_state;
546 UNGET ('\n');
547 PUT ('"');
548 }
549 else if (lex[ch] == LEX_IS_STRINGQUOTE)
550 {
551 state = old_state;
552 PUT (ch);
553 }
554 #ifndef NO_STRING_ESCAPES
555 else if (ch == '\\')
556 {
557 state = 6;
558 PUT (ch);
559 }
560 #endif
561 else if (scrub_m68k_mri && ch == '\n')
562 {
563 /* Just quietly terminate the string. This permits lines like
564 bne label loop if we haven't reach end yet
565 */
566 state = old_state;
567 UNGET (ch);
568 PUT ('\'');
569 }
570 else
571 {
572 PUT (ch);
573 }
574 continue;
575
576 case 6:
577 state = 5;
578 ch = GET ();
579 switch (ch)
580 {
581 /* Handle strings broken across lines, by turning '\n' into
582 '\\' and 'n'. */
583 case '\n':
584 UNGET ('n');
585 add_newlines++;
586 PUT ('\\');
587 continue;
588
589 case '"':
590 case '\\':
591 case 'b':
592 case 'f':
593 case 'n':
594 case 'r':
595 case 't':
596 case 'v':
597 case 'x':
598 case 'X':
599 case '0':
600 case '1':
601 case '2':
602 case '3':
603 case '4':
604 case '5':
605 case '6':
606 case '7':
607 break;
608 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
609 default:
610 as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
611 break;
612 #else /* ONLY_STANDARD_ESCAPES */
613 default:
614 /* Accept \x as x for any x */
615 break;
616 #endif /* ONLY_STANDARD_ESCAPES */
617
618 case EOF:
619 as_warn (_("End of file in string: '\"' inserted"));
620 PUT ('"');
621 continue;
622 }
623 PUT (ch);
624 continue;
625
626 case 7:
627 ch = GET ();
628 state = 5;
629 old_state = 8;
630 if (ch == EOF)
631 goto fromeof;
632 PUT (ch);
633 continue;
634
635 case 8:
636 do
637 ch = GET ();
638 while (ch != '\n' && ch != EOF);
639 if (ch == EOF)
640 goto fromeof;
641 state = 0;
642 PUT (ch);
643 continue;
644 }
645
646 /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
647
648 /* flushchar: */
649 ch = GET ();
650
651 recycle:
652
653 #if defined TC_ARM && defined OBJ_ELF
654 /* We need to watch out for .symver directives. See the comment later
655 in this function. */
656 if (symver_state == NULL)
657 {
658 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
659 symver_state = symver_pseudo + 1;
660 }
661 else
662 {
663 /* We advance to the next state if we find the right
664 character. */
665 if (ch != '\0' && (*symver_state == ch))
666 ++symver_state;
667 else if (*symver_state != '\0')
668 /* We did not get the expected character, or we didn't
669 get a valid terminating character after seeing the
670 entire pseudo-op, so we must go back to the beginning. */
671 symver_state = NULL;
672 else
673 {
674 /* We've read the entire pseudo-op. If this is the end
675 of the line, go back to the beginning. */
676 if (IS_NEWLINE (ch))
677 symver_state = NULL;
678 }
679 }
680 #endif /* TC_ARM && OBJ_ELF */
681
682 #ifdef TC_M68K
683 /* We want to have pseudo-ops which control whether we are in
684 MRI mode or not. Unfortunately, since m68k MRI mode affects
685 the scrubber, that means that we need a special purpose
686 recognizer here. */
687 if (mri_state == NULL)
688 {
689 if ((state == 0 || state == 1)
690 && ch == mri_pseudo[0])
691 mri_state = mri_pseudo + 1;
692 }
693 else
694 {
695 /* We advance to the next state if we find the right
696 character, or if we need a space character and we get any
697 whitespace character, or if we need a '0' and we get a
698 '1' (this is so that we only need one state to handle
699 ``.mri 0'' and ``.mri 1''). */
700 if (ch != '\0'
701 && (*mri_state == ch
702 || (*mri_state == ' '
703 && lex[ch] == LEX_IS_WHITESPACE)
704 || (*mri_state == '0'
705 && ch == '1')))
706 {
707 mri_last_ch = ch;
708 ++mri_state;
709 }
710 else if (*mri_state != '\0'
711 || (lex[ch] != LEX_IS_WHITESPACE
712 && lex[ch] != LEX_IS_NEWLINE))
713 {
714 /* We did not get the expected character, or we didn't
715 get a valid terminating character after seeing the
716 entire pseudo-op, so we must go back to the
717 beginning. */
718 mri_state = NULL;
719 }
720 else
721 {
722 /* We've read the entire pseudo-op. mips_last_ch is
723 either '0' or '1' indicating whether to enter or
724 leave MRI mode. */
725 do_scrub_begin (mri_last_ch == '1');
726 mri_state = NULL;
727
728 /* We continue handling the character as usual. The
729 main gas reader must also handle the .mri pseudo-op
730 to control expression parsing and the like. */
731 }
732 }
733 #endif
734
735 if (ch == EOF)
736 {
737 if (state != 0)
738 {
739 as_warn (_("end of file not at end of a line; newline inserted"));
740 state = 0;
741 PUT ('\n');
742 }
743 goto fromeof;
744 }
745
746 switch (lex[ch])
747 {
748 case LEX_IS_WHITESPACE:
749 do
750 {
751 ch = GET ();
752 }
753 while (ch != EOF && IS_WHITESPACE (ch));
754 if (ch == EOF)
755 goto fromeof;
756
757 if (state == 0)
758 {
759 /* Preserve a single whitespace character at the
760 beginning of a line. */
761 state = 1;
762 UNGET (ch);
763 PUT (' ');
764 break;
765 }
766
767 #ifdef KEEP_WHITE_AROUND_COLON
768 if (lex[ch] == LEX_IS_COLON)
769 {
770 /* only keep this white if there's no white *after* the colon */
771 ch2 = GET ();
772 UNGET (ch2);
773 if (!IS_WHITESPACE (ch2))
774 {
775 state = 9;
776 UNGET (ch);
777 PUT (' ');
778 break;
779 }
780 }
781 #endif
782 if (IS_COMMENT (ch)
783 || ch == '/'
784 || IS_LINE_SEPARATOR (ch))
785 {
786 if (scrub_m68k_mri)
787 {
788 /* In MRI mode, we keep these spaces. */
789 UNGET (ch);
790 PUT (' ');
791 break;
792 }
793 goto recycle;
794 }
795
796 /* If we're in state 2 or 11, we've seen a non-white
797 character followed by whitespace. If the next character
798 is ':', this is whitespace after a label name which we
799 normally must ignore. In MRI mode, though, spaces are
800 not permitted between the label and the colon. */
801 if ((state == 2 || state == 11)
802 && lex[ch] == LEX_IS_COLON
803 && ! scrub_m68k_mri)
804 {
805 state = 1;
806 PUT (ch);
807 break;
808 }
809
810 switch (state)
811 {
812 case 0:
813 state++;
814 goto recycle; /* Punted leading sp */
815 case 1:
816 /* We can arrive here if we leave a leading whitespace
817 character at the beginning of a line. */
818 goto recycle;
819 case 2:
820 state = 3;
821 if (to + 1 < toend)
822 {
823 /* Optimize common case by skipping UNGET/GET. */
824 PUT (' '); /* Sp after opco */
825 goto recycle;
826 }
827 UNGET (ch);
828 PUT (' ');
829 break;
830 case 3:
831 if (scrub_m68k_mri)
832 {
833 /* In MRI mode, we keep these spaces. */
834 UNGET (ch);
835 PUT (' ');
836 break;
837 }
838 goto recycle; /* Sp in operands */
839 case 9:
840 case 10:
841 if (scrub_m68k_mri)
842 {
843 /* In MRI mode, we keep these spaces. */
844 state = 3;
845 UNGET (ch);
846 PUT (' ');
847 break;
848 }
849 state = 10; /* Sp after symbol char */
850 goto recycle;
851 case 11:
852 if (flag_m68k_mri
853 #ifdef LABELS_WITHOUT_COLONS
854 || 1
855 #endif
856 )
857 state = 1;
858 else
859 {
860 /* We know that ch is not ':', since we tested that
861 case above. Therefore this is not a label, so it
862 must be the opcode, and we've just seen the
863 whitespace after it. */
864 state = 3;
865 }
866 UNGET (ch);
867 PUT (' '); /* Sp after label definition. */
868 break;
869 default:
870 BAD_CASE (state);
871 }
872 break;
873
874 case LEX_IS_TWOCHAR_COMMENT_1ST:
875 ch2 = GET ();
876 if (ch2 == '*')
877 {
878 for (;;)
879 {
880 do
881 {
882 ch2 = GET ();
883 if (ch2 != EOF && IS_NEWLINE (ch2))
884 add_newlines++;
885 }
886 while (ch2 != EOF && ch2 != '*');
887
888 while (ch2 == '*')
889 ch2 = GET ();
890
891 if (ch2 == EOF || ch2 == '/')
892 break;
893
894 /* This UNGET will ensure that we count newlines
895 correctly. */
896 UNGET (ch2);
897 }
898
899 if (ch2 == EOF)
900 as_warn (_("end of file in multiline comment"));
901
902 ch = ' ';
903 goto recycle;
904 }
905 else
906 {
907 if (ch2 != EOF)
908 UNGET (ch2);
909 if (state == 9 || state == 10)
910 state = 3;
911 PUT (ch);
912 }
913 break;
914
915 case LEX_IS_STRINGQUOTE:
916 if (state == 10)
917 {
918 /* Preserve the whitespace in foo "bar" */
919 UNGET (ch);
920 state = 3;
921 PUT (' ');
922
923 /* PUT didn't jump out. We could just break, but we
924 know what will happen, so optimize a bit. */
925 ch = GET ();
926 old_state = 3;
927 }
928 else if (state == 9)
929 old_state = 3;
930 else
931 old_state = state;
932 state = 5;
933 PUT (ch);
934 break;
935
936 #ifndef IEEE_STYLE
937 case LEX_IS_ONECHAR_QUOTE:
938 if (state == 10)
939 {
940 /* Preserve the whitespace in foo 'b' */
941 UNGET (ch);
942 state = 3;
943 PUT (' ');
944 break;
945 }
946 ch = GET ();
947 if (ch == EOF)
948 {
949 as_warn (_("end of file after a one-character quote; \\0 inserted"));
950 ch = 0;
951 }
952 if (ch == '\\')
953 {
954 ch = GET ();
955 if (ch == EOF)
956 {
957 as_warn (_("end of file in escape character"));
958 ch = '\\';
959 }
960 else
961 ch = process_escape (ch);
962 }
963 sprintf (out_buf, "%d", (int) (unsigned char) ch);
964
965 /* None of these 'x constants for us. We want 'x'. */
966 if ((ch = GET ()) != '\'')
967 {
968 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
969 as_warn (_("Missing close quote: (assumed)"));
970 #else
971 if (ch != EOF)
972 UNGET (ch);
973 #endif
974 }
975 if (strlen (out_buf) == 1)
976 {
977 PUT (out_buf[0]);
978 break;
979 }
980 if (state == 9)
981 old_state = 3;
982 else
983 old_state = state;
984 state = -1;
985 out_string = out_buf;
986 PUT (*out_string++);
987 break;
988 #endif
989
990 case LEX_IS_COLON:
991 #ifdef KEEP_WHITE_AROUND_COLON
992 state = 9;
993 #else
994 if (state == 9 || state == 10)
995 state = 3;
996 else if (state != 3)
997 state = 1;
998 #endif
999 PUT (ch);
1000 break;
1001
1002 case LEX_IS_NEWLINE:
1003 /* Roll out a bunch of newlines from inside comments, etc. */
1004 if (add_newlines)
1005 {
1006 --add_newlines;
1007 UNGET (ch);
1008 }
1009 /* fall thru into... */
1010
1011 case LEX_IS_LINE_SEPARATOR:
1012 state = 0;
1013 PUT (ch);
1014 break;
1015
1016 #ifdef TC_V850
1017 case LEX_IS_DOUBLEDASH_1ST:
1018 ch2 = GET();
1019 if (ch2 != '-')
1020 {
1021 UNGET (ch2);
1022 goto de_fault;
1023 }
1024 /* read and skip to end of line */
1025 do
1026 {
1027 ch = GET ();
1028 }
1029 while (ch != EOF && ch != '\n');
1030 if (ch == EOF)
1031 {
1032 as_warn (_("end of file in comment; newline inserted"));
1033 }
1034 state = 0;
1035 PUT ('\n');
1036 break;
1037 #endif
1038 #ifdef DOUBLEBAR_PARALLEL
1039 case LEX_IS_DOUBLEBAR_1ST:
1040 ch2 = GET();
1041 if (ch2 != '|')
1042 {
1043 UNGET (ch2);
1044 goto de_fault;
1045 }
1046 /* Reset back to state 1 and pretend that we are parsing a line from
1047 just after the first white space. */
1048 state = 1;
1049 PUT ('|');
1050 PUT ('|');
1051 break;
1052 #endif
1053 case LEX_IS_LINE_COMMENT_START:
1054 /* FIXME-someday: The two character comment stuff was badly
1055 thought out. On i386, we want '/' as line comment start
1056 AND we want C style comments. hence this hack. The
1057 whole lexical process should be reworked. xoxorich. */
1058 if (ch == '/')
1059 {
1060 ch2 = GET ();
1061 if (ch2 == '*')
1062 {
1063 old_state = 3;
1064 state = -2;
1065 break;
1066 }
1067 else
1068 {
1069 UNGET (ch2);
1070 }
1071 } /* bad hack */
1072
1073 if (state == 0 || state == 1) /* Only comment at start of line. */
1074 {
1075 int startch;
1076
1077 startch = ch;
1078
1079 do
1080 {
1081 ch = GET ();
1082 }
1083 while (ch != EOF && IS_WHITESPACE (ch));
1084 if (ch == EOF)
1085 {
1086 as_warn (_("end of file in comment; newline inserted"));
1087 PUT ('\n');
1088 break;
1089 }
1090 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1091 {
1092 /* Not a cpp line. */
1093 while (ch != EOF && !IS_NEWLINE (ch))
1094 ch = GET ();
1095 if (ch == EOF)
1096 as_warn (_("EOF in Comment: Newline inserted"));
1097 state = 0;
1098 PUT ('\n');
1099 break;
1100 }
1101 /* Loks like `# 123 "filename"' from cpp. */
1102 UNGET (ch);
1103 old_state = 4;
1104 state = -1;
1105 if (scrub_m68k_mri)
1106 out_string = "\tappline ";
1107 else
1108 out_string = "\t.appline ";
1109 PUT (*out_string++);
1110 break;
1111 }
1112
1113 #ifdef TC_D10V
1114 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1115 Trap is the only short insn that has a first operand that is
1116 neither register nor label.
1117 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1118 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is already
1119 LEX_IS_LINE_COMMENT_START. However, it is the only character in
1120 line_comment_chars for d10v, hence we can recognize it as such. */
1121 /* An alternative approach would be to reset the state to 1 when
1122 we see '||', '<'- or '->', but that seems to be overkill. */
1123 if (state == 10) PUT (' ');
1124 #endif
1125 /* We have a line comment character which is not at the
1126 start of a line. If this is also a normal comment
1127 character, fall through. Otherwise treat it as a default
1128 character. */
1129 if (strchr (tc_comment_chars, ch) == NULL
1130 && (! scrub_m68k_mri
1131 || (ch != '!' && ch != '*')))
1132 goto de_fault;
1133 if (scrub_m68k_mri
1134 && (ch == '!' || ch == '*' || ch == '#')
1135 && state != 1
1136 && state != 10)
1137 goto de_fault;
1138 /* Fall through. */
1139 case LEX_IS_COMMENT_START:
1140 #if defined TC_ARM && defined OBJ_ELF
1141 /* On the ARM, `@' is the comment character.
1142 Unfortunately this is also a special character in ELF .symver
1143 directives (and .type, though we deal with those another way). So
1144 we check if this line is such a directive, and treat the character
1145 as default if so. This is a hack. */
1146 if ((symver_state != NULL) && (*symver_state == 0))
1147 goto de_fault;
1148 #endif
1149 do
1150 {
1151 ch = GET ();
1152 }
1153 while (ch != EOF && !IS_NEWLINE (ch));
1154 if (ch == EOF)
1155 as_warn (_("end of file in comment; newline inserted"));
1156 state = 0;
1157 PUT ('\n');
1158 break;
1159
1160 case LEX_IS_SYMBOL_COMPONENT:
1161 if (state == 10)
1162 {
1163 /* This is a symbol character following another symbol
1164 character, with whitespace in between. We skipped
1165 the whitespace earlier, so output it now. */
1166 UNGET (ch);
1167 state = 3;
1168 PUT (' ');
1169 break;
1170 }
1171
1172 if (state == 3)
1173 state = 9;
1174
1175 /* This is a common case. Quickly copy CH and all the
1176 following symbol component or normal characters. */
1177 if (to + 1 < toend
1178 && mri_state == NULL
1179 #if defined TC_ARM && defined OBJ_ELF
1180 && symver_state == NULL
1181 #endif
1182 )
1183 {
1184 char *s;
1185 int len;
1186
1187 for (s = from; s < fromend; s++)
1188 {
1189 int type;
1190
1191 ch2 = * (unsigned char *) s;
1192 type = lex[ch2];
1193 if (type != 0
1194 && type != LEX_IS_SYMBOL_COMPONENT)
1195 break;
1196 }
1197 if (s > from)
1198 {
1199 /* Handle the last character normally, for
1200 simplicity. */
1201 --s;
1202 }
1203 len = s - from;
1204 if (len > (toend - to) - 1)
1205 len = (toend - to) - 1;
1206 if (len > 0)
1207 {
1208 PUT (ch);
1209 if (len > 8)
1210 {
1211 memcpy (to, from, len);
1212 to += len;
1213 from += len;
1214 }
1215 else
1216 {
1217 switch (len)
1218 {
1219 case 8: *to++ = *from++;
1220 case 7: *to++ = *from++;
1221 case 6: *to++ = *from++;
1222 case 5: *to++ = *from++;
1223 case 4: *to++ = *from++;
1224 case 3: *to++ = *from++;
1225 case 2: *to++ = *from++;
1226 case 1: *to++ = *from++;
1227 }
1228 }
1229 ch = GET ();
1230 }
1231 }
1232
1233 /* Fall through. */
1234 default:
1235 de_fault:
1236 /* Some relatively `normal' character. */
1237 if (state == 0)
1238 {
1239 state = 11; /* Now seeing label definition */
1240 }
1241 else if (state == 1)
1242 {
1243 state = 2; /* Ditto */
1244 }
1245 else if (state == 9)
1246 {
1247 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1248 state = 3;
1249 }
1250 else if (state == 10)
1251 {
1252 if (ch == '\\')
1253 {
1254 /* Special handling for backslash: a backslash may
1255 be the beginning of a formal parameter (of a
1256 macro) following another symbol character, with
1257 whitespace in between. If that is the case, we
1258 output a space before the parameter. Strictly
1259 speaking, correct handling depends upon what the
1260 macro parameter expands into; if the parameter
1261 expands into something which does not start with
1262 an operand character, then we don't want to keep
1263 the space. We don't have enough information to
1264 make the right choice, so here we are making the
1265 choice which is more likely to be correct. */
1266 PUT (' ');
1267 }
1268
1269 state = 3;
1270 }
1271 PUT (ch);
1272 break;
1273 }
1274 }
1275
1276 /*NOTREACHED*/
1277
1278 fromeof:
1279 /* We have reached the end of the input. */
1280 return to - tostart;
1281
1282 tofull:
1283 /* The output buffer is full. Save any input we have not yet
1284 processed. */
1285 if (fromend > from)
1286 {
1287 saved_input = from;
1288 saved_input_len = fromend - from;
1289 }
1290 else
1291 saved_input = NULL;
1292
1293 return to - tostart;
1294 }
1295
1296 /* end of app.c */
This page took 0.058209 seconds and 5 git commands to generate.