daily update
[deliverable/binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
3 1999, 2000, 2002, 2003
4 Free Software Foundation, Inc.
5
6 This file is part of GAS, the GNU Assembler.
7
8 GAS is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GAS is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GAS; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
22
23 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
27 pair. This needs better error-handling. */
28
29 #include <stdio.h>
30 #include "as.h" /* For BAD_CASE() only */
31
/* When the compiler does not announce itself as standard C (__STDC__ is
   not 1) and nothing has defined `const' as a macro yet, make `const'
   expand to nothing so the declarations below still compile.  */
#if (__STDC__ != 1)
#ifndef const
#define const /* empty */
#endif
#endif

#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  Set by do_scrub_begin.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  The recognizer there also accepts '1'
   where this pattern has '0', so one pattern covers both ".mri 0" and
   ".mri 1".  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Without TC_M68K the scrubber is never in MRI mode; making this a
   constant lets every `if (scrub_m68k_mri)' test be written
   unconditionally.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";

/* Progress of the .symver recognizer in do_scrub_chars: NULL while no
   match is in progress, otherwise a pointer to the next character of
   symver_pseudo still to be matched.  It points at the terminating
   NUL once the whole pseudo-op has been seen.  */
static const char * symver_state;
#endif
57
/* Lexical class of each character, indexed by character code.  An
   entry holds one of the LEX_IS_* values below; zero means the
   character has no special class.  The table is filled in by
   do_scrub_begin.  */
static char lex[256];

/* Characters that are always symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* The lexical classes stored in lex[].  The value 7 is not assigned
   to any class here.  */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11
#ifdef TC_V850
/* A '-' that may be the first half of a "--" comment.  */
#define LEX_IS_DOUBLEDASH_1ST 12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
/* A '|' that may be the first half of a "||" parallel separator.  */
#define LEX_IS_DOUBLEBAR_1ST 13
#endif
#define LEX_IS_PARALLEL_SEPARATOR 14

/* Class tests.  The argument indexes lex[] directly, so it must be a
   valid index into the table; in particular it must not be EOF.  */
#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)

/* Defined below, after app_pop.  */
static int process_escape PARAMS ((int));
91
92 /* FIXME-soon: The entire lexer/parser thingy should be
93 built statically at compile time rather than dynamically
94 each and every time the assembler is run. xoxorich. */
95
96 void
97 do_scrub_begin (m68k_mri)
98 int m68k_mri ATTRIBUTE_UNUSED;
99 {
100 const char *p;
101 int c;
102
103 lex[' '] = LEX_IS_WHITESPACE;
104 lex['\t'] = LEX_IS_WHITESPACE;
105 lex['\r'] = LEX_IS_WHITESPACE;
106 lex['\n'] = LEX_IS_NEWLINE;
107 lex[':'] = LEX_IS_COLON;
108
109 #ifdef TC_M68K
110 scrub_m68k_mri = m68k_mri;
111
112 if (! m68k_mri)
113 #endif
114 {
115 lex['"'] = LEX_IS_STRINGQUOTE;
116
117 #if ! defined (TC_HPPA) && ! defined (TC_I370)
118 /* I370 uses single-quotes to delimit integer, float constants */
119 lex['\''] = LEX_IS_ONECHAR_QUOTE;
120 #endif
121
122 #ifdef SINGLE_QUOTE_STRINGS
123 lex['\''] = LEX_IS_STRINGQUOTE;
124 #endif
125 }
126
127 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
128 in state 5 of do_scrub_chars must be changed. */
129
130 /* Note that these override the previous defaults, e.g. if ';' is a
131 comment char, then it isn't a line separator. */
132 for (p = symbol_chars; *p; ++p)
133 {
134 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
135 } /* declare symbol characters */
136
137 for (c = 128; c < 256; ++c)
138 lex[c] = LEX_IS_SYMBOL_COMPONENT;
139
140 #ifdef tc_symbol_chars
141 /* This macro permits the processor to specify all characters which
142 may appears in an operand. This will prevent the scrubber from
143 discarding meaningful whitespace in certain cases. The i386
144 backend uses this to support prefixes, which can confuse the
145 scrubber as to whether it is parsing operands or opcodes. */
146 for (p = tc_symbol_chars; *p; ++p)
147 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
148 #endif
149
150 /* The m68k backend wants to be able to change comment_chars. */
151 #ifndef tc_comment_chars
152 #define tc_comment_chars comment_chars
153 #endif
154 for (p = tc_comment_chars; *p; p++)
155 {
156 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
157 } /* declare comment chars */
158
159 for (p = line_comment_chars; *p; p++)
160 {
161 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162 } /* declare line comment chars */
163
164 for (p = line_separator_chars; *p; p++)
165 {
166 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
167 } /* declare line separators */
168
169 #ifdef tc_parallel_separator_chars
170 /* This macro permits the processor to specify all characters which
171 separate parallel insns on the same line. */
172 for (p = tc_parallel_separator_chars; *p; p++)
173 {
174 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
175 } /* declare parallel separators */
176 #endif
177
178 /* Only allow slash-star comments if slash is not in use.
179 FIXME: This isn't right. We should always permit them. */
180 if (lex['/'] == 0)
181 {
182 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
183 }
184
185 #ifdef TC_M68K
186 if (m68k_mri)
187 {
188 lex['\''] = LEX_IS_STRINGQUOTE;
189 lex[';'] = LEX_IS_COMMENT_START;
190 lex['*'] = LEX_IS_LINE_COMMENT_START;
191 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
192 then it can't be used in an expression. */
193 lex['!'] = LEX_IS_LINE_COMMENT_START;
194 }
195 #endif
196
197 #ifdef TC_V850
198 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
199 #endif
200 #ifdef DOUBLEBAR_PARALLEL
201 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
202 #endif
203 #ifdef TC_D30V
204 /* must do this is we want VLIW instruction with "->" or "<-" */
205 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
206 #endif
207 } /* do_scrub_begin() */
208
/* Saved state of the scrubber.  do_scrub_chars may return at any
   point, so everything it needs in order to resume lives in these
   file-scope variables.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to go to once one of the temporary states (-1, -2, 5) is
   finished.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
/* Holds the decimal text produced for a one-character quote;
   out_string is pointed at it.  */
static char out_buf[20];
/* Number of newlines swallowed inside comments and continued strings
   which are re-emitted when a later newline is seen.  */
static int add_newlines;
/* Input not yet consumed when the output buffer filled up, or NULL if
   there is none, and the number of characters in it.  */
static char *saved_input;
static int saved_input_len;
/* Buffer handed to the GET callback of do_scrub_chars.  */
static char input_buffer[32 * 1024];
/* Progress of the ".mri" recognizer in do_scrub_chars: NULL while no
   match is in progress, otherwise the next pattern character to
   match.  */
static const char *mri_state;
/* The last character accepted by the ".mri" recognizer.  */
static char mri_last_ch;
220
/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.

   Each member is a copy of the file-scope variable of the same name.
   saved_input is a separately allocated copy of the pending input, or
   NULL when there was none.  app_push fills the structure in and
   app_pop consumes and frees it.  */

struct app_save {
  int state;
  int old_state;
  char * out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char * saved_input;
  int saved_input_len;
#ifdef TC_M68K
  int scrub_m68k_mri;
#endif
  const char * mri_state;
  char mri_last_ch;
#if defined TC_ARM && defined OBJ_ELF
  const char * symver_state;
#endif
};
243
244 char *
245 app_push ()
246 {
247 register struct app_save *saved;
248
249 saved = (struct app_save *) xmalloc (sizeof (*saved));
250 saved->state = state;
251 saved->old_state = old_state;
252 saved->out_string = out_string;
253 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
254 saved->add_newlines = add_newlines;
255 if (saved_input == NULL)
256 saved->saved_input = NULL;
257 else
258 {
259 saved->saved_input = xmalloc (saved_input_len);
260 memcpy (saved->saved_input, saved_input, saved_input_len);
261 saved->saved_input_len = saved_input_len;
262 }
263 #ifdef TC_M68K
264 saved->scrub_m68k_mri = scrub_m68k_mri;
265 #endif
266 saved->mri_state = mri_state;
267 saved->mri_last_ch = mri_last_ch;
268 #if defined TC_ARM && defined OBJ_ELF
269 saved->symver_state = symver_state;
270 #endif
271
272 /* do_scrub_begin() is not useful, just wastes time. */
273
274 state = 0;
275 saved_input = NULL;
276
277 return (char *) saved;
278 }
279
280 void
281 app_pop (arg)
282 char *arg;
283 {
284 register struct app_save *saved = (struct app_save *) arg;
285
286 /* There is no do_scrub_end (). */
287 state = saved->state;
288 old_state = saved->old_state;
289 out_string = saved->out_string;
290 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
291 add_newlines = saved->add_newlines;
292 if (saved->saved_input == NULL)
293 saved_input = NULL;
294 else
295 {
296 assert (saved->saved_input_len <= (int) (sizeof input_buffer));
297 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
298 saved_input = input_buffer;
299 saved_input_len = saved->saved_input_len;
300 free (saved->saved_input);
301 }
302 #ifdef TC_M68K
303 scrub_m68k_mri = saved->scrub_m68k_mri;
304 #endif
305 mri_state = saved->mri_state;
306 mri_last_ch = saved->mri_last_ch;
307 #if defined TC_ARM && defined OBJ_ELF
308 symver_state = saved->symver_state;
309 #endif
310
311 free (arg);
312 } /* app_pop() */
313
/* Return the character denoted by the escape sequence backslash-CH.
   The letters b, f, n, r and t give the matching control character;
   every other character, including the two quote characters, stands
   for itself.

   @@ This assumes that \n &c are the same on host and target.  This
   is not necessarily true.  */

static int
process_escape (ch)
     int ch;
{
  /* Parallel tables: escape letter and the character it denotes.  */
  static const char escape_letters[] = { 'b', 'f', 'n', 'r', 't' };
  static const char escape_values[] = { '\b', '\f', '\n', '\r', '\t' };
  unsigned int i;

  for (i = 0; i < sizeof escape_letters / sizeof escape_letters[0]; i++)
    if (ch == escape_letters[i])
      return escape_values[i];

  return ch;
}
340
341 /* This function is called to process input characters. The GET
342 parameter is used to retrieve more input characters. GET should
343 set its parameter to point to a buffer, and return the length of
344 the buffer; it should return 0 at end of file. The scrubbed output
345 characters are put into the buffer starting at TOSTART; the TOSTART
346 buffer is TOLEN bytes in length. The function returns the number
347 of scrubbed characters put into TOSTART. This will be TOLEN unless
348 end of file was seen. This function is arranged as a state
349 machine, and saves its state so that it may return at any point.
350 This is the way the old code used to work. */
351
352 int
353 do_scrub_chars (get, tostart, tolen)
354 int (*get) PARAMS ((char *, int));
355 char *tostart;
356 int tolen;
357 {
358 char *to = tostart;
359 char *toend = tostart + tolen;
360 char *from;
361 char *fromend;
362 int fromlen;
363 register int ch, ch2 = 0;
364
365 /*State 0: beginning of normal line
366 1: After first whitespace on line (flush more white)
367 2: After first non-white (opcode) on line (keep 1white)
368 3: after second white on line (into operands) (flush white)
369 4: after putting out a .line, put out digits
370 5: parsing a string, then go to old-state
371 6: putting out \ escape in a "d string.
372 7: After putting out a .appfile, put out string.
373 8: After putting out a .appfile string, flush until newline.
374 9: After seeing symbol char in state 3 (keep 1white after symchar)
375 10: After seeing whitespace in state 9 (keep white before symchar)
376 11: After seeing a symbol character in state 0 (eg a label definition)
377 -1: output string in out_string and go to the state in old_state
378 -2: flush text until a '*' '/' is seen, then go to state old_state
379 #ifdef TC_V850
380 12: After seeing a dash, looking for a second dash as a start
381 of comment.
382 #endif
383 #ifdef DOUBLEBAR_PARALLEL
384 13: After seeing a vertical bar, looking for a second
385 vertical bar as a parallel expression separator.
386 #endif
387 */
388
389 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
390 constructs like ``.loc 1 20''. This was turning into ``.loc
391 120''. States 9 and 10 ensure that a space is never dropped in
392 between characters which could appear in an identifier. Ian
393 Taylor, ian@cygnus.com.
394
395 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
396 correctly on the PA (and any other target where colons are optional).
397 Jeff Law, law@cs.utah.edu.
398
399 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
400 get squashed into "cmp r1,r2||trap#1", with the all important space
401 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
402
403 /* This macro gets the next input character. */
404
405 #define GET() \
406 (from < fromend \
407 ? * (unsigned char *) (from++) \
408 : (saved_input = NULL, \
409 fromlen = (*get) (input_buffer, sizeof input_buffer), \
410 from = input_buffer, \
411 fromend = from + fromlen, \
412 (fromlen == 0 \
413 ? EOF \
414 : * (unsigned char *) (from++))))
415
416 /* This macro pushes a character back on the input stream. */
417
418 #define UNGET(uch) (*--from = (uch))
419
420 /* This macro puts a character into the output buffer. If this
421 character fills the output buffer, this macro jumps to the label
422 TOFULL. We use this rather ugly approach because we need to
423 handle two different termination conditions: EOF on the input
424 stream, and a full output buffer. It would be simpler if we
425 always read in the entire input stream before processing it, but
426 I don't want to make such a significant change to the assembler's
427 memory usage. */
428
429 #define PUT(pch) \
430 do \
431 { \
432 *to++ = (pch); \
433 if (to >= toend) \
434 goto tofull; \
435 } \
436 while (0)
437
438 if (saved_input != NULL)
439 {
440 from = saved_input;
441 fromend = from + saved_input_len;
442 }
443 else
444 {
445 fromlen = (*get) (input_buffer, sizeof input_buffer);
446 if (fromlen == 0)
447 return 0;
448 from = input_buffer;
449 fromend = from + fromlen;
450 }
451
452 while (1)
453 {
454 /* The cases in this switch end with continue, in order to
455 branch back to the top of this while loop and generate the
456 next output character in the appropriate state. */
457 switch (state)
458 {
459 case -1:
460 ch = *out_string++;
461 if (*out_string == '\0')
462 {
463 state = old_state;
464 old_state = 3;
465 }
466 PUT (ch);
467 continue;
468
469 case -2:
470 for (;;)
471 {
472 do
473 {
474 ch = GET ();
475
476 if (ch == EOF)
477 {
478 as_warn (_("end of file in comment"));
479 goto fromeof;
480 }
481
482 if (ch == '\n')
483 PUT ('\n');
484 }
485 while (ch != '*');
486
487 while ((ch = GET ()) == '*')
488 ;
489
490 if (ch == EOF)
491 {
492 as_warn (_("end of file in comment"));
493 goto fromeof;
494 }
495
496 if (ch == '/')
497 break;
498
499 UNGET (ch);
500 }
501
502 state = old_state;
503 UNGET (' ');
504 continue;
505
506 case 4:
507 ch = GET ();
508 if (ch == EOF)
509 goto fromeof;
510 else if (ch >= '0' && ch <= '9')
511 PUT (ch);
512 else
513 {
514 while (ch != EOF && IS_WHITESPACE (ch))
515 ch = GET ();
516 if (ch == '"')
517 {
518 UNGET (ch);
519 if (scrub_m68k_mri)
520 out_string = "\n\tappfile ";
521 else
522 out_string = "\n\t.appfile ";
523 old_state = 7;
524 state = -1;
525 PUT (*out_string++);
526 }
527 else
528 {
529 while (ch != EOF && ch != '\n')
530 ch = GET ();
531 state = 0;
532 PUT (ch);
533 }
534 }
535 continue;
536
537 case 5:
538 /* We are going to copy everything up to a quote character,
539 with special handling for a backslash. We try to
540 optimize the copying in the simple case without using the
541 GET and PUT macros. */
542 {
543 char *s;
544 int len;
545
546 for (s = from; s < fromend; s++)
547 {
548 ch = *s;
549 /* This condition must be changed if the type of any
550 other character can be LEX_IS_STRINGQUOTE. */
551 if (ch == '\\'
552 || ch == '"'
553 || ch == '\''
554 || ch == '\n')
555 break;
556 }
557 len = s - from;
558 if (len > toend - to)
559 len = toend - to;
560 if (len > 0)
561 {
562 memcpy (to, from, len);
563 to += len;
564 from += len;
565 }
566 }
567
568 ch = GET ();
569 if (ch == EOF)
570 {
571 as_warn (_("end of file in string; inserted '\"'"));
572 state = old_state;
573 UNGET ('\n');
574 PUT ('"');
575 }
576 else if (lex[ch] == LEX_IS_STRINGQUOTE)
577 {
578 state = old_state;
579 PUT (ch);
580 }
581 #ifndef NO_STRING_ESCAPES
582 else if (ch == '\\')
583 {
584 state = 6;
585 PUT (ch);
586 }
587 #endif
588 else if (scrub_m68k_mri && ch == '\n')
589 {
590 /* Just quietly terminate the string. This permits lines like
591 bne label loop if we haven't reach end yet
592 */
593 state = old_state;
594 UNGET (ch);
595 PUT ('\'');
596 }
597 else
598 {
599 PUT (ch);
600 }
601 continue;
602
603 case 6:
604 state = 5;
605 ch = GET ();
606 switch (ch)
607 {
608 /* Handle strings broken across lines, by turning '\n' into
609 '\\' and 'n'. */
610 case '\n':
611 UNGET ('n');
612 add_newlines++;
613 PUT ('\\');
614 continue;
615
616 case EOF:
617 as_warn (_("end of file in string; '\"' inserted"));
618 PUT ('"');
619 continue;
620
621 case '"':
622 case '\\':
623 case 'b':
624 case 'f':
625 case 'n':
626 case 'r':
627 case 't':
628 case 'v':
629 case 'x':
630 case 'X':
631 case '0':
632 case '1':
633 case '2':
634 case '3':
635 case '4':
636 case '5':
637 case '6':
638 case '7':
639 break;
640
641 default:
642 #ifdef ONLY_STANDARD_ESCAPES
643 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
644 #endif
645 break;
646 }
647 PUT (ch);
648 continue;
649
650 case 7:
651 ch = GET ();
652 state = 5;
653 old_state = 8;
654 if (ch == EOF)
655 goto fromeof;
656 PUT (ch);
657 continue;
658
659 case 8:
660 do
661 ch = GET ();
662 while (ch != '\n' && ch != EOF);
663 if (ch == EOF)
664 goto fromeof;
665 state = 0;
666 PUT (ch);
667 continue;
668
669 #ifdef DOUBLEBAR_PARALLEL
670 case 13:
671 ch = GET ();
672 if (ch != '|')
673 abort ();
674
675 /* Reset back to state 1 and pretend that we are parsing a
676 line from just after the first white space. */
677 state = 1;
678 PUT ('|');
679 continue;
680 #endif
681 }
682
683 /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
684
685 /* flushchar: */
686 ch = GET ();
687
688 recycle:
689
690 #if defined TC_ARM && defined OBJ_ELF
691 /* We need to watch out for .symver directives. See the comment later
692 in this function. */
693 if (symver_state == NULL)
694 {
695 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
696 symver_state = symver_pseudo + 1;
697 }
698 else
699 {
700 /* We advance to the next state if we find the right
701 character. */
702 if (ch != '\0' && (*symver_state == ch))
703 ++symver_state;
704 else if (*symver_state != '\0')
705 /* We did not get the expected character, or we didn't
706 get a valid terminating character after seeing the
707 entire pseudo-op, so we must go back to the beginning. */
708 symver_state = NULL;
709 else
710 {
711 /* We've read the entire pseudo-op. If this is the end
712 of the line, go back to the beginning. */
713 if (IS_NEWLINE (ch))
714 symver_state = NULL;
715 }
716 }
717 #endif /* TC_ARM && OBJ_ELF */
718
719 #ifdef TC_M68K
720 /* We want to have pseudo-ops which control whether we are in
721 MRI mode or not. Unfortunately, since m68k MRI mode affects
722 the scrubber, that means that we need a special purpose
723 recognizer here. */
724 if (mri_state == NULL)
725 {
726 if ((state == 0 || state == 1)
727 && ch == mri_pseudo[0])
728 mri_state = mri_pseudo + 1;
729 }
730 else
731 {
732 /* We advance to the next state if we find the right
733 character, or if we need a space character and we get any
734 whitespace character, or if we need a '0' and we get a
735 '1' (this is so that we only need one state to handle
736 ``.mri 0'' and ``.mri 1''). */
737 if (ch != '\0'
738 && (*mri_state == ch
739 || (*mri_state == ' '
740 && lex[ch] == LEX_IS_WHITESPACE)
741 || (*mri_state == '0'
742 && ch == '1')))
743 {
744 mri_last_ch = ch;
745 ++mri_state;
746 }
747 else if (*mri_state != '\0'
748 || (lex[ch] != LEX_IS_WHITESPACE
749 && lex[ch] != LEX_IS_NEWLINE))
750 {
751 /* We did not get the expected character, or we didn't
752 get a valid terminating character after seeing the
753 entire pseudo-op, so we must go back to the
754 beginning. */
755 mri_state = NULL;
756 }
757 else
758 {
759 /* We've read the entire pseudo-op. mips_last_ch is
760 either '0' or '1' indicating whether to enter or
761 leave MRI mode. */
762 do_scrub_begin (mri_last_ch == '1');
763 mri_state = NULL;
764
765 /* We continue handling the character as usual. The
766 main gas reader must also handle the .mri pseudo-op
767 to control expression parsing and the like. */
768 }
769 }
770 #endif
771
772 if (ch == EOF)
773 {
774 if (state != 0)
775 {
776 as_warn (_("end of file not at end of a line; newline inserted"));
777 state = 0;
778 PUT ('\n');
779 }
780 goto fromeof;
781 }
782
783 switch (lex[ch])
784 {
785 case LEX_IS_WHITESPACE:
786 do
787 {
788 ch = GET ();
789 }
790 while (ch != EOF && IS_WHITESPACE (ch));
791 if (ch == EOF)
792 goto fromeof;
793
794 if (state == 0)
795 {
796 /* Preserve a single whitespace character at the
797 beginning of a line. */
798 state = 1;
799 UNGET (ch);
800 PUT (' ');
801 break;
802 }
803
804 #ifdef KEEP_WHITE_AROUND_COLON
805 if (lex[ch] == LEX_IS_COLON)
806 {
807 /* Only keep this white if there's no white *after* the
808 colon. */
809 ch2 = GET ();
810 UNGET (ch2);
811 if (!IS_WHITESPACE (ch2))
812 {
813 state = 9;
814 UNGET (ch);
815 PUT (' ');
816 break;
817 }
818 }
819 #endif
820 if (IS_COMMENT (ch)
821 || ch == '/'
822 || IS_LINE_SEPARATOR (ch)
823 || IS_PARALLEL_SEPARATOR (ch))
824 {
825 if (scrub_m68k_mri)
826 {
827 /* In MRI mode, we keep these spaces. */
828 UNGET (ch);
829 PUT (' ');
830 break;
831 }
832 goto recycle;
833 }
834
835 /* If we're in state 2 or 11, we've seen a non-white
836 character followed by whitespace. If the next character
837 is ':', this is whitespace after a label name which we
838 normally must ignore. In MRI mode, though, spaces are
839 not permitted between the label and the colon. */
840 if ((state == 2 || state == 11)
841 && lex[ch] == LEX_IS_COLON
842 && ! scrub_m68k_mri)
843 {
844 state = 1;
845 PUT (ch);
846 break;
847 }
848
849 switch (state)
850 {
851 case 0:
852 state++;
853 goto recycle; /* Punted leading sp */
854 case 1:
855 /* We can arrive here if we leave a leading whitespace
856 character at the beginning of a line. */
857 goto recycle;
858 case 2:
859 state = 3;
860 if (to + 1 < toend)
861 {
862 /* Optimize common case by skipping UNGET/GET. */
863 PUT (' '); /* Sp after opco */
864 goto recycle;
865 }
866 UNGET (ch);
867 PUT (' ');
868 break;
869 case 3:
870 if (scrub_m68k_mri)
871 {
872 /* In MRI mode, we keep these spaces. */
873 UNGET (ch);
874 PUT (' ');
875 break;
876 }
877 goto recycle; /* Sp in operands */
878 case 9:
879 case 10:
880 if (scrub_m68k_mri)
881 {
882 /* In MRI mode, we keep these spaces. */
883 state = 3;
884 UNGET (ch);
885 PUT (' ');
886 break;
887 }
888 state = 10; /* Sp after symbol char */
889 goto recycle;
890 case 11:
891 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
892 state = 1;
893 else
894 {
895 /* We know that ch is not ':', since we tested that
896 case above. Therefore this is not a label, so it
897 must be the opcode, and we've just seen the
898 whitespace after it. */
899 state = 3;
900 }
901 UNGET (ch);
902 PUT (' '); /* Sp after label definition. */
903 break;
904 default:
905 BAD_CASE (state);
906 }
907 break;
908
909 case LEX_IS_TWOCHAR_COMMENT_1ST:
910 ch2 = GET ();
911 if (ch2 == '*')
912 {
913 for (;;)
914 {
915 do
916 {
917 ch2 = GET ();
918 if (ch2 != EOF && IS_NEWLINE (ch2))
919 add_newlines++;
920 }
921 while (ch2 != EOF && ch2 != '*');
922
923 while (ch2 == '*')
924 ch2 = GET ();
925
926 if (ch2 == EOF || ch2 == '/')
927 break;
928
929 /* This UNGET will ensure that we count newlines
930 correctly. */
931 UNGET (ch2);
932 }
933
934 if (ch2 == EOF)
935 as_warn (_("end of file in multiline comment"));
936
937 ch = ' ';
938 goto recycle;
939 }
940 #ifdef DOUBLESLASH_LINE_COMMENTS
941 else if (ch2 == '/')
942 {
943 do
944 {
945 ch = GET ();
946 }
947 while (ch != EOF && !IS_NEWLINE (ch));
948 if (ch == EOF)
949 as_warn ("end of file in comment; newline inserted");
950 state = 0;
951 PUT ('\n');
952 break;
953 }
954 #endif
955 else
956 {
957 if (ch2 != EOF)
958 UNGET (ch2);
959 if (state == 9 || state == 10)
960 state = 3;
961 PUT (ch);
962 }
963 break;
964
965 case LEX_IS_STRINGQUOTE:
966 if (state == 10)
967 {
968 /* Preserve the whitespace in foo "bar" */
969 UNGET (ch);
970 state = 3;
971 PUT (' ');
972
973 /* PUT didn't jump out. We could just break, but we
974 know what will happen, so optimize a bit. */
975 ch = GET ();
976 old_state = 3;
977 }
978 else if (state == 9)
979 old_state = 3;
980 else
981 old_state = state;
982 state = 5;
983 PUT (ch);
984 break;
985
986 #ifndef IEEE_STYLE
987 case LEX_IS_ONECHAR_QUOTE:
988 if (state == 10)
989 {
990 /* Preserve the whitespace in foo 'b' */
991 UNGET (ch);
992 state = 3;
993 PUT (' ');
994 break;
995 }
996 ch = GET ();
997 if (ch == EOF)
998 {
999 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1000 ch = 0;
1001 }
1002 if (ch == '\\')
1003 {
1004 ch = GET ();
1005 if (ch == EOF)
1006 {
1007 as_warn (_("end of file in escape character"));
1008 ch = '\\';
1009 }
1010 else
1011 ch = process_escape (ch);
1012 }
1013 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1014
1015 /* None of these 'x constants for us. We want 'x'. */
1016 if ((ch = GET ()) != '\'')
1017 {
1018 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1019 as_warn (_("missing close quote; (assumed)"));
1020 #else
1021 if (ch != EOF)
1022 UNGET (ch);
1023 #endif
1024 }
1025 if (strlen (out_buf) == 1)
1026 {
1027 PUT (out_buf[0]);
1028 break;
1029 }
1030 if (state == 9)
1031 old_state = 3;
1032 else
1033 old_state = state;
1034 state = -1;
1035 out_string = out_buf;
1036 PUT (*out_string++);
1037 break;
1038 #endif
1039
1040 case LEX_IS_COLON:
1041 #ifdef KEEP_WHITE_AROUND_COLON
1042 state = 9;
1043 #else
1044 if (state == 9 || state == 10)
1045 state = 3;
1046 else if (state != 3)
1047 state = 1;
1048 #endif
1049 PUT (ch);
1050 break;
1051
1052 case LEX_IS_NEWLINE:
1053 /* Roll out a bunch of newlines from inside comments, etc. */
1054 if (add_newlines)
1055 {
1056 --add_newlines;
1057 UNGET (ch);
1058 }
1059 /* Fall through. */
1060
1061 case LEX_IS_LINE_SEPARATOR:
1062 state = 0;
1063 PUT (ch);
1064 break;
1065
1066 case LEX_IS_PARALLEL_SEPARATOR:
1067 state = 1;
1068 PUT (ch);
1069 break;
1070
1071 #ifdef TC_V850
1072 case LEX_IS_DOUBLEDASH_1ST:
1073 ch2 = GET ();
1074 if (ch2 != '-')
1075 {
1076 UNGET (ch2);
1077 goto de_fault;
1078 }
1079 /* Read and skip to end of line. */
1080 do
1081 {
1082 ch = GET ();
1083 }
1084 while (ch != EOF && ch != '\n');
1085 if (ch == EOF)
1086 {
1087 as_warn (_("end of file in comment; newline inserted"));
1088 }
1089 state = 0;
1090 PUT ('\n');
1091 break;
1092 #endif
1093 #ifdef DOUBLEBAR_PARALLEL
1094 case LEX_IS_DOUBLEBAR_1ST:
1095 ch2 = GET ();
1096 UNGET (ch2);
1097 if (ch2 != '|')
1098 {
1099 goto de_fault;
1100 }
1101 /* Handle '||' in two states as invoking PUT twice might
1102 result in the first one jumping out of this loop. We'd
1103 then lose track of the state and one '|' char. */
1104 state = 13;
1105 PUT ('|');
1106 break;
1107 #endif
1108 case LEX_IS_LINE_COMMENT_START:
1109 /* FIXME-someday: The two character comment stuff was badly
1110 thought out. On i386, we want '/' as line comment start
1111 AND we want C style comments. hence this hack. The
1112 whole lexical process should be reworked. xoxorich. */
1113 if (ch == '/')
1114 {
1115 ch2 = GET ();
1116 if (ch2 == '*')
1117 {
1118 old_state = 3;
1119 state = -2;
1120 break;
1121 }
1122 else
1123 {
1124 UNGET (ch2);
1125 }
1126 } /* bad hack */
1127
1128 if (state == 0 || state == 1) /* Only comment at start of line. */
1129 {
1130 int startch;
1131
1132 startch = ch;
1133
1134 do
1135 {
1136 ch = GET ();
1137 }
1138 while (ch != EOF && IS_WHITESPACE (ch));
1139 if (ch == EOF)
1140 {
1141 as_warn (_("end of file in comment; newline inserted"));
1142 PUT ('\n');
1143 break;
1144 }
1145 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1146 {
1147 /* Not a cpp line. */
1148 while (ch != EOF && !IS_NEWLINE (ch))
1149 ch = GET ();
1150 if (ch == EOF)
1151 as_warn (_("end of file in comment; newline inserted"));
1152 state = 0;
1153 PUT ('\n');
1154 break;
1155 }
1156 /* Looks like `# 123 "filename"' from cpp. */
1157 UNGET (ch);
1158 old_state = 4;
1159 state = -1;
1160 if (scrub_m68k_mri)
1161 out_string = "\tappline ";
1162 else
1163 out_string = "\t.appline ";
1164 PUT (*out_string++);
1165 break;
1166 }
1167
1168 #ifdef TC_D10V
1169 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1170 Trap is the only short insn that has a first operand that is
1171 neither register nor label.
1172 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1173 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1174 already LEX_IS_LINE_COMMENT_START. However, it is the
1175 only character in line_comment_chars for d10v, hence we
1176 can recognize it as such. */
1177 /* An alternative approach would be to reset the state to 1 when
1178 we see '||', '<'- or '->', but that seems to be overkill. */
1179 if (state == 10)
1180 PUT (' ');
1181 #endif
1182 /* We have a line comment character which is not at the
1183 start of a line. If this is also a normal comment
1184 character, fall through. Otherwise treat it as a default
1185 character. */
1186 if (strchr (tc_comment_chars, ch) == NULL
1187 && (! scrub_m68k_mri
1188 || (ch != '!' && ch != '*')))
1189 goto de_fault;
1190 if (scrub_m68k_mri
1191 && (ch == '!' || ch == '*' || ch == '#')
1192 && state != 1
1193 && state != 10)
1194 goto de_fault;
1195 /* Fall through. */
1196 case LEX_IS_COMMENT_START:
1197 #if defined TC_ARM && defined OBJ_ELF
1198 /* On the ARM, `@' is the comment character.
1199 Unfortunately this is also a special character in ELF .symver
1200 directives (and .type, though we deal with those another way).
1201 So we check if this line is such a directive, and treat
1202 the character as default if so. This is a hack. */
1203 if ((symver_state != NULL) && (*symver_state == 0))
1204 goto de_fault;
1205 #endif
1206 #ifdef WARN_COMMENTS
1207 if (!found_comment)
1208 as_where (&found_comment_file, &found_comment);
1209 #endif
1210 do
1211 {
1212 ch = GET ();
1213 }
1214 while (ch != EOF && !IS_NEWLINE (ch));
1215 if (ch == EOF)
1216 as_warn (_("end of file in comment; newline inserted"));
1217 state = 0;
1218 PUT ('\n');
1219 break;
1220
1221 case LEX_IS_SYMBOL_COMPONENT:
1222 if (state == 10)
1223 {
1224 /* This is a symbol character following another symbol
1225 character, with whitespace in between. We skipped
1226 the whitespace earlier, so output it now. */
1227 UNGET (ch);
1228 state = 3;
1229 PUT (' ');
1230 break;
1231 }
1232
1233 if (state == 3)
1234 state = 9;
1235
1236 /* This is a common case. Quickly copy CH and all the
1237 following symbol component or normal characters. */
1238 if (to + 1 < toend
1239 && mri_state == NULL
1240 #if defined TC_ARM && defined OBJ_ELF
1241 && symver_state == NULL
1242 #endif
1243 )
1244 {
1245 char *s;
1246 int len;
1247
1248 for (s = from; s < fromend; s++)
1249 {
1250 int type;
1251
1252 ch2 = *(unsigned char *) s;
1253 type = lex[ch2];
1254 if (type != 0
1255 && type != LEX_IS_SYMBOL_COMPONENT)
1256 break;
1257 }
1258 if (s > from)
1259 {
1260 /* Handle the last character normally, for
1261 simplicity. */
1262 --s;
1263 }
1264 len = s - from;
1265 if (len > (toend - to) - 1)
1266 len = (toend - to) - 1;
1267 if (len > 0)
1268 {
1269 PUT (ch);
1270 if (len > 8)
1271 {
1272 memcpy (to, from, len);
1273 to += len;
1274 from += len;
1275 }
1276 else
1277 {
1278 switch (len)
1279 {
1280 case 8: *to++ = *from++;
1281 case 7: *to++ = *from++;
1282 case 6: *to++ = *from++;
1283 case 5: *to++ = *from++;
1284 case 4: *to++ = *from++;
1285 case 3: *to++ = *from++;
1286 case 2: *to++ = *from++;
1287 case 1: *to++ = *from++;
1288 }
1289 }
1290 ch = GET ();
1291 }
1292 }
1293
1294 /* Fall through. */
1295 default:
1296 de_fault:
1297 /* Some relatively `normal' character. */
1298 if (state == 0)
1299 {
1300 state = 11; /* Now seeing label definition */
1301 }
1302 else if (state == 1)
1303 {
1304 state = 2; /* Ditto */
1305 }
1306 else if (state == 9)
1307 {
1308 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1309 state = 3;
1310 }
1311 else if (state == 10)
1312 {
1313 if (ch == '\\')
1314 {
1315 /* Special handling for backslash: a backslash may
1316 be the beginning of a formal parameter (of a
1317 macro) following another symbol character, with
1318 whitespace in between. If that is the case, we
1319 output a space before the parameter. Strictly
1320 speaking, correct handling depends upon what the
1321 macro parameter expands into; if the parameter
1322 expands into something which does not start with
1323 an operand character, then we don't want to keep
1324 the space. We don't have enough information to
1325 make the right choice, so here we are making the
1326 choice which is more likely to be correct. */
1327 PUT (' ');
1328 }
1329
1330 state = 3;
1331 }
1332 PUT (ch);
1333 break;
1334 }
1335 }
1336
1337 /*NOTREACHED*/
1338
1339 fromeof:
1340 /* We have reached the end of the input. */
1341 return to - tostart;
1342
1343 tofull:
1344 /* The output buffer is full. Save any input we have not yet
1345 processed. */
1346 if (fromend > from)
1347 {
1348 saved_input = from;
1349 saved_input_len = fromend - from;
1350 }
1351 else
1352 saved_input = NULL;
1353
1354 return to - tostart;
1355 }
1356
1357 /* end of app.c */
This page took 0.07502 seconds and 4 git commands to generate.