Mention IBM 370 support
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
252b5132
RH
1/* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 97, 98, 1999
3 Free Software Foundation, Inc.
4
5 This file is part of GAS, the GNU Assembler.
6
7 GAS is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GAS is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GAS; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
21
22/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
23/* App, the assembler pre-processor. This pre-processor strips out excess
24 spaces, turns single-quoted characters into a decimal constant, and turns
25 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
26 pair. This needs better error-handling. */
27
28#include <stdio.h>
29#include "as.h" /* For BAD_CASE() only */
30
/* Compilers that do not claim ISO C conformance may lack `const';
   make it vanish unless somebody already defined it.  */
#if (__STDC__ != 1) && ! defined (const)
#define const /* empty */
#endif

/* Nonzero while we are scrubbing in m68k MRI mode.  This is kept
   separately from flag_m68k_mri because the .mri pseudo-op affects
   the two flags at different times.  */
static int scrub_m68k_mri;

/* Template for the pseudo-op which switches MRI mode on and off.  It
   is matched character by character in do_scrub_chars; see the
   comment there.  */
static const char mri_pseudo[] = ".mri 0";

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op on whose lines `@' characters need special
   treatment, and how far we have got in matching it.  See the
   comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";
static const char *symver_state;
#endif

/* Lexical class of every possible input character, filled in by
   do_scrub_begin.  A zero entry means no special class.  */
static char lex[256];

/* Characters which are always symbol components.  */
static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
56
/* Lexical classes stored in the lex[] table.  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11

#if defined (TC_V850)
#define LEX_IS_DOUBLEDASH_1ST           12
#endif

/* The m32r wants `||' to be recognised by the scrubber.  */
#if defined (TC_M32R)
#define DOUBLEBAR_PARALLEL
#endif

#if defined (DOUBLEBAR_PARALLEL)
#define LEX_IS_DOUBLEBAR_1ST            13
#endif

/* Predicates on the class of character C.  C is used as an index
   into lex[], so it must be in the range 0..255.  */
#define IS_SYMBOL_COMPONENT(c)  (lex[(c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)        (lex[(c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)    (lex[(c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)           (lex[(c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)      (lex[(c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)           (lex[(c)] == LEX_IS_NEWLINE)
82
83static int process_escape PARAMS ((int));
84
85/* FIXME-soon: The entire lexer/parser thingy should be
86 built statically at compile time rather than dynamically
87 each and every time the assembler is run. xoxorich. */
88
89void
90do_scrub_begin (m68k_mri)
91 int m68k_mri;
92{
93 const char *p;
94 int c;
95
96 scrub_m68k_mri = m68k_mri;
97
98 lex[' '] = LEX_IS_WHITESPACE;
99 lex['\t'] = LEX_IS_WHITESPACE;
100 lex['\r'] = LEX_IS_WHITESPACE;
101 lex['\n'] = LEX_IS_NEWLINE;
102 lex[';'] = LEX_IS_LINE_SEPARATOR;
103 lex[':'] = LEX_IS_COLON;
104
105 if (! m68k_mri)
106 {
107 lex['"'] = LEX_IS_STRINGQUOTE;
108
5b93d8bb
AM
109#if ! defined (TC_HPPA) && ! defined (TC_I370)
110 /* I370 uses single-quotes to delimit integer, float constants */
252b5132
RH
111 lex['\''] = LEX_IS_ONECHAR_QUOTE;
112#endif
113
114#ifdef SINGLE_QUOTE_STRINGS
115 lex['\''] = LEX_IS_STRINGQUOTE;
116#endif
117 }
118
119 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
120 in state 5 of do_scrub_chars must be changed. */
121
122 /* Note that these override the previous defaults, e.g. if ';' is a
123 comment char, then it isn't a line separator. */
124 for (p = symbol_chars; *p; ++p)
125 {
126 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
127 } /* declare symbol characters */
128
129 for (c = 128; c < 256; ++c)
130 lex[c] = LEX_IS_SYMBOL_COMPONENT;
131
132#ifdef tc_symbol_chars
133 /* This macro permits the processor to specify all characters which
134 may appears in an operand. This will prevent the scrubber from
135 discarding meaningful whitespace in certain cases. The i386
136 backend uses this to support prefixes, which can confuse the
137 scrubber as to whether it is parsing operands or opcodes. */
138 for (p = tc_symbol_chars; *p; ++p)
139 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
140#endif
141
142 /* The m68k backend wants to be able to change comment_chars. */
143#ifndef tc_comment_chars
144#define tc_comment_chars comment_chars
145#endif
146 for (p = tc_comment_chars; *p; p++)
147 {
148 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
149 } /* declare comment chars */
150
151 for (p = line_comment_chars; *p; p++)
152 {
153 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
154 } /* declare line comment chars */
155
156 for (p = line_separator_chars; *p; p++)
157 {
158 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
159 } /* declare line separators */
160
161 /* Only allow slash-star comments if slash is not in use.
162 FIXME: This isn't right. We should always permit them. */
163 if (lex['/'] == 0)
164 {
165 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
166 }
167
168 if (m68k_mri)
169 {
170 lex['\''] = LEX_IS_STRINGQUOTE;
171 lex[';'] = LEX_IS_COMMENT_START;
172 lex['*'] = LEX_IS_LINE_COMMENT_START;
173 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
174 then it can't be used in an expression. */
175 lex['!'] = LEX_IS_LINE_COMMENT_START;
176 }
177
178#ifdef TC_V850
179 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
180#endif
f28e8eb3 181#ifdef DOUBLEBAR_PARALLEL
252b5132
RH
182 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
183#endif
184#ifdef TC_D30V
185 /* must do this is we want VLIW instruction with "->" or "<-" */
186 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
187#endif
188} /* do_scrub_begin() */
189
/* Saved state of the scrubber.  All of this persists between calls
   to do_scrub_chars, and is what app_push and app_pop save and
   restore.  */

/* Current state of the state machine, and the state to go to once a
   transient state (string, pending output, comment) is finished.  */
static int state;
static int old_state;

/* Pending text which state -1 copies to the output, and a small
   buffer which may hold that text.  */
static char *out_string;
static char out_buf[20];

/* Number of newlines which were swallowed and must still be put
   out.  */
static int add_newlines;

/* Input which had been read but not yet processed when the output
   buffer filled up, or NULL if there is none.  */
static char *saved_input;
static int saved_input_len;

/* Buffer handed to the GET callback of do_scrub_chars.  */
static char input_buffer[32 * 1024];

/* Progress of the recogniser for the .mri pseudo-op: the next
   character of mri_pseudo we expect (NULL when not matching), and
   the last character which matched.  */
static const char *mri_state;
static char mri_last_ch;
201
202/* Data structure for saving the state of app across #include's. Note that
203 app is called asynchronously to the parsing of the .include's, so our
204 state at the time .include is interpreted is completely unrelated.
205 That's why we have to save it all. */
206
207struct app_save
208 {
209 int state;
210 int old_state;
211 char * out_string;
212 char out_buf[sizeof (out_buf)];
213 int add_newlines;
214 char * saved_input;
215 int saved_input_len;
216 int scrub_m68k_mri;
217 const char * mri_state;
218 char mri_last_ch;
219#if defined TC_ARM && defined OBJ_ELF
220 const char * symver_state;
221#endif
222 };
223
224char *
225app_push ()
226{
227 register struct app_save *saved;
228
229 saved = (struct app_save *) xmalloc (sizeof (*saved));
230 saved->state = state;
231 saved->old_state = old_state;
232 saved->out_string = out_string;
233 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
234 saved->add_newlines = add_newlines;
2b47531b
ILT
235 if (saved_input == NULL)
236 saved->saved_input = NULL;
237 else
238 {
239 saved->saved_input = xmalloc (saved_input_len);
240 memcpy (saved->saved_input, saved_input, saved_input_len);
241 saved->saved_input_len = saved_input_len;
242 }
252b5132
RH
243 saved->scrub_m68k_mri = scrub_m68k_mri;
244 saved->mri_state = mri_state;
245 saved->mri_last_ch = mri_last_ch;
246#if defined TC_ARM && defined OBJ_ELF
247 saved->symver_state = symver_state;
248#endif
249
250 /* do_scrub_begin() is not useful, just wastes time. */
251
252 state = 0;
253 saved_input = NULL;
254
255 return (char *) saved;
256}
257
258void
259app_pop (arg)
260 char *arg;
261{
262 register struct app_save *saved = (struct app_save *) arg;
263
264 /* There is no do_scrub_end (). */
265 state = saved->state;
266 old_state = saved->old_state;
267 out_string = saved->out_string;
268 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
269 add_newlines = saved->add_newlines;
2b47531b
ILT
270 if (saved->saved_input == NULL)
271 saved_input = NULL;
272 else
273 {
ab9da554 274 assert (saved->saved_input_len <= (int) (sizeof input_buffer));
2b47531b
ILT
275 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
276 saved_input = input_buffer;
277 saved_input_len = saved->saved_input_len;
278 free (saved->saved_input);
279 }
252b5132
RH
280 scrub_m68k_mri = saved->scrub_m68k_mri;
281 mri_state = saved->mri_state;
282 mri_last_ch = saved->mri_last_ch;
283#if defined TC_ARM && defined OBJ_ELF
284 symver_state = saved->symver_state;
285#endif
286
287 free (arg);
288} /* app_pop() */
289
/* Return the character denoted by the escape sequence consisting of
   a backslash followed by CH.  Only \b, \f, \n, \r and \t are
   translated; any other character, including the quote characters,
   stands for itself.

   @@ This assumes that \n &c are the same on host and target.  This
   is not necessarily true.  */

static int
process_escape (ch)
     int ch;
{
  static const char letters[] = { 'b', 'f', 'n', 'r', 't' };
  static const char values[] = { '\b', '\f', '\n', '\r', '\t' };
  unsigned int i;

  for (i = 0; i < sizeof letters; i++)
    if (ch == letters[i])
      return values[i];

  return ch;
}
316
317/* This function is called to process input characters. The GET
318 parameter is used to retrieve more input characters. GET should
319 set its parameter to point to a buffer, and return the length of
320 the buffer; it should return 0 at end of file. The scrubbed output
321 characters are put into the buffer starting at TOSTART; the TOSTART
322 buffer is TOLEN bytes in length. The function returns the number
323 of scrubbed characters put into TOSTART. This will be TOLEN unless
324 end of file was seen. This function is arranged as a state
325 machine, and saves its state so that it may return at any point.
326 This is the way the old code used to work. */
327
328int
329do_scrub_chars (get, tostart, tolen)
2b47531b 330 int (*get) PARAMS ((char *, int));
252b5132
RH
331 char *tostart;
332 int tolen;
333{
334 char *to = tostart;
335 char *toend = tostart + tolen;
336 char *from;
337 char *fromend;
338 int fromlen;
339 register int ch, ch2 = 0;
340
341 /*State 0: beginning of normal line
342 1: After first whitespace on line (flush more white)
343 2: After first non-white (opcode) on line (keep 1white)
344 3: after second white on line (into operands) (flush white)
345 4: after putting out a .line, put out digits
346 5: parsing a string, then go to old-state
347 6: putting out \ escape in a "d string.
348 7: After putting out a .appfile, put out string.
349 8: After putting out a .appfile string, flush until newline.
350 9: After seeing symbol char in state 3 (keep 1white after symchar)
351 10: After seeing whitespace in state 9 (keep white before symchar)
352 11: After seeing a symbol character in state 0 (eg a label definition)
353 -1: output string in out_string and go to the state in old_state
354 -2: flush text until a '*' '/' is seen, then go to state old_state
355#ifdef TC_V850
356 12: After seeing a dash, looking for a second dash as a start of comment.
357#endif
f28e8eb3 358#ifdef DOUBLEBAR_PARALLEL
252b5132
RH
359 13: After seeing a vertical bar, looking for a second vertical bar as a parallel expression seperator.
360#endif
361 */
362
363 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
364 constructs like ``.loc 1 20''. This was turning into ``.loc
365 120''. States 9 and 10 ensure that a space is never dropped in
366 between characters which could appear in a identifier. Ian
367 Taylor, ian@cygnus.com.
368
369 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
370 correctly on the PA (and any other target where colons are optional).
371 Jeff Law, law@cs.utah.edu.
372
373 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
374 get squashed into "cmp r1,r2||trap#1", with the all important space
375 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
376
377 /* This macro gets the next input character. */
378
2b47531b
ILT
379#define GET() \
380 (from < fromend \
381 ? * (unsigned char *) (from++) \
382 : (saved_input = NULL, \
383 fromlen = (*get) (input_buffer, sizeof input_buffer), \
384 from = input_buffer, \
385 fromend = from + fromlen, \
386 (fromlen == 0 \
387 ? EOF \
252b5132
RH
388 : * (unsigned char *) (from++))))
389
390 /* This macro pushes a character back on the input stream. */
391
392#define UNGET(uch) (*--from = (uch))
393
394 /* This macro puts a character into the output buffer. If this
395 character fills the output buffer, this macro jumps to the label
396 TOFULL. We use this rather ugly approach because we need to
397 handle two different termination conditions: EOF on the input
398 stream, and a full output buffer. It would be simpler if we
399 always read in the entire input stream before processing it, but
400 I don't want to make such a significant change to the assembler's
401 memory usage. */
402
403#define PUT(pch) \
404 do \
405 { \
406 *to++ = (pch); \
407 if (to >= toend) \
408 goto tofull; \
409 } \
410 while (0)
411
412 if (saved_input != NULL)
413 {
414 from = saved_input;
415 fromend = from + saved_input_len;
416 }
417 else
418 {
2b47531b 419 fromlen = (*get) (input_buffer, sizeof input_buffer);
252b5132
RH
420 if (fromlen == 0)
421 return 0;
2b47531b 422 from = input_buffer;
252b5132
RH
423 fromend = from + fromlen;
424 }
425
426 while (1)
427 {
428 /* The cases in this switch end with continue, in order to
429 branch back to the top of this while loop and generate the
430 next output character in the appropriate state. */
431 switch (state)
432 {
433 case -1:
434 ch = *out_string++;
435 if (*out_string == '\0')
436 {
437 state = old_state;
438 old_state = 3;
439 }
440 PUT (ch);
441 continue;
442
443 case -2:
444 for (;;)
445 {
446 do
447 {
448 ch = GET ();
449
450 if (ch == EOF)
451 {
452 as_warn (_("end of file in comment"));
453 goto fromeof;
454 }
455
456 if (ch == '\n')
457 PUT ('\n');
458 }
459 while (ch != '*');
460
461 while ((ch = GET ()) == '*')
462 ;
463
464 if (ch == EOF)
465 {
466 as_warn (_("end of file in comment"));
467 goto fromeof;
468 }
469
470 if (ch == '/')
471 break;
472
473 UNGET (ch);
474 }
475
476 state = old_state;
477 UNGET (' ');
478 continue;
479
480 case 4:
481 ch = GET ();
482 if (ch == EOF)
483 goto fromeof;
484 else if (ch >= '0' && ch <= '9')
485 PUT (ch);
486 else
487 {
488 while (ch != EOF && IS_WHITESPACE (ch))
489 ch = GET ();
490 if (ch == '"')
491 {
492 UNGET (ch);
493 if (scrub_m68k_mri)
494 out_string = "\n\tappfile ";
495 else
496 out_string = "\n\t.appfile ";
497 old_state = 7;
498 state = -1;
499 PUT (*out_string++);
500 }
501 else
502 {
503 while (ch != EOF && ch != '\n')
504 ch = GET ();
505 state = 0;
506 PUT (ch);
507 }
508 }
509 continue;
510
511 case 5:
512 /* We are going to copy everything up to a quote character,
513 with special handling for a backslash. We try to
514 optimize the copying in the simple case without using the
515 GET and PUT macros. */
516 {
517 char *s;
518 int len;
519
520 for (s = from; s < fromend; s++)
521 {
522 ch = *s;
523 /* This condition must be changed if the type of any
524 other character can be LEX_IS_STRINGQUOTE. */
525 if (ch == '\\'
526 || ch == '"'
527 || ch == '\''
528 || ch == '\n')
529 break;
530 }
531 len = s - from;
532 if (len > toend - to)
533 len = toend - to;
534 if (len > 0)
535 {
536 memcpy (to, from, len);
537 to += len;
538 from += len;
539 }
540 }
541
542 ch = GET ();
543 if (ch == EOF)
544 {
545 as_warn (_("end of file in string: inserted '\"'"));
546 state = old_state;
547 UNGET ('\n');
548 PUT ('"');
549 }
550 else if (lex[ch] == LEX_IS_STRINGQUOTE)
551 {
552 state = old_state;
553 PUT (ch);
554 }
555#ifndef NO_STRING_ESCAPES
556 else if (ch == '\\')
557 {
558 state = 6;
559 PUT (ch);
560 }
561#endif
562 else if (scrub_m68k_mri && ch == '\n')
563 {
564 /* Just quietly terminate the string. This permits lines like
565 bne label loop if we haven't reach end yet
566 */
567 state = old_state;
568 UNGET (ch);
569 PUT ('\'');
570 }
571 else
572 {
573 PUT (ch);
574 }
575 continue;
576
577 case 6:
578 state = 5;
579 ch = GET ();
580 switch (ch)
581 {
582 /* Handle strings broken across lines, by turning '\n' into
583 '\\' and 'n'. */
584 case '\n':
585 UNGET ('n');
586 add_newlines++;
587 PUT ('\\');
588 continue;
589
590 case '"':
591 case '\\':
592 case 'b':
593 case 'f':
594 case 'n':
595 case 'r':
596 case 't':
597 case 'v':
598 case 'x':
599 case 'X':
600 case '0':
601 case '1':
602 case '2':
603 case '3':
604 case '4':
605 case '5':
606 case '6':
607 case '7':
608 break;
609#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
610 default:
611 as_warn (_("Unknown escape '\\%c' in string: Ignored"), ch);
612 break;
613#else /* ONLY_STANDARD_ESCAPES */
614 default:
615 /* Accept \x as x for any x */
616 break;
617#endif /* ONLY_STANDARD_ESCAPES */
618
619 case EOF:
620 as_warn (_("End of file in string: '\"' inserted"));
621 PUT ('"');
622 continue;
623 }
624 PUT (ch);
625 continue;
626
627 case 7:
628 ch = GET ();
629 state = 5;
630 old_state = 8;
631 if (ch == EOF)
632 goto fromeof;
633 PUT (ch);
634 continue;
635
636 case 8:
637 do
638 ch = GET ();
639 while (ch != '\n' && ch != EOF);
640 if (ch == EOF)
641 goto fromeof;
642 state = 0;
643 PUT (ch);
644 continue;
645 }
646
647 /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
648
649 /* flushchar: */
650 ch = GET ();
651
652 recycle:
653
654#if defined TC_ARM && defined OBJ_ELF
655 /* We need to watch out for .symver directives. See the comment later
656 in this function. */
657 if (symver_state == NULL)
658 {
659 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
660 symver_state = symver_pseudo + 1;
661 }
662 else
663 {
664 /* We advance to the next state if we find the right
665 character. */
666 if (ch != '\0' && (*symver_state == ch))
667 ++symver_state;
668 else if (*symver_state != '\0')
669 /* We did not get the expected character, or we didn't
670 get a valid terminating character after seeing the
671 entire pseudo-op, so we must go back to the beginning. */
672 symver_state = NULL;
673 else
674 {
675 /* We've read the entire pseudo-op. If this is the end
676 of the line, go back to the beginning. */
677 if (IS_NEWLINE (ch))
678 symver_state = NULL;
679 }
680 }
681#endif /* TC_ARM && OBJ_ELF */
682
683#ifdef TC_M68K
684 /* We want to have pseudo-ops which control whether we are in
685 MRI mode or not. Unfortunately, since m68k MRI mode affects
686 the scrubber, that means that we need a special purpose
687 recognizer here. */
688 if (mri_state == NULL)
689 {
690 if ((state == 0 || state == 1)
691 && ch == mri_pseudo[0])
692 mri_state = mri_pseudo + 1;
693 }
694 else
695 {
696 /* We advance to the next state if we find the right
697 character, or if we need a space character and we get any
698 whitespace character, or if we need a '0' and we get a
699 '1' (this is so that we only need one state to handle
700 ``.mri 0'' and ``.mri 1''). */
701 if (ch != '\0'
702 && (*mri_state == ch
703 || (*mri_state == ' '
704 && lex[ch] == LEX_IS_WHITESPACE)
705 || (*mri_state == '0'
706 && ch == '1')))
707 {
708 mri_last_ch = ch;
709 ++mri_state;
710 }
711 else if (*mri_state != '\0'
712 || (lex[ch] != LEX_IS_WHITESPACE
713 && lex[ch] != LEX_IS_NEWLINE))
714 {
715 /* We did not get the expected character, or we didn't
716 get a valid terminating character after seeing the
717 entire pseudo-op, so we must go back to the
718 beginning. */
719 mri_state = NULL;
720 }
721 else
722 {
723 /* We've read the entire pseudo-op. mips_last_ch is
724 either '0' or '1' indicating whether to enter or
725 leave MRI mode. */
726 do_scrub_begin (mri_last_ch == '1');
727 mri_state = NULL;
728
729 /* We continue handling the character as usual. The
730 main gas reader must also handle the .mri pseudo-op
731 to control expression parsing and the like. */
732 }
733 }
734#endif
735
736 if (ch == EOF)
737 {
738 if (state != 0)
739 {
740 as_warn (_("end of file not at end of a line; newline inserted"));
741 state = 0;
742 PUT ('\n');
743 }
744 goto fromeof;
745 }
746
747 switch (lex[ch])
748 {
749 case LEX_IS_WHITESPACE:
750 do
751 {
752 ch = GET ();
753 }
754 while (ch != EOF && IS_WHITESPACE (ch));
755 if (ch == EOF)
756 goto fromeof;
757
758 if (state == 0)
759 {
760 /* Preserve a single whitespace character at the
761 beginning of a line. */
762 state = 1;
763 UNGET (ch);
764 PUT (' ');
765 break;
766 }
767
f28e8eb3
TW
768#ifdef KEEP_WHITE_AROUND_COLON
769 if (lex[ch] == LEX_IS_COLON)
770 {
771 /* only keep this white if there's no white *after* the colon */
772 ch2 = GET ();
773 UNGET (ch2);
774 if (!IS_WHITESPACE (ch2))
775 {
776 state = 9;
777 UNGET (ch);
778 PUT (' ');
779 break;
780 }
781 }
782#endif
252b5132
RH
783 if (IS_COMMENT (ch)
784 || ch == '/'
785 || IS_LINE_SEPARATOR (ch))
786 {
787 if (scrub_m68k_mri)
788 {
789 /* In MRI mode, we keep these spaces. */
790 UNGET (ch);
791 PUT (' ');
792 break;
793 }
794 goto recycle;
795 }
796
797 /* If we're in state 2 or 11, we've seen a non-white
798 character followed by whitespace. If the next character
799 is ':', this is whitespace after a label name which we
800 normally must ignore. In MRI mode, though, spaces are
801 not permitted between the label and the colon. */
802 if ((state == 2 || state == 11)
803 && lex[ch] == LEX_IS_COLON
804 && ! scrub_m68k_mri)
805 {
806 state = 1;
807 PUT (ch);
808 break;
809 }
810
811 switch (state)
812 {
813 case 0:
814 state++;
815 goto recycle; /* Punted leading sp */
816 case 1:
817 /* We can arrive here if we leave a leading whitespace
818 character at the beginning of a line. */
819 goto recycle;
820 case 2:
821 state = 3;
822 if (to + 1 < toend)
823 {
824 /* Optimize common case by skipping UNGET/GET. */
825 PUT (' '); /* Sp after opco */
826 goto recycle;
827 }
828 UNGET (ch);
829 PUT (' ');
830 break;
831 case 3:
832 if (scrub_m68k_mri)
833 {
834 /* In MRI mode, we keep these spaces. */
835 UNGET (ch);
836 PUT (' ');
837 break;
838 }
839 goto recycle; /* Sp in operands */
840 case 9:
841 case 10:
842 if (scrub_m68k_mri)
843 {
844 /* In MRI mode, we keep these spaces. */
845 state = 3;
846 UNGET (ch);
847 PUT (' ');
848 break;
849 }
850 state = 10; /* Sp after symbol char */
851 goto recycle;
852 case 11:
853 if (flag_m68k_mri
854#ifdef LABELS_WITHOUT_COLONS
855 || 1
856#endif
857 )
858 state = 1;
859 else
860 {
861 /* We know that ch is not ':', since we tested that
862 case above. Therefore this is not a label, so it
863 must be the opcode, and we've just seen the
864 whitespace after it. */
865 state = 3;
866 }
867 UNGET (ch);
868 PUT (' '); /* Sp after label definition. */
869 break;
870 default:
871 BAD_CASE (state);
872 }
873 break;
874
875 case LEX_IS_TWOCHAR_COMMENT_1ST:
876 ch2 = GET ();
877 if (ch2 == '*')
878 {
879 for (;;)
880 {
881 do
882 {
883 ch2 = GET ();
884 if (ch2 != EOF && IS_NEWLINE (ch2))
885 add_newlines++;
886 }
887 while (ch2 != EOF && ch2 != '*');
888
889 while (ch2 == '*')
890 ch2 = GET ();
891
892 if (ch2 == EOF || ch2 == '/')
893 break;
894
895 /* This UNGET will ensure that we count newlines
896 correctly. */
897 UNGET (ch2);
898 }
899
900 if (ch2 == EOF)
901 as_warn (_("end of file in multiline comment"));
902
903 ch = ' ';
904 goto recycle;
905 }
906 else
907 {
908 if (ch2 != EOF)
909 UNGET (ch2);
910 if (state == 9 || state == 10)
911 state = 3;
912 PUT (ch);
913 }
914 break;
915
916 case LEX_IS_STRINGQUOTE:
917 if (state == 10)
918 {
919 /* Preserve the whitespace in foo "bar" */
920 UNGET (ch);
921 state = 3;
922 PUT (' ');
923
924 /* PUT didn't jump out. We could just break, but we
925 know what will happen, so optimize a bit. */
926 ch = GET ();
927 old_state = 3;
928 }
929 else if (state == 9)
930 old_state = 3;
931 else
932 old_state = state;
933 state = 5;
934 PUT (ch);
935 break;
936
937#ifndef IEEE_STYLE
938 case LEX_IS_ONECHAR_QUOTE:
939 if (state == 10)
940 {
941 /* Preserve the whitespace in foo 'b' */
942 UNGET (ch);
943 state = 3;
944 PUT (' ');
945 break;
946 }
947 ch = GET ();
948 if (ch == EOF)
949 {
950 as_warn (_("end of file after a one-character quote; \\0 inserted"));
951 ch = 0;
952 }
953 if (ch == '\\')
954 {
955 ch = GET ();
956 if (ch == EOF)
957 {
958 as_warn (_("end of file in escape character"));
959 ch = '\\';
960 }
961 else
962 ch = process_escape (ch);
963 }
964 sprintf (out_buf, "%d", (int) (unsigned char) ch);
965
966 /* None of these 'x constants for us. We want 'x'. */
967 if ((ch = GET ()) != '\'')
968 {
969#ifdef REQUIRE_CHAR_CLOSE_QUOTE
970 as_warn (_("Missing close quote: (assumed)"));
971#else
972 if (ch != EOF)
973 UNGET (ch);
974#endif
975 }
976 if (strlen (out_buf) == 1)
977 {
978 PUT (out_buf[0]);
979 break;
980 }
981 if (state == 9)
982 old_state = 3;
983 else
984 old_state = state;
985 state = -1;
986 out_string = out_buf;
987 PUT (*out_string++);
988 break;
989#endif
990
991 case LEX_IS_COLON:
f28e8eb3
TW
992#ifdef KEEP_WHITE_AROUND_COLON
993 state = 9;
994#else
252b5132
RH
995 if (state == 9 || state == 10)
996 state = 3;
997 else if (state != 3)
998 state = 1;
f28e8eb3 999#endif
252b5132
RH
1000 PUT (ch);
1001 break;
1002
1003 case LEX_IS_NEWLINE:
1004 /* Roll out a bunch of newlines from inside comments, etc. */
1005 if (add_newlines)
1006 {
1007 --add_newlines;
1008 UNGET (ch);
1009 }
1010 /* fall thru into... */
1011
1012 case LEX_IS_LINE_SEPARATOR:
1013 state = 0;
1014 PUT (ch);
1015 break;
1016
1017#ifdef TC_V850
1018 case LEX_IS_DOUBLEDASH_1ST:
1019 ch2 = GET();
1020 if (ch2 != '-')
1021 {
1022 UNGET (ch2);
1023 goto de_fault;
1024 }
1025 /* read and skip to end of line */
1026 do
1027 {
1028 ch = GET ();
1029 }
1030 while (ch != EOF && ch != '\n');
1031 if (ch == EOF)
1032 {
1033 as_warn (_("end of file in comment; newline inserted"));
1034 }
1035 state = 0;
1036 PUT ('\n');
1037 break;
1038#endif
f28e8eb3 1039#ifdef DOUBLEBAR_PARALLEL
252b5132
RH
1040 case LEX_IS_DOUBLEBAR_1ST:
1041 ch2 = GET();
1042 if (ch2 != '|')
1043 {
1044 UNGET (ch2);
1045 goto de_fault;
1046 }
1047 /* Reset back to state 1 and pretend that we are parsing a line from
1048 just after the first white space. */
1049 state = 1;
1050 PUT ('|');
1051 PUT ('|');
1052 break;
1053#endif
1054 case LEX_IS_LINE_COMMENT_START:
1055 /* FIXME-someday: The two character comment stuff was badly
1056 thought out. On i386, we want '/' as line comment start
1057 AND we want C style comments. hence this hack. The
1058 whole lexical process should be reworked. xoxorich. */
1059 if (ch == '/')
1060 {
1061 ch2 = GET ();
1062 if (ch2 == '*')
1063 {
1064 old_state = 3;
1065 state = -2;
1066 break;
1067 }
1068 else
1069 {
1070 UNGET (ch2);
1071 }
1072 } /* bad hack */
1073
1074 if (state == 0 || state == 1) /* Only comment at start of line. */
1075 {
1076 int startch;
1077
1078 startch = ch;
1079
1080 do
1081 {
1082 ch = GET ();
1083 }
1084 while (ch != EOF && IS_WHITESPACE (ch));
1085 if (ch == EOF)
1086 {
1087 as_warn (_("end of file in comment; newline inserted"));
1088 PUT ('\n');
1089 break;
1090 }
1091 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1092 {
1093 /* Not a cpp line. */
1094 while (ch != EOF && !IS_NEWLINE (ch))
1095 ch = GET ();
1096 if (ch == EOF)
1097 as_warn (_("EOF in Comment: Newline inserted"));
1098 state = 0;
1099 PUT ('\n');
1100 break;
1101 }
1102 /* Loks like `# 123 "filename"' from cpp. */
1103 UNGET (ch);
1104 old_state = 4;
1105 state = -1;
1106 if (scrub_m68k_mri)
1107 out_string = "\tappline ";
1108 else
1109 out_string = "\t.appline ";
1110 PUT (*out_string++);
1111 break;
1112 }
1113
1114#ifdef TC_D10V
1115 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1116 Trap is the only short insn that has a first operand that is
1117 neither register nor label.
1118 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1119 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is already
1120 LEX_IS_LINE_COMMENT_START. However, it is the only character in
1121 line_comment_chars for d10v, hence we can recognize it as such. */
1122 /* An alternative approach would be to reset the state to 1 when
1123 we see '||', '<'- or '->', but that seems to be overkill. */
1124 if (state == 10) PUT (' ');
1125#endif
1126 /* We have a line comment character which is not at the
1127 start of a line. If this is also a normal comment
1128 character, fall through. Otherwise treat it as a default
1129 character. */
1130 if (strchr (tc_comment_chars, ch) == NULL
1131 && (! scrub_m68k_mri
1132 || (ch != '!' && ch != '*')))
1133 goto de_fault;
1134 if (scrub_m68k_mri
1135 && (ch == '!' || ch == '*' || ch == '#')
1136 && state != 1
1137 && state != 10)
1138 goto de_fault;
1139 /* Fall through. */
1140 case LEX_IS_COMMENT_START:
1141#if defined TC_ARM && defined OBJ_ELF
1142 /* On the ARM, `@' is the comment character.
1143 Unfortunately this is also a special character in ELF .symver
1144 directives (and .type, though we deal with those another way). So
1145 we check if this line is such a directive, and treat the character
1146 as default if so. This is a hack. */
1147 if ((symver_state != NULL) && (*symver_state == 0))
1148 goto de_fault;
1149#endif
1150 do
1151 {
1152 ch = GET ();
1153 }
1154 while (ch != EOF && !IS_NEWLINE (ch));
1155 if (ch == EOF)
1156 as_warn (_("end of file in comment; newline inserted"));
1157 state = 0;
1158 PUT ('\n');
1159 break;
1160
1161 case LEX_IS_SYMBOL_COMPONENT:
1162 if (state == 10)
1163 {
1164 /* This is a symbol character following another symbol
1165 character, with whitespace in between. We skipped
1166 the whitespace earlier, so output it now. */
1167 UNGET (ch);
1168 state = 3;
1169 PUT (' ');
1170 break;
1171 }
1172
1173 if (state == 3)
1174 state = 9;
1175
1176 /* This is a common case. Quickly copy CH and all the
1177 following symbol component or normal characters. */
1178 if (to + 1 < toend
1179 && mri_state == NULL
1180#if defined TC_ARM && defined OBJ_ELF
1181 && symver_state == NULL
1182#endif
1183 )
1184 {
1185 char *s;
1186 int len;
1187
1188 for (s = from; s < fromend; s++)
1189 {
1190 int type;
1191
1192 ch2 = * (unsigned char *) s;
1193 type = lex[ch2];
1194 if (type != 0
1195 && type != LEX_IS_SYMBOL_COMPONENT)
1196 break;
1197 }
1198 if (s > from)
1199 {
1200 /* Handle the last character normally, for
1201 simplicity. */
1202 --s;
1203 }
1204 len = s - from;
1205 if (len > (toend - to) - 1)
1206 len = (toend - to) - 1;
1207 if (len > 0)
1208 {
1209 PUT (ch);
1210 if (len > 8)
1211 {
1212 memcpy (to, from, len);
1213 to += len;
1214 from += len;
1215 }
1216 else
1217 {
1218 switch (len)
1219 {
1220 case 8: *to++ = *from++;
1221 case 7: *to++ = *from++;
1222 case 6: *to++ = *from++;
1223 case 5: *to++ = *from++;
1224 case 4: *to++ = *from++;
1225 case 3: *to++ = *from++;
1226 case 2: *to++ = *from++;
1227 case 1: *to++ = *from++;
1228 }
1229 }
1230 ch = GET ();
1231 }
1232 }
1233
1234 /* Fall through. */
1235 default:
1236 de_fault:
1237 /* Some relatively `normal' character. */
1238 if (state == 0)
1239 {
1240 state = 11; /* Now seeing label definition */
1241 }
1242 else if (state == 1)
1243 {
1244 state = 2; /* Ditto */
1245 }
1246 else if (state == 9)
1247 {
1248 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
1249 state = 3;
1250 }
1251 else if (state == 10)
1252 {
c5c834aa
AH
1253 if (ch == '\\')
1254 {
1255 /* Special handling for backslash: a backslash may
1256 be the beginning of a formal parameter (of a
1257 macro) following another symbol character, with
1258 whitespace in between. If that is the case, we
1259 output a space before the parameter. Strictly
1260 speaking, correct handling depends upon what the
1261 macro parameter expands into; if the parameter
1262 expands into something which does not start with
1263 an operand character, then we don't want to keep
1264 the space. We don't have enough information to
1265 make the right choice, so here we are making the
1266 choice which is more likely to be correct. */
1267 PUT (' ');
1268 }
1269
252b5132
RH
1270 state = 3;
1271 }
1272 PUT (ch);
1273 break;
1274 }
1275 }
1276
1277 /*NOTREACHED*/
1278
1279 fromeof:
1280 /* We have reached the end of the input. */
1281 return to - tostart;
1282
1283 tofull:
1284 /* The output buffer is full. Save any input we have not yet
1285 processed. */
1286 if (fromend > from)
1287 {
2b47531b 1288 saved_input = from;
252b5132
RH
1289 saved_input_len = fromend - from;
1290 }
1291 else
2b47531b
ILT
1292 saved_input = NULL;
1293
252b5132
RH
1294 return to - tostart;
1295}
1296
1297/* end of app.c */
This page took 0.112657 seconds and 4 git commands to generate.