gdb: add target_ops::supports_displaced_step
[deliverable/binutils-gdb.git] / gas / app.c
1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987-2020 Free Software Foundation, Inc.
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GAS is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
20
21 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
22 /* App, the assembler pre-processor. This pre-processor strips out
23 excess spaces, turns single-quoted characters into a decimal
24 constant, and turns the # in # <number> <filename> <garbage> into a
25 .linefile. This needs better error-handling. */
26
27 #include "as.h"
28
29 #if (__STDC__ != 1)
30 #ifndef const
31 #define const /* empty */
32 #endif
33 #endif
34
35 #ifdef H_TICK_HEX
36 int enable_h_tick_hex = 0;
37 #endif
38
39 #ifdef TC_M68K
40 /* Whether we are scrubbing in m68k MRI mode. This is different from
41 flag_m68k_mri, because the two flags will be affected by the .mri
42 pseudo-op at different times. */
43 static int scrub_m68k_mri;
44
45 /* The pseudo-op which switches in and out of MRI mode. See the
46 comment in do_scrub_chars. */
47 static const char mri_pseudo[] = ".mri 0";
48 #else
49 #define scrub_m68k_mri 0
50 #endif
51
52 #if defined TC_ARM && defined OBJ_ELF
53 /* The pseudo-op for which we need to special-case `@' characters.
54 See the comment in do_scrub_chars. */
55 static const char symver_pseudo[] = ".symver";
56 static const char * symver_state;
57 #endif
58 #ifdef TC_ARM
59 static char last_char;
60 #endif
61
62 static char lex[256];
63 static const char symbol_chars[] =
64 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
65
66 #define LEX_IS_SYMBOL_COMPONENT 1
67 #define LEX_IS_WHITESPACE 2
68 #define LEX_IS_LINE_SEPARATOR 3
69 #define LEX_IS_COMMENT_START 4
70 #define LEX_IS_LINE_COMMENT_START 5
71 #define LEX_IS_TWOCHAR_COMMENT_1ST 6
72 #define LEX_IS_STRINGQUOTE 8
73 #define LEX_IS_COLON 9
74 #define LEX_IS_NEWLINE 10
75 #define LEX_IS_ONECHAR_QUOTE 11
76 #ifdef TC_V850
77 #define LEX_IS_DOUBLEDASH_1ST 12
78 #endif
79 #ifdef TC_M32R
80 #define DOUBLEBAR_PARALLEL
81 #endif
82 #ifdef DOUBLEBAR_PARALLEL
83 #define LEX_IS_DOUBLEBAR_1ST 13
84 #endif
85 #define LEX_IS_PARALLEL_SEPARATOR 14
86 #ifdef H_TICK_HEX
87 #define LEX_IS_H 15
88 #endif
89 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
90 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
91 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
92 #define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
93 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
94 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
95 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
96
97 static int process_escape (int);
98
99 /* FIXME-soon: The entire lexer/parser thingy should be
100 built statically at compile time rather than dynamically
101 each and every time the assembler is run. xoxorich. */
102
103 void
104 do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
105 {
106 const char *p;
107 int c;
108
109 lex[' '] = LEX_IS_WHITESPACE;
110 lex['\t'] = LEX_IS_WHITESPACE;
111 lex['\r'] = LEX_IS_WHITESPACE;
112 lex['\n'] = LEX_IS_NEWLINE;
113 lex[':'] = LEX_IS_COLON;
114
115 #ifdef TC_M68K
116 scrub_m68k_mri = m68k_mri;
117
118 if (! m68k_mri)
119 #endif
120 {
121 lex['"'] = LEX_IS_STRINGQUOTE;
122
123 #if ! defined (TC_HPPA)
124 lex['\''] = LEX_IS_ONECHAR_QUOTE;
125 #endif
126
127 #ifdef SINGLE_QUOTE_STRINGS
128 lex['\''] = LEX_IS_STRINGQUOTE;
129 #endif
130 }
131
132 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133 in state 5 of do_scrub_chars must be changed. */
134
135 /* Note that these override the previous defaults, e.g. if ';' is a
136 comment char, then it isn't a line separator. */
137 for (p = symbol_chars; *p; ++p)
138 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
139
140 for (c = 128; c < 256; ++c)
141 lex[c] = LEX_IS_SYMBOL_COMPONENT;
142
143 #ifdef tc_symbol_chars
144 /* This macro permits the processor to specify all characters which
145 may appears in an operand. This will prevent the scrubber from
146 discarding meaningful whitespace in certain cases. The i386
147 backend uses this to support prefixes, which can confuse the
148 scrubber as to whether it is parsing operands or opcodes. */
149 for (p = tc_symbol_chars; *p; ++p)
150 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151 #endif
152
153 /* The m68k backend wants to be able to change comment_chars. */
154 #ifndef tc_comment_chars
155 #define tc_comment_chars comment_chars
156 #endif
157 for (p = tc_comment_chars; *p; p++)
158 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
159
160 for (p = line_comment_chars; *p; p++)
161 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
162
163 #ifndef tc_line_separator_chars
164 #define tc_line_separator_chars line_separator_chars
165 #endif
166 for (p = tc_line_separator_chars; *p; p++)
167 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
168
169 #ifdef tc_parallel_separator_chars
170 /* This macro permits the processor to specify all characters which
171 separate parallel insns on the same line. */
172 for (p = tc_parallel_separator_chars; *p; p++)
173 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
174 #endif
175
176 /* Only allow slash-star comments if slash is not in use.
177 FIXME: This isn't right. We should always permit them. */
178 if (lex['/'] == 0)
179 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
180
181 #ifdef TC_M68K
182 if (m68k_mri)
183 {
184 lex['\''] = LEX_IS_STRINGQUOTE;
185 lex[';'] = LEX_IS_COMMENT_START;
186 lex['*'] = LEX_IS_LINE_COMMENT_START;
187 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
188 then it can't be used in an expression. */
189 lex['!'] = LEX_IS_LINE_COMMENT_START;
190 }
191 #endif
192
193 #ifdef TC_V850
194 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
195 #endif
196 #ifdef DOUBLEBAR_PARALLEL
197 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
198 #endif
199 #ifdef TC_D30V
200 /* Must do this is we want VLIW instruction with "->" or "<-". */
201 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
202 #endif
203
204 #ifdef H_TICK_HEX
205 if (enable_h_tick_hex)
206 {
207 lex['h'] = LEX_IS_H;
208 lex['H'] = LEX_IS_H;
209 }
210 #endif
211 }
212
213 /* Saved state of the scrubber. */
214 static int state;
215 static int old_state;
216 static const char *out_string;
217 static char out_buf[20];
218 static int add_newlines;
219 static char *saved_input;
220 static size_t saved_input_len;
221 static char input_buffer[32 * 1024];
222 static const char *mri_state;
223 static char mri_last_ch;
224
225 /* Data structure for saving the state of app across #include's. Note that
226 app is called asynchronously to the parsing of the .include's, so our
227 state at the time .include is interpreted is completely unrelated.
228 That's why we have to save it all. */
229
230 struct app_save
231 {
232 int state;
233 int old_state;
234 const char * out_string;
235 char out_buf[sizeof (out_buf)];
236 int add_newlines;
237 char * saved_input;
238 size_t saved_input_len;
239 #ifdef TC_M68K
240 int scrub_m68k_mri;
241 #endif
242 const char * mri_state;
243 char mri_last_ch;
244 #if defined TC_ARM && defined OBJ_ELF
245 const char * symver_state;
246 #endif
247 #ifdef TC_ARM
248 char last_char;
249 #endif
250 };
251
252 char *
253 app_push (void)
254 {
255 struct app_save *saved;
256
257 saved = XNEW (struct app_save);
258 saved->state = state;
259 saved->old_state = old_state;
260 saved->out_string = out_string;
261 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
262 saved->add_newlines = add_newlines;
263 if (saved_input == NULL)
264 saved->saved_input = NULL;
265 else
266 {
267 saved->saved_input = XNEWVEC (char, saved_input_len);
268 memcpy (saved->saved_input, saved_input, saved_input_len);
269 saved->saved_input_len = saved_input_len;
270 }
271 #ifdef TC_M68K
272 saved->scrub_m68k_mri = scrub_m68k_mri;
273 #endif
274 saved->mri_state = mri_state;
275 saved->mri_last_ch = mri_last_ch;
276 #if defined TC_ARM && defined OBJ_ELF
277 saved->symver_state = symver_state;
278 #endif
279 #ifdef TC_ARM
280 saved->last_char = last_char;
281 #endif
282
283 /* do_scrub_begin() is not useful, just wastes time. */
284
285 state = 0;
286 saved_input = NULL;
287 add_newlines = 0;
288
289 return (char *) saved;
290 }
291
292 void
293 app_pop (char *arg)
294 {
295 struct app_save *saved = (struct app_save *) arg;
296
297 /* There is no do_scrub_end (). */
298 state = saved->state;
299 old_state = saved->old_state;
300 out_string = saved->out_string;
301 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
302 add_newlines = saved->add_newlines;
303 if (saved->saved_input == NULL)
304 saved_input = NULL;
305 else
306 {
307 gas_assert (saved->saved_input_len <= sizeof (input_buffer));
308 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
309 saved_input = input_buffer;
310 saved_input_len = saved->saved_input_len;
311 free (saved->saved_input);
312 }
313 #ifdef TC_M68K
314 scrub_m68k_mri = saved->scrub_m68k_mri;
315 #endif
316 mri_state = saved->mri_state;
317 mri_last_ch = saved->mri_last_ch;
318 #if defined TC_ARM && defined OBJ_ELF
319 symver_state = saved->symver_state;
320 #endif
321 #ifdef TC_ARM
322 last_char = saved->last_char;
323 #endif
324
325 free (arg);
326 }
327
328 /* @@ This assumes that \n &c are the same on host and target. This is not
329 necessarily true. */
330
/* Translate CH, the character that followed a backslash, into the
   character value it denotes.  The letters b, f, n, r and t map to
   the corresponding host control characters; every other character,
   including the quote characters, is returned unchanged.  */

static int
process_escape (int ch)
{
  static const struct
  {
    char letter;
    char value;
  } escapes[] =
    {
      { 'b', '\b' },
      { 'f', '\f' },
      { 'n', '\n' },
      { 'r', '\r' },
      { 't', '\t' }
    };
  size_t i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    if (ch == escapes[i].letter)
      return escapes[i].value;

  /* '\'' and '"' stand for themselves, as does anything unknown.  */
  return ch;
}
354
355 /* This function is called to process input characters. The GET
356 parameter is used to retrieve more input characters. GET should
357 set its parameter to point to a buffer, and return the length of
358 the buffer; it should return 0 at end of file. The scrubbed output
359 characters are put into the buffer starting at TOSTART; the TOSTART
360 buffer is TOLEN bytes in length. The function returns the number
361 of scrubbed characters put into TOSTART. This will be TOLEN unless
362 end of file was seen. This function is arranged as a state
363 machine, and saves its state so that it may return at any point.
364 This is the way the old code used to work. */
365
366 size_t
367 do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
368 {
369 char *to = tostart;
370 char *toend = tostart + tolen;
371 char *from;
372 char *fromend;
373 size_t fromlen;
374 int ch, ch2 = 0;
375 /* Character that started the string we're working on. */
376 static char quotechar;
377
378 /*State 0: beginning of normal line
379 1: After first whitespace on line (flush more white)
380 2: After first non-white (opcode) on line (keep 1white)
381 3: after second white on line (into operands) (flush white)
382 4: after putting out a .linefile, put out digits
383 5: parsing a string, then go to old-state
384 6: putting out \ escape in a "d string.
385 7: no longer used
386 8: no longer used
387 9: After seeing symbol char in state 3 (keep 1white after symchar)
388 10: After seeing whitespace in state 9 (keep white before symchar)
389 11: After seeing a symbol character in state 0 (eg a label definition)
390 -1: output string in out_string and go to the state in old_state
391 -2: flush text until a '*' '/' is seen, then go to state old_state
392 #ifdef TC_V850
393 12: After seeing a dash, looking for a second dash as a start
394 of comment.
395 #endif
396 #ifdef DOUBLEBAR_PARALLEL
397 13: After seeing a vertical bar, looking for a second
398 vertical bar as a parallel expression separator.
399 #endif
400 #ifdef TC_PREDICATE_START_CHAR
401 14: After seeing a predicate start character at state 0, looking
402 for a predicate end character as predicate.
403 15: After seeing a predicate start character at state 1, looking
404 for a predicate end character as predicate.
405 #endif
406 #ifdef TC_Z80
407 16: After seeing an 'a' or an 'A' at the start of a symbol
408 17: After seeing an 'f' or an 'F' in state 16
409 #endif
410 */
411
412 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
413 constructs like ``.loc 1 20''. This was turning into ``.loc
414 120''. States 9 and 10 ensure that a space is never dropped in
415 between characters which could appear in an identifier. Ian
416 Taylor, ian@cygnus.com.
417
418 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
419 correctly on the PA (and any other target where colons are optional).
420 Jeff Law, law@cs.utah.edu.
421
422 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
423 get squashed into "cmp r1,r2||trap#1", with the all important space
424 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
425
426 /* This macro gets the next input character. */
427
428 #define GET() \
429 (from < fromend \
430 ? * (unsigned char *) (from++) \
431 : (saved_input = NULL, \
432 fromlen = (*get) (input_buffer, sizeof input_buffer), \
433 from = input_buffer, \
434 fromend = from + fromlen, \
435 (fromlen == 0 \
436 ? EOF \
437 : * (unsigned char *) (from++))))
438
439 /* This macro pushes a character back on the input stream. */
440
441 #define UNGET(uch) (*--from = (uch))
442
443 /* This macro puts a character into the output buffer. If this
444 character fills the output buffer, this macro jumps to the label
445 TOFULL. We use this rather ugly approach because we need to
446 handle two different termination conditions: EOF on the input
447 stream, and a full output buffer. It would be simpler if we
448 always read in the entire input stream before processing it, but
449 I don't want to make such a significant change to the assembler's
450 memory usage. */
451
452 #define PUT(pch) \
453 do \
454 { \
455 *to++ = (pch); \
456 if (to >= toend) \
457 goto tofull; \
458 } \
459 while (0)
460
461 if (saved_input != NULL)
462 {
463 from = saved_input;
464 fromend = from + saved_input_len;
465 }
466 else
467 {
468 fromlen = (*get) (input_buffer, sizeof input_buffer);
469 if (fromlen == 0)
470 return 0;
471 from = input_buffer;
472 fromend = from + fromlen;
473 }
474
475 while (1)
476 {
477 /* The cases in this switch end with continue, in order to
478 branch back to the top of this while loop and generate the
479 next output character in the appropriate state. */
480 switch (state)
481 {
482 case -1:
483 ch = *out_string++;
484 if (*out_string == '\0')
485 {
486 state = old_state;
487 old_state = 3;
488 }
489 PUT (ch);
490 continue;
491
492 case -2:
493 for (;;)
494 {
495 do
496 {
497 ch = GET ();
498
499 if (ch == EOF)
500 {
501 as_warn (_("end of file in comment"));
502 goto fromeof;
503 }
504
505 if (ch == '\n')
506 PUT ('\n');
507 }
508 while (ch != '*');
509
510 while ((ch = GET ()) == '*')
511 ;
512
513 if (ch == EOF)
514 {
515 as_warn (_("end of file in comment"));
516 goto fromeof;
517 }
518
519 if (ch == '/')
520 break;
521
522 UNGET (ch);
523 }
524
525 state = old_state;
526 UNGET (' ');
527 continue;
528
529 case 4:
530 ch = GET ();
531 if (ch == EOF)
532 goto fromeof;
533 else if (ch >= '0' && ch <= '9')
534 PUT (ch);
535 else
536 {
537 while (ch != EOF && IS_WHITESPACE (ch))
538 ch = GET ();
539 if (ch == '"')
540 {
541 quotechar = ch;
542 state = 5;
543 old_state = 3;
544 PUT (ch);
545 }
546 else
547 {
548 while (ch != EOF && ch != '\n')
549 ch = GET ();
550 state = 0;
551 PUT (ch);
552 }
553 }
554 continue;
555
556 case 5:
557 /* We are going to copy everything up to a quote character,
558 with special handling for a backslash. We try to
559 optimize the copying in the simple case without using the
560 GET and PUT macros. */
561 {
562 char *s;
563 ptrdiff_t len;
564
565 for (s = from; s < fromend; s++)
566 {
567 ch = *s;
568 if (ch == '\\'
569 || ch == quotechar
570 || ch == '\n')
571 break;
572 }
573 len = s - from;
574 if (len > toend - to)
575 len = toend - to;
576 if (len > 0)
577 {
578 memcpy (to, from, len);
579 to += len;
580 from += len;
581 if (to >= toend)
582 goto tofull;
583 }
584 }
585
586 ch = GET ();
587 if (ch == EOF)
588 {
589 /* This buffer is here specifically so
590 that the UNGET below will work. */
591 static char one_char_buf[1];
592
593 as_warn (_("end of file in string; '%c' inserted"), quotechar);
594 state = old_state;
595 from = fromend = one_char_buf + 1;
596 fromlen = 1;
597 UNGET ('\n');
598 PUT (quotechar);
599 }
600 else if (ch == quotechar)
601 {
602 state = old_state;
603 PUT (ch);
604 }
605 else if (TC_STRING_ESCAPES && ch == '\\')
606 {
607 state = 6;
608 PUT (ch);
609 }
610 else if (scrub_m68k_mri && ch == '\n')
611 {
612 /* Just quietly terminate the string. This permits lines like
613 bne label loop if we haven't reach end yet. */
614 state = old_state;
615 UNGET (ch);
616 PUT ('\'');
617 }
618 else
619 {
620 PUT (ch);
621 }
622 continue;
623
624 case 6:
625 state = 5;
626 ch = GET ();
627 switch (ch)
628 {
629 /* Handle strings broken across lines, by turning '\n' into
630 '\\' and 'n'. */
631 case '\n':
632 UNGET ('n');
633 add_newlines++;
634 PUT ('\\');
635 continue;
636
637 case EOF:
638 as_warn (_("end of file in string; '%c' inserted"), quotechar);
639 PUT (quotechar);
640 continue;
641
642 case '"':
643 case '\\':
644 case 'b':
645 case 'f':
646 case 'n':
647 case 'r':
648 case 't':
649 case 'v':
650 case 'x':
651 case 'X':
652 case '0':
653 case '1':
654 case '2':
655 case '3':
656 case '4':
657 case '5':
658 case '6':
659 case '7':
660 break;
661
662 default:
663 #ifdef ONLY_STANDARD_ESCAPES
664 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
665 #endif
666 break;
667 }
668 PUT (ch);
669 continue;
670
671 #ifdef DOUBLEBAR_PARALLEL
672 case 13:
673 ch = GET ();
674 if (ch != '|')
675 abort ();
676
677 /* Reset back to state 1 and pretend that we are parsing a
678 line from just after the first white space. */
679 state = 1;
680 PUT ('|');
681 #ifdef TC_TIC6X
682 /* "||^" is used for SPMASKed instructions. */
683 ch = GET ();
684 if (ch == EOF)
685 goto fromeof;
686 else if (ch == '^')
687 PUT ('^');
688 else
689 UNGET (ch);
690 #endif
691 continue;
692 #endif
693 #ifdef TC_Z80
694 case 16:
695 /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
696 ch = GET ();
697 if (ch == 'f' || ch == 'F')
698 {
699 state = 17;
700 PUT (ch);
701 }
702 else
703 {
704 state = 9;
705 break;
706 }
707 /* Fall through. */
708 case 17:
709 /* We have seen "af" at the start of a symbol,
710 a ' here is a part of that symbol. */
711 ch = GET ();
712 state = 9;
713 if (ch == '\'')
714 /* Change to avoid warning about unclosed string. */
715 PUT ('`');
716 else if (ch != EOF)
717 UNGET (ch);
718 break;
719 #endif
720 }
721
722 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
723
724 /* flushchar: */
725 ch = GET ();
726
727 #ifdef TC_PREDICATE_START_CHAR
728 if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
729 {
730 state += 14;
731 PUT (ch);
732 continue;
733 }
734 else if (state == 14 || state == 15)
735 {
736 if (ch == TC_PREDICATE_END_CHAR)
737 {
738 state -= 14;
739 PUT (ch);
740 ch = GET ();
741 }
742 else
743 {
744 PUT (ch);
745 continue;
746 }
747 }
748 #endif
749
750 recycle:
751
752 #if defined TC_ARM && defined OBJ_ELF
753 /* We need to watch out for .symver directives. See the comment later
754 in this function. */
755 if (symver_state == NULL)
756 {
757 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
758 symver_state = symver_pseudo + 1;
759 }
760 else
761 {
762 /* We advance to the next state if we find the right
763 character. */
764 if (ch != '\0' && (*symver_state == ch))
765 ++symver_state;
766 else if (*symver_state != '\0')
767 /* We did not get the expected character, or we didn't
768 get a valid terminating character after seeing the
769 entire pseudo-op, so we must go back to the beginning. */
770 symver_state = NULL;
771 else
772 {
773 /* We've read the entire pseudo-op. If this is the end
774 of the line, go back to the beginning. */
775 if (IS_NEWLINE (ch))
776 symver_state = NULL;
777 }
778 }
779 #endif /* TC_ARM && OBJ_ELF */
780
781 #ifdef TC_M68K
782 /* We want to have pseudo-ops which control whether we are in
783 MRI mode or not. Unfortunately, since m68k MRI mode affects
784 the scrubber, that means that we need a special purpose
785 recognizer here. */
786 if (mri_state == NULL)
787 {
788 if ((state == 0 || state == 1)
789 && ch == mri_pseudo[0])
790 mri_state = mri_pseudo + 1;
791 }
792 else
793 {
794 /* We advance to the next state if we find the right
795 character, or if we need a space character and we get any
796 whitespace character, or if we need a '0' and we get a
797 '1' (this is so that we only need one state to handle
798 ``.mri 0'' and ``.mri 1''). */
799 if (ch != '\0'
800 && (*mri_state == ch
801 || (*mri_state == ' '
802 && lex[ch] == LEX_IS_WHITESPACE)
803 || (*mri_state == '0'
804 && ch == '1')))
805 {
806 mri_last_ch = ch;
807 ++mri_state;
808 }
809 else if (*mri_state != '\0'
810 || (lex[ch] != LEX_IS_WHITESPACE
811 && lex[ch] != LEX_IS_NEWLINE))
812 {
813 /* We did not get the expected character, or we didn't
814 get a valid terminating character after seeing the
815 entire pseudo-op, so we must go back to the
816 beginning. */
817 mri_state = NULL;
818 }
819 else
820 {
/* We've read the entire pseudo-op.  mri_last_ch is
822 either '0' or '1' indicating whether to enter or
823 leave MRI mode. */
824 do_scrub_begin (mri_last_ch == '1');
825 mri_state = NULL;
826
827 /* We continue handling the character as usual. The
828 main gas reader must also handle the .mri pseudo-op
829 to control expression parsing and the like. */
830 }
831 }
832 #endif
833
834 if (ch == EOF)
835 {
836 if (state != 0)
837 {
838 as_warn (_("end of file not at end of a line; newline inserted"));
839 state = 0;
840 PUT ('\n');
841 }
842 goto fromeof;
843 }
844
845 switch (lex[ch])
846 {
847 case LEX_IS_WHITESPACE:
848 do
849 {
850 ch = GET ();
851 }
852 while (ch != EOF && IS_WHITESPACE (ch));
853 if (ch == EOF)
854 goto fromeof;
855
856 if (state == 0)
857 {
858 /* Preserve a single whitespace character at the
859 beginning of a line. */
860 state = 1;
861 UNGET (ch);
862 PUT (' ');
863 break;
864 }
865
866 #ifdef KEEP_WHITE_AROUND_COLON
867 if (lex[ch] == LEX_IS_COLON)
868 {
869 /* Only keep this white if there's no white *after* the
870 colon. */
871 ch2 = GET ();
872 if (ch2 != EOF)
873 UNGET (ch2);
874 if (!IS_WHITESPACE (ch2))
875 {
876 state = 9;
877 UNGET (ch);
878 PUT (' ');
879 break;
880 }
881 }
882 #endif
883 if (IS_COMMENT (ch)
884 || ch == '/'
885 || IS_LINE_SEPARATOR (ch)
886 || IS_PARALLEL_SEPARATOR (ch))
887 {
888 if (scrub_m68k_mri)
889 {
890 /* In MRI mode, we keep these spaces. */
891 UNGET (ch);
892 PUT (' ');
893 break;
894 }
895 goto recycle;
896 }
897
898 /* If we're in state 2 or 11, we've seen a non-white
899 character followed by whitespace. If the next character
900 is ':', this is whitespace after a label name which we
901 normally must ignore. In MRI mode, though, spaces are
902 not permitted between the label and the colon. */
903 if ((state == 2 || state == 11)
904 && lex[ch] == LEX_IS_COLON
905 && ! scrub_m68k_mri)
906 {
907 state = 1;
908 PUT (ch);
909 break;
910 }
911
912 switch (state)
913 {
914 case 1:
915 /* We can arrive here if we leave a leading whitespace
916 character at the beginning of a line. */
917 goto recycle;
918 case 2:
919 state = 3;
920 if (to + 1 < toend)
921 {
922 /* Optimize common case by skipping UNGET/GET. */
923 PUT (' '); /* Sp after opco */
924 goto recycle;
925 }
926 UNGET (ch);
927 PUT (' ');
928 break;
929 case 3:
930 #ifndef TC_KEEP_OPERAND_SPACES
931 /* For TI C6X, we keep these spaces as they may separate
932 functional unit specifiers from operands. */
933 if (scrub_m68k_mri)
934 #endif
935 {
936 /* In MRI mode, we keep these spaces. */
937 UNGET (ch);
938 PUT (' ');
939 break;
940 }
941 goto recycle; /* Sp in operands */
942 case 9:
943 case 10:
944 #ifndef TC_KEEP_OPERAND_SPACES
945 if (scrub_m68k_mri)
946 #endif
947 {
948 /* In MRI mode, we keep these spaces. */
949 state = 3;
950 UNGET (ch);
951 PUT (' ');
952 break;
953 }
954 state = 10; /* Sp after symbol char */
955 goto recycle;
956 case 11:
957 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
958 state = 1;
959 else
960 {
961 /* We know that ch is not ':', since we tested that
962 case above. Therefore this is not a label, so it
963 must be the opcode, and we've just seen the
964 whitespace after it. */
965 state = 3;
966 }
967 UNGET (ch);
968 PUT (' '); /* Sp after label definition. */
969 break;
970 default:
971 BAD_CASE (state);
972 }
973 break;
974
975 case LEX_IS_TWOCHAR_COMMENT_1ST:
976 ch2 = GET ();
977 if (ch2 == '*')
978 {
979 for (;;)
980 {
981 do
982 {
983 ch2 = GET ();
984 if (ch2 != EOF && IS_NEWLINE (ch2))
985 add_newlines++;
986 }
987 while (ch2 != EOF && ch2 != '*');
988
989 while (ch2 == '*')
990 ch2 = GET ();
991
992 if (ch2 == EOF || ch2 == '/')
993 break;
994
995 /* This UNGET will ensure that we count newlines
996 correctly. */
997 UNGET (ch2);
998 }
999
1000 if (ch2 == EOF)
1001 as_warn (_("end of file in multiline comment"));
1002
1003 ch = ' ';
1004 goto recycle;
1005 }
1006 #ifdef DOUBLESLASH_LINE_COMMENTS
1007 else if (ch2 == '/')
1008 {
1009 do
1010 {
1011 ch = GET ();
1012 }
1013 while (ch != EOF && !IS_NEWLINE (ch));
1014 if (ch == EOF)
1015 as_warn ("end of file in comment; newline inserted");
1016 state = 0;
1017 PUT ('\n');
1018 break;
1019 }
1020 #endif
1021 else
1022 {
1023 if (ch2 != EOF)
1024 UNGET (ch2);
1025 if (state == 9 || state == 10)
1026 state = 3;
1027 PUT (ch);
1028 }
1029 break;
1030
1031 case LEX_IS_STRINGQUOTE:
1032 quotechar = ch;
1033 if (state == 10)
1034 {
1035 /* Preserve the whitespace in foo "bar". */
1036 UNGET (ch);
1037 state = 3;
1038 PUT (' ');
1039
1040 /* PUT didn't jump out. We could just break, but we
1041 know what will happen, so optimize a bit. */
1042 ch = GET ();
1043 old_state = 3;
1044 }
1045 else if (state == 9)
1046 old_state = 3;
1047 else
1048 old_state = state;
1049 state = 5;
1050 PUT (ch);
1051 break;
1052
1053 case LEX_IS_ONECHAR_QUOTE:
1054 #ifdef H_TICK_HEX
1055 if (state == 9 && enable_h_tick_hex)
1056 {
1057 char c;
1058
1059 c = GET ();
1060 as_warn ("'%c found after symbol", c);
1061 UNGET (c);
1062 }
1063 #endif
1064 if (state == 10)
1065 {
1066 /* Preserve the whitespace in foo 'b'. */
1067 UNGET (ch);
1068 state = 3;
1069 PUT (' ');
1070 break;
1071 }
1072 ch = GET ();
1073 if (ch == EOF)
1074 {
1075 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1076 ch = 0;
1077 }
1078 if (ch == '\\')
1079 {
1080 ch = GET ();
1081 if (ch == EOF)
1082 {
1083 as_warn (_("end of file in escape character"));
1084 ch = '\\';
1085 }
1086 else
1087 ch = process_escape (ch);
1088 }
1089 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1090
1091 /* None of these 'x constants for us. We want 'x'. */
1092 if ((ch = GET ()) != '\'')
1093 {
1094 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
1095 as_warn (_("missing close quote; (assumed)"));
1096 #else
1097 if (ch != EOF)
1098 UNGET (ch);
1099 #endif
1100 }
1101 if (strlen (out_buf) == 1)
1102 {
1103 PUT (out_buf[0]);
1104 break;
1105 }
1106 if (state == 9)
1107 old_state = 3;
1108 else
1109 old_state = state;
1110 state = -1;
1111 out_string = out_buf;
1112 PUT (*out_string++);
1113 break;
1114
1115 case LEX_IS_COLON:
1116 #ifdef KEEP_WHITE_AROUND_COLON
1117 state = 9;
1118 #else
1119 if (state == 9 || state == 10)
1120 state = 3;
1121 else if (state != 3)
1122 state = 1;
1123 #endif
1124 PUT (ch);
1125 break;
1126
1127 case LEX_IS_NEWLINE:
1128 /* Roll out a bunch of newlines from inside comments, etc. */
1129 if (add_newlines)
1130 {
1131 --add_newlines;
1132 UNGET (ch);
1133 }
1134 /* Fall through. */
1135
1136 case LEX_IS_LINE_SEPARATOR:
1137 state = 0;
1138 PUT (ch);
1139 break;
1140
1141 case LEX_IS_PARALLEL_SEPARATOR:
1142 state = 1;
1143 PUT (ch);
1144 break;
1145
1146 #ifdef TC_V850
1147 case LEX_IS_DOUBLEDASH_1ST:
1148 ch2 = GET ();
1149 if (ch2 != '-')
1150 {
1151 if (ch2 != EOF)
1152 UNGET (ch2);
1153 goto de_fault;
1154 }
1155 /* Read and skip to end of line. */
1156 do
1157 {
1158 ch = GET ();
1159 }
1160 while (ch != EOF && ch != '\n');
1161
1162 if (ch == EOF)
1163 as_warn (_("end of file in comment; newline inserted"));
1164
1165 state = 0;
1166 PUT ('\n');
1167 break;
1168 #endif
1169 #ifdef DOUBLEBAR_PARALLEL
1170 case LEX_IS_DOUBLEBAR_1ST:
1171 ch2 = GET ();
1172 if (ch2 != EOF)
1173 UNGET (ch2);
1174 if (ch2 != '|')
1175 goto de_fault;
1176
1177 /* Handle '||' in two states as invoking PUT twice might
1178 result in the first one jumping out of this loop. We'd
1179 then lose track of the state and one '|' char. */
1180 state = 13;
1181 PUT ('|');
1182 break;
1183 #endif
1184 case LEX_IS_LINE_COMMENT_START:
1185 /* FIXME-someday: The two character comment stuff was badly
1186 thought out. On i386, we want '/' as line comment start
1187 AND we want C style comments. hence this hack. The
1188 whole lexical process should be reworked. xoxorich. */
1189 if (ch == '/')
1190 {
1191 ch2 = GET ();
1192 if (ch2 == '*')
1193 {
1194 old_state = 3;
1195 state = -2;
1196 break;
1197 }
1198 else if (ch2 != EOF)
1199 {
1200 UNGET (ch2);
1201 }
1202 }
1203
1204 if (state == 0 || state == 1) /* Only comment at start of line. */
1205 {
1206 int startch;
1207
1208 startch = ch;
1209
1210 do
1211 {
1212 ch = GET ();
1213 }
1214 while (ch != EOF && IS_WHITESPACE (ch));
1215
1216 if (ch == EOF)
1217 {
1218 as_warn (_("end of file in comment; newline inserted"));
1219 PUT ('\n');
1220 break;
1221 }
1222
1223 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1224 {
1225 /* Not a cpp line. */
1226 while (ch != EOF && !IS_NEWLINE (ch))
1227 ch = GET ();
1228 if (ch == EOF)
1229 {
1230 as_warn (_("end of file in comment; newline inserted"));
1231 PUT ('\n');
1232 }
1233 else /* IS_NEWLINE (ch) */
1234 {
1235 /* To process non-zero add_newlines. */
1236 UNGET (ch);
1237 }
1238 state = 0;
1239 break;
1240 }
1241 /* Looks like `# 123 "filename"' from cpp. */
1242 UNGET (ch);
1243 old_state = 4;
1244 state = -1;
1245 if (scrub_m68k_mri)
1246 out_string = "\tlinefile ";
1247 else
1248 out_string = "\t.linefile ";
1249 PUT (*out_string++);
1250 break;
1251 }
1252
1253 #ifdef TC_D10V
1254 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1255 Trap is the only short insn that has a first operand that is
1256 neither register nor label.
1257 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
1258 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1259 already LEX_IS_LINE_COMMENT_START. However, it is the
1260 only character in line_comment_chars for d10v, hence we
1261 can recognize it as such. */
1262 /* An alternative approach would be to reset the state to 1 when
1263 we see '||', '<'- or '->', but that seems to be overkill. */
1264 if (state == 10)
1265 PUT (' ');
1266 #endif
1267 /* We have a line comment character which is not at the
1268 start of a line. If this is also a normal comment
1269 character, fall through. Otherwise treat it as a default
1270 character. */
1271 if (strchr (tc_comment_chars, ch) == NULL
1272 && (! scrub_m68k_mri
1273 || (ch != '!' && ch != '*')))
1274 goto de_fault;
1275 if (scrub_m68k_mri
1276 && (ch == '!' || ch == '*' || ch == '#')
1277 && state != 1
1278 && state != 10)
1279 goto de_fault;
1280 /* Fall through. */
1281 case LEX_IS_COMMENT_START:
1282 #if defined TC_ARM && defined OBJ_ELF
1283 /* On the ARM, `@' is the comment character.
1284 Unfortunately this is also a special character in ELF .symver
1285 directives (and .type, though we deal with those another way).
1286 So we check if this line is such a directive, and treat
1287 the character as default if so. This is a hack. */
1288 if ((symver_state != NULL) && (*symver_state == 0))
1289 goto de_fault;
1290 #endif
1291
1292 #ifdef TC_ARM
1293 /* For the ARM, care is needed not to damage occurrences of \@
1294 by stripping the @ onwards. Yuck. */
1295 if ((to > tostart ? to[-1] : last_char) == '\\')
1296 /* Do not treat the @ as a start-of-comment. */
1297 goto de_fault;
1298 #endif
1299
1300 #ifdef WARN_COMMENTS
1301 if (!found_comment)
1302 found_comment_file = as_where (&found_comment);
1303 #endif
1304 do
1305 {
1306 ch = GET ();
1307 }
1308 while (ch != EOF && !IS_NEWLINE (ch));
1309 if (ch == EOF)
1310 as_warn (_("end of file in comment; newline inserted"));
1311 state = 0;
1312 PUT ('\n');
1313 break;
1314
1315 #ifdef H_TICK_HEX
1316 case LEX_IS_H:
1317 /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1318 the H' with 0x to make them gas-style hex characters. */
1319 if (enable_h_tick_hex)
1320 {
1321 char quot;
1322
1323 quot = GET ();
1324 if (quot == '\'')
1325 {
1326 UNGET ('x');
1327 ch = '0';
1328 }
1329 else
1330 UNGET (quot);
1331 }
1332 #endif
1333 /* Fall through. */
1334
1335 case LEX_IS_SYMBOL_COMPONENT:
1336 if (state == 10)
1337 {
1338 /* This is a symbol character following another symbol
1339 character, with whitespace in between. We skipped
1340 the whitespace earlier, so output it now. */
1341 UNGET (ch);
1342 state = 3;
1343 PUT (' ');
1344 break;
1345 }
1346
1347 #ifdef TC_Z80
1348 /* "af'" is a symbol containing '\''. */
1349 if (state == 3 && (ch == 'a' || ch == 'A'))
1350 {
1351 state = 16;
1352 PUT (ch);
1353 ch = GET ();
1354 if (ch == 'f' || ch == 'F')
1355 {
1356 state = 17;
1357 PUT (ch);
1358 break;
1359 }
1360 else
1361 {
1362 state = 9;
1363 if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
1364 {
1365 if (ch != EOF)
1366 UNGET (ch);
1367 break;
1368 }
1369 }
1370 }
1371 #endif
1372 if (state == 3)
1373 state = 9;
1374
1375 /* This is a common case. Quickly copy CH and all the
1376 following symbol component or normal characters. */
1377 if (to + 1 < toend
1378 && mri_state == NULL
1379 #if defined TC_ARM && defined OBJ_ELF
1380 && symver_state == NULL
1381 #endif
1382 )
1383 {
1384 char *s;
1385 ptrdiff_t len;
1386
1387 for (s = from; s < fromend; s++)
1388 {
1389 int type;
1390
1391 ch2 = *(unsigned char *) s;
1392 type = lex[ch2];
1393 if (type != 0
1394 && type != LEX_IS_SYMBOL_COMPONENT)
1395 break;
1396 }
1397
1398 if (s > from)
1399 /* Handle the last character normally, for
1400 simplicity. */
1401 --s;
1402
1403 len = s - from;
1404
1405 if (len > (toend - to) - 1)
1406 len = (toend - to) - 1;
1407
1408 if (len > 0)
1409 {
1410 PUT (ch);
1411 memcpy (to, from, len);
1412 to += len;
1413 from += len;
1414 if (to >= toend)
1415 goto tofull;
1416 ch = GET ();
1417 }
1418 }
1419
1420 /* Fall through. */
1421 default:
1422 de_fault:
1423 /* Some relatively `normal' character. */
1424 if (state == 0)
1425 {
1426 state = 11; /* Now seeing label definition. */
1427 }
1428 else if (state == 1)
1429 {
1430 state = 2; /* Ditto. */
1431 }
1432 else if (state == 9)
1433 {
1434 if (!IS_SYMBOL_COMPONENT (ch))
1435 state = 3;
1436 }
1437 else if (state == 10)
1438 {
1439 if (ch == '\\')
1440 {
1441 /* Special handling for backslash: a backslash may
1442 be the beginning of a formal parameter (of a
1443 macro) following another symbol character, with
1444 whitespace in between. If that is the case, we
1445 output a space before the parameter. Strictly
1446 speaking, correct handling depends upon what the
1447 macro parameter expands into; if the parameter
1448 expands into something which does not start with
1449 an operand character, then we don't want to keep
1450 the space. We don't have enough information to
1451 make the right choice, so here we are making the
1452 choice which is more likely to be correct. */
1453 if (to + 1 >= toend)
1454 {
1455 /* If we're near the end of the buffer, save the
1456 character for the next time round. Otherwise
1457 we'll lose our state. */
1458 UNGET (ch);
1459 goto tofull;
1460 }
1461 *to++ = ' ';
1462 }
1463
1464 state = 3;
1465 }
1466 PUT (ch);
1467 break;
1468 }
1469 }
1470
1471 /*NOTREACHED*/
1472
1473 fromeof:
1474 /* We have reached the end of the input. */
1475 #ifdef TC_ARM
1476 if (to > tostart)
1477 last_char = to[-1];
1478 #endif
1479 return to - tostart;
1480
1481 tofull:
1482 /* The output buffer is full. Save any input we have not yet
1483 processed. */
1484 if (fromend > from)
1485 {
1486 saved_input = from;
1487 saved_input_len = fromend - from;
1488 }
1489 else
1490 saved_input = NULL;
1491
1492 #ifdef TC_ARM
1493 if (to > tostart)
1494 last_char = to[-1];
1495 #endif
1496 return to - tostart;
1497 }
This page took 0.060957 seconds and 4 git commands to generate.