* config/tc-a29k.h: Fix comment typos.
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
252b5132 1/* This is the Assembler Pre-Processor
f7e42eb4 2 Copyright 1987, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
b1ac4c66 3 1999, 2000, 2002, 2003
252b5132
RH
4 Free Software Foundation, Inc.
5
6 This file is part of GAS, the GNU Assembler.
7
8 GAS is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
12
13 GAS is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GAS; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
22
204cd129 23/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
252b5132
RH
24/* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
27 pair. This needs better error-handling. */
28
29#include <stdio.h>
8d9cd6b1 30#include "as.h" /* For BAD_CASE() only. */
252b5132
RH
31
32#if (__STDC__ != 1)
33#ifndef const
34#define const /* empty */
35#endif
36#endif
37
#ifdef TC_M68K
/* Whether we are scrubbing in m68k MRI mode.  This is different from
   flag_m68k_mri, because the two flags will be affected by the .mri
   pseudo-op at different times.  */
static int scrub_m68k_mri;

/* The pseudo-op which switches in and out of MRI mode.  See the
   comment in do_scrub_chars.  */
static const char mri_pseudo[] = ".mri 0";
#else
/* Targets other than the m68k never scrub in MRI mode; making the
   flag a constant lets every test of it compile away.  */
#define scrub_m68k_mri 0
#endif

#if defined TC_ARM && defined OBJ_ELF
/* The pseudo-op for which we need to special-case `@' characters.
   See the comment in do_scrub_chars.  */
static const char symver_pseudo[] = ".symver";

/* Progress of the .symver recognizer in do_scrub_chars: NULL while no
   match is in progress, otherwise a pointer to the next character of
   symver_pseudo that is expected (pointing at the terminating NUL
   once the whole pseudo-op has been seen).  */
static const char * symver_state;
#endif
57
/* Character classification table used by the scrubber, indexed by
   character value (0..255).  Entries are filled in by do_scrub_begin;
   an entry of 0 means the character has no special class.  */
static char lex[256];

/* Characters which are always treated as part of a symbol.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Classes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
#ifdef TC_V850
#define LEX_IS_DOUBLEDASH_1ST		12
#endif
#ifdef TC_M32R
#define DOUBLEBAR_PARALLEL
#endif
#ifdef DOUBLEBAR_PARALLEL
#define LEX_IS_DOUBLEBAR_1ST		13
#endif
#define LEX_IS_PARALLEL_SEPARATOR	14

/* Classification predicates.  The argument is reduced to unsigned
   char before indexing so that a negative value (a plain char with
   the high bit set, or EOF) can never read outside lex[].  Callers
   that must treat EOF specially still have to test for it
   themselves.  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_PARALLEL_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_PARALLEL_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
89
90static int process_escape PARAMS ((int));
91
92/* FIXME-soon: The entire lexer/parser thingy should be
93 built statically at compile time rather than dynamically
3ee4defc 94 each and every time the assembler is run. xoxorich. */
252b5132 95
3ee4defc 96void
252b5132 97do_scrub_begin (m68k_mri)
92161534 98 int m68k_mri ATTRIBUTE_UNUSED;
252b5132
RH
99{
100 const char *p;
101 int c;
102
252b5132
RH
103 lex[' '] = LEX_IS_WHITESPACE;
104 lex['\t'] = LEX_IS_WHITESPACE;
105 lex['\r'] = LEX_IS_WHITESPACE;
106 lex['\n'] = LEX_IS_NEWLINE;
252b5132
RH
107 lex[':'] = LEX_IS_COLON;
108
abd63a32
AM
109#ifdef TC_M68K
110 scrub_m68k_mri = m68k_mri;
111
252b5132 112 if (! m68k_mri)
abd63a32 113#endif
252b5132
RH
114 {
115 lex['"'] = LEX_IS_STRINGQUOTE;
116
5b93d8bb 117#if ! defined (TC_HPPA) && ! defined (TC_I370)
204cd129 118 /* I370 uses single-quotes to delimit integer, float constants. */
252b5132
RH
119 lex['\''] = LEX_IS_ONECHAR_QUOTE;
120#endif
121
122#ifdef SINGLE_QUOTE_STRINGS
123 lex['\''] = LEX_IS_STRINGQUOTE;
124#endif
125 }
126
127 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
128 in state 5 of do_scrub_chars must be changed. */
129
130 /* Note that these override the previous defaults, e.g. if ';' is a
131 comment char, then it isn't a line separator. */
132 for (p = symbol_chars; *p; ++p)
204cd129 133 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
252b5132
RH
134
135 for (c = 128; c < 256; ++c)
136 lex[c] = LEX_IS_SYMBOL_COMPONENT;
137
138#ifdef tc_symbol_chars
139 /* This macro permits the processor to specify all characters which
140 may appears in an operand. This will prevent the scrubber from
141 discarding meaningful whitespace in certain cases. The i386
142 backend uses this to support prefixes, which can confuse the
143 scrubber as to whether it is parsing operands or opcodes. */
144 for (p = tc_symbol_chars; *p; ++p)
145 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
146#endif
147
148 /* The m68k backend wants to be able to change comment_chars. */
149#ifndef tc_comment_chars
150#define tc_comment_chars comment_chars
151#endif
152 for (p = tc_comment_chars; *p; p++)
204cd129 153 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
252b5132
RH
154
155 for (p = line_comment_chars; *p; p++)
204cd129 156 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
252b5132
RH
157
158 for (p = line_separator_chars; *p; p++)
204cd129 159 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
252b5132 160
62f65a7b
DB
161#ifdef tc_parallel_separator_chars
162 /* This macro permits the processor to specify all characters which
163 separate parallel insns on the same line. */
164 for (p = tc_parallel_separator_chars; *p; p++)
204cd129 165 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
62f65a7b
DB
166#endif
167
252b5132
RH
168 /* Only allow slash-star comments if slash is not in use.
169 FIXME: This isn't right. We should always permit them. */
170 if (lex['/'] == 0)
204cd129 171 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
252b5132 172
abd63a32 173#ifdef TC_M68K
252b5132
RH
174 if (m68k_mri)
175 {
176 lex['\''] = LEX_IS_STRINGQUOTE;
177 lex[';'] = LEX_IS_COMMENT_START;
178 lex['*'] = LEX_IS_LINE_COMMENT_START;
179 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
b1ac4c66 180 then it can't be used in an expression. */
252b5132
RH
181 lex['!'] = LEX_IS_LINE_COMMENT_START;
182 }
abd63a32 183#endif
252b5132
RH
184
185#ifdef TC_V850
186 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
187#endif
f28e8eb3 188#ifdef DOUBLEBAR_PARALLEL
252b5132
RH
189 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
190#endif
191#ifdef TC_D30V
204cd129 192 /* Must do this is we want VLIW instruction with "->" or "<-". */
252b5132
RH
193 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
194#endif
204cd129 195}
252b5132 196
/* Saved state of the scrubber.  do_scrub_chars may return at any
   point, so everything it needs in order to resume lives here.  */

/* Current state of the do_scrub_chars state machine.  */
static int state;
/* State to return to once the current temporary state (-1, -2, 5)
   has finished.  */
static int old_state;
/* Next character of the pending text being emitted in state -1.  */
static char *out_string;
/* Holds the decimal text generated for a one-character quote;
   out_string points into it while that text is being emitted.  */
static char out_buf[20];
/* Number of newlines consumed inside comments and continued strings
   which must still be written out to keep line numbers in step.  */
static int add_newlines;
/* When non-NULL, input left over from the previous call to
   do_scrub_chars, SAVED_INPUT_LEN bytes long; scrubbing resumes from
   it before asking for more input.  */
static char *saved_input;
static int saved_input_len;
/* Buffer handed to the GET callback of do_scrub_chars.  */
static char input_buffer[32 * 1024];
/* Progress of the .mri recognizer in do_scrub_chars: NULL while no
   match is in progress, otherwise the next expected character.  */
static const char *mri_state;
/* The last character matched by the .mri recognizer.  */
static char mri_last_ch;
208
209/* Data structure for saving the state of app across #include's. Note that
210 app is called asynchronously to the parsing of the .include's, so our
211 state at the time .include is interpreted is completely unrelated.
212 That's why we have to save it all. */
213
204cd129
NC
214struct app_save
215{
30a2b4ef
KH
216 int state;
217 int old_state;
218 char * out_string;
219 char out_buf[sizeof (out_buf)];
220 int add_newlines;
221 char * saved_input;
222 int saved_input_len;
abd63a32 223#ifdef TC_M68K
30a2b4ef 224 int scrub_m68k_mri;
abd63a32 225#endif
30a2b4ef
KH
226 const char * mri_state;
227 char mri_last_ch;
252b5132 228#if defined TC_ARM && defined OBJ_ELF
30a2b4ef 229 const char * symver_state;
252b5132 230#endif
30a2b4ef 231};
252b5132
RH
232
233char *
234app_push ()
235{
236 register struct app_save *saved;
237
238 saved = (struct app_save *) xmalloc (sizeof (*saved));
239 saved->state = state;
240 saved->old_state = old_state;
241 saved->out_string = out_string;
242 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
243 saved->add_newlines = add_newlines;
2b47531b
ILT
244 if (saved_input == NULL)
245 saved->saved_input = NULL;
246 else
247 {
248 saved->saved_input = xmalloc (saved_input_len);
249 memcpy (saved->saved_input, saved_input, saved_input_len);
250 saved->saved_input_len = saved_input_len;
251 }
abd63a32 252#ifdef TC_M68K
252b5132 253 saved->scrub_m68k_mri = scrub_m68k_mri;
abd63a32 254#endif
252b5132
RH
255 saved->mri_state = mri_state;
256 saved->mri_last_ch = mri_last_ch;
257#if defined TC_ARM && defined OBJ_ELF
258 saved->symver_state = symver_state;
259#endif
260
3ee4defc 261 /* do_scrub_begin() is not useful, just wastes time. */
252b5132
RH
262
263 state = 0;
264 saved_input = NULL;
265
266 return (char *) saved;
267}
268
3ee4defc 269void
252b5132
RH
270app_pop (arg)
271 char *arg;
272{
273 register struct app_save *saved = (struct app_save *) arg;
274
3ee4defc 275 /* There is no do_scrub_end (). */
252b5132
RH
276 state = saved->state;
277 old_state = saved->old_state;
278 out_string = saved->out_string;
279 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
280 add_newlines = saved->add_newlines;
2b47531b
ILT
281 if (saved->saved_input == NULL)
282 saved_input = NULL;
283 else
284 {
ab9da554 285 assert (saved->saved_input_len <= (int) (sizeof input_buffer));
2b47531b
ILT
286 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
287 saved_input = input_buffer;
288 saved_input_len = saved->saved_input_len;
289 free (saved->saved_input);
290 }
abd63a32 291#ifdef TC_M68K
252b5132 292 scrub_m68k_mri = saved->scrub_m68k_mri;
abd63a32 293#endif
252b5132
RH
294 mri_state = saved->mri_state;
295 mri_last_ch = saved->mri_last_ch;
296#if defined TC_ARM && defined OBJ_ELF
297 symver_state = saved->symver_state;
298#endif
299
300 free (arg);
204cd129 301}
252b5132
RH
302
/* Return the character denoted by the escape sequence `\CH'.  The
   letters b, f, n, r and t map to the corresponding control
   characters; every other character, including the quote characters,
   stands for itself.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

static int
process_escape (ch)
     int ch;
{
  switch (ch)
    {
    case 'b':
      return '\b';
    case 'f':
      return '\f';
    case 'n':
      return '\n';
    case 'r':
      return '\r';
    case 't':
      return '\t';
    case '\'':
      return '\'';
    case '"':
      return '\"';
    default:
      return ch;
    }
}
330
331/* This function is called to process input characters. The GET
332 parameter is used to retrieve more input characters. GET should
333 set its parameter to point to a buffer, and return the length of
334 the buffer; it should return 0 at end of file. The scrubbed output
335 characters are put into the buffer starting at TOSTART; the TOSTART
336 buffer is TOLEN bytes in length. The function returns the number
337 of scrubbed characters put into TOSTART. This will be TOLEN unless
338 end of file was seen. This function is arranged as a state
339 machine, and saves its state so that it may return at any point.
340 This is the way the old code used to work. */
341
342int
343do_scrub_chars (get, tostart, tolen)
2b47531b 344 int (*get) PARAMS ((char *, int));
252b5132
RH
345 char *tostart;
346 int tolen;
347{
348 char *to = tostart;
349 char *toend = tostart + tolen;
350 char *from;
351 char *fromend;
352 int fromlen;
353 register int ch, ch2 = 0;
354
355 /*State 0: beginning of normal line
356 1: After first whitespace on line (flush more white)
357 2: After first non-white (opcode) on line (keep 1white)
358 3: after second white on line (into operands) (flush white)
359 4: after putting out a .line, put out digits
360 5: parsing a string, then go to old-state
361 6: putting out \ escape in a "d string.
362 7: After putting out a .appfile, put out string.
363 8: After putting out a .appfile string, flush until newline.
364 9: After seeing symbol char in state 3 (keep 1white after symchar)
365 10: After seeing whitespace in state 9 (keep white before symchar)
366 11: After seeing a symbol character in state 0 (eg a label definition)
367 -1: output string in out_string and go to the state in old_state
368 -2: flush text until a '*' '/' is seen, then go to state old_state
369#ifdef TC_V850
b1ac4c66
AM
370 12: After seeing a dash, looking for a second dash as a start
371 of comment.
252b5132 372#endif
f28e8eb3 373#ifdef DOUBLEBAR_PARALLEL
b1ac4c66
AM
374 13: After seeing a vertical bar, looking for a second
375 vertical bar as a parallel expression separator.
52628315
L
376#endif
377#ifdef TC_IA64
378 14: After seeing a `(' at state 0, looking for a `)' as
379 predicate.
380 15: After seeing a `(' at state 1, looking for a `)' as
381 predicate.
252b5132
RH
382#endif
383 */
384
385 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
386 constructs like ``.loc 1 20''. This was turning into ``.loc
387 120''. States 9 and 10 ensure that a space is never dropped in
3b37fd66 388 between characters which could appear in an identifier. Ian
252b5132
RH
389 Taylor, ian@cygnus.com.
390
391 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
392 correctly on the PA (and any other target where colons are optional).
393 Jeff Law, law@cs.utah.edu.
394
395 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
396 get squashed into "cmp r1,r2||trap#1", with the all important space
397 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
398
399 /* This macro gets the next input character. */
400
2b47531b
ILT
401#define GET() \
402 (from < fromend \
403 ? * (unsigned char *) (from++) \
404 : (saved_input = NULL, \
405 fromlen = (*get) (input_buffer, sizeof input_buffer), \
406 from = input_buffer, \
407 fromend = from + fromlen, \
408 (fromlen == 0 \
409 ? EOF \
252b5132
RH
410 : * (unsigned char *) (from++))))
411
412 /* This macro pushes a character back on the input stream. */
413
414#define UNGET(uch) (*--from = (uch))
415
416 /* This macro puts a character into the output buffer. If this
417 character fills the output buffer, this macro jumps to the label
418 TOFULL. We use this rather ugly approach because we need to
419 handle two different termination conditions: EOF on the input
420 stream, and a full output buffer. It would be simpler if we
421 always read in the entire input stream before processing it, but
422 I don't want to make such a significant change to the assembler's
423 memory usage. */
424
411863a4
KH
425#define PUT(pch) \
426 do \
427 { \
428 *to++ = (pch); \
429 if (to >= toend) \
430 goto tofull; \
431 } \
252b5132
RH
432 while (0)
433
434 if (saved_input != NULL)
435 {
436 from = saved_input;
437 fromend = from + saved_input_len;
438 }
439 else
440 {
2b47531b 441 fromlen = (*get) (input_buffer, sizeof input_buffer);
252b5132
RH
442 if (fromlen == 0)
443 return 0;
2b47531b 444 from = input_buffer;
252b5132
RH
445 fromend = from + fromlen;
446 }
447
448 while (1)
449 {
450 /* The cases in this switch end with continue, in order to
b1ac4c66
AM
451 branch back to the top of this while loop and generate the
452 next output character in the appropriate state. */
252b5132
RH
453 switch (state)
454 {
455 case -1:
456 ch = *out_string++;
457 if (*out_string == '\0')
458 {
459 state = old_state;
460 old_state = 3;
461 }
462 PUT (ch);
463 continue;
464
465 case -2:
466 for (;;)
467 {
468 do
469 {
470 ch = GET ();
471
472 if (ch == EOF)
473 {
474 as_warn (_("end of file in comment"));
475 goto fromeof;
476 }
477
478 if (ch == '\n')
479 PUT ('\n');
480 }
481 while (ch != '*');
482
483 while ((ch = GET ()) == '*')
484 ;
485
486 if (ch == EOF)
487 {
488 as_warn (_("end of file in comment"));
489 goto fromeof;
490 }
491
492 if (ch == '/')
493 break;
494
495 UNGET (ch);
496 }
497
498 state = old_state;
499 UNGET (' ');
500 continue;
501
502 case 4:
503 ch = GET ();
504 if (ch == EOF)
505 goto fromeof;
506 else if (ch >= '0' && ch <= '9')
507 PUT (ch);
508 else
509 {
510 while (ch != EOF && IS_WHITESPACE (ch))
511 ch = GET ();
512 if (ch == '"')
513 {
514 UNGET (ch);
515 if (scrub_m68k_mri)
516 out_string = "\n\tappfile ";
517 else
518 out_string = "\n\t.appfile ";
519 old_state = 7;
520 state = -1;
521 PUT (*out_string++);
522 }
523 else
524 {
525 while (ch != EOF && ch != '\n')
526 ch = GET ();
527 state = 0;
528 PUT (ch);
529 }
530 }
531 continue;
532
533 case 5:
534 /* We are going to copy everything up to a quote character,
b1ac4c66
AM
535 with special handling for a backslash. We try to
536 optimize the copying in the simple case without using the
537 GET and PUT macros. */
252b5132
RH
538 {
539 char *s;
540 int len;
541
542 for (s = from; s < fromend; s++)
543 {
544 ch = *s;
545 /* This condition must be changed if the type of any
b1ac4c66 546 other character can be LEX_IS_STRINGQUOTE. */
252b5132
RH
547 if (ch == '\\'
548 || ch == '"'
549 || ch == '\''
550 || ch == '\n')
551 break;
552 }
553 len = s - from;
554 if (len > toend - to)
555 len = toend - to;
556 if (len > 0)
557 {
558 memcpy (to, from, len);
559 to += len;
560 from += len;
561 }
562 }
563
564 ch = GET ();
565 if (ch == EOF)
566 {
0e389e77 567 as_warn (_("end of file in string; inserted '\"'"));
252b5132
RH
568 state = old_state;
569 UNGET ('\n');
570 PUT ('"');
571 }
572 else if (lex[ch] == LEX_IS_STRINGQUOTE)
573 {
574 state = old_state;
575 PUT (ch);
576 }
577#ifndef NO_STRING_ESCAPES
578 else if (ch == '\\')
579 {
580 state = 6;
581 PUT (ch);
582 }
583#endif
584 else if (scrub_m68k_mri && ch == '\n')
585 {
586 /* Just quietly terminate the string. This permits lines like
204cd129 587 bne label loop if we haven't reach end yet. */
252b5132
RH
588 state = old_state;
589 UNGET (ch);
590 PUT ('\'');
591 }
592 else
593 {
594 PUT (ch);
595 }
596 continue;
597
598 case 6:
599 state = 5;
600 ch = GET ();
601 switch (ch)
602 {
603 /* Handle strings broken across lines, by turning '\n' into
604 '\\' and 'n'. */
605 case '\n':
606 UNGET ('n');
607 add_newlines++;
608 PUT ('\\');
609 continue;
610
4252e537
AM
611 case EOF:
612 as_warn (_("end of file in string; '\"' inserted"));
613 PUT ('"');
614 continue;
615
252b5132
RH
616 case '"':
617 case '\\':
618 case 'b':
619 case 'f':
620 case 'n':
621 case 'r':
622 case 't':
623 case 'v':
624 case 'x':
625 case 'X':
626 case '0':
627 case '1':
628 case '2':
629 case '3':
630 case '4':
631 case '5':
632 case '6':
633 case '7':
634 break;
4252e537 635
252b5132 636 default:
4252e537 637#ifdef ONLY_STANDARD_ESCAPES
0e389e77 638 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
4252e537 639#endif
252b5132 640 break;
252b5132
RH
641 }
642 PUT (ch);
643 continue;
644
645 case 7:
646 ch = GET ();
647 state = 5;
648 old_state = 8;
649 if (ch == EOF)
650 goto fromeof;
651 PUT (ch);
652 continue;
653
654 case 8:
655 do
656 ch = GET ();
657 while (ch != '\n' && ch != EOF);
658 if (ch == EOF)
659 goto fromeof;
660 state = 0;
661 PUT (ch);
662 continue;
b1ac4c66
AM
663
664#ifdef DOUBLEBAR_PARALLEL
665 case 13:
666 ch = GET ();
667 if (ch != '|')
668 abort ();
669
670 /* Reset back to state 1 and pretend that we are parsing a
671 line from just after the first white space. */
672 state = 1;
673 PUT ('|');
674 continue;
675#endif
252b5132
RH
676 }
677
204cd129 678 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
252b5132
RH
679
680 /* flushchar: */
681 ch = GET ();
682
52628315
L
683#ifdef TC_IA64
684 if (ch == '(' && (state == 0 || state == 1))
685 {
686 state += 14;
687 PUT (ch);
688 continue;
689 }
690 else if (state == 14 || state == 15)
691 {
692 if (ch == ')')
693 state -= 14;
694 else
695 {
696 PUT (ch);
697 continue;
698 }
699 }
700#endif
701
252b5132
RH
702 recycle:
703
704#if defined TC_ARM && defined OBJ_ELF
705 /* We need to watch out for .symver directives. See the comment later
706 in this function. */
707 if (symver_state == NULL)
708 {
709 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
710 symver_state = symver_pseudo + 1;
711 }
712 else
713 {
714 /* We advance to the next state if we find the right
715 character. */
716 if (ch != '\0' && (*symver_state == ch))
717 ++symver_state;
718 else if (*symver_state != '\0')
719 /* We did not get the expected character, or we didn't
720 get a valid terminating character after seeing the
721 entire pseudo-op, so we must go back to the beginning. */
722 symver_state = NULL;
723 else
724 {
725 /* We've read the entire pseudo-op. If this is the end
726 of the line, go back to the beginning. */
727 if (IS_NEWLINE (ch))
728 symver_state = NULL;
729 }
730 }
731#endif /* TC_ARM && OBJ_ELF */
732
733#ifdef TC_M68K
734 /* We want to have pseudo-ops which control whether we are in
b1ac4c66
AM
735 MRI mode or not. Unfortunately, since m68k MRI mode affects
736 the scrubber, that means that we need a special purpose
737 recognizer here. */
252b5132
RH
738 if (mri_state == NULL)
739 {
740 if ((state == 0 || state == 1)
741 && ch == mri_pseudo[0])
742 mri_state = mri_pseudo + 1;
743 }
744 else
745 {
746 /* We advance to the next state if we find the right
747 character, or if we need a space character and we get any
748 whitespace character, or if we need a '0' and we get a
749 '1' (this is so that we only need one state to handle
750 ``.mri 0'' and ``.mri 1''). */
751 if (ch != '\0'
752 && (*mri_state == ch
753 || (*mri_state == ' '
754 && lex[ch] == LEX_IS_WHITESPACE)
755 || (*mri_state == '0'
756 && ch == '1')))
757 {
758 mri_last_ch = ch;
759 ++mri_state;
760 }
761 else if (*mri_state != '\0'
762 || (lex[ch] != LEX_IS_WHITESPACE
763 && lex[ch] != LEX_IS_NEWLINE))
764 {
765 /* We did not get the expected character, or we didn't
766 get a valid terminating character after seeing the
767 entire pseudo-op, so we must go back to the
768 beginning. */
769 mri_state = NULL;
770 }
771 else
772 {
773 /* We've read the entire pseudo-op. mri_last_ch is
b1ac4c66
AM
774 either '0' or '1' indicating whether to enter or
775 leave MRI mode. */
252b5132
RH
776 do_scrub_begin (mri_last_ch == '1');
777 mri_state = NULL;
778
779 /* We continue handling the character as usual. The
b1ac4c66
AM
780 main gas reader must also handle the .mri pseudo-op
781 to control expression parsing and the like. */
252b5132
RH
782 }
783 }
784#endif
785
786 if (ch == EOF)
787 {
788 if (state != 0)
789 {
790 as_warn (_("end of file not at end of a line; newline inserted"));
791 state = 0;
792 PUT ('\n');
793 }
794 goto fromeof;
795 }
796
797 switch (lex[ch])
798 {
799 case LEX_IS_WHITESPACE:
800 do
801 {
802 ch = GET ();
803 }
804 while (ch != EOF && IS_WHITESPACE (ch));
805 if (ch == EOF)
806 goto fromeof;
807
808 if (state == 0)
809 {
810 /* Preserve a single whitespace character at the
811 beginning of a line. */
812 state = 1;
813 UNGET (ch);
814 PUT (' ');
815 break;
816 }
817
f28e8eb3 818#ifdef KEEP_WHITE_AROUND_COLON
30a2b4ef
KH
819 if (lex[ch] == LEX_IS_COLON)
820 {
821 /* Only keep this white if there's no white *after* the
b1ac4c66 822 colon. */
30a2b4ef
KH
823 ch2 = GET ();
824 UNGET (ch2);
825 if (!IS_WHITESPACE (ch2))
826 {
827 state = 9;
828 UNGET (ch);
829 PUT (' ');
830 break;
831 }
832 }
f28e8eb3 833#endif
252b5132
RH
834 if (IS_COMMENT (ch)
835 || ch == '/'
62f65a7b
DB
836 || IS_LINE_SEPARATOR (ch)
837 || IS_PARALLEL_SEPARATOR (ch))
252b5132
RH
838 {
839 if (scrub_m68k_mri)
840 {
841 /* In MRI mode, we keep these spaces. */
842 UNGET (ch);
843 PUT (' ');
844 break;
845 }
846 goto recycle;
847 }
848
849 /* If we're in state 2 or 11, we've seen a non-white
850 character followed by whitespace. If the next character
851 is ':', this is whitespace after a label name which we
852 normally must ignore. In MRI mode, though, spaces are
853 not permitted between the label and the colon. */
854 if ((state == 2 || state == 11)
855 && lex[ch] == LEX_IS_COLON
856 && ! scrub_m68k_mri)
857 {
858 state = 1;
859 PUT (ch);
860 break;
861 }
862
863 switch (state)
864 {
865 case 0:
866 state++;
867 goto recycle; /* Punted leading sp */
868 case 1:
869 /* We can arrive here if we leave a leading whitespace
870 character at the beginning of a line. */
871 goto recycle;
872 case 2:
873 state = 3;
874 if (to + 1 < toend)
875 {
876 /* Optimize common case by skipping UNGET/GET. */
877 PUT (' '); /* Sp after opco */
878 goto recycle;
879 }
880 UNGET (ch);
881 PUT (' ');
882 break;
883 case 3:
884 if (scrub_m68k_mri)
885 {
886 /* In MRI mode, we keep these spaces. */
887 UNGET (ch);
888 PUT (' ');
889 break;
890 }
891 goto recycle; /* Sp in operands */
892 case 9:
893 case 10:
894 if (scrub_m68k_mri)
895 {
896 /* In MRI mode, we keep these spaces. */
897 state = 3;
898 UNGET (ch);
899 PUT (' ');
900 break;
901 }
902 state = 10; /* Sp after symbol char */
903 goto recycle;
904 case 11:
abd63a32 905 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
252b5132
RH
906 state = 1;
907 else
908 {
909 /* We know that ch is not ':', since we tested that
b1ac4c66
AM
910 case above. Therefore this is not a label, so it
911 must be the opcode, and we've just seen the
912 whitespace after it. */
252b5132
RH
913 state = 3;
914 }
915 UNGET (ch);
916 PUT (' '); /* Sp after label definition. */
917 break;
918 default:
919 BAD_CASE (state);
920 }
921 break;
922
923 case LEX_IS_TWOCHAR_COMMENT_1ST:
924 ch2 = GET ();
925 if (ch2 == '*')
926 {
927 for (;;)
928 {
929 do
930 {
931 ch2 = GET ();
932 if (ch2 != EOF && IS_NEWLINE (ch2))
933 add_newlines++;
934 }
935 while (ch2 != EOF && ch2 != '*');
936
937 while (ch2 == '*')
938 ch2 = GET ();
939
940 if (ch2 == EOF || ch2 == '/')
941 break;
942
943 /* This UNGET will ensure that we count newlines
b1ac4c66 944 correctly. */
252b5132
RH
945 UNGET (ch2);
946 }
947
948 if (ch2 == EOF)
949 as_warn (_("end of file in multiline comment"));
950
951 ch = ' ';
952 goto recycle;
953 }
800eeca4
JW
954#ifdef DOUBLESLASH_LINE_COMMENTS
955 else if (ch2 == '/')
956 {
957 do
958 {
959 ch = GET ();
960 }
961 while (ch != EOF && !IS_NEWLINE (ch));
962 if (ch == EOF)
963 as_warn ("end of file in comment; newline inserted");
964 state = 0;
965 PUT ('\n');
966 break;
967 }
968#endif
252b5132
RH
969 else
970 {
971 if (ch2 != EOF)
972 UNGET (ch2);
973 if (state == 9 || state == 10)
974 state = 3;
975 PUT (ch);
976 }
977 break;
978
979 case LEX_IS_STRINGQUOTE:
980 if (state == 10)
981 {
204cd129 982 /* Preserve the whitespace in foo "bar". */
252b5132
RH
983 UNGET (ch);
984 state = 3;
985 PUT (' ');
986
987 /* PUT didn't jump out. We could just break, but we
b1ac4c66 988 know what will happen, so optimize a bit. */
252b5132
RH
989 ch = GET ();
990 old_state = 3;
991 }
992 else if (state == 9)
993 old_state = 3;
994 else
995 old_state = state;
996 state = 5;
997 PUT (ch);
998 break;
999
1000#ifndef IEEE_STYLE
1001 case LEX_IS_ONECHAR_QUOTE:
1002 if (state == 10)
1003 {
204cd129 1004 /* Preserve the whitespace in foo 'b'. */
252b5132
RH
1005 UNGET (ch);
1006 state = 3;
1007 PUT (' ');
1008 break;
1009 }
1010 ch = GET ();
1011 if (ch == EOF)
1012 {
1013 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1014 ch = 0;
1015 }
1016 if (ch == '\\')
1017 {
1018 ch = GET ();
1019 if (ch == EOF)
1020 {
1021 as_warn (_("end of file in escape character"));
1022 ch = '\\';
1023 }
1024 else
1025 ch = process_escape (ch);
1026 }
1027 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1028
1029 /* None of these 'x constants for us. We want 'x'. */
1030 if ((ch = GET ()) != '\'')
1031 {
1032#ifdef REQUIRE_CHAR_CLOSE_QUOTE
0e389e77 1033 as_warn (_("missing close quote; (assumed)"));
252b5132
RH
1034#else
1035 if (ch != EOF)
1036 UNGET (ch);
1037#endif
1038 }
1039 if (strlen (out_buf) == 1)
1040 {
1041 PUT (out_buf[0]);
1042 break;
1043 }
1044 if (state == 9)
1045 old_state = 3;
1046 else
1047 old_state = state;
1048 state = -1;
1049 out_string = out_buf;
1050 PUT (*out_string++);
1051 break;
1052#endif
1053
1054 case LEX_IS_COLON:
f28e8eb3 1055#ifdef KEEP_WHITE_AROUND_COLON
30a2b4ef 1056 state = 9;
f28e8eb3 1057#else
252b5132
RH
1058 if (state == 9 || state == 10)
1059 state = 3;
1060 else if (state != 3)
1061 state = 1;
f28e8eb3 1062#endif
252b5132
RH
1063 PUT (ch);
1064 break;
1065
1066 case LEX_IS_NEWLINE:
1067 /* Roll out a bunch of newlines from inside comments, etc. */
1068 if (add_newlines)
1069 {
1070 --add_newlines;
1071 UNGET (ch);
1072 }
3ee4defc 1073 /* Fall through. */
252b5132
RH
1074
1075 case LEX_IS_LINE_SEPARATOR:
1076 state = 0;
1077 PUT (ch);
1078 break;
1079
62f65a7b
DB
1080 case LEX_IS_PARALLEL_SEPARATOR:
1081 state = 1;
1082 PUT (ch);
1083 break;
1084
252b5132
RH
1085#ifdef TC_V850
1086 case LEX_IS_DOUBLEDASH_1ST:
30a2b4ef 1087 ch2 = GET ();
252b5132
RH
1088 if (ch2 != '-')
1089 {
1090 UNGET (ch2);
1091 goto de_fault;
1092 }
3ee4defc 1093 /* Read and skip to end of line. */
252b5132
RH
1094 do
1095 {
1096 ch = GET ();
1097 }
1098 while (ch != EOF && ch != '\n');
204cd129 1099
252b5132 1100 if (ch == EOF)
204cd129
NC
1101 as_warn (_("end of file in comment; newline inserted"));
1102
252b5132
RH
1103 state = 0;
1104 PUT ('\n');
1105 break;
3ee4defc 1106#endif
f28e8eb3 1107#ifdef DOUBLEBAR_PARALLEL
252b5132 1108 case LEX_IS_DOUBLEBAR_1ST:
30a2b4ef 1109 ch2 = GET ();
b1ac4c66 1110 UNGET (ch2);
252b5132 1111 if (ch2 != '|')
204cd129
NC
1112 goto de_fault;
1113
b1ac4c66
AM
1114 /* Handle '||' in two states as invoking PUT twice might
1115 result in the first one jumping out of this loop. We'd
1116 then lose track of the state and one '|' char. */
1117 state = 13;
252b5132
RH
1118 PUT ('|');
1119 break;
3ee4defc 1120#endif
252b5132
RH
1121 case LEX_IS_LINE_COMMENT_START:
1122 /* FIXME-someday: The two character comment stuff was badly
1123 thought out. On i386, we want '/' as line comment start
1124 AND we want C style comments. hence this hack. The
1125 whole lexical process should be reworked. xoxorich. */
1126 if (ch == '/')
1127 {
1128 ch2 = GET ();
1129 if (ch2 == '*')
1130 {
1131 old_state = 3;
1132 state = -2;
1133 break;
1134 }
1135 else
1136 {
1137 UNGET (ch2);
1138 }
204cd129 1139 }
252b5132
RH
1140
1141 if (state == 0 || state == 1) /* Only comment at start of line. */
1142 {
1143 int startch;
1144
1145 startch = ch;
1146
1147 do
1148 {
1149 ch = GET ();
1150 }
1151 while (ch != EOF && IS_WHITESPACE (ch));
204cd129 1152
252b5132
RH
1153 if (ch == EOF)
1154 {
1155 as_warn (_("end of file in comment; newline inserted"));
1156 PUT ('\n');
1157 break;
1158 }
204cd129 1159
252b5132
RH
1160 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1161 {
1162 /* Not a cpp line. */
1163 while (ch != EOF && !IS_NEWLINE (ch))
1164 ch = GET ();
1165 if (ch == EOF)
0e389e77 1166 as_warn (_("end of file in comment; newline inserted"));
252b5132
RH
1167 state = 0;
1168 PUT ('\n');
1169 break;
1170 }
3ee4defc 1171 /* Looks like `# 123 "filename"' from cpp. */
252b5132
RH
1172 UNGET (ch);
1173 old_state = 4;
1174 state = -1;
1175 if (scrub_m68k_mri)
1176 out_string = "\tappline ";
1177 else
1178 out_string = "\t.appline ";
1179 PUT (*out_string++);
1180 break;
1181 }
1182
1183#ifdef TC_D10V
1184 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1185 Trap is the only short insn that has a first operand that is
1186 neither register nor label.
1187 We must prevent exef0f ||trap #1 to degenerate to exef0f ||trap#1 .
30a2b4ef
KH
1188 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1189 already LEX_IS_LINE_COMMENT_START. However, it is the
1190 only character in line_comment_chars for d10v, hence we
1191 can recognize it as such. */
252b5132
RH
1192 /* An alternative approach would be to reset the state to 1 when
1193 we see '||', '<'- or '->', but that seems to be overkill. */
30a2b4ef
KH
1194 if (state == 10)
1195 PUT (' ');
252b5132
RH
1196#endif
1197 /* We have a line comment character which is not at the
1198 start of a line. If this is also a normal comment
1199 character, fall through. Otherwise treat it as a default
1200 character. */
1201 if (strchr (tc_comment_chars, ch) == NULL
1202 && (! scrub_m68k_mri
1203 || (ch != '!' && ch != '*')))
1204 goto de_fault;
1205 if (scrub_m68k_mri
1206 && (ch == '!' || ch == '*' || ch == '#')
1207 && state != 1
1208 && state != 10)
1209 goto de_fault;
1210 /* Fall through. */
1211 case LEX_IS_COMMENT_START:
1212#if defined TC_ARM && defined OBJ_ELF
1213 /* On the ARM, `@' is the comment character.
1214 Unfortunately this is also a special character in ELF .symver
30a2b4ef
KH
1215 directives (and .type, though we deal with those another way).
1216 So we check if this line is such a directive, and treat
1217 the character as default if so. This is a hack. */
252b5132
RH
1218 if ((symver_state != NULL) && (*symver_state == 0))
1219 goto de_fault;
4c400d5e
AM
1220#endif
1221#ifdef WARN_COMMENTS
1222 if (!found_comment)
1223 as_where (&found_comment_file, &found_comment);
252b5132
RH
1224#endif
1225 do
1226 {
1227 ch = GET ();
1228 }
1229 while (ch != EOF && !IS_NEWLINE (ch));
1230 if (ch == EOF)
1231 as_warn (_("end of file in comment; newline inserted"));
1232 state = 0;
1233 PUT ('\n');
1234 break;
1235
1236 case LEX_IS_SYMBOL_COMPONENT:
1237 if (state == 10)
1238 {
1239 /* This is a symbol character following another symbol
1240 character, with whitespace in between. We skipped
1241 the whitespace earlier, so output it now. */
1242 UNGET (ch);
1243 state = 3;
1244 PUT (' ');
1245 break;
1246 }
1247
1248 if (state == 3)
1249 state = 9;
1250
1251 /* This is a common case. Quickly copy CH and all the
b1ac4c66 1252 following symbol component or normal characters. */
252b5132
RH
1253 if (to + 1 < toend
1254 && mri_state == NULL
1255#if defined TC_ARM && defined OBJ_ELF
1256 && symver_state == NULL
1257#endif
1258 )
1259 {
1260 char *s;
1261 int len;
1262
1263 for (s = from; s < fromend; s++)
1264 {
1265 int type;
1266
30a2b4ef 1267 ch2 = *(unsigned char *) s;
252b5132
RH
1268 type = lex[ch2];
1269 if (type != 0
1270 && type != LEX_IS_SYMBOL_COMPONENT)
1271 break;
1272 }
204cd129 1273
252b5132 1274 if (s > from)
204cd129
NC
1275 /* Handle the last character normally, for
1276 simplicity. */
1277 --s;
1278
252b5132 1279 len = s - from;
204cd129 1280
252b5132
RH
1281 if (len > (toend - to) - 1)
1282 len = (toend - to) - 1;
204cd129 1283
252b5132
RH
1284 if (len > 0)
1285 {
1286 PUT (ch);
1287 if (len > 8)
1288 {
1289 memcpy (to, from, len);
1290 to += len;
1291 from += len;
1292 }
1293 else
1294 {
1295 switch (len)
1296 {
1297 case 8: *to++ = *from++;
1298 case 7: *to++ = *from++;
1299 case 6: *to++ = *from++;
1300 case 5: *to++ = *from++;
1301 case 4: *to++ = *from++;
1302 case 3: *to++ = *from++;
1303 case 2: *to++ = *from++;
1304 case 1: *to++ = *from++;
1305 }
3ee4defc 1306 }
252b5132
RH
1307 ch = GET ();
1308 }
1309 }
1310
1311 /* Fall through. */
1312 default:
1313 de_fault:
1314 /* Some relatively `normal' character. */
1315 if (state == 0)
1316 {
2cdb18a7 1317 if (IS_SYMBOL_COMPONENT (ch))
204cd129 1318 state = 11; /* Now seeing label definition. */
252b5132
RH
1319 }
1320 else if (state == 1)
1321 {
204cd129
NC
1322 if (IS_SYMBOL_COMPONENT (ch))
1323 state = 2; /* Ditto. */
252b5132
RH
1324 }
1325 else if (state == 9)
1326 {
2cdb18a7 1327 if (!IS_SYMBOL_COMPONENT (ch))
252b5132
RH
1328 state = 3;
1329 }
1330 else if (state == 10)
1331 {
c5c834aa
AH
1332 if (ch == '\\')
1333 {
1334 /* Special handling for backslash: a backslash may
1335 be the beginning of a formal parameter (of a
1336 macro) following another symbol character, with
1337 whitespace in between. If that is the case, we
1338 output a space before the parameter. Strictly
1339 speaking, correct handling depends upon what the
1340 macro parameter expands into; if the parameter
1341 expands into something which does not start with
1342 an operand character, then we don't want to keep
1343 the space. We don't have enough information to
1344 make the right choice, so here we are making the
1345 choice which is more likely to be correct. */
1346 PUT (' ');
1347 }
1348
252b5132
RH
1349 state = 3;
1350 }
1351 PUT (ch);
1352 break;
1353 }
1354 }
1355
1356 /*NOTREACHED*/
1357
1358 fromeof:
1359 /* We have reached the end of the input. */
1360 return to - tostart;
1361
1362 tofull:
1363 /* The output buffer is full. Save any input we have not yet
1364 processed. */
1365 if (fromend > from)
1366 {
2b47531b 1367 saved_input = from;
252b5132
RH
1368 saved_input_len = fromend - from;
1369 }
1370 else
2b47531b
ILT
1371 saved_input = NULL;
1372
252b5132
RH
1373 return to - tostart;
1374}
1375
This page took 0.296756 seconds and 4 git commands to generate.