Yet more signed overflow fixes
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
252b5132 1/* This is the Assembler Pre-Processor
82704155 2 Copyright (C) 1987-2019 Free Software Foundation, Inc.
252b5132
RH
3
4 This file is part of GAS, the GNU Assembler.
5
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
ec2655a6 8 the Free Software Foundation; either version 3, or (at your option)
252b5132
RH
9 any later version.
10
ec2655a6
NC
11 GAS is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
14 License for more details.
252b5132
RH
15
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to the Free
4b4da160
NC
18 Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
19 02110-1301, USA. */
252b5132 20
204cd129 21/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90. */
93e914b2
AO
22/* App, the assembler pre-processor. This pre-processor strips out
23 excess spaces, turns single-quoted characters into a decimal
24 constant, and turns the # in # <number> <filename> <garbage> into a
25 .linefile. This needs better error-handling. */
252b5132 26
ebd1c875 27#include "as.h"
252b5132
RH
28
29#if (__STDC__ != 1)
30#ifndef const
31#define const /* empty */
32#endif
33#endif
34
c54b5932
DD
/* Non-zero makes do_scrub_begin classify the letters h and H as
   LEX_IS_H.  Only exists when H_TICK_HEX is defined. */
35#ifdef H_TICK_HEX
36int enable_h_tick_hex = 0;
37#endif
38
abd63a32 39#ifdef TC_M68K
252b5132
RH
40/* Whether we are scrubbing in m68k MRI mode. This is different from
41 flag_m68k_mri, because the two flags will be affected by the .mri
42 pseudo-op at different times. */
43static int scrub_m68k_mri;
44
45/* The pseudo-op which switches in and out of MRI mode. See the
46 comment in do_scrub_chars. */
47static const char mri_pseudo[] = ".mri 0";
72297628
AM
48#else
/* Without TC_M68K the flag is the constant 0, so code that tests
   scrub_m68k_mri outside of any TC_M68K conditional still compiles. */
49#define scrub_m68k_mri 0
50#endif
252b5132
RH
51
52#if defined TC_ARM && defined OBJ_ELF
3ee4defc 53/* The pseudo-op for which we need to special-case `@' characters.
252b5132
RH
54 See the comment in do_scrub_chars. */
55static const char symver_pseudo[] = ".symver";
/* Next character of symver_pseudo that do_scrub_chars expects to see,
   or NULL while no match is in progress. */
56static const char * symver_state;
57#endif
ab1fadc6
AM
58#ifdef TC_ARM
/* Saved and restored by app_push and app_pop.  NOTE(review): its use is
   not in the part of the file visible here - confirm its meaning at the
   point where do_scrub_chars assigns it. */
59static char last_char;
60#endif
252b5132
RH
61
/* Character classification table, indexed by character value 0..255.
   Each entry is 0 (unclassified) or one of the LEX_IS_* codes below;
   do_scrub_begin fills it in. */
62static char lex[256];
/* Characters that do_scrub_begin marks as LEX_IS_SYMBOL_COMPONENT
   before the comment and separator characters are applied on top. */
63static const char symbol_chars[] =
64"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
65
/* Classification codes stored in lex[].  No code has the value 7, and
   codes 12, 13 and 15 only exist for the targets that need them. */
66#define LEX_IS_SYMBOL_COMPONENT 1
67#define LEX_IS_WHITESPACE 2
68#define LEX_IS_LINE_SEPARATOR 3
69#define LEX_IS_COMMENT_START 4
70#define LEX_IS_LINE_COMMENT_START 5
71#define LEX_IS_TWOCHAR_COMMENT_1ST 6
72#define LEX_IS_STRINGQUOTE 8
73#define LEX_IS_COLON 9
74#define LEX_IS_NEWLINE 10
75#define LEX_IS_ONECHAR_QUOTE 11
76#ifdef TC_V850
77#define LEX_IS_DOUBLEDASH_1ST 12
78#endif
/* TC_M32R always enables the double-bar parallel separator handling. */
79#ifdef TC_M32R
f28e8eb3
TW
80#define DOUBLEBAR_PARALLEL
81#endif
82#ifdef DOUBLEBAR_PARALLEL
252b5132
RH
83#define LEX_IS_DOUBLEBAR_1ST 13
84#endif
62f65a7b 85#define LEX_IS_PARALLEL_SEPARATOR 14
c54b5932
DD
86#ifdef H_TICK_HEX
87#define LEX_IS_H 15
88#endif
252b5132
RH
/* Classification tests.  The argument indexes lex[] directly, so it
   must be in the range 0..255; passing EOF or a negative char value
   would index outside the table. */
89#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
90#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
91#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
62f65a7b 92#define IS_PARALLEL_SEPARATOR(c) (lex[c] == LEX_IS_PARALLEL_SEPARATOR)
252b5132
RH
93#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
94#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
95#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
96
/* Forward declaration; the definition follows app_pop. */
73ee5e4c 97static int process_escape (int);
252b5132
RH
98
99/* FIXME-soon: The entire lexer/parser thingy should be
100 built statically at compile time rather than dynamically
3ee4defc 101 each and every time the assembler is run. xoxorich. */
252b5132 102
/* Fill in the lex[] classification table for the current target.

   M68K_MRI is only examined when TC_M68K is defined.  There it is
   recorded in scrub_m68k_mri; a non-zero value skips the default quote
   classifications and installs the MRI-specific entries near the end.

   The assignments are order dependent: a later assignment to the same
   character replaces an earlier one.  Entries are only ever assigned
   here, never cleared, so when this function is called again an entry
   set by the earlier call survives unless it is assigned again. */
3ee4defc 103void
73ee5e4c 104do_scrub_begin (int m68k_mri ATTRIBUTE_UNUSED)
252b5132
RH
105{
106 const char *p;
107 int c;
108
252b5132
RH
109 lex[' '] = LEX_IS_WHITESPACE;
110 lex['\t'] = LEX_IS_WHITESPACE;
111 lex['\r'] = LEX_IS_WHITESPACE;
112 lex['\n'] = LEX_IS_NEWLINE;
252b5132
RH
113 lex[':'] = LEX_IS_COLON;
114
abd63a32
AM
115#ifdef TC_M68K
116 scrub_m68k_mri = m68k_mri;
117
/* With TC_M68K the braces below are the body of this if; without it
   they are a plain block that always runs. */
252b5132 118 if (! m68k_mri)
abd63a32 119#endif
252b5132
RH
120 {
121 lex['"'] = LEX_IS_STRINGQUOTE;
122
6793974d 123#if ! defined (TC_HPPA)
252b5132
RH
124 lex['\''] = LEX_IS_ONECHAR_QUOTE;
125#endif
126
/* When defined, this replaces the one-character-quote entry set just
   above. */
127#ifdef SINGLE_QUOTE_STRINGS
128 lex['\''] = LEX_IS_STRINGQUOTE;
129#endif
130 }
131
132 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
133 in state 5 of do_scrub_chars must be changed. */
134
135 /* Note that these override the previous defaults, e.g. if ';' is a
136 comment char, then it isn't a line separator. */
137 for (p = symbol_chars; *p; ++p)
204cd129 138 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
252b5132
RH
139
/* Every character value from 128 to 255 counts as a symbol component. */
140 for (c = 128; c < 256; ++c)
141 lex[c] = LEX_IS_SYMBOL_COMPONENT;
142
143#ifdef tc_symbol_chars
144 /* This macro permits the processor to specify all characters which
145 may appear in an operand. This will prevent the scrubber from
146 discarding meaningful whitespace in certain cases. The i386
147 backend uses this to support prefixes, which can confuse the
148 scrubber as to whether it is parsing operands or opcodes. */
149 for (p = tc_symbol_chars; *p; ++p)
150 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
151#endif
152
153 /* The m68k backend wants to be able to change comment_chars. */
154#ifndef tc_comment_chars
155#define tc_comment_chars comment_chars
156#endif
157 for (p = tc_comment_chars; *p; p++)
204cd129 158 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
252b5132
RH
159
160 for (p = line_comment_chars; *p; p++)
204cd129 161 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
252b5132 162
2e6976a8
DG
163#ifndef tc_line_separator_chars
164#define tc_line_separator_chars line_separator_chars
165#endif
166 for (p = tc_line_separator_chars; *p; p++)
204cd129 167 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
252b5132 168
62f65a7b
DB
169#ifdef tc_parallel_separator_chars
170 /* This macro permits the processor to specify all characters which
171 separate parallel insns on the same line. */
172 for (p = tc_parallel_separator_chars; *p; p++)
204cd129 173 lex[(unsigned char) *p] = LEX_IS_PARALLEL_SEPARATOR;
62f65a7b
DB
174#endif
175
252b5132
RH
176 /* Only allow slash-star comments if slash is not in use.
177 FIXME: This isn't right. We should always permit them. */
178 if (lex['/'] == 0)
204cd129 179 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
252b5132 180
abd63a32 181#ifdef TC_M68K
252b5132
RH
182 if (m68k_mri)
183 {
184 lex['\''] = LEX_IS_STRINGQUOTE;
185 lex[';'] = LEX_IS_COMMENT_START;
186 lex['*'] = LEX_IS_LINE_COMMENT_START;
187 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
b1ac4c66 188 then it can't be used in an expression. */
252b5132
RH
189 lex['!'] = LEX_IS_LINE_COMMENT_START;
190 }
abd63a32 191#endif
252b5132
RH
192
/* Target-specific entries.  The TC_D30V assignment to the dash comes
   after the TC_V850 one, so it wins if both are defined. */
193#ifdef TC_V850
194 lex['-'] = LEX_IS_DOUBLEDASH_1ST;
195#endif
f28e8eb3 196#ifdef DOUBLEBAR_PARALLEL
252b5132
RH
197 lex['|'] = LEX_IS_DOUBLEBAR_1ST;
198#endif
199#ifdef TC_D30V
204cd129 200 /* Must do this if we want VLIW instruction with "->" or "<-". */
252b5132
RH
201 lex['-'] = LEX_IS_SYMBOL_COMPONENT;
202#endif
c54b5932
DD
203
/* This runs last, so it replaces the symbol-component entries for the
   two letters. */
204#ifdef H_TICK_HEX
205 if (enable_h_tick_hex)
206 {
207 lex['h'] = LEX_IS_H;
208 lex['H'] = LEX_IS_H;
209 }
210#endif
204cd129 211}
252b5132 212
204cd129 213/* Saved state of the scrubber. */
252b5132
RH
/* Current state of the do_scrub_chars state machine; the numbering is
   listed in the comment at the top of that function. */
214static int state;
/* State that do_scrub_chars returns to when it leaves state -1, -2
   or 5. */
215static int old_state;
/* Next character to emit while in state -1. */
cd0bbe6e 216static const char *out_string;
252b5132
RH
/* Receives the decimal text that do_scrub_chars formats for a
   one-character constant. */
217static char out_buf[20];
/* Count of newlines that were swallowed (inside comments or continued
   strings) and are re-emitted when the next newline is processed. */
218static int add_newlines;
/* When non-NULL, do_scrub_chars resumes reading here instead of calling
   its GET callback first.  The GET macro clears it whenever it refills
   input_buffer.  NOTE(review): the place where do_scrub_chars sets it is
   not in the part of the file visible here. */
219static char *saved_input;
/* Number of bytes available at saved_input. */
39a45edc 220static size_t saved_input_len;
/* Buffer handed to the GET callback of do_scrub_chars; app_pop also
   copies restored input into it. */
2b47531b 221static char input_buffer[32 * 1024];
252b5132
RH
/* Next character of mri_pseudo that do_scrub_chars expects to see, or
   NULL while no match is in progress.  Only assigned under TC_M68K. */
222static const char *mri_state;
/* Last character matched against mri_pseudo; compared with the digit 1
   to choose the mode passed to do_scrub_begin. */
223static char mri_last_ch;
224
225/* Data structure for saving the state of app across #include's. Note that
226 app is called asynchronously to the parsing of the .include's, so our
227 state at the time .include is interpreted is completely unrelated.
228 That's why we have to save it all. */
229
204cd129
NC
/* Snapshot of the scrubber state.  Every member mirrors the file-scope
   variable of the same name; app_push fills one in and app_pop copies
   it back. */
230struct app_save
231{
30a2b4ef
KH
232 int state;
233 int old_state;
cd0bbe6e 234 const char * out_string;
30a2b4ef
KH
235 char out_buf[sizeof (out_buf)];
236 int add_newlines;
/* Heap copy of the pending input, or NULL if there was none.  Allocated
   by app_push and freed by app_pop. */
237 char * saved_input;
/* Only assigned by app_push when saved_input is non-NULL. */
39a45edc 238 size_t saved_input_len;
abd63a32 239#ifdef TC_M68K
30a2b4ef 240 int scrub_m68k_mri;
abd63a32 241#endif
30a2b4ef
KH
242 const char * mri_state;
243 char mri_last_ch;
252b5132 244#if defined TC_ARM && defined OBJ_ELF
30a2b4ef 245 const char * symver_state;
252b5132 246#endif
ab1fadc6
AM
247#ifdef TC_ARM
248 char last_char;
249#endif
30a2b4ef 250};
252b5132
RH
251
/* Save the scrubber state and reset the scrubber for a new input.

   The state is copied into a newly allocated struct app_save, including
   a private copy of any pending input.  Afterwards state is 0, there is
   no pending input and no pending newlines; the other variables keep
   their values.

   Returns the snapshot cast to char *.  The caller passes it to
   app_pop, which restores the state and frees the snapshot. */
252char *
73ee5e4c 253app_push (void)
252b5132 254{
ed9e98c2 255 struct app_save *saved;
252b5132 256
325801bd 257 saved = XNEW (struct app_save);
252b5132
RH
258 saved->state = state;
259 saved->old_state = old_state;
260 saved->out_string = out_string;
261 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
262 saved->add_newlines = add_newlines;
/* saved_input_len is left unassigned when there is no pending input;
   app_pop only reads it when saved_input is non-NULL. */
2b47531b
ILT
263 if (saved_input == NULL)
264 saved->saved_input = NULL;
265 else
266 {
/* Copy the bytes rather than the pointer, so that the snapshot keeps
   its own version of the pending input. */
add39d23 267 saved->saved_input = XNEWVEC (char, saved_input_len);
2b47531b
ILT
268 memcpy (saved->saved_input, saved_input, saved_input_len);
269 saved->saved_input_len = saved_input_len;
270 }
abd63a32 271#ifdef TC_M68K
252b5132 272 saved->scrub_m68k_mri = scrub_m68k_mri;
abd63a32 273#endif
252b5132
RH
274 saved->mri_state = mri_state;
275 saved->mri_last_ch = mri_last_ch;
276#if defined TC_ARM && defined OBJ_ELF
277 saved->symver_state = symver_state;
278#endif
ab1fadc6
AM
279#ifdef TC_ARM
280 saved->last_char = last_char;
281#endif
252b5132 282
3ee4defc 283 /* do_scrub_begin() is not useful, just wastes time. */
252b5132
RH
284
285 state = 0;
286 saved_input = NULL;
f8819316 287 add_newlines = 0;
252b5132
RH
288
289 return (char *) saved;
290}
291
/* Restore the scrubber state saved by app_push.

   ARG is the pointer returned by app_push.  Both the snapshot and its
   copy of the pending input are freed here, so ARG must not be used
   again afterwards.

   NOTE(review): scrub_m68k_mri is restored but lex[] is not rebuilt
   here; confirm that the table cannot be out of step with the restored
   mode. */
3ee4defc 292void
73ee5e4c 293app_pop (char *arg)
252b5132 294{
ed9e98c2 295 struct app_save *saved = (struct app_save *) arg;
252b5132 296
3ee4defc 297 /* There is no do_scrub_end (). */
252b5132
RH
298 state = saved->state;
299 old_state = saved->old_state;
300 out_string = saved->out_string;
301 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
302 add_newlines = saved->add_newlines;
2b47531b
ILT
303 if (saved->saved_input == NULL)
304 saved_input = NULL;
305 else
306 {
/* The pending input is moved back into input_buffer, so it has to fit
   there. */
39a45edc 307 gas_assert (saved->saved_input_len <= sizeof (input_buffer));
2b47531b
ILT
308 memcpy (input_buffer, saved->saved_input, saved->saved_input_len);
309 saved_input = input_buffer;
310 saved_input_len = saved->saved_input_len;
311 free (saved->saved_input);
312 }
abd63a32 313#ifdef TC_M68K
252b5132 314 scrub_m68k_mri = saved->scrub_m68k_mri;
abd63a32 315#endif
252b5132
RH
316 mri_state = saved->mri_state;
317 mri_last_ch = saved->mri_last_ch;
318#if defined TC_ARM && defined OBJ_ELF
319 symver_state = saved->symver_state;
320#endif
ab1fadc6
AM
321#ifdef TC_ARM
322 last_char = saved->last_char;
323#endif
252b5132
RH
324
325 free (arg);
204cd129 326}
252b5132
RH
327
328/* @@ This assumes that \n &c are the same on host and target. This is not
329 necessarily true. */
204cd129 330
/* Return the character denoted by a backslash followed by CH.

   The letters b, f, n, r and t map to the corresponding control
   characters, and the single and double quote map to themselves.  Any
   other CH, including v, digits and the backslash itself, is returned
   unchanged.  do_scrub_chars calls this for the character after a
   backslash in a one-character constant. */
3ee4defc 331static int
73ee5e4c 332process_escape (int ch)
252b5132
RH
333{
334 switch (ch)
335 {
336 case 'b':
337 return '\b';
338 case 'f':
339 return '\f';
340 case 'n':
341 return '\n';
342 case 'r':
343 return '\r';
344 case 't':
345 return '\t';
346 case '\'':
347 return '\'';
348 case '"':
349 return '\"';
350 default:
351 return ch;
352 }
353}
354
355/* This function is called to process input characters. The GET
356 parameter is used to retrieve more input characters. GET should
357 set its parameter to point to a buffer, and return the length of
358 the buffer; it should return 0 at end of file. The scrubbed output
359 characters are put into the buffer starting at TOSTART; the TOSTART
360 buffer is TOLEN bytes in length. The function returns the number
361 of scrubbed characters put into TOSTART. This will be TOLEN unless
362 end of file was seen. This function is arranged as a state
363 machine, and saves its state so that it may return at any point.
364 This is the way the old code used to work. */
365
39a45edc
AM
366size_t
367do_scrub_chars (size_t (*get) (char *, size_t), char *tostart, size_t tolen)
252b5132
RH
368{
369 char *to = tostart;
370 char *toend = tostart + tolen;
371 char *from;
372 char *fromend;
39a45edc 373 size_t fromlen;
ed9e98c2 374 int ch, ch2 = 0;
c9c5dcda
AM
375 /* Character that started the string we're working on. */
376 static char quotechar;
252b5132
RH
377
378 /*State 0: beginning of normal line
379 1: After first whitespace on line (flush more white)
380 2: After first non-white (opcode) on line (keep 1white)
381 3: after second white on line (into operands) (flush white)
93e914b2 382 4: after putting out a .linefile, put out digits
252b5132
RH
383 5: parsing a string, then go to old-state
384 6: putting out \ escape in a "d string.
93e914b2 385 7: no longer used
e9fc6c21 386 8: no longer used
252b5132
RH
387 9: After seeing symbol char in state 3 (keep 1white after symchar)
388 10: After seeing whitespace in state 9 (keep white before symchar)
389 11: After seeing a symbol character in state 0 (eg a label definition)
390 -1: output string in out_string and go to the state in old_state
391 -2: flush text until a '*' '/' is seen, then go to state old_state
392#ifdef TC_V850
b1ac4c66
AM
393 12: After seeing a dash, looking for a second dash as a start
394 of comment.
252b5132 395#endif
f28e8eb3 396#ifdef DOUBLEBAR_PARALLEL
b1ac4c66
AM
397 13: After seeing a vertical bar, looking for a second
398 vertical bar as a parallel expression separator.
52628315 399#endif
40b36596
JM
400#ifdef TC_PREDICATE_START_CHAR
401 14: After seeing a predicate start character at state 0, looking
402 for a predicate end character as predicate.
403 15: After seeing a predicate start character at state 1, looking
404 for a predicate end character as predicate.
3c9b82ba
NC
405#endif
406#ifdef TC_Z80
407 16: After seeing an 'a' or an 'A' at the start of a symbol
408 17: After seeing an 'f' or an 'F' in state 16
252b5132
RH
409#endif
410 */
411
412 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
413 constructs like ``.loc 1 20''. This was turning into ``.loc
414 120''. States 9 and 10 ensure that a space is never dropped in
3b37fd66 415 between characters which could appear in an identifier. Ian
252b5132
RH
416 Taylor, ian@cygnus.com.
417
418 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
419 correctly on the PA (and any other target where colons are optional).
420 Jeff Law, law@cs.utah.edu.
421
422 I added state 13 so that something like "cmp r1, r2 || trap #1" does not
423 get squashed into "cmp r1,r2||trap#1", with the all important space
424 between the 'trap' and the '#1' being eliminated. nickc@cygnus.com */
425
426 /* This macro gets the next input character. */
427
2b47531b
ILT
428#define GET() \
429 (from < fromend \
430 ? * (unsigned char *) (from++) \
431 : (saved_input = NULL, \
432 fromlen = (*get) (input_buffer, sizeof input_buffer), \
433 from = input_buffer, \
434 fromend = from + fromlen, \
435 (fromlen == 0 \
436 ? EOF \
252b5132
RH
437 : * (unsigned char *) (from++))))
438
439 /* This macro pushes a character back on the input stream. */
440
441#define UNGET(uch) (*--from = (uch))
442
443 /* This macro puts a character into the output buffer. If this
444 character fills the output buffer, this macro jumps to the label
445 TOFULL. We use this rather ugly approach because we need to
446 handle two different termination conditions: EOF on the input
447 stream, and a full output buffer. It would be simpler if we
448 always read in the entire input stream before processing it, but
449 I don't want to make such a significant change to the assembler's
450 memory usage. */
451
411863a4
KH
452#define PUT(pch) \
453 do \
454 { \
455 *to++ = (pch); \
456 if (to >= toend) \
457 goto tofull; \
458 } \
252b5132
RH
459 while (0)
460
461 if (saved_input != NULL)
462 {
463 from = saved_input;
464 fromend = from + saved_input_len;
465 }
466 else
467 {
2b47531b 468 fromlen = (*get) (input_buffer, sizeof input_buffer);
252b5132
RH
469 if (fromlen == 0)
470 return 0;
2b47531b 471 from = input_buffer;
252b5132
RH
472 fromend = from + fromlen;
473 }
474
475 while (1)
476 {
477 /* The cases in this switch end with continue, in order to
b1ac4c66
AM
478 branch back to the top of this while loop and generate the
479 next output character in the appropriate state. */
252b5132
RH
480 switch (state)
481 {
482 case -1:
483 ch = *out_string++;
484 if (*out_string == '\0')
485 {
486 state = old_state;
487 old_state = 3;
488 }
489 PUT (ch);
490 continue;
491
492 case -2:
493 for (;;)
494 {
495 do
496 {
497 ch = GET ();
498
499 if (ch == EOF)
500 {
501 as_warn (_("end of file in comment"));
502 goto fromeof;
503 }
504
505 if (ch == '\n')
506 PUT ('\n');
507 }
508 while (ch != '*');
509
510 while ((ch = GET ()) == '*')
511 ;
512
513 if (ch == EOF)
514 {
515 as_warn (_("end of file in comment"));
516 goto fromeof;
517 }
518
519 if (ch == '/')
520 break;
521
522 UNGET (ch);
523 }
524
525 state = old_state;
526 UNGET (' ');
527 continue;
528
529 case 4:
530 ch = GET ();
531 if (ch == EOF)
532 goto fromeof;
533 else if (ch >= '0' && ch <= '9')
534 PUT (ch);
535 else
536 {
537 while (ch != EOF && IS_WHITESPACE (ch))
538 ch = GET ();
539 if (ch == '"')
540 {
93e914b2
AO
541 quotechar = ch;
542 state = 5;
e9fc6c21 543 old_state = 3;
4061927e 544 PUT (ch);
252b5132
RH
545 }
546 else
547 {
548 while (ch != EOF && ch != '\n')
549 ch = GET ();
550 state = 0;
551 PUT (ch);
552 }
553 }
554 continue;
555
556 case 5:
557 /* We are going to copy everything up to a quote character,
b1ac4c66
AM
558 with special handling for a backslash. We try to
559 optimize the copying in the simple case without using the
560 GET and PUT macros. */
252b5132
RH
561 {
562 char *s;
39a45edc 563 ptrdiff_t len;
252b5132
RH
564
565 for (s = from; s < fromend; s++)
566 {
567 ch = *s;
252b5132 568 if (ch == '\\'
c9c5dcda 569 || ch == quotechar
252b5132
RH
570 || ch == '\n')
571 break;
572 }
573 len = s - from;
574 if (len > toend - to)
575 len = toend - to;
576 if (len > 0)
577 {
578 memcpy (to, from, len);
579 to += len;
580 from += len;
df816087
AM
581 if (to >= toend)
582 goto tofull;
252b5132
RH
583 }
584 }
585
586 ch = GET ();
587 if (ch == EOF)
588 {
fc5910c0
NC
589 /* This buffer is here specifically so
590 that the UNGET below will work. */
591 static char one_char_buf[1];
592
c9c5dcda 593 as_warn (_("end of file in string; '%c' inserted"), quotechar);
252b5132 594 state = old_state;
fc5910c0
NC
595 from = fromend = one_char_buf + 1;
596 fromlen = 1;
252b5132 597 UNGET ('\n');
c9c5dcda 598 PUT (quotechar);
252b5132 599 }
c9c5dcda 600 else if (ch == quotechar)
252b5132
RH
601 {
602 state = old_state;
603 PUT (ch);
604 }
605#ifndef NO_STRING_ESCAPES
606 else if (ch == '\\')
607 {
608 state = 6;
609 PUT (ch);
610 }
611#endif
612 else if (scrub_m68k_mri && ch == '\n')
613 {
614 /* Just quietly terminate the string. This permits lines like
204cd129 615 bne label loop if we haven't reach end yet. */
252b5132
RH
616 state = old_state;
617 UNGET (ch);
618 PUT ('\'');
619 }
620 else
621 {
622 PUT (ch);
623 }
624 continue;
625
626 case 6:
627 state = 5;
628 ch = GET ();
629 switch (ch)
630 {
631 /* Handle strings broken across lines, by turning '\n' into
632 '\\' and 'n'. */
633 case '\n':
634 UNGET ('n');
635 add_newlines++;
636 PUT ('\\');
637 continue;
638
4252e537 639 case EOF:
c9c5dcda
AM
640 as_warn (_("end of file in string; '%c' inserted"), quotechar);
641 PUT (quotechar);
4252e537
AM
642 continue;
643
252b5132
RH
644 case '"':
645 case '\\':
646 case 'b':
647 case 'f':
648 case 'n':
649 case 'r':
650 case 't':
651 case 'v':
652 case 'x':
653 case 'X':
654 case '0':
655 case '1':
656 case '2':
657 case '3':
658 case '4':
659 case '5':
660 case '6':
661 case '7':
662 break;
4252e537 663
252b5132 664 default:
4252e537 665#ifdef ONLY_STANDARD_ESCAPES
0e389e77 666 as_warn (_("unknown escape '\\%c' in string; ignored"), ch);
4252e537 667#endif
252b5132 668 break;
252b5132
RH
669 }
670 PUT (ch);
671 continue;
672
b1ac4c66
AM
673#ifdef DOUBLEBAR_PARALLEL
674 case 13:
675 ch = GET ();
676 if (ch != '|')
677 abort ();
678
679 /* Reset back to state 1 and pretend that we are parsing a
680 line from just after the first white space. */
681 state = 1;
682 PUT ('|');
40b36596
JM
683#ifdef TC_TIC6X
684 /* "||^" is used for SPMASKed instructions. */
685 ch = GET ();
686 if (ch == EOF)
687 goto fromeof;
688 else if (ch == '^')
689 PUT ('^');
690 else
691 UNGET (ch);
692#endif
b1ac4c66 693 continue;
3c9b82ba
NC
694#endif
695#ifdef TC_Z80
696 case 16:
697 /* We have seen an 'a' at the start of a symbol, look for an 'f'. */
698 ch = GET ();
34bca508 699 if (ch == 'f' || ch == 'F')
3c9b82ba
NC
700 {
701 state = 17;
702 PUT (ch);
703 }
704 else
705 {
706 state = 9;
707 break;
708 }
1a0670f3 709 /* Fall through. */
3c9b82ba
NC
710 case 17:
711 /* We have seen "af" at the start of a symbol,
712 a ' here is a part of that symbol. */
713 ch = GET ();
714 state = 9;
715 if (ch == '\'')
716 /* Change to avoid warning about unclosed string. */
717 PUT ('`');
0146fc9d 718 else if (ch != EOF)
3c9b82ba
NC
719 UNGET (ch);
720 break;
b1ac4c66 721#endif
252b5132
RH
722 }
723
204cd129 724 /* OK, we are somewhere in states 0 through 4 or 9 through 11. */
252b5132
RH
725
726 /* flushchar: */
727 ch = GET ();
728
40b36596
JM
729#ifdef TC_PREDICATE_START_CHAR
730 if (ch == TC_PREDICATE_START_CHAR && (state == 0 || state == 1))
52628315
L
731 {
732 state += 14;
733 PUT (ch);
734 continue;
735 }
736 else if (state == 14 || state == 15)
737 {
40b36596 738 if (ch == TC_PREDICATE_END_CHAR)
70b911ad
JJ
739 {
740 state -= 14;
741 PUT (ch);
742 ch = GET ();
743 }
52628315
L
744 else
745 {
746 PUT (ch);
747 continue;
748 }
749 }
750#endif
751
252b5132
RH
752 recycle:
753
754#if defined TC_ARM && defined OBJ_ELF
755 /* We need to watch out for .symver directives. See the comment later
756 in this function. */
757 if (symver_state == NULL)
758 {
759 if ((state == 0 || state == 1) && ch == symver_pseudo[0])
760 symver_state = symver_pseudo + 1;
761 }
762 else
763 {
764 /* We advance to the next state if we find the right
765 character. */
766 if (ch != '\0' && (*symver_state == ch))
767 ++symver_state;
768 else if (*symver_state != '\0')
769 /* We did not get the expected character, or we didn't
770 get a valid terminating character after seeing the
771 entire pseudo-op, so we must go back to the beginning. */
772 symver_state = NULL;
773 else
774 {
775 /* We've read the entire pseudo-op. If this is the end
776 of the line, go back to the beginning. */
777 if (IS_NEWLINE (ch))
778 symver_state = NULL;
779 }
780 }
781#endif /* TC_ARM && OBJ_ELF */
782
783#ifdef TC_M68K
784 /* We want to have pseudo-ops which control whether we are in
b1ac4c66
AM
785 MRI mode or not. Unfortunately, since m68k MRI mode affects
786 the scrubber, that means that we need a special purpose
787 recognizer here. */
252b5132
RH
788 if (mri_state == NULL)
789 {
790 if ((state == 0 || state == 1)
791 && ch == mri_pseudo[0])
792 mri_state = mri_pseudo + 1;
793 }
794 else
795 {
796 /* We advance to the next state if we find the right
797 character, or if we need a space character and we get any
798 whitespace character, or if we need a '0' and we get a
799 '1' (this is so that we only need one state to handle
800 ``.mri 0'' and ``.mri 1''). */
801 if (ch != '\0'
802 && (*mri_state == ch
803 || (*mri_state == ' '
804 && lex[ch] == LEX_IS_WHITESPACE)
805 || (*mri_state == '0'
806 && ch == '1')))
807 {
808 mri_last_ch = ch;
809 ++mri_state;
810 }
811 else if (*mri_state != '\0'
812 || (lex[ch] != LEX_IS_WHITESPACE
813 && lex[ch] != LEX_IS_NEWLINE))
814 {
815 /* We did not get the expected character, or we didn't
816 get a valid terminating character after seeing the
817 entire pseudo-op, so we must go back to the
818 beginning. */
819 mri_state = NULL;
820 }
821 else
822 {
823 /* We've read the entire pseudo-op. mri_last_ch is
b1ac4c66
AM
824 either '0' or '1' indicating whether to enter or
825 leave MRI mode. */
252b5132
RH
826 do_scrub_begin (mri_last_ch == '1');
827 mri_state = NULL;
828
829 /* We continue handling the character as usual. The
b1ac4c66
AM
830 main gas reader must also handle the .mri pseudo-op
831 to control expression parsing and the like. */
252b5132
RH
832 }
833 }
834#endif
835
836 if (ch == EOF)
837 {
838 if (state != 0)
839 {
840 as_warn (_("end of file not at end of a line; newline inserted"));
841 state = 0;
842 PUT ('\n');
843 }
844 goto fromeof;
845 }
846
847 switch (lex[ch])
848 {
849 case LEX_IS_WHITESPACE:
850 do
851 {
852 ch = GET ();
853 }
854 while (ch != EOF && IS_WHITESPACE (ch));
855 if (ch == EOF)
856 goto fromeof;
857
858 if (state == 0)
859 {
860 /* Preserve a single whitespace character at the
861 beginning of a line. */
862 state = 1;
863 UNGET (ch);
864 PUT (' ');
865 break;
866 }
867
f28e8eb3 868#ifdef KEEP_WHITE_AROUND_COLON
30a2b4ef
KH
869 if (lex[ch] == LEX_IS_COLON)
870 {
871 /* Only keep this white if there's no white *after* the
b1ac4c66 872 colon. */
30a2b4ef 873 ch2 = GET ();
83bd7402
NC
874 if (ch2 != EOF)
875 UNGET (ch2);
30a2b4ef
KH
876 if (!IS_WHITESPACE (ch2))
877 {
878 state = 9;
879 UNGET (ch);
880 PUT (' ');
881 break;
882 }
883 }
f28e8eb3 884#endif
252b5132
RH
885 if (IS_COMMENT (ch)
886 || ch == '/'
62f65a7b
DB
887 || IS_LINE_SEPARATOR (ch)
888 || IS_PARALLEL_SEPARATOR (ch))
252b5132
RH
889 {
890 if (scrub_m68k_mri)
891 {
892 /* In MRI mode, we keep these spaces. */
893 UNGET (ch);
894 PUT (' ');
895 break;
896 }
897 goto recycle;
898 }
899
900 /* If we're in state 2 or 11, we've seen a non-white
901 character followed by whitespace. If the next character
902 is ':', this is whitespace after a label name which we
903 normally must ignore. In MRI mode, though, spaces are
904 not permitted between the label and the colon. */
905 if ((state == 2 || state == 11)
906 && lex[ch] == LEX_IS_COLON
907 && ! scrub_m68k_mri)
908 {
909 state = 1;
910 PUT (ch);
911 break;
912 }
913
914 switch (state)
915 {
252b5132
RH
916 case 1:
917 /* We can arrive here if we leave a leading whitespace
918 character at the beginning of a line. */
919 goto recycle;
920 case 2:
921 state = 3;
922 if (to + 1 < toend)
923 {
924 /* Optimize common case by skipping UNGET/GET. */
925 PUT (' '); /* Sp after opco */
926 goto recycle;
927 }
928 UNGET (ch);
929 PUT (' ');
930 break;
931 case 3:
40b36596
JM
932#ifndef TC_KEEP_OPERAND_SPACES
933 /* For TI C6X, we keep these spaces as they may separate
934 functional unit specifiers from operands. */
252b5132 935 if (scrub_m68k_mri)
40b36596 936#endif
252b5132
RH
937 {
938 /* In MRI mode, we keep these spaces. */
939 UNGET (ch);
940 PUT (' ');
941 break;
942 }
943 goto recycle; /* Sp in operands */
944 case 9:
945 case 10:
40b36596 946#ifndef TC_KEEP_OPERAND_SPACES
252b5132 947 if (scrub_m68k_mri)
40b36596 948#endif
252b5132
RH
949 {
950 /* In MRI mode, we keep these spaces. */
951 state = 3;
952 UNGET (ch);
953 PUT (' ');
954 break;
955 }
956 state = 10; /* Sp after symbol char */
957 goto recycle;
958 case 11:
abd63a32 959 if (LABELS_WITHOUT_COLONS || flag_m68k_mri)
252b5132
RH
960 state = 1;
961 else
962 {
963 /* We know that ch is not ':', since we tested that
b1ac4c66
AM
964 case above. Therefore this is not a label, so it
965 must be the opcode, and we've just seen the
966 whitespace after it. */
252b5132
RH
967 state = 3;
968 }
969 UNGET (ch);
970 PUT (' '); /* Sp after label definition. */
971 break;
972 default:
973 BAD_CASE (state);
974 }
975 break;
976
977 case LEX_IS_TWOCHAR_COMMENT_1ST:
978 ch2 = GET ();
979 if (ch2 == '*')
980 {
981 for (;;)
982 {
983 do
984 {
985 ch2 = GET ();
986 if (ch2 != EOF && IS_NEWLINE (ch2))
987 add_newlines++;
988 }
989 while (ch2 != EOF && ch2 != '*');
990
991 while (ch2 == '*')
992 ch2 = GET ();
993
994 if (ch2 == EOF || ch2 == '/')
995 break;
996
997 /* This UNGET will ensure that we count newlines
b1ac4c66 998 correctly. */
252b5132
RH
999 UNGET (ch2);
1000 }
1001
1002 if (ch2 == EOF)
1003 as_warn (_("end of file in multiline comment"));
1004
1005 ch = ' ';
1006 goto recycle;
1007 }
800eeca4
JW
1008#ifdef DOUBLESLASH_LINE_COMMENTS
1009 else if (ch2 == '/')
1010 {
1011 do
1012 {
1013 ch = GET ();
1014 }
1015 while (ch != EOF && !IS_NEWLINE (ch));
1016 if (ch == EOF)
1017 as_warn ("end of file in comment; newline inserted");
1018 state = 0;
1019 PUT ('\n');
1020 break;
1021 }
1022#endif
252b5132
RH
1023 else
1024 {
1025 if (ch2 != EOF)
1026 UNGET (ch2);
1027 if (state == 9 || state == 10)
1028 state = 3;
1029 PUT (ch);
1030 }
1031 break;
1032
1033 case LEX_IS_STRINGQUOTE:
c9c5dcda 1034 quotechar = ch;
252b5132
RH
1035 if (state == 10)
1036 {
204cd129 1037 /* Preserve the whitespace in foo "bar". */
252b5132
RH
1038 UNGET (ch);
1039 state = 3;
1040 PUT (' ');
1041
1042 /* PUT didn't jump out. We could just break, but we
b1ac4c66 1043 know what will happen, so optimize a bit. */
252b5132
RH
1044 ch = GET ();
1045 old_state = 3;
1046 }
1047 else if (state == 9)
1048 old_state = 3;
1049 else
1050 old_state = state;
1051 state = 5;
1052 PUT (ch);
1053 break;
1054
252b5132 1055 case LEX_IS_ONECHAR_QUOTE:
c0a139c7
NC
1056#ifdef H_TICK_HEX
1057 if (state == 9 && enable_h_tick_hex)
c54b5932
DD
1058 {
1059 char c;
1060
1061 c = GET ();
1062 as_warn ("'%c found after symbol", c);
1063 UNGET (c);
1064 }
c0a139c7 1065#endif
252b5132
RH
1066 if (state == 10)
1067 {
204cd129 1068 /* Preserve the whitespace in foo 'b'. */
252b5132
RH
1069 UNGET (ch);
1070 state = 3;
1071 PUT (' ');
1072 break;
1073 }
1074 ch = GET ();
1075 if (ch == EOF)
1076 {
1077 as_warn (_("end of file after a one-character quote; \\0 inserted"));
1078 ch = 0;
1079 }
1080 if (ch == '\\')
1081 {
1082 ch = GET ();
1083 if (ch == EOF)
1084 {
1085 as_warn (_("end of file in escape character"));
1086 ch = '\\';
1087 }
1088 else
1089 ch = process_escape (ch);
1090 }
1091 sprintf (out_buf, "%d", (int) (unsigned char) ch);
1092
1093 /* None of these 'x constants for us. We want 'x'. */
1094 if ((ch = GET ()) != '\'')
1095 {
1096#ifdef REQUIRE_CHAR_CLOSE_QUOTE
0e389e77 1097 as_warn (_("missing close quote; (assumed)"));
252b5132
RH
1098#else
1099 if (ch != EOF)
1100 UNGET (ch);
1101#endif
1102 }
1103 if (strlen (out_buf) == 1)
1104 {
1105 PUT (out_buf[0]);
1106 break;
1107 }
1108 if (state == 9)
1109 old_state = 3;
1110 else
1111 old_state = state;
1112 state = -1;
1113 out_string = out_buf;
1114 PUT (*out_string++);
1115 break;
252b5132
RH
1116
1117 case LEX_IS_COLON:
f28e8eb3 1118#ifdef KEEP_WHITE_AROUND_COLON
30a2b4ef 1119 state = 9;
f28e8eb3 1120#else
252b5132
RH
1121 if (state == 9 || state == 10)
1122 state = 3;
1123 else if (state != 3)
1124 state = 1;
f28e8eb3 1125#endif
252b5132
RH
1126 PUT (ch);
1127 break;
1128
1129 case LEX_IS_NEWLINE:
1130 /* Roll out a bunch of newlines from inside comments, etc. */
1131 if (add_newlines)
1132 {
1133 --add_newlines;
1134 UNGET (ch);
1135 }
3ee4defc 1136 /* Fall through. */
252b5132
RH
1137
1138 case LEX_IS_LINE_SEPARATOR:
1139 state = 0;
1140 PUT (ch);
1141 break;
1142
62f65a7b
DB
1143 case LEX_IS_PARALLEL_SEPARATOR:
1144 state = 1;
1145 PUT (ch);
1146 break;
1147
252b5132
RH
1148#ifdef TC_V850
1149 case LEX_IS_DOUBLEDASH_1ST:
30a2b4ef 1150 ch2 = GET ();
252b5132
RH
1151 if (ch2 != '-')
1152 {
0146fc9d
NC
1153 if (ch2 != EOF)
1154 UNGET (ch2);
252b5132
RH
1155 goto de_fault;
1156 }
3ee4defc 1157 /* Read and skip to end of line. */
252b5132
RH
1158 do
1159 {
1160 ch = GET ();
1161 }
1162 while (ch != EOF && ch != '\n');
204cd129 1163
252b5132 1164 if (ch == EOF)
204cd129
NC
1165 as_warn (_("end of file in comment; newline inserted"));
1166
252b5132
RH
1167 state = 0;
1168 PUT ('\n');
1169 break;
3ee4defc 1170#endif
f28e8eb3 1171#ifdef DOUBLEBAR_PARALLEL
252b5132 1172 case LEX_IS_DOUBLEBAR_1ST:
30a2b4ef 1173 ch2 = GET ();
83bd7402
NC
1174 if (ch2 != EOF)
1175 UNGET (ch2);
252b5132 1176 if (ch2 != '|')
204cd129
NC
1177 goto de_fault;
1178
b1ac4c66
AM
1179 /* Handle '||' in two states as invoking PUT twice might
1180 result in the first one jumping out of this loop. We'd
1181 then lose track of the state and one '|' char. */
1182 state = 13;
252b5132
RH
1183 PUT ('|');
1184 break;
3ee4defc 1185#endif
252b5132
RH
1186 case LEX_IS_LINE_COMMENT_START:
1187 /* FIXME-someday: The two character comment stuff was badly
1188 thought out. On i386, we want '/' as line comment start
1189 AND we want C style comments.  Hence this hack.  The
1190 whole lexical process should be reworked. xoxorich. */
1191 if (ch == '/')
1192 {
1193 ch2 = GET ();
1194 if (ch2 == '*')
1195 {
1196 old_state = 3;
1197 state = -2;
1198 break;
1199 }
69ace220 1200 else if (ch2 != EOF)
252b5132
RH
1201 {
1202 UNGET (ch2);
1203 }
204cd129 1204 }
252b5132
RH
1205
1206 if (state == 0 || state == 1) /* Only comment at start of line. */
1207 {
1208 int startch;
1209
1210 startch = ch;
1211
1212 do
1213 {
1214 ch = GET ();
1215 }
1216 while (ch != EOF && IS_WHITESPACE (ch));
204cd129 1217
252b5132
RH
1218 if (ch == EOF)
1219 {
1220 as_warn (_("end of file in comment; newline inserted"));
1221 PUT ('\n');
1222 break;
1223 }
204cd129 1224
252b5132
RH
1225 if (ch < '0' || ch > '9' || state != 0 || startch != '#')
1226 {
1227 /* Not a cpp line. */
1228 while (ch != EOF && !IS_NEWLINE (ch))
1229 ch = GET ();
1230 if (ch == EOF)
cf3f45fa
AM
1231 {
1232 as_warn (_("end of file in comment; newline inserted"));
1233 PUT ('\n');
1234 }
1235 else /* IS_NEWLINE (ch) */
1236 {
1237 /* To process non-zero add_newlines. */
1238 UNGET (ch);
1239 }
252b5132 1240 state = 0;
252b5132
RH
1241 break;
1242 }
3ee4defc 1243 /* Looks like `# 123 "filename"' from cpp. */
252b5132
RH
1244 UNGET (ch);
1245 old_state = 4;
1246 state = -1;
1247 if (scrub_m68k_mri)
93e914b2 1248 out_string = "\tlinefile ";
252b5132 1249 else
93e914b2 1250 out_string = "\t.linefile ";
252b5132
RH
1251 PUT (*out_string++);
1252 break;
1253 }
1254
1255#ifdef TC_D10V
1256 /* All insns end in a char for which LEX_IS_SYMBOL_COMPONENT is true.
1257 Trap is the only short insn that has a first operand that is
1258 neither register nor label.
1259 We must prevent exef0f ||trap #1 from degenerating to exef0f ||trap#1 .
30a2b4ef
KH
1260 We can't make '#' LEX_IS_SYMBOL_COMPONENT because it is
1261 already LEX_IS_LINE_COMMENT_START. However, it is the
1262 only character in line_comment_chars for d10v, hence we
1263 can recognize it as such. */
252b5132
RH
1264 /* An alternative approach would be to reset the state to 1 when
1265 we see '||', '<-' or '->', but that seems to be overkill. */
30a2b4ef
KH
1266 if (state == 10)
1267 PUT (' ');
252b5132
RH
1268#endif
1269 /* We have a line comment character which is not at the
1270 start of a line. If this is also a normal comment
1271 character, fall through. Otherwise treat it as a default
1272 character. */
1273 if (strchr (tc_comment_chars, ch) == NULL
1274 && (! scrub_m68k_mri
1275 || (ch != '!' && ch != '*')))
1276 goto de_fault;
1277 if (scrub_m68k_mri
1278 && (ch == '!' || ch == '*' || ch == '#')
1279 && state != 1
1280 && state != 10)
1281 goto de_fault;
1282 /* Fall through. */
1283 case LEX_IS_COMMENT_START:
1284#if defined TC_ARM && defined OBJ_ELF
1285 /* On the ARM, `@' is the comment character.
1286 Unfortunately this is also a special character in ELF .symver
30a2b4ef
KH
1287 directives (and .type, though we deal with those another way).
1288 So we check if this line is such a directive, and treat
1289 the character as default if so. This is a hack. */
252b5132
RH
1290 if ((symver_state != NULL) && (*symver_state == 0))
1291 goto de_fault;
4c400d5e 1292#endif
2a676888
MS
1293
1294#ifdef TC_ARM
1295 /* For the ARM, care is needed not to damage occurrences of \@
1296 by stripping the @ onwards. Yuck. */
ab1fadc6 1297 if ((to > tostart ? to[-1] : last_char) == '\\')
2a676888
MS
1298 /* Do not treat the @ as a start-of-comment. */
1299 goto de_fault;
1300#endif
1301
4c400d5e
AM
1302#ifdef WARN_COMMENTS
1303 if (!found_comment)
3b4dbbbf 1304 found_comment_file = as_where (&found_comment);
252b5132
RH
1305#endif
1306 do
1307 {
1308 ch = GET ();
1309 }
1310 while (ch != EOF && !IS_NEWLINE (ch));
1311 if (ch == EOF)
1312 as_warn (_("end of file in comment; newline inserted"));
1313 state = 0;
1314 PUT ('\n');
1315 break;
1316
c54b5932
DD
1317#ifdef H_TICK_HEX
1318 case LEX_IS_H:
1319 /* Look for strings like H'[0-9A-Fa-f] and if found, replace
1320 the H' with 0x to make them gas-style hex characters. */
1321 if (enable_h_tick_hex)
1322 {
1323 char quot;
1324
1325 quot = GET ();
1326 if (quot == '\'')
1327 {
1328 UNGET ('x');
1329 ch = '0';
1330 }
1331 else
1332 UNGET (quot);
1333 }
c54b5932 1334#endif
fcddde94 1335 /* Fall through. */
c54b5932 1336
252b5132
RH
1337 case LEX_IS_SYMBOL_COMPONENT:
1338 if (state == 10)
1339 {
1340 /* This is a symbol character following another symbol
1341 character, with whitespace in between. We skipped
1342 the whitespace earlier, so output it now. */
1343 UNGET (ch);
1344 state = 3;
1345 PUT (' ');
1346 break;
1347 }
1348
3c9b82ba
NC
1349#ifdef TC_Z80
1350 /* "af'" is a symbol containing '\''. */
34bca508 1351 if (state == 3 && (ch == 'a' || ch == 'A'))
3c9b82ba
NC
1352 {
1353 state = 16;
1354 PUT (ch);
1355 ch = GET ();
34bca508 1356 if (ch == 'f' || ch == 'F')
3c9b82ba
NC
1357 {
1358 state = 17;
1359 PUT (ch);
1360 break;
1361 }
1362 else
1363 {
1364 state = 9;
536695d0 1365 if (ch == EOF || !IS_SYMBOL_COMPONENT (ch))
3c9b82ba 1366 {
0146fc9d
NC
1367 if (ch != EOF)
1368 UNGET (ch);
3c9b82ba
NC
1369 break;
1370 }
1371 }
1372 }
1373#endif
252b5132
RH
1374 if (state == 3)
1375 state = 9;
1376
1377 /* This is a common case. Quickly copy CH and all the
b1ac4c66 1378 following symbol component or normal characters. */
252b5132
RH
1379 if (to + 1 < toend
1380 && mri_state == NULL
1381#if defined TC_ARM && defined OBJ_ELF
1382 && symver_state == NULL
1383#endif
1384 )
1385 {
1386 char *s;
39a45edc 1387 ptrdiff_t len;
252b5132
RH
1388
1389 for (s = from; s < fromend; s++)
1390 {
1391 int type;
1392
30a2b4ef 1393 ch2 = *(unsigned char *) s;
252b5132
RH
1394 type = lex[ch2];
1395 if (type != 0
1396 && type != LEX_IS_SYMBOL_COMPONENT)
1397 break;
1398 }
204cd129 1399
252b5132 1400 if (s > from)
204cd129
NC
1401 /* Handle the last character normally, for
1402 simplicity. */
1403 --s;
1404
252b5132 1405 len = s - from;
204cd129 1406
252b5132
RH
1407 if (len > (toend - to) - 1)
1408 len = (toend - to) - 1;
204cd129 1409
252b5132
RH
1410 if (len > 0)
1411 {
1412 PUT (ch);
518051dc
BE
1413 memcpy (to, from, len);
1414 to += len;
1415 from += len;
37b75c0c
AM
1416 if (to >= toend)
1417 goto tofull;
252b5132
RH
1418 ch = GET ();
1419 }
1420 }
1421
1422 /* Fall through. */
1423 default:
1424 de_fault:
1425 /* Some relatively `normal' character. */
1426 if (state == 0)
1427 {
9a124774 1428 state = 11; /* Now seeing label definition. */
252b5132
RH
1429 }
1430 else if (state == 1)
1431 {
9a124774 1432 state = 2; /* Ditto. */
252b5132
RH
1433 }
1434 else if (state == 9)
1435 {
2cdb18a7 1436 if (!IS_SYMBOL_COMPONENT (ch))
252b5132
RH
1437 state = 3;
1438 }
1439 else if (state == 10)
1440 {
c5c834aa
AH
1441 if (ch == '\\')
1442 {
1443 /* Special handling for backslash: a backslash may
1444 be the beginning of a formal parameter (of a
1445 macro) following another symbol character, with
1446 whitespace in between. If that is the case, we
1447 output a space before the parameter. Strictly
1448 speaking, correct handling depends upon what the
1449 macro parameter expands into; if the parameter
1450 expands into something which does not start with
1451 an operand character, then we don't want to keep
1452 the space. We don't have enough information to
1453 make the right choice, so here we are making the
1454 choice which is more likely to be correct. */
1740b7b1
NS
1455 if (to + 1 >= toend)
1456 {
1457 /* If we're near the end of the buffer, save the
1458 character for the next time round. Otherwise
1459 we'll lose our state. */
1460 UNGET (ch);
1461 goto tofull;
1462 }
1463 *to++ = ' ';
c5c834aa
AH
1464 }
1465
252b5132
RH
1466 state = 3;
1467 }
1468 PUT (ch);
1469 break;
1470 }
1471 }
1472
1473 /*NOTREACHED*/
1474
1475 fromeof:
1476 /* We have reached the end of the input. */
ab1fadc6
AM
1477#ifdef TC_ARM
1478 if (to > tostart)
1479 last_char = to[-1];
1480#endif
252b5132
RH
1481 return to - tostart;
1482
1483 tofull:
1484 /* The output buffer is full. Save any input we have not yet
1485 processed. */
1486 if (fromend > from)
1487 {
2b47531b 1488 saved_input = from;
252b5132
RH
1489 saved_input_len = fromend - from;
1490 }
1491 else
2b47531b
ILT
1492 saved_input = NULL;
1493
ab1fadc6
AM
1494#ifdef TC_ARM
1495 if (to > tostart)
1496 last_char = to[-1];
1497#endif
252b5132
RH
1498 return to - tostart;
1499}
This page took 0.949927 seconds and 4 git commands to generate.