Details of more fixes to earlier fixes
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
/* This is the Assembler Pre-Processor
   Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
fecd2382 19
/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */

/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .line <number>\n.file <filename>
   pair.  This needs better error-handling.  */
fecd2382
RP
25
26#include <stdio.h>
6efd877d 27#include "as.h" /* For BAD_CASE() only */
fecd2382 28
5a051773
SS
29#if (__STDC__ != 1)
30#ifndef const
31#define const /* empty */
32#endif
fecd2382
RP
33#endif
34
/* Character classes stored in lex[].  A value of zero (never assigned
   below) marks an ordinary character with no special lexical role.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11

/* Classification table, indexed by character value; filled in by
   do_scrub_begin.  */
static char lex[256];

/* The characters which may appear inside a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Predicates testing the class of character C.  C is used as an index
   into lex[], so it must be a valid character value and never EOF.  */
#define IS_SYMBOL_COMPONENT(c)	(lex[(c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	(lex[(c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	(lex[(c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)		(lex[(c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	(lex[(c)] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)		(lex[(c)] == LEX_IS_NEWLINE)
56
385ce433
JL
57static int process_escape PARAMS ((int));
58
a39116f1
RP
/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich.  */
fecd2382 62
6efd877d
KR
63void
64do_scrub_begin ()
65{
66 const char *p;
67
68 lex[' '] = LEX_IS_WHITESPACE;
69 lex['\t'] = LEX_IS_WHITESPACE;
70 lex['\n'] = LEX_IS_NEWLINE;
71 lex[';'] = LEX_IS_LINE_SEPARATOR;
6efd877d 72 lex[':'] = LEX_IS_COLON;
7c2d4011 73
7172e226
ILT
74 if (! flag_mri)
75 {
76 lex['"'] = LEX_IS_STRINGQUOTE;
be06bdcd 77
7172e226
ILT
78#ifndef TC_HPPA
79 lex['\''] = LEX_IS_ONECHAR_QUOTE;
80#endif
be06bdcd
SC
81
82#ifdef SINGLE_QUOTE_STRINGS
7172e226 83 lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011 84#endif
7172e226 85 }
be06bdcd 86
86038ada
ILT
87 /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop
88 in state 5 of do_scrub_chars must be changed. */
89
a2a5a4fa
KR
90 /* Note that these override the previous defaults, e.g. if ';' is a
91 comment char, then it isn't a line separator. */
6efd877d
KR
92 for (p = symbol_chars; *p; ++p)
93 {
58d4951d 94 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
6efd877d
KR
95 } /* declare symbol characters */
96
6efd877d
KR
97 for (p = comment_chars; *p; p++)
98 {
58d4951d 99 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
6efd877d
KR
100 } /* declare comment chars */
101
9a7d824a
ILT
102 for (p = line_comment_chars; *p; p++)
103 {
58d4951d 104 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
9a7d824a
ILT
105 } /* declare line comment chars */
106
6efd877d
KR
107 for (p = line_separator_chars; *p; p++)
108 {
58d4951d 109 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
6efd877d
KR
110 } /* declare line separators */
111
112 /* Only allow slash-star comments if slash is not in use */
113 if (lex['/'] == 0)
114 {
115 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
116 }
a2a5a4fa
KR
117 /* FIXME-soon. This is a bad hack but otherwise, we can't do
118 c-style comments when '/' is a line comment char. xoxorich. */
6efd877d
KR
119 if (lex['*'] == 0)
120 {
121 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
122 }
7172e226
ILT
123
124 if (flag_mri)
125 {
126 lex['\''] = LEX_IS_STRINGQUOTE;
127 lex[';'] = LEX_IS_COMMENT_START;
128 lex['*'] = LEX_IS_LINE_COMMENT_START;
129 /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but
130 then it can't be used in an expression. */
131 lex['!'] = LEX_IS_LINE_COMMENT_START;
132 }
6efd877d 133} /* do_scrub_begin() */
fecd2382 134
fecd2382
RP
/* Saved state of the scrubber.  do_scrub_chars may return at any
   point (output buffer full, or end of input), so everything it needs
   in order to resume lives in these variables.  */

/* Current state of the state machine.  */
static int state;

/* State to return to once a sub-state (string, comment, pending
   output string) has finished.  */
static int old_state;

/* Text still waiting to be emitted while in state -1.  */
static char *out_string;

/* Scratch space holding the decimal text of a character constant.  */
static char out_buf[20];

/* Count of newlines which were swallowed and must still be emitted.  */
static int add_newlines;

/* Input which had been fetched but not yet processed when the output
   buffer filled up, and its length.  NULL when there is none.  */
static char *saved_input;
static int saved_input_len;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  Each member mirrors the static
   variable of the same name.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *saved_input;
  int saved_input_len;
};
159
160char *
161app_push ()
162{
7c2d4011
SC
163 register struct app_save *saved;
164
6efd877d
KR
165 saved = (struct app_save *) xmalloc (sizeof (*saved));
166 saved->state = state;
167 saved->old_state = old_state;
168 saved->out_string = out_string;
58d4951d 169 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
6efd877d 170 saved->add_newlines = add_newlines;
86038ada
ILT
171 saved->saved_input = saved_input;
172 saved->saved_input_len = saved_input_len;
7c2d4011
SC
173
174 /* do_scrub_begin() is not useful, just wastes time. */
86038ada
ILT
175
176 state = 0;
177 saved_input = NULL;
178
6efd877d 179 return (char *) saved;
fecd2382
RP
180}
181
6efd877d
KR
182void
183app_pop (arg)
184 char *arg;
fecd2382 185{
6efd877d
KR
186 register struct app_save *saved = (struct app_save *) arg;
187
188 /* There is no do_scrub_end (). */
189 state = saved->state;
190 old_state = saved->old_state;
191 out_string = saved->out_string;
58d4951d 192 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
6efd877d 193 add_newlines = saved->add_newlines;
86038ada
ILT
194 saved_input = saved->saved_input;
195 saved_input_len = saved->saved_input_len;
6efd877d
KR
196
197 free (arg);
198} /* app_pop() */
199
6d331d71
KR
/* Translate CH, the character following a backslash in a character
   constant, into the character the escape stands for.  Characters
   which are not listed in the table below are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  static const struct
  {
    char letter;		/* Character after the backslash.  */
    char value;			/* What the escape evaluates to.  */
  }
  escapes[] =
  {
    { 'b', '\b' },
    { 'f', '\f' },
    { 'n', '\n' },
    { 'r', '\r' },
    { 't', '\t' },
    { '\'', '\'' },
    { '"', '\"' }
  };
  unsigned int i;

  for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++)
    {
      if (ch == escapes[i].letter)
	return escapes[i].value;
    }

  return ch;
}
86038ada
ILT
226
227/* This function is called to process input characters. The GET
228 parameter is used to retrieve more input characters. GET should
229 set its parameter to point to a buffer, and return the length of
230 the buffer; it should return 0 at end of file. The scrubbed output
231 characters are put into the buffer starting at TOSTART; the TOSTART
232 buffer is TOLEN bytes in length. The function returns the number
233 of scrubbed characters put into TOSTART. This will be TOLEN unless
234 end of file was seen. This function is arranged as a state
235 machine, and saves its state so that it may return at any point.
236 This is the way the old code used to work. */
237
238int
239do_scrub_chars (get, tostart, tolen)
240 int (*get) PARAMS ((char **));
241 char *tostart;
242 int tolen;
fecd2382 243{
86038ada
ILT
244 char *to = tostart;
245 char *toend = tostart + tolen;
246 char *from;
247 char *fromend;
248 int fromlen;
249 register int ch, ch2 = 0;
250 int not_cpp_line = 0;
251
6efd877d 252 /*State 0: beginning of normal line
a39116f1
RP
253 1: After first whitespace on line (flush more white)
254 2: After first non-white (opcode) on line (keep 1white)
255 3: after second white on line (into operands) (flush white)
256 4: after putting out a .line, put out digits
257 5: parsing a string, then go to old-state
258 6: putting out \ escape in a "d string.
9a7d824a
ILT
259 7: After putting out a .appfile, put out string.
260 8: After putting out a .appfile string, flush until newline.
f6a91cc0 261 9: After seeing symbol char in state 3 (keep 1white after symchar)
9a7d824a 262 10: After seeing whitespace in state 9 (keep white before symchar)
a2a5a4fa
KR
263 11: After seeing a symbol character in state 0 (eg a label definition)
264 -1: output string in out_string and go to the state in old_state
265 -2: flush text until a '*' '/' is seen, then go to state old_state
a39116f1 266 */
6efd877d 267
9a7d824a
ILT
268 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
269 constructs like ``.loc 1 20''. This was turning into ``.loc
270 120''. States 9 and 10 ensure that a space is never dropped in
271 between characters which could appear in a identifier. Ian
a2a5a4fa
KR
272 Taylor, ian@cygnus.com.
273
274 I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works
275 correctly on the PA (and any other target where colons are optional).
276 Jeff Law, law@cs.utah.edu. */
277
86038ada
ILT
278 /* This macro gets the next input character. */
279
280#define GET() \
281 (from < fromend \
282 ? *from++ \
283 : ((saved_input != NULL \
284 ? (free (saved_input), \
285 saved_input = NULL, \
286 0) \
287 : 0), \
288 fromlen = (*get) (&from), \
289 fromend = from + fromlen, \
290 (fromlen == 0 \
291 ? EOF \
292 : *from++)))
293
294 /* This macro pushes a character back on the input stream. */
295
296#define UNGET(uch) (*--from = (uch))
297
298 /* This macro puts a character into the output buffer. If this
299 character fills the output buffer, this macro jumps to the label
300 TOFULL. We use this rather ugly approach because we need to
301 handle two different termination conditions: EOF on the input
302 stream, and a full output buffer. It would be simpler if we
303 always read in the entire input stream before processing it, but
304 I don't want to make such a significant change to the assembler's
305 memory usage. */
306
307#define PUT(pch) \
308 do \
309 { \
310 *to++ = (pch); \
311 if (to >= toend) \
312 goto tofull; \
313 } \
314 while (0)
315
316 if (saved_input != NULL)
6efd877d 317 {
86038ada
ILT
318 from = saved_input;
319 fromend = from + saved_input_len;
320 }
321 else
322 {
323 fromlen = (*get) (&from);
324 if (fromlen == 0)
325 return 0;
326 fromend = from + fromlen;
327 }
6efd877d 328
86038ada
ILT
329 while (1)
330 {
331 /* The cases in this switch end with continue, in order to
332 branch back to the top of this while loop and generate the
333 next output character in the appropriate state. */
334 switch (state)
6efd877d 335 {
86038ada
ILT
336 case -1:
337 ch = *out_string++;
338 if (*out_string == '\0')
6efd877d 339 {
86038ada
ILT
340 state = old_state;
341 old_state = 3;
6efd877d 342 }
86038ada
ILT
343 PUT (ch);
344 continue;
6efd877d 345
86038ada
ILT
346 case -2:
347 for (;;)
6efd877d 348 {
86038ada
ILT
349 do
350 {
351 ch = GET ();
6efd877d 352
86038ada
ILT
353 if (ch == EOF)
354 {
355 as_warn ("end of file in comment");
356 goto fromeof;
357 }
358
359 if (ch == '\n')
360 PUT ('\n');
361 }
362 while (ch != '*');
363
364 while ((ch = GET ()) == '*')
365 ;
366
367 if (ch == EOF)
368 {
369 as_warn ("end of file in comment");
370 goto fromeof;
371 }
372
373 if (ch == '/')
374 break;
375
376 UNGET (ch);
6efd877d 377 }
86038ada
ILT
378
379 state = old_state;
380 PUT (' ');
381 continue;
382
383 case 4:
384 ch = GET ();
385 if (ch == EOF)
386 goto fromeof;
387 else if (ch >= '0' && ch <= '9')
388 PUT (ch);
6efd877d
KR
389 else
390 {
86038ada 391 while (ch != EOF && IS_WHITESPACE (ch))
a2a5a4fa 392 ch = GET ();
86038ada
ILT
393 if (ch == '"')
394 {
395 UNGET (ch);
396 out_string = "\n\t.appfile ";
397 old_state = 7;
398 state = -1;
399 PUT (*out_string++);
400 }
401 else
402 {
403 while (ch != EOF && ch != '\n')
404 ch = GET ();
405 state = 0;
406 PUT (ch);
407 }
6efd877d 408 }
86038ada 409 continue;
6efd877d 410
86038ada
ILT
411 case 5:
412 /* We are going to copy everything up to a quote character,
413 with special handling for a backslash. We try to
414 optimize the copying in the simple case without using the
415 GET and PUT macros. */
416 {
417 char *s;
418 int len;
419
420 for (s = from; s < fromend; s++)
421 {
422 ch = *s;
423 /* This condition must be changed if the type of any
424 other character can be LEX_IS_STRINGQUOTE. */
425 if (ch == '\\'
426 || ch == '"'
427 || ch == '\''
428 || ch == '\n')
429 break;
430 }
431 len = s - from;
432 if (len > toend - to)
433 len = toend - to;
434 if (len > 0)
435 {
436 memcpy (to, from, len);
437 to += len;
438 from += len;
439 }
440 }
441
442 ch = GET ();
443 if (ch == EOF)
444 {
445 as_warn ("end of file in string: inserted '\"'");
446 state = old_state;
447 UNGET ('\n');
448 PUT ('"');
449 }
450 else if (lex[ch] == LEX_IS_STRINGQUOTE)
451 {
452 state = old_state;
453 PUT (ch);
454 }
a2a5a4fa 455#ifndef NO_STRING_ESCAPES
86038ada
ILT
456 else if (ch == '\\')
457 {
458 state = 6;
459 PUT (ch);
460 }
a2a5a4fa 461#endif
86038ada
ILT
462 else if (flag_mri && ch == '\n')
463 {
464 /* Just quietly terminate the string. This permits lines like
465 bne label loop if we haven't reach end yet
466 */
467 state = old_state;
468 UNGET (ch);
469 PUT ('\'');
470 }
471 else
472 {
473 PUT (ch);
474 }
475 continue;
6efd877d 476
86038ada
ILT
477 case 6:
478 state = 5;
479 ch = GET ();
480 switch (ch)
481 {
482 /* Handle strings broken across lines, by turning '\n' into
483 '\\' and 'n'. */
484 case '\n':
485 UNGET ('n');
486 add_newlines++;
487 PUT ('\\');
488 continue;
489
490 case '"':
491 case '\\':
492 case 'b':
493 case 'f':
494 case 'n':
495 case 'r':
496 case 't':
497 case 'v':
498 case 'x':
499 case 'X':
500 case '0':
501 case '1':
502 case '2':
503 case '3':
504 case '4':
505 case '5':
506 case '6':
507 case '7':
508 break;
7c2d4011 509#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
86038ada
ILT
510 default:
511 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
512 break;
513#else /* ONLY_STANDARD_ESCAPES */
514 default:
515 /* Accept \x as x for any x */
516 break;
fecd2382 517#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011 518
86038ada
ILT
519 case EOF:
520 as_warn ("End of file in string: '\"' inserted");
521 PUT ('"');
522 continue;
523 }
524 PUT (ch);
525 continue;
6efd877d 526
86038ada
ILT
527 case 7:
528 ch = GET ();
529 state = 5;
530 old_state = 8;
531 if (ch == EOF)
532 goto fromeof;
533 PUT (ch);
534 continue;
6efd877d 535
86038ada
ILT
536 case 8:
537 do
538 ch = GET ();
539 while (ch != '\n' && ch != EOF);
540 if (ch == EOF)
541 goto fromeof;
a2a5a4fa 542 state = 0;
86038ada
ILT
543 PUT (ch);
544 continue;
a2a5a4fa 545 }
6efd877d 546
86038ada 547 /* OK, we are somewhere in states 0 through 4 or 9 through 11 */
6efd877d 548
86038ada
ILT
549 /* flushchar: */
550 ch = GET ();
551 recycle:
552 if (ch == EOF)
6efd877d 553 {
86038ada
ILT
554 if (state != 0)
555 {
556 as_warn ("end of file not at end of a line; newline inserted");
557 state = 0;
558 PUT ('\n');
559 }
560 goto fromeof;
fecd2382 561 }
6efd877d 562
86038ada 563 switch (lex[ch])
6efd877d 564 {
86038ada
ILT
565 case LEX_IS_WHITESPACE:
566 if (state == 0)
567 {
568 /* Preserve a single whitespace character at the
569 beginning of a line. */
570 state = 1;
571 PUT (ch);
572 break;
573 }
574 do
575 {
576 ch = GET ();
577 }
578 while (ch != EOF && IS_WHITESPACE (ch));
579 if (ch == EOF)
580 goto fromeof;
6efd877d 581
86038ada
ILT
582 if (IS_COMMENT (ch)
583 || (state == 0 && IS_LINE_COMMENT (ch))
584 || ch == '/'
585 || IS_LINE_SEPARATOR (ch))
586 {
587 /* cpp never outputs a leading space before the #, so
588 try to avoid being confused. */
589 not_cpp_line = 1;
92a25e12
ILT
590 if (flag_mri)
591 {
592 /* In MRI mode, we keep these spaces. */
593 UNGET (ch);
594 PUT (' ');
595 break;
596 }
86038ada
ILT
597 goto recycle;
598 }
6efd877d 599
86038ada
ILT
600 /* If we're in state 2 or 11, we've seen a non-white
601 character followed by whitespace. If the next character
602 is ':', this is whitespace after a label name which we
603 normally must ignore. In MRI mode, though, spaces are
604 not permitted between the label and the colon. */
605 if ((state == 2 || state == 11)
606 && lex[ch] == LEX_IS_COLON
607 && ! flag_mri)
6efd877d 608 {
86038ada
ILT
609 state = 1;
610 PUT (ch);
611 break;
612 }
613
614 switch (state)
615 {
616 case 0:
617 state++;
618 goto recycle; /* Punted leading sp */
619 case 1:
620 /* We can arrive here if we leave a leading whitespace
621 character at the beginning of a line. */
622 goto recycle;
623 case 2:
624 state = 3;
625 if (to + 1 < toend)
6efd877d 626 {
86038ada
ILT
627 /* Optimize common case by skipping UNGET/GET. */
628 PUT (' '); /* Sp after opco */
629 goto recycle;
fecd2382 630 }
86038ada
ILT
631 UNGET (ch);
632 PUT (' ');
633 break;
634 case 3:
635 if (flag_mri)
636 {
637 /* In MRI mode, we keep these spaces. */
638 UNGET (ch);
639 PUT (' ');
640 break;
641 }
642 goto recycle; /* Sp in operands */
643 case 9:
644 case 10:
645 if (flag_mri)
646 {
647 /* In MRI mode, we keep these spaces. */
648 state = 3;
649 UNGET (ch);
650 PUT (' ');
651 break;
652 }
653 state = 10; /* Sp after symbol char */
654 goto recycle;
655 case 11:
656 state = 1;
657 UNGET (ch);
658 PUT (' '); /* Sp after label definition. */
659 break;
660 default:
661 BAD_CASE (state);
662 }
663 break;
6efd877d 664
86038ada
ILT
665 case LEX_IS_TWOCHAR_COMMENT_1ST:
666 ch2 = GET ();
667 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
668 {
669 for (;;)
6efd877d 670 {
86038ada
ILT
671 do
672 {
673 ch2 = GET ();
674 if (ch2 != EOF && IS_NEWLINE (ch2))
675 add_newlines++;
676 }
677 while (ch2 != EOF &&
678 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
679
680 while (ch2 != EOF &&
681 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
682 {
683 ch2 = GET ();
684 }
685
686 if (ch2 == EOF
687 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
688 break;
689 UNGET (ch);
fecd2382 690 }
86038ada
ILT
691 if (ch2 == EOF)
692 as_warn ("end of file in multiline comment");
6efd877d 693
86038ada
ILT
694 ch = ' ';
695 goto recycle;
6efd877d 696 }
86038ada
ILT
697 else
698 {
699 if (ch2 != EOF)
700 UNGET (ch2);
701 if (state == 9 || state == 10)
702 state = 3;
703 PUT (ch);
704 }
705 break;
6efd877d 706
86038ada
ILT
707 case LEX_IS_STRINGQUOTE:
708 if (state == 10)
709 {
710 /* Preserve the whitespace in foo "bar" */
711 UNGET (ch);
712 state = 3;
713 PUT (' ');
714
715 /* PUT didn't jump out. We could just break, but we
716 know what will happen, so optimize a bit. */
717 ch = GET ();
718 old_state = 3;
719 }
720 else if (state == 9)
721 old_state = 3;
722 else
723 old_state = state;
724 state = 5;
725 PUT (ch);
726 break;
6efd877d 727
a39116f1 728#ifndef IEEE_STYLE
86038ada
ILT
729 case LEX_IS_ONECHAR_QUOTE:
730 if (state == 10)
731 {
732 /* Preserve the whitespace in foo 'b' */
733 UNGET (ch);
734 state = 3;
735 PUT (' ');
736 break;
737 }
a2a5a4fa 738 ch = GET ();
86038ada
ILT
739 if (ch == EOF)
740 {
741 as_warn ("end of file after a one-character quote; \\0 inserted");
742 ch = 0;
743 }
744 if (ch == '\\')
745 {
746 ch = GET ();
747 if (ch == EOF)
748 {
749 as_warn ("end of file in escape character");
750 ch = '\\';
751 }
752 else
753 ch = process_escape (ch);
754 }
755 sprintf (out_buf, "%d", (int) (unsigned char) ch);
6efd877d 756
86038ada
ILT
757 /* None of these 'x constants for us. We want 'x'. */
758 if ((ch = GET ()) != '\'')
759 {
fecd2382 760#ifdef REQUIRE_CHAR_CLOSE_QUOTE
86038ada 761 as_warn ("Missing close quote: (assumed)");
fecd2382 762#else
86038ada
ILT
763 if (ch != EOF)
764 UNGET (ch);
fecd2382 765#endif
86038ada
ILT
766 }
767 if (strlen (out_buf) == 1)
768 {
769 PUT (out_buf[0]);
770 break;
771 }
772 if (state == 9)
773 old_state = 3;
774 else
775 old_state = state;
776 state = -1;
777 out_string = out_buf;
778 PUT (*out_string++);
779 break;
a39116f1 780#endif
6efd877d 781
86038ada
ILT
782 case LEX_IS_COLON:
783 if (state == 9 || state == 10)
784 state = 3;
785 else if (state != 3)
786 state = 1;
787 PUT (ch);
788 break;
6efd877d 789
86038ada
ILT
790 case LEX_IS_NEWLINE:
791 /* Roll out a bunch of newlines from inside comments, etc. */
792 if (add_newlines)
793 {
794 --add_newlines;
795 UNGET (ch);
796 }
797 /* fall thru into... */
6efd877d 798
86038ada
ILT
799 case LEX_IS_LINE_SEPARATOR:
800 state = 0;
801 PUT (ch);
802 break;
803
804 case LEX_IS_LINE_COMMENT_START:
805 if (state == 0) /* Only comment at start of line. */
806 {
807 /* FIXME-someday: The two character comment stuff was
808 badly thought out. On i386, we want '/' as line
809 comment start AND we want C style comments. hence
810 this hack. The whole lexical process should be
811 reworked. xoxorich. */
812 if (ch == '/')
813 {
814 ch2 = GET ();
815 if (ch2 == '*')
816 {
817 state = -2;
818 break;
819 }
820 else
821 {
822 UNGET (ch2);
823 }
824 } /* bad hack */
825
826 if (ch != '#')
827 not_cpp_line = 1;
828
829 do
9a7d824a 830 {
86038ada 831 ch = GET ();
9a7d824a 832 }
86038ada
ILT
833 while (ch != EOF && IS_WHITESPACE (ch));
834 if (ch == EOF)
9a7d824a 835 {
86038ada
ILT
836 as_warn ("end of file in comment; newline inserted");
837 PUT ('\n');
838 break;
9a7d824a 839 }
86038ada
ILT
840 if (ch < '0' || ch > '9' || not_cpp_line)
841 {
842 /* Non-numerics: Eat whole comment line */
843 while (ch != EOF && !IS_NEWLINE (ch))
844 ch = GET ();
845 if (ch == EOF)
846 as_warn ("EOF in Comment: Newline inserted");
847 state = 0;
848 PUT ('\n');
849 break;
850 }
851 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
852 UNGET (ch);
853 old_state = 4;
854 state = -1;
855 out_string = "\t.appline ";
856 PUT (*out_string++);
857 break;
858 }
385ce433 859
86038ada
ILT
860 /* We have a line comment character which is not at the
861 start of a line. If this is also a normal comment
862 character, fall through. Otherwise treat it as a default
863 character. */
864 if (strchr (comment_chars, ch) == NULL
865 && (! flag_mri
866 || (ch != '!' && ch != '*')))
867 goto de_fault;
868 if (flag_mri
869 && (ch == '!' || ch == '*')
870 && state != 1
871 && state != 10)
872 goto de_fault;
873 /* Fall through. */
874 case LEX_IS_COMMENT_START:
9a7d824a 875 do
9a7d824a 876 {
86038ada 877 ch = GET ();
9a7d824a 878 }
86038ada
ILT
879 while (ch != EOF && !IS_NEWLINE (ch));
880 if (ch == EOF)
881 as_warn ("end of file in comment; newline inserted");
882 state = 0;
883 PUT ('\n');
884 break;
885
886 case LEX_IS_SYMBOL_COMPONENT:
887 if (state == 10)
9a7d824a 888 {
86038ada
ILT
889 /* This is a symbol character following another symbol
890 character, with whitespace in between. We skipped
891 the whitespace earlier, so output it now. */
892 UNGET (ch);
893 state = 3;
894 PUT (' ');
895 break;
9a7d824a 896 }
6efd877d 897
86038ada
ILT
898 if (state == 3)
899 state = 9;
6efd877d 900
86038ada
ILT
901 /* This is a common case. Quickly copy CH and all the
902 following symbol component or normal characters. */
903 if (to + 1 < toend)
904 {
905 char *s;
906 int len;
a2a5a4fa 907
86038ada
ILT
908 for (s = from; s < fromend; s++)
909 {
910 int type;
fecd2382 911
86038ada
ILT
912 ch2 = *s;
913 type = lex[ch2];
914 if (type != 0
915 && type != LEX_IS_SYMBOL_COMPONENT)
916 break;
917 }
918 if (s > from)
919 {
920 /* Handle the last character normally, for
921 simplicity. */
922 --s;
923 }
924 len = s - from;
925 if (len > (toend - to) - 1)
926 len = (toend - to) - 1;
927 if (len > 0)
928 {
929 PUT (ch);
930 if (len > 8)
931 {
932 memcpy (to, from, len);
933 to += len;
934 from += len;
935 }
936 else
937 {
938 switch (len)
939 {
940 case 8: *to++ = *from++;
941 case 7: *to++ = *from++;
942 case 6: *to++ = *from++;
943 case 5: *to++ = *from++;
944 case 4: *to++ = *from++;
945 case 3: *to++ = *from++;
946 case 2: *to++ = *from++;
947 case 1: *to++ = *from++;
948 }
949 }
950 ch = GET ();
951 }
952 }
fecd2382 953
86038ada
ILT
954 /* Fall through. */
955 default:
956 de_fault:
957 /* Some relatively `normal' character. */
958 if (state == 0)
959 {
960 state = 11; /* Now seeing label definition */
961 }
962 else if (state == 1)
963 {
964 state = 2; /* Ditto */
965 }
966 else if (state == 9)
967 {
968 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
969 state = 3;
970 }
971 else if (state == 10)
972 {
973 state = 3;
974 }
975 PUT (ch);
976 break;
977 }
978 }
fecd2382 979
86038ada 980 /*NOTREACHED*/
6efd877d 981
86038ada
ILT
982 fromeof:
983 /* We have reached the end of the input. */
984 return to - tostart;
fecd2382 985
86038ada
ILT
986 tofull:
987 /* The output buffer is full. Save any input we have not yet
988 processed. */
989 if (fromend > from)
990 {
991 char *save;
992
993 save = (char *) xmalloc (fromend - from);
994 memcpy (save, from, fromend - from);
995 if (saved_input != NULL)
996 free (saved_input);
997 saved_input = save;
998 saved_input_len = fromend - from;
999 }
1000 else
1001 {
1002 if (saved_input != NULL)
1003 {
1004 free (saved_input);
1005 saved_input = NULL;
1006 }
1007 }
1008 return to - tostart;
fecd2382 1009}
6efd877d 1010
fecd2382 1011/* end of app.c */
This page took 0.184621 seconds and 4 git commands to generate.