* language.h (PRINT_LITERAL_FORM): New macro that takes character
[deliverable/binutils-gdb.git] / gas / app.c
CommitLineData
3340f7e5 1/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
a39116f1
RP
2
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
4 */
fecd2382
RP
5/* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
a39116f1
RP
7
8 This file is part of GAS, the GNU Assembler.
9
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382
RP
23
24/* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
27 This needs better error-handling.
a39116f1 28 */
fecd2382
RP
29
30#include <stdio.h>
31#include "as.h" /* For BAD_CASE() only */
32
/* Compilers that are not ANSI C have no `const' keyword.  Make it expand
   to nothing for them, unless something has already defined it. */
#if (__STDC__ != 1) && !defined(const)
#define const /* Nothing */
#endif

/* Target-supplied character lists, each a NUL-terminated string.
   These will go in BSS if not defined elsewhere, producing empty strings. */
extern const char line_separator_chars[];
extern const char line_comment_chars[];
extern const char comment_chars[];

/* Characters that do_scrub_begin() marks as symbol components. */
static char symbol_chars[] =
	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Per-character classification table, indexed by character code.  Each
   entry is 0 (no special meaning) or one of the LEX_IS_* codes. */
static char lex[256];
45
/* Classification codes stored in lex[].  An entry of 0 means the
   character has no special meaning to the scrubber. */
#define LEX_IS_SYMBOL_COMPONENT 1
#define LEX_IS_WHITESPACE 2
#define LEX_IS_LINE_SEPARATOR 3
#define LEX_IS_COMMENT_START 4
#define LEX_IS_LINE_COMMENT_START 5
#define LEX_IS_TWOCHAR_COMMENT_1ST 6
#define LEX_IS_TWOCHAR_COMMENT_2ND 7
#define LEX_IS_STRINGQUOTE 8
#define LEX_IS_COLON 9
#define LEX_IS_NEWLINE 10
#define LEX_IS_ONECHAR_QUOTE 11

/* Classification predicates.  The argument is converted to unsigned char
   before it is used as an index, so a plain (possibly signed) char whose
   value is above 127 selects the intended entry instead of reading in
   front of lex[].  EOF is not a character: callers must test for it
   before using any of these. */
#define IS_SYMBOL_COMPONENT(c) (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
63
64/* FIXME-soon: The entire lexer/parser thingy should be
65 built statically at compile time rather than dynamically
66 each and every time the assembler is run. xoxorich. */
fecd2382
RP
67
68void do_scrub_begin() {
69 const char *p;
a39116f1
RP
70
71 lex[' '] = LEX_IS_WHITESPACE;
72 lex['\t'] = LEX_IS_WHITESPACE;
73 lex['\n'] = LEX_IS_NEWLINE;
74 lex[';'] = LEX_IS_LINE_SEPARATOR;
75 lex['"'] = LEX_IS_STRINGQUOTE;
76 lex['\''] = LEX_IS_ONECHAR_QUOTE;
77 lex[':'] = LEX_IS_COLON;
7c2d4011
SC
78
79#ifdef MRI
80 lex['\''] = LEX_IS_STRINGQUOTE;
81#endif
fecd2382
RP
82 /* Note that these override the previous defaults, e.g. if ';'
83 is a comment char, then it isn't a line separator. */
a39116f1 84 for (p = symbol_chars; *p; ++p) {
fecd2382 85 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
a39116f1
RP
86 } /* declare symbol characters */
87
88 for (p = line_comment_chars; *p; p++) {
fecd2382 89 lex[*p] = LEX_IS_LINE_COMMENT_START;
a39116f1
RP
90 } /* declare line comment chars */
91
92 for (p = comment_chars; *p; p++) {
93 lex[*p] = LEX_IS_COMMENT_START;
94 } /* declare comment chars */
95
96 for (p = line_separator_chars; *p; p++) {
fecd2382 97 lex[*p] = LEX_IS_LINE_SEPARATOR;
a39116f1
RP
98 } /* declare line separators */
99
fecd2382
RP
100 /* Only allow slash-star comments if slash is not in use */
101 if (lex['/'] == 0) {
a39116f1 102 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
fecd2382 103 }
a39116f1
RP
104 /* FIXME-soon. This is a bad hack but otherwise, we
105 can't do c-style comments when '/' is a line
106 comment char. xoxorich. */
107 if (lex['*'] == 0) {
108 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
109 }
110} /* do_scrub_begin() */
fecd2382
RP
111
/* Stream used by scrub_from_file() and scrub_to_file(). */
FILE *scrub_file;

/* Read the next character from scrub_file.  Returns whatever getc()
   returns, i.e. the character or EOF. */
int scrub_from_file() {
	int next;

	next = getc(scrub_file);
	return next;
} /* scrub_from_file() */

/* Push CH back onto scrub_file so that the next read returns it. */
void scrub_to_file(ch)
int ch;
{
	(void) ungetc(ch, scrub_file);
} /* scrub_to_file() */
123
/* In-memory input: scrub_string is the next character to read and
   scrub_last_string is one past the final character. */
char *scrub_string;
char *scrub_last_string;

/* Return the next character of the in-memory input, or EOF once
   scrub_string has reached scrub_last_string.

   The character is returned as an unsigned char value, the same
   convention getc() uses.  With a signed plain char, byte 0xFF would
   otherwise be indistinguishable from EOF, and other bytes above 127
   would come back negative. */
int scrub_from_string() {
	if (scrub_string == scrub_last_string)
		return EOF;
	return (unsigned char) *scrub_string++;
} /* scrub_from_string() */

/* Push CH back onto the in-memory input by stepping scrub_string back one
   position and storing CH there.  The caller must have read at least one
   character first, otherwise this writes in front of the buffer. */
void scrub_to_string(ch)
int ch;
{
	*--scrub_string = ch;
} /* scrub_to_string() */
136
/* Scrubber state that is kept from one call of do_scrub_next_char() to
   the next. */
static int state;		/* current state of the state machine */
static int old_state;		/* state to go back to afterwards */
static char *out_string;	/* next character of text being emitted */
static char out_buf[20];	/* room for text generated with sprintf() */
static int add_newlines = 0;	/* newlines swallowed, still to be replayed */

/* Snapshot of everything above, plus the input source, taken around an
   #include.  app is called asynchronously to the parsing of the
   .include's, so the state at the moment a .include is interpreted is
   completely unrelated to it.  That is why all of it must be saved. */

struct app_save {
	int state;
	int old_state;
	char *out_string;
	char out_buf[sizeof (out_buf)];
	int add_newlines;
	char *scrub_string;
	char *scrub_last_string;
	FILE *scrub_file;
};
159
160char *app_push() {
7c2d4011
SC
161 register struct app_save *saved;
162
163 saved = (struct app_save *) xmalloc(sizeof (*saved));
164 saved->state = state;
165 saved->old_state = old_state;
166 saved->out_string = out_string;
167 bcopy(saved->out_buf, out_buf, sizeof(out_buf));
168 saved->add_newlines = add_newlines;
169 saved->scrub_string = scrub_string;
170 saved->scrub_last_string = scrub_last_string;
171 saved->scrub_file = scrub_file;
172
173 /* do_scrub_begin() is not useful, just wastes time. */
174 return (char *)saved;
fecd2382
RP
175}
176
177void app_pop(arg)
178char *arg;
179{
a39116f1
RP
180 register struct app_save *saved = (struct app_save *)arg;
181
182 /* There is no do_scrub_end (). */
183 state = saved->state;
184 old_state = saved->old_state;
185 out_string = saved->out_string;
7c2d4011 186 memcpy(saved->out_buf, out_buf, sizeof (out_buf));
a39116f1
RP
187 add_newlines = saved->add_newlines;
188 scrub_string = saved->scrub_string;
189 scrub_last_string = saved->scrub_last_string;
190 scrub_file = saved->scrub_file;
191
192 free (arg);
fecd2382
RP
193} /* app_pop() */
194
7c2d4011
SC
/* Translate the character that follows a backslash into the character the
   escape sequence stands for.

   CH is the character after the backslash.  The letters b, f, n, r and t
   give the corresponding control character.  A single or double quote
   gives itself.  Any other character is returned unchanged. */
int process_escape(ch)
char ch;
{
	switch (ch) {
	case 'b':
		return '\b';
	case 'f':
		return '\f';
	case 'n':
		return '\n';
	case 'r':
		return '\r';
	case 't':
		return '\t';
	case '\'':
		return '\'';
	case '"':
		/* An escaped double quote is a double quote, not a single
		   quote. */
		return '"';
	default:
		return ch;
	}
}
fecd2382
RP
218int do_scrub_next_char(get,unget)
219int (*get)();
220void (*unget)();
221{
222 /*State 0: beginning of normal line
a39116f1
RP
223 1: After first whitespace on line (flush more white)
224 2: After first non-white (opcode) on line (keep 1white)
225 3: after second white on line (into operands) (flush white)
226 4: after putting out a .line, put out digits
227 5: parsing a string, then go to old-state
228 6: putting out \ escape in a "d string.
229 7: After putting out a .app-file, put out string.
230 8: After putting out a .app-file string, flush until newline.
231 -1: output string in out_string and go to the state in old_state
232 -2: flush text until a '*' '/' is seen, then go to state old_state
233 */
234
235 register int ch, ch2 = 0;
236
fecd2382
RP
237 switch (state) {
238 case -1:
239 ch= *out_string++;
240 if(*out_string==0) {
241 state=old_state;
242 old_state=3;
243 }
244 return ch;
a39116f1 245
fecd2382
RP
246 case -2:
247 for(;;) {
248 do {
249 ch=(*get)();
250 } while(ch!=EOF && ch!='\n' && ch!='*');
251 if(ch=='\n' || ch==EOF)
a39116f1
RP
252 return ch;
253
fecd2382
RP
254 /* At this point, ch must be a '*' */
255 while ( (ch=(*get)()) == '*' ){
256 ;
257 }
258 if(ch==EOF || ch=='/')
a39116f1 259 break;
fecd2382
RP
260 (*unget)(ch);
261 }
262 state=old_state;
263 return ' ';
a39116f1 264
fecd2382
RP
265 case 4:
266 ch=(*get)();
267 if(ch==EOF || (ch>='0' && ch<='9'))
a39116f1 268 return ch;
fecd2382
RP
269 else {
270 while(ch!=EOF && IS_WHITESPACE(ch))
a39116f1 271 ch=(*get)();
fecd2382
RP
272 if(ch=='"') {
273 (*unget)(ch);
274 out_string="\n.app-file ";
275 old_state=7;
276 state= -1;
277 return *out_string++;
278 } else {
279 while(ch!=EOF && ch!='\n')
a39116f1 280 ch=(*get)();
fecd2382
RP
281 return ch;
282 }
283 }
a39116f1 284
fecd2382
RP
285 case 5:
286 ch=(*get)();
7c2d4011 287 if(lex[ch]==LEX_IS_STRINGQUOTE) {
fecd2382 288 state=old_state;
7c2d4011 289 return ch;
fecd2382
RP
290 } else if(ch=='\\') {
291 state=6;
292 return ch;
293 } else if(ch==EOF) {
294 as_warn("End of file in string: inserted '\"'");
295 state=old_state;
296 (*unget)('\n');
297 return '"';
298 } else {
299 return ch;
300 }
a39116f1 301
fecd2382
RP
302 case 6:
303 state=5;
304 ch=(*get)();
305 switch(ch) {
306 /* This is neet. Turn "string
307 more string" into "string\n more string"
a39116f1 308 */
fecd2382
RP
309 case '\n':
310 (*unget)('n');
311 add_newlines++;
312 return '\\';
a39116f1 313
fecd2382
RP
314 case '"':
315 case '\\':
316 case 'b':
317 case 'f':
318 case 'n':
319 case 'r':
320 case 't':
321#ifdef BACKSLASH_V
322 case 'v':
323#endif /* BACKSLASH_V */
324 case '0':
325 case '1':
326 case '2':
327 case '3':
328 case '4':
329 case '5':
330 case '6':
331 case '7':
332 break;
7c2d4011 333#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
fecd2382
RP
334 default:
335 as_warn("Unknown escape '\\%c' in string: Ignored",ch);
336 break;
337#else /* ONLY_STANDARD_ESCAPES */
338 default:
339 /* Accept \x as x for any x */
340 break;
341#endif /* ONLY_STANDARD_ESCAPES */
a39116f1 342
fecd2382
RP
343 case EOF:
344 as_warn("End of file in string: '\"' inserted");
345 return '"';
346 }
347 return ch;
a39116f1 348
fecd2382
RP
349 case 7:
350 ch=(*get)();
351 state=5;
352 old_state=8;
353 return ch;
a39116f1 354
fecd2382
RP
355 case 8:
356 do ch= (*get)();
357 while(ch!='\n');
358 state=0;
359 return ch;
360 }
7c2d4011 361
fecd2382 362 /* OK, we are somewhere in states 0 through 4 */
7c2d4011
SC
363
364/* flushchar: */
fecd2382
RP
365 ch=(*get)();
366 recycle:
367 if (ch == EOF) {
368 if (state != 0)
7c2d4011 369 as_warn("End of file not at end of a line: Newline inserted.");
fecd2382
RP
370 return ch;
371 }
7c2d4011 372
fecd2382
RP
373 switch (lex[ch]) {
374 case LEX_IS_WHITESPACE:
375 do ch=(*get)();
376 while(ch!=EOF && IS_WHITESPACE(ch));
377 if(ch==EOF)
7c2d4011
SC
378 return ch;
379
fecd2382
RP
380 if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) {
381 goto recycle;
382 }
7c2d4011
SC
383#ifdef MRI
384 (*unget)(ch); /* Put back */
385 return ' '; /* Always return one space at start of line */
386#endif
fecd2382
RP
387 switch (state) {
388 case 0: state++; goto recycle; /* Punted leading sp */
389 case 1: BAD_CASE(state); /* We can't get here */
390 case 2: state++; (*unget)(ch); return ' '; /* Sp after opco */
391 case 3: goto recycle; /* Sp in operands */
392 default: BAD_CASE(state);
393 }
394 break;
a39116f1 395
fecd2382
RP
396 case LEX_IS_TWOCHAR_COMMENT_1ST:
397 ch2=(*get)();
398 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
399 for(;;) {
400 do {
401 ch2=(*get)();
402 if(ch2 != EOF && IS_NEWLINE(ch2))
7c2d4011 403 add_newlines++;
fecd2382 404 } while(ch2!=EOF &&
a39116f1
RP
405 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
406
fecd2382 407 while (ch2!=EOF &&
a39116f1 408 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
fecd2382
RP
409 ch2=(*get)();
410 }
a39116f1 411
fecd2382 412 if(ch2==EOF
7c2d4011
SC
413 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
414 break;
fecd2382
RP
415 (*unget)(ch);
416 }
417 if(ch2==EOF)
7c2d4011
SC
418 as_warn("End of file in multiline comment");
419
fecd2382
RP
420 ch = ' ';
421 goto recycle;
422 } else {
423 if(ch2!=EOF)
a39116f1 424 (*unget)(ch2);
fecd2382
RP
425 return ch;
426 }
427 break;
a39116f1 428
fecd2382
RP
429 case LEX_IS_STRINGQUOTE:
430 old_state=state;
431 state=5;
432 return ch;
7c2d4011 433#ifndef MRI
a39116f1 434#ifndef IEEE_STYLE
fecd2382
RP
435 case LEX_IS_ONECHAR_QUOTE:
436 ch=(*get)();
437 if(ch==EOF) {
438 as_warn("End-of-file after a one-character quote; \000 inserted");
439 ch=0;
440 }
7c2d4011
SC
441 if (ch == '\\') {
442 ch = (*get)();
443 ch = process_escape(ch);
444 }
fecd2382 445 sprintf(out_buf,"%d", (int)(unsigned char)ch);
7c2d4011 446
fecd2382
RP
447 /* None of these 'x constants for us. We want 'x'.
448 */
449 if ( (ch=(*get)()) != '\'' ) {
450#ifdef REQUIRE_CHAR_CLOSE_QUOTE
451 as_warn("Missing close quote: (assumed)");
452#else
453 (*unget)(ch);
454#endif
455 }
a39116f1 456
fecd2382
RP
457 old_state=state;
458 state= -1;
459 out_string=out_buf;
460 return *out_string++;
7c2d4011 461#endif
a39116f1 462#endif
fecd2382
RP
463 case LEX_IS_COLON:
464 if(state!=3)
7c2d4011 465 state=0;
fecd2382 466 return ch;
7c2d4011 467
fecd2382
RP
468 case LEX_IS_NEWLINE:
469 /* Roll out a bunch of newlines from inside comments, etc. */
470 if(add_newlines) {
471 --add_newlines;
472 (*unget)(ch);
473 }
474 /* fall thru into... */
a39116f1 475
fecd2382
RP
476 case LEX_IS_LINE_SEPARATOR:
477 state=0;
478 return ch;
a39116f1 479
fecd2382
RP
480 case LEX_IS_LINE_COMMENT_START:
481 if (state != 0) /* Not at start of line, act normal */
7c2d4011
SC
482 goto de_fault;
483
484 /* FIXME-someday: The two character comment stuff was badly
485 thought out. On i386, we want '/' as line comment start
486 AND we want C style comments. hence this hack. The
487 whole lexical process should be reworked. xoxorich. */
488
a39116f1
RP
489 if (ch == '/' && (ch2 = (*get)()) == '*') {
490 state = -2;
491 return(do_scrub_next_char(get, unget));
492 } else {
493 (*unget)(ch2);
494 } /* bad hack */
495
fecd2382
RP
496 do ch=(*get)();
497 while(ch!=EOF && IS_WHITESPACE(ch));
498 if(ch==EOF) {
499 as_warn("EOF in comment: Newline inserted");
500 return '\n';
501 }
502 if(ch<'0' || ch>'9') {
503 /* Non-numerics: Eat whole comment line */
504 while(ch!=EOF && !IS_NEWLINE(ch))
a39116f1 505 ch=(*get)();
fecd2382 506 if(ch==EOF)
a39116f1 507 as_warn("EOF in Comment: Newline inserted");
fecd2382
RP
508 state=0;
509 return '\n';
510 }
511 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
512 (*unget)(ch);
513 old_state=4;
514 state= -1;
515 out_string=".line ";
516 return *out_string++;
a39116f1 517
fecd2382
RP
518 case LEX_IS_COMMENT_START:
519 do ch=(*get)();
520 while(ch!=EOF && !IS_NEWLINE(ch));
521 if(ch==EOF)
a39116f1 522 as_warn("EOF in comment: Newline inserted");
fecd2382
RP
523 state=0;
524 return '\n';
a39116f1 525
fecd2382
RP
526 default:
527 de_fault:
528 /* Some relatively `normal' character. */
529 if(state==0) {
530 state=2; /* Now seeing opcode */
531 return ch;
532 } else if(state==1) {
533 state=2; /* Ditto */
534 return ch;
535 } else {
536 return ch; /* Opcode or operands already */
537 }
538 }
539 return -1;
540}
541
#ifdef TEST

/* Stand-alone test driver: scrub standard input onto standard output.

   The tables are defined `const' to agree with the extern declarations
   at the top of the file.  line_separator_chars is defined as well,
   since do_scrub_begin() reads all three. */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int main()
{
	int ch;

	do_scrub_begin();
	/* Read through the file-based accessors defined in this file. */
	scrub_file = stdin;
	while ((ch = do_scrub_next_char(scrub_from_file, scrub_to_file)) != EOF)
		putc(ch, stdout);
	return 0;
}

/* Minimal stand-in for the assembler's as_warn(): print STR and a newline
   on stderr.  STR is printed literally; format directives in it are not
   expanded. */
as_warn(str)
char *str;
{
	fputs(str,stderr);
	putc('\n',stderr);
}
#endif
563
564/*
565 * Local Variables:
566 * comment-column: 0
567 * fill-column: 131
568 * End:
569 */
570
571/* end of app.c */
This page took 0.077682 seconds and 4 git commands to generate.