Commit | Line | Data |
---|---|---|
fecd2382 RP |
1 | /* Copyright (C) 1987, 1990, 1991 Free Software Foundation, Inc. |
2 | ||
3 | Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 | |
4 | */ | |
5 | /* This is the Assembler Pre-Processor | |
6 | Copyright (C) 1987 Free Software Foundation, Inc. | |
7 | ||
8 | This file is part of GAS, the GNU Assembler. | |
9 | ||
10 | GAS is free software; you can redistribute it and/or modify | |
11 | it under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 1, or (at your option) | |
13 | any later version. | |
14 | ||
15 | GAS is distributed in the hope that it will be useful, | |
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | GNU General Public License for more details. | |
19 | ||
20 | You should have received a copy of the GNU General Public License | |
21 | along with GAS; see the file COPYING. If not, write to | |
22 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
23 | ||
24 | /* App, the assembler pre-processor. This pre-processor strips out excess | |
25 | spaces, turns single-quoted characters into a decimal constant, and turns | |
26 | # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair. | |
27 | This needs better error-handling. | |
28 | */ | |
29 | ||
30 | /* static const char rcsid[] = "$Id$"; */ | |
31 | ||
32 | #include <stdio.h> | |
33 | #include "as.h" /* For BAD_CASE() only */ | |
34 | ||
35 | #if !defined(__STDC__) && !defined(const) | |
36 | #define const /* Nothing */ | |
37 | #endif | |
38 | ||
/* Character-class table, indexed by character code (0..255).  A zero
   entry means the character has no special class; any other value is
   one of the LEX_IS_* codes below.  Filled in by do_scrub_begin().  */
static char lex[256];

/* Every character that is classified as part of a symbol name. */
static char symbol_chars[] =
    "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* NUL-terminated character lists, defined elsewhere and only declared
   here.  The original note said: "These will go in BSS if not defined
   elsewhere, producing empty strings."
   NOTE(review): an `extern' declaration alone does not reserve storage
   under ISO C -- confirm that every configuration really defines all
   three.  */
extern const char comment_chars[];
extern const char line_comment_chars[];
extern const char line_separator_chars[];

/* Character classes stored in lex[]. */
#define LEX_IS_SYMBOL_COMPONENT     1
#define LEX_IS_WHITESPACE           2
#define LEX_IS_LINE_SEPARATOR       3
#define LEX_IS_COMMENT_START        4
#define LEX_IS_LINE_COMMENT_START   5
#define LEX_IS_TWOCHAR_COMMENT_1ST  6
#define LEX_IS_TWOCHAR_COMMENT_2ND  7
#define LEX_IS_STRINGQUOTE          8
#define LEX_IS_COLON                9
#define LEX_IS_NEWLINE              10
#define LEX_IS_ONECHAR_QUOTE        11

/* Class predicates.  The argument is parenthesized and narrowed to
   unsigned char so that an expression argument, or a plain `char' with
   the high bit set, can never index outside the 256-entry table.
   Callers must still rule out EOF before using these.  */
#define IS_SYMBOL_COMPONENT(c)  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)        (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)    (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)           (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)      (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)           (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
65 | ||
66 | void do_scrub_begin() { | |
67 | const char *p; | |
68 | ||
69 | lex [' '] = LEX_IS_WHITESPACE; | |
70 | lex ['\t'] = LEX_IS_WHITESPACE; | |
71 | lex ['\n'] = LEX_IS_NEWLINE; | |
72 | lex [';'] = LEX_IS_LINE_SEPARATOR; | |
73 | lex ['"'] = LEX_IS_STRINGQUOTE; | |
74 | lex ['\''] = LEX_IS_ONECHAR_QUOTE; | |
75 | lex [':'] = LEX_IS_COLON; | |
76 | ||
77 | /* Note that these override the previous defaults, e.g. if ';' | |
78 | is a comment char, then it isn't a line separator. */ | |
79 | for (p =symbol_chars;*p;++p) | |
80 | lex[*p] = LEX_IS_SYMBOL_COMPONENT; | |
81 | for (p=comment_chars;*p;p++) | |
82 | lex[*p] = LEX_IS_COMMENT_START; | |
83 | for (p=line_comment_chars;*p;p++) | |
84 | lex[*p] = LEX_IS_LINE_COMMENT_START; | |
85 | for (p=line_separator_chars;*p;p++) | |
86 | lex[*p] = LEX_IS_LINE_SEPARATOR; | |
87 | ||
88 | /* Only allow slash-star comments if slash is not in use */ | |
89 | if (lex['/'] == 0) { | |
90 | lex ['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; | |
91 | lex ['*'] = LEX_IS_TWOCHAR_COMMENT_2ND; | |
92 | } | |
93 | } | |
94 | ||
/* Stream used by scrub_from_file() and scrub_to_file(). */
FILE *scrub_file;

/* Read one character from scrub_file and hand back exactly what getc()
   produced (EOF included).  */
int scrub_from_file() {
    int next_ch;

    next_ch = getc(scrub_file);
    return next_ch;
}
100 | ||
101 | void scrub_to_file(ch) | |
102 | int ch; | |
103 | { | |
104 | ungetc(ch,scrub_file); | |
105 | } /* scrub_to_file() */ | |
106 | ||
/* Read cursor into the in-memory text being scrubbed. */
char *scrub_string;
/* End marker: input is exhausted once scrub_string reaches this. */
char *scrub_last_string;

/* Return the next character of the in-memory input, or EOF once
   scrub_string has reached scrub_last_string.

   The character is returned as an unsigned char value, matching what
   getc() yields for file input.  Returning the plain `char' would
   sign-extend high-bit bytes on signed-char hosts: byte 0xFF would be
   indistinguishable from EOF and other high bytes would become
   negative table indices in the caller.  */
int scrub_from_string() {
    if (scrub_string == scrub_last_string)
        return EOF;
    return (unsigned char) *scrub_string++;
} /* scrub_from_string() */

/* Push CH back onto the in-memory input by stepping the cursor back one
   position and storing CH there.

   EOF is ignored, as ungetc() does for streams; storing it would
   overwrite a real input byte with (char) EOF.  */
void scrub_to_string(ch)
    int ch;
{
    if (ch == EOF)
        return;
    *--scrub_string = ch;
} /* scrub_to_string() */
119 | ||
/* Scrubber state that persists between calls to do_scrub_next_char(). */
static int state;           /* current scanner state */
static int old_state;       /* state to resume after a temporary one */
static char *out_string;    /* next character of canned text to emit */
static char out_buf[20];    /* decimal text of a character constant */
static int add_newlines;    /* newlines consumed inside strings/comments
                               that are still to be emitted; starts at 0 */

/* Snapshot of everything above plus the input-source globals, used to
   save the state of app across #include's.  Note that app is called
   asynchronously to the parsing of the .include's, so our state at the
   time .include is interpreted is completely unrelated.  That's why we
   have to save it all.  */
struct app_save {
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];     /* same size as the live buffer */
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
};
142 | ||
143 | char *app_push() { | |
144 | register struct app_save *saved; | |
145 | ||
146 | saved = (struct app_save *) xmalloc(sizeof (*saved)); | |
147 | saved->state = state; | |
148 | saved->old_state = old_state; | |
149 | saved->out_string = out_string; | |
150 | bcopy(saved->out_buf, out_buf, sizeof(out_buf)); | |
151 | saved->add_newlines = add_newlines; | |
152 | saved->scrub_string = scrub_string; | |
153 | saved->scrub_last_string = scrub_last_string; | |
154 | saved->scrub_file = scrub_file; | |
155 | ||
156 | /* do_scrub_begin() is not useful, just wastes time. */ | |
157 | return (char *)saved; | |
158 | } | |
159 | ||
160 | void app_pop(arg) | |
161 | char *arg; | |
162 | { | |
163 | register struct app_save *saved = (struct app_save *)arg; | |
164 | ||
165 | /* There is no do_scrub_end (). */ | |
166 | state = saved->state; | |
167 | old_state = saved->old_state; | |
168 | out_string = saved->out_string; | |
169 | bcopy (out_buf, saved->out_buf, sizeof (out_buf)); | |
170 | add_newlines = saved->add_newlines; | |
171 | scrub_string = saved->scrub_string; | |
172 | scrub_last_string = saved->scrub_last_string; | |
173 | scrub_file = saved->scrub_file; | |
174 | ||
175 | free (arg); | |
176 | } /* app_pop() */ | |
177 | ||
178 | int do_scrub_next_char(get,unget) | |
179 | int (*get)(); | |
180 | void (*unget)(); | |
181 | { | |
182 | /*State 0: beginning of normal line | |
183 | 1: After first whitespace on line (flush more white) | |
184 | 2: After first non-white (opcode) on line (keep 1white) | |
185 | 3: after second white on line (into operands) (flush white) | |
186 | 4: after putting out a .line, put out digits | |
187 | 5: parsing a string, then go to old-state | |
188 | 6: putting out \ escape in a "d string. | |
189 | 7: After putting out a .app-file, put out string. | |
190 | 8: After putting out a .app-file string, flush until newline. | |
191 | -1: output string in out_string and go to the state in old_state | |
192 | -2: flush text until a '*' '/' is seen, then go to state old_state | |
193 | */ | |
194 | ||
195 | register int ch, ch2; | |
196 | ||
197 | switch (state) { | |
198 | case -1: | |
199 | ch= *out_string++; | |
200 | if(*out_string==0) { | |
201 | state=old_state; | |
202 | old_state=3; | |
203 | } | |
204 | return ch; | |
205 | ||
206 | case -2: | |
207 | for(;;) { | |
208 | do { | |
209 | ch=(*get)(); | |
210 | } while(ch!=EOF && ch!='\n' && ch!='*'); | |
211 | if(ch=='\n' || ch==EOF) | |
212 | return ch; | |
213 | ||
214 | /* At this point, ch must be a '*' */ | |
215 | while ( (ch=(*get)()) == '*' ){ | |
216 | ; | |
217 | } | |
218 | if(ch==EOF || ch=='/') | |
219 | break; | |
220 | (*unget)(ch); | |
221 | } | |
222 | state=old_state; | |
223 | return ' '; | |
224 | ||
225 | case 4: | |
226 | ch=(*get)(); | |
227 | if(ch==EOF || (ch>='0' && ch<='9')) | |
228 | return ch; | |
229 | else { | |
230 | while(ch!=EOF && IS_WHITESPACE(ch)) | |
231 | ch=(*get)(); | |
232 | if(ch=='"') { | |
233 | (*unget)(ch); | |
234 | out_string="\n.app-file "; | |
235 | old_state=7; | |
236 | state= -1; | |
237 | return *out_string++; | |
238 | } else { | |
239 | while(ch!=EOF && ch!='\n') | |
240 | ch=(*get)(); | |
241 | return ch; | |
242 | } | |
243 | } | |
244 | ||
245 | case 5: | |
246 | ch=(*get)(); | |
247 | if(ch=='"') { | |
248 | state=old_state; | |
249 | return '"'; | |
250 | } else if(ch=='\\') { | |
251 | state=6; | |
252 | return ch; | |
253 | } else if(ch==EOF) { | |
254 | as_warn("End of file in string: inserted '\"'"); | |
255 | state=old_state; | |
256 | (*unget)('\n'); | |
257 | return '"'; | |
258 | } else { | |
259 | return ch; | |
260 | } | |
261 | ||
262 | case 6: | |
263 | state=5; | |
264 | ch=(*get)(); | |
265 | switch(ch) { | |
266 | /* This is neet. Turn "string | |
267 | more string" into "string\n more string" | |
268 | */ | |
269 | case '\n': | |
270 | (*unget)('n'); | |
271 | add_newlines++; | |
272 | return '\\'; | |
273 | ||
274 | case '"': | |
275 | case '\\': | |
276 | case 'b': | |
277 | case 'f': | |
278 | case 'n': | |
279 | case 'r': | |
280 | case 't': | |
281 | #ifdef BACKSLASH_V | |
282 | case 'v': | |
283 | #endif /* BACKSLASH_V */ | |
284 | case '0': | |
285 | case '1': | |
286 | case '2': | |
287 | case '3': | |
288 | case '4': | |
289 | case '5': | |
290 | case '6': | |
291 | case '7': | |
292 | break; | |
293 | ||
294 | #ifdef ONLY_STANDARD_ESCAPES | |
295 | default: | |
296 | as_warn("Unknown escape '\\%c' in string: Ignored",ch); | |
297 | break; | |
298 | #else /* ONLY_STANDARD_ESCAPES */ | |
299 | default: | |
300 | /* Accept \x as x for any x */ | |
301 | break; | |
302 | #endif /* ONLY_STANDARD_ESCAPES */ | |
303 | ||
304 | case EOF: | |
305 | as_warn("End of file in string: '\"' inserted"); | |
306 | return '"'; | |
307 | } | |
308 | return ch; | |
309 | ||
310 | case 7: | |
311 | ch=(*get)(); | |
312 | state=5; | |
313 | old_state=8; | |
314 | return ch; | |
315 | ||
316 | case 8: | |
317 | do ch= (*get)(); | |
318 | while(ch!='\n'); | |
319 | state=0; | |
320 | return ch; | |
321 | } | |
322 | ||
323 | /* OK, we are somewhere in states 0 through 4 */ | |
324 | ||
325 | /* flushchar: */ | |
326 | ch=(*get)(); | |
327 | recycle: | |
328 | if (ch == EOF) { | |
329 | if (state != 0) | |
330 | as_warn("End of file not at end of a line: Newline inserted."); | |
331 | return ch; | |
332 | } | |
333 | ||
334 | switch (lex[ch]) { | |
335 | case LEX_IS_WHITESPACE: | |
336 | do ch=(*get)(); | |
337 | while(ch!=EOF && IS_WHITESPACE(ch)); | |
338 | if(ch==EOF) | |
339 | return ch; | |
340 | if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) { | |
341 | goto recycle; | |
342 | } | |
343 | switch (state) { | |
344 | case 0: state++; goto recycle; /* Punted leading sp */ | |
345 | case 1: BAD_CASE(state); /* We can't get here */ | |
346 | case 2: state++; (*unget)(ch); return ' '; /* Sp after opco */ | |
347 | case 3: goto recycle; /* Sp in operands */ | |
348 | default: BAD_CASE(state); | |
349 | } | |
350 | break; | |
351 | ||
352 | case LEX_IS_TWOCHAR_COMMENT_1ST: | |
353 | ch2=(*get)(); | |
354 | if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) { | |
355 | for(;;) { | |
356 | do { | |
357 | ch2=(*get)(); | |
358 | if(ch2 != EOF && IS_NEWLINE(ch2)) | |
359 | add_newlines++; | |
360 | } while(ch2!=EOF && | |
361 | (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND)); | |
362 | ||
363 | while (ch2!=EOF && | |
364 | (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){ | |
365 | ch2=(*get)(); | |
366 | } | |
367 | ||
368 | if(ch2==EOF | |
369 | || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST) | |
370 | break; | |
371 | (*unget)(ch); | |
372 | } | |
373 | if(ch2==EOF) | |
374 | as_warn("End of file in multiline comment"); | |
375 | ||
376 | ch = ' '; | |
377 | goto recycle; | |
378 | } else { | |
379 | if(ch2!=EOF) | |
380 | (*unget)(ch2); | |
381 | return ch; | |
382 | } | |
383 | break; | |
384 | ||
385 | case LEX_IS_STRINGQUOTE: | |
386 | old_state=state; | |
387 | state=5; | |
388 | return ch; | |
389 | ||
390 | case LEX_IS_ONECHAR_QUOTE: | |
391 | ch=(*get)(); | |
392 | if(ch==EOF) { | |
393 | as_warn("End-of-file after a one-character quote; \000 inserted"); | |
394 | ch=0; | |
395 | } | |
396 | sprintf(out_buf,"%d", (int)(unsigned char)ch); | |
397 | ||
398 | /* None of these 'x constants for us. We want 'x'. | |
399 | */ | |
400 | if ( (ch=(*get)()) != '\'' ) { | |
401 | #ifdef REQUIRE_CHAR_CLOSE_QUOTE | |
402 | as_warn("Missing close quote: (assumed)"); | |
403 | #else | |
404 | (*unget)(ch); | |
405 | #endif | |
406 | } | |
407 | ||
408 | old_state=state; | |
409 | state= -1; | |
410 | out_string=out_buf; | |
411 | return *out_string++; | |
412 | ||
413 | case LEX_IS_COLON: | |
414 | if(state!=3) | |
415 | state=0; | |
416 | return ch; | |
417 | ||
418 | case LEX_IS_NEWLINE: | |
419 | /* Roll out a bunch of newlines from inside comments, etc. */ | |
420 | if(add_newlines) { | |
421 | --add_newlines; | |
422 | (*unget)(ch); | |
423 | } | |
424 | /* fall thru into... */ | |
425 | ||
426 | case LEX_IS_LINE_SEPARATOR: | |
427 | state=0; | |
428 | return ch; | |
429 | ||
430 | case LEX_IS_LINE_COMMENT_START: | |
431 | if (state != 0) /* Not at start of line, act normal */ | |
432 | goto de_fault; | |
433 | do ch=(*get)(); | |
434 | while(ch!=EOF && IS_WHITESPACE(ch)); | |
435 | if(ch==EOF) { | |
436 | as_warn("EOF in comment: Newline inserted"); | |
437 | return '\n'; | |
438 | } | |
439 | if(ch<'0' || ch>'9') { | |
440 | /* Non-numerics: Eat whole comment line */ | |
441 | while(ch!=EOF && !IS_NEWLINE(ch)) | |
442 | ch=(*get)(); | |
443 | if(ch==EOF) | |
444 | as_warn("EOF in Comment: Newline inserted"); | |
445 | state=0; | |
446 | return '\n'; | |
447 | } | |
448 | /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */ | |
449 | (*unget)(ch); | |
450 | old_state=4; | |
451 | state= -1; | |
452 | out_string=".line "; | |
453 | return *out_string++; | |
454 | ||
455 | case LEX_IS_COMMENT_START: | |
456 | do ch=(*get)(); | |
457 | while(ch!=EOF && !IS_NEWLINE(ch)); | |
458 | if(ch==EOF) | |
459 | as_warn("EOF in comment: Newline inserted"); | |
460 | state=0; | |
461 | return '\n'; | |
462 | ||
463 | default: | |
464 | de_fault: | |
465 | /* Some relatively `normal' character. */ | |
466 | if(state==0) { | |
467 | state=2; /* Now seeing opcode */ | |
468 | return ch; | |
469 | } else if(state==1) { | |
470 | state=2; /* Ditto */ | |
471 | return ch; | |
472 | } else { | |
473 | return ch; /* Opcode or operands already */ | |
474 | } | |
475 | } | |
476 | return -1; | |
477 | } | |
478 | ||
#ifdef TEST

/* Stand-alone filter for exercising the scrubber: reads stdin, writes
   the scrubbed text to stdout.  The three character lists are the ones
   declared `extern const' near the top of this file, so they are
   defined `const' here and all three are provided.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int main()
{
    int ch;

    /* Build the character-class table, then drive the scrubber through
       the file-based get/unget callbacks with stdin as the source.  */
    do_scrub_begin();
    scrub_file = stdin;
    while ((ch = do_scrub_next_char(scrub_from_file, scrub_to_file)) != EOF)
        putc(ch, stdout);
    return 0;
}

/* Minimal stand-in for the assembler's warning routine: prints STR and
   a newline on stderr.  Extra arguments are not formatted.
   NOTE(review): the return type is left implicit, as in the original;
   confirm against the as_warn declaration in as.h.  */
as_warn(str)
    char *str;
{
    fputs(str, stderr);
    putc('\n', stderr);
}
#endif
500 | ||
501 | /* | |
502 | * Local Variables: | |
503 | * comment-column: 0 | |
504 | * fill-column: 131 | |
505 | * End: | |
506 | */ | |
507 | ||
508 | /* end of app.c */ |