Commit | Line | Data |
---|---|---|
fecd2382 | 1 | /* This is the Assembler Pre-Processor |
58d4951d | 2 | Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc. |
6efd877d | 3 | |
a39116f1 | 4 | This file is part of GAS, the GNU Assembler. |
6efd877d | 5 | |
a39116f1 RP |
6 | GAS is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
6efd877d | 10 | |
a39116f1 RP |
11 | GAS is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
6efd877d | 15 | |
a39116f1 RP |
16 | You should have received a copy of the GNU General Public License |
17 | along with GAS; see the file COPYING. If not, write to | |
18 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
fecd2382 | 19 | |
58d4951d | 20 | /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ |
/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
   pair.  This needs better error-handling.
 */
fecd2382 RP |
26 | |
27 | #include <stdio.h> | |
6efd877d | 28 | #include "as.h" /* For BAD_CASE() only */ |
fecd2382 | 29 | |
3340f7e5 | 30 | #if (__STDC__ != 1) && !defined(const) |
6efd877d | 31 | #define const /* Nothing */ |
fecd2382 RP |
32 | #endif |
33 | ||
/* Character classification table, indexed by character code.  An entry
   of 0 means the character has no special class; the other values are
   the LEX_IS_* codes below.  The table is filled in at run time by
   do_scrub_begin ().  */
static char lex[256];

/* Characters that are classified as LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Classification codes stored in lex[].  */
enum
{
  LEX_IS_SYMBOL_COMPONENT = 1,
  LEX_IS_WHITESPACE = 2,
  LEX_IS_LINE_SEPARATOR = 3,
  LEX_IS_COMMENT_START = 4,
  LEX_IS_LINE_COMMENT_START = 5,
  LEX_IS_TWOCHAR_COMMENT_1ST = 6,
  LEX_IS_TWOCHAR_COMMENT_2ND = 7,
  LEX_IS_STRINGQUOTE = 8,
  LEX_IS_COLON = 9,
  LEX_IS_NEWLINE = 10,
  LEX_IS_ONECHAR_QUOTE = 11
};

/* Classification predicates.  C is used directly as an index into
   lex[], so it must be a valid index (in particular, not EOF).  */
#define IS_SYMBOL_COMPONENT(c)	(LEX_IS_SYMBOL_COMPONENT == lex[c])
#define IS_WHITESPACE(c)	(LEX_IS_WHITESPACE == lex[c])
#define IS_LINE_SEPARATOR(c)	(LEX_IS_LINE_SEPARATOR == lex[c])
#define IS_COMMENT(c)		(LEX_IS_COMMENT_START == lex[c])
#define IS_LINE_COMMENT(c)	(LEX_IS_LINE_COMMENT_START == lex[c])
#define IS_NEWLINE(c)		(LEX_IS_NEWLINE == lex[c])
55 | ||
56 | /* FIXME-soon: The entire lexer/parser thingy should be | |
57 | built statically at compile time rather than dynamically | |
58 | each and every time the assembler is run. xoxorich. */ | |
fecd2382 | 59 | |
6efd877d KR |
60 | void |
61 | do_scrub_begin () | |
62 | { | |
63 | const char *p; | |
64 | ||
65 | lex[' '] = LEX_IS_WHITESPACE; | |
66 | lex['\t'] = LEX_IS_WHITESPACE; | |
67 | lex['\n'] = LEX_IS_NEWLINE; | |
68 | lex[';'] = LEX_IS_LINE_SEPARATOR; | |
69 | lex['"'] = LEX_IS_STRINGQUOTE; | |
58d4951d | 70 | #ifndef TC_HPPA |
6efd877d | 71 | lex['\''] = LEX_IS_ONECHAR_QUOTE; |
58d4951d | 72 | #endif |
6efd877d | 73 | lex[':'] = LEX_IS_COLON; |
7c2d4011 | 74 | |
be06bdcd SC |
75 | |
76 | ||
77 | #ifdef SINGLE_QUOTE_STRINGS | |
78 | lex['\''] = LEX_IS_STRINGQUOTE; | |
7c2d4011 | 79 | #endif |
be06bdcd | 80 | |
6efd877d | 81 | /* Note that these override the previous defaults, e.g. if ';' |
be06bdcd | 82 | |
fecd2382 | 83 | is a comment char, then it isn't a line separator. */ |
6efd877d KR |
84 | for (p = symbol_chars; *p; ++p) |
85 | { | |
58d4951d | 86 | lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; |
6efd877d KR |
87 | } /* declare symbol characters */ |
88 | ||
6efd877d KR |
89 | for (p = comment_chars; *p; p++) |
90 | { | |
58d4951d | 91 | lex[(unsigned char) *p] = LEX_IS_COMMENT_START; |
6efd877d KR |
92 | } /* declare comment chars */ |
93 | ||
9a7d824a ILT |
94 | for (p = line_comment_chars; *p; p++) |
95 | { | |
58d4951d | 96 | lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; |
9a7d824a ILT |
97 | } /* declare line comment chars */ |
98 | ||
6efd877d KR |
99 | for (p = line_separator_chars; *p; p++) |
100 | { | |
58d4951d | 101 | lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; |
6efd877d KR |
102 | } /* declare line separators */ |
103 | ||
104 | /* Only allow slash-star comments if slash is not in use */ | |
105 | if (lex['/'] == 0) | |
106 | { | |
107 | lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; | |
108 | } | |
109 | /* FIXME-soon. This is a bad hack but otherwise, we | |
a39116f1 RP |
110 | can't do c-style comments when '/' is a line |
111 | comment char. xoxorich. */ | |
6efd877d KR |
112 | if (lex['*'] == 0) |
113 | { | |
114 | lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND; | |
115 | } | |
116 | } /* do_scrub_begin() */ | |
fecd2382 RP |
117 | |
/* Stream used by scrub_from_file () and scrub_to_file ().  */
FILE *scrub_file;

/* Character source reading from scrub_file: returns whatever getc ()
   returns for that stream.  */
int
scrub_from_file ()
{
  int next = getc (scrub_file);

  return next;
}
125 | ||
6efd877d KR |
126 | void |
127 | scrub_to_file (ch) | |
128 | int ch; | |
fecd2382 | 129 | { |
6efd877d KR |
130 | ungetc (ch, scrub_file); |
131 | } /* scrub_to_file() */ | |
fecd2382 RP |
132 | |
/* In-memory input: scrub_from_string () reads from scrub_string and
   reports EOF once scrub_string reaches scrub_last_string.  */
char *scrub_string;
char *scrub_last_string;

/* Character source reading from memory.  Returns the next character as
   a non-negative value, or EOF once scrub_string == scrub_last_string.

   The byte is converted through unsigned char, like getc () does, so
   that on hosts where plain char is signed a byte such as 0xFF is not
   mistaken for EOF and no negative value is ever used as an index into
   the classification table.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */
fecd2382 | 141 | |
6efd877d KR |
142 | void |
143 | scrub_to_string (ch) | |
144 | int ch; | |
fecd2382 | 145 | { |
6efd877d KR |
146 | *--scrub_string = ch; |
147 | } /* scrub_to_string() */ | |
fecd2382 RP |
148 | |
/* Saved state of the scrubber.  */
static int state, old_state;	/* Current state, and the one to return to.  */
static char *out_string;	/* Pending text emitted while in state -1.  */
static char out_buf[20];	/* Scratch buffer that out_string may point at.  */
static int add_newlines;	/* Count of newlines still to be emitted.  */

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.

   Each member is a copy of the file-level variable of the same name.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
172 | ||
173 | char * | |
174 | app_push () | |
175 | { | |
7c2d4011 SC |
176 | register struct app_save *saved; |
177 | ||
6efd877d KR |
178 | saved = (struct app_save *) xmalloc (sizeof (*saved)); |
179 | saved->state = state; | |
180 | saved->old_state = old_state; | |
181 | saved->out_string = out_string; | |
58d4951d | 182 | memcpy (saved->out_buf, out_buf, sizeof (out_buf)); |
6efd877d KR |
183 | saved->add_newlines = add_newlines; |
184 | saved->scrub_string = scrub_string; | |
7c2d4011 | 185 | saved->scrub_last_string = scrub_last_string; |
6efd877d | 186 | saved->scrub_file = scrub_file; |
7c2d4011 SC |
187 | |
188 | /* do_scrub_begin() is not useful, just wastes time. */ | |
6efd877d | 189 | return (char *) saved; |
fecd2382 RP |
190 | } |
191 | ||
6efd877d KR |
192 | void |
193 | app_pop (arg) | |
194 | char *arg; | |
fecd2382 | 195 | { |
6efd877d KR |
196 | register struct app_save *saved = (struct app_save *) arg; |
197 | ||
198 | /* There is no do_scrub_end (). */ | |
199 | state = saved->state; | |
200 | old_state = saved->old_state; | |
201 | out_string = saved->out_string; | |
58d4951d | 202 | memcpy (out_buf, saved->out_buf, sizeof (out_buf)); |
6efd877d KR |
203 | add_newlines = saved->add_newlines; |
204 | scrub_string = saved->scrub_string; | |
205 | scrub_last_string = saved->scrub_last_string; | |
206 | scrub_file = saved->scrub_file; | |
207 | ||
208 | free (arg); | |
209 | } /* app_pop() */ | |
210 | ||
/* Map the character following a backslash to the character it denotes:
   b, f, n, r and t give the matching control character, a single or
   double quote gives itself, and anything else is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */
int
process_escape (ch)
     char ch;
{
  /* escape_letters[i] translates to escape_values[i].  */
  static const char escape_letters[] = "bfnrt'\"";
  static const char escape_values[] = "\b\f\n\r\t'\"";
  int i;

  for (i = 0; escape_letters[i] != '\0'; i++)
    {
      if (ch == escape_letters[i])
	return escape_values[i];
    }

  /* Not a recognised escape letter.  */
  return ch;
}
6efd877d KR |
237 | int |
238 | do_scrub_next_char (get, unget) | |
239 | int (*get) (); | |
240 | void (*unget) (); | |
fecd2382 | 241 | { |
6efd877d | 242 | /*State 0: beginning of normal line |
a39116f1 RP |
243 | 1: After first whitespace on line (flush more white) |
244 | 2: After first non-white (opcode) on line (keep 1white) | |
245 | 3: after second white on line (into operands) (flush white) | |
246 | 4: after putting out a .line, put out digits | |
247 | 5: parsing a string, then go to old-state | |
248 | 6: putting out \ escape in a "d string. | |
9a7d824a ILT |
249 | 7: After putting out a .appfile, put out string. |
250 | 8: After putting out a .appfile string, flush until newline. | |
f6a91cc0 | 251 | 9: After seeing symbol char in state 3 (keep 1white after symchar) |
9a7d824a | 252 | 10: After seeing whitespace in state 9 (keep white before symchar) |
a39116f1 RP |
253 | -1: output string in out_string and go to the state in old_state |
254 | -2: flush text until a '*' '/' is seen, then go to state old_state | |
255 | */ | |
6efd877d | 256 | |
9a7d824a ILT |
257 | /* I added states 9 and 10 because the MIPS ECOFF assembler uses |
258 | constructs like ``.loc 1 20''. This was turning into ``.loc | |
259 | 120''. States 9 and 10 ensure that a space is never dropped in | |
260 | between characters which could appear in a identifier. Ian | |
261 | Taylor, ian@cygnus.com. */ | |
f6a91cc0 | 262 | |
6efd877d KR |
263 | register int ch, ch2 = 0; |
264 | ||
265 | switch (state) | |
266 | { | |
267 | case -1: | |
268 | ch = *out_string++; | |
269 | if (*out_string == 0) | |
270 | { | |
271 | state = old_state; | |
272 | old_state = 3; | |
273 | } | |
274 | return ch; | |
275 | ||
276 | case -2: | |
277 | for (;;) | |
278 | { | |
279 | do | |
280 | { | |
281 | ch = (*get) (); | |
282 | } | |
283 | while (ch != EOF && ch != '\n' && ch != '*'); | |
284 | if (ch == '\n' || ch == EOF) | |
285 | return ch; | |
286 | ||
287 | /* At this point, ch must be a '*' */ | |
288 | while ((ch = (*get) ()) == '*') | |
289 | { | |
290 | ; | |
291 | } | |
292 | if (ch == EOF || ch == '/') | |
293 | break; | |
294 | (*unget) (ch); | |
295 | } | |
296 | state = old_state; | |
297 | return ' '; | |
298 | ||
299 | case 4: | |
300 | ch = (*get) (); | |
301 | if (ch == EOF || (ch >= '0' && ch <= '9')) | |
302 | return ch; | |
303 | else | |
304 | { | |
305 | while (ch != EOF && IS_WHITESPACE (ch)) | |
306 | ch = (*get) (); | |
307 | if (ch == '"') | |
308 | { | |
309 | (*unget) (ch); | |
9a7d824a | 310 | out_string = "\n.appfile "; |
6efd877d KR |
311 | old_state = 7; |
312 | state = -1; | |
313 | return *out_string++; | |
314 | } | |
315 | else | |
316 | { | |
317 | while (ch != EOF && ch != '\n') | |
318 | ch = (*get) (); | |
58d4951d | 319 | state = 0; |
6efd877d KR |
320 | return ch; |
321 | } | |
322 | } | |
323 | ||
324 | case 5: | |
325 | ch = (*get) (); | |
326 | if (lex[ch] == LEX_IS_STRINGQUOTE) | |
327 | { | |
328 | state = old_state; | |
329 | return ch; | |
330 | } | |
331 | else if (ch == '\\') | |
332 | { | |
333 | state = 6; | |
334 | return ch; | |
335 | } | |
336 | else if (ch == EOF) | |
337 | { | |
338 | as_warn ("End of file in string: inserted '\"'"); | |
339 | state = old_state; | |
340 | (*unget) ('\n'); | |
341 | return '"'; | |
342 | } | |
343 | else | |
344 | { | |
345 | return ch; | |
346 | } | |
347 | ||
348 | case 6: | |
349 | state = 5; | |
350 | ch = (*get) (); | |
351 | switch (ch) | |
352 | { | |
6d331d71 KR |
353 | /* Handle strings broken across lines, by turning '\n' into |
354 | '\\' and 'n'. */ | |
6efd877d KR |
355 | case '\n': |
356 | (*unget) ('n'); | |
357 | add_newlines++; | |
358 | return '\\'; | |
359 | ||
360 | case '"': | |
361 | case '\\': | |
58d4951d ILT |
362 | #ifdef TC_HPPA |
363 | case 'x': /* '\\x' introduces escaped sequences on the PA */ | |
364 | #endif | |
6efd877d KR |
365 | case 'b': |
366 | case 'f': | |
367 | case 'n': | |
368 | case 'r': | |
369 | case 't': | |
fecd2382 | 370 | #ifdef BACKSLASH_V |
6efd877d | 371 | case 'v': |
fecd2382 | 372 | #endif /* BACKSLASH_V */ |
6efd877d KR |
373 | case '0': |
374 | case '1': | |
375 | case '2': | |
376 | case '3': | |
377 | case '4': | |
378 | case '5': | |
379 | case '6': | |
380 | case '7': | |
381 | break; | |
7c2d4011 | 382 | #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) |
6efd877d KR |
383 | default: |
384 | as_warn ("Unknown escape '\\%c' in string: Ignored", ch); | |
385 | break; | |
fecd2382 | 386 | #else /* ONLY_STANDARD_ESCAPES */ |
6efd877d KR |
387 | default: |
388 | /* Accept \x as x for any x */ | |
389 | break; | |
fecd2382 | 390 | #endif /* ONLY_STANDARD_ESCAPES */ |
7c2d4011 | 391 | |
6efd877d KR |
392 | case EOF: |
393 | as_warn ("End of file in string: '\"' inserted"); | |
394 | return '"'; | |
395 | } | |
396 | return ch; | |
397 | ||
398 | case 7: | |
399 | ch = (*get) (); | |
400 | state = 5; | |
401 | old_state = 8; | |
402 | return ch; | |
403 | ||
404 | case 8: | |
405 | do | |
406 | ch = (*get) (); | |
407 | while (ch != '\n'); | |
408 | state = 0; | |
409 | return ch; | |
410 | } | |
411 | ||
9a7d824a | 412 | /* OK, we are somewhere in states 0 through 4 or 9 through 10 */ |
6efd877d KR |
413 | |
414 | /* flushchar: */ | |
415 | ch = (*get) (); | |
416 | recycle: | |
417 | if (ch == EOF) | |
418 | { | |
419 | if (state != 0) | |
420 | as_warn ("End of file not at end of a line: Newline inserted."); | |
421 | return ch; | |
422 | } | |
423 | ||
424 | switch (lex[ch]) | |
425 | { | |
426 | case LEX_IS_WHITESPACE: | |
427 | do | |
428 | ch = (*get) (); | |
429 | while (ch != EOF && IS_WHITESPACE (ch)); | |
430 | if (ch == EOF) | |
431 | return ch; | |
432 | ||
433 | if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch)) | |
434 | { | |
435 | goto recycle; | |
fecd2382 | 436 | } |
7c2d4011 | 437 | #ifdef MRI |
6efd877d KR |
438 | (*unget) (ch); /* Put back */ |
439 | return ' '; /* Always return one space at start of line */ | |
7c2d4011 | 440 | #endif |
6efd877d KR |
441 | |
442 | /* If we're in state 2, we've seen a non-white | |
6d331d71 KR |
443 | character followed by whitespace. If the next |
444 | character is ':', this is whitespace after a label | |
445 | name which we can ignore. */ | |
6efd877d KR |
446 | if (state == 2 && lex[ch] == LEX_IS_COLON) |
447 | { | |
448 | state = 0; | |
449 | return ch; | |
450 | } | |
451 | ||
452 | switch (state) | |
453 | { | |
454 | case 0: | |
455 | state++; | |
456 | goto recycle; /* Punted leading sp */ | |
457 | case 1: | |
458 | BAD_CASE (state); /* We can't get here */ | |
459 | case 2: | |
f6a91cc0 | 460 | state = 3; |
6efd877d KR |
461 | (*unget) (ch); |
462 | return ' '; /* Sp after opco */ | |
463 | case 3: | |
464 | goto recycle; /* Sp in operands */ | |
9a7d824a ILT |
465 | case 9: |
466 | case 10: | |
467 | state = 10; /* Sp after symbol char */ | |
468 | goto recycle; | |
6efd877d KR |
469 | default: |
470 | BAD_CASE (state); | |
471 | } | |
472 | break; | |
473 | ||
474 | case LEX_IS_TWOCHAR_COMMENT_1ST: | |
475 | ch2 = (*get) (); | |
476 | if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) | |
477 | { | |
478 | for (;;) | |
479 | { | |
480 | do | |
481 | { | |
482 | ch2 = (*get) (); | |
483 | if (ch2 != EOF && IS_NEWLINE (ch2)) | |
484 | add_newlines++; | |
fecd2382 | 485 | } |
6efd877d KR |
486 | while (ch2 != EOF && |
487 | (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND)); | |
488 | ||
489 | while (ch2 != EOF && | |
490 | (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)) | |
491 | { | |
492 | ch2 = (*get) (); | |
fecd2382 | 493 | } |
6efd877d KR |
494 | |
495 | if (ch2 == EOF | |
496 | || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST) | |
fecd2382 | 497 | break; |
6efd877d KR |
498 | (*unget) (ch); |
499 | } | |
500 | if (ch2 == EOF) | |
501 | as_warn ("End of file in multiline comment"); | |
502 | ||
503 | ch = ' '; | |
504 | goto recycle; | |
505 | } | |
506 | else | |
507 | { | |
508 | if (ch2 != EOF) | |
509 | (*unget) (ch2); | |
9a7d824a ILT |
510 | if (state == 9 || state == 10) |
511 | state = 3; | |
6efd877d KR |
512 | return ch; |
513 | } | |
514 | break; | |
515 | ||
516 | case LEX_IS_STRINGQUOTE: | |
9a7d824a ILT |
517 | if (state == 9 || state == 10) |
518 | old_state = 3; | |
519 | else | |
520 | old_state = state; | |
6efd877d KR |
521 | state = 5; |
522 | return ch; | |
523 | #ifndef MRI | |
a39116f1 | 524 | #ifndef IEEE_STYLE |
6efd877d KR |
525 | case LEX_IS_ONECHAR_QUOTE: |
526 | ch = (*get) (); | |
527 | if (ch == EOF) | |
528 | { | |
529 | as_warn ("End-of-file after a one-character quote; \\000 inserted"); | |
530 | ch = 0; | |
531 | } | |
532 | if (ch == '\\') | |
533 | { | |
534 | ch = (*get) (); | |
535 | ch = process_escape (ch); | |
536 | } | |
537 | sprintf (out_buf, "%d", (int) (unsigned char) ch); | |
7c2d4011 | 538 | |
6efd877d | 539 | |
9a7d824a | 540 | /* None of these 'x constants for us. We want 'x'. */ |
6efd877d KR |
541 | if ((ch = (*get) ()) != '\'') |
542 | { | |
fecd2382 | 543 | #ifdef REQUIRE_CHAR_CLOSE_QUOTE |
6efd877d | 544 | as_warn ("Missing close quote: (assumed)"); |
fecd2382 | 545 | #else |
6efd877d | 546 | (*unget) (ch); |
fecd2382 | 547 | #endif |
6efd877d KR |
548 | } |
549 | if (strlen (out_buf) == 1) | |
550 | { | |
551 | return out_buf[0]; | |
552 | } | |
9a7d824a ILT |
553 | if (state == 9 || state == 10) |
554 | old_state = 3; | |
555 | else | |
556 | old_state = state; | |
6efd877d KR |
557 | state = -1; |
558 | out_string = out_buf; | |
559 | return *out_string++; | |
7c2d4011 | 560 | #endif |
a39116f1 | 561 | #endif |
6efd877d | 562 | case LEX_IS_COLON: |
9a7d824a ILT |
563 | if (state == 9 || state == 10) |
564 | state = 3; | |
565 | else if (state != 3) | |
6efd877d KR |
566 | state = 0; |
567 | return ch; | |
568 | ||
569 | case LEX_IS_NEWLINE: | |
570 | /* Roll out a bunch of newlines from inside comments, etc. */ | |
571 | if (add_newlines) | |
572 | { | |
573 | --add_newlines; | |
574 | (*unget) (ch); | |
575 | } | |
576 | /* fall thru into... */ | |
577 | ||
578 | case LEX_IS_LINE_SEPARATOR: | |
579 | state = 0; | |
580 | return ch; | |
581 | ||
582 | case LEX_IS_LINE_COMMENT_START: | |
9a7d824a | 583 | if (state == 0) /* Only comment at start of line. */ |
6efd877d | 584 | { |
9a7d824a ILT |
585 | /* FIXME-someday: The two character comment stuff was badly |
586 | thought out. On i386, we want '/' as line comment start | |
587 | AND we want C style comments. hence this hack. The | |
588 | whole lexical process should be reworked. xoxorich. */ | |
589 | if (ch == '/') | |
f6a91cc0 | 590 | { |
9a7d824a ILT |
591 | ch2 = (*get) (); |
592 | if (ch2 == '*') | |
593 | { | |
594 | state = -2; | |
595 | return (do_scrub_next_char (get, unget)); | |
596 | } | |
597 | else | |
598 | { | |
599 | (*unget) (ch2); | |
600 | } | |
601 | } /* bad hack */ | |
6efd877d | 602 | |
9a7d824a | 603 | do |
6efd877d | 604 | ch = (*get) (); |
9a7d824a | 605 | while (ch != EOF && IS_WHITESPACE (ch)); |
6efd877d | 606 | if (ch == EOF) |
9a7d824a ILT |
607 | { |
608 | as_warn ("EOF in comment: Newline inserted"); | |
609 | return '\n'; | |
610 | } | |
611 | if (ch < '0' || ch > '9') | |
612 | { | |
613 | /* Non-numerics: Eat whole comment line */ | |
614 | while (ch != EOF && !IS_NEWLINE (ch)) | |
615 | ch = (*get) (); | |
616 | if (ch == EOF) | |
617 | as_warn ("EOF in Comment: Newline inserted"); | |
618 | state = 0; | |
619 | return '\n'; | |
620 | } | |
621 | /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */ | |
622 | (*unget) (ch); | |
623 | old_state = 4; | |
624 | state = -1; | |
625 | out_string = ".appline "; | |
626 | return *out_string++; | |
6efd877d | 627 | } |
6efd877d | 628 | |
9a7d824a ILT |
629 | /* We have a line comment character which is not at the start of |
630 | a line. If this is also a normal comment character, fall | |
631 | through. Otherwise treat it as a default character. */ | |
632 | if (strchr (comment_chars, ch) == NULL) | |
633 | goto de_fault; | |
634 | /* Fall through. */ | |
6efd877d KR |
635 | case LEX_IS_COMMENT_START: |
636 | do | |
637 | ch = (*get) (); | |
638 | while (ch != EOF && !IS_NEWLINE (ch)); | |
639 | if (ch == EOF) | |
640 | as_warn ("EOF in comment: Newline inserted"); | |
641 | state = 0; | |
642 | return '\n'; | |
643 | ||
f6a91cc0 | 644 | case LEX_IS_SYMBOL_COMPONENT: |
9a7d824a ILT |
645 | if (state == 10) |
646 | { | |
647 | /* This is a symbol character following another symbol | |
648 | character, with whitespace in between. We skipped the | |
649 | whitespace earlier, so output it now. */ | |
650 | (*unget) (ch); | |
651 | state = 3; | |
652 | return ' '; | |
653 | } | |
f6a91cc0 ILT |
654 | if (state == 3) |
655 | state = 9; | |
656 | /* Fall through. */ | |
6efd877d KR |
657 | default: |
658 | de_fault: | |
659 | /* Some relatively `normal' character. */ | |
660 | if (state == 0) | |
661 | { | |
662 | state = 2; /* Now seeing opcode */ | |
663 | return ch; | |
fecd2382 | 664 | } |
6efd877d KR |
665 | else if (state == 1) |
666 | { | |
667 | state = 2; /* Ditto */ | |
668 | return ch; | |
669 | } | |
f6a91cc0 ILT |
670 | else if (state == 9) |
671 | { | |
672 | if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) | |
673 | state = 3; | |
674 | return ch; | |
675 | } | |
9a7d824a ILT |
676 | else if (state == 10) |
677 | { | |
678 | state = 3; | |
679 | return ch; | |
680 | } | |
6efd877d KR |
681 | else |
682 | { | |
683 | return ch; /* Opcode or operands already */ | |
684 | } | |
685 | } | |
686 | return -1; | |
fecd2382 RP |
687 | } |
688 | ||
689 | #ifdef TEST | |
690 | ||
/* Character sets for the stand-alone TEST build.  do_scrub_begin ()
   walks all three of these, so each one needs a definition here;
   the test harness has no extra line separators.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";
fecd2382 | 693 | |
6efd877d | 694 | main () |
fecd2382 | 695 | { |
6efd877d KR |
696 | int ch; |
697 | ||
698 | app_begin (); | |
699 | while ((ch = do_scrub_next_char (stdin)) != EOF) | |
700 | putc (ch, stdout); | |
fecd2382 RP |
701 | } |
702 | ||
/* Minimal as_warn () for the TEST build: write STR and a newline to
   stderr.  */
as_warn (str)
     char *str;
{
  fprintf (stderr, "%s\n", str);
}
6efd877d | 709 | |
fecd2382 RP |
710 | #endif |
711 | ||
fecd2382 | 712 | /* end of app.c */ |