Commit | Line | Data |
---|---|---|
fecd2382 | 1 | /* This is the Assembler Pre-Processor |
5a051773 | 2 | Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc. |
6efd877d | 3 | |
a39116f1 | 4 | This file is part of GAS, the GNU Assembler. |
6efd877d | 5 | |
a39116f1 RP |
6 | GAS is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
6efd877d | 10 | |
a39116f1 RP |
11 | GAS is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
6efd877d | 15 | |
a39116f1 RP |
16 | You should have received a copy of the GNU General Public License |
17 | along with GAS; see the file COPYING. If not, write to | |
a2a5a4fa | 18 | the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
fecd2382 | 19 | |
58d4951d | 20 | /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ |
fecd2382 RP |
21 | /* App, the assembler pre-processor. This pre-processor strips out excess |
22 | spaces, turns single-quoted characters into a decimal constant, and turns | |
9a7d824a | 23 | # <number> <filename> <garbage> into a .line <number>\n.file <filename> |
a2a5a4fa | 24 | pair. This needs better error-handling. */ |
fecd2382 RP |
25 | |
26 | #include <stdio.h> | |
6efd877d | 27 | #include "as.h" /* For BAD_CASE() only */ |
fecd2382 | 28 | |
5a051773 SS |
29 | #if (__STDC__ != 1) |
30 | #ifndef const | |
31 | #define const /* empty */ | |
32 | #endif | |
fecd2382 RP |
33 | #endif |
34 | ||
/* Lexical class of every possible input byte, indexed by character code.
   An entry of zero means the character has no special class.  */
static char lex[256];

/* Characters that are given the class LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* The lexical classes that may be stored in lex[].  */
enum
{
  LEX_IS_SYMBOL_COMPONENT = 1,
  LEX_IS_WHITESPACE = 2,
  LEX_IS_LINE_SEPARATOR = 3,
  LEX_IS_COMMENT_START = 4,
  LEX_IS_LINE_COMMENT_START = 5,
  LEX_IS_TWOCHAR_COMMENT_1ST = 6,
  LEX_IS_TWOCHAR_COMMENT_2ND = 7,
  LEX_IS_STRINGQUOTE = 8,
  LEX_IS_COLON = 9,
  LEX_IS_NEWLINE = 10,
  LEX_IS_ONECHAR_QUOTE = 11
};

/* Class predicates.  The argument is used directly as an index into
   lex[], so it must not be EOF or any other negative value.  */
#define IS_SYMBOL_COMPONENT(c)	(lex[(c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	(lex[(c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	(lex[(c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)		(lex[(c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	(lex[(c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)		(lex[(c)] == LEX_IS_NEWLINE)
56 | ||
385ce433 JL |
57 | static int process_escape PARAMS ((int)); |
58 | ||
a39116f1 RP |
59 | /* FIXME-soon: The entire lexer/parser thingy should be |
60 | built statically at compile time rather than dynamically | |
61 | each and every time the assembler is run. xoxorich. */ | |
fecd2382 | 62 | |
6efd877d KR |
63 | void |
64 | do_scrub_begin () | |
65 | { | |
66 | const char *p; | |
67 | ||
68 | lex[' '] = LEX_IS_WHITESPACE; | |
69 | lex['\t'] = LEX_IS_WHITESPACE; | |
70 | lex['\n'] = LEX_IS_NEWLINE; | |
71 | lex[';'] = LEX_IS_LINE_SEPARATOR; | |
72 | lex['"'] = LEX_IS_STRINGQUOTE; | |
58d4951d | 73 | #ifndef TC_HPPA |
6efd877d | 74 | lex['\''] = LEX_IS_ONECHAR_QUOTE; |
58d4951d | 75 | #endif |
6efd877d | 76 | lex[':'] = LEX_IS_COLON; |
7c2d4011 | 77 | |
be06bdcd SC |
78 | |
79 | ||
80 | #ifdef SINGLE_QUOTE_STRINGS | |
a2a5a4fa | 81 | lex['\''] = LEX_IS_STRINGQUOTE; |
7c2d4011 | 82 | #endif |
be06bdcd | 83 | |
a2a5a4fa KR |
84 | /* Note that these override the previous defaults, e.g. if ';' is a |
85 | comment char, then it isn't a line separator. */ | |
6efd877d KR |
86 | for (p = symbol_chars; *p; ++p) |
87 | { | |
58d4951d | 88 | lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; |
6efd877d KR |
89 | } /* declare symbol characters */ |
90 | ||
6efd877d KR |
91 | for (p = comment_chars; *p; p++) |
92 | { | |
58d4951d | 93 | lex[(unsigned char) *p] = LEX_IS_COMMENT_START; |
6efd877d KR |
94 | } /* declare comment chars */ |
95 | ||
9a7d824a ILT |
96 | for (p = line_comment_chars; *p; p++) |
97 | { | |
58d4951d | 98 | lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; |
9a7d824a ILT |
99 | } /* declare line comment chars */ |
100 | ||
6efd877d KR |
101 | for (p = line_separator_chars; *p; p++) |
102 | { | |
58d4951d | 103 | lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; |
6efd877d KR |
104 | } /* declare line separators */ |
105 | ||
106 | /* Only allow slash-star comments if slash is not in use */ | |
107 | if (lex['/'] == 0) | |
108 | { | |
109 | lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; | |
110 | } | |
a2a5a4fa KR |
111 | /* FIXME-soon. This is a bad hack but otherwise, we can't do |
112 | c-style comments when '/' is a line comment char. xoxorich. */ | |
6efd877d KR |
113 | if (lex['*'] == 0) |
114 | { | |
115 | lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND; | |
116 | } | |
117 | } /* do_scrub_begin() */ | |
fecd2382 RP |
118 | |
/* Stream used by scrub_from_file () and scrub_to_file ().  Nothing in
   this file opens it; app_push () and app_pop () save and restore it.  */
FILE *scrub_file;

/* Character source that reads from scrub_file.  Returns the next
   character, or EOF as reported by getc ().  */
int
scrub_from_file ()
{
  int next_char = getc (scrub_file);

  return next_char;
}
126 | ||
6efd877d KR |
127 | void |
128 | scrub_to_file (ch) | |
129 | int ch; | |
fecd2382 | 130 | { |
6efd877d KR |
131 | ungetc (ch, scrub_file); |
132 | } /* scrub_to_file() */ | |
fecd2382 RP |
133 | |
/* In-memory input for the scrubber.  scrub_string points at the next
   character to hand out; scrub_last_string points one past the final
   character.  */
char *scrub_string;
char *scrub_last_string;

/* Character source that reads from the scrub_string buffer.  Returns the
   next character as a non-negative value, or EOF once scrub_string has
   reached scrub_last_string.

   The character is converted through unsigned char, as getc () does.
   Without that, on hosts where plain char is signed a byte of 0xFF would
   be returned as -1 (indistinguishable from EOF) and any other byte with
   the high bit set would come back negative, which callers then use as
   an index into lex[].  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push-back routine paired with scrub_from_string (): step scrub_string
   back by one and store CH there so that the next read returns it.

   A push-back of EOF is ignored, matching what ungetc () does; storing
   it would overwrite a real character of the buffer with 0xFF.  */
void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  --scrub_string;
  *scrub_string = ch;
}				/* scrub_to_string() */
fecd2382 RP |
149 | |
/* Saved state of the scrubber.  All of it persists between calls to
   do_scrub_next_char ().  */

/* Current state of the do_scrub_next_char () state machine.  */
static int state;

/* State to switch to once the current temporary state is finished.  */
static int old_state;

/* Text still to be emitted, one character per call, while in state -1.  */
static char *out_string;

/* Receives the decimal text of a one-character quote.  */
static char out_buf[20];

/* Count of newlines swallowed inside comments and broken strings that
   have yet to be re-emitted.  */
static int add_newlines;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

struct app_save
{
  /* Copies of the file-scope scrubber state.  */
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;

  /* Copies of the two input sources.  */
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
173 | ||
174 | char * | |
175 | app_push () | |
176 | { | |
7c2d4011 SC |
177 | register struct app_save *saved; |
178 | ||
6efd877d KR |
179 | saved = (struct app_save *) xmalloc (sizeof (*saved)); |
180 | saved->state = state; | |
181 | saved->old_state = old_state; | |
182 | saved->out_string = out_string; | |
58d4951d | 183 | memcpy (saved->out_buf, out_buf, sizeof (out_buf)); |
6efd877d KR |
184 | saved->add_newlines = add_newlines; |
185 | saved->scrub_string = scrub_string; | |
7c2d4011 | 186 | saved->scrub_last_string = scrub_last_string; |
6efd877d | 187 | saved->scrub_file = scrub_file; |
7c2d4011 SC |
188 | |
189 | /* do_scrub_begin() is not useful, just wastes time. */ | |
6efd877d | 190 | return (char *) saved; |
fecd2382 RP |
191 | } |
192 | ||
6efd877d KR |
193 | void |
194 | app_pop (arg) | |
195 | char *arg; | |
fecd2382 | 196 | { |
6efd877d KR |
197 | register struct app_save *saved = (struct app_save *) arg; |
198 | ||
199 | /* There is no do_scrub_end (). */ | |
200 | state = saved->state; | |
201 | old_state = saved->old_state; | |
202 | out_string = saved->out_string; | |
58d4951d | 203 | memcpy (out_buf, saved->out_buf, sizeof (out_buf)); |
6efd877d KR |
204 | add_newlines = saved->add_newlines; |
205 | scrub_string = saved->scrub_string; | |
206 | scrub_last_string = saved->scrub_last_string; | |
207 | scrub_file = saved->scrub_file; | |
208 | ||
209 | free (arg); | |
210 | } /* app_pop() */ | |
211 | ||
/* Return the character denoted by a backslash followed by CH.  Only
   b, f, n, r and t are translated; every other value, including the
   single and double quote, is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */
static int
process_escape (ch)
     int ch;
{
  int value;

  if (ch == 'b')
    value = '\b';
  else if (ch == 'f')
    value = '\f';
  else if (ch == 'n')
    value = '\n';
  else if (ch == 'r')
    value = '\r';
  else if (ch == 't')
    value = '\t';
  else
    value = ch;			/* Quotes and anything else: as is.  */

  return value;
}
6efd877d KR |
238 | int |
239 | do_scrub_next_char (get, unget) | |
240 | int (*get) (); | |
241 | void (*unget) (); | |
fecd2382 | 242 | { |
6efd877d | 243 | /*State 0: beginning of normal line |
a39116f1 RP |
244 | 1: After first whitespace on line (flush more white) |
245 | 2: After first non-white (opcode) on line (keep 1white) | |
246 | 3: after second white on line (into operands) (flush white) | |
247 | 4: after putting out a .line, put out digits | |
248 | 5: parsing a string, then go to old-state | |
249 | 6: putting out \ escape in a "d string. | |
9a7d824a ILT |
250 | 7: After putting out a .appfile, put out string. |
251 | 8: After putting out a .appfile string, flush until newline. | |
f6a91cc0 | 252 | 9: After seeing symbol char in state 3 (keep 1white after symchar) |
9a7d824a | 253 | 10: After seeing whitespace in state 9 (keep white before symchar) |
a2a5a4fa KR |
254 | 11: After seeing a symbol character in state 0 (eg a label definition) |
255 | -1: output string in out_string and go to the state in old_state | |
256 | -2: flush text until a '*' '/' is seen, then go to state old_state | |
a39116f1 | 257 | */ |
6efd877d | 258 | |
9a7d824a ILT |
259 | /* I added states 9 and 10 because the MIPS ECOFF assembler uses |
260 | constructs like ``.loc 1 20''. This was turning into ``.loc | |
261 | 120''. States 9 and 10 ensure that a space is never dropped in | |
262 | between characters which could appear in a identifier. Ian | |
a2a5a4fa KR |
263 | Taylor, ian@cygnus.com. |
264 | ||
265 | I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works | |
266 | correctly on the PA (and any other target where colons are optional). | |
267 | Jeff Law, law@cs.utah.edu. */ | |
268 | ||
269 | /* This is purely an optimization hack, and relies on gcc's inlining | |
270 | capability. */ | |
271 | #if defined (__GNUC__) && defined (__OPTIMIZE__) | |
272 | #define GET() (get == scrub_from_file ? scrub_from_file () : (*get) ()) | |
273 | #else | |
274 | #define GET() ((*get) ()) | |
275 | #endif | |
f6a91cc0 | 276 | |
6efd877d | 277 | register int ch, ch2 = 0; |
385ce433 | 278 | int not_cpp_line = 0; |
6efd877d KR |
279 | |
280 | switch (state) | |
281 | { | |
282 | case -1: | |
283 | ch = *out_string++; | |
284 | if (*out_string == 0) | |
285 | { | |
286 | state = old_state; | |
287 | old_state = 3; | |
288 | } | |
289 | return ch; | |
290 | ||
291 | case -2: | |
292 | for (;;) | |
293 | { | |
294 | do | |
295 | { | |
a2a5a4fa | 296 | ch = GET (); |
6efd877d KR |
297 | } |
298 | while (ch != EOF && ch != '\n' && ch != '*'); | |
299 | if (ch == '\n' || ch == EOF) | |
300 | return ch; | |
301 | ||
302 | /* At this point, ch must be a '*' */ | |
a2a5a4fa | 303 | while ((ch = GET ()) == '*') |
6efd877d KR |
304 | { |
305 | ; | |
306 | } | |
307 | if (ch == EOF || ch == '/') | |
308 | break; | |
309 | (*unget) (ch); | |
310 | } | |
311 | state = old_state; | |
312 | return ' '; | |
313 | ||
314 | case 4: | |
a2a5a4fa | 315 | ch = GET (); |
6efd877d KR |
316 | if (ch == EOF || (ch >= '0' && ch <= '9')) |
317 | return ch; | |
318 | else | |
319 | { | |
320 | while (ch != EOF && IS_WHITESPACE (ch)) | |
a2a5a4fa | 321 | ch = GET (); |
6efd877d KR |
322 | if (ch == '"') |
323 | { | |
324 | (*unget) (ch); | |
001581c7 | 325 | out_string = "\n\t.appfile "; |
6efd877d KR |
326 | old_state = 7; |
327 | state = -1; | |
328 | return *out_string++; | |
329 | } | |
330 | else | |
331 | { | |
332 | while (ch != EOF && ch != '\n') | |
a2a5a4fa | 333 | ch = GET (); |
58d4951d | 334 | state = 0; |
6efd877d KR |
335 | return ch; |
336 | } | |
337 | } | |
338 | ||
339 | case 5: | |
a2a5a4fa | 340 | ch = GET (); |
6efd877d KR |
341 | if (lex[ch] == LEX_IS_STRINGQUOTE) |
342 | { | |
343 | state = old_state; | |
344 | return ch; | |
345 | } | |
a2a5a4fa | 346 | #ifndef NO_STRING_ESCAPES |
6efd877d KR |
347 | else if (ch == '\\') |
348 | { | |
349 | state = 6; | |
350 | return ch; | |
351 | } | |
a2a5a4fa | 352 | #endif |
6efd877d KR |
353 | else if (ch == EOF) |
354 | { | |
355 | as_warn ("End of file in string: inserted '\"'"); | |
356 | state = old_state; | |
357 | (*unget) ('\n'); | |
358 | return '"'; | |
359 | } | |
360 | else | |
361 | { | |
362 | return ch; | |
363 | } | |
364 | ||
365 | case 6: | |
366 | state = 5; | |
a2a5a4fa | 367 | ch = GET (); |
6efd877d KR |
368 | switch (ch) |
369 | { | |
6d331d71 KR |
370 | /* Handle strings broken across lines, by turning '\n' into |
371 | '\\' and 'n'. */ | |
6efd877d KR |
372 | case '\n': |
373 | (*unget) ('n'); | |
374 | add_newlines++; | |
375 | return '\\'; | |
376 | ||
377 | case '"': | |
378 | case '\\': | |
379 | case 'b': | |
380 | case 'f': | |
381 | case 'n': | |
382 | case 'r': | |
383 | case 't': | |
fecd2382 | 384 | #ifdef BACKSLASH_V |
6efd877d | 385 | case 'v': |
fecd2382 | 386 | #endif /* BACKSLASH_V */ |
385ce433 JL |
387 | case 'x': |
388 | case 'X': | |
6efd877d KR |
389 | case '0': |
390 | case '1': | |
391 | case '2': | |
392 | case '3': | |
393 | case '4': | |
394 | case '5': | |
395 | case '6': | |
396 | case '7': | |
397 | break; | |
7c2d4011 | 398 | #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) |
6efd877d KR |
399 | default: |
400 | as_warn ("Unknown escape '\\%c' in string: Ignored", ch); | |
401 | break; | |
fecd2382 | 402 | #else /* ONLY_STANDARD_ESCAPES */ |
6efd877d KR |
403 | default: |
404 | /* Accept \x as x for any x */ | |
405 | break; | |
fecd2382 | 406 | #endif /* ONLY_STANDARD_ESCAPES */ |
7c2d4011 | 407 | |
6efd877d KR |
408 | case EOF: |
409 | as_warn ("End of file in string: '\"' inserted"); | |
410 | return '"'; | |
411 | } | |
412 | return ch; | |
413 | ||
414 | case 7: | |
a2a5a4fa | 415 | ch = GET (); |
6efd877d KR |
416 | state = 5; |
417 | old_state = 8; | |
418 | return ch; | |
419 | ||
420 | case 8: | |
421 | do | |
a2a5a4fa | 422 | ch = GET (); |
6efd877d KR |
423 | while (ch != '\n'); |
424 | state = 0; | |
425 | return ch; | |
426 | } | |
427 | ||
a2a5a4fa | 428 | /* OK, we are somewhere in states 0 through 4 or 9 through 11 */ |
6efd877d KR |
429 | |
430 | /* flushchar: */ | |
a2a5a4fa | 431 | ch = GET (); |
6efd877d KR |
432 | recycle: |
433 | if (ch == EOF) | |
434 | { | |
435 | if (state != 0) | |
a2a5a4fa KR |
436 | { |
437 | as_warn ("End of file not at end of a line: Newline inserted."); | |
438 | state = 0; | |
439 | return '\n'; | |
440 | } | |
6efd877d KR |
441 | return ch; |
442 | } | |
443 | ||
444 | switch (lex[ch]) | |
445 | { | |
446 | case LEX_IS_WHITESPACE: | |
447 | do | |
385ce433 JL |
448 | /* Preserve a single whitespace character at the beginning of |
449 | a line. */ | |
450 | if (state == 0) | |
451 | { | |
452 | state = 1; | |
453 | return ch; | |
454 | } | |
455 | else | |
a2a5a4fa | 456 | ch = GET (); |
6efd877d KR |
457 | while (ch != EOF && IS_WHITESPACE (ch)); |
458 | if (ch == EOF) | |
459 | return ch; | |
460 | ||
a2a5a4fa KR |
461 | if (IS_COMMENT (ch) |
462 | || (state == 0 && IS_LINE_COMMENT (ch)) | |
463 | || ch == '/' | |
464 | || IS_LINE_SEPARATOR (ch)) | |
6efd877d | 465 | { |
385ce433 JL |
466 | /* cpp never outputs a leading space before the #, so try to |
467 | avoid being confused. */ | |
468 | not_cpp_line = 1; | |
6efd877d | 469 | goto recycle; |
fecd2382 | 470 | } |
5a051773 SS |
471 | #ifdef MRI |
472 | (*unget) (ch); /* Put back */ | |
473 | return ' '; /* Always return one space at start of line */ | |
474 | #endif | |
6efd877d | 475 | |
a2a5a4fa KR |
476 | /* If we're in state 2 or 11, we've seen a non-white character |
477 | followed by whitespace. If the next character is ':', this | |
478 | is whitespace after a label name which we *must* ignore. */ | |
479 | if ((state == 2 || state == 11) && lex[ch] == LEX_IS_COLON) | |
6efd877d | 480 | { |
a2a5a4fa | 481 | state = 1; |
6efd877d KR |
482 | return ch; |
483 | } | |
484 | ||
485 | switch (state) | |
486 | { | |
487 | case 0: | |
488 | state++; | |
489 | goto recycle; /* Punted leading sp */ | |
490 | case 1: | |
385ce433 JL |
491 | /* We can arrive here if we leave a leading whitespace character |
492 | at the beginning of a line. */ | |
493 | goto recycle; | |
6efd877d | 494 | case 2: |
f6a91cc0 | 495 | state = 3; |
6efd877d KR |
496 | (*unget) (ch); |
497 | return ' '; /* Sp after opco */ | |
498 | case 3: | |
499 | goto recycle; /* Sp in operands */ | |
9a7d824a ILT |
500 | case 9: |
501 | case 10: | |
502 | state = 10; /* Sp after symbol char */ | |
503 | goto recycle; | |
a2a5a4fa KR |
504 | case 11: |
505 | state = 1; | |
506 | (*unget) (ch); | |
507 | return ' '; /* Sp after label definition. */ | |
6efd877d KR |
508 | default: |
509 | BAD_CASE (state); | |
510 | } | |
511 | break; | |
512 | ||
513 | case LEX_IS_TWOCHAR_COMMENT_1ST: | |
a2a5a4fa | 514 | ch2 = GET (); |
6efd877d KR |
515 | if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) |
516 | { | |
517 | for (;;) | |
518 | { | |
519 | do | |
520 | { | |
a2a5a4fa | 521 | ch2 = GET (); |
6efd877d KR |
522 | if (ch2 != EOF && IS_NEWLINE (ch2)) |
523 | add_newlines++; | |
fecd2382 | 524 | } |
6efd877d KR |
525 | while (ch2 != EOF && |
526 | (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND)); | |
527 | ||
528 | while (ch2 != EOF && | |
529 | (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)) | |
530 | { | |
a2a5a4fa | 531 | ch2 = GET (); |
fecd2382 | 532 | } |
6efd877d KR |
533 | |
534 | if (ch2 == EOF | |
535 | || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST) | |
fecd2382 | 536 | break; |
6efd877d KR |
537 | (*unget) (ch); |
538 | } | |
539 | if (ch2 == EOF) | |
540 | as_warn ("End of file in multiline comment"); | |
541 | ||
542 | ch = ' '; | |
543 | goto recycle; | |
544 | } | |
545 | else | |
546 | { | |
547 | if (ch2 != EOF) | |
548 | (*unget) (ch2); | |
9a7d824a ILT |
549 | if (state == 9 || state == 10) |
550 | state = 3; | |
6efd877d KR |
551 | return ch; |
552 | } | |
553 | break; | |
554 | ||
555 | case LEX_IS_STRINGQUOTE: | |
9a7d824a ILT |
556 | if (state == 9 || state == 10) |
557 | old_state = 3; | |
558 | else | |
559 | old_state = state; | |
6efd877d KR |
560 | state = 5; |
561 | return ch; | |
562 | #ifndef MRI | |
a39116f1 | 563 | #ifndef IEEE_STYLE |
6efd877d | 564 | case LEX_IS_ONECHAR_QUOTE: |
a2a5a4fa | 565 | ch = GET (); |
6efd877d KR |
566 | if (ch == EOF) |
567 | { | |
568 | as_warn ("End-of-file after a one-character quote; \\000 inserted"); | |
569 | ch = 0; | |
570 | } | |
571 | if (ch == '\\') | |
572 | { | |
a2a5a4fa | 573 | ch = GET (); |
6efd877d KR |
574 | ch = process_escape (ch); |
575 | } | |
576 | sprintf (out_buf, "%d", (int) (unsigned char) ch); | |
7c2d4011 | 577 | |
6efd877d | 578 | |
9a7d824a | 579 | /* None of these 'x constants for us. We want 'x'. */ |
a2a5a4fa | 580 | if ((ch = GET ()) != '\'') |
6efd877d | 581 | { |
fecd2382 | 582 | #ifdef REQUIRE_CHAR_CLOSE_QUOTE |
6efd877d | 583 | as_warn ("Missing close quote: (assumed)"); |
fecd2382 | 584 | #else |
6efd877d | 585 | (*unget) (ch); |
fecd2382 | 586 | #endif |
6efd877d KR |
587 | } |
588 | if (strlen (out_buf) == 1) | |
589 | { | |
590 | return out_buf[0]; | |
591 | } | |
9a7d824a ILT |
592 | if (state == 9 || state == 10) |
593 | old_state = 3; | |
594 | else | |
595 | old_state = state; | |
6efd877d KR |
596 | state = -1; |
597 | out_string = out_buf; | |
598 | return *out_string++; | |
7c2d4011 | 599 | #endif |
a39116f1 | 600 | #endif |
6efd877d | 601 | case LEX_IS_COLON: |
9a7d824a ILT |
602 | if (state == 9 || state == 10) |
603 | state = 3; | |
604 | else if (state != 3) | |
a2a5a4fa | 605 | state = 1; |
6efd877d KR |
606 | return ch; |
607 | ||
608 | case LEX_IS_NEWLINE: | |
609 | /* Roll out a bunch of newlines from inside comments, etc. */ | |
610 | if (add_newlines) | |
611 | { | |
612 | --add_newlines; | |
613 | (*unget) (ch); | |
614 | } | |
615 | /* fall thru into... */ | |
616 | ||
617 | case LEX_IS_LINE_SEPARATOR: | |
618 | state = 0; | |
619 | return ch; | |
620 | ||
621 | case LEX_IS_LINE_COMMENT_START: | |
9a7d824a | 622 | if (state == 0) /* Only comment at start of line. */ |
6efd877d | 623 | { |
9a7d824a ILT |
624 | /* FIXME-someday: The two character comment stuff was badly |
625 | thought out. On i386, we want '/' as line comment start | |
626 | AND we want C style comments. hence this hack. The | |
627 | whole lexical process should be reworked. xoxorich. */ | |
628 | if (ch == '/') | |
f6a91cc0 | 629 | { |
a2a5a4fa | 630 | ch2 = GET (); |
9a7d824a ILT |
631 | if (ch2 == '*') |
632 | { | |
633 | state = -2; | |
634 | return (do_scrub_next_char (get, unget)); | |
635 | } | |
636 | else | |
637 | { | |
638 | (*unget) (ch2); | |
639 | } | |
640 | } /* bad hack */ | |
6efd877d | 641 | |
385ce433 JL |
642 | if (ch != '#') |
643 | not_cpp_line = 1; | |
644 | ||
9a7d824a | 645 | do |
a2a5a4fa | 646 | ch = GET (); |
9a7d824a | 647 | while (ch != EOF && IS_WHITESPACE (ch)); |
6efd877d | 648 | if (ch == EOF) |
9a7d824a ILT |
649 | { |
650 | as_warn ("EOF in comment: Newline inserted"); | |
651 | return '\n'; | |
652 | } | |
385ce433 | 653 | if (ch < '0' || ch > '9' || not_cpp_line) |
9a7d824a ILT |
654 | { |
655 | /* Non-numerics: Eat whole comment line */ | |
656 | while (ch != EOF && !IS_NEWLINE (ch)) | |
a2a5a4fa | 657 | ch = GET (); |
9a7d824a ILT |
658 | if (ch == EOF) |
659 | as_warn ("EOF in Comment: Newline inserted"); | |
660 | state = 0; | |
661 | return '\n'; | |
662 | } | |
663 | /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */ | |
664 | (*unget) (ch); | |
665 | old_state = 4; | |
666 | state = -1; | |
001581c7 | 667 | out_string = "\t.appline "; |
9a7d824a | 668 | return *out_string++; |
6efd877d | 669 | } |
6efd877d | 670 | |
9a7d824a ILT |
671 | /* We have a line comment character which is not at the start of |
672 | a line. If this is also a normal comment character, fall | |
673 | through. Otherwise treat it as a default character. */ | |
674 | if (strchr (comment_chars, ch) == NULL) | |
675 | goto de_fault; | |
676 | /* Fall through. */ | |
6efd877d KR |
677 | case LEX_IS_COMMENT_START: |
678 | do | |
a2a5a4fa | 679 | ch = GET (); |
6efd877d KR |
680 | while (ch != EOF && !IS_NEWLINE (ch)); |
681 | if (ch == EOF) | |
682 | as_warn ("EOF in comment: Newline inserted"); | |
683 | state = 0; | |
684 | return '\n'; | |
685 | ||
f6a91cc0 | 686 | case LEX_IS_SYMBOL_COMPONENT: |
9a7d824a ILT |
687 | if (state == 10) |
688 | { | |
689 | /* This is a symbol character following another symbol | |
690 | character, with whitespace in between. We skipped the | |
691 | whitespace earlier, so output it now. */ | |
692 | (*unget) (ch); | |
693 | state = 3; | |
694 | return ' '; | |
695 | } | |
f6a91cc0 ILT |
696 | if (state == 3) |
697 | state = 9; | |
698 | /* Fall through. */ | |
6efd877d KR |
699 | default: |
700 | de_fault: | |
701 | /* Some relatively `normal' character. */ | |
702 | if (state == 0) | |
703 | { | |
a2a5a4fa | 704 | state = 11; /* Now seeing label definition */ |
6efd877d | 705 | return ch; |
fecd2382 | 706 | } |
6efd877d KR |
707 | else if (state == 1) |
708 | { | |
709 | state = 2; /* Ditto */ | |
710 | return ch; | |
711 | } | |
f6a91cc0 ILT |
712 | else if (state == 9) |
713 | { | |
714 | if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) | |
715 | state = 3; | |
716 | return ch; | |
717 | } | |
9a7d824a ILT |
718 | else if (state == 10) |
719 | { | |
720 | state = 3; | |
721 | return ch; | |
722 | } | |
6efd877d KR |
723 | else |
724 | { | |
725 | return ch; /* Opcode or operands already */ | |
726 | } | |
727 | } | |
728 | return -1; | |
a2a5a4fa KR |
729 | |
730 | #undef GET | |
fecd2382 RP |
731 | } |
732 | ||
#ifdef TEST

/* Stand-alone filter for exercising the scrubber: copies stdin to
   stdout through do_scrub_next_char ().  Only compiled when TEST is
   defined.  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
/* do_scrub_begin () also walks line_separator_chars, so the harness has
   to supply it alongside the two tables above.  */
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  /* Read through the file-based source/push-back pair.  */
  scrub_file = stdin;
  do_scrub_begin ();
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Minimal warning routine: print STR and a newline on stderr.
   NOTE(review): as.h presumably declares as_warn as well; confirm that
   this definition agrees with that declaration before building with
   TEST defined.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif
755 | ||
fecd2382 | 756 | /* end of app.c */ |