gas/app.c

   1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
   2
   3    Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   4    */
   5 /* This is the Assembler Pre-Processor
   6    Copyright (C) 1987 Free Software Foundation, Inc.
   7
   8    This file is part of GAS, the GNU Assembler.
   9
  10    GAS is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    GAS is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with GAS; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  23
  24 /* App, the assembler pre-processor.  This pre-processor strips out excess
  25    spaces, turns single-quoted characters into a decimal constant, and turns
  26    # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
  27    This needs better error-handling.
  28    */
  29
  30 #include <stdio.h>
  31 #include "as.h"                 /* For BAD_CASE() only */
  32 #include "read.h"
  33
  34 #if (__STDC__ != 1) && !defined(const)
  35 #define const                   /* Nothing */
  36 #endif
  37
  38 static char lex[256];
  39 static char symbol_chars[] =
  40 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  41
  42 #define LEX_IS_SYMBOL_COMPONENT         1
  43 #define LEX_IS_WHITESPACE               2
  44 #define LEX_IS_LINE_SEPARATOR           3
  45 #define LEX_IS_COMMENT_START            4
  46 #define LEX_IS_LINE_COMMENT_START       5
  47 #define LEX_IS_TWOCHAR_COMMENT_1ST      6
  48 #define LEX_IS_TWOCHAR_COMMENT_2ND      7
  49 #define LEX_IS_STRINGQUOTE              8
  50 #define LEX_IS_COLON                    9
  51 #define LEX_IS_NEWLINE                  10
  52 #define LEX_IS_ONECHAR_QUOTE            11
  53 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  54 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  55 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  56 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  57 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  58 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  59
  60 /* FIXME-soon: The entire lexer/parser thingy should be
  61    built statically at compile time rather than dynamically
  62    each and every time the assembler is run.  xoxorich. */
  63
  64 void
  65 do_scrub_begin ()
  66 {
  67   const char *p;
  68
  69   lex[' '] = LEX_IS_WHITESPACE;
  70   lex['\t'] = LEX_IS_WHITESPACE;
  71   lex['\n'] = LEX_IS_NEWLINE;
  72   lex[';'] = LEX_IS_LINE_SEPARATOR;
  73   lex['"'] = LEX_IS_STRINGQUOTE;
  74   lex['\''] = LEX_IS_ONECHAR_QUOTE;
  75   lex[':'] = LEX_IS_COLON;
  76
  77 #ifdef MRI
  78   lex['\''] = LEX_IS_STRINGQUOTE;
  79 #endif
  80   /* Note that these override the previous defaults, e.g. if ';'
  81            is a comment char, then it isn't a line separator.  */
  82   for (p = symbol_chars; *p; ++p)
  83     {
  84       lex[*p] = LEX_IS_SYMBOL_COMPONENT;
  85     }                           /* declare symbol characters */
  86
  87   for (p = line_comment_chars; *p; p++)
  88     {
  89       lex[*p] = LEX_IS_LINE_COMMENT_START;
  90     }                           /* declare line comment chars */
  91
  92   for (p = comment_chars; *p; p++)
  93     {
  94       lex[*p] = LEX_IS_COMMENT_START;
  95     }                           /* declare comment chars */
  96
  97   for (p = line_separator_chars; *p; p++)
  98     {
  99       lex[*p] = LEX_IS_LINE_SEPARATOR;
 100     }                           /* declare line separators */
 101
 102   /* Only allow slash-star comments if slash is not in use */
 103   if (lex['/'] == 0)
 104     {
 105       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 106     }
 107   /* FIXME-soon.  This is a bad hack but otherwise, we
 108            can't do c-style comments when '/' is a line
 109            comment char. xoxorich. */
 110   if (lex['*'] == 0)
 111     {
 112       lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
 113     }
 114 }                               /* do_scrub_begin() */
 115
 116 FILE *scrub_file;
 117
 118 int
 119 scrub_from_file ()
 120 {
 121   return getc (scrub_file);
 122 }
 123
 124 void
 125 scrub_to_file (ch)
 126      int ch;
 127 {
 128   ungetc (ch, scrub_file);
 129 }                               /* scrub_to_file() */
 130
 131 char *scrub_string;
 132 char *scrub_last_string;
 133
 134 int
 135 scrub_from_string ()
 136 {
 137   return scrub_string == scrub_last_string ? EOF : *scrub_string++;
 138 }                               /* scrub_from_string() */
 139
 140 void
 141 scrub_to_string (ch)
 142      int ch;
 143 {
 144   *--scrub_string = ch;
 145 }                               /* scrub_to_string() */
 146
 147 /* Saved state of the scrubber */
 148 static int state;
 149 static int old_state;
 150 static char *out_string;
 151 static char out_buf[20];
 152 static int add_newlines = 0;
 153
 154 /* Data structure for saving the state of app across #include's.  Note that
 155    app is called asynchronously to the parsing of the .include's, so our
 156    state at the time .include is interpreted is completely unrelated.
 157    That's why we have to save it all.  */
 158
 159 struct app_save
 160   {
 161     int state;
 162     int old_state;
 163     char *out_string;
 164     char out_buf[sizeof (out_buf)];
 165     int add_newlines;
 166     char *scrub_string;
 167     char *scrub_last_string;
 168     FILE *scrub_file;
 169   };
 170
 171 char *
 172 app_push ()
 173 {
 174   register struct app_save *saved;
 175
 176   saved = (struct app_save *) xmalloc (sizeof (*saved));
 177   saved->state = state;
 178   saved->old_state = old_state;
 179   saved->out_string = out_string;
 180   bcopy (saved->out_buf, out_buf, sizeof (out_buf));
 181   saved->add_newlines = add_newlines;
 182   saved->scrub_string = scrub_string;
 183   saved->scrub_last_string = scrub_last_string;
 184   saved->scrub_file = scrub_file;
 185
 186   /* do_scrub_begin() is not useful, just wastes time. */
 187   return (char *) saved;
 188 }
 189
 190 void
 191 app_pop (arg)
 192      char *arg;
 193 {
 194   register struct app_save *saved = (struct app_save *) arg;
 195
 196   /* There is no do_scrub_end (). */
 197   state = saved->state;
 198   old_state = saved->old_state;
 199   out_string = saved->out_string;
 200   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 201   add_newlines = saved->add_newlines;
 202   scrub_string = saved->scrub_string;
 203   scrub_last_string = saved->scrub_last_string;
 204   scrub_file = saved->scrub_file;
 205
 206   free (arg);
 207 }                               /* app_pop() */
 208
 209 int
 210 process_escape (ch)
 211      char ch;
 212 {
 213   switch (ch)
 214     {
 215     case 'b':
 216       return '\b';
 217     case 'f':
 218       return '\f';
 219     case 'n':
 220       return '\n';
 221     case 'r':
 222       return '\r';
 223     case 't':
 224       return '\t';
 225     case '\'':
 226       return '\'';
 227     case '"':
 228       return '\'';
 229     default:
 230       return ch;
 231     }
 232 }
 233 int
 234 do_scrub_next_char (get, unget)
 235      int (*get) ();
 236      void (*unget) ();
 237 {
 238   /*State 0: beginning of normal line
 239           1: After first whitespace on line (flush more white)
 240           2: After first non-white (opcode) on line (keep 1white)
 241           3: after second white on line (into operands) (flush white)
 242           4: after putting out a .line, put out digits
 243           5: parsing a string, then go to old-state
 244           6: putting out \ escape in a "d string.
 245           7: After putting out a .app-file, put out string.
 246           8: After putting out a .app-file string, flush until newline.
 247           -1: output string in out_string and go to the state in old_state
 248           -2: flush text until a '*' '/' is seen, then go to state old_state
 249           */
 250
 251   register int ch, ch2 = 0;
 252
 253   switch (state)
 254     {
 255     case -1:
 256       ch = *out_string++;
 257       if (*out_string == 0)
 258         {
 259           state = old_state;
 260           old_state = 3;
 261         }
 262       return ch;
 263
 264     case -2:
 265       for (;;)
 266         {
 267           do
 268             {
 269               ch = (*get) ();
 270             }
 271           while (ch != EOF && ch != '\n' && ch != '*');
 272           if (ch == '\n' || ch == EOF)
 273             return ch;
 274
 275           /* At this point, ch must be a '*' */
 276           while ((ch = (*get) ()) == '*')
 277             {
 278               ;
 279             }
 280           if (ch == EOF || ch == '/')
 281             break;
 282           (*unget) (ch);
 283         }
 284       state = old_state;
 285       return ' ';
 286
 287     case 4:
 288       ch = (*get) ();
 289       if (ch == EOF || (ch >= '0' && ch <= '9'))
 290         return ch;
 291       else
 292         {
 293           while (ch != EOF && IS_WHITESPACE (ch))
 294             ch = (*get) ();
 295           if (ch == '"')
 296             {
 297               (*unget) (ch);
 298               out_string = "\n.app-file ";
 299               old_state = 7;
 300               state = -1;
 301               return *out_string++;
 302             }
 303           else
 304             {
 305               while (ch != EOF && ch != '\n')
 306                 ch = (*get) ();
 307               return ch;
 308             }
 309         }
 310
 311     case 5:
 312       ch = (*get) ();
 313       if (lex[ch] == LEX_IS_STRINGQUOTE)
 314         {
 315           state = old_state;
 316           return ch;
 317         }
 318       else if (ch == '\\')
 319         {
 320           state = 6;
 321           return ch;
 322         }
 323       else if (ch == EOF)
 324         {
 325           as_warn ("End of file in string: inserted '\"'");
 326           state = old_state;
 327           (*unget) ('\n');
 328           return '"';
 329         }
 330       else
 331         {
 332           return ch;
 333         }
 334
 335     case 6:
 336       state = 5;
 337       ch = (*get) ();
 338       switch (ch)
 339         {
 340           /* This is neet.  Turn "string
 341                            more string" into "string\n  more string"
 342                            */
 343         case '\n':
 344           (*unget) ('n');
 345           add_newlines++;
 346           return '\\';
 347
 348         case '"':
 349         case '\\':
 350         case 'b':
 351         case 'f':
 352         case 'n':
 353         case 'r':
 354         case 't':
 355 #ifdef BACKSLASH_V
 356         case 'v':
 357 #endif /* BACKSLASH_V */
 358         case '0':
 359         case '1':
 360         case '2':
 361         case '3':
 362         case '4':
 363         case '5':
 364         case '6':
 365         case '7':
 366           break;
 367 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 368         default:
 369           as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
 370           break;
 371 #else /* ONLY_STANDARD_ESCAPES */
 372         default:
 373           /* Accept \x as x for any x */
 374           break;
 375 #endif /* ONLY_STANDARD_ESCAPES */
 376
 377         case EOF:
 378           as_warn ("End of file in string: '\"' inserted");
 379           return '"';
 380         }
 381       return ch;
 382
 383     case 7:
 384       ch = (*get) ();
 385       state = 5;
 386       old_state = 8;
 387       return ch;
 388
 389     case 8:
 390       do
 391         ch = (*get) ();
 392       while (ch != '\n');
 393       state = 0;
 394       return ch;
 395     }
 396
 397   /* OK, we are somewhere in states 0 through 4 */
 398
 399   /* flushchar: */
 400   ch = (*get) ();
 401 recycle:
 402   if (ch == EOF)
 403     {
 404       if (state != 0)
 405         as_warn ("End of file not at end of a line: Newline inserted.");
 406       return ch;
 407     }
 408
 409   switch (lex[ch])
 410     {
 411     case LEX_IS_WHITESPACE:
 412       do
 413         ch = (*get) ();
 414       while (ch != EOF && IS_WHITESPACE (ch));
 415       if (ch == EOF)
 416         return ch;
 417
 418       if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
 419         {
 420           goto recycle;
 421         }
 422 #ifdef MRI
 423       (*unget) (ch);            /* Put back */
 424       return ' ';               /* Always return one space at start of line */
 425 #endif
 426
 427       /* If we're in state 2, we've seen a non-white
 428                    character followed by whitespace.  If the next
 429                    character is ':', this is whitespace after a label
 430                    name which we can ignore.  */
 431       if (state == 2 && lex[ch] == LEX_IS_COLON)
 432         {
 433           state = 0;
 434           return ch;
 435         }
 436
 437       switch (state)
 438         {
 439         case 0:
 440           state++;
 441           goto recycle;         /* Punted leading sp */
 442         case 1:
 443           BAD_CASE (state);     /* We can't get here */
 444         case 2:
 445           state++;
 446           (*unget) (ch);
 447           return ' ';           /* Sp after opco */
 448         case 3:
 449           goto recycle;         /* Sp in operands */
 450         default:
 451           BAD_CASE (state);
 452         }
 453       break;
 454
 455     case LEX_IS_TWOCHAR_COMMENT_1ST:
 456       ch2 = (*get) ();
 457       if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
 458         {
 459           for (;;)
 460             {
 461               do
 462                 {
 463                   ch2 = (*get) ();
 464                   if (ch2 != EOF && IS_NEWLINE (ch2))
 465                     add_newlines++;
 466                 }
 467               while (ch2 != EOF &&
 468                      (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
 469
 470               while (ch2 != EOF &&
 471                      (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
 472                 {
 473                   ch2 = (*get) ();
 474                 }
 475
 476               if (ch2 == EOF
 477                   || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
 478                 break;
 479               (*unget) (ch);
 480             }
 481           if (ch2 == EOF)
 482             as_warn ("End of file in multiline comment");
 483
 484           ch = ' ';
 485           goto recycle;
 486         }
 487       else
 488         {
 489           if (ch2 != EOF)
 490             (*unget) (ch2);
 491           return ch;
 492         }
 493       break;
 494
 495     case LEX_IS_STRINGQUOTE:
 496       old_state = state;
 497       state = 5;
 498       return ch;
 499 #ifndef MRI
 500 #ifndef IEEE_STYLE
 501     case LEX_IS_ONECHAR_QUOTE:
 502       ch = (*get) ();
 503       if (ch == EOF)
 504         {
 505           as_warn ("End-of-file after a one-character quote; \\000 inserted");
 506           ch = 0;
 507         }
 508       if (ch == '\\')
 509         {
 510           ch = (*get) ();
 511           ch = process_escape (ch);
 512         }
 513       sprintf (out_buf, "%d", (int) (unsigned char) ch);
 514
 515
 516       /* None of these 'x constants for us.  We want 'x'.
 517                  */
 518       if ((ch = (*get) ()) != '\'')
 519         {
 520 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 521           as_warn ("Missing close quote: (assumed)");
 522 #else
 523           (*unget) (ch);
 524 #endif
 525         }
 526       if (strlen (out_buf) == 1)
 527         {
 528           return out_buf[0];
 529         }
 530       old_state = state;
 531       state = -1;
 532       out_string = out_buf;
 533       return *out_string++;
 534 #endif
 535 #endif
 536     case LEX_IS_COLON:
 537       if (state != 3)
 538         state = 0;
 539       return ch;
 540
 541     case LEX_IS_NEWLINE:
 542       /* Roll out a bunch of newlines from inside comments, etc.  */
 543       if (add_newlines)
 544         {
 545           --add_newlines;
 546           (*unget) (ch);
 547         }
 548       /* fall thru into... */
 549
 550     case LEX_IS_LINE_SEPARATOR:
 551       state = 0;
 552       return ch;
 553
 554     case LEX_IS_LINE_COMMENT_START:
 555       if (state != 0)           /* Not at start of line, act normal */
 556         goto de_fault;
 557
 558       /* FIXME-someday: The two character comment stuff was badly
 559     thought out.  On i386, we want '/' as line comment start
 560     AND we want C style comments.  hence this hack.  The
 561     whole lexical process should be reworked.  xoxorich.  */
 562
 563       if (ch == '/' && (ch2 = (*get) ()) == '*')
 564         {
 565           state = -2;
 566           return (do_scrub_next_char (get, unget));
 567         }
 568       else
 569         {
 570           (*unget) (ch2);
 571         }                       /* bad hack */
 572
 573       do
 574         ch = (*get) ();
 575       while (ch != EOF && IS_WHITESPACE (ch));
 576       if (ch == EOF)
 577         {
 578           as_warn ("EOF in comment:  Newline inserted");
 579           return '\n';
 580         }
 581       if (ch < '0' || ch > '9')
 582         {
 583           /* Non-numerics:  Eat whole comment line */
 584           while (ch != EOF && !IS_NEWLINE (ch))
 585             ch = (*get) ();
 586           if (ch == EOF)
 587             as_warn ("EOF in Comment: Newline inserted");
 588           state = 0;
 589           return '\n';
 590         }
 591       /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
 592       (*unget) (ch);
 593       old_state = 4;
 594       state = -1;
 595       out_string = ".line ";
 596       return *out_string++;
 597
 598     case LEX_IS_COMMENT_START:
 599       do
 600         ch = (*get) ();
 601       while (ch != EOF && !IS_NEWLINE (ch));
 602       if (ch == EOF)
 603         as_warn ("EOF in comment:  Newline inserted");
 604       state = 0;
 605       return '\n';
 606
 607     default:
 608     de_fault:
 609       /* Some relatively `normal' character.  */
 610       if (state == 0)
 611         {
 612           state = 2;            /* Now seeing opcode */
 613           return ch;
 614         }
 615       else if (state == 1)
 616         {
 617           state = 2;            /* Ditto */
 618           return ch;
 619         }
 620       else
 621         {
 622           return ch;            /* Opcode or operands already */
 623         }
 624     }
 625   return -1;
 626 }
 627
 628 #ifdef TEST
 629
 630 const char comment_chars[] = "|";
 631 const char line_comment_chars[] = "#";
 632
 633 main ()
 634 {
 635   int ch;
 636
 637   app_begin ();
 638   while ((ch = do_scrub_next_char (stdin)) != EOF)
 639     putc (ch, stdout);
 640 }
 641
 642 as_warn (str)
 643      char *str;
 644 {
 645   fputs (str, stderr);
 646   putc ('\n', stderr);
 647 }
 648
 649 #endif
 650
 651 /*
 652  * Local Variables:
 653  * comment-column: 0
 654  * fill-column: 131
 655  * End:
 656  */
 657
 658 /* end of app.c */