1 // script.cc -- handle linker scripts for gold.
2
3 // Copyright 2006, 2007 Free Software Foundation, Inc.
4 // Written by Ian Lance Taylor <iant@google.com>.
5
6 // This file is part of gold.
7
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 3 of the License, or
11 // (at your option) any later version.
12
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU General Public License for more details.
17
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21 // MA 02110-1301, USA.
22
23 #include "gold.h"
24
25 #include <string>
26 #include <vector>
27 #include <cstdio>
28 #include <cstdlib>
29
30 #include "options.h"
31 #include "fileread.h"
32 #include "workqueue.h"
33 #include "readsyms.h"
34 #include "yyscript.h"
35 #include "script.h"
36 #include "script-c.h"
37
38 namespace gold
39 {
40
41 // A token read from a script file. We don't implement keywords here;
42 // all keywords are simply represented as a string.
43
44 class Token
45 {
46 public:
47 // Token classification.
48 enum Classification
49 {
50 // Token is invalid.
51 TOKEN_INVALID,
52 // Token indicates end of input.
53 TOKEN_EOF,
54 // Token is a string of characters.
55 TOKEN_STRING,
56 // Token is an operator.
57 TOKEN_OPERATOR,
58 // Token is a number (an integer).
59 TOKEN_INTEGER
60 };
61
62   // We need an empty constructor so that we can put this class in STL containers.
63 Token()
64 : classification_(TOKEN_INVALID), value_(), opcode_(0),
65 lineno_(0), charpos_(0)
66 { }
67
68 // A general token with no value.
69 Token(Classification classification, int lineno, int charpos)
70 : classification_(classification), value_(), opcode_(0),
71 lineno_(lineno), charpos_(charpos)
72 {
73 gold_assert(classification == TOKEN_INVALID
74 || classification == TOKEN_EOF);
75 }
76
77 // A general token with a value.
78 Token(Classification classification, const std::string& value,
79 int lineno, int charpos)
80 : classification_(classification), value_(value), opcode_(0),
81 lineno_(lineno), charpos_(charpos)
82 {
83 gold_assert(classification != TOKEN_INVALID
84 && classification != TOKEN_EOF);
85 }
86
87 // A token representing a string of characters.
88 Token(const std::string& s, int lineno, int charpos)
89 : classification_(TOKEN_STRING), value_(s), opcode_(0),
90 lineno_(lineno), charpos_(charpos)
91 { }
92
93 // A token representing an operator.
94 Token(int opcode, int lineno, int charpos)
95 : classification_(TOKEN_OPERATOR), value_(), opcode_(opcode),
96 lineno_(lineno), charpos_(charpos)
97 { }
98
99 // Return whether the token is invalid.
100 bool
101 is_invalid() const
102 { return this->classification_ == TOKEN_INVALID; }
103
104 // Return whether this is an EOF token.
105 bool
106 is_eof() const
107 { return this->classification_ == TOKEN_EOF; }
108
109 // Return the token classification.
110 Classification
111 classification() const
112 { return this->classification_; }
113
114 // Return the line number at which the token starts.
115 int
116 lineno() const
117 { return this->lineno_; }
118
119   // Return the character position at which the token starts.
120 int
121 charpos() const
122 { return this->charpos_; }
123
124 // Get the value of a token.
125
126 const std::string&
127 string_value() const
128 {
129 gold_assert(this->classification_ == TOKEN_STRING);
130 return this->value_;
131 }
132
133 int
134 operator_value() const
135 {
136 gold_assert(this->classification_ == TOKEN_OPERATOR);
137 return this->opcode_;
138 }
139
140 int64_t
141 integer_value() const
142 {
143 gold_assert(this->classification_ == TOKEN_INTEGER);
144 return strtoll(this->value_.c_str(), NULL, 0);
145 }
146
147 private:
148 // The token classification.
149 Classification classification_;
150 // The token value, for TOKEN_STRING or TOKEN_INTEGER.
151 std::string value_;
152 // The token value, for TOKEN_OPERATOR.
153 int opcode_;
154 // The line number where this token started (one based).
155 int lineno_;
156 // The character position within the line where this token started
157 // (one based).
158 int charpos_;
159 };
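
// The value accessors above assert on the token's classification, so
// callers check classification() (or is_eof() and is_invalid()) before
// calling string_value(), operator_value() or integer_value();
// yylex() at the end of this file shows the typical pattern.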
160
161 // This class handles lexing a file into a sequence of tokens. We
162 // don't expect linker scripts to be large, so we just read them and
163 // tokenize them all at once.
164
165 class Lex
166 {
167 public:
168 Lex(Input_file* input_file)
169 : input_file_(input_file), tokens_()
170 { }
171
172 // Tokenize the file. Return the final token, which will be either
173 // an invalid token or an EOF token. An invalid token indicates
174 // that tokenization failed.
175 Token
176 tokenize();
177
178 // A token sequence.
179 typedef std::vector<Token> Token_sequence;
180
181 // Return the tokens.
182 const Token_sequence&
183 tokens() const
184 { return this->tokens_; }
185
186 private:
187 Lex(const Lex&);
188 Lex& operator=(const Lex&);
189
190 // Read the file into a string buffer.
191 void
192 read_file(std::string*);
193
194 // Make a general token with no value at the current location.
195 Token
196 make_token(Token::Classification c, const char* p) const
197 { return Token(c, this->lineno_, p - this->linestart_ + 1); }
198
199 // Make a general token with a value at the current location.
200 Token
201 make_token(Token::Classification c, const std::string& v, const char* p)
202 const
203 { return Token(c, v, this->lineno_, p - this->linestart_ + 1); }
204
205 // Make an operator token at the current location.
206 Token
207 make_token(int opcode, const char* p) const
208 { return Token(opcode, this->lineno_, p - this->linestart_ + 1); }
209
210 // Make an invalid token at the current location.
211 Token
212 make_invalid_token(const char* p)
213 { return this->make_token(Token::TOKEN_INVALID, p); }
214
215 // Make an EOF token at the current location.
216 Token
217 make_eof_token(const char* p)
218 { return this->make_token(Token::TOKEN_EOF, p); }
219
220 // Return whether C can be the first character in a name. C2 is the
221 // next character, since we sometimes need that.
222 static inline bool
223 can_start_name(char c, char c2);
224
225 // Return whether C can appear in a name which has already started.
226 static inline bool
227 can_continue_name(char c);
228
229 // Return whether C, C2, C3 can start a hex number.
230 static inline bool
231 can_start_hex(char c, char c2, char c3);
232
233 // Return whether C can appear in a hex number.
234 static inline bool
235 can_continue_hex(char c);
236
237 // Return whether C can start a non-hex number.
238 static inline bool
239 can_start_number(char c);
240
241 // Return whether C can appear in a non-hex number.
242 static inline bool
243 can_continue_number(char c)
244 { return Lex::can_start_number(c); }
245
246 // If C1 C2 C3 form a valid three character operator, return the
247 // opcode. Otherwise return 0.
248 static inline int
249 three_char_operator(char c1, char c2, char c3);
250
251 // If C1 C2 form a valid two character operator, return the opcode.
252 // Otherwise return 0.
253 static inline int
254 two_char_operator(char c1, char c2);
255
256 // If C1 is a valid one character operator, return the opcode.
257 // Otherwise return 0.
258 static inline int
259 one_char_operator(char c1);
260
261 // Read the next token.
262 Token
263 get_token(const char**);
264
265 // Skip a C style /* */ comment. Return false if the comment did
266 // not end.
267 bool
268 skip_c_comment(const char**);
269
270 // Skip a line # comment. Return false if there was no newline.
271 bool
272 skip_line_comment(const char**);
273
274 // Build a token CLASSIFICATION from all characters that match
275 // CAN_CONTINUE_FN. The token starts at START. Start matching from
276 // MATCH. Set *PP to the character following the token.
277 inline Token
278 gather_token(Token::Classification, bool (*can_continue_fn)(char),
279 const char* start, const char* match, const char** pp);
280
281 // Build a token from a quoted string.
282 Token
283 gather_quoted_string(const char** pp);
284
285 // The file we are reading.
286 Input_file* input_file_;
287 // The token sequence we create.
288 Token_sequence tokens_;
289 // The current line number.
290 int lineno_;
291 // The start of the current line in the buffer.
292 const char* linestart_;
293 };
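
// A minimal illustrative sketch, not used anywhere by gold itself:
// drive the lexer over INPUT_FILE and dump the resulting tokens to
// stderr.  The function name and the choice of stderr are arbitrary;
// only the Lex and Token interfaces declared above (plus Input_file
// and <cstdio>) are assumed.

void
dump_script_tokens(Input_file* input_file)
{
  Lex lex(input_file);
  Token last = lex.tokenize();
  if (last.is_invalid())
    {
      fprintf(stderr, "tokenization failed at line %d\n", last.lineno());
      return;
    }

  const Lex::Token_sequence& tokens = lex.tokens();
  for (Lex::Token_sequence::const_iterator p = tokens.begin();
       p != tokens.end();
       ++p)
    {
      switch (p->classification())
        {
        case Token::TOKEN_STRING:
          fprintf(stderr, "%d:%d string %s\n", p->lineno(), p->charpos(),
                  p->string_value().c_str());
          break;
        case Token::TOKEN_OPERATOR:
          fprintf(stderr, "%d:%d operator %d\n", p->lineno(), p->charpos(),
                  p->operator_value());
          break;
        case Token::TOKEN_INTEGER:
          fprintf(stderr, "%d:%d integer %lld\n", p->lineno(), p->charpos(),
                  static_cast<long long>(p->integer_value()));
          break;
        default:
          break;
        }
    }
}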
294
295 // Read the whole file into memory. We don't expect linker scripts to
296 // be large, so we just use a std::string as a buffer. We ignore the
297 // data we've already read, so that we read aligned buffers.
298
299 void
300 Lex::read_file(std::string* contents)
301 {
302 contents->clear();
303 off_t off = 0;
304 off_t got;
305 unsigned char buf[BUFSIZ];
306 do
307 {
308 this->input_file_->file().read(off, sizeof buf, buf, &got);
309 contents->append(reinterpret_cast<char*>(&buf[0]), got);
310 }
311 while (got == sizeof buf);
312 }
313
314 // Return whether C can be the start of a name, if the next character
315 // is C2.  A name can begin with a letter, underscore, period, or
316 // dollar sign. Because a name can be a file name, we also permit
317 // forward slash, backslash, and tilde. Tilde is the tricky case
318 // here; GNU ld also uses it as a bitwise not operator. It is only
319 // recognized as the operator if it is not immediately followed by
320 // some character which can appear in a symbol. That is, "~0" is a
321 // symbol name, and "~ 0" is an expression using bitwise not. We are
322 // compatible.
323
324 inline bool
325 Lex::can_start_name(char c, char c2)
326 {
327 switch (c)
328 {
329 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
330 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
331 case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
332 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
333 case 'Y': case 'Z':
334 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
335 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
336 case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
337 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
338 case 'y': case 'z':
339 case '_': case '.': case '$': case '/': case '\\':
340 return true;
341
342 case '~':
343 return can_continue_name(c2);
344
345 default:
346 return false;
347 }
348 }
349
350 // Return whether C can continue a name which has already started.
351 // Subsequent characters in a name are the same as the leading
352 // characters, plus digits and "=+-:[],?*". So in general the linker
353 // script language requires spaces around operators.
354
355 inline bool
356 Lex::can_continue_name(char c)
357 {
358 switch (c)
359 {
360 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
361 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
362 case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
363 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
364 case 'Y': case 'Z':
365 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
366 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
367 case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
368 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
369 case 'y': case 'z':
370 case '_': case '.': case '$': case '/': case '\\':
371 case '~':
372 case '0': case '1': case '2': case '3': case '4':
373 case '5': case '6': case '7': case '8': case '9':
374 case '=': case '+': case '-': case ':': case '[': case ']':
375 case ',': case '?': case '*':
376 return true;
377
378 default:
379 return false;
380 }
381 }
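
// For example, in "foo = bar+1" the text "bar+1" is lexed as the
// single name "bar+1", not as an addition; writing "bar + 1" with
// spaces yields the name "bar", the operator '+', and the integer 1.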
382
383 // For a number we accept 0x followed by hex digits, or any sequence
384 // of digits. The old linker accepts leading '$' for hex, and
385 // trailing HXBOD. Those are for MRI compatibility and we don't
386 // accept them. The old linker also accepts trailing MK for mega or
387 // kilo. Those are mentioned in the documentation, and we accept
388 // them.
389
390 // Return whether C1 C2 C3 can start a hex number.
391
392 inline bool
393 Lex::can_start_hex(char c1, char c2, char c3)
394 {
395 if (c1 == '0' && (c2 == 'x' || c2 == 'X'))
396 return Lex::can_continue_hex(c3);
397 return false;
398 }
399
400 // Return whether C can appear in a hex number.
401
402 inline bool
403 Lex::can_continue_hex(char c)
404 {
405 switch (c)
406 {
407 case '0': case '1': case '2': case '3': case '4':
408 case '5': case '6': case '7': case '8': case '9':
409 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
410 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
411 return true;
412
413 default:
414 return false;
415 }
416 }
417
418 // Return whether C can start a non-hex number.
419
420 inline bool
421 Lex::can_start_number(char c)
422 {
423 switch (c)
424 {
425 case '0': case '1': case '2': case '3': case '4':
426 case '5': case '6': case '7': case '8': case '9':
427 return true;
428
429 default:
430 return false;
431 }
432 }
433
434 // If C1 C2 C3 form a valid three character operator, return the
435 // opcode (defined in the yyscript.h file generated from yyscript.y).
436 // Otherwise return 0.
437
438 inline int
439 Lex::three_char_operator(char c1, char c2, char c3)
440 {
441 switch (c1)
442 {
443 case '<':
444 if (c2 == '<' && c3 == '=')
445 return LSHIFTEQ;
446 break;
447 case '>':
448 if (c2 == '>' && c3 == '=')
449 return RSHIFTEQ;
450 break;
451 default:
452 break;
453 }
454 return 0;
455 }
456
457 // If C1 C2 form a valid two character operator, return the opcode
458 // (defined in the yyscript.h file generated from yyscript.y).
459 // Otherwise return 0.
460
461 inline int
462 Lex::two_char_operator(char c1, char c2)
463 {
464 switch (c1)
465 {
466 case '=':
467 if (c2 == '=')
468 return EQ;
469 break;
470 case '!':
471 if (c2 == '=')
472 return NE;
473 break;
474 case '+':
475 if (c2 == '=')
476 return PLUSEQ;
477 break;
478 case '-':
479 if (c2 == '=')
480 return MINUSEQ;
481 break;
482 case '*':
483 if (c2 == '=')
484 return MULTEQ;
485 break;
486 case '/':
487 if (c2 == '=')
488 return DIVEQ;
489 break;
490 case '|':
491 if (c2 == '=')
492 return OREQ;
493 if (c2 == '|')
494 return OROR;
495 break;
496 case '&':
497 if (c2 == '=')
498 return ANDEQ;
499 if (c2 == '&')
500 return ANDAND;
501 break;
502 case '>':
503 if (c2 == '=')
504 return GE;
505 if (c2 == '>')
506 return RSHIFT;
507 break;
508 case '<':
509 if (c2 == '=')
510 return LE;
511 if (c2 == '<')
512 return LSHIFT;
513 break;
514 default:
515 break;
516 }
517 return 0;
518 }
519
520 // If C1 is a valid one character operator, return the opcode; otherwise return 0.
521
522 inline int
523 Lex::one_char_operator(char c1)
524 {
525 switch (c1)
526 {
527 case '+':
528 case '-':
529 case '*':
530 case '/':
531 case '%':
532 case '!':
533 case '&':
534 case '|':
535 case '^':
536 case '~':
537 case '<':
538 case '>':
539 case '=':
540 case '?':
541 case ',':
542 case '(':
543 case ')':
544 case '{':
545 case '}':
546 case '[':
547 case ']':
548 case ':':
549 case ';':
550 return c1;
551 default:
552 return 0;
553 }
554 }
555
556 // Skip a C style comment. *PP points to just after the "/*". Return
557 // false if the comment did not end.
558
559 bool
560 Lex::skip_c_comment(const char** pp)
561 {
562 const char* p = *pp;
563 while (p[0] != '*' || p[1] != '/')
564 {
565 if (*p == '\0')
566 {
567 *pp = p;
568 return false;
569 }
570
571 if (*p == '\n')
572 {
573 ++this->lineno_;
574 this->linestart_ = p + 1;
575 }
576 ++p;
577 }
578
579 *pp = p + 2;
580 return true;
581 }
582
583 // Skip a line # comment. Return false if there was no newline.
584
585 bool
586 Lex::skip_line_comment(const char** pp)
587 {
588 const char* p = *pp;
589 size_t skip = strcspn(p, "\n");
590 if (p[skip] == '\0')
591 {
592 *pp = p + skip;
593 return false;
594 }
595
596 p += skip + 1;
597 ++this->lineno_;
598 this->linestart_ = p;
599 *pp = p;
600
601 return true;
602 }
603
604 // Build a token CLASSIFICATION from all characters that match
605 // CAN_CONTINUE_FN. Update *PP.
606
607 inline Token
608 Lex::gather_token(Token::Classification classification,
609 bool (*can_continue_fn)(char),
610 const char* start,
611 const char* match,
612 const char **pp)
613 {
614 while ((*can_continue_fn)(*match))
615 ++match;
616 *pp = match;
617 return this->make_token(classification,
618 std::string(start, match - start),
619 start);
620 }
621
622 // Build a token from a quoted string.
623
624 Token
625 Lex::gather_quoted_string(const char** pp)
626 {
627 const char* start = *pp;
628 const char* p = start;
629 ++p;
630 size_t skip = strcspn(p, "\"\n");
631 if (p[skip] != '"')
632 return this->make_invalid_token(start);
633 *pp = p + skip + 1;
634 return this->make_token(Token::TOKEN_STRING,
635 std::string(p, skip),
636 start);
637 }
638
639 // Return the next token at *PP. Update *PP. General guideline: we
640 // require linker scripts to be simple ASCII. No unicode linker
641 // scripts. In particular we can assume that any '\0' is the end of
642 // the input.
643
644 Token
645 Lex::get_token(const char** pp)
646 {
647 const char* p = *pp;
648
649 while (true)
650 {
651 if (*p == '\0')
652 {
653 *pp = p;
654 return this->make_eof_token(p);
655 }
656
657 // Skip whitespace quickly.
658 while (*p == ' ' || *p == '\t')
659 ++p;
660
661 if (*p == '\n')
662 {
663 ++p;
664 ++this->lineno_;
665 this->linestart_ = p;
666 continue;
667 }
668
669 // Skip C style comments.
670 if (p[0] == '/' && p[1] == '*')
671 {
672 int lineno = this->lineno_;
673 int charpos = p - this->linestart_ + 1;
674
675 *pp = p + 2;
676 if (!this->skip_c_comment(pp))
677 return Token(Token::TOKEN_INVALID, lineno, charpos);
678 p = *pp;
679
680 continue;
681 }
682
683 // Skip line comments.
684 if (*p == '#')
685 {
686 *pp = p + 1;
687 if (!this->skip_line_comment(pp))
688 return this->make_eof_token(p);
689 p = *pp;
690 continue;
691 }
692
693 // Check for a name.
694 if (Lex::can_start_name(p[0], p[1]))
695 return this->gather_token(Token::TOKEN_STRING,
696 Lex::can_continue_name,
697 p, p + 2, pp);
698
699 // We accept any arbitrary name in double quotes, as long as it
700 // does not cross a line boundary.
701 if (*p == '"')
702 {
703 *pp = p;
704 return this->gather_quoted_string(pp);
705 }
706
707 // Check for a number.
708
709 if (Lex::can_start_hex(p[0], p[1], p[2]))
710 return this->gather_token(Token::TOKEN_INTEGER,
711 Lex::can_continue_hex,
712 p, p + 3, pp);
713
714 if (Lex::can_start_number(p[0]))
715 return this->gather_token(Token::TOKEN_INTEGER,
716 Lex::can_continue_number,
717 p, p + 1, pp);
718
719 // Check for operators.
720
721 int opcode = Lex::three_char_operator(p[0], p[1], p[2]);
722 if (opcode != 0)
723 {
724 *pp = p + 3;
725 return this->make_token(opcode, p);
726 }
727
728 opcode = Lex::two_char_operator(p[0], p[1]);
729 if (opcode != 0)
730 {
731 *pp = p + 2;
732 return this->make_token(opcode, p);
733 }
734
735 opcode = Lex::one_char_operator(p[0]);
736 if (opcode != 0)
737 {
738 *pp = p + 1;
739 return this->make_token(opcode, p);
740 }
741
742 return this->make_token(Token::TOKEN_INVALID, p);
743 }
744 }
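
// Note that get_token checks for a three character operator before a
// two or one character operator, so an input like "<<=" becomes the
// single LSHIFTEQ token rather than LSHIFT followed by '='.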
745
746 // Tokenize the file. Return the final token.
747
748 Token
749 Lex::tokenize()
750 {
751 std::string contents;
752 this->read_file(&contents);
753
754 const char* p = contents.c_str();
755
756 this->lineno_ = 1;
757 this->linestart_ = p;
758
759 while (true)
760 {
761 Token t(this->get_token(&p));
762
763 // Don't let an early null byte fool us into thinking that we've
764 // reached the end of the file.
765 if (t.is_eof()
766 && static_cast<size_t>(p - contents.c_str()) < contents.length())
767 t = this->make_invalid_token(p);
768
769 if (t.is_invalid() || t.is_eof())
770 return t;
771
772 this->tokens_.push_back(t);
773 }
774 }
775
776 // A trivial task which waits for THIS_BLOCKER to be clear and then
777 // clears NEXT_BLOCKER. THIS_BLOCKER may be NULL.
778
779 class Script_unblock : public Task
780 {
781 public:
782 Script_unblock(Task_token* this_blocker, Task_token* next_blocker)
783 : this_blocker_(this_blocker), next_blocker_(next_blocker)
784 { }
785
786 ~Script_unblock()
787 {
788 if (this->this_blocker_ != NULL)
789 delete this->this_blocker_;
790 }
791
792 Is_runnable_type
793 is_runnable(Workqueue*)
794 {
795 if (this->this_blocker_ != NULL && this->this_blocker_->is_blocked())
796 return IS_BLOCKED;
797 return IS_RUNNABLE;
798 }
799
800 Task_locker*
801 locks(Workqueue* workqueue)
802 {
803 return new Task_locker_block(*this->next_blocker_, workqueue);
804 }
805
806 void
807 run(Workqueue*)
808 { }
809
810 private:
811 Task_token* this_blocker_;
812 Task_token* next_blocker_;
813 };
814
815 // This class holds data passed through the parser to the lexer and to
816 // the parser support functions. This avoids global variables. We
817 // can't use global variables because we are not necessarily running
818 // in the main thread.
819
820 class Parser_closure
821 {
822 public:
823 Parser_closure(const char* filename,
824 const Position_dependent_options& posdep_options,
825 bool in_group,
826 const Lex::Token_sequence* tokens)
827 : filename_(filename), posdep_options_(posdep_options),
828 in_group_(in_group), tokens_(tokens),
829 next_token_index_(0), inputs_(NULL)
830 { }
831
832 // Return the file name.
833 const char*
834 filename() const
835 { return this->filename_; }
836
837 // Return the position dependent options. The caller may modify
838 // this.
839 Position_dependent_options&
840 position_dependent_options()
841 { return this->posdep_options_; }
842
843 // Return whether this script is being run in a group.
844 bool
845 in_group() const
846 { return this->in_group_; }
847
848 // Whether we are at the end of the token list.
849 bool
850 at_eof() const
851 { return this->next_token_index_ >= this->tokens_->size(); }
852
853 // Return the next token.
854 const Token*
855 next_token()
856 {
857 const Token* ret = &(*this->tokens_)[this->next_token_index_];
858 ++this->next_token_index_;
859 return ret;
860 }
861
862 // Return the list of input files, creating it if necessary. This
863 // is a space leak--we never free the INPUTS_ pointer.
864 Input_arguments*
865 inputs()
866 {
867 if (this->inputs_ == NULL)
868 this->inputs_ = new Input_arguments();
869 return this->inputs_;
870 }
871
872 // Return whether we saw any input files.
873 bool
874 saw_inputs() const
875 { return this->inputs_ != NULL && !this->inputs_->empty(); }
876
877 private:
878 // The name of the file we are reading.
879 const char* filename_;
880 // The position dependent options.
881 Position_dependent_options posdep_options_;
882 // Whether we are currently in a --start-group/--end-group.
883 bool in_group_;
884
885 // The tokens to be returned by the lexer.
886 const Lex::Token_sequence* tokens_;
887 // The index of the next token to return.
888 unsigned int next_token_index_;
889 // New input files found to add to the link.
890 Input_arguments* inputs_;
891 };
892
893 // FILE was found as an argument on the command line. Try to read it
894 // as a script. We've already read BYTES of data into P, but we
895 // ignore that. Return true if the file was handled.
896
897 bool
898 read_input_script(Workqueue* workqueue, const General_options& options,
899 Symbol_table* symtab, Layout* layout,
900 const Dirsearch& dirsearch, Input_objects* input_objects,
901 Input_group* input_group,
902 const Input_argument* input_argument,
903 Input_file* input_file, const unsigned char*, off_t,
904 Task_token* this_blocker, Task_token* next_blocker)
905 {
906 Lex lex(input_file);
907 if (lex.tokenize().is_invalid())
908 return false;
909
910 Parser_closure closure(input_file->filename().c_str(),
911 input_argument->file().options(),
912 input_group != NULL,
913 &lex.tokens());
914
915 if (yyparse(&closure) != 0)
916 return false;
917
918 // THIS_BLOCKER must be clear before we may add anything to the
919 // symbol table. We are responsible for unblocking NEXT_BLOCKER
920 // when we are done. We are responsible for deleting THIS_BLOCKER
921 // when it is unblocked.
922
923 if (!closure.saw_inputs())
924 {
925 // The script did not add any files to read. Note that we are
926 // not permitted to call NEXT_BLOCKER->unblock() here even if
927 // THIS_BLOCKER is NULL, as we are not in the main thread.
928 workqueue->queue(new Script_unblock(this_blocker, next_blocker));
929 return true;
930 }
931
932 for (Input_arguments::const_iterator p = closure.inputs()->begin();
933 p != closure.inputs()->end();
934 ++p)
935 {
936 Task_token* nb;
937 if (p + 1 == closure.inputs()->end())
938 nb = next_blocker;
939 else
940 {
941 nb = new Task_token();
942 nb->add_blocker();
943 }
944 workqueue->queue(new Read_symbols(options, input_objects, symtab,
945 layout, dirsearch, &*p,
946 input_group, this_blocker, nb));
947 this_blocker = nb;
948 }
949
950 return true;
951 }
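
// For example, if the script's INPUT command names a.o, b.o and c.o
// (hypothetical files), the loop above queues three Read_symbols
// tasks: the first waits on THIS_BLOCKER, each later task waits on a
// fresh blocker unblocked by its predecessor, and the task for c.o is
// the one that finally unblocks NEXT_BLOCKER.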
952
953 // Manage mapping from keywords to the codes expected by the bison
954 // parser.
955
956 class Keyword_to_parsecode
957 {
958 public:
959 // The structure which maps keywords to parsecodes.
960 struct Keyword_parsecode
961 {
962 // Keyword.
963 const char* keyword;
964 // Corresponding parsecode.
965 int parsecode;
966 };
967
968   // Return the parsecode corresponding to KEYWORD, or 0 if it is not a
969 // keyword.
970 static int
971 keyword_to_parsecode(const char* keyword);
972
973 private:
974 // The array of all keywords.
975 static const Keyword_parsecode keyword_parsecodes_[];
976
977 // The number of keywords.
978 static const int keyword_count;
979 };
980
981 // Mapping from keyword string to keyword parsecode. This array must
982 // be kept in sorted order. Parsecodes are looked up using bsearch.
983 // This array must correspond to the list of parsecodes in yyscript.y.
984
985 const Keyword_to_parsecode::Keyword_parsecode
986 Keyword_to_parsecode::keyword_parsecodes_[] =
987 {
988 { "ABSOLUTE", ABSOLUTE },
989 { "ADDR", ADDR },
990 { "ALIGN", ALIGN_K },
991 { "ASSERT", ASSERT_K },
992 { "AS_NEEDED", AS_NEEDED },
993 { "AT", AT },
994 { "BIND", BIND },
995 { "BLOCK", BLOCK },
996 { "BYTE", BYTE },
997 { "CONSTANT", CONSTANT },
998 { "CONSTRUCTORS", CONSTRUCTORS },
999 { "COPY", COPY },
1000 { "CREATE_OBJECT_SYMBOLS", CREATE_OBJECT_SYMBOLS },
1001 { "DATA_SEGMENT_ALIGN", DATA_SEGMENT_ALIGN },
1002 { "DATA_SEGMENT_END", DATA_SEGMENT_END },
1003 { "DATA_SEGMENT_RELRO_END", DATA_SEGMENT_RELRO_END },
1004 { "DEFINED", DEFINED },
1005 { "DSECT", DSECT },
1006 { "ENTRY", ENTRY },
1007 { "EXCLUDE_FILE", EXCLUDE_FILE },
1008 { "EXTERN", EXTERN },
1009 { "FILL", FILL },
1010 { "FLOAT", FLOAT },
1011 { "FORCE_COMMON_ALLOCATION", FORCE_COMMON_ALLOCATION },
1012 { "GROUP", GROUP },
1013 { "HLL", HLL },
1014 { "INCLUDE", INCLUDE },
1015 { "INFO", INFO },
1016 { "INHIBIT_COMMON_ALLOCATION", INHIBIT_COMMON_ALLOCATION },
1017 { "INPUT", INPUT },
1018 { "KEEP", KEEP },
1019 { "LENGTH", LENGTH },
1020 { "LOADADDR", LOADADDR },
1021 { "LONG", LONG },
1022 { "MAP", MAP },
1023 { "MAX", MAX_K },
1024 { "MEMORY", MEMORY },
1025 { "MIN", MIN_K },
1026 { "NEXT", NEXT },
1027 { "NOCROSSREFS", NOCROSSREFS },
1028 { "NOFLOAT", NOFLOAT },
1029 { "NOLOAD", NOLOAD },
1030 { "ONLY_IF_RO", ONLY_IF_RO },
1031 { "ONLY_IF_RW", ONLY_IF_RW },
1032 { "ORIGIN", ORIGIN },
1033 { "OUTPUT", OUTPUT },
1034 { "OUTPUT_ARCH", OUTPUT_ARCH },
1035 { "OUTPUT_FORMAT", OUTPUT_FORMAT },
1036 { "OVERLAY", OVERLAY },
1037 { "PHDRS", PHDRS },
1038 { "PROVIDE", PROVIDE },
1039 { "PROVIDE_HIDDEN", PROVIDE_HIDDEN },
1040 { "QUAD", QUAD },
1041 { "SEARCH_DIR", SEARCH_DIR },
1042 { "SECTIONS", SECTIONS },
1043 { "SEGMENT_START", SEGMENT_START },
1044 { "SHORT", SHORT },
1045 { "SIZEOF", SIZEOF },
1046 { "SIZEOF_HEADERS", SIZEOF_HEADERS },
1047 { "SORT_BY_ALIGNMENT", SORT_BY_ALIGNMENT },
1048 { "SORT_BY_NAME", SORT_BY_NAME },
1049 { "SPECIAL", SPECIAL },
1050 { "SQUAD", SQUAD },
1051 { "STARTUP", STARTUP },
1052 { "SUBALIGN", SUBALIGN },
1053 { "SYSLIB", SYSLIB },
1054 { "TARGET", TARGET_K },
1055 { "TRUNCATE", TRUNCATE },
1056 { "VERSION", VERSIONK },
1057 { "global", GLOBAL },
1058 { "l", LENGTH },
1059 { "len", LENGTH },
1060 { "local", LOCAL },
1061 { "o", ORIGIN },
1062 { "org", ORIGIN },
1063 { "sizeof_headers", SIZEOF_HEADERS },
1064 };
1065
1066 const int Keyword_to_parsecode::keyword_count =
1067 (sizeof(Keyword_to_parsecode::keyword_parsecodes_)
1068 / sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]));
1069
1070 // Comparison function passed to bsearch.
1071
1072 extern "C"
1073 {
1074
1075 static int
1076 ktt_compare(const void* keyv, const void* kttv)
1077 {
1078 const char* key = static_cast<const char*>(keyv);
1079 const Keyword_to_parsecode::Keyword_parsecode* ktt =
1080 static_cast<const Keyword_to_parsecode::Keyword_parsecode*>(kttv);
1081 return strcmp(key, ktt->keyword);
1082 }
1083
1084 } // End extern "C".
1085
1086 int
1087 Keyword_to_parsecode::keyword_to_parsecode(const char* keyword)
1088 {
1089 void* kttv = bsearch(keyword,
1090 Keyword_to_parsecode::keyword_parsecodes_,
1091 Keyword_to_parsecode::keyword_count,
1092 sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]),
1093 ktt_compare);
1094 if (kttv == NULL)
1095 return 0;
1096 Keyword_parsecode* ktt = static_cast<Keyword_parsecode*>(kttv);
1097 return ktt->parsecode;
1098 }
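
// Keyword lookups are case-sensitive strcmp matches against the
// sorted table above.  For example,
//   Keyword_to_parsecode::keyword_to_parsecode("SECTIONS")
// returns the SECTIONS parsecode, while a spelling that is not in the
// table, such as "sections", returns 0 and is passed to the parser as
// a plain STRING by yylex below.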
1099
1100 } // End namespace gold.
1101
1102 // The remaining functions are extern "C", so it's clearer to not put
1103 // them in namespace gold.
1104
1105 using namespace gold;
1106
1107 // This function is called by the bison parser to return the next
1108 // token.
1109
1110 extern "C" int
1111 yylex(YYSTYPE* lvalp, void* closurev)
1112 {
1113 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1114
1115 if (closure->at_eof())
1116 return 0;
1117
1118 const Token* token = closure->next_token();
1119
1120 switch (token->classification())
1121 {
1122 default:
1123 case Token::TOKEN_INVALID:
1124 case Token::TOKEN_EOF:
1125 gold_unreachable();
1126
1127 case Token::TOKEN_STRING:
1128 {
1129 const char* str = token->string_value().c_str();
1130 int parsecode = Keyword_to_parsecode::keyword_to_parsecode(str);
1131 if (parsecode != 0)
1132 return parsecode;
1133 lvalp->string = str;
1134 return STRING;
1135 }
1136
1137 case Token::TOKEN_OPERATOR:
1138 return token->operator_value();
1139
1140 case Token::TOKEN_INTEGER:
1141 lvalp->integer = token->integer_value();
1142 return INTEGER;
1143 }
1144 }
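
// For example, for the script text "INPUT(/lib/crt0.o)" (a
// hypothetical path) the lexer produces the tokens STRING("INPUT"),
// '(', STRING("/lib/crt0.o") and ')'.  yylex then returns the INPUT
// parsecode for the first token via the keyword table, the character
// codes for the parentheses, STRING with lvalp->string set for the
// file name, and finally 0 at end of input.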
1145
1146 // This function is called by the bison parser to report an error.
1147
1148 extern "C" void
1149 yyerror(void* closurev, const char* message)
1150 {
1151 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1152
1153 fprintf(stderr, _("%s: %s: %s\n"),
1154 program_name, closure->filename(), message);
1155 gold_exit(false);
1156 }
1157
1158 // Called by the bison parser to add a file to the link.
1159
1160 extern "C" void
1161 script_add_file(void* closurev, const char* name)
1162 {
1163 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1164 std::string absname;
1165 if (name[0] == '/')
1166 {
1167 absname = name;
1168 }
1169 else
1170 {
1171 // Prepend `dirname closure->filename()` to make the path absolute.
1172 char *slash = strrchr(closure->filename(), '/');
1173 absname.assign(closure->filename(),
1174 slash ? slash - closure->filename() + 1 : 0);
1175 absname += name;
1176 }
1177 Input_file_argument file(absname.c_str(), false, closure->position_dependent_options());
1178 closure->inputs()->add_file(file);
1179 }
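
// For example (hypothetical names): if the script
// /usr/local/lib/ldscripts/foo.x adds the input "libbar.a", ABSNAME
// becomes "/usr/local/lib/ldscripts/libbar.a"; if the script's
// filename contains no slash, the name is used unchanged.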
1180
1181 // Called by the bison parser to start a group. If we are already in
1182 // a group, that means that this script was invoked within a
1183 // --start-group --end-group sequence on the command line, or that
1184 // this script was found in a GROUP of another script. In that case,
1185 // we simply continue the existing group, rather than starting a new
1186 // one. It is possible to construct a case in which this will do
1187 // something other than what would happen if we did a recursive group,
1188 // but it's hard to imagine why the different behaviour would be
1189 // useful for a real program. Avoiding recursive groups is simpler
1190 // and more efficient.
1191
1192 extern "C" void
1193 script_start_group(void* closurev)
1194 {
1195 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1196 if (!closure->in_group())
1197 closure->inputs()->start_group();
1198 }
1199
1200 // Called by the bison parser at the end of a group.
1201
1202 extern "C" void
1203 script_end_group(void* closurev)
1204 {
1205 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1206 if (!closure->in_group())
1207 closure->inputs()->end_group();
1208 }
1209
1210 // Called by the bison parser to start an AS_NEEDED list.
1211
1212 extern "C" void
1213 script_start_as_needed(void* closurev)
1214 {
1215 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1216 closure->position_dependent_options().set_as_needed();
1217 }
1218
1219 // Called by the bison parser at the end of an AS_NEEDED list.
1220
1221 extern "C" void
1222 script_end_as_needed(void* closurev)
1223 {
1224 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1225 closure->position_dependent_options().clear_as_needed();
1226 }