Make relative references in linker scripts absolute and update a
[deliverable/binutils-gdb.git] / gold / script.cc
1 // script.cc -- handle linker scripts for gold.
2
#include "gold.h"

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>

#include "options.h"
#include "fileread.h"
#include "workqueue.h"
#include "readsyms.h"
#include "yyscript.h"
#include "script.h"
#include "script-c.h"
17
18 namespace gold
19 {
20
21 // A token read from a script file. We don't implement keywords here;
22 // all keywords are simply represented as a string.
23
class Token
{
 public:
  // Token classification.
  enum Classification
  {
    // Token is invalid.
    TOKEN_INVALID,
    // Token indicates end of input.
    TOKEN_EOF,
    // Token is a string of characters.
    TOKEN_STRING,
    // Token is an operator.
    TOKEN_OPERATOR,
    // Token is a number (an integer).
    TOKEN_INTEGER
  };

  // We need an empty constructor so that we can store this in STL
  // containers.
  Token()
    : classification_(TOKEN_INVALID), value_(), opcode_(0),
      lineno_(0), charpos_(0)
  { }

  // A general token with no value.  Only TOKEN_INVALID and TOKEN_EOF
  // may be built this way; they carry no data beyond their location.
  Token(Classification classification, int lineno, int charpos)
    : classification_(classification), value_(), opcode_(0),
      lineno_(lineno), charpos_(charpos)
  {
    gold_assert(classification == TOKEN_INVALID
		|| classification == TOKEN_EOF);
  }

  // A general token with a value.
  Token(Classification classification, const std::string& value,
	int lineno, int charpos)
    : classification_(classification), value_(value), opcode_(0),
      lineno_(lineno), charpos_(charpos)
  {
    gold_assert(classification != TOKEN_INVALID
		&& classification != TOKEN_EOF);
  }

  // A token representing a string of characters.
  Token(const std::string& s, int lineno, int charpos)
    : classification_(TOKEN_STRING), value_(s), opcode_(0),
      lineno_(lineno), charpos_(charpos)
  { }

  // A token representing an operator.  OPCODE is either a single
  // operator character or a multi-character code from yyscript.h.
  Token(int opcode, int lineno, int charpos)
    : classification_(TOKEN_OPERATOR), value_(), opcode_(opcode),
      lineno_(lineno), charpos_(charpos)
  { }

  // Return whether the token is invalid.
  bool
  is_invalid() const
  { return this->classification_ == TOKEN_INVALID; }

  // Return whether this is an EOF token.
  bool
  is_eof() const
  { return this->classification_ == TOKEN_EOF; }

  // Return the token classification.
  Classification
  classification() const
  { return this->classification_; }

  // Return the line number at which the token starts.
  int
  lineno() const
  { return this->lineno_; }

  // Return the character position at which the token starts.
  int
  charpos() const
  { return this->charpos_; }

  // Get the value of a token.

  // The string value; only valid for TOKEN_STRING.
  const std::string&
  string_value() const
  {
    gold_assert(this->classification_ == TOKEN_STRING);
    return this->value_;
  }

  // The operator opcode; only valid for TOKEN_OPERATOR.
  int
  operator_value() const
  {
    gold_assert(this->classification_ == TOKEN_OPERATOR);
    return this->opcode_;
  }

  // The integer value; only valid for TOKEN_INTEGER.  The text is
  // re-converted on each call; strtoll with base 0 honors 0x and
  // leading-0 prefixes.
  int64_t
  integer_value() const
  {
    gold_assert(this->classification_ == TOKEN_INTEGER);
    return strtoll(this->value_.c_str(), NULL, 0);
  }

 private:
  // The token classification.
  Classification classification_;
  // The token value, for TOKEN_STRING or TOKEN_INTEGER.
  std::string value_;
  // The token value, for TOKEN_OPERATOR.
  int opcode_;
  // The line number where this token started (one based).
  int lineno_;
  // The character position within the line where this token started
  // (one based).
  int charpos_;
};
140
141 // This class handles lexing a file into a sequence of tokens. We
142 // don't expect linker scripts to be large, so we just read them and
143 // tokenize them all at once.
144
class Lex
{
 public:
  Lex(Input_file* input_file)
    : input_file_(input_file), tokens_()
  { }

  // Tokenize the file.  Return the final token, which will be either
  // an invalid token or an EOF token.  An invalid token indicates
  // that tokenization failed.
  Token
  tokenize();

  // A token sequence.
  typedef std::vector<Token> Token_sequence;

  // Return the tokens.  Only meaningful after tokenize() has been
  // called.
  const Token_sequence&
  tokens() const
  { return this->tokens_; }

 private:
  // Not copyable or assignable (pre-C++11 idiom: declared private,
  // never defined).
  Lex(const Lex&);
  Lex& operator=(const Lex&);

  // Read the file into a string buffer.
  void
  read_file(std::string*);

  // Make a general token with no value at the current location.
  Token
  make_token(Token::Classification c, const char* p) const
  { return Token(c, this->lineno_, p - this->linestart_ + 1); }

  // Make a general token with a value at the current location.
  Token
  make_token(Token::Classification c, const std::string& v, const char* p)
    const
  { return Token(c, v, this->lineno_, p - this->linestart_ + 1); }

  // Make an operator token at the current location.
  Token
  make_token(int opcode, const char* p) const
  { return Token(opcode, this->lineno_, p - this->linestart_ + 1); }

  // Make an invalid token at the current location.
  Token
  make_invalid_token(const char* p)
  { return this->make_token(Token::TOKEN_INVALID, p); }

  // Make an EOF token at the current location.
  Token
  make_eof_token(const char* p)
  { return this->make_token(Token::TOKEN_EOF, p); }

  // Return whether C can be the first character in a name.  C2 is the
  // next character, since we sometimes need that.
  static inline bool
  can_start_name(char c, char c2);

  // Return whether C can appear in a name which has already started.
  static inline bool
  can_continue_name(char c);

  // Return whether C, C2, C3 can start a hex number.
  static inline bool
  can_start_hex(char c, char c2, char c3);

  // Return whether C can appear in a hex number.
  static inline bool
  can_continue_hex(char c);

  // Return whether C can start a non-hex number.
  static inline bool
  can_start_number(char c);

  // Return whether C can appear in a non-hex number.
  static inline bool
  can_continue_number(char c)
  { return Lex::can_start_number(c); }

  // If C1 C2 C3 form a valid three character operator, return the
  // opcode.  Otherwise return 0.
  static inline int
  three_char_operator(char c1, char c2, char c3);

  // If C1 C2 form a valid two character operator, return the opcode.
  // Otherwise return 0.
  static inline int
  two_char_operator(char c1, char c2);

  // If C1 is a valid one character operator, return the opcode.
  // Otherwise return 0.
  static inline int
  one_char_operator(char c1);

  // Read the next token.
  Token
  get_token(const char**);

  // Skip a C style /* */ comment.  Return false if the comment did
  // not end.
  bool
  skip_c_comment(const char**);

  // Skip a line # comment.  Return false if there was no newline.
  bool
  skip_line_comment(const char**);

  // Build a token CLASSIFICATION from all characters that match
  // CAN_CONTINUE_FN.  The token starts at START.  Start matching from
  // MATCH.  Set *PP to the character following the token.
  inline Token
  gather_token(Token::Classification, bool (*can_continue_fn)(char),
	       const char* start, const char* match, const char** pp);

  // Build a token from a quoted string.
  Token
  gather_quoted_string(const char** pp);

  // The file we are reading.
  Input_file* input_file_;
  // The token sequence we create.
  Token_sequence tokens_;
  // The current line number.  Initialized by tokenize(), not by the
  // constructor; only valid while tokenizing.
  int lineno_;
  // The start of the current line in the buffer.  Same lifetime
  // caveat as lineno_.
  const char* linestart_;
};
274
275 // Read the whole file into memory. We don't expect linker scripts to
276 // be large, so we just use a std::string as a buffer. We ignore the
277 // data we've already read, so that we read aligned buffers.
278
279 void
280 Lex::read_file(std::string* contents)
281 {
282 contents->clear();
283 off_t off = 0;
284 off_t got;
285 unsigned char buf[BUFSIZ];
286 do
287 {
288 this->input_file_->file().read(off, sizeof buf, buf, &got);
289 contents->append(reinterpret_cast<char*>(&buf[0]), got);
290 }
291 while (got == sizeof buf);
292 }
293
294 // Return whether C can be the start of a name, if the next character
295 // is C2.  A name can begin with a letter, underscore, period, or
296 // dollar sign. Because a name can be a file name, we also permit
297 // forward slash, backslash, and tilde. Tilde is the tricky case
298 // here; GNU ld also uses it as a bitwise not operator. It is only
299 // recognized as the operator if it is not immediately followed by
300 // some character which can appear in a symbol. That is, "~0" is a
301 // symbol name, and "~ 0" is an expression using bitwise not. We are
302 // compatible.
303
304 inline bool
305 Lex::can_start_name(char c, char c2)
306 {
307 switch (c)
308 {
309 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
310 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
311 case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
312 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
313 case 'Y': case 'Z':
314 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
315 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
316 case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
317 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
318 case 'y': case 'z':
319 case '_': case '.': case '$': case '/': case '\\':
320 return true;
321
322 case '~':
323 return can_continue_name(c2);
324
325 default:
326 return false;
327 }
328 }
329
330 // Return whether C can continue a name which has already started.
331 // Subsequent characters in a name are the same as the leading
332 // characters, plus digits and "=+-:[],?*". So in general the linker
333 // script language requires spaces around operators.
334
335 inline bool
336 Lex::can_continue_name(char c)
337 {
338 switch (c)
339 {
340 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
341 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
342 case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
343 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
344 case 'Y': case 'Z':
345 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
346 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
347 case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
348 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
349 case 'y': case 'z':
350 case '_': case '.': case '$': case '/': case '\\':
351 case '~':
352 case '0': case '1': case '2': case '3': case '4':
353 case '5': case '6': case '7': case '8': case '9':
354 case '=': case '+': case '-': case ':': case '[': case ']':
355 case ',': case '?': case '*':
356 return true;
357
358 default:
359 return false;
360 }
361 }
362
363 // For a number we accept 0x followed by hex digits, or any sequence
364 // of digits. The old linker accepts leading '$' for hex, and
365 // trailing HXBOD. Those are for MRI compatibility and we don't
366 // accept them. The old linker also accepts trailing MK for mega or
367 // kilo. Those are mentioned in the documentation, and we accept
368 // them.
369
370 // Return whether C1 C2 C3 can start a hex number.
371
372 inline bool
373 Lex::can_start_hex(char c1, char c2, char c3)
374 {
375 if (c1 == '0' && (c2 == 'x' || c2 == 'X'))
376 return Lex::can_continue_hex(c3);
377 return false;
378 }
379
380 // Return whether C can appear in a hex number.
381
382 inline bool
383 Lex::can_continue_hex(char c)
384 {
385 switch (c)
386 {
387 case '0': case '1': case '2': case '3': case '4':
388 case '5': case '6': case '7': case '8': case '9':
389 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
390 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
391 return true;
392
393 default:
394 return false;
395 }
396 }
397
398 // Return whether C can start a non-hex number.
399
400 inline bool
401 Lex::can_start_number(char c)
402 {
403 switch (c)
404 {
405 case '0': case '1': case '2': case '3': case '4':
406 case '5': case '6': case '7': case '8': case '9':
407 return true;
408
409 default:
410 return false;
411 }
412 }
413
414 // If C1 C2 C3 form a valid three character operator, return the
415 // opcode (defined in the yyscript.h file generated from yyscript.y).
416 // Otherwise return 0.
417
418 inline int
419 Lex::three_char_operator(char c1, char c2, char c3)
420 {
421 switch (c1)
422 {
423 case '<':
424 if (c2 == '<' && c3 == '=')
425 return LSHIFTEQ;
426 break;
427 case '>':
428 if (c2 == '>' && c3 == '=')
429 return RSHIFTEQ;
430 break;
431 default:
432 break;
433 }
434 return 0;
435 }
436
437 // If C1 C2 form a valid two character operator, return the opcode
438 // (defined in the yyscript.h file generated from yyscript.y).
439 // Otherwise return 0.
440
441 inline int
442 Lex::two_char_operator(char c1, char c2)
443 {
444 switch (c1)
445 {
446 case '=':
447 if (c2 == '=')
448 return EQ;
449 break;
450 case '!':
451 if (c2 == '=')
452 return NE;
453 break;
454 case '+':
455 if (c2 == '=')
456 return PLUSEQ;
457 break;
458 case '-':
459 if (c2 == '=')
460 return MINUSEQ;
461 break;
462 case '*':
463 if (c2 == '=')
464 return MULTEQ;
465 break;
466 case '/':
467 if (c2 == '=')
468 return DIVEQ;
469 break;
470 case '|':
471 if (c2 == '=')
472 return OREQ;
473 if (c2 == '|')
474 return OROR;
475 break;
476 case '&':
477 if (c2 == '=')
478 return ANDEQ;
479 if (c2 == '&')
480 return ANDAND;
481 break;
482 case '>':
483 if (c2 == '=')
484 return GE;
485 if (c2 == '>')
486 return RSHIFT;
487 break;
488 case '<':
489 if (c2 == '=')
490 return LE;
491 if (c2 == '<')
492 return LSHIFT;
493 break;
494 default:
495 break;
496 }
497 return 0;
498 }
499
500 // If C1 is a valid operator, return the opcode. Otherwise return 0.
501
502 inline int
503 Lex::one_char_operator(char c1)
504 {
505 switch (c1)
506 {
507 case '+':
508 case '-':
509 case '*':
510 case '/':
511 case '%':
512 case '!':
513 case '&':
514 case '|':
515 case '^':
516 case '~':
517 case '<':
518 case '>':
519 case '=':
520 case '?':
521 case ',':
522 case '(':
523 case ')':
524 case '{':
525 case '}':
526 case '[':
527 case ']':
528 case ':':
529 case ';':
530 return c1;
531 default:
532 return 0;
533 }
534 }
535
536 // Skip a C style comment. *PP points to just after the "/*". Return
537 // false if the comment did not end.
538
539 bool
540 Lex::skip_c_comment(const char** pp)
541 {
542 const char* p = *pp;
543 while (p[0] != '*' || p[1] != '/')
544 {
545 if (*p == '\0')
546 {
547 *pp = p;
548 return false;
549 }
550
551 if (*p == '\n')
552 {
553 ++this->lineno_;
554 this->linestart_ = p + 1;
555 }
556 ++p;
557 }
558
559 *pp = p + 2;
560 return true;
561 }
562
563 // Skip a line # comment. Return false if there was no newline.
564
565 bool
566 Lex::skip_line_comment(const char** pp)
567 {
568 const char* p = *pp;
569 size_t skip = strcspn(p, "\n");
570 if (p[skip] == '\0')
571 {
572 *pp = p + skip;
573 return false;
574 }
575
576 p += skip + 1;
577 ++this->lineno_;
578 this->linestart_ = p;
579 *pp = p;
580
581 return true;
582 }
583
584 // Build a token CLASSIFICATION from all characters that match
585 // CAN_CONTINUE_FN. Update *PP.
586
587 inline Token
588 Lex::gather_token(Token::Classification classification,
589 bool (*can_continue_fn)(char),
590 const char* start,
591 const char* match,
592 const char **pp)
593 {
594 while ((*can_continue_fn)(*match))
595 ++match;
596 *pp = match;
597 return this->make_token(classification,
598 std::string(start, match - start),
599 start);
600 }
601
602 // Build a token from a quoted string.
603
604 Token
605 Lex::gather_quoted_string(const char** pp)
606 {
607 const char* start = *pp;
608 const char* p = start;
609 ++p;
610 size_t skip = strcspn(p, "\"\n");
611 if (p[skip] != '"')
612 return this->make_invalid_token(start);
613 *pp = p + skip + 1;
614 return this->make_token(Token::TOKEN_STRING,
615 std::string(p, skip),
616 start);
617 }
618
619 // Return the next token at *PP. Update *PP. General guideline: we
620 // require linker scripts to be simple ASCII. No unicode linker
621 // scripts. In particular we can assume that any '\0' is the end of
622 // the input.
623
// Return the next token at *PP.  Update *PP.  General guideline: we
// require linker scripts to be simple ASCII.  No unicode linker
// scripts.  In particular we can assume that any '\0' is the end of
// the input.

Token
Lex::get_token(const char** pp)
{
  const char* p = *pp;

  while (true)
    {
      if (*p == '\0')
	{
	  *pp = p;
	  return this->make_eof_token(p);
	}

      // Skip whitespace quickly.
      // NOTE(review): if spaces/tabs run straight into the final
      // '\0', we fall through to the checks below and return an
      // invalid token rather than EOF -- confirm this is intended.
      while (*p == ' ' || *p == '\t')
	++p;

      if (*p == '\n')
	{
	  ++p;
	  ++this->lineno_;
	  this->linestart_ = p;
	  continue;
	}

      // Skip C style comments.  Record the comment's own position so
      // an unterminated comment is reported where it starts.
      if (p[0] == '/' && p[1] == '*')
	{
	  int lineno = this->lineno_;
	  int charpos = p - this->linestart_ + 1;

	  *pp = p + 2;
	  if (!this->skip_c_comment(pp))
	    return Token(Token::TOKEN_INVALID, lineno, charpos);
	  p = *pp;

	  continue;
	}

      // Skip line comments.  A '#' comment with no trailing newline
      // means we hit the end of the input.
      if (*p == '#')
	{
	  *pp = p + 1;
	  if (!this->skip_line_comment(pp))
	    return this->make_eof_token(p);
	  p = *pp;
	  continue;
	}

      // Check for a name.
      // NOTE(review): gather_token starts matching at p + 2, so the
      // second character is included in the name unconditionally even
      // though can_start_name only guarantees it for the '~' case --
      // verify a one-character name followed by a delimiter lexes as
      // intended.
      if (Lex::can_start_name(p[0], p[1]))
	return this->gather_token(Token::TOKEN_STRING,
				  Lex::can_continue_name,
				  p, p + 2, pp);

      // We accept any arbitrary name in double quotes, as long as it
      // does not cross a line boundary.
      if (*p == '"')
	{
	  *pp = p;
	  return this->gather_quoted_string(pp);
	}

      // Check for a number.  Hex first: can_start_hex verified three
      // characters ("0x" plus one digit), so matching resumes at p + 3.

      if (Lex::can_start_hex(p[0], p[1], p[2]))
	return this->gather_token(Token::TOKEN_INTEGER,
				  Lex::can_continue_hex,
				  p, p + 3, pp);

      if (Lex::can_start_number(p[0]))
	return this->gather_token(Token::TOKEN_INTEGER,
				  Lex::can_continue_number,
				  p, p + 1, pp);

      // Check for operators, longest match first.

      int opcode = Lex::three_char_operator(p[0], p[1], p[2]);
      if (opcode != 0)
	{
	  *pp = p + 3;
	  return this->make_token(opcode, p);
	}

      opcode = Lex::two_char_operator(p[0], p[1]);
      if (opcode != 0)
	{
	  *pp = p + 2;
	  return this->make_token(opcode, p);
	}

      opcode = Lex::one_char_operator(p[0]);
      if (opcode != 0)
	{
	  *pp = p + 1;
	  return this->make_token(opcode, p);
	}

      // Nothing matched: the character is not valid script input.
      return this->make_token(Token::TOKEN_INVALID, p);
    }
}
725
726 // Tokenize the file. Return the final token.
727
728 Token
729 Lex::tokenize()
730 {
731 std::string contents;
732 this->read_file(&contents);
733
734 const char* p = contents.c_str();
735
736 this->lineno_ = 1;
737 this->linestart_ = p;
738
739 while (true)
740 {
741 Token t(this->get_token(&p));
742
743 // Don't let an early null byte fool us into thinking that we've
744 // reached the end of the file.
745 if (t.is_eof()
746 && static_cast<size_t>(p - contents.c_str()) < contents.length())
747 t = this->make_invalid_token(p);
748
749 if (t.is_invalid() || t.is_eof())
750 return t;
751
752 this->tokens_.push_back(t);
753 }
754 }
755
756 // A trivial task which waits for THIS_BLOCKER to be clear and then
757 // clears NEXT_BLOCKER. THIS_BLOCKER may be NULL.
758
759 class Script_unblock : public Task
760 {
761 public:
762 Script_unblock(Task_token* this_blocker, Task_token* next_blocker)
763 : this_blocker_(this_blocker), next_blocker_(next_blocker)
764 { }
765
766 ~Script_unblock()
767 {
768 if (this->this_blocker_ != NULL)
769 delete this->this_blocker_;
770 }
771
772 Is_runnable_type
773 is_runnable(Workqueue*)
774 {
775 if (this->this_blocker_ != NULL && this->this_blocker_->is_blocked())
776 return IS_BLOCKED;
777 return IS_RUNNABLE;
778 }
779
780 Task_locker*
781 locks(Workqueue* workqueue)
782 {
783 return new Task_locker_block(*this->next_blocker_, workqueue);
784 }
785
786 void
787 run(Workqueue*)
788 { }
789
790 private:
791 Task_token* this_blocker_;
792 Task_token* next_blocker_;
793 };
794
795 // This class holds data passed through the parser to the lexer and to
796 // the parser support functions. This avoids global variables. We
797 // can't use global variables because we need not be called in the
798 // main thread.
799
class Parser_closure
{
 public:
  Parser_closure(const char* filename,
		 const Position_dependent_options& posdep_options,
		 bool in_group,
		 const Lex::Token_sequence* tokens)
    : filename_(filename), posdep_options_(posdep_options),
      in_group_(in_group), tokens_(tokens),
      next_token_index_(0), inputs_(NULL)
  { }

  // Return the file name.
  const char*
  filename() const
  { return this->filename_; }

  // Return the position dependent options.  The caller may modify
  // this (e.g. when handling AS_NEEDED).
  Position_dependent_options&
  position_dependent_options()
  { return this->posdep_options_; }

  // Return whether this script is being run in a group.
  bool
  in_group() const
  { return this->in_group_; }

  // Whether we are at the end of the token list.
  bool
  at_eof() const
  { return this->next_token_index_ >= this->tokens_->size(); }

  // Return the next token and advance.  No bounds check here: the
  // caller is expected to test at_eof() first.
  const Token*
  next_token()
  {
    const Token* ret = &(*this->tokens_)[this->next_token_index_];
    ++this->next_token_index_;
    return ret;
  }

  // Return the list of input files, creating it if necessary.  This
  // is a space leak--we never free the INPUTS_ pointer.
  Input_arguments*
  inputs()
  {
    if (this->inputs_ == NULL)
      this->inputs_ = new Input_arguments();
    return this->inputs_;
  }

  // Return whether we saw any input files.
  bool
  saw_inputs() const
  { return this->inputs_ != NULL && !this->inputs_->empty(); }

 private:
  // The name of the file we are reading.
  const char* filename_;
  // The position dependent options.
  Position_dependent_options posdep_options_;
  // Whether we are currently in a --start-group/--end-group.
  bool in_group_;

  // The tokens to be returned by the lexer.
  const Lex::Token_sequence* tokens_;
  // The index of the next token to return.
  unsigned int next_token_index_;
  // New input files found to add to the link.
  Input_arguments* inputs_;
};
872
873 // FILE was found as an argument on the command line. Try to read it
874 // as a script. We've already read BYTES of data into P, but we
875 // ignore that. Return true if the file was handled.
876
bool
read_input_script(Workqueue* workqueue, const General_options& options,
		  Symbol_table* symtab, Layout* layout,
		  const Dirsearch& dirsearch, Input_objects* input_objects,
		  Input_group* input_group,
		  const Input_argument* input_argument,
		  Input_file* input_file, const unsigned char*, off_t,
		  Task_token* this_blocker, Task_token* next_blocker)
{
  // Tokenize the whole file up front.  Failure means this is not a
  // linker script after all.
  Lex lex(input_file);
  if (lex.tokenize().is_invalid())
    return false;

  Parser_closure closure(input_file->filename().c_str(),
			 input_argument->file().options(),
			 input_group != NULL,
			 &lex.tokens());

  // A parse error likewise means the file cannot be handled as a
  // script.
  if (yyparse(&closure) != 0)
    return false;

  // THIS_BLOCKER must be clear before we may add anything to the
  // symbol table.  We are responsible for unblocking NEXT_BLOCKER
  // when we are done.  We are responsible for deleting THIS_BLOCKER
  // when it is unblocked.

  if (!closure.saw_inputs())
    {
      // The script did not add any files to read.  Note that we are
      // not permitted to call NEXT_BLOCKER->unblock() here even if
      // THIS_BLOCKER is NULL, as we are not in the main thread.
      workqueue->queue(new Script_unblock(this_blocker, next_blocker));
      return true;
    }

  // Queue a Read_symbols task for each input named by the script,
  // chained by fresh blocker tokens so they are processed in order;
  // the last task in the chain hands off the caller's NEXT_BLOCKER.
  for (Input_arguments::const_iterator p = closure.inputs()->begin();
       p != closure.inputs()->end();
       ++p)
    {
      Task_token* nb;
      if (p + 1 == closure.inputs()->end())
	nb = next_blocker;
      else
	{
	  nb = new Task_token();
	  nb->add_blocker();
	}
      workqueue->queue(new Read_symbols(options, input_objects, symtab,
					layout, dirsearch, &*p,
					input_group, this_blocker, nb));
      this_blocker = nb;
    }

  return true;
}
932
933 // Manage mapping from keywords to the codes expected by the bison
934 // parser.
935
class Keyword_to_parsecode
{
 public:
  // The structure which maps keywords to parsecodes.
  struct Keyword_parsecode
  {
    // Keyword.
    const char* keyword;
    // Corresponding parsecode (token value from yyscript.h).
    int parsecode;
  };

  // Return the parsecode corresponding KEYWORD, or 0 if it is not a
  // keyword.
  static int
  keyword_to_parsecode(const char* keyword);

 private:
  // The array of all keywords, sorted by keyword name so that it can
  // be searched with bsearch.
  static const Keyword_parsecode keyword_parsecodes_[];

  // The number of keywords.
  static const int keyword_count;
};
960
961 // Mapping from keyword string to keyword parsecode. This array must
962 // be kept in sorted order. Parsecodes are looked up using bsearch.
963 // This array must correspond to the list of parsecodes in yyscript.y.
964
const Keyword_to_parsecode::Keyword_parsecode
Keyword_to_parsecode::keyword_parsecodes_[] =
{
  { "ABSOLUTE", ABSOLUTE },
  { "ADDR", ADDR },
  { "ALIGN", ALIGN_K },
  { "ASSERT", ASSERT_K },
  { "AS_NEEDED", AS_NEEDED },
  { "AT", AT },
  { "BIND", BIND },
  { "BLOCK", BLOCK },
  { "BYTE", BYTE },
  { "CONSTANT", CONSTANT },
  { "CONSTRUCTORS", CONSTRUCTORS },
  { "COPY", COPY },
  { "CREATE_OBJECT_SYMBOLS", CREATE_OBJECT_SYMBOLS },
  { "DATA_SEGMENT_ALIGN", DATA_SEGMENT_ALIGN },
  { "DATA_SEGMENT_END", DATA_SEGMENT_END },
  { "DATA_SEGMENT_RELRO_END", DATA_SEGMENT_RELRO_END },
  { "DEFINED", DEFINED },
  { "DSECT", DSECT },
  { "ENTRY", ENTRY },
  { "EXCLUDE_FILE", EXCLUDE_FILE },
  { "EXTERN", EXTERN },
  { "FILL", FILL },
  { "FLOAT", FLOAT },
  { "FORCE_COMMON_ALLOCATION", FORCE_COMMON_ALLOCATION },
  { "GROUP", GROUP },
  { "HLL", HLL },
  { "INCLUDE", INCLUDE },
  { "INFO", INFO },
  { "INHIBIT_COMMON_ALLOCATION", INHIBIT_COMMON_ALLOCATION },
  { "INPUT", INPUT },
  { "KEEP", KEEP },
  { "LENGTH", LENGTH },
  { "LOADADDR", LOADADDR },
  { "LONG", LONG },
  { "MAP", MAP },
  { "MAX", MAX_K },
  { "MEMORY", MEMORY },
  { "MIN", MIN_K },
  { "NEXT", NEXT },
  { "NOCROSSREFS", NOCROSSREFS },
  { "NOFLOAT", NOFLOAT },
  { "NOLOAD", NOLOAD },
  { "ONLY_IF_RO", ONLY_IF_RO },
  { "ONLY_IF_RW", ONLY_IF_RW },
  { "ORIGIN", ORIGIN },
  { "OUTPUT", OUTPUT },
  { "OUTPUT_ARCH", OUTPUT_ARCH },
  { "OUTPUT_FORMAT", OUTPUT_FORMAT },
  { "OVERLAY", OVERLAY },
  { "PHDRS", PHDRS },
  { "PROVIDE", PROVIDE },
  { "PROVIDE_HIDDEN", PROVIDE_HIDDEN },
  { "QUAD", QUAD },
  { "SEARCH_DIR", SEARCH_DIR },
  { "SECTIONS", SECTIONS },
  { "SEGMENT_START", SEGMENT_START },
  { "SHORT", SHORT },
  { "SIZEOF", SIZEOF },
  { "SIZEOF_HEADERS", SIZEOF_HEADERS },
  { "SORT_BY_ALIGNMENT", SORT_BY_ALIGNMENT },
  { "SORT_BY_NAME", SORT_BY_NAME },
  { "SPECIAL", SPECIAL },
  { "SQUAD", SQUAD },
  { "STARTUP", STARTUP },
  { "SUBALIGN", SUBALIGN },
  { "SYSLIB", SYSLIB },
  { "TARGET", TARGET_K },
  { "TRUNCATE", TRUNCATE },
  { "VERSION", VERSIONK },
  // Lower-case entries sort after all upper-case ones in ASCII, so
  // the table remains correctly ordered for bsearch.
  { "global", GLOBAL },
  { "l", LENGTH },
  { "len", LENGTH },
  { "local", LOCAL },
  { "o", ORIGIN },
  { "org", ORIGIN },
  { "sizeof_headers", SIZEOF_HEADERS },
};

// The number of entries in keyword_parsecodes_.
const int Keyword_to_parsecode::keyword_count =
  (sizeof(Keyword_to_parsecode::keyword_parsecodes_)
   / sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]));
1049
1050 // Comparison function passed to bsearch.
1051
1052 extern "C"
1053 {
1054
1055 static int
1056 ktt_compare(const void* keyv, const void* kttv)
1057 {
1058 const char* key = static_cast<const char*>(keyv);
1059 const Keyword_to_parsecode::Keyword_parsecode* ktt =
1060 static_cast<const Keyword_to_parsecode::Keyword_parsecode*>(kttv);
1061 return strcmp(key, ktt->keyword);
1062 }
1063
1064 } // End extern "C".
1065
1066 int
1067 Keyword_to_parsecode::keyword_to_parsecode(const char* keyword)
1068 {
1069 void* kttv = bsearch(keyword,
1070 Keyword_to_parsecode::keyword_parsecodes_,
1071 Keyword_to_parsecode::keyword_count,
1072 sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]),
1073 ktt_compare);
1074 if (kttv == NULL)
1075 return 0;
1076 Keyword_parsecode* ktt = static_cast<Keyword_parsecode*>(kttv);
1077 return ktt->parsecode;
1078 }
1079
1080 } // End namespace gold.
1081
1082 // The remaining functions are extern "C", so it's clearer to not put
1083 // them in namespace gold.
1084
1085 using namespace gold;
1086
1087 // This function is called by the bison parser to return the next
1088 // token.
1089
extern "C" int
yylex(YYSTYPE* lvalp, void* closurev)
{
  Parser_closure* closure = static_cast<Parser_closure*>(closurev);

  // Returning 0 tells the bison parser that there is no more input.
  if (closure->at_eof())
    return 0;

  const Token* token = closure->next_token();

  switch (token->classification())
    {
    default:
    case Token::TOKEN_INVALID:
    case Token::TOKEN_EOF:
      // Lex::tokenize stops at the first invalid or EOF token without
      // storing it, so neither can appear in the token sequence.
      gold_unreachable();

    case Token::TOKEN_STRING:
      {
	// A string which matches a keyword is returned as that
	// keyword's parsecode rather than as STRING.
	const char* str = token->string_value().c_str();
	int parsecode = Keyword_to_parsecode::keyword_to_parsecode(str);
	if (parsecode != 0)
	  return parsecode;
	// NOTE(review): STR points into the Token's internal string;
	// presumably valid for the whole parse because the token
	// sequence outlives yyparse -- confirm.
	lvalp->string = str;
	return STRING;
      }

    case Token::TOKEN_OPERATOR:
      return token->operator_value();

    case Token::TOKEN_INTEGER:
      lvalp->integer = token->integer_value();
      return INTEGER;
    }
}
1125
1126 // This function is called by the bison parser to report an error.
1127
extern "C" void
yyerror(void* closurev, const char* message)
{
  Parser_closure* closure = static_cast<Parser_closure*>(closurev);

  // Report the error against the script's file name, then exit; a
  // script parse error is not recoverable.
  fprintf(stderr, _("%s: %s: %s\n"),
	  program_name, closure->filename(), message);
  gold_exit(false);
}
1137
1138 // Called by the bison parser to add a file to the link.
1139
1140 extern "C" void
1141 script_add_file(void* closurev, const char* name)
1142 {
1143 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1144 std::string absname;
1145 if (name[0] == '/')
1146 {
1147 absname = name;
1148 }
1149 else
1150 {
1151 // Prepend `dirname closure->filename()` to make the path absolute.
1152 char *slash = strrchr(closure->filename(), '/');
1153 absname.assign(closure->filename(),
1154 slash ? slash - closure->filename() + 1 : 0);
1155 absname += name;
1156 }
1157 Input_file_argument file(absname.c_str(), false, closure->position_dependent_options());
1158 closure->inputs()->add_file(file);
1159 }
1160
1161 // Called by the bison parser to start a group. If we are already in
1162 // a group, that means that this script was invoked within a
1163 // --start-group --end-group sequence on the command line, or that
1164 // this script was found in a GROUP of another script. In that case,
1165 // we simply continue the existing group, rather than starting a new
1166 // one. It is possible to construct a case in which this will do
1167 // something other than what would happen if we did a recursive group,
1168 // but it's hard to imagine why the different behaviour would be
1169 // useful for a real program. Avoiding recursive groups is simpler
1170 // and more efficient.
1171
1172 extern "C" void
1173 script_start_group(void* closurev)
1174 {
1175 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1176 if (!closure->in_group())
1177 closure->inputs()->start_group();
1178 }
1179
1180 // Called by the bison parser at the end of a group.
1181
1182 extern "C" void
1183 script_end_group(void* closurev)
1184 {
1185 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1186 if (!closure->in_group())
1187 closure->inputs()->end_group();
1188 }
1189
1190 // Called by the bison parser to start an AS_NEEDED list.
1191
1192 extern "C" void
1193 script_start_as_needed(void* closurev)
1194 {
1195 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1196 closure->position_dependent_options().set_as_needed();
1197 }
1198
1199 // Called by the bison parser at the end of an AS_NEEDED list.
1200
1201 extern "C" void
1202 script_end_as_needed(void* closurev)
1203 {
1204 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1205 closure->position_dependent_options().clear_as_needed();
1206 }
This page took 0.058684 seconds and 5 git commands to generate.