From Craig Silverstein: Fix bug when reading large script files.
[deliverable/binutils-gdb.git] / gold / script.cc
1 // script.cc -- handle linker scripts for gold.
2
3 // Copyright 2006, 2007 Free Software Foundation, Inc.
4 // Written by Ian Lance Taylor <iant@google.com>.
5
6 // This file is part of gold.
7
8 // This program is free software; you can redistribute it and/or modify
9 // it under the terms of the GNU General Public License as published by
10 // the Free Software Foundation; either version 3 of the License, or
11 // (at your option) any later version.
12
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU General Public License for more details.
17
18 // You should have received a copy of the GNU General Public License
19 // along with this program; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
21 // MA 02110-1301, USA.
22
23 #include "gold.h"
24
25 #include <string>
26 #include <vector>
27 #include <cstdio>
28 #include <cstdlib>
29
30 #include "options.h"
31 #include "fileread.h"
32 #include "workqueue.h"
33 #include "readsyms.h"
34 #include "yyscript.h"
35 #include "script.h"
36 #include "script-c.h"
37
38 namespace gold
39 {
40
41 // A token read from a script file. We don't implement keywords here;
42 // all keywords are simply represented as a string.
43
44 class Token
45 {
46 public:
47 // Token classification.
48 enum Classification
49 {
50 // Token is invalid.
51 TOKEN_INVALID,
52 // Token indicates end of input.
53 TOKEN_EOF,
54 // Token is a string of characters.
55 TOKEN_STRING,
56 // Token is an operator.
57 TOKEN_OPERATOR,
58 // Token is a number (an integer).
59 TOKEN_INTEGER
60 };
61
62 // We need an empty constructor so that we can put this STL objects.
63 Token()
64 : classification_(TOKEN_INVALID), value_(), opcode_(0),
65 lineno_(0), charpos_(0)
66 { }
67
68 // A general token with no value.
69 Token(Classification classification, int lineno, int charpos)
70 : classification_(classification), value_(), opcode_(0),
71 lineno_(lineno), charpos_(charpos)
72 {
73 gold_assert(classification == TOKEN_INVALID
74 || classification == TOKEN_EOF);
75 }
76
77 // A general token with a value.
78 Token(Classification classification, const std::string& value,
79 int lineno, int charpos)
80 : classification_(classification), value_(value), opcode_(0),
81 lineno_(lineno), charpos_(charpos)
82 {
83 gold_assert(classification != TOKEN_INVALID
84 && classification != TOKEN_EOF);
85 }
86
87 // A token representing a string of characters.
88 Token(const std::string& s, int lineno, int charpos)
89 : classification_(TOKEN_STRING), value_(s), opcode_(0),
90 lineno_(lineno), charpos_(charpos)
91 { }
92
93 // A token representing an operator.
94 Token(int opcode, int lineno, int charpos)
95 : classification_(TOKEN_OPERATOR), value_(), opcode_(opcode),
96 lineno_(lineno), charpos_(charpos)
97 { }
98
99 // Return whether the token is invalid.
100 bool
101 is_invalid() const
102 { return this->classification_ == TOKEN_INVALID; }
103
104 // Return whether this is an EOF token.
105 bool
106 is_eof() const
107 { return this->classification_ == TOKEN_EOF; }
108
109 // Return the token classification.
110 Classification
111 classification() const
112 { return this->classification_; }
113
114 // Return the line number at which the token starts.
115 int
116 lineno() const
117 { return this->lineno_; }
118
119 // Return the character position at this the token starts.
120 int
121 charpos() const
122 { return this->charpos_; }
123
124 // Get the value of a token.
125
126 const std::string&
127 string_value() const
128 {
129 gold_assert(this->classification_ == TOKEN_STRING);
130 return this->value_;
131 }
132
133 int
134 operator_value() const
135 {
136 gold_assert(this->classification_ == TOKEN_OPERATOR);
137 return this->opcode_;
138 }
139
140 int64_t
141 integer_value() const
142 {
143 gold_assert(this->classification_ == TOKEN_INTEGER);
144 return strtoll(this->value_.c_str(), NULL, 0);
145 }
146
147 private:
148 // The token classification.
149 Classification classification_;
150 // The token value, for TOKEN_STRING or TOKEN_INTEGER.
151 std::string value_;
152 // The token value, for TOKEN_OPERATOR.
153 int opcode_;
154 // The line number where this token started (one based).
155 int lineno_;
156 // The character position within the line where this token started
157 // (one based).
158 int charpos_;
159 };
160
161 // This class handles lexing a file into a sequence of tokens. We
162 // don't expect linker scripts to be large, so we just read them and
163 // tokenize them all at once.
164
165 class Lex
166 {
167 public:
168 Lex(Input_file* input_file)
169 : input_file_(input_file), tokens_()
170 { }
171
172 // Tokenize the file. Return the final token, which will be either
173 // an invalid token or an EOF token. An invalid token indicates
174 // that tokenization failed.
175 Token
176 tokenize();
177
178 // A token sequence.
179 typedef std::vector<Token> Token_sequence;
180
181 // Return the tokens.
182 const Token_sequence&
183 tokens() const
184 { return this->tokens_; }
185
186 private:
187 Lex(const Lex&);
188 Lex& operator=(const Lex&);
189
190 // Read the file into a string buffer.
191 void
192 read_file(std::string*);
193
194 // Make a general token with no value at the current location.
195 Token
196 make_token(Token::Classification c, const char* p) const
197 { return Token(c, this->lineno_, p - this->linestart_ + 1); }
198
199 // Make a general token with a value at the current location.
200 Token
201 make_token(Token::Classification c, const std::string& v, const char* p)
202 const
203 { return Token(c, v, this->lineno_, p - this->linestart_ + 1); }
204
205 // Make an operator token at the current location.
206 Token
207 make_token(int opcode, const char* p) const
208 { return Token(opcode, this->lineno_, p - this->linestart_ + 1); }
209
210 // Make an invalid token at the current location.
211 Token
212 make_invalid_token(const char* p)
213 { return this->make_token(Token::TOKEN_INVALID, p); }
214
215 // Make an EOF token at the current location.
216 Token
217 make_eof_token(const char* p)
218 { return this->make_token(Token::TOKEN_EOF, p); }
219
220 // Return whether C can be the first character in a name. C2 is the
221 // next character, since we sometimes need that.
222 static inline bool
223 can_start_name(char c, char c2);
224
225 // Return whether C can appear in a name which has already started.
226 static inline bool
227 can_continue_name(char c);
228
229 // Return whether C, C2, C3 can start a hex number.
230 static inline bool
231 can_start_hex(char c, char c2, char c3);
232
233 // Return whether C can appear in a hex number.
234 static inline bool
235 can_continue_hex(char c);
236
237 // Return whether C can start a non-hex number.
238 static inline bool
239 can_start_number(char c);
240
241 // Return whether C can appear in a non-hex number.
242 static inline bool
243 can_continue_number(char c)
244 { return Lex::can_start_number(c); }
245
246 // If C1 C2 C3 form a valid three character operator, return the
247 // opcode. Otherwise return 0.
248 static inline int
249 three_char_operator(char c1, char c2, char c3);
250
251 // If C1 C2 form a valid two character operator, return the opcode.
252 // Otherwise return 0.
253 static inline int
254 two_char_operator(char c1, char c2);
255
256 // If C1 is a valid one character operator, return the opcode.
257 // Otherwise return 0.
258 static inline int
259 one_char_operator(char c1);
260
261 // Read the next token.
262 Token
263 get_token(const char**);
264
265 // Skip a C style /* */ comment. Return false if the comment did
266 // not end.
267 bool
268 skip_c_comment(const char**);
269
270 // Skip a line # comment. Return false if there was no newline.
271 bool
272 skip_line_comment(const char**);
273
274 // Build a token CLASSIFICATION from all characters that match
275 // CAN_CONTINUE_FN. The token starts at START. Start matching from
276 // MATCH. Set *PP to the character following the token.
277 inline Token
278 gather_token(Token::Classification, bool (*can_continue_fn)(char),
279 const char* start, const char* match, const char** pp);
280
281 // Build a token from a quoted string.
282 Token
283 gather_quoted_string(const char** pp);
284
285 // The file we are reading.
286 Input_file* input_file_;
287 // The token sequence we create.
288 Token_sequence tokens_;
289 // The current line number.
290 int lineno_;
291 // The start of the current line in the buffer.
292 const char* linestart_;
293 };
294
295 // Read the whole file into memory. We don't expect linker scripts to
296 // be large, so we just use a std::string as a buffer. We ignore the
297 // data we've already read, so that we read aligned buffers.
298
299 void
300 Lex::read_file(std::string* contents)
301 {
302 contents->clear();
303 off_t off = 0;
304 off_t got;
305 unsigned char buf[BUFSIZ];
306 do
307 {
308 this->input_file_->file().read(off, sizeof buf, buf, &got);
309 contents->append(reinterpret_cast<char*>(&buf[0]), got);
310 off += got;
311 }
312 while (got == sizeof buf);
313 }
314
315 // Return whether C can be the start of a name, if the next character
316 // is C2. A name can being with a letter, underscore, period, or
317 // dollar sign. Because a name can be a file name, we also permit
318 // forward slash, backslash, and tilde. Tilde is the tricky case
319 // here; GNU ld also uses it as a bitwise not operator. It is only
320 // recognized as the operator if it is not immediately followed by
321 // some character which can appear in a symbol. That is, "~0" is a
322 // symbol name, and "~ 0" is an expression using bitwise not. We are
323 // compatible.
324
325 inline bool
326 Lex::can_start_name(char c, char c2)
327 {
328 switch (c)
329 {
330 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
331 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
332 case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
333 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
334 case 'Y': case 'Z':
335 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
336 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
337 case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
338 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
339 case 'y': case 'z':
340 case '_': case '.': case '$': case '/': case '\\':
341 return true;
342
343 case '~':
344 return can_continue_name(c2);
345
346 default:
347 return false;
348 }
349 }
350
351 // Return whether C can continue a name which has already started.
352 // Subsequent characters in a name are the same as the leading
353 // characters, plus digits and "=+-:[],?*". So in general the linker
354 // script language requires spaces around operators.
355
356 inline bool
357 Lex::can_continue_name(char c)
358 {
359 switch (c)
360 {
361 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
362 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
363 case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R':
364 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
365 case 'Y': case 'Z':
366 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
367 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
368 case 'm': case 'n': case 'o': case 'q': case 'p': case 'r':
369 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
370 case 'y': case 'z':
371 case '_': case '.': case '$': case '/': case '\\':
372 case '~':
373 case '0': case '1': case '2': case '3': case '4':
374 case '5': case '6': case '7': case '8': case '9':
375 case '=': case '+': case '-': case ':': case '[': case ']':
376 case ',': case '?': case '*':
377 return true;
378
379 default:
380 return false;
381 }
382 }
383
384 // For a number we accept 0x followed by hex digits, or any sequence
385 // of digits. The old linker accepts leading '$' for hex, and
386 // trailing HXBOD. Those are for MRI compatibility and we don't
387 // accept them. The old linker also accepts trailing MK for mega or
388 // kilo. Those are mentioned in the documentation, and we accept
389 // them.
390
391 // Return whether C1 C2 C3 can start a hex number.
392
393 inline bool
394 Lex::can_start_hex(char c1, char c2, char c3)
395 {
396 if (c1 == '0' && (c2 == 'x' || c2 == 'X'))
397 return Lex::can_continue_hex(c3);
398 return false;
399 }
400
401 // Return whether C can appear in a hex number.
402
403 inline bool
404 Lex::can_continue_hex(char c)
405 {
406 switch (c)
407 {
408 case '0': case '1': case '2': case '3': case '4':
409 case '5': case '6': case '7': case '8': case '9':
410 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
411 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
412 return true;
413
414 default:
415 return false;
416 }
417 }
418
419 // Return whether C can start a non-hex number.
420
421 inline bool
422 Lex::can_start_number(char c)
423 {
424 switch (c)
425 {
426 case '0': case '1': case '2': case '3': case '4':
427 case '5': case '6': case '7': case '8': case '9':
428 return true;
429
430 default:
431 return false;
432 }
433 }
434
435 // If C1 C2 C3 form a valid three character operator, return the
436 // opcode (defined in the yyscript.h file generated from yyscript.y).
437 // Otherwise return 0.
438
439 inline int
440 Lex::three_char_operator(char c1, char c2, char c3)
441 {
442 switch (c1)
443 {
444 case '<':
445 if (c2 == '<' && c3 == '=')
446 return LSHIFTEQ;
447 break;
448 case '>':
449 if (c2 == '>' && c3 == '=')
450 return RSHIFTEQ;
451 break;
452 default:
453 break;
454 }
455 return 0;
456 }
457
458 // If C1 C2 form a valid two character operator, return the opcode
459 // (defined in the yyscript.h file generated from yyscript.y).
460 // Otherwise return 0.
461
462 inline int
463 Lex::two_char_operator(char c1, char c2)
464 {
465 switch (c1)
466 {
467 case '=':
468 if (c2 == '=')
469 return EQ;
470 break;
471 case '!':
472 if (c2 == '=')
473 return NE;
474 break;
475 case '+':
476 if (c2 == '=')
477 return PLUSEQ;
478 break;
479 case '-':
480 if (c2 == '=')
481 return MINUSEQ;
482 break;
483 case '*':
484 if (c2 == '=')
485 return MULTEQ;
486 break;
487 case '/':
488 if (c2 == '=')
489 return DIVEQ;
490 break;
491 case '|':
492 if (c2 == '=')
493 return OREQ;
494 if (c2 == '|')
495 return OROR;
496 break;
497 case '&':
498 if (c2 == '=')
499 return ANDEQ;
500 if (c2 == '&')
501 return ANDAND;
502 break;
503 case '>':
504 if (c2 == '=')
505 return GE;
506 if (c2 == '>')
507 return RSHIFT;
508 break;
509 case '<':
510 if (c2 == '=')
511 return LE;
512 if (c2 == '<')
513 return LSHIFT;
514 break;
515 default:
516 break;
517 }
518 return 0;
519 }
520
521 // If C1 is a valid operator, return the opcode. Otherwise return 0.
522
523 inline int
524 Lex::one_char_operator(char c1)
525 {
526 switch (c1)
527 {
528 case '+':
529 case '-':
530 case '*':
531 case '/':
532 case '%':
533 case '!':
534 case '&':
535 case '|':
536 case '^':
537 case '~':
538 case '<':
539 case '>':
540 case '=':
541 case '?':
542 case ',':
543 case '(':
544 case ')':
545 case '{':
546 case '}':
547 case '[':
548 case ']':
549 case ':':
550 case ';':
551 return c1;
552 default:
553 return 0;
554 }
555 }
556
557 // Skip a C style comment. *PP points to just after the "/*". Return
558 // false if the comment did not end.
559
560 bool
561 Lex::skip_c_comment(const char** pp)
562 {
563 const char* p = *pp;
564 while (p[0] != '*' || p[1] != '/')
565 {
566 if (*p == '\0')
567 {
568 *pp = p;
569 return false;
570 }
571
572 if (*p == '\n')
573 {
574 ++this->lineno_;
575 this->linestart_ = p + 1;
576 }
577 ++p;
578 }
579
580 *pp = p + 2;
581 return true;
582 }
583
584 // Skip a line # comment. Return false if there was no newline.
585
586 bool
587 Lex::skip_line_comment(const char** pp)
588 {
589 const char* p = *pp;
590 size_t skip = strcspn(p, "\n");
591 if (p[skip] == '\0')
592 {
593 *pp = p + skip;
594 return false;
595 }
596
597 p += skip + 1;
598 ++this->lineno_;
599 this->linestart_ = p;
600 *pp = p;
601
602 return true;
603 }
604
605 // Build a token CLASSIFICATION from all characters that match
606 // CAN_CONTINUE_FN. Update *PP.
607
608 inline Token
609 Lex::gather_token(Token::Classification classification,
610 bool (*can_continue_fn)(char),
611 const char* start,
612 const char* match,
613 const char **pp)
614 {
615 while ((*can_continue_fn)(*match))
616 ++match;
617 *pp = match;
618 return this->make_token(classification,
619 std::string(start, match - start),
620 start);
621 }
622
623 // Build a token from a quoted string.
624
625 Token
626 Lex::gather_quoted_string(const char** pp)
627 {
628 const char* start = *pp;
629 const char* p = start;
630 ++p;
631 size_t skip = strcspn(p, "\"\n");
632 if (p[skip] != '"')
633 return this->make_invalid_token(start);
634 *pp = p + skip + 1;
635 return this->make_token(Token::TOKEN_STRING,
636 std::string(p, skip),
637 start);
638 }
639
640 // Return the next token at *PP. Update *PP. General guideline: we
641 // require linker scripts to be simple ASCII. No unicode linker
642 // scripts. In particular we can assume that any '\0' is the end of
643 // the input.
644
645 Token
646 Lex::get_token(const char** pp)
647 {
648 const char* p = *pp;
649
650 while (true)
651 {
652 if (*p == '\0')
653 {
654 *pp = p;
655 return this->make_eof_token(p);
656 }
657
658 // Skip whitespace quickly.
659 while (*p == ' ' || *p == '\t')
660 ++p;
661
662 if (*p == '\n')
663 {
664 ++p;
665 ++this->lineno_;
666 this->linestart_ = p;
667 continue;
668 }
669
670 // Skip C style comments.
671 if (p[0] == '/' && p[1] == '*')
672 {
673 int lineno = this->lineno_;
674 int charpos = p - this->linestart_ + 1;
675
676 *pp = p + 2;
677 if (!this->skip_c_comment(pp))
678 return Token(Token::TOKEN_INVALID, lineno, charpos);
679 p = *pp;
680
681 continue;
682 }
683
684 // Skip line comments.
685 if (*p == '#')
686 {
687 *pp = p + 1;
688 if (!this->skip_line_comment(pp))
689 return this->make_eof_token(p);
690 p = *pp;
691 continue;
692 }
693
694 // Check for a name.
695 if (Lex::can_start_name(p[0], p[1]))
696 return this->gather_token(Token::TOKEN_STRING,
697 Lex::can_continue_name,
698 p, p + 2, pp);
699
700 // We accept any arbitrary name in double quotes, as long as it
701 // does not cross a line boundary.
702 if (*p == '"')
703 {
704 *pp = p;
705 return this->gather_quoted_string(pp);
706 }
707
708 // Check for a number.
709
710 if (Lex::can_start_hex(p[0], p[1], p[2]))
711 return this->gather_token(Token::TOKEN_INTEGER,
712 Lex::can_continue_hex,
713 p, p + 3, pp);
714
715 if (Lex::can_start_number(p[0]))
716 return this->gather_token(Token::TOKEN_INTEGER,
717 Lex::can_continue_number,
718 p, p + 1, pp);
719
720 // Check for operators.
721
722 int opcode = Lex::three_char_operator(p[0], p[1], p[2]);
723 if (opcode != 0)
724 {
725 *pp = p + 3;
726 return this->make_token(opcode, p);
727 }
728
729 opcode = Lex::two_char_operator(p[0], p[1]);
730 if (opcode != 0)
731 {
732 *pp = p + 2;
733 return this->make_token(opcode, p);
734 }
735
736 opcode = Lex::one_char_operator(p[0]);
737 if (opcode != 0)
738 {
739 *pp = p + 1;
740 return this->make_token(opcode, p);
741 }
742
743 return this->make_token(Token::TOKEN_INVALID, p);
744 }
745 }
746
747 // Tokenize the file. Return the final token.
748
749 Token
750 Lex::tokenize()
751 {
752 std::string contents;
753 this->read_file(&contents);
754
755 const char* p = contents.c_str();
756
757 this->lineno_ = 1;
758 this->linestart_ = p;
759
760 while (true)
761 {
762 Token t(this->get_token(&p));
763
764 // Don't let an early null byte fool us into thinking that we've
765 // reached the end of the file.
766 if (t.is_eof()
767 && static_cast<size_t>(p - contents.c_str()) < contents.length())
768 t = this->make_invalid_token(p);
769
770 if (t.is_invalid() || t.is_eof())
771 return t;
772
773 this->tokens_.push_back(t);
774 }
775 }
776
777 // A trivial task which waits for THIS_BLOCKER to be clear and then
778 // clears NEXT_BLOCKER. THIS_BLOCKER may be NULL.
779
780 class Script_unblock : public Task
781 {
782 public:
783 Script_unblock(Task_token* this_blocker, Task_token* next_blocker)
784 : this_blocker_(this_blocker), next_blocker_(next_blocker)
785 { }
786
787 ~Script_unblock()
788 {
789 if (this->this_blocker_ != NULL)
790 delete this->this_blocker_;
791 }
792
793 Is_runnable_type
794 is_runnable(Workqueue*)
795 {
796 if (this->this_blocker_ != NULL && this->this_blocker_->is_blocked())
797 return IS_BLOCKED;
798 return IS_RUNNABLE;
799 }
800
801 Task_locker*
802 locks(Workqueue* workqueue)
803 {
804 return new Task_locker_block(*this->next_blocker_, workqueue);
805 }
806
807 void
808 run(Workqueue*)
809 { }
810
811 private:
812 Task_token* this_blocker_;
813 Task_token* next_blocker_;
814 };
815
816 // This class holds data passed through the parser to the lexer and to
817 // the parser support functions. This avoids global variables. We
818 // can't use global variables because we need not be called in the
819 // main thread.
820
821 class Parser_closure
822 {
823 public:
824 Parser_closure(const char* filename,
825 const Position_dependent_options& posdep_options,
826 bool in_group,
827 const Lex::Token_sequence* tokens)
828 : filename_(filename), posdep_options_(posdep_options),
829 in_group_(in_group), tokens_(tokens),
830 next_token_index_(0), inputs_(NULL)
831 { }
832
833 // Return the file name.
834 const char*
835 filename() const
836 { return this->filename_; }
837
838 // Return the position dependent options. The caller may modify
839 // this.
840 Position_dependent_options&
841 position_dependent_options()
842 { return this->posdep_options_; }
843
844 // Return whether this script is being run in a group.
845 bool
846 in_group() const
847 { return this->in_group_; }
848
849 // Whether we are at the end of the token list.
850 bool
851 at_eof() const
852 { return this->next_token_index_ >= this->tokens_->size(); }
853
854 // Return the next token.
855 const Token*
856 next_token()
857 {
858 const Token* ret = &(*this->tokens_)[this->next_token_index_];
859 ++this->next_token_index_;
860 return ret;
861 }
862
863 // Return the list of input files, creating it if necessary. This
864 // is a space leak--we never free the INPUTS_ pointer.
865 Input_arguments*
866 inputs()
867 {
868 if (this->inputs_ == NULL)
869 this->inputs_ = new Input_arguments();
870 return this->inputs_;
871 }
872
873 // Return whether we saw any input files.
874 bool
875 saw_inputs() const
876 { return this->inputs_ != NULL && !this->inputs_->empty(); }
877
878 private:
879 // The name of the file we are reading.
880 const char* filename_;
881 // The position dependent options.
882 Position_dependent_options posdep_options_;
883 // Whether we are currently in a --start-group/--end-group.
884 bool in_group_;
885
886 // The tokens to be returned by the lexer.
887 const Lex::Token_sequence* tokens_;
888 // The index of the next token to return.
889 unsigned int next_token_index_;
890 // New input files found to add to the link.
891 Input_arguments* inputs_;
892 };
893
894 // FILE was found as an argument on the command line. Try to read it
895 // as a script. We've already read BYTES of data into P, but we
896 // ignore that. Return true if the file was handled.
897
898 bool
899 read_input_script(Workqueue* workqueue, const General_options& options,
900 Symbol_table* symtab, Layout* layout,
901 const Dirsearch& dirsearch, Input_objects* input_objects,
902 Input_group* input_group,
903 const Input_argument* input_argument,
904 Input_file* input_file, const unsigned char*, off_t,
905 Task_token* this_blocker, Task_token* next_blocker)
906 {
907 Lex lex(input_file);
908 if (lex.tokenize().is_invalid())
909 return false;
910
911 Parser_closure closure(input_file->filename().c_str(),
912 input_argument->file().options(),
913 input_group != NULL,
914 &lex.tokens());
915
916 if (yyparse(&closure) != 0)
917 return false;
918
919 // THIS_BLOCKER must be clear before we may add anything to the
920 // symbol table. We are responsible for unblocking NEXT_BLOCKER
921 // when we are done. We are responsible for deleting THIS_BLOCKER
922 // when it is unblocked.
923
924 if (!closure.saw_inputs())
925 {
926 // The script did not add any files to read. Note that we are
927 // not permitted to call NEXT_BLOCKER->unblock() here even if
928 // THIS_BLOCKER is NULL, as we are not in the main thread.
929 workqueue->queue(new Script_unblock(this_blocker, next_blocker));
930 return true;
931 }
932
933 for (Input_arguments::const_iterator p = closure.inputs()->begin();
934 p != closure.inputs()->end();
935 ++p)
936 {
937 Task_token* nb;
938 if (p + 1 == closure.inputs()->end())
939 nb = next_blocker;
940 else
941 {
942 nb = new Task_token();
943 nb->add_blocker();
944 }
945 workqueue->queue(new Read_symbols(options, input_objects, symtab,
946 layout, dirsearch, &*p,
947 input_group, this_blocker, nb));
948 this_blocker = nb;
949 }
950
951 return true;
952 }
953
// Translates keyword strings into the codes the bison parser expects.

class Keyword_to_parsecode
{
 public:
  // One table entry: a keyword and its parsecode.
  struct Keyword_parsecode
  {
    // The keyword text.
    const char* keyword;
    // The parsecode for that keyword.
    int parsecode;
  };

  // Look up KEYWORD.  Return its parsecode, or 0 if KEYWORD is not a
  // keyword.
  static int
  keyword_to_parsecode(const char* keyword);

 private:
  // The table of all keywords.
  static const Keyword_parsecode keyword_parsecodes_[];

  // How many entries the table has.
  static const int keyword_count;
};
981
982 // Mapping from keyword string to keyword parsecode. This array must
983 // be kept in sorted order. Parsecodes are looked up using bsearch.
984 // This array must correspond to the list of parsecodes in yyscript.y.
985
986 const Keyword_to_parsecode::Keyword_parsecode
987 Keyword_to_parsecode::keyword_parsecodes_[] =
988 {
989 { "ABSOLUTE", ABSOLUTE },
990 { "ADDR", ADDR },
991 { "ALIGN", ALIGN_K },
992 { "ASSERT", ASSERT_K },
993 { "AS_NEEDED", AS_NEEDED },
994 { "AT", AT },
995 { "BIND", BIND },
996 { "BLOCK", BLOCK },
997 { "BYTE", BYTE },
998 { "CONSTANT", CONSTANT },
999 { "CONSTRUCTORS", CONSTRUCTORS },
1000 { "COPY", COPY },
1001 { "CREATE_OBJECT_SYMBOLS", CREATE_OBJECT_SYMBOLS },
1002 { "DATA_SEGMENT_ALIGN", DATA_SEGMENT_ALIGN },
1003 { "DATA_SEGMENT_END", DATA_SEGMENT_END },
1004 { "DATA_SEGMENT_RELRO_END", DATA_SEGMENT_RELRO_END },
1005 { "DEFINED", DEFINED },
1006 { "DSECT", DSECT },
1007 { "ENTRY", ENTRY },
1008 { "EXCLUDE_FILE", EXCLUDE_FILE },
1009 { "EXTERN", EXTERN },
1010 { "FILL", FILL },
1011 { "FLOAT", FLOAT },
1012 { "FORCE_COMMON_ALLOCATION", FORCE_COMMON_ALLOCATION },
1013 { "GROUP", GROUP },
1014 { "HLL", HLL },
1015 { "INCLUDE", INCLUDE },
1016 { "INFO", INFO },
1017 { "INHIBIT_COMMON_ALLOCATION", INHIBIT_COMMON_ALLOCATION },
1018 { "INPUT", INPUT },
1019 { "KEEP", KEEP },
1020 { "LENGTH", LENGTH },
1021 { "LOADADDR", LOADADDR },
1022 { "LONG", LONG },
1023 { "MAP", MAP },
1024 { "MAX", MAX_K },
1025 { "MEMORY", MEMORY },
1026 { "MIN", MIN_K },
1027 { "NEXT", NEXT },
1028 { "NOCROSSREFS", NOCROSSREFS },
1029 { "NOFLOAT", NOFLOAT },
1030 { "NOLOAD", NOLOAD },
1031 { "ONLY_IF_RO", ONLY_IF_RO },
1032 { "ONLY_IF_RW", ONLY_IF_RW },
1033 { "ORIGIN", ORIGIN },
1034 { "OUTPUT", OUTPUT },
1035 { "OUTPUT_ARCH", OUTPUT_ARCH },
1036 { "OUTPUT_FORMAT", OUTPUT_FORMAT },
1037 { "OVERLAY", OVERLAY },
1038 { "PHDRS", PHDRS },
1039 { "PROVIDE", PROVIDE },
1040 { "PROVIDE_HIDDEN", PROVIDE_HIDDEN },
1041 { "QUAD", QUAD },
1042 { "SEARCH_DIR", SEARCH_DIR },
1043 { "SECTIONS", SECTIONS },
1044 { "SEGMENT_START", SEGMENT_START },
1045 { "SHORT", SHORT },
1046 { "SIZEOF", SIZEOF },
1047 { "SIZEOF_HEADERS", SIZEOF_HEADERS },
1048 { "SORT_BY_ALIGNMENT", SORT_BY_ALIGNMENT },
1049 { "SORT_BY_NAME", SORT_BY_NAME },
1050 { "SPECIAL", SPECIAL },
1051 { "SQUAD", SQUAD },
1052 { "STARTUP", STARTUP },
1053 { "SUBALIGN", SUBALIGN },
1054 { "SYSLIB", SYSLIB },
1055 { "TARGET", TARGET_K },
1056 { "TRUNCATE", TRUNCATE },
1057 { "VERSION", VERSIONK },
1058 { "global", GLOBAL },
1059 { "l", LENGTH },
1060 { "len", LENGTH },
1061 { "local", LOCAL },
1062 { "o", ORIGIN },
1063 { "org", ORIGIN },
1064 { "sizeof_headers", SIZEOF_HEADERS },
1065 };
1066
1067 const int Keyword_to_parsecode::keyword_count =
1068 (sizeof(Keyword_to_parsecode::keyword_parsecodes_)
1069 / sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]));
1070
1071 // Comparison function passed to bsearch.
1072
1073 extern "C"
1074 {
1075
1076 static int
1077 ktt_compare(const void* keyv, const void* kttv)
1078 {
1079 const char* key = static_cast<const char*>(keyv);
1080 const Keyword_to_parsecode::Keyword_parsecode* ktt =
1081 static_cast<const Keyword_to_parsecode::Keyword_parsecode*>(kttv);
1082 return strcmp(key, ktt->keyword);
1083 }
1084
1085 } // End extern "C".
1086
1087 int
1088 Keyword_to_parsecode::keyword_to_parsecode(const char* keyword)
1089 {
1090 void* kttv = bsearch(keyword,
1091 Keyword_to_parsecode::keyword_parsecodes_,
1092 Keyword_to_parsecode::keyword_count,
1093 sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]),
1094 ktt_compare);
1095 if (kttv == NULL)
1096 return 0;
1097 Keyword_parsecode* ktt = static_cast<Keyword_parsecode*>(kttv);
1098 return ktt->parsecode;
1099 }
1100
1101 } // End namespace gold.
1102
1103 // The remaining functions are extern "C", so it's clearer to not put
1104 // them in namespace gold.
1105
1106 using namespace gold;
1107
1108 // This function is called by the bison parser to return the next
1109 // token.
1110
1111 extern "C" int
1112 yylex(YYSTYPE* lvalp, void* closurev)
1113 {
1114 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1115
1116 if (closure->at_eof())
1117 return 0;
1118
1119 const Token* token = closure->next_token();
1120
1121 switch (token->classification())
1122 {
1123 default:
1124 case Token::TOKEN_INVALID:
1125 case Token::TOKEN_EOF:
1126 gold_unreachable();
1127
1128 case Token::TOKEN_STRING:
1129 {
1130 const char* str = token->string_value().c_str();
1131 int parsecode = Keyword_to_parsecode::keyword_to_parsecode(str);
1132 if (parsecode != 0)
1133 return parsecode;
1134 lvalp->string = str;
1135 return STRING;
1136 }
1137
1138 case Token::TOKEN_OPERATOR:
1139 return token->operator_value();
1140
1141 case Token::TOKEN_INTEGER:
1142 lvalp->integer = token->integer_value();
1143 return INTEGER;
1144 }
1145 }
1146
1147 // This function is called by the bison parser to report an error.
1148
1149 extern "C" void
1150 yyerror(void* closurev, const char* message)
1151 {
1152 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1153
1154 fprintf(stderr, _("%s: %s: %s\n"),
1155 program_name, closure->filename(), message);
1156 gold_exit(false);
1157 }
1158
1159 // Called by the bison parser to add a file to the link.
1160
1161 extern "C" void
1162 script_add_file(void* closurev, const char* name)
1163 {
1164 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1165 std::string absname;
1166 if (name[0] == '/')
1167 {
1168 absname = name;
1169 }
1170 else
1171 {
1172 // Prepend `dirname closure->filename()` to make the path absolute.
1173 char *slash = strrchr(closure->filename(), '/');
1174 absname.assign(closure->filename(),
1175 slash ? slash - closure->filename() + 1 : 0);
1176 absname += name;
1177 }
1178 Input_file_argument file(absname.c_str(), false, closure->position_dependent_options());
1179 closure->inputs()->add_file(file);
1180 }
1181
1182 // Called by the bison parser to start a group. If we are already in
1183 // a group, that means that this script was invoked within a
1184 // --start-group --end-group sequence on the command line, or that
1185 // this script was found in a GROUP of another script. In that case,
1186 // we simply continue the existing group, rather than starting a new
1187 // one. It is possible to construct a case in which this will do
1188 // something other than what would happen if we did a recursive group,
1189 // but it's hard to imagine why the different behaviour would be
1190 // useful for a real program. Avoiding recursive groups is simpler
1191 // and more efficient.
1192
1193 extern "C" void
1194 script_start_group(void* closurev)
1195 {
1196 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1197 if (!closure->in_group())
1198 closure->inputs()->start_group();
1199 }
1200
1201 // Called by the bison parser at the end of a group.
1202
1203 extern "C" void
1204 script_end_group(void* closurev)
1205 {
1206 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1207 if (!closure->in_group())
1208 closure->inputs()->end_group();
1209 }
1210
1211 // Called by the bison parser to start an AS_NEEDED list.
1212
1213 extern "C" void
1214 script_start_as_needed(void* closurev)
1215 {
1216 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1217 closure->position_dependent_options().set_as_needed();
1218 }
1219
1220 // Called by the bison parser at the end of an AS_NEEDED list.
1221
1222 extern "C" void
1223 script_end_as_needed(void* closurev)
1224 {
1225 Parser_closure* closure = static_cast<Parser_closure*>(closurev);
1226 closure->position_dependent_options().clear_as_needed();
1227 }
This page took 0.12318 seconds and 5 git commands to generate.