Commit | Line | Data |
---|---|---|
dbe717ef ILT |
1 | // script.cc -- handle linker scripts for gold. |
2 | ||
3 | #include "gold.h" | |
4 | ||
5 | #include <string> | |
6 | #include <vector> | |
7 | #include <cassert> | |
8 | #include <cstdio> | |
9 | #include <cstdlib> | |
10 | ||
11 | #include "options.h" | |
12 | #include "fileread.h" | |
13 | #include "workqueue.h" | |
14 | #include "readsyms.h" | |
15 | #include "yyscript.h" | |
16 | #include "script.h" | |
17 | #include "script-c.h" | |
18 | ||
19 | namespace gold | |
20 | { | |
21 | ||
// A single lexical token taken from a linker script.  Keywords get
// no special treatment at this level; each one is just an ordinary
// string token.

class Token
{
 public:
  // The kinds of token the lexer can produce.
  enum Classification
  {
    // Not a valid token.
    TOKEN_INVALID,
    // The end of the input.
    TOKEN_EOF,
    // A string of characters.
    TOKEN_STRING,
    // An operator.
    TOKEN_OPERATOR,
    // A number (an integer).
    TOKEN_INTEGER
  };

  // Default constructor, needed so that tokens can be stored in STL
  // containers.  The result is an invalid token at line 0, column 0.
  Token()
    : classification_(TOKEN_INVALID), value_(), opcode_(0),
      lineno_(0), charpos_(0)
  { }

  // Build a token which carries no value.  Only TOKEN_INVALID and
  // TOKEN_EOF may be built this way.
  Token(Classification classification, int lineno, int charpos)
    : classification_(classification), value_(), opcode_(0),
      lineno_(lineno), charpos_(charpos)
  {
    assert(classification == TOKEN_INVALID || classification == TOKEN_EOF);
  }

  // Build a token which carries the text VALUE.  TOKEN_INVALID and
  // TOKEN_EOF may not be built this way.
  Token(Classification classification, const std::string& value,
        int lineno, int charpos)
    : classification_(classification), value_(value), opcode_(0),
      lineno_(lineno), charpos_(charpos)
  {
    assert(classification != TOKEN_INVALID && classification != TOKEN_EOF);
  }

  // Build a TOKEN_STRING token holding S.
  Token(const std::string& s, int lineno, int charpos)
    : classification_(TOKEN_STRING), value_(s), opcode_(0),
      lineno_(lineno), charpos_(charpos)
  { }

  // Build a TOKEN_OPERATOR token holding OPCODE.
  Token(int opcode, int lineno, int charpos)
    : classification_(TOKEN_OPERATOR), value_(), opcode_(opcode),
      lineno_(lineno), charpos_(charpos)
  { }

  // Whether this token is invalid.
  bool
  is_invalid() const
  { return classification_ == TOKEN_INVALID; }

  // Whether this token marks the end of the input.
  bool
  is_eof() const
  { return classification_ == TOKEN_EOF; }

  // The classification of this token.
  Classification
  classification() const
  { return classification_; }

  // The line number at which the token starts.
  int
  lineno() const
  { return lineno_; }

  // The character position at which the token starts.
  int
  charpos() const
  { return charpos_; }

  // The text of a TOKEN_STRING token.
  const std::string&
  string_value() const
  {
    assert(classification_ == TOKEN_STRING);
    return value_;
  }

  // The opcode of a TOKEN_OPERATOR token.
  int
  operator_value() const
  {
    assert(classification_ == TOKEN_OPERATOR);
    return opcode_;
  }

  // The numeric value of a TOKEN_INTEGER token.  The text is
  // converted with base 0, so strtoll picks the base from the prefix.
  int64_t
  integer_value() const
  {
    assert(classification_ == TOKEN_INTEGER);
    const char* const text = value_.c_str();
    return strtoll(text, NULL, 0);
  }

 private:
  // What kind of token this is.
  Classification classification_;
  // The text of the token, for TOKEN_STRING or TOKEN_INTEGER.
  std::string value_;
  // The opcode of the token, for TOKEN_OPERATOR.
  int opcode_;
  // The line on which the token started (one based).
  int lineno_;
  // The character position within that line at which the token
  // started (one based).
  int charpos_;
};
135 | ||
136 | // This class handles lexing a file into a sequence of tokens. We | |
137 | // don't expect linker scripts to be large, so we just read them and | |
138 | // tokenize them all at once. | |
139 | ||
140 | class Lex | |
141 | { | |
142 | public: | |
143 | Lex(Input_file* input_file) | |
144 | : input_file_(input_file), tokens_() | |
145 | { } | |
146 | ||
147 | // Tokenize the file. Return the final token, which will be either | |
148 | // an invalid token or an EOF token. An invalid token indicates | |
149 | // that tokenization failed. | |
150 | Token | |
151 | tokenize(); | |
152 | ||
153 | // A token sequence. | |
154 | typedef std::vector<Token> Token_sequence; | |
155 | ||
156 | // Return the tokens. | |
157 | const Token_sequence& | |
158 | tokens() const | |
159 | { return this->tokens_; } | |
160 | ||
161 | private: | |
162 | Lex(const Lex&); | |
163 | Lex& operator=(const Lex&); | |
164 | ||
165 | // Read the file into a string buffer. | |
166 | void | |
167 | read_file(std::string*); | |
168 | ||
169 | // Make a general token with no value at the current location. | |
170 | Token | |
171 | make_token(Token::Classification c, const char* p) const | |
172 | { return Token(c, this->lineno_, p - this->linestart_ + 1); } | |
173 | ||
174 | // Make a general token with a value at the current location. | |
175 | Token | |
176 | make_token(Token::Classification c, const std::string& v, const char* p) | |
177 | const | |
178 | { return Token(c, v, this->lineno_, p - this->linestart_ + 1); } | |
179 | ||
180 | // Make an operator token at the current location. | |
181 | Token | |
182 | make_token(int opcode, const char* p) const | |
183 | { return Token(opcode, this->lineno_, p - this->linestart_ + 1); } | |
184 | ||
185 | // Make an invalid token at the current location. | |
186 | Token | |
187 | make_invalid_token(const char* p) | |
188 | { return this->make_token(Token::TOKEN_INVALID, p); } | |
189 | ||
190 | // Make an EOF token at the current location. | |
191 | Token | |
192 | make_eof_token(const char* p) | |
193 | { return this->make_token(Token::TOKEN_EOF, p); } | |
194 | ||
195 | // Return whether C can be the first character in a name. C2 is the | |
196 | // next character, since we sometimes need that. | |
197 | static inline bool | |
198 | can_start_name(char c, char c2); | |
199 | ||
200 | // Return whether C can appear in a name which has already started. | |
201 | static inline bool | |
202 | can_continue_name(char c); | |
203 | ||
204 | // Return whether C, C2, C3 can start a hex number. | |
205 | static inline bool | |
206 | can_start_hex(char c, char c2, char c3); | |
207 | ||
208 | // Return whether C can appear in a hex number. | |
209 | static inline bool | |
210 | can_continue_hex(char c); | |
211 | ||
212 | // Return whether C can start a non-hex number. | |
213 | static inline bool | |
214 | can_start_number(char c); | |
215 | ||
216 | // Return whether C can appear in a non-hex number. | |
217 | static inline bool | |
218 | can_continue_number(char c) | |
219 | { return Lex::can_start_number(c); } | |
220 | ||
221 | // If C1 C2 C3 form a valid three character operator, return the | |
222 | // opcode. Otherwise return 0. | |
223 | static inline int | |
224 | three_char_operator(char c1, char c2, char c3); | |
225 | ||
226 | // If C1 C2 form a valid two character operator, return the opcode. | |
227 | // Otherwise return 0. | |
228 | static inline int | |
229 | two_char_operator(char c1, char c2); | |
230 | ||
231 | // If C1 is a valid one character operator, return the opcode. | |
232 | // Otherwise return 0. | |
233 | static inline int | |
234 | one_char_operator(char c1); | |
235 | ||
236 | // Read the next token. | |
237 | Token | |
238 | get_token(const char**); | |
239 | ||
240 | // Skip a C style /* */ comment. Return false if the comment did | |
241 | // not end. | |
242 | bool | |
243 | skip_c_comment(const char**); | |
244 | ||
245 | // Skip a line # comment. Return false if there was no newline. | |
246 | bool | |
247 | skip_line_comment(const char**); | |
248 | ||
249 | // Build a token CLASSIFICATION from all characters that match | |
250 | // CAN_CONTINUE_FN. The token starts at START. Start matching from | |
251 | // MATCH. Set *PP to the character following the token. | |
252 | inline Token | |
253 | gather_token(Token::Classification, bool (*can_continue_fn)(char), | |
254 | const char* start, const char* match, const char** pp); | |
255 | ||
256 | // Build a token from a quoted string. | |
257 | Token | |
258 | gather_quoted_string(const char** pp); | |
259 | ||
260 | // The file we are reading. | |
261 | Input_file* input_file_; | |
262 | // The token sequence we create. | |
263 | Token_sequence tokens_; | |
264 | // The current line number. | |
265 | int lineno_; | |
266 | // The start of the current line in the buffer. | |
267 | const char* linestart_; | |
268 | }; | |
269 | ||
270 | // Read the whole file into memory. We don't expect linker scripts to | |
271 | // be large, so we just use a std::string as a buffer. We ignore the | |
272 | // data we've already read, so that we read aligned buffers. | |
273 | ||
274 | void | |
275 | Lex::read_file(std::string* contents) | |
276 | { | |
277 | contents->clear(); | |
278 | off_t off = 0; | |
279 | off_t got; | |
280 | unsigned char buf[BUFSIZ]; | |
281 | do | |
282 | { | |
283 | this->input_file_->file().read(off, sizeof buf, buf, &got); | |
284 | contents->append(reinterpret_cast<char*>(&buf[0]), got); | |
285 | } | |
286 | while (got == sizeof buf); | |
287 | } | |
288 | ||
289 | // Return whether C can be the start of a name, if the next character | |
290 | // is C2. A name can being with a letter, underscore, period, or | |
291 | // dollar sign. Because a name can be a file name, we also permit | |
292 | // forward slash, backslash, and tilde. Tilde is the tricky case | |
293 | // here; GNU ld also uses it as a bitwise not operator. It is only | |
294 | // recognized as the operator if it is not immediately followed by | |
295 | // some character which can appear in a symbol. That is, "~0" is a | |
296 | // symbol name, and "~ 0" is an expression using bitwise not. We are | |
297 | // compatible. | |
298 | ||
299 | inline bool | |
300 | Lex::can_start_name(char c, char c2) | |
301 | { | |
302 | switch (c) | |
303 | { | |
304 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
305 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': | |
306 | case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R': | |
307 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
308 | case 'Y': case 'Z': | |
309 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
310 | case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
311 | case 'm': case 'n': case 'o': case 'q': case 'p': case 'r': | |
312 | case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
313 | case 'y': case 'z': | |
314 | case '_': case '.': case '$': case '/': case '\\': | |
315 | return true; | |
316 | ||
317 | case '~': | |
318 | return can_continue_name(c2); | |
319 | ||
320 | default: | |
321 | return false; | |
322 | } | |
323 | } | |
324 | ||
325 | // Return whether C can continue a name which has already started. | |
326 | // Subsequent characters in a name are the same as the leading | |
327 | // characters, plus digits and "=+-:[],?*". So in general the linker | |
328 | // script language requires spaces around operators. | |
329 | ||
330 | inline bool | |
331 | Lex::can_continue_name(char c) | |
332 | { | |
333 | switch (c) | |
334 | { | |
335 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
336 | case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': | |
337 | case 'M': case 'N': case 'O': case 'Q': case 'P': case 'R': | |
338 | case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': | |
339 | case 'Y': case 'Z': | |
340 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
341 | case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': | |
342 | case 'm': case 'n': case 'o': case 'q': case 'p': case 'r': | |
343 | case 's': case 't': case 'u': case 'v': case 'w': case 'x': | |
344 | case 'y': case 'z': | |
345 | case '_': case '.': case '$': case '/': case '\\': | |
346 | case '~': | |
347 | case '0': case '1': case '2': case '3': case '4': | |
348 | case '5': case '6': case '7': case '8': case '9': | |
349 | case '=': case '+': case '-': case ':': case '[': case ']': | |
350 | case ',': case '?': case '*': | |
351 | return true; | |
352 | ||
353 | default: | |
354 | return false; | |
355 | } | |
356 | } | |
357 | ||
// For a number we accept 0x followed by hex digits, or any sequence
// of digits.  The old linker accepts leading '$' for hex, and
// trailing HXBOD.  Those are for MRI compatibility and we don't
// accept them.  The old linker also accepts trailing MK for mega or
// kilo.  Those are mentioned in the documentation and are meant to
// be accepted, but the lexing functions below do not recognize them
// yet.
364 | ||
365 | // Return whether C1 C2 C3 can start a hex number. | |
366 | ||
367 | inline bool | |
368 | Lex::can_start_hex(char c1, char c2, char c3) | |
369 | { | |
370 | if (c1 == '0' && (c2 == 'x' || c2 == 'X')) | |
371 | return Lex::can_continue_hex(c3); | |
372 | return false; | |
373 | } | |
374 | ||
375 | // Return whether C can appear in a hex number. | |
376 | ||
377 | inline bool | |
378 | Lex::can_continue_hex(char c) | |
379 | { | |
380 | switch (c) | |
381 | { | |
382 | case '0': case '1': case '2': case '3': case '4': | |
383 | case '5': case '6': case '7': case '8': case '9': | |
384 | case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': | |
385 | case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': | |
386 | return true; | |
387 | ||
388 | default: | |
389 | return false; | |
390 | } | |
391 | } | |
392 | ||
393 | // Return whether C can start a non-hex number. | |
394 | ||
395 | inline bool | |
396 | Lex::can_start_number(char c) | |
397 | { | |
398 | switch (c) | |
399 | { | |
400 | case '0': case '1': case '2': case '3': case '4': | |
401 | case '5': case '6': case '7': case '8': case '9': | |
402 | return true; | |
403 | ||
404 | default: | |
405 | return false; | |
406 | } | |
407 | } | |
408 | ||
409 | // If C1 C2 C3 form a valid three character operator, return the | |
410 | // opcode (defined in the yyscript.h file generated from yyscript.y). | |
411 | // Otherwise return 0. | |
412 | ||
413 | inline int | |
414 | Lex::three_char_operator(char c1, char c2, char c3) | |
415 | { | |
416 | switch (c1) | |
417 | { | |
418 | case '<': | |
419 | if (c2 == '<' && c3 == '=') | |
420 | return LSHIFTEQ; | |
421 | break; | |
422 | case '>': | |
423 | if (c2 == '>' && c3 == '=') | |
424 | return RSHIFTEQ; | |
425 | break; | |
426 | default: | |
427 | break; | |
428 | } | |
429 | return 0; | |
430 | } | |
431 | ||
432 | // If C1 C2 form a valid two character operator, return the opcode | |
433 | // (defined in the yyscript.h file generated from yyscript.y). | |
434 | // Otherwise return 0. | |
435 | ||
436 | inline int | |
437 | Lex::two_char_operator(char c1, char c2) | |
438 | { | |
439 | switch (c1) | |
440 | { | |
441 | case '=': | |
442 | if (c2 == '=') | |
443 | return EQ; | |
444 | break; | |
445 | case '!': | |
446 | if (c2 == '=') | |
447 | return NE; | |
448 | break; | |
449 | case '+': | |
450 | if (c2 == '=') | |
451 | return PLUSEQ; | |
452 | break; | |
453 | case '-': | |
454 | if (c2 == '=') | |
455 | return MINUSEQ; | |
456 | break; | |
457 | case '*': | |
458 | if (c2 == '=') | |
459 | return MULTEQ; | |
460 | break; | |
461 | case '/': | |
462 | if (c2 == '=') | |
463 | return DIVEQ; | |
464 | break; | |
465 | case '|': | |
466 | if (c2 == '=') | |
467 | return OREQ; | |
468 | if (c2 == '|') | |
469 | return OROR; | |
470 | break; | |
471 | case '&': | |
472 | if (c2 == '=') | |
473 | return ANDEQ; | |
474 | if (c2 == '&') | |
475 | return ANDAND; | |
476 | break; | |
477 | case '>': | |
478 | if (c2 == '=') | |
479 | return GE; | |
480 | if (c2 == '>') | |
481 | return RSHIFT; | |
482 | break; | |
483 | case '<': | |
484 | if (c2 == '=') | |
485 | return LE; | |
486 | if (c2 == '<') | |
487 | return LSHIFT; | |
488 | break; | |
489 | default: | |
490 | break; | |
491 | } | |
492 | return 0; | |
493 | } | |
494 | ||
495 | // If C1 is a valid operator, return the opcode. Otherwise return 0. | |
496 | ||
497 | inline int | |
498 | Lex::one_char_operator(char c1) | |
499 | { | |
500 | switch (c1) | |
501 | { | |
502 | case '+': | |
503 | case '-': | |
504 | case '*': | |
505 | case '/': | |
506 | case '%': | |
507 | case '!': | |
508 | case '&': | |
509 | case '|': | |
510 | case '^': | |
511 | case '~': | |
512 | case '<': | |
513 | case '>': | |
514 | case '=': | |
515 | case '?': | |
516 | case ',': | |
517 | case '(': | |
518 | case ')': | |
519 | case '{': | |
520 | case '}': | |
521 | case '[': | |
522 | case ']': | |
523 | case ':': | |
524 | case ';': | |
525 | return c1; | |
526 | default: | |
527 | return 0; | |
528 | } | |
529 | } | |
530 | ||
531 | // Skip a C style comment. *PP points to just after the "/*". Return | |
532 | // false if the comment did not end. | |
533 | ||
534 | bool | |
535 | Lex::skip_c_comment(const char** pp) | |
536 | { | |
537 | const char* p = *pp; | |
538 | while (p[0] != '*' || p[1] != '/') | |
539 | { | |
540 | if (*p == '\0') | |
541 | { | |
542 | *pp = p; | |
543 | return false; | |
544 | } | |
545 | ||
546 | if (*p == '\n') | |
547 | { | |
548 | ++this->lineno_; | |
549 | this->linestart_ = p + 1; | |
550 | } | |
551 | ++p; | |
552 | } | |
553 | ||
554 | *pp = p + 2; | |
555 | return true; | |
556 | } | |
557 | ||
558 | // Skip a line # comment. Return false if there was no newline. | |
559 | ||
560 | bool | |
561 | Lex::skip_line_comment(const char** pp) | |
562 | { | |
563 | const char* p = *pp; | |
564 | size_t skip = strcspn(p, "\n"); | |
565 | if (p[skip] == '\0') | |
566 | { | |
567 | *pp = p + skip; | |
568 | return false; | |
569 | } | |
570 | ||
571 | p += skip + 1; | |
572 | ++this->lineno_; | |
573 | this->linestart_ = p; | |
574 | *pp = p; | |
575 | ||
576 | return true; | |
577 | } | |
578 | ||
579 | // Build a token CLASSIFICATION from all characters that match | |
580 | // CAN_CONTINUE_FN. Update *PP. | |
581 | ||
582 | inline Token | |
583 | Lex::gather_token(Token::Classification classification, | |
584 | bool (*can_continue_fn)(char), | |
585 | const char* start, | |
586 | const char* match, | |
587 | const char **pp) | |
588 | { | |
589 | while ((*can_continue_fn)(*match)) | |
590 | ++match; | |
591 | *pp = match; | |
592 | return this->make_token(classification, | |
593 | std::string(start, match - start), | |
594 | start); | |
595 | } | |
596 | ||
597 | // Build a token from a quoted string. | |
598 | ||
599 | Token | |
600 | Lex::gather_quoted_string(const char** pp) | |
601 | { | |
602 | const char* start = *pp; | |
603 | const char* p = start; | |
604 | ++p; | |
605 | size_t skip = strcspn(p, "\"\n"); | |
606 | if (p[skip] != '"') | |
607 | return this->make_invalid_token(start); | |
608 | *pp = p + skip + 1; | |
609 | return this->make_token(Token::TOKEN_STRING, | |
610 | std::string(p, skip), | |
611 | start); | |
612 | } | |
613 | ||
614 | // Return the next token at *PP. Update *PP. General guideline: we | |
615 | // require linker scripts to be simple ASCII. No unicode linker | |
616 | // scripts. In particular we can assume that any '\0' is the end of | |
617 | // the input. | |
618 | ||
619 | Token | |
620 | Lex::get_token(const char** pp) | |
621 | { | |
622 | const char* p = *pp; | |
623 | ||
624 | while (true) | |
625 | { | |
626 | if (*p == '\0') | |
627 | { | |
628 | *pp = p; | |
629 | return this->make_eof_token(p); | |
630 | } | |
631 | ||
632 | // Skip whitespace quickly. | |
633 | while (*p == ' ' || *p == '\t') | |
634 | ++p; | |
635 | ||
636 | if (*p == '\n') | |
637 | { | |
638 | ++p; | |
639 | ++this->lineno_; | |
640 | this->linestart_ = p; | |
641 | continue; | |
642 | } | |
643 | ||
644 | // Skip C style comments. | |
645 | if (p[0] == '/' && p[1] == '*') | |
646 | { | |
647 | int lineno = this->lineno_; | |
648 | int charpos = p - this->linestart_ + 1; | |
649 | ||
650 | *pp = p + 2; | |
651 | if (!this->skip_c_comment(pp)) | |
652 | return Token(Token::TOKEN_INVALID, lineno, charpos); | |
653 | p = *pp; | |
654 | ||
655 | continue; | |
656 | } | |
657 | ||
658 | // Skip line comments. | |
659 | if (*p == '#') | |
660 | { | |
661 | *pp = p + 1; | |
662 | if (!this->skip_line_comment(pp)) | |
663 | return this->make_eof_token(p); | |
664 | p = *pp; | |
665 | continue; | |
666 | } | |
667 | ||
668 | // Check for a name. | |
669 | if (Lex::can_start_name(p[0], p[1])) | |
670 | return this->gather_token(Token::TOKEN_STRING, | |
671 | Lex::can_continue_name, | |
672 | p, p + 2, pp); | |
673 | ||
674 | // We accept any arbitrary name in double quotes, as long as it | |
675 | // does not cross a line boundary. | |
676 | if (*p == '"') | |
677 | { | |
678 | *pp = p; | |
679 | return this->gather_quoted_string(pp); | |
680 | } | |
681 | ||
682 | // Check for a number. | |
683 | ||
684 | if (Lex::can_start_hex(p[0], p[1], p[2])) | |
685 | return this->gather_token(Token::TOKEN_INTEGER, | |
686 | Lex::can_continue_hex, | |
687 | p, p + 3, pp); | |
688 | ||
689 | if (Lex::can_start_number(p[0])) | |
690 | return this->gather_token(Token::TOKEN_INTEGER, | |
691 | Lex::can_continue_number, | |
692 | p, p + 1, pp); | |
693 | ||
694 | // Check for operators. | |
695 | ||
696 | int opcode = Lex::three_char_operator(p[0], p[1], p[2]); | |
697 | if (opcode != 0) | |
698 | { | |
699 | *pp = p + 3; | |
700 | return this->make_token(opcode, p); | |
701 | } | |
702 | ||
703 | opcode = Lex::two_char_operator(p[0], p[1]); | |
704 | if (opcode != 0) | |
705 | { | |
706 | *pp = p + 2; | |
707 | return this->make_token(opcode, p); | |
708 | } | |
709 | ||
710 | opcode = Lex::one_char_operator(p[0]); | |
711 | if (opcode != 0) | |
712 | { | |
713 | *pp = p + 1; | |
714 | return this->make_token(opcode, p); | |
715 | } | |
716 | ||
717 | return this->make_token(Token::TOKEN_INVALID, p); | |
718 | } | |
719 | } | |
720 | ||
721 | // Tokenize the file. Return the final token. | |
722 | ||
723 | Token | |
724 | Lex::tokenize() | |
725 | { | |
726 | std::string contents; | |
727 | this->read_file(&contents); | |
728 | ||
729 | const char* p = contents.c_str(); | |
730 | ||
731 | this->lineno_ = 1; | |
732 | this->linestart_ = p; | |
733 | ||
734 | while (true) | |
735 | { | |
736 | Token t(this->get_token(&p)); | |
737 | ||
738 | // Don't let an early null byte fool us into thinking that we've | |
739 | // reached the end of the file. | |
740 | if (t.is_eof() | |
741 | && static_cast<size_t>(p - contents.c_str()) < contents.length()) | |
742 | t = this->make_invalid_token(p); | |
743 | ||
744 | if (t.is_invalid() || t.is_eof()) | |
745 | return t; | |
746 | ||
747 | this->tokens_.push_back(t); | |
748 | } | |
749 | } | |
750 | ||
751 | // A trivial task which waits for THIS_BLOCKER to be clear and then | |
752 | // clears NEXT_BLOCKER. THIS_BLOCKER may be NULL. | |
753 | ||
754 | class Script_unblock : public Task | |
755 | { | |
756 | public: | |
757 | Script_unblock(Task_token* this_blocker, Task_token* next_blocker) | |
758 | : this_blocker_(this_blocker), next_blocker_(next_blocker) | |
759 | { } | |
760 | ||
761 | ~Script_unblock() | |
762 | { | |
763 | if (this->this_blocker_ != NULL) | |
764 | delete this->this_blocker_; | |
765 | } | |
766 | ||
767 | Is_runnable_type | |
768 | is_runnable(Workqueue*) | |
769 | { | |
770 | if (this->this_blocker_ != NULL && this->this_blocker_->is_blocked()) | |
771 | return IS_BLOCKED; | |
772 | return IS_RUNNABLE; | |
773 | } | |
774 | ||
775 | Task_locker* | |
776 | locks(Workqueue* workqueue) | |
777 | { | |
778 | return new Task_locker_block(*this->next_blocker_, workqueue); | |
779 | } | |
780 | ||
781 | void | |
782 | run(Workqueue*) | |
783 | { } | |
784 | ||
785 | private: | |
786 | Task_token* this_blocker_; | |
787 | Task_token* next_blocker_; | |
788 | }; | |
789 | ||
790 | // This class holds data passed through the parser to the lexer and to | |
791 | // the parser support functions. This avoids global variables. We | |
792 | // can't use global variables because we need not be called in the | |
793 | // main thread. | |
794 | ||
795 | class Parser_closure | |
796 | { | |
797 | public: | |
798 | Parser_closure(const char* filename, | |
799 | const Position_dependent_options& posdep_options, | |
800 | bool in_group, | |
801 | const Lex::Token_sequence* tokens) | |
802 | : filename_(filename), posdep_options_(posdep_options), | |
803 | in_group_(in_group), tokens_(tokens), | |
804 | next_token_index_(0), inputs_(NULL) | |
805 | { } | |
806 | ||
807 | // Return the file name. | |
808 | const char* | |
809 | filename() const | |
810 | { return this->filename_; } | |
811 | ||
812 | // Return the position dependent options. The caller may modify | |
813 | // this. | |
814 | Position_dependent_options& | |
815 | position_dependent_options() | |
816 | { return this->posdep_options_; } | |
817 | ||
818 | // Return whether this script is being run in a group. | |
819 | bool | |
820 | in_group() const | |
821 | { return this->in_group_; } | |
822 | ||
823 | // Whether we are at the end of the token list. | |
824 | bool | |
825 | at_eof() const | |
826 | { return this->next_token_index_ >= this->tokens_->size(); } | |
827 | ||
828 | // Return the next token. | |
829 | const Token* | |
830 | next_token() | |
831 | { | |
832 | const Token* ret = &(*this->tokens_)[this->next_token_index_]; | |
833 | ++this->next_token_index_; | |
834 | return ret; | |
835 | } | |
836 | ||
837 | // Return the list of input files, creating it if necessary. This | |
838 | // is a space leak--we never free the INPUTS_ pointer. | |
839 | Input_arguments* | |
840 | inputs() | |
841 | { | |
842 | if (this->inputs_ == NULL) | |
843 | this->inputs_ = new Input_arguments(); | |
844 | return this->inputs_; | |
845 | } | |
846 | ||
847 | // Return whether we saw any input files. | |
848 | bool | |
849 | saw_inputs() const | |
850 | { return this->inputs_ != NULL && !this->inputs_->empty(); } | |
851 | ||
852 | private: | |
853 | // The name of the file we are reading. | |
854 | const char* filename_; | |
855 | // The position dependent options. | |
856 | Position_dependent_options posdep_options_; | |
857 | // Whether we are currently in a --start-group/--end-group. | |
858 | bool in_group_; | |
859 | ||
860 | // The tokens to be returned by the lexer. | |
861 | const Lex::Token_sequence* tokens_; | |
862 | // The index of the next token to return. | |
863 | unsigned int next_token_index_; | |
864 | // New input files found to add to the link. | |
865 | Input_arguments* inputs_; | |
866 | }; | |
867 | ||
868 | // FILE was found as an argument on the command line. Try to read it | |
869 | // as a script. We've already read BYTES of data into P, but we | |
870 | // ignore that. Return true if the file was handled. | |
871 | ||
872 | bool | |
873 | read_input_script(Workqueue* workqueue, const General_options& options, | |
874 | Symbol_table* symtab, Layout* layout, | |
875 | const Dirsearch& dirsearch, Input_objects* input_objects, | |
876 | Input_group* input_group, | |
877 | const Input_argument* input_argument, | |
878 | Input_file* input_file, const unsigned char*, off_t, | |
879 | Task_token* this_blocker, Task_token* next_blocker) | |
880 | { | |
881 | Lex lex(input_file); | |
882 | if (lex.tokenize().is_invalid()) | |
883 | return false; | |
884 | ||
885 | Parser_closure closure(input_file->filename().c_str(), | |
886 | input_argument->file().options(), | |
887 | input_group != NULL, | |
888 | &lex.tokens()); | |
889 | ||
890 | if (yyparse(&closure) != 0) | |
891 | return false; | |
892 | ||
893 | // THIS_BLOCKER must be clear before we may add anything to the | |
894 | // symbol table. We are responsible for unblocking NEXT_BLOCKER | |
895 | // when we are done. We are responsible for deleting THIS_BLOCKER | |
896 | // when it is unblocked. | |
897 | ||
898 | if (!closure.saw_inputs()) | |
899 | { | |
900 | // The script did not add any files to read. Note that we are | |
901 | // not permitted to call NEXT_BLOCKER->unblock() here even if | |
902 | // THIS_BLOCKER is NULL, as we are not in the main thread. | |
903 | workqueue->queue(new Script_unblock(this_blocker, next_blocker)); | |
904 | return true; | |
905 | } | |
906 | ||
907 | for (Input_arguments::const_iterator p = closure.inputs()->begin(); | |
908 | p != closure.inputs()->end(); | |
909 | ++p) | |
910 | { | |
911 | Task_token* nb; | |
912 | if (p + 1 == closure.inputs()->end()) | |
913 | nb = next_blocker; | |
914 | else | |
915 | { | |
916 | nb = new Task_token(); | |
917 | nb->add_blocker(); | |
918 | } | |
919 | workqueue->queue(new Read_symbols(options, input_objects, symtab, | |
920 | layout, dirsearch, &*p, | |
921 | input_group, this_blocker, nb)); | |
922 | this_blocker = nb; | |
923 | } | |
924 | ||
925 | return true; | |
926 | } | |
927 | ||
// Manage the mapping from keywords to the codes expected by the
// bison parser.

class Keyword_to_parsecode
{
 public:
  // One entry in the keyword table.
  struct Keyword_parsecode
  {
    // The text of the keyword.
    const char* keyword;
    // The parsecode which stands for it.
    int parsecode;
  };

  // Return the parsecode corresponding to KEYWORD, or 0 if it is not
  // a keyword.
  static int
  keyword_to_parsecode(const char* keyword);

 private:
  // The table of all keywords.
  static const Keyword_parsecode keyword_parsecodes_[];

  // The number of entries in the table.
  static const int keyword_count;
};
955 | ||
956 | // Mapping from keyword string to keyword parsecode. This array must | |
957 | // be kept in sorted order. Parsecodes are looked up using bsearch. | |
958 | // This array must correspond to the list of parsecodes in yyscript.y. | |
959 | ||
960 | const Keyword_to_parsecode::Keyword_parsecode | |
961 | Keyword_to_parsecode::keyword_parsecodes_[] = | |
962 | { | |
963 | { "ABSOLUTE", ABSOLUTE }, | |
964 | { "ADDR", ADDR }, | |
965 | { "ALIGN", ALIGN_K }, | |
966 | { "ASSERT", ASSERT_K }, | |
967 | { "AS_NEEDED", AS_NEEDED }, | |
968 | { "AT", AT }, | |
969 | { "BIND", BIND }, | |
970 | { "BLOCK", BLOCK }, | |
971 | { "BYTE", BYTE }, | |
972 | { "CONSTANT", CONSTANT }, | |
973 | { "CONSTRUCTORS", CONSTRUCTORS }, | |
974 | { "COPY", COPY }, | |
975 | { "CREATE_OBJECT_SYMBOLS", CREATE_OBJECT_SYMBOLS }, | |
976 | { "DATA_SEGMENT_ALIGN", DATA_SEGMENT_ALIGN }, | |
977 | { "DATA_SEGMENT_END", DATA_SEGMENT_END }, | |
978 | { "DATA_SEGMENT_RELRO_END", DATA_SEGMENT_RELRO_END }, | |
979 | { "DEFINED", DEFINED }, | |
980 | { "DSECT", DSECT }, | |
981 | { "ENTRY", ENTRY }, | |
982 | { "EXCLUDE_FILE", EXCLUDE_FILE }, | |
983 | { "EXTERN", EXTERN }, | |
984 | { "FILL", FILL }, | |
985 | { "FLOAT", FLOAT }, | |
986 | { "FORCE_COMMON_ALLOCATION", FORCE_COMMON_ALLOCATION }, | |
987 | { "GROUP", GROUP }, | |
988 | { "HLL", HLL }, | |
989 | { "INCLUDE", INCLUDE }, | |
990 | { "INFO", INFO }, | |
991 | { "INHIBIT_COMMON_ALLOCATION", INHIBIT_COMMON_ALLOCATION }, | |
992 | { "INPUT", INPUT }, | |
993 | { "KEEP", KEEP }, | |
994 | { "LENGTH", LENGTH }, | |
995 | { "LOADADDR", LOADADDR }, | |
996 | { "LONG", LONG }, | |
997 | { "MAP", MAP }, | |
998 | { "MAX", MAX_K }, | |
999 | { "MEMORY", MEMORY }, | |
1000 | { "MIN", MIN_K }, | |
1001 | { "NEXT", NEXT }, | |
1002 | { "NOCROSSREFS", NOCROSSREFS }, | |
1003 | { "NOFLOAT", NOFLOAT }, | |
1004 | { "NOLOAD", NOLOAD }, | |
1005 | { "ONLY_IF_RO", ONLY_IF_RO }, | |
1006 | { "ONLY_IF_RW", ONLY_IF_RW }, | |
1007 | { "ORIGIN", ORIGIN }, | |
1008 | { "OUTPUT", OUTPUT }, | |
1009 | { "OUTPUT_ARCH", OUTPUT_ARCH }, | |
1010 | { "OUTPUT_FORMAT", OUTPUT_FORMAT }, | |
1011 | { "OVERLAY", OVERLAY }, | |
1012 | { "PHDRS", PHDRS }, | |
1013 | { "PROVIDE", PROVIDE }, | |
1014 | { "PROVIDE_HIDDEN", PROVIDE_HIDDEN }, | |
1015 | { "QUAD", QUAD }, | |
1016 | { "SEARCH_DIR", SEARCH_DIR }, | |
1017 | { "SECTIONS", SECTIONS }, | |
1018 | { "SEGMENT_START", SEGMENT_START }, | |
1019 | { "SHORT", SHORT }, | |
1020 | { "SIZEOF", SIZEOF }, | |
1021 | { "SIZEOF_HEADERS", SIZEOF_HEADERS }, | |
1022 | { "SORT_BY_ALIGNMENT", SORT_BY_ALIGNMENT }, | |
1023 | { "SORT_BY_NAME", SORT_BY_NAME }, | |
1024 | { "SPECIAL", SPECIAL }, | |
1025 | { "SQUAD", SQUAD }, | |
1026 | { "STARTUP", STARTUP }, | |
1027 | { "SUBALIGN", SUBALIGN }, | |
1028 | { "SYSLIB", SYSLIB }, | |
1029 | { "TARGET", TARGET_K }, | |
1030 | { "TRUNCATE", TRUNCATE }, | |
1031 | { "VERSION", VERSIONK }, | |
1032 | { "global", GLOBAL }, | |
1033 | { "l", LENGTH }, | |
1034 | { "len", LENGTH }, | |
1035 | { "local", LOCAL }, | |
1036 | { "o", ORIGIN }, | |
1037 | { "org", ORIGIN }, | |
1038 | { "sizeof_headers", SIZEOF_HEADERS }, | |
1039 | }; | |
1040 | ||
1041 | const int Keyword_to_parsecode::keyword_count = | |
1042 | (sizeof(Keyword_to_parsecode::keyword_parsecodes_) | |
1043 | / sizeof(Keyword_to_parsecode::keyword_parsecodes_[0])); | |
1044 | ||
1045 | // Comparison function passed to bsearch. | |
1046 | ||
1047 | extern "C" | |
1048 | { | |
1049 | ||
1050 | static int | |
1051 | ktt_compare(const void* keyv, const void* kttv) | |
1052 | { | |
1053 | const char* key = static_cast<const char*>(keyv); | |
1054 | const Keyword_to_parsecode::Keyword_parsecode* ktt = | |
1055 | static_cast<const Keyword_to_parsecode::Keyword_parsecode*>(kttv); | |
1056 | return strcmp(key, ktt->keyword); | |
1057 | } | |
1058 | ||
1059 | } // End extern "C". | |
1060 | ||
1061 | int | |
1062 | Keyword_to_parsecode::keyword_to_parsecode(const char* keyword) | |
1063 | { | |
1064 | void* kttv = bsearch(keyword, | |
1065 | Keyword_to_parsecode::keyword_parsecodes_, | |
1066 | Keyword_to_parsecode::keyword_count, | |
1067 | sizeof(Keyword_to_parsecode::keyword_parsecodes_[0]), | |
1068 | ktt_compare); | |
1069 | if (kttv == NULL) | |
1070 | return 0; | |
1071 | Keyword_parsecode* ktt = static_cast<Keyword_parsecode*>(kttv); | |
1072 | return ktt->parsecode; | |
1073 | } | |
1074 | ||
1075 | } // End namespace gold. | |
1076 | ||
1077 | // The remaining functions are extern "C", so it's clearer to not put | |
1078 | // them in namespace gold. | |
1079 | ||
1080 | using namespace gold; | |
1081 | ||
1082 | // This function is called by the bison parser to return the next | |
1083 | // token. | |
1084 | ||
1085 | extern "C" int | |
1086 | yylex(YYSTYPE* lvalp, void* closurev) | |
1087 | { | |
1088 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1089 | ||
1090 | if (closure->at_eof()) | |
1091 | return 0; | |
1092 | ||
1093 | const Token* token = closure->next_token(); | |
1094 | ||
1095 | switch (token->classification()) | |
1096 | { | |
1097 | default: | |
1098 | case Token::TOKEN_INVALID: | |
1099 | case Token::TOKEN_EOF: | |
1100 | abort(); | |
1101 | ||
1102 | case Token::TOKEN_STRING: | |
1103 | { | |
1104 | const char* str = token->string_value().c_str(); | |
1105 | int parsecode = Keyword_to_parsecode::keyword_to_parsecode(str); | |
1106 | if (parsecode != 0) | |
1107 | return parsecode; | |
1108 | lvalp->string = str; | |
1109 | return STRING; | |
1110 | } | |
1111 | ||
1112 | case Token::TOKEN_OPERATOR: | |
1113 | return token->operator_value(); | |
1114 | ||
1115 | case Token::TOKEN_INTEGER: | |
1116 | lvalp->integer = token->integer_value(); | |
1117 | return INTEGER; | |
1118 | } | |
1119 | } | |
1120 | ||
1121 | // This function is called by the bison parser to report an error. | |
1122 | ||
1123 | extern "C" void | |
1124 | yyerror(void* closurev, const char* message) | |
1125 | { | |
1126 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1127 | ||
1128 | fprintf(stderr, _("%s: %s: %s\n"), | |
1129 | program_name, closure->filename(), message); | |
1130 | gold_exit(false); | |
1131 | } | |
1132 | ||
1133 | // Called by the bison parser to add a file to the link. | |
1134 | ||
1135 | extern "C" void | |
1136 | script_add_file(void* closurev, const char* name) | |
1137 | { | |
1138 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1139 | Input_file_argument file(name, false, closure->position_dependent_options()); | |
1140 | closure->inputs()->add_file(file); | |
1141 | } | |
1142 | ||
1143 | // Called by the bison parser to start a group. If we are already in | |
1144 | // a group, that means that this script was invoked within a | |
1145 | // --start-group --end-group sequence on the command line, or that | |
1146 | // this script was found in a GROUP of another script. In that case, | |
1147 | // we simply continue the existing group, rather than starting a new | |
1148 | // one. It is possible to construct a case in which this will do | |
1149 | // something other than what would happen if we did a recursive group, | |
1150 | // but it's hard to imagine why the different behaviour would be | |
1151 | // useful for a real program. Avoiding recursive groups is simpler | |
1152 | // and more efficient. | |
1153 | ||
1154 | extern "C" void | |
1155 | script_start_group(void* closurev) | |
1156 | { | |
1157 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1158 | if (!closure->in_group()) | |
1159 | closure->inputs()->start_group(); | |
1160 | } | |
1161 | ||
1162 | // Called by the bison parser at the end of a group. | |
1163 | ||
1164 | extern "C" void | |
1165 | script_end_group(void* closurev) | |
1166 | { | |
1167 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1168 | if (!closure->in_group()) | |
1169 | closure->inputs()->end_group(); | |
1170 | } | |
1171 | ||
1172 | // Called by the bison parser to start an AS_NEEDED list. | |
1173 | ||
1174 | extern "C" void | |
1175 | script_start_as_needed(void* closurev) | |
1176 | { | |
1177 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1178 | closure->position_dependent_options().set_as_needed(); | |
1179 | } | |
1180 | ||
1181 | // Called by the bison parser at the end of an AS_NEEDED list. | |
1182 | ||
1183 | extern "C" void | |
1184 | script_end_as_needed(void* closurev) | |
1185 | { | |
1186 | Parser_closure* closure = static_cast<Parser_closure*>(closurev); | |
1187 | closure->position_dependent_options().clear_as_needed(); | |
1188 | } |