PR macros/13205:
[deliverable/binutils-gdb.git] / gdb / macroexp.c
1 /* C preprocessor macro expansion for GDB.
2 Copyright (C) 2002, 2007-2012 Free Software Foundation, Inc.
3 Contributed by Red Hat, Inc.
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
20 #include "defs.h"
21 #include "gdb_obstack.h"
22 #include "bcache.h"
23 #include "macrotab.h"
24 #include "macroexp.h"
25 #include "gdb_assert.h"
26 #include "c-lang.h"
27
28
29 \f
30 /* A resizeable, substringable string type. */
31
32
/* A character buffer used throughout macro expansion.  It can either
   own a growable heap block, or act as a non-owning window onto part
   of some other string.  */
struct macro_buffer
{
  /* The characters themselves.  Only the first LEN bytes are
     meaningful; SIZE bytes are allocated.  A SIZE of zero means TEXT
     does not point to a malloc'ed block.  When SHARED is non-zero
     TEXT points into a larger string owned by someone else, so it
     must not be appended to.  Because of sharing, the text cannot be
     assumed to be null-terminated.  */
  char *text;

  /* Number of meaningful characters in TEXT.  */
  int len;

  /* Number of bytes allocated for TEXT.  Meaningless for shared
     buffers, where it is kept at zero so that every "is there room
     to append?" test fails without a separate check of SHARED.  */
  int size;

  /* Zero when TEXT is its own malloc block and may be realloc'ed;
     non-zero when TEXT points into the middle of another block and
     must not be reallocated.  */
  int shared;

  /* Used to detect token splicing.

     Index within TEXT of the first character of the token that
     touches the end of TEXT.  Equal to LEN when TEXT holds no tokens
     or ends in whitespace (so no token abuts the end).  -1 when the
     nature of TEXT is unknown.  */
  int last_token;

  /* When the buffer holds a result from get_token: non-zero for an
     identifier token, zero otherwise.  */
  int is_identifier;
};
74
75
76 /* Set the macro buffer *B to the empty string, guessing that its
77 final contents will fit in N bytes. (It'll get resized if it
78 doesn't, so the guess doesn't have to be right.) Allocate the
79 initial storage with xmalloc. */
80 static void
81 init_buffer (struct macro_buffer *b, int n)
82 {
83 b->size = n;
84 if (n > 0)
85 b->text = (char *) xmalloc (n);
86 else
87 b->text = NULL;
88 b->len = 0;
89 b->shared = 0;
90 b->last_token = -1;
91 }
92
93
94 /* Set the macro buffer *BUF to refer to the LEN bytes at ADDR, as a
95 shared substring. */
96 static void
97 init_shared_buffer (struct macro_buffer *buf, char *addr, int len)
98 {
99 buf->text = addr;
100 buf->len = len;
101 buf->shared = 1;
102 buf->size = 0;
103 buf->last_token = -1;
104 }
105
106
107 /* Free the text of the buffer B. Raise an error if B is shared. */
108 static void
109 free_buffer (struct macro_buffer *b)
110 {
111 gdb_assert (! b->shared);
112 if (b->size)
113 xfree (b->text);
114 }
115
116 /* Like free_buffer, but return the text as an xstrdup()d string.
117 This only exists to try to make the API relatively clean. */
118
119 static char *
120 free_buffer_return_text (struct macro_buffer *b)
121 {
122 gdb_assert (! b->shared);
123 gdb_assert (b->size);
124 /* Nothing to do. */
125 return b->text;
126 }
127
/* Cleanup callback: UNTYPED_BUF is really a struct macro_buffer *,
   which is released with free_buffer.  */
static void
cleanup_macro_buffer (void *untyped_buf)
{
  struct macro_buffer *buf = (struct macro_buffer *) untyped_buf;

  free_buffer (buf);
}
134
135
136 /* Resize the buffer B to be at least N bytes long. Raise an error if
137 B shouldn't be resized. */
138 static void
139 resize_buffer (struct macro_buffer *b, int n)
140 {
141 /* We shouldn't be trying to resize shared strings. */
142 gdb_assert (! b->shared);
143
144 if (b->size == 0)
145 b->size = n;
146 else
147 while (b->size <= n)
148 b->size *= 2;
149
150 b->text = xrealloc (b->text, b->size);
151 }
152
153
154 /* Append the character C to the buffer B. */
155 static void
156 appendc (struct macro_buffer *b, int c)
157 {
158 int new_len = b->len + 1;
159
160 if (new_len > b->size)
161 resize_buffer (b, new_len);
162
163 b->text[b->len] = c;
164 b->len = new_len;
165 }
166
167
168 /* Append the LEN bytes at ADDR to the buffer B. */
169 static void
170 appendmem (struct macro_buffer *b, char *addr, int len)
171 {
172 int new_len = b->len + len;
173
174 if (new_len > b->size)
175 resize_buffer (b, new_len);
176
177 memcpy (b->text + b->len, addr, len);
178 b->len = new_len;
179 }
180
181
182 \f
183 /* Recognizing preprocessor tokens. */
184
185
/* Return non-zero if C is one of the whitespace characters the macro
   lexer skips: space, tab, newline, vertical tab or form feed.  */
int
macro_is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
      return 1;

    default:
      return 0;
    }
}
195
196
/* Return non-zero if C is a decimal digit, '0' through '9'.  */
int
macro_is_digit (int c)
{
  return c >= '0' && c <= '9';
}
202
203
/* Return non-zero if C may appear in an identifier without being a
   digit: an underscore, or a letter in 'a'..'z' or 'A'..'Z'.  */
int
macro_is_identifier_nondigit (int c)
{
  if (c == '_')
    return 1;

  if (c >= 'a' && c <= 'z')
    return 1;

  return c >= 'A' && c <= 'Z';
}
211
212
213 static void
214 set_token (struct macro_buffer *tok, char *start, char *end)
215 {
216 init_shared_buffer (tok, start, end - start);
217 tok->last_token = 0;
218
219 /* Presumed; get_identifier may overwrite this. */
220 tok->is_identifier = 0;
221 }
222
223
/* If the text from P up to (but not including) END starts with a
   comment, set *TOK to cover the whole comment and return 1;
   otherwise return 0.

   A block comment runs through its closing delimiter; a missing
   closing delimiter is reported with error.  A line comment runs up
   to, but does not include, the next newline (or END).  */
static int
get_comment (struct macro_buffer *tok, char *p, char *end)
{
  char *cursor;

  /* Both kinds of comment need a two-character opener starting with
     a slash.  */
  if (end - p < 2 || p[0] != '/')
    return 0;

  if (p[1] == '*')
    {
      /* Look for the two-character terminator; stop once fewer than
         two characters remain.  */
      for (cursor = p + 2; end - cursor >= 2; cursor++)
        if (cursor[0] == '*' && cursor[1] == '/')
          {
            set_token (tok, p, cursor + 2);
            return 1;
          }

      error (_("Unterminated comment in macro expansion."));
    }

  if (p[1] == '/')
    {
      cursor = p + 2;
      while (cursor < end && *cursor != '\n')
        cursor++;

      set_token (tok, p, cursor);
      return 1;
    }

  return 0;
}
264
265
266 static int
267 get_identifier (struct macro_buffer *tok, char *p, char *end)
268 {
269 if (p < end
270 && macro_is_identifier_nondigit (*p))
271 {
272 char *tok_start = p;
273
274 while (p < end
275 && (macro_is_identifier_nondigit (*p)
276 || macro_is_digit (*p)))
277 p++;
278
279 set_token (tok, tok_start, p);
280 tok->is_identifier = 1;
281 return 1;
282 }
283 else
284 return 0;
285 }
286
287
/* If the text from P up to (but not including) END starts with a
   preprocessing number, set *TOK to cover it and return 1.
   Otherwise return 0.

   A pp-number starts with a digit, or with a '.' followed by a digit.
   It then continues through digits, identifier characters and '.',
   plus any 'e', 'E', 'p' or 'P' immediately followed by a sign.  */
static int
get_pp_number (struct macro_buffer *tok, char *p, char *end)
{
  char *tok_start = p;

  if (p >= end)
    return 0;

  if (! macro_is_digit (*p)
      && ! (*p == '.'
            && p + 2 <= end
            && macro_is_digit (p[1])))
    return 0;

  while (p < end)
    {
      /* Compare against the exponent letters explicitly.  A strchr
         lookup would also match a NUL byte, since the terminator
         counts as part of the searched string, and the buffer is
         length-delimited rather than NUL-terminated.  */
      if (p + 2 <= end
          && (*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P')
          && (p[1] == '+' || p[1] == '-'))
        p += 2;
      else if (macro_is_digit (*p)
               || macro_is_identifier_nondigit (*p)
               || *p == '.')
        p++;
      else
        break;
    }

  set_token (tok, tok_start, p);
  return 1;
}
319
320
321
/* If the text starting at P going up to (but not including) END
   starts with a character constant, set *TOK to point to that
   character constant, and return 1.  Otherwise, return zero.
   Signal an error if it contains a malformed or incomplete character
   constant.

   The constant may carry one of the prefixes L, u or U.  */
static int
get_character_constant (struct macro_buffer *tok, char *p, char *end)
{
  /* ISO/IEC 9899:1999 (E)  Section 6.4.4.4  paragraph 1
     But of course, what really matters is that we handle it the same
     way GDB's C/C++ lexer does.  So we call c_parse_escape to handle
     escape sequences.  */
  if ((p + 1 <= end && *p == '\'')
      || (p + 2 <= end
          && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
          && p[1] == '\''))
    {
      char *tok_start = p;
      int char_count = 0;

      /* Step over the opening quote, and the prefix if there is
         one.  */
      if (*p == '\'')
        p++;
      else if (*p == 'L' || *p == 'u' || *p == 'U')
        p += 2;
      else
        gdb_assert_not_reached ("unexpected character constant");

      for (;;)
        {
          if (p >= end)
            error (_("Unmatched single quote."));
          else if (*p == '\'')
            {
              if (!char_count)
                error (_("A character constant must contain at least one "
                         "character."));
              p++;
              break;
            }
          else if (*p == '\\')
            {
              /* c_parse_escape advances P past the escape and
                 returns a count that is added to the running
                 total.  */
              p++;
              char_count += c_parse_escape (&p, NULL);
            }
          else
            {
              p++;
              char_count++;
            }
        }

      set_token (tok, tok_start, p);
      return 1;
    }
  else
    return 0;
}
381
382
/* If the text from P up to (but not including) END starts with a
   string literal (optionally prefixed by L, u or U), set *TOK to
   cover that literal and return 1.  Otherwise return 0.  An
   unterminated literal, or one containing a newline, is reported with
   error.  */
static int
get_string_literal (struct macro_buffer *tok, char *p, char *end)
{
  if ((p + 1 <= end
       && *p == '"')
      || (p + 2 <= end
          && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U')
          && p[1] == '"'))
    {
      char *literal_start = p;

      /* Step over the opening quote, and the prefix if present.  */
      if (*p == '"')
        p++;
      else if (*p == 'L' || *p == 'u' || *p == 'U')
        p += 2;
      else
        gdb_assert_not_reached ("unexpected string literal");

      /* Walk the body until the closing quote or the end of the
         text.  */
      while (p < end && *p != '"')
        {
          if (*p == '\n')
            error (_("Newline characters may not appear in string "
                     "constants."));

          if (*p == '\\')
            {
              p++;
              c_parse_escape (&p, NULL);
            }
          else
            p++;
        }

      if (p >= end)
        error (_("Unterminated string in expression."));

      /* Include the closing quote in the token.  */
      p++;

      set_token (tok, literal_start, p);
      return 1;
    }

  return 0;
}
432
433
/* If the text from P up to (but not including) END starts with a
   punctuator, set *TOK to cover it and return 1.  Otherwise return
   0.  */
static int
get_punctuator (struct macro_buffer *tok, char *p, char *end)
{
  /* Clarity and correctness matter far more than speed here.  */

  /* ISO/IEC 9899:1999 (E)  Section 6.4.6  Paragraph 1.
     The order of this table is significant: the first match wins,
     so a punctuator that is a prefix of a longer one must be listed
     after that longer one.  */
  static const char * const punctuators[] = {
    "[", "]", "(", ")", "{", "}", "?", ";", ",", "~",
    "...", ".",
    "->", "--", "-=", "-",
    "++", "+=", "+",
    "*=", "*",
    "!=", "!",
    "&&", "&=", "&",
    "/=", "/",
    "%>", "%:%:", "%:", "%=", "%",
    "^=", "^",
    "##", "#",
    ":>", ":",
    "||", "|=", "|",
    "<<=", "<<", "<=", "<:", "<%", "<",
    ">>=", ">>", ">=", ">",
    "==", "=",
    0
  };

  const char * const *entry;

  if (p >= end)
    return 0;

  for (entry = punctuators; *entry; entry++)
    {
      int len;

      /* Cheap first-character filter before the full comparison.  */
      if ((*entry)[0] != p[0])
        continue;

      len = strlen (*entry);
      if (p + len <= end
          && memcmp (p, *entry, len) == 0)
        {
          set_token (tok, p, p + len);
          return 1;
        }
    }

  return 0;
}
487
488
489 /* Peel the next preprocessor token off of SRC, and put it in TOK.
490 Mutate TOK to refer to the first token in SRC, and mutate SRC to
491 refer to the text after that token. SRC must be a shared buffer;
492 the resulting TOK will be shared, pointing into the same string SRC
493 does. Initialize TOK's last_token field. Return non-zero if we
494 succeed, or 0 if we didn't find any more tokens in SRC. */
495 static int
496 get_token (struct macro_buffer *tok,
497 struct macro_buffer *src)
498 {
499 char *p = src->text;
500 char *end = p + src->len;
501
502 gdb_assert (src->shared);
503
504 /* From the ISO C standard, ISO/IEC 9899:1999 (E), section 6.4:
505
506 preprocessing-token:
507 header-name
508 identifier
509 pp-number
510 character-constant
511 string-literal
512 punctuator
513 each non-white-space character that cannot be one of the above
514
515 We don't have to deal with header-name tokens, since those can
516 only occur after a #include, which we will never see. */
517
518 while (p < end)
519 if (macro_is_whitespace (*p))
520 p++;
521 else if (get_comment (tok, p, end))
522 p += tok->len;
523 else if (get_pp_number (tok, p, end)
524 || get_character_constant (tok, p, end)
525 || get_string_literal (tok, p, end)
526 /* Note: the grammar in the standard seems to be
527 ambiguous: L'x' can be either a wide character
528 constant, or an identifier followed by a normal
529 character constant. By trying `get_identifier' after
530 we try get_character_constant and get_string_literal,
531 we give the wide character syntax precedence. Now,
532 since GDB doesn't handle wide character constants
533 anyway, is this the right thing to do? */
534 || get_identifier (tok, p, end)
535 || get_punctuator (tok, p, end))
536 {
537 /* How many characters did we consume, including whitespace? */
538 int consumed = p - src->text + tok->len;
539
540 src->text += consumed;
541 src->len -= consumed;
542 return 1;
543 }
544 else
545 {
546 /* We have found a "non-whitespace character that cannot be
547 one of the above." Make a token out of it. */
548 int consumed;
549
550 set_token (tok, p, p + 1);
551 consumed = p - src->text + tok->len;
552 src->text += consumed;
553 src->len -= consumed;
554 return 1;
555 }
556
557 return 0;
558 }
559
560
561 \f
562 /* Appending token strings, with and without splicing */
563
564
565 /* Append the macro buffer SRC to the end of DEST, and ensure that
566 doing so doesn't splice the token at the end of SRC with the token
567 at the beginning of DEST. SRC and DEST must have their last_token
568 fields set. Upon return, DEST's last_token field is set correctly.
569
570 For example:
571
572 If DEST is "(" and SRC is "y", then we can return with
573 DEST set to "(y" --- we've simply appended the two buffers.
574
575 However, if DEST is "x" and SRC is "y", then we must not return
576 with DEST set to "xy" --- that would splice the two tokens "x" and
577 "y" together to make a single token "xy". However, it would be
578 fine to return with DEST set to "x y". Similarly, "<" and "<" must
579 yield "< <", not "<<", etc. */
580 static void
581 append_tokens_without_splicing (struct macro_buffer *dest,
582 struct macro_buffer *src)
583 {
584 int original_dest_len = dest->len;
585 struct macro_buffer dest_tail, new_token;
586
587 gdb_assert (src->last_token != -1);
588 gdb_assert (dest->last_token != -1);
589
590 /* First, just try appending the two, and call get_token to see if
591 we got a splice. */
592 appendmem (dest, src->text, src->len);
593
594 /* If DEST originally had no token abutting its end, then we can't
595 have spliced anything, so we're done. */
596 if (dest->last_token == original_dest_len)
597 {
598 dest->last_token = original_dest_len + src->last_token;
599 return;
600 }
601
602 /* Set DEST_TAIL to point to the last token in DEST, followed by
603 all the stuff we just appended. */
604 init_shared_buffer (&dest_tail,
605 dest->text + dest->last_token,
606 dest->len - dest->last_token);
607
608 /* Re-parse DEST's last token. We know that DEST used to contain
609 at least one token, so if it doesn't contain any after the
610 append, then we must have spliced "/" and "*" or "/" and "/" to
611 make a comment start. (Just for the record, I got this right
612 the first time. This is not a bug fix.) */
613 if (get_token (&new_token, &dest_tail)
614 && (new_token.text + new_token.len
615 == dest->text + original_dest_len))
616 {
617 /* No splice, so we're done. */
618 dest->last_token = original_dest_len + src->last_token;
619 return;
620 }
621
622 /* Okay, a simple append caused a splice. Let's chop dest back to
623 its original length and try again, but separate the texts with a
624 space. */
625 dest->len = original_dest_len;
626 appendc (dest, ' ');
627 appendmem (dest, src->text, src->len);
628
629 init_shared_buffer (&dest_tail,
630 dest->text + dest->last_token,
631 dest->len - dest->last_token);
632
633 /* Try to re-parse DEST's last token, as above. */
634 if (get_token (&new_token, &dest_tail)
635 && (new_token.text + new_token.len
636 == dest->text + original_dest_len))
637 {
638 /* No splice, so we're done. */
639 dest->last_token = original_dest_len + 1 + src->last_token;
640 return;
641 }
642
643 /* As far as I know, there's no case where inserting a space isn't
644 enough to prevent a splice. */
645 internal_error (__FILE__, __LINE__,
646 _("unable to avoid splicing tokens during macro expansion"));
647 }
648
649 /* Stringify an argument, and insert it into DEST. ARG is the text to
650 stringify; it is LEN bytes long. */
651
652 static void
653 stringify (struct macro_buffer *dest, const char *arg, int len)
654 {
655 /* Trim initial whitespace from ARG. */
656 while (len > 0 && macro_is_whitespace (*arg))
657 {
658 ++arg;
659 --len;
660 }
661
662 /* Trim trailing whitespace from ARG. */
663 while (len > 0 && macro_is_whitespace (arg[len - 1]))
664 --len;
665
666 /* Insert the string. */
667 appendc (dest, '"');
668 while (len > 0)
669 {
670 /* We could try to handle strange cases here, like control
671 characters, but there doesn't seem to be much point. */
672 if (macro_is_whitespace (*arg))
673 {
674 /* Replace a sequence of whitespace with a single space. */
675 appendc (dest, ' ');
676 while (len > 1 && macro_is_whitespace (arg[1]))
677 {
678 ++arg;
679 --len;
680 }
681 }
682 else if (*arg == '\\' || *arg == '"')
683 {
684 appendc (dest, '\\');
685 appendc (dest, *arg);
686 }
687 else
688 appendc (dest, *arg);
689 ++arg;
690 --len;
691 }
692 appendc (dest, '"');
693 dest->last_token = dest->len;
694 }
695
696 /* See macroexp.h. */
697
698 char *
699 macro_stringify (const char *str)
700 {
701 struct macro_buffer buffer;
702 int len = strlen (str);
703 char *result;
704
705 init_buffer (&buffer, len);
706 stringify (&buffer, str, len);
707
708 return free_buffer_return_text (&buffer);
709 }
710
711 \f
712 /* Expanding macros! */
713
714
/* One node of a singly-linked list naming the macros currently
   being expanded.  The list is used to detect expansion loops.  */
struct macro_name_list
{
  /* Name of a macro under expansion.  */
  const char *name;

  /* Rest of the list, or NULL at the end.  */
  struct macro_name_list *next;
};
721
722
723 /* Return non-zero if we are currently expanding the macro named NAME,
724 according to LIST; otherwise, return zero.
725
726 You know, it would be possible to get rid of all the NO_LOOP
727 arguments to these functions by simply generating a new lookup
728 function and baton which refuses to find the definition for a
729 particular macro, and otherwise delegates the decision to another
730 function/baton pair. But that makes the linked list of excluded
731 macros chained through untyped baton pointers, which will make it
732 harder to debug. :( */
733 static int
734 currently_rescanning (struct macro_name_list *list, const char *name)
735 {
736 for (; list; list = list->next)
737 if (strcmp (name, list->name) == 0)
738 return 1;
739
740 return 0;
741 }
742
743
744 /* Gather the arguments to a macro expansion.
745
746 NAME is the name of the macro being invoked. (It's only used for
747 printing error messages.)
748
749 Assume that SRC is the text of the macro invocation immediately
750 following the macro name. For example, if we're processing the
751 text foo(bar, baz), then NAME would be foo and SRC will be (bar,
752 baz).
753
754 If SRC doesn't start with an open paren ( token at all, return
755 zero, leave SRC unchanged, and don't set *ARGC_P to anything.
756
757 If SRC doesn't contain a properly terminated argument list, then
758 raise an error.
759
760 For a variadic macro, NARGS holds the number of formal arguments to
761 the macro. For a GNU-style variadic macro, this should be the
762 number of named arguments. For a non-variadic macro, NARGS should
763 be -1.
764
765 Otherwise, return a pointer to the first element of an array of
766 macro buffers referring to the argument texts, and set *ARGC_P to
767 the number of arguments we found --- the number of elements in the
768 array. The macro buffers share their text with SRC, and their
769 last_token fields are initialized. The array is allocated with
770 xmalloc, and the caller is responsible for freeing it.
771
772 NOTE WELL: if SRC starts with a open paren ( token followed
773 immediately by a close paren ) token (e.g., the invocation looks
774 like "foo()"), we treat that as one argument, which happens to be
775 the empty list of tokens. The caller should keep in mind that such
776 a sequence of tokens is a valid way to invoke one-parameter
777 function-like macros, but also a valid way to invoke zero-parameter
778 function-like macros. Eeew.
779
780 Consume the tokens from SRC; after this call, SRC contains the text
781 following the invocation. */
782
783 static struct macro_buffer *
784 gather_arguments (const char *name, struct macro_buffer *src,
785 int nargs, int *argc_p)
786 {
787 struct macro_buffer tok;
788 int args_len, args_size;
789 struct macro_buffer *args = NULL;
790 struct cleanup *back_to = make_cleanup (free_current_contents, &args);
791
792 /* Does SRC start with an opening paren token? Read from a copy of
793 SRC, so SRC itself is unaffected if we don't find an opening
794 paren. */
795 {
796 struct macro_buffer temp;
797
798 init_shared_buffer (&temp, src->text, src->len);
799
800 if (! get_token (&tok, &temp)
801 || tok.len != 1
802 || tok.text[0] != '(')
803 {
804 discard_cleanups (back_to);
805 return 0;
806 }
807 }
808
809 /* Consume SRC's opening paren. */
810 get_token (&tok, src);
811
812 args_len = 0;
813 args_size = 6;
814 args = (struct macro_buffer *) xmalloc (sizeof (*args) * args_size);
815
816 for (;;)
817 {
818 struct macro_buffer *arg;
819 int depth;
820
821 /* Make sure we have room for the next argument. */
822 if (args_len >= args_size)
823 {
824 args_size *= 2;
825 args = xrealloc (args, sizeof (*args) * args_size);
826 }
827
828 /* Initialize the next argument. */
829 arg = &args[args_len++];
830 set_token (arg, src->text, src->text);
831
832 /* Gather the argument's tokens. */
833 depth = 0;
834 for (;;)
835 {
836 if (! get_token (&tok, src))
837 error (_("Malformed argument list for macro `%s'."), name);
838
839 /* Is tok an opening paren? */
840 if (tok.len == 1 && tok.text[0] == '(')
841 depth++;
842
843 /* Is tok is a closing paren? */
844 else if (tok.len == 1 && tok.text[0] == ')')
845 {
846 /* If it's a closing paren at the top level, then that's
847 the end of the argument list. */
848 if (depth == 0)
849 {
850 /* In the varargs case, the last argument may be
851 missing. Add an empty argument in this case. */
852 if (nargs != -1 && args_len == nargs - 1)
853 {
854 /* Make sure we have room for the argument. */
855 if (args_len >= args_size)
856 {
857 args_size++;
858 args = xrealloc (args, sizeof (*args) * args_size);
859 }
860 arg = &args[args_len++];
861 set_token (arg, src->text, src->text);
862 }
863
864 discard_cleanups (back_to);
865 *argc_p = args_len;
866 return args;
867 }
868
869 depth--;
870 }
871
872 /* If tok is a comma at top level, then that's the end of
873 the current argument. However, if we are handling a
874 variadic macro and we are computing the last argument, we
875 want to include the comma and remaining tokens. */
876 else if (tok.len == 1 && tok.text[0] == ',' && depth == 0
877 && (nargs == -1 || args_len < nargs))
878 break;
879
880 /* Extend the current argument to enclose this token. If
881 this is the current argument's first token, leave out any
882 leading whitespace, just for aesthetics. */
883 if (arg->len == 0)
884 {
885 arg->text = tok.text;
886 arg->len = tok.len;
887 arg->last_token = 0;
888 }
889 else
890 {
891 arg->len = (tok.text + tok.len) - arg->text;
892 arg->last_token = tok.text - arg->text;
893 }
894 }
895 }
896 }
897
898
899 /* The `expand' and `substitute_args' functions both invoke `scan'
900 recursively, so we need a forward declaration somewhere. */
901 static void scan (struct macro_buffer *dest,
902 struct macro_buffer *src,
903 struct macro_name_list *no_loop,
904 macro_lookup_ftype *lookup_func,
905 void *lookup_baton);
906
907
908 /* A helper function for substitute_args.
909
910 ARGV is a vector of all the arguments; ARGC is the number of
911 arguments. IS_VARARGS is true if the macro being substituted is a
912 varargs macro; in this case VA_ARG_NAME is the name of the
913 "variable" argument. VA_ARG_NAME is ignored if IS_VARARGS is
914 false.
915
916 If the token TOK is the name of a parameter, return the parameter's
917 index. If TOK is not an argument, return -1. */
918
919 static int
920 find_parameter (const struct macro_buffer *tok,
921 int is_varargs, const struct macro_buffer *va_arg_name,
922 int argc, const char * const *argv)
923 {
924 int i;
925
926 if (! tok->is_identifier)
927 return -1;
928
929 for (i = 0; i < argc; ++i)
930 if (tok->len == strlen (argv[i])
931 && !memcmp (tok->text, argv[i], tok->len))
932 return i;
933
934 if (is_varargs && tok->len == va_arg_name->len
935 && ! memcmp (tok->text, va_arg_name->text, tok->len))
936 return argc - 1;
937
938 return -1;
939 }
940
941 /* Given the macro definition DEF, being invoked with the actual
942 arguments given by ARGC and ARGV, substitute the arguments into the
943 replacement list, and store the result in DEST.
944
945 IS_VARARGS should be true if DEF is a varargs macro. In this case,
946 VA_ARG_NAME should be the name of the "variable" argument -- either
947 __VA_ARGS__ for c99-style varargs, or the final argument name, for
948 GNU-style varargs. If IS_VARARGS is false, this parameter is
949 ignored.
950
951 If it is necessary to expand macro invocations in one of the
952 arguments, use LOOKUP_FUNC and LOOKUP_BATON to find the macro
953 definitions, and don't expand invocations of the macros listed in
954 NO_LOOP. */
955
956 static void
957 substitute_args (struct macro_buffer *dest,
958 struct macro_definition *def,
959 int is_varargs, const struct macro_buffer *va_arg_name,
960 int argc, struct macro_buffer *argv,
961 struct macro_name_list *no_loop,
962 macro_lookup_ftype *lookup_func,
963 void *lookup_baton)
964 {
965 /* A macro buffer for the macro's replacement list. */
966 struct macro_buffer replacement_list;
967 /* The token we are currently considering. */
968 struct macro_buffer tok;
969 /* The replacement list's pointer from just before TOK was lexed. */
970 char *original_rl_start;
971 /* We have a single lookahead token to handle token splicing. */
972 struct macro_buffer lookahead;
973 /* The lookahead token might not be valid. */
974 int lookahead_valid;
975 /* The replacement list's pointer from just before LOOKAHEAD was
976 lexed. */
977 char *lookahead_rl_start;
978
979 init_shared_buffer (&replacement_list, (char *) def->replacement,
980 strlen (def->replacement));
981
982 gdb_assert (dest->len == 0);
983 dest->last_token = 0;
984
985 original_rl_start = replacement_list.text;
986 if (! get_token (&tok, &replacement_list))
987 return;
988 lookahead_rl_start = replacement_list.text;
989 lookahead_valid = get_token (&lookahead, &replacement_list);
990
991 for (;;)
992 {
993 /* Just for aesthetics. If we skipped some whitespace, copy
994 that to DEST. */
995 if (tok.text > original_rl_start)
996 {
997 appendmem (dest, original_rl_start, tok.text - original_rl_start);
998 dest->last_token = dest->len;
999 }
1000
1001 /* Is this token the stringification operator? */
1002 if (tok.len == 1
1003 && tok.text[0] == '#')
1004 {
1005 int arg;
1006
1007 if (!lookahead_valid)
1008 error (_("Stringification operator requires an argument."));
1009
1010 arg = find_parameter (&lookahead, is_varargs, va_arg_name,
1011 def->argc, def->argv);
1012 if (arg == -1)
1013 error (_("Argument to stringification operator must name "
1014 "a macro parameter."));
1015
1016 stringify (dest, argv[arg].text, argv[arg].len);
1017
1018 /* Read one token and let the loop iteration code handle the
1019 rest. */
1020 lookahead_rl_start = replacement_list.text;
1021 lookahead_valid = get_token (&lookahead, &replacement_list);
1022 }
1023 /* Is this token the splicing operator? */
1024 else if (tok.len == 2
1025 && tok.text[0] == '#'
1026 && tok.text[1] == '#')
1027 error (_("Stray splicing operator"));
1028 /* Is the next token the splicing operator? */
1029 else if (lookahead_valid
1030 && lookahead.len == 2
1031 && lookahead.text[0] == '#'
1032 && lookahead.text[1] == '#')
1033 {
1034 int finished = 0;
1035 int prev_was_comma = 0;
1036
1037 /* Note that GCC warns if the result of splicing is not a
1038 token. In the debugger there doesn't seem to be much
1039 benefit from doing this. */
1040
1041 /* Insert the first token. */
1042 if (tok.len == 1 && tok.text[0] == ',')
1043 prev_was_comma = 1;
1044 else
1045 {
1046 int arg = find_parameter (&tok, is_varargs, va_arg_name,
1047 def->argc, def->argv);
1048
1049 if (arg != -1)
1050 appendmem (dest, argv[arg].text, argv[arg].len);
1051 else
1052 appendmem (dest, tok.text, tok.len);
1053 }
1054
1055 /* Apply a possible sequence of ## operators. */
1056 for (;;)
1057 {
1058 if (! get_token (&tok, &replacement_list))
1059 error (_("Splicing operator at end of macro"));
1060
1061 /* Handle a comma before a ##. If we are handling
1062 varargs, and the token on the right hand side is the
1063 varargs marker, and the final argument is empty or
1064 missing, then drop the comma. This is a GNU
1065 extension. There is one ambiguous case here,
1066 involving pedantic behavior with an empty argument,
1067 but we settle that in favor of GNU-style (GCC uses an
1068 option). If we aren't dealing with varargs, we
1069 simply insert the comma. */
1070 if (prev_was_comma)
1071 {
1072 if (! (is_varargs
1073 && tok.len == va_arg_name->len
1074 && !memcmp (tok.text, va_arg_name->text, tok.len)
1075 && argv[argc - 1].len == 0))
1076 appendmem (dest, ",", 1);
1077 prev_was_comma = 0;
1078 }
1079
1080 /* Insert the token. If it is a parameter, insert the
1081 argument. If it is a comma, treat it specially. */
1082 if (tok.len == 1 && tok.text[0] == ',')
1083 prev_was_comma = 1;
1084 else
1085 {
1086 int arg = find_parameter (&tok, is_varargs, va_arg_name,
1087 def->argc, def->argv);
1088
1089 if (arg != -1)
1090 appendmem (dest, argv[arg].text, argv[arg].len);
1091 else
1092 appendmem (dest, tok.text, tok.len);
1093 }
1094
1095 /* Now read another token. If it is another splice, we
1096 loop. */
1097 original_rl_start = replacement_list.text;
1098 if (! get_token (&tok, &replacement_list))
1099 {
1100 finished = 1;
1101 break;
1102 }
1103
1104 if (! (tok.len == 2
1105 && tok.text[0] == '#'
1106 && tok.text[1] == '#'))
1107 break;
1108 }
1109
1110 if (prev_was_comma)
1111 {
1112 /* We saw a comma. Insert it now. */
1113 appendmem (dest, ",", 1);
1114 }
1115
1116 dest->last_token = dest->len;
1117 if (finished)
1118 lookahead_valid = 0;
1119 else
1120 {
1121 /* Set up for the loop iterator. */
1122 lookahead = tok;
1123 lookahead_rl_start = original_rl_start;
1124 lookahead_valid = 1;
1125 }
1126 }
1127 else
1128 {
1129 /* Is this token an identifier? */
1130 int substituted = 0;
1131 int arg = find_parameter (&tok, is_varargs, va_arg_name,
1132 def->argc, def->argv);
1133
1134 if (arg != -1)
1135 {
1136 struct macro_buffer arg_src;
1137
1138 /* Expand any macro invocations in the argument text,
1139 and append the result to dest. Remember that scan
1140 mutates its source, so we need to scan a new buffer
1141 referring to the argument's text, not the argument
1142 itself. */
1143 init_shared_buffer (&arg_src, argv[arg].text, argv[arg].len);
1144 scan (dest, &arg_src, no_loop, lookup_func, lookup_baton);
1145 substituted = 1;
1146 }
1147
1148 /* If it wasn't a parameter, then just copy it across. */
1149 if (! substituted)
1150 append_tokens_without_splicing (dest, &tok);
1151 }
1152
1153 if (! lookahead_valid)
1154 break;
1155
1156 tok = lookahead;
1157 original_rl_start = lookahead_rl_start;
1158
1159 lookahead_rl_start = replacement_list.text;
1160 lookahead_valid = get_token (&lookahead, &replacement_list);
1161 }
1162 }
1163
1164
1165 /* Expand a call to a macro named ID, whose definition is DEF. Append
1166 its expansion to DEST. SRC is the input text following the ID
1167 token. We are currently rescanning the expansions of the macros
1168 named in NO_LOOP; don't re-expand them. Use LOOKUP_FUNC and
1169 LOOKUP_BATON to find definitions for any nested macro references.
1170
1171 Return 1 if we decided to expand it, zero otherwise. (If it's a
1172 function-like macro name that isn't followed by an argument list,
1173 we don't expand it.) If we return zero, leave SRC unchanged. */
1174 static int
1175 expand (const char *id,
1176 struct macro_definition *def,
1177 struct macro_buffer *dest,
1178 struct macro_buffer *src,
1179 struct macro_name_list *no_loop,
1180 macro_lookup_ftype *lookup_func,
1181 void *lookup_baton)
1182 {
1183 struct macro_name_list new_no_loop;
1184
1185 /* Create a new node to be added to the front of the no-expand list.
1186 This list is appropriate for re-scanning replacement lists, but
1187 it is *not* appropriate for scanning macro arguments; invocations
1188 of the macro whose arguments we are gathering *do* get expanded
1189 there. */
1190 new_no_loop.name = id;
1191 new_no_loop.next = no_loop;
1192
1193 /* What kind of macro are we expanding? */
1194 if (def->kind == macro_object_like)
1195 {
1196 struct macro_buffer replacement_list;
1197
1198 init_shared_buffer (&replacement_list, (char *) def->replacement,
1199 strlen (def->replacement));
1200
1201 scan (dest, &replacement_list, &new_no_loop, lookup_func, lookup_baton);
1202 return 1;
1203 }
1204 else if (def->kind == macro_function_like)
1205 {
1206 struct cleanup *back_to = make_cleanup (null_cleanup, 0);
1207 int argc = 0;
1208 struct macro_buffer *argv = NULL;
1209 struct macro_buffer substituted;
1210 struct macro_buffer substituted_src;
1211 struct macro_buffer va_arg_name = {0};
1212 int is_varargs = 0;
1213
1214 if (def->argc >= 1)
1215 {
1216 if (strcmp (def->argv[def->argc - 1], "...") == 0)
1217 {
1218 /* In C99-style varargs, substitution is done using
1219 __VA_ARGS__. */
1220 init_shared_buffer (&va_arg_name, "__VA_ARGS__",
1221 strlen ("__VA_ARGS__"));
1222 is_varargs = 1;
1223 }
1224 else
1225 {
1226 int len = strlen (def->argv[def->argc - 1]);
1227
1228 if (len > 3
1229 && strcmp (def->argv[def->argc - 1] + len - 3, "...") == 0)
1230 {
1231 /* In GNU-style varargs, the name of the
1232 substitution parameter is the name of the formal
1233 argument without the "...". */
1234 init_shared_buffer (&va_arg_name,
1235 (char *) def->argv[def->argc - 1],
1236 len - 3);
1237 is_varargs = 1;
1238 }
1239 }
1240 }
1241
1242 make_cleanup (free_current_contents, &argv);
1243 argv = gather_arguments (id, src, is_varargs ? def->argc : -1,
1244 &argc);
1245
1246 /* If we couldn't find any argument list, then we don't expand
1247 this macro. */
1248 if (! argv)
1249 {
1250 do_cleanups (back_to);
1251 return 0;
1252 }
1253
1254 /* Check that we're passing an acceptable number of arguments for
1255 this macro. */
1256 if (argc != def->argc)
1257 {
1258 if (is_varargs && argc >= def->argc - 1)
1259 {
1260 /* Ok. */
1261 }
1262 /* Remember that a sequence of tokens like "foo()" is a
1263 valid invocation of a macro expecting either zero or one
1264 arguments. */
1265 else if (! (argc == 1
1266 && argv[0].len == 0
1267 && def->argc == 0))
1268 error (_("Wrong number of arguments to macro `%s' "
1269 "(expected %d, got %d)."),
1270 id, def->argc, argc);
1271 }
1272
1273 /* Note that we don't expand macro invocations in the arguments
1274 yet --- we let subst_args take care of that. Parameters that
1275 appear as operands of the stringifying operator "#" or the
1276 splicing operator "##" don't get macro references expanded,
1277 so we can't really tell whether it's appropriate to macro-
1278 expand an argument until we see how it's being used. */
1279 init_buffer (&substituted, 0);
1280 make_cleanup (cleanup_macro_buffer, &substituted);
1281 substitute_args (&substituted, def, is_varargs, &va_arg_name,
1282 argc, argv, no_loop, lookup_func, lookup_baton);
1283
1284 /* Now `substituted' is the macro's replacement list, with all
1285 argument values substituted into it properly. Re-scan it for
1286 macro references, but don't expand invocations of this macro.
1287
1288 We create a new buffer, `substituted_src', which points into
1289 `substituted', and scan that. We can't scan `substituted'
1290 itself, since the tokenization process moves the buffer's
1291 text pointer around, and we still need to be able to find
1292 `substituted's original text buffer after scanning it so we
1293 can free it. */
1294 init_shared_buffer (&substituted_src, substituted.text, substituted.len);
1295 scan (dest, &substituted_src, &new_no_loop, lookup_func, lookup_baton);
1296
1297 do_cleanups (back_to);
1298
1299 return 1;
1300 }
1301 else
1302 internal_error (__FILE__, __LINE__, _("bad macro definition kind"));
1303 }
1304
1305
1306 /* If the single token in SRC_FIRST followed by the tokens in SRC_REST
1307 constitute a macro invokation not forbidden in NO_LOOP, append its
1308 expansion to DEST and return non-zero. Otherwise, return zero, and
1309 leave DEST unchanged.
1310
1311 SRC_FIRST and SRC_REST must be shared buffers; DEST must not be one.
1312 SRC_FIRST must be a string built by get_token. */
1313 static int
1314 maybe_expand (struct macro_buffer *dest,
1315 struct macro_buffer *src_first,
1316 struct macro_buffer *src_rest,
1317 struct macro_name_list *no_loop,
1318 macro_lookup_ftype *lookup_func,
1319 void *lookup_baton)
1320 {
1321 gdb_assert (src_first->shared);
1322 gdb_assert (src_rest->shared);
1323 gdb_assert (! dest->shared);
1324
1325 /* Is this token an identifier? */
1326 if (src_first->is_identifier)
1327 {
1328 /* Make a null-terminated copy of it, since that's what our
1329 lookup function expects. */
1330 char *id = xmalloc (src_first->len + 1);
1331 struct cleanup *back_to = make_cleanup (xfree, id);
1332
1333 memcpy (id, src_first->text, src_first->len);
1334 id[src_first->len] = 0;
1335
1336 /* If we're currently re-scanning the result of expanding
1337 this macro, don't expand it again. */
1338 if (! currently_rescanning (no_loop, id))
1339 {
1340 /* Does this identifier have a macro definition in scope? */
1341 struct macro_definition *def = lookup_func (id, lookup_baton);
1342
1343 if (def && expand (id, def, dest, src_rest, no_loop,
1344 lookup_func, lookup_baton))
1345 {
1346 do_cleanups (back_to);
1347 return 1;
1348 }
1349 }
1350
1351 do_cleanups (back_to);
1352 }
1353
1354 return 0;
1355 }
1356
1357
1358 /* Expand macro references in SRC, appending the results to DEST.
1359 Assume we are re-scanning the result of expanding the macros named
1360 in NO_LOOP, and don't try to re-expand references to them.
1361
1362 SRC must be a shared buffer; DEST must not be one. */
1363 static void
1364 scan (struct macro_buffer *dest,
1365 struct macro_buffer *src,
1366 struct macro_name_list *no_loop,
1367 macro_lookup_ftype *lookup_func,
1368 void *lookup_baton)
1369 {
1370 gdb_assert (src->shared);
1371 gdb_assert (! dest->shared);
1372
1373 for (;;)
1374 {
1375 struct macro_buffer tok;
1376 char *original_src_start = src->text;
1377
1378 /* Find the next token in SRC. */
1379 if (! get_token (&tok, src))
1380 break;
1381
1382 /* Just for aesthetics. If we skipped some whitespace, copy
1383 that to DEST. */
1384 if (tok.text > original_src_start)
1385 {
1386 appendmem (dest, original_src_start, tok.text - original_src_start);
1387 dest->last_token = dest->len;
1388 }
1389
1390 if (! maybe_expand (dest, &tok, src, no_loop, lookup_func, lookup_baton))
1391 /* We didn't end up expanding tok as a macro reference, so
1392 simply append it to dest. */
1393 append_tokens_without_splicing (dest, &tok);
1394 }
1395
1396 /* Just for aesthetics. If there was any trailing whitespace in
1397 src, copy it to dest. */
1398 if (src->len)
1399 {
1400 appendmem (dest, src->text, src->len);
1401 dest->last_token = dest->len;
1402 }
1403 }
1404
1405
1406 char *
1407 macro_expand (const char *source,
1408 macro_lookup_ftype *lookup_func,
1409 void *lookup_func_baton)
1410 {
1411 struct macro_buffer src, dest;
1412 struct cleanup *back_to;
1413
1414 init_shared_buffer (&src, (char *) source, strlen (source));
1415
1416 init_buffer (&dest, 0);
1417 dest.last_token = 0;
1418 back_to = make_cleanup (cleanup_macro_buffer, &dest);
1419
1420 scan (&dest, &src, 0, lookup_func, lookup_func_baton);
1421
1422 appendc (&dest, '\0');
1423
1424 discard_cleanups (back_to);
1425 return dest.text;
1426 }
1427
1428
1429 char *
1430 macro_expand_once (const char *source,
1431 macro_lookup_ftype *lookup_func,
1432 void *lookup_func_baton)
1433 {
1434 error (_("Expand-once not implemented yet."));
1435 }
1436
1437
1438 char *
1439 macro_expand_next (char **lexptr,
1440 macro_lookup_ftype *lookup_func,
1441 void *lookup_baton)
1442 {
1443 struct macro_buffer src, dest, tok;
1444 struct cleanup *back_to;
1445
1446 /* Set up SRC to refer to the input text, pointed to by *lexptr. */
1447 init_shared_buffer (&src, *lexptr, strlen (*lexptr));
1448
1449 /* Set up DEST to receive the expansion, if there is one. */
1450 init_buffer (&dest, 0);
1451 dest.last_token = 0;
1452 back_to = make_cleanup (cleanup_macro_buffer, &dest);
1453
1454 /* Get the text's first preprocessing token. */
1455 if (! get_token (&tok, &src))
1456 {
1457 do_cleanups (back_to);
1458 return 0;
1459 }
1460
1461 /* If it's a macro invocation, expand it. */
1462 if (maybe_expand (&dest, &tok, &src, 0, lookup_func, lookup_baton))
1463 {
1464 /* It was a macro invocation! Package up the expansion as a
1465 null-terminated string and return it. Set *lexptr to the
1466 start of the next token in the input. */
1467 appendc (&dest, '\0');
1468 discard_cleanups (back_to);
1469 *lexptr = src.text;
1470 return dest.text;
1471 }
1472 else
1473 {
1474 /* It wasn't a macro invocation. */
1475 do_cleanups (back_to);
1476 return 0;
1477 }
1478 }
This page took 0.058728 seconds and 5 git commands to generate.