gdb
[deliverable/binutils-gdb.git] / gdb / macroexp.c
1 /* C preprocessor macro expansion for GDB.
2 Copyright (C) 2002, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Red Hat, Inc.
4
5 This file is part of GDB.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
20 #include "defs.h"
21 #include "gdb_obstack.h"
22 #include "bcache.h"
23 #include "macrotab.h"
24 #include "macroexp.h"
25 #include "gdb_assert.h"
26
27
28 \f
29 /* A resizeable, substringable string type. */
30
31
/* A growable character buffer that can also act as a read-only view
   onto part of some other string.  */
struct macro_buffer
{
  /* The characters themselves.  Only the first LEN bytes are
     meaningful; SIZE bytes are allocated.  A SIZE of zero means TEXT
     is not a malloc'ed block.  When SHARED is non-zero, TEXT points
     into a larger string owned by someone else and must not be
     appended to or reallocated.  Because of such views, TEXT is not
     in general null-terminated.  */
  char *text;

  /* How many characters of TEXT are in use.  */
  int len;

  /* How many characters are allocated for TEXT.  Meaningless for a
     shared buffer; it is kept at zero there so that "is there room
     to append?" checks fail without having to look at SHARED.  */
  int size;

  /* Zero when TEXT is its own malloc block and may be realloc'ed.
     Non-zero when TEXT points into the middle of another block.  */
  int shared;

  /* Used to detect token splicing.

     Index within TEXT of the first character of the token touching
     the end of TEXT.  Equal to LEN when TEXT holds no tokens, or
     when TEXT ends in whitespace (so no token touches the end).
     -1 when nothing is known about the contents of TEXT.  */
  int last_token;

  /* Only meaningful for a buffer filled in by get_token: non-zero
     if the token is an identifier, zero otherwise.  */
  int is_identifier;
};
73
74
75 /* Set the macro buffer *B to the empty string, guessing that its
76 final contents will fit in N bytes. (It'll get resized if it
77 doesn't, so the guess doesn't have to be right.) Allocate the
78 initial storage with xmalloc. */
79 static void
80 init_buffer (struct macro_buffer *b, int n)
81 {
82 b->size = n;
83 if (n > 0)
84 b->text = (char *) xmalloc (n);
85 else
86 b->text = NULL;
87 b->len = 0;
88 b->shared = 0;
89 b->last_token = -1;
90 }
91
92
93 /* Set the macro buffer *BUF to refer to the LEN bytes at ADDR, as a
94 shared substring. */
95 static void
96 init_shared_buffer (struct macro_buffer *buf, char *addr, int len)
97 {
98 buf->text = addr;
99 buf->len = len;
100 buf->shared = 1;
101 buf->size = 0;
102 buf->last_token = -1;
103 }
104
105
106 /* Free the text of the buffer B. Raise an error if B is shared. */
107 static void
108 free_buffer (struct macro_buffer *b)
109 {
110 gdb_assert (! b->shared);
111 if (b->size)
112 xfree (b->text);
113 }
114
115
/* Cleanup callback: UNTYPED_BUF is a struct macro_buffer * whose
   text is released with free_buffer.  */
static void
cleanup_macro_buffer (void *untyped_buf)
{
  struct macro_buffer *buf = (struct macro_buffer *) untyped_buf;

  free_buffer (buf);
}
122
123
124 /* Resize the buffer B to be at least N bytes long. Raise an error if
125 B shouldn't be resized. */
126 static void
127 resize_buffer (struct macro_buffer *b, int n)
128 {
129 /* We shouldn't be trying to resize shared strings. */
130 gdb_assert (! b->shared);
131
132 if (b->size == 0)
133 b->size = n;
134 else
135 while (b->size <= n)
136 b->size *= 2;
137
138 b->text = xrealloc (b->text, b->size);
139 }
140
141
142 /* Append the character C to the buffer B. */
143 static void
144 appendc (struct macro_buffer *b, int c)
145 {
146 int new_len = b->len + 1;
147
148 if (new_len > b->size)
149 resize_buffer (b, new_len);
150
151 b->text[b->len] = c;
152 b->len = new_len;
153 }
154
155
156 /* Append the LEN bytes at ADDR to the buffer B. */
157 static void
158 appendmem (struct macro_buffer *b, char *addr, int len)
159 {
160 int new_len = b->len + len;
161
162 if (new_len > b->size)
163 resize_buffer (b, new_len);
164
165 memcpy (b->text + b->len, addr, len);
166 b->len = new_len;
167 }
168
169
170 \f
171 /* Recognizing preprocessor tokens. */
172
173
/* Return 1 if C is a space, horizontal tab, newline, vertical tab or
   form feed; return 0 otherwise.  */
int
macro_is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
      return 1;

    default:
      return 0;
    }
}
183
184
/* Return 1 if C is one of the decimal digits '0' through '9';
   return 0 otherwise.  */
int
macro_is_digit (int c)
{
  if (c < '0')
    return 0;

  return c <= '9';
}
190
191
/* Return 1 if C may appear in an identifier without being a digit:
   an underscore, or a letter in the ranges 'a'..'z' or 'A'..'Z'.
   Return 0 otherwise.  */
int
macro_is_identifier_nondigit (int c)
{
  if (c == '_')
    return 1;

  if (c >= 'a' && c <= 'z')
    return 1;

  return c >= 'A' && c <= 'Z';
}
199
200
201 static void
202 set_token (struct macro_buffer *tok, char *start, char *end)
203 {
204 init_shared_buffer (tok, start, end - start);
205 tok->last_token = 0;
206
207 /* Presumed; get_identifier may overwrite this. */
208 tok->is_identifier = 0;
209 }
210
211
/* If the text starting at P going up to (but not including) END
   starts with a comment, set *TOK to cover the whole comment and
   return 1.  Otherwise return zero.

   A block comment includes its closing delimiter; an unterminated
   block comment raises an error.  A line comment runs up to, but
   does not include, the next newline (or END).  */
static int
get_comment (struct macro_buffer *tok, char *p, char *end)
{
  char *comment_start = p;

  /* Both comment introducers are two characters long.  */
  if (end - p < 2)
    return 0;

  if (p[0] == '/' && p[1] == '*')
    {
      char *scan_pos;

      for (scan_pos = p + 2; scan_pos < end; scan_pos++)
        {
          if (end - scan_pos >= 2
              && scan_pos[0] == '*'
              && scan_pos[1] == '/')
            {
              set_token (tok, comment_start, scan_pos + 2);
              return 1;
            }
        }

      error (_("Unterminated comment in macro expansion."));
    }
  else if (p[0] == '/' && p[1] == '/')
    {
      char *scan_pos = p + 2;

      while (scan_pos < end && *scan_pos != '\n')
        scan_pos++;

      set_token (tok, comment_start, scan_pos);
      return 1;
    }
  else
    return 0;
}
252
253
254 static int
255 get_identifier (struct macro_buffer *tok, char *p, char *end)
256 {
257 if (p < end
258 && macro_is_identifier_nondigit (*p))
259 {
260 char *tok_start = p;
261
262 while (p < end
263 && (macro_is_identifier_nondigit (*p)
264 || macro_is_digit (*p)))
265 p++;
266
267 set_token (tok, tok_start, p);
268 tok->is_identifier = 1;
269 return 1;
270 }
271 else
272 return 0;
273 }
274
275
/* If the text starting at P going up to (but not including) END
   starts with a preprocessing number, set *TOK to cover it and
   return 1.  Otherwise return zero.

   A preprocessing number starts with a digit, or with a '.' followed
   by a digit.  It continues through digits, identifier characters
   and '.', plus any 'e', 'E', 'p' or 'P' immediately followed by a
   sign.  */
static int
get_pp_number (struct macro_buffer *tok, char *p, char *end)
{
  char *tok_start = p;

  if (p >= end)
    return 0;

  if (! macro_is_digit (*p)
      && ! (*p == '.'
            && end - p >= 2
            && macro_is_digit (p[1])))
    return 0;

  while (p < end)
    {
      /* Compare against the exponent letters explicitly.  A strchr
         lookup would also match a NUL byte, because the terminator
         counts as part of the searched string.  */
      int is_exponent_letter = (*p == 'e' || *p == 'E'
                                || *p == 'p' || *p == 'P');

      if (end - p >= 2
          && is_exponent_letter
          && (p[1] == '+' || p[1] == '-'))
        p += 2;
      else if (macro_is_digit (*p)
               || macro_is_identifier_nondigit (*p)
               || *p == '.')
        p++;
      else
        break;
    }

  set_token (tok, tok_start, p);
  return 1;
}
307
308
309
/* If the text starting at P going up to (but not including) END
   starts with a character constant (optionally prefixed with L),
   set *TOK to cover that constant and return 1.  Otherwise return
   zero.  Raise an error if the constant is empty or has no closing
   quote.  */
static int
get_character_constant (struct macro_buffer *tok, char *p, char *end)
{
  /* ISO/IEC 9899:1999 (E) Section 6.4.4.4 paragraph 1
     What really matters is that this agrees with GDB's C/C++ lexer,
     so escape sequences are handed to parse_escape.  */
  char *constant_start = p;
  char *body_start;

  /* Step over the opening quote, with or without the L prefix.  */
  if (p < end && *p == '\'')
    p += 1;
  else if (end - p >= 2 && p[0] == 'L' && p[1] == '\'')
    p += 2;
  else
    return 0;

  body_start = p;

  /* Walk the body up to the closing quote.
     NOTE(review): parse_escape is not told where END is; confirm it
     cannot read past END when a backslash is the last character.  */
  while (p < end && *p != '\'')
    {
      if (*p == '\\')
        {
          p++;
          parse_escape (&p);
        }
      else
        p++;
    }

  if (p >= end)
    error (_("Unmatched single quote."));

  if (p == body_start)
    error (_("A character constant must contain at least one "
             "character."));

  /* Include the closing quote in the token.  */
  p++;

  set_token (tok, constant_start, p);
  return 1;
}
363
364
/* If the text starting at P going up to (but not including) END
   starts with a string literal (optionally prefixed with L), set
   *TOK to cover that literal and return 1.  Otherwise return zero.
   Raise an error if the literal contains a newline or has no closing
   quote.  */
static int
get_string_literal (struct macro_buffer *tok, char *p, char *end)
{
  char *literal_start = p;

  /* Step over the opening quote, with or without the L prefix.  */
  if (p < end && *p == '"')
    p += 1;
  else if (end - p >= 2 && p[0] == 'L' && p[1] == '"')
    p += 2;
  else
    return 0;

  /* Walk the body up to the closing quote.
     NOTE(review): parse_escape is not told where END is; confirm it
     cannot read past END when a backslash is the last character.  */
  while (p < end && *p != '"')
    {
      if (*p == '\n')
        error (_("Newline characters may not appear in string "
                 "constants."));

      if (*p == '\\')
        {
          p++;
          parse_escape (&p);
        }
      else
        p++;
    }

  if (p >= end)
    error (_("Unterminated string in expression."));

  /* Include the closing quote in the token.  */
  p++;

  set_token (tok, literal_start, p);
  return 1;
}
414
415
/* If the text starting at P going up to (but not including) END
   starts with a punctuator, set *TOK to cover it and return 1.
   Otherwise return zero.  */
static int
get_punctuator (struct macro_buffer *tok, char *p, char *end)
{
  /* Clarity matters more than speed here.  */

  /* ISO/IEC 9899:1999 (E) Section 6.4.6 Paragraph 1.
     The order of this table is significant: the first entry that
     matches wins, so a punctuator that is a prefix of a longer one
     must come after the longer one.  */
  static const char * const punctuators[] = {
    "[", "]", "(", ")", "{", "}", "?", ";", ",", "~",
    "...", ".",
    "->", "--", "-=", "-",
    "++", "+=", "+",
    "*=", "*",
    "!=", "!",
    "&&", "&=", "&",
    "/=", "/",
    "%>", "%:%:", "%:", "%=", "%",
    "^=", "^",
    "##", "#",
    ":>", ":",
    "||", "|=", "|",
    "<<=", "<<", "<=", "<:", "<%", "<",
    ">>=", ">>", ">=", ">",
    "==", "=",
    0
  };

  const char * const *entry;

  /* No characters left means no punctuator.  */
  if (p >= end)
    return 0;

  for (entry = punctuators; *entry != 0; entry++)
    {
      const char *candidate = *entry;
      int len;

      /* Cheap rejection on the first character.  */
      if (candidate[0] != p[0])
        continue;

      len = strlen (candidate);
      if (p + len <= end && memcmp (p, candidate, len) == 0)
        {
          set_token (tok, p, p + len);
          return 1;
        }
    }

  return 0;
}
469
470
471 /* Peel the next preprocessor token off of SRC, and put it in TOK.
472 Mutate TOK to refer to the first token in SRC, and mutate SRC to
473 refer to the text after that token. SRC must be a shared buffer;
474 the resulting TOK will be shared, pointing into the same string SRC
475 does. Initialize TOK's last_token field. Return non-zero if we
476 succeed, or 0 if we didn't find any more tokens in SRC. */
477 static int
478 get_token (struct macro_buffer *tok,
479 struct macro_buffer *src)
480 {
481 char *p = src->text;
482 char *end = p + src->len;
483
484 gdb_assert (src->shared);
485
486 /* From the ISO C standard, ISO/IEC 9899:1999 (E), section 6.4:
487
488 preprocessing-token:
489 header-name
490 identifier
491 pp-number
492 character-constant
493 string-literal
494 punctuator
495 each non-white-space character that cannot be one of the above
496
497 We don't have to deal with header-name tokens, since those can
498 only occur after a #include, which we will never see. */
499
500 while (p < end)
501 if (macro_is_whitespace (*p))
502 p++;
503 else if (get_comment (tok, p, end))
504 p += tok->len;
505 else if (get_pp_number (tok, p, end)
506 || get_character_constant (tok, p, end)
507 || get_string_literal (tok, p, end)
508 /* Note: the grammar in the standard seems to be
509 ambiguous: L'x' can be either a wide character
510 constant, or an identifier followed by a normal
511 character constant. By trying `get_identifier' after
512 we try get_character_constant and get_string_literal,
513 we give the wide character syntax precedence. Now,
514 since GDB doesn't handle wide character constants
515 anyway, is this the right thing to do? */
516 || get_identifier (tok, p, end)
517 || get_punctuator (tok, p, end))
518 {
519 /* How many characters did we consume, including whitespace? */
520 int consumed = p - src->text + tok->len;
521 src->text += consumed;
522 src->len -= consumed;
523 return 1;
524 }
525 else
526 {
527 /* We have found a "non-whitespace character that cannot be
528 one of the above." Make a token out of it. */
529 int consumed;
530
531 set_token (tok, p, p + 1);
532 consumed = p - src->text + tok->len;
533 src->text += consumed;
534 src->len -= consumed;
535 return 1;
536 }
537
538 return 0;
539 }
540
541
542 \f
543 /* Appending token strings, with and without splicing */
544
545
546 /* Append the macro buffer SRC to the end of DEST, and ensure that
547 doing so doesn't splice the token at the end of SRC with the token
548 at the beginning of DEST. SRC and DEST must have their last_token
549 fields set. Upon return, DEST's last_token field is set correctly.
550
551 For example:
552
553 If DEST is "(" and SRC is "y", then we can return with
554 DEST set to "(y" --- we've simply appended the two buffers.
555
556 However, if DEST is "x" and SRC is "y", then we must not return
557 with DEST set to "xy" --- that would splice the two tokens "x" and
558 "y" together to make a single token "xy". However, it would be
559 fine to return with DEST set to "x y". Similarly, "<" and "<" must
560 yield "< <", not "<<", etc. */
561 static void
562 append_tokens_without_splicing (struct macro_buffer *dest,
563 struct macro_buffer *src)
564 {
565 int original_dest_len = dest->len;
566 struct macro_buffer dest_tail, new_token;
567
568 gdb_assert (src->last_token != -1);
569 gdb_assert (dest->last_token != -1);
570
571 /* First, just try appending the two, and call get_token to see if
572 we got a splice. */
573 appendmem (dest, src->text, src->len);
574
575 /* If DEST originally had no token abutting its end, then we can't
576 have spliced anything, so we're done. */
577 if (dest->last_token == original_dest_len)
578 {
579 dest->last_token = original_dest_len + src->last_token;
580 return;
581 }
582
583 /* Set DEST_TAIL to point to the last token in DEST, followed by
584 all the stuff we just appended. */
585 init_shared_buffer (&dest_tail,
586 dest->text + dest->last_token,
587 dest->len - dest->last_token);
588
589 /* Re-parse DEST's last token. We know that DEST used to contain
590 at least one token, so if it doesn't contain any after the
591 append, then we must have spliced "/" and "*" or "/" and "/" to
592 make a comment start. (Just for the record, I got this right
593 the first time. This is not a bug fix.) */
594 if (get_token (&new_token, &dest_tail)
595 && (new_token.text + new_token.len
596 == dest->text + original_dest_len))
597 {
598 /* No splice, so we're done. */
599 dest->last_token = original_dest_len + src->last_token;
600 return;
601 }
602
603 /* Okay, a simple append caused a splice. Let's chop dest back to
604 its original length and try again, but separate the texts with a
605 space. */
606 dest->len = original_dest_len;
607 appendc (dest, ' ');
608 appendmem (dest, src->text, src->len);
609
610 init_shared_buffer (&dest_tail,
611 dest->text + dest->last_token,
612 dest->len - dest->last_token);
613
614 /* Try to re-parse DEST's last token, as above. */
615 if (get_token (&new_token, &dest_tail)
616 && (new_token.text + new_token.len
617 == dest->text + original_dest_len))
618 {
619 /* No splice, so we're done. */
620 dest->last_token = original_dest_len + 1 + src->last_token;
621 return;
622 }
623
624 /* As far as I know, there's no case where inserting a space isn't
625 enough to prevent a splice. */
626 internal_error (__FILE__, __LINE__,
627 _("unable to avoid splicing tokens during macro expansion"));
628 }
629
630 /* Stringify an argument, and insert it into DEST. ARG is the text to
631 stringify; it is LEN bytes long. */
632
633 static void
634 stringify (struct macro_buffer *dest, char *arg, int len)
635 {
636 /* Trim initial whitespace from ARG. */
637 while (len > 0 && macro_is_whitespace (*arg))
638 {
639 ++arg;
640 --len;
641 }
642
643 /* Trim trailing whitespace from ARG. */
644 while (len > 0 && macro_is_whitespace (arg[len - 1]))
645 --len;
646
647 /* Insert the string. */
648 appendc (dest, '"');
649 while (len > 0)
650 {
651 /* We could try to handle strange cases here, like control
652 characters, but there doesn't seem to be much point. */
653 if (macro_is_whitespace (*arg))
654 {
655 /* Replace a sequence of whitespace with a single space. */
656 appendc (dest, ' ');
657 while (len > 1 && macro_is_whitespace (arg[1]))
658 {
659 ++arg;
660 --len;
661 }
662 }
663 else if (*arg == '\\' || *arg == '"')
664 {
665 appendc (dest, '\\');
666 appendc (dest, *arg);
667 }
668 else
669 appendc (dest, *arg);
670 ++arg;
671 --len;
672 }
673 appendc (dest, '"');
674 dest->last_token = dest->len;
675 }
676
677 \f
678 /* Expanding macros! */
679
680
/* One node of a singly-linked list holding the names of the macros
   currently being expanded.  The list is used to detect expansion
   loops.  */
struct macro_name_list {
  /* Name of a macro whose expansion is in progress.  */
  const char *name;

  /* The rest of the list.  */
  struct macro_name_list *next;
};
687
688
689 /* Return non-zero if we are currently expanding the macro named NAME,
690 according to LIST; otherwise, return zero.
691
692 You know, it would be possible to get rid of all the NO_LOOP
693 arguments to these functions by simply generating a new lookup
694 function and baton which refuses to find the definition for a
695 particular macro, and otherwise delegates the decision to another
696 function/baton pair. But that makes the linked list of excluded
697 macros chained through untyped baton pointers, which will make it
698 harder to debug. :( */
699 static int
700 currently_rescanning (struct macro_name_list *list, const char *name)
701 {
702 for (; list; list = list->next)
703 if (strcmp (name, list->name) == 0)
704 return 1;
705
706 return 0;
707 }
708
709
710 /* Gather the arguments to a macro expansion.
711
712 NAME is the name of the macro being invoked. (It's only used for
713 printing error messages.)
714
715 Assume that SRC is the text of the macro invocation immediately
716 following the macro name. For example, if we're processing the
717 text foo(bar, baz), then NAME would be foo and SRC will be (bar,
718 baz).
719
720 If SRC doesn't start with an open paren ( token at all, return
721 zero, leave SRC unchanged, and don't set *ARGC_P to anything.
722
723 If SRC doesn't contain a properly terminated argument list, then
724 raise an error.
725
726 For a variadic macro, NARGS holds the number of formal arguments to
727 the macro. For a GNU-style variadic macro, this should be the
728 number of named arguments. For a non-variadic macro, NARGS should
729 be -1.
730
731 Otherwise, return a pointer to the first element of an array of
732 macro buffers referring to the argument texts, and set *ARGC_P to
733 the number of arguments we found --- the number of elements in the
734 array. The macro buffers share their text with SRC, and their
735 last_token fields are initialized. The array is allocated with
736 xmalloc, and the caller is responsible for freeing it.
737
738 NOTE WELL: if SRC starts with a open paren ( token followed
739 immediately by a close paren ) token (e.g., the invocation looks
740 like "foo()"), we treat that as one argument, which happens to be
741 the empty list of tokens. The caller should keep in mind that such
742 a sequence of tokens is a valid way to invoke one-parameter
743 function-like macros, but also a valid way to invoke zero-parameter
744 function-like macros. Eeew.
745
746 Consume the tokens from SRC; after this call, SRC contains the text
747 following the invocation. */
748
749 static struct macro_buffer *
750 gather_arguments (const char *name, struct macro_buffer *src,
751 int nargs, int *argc_p)
752 {
753 struct macro_buffer tok;
754 int args_len, args_size;
755 struct macro_buffer *args = NULL;
756 struct cleanup *back_to = make_cleanup (free_current_contents, &args);
757
758 /* Does SRC start with an opening paren token? Read from a copy of
759 SRC, so SRC itself is unaffected if we don't find an opening
760 paren. */
761 {
762 struct macro_buffer temp;
763 init_shared_buffer (&temp, src->text, src->len);
764
765 if (! get_token (&tok, &temp)
766 || tok.len != 1
767 || tok.text[0] != '(')
768 {
769 discard_cleanups (back_to);
770 return 0;
771 }
772 }
773
774 /* Consume SRC's opening paren. */
775 get_token (&tok, src);
776
777 args_len = 0;
778 args_size = 6;
779 args = (struct macro_buffer *) xmalloc (sizeof (*args) * args_size);
780
781 for (;;)
782 {
783 struct macro_buffer *arg;
784 int depth;
785
786 /* Make sure we have room for the next argument. */
787 if (args_len >= args_size)
788 {
789 args_size *= 2;
790 args = xrealloc (args, sizeof (*args) * args_size);
791 }
792
793 /* Initialize the next argument. */
794 arg = &args[args_len++];
795 set_token (arg, src->text, src->text);
796
797 /* Gather the argument's tokens. */
798 depth = 0;
799 for (;;)
800 {
801 char *start = src->text;
802
803 if (! get_token (&tok, src))
804 error (_("Malformed argument list for macro `%s'."), name);
805
806 /* Is tok an opening paren? */
807 if (tok.len == 1 && tok.text[0] == '(')
808 depth++;
809
810 /* Is tok is a closing paren? */
811 else if (tok.len == 1 && tok.text[0] == ')')
812 {
813 /* If it's a closing paren at the top level, then that's
814 the end of the argument list. */
815 if (depth == 0)
816 {
817 /* In the varargs case, the last argument may be
818 missing. Add an empty argument in this case. */
819 if (nargs != -1 && args_len == nargs - 1)
820 {
821 /* Make sure we have room for the argument. */
822 if (args_len >= args_size)
823 {
824 args_size++;
825 args = xrealloc (args, sizeof (*args) * args_size);
826 }
827 arg = &args[args_len++];
828 set_token (arg, src->text, src->text);
829 }
830
831 discard_cleanups (back_to);
832 *argc_p = args_len;
833 return args;
834 }
835
836 depth--;
837 }
838
839 /* If tok is a comma at top level, then that's the end of
840 the current argument. However, if we are handling a
841 variadic macro and we are computing the last argument, we
842 want to include the comma and remaining tokens. */
843 else if (tok.len == 1 && tok.text[0] == ',' && depth == 0
844 && (nargs == -1 || args_len < nargs))
845 break;
846
847 /* Extend the current argument to enclose this token. If
848 this is the current argument's first token, leave out any
849 leading whitespace, just for aesthetics. */
850 if (arg->len == 0)
851 {
852 arg->text = tok.text;
853 arg->len = tok.len;
854 arg->last_token = 0;
855 }
856 else
857 {
858 arg->len = (tok.text + tok.len) - arg->text;
859 arg->last_token = tok.text - arg->text;
860 }
861 }
862 }
863 }
864
865
866 /* The `expand' and `substitute_args' functions both invoke `scan'
867 recursively, so we need a forward declaration somewhere. */
868 static void scan (struct macro_buffer *dest,
869 struct macro_buffer *src,
870 struct macro_name_list *no_loop,
871 macro_lookup_ftype *lookup_func,
872 void *lookup_baton);
873
874
875 /* A helper function for substitute_args.
876
877 ARGV is a vector of all the arguments; ARGC is the number of
878 arguments. IS_VARARGS is true if the macro being substituted is a
879 varargs macro; in this case VA_ARG_NAME is the name of the
880 "variable" argument. VA_ARG_NAME is ignored if IS_VARARGS is
881 false.
882
883 If the token TOK is the name of a parameter, return the parameter's
884 index. If TOK is not an argument, return -1. */
885
886 static int
887 find_parameter (const struct macro_buffer *tok,
888 int is_varargs, const struct macro_buffer *va_arg_name,
889 int argc, const char * const *argv)
890 {
891 int i;
892
893 if (! tok->is_identifier)
894 return -1;
895
896 for (i = 0; i < argc; ++i)
897 if (tok->len == strlen (argv[i]) && ! memcmp (tok->text, argv[i], tok->len))
898 return i;
899
900 if (is_varargs && tok->len == va_arg_name->len
901 && ! memcmp (tok->text, va_arg_name->text, tok->len))
902 return argc - 1;
903
904 return -1;
905 }
906
907 /* Given the macro definition DEF, being invoked with the actual
908 arguments given by ARGC and ARGV, substitute the arguments into the
909 replacement list, and store the result in DEST.
910
911 IS_VARARGS should be true if DEF is a varargs macro. In this case,
912 VA_ARG_NAME should be the name of the "variable" argument -- either
913 __VA_ARGS__ for c99-style varargs, or the final argument name, for
914 GNU-style varargs. If IS_VARARGS is false, this parameter is
915 ignored.
916
917 If it is necessary to expand macro invocations in one of the
918 arguments, use LOOKUP_FUNC and LOOKUP_BATON to find the macro
919 definitions, and don't expand invocations of the macros listed in
920 NO_LOOP. */
921
922 static void
923 substitute_args (struct macro_buffer *dest,
924 struct macro_definition *def,
925 int is_varargs, const struct macro_buffer *va_arg_name,
926 int argc, struct macro_buffer *argv,
927 struct macro_name_list *no_loop,
928 macro_lookup_ftype *lookup_func,
929 void *lookup_baton)
930 {
931 /* A macro buffer for the macro's replacement list. */
932 struct macro_buffer replacement_list;
933 /* The token we are currently considering. */
934 struct macro_buffer tok;
935 /* The replacement list's pointer from just before TOK was lexed. */
936 char *original_rl_start;
937 /* We have a single lookahead token to handle token splicing. */
938 struct macro_buffer lookahead;
939 /* The lookahead token might not be valid. */
940 int lookahead_valid;
941 /* The replacement list's pointer from just before LOOKAHEAD was
942 lexed. */
943 char *lookahead_rl_start;
944
945 init_shared_buffer (&replacement_list, (char *) def->replacement,
946 strlen (def->replacement));
947
948 gdb_assert (dest->len == 0);
949 dest->last_token = 0;
950
951 original_rl_start = replacement_list.text;
952 if (! get_token (&tok, &replacement_list))
953 return;
954 lookahead_rl_start = replacement_list.text;
955 lookahead_valid = get_token (&lookahead, &replacement_list);
956
957 for (;;)
958 {
959 /* Just for aesthetics. If we skipped some whitespace, copy
960 that to DEST. */
961 if (tok.text > original_rl_start)
962 {
963 appendmem (dest, original_rl_start, tok.text - original_rl_start);
964 dest->last_token = dest->len;
965 }
966
967 /* Is this token the stringification operator? */
968 if (tok.len == 1
969 && tok.text[0] == '#')
970 {
971 int arg;
972
973 if (!lookahead_valid)
974 error (_("Stringification operator requires an argument."));
975
976 arg = find_parameter (&lookahead, is_varargs, va_arg_name,
977 def->argc, def->argv);
978 if (arg == -1)
979 error (_("Argument to stringification operator must name "
980 "a macro parameter."));
981
982 stringify (dest, argv[arg].text, argv[arg].len);
983
984 /* Read one token and let the loop iteration code handle the
985 rest. */
986 lookahead_rl_start = replacement_list.text;
987 lookahead_valid = get_token (&lookahead, &replacement_list);
988 }
989 /* Is this token the splicing operator? */
990 else if (tok.len == 2
991 && tok.text[0] == '#'
992 && tok.text[1] == '#')
993 error (_("Stray splicing operator"));
994 /* Is the next token the splicing operator? */
995 else if (lookahead_valid
996 && lookahead.len == 2
997 && lookahead.text[0] == '#'
998 && lookahead.text[1] == '#')
999 {
1000 int arg, finished = 0;
1001 int prev_was_comma = 0;
1002
1003 /* Note that GCC warns if the result of splicing is not a
1004 token. In the debugger there doesn't seem to be much
1005 benefit from doing this. */
1006
1007 /* Insert the first token. */
1008 if (tok.len == 1 && tok.text[0] == ',')
1009 prev_was_comma = 1;
1010 else
1011 {
1012 int arg = find_parameter (&tok, is_varargs, va_arg_name,
1013 def->argc, def->argv);
1014 if (arg != -1)
1015 appendmem (dest, argv[arg].text, argv[arg].len);
1016 else
1017 appendmem (dest, tok.text, tok.len);
1018 }
1019
1020 /* Apply a possible sequence of ## operators. */
1021 for (;;)
1022 {
1023 if (! get_token (&tok, &replacement_list))
1024 error (_("Splicing operator at end of macro"));
1025
1026 /* Handle a comma before a ##. If we are handling
1027 varargs, and the token on the right hand side is the
1028 varargs marker, and the final argument is empty or
1029 missing, then drop the comma. This is a GNU
1030 extension. There is one ambiguous case here,
1031 involving pedantic behavior with an empty argument,
1032 but we settle that in favor of GNU-style (GCC uses an
1033 option). If we aren't dealing with varargs, we
1034 simply insert the comma. */
1035 if (prev_was_comma)
1036 {
1037 if (! (is_varargs
1038 && tok.len == va_arg_name->len
1039 && !memcmp (tok.text, va_arg_name->text, tok.len)
1040 && argv[argc - 1].len == 0))
1041 appendmem (dest, ",", 1);
1042 prev_was_comma = 0;
1043 }
1044
1045 /* Insert the token. If it is a parameter, insert the
1046 argument. If it is a comma, treat it specially. */
1047 if (tok.len == 1 && tok.text[0] == ',')
1048 prev_was_comma = 1;
1049 else
1050 {
1051 int arg = find_parameter (&tok, is_varargs, va_arg_name,
1052 def->argc, def->argv);
1053 if (arg != -1)
1054 appendmem (dest, argv[arg].text, argv[arg].len);
1055 else
1056 appendmem (dest, tok.text, tok.len);
1057 }
1058
1059 /* Now read another token. If it is another splice, we
1060 loop. */
1061 original_rl_start = replacement_list.text;
1062 if (! get_token (&tok, &replacement_list))
1063 {
1064 finished = 1;
1065 break;
1066 }
1067
1068 if (! (tok.len == 2
1069 && tok.text[0] == '#'
1070 && tok.text[1] == '#'))
1071 break;
1072 }
1073
1074 if (prev_was_comma)
1075 {
1076 /* We saw a comma. Insert it now. */
1077 appendmem (dest, ",", 1);
1078 }
1079
1080 dest->last_token = dest->len;
1081 if (finished)
1082 lookahead_valid = 0;
1083 else
1084 {
1085 /* Set up for the loop iterator. */
1086 lookahead = tok;
1087 lookahead_rl_start = original_rl_start;
1088 lookahead_valid = 1;
1089 }
1090 }
1091 else
1092 {
1093 /* Is this token an identifier? */
1094 int substituted = 0;
1095 int arg = find_parameter (&tok, is_varargs, va_arg_name,
1096 def->argc, def->argv);
1097
1098 if (arg != -1)
1099 {
1100 struct macro_buffer arg_src;
1101
1102 /* Expand any macro invocations in the argument text,
1103 and append the result to dest. Remember that scan
1104 mutates its source, so we need to scan a new buffer
1105 referring to the argument's text, not the argument
1106 itself. */
1107 init_shared_buffer (&arg_src, argv[arg].text, argv[arg].len);
1108 scan (dest, &arg_src, no_loop, lookup_func, lookup_baton);
1109 substituted = 1;
1110 }
1111
1112 /* If it wasn't a parameter, then just copy it across. */
1113 if (! substituted)
1114 append_tokens_without_splicing (dest, &tok);
1115 }
1116
1117 if (! lookahead_valid)
1118 break;
1119
1120 tok = lookahead;
1121 original_rl_start = lookahead_rl_start;
1122
1123 lookahead_rl_start = replacement_list.text;
1124 lookahead_valid = get_token (&lookahead, &replacement_list);
1125 }
1126 }
1127
1128
1129 /* Expand a call to a macro named ID, whose definition is DEF. Append
1130 its expansion to DEST. SRC is the input text following the ID
1131 token. We are currently rescanning the expansions of the macros
1132 named in NO_LOOP; don't re-expand them. Use LOOKUP_FUNC and
1133 LOOKUP_BATON to find definitions for any nested macro references.
1134
1135 Return 1 if we decided to expand it, zero otherwise. (If it's a
1136 function-like macro name that isn't followed by an argument list,
1137 we don't expand it.) If we return zero, leave SRC unchanged. */
1138 static int
1139 expand (const char *id,
1140 struct macro_definition *def,
1141 struct macro_buffer *dest,
1142 struct macro_buffer *src,
1143 struct macro_name_list *no_loop,
1144 macro_lookup_ftype *lookup_func,
1145 void *lookup_baton)
1146 {
1147 struct macro_name_list new_no_loop;
1148
1149 /* Create a new node to be added to the front of the no-expand list.
1150 This list is appropriate for re-scanning replacement lists, but
1151 it is *not* appropriate for scanning macro arguments; invocations
1152 of the macro whose arguments we are gathering *do* get expanded
1153 there. */
1154 new_no_loop.name = id;
1155 new_no_loop.next = no_loop;
1156
1157 /* What kind of macro are we expanding? */
1158 if (def->kind == macro_object_like)
1159 {
1160 struct macro_buffer replacement_list;
1161
1162 init_shared_buffer (&replacement_list, (char *) def->replacement,
1163 strlen (def->replacement));
1164
1165 scan (dest, &replacement_list, &new_no_loop, lookup_func, lookup_baton);
1166 return 1;
1167 }
1168 else if (def->kind == macro_function_like)
1169 {
1170 struct cleanup *back_to = make_cleanup (null_cleanup, 0);
1171 int argc = 0;
1172 struct macro_buffer *argv = NULL;
1173 struct macro_buffer substituted;
1174 struct macro_buffer substituted_src;
1175 struct macro_buffer va_arg_name;
1176 int is_varargs = 0;
1177
1178 if (def->argc >= 1)
1179 {
1180 if (strcmp (def->argv[def->argc - 1], "...") == 0)
1181 {
1182 /* In C99-style varargs, substitution is done using
1183 __VA_ARGS__. */
1184 init_shared_buffer (&va_arg_name, "__VA_ARGS__",
1185 strlen ("__VA_ARGS__"));
1186 is_varargs = 1;
1187 }
1188 else
1189 {
1190 int len = strlen (def->argv[def->argc - 1]);
1191 if (len > 3
1192 && strcmp (def->argv[def->argc - 1] + len - 3, "...") == 0)
1193 {
1194 /* In GNU-style varargs, the name of the
1195 substitution parameter is the name of the formal
1196 argument without the "...". */
1197 init_shared_buffer (&va_arg_name,
1198 (char *) def->argv[def->argc - 1],
1199 len - 3);
1200 is_varargs = 1;
1201 }
1202 }
1203 }
1204
1205 make_cleanup (free_current_contents, &argv);
1206 argv = gather_arguments (id, src, is_varargs ? def->argc : -1,
1207 &argc);
1208
1209 /* If we couldn't find any argument list, then we don't expand
1210 this macro. */
1211 if (! argv)
1212 {
1213 do_cleanups (back_to);
1214 return 0;
1215 }
1216
1217 /* Check that we're passing an acceptable number of arguments for
1218 this macro. */
1219 if (argc != def->argc)
1220 {
1221 if (is_varargs && argc >= def->argc - 1)
1222 {
1223 /* Ok. */
1224 }
1225 /* Remember that a sequence of tokens like "foo()" is a
1226 valid invocation of a macro expecting either zero or one
1227 arguments. */
1228 else if (! (argc == 1
1229 && argv[0].len == 0
1230 && def->argc == 0))
1231 error (_("Wrong number of arguments to macro `%s' "
1232 "(expected %d, got %d)."),
1233 id, def->argc, argc);
1234 }
1235
1236 /* Note that we don't expand macro invocations in the arguments
1237 yet --- we let subst_args take care of that. Parameters that
1238 appear as operands of the stringifying operator "#" or the
1239 splicing operator "##" don't get macro references expanded,
1240 so we can't really tell whether it's appropriate to macro-
1241 expand an argument until we see how it's being used. */
1242 init_buffer (&substituted, 0);
1243 make_cleanup (cleanup_macro_buffer, &substituted);
1244 substitute_args (&substituted, def, is_varargs, &va_arg_name,
1245 argc, argv, no_loop, lookup_func, lookup_baton);
1246
1247 /* Now `substituted' is the macro's replacement list, with all
1248 argument values substituted into it properly. Re-scan it for
1249 macro references, but don't expand invocations of this macro.
1250
1251 We create a new buffer, `substituted_src', which points into
1252 `substituted', and scan that. We can't scan `substituted'
1253 itself, since the tokenization process moves the buffer's
1254 text pointer around, and we still need to be able to find
1255 `substituted's original text buffer after scanning it so we
1256 can free it. */
1257 init_shared_buffer (&substituted_src, substituted.text, substituted.len);
1258 scan (dest, &substituted_src, &new_no_loop, lookup_func, lookup_baton);
1259
1260 do_cleanups (back_to);
1261
1262 return 1;
1263 }
1264 else
1265 internal_error (__FILE__, __LINE__, _("bad macro definition kind"));
1266 }
1267
1268
1269 /* If the single token in SRC_FIRST followed by the tokens in SRC_REST
1270 constitute a macro invokation not forbidden in NO_LOOP, append its
1271 expansion to DEST and return non-zero. Otherwise, return zero, and
1272 leave DEST unchanged.
1273
1274 SRC_FIRST and SRC_REST must be shared buffers; DEST must not be one.
1275 SRC_FIRST must be a string built by get_token. */
1276 static int
1277 maybe_expand (struct macro_buffer *dest,
1278 struct macro_buffer *src_first,
1279 struct macro_buffer *src_rest,
1280 struct macro_name_list *no_loop,
1281 macro_lookup_ftype *lookup_func,
1282 void *lookup_baton)
1283 {
1284 gdb_assert (src_first->shared);
1285 gdb_assert (src_rest->shared);
1286 gdb_assert (! dest->shared);
1287
1288 /* Is this token an identifier? */
1289 if (src_first->is_identifier)
1290 {
1291 /* Make a null-terminated copy of it, since that's what our
1292 lookup function expects. */
1293 char *id = xmalloc (src_first->len + 1);
1294 struct cleanup *back_to = make_cleanup (xfree, id);
1295 memcpy (id, src_first->text, src_first->len);
1296 id[src_first->len] = 0;
1297
1298 /* If we're currently re-scanning the result of expanding
1299 this macro, don't expand it again. */
1300 if (! currently_rescanning (no_loop, id))
1301 {
1302 /* Does this identifier have a macro definition in scope? */
1303 struct macro_definition *def = lookup_func (id, lookup_baton);
1304
1305 if (def && expand (id, def, dest, src_rest, no_loop,
1306 lookup_func, lookup_baton))
1307 {
1308 do_cleanups (back_to);
1309 return 1;
1310 }
1311 }
1312
1313 do_cleanups (back_to);
1314 }
1315
1316 return 0;
1317 }
1318
1319
1320 /* Expand macro references in SRC, appending the results to DEST.
1321 Assume we are re-scanning the result of expanding the macros named
1322 in NO_LOOP, and don't try to re-expand references to them.
1323
1324 SRC must be a shared buffer; DEST must not be one. */
1325 static void
1326 scan (struct macro_buffer *dest,
1327 struct macro_buffer *src,
1328 struct macro_name_list *no_loop,
1329 macro_lookup_ftype *lookup_func,
1330 void *lookup_baton)
1331 {
1332 gdb_assert (src->shared);
1333 gdb_assert (! dest->shared);
1334
1335 for (;;)
1336 {
1337 struct macro_buffer tok;
1338 char *original_src_start = src->text;
1339
1340 /* Find the next token in SRC. */
1341 if (! get_token (&tok, src))
1342 break;
1343
1344 /* Just for aesthetics. If we skipped some whitespace, copy
1345 that to DEST. */
1346 if (tok.text > original_src_start)
1347 {
1348 appendmem (dest, original_src_start, tok.text - original_src_start);
1349 dest->last_token = dest->len;
1350 }
1351
1352 if (! maybe_expand (dest, &tok, src, no_loop, lookup_func, lookup_baton))
1353 /* We didn't end up expanding tok as a macro reference, so
1354 simply append it to dest. */
1355 append_tokens_without_splicing (dest, &tok);
1356 }
1357
1358 /* Just for aesthetics. If there was any trailing whitespace in
1359 src, copy it to dest. */
1360 if (src->len)
1361 {
1362 appendmem (dest, src->text, src->len);
1363 dest->last_token = dest->len;
1364 }
1365 }
1366
1367
1368 char *
1369 macro_expand (const char *source,
1370 macro_lookup_ftype *lookup_func,
1371 void *lookup_func_baton)
1372 {
1373 struct macro_buffer src, dest;
1374 struct cleanup *back_to;
1375
1376 init_shared_buffer (&src, (char *) source, strlen (source));
1377
1378 init_buffer (&dest, 0);
1379 dest.last_token = 0;
1380 back_to = make_cleanup (cleanup_macro_buffer, &dest);
1381
1382 scan (&dest, &src, 0, lookup_func, lookup_func_baton);
1383
1384 appendc (&dest, '\0');
1385
1386 discard_cleanups (back_to);
1387 return dest.text;
1388 }
1389
1390
1391 char *
1392 macro_expand_once (const char *source,
1393 macro_lookup_ftype *lookup_func,
1394 void *lookup_func_baton)
1395 {
1396 error (_("Expand-once not implemented yet."));
1397 }
1398
1399
1400 char *
1401 macro_expand_next (char **lexptr,
1402 macro_lookup_ftype *lookup_func,
1403 void *lookup_baton)
1404 {
1405 struct macro_buffer src, dest, tok;
1406 struct cleanup *back_to;
1407
1408 /* Set up SRC to refer to the input text, pointed to by *lexptr. */
1409 init_shared_buffer (&src, *lexptr, strlen (*lexptr));
1410
1411 /* Set up DEST to receive the expansion, if there is one. */
1412 init_buffer (&dest, 0);
1413 dest.last_token = 0;
1414 back_to = make_cleanup (cleanup_macro_buffer, &dest);
1415
1416 /* Get the text's first preprocessing token. */
1417 if (! get_token (&tok, &src))
1418 {
1419 do_cleanups (back_to);
1420 return 0;
1421 }
1422
1423 /* If it's a macro invocation, expand it. */
1424 if (maybe_expand (&dest, &tok, &src, 0, lookup_func, lookup_baton))
1425 {
1426 /* It was a macro invocation! Package up the expansion as a
1427 null-terminated string and return it. Set *lexptr to the
1428 start of the next token in the input. */
1429 appendc (&dest, '\0');
1430 discard_cleanups (back_to);
1431 *lexptr = src.text;
1432 return dest.text;
1433 }
1434 else
1435 {
1436 /* It wasn't a macro invocation. */
1437 do_cleanups (back_to);
1438 return 0;
1439 }
1440 }
This page took 0.060388 seconds and 5 git commands to generate.