1 /* YACC parser for D expressions, for GDB.
3 Copyright (C) 2014-2020 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 /* This file is derived from c-exp.y, jv-exp.y. */
22 /* Parse a D expression from text in a string,
23 and return the result as a struct expression pointer.
24 That structure contains arithmetic operations in reverse polish,
25 with constants represented by operations that are followed by special data.
26 See expression.h for the details of the format.
27 What is important here is that it can be built up sequentially
28 during the process of parsing; the lower levels of the tree always
29 come first in the result.
31 Note that malloc's and realloc's in this file are transformed to
32 xmalloc and xrealloc respectively by the same sed command in the
33 makefile that remaps any other malloc/realloc inserted by the parser
34 generator. Doing this with #defines and trying to control the interaction
35 with include files (<malloc.h> and <stdlib.h> for example) just became
36 too messy, particularly when such includes can be inserted at random
37 times by the parser generator. */
43 #include "expression.h"
45 #include "parser-defs.h"
49 #include "bfd.h" /* Required by objfiles.h. */
50 #include "symfile.h" /* Required by objfiles.h. */
51 #include "objfiles.h" /* For have_full_symbols and have_partial_symbols */
54 #include "type-stack.h"
56 #define parse_type(ps) builtin_type (ps->gdbarch ())
57 #define parse_d_type(ps) builtin_d_type (ps->gdbarch ())
59 /* Remap normal yacc parser interface names (yyparse, yylex, yyerror,
61 #define GDB_YY_REMAP_PREFIX d_
64 /* The state of the parser, used internally when we are parsing the
67 static struct parser_state *pstate = NULL;
69 /* The current type stack. */
70 static struct type_stack *type_stack;
74 static int yylex (void);
76 static void yyerror (const char *);
78 static int type_aggregate_p (struct type *);
82 /* Although the yacc "value" of an expression is not used,
83 since the result is stored in the structure being created,
84 other node types do have values. */
98 struct typed_stoken tsval;
101 struct symtoken ssym;
104 enum exp_opcode opcode;
105 struct stoken_vector svec;
109 /* YYSTYPE gets defined by %union */
110 static int parse_number (struct parser_state *, const char *,
111 int, int, YYSTYPE *);
114 %token <sval> IDENTIFIER UNKNOWN_NAME
115 %token <tsym> TYPENAME
116 %token <voidval> COMPLETE
118 /* A NAME_OR_INT is a symbol which is not known in the symbol table,
119 but which would parse as a valid number in the current input radix.
120 E.g. "c" when input_radix==16. Depending on the parse, it will be
121 turned into a name or into a number. */
123 %token <sval> NAME_OR_INT
125 %token <typed_val_int> INTEGER_LITERAL
126 %token <typed_val_float> FLOAT_LITERAL
127 %token <tsval> CHARACTER_LITERAL
128 %token <tsval> STRING_LITERAL
130 %type <svec> StringExp
131 %type <tval> BasicType TypeExp
132 %type <sval> IdentifierExp
133 %type <ival> ArrayLiteral
138 /* Keywords that have a constant value. */
139 %token TRUE_KEYWORD FALSE_KEYWORD NULL_KEYWORD
140 /* Class 'super' accessor. */
143 %token CAST_KEYWORD SIZEOF_KEYWORD
144 %token TYPEOF_KEYWORD TYPEID_KEYWORD
146 /* Comparison keywords. */
147 /* Type storage classes. */
148 %token IMMUTABLE_KEYWORD CONST_KEYWORD SHARED_KEYWORD
149 /* Non-scalar type keywords. */
150 %token STRUCT_KEYWORD UNION_KEYWORD
151 %token CLASS_KEYWORD INTERFACE_KEYWORD
152 %token ENUM_KEYWORD TEMPLATE_KEYWORD
153 %token DELEGATE_KEYWORD FUNCTION_KEYWORD
155 %token <sval> DOLLAR_VARIABLE
157 %token <opcode> ASSIGN_MODIFY
160 %right '=' ASSIGN_MODIFY
167 %left EQUAL NOTEQUAL '<' '>' LEQ GEQ
172 %left IDENTITY NOTIDENTITY
173 %right INCREMENT DECREMENT
185 /* Expressions, including the comma operator. */
193 | AssignExpression ',' CommaExpression
194 { write_exp_elt_opcode (pstate, BINOP_COMMA); }
198 ConditionalExpression
199 | ConditionalExpression '=' AssignExpression
200 { write_exp_elt_opcode (pstate, BINOP_ASSIGN); }
201 | ConditionalExpression ASSIGN_MODIFY AssignExpression
202 { write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY);
203 write_exp_elt_opcode (pstate, $2);
204 write_exp_elt_opcode (pstate, BINOP_ASSIGN_MODIFY); }
207 ConditionalExpression:
209 | OrOrExpression '?' Expression ':' ConditionalExpression
210 { write_exp_elt_opcode (pstate, TERNOP_COND); }
215 | OrOrExpression OROR AndAndExpression
216 { write_exp_elt_opcode (pstate, BINOP_LOGICAL_OR); }
221 | AndAndExpression ANDAND OrExpression
222 { write_exp_elt_opcode (pstate, BINOP_LOGICAL_AND); }
227 | OrExpression '|' XorExpression
228 { write_exp_elt_opcode (pstate, BINOP_BITWISE_IOR); }
233 | XorExpression '^' AndExpression
234 { write_exp_elt_opcode (pstate, BINOP_BITWISE_XOR); }
239 | AndExpression '&' CmpExpression
240 { write_exp_elt_opcode (pstate, BINOP_BITWISE_AND); }
251 ShiftExpression EQUAL ShiftExpression
252 { write_exp_elt_opcode (pstate, BINOP_EQUAL); }
253 | ShiftExpression NOTEQUAL ShiftExpression
254 { write_exp_elt_opcode (pstate, BINOP_NOTEQUAL); }
258 ShiftExpression IDENTITY ShiftExpression
259 { write_exp_elt_opcode (pstate, BINOP_EQUAL); }
260 | ShiftExpression NOTIDENTITY ShiftExpression
261 { write_exp_elt_opcode (pstate, BINOP_NOTEQUAL); }
265 ShiftExpression '<' ShiftExpression
266 { write_exp_elt_opcode (pstate, BINOP_LESS); }
267 | ShiftExpression LEQ ShiftExpression
268 { write_exp_elt_opcode (pstate, BINOP_LEQ); }
269 | ShiftExpression '>' ShiftExpression
270 { write_exp_elt_opcode (pstate, BINOP_GTR); }
271 | ShiftExpression GEQ ShiftExpression
272 { write_exp_elt_opcode (pstate, BINOP_GEQ); }
277 | ShiftExpression LSH AddExpression
278 { write_exp_elt_opcode (pstate, BINOP_LSH); }
279 | ShiftExpression RSH AddExpression
280 { write_exp_elt_opcode (pstate, BINOP_RSH); }
285 | AddExpression '+' MulExpression
286 { write_exp_elt_opcode (pstate, BINOP_ADD); }
287 | AddExpression '-' MulExpression
288 { write_exp_elt_opcode (pstate, BINOP_SUB); }
289 | AddExpression '~' MulExpression
290 { write_exp_elt_opcode (pstate, BINOP_CONCAT); }
295 | MulExpression '*' UnaryExpression
296 { write_exp_elt_opcode (pstate, BINOP_MUL); }
297 | MulExpression '/' UnaryExpression
298 { write_exp_elt_opcode (pstate, BINOP_DIV); }
299 | MulExpression '%' UnaryExpression
300 { write_exp_elt_opcode (pstate, BINOP_REM); }
304 { write_exp_elt_opcode (pstate, UNOP_ADDR); }
305 | INCREMENT UnaryExpression
306 { write_exp_elt_opcode (pstate, UNOP_PREINCREMENT); }
307 | DECREMENT UnaryExpression
308 { write_exp_elt_opcode (pstate, UNOP_PREDECREMENT); }
309 | '*' UnaryExpression
310 { write_exp_elt_opcode (pstate, UNOP_IND); }
311 | '-' UnaryExpression
312 { write_exp_elt_opcode (pstate, UNOP_NEG); }
313 | '+' UnaryExpression
314 { write_exp_elt_opcode (pstate, UNOP_PLUS); }
315 | '!' UnaryExpression
316 { write_exp_elt_opcode (pstate, UNOP_LOGICAL_NOT); }
317 | '~' UnaryExpression
318 { write_exp_elt_opcode (pstate, UNOP_COMPLEMENT); }
319 | TypeExp '.' SIZEOF_KEYWORD
320 { write_exp_elt_opcode (pstate, UNOP_SIZEOF); }
326 CAST_KEYWORD '(' TypeExp ')' UnaryExpression
327 { write_exp_elt_opcode (pstate, UNOP_CAST_TYPE); }
328 /* C style cast is illegal D, but is still recognised in
329 the grammar, so we keep this around for convenience. */
330 | '(' TypeExp ')' UnaryExpression
331 { write_exp_elt_opcode (pstate, UNOP_CAST_TYPE); }
337 | PostfixExpression HATHAT UnaryExpression
338 { write_exp_elt_opcode (pstate, BINOP_EXP); }
343 | PostfixExpression '.' COMPLETE
345 pstate->mark_struct_expression ();
346 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
349 write_exp_string (pstate, s);
350 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
351 | PostfixExpression '.' IDENTIFIER
352 { write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
353 write_exp_string (pstate, $3);
354 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
355 | PostfixExpression '.' IDENTIFIER COMPLETE
356 { pstate->mark_struct_expression ();
357 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT);
358 write_exp_string (pstate, $3);
359 write_exp_elt_opcode (pstate, STRUCTOP_STRUCT); }
360 | PostfixExpression '.' SIZEOF_KEYWORD
361 { write_exp_elt_opcode (pstate, UNOP_SIZEOF); }
362 | PostfixExpression INCREMENT
363 { write_exp_elt_opcode (pstate, UNOP_POSTINCREMENT); }
364 | PostfixExpression DECREMENT
365 { write_exp_elt_opcode (pstate, UNOP_POSTDECREMENT); }
373 { pstate->arglist_len = 1; }
374 | ArgumentList ',' AssignExpression
375 { pstate->arglist_len++; }
380 { pstate->arglist_len = 0; }
385 PostfixExpression '('
386 { pstate->start_arglist (); }
388 { write_exp_elt_opcode (pstate, OP_FUNCALL);
389 write_exp_elt_longcst (pstate, pstate->end_arglist ());
390 write_exp_elt_opcode (pstate, OP_FUNCALL); }
394 PostfixExpression '[' ArgumentList ']'
395 { if (pstate->arglist_len > 0)
397 write_exp_elt_opcode (pstate, MULTI_SUBSCRIPT);
398 write_exp_elt_longcst (pstate, pstate->arglist_len);
399 write_exp_elt_opcode (pstate, MULTI_SUBSCRIPT);
402 write_exp_elt_opcode (pstate, BINOP_SUBSCRIPT);
407 PostfixExpression '[' ']'
408 { /* Do nothing. */ }
409 | PostfixExpression '[' AssignExpression DOTDOT AssignExpression ']'
410 { write_exp_elt_opcode (pstate, TERNOP_SLICE); }
415 { /* Do nothing. */ }
417 { struct bound_minimal_symbol msymbol;
418 std::string copy = copy_name ($1);
419 struct field_of_this_result is_a_field_of_this;
420 struct block_symbol sym;
422 /* Handle VAR, which could be local or global. */
423 sym = lookup_symbol (copy.c_str (),
424 pstate->expression_context_block,
425 VAR_DOMAIN, &is_a_field_of_this);
426 if (sym.symbol && SYMBOL_CLASS (sym.symbol) != LOC_TYPEDEF)
428 if (symbol_read_needs_frame (sym.symbol))
429 pstate->block_tracker->update (sym);
430 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
431 write_exp_elt_block (pstate, sym.block);
432 write_exp_elt_sym (pstate, sym.symbol);
433 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
435 else if (is_a_field_of_this.type != NULL)
437 /* It hangs off of `this'. Must not inadvertently convert from a
438 method call to data ref. */
439 pstate->block_tracker->update (sym);
440 write_exp_elt_opcode (pstate, OP_THIS);
441 write_exp_elt_opcode (pstate, OP_THIS);
442 write_exp_elt_opcode (pstate, STRUCTOP_PTR);
443 write_exp_string (pstate, $1);
444 write_exp_elt_opcode (pstate, STRUCTOP_PTR);
448 /* Lookup foreign name in global static symbols. */
449 msymbol = lookup_bound_minimal_symbol (copy.c_str ());
450 if (msymbol.minsym != NULL)
451 write_exp_msymbol (pstate, msymbol);
452 else if (!have_full_symbols () && !have_partial_symbols ())
453 error (_("No symbol table is loaded. Use the \"file\" command"));
455 error (_("No symbol \"%s\" in current context."),
459 | TypeExp '.' IdentifierExp
460 { struct type *type = check_typedef ($1);
462 /* Check if the qualified name is in the global
463 context. However if the symbol has not already
464 been resolved, it's not likely to be found. */
465 if (TYPE_CODE (type) == TYPE_CODE_MODULE)
467 struct bound_minimal_symbol msymbol;
468 struct block_symbol sym;
469 const char *type_name = TYPE_SAFE_NAME (type);
470 int type_name_len = strlen (type_name);
472 = string_printf ("%.*s.%.*s",
473 type_name_len, type_name,
477 lookup_symbol (name.c_str (),
478 (const struct block *) NULL,
482 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
483 write_exp_elt_block (pstate, sym.block);
484 write_exp_elt_sym (pstate, sym.symbol);
485 write_exp_elt_opcode (pstate, OP_VAR_VALUE);
489 msymbol = lookup_bound_minimal_symbol (name.c_str ());
490 if (msymbol.minsym != NULL)
491 write_exp_msymbol (pstate, msymbol);
492 else if (!have_full_symbols () && !have_partial_symbols ())
493 error (_("No symbol table is loaded. Use the \"file\" command."));
495 error (_("No symbol \"%s\" in current context."),
499 /* Check if the qualified name resolves as a member
500 of an aggregate or an enum type. */
501 if (!type_aggregate_p (type))
502 error (_("`%s' is not defined as an aggregate type."),
503 TYPE_SAFE_NAME (type));
505 write_exp_elt_opcode (pstate, OP_SCOPE);
506 write_exp_elt_type (pstate, type);
507 write_exp_string (pstate, $3);
508 write_exp_elt_opcode (pstate, OP_SCOPE);
511 { write_dollar_variable (pstate, $1); }
514 parse_number (pstate, $1.ptr, $1.length, 0, &val);
515 write_exp_elt_opcode (pstate, OP_LONG);
516 write_exp_elt_type (pstate, val.typed_val_int.type);
517 write_exp_elt_longcst (pstate,
518 (LONGEST) val.typed_val_int.val);
519 write_exp_elt_opcode (pstate, OP_LONG); }
521 { struct type *type = parse_d_type (pstate)->builtin_void;
522 type = lookup_pointer_type (type);
523 write_exp_elt_opcode (pstate, OP_LONG);
524 write_exp_elt_type (pstate, type);
525 write_exp_elt_longcst (pstate, (LONGEST) 0);
526 write_exp_elt_opcode (pstate, OP_LONG); }
528 { write_exp_elt_opcode (pstate, OP_BOOL);
529 write_exp_elt_longcst (pstate, (LONGEST) 1);
530 write_exp_elt_opcode (pstate, OP_BOOL); }
532 { write_exp_elt_opcode (pstate, OP_BOOL);
533 write_exp_elt_longcst (pstate, (LONGEST) 0);
534 write_exp_elt_opcode (pstate, OP_BOOL); }
536 { write_exp_elt_opcode (pstate, OP_LONG);
537 write_exp_elt_type (pstate, $1.type);
538 write_exp_elt_longcst (pstate, (LONGEST)($1.val));
539 write_exp_elt_opcode (pstate, OP_LONG); }
541 { write_exp_elt_opcode (pstate, OP_FLOAT);
542 write_exp_elt_type (pstate, $1.type);
543 write_exp_elt_floatcst (pstate, $1.val);
544 write_exp_elt_opcode (pstate, OP_FLOAT); }
546 { struct stoken_vector vec;
549 write_exp_string_vector (pstate, $1.type, &vec); }
552 write_exp_string_vector (pstate, 0, &$1);
553 for (i = 0; i < $1.len; ++i)
554 free ($1.tokens[i].ptr);
557 { write_exp_elt_opcode (pstate, OP_ARRAY);
558 write_exp_elt_longcst (pstate, (LONGEST) 0);
559 write_exp_elt_longcst (pstate, (LONGEST) $1 - 1);
560 write_exp_elt_opcode (pstate, OP_ARRAY); }
561 | TYPEOF_KEYWORD '(' Expression ')'
562 { write_exp_elt_opcode (pstate, OP_TYPEOF); }
566 '[' ArgumentList_opt ']'
567 { $$ = pstate->arglist_len; }
576 { /* We copy the string here, and not in the
577 lexer, to guarantee that we do not leak a
578 string. Note that we follow the
579 NUL-termination convention of the
581 struct typed_stoken *vec = XNEW (struct typed_stoken);
586 vec->length = $1.length;
587 vec->ptr = (char *) malloc ($1.length + 1);
588 memcpy (vec->ptr, $1.ptr, $1.length + 1);
590 | StringExp STRING_LITERAL
591 { /* Note that we NUL-terminate here, but just
596 = XRESIZEVEC (struct typed_stoken, $$.tokens, $$.len);
598 p = (char *) malloc ($2.length + 1);
599 memcpy (p, $2.ptr, $2.length + 1);
601 $$.tokens[$$.len - 1].type = $2.type;
602 $$.tokens[$$.len - 1].length = $2.length;
603 $$.tokens[$$.len - 1].ptr = p;
609 { /* Do nothing. */ }
611 { write_exp_elt_opcode (pstate, OP_TYPE);
612 write_exp_elt_type (pstate, $1);
613 write_exp_elt_opcode (pstate, OP_TYPE); }
614 | BasicType BasicType2
615 { $$ = type_stack->follow_types ($1);
616 write_exp_elt_opcode (pstate, OP_TYPE);
617 write_exp_elt_type (pstate, $$);
618 write_exp_elt_opcode (pstate, OP_TYPE);
624 { type_stack->push (tp_pointer); }
626 { type_stack->push (tp_pointer); }
627 | '[' INTEGER_LITERAL ']'
628 { type_stack->push ($2.val);
629 type_stack->push (tp_array); }
630 | '[' INTEGER_LITERAL ']' BasicType2
631 { type_stack->push ($2.val);
632 type_stack->push (tp_array); }
642 /* Return true if the type is aggregate-like. */
645 type_aggregate_p (struct type *type)
647 return (TYPE_CODE (type) == TYPE_CODE_STRUCT
648 || TYPE_CODE (type) == TYPE_CODE_UNION
649 || TYPE_CODE (type) == TYPE_CODE_MODULE
650 || (TYPE_CODE (type) == TYPE_CODE_ENUM
651 && TYPE_DECLARED_CLASS (type)));
654 /* Take care of parsing a number (anything that starts with a digit).
655 Set yylval and return the token type; update lexptr.
656 LEN is the number of characters in it. */
658 /*** Needs some error checking for the float case ***/
661 parse_number (struct parser_state *ps, const char *p,
662 int len, int parsed_float, YYSTYPE *putithere)
670 int base = input_radix;
674 /* We have found a "L" or "U" suffix. */
675 int found_suffix = 0;
678 struct type *signed_type;
679 struct type *unsigned_type;
685 /* Strip out all embedded '_' before passing to parse_float. */
686 s = (char *) alloca (len + 1);
697 /* Check suffix for `i' , `fi' or `li' (idouble, ifloat or ireal). */
698 if (len >= 1 && tolower (s[len - 1]) == 'i')
700 if (len >= 2 && tolower (s[len - 2]) == 'f')
702 putithere->typed_val_float.type
703 = parse_d_type (ps)->builtin_ifloat;
706 else if (len >= 2 && tolower (s[len - 2]) == 'l')
708 putithere->typed_val_float.type
709 = parse_d_type (ps)->builtin_ireal;
714 putithere->typed_val_float.type
715 = parse_d_type (ps)->builtin_idouble;
719 /* Check suffix for `f' or `l'' (float or real). */
720 else if (len >= 1 && tolower (s[len - 1]) == 'f')
722 putithere->typed_val_float.type
723 = parse_d_type (ps)->builtin_float;
726 else if (len >= 1 && tolower (s[len - 1]) == 'l')
728 putithere->typed_val_float.type
729 = parse_d_type (ps)->builtin_real;
732 /* Default type if no suffix. */
735 putithere->typed_val_float.type
736 = parse_d_type (ps)->builtin_double;
739 if (!parse_float (s, len,
740 putithere->typed_val_float.type,
741 putithere->typed_val_float.val))
744 return FLOAT_LITERAL;
747 /* Handle base-switching prefixes 0x, 0b, 0 */
780 continue; /* Ignore embedded '_'. */
781 if (c >= 'A' && c <= 'Z')
783 if (c != 'l' && c != 'u')
785 if (c >= '0' && c <= '9')
793 if (base > 10 && c >= 'a' && c <= 'f')
797 n += i = c - 'a' + 10;
799 else if (c == 'l' && long_p == 0)
804 else if (c == 'u' && unsigned_p == 0)
810 return ERROR; /* Char not a digit */
813 return ERROR; /* Invalid digit in this base. */
814 /* Portably test for integer overflow. */
815 if (c != 'l' && c != 'u')
817 ULONGEST n2 = prevn * base;
818 if ((n2 / base != prevn) || (n2 + i < prevn))
819 error (_("Numeric constant too large."));
824 /* An integer constant is an int or a long. An L suffix forces it to
825 be long, and a U suffix forces it to be unsigned. To figure out
826 whether it fits, we shift it right and see whether anything remains.
827 Note that we can't shift sizeof (LONGEST) * HOST_CHAR_BIT bits or
828 more in one operation, because many compilers will warn about such a
829 shift (which always produces a zero result). To deal with the case
830 where it is we just always shift the value more than once, with fewer
832 un = (ULONGEST) n >> 2;
833 if (long_p == 0 && (un >> 30) == 0)
835 high_bit = ((ULONGEST) 1) << 31;
836 signed_type = parse_d_type (ps)->builtin_int;
837 /* For decimal notation, keep the sign of the worked out type. */
838 if (base == 10 && !unsigned_p)
839 unsigned_type = parse_d_type (ps)->builtin_long;
841 unsigned_type = parse_d_type (ps)->builtin_uint;
846 if (sizeof (ULONGEST) * HOST_CHAR_BIT < 64)
847 /* A long long does not fit in a LONGEST. */
848 shift = (sizeof (ULONGEST) * HOST_CHAR_BIT - 1);
851 high_bit = (ULONGEST) 1 << shift;
852 signed_type = parse_d_type (ps)->builtin_long;
853 unsigned_type = parse_d_type (ps)->builtin_ulong;
856 putithere->typed_val_int.val = n;
858 /* If the high bit of the worked out type is set then this number
859 has to be unsigned_type. */
860 if (unsigned_p || (n & high_bit))
861 putithere->typed_val_int.type = unsigned_type;
863 putithere->typed_val_int.type = signed_type;
865 return INTEGER_LITERAL;
868 /* Temporary obstack used for holding strings. */
869 static struct obstack tempbuf;
870 static int tempbuf_init;
872 /* Parse a string or character literal from TOKPTR. The string or
873 character may be wide or unicode. *OUTPTR is set to just after the
874 end of the literal in the input string. The resulting token is
875 stored in VALUE. This returns a token value, either STRING or
876 CHAR, depending on what was parsed. *HOST_CHARS is set to the
877 number of host characters in the literal. */
880 parse_string_or_char (const char *tokptr, const char **outptr,
881 struct typed_stoken *value, int *host_chars)
885 /* Build the gdb internal form of the input string in tempbuf. Note
886 that the buffer is null byte terminated *only* for the
887 convenience of debugging gdb itself and printing the buffer
888 contents when the buffer contains no embedded nulls. Gdb does
889 not depend upon the buffer being null byte terminated, it uses
890 the length string instead. This allows gdb to handle C strings
891 (as well as strings in other languages) with embedded null
897 obstack_free (&tempbuf, NULL);
898 obstack_init (&tempbuf);
900 /* Skip the quote. */
912 *host_chars += c_parse_escape (&tokptr, &tempbuf);
918 obstack_1grow (&tempbuf, c);
920 /* FIXME: this does the wrong thing with multi-byte host
921 characters. We could use mbrlen here, but that would
922 make "set host-charset" a bit less useful. */
927 if (*tokptr != quote)
929 if (quote == '"' || quote == '`')
930 error (_("Unterminated string in expression."));
932 error (_("Unmatched single quote."));
936 /* FIXME: should instead use own language string_type enum
937 and handle D-specific string suffixes here. */
939 value->type = C_CHAR;
941 value->type = C_STRING;
943 value->ptr = (char *) obstack_base (&tempbuf);
944 value->length = obstack_object_size (&tempbuf);
948 return quote == '\'' ? CHARACTER_LITERAL : STRING_LITERAL;
955 enum exp_opcode opcode;
958 static const struct token tokentab3[] =
960 {"^^=", ASSIGN_MODIFY, BINOP_EXP},
961 {"<<=", ASSIGN_MODIFY, BINOP_LSH},
962 {">>=", ASSIGN_MODIFY, BINOP_RSH},
965 static const struct token tokentab2[] =
967 {"+=", ASSIGN_MODIFY, BINOP_ADD},
968 {"-=", ASSIGN_MODIFY, BINOP_SUB},
969 {"*=", ASSIGN_MODIFY, BINOP_MUL},
970 {"/=", ASSIGN_MODIFY, BINOP_DIV},
971 {"%=", ASSIGN_MODIFY, BINOP_REM},
972 {"|=", ASSIGN_MODIFY, BINOP_BITWISE_IOR},
973 {"&=", ASSIGN_MODIFY, BINOP_BITWISE_AND},
974 {"^=", ASSIGN_MODIFY, BINOP_BITWISE_XOR},
975 {"++", INCREMENT, BINOP_END},
976 {"--", DECREMENT, BINOP_END},
977 {"&&", ANDAND, BINOP_END},
978 {"||", OROR, BINOP_END},
979 {"^^", HATHAT, BINOP_END},
980 {"<<", LSH, BINOP_END},
981 {">>", RSH, BINOP_END},
982 {"==", EQUAL, BINOP_END},
983 {"!=", NOTEQUAL, BINOP_END},
984 {"<=", LEQ, BINOP_END},
985 {">=", GEQ, BINOP_END},
986 {"..", DOTDOT, BINOP_END},
989 /* Identifier-like tokens. */
990 static const struct token ident_tokens[] =
992 {"is", IDENTITY, BINOP_END},
993 {"!is", NOTIDENTITY, BINOP_END},
995 {"cast", CAST_KEYWORD, OP_NULL},
996 {"const", CONST_KEYWORD, OP_NULL},
997 {"immutable", IMMUTABLE_KEYWORD, OP_NULL},
998 {"shared", SHARED_KEYWORD, OP_NULL},
999 {"super", SUPER_KEYWORD, OP_NULL},
1001 {"null", NULL_KEYWORD, OP_NULL},
1002 {"true", TRUE_KEYWORD, OP_NULL},
1003 {"false", FALSE_KEYWORD, OP_NULL},
1005 {"init", INIT_KEYWORD, OP_NULL},
1006 {"sizeof", SIZEOF_KEYWORD, OP_NULL},
1007 {"typeof", TYPEOF_KEYWORD, OP_NULL},
1008 {"typeid", TYPEID_KEYWORD, OP_NULL},
1010 {"delegate", DELEGATE_KEYWORD, OP_NULL},
1011 {"function", FUNCTION_KEYWORD, OP_NULL},
1012 {"struct", STRUCT_KEYWORD, OP_NULL},
1013 {"union", UNION_KEYWORD, OP_NULL},
1014 {"class", CLASS_KEYWORD, OP_NULL},
1015 {"interface", INTERFACE_KEYWORD, OP_NULL},
1016 {"enum", ENUM_KEYWORD, OP_NULL},
1017 {"template", TEMPLATE_KEYWORD, OP_NULL},
1020 /* This is set if a NAME token appeared at the very end of the input
1021 string, with no whitespace separating the name from the EOF. This
1022 is used only when parsing to do field name completion. */
1023 static int saw_name_at_eof;
1025 /* This is set if the previously-returned token was a structure operator.
1026 This is used only when parsing to do field name completion. */
1027 static int last_was_structop;
1029 /* Depth of parentheses. */
1030 static int paren_depth;
1032 /* Read one token, getting characters through lexptr. */
1035 lex_one_token (struct parser_state *par_state)
1040 const char *tokstart;
1041 int saw_structop = last_was_structop;
1043 last_was_structop = 0;
1047 pstate->prev_lexptr = pstate->lexptr;
1049 tokstart = pstate->lexptr;
1050 /* See if it is a special token of length 3. */
1051 for (i = 0; i < sizeof tokentab3 / sizeof tokentab3[0]; i++)
1052 if (strncmp (tokstart, tokentab3[i].oper, 3) == 0)
1054 pstate->lexptr += 3;
1055 yylval.opcode = tokentab3[i].opcode;
1056 return tokentab3[i].token;
1059 /* See if it is a special token of length 2. */
1060 for (i = 0; i < sizeof tokentab2 / sizeof tokentab2[0]; i++)
1061 if (strncmp (tokstart, tokentab2[i].oper, 2) == 0)
1063 pstate->lexptr += 2;
1064 yylval.opcode = tokentab2[i].opcode;
1065 return tokentab2[i].token;
1068 switch (c = *tokstart)
1071 /* If we're parsing for field name completion, and the previous
1072 token allows such completion, return a COMPLETE token.
1073 Otherwise, we were already scanning the original text, and
1074 we're really done. */
1075 if (saw_name_at_eof)
1077 saw_name_at_eof = 0;
1080 else if (saw_structop)
1099 if (paren_depth == 0)
1106 if (pstate->comma_terminates && paren_depth == 0)
1112 /* Might be a floating point number. */
1113 if (pstate->lexptr[1] < '0' || pstate->lexptr[1] > '9')
1115 if (pstate->parse_completion)
1116 last_was_structop = 1;
1117 goto symbol; /* Nope, must be a symbol. */
1132 /* It's a number. */
1133 int got_dot = 0, got_e = 0, toktype;
1134 const char *p = tokstart;
1135 int hex = input_radix > 10;
1137 if (c == '0' && (p[1] == 'x' || p[1] == 'X'))
1145 /* Hex exponents start with 'p', because 'e' is a valid hex
1146 digit and thus does not indicate a floating point number
1147 when the radix is hex. */
1148 if ((!hex && !got_e && tolower (p[0]) == 'e')
1149 || (hex && !got_e && tolower (p[0] == 'p')))
1150 got_dot = got_e = 1;
1151 /* A '.' always indicates a decimal floating point number
1152 regardless of the radix. If we have a '..' then its the
1153 end of the number and the beginning of a slice. */
1154 else if (!got_dot && (p[0] == '.' && p[1] != '.'))
1156 /* This is the sign of the exponent, not the end of the number. */
1157 else if (got_e && (tolower (p[-1]) == 'e' || tolower (p[-1]) == 'p')
1158 && (*p == '-' || *p == '+'))
1160 /* We will take any letters or digits, ignoring any embedded '_'.
1161 parse_number will complain if past the radix, or if L or U are
1163 else if ((*p < '0' || *p > '9') && (*p != '_')
1164 && ((*p < 'a' || *p > 'z') && (*p < 'A' || *p > 'Z')))
1168 toktype = parse_number (par_state, tokstart, p - tokstart,
1169 got_dot|got_e, &yylval);
1170 if (toktype == ERROR)
1172 char *err_copy = (char *) alloca (p - tokstart + 1);
1174 memcpy (err_copy, tokstart, p - tokstart);
1175 err_copy[p - tokstart] = 0;
1176 error (_("Invalid number \"%s\"."), err_copy);
1184 const char *p = &tokstart[1];
1185 size_t len = strlen ("entry");
1187 while (isspace (*p))
1189 if (strncmp (p, "entry", len) == 0 && !isalnum (p[len])
1192 pstate->lexptr = &p[len];
1223 int result = parse_string_or_char (tokstart, &pstate->lexptr,
1224 &yylval.tsval, &host_len);
1225 if (result == CHARACTER_LITERAL)
1228 error (_("Empty character constant."));
1229 else if (host_len > 2 && c == '\'')
1232 namelen = pstate->lexptr - tokstart - 1;
1235 else if (host_len > 1)
1236 error (_("Invalid character constant."));
1242 if (!(c == '_' || c == '$'
1243 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
1244 /* We must have come across a bad character (e.g. ';'). */
1245 error (_("Invalid character '%c' in expression"), c);
1247 /* It's a name. See how long it is. */
1249 for (c = tokstart[namelen];
1250 (c == '_' || c == '$' || (c >= '0' && c <= '9')
1251 || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'));)
1252 c = tokstart[++namelen];
1254 /* The token "if" terminates the expression and is NOT
1255 removed from the input stream. */
1256 if (namelen == 2 && tokstart[0] == 'i' && tokstart[1] == 'f')
1259 /* For the same reason (breakpoint conditions), "thread N"
1260 terminates the expression. "thread" could be an identifier, but
1261 an identifier is never followed by a number without intervening
1262 punctuation. "task" is similar. Handle abbreviations of these,
1263 similarly to breakpoint.c:find_condition_and_thread. */
1265 && (strncmp (tokstart, "thread", namelen) == 0
1266 || strncmp (tokstart, "task", namelen) == 0)
1267 && (tokstart[namelen] == ' ' || tokstart[namelen] == '\t'))
1269 const char *p = tokstart + namelen + 1;
1271 while (*p == ' ' || *p == '\t')
1273 if (*p >= '0' && *p <= '9')
1277 pstate->lexptr += namelen;
1281 yylval.sval.ptr = tokstart;
1282 yylval.sval.length = namelen;
1284 /* Catch specific keywords. */
1285 std::string copy = copy_name (yylval.sval);
1286 for (i = 0; i < sizeof ident_tokens / sizeof ident_tokens[0]; i++)
1287 if (copy == ident_tokens[i].oper)
1289 /* It is ok to always set this, even though we don't always
1290 strictly need to. */
1291 yylval.opcode = ident_tokens[i].opcode;
1292 return ident_tokens[i].token;
1295 if (*tokstart == '$')
1296 return DOLLAR_VARIABLE;
1299 = language_lookup_primitive_type (par_state->language (),
1300 par_state->gdbarch (), copy.c_str ());
1301 if (yylval.tsym.type != NULL)
1304 /* Input names that aren't symbols but ARE valid hex numbers,
1305 when the input radix permits them, can be names or numbers
1306 depending on the parse. Note we support radixes > 16 here. */
1307 if ((tokstart[0] >= 'a' && tokstart[0] < 'a' + input_radix - 10)
1308 || (tokstart[0] >= 'A' && tokstart[0] < 'A' + input_radix - 10))
1310 YYSTYPE newlval; /* Its value is ignored. */
1311 int hextype = parse_number (par_state, tokstart, namelen, 0, &newlval);
1312 if (hextype == INTEGER_LITERAL)
1316 if (pstate->parse_completion && *pstate->lexptr == '\0')
1317 saw_name_at_eof = 1;
1322 /* An object of this type is pushed on a FIFO by the "outer" lexer. */
1323 struct token_and_value
1330 /* A FIFO of tokens that have been read but not yet returned to the
1332 static std::vector<token_and_value> token_fifo;
1334 /* Non-zero if the lexer should return tokens from the FIFO. */
1337 /* Temporary storage for yylex; this holds symbol names as they are
1339 static auto_obstack name_obstack;
1341 /* Classify an IDENTIFIER token. The contents of the token are in `yylval'.
1342 Updates yylval and returns the new token type. BLOCK is the block
1343 in which lookups start; this can be NULL to mean the global scope. */
1346 classify_name (struct parser_state *par_state, const struct block *block)
1348 struct block_symbol sym;
1349 struct field_of_this_result is_a_field_of_this;
1351 std::string copy = copy_name (yylval.sval);
1353 sym = lookup_symbol (copy.c_str (), block, VAR_DOMAIN, &is_a_field_of_this);
1354 if (sym.symbol && SYMBOL_CLASS (sym.symbol) == LOC_TYPEDEF)
1356 yylval.tsym.type = SYMBOL_TYPE (sym.symbol);
1359 else if (sym.symbol == NULL)
1361 /* Look-up first for a module name, then a type. */
1362 sym = lookup_symbol (copy.c_str (), block, MODULE_DOMAIN, NULL);
1363 if (sym.symbol == NULL)
1364 sym = lookup_symbol (copy.c_str (), block, STRUCT_DOMAIN, NULL);
1366 if (sym.symbol != NULL)
1368 yylval.tsym.type = SYMBOL_TYPE (sym.symbol);
1372 return UNKNOWN_NAME;
1378 /* Like classify_name, but used by the inner loop of the lexer, when a
1379 name might have already been seen. CONTEXT is the context type, or
1380 NULL if this is the first component of a name. */
1383 classify_inner_name (struct parser_state *par_state,
1384 const struct block *block, struct type *context)
1388 if (context == NULL)
1389 return classify_name (par_state, block);
1391 type = check_typedef (context);
1392 if (!type_aggregate_p (type))
1395 std::string copy = copy_name (yylval.ssym.stoken);
1396 yylval.ssym.sym = d_lookup_nested_symbol (type, copy.c_str (), block);
1398 if (yylval.ssym.sym.symbol == NULL)
1401 if (SYMBOL_CLASS (yylval.ssym.sym.symbol) == LOC_TYPEDEF)
1403 yylval.tsym.type = SYMBOL_TYPE (yylval.ssym.sym.symbol);
1410 /* The outer level of a two-level lexer. This calls the inner lexer
1411 to return tokens. It then either returns these tokens, or
1412 aggregates them into a larger token. This lets us work around a
1413 problem in our parsing approach, where the parser could not
1414 distinguish between qualified names and qualified types at the
1420 token_and_value current;
1422 struct type *context_type = NULL;
1423 int last_to_examine, next_to_examine, checkpoint;
1424 const struct block *search_block;
1426 if (popping && !token_fifo.empty ())
1430 /* Read the first token and decide what to do. */
1431 current.token = lex_one_token (pstate);
1432 if (current.token != IDENTIFIER && current.token != '.')
1433 return current.token;
1435 /* Read any sequence of alternating "." and identifier tokens into
1437 current.value = yylval;
1438 token_fifo.push_back (current);
1439 last_was_dot = current.token == '.';
1443 current.token = lex_one_token (pstate);
1444 current.value = yylval;
1445 token_fifo.push_back (current);
1447 if ((last_was_dot && current.token != IDENTIFIER)
1448 || (!last_was_dot && current.token != '.'))
1451 last_was_dot = !last_was_dot;
1455 /* We always read one extra token, so compute the number of tokens
1456 to examine accordingly. */
1457 last_to_examine = token_fifo.size () - 2;
1458 next_to_examine = 0;
1460 current = token_fifo[next_to_examine];
1463 /* If we are not dealing with a typename, now is the time to find out. */
1464 if (current.token == IDENTIFIER)
1466 yylval = current.value;
1467 current.token = classify_name (pstate, pstate->expression_context_block);
1468 current.value = yylval;
1471 /* If the IDENTIFIER is not known, it could be a package symbol,
1472 first try building up a name until we find the qualified module. */
1473 if (current.token == UNKNOWN_NAME)
1475 name_obstack.clear ();
1476 obstack_grow (&name_obstack, current.value.sval.ptr,
1477 current.value.sval.length);
1481 while (next_to_examine <= last_to_examine)
1483 token_and_value next;
1485 next = token_fifo[next_to_examine];
1488 if (next.token == IDENTIFIER && last_was_dot)
1490 /* Update the partial name we are constructing. */
1491 obstack_grow_str (&name_obstack, ".");
1492 obstack_grow (&name_obstack, next.value.sval.ptr,
1493 next.value.sval.length);
1495 yylval.sval.ptr = (char *) obstack_base (&name_obstack);
1496 yylval.sval.length = obstack_object_size (&name_obstack);
1498 current.token = classify_name (pstate,
1499 pstate->expression_context_block);
1500 current.value = yylval;
1502 /* We keep going until we find a TYPENAME. */
1503 if (current.token == TYPENAME)
1505 /* Install it as the first token in the FIFO. */
1506 token_fifo[0] = current;
1507 token_fifo.erase (token_fifo.begin () + 1,
1508 token_fifo.begin () + next_to_examine);
1512 else if (next.token == '.' && !last_was_dot)
1516 /* We've reached the end of the name. */
1521 /* Reset our current token back to the start, if we found nothing
1522 this means that we will just jump to do pop. */
1523 current = token_fifo[0];
1524 next_to_examine = 1;
1526 if (current.token != TYPENAME && current.token != '.')
1529 name_obstack.clear ();
1531 if (current.token == '.')
1532 search_block = NULL;
1535 gdb_assert (current.token == TYPENAME);
1536 search_block = pstate->expression_context_block;
1537 obstack_grow (&name_obstack, current.value.sval.ptr,
1538 current.value.sval.length);
1539 context_type = current.value.tsym.type;
1543 last_was_dot = current.token == '.';
1545 while (next_to_examine <= last_to_examine)
1547 token_and_value next;
1549 next = token_fifo[next_to_examine];
1552 if (next.token == IDENTIFIER && last_was_dot)
1556 yylval = next.value;
1557 classification = classify_inner_name (pstate, search_block,
1559 /* We keep going until we either run out of names, or until
1560 we have a qualified name which is not a type. */
1561 if (classification != TYPENAME && classification != IDENTIFIER)
1564 /* Accept up to this token. */
1565 checkpoint = next_to_examine;
1567 /* Update the partial name we are constructing. */
1568 if (context_type != NULL)
1570 /* We don't want to put a leading "." into the name. */
1571 obstack_grow_str (&name_obstack, ".");
1573 obstack_grow (&name_obstack, next.value.sval.ptr,
1574 next.value.sval.length);
1576 yylval.sval.ptr = (char *) obstack_base (&name_obstack);
1577 yylval.sval.length = obstack_object_size (&name_obstack);
1578 current.value = yylval;
1579 current.token = classification;
1583 if (classification == IDENTIFIER)
1586 context_type = yylval.tsym.type;
1588 else if (next.token == '.' && !last_was_dot)
1592 /* We've reached the end of the name. */
1597 /* If we have a replacement token, install it as the first token in
1598 the FIFO, and delete the other constituent tokens. */
1601 token_fifo[0] = current;
1603 token_fifo.erase (token_fifo.begin () + 1,
1604 token_fifo.begin () + checkpoint);
1608 current = token_fifo[0];
1609 token_fifo.erase (token_fifo.begin ());
1610 yylval = current.value;
1611 return current.token;
1615 d_parse (struct parser_state *par_state)
1617 /* Setting up the parser state. */
1618 scoped_restore pstate_restore = make_scoped_restore (&pstate);
1619 gdb_assert (par_state != NULL);
1622 scoped_restore restore_yydebug = make_scoped_restore (&yydebug,
1625 struct type_stack stack;
1626 scoped_restore restore_type_stack = make_scoped_restore (&type_stack,
1629 /* Initialize some state used by the lexer. */
1630 last_was_structop = 0;
1631 saw_name_at_eof = 0;
1634 token_fifo.clear ();
1636 name_obstack.clear ();
1642 yyerror (const char *msg)
1644 if (pstate->prev_lexptr)
1645 pstate->lexptr = pstate->prev_lexptr;
1647 error (_("A %s in expression, near `%s'."), msg, pstate->lexptr);