gdb/ada-lex.l

   1 /* FLEX lexer for Ada expressions, for GDB.
   2    Copyright (C) 1994, 1997, 1998, 2000, 2001, 2002, 2003, 2007, 2008
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GDB.
   6
   7 This program is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 This program is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with this program; if not, write to the Free Software
  19 Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22 /*----------------------------------------------------------------------*/
  23
  24 /* The converted version of this file is to be included in ada-exp.y, */
  25 /* the Ada parser for gdb.  The function yylex obtains characters from */
  26 /* the global pointer lexptr.  It returns a syntactic category for */
  27 /* each successive token and places a semantic value into yylval */
  28 /* (ada-lval), defined by the parser.   */
  29
  30 DIG     [0-9]
     /* NUM10: a decimal digit followed by any mix of digits and
        underscores.  */
  31 NUM10   ({DIG}({DIG}|_)*)
     /* Only lowercase a-f is listed; uppercase is matched as well because
        the scanner is generated with %option case-insensitive.  */
  32 HEXDIG  [0-9a-f]
  33 NUM16   ({HEXDIG}({HEXDIG}|_)*)
     /* OCTDIG is not referenced by any rule in the rules section below.  */
  34 OCTDIG  [0-7]
  35 LETTER  [a-z_]
     /* ID: either a plain identifier, or an identifier enclosed in
        angle brackets.  */
  36 ID      ({LETTER}({LETTER}|{DIG})*|"<"{LETTER}({LETTER}|{DIG})*">")
  37 WHITE   [ \t\n]
     /* TICK: an apostrophe followed by optional whitespace.  */
  38 TICK    ("'"{WHITE}*)
     /* GRAPHIC: characters accepted as-is inside character and string
        literals.  The double quote is not a member; the literal rules
        handle it explicitly.  */
  39 GRAPHIC [a-z0-9 #&'()*+,-./:;<>=_|!$%?@\[\]\\^`{}~]
     /* OPER: operator symbols that may appear, double-quoted, as a
        component of a dotted name.  */
  40 OPER    ([-+*/=<>&]|"<="|">="|"**"|"/="|"and"|"or"|"xor"|"not"|"mod"|"rem"|"abs")
  41
     /* EXP requires an explicit sign after the 'e'; POSEXP accepts only an
        optional '+', so it never denotes a negative exponent.  */
  42 EXP     (e[+-]{NUM10})
  43 POSEXP  (e"+"?{NUM10})
  44
  45 %{
  46
  47 #define NUMERAL_WIDTH 256
  48 #define LONGEST_SIGN ((ULONGEST) 1 << (sizeof(LONGEST) * HOST_CHAR_BIT - 1))
  49
  50 /* Temporary staging for numeric literals.  */
  51 static char numbuf[NUMERAL_WIDTH];
  52  static void canonicalizeNumeral (char *s1, const char *);
  53 static struct stoken processString (const char*, int);
  54 static int processInt (const char *, const char *, const char *);
  55 static int processReal (const char *);
  56 static struct stoken processId (const char *, int);
  57 static int processAttribute (const char *);
  58 static int find_dot_all (const char *);
  59
  60 #undef YY_DECL
  61 #define YY_DECL static int yylex ( void )
  62
  63 #undef YY_INPUT
  64 #define YY_INPUT(BUF, RESULT, MAX_SIZE) \
  65     if ( *lexptr == '\000' ) \
  66       (RESULT) = YY_NULL; \
  67     else \
  68       { \
  69         *(BUF) = *lexptr; \
  70         (RESULT) = 1; \
  71         lexptr += 1; \
  72       }
  73
  74 static int find_dot_all (const char *);
  75
  76 %}
  77
  78 %option case-insensitive interactive nodefault
     /* case-insensitive: the lowercase patterns below also match
        uppercase input.  nodefault: input matched by no rule is a scanner
        error rather than being echoed.  */
  79
  80 %s BEFORE_QUAL_QUOTE
     /* BEFORE_QUAL_QUOTE is entered by the name rule when the matched name
        ends with a "'".  In that state the next "'" is returned as a
        plain tick token; the character-literal rules are restricted to
        <INITIAL> and so do not apply.  */
  81
  82 %%
  83
        /* Whitespace between tokens is discarded.  */
  84 {WHITE}          { }
  85
        /* A "--" comment extends to the end of the line and ends the
           scan.  */
  86 "--".*           { yyterminate(); }
  87
        /* Decimal integer with a non-negative exponent.  The exponent
           text starts just after the last 'e' of the canonicalized
           numeral.  */
  88 {NUM10}{POSEXP}  {
  89                    canonicalizeNumeral (numbuf, yytext);
  90                    return processInt (NULL, numbuf, strrchr(numbuf, 'e')+1);
  91                  }
  92
        /* Plain decimal integer.  */
  93 {NUM10}          {
  94                    canonicalizeNumeral (numbuf, yytext);
  95                    return processInt (NULL, numbuf, NULL);
  96                  }
  97
        /* Based integer with exponent: base#digits#exponent.  numbuf
           begins with the base, the digits follow the first '#', and the
           exponent follows the last '#'.  */
  98 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#"{POSEXP} {
  99                    canonicalizeNumeral (numbuf, yytext);
 100                    return processInt (numbuf,
 101                                       strchr (numbuf, '#') + 1,
 102                                       strrchr(numbuf, '#') + 1);
 103                  }
 104
        /* Based integer without exponent: base#digits#.  */
 105 {NUM10}"#"{HEXDIG}({HEXDIG}|_)*"#" {
 106                    canonicalizeNumeral (numbuf, yytext);
 107                    return processInt (numbuf, strchr (numbuf, '#') + 1, NULL);
 108                  }
 109
        /* "0x" hexadecimal literal; yytext+2 skips the "0x" prefix and
           the base is supplied as the string "16#".  */
 110 "0x"{HEXDIG}+   {
 111                   canonicalizeNumeral (numbuf, yytext+2);
 112                   return processInt ("16#", numbuf, NULL);
 113                 }
 114
 115
        /* Decimal real literals, with and without a signed exponent.  */
 116 {NUM10}"."{NUM10}{EXP} {
 117                    canonicalizeNumeral (numbuf, yytext);
 118                    return processReal (numbuf);
 119                 }
 120
 121 {NUM10}"."{NUM10} {
 122                    canonicalizeNumeral (numbuf, yytext);
 123                    return processReal (numbuf);
 124                 }
 125
        /* Based real literals are recognized only so that they can be
           rejected with a specific message.  */
 126 {NUM10}"#"{NUM16}"."{NUM16}"#"{EXP} {
 127                    error (_("Based real literals not implemented yet."));
 128                 }
 129
 130 {NUM10}"#"{NUM16}"."{NUM16}"#" {
 131                    error (_("Based real literals not implemented yet."));
 132                 }
 133
        /* Character literal: one graphic character or a double quote
           between apostrophes; yytext[1] is that character.  */
 134 <INITIAL>"'"({GRAPHIC}|\")"'" {
 135                    yylval.typed_val.type = type_char ();
 136                    yylval.typed_val.val = yytext[1];
 137                    return CHARLIT;
 138                 }
 139
        /* Character literal in hex notation, '["XX"]'; yytext+3 points at
           the two hex digits.  */
 140 <INITIAL>"'[\""{HEXDIG}{2}"\"]'"   {
 141                    int v;
 142                    yylval.typed_val.type = type_char ();
 143                    sscanf (yytext+3, "%2x", &v);
 144                    yylval.typed_val.val = v;
 145                    return CHARLIT;
 146                 }
 147
        /* String literal.  processString receives the text between the
           surrounding double quotes and decodes the ["XX"] and ["""]
           notations.  */
 148 \"({GRAPHIC}|"[\""({HEXDIG}{2}|\")"\"]")*\"   {
 149                    yylval.sval = processString (yytext+1, yyleng-2);
 150                    return STRING;
 151                 }
 152
        /* A double quote that does not begin a well-formed string.  */
 153 \"              {
 154                    error (_("ill-formed or non-terminated string literal"));
 155                 }
 156
 157
 158 if              {
                       /* "if" ends the expression: move lexptr back to
                          the 'i', reset the scanner, and return 0 (end of
                          input) to the parser.  Presumably this lets the
                          caller handle a trailing "if" clause itself --
                          TODO confirm against callers.  */
 159                   while (*lexptr != 'i' && *lexptr != 'I')
 160                     lexptr -= 1;
 161                   yyrestart(NULL);
 162                   return 0;
 163                 }
 164
 165         /* ADA KEYWORDS */
 166
 167 abs             { return ABS; }
 168 and             { return _AND_; }
 169 else            { return ELSE; }
 170 in              { return IN; }
 171 mod             { return MOD; }
 172 new             { return NEW; }
 173 not             { return NOT; }
 174 null            { return NULL_PTR; }
 175 or              { return OR; }
 176 others          { return OTHERS; }
 177 rem             { return REM; }
 178 then            { return THEN; }
 179 xor             { return XOR; }
 180
 181         /* BOOLEAN "KEYWORDS" */
 182
 183  /* True and False are not keywords in Ada, but rather enumeration constants.
 184     However, the boolean type is no longer represented as an enum, so True
 185     and False are no longer defined in symbol tables.  We compromise by
 186     making them keywords (when bare). */
 187
 188 true            { return TRUEKEYWORD; }
 189 false           { return FALSEKEYWORD; }
 190
 191         /* ATTRIBUTES */
 192
        /* yytext+1 skips the apostrophe only; any whitespace admitted by
           TICK is still at the front of the string handed to
           processAttribute.  */
 193 {TICK}[a-zA-Z][a-zA-Z]+ { return processAttribute (yytext+1); }
 194
 195         /* PUNCTUATION */
 196
 197 "=>"            { return ARROW; }
 198 ".."            { return DOTDOT; }
 199 "**"            { return STARSTAR; }
 200 ":="            { return ASSIGN; }
 201 "/="            { return NOTEQUAL; }
 202 "<="            { return LEQ; }
 203 ">="            { return GEQ; }
 204
        /* After a name that ended with "'", return the tick itself and
           go back to the INITIAL state.  */
 205 <BEFORE_QUAL_QUOTE>"'" { BEGIN INITIAL; return '\''; }
 206
        /* Single-character tokens are returned as their own character
           code.  */
 207 [-&*+./:<>=|;\[\]] { return yytext[0]; }
 208
        /* A comma outside all parentheses ends the expression when
           comma_terminates is set: un-read it, reset the scanner, and
           report end of input.  */
 209 ","             { if (paren_depth == 0 && comma_terminates)
 210                     {
 211                       lexptr -= 1;
 212                       yyrestart(NULL);
 213                       return 0;
 214                     }
 215                   else
 216                     return ',';
 217                 }
 218
        /* paren_depth tracks nesting; a ")" with no open "(" ends the
           expression in the same way as a terminating comma.  */
 219 "("             { paren_depth += 1; return '('; }
 220 ")"             { if (paren_depth == 0)
 221                     {
 222                       lexptr -= 1;
 223                       yyrestart(NULL);
 224                       return 0;
 225                     }
 226                   else
 227                     {
 228                       paren_depth -= 1;
 229                       return ')';
 230                     }
 231                 }
 232
 233 "."{WHITE}*all  { return DOT_ALL; }
 234
        /* Selected component: the leading '.' is dropped before the
           identifier is canonicalized.  */
 235 "."{WHITE}*{ID} {
 236                   yylval.sval = processId (yytext+1, yyleng-1);
 237                   return DOT_ID;
 238                 }
 239
        /* A possibly dotted name, optionally followed by spaces and a
           "'".  If the name contains a ".all" component, only the text
           before that '.' is kept (the rest is pushed back with yyless).
           Otherwise a trailing "'" is pushed back and the scanner enters
           BEFORE_QUAL_QUOTE so that the tick is returned on its own.  */
 240 {ID}({WHITE}*"."{WHITE}*({ID}|\"{OPER}\"))*(" "*"'")?  {
 241                   int all_posn = find_dot_all (yytext);
 242
 243                   if (all_posn == -1 && yytext[yyleng-1] == '\'')
 244                     {
 245                       BEGIN BEFORE_QUAL_QUOTE;
 246                       yyless (yyleng-1);
 247                     }
 248                   else if (all_posn >= 0)
 249                     yyless (all_posn);
 250                   yylval.sval = processId (yytext, yyleng);
 251                   return NAME;
 252                }
 253
 254
 255         /* GDB EXPRESSION CONSTRUCTS  */
 256
        /* A single-quoted name followed by "::".  The "::" is pushed back
           with yyless so that it is scanned as COLONCOLON next; the quoted
           text is returned as a NAME.  */
 257 "'"[^']+"'"{WHITE}*:: {
 258                   yyless (yyleng - 2);
 259                   yylval.sval = processId (yytext, yyleng);
 260                   return NAME;
 261                 }
 262
 263 "::"            { return COLONCOLON; }
 264
 265 [{}@]           { return yytext[0]; }
 266
 267         /* REGISTERS AND GDB CONVENIENCE VARIABLES */
 268
        /* The token value points directly into yytext; no copy is
           made.  */
 269 "$"({LETTER}|{DIG}|"$")*  {
 270                   yylval.sval.ptr = yytext;
 271                   yylval.sval.length = yyleng;
 272                   return SPECIAL_VARIABLE;
 273                 }
 274
 275         /* CATCH-ALL ERROR CASE */
 276
 277 .               { error (_("Invalid character '%s' in expression."), yytext); }
 278 %%
 279
 280 #include <ctype.h>
 281 #include "gdb_string.h"
 282
 283 /* Initialize the lexer for processing new expression. */
 284
 285 void
 286 lexer_init (FILE *inp)
 287 {
 288   BEGIN INITIAL;
 289   yyrestart (inp);
 290 }
 291
 292
 293 /* Copy S2 to S1, removing all underscores, and downcasing all letters.  */
 294
 295 static void
 296 canonicalizeNumeral (char *s1, const char *s2)
 297 {
 298   for (; *s2 != '\000'; s2 += 1)
 299     {
 300       if (*s2 != '_')
 301         {
 302           *s1 = tolower(*s2);
 303           s1 += 1;
 304         }
 305     }
 306   s1[0] = '\000';
 307 }
 308
 309 /* Interprets the prefix of NUM that consists of digits of the given BASE
 310    as an integer of that BASE, with the string EXP as an exponent.
 311    Puts value in yylval, and returns INT, if the string is valid.  Causes
 312    an error if the number is improperly formated.   BASE, if NULL, defaults
 313    to "10", and EXP to "1".  The EXP does not contain a leading 'e' or 'E'.
 314  */
 315
 316 static int
 317 processInt (const char *base0, const char *num0, const char *exp0)
 318 {
 319   ULONGEST result;
 320   long exp;
 321   int base;
 322
 323   char *trailer;
 324
 325   if (base0 == NULL)
 326     base = 10;
 327   else
 328     {
 329       base = strtol (base0, (char **) NULL, 10);
 330       if (base < 2 || base > 16)
 331         error (_("Invalid base: %d."), base);
 332     }
 333
 334   if (exp0 == NULL)
 335     exp = 0;
 336   else
 337     exp = strtol(exp0, (char **) NULL, 10);
 338
 339   errno = 0;
 340   result = strtoulst (num0, (const char **) &trailer, base);
 341   if (errno == ERANGE)
 342     error (_("Integer literal out of range"));
 343   if (isxdigit(*trailer))
 344     error (_("Invalid digit `%c' in based literal"), *trailer);
 345
 346   while (exp > 0)
 347     {
 348       if (result > (ULONG_MAX / base))
 349         error (_("Integer literal out of range"));
 350       result *= base;
 351       exp -= 1;
 352     }
 353
 354   if ((result >> (gdbarch_int_bit (current_gdbarch)-1)) == 0)
 355     yylval.typed_val.type = type_int ();
 356   else if ((result >> (gdbarch_long_bit (current_gdbarch)-1)) == 0)
 357     yylval.typed_val.type = type_long ();
 358   else if (((result >> (gdbarch_long_bit (current_gdbarch)-1)) >> 1) == 0)
 359     {
 360       /* We have a number representable as an unsigned integer quantity.
 361          For consistency with the C treatment, we will treat it as an
 362          anonymous modular (unsigned) quantity.  Alas, the types are such
 363          that we need to store .val as a signed quantity.  Sorry
 364          for the mess, but C doesn't officially guarantee that a simple
 365          assignment does the trick (no, it doesn't; read the reference manual).
 366        */
 367       yylval.typed_val.type = builtin_type_unsigned_long;
 368       if (result & LONGEST_SIGN)
 369         yylval.typed_val.val =
 370           (LONGEST) (result & ~LONGEST_SIGN)
 371           - (LONGEST_SIGN>>1) - (LONGEST_SIGN>>1);
 372       else
 373         yylval.typed_val.val = (LONGEST) result;
 374       return INT;
 375     }
 376   else
 377     yylval.typed_val.type = type_long_long ();
 378
 379   yylval.typed_val.val = (LONGEST) result;
 380   return INT;
 381 }
 382
 383 static int
 384 processReal (const char *num0)
 385 {
 386   sscanf (num0, "%" DOUBLEST_SCAN_FORMAT, &yylval.typed_val_float.dval);
 387
 388   yylval.typed_val_float.type = type_float ();
 389   if (sizeof(DOUBLEST) >= gdbarch_double_bit (current_gdbarch)
 390                             / TARGET_CHAR_BIT)
 391     yylval.typed_val_float.type = type_double ();
 392   if (sizeof(DOUBLEST) >= gdbarch_long_double_bit (current_gdbarch)
 393                             / TARGET_CHAR_BIT)
 394     yylval.typed_val_float.type = type_long_double ();
 395
 396   return FLOAT;
 397 }
 398
 399
 400 /* Store a canonicalized version of NAME0[0..LEN-1] in yylval.ssym.  The
 401    resulting string is valid until the next call to ada_parse.  It differs
 402    from NAME0 in that:
 403     + Characters between '...' or <...> are transfered verbatim to
 404       yylval.ssym.
 405     + <, >, and trailing "'" characters in quoted sequences are removed
 406       (a leading quote is preserved to indicate that the name is not to be
 407       GNAT-encoded).
 408     + Unquoted whitespace is removed.
 409     + Unquoted alphabetic characters are mapped to lower case.
 410    Result is returned as a struct stoken, but for convenience, the string
 411    is also null-terminated.  Result string valid until the next call of
 412    ada_parse.
 413  */
 414 static struct stoken
 415 processId (const char *name0, int len)
 416 {
 417   char *name = obstack_alloc (&temp_parse_space, len + 11);
 418   int i0, i;
 419   struct stoken result;
 420
 421   while (len > 0 && isspace (name0[len-1]))
 422     len -= 1;
 423   i = i0 = 0;
 424   while (i0 < len)
 425     {
 426       if (isalnum (name0[i0]))
 427         {
 428           name[i] = tolower (name0[i0]);
 429           i += 1; i0 += 1;
 430         }
 431       else switch (name0[i0])
 432         {
 433         default:
 434           name[i] = name0[i0];
 435           i += 1; i0 += 1;
 436           break;
 437         case ' ': case '\t':
 438           i0 += 1;
 439           break;
 440         case '\'':
 441           do
 442             {
 443               name[i] = name0[i0];
 444               i += 1; i0 += 1;
 445             }
 446           while (i0 < len && name0[i0] != '\'');
 447           i0 += 1;
 448           break;
 449         case '<':
 450           i0 += 1;
 451           while (i0 < len && name0[i0] != '>')
 452             {
 453               name[i] = name0[i0];
 454               i += 1; i0 += 1;
 455             }
 456           i0 += 1;
 457           break;
 458         }
 459     }
 460   name[i] = '\000';
 461
 462   result.ptr = name;
 463   result.length = i;
 464   return result;
 465 }
 466
 467 /* Return TEXT[0..LEN-1], a string literal without surrounding quotes,
 468    with special hex character notations replaced with characters.
 469    Result valid until the next call to ada_parse.  */
 470
 471 static struct stoken
 472 processString (const char *text, int len)
 473 {
 474   const char *p;
 475   char *q;
 476   const char *lim = text + len;
 477   struct stoken result;
 478
 479   q = result.ptr = obstack_alloc (&temp_parse_space, len);
 480   p = text;
 481   while (p < lim)
 482     {
 483       if (p[0] == '[' && p[1] == '"' && p+2 < lim)
 484          {
 485            if (p[2] == '"')  /* "...["""]... */
 486              {
 487                *q = '"';
 488                p += 4;
 489              }
 490            else
 491              {
 492                int chr;
 493                sscanf (p+2, "%2x", &chr);
 494                *q = (char) chr;
 495                p += 5;
 496              }
 497          }
 498        else
 499          *q = *p;
 500        q += 1;
 501        p += 1;
 502      }
 503   result.length = q - result.ptr;
 504   return result;
 505 }
 506
 507 /* Returns the position within STR of the '.' in a
 508    '.{WHITE}*all' component of a dotted name, or -1 if there is none.
 509    Note: we actually don't need this routine, since 'all' can never be an
 510    Ada identifier.  Thus, looking up foo.all or foo.all.x as a name
 511    must fail, and will eventually be interpreted as (foo).all or
 512    (foo).all.x.  However, this does avoid an extraneous lookup. */
 513
 514 static int
 515 find_dot_all (const char *str)
 516 {
 517   int i;
 518   for (i = 0; str[i] != '\000'; i += 1)
 519     {
 520       if (str[i] == '.')
 521         {
 522           int i0 = i;
 523           do
 524             i += 1;
 525           while (isspace (str[i]));
 526           if (strncmp (str+i, "all", 3) == 0
 527               && ! isalnum (str[i+3]) && str[i+3] != '_')
 528             return i0;
 529         }
 530     }
 531   return -1;
 532 }
 533
 534 /* Returns non-zero iff string SUBSEQ matches a subsequence of STR, ignoring
 535    case.  */
 536
 537 static int
 538 subseqMatch (const char *subseq, const char *str)
 539 {
 540   if (subseq[0] == '\0')
 541     return 1;
 542   else if (str[0] == '\0')
 543     return 0;
 544   else if (tolower (subseq[0]) == tolower (str[0]))
 545     return subseqMatch (subseq+1, str+1) || subseqMatch (subseq, str+1);
 546   else
 547     return subseqMatch (subseq, str+1);
 548 }
 549
 550
     /* Table of recognized attribute names and the token code returned
        for each.  processAttribute first looks for an exact
        (case-insensitive) match, then for an entry of which the input is
        a subsequence.  Three different spellings map to TICK_ACCESS.  The
        table is terminated by the { NULL, -1 } entry.  */
 551 static struct { const char *name; int code; }
 552 attributes[] = {
 553   { "address", TICK_ADDRESS },
 554   { "unchecked_access", TICK_ACCESS },
 555   { "unrestricted_access", TICK_ACCESS },
 556   { "access", TICK_ACCESS },
 557   { "first", TICK_FIRST },
 558   { "last", TICK_LAST },
 559   { "length", TICK_LENGTH },
 560   { "max", TICK_MAX },
 561   { "min", TICK_MIN },
 562   { "modulus", TICK_MODULUS },
 563   { "pos", TICK_POS },
 564   { "range", TICK_RANGE },
 565   { "size", TICK_SIZE },
 566   { "tag", TICK_TAG },
 567   { "val", TICK_VAL },
     /* Sentinel: loops over the table stop at code -1.  */
 568   { NULL, -1 }
 569 };
 570
 571 /* Return the syntactic code corresponding to the attribute name or
 572    abbreviation STR.  */
 573
 574 static int
 575 processAttribute (const char *str)
 576 {
 577   int i, k;
 578
 579   for (i = 0; attributes[i].code != -1; i += 1)
 580     if (strcasecmp (str, attributes[i].name) == 0)
 581       return attributes[i].code;
 582
 583   for (i = 0, k = -1; attributes[i].code != -1; i += 1)
 584     if (subseqMatch (str, attributes[i].name))
 585       {
 586         if (k == -1)
 587           k = i;
 588         else
 589           error (_("ambiguous attribute name: `%s'"), str);
 590       }
 591   if (k == -1)
 592     error (_("unrecognized attribute: `%s'"), str);
 593
 594   return attributes[k].code;
 595 }
 596
 597 int
 598 yywrap(void)
 599 {
 600   return 1;
 601 }
 602
 603 /* Dummy definition to suppress warnings about unused static definitions. */
     /* The array exists only to take the address of yyunput, so that the
        function counts as referenced.  */
 604 typedef void (*dummy_function) ();
 605 dummy_function ada_flex_use[] =
 606 {
 607   (dummy_function) yyunput
 608 };