| 1 | /* Symbol, variable and name lookup. |
| 2 | Copyright (C) 2019 Free Software Foundation, Inc. |
| 3 | |
| 4 | This file is part of libctf. |
| 5 | |
| 6 | libctf is free software; you can redistribute it and/or modify it under |
| 7 | the terms of the GNU General Public License as published by the Free |
| 8 | Software Foundation; either version 3, or (at your option) any later |
| 9 | version. |
| 10 | |
| 11 | This program is distributed in the hope that it will be useful, but |
| 12 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
| 14 | See the GNU General Public License for more details. |
| 15 | |
| 16 | You should have received a copy of the GNU General Public License |
| 17 | along with this program; see the file COPYING. If not see |
| 18 | <http://www.gnu.org/licenses/>. */ |
| 19 | |
| 20 | #include <ctf-impl.h> |
| 21 | #include <elf.h> |
| 22 | #include <string.h> |
| 23 | |
| 24 | /* Compare the given input string and length against a table of known C storage |
| 25 | qualifier keywords. We just ignore these in ctf_lookup_by_name, below. To |
| 26 | do this quickly, we use a pre-computed Perfect Hash Function similar to the |
| 27 | technique originally described in the classic paper: |
| 28 | |
| 29 | R.J. Cichelli, "Minimal Perfect Hash Functions Made Simple", |
| 30 | Communications of the ACM, Volume 23, Issue 1, January 1980, pp. 17-19. |
| 31 | |
| 32 | For an input string S of length N, we use hash H = S[N - 1] + N - 105, which |
| 33 | for the current set of qualifiers yields a unique H in the range [0 .. 20]. |
| 34 | The hash can be modified when the keyword set changes as necessary. We also |
| 35 | store the length of each keyword and check it prior to the final strcmp(). |
| 36 | |
| 37 | TODO: just use gperf. */ |
| 38 | |
| 39 | static int |
| 40 | isqualifier (const char *s, size_t len) |
| 41 | { |
| 42 | static const struct qual |
| 43 | { |
| 44 | const char *q_name; |
| 45 | size_t q_len; |
| 46 | } qhash[] = { |
| 47 | {"static", 6}, {"", 0}, {"", 0}, {"", 0}, |
| 48 | {"volatile", 8}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, |
| 49 | {"", 0}, {"auto", 4}, {"extern", 6}, {"", 0}, {"", 0}, |
| 50 | {"", 0}, {"", 0}, {"const", 5}, {"register", 8}, |
| 51 | {"", 0}, {"restrict", 8}, {"_Restrict", 9} |
| 52 | }; |
| 53 | |
| 54 | int h = s[len - 1] + (int) len - 105; |
| 55 | const struct qual *qp = &qhash[h]; |
| 56 | |
| 57 | return (h >= 0 && (size_t) h < sizeof (qhash) / sizeof (qhash[0]) |
| 58 | && (size_t) len == qp->q_len && |
| 59 | strncmp (qp->q_name, s, qp->q_len) == 0); |
| 60 | } |
| 61 | |
| 62 | /* Attempt to convert the given C type name into the corresponding CTF type ID. |
| 63 | It is not possible to do complete and proper conversion of type names |
| 64 | without implementing a more full-fledged parser, which is necessary to |
| 65 | handle things like types that are function pointers to functions that |
| 66 | have arguments that are function pointers, and fun stuff like that. |
| 67 | Instead, this function implements a very simple conversion algorithm that |
| 68 | finds the things that we actually care about: structs, unions, enums, |
| 69 | integers, floats, typedefs, and pointers to any of these named types. */ |
| 70 | |
| 71 | ctf_id_t |
| 72 | ctf_lookup_by_name (ctf_file_t *fp, const char *name) |
| 73 | { |
| 74 | static const char delimiters[] = " \t\n\r\v\f*"; |
| 75 | |
| 76 | const ctf_lookup_t *lp; |
| 77 | const char *p, *q, *end; |
| 78 | ctf_id_t type = 0; |
| 79 | ctf_id_t ntype, ptype; |
| 80 | |
| 81 | if (name == NULL) |
| 82 | return (ctf_set_errno (fp, EINVAL)); |
| 83 | |
| 84 | for (p = name, end = name + strlen (name); *p != '\0'; p = q) |
| 85 | { |
| 86 | while (isspace (*p)) |
| 87 | p++; /* Skip leading whitespace. */ |
| 88 | |
| 89 | if (p == end) |
| 90 | break; |
| 91 | |
| 92 | if ((q = strpbrk (p + 1, delimiters)) == NULL) |
| 93 | q = end; /* Compare until end. */ |
| 94 | |
| 95 | if (*p == '*') |
| 96 | { |
| 97 | /* Find a pointer to type by looking in fp->ctf_ptrtab. |
| 98 | If we can't find a pointer to the given type, see if |
| 99 | we can compute a pointer to the type resulting from |
| 100 | resolving the type down to its base type and use |
| 101 | that instead. This helps with cases where the CTF |
| 102 | data includes "struct foo *" but not "foo_t *" and |
| 103 | the user tries to access "foo_t *" in the debugger. |
| 104 | |
| 105 | TODO need to handle parent containers too. */ |
| 106 | |
| 107 | ntype = fp->ctf_ptrtab[LCTF_TYPE_TO_INDEX (fp, type)]; |
| 108 | if (ntype == 0) |
| 109 | { |
| 110 | ntype = ctf_type_resolve_unsliced (fp, type); |
| 111 | if (ntype == CTF_ERR |
| 112 | || (ntype = |
| 113 | fp->ctf_ptrtab[LCTF_TYPE_TO_INDEX (fp, ntype)]) == 0) |
| 114 | { |
| 115 | (void) ctf_set_errno (fp, ECTF_NOTYPE); |
| 116 | goto err; |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | type = LCTF_INDEX_TO_TYPE (fp, ntype, (fp->ctf_flags & LCTF_CHILD)); |
| 121 | |
| 122 | q = p + 1; |
| 123 | continue; |
| 124 | } |
| 125 | |
| 126 | if (isqualifier (p, (size_t) (q - p))) |
| 127 | continue; /* Skip qualifier keyword. */ |
| 128 | |
| 129 | for (lp = fp->ctf_lookups; lp->ctl_prefix != NULL; lp++) |
| 130 | { |
| 131 | /* TODO: This is not MT-safe. */ |
| 132 | if ((lp->ctl_prefix[0] == '\0' || |
| 133 | strncmp (p, lp->ctl_prefix, (size_t) (q - p)) == 0) && |
| 134 | (size_t) (q - p) >= lp->ctl_len) |
| 135 | { |
| 136 | for (p += lp->ctl_len; isspace (*p); p++) |
| 137 | continue; /* Skip prefix and next whitespace. */ |
| 138 | |
| 139 | if ((q = strchr (p, '*')) == NULL) |
| 140 | q = end; /* Compare until end. */ |
| 141 | |
| 142 | while (isspace (q[-1])) |
| 143 | q--; /* Exclude trailing whitespace. */ |
| 144 | |
| 145 | /* Expand and/or allocate storage for a slice of the name, then |
| 146 | copy it in. */ |
| 147 | |
| 148 | if (fp->ctf_tmp_typeslicelen >= (size_t) (q - p) + 1) |
| 149 | { |
| 150 | memcpy (fp->ctf_tmp_typeslice, p, (size_t) (q - p)); |
| 151 | fp->ctf_tmp_typeslice[(size_t) (q - p)] = '\0'; |
| 152 | } |
| 153 | else |
| 154 | { |
| 155 | free (fp->ctf_tmp_typeslice); |
| 156 | fp->ctf_tmp_typeslice = xstrndup (p, (size_t) (q - p)); |
| 157 | if (fp->ctf_tmp_typeslice == NULL) |
| 158 | { |
| 159 | (void) ctf_set_errno (fp, ENOMEM); |
| 160 | return CTF_ERR; |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | if ((type = ctf_hash_lookup_type (lp->ctl_hash, fp, |
| 165 | fp->ctf_tmp_typeslice)) == 0) |
| 166 | { |
| 167 | (void) ctf_set_errno (fp, ECTF_NOTYPE); |
| 168 | goto err; |
| 169 | } |
| 170 | |
| 171 | break; |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | if (lp->ctl_prefix == NULL) |
| 176 | { |
| 177 | (void) ctf_set_errno (fp, ECTF_NOTYPE); |
| 178 | goto err; |
| 179 | } |
| 180 | } |
| 181 | |
| 182 | if (*p != '\0' || type == 0) |
| 183 | return (ctf_set_errno (fp, ECTF_SYNTAX)); |
| 184 | |
| 185 | return type; |
| 186 | |
| 187 | err: |
| 188 | if (fp->ctf_parent != NULL |
| 189 | && (ptype = ctf_lookup_by_name (fp->ctf_parent, name)) != CTF_ERR) |
| 190 | return ptype; |
| 191 | |
| 192 | return CTF_ERR; |
| 193 | } |
| 194 | |
| 195 | typedef struct ctf_lookup_var_key |
| 196 | { |
| 197 | ctf_file_t *clvk_fp; |
| 198 | const char *clvk_name; |
| 199 | } ctf_lookup_var_key_t; |
| 200 | |
| 201 | /* A bsearch function for variable names. */ |
| 202 | |
| 203 | static int |
| 204 | ctf_lookup_var (const void *key_, const void *memb_) |
| 205 | { |
| 206 | const ctf_lookup_var_key_t *key = key_; |
| 207 | const ctf_varent_t *memb = memb_; |
| 208 | |
| 209 | return (strcmp (key->clvk_name, ctf_strptr (key->clvk_fp, memb->ctv_name))); |
| 210 | } |
| 211 | |
| 212 | /* Given a variable name, return the type of the variable with that name. */ |
| 213 | |
| 214 | ctf_id_t |
| 215 | ctf_lookup_variable (ctf_file_t *fp, const char *name) |
| 216 | { |
| 217 | ctf_varent_t *ent; |
| 218 | ctf_lookup_var_key_t key = { fp, name }; |
| 219 | |
| 220 | /* This array is sorted, so we can bsearch for it. */ |
| 221 | |
| 222 | ent = bsearch (&key, fp->ctf_vars, fp->ctf_nvars, sizeof (ctf_varent_t), |
| 223 | ctf_lookup_var); |
| 224 | |
| 225 | if (ent == NULL) |
| 226 | { |
| 227 | if (fp->ctf_parent != NULL) |
| 228 | return ctf_lookup_variable (fp->ctf_parent, name); |
| 229 | |
| 230 | return (ctf_set_errno (fp, ECTF_NOTYPEDAT)); |
| 231 | } |
| 232 | |
| 233 | return ent->ctv_type; |
| 234 | } |
| 235 | |
| 236 | /* Given a symbol table index, return the name of that symbol from the secondary |
| 237 | string table, or the null string (never NULL). */ |
| 238 | const char * |
| 239 | ctf_lookup_symbol_name (ctf_file_t *fp, unsigned long symidx) |
| 240 | { |
| 241 | const ctf_sect_t *sp = &fp->ctf_symtab; |
| 242 | Elf64_Sym sym, *gsp; |
| 243 | |
| 244 | if (sp->cts_data == NULL) |
| 245 | { |
| 246 | ctf_set_errno (fp, ECTF_NOSYMTAB); |
| 247 | return _CTF_NULLSTR; |
| 248 | } |
| 249 | |
| 250 | if (symidx >= fp->ctf_nsyms) |
| 251 | { |
| 252 | ctf_set_errno (fp, EINVAL); |
| 253 | return _CTF_NULLSTR; |
| 254 | } |
| 255 | |
| 256 | if (sp->cts_entsize == sizeof (Elf32_Sym)) |
| 257 | { |
| 258 | const Elf32_Sym *symp = (Elf32_Sym *) sp->cts_data + symidx; |
| 259 | gsp = ctf_sym_to_elf64 (symp, &sym); |
| 260 | } |
| 261 | else |
| 262 | gsp = (Elf64_Sym *) sp->cts_data + symidx; |
| 263 | |
| 264 | if (gsp->st_name < fp->ctf_str[CTF_STRTAB_1].cts_len) |
| 265 | return (const char *) fp->ctf_str[CTF_STRTAB_1].cts_strs + gsp->st_name; |
| 266 | |
| 267 | return _CTF_NULLSTR; |
| 268 | } |
| 269 | |
| 270 | /* Given a symbol table index, return the type of the data object described |
| 271 | by the corresponding entry in the symbol table. */ |
| 272 | |
| 273 | ctf_id_t |
| 274 | ctf_lookup_by_symbol (ctf_file_t *fp, unsigned long symidx) |
| 275 | { |
| 276 | const ctf_sect_t *sp = &fp->ctf_symtab; |
| 277 | ctf_id_t type; |
| 278 | |
| 279 | if (sp->cts_data == NULL) |
| 280 | return (ctf_set_errno (fp, ECTF_NOSYMTAB)); |
| 281 | |
| 282 | if (symidx >= fp->ctf_nsyms) |
| 283 | return (ctf_set_errno (fp, EINVAL)); |
| 284 | |
| 285 | if (sp->cts_entsize == sizeof (Elf32_Sym)) |
| 286 | { |
| 287 | const Elf32_Sym *symp = (Elf32_Sym *) sp->cts_data + symidx; |
| 288 | if (ELF32_ST_TYPE (symp->st_info) != STT_OBJECT) |
| 289 | return (ctf_set_errno (fp, ECTF_NOTDATA)); |
| 290 | } |
| 291 | else |
| 292 | { |
| 293 | const Elf64_Sym *symp = (Elf64_Sym *) sp->cts_data + symidx; |
| 294 | if (ELF64_ST_TYPE (symp->st_info) != STT_OBJECT) |
| 295 | return (ctf_set_errno (fp, ECTF_NOTDATA)); |
| 296 | } |
| 297 | |
| 298 | if (fp->ctf_sxlate[symidx] == -1u) |
| 299 | return (ctf_set_errno (fp, ECTF_NOTYPEDAT)); |
| 300 | |
| 301 | type = *(uint32_t *) ((uintptr_t) fp->ctf_buf + fp->ctf_sxlate[symidx]); |
| 302 | if (type == 0) |
| 303 | return (ctf_set_errno (fp, ECTF_NOTYPEDAT)); |
| 304 | |
| 305 | return type; |
| 306 | } |
| 307 | |
| 308 | /* Return the pointer to the internal CTF type data corresponding to the |
| 309 | given type ID. If the ID is invalid, the function returns NULL. |
| 310 | This function is not exported outside of the library. */ |
| 311 | |
| 312 | const ctf_type_t * |
| 313 | ctf_lookup_by_id (ctf_file_t **fpp, ctf_id_t type) |
| 314 | { |
| 315 | ctf_file_t *fp = *fpp; /* Caller passes in starting CTF container. */ |
| 316 | ctf_id_t idx; |
| 317 | |
| 318 | if ((fp->ctf_flags & LCTF_CHILD) && LCTF_TYPE_ISPARENT (fp, type) |
| 319 | && (fp = fp->ctf_parent) == NULL) |
| 320 | { |
| 321 | (void) ctf_set_errno (*fpp, ECTF_NOPARENT); |
| 322 | return NULL; |
| 323 | } |
| 324 | |
| 325 | idx = LCTF_TYPE_TO_INDEX (fp, type); |
| 326 | if (idx > 0 && (unsigned long) idx <= fp->ctf_typemax) |
| 327 | { |
| 328 | *fpp = fp; /* Function returns ending CTF container. */ |
| 329 | return (LCTF_INDEX_TO_TYPEPTR (fp, idx)); |
| 330 | } |
| 331 | |
| 332 | /* If this container is writable, check for a dynamic type. */ |
| 333 | |
| 334 | if (fp->ctf_flags & LCTF_RDWR) |
| 335 | { |
| 336 | ctf_dtdef_t *dtd; |
| 337 | |
| 338 | if ((dtd = ctf_dynamic_type (fp, type)) != NULL) |
| 339 | { |
| 340 | *fpp = fp; |
| 341 | return &dtd->dtd_data; |
| 342 | } |
| 343 | } |
| 344 | (void) ctf_set_errno (*fpp, ECTF_BADID); |
| 345 | return NULL; |
| 346 | } |
| 347 | |
| 348 | /* Given a symbol table index, return the info for the function described |
| 349 | by the corresponding entry in the symbol table. */ |
| 350 | |
| 351 | int |
| 352 | ctf_func_info (ctf_file_t *fp, unsigned long symidx, ctf_funcinfo_t *fip) |
| 353 | { |
| 354 | const ctf_sect_t *sp = &fp->ctf_symtab; |
| 355 | const uint32_t *dp; |
| 356 | uint32_t info, kind, n; |
| 357 | |
| 358 | if (sp->cts_data == NULL) |
| 359 | return (ctf_set_errno (fp, ECTF_NOSYMTAB)); |
| 360 | |
| 361 | if (symidx >= fp->ctf_nsyms) |
| 362 | return (ctf_set_errno (fp, EINVAL)); |
| 363 | |
| 364 | if (sp->cts_entsize == sizeof (Elf32_Sym)) |
| 365 | { |
| 366 | const Elf32_Sym *symp = (Elf32_Sym *) sp->cts_data + symidx; |
| 367 | if (ELF32_ST_TYPE (symp->st_info) != STT_FUNC) |
| 368 | return (ctf_set_errno (fp, ECTF_NOTFUNC)); |
| 369 | } |
| 370 | else |
| 371 | { |
| 372 | const Elf64_Sym *symp = (Elf64_Sym *) sp->cts_data + symidx; |
| 373 | if (ELF64_ST_TYPE (symp->st_info) != STT_FUNC) |
| 374 | return (ctf_set_errno (fp, ECTF_NOTFUNC)); |
| 375 | } |
| 376 | |
| 377 | if (fp->ctf_sxlate[symidx] == -1u) |
| 378 | return (ctf_set_errno (fp, ECTF_NOFUNCDAT)); |
| 379 | |
| 380 | dp = (uint32_t *) ((uintptr_t) fp->ctf_buf + fp->ctf_sxlate[symidx]); |
| 381 | |
| 382 | info = *dp++; |
| 383 | kind = LCTF_INFO_KIND (fp, info); |
| 384 | n = LCTF_INFO_VLEN (fp, info); |
| 385 | |
| 386 | if (kind == CTF_K_UNKNOWN && n == 0) |
| 387 | return (ctf_set_errno (fp, ECTF_NOFUNCDAT)); |
| 388 | |
| 389 | if (kind != CTF_K_FUNCTION) |
| 390 | return (ctf_set_errno (fp, ECTF_CORRUPT)); |
| 391 | |
| 392 | fip->ctc_return = *dp++; |
| 393 | fip->ctc_argc = n; |
| 394 | fip->ctc_flags = 0; |
| 395 | |
| 396 | if (n != 0 && dp[n - 1] == 0) |
| 397 | { |
| 398 | fip->ctc_flags |= CTF_FUNC_VARARG; |
| 399 | fip->ctc_argc--; |
| 400 | } |
| 401 | |
| 402 | return 0; |
| 403 | } |
| 404 | |
| 405 | /* Given a symbol table index, return the arguments for the function described |
| 406 | by the corresponding entry in the symbol table. */ |
| 407 | |
| 408 | int |
| 409 | ctf_func_args (ctf_file_t * fp, unsigned long symidx, uint32_t argc, |
| 410 | ctf_id_t * argv) |
| 411 | { |
| 412 | const uint32_t *dp; |
| 413 | ctf_funcinfo_t f; |
| 414 | |
| 415 | if (ctf_func_info (fp, symidx, &f) < 0) |
| 416 | return -1; /* errno is set for us. */ |
| 417 | |
| 418 | /* The argument data is two uint32_t's past the translation table |
| 419 | offset: one for the function info, and one for the return type. */ |
| 420 | |
| 421 | dp = (uint32_t *) ((uintptr_t) fp->ctf_buf + fp->ctf_sxlate[symidx]) + 2; |
| 422 | |
| 423 | for (argc = MIN (argc, f.ctc_argc); argc != 0; argc--) |
| 424 | *argv++ = *dp++; |
| 425 | |
| 426 | return 0; |
| 427 | } |