4 * Babeltrace - Executable and Shared Object Debug Info Reader
6 * Copyright 2015 Antoine Busque <abusque@efficios.com>
8 * Author: Antoine Busque <abusque@efficios.com>
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39 #include <babeltrace/dwarf.h>
40 #include <babeltrace/so-info.h>
41 #include <babeltrace/crc32.h>
42 #include <babeltrace/babeltrace-internal.h>
43 #include <babeltrace/utils.h>
46 * An address printed in hex is at most 20 bytes (16 digits for 64 bits +
47 * 2 for the leading 0x + 1 for the optional '+' if addr is an offset + null
50 #define ADDR_STR_LEN 20
53 int so_info_init(void)
57 if (elf_version(EV_CURRENT
) == EV_NONE
) {
58 fprintf(stderr
, "ELF library initialization failed: %s\n",
67 struct so_info
*so_info_create(const char *path
, uint64_t low_addr
,
70 struct so_info
*so
= NULL
;
71 GElf_Ehdr
*ehdr
= NULL
;
77 so
= g_new0(struct so_info
, 1);
82 so
->elf_path
= strdup(path
);
87 so
->elf_fd
= open(path
, O_RDONLY
);
89 fprintf(stderr
, "Failed to open %s\n", path
);
93 so
->elf_file
= elf_begin(so
->elf_fd
, ELF_C_READ
, NULL
);
95 fprintf(stderr
, "elf_begin failed: %s\n", elf_errmsg(-1));
99 if (elf_kind(so
->elf_file
) != ELF_K_ELF
) {
100 fprintf(stderr
, "Error: %s is not an ELF object\n",
105 ehdr
= g_new0(GElf_Ehdr
, 1);
110 if (!gelf_getehdr(so
->elf_file
, ehdr
)) {
111 fprintf(stderr
, "Error: couldn't get ehdr for %s\n",
116 /* Position independent code has an e_type value of ET_DYN. */
117 so
->is_pic
= ehdr
->e_type
== ET_DYN
;
119 so
->low_addr
= low_addr
;
120 so
->high_addr
= so
->low_addr
+ so
->memsz
;
132 void so_info_destroy(struct so_info
*so
)
138 dwarf_end(so
->dwarf_info
);
141 free(so
->dwarf_path
);
143 free(so
->dbg_link_filename
);
145 elf_end(so
->elf_file
);
154 int so_info_set_build_id(struct so_info
*so
, uint8_t *build_id
,
157 if (!so
|| !build_id
) {
161 so
->build_id
= malloc(build_id_len
);
166 memcpy(so
->build_id
, build_id
, build_id_len
);
167 so
->build_id_len
= build_id_len
;
170 * Reset the is_elf_only flag in case it had been set
171 * previously, because we might find separate debug info using
172 * the new build id information.
174 so
->is_elf_only
= false;
184 int so_info_set_debug_link(struct so_info
*so
, char *filename
, uint32_t crc
)
186 if (!so
|| !filename
) {
190 so
->dbg_link_filename
= strdup(filename
);
191 if (!so
->dbg_link_filename
) {
195 so
->dbg_link_crc
= crc
;
198 * Reset the is_elf_only flag in case it had been set
199 * previously, because we might find separate debug info using
200 * the new debug link information.
202 so
->is_elf_only
= false;
212 * Tries to read DWARF info from the location given by path, and
213 * attach it to the given so_info instance if it exists.
215 * @param so so_info instance for which to set DWARF info
216 * @param path Presumed location of the DWARF info
217 * @returns 0 on success, -1 on failure
220 int so_info_set_dwarf_info_from_path(struct so_info
*so
, char *path
)
222 int fd
= -1, ret
= 0;
223 struct bt_dwarf_cu
*cu
= NULL
;
224 Dwarf
*dwarf_info
= NULL
;
230 fd
= open(path
, O_RDONLY
);
235 dwarf_info
= dwarf_begin(fd
, DWARF_C_READ
);
241 * Check if the dwarf info has any CU. If not, the SO's object
242 * file contains no DWARF info.
244 cu
= bt_dwarf_cu_create(dwarf_info
);
249 ret
= bt_dwarf_cu_next(cu
);
255 so
->dwarf_path
= strdup(path
);
256 if (!so
->dwarf_path
) {
259 so
->dwarf_info
= dwarf_info
;
266 dwarf_end(dwarf_info
);
274 * Try to set the dwarf_info for a given so_info instance via the
277 * @param so so_info instance for which to retrieve the
278 * DWARF info via build ID
279 * @returns 0 on success (i.e. dwarf_info set), -1 on failure
282 int so_info_set_dwarf_info_build_id(struct so_info
*so
)
284 int i
= 0, ret
= 0, dbg_dir_trailing_slash
= 0;
285 char *path
= NULL
, *build_id_file
= NULL
;
286 const char *dbg_dir
= NULL
;
287 size_t build_id_file_len
, path_len
;
289 if (!so
|| !so
->build_id
) {
293 dbg_dir
= opt_debug_info_dir
? : DEFAULT_DEBUG_DIR
;
295 dbg_dir_trailing_slash
= dbg_dir
[strlen(dbg_dir
) - 1] == '/';
297 /* 2 characters per byte printed in hex, +2 for '/' and '\0' */
298 build_id_file_len
= (2 * so
->build_id_len
) + 2;
299 build_id_file
= malloc(build_id_file_len
);
300 if (!build_id_file
) {
304 snprintf(build_id_file
, 4, "%02x/", so
->build_id
[0]);
305 for (i
= 1; i
< so
->build_id_len
; ++i
) {
306 int path_idx
= 3 + 2 * (i
- 1);
308 snprintf(&build_id_file
[path_idx
], 3, "%02x", so
->build_id
[i
]);
311 path_len
= strlen(dbg_dir
) + strlen(BUILD_ID_SUBDIR
) +
312 strlen(build_id_file
) + strlen(BUILD_ID_SUFFIX
) + 1;
313 if (!dbg_dir_trailing_slash
) {
317 path
= malloc(path_len
);
322 strcpy(path
, dbg_dir
);
323 if (!dbg_dir_trailing_slash
) {
326 strcat(path
, BUILD_ID_SUBDIR
);
327 strcat(path
, build_id_file
);
328 strcat(path
, BUILD_ID_SUFFIX
);
330 ret
= so_info_set_dwarf_info_from_path(so
, path
);
347 * Tests whether the file located at path exists and has the expected
350 * This predicate is used when looking up separate debug info via the
351 * GNU debuglink method. The expected crc can be found in the
352 * .gnu_debuglink section of the original ELF file, along with the
353 * filename of the file containing the debug info.
355 * @param path Full path at which to look for the debug file
356 * @param crc Expected checksum for the debug file
357 * @returns 1 if the file exists and has the correct checksum,
361 int is_valid_debug_file(char *path
, uint32_t crc
)
363 int ret
= 0, fd
= -1;
370 fd
= open(path
, O_RDONLY
);
375 ret
= crc32(fd
, &_crc
);
389 * Try to set the dwarf_info for a given so_info instance via the
392 * @param so so_info instance for which to retrieve the
393 * DWARF info via debug link
394 * @returns 0 on success (i.e. dwarf_info set), -1 on failure
397 int so_info_set_dwarf_info_debug_link(struct so_info
*so
)
400 const char *dbg_dir
= NULL
;
401 char *dir_name
= NULL
, *so_dir
= NULL
, *path
= NULL
;
402 size_t max_path_len
= 0;
404 if (!so
|| !so
->dbg_link_filename
) {
408 dbg_dir
= opt_debug_info_dir
? : DEFAULT_DEBUG_DIR
;
410 dir_name
= dirname(so
->elf_path
);
415 /* so_dir is just dir_name with a trailing slash */
416 so_dir
= malloc(strlen(dir_name
) + 2);
421 strcpy(so_dir
, dir_name
);
424 max_path_len
= strlen(dbg_dir
) + strlen(so_dir
) +
425 strlen(DEBUG_SUBDIR
) + strlen(so
->dbg_link_filename
)
427 path
= malloc(max_path_len
);
432 /* First look in the SO's dir */
433 strcpy(path
, so_dir
);
434 strcat(path
, so
->dbg_link_filename
);
436 if (is_valid_debug_file(path
, so
->dbg_link_crc
)) {
440 /* If not found, look in .debug subdir */
441 strcpy(path
, so_dir
);
442 strcat(path
, DEBUG_SUBDIR
);
443 strcat(path
, so
->dbg_link_filename
);
445 if (is_valid_debug_file(path
, so
->dbg_link_crc
)) {
449 /* Lastly, look under the global debug directory */
450 strcpy(path
, dbg_dir
);
451 strcat(path
, so_dir
);
452 strcat(path
, so
->dbg_link_filename
);
454 if (is_valid_debug_file(path
, so
->dbg_link_crc
)) {
467 ret
= so_info_set_dwarf_info_from_path(so
, path
);
476 * Initialize the DWARF info for a given executable.
478 * @param so so_info instance
479 * @returns 0 on success, -1 on failure
482 int so_info_set_dwarf_info(struct so_info
*so
)
490 /* First try to set the DWARF info from the ELF file */
491 ret
= so_info_set_dwarf_info_from_path(so
, so
->elf_path
);
497 * If that fails, try to find separate debug info via build ID
500 ret
= so_info_set_dwarf_info_build_id(so
);
505 ret
= so_info_set_dwarf_info_debug_link(so
);
517 void source_location_destroy(struct source_location
*src_loc
)
523 free(src_loc
->filename
);
528 * Try to find the symbol closest to an address within a given ELF
531 * Only function symbols are taken into account. The symbol's address
532 * must precede `addr`. A symbol with a closer address might exist
533 * after `addr` but is irrelevant because it cannot encompass `addr`.
535 * On success, if found, the out parameters `sym` and `shdr` are
536 * set. On failure or if none are found, they remain unchanged.
538 * @param scn ELF section in which to look for the address
539 * @param addr Virtual memory address for which to find the
540 * nearest function symbol
541 * @param sym Out parameter, the nearest function symbol
542 * @param shdr Out parameter, the section header for scn
543 * @returns 0 on success, -1 on failure
546 int so_info_get_nearest_symbol_from_section(Elf_Scn
*scn
, uint64_t addr
,
547 GElf_Sym
**sym
, GElf_Shdr
**shdr
)
551 Elf_Data
*data
= NULL
;
552 GElf_Shdr
*_shdr
= NULL
;
553 GElf_Sym
*nearest_sym
= NULL
;
555 if (!scn
|| !sym
|| !shdr
) {
559 _shdr
= g_new0(GElf_Shdr
, 1);
564 _shdr
= gelf_getshdr(scn
, _shdr
);
569 if (_shdr
->sh_type
!= SHT_SYMTAB
) {
571 * We are only interested in symbol table (symtab)
572 * sections, skip this one.
577 data
= elf_getdata(scn
, NULL
);
582 symbol_count
= _shdr
->sh_size
/ _shdr
->sh_entsize
;
584 for (i
= 0; i
< symbol_count
; ++i
) {
585 GElf_Sym
*cur_sym
= NULL
;
587 cur_sym
= g_new0(GElf_Sym
, 1);
591 cur_sym
= gelf_getsym(data
, i
, cur_sym
);
595 if (GELF_ST_TYPE(cur_sym
->st_info
) != STT_FUNC
) {
596 /* We're only interested in the functions. */
601 if (cur_sym
->st_value
<= addr
&&
603 cur_sym
->st_value
> nearest_sym
->st_value
)) {
605 nearest_sym
= cur_sym
;
628 * Get the name of the function containing a given address within an
629 * executable using ELF symbols.
631 * The function name is in fact the name of the nearest ELF symbol,
632 * followed by the offset in bytes between the address and the symbol
633 * (in hex), separated by a '+' character.
635 * If found, the out parameter `func_name` is set on success. On failure,
636 * it remains unchanged.
638 * @param so so_info instance for the executable containing
640 * @param addr Virtual memory address for which to find the
642 * @param func_name Out parameter, the function name
643 * @returns 0 on success, -1 on failure
646 int so_info_lookup_elf_function_name(struct so_info
*so
, uint64_t addr
,
650 * TODO (possible optimisation): if an ELF has no symtab
651 * section, it has been stripped. Therefore, it would be wise
652 * to store a flag indicating the stripped status after the
653 * first iteration to prevent subsequent ones.
657 GElf_Sym
*sym
= NULL
;
658 GElf_Shdr
*shdr
= NULL
;
659 char *sym_name
= NULL
;
660 char *_func_name
= NULL
;
661 char offset_str
[ADDR_STR_LEN
];
663 scn
= elf_nextscn(so
->elf_file
, scn
);
668 while (scn
&& !sym
) {
669 ret
= so_info_get_nearest_symbol_from_section(
670 scn
, addr
, &sym
, &shdr
);
675 scn
= elf_nextscn(so
->elf_file
, scn
);
679 sym_name
= elf_strptr(so
->elf_file
, shdr
->sh_link
,
685 snprintf(offset_str
, ADDR_STR_LEN
, "+%#0" PRIx64
,
686 addr
- sym
->st_value
);
687 _func_name
= malloc(strlen(sym_name
) + ADDR_STR_LEN
);
692 strcpy(_func_name
, sym_name
);
693 strcat(_func_name
, offset_str
);
694 *func_name
= _func_name
;
709 * Get the name of the function containing a given address within a
710 * given compile unit (CU).
712 * If found, the out parameter `func_name` is set on success. On
713 * failure, it remains unchanged.
715 * @param cu bt_dwarf_cu instance which may contain the address
716 * @param addr Virtual memory address for which to find the
718 * @param func_name Out parameter, the function name
719 * @returns 0 on success, -1 on failure
722 int so_info_lookup_cu_function_name(struct bt_dwarf_cu
*cu
, uint64_t addr
,
725 int ret
= 0, found
= 0;
726 char *_func_name
= NULL
;
727 struct bt_dwarf_die
*die
= NULL
;
729 if (!cu
|| !func_name
) {
733 die
= bt_dwarf_die_create(cu
);
738 while (bt_dwarf_die_next(die
) == 0) {
741 ret
= bt_dwarf_die_get_tag(die
, &tag
);
746 if (tag
== DW_TAG_subprogram
) {
747 ret
= bt_dwarf_die_contains_addr(die
, addr
, &found
);
759 ret
= bt_dwarf_die_get_name(die
, &_func_name
);
764 *func_name
= _func_name
;
767 bt_dwarf_die_destroy(die
);
771 bt_dwarf_die_destroy(die
);
776 * Get the name of the function containing a given address within an
777 * executable using DWARF debug info.
779 * If found, the out parameter `func_name` is set on success. On
780 * failure, it remains unchanged.
782 * @param so so_info instance for the executable containing
784 * @param addr Virtual memory address for which to find the
786 * @param func_name Out parameter, the function name
787 * @returns 0 on success, -1 on failure
790 int so_info_lookup_dwarf_function_name(struct so_info
*so
, uint64_t addr
,
794 char *_func_name
= NULL
;
795 struct bt_dwarf_cu
*cu
= NULL
;
797 if (!so
|| !func_name
) {
801 cu
= bt_dwarf_cu_create(so
->dwarf_info
);
806 while (bt_dwarf_cu_next(cu
) == 0) {
807 ret
= so_info_lookup_cu_function_name(cu
, addr
, &_func_name
);
818 *func_name
= _func_name
;
821 bt_dwarf_cu_destroy(cu
);
825 bt_dwarf_cu_destroy(cu
);
830 int so_info_lookup_function_name(struct so_info
*so
, uint64_t ip
,
834 char *_func_name
= NULL
;
835 uint64_t relative_addr
;
837 if (!so
|| !func_name
) {
841 /* Set DWARF info if it hasn't been accessed yet. */
842 if (!so
->dwarf_info
&& !so
->is_elf_only
) {
843 ret
= so_info_set_dwarf_info(so
);
845 /* Failed to set DWARF info, fallback to ELF. */
846 so
->is_elf_only
= true;
850 if (!so_info_has_address(so
, ip
)) {
854 relative_addr
= ip
- so
->low_addr
;
856 * Addresses in ELF and DWARF are relative to base address for
857 * PIC, so make the address argument relative too if needed.
859 if (so
->is_elf_only
) {
860 ret
= so_info_lookup_elf_function_name(so
,
861 so
->is_pic
? relative_addr
: ip
,
864 ret
= so_info_lookup_dwarf_function_name(so
,
865 so
->is_pic
? relative_addr
: ip
,
875 * Can't map to a function; fallback to a generic output of the
876 * form binary+/@address.
878 * FIXME check position independence flag.
880 const char *binary_name
= get_filename_from_path(so
->elf_path
);
882 ret
= asprintf(&_func_name
, "%s+%#0" PRIx64
, binary_name
,
889 *func_name
= _func_name
;
897 * Predicate used to determine whether the children of a given DIE
898 * contain a specific address.
900 * More specifically, the parameter `die` is expected to be a
901 * subprogram (function) DIE, and this predicate tells whether any
902 * subroutines are inlined within this function and would contain
905 * Do note that this function advances the position of `die`. If the
906 * address is found within one of its children, `die` will be pointing
907 * to that child upon returning from the function, allowing the caller
908 * to extract the information deemed necessary.
910 * @param die The parent DIE in whose children the address will be
912 * @param addr The address to look for in the DIEs
913 * @returns 1 if the address was found, 0 if not
916 int so_info_child_die_has_address(struct bt_dwarf_die
*die
, uint64_t addr
)
918 int ret
= 0, contains
= 0;
924 ret
= bt_dwarf_die_child(die
);
932 ret
= bt_dwarf_die_get_tag(die
, &tag
);
937 if (tag
== DW_TAG_inlined_subroutine
) {
938 ret
= bt_dwarf_die_contains_addr(die
, addr
, &contains
);
948 } while (bt_dwarf_die_next(die
) == 0);
959 * Lookup the source location for a given address within a CU, making
960 * the assumption that it is contained within an inline routine in a
963 * @param cu bt_dwarf_cu instance in which to look for the address
964 * @param addr The address to look for
965 * @param src_loc Out parameter, the source location (filename and
966 * line number) for the address
967 * @returns 0 on success, -1 on failure
970 int so_info_lookup_cu_src_loc_inl(struct bt_dwarf_cu
*cu
, uint64_t addr
,
971 struct source_location
**src_loc
)
973 int ret
= 0, found
= 0;
974 struct bt_dwarf_die
*die
= NULL
;
975 struct source_location
*_src_loc
= NULL
;
977 if (!cu
|| !src_loc
) {
981 die
= bt_dwarf_die_create(cu
);
986 while (bt_dwarf_die_next(die
) == 0) {
989 ret
= bt_dwarf_die_get_tag(die
, &tag
);
994 if (tag
== DW_TAG_subprogram
) {
997 ret
= bt_dwarf_die_contains_addr(die
, addr
, &contains
);
1004 * Try to find an inlined subroutine
1005 * child of this DIE containing addr.
1007 found
= so_info_child_die_has_address(
1016 char *filename
= NULL
;
1019 _src_loc
= g_new0(struct source_location
, 1);
1024 ret
= bt_dwarf_die_get_call_file(die
, &filename
);
1028 ret
= bt_dwarf_die_get_call_line(die
, &line_no
);
1034 _src_loc
->filename
= filename
;
1035 _src_loc
->line_no
= line_no
;
1036 *src_loc
= _src_loc
;
1039 bt_dwarf_die_destroy(die
);
1043 source_location_destroy(_src_loc
);
1044 bt_dwarf_die_destroy(die
);
1049 * Look up the source location for a given address within a CU,
1050 * assuming that it is not contained within an inlined function.
1052 * A source location can be found regardless of inlining status for
1053 * this method, but in the case of an inlined function, the returned
1054 * source location will point not to the callsite but rather to the
1055 * definition site of the inline function.
1057 * @param cu bt_dwarf_cu instance in which to look for the address
1058 * @param addr The address to look for
1059 * @param src_loc Out parameter, the source location (filename and
1060 * line number) for the address
1061 * @returns 0 on success, -1 on failure
1064 int so_info_lookup_cu_src_loc_no_inl(struct bt_dwarf_cu
*cu
, uint64_t addr
,
1065 struct source_location
**src_loc
)
1067 struct source_location
*_src_loc
= NULL
;
1068 struct bt_dwarf_die
*die
= NULL
;
1069 const char *filename
= NULL
;
1070 Dwarf_Line
*line
= NULL
;
1071 Dwarf_Addr line_addr
;
1074 if (!cu
|| !src_loc
) {
1078 die
= bt_dwarf_die_create(cu
);
1083 line
= dwarf_getsrc_die(die
->dwarf_die
, addr
);
1088 ret
= dwarf_lineaddr(line
, &line_addr
);
1093 filename
= dwarf_linesrc(line
, NULL
, NULL
);
1098 if (addr
== line_addr
) {
1099 _src_loc
= g_new0(struct source_location
, 1);
1104 ret
= dwarf_lineno(line
, &line_no
);
1109 _src_loc
->line_no
= line_no
;
1110 _src_loc
->filename
= strdup(filename
);
1113 bt_dwarf_die_destroy(die
);
1116 *src_loc
= _src_loc
;
1122 source_location_destroy(_src_loc
);
1123 bt_dwarf_die_destroy(die
);
1128 * Get the source location (file name and line number) for a given
1129 * address within a compile unit (CU).
1131 * On success, the out parameter `src_loc` is set if found. On
1132 * failure, it remains unchanged.
1134 * @param cu bt_dwarf_cu instance for the compile unit which
1135 * may contain the address
1136 * @param addr Virtual memory address for which to find the
1138 * @param src_loc Out parameter, the source location
1139 * @returns 0 on success, -1 on failure
1142 int so_info_lookup_cu_src_loc(struct bt_dwarf_cu
*cu
, uint64_t addr
,
1143 struct source_location
**src_loc
)
1146 struct source_location
*_src_loc
= NULL
;
1148 if (!cu
|| !src_loc
) {
1152 ret
= so_info_lookup_cu_src_loc_inl(cu
, addr
, &_src_loc
);
1161 ret
= so_info_lookup_cu_src_loc_no_inl(cu
, addr
, &_src_loc
);
1172 *src_loc
= _src_loc
;
1178 source_location_destroy(_src_loc
);
1183 int so_info_lookup_source_location(struct so_info
*so
, uint64_t addr
,
1184 struct source_location
**src_loc
)
1186 struct bt_dwarf_cu
*cu
= NULL
;
1187 struct source_location
*_src_loc
= NULL
;
1189 if (!so
|| !src_loc
) {
1193 /* Set DWARF info if it hasn't been accessed yet. */
1194 if (!so
->dwarf_info
&& !so
->is_elf_only
) {
1195 if (so_info_set_dwarf_info(so
)) {
1196 /* Failed to set DWARF info. */
1197 so
->is_elf_only
= true;
1201 if (so
->is_elf_only
) {
1202 /* We cannot lookup source location without DWARF info. */
1206 if (!so_info_has_address(so
, addr
)) {
1211 * Addresses in ELF and DWARF are relative to base address for
1212 * PIC, so make the address argument relative too if needed.
1215 addr
-= so
->low_addr
;
1218 cu
= bt_dwarf_cu_create(so
->dwarf_info
);
1223 while (bt_dwarf_cu_next(cu
) == 0) {
1226 ret
= so_info_lookup_cu_src_loc(cu
, addr
, &_src_loc
);
1236 bt_dwarf_cu_destroy(cu
);
1238 *src_loc
= _src_loc
;
1244 source_location_destroy(_src_loc
);
1245 bt_dwarf_cu_destroy(cu
);