From bbf2f4dfaec5cf2e21b0935300b4921f0b5a8eb7 Mon Sep 17 00:00:00 2001 From: Pedro Alves Date: Mon, 17 Jul 2017 11:28:33 +0100 Subject: [PATCH] Fix TAB-completion + .gdb_index slowness (generalize filename_seen_cache) Tab completion when debugging a program binary that uses GDB index is surprisingly much slower than when GDB uses psymtabs instead. Around 1.5x/3x slower. That's surprising, because the whole point of GDB index is to speed things up... For example, with: set pagination off set $count = 0 while $count < 400 complete b string_prin # matches gdb's string_printf printf "count = %d\n", $count set $count = $count + 1 end $ time ./gdb --batch -q ./gdb-with-index -ex "source script.cmd" real 0m11.042s user 0m10.920s sys 0m0.042s $ time ./gdb --batch -q ./gdb-without-index -ex "source script.cmd" real 0m4.635s user 0m4.590s sys 0m0.037s Same but with: - complete b string_prin + complete b zzzzzz to exercise the no-matches worst case, master currently gets you something like: with index without index real 0m11.971s 0m8.413s user 0m11.912s 0m8.355s sys 0m0.035s 0m0.035s Running gdb under perf shows 80% spent inside maybe_add_partial_symtab_filename, and 20% spent in the lbasename inside that. The problem that tab completion walks over all compunit symtabs, and for each, walks the contained file symtabs. And there a huge number of file symtabs (each included system header, etc.) that appear in each compunit symtab's file symtab list. As in, when debugging GDB, I have 367381 symtabs iterated, when of those only 5371 filenames are unique... This was a regression from the earlier (nice) split of symtabs in compunit symtabs + file symtabs. The fix here is to add a cache of unique filenames per objfile so that the walk / uniquing is only done once. There's already a abstraction for this in symtab.c; this patch moves that code out to a separate file and C++ifies it bit. This makes the worst-case scenario above consistently drop to ~2.5s (1.5s for the "string_prin" hit case), making it over 3.3x times faster than psymtabs in this use case (7x in the "string_prin" hit case). gdb/ChangeLog: 2017-07-17 Pedro Alves * Makefile.in (COMMON_OBS): Add filename-seen-cache.o. * dwarf2read.c: Include "filename-seen-cache.h". * dwarf2read.c (dwarf2_per_objfile) : New field. (dw2_map_symbol_filenames): Build and use a filenames_seen_cache. * filename-seen-cache.c: New file. * filename-seen-cache.h: New file. * symtab.c: Include "filename-seen-cache.h". (struct filename_seen_cache, INITIAL_FILENAME_SEEN_CACHE_SIZE) (create_filename_seen_cache, clear_filename_seen_cache) (delete_filename_seen_cache, filename_seen): Delete, parts moved to filename-seen-cache.h/filename-seen-cache.c. (output_source_filename, sources_info) (maybe_add_partial_symtab_filename) (make_source_files_completion_list): Adjust to use filename_seen_cache. --- gdb/ChangeLog | 18 +++++++ gdb/Makefile.in | 1 + gdb/dwarf2read.c | 105 ++++++++++++++++++++++---------------- gdb/filename-seen-cache.c | 66 ++++++++++++++++++++++++ gdb/filename-seen-cache.h | 63 +++++++++++++++++++++++ gdb/symtab.c | 100 +++++------------------------------- 6 files changed, 221 insertions(+), 132 deletions(-) create mode 100644 gdb/filename-seen-cache.c create mode 100644 gdb/filename-seen-cache.h diff --git a/gdb/ChangeLog b/gdb/ChangeLog index 29504945a7..8b37cda6b4 100644 --- a/gdb/ChangeLog +++ b/gdb/ChangeLog @@ -1,3 +1,21 @@ +2017-07-17 Pedro Alves + + * Makefile.in (COMMON_OBS): Add filename-seen-cache.o. + * dwarf2read.c: Include "filename-seen-cache.h". + * dwarf2read.c (dwarf2_per_objfile) : New field. + (dw2_map_symbol_filenames): Build and use a filenames_seen_cache. + * filename-seen-cache.c: New file. + * filename-seen-cache.h: New file. + * symtab.c: Include "filename-seen-cache.h". + (struct filename_seen_cache, INITIAL_FILENAME_SEEN_CACHE_SIZE) + (create_filename_seen_cache, clear_filename_seen_cache) + (delete_filename_seen_cache, filename_seen): Delete, parts moved + to filename-seen-cache.h/filename-seen-cache.c. + (output_source_filename, sources_info) + (maybe_add_partial_symtab_filename) + (make_source_files_completion_list): Adjust to use + filename_seen_cache. + 2017-07-17 Pedro Alves * dwarf2read.c (dwarf2_per_objfile): In-class initialize all diff --git a/gdb/Makefile.in b/gdb/Makefile.in index b27f6982d4..c6e618a541 100644 --- a/gdb/Makefile.in +++ b/gdb/Makefile.in @@ -1714,6 +1714,7 @@ COMMON_OBS = $(DEPFILES) $(CONFIG_OBS) $(YYOBJ) \ f-typeprint.o \ f-valprint.o \ fileio.o \ + filename-seen-cache.o \ filestuff.o \ filesystem.o \ findcmd.o \ diff --git a/gdb/dwarf2read.c b/gdb/dwarf2read.c index b690d0b30e..bb69f44f46 100644 --- a/gdb/dwarf2read.c +++ b/gdb/dwarf2read.c @@ -74,7 +74,7 @@ #include "common/gdb_optional.h" #include "common/underlying.h" #include "common/byte-vector.h" - +#include "filename-seen-cache.h" #include #include #include @@ -349,6 +349,10 @@ public: /* Table containing line_header indexed by offset and offset_in_dwz. */ htab_t line_header_hash {}; + + /* Table containing all filenames. This is an optional because the + table is lazily constructed on first access. */ + gdb::optional filenames_cache; }; static struct dwarf2_per_objfile *dwarf2_per_objfile; @@ -4312,64 +4316,75 @@ static void dw2_map_symbol_filenames (struct objfile *objfile, symbol_filename_ftype *fun, void *data, int need_fullname) { - int i; - htab_up visited (htab_create_alloc (10, htab_hash_pointer, htab_eq_pointer, - NULL, xcalloc, xfree)); - dw2_setup (objfile); - /* The rule is CUs specify all the files, including those used by - any TU, so there's no need to scan TUs here. - We can ignore file names coming from already-expanded CUs. */ - - for (i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) + if (!dwarf2_per_objfile->filenames_cache) { - struct dwarf2_per_cu_data *per_cu = dw2_get_cutu (i); + dwarf2_per_objfile->filenames_cache.emplace (); - if (per_cu->v.quick->compunit_symtab) - { - void **slot = htab_find_slot (visited.get (), - per_cu->v.quick->file_names, - INSERT); + htab_up visited (htab_create_alloc (10, + htab_hash_pointer, htab_eq_pointer, + NULL, xcalloc, xfree)); - *slot = per_cu->v.quick->file_names; - } - } - - for (i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) - { - int j; - struct dwarf2_per_cu_data *per_cu = dw2_get_cu (i); - struct quick_file_names *file_data; - void **slot; + /* The rule is CUs specify all the files, including those used + by any TU, so there's no need to scan TUs here. We can + ignore file names coming from already-expanded CUs. */ - /* We only need to look at symtabs not already expanded. */ - if (per_cu->v.quick->compunit_symtab) - continue; + for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) + { + struct dwarf2_per_cu_data *per_cu = dw2_get_cutu (i); - file_data = dw2_get_file_names (per_cu); - if (file_data == NULL) - continue; + if (per_cu->v.quick->compunit_symtab) + { + void **slot = htab_find_slot (visited.get (), + per_cu->v.quick->file_names, + INSERT); - slot = htab_find_slot (visited.get (), file_data, INSERT); - if (*slot) - { - /* Already visited. */ - continue; + *slot = per_cu->v.quick->file_names; + } } - *slot = file_data; - for (j = 0; j < file_data->num_file_names; ++j) + for (int i = 0; i < dwarf2_per_objfile->n_comp_units; ++i) { - const char *this_real_name; + int j; + struct dwarf2_per_cu_data *per_cu = dw2_get_cu (i); + struct quick_file_names *file_data; + void **slot; - if (need_fullname) - this_real_name = dw2_get_real_path (objfile, file_data, j); - else - this_real_name = NULL; - (*fun) (file_data->file_names[j], this_real_name, data); + /* We only need to look at symtabs not already expanded. */ + if (per_cu->v.quick->compunit_symtab) + continue; + + file_data = dw2_get_file_names (per_cu); + if (file_data == NULL) + continue; + + slot = htab_find_slot (visited.get (), file_data, INSERT); + if (*slot) + { + /* Already visited. */ + continue; + } + *slot = file_data; + + for (int j = 0; j < file_data->num_file_names; ++j) + { + const char *filename = file_data->file_names[j]; + dwarf2_per_objfile->filenames_cache->seen (filename); + } } } + + dwarf2_per_objfile->filenames_cache->traverse ([&] (const char *filename) + { + const char *this_real_name; + + if (need_fullname) + this_real_name = gdb_realpath (filename); + else + this_real_name = NULL; + (*fun) (filename, this_real_name, data); + }); } static int diff --git a/gdb/filename-seen-cache.c b/gdb/filename-seen-cache.c new file mode 100644 index 0000000000..6282105d27 --- /dev/null +++ b/gdb/filename-seen-cache.c @@ -0,0 +1,66 @@ +/* Filename-seen cache for the GNU debugger, GDB. + + Copyright (C) 1986-2017 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include "defs.h" +#include "filename-seen-cache.h" +#include "filenames.h" + + /* Initial size of the table. It automagically grows from here. */ +#define INITIAL_FILENAME_SEEN_CACHE_SIZE 100 + +/* filename_seen_cache constructor. */ + +filename_seen_cache::filename_seen_cache () +{ + m_tab = htab_create_alloc (INITIAL_FILENAME_SEEN_CACHE_SIZE, + filename_hash, filename_eq, + NULL, xcalloc, xfree); +} + +/* See filename-seen-cache.h. */ + +void +filename_seen_cache::clear () +{ + htab_empty (m_tab); +} + +/* See filename-seen-cache.h. */ + +filename_seen_cache::~filename_seen_cache () +{ + htab_delete (m_tab); +} + +/* See filename-seen-cache.h. */ + +bool +filename_seen_cache::seen (const char *file) +{ + void **slot; + + /* Is FILE in tab? */ + slot = htab_find_slot (m_tab, file, INSERT); + if (*slot != NULL) + return true; + + /* No; add it to tab. */ + *slot = (char *) file; + return false; +} diff --git a/gdb/filename-seen-cache.h b/gdb/filename-seen-cache.h new file mode 100644 index 0000000000..dd9cf7a8d7 --- /dev/null +++ b/gdb/filename-seen-cache.h @@ -0,0 +1,63 @@ +/* Filename-seen cache for the GNU debugger, GDB. + + Copyright (C) 1986-2017 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include "defs.h" +#include "common/function-view.h" + +/* Cache to watch for file names already seen. */ + +class filename_seen_cache +{ +public: + filename_seen_cache (); + ~filename_seen_cache (); + + /* Disable copy. */ + filename_seen_cache (const filename_seen_cache &) = delete; + void operator= (const filename_seen_cache &) = delete; + + /* Empty the cache, but do not delete it. */ + void clear (); + + /* If FILE is not already in the table of files in CACHE, add it and + return false; otherwise return true. + + NOTE: We don't manage space for FILE, we assume FILE lives as + long as the caller needs. */ + bool seen (const char *file); + + /* Traverse all cache entries, calling CALLBACK on each. The + filename is passed as argument to CALLBACK. */ + void traverse (gdb::function_view callback) + { + auto erased_cb = [] (void **slot, void *info) -> int + { + auto filename = (const char *) *slot; + auto restored_cb = (decltype (callback) *) info; + (*restored_cb) (filename); + return 1; + }; + + htab_traverse_noresize (m_tab, erased_cb, &callback); + } + +private: + /* Table of files seen so far. */ + htab_t m_tab; +}; diff --git a/gdb/symtab.c b/gdb/symtab.c index 519b7dd6f2..44809aeaff 100644 --- a/gdb/symtab.c +++ b/gdb/symtab.c @@ -63,6 +63,7 @@ #include "completer.h" #include "progspace-and-thread.h" #include "common/gdb_optional.h" +#include "filename-seen-cache.h" /* Forward declarations for local functions. */ @@ -3968,74 +3969,6 @@ operator_chars (const char *p, const char **end) } -/* Cache to watch for file names already seen by filename_seen. */ - -struct filename_seen_cache -{ - /* Table of files seen so far. */ - htab_t tab; - /* Initial size of the table. It automagically grows from here. */ -#define INITIAL_FILENAME_SEEN_CACHE_SIZE 100 -}; - -/* filename_seen_cache constructor. */ - -static struct filename_seen_cache * -create_filename_seen_cache (void) -{ - struct filename_seen_cache *cache = XNEW (struct filename_seen_cache); - - cache->tab = htab_create_alloc (INITIAL_FILENAME_SEEN_CACHE_SIZE, - filename_hash, filename_eq, - NULL, xcalloc, xfree); - - return cache; -} - -/* Empty the cache, but do not delete it. */ - -static void -clear_filename_seen_cache (struct filename_seen_cache *cache) -{ - htab_empty (cache->tab); -} - -/* filename_seen_cache destructor. - This takes a void * argument as it is generally used as a cleanup. */ - -static void -delete_filename_seen_cache (void *ptr) -{ - struct filename_seen_cache *cache = (struct filename_seen_cache *) ptr; - - htab_delete (cache->tab); - xfree (cache); -} - -/* If FILE is not already in the table of files in CACHE, return zero; - otherwise return non-zero. Optionally add FILE to the table if ADD - is non-zero. - - NOTE: We don't manage space for FILE, we assume FILE lives as long - as the caller needs. */ - -static int -filename_seen (struct filename_seen_cache *cache, const char *file, int add) -{ - void **slot; - - /* Is FILE in tab? */ - slot = htab_find_slot (cache->tab, file, add ? INSERT : NO_INSERT); - if (*slot != NULL) - return 1; - - /* No; maybe add it to tab. */ - if (add) - *slot = (char *) file; - - return 0; -} - /* Data structure to maintain printing state for output_source_filename. */ struct output_source_filename_data @@ -4065,7 +3998,7 @@ output_source_filename (const char *name, symtabs; it doesn't hurt to check. */ /* Was NAME already seen? */ - if (filename_seen (data->filename_seen_cache, name, 1)) + if (data->filename_seen_cache->seen (name)) { /* Yes; don't print it again. */ return; @@ -4097,16 +4030,15 @@ sources_info (char *ignore, int from_tty) struct symtab *s; struct objfile *objfile; struct output_source_filename_data data; - struct cleanup *cleanups; if (!have_full_symbols () && !have_partial_symbols ()) { error (_("No symbol table is loaded. Use the \"file\" command.")); } - data.filename_seen_cache = create_filename_seen_cache (); - cleanups = make_cleanup (delete_filename_seen_cache, - data.filename_seen_cache); + filename_seen_cache filenames_seen; + + data.filename_seen_cache = &filenames_seen; printf_filtered ("Source files for which symbols have been read in:\n\n"); @@ -4122,13 +4054,11 @@ sources_info (char *ignore, int from_tty) printf_filtered ("Source files for which symbols " "will be read in on demand:\n\n"); - clear_filename_seen_cache (data.filename_seen_cache); + filenames_seen.clear (); data.first = 1; map_symbol_filenames (output_partial_symbol_filename, &data, 1 /*need_fullname*/); printf_filtered ("\n"); - - do_cleanups (cleanups); } /* Compare FILE against all the NFILES entries of FILES. If BASENAMES is @@ -5552,7 +5482,7 @@ maybe_add_partial_symtab_filename (const char *filename, const char *fullname, if (not_interesting_fname (filename)) return; - if (!filename_seen (data->filename_seen_cache, filename, 1) + if (!data->filename_seen_cache->seen (filename) && filename_ncmp (filename, data->text, data->text_len) == 0) { /* This file matches for a completion; add it to the @@ -5564,7 +5494,7 @@ maybe_add_partial_symtab_filename (const char *filename, const char *fullname, const char *base_name = lbasename (filename); if (base_name != filename - && !filename_seen (data->filename_seen_cache, base_name, 1) + && !data->filename_seen_cache->seen (base_name) && filename_ncmp (base_name, data->text, data->text_len) == 0) add_filename_to_list (base_name, data->text, data->word, data->list); } @@ -5585,23 +5515,20 @@ make_source_files_completion_list (const char *text, const char *word) VEC (char_ptr) *list = NULL; const char *base_name; struct add_partial_filename_data datum; - struct filename_seen_cache *filename_seen_cache; - struct cleanup *back_to, *cache_cleanup; + struct cleanup *back_to; if (!have_full_symbols () && !have_partial_symbols ()) return list; back_to = make_cleanup (do_free_completion_list, &list); - filename_seen_cache = create_filename_seen_cache (); - cache_cleanup = make_cleanup (delete_filename_seen_cache, - filename_seen_cache); + filename_seen_cache filenames_seen; ALL_FILETABS (objfile, cu, s) { if (not_interesting_fname (s->filename)) continue; - if (!filename_seen (filename_seen_cache, s->filename, 1) + if (!filenames_seen.seen (s->filename) && filename_ncmp (s->filename, text, text_len) == 0) { /* This file matches for a completion; add it to the current @@ -5616,13 +5543,13 @@ make_source_files_completion_list (const char *text, const char *word) command do when they parse file names. */ base_name = lbasename (s->filename); if (base_name != s->filename - && !filename_seen (filename_seen_cache, base_name, 1) + && !filenames_seen.seen (base_name) && filename_ncmp (base_name, text, text_len) == 0) add_filename_to_list (base_name, text, word, &list); } } - datum.filename_seen_cache = filename_seen_cache; + datum.filename_seen_cache = &filenames_seen; datum.text = text; datum.word = word; datum.text_len = text_len; @@ -5630,7 +5557,6 @@ make_source_files_completion_list (const char *text, const char *word) map_symbol_filenames (maybe_add_partial_symtab_filename, &datum, 0 /*need_fullname*/); - do_cleanups (cache_cleanup); discard_cleanups (back_to); return list; -- 2.34.1