gdb/dwarf2/index-write.c

   1 /* DWARF index writing support for GDB.
   2
   3    Copyright (C) 1994-2021 Free Software Foundation, Inc.
   4
   5    This file is part of GDB.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #include "defs.h"
  21
  22 #include "dwarf2/index-write.h"
  23
  24 #include "addrmap.h"
  25 #include "cli/cli-decode.h"
  26 #include "gdbsupport/byte-vector.h"
  27 #include "gdbsupport/filestuff.h"
  28 #include "gdbsupport/gdb_unlinker.h"
  29 #include "gdbsupport/pathstuff.h"
  30 #include "gdbsupport/scoped_fd.h"
  31 #include "complaints.h"
  32 #include "dwarf2/index-common.h"
  33 #include "dwarf2.h"
  34 #include "dwarf2/read.h"
  35 #include "dwarf2/dwz.h"
  36 #include "gdb/gdb-index.h"
  37 #include "gdbcmd.h"
  38 #include "objfiles.h"
  39 #include "psympriv.h"
  40 #include "ada-lang.h"
  41
  42 #include <algorithm>
  43 #include <cmath>
  44 #include <forward_list>
  45 #include <set>
  46 #include <unordered_map>
  47 #include <unordered_set>
  48
  49 /* Ensure only legit values are used.  */
  50 #define DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE(cu_index, value) \
  51   do { \
  52     gdb_assert ((unsigned int) (value) <= 1); \
  53     GDB_INDEX_SYMBOL_STATIC_SET_VALUE((cu_index), (value)); \
  54   } while (0)
  55
  56 /* Ensure only legit values are used.  */
  57 #define DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE(cu_index, value) \
  58   do { \
  59     gdb_assert ((value) >= GDB_INDEX_SYMBOL_KIND_TYPE \
  60                 && (value) <= GDB_INDEX_SYMBOL_KIND_OTHER); \
  61     GDB_INDEX_SYMBOL_KIND_SET_VALUE((cu_index), (value)); \
  62   } while (0)
  63
  64 /* Ensure we don't use more than the allotted number of bits for the CU.  */
  65 #define DW2_GDB_INDEX_CU_SET_VALUE(cu_index, value) \
  66   do { \
  67     gdb_assert (((value) & ~GDB_INDEX_CU_MASK) == 0); \
  68     GDB_INDEX_CU_SET_VALUE((cu_index), (value)); \
  69   } while (0)
  70
  71 /* The "save gdb-index" command.  */
  72
  73 /* Write SIZE bytes from the buffer pointed to by DATA to FILE, with
  74    error checking.  */
  75
  76 static void
  77 file_write (FILE *file, const void *data, size_t size)
  78 {
  79   if (fwrite (data, 1, size, file) != size)
  80     error (_("couldn't data write to file"));
  81 }
  82
  83 /* Write the contents of VEC to FILE, with error checking.  */
  84
  85 template<typename Elem, typename Alloc>
  86 static void
  87 file_write (FILE *file, const std::vector<Elem, Alloc> &vec)
  88 {
  89   if (!vec.empty ())
  90     file_write (file, vec.data (), vec.size () * sizeof (vec[0]));
  91 }
  92
  93 /* In-memory buffer to prepare data to be written later to a file.  */
  94 class data_buf
  95 {
  96 public:
  97   /* Copy ARRAY to the end of the buffer.  */
  98   void append_array (gdb::array_view<const gdb_byte> array)
  99   {
 100     std::copy (array.begin (), array.end (), grow (array.size ()));
 101   }
 102
 103   /* Copy CSTR (a zero-terminated string) to the end of buffer.  The
 104      terminating zero is appended too.  */
 105   void append_cstr0 (const char *cstr)
 106   {
 107     const size_t size = strlen (cstr) + 1;
 108     std::copy (cstr, cstr + size, grow (size));
 109   }
 110
 111   /* Store INPUT as ULEB128 to the end of buffer.  */
 112   void append_unsigned_leb128 (ULONGEST input)
 113   {
 114     for (;;)
 115       {
 116         gdb_byte output = input & 0x7f;
 117         input >>= 7;
 118         if (input)
 119           output |= 0x80;
 120         m_vec.push_back (output);
 121         if (input == 0)
 122           break;
 123       }
 124   }
 125
 126   /* Accept a host-format integer in VAL and append it to the buffer
 127      as a target-format integer which is LEN bytes long.  */
 128   void append_uint (size_t len, bfd_endian byte_order, ULONGEST val)
 129   {
 130     ::store_unsigned_integer (grow (len), len, byte_order, val);
 131   }
 132
 133   /* Copy VALUE to the end of the buffer, little-endian.  */
 134   void append_offset (offset_type value)
 135   {
 136     append_uint (sizeof (value), BFD_ENDIAN_LITTLE, value);
 137   }
 138
 139   /* Return the size of the buffer.  */
 140   size_t size () const
 141   {
 142     return m_vec.size ();
 143   }
 144
 145   /* Return true iff the buffer is empty.  */
 146   bool empty () const
 147   {
 148     return m_vec.empty ();
 149   }
 150
 151   /* Write the buffer to FILE.  */
 152   void file_write (FILE *file) const
 153   {
 154     ::file_write (file, m_vec);
 155   }
 156
 157 private:
 158   /* Grow SIZE bytes at the end of the buffer.  Returns a pointer to
 159      the start of the new block.  */
 160   gdb_byte *grow (size_t size)
 161   {
 162     m_vec.resize (m_vec.size () + size);
 163     return &*(m_vec.end () - size);
 164   }
 165
 166   gdb::byte_vector m_vec;
 167 };
 168
 169 /* An entry in the symbol table.  */
 170 struct symtab_index_entry
 171 {
 172   /* The name of the symbol.  */
 173   const char *name;
 174   /* The offset of the name in the constant pool.  */
 175   offset_type index_offset;
 176   /* A sorted vector of the indices of all the CUs that hold an object
 177      of this name.  */
 178   std::vector<offset_type> cu_indices;
 179 };
 180
 181 /* The symbol table.  This is a power-of-2-sized hash table.  */
 182 struct mapped_symtab
 183 {
 184   mapped_symtab ()
 185   {
 186     data.resize (1024);
 187   }
 188
 189   offset_type n_elements = 0;
 190   std::vector<symtab_index_entry> data;
 191
 192   /* Temporary storage for Ada names.  */
 193   auto_obstack m_string_obstack;
 194 };
 195
 196 /* Find a slot in SYMTAB for the symbol NAME.  Returns a reference to
 197    the slot.
 198
 199    Function is used only during write_hash_table so no index format backward
 200    compatibility is needed.  */
 201
 202 static symtab_index_entry &
 203 find_slot (struct mapped_symtab *symtab, const char *name)
 204 {
 205   offset_type index, step, hash = mapped_index_string_hash (INT_MAX, name);
 206
 207   index = hash & (symtab->data.size () - 1);
 208   step = ((hash * 17) & (symtab->data.size () - 1)) | 1;
 209
 210   for (;;)
 211     {
 212       if (symtab->data[index].name == NULL
 213           || strcmp (name, symtab->data[index].name) == 0)
 214         return symtab->data[index];
 215       index = (index + step) & (symtab->data.size () - 1);
 216     }
 217 }
 218
 219 /* Expand SYMTAB's hash table.  */
 220
 221 static void
 222 hash_expand (struct mapped_symtab *symtab)
 223 {
 224   auto old_entries = std::move (symtab->data);
 225
 226   symtab->data.clear ();
 227   symtab->data.resize (old_entries.size () * 2);
 228
 229   for (auto &it : old_entries)
 230     if (it.name != NULL)
 231       {
 232         auto &ref = find_slot (symtab, it.name);
 233         ref = std::move (it);
 234       }
 235 }
 236
 237 /* Add an entry to SYMTAB.  NAME is the name of the symbol.
 238    CU_INDEX is the index of the CU in which the symbol appears.
 239    IS_STATIC is one if the symbol is static, otherwise zero (global).  */
 240
 241 static void
 242 add_index_entry (struct mapped_symtab *symtab, const char *name,
 243                  int is_static, gdb_index_symbol_kind kind,
 244                  offset_type cu_index)
 245 {
 246   offset_type cu_index_and_attrs;
 247
 248   ++symtab->n_elements;
 249   if (4 * symtab->n_elements / 3 >= symtab->data.size ())
 250     hash_expand (symtab);
 251
 252   symtab_index_entry &slot = find_slot (symtab, name);
 253   if (slot.name == NULL)
 254     {
 255       slot.name = name;
 256       /* index_offset is set later.  */
 257     }
 258
 259   cu_index_and_attrs = 0;
 260   DW2_GDB_INDEX_CU_SET_VALUE (cu_index_and_attrs, cu_index);
 261   DW2_GDB_INDEX_SYMBOL_STATIC_SET_VALUE (cu_index_and_attrs, is_static);
 262   DW2_GDB_INDEX_SYMBOL_KIND_SET_VALUE (cu_index_and_attrs, kind);
 263
 264   /* We don't want to record an index value twice as we want to avoid the
 265      duplication.
 266      We process all global symbols and then all static symbols
 267      (which would allow us to avoid the duplication by only having to check
 268      the last entry pushed), but a symbol could have multiple kinds in one CU.
 269      To keep things simple we don't worry about the duplication here and
 270      sort and uniquify the list after we've processed all symbols.  */
 271   slot.cu_indices.push_back (cu_index_and_attrs);
 272 }
 273
 274 /* Sort and remove duplicates of all symbols' cu_indices lists.  */
 275
 276 static void
 277 uniquify_cu_indices (struct mapped_symtab *symtab)
 278 {
 279   for (auto &entry : symtab->data)
 280     {
 281       if (entry.name != NULL && !entry.cu_indices.empty ())
 282         {
 283           auto &cu_indices = entry.cu_indices;
 284           std::sort (cu_indices.begin (), cu_indices.end ());
 285           auto from = std::unique (cu_indices.begin (), cu_indices.end ());
 286           cu_indices.erase (from, cu_indices.end ());
 287         }
 288     }
 289 }
 290
 291 /* A form of 'const char *' suitable for container keys.  Only the
 292    pointer is stored.  The strings themselves are compared, not the
 293    pointers.  */
 294 class c_str_view
 295 {
 296 public:
 297   c_str_view (const char *cstr)
 298     : m_cstr (cstr)
 299   {}
 300
 301   bool operator== (const c_str_view &other) const
 302   {
 303     return strcmp (m_cstr, other.m_cstr) == 0;
 304   }
 305
 306   /* Return the underlying C string.  Note, the returned string is
 307      only a reference with lifetime of this object.  */
 308   const char *c_str () const
 309   {
 310     return m_cstr;
 311   }
 312
 313 private:
 314   friend class c_str_view_hasher;
 315   const char *const m_cstr;
 316 };
 317
 318 /* A std::unordered_map::hasher for c_str_view that uses the right
 319    hash function for strings in a mapped index.  */
 320 class c_str_view_hasher
 321 {
 322 public:
 323   size_t operator () (const c_str_view &x) const
 324   {
 325     return mapped_index_string_hash (INT_MAX, x.m_cstr);
 326   }
 327 };
 328
 329 /* A std::unordered_map::hasher for std::vector<>.  */
 330 template<typename T>
 331 class vector_hasher
 332 {
 333 public:
 334   size_t operator () (const std::vector<T> &key) const
 335   {
 336     return iterative_hash (key.data (),
 337                            sizeof (key.front ()) * key.size (), 0);
 338   }
 339 };
 340
 341 /* Write the mapped hash table SYMTAB to the data buffer OUTPUT, with
 342    constant pool entries going into the data buffer CPOOL.  */
 343
 344 static void
 345 write_hash_table (mapped_symtab *symtab, data_buf &output, data_buf &cpool)
 346 {
 347   {
 348     /* Elements are sorted vectors of the indices of all the CUs that
 349        hold an object of this name.  */
 350     std::unordered_map<std::vector<offset_type>, offset_type,
 351                        vector_hasher<offset_type>>
 352       symbol_hash_table;
 353
 354     /* We add all the index vectors to the constant pool first, to
 355        ensure alignment is ok.  */
 356     for (symtab_index_entry &entry : symtab->data)
 357       {
 358         if (entry.name == NULL)
 359           continue;
 360         gdb_assert (entry.index_offset == 0);
 361
 362         /* Finding before inserting is faster than always trying to
 363            insert, because inserting always allocates a node, does the
 364            lookup, and then destroys the new node if another node
 365            already had the same key.  C++17 try_emplace will avoid
 366            this.  */
 367         const auto found
 368           = symbol_hash_table.find (entry.cu_indices);
 369         if (found != symbol_hash_table.end ())
 370           {
 371             entry.index_offset = found->second;
 372             continue;
 373           }
 374
 375         symbol_hash_table.emplace (entry.cu_indices, cpool.size ());
 376         entry.index_offset = cpool.size ();
 377         cpool.append_offset (entry.cu_indices.size ());
 378         for (const auto index : entry.cu_indices)
 379           cpool.append_offset (index);
 380       }
 381   }
 382
 383   /* Now write out the hash table.  */
 384   std::unordered_map<c_str_view, offset_type, c_str_view_hasher> str_table;
 385   for (const auto &entry : symtab->data)
 386     {
 387       offset_type str_off, vec_off;
 388
 389       if (entry.name != NULL)
 390         {
 391           const auto insertpair = str_table.emplace (entry.name, cpool.size ());
 392           if (insertpair.second)
 393             cpool.append_cstr0 (entry.name);
 394           str_off = insertpair.first->second;
 395           vec_off = entry.index_offset;
 396         }
 397       else
 398         {
 399           /* While 0 is a valid constant pool index, it is not valid
 400              to have 0 for both offsets.  */
 401           str_off = 0;
 402           vec_off = 0;
 403         }
 404
 405       output.append_offset (str_off);
 406       output.append_offset (vec_off);
 407     }
 408 }
 409
 410 typedef std::unordered_map<partial_symtab *, unsigned int> psym_index_map;
 411
 412 /* Helper struct for building the address table.  */
 413 struct addrmap_index_data
 414 {
 415   addrmap_index_data (data_buf &addr_vec_, psym_index_map &cu_index_htab_)
 416     : addr_vec (addr_vec_), cu_index_htab (cu_index_htab_)
 417   {}
 418
 419   data_buf &addr_vec;
 420   psym_index_map &cu_index_htab;
 421
 422   int operator() (CORE_ADDR start_addr, void *obj);
 423
 424   /* Non-zero if the previous_* fields are valid.
 425      We can't write an entry until we see the next entry (since it is only then
 426      that we know the end of the entry).  */
 427   int previous_valid = 0;
 428   /* Index of the CU in the table of all CUs in the index file.  */
 429   unsigned int previous_cu_index = 0;
 430   /* Start address of the CU.  */
 431   CORE_ADDR previous_cu_start = 0;
 432 };
 433
 434 /* Write an address entry to ADDR_VEC.  */
 435
 436 static void
 437 add_address_entry (data_buf &addr_vec,
 438                    CORE_ADDR start, CORE_ADDR end, unsigned int cu_index)
 439 {
 440   addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, start);
 441   addr_vec.append_uint (8, BFD_ENDIAN_LITTLE, end);
 442   addr_vec.append_offset (cu_index);
 443 }
 444
 445 /* Worker function for traversing an addrmap to build the address table.  */
 446
 447 int
 448 addrmap_index_data::operator() (CORE_ADDR start_addr, void *obj)
 449 {
 450   partial_symtab *pst = (partial_symtab *) obj;
 451
 452   if (previous_valid)
 453     add_address_entry (addr_vec,
 454                        previous_cu_start, start_addr,
 455                        previous_cu_index);
 456
 457   previous_cu_start = start_addr;
 458   if (pst != NULL)
 459     {
 460       const auto it = cu_index_htab.find (pst);
 461       gdb_assert (it != cu_index_htab.cend ());
 462       previous_cu_index = it->second;
 463       previous_valid = 1;
 464     }
 465   else
 466     previous_valid = 0;
 467
 468   return 0;
 469 }
 470
 471 /* Write PER_BFD's address map to ADDR_VEC.
 472    CU_INDEX_HTAB is used to map addrmap entries to their CU indices
 473    in the index file.  */
 474
 475 static void
 476 write_address_map (dwarf2_per_bfd *per_bfd, data_buf &addr_vec,
 477                    psym_index_map &cu_index_htab)
 478 {
 479   struct addrmap_index_data addrmap_index_data (addr_vec, cu_index_htab);
 480
 481   addrmap_foreach (per_bfd->partial_symtabs->psymtabs_addrmap,
 482                    addrmap_index_data);
 483
 484   /* It's highly unlikely the last entry (end address = 0xff...ff)
 485      is valid, but we should still handle it.
 486      The end address is recorded as the start of the next region, but that
 487      doesn't work here.  To cope we pass 0xff...ff, this is a rare situation
 488      anyway.  */
 489   if (addrmap_index_data.previous_valid)
 490     add_address_entry (addr_vec,
 491                        addrmap_index_data.previous_cu_start, (CORE_ADDR) -1,
 492                        addrmap_index_data.previous_cu_index);
 493 }
 494
 495 /* Return the symbol kind of PSYM.  */
 496
 497 static gdb_index_symbol_kind
 498 symbol_kind (struct partial_symbol *psym)
 499 {
 500   domain_enum domain = psym->domain;
 501   enum address_class aclass = psym->aclass;
 502
 503   switch (domain)
 504     {
 505     case VAR_DOMAIN:
 506       switch (aclass)
 507         {
 508         case LOC_BLOCK:
 509           return GDB_INDEX_SYMBOL_KIND_FUNCTION;
 510         case LOC_TYPEDEF:
 511           return GDB_INDEX_SYMBOL_KIND_TYPE;
 512         case LOC_COMPUTED:
 513         case LOC_CONST_BYTES:
 514         case LOC_OPTIMIZED_OUT:
 515         case LOC_STATIC:
 516           return GDB_INDEX_SYMBOL_KIND_VARIABLE;
 517         case LOC_CONST:
 518           /* Note: It's currently impossible to recognize psyms as enum values
 519              short of reading the type info.  For now punt.  */
 520           return GDB_INDEX_SYMBOL_KIND_VARIABLE;
 521         default:
 522           /* There are other LOC_FOO values that one might want to classify
 523              as variables, but dwarf2read.c doesn't currently use them.  */
 524           return GDB_INDEX_SYMBOL_KIND_OTHER;
 525         }
 526     case STRUCT_DOMAIN:
 527       return GDB_INDEX_SYMBOL_KIND_TYPE;
 528     default:
 529       return GDB_INDEX_SYMBOL_KIND_OTHER;
 530     }
 531 }
 532
 533 /* Add a list of partial symbols to SYMTAB.  */
 534
 535 static void
 536 write_psymbols (struct mapped_symtab *symtab,
 537                 std::unordered_set<partial_symbol *> &psyms_seen,
 538                 const std::vector<partial_symbol *> &symbols,
 539                 offset_type cu_index,
 540                 int is_static)
 541 {
 542   for (partial_symbol *psym : symbols)
 543     {
 544       const char *name = psym->ginfo.search_name ();
 545
 546       if (psym->ginfo.language () == language_ada)
 547         {
 548           /* We want to ensure that the Ada main function's name appears
 549              verbatim in the index.  However, this name will be of the
 550              form "_ada_mumble", and will be rewritten by ada_decode.
 551              So, recognize it specially here and add it to the index by
 552              hand.  */
 553           if (strcmp (main_name (), name) == 0)
 554             {
 555               gdb_index_symbol_kind kind = symbol_kind (psym);
 556
 557               add_index_entry (symtab, name, is_static, kind, cu_index);
 558             }
 559
 560           /* In order for the index to work when read back into gdb, it
 561              has to supply a funny form of the name: it should be the
 562              encoded name, with any suffixes stripped.  Using the
 563              ordinary encoded name will not work properly with the
 564              searching logic in find_name_components_bounds; nor will
 565              using the decoded name.  Furthermore, an Ada "verbatim"
 566              name (of the form "<MumBle>") must be entered without the
 567              angle brackets.  Note that the current index is unusual,
 568              see PR symtab/24820 for details.  */
 569           std::string decoded = ada_decode (name);
 570           if (decoded[0] == '<')
 571             name = (char *) obstack_copy0 (&symtab->m_string_obstack,
 572                                            decoded.c_str () + 1,
 573                                            decoded.length () - 2);
 574           else
 575             name = obstack_strdup (&symtab->m_string_obstack,
 576                                    ada_encode (decoded.c_str ()));
 577         }
 578
 579       /* Only add a given psymbol once.  */
 580       if (psyms_seen.insert (psym).second)
 581         {
 582           gdb_index_symbol_kind kind = symbol_kind (psym);
 583
 584           add_index_entry (symtab, name, is_static, kind, cu_index);
 585         }
 586     }
 587 }
 588
 589 /* A helper struct used when iterating over debug_types.  */
 590 struct signatured_type_index_data
 591 {
 592   signatured_type_index_data (data_buf &types_list_,
 593                               std::unordered_set<partial_symbol *> &psyms_seen_)
 594     : types_list (types_list_), psyms_seen (psyms_seen_)
 595   {}
 596
 597   struct objfile *objfile;
 598   struct mapped_symtab *symtab;
 599   data_buf &types_list;
 600   std::unordered_set<partial_symbol *> &psyms_seen;
 601   int cu_index;
 602 };
 603
 604 /* A helper function that writes a single signatured_type to an
 605    obstack.  */
 606
 607 static int
 608 write_one_signatured_type (void **slot, void *d)
 609 {
 610   struct signatured_type_index_data *info
 611     = (struct signatured_type_index_data *) d;
 612   struct signatured_type *entry = (struct signatured_type *) *slot;
 613   partial_symtab *psymtab = entry->v.psymtab;
 614
 615   if (psymtab == nullptr)
 616     {
 617       /* We can end up here when processing a skeleton CU referring to a
 618          .dwo file that hasn't been found.  There's not much we can do in
 619          such a case, so skip this CU.  */
 620       return 1;
 621     }
 622
 623   write_psymbols (info->symtab, info->psyms_seen,
 624                   psymtab->global_psymbols, info->cu_index,
 625                   0);
 626   write_psymbols (info->symtab, info->psyms_seen,
 627                   psymtab->static_psymbols, info->cu_index,
 628                   1);
 629
 630   info->types_list.append_uint (8, BFD_ENDIAN_LITTLE,
 631                                 to_underlying (entry->sect_off));
 632   info->types_list.append_uint (8, BFD_ENDIAN_LITTLE,
 633                                 to_underlying (entry->type_offset_in_tu));
 634   info->types_list.append_uint (8, BFD_ENDIAN_LITTLE, entry->signature);
 635
 636   ++info->cu_index;
 637
 638   return 1;
 639 }
 640
 641 /* Recurse into all "included" dependencies and count their symbols as
 642    if they appeared in this psymtab.  */
 643
 644 static void
 645 recursively_count_psymbols (partial_symtab *psymtab,
 646                             size_t &psyms_seen)
 647 {
 648   for (int i = 0; i < psymtab->number_of_dependencies; ++i)
 649     if (psymtab->dependencies[i]->user != NULL)
 650       recursively_count_psymbols (psymtab->dependencies[i],
 651                                   psyms_seen);
 652
 653   psyms_seen += psymtab->global_psymbols.size ();
 654   psyms_seen += psymtab->static_psymbols.size ();
 655 }
 656
 657 /* Recurse into all "included" dependencies and write their symbols as
 658    if they appeared in this psymtab.  */
 659
 660 static void
 661 recursively_write_psymbols (struct objfile *objfile,
 662                             partial_symtab *psymtab,
 663                             struct mapped_symtab *symtab,
 664                             std::unordered_set<partial_symbol *> &psyms_seen,
 665                             offset_type cu_index)
 666 {
 667   int i;
 668
 669   for (i = 0; i < psymtab->number_of_dependencies; ++i)
 670     if (psymtab->dependencies[i]->user != NULL)
 671       recursively_write_psymbols (objfile,
 672                                   psymtab->dependencies[i],
 673                                   symtab, psyms_seen, cu_index);
 674
 675   write_psymbols (symtab, psyms_seen,
 676                   psymtab->global_psymbols, cu_index,
 677                   0);
 678   write_psymbols (symtab, psyms_seen,
 679                   psymtab->static_psymbols, cu_index,
 680                   1);
 681 }
 682
 683 /* DWARF-5 .debug_names builder.  */
 684 class debug_names
 685 {
 686 public:
 687   debug_names (dwarf2_per_objfile *per_objfile, bool is_dwarf64,
 688                bfd_endian dwarf5_byte_order)
 689     : m_dwarf5_byte_order (dwarf5_byte_order),
 690       m_dwarf32 (dwarf5_byte_order),
 691       m_dwarf64 (dwarf5_byte_order),
 692       m_dwarf (is_dwarf64
 693                ? static_cast<dwarf &> (m_dwarf64)
 694                : static_cast<dwarf &> (m_dwarf32)),
 695       m_name_table_string_offs (m_dwarf.name_table_string_offs),
 696       m_name_table_entry_offs (m_dwarf.name_table_entry_offs),
 697       m_debugstrlookup (per_objfile)
 698   {}
 699
 700   int dwarf5_offset_size () const
 701   {
 702     const bool dwarf5_is_dwarf64 = &m_dwarf == &m_dwarf64;
 703     return dwarf5_is_dwarf64 ? 8 : 4;
 704   }
 705
 706   /* Is this symbol from DW_TAG_compile_unit or DW_TAG_type_unit?  */
 707   enum class unit_kind { cu, tu };
 708
 709   /* Insert one symbol.  */
 710   void insert (const partial_symbol *psym, int cu_index, bool is_static,
 711                unit_kind kind)
 712   {
 713     const int dwarf_tag = psymbol_tag (psym);
 714     if (dwarf_tag == 0)
 715       return;
 716     const char *name = psym->ginfo.search_name ();
 717
 718     if (psym->ginfo.language () == language_ada)
 719       {
 720         /* We want to ensure that the Ada main function's name appears
 721            verbatim in the index.  However, this name will be of the
 722            form "_ada_mumble", and will be rewritten by ada_decode.
 723            So, recognize it specially here and add it to the index by
 724            hand.  */
 725         if (strcmp (main_name (), name) == 0)
 726           {
 727             const auto insertpair
 728               = m_name_to_value_set.emplace (c_str_view (name),
 729                                              std::set<symbol_value> ());
 730             std::set<symbol_value> &value_set = insertpair.first->second;
 731             value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static,
 732                                              kind));
 733           }
 734
 735         /* In order for the index to work when read back into gdb, it
 736            has to supply a funny form of the name: it should be the
 737            encoded name, with any suffixes stripped.  Using the
 738            ordinary encoded name will not work properly with the
 739            searching logic in find_name_components_bounds; nor will
 740            using the decoded name.  Furthermore, an Ada "verbatim"
 741            name (of the form "<MumBle>") must be entered without the
 742            angle brackets.  Note that the current index is unusual,
 743            see PR symtab/24820 for details.  */
 744         std::string decoded = ada_decode (name);
 745         if (decoded[0] == '<')
 746           name = (char *) obstack_copy0 (&m_string_obstack,
 747                                          decoded.c_str () + 1,
 748                                          decoded.length () - 2);
 749         else
 750           name = obstack_strdup (&m_string_obstack,
 751                                  ada_encode (decoded.c_str ()));
 752       }
 753
 754     const auto insertpair
 755       = m_name_to_value_set.emplace (c_str_view (name),
 756                                      std::set<symbol_value> ());
 757     std::set<symbol_value> &value_set = insertpair.first->second;
 758     value_set.emplace (symbol_value (dwarf_tag, cu_index, is_static, kind));
 759   }
 760
 761   /* Build all the tables.  All symbols must be already inserted.
 762      This function does not call file_write, caller has to do it
 763      afterwards.  */
 764   void build ()
 765   {
 766     /* Verify the build method has not be called twice.  */
 767     gdb_assert (m_abbrev_table.empty ());
 768     const size_t name_count = m_name_to_value_set.size ();
 769     m_bucket_table.resize
 770       (std::pow (2, std::ceil (std::log2 (name_count * 4 / 3))));
 771     m_hash_table.reserve (name_count);
 772     m_name_table_string_offs.reserve (name_count);
 773     m_name_table_entry_offs.reserve (name_count);
 774
 775     /* Map each hash of symbol to its name and value.  */
 776     struct hash_it_pair
 777     {
 778       uint32_t hash;
 779       decltype (m_name_to_value_set)::const_iterator it;
 780     };
 781     std::vector<std::forward_list<hash_it_pair>> bucket_hash;
 782     bucket_hash.resize (m_bucket_table.size ());
 783     for (decltype (m_name_to_value_set)::const_iterator it
 784            = m_name_to_value_set.cbegin ();
 785          it != m_name_to_value_set.cend ();
 786          ++it)
 787       {
 788         const char *const name = it->first.c_str ();
 789         const uint32_t hash = dwarf5_djb_hash (name);
 790         hash_it_pair hashitpair;
 791         hashitpair.hash = hash;
 792         hashitpair.it = it;
 793         auto &slot = bucket_hash[hash % bucket_hash.size()];
 794         slot.push_front (std::move (hashitpair));
 795       }
 796     for (size_t bucket_ix = 0; bucket_ix < bucket_hash.size (); ++bucket_ix)
 797       {
 798         const std::forward_list<hash_it_pair> &hashitlist
 799           = bucket_hash[bucket_ix];
 800         if (hashitlist.empty ())
 801           continue;
 802         uint32_t &bucket_slot = m_bucket_table[bucket_ix];
 803         /* The hashes array is indexed starting at 1.  */
 804         store_unsigned_integer (reinterpret_cast<gdb_byte *> (&bucket_slot),
 805                                 sizeof (bucket_slot), m_dwarf5_byte_order,
 806                                 m_hash_table.size () + 1);
 807         for (const hash_it_pair &hashitpair : hashitlist)
 808           {
 809             m_hash_table.push_back (0);
 810             store_unsigned_integer (reinterpret_cast<gdb_byte *>
 811                                                         (&m_hash_table.back ()),
 812                                     sizeof (m_hash_table.back ()),
 813                                     m_dwarf5_byte_order, hashitpair.hash);
 814             const c_str_view &name = hashitpair.it->first;
 815             const std::set<symbol_value> &value_set = hashitpair.it->second;
 816             m_name_table_string_offs.push_back_reorder
 817               (m_debugstrlookup.lookup (name.c_str ()));
 818             m_name_table_entry_offs.push_back_reorder (m_entry_pool.size ());
 819             gdb_assert (!value_set.empty ());
 820             for (const symbol_value &value : value_set)
 821               {
 822                 int &idx = m_indexkey_to_idx[index_key (value.dwarf_tag,
 823                                                         value.is_static,
 824                                                         value.kind)];
 825                 if (idx == 0)
 826                   {
 827                     idx = m_idx_next++;
 828                     m_abbrev_table.append_unsigned_leb128 (idx);
 829                     m_abbrev_table.append_unsigned_leb128 (value.dwarf_tag);
 830                     m_abbrev_table.append_unsigned_leb128
 831                               (value.kind == unit_kind::cu ? DW_IDX_compile_unit
 832                                                            : DW_IDX_type_unit);
 833                     m_abbrev_table.append_unsigned_leb128 (DW_FORM_udata);
 834                     m_abbrev_table.append_unsigned_leb128 (value.is_static
 835                                                            ? DW_IDX_GNU_internal
 836                                                            : DW_IDX_GNU_external);
 837                     m_abbrev_table.append_unsigned_leb128 (DW_FORM_flag_present);
 838
 839                     /* Terminate attributes list.  */
 840                     m_abbrev_table.append_unsigned_leb128 (0);
 841                     m_abbrev_table.append_unsigned_leb128 (0);
 842                   }
 843
 844                 m_entry_pool.append_unsigned_leb128 (idx);
 845                 m_entry_pool.append_unsigned_leb128 (value.cu_index);
 846               }
 847
 848             /* Terminate the list of CUs.  */
 849             m_entry_pool.append_unsigned_leb128 (0);
 850           }
 851       }
 852     gdb_assert (m_hash_table.size () == name_count);
 853
 854     /* Terminate tags list.  */
 855     m_abbrev_table.append_unsigned_leb128 (0);
 856   }
 857
 858   /* Return .debug_names bucket count.  This must be called only after
 859      calling the build method.  */
 860   uint32_t bucket_count () const
 861   {
 862     /* Verify the build method has been already called.  */
 863     gdb_assert (!m_abbrev_table.empty ());
 864     const uint32_t retval = m_bucket_table.size ();
 865
 866     /* Check for overflow.  */
 867     gdb_assert (retval == m_bucket_table.size ());
 868     return retval;
 869   }
 870
 871   /* Return .debug_names names count.  This must be called only after
 872      calling the build method.  */
 873   uint32_t name_count () const
 874   {
 875     /* Verify the build method has been already called.  */
 876     gdb_assert (!m_abbrev_table.empty ());
 877     const uint32_t retval = m_hash_table.size ();
 878
 879     /* Check for overflow.  */
 880     gdb_assert (retval == m_hash_table.size ());
 881     return retval;
 882   }
 883
 884   /* Return number of bytes of .debug_names abbreviation table.  This
 885      must be called only after calling the build method.  */
 886   uint32_t abbrev_table_bytes () const
 887   {
 888     gdb_assert (!m_abbrev_table.empty ());
 889     return m_abbrev_table.size ();
 890   }
 891
 892   /* Recurse into all "included" dependencies and store their symbols
 893      as if they appeared in this psymtab.  */
 894   void recursively_write_psymbols
 895     (struct objfile *objfile,
 896      partial_symtab *psymtab,
 897      std::unordered_set<partial_symbol *> &psyms_seen,
 898      int cu_index)
 899   {
 900     for (int i = 0; i < psymtab->number_of_dependencies; ++i)
 901       if (psymtab->dependencies[i]->user != NULL)
 902         recursively_write_psymbols
 903           (objfile, psymtab->dependencies[i], psyms_seen, cu_index);
 904
 905     write_psymbols (psyms_seen, psymtab->global_psymbols,
 906                     cu_index, false, unit_kind::cu);
 907     write_psymbols (psyms_seen, psymtab->static_psymbols,
 908                     cu_index, true, unit_kind::cu);
 909   }
 910
 911   /* Return number of bytes the .debug_names section will have.  This
 912      must be called only after calling the build method.  */
 913   size_t bytes () const
 914   {
 915     /* Verify the build method has been already called.  */
 916     gdb_assert (!m_abbrev_table.empty ());
 917     size_t expected_bytes = 0;
 918     expected_bytes += m_bucket_table.size () * sizeof (m_bucket_table[0]);
 919     expected_bytes += m_hash_table.size () * sizeof (m_hash_table[0]);
 920     expected_bytes += m_name_table_string_offs.bytes ();
 921     expected_bytes += m_name_table_entry_offs.bytes ();
 922     expected_bytes += m_abbrev_table.size ();
 923     expected_bytes += m_entry_pool.size ();
 924     return expected_bytes;
 925   }
 926
  /* Write .debug_names to FILE_NAMES and .debug_str addition to
     FILE_STR.  This must be called only after calling the build
     method.

     Only the tables owned by this object are written; the pieces
     sent to FILE_NAMES are the same six that the bytes method adds
     up, in the same order.  */
  void file_write (FILE *file_names, FILE *file_str) const
  {
    /* Verify the build method has been already called.  */
    gdb_assert (!m_abbrev_table.empty ());

    /* Hash lookup table: buckets, then the per-name hashes.  */
    ::file_write (file_names, m_bucket_table);
    ::file_write (file_names, m_hash_table);

    /* Name table: .debug_str offsets, then entry pool offsets.  */
    m_name_table_string_offs.file_write (file_names);
    m_name_table_entry_offs.file_write (file_names);

    m_abbrev_table.file_write (file_names);
    m_entry_pool.file_write (file_names);

    /* The strings that were not already in .debug_str go to the
       separate FILE_STR output.  */
    m_debugstrlookup.file_write (file_str);
  }
 942
 943   /* A helper user data for write_one_signatured_type.  */
 944   class write_one_signatured_type_data
 945   {
 946   public:
 947     write_one_signatured_type_data (debug_names &nametable_,
 948                                     signatured_type_index_data &&info_)
 949     : nametable (nametable_), info (std::move (info_))
 950     {}
 951     debug_names &nametable;
 952     struct signatured_type_index_data info;
 953   };
 954
 955   /* A helper function to pass write_one_signatured_type to
 956      htab_traverse_noresize.  */
 957   static int
 958   write_one_signatured_type (void **slot, void *d)
 959   {
 960     write_one_signatured_type_data *data = (write_one_signatured_type_data *) d;
 961     struct signatured_type_index_data *info = &data->info;
 962     struct signatured_type *entry = (struct signatured_type *) *slot;
 963
 964     data->nametable.write_one_signatured_type (entry, info);
 965
 966     return 1;
 967   }
 968
 969 private:
 970
 971   /* Storage for symbol names mapping them to their .debug_str section
 972      offsets.  */
 973   class debug_str_lookup
 974   {
 975   public:
 976
 977     /* Object constructor to be called for current DWARF2_PER_OBJFILE.
 978        All .debug_str section strings are automatically stored.  */
 979     debug_str_lookup (dwarf2_per_objfile *per_objfile)
 980       : m_abfd (per_objfile->objfile->obfd),
 981         m_per_objfile (per_objfile)
 982     {
 983       per_objfile->per_bfd->str.read (per_objfile->objfile);
 984       if (per_objfile->per_bfd->str.buffer == NULL)
 985         return;
 986       for (const gdb_byte *data = per_objfile->per_bfd->str.buffer;
 987            data < (per_objfile->per_bfd->str.buffer
 988                    + per_objfile->per_bfd->str.size);)
 989         {
 990           const char *const s = reinterpret_cast<const char *> (data);
 991           const auto insertpair
 992             = m_str_table.emplace (c_str_view (s),
 993                                    data - per_objfile->per_bfd->str.buffer);
 994           if (!insertpair.second)
 995             complaint (_("Duplicate string \"%s\" in "
 996                          ".debug_str section [in module %s]"),
 997                        s, bfd_get_filename (m_abfd));
 998           data += strlen (s) + 1;
 999         }
1000     }
1001
1002     /* Return offset of symbol name S in the .debug_str section.  Add
1003        such symbol to the section's end if it does not exist there
1004        yet.  */
1005     size_t lookup (const char *s)
1006     {
1007       const auto it = m_str_table.find (c_str_view (s));
1008       if (it != m_str_table.end ())
1009         return it->second;
1010       const size_t offset = (m_per_objfile->per_bfd->str.size
1011                              + m_str_add_buf.size ());
1012       m_str_table.emplace (c_str_view (s), offset);
1013       m_str_add_buf.append_cstr0 (s);
1014       return offset;
1015     }
1016
1017     /* Append the end of the .debug_str section to FILE.  */
1018     void file_write (FILE *file) const
1019     {
1020       m_str_add_buf.file_write (file);
1021     }
1022
1023   private:
1024     std::unordered_map<c_str_view, size_t, c_str_view_hasher> m_str_table;
1025     bfd *const m_abfd;
1026     dwarf2_per_objfile *m_per_objfile;
1027
1028     /* Data to add at the end of .debug_str for new needed symbol names.  */
1029     data_buf m_str_add_buf;
1030   };
1031
1032   /* Container to map used DWARF tags to their .debug_names abbreviation
1033      tags.  */
1034   class index_key
1035   {
1036   public:
1037     index_key (int dwarf_tag_, bool is_static_, unit_kind kind_)
1038       : dwarf_tag (dwarf_tag_), is_static (is_static_), kind (kind_)
1039     {
1040     }
1041
1042     bool
1043     operator== (const index_key &other) const
1044     {
1045       return (dwarf_tag == other.dwarf_tag && is_static == other.is_static
1046               && kind == other.kind);
1047     }
1048
1049     const int dwarf_tag;
1050     const bool is_static;
1051     const unit_kind kind;
1052   };
1053
1054   /* Provide std::unordered_map::hasher for index_key.  */
1055   class index_key_hasher
1056   {
1057   public:
1058     size_t
1059     operator () (const index_key &key) const
1060     {
1061       return (std::hash<int>() (key.dwarf_tag) << 1) | key.is_static;
1062     }
1063   };
1064
1065   /* Parameters of one symbol entry.  */
1066   class symbol_value
1067   {
1068   public:
1069     const int dwarf_tag, cu_index;
1070     const bool is_static;
1071     const unit_kind kind;
1072
1073     symbol_value (int dwarf_tag_, int cu_index_, bool is_static_,
1074                   unit_kind kind_)
1075       : dwarf_tag (dwarf_tag_), cu_index (cu_index_), is_static (is_static_),
1076         kind (kind_)
1077     {}
1078
1079     bool
1080     operator< (const symbol_value &other) const
1081     {
1082 #define X(n) \
1083   do \
1084     { \
1085       if (n < other.n) \
1086         return true; \
1087       if (n > other.n) \
1088         return false; \
1089     } \
1090   while (0)
1091       X (dwarf_tag);
1092       X (is_static);
1093       X (kind);
1094       X (cu_index);
1095 #undef X
1096       return false;
1097     }
1098   };
1099
1100   /* Abstract base class to unify DWARF-32 and DWARF-64 name table
1101      output.  */
1102   class offset_vec
1103   {
1104   protected:
1105     const bfd_endian dwarf5_byte_order;
1106   public:
1107     explicit offset_vec (bfd_endian dwarf5_byte_order_)
1108       : dwarf5_byte_order (dwarf5_byte_order_)
1109     {}
1110
1111     /* Call std::vector::reserve for NELEM elements.  */
1112     virtual void reserve (size_t nelem) = 0;
1113
1114     /* Call std::vector::push_back with store_unsigned_integer byte
1115        reordering for ELEM.  */
1116     virtual void push_back_reorder (size_t elem) = 0;
1117
1118     /* Return expected output size in bytes.  */
1119     virtual size_t bytes () const = 0;
1120
1121     /* Write name table to FILE.  */
1122     virtual void file_write (FILE *file) const = 0;
1123   };
1124
1125   /* Template to unify DWARF-32 and DWARF-64 output.  */
1126   template<typename OffsetSize>
1127   class offset_vec_tmpl : public offset_vec
1128   {
1129   public:
1130     explicit offset_vec_tmpl (bfd_endian dwarf5_byte_order_)
1131       : offset_vec (dwarf5_byte_order_)
1132     {}
1133
1134     /* Implement offset_vec::reserve.  */
1135     void reserve (size_t nelem) override
1136     {
1137       m_vec.reserve (nelem);
1138     }
1139
1140     /* Implement offset_vec::push_back_reorder.  */
1141     void push_back_reorder (size_t elem) override
1142     {
1143       m_vec.push_back (elem);
1144       /* Check for overflow.  */
1145       gdb_assert (m_vec.back () == elem);
1146       store_unsigned_integer (reinterpret_cast<gdb_byte *> (&m_vec.back ()),
1147                               sizeof (m_vec.back ()), dwarf5_byte_order, elem);
1148     }
1149
1150     /* Implement offset_vec::bytes.  */
1151     size_t bytes () const override
1152     {
1153       return m_vec.size () * sizeof (m_vec[0]);
1154     }
1155
1156     /* Implement offset_vec::file_write.  */
1157     void file_write (FILE *file) const override
1158     {
1159       ::file_write (file, m_vec);
1160     }
1161
1162   private:
1163     std::vector<OffsetSize> m_vec;
1164   };
1165
1166   /* Base class to unify DWARF-32 and DWARF-64 .debug_names output
1167      respecting name table width.  */
1168   class dwarf
1169   {
1170   public:
1171     offset_vec &name_table_string_offs, &name_table_entry_offs;
1172
1173     dwarf (offset_vec &name_table_string_offs_,
1174            offset_vec &name_table_entry_offs_)
1175       : name_table_string_offs (name_table_string_offs_),
1176         name_table_entry_offs (name_table_entry_offs_)
1177     {
1178     }
1179   };
1180
1181   /* Template to unify DWARF-32 and DWARF-64 .debug_names output
1182      respecting name table width.  */
1183   template<typename OffsetSize>
1184   class dwarf_tmpl : public dwarf
1185   {
1186   public:
1187     explicit dwarf_tmpl (bfd_endian dwarf5_byte_order_)
1188       : dwarf (m_name_table_string_offs, m_name_table_entry_offs),
1189         m_name_table_string_offs (dwarf5_byte_order_),
1190         m_name_table_entry_offs (dwarf5_byte_order_)
1191     {}
1192
1193   private:
1194     offset_vec_tmpl<OffsetSize> m_name_table_string_offs;
1195     offset_vec_tmpl<OffsetSize> m_name_table_entry_offs;
1196   };
1197
1198   /* Try to reconstruct original DWARF tag for given partial_symbol.
1199      This function is not DWARF-5 compliant but it is sufficient for
1200      GDB as a DWARF-5 index consumer.  */
1201   static int psymbol_tag (const struct partial_symbol *psym)
1202   {
1203     domain_enum domain = psym->domain;
1204     enum address_class aclass = psym->aclass;
1205
1206     switch (domain)
1207       {
1208       case VAR_DOMAIN:
1209         switch (aclass)
1210           {
1211           case LOC_BLOCK:
1212             return DW_TAG_subprogram;
1213           case LOC_TYPEDEF:
1214             return DW_TAG_typedef;
1215           case LOC_COMPUTED:
1216           case LOC_CONST_BYTES:
1217           case LOC_OPTIMIZED_OUT:
1218           case LOC_STATIC:
1219             return DW_TAG_variable;
1220           case LOC_CONST:
1221             /* Note: It's currently impossible to recognize psyms as enum values
1222                short of reading the type info.  For now punt.  */
1223             return DW_TAG_variable;
1224           default:
1225             /* There are other LOC_FOO values that one might want to classify
1226                as variables, but dwarf2read.c doesn't currently use them.  */
1227             return DW_TAG_variable;
1228           }
1229       case STRUCT_DOMAIN:
1230         return DW_TAG_structure_type;
1231       case MODULE_DOMAIN:
1232         return DW_TAG_module;
1233       default:
1234         return 0;
1235       }
1236   }
1237
1238   /* Call insert for all partial symbols and mark them in PSYMS_SEEN.  */
1239   void write_psymbols (std::unordered_set<partial_symbol *> &psyms_seen,
1240                        const std::vector<partial_symbol *> &symbols,
1241                        int cu_index, bool is_static, unit_kind kind)
1242   {
1243     for (partial_symbol *psym : symbols)
1244       {
1245         /* Only add a given psymbol once.  */
1246         if (psyms_seen.insert (psym).second)
1247           insert (psym, cu_index, is_static, kind);
1248       }
1249   }
1250
1251   /* A helper function that writes a single signatured_type
1252      to a debug_names.  */
1253   void
1254   write_one_signatured_type (struct signatured_type *entry,
1255                              struct signatured_type_index_data *info)
1256   {
1257     partial_symtab *psymtab = entry->v.psymtab;
1258
1259     write_psymbols (info->psyms_seen, psymtab->global_psymbols,
1260                     info->cu_index, false, unit_kind::tu);
1261     write_psymbols (info->psyms_seen, psymtab->static_psymbols,
1262                     info->cu_index, true, unit_kind::tu);
1263
1264     info->types_list.append_uint (dwarf5_offset_size (), m_dwarf5_byte_order,
1265                                   to_underlying (entry->sect_off));
1266
1267     ++info->cu_index;
1268   }
1269
  /* Store value of each symbol.  Maps a name to the set of entries
     (tag, unit index, static flag, unit kind) recorded for it.  */
  std::unordered_map<c_str_view, std::set<symbol_value>, c_str_view_hasher>
    m_name_to_value_set;

  /* Tables of DWARF-5 .debug_names.  They are in object file byte
     order.  */
  std::vector<uint32_t> m_bucket_table;
  std::vector<uint32_t> m_hash_table;

  /* Byte order used when storing multi-byte values.  */
  const bfd_endian m_dwarf5_byte_order;

  /* Name table offset arrays for both offset widths.  M_DWARF and the
     two references after it presumably get bound by the constructor
     to whichever of the two matches the output format -- the
     constructor is not visible here, confirm there.  The declaration
     order of these members is also their initialization order.  */
  dwarf_tmpl<uint32_t> m_dwarf32;
  dwarf_tmpl<uint64_t> m_dwarf64;
  dwarf &m_dwarf;

  /* Per name: offset of the name in .debug_str, and offset of the
     name's first entry in the entry pool.  */
  offset_vec &m_name_table_string_offs, &m_name_table_entry_offs;

  /* Looks up, and extends, the .debug_str contents.  */
  debug_str_lookup m_debugstrlookup;

  /* Map each used .debug_names abbreviation tag parameter to its
     index value.  A value of 0 means no index was assigned yet.  */
  std::unordered_map<index_key, int, index_key_hasher> m_indexkey_to_idx;

  /* Next unused .debug_names abbreviation tag for
     m_indexkey_to_idx.  Starts at 1 because 0 is the "unassigned"
     marker.  */
  int m_idx_next = 1;

  /* .debug_names abbreviation table.  */
  data_buf m_abbrev_table;

  /* .debug_names entry pool.  */
  data_buf m_entry_pool;

  /* Temporary storage for Ada names.  */
  auto_obstack m_string_obstack;
1302 };
1303
1304 /* Return iff any of the needed offsets does not fit into 32-bit
1305    .debug_names section.  */
1306
1307 static bool
1308 check_dwarf64_offsets (dwarf2_per_objfile *per_objfile)
1309 {
1310   for (const auto &per_cu : per_objfile->per_bfd->all_comp_units)
1311     {
1312       if (to_underlying (per_cu->sect_off)
1313           >= (static_cast<uint64_t> (1) << 32))
1314         return true;
1315     }
1316   return false;
1317 }
1318
1319 /* The psyms_seen set is potentially going to be largish (~40k
1320    elements when indexing a -g3 build of GDB itself).  Estimate the
1321    number of elements in order to avoid too many rehashes, which
1322    require rebuilding buckets and thus many trips to
1323    malloc/free.  */
1324
1325 static size_t
1326 psyms_seen_size (dwarf2_per_objfile *per_objfile)
1327 {
1328   size_t psyms_count = 0;
1329   for (const auto &per_cu : per_objfile->per_bfd->all_comp_units)
1330     {
1331       partial_symtab *psymtab = per_cu->v.psymtab;
1332
1333       if (psymtab != NULL && psymtab->user == NULL)
1334         recursively_count_psymbols (psymtab, psyms_count);
1335     }
1336   /* Generating an index for gdb itself shows a ratio of
1337      TOTAL_SEEN_SYMS/UNIQUE_SYMS or ~5.  4 seems like a good bet.  */
1338   return psyms_count / 4;
1339 }
1340
1341 /* Assert that FILE's size is EXPECTED_SIZE.  Assumes file's seek
1342    position is at the end of the file.  */
1343
1344 static void
1345 assert_file_size (FILE *file, size_t expected_size)
1346 {
1347   const auto file_size = ftell (file);
1348   if (file_size == -1)
1349     perror_with_name (("ftell"));
1350   gdb_assert (file_size == expected_size);
1351 }
1352
1353 /* Write a gdb index file to OUT_FILE from all the sections passed as
1354    arguments.  */
1355
1356 static void
1357 write_gdbindex_1 (FILE *out_file,
1358                   const data_buf &cu_list,
1359                   const data_buf &types_cu_list,
1360                   const data_buf &addr_vec,
1361                   const data_buf &symtab_vec,
1362                   const data_buf &constant_pool)
1363 {
1364   data_buf contents;
1365   const offset_type size_of_header = 6 * sizeof (offset_type);
1366   offset_type total_len = size_of_header;
1367
1368   /* The version number.  */
1369   contents.append_offset (8);
1370
1371   /* The offset of the CU list from the start of the file.  */
1372   contents.append_offset (total_len);
1373   total_len += cu_list.size ();
1374
1375   /* The offset of the types CU list from the start of the file.  */
1376   contents.append_offset (total_len);
1377   total_len += types_cu_list.size ();
1378
1379   /* The offset of the address table from the start of the file.  */
1380   contents.append_offset (total_len);
1381   total_len += addr_vec.size ();
1382
1383   /* The offset of the symbol table from the start of the file.  */
1384   contents.append_offset (total_len);
1385   total_len += symtab_vec.size ();
1386
1387   /* The offset of the constant pool from the start of the file.  */
1388   contents.append_offset (total_len);
1389   total_len += constant_pool.size ();
1390
1391   gdb_assert (contents.size () == size_of_header);
1392
1393   contents.file_write (out_file);
1394   cu_list.file_write (out_file);
1395   types_cu_list.file_write (out_file);
1396   addr_vec.file_write (out_file);
1397   symtab_vec.file_write (out_file);
1398   constant_pool.file_write (out_file);
1399
1400   assert_file_size (out_file, total_len);
1401 }
1402
1403 /* Write contents of a .gdb_index section for OBJFILE into OUT_FILE.
1404    If OBJFILE has an associated dwz file, write contents of a .gdb_index
1405    section for that dwz file into DWZ_OUT_FILE.  If OBJFILE does not have an
1406    associated dwz file, DWZ_OUT_FILE must be NULL.  */
1407
1408 static void
1409 write_gdbindex (dwarf2_per_objfile *per_objfile, FILE *out_file,
1410                 FILE *dwz_out_file)
1411 {
1412   struct objfile *objfile = per_objfile->objfile;
1413   mapped_symtab symtab;
1414   data_buf objfile_cu_list;
1415   data_buf dwz_cu_list;
1416
1417   /* While we're scanning CU's create a table that maps a psymtab pointer
1418      (which is what addrmap records) to its index (which is what is recorded
1419      in the index file).  This will later be needed to write the address
1420      table.  */
1421   psym_index_map cu_index_htab;
1422   cu_index_htab.reserve (per_objfile->per_bfd->all_comp_units.size ());
1423
1424   /* The CU list is already sorted, so we don't need to do additional
1425      work here.  Also, the debug_types entries do not appear in
1426      all_comp_units, but only in their own hash table.  */
1427
1428   std::unordered_set<partial_symbol *> psyms_seen
1429     (psyms_seen_size (per_objfile));
1430   int counter = 0;
1431   for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
1432     {
1433       dwarf2_per_cu_data *per_cu
1434         = per_objfile->per_bfd->all_comp_units[i].get ();
1435       if (per_cu->is_debug_types)
1436         continue;
1437
1438       partial_symtab *psymtab = per_cu->v.psymtab;
1439
1440       if (psymtab != NULL)
1441         {
1442           if (psymtab->user == NULL)
1443             recursively_write_psymbols (objfile, psymtab, &symtab,
1444                                         psyms_seen, i);
1445
1446           const auto insertpair = cu_index_htab.emplace (psymtab, counter);
1447           gdb_assert (insertpair.second);
1448         }
1449
1450       /* The all_comp_units list contains CUs read from the objfile as well as
1451          from the eventual dwz file.  We need to place the entry in the
1452          corresponding index.  */
1453       data_buf &cu_list = per_cu->is_dwz ? dwz_cu_list : objfile_cu_list;
1454       cu_list.append_uint (8, BFD_ENDIAN_LITTLE,
1455                            to_underlying (per_cu->sect_off));
1456       cu_list.append_uint (8, BFD_ENDIAN_LITTLE, per_cu->length);
1457       ++counter;
1458     }
1459
1460   /* Dump the address map.  */
1461   data_buf addr_vec;
1462   write_address_map (per_objfile->per_bfd, addr_vec, cu_index_htab);
1463
1464   /* Write out the .debug_type entries, if any.  */
1465   data_buf types_cu_list;
1466   if (per_objfile->per_bfd->signatured_types)
1467     {
1468       signatured_type_index_data sig_data (types_cu_list,
1469                                            psyms_seen);
1470
1471       sig_data.objfile = objfile;
1472       sig_data.symtab = &symtab;
1473       sig_data.cu_index = (per_objfile->per_bfd->all_comp_units.size ()
1474                            - per_objfile->per_bfd->tu_stats.nr_tus);
1475       htab_traverse_noresize (per_objfile->per_bfd->signatured_types.get (),
1476                               write_one_signatured_type, &sig_data);
1477     }
1478
1479   /* Now that we've processed all symbols we can shrink their cu_indices
1480      lists.  */
1481   uniquify_cu_indices (&symtab);
1482
1483   data_buf symtab_vec, constant_pool;
1484   write_hash_table (&symtab, symtab_vec, constant_pool);
1485
1486   write_gdbindex_1(out_file, objfile_cu_list, types_cu_list, addr_vec,
1487                    symtab_vec, constant_pool);
1488
1489   if (dwz_out_file != NULL)
1490     write_gdbindex_1 (dwz_out_file, dwz_cu_list, {}, {}, {}, {});
1491   else
1492     gdb_assert (dwz_cu_list.empty ());
1493 }
1494
/* DWARF-5 augmentation string for GDB's DW_IDX_GNU_* extension.  It
   is written to the .debug_names header including the terminating
   zero byte, which makes it 4 bytes long; write_debug_names checks
   at compile time that the size is a multiple of 4.  */
static const gdb_byte dwarf5_gdb_augmentation[] = { 'G', 'D', 'B', 0 };
1497
1498 /* Write a new .debug_names section for OBJFILE into OUT_FILE, write
1499    needed addition to .debug_str section to OUT_FILE_STR.  Return how
1500    many bytes were expected to be written into OUT_FILE.  */
1501
1502 static void
1503 write_debug_names (dwarf2_per_objfile *per_objfile,
1504                    FILE *out_file, FILE *out_file_str)
1505 {
1506   const bool dwarf5_is_dwarf64 = check_dwarf64_offsets (per_objfile);
1507   struct objfile *objfile = per_objfile->objfile;
1508   const enum bfd_endian dwarf5_byte_order
1509     = gdbarch_byte_order (objfile->arch ());
1510
1511   /* The CU list is already sorted, so we don't need to do additional
1512      work here.  Also, the debug_types entries do not appear in
1513      all_comp_units, but only in their own hash table.  */
1514   data_buf cu_list;
1515   debug_names nametable (per_objfile, dwarf5_is_dwarf64, dwarf5_byte_order);
1516   std::unordered_set<partial_symbol *>
1517     psyms_seen (psyms_seen_size (per_objfile));
1518   int counter = 0;
1519   for (int i = 0; i < per_objfile->per_bfd->all_comp_units.size (); ++i)
1520     {
1521       const dwarf2_per_cu_data *per_cu
1522         = per_objfile->per_bfd->all_comp_units[i].get ();
1523       if (per_cu->is_debug_types)
1524         continue;
1525
1526       partial_symtab *psymtab = per_cu->v.psymtab;
1527
1528       /* CU of a shared file from 'dwz -m' may be unused by this main
1529          file.  It may be referenced from a local scope but in such
1530          case it does not need to be present in .debug_names.  */
1531       if (psymtab == NULL)
1532         continue;
1533
1534       if (psymtab->user == NULL)
1535         nametable.recursively_write_psymbols (objfile, psymtab, psyms_seen,
1536                                               counter);
1537
1538       cu_list.append_uint (nametable.dwarf5_offset_size (), dwarf5_byte_order,
1539                            to_underlying (per_cu->sect_off));
1540       ++counter;
1541     }
1542
1543   /* Write out the .debug_type entries, if any.  */
1544   data_buf types_cu_list;
1545   if (per_objfile->per_bfd->signatured_types)
1546     {
1547       debug_names::write_one_signatured_type_data sig_data (nametable,
1548                         signatured_type_index_data (types_cu_list, psyms_seen));
1549
1550       sig_data.info.objfile = objfile;
1551       /* It is used only for gdb_index.  */
1552       sig_data.info.symtab = nullptr;
1553       sig_data.info.cu_index = 0;
1554       htab_traverse_noresize (per_objfile->per_bfd->signatured_types.get (),
1555                               debug_names::write_one_signatured_type,
1556                               &sig_data);
1557     }
1558
1559   nametable.build ();
1560
1561   /* No addr_vec - DWARF-5 uses .debug_aranges generated by GCC.  */
1562
1563   const offset_type bytes_of_header
1564     = ((dwarf5_is_dwarf64 ? 12 : 4)
1565        + 2 + 2 + 7 * 4
1566        + sizeof (dwarf5_gdb_augmentation));
1567   size_t expected_bytes = 0;
1568   expected_bytes += bytes_of_header;
1569   expected_bytes += cu_list.size ();
1570   expected_bytes += types_cu_list.size ();
1571   expected_bytes += nametable.bytes ();
1572   data_buf header;
1573
1574   if (!dwarf5_is_dwarf64)
1575     {
1576       const uint64_t size64 = expected_bytes - 4;
1577       gdb_assert (size64 < 0xfffffff0);
1578       header.append_uint (4, dwarf5_byte_order, size64);
1579     }
1580   else
1581     {
1582       header.append_uint (4, dwarf5_byte_order, 0xffffffff);
1583       header.append_uint (8, dwarf5_byte_order, expected_bytes - 12);
1584     }
1585
1586   /* The version number.  */
1587   header.append_uint (2, dwarf5_byte_order, 5);
1588
1589   /* Padding.  */
1590   header.append_uint (2, dwarf5_byte_order, 0);
1591
1592   /* comp_unit_count - The number of CUs in the CU list.  */
1593   header.append_uint (4, dwarf5_byte_order,
1594                       per_objfile->per_bfd->all_comp_units.size ()
1595                       - per_objfile->per_bfd->tu_stats.nr_tus);
1596
1597   /* local_type_unit_count - The number of TUs in the local TU
1598      list.  */
1599   header.append_uint (4, dwarf5_byte_order,
1600                       per_objfile->per_bfd->tu_stats.nr_tus);
1601
1602   /* foreign_type_unit_count - The number of TUs in the foreign TU
1603      list.  */
1604   header.append_uint (4, dwarf5_byte_order, 0);
1605
1606   /* bucket_count - The number of hash buckets in the hash lookup
1607      table.  */
1608   header.append_uint (4, dwarf5_byte_order, nametable.bucket_count ());
1609
1610   /* name_count - The number of unique names in the index.  */
1611   header.append_uint (4, dwarf5_byte_order, nametable.name_count ());
1612
1613   /* abbrev_table_size - The size in bytes of the abbreviations
1614      table.  */
1615   header.append_uint (4, dwarf5_byte_order, nametable.abbrev_table_bytes ());
1616
1617   /* augmentation_string_size - The size in bytes of the augmentation
1618      string.  This value is rounded up to a multiple of 4.  */
1619   static_assert (sizeof (dwarf5_gdb_augmentation) % 4 == 0, "");
1620   header.append_uint (4, dwarf5_byte_order, sizeof (dwarf5_gdb_augmentation));
1621   header.append_array (dwarf5_gdb_augmentation);
1622
1623   gdb_assert (header.size () == bytes_of_header);
1624
1625   header.file_write (out_file);
1626   cu_list.file_write (out_file);
1627   types_cu_list.file_write (out_file);
1628   nametable.file_write (out_file, out_file_str);
1629
1630   assert_file_size (out_file, expected_bytes);
1631 }
1632
1633 /* This represents an index file being written (work-in-progress).
1634
1635    The data is initially written to a temporary file.  When the finalize method
1636    is called, the file is closed and moved to its final location.
1637
1638    On failure (if this object is being destroyed with having called finalize),
1639    the temporary file is closed and deleted.  */
1640
1641 struct index_wip_file
1642 {
1643   index_wip_file (const char *dir, const char *basename,
1644                   const char *suffix)
1645   {
1646     filename = (std::string (dir) + SLASH_STRING + basename
1647                 + suffix);
1648
1649     filename_temp = make_temp_filename (filename);
1650
1651     scoped_fd out_file_fd (gdb_mkostemp_cloexec (filename_temp.data (),
1652                                                  O_BINARY));
1653     if (out_file_fd.get () == -1)
1654       perror_with_name (("mkstemp"));
1655
1656     out_file = out_file_fd.to_file ("wb");
1657
1658     if (out_file == nullptr)
1659       error (_("Can't open `%s' for writing"), filename_temp.data ());
1660
1661     unlink_file.emplace (filename_temp.data ());
1662   }
1663
1664   void finalize ()
1665   {
1666     /* We want to keep the file.  */
1667     unlink_file->keep ();
1668
1669     /* Close and move the str file in place.  */
1670     unlink_file.reset ();
1671     if (rename (filename_temp.data (), filename.c_str ()) != 0)
1672       perror_with_name (("rename"));
1673   }
1674
1675   std::string filename;
1676   gdb::char_vector filename_temp;
1677
1678   /* Order matters here; we want FILE to be closed before
1679      FILENAME_TEMP is unlinked, because on MS-Windows one cannot
1680      delete a file that is still open.  So, we wrap the unlinker in an
1681      optional and emplace it once we know the file name.  */
1682   gdb::optional<gdb::unlinker> unlink_file;
1683
1684   gdb_file_up out_file;
1685 };
1686
1687 /* See dwarf-index-write.h.  */
1688
1689 void
1690 write_psymtabs_to_index (dwarf2_per_objfile *per_objfile, const char *dir,
1691                          const char *basename, const char *dwz_basename,
1692                          dw_index_kind index_kind)
1693 {
1694   dwarf2_per_bfd *per_bfd = per_objfile->per_bfd;
1695   struct objfile *objfile = per_objfile->objfile;
1696
1697   if (per_objfile->per_bfd->using_index)
1698     error (_("Cannot use an index to create the index"));
1699
1700   if (per_objfile->per_bfd->types.size () > 1)
1701     error (_("Cannot make an index when the file has multiple .debug_types sections"));
1702
1703   if (per_bfd->partial_symtabs == nullptr
1704       || !per_bfd->partial_symtabs->psymtabs
1705       || !per_bfd->partial_symtabs->psymtabs_addrmap)
1706     return;
1707
1708   struct stat st;
1709   if (stat (objfile_name (objfile), &st) < 0)
1710     perror_with_name (objfile_name (objfile));
1711
1712   const char *index_suffix = (index_kind == dw_index_kind::DEBUG_NAMES
1713                               ? INDEX5_SUFFIX : INDEX4_SUFFIX);
1714
1715   index_wip_file objfile_index_wip (dir, basename, index_suffix);
1716   gdb::optional<index_wip_file> dwz_index_wip;
1717
1718   if (dwz_basename != NULL)
1719       dwz_index_wip.emplace (dir, dwz_basename, index_suffix);
1720
1721   if (index_kind == dw_index_kind::DEBUG_NAMES)
1722     {
1723       index_wip_file str_wip_file (dir, basename, DEBUG_STR_SUFFIX);
1724
1725       write_debug_names (per_objfile, objfile_index_wip.out_file.get (),
1726                          str_wip_file.out_file.get ());
1727
1728       str_wip_file.finalize ();
1729     }
1730   else
1731     write_gdbindex (per_objfile, objfile_index_wip.out_file.get (),
1732                     (dwz_index_wip.has_value ()
1733                      ? dwz_index_wip->out_file.get () : NULL));
1734
1735   objfile_index_wip.finalize ();
1736
1737   if (dwz_index_wip.has_value ())
1738     dwz_index_wip->finalize ();
1739 }
1740
1741 /* Implementation of the `save gdb-index' command.
1742
1743    Note that the .gdb_index file format used by this command is
1744    documented in the GDB manual.  Any changes here must be documented
1745    there.  */
1746
1747 static void
1748 save_gdb_index_command (const char *arg, int from_tty)
1749 {
1750   const char dwarf5space[] = "-dwarf-5 ";
1751   dw_index_kind index_kind = dw_index_kind::GDB_INDEX;
1752
1753   if (!arg)
1754     arg = "";
1755
1756   arg = skip_spaces (arg);
1757   if (strncmp (arg, dwarf5space, strlen (dwarf5space)) == 0)
1758     {
1759       index_kind = dw_index_kind::DEBUG_NAMES;
1760       arg += strlen (dwarf5space);
1761       arg = skip_spaces (arg);
1762     }
1763
1764   if (!*arg)
1765     error (_("usage: save gdb-index [-dwarf-5] DIRECTORY"));
1766
1767   for (objfile *objfile : current_program_space->objfiles ())
1768     {
1769       struct stat st;
1770
1771       /* If the objfile does not correspond to an actual file, skip it.  */
1772       if (stat (objfile_name (objfile), &st) < 0)
1773         continue;
1774
1775       dwarf2_per_objfile *per_objfile = get_dwarf2_per_objfile (objfile);
1776
1777       if (per_objfile != NULL)
1778         {
1779           try
1780             {
1781               const char *basename = lbasename (objfile_name (objfile));
1782               const dwz_file *dwz = dwarf2_get_dwz_file (per_objfile->per_bfd);
1783               const char *dwz_basename = NULL;
1784
1785               if (dwz != NULL)
1786                 dwz_basename = lbasename (dwz->filename ());
1787
1788               write_psymtabs_to_index (per_objfile, arg, basename, dwz_basename,
1789                                        index_kind);
1790             }
1791           catch (const gdb_exception_error &except)
1792             {
1793               exception_fprintf (gdb_stderr, except,
1794                                  _("Error while writing index for `%s': "),
1795                                  objfile_name (objfile));
1796             }
1797             }
1798
1799     }
1800 }
1801
1802 void _initialize_dwarf_index_write ();
1803 void
1804 _initialize_dwarf_index_write ()
1805 {
1806   cmd_list_element *c = add_cmd ("gdb-index", class_files,
1807                                  save_gdb_index_command, _("\
1808 Save a gdb-index file.\n\
1809 Usage: save gdb-index [-dwarf-5] DIRECTORY\n\
1810 \n\
1811 No options create one file with .gdb-index extension for pre-DWARF-5\n\
1812 compatible .gdb_index section.  With -dwarf-5 creates two files with\n\
1813 extension .debug_names and .debug_str for DWARF-5 .debug_names section."),
1814                &save_cmdlist);
1815   set_cmd_completer (c, filename_completer);
1816 }