X-Git-Url: http://git.efficios.com/?a=blobdiff_plain;f=gdb%2Fbcache.h;h=fb37ec9e43e0fc56373d481db20cade8eebc9556;hb=c2d6af84da44465f30e3bb487721128dfe03d0e4;hp=5355e7481c28fd5ed006df8a6b2af052d802acb3;hpb=27e232885db363fb545fd2f450e72d929e59b8f6;p=deliverable%2Fbinutils-gdb.git diff --git a/gdb/bcache.h b/gdb/bcache.h index 5355e7481c..fb37ec9e43 100644 --- a/gdb/bcache.h +++ b/gdb/bcache.h @@ -1,13 +1,14 @@ /* Include file cached obstack implementation. Written by Fred Fish Rewritten by Jim Blandy - Copyright 1999 Free Software Foundation, Inc. + + Copyright (C) 1999-2013 Free Software Foundation, Inc. This file is part of GDB. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or + the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -16,9 +17,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ + along with this program. If not, see . */ #ifndef BCACHE_H #define BCACHE_H 1 @@ -47,84 +46,131 @@ You shouldn't modify the strings you get from a bcache, because: - You don't necessarily know who you're sharing space with. If I - stick eight bytes of text in a bcache, and then stick an - eight-byte structure in the same bcache, there's no guarantee - those two objects don't actually comprise the same sequence of - bytes. If they happen to, the bcache will use a single byte - string for both of them. Then, modifying the structure will - change the string. In bizarre ways. + stick eight bytes of text in a bcache, and then stick an eight-byte + structure in the same bcache, there's no guarantee those two + objects don't actually comprise the same sequence of bytes. If + they happen to, the bcache will use a single byte string for both + of them. Then, modifying the structure will change the string. In + bizarre ways. - Even if you know for some other reason that all that's okay, - there's another problem. A bcache stores all its strings in a - hash table. If you modify a string's contents, you will probably - change its hash value. This means that the modified string is - now in the wrong place in the hash table, and future bcache - probes will never find it. So by mutating a string, you give up - any chance of sharing its space with future duplicates. */ - - -/* The type used to hold a single bcache string. The user data is - stored in d.data. Since it can be any type, it needs to have the - same alignment as the most strict alignment of any type on the host - machine. I don't know of any really correct way to do this in - stock ANSI C, so just do it the same way obstack.h does. - - It would be nicer to have this stuff hidden away in bcache.c, but - struct objstack contains a struct bcache directly --- not a pointer - to one --- and then the memory-mapped stuff makes this a real pain. - We don't strictly need to expose struct bstring, but it's better to - have it all in one place. */ - -struct bstring { - struct bstring *next; - size_t length; - - union - { - char data[1]; - double dummy; - } - d; -}; - - -/* The structure for a bcache itself. - To initialize a bcache, just fill it with zeros. */ -struct bcache { - /* All the bstrings are allocated here. */ - struct obstack cache; - - /* How many hash buckets we're using. */ - unsigned int num_buckets; + there's another problem. A bcache stores all its strings in a hash + table. If you modify a string's contents, you will probably change + its hash value. This means that the modified string is now in the + wrong place in the hash table, and future bcache probes will never + find it. So by mutating a string, you give up any chance of + sharing its space with future duplicates. + + + Size of bcache VS hashtab: + + For bcache, the most critical cost is size (or more exactly the + overhead added by the bcache). It turns out that the bcache is + remarkably efficient. + + Assuming a 32-bit system (the hash table slots are 4 bytes), + ignoring alignment, and limit strings to 255 bytes (1 byte length) + we get ... + + bcache: This uses a separate linked list to track the hash chain. + The numbers show roughly 100% occupancy of the hash table and an + average chain length of 4. Spreading the slot cost over the 4 + chain elements: + + 4 (slot) / 4 (chain length) + 1 (length) + 4 (chain) = 6 bytes + + hashtab: This uses a more traditional re-hash algorithm where the + chain is maintained within the hash table. The table occupancy is + kept below 75% but we'll assume its perfect: + + 4 (slot) x 4/3 (occupancy) + 1 (length) = 6 1/3 bytes + + So a perfect hashtab has just slightly larger than an average + bcache. + + It turns out that an average hashtab is far worse. Two things + hurt: + + - Hashtab's occupancy is more like 50% (it ranges between 38% and + 75%) giving a per slot cost of 4x2 vs 4x4/3. + + - the string structure needs to be aligned to 8 bytes which for + hashtab wastes 7 bytes, while for bcache wastes only 3. + + This gives: + + hashtab: 4 x 2 + 1 + 7 = 16 bytes + + bcache 4 / 4 + 1 + 4 + 3 = 9 bytes + + The numbers of GDB debugging GDB support this. ~40% vs ~70% overhead. + + + Speed of bcache VS hashtab (the half hash hack): + + While hashtab has a typical chain length of 1, bcache has a chain + length of round 4. This means that the bcache will require + something like double the number of compares after that initial + hash. In both cases the comparison takes the form: + + a.length == b.length && memcmp (a.data, b.data, a.length) == 0 + + That is lengths are checked before doing the memcmp. + + For GDB debugging GDB, it turned out that all lengths were 24 bytes + (no C++ so only psymbols were cached) and hence, all compares + required a call to memcmp. As a hack, two bytes of padding + (mentioned above) are used to store the upper 16 bits of the + string's hash value and then that is used in the comparison vis: + + a.half_hash == b.half_hash && a.length == b.length && memcmp + (a.data, b.data, a.length) + + The numbers from GDB debugging GDB show this to be a remarkable + 100% effective (only necessary length and memcmp tests being + performed). + + Mind you, looking at the wall clock, the same GDB debugging GDB + showed only marginal speed up (0.780 vs 0.773s). Seems GDB is too + busy doing something else :-( - /* Hash buckets. This table is allocated using malloc, so when we - grow the table we can return the old table to the system. */ - struct bstring **bucket; +*/ - /* Statistics. */ - unsigned long unique_count; /* number of unique strings */ - long total_count; /* total number of strings cached, including dups */ - long unique_size; /* size of unique strings, in bytes */ - long total_size; /* total number of bytes cached, including dups */ - long structure_size; /* total size of bcache, including infrastructure */ -}; +struct bcache; /* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has never seen those bytes before, add a copy of them to BCACHE. In - either case, return a pointer to BCACHE's copy of that string. */ -extern void *bcache (void *addr, int length, struct bcache *bcache); - -/* Free all the storage that BCACHE refers to. The result is a valid, - but empty, bcache. This does not free BCACHE itself, since that - might be part of some larger object. */ -extern void free_bcache (struct bcache *bcache); + either case, return a pointer to BCACHE's copy of that string. + Since the cached value is ment to be read-only, return a const + buffer. */ +extern const void *bcache (const void *addr, int length, + struct bcache *bcache); + +/* Like bcache, but if ADDED is not NULL, set *ADDED to true if the + bytes were newly added to the cache, or to false if the bytes were + found in the cache. */ +extern const void *bcache_full (const void *addr, int length, + struct bcache *bcache, int *added); + +/* Free all the storage used by BCACHE. */ +extern void bcache_xfree (struct bcache *bcache); + +/* Create a new bcache object. */ +extern struct bcache *bcache_xmalloc ( + unsigned long (*hash_function)(const void *, int length), + int (*compare_function)(const void *, const void *, int length)); /* Print statistics on BCACHE's memory usage and efficacity at eliminating duplication. TYPE should be a string describing the kind of data BCACHE holds. Statistics are printed using `printf_filtered' and its ilk. */ extern void print_bcache_statistics (struct bcache *bcache, char *type); -/* The hash function */ -extern unsigned long hash(void *addr, int length); +extern int bcache_memory_used (struct bcache *bcache); + +/* The hash functions */ +extern unsigned long hash(const void *addr, int length); +extern unsigned long hash_continue (const void *addr, int length, + unsigned long h); + #endif /* BCACHE_H */