1 /* hash.c - hash table lookup strings -
2 Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
4 This file is part of GAS, the GNU Assembler.
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
21 * BUGS, GRIPES, APOLOGIA etc.
23 * A typical user doesn't need ALL this: I intend to make a library out
24 * of it one day - Dean Elsner.
25 * Also, I want to change the definition of a symbol to (address,length)
26 * so I can put arbitrary binary in the names stored. [see hsh.c for that]
28 * This slime is common coupled inside the module. Com-coupling (and other
29 * vandalism) was done to speed running time. The interfaces at the
30 * module's edges are adequately clean.
32 * There is no way to (a) run a test script through this heap and (b)
33 * compare results with previous scripts, to see if we have broken any
34 * code. Use GNU (f)utilities to do this. A few commands assist test.
35 * The testing is awkward: it tries to be both batch & interactive.
36 * For now, interactive rules!
40 * The idea is to implement a symbol table. A test jig is here.
41 * Symbols are arbitrary strings; they can't contain '\0'.
42 * [See hsh.c for a more general symbol flavour.]
43 * Each symbol is associated with a char*, which can point to anything
44 * you want, allowing an arbitrary property list for each symbol.
46 * The basic operations are:
48 * new creates symbol table, returns handle
49 * find (symbol) returns char*
50 * insert (symbol,char*) error if symbol already in table
51 * delete (symbol) returns char* if symbol was in table
52 * apply so you can delete all symbols before die()
53 * die destroy symbol table (free up memory)
55 * Supplementary functions include:
57 * say how big? what % full?
58 * replace (symbol,newval) report previous value
59 * jam (symbol,value) assert symbol:=value
61 * You, the caller, have control over errors: this just reports them.
63 * This package requires malloc(), free().
64 * Malloc(size) returns NULL or address of char[size].
65 * Free(address) frees same.
69 * The code and its structures are re-entrant.
71 * Before you do anything else, you must call hash_new() which will
72 * return the address of a hash-table-control-block. You then use
73 * this address as a handle of the symbol table by passing it to all
74 * the other hash_...() functions. The only approved way to recover
75 * the memory used by the symbol table is to call hash_die() with the
76 * handle of the symbol table.
78 * Before you call hash_die() you normally delete anything pointed to
79 * by individual symbols. After hash_die() you can't use that symbol
82 * The char* you associate with a symbol may not be NULL (0) because
83 * NULL is returned whenever a symbol is not in the table. Any other
84 * value is OK, except DELETED, #defined below.
86 * When you supply a symbol string for insertion, YOU MUST PRESERVE THE
87 * STRING until that symbol is deleted from the table. The reason is that
88 * only the address you supply, NOT the symbol string itself, is stored
89 * in the symbol table.
91 * You may delete and add symbols arbitrarily.
92 * Any or all symbols may have the same 'value' (char *). In fact, these
93 * routines don't do anything with your symbol values.
95 * You have no right to know where the symbol:char* mapping is stored,
96 * because it moves around in memory; also because we may change how it
97 * works and we don't want to break your code do we? However the handle
98 * (address of struct hash_control) is never changed in
99 * the life of the symbol table.
101 * What you CAN find out about a symbol table is:
102 * how many slots are in the hash table?
103 * how many slots are filled with symbols?
104 * (total hashes,collisions) for (reads,writes) (*)
105 * All of the above values vary in time.
106 * (*) some of these numbers will not be meaningful if we change the
112 * Hash table is an array of hash_entries; each entry is a pointer to a
113 * string and a user-supplied value 1 char* wide.
115 * The array always has 2 ** n elements, n>0, n integer.
116 * There is also a 'wall' entry after the array, which is always empty
117 * and acts as a sentinel to stop running off the end of the array.
118 * When the array gets too full, we create a new array twice as large
119 * and re-hash the symbols into the new array, then forget the old array.
120 * (Of course, we copy the values into the new array before we junk the
129 #define TRUE (!FALSE)
130 #endif /* no FALSE yet */
133 #define min(a, b) ((a) < (b) ? (a) : (b))
137 #define error as_fatal
139 #define DELETED ((PTR)1) /* guarenteed invalid address */
140 #define START_POWER (11) /* power of two: size of new hash table */
142 /* TRUE if a symbol is in entry @ ptr. */
143 #define islive(ptr) (ptr->hash_string && ptr->hash_string!=DELETED)
145 /* Number of slots in hash table. The wall does not count here.
146 We expect this is always a power of 2. */
147 #define STAT_SIZE (0)
148 #define STAT_ACCESS (1) /* number of hash_ask()s */
149 #define STAT__READ (0) /* reading */
150 #define STAT__WRITE (1) /* writing */
151 /* Number of collisions (total). This may exceed STAT_ACCESS if we
152 have lots of collisions/access. */
153 #define STAT_COLLIDE (3)
154 #define STAT_USED (5) /* slots used right now */
155 #define STATLENGTH (6) /* size of statistics block */
156 #if STATLENGTH != HASH_STATLENGTH
157 Panic
! Please make
#include "stat.h" agree with previous definitions!
160 /* #define SUSPECT to do runtime checks */
161 /* #define TEST to be a test jig for hash...() */
164 /* TEST: use smaller hash table */
166 #define START_POWER (3)
168 #define START_SIZE (8)
170 #define START_FULL (4)
173 /*------------------ plan ---------------------------------- i = internal
175 struct hash_control * c;
176 struct hash_entry * e; i
177 int b[z]; buffer for statistics
179 char * s; symbol string (address) [ key ]
180 char * v; value string (address) [datum]
181 boolean f; TRUE if we found s in hash table i
182 char * t; error string; 0 means OK
183 int a; access type [0...n) i
185 c=hash_new () create new hash_control
187 hash_die (c) destroy hash_control (and hash table)
188 table should be empty.
189 doesn't check if table is empty.
190 c has no meaning after this.
192 hash_say (c,b,z) report statistics of hash_control.
193 also report number of available statistics.
195 v=hash_delete (c,s) delete symbol, return old value if any.
196 ask() NULL means no old value.
199 v=hash_replace (c,s,v) replace old value of s with v.
200 ask() NULL means no old value: no table change.
203 t=hash_insert (c,s,v) insert (s,v) in c.
204 ask() return error string.
205 f it is an error to insert if s is already
207 if any error, c is unchanged.
209 t=hash_jam (c,s,v) assert that new value of s will be v. i
210 ask() it may decide to GROW the table. i
213 t=hash_grow (c) grow the hash table. i
214 jam() will invoke JAM. i
216 ?=hash_apply (c,y) apply y() to every symbol in c.
217 y entries visited in 'unspecified' order.
219 v=hash_find (c,s) return value of s, or NULL if s not in c.
223 f,e=hash_ask() (c,s,a) return slot where s SHOULD live. i
224 code() maintain collision stats in c. i
226 .=hash_code (c,s) compute hash-code for s, i
227 from parameters of c. i
231 /* Returned by hash_ask() to stop extra testing. hash_ask() wants to
232 return both a slot and a status. This is the status. TRUE: found
233 symbol FALSE: absent: empty or deleted slot Also returned by
234 hash_jam(). TRUE: we replaced a value FALSE: we inserted a value. */
235 static char hash_found
;
237 static struct hash_entry
*hash_ask
PARAMS ((struct hash_control
*,
239 static int hash_code
PARAMS ((struct hash_control
*, const char *));
240 static const char *hash_grow
PARAMS ((struct hash_control
*));
242 /* Create a new hash table. Return NULL if failed; otherwise return handle
243 (address of struct hash). */
244 struct hash_control
*
247 struct hash_control
*retval
;
248 struct hash_entry
*room
; /* points to hash table */
249 struct hash_entry
*wall
;
250 struct hash_entry
*entry
;
251 int *ip
; /* scan stats block of struct hash_control */
252 int *nd
; /* limit of stats block */
254 room
= (struct hash_entry
*) xmalloc (sizeof (struct hash_entry
)
255 /* +1 for the wall entry */
256 * ((1 << START_POWER
) + 1));
257 retval
= (struct hash_control
*) xmalloc (sizeof (struct hash_control
));
259 nd
= retval
->hash_stat
+ STATLENGTH
;
260 for (ip
= retval
->hash_stat
; ip
< nd
; ip
++)
263 retval
->hash_stat
[STAT_SIZE
] = 1 << START_POWER
;
264 retval
->hash_mask
= (1 << START_POWER
) - 1;
265 retval
->hash_sizelog
= START_POWER
;
266 /* works for 1's compl ok */
267 retval
->hash_where
= room
;
269 wall
= room
+ (1 << START_POWER
);
270 retval
->hash_full
= (1 << START_POWER
) / 2;
271 for (entry
= room
; entry
<= wall
; entry
++)
272 entry
->hash_string
= NULL
;
277 * h a s h _ d i e ( )
279 * Table should be empty, but this is not checked.
280 * To empty the table, try hash_apply()ing a symbol deleter.
281 * Return to free memory both the hash table and its control
283 * 'handle' has no meaning after this function.
284 * No errors are recoverable.
288 struct hash_control
*handle
;
290 free ((char *) handle
->hash_where
);
291 free ((char *) handle
);
295 * h a s h _ s a y ( )
297 * Return the size of the statistics table, and as many statistics as
298 * we can until either (a) we have run out of statistics or (b) caller
299 * has run out of buffer.
300 * NOTE: hash_say treats all statistics alike.
301 * These numbers may change with time, due to insertions, deletions
302 * and expansions of the table.
303 * The first "statistic" returned is the length of hash_stat[].
304 * Then contents of hash_stat[] are read out (in ascending order)
305 * until your buffer or hash_stat[] is exhausted.
308 hash_say (handle
, buffer
, bufsiz
)
309 struct hash_control
*handle
;
310 int buffer
[ /*bufsiz*/ ];
313 int *nd
; /* limit of statistics block */
314 int *ip
; /* scan statistics */
316 ip
= handle
->hash_stat
;
317 nd
= ip
+ min (bufsiz
- 1, STATLENGTH
);
318 if (bufsiz
> 0) /* trust nothing! bufsiz<=0 is dangerous */
320 *buffer
++ = STATLENGTH
;
321 for (; ip
< nd
; ip
++, buffer
++)
329 * h a s h _ d e l e t e ( )
331 * Try to delete a symbol from the table.
332 * If it was there, return its value (and adjust STAT_USED).
333 * Otherwise, return NULL.
334 * Anyway, the symbol is not present after this function.
337 PTR
/* NULL if string not in table, else */
338 /* returns value of deleted symbol */
339 hash_delete (handle
, string
)
340 struct hash_control
*handle
;
344 struct hash_entry
*entry
;
346 entry
= hash_ask (handle
, string
, STAT__WRITE
);
349 retval
= entry
->hash_value
;
350 entry
->hash_string
= DELETED
;
351 handle
->hash_stat
[STAT_USED
] -= 1;
353 if (handle
->hash_stat
[STAT_USED
] < 0)
355 error ("hash_delete");
357 #endif /* def SUSPECT */
367 * h a s h _ r e p l a c e ( )
369 * Try to replace the old value of a symbol with a new value.
370 * Normally return the old value.
371 * Return NULL and don't change the table if the symbol is not already
375 hash_replace (handle
, string
, value
)
376 struct hash_control
*handle
;
380 struct hash_entry
*entry
;
383 entry
= hash_ask (handle
, string
, STAT__WRITE
);
386 retval
= entry
->hash_value
;
387 entry
->hash_value
= value
;
398 * h a s h _ i n s e r t ( )
400 * Insert a (symbol-string, value) into the hash table.
401 * Return an error string, 0 means OK.
402 * It is an 'error' to insert an existing symbol.
405 const char * /* return error string */
406 hash_insert (handle
, string
, value
)
407 struct hash_control
*handle
;
411 struct hash_entry
*entry
;
415 if (handle
->hash_stat
[STAT_USED
] > handle
->hash_full
)
417 retval
= hash_grow (handle
);
421 entry
= hash_ask (handle
, string
, STAT__WRITE
);
428 entry
->hash_value
= value
;
429 entry
->hash_string
= string
;
430 handle
->hash_stat
[STAT_USED
] += 1;
437 * h a s h _ j a m ( )
439 * Regardless of what was in the symbol table before, after hash_jam()
440 * the named symbol has the given value. The symbol is either inserted or
441 * (its value is) replaced.
442 * An error message string is returned, 0 means OK.
444 * WARNING: this may decide to grow the hashed symbol table.
445 * To do this, we call hash_grow(), WHICH WILL recursively CALL US.
447 * We report status internally: hash_found is TRUE if we replaced, but
448 * false if we inserted.
451 hash_jam (handle
, string
, value
)
452 struct hash_control
*handle
;
457 struct hash_entry
*entry
;
460 if (handle
->hash_stat
[STAT_USED
] > handle
->hash_full
)
462 retval
= hash_grow (handle
);
466 entry
= hash_ask (handle
, string
, STAT__WRITE
);
469 entry
->hash_string
= string
;
470 handle
->hash_stat
[STAT_USED
] += 1;
472 entry
->hash_value
= value
;
478 * h a s h _ g r o w ( )
480 * Grow a new (bigger) hash table from the old one.
481 * We choose to double the hash table's size.
482 * Return a human-readable error string: 0 if OK.
483 * Warning! This uses hash_jam(), which had better not recurse
484 * back here! Hash_jam() conditionally calls us, but we ALWAYS
489 hash_grow (handle
) /* make a hash table grow */
490 struct hash_control
*handle
;
492 struct hash_entry
*newwall
;
493 struct hash_entry
*newwhere
;
494 struct hash_entry
*newtrack
;
495 struct hash_entry
*oldtrack
;
496 struct hash_entry
*oldwhere
;
497 struct hash_entry
*oldwall
;
507 * capture info about old hash table
509 oldwhere
= handle
->hash_where
;
510 oldwall
= handle
->hash_wall
;
512 oldused
= handle
->hash_stat
[STAT_USED
];
515 * attempt to get enough room for a hash table twice as big
517 temp
= handle
->hash_stat
[STAT_SIZE
];
518 if ((newwhere
= ((struct hash_entry
*)
519 xmalloc ((unsigned long) ((temp
+ temp
+ 1)
520 * sizeof (struct hash_entry
)))))
522 /* +1 for wall slot */
524 retval
= 0; /* assume success until proven otherwise */
526 * have enough room: now we do all the work.
527 * double the size of everything in handle,
528 * note: hash_mask frob works for 1's & for 2's complement machines
530 handle
->hash_mask
= handle
->hash_mask
+ handle
->hash_mask
+ 1;
531 handle
->hash_stat
[STAT_SIZE
] <<= 1;
532 newsize
= handle
->hash_stat
[STAT_SIZE
];
533 handle
->hash_where
= newwhere
;
534 handle
->hash_full
<<= 1;
535 handle
->hash_sizelog
+= 1;
536 handle
->hash_stat
[STAT_USED
] = 0;
538 newwall
= newwhere
+ newsize
;
540 * set all those pesky new slots to vacant.
542 for (newtrack
= newwhere
; newtrack
<= newwall
; newtrack
++)
544 newtrack
->hash_string
= NULL
;
547 * we will do a scan of the old table, the hard way, using the
548 * new control block to re-insert the data into new hash table.
550 handle
->hash_stat
[STAT_USED
] = 0; /* inserts will bump it up to correct */
551 for (oldtrack
= oldwhere
; oldtrack
< oldwall
; oldtrack
++)
552 if (((string
= oldtrack
->hash_string
) != NULL
) && string
!= DELETED
)
553 if ((retval
= hash_jam (handle
, string
, oldtrack
->hash_value
)))
557 if (!retval
&& handle
->hash_stat
[STAT_USED
] != oldused
)
559 retval
= "hash_used";
565 * we have a completely faked up control block.
566 * return the old hash table.
568 free ((char *) oldwhere
);
570 * Here with success. retval is already 0.
582 * h a s h _ a p p l y ( )
584 * Use this to scan each entry in symbol table.
585 * For each symbol, this calls (applies) a nominated function supplying the
586 * symbol's value (and the symbol's name).
587 * The idea is you use this to destroy whatever is associated with
588 * any values in the table BEFORE you destroy the table with hash_die.
589 * Of course, you can use it for other jobs; whenever you need to
590 * visit all extant symbols in the table.
592 * We choose to have a call-you-back idea for two reasons:
593 * aesthetic: it is a neater idea to use apply than an explicit loop
594 * sensible: if we ever had to grow the symbol table (due to insertions)
595 * then we would lose our place in the table when we re-hashed
596 * symbols into the new table in a different order.
598 * The order symbols are visited depends entirely on the hashing function.
599 * Whenever you insert a (symbol, value) you risk expanding the table. If
600 * you do expand the table, then the hashing function WILL change, so you
601 * MIGHT get a different order of symbols visited. In other words, if you
602 * want the same order of visiting symbols as the last time you used
603 * hash_apply() then you better not have done any hash_insert()s or
604 * hash_jam()s since the last time you used hash_apply().
606 * In future we may use the value returned by your nominated function.
607 * One idea is to abort the scan if, after applying the function to a
608 * certain node, the function returns a certain code.
609 * To be safe, please make your functions of type char *. If you always
610 * return NULL, then the scan will complete, visiting every symbol in
611 * the table exactly once. ALL OTHER RETURNED VALUES have no meaning yet!
614 * The function you supply should be of the form:
615 * char * myfunct(string,value)
616 * char * string; |* the symbol's name *|
617 * char * value; |* the symbol's value *|
623 * The returned value of hash_apply() is (char*)NULL. In future it may return
624 * other values. NULL means "completed scan OK". Other values have no meaning
625 * yet. (The function has no graceful failures.)
628 hash_apply (handle
, function
)
629 struct hash_control
*handle
;
630 char *(*function
) ();
632 struct hash_entry
*entry
;
633 struct hash_entry
*wall
;
635 wall
= handle
->hash_wall
;
636 for (entry
= handle
->hash_where
; entry
< wall
; entry
++)
638 if (islive (entry
)) /* silly code: tests entry->string twice! */
640 (*function
) (entry
->hash_string
, entry
->hash_value
);
647 * h a s h _ f i n d ( )
649 * Given symbol string, find value (if any).
650 * Return found value or NULL.
653 hash_find (handle
, string
)
654 struct hash_control
*handle
;
657 struct hash_entry
*entry
;
659 entry
= hash_ask (handle
, string
, STAT__READ
);
661 return entry
->hash_value
;
667 * h a s h _ a s k ( )
669 * Searches for given symbol string.
670 * Return the slot where it OUGHT to live. It may be there.
671 * Return hash_found: TRUE only if symbol is in that slot.
672 * Access argument is to help keep statistics in control block.
675 static struct hash_entry
* /* string slot, may be empty or deleted */
676 hash_ask (handle
, string
, access
)
677 struct hash_control
*handle
;
679 int access
; /* access type */
682 struct hash_entry
*slot
;
683 int collision
; /* count collisions */
685 /* start looking here */
686 slot
= handle
->hash_where
+ hash_code (handle
, string
);
688 handle
->hash_stat
[STAT_ACCESS
+ access
] += 1;
691 while (((s
= slot
->hash_string
) != NULL
) && s
!= DELETED
)
693 if (string
== s
|| !strcmp (string
, s
))
702 * in use: we found string slot
704 * at wall: we fell off: wrap round ????
705 * in table: dig here slot
706 * at DELETED: dig here slot
708 if (slot
== handle
->hash_wall
)
710 slot
= handle
->hash_where
;/* now look again */
711 while (((s
= slot
->hash_string
) != NULL
) && s
!= DELETED
)
713 if (string
== s
|| !strcmp (string
, s
))
720 * in use: we found it slot
721 * empty: wall: ERROR IMPOSSIBLE !!!!
722 * in table: dig here slot
723 * DELETED:dig here slot
726 handle
->hash_stat
[STAT_COLLIDE
+ access
] += collision
;
727 return (slot
); /* also return hash_found */
733 * Does hashing of symbol string to hash number.
737 hash_code (handle
, string
)
738 struct hash_control
*handle
;
741 #if 1 /* There seems to be some interesting property of this function
742 that prevents the bfd version below from being an adequate
743 substitute. @@ Figure out what this property is! */
744 long h
; /* hash code built here */
745 long c
; /* each character lands here */
746 int n
; /* Amount to shift h by */
748 n
= (handle
->hash_sizelog
- 3);
750 while ((c
= *string
++) != 0)
753 h
= (h
<< 3) + (h
>> n
) + c
;
755 return (h
& handle
->hash_mask
);
759 unsigned int len
= 0;
762 while ((c
= *string
++) != 0)
768 h
+= len
+ (len
<< 17);
770 return h
& handle
->hash_mask
;
775 * Here is a test program to exercise above.
779 #define TABLES (6) /* number of hash tables to maintain */
780 /* (at once) in any testing */
781 #define STATBUFSIZE (12) /* we can have 12 statistics */
783 int statbuf
[STATBUFSIZE
]; /* display statistics here */
784 char answer
[100]; /* human farts here */
785 char *hashtable
[TABLES
]; /* we test many hash tables at once */
786 char *h
; /* points to curent hash_control */
794 int number
; /* number 0:TABLES-1 of current hashed */
799 char (*applicatee ());
806 printf ("type h <RETURN> for help\n");
809 printf ("hash_test command: ");
812 if (isupper (command
))
813 command
= tolower (command
); /* ecch! */
817 printf ("old hash table #=%d.\n", number
);
821 for (pp
= hashtable
; pp
< hashtable
+ TABLES
; pp
++)
823 printf ("address of hash table #%d control block is %xx\n"
824 ,pp
- hashtable
, *pp
);
828 hash_apply (h
, applicatee
);
831 hash_apply (h
, destroy
);
835 p
= hash_find (h
, name
= what ("symbol"));
836 printf ("value of \"%s\" is \"%s\"\n", name
, p
? p
: "NOT-PRESENT");
839 printf ("# show old, select new default hash table number\n");
840 printf ("? display all hashtable control block addresses\n");
841 printf ("a apply a simple display-er to each symbol in table\n");
842 printf ("d die: destroy hashtable\n");
843 printf ("f find value of nominated symbol\n");
844 printf ("h this help\n");
845 printf ("i insert value into symbol\n");
846 printf ("j jam value into symbol\n");
847 printf ("n new hashtable\n");
848 printf ("r replace a value with another\n");
849 printf ("s say what %% of table is used\n");
850 printf ("q exit this program\n");
851 printf ("x delete a symbol from table, report its value\n");
854 p
= hash_insert (h
, name
= what ("symbol"), value
= what ("value"));
857 printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name
, value
,
862 p
= hash_jam (h
, name
= what ("symbol"), value
= what ("value"));
865 printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name
, value
, p
);
869 h
= hashtable
[number
] = (char *) hash_new ();
874 p
= hash_replace (h
, name
= what ("symbol"), value
= what ("value"));
875 printf ("old value was \"%s\"\n", p
? p
: "{}");
878 hash_say (h
, statbuf
, STATBUFSIZE
);
879 for (ip
= statbuf
; ip
< statbuf
+ STATBUFSIZE
; ip
++)
886 p
= hash_delete (h
, name
= what ("symbol"));
887 printf ("old value was \"%s\"\n", p
? p
: "{}");
890 printf ("I can't understand command \"%c\"\n", command
);
903 printf (" %s : ", description
);
905 /* will one day clean up answer here */
906 retval
= malloc (strlen (answer
) + 1);
911 (void) strcpy (retval
, answer
);
916 destroy (string
, value
)
927 applicatee (string
, value
)
931 printf ("%.20s-%.20s\n", string
, value
);
935 whattable () /* determine number: what hash table to use */
936 /* also determine h: points to hash_control */
941 printf (" what hash table (%d:%d) ? ", 0, TABLES
- 1);
943 sscanf (answer
, "%d", &number
);
944 if (number
>= 0 && number
< TABLES
)
946 h
= hashtable
[number
];
949 printf ("warning: current hash-table-#%d. has no hash-control\n", number
);
955 printf ("invalid hash table number: %d\n", number
);
962 #endif /* #ifdef TEST */
This page took 0.053108 seconds and 4 git commands to generate.