Commit | Line | Data |
---|---|---|
252b5132 RH |
1 | /* hash.c - hash table lookup strings - |
2 | Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 96, 1998 | |
3 | Free Software Foundation, Inc. | |
4 | ||
5 | This file is part of GAS, the GNU Assembler. | |
6 | ||
7 | GAS is free software; you can redistribute it and/or modify | |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GAS is distributed in the hope that it will be useful, | |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GAS; see the file COPYING. If not, write to | |
19 | the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | /* | |
22 | * BUGS, GRIPES, APOLOGIA etc. | |
23 | * | |
24 | * A typical user doesn't need ALL this: I intend to make a library out | |
25 | * of it one day - Dean Elsner. | |
26 | * Also, I want to change the definition of a symbol to (address,length) | |
27 | * so I can put arbitrary binary in the names stored. [see hsh.c for that] | |
28 | * | |
29 | * This slime is common coupled inside the module. Com-coupling (and other | |
30 | * vandalism) was done to speed running time. The interfaces at the | |
31 | * module's edges are adequately clean. | |
32 | * | |
33 | * There is no way to (a) run a test script through this heap and (b) | |
34 | * compare results with previous scripts, to see if we have broken any | |
35 | * code. Use GNU (f)utilities to do this. A few commands assist test. | |
36 | * The testing is awkward: it tries to be both batch & interactive. | |
37 | * For now, interactive rules! | |
38 | */ | |
39 | \f | |
40 | /* | |
41 | * The idea is to implement a symbol table. A test jig is here. | |
42 | * Symbols are arbitrary strings; they can't contain '\0'. | |
43 | * [See hsh.c for a more general symbol flavour.] | |
44 | * Each symbol is associated with a char*, which can point to anything | |
45 | * you want, allowing an arbitrary property list for each symbol. | |
46 | * | |
47 | * The basic operations are: | |
48 | * | |
49 | * new creates symbol table, returns handle | |
50 | * find (symbol) returns char* | |
51 | * insert (symbol,char*) error if symbol already in table | |
52 | * delete (symbol) returns char* if symbol was in table | |
53 | * apply so you can delete all symbols before die() | |
54 | * die destroy symbol table (free up memory) | |
55 | * | |
56 | * Supplementary functions include: | |
57 | * | |
58 | * say how big? what % full? | |
59 | * replace (symbol,newval) report previous value | |
60 | * jam (symbol,value) assert symbol:=value | |
61 | * | |
62 | * You, the caller, have control over errors: this just reports them. | |
63 | * | |
64 | * This package requires malloc(), free(). | |
65 | * Malloc(size) returns NULL or address of char[size]. | |
66 | * Free(address) frees same. | |
67 | */ | |
68 | \f | |
69 | /* | |
70 | * The code and its structures are re-enterent. | |
71 | * | |
72 | * Before you do anything else, you must call hash_new() which will | |
73 | * return the address of a hash-table-control-block. You then use | |
74 | * this address as a handle of the symbol table by passing it to all | |
75 | * the other hash_...() functions. The only approved way to recover | |
76 | * the memory used by the symbol table is to call hash_die() with the | |
77 | * handle of the symbol table. | |
78 | * | |
79 | * Before you call hash_die() you normally delete anything pointed to | |
80 | * by individual symbols. After hash_die() you can't use that symbol | |
81 | * table again. | |
82 | * | |
83 | * The char* you associate with a symbol may not be NULL (0) because | |
84 | * NULL is returned whenever a symbol is not in the table. Any other | |
85 | * value is OK, except DELETED, #defined below. | |
86 | * | |
87 | * When you supply a symbol string for insertion, YOU MUST PRESERVE THE | |
88 | * STRING until that symbol is deleted from the table. The reason is that | |
89 | * only the address you supply, NOT the symbol string itself, is stored | |
90 | * in the symbol table. | |
91 | * | |
92 | * You may delete and add symbols arbitrarily. | |
93 | * Any or all symbols may have the same 'value' (char *). In fact, these | |
94 | * routines don't do anything with your symbol values. | |
95 | * | |
96 | * You have no right to know where the symbol:char* mapping is stored, | |
97 | * because it moves around in memory; also because we may change how it | |
98 | * works and we don't want to break your code do we? However the handle | |
99 | * (address of struct hash_control) is never changed in | |
100 | * the life of the symbol table. | |
101 | * | |
102 | * What you CAN find out about a symbol table is: | |
103 | * how many slots are in the hash table? | |
104 | * how many slots are filled with symbols? | |
105 | * (total hashes,collisions) for (reads,writes) (*) | |
106 | * All of the above values vary in time. | |
107 | * (*) some of these numbers will not be meaningful if we change the | |
108 | * internals. */ | |
109 | \f | |
110 | /* | |
111 | * I N T E R N A L | |
112 | * | |
113 | * Hash table is an array of hash_entries; each entry is a pointer to a | |
114 | * a string and a user-supplied value 1 char* wide. | |
115 | * | |
116 | * The array always has 2 ** n elements, n>0, n integer. | |
117 | * There is also a 'wall' entry after the array, which is always empty | |
118 | * and acts as a sentinel to stop running off the end of the array. | |
119 | * When the array gets too full, we create a new array twice as large | |
120 | * and re-hash the symbols into the new array, then forget the old array. | |
121 | * (Of course, we copy the values into the new array before we junk the | |
122 | * old array!) | |
123 | * | |
124 | */ | |
125 | ||
126 | #include <stdio.h> | |
127 | ||
128 | #ifndef FALSE | |
129 | #define FALSE (0) | |
130 | #define TRUE (!FALSE) | |
131 | #endif /* no FALSE yet */ | |
132 | ||
133 | #include <ctype.h> | |
134 | #define min(a, b) ((a) < (b) ? (a) : (b)) | |
135 | ||
136 | #include "as.h" | |
137 | ||
138 | #define error as_fatal | |
139 | ||
140 | static char _deleted_[1]; | |
141 | #define DELETED ((PTR)_deleted_) /* guarenteed unique address */ | |
142 | #define START_POWER (10) /* power of two: size of new hash table */ | |
143 | ||
144 | /* TRUE if a symbol is in entry @ ptr. */ | |
145 | #define islive(ptr) (ptr->hash_string && ptr->hash_string!=DELETED) | |
146 | ||
147 | enum stat_enum { | |
148 | /* Number of slots in hash table. The wall does not count here. | |
149 | We expect this is always a power of 2. */ | |
150 | STAT_SIZE = 0, | |
151 | /* Number of hash_ask calls. */ | |
152 | STAT_ACCESS, | |
153 | STAT_ACCESS_w, | |
154 | /* Number of collisions (total). This may exceed STAT_ACCESS if we | |
155 | have lots of collisions/access. */ | |
156 | STAT_COLLIDE, | |
157 | STAT_COLLIDE_w, | |
158 | /* Slots used right now. */ | |
159 | STAT_USED, | |
160 | /* How many string compares? */ | |
161 | STAT_STRCMP, | |
162 | STAT_STRCMP_w, | |
163 | /* Size of statistics block... this must be last. */ | |
164 | STATLENGTH | |
165 | }; | |
166 | #define STAT__READ (0) /* reading */ | |
167 | #define STAT__WRITE (1) /* writing */ | |
168 | ||
169 | /* When we grow a hash table, by what power of two do we increase it? */ | |
170 | #define GROW_FACTOR 1 | |
171 | /* When should we grow it? */ | |
172 | #define FULL_VALUE(N) ((N) / 2) | |
173 | ||
174 | /* #define SUSPECT to do runtime checks */ | |
175 | /* #define TEST to be a test jig for hash...() */ | |
176 | ||
177 | #ifdef TEST | |
178 | /* TEST: use smaller hash table */ | |
179 | #undef START_POWER | |
180 | #define START_POWER (3) | |
181 | #undef START_SIZE | |
182 | #define START_SIZE (8) | |
183 | #undef START_FULL | |
184 | #define START_FULL (4) | |
185 | #endif | |
186 | \f | |
187 | struct hash_entry | |
188 | { | |
189 | const char *hash_string; /* points to where the symbol string is */ | |
190 | /* NULL means slot is not used */ | |
191 | /* DELETED means slot was deleted */ | |
192 | PTR hash_value; /* user's datum, associated with symbol */ | |
193 | unsigned long h; | |
194 | }; | |
195 | ||
196 | struct hash_control { | |
197 | struct hash_entry *hash_where;/* address of hash table */ | |
198 | int hash_sizelog; /* Log of ( hash_mask + 1 ) */ | |
199 | int hash_mask; /* masks a hash into index into table */ | |
200 | int hash_full; /* when hash_stat[STAT_USED] exceeds this, */ | |
201 | /* grow table */ | |
202 | struct hash_entry *hash_wall; /* point just after last (usable) entry */ | |
203 | /* here we have some statistics */ | |
204 | int hash_stat[STATLENGTH]; /* lies & statistics */ | |
205 | }; | |
206 | \f | |
207 | /*------------------ plan ---------------------------------- i = internal | |
208 | ||
209 | struct hash_control * c; | |
210 | struct hash_entry * e; i | |
211 | int b[z]; buffer for statistics | |
212 | z size of b | |
213 | char * s; symbol string (address) [ key ] | |
214 | char * v; value string (address) [datum] | |
215 | boolean f; TRUE if we found s in hash table i | |
216 | char * t; error string; 0 means OK | |
217 | int a; access type [0...n) i | |
218 | ||
219 | c=hash_new () create new hash_control | |
220 | ||
221 | hash_die (c) destroy hash_control (and hash table) | |
222 | table should be empty. | |
223 | doesn't check if table is empty. | |
224 | c has no meaning after this. | |
225 | ||
226 | hash_say (c,b,z) report statistics of hash_control. | |
227 | also report number of available statistics. | |
228 | ||
229 | v=hash_delete (c,s) delete symbol, return old value if any. | |
230 | ask() NULL means no old value. | |
231 | f | |
232 | ||
233 | v=hash_replace (c,s,v) replace old value of s with v. | |
234 | ask() NULL means no old value: no table change. | |
235 | f | |
236 | ||
237 | t=hash_insert (c,s,v) insert (s,v) in c. | |
238 | ask() return error string. | |
239 | f it is an error to insert if s is already | |
240 | in table. | |
241 | if any error, c is unchanged. | |
242 | ||
243 | t=hash_jam (c,s,v) assert that new value of s will be v. i | |
244 | ask() it may decide to GROW the table. i | |
245 | f i | |
246 | grow() i | |
247 | t=hash_grow (c) grow the hash table. i | |
248 | jam() will invoke JAM. i | |
249 | ||
250 | ?=hash_apply (c,y) apply y() to every symbol in c. | |
251 | y evtries visited in 'unspecified' order. | |
252 | ||
253 | v=hash_find (c,s) return value of s, or NULL if s not in c. | |
254 | ask() | |
255 | f | |
256 | ||
257 | f,e=hash_ask() (c,s,a) return slot where s SHOULD live. i | |
258 | code() maintain collision stats in c. i | |
259 | ||
260 | .=hash_code (c,s) compute hash-code for s, i | |
261 | from parameters of c. i | |
262 | ||
263 | */ | |
264 | \f | |
265 | /* Returned by hash_ask() to stop extra testing. hash_ask() wants to | |
266 | return both a slot and a status. This is the status. TRUE: found | |
267 | symbol FALSE: absent: empty or deleted slot Also returned by | |
268 | hash_jam(). TRUE: we replaced a value FALSE: we inserted a value. */ | |
269 | static char hash_found; | |
270 | ||
271 | static struct hash_entry *hash_ask PARAMS ((struct hash_control *, | |
272 | const char *, int)); | |
273 | static int hash_code PARAMS ((struct hash_control *, const char *)); | |
274 | static const char *hash_grow PARAMS ((struct hash_control *)); | |
275 | \f | |
276 | /* Create a new hash table. Return NULL if failed; otherwise return handle | |
277 | (address of struct hash). */ | |
278 | struct hash_control * | |
279 | hash_new () | |
280 | { | |
281 | struct hash_control *retval; | |
282 | struct hash_entry *room; /* points to hash table */ | |
283 | struct hash_entry *wall; | |
284 | struct hash_entry *entry; | |
285 | int *ip; /* scan stats block of struct hash_control */ | |
286 | int *nd; /* limit of stats block */ | |
287 | ||
288 | room = (struct hash_entry *) xmalloc (sizeof (struct hash_entry) | |
289 | /* +1 for the wall entry */ | |
290 | * ((1 << START_POWER) + 1)); | |
291 | retval = (struct hash_control *) xmalloc (sizeof (struct hash_control)); | |
292 | ||
293 | nd = retval->hash_stat + STATLENGTH; | |
294 | for (ip = retval->hash_stat; ip < nd; ip++) | |
295 | *ip = 0; | |
296 | ||
297 | retval->hash_stat[STAT_SIZE] = 1 << START_POWER; | |
298 | retval->hash_mask = (1 << START_POWER) - 1; | |
299 | retval->hash_sizelog = START_POWER; | |
300 | /* works for 1's compl ok */ | |
301 | retval->hash_where = room; | |
302 | retval->hash_wall = | |
303 | wall = room + (1 << START_POWER); | |
304 | retval->hash_full = FULL_VALUE (1 << START_POWER); | |
305 | for (entry = room; entry <= wall; entry++) | |
306 | entry->hash_string = NULL; | |
307 | return retval; | |
308 | } | |
309 | ||
310 | /* | |
311 | * h a s h _ d i e ( ) | |
312 | * | |
313 | * Table should be empty, but this is not checked. | |
314 | * To empty the table, try hash_apply()ing a symbol deleter. | |
315 | * Return to free memory both the hash table and it's control | |
316 | * block. | |
317 | * 'handle' has no meaning after this function. | |
318 | * No errors are recoverable. | |
319 | */ | |
320 | void | |
321 | hash_die (handle) | |
322 | struct hash_control *handle; | |
323 | { | |
324 | free ((char *) handle->hash_where); | |
325 | free ((char *) handle); | |
326 | } | |
327 | \f | |
328 | #ifdef TEST | |
329 | /* | |
330 | * h a s h _ s a y ( ) | |
331 | * | |
332 | * Return the size of the statistics table, and as many statistics as | |
333 | * we can until either (a) we have run out of statistics or (b) caller | |
334 | * has run out of buffer. | |
335 | * NOTE: hash_say treats all statistics alike. | |
336 | * These numbers may change with time, due to insertions, deletions | |
337 | * and expansions of the table. | |
338 | * The first "statistic" returned is the length of hash_stat[]. | |
339 | * Then contents of hash_stat[] are read out (in ascending order) | |
340 | * until your buffer or hash_stat[] is exausted. | |
341 | */ | |
342 | static void | |
343 | hash_say (handle, buffer, bufsiz) | |
344 | struct hash_control *handle; | |
345 | int buffer[ /*bufsiz*/ ]; | |
346 | int bufsiz; | |
347 | { | |
348 | int *nd; /* limit of statistics block */ | |
349 | int *ip; /* scan statistics */ | |
350 | ||
351 | ip = handle->hash_stat; | |
352 | nd = ip + min (bufsiz - 1, STATLENGTH); | |
353 | if (bufsiz > 0) /* trust nothing! bufsiz<=0 is dangerous */ | |
354 | { | |
355 | *buffer++ = STATLENGTH; | |
356 | for (; ip < nd; ip++, buffer++) | |
357 | { | |
358 | *buffer = *ip; | |
359 | } | |
360 | } | |
361 | } | |
362 | #endif | |
363 | \f | |
364 | /* | |
365 | * h a s h _ d e l e t e ( ) | |
366 | * | |
367 | * Try to delete a symbol from the table. | |
368 | * If it was there, return its value (and adjust STAT_USED). | |
369 | * Otherwise, return NULL. | |
370 | * Anyway, the symbol is not present after this function. | |
371 | * | |
372 | */ | |
373 | PTR /* NULL if string not in table, else */ | |
374 | /* returns value of deleted symbol */ | |
375 | hash_delete (handle, string) | |
376 | struct hash_control *handle; | |
377 | const char *string; | |
378 | { | |
379 | PTR retval; | |
380 | struct hash_entry *entry; | |
381 | ||
382 | entry = hash_ask (handle, string, STAT__WRITE); | |
383 | if (hash_found) | |
384 | { | |
385 | retval = entry->hash_value; | |
386 | entry->hash_string = DELETED; | |
387 | handle->hash_stat[STAT_USED] -= 1; | |
388 | #ifdef SUSPECT | |
389 | if (handle->hash_stat[STAT_USED] < 0) | |
390 | { | |
391 | error ("hash_delete"); | |
392 | } | |
393 | #endif /* def SUSPECT */ | |
394 | } | |
395 | else | |
396 | { | |
397 | retval = NULL; | |
398 | } | |
399 | return (retval); | |
400 | } | |
401 | \f | |
402 | /* | |
403 | * h a s h _ r e p l a c e ( ) | |
404 | * | |
405 | * Try to replace the old value of a symbol with a new value. | |
406 | * Normally return the old value. | |
407 | * Return NULL and don't change the table if the symbol is not already | |
408 | * in the table. | |
409 | */ | |
410 | PTR | |
411 | hash_replace (handle, string, value) | |
412 | struct hash_control *handle; | |
413 | const char *string; | |
414 | PTR value; | |
415 | { | |
416 | struct hash_entry *entry; | |
417 | char *retval; | |
418 | ||
419 | entry = hash_ask (handle, string, STAT__WRITE); | |
420 | if (hash_found) | |
421 | { | |
422 | retval = entry->hash_value; | |
423 | entry->hash_value = value; | |
424 | } | |
425 | else | |
426 | { | |
427 | retval = NULL; | |
428 | } | |
429 | ; | |
430 | return retval; | |
431 | } | |
432 | \f | |
433 | /* | |
434 | * h a s h _ i n s e r t ( ) | |
435 | * | |
436 | * Insert a (symbol-string, value) into the hash table. | |
437 | * Return an error string, 0 means OK. | |
438 | * It is an 'error' to insert an existing symbol. | |
439 | */ | |
440 | ||
441 | const char * /* return error string */ | |
442 | hash_insert (handle, string, value) | |
443 | struct hash_control *handle; | |
444 | const char *string; | |
445 | PTR value; | |
446 | { | |
447 | struct hash_entry *entry; | |
448 | const char *retval; | |
449 | ||
450 | retval = 0; | |
451 | if (handle->hash_stat[STAT_USED] > handle->hash_full) | |
452 | { | |
453 | retval = hash_grow (handle); | |
454 | } | |
455 | if (!retval) | |
456 | { | |
457 | entry = hash_ask (handle, string, STAT__WRITE); | |
458 | if (hash_found) | |
459 | { | |
460 | retval = "exists"; | |
461 | } | |
462 | else | |
463 | { | |
464 | entry->hash_value = value; | |
465 | entry->hash_string = string; | |
466 | handle->hash_stat[STAT_USED] += 1; | |
467 | } | |
468 | } | |
469 | return retval; | |
470 | } | |
471 | \f | |
472 | /* | |
473 | * h a s h _ j a m ( ) | |
474 | * | |
475 | * Regardless of what was in the symbol table before, after hash_jam() | |
476 | * the named symbol has the given value. The symbol is either inserted or | |
477 | * (its value is) replaced. | |
478 | * An error message string is returned, 0 means OK. | |
479 | * | |
480 | * WARNING: this may decide to grow the hashed symbol table. | |
481 | * To do this, we call hash_grow(), WHICH WILL recursively CALL US. | |
482 | * | |
483 | * We report status internally: hash_found is TRUE if we replaced, but | |
484 | * false if we inserted. | |
485 | */ | |
486 | const char * | |
487 | hash_jam (handle, string, value) | |
488 | struct hash_control *handle; | |
489 | const char *string; | |
490 | PTR value; | |
491 | { | |
492 | const char *retval; | |
493 | struct hash_entry *entry; | |
494 | ||
495 | retval = 0; | |
496 | if (handle->hash_stat[STAT_USED] > handle->hash_full) | |
497 | { | |
498 | retval = hash_grow (handle); | |
499 | } | |
500 | if (!retval) | |
501 | { | |
502 | entry = hash_ask (handle, string, STAT__WRITE); | |
503 | if (!hash_found) | |
504 | { | |
505 | entry->hash_string = string; | |
506 | handle->hash_stat[STAT_USED] += 1; | |
507 | } | |
508 | entry->hash_value = value; | |
509 | } | |
510 | return retval; | |
511 | } | |
512 | ||
513 | /* | |
514 | * h a s h _ g r o w ( ) | |
515 | * | |
516 | * Grow a new (bigger) hash table from the old one. | |
517 | * We choose to double the hash table's size. | |
518 | * Return a human-scrutible error string: 0 if OK. | |
519 | * Warning! This uses hash_jam(), which had better not recurse | |
520 | * back here! Hash_jam() conditionally calls us, but we ALWAYS | |
521 | * call hash_jam()! | |
522 | * Internal. | |
523 | */ | |
524 | static const char * | |
525 | hash_grow (handle) /* make a hash table grow */ | |
526 | struct hash_control *handle; | |
527 | { | |
528 | struct hash_entry *newwall; | |
529 | struct hash_entry *newwhere; | |
530 | struct hash_entry *newtrack; | |
531 | struct hash_entry *oldtrack; | |
532 | struct hash_entry *oldwhere; | |
533 | struct hash_entry *oldwall; | |
534 | int temp; | |
535 | int newsize; | |
536 | const char *string; | |
537 | const char *retval; | |
538 | #ifdef SUSPECT | |
539 | int oldused; | |
540 | #endif | |
541 | ||
542 | /* | |
543 | * capture info about old hash table | |
544 | */ | |
545 | oldwhere = handle->hash_where; | |
546 | oldwall = handle->hash_wall; | |
547 | #ifdef SUSPECT | |
548 | oldused = handle->hash_stat[STAT_USED]; | |
549 | #endif | |
550 | /* | |
551 | * attempt to get enough room for a hash table twice as big | |
552 | */ | |
553 | temp = handle->hash_stat[STAT_SIZE]; | |
554 | newwhere = ((struct hash_entry *) | |
555 | xmalloc ((unsigned long) ((temp << (GROW_FACTOR + 1)) | |
556 | /* +1 for wall slot */ | |
557 | * sizeof (struct hash_entry)))); | |
558 | if (newwhere == NULL) | |
559 | return "no_room"; | |
560 | ||
561 | /* | |
562 | * have enough room: now we do all the work. | |
563 | * double the size of everything in handle. | |
564 | */ | |
565 | handle->hash_mask = ((handle->hash_mask + 1) << GROW_FACTOR) - 1; | |
566 | handle->hash_stat[STAT_SIZE] <<= GROW_FACTOR; | |
567 | newsize = handle->hash_stat[STAT_SIZE]; | |
568 | handle->hash_where = newwhere; | |
569 | handle->hash_full <<= GROW_FACTOR; | |
570 | handle->hash_sizelog += GROW_FACTOR; | |
571 | handle->hash_wall = newwall = newwhere + newsize; | |
572 | /* Set all those pesky new slots to vacant. */ | |
573 | for (newtrack = newwhere; newtrack <= newwall; newtrack++) | |
574 | newtrack->hash_string = NULL; | |
575 | /* We will do a scan of the old table, the hard way, using the | |
576 | * new control block to re-insert the data into new hash table. */ | |
577 | handle->hash_stat[STAT_USED] = 0; | |
578 | for (oldtrack = oldwhere; oldtrack < oldwall; oldtrack++) | |
579 | if (((string = oldtrack->hash_string) != NULL) && string != DELETED) | |
580 | if ((retval = hash_jam (handle, string, oldtrack->hash_value))) | |
581 | return retval; | |
582 | ||
583 | #ifdef SUSPECT | |
584 | if (handle->hash_stat[STAT_USED] != oldused) | |
585 | return "hash_used"; | |
586 | #endif | |
587 | ||
588 | /* We have a completely faked up control block. | |
589 | Return the old hash table. */ | |
590 | free ((char *) oldwhere); | |
591 | ||
592 | return 0; | |
593 | } | |
594 | \f | |
595 | #ifdef TEST | |
596 | /* | |
597 | * h a s h _ a p p l y ( ) | |
598 | * | |
599 | * Use this to scan each entry in symbol table. | |
600 | * For each symbol, this calls (applys) a nominated function supplying the | |
601 | * symbol's value (and the symbol's name). | |
602 | * The idea is you use this to destroy whatever is associted with | |
603 | * any values in the table BEFORE you destroy the table with hash_die. | |
604 | * Of course, you can use it for other jobs; whenever you need to | |
605 | * visit all extant symbols in the table. | |
606 | * | |
607 | * We choose to have a call-you-back idea for two reasons: | |
608 | * asthetic: it is a neater idea to use apply than an explicit loop | |
609 | * sensible: if we ever had to grow the symbol table (due to insertions) | |
610 | * then we would lose our place in the table when we re-hashed | |
611 | * symbols into the new table in a different order. | |
612 | * | |
613 | * The order symbols are visited depends entirely on the hashing function. | |
614 | * Whenever you insert a (symbol, value) you risk expanding the table. If | |
615 | * you do expand the table, then the hashing function WILL change, so you | |
616 | * MIGHT get a different order of symbols visited. In other words, if you | |
617 | * want the same order of visiting symbols as the last time you used | |
618 | * hash_apply() then you better not have done any hash_insert()s or | |
619 | * hash_jam()s since the last time you used hash_apply(). | |
620 | * | |
621 | * In future we may use the value returned by your nominated function. | |
622 | * One idea is to abort the scan if, after applying the function to a | |
623 | * certain node, the function returns a certain code. | |
624 | * | |
625 | * The function you supply should be of the form: | |
626 | * void myfunct(string,value) | |
627 | * char * string; |* the symbol's name *| | |
628 | * char * value; |* the symbol's value *| | |
629 | * { | |
630 | * |* ... *| | |
631 | * } | |
632 | * | |
633 | */ | |
634 | void | |
635 | hash_apply (handle, function) | |
636 | struct hash_control *handle; | |
637 | void (*function) (); | |
638 | { | |
639 | struct hash_entry *entry; | |
640 | struct hash_entry *wall; | |
641 | ||
642 | wall = handle->hash_wall; | |
643 | for (entry = handle->hash_where; entry < wall; entry++) | |
644 | { | |
645 | if (islive (entry)) /* silly code: tests entry->string twice! */ | |
646 | { | |
647 | (*function) (entry->hash_string, entry->hash_value); | |
648 | } | |
649 | } | |
650 | } | |
651 | #endif | |
652 | \f | |
653 | /* | |
654 | * h a s h _ f i n d ( ) | |
655 | * | |
656 | * Given symbol string, find value (if any). | |
657 | * Return found value or NULL. | |
658 | */ | |
659 | PTR | |
660 | hash_find (handle, string) | |
661 | struct hash_control *handle; | |
662 | const char *string; | |
663 | { | |
664 | struct hash_entry *entry; | |
665 | ||
666 | entry = hash_ask (handle, string, STAT__READ); | |
667 | if (hash_found) | |
668 | return entry->hash_value; | |
669 | else | |
670 | return NULL; | |
671 | } | |
672 | \f | |
673 | /* | |
674 | * h a s h _ a s k ( ) | |
675 | * | |
676 | * Searches for given symbol string. | |
677 | * Return the slot where it OUGHT to live. It may be there. | |
678 | * Return hash_found: TRUE only if symbol is in that slot. | |
679 | * Access argument is to help keep statistics in control block. | |
680 | * Internal. | |
681 | */ | |
682 | static struct hash_entry * /* string slot, may be empty or deleted */ | |
683 | hash_ask (handle, string, access_type) | |
684 | struct hash_control *handle; | |
685 | const char *string; | |
686 | int access_type; | |
687 | { | |
688 | const char *s; | |
689 | struct hash_entry *slot; | |
690 | int collision; /* count collisions */ | |
691 | int strcmps; | |
692 | int hcode; | |
693 | ||
694 | /* start looking here */ | |
695 | hcode = hash_code (handle, string); | |
696 | slot = handle->hash_where + (hcode & handle->hash_mask); | |
697 | ||
698 | handle->hash_stat[STAT_ACCESS + access_type] += 1; | |
699 | collision = strcmps = 0; | |
700 | hash_found = FALSE; | |
701 | while (((s = slot->hash_string) != NULL) && s != DELETED) | |
702 | { | |
703 | if (string == s) | |
704 | { | |
705 | hash_found = TRUE; | |
706 | break; | |
707 | } | |
708 | if (slot->h == (unsigned long) hcode) | |
709 | { | |
710 | if (!strcmp (string, s)) | |
711 | { | |
712 | hash_found = TRUE; | |
713 | break; | |
714 | } | |
715 | strcmps++; | |
716 | } | |
717 | collision++; | |
718 | slot++; | |
719 | } | |
720 | /* | |
721 | * slot: return: | |
722 | * in use: we found string slot | |
723 | * at empty: | |
724 | * at wall: we fell off: wrap round ???? | |
725 | * in table: dig here slot | |
726 | * at DELETED: dig here slot | |
727 | */ | |
728 | if (slot == handle->hash_wall) | |
729 | { | |
730 | slot = handle->hash_where;/* now look again */ | |
731 | while (((s = slot->hash_string) != NULL) && s != DELETED) | |
732 | { | |
733 | if (string == s) | |
734 | { | |
735 | hash_found = TRUE; | |
736 | break; | |
737 | } | |
738 | if (slot->h == (unsigned long) hcode) | |
739 | { | |
740 | if (!strcmp (string, s)) | |
741 | { | |
742 | hash_found = TRUE; | |
743 | break; | |
744 | } | |
745 | strcmps++; | |
746 | } | |
747 | collision++; | |
748 | slot++; | |
749 | } | |
750 | /* | |
751 | * slot: return: | |
752 | * in use: we found it slot | |
753 | * empty: wall: ERROR IMPOSSIBLE !!!! | |
754 | * in table: dig here slot | |
755 | * DELETED:dig here slot | |
756 | */ | |
757 | } | |
758 | handle->hash_stat[STAT_COLLIDE + access_type] += collision; | |
759 | handle->hash_stat[STAT_STRCMP + access_type] += strcmps; | |
760 | if (!hash_found) | |
761 | slot->h = hcode; | |
762 | return slot; /* also return hash_found */ | |
763 | } | |
764 | \f | |
765 | /* | |
766 | * h a s h _ c o d e | |
767 | * | |
768 | * Does hashing of symbol string to hash number. | |
769 | * Internal. | |
770 | */ | |
771 | static int | |
772 | hash_code (handle, string) | |
773 | struct hash_control *handle; | |
774 | const char *string; | |
775 | { | |
776 | #if 1 /* There seems to be some interesting property of this function | |
777 | that prevents the bfd version below from being an adequate | |
778 | substitute. @@ Figure out what this property is! */ | |
779 | long h; /* hash code built here */ | |
780 | long c; /* each character lands here */ | |
781 | int n; /* Amount to shift h by */ | |
782 | ||
783 | n = (handle->hash_sizelog - 3); | |
784 | h = 0; | |
785 | while ((c = *string++) != 0) | |
786 | { | |
787 | h += c; | |
788 | h = (h << 3) + (h >> n) + c; | |
789 | } | |
790 | return h; | |
791 | #else | |
792 | /* from bfd */ | |
793 | unsigned long h = 0; | |
794 | unsigned int len = 0; | |
795 | unsigned int c; | |
796 | ||
797 | while ((c = *string++) != 0) | |
798 | { | |
799 | h += c + (c << 17); | |
800 | h ^= h >> 2; | |
801 | ++len; | |
802 | } | |
803 | h += len + (len << 17); | |
804 | h ^= h >> 2; | |
805 | return h; | |
806 | #endif | |
807 | } | |
808 | \f | |
809 | void | |
810 | hash_print_statistics (file, name, h) | |
811 | FILE *file; | |
812 | const char *name; | |
813 | struct hash_control *h; | |
814 | { | |
815 | unsigned long sz, used, pct; | |
816 | ||
817 | if (h == 0) | |
818 | return; | |
819 | ||
820 | sz = h->hash_stat[STAT_SIZE]; | |
821 | used = h->hash_stat[STAT_USED]; | |
822 | pct = (used * 100 + sz / 2) / sz; | |
823 | ||
824 | fprintf (file, "%s hash statistics:\n\t%lu/%lu slots used (%lu%%)\n", | |
825 | name, used, sz, pct); | |
826 | ||
827 | #define P(name, off) \ | |
828 | fprintf (file, "\t%-16s %6dr + %6dw = %7d\n", name, \ | |
829 | h->hash_stat[off+STAT__READ], \ | |
830 | h->hash_stat[off+STAT__WRITE], \ | |
831 | h->hash_stat[off+STAT__READ] + h->hash_stat[off+STAT__WRITE]) | |
832 | ||
833 | P ("accesses:", STAT_ACCESS); | |
834 | P ("collisions:", STAT_COLLIDE); | |
835 | P ("string compares:", STAT_STRCMP); | |
836 | ||
837 | #undef P | |
838 | } | |
839 | \f | |
840 | /* | |
841 | * Here is a test program to exercise above. | |
842 | */ | |
843 | #ifdef TEST | |
844 | ||
845 | #define TABLES (6) /* number of hash tables to maintain */ | |
846 | /* (at once) in any testing */ | |
847 | #define STATBUFSIZE (12) /* we can have 12 statistics */ | |
848 | ||
849 | int statbuf[STATBUFSIZE]; /* display statistics here */ | |
850 | char answer[100]; /* human farts here */ | |
851 | char *hashtable[TABLES]; /* we test many hash tables at once */ | |
852 | char *h; /* points to curent hash_control */ | |
853 | char **pp; | |
854 | char *p; | |
855 | char *name; | |
856 | char *value; | |
857 | int size; | |
858 | int used; | |
859 | char command; | |
860 | int number; /* number 0:TABLES-1 of current hashed */ | |
861 | /* symbol table */ | |
862 | ||
863 | main () | |
864 | { | |
865 | void applicatee (); | |
866 | void destroy (); | |
867 | char *what (); | |
868 | int *ip; | |
869 | ||
870 | number = 0; | |
871 | h = 0; | |
872 | printf ("type h <RETURN> for help\n"); | |
873 | for (;;) | |
874 | { | |
875 | printf ("hash_test command: "); | |
876 | gets (answer); | |
877 | command = answer[0]; | |
878 | if (isupper (command)) | |
879 | command = tolower (command); /* ecch! */ | |
880 | switch (command) | |
881 | { | |
882 | case '#': | |
883 | printf ("old hash table #=%d.\n", number); | |
884 | whattable (); | |
885 | break; | |
886 | case '?': | |
887 | for (pp = hashtable; pp < hashtable + TABLES; pp++) | |
888 | { | |
889 | printf ("address of hash table #%d control block is %xx\n" | |
890 | ,pp - hashtable, *pp); | |
891 | } | |
892 | break; | |
893 | case 'a': | |
894 | hash_apply (h, applicatee); | |
895 | break; | |
896 | case 'd': | |
897 | hash_apply (h, destroy); | |
898 | hash_die (h); | |
899 | break; | |
900 | case 'f': | |
901 | p = hash_find (h, name = what ("symbol")); | |
902 | printf ("value of \"%s\" is \"%s\"\n", name, p ? p : "NOT-PRESENT"); | |
903 | break; | |
904 | case 'h': | |
905 | printf ("# show old, select new default hash table number\n"); | |
906 | printf ("? display all hashtable control block addresses\n"); | |
907 | printf ("a apply a simple display-er to each symbol in table\n"); | |
908 | printf ("d die: destroy hashtable\n"); | |
909 | printf ("f find value of nominated symbol\n"); | |
910 | printf ("h this help\n"); | |
911 | printf ("i insert value into symbol\n"); | |
912 | printf ("j jam value into symbol\n"); | |
913 | printf ("n new hashtable\n"); | |
914 | printf ("r replace a value with another\n"); | |
915 | printf ("s say what %% of table is used\n"); | |
916 | printf ("q exit this program\n"); | |
917 | printf ("x delete a symbol from table, report its value\n"); | |
918 | break; | |
919 | case 'i': | |
920 | p = hash_insert (h, name = what ("symbol"), value = what ("value")); | |
921 | if (p) | |
922 | { | |
923 | printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name, value, | |
924 | p); | |
925 | } | |
926 | break; | |
927 | case 'j': | |
928 | p = hash_jam (h, name = what ("symbol"), value = what ("value")); | |
929 | if (p) | |
930 | { | |
931 | printf ("symbol=\"%s\" value=\"%s\" error=%s\n", name, value, p); | |
932 | } | |
933 | break; | |
934 | case 'n': | |
935 | h = hashtable[number] = (char *) hash_new (); | |
936 | break; | |
937 | case 'q': | |
938 | exit (EXIT_SUCCESS); | |
939 | case 'r': | |
940 | p = hash_replace (h, name = what ("symbol"), value = what ("value")); | |
941 | printf ("old value was \"%s\"\n", p ? p : "{}"); | |
942 | break; | |
943 | case 's': | |
944 | hash_say (h, statbuf, STATBUFSIZE); | |
945 | for (ip = statbuf; ip < statbuf + STATBUFSIZE; ip++) | |
946 | { | |
947 | printf ("%d ", *ip); | |
948 | } | |
949 | printf ("\n"); | |
950 | break; | |
951 | case 'x': | |
952 | p = hash_delete (h, name = what ("symbol")); | |
953 | printf ("old value was \"%s\"\n", p ? p : "{}"); | |
954 | break; | |
955 | default: | |
956 | printf ("I can't understand command \"%c\"\n", command); | |
957 | break; | |
958 | } | |
959 | } | |
960 | } | |
961 | ||
962 | char * | |
963 | what (description) | |
964 | char *description; | |
965 | { | |
966 | char *retval; | |
967 | char *malloc (); | |
968 | ||
969 | printf (" %s : ", description); | |
970 | gets (answer); | |
971 | /* will one day clean up answer here */ | |
972 | retval = malloc (strlen (answer) + 1); | |
973 | if (!retval) | |
974 | { | |
975 | error ("room"); | |
976 | } | |
977 | (void) strcpy (retval, answer); | |
978 | return (retval); | |
979 | } | |
980 | ||
981 | void | |
982 | destroy (string, value) | |
983 | char *string; | |
984 | char *value; | |
985 | { | |
986 | free (string); | |
987 | free (value); | |
988 | } | |
989 | ||
990 | ||
991 | void | |
992 | applicatee (string, value) | |
993 | char *string; | |
994 | char *value; | |
995 | { | |
996 | printf ("%.20s-%.20s\n", string, value); | |
997 | } | |
998 | ||
999 | whattable () /* determine number: what hash table to use */ | |
1000 | /* also determine h: points to hash_control */ | |
1001 | { | |
1002 | ||
1003 | for (;;) | |
1004 | { | |
1005 | printf (" what hash table (%d:%d) ? ", 0, TABLES - 1); | |
1006 | gets (answer); | |
1007 | sscanf (answer, "%d", &number); | |
1008 | if (number >= 0 && number < TABLES) | |
1009 | { | |
1010 | h = hashtable[number]; | |
1011 | if (!h) | |
1012 | { | |
1013 | printf ("warning: current hash-table-#%d. has no hash-control\n", number); | |
1014 | } | |
1015 | return; | |
1016 | } | |
1017 | else | |
1018 | { | |
1019 | printf ("invalid hash table number: %d\n", number); | |
1020 | } | |
1021 | } | |
1022 | } | |
1023 | ||
1024 | ||
1025 | ||
1026 | #endif /* #ifdef TEST */ | |
1027 | ||
1028 | /* end of hash.c */ |