2 * Copyright (c) 2002-2005 Lev Walkin <vlm@lionet.info>. All rights reserved.
3 * Copyright (c) 2001-2004 Netli, Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * $Id: genhash.c 447 2005-06-07 06:51:10Z vlm $
29 * Implementation of a hash data structure.
30 * This particular implementation is supposed to be space-efficient
31 * particularly in the case of tiny number of hash elements.
32 * It also has an aggressive hash buckets expanding technique, which allows
33 * to deal with increasing number of elements without a loss of search speed.
35 * Generally, one structure of type genhash_t is allocated per hash set.
36 * This structure is supposed to hold all information related to the current
37 * set, and also holds a tiny number of hash elements, when hash hasn't yet
38 * grown up. When the number of elements reaches some point, part of the
39 * genhash_t structure is reused to contain the pointers to the actual
40 * hash buckets and LRU (Least Recently Used) list's head and tail.
41 * Elements which were held inside genhash_t will be moved to the hash buckets.
43 * Said above effectively means two modes of operation: TINY and NORMAL.
44 * They can be distinguished by examining the h->numbuckets value, which
45 * is 0 for TINY and greater for NORMAL mode.
47 * In the TINY mode we use a lower part of the genhash_t structure
48 * (lower 32 bytes from 64 bytes of genhash_t) to hold up to IH_VALUE (4)
51 * In the NORMAL mode we use the lower part of the genhash_t structure
52 * to hold a set of pointers, including a pointer to the hash buckets.
53 * We agressively expand hash buckets size when adding new elements
54 * to lower the number of key comparisons.
57 #include <sys/types.h>
65 /* 1M entries, 4M RAM */
66 #define DEFAULT_MAXIMUM_HASH_BUCKETS_NUMBER (1024 * 1024)
67 static int maximum_hash_buckets_number = DEFAULT_MAXIMUM_HASH_BUCKETS_NUMBER;
70 * A single hash element structure which binds a value to its key.
72 typedef struct genhash_el_s {
73 unsigned int key_hash; /* Saved hash of the key */
76 struct genhash_el_s *hash_next; /* Collision list inside the bucket */
77 struct genhash_el_s *hash_prev;
78 struct genhash_el_s *lru_prev; /* Per-hash LRU list */
79 struct genhash_el_s *lru_next;
83 * A hash structure with buckets etc.
86 int (*keycmpf) (const void *lkey1, const void *rkey2);
87 unsigned int (*keyhashf) (const void *key); /* hash function */
88 void (*keydestroyf) (void *key); /* key destructor */
89 void (*valuedestroyf) (void *value); /* value destructor */
91 int numelements; /* Total number of hash elements */
92 int numbuckets; /* 0 means "use _TINY" */
93 int lru_limit; /* Must be initialized explicitly */
94 genhash_iter_t *iters; /* Active iterators */
96 /* 32-byte boundary here */
99 #define IH_VALUES 4 /* Internally held key/value pairs for TINY mode */
100 struct _internal_tiny_s {
101 void *keys[IH_VALUES];
102 void *values[IH_VALUES];
103 } _TINY; /* 32-byte structure */
104 struct _internal_normal_s {
105 genhash_el *lru_head; /* LRU list head */
106 genhash_el *lru_tail; /* LRU list tail */
107 genhash_el **buckets; /* Hash buckets */
111 #define tiny_keys un._TINY.keys
112 #define tiny_values un._TINY.values
113 #define lru_head un._NORMAL.lru_head
114 #define lru_tail un._NORMAL.lru_tail
115 #define buckets un._NORMAL.buckets
120 _genhash_normal_add(genhash_t *h, genhash_el *el, void *key, void *value);
125 int (*keycmpf) (const void *key1, const void *key2),
126 unsigned int (*keyhashf) (const void *key),
127 void (*keydestroyf) (void *key),
128 void (*valuedestroyf) (void *value)
132 h = (genhash_t *)malloc(sizeof(genhash_t));
136 memset(h, 0, sizeof(genhash_t));
138 genhash_reinit(h, keycmpf, keyhashf, keydestroyf, valuedestroyf);
146 int (*keycmpf) (const void *key1, const void *key2),
147 unsigned int (*keyhashf) (const void *key),
148 void (*keydestroyf) (void *key),
149 void (*valuedestroyf) (void *value)
152 assert(keycmpf && keyhashf);
154 h->keycmpf = keycmpf;
155 h->keyhashf = keyhashf;
156 h->keydestroyf = keydestroyf;
157 h->valuedestroyf = valuedestroyf;
163 genhash_count(genhash_t *h) {
165 return h->numelements;
173 _remove_normal_hash_el(genhash_t *h, genhash_el *el) {
174 genhash_iter_t *iter;
178 /* Remove from the collision list */
180 if((el->hash_prev->hash_next = el->hash_next))
181 el->hash_next->hash_prev = el->hash_prev;
184 if((h->buckets[el->key_hash % h->numbuckets] = el->hash_next))
185 el->hash_next->hash_prev = NULL;
188 /* Remove from LRU list */
190 if((el->lru_prev->lru_next = el->lru_next))
191 el->lru_next->lru_prev = el->lru_prev;
193 h->lru_tail = el->lru_prev;
195 if(h->lru_head == el) {
196 if((h->lru_head = el->lru_next) == NULL)
199 h->lru_head->lru_prev = NULL;
203 /* Remember key and value */
207 /* Move iterators off the element being deleted */
208 for(iter = h->iters; iter; iter = iter->iter_next) {
209 assert(iter->hash_ptr == h);
210 if(iter->un.location == el) {
211 iter->un.location = iter->order_lru_first
212 ? el->lru_prev : el->lru_next;
219 /* Remove key and value */
220 if (h->keydestroyf) h->keydestroyf(kd_arg);
221 if (h->valuedestroyf) h->valuedestroyf(vd_arg);
225 _genhash_normal_el_move2top(genhash_t *h, genhash_el *el) {
227 /* Disable sorting if iterators are running */
230 /* Move to the top of the hash bucket */
232 int bucket = el->key_hash % h->numbuckets;
234 /* Remove from the current location */
235 if((el->hash_prev->hash_next = el->hash_next))
236 el->hash_next->hash_prev = el->hash_prev;
238 /* Move to the top of the hash bucket */
239 if((el->hash_next = h->buckets[bucket]))
240 el->hash_next->hash_prev = el;
241 h->buckets[bucket] = el;
242 el->hash_prev = NULL;
245 /* Move to the top of LRU list */
246 if(h->lru_limit && el->lru_prev) {
248 /* Remove from current location */
249 if((el->lru_prev->lru_next = el->lru_next))
250 el->lru_next->lru_prev = el->lru_prev;
252 h->lru_tail = el->lru_prev;
254 /* Append to the head */
256 h->lru_head->lru_prev = el;
257 el->lru_next = h->lru_head;
263 _expand_hash(genhash_t *h) {
264 int newbuckets_count;
265 genhash_el **newbuckets;
268 * Compute a new number of buckets value.
271 newbuckets_count = h->numbuckets << 2;
272 /* Too big hash table */
273 if(newbuckets_count > maximum_hash_buckets_number) {
274 if(h->numbuckets < maximum_hash_buckets_number) {
275 newbuckets_count = maximum_hash_buckets_number;
277 /* No need to set errno here. */
282 /* 8 buckets -> 32 bytes of memory */
283 newbuckets_count = IH_VALUES << 1;
284 if(newbuckets_count > maximum_hash_buckets_number) {
285 if(maximum_hash_buckets_number) {
286 newbuckets_count = maximum_hash_buckets_number;
288 /* Allowed to store only IH_VALUES elements */
296 * Allocate a new storage for buckets.
298 newbuckets = malloc(newbuckets_count * sizeof(*newbuckets));
300 memset(newbuckets, 0, newbuckets_count * sizeof(*newbuckets));
310 * Rehash elements from old h->buckets to newbuckets.
311 * No need to touch LRU pointers and other stuff - it is okay.
313 for(el = h->lru_tail; el; el = el->lru_prev) {
314 bucket = el->key_hash % newbuckets_count;
315 el->hash_prev = NULL;
316 if((el->hash_next = newbuckets[bucket]))
317 el->hash_next->hash_prev = el;
318 newbuckets[bucket] = el;
322 h->buckets = newbuckets;
323 h->numbuckets = newbuckets_count;
326 * Moving from inline tiny storage into buckets.
328 genhash_el *els[IH_VALUES] = { NULL };
329 struct _internal_tiny_s tiny_substruct;
331 int saved_numelements;
333 genhash_iter_t *iter;
335 /* Pre-allocate hash elements (for "undo") */
336 for(i = 0; i < h->numelements; i++) {
337 els[i] = (genhash_el *)malloc(sizeof(genhash_el));
339 for(i = 0; i < h->numelements; i++)
347 /* Save part of the union */
348 tiny_substruct = h->un._TINY;
349 /* Re-initialize this part in NORMAL model */
350 memset(&h->un._NORMAL, 0, sizeof(h->un._NORMAL));
352 /* There was no allocated buckets, when in tiny hash mode. */
353 h->buckets = newbuckets;
354 h->numbuckets = newbuckets_count;
356 saved_numelements = h->numelements;
357 saved_lru_limit = h->lru_limit;
359 h->lru_limit = 0; /* Disable LRU expiration for a while */
361 for(i = saved_numelements - 1; i >= 0; --i) {
363 * genhash_normal_add won't fail, if we supply
364 * an already allocated genhash_el *.
366 (void)_genhash_normal_add(h, els[i],
367 tiny_substruct.keys[i],
368 tiny_substruct.values[i]);
371 /* Now, scan through iterators and convert them TINY->NORMAL */
372 for(iter = h->iters; iter; iter = iter->iter_next) {
373 assert(iter->hash_ptr == h);
374 if(iter->un.item_number < 0
375 || iter->un.item_number >= saved_numelements) {
376 iter->un.location = 0;
378 iter->un.location = els[iter->un.item_number];
382 h->lru_limit = saved_lru_limit;
389 * Won't return with error if el is provided.
392 _genhash_normal_add(genhash_t *h, genhash_el *el, void *key, void *value) {
396 el = malloc(sizeof (*el));
398 /* Errno will be set by malloc() */
403 /* Maintain maximum number of entries */
405 while(h->numelements >= h->lru_limit)
406 _remove_normal_hash_el(h, h->lru_tail);
409 memset(el, 0, sizeof(genhash_el));
411 /* Compute the index of the collision list */
412 el->key_hash = h->keyhashf(key);
413 bucket = &h->buckets[el->key_hash % h->numbuckets];
419 * Add to the collision list
421 el->hash_prev = NULL;
422 if((el->hash_next = *bucket))
423 (*bucket)->hash_prev = el;
427 * Add to the LRU list.
430 el->lru_next = h->lru_head;
431 el->lru_next->lru_prev = el;
445 genhash_add(genhash_t *h, void *key, void *value) {
452 if(h->numbuckets == 0) {
454 /* We have a tiny internally-held set of elements */
455 if(h->numelements < IH_VALUES) {
456 h->tiny_keys[h->numelements] = key;
457 h->tiny_values[h->numelements] = value;
462 if(_expand_hash(h) == -1)
467 if((h->numelements / h->numbuckets) > 2)
468 (void)_expand_hash(h);
471 return _genhash_normal_add(h, NULL, key, value);
475 genhash_addunique(genhash_t *h, void *key, void *value) {
476 if(genhash_get(h, key)) {
480 return genhash_add(h, key, value);
484 genhash_get(genhash_t *h, const void *key) {
489 int bucket = h->keyhashf(key) % h->numbuckets;
491 for(walk = h->buckets[bucket];
492 walk; walk = walk->hash_next) {
494 if (h->keycmpf(walk->key, key) == 0) {
495 _genhash_normal_el_move2top(h, walk);
504 assert(h->numelements <= IH_VALUES);
505 for(i = 0; i < h->numelements; i++) {
506 if(h->keycmpf(h->tiny_keys[i], key) == 0)
507 /* Don't reorder in TINY mode */
508 return h->tiny_values[i];
518 genhash_del(genhash_t *h, void *key) {
525 if(h->numelements == 0) {
527 return -1; /* not found */
530 bucket = h->keyhashf(key) % h->numbuckets;
532 for(walk = h->buckets[bucket]; walk; walk = walk->hash_next)
533 if(h->keycmpf(walk->key, key) == 0)
537 _remove_normal_hash_el(h, walk);
544 /* Look for matching key */
545 for(i = 0; i < h->numelements; i++)
546 if(h->keycmpf(h->tiny_keys[i], key) == 0)
549 if(i < h->numelements) {
550 /* Remember values */
551 void *kd_arg = h->tiny_keys[i];
552 void *vd_arg = h->tiny_values[i];
557 /* If iterators are involved, we have to
558 * shift elements to maintain iteration order
559 * and avoid duplications */
560 genhash_iter_t *iter;
561 memmove(&h->tiny_keys[i],
564 * sizeof(h->tiny_keys[0]));
565 memmove(&h->tiny_values[i],
566 &h->tiny_values[i+1],
568 * sizeof(h->tiny_values[0]));
569 /* Shift the iterator's indexes */
570 for(iter = h->iters; iter;
571 iter = iter->iter_next) {
572 int in = iter->un.item_number;
573 if(iter->order_lru_first) {
575 iter->un.item_number--;
578 iter->un.item_number--;
582 /* Substitute it with the last one */
583 /* No harm if overwriting itself */
584 h->tiny_keys[i] = h->tiny_keys[h->numelements];
585 h->tiny_values[i] = h->tiny_values[h->numelements];
587 h->tiny_keys[h->numelements] = 0;
588 h->tiny_values[h->numelements] = 0;
589 /* Delete for real */
590 if(h->keydestroyf) h->keydestroyf(kd_arg);
591 if(h->valuedestroyf) h->valuedestroyf(vd_arg);
601 * Initialize a hash iterator.
604 genhash_iter_init(genhash_iter_t *iter, genhash_t *h, int reverse_order) {
607 iter->iter_prev = 0; /* Add itself to the iterators list */
608 iter->iter_next = h->iters;
610 iter->order_lru_first = reverse_order;
615 /* Least recent first order */
616 iter->un.location = h->lru_tail;
618 /* Most recent first order */
619 iter->un.location = h->lru_head;
624 iter->un.item_number = 0;
626 iter->un.item_number = h->numelements - 1;
630 return h->numelements;
634 genhash_iter(genhash_iter_t *iter, void *key_p, void *val_p) {
637 genhash_t *h = iter->hash_ptr;
641 genhash_el *cur_el = iter->un.location;
643 /* Already finished */
646 if(key) *key = cur_el->key;
647 if(val) *val = cur_el->value;
649 /* Move pointer to the next hash element */
650 iter->un.location = iter->order_lru_first
651 ? cur_el->lru_prev : cur_el->lru_next;
654 if(iter->un.item_number < 0
655 || iter->un.item_number >= h->numelements
656 || h->tiny_keys[iter->un.item_number] == 0)
659 if(key) *key = h->tiny_keys[iter->un.item_number];
660 if(val) *val = h->tiny_values[iter->un.item_number];
662 /* Advance to the next element */
663 if(iter->order_lru_first)
664 iter->un.item_number++;
666 iter->un.item_number--;
674 genhash_iter_done(genhash_iter_t *iter) {
675 assert(iter->hash_ptr->iters);
676 /* Remove itself from the iterators list */
678 iter->iter_next->iter_prev = iter->iter_prev;
680 iter->iter_prev->iter_next = iter->iter_next;
682 iter->hash_ptr->iters = iter->iter_next; /* Shift the head */
683 iter->hash_ptr = (void *)0xdeadbeef;
687 genhash_set_lru_limit(genhash_t *h, int value) {
689 int prev_limit = h->lru_limit;
691 h->lru_limit = value;
700 genhash_set_buckets_limit(int value) {
701 int prev_limit = maximum_hash_buckets_number;
703 maximum_hash_buckets_number = value;
709 genhash_destroy(genhash_t *h) {
711 assert(h->iters == 0); /* All iterators MUST be _done(). */
712 genhash_empty(h, 1, 1);
718 genhash_empty(genhash_t *h, int freekeys, int freevalues) {
719 genhash_iter_t *iter;
721 if(h == NULL) return;
724 * Don't free what could not be freed.
726 if(h->keydestroyf == NULL) freekeys = 0;
727 if(h->valuedestroyf == NULL) freevalues = 0;
729 if(h->numbuckets == 0) {
730 while(h->numelements > 0) {
731 int n = --h->numelements;
732 void *kd_arg = h->tiny_keys[n];
733 void *vd_arg = h->tiny_values[n];
735 if (freekeys) h->keydestroyf(kd_arg);
736 if (freevalues) h->valuedestroyf(vd_arg);
739 genhash_el *el, *el_next;
741 for(el = h->lru_head; el; el = el_next) {
742 void *kd_arg = el->key;
743 void *vd_arg = el->value;
744 el_next = el->lru_next;
749 if (freekeys) h->keydestroyf(kd_arg);
750 if (freevalues) h->valuedestroyf(vd_arg);
753 h->numbuckets = 0; /* Move back to TINY model */
755 memset(&h->un, 0, sizeof(h->un));
757 /* Invalidate iterators in TINY model */
758 for(iter = h->iters; iter; iter = iter->iter_next) {
759 assert(iter->hash_ptr == h);
760 iter->un.item_number = -1;
763 assert(h->numelements == 0);
767 /*----- Simple hash and compare functions for common data types ------*/
770 hashf_int (const void *key) {
771 return (*(const ptrdiff_t *)key ^ (*(const ptrdiff_t *)key >> 16));
775 cmpf_int (const void *key1, const void *key2) {
776 return (*(const int *)key1 != *(const int *)key2);
780 hashf_void (const void *key) {
781 return ((ptrdiff_t)key ^ ((ptrdiff_t)key >> 16));
785 cmpf_void (const void *key1, const void *key2) {
786 return (key1 != key2);
791 * Phong's linear congruential hash
793 #define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c))
796 hashf_string(const void *keyarg) {
797 register const unsigned char *key;
798 register unsigned int h;
799 register unsigned char c;
802 for (h = 0; (c = *key++);)
809 cmpf_string(const void *key1, const void *key2) {
810 return strcmp((const char *)key1, (const char *)key2);