1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
16 #ifndef __HASH_TABLE_H
17 #define __HASH_TABLE_H
// Table of primes, each roughly double the previous, for prime-sized
// growth policies. NOTE(review): not referenced by any visible code in
// this view, and a non-const static array in a header gets a separate
// copy in every translation unit -- consider static const / constexpr.
// Verify NUM_PRIMES matches the 23 initializers below. TODO confirm.
26 static unsigned int primes[NUM_PRIMES] = {
27 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,
28 25717, 51437, 102877, 205759, 411527, 823117, 1646237,
29 3292489, 6584983, 13169977, 26339969, 52679969
// hash_table<key_type, value_type, hasher_func, equal_func>: chained hash
// table whose non-empty buckets are threaded into a doubly-linked list for
// fast iteration. (Class body is only partially visible in this view.)
34 template <class key_type, class value_type, class hasher_func, class equal_func>
// Node in a bucket's overflow chain, holding one key/value pair.
41 data_item* next; // next data item in overflow chain
43 data_item(const key_type& k, const value_type& v) {
// Bucket: head of an overflow chain plus links that thread all used
// (non-empty) buckets into a doubly-linked list.
52 hash_bucket* next_bucket; // pointer to the next bucket in the list of used buckets
53 hash_bucket* prev_bucket; // pointer to the previous bucket in the list of used buckets
57 next_bucket = 0; // 0 means no bucket follows this one
58 prev_bucket = 0; // 0 means no bucket precedes this one
// Forward iterator: walks each bucket's overflow chain, then follows
// next_bucket to the next used bucket.
66 iterator& operator++() {
70 bucket = bucket->next_bucket;
79 // Like ++, but don't go beyond the end of the bucket chain.
// Two iterators are equal iff they reference the same item in the same
// bucket. NOTE(review): these could be declared const.
90 bool operator!=(const iterator& it) {
91 return (data != it.data || bucket != it.bucket);
94 bool operator==(const iterator& it) {
95 return (data == it.data && bucket == it.bucket);
// Dereference. NOTE(review): undefined if data is NULL (end iterator).
98 data_item &operator*() {
114 // bucket allocation happens on first insert
115 // this flag indicates that buckets has been allocated
119 hash_bucket* first_bucket; // first used bucket
120 hash_bucket* last_bucket; // last used bucket
// Bucket array of num_buckets entries (power of two, indexed via hash_mask).
122 hash_bucket* buckets;
124 // memory footprint in bytes
125 unsigned int total_memory;
// Construct an empty table sized so expected_size elements stay under
// max_load; the real bucket array allocation is deferred to first insert.
130 hash_table(const size_t expected_size = 100000, const double max_load = 1.25) {
132 size_t min_buckets = (size_t)(expected_size / max_load);
133 load_factor = max_load;
// Round the bucket count up to a power of 2 so (hash & hash_mask) can
// replace a modulo.
135 for(nb=2;nb<min_buckets;nb*=2);
139 // we will make hash table start with 0-size and expand of first insertion
// NOTE(review): this zero-length array does not appear to be deleted
// before the real array is allocated on first insert -- one-time leak.
140 buckets = new hash_bucket[0];
// Insert key/val: lazily allocate the bucket array on first call, then
// prepend a new data_item to the target bucket's chain; a bucket that
// becomes non-empty is appended to the used-bucket list.
// NOTE(review): no duplicate-key check is visible -- inserting an existing
// key appears to shadow the old entry. TODO confirm intended semantics.
151 int insert(const key_type& key, const value_type& val) {
153 if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
155 buckets = new hash_bucket[num_buckets];
157 total_memory = num_buckets * sizeof(hash_bucket);
// NOTE(review): "allocaton" typo in the runtime message below (left
// unchanged here; a code fix should correct it to "allocation").
159 fprintf(stderr, "Initial allocaton of %d buckets\n", (int)num_buckets);
// Account for the new node before linking it in.
162 data_item* d = new data_item(key, val);
163 total_memory += sizeof(data_item);
165 // find the insertion bucket
166 size_t bucket_index = hasher(key) & hash_mask;
167 // if the bucket is empty just add new data_item
168 if (!buckets[bucket_index].data) {
169 // create new data item
171 buckets[bucket_index].data = d;
// First ever used bucket becomes the head of the used-bucket list ...
173 first_bucket = &buckets[bucket_index];
// ... otherwise link the newly used bucket after the current tail.
175 last_bucket->next_bucket = &buckets[bucket_index];
179 buckets[bucket_index].prev_bucket = last_bucket;
181 last_bucket = &buckets[bucket_index];
182 } else { // we already have data items in this bucket
183 // prepend the item to overflow chain
184 data_item* temp = buckets[bucket_index].data;
185 buckets[bucket_index].data = d;
// Walk an overflow chain starting at t; returns the first item whose key
// matches (NULL if none). NOTE(review): the declaration of `temp` is not
// visible in this view -- presumably initialized from t; verify.
195 data_item *iterate_find(data_item* t, const key_type& key) {
197 while (temp && !equal(temp->first, key)){
// Look up key; returns an iterator to the matching item, or (presumably)
// an end-style iterator when the table is unallocated or the key is
// absent -- the early-return lines are not visible in this view.
204 iterator find(const key_type& key) {
207 if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
// Hash to the bucket that would hold the key.
213 // find the insertion bucket
214 size_t bucket_index = hasher(key) & hash_mask;
215 // empty bucket => key is not in the table
216 if (!buckets[bucket_index].data) {
219 } else { // we already have data items in this bucket
220 data_item* temp = buckets[bucket_index].data;
221 // temp = iterate_find(temp,key);
// Linear scan of the overflow chain for a matching key.
223 while (temp && !equal(temp->first, key))
230 iter.bucket = &buckets[bucket_index];
// Remove the entry for key, if present: unlink its data_item from the
// bucket chain and, when the bucket becomes empty, splice the bucket out
// of the used-bucket list. No-op when unallocated or key absent.
235 void erase(const key_type& key) {
237 if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
242 size_t bucket_index = hasher(key) & hash_mask;
243 // empty bucket => nothing to erase
244 if (!buckets[bucket_index].data)
// Scan the chain for the key, remembering the predecessor for unlinking.
247 data_item* temp = buckets[bucket_index].data;
248 data_item* prev = NULL;
249 while (temp && !equal(temp->first, key)) {
256 // delete this data item from the chain
258 if (!prev){ // we are deleting the first element in chain
259 buckets[bucket_index].data = temp->next;
// Bucket became empty: splice it out of the used-bucket list and fix up
// first_bucket / last_bucket as needed.
261 if(temp->next == NULL){
262 if(buckets[bucket_index].next_bucket){
263 buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
265 last_bucket = buckets[bucket_index].prev_bucket;
267 if(buckets[bucket_index].prev_bucket){
268 buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
270 first_bucket = buckets[bucket_index].next_bucket;
272 buckets[bucket_index].next_bucket = NULL;
273 buckets[bucket_index].prev_bucket = NULL;
// Middle/tail of chain: bypass temp.
276 prev->next = temp->next;
// NOTE(review): the `delete temp` is not visible in this view; verify the
// node is actually freed to match the accounting below.
279 total_memory -= sizeof(data_item);
// Like erase(), but intended to also release the key/value payloads (see
// the commented-out deletes below). NOTE(review): this duplicates erase()
// almost line-for-line -- a shared private helper would remove the
// duplication.
282 void erase_full(const key_type& key) {
284 if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
289 size_t bucket_index = hasher(key) & hash_mask;
290 // empty bucket => nothing to erase
291 if (!buckets[bucket_index].data)
// Scan the chain for the key, remembering the predecessor for unlinking.
294 data_item* temp = buckets[bucket_index].data;
295 data_item* prev = NULL;
296 while (temp && !equal(temp->first, key)) {
303 // delete this data item from the chain
305 if (!prev){ // we are deleting the first element in chain
306 buckets[bucket_index].data = temp->next;
// Bucket became empty: splice it out of the used-bucket list and fix up
// first_bucket / last_bucket as needed.
308 if(temp->next == NULL){
309 if(buckets[bucket_index].next_bucket){
310 buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
312 last_bucket = buckets[bucket_index].prev_bucket;
314 if(buckets[bucket_index].prev_bucket){
315 buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
317 first_bucket = buckets[bucket_index].next_bucket;
319 buckets[bucket_index].next_bucket = NULL;
320 buckets[bucket_index].prev_bucket = NULL;
// Middle/tail of chain: bypass temp.
323 prev->next = temp->next;
325 // delete (*temp).first;
326 // delete (*temp).second;
328 total_memory -= sizeof(data_item);
// Iterator to the first stored item; both fields NULL when the table is
// empty (matching the equality test in operator==/!=).
333 // find the first data item
334 iter.bucket = first_bucket;
335 iter.data = (first_bucket) ? first_bucket->data : NULL;
// Delete every data_item and reset all used-bucket links; the bucket
// array itself is retained, so total_memory falls back to buckets only.
351 while (first_bucket) {
// `temp` may be assumed non-NULL: only non-empty buckets are on the
// used-bucket list. NOTE(review): the delete of the leading chain nodes
// happens in lines not visible in this view; verify no node is leaked.
352 temp = first_bucket->data;
353 while ( (next = temp->next) ) {
// NOTE(review): `if(temp)` guard is redundant -- delete on NULL is a no-op.
357 if(temp) delete(temp);
358 first_bucket->data = NULL;
// Advance before clearing this bucket's links via the `tmp` alias
// (presumably saved just above -- declaration not visible in this view).
360 first_bucket = first_bucket->next_bucket;
361 tmp->next_bucket = NULL;
362 tmp->prev_bucket = NULL;
366 total_memory = num_buckets * sizeof(hash_bucket);
// Rehash policy: grow or shrink the bucket array based on the high-water
// mark _max_size; only legal on an empty table.
371 fprintf(stderr, "Error: resizing non-empty hash table\n");
375 double max_load = (1.0 * _max_size) / num_buckets;
377 // resize table if its maximum load exceeds the load factor
378 // OR it was less than half of the load factor
379 size_t min_buckets = 0;
380 if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
381 min_buckets = _max_size / load_factor;
385 // find power-of-2 size larger than min_buckets
387 for(nb=2;nb<min_buckets;nb*=2);
390 fprintf(stderr, "Resizing to %d buckets\n", (int)num_buckets);
392 hash_mask = num_buckets-1;
// NOTE(review): verify the old bucket array is delete[]d before this
// allocation (not visible in this view) to avoid a leak.
394 buckets = new hash_bucket[num_buckets];
395 total_memory = num_buckets * sizeof(hash_bucket);
// Accessors (bodies not fully visible in this view): current element
// count, high-water element count, and heap footprint in bytes.
401 size_t size() const {
405 size_t max_size() const {
418 unsigned int get_mem_footprint() {
// hash_set<key_type, hasher_func, equal_func>: key-only variant of
// hash_table above, same chained-bucket design but the used-bucket list
// is singly linked (no prev_bucket).
425 template <class key_type, class hasher_func, class equal_func>
// Node in a bucket's overflow chain, holding one key.
431 data_item* next; // next data item in overflow chain
433 data_item(const key_type& k) : key(k), next(NULL) { }
438 hash_bucket* next_bucket; // pointer to the next bucket in the list of used buckets
442 next_bucket = 0; // 0 means no bucket follows this one
// Forward iterator over all keys; same traversal scheme as
// hash_table::iterator (chain first, then next used bucket).
450 iterator& operator++() {
454 bucket = bucket->next_bucket;
// Two iterators are equal iff they reference the same item in the same
// bucket. NOTE(review): these could be declared const.
463 bool operator!=(const iterator& it) {
464 return (data != it.data || bucket != it.bucket);
467 bool operator==(const iterator& it) {
468 return (data == it.data && bucket == it.bucket);
471 // NOTE : not certain if returning ref always works
472 key_type &operator*() {
488 hash_bucket* first_bucket; // first used bucket
489 hash_bucket* last_bucket; // last used bucket
// Bucket array of num_buckets entries (power of two, indexed via hash_mask).
491 hash_bucket* buckets;
// Construct with a power-of-2 bucket count sized for expected_size at
// max_load. Unlike hash_table, the bucket array is allocated eagerly here.
495 hash_set(const size_t expected_size = 100000, const double max_load = 1.25) {
497 size_t min_buckets = (size_t)(expected_size / max_load);
498 load_factor = max_load;
// Round the bucket count up to a power of 2 for mask-based indexing.
500 for(nb=2;nb<min_buckets;nb*=2);
503 buckets = new hash_bucket[num_buckets];
// Insert key: prepend a new data_item to its bucket's chain; a bucket
// that becomes non-empty is appended to the (singly linked) used-bucket
// list. NOTE(review): no duplicate-key check or total_memory accounting
// is visible here, unlike hash_table::insert -- TODO confirm intended.
511 int insert(const key_type& key) {
512 data_item* d = new data_item(key);
514 // find the insertion bucket
515 size_t bucket_index = hasher(key) & hash_mask;
516 // if the bucket is empty just add new data_item
517 if (!buckets[bucket_index].data) {
518 // create new data item
520 buckets[bucket_index].data = d;
// First ever used bucket becomes the head of the used-bucket list ...
522 first_bucket = &buckets[bucket_index];
// ... otherwise link the newly used bucket after the current tail.
524 last_bucket->next_bucket = &buckets[bucket_index];
525 last_bucket = &buckets[bucket_index];
526 } else { // we already have data items in this bucket
527 // prepend the item to overflow chain
528 data_item* temp = buckets[bucket_index].data;
529 buckets[bucket_index].data = d;
// Look up key; returns an iterator to the matching item, or (presumably)
// an end-style iterator when absent -- early-return lines not visible.
539 iterator find(const key_type& key) {
542 // find the insertion bucket
543 size_t bucket_index = hasher(key) & hash_mask;
544 // empty bucket => key is not in the set
545 if (!buckets[bucket_index].data) {
548 } else { // we already have data items in this bucket
549 data_item* temp = buckets[bucket_index].data;
// Linear scan of the overflow chain for a matching key.
550 while (temp && !equal(temp->key, key))
556 iter.bucket = &buckets[bucket_index];
// Remove key, if present, by unlinking its data_item from the bucket
// chain. NOTE(review): unlike hash_table::erase, no code is visible here
// that removes a now-empty bucket from the used-bucket list -- iteration
// may then visit a bucket with a NULL chain head; verify against the
// full source.
561 void erase(const key_type& key) {
563 size_t bucket_index = hasher(key) & hash_mask;
564 // empty bucket => nothing to erase
565 if (!buckets[bucket_index].data)
// Scan the chain for the key, remembering the predecessor for unlinking.
568 data_item* temp = buckets[bucket_index].data;
569 data_item* prev = NULL;
570 while (temp && !equal(temp->key, key)) {
577 // delete this data item from the chain
579 if (!prev) // we are deleting the first element in chain
580 buckets[bucket_index].data = temp->next;
// Middle/tail of chain: bypass temp.
582 prev->next = temp->next;
// Iterator to the first stored key; both fields NULL when the set is
// empty (matching the equality test in operator==/!=).
588 // find the first data item
589 iter.bucket = first_bucket;
590 iter.data = (first_bucket) ? first_bucket->data : NULL;
// Delete every data_item and reset the used-bucket links; the bucket
// array itself is retained.
606 while (first_bucket) {
// `temp` may be assumed non-NULL: only non-empty buckets are on the
// used-bucket list. NOTE(review): the delete of the leading chain nodes
// happens in lines not visible in this view; verify no node is leaked.
607 temp = first_bucket->data;
608 while ( (next = temp->next) ) {
// NOTE(review): `if(temp)` guard is redundant -- delete on NULL is a no-op.
612 if(temp) delete(temp);
613 first_bucket->data = NULL;
// Advance before clearing this bucket's link via the `tmp` alias
// (presumably saved just above -- declaration not visible in this view).
615 first_bucket = first_bucket->next_bucket;
616 tmp->next_bucket = NULL;
// Rehash policy: grow or shrink the bucket array based on the high-water
// mark _max_size; only legal on an empty set (mirrors hash_table::resize).
625 fprintf(stderr, "Error: resizing non-empty hash table\n");
629 double max_load = (1.0 * _max_size) / num_buckets;
631 // resize table if its maximum load exceeds the load factor
632 // OR it was less than half of the load factor
633 size_t min_buckets = 0;
634 if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
635 min_buckets = _max_size / load_factor;
639 // find power-of-2 size larger than min_buckets
641 for(nb=2;nb<min_buckets;nb*=2);
// NOTE(review): num_buckets (size_t) is passed to %d without the (int)
// cast used in hash_table::resize -- undefined behavior on LP64; should
// be (int)num_buckets or %zu.
644 fprintf(stderr, "Resizing to %d buckets\n", num_buckets);
646 hash_mask = num_buckets-1;
// NOTE(review): verify the old bucket array is delete[]d before this
// allocation (not visible in this view) to avoid a leak.
648 buckets = new hash_bucket[num_buckets];
// Number of stored elements (body not fully visible in this view).
655 size_t size() const {
671 #endif // __HASH_TABLE_H