1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
16 #ifndef __HASH_TABLE_H
17 #define __HASH_TABLE_H
26 static unsigned int primes[NUM_PRIMES] = {
27 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,
28 25717, 51437, 102877, 205759, 411527, 823117, 1646237,
29 3292489, 6584983, 13169977, 26339969, 52679969
34 template <class key_type, class value_type, class hasher_func, class equal_func>
41 data_item* next; // next data item in overflow chain
43 data_item(const key_type& k, const value_type& v) {
52 hash_bucket* next_bucket; // pointer to the next bucket in the list of used buckets
53 hash_bucket* prev_bucket; // pointer to the previous bucket in the list of used buckets
57 next_bucket = 0; // 0 means no bucket follows this one
58 prev_bucket = 0; // 0 means no bucket precedes this one
66 iterator& operator++() {
70 bucket = bucket->next_bucket;
79 // Like ++, but don't go beyond the end of the bucket chain.
90 bool operator!=(const iterator& it) {
91 return (data != it.data || bucket != it.bucket);
94 bool operator==(const iterator& it) {
95 return (data == it.data && bucket == it.bucket);
98 data_item &operator*() {
114 // bucket allocation happens on first insert
115 // this flag indicates that buckets has been allocated
119 hash_bucket* first_bucket; // first used bucket
120 hash_bucket* last_bucket; // last used bucket
122 hash_bucket* buckets;
124 // memory footprint in bytes
125 unsigned int total_memory;
130 hash_table(const size_t expected_size = 100000, const double max_load = 1.25) {
132 size_t min_buckets = (size_t)(expected_size / max_load);
133 load_factor = max_load;
135 for(nb=2;nb<min_buckets;nb*=2);
139 // we will make hash table start with 0 size and expand on first insertion
140 buckets = new hash_bucket[0];
151 int insert(const key_type& key, const value_type& val) {
153 if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
155 buckets = new hash_bucket[num_buckets];
157 total_memory = num_buckets * sizeof(hash_bucket);
159 // fprintf(stderr, "Initial allocaton of %d buckets\n", (int)num_buckets);
162 data_item* d = new data_item(key, val);
163 total_memory += sizeof(data_item);
165 // find the insertion bucket
166 size_t bucket_index = hasher(key) & hash_mask;
167 // if the bucket is empty just add new data_item
168 if (!buckets[bucket_index].data) {
169 // create new data item
171 buckets[bucket_index].data = d;
173 first_bucket = &buckets[bucket_index];
175 last_bucket->next_bucket = &buckets[bucket_index];
179 buckets[bucket_index].prev_bucket = last_bucket;
181 last_bucket = &buckets[bucket_index];
182 } else { // we already have data items in this bucket
183 // prepend the item to overflow chain
184 data_item* temp = buckets[bucket_index].data;
185 buckets[bucket_index].data = d;
195 data_item *iterate_find(data_item* t, const key_type& key) {
197 while (temp && !equal(temp->first, key)){
204 iterator find(const key_type& key) {
207 if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
213 // find the bucket for this key
214 size_t bucket_index = hasher(key) & hash_mask;
215 // if the bucket is empty the key cannot be present
216 if (!buckets[bucket_index].data) {
219 } else { // we already have data items in this bucket
220 data_item* temp = buckets[bucket_index].data;
221 // temp = iterate_find(temp,key);
223 while (temp && !equal(temp->first, key))
230 iter.bucket = &buckets[bucket_index];
235 void erase(const key_type& key) {
237 if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
242 size_t bucket_index = hasher(key) & hash_mask;
243 // nothing to erase if the bucket is empty
244 if (!buckets[bucket_index].data)
247 data_item* temp = buckets[bucket_index].data;
248 data_item* prev = NULL;
249 while (temp && !equal(temp->first, key)) {
256 // delete this data item from the chain
258 if (!prev){ // we are deleting the first element in chain
259 buckets[bucket_index].data = temp->next;
261 if(temp->next == NULL){
262 if(buckets[bucket_index].next_bucket){
263 buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
265 last_bucket = buckets[bucket_index].prev_bucket;
267 if(buckets[bucket_index].prev_bucket){
268 buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
270 first_bucket = buckets[bucket_index].next_bucket;
272 buckets[bucket_index].next_bucket = NULL;
273 buckets[bucket_index].prev_bucket = NULL;
276 prev->next = temp->next;
279 total_memory -= sizeof(data_item);
282 void erase_full(const key_type& key) {
284 if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
289 size_t bucket_index = hasher(key) & hash_mask;
290 // nothing to erase if the bucket is empty
291 if (!buckets[bucket_index].data)
294 data_item* temp = buckets[bucket_index].data;
295 data_item* prev = NULL;
296 while (temp && !equal(temp->first, key)) {
303 // delete this data item from the chain
305 if (!prev){ // we are deleting the first element in chain
306 buckets[bucket_index].data = temp->next;
308 if(temp->next == NULL){
309 if(buckets[bucket_index].next_bucket){
310 buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
312 last_bucket = buckets[bucket_index].prev_bucket;
314 if(buckets[bucket_index].prev_bucket){
315 buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
317 first_bucket = buckets[bucket_index].next_bucket;
319 buckets[bucket_index].next_bucket = NULL;
320 buckets[bucket_index].prev_bucket = NULL;
323 prev->next = temp->next;
325 // delete (*temp).first;
326 // delete (*temp).second;
328 total_memory -= sizeof(data_item);
333 // find the first data item
334 iter.bucket = first_bucket;
335 iter.data = (first_bucket) ? first_bucket->data : NULL;
351 while (first_bucket) {
352 temp = first_bucket->data;
353 while ( (next = temp->next) ) {
357 if(temp) delete(temp);
358 first_bucket->data = NULL;
360 first_bucket = first_bucket->next_bucket;
361 tmp->next_bucket = NULL;
362 tmp->prev_bucket = NULL;
366 total_memory = num_buckets * sizeof(hash_bucket);
371 fprintf(stderr, "Error: resizing non-empty hash table\n");
375 double max_load = (1.0 * _max_size) / num_buckets;
377 // resize table if its maximum load exceeds the load factor
378 // OR it was less than half of the load factor
379 size_t min_buckets = 0;
380 if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
381 min_buckets = ceil(_max_size / load_factor);
386 // find power-of-2 size larger than min_buckets;
388 for(nb=2;nb<min_buckets;nb*=2);
389 // make sure we are actually changing the size
390 if (nb != num_buckets) {
391 fprintf(stderr, "Resizing from %d to %d buckets\n", (int)num_buckets, nb);
394 hash_mask = num_buckets-1;
396 buckets = new hash_bucket[num_buckets];
397 total_memory = num_buckets * sizeof(hash_bucket);
404 size_t size() const {
408 size_t max_size() const {
421 unsigned int get_mem_footprint() {
428 template <class key_type, class hasher_func, class equal_func>
434 data_item* next; // next data item in overflow chain
436 data_item(const key_type& k) {
444 hash_bucket* next_bucket; // pointer to the next bucket in the list of used buckets
448 next_bucket = 0; // 0 means no bucket follows this one
456 iterator& operator++() {
460 bucket = bucket->next_bucket;
469 bool operator!=(const iterator& it) {
470 return (data != it.data || bucket != it.bucket);
473 bool operator==(const iterator& it) {
474 return (data == it.data && bucket == it.bucket);
477 // NOTE : not certain if returning ref always works
478 key_type &operator*() {
494 hash_bucket* first_bucket; // first used bucket
495 hash_bucket* last_bucket; // last used bucket
497 hash_bucket* buckets;
501 hash_set(const size_t expected_size = 100000, const double max_load = 1.25) {
503 size_t min_buckets = (size_t)(expected_size / max_load);
504 load_factor = max_load;
506 for(nb=2;nb<min_buckets;nb*=2);
509 buckets = new hash_bucket[num_buckets];
517 int insert(const key_type& key) {
518 data_item* d = new data_item(key);
520 // find the insertion bucket
521 size_t bucket_index = hasher(key) & hash_mask;
522 // if the bucket is empty just add new data_item
523 if (!buckets[bucket_index].data) {
524 // create new data item
526 buckets[bucket_index].data = d;
528 first_bucket = &buckets[bucket_index];
530 last_bucket->next_bucket = &buckets[bucket_index];
531 last_bucket = &buckets[bucket_index];
532 } else { // we already have data items in this bucket
533 // prepend the item to overflow chain
534 data_item* temp = buckets[bucket_index].data;
535 buckets[bucket_index].data = d;
545 iterator find(const key_type& key) {
548 // find the bucket for this key
549 size_t bucket_index = hasher(key) & hash_mask;
550 // if the bucket is empty the key cannot be present
551 if (!buckets[bucket_index].data) {
554 } else { // we already have data items in this bucket
555 data_item* temp = buckets[bucket_index].data;
556 while (temp && !equal(temp->key, key))
562 iter.bucket = &buckets[bucket_index];
567 void erase(const key_type& key) {
569 size_t bucket_index = hasher(key) & hash_mask;
570 // nothing to erase if the bucket is empty
571 if (!buckets[bucket_index].data)
574 data_item* temp = buckets[bucket_index].data;
575 data_item* prev = NULL;
576 while (temp && !equal(temp->key, key)) {
583 // delete this data item from the chain
585 if (!prev) // we are deleting the first element in chain
586 buckets[bucket_index].data = temp->next;
588 prev->next = temp->next;
594 // find the first data item
595 iter.bucket = first_bucket;
596 iter.data = (first_bucket) ? first_bucket->data : NULL;
612 while (first_bucket) {
613 temp = first_bucket->data;
614 while ( (next = temp->next) ) {
618 if(temp) delete(temp);
619 first_bucket->data = NULL;
621 first_bucket = first_bucket->next_bucket;
622 tmp->next_bucket = NULL;
631 fprintf(stderr, "Error: resizing non-empty hash table\n");
635 double max_load = (1.0 * _max_size) / num_buckets;
637 // resize table if its maximum load exceeds the load factor
638 // OR it was less than half of the load factor
639 size_t min_buckets = 0;
640 if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
641 min_buckets = ceil(_max_size / load_factor);
645 // find power-of-2 size larger than min_buckets;
647 for(nb=2;nb<min_buckets;nb*=2);
648 // make sure we are actually changing the size
649 if (nb != num_buckets) {
650 fprintf(stderr, "Resizing from %d to %d buckets\n", (int)num_buckets, nb);
653 hash_mask = num_buckets-1;
655 buckets = new hash_bucket[num_buckets];
663 size_t size() const {
679 #endif // __HASH_TABLE_H