2d800359740e97e847bf18e1d53cc3bcde4fc805
[com/gs-lite.git] / include / hfta / hash_table.h
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #ifndef __HASH_TABLE_H
17 #define __HASH_TABLE_H
18
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
21
22
23
24 #define NUM_PRIMES 23
25
26 static unsigned int primes[NUM_PRIMES] =  {
27         11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,
28         25717, 51437, 102877, 205759, 411527, 823117, 1646237,
29         3292489, 6584983, 13169977, 26339969, 52679969
30 };
31
32
33
34 template <class key_type, class value_type, class hasher_func, class equal_func>
35 class hash_table  {
36
37 public :
38         struct data_item {
39                 key_type first;
40                 value_type second;
41                 data_item* next;        // next data item in overflow chain
42
43                 data_item(const key_type& k, const value_type& v) {
44                         first = k;
45                         second = v;
46                         next = NULL;
47                  }
48         };
49
50         struct hash_bucket {
51                 data_item* data;        //
52                 hash_bucket* next_bucket;       // index of the next bucket in the list of used buckets
53                 hash_bucket* prev_bucket;       // index of the next bucket in the list of used buckets
54
55                 hash_bucket() {
56                         data = NULL;
57                         next_bucket = 0;        // 0 means no bucket follows this one
58                         prev_bucket = 0;        // 0 means no bucket precedes this one
59                 }
60         };
61
62         struct iterator {
63                 hash_bucket* bucket;
64                 data_item* data;
65
66                 iterator& operator++() {
67                         if (data->next)
68                                 data = data->next;
69                         else {
70                                 bucket = bucket->next_bucket;
71                                 if (bucket)
72                                         data = bucket->data;
73                                 else
74                                         data = NULL;
75                         }
76                         return *this;
77                 }
78
79 //                      Like ++, but don't go beyond the end of the bucket chain.
80                 iterator& next() {
81                         if (data->next)
82                                 data = data->next;
83                         else {
84                                 bucket = NULL;
85                                 data = NULL;
86                         }
87                         return *this;
88                 }
89
90                 bool operator!=(const iterator& it) {
91                         return (data != it.data || bucket != it.bucket);
92                 }
93
94                 bool operator==(const iterator& it) {
95                         return (data == it.data && bucket == it.bucket);
96                 }
97
98                 data_item &operator*() {
99                         return *data;
100                 }
101         };
102
103 private:
104         hasher_func hasher;
105         equal_func equal;
106
107         double load_factor;
108
109         size_t _size;
110         size_t _max_size;
111         size_t num_buckets;
112         size_t hash_mask;
113
114         // bucket allocation happens on first insert
115         // this flag indicates that buckets has been allocated
116         unsigned allocated;
117
118
119         hash_bucket* first_bucket;      // first used bucket
120         hash_bucket* last_bucket;       // last used bucket
121
122         hash_bucket* buckets;
123
124         // memory footpritn in bytes
125         unsigned int total_memory;
126
127 public:
128
129
130         hash_table(const size_t expected_size = 100000, const double max_load = 1.25) {
131
132                 size_t min_buckets = (size_t)(expected_size / max_load);
133                 load_factor = max_load;
134                 int nb;
135                 for(nb=2;nb<min_buckets;nb*=2);
136                 num_buckets = nb;
137                 hash_mask = nb-1;
138
139                 // we will make hash table start with 0-size and expand of first insertion
140                 buckets = new hash_bucket[0];
141                 allocated = 0;
142
143                 _size = 0;
144                 _max_size = 0;
145                 first_bucket = 0;
146                 last_bucket = 0;
147
148                 total_memory = 0;
149         }
150
151         int insert(const key_type& key, const value_type& val) {
152
153                 if (__builtin_expect(!allocated, 0)) {          // we expect buckets to be allocated most of the time
154                         delete buckets;
155                         buckets = new hash_bucket[num_buckets];
156                         allocated = 1;  
157                         total_memory = num_buckets * sizeof(hash_bucket);
158
159                         // fprintf(stderr, "Initial allocaton of %d buckets\n", (int)num_buckets);
160                 }
161
162                 data_item* d = new data_item(key, val);
163                 total_memory += sizeof(data_item);
164
165                 // find the insertion bucket
166                 size_t bucket_index = hasher(key) & hash_mask;
167                 // if the bucket is empty just add new data_item
168                 if (!buckets[bucket_index].data) {
169                         // create new data item
170
171                         buckets[bucket_index].data = d;
172                         if (!first_bucket){
173                                 first_bucket = &buckets[bucket_index];
174                         }else{
175                                 last_bucket->next_bucket = &buckets[bucket_index];
176                         }
177
178                         if (last_bucket)
179                                 buckets[bucket_index].prev_bucket = last_bucket;
180
181                         last_bucket = &buckets[bucket_index];
182                 } else {        // we already have data items in this bucket
183                         // prepend the item to overflow chain
184                         data_item* temp = buckets[bucket_index].data;
185                         buckets[bucket_index].data = d;
186                         d->next = temp;
187                 }
188                 _size++;
189                 if(_size>_max_size)
190                         _max_size = _size;
191
192                 return 0;
193         }
194
195         data_item *iterate_find(data_item* t, const key_type& key) {
196                 data_item *temp = t;
197                         while (temp && !equal(temp->first, key)){
198                                 temp = temp->next;
199                         }
200                 return temp;
201         }
202
203
204         iterator find(const key_type& key) {
205                 iterator iter;
206
207                 if (__builtin_expect(!allocated, 0)) {          // we expect buckets to be allocated most of the time
208                         iter.bucket = NULL;
209                         iter.data = NULL;
210                         return iter;
211                 }               
212
213                 // find the insertion bucket
214                 size_t bucket_index = hasher(key) & hash_mask;
215                 // if the bucket is empty just add new data_item
216                 if (!buckets[bucket_index].data) {
217                         iter.bucket = NULL;
218                         iter.data = NULL;
219                 } else {        // we already have data items in this bucket
220                         data_item* temp = buckets[bucket_index].data;
221 //                      temp = iterate_find(temp,key);
222
223                         while (temp && !equal(temp->first, key))
224                                 temp = temp->next;
225
226                         iter.data = temp;
227                         if (!temp)
228                                 iter.bucket = NULL;
229                         else
230                                 iter.bucket = &buckets[bucket_index];
231                 }
232                 return iter;
233         }
234
235         void erase(const key_type& key) {
236
237                 if (__builtin_expect(!allocated, 0)) {          // we expect buckets to be allocated most of the time
238                         return;
239                 }       
240
241                 // find the  bucket
242                 size_t bucket_index = hasher(key) & hash_mask;
243                 // if the bucket is empty just add new data_item
244                 if (!buckets[bucket_index].data)
245                         return;
246
247                 data_item* temp = buckets[bucket_index].data;
248                 data_item* prev = NULL;
249                 while (temp && !equal(temp->first, key)) {
250                         prev = temp;
251                         temp = temp->next;
252                 }
253                 if (!temp)
254                         return;
255
256                 // delete this data item from the chain
257                 _size--;
258                 if (!prev){     // we are deleting the first element in chain
259                         buckets[bucket_index].data = temp->next;
260
261                         if(temp->next == NULL){
262                                 if(buckets[bucket_index].next_bucket){
263                                         buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
264                                 }else{
265                                         last_bucket = buckets[bucket_index].prev_bucket;
266                                 }
267                                 if(buckets[bucket_index].prev_bucket){
268                                         buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
269                                 }else{
270                                         first_bucket = buckets[bucket_index].next_bucket;
271                                 }
272                                 buckets[bucket_index].next_bucket = NULL;
273                                 buckets[bucket_index].prev_bucket = NULL;
274                         }
275                 }else{
276                         prev->next = temp->next;
277                 }
278         delete temp;
279         total_memory -= sizeof(data_item);
280         }
281
282         void erase_full(const key_type& key) {
283
284                 if (__builtin_expect(!allocated, 0)) {          // we expect buckets to be allocated most of the time
285                         return;
286                 }
287
288                 // find the  bucket
289                 size_t bucket_index = hasher(key) & hash_mask;
290                 // if the bucket is empty just add new data_item
291                 if (!buckets[bucket_index].data)
292                         return;
293
294                 data_item* temp = buckets[bucket_index].data;
295                 data_item* prev = NULL;
296                 while (temp && !equal(temp->first, key)) {
297                         prev = temp;
298                         temp = temp->next;
299                 }
300                 if (!temp)
301                         return;
302
303                 // delete this data item from the chain
304                 _size--;
305                 if (!prev){     // we are deleting the first element in chain
306                         buckets[bucket_index].data = temp->next;
307
308                         if(temp->next == NULL){
309                                 if(buckets[bucket_index].next_bucket){
310                                         buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
311                                 }else{
312                                         last_bucket = buckets[bucket_index].prev_bucket;
313                                 }
314                                 if(buckets[bucket_index].prev_bucket){
315                                         buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
316                                 }else{
317                                         first_bucket = buckets[bucket_index].next_bucket;
318                                 }
319                                 buckets[bucket_index].next_bucket = NULL;
320                                 buckets[bucket_index].prev_bucket = NULL;
321                         }
322                 }else{
323                         prev->next = temp->next;
324                 }
325 //        delete (*temp).first;
326 //        delete (*temp).second;
327         delete temp;
328         total_memory -= sizeof(data_item);
329         }
330
331         iterator begin() {
332                 iterator iter;
333                 // find the first data item
334                 iter.bucket = first_bucket;
335                 iter.data = (first_bucket) ? first_bucket->data : NULL;
336                 return iter;
337         }
338
339         iterator end() {
340                 iterator iter;
341                 iter.bucket = NULL;
342                 iter.data = NULL;
343                 return iter;
344         }
345
346         void clear() {
347                 data_item* temp;
348                 data_item* next;
349
350                 hash_bucket* tmp;
351                 while (first_bucket) {
352                         temp = first_bucket->data;
353                         while ( (next = temp->next) ) {
354                                 delete temp;
355                                 temp = next;
356                         }
357                         if(temp) delete(temp);
358                         first_bucket->data = NULL;
359                         tmp = first_bucket;
360                         first_bucket = first_bucket->next_bucket;
361                         tmp->next_bucket = NULL;
362                         tmp->prev_bucket = NULL;
363                 }
364                 last_bucket = NULL;
365                 _size = 0;
366                 total_memory = num_buckets * sizeof(hash_bucket);
367         }
368
369         int resize() {
370                 if (_size) {
371                         fprintf(stderr, "Error: resizing non-empty hash table\n");
372                         exit(1);
373                 }
374
375                 double max_load = (1.0 * _max_size) / num_buckets;
376
377                 // reize table if its maximum load exceed the load factor
378                 // OR it was less then half of the load factor
379                 size_t min_buckets = 0;
380                 if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
381                         min_buckets = ceil(_max_size / load_factor);
382                 } 
383  
384  
385                 if (min_buckets) {
386                         // find power-of-2 size large than min_buckets;
387                         int nb;
388                         for(nb=2;nb<min_buckets;nb*=2);
389                         // make sure we actually changing the size
390                         if (nb != num_buckets) {
391                                 fprintf(stderr, "Resizing from %d to %d buckets\n", (int)num_buckets, nb);
392                                 num_buckets = nb;
393                                 delete[] buckets;
394                                 hash_mask = num_buckets-1;
395
396                                 buckets = new hash_bucket[num_buckets];
397                                 total_memory = num_buckets * sizeof(hash_bucket);
398                         }
399                 }
400
401                 return 0;
402         }               
403
404         size_t size() const {
405                 return _size;
406         }
407
408         size_t max_size() const {
409                 return _max_size;
410         }
411
412         bool empty() const {
413                 return (_size == 0);
414         }
415
416         ~hash_table() {
417                 clear();
418                 delete[] buckets;
419         }
420
421         unsigned int get_mem_footprint() {
422                 return total_memory;
423         }
424
425 };
426
427
428 template <class key_type, class hasher_func, class equal_func>
429 class hash_set  {
430
431 public :
432         struct data_item {
433                 key_type key;
434                 data_item* next;        // next data item in overflow chain
435
436                 data_item(const key_type& k) {
437                         key = k;
438                         next = NULL;
439                  }
440         };
441
442         struct hash_bucket {
443                 data_item* data;        //
444                 hash_bucket* next_bucket;       // index of the next bucket in the list of used buckets
445
446                 hash_bucket() {
447                         data = NULL;
448                         next_bucket = 0;        // 0 means no bucket follows this one
449                 }
450         };
451
452         struct iterator {
453                 hash_bucket* bucket;
454                 data_item* data;
455
456                 iterator& operator++() {
457                         if (data->next)
458                                 data = data->next;
459                         else {
460                                 bucket = bucket->next_bucket;
461                                 if (bucket)
462                                         data = bucket->data;
463                                 else
464                                         data = NULL;
465                         }
466                         return *this;
467                 }
468
469                 bool operator!=(const iterator& it) {
470                         return (data != it.data || bucket != it.bucket);
471                 }
472
473                 bool operator==(const iterator& it) {
474                         return (data == it.data && bucket == it.bucket);
475                 }
476
477 //                      NOTE : not certain if returning ref always works
478                 key_type &operator*() {
479                         return data->key;
480                 }
481         };
482
483 private:
484         hasher_func hasher;
485         equal_func equal;
486
487         double load_factor;
488
489         size_t _size;
490         size_t _max_size;
491         size_t num_buckets;
492         size_t hash_mask;
493
494         hash_bucket* first_bucket;      // first used bucket
495         hash_bucket* last_bucket;               // last used bucket
496
497         hash_bucket* buckets;
498
499 public:
500
501         hash_set(const size_t expected_size = 100000, const double max_load = 1.25) {
502
503                 size_t min_buckets = (size_t)(expected_size / max_load);
504                 load_factor = max_load;
505                 int nb;
506                 for(nb=2;nb<min_buckets;nb*=2);
507                 num_buckets = nb;
508                 hash_mask = nb-1;
509                 buckets = new hash_bucket[num_buckets];
510
511                 _size = 0;
512                 _max_size = 0;
513                 first_bucket = 0;
514                 last_bucket = 0;
515         }
516
517         int insert(const key_type& key) {
518                 data_item* d = new data_item(key);
519
520                 // find the insertion bucket
521                 size_t bucket_index = hasher(key) & hash_mask;
522                 // if the bucket is empty just add new data_item
523                 if (!buckets[bucket_index].data) {
524                         // create new data item
525
526                         buckets[bucket_index].data = d;
527                         if (!first_bucket)
528                                 first_bucket = &buckets[bucket_index];
529                         else
530                                 last_bucket->next_bucket = &buckets[bucket_index];
531                         last_bucket = &buckets[bucket_index];
532                 } else {        // we already have data items in this bucket
533                         // prepend the item to overflow chain
534                         data_item* temp = buckets[bucket_index].data;
535                         buckets[bucket_index].data = d;
536                         d->next = temp;
537                 }
538                 _size++;
539                 if(_size>_max_size)
540                         _max_size = _size;
541
542                 return 0;
543         }
544
545         iterator find(const key_type& key) {
546                 iterator iter;
547
548                 // find the insertion bucket
549                 size_t bucket_index = hasher(key) & hash_mask;
550                 // if the bucket is empty just add new data_item
551                 if (!buckets[bucket_index].data) {
552                         iter.bucket = NULL;
553                         iter.data = NULL;
554                 } else {        // we already have data items in this bucket
555                         data_item* temp = buckets[bucket_index].data;
556                         while (temp && !equal(temp->key, key))
557                                 temp = temp->next;
558                         iter.data = temp;
559                         if (!temp)
560                                 iter.bucket = NULL;
561                         else
562                                 iter.bucket = &buckets[bucket_index];
563                 }
564                 return iter;
565         }
566
567         void erase(const key_type& key) {
568                 // find the  bucket
569                 size_t bucket_index = hasher(key) & hash_mask;
570                 // if the bucket is empty just add new data_item
571                 if (!buckets[bucket_index].data)
572                         return;
573
574                 data_item* temp = buckets[bucket_index].data;
575                 data_item* prev = NULL;
576                 while (temp && !equal(temp->key, key)) {
577                         prev = temp;
578                         temp = temp->next;
579                 }
580                 if (!temp)
581                         return;
582
583                 // delete this data item from the chain
584                 _size--;
585                 if (!prev)      // we are deleting the first element in chain
586                         buckets[bucket_index].data = temp->next;
587                 else
588                         prev->next = temp->next;
589                 delete temp;
590         }
591
592         iterator begin() {
593                 iterator iter;
594                 // find the first data item
595                 iter.bucket = first_bucket;
596                 iter.data = (first_bucket) ? first_bucket->data : NULL;
597                 return iter;
598         }
599
600         iterator end() {
601                 iterator iter;
602                 iter.bucket = NULL;
603                 iter.data = NULL;
604                 return iter;
605         }
606
607         void clear() {
608                 data_item* temp;
609                 data_item* next;
610
611                 hash_bucket* tmp;
612                 while (first_bucket) {
613                         temp = first_bucket->data;
614                         while ( (next = temp->next) ) {
615                                 delete temp;
616                                 temp = next;
617                         }
618                         if(temp) delete(temp);
619                         first_bucket->data = NULL;
620                         tmp = first_bucket;
621                         first_bucket = first_bucket->next_bucket;
622                         tmp->next_bucket = NULL;
623                 }
624                 last_bucket = NULL;
625                 _size = 0;
626
627         }
628
629         int resize() {
630                 if (_size) {
631                         fprintf(stderr, "Error: resizing non-empty hash table\n");
632                         exit(1);
633                 }
634                 
635                 double max_load = (1.0 * _max_size) / num_buckets;
636
637                 // reize table if its maximum load exceed the load factor
638                 // OR it was less then half of the load factor
639                 size_t min_buckets = 0;
640                 if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
641                         min_buckets = ceil(_max_size / load_factor);
642                 } 
643
644                 if (min_buckets) {
645                         // find power-of-2 size large than min_buckets;
646                         int nb;
647                         for(nb=2;nb<min_buckets;nb*=2);
648                         // make sure we actually changing the size
649                         if (nb != num_buckets) {
650                                 fprintf(stderr, "Resizing from %d to %d buckets\n", (int)num_buckets, nb);
651                                 num_buckets = nb;
652                                 delete[] buckets;
653                                 hash_mask = num_buckets-1;
654
655                                 buckets = new hash_bucket[num_buckets];         
656                         }
657                 }
658
659                 return 0;
660         }
661
662
663         size_t size() const {
664                 return _size;
665         }
666
667         bool empty() const {
668                 return (_size == 0);
669         }
670
671         ~hash_set() {
672                 clear();
673                 delete[] buckets;
674         }
675
676 };
677
678
679 #endif // __HASH_TABLE_H