Improvements to aggregation code and function library
[com/gs-lite.git] / include / hfta / hash_table.h
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #ifndef __HASH_TABLE_H
17 #define __HASH_TABLE_H
18
19 #include <stdlib.h>
20 #include <stdio.h>
21
22
23
24 #define NUM_PRIMES 23
25
26 static unsigned int primes[NUM_PRIMES] =  {
27         11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,
28         25717, 51437, 102877, 205759, 411527, 823117, 1646237,
29         3292489, 6584983, 13169977, 26339969, 52679969
30 };
31
32
33
template <class key_type, class value_type, class hasher_func, class equal_func>
class hash_table  {

public :
	// One key/value entry.  Entries that hash to the same bucket are
	// chained together through 'next'.
	struct data_item {
		key_type first;
		value_type second;
		data_item* next;	// next data item in overflow chain

		data_item(const key_type& k, const value_type& v) {
			first = k;
			second = v;
			next = NULL;
		}
	};

	// One slot of the table.  Non-empty buckets are additionally linked
	// into a doubly-linked list (first_bucket .. last_bucket) so that
	// iteration and clear() only touch occupied slots.
	struct hash_bucket {
		data_item* data;		// head of overflow chain, NULL if bucket empty
		hash_bucket* next_bucket;	// next bucket in the list of used buckets
		hash_bucket* prev_bucket;	// previous bucket in the list of used buckets

		hash_bucket() {
			data = NULL;
			next_bucket = 0;	// 0 means no bucket follows this one
			prev_bucket = 0;	// 0 means no bucket precedes this one
		}
	};

	// Forward iterator over every data_item: walks each bucket's chain,
	// then follows the used-bucket list.
	struct iterator {
		hash_bucket* bucket;
		data_item* data;

		iterator& operator++() {
			if (data->next)
				data = data->next;
			else {
				bucket = bucket->next_bucket;
				if (bucket)
					data = bucket->data;
				else
					data = NULL;
			}
			return *this;
		}

//			Like ++, but don't go beyond the end of the bucket chain.
		iterator& next() {
			if (data->next)
				data = data->next;
			else {
				bucket = NULL;
				data = NULL;
			}
			return *this;
		}

		bool operator!=(const iterator& it) {
			return (data != it.data || bucket != it.bucket);
		}

		bool operator==(const iterator& it) {
			return (data == it.data && bucket == it.bucket);
		}

		data_item &operator*() {
			return *data;
		}
	};

private:
	hasher_func hasher;
	equal_func equal;

	double load_factor;	// maximum desired _size / num_buckets ratio

	size_t _size;		// current number of stored items
	size_t _max_size;	// high-water mark of _size; drives resize()
	size_t num_buckets;	// always a power of 2
	size_t hash_mask;	// num_buckets - 1; used instead of modulo

	// bucket allocation happens on first insert
	// this flag indicates that buckets has been allocated
	unsigned allocated;


	hash_bucket* first_bucket;	// first used bucket
	hash_bucket* last_bucket;	// last used bucket

	hash_bucket* buckets;

	// memory footprint in bytes (bucket array + live data items)
	unsigned int total_memory;

	// Remove a bucket whose chain just became empty from the
	// doubly-linked list of used buckets.
	void unlink_bucket(hash_bucket* b) {
		if (b->next_bucket)
			b->next_bucket->prev_bucket = b->prev_bucket;
		else
			last_bucket = b->prev_bucket;
		if (b->prev_bucket)
			b->prev_bucket->next_bucket = b->next_bucket;
		else
			first_bucket = b->next_bucket;
		b->next_bucket = NULL;
		b->prev_bucket = NULL;
	}

public:


	// expected_size and max_load determine the initial power-of-2
	// bucket count; the bucket array itself is allocated lazily.
	hash_table(const size_t expected_size = 100000, const double max_load = 1.25) {

		size_t min_buckets = (size_t)(expected_size / max_load);
		load_factor = max_load;
		size_t nb;	// was int: avoided signed/unsigned comparison below
		for(nb=2;nb<min_buckets;nb*=2);
		num_buckets = nb;
		hash_mask = nb-1;

		// we will make hash table start with 0-size and expand on first
		// insertion.  BUG FIX: this was 'new hash_bucket[0]' which
		// insert() later released with scalar 'delete' -- a mismatched
		// new[]/delete, undefined behavior.  NULL works with delete[].
		buckets = NULL;
		allocated = 0;

		_size = 0;
		_max_size = 0;
		first_bucket = 0;
		last_bucket = 0;

		total_memory = 0;
	}

	// Insert a key/value pair.  Duplicate keys are NOT detected; the new
	// item is simply prepended to the chain.  Always returns 0.
	int insert(const key_type& key, const value_type& val) {

		if (__builtin_expect(!allocated, 0)) {		// we expect buckets to be allocated most of the time
			delete[] buckets;	// BUG FIX: was scalar 'delete' on an array allocation
			buckets = new hash_bucket[num_buckets];
			allocated = 1;
			total_memory = num_buckets * sizeof(hash_bucket);

			fprintf(stderr, "Initial allocation of %d buckets\n", (int)num_buckets);
		}

		data_item* d = new data_item(key, val);
		total_memory += sizeof(data_item);

		// find the insertion bucket
		size_t bucket_index = hasher(key) & hash_mask;
		if (!buckets[bucket_index].data) {
			// empty bucket: store the item and append the bucket
			// to the used-bucket list
			buckets[bucket_index].data = d;
			if (!first_bucket){
				first_bucket = &buckets[bucket_index];
			}else{
				last_bucket->next_bucket = &buckets[bucket_index];
			}

			if (last_bucket)
				buckets[bucket_index].prev_bucket = last_bucket;

			last_bucket = &buckets[bucket_index];
		} else {	// we already have data items in this bucket
			// prepend the item to overflow chain
			data_item* temp = buckets[bucket_index].data;
			buckets[bucket_index].data = d;
			d->next = temp;
		}
		_size++;
		if(_size>_max_size)
			_max_size = _size;

		return 0;
	}

	// Linear scan of an overflow chain; returns the matching item or NULL.
	data_item *iterate_find(data_item* t, const key_type& key) {
		data_item *temp = t;
		while (temp && !equal(temp->first, key)){
			temp = temp->next;
		}
		return temp;
	}


	// Returns an iterator to the item with this key, or end() if absent.
	iterator find(const key_type& key) {
		iterator iter;

		if (__builtin_expect(!allocated, 0)) {		// nothing was ever inserted
			iter.bucket = NULL;
			iter.data = NULL;
			return iter;
		}

		size_t bucket_index = hasher(key) & hash_mask;
		if (!buckets[bucket_index].data) {
			iter.bucket = NULL;
			iter.data = NULL;
		} else {
			// reuse iterate_find instead of duplicating its loop
			data_item* temp = iterate_find(buckets[bucket_index].data, key);
			iter.data = temp;
			iter.bucket = temp ? &buckets[bucket_index] : NULL;
		}
		return iter;
	}

	// Remove the (first) item with this key, if present.
	void erase(const key_type& key) {

		if (__builtin_expect(!allocated, 0)) {		// nothing was ever inserted
			return;
		}

		// find the bucket
		size_t bucket_index = hasher(key) & hash_mask;
		if (!buckets[bucket_index].data)
			return;

		data_item* temp = buckets[bucket_index].data;
		data_item* prev = NULL;
		while (temp && !equal(temp->first, key)) {
			prev = temp;
			temp = temp->next;
		}
		if (!temp)
			return;		// key not found

		// delete this data item from the chain
		_size--;
		if (!prev){	// we are deleting the first element in chain
			buckets[bucket_index].data = temp->next;

			if(temp->next == NULL)	// chain is now empty
				unlink_bucket(&buckets[bucket_index]);
		}else{
			prev->next = temp->next;
		}
		delete temp;
		total_memory -= sizeof(data_item);
	}

	// Historically erase() plus deep deletion of key/value (those deletes
	// were commented out in the original, leaving the two bodies
	// byte-identical) -- so forward instead of duplicating ~45 lines.
	void erase_full(const key_type& key) {
		erase(key);
	}

	iterator begin() {
		iterator iter;
		// find the first data item
		iter.bucket = first_bucket;
		iter.data = (first_bucket) ? first_bucket->data : NULL;
		return iter;
	}

	iterator end() {
		iterator iter;
		iter.bucket = NULL;
		iter.data = NULL;
		return iter;
	}

	// Delete every data item and reset the used-bucket list.  The bucket
	// array itself is retained for reuse.
	void clear() {
		while (first_bucket) {
			data_item* temp = first_bucket->data;
			while (temp) {
				data_item* next = temp->next;
				delete temp;
				temp = next;
			}
			first_bucket->data = NULL;
			hash_bucket* tmp = first_bucket;
			first_bucket = first_bucket->next_bucket;
			tmp->next_bucket = NULL;
			tmp->prev_bucket = NULL;
		}
		last_bucket = NULL;
		_size = 0;
		// report 0 if the bucket array was never allocated
		total_memory = allocated ? num_buckets * sizeof(hash_bucket) : 0;
	}

	// Re-pick the bucket count from the high-water mark.  Only legal on
	// an empty table (exits otherwise, as before).
	int resize() {
		if (_size) {
			fprintf(stderr, "Error: resizing non-empty hash table\n");
			exit(1);
		}

		double max_load = (1.0 * _max_size) / num_buckets;

		// resize table if its maximum load exceeded the load factor
		// OR it was less than half of the load factor
		size_t min_buckets = 0;
		if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
			min_buckets = _max_size / load_factor;
		}

		if (min_buckets) {
			// find the smallest power-of-2 size >= min_buckets
			size_t nb;
			for(nb=2;nb<min_buckets;nb*=2);
			num_buckets = nb;

			fprintf(stderr, "Resizing to %d buckets\n", (int)num_buckets);
			delete[] buckets;	// safe on NULL
			hash_mask = num_buckets-1;

			buckets = new hash_bucket[num_buckets];
			// mark allocated so the next insert() doesn't free and
			// reallocate the array we just built
			allocated = 1;
			total_memory = num_buckets * sizeof(hash_bucket);
		}

		return 0;
	}

	size_t size() const {
		return _size;
	}

	size_t max_size() const {
		return _max_size;
	}

	bool empty() const {
		return (_size == 0);
	}

	~hash_table() {
		clear();
		delete[] buckets;	// no-op when still NULL
	}

	// Approximate heap footprint in bytes.
	unsigned int get_mem_footprint() {
		return total_memory;
	}

};
423
424
template <class key_type, class hasher_func, class equal_func>
class hash_set  {

public :
	// One stored key; keys hashing to the same bucket are chained
	// through 'next'.
	struct data_item {
		key_type key;
		data_item* next;	// next data item in overflow chain

		data_item(const key_type& k) : key(k), next(NULL) { }
	};

	// One slot of the table.  Non-empty buckets are linked into a
	// doubly-linked used-bucket list, mirroring hash_table above.
	struct hash_bucket {
		data_item* data;		// head of overflow chain, NULL if empty
		hash_bucket* next_bucket;	// next bucket in the list of used buckets
		hash_bucket* prev_bucket;	// previous used bucket.  NEW: lets erase()
						// unlink an emptied bucket.  The old
						// singly-linked list left emptied buckets
						// (data == NULL) in the iteration list,
						// so clear()/operator++ dereferenced NULL
						// and a re-insert linked the bucket twice.

		hash_bucket() {
			data = NULL;
			next_bucket = 0;	// 0 means no bucket follows this one
			prev_bucket = 0;	// 0 means no bucket precedes this one
		}
	};

	// Forward iterator over every stored key.
	struct iterator {
		hash_bucket* bucket;
		data_item* data;

		iterator& operator++() {
			if (data->next)
				data = data->next;
			else {
				bucket = bucket->next_bucket;
				if (bucket)
					data = bucket->data;
				else
					data = NULL;
			}
			return *this;
		}

		bool operator!=(const iterator& it) {
			return (data != it.data || bucket != it.bucket);
		}

		bool operator==(const iterator& it) {
			return (data == it.data && bucket == it.bucket);
		}

//			NOTE : not certain if returning ref always works
		key_type &operator*() {
			return data->key;
		}
	};

private:
	hasher_func hasher;
	equal_func equal;

	double load_factor;	// maximum desired _size / num_buckets ratio

	size_t _size;		// current number of stored keys
	size_t _max_size;	// high-water mark of _size; drives resize()
	size_t num_buckets;	// always a power of 2
	size_t hash_mask;	// num_buckets - 1

	hash_bucket* first_bucket;	// first used bucket
	hash_bucket* last_bucket;	// last used bucket

	hash_bucket* buckets;

	// Remove a bucket whose chain just became empty from the
	// doubly-linked list of used buckets.
	void unlink_bucket(hash_bucket* b) {
		if (b->next_bucket)
			b->next_bucket->prev_bucket = b->prev_bucket;
		else
			last_bucket = b->prev_bucket;
		if (b->prev_bucket)
			b->prev_bucket->next_bucket = b->next_bucket;
		else
			first_bucket = b->next_bucket;
		b->next_bucket = NULL;
		b->prev_bucket = NULL;
	}

public:

	hash_set(const size_t expected_size = 100000, const double max_load = 1.25) {

		size_t min_buckets = (size_t)(expected_size / max_load);
		load_factor = max_load;
		size_t nb;	// was int: avoided signed/unsigned comparison below
		for(nb=2;nb<min_buckets;nb*=2);
		num_buckets = nb;
		hash_mask = nb-1;
		buckets = new hash_bucket[num_buckets];

		_size = 0;
		_max_size = 0;
		first_bucket = 0;
		last_bucket = 0;
	}

	// Insert a key.  Duplicates are not detected.  Always returns 0.
	int insert(const key_type& key) {
		data_item* d = new data_item(key);

		// find the insertion bucket
		size_t bucket_index = hasher(key) & hash_mask;
		if (!buckets[bucket_index].data) {
			// empty bucket: store the item and append the bucket
			// to the used-bucket list
			buckets[bucket_index].data = d;
			if (!first_bucket)
				first_bucket = &buckets[bucket_index];
			else
				last_bucket->next_bucket = &buckets[bucket_index];
			// NULL when the list was empty
			buckets[bucket_index].prev_bucket = last_bucket;
			last_bucket = &buckets[bucket_index];
		} else {	// we already have data items in this bucket
			// prepend the item to overflow chain
			data_item* temp = buckets[bucket_index].data;
			buckets[bucket_index].data = d;
			d->next = temp;
		}
		_size++;
		if(_size>_max_size)
			_max_size = _size;

		return 0;
	}

	// Returns an iterator to the key, or end() if absent.
	iterator find(const key_type& key) {
		iterator iter;

		size_t bucket_index = hasher(key) & hash_mask;
		if (!buckets[bucket_index].data) {
			iter.bucket = NULL;
			iter.data = NULL;
		} else {
			data_item* temp = buckets[bucket_index].data;
			while (temp && !equal(temp->key, key))
				temp = temp->next;
			iter.data = temp;
			iter.bucket = temp ? &buckets[bucket_index] : NULL;
		}
		return iter;
	}

	// Remove the (first) item with this key, if present.
	// BUG FIX: when the chain becomes empty the bucket is now unlinked
	// from the used-bucket list; previously it stayed listed with NULL
	// data, making later iteration/clear() dereference NULL.
	void erase(const key_type& key) {
		// find the bucket
		size_t bucket_index = hasher(key) & hash_mask;
		if (!buckets[bucket_index].data)
			return;

		data_item* temp = buckets[bucket_index].data;
		data_item* prev = NULL;
		while (temp && !equal(temp->key, key)) {
			prev = temp;
			temp = temp->next;
		}
		if (!temp)
			return;		// key not found

		// delete this data item from the chain
		_size--;
		if (!prev) {	// we are deleting the first element in chain
			buckets[bucket_index].data = temp->next;
			if (temp->next == NULL)		// chain is now empty
				unlink_bucket(&buckets[bucket_index]);
		} else {
			prev->next = temp->next;
		}
		delete temp;
	}

	iterator begin() {
		iterator iter;
		// find the first data item
		iter.bucket = first_bucket;
		iter.data = (first_bucket) ? first_bucket->data : NULL;
		return iter;
	}

	iterator end() {
		iterator iter;
		iter.bucket = NULL;
		iter.data = NULL;
		return iter;
	}

	// Delete every data item and reset the used-bucket list.  The bucket
	// array itself is retained for reuse.
	void clear() {
		while (first_bucket) {
			data_item* temp = first_bucket->data;
			while (temp) {
				data_item* next = temp->next;
				delete temp;
				temp = next;
			}
			first_bucket->data = NULL;
			hash_bucket* tmp = first_bucket;
			first_bucket = first_bucket->next_bucket;
			tmp->next_bucket = NULL;
			tmp->prev_bucket = NULL;
		}
		last_bucket = NULL;
		_size = 0;

	}

	// Re-pick the bucket count from the high-water mark.  Only legal on
	// an empty table (exits otherwise, as before).
	int resize() {
		if (_size) {
			fprintf(stderr, "Error: resizing non-empty hash table\n");
			exit(1);
		}

		double max_load = (1.0 * _max_size) / num_buckets;

		// resize table if its maximum load exceeded the load factor
		// OR it was less than half of the load factor
		size_t min_buckets = 0;
		if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
			min_buckets = _max_size / load_factor;
		}

		if (min_buckets) {
			// find the smallest power-of-2 size >= min_buckets
			size_t nb;
			for(nb=2;nb<min_buckets;nb*=2);
			num_buckets = nb;

			// BUG FIX: passing size_t to %d is undefined behavior;
			// cast explicitly (as hash_table::resize already did)
			fprintf(stderr, "Resizing to %d buckets\n", (int)num_buckets);
			delete[] buckets;
			hash_mask = num_buckets-1;

			buckets = new hash_bucket[num_buckets];
		}

		return 0;
	}


	size_t size() const {
		return _size;
	}

	bool empty() const {
		return (_size == 0);
	}

	~hash_set() {
		clear();
		delete[] buckets;
	}

};
669
670
671 #endif // __HASH_TABLE_H