cf913e5e3e65b0decc4c07b1eb038d856edba348
[com/gs-lite.git] / include / hfta / hash_table.h
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #ifndef __HASH_TABLE_H
17 #define __HASH_TABLE_H
18
19 #include <stdlib.h>
20 #include <stdio.h>
21
22
23
24 #define NUM_PRIMES 23
25
26 static unsigned int primes[NUM_PRIMES] =  {
27         11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,
28         25717, 51437, 102877, 205759, 411527, 823117, 1646237,
29         3292489, 6584983, 13169977, 26339969, 52679969
30 };
31
32
33
34 template <class key_type, class value_type, class hasher_func, class equal_func>
35 class hash_table  {
36
37 public :
38         struct data_item {
39                 key_type first;
40                 value_type second;
41                 data_item* next;        // next data item in overflow chain
42
43                 data_item(const key_type& k, const value_type& v) : first(k), second(v), next(NULL) { }
44         };
45
46         struct hash_bucket {
47                 data_item* data;        //
48                 hash_bucket* next_bucket;       // index of the next bucket in the list of used buckets
49                 hash_bucket* prev_bucket;       // index of the next bucket in the list of used buckets
50
51                 hash_bucket() {
52                         data = NULL;
53                         next_bucket = 0;        // 0 means no bucket follows this one
54                         prev_bucket = 0;        // 0 means no bucket precedes this one
55                 }
56         };
57
58         struct iterator {
59                 hash_bucket* bucket;
60                 data_item* data;
61
62                 iterator& operator++() {
63                         if (data->next)
64                                 data = data->next;
65                         else {
66                                 bucket = bucket->next_bucket;
67                                 if (bucket)
68                                         data = bucket->data;
69                                 else
70                                         data = NULL;
71                         }
72                         return *this;
73                 }
74
75 //                      Like ++, but don't go beyond the end of the bucket chain.
76                 iterator& next() {
77                         if (data->next)
78                                 data = data->next;
79                         else {
80                                 bucket = NULL;
81                                 data = NULL;
82                         }
83                         return *this;
84                 }
85
86                 bool operator!=(const iterator& it) {
87                         return (data != it.data || bucket != it.bucket);
88                 }
89
90                 bool operator==(const iterator& it) {
91                         return (data == it.data && bucket == it.bucket);
92                 }
93
94                 data_item &operator*() {
95                         return *data;
96                 }
97         };
98
99 private:
100         hasher_func hasher;
101         equal_func equal;
102
103         double load_factor;
104 //      double max_load;
105
106         size_t _size;
107         size_t _max_size;
108         size_t num_buckets;
109         size_t hash_mask;
110
111         hash_bucket* first_bucket;      // first used bucket
112         hash_bucket* last_bucket;               // last used bucket
113
114         hash_bucket* buckets;
115
116         // memory footpritn in bytes
117         unsigned int total_memory;
118
119 public:
120
121
122         hash_table(const size_t n_buckets = 65536, const double load = 1.0) {
123                 load_factor = load;
124                 int nb;
125                 for(nb=2;nb<n_buckets;nb*=2);
126                 num_buckets = nb;
127                 hash_mask = nb-1;
128                 buckets = new hash_bucket[num_buckets];
129
130                 _size = 0;
131                 _max_size = 0;
132 //              max_load = 0.0;
133                 first_bucket = 0;
134                 last_bucket = 0;
135
136                 total_memory = num_buckets * sizeof(hash_bucket);
137         }
138
139         int insert(const key_type& key, const value_type& val) {
140                 data_item* d = new data_item(key, val);
141                 total_memory += sizeof(data_item);
142
143                 // find the insertion bucket
144                 size_t bucket_index = hasher(key) & hash_mask;
145                 // if the bucket is empty just add new data_item
146                 if (!buckets[bucket_index].data) {
147                         // create new data item
148
149                         buckets[bucket_index].data = d;
150                         if (!first_bucket){
151                                 first_bucket = &buckets[bucket_index];
152                         }else{
153                                 last_bucket->next_bucket = &buckets[bucket_index];
154                         }
155
156                         if (last_bucket)
157                                 buckets[bucket_index].prev_bucket = last_bucket;
158
159                         last_bucket = &buckets[bucket_index];
160                 } else {        // we already have data items in this bucket
161                         // prepend the item to overflow chain
162                         data_item* temp = buckets[bucket_index].data;
163                         buckets[bucket_index].data = d;
164                         d->next = temp;
165                 }
166                 _size++;
167                 if(_size>_max_size)
168                         _max_size = _size;
169
170                 return 0;
171         }
172
173         data_item *iterate_find(data_item* t, const key_type& key) {
174                 data_item *temp = t;
175                         while (temp && !equal(temp->first, key)){
176                                 temp = temp->next;
177                         }
178                 return temp;
179         }
180
181
182         iterator find(const key_type& key) {
183                 iterator iter;
184
185                 // find the insertion bucket
186                 size_t bucket_index = hasher(key) & hash_mask;
187                 // if the bucket is empty just add new data_item
188                 if (!buckets[bucket_index].data) {
189                         iter.bucket = NULL;
190                         iter.data = NULL;
191                 } else {        // we already have data items in this bucket
192                         data_item* temp = buckets[bucket_index].data;
193 //                      temp = iterate_find(temp,key);
194
195                         while (temp && !equal(temp->first, key))
196                                 temp = temp->next;
197
198                         iter.data = temp;
199                         if (!temp)
200                                 iter.bucket = NULL;
201                         else
202                                 iter.bucket = &buckets[bucket_index];
203                 }
204                 return iter;
205         }
206
207         void erase(const key_type& key) {
208                 // find the  bucket
209                 size_t bucket_index = hasher(key) & hash_mask;
210                 // if the bucket is empty just add new data_item
211                 if (!buckets[bucket_index].data)
212                         return;
213
214                 data_item* temp = buckets[bucket_index].data;
215                 data_item* prev = NULL;
216                 while (temp && !equal(temp->first, key)) {
217                         prev = temp;
218                         temp = temp->next;
219                 }
220                 if (!temp)
221                         return;
222
223                 // delete this data item from the chain
224                 _size--;
225                 if (!prev){     // we are deleting the first element in chain
226                         buckets[bucket_index].data = temp->next;
227
228                         if(temp->next == NULL){
229                                 if(buckets[bucket_index].next_bucket){
230                                         buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
231                                 }else{
232                                         last_bucket = buckets[bucket_index].prev_bucket;
233                                 }
234                                 if(buckets[bucket_index].prev_bucket){
235                                         buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
236                                 }else{
237                                         first_bucket = buckets[bucket_index].next_bucket;
238                                 }
239                                 buckets[bucket_index].next_bucket = NULL;
240                                 buckets[bucket_index].prev_bucket = NULL;
241                         }
242                 }else{
243                         prev->next = temp->next;
244                 }
245         delete temp;
246         total_memory -= sizeof(data_item);
247         }
248
249         void erase_full(const key_type& key) {
250                 // find the  bucket
251                 size_t bucket_index = hasher(key) & hash_mask;
252                 // if the bucket is empty just add new data_item
253                 if (!buckets[bucket_index].data)
254                         return;
255
256                 data_item* temp = buckets[bucket_index].data;
257                 data_item* prev = NULL;
258                 while (temp && !equal(temp->first, key)) {
259                         prev = temp;
260                         temp = temp->next;
261                 }
262                 if (!temp)
263                         return;
264
265                 // delete this data item from the chain
266                 _size--;
267                 if (!prev){     // we are deleting the first element in chain
268                         buckets[bucket_index].data = temp->next;
269
270                         if(temp->next == NULL){
271                                 if(buckets[bucket_index].next_bucket){
272                                         buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
273                                 }else{
274                                         last_bucket = buckets[bucket_index].prev_bucket;
275                                 }
276                                 if(buckets[bucket_index].prev_bucket){
277                                         buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
278                                 }else{
279                                         first_bucket = buckets[bucket_index].next_bucket;
280                                 }
281                                 buckets[bucket_index].next_bucket = NULL;
282                                 buckets[bucket_index].prev_bucket = NULL;
283                         }
284                 }else{
285                         prev->next = temp->next;
286                 }
287         delete (*temp).first;
288         delete (*temp).second;
289         delete temp;
290         total_memory -= sizeof(data_item);
291         }
292
293         iterator begin() {
294                 iterator iter;
295                 // find the first data item
296                 iter.bucket = first_bucket;
297                 iter.data = (first_bucket) ? first_bucket->data : NULL;
298                 return iter;
299         }
300
301         iterator end() {
302                 iterator iter;
303                 iter.bucket = NULL;
304                 iter.data = NULL;
305                 return iter;
306         }
307
308         void clear() {
309                 data_item* temp;
310                 data_item* next;
311
312                 hash_bucket* tmp;
313                 while (first_bucket) {
314                         temp = first_bucket->data;
315                         while ( (next = temp->next) ) {
316                                 delete temp;
317                                 temp = next;
318                         }
319                         if(temp) delete(temp);
320                         first_bucket->data = NULL;
321                         tmp = first_bucket;
322                         first_bucket = first_bucket->next_bucket;
323                         tmp->next_bucket = NULL;
324                         tmp->prev_bucket = NULL;
325                 }
326                 last_bucket = NULL;
327                 _size = 0;
328                 total_memory = num_buckets * sizeof(hash_bucket);
329
330         }
331
332         int rehash() {
333                 if (_size) {
334                         fprintf(stderr, "Error: rehashing non-empty hash table\n");
335                         exit(1);
336                 }               
337
338                 double max_load = (1.0 * _max_size) / num_buckets;
339
340                 // reize table if its maximum load exceed the load factor
341                 // OR it was less then half of the load factor
342                 size_t min_buckets = 0;
343                 if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
344                         min_buckets = _max_size / load_factor;
345                 } 
346
347                 if (min_buckets) {
348                         // find power-of-2 size large than min_buckets;
349                         int nb;
350                         for(nb=2;nb<min_buckets;nb*=2);
351                         num_buckets = nb;
352
353                         delete[] buckets;
354                         hash_mask = num_buckets-1;
355
356                         buckets = new hash_bucket[num_buckets];         
357                 }
358
359                 return 0;
360         }
361
362
363
364         size_t size() const {
365                 return _size;
366         }
367
368         bool empty() const {
369                 return (_size == 0);
370         }
371
372         ~hash_table() {
373                 clear();
374                 delete[] buckets;
375         }
376
377         unsigned int get_mem_footprint() {
378                 return total_memory;
379         }
380
381 };
382
383
384 template <class key_type, class hasher_func, class equal_func>
385 class hash_set  {
386
387 public :
388         struct data_item {
389                 key_type key;
390                 data_item* next;        // next data item in overflow chain
391
392                 data_item(const key_type& k) : key(k), next(NULL) { }
393         };
394
395         struct hash_bucket {
396                 data_item* data;        //
397                 hash_bucket* next_bucket;       // index of the next bucket in the list of used buckets
398
399                 hash_bucket() {
400                         data = NULL;
401                         next_bucket = 0;        // 0 means no bucket follows this one
402                 }
403         };
404
405         struct iterator {
406                 hash_bucket* bucket;
407                 data_item* data;
408
409                 iterator& operator++() {
410                         if (data->next)
411                                 data = data->next;
412                         else {
413                                 bucket = bucket->next_bucket;
414                                 if (bucket)
415                                         data = bucket->data;
416                                 else
417                                         data = NULL;
418                         }
419                         return *this;
420                 }
421
422                 bool operator!=(const iterator& it) {
423                         return (data != it.data || bucket != it.bucket);
424                 }
425
426                 bool operator==(const iterator& it) {
427                         return (data == it.data && bucket == it.bucket);
428                 }
429
430 //                      NOTE : not certain if returning ref always works
431                 key_type &operator*() {
432                         return data->key;
433                 }
434         };
435
436 private:
437         hasher_func hasher;
438         equal_func equal;
439
440         double load_factor;
441 //      double max_load;
442
443         size_t _size;
444         size_t _max_size;
445         size_t num_buckets;
446         size_t hash_mask;
447
448         hash_bucket* first_bucket;      // first used bucket
449         hash_bucket* last_bucket;               // last used bucket
450
451         hash_bucket* buckets;
452
453 public:
454         hash_set(const size_t n_buckets = 65536, const double load = 1.0) {
455                 load_factor = load;
456
457                 int nb;
458                 for(nb=2;nb<n_buckets;nb*=2);
459                 num_buckets = nb;
460                 hash_mask = num_buckets-1;
461
462                 buckets = new hash_bucket[num_buckets];
463
464                 _size = 0;
465                 _max_size = 0;
466 //              max_load = 0.0;
467                 first_bucket = 0;
468                 last_bucket = 0;
469         }
470
471         int insert(const key_type& key) {
472                 data_item* d = new data_item(key);
473
474                 // find the insertion bucket
475                 size_t bucket_index = hasher(key) & hash_mask;
476                 // if the bucket is empty just add new data_item
477                 if (!buckets[bucket_index].data) {
478                         // create new data item
479
480                         buckets[bucket_index].data = d;
481                         if (!first_bucket)
482                                 first_bucket = &buckets[bucket_index];
483                         else
484                                 last_bucket->next_bucket = &buckets[bucket_index];
485                         last_bucket = &buckets[bucket_index];
486                 } else {        // we already have data items in this bucket
487                         // prepend the item to overflow chain
488                         data_item* temp = buckets[bucket_index].data;
489                         buckets[bucket_index].data = d;
490                         d->next = temp;
491                 }
492                 _size++;
493                 if(_size>_max_size)
494                         _max_size = _size;
495
496                 return 0;
497         }
498
499         iterator find(const key_type& key) {
500                 iterator iter;
501
502                 // find the insertion bucket
503                 size_t bucket_index = hasher(key) & hash_mask;
504                 // if the bucket is empty just add new data_item
505                 if (!buckets[bucket_index].data) {
506                         iter.bucket = NULL;
507                         iter.data = NULL;
508                 } else {        // we already have data items in this bucket
509                         data_item* temp = buckets[bucket_index].data;
510                         while (temp && !equal(temp->key, key))
511                                 temp = temp->next;
512                         iter.data = temp;
513                         if (!temp)
514                                 iter.bucket = NULL;
515                         else
516                                 iter.bucket = &buckets[bucket_index];
517                 }
518                 return iter;
519         }
520
521         void erase(const key_type& key) {
522                 // find the  bucket
523                 size_t bucket_index = hasher(key) & hash_mask;
524                 // if the bucket is empty just add new data_item
525                 if (!buckets[bucket_index].data)
526                         return;
527
528                 data_item* temp = buckets[bucket_index].data;
529                 data_item* prev = NULL;
530                 while (temp && !equal(temp->key, key)) {
531                         prev = temp;
532                         temp = temp->next;
533                 }
534                 if (!temp)
535                         return;
536
537                 // delete this data item from the chain
538                 _size--;
539                 if (!prev)      // we are deleting the first element in chain
540                         buckets[bucket_index].data = temp->next;
541                 else
542                         prev->next = temp->next;
543                 delete temp;
544         }
545
546         iterator begin() {
547                 iterator iter;
548                 // find the first data item
549                 iter.bucket = first_bucket;
550                 iter.data = (first_bucket) ? first_bucket->data : NULL;
551                 return iter;
552         }
553
554         iterator end() {
555                 iterator iter;
556                 iter.bucket = NULL;
557                 iter.data = NULL;
558                 return iter;
559         }
560
561         void clear() {
562                 data_item* temp;
563                 data_item* next;
564
565                 hash_bucket* tmp;
566                 while (first_bucket) {
567                         temp = first_bucket->data;
568                         while ( (next = temp->next) ) {
569                                 delete temp;
570                                 temp = next;
571                         }
572                         if(temp) delete(temp);
573                         first_bucket->data = NULL;
574                         tmp = first_bucket;
575                         first_bucket = first_bucket->next_bucket;
576                         tmp->next_bucket = NULL;
577                 }
578                 last_bucket = NULL;
579                 _size = 0;
580
581         }
582
583         int rehash() {
584                 if (_size) {
585                         fprintf(stderr, "Error: rehashing non-empty hash table\n");
586                         exit(1);
587                 }               
588
589                 double max_load = (1.0 * _max_size) / num_buckets;
590
591                 // reize table if its maximum load exceed the load factor
592                 // OR it was less then half of the load factor
593                 size_t min_buckets = 0;
594                 if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
595                         min_buckets = _max_size / load_factor;
596                 } 
597
598                 if (min_buckets) {
599                         // find power-of-2 size large than min_buckets;
600                         int nb;
601                         for(nb=2;nb<min_buckets;nb*=2);
602                         num_buckets = nb;
603
604                         delete[] buckets;
605                         hash_mask = num_buckets-1;
606
607                         buckets = new hash_bucket[num_buckets];         
608                 }
609
610                 return 0;
611         }
612
613
614         size_t size() const {
615                 return _size;
616         }
617
618         bool empty() const {
619                 return (_size == 0);
620         }
621
622         ~hash_set() {
623                 clear();
624                 delete[] buckets;
625         }
626
627 };
628
629
630 #endif // __HASH_TABLE_H