Fixed newline characters throughout the code
[com/gs-lite.git] / include / hfta / hash_table.h
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 #ifndef __HASH_TABLE_H
17 #define __HASH_TABLE_H
18
19 #include <stdlib.h>
20 #include <stdio.h>
21
22
23
24 #define NUM_PRIMES 23
25
26 static unsigned int primes[NUM_PRIMES] =  {
27         11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,
28         25717, 51437, 102877, 205759, 411527, 823117, 1646237,
29         3292489, 6584983, 13169977, 26339969, 52679969
30 };
31
32
33
34 template <class key_type, class value_type, class hasher_func, class equal_func>
35 class hash_table  {
36
37 public :
38         struct data_item {
39                 key_type first;
40                 value_type second;
41                 data_item* next;        // next data item in overflow chain
42
43                 data_item(const key_type& k, const value_type& v) : first(k), second(v), next(NULL) { }
44         };
45
46         struct hash_bucket {
47                 data_item* data;        //
48                 hash_bucket* next_bucket;       // index of the next bucket in the list of used buckets
49                 hash_bucket* prev_bucket;       // index of the next bucket in the list of used buckets
50
51                 hash_bucket() {
52                         data = NULL;
53                         next_bucket = 0;        // 0 means no bucket follows this one
54                         prev_bucket = 0;        // 0 means no bucket precedes this one
55                 }
56         };
57
58         struct iterator {
59                 hash_bucket* bucket;
60                 data_item* data;
61
62                 iterator& operator++() {
63                         if (data->next)
64                                 data = data->next;
65                         else {
66                                 bucket = bucket->next_bucket;
67                                 if (bucket)
68                                         data = bucket->data;
69                                 else
70                                         data = NULL;
71                         }
72                         return *this;
73                 }
74
75 //                      Like ++, but don't go beyond the end of the bucket chain.
76                 iterator& next() {
77                         if (data->next)
78                                 data = data->next;
79                         else {
80                                 bucket = NULL;
81                                 data = NULL;
82                         }
83                         return *this;
84                 }
85
86                 bool operator!=(const iterator& it) {
87                         return (data != it.data || bucket != it.bucket);
88                 }
89
90                 bool operator==(const iterator& it) {
91                         return (data == it.data && bucket == it.bucket);
92                 }
93
94                 data_item &operator*() {
95                         return *data;
96                 }
97         };
98
99 private:
100         hasher_func hasher;
101         equal_func equal;
102
103         double load_factor;
104 //      double max_load;
105
106         size_t _size;
107         size_t _max_size;
108         size_t num_buckets;
109         size_t hash_mask;
110
111         hash_bucket* first_bucket;      // first used bucket
112         hash_bucket* last_bucket;               // last used bucket
113
114         hash_bucket* buckets;
115
116         // memory footpritn in bytes
117         unsigned int total_memory;
118
119 public:
120
121
122         hash_table(const size_t n_buckets = 131072, const double load = 0.5) {
123                 load_factor = load;
124                 int nb;
125                 for(nb=2;nb<n_buckets;nb*=2);
126                 num_buckets = nb;
127                 hash_mask = nb-1;
128                 buckets = new hash_bucket[num_buckets];
129
130                 _size = 0;
131                 _max_size = 0;
132 //              max_load = 0.0;
133                 first_bucket = 0;
134                 last_bucket = 0;
135
136                 total_memory = num_buckets * sizeof(hash_bucket);
137         }
138
139         int insert(const key_type& key, const value_type& val) {
140                 data_item* d = new data_item(key, val);
141                 total_memory += sizeof(data_item);
142
143                 // find the insertion bucket
144                 size_t bucket_index = hasher(key) & hash_mask;
145                 // if the bucket is empty just add new data_item
146                 if (!buckets[bucket_index].data) {
147                         // create new data item
148
149                         buckets[bucket_index].data = d;
150                         if (!first_bucket){
151                                 first_bucket = &buckets[bucket_index];
152                         }else{
153                                 last_bucket->next_bucket = &buckets[bucket_index];
154                         }
155
156                         if (last_bucket)
157                                 buckets[bucket_index].prev_bucket = last_bucket;
158
159                         last_bucket = &buckets[bucket_index];
160                 } else {        // we already have data items in this bucket
161                         // prepend the item to overflow chain
162                         data_item* temp = buckets[bucket_index].data;
163                         buckets[bucket_index].data = d;
164                         d->next = temp;
165                 }
166                 _size++;
167                 if(_size>_max_size)
168                         _max_size = _size;
169
170                 return 0;
171         }
172
173         data_item *iterate_find(data_item* t, const key_type& key) {
174                 data_item *temp = t;
175                         while (temp && !equal(temp->first, key)){
176                                 temp = temp->next;
177                         }
178                 return temp;
179         }
180
181
182         iterator find(const key_type& key) {
183                 iterator iter;
184
185                 // find the insertion bucket
186                 size_t bucket_index = hasher(key) & hash_mask;
187                 // if the bucket is empty just add new data_item
188                 if (!buckets[bucket_index].data) {
189                         iter.bucket = NULL;
190                         iter.data = NULL;
191                 } else {        // we already have data items in this bucket
192                         data_item* temp = buckets[bucket_index].data;
193 //                      temp = iterate_find(temp,key);
194
195                         while (temp && !equal(temp->first, key))
196                                 temp = temp->next;
197
198                         iter.data = temp;
199                         if (!temp)
200                                 iter.bucket = NULL;
201                         else
202                                 iter.bucket = &buckets[bucket_index];
203                 }
204                 return iter;
205         }
206
207         void erase(const key_type& key) {
208                 // find the  bucket
209                 size_t bucket_index = hasher(key) & hash_mask;
210                 // if the bucket is empty just add new data_item
211                 if (!buckets[bucket_index].data)
212                         return;
213
214                 data_item* temp = buckets[bucket_index].data;
215                 data_item* prev = NULL;
216                 while (temp && !equal(temp->first, key)) {
217                         prev = temp;
218                         temp = temp->next;
219                 }
220                 if (!temp)
221                         return;
222
223                 // delete this data item from the chain
224                 _size--;
225                 if (!prev){     // we are deleting the first element in chain
226                         buckets[bucket_index].data = temp->next;
227
228                         if(temp->next == NULL){
229                                 if(buckets[bucket_index].next_bucket){
230                                         buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
231                                 }else{
232                                         last_bucket = buckets[bucket_index].prev_bucket;
233                                 }
234                                 if(buckets[bucket_index].prev_bucket){
235                                         buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
236                                 }else{
237                                         first_bucket = buckets[bucket_index].next_bucket;
238                                 }
239                                 buckets[bucket_index].next_bucket = NULL;
240                                 buckets[bucket_index].prev_bucket = NULL;
241                         }
242                 }else{
243                         prev->next = temp->next;
244                 }
245         delete temp;
246         total_memory -= sizeof(data_item);
247         }
248
249         void erase_full(const key_type& key) {
250                 // find the  bucket
251                 size_t bucket_index = hasher(key) & hash_mask;
252                 // if the bucket is empty just add new data_item
253                 if (!buckets[bucket_index].data)
254                         return;
255
256                 data_item* temp = buckets[bucket_index].data;
257                 data_item* prev = NULL;
258                 while (temp && !equal(temp->first, key)) {
259                         prev = temp;
260                         temp = temp->next;
261                 }
262                 if (!temp)
263                         return;
264
265                 // delete this data item from the chain
266                 _size--;
267                 if (!prev){     // we are deleting the first element in chain
268                         buckets[bucket_index].data = temp->next;
269
270                         if(temp->next == NULL){
271                                 if(buckets[bucket_index].next_bucket){
272                                         buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
273                                 }else{
274                                         last_bucket = buckets[bucket_index].prev_bucket;
275                                 }
276                                 if(buckets[bucket_index].prev_bucket){
277                                         buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
278                                 }else{
279                                         first_bucket = buckets[bucket_index].next_bucket;
280                                 }
281                                 buckets[bucket_index].next_bucket = NULL;
282                                 buckets[bucket_index].prev_bucket = NULL;
283                         }
284                 }else{
285                         prev->next = temp->next;
286                 }
287         delete (*temp).first;
288         delete (*temp).second;
289         delete temp;
290         total_memory -= sizeof(data_item);
291         }
292
293         iterator begin() {
294                 iterator iter;
295                 // find the first data item
296                 iter.bucket = first_bucket;
297                 iter.data = (first_bucket) ? first_bucket->data : NULL;
298                 return iter;
299         }
300
301         iterator end() {
302                 iterator iter;
303                 iter.bucket = NULL;
304                 iter.data = NULL;
305                 return iter;
306         }
307
308         void clear() {
309                 data_item* temp;
310                 data_item* next;
311
312                 hash_bucket* tmp;
313                 while (first_bucket) {
314                         temp = first_bucket->data;
315                         while ( (next = temp->next) ) {
316                                 delete temp;
317                                 temp = next;
318                         }
319                         if(temp) delete(temp);
320                         first_bucket->data = NULL;
321                         tmp = first_bucket;
322                         first_bucket = first_bucket->next_bucket;
323                         tmp->next_bucket = NULL;
324                         tmp->prev_bucket = NULL;
325                 }
326                 last_bucket = NULL;
327                 _size = 0;
328                 total_memory = num_buckets * sizeof(hash_bucket);
329
330         }
331
332         int rehash() {
333                 if (_size) {
334                         fprintf(stderr, "Error: rehashing non-empty hash table\n");
335                         exit(1);
336                 }
337
338                 double max_load = (1.0 * _max_size) / num_buckets;
339                 if (max_load > load_factor ) {
340                         delete[] buckets;
341                         // roughly double the size of the table
342
343                         num_buckets *= 2;
344                         hash_mask = num_buckets-1;
345
346                         buckets = new hash_bucket[num_buckets];
347                         total_memory = num_buckets * sizeof(hash_bucket);
348                 }
349                 _max_size = _size;
350                 return 0;
351         }
352
353
354         size_t size() const {
355                 return _size;
356         }
357
358         bool empty() const {
359                 return (_size == 0);
360         }
361
362         ~hash_table() {
363                 clear();
364                 delete[] buckets;
365         }
366
367         unsigned int get_mem_footprint() {
368                 return total_memory;
369         }
370
371 };
372
373
374 template <class key_type, class hasher_func, class equal_func>
375 class hash_set  {
376
377 public :
378         struct data_item {
379                 key_type key;
380                 data_item* next;        // next data item in overflow chain
381
382                 data_item(const key_type& k) : key(k), next(NULL) { }
383         };
384
385         struct hash_bucket {
386                 data_item* data;        //
387                 hash_bucket* next_bucket;       // index of the next bucket in the list of used buckets
388
389                 hash_bucket() {
390                         data = NULL;
391                         next_bucket = 0;        // 0 means no bucket follows this one
392                 }
393         };
394
395         struct iterator {
396                 hash_bucket* bucket;
397                 data_item* data;
398
399                 iterator& operator++() {
400                         if (data->next)
401                                 data = data->next;
402                         else {
403                                 bucket = bucket->next_bucket;
404                                 if (bucket)
405                                         data = bucket->data;
406                                 else
407                                         data = NULL;
408                         }
409                         return *this;
410                 }
411
412                 bool operator!=(const iterator& it) {
413                         return (data != it.data || bucket != it.bucket);
414                 }
415
416                 bool operator==(const iterator& it) {
417                         return (data == it.data && bucket == it.bucket);
418                 }
419
420 //                      NOTE : not certain if returning ref always works
421                 key_type &operator*() {
422                         return data->key;
423                 }
424         };
425
426 private:
427         hasher_func hasher;
428         equal_func equal;
429
430         double load_factor;
431 //      double max_load;
432
433         size_t _size;
434         size_t _max_size;
435         size_t num_buckets;
436         size_t hash_mask;
437
438         hash_bucket* first_bucket;      // first used bucket
439         hash_bucket* last_bucket;               // last used bucket
440
441         hash_bucket* buckets;
442
443 public:
444         hash_set(const size_t n_buckets = 131072, const double load = 0.75) {
445                 load_factor = load;
446
447                 int nb;
448                 for(nb=2;nb<n_buckets;nb*=2);
449                 num_buckets = nb;
450                 hash_mask = num_buckets-1;
451
452                 buckets = new hash_bucket[num_buckets];
453
454                 _size = 0;
455                 _max_size = 0;
456 //              max_load = 0.0;
457                 first_bucket = 0;
458                 last_bucket = 0;
459         }
460
461         int insert(const key_type& key) {
462                 data_item* d = new data_item(key);
463
464                 // find the insertion bucket
465                 size_t bucket_index = hasher(key) & hash_mask;
466                 // if the bucket is empty just add new data_item
467                 if (!buckets[bucket_index].data) {
468                         // create new data item
469
470                         buckets[bucket_index].data = d;
471                         if (!first_bucket)
472                                 first_bucket = &buckets[bucket_index];
473                         else
474                                 last_bucket->next_bucket = &buckets[bucket_index];
475                         last_bucket = &buckets[bucket_index];
476                 } else {        // we already have data items in this bucket
477                         // prepend the item to overflow chain
478                         data_item* temp = buckets[bucket_index].data;
479                         buckets[bucket_index].data = d;
480                         d->next = temp;
481                 }
482                 _size++;
483                 if(_size>_max_size)
484                         _max_size = _size;
485
486                 return 0;
487         }
488
489         iterator find(const key_type& key) {
490                 iterator iter;
491
492                 // find the insertion bucket
493                 size_t bucket_index = hasher(key) & hash_mask;
494                 // if the bucket is empty just add new data_item
495                 if (!buckets[bucket_index].data) {
496                         iter.bucket = NULL;
497                         iter.data = NULL;
498                 } else {        // we already have data items in this bucket
499                         data_item* temp = buckets[bucket_index].data;
500                         while (temp && !equal(temp->key, key))
501                                 temp = temp->next;
502                         iter.data = temp;
503                         if (!temp)
504                                 iter.bucket = NULL;
505                         else
506                                 iter.bucket = &buckets[bucket_index];
507                 }
508                 return iter;
509         }
510
511         void erase(const key_type& key) {
512                 // find the  bucket
513                 size_t bucket_index = hasher(key) & hash_mask;
514                 // if the bucket is empty just add new data_item
515                 if (!buckets[bucket_index].data)
516                         return;
517
518                 data_item* temp = buckets[bucket_index].data;
519                 data_item* prev = NULL;
520                 while (temp && !equal(temp->key, key)) {
521                         prev = temp;
522                         temp = temp->next;
523                 }
524                 if (!temp)
525                         return;
526
527                 // delete this data item from the chain
528                 _size--;
529                 if (!prev)      // we are deleting the first element in chain
530                         buckets[bucket_index].data = temp->next;
531                 else
532                         prev->next = temp->next;
533                 delete temp;
534         }
535
536         iterator begin() {
537                 iterator iter;
538                 // find the first data item
539                 iter.bucket = first_bucket;
540                 iter.data = (first_bucket) ? first_bucket->data : NULL;
541                 return iter;
542         }
543
544         iterator end() {
545                 iterator iter;
546                 iter.bucket = NULL;
547                 iter.data = NULL;
548                 return iter;
549         }
550
551         void clear() {
552                 data_item* temp;
553                 data_item* next;
554
555                 hash_bucket* tmp;
556                 while (first_bucket) {
557                         temp = first_bucket->data;
558                         while ( (next = temp->next) ) {
559                                 delete temp;
560                                 temp = next;
561                         }
562                         if(temp) delete(temp);
563                         first_bucket->data = NULL;
564                         tmp = first_bucket;
565                         first_bucket = first_bucket->next_bucket;
566                         tmp->next_bucket = NULL;
567                 }
568                 last_bucket = NULL;
569                 _size = 0;
570
571         }
572
573         int rehash() {
574                 if (_size) {
575                         fprintf(stderr, "Error: rehashing non-empty hash table\n");
576                         exit(1);
577                 }
578
579                 double max_load = (1.0 * _max_size) / num_buckets;
580
581                 if (max_load > load_factor ) {
582                         delete[] buckets;
583                         // roughly double the size of the table
584
585                         num_buckets *= 2;
586                         hash_mask = num_buckets -1;
587                         buckets = new hash_bucket[num_buckets];
588                 }
589                 _max_size = _size;
590                 return 0;
591         }
592
593
594         size_t size() const {
595                 return _size;
596         }
597
598         bool empty() const {
599                 return (_size == 0);
600         }
601
602         ~hash_set() {
603                 clear();
604                 delete[] buckets;
605         }
606
607 };
608
609
610 #endif // __HASH_TABLE_H