Added quantiling UDAFs
[com/gs-lite.git] / include / hfta / hash_table.h
1 /* ------------------------------------------------\r
2 Copyright 2014 AT&T Intellectual Property\r
3    Licensed under the Apache License, Version 2.0 (the "License");\r
4    you may not use this file except in compliance with the License.\r
5    You may obtain a copy of the License at\r
6 \r
7      http://www.apache.org/licenses/LICENSE-2.0\r
8 \r
9    Unless required by applicable law or agreed to in writing, software\r
10    distributed under the License is distributed on an "AS IS" BASIS,\r
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
12    See the License for the specific language governing permissions and\r
13    limitations under the License.\r
14  ------------------------------------------- */\r
15 \r
16 #ifndef __HASH_TABLE_H\r
17 #define __HASH_TABLE_H\r
18 \r
19 #include <stdlib.h>\r
20 #include <stdio.h>\r
21 \r
22 \r
23 \r
24 #define NUM_PRIMES 23\r
25 \r
26 static unsigned int primes[NUM_PRIMES] =  {\r
27         11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,\r
28         25717, 51437, 102877, 205759, 411527, 823117, 1646237,\r
29         3292489, 6584983, 13169977, 26339969, 52679969\r
30 };\r
31 \r
32 \r
33 \r
34 template <class key_type, class value_type, class hasher_func, class equal_func>\r
35 class hash_table  {\r
36 \r
37 public :\r
38         struct data_item {\r
39                 key_type first;\r
40                 value_type second;\r
41                 data_item* next;        // next data item in overflow chain\r
42 \r
43                 data_item(const key_type& k, const value_type& v) : first(k), second(v), next(NULL) { }\r
44         };\r
45 \r
46         struct hash_bucket {\r
47                 data_item* data;        //\r
48                 hash_bucket* next_bucket;       // index of the next bucket in the list of used buckets\r
49                 hash_bucket* prev_bucket;       // index of the next bucket in the list of used buckets\r
50 \r
51                 hash_bucket() {\r
52                         data = NULL;\r
53                         next_bucket = 0;        // 0 means no bucket follows this one\r
54                         prev_bucket = 0;        // 0 means no bucket precedes this one\r
55                 }\r
56         };\r
57 \r
58         struct iterator {\r
59                 hash_bucket* bucket;\r
60                 data_item* data;\r
61 \r
62                 iterator& operator++() {\r
63                         if (data->next)\r
64                                 data = data->next;\r
65                         else {\r
66                                 bucket = bucket->next_bucket;\r
67                                 if (bucket)\r
68                                         data = bucket->data;\r
69                                 else\r
70                                         data = NULL;\r
71                         }\r
72                         return *this;\r
73                 }\r
74 \r
75 //                      Like ++, but don't go beyond the end of the bucket chain.\r
76                 iterator& next() {\r
77                         if (data->next)\r
78                                 data = data->next;\r
79                         else {\r
80                                 bucket = NULL;\r
81                                 data = NULL;\r
82                         }\r
83                         return *this;\r
84                 }\r
85 \r
86                 bool operator!=(const iterator& it) {\r
87                         return (data != it.data || bucket != it.bucket);\r
88                 }\r
89 \r
90                 bool operator==(const iterator& it) {\r
91                         return (data == it.data && bucket == it.bucket);\r
92                 }\r
93 \r
94                 data_item &operator*() {\r
95                         return *data;\r
96                 }\r
97         };\r
98 \r
99 private:\r
100         hasher_func hasher;\r
101         equal_func equal;\r
102 \r
103         double load_factor;\r
104 //      double max_load;\r
105 \r
106         size_t _size;\r
107         size_t _max_size;\r
108         size_t num_buckets;\r
109         size_t hash_mask;\r
110 \r
111         hash_bucket* first_bucket;      // first used bucket\r
112         hash_bucket* last_bucket;               // last used bucket\r
113 \r
114         hash_bucket* buckets;\r
115 \r
116         // memory footpritn in bytes\r
117         unsigned int total_memory;\r
118 \r
119 public:\r
120 \r
121 \r
122         hash_table(const size_t n_buckets = 131072, const double load = 0.5) {\r
123                 load_factor = load;\r
124                 int nb;\r
125                 for(nb=2;nb<n_buckets;nb*=2);\r
126                 num_buckets = nb;\r
127                 hash_mask = nb-1;\r
128                 buckets = new hash_bucket[num_buckets];\r
129 \r
130                 _size = 0;\r
131                 _max_size = 0;\r
132 //              max_load = 0.0;\r
133                 first_bucket = 0;\r
134                 last_bucket = 0;\r
135 \r
136                 total_memory = num_buckets * sizeof(hash_bucket);\r
137         }\r
138 \r
139         int insert(const key_type& key, const value_type& val) {\r
140                 data_item* d = new data_item(key, val);\r
141                 total_memory += sizeof(data_item);\r
142 \r
143                 // find the insertion bucket\r
144                 size_t bucket_index = hasher(key) & hash_mask;\r
145                 // if the bucket is empty just add new data_item\r
146                 if (!buckets[bucket_index].data) {\r
147                         // create new data item\r
148 \r
149                         buckets[bucket_index].data = d;\r
150                         if (!first_bucket){\r
151                                 first_bucket = &buckets[bucket_index];\r
152                         }else{\r
153                                 last_bucket->next_bucket = &buckets[bucket_index];\r
154                         }\r
155 \r
156                         if (last_bucket)\r
157                                 buckets[bucket_index].prev_bucket = last_bucket;\r
158 \r
159                         last_bucket = &buckets[bucket_index];\r
160                 } else {        // we already have data items in this bucket\r
161                         // prepend the item to overflow chain\r
162                         data_item* temp = buckets[bucket_index].data;\r
163                         buckets[bucket_index].data = d;\r
164                         d->next = temp;\r
165                 }\r
166                 _size++;\r
167                 if(_size>_max_size)\r
168                         _max_size = _size;\r
169 \r
170                 return 0;\r
171         }\r
172 \r
173         data_item *iterate_find(data_item* t, const key_type& key) {\r
174                 data_item *temp = t;\r
175                         while (temp && !equal(temp->first, key)){\r
176                                 temp = temp->next;\r
177                         }\r
178                 return temp;\r
179         }\r
180 \r
181 \r
182         iterator find(const key_type& key) {\r
183                 iterator iter;\r
184 \r
185                 // find the insertion bucket\r
186                 size_t bucket_index = hasher(key) & hash_mask;\r
187                 // if the bucket is empty just add new data_item\r
188                 if (!buckets[bucket_index].data) {\r
189                         iter.bucket = NULL;\r
190                         iter.data = NULL;\r
191                 } else {        // we already have data items in this bucket\r
192                         data_item* temp = buckets[bucket_index].data;\r
193 //                      temp = iterate_find(temp,key);\r
194 \r
195                         while (temp && !equal(temp->first, key))\r
196                                 temp = temp->next;\r
197 \r
198                         iter.data = temp;\r
199                         if (!temp)\r
200                                 iter.bucket = NULL;\r
201                         else\r
202                                 iter.bucket = &buckets[bucket_index];\r
203                 }\r
204                 return iter;\r
205         }\r
206 \r
207         void erase(const key_type& key) {\r
208                 // find the  bucket\r
209                 size_t bucket_index = hasher(key) & hash_mask;\r
210                 // if the bucket is empty just add new data_item\r
211                 if (!buckets[bucket_index].data)\r
212                         return;\r
213 \r
214                 data_item* temp = buckets[bucket_index].data;\r
215                 data_item* prev = NULL;\r
216                 while (temp && !equal(temp->first, key)) {\r
217                         prev = temp;\r
218                         temp = temp->next;\r
219                 }\r
220                 if (!temp)\r
221                         return;\r
222 \r
223                 // delete this data item from the chain\r
224                 _size--;\r
225                 if (!prev){     // we are deleting the first element in chain\r
226                         buckets[bucket_index].data = temp->next;\r
227 \r
228                         if(temp->next == NULL){\r
229                                 if(buckets[bucket_index].next_bucket){\r
230                                         buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;\r
231                                 }else{\r
232                                         last_bucket = buckets[bucket_index].prev_bucket;\r
233                                 }\r
234                                 if(buckets[bucket_index].prev_bucket){\r
235                                         buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;\r
236                                 }else{\r
237                                         first_bucket = buckets[bucket_index].next_bucket;\r
238                                 }\r
239                                 buckets[bucket_index].next_bucket = NULL;\r
240                                 buckets[bucket_index].prev_bucket = NULL;\r
241                         }\r
242                 }else{\r
243                         prev->next = temp->next;\r
244                 }\r
245         delete temp;\r
246         total_memory -= sizeof(data_item);\r
247         }\r
248 \r
249         void erase_full(const key_type& key) {\r
250                 // find the  bucket\r
251                 size_t bucket_index = hasher(key) & hash_mask;\r
252                 // if the bucket is empty just add new data_item\r
253                 if (!buckets[bucket_index].data)\r
254                         return;\r
255 \r
256                 data_item* temp = buckets[bucket_index].data;\r
257                 data_item* prev = NULL;\r
258                 while (temp && !equal(temp->first, key)) {\r
259                         prev = temp;\r
260                         temp = temp->next;\r
261                 }\r
262                 if (!temp)\r
263                         return;\r
264 \r
265                 // delete this data item from the chain\r
266                 _size--;\r
267                 if (!prev){     // we are deleting the first element in chain\r
268                         buckets[bucket_index].data = temp->next;\r
269 \r
270                         if(temp->next == NULL){\r
271                                 if(buckets[bucket_index].next_bucket){\r
272                                         buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;\r
273                                 }else{\r
274                                         last_bucket = buckets[bucket_index].prev_bucket;\r
275                                 }\r
276                                 if(buckets[bucket_index].prev_bucket){\r
277                                         buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;\r
278                                 }else{\r
279                                         first_bucket = buckets[bucket_index].next_bucket;\r
280                                 }\r
281                                 buckets[bucket_index].next_bucket = NULL;\r
282                                 buckets[bucket_index].prev_bucket = NULL;\r
283                         }\r
284                 }else{\r
285                         prev->next = temp->next;\r
286                 }\r
287         delete (*temp).first;\r
288         delete (*temp).second;\r
289         delete temp;\r
290         total_memory -= sizeof(data_item);\r
291         }\r
292 \r
293         iterator begin() {\r
294                 iterator iter;\r
295                 // find the first data item\r
296                 iter.bucket = first_bucket;\r
297                 iter.data = (first_bucket) ? first_bucket->data : NULL;\r
298                 return iter;\r
299         }\r
300 \r
301         iterator end() {\r
302                 iterator iter;\r
303                 iter.bucket = NULL;\r
304                 iter.data = NULL;\r
305                 return iter;\r
306         }\r
307 \r
308         void clear() {\r
309                 data_item* temp;\r
310                 data_item* next;\r
311 \r
312                 hash_bucket* tmp;\r
313                 while (first_bucket) {\r
314                         temp = first_bucket->data;\r
315                         while ( (next = temp->next) ) {\r
316                                 delete temp;\r
317                                 temp = next;\r
318                         }\r
319                         if(temp) delete(temp);\r
320                         first_bucket->data = NULL;\r
321                         tmp = first_bucket;\r
322                         first_bucket = first_bucket->next_bucket;\r
323                         tmp->next_bucket = NULL;\r
324                         tmp->prev_bucket = NULL;\r
325                 }\r
326                 last_bucket = NULL;\r
327                 _size = 0;\r
328                 total_memory = num_buckets * sizeof(hash_bucket);\r
329 \r
330         }\r
331 \r
332         int rehash() {\r
333                 if (_size) {\r
334                         fprintf(stderr, "Error: rehashing non-empty hash table\n");\r
335                         exit(1);\r
336                 }\r
337 \r
338                 double max_load = (1.0 * _max_size) / num_buckets;\r
339                 if (max_load > load_factor ) {\r
340                         delete[] buckets;\r
341                         // roughly double the size of the table\r
342 \r
343                         num_buckets *= 2;\r
344                         hash_mask = num_buckets-1;\r
345 \r
346                         buckets = new hash_bucket[num_buckets];\r
347                         total_memory = num_buckets * sizeof(hash_bucket);\r
348                 }\r
349                 _max_size = _size;\r
350                 return 0;\r
351         }\r
352 \r
353 \r
354         size_t size() const {\r
355                 return _size;\r
356         }\r
357 \r
358         bool empty() const {\r
359                 return (_size == 0);\r
360         }\r
361 \r
362         ~hash_table() {\r
363                 clear();\r
364                 delete[] buckets;\r
365         }\r
366 \r
367         unsigned int get_mem_footprint() {\r
368                 return total_memory;\r
369         }\r
370 \r
371 };\r
372 \r
373 \r
374 template <class key_type, class hasher_func, class equal_func>\r
375 class hash_set  {\r
376 \r
377 public :\r
378         struct data_item {\r
379                 key_type key;\r
380                 data_item* next;        // next data item in overflow chain\r
381 \r
382                 data_item(const key_type& k) : key(k), next(NULL) { }\r
383         };\r
384 \r
385         struct hash_bucket {\r
386                 data_item* data;        //\r
387                 hash_bucket* next_bucket;       // index of the next bucket in the list of used buckets\r
388 \r
389                 hash_bucket() {\r
390                         data = NULL;\r
391                         next_bucket = 0;        // 0 means no bucket follows this one\r
392                 }\r
393         };\r
394 \r
395         struct iterator {\r
396                 hash_bucket* bucket;\r
397                 data_item* data;\r
398 \r
399                 iterator& operator++() {\r
400                         if (data->next)\r
401                                 data = data->next;\r
402                         else {\r
403                                 bucket = bucket->next_bucket;\r
404                                 if (bucket)\r
405                                         data = bucket->data;\r
406                                 else\r
407                                         data = NULL;\r
408                         }\r
409                         return *this;\r
410                 }\r
411 \r
412                 bool operator!=(const iterator& it) {\r
413                         return (data != it.data || bucket != it.bucket);\r
414                 }\r
415 \r
416                 bool operator==(const iterator& it) {\r
417                         return (data == it.data && bucket == it.bucket);\r
418                 }\r
419 \r
420 //                      NOTE : not certain if returning ref always works\r
421                 key_type &operator*() {\r
422                         return data->key;\r
423                 }\r
424         };\r
425 \r
426 private:\r
427         hasher_func hasher;\r
428         equal_func equal;\r
429 \r
430         double load_factor;\r
431 //      double max_load;\r
432 \r
433         size_t _size;\r
434         size_t _max_size;\r
435         size_t num_buckets;\r
436         size_t hash_mask;\r
437 \r
438         hash_bucket* first_bucket;      // first used bucket\r
439         hash_bucket* last_bucket;               // last used bucket\r
440 \r
441         hash_bucket* buckets;\r
442 \r
443 public:\r
444         hash_set(const size_t n_buckets = 131072, const double load = 0.75) {\r
445                 load_factor = load;\r
446 \r
447                 int nb;\r
448                 for(nb=2;nb<n_buckets;nb*=2);\r
449                 num_buckets = nb;\r
450                 hash_mask = num_buckets-1;\r
451 \r
452                 buckets = new hash_bucket[num_buckets];\r
453 \r
454                 _size = 0;\r
455                 _max_size = 0;\r
456 //              max_load = 0.0;\r
457                 first_bucket = 0;\r
458                 last_bucket = 0;\r
459         }\r
460 \r
461         int insert(const key_type& key) {\r
462                 data_item* d = new data_item(key);\r
463 \r
464                 // find the insertion bucket\r
465                 size_t bucket_index = hasher(key) & hash_mask;\r
466                 // if the bucket is empty just add new data_item\r
467                 if (!buckets[bucket_index].data) {\r
468                         // create new data item\r
469 \r
470                         buckets[bucket_index].data = d;\r
471                         if (!first_bucket)\r
472                                 first_bucket = &buckets[bucket_index];\r
473                         else\r
474                                 last_bucket->next_bucket = &buckets[bucket_index];\r
475                         last_bucket = &buckets[bucket_index];\r
476                 } else {        // we already have data items in this bucket\r
477                         // prepend the item to overflow chain\r
478                         data_item* temp = buckets[bucket_index].data;\r
479                         buckets[bucket_index].data = d;\r
480                         d->next = temp;\r
481                 }\r
482                 _size++;\r
483                 if(_size>_max_size)\r
484                         _max_size = _size;\r
485 \r
486                 return 0;\r
487         }\r
488 \r
489         iterator find(const key_type& key) {\r
490                 iterator iter;\r
491 \r
492                 // find the insertion bucket\r
493                 size_t bucket_index = hasher(key) & hash_mask;\r
494                 // if the bucket is empty just add new data_item\r
495                 if (!buckets[bucket_index].data) {\r
496                         iter.bucket = NULL;\r
497                         iter.data = NULL;\r
498                 } else {        // we already have data items in this bucket\r
499                         data_item* temp = buckets[bucket_index].data;\r
500                         while (temp && !equal(temp->key, key))\r
501                                 temp = temp->next;\r
502                         iter.data = temp;\r
503                         if (!temp)\r
504                                 iter.bucket = NULL;\r
505                         else\r
506                                 iter.bucket = &buckets[bucket_index];\r
507                 }\r
508                 return iter;\r
509         }\r
510 \r
511         void erase(const key_type& key) {\r
512                 // find the  bucket\r
513                 size_t bucket_index = hasher(key) & hash_mask;\r
514                 // if the bucket is empty just add new data_item\r
515                 if (!buckets[bucket_index].data)\r
516                         return;\r
517 \r
518                 data_item* temp = buckets[bucket_index].data;\r
519                 data_item* prev = NULL;\r
520                 while (temp && !equal(temp->key, key)) {\r
521                         prev = temp;\r
522                         temp = temp->next;\r
523                 }\r
524                 if (!temp)\r
525                         return;\r
526 \r
527                 // delete this data item from the chain\r
528                 _size--;\r
529                 if (!prev)      // we are deleting the first element in chain\r
530                         buckets[bucket_index].data = temp->next;\r
531                 else\r
532                         prev->next = temp->next;\r
533                 delete temp;\r
534         }\r
535 \r
536         iterator begin() {\r
537                 iterator iter;\r
538                 // find the first data item\r
539                 iter.bucket = first_bucket;\r
540                 iter.data = (first_bucket) ? first_bucket->data : NULL;\r
541                 return iter;\r
542         }\r
543 \r
544         iterator end() {\r
545                 iterator iter;\r
546                 iter.bucket = NULL;\r
547                 iter.data = NULL;\r
548                 return iter;\r
549         }\r
550 \r
551         void clear() {\r
552                 data_item* temp;\r
553                 data_item* next;\r
554 \r
555                 hash_bucket* tmp;\r
556                 while (first_bucket) {\r
557                         temp = first_bucket->data;\r
558                         while ( (next = temp->next) ) {\r
559                                 delete temp;\r
560                                 temp = next;\r
561                         }\r
562                         if(temp) delete(temp);\r
563                         first_bucket->data = NULL;\r
564                         tmp = first_bucket;\r
565                         first_bucket = first_bucket->next_bucket;\r
566                         tmp->next_bucket = NULL;\r
567                 }\r
568                 last_bucket = NULL;\r
569                 _size = 0;\r
570 \r
571         }\r
572 \r
573         int rehash() {\r
574                 if (_size) {\r
575                         fprintf(stderr, "Error: rehashing non-empty hash table\n");\r
576                         exit(1);\r
577                 }\r
578 \r
579                 double max_load = (1.0 * _max_size) / num_buckets;\r
580 \r
581                 if (max_load > load_factor ) {\r
582                         delete[] buckets;\r
583                         // roughly double the size of the table\r
584 \r
585                         num_buckets *= 2;\r
586                         hash_mask = num_buckets -1;\r
587                         buckets = new hash_bucket[num_buckets];\r
588                 }\r
589                 _max_size = _size;\r
590                 return 0;\r
591         }\r
592 \r
593 \r
594         size_t size() const {\r
595                 return _size;\r
596         }\r
597 \r
598         bool empty() const {\r
599                 return (_size == 0);\r
600         }\r
601 \r
602         ~hash_set() {\r
603                 clear();\r
604                 delete[] buckets;\r
605         }\r
606 \r
607 };\r
608 \r
609 \r
610 #endif // __HASH_TABLE_H\r