1 /* ------------------------------------------------
\r
2 Copyright 2014 AT&T Intellectual Property
\r
3 Licensed under the Apache License, Version 2.0 (the "License");
\r
4 you may not use this file except in compliance with the License.
\r
5 You may obtain a copy of the License at
\r
7 http://www.apache.org/licenses/LICENSE-2.0
\r
9 Unless required by applicable law or agreed to in writing, software
\r
10 distributed under the License is distributed on an "AS IS" BASIS,
\r
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
12 See the License for the specific language governing permissions and
\r
13 limitations under the License.
\r
14 ------------------------------------------- */
\r
16 #ifndef __HASH_TABLE_H
\r
17 #define __HASH_TABLE_H
\r
24 #define NUM_PRIMES 23
\r
26 static unsigned int primes[NUM_PRIMES] = {
\r
27 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,
\r
28 25717, 51437, 102877, 205759, 411527, 823117, 1646237,
\r
29 3292489, 6584983, 13169977, 26339969, 52679969
\r
34 template <class key_type, class value_type, class hasher_func, class equal_func>
\r
41 data_item* next; // next data item in overflow chain
\r
43 data_item(const key_type& k, const value_type& v) : first(k), second(v), next(NULL) { }
\r
46 struct hash_bucket {
\r
48 hash_bucket* next_bucket; // index of the next bucket in the list of used buckets
\r
49 hash_bucket* prev_bucket; // index of the next bucket in the list of used buckets
\r
53 next_bucket = 0; // 0 means no bucket follows this one
\r
54 prev_bucket = 0; // 0 means no bucket precedes this one
\r
59 hash_bucket* bucket;
\r
62 iterator& operator++() {
\r
66 bucket = bucket->next_bucket;
\r
68 data = bucket->data;
\r
75 // Like ++, but don't go beyond the end of the bucket chain.
\r
86 bool operator!=(const iterator& it) {
\r
87 return (data != it.data || bucket != it.bucket);
\r
90 bool operator==(const iterator& it) {
\r
91 return (data == it.data && bucket == it.bucket);
\r
94 data_item &operator*() {
\r
100 hasher_func hasher;
\r
103 double load_factor;
\r
104 // double max_load;
\r
108 size_t num_buckets;
\r
111 hash_bucket* first_bucket; // first used bucket
\r
112 hash_bucket* last_bucket; // last used bucket
\r
114 hash_bucket* buckets;
\r
116 // memory footpritn in bytes
\r
117 unsigned int total_memory;
\r
122 hash_table(const size_t n_buckets = 131072, const double load = 0.5) {
\r
123 load_factor = load;
\r
125 for(nb=2;nb<n_buckets;nb*=2);
\r
128 buckets = new hash_bucket[num_buckets];
\r
136 total_memory = num_buckets * sizeof(hash_bucket);
\r
139 int insert(const key_type& key, const value_type& val) {
\r
140 data_item* d = new data_item(key, val);
\r
141 total_memory += sizeof(data_item);
\r
143 // find the insertion bucket
\r
144 size_t bucket_index = hasher(key) & hash_mask;
\r
145 // if the bucket is empty just add new data_item
\r
146 if (!buckets[bucket_index].data) {
\r
147 // create new data item
\r
149 buckets[bucket_index].data = d;
\r
150 if (!first_bucket){
\r
151 first_bucket = &buckets[bucket_index];
\r
153 last_bucket->next_bucket = &buckets[bucket_index];
\r
157 buckets[bucket_index].prev_bucket = last_bucket;
\r
159 last_bucket = &buckets[bucket_index];
\r
160 } else { // we already have data items in this bucket
\r
161 // prepend the item to overflow chain
\r
162 data_item* temp = buckets[bucket_index].data;
\r
163 buckets[bucket_index].data = d;
\r
167 if(_size>_max_size)
\r
173 data_item *iterate_find(data_item* t, const key_type& key) {
\r
174 data_item *temp = t;
\r
175 while (temp && !equal(temp->first, key)){
\r
182 iterator find(const key_type& key) {
\r
185 // find the insertion bucket
\r
186 size_t bucket_index = hasher(key) & hash_mask;
\r
187 // if the bucket is empty just add new data_item
\r
188 if (!buckets[bucket_index].data) {
\r
189 iter.bucket = NULL;
\r
191 } else { // we already have data items in this bucket
\r
192 data_item* temp = buckets[bucket_index].data;
\r
193 // temp = iterate_find(temp,key);
\r
195 while (temp && !equal(temp->first, key))
\r
200 iter.bucket = NULL;
\r
202 iter.bucket = &buckets[bucket_index];
\r
207 void erase(const key_type& key) {
\r
209 size_t bucket_index = hasher(key) & hash_mask;
\r
210 // if the bucket is empty just add new data_item
\r
211 if (!buckets[bucket_index].data)
\r
214 data_item* temp = buckets[bucket_index].data;
\r
215 data_item* prev = NULL;
\r
216 while (temp && !equal(temp->first, key)) {
\r
223 // delete this data item from the chain
\r
225 if (!prev){ // we are deleting the first element in chain
\r
226 buckets[bucket_index].data = temp->next;
\r
228 if(temp->next == NULL){
\r
229 if(buckets[bucket_index].next_bucket){
\r
230 buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
\r
232 last_bucket = buckets[bucket_index].prev_bucket;
\r
234 if(buckets[bucket_index].prev_bucket){
\r
235 buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
\r
237 first_bucket = buckets[bucket_index].next_bucket;
\r
239 buckets[bucket_index].next_bucket = NULL;
\r
240 buckets[bucket_index].prev_bucket = NULL;
\r
243 prev->next = temp->next;
\r
246 total_memory -= sizeof(data_item);
\r
249 void erase_full(const key_type& key) {
\r
251 size_t bucket_index = hasher(key) & hash_mask;
\r
252 // if the bucket is empty just add new data_item
\r
253 if (!buckets[bucket_index].data)
\r
256 data_item* temp = buckets[bucket_index].data;
\r
257 data_item* prev = NULL;
\r
258 while (temp && !equal(temp->first, key)) {
\r
265 // delete this data item from the chain
\r
267 if (!prev){ // we are deleting the first element in chain
\r
268 buckets[bucket_index].data = temp->next;
\r
270 if(temp->next == NULL){
\r
271 if(buckets[bucket_index].next_bucket){
\r
272 buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
\r
274 last_bucket = buckets[bucket_index].prev_bucket;
\r
276 if(buckets[bucket_index].prev_bucket){
\r
277 buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
\r
279 first_bucket = buckets[bucket_index].next_bucket;
\r
281 buckets[bucket_index].next_bucket = NULL;
\r
282 buckets[bucket_index].prev_bucket = NULL;
\r
285 prev->next = temp->next;
\r
287 delete (*temp).first;
\r
288 delete (*temp).second;
\r
290 total_memory -= sizeof(data_item);
\r
295 // find the first data item
\r
296 iter.bucket = first_bucket;
\r
297 iter.data = (first_bucket) ? first_bucket->data : NULL;
\r
303 iter.bucket = NULL;
\r
313 while (first_bucket) {
\r
314 temp = first_bucket->data;
\r
315 while ( (next = temp->next) ) {
\r
319 if(temp) delete(temp);
\r
320 first_bucket->data = NULL;
\r
321 tmp = first_bucket;
\r
322 first_bucket = first_bucket->next_bucket;
\r
323 tmp->next_bucket = NULL;
\r
324 tmp->prev_bucket = NULL;
\r
326 last_bucket = NULL;
\r
328 total_memory = num_buckets * sizeof(hash_bucket);
\r
334 fprintf(stderr, "Error: rehashing non-empty hash table\n");
\r
338 double max_load = (1.0 * _max_size) / num_buckets;
\r
339 if (max_load > load_factor ) {
\r
341 // roughly double the size of the table
\r
344 hash_mask = num_buckets-1;
\r
346 buckets = new hash_bucket[num_buckets];
\r
347 total_memory = num_buckets * sizeof(hash_bucket);
\r
354 size_t size() const {
\r
358 bool empty() const {
\r
359 return (_size == 0);
\r
367 unsigned int get_mem_footprint() {
\r
368 return total_memory;
\r
374 template <class key_type, class hasher_func, class equal_func>
\r
380 data_item* next; // next data item in overflow chain
\r
382 data_item(const key_type& k) : key(k), next(NULL) { }
\r
385 struct hash_bucket {
\r
386 data_item* data; //
\r
387 hash_bucket* next_bucket; // index of the next bucket in the list of used buckets
\r
391 next_bucket = 0; // 0 means no bucket follows this one
\r
396 hash_bucket* bucket;
\r
399 iterator& operator++() {
\r
403 bucket = bucket->next_bucket;
\r
405 data = bucket->data;
\r
412 bool operator!=(const iterator& it) {
\r
413 return (data != it.data || bucket != it.bucket);
\r
416 bool operator==(const iterator& it) {
\r
417 return (data == it.data && bucket == it.bucket);
\r
420 // NOTE : not certain if returning ref always works
\r
421 key_type &operator*() {
\r
427 hasher_func hasher;
\r
430 double load_factor;
\r
431 // double max_load;
\r
435 size_t num_buckets;
\r
438 hash_bucket* first_bucket; // first used bucket
\r
439 hash_bucket* last_bucket; // last used bucket
\r
441 hash_bucket* buckets;
\r
444 hash_set(const size_t n_buckets = 131072, const double load = 0.75) {
\r
445 load_factor = load;
\r
448 for(nb=2;nb<n_buckets;nb*=2);
\r
450 hash_mask = num_buckets-1;
\r
452 buckets = new hash_bucket[num_buckets];
\r
461 int insert(const key_type& key) {
\r
462 data_item* d = new data_item(key);
\r
464 // find the insertion bucket
\r
465 size_t bucket_index = hasher(key) & hash_mask;
\r
466 // if the bucket is empty just add new data_item
\r
467 if (!buckets[bucket_index].data) {
\r
468 // create new data item
\r
470 buckets[bucket_index].data = d;
\r
472 first_bucket = &buckets[bucket_index];
\r
474 last_bucket->next_bucket = &buckets[bucket_index];
\r
475 last_bucket = &buckets[bucket_index];
\r
476 } else { // we already have data items in this bucket
\r
477 // prepend the item to overflow chain
\r
478 data_item* temp = buckets[bucket_index].data;
\r
479 buckets[bucket_index].data = d;
\r
483 if(_size>_max_size)
\r
489 iterator find(const key_type& key) {
\r
492 // find the insertion bucket
\r
493 size_t bucket_index = hasher(key) & hash_mask;
\r
494 // if the bucket is empty just add new data_item
\r
495 if (!buckets[bucket_index].data) {
\r
496 iter.bucket = NULL;
\r
498 } else { // we already have data items in this bucket
\r
499 data_item* temp = buckets[bucket_index].data;
\r
500 while (temp && !equal(temp->key, key))
\r
504 iter.bucket = NULL;
\r
506 iter.bucket = &buckets[bucket_index];
\r
511 void erase(const key_type& key) {
\r
513 size_t bucket_index = hasher(key) & hash_mask;
\r
514 // if the bucket is empty just add new data_item
\r
515 if (!buckets[bucket_index].data)
\r
518 data_item* temp = buckets[bucket_index].data;
\r
519 data_item* prev = NULL;
\r
520 while (temp && !equal(temp->key, key)) {
\r
527 // delete this data item from the chain
\r
529 if (!prev) // we are deleting the first element in chain
\r
530 buckets[bucket_index].data = temp->next;
\r
532 prev->next = temp->next;
\r
538 // find the first data item
\r
539 iter.bucket = first_bucket;
\r
540 iter.data = (first_bucket) ? first_bucket->data : NULL;
\r
546 iter.bucket = NULL;
\r
556 while (first_bucket) {
\r
557 temp = first_bucket->data;
\r
558 while ( (next = temp->next) ) {
\r
562 if(temp) delete(temp);
\r
563 first_bucket->data = NULL;
\r
564 tmp = first_bucket;
\r
565 first_bucket = first_bucket->next_bucket;
\r
566 tmp->next_bucket = NULL;
\r
568 last_bucket = NULL;
\r
575 fprintf(stderr, "Error: rehashing non-empty hash table\n");
\r
579 double max_load = (1.0 * _max_size) / num_buckets;
\r
581 if (max_load > load_factor ) {
\r
583 // roughly double the size of the table
\r
586 hash_mask = num_buckets -1;
\r
587 buckets = new hash_bucket[num_buckets];
\r
594 size_t size() const {
\r
598 bool empty() const {
\r
599 return (_size == 0);
\r
610 #endif // __HASH_TABLE_H
\r