-/* ------------------------------------------------\r
-Copyright 2014 AT&T Intellectual Property\r
- Licensed under the Apache License, Version 2.0 (the "License");\r
- you may not use this file except in compliance with the License.\r
- You may obtain a copy of the License at\r
-\r
- http://www.apache.org/licenses/LICENSE-2.0\r
-\r
- Unless required by applicable law or agreed to in writing, software\r
- distributed under the License is distributed on an "AS IS" BASIS,\r
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
- See the License for the specific language governing permissions and\r
- limitations under the License.\r
- ------------------------------------------- */\r
-\r
-#ifndef __HASH_TABLE_H\r
-#define __HASH_TABLE_H\r
-\r
-#include <stdlib.h>\r
-#include <stdio.h>\r
-\r
-\r
-\r
-#define NUM_PRIMES 23\r
-\r
-static unsigned int primes[NUM_PRIMES] = {\r
- 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,\r
- 25717, 51437, 102877, 205759, 411527, 823117, 1646237,\r
- 3292489, 6584983, 13169977, 26339969, 52679969\r
-};\r
-\r
-\r
-\r
-template <class key_type, class value_type, class hasher_func, class equal_func>\r
-class hash_table {\r
-\r
-public :\r
- struct data_item {\r
- key_type first;\r
- value_type second;\r
- data_item* next; // next data item in overflow chain\r
-\r
- data_item(const key_type& k, const value_type& v) : first(k), second(v), next(NULL) { }\r
- };\r
-\r
- struct hash_bucket {\r
- data_item* data; //\r
- hash_bucket* next_bucket; // index of the next bucket in the list of used buckets\r
- hash_bucket* prev_bucket; // index of the next bucket in the list of used buckets\r
-\r
- hash_bucket() {\r
- data = NULL;\r
- next_bucket = 0; // 0 means no bucket follows this one\r
- prev_bucket = 0; // 0 means no bucket precedes this one\r
- }\r
- };\r
-\r
- struct iterator {\r
- hash_bucket* bucket;\r
- data_item* data;\r
-\r
- iterator& operator++() {\r
- if (data->next)\r
- data = data->next;\r
- else {\r
- bucket = bucket->next_bucket;\r
- if (bucket)\r
- data = bucket->data;\r
- else\r
- data = NULL;\r
- }\r
- return *this;\r
- }\r
-\r
-// Like ++, but don't go beyond the end of the bucket chain.\r
- iterator& next() {\r
- if (data->next)\r
- data = data->next;\r
- else {\r
- bucket = NULL;\r
- data = NULL;\r
- }\r
- return *this;\r
- }\r
-\r
- bool operator!=(const iterator& it) {\r
- return (data != it.data || bucket != it.bucket);\r
- }\r
-\r
- bool operator==(const iterator& it) {\r
- return (data == it.data && bucket == it.bucket);\r
- }\r
-\r
- data_item &operator*() {\r
- return *data;\r
- }\r
- };\r
-\r
-private:\r
- hasher_func hasher;\r
- equal_func equal;\r
-\r
- double load_factor;\r
-// double max_load;\r
-\r
- size_t _size;\r
- size_t _max_size;\r
- size_t num_buckets;\r
- size_t hash_mask;\r
-\r
- hash_bucket* first_bucket; // first used bucket\r
- hash_bucket* last_bucket; // last used bucket\r
-\r
- hash_bucket* buckets;\r
-\r
- // memory footpritn in bytes\r
- unsigned int total_memory;\r
-\r
-public:\r
-\r
-\r
- hash_table(const size_t n_buckets = 131072, const double load = 0.5) {\r
- load_factor = load;\r
- int nb;\r
- for(nb=2;nb<n_buckets;nb*=2);\r
- num_buckets = nb;\r
- hash_mask = nb-1;\r
- buckets = new hash_bucket[num_buckets];\r
-\r
- _size = 0;\r
- _max_size = 0;\r
-// max_load = 0.0;\r
- first_bucket = 0;\r
- last_bucket = 0;\r
-\r
- total_memory = num_buckets * sizeof(hash_bucket);\r
- }\r
-\r
- int insert(const key_type& key, const value_type& val) {\r
- data_item* d = new data_item(key, val);\r
- total_memory += sizeof(data_item);\r
-\r
- // find the insertion bucket\r
- size_t bucket_index = hasher(key) & hash_mask;\r
- // if the bucket is empty just add new data_item\r
- if (!buckets[bucket_index].data) {\r
- // create new data item\r
-\r
- buckets[bucket_index].data = d;\r
- if (!first_bucket){\r
- first_bucket = &buckets[bucket_index];\r
- }else{\r
- last_bucket->next_bucket = &buckets[bucket_index];\r
- }\r
-\r
- if (last_bucket)\r
- buckets[bucket_index].prev_bucket = last_bucket;\r
-\r
- last_bucket = &buckets[bucket_index];\r
- } else { // we already have data items in this bucket\r
- // prepend the item to overflow chain\r
- data_item* temp = buckets[bucket_index].data;\r
- buckets[bucket_index].data = d;\r
- d->next = temp;\r
- }\r
- _size++;\r
- if(_size>_max_size)\r
- _max_size = _size;\r
-\r
- return 0;\r
- }\r
-\r
- data_item *iterate_find(data_item* t, const key_type& key) {\r
- data_item *temp = t;\r
- while (temp && !equal(temp->first, key)){\r
- temp = temp->next;\r
- }\r
- return temp;\r
- }\r
-\r
-\r
- iterator find(const key_type& key) {\r
- iterator iter;\r
-\r
- // find the insertion bucket\r
- size_t bucket_index = hasher(key) & hash_mask;\r
- // if the bucket is empty just add new data_item\r
- if (!buckets[bucket_index].data) {\r
- iter.bucket = NULL;\r
- iter.data = NULL;\r
- } else { // we already have data items in this bucket\r
- data_item* temp = buckets[bucket_index].data;\r
-// temp = iterate_find(temp,key);\r
-\r
- while (temp && !equal(temp->first, key))\r
- temp = temp->next;\r
-\r
- iter.data = temp;\r
- if (!temp)\r
- iter.bucket = NULL;\r
- else\r
- iter.bucket = &buckets[bucket_index];\r
- }\r
- return iter;\r
- }\r
-\r
- void erase(const key_type& key) {\r
- // find the bucket\r
- size_t bucket_index = hasher(key) & hash_mask;\r
- // if the bucket is empty just add new data_item\r
- if (!buckets[bucket_index].data)\r
- return;\r
-\r
- data_item* temp = buckets[bucket_index].data;\r
- data_item* prev = NULL;\r
- while (temp && !equal(temp->first, key)) {\r
- prev = temp;\r
- temp = temp->next;\r
- }\r
- if (!temp)\r
- return;\r
-\r
- // delete this data item from the chain\r
- _size--;\r
- if (!prev){ // we are deleting the first element in chain\r
- buckets[bucket_index].data = temp->next;\r
-\r
- if(temp->next == NULL){\r
- if(buckets[bucket_index].next_bucket){\r
- buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;\r
- }else{\r
- last_bucket = buckets[bucket_index].prev_bucket;\r
- }\r
- if(buckets[bucket_index].prev_bucket){\r
- buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;\r
- }else{\r
- first_bucket = buckets[bucket_index].next_bucket;\r
- }\r
- buckets[bucket_index].next_bucket = NULL;\r
- buckets[bucket_index].prev_bucket = NULL;\r
- }\r
- }else{\r
- prev->next = temp->next;\r
- }\r
- delete temp;\r
- total_memory -= sizeof(data_item);\r
- }\r
-\r
- void erase_full(const key_type& key) {\r
- // find the bucket\r
- size_t bucket_index = hasher(key) & hash_mask;\r
- // if the bucket is empty just add new data_item\r
- if (!buckets[bucket_index].data)\r
- return;\r
-\r
- data_item* temp = buckets[bucket_index].data;\r
- data_item* prev = NULL;\r
- while (temp && !equal(temp->first, key)) {\r
- prev = temp;\r
- temp = temp->next;\r
- }\r
- if (!temp)\r
- return;\r
-\r
- // delete this data item from the chain\r
- _size--;\r
- if (!prev){ // we are deleting the first element in chain\r
- buckets[bucket_index].data = temp->next;\r
-\r
- if(temp->next == NULL){\r
- if(buckets[bucket_index].next_bucket){\r
- buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;\r
- }else{\r
- last_bucket = buckets[bucket_index].prev_bucket;\r
- }\r
- if(buckets[bucket_index].prev_bucket){\r
- buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;\r
- }else{\r
- first_bucket = buckets[bucket_index].next_bucket;\r
- }\r
- buckets[bucket_index].next_bucket = NULL;\r
- buckets[bucket_index].prev_bucket = NULL;\r
- }\r
- }else{\r
- prev->next = temp->next;\r
- }\r
- delete (*temp).first;\r
- delete (*temp).second;\r
- delete temp;\r
- total_memory -= sizeof(data_item);\r
- }\r
-\r
- iterator begin() {\r
- iterator iter;\r
- // find the first data item\r
- iter.bucket = first_bucket;\r
- iter.data = (first_bucket) ? first_bucket->data : NULL;\r
- return iter;\r
- }\r
-\r
- iterator end() {\r
- iterator iter;\r
- iter.bucket = NULL;\r
- iter.data = NULL;\r
- return iter;\r
- }\r
-\r
- void clear() {\r
- data_item* temp;\r
- data_item* next;\r
-\r
- hash_bucket* tmp;\r
- while (first_bucket) {\r
- temp = first_bucket->data;\r
- while ( (next = temp->next) ) {\r
- delete temp;\r
- temp = next;\r
- }\r
- if(temp) delete(temp);\r
- first_bucket->data = NULL;\r
- tmp = first_bucket;\r
- first_bucket = first_bucket->next_bucket;\r
- tmp->next_bucket = NULL;\r
- tmp->prev_bucket = NULL;\r
- }\r
- last_bucket = NULL;\r
- _size = 0;\r
- total_memory = num_buckets * sizeof(hash_bucket);\r
-\r
- }\r
-\r
- int rehash() {\r
- if (_size) {\r
- fprintf(stderr, "Error: rehashing non-empty hash table\n");\r
- exit(1);\r
- }\r
-\r
- double max_load = (1.0 * _max_size) / num_buckets;\r
- if (max_load > load_factor ) {\r
- delete[] buckets;\r
- // roughly double the size of the table\r
-\r
- num_buckets *= 2;\r
- hash_mask = num_buckets-1;\r
-\r
- buckets = new hash_bucket[num_buckets];\r
- total_memory = num_buckets * sizeof(hash_bucket);\r
- }\r
- _max_size = _size;\r
- return 0;\r
- }\r
-\r
-\r
- size_t size() const {\r
- return _size;\r
- }\r
-\r
- bool empty() const {\r
- return (_size == 0);\r
- }\r
-\r
- ~hash_table() {\r
- clear();\r
- delete[] buckets;\r
- }\r
-\r
- unsigned int get_mem_footprint() {\r
- return total_memory;\r
- }\r
-\r
-};\r
-\r
-\r
-template <class key_type, class hasher_func, class equal_func>\r
-class hash_set {\r
-\r
-public :\r
- struct data_item {\r
- key_type key;\r
- data_item* next; // next data item in overflow chain\r
-\r
- data_item(const key_type& k) : key(k), next(NULL) { }\r
- };\r
-\r
- struct hash_bucket {\r
- data_item* data; //\r
- hash_bucket* next_bucket; // index of the next bucket in the list of used buckets\r
-\r
- hash_bucket() {\r
- data = NULL;\r
- next_bucket = 0; // 0 means no bucket follows this one\r
- }\r
- };\r
-\r
- struct iterator {\r
- hash_bucket* bucket;\r
- data_item* data;\r
-\r
- iterator& operator++() {\r
- if (data->next)\r
- data = data->next;\r
- else {\r
- bucket = bucket->next_bucket;\r
- if (bucket)\r
- data = bucket->data;\r
- else\r
- data = NULL;\r
- }\r
- return *this;\r
- }\r
-\r
- bool operator!=(const iterator& it) {\r
- return (data != it.data || bucket != it.bucket);\r
- }\r
-\r
- bool operator==(const iterator& it) {\r
- return (data == it.data && bucket == it.bucket);\r
- }\r
-\r
-// NOTE : not certain if returning ref always works\r
- key_type &operator*() {\r
- return data->key;\r
- }\r
- };\r
-\r
-private:\r
- hasher_func hasher;\r
- equal_func equal;\r
-\r
- double load_factor;\r
-// double max_load;\r
-\r
- size_t _size;\r
- size_t _max_size;\r
- size_t num_buckets;\r
- size_t hash_mask;\r
-\r
- hash_bucket* first_bucket; // first used bucket\r
- hash_bucket* last_bucket; // last used bucket\r
-\r
- hash_bucket* buckets;\r
-\r
-public:\r
- hash_set(const size_t n_buckets = 131072, const double load = 0.75) {\r
- load_factor = load;\r
-\r
- int nb;\r
- for(nb=2;nb<n_buckets;nb*=2);\r
- num_buckets = nb;\r
- hash_mask = num_buckets-1;\r
-\r
- buckets = new hash_bucket[num_buckets];\r
-\r
- _size = 0;\r
- _max_size = 0;\r
-// max_load = 0.0;\r
- first_bucket = 0;\r
- last_bucket = 0;\r
- }\r
-\r
- int insert(const key_type& key) {\r
- data_item* d = new data_item(key);\r
-\r
- // find the insertion bucket\r
- size_t bucket_index = hasher(key) & hash_mask;\r
- // if the bucket is empty just add new data_item\r
- if (!buckets[bucket_index].data) {\r
- // create new data item\r
-\r
- buckets[bucket_index].data = d;\r
- if (!first_bucket)\r
- first_bucket = &buckets[bucket_index];\r
- else\r
- last_bucket->next_bucket = &buckets[bucket_index];\r
- last_bucket = &buckets[bucket_index];\r
- } else { // we already have data items in this bucket\r
- // prepend the item to overflow chain\r
- data_item* temp = buckets[bucket_index].data;\r
- buckets[bucket_index].data = d;\r
- d->next = temp;\r
- }\r
- _size++;\r
- if(_size>_max_size)\r
- _max_size = _size;\r
-\r
- return 0;\r
- }\r
-\r
- iterator find(const key_type& key) {\r
- iterator iter;\r
-\r
- // find the insertion bucket\r
- size_t bucket_index = hasher(key) & hash_mask;\r
- // if the bucket is empty just add new data_item\r
- if (!buckets[bucket_index].data) {\r
- iter.bucket = NULL;\r
- iter.data = NULL;\r
- } else { // we already have data items in this bucket\r
- data_item* temp = buckets[bucket_index].data;\r
- while (temp && !equal(temp->key, key))\r
- temp = temp->next;\r
- iter.data = temp;\r
- if (!temp)\r
- iter.bucket = NULL;\r
- else\r
- iter.bucket = &buckets[bucket_index];\r
- }\r
- return iter;\r
- }\r
-\r
- void erase(const key_type& key) {\r
- // find the bucket\r
- size_t bucket_index = hasher(key) & hash_mask;\r
- // if the bucket is empty just add new data_item\r
- if (!buckets[bucket_index].data)\r
- return;\r
-\r
- data_item* temp = buckets[bucket_index].data;\r
- data_item* prev = NULL;\r
- while (temp && !equal(temp->key, key)) {\r
- prev = temp;\r
- temp = temp->next;\r
- }\r
- if (!temp)\r
- return;\r
-\r
- // delete this data item from the chain\r
- _size--;\r
- if (!prev) // we are deleting the first element in chain\r
- buckets[bucket_index].data = temp->next;\r
- else\r
- prev->next = temp->next;\r
- delete temp;\r
- }\r
-\r
- iterator begin() {\r
- iterator iter;\r
- // find the first data item\r
- iter.bucket = first_bucket;\r
- iter.data = (first_bucket) ? first_bucket->data : NULL;\r
- return iter;\r
- }\r
-\r
- iterator end() {\r
- iterator iter;\r
- iter.bucket = NULL;\r
- iter.data = NULL;\r
- return iter;\r
- }\r
-\r
- void clear() {\r
- data_item* temp;\r
- data_item* next;\r
-\r
- hash_bucket* tmp;\r
- while (first_bucket) {\r
- temp = first_bucket->data;\r
- while ( (next = temp->next) ) {\r
- delete temp;\r
- temp = next;\r
- }\r
- if(temp) delete(temp);\r
- first_bucket->data = NULL;\r
- tmp = first_bucket;\r
- first_bucket = first_bucket->next_bucket;\r
- tmp->next_bucket = NULL;\r
- }\r
- last_bucket = NULL;\r
- _size = 0;\r
-\r
- }\r
-\r
- int rehash() {\r
- if (_size) {\r
- fprintf(stderr, "Error: rehashing non-empty hash table\n");\r
- exit(1);\r
- }\r
-\r
- double max_load = (1.0 * _max_size) / num_buckets;\r
-\r
- if (max_load > load_factor ) {\r
- delete[] buckets;\r
- // roughly double the size of the table\r
-\r
- num_buckets *= 2;\r
- hash_mask = num_buckets -1;\r
- buckets = new hash_bucket[num_buckets];\r
- }\r
- _max_size = _size;\r
- return 0;\r
- }\r
-\r
-\r
- size_t size() const {\r
- return _size;\r
- }\r
-\r
- bool empty() const {\r
- return (_size == 0);\r
- }\r
-\r
- ~hash_set() {\r
- clear();\r
- delete[] buckets;\r
- }\r
-\r
-};\r
-\r
-\r
-#endif // __HASH_TABLE_H\r
+/* ------------------------------------------------
+Copyright 2014 AT&T Intellectual Property
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ ------------------------------------------- */
+
+#ifndef __HASH_TABLE_H
+#define __HASH_TABLE_H
+
+#include <stdlib.h>
+#include <stdio.h>
+
+
+
+#define NUM_PRIMES 23
+
+static unsigned int primes[NUM_PRIMES] = {
+ 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853,
+ 25717, 51437, 102877, 205759, 411527, 823117, 1646237,
+ 3292489, 6584983, 13169977, 26339969, 52679969
+};
+
+
+
+template <class key_type, class value_type, class hasher_func, class equal_func>
+class hash_table {
+
+public :
+	struct data_item {
+		key_type first;			// stored key
+		value_type second;		// stored value
+		data_item* next;	// next data item in overflow chain
+
+		// Use a member initializer list: avoids default-constructing
+		// first/second and then assigning (double initialization), and
+		// works for key/value types with no default constructor.
+		data_item(const key_type& k, const value_type& v) : first(k), second(v), next(NULL) { }
+	};
+
+ struct hash_bucket {
+ data_item* data; //
+ hash_bucket* next_bucket; // index of the next bucket in the list of used buckets
+ hash_bucket* prev_bucket; // index of the next bucket in the list of used buckets
+
+ hash_bucket() {
+ data = NULL;
+ next_bucket = 0; // 0 means no bucket follows this one
+ prev_bucket = 0; // 0 means no bucket precedes this one
+ }
+ };
+
+ struct iterator {
+ hash_bucket* bucket;
+ data_item* data;
+
+ iterator& operator++() {
+ if (data->next)
+ data = data->next;
+ else {
+ bucket = bucket->next_bucket;
+ if (bucket)
+ data = bucket->data;
+ else
+ data = NULL;
+ }
+ return *this;
+ }
+
+// Like ++, but don't go beyond the end of the bucket chain.
+ iterator& next() {
+ if (data->next)
+ data = data->next;
+ else {
+ bucket = NULL;
+ data = NULL;
+ }
+ return *this;
+ }
+
+ bool operator!=(const iterator& it) {
+ return (data != it.data || bucket != it.bucket);
+ }
+
+ bool operator==(const iterator& it) {
+ return (data == it.data && bucket == it.bucket);
+ }
+
+ data_item &operator*() {
+ return *data;
+ }
+ };
+
+private:
+ hasher_func hasher;
+ equal_func equal;
+
+ double load_factor;
+
+ size_t _size;
+ size_t _max_size;
+ size_t num_buckets;
+ size_t hash_mask;
+
+ // bucket allocation happens on first insert
+ // this flag indicates that buckets has been allocated
+ unsigned allocated;
+
+
+ hash_bucket* first_bucket; // first used bucket
+ hash_bucket* last_bucket; // last used bucket
+
+ hash_bucket* buckets;
+
+	// memory footprint in bytes
+ unsigned int total_memory;
+
+public:
+
+
+	// Construct a table sized for expected_size entries at the given maximum
+	// load factor. The bucket count is rounded up to a power of two so that
+	// (hash & hash_mask) can replace a modulo operation.
+	hash_table(const size_t expected_size = 100000, const double max_load = 1.25) {
+
+		size_t min_buckets = (size_t)(expected_size / max_load);
+		load_factor = max_load;
+		int nb;
+		// smallest power of two >= min_buckets
+		// NOTE(review): nb is int while min_buckets is size_t -- assumes the
+		// requested size fits in int; confirm for very large expected_size
+		for(nb=2;nb<min_buckets;nb*=2);
+		num_buckets = nb;
+		hash_mask = nb-1;
+
+		// we will make hash table start with 0-size and expand on first insertion
+		buckets = new hash_bucket[0];
+		allocated = 0;
+
+		_size = 0;
+		_max_size = 0;
+		first_bucket = 0;
+		last_bucket = 0;
+
+		total_memory = 0;
+	}
+
+	// Insert (key, val). Duplicate keys are allowed: the new item is simply
+	// prepended to the bucket's overflow chain. Always returns 0.
+	int insert(const key_type& key, const value_type& val) {
+
+		if (__builtin_expect(!allocated, 0)) {	// we expect buckets to be allocated most of the time
+			// buckets was obtained from new hash_bucket[0] in the
+			// constructor (array new), so it must be released with
+			// delete [] -- scalar delete here is undefined behavior.
+			delete [] buckets;
+			buckets = new hash_bucket[num_buckets];
+			allocated = 1;
+			total_memory = num_buckets * sizeof(hash_bucket);
+
+			fprintf(stderr, "Initial allocation of %d buckets\n", (int)num_buckets);
+		}
+
+		data_item* d = new data_item(key, val);
+		total_memory += sizeof(data_item);
+
+		// find the insertion bucket
+		size_t bucket_index = hasher(key) & hash_mask;
+		// if the bucket is empty just add new data_item
+		if (!buckets[bucket_index].data) {
+			// first item in this bucket: link the bucket onto the
+			// doubly-linked list of used buckets
+			buckets[bucket_index].data = d;
+			if (!first_bucket){
+				first_bucket = &buckets[bucket_index];
+			}else{
+				last_bucket->next_bucket = &buckets[bucket_index];
+			}
+
+			if (last_bucket)
+				buckets[bucket_index].prev_bucket = last_bucket;
+
+			last_bucket = &buckets[bucket_index];
+		} else {	// we already have data items in this bucket
+			// prepend the item to overflow chain
+			data_item* temp = buckets[bucket_index].data;
+			buckets[bucket_index].data = d;
+			d->next = temp;
+		}
+		_size++;
+		if(_size>_max_size)
+			_max_size = _size;
+
+		return 0;
+	}
+
+ data_item *iterate_find(data_item* t, const key_type& key) {
+ data_item *temp = t;
+ while (temp && !equal(temp->first, key)){
+ temp = temp->next;
+ }
+ return temp;
+ }
+
+
+ iterator find(const key_type& key) {
+ iterator iter;
+
+ if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
+ iter.bucket = NULL;
+ iter.data = NULL;
+ return iter;
+ }
+
+ // find the insertion bucket
+ size_t bucket_index = hasher(key) & hash_mask;
+ // if the bucket is empty just add new data_item
+ if (!buckets[bucket_index].data) {
+ iter.bucket = NULL;
+ iter.data = NULL;
+ } else { // we already have data items in this bucket
+ data_item* temp = buckets[bucket_index].data;
+// temp = iterate_find(temp,key);
+
+ while (temp && !equal(temp->first, key))
+ temp = temp->next;
+
+ iter.data = temp;
+ if (!temp)
+ iter.bucket = NULL;
+ else
+ iter.bucket = &buckets[bucket_index];
+ }
+ return iter;
+ }
+
+ void erase(const key_type& key) {
+
+ if (__builtin_expect(!allocated, 0)) { // we expect buckets to be allocated most of the time
+ return;
+ }
+
+ // find the bucket
+ size_t bucket_index = hasher(key) & hash_mask;
+ // if the bucket is empty just add new data_item
+ if (!buckets[bucket_index].data)
+ return;
+
+ data_item* temp = buckets[bucket_index].data;
+ data_item* prev = NULL;
+ while (temp && !equal(temp->first, key)) {
+ prev = temp;
+ temp = temp->next;
+ }
+ if (!temp)
+ return;
+
+ // delete this data item from the chain
+ _size--;
+ if (!prev){ // we are deleting the first element in chain
+ buckets[bucket_index].data = temp->next;
+
+ if(temp->next == NULL){
+ if(buckets[bucket_index].next_bucket){
+ buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
+ }else{
+ last_bucket = buckets[bucket_index].prev_bucket;
+ }
+ if(buckets[bucket_index].prev_bucket){
+ buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
+ }else{
+ first_bucket = buckets[bucket_index].next_bucket;
+ }
+ buckets[bucket_index].next_bucket = NULL;
+ buckets[bucket_index].prev_bucket = NULL;
+ }
+ }else{
+ prev->next = temp->next;
+ }
+ delete temp;
+ total_memory -= sizeof(data_item);
+ }
+
+	// Remove the first item whose key matches, unlinking the bucket from the
+	// used-bucket list when its chain becomes empty. No-op when absent.
+	// NOTE(review): the old version of this function also did
+	// `delete (*temp).first; delete (*temp).second;` -- those deletes are now
+	// commented out, so ownership of pointer-typed keys/values is no longer
+	// released here. Callers that relied on erase_full freeing them will
+	// leak; confirm this behavior change is intended.
+	void erase_full(const key_type& key) {
+
+		if (__builtin_expect(!allocated, 0)) {	// we expect buckets to be allocated most of the time
+			return;
+		}
+
+		// find the bucket
+		size_t bucket_index = hasher(key) & hash_mask;
+		// nothing to erase if the bucket is empty
+		if (!buckets[bucket_index].data)
+			return;
+
+		// walk the overflow chain looking for the key
+		data_item* temp = buckets[bucket_index].data;
+		data_item* prev = NULL;
+		while (temp && !equal(temp->first, key)) {
+			prev = temp;
+			temp = temp->next;
+		}
+		if (!temp)
+			return;
+
+		// delete this data item from the chain
+		_size--;
+		if (!prev){	// we are deleting the first element in chain
+			buckets[bucket_index].data = temp->next;
+
+			if(temp->next == NULL){
+				// chain is now empty: splice this bucket out of the
+				// doubly-linked used-bucket list
+				if(buckets[bucket_index].next_bucket){
+					buckets[bucket_index].next_bucket->prev_bucket = buckets[bucket_index].prev_bucket;
+				}else{
+					last_bucket = buckets[bucket_index].prev_bucket;
+				}
+				if(buckets[bucket_index].prev_bucket){
+					buckets[bucket_index].prev_bucket->next_bucket = buckets[bucket_index].next_bucket;
+				}else{
+					first_bucket = buckets[bucket_index].next_bucket;
+				}
+				buckets[bucket_index].next_bucket = NULL;
+				buckets[bucket_index].prev_bucket = NULL;
+			}
+		}else{
+			prev->next = temp->next;
+		}
+//		delete (*temp).first;
+//		delete (*temp).second;
+		delete temp;
+		total_memory -= sizeof(data_item);
+	}
+
+ iterator begin() {
+ iterator iter;
+ // find the first data item
+ iter.bucket = first_bucket;
+ iter.data = (first_bucket) ? first_bucket->data : NULL;
+ return iter;
+ }
+
+ iterator end() {
+ iterator iter;
+ iter.bucket = NULL;
+ iter.data = NULL;
+ return iter;
+ }
+
+ void clear() {
+ data_item* temp;
+ data_item* next;
+
+ hash_bucket* tmp;
+ while (first_bucket) {
+ temp = first_bucket->data;
+ while ( (next = temp->next) ) {
+ delete temp;
+ temp = next;
+ }
+ if(temp) delete(temp);
+ first_bucket->data = NULL;
+ tmp = first_bucket;
+ first_bucket = first_bucket->next_bucket;
+ tmp->next_bucket = NULL;
+ tmp->prev_bucket = NULL;
+ }
+ last_bucket = NULL;
+ _size = 0;
+ total_memory = num_buckets * sizeof(hash_bucket);
+ }
+
+	// Resize the (empty) table based on the peak load seen since the last
+	// resize: grow when the peak exceeded load_factor, shrink when it stayed
+	// under half of it. Must only be called on an empty table.
+	int resize() {
+		if (_size) {
+			fprintf(stderr, "Error: resizing non-empty hash table\n");
+			exit(1);
+		}
+
+		double max_load = (1.0 * _max_size) / num_buckets;
+
+		// resize table if its maximum load exceeded the load factor
+		// OR it was less than half of the load factor
+		size_t min_buckets = 0;
+		if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
+			min_buckets = _max_size / load_factor;
+		}
+
+		if (min_buckets) {
+			// find power-of-2 size larger than min_buckets
+			int nb;
+			for(nb=2;nb<min_buckets;nb*=2);
+			num_buckets = nb;
+
+			fprintf(stderr, "Resizing to %d buckets\n", (int)num_buckets);
+			delete[] buckets;
+			hash_mask = num_buckets-1;
+
+			buckets = new hash_bucket[num_buckets];
+			total_memory = num_buckets * sizeof(hash_bucket);
+		}
+
+		// restart peak-load tracking for the next epoch (the old rehash()
+		// did this too); _max_size only ever grows during use, so without
+		// this reset the shrink branch above could never fire after a
+		// one-time spike in table size
+		_max_size = _size;
+		return 0;
+	}
+
+ size_t size() const {
+ return _size;
+ }
+
+ size_t max_size() const {
+ return _max_size;
+ }
+
+ bool empty() const {
+ return (_size == 0);
+ }
+
+ ~hash_table() {
+ clear();
+ delete[] buckets;
+ }
+
+ unsigned int get_mem_footprint() {
+ return total_memory;
+ }
+
+};
+
+
+template <class key_type, class hasher_func, class equal_func>
+class hash_set {
+
+public :
+ struct data_item {
+ key_type key;
+ data_item* next; // next data item in overflow chain
+
+ data_item(const key_type& k) : key(k), next(NULL) { }
+ };
+
+ struct hash_bucket {
+ data_item* data; //
+ hash_bucket* next_bucket; // index of the next bucket in the list of used buckets
+
+ hash_bucket() {
+ data = NULL;
+ next_bucket = 0; // 0 means no bucket follows this one
+ }
+ };
+
+	struct iterator {
+		hash_bucket* bucket;
+		data_item* data;
+
+		// Advance to the next item. hash_set::erase cannot unlink an
+		// emptied bucket from the used-bucket chain (hash_set buckets
+		// carry no prev pointer), so a live bucket may have data == NULL;
+		// skip such buckets and normalize to end() (bucket == data == NULL)
+		// instead of producing a half-null iterator that compares != end()
+		// and then crashes on the next dereference.
+		iterator& operator++() {
+			if (data->next) {
+				data = data->next;
+			} else {
+				do {
+					bucket = bucket->next_bucket;
+				} while (bucket && !bucket->data);
+				if (bucket)
+					data = bucket->data;
+				else
+					data = NULL;
+			}
+			return *this;
+		}
+
+		bool operator!=(const iterator& it) {
+			return (data != it.data || bucket != it.bucket);
+		}
+
+		bool operator==(const iterator& it) {
+			return (data == it.data && bucket == it.bucket);
+		}
+
+// NOTE : not certain if returning ref always works
+		key_type &operator*() {
+			return data->key;
+		}
+	};
+
+private:
+ hasher_func hasher;
+ equal_func equal;
+
+ double load_factor;
+
+ size_t _size;
+ size_t _max_size;
+ size_t num_buckets;
+ size_t hash_mask;
+
+ hash_bucket* first_bucket; // first used bucket
+ hash_bucket* last_bucket; // last used bucket
+
+ hash_bucket* buckets;
+
+public:
+
+ hash_set(const size_t expected_size = 100000, const double max_load = 1.25) {
+
+ size_t min_buckets = (size_t)(expected_size / max_load);
+ load_factor = max_load;
+ int nb;
+ for(nb=2;nb<min_buckets;nb*=2);
+ num_buckets = nb;
+ hash_mask = nb-1;
+ buckets = new hash_bucket[num_buckets];
+
+ _size = 0;
+ _max_size = 0;
+ first_bucket = 0;
+ last_bucket = 0;
+ }
+
+ int insert(const key_type& key) {
+ data_item* d = new data_item(key);
+
+ // find the insertion bucket
+ size_t bucket_index = hasher(key) & hash_mask;
+ // if the bucket is empty just add new data_item
+ if (!buckets[bucket_index].data) {
+ // create new data item
+
+ buckets[bucket_index].data = d;
+ if (!first_bucket)
+ first_bucket = &buckets[bucket_index];
+ else
+ last_bucket->next_bucket = &buckets[bucket_index];
+ last_bucket = &buckets[bucket_index];
+ } else { // we already have data items in this bucket
+ // prepend the item to overflow chain
+ data_item* temp = buckets[bucket_index].data;
+ buckets[bucket_index].data = d;
+ d->next = temp;
+ }
+ _size++;
+ if(_size>_max_size)
+ _max_size = _size;
+
+ return 0;
+ }
+
+ iterator find(const key_type& key) {
+ iterator iter;
+
+ // find the insertion bucket
+ size_t bucket_index = hasher(key) & hash_mask;
+ // if the bucket is empty just add new data_item
+ if (!buckets[bucket_index].data) {
+ iter.bucket = NULL;
+ iter.data = NULL;
+ } else { // we already have data items in this bucket
+ data_item* temp = buckets[bucket_index].data;
+ while (temp && !equal(temp->key, key))
+ temp = temp->next;
+ iter.data = temp;
+ if (!temp)
+ iter.bucket = NULL;
+ else
+ iter.bucket = &buckets[bucket_index];
+ }
+ return iter;
+ }
+
+	// Remove the first item whose key matches; no-op when the key is absent.
+	// NOTE(review): unlike hash_table::erase, an emptied bucket is NOT
+	// unlinked from the used-bucket chain here (hash_set buckets have no
+	// prev pointer, so unlinking would require a walk). As a result a live
+	// bucket may have data == NULL, and iteration/clear must tolerate that.
+	void erase(const key_type& key) {
+		// find the bucket
+		size_t bucket_index = hasher(key) & hash_mask;
+		// nothing to erase if the bucket is empty
+		if (!buckets[bucket_index].data)
+			return;
+
+		// walk the overflow chain looking for the key
+		data_item* temp = buckets[bucket_index].data;
+		data_item* prev = NULL;
+		while (temp && !equal(temp->key, key)) {
+			prev = temp;
+			temp = temp->next;
+		}
+		if (!temp)
+			return;
+
+		// delete this data item from the chain
+		_size--;
+		if (!prev)	// we are deleting the first element in chain
+			buckets[bucket_index].data = temp->next;
+		else
+			prev->next = temp->next;
+		delete temp;
+	}
+
+	iterator begin() {
+		iterator iter;
+		// Skip any leading buckets whose chain was emptied by erase()
+		// (erase cannot unlink them from the used-bucket list), so that
+		// begin() either points at a real item or equals end().
+		hash_bucket* b = first_bucket;
+		while (b && !b->data)
+			b = b->next_bucket;
+		iter.bucket = b;
+		iter.data = (b) ? b->data : NULL;
+		return iter;
+	}
+
+ iterator end() {
+ iterator iter;
+ iter.bucket = NULL;
+ iter.data = NULL;
+ return iter;
+ }
+
+	// Delete every item and reset the used-bucket chain.
+	void clear() {
+		data_item* temp;
+		data_item* next;
+
+		hash_bucket* tmp;
+		while (first_bucket) {
+			// erase() leaves an emptied bucket in the used-bucket chain
+			// (no prev links to unlink it), so data may legitimately be
+			// NULL here -- the loop below must not assume otherwise.
+			temp = first_bucket->data;
+			while (temp) {
+				next = temp->next;
+				delete temp;
+				temp = next;
+			}
+			first_bucket->data = NULL;
+			tmp = first_bucket;
+			first_bucket = first_bucket->next_bucket;
+			tmp->next_bucket = NULL;
+		}
+		last_bucket = NULL;
+		_size = 0;
+
+	}
+
+	// Resize the (empty) set based on the peak load seen since the last
+	// resize: grow when the peak exceeded load_factor, shrink when it stayed
+	// under half of it. Must only be called on an empty set.
+	int resize() {
+		if (_size) {
+			fprintf(stderr, "Error: resizing non-empty hash table\n");
+			exit(1);
+		}
+
+		double max_load = (1.0 * _max_size) / num_buckets;
+
+		// resize table if its maximum load exceeded the load factor
+		// OR it was less than half of the load factor
+		size_t min_buckets = 0;
+		if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
+			min_buckets = _max_size / load_factor;
+		}
+
+		if (min_buckets) {
+			// find power-of-2 size larger than min_buckets
+			int nb;
+			for(nb=2;nb<min_buckets;nb*=2);
+			num_buckets = nb;
+
+			// cast to int to match %d -- passing size_t to %d is undefined
+			// behavior (and the hash_table version already casts)
+			fprintf(stderr, "Resizing to %d buckets\n", (int)num_buckets);
+			delete[] buckets;
+			hash_mask = num_buckets-1;
+
+			buckets = new hash_bucket[num_buckets];
+		}
+
+		// restart peak-load tracking for the next epoch; without this reset
+		// the shrink branch above could never fire after a one-time spike
+		_max_size = _size;
+		return 0;
+	}
+
+
+ size_t size() const {
+ return _size;
+ }
+
+ bool empty() const {
+ return (_size == 0);
+ }
+
+ ~hash_set() {
+ clear();
+ delete[] buckets;
+ }
+
+};
+
+
+#endif // __HASH_TABLE_H