Fix HFTA operators and UDAFs broken by the last update
[com/gs-lite.git] / include / hfta / hash_table.h
index 45559f8..2d80035 100644 (file)
@@ -40,7 +40,11 @@ public :
                value_type second;
                data_item* next;        // next data item in overflow chain
 
-               data_item(const key_type& k, const value_type& v) : first(k), second(v), next(NULL) { }
+               data_item(const key_type& k, const value_type& v) {
+                       first = k;
+                       second = v;
+                       next = NULL;
+                }
        };
 
        struct hash_bucket {
@@ -101,15 +105,19 @@ private:
        equal_func equal;
 
        double load_factor;
-//     double max_load;
 
        size_t _size;
        size_t _max_size;
        size_t num_buckets;
        size_t hash_mask;
 
+       // bucket allocation happens on first insert
+       // this flag indicates that the bucket array has been allocated
+       unsigned allocated;
+
+
        hash_bucket* first_bucket;      // first used bucket
-       hash_bucket* last_bucket;               // last used bucket
+       hash_bucket* last_bucket;       // last used bucket
 
        hash_bucket* buckets;
 
@@ -119,24 +127,38 @@ private:
 public:
 
 
-       hash_table(const size_t n_buckets = 51437, const double load = 0.5) {
-               load_factor = load;
+       hash_table(const size_t expected_size = 100000, const double max_load = 1.25) {
+
+               size_t min_buckets = (size_t)(expected_size / max_load);
+               load_factor = max_load;
                int nb;
-               for(nb=2;nb<n_buckets;nb*=2);
+               for(nb=2;nb<min_buckets;nb*=2);
                num_buckets = nb;
                hash_mask = nb-1;
-               buckets = new hash_bucket[num_buckets];
+
+               // we will make the hash table start with size 0 and expand on first insertion
+               buckets = new hash_bucket[0];
+               allocated = 0;
 
                _size = 0;
                _max_size = 0;
-//             max_load = 0.0;
                first_bucket = 0;
                last_bucket = 0;
 
-               total_memory = num_buckets * sizeof(hash_bucket);
+               total_memory = 0;
        }
 
        int insert(const key_type& key, const value_type& val) {
+
+               if (__builtin_expect(!allocated, 0)) {          // we expect buckets to be allocated most of the time
+                       delete buckets;
+                       buckets = new hash_bucket[num_buckets];
+                       allocated = 1;  
+                       total_memory = num_buckets * sizeof(hash_bucket);
+
+                       // fprintf(stderr, "Initial allocation of %d buckets\n", (int)num_buckets);
+               }
+
                data_item* d = new data_item(key, val);
                total_memory += sizeof(data_item);
 
@@ -182,6 +204,12 @@ public:
        iterator find(const key_type& key) {
                iterator iter;
 
+               if (__builtin_expect(!allocated, 0)) {          // we expect buckets to be allocated most of the time
+                       iter.bucket = NULL;
+                       iter.data = NULL;
+                       return iter;
+               }               
+
                // find the insertion bucket
                size_t bucket_index = hasher(key) & hash_mask;
                // if the bucket is empty just add new data_item
@@ -205,6 +233,11 @@ public:
        }
 
        void erase(const key_type& key) {
+
+               if (__builtin_expect(!allocated, 0)) {          // we expect buckets to be allocated most of the time
+                       return;
+               }       
+
                // find the  bucket
                size_t bucket_index = hasher(key) & hash_mask;
                // if the bucket is empty just add new data_item
@@ -247,6 +280,11 @@ public:
        }
 
        void erase_full(const key_type& key) {
+
+               if (__builtin_expect(!allocated, 0)) {          // we expect buckets to be allocated most of the time
+                       return;
+               }
+
                // find the  bucket
                size_t bucket_index = hasher(key) & hash_mask;
                // if the bucket is empty just add new data_item
@@ -284,8 +322,8 @@ public:
                }else{
                        prev->next = temp->next;
                }
-        delete (*temp).first;
-        delete (*temp).second;
+//        delete (*temp).first;
+//        delete (*temp).second;
         delete temp;
         total_memory -= sizeof(data_item);
        }
@@ -326,35 +364,51 @@ public:
                last_bucket = NULL;
                _size = 0;
                total_memory = num_buckets * sizeof(hash_bucket);
-
        }
 
-       int rehash() {
+       int resize() {
                if (_size) {
-                       fprintf(stderr, "Error: rehashing non-empty hash table\n");
+                       fprintf(stderr, "Error: resizing non-empty hash table\n");
                        exit(1);
                }
 
                double max_load = (1.0 * _max_size) / num_buckets;
-               if (max_load > load_factor ) {
-                       delete[] buckets;
-                       // roughly double the size of the table
 
-                       num_buckets *= 2;
-                       hash_mask = num_buckets-1;
-
-                       buckets = new hash_bucket[num_buckets];
-                       total_memory = num_buckets * sizeof(hash_bucket);
+               // resize table if its maximum load exceeds the load factor
+               // OR it was less than half of the load factor
+               size_t min_buckets = 0;
+               if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
+                       min_buckets = ceil(_max_size / load_factor);
+               } 
+               if (min_buckets) {
+                       // find power-of-2 size larger than min_buckets;
+                       int nb;
+                       for(nb=2;nb<min_buckets;nb*=2);
+                       // make sure we are actually changing the size
+                       if (nb != num_buckets) {
+                               fprintf(stderr, "Resizing from %d to %d buckets\n", (int)num_buckets, nb);
+                               num_buckets = nb;
+                               delete[] buckets;
+                               hash_mask = num_buckets-1;
+
+                               buckets = new hash_bucket[num_buckets];
+                               total_memory = num_buckets * sizeof(hash_bucket);
+                       }
                }
-               _max_size = _size;
-               return 0;
-       }
 
+               return 0;
+       }               
 
        size_t size() const {
                return _size;
        }
 
+       size_t max_size() const {
+               return _max_size;
+       }
+
        bool empty() const {
                return (_size == 0);
        }
@@ -379,7 +433,10 @@ public :
                key_type key;
                data_item* next;        // next data item in overflow chain
 
-               data_item(const key_type& k) : key(k), next(NULL) { }
+               data_item(const key_type& k) {
+                       key = k;
+                       next = NULL;
+                }
        };
 
        struct hash_bucket {
@@ -428,7 +485,6 @@ private:
        equal_func equal;
 
        double load_factor;
-//     double max_load;
 
        size_t _size;
        size_t _max_size;
@@ -441,19 +497,19 @@ private:
        hash_bucket* buckets;
 
 public:
-       hash_set(const size_t n_buckets = 131072, const double load = 0.75) {
-               load_factor = load;
 
+       hash_set(const size_t expected_size = 100000, const double max_load = 1.25) {
+
+               size_t min_buckets = (size_t)(expected_size / max_load);
+               load_factor = max_load;
                int nb;
-               for(nb=2;nb<n_buckets;nb*=2);
+               for(nb=2;nb<min_buckets;nb*=2);
                num_buckets = nb;
-               hash_mask = num_buckets-1;
-
+               hash_mask = nb-1;
                buckets = new hash_bucket[num_buckets];
 
                _size = 0;
                _max_size = 0;
-//             max_load = 0.0;
                first_bucket = 0;
                last_bucket = 0;
        }
@@ -570,23 +626,36 @@ public:
 
        }
 
-       int rehash() {
+       int resize() {
                if (_size) {
-                       fprintf(stderr, "Error: rehashing non-empty hash table\n");
+                       fprintf(stderr, "Error: resizing non-empty hash table\n");
                        exit(1);
                }
-
+               
                double max_load = (1.0 * _max_size) / num_buckets;
 
-               if (max_load > load_factor ) {
-                       delete[] buckets;
-                       // roughly double the size of the table
-
-                       num_buckets *= 2;
-                       hash_mask = num_buckets -1;
-                       buckets = new hash_bucket[num_buckets];
+               // resize table if its maximum load exceeds the load factor
+               // OR it was less than half of the load factor
+               size_t min_buckets = 0;
+               if ((max_load > load_factor) || ((2 * max_load) < load_factor)) {
+                       min_buckets = ceil(_max_size / load_factor);
+               } 
+
+               if (min_buckets) {
+                       // find power-of-2 size larger than min_buckets;
+                       int nb;
+                       for(nb=2;nb<min_buckets;nb*=2);
+                       // make sure we are actually changing the size
+                       if (nb != num_buckets) {
+                               fprintf(stderr, "Resizing from %d to %d buckets\n", (int)num_buckets, nb);
+                               num_buckets = nb;
+                               delete[] buckets;
+                               hash_mask = num_buckets-1;
+
+                               buckets = new hash_bucket[num_buckets];         
+                       }
                }
-               _max_size = _size;
+
                return 0;
        }