Added quantiling UDAFs

[com/gs-lite.git] / include / hfta / groupby_operator_oop.h
diff --git a/include/hfta/groupby_operator_oop.h b/include/hfta/groupby_operator_oop.h

index 03a577f..e02a459 100644 (file)
--- a/include/hfta/groupby_operator_oop.h
+++ b/include/hfta/groupby_operator_oop.h
@@ -1,399 +1,399 @@
-/* ------------------------------------------------
-Copyright 2014 AT&T Intellectual Property
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
- ------------------------------------------- */
-
-#ifndef GROUPBY_OPERATOR_OOP_H
-#define GROUPBY_OPERATOR_OOP_H
-
-#include "host_tuple.h"
-#include "base_operator.h"
-#include <list>
-#include <vector>
-#include "hash_table.h"
-#include <cassert>
-
-//             TED: should be supplied by the groupby_func
-#define _GB_FLUSH_PER_TUPLE_ 1
-
-/* max allowed disorder  -- Jin */
-//             TED: should be supplied by the groupby_func
-#define DISORDER_LEVEL 2
-
-//#define NDEBUG
-
-using namespace std;
-
-// ASSUME temporal_type is one of int, uint, llong, ullong
-
-template <class groupby_func, class group, class aggregate, class hasher_func, class equal_func, class temporal_type>
-class groupby_operator_oop : public base_operator {
-private :
-       groupby_func func;
-
-       /* a list of hash tables, which maintains aggregates for current window and also k previous ones  -- Jin */
-       vector<hash_table<group*, aggregate*, hasher_func, equal_func>* > group_tables;
-
-       /* the minimum and maximum window id of the hash tables  -- Jin  */
-       temporal_type min_wid, max_wid;
-
-
-       bool flush_finished;
-       temporal_type curr_table;
-       typename hash_table<group*, aggregate*, hasher_func, equal_func>::iterator flush_pos;
-
-       temporal_type last_flushed_temporal_gb;
-       temporal_type last_temporal_gb;
-
-int n_slow_flush;
-       int n_patterns;
-
-
-public:
-       groupby_operator_oop(int schema_handle, const char* name) : base_operator(name), func(schema_handle) {
-               flush_finished = true;
-
-               min_wid = 0;
-               max_wid = 0;
-n_slow_flush = 0;
-               n_patterns = func.n_groupby_patterns();
-       }
-
-       ~groupby_operator_oop() {
-               hash_table<group*, aggregate*, hasher_func, equal_func>* table;
-               // delete all the elements in the group_tables list;
-               while (!group_tables.empty()) {
-                       table = group_tables.back();
-                       group_tables.pop_back();
-                       table->clear();
-//fprintf(stderr,"Deleting group table (c) at %lx\n",(gs_uint64_t)(table));
-                       delete (table);
-               }
-
-       }
-
-       int accept_tuple(host_tuple& tup, list<host_tuple>& result) {
-
-
-               // Push out completed groups
-               if(!flush_finished) partial_flush(result);
-
-               // create buffer on the stack to store key object
-               char buffer[sizeof(group)];
-
-               // extract the key information from the tuple and
-               // copy it into buffer
-               group* grp = func.create_group(tup, buffer);
-
-
-               if (!grp) {
-//printf("grp==NULL recieved ");
-                       if (func.temp_status_received()) {
-//printf("temp status record ");
-                               last_flushed_temporal_gb = func.get_last_flushed_gb ();
-                               last_temporal_gb = func.get_last_gb ();
-                       }
-//printf("\n");
-
-//fprintf(stderr,"min_wid=%d, max_wid=%d, last_temporal_gb=%d, last_flushed_temporal_gb=%d, flush_finished=%d\n",min_wid, max_wid, last_temporal_gb, last_flushed_temporal_gb, flush_finished);
-
-                       /* no data has arrived, and so we ignore the temp tuples -- Jin */
-                       if (group_tables.size()>0) {
-
-                               gs_int64_t index;
-                               if(last_flushed_temporal_gb >= min_wid){
-                                       index = last_flushed_temporal_gb - min_wid;
-                               }else{
-                                       index = -(min_wid - last_flushed_temporal_gb);  // unsigned arithmetic
-                               }
-
-                               if (func.flush_needed() && index>=0) {
-#ifdef NDEBUG
-//fprintf(stderr, "flush needed: last_flushed_gb  %u , min_wid %u \n", last_flushed_temporal_gb, min_wid);
-#endif
-                                       // Init flush on first temp tuple -- Jin
-                                       if ( !flush_finished) {
-#ifdef NDEBUG
-//fprintf(stderr, "last_flushed_gb is %u, min_wid is %u \n", last_flushed_temporal_gb, min_wid);
-#endif
-                                               flush_old(result);
-                                       }
-                                       if (last_temporal_gb > min_wid && group_tables.size()>0) {
-                                               flush_finished = false;
-                                       }
-
-                               // we start to flush from the head of the group tables -- Jin
-                                       if(group_tables.size()>0){
-                                               flush_pos = group_tables[0]->begin();
-                                       }
-
-#ifdef NDEBUG
-//fprintf(stderr, "after flush old \n");
-#endif
-                               }
-                       }
-
-                       host_tuple temp_tup;
-                       if (!func.create_temp_status_tuple(temp_tup, flush_finished)) {
-                               temp_tup.channel = output_channel;
-                               result.push_back(temp_tup);
-                       }
-
-                       tup.free_tuple();
-                       return 0;
-               }
-
-//fprintf (stderr, "after create group grp=%lx, curr_table = %d\n",(gs_uint64_t)grp, grp->get_curr_gb());
-
-               /*  This is a regular tuple -- Jin */
-               typename hash_table<group*, aggregate*, hasher_func, equal_func>::iterator iter;
-               /*  first, decide which hash table we need to work at  */
-               curr_table = grp->get_curr_gb();
-               if (max_wid == 0 && min_wid == 0) {
-                       group_tables.push_back((new hash_table<group*, aggregate*, hasher_func, equal_func>()));
-//fprintf(stderr,"Added (1) hash tbl at %lx, pos=%d\n",(gs_uint64_t)(group_tables.back()),group_tables.size());
-                       max_wid = min_wid = curr_table;
-               }
-               if (curr_table < min_wid) {
-                       for (temporal_type i = curr_table; i < min_wid; i++){
-                               group_tables.insert(group_tables.begin(), new hash_table<group*, aggregate*, hasher_func, equal_func>());
-//fprintf(stderr,"Added (2) hash tbl at %lx, pos=%d\n",(gs_uint64_t)(group_tables.back()),group_tables.size());
-                       }
-                       min_wid = curr_table;
-               }
-               if (curr_table > max_wid) {
-                       hash_table<group*, aggregate*, hasher_func, equal_func>* pt;
-                       for (temporal_type i = max_wid; i < curr_table; i++) {
-                               pt =new hash_table<group*, aggregate*, hasher_func, equal_func>();
-                               group_tables.push_back(pt);
-//fprintf(stderr,"Added (3) hash tbl at %lx, pos=%d\n",(gs_uint64_t)(group_tables.back()),group_tables.size());
-                       }
-
-                       max_wid = curr_table;
-               }
-               gs_int64_t index = curr_table - min_wid;
-
-               if ((iter = group_tables[index]->find(grp)) != group_tables[index]->end()) {
-                       aggregate* old_aggr = (*iter).second;
-                       func.update_aggregate(tup, grp, old_aggr);
-               }else{
-                       /* We only flush when a temp tuple is received, so we only check on temp tuple  -- Jin */
-                       // create a copy of the group on the heap
-                       if(n_patterns <= 1){
-
-                               group* new_grp = new group(grp);        // need a copy constructor for groups
-
-                               aggregate* aggr = new aggregate();
-
-                       // create an aggregate in preallocated buffer
-                               aggr = func.create_aggregate(tup, (char*)aggr);
-
-//                             hash_table<group*, aggregate*, hasher_func, equal_func>* pt;
-                               group_tables[index]->insert(new_grp, aggr);
-                       }else{
-                               int p;
-                               for(p=0;p<n_patterns;++p){
-                                       group* new_grp = new group(grp, func.get_pattern(p));
-                                       aggregate* aggr = new aggregate();
-                                       aggr = func.create_aggregate(tup, (char*)aggr);
-                                       group_tables[index]->insert(new_grp, aggr);
-                               }
-                       }
-               }
-               tup.free_tuple();
-               return 0;
-       }
-
-       int partial_flush(list<host_tuple>& result) {
-               host_tuple tup;
-               /* the hash table we should flush is func->last_flushed_gb_0  -- Jin */
-               /* should we guarantee that everything before func->last_flushed_gb_0 also flushed ??? */
-
-               gs_int64_t i;
-
-//fprintf(stderr, "partial_flush size of group_tables = %u, min_wid is %u, max_wid is %u last_temporal_gb=%d \n", group_tables.size(), min_wid, max_wid, last_temporal_gb);
-       if(group_tables.size()==0){
-               flush_finished = true;
-//fprintf(stderr, "out of partial flush early \n");
-               return 0;
-       }
-
-//                             emit up to _GB_FLUSH_PER_TABLE_ output tuples.
-               if (!group_tables[0]->empty()) {
-                       for (i=0; flush_pos!=group_tables[0]->end() && i < _GB_FLUSH_PER_TUPLE_; ++flush_pos, ++i) {
-n_slow_flush++;
-                               bool failed = false;
-                               tup = func.create_output_tuple((*flush_pos).first,(*flush_pos).second, failed);
-                               if (!failed) {
-                                       tup.channel = output_channel;
-                                       result.push_back(tup);
-                               }
-//fprintf(stderr,"partial_flush Deleting at Flush_pos=%lx \n",(gs_uint64_t)((*flush_pos).first));
-                               delete ((*flush_pos).first);
-                               delete ((*flush_pos).second);
-                       }
-               }
-
-//                     Finalize processing if empty.
-               if (flush_pos == group_tables[0]->end()) {
-                       /* one window is completely flushed, so recycle the hash table  -- Jin */
-
-                       hash_table<group*, aggregate*, hasher_func, equal_func>* table =  group_tables[0];
-
-//fprintf(stderr,"partial_flush Delelting group table %lx\n",(gs_uint64_t)(group_tables[0]));
-                       group_tables[0]->clear();
-                       delete (group_tables[0]);
-
-                       group_tables.erase(group_tables.begin());
-
-                       min_wid++;
-
-                       if (last_temporal_gb > min_wid && group_tables.size()>0) {
-                               flush_pos = group_tables[0]->begin();
-
-                       } else {
-                               flush_finished = true;
-                       }
-               }
-//fprintf(stderr, "out of partial flush \n");
-               return 0;
-       }
-
-
-       /* Where is this function called ??? */  /* externally */
-       int flush(list<host_tuple>& result) {
-               host_tuple tup;
-               typename hash_table<group*, aggregate*, hasher_func, equal_func>::iterator iter;
-               /* the hash table we should flush is func->last_flushed_gb_0  -- Jin */
-               /* should we guarantee that everything before func->last_flushed_gb_0 also flushed ??? */
-               while ( group_tables.size() > 0) {
-                       if (!group_tables[0]->empty()) {
-                               if (flush_finished)
-                                       flush_pos = group_tables[0]->begin();
-                               for (; flush_pos != group_tables[0]->end(); ++flush_pos) {
-                                       bool failed = false;
-
-                                       tup = func.create_output_tuple((*flush_pos).first,(*flush_pos).second, failed);
-
-                                       if (!failed) {
-
-                                               tup.channel = output_channel;
-                                               result.push_back(tup);
-                                       }
-//fprintf(stderr,"flush_old Deleting at Flush_pos=%lx \n",(gs_uint64_t)((*flush_pos).first));
-                                       delete ((*flush_pos).first);
-                                       delete ((*flush_pos).second);
-
-                               }
-                       }
-                       min_wid++;
-
-               // remove the hashtable from group_tables
-                       hash_table<group*, aggregate*, hasher_func, equal_func>* table = group_tables[0];
-
-                       table->clear();
-//fprintf(stderr,"flush_old Delelting group table %lx\n",(gs_uint64_t)(group_tables[0]));
-                       delete (table);
-                       group_tables.erase(group_tables.begin());
-
-                       if(group_tables.size()>0){
-                               flush_pos = group_tables[0]->begin();
-                       }
-               }
-
-
-
-               flush_finished = true;
-
-               return 0;
-       }
-
-       /* flushes every hash table before last_flush_gb, and get ready to flush the next window  -- Jin */
-       int flush_old(list<host_tuple>& result) {
-               host_tuple tup;
-               typename hash_table<group*, aggregate*, hasher_func, equal_func>::iterator iter;
-               gs_int64_t num, i;
-
-//fprintf(stderr, "flush_old size of group_tables = %u, min_wid is %u, max_wid is %u last_temporal_gb=%d, num=%d\n", group_tables.size(), min_wid, max_wid, last_temporal_gb, num);
-
-               num = last_temporal_gb - min_wid;
-
-               //If the old table isn't empty, flush it now.
-               for (i = 0; i < num && group_tables.size() > 0; i++) {
-                       if (!group_tables[0]->empty()) {
-                               for (; flush_pos != group_tables[0]->end(); ++flush_pos) {
-                                       bool failed = false;
-
-                                       tup = func.create_output_tuple((*flush_pos).first,(*flush_pos).second, failed);
-
-                                       if (!failed) {
-
-                                               tup.channel = output_channel;
-                                               result.push_back(tup);
-                                       }
-//fprintf(stderr,"flush_old Deleting at Flush_pos=%lx \n",(gs_uint64_t)((*flush_pos).first));
-                                       delete ((*flush_pos).first);
-                                       delete ((*flush_pos).second);
-
-                               }
-                       }
-                       min_wid++;
-
-               // remove the hashtable from group_tables
-                       hash_table<group*, aggregate*, hasher_func, equal_func>* table = group_tables[0];
-
-                       table->clear();
-//fprintf(stderr,"flush_old Delelting group table %lx\n",(gs_uint64_t)(group_tables[0]));
-                       delete (table);
-                       group_tables.erase(group_tables.begin());
-
-                       if(group_tables.size()>0){
-                               flush_pos = group_tables[0]->begin();
-                       }
-               }
-
-               flush_finished = true;
-
-//fprintf(stderr, "end of flush_old \n");
-
-               return 0;
-       }
-
-
-       int set_param_block(int sz, void * value) {
-               func.set_param_block(sz, value);
-               return 0;
-       }
-
-       int get_temp_status(host_tuple& result) {
-               result.channel = output_channel;
-               return func.create_temp_status_tuple(result, flush_finished);
-       }
-
-       int get_blocked_status () {
-               return -1;
-       }
-
-       unsigned int get_mem_footprint() {
-               unsigned int ret;
-               unsigned int i;
-
-               for(i=0;i<group_tables.size();++i)
-                       ret += group_tables[i]->get_mem_footprint() ;
-
-               return ret;
-       }
-};
-
-#endif // GROUPBY_OPERATOR_OOP_H
-
+/* ------------------------------------------------\r
+Copyright 2014 AT&T Intellectual Property\r
+   Licensed under the Apache License, Version 2.0 (the "License");\r
+   you may not use this file except in compliance with the License.\r
+   You may obtain a copy of the License at\r
+\r
+     http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+   Unless required by applicable law or agreed to in writing, software\r
+   distributed under the License is distributed on an "AS IS" BASIS,\r
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+   See the License for the specific language governing permissions and\r
+   limitations under the License.\r
+ ------------------------------------------- */\r
+\r
+#ifndef GROUPBY_OPERATOR_OOP_H\r
+#define GROUPBY_OPERATOR_OOP_H\r
+\r
+#include "host_tuple.h"\r
+#include "base_operator.h"\r
+#include <list>\r
+#include <vector>\r
+#include "hash_table.h"\r
+#include <cassert>\r
+\r
+//             TED: should be supplied by the groupby_func\r
+#define _GB_FLUSH_PER_TUPLE_ 1\r
+\r
+/* max allowed disorder  -- Jin */\r
+//             TED: should be supplied by the groupby_func\r
+#define DISORDER_LEVEL 2\r
+\r
+//#define NDEBUG\r
+\r
+using namespace std;\r
+\r
+// ASSUME temporal_type is one of int, uint, llong, ullong\r
+\r
+template <class groupby_func, class group, class aggregate, class hasher_func, class equal_func, class temporal_type>\r
+class groupby_operator_oop : public base_operator {\r
+private :\r
+       groupby_func func;\r
+\r
+       /* a list of hash tables, which maintains aggregates for current window and also k previous ones  -- Jin */\r
+       vector<hash_table<group*, aggregate*, hasher_func, equal_func>* > group_tables;\r
+\r
+       /* the minimum and maximum window id of the hash tables  -- Jin  */\r
+       temporal_type min_wid, max_wid;\r
+\r
+\r
+       bool flush_finished;\r
+       temporal_type curr_table;\r
+       typename hash_table<group*, aggregate*, hasher_func, equal_func>::iterator flush_pos;\r
+\r
+       temporal_type last_flushed_temporal_gb;\r
+       temporal_type last_temporal_gb;\r
+\r
+int n_slow_flush;\r
+       int n_patterns;\r
+\r
+\r
+public:\r
+       groupby_operator_oop(int schema_handle, const char* name) : base_operator(name), func(schema_handle) {\r
+               flush_finished = true;\r
+\r
+               min_wid = 0;\r
+               max_wid = 0;\r
+n_slow_flush = 0;\r
+               n_patterns = func.n_groupby_patterns();\r
+       }\r
+\r
+       ~groupby_operator_oop() {\r
+               hash_table<group*, aggregate*, hasher_func, equal_func>* table;\r
+               // delete all the elements in the group_tables list;\r
+               while (!group_tables.empty()) {\r
+                       table = group_tables.back();\r
+                       group_tables.pop_back();\r
+                       table->clear();\r
+//fprintf(stderr,"Deleting group table (c) at %lx\n",(gs_uint64_t)(table));\r
+                       delete (table);\r
+               }\r
+\r
+       }\r
+\r
+       int accept_tuple(host_tuple& tup, list<host_tuple>& result) {\r
+\r
+\r
+               // Push out completed groups\r
+               if(!flush_finished) partial_flush(result);\r
+\r
+               // create buffer on the stack to store key object\r
+               char buffer[sizeof(group)];\r
+\r
+               // extract the key information from the tuple and\r
+               // copy it into buffer\r
+               group* grp = func.create_group(tup, buffer);\r
+\r
+\r
+               if (!grp) {\r
+//printf("grp==NULL recieved ");\r
+                       if (func.temp_status_received()) {\r
+//printf("temp status record ");\r
+                               last_flushed_temporal_gb = func.get_last_flushed_gb ();\r
+                               last_temporal_gb = func.get_last_gb ();\r
+                       }\r
+//printf("\n");\r
+\r
+//fprintf(stderr,"min_wid=%d, max_wid=%d, last_temporal_gb=%d, last_flushed_temporal_gb=%d, flush_finished=%d\n",min_wid, max_wid, last_temporal_gb, last_flushed_temporal_gb, flush_finished);\r
+\r
+                       /* no data has arrived, and so we ignore the temp tuples -- Jin */\r
+                       if (group_tables.size()>0) {\r
+\r
+                               gs_int64_t index;\r
+                               if(last_flushed_temporal_gb >= min_wid){\r
+                                       index = last_flushed_temporal_gb - min_wid;\r
+                               }else{\r
+                                       index = -(min_wid - last_flushed_temporal_gb);  // unsigned arithmetic\r
+                               }\r
+\r
+                               if (func.flush_needed() && index>=0) {\r
+#ifdef NDEBUG\r
+//fprintf(stderr, "flush needed: last_flushed_gb  %u , min_wid %u \n", last_flushed_temporal_gb, min_wid);\r
+#endif\r
+                                       // Init flush on first temp tuple -- Jin\r
+                                       if ( !flush_finished) {\r
+#ifdef NDEBUG\r
+//fprintf(stderr, "last_flushed_gb is %u, min_wid is %u \n", last_flushed_temporal_gb, min_wid);\r
+#endif\r
+                                               flush_old(result);\r
+                                       }\r
+                                       if (last_temporal_gb > min_wid && group_tables.size()>0) {\r
+                                               flush_finished = false;\r
+                                       }\r
+\r
+                               // we start to flush from the head of the group tables -- Jin\r
+                                       if(group_tables.size()>0){\r
+                                               flush_pos = group_tables[0]->begin();\r
+                                       }\r
+\r
+#ifdef NDEBUG\r
+//fprintf(stderr, "after flush old \n");\r
+#endif\r
+                               }\r
+                       }\r
+\r
+                       host_tuple temp_tup;\r
+                       if (!func.create_temp_status_tuple(temp_tup, flush_finished)) {\r
+                               temp_tup.channel = output_channel;\r
+                               result.push_back(temp_tup);\r
+                       }\r
+\r
+                       tup.free_tuple();\r
+                       return 0;\r
+               }\r
+\r
+//fprintf (stderr, "after create group grp=%lx, curr_table = %d\n",(gs_uint64_t)grp, grp->get_curr_gb());\r
+\r
+               /*  This is a regular tuple -- Jin */\r
+               typename hash_table<group*, aggregate*, hasher_func, equal_func>::iterator iter;\r
+               /*  first, decide which hash table we need to work at  */\r
+               curr_table = grp->get_curr_gb();\r
+               if (max_wid == 0 && min_wid == 0) {\r
+                       group_tables.push_back((new hash_table<group*, aggregate*, hasher_func, equal_func>()));\r
+//fprintf(stderr,"Added (1) hash tbl at %lx, pos=%d\n",(gs_uint64_t)(group_tables.back()),group_tables.size());\r
+                       max_wid = min_wid = curr_table;\r
+               }\r
+               if (curr_table < min_wid) {\r
+                       for (temporal_type i = curr_table; i < min_wid; i++){\r
+                               group_tables.insert(group_tables.begin(), new hash_table<group*, aggregate*, hasher_func, equal_func>());\r
+//fprintf(stderr,"Added (2) hash tbl at %lx, pos=%d\n",(gs_uint64_t)(group_tables.back()),group_tables.size());\r
+                       }\r
+                       min_wid = curr_table;\r
+               }\r
+               if (curr_table > max_wid) {\r
+                       hash_table<group*, aggregate*, hasher_func, equal_func>* pt;\r
+                       for (temporal_type i = max_wid; i < curr_table; i++) {\r
+                               pt =new hash_table<group*, aggregate*, hasher_func, equal_func>();\r
+                               group_tables.push_back(pt);\r
+//fprintf(stderr,"Added (3) hash tbl at %lx, pos=%d\n",(gs_uint64_t)(group_tables.back()),group_tables.size());\r
+                       }\r
+\r
+                       max_wid = curr_table;\r
+               }\r
+               gs_int64_t index = curr_table - min_wid;\r
+\r
+               if ((iter = group_tables[index]->find(grp)) != group_tables[index]->end()) {\r
+                       aggregate* old_aggr = (*iter).second;\r
+                       func.update_aggregate(tup, grp, old_aggr);\r
+               }else{\r
+                       /* We only flush when a temp tuple is received, so we only check on temp tuple  -- Jin */\r
+                       // create a copy of the group on the heap\r
+                       if(n_patterns <= 1){\r
+\r
+                               group* new_grp = new group(grp);        // need a copy constructor for groups\r
+\r
+                               aggregate* aggr = new aggregate();\r
+\r
+                       // create an aggregate in preallocated buffer\r
+                               aggr = func.create_aggregate(tup, (char*)aggr);\r
+\r
+//                             hash_table<group*, aggregate*, hasher_func, equal_func>* pt;\r
+                               group_tables[index]->insert(new_grp, aggr);\r
+                       }else{\r
+                               int p;\r
+                               for(p=0;p<n_patterns;++p){\r
+                                       group* new_grp = new group(grp, func.get_pattern(p));\r
+                                       aggregate* aggr = new aggregate();\r
+                                       aggr = func.create_aggregate(tup, (char*)aggr);\r
+                                       group_tables[index]->insert(new_grp, aggr);\r
+                               }\r
+                       }\r
+               }\r
+               tup.free_tuple();\r
+               return 0;\r
+       }\r
+\r
+       int partial_flush(list<host_tuple>& result) {\r
+               host_tuple tup;\r
+               /* the hash table we should flush is func->last_flushed_gb_0  -- Jin */\r
+               /* should we guarantee that everything before func->last_flushed_gb_0 also flushed ??? */\r
+\r
+               gs_int64_t i;\r
+\r
+//fprintf(stderr, "partial_flush size of group_tables = %u, min_wid is %u, max_wid is %u last_temporal_gb=%d \n", group_tables.size(), min_wid, max_wid, last_temporal_gb);\r
+       if(group_tables.size()==0){\r
+               flush_finished = true;\r
+//fprintf(stderr, "out of partial flush early \n");\r
+               return 0;\r
+       }\r
+\r
+//                             emit up to _GB_FLUSH_PER_TABLE_ output tuples.\r
+               if (!group_tables[0]->empty()) {\r
+                       for (i=0; flush_pos!=group_tables[0]->end() && i < _GB_FLUSH_PER_TUPLE_; ++flush_pos, ++i) {\r
+n_slow_flush++;\r
+                               bool failed = false;\r
+                               tup = func.create_output_tuple((*flush_pos).first,(*flush_pos).second, failed);\r
+                               if (!failed) {\r
+                                       tup.channel = output_channel;\r
+                                       result.push_back(tup);\r
+                               }\r
+//fprintf(stderr,"partial_flush Deleting at Flush_pos=%lx \n",(gs_uint64_t)((*flush_pos).first));\r
+                               delete ((*flush_pos).first);\r
+                               delete ((*flush_pos).second);\r
+                       }\r
+               }\r
+\r
+//                     Finalize processing if empty.\r
+               if (flush_pos == group_tables[0]->end()) {\r
+                       /* one window is completely flushed, so recycle the hash table  -- Jin */\r
+\r
+                       hash_table<group*, aggregate*, hasher_func, equal_func>* table =  group_tables[0];\r
+\r
+//fprintf(stderr,"partial_flush Delelting group table %lx\n",(gs_uint64_t)(group_tables[0]));\r
+                       group_tables[0]->clear();\r
+                       delete (group_tables[0]);\r
+\r
+                       group_tables.erase(group_tables.begin());\r
+\r
+                       min_wid++;\r
+\r
+                       if (last_temporal_gb > min_wid && group_tables.size()>0) {\r
+                               flush_pos = group_tables[0]->begin();\r
+\r
+                       } else {\r
+                               flush_finished = true;\r
+                       }\r
+               }\r
+//fprintf(stderr, "out of partial flush \n");\r
+               return 0;\r
+       }\r
+\r
+\r
+       /* Where is this function called ??? */  /* externally */\r
+       int flush(list<host_tuple>& result) {\r
+               host_tuple tup;\r
+               typename hash_table<group*, aggregate*, hasher_func, equal_func>::iterator iter;\r
+               /* the hash table we should flush is func->last_flushed_gb_0  -- Jin */\r
+               /* should we guarantee that everything before func->last_flushed_gb_0 also flushed ??? */\r
+               while ( group_tables.size() > 0) {\r
+                       if (!group_tables[0]->empty()) {\r
+                               if (flush_finished)\r
+                                       flush_pos = group_tables[0]->begin();\r
+                               for (; flush_pos != group_tables[0]->end(); ++flush_pos) {\r
+                                       bool failed = false;\r
+\r
+                                       tup = func.create_output_tuple((*flush_pos).first,(*flush_pos).second, failed);\r
+\r
+                                       if (!failed) {\r
+\r
+                                               tup.channel = output_channel;\r
+                                               result.push_back(tup);\r
+                                       }\r
+//fprintf(stderr,"flush_old Deleting at Flush_pos=%lx \n",(gs_uint64_t)((*flush_pos).first));\r
+                                       delete ((*flush_pos).first);\r
+                                       delete ((*flush_pos).second);\r
+\r
+                               }\r
+                       }\r
+                       min_wid++;\r
+\r
+               // remove the hashtable from group_tables\r
+                       hash_table<group*, aggregate*, hasher_func, equal_func>* table = group_tables[0];\r
+\r
+                       table->clear();\r
+//fprintf(stderr,"flush_old Delelting group table %lx\n",(gs_uint64_t)(group_tables[0]));\r
+                       delete (table);\r
+                       group_tables.erase(group_tables.begin());\r
+\r
+                       if(group_tables.size()>0){\r
+                               flush_pos = group_tables[0]->begin();\r
+                       }\r
+               }\r
+\r
+\r
+\r
+               flush_finished = true;\r
+\r
+               return 0;\r
+       }\r
+\r
+       /* flushes every hash table before last_flush_gb, and get ready to flush the next window  -- Jin */\r
+       int flush_old(list<host_tuple>& result) {\r
+               host_tuple tup;\r
+               typename hash_table<group*, aggregate*, hasher_func, equal_func>::iterator iter;\r
+               gs_int64_t num, i;\r
+\r
+//fprintf(stderr, "flush_old size of group_tables = %u, min_wid is %u, max_wid is %u last_temporal_gb=%d, num=%d\n", group_tables.size(), min_wid, max_wid, last_temporal_gb, num);\r
+\r
+               num = last_temporal_gb - min_wid;\r
+\r
+               //If the old table isn't empty, flush it now.\r
+               for (i = 0; i < num && group_tables.size() > 0; i++) {\r
+                       if (!group_tables[0]->empty()) {\r
+                               for (; flush_pos != group_tables[0]->end(); ++flush_pos) {\r
+                                       bool failed = false;\r
+\r
+                                       tup = func.create_output_tuple((*flush_pos).first,(*flush_pos).second, failed);\r
+\r
+                                       if (!failed) {\r
+\r
+                                               tup.channel = output_channel;\r
+                                               result.push_back(tup);\r
+                                       }\r
+//fprintf(stderr,"flush_old Deleting at Flush_pos=%lx \n",(gs_uint64_t)((*flush_pos).first));\r
+                                       delete ((*flush_pos).first);\r
+                                       delete ((*flush_pos).second);\r
+\r
+                               }\r
+                       }\r
+                       min_wid++;\r
+\r
+               // remove the hashtable from group_tables\r
+                       hash_table<group*, aggregate*, hasher_func, equal_func>* table = group_tables[0];\r
+\r
+                       table->clear();\r
+//fprintf(stderr,"flush_old Delelting group table %lx\n",(gs_uint64_t)(group_tables[0]));\r
+                       delete (table);\r
+                       group_tables.erase(group_tables.begin());\r
+\r
+                       if(group_tables.size()>0){\r
+                               flush_pos = group_tables[0]->begin();\r
+                       }\r
+               }\r
+\r
+               flush_finished = true;\r
+\r
+//fprintf(stderr, "end of flush_old \n");\r
+\r
+               return 0;\r
+       }\r
+\r
+\r
+       int set_param_block(int sz, void * value) {\r
+               func.set_param_block(sz, value);\r
+               return 0;\r
+       }\r
+\r
+       int get_temp_status(host_tuple& result) {\r
+               result.channel = output_channel;\r
+               return func.create_temp_status_tuple(result, flush_finished);\r
+       }\r
+\r
+       int get_blocked_status () {\r
+               return -1;\r
+       }\r
+\r
+       unsigned int get_mem_footprint() {\r
+               unsigned int ret;\r
+               unsigned int i;\r
+\r
+               for(i=0;i<group_tables.size();++i)\r
+                       ret += group_tables[i]->get_mem_footprint() ;\r
+\r
+               return ret;\r
+       }\r
+};\r
+\r
+#endif // GROUPBY_OPERATOR_OOP_H\r
+\r