Add support for query key extraction
[com/gs-lite.git] / src / ftacmp / query_plan.cc
1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3    Licensed under the Apache License, Version 2.0 (the "License");
4    you may not use this file except in compliance with the License.
5    You may obtain a copy of the License at
6
7      http://www.apache.org/licenses/LICENSE-2.0
8
9    Unless required by applicable law or agreed to in writing, software
10    distributed under the License is distributed on an "AS IS" BASIS,
11    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12    See the License for the specific language governing permissions and
13    limitations under the License.
14  ------------------------------------------- */
15
16 //              Create, manipulate, and dump query plans.
17
18 #include "query_plan.h"
19 #include "analyze_fta.h"
20 #include "generate_utils.h"
21
22 #include<vector>
23
24 using namespace std;
25
26 extern string hash_nums[NRANDS];        // for fast hashing
27
28
29 char tmpstr[1000];
30
//		Replace every '.' in s with '_', in place.
//		Strings without a '.' (including the empty string) are left unchanged.
void untaboo(std::string &s){
	// Index with the string's own unsigned size type; the previous int
	// counter was compared against the unsigned value of s.size().
	for(std::string::size_type pos=0; pos<s.size(); ++pos){
		if(s[pos] == '.'){
			s[pos] = '_';
		}
	}
}
39
40 //                      mrg_qpn constructor, define here to avoid
41 //                      circular references in the .h file
42 mrg_qpn::mrg_qpn(filter_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
43                 param_tbl = spx->param_tbl;
44                 int i;
45                 node_name = n_name;
46                 field_entry_list *fel = new field_entry_list();
47                 merge_fieldpos = -1;
48
49                 disorder = 1;
50
51                 for(i=0;i<spx->select_list.size();++i){
52                         data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
53                         if(dt->is_temporal()){
54                                 if(merge_fieldpos < 0){
55                                         merge_fieldpos = i;
56                                 }else{
57                                         fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
58                                         dt->reset_temporal();
59                                 }
60                         }
61
62                         field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
63                         fel->append_field(fe);
64                         delete dt;
65                 }
66                 if(merge_fieldpos<0){
67                         fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
68                                 exit(1);
69                 }
70                 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
71
72 //                              NEED TO HANDLE USER_SPECIFIED SLACK
73                 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
74                                 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
75 //      if(this->slack == NULL)
76 //              fprintf(stderr,"Zero slack.\n");
77 //      else
78 //              fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
79
80                 for(i=0;i<sources.size();i++){
81                         std::string rvar = "_m"+int_to_string(i);
82                         mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
83                         mvars[i]->set_tablevar_ref(i);
84                         fm.push_back(new tablevar_t(sources[i].c_str()));
85                         fm[i]->set_range_var(rvar);
86                 }
87
88                 param_tbl = new param_table();
89                 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
90                 int pi;
91                 for(pi=0;pi<param_names.size();pi++){
92                         data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
93                         param_tbl->add_param(param_names[pi],dt->duplicate(),
94                                                         spx->param_tbl->handle_access(param_names[pi]));
95                 }
96                 definitions = spx->definitions;
97
98 }
99
100
101
102 //              This function translates an analyzed parse tree
103 //              into one or more query nodes (qp_node).
104 //              Currently only one node is created, but some query
105 //              fragments might create more than one query node,
106 //              e.g. aggregation over a join, or nested subqueries
107 //              in the FROM clause (unless this is handled at parse tree
108 //              analysis time).  At this stage, they will be linked
109 //              by the names in the FROM clause.
110 //              INVARIANT : if more than one query node is returned,
111 //              the last one represents the output of the query.
112 vector<qp_node *> create_query_nodes(query_summary_class *qs,table_list *Schema){
113
114 //              Classify the query.
115
116         vector <qp_node *> local_plan;
117         qp_node *plan_root;
118
119 //                      TODO
120 //                      I should probably move a lot of this code
121 //                      into the qp_node constructors,
122 //                      and have this code focus on building the query plan tree.
123
124 //              MERGE node
125         if(qs->query_type == MERGE_QUERY){
126                 mrg_qpn *merge_node = new mrg_qpn(qs,Schema);
127
128 //                      Done
129                 plan_root = merge_node;
130                 local_plan.push_back(merge_node);
131
132                 /*
133                 Do not split sources until we are done with optimizations
134                 vector<mrg_qpn *> split_merge = merge_node->split_sources();
135                 local_plan.insert(local_plan.begin(), split_merge.begin(), split_merge.end());
136                 */
137 //                      If children are created, add them to the schema.
138 /*
139                 int i;
140 printf("split_merge size is %d\n",split_merge.size());
141                 for(i=1;i<split_merge.size();++i){
142                         Schema->add_table(split_merge[i]->get_fields());
143 printf("Adding split merge table %d\n",i);
144                 }
145 */
146
147 /*
148 printf("Did split sources on %s:\n",qs->query_name.c_str());
149 int ss;
150 for(ss=0;ss<local_plan.size();ss++){
151 printf("node %d, name=%s, sources=",ss,local_plan[ss]->get_node_name().c_str());
152 vector<tablevar_t *> inv = local_plan[ss]->get_input_tbls();
153 int nn;
154 for(nn=0;nn<inv.size();nn++){
155 printf("%s ",inv[nn]->to_string().c_str());
156 }
157 printf("\n");
158 }
159 */
160
161
162         } else{
163
164 //              Select / Aggregation / Join
165           if(qs->gb_tbl->size() == 0 && qs->aggr_tbl->size() == 0){
166
167                 if(qs->fta_tree->get_from()->size() == 1){
168                         spx_qpn *spx_node = new spx_qpn(qs,Schema);
169
170                         plan_root = spx_node;
171                         local_plan.push_back(spx_node);
172                 }else{
173                         if(qs->fta_tree->get_from()->get_properties() == FILTER_JOIN_PROPERTY){
174                                 filter_join_qpn *join_node = new filter_join_qpn(qs,Schema);
175                                 plan_root = join_node;
176                                 local_plan.push_back(join_node);
177                         }else{
178                                 join_eq_hash_qpn *join_node = new join_eq_hash_qpn(qs,Schema);
179                                 plan_root = join_node;
180                                 local_plan.push_back(join_node);
181                         }
182                 }
183           }else{
184 //                      aggregation
185
186                 if(qs->states_refd.size() || qs->sg_tbl.size() || qs->cb_cnf.size()){
187                         sgahcwcb_qpn *sgahcwcb_node = new sgahcwcb_qpn(qs,Schema);
188                         plan_root = sgahcwcb_node;
189                         local_plan.push_back(sgahcwcb_node);
190                 }else{
191                         if(qs->closew_cnf.size()){
192                                 rsgah_qpn *rsgah_node = new rsgah_qpn(qs,Schema);
193                                 plan_root = rsgah_node;
194                                 local_plan.push_back(rsgah_node);
195                         }else{
196                                 sgah_qpn *sgah_node = new sgah_qpn(qs,Schema);
197                                 plan_root = sgah_node;
198                                 local_plan.push_back(sgah_node);
199                         }
200                 }
201           }
202         }
203
204
205 //              Get the query name and other definitions.
206         plan_root->set_node_name( qs->query_name);
207         plan_root->set_definitions( qs->definitions) ;
208
209
210 //      return(plan_root);
211         return(local_plan);
212
213 }
214
215
216 string se_to_query_string(scalarexp_t *se, aggregate_table *aggr_tbl){
217   string l_str;
218   string r_str;
219   string ret;
220   int p;
221   vector<scalarexp_t *> operand_list;
222   string su_ind = "";
223
224   if(se->is_superaggr())
225         su_ind = "$";
226
227   switch(se->get_operator_type()){
228     case SE_LITERAL:
229                 l_str = se->get_literal()->to_query_string();
230                 return l_str;
231     case SE_PARAM:
232                 l_str = "$" + se->get_op();
233                 return l_str;
234     case SE_COLREF:
235                 l_str =  se->get_colref()->to_query_string() ;
236                 return l_str;
237     case SE_UNARY_OP:
238                  l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
239
240                 return se->get_op()+"( "+l_str+" )";;
241     case SE_BINARY_OP:
242                 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
243                 r_str = se_to_query_string(se->get_right_se(),aggr_tbl);
244                 return( "("+l_str+")"+se->get_op()+"("+r_str+")" );
245     case SE_AGGR_STAR:
246                 return( se->get_op() + su_ind + "(*)");
247     case SE_AGGR_SE:
248                 l_str = se_to_query_string(aggr_tbl->get_aggr_se(se->get_aggr_ref()),aggr_tbl);
249                 return( se->get_op() + su_ind + "(" + l_str + ")" );
250         case SE_FUNC:
251                 if(se->get_aggr_ref() >= 0)
252                         operand_list = aggr_tbl->get_operand_list(se->get_aggr_ref());
253                 else
254                         operand_list = se->get_operands();
255
256                 ret = se->get_op() + su_ind + "(";
257                 for(p=0;p<operand_list.size();p++){
258                         l_str = se_to_query_string(operand_list[p],aggr_tbl);
259                         if(p>0) ret += ", ";
260                         ret += l_str;
261                 }
262                 ret += ")";
263                 return(ret);
264         break;
265   }
266   return "ERROR SE op type not recognized in se_to_query_string.\n";
267 }
268
269
270 string pred_to_query_str(predicate_t *pr, aggregate_table *aggr_tbl){
271   string l_str;
272   string r_str;
273   string ret;
274   int o,l;
275   vector<literal_t *> llist;
276   vector<scalarexp_t *> op_list;
277
278         switch(pr->get_operator_type()){
279         case PRED_IN:
280                 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
281                 ret = l_str + " IN [";
282                 llist = pr->get_lit_vec();
283                 for(l=0;l<llist.size();l++){
284                         if(l>0) ret += ", ";
285                         ret += llist[l]->to_query_string();
286                 }
287                 ret += "]";
288
289                 return(ret);
290         case PRED_COMPARE:
291                 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
292                 r_str = se_to_query_string(pr->get_right_se(),aggr_tbl);
293                 return( l_str + " " + pr->get_op() + " " + r_str );
294         case PRED_UNARY_OP:
295                 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
296                 return(pr->get_op() + "( " + l_str + " )");
297         case PRED_BINARY_OP:
298                 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
299                 r_str = pred_to_query_str(pr->get_right_pr(),aggr_tbl);
300                 return("( " + r_str + " )" + pr->get_op() + "( " + l_str + " )");
301         case PRED_FUNC:
302                 ret = pr->get_op()+"[";
303                 op_list = pr->get_op_list();
304                 for(o=0;o<op_list.size();++o){
305                         if(o>0) ret += ", ";
306                         ret += se_to_query_string(op_list[o],aggr_tbl);
307                 }
308                 ret += "]";
309                 return(ret);
310         default:
311                 fprintf(stderr,"INTERNAL ERROR in pred_to_query_str, line %d, character %d, unknown predicate operator type %d\n",
312                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
313                 exit(1);
314         }
315
316         return(0);
317 }
318
319
320
321 //                      Build a selection list,
322 //                      but avoid adding duplicate SEs.
323
324
325 int add_select_list_nodup(vector<select_element *> &lfta_select_list, scalarexp_t *se,
326                                 bool &new_element){
327         new_element = false;
328         int s;
329         for(s=0;s<lfta_select_list.size();s++){
330                 if(is_equivalent_se(lfta_select_list[s]->se, se)){
331                         return(s);
332                 }
333         }
334         new_element = true;
335         lfta_select_list.push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
336         return(lfta_select_list.size()-1);
337 }
338
339
340
341 //              TODO: The generated colref should be tied to the tablevar
342 //              representing the lfta output.  For now, always 0.
343
344 scalarexp_t *make_fta_se_ref(vector<select_element *> &lfta_select_list, scalarexp_t *se, int h_tvref){
345         bool new_element;
346         int fta_se_nbr = add_select_list_nodup(lfta_select_list, se, new_element);
347         string colname;
348         if(!new_element){
349                 colname = lfta_select_list[fta_se_nbr]->name;
350         }else{
351                 colname = impute_colname(lfta_select_list, se);
352                 lfta_select_list[fta_se_nbr]->name = colname;
353         }
354 //
355 //              TODO: fill in the tablevar and schema of the colref here.
356         colref_t *new_cr = new colref_t(colname.c_str());
357         new_cr->set_tablevar_ref(h_tvref);
358
359
360         scalarexp_t *new_se= new scalarexp_t(new_cr);
361         new_se->use_decorations_of(se);
362
363         return(new_se);
364 }
365
366
367 //                      Build a selection list,
368 //                      but avoid adding duplicate SEs.
369
370
371 int add_select_list_nodup(vector<select_element *> *lfta_select_list, scalarexp_t *se,
372                                 bool &new_element){
373         new_element = false;
374         int s;
375         for(s=0;s<lfta_select_list->size();s++){
376                 if(is_equivalent_se((*lfta_select_list)[s]->se, se)){
377                         return(s);
378                 }
379         }
380         new_element = true;
381         lfta_select_list->push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
382         return(lfta_select_list->size()-1);
383 }
384
385
386
387 //              TODO: The generated colref should be tied to the tablevar
388 //              representing the lfta output.  For now, always 0.
389
390 scalarexp_t *make_fta_se_ref(vector<vector<select_element *> *> &lfta_select_list, scalarexp_t *se, int h_tvref){
391         bool new_element;
392     vector<select_element *> *the_sel_list = lfta_select_list[h_tvref];
393         int fta_se_nbr = add_select_list_nodup(the_sel_list, se, new_element);
394         string colname;
395         if(!new_element){
396                 colname = (*the_sel_list)[fta_se_nbr]->name;
397         }else{
398                 colname = impute_colname(*the_sel_list, se);
399                 (*the_sel_list)[fta_se_nbr]->name = colname;
400         }
401 //
402 //              TODO: fill in the tablevar and schema of the colref here.
403         colref_t *new_cr = new colref_t(colname.c_str());
404         new_cr->set_tablevar_ref(h_tvref);
405
406
407         scalarexp_t *new_se= new scalarexp_t(new_cr);
408         new_se->use_decorations_of(se);
409
410         return(new_se);
411 }
412
413
414
415
416 //
417 //                      Test if a se can be evaluated at the fta.
418 //                      check forbidden types (e.g. float), forbidden operations
419 //                      between types (e.g. divide a long long), forbidden operations
420 //                      (too expensive, not implemented).
421 //
422 //                      Return true if not forbidden, false if forbidden
423 //
424 //                      TODO: the parameter aggr_tbl is not used, delete it.
425
426 bool check_fta_forbidden_se(scalarexp_t *se,
427                                                  aggregate_table *aggr_tbl,
428                                                  ext_fcn_list *Ext_fcns
429                                                  ){
430
431   int p, fcn_id;
432   vector<scalarexp_t *> operand_list;
433   vector<data_type *> dt_signature;
434   data_type *dt = se->get_data_type();
435
436
437
438   switch(se->get_operator_type()){
439     case SE_LITERAL:
440     case SE_PARAM:
441     case SE_COLREF:
442                 return( se->get_data_type()->fta_legal_type() );
443         case SE_IFACE_PARAM:
444                 return true;
445     case SE_UNARY_OP:
446                 if(!check_fta_forbidden_se(se->get_left_se(), aggr_tbl, Ext_fcns))
447                          return(false);
448                 return(
449                    dt->fta_legal_operation(se->get_left_se()->get_data_type(), se->get_op())
450                 );
451     case SE_BINARY_OP:
452                  if(!check_fta_forbidden_se(se->get_left_se(),aggr_tbl, Ext_fcns))
453                          return(false);
454                  if(!check_fta_forbidden_se(se->get_right_se(),aggr_tbl, Ext_fcns))
455                          return(false);
456                  return(dt->fta_legal_operation(se->get_left_se()->get_data_type(),
457                                                                         se->get_right_se()->get_data_type(),
458                                                                         se->get_op()
459                                                                         )
460                 );
461
462 //                      return true, aggregate fta-safeness is determined elsewhere.
463     case SE_AGGR_STAR:
464                 return(true);
465     case SE_AGGR_SE:
466                 return(true);
467
468         case SE_FUNC:
469                 if(se->get_aggr_ref() >= 0) return true;
470
471                 operand_list = se->get_operands();
472                 for(p=0;p<operand_list.size();p++){
473                         if(!check_fta_forbidden_se(operand_list[p],aggr_tbl, Ext_fcns))
474                                 return(false);
475                         dt_signature.push_back(operand_list[p]->get_data_type() );
476                 }
477                 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
478                 if( fcn_id < 0 ){
479                         fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
480                         int o;
481                         for(o=0;o<operand_list.size();o++){
482                                 if(o>0) fprintf(stderr,", ");
483                                 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
484                         }
485                         fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
486                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
487                         return(false);
488                 }
489
490                 return(Ext_fcns->fta_legal(fcn_id) );
491         default:
492                 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
493                 exit(1);
494         break;
495   }
496   return(false);
497
498 }
499
500
501 //              test if a pr can be executed at the fta.
502 //
503 //                      Return true if not forbidden, false if forbidden
504
505 bool check_fta_forbidden_pr(predicate_t *pr,
506                                                  aggregate_table *aggr_tbl,
507                                                  ext_fcn_list *Ext_fcns
508                                                  ){
509
510   vector<literal_t *> llist;
511   data_type *dt;
512   int l,o, fcn_id;
513   vector<scalarexp_t *> op_list;
514   vector<data_type *> dt_signature;
515
516
517
518         switch(pr->get_operator_type()){
519         case PRED_IN:
520                 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns) )
521                         return(false);
522                 llist = pr->get_lit_vec();
523                 for(l=0;l<llist.size();l++){
524                         dt = new data_type(llist[l]->get_type());
525                         if(! dt->fta_legal_type()){
526                                 delete dt;
527                                 return(false);
528                         }
529                         delete dt;
530                 }
531                 return(true);
532         case PRED_COMPARE:
533                 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns))
534                         return(false);
535                 if(! check_fta_forbidden_se(pr->get_right_se(), aggr_tbl, Ext_fcns))
536                         return(false);
537                 return(true);
538         case PRED_UNARY_OP:
539                 return( check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns) );
540         case PRED_BINARY_OP:
541                 if(! check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns))
542                         return(false);
543                 if(! check_fta_forbidden_pr(pr->get_right_pr(), aggr_tbl, Ext_fcns))
544                         return(false);
545                 return(true);
546         case PRED_FUNC:
547                 op_list = pr->get_op_list();
548                 for(o=0;o<op_list.size();o++){
549                         if(!check_fta_forbidden_se(op_list[o],aggr_tbl, Ext_fcns))
550                                 return(false);
551                         dt_signature.push_back(op_list[o]->get_data_type() );
552                 }
553                 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), dt_signature);
554                 if( fcn_id < 0 ){
555                         fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
556                         int o;
557                         for(o=0;o<op_list.size();o++){
558                                 if(o>0) fprintf(stderr,", ");
559                                 fprintf(stderr,"%s",op_list[o]->get_data_type()->to_string().c_str());
560                         }
561                         fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
562                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
563                         return(false);
564                 }
565
566                 return(Ext_fcns->fta_legal(fcn_id) );
567         default:
568                 fprintf(stderr,"INTERNAL ERROR in check_fta_forbidden_pr, line %d, character %d, unknown predicate operator type %d\n",
569                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
570                 exit(1);
571         }
572
573         return(0);
574
575 }
576
577
578 //              Split the aggregates in orig_aggr_tbl, into superaggregates and
579 //              subaggregates.
580 //              (the value of the HFTA aggregate might be a SE of several LFTA
581 //               subaggregates, e.g. avg : sum / count )
582 //              Register the superaggregates in hfta_aggr_tbl, and the
583 //              subaggregates in lfta_aggr_tbl.
584 //              Insert references to the subaggregates into lfta_select_list.
585 //              (and record their names in the currnames list)
586 //              Create a SE for the superaggregate, put it in hfta_aggr_se,
587 //              keyed on agr_id.
588
589 void split_fta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
590                                         aggregate_table *hfta_aggr_tbl,
591                                         aggregate_table *lfta_aggr_tbl,
592                                         vector<select_element *> &lfta_select_list,
593                                         map<int,scalarexp_t *> &hfta_aggr_se,
594                                     ext_fcn_list *Ext_fcns
595                                         ){
596         bool new_element;
597         scalarexp_t *subaggr_se;
598         int fta_se_nbr;
599         string colname;
600         int ano;
601         colref_t *new_cr;
602         scalarexp_t *new_se, *l_se;
603         vector<scalarexp_t *> subaggr_ref_se;
604
605 //              UDAF processing
606         if(! orig_aggr_tbl->is_builtin(agr_id)){
607 //                      Construct the subaggregate
608                 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
609                 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
610                 vector<scalarexp_t *> subopl;
611                 int o;
612                 for(o=0;o<opl.size();++o){
613                         subopl.push_back(dup_se(opl[o], NULL));
614                 }
615                 int sub_id = Ext_fcns->get_subaggr_id(fcn_id);
616                 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
617                 subaggr_se->set_fcn_id(sub_id);
618                 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
619 //                      Add it to the lfta select list.
620                 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
621                 if(!new_element){
622                         colname = lfta_select_list[fta_se_nbr]->name;
623                 }else{
624                         colname = impute_colname(lfta_select_list, subaggr_se);
625                         lfta_select_list[fta_se_nbr]->name = colname;
626                         ano = lfta_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,Ext_fcns->has_lfta_bailout(sub_id));
627                         subaggr_se->set_aggr_id(ano);
628                 }
629
630 //                      Construct a reference to the subaggregate
631                 new_cr = new colref_t(colname.c_str());
632                 new_se = new scalarexp_t(new_cr);
633 //                              I'm not certain what the types should be ....
634 //                              This will need to be filled in by later analysis.
635 //                              NOTE: this might not capture all the meaning of data_type ...
636                 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
637                 subaggr_ref_se.push_back(new_se);
638
639 //                      Construct the superaggregate
640                 int super_id = Ext_fcns->get_superaggr_id(fcn_id);
641                 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
642                 ret_se->set_fcn_id(super_id);
643                 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
644 //                      Register it in the hfta aggregate table
645                 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, Ext_fcns->is_running_aggr(sub_id),false);
646                 ret_se->set_aggr_id(ano);
647                 hfta_aggr_se[agr_id] = ret_se;
648
649                 return;
650         }
651
652
653 //              builtin aggregate processing
654         bool l_forbid;
655
656         vector<bool> use_se;
657         vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
658         vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
659         int sa;
660
661         if(orig_aggr_tbl->is_star_aggr(agr_id)){
662           for(sa=0;sa<subaggr_names.size();sa++){
663                 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
664                 subaggr_se->set_data_type(subaggr_dt[sa]);
665
666 //                      The following sequence is similar to the code in make_fta_se_ref,
667 //                      but there is special processing for the aggregate tables.
668                 int fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
669                 if(!new_element){
670                         colname = lfta_select_list[fta_se_nbr]->name;
671                 }else{
672                         colname = impute_colname(lfta_select_list, subaggr_se);
673                         lfta_select_list[fta_se_nbr]->name = colname;
674                         ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
675                         subaggr_se->set_aggr_id(ano);
676                 }
677                 new_cr = new colref_t(colname.c_str());
678                 new_cr->set_tablevar_ref(0);
679                 new_se = new scalarexp_t(new_cr);
680
681 //                                      I'm not certain what the types should be ....
682 //                                      This will need to be filled in by later analysis.
683 //                                              Actually, this is causing a problem.
684 //                                              I will assume a UINT data type. / change to INT
685 //                                              (consistent with assign_data_types in analyze_fta.cc)
686 //                      TODO: why can't I use subaggr_dt, as I do in the other IF branch?
687                 data_type *ndt = new data_type("Int");  // used to be Uint
688                 new_se->set_data_type(ndt);
689
690                 subaggr_ref_se.push_back(new_se);
691           }
692         }else{
693           for(sa=0;sa<subaggr_names.size();sa++){
694                 if(use_se[sa]){
695                         scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
696                         l_se = dup_se(aggr_operand,  NULL);
697                         subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
698                 }else{
699                         subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
700                 }
701                 subaggr_se->set_data_type(subaggr_dt[sa]);
702
703 //                      again, similar to make_fta_se_ref.
704                 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
705                 if(!new_element){
706                         colname = lfta_select_list[fta_se_nbr]->name;
707                 }else{
708                         colname = impute_colname(lfta_select_list, subaggr_se);
709                         lfta_select_list[fta_se_nbr]->name = colname;
710                         if(use_se[sa])
711                                 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
712                         else
713                                 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
714                         subaggr_se->set_aggr_id(ano);
715                 }
716                 new_cr = new colref_t(colname.c_str());
717                 new_se = new scalarexp_t(new_cr);
718 //                              I'm not certain what the types should be ....
719 //                              This will need to be filled in by later analysis.
720 //                              NOTE: this might not capture all the meaning of data_type ...
721                 new_se->set_data_type(subaggr_dt[sa]);
722                 subaggr_ref_se.push_back(new_se);
723           }
724         }
725         scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
726         ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
727
728 // ASSUME either the return value is an aggregation,
729 // or a binary_op between two aggregations
730         if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
731                 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
732                 ret_se->set_aggr_id(ano);
733         }else{
734 // Basically processing for AVG. 
735 // set the data type of the superagg to that of the subagg.
736                 scalarexp_t *left_se = ret_se->get_left_se();
737                 left_se->set_data_type(subaggr_dt[0]);
738                 ano = hfta_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
739                 left_se->set_aggr_id(ano);
740
741                 scalarexp_t *right_se = ret_se->get_right_se();
742                 right_se->set_data_type(subaggr_dt[1]);
743                 ano = hfta_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
744                 right_se->set_aggr_id(ano);
745         }
746
747         hfta_aggr_se[agr_id] = ret_se;
748
749 }
750
751
752 //              Split aggregate agr_id of orig_aggr_tbl into a superaggregate and
753 //              its subaggregate(s).
754 //              Register the superaggregate(s) in hi_aggr_tbl, and the
755 //              subaggregates in low_aggr_tbl.
756 //              Add the subaggregate SEs to low_select_list (add_select_list_nodup)
757 //              and store their column names in the select_element name field.
758 //              Create a SE for the superaggregate, put it in hi_aggr_se,
759 //              keyed on agr_id.
    //              Ext_fcns is consulted only for UDAFs (non-builtin aggregates).
760
761 void split_hfta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
762                                         aggregate_table *hi_aggr_tbl,
763                                         aggregate_table *low_aggr_tbl,
764                                         vector<select_element *> &low_select_list,
765                                         map<int,scalarexp_t *> &hi_aggr_se,
766                                     ext_fcn_list *Ext_fcns
767                                         ){
768         bool new_element;       // set by add_select_list_nodup
769         scalarexp_t *subaggr_se;
770         int fta_se_nbr;         // index into low_select_list returned by add_select_list_nodup
771         string colname;         // column name of the subaggregate in low_select_list
772         int ano;                // id returned by the most recent add_aggr call
773         colref_t *new_cr;
774         scalarexp_t *new_se, *l_se;
775         vector<scalarexp_t *> subaggr_ref_se;   // colref SEs naming the subaggregate columns
776
777 //              UDAF processing
778         if(! orig_aggr_tbl->is_builtin(agr_id)){
779 //                      Construct the subaggregate
    //                      (it is built over duplicates of the original operands)
780                 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
781                 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
782                 vector<scalarexp_t *> subopl;
783                 int o;
784                 for(o=0;o<opl.size();++o){
785                         subopl.push_back(dup_se(opl[o], NULL));
786                 }
787                 int sub_id = Ext_fcns->get_hfta_subaggr_id(fcn_id);
788                 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
789                 subaggr_se->set_fcn_id(sub_id);
790                 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
791 //                      Add it to the low select list.
    //                      If it was already there, reuse the existing column name;
    //                      otherwise name the new entry and register the subaggregate
    //                      in low_aggr_tbl.
792                 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
793                 if(!new_element){
794                         colname = low_select_list[fta_se_nbr]->name;
795                 }else{
796                         colname = impute_colname(low_select_list, subaggr_se);
797                         low_select_list[fta_se_nbr]->name = colname;
798                         ano = low_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,false);
799                         subaggr_se->set_aggr_id(ano);
800                 }
801
802 //                      Construct a reference to the subaggregate
803                 new_cr = new colref_t(colname.c_str());
804                 new_se = new scalarexp_t(new_cr);
805 //                              I'm not certain what the types should be ....
806 //                              This will need to be filled in by later analysis.
807 //                              NOTE: this might not capture all the meaning of data_type ...
808                 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
809                 subaggr_ref_se.push_back(new_se);
810
811 //                      Construct the superaggregate
    //                      (its only operand is the reference to the subaggregate column)
812                 int super_id = Ext_fcns->get_hfta_superaggr_id(fcn_id);
813                 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
814                 ret_se->set_fcn_id(super_id);
815                 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
816 //                      Register it in the high aggregate table
817                 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, false,false);
818                 ret_se->set_aggr_id(ano);
819                 hi_aggr_se[agr_id] = ret_se;
820
821                 return;
822         }
823
824
825 //              builtin aggregate processing
826         bool l_forbid;          // NOTE(review): never referenced in this function
827
    //              use_se is passed to get_subaggr_fcns (presumably filled in there);
    //              use_se[sa] selects whether subaggregate sa is built over a copy of
    //              the aggregate's operand or as a star aggregate.
828         vector<bool> use_se;
829         vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
830         vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
831         int sa;
832
833         if(orig_aggr_tbl->is_star_aggr(agr_id)){
834           for(sa=0;sa<subaggr_names.size();sa++){
835                 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
836                 subaggr_se->set_data_type(subaggr_dt[sa]);
837
838 //                      The following sequence is similar to the code in make_fta_se_ref,
839 //                      but there is special processing for the aggregate tables.
    //                      NOTE(review): this declaration shadows the function-scope
    //                      fta_se_nbr; the other branches assign to the outer variable.
840                 int fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
841                 if(!new_element){
842                         colname = low_select_list[fta_se_nbr]->name;
843                 }else{
844                         colname = impute_colname(low_select_list, subaggr_se);
845                         low_select_list[fta_se_nbr]->name = colname;
846                         ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
847                         subaggr_se->set_aggr_id(ano);
848                 }
849                 new_cr = new colref_t(colname.c_str());
850                 new_cr->set_tablevar_ref(0);
851                 new_se = new scalarexp_t(new_cr);
852
853 //                                      I'm not certain what the types should be ....
854 //                                      This will need to be filled in by later analysis.
855 //                                              Actually, this is causing a problem.
856 //                                              An Int data type is assumed (this used to be Uint).
857 //                                              (consistent with assign_data_types in analyze_fta.cc)
858 //                      TODO: why can't I use subaggr_dt, as I do in the other IF branch?
859                 data_type *ndt = new data_type("Int");  // was Uint
860                 new_se->set_data_type(ndt);
861
862                 subaggr_ref_se.push_back(new_se);
863           }
864         }else{
865           for(sa=0;sa<subaggr_names.size();sa++){
    //                      l_se is assigned only when use_se[sa] is true, and is
    //                      only read below under the same condition.
866                 if(use_se[sa]){
867                         scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
868                         l_se = dup_se(aggr_operand,  NULL);
869                         subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
870                 }else{
871                         subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
872                 }
873                 subaggr_se->set_data_type(subaggr_dt[sa]);
874
875 //                      again, similar to make_fta_se_ref.
876                 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
877                 if(!new_element){
878                         colname = low_select_list[fta_se_nbr]->name;
879                 }else{
880                         colname = impute_colname(low_select_list, subaggr_se);
881                         low_select_list[fta_se_nbr]->name = colname;
882                         if(use_se[sa])
883                                 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
884                         else
885                                 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
886                         subaggr_se->set_aggr_id(ano);
887                 }
888                 new_cr = new colref_t(colname.c_str());
889                 new_se = new scalarexp_t(new_cr);
890 //                              I'm not certain what the types should be ....
891 //                              This will need to be filled in by later analysis.
892 //                              NOTE: this might not capture all the meaning of data_type ...
893                 new_se->set_data_type(subaggr_dt[sa]);
894                 subaggr_ref_se.push_back(new_se);
895           }
896         }
    //              Build the superaggregate SE over the subaggregate column references.
897         scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
898 // ASSUME either the return value is an aggregation,
899 // or a binary_op between two aggregations
    // NOTE(review): both sides of the || below test SE_AGGR_SE; one of them was
    // possibly meant to be a different operator type (e.g. SE_AGGR_STAR) -- confirm.
900         if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
901                 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
902                 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
903         }else{
904 // Basically processing for AVG.
905 // set the data type of the superagg to that of the subagg.
    // Each side of the binary op is registered in hi_aggr_tbl separately.
    // ASSUMES subaggr_dt has at least two entries in this case.
906                 scalarexp_t *left_se = ret_se->get_left_se();
907                 left_se->set_data_type(subaggr_dt[0]);
908                 ano = hi_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
909                 left_se->set_aggr_id(ano);
910
911                 scalarexp_t *right_se = ret_se->get_right_se();
912                 right_se->set_data_type(subaggr_dt[1]);
913                 ano = hi_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
914                 right_se->set_aggr_id(ano);
915         }
916
    // NOTE(review): in the binary-op case ano holds the id returned for right_se,
    // and ret_se itself gets no set_data_type call in this function.  The
    // corresponding tail of the preceding function in this file sets ret_se's
    // data type unconditionally and its aggr id only in the single-aggregate
    // case -- confirm which behaviour is intended.
917         ret_se->set_aggr_id(ano);
918         hi_aggr_se[agr_id] = ret_se;
919
920 }
921
922
923
924
925
926 //              Split a scalar expression into one part which executes
927 //              at the stream and another set of parts which execute
928 //              at the FTA.
929 //              Because I'm actually modifying the SEs, I will make
930 //              copies.  But I will assume that literals, params, and
931 //              colrefs are immutable at this point.
932 //              (if there is ever a need to change one, must make a
933 //               new value).
934 //              NOTE : if se is constant (only references literals),
935 //                      avoid making the fta compute it.
936 //
937 //              NOTE : This will need to be generalized to
938 //              handle join expressions, namely to handle a vector
939 //              of lftas.
940 //
941 //              Return value is the HFTA se.
942 //              Add lftas select_elements to the fta_select_list.
943 //              set fta_forbidden if this node or any child cannot
944 //              execute at the lfta.
945
946 /*
947
948 scalarexp_t *split_fta_se(scalarexp_t *se,
949                                   bool &fta_forbidden,
950                                   vector<select_element *> &lfta_select_list,
951                                   ext_fcn_list *Ext_fcns
952                                  ){
953
954   int p, fcn_id;
955   vector<scalarexp_t *> operand_list;
956   vector<data_type *> dt_signature;
957   scalarexp_t *ret_se, *l_se, *r_se;
958   bool l_forbid, r_forbid, this_forbid;
959   colref_t *new_cr;
960   scalarexp_t *new_se;
961   data_type *dt = se->get_data_type();
962
963   switch(se->get_operator_type()){
964     case SE_LITERAL:
965                 fta_forbidden = ! se->get_data_type()->fta_legal_type();
966                 ret_se = new scalarexp_t(se->get_literal());
967                 ret_se->use_decorations_of(se);
968                 return(ret_se);
969
970     case SE_PARAM:
971                 fta_forbidden = ! se->get_data_type()->fta_legal_type();
972                 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
973                 ret_se->use_decorations_of(se);
974                 return(ret_se);
975
976     case SE_COLREF:
977 //                      No colref should be forbidden,
978 //                      the schema is wrong, the fta_legal_type() fcn is wrong,
979 //                      or the source table is actually a stream.
980 //                      Issue a warning, but proceed with processing.
981 //                      Also, should not be a ref to a gbvar.
982 //                      (a gbvar ref only occurs in an aggregation node,
983 //                      and these SEs are rehomed, not split.
984                 fta_forbidden = ! se->get_data_type()->fta_legal_type();
985
986                 if(fta_forbidden){
987                         fprintf(stderr,"WARNING, a colref is a forbidden data type in split_fta_se,"
988                                                         " colref is %s,"
989                                                         " type is %s, line=%d, col=%d\n",
990                                                         se->get_colref()->to_string().c_str(),
991                                                         se->get_data_type()->get_type_str().c_str(),
992                                                         se->lineno, se->charno
993                                         );
994                 }
995
996                 if(se->is_gb()){
997                         fprintf(stderr,"INTERNAL ERROR, a colref is a gbvar ref in split_fta_se,"
998                                                         " type is %s, line=%d, col=%d\n",
999                                                         se->get_data_type()->get_type_str().c_str(),
1000                                                         se->lineno, se->charno
1001                                         );
1002                         exit(1);
1003                 }
1004
1005                 ret_se = new scalarexp_t(se->get_colref());
1006                 ret_se->use_decorations_of(se);
1007                 return(ret_se);
1008
1009     case SE_UNARY_OP:
1010                  l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1011
1012                  this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1013
1014 //                      If this operation is forbidden but the child SE is not,
1015 //                      put the child se on the lfta_select_list, create a colref
1016 //                      which accesses this se, and make it the child of this op.
1017 //                      Exception : the child se is constant (only literal refs).
1018                  if(this_forbid && !l_forbid){
1019                          if(!is_literal_or_param_only(l_se)){
1020                                  new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1021                                  ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1022                          }
1023                  }else{
1024                          ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1025                  }
1026                  ret_se->use_decorations_of(se);
1027                  fta_forbidden = this_forbid | l_forbid;
1028                  return(ret_se);
1029
1030     case SE_BINARY_OP:
1031                  l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1032                  r_se = split_fta_se(se->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1033
1034                  this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1035
1036 //                      Replace the left se if it is not forbidden, but something else is.
1037                  if((this_forbid || r_forbid) & !l_forbid){
1038                          if(!is_literal_or_param_only(l_se)){
1039                                  new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1040                                  l_se = new_se;
1041                          }
1042                  }
1043
1044 //                      Replace the right se if it is not forbidden, but something else is.
1045                  if((this_forbid || l_forbid) & !r_forbid){
1046                          if(!is_literal_or_param_only(r_se)){
1047                                  new_se = make_fta_se_ref(lfta_select_list, r_se,0);
1048                                  r_se = new_se;
1049                          }
1050                  }
1051
1052                  ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1053                  ret_se->use_decorations_of(se);
1054                  fta_forbidden = this_forbid || r_forbid || l_forbid;
1055
1056                  return(ret_se);
1057
1058     case SE_AGGR_STAR:
1059     case SE_AGGR_SE:
1060
1061                 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_fta_se."
1062                                                 " line=%d, col=%d\n",
1063                                                 se->get_op().c_str(),
1064                                                 se->lineno, se->charno
1065                                 );
1066                 exit(1);
1067                 break;
1068
1069         case SE_FUNC:
1070                 {
1071                         fta_forbidden = false;
1072                         operand_list = se->get_operands();
1073                         vector<scalarexp_t *> new_operands;
1074                         vector<bool> forbidden_op;
1075                         for(p=0;p<operand_list.size();p++){
1076                                 l_se = split_fta_se(operand_list[p], l_forbid, lfta_select_list, Ext_fcns);
1077
1078                                 fta_forbidden |= l_forbid;
1079                                 new_operands.push_back(l_se);
1080                                 forbidden_op.push_back(l_forbid);
1081                                 dt_signature.push_back(operand_list[p]->get_data_type() );
1082                         }
1083
1084                         fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
1085                         if( fcn_id < 0 ){
1086                                 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1087                                 int o;
1088                                 for(o=0;o<operand_list.size();o++){
1089                                         if(o>0) fprintf(stderr,", ");
1090                                         fprintf(stderr,"%s",operand_list[o]->get_data_type()->get_type_str().c_str());
1091                                 }
1092                                 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1093                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
1094                                 return(false);
1095                         }
1096
1097                         fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1098
1099 //                              Replace the non-forbidden operands.
1100 //                              the forbidden ones are already replaced.
1101                         if(fta_forbidden){
1102                                 for(p=0;p<new_operands.size();p++){
1103                                         if(! forbidden_op[p]){
1104 //                                        if(new_operands[p]->get_data_type()->get_temporal() != constant_t){
1105                                                 if(!is_literal_or_param_only(new_operands[p])){
1106                                                 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],0);
1107                                                 new_operands[p] = new_se;
1108                                           }
1109                                         }
1110                                 }
1111                         }
1112
1113                         ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1114                         ret_se->use_decorations_of(se);
1115
1116                         return(ret_se);
1117
1118                 }
1119         default:
1120                 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
1121                 exit(1);
1122         break;
1123   }
1124   return(false);
1125
1126 }
1127
1128 */
1129
1130
1131 //              The predicates have already been
1132 //              broken into conjunctions.
1133 //              If any part of a conjunction is fta-forbidden,
1134 //              it must be executed in the stream operator.
1135 //              Else it is executed in the FTA.
1136 //              A pre-analysis should determine whether this
1137 //              predicate is fta-safe.  This procedure will
1138 //              assume that it is fta-forbidden and will
1139 //              prepare it for execution in the stream.
1140
1141 /*
1142
1143 predicate_t *split_fta_pr(predicate_t *pr,
1144                                                  vector<select_element *> &lfta_select_list,
1145                                                  ext_fcn_list *Ext_fcns
1146                                                  ){
1147
1148   vector<literal_t *> llist;
1149   scalarexp_t *se_l, *se_r;
1150   bool l_forbid, r_forbid;
1151   predicate_t *ret_pr, *pr_l, *pr_r;
1152   vector<scalarexp_t *> op_list, new_op_list;
1153   int o;
1154   vector<data_type *> dt_signature;
1155
1156
1157         switch(pr->get_operator_type()){
1158         case PRED_IN:
1159                 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1160
1161                 if(!l_forbid){
1162                   if(!is_literal_or_param_only(se_l)){
1163                         scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1164                         se_l = new_se;
1165                   }
1166                 }
1167                 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
1168
1169                 return(ret_pr);
1170
1171         case PRED_COMPARE:
1172                 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1173                 if(!l_forbid){
1174                   if(!is_literal_or_param_only(se_l)){
1175                         scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1176                         se_l = new_se;
1177                   }
1178                 }
1179
1180                 se_r = split_fta_se(pr->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1181                 if(!r_forbid){
1182                   if(!is_literal_or_param_only(se_r)){
1183                         scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,0);
1184                         se_r = new_se;
1185                   }
1186                 }
1187
1188                 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
1189                 return(ret_pr);
1190
1191         case PRED_UNARY_OP:
1192                 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1193                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1194                 return(ret_pr);
1195
1196         case PRED_BINARY_OP:
1197                 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1198                 pr_r = split_fta_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1199                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1200                 return(ret_pr);
1201
1202         case PRED_FUNC:
1203 //                      I can't push the predicate into the lfta, except by
1204 //                      returning a bool value, and that is not worth the trouble,
1205                 op_list = pr->get_op_list();
1206                 for(o=0;o<op_list.size();++o){
1207                         se_l = split_fta_se(op_list[o],l_forbid,lfta_select_list,Ext_fcns);
1208                         if(!l_forbid){
1209                           if(!is_literal_or_param_only(se_l)){
1210                                 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1211                                 se_l = new_se;
1212                           }
1213                         }
1214                         new_op_list.push_back(se_l);
1215                 }
1216
1217                 ret_pr =  new predicate_t(pr->get_op().c_str(), new_op_list);
1218                 ret_pr->set_fcn_id(pr->get_fcn_id());
1219                 return(ret_pr);
1220         default:
1221                 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1222                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1223                 exit(1);
1224         }
1225
1226         return(0);
1227
1228 }
1229
1230 */
1231
1232
1233 //--------------------------------------------------------------------
1234
1235
1236
1237 //              Split a scalar expression into one part which executes
1238 //              at the stream and another set of parts which execute
1239 //              at the FTA.
1240 //              Because I'm actually modifying the SEs, I will make
1241 //              copies.  But I will assume that literals, params, and
1242 //              colrefs are immutable at this point.
1243 //              (if there is ever a need to change one, must make a
1244 //               new value).
1245 //              NOTE : if se is constant (only references literals),
1246 //                      avoid making the fta compute it.
1247 //
1248 //              NOTE : This will need to be generalized to
1249 //              handle join expressions, namely to handle a vector
1250 //              of lftas.
1251 //
1252 //              Return value is the HFTA se.
1253 //              Add lfta select_elements to lfta_select_list.
1254 //              set fta_forbidden if this node or any child cannot
1255 //              execute at the lfta.
1256
1257 #define SPLIT_FTAVEC_NOTBLVAR -1   // colref_source marker: no table variable referenced (set for literals and params)
1258 #define SPLIT_FTAVEC_MIXED -2      // colref_source marker: returned by combine_colref_source when two sources differ
1259
//              Report whether table variable colref_source is a PROTOCOL source.
//              colref_source is a table-variable index, or one of the negative
//              markers SPLIT_FTAVEC_NOTBLVAR / SPLIT_FTAVEC_MIXED.
//              lfta_select_list is indexed by table variable; an entry is NULL
//              when that table variable is not PROTOCOL.
//              Returns true only when colref_source indexes a non-NULL entry.
//              Negative and out-of-range values yield false rather than
//              reading outside the vector.
bool is_PROTOCOL_source(int colref_source,
                        std::vector< std::vector<select_element *> *> &lfta_select_list){
        if(colref_source < 0) return false;

//                      Safe to convert: colref_source is known to be non-negative here.
        const std::size_t idx = static_cast<std::size_t>(colref_source);
        if(idx >= lfta_select_list.size()) return false;

        return lfta_select_list[idx] != NULL;
}
1265
1266 int combine_colref_source(int s1, int s2){
1267         if(s1==s2) return(s1);
1268         if(s1==SPLIT_FTAVEC_NOTBLVAR) return s2;
1269         if(s2==SPLIT_FTAVEC_NOTBLVAR) return s1;
1270         return SPLIT_FTAVEC_MIXED;
1271 }
1272
1273 scalarexp_t *split_ftavec_se(
1274                                   scalarexp_t *se,      // the SE to split
1275                                   bool &fta_forbidden,  // return true if some part of se
1276                                                                                 // is fta-unsafe
1277                                   int &colref_source,   // the tblvar which sources the
1278                                                                                 // colref, or NOTBLVAR, or MIXED
1279                                   vector< vector<select_element *> *> &lfta_select_list,
1280                                                                                 // NULL if the tblvar is not PROTOCOL,
1281                                                                                 // else build the select list.
1282                                   ext_fcn_list *Ext_fcns // is the fcn lfta-safe?
1283                                  ){
1284 //              Return value is the HFTA SE, unless fta_forbidden is true and
1285 //              colref_source>=0 and the indicated source is PROTOCOL.
1286 //              In that case no split was done, the make_fta_se_ref must
1287 //              be done by the caller.
1288
1289   int p, fcn_id;
1290   vector<scalarexp_t *> operand_list;
1291   vector<data_type *> dt_signature;
1292   scalarexp_t *ret_se, *l_se, *r_se;
1293   bool l_forbid, r_forbid, this_forbid;
1294   int l_csource, r_csource, this_csource;
1295   colref_t *new_cr;
1296   scalarexp_t *new_se;
1297   data_type *dt = se->get_data_type();
1298
1299   switch(se->get_operator_type()){
1300     case SE_LITERAL:
1301                 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1302                 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1303                 ret_se = new scalarexp_t(se->get_literal());
1304                 ret_se->use_decorations_of(se);
1305                 return(ret_se);
1306
1307     case SE_PARAM:
1308                 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1309                 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1310                 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1311                 ret_se->use_decorations_of(se);
1312                 return(ret_se);
1313
1314         case SE_IFACE_PARAM:
1315                 fta_forbidden = false;
1316                 colref_source = se->get_ifpref()->get_tablevar_ref();
1317                 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1318                 ret_se->use_decorations_of(se);
1319                 return(ret_se);
1320
1321     case SE_COLREF:
1322 //                      No colref should be forbidden,
1323 //                      the schema is wrong, the fta_legal_type() fcn is wrong,
1324 //                      or the source table is actually a stream.
1325 //                      Issue a warning, but proceed with processing.
1326 //                      Also, should not be a ref to a gbvar.
1327 //                      (a gbvar ref only occurs in an aggregation node,
1328 //                      and these SEs are rehomed, not split.
1329                 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1330                 colref_source = se->get_colref()->get_tablevar_ref();
1331
1332                 if(fta_forbidden && is_PROTOCOL_source(colref_source, lfta_select_list)){
1333                         fprintf(stderr,"WARNING, a PROTOCOL colref is a forbidden data type in split_ftavec_se,"
1334                                                         " colref is %s,"
1335                                                         " type is %s, line=%d, col=%d\n",
1336                                                         se->get_colref()->to_string().c_str(),
1337                                                         se->get_data_type()->to_string().c_str(),
1338                                                         se->lineno, se->charno
1339                                         );
1340                 }
1341
1342                 if(se->is_gb()){
1343                         fta_forbidden = true;   // eval in hfta.  ASSUME make copy as below.
1344                 }
1345
1346                 ret_se = new scalarexp_t(se->get_colref());
1347                 ret_se->use_decorations_of(se);
1348                 return(ret_se);
1349
1350     case SE_UNARY_OP:
1351                  l_se = split_ftavec_se(se->get_left_se(), l_forbid, colref_source, lfta_select_list, Ext_fcns);
1352
1353                  this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1354
1355 //                      If this operation is forbidden but the child SE is not,
1356 //                      AND the colref source in the se is a single PROTOCOL source
1357 //                      put the child se on the lfta_select_list, create a colref
1358 //                      which accesses this se, and make it the child of this op.
1359 //                      Exception : the child se is constant (only literal refs).
1360 //                      TODO: I think the exception is expressed by is_PROTOCOL_source
1361                  if(this_forbid && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list)){
1362                          if(!is_literal_or_param_only(l_se)){
1363                                  new_se = make_fta_se_ref(lfta_select_list, l_se,colref_source);
1364                                  ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1365                          }
1366                  }else{
1367                          ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1368                  }
1369                  ret_se->use_decorations_of(se);
1370                  fta_forbidden = this_forbid | l_forbid;
1371                  return(ret_se);
1372
1373     case SE_BINARY_OP:
1374                  l_se = split_ftavec_se(se->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1375                  r_se = split_ftavec_se(se->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1376
1377                  this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1378                  colref_source=combine_colref_source(l_csource, r_csource);
1379
1380 //                      Replace the left se if the parent must be hfta but the child can
1381 //                      be lfta. This translates to
1382 //                      a) result is PROTOCOL and forbidden, but left SE is not forbidden
1383 //                      OR b) if result is mixed but the left se is PROTOCOL, not forbidden
1384                  if( ((this_forbid || r_forbid) && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1385                                 (colref_source==SPLIT_FTAVEC_MIXED && !l_forbid &&
1386                                  is_PROTOCOL_source(l_csource, lfta_select_list)) ){
1387                          if(!is_literal_or_param_only(l_se)){
1388                                  new_se = make_fta_se_ref(lfta_select_list, l_se,l_csource);
1389                                  l_se = new_se;
1390                          }
1391                  }
1392
1393 //                      same logic as for right se.
1394                  if( ((this_forbid || l_forbid) && !r_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1395                                 (colref_source==SPLIT_FTAVEC_MIXED && !r_forbid &&
1396                                  is_PROTOCOL_source(r_csource, lfta_select_list)) ){
1397                          if(!is_literal_or_param_only(r_se)){
1398                                  new_se = make_fta_se_ref(lfta_select_list, r_se,r_csource);
1399                                  r_se = new_se;
1400                          }
1401                  }
1402
1403                  ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1404                  ret_se->use_decorations_of(se);
1405                  fta_forbidden = this_forbid || r_forbid || l_forbid;
1406
1407                  return(ret_se);
1408
1409     case SE_AGGR_STAR:
1410     case SE_AGGR_SE:
1411
1412                 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_ftavec_se."
1413                                                 " line=%d, col=%d\n",
1414                                                 se->get_op().c_str(),
1415                                                 se->lineno, se->charno
1416                                 );
1417                 exit(1);
1418                 break;
1419
1420         case SE_FUNC:
1421                 {
1422                         operand_list = se->get_operands();
1423                         vector<scalarexp_t *> new_operands;
1424                         vector<bool> forbidden_op;
1425                         vector<int> csource;
1426
1427                         fta_forbidden = false;
1428                         colref_source = SPLIT_FTAVEC_NOTBLVAR;
1429                         for(p=0;p<operand_list.size();p++){
1430                                 l_se = split_ftavec_se(operand_list[p], l_forbid, l_csource, lfta_select_list, Ext_fcns);
1431
1432                                 fta_forbidden |= l_forbid;
1433                                 colref_source = combine_colref_source(colref_source, l_csource);
1434                                 new_operands.push_back(l_se);
1435                                 forbidden_op.push_back(l_forbid);
1436                                 csource.push_back(l_csource);
1437                                 dt_signature.push_back(operand_list[p]->get_data_type() );
1438                         }
1439
1440                         fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
1441                         if( fcn_id < 0 ){
1442                                 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1443                                 int o;
1444                                 for(o=0;o<operand_list.size();o++){
1445                                         if(o>0) fprintf(stderr,", ");
1446                                         fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
1447                                 }
1448                                 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1449                         if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
1450                                 return NULL;
1451                         }
1452
1453                         fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1454
1455 //                              Replace the non-forbidden operands.
1456 //                              the forbidden ones are already replaced.
1457                         if(fta_forbidden || colref_source == SPLIT_FTAVEC_MIXED){
1458                                 for(p=0;p<new_operands.size();p++){
1459                                         if(! forbidden_op[p] && is_PROTOCOL_source(csource[p], lfta_select_list)){
1460                                                 if(!is_literal_or_param_only(new_operands[p])){
1461                                                 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],csource[p]);
1462                                                 new_operands[p] = new_se;
1463                                           }
1464                                         }
1465                                 }
1466                         }
1467
1468                         ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1469                         ret_se->use_decorations_of(se);
1470
1471                         return(ret_se);
1472
1473                 }
1474         default:
1475                 printf("INTERNAL ERROR in split_ftavec_se: operator type %d\n",se->get_operator_type());
1476                 exit(1);
1477         break;
1478   }
1479   return(NULL);
1480
1481 }
1482
1483
1484 //              The predicates have already been
1485 //              broken into conjunctions.
1486 //              If any part of a conjunction is fta-forbidden,
1487 //              it must be executed in the stream operator.
1488 //              Else it is executed in the FTA.
1489 //              A pre-analysis should determine whether this
1490 //              predicate is fta-safe.  This procedure will
1491 //              assume that it is fta-forbidden and will
1492 //              prepare it for execution in the stream.
1493
1494 predicate_t *split_ftavec_pr(predicate_t *pr,
1495                                   vector< vector<select_element *> *> &lfta_select_list,
1496                                                  ext_fcn_list *Ext_fcns
1497                                                  ){
1498
1499   vector<literal_t *> llist;
1500   scalarexp_t *se_l, *se_r;
1501   bool l_forbid, r_forbid;
1502   int l_csource, r_csource;
1503   predicate_t *ret_pr, *pr_l, *pr_r;
1504   vector<scalarexp_t *> op_list, new_op_list;
1505   int o;
1506   vector<data_type *> dt_signature;
1507
1508
1509         switch(pr->get_operator_type()){
1510         case PRED_IN:
1511                 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1512
1513 //                              TODO: checking that the se is a PROTOCOL source should
1514 //                              take care of literal_or_param_only.
1515                 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1516                   if(!is_literal_or_param_only(se_l)){
1517                         scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1518                         se_l = new_se;
1519                   }
1520                 }
1521                 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
1522
1523                 return(ret_pr);
1524
1525         case PRED_COMPARE:
1526                 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1527                 if(!l_forbid  && is_PROTOCOL_source(l_csource, lfta_select_list)){
1528                   if(!is_literal_or_param_only(se_l)){
1529                         scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1530                         se_l = new_se;
1531                   }
1532                 }
1533
1534                 se_r = split_ftavec_se(pr->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1535                 if(!r_forbid  && is_PROTOCOL_source(r_csource, lfta_select_list)){
1536                   if(!is_literal_or_param_only(se_r)){
1537                         scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,r_csource);
1538                         se_r = new_se;
1539                   }
1540                 }
1541
1542                 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
1543                 return(ret_pr);
1544
1545         case PRED_UNARY_OP:
1546                 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1547                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1548                 return(ret_pr);
1549
1550         case PRED_BINARY_OP:
1551                 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1552                 pr_r = split_ftavec_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1553                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1554                 return(ret_pr);
1555
1556         case PRED_FUNC:
1557 //                      I can't push the predicate into the lfta, except by
1558 //                      returning a bool value, and that is not worth the trouble,
1559                 op_list = pr->get_op_list();
1560                 for(o=0;o<op_list.size();++o){
1561                         se_l = split_ftavec_se(op_list[o],l_forbid,l_csource,lfta_select_list,Ext_fcns);
1562                         if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1563                           if(!is_literal_or_param_only(se_l)){
1564                                 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1565                                 se_l = new_se;
1566                           }
1567                         }
1568                         new_op_list.push_back(se_l);
1569                 }
1570
1571                 ret_pr =  new predicate_t(pr->get_op().c_str(), new_op_list);
1572                 ret_pr->set_fcn_id(pr->get_fcn_id());
1573                 return(ret_pr);
1574         default:
1575                 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1576                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1577                 exit(1);
1578         }
1579
1580         return(0);
1581
1582 }
1583
1584
1585
1586 ////////////////////////////////////////////////////////////////////////
1587 ///             rehome_hfta_se rehome_hfta_pr
1588 ///             This is use to split an sgah operator (aggregation),
1589 ///             I just need to make gb, aggr references point to the
1590 ///             new gb, aggr table entries.
1591
1592
1593 scalarexp_t *rehome_fta_se(scalarexp_t *se,
1594                                   map< int, scalarexp_t * > *aggr_map
1595                                  ){
1596
1597   int p, fcn_id;
1598   int agr_id;
1599   vector<scalarexp_t *> operand_list;
1600   scalarexp_t *ret_se, *l_se, *r_se;
1601   colref_t *new_cr;
1602   scalarexp_t *new_se;
1603   data_type *dt = se->get_data_type();
1604   vector<scalarexp_t *> new_operands;
1605
1606   switch(se->get_operator_type()){
1607     case SE_LITERAL:
1608                 ret_se = new scalarexp_t(se->get_literal());
1609                 ret_se->use_decorations_of(se);
1610                 return(ret_se);
1611
1612     case SE_PARAM:
1613                 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1614                 ret_se->use_decorations_of(se);
1615                 return(ret_se);
1616
1617         case SE_IFACE_PARAM:
1618                 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1619                 ret_se->use_decorations_of(se);
1620                 return(ret_se);
1621
1622
1623
1624     case SE_COLREF:
1625 //                      Must be a GB REF ...
1626 //                      I'm assuming that the hfta gbvar table has the
1627 //                      same sequence of entries as the input query's gbvar table.
1628 //                      Else I'll need some kind of translation table.
1629
1630                 if(! se->is_gb()){
1631                         fprintf(stderr,"WARNING, a colref is not a gbver ref in rehome_hfta_se"
1632                                                         " type is %s, line=%d, col=%d\n",
1633                                                         se->get_data_type()->to_string().c_str(),
1634                                                         se->lineno, se->charno
1635                                         );
1636                 }
1637
1638                 ret_se = new scalarexp_t(se->get_colref());
1639                 ret_se->use_decorations_of(se);         // just inherit the gbref
1640                 return(ret_se);
1641
1642     case SE_UNARY_OP:
1643                  l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1644
1645                  ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1646                  ret_se->use_decorations_of(se);
1647                  return(ret_se);
1648
1649     case SE_BINARY_OP:
1650                  l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1651                  r_se = rehome_fta_se(se->get_right_se(), aggr_map);
1652
1653                  ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1654                  ret_se->use_decorations_of(se);
1655
1656                  return(ret_se);
1657
1658     case SE_AGGR_STAR:
1659     case SE_AGGR_SE:
1660                 agr_id = se->get_aggr_ref();
1661                 return (*aggr_map)[agr_id];
1662                 break;
1663
1664         case SE_FUNC:
1665                 agr_id = se->get_aggr_ref();
1666                 if(agr_id >= 0) return (*aggr_map)[agr_id];
1667
1668                 operand_list = se->get_operands();
1669                 for(p=0;p<operand_list.size();p++){
1670                         l_se = rehome_fta_se(operand_list[p], aggr_map);
1671
1672                         new_operands.push_back(l_se);
1673                 }
1674
1675
1676                 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1677                 ret_se->use_decorations_of(se);
1678
1679                 return(ret_se);
1680
1681         default:
1682                 printf("INTERNAL ERROR in rehome_fta_se: operator type %d\n",se->get_operator_type());
1683                 exit(1);
1684         break;
1685   }
1686   return(NULL);
1687
1688 }
1689
1690
1691 //              The predicates have already been
1692 //              broken into conjunctions.
1693 //              If any part of a conjunction is fta-forbidden,
1694 //              it must be executed in the stream operator.
1695 //              Else it is executed in the FTA.
1696 //              A pre-analysis should determine whether this
1697 //              predicate is fta-safe.  This procedure will
1698 //              assume that it is fta-forbidden and will
1699 //              prepare it for execution in the stream.
1700
1701 predicate_t *rehome_fta_pr(predicate_t *pr,
1702                                                  map<int, scalarexp_t *> *aggr_map
1703                                                  ){
1704
1705   vector<literal_t *> llist;
1706   scalarexp_t *se_l, *se_r;
1707   predicate_t *ret_pr, *pr_l, *pr_r;
1708   vector<scalarexp_t *> op_list, new_op_list;
1709   int o;
1710
1711         switch(pr->get_operator_type()){
1712         case PRED_IN:
1713                 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1714                 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
1715                 return(ret_pr);
1716
1717         case PRED_COMPARE:
1718                 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1719                 se_r = rehome_fta_se(pr->get_right_se(), aggr_map);
1720                 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
1721                 return(ret_pr);
1722
1723         case PRED_UNARY_OP:
1724                 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1725                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1726                 return(ret_pr);
1727
1728         case PRED_BINARY_OP:
1729                 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1730                 pr_r = rehome_fta_pr(pr->get_right_pr(), aggr_map);
1731                 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1732                 return(ret_pr);
1733
1734         case PRED_FUNC:
1735                 op_list = pr->get_op_list();
1736                 for(o=0;o<op_list.size();++o){
1737                         se_l = rehome_fta_se(op_list[o], aggr_map);
1738                         new_op_list.push_back(se_l);
1739                 }
1740                 ret_pr=  new predicate_t(pr->get_op().c_str(), new_op_list);
1741                 ret_pr->set_fcn_id(pr->get_fcn_id());
1742                 return(ret_pr);
1743
1744         default:
1745                 fprintf(stderr,"INTERNAL ERROR in rehome_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1746                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1747                 exit(1);
1748         }
1749
1750         return(0);
1751
1752 }
1753
1754
1755 ////////////////////////////////////////////////////////////////////
1756 /////////////////               Create a STREAM table to represent the FTA output.
1757
1758 table_def *create_attributes(string tname, vector<select_element *> &select_list){
1759         int s;
1760
1761
1762 //                      Create a new STREAM schema for the output of the FTA.
1763
1764         field_entry_list *fel = new field_entry_list();
1765         set<string> ufcns;
1766         for(s=0;s<select_list.size();s++){
1767                 scalarexp_t *sel_se = select_list[s]->se;
1768                 data_type *dt = sel_se->get_data_type();
1769
1770 //                      Grab the annotations of the field.
1771 //                      As of this writing, the only meaningful annotations
1772 //                      are whether or not the attribute is temporal.
1773 //                      There can be an annotation of constant_t, but
1774 //                      I'll ignore this, it feels like an unsafe assumption
1775                 param_list *plist = new param_list();
1776 //              if(dt->is_temporal()){
1777                         vector<string> param_strings = dt->get_param_keys();
1778                         int p;
1779                         for(p=0;p<param_strings.size();++p){
1780                                 string v = dt->get_param_val(param_strings[p]);
1781                                 if(v != "")
1782                                         plist->append(param_strings[p].c_str(),v.c_str());
1783                                 else
1784                                         plist->append(param_strings[p].c_str());
1785                         }
1786 //              }
1787
1788 //              char access_fcn_name[500];
1789                 string colname = select_list[s]->name;
1790 //              sprintf(access_fcn_name,"get_field_%s",colname.c_str());
1791                 string access_fcn_name = "get_field_"+colname;
1792                 field_entry *fe = new field_entry(
1793                         dt->get_type_str(), colname, access_fcn_name, plist, ufcns
1794                 );
1795
1796                 fel->append_field(fe);
1797         }
1798
1799         table_def *fta_tbl = new table_def(
1800                 tname.c_str(), NULL, NULL, fel, STREAM_SCHEMA
1801         );
1802
1803         return(fta_tbl);
1804
1805 }
1806
1807 //------------------------------------------------------------------
1808 //              Textual representation of the query node.
1809
1810
1811
1812 string spx_qpn::to_query_string(){
1813
1814         string ret = "Select ";
1815         int s;
1816         for(s=0;s<select_list.size();s++){
1817                 if(s>0) ret+=", ";
1818                 ret += se_to_query_string(select_list[s]->se, NULL);
1819                 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1820         }
1821         ret += "\n";
1822
1823         ret += "From "+table_name->to_string()+"\n";
1824
1825         if(where.size() > 0){
1826                 ret += "Where ";
1827                 int w;
1828                 for(w=0;w<where.size();w++){
1829                         if(w>0) ret += " AND ";
1830                         ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
1831                 }
1832                 ret += "\n";
1833         }
1834
1835         return(ret);
1836 }
1837
1838
1839
1840
1841 string sgah_qpn::to_query_string(){
1842
1843         string ret = "Select ";
1844         int s;
1845         for(s=0;s<select_list.size();s++){
1846                 if(s>0) ret+=", ";
1847                 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1848                 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1849         }
1850         ret += "\n";
1851
1852         ret += "From "+table_name->to_string()+"\n";
1853
1854         if(where.size() > 0){
1855                 ret += "Where ";
1856                 int w;
1857                 for(w=0;w<where.size();w++){
1858                         if(w>0) ret += " AND ";
1859                         ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1860                 }
1861                 ret += "\n";
1862         }
1863
1864         if(gb_tbl.size() > 0){
1865                 ret += "Group By ";
1866                 int g;
1867                 if(gb_tbl.gb_patterns.size() <= 1 || gb_tbl.gb_entry_type.size()==0){
1868                         for(g=0;g<gb_tbl.size();g++){
1869                                 if(g>0) ret += ", ";
1870 //                      if(gb_tbl.get_reftype(g) == GBVAR_SE){
1871                                         ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
1872 //                      }
1873                                 ret += gb_tbl.get_name(g);
1874                         }
1875                 }else{
1876                         int gb_pos = 0;
1877                         for(g=0;g<gb_tbl.gb_entry_type.size();++g){
1878                                 if(g>0) ret += ", ";
1879                                 if(gb_tbl.gb_entry_type[g] == ""){
1880                                         ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+
1881                                                 " AS "+ gb_tbl.get_name(gb_pos);
1882                                         gb_pos++;
1883                                 }
1884                                 if(gb_tbl.gb_entry_type[g] == "CUBE" ||
1885                                                 gb_tbl.gb_entry_type[g] == "ROLLUP"){
1886                                         ret += gb_tbl.gb_entry_type[g] + "(";
1887                                         int gg = 0;
1888                                         for(gg=0;gg<gb_tbl.gb_entry_count[g];++gg){
1889                                                 if(gg>0) ret += ", ";
1890                                                 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+ " AS "+ gb_tbl.get_name(gb_pos);
1891                                                 gb_pos++;
1892                                         }
1893                                         ret += ")";
1894                                 }
1895                                 if(gb_tbl.gb_entry_type[g] == "GROUPING_SETS"){
1896                                         ret += gb_tbl.gb_entry_type[g] + "(";
1897                                         int g1, g2;
1898                                         vector<vector<bool> > &local_components = gb_tbl.pattern_components[g];
1899                                         for(g1=0;g1<local_components.size();++g1){
1900                                                 if(g1>0) ret+=",";
1901                                                 bool first_field = true;
1902                                                 ret += "\n\t\t(";
1903                                                 for(g2=0;g2<=gb_tbl.gb_entry_count[g];g2++){
1904                                                         if(local_components[g1][g2]){
1905                                                                 if(!first_field) ret+=", ";
1906                                                                 else first_field = false;
1907                                                                 ret +=  gb_tbl.get_name(gb_pos+g2);
1908                                                         }
1909                                                 }
1910                                                 ret += ")";
1911                                         }
1912                                         ret += ") ";
1913                                         gb_pos += gb_tbl.gb_entry_count[g];
1914                                 }
1915                         }
1916                 }
1917                 ret += "\n";
1918         }
1919
1920         if(having.size() > 0){
1921                 ret += "Having ";
1922                 int h;
1923                 for(h=0;h<having.size();h++){
1924                         if(h>0) ret += " AND ";
1925                         ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
1926                 }
1927                 ret += "\n";
1928         }
1929
1930         return(ret);
1931 }
1932
1933
1934 string rsgah_qpn::to_query_string(){
1935
1936         string ret = "Select ";
1937         int s;
1938         for(s=0;s<select_list.size();s++){
1939                 if(s>0) ret+=", ";
1940                 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1941                 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1942         }
1943         ret += "\n";
1944
1945         ret += "From "+table_name->to_string()+"\n";
1946
1947         if(where.size() > 0){
1948                 ret += "Where ";
1949                 int w;
1950                 for(w=0;w<where.size();w++){
1951                         if(w>0) ret += " AND ";
1952                         ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1953                 }
1954                 ret += "\n";
1955         }
1956
1957         if(gb_tbl.size() > 0){
1958                 ret += "Group By ";
1959                 int g;
1960                 for(g=0;g<gb_tbl.size();g++){
1961                         if(g>0) ret += ", ";
1962 //                      if(gb_tbl.get_reftype(g) == GBVAR_SE){
1963                                 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl)+" AS ";
1964 //                      }
1965                         ret += gb_tbl.get_name(g);
1966                 }
1967                 ret += "\n";
1968         }
1969
1970         if(having.size() > 0){
1971                 ret += "Having ";
1972                 int h;
1973                 for(h=0;h<having.size();h++){
1974                         if(h>0) ret += " AND ";
1975                         ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
1976                 }
1977                 ret += "\n";
1978         }
1979
1980         if(closing_when.size() > 0){
1981                 ret += "Closing_When ";
1982                 int h;
1983                 for(h=0;h<closing_when.size();h++){
1984                         if(h>0) ret += " AND ";
1985                         ret += "(" + pred_to_query_str(closing_when[h]->pr,&aggr_tbl) + ")";
1986                 }
1987                 ret += "\n";
1988         }
1989
1990         return(ret);
1991 }
1992
1993
1994 string sgahcwcb_qpn::to_query_string(){
1995
1996         string ret = "Select ";
1997         int s;
1998         for(s=0;s<select_list.size();s++){
1999                 if(s>0) ret+=", ";
2000                 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2001                 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2002         }
2003         ret += "\n";
2004
2005         ret += "From "+table_name->to_string()+"\n";
2006
2007         if(where.size() > 0){
2008                 ret += "Where ";
2009                 int w;
2010                 for(w=0;w<where.size();w++){
2011                         if(w>0) ret += " AND ";
2012                         ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2013                 }
2014                 ret += "\n";
2015         }
2016
2017         if(gb_tbl.size() > 0){
2018                 ret += "Group By ";
2019                 int g;
2020                 for(g=0;g<gb_tbl.size();g++){
2021                         if(g>0) ret += ", ";
2022 //                      if(gb_tbl.get_reftype(g) == GBVAR_SE){
2023                                 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
2024 //                      }
2025                         ret += gb_tbl.get_name(g);
2026                 }
2027                 ret += "\n";
2028         }
2029
2030         if(sg_tbl.size() > 0){
2031                 ret += "Supergroup ";
2032                 int g;
2033                 bool first_elem = true;
2034                 for(g=0;g<gb_tbl.size();g++){
2035                         if(sg_tbl.count(g)){
2036                                 if(first_elem){
2037                                         ret += ", ";
2038                                         first_elem = false;
2039                                 }
2040                                 ret += gb_tbl.get_name(g);
2041                         }
2042                 }
2043                 ret += "\n";
2044         }
2045
2046         if(having.size() > 0){
2047                 ret += "Having ";
2048                 int h;
2049                 for(h=0;h<having.size();h++){
2050                         if(h>0) ret += " AND ";
2051                         ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
2052                 }
2053                 ret += "\n";
2054         }
2055
2056
2057         if(cleanwhen.size() > 0){
2058                 ret += "Cleaning_When ";
2059                 int h;
2060                 for(h=0;h<cleanwhen.size();h++){
2061                         if(h>0) ret += " AND ";
2062                         ret += "(" + pred_to_query_str(cleanwhen[h]->pr,&aggr_tbl) + ")";
2063                 }
2064                 ret += "\n";
2065         }
2066
2067         if(cleanby.size() > 0){
2068                 ret += "Cleaning_By ";
2069                 int h;
2070                 for(h=0;h<cleanby.size();h++){
2071                         if(h>0) ret += " AND ";
2072                         ret += "(" + pred_to_query_str(cleanby[h]->pr,&aggr_tbl) + ")";
2073                 }
2074                 ret += "\n";
2075         }
2076
2077         return(ret);
2078 }
2079
2080
2081 string mrg_qpn::to_query_string(){
2082
2083         string ret="Merge ";
2084         ret += mvars[0]->to_query_string() + " : " + mvars[1]->to_query_string();
2085         if(slack != NULL){
2086                 ret += " SLACK "+se_to_query_string(slack, NULL);
2087         }
2088
2089         ret += "\nFrom ";
2090         int t;
2091         for(t=0;t<fm.size();++t){
2092                 if(t>0) ret += ", ";
2093                 ret += fm[t]->to_string();
2094         }
2095         ret += "\n";
2096
2097         return(ret);
2098 }
2099
2100 string join_eq_hash_qpn::to_query_string(){
2101
2102         string ret = "Select ";
2103         int s;
2104         for(s=0;s<select_list.size();s++){
2105                 if(s>0) ret+=", ";
2106                 ret += se_to_query_string(select_list[s]->se, NULL);
2107                 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2108         }
2109         ret += "\n";
2110
2111 //                      NOTE: assuming binary join.
2112         int properties = from[0]->get_property()+2*from[1]->get_property();
2113         switch(properties){
2114         case 0:
2115                 ret += "INNER_JOIN ";
2116                 break;
2117         case 1:
2118                 ret += "LEFT_OUTER_JOIN ";
2119                 break;
2120         case 2:
2121                 ret += "RIGHT_OUTER_JOIN ";
2122                 break;
2123         case 3:
2124                 ret += "OUTER_JOIN ";
2125                 break;
2126         }
2127
2128         ret += "From ";
2129         int f;
2130         for(f=0;f<from.size();++f){
2131                 if(f>0) ret+=", ";
2132                 ret += from[f]->to_string();
2133         }
2134         ret += "\n";
2135
2136         if(where.size() > 0){
2137                 ret += "Where ";
2138                 int w;
2139                 for(w=0;w<where.size();w++){
2140                         if(w>0) ret += " AND ";
2141                         ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
2142                 }
2143                 ret += "\n";
2144         }
2145
2146         return(ret);
2147 }
2148
2149 string filter_join_qpn::to_query_string(){
2150
2151         string ret = "Select ";
2152         int s;
2153         for(s=0;s<select_list.size();s++){
2154                 if(s>0) ret+=", ";
2155                 ret += se_to_query_string(select_list[s]->se, NULL);
2156                 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2157         }
2158         ret += "\n";
2159
2160 //                      NOTE: assuming binary join.
2161         ret += "FILTER_JOIN("+temporal_var->field+","+int_to_string(temporal_range)+") ";
2162
2163         ret += "From ";
2164         int f;
2165         for(f=0;f<from.size();++f){
2166                 if(f>0) ret+=", ";
2167                 ret += from[f]->to_string();
2168         }
2169         ret += "\n";
2170
2171         if(where.size() > 0){
2172                 ret += "Where ";
2173                 int w;
2174                 for(w=0;w<where.size();w++){
2175                         if(w>0) ret += " AND ";
2176                         ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
2177                 }
2178                 ret += "\n";
2179         }
2180
2181         return(ret);
2182 }
2183
2184
2185 // -----------------------------------------------------------------
2186 //              Query node subclass specific processing.
2187
2188
2189 vector<mrg_qpn *> mrg_qpn::split_sources(){
2190   vector<mrg_qpn *> ret;
2191   int i;
2192
2193 //                      sanity check
2194         if(fm.size() != mvars.size()){
2195                 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources.  fm.size() = %lu, mvars.size() = %lu\n",fm.size(),mvars.size());
2196                 exit(1);
2197         }
2198         if(fm.size() == 1){
2199                 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources, fm size is 1.\n");
2200                 exit(1);
2201         }
2202
2203 /*
2204 int ff;
2205 printf("spliting sources merge node, name = %s, %d sources.\n\t",node_name.c_str(), fm.size());
2206 for(ff=0;ff<fm.size();++ff){
2207 printf("%s ",fm[ff]->to_string().c_str());
2208 }
2209 printf("\n");
2210 */
2211
2212 //              Handle special cases.
2213         if(fm.size() == 2){
2214                 ret.push_back(this);
2215                 return ret;
2216         }
2217
2218         if(fm.size() == 3){
2219                 mrg_qpn *new_mrg = (mrg_qpn *)this->make_copy("_cH1");
2220                 new_mrg->fm.push_back(this->fm[0]);
2221                 new_mrg->fm.push_back(this->fm[1]);
2222                 new_mrg->mvars.push_back(this->mvars[0]);
2223                 new_mrg->mvars.push_back(this->mvars[1]);
2224
2225                 this->fm.erase(this->fm.begin());
2226                 this->mvars.erase(this->mvars.begin());
2227                 string vname = fm[0]->get_var_name();
2228                 this->fm[0] = new tablevar_t(new_mrg->node_name.c_str());
2229                 this->fm[0]->set_range_var(vname);
2230                 this->mvars[0]->set_field(table_layout->get_field_name(merge_fieldpos));
2231                 this->mvars[0]->set_tablevar_ref(0);
2232                 this->mvars[1]->set_tablevar_ref(1);
2233
2234                 ret.push_back(new_mrg);
2235                 ret.push_back(this);
2236
2237 /*
2238 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg->node_name.c_str(),this->node_name.c_str());
2239 for(i=0;i<new_mrg->fm.size();++i)
2240 printf("\tsource %s var %d (%s, %s) \n",new_mrg->node_name.c_str(),i,new_mrg->fm[i]->to_string().c_str(), new_mrg->mvars[i]->to_string().c_str());
2241 for(i=0;i<this->fm.size();++i)
2242 printf("\tsource %s var %d (%s, %s) \n",this->node_name.c_str(),i,this->fm[i]->to_string().c_str(), this->mvars[i]->to_string().c_str());
2243 */
2244
2245                 return ret;
2246         }
2247
2248 //              General case.
2249 //              divide up the sources between two children.
2250 //              Then, recurse on the children.
2251
2252                 mrg_qpn *new_mrg1 = (mrg_qpn *)this->make_copy("_cH1");
2253                 mrg_qpn *new_mrg2 = (mrg_qpn *)this->make_copy("_cH2");
2254                 for(i=0;i<this->fm.size()/2;++i){
2255                         new_mrg1->fm.push_back(this->fm[i]);
2256                         new_mrg1->mvars.push_back(this->mvars[i]);
2257 //printf("Pushing %d (%s, %s) to new_mrg1\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
2258                 }
2259                 for(;i<this->fm.size();++i){
2260                         new_mrg2->fm.push_back(this->fm[i]);
2261                         new_mrg2->mvars.push_back(this->mvars[i]);
2262 //printf("Pushing %d (%s, %s) to new_mrg2\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
2263                 }
2264                 for(i=0;i<new_mrg1->mvars.size();++i)
2265                         new_mrg1->mvars[i]->set_tablevar_ref(i);
2266                 for(i=0;i<new_mrg2->mvars.size();++i)
2267                         new_mrg2->mvars[i]->set_tablevar_ref(i);
2268
2269 //                      Children created, make this merge them.
2270                 fm.clear();
2271                 mvars.clear();
2272 //                      var 1
2273                 tablevar_t *tmp_tblvar = new tablevar_t(new_mrg1->node_name.c_str());
2274                 tmp_tblvar->set_range_var("_mrg_var_1");
2275                 fm.push_back(tmp_tblvar);
2276                 colref_t *tmp_cref = new colref_t("_mrg_var_1",table_layout->get_field_name(merge_fieldpos).c_str());
2277                 tmp_cref->set_tablevar_ref(0);
2278                 mvars.push_back(tmp_cref);
2279 //                      var 2
2280                 tmp_tblvar = new tablevar_t(new_mrg2->node_name.c_str());
2281                 tmp_tblvar->set_range_var("_mrg_var_2");
2282                 fm.push_back(tmp_tblvar);
2283                 tmp_cref = new colref_t("_mrg_var_2",table_layout->get_field_name(merge_fieldpos).c_str());
2284                 tmp_cref->set_tablevar_ref(1);
2285                 mvars.push_back(tmp_cref);
2286
2287
2288 /*
2289 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg1->node_name.c_str(),new_mrg2->node_name.c_str());
2290 for(i=0;i<new_mrg1->fm.size();++i)
2291 printf("\tsource %s var %d (%s, %s) \n",new_mrg1->node_name.c_str(),i,new_mrg1->fm[i]->to_string().c_str(), new_mrg1->mvars[i]->to_string().c_str());
2292 for(i=0;i<new_mrg2->fm.size();++i)
2293 printf("\tsource %s var %d (%s, %s) \n",new_mrg2->node_name.c_str(),i,new_mrg2->fm[i]->to_string().c_str(), new_mrg2->mvars[i]->to_string().c_str());
2294 */
2295
2296 //              Recurse and put them together
2297                 vector<mrg_qpn *> st1 = new_mrg1->split_sources();
2298                 ret.insert(ret.end(), st1.begin(), st1.end());
2299                 vector<mrg_qpn *> st2 = new_mrg2->split_sources();
2300                 ret.insert(ret.end(), st2.begin(), st2.end());
2301
2302                 ret.push_back(this);
2303
2304                 return(ret);
2305
2306 }
2307
2308
2309
2310 ////////        Split helper function : resolve interfaces
2311
2312 vector<pair<string,string> > get_ifaces(tablevar_t *table, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2313         vector<pair<string,string> > basic_ifaces;
2314         int ierr;
2315         if(table->get_ifq()){
2316                 basic_ifaces= ifdb->eval(table->get_interface(),ierr);
2317                 if(ierr==1){
2318                 fprintf(stderr,"ERROR, Interface set %s not found.\n",table->get_interface().c_str());
2319                 }
2320                 if(ierr==2){
2321                         fprintf(stderr,"ERROR, interface definition file didn't parse.\n");
2322                 }
2323         }else{
2324                 basic_ifaces.push_back(make_pair(table->get_machine(), table->get_interface()));
2325         }
2326
2327         if(n_virtual_ifaces == 1)
2328                 return basic_ifaces;
2329
2330         int stride = n_virtual_ifaces / hfta_parallelism;
2331         int i,s;
2332         vector<pair<string,string> > ifaces;
2333
2334         for(i=0;i<basic_ifaces.size();++i){
2335                 string mach = basic_ifaces[i].first;
2336                 string iface = basic_ifaces[i].second;
2337                 for(s=hfta_idx*stride;s<(hfta_idx+1)*stride;++s){
2338                         ifaces.push_back(pair<string, string>(mach,iface+"X"+int_to_string(2*s)));
2339                 }
2340         }
2341
2342         return ifaces;
2343 }
2344
2345
2346 /////////       Split helper function : compute slack in a generated
2347 /////////       merge.
2348
2349 void mrg_qpn::resolve_slack(scalarexp_t *t_se, string fname, vector<pair<string, string> > &sources, ifq_t *ifdb, gb_table *gbt){
2350         int s,e,v;
2351         string es;
2352
2353 //              Find slack divisor, if any.
2354         string fnm;
2355         long long int slack_divisor = find_temporal_divisor(t_se,gbt, fnm);
2356         if(slack_divisor <= 0){
2357                 slack = NULL;
2358                 return;
2359         }
2360
2361 //              find max slack in the iface spec
2362         long long int max_slacker = 0, this_slacker;
2363         string rname = "Slack_"+fnm;
2364         for(s=0;s<sources.size();++s){
2365                 string src_machine = sources[s].first;
2366                 string src_iface = sources[s].second;
2367                 vector<string> slack_vec = ifdb->get_iface_vals(src_machine, src_iface,rname,e,es);
2368                 for(v=0;v<slack_vec.size();++v){
2369                         if(sscanf(slack_vec[v].c_str(),"%qd",&this_slacker)){
2370                                 if(this_slacker > max_slacker)
2371                                         max_slacker = this_slacker;
2372                         }
2373                 }
2374         }
2375
2376         if(max_slacker <= 0){
2377                 slack = NULL;
2378                 return;
2379         }
2380
2381 //              convert to SE
2382         long long int the_slack=(long long int)(ceil(((double)max_slacker)/((double)slack_divisor)));
2383         char tmps[256];
2384         sprintf(tmps,"%lld",the_slack);
2385         literal_t *slack_lit = new literal_t(tmps, LITERAL_LONGINT);
2386         slack = new scalarexp_t(slack_lit);
2387 }
2388
2389
2390 //------------------------------------------------------------------
2391 //              split a node to extract LFTA components.
2392
2393
2394 vector<qp_node *> mrg_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2395         // nothing to do, nothing to split, return copy of self.
2396
2397         hfta_returned = 1;
2398
2399         vector<qp_node *> ret_vec;
2400
2401         ret_vec.push_back(this);
2402         return(ret_vec);
2403
2404 }
2405
2406 vector<qp_node *> filter_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2407         vector<qp_node *> ret_vec;
2408
2409 //              First check if the query can be pushed to the FTA.
2410         bool fta_ok = true;
2411         int s;
2412         for(s=0;s<select_list.size();s++){
2413                 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2414         }
2415         int p;
2416         for(p=0;p<where.size();p++){
2417                 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2418         }
2419
2420         if(!fta_ok){
2421                 fprintf(stderr,"ERROR, filter join %s is fta-unsafe.\n",node_name.c_str());
2422                 exit(1);
2423         }
2424
2425 //              Can it be done in a single lfta?
2426 //                      Get the set of interfaces it accesses.
2427         int ierr;
2428         int si;
2429         vector<string> sel_names;
2430         vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2431         if (ifaces.empty()) {
2432                 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set\n");
2433                 exit(1);
2434         }
2435
2436         if(ifaces.size() == 1){
2437 //                              Single interface, no need to merge.
2438                 hfta_returned = 0;
2439                 ret_vec.push_back(this);
2440                 int i;
2441                 for(i=0;i<from.size();i++){
2442                         from[i]->set_machine(ifaces[0].first);
2443                         from[i]->set_interface(ifaces[0].second);
2444                         from[i]->set_ifq(false);
2445                 }
2446                 return(ret_vec);
2447         }else{
2448 //                              Multiple interfaces, generate the interface-specific queries plus
2449 //                              the merge.
2450                 hfta_returned = 1;
2451
2452                 vector<string> sel_names;
2453                 for(si=0;si<ifaces.size();++si){
2454                         filter_join_qpn *fta_node = new filter_join_qpn();
2455
2456 //                      Name the fta
2457                         if(ifaces.size()==1)
2458                                 fta_node->set_node_name( node_name );
2459                         else{
2460                                 string new_name =  "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2461                                 untaboo(new_name);
2462                                 fta_node->set_node_name(new_name);
2463                         }
2464                         sel_names.push_back(fta_node->get_node_name());
2465
2466 //                      Assign the table
2467                         int f;
2468                         for(f=0;f<from.size();f++){
2469                                 fta_node->from.push_back(from[f]->duplicate());
2470                                 fta_node->from[f]->set_machine(ifaces[si].first);
2471                                 fta_node->from[f]->set_interface(ifaces[si].second);
2472                                 fta_node->from[f]->set_ifq(false);
2473                         }
2474                         fta_node->temporal_var = temporal_var;
2475                         fta_node->temporal_range = temporal_range;
2476
2477                         fta_node->use_bloom = use_bloom;
2478
2479                         for(s=0;s<select_list.size();s++){
2480                                 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
2481                         }
2482
2483                         for(p=0;p<shared_pred.size();p++){
2484                                 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2485                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
2486                                 analyze_cnf(new_cnf);
2487                                 fta_node->shared_pred.push_back(new_cnf);
2488                                 fta_node->where.push_back(new_cnf);
2489                         }
2490                         for(p=0;p<pred_t0.size();p++){
2491                                 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2492                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
2493                                 analyze_cnf(new_cnf);
2494                                 fta_node->pred_t0.push_back(new_cnf);
2495                                 fta_node->where.push_back(new_cnf);
2496                         }
2497                         for(p=0;p<pred_t1.size();p++){
2498                                 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2499                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
2500                                 analyze_cnf(new_cnf);
2501                                 fta_node->pred_t1.push_back(new_cnf);
2502                                 fta_node->where.push_back(new_cnf);
2503                         }
2504                         for(p=0;p<hash_eq.size();p++){
2505                                 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2506                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
2507                                 analyze_cnf(new_cnf);
2508                                 fta_node->hash_eq.push_back(new_cnf);
2509                                 fta_node->where.push_back(new_cnf);
2510                         }
2511                         for(p=0;p<postfilter.size();p++){
2512                                 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2513                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
2514                                 analyze_cnf(new_cnf);
2515                                 fta_node->postfilter.push_back(new_cnf);
2516                                 fta_node->where.push_back(new_cnf);
2517                         }
2518
2519 //                      Xfer all of the parameters.
2520 //                      Use existing handle annotations.
2521                         vector<string> param_names = param_tbl->get_param_names();
2522                         int pi;
2523                         for(pi=0;pi<param_names.size();pi++){
2524                                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2525                                 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2526                                                                         param_tbl->handle_access(param_names[pi]));
2527                         }
2528                         fta_node->definitions = definitions;
2529                         if(fta_node->resolve_if_params(ifdb, this->err_str)){
2530                                 this->error_code = 3;
2531                                 return ret_vec;
2532                         }
2533
2534                         ret_vec.push_back(fta_node);
2535                 }
2536
2537                 mrg_qpn *mrg_node = new mrg_qpn((filter_join_qpn *)ret_vec[0],
2538                          node_name,  sel_names,ifaces, ifdb);
2539                 ret_vec.push_back(mrg_node);
2540
2541                 return(ret_vec);
2542         }
2543
2544 }
2545
2546 //              Use to search for unresolved interface param refs in an hfta.
2547
2548 int spx_qpn::count_ifp_refs(set<string> &ifpnames){
2549         int ret = 0;
2550         int i;
2551         for(i=0;i<select_list.size();++i)
2552                 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2553         for(i=0;i<where.size();++i)
2554                 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2555         return ret;
2556 }
2557
2558 int sgah_qpn::count_ifp_refs(set<string> &ifpnames){
2559         int ret = 0;
2560         int i,j;
2561         for(i=0;i<select_list.size();++i)
2562                 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2563         for(i=0;i<where.size();++i)
2564                 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2565         for(i=0;i<having.size();++i)
2566                 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2567         for(i=0;i<aggr_tbl.size();++i){
2568                 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2569                         ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
2570                 }else{
2571                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2572                         for(j=0;j<opl.size();++j)
2573                                 ret += count_se_ifp_refs(opl[j],ifpnames);
2574                 }
2575         }
2576         for(i=0;i<gb_tbl.size();++i){
2577                 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
2578         }
2579         return ret;
2580 }
2581
2582
2583 int rsgah_qpn::count_ifp_refs(set<string> &ifpnames){
2584         int ret = 0;
2585         int i,j;
2586         for(i=0;i<select_list.size();++i)
2587                 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2588         for(i=0;i<where.size();++i)
2589                 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2590         for(i=0;i<having.size();++i)
2591                 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2592         for(i=0;i<closing_when.size();++i)
2593                 ret += count_pr_ifp_refs(closing_when[i]->pr,ifpnames);
2594         for(i=0;i<aggr_tbl.size();++i){
2595                 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2596                         ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
2597                 }else{
2598                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2599                         for(j=0;j<opl.size();++j)
2600                                 ret += count_se_ifp_refs(opl[j],ifpnames);
2601                 }
2602         }
2603         for(i=0;i<gb_tbl.size();++i){
2604                 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
2605         }
2606         return ret;
2607 }
2608
2609 int mrg_qpn::count_ifp_refs(set<string> &ifpnames){
2610         return 0;
2611 }
2612
2613 int join_eq_hash_qpn::count_ifp_refs(set<string> &ifpnames){
2614         int ret = 0;
2615         int i;
2616         for(i=0;i<select_list.size();++i)
2617                 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2618         for(i=0;i<prefilter[0].size();++i)
2619                 ret += count_pr_ifp_refs(prefilter[0][i]->pr,ifpnames);
2620         for(i=0;i<prefilter[1].size();++i)
2621                 ret += count_pr_ifp_refs(prefilter[1][i]->pr,ifpnames);
2622         for(i=0;i<temporal_eq.size();++i)
2623                 ret += count_pr_ifp_refs(temporal_eq[i]->pr,ifpnames);
2624         for(i=0;i<hash_eq.size();++i)
2625                 ret += count_pr_ifp_refs(hash_eq[i]->pr,ifpnames);
2626         for(i=0;i<postfilter.size();++i)
2627                 ret += count_pr_ifp_refs(postfilter[i]->pr,ifpnames);
2628         return ret;
2629 }
2630
2631 int filter_join_qpn::count_ifp_refs(set<string> &ifpnames){
2632         int ret = 0;
2633         int i;
2634         for(i=0;i<select_list.size();++i)
2635                 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2636         for(i=0;i<where.size();++i)
2637                 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2638         return ret;
2639 }
2640
2641
2642 //              Resolve interface params to string literals
2643 int filter_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2644         int ret = 0;
2645         int i;
2646         string ifname = from[0]->get_interface();
2647         string ifmach = from[0]->get_machine();
2648         for(i=0;i<select_list.size();++i)
2649                 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2650                         ret = 1;
2651         for(i=0;i<where.size();++i)
2652                 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
2653                         ret = 1;
2654         return ret;
2655 }
2656
2657
2658 int spx_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2659         int ret = 0;
2660         int i;
2661         string ifname = table_name->get_interface();
2662         string ifmach = table_name->get_machine();
2663         for(i=0;i<select_list.size();++i)
2664                 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2665                         ret = 1;
2666         for(i=0;i<where.size();++i)
2667                 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
2668                         ret = 1;
2669         return ret;
2670 }
2671
2672 int sgah_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2673         int ret = 0;
2674         int i,j;
2675         string ifname = table_name->get_interface();
2676         string ifmach = table_name->get_machine();
2677
2678 //printf("Select list has %d elements\n",select_list.size());
2679         for(i=0;i<select_list.size();++i){
2680 //printf("\tresolving elemet %d\n",i);
2681                 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) ){
2682                         ret = 1;
2683                 }
2684         }
2685         for(i=0;i<where.size();++i){
2686                 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err) )
2687                         ret = 1;
2688         }
2689         for(i=0;i<having.size();++i){
2690                 if( resolve_pr_ifp_refs(having[i]->pr,ifmach, ifname, ifdb, err) )
2691                         ret = 1;
2692         }
2693 //printf("aggr list has %d elements\n",select_list.size());
2694         for(i=0;i<aggr_tbl.size();++i){
2695 //printf("\tresolving elemet %d\n",i);
2696                 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2697 //printf("\t\t\tbuiltin\n");
2698                         if( resolve_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifmach, ifname, ifdb, err) )
2699                                         ret = 1;
2700                 }else{
2701 //printf("\t\t\tudaf\n");
2702                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2703                         for(j=0;j<opl.size();++j)
2704                                 if( resolve_se_ifp_refs(opl[j],ifmach, ifname, ifdb, err) )
2705                                         ret = 1;
2706                 }
2707         }
2708         for(i=0;i<gb_tbl.size();++i){
2709                 if( resolve_se_ifp_refs(gb_tbl.get_def(i), ifmach, ifname, ifdb, err) )
2710                         ret = 1;
2711         }
2712         return ret;
2713 }
2714
2715
2716
2717 /*
2718         SPLITTING A SELECTION_PROJECTION OPERATOR
2719
2720         An SPX node may reference:
2721                 literals, parameters, colrefs, functions, operators
2722         An SPX node may not reference:
2723                 group-by variables, aggregates
2724
2725         An SPX node contains
2726                 selection list of SEs
2727                 where list of CNF predicates
2728
2729         Algorithm:
2730                 If each selection SE and each where predicate is fta-safe
2731                         execute entire operator as an LFTA.
2732                 Else
2733                         for each predicate in the where clause
2734                           if it is fta safe, execute it in the lfta
2735                           else, split each SE in the predicate, evaluate the
2736                                 top-level SEs in the hfta and eval the predicate on that.
2737                         For each SE in the se list
2738                           Split the SE, eval the high level part, push onto hfta
2739                                 selection list
2740
2741         Splitting an SE:
2742                 A SE represents a value which must be computed.  The LFTA
2743                 must provide sub-values from which the HFTA can compute the
2744                 desired value.
2745                 1) the SE is fta-safe
2746                         Create an entry in the selection list of the LFTA which is
2747                         the SE itself.  Reference this LFTA selection list entry in
2748                         the HFTA (via a field name assigned to the lfta selection
2749                         list entry).
2750                 2) The SE is not fta-safe
2751                         Determine the boundary between the fta-safe and the fta-unsafe
2752                         portions of the SE.  The result is a rooted tree (which is
2753                         evaluated at the HFTA) which references sub-SEs (which are
2754                         evaluated at the LFTA).  Each of the sub-SEs is placed on
2755                         the selection list of the LFTA and assigned field names,
2756                         the top part is evaluated at the HFTA and references the
2757                         sub-SEs through their assigned field names.
2758                 The only SEs on the LFTA selection list are those created by
2759                 the above mechanism.  The collection of assigned field names becomes
2760                 the schema of the LFTA.
2761
2762                 TODO: insert tablevar names into the colrefs.
2763
2764 */
2765
2766 vector<qp_node *> spx_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2767
2768         int i;
2769         vector<qp_node *> ret_vec;
2770
2771 //                      If the node reads from a stream, don't split.
2772 //      int t = Schema->get_table_ref(table_name->get_schema_name());
2773         int t = table_name->get_schema_ref();
2774         if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
2775                 hfta_returned = 1;
2776                 ret_vec.push_back(this);
2777                 return(ret_vec);
2778         }
2779
2780
2781 //                      Get the set of interfaces it accesses.
2782         int ierr;
2783         int si;
2784         vector<string> sel_names;
2785         vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2786         if (ifaces.empty()) {
2787                 fprintf(stderr,"INTERNAL ERROR in spx_qpn::split_node_for_fta - empty interface set\n");
2788                 exit(1);
2789         }
2790
2791
2792 //                      The FTA node, it is always returned.
2793
2794         spx_qpn *fta_node = new spx_qpn();
2795                 fta_node->table_name = table_name;
2796
2797 //                      for colname imputation
2798 //      vector<string> fta_flds, stream_flds;
2799
2800
2801 //              First check if the query can be pushed to the FTA.
2802         bool fta_ok = true;
2803         int s;
2804         for(s=0;s<select_list.size();s++){
2805                 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2806         }
2807         int p;
2808         for(p=0;p<where.size();p++){
2809                 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2810         }
2811
2812         if(fta_ok){
2813 ////////////////////////////////////////////////////////////
2814 //                      The query can be executed entirely in the FTA.
2815                 hfta_returned = 0;
2816
2817                 for(si=0;si<ifaces.size();++si){
2818                         fta_node = new spx_qpn();
2819
2820 //                      Name the fta
2821                         if(ifaces.size()==1)
2822                                 fta_node->set_node_name( node_name );
2823                         else{
2824                                 string new_name =  "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2825                                 untaboo(new_name);
2826                                 fta_node->set_node_name(new_name);
2827                         }
2828                         sel_names.push_back(fta_node->get_node_name());
2829
2830 //                      Assign the table
2831                         fta_node->table_name = table_name->duplicate();
2832                         fta_node->table_name->set_machine(ifaces[si].first);
2833                         fta_node->table_name->set_interface(ifaces[si].second);
2834                         fta_node->table_name->set_ifq(false);
2835
2836                         for(s=0;s<select_list.size();s++){
2837                                 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
2838                         }
2839                         for(p=0;p<where.size();p++){
2840                                 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2841                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
2842                                 analyze_cnf(new_cnf);
2843
2844                                 fta_node->where.push_back(new_cnf);
2845                         }
2846
2847 //                      Xfer all of the parameters.
2848 //                      Use existing handle annotations.
2849                         vector<string> param_names = param_tbl->get_param_names();
2850                         int pi;
2851                         for(pi=0;pi<param_names.size();pi++){
2852                                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2853                                 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2854                                                                         param_tbl->handle_access(param_names[pi]));
2855                         }
2856                         fta_node->definitions = definitions;
2857                         if(fta_node->resolve_if_params(ifdb, this->err_str)){
2858                                 this->error_code = 3;
2859                                 return ret_vec;
2860                         }
2861
2862                         ret_vec.push_back(fta_node);
2863                 }
2864
2865                 if(ifaces.size() > 1){
2866                 spx_qpn *tmp_spx = (spx_qpn *)(ret_vec[0]);
2867                         mrg_qpn *mrg_node = new mrg_qpn(tmp_spx,
2868                                  node_name,  sel_names,ifaces, ifdb);
2869                         /*
2870                         Do not split sources until we are done with optimizations
2871                         vector<mrg_qpn *> split_merge = mrg_node->split_sources();
2872                         for(i=0;i<split_merge.size();++i){
2873                                 ret_vec.push_back(split_merge[i]);
2874                         }
2875                         hfta_returned = split_merge.size();
2876                         */
2877                         ret_vec.push_back(mrg_node);
2878                         hfta_returned = 1;
2879                 }
2880
2881
2882 // printf("OK as FTA.\n");
2883 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
2884
2885                 return(ret_vec);
2886         }
2887
2888 ////////////////////////////////////////////////////
2889 //                      The fta must be split.  Create a stream node.
2890 //                      NOTE : I am counting on the single
2891 //                      table in the from list.  (Joins handled in a different operator).
2892
2893         hfta_returned = 1;
2894
2895         spx_qpn *stream_node = new spx_qpn();
2896         stream_node->set_node_name( node_name );
2897 //              Create the tablevar in the stream's FROM clause.
2898 //              set the schema name to the name of the LFTA,
2899 //              and use the same tablevar name.
2900         stream_node->table_name = new tablevar_t(
2901                          ("_fta_"+node_name).c_str()
2902          );
2903         stream_node->table_name->set_range_var(table_name->get_var_name());
2904
2905 //                      Name the fta
2906         fta_node->set_node_name( "_fta_"+node_name );
2907
2908 //                      table var names of fta, stream.
2909     string fta_var = fta_node->table_name->get_var_name();
2910     string stream_var = stream_node->table_name->get_var_name();
2911
2912 //                      Set up select list vector
2913         vector< vector<select_element *> *> select_vec;
2914         select_vec.push_back(&(fta_node->select_list)); // only one child
2915
2916
2917 //                      Split the select list into its FTA and stream parts.
2918 //                      If any part of the SE is fta-unsafe, it will return
2919 //                      a SE to execute at the stream ref'ing SE's evaluated
2920 //                      at the fta (which are put on the FTA's select list as a side effect).
2921 //                      If the SE is fta-safe, put it on the fta select list, make
2922 //                      a ref to it and put the ref on the stream select list.
2923         for(s=0;s<select_list.size();s++){
2924                 bool fta_forbidden = false;
2925                 int se_src = SPLIT_FTAVEC_NOTBLVAR;
2926 //              scalarexp_t *root_se = split_fta_se(
2927 //                      select_list[s]->se,fta_forbidden, fta_node->select_list, Ext_fcns
2928 //              );
2929                 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
2930                                         fta_forbidden, se_src, select_vec, Ext_fcns
2931                 );
2932 //              if(fta_forbidden){
2933                 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
2934                         stream_node->select_list.push_back(
2935                                 new select_element(root_se, select_list[s]->name)
2936                         );
2937                 }else{
2938                         scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,root_se,0);
2939                         stream_node->select_list.push_back(
2940                                 new select_element(new_se, select_list[s]->name)
2941                         );
2942                 }
2943         }
2944
2945
2946 //              The WHERE clause has already been split into a set of clauses
2947 //              that are ANDED together.  For each clause, check if its FTA-safe.
2948 //              If not, split its SE's into fta-safe and stream-executing parts,
2949 //              then put a clause which ref's the SEs into the stream.
2950 //              Else put it into the LFTA.
2951         predicate_t *pr_root;
2952         bool fta_forbidden;
2953         for(p=0;p<where.size();p++){
2954                 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) ){
2955                         pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
2956 //                      pr_root = split_fta_pr( where[p]->pr, fta_node->select_list, Ext_fcns);
2957                         fta_forbidden = true;
2958                 }else{
2959                         pr_root = dup_pr(where[p]->pr, NULL);
2960                         fta_forbidden = false;
2961                 }
2962                 cnf_elem *cnf_root = new cnf_elem(pr_root);
2963                 analyze_cnf(cnf_root);
2964
2965                 if(fta_forbidden){
2966                         stream_node->where.push_back(cnf_root);
2967                 }else{
2968                         fta_node->where.push_back(cnf_root);
2969                 }
2970         }
2971
2972
2973
2974 //                      Divide the parameters among the stream, FTA.
2975 //                      Currently : assume that the stream receives all parameters
2976 //                      and parameter updates, incorporates them, then passes
2977 //                      all of the parameters to the FTA.
2978 //                      This will need to change (tables, fta-unsafe types. etc.)
2979
2980 //                      I will pass on the use_handle_access marking, even
2981 //                      though the fcn call that requires handle access might
2982 //                      exist in only one of the parts of the query.
2983 //                      Parameter manipulation and handle access determination will
2984 //                      need to be revisited anyway.
2985         vector<string> param_names = param_tbl->get_param_names();
2986         int pi;
2987         for(pi=0;pi<param_names.size();pi++){
2988                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2989                 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2990                                                                         param_tbl->handle_access(param_names[pi]));
2991                 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2992                                                                         param_tbl->handle_access(param_names[pi]));
2993         }
2994
2995         fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
2996         stream_node->definitions = definitions;
2997
2998 //              Now split by interfaces
2999         if(ifaces.size() > 1){
3000                 for(si=0;si<ifaces.size();++si){
3001                         spx_qpn *subq_node = new spx_qpn();
3002
3003 //                      Name the subquery
3004                         string new_name =  "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3005                         untaboo(new_name);
3006                         subq_node->set_node_name( new_name) ;
3007                         sel_names.push_back(subq_node->get_node_name());
3008
3009 //                      Assign the table
3010                         subq_node->table_name = fta_node->table_name->duplicate();
3011                         subq_node->table_name->set_machine(ifaces[si].first);
3012                         subq_node->table_name->set_interface(ifaces[si].second);
3013                         subq_node->table_name->set_ifq(false);
3014
3015                         for(s=0;s<fta_node->select_list.size();s++){
3016                                 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3017                         }
3018                         for(p=0;p<fta_node->where.size();p++){
3019                                 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3020                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
3021                                 analyze_cnf(new_cnf);
3022
3023                                 subq_node->where.push_back(new_cnf);
3024                         }
3025 //                      Xfer all of the parameters.
3026 //                      Use existing handle annotations.
3027                         vector<string> param_names = param_tbl->get_param_names();
3028                         int pi;
3029                         for(pi=0;pi<param_names.size();pi++){
3030                                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3031                                 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3032                                                                         param_tbl->handle_access(param_names[pi]));
3033                         }
3034                         if(subq_node->resolve_if_params(ifdb, this->err_str)){
3035                                 this->error_code = 3;
3036                                 return ret_vec;
3037                         }
3038                         subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3039
3040                         ret_vec.push_back(subq_node);
3041                 }
3042
3043                 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3044                          fta_node->node_name, sel_names, ifaces, ifdb);
3045                 /*
3046                 Do not split sources until we are done with optimizations
3047                 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3048                 for(i=0;i<split_merge.size();++i){
3049                         ret_vec.push_back(split_merge[i]);
3050                 }
3051                 */
3052                 ret_vec.push_back(mrg_node);
3053                 ret_vec.push_back(stream_node);
3054                 hfta_returned = 1/*split_merge.size()*/ + 1;
3055
3056         }else{
3057                 fta_node->table_name->set_machine(ifaces[0].first);
3058                 fta_node->table_name->set_interface(ifaces[0].second);
3059                 fta_node->table_name->set_ifq(false);
3060                 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3061                         this->error_code = 3;
3062                         return ret_vec;
3063                 }
3064                 ret_vec.push_back(fta_node);
3065                 ret_vec.push_back(stream_node);
3066                 hfta_returned = 1;
3067         }
3068
3069 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3070 // printf("Stream node is:\n%s\n\n",stream_node->to_query_string().c_str() );
3071
3072
3073         return(ret_vec);
3074 }
3075
3076
3077 /*
3078         Splitting an aggregation+sampling operator.
3079     right now, return an error if any splitting is required.
3080 */
3081
3082 vector<qp_node *> sgahcwcb_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3083
3084         hfta_returned = 1;
3085
3086         vector<qp_node *> ret_vec;
3087         int s, p, g, a, o, i;
3088         int si;
3089
3090         vector<string> fta_flds, stream_flds;
3091
3092 //                      If the node reads from a stream, don't split.
3093 //      int t = Schema->get_table_ref(table_name->get_schema_name());
3094         int t = table_name->get_schema_ref();
3095         if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3096                 ret_vec.push_back(this);
3097                 return(ret_vec);
3098         }
3099
3100         fprintf(stderr,"ERROR : cannot split a sampling operator (not yet implemented).\n");
3101         exit(1);
3102
3103         return ret_vec;
3104
3105
3106 }
3107
3108
3109 /*
3110         Splitting a running aggregation operator.
3111     The code is almost identical to that of the sgah operator
3112     except that
3113        - there is no lfta-only option.
3114            - the stream node is rsgah_qpn (lfta is sgah or spx)
3115            - need to handle the closing when (similar to having)
3116 */
3117
3118 vector<qp_node *> rsgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3119
3120         hfta_returned = 1;
3121
3122         vector<qp_node *> ret_vec;
3123         int s, p, g, a, o, i;
3124         int si;
3125
3126         vector<string> fta_flds, stream_flds;
3127
3128 //                      If the node reads from a stream, don't split.
3129 //      int t = Schema->get_table_ref(table_name->get_schema_name());
3130         int t = table_name->get_schema_ref();
3131         if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3132                 ret_vec.push_back(this);
3133                 return(ret_vec);
3134         }
3135
3136 //                      Get the set of interfaces it accesses.
3137         int ierr;
3138         vector<string> sel_names;
3139         vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3140         if (ifaces.empty()) {
3141                 fprintf(stderr,"INTERNAL ERROR in rsgah_qpn::split_node_for_fta - empty interface set\n");
3142                 exit(1);
3143         }
3144
3145
3146
3147
3148 //////////////////////////////////////////////////////////////
3149 ///                     Split into lfta, hfta.
3150
3151 //                      A rsgah node must always be split,
3152 //                      if for no other reason than to complete the
3153 //                      partial aggregation.
3154
3155 //                      First, determine if the query can be spit into aggr/aggr,
3156 //                      or if it must be selection/aggr.
3157 //                      Splitting into selection/aggr is allowed only
3158 //                      if select_lfta is set.
3159
3160
3161         bool select_allowed = definitions.count("select_lfta")>0;
3162         bool select_rqd = false;
3163
3164         set<int> unsafe_gbvars;         // for processing where clause
3165         for(g=0;g<gb_tbl.size();g++){
3166                 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3167                         if(!select_allowed){
3168                           sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3169                                 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3170                           );
3171                           this->error_code = 1;
3172                           this->err_str = tmpstr;
3173                           return(ret_vec);
3174                         }else{
3175                           select_rqd = true;
3176                           unsafe_gbvars.insert(g);
3177                         }
3178                 }
3179         }
3180
3181 //                      Verify that the SEs in the aggregate definitions are fta-safe
3182         for(a=0;a<aggr_tbl.size();++a){
3183                 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3184                 if(ase != NULL){        // COUNT(*) does not have a SE.
3185                   if(!select_allowed){
3186                     if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3187                           sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3188                                 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3189                           );
3190                           this->error_code = 1;
3191                           this->err_str = tmpstr;
3192                           return(ret_vec);
3193                     }
3194                   }else{
3195                         select_rqd = true;
3196                   }
3197                 }
3198         }
3199
3200 //                      Verify that all of the ref'd UDAFs can be split.
3201
3202         for(a=0;a<aggr_tbl.size();++a){
3203                 if(! aggr_tbl.is_builtin(a)){
3204                         int afcn = aggr_tbl.get_fcn_id(a);
3205                         int super_id = Ext_fcns->get_superaggr_id(afcn);
3206                         int sub_id = Ext_fcns->get_subaggr_id(afcn);
3207                         if(super_id < 0 || sub_id < 0){
3208                           if(!select_allowed){
3209                                 this->err_str += "ERROR in rsgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3210                                 this->error_code = 1;
3211                                 return(ret_vec);
3212                           }else{
3213                                 select_rqd = true;
3214                           }
3215                         }
3216                 }
3217     }
3218
3219         for(p=0;p<where.size();p++){
3220                 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3221                   if(!select_allowed){
3222                         sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3223                                 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3224                         );
3225                         this->error_code = 1;
3226                         this->err_str = tmpstr;
3227                         return(ret_vec);
3228                   }else{
3229                         select_rqd = true;
3230                   }
3231                 }
3232         }
3233
3234
3235         if(! select_rqd){
3236
3237 /////////////////////////////////////////////////////
3238 //                      Split into  aggr/aggr.
3239
3240
3241
3242
3243
3244         sgah_qpn *fta_node = new sgah_qpn();
3245                 fta_node->table_name = table_name;
3246                 fta_node->set_node_name( "_fta_"+node_name );
3247                 fta_node->table_name->set_range_var(table_name->get_var_name());
3248
3249
3250         rsgah_qpn *stream_node = new rsgah_qpn();
3251                 stream_node->table_name = new tablevar_t(  ("_fta_"+node_name).c_str());
3252                 stream_node->set_node_name( node_name );
3253                 stream_node->table_name->set_range_var(table_name->get_var_name());
3254
3255 //                      First, process the group-by variables.
3256 //                      The fta must supply the values of all the gbvars.
3257 //                      If a gb is computed, the computation must be
3258 //                      performed at the FTA, so the SE must be FTA-safe.
3259 //                      Nice side effect : the gbvar table contains
3260 //                      matching entries for the original query, the lfta query,
3261 //                      and the hfta query.  So gbrefs in the new queries are set
3262 //                      correctly just by inheriting the gbrefs from the old query.
3263 //                      If this property changed, I'll need translation tables.
3264
3265
3266         for(g=0;g<gb_tbl.size();g++){
3267 //                      Insert the gbvar into the lfta.
3268                 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
3269                 fta_node->gb_tbl.add_gb_var(
3270                         gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
3271                 );
3272
3273 //                      Insert a ref to the value of the gbvar into the lfta select list.
3274                 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
3275                 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
3276                 gbvar_fta->set_gb_ref(g);
3277                 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
3278                 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
3279
3280 //                      Insert the corresponding gbvar ref (gbvar_stream) into the stream.
3281                 gbvar_stream->set_gb_ref(-1);   // used as GBvar def
3282                 stream_node->gb_tbl.add_gb_var(
3283                         gbvar_stream->get_colref()->get_field(), -1, gbvar_stream,  gb_tbl.get_reftype(g)
3284                 );
3285
3286         }
3287
3288 //                      SEs in the aggregate definitions.
3289 //                      They are all safe, so split them up for later processing.
3290         map<int, scalarexp_t *> hfta_aggr_se;
3291         for(a=0;a<aggr_tbl.size();++a){
3292                 split_fta_aggr( &(aggr_tbl), a,
3293                                                 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl)  ,
3294                                                 fta_node->select_list,
3295                                                 hfta_aggr_se,
3296                                                 Ext_fcns
3297                                         );
3298         }
3299
3300
3301 //                      Next, the select list.
3302
3303         for(s=0;s<select_list.size();s++){
3304                 bool fta_forbidden = false;
3305                 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3306                 stream_node->select_list.push_back(
3307                         new select_element(root_se, select_list[s]->name));
3308         }
3309
3310
3311
3312 //                      All the predicates in the where clause must execute
3313 //                      in the fta.
3314
3315         for(p=0;p<where.size();p++){
3316                 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
3317                 cnf_elem *new_cnf = new cnf_elem(new_pr);
3318                 analyze_cnf(new_cnf);
3319
3320                 fta_node->where.push_back(new_cnf);
3321         }
3322
3323 //                      All of the predicates in the having clause must
3324 //                      execute in the stream node.
3325
3326         for(p=0;p<having.size();p++){
3327                 predicate_t *pr_root = rehome_fta_pr( having[p]->pr,  &hfta_aggr_se);
3328                 cnf_elem *cnf_root = new cnf_elem(pr_root);
3329                 analyze_cnf(cnf_root);
3330
3331                 stream_node->having.push_back(cnf_root);
3332         }
3333
3334 //                      All of the predicates in the closing when clause must
3335 //                      execute in the stream node.
3336
3337         for(p=0;p<closing_when.size();p++){
3338                 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3339                 cnf_elem *cnf_root = new cnf_elem(pr_root);
3340                 analyze_cnf(cnf_root);
3341
3342                 stream_node->closing_when.push_back(cnf_root);
3343         }
3344
3345
3346 //                      Divide the parameters among the stream, FTA.
3347 //                      Currently : assume that the stream receives all parameters
3348 //                      and parameter updates, incorporates them, then passes
3349 //                      all of the parameters to the FTA.
3350 //                      This will need to change (tables, fta-unsafe types. etc.)
3351
3352 //                      I will pass on the use_handle_access marking, even
3353 //                      though the fcn call that requires handle access might
3354 //                      exist in only one of the parts of the query.
3355 //                      Parameter manipulation and handle access determination will
3356 //                      need to be revisited anyway.
3357         vector<string> param_names = param_tbl->get_param_names();
3358         int pi;
3359         for(pi=0;pi<param_names.size();pi++){
3360                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3361                 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3362                                                                         param_tbl->handle_access(param_names[pi]));
3363                 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3364                                                                         param_tbl->handle_access(param_names[pi]));
3365         }
3366         fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3367         stream_node->definitions = definitions;
3368
3369 //              Now split by interfaces XXXX
3370         if(ifaces.size() > 1){
3371                 for(si=0;si<ifaces.size();++si){
3372                         sgah_qpn *subq_node = new sgah_qpn();
3373
3374 //                      Name the subquery
3375                         string new_name =  "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3376                         untaboo(new_name);
3377                         subq_node->set_node_name( new_name) ;
3378                         sel_names.push_back(subq_node->get_node_name());
3379
3380 //                      Assign the table
3381                         subq_node->table_name = fta_node->table_name->duplicate();
3382                         subq_node->table_name->set_machine(ifaces[si].first);
3383                         subq_node->table_name->set_interface(ifaces[si].second);
3384                         subq_node->table_name->set_ifq(false);
3385
3386 //                      the GB vars.
3387                         for(g=0;g<fta_node->gb_tbl.size();g++){
3388 //                      Insert the gbvar into the lfta.
3389                                 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
3390                                 subq_node->gb_tbl.add_gb_var(
3391                                         fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
3392                                 );
3393                         }
3394
3395 //                      Insert the aggregates
3396                         for(a=0;a<fta_node->aggr_tbl.size();++a){
3397                                 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
3398                         }
3399
3400                         for(s=0;s<fta_node->select_list.size();s++){
3401                                 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3402                         }
3403                         for(p=0;p<fta_node->where.size();p++){
3404                                 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3405                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
3406                                 analyze_cnf(new_cnf);
3407
3408                                 subq_node->where.push_back(new_cnf);
3409                         }
3410                         for(p=0;p<fta_node->having.size();p++){
3411                                 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
3412                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
3413                                 analyze_cnf(new_cnf);
3414
3415                                 subq_node->having.push_back(new_cnf);
3416                         }
3417 //                      Xfer all of the parameters.
3418 //                      Use existing handle annotations.
3419                         vector<string> param_names = param_tbl->get_param_names();
3420                         int pi;
3421                         for(pi=0;pi<param_names.size();pi++){
3422                                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3423                                 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3424                                                                         param_tbl->handle_access(param_names[pi]));
3425                         }
3426                         subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3427                         if(subq_node->resolve_if_params(ifdb, this->err_str)){
3428                                 this->error_code = 3;
3429                                 return ret_vec;
3430                         }
3431
3432                         ret_vec.push_back(subq_node);
3433                 }
3434
3435                 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
3436                          fta_node->node_name, sel_names, ifaces, ifdb);
3437
3438                 /*
3439                 Do not split sources until we are done with optimizations
3440                 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3441                 for(i=0;i<split_merge.size();++i){
3442                         ret_vec.push_back(split_merge[i]);
3443                 }
3444                 */
3445                 ret_vec.push_back(mrg_node);
3446                 ret_vec.push_back(stream_node);
3447                 hfta_returned = 1/*split_merge.size()*/+1;
3448
3449         }else{
3450                 fta_node->table_name->set_machine(ifaces[0].first);
3451                 fta_node->table_name->set_interface(ifaces[0].second);
3452                 fta_node->table_name->set_ifq(false);
3453                 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3454                         this->error_code = 3;
3455                         return ret_vec;
3456                 }
3457                 ret_vec.push_back(fta_node);
3458                 ret_vec.push_back(stream_node);
3459                 hfta_returned = 1;
3460         }
3461
3462
3463 //      ret_vec.push_back(fta_node);
3464 //      ret_vec.push_back(stream_node);
3465
3466
3467         return(ret_vec);
3468
3469         }
3470
3471 /////////////////////////////////////////////////////////////////////
3472 ///             Split into selection LFTA, aggregation HFTA.
3473
3474         spx_qpn *fta_node = new spx_qpn();
3475                 fta_node->table_name = table_name;
3476                 fta_node->set_node_name( "_fta_"+node_name );
3477                 fta_node->table_name->set_range_var(table_name->get_var_name());
3478
3479
3480         rsgah_qpn *stream_node = new rsgah_qpn();
3481                 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3482                 stream_node->set_node_name( node_name );
3483                 stream_node->table_name->set_range_var(table_name->get_var_name());
3484
3485
3486         vector< vector<select_element *> *> select_vec;
3487         select_vec.push_back(&(fta_node->select_list)); // only one child
3488
3489 //                      Process the gbvars.  Split their defining SEs.
3490         for(g=0;g<gb_tbl.size();g++){
3491                 bool fta_forbidden = false;
3492                 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3493
3494                 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
3495                                         fta_forbidden, se_src, select_vec, Ext_fcns
3496                 );
3497 //              if(fta_forbidden) (
3498                 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3499                         stream_node->gb_tbl.add_gb_var(
3500                           gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
3501                         );
3502                 }else{
3503                         scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
3504                         stream_node->gb_tbl.add_gb_var(
3505                           gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
3506                         );
3507                 }
3508         }
3509
3510 //              Process the aggregate table.
3511 //              Copy to stream, split the SEs.
3512         map<int, scalarexp_t *> hfta_aggr_se;   // for rehome
3513         for(a=0;a<aggr_tbl.size();++a){
3514                 scalarexp_t *hse;
3515                 if(aggr_tbl.is_builtin(a)){
3516                         if(aggr_tbl.is_star_aggr(a)){
3517                                 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
3518                                 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
3519                         }else{
3520                                 bool fta_forbidden = false;
3521                                 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3522
3523                                 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3524                                         fta_forbidden, se_src, select_vec, Ext_fcns
3525                                 );
3526 //                              if(fta_forbidden) (
3527                                 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3528                                         stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
3529                                         hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
3530                                 }else{
3531                                         scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3532                                         stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
3533                                         hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
3534                                 }
3535                         }
3536                         hse->set_data_type(aggr_tbl.get_data_type(a));
3537                         hse->set_aggr_id(a);
3538                         hfta_aggr_se[a]=hse;
3539                 }else{
3540                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
3541                         vector<scalarexp_t *> new_opl;
3542                         for(o=0;o<opl.size();++o){
3543                                 bool fta_forbidden = false;
3544                                 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3545                                 scalarexp_t *agg_se = split_ftavec_se( opl[o],
3546                                         fta_forbidden, se_src, select_vec, Ext_fcns
3547                                 );
3548 //                              scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3549 //                                      fta_forbidden, se_src, select_vec, Ext_fcns
3550 //                              );
3551 //                              if(fta_forbidden) (
3552                                 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3553                                         new_opl.push_back(agg_se);
3554                                 }else{
3555                                         scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3556                                         new_opl.push_back(new_se);
3557                                 }
3558                         }
3559                         stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
3560                         hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
3561                         hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
3562                         hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
3563                         hse->set_aggr_id(a);
3564                         hfta_aggr_se[a]=hse;
3565                 }
3566         }
3567
3568
3569 //              Process the WHERE clause.
3570 //              If it is fta-safe AND it refs only fta-safe gbvars,
3571 //              then expand the gbvars and put it into the lfta.
3572 //              Else, split it into an hfta predicate ref'ing
3573 //              se's computed partially in the lfta.
3574
3575         predicate_t *pr_root;
3576         bool fta_forbidden;
3577         for(p=0;p<where.size();p++){
3578                 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
3579                         pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3580                         fta_forbidden = true;
3581                 }else{
3582                         pr_root = dup_pr(where[p]->pr, NULL);
3583                         expand_gbvars_pr(pr_root, gb_tbl);
3584                         fta_forbidden = false;
3585                 }
3586                 cnf_elem *cnf_root = new cnf_elem(pr_root);
3587                 analyze_cnf(cnf_root);
3588
3589                 if(fta_forbidden){
3590                         stream_node->where.push_back(cnf_root);
3591                 }else{
3592                         fta_node->where.push_back(cnf_root);
3593                 }
3594         }
3595
3596
3597 //              Process the Select clause, rehome it on the
3598 //              new defs.
3599         for(s=0;s<select_list.size();s++){
3600                 bool fta_forbidden = false;
3601                 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3602                 stream_node->select_list.push_back(
3603                         new select_element(root_se, select_list[s]->name));
3604         }
3605
3606
3607 // Process the Having clause
3608
3609 //                      All of the predicates in the having clause must
3610 //                      execute in the stream node.
3611
3612         for(p=0;p<having.size();p++){
3613                 predicate_t *pr_root = rehome_fta_pr( having[p]->pr,  &hfta_aggr_se);
3614                 cnf_elem *cnf_root = new cnf_elem(pr_root);
3615                 analyze_cnf(cnf_root);
3616
3617                 stream_node->having.push_back(cnf_root);
3618         }
3619 //                      Same for closing when
3620         for(p=0;p<closing_when.size();p++){
3621                 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3622                 cnf_elem *cnf_root = new cnf_elem(pr_root);
3623                 analyze_cnf(cnf_root);
3624
3625                 stream_node->closing_when.push_back(cnf_root);
3626         }
3627
3628
3629 //              Handle parameters and a few last details.
3630         vector<string> param_names = param_tbl->get_param_names();
3631         int pi;
3632         for(pi=0;pi<param_names.size();pi++){
3633                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3634                 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3635                                                                         param_tbl->handle_access(param_names[pi]));
3636                 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3637                                                                         param_tbl->handle_access(param_names[pi]));
3638         }
3639
3640         fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3641         stream_node->definitions = definitions;
3642
3643 //              Now split by interfaces YYYY
3644         if(ifaces.size() > 1){
3645                 for(si=0;si<ifaces.size();++si){
3646                         spx_qpn *subq_node = new spx_qpn();
3647
3648 //                      Name the subquery
3649                         string new_name =  "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3650                         untaboo(new_name);
3651                         subq_node->set_node_name( new_name) ;
3652                         sel_names.push_back(subq_node->get_node_name());
3653
3654 //                      Assign the table
3655                         subq_node->table_name = fta_node->table_name->duplicate();
3656                         subq_node->table_name->set_machine(ifaces[si].first);
3657                         subq_node->table_name->set_interface(ifaces[si].second);
3658                         subq_node->table_name->set_ifq(false);
3659
3660                         for(s=0;s<fta_node->select_list.size();s++){
3661                                 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3662                         }
3663                         for(p=0;p<fta_node->where.size();p++){
3664                                 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3665                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
3666                                 analyze_cnf(new_cnf);
3667
3668                                 subq_node->where.push_back(new_cnf);
3669                         }
3670 //                      Xfer all of the parameters.
3671 //                      Use existing handle annotations.
3672                         vector<string> param_names = param_tbl->get_param_names();
3673                         int pi;
3674                         for(pi=0;pi<param_names.size();pi++){
3675                                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3676                                 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3677                                                                         param_tbl->handle_access(param_names[pi]));
3678                         }
3679                         subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3680                         if(subq_node->resolve_if_params(ifdb, this->err_str)){
3681                                 this->error_code = 3;
3682                                 return ret_vec;
3683                         }
3684
3685                         ret_vec.push_back(subq_node);
3686                 }
3687
3688                 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3689                          fta_node->node_name, sel_names, ifaces, ifdb);
3690                 /*
3691                 Do not split sources until we are done with optimizations
3692                 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3693                 for(i=0;i<split_merge.size();++i){
3694                         ret_vec.push_back(split_merge[i]);
3695                 }
3696                 */
3697                 ret_vec.push_back(mrg_node);
3698                 ret_vec.push_back(stream_node);
3699                 hfta_returned = 1/*split_merge.size()*/+1;
3700
3701         }else{
3702                 fta_node->table_name->set_machine(ifaces[0].first);
3703                 fta_node->table_name->set_interface(ifaces[0].second);
3704                 fta_node->table_name->set_ifq(false);
3705                 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3706                         this->error_code = 3;
3707                         return ret_vec;
3708                 }
3709                 ret_vec.push_back(fta_node);
3710                 ret_vec.push_back(stream_node);
3711                 hfta_returned = 1;
3712         }
3713
3714         return(ret_vec);
3715
3716 }
3717
3718
3719 /*
3720                 Splitting an aggregation operator
3721
3722                 An aggregation operator can reference
3723                         literals, parameters, colrefs, group-by vars, aggregates,
3724                         operators, functions
3725
3726                 an aggregation contains
3727                         A selection list of SEs
3728                         A where list of predicates
3729                         A list of group-by variable definitions
3730                         A list of aggregates to be computed
3731                         A HAVING list of predicates.
3732
3733                 Aggregation involves two phases:
3734                         1) given an input tuple, determine if it satisfies all of
3735                                 the WHERE predicates.  If so, compute the group.
3736                                 Look up the group, update its aggregates.
3737                         2) given a closed group and its aggregates, determine
3738                                 if these values satisfy all of the HAVING predicates.
3739                                 If so, evaluate the SEs on the selection list from the
3740                                 group and its aggregates.
3741                 The two-phase nature of aggregation places restrictions on
3742                 what can be referenced by different components of the operator
3743                 (in addition to functions and operators).
3744                 - group-by variables : literals, parameters, colrefs
3745                 - WHERE predicates : group-by vars, literals, params, colrefs
3746                 - HAVING predicates : group-by vars, literals, params, aggregates
3747                 - Selection list SEs : group-by vars, literals, params, aggregates
3748
3749                 Splitting an aggregation operator into an LFTA/HFTA part
3750                 involves performing partial aggregation at the LFTA and
3751                 completing the aggregation at the HFTA.
3752                 - given a tuple, the LFTA part evaluates the WHERE clause,
3753                   and if it is satisfied, computes the group.  It looks up the group
3754                   and updates the aggregates, then outputs the group and its partial
3755                   aggregates.
3756                 - Given a partial aggregate from the LFTA, look up the group and
3757                   update its aggregates.  When the group is closed, evaluate
3758                   the HAVING clause and the SEs on the selection list.
3759                 THEREFORE the selection list of the LFTA must consist of the
3760                 group-by variables and the set of (bare) subaggregate values
3761                 necessary to compute the super aggregates.
3762                 Unlike the case with the SPX operator, the SE splitting point
3763                 is at the GBvar and the aggregate value level.
3764
3765                 ALGORITHM:
3766                 For each group-by variable
3767                         Put the GB variable definition in the LFTA GBVAR list.
3768                         Put the GBVAR in the LFTA selection list (as an SE).
3769                         Put a reference to that GBVAR in the HFTA GBVAR list.
3770                 For each aggregate
3771                         Split the aggregate into a superaggregate and a subaggregate.
3772                                 The SE of the superaggregate references the subaggregate value.
3773                                 (this will need modifications for MF aggregation)
3774                 For each SE in the selection list, HAVING predicate
3775                         Make GBVAR references point to the new GBVAR
3776                         make the aggregate value references point to the new aggregates.
3777
3778                 SEs are not so much split as their ref's are changed.
3779
3780                 TODO: insert tablevar names into the colrefs.
3781 */
3782
3783
3784
3785 vector<qp_node *> sgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3786
3787         hfta_returned = 1;
3788
3789         vector<qp_node *> ret_vec;
3790         int s, p, g, a, o, i;
3791         int si;
3792
3793         vector<string> fta_flds, stream_flds;
3794
3795 //                      If the node reads from a stream, don't split.
3796 //      int t = Schema->get_table_ref(table_name->get_schema_name());
3797         int t = table_name->get_schema_ref();
3798         if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3799                 ret_vec.push_back(this);
3800                 return(ret_vec);
3801         }
3802
3803 //                      Get the set of interfaces it accesses.
3804         int ierr;
3805         vector<string> sel_names;
3806         vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3807         if (ifaces.empty()) {
3808                 fprintf(stderr,"INTERNAL ERROR in sgah_qpn::split_node_for_fta - empty interface set\n");
3809                 exit(1);
3810         }
3811
3812
3813
3814 //////////////////////////////////////////////
3815 //              Is this LFTA-only?
3816         if(definitions.count("lfta_aggregation")>0){
3817 //                      Yes.  Ensure that everything is lfta-safe.
3818
3819 //                      Check only one interface is accessed.
3820                 if(ifaces.size()>1){
3821                         this->err_str = "ERROR, group-by query "+node_name+" is lfta-only, but it accesses more than one interface:\n";
3822                         for(si=0;si<ifaces.size();++si)
3823                                 this->err_str += "\t"+ifaces[si].first+"."+ifaces[si].second+"\n";
3824                         this->error_code = 2;
3825                         return(ret_vec);
3826                 }
3827
3828 //                      Check the group-by attributes
3829                 for(g=0;g<gb_tbl.size();g++){
3830                         if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3831                                 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition and the query is lfta-only (%s).\n",
3832                                         gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3833                                 );
3834                                 this->error_code = 1;
3835                                 this->err_str = tmpstr;
3836                                 return(ret_vec);
3837                         }
3838                 }
3839
3840 //                      Verify that the SEs in the aggregate definitions are fta-safe
3841                 for(a=0;a<aggr_tbl.size();++a){
3842                         scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3843                         if(ase != NULL){        // COUNT(*) does not have a SE.
3844                                 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3845                                         sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe scalar expression and the query is lfta-only (%s).\n",
3846                                                 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3847                                         );
3848                                         this->error_code = 1;
3849                                         this->err_str = tmpstr;
3850                                         return(ret_vec);
3851                                 }
3852                         }
3853                         if(! aggr_tbl.fta_legal(a,Ext_fcns)){
3854                           if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3855                                 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe aggregate and the query is lfta-only (%s).\n",
3856                                         aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3857                                 );
3858                                 this->error_code = 1;
3859                                 this->err_str = tmpstr;
3860                                 return(ret_vec);
3861                                 }
3862                         }
3863                 }
3864
3865 //              Ensure that all the aggregates are fta-safe ....
3866
3867 //              select list
3868
3869                 for(s=0;s<select_list.size();s++){
3870                         if(! check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns)){
3871                                 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
3872                                         pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3873                                 );
3874                                 this->error_code = 1;
3875                                 this->err_str = tmpstr;
3876                                 return(ret_vec);
3877                         }
3878                 }
3879
3880 //              where predicate
3881
3882                 for(p=0;p<where.size();p++){
3883                         if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3884                                 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
3885                                         pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3886                                 );
3887                                 this->error_code = 1;
3888                                 this->err_str = tmpstr;
3889                                 return(ret_vec);
3890                         }
3891                 }
3892
3893
3894 //              having predicate
3895                 if(having.size()>0){
3896                         sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta :  the query is lfta-only, so it can't have a HAVING clause.(%s).\n",
3897                                 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3898                         );
3899                         this->error_code = 1;
3900                         this->err_str = tmpstr;
3901                         return(ret_vec);
3902                 }
3903 //                      The query is lfta safe, return it.
3904
3905                 hfta_returned = 0;
3906                 ret_vec.push_back(this);
3907                 return(ret_vec);
3908         }
3909
3910 //////////////////////////////////////////////////////////////
3911 ///                     Split into lfta, hfta.
3912
3913 //                      A sgah node must always be split,
3914 //                      if for no other reason than to complete the
3915 //                      partial aggregation.
3916
3917 //                      First, determine if the query can be split into aggr/aggr,
3918 //                      or if it must be selection/aggr.
3919 //                      Splitting into selection/aggr is allowed only
3920 //                      if select_lfta is set.
3921
3922
3923         bool select_allowed = definitions.count("select_lfta")>0;
3924         bool select_rqd = false;
3925
3926         set<int> unsafe_gbvars;         // for processing where clause
3927         for(g=0;g<gb_tbl.size();g++){
3928                 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3929                         if(!select_allowed){
3930                           sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3931                                 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3932                           );
3933                           this->error_code = 1;
3934                           this->err_str = tmpstr;
3935                           return(ret_vec);
3936                         }else{
3937                           select_rqd = true;
3938                           unsafe_gbvars.insert(g);
3939                         }
3940                 }
3941         }
3942
3943 //                      Verify that the SEs in the aggregate definitions are fta-safe
3944         for(a=0;a<aggr_tbl.size();++a){
3945                 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3946                 if(ase != NULL){        // COUNT(*) does not have a SE.
3947                   if(!select_allowed){
3948                     if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3949                           sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3950                                 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3951                           );
3952                           this->error_code = 1;
3953                           this->err_str = tmpstr;
3954                           return(ret_vec);
3955                     }
3956                   }else{
3957                         select_rqd = true;
3958                   }
3959                 }
3960         }
3961
3962 //                      Verify that all of the ref'd UDAFs can be split.
3963
3964         for(a=0;a<aggr_tbl.size();++a){
3965                 if(! aggr_tbl.is_builtin(a)){
3966                         int afcn = aggr_tbl.get_fcn_id(a);
3967                         int super_id = Ext_fcns->get_superaggr_id(afcn);
3968                         int sub_id = Ext_fcns->get_subaggr_id(afcn);
3969                         if(super_id < 0 || sub_id < 0){
3970                           if(!select_allowed){
3971                                 this->err_str += "ERROR in sgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3972                                 this->error_code = 1;
3973                                 return(ret_vec);
3974                           }else{
3975                                 select_rqd = true;
3976                           }
3977                         }
3978                 }
3979     }
3980
3981         for(p=0;p<where.size();p++){
3982                 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3983                   if(!select_allowed){
3984                         sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3985                                 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3986                         );
3987                         this->error_code = 1;
3988                         this->err_str = tmpstr;
3989                         return(ret_vec);
3990                   }else{
3991                         select_rqd = true;
3992                   }
3993                 }
3994         }
3995
3996
3997         if(! select_rqd){
3998
3999 /////////////////////////////////////////////////////
4000 //                      Split into  aggr/aggr.
4001
4002
4003
4004
4005
4006         sgah_qpn *fta_node = new sgah_qpn();
4007                 fta_node->table_name = table_name;
4008                 fta_node->set_node_name( "_fta_"+node_name );
4009                 fta_node->table_name->set_range_var(table_name->get_var_name());
4010
4011
4012         sgah_qpn *stream_node = new sgah_qpn();
4013                 stream_node->table_name = new tablevar_t(  ("_fta_"+node_name).c_str());
4014                 stream_node->set_node_name( node_name );
4015                 stream_node->table_name->set_range_var(table_name->get_var_name());
4016
4017 //                      allowed stream disorder.  Default is 2,
4018 //                      can override with max_lfta_disorder setting.
4019 //                      Also limit the hfta disorder, set to lfta disorder + 1.
4020 //                      can override with max_hfta_disorder.
4021
4022         fta_node->lfta_disorder = 2;
4023         if(this->get_val_of_def("max_lfta_disorder") != ""){
4024                 int d = atoi(this->get_val_of_def("max_lfta_disorder").c_str() );
4025                 if(d<1){
4026                         fprintf(stderr,"Warning, max_lfta_disorder in node %s is %d, must be at least 1, ignoring.\n",node_name.c_str(), d);
4027                 }else{
4028                         fta_node->lfta_disorder = d;
4029 printf("node %s setting lfta_disorder = %d\n",node_name.c_str(),fta_node->lfta_disorder);
4030                 }
4031         }
4032         if(fta_node->lfta_disorder > 1)
4033                 stream_node->hfta_disorder = fta_node->lfta_disorder + 1;
4034         else
4035                 stream_node->hfta_disorder =  1;
4036
4037         if(this->get_val_of_def("max_hfta_disorder") != ""){
4038                 int d = atoi(this->get_val_of_def("max_hfta_disorder").c_str() );
4039                 if(d<fta_node->lfta_disorder){
4040                         fprintf(stderr,"Warning, max_hfta_disorder in node %s is %d, must be at least the max lfta disorder %d, ignoring.\n",node_name.c_str(), d,fta_node->lfta_disorder);
4041                 }else{
4042                         fta_node->lfta_disorder = d;
4043                 }
4044                 if(fta_node->lfta_disorder < fta_node->hfta_disorder){
4045                         fta_node->hfta_disorder = fta_node->lfta_disorder + 1;
4046                 }
4047         }
4048
4049 //                      First, process the group-by variables.
4050 //                      The fta must supply the values of all the gbvars.
4051 //                      If a gb is computed, the computation must be
4052 //                      performed at the FTA, so the SE must be FTA-safe.
4053 //                      Nice side effect : the gbvar table contains
4054 //                      matching entries for the original query, the lfta query,
4055 //                      and the hfta query.  So gbrefs in the new queries are set
4056 //                      correctly just by inheriting the gbrefs from the old query.
4057 //                      If this property changed, I'll need translation tables.
4058
4059
4060         for(g=0;g<gb_tbl.size();g++){
4061 //                      Insert the gbvar into the lfta.
4062                 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
4063                 fta_node->gb_tbl.add_gb_var(
4064                         gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
4065                 );
4066
4067 //                      Insert a ref to the value of the gbvar into the lfta select list.
4068                 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
4069                 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
4070                 gbvar_fta->set_gb_ref(g);
4071                 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
4072                 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
4073
4074 //                      Insert the corresponding gbvar ref (gbvar_stream) into the stream.
4075                 gbvar_stream->set_gb_ref(-1);   // used as GBvar def
4076                 stream_node->gb_tbl.add_gb_var(
4077                         gbvar_stream->get_colref()->get_field(), -1, gbvar_stream,  gb_tbl.get_reftype(g)
4078                 );
4079         }
4080 //                      multiple aggregation patterns, if any, go with the hfta
4081         stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4082
4083 //                      SEs in the aggregate definitions.
4084 //                      They are all safe, so split them up for later processing.
4085         map<int, scalarexp_t *> hfta_aggr_se;
4086         for(a=0;a<aggr_tbl.size();++a){
4087                 split_fta_aggr( &(aggr_tbl), a,
4088                                                 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl)  ,
4089                                                 fta_node->select_list,
4090                                                 hfta_aggr_se,
4091                                                 Ext_fcns
4092                                         );
4093 /*
4094 //              OLD TRACING CODE
4095
4096 int ii;
4097 for(ii=0;ii<fta_flds.size() || ii < fta_node->select_list.size();++ii){
4098         if(ii<fta_flds.size())
4099                 printf("\t%s : ",fta_flds[ii].c_str());
4100         else
4101                 printf("\t. : ");
4102         if(ii<fta_node->select_list.size())
4103                 printf("%s\n",fta_node->select_list[ii]->to_string().c_str());
4104         else
4105                 printf(".\n");
4106 }
4107 printf("hfta aggregates are:");
4108 for(ii=0;ii<stream_node->aggr_tbl.size();++ii){
4109         printf(" %s",stream_node->aggr_tbl.get_op(ii).c_str());
4110 }
4111 printf("\nlfta aggregates are:");
4112 for(ii=0;ii<fta_node->aggr_tbl.size();++ii){
4113         printf(" %s",fta_node->aggr_tbl.get_op(ii).c_str());
4114 }
4115 printf("\n\n");
4116 */
4117
4118         }
4119
4120
4121 //                      Next, the select list.
4122
4123         for(s=0;s<select_list.size();s++){
4124                 bool fta_forbidden = false;
4125                 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4126                 stream_node->select_list.push_back(
4127                         new select_element(root_se, select_list[s]->name));
4128         }
4129
4130
4131
4132 //                      All the predicates in the where clause must execute
4133 //                      in the fta.
4134
4135         for(p=0;p<where.size();p++){
4136                 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
4137                 cnf_elem *new_cnf = new cnf_elem(new_pr);
4138                 analyze_cnf(new_cnf);
4139
4140                 fta_node->where.push_back(new_cnf);
4141         }
4142
4143 //                      All of the predicates in the having clause must
4144 //                      execute in the stream node.
4145
4146         for(p=0;p<having.size();p++){
4147                 predicate_t *pr_root = rehome_fta_pr( having[p]->pr,  &hfta_aggr_se);
4148                 cnf_elem *cnf_root = new cnf_elem(pr_root);
4149                 analyze_cnf(cnf_root);
4150
4151                 stream_node->having.push_back(cnf_root);
4152         }
4153
4154
4155 //                      Divide the parameters among the stream, FTA.
4156 //                      Currently : assume that the stream receives all parameters
4157 //                      and parameter updates, incorporates them, then passes
4158 //                      all of the parameters to the FTA.
4159 //                      This will need to change (tables, fta-unsafe types. etc.)
4160
4161 //                      I will pass on the use_handle_access marking, even
4162 //                      though the fcn call that requires handle access might
4163 //                      exist in only one of the parts of the query.
4164 //                      Parameter manipulation and handle access determination will
4165 //                      need to be revisited anyway.
4166         vector<string> param_names = param_tbl->get_param_names();
4167         int pi;
4168         for(pi=0;pi<param_names.size();pi++){
4169                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4170                 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4171                                                                         param_tbl->handle_access(param_names[pi]));
4172                 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4173                                                                         param_tbl->handle_access(param_names[pi]));
4174         }
4175         fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4176         stream_node->definitions = definitions;
4177
4178 //              Now split by interfaces XXXX
4179         if(ifaces.size() > 1){
4180                 for(si=0;si<ifaces.size();++si){
4181                         sgah_qpn *subq_node = new sgah_qpn();
4182
4183 //                      Name the subquery
4184                         string new_name =  "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4185                         untaboo(new_name);
4186                         subq_node->set_node_name( new_name) ;
4187                         sel_names.push_back(subq_node->get_node_name());
4188
4189 //                      Assign the table
4190                         subq_node->table_name = fta_node->table_name->duplicate();
4191                         subq_node->table_name->set_machine(ifaces[si].first);
4192                         subq_node->table_name->set_interface(ifaces[si].second);
4193                         subq_node->table_name->set_ifq(false);
4194
4195 //                      the GB vars.
4196                         for(g=0;g<fta_node->gb_tbl.size();g++){
4197 //                      Insert the gbvar into the lfta.
4198                                 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
4199                                 subq_node->gb_tbl.add_gb_var(
4200                                         fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
4201                                 );
4202                         }
4203
4204 //                      Insert the aggregates
4205                         for(a=0;a<fta_node->aggr_tbl.size();++a){
4206                                 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
4207                         }
4208
4209                         for(s=0;s<fta_node->select_list.size();s++){
4210                                 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4211                         }
4212                         for(p=0;p<fta_node->where.size();p++){
4213                                 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4214                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
4215                                 analyze_cnf(new_cnf);
4216
4217                                 subq_node->where.push_back(new_cnf);
4218                         }
4219                         for(p=0;p<fta_node->having.size();p++){
4220                                 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
4221                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
4222                                 analyze_cnf(new_cnf);
4223
4224                                 subq_node->having.push_back(new_cnf);
4225                         }
4226 //                      Xfer all of the parameters.
4227 //                      Use existing handle annotations.
4228                         vector<string> param_names = param_tbl->get_param_names();
4229                         int pi;
4230                         for(pi=0;pi<param_names.size();pi++){
4231                                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4232                                 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4233                                                                         param_tbl->handle_access(param_names[pi]));
4234                         }
4235                         subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4236                         if(subq_node->resolve_if_params(ifdb, this->err_str)){
4237                                 this->error_code = 3;
4238                                 return ret_vec;
4239                         }
4240
4241 //                      THe disorder
4242                         subq_node->lfta_disorder = fta_node->lfta_disorder;
4243
4244                         ret_vec.push_back(subq_node);
4245                 }
4246
4247                 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
4248                          fta_node->node_name, sel_names, ifaces, ifdb);
4249                 mrg_node->set_disorder(fta_node->lfta_disorder);
4250
4251                 /*
4252                 Do not split sources until we are done with optimizations
4253                 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4254                 for(i=0;i<split_merge.size();++i){
4255                         ret_vec.push_back(split_merge[i]);
4256                 }
4257                 */
4258                 ret_vec.push_back(mrg_node);
4259                 ret_vec.push_back(stream_node);
4260                 hfta_returned = 1/*split_merge.size()*/+1;
4261
4262         }else{
4263                 fta_node->table_name->set_machine(ifaces[0].first);
4264                 fta_node->table_name->set_interface(ifaces[0].second);
4265                 fta_node->table_name->set_ifq(false);
4266                 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4267                         this->error_code = 3;
4268                         return ret_vec;
4269                 }
4270                 ret_vec.push_back(fta_node);
4271                 ret_vec.push_back(stream_node);
4272                 hfta_returned = 1;
4273         }
4274
4275
4276 //      ret_vec.push_back(fta_node);
4277 //      ret_vec.push_back(stream_node);
4278
4279
4280         return(ret_vec);
4281
4282         }
4283
4284 /////////////////////////////////////////////////////////////////////
4285 ///             Split into selection LFTA, aggregation HFTA.
4286
4287         spx_qpn *fta_node = new spx_qpn();
4288                 fta_node->table_name = table_name;
4289                 fta_node->set_node_name( "_fta_"+node_name );
4290                 fta_node->table_name->set_range_var(table_name->get_var_name());
4291
4292
4293         sgah_qpn *stream_node = new sgah_qpn();
4294                 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4295                 stream_node->set_node_name( node_name );
4296                 stream_node->table_name->set_range_var(table_name->get_var_name());
4297
4298
4299         vector< vector<select_element *> *> select_vec;
4300         select_vec.push_back(&(fta_node->select_list)); // only one child
4301
4302 //                      Process the gbvars.  Split their defining SEs.
4303         for(g=0;g<gb_tbl.size();g++){
4304                 bool fta_forbidden = false;
4305                 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4306
4307                 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
4308                                         fta_forbidden, se_src, select_vec, Ext_fcns
4309                 );
4310 //              if(fta_forbidden) (
4311                 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4312                         stream_node->gb_tbl.add_gb_var(
4313                           gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
4314                         );
4315                 }else{
4316                         scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
4317                         stream_node->gb_tbl.add_gb_var(
4318                           gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
4319                         );
4320                 }
4321         }
4322         stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4323
4324 //              Process the aggregate table.
4325 //              Copy to stream, split the SEs.
4326         map<int, scalarexp_t *> hfta_aggr_se;   // for rehome
4327         for(a=0;a<aggr_tbl.size();++a){
4328                 scalarexp_t *hse;
4329                 if(aggr_tbl.is_builtin(a)){
4330                         if(aggr_tbl.is_star_aggr(a)){
4331                                 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
4332                                 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
4333                         }else{
4334                                 bool fta_forbidden = false;
4335                                 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4336
4337                                 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4338                                         fta_forbidden, se_src, select_vec, Ext_fcns
4339                                 );
4340 //                              if(fta_forbidden) (
4341                                 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4342                                         stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
4343                                         hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
4344                                 }else{
4345                                         scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4346                                         stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
4347                                         hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
4348                                 }
4349                         }
4350                         hse->set_data_type(aggr_tbl.get_data_type(a));
4351                         hse->set_aggr_id(a);
4352                         hfta_aggr_se[a]=hse;
4353                 }else{
4354                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
4355                         vector<scalarexp_t *> new_opl;
4356                         for(o=0;o<opl.size();++o){
4357                                 bool fta_forbidden = false;
4358                                 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4359                                 scalarexp_t *agg_se = split_ftavec_se( opl[o],
4360                                         fta_forbidden, se_src, select_vec, Ext_fcns
4361                                 );
4362 //                              scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4363 //                                      fta_forbidden, se_src, select_vec, Ext_fcns
4364 //                              );
4365 //                              if(fta_forbidden) (
4366                                 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4367                                         new_opl.push_back(agg_se);
4368                                 }else{
4369                                         scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4370                                         new_opl.push_back(new_se);
4371                                 }
4372                         }
4373                         stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
4374                         hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
4375                         hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
4376                         hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
4377                         hse->set_aggr_id(a);
4378                         hfta_aggr_se[a]=hse;
4379                 }
4380         }
4381
4382
4383 //              Process the WHERE clause.
4384 //              If it is fta-safe AND it refs only fta-safe gbvars,
4385 //              then expand the gbvars and put it into the lfta.
4386 //              Else, split it into an hfta predicate ref'ing
4387 //              se's computed partially in the lfta.
4388
4389         predicate_t *pr_root;
4390         bool fta_forbidden;
4391         for(p=0;p<where.size();p++){
4392                 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
4393                         pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
4394                         fta_forbidden = true;
4395                 }else{
4396                         pr_root = dup_pr(where[p]->pr, NULL);
4397                         expand_gbvars_pr(pr_root, gb_tbl);
4398                         fta_forbidden = false;
4399                 }
4400                 cnf_elem *cnf_root = new cnf_elem(pr_root);
4401                 analyze_cnf(cnf_root);
4402
4403                 if(fta_forbidden){
4404                         stream_node->where.push_back(cnf_root);
4405                 }else{
4406                         fta_node->where.push_back(cnf_root);
4407                 }
4408         }
4409
4410
4411 //              Process the Select clause, rehome it on the
4412 //              new defs.
4413         for(s=0;s<select_list.size();s++){
4414                 bool fta_forbidden = false;
4415                 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4416                 stream_node->select_list.push_back(
4417                         new select_element(root_se, select_list[s]->name));
4418         }
4419
4420
4421 // Process the Having clause
4422
4423 //                      All of the predicates in the having clause must
4424 //                      execute in the stream node.
4425
4426         for(p=0;p<having.size();p++){
4427                 predicate_t *pr_root = rehome_fta_pr( having[p]->pr,  &hfta_aggr_se);
4428                 cnf_elem *cnf_root = new cnf_elem(pr_root);
4429                 analyze_cnf(cnf_root);
4430
4431                 stream_node->having.push_back(cnf_root);
4432         }
4433
4434 //              Handle parameters and a few last details.
4435         vector<string> param_names = param_tbl->get_param_names();
4436         int pi;
4437         for(pi=0;pi<param_names.size();pi++){
4438                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4439                 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4440                                                                         param_tbl->handle_access(param_names[pi]));
4441                 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4442                                                                         param_tbl->handle_access(param_names[pi]));
4443         }
4444
4445         fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4446         stream_node->definitions = definitions;
4447
4448 //              Now split by interfaces YYYY
4449         if(ifaces.size() > 1){
4450                 for(si=0;si<ifaces.size();++si){
4451                         spx_qpn *subq_node = new spx_qpn();
4452
4453 //                      Name the subquery
4454                         string new_name =  "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4455                         untaboo(new_name);
4456                         subq_node->set_node_name( new_name) ;
4457                         sel_names.push_back(subq_node->get_node_name());
4458
4459 //                      Assign the table
4460                         subq_node->table_name = fta_node->table_name->duplicate();
4461                         subq_node->table_name->set_machine(ifaces[si].first);
4462                         subq_node->table_name->set_interface(ifaces[si].second);
4463                         subq_node->table_name->set_ifq(false);
4464
4465                         for(s=0;s<fta_node->select_list.size();s++){
4466                                 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4467                         }
4468                         for(p=0;p<fta_node->where.size();p++){
4469                                 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4470                                 cnf_elem *new_cnf = new cnf_elem(new_pr);
4471                                 analyze_cnf(new_cnf);
4472
4473                                 subq_node->where.push_back(new_cnf);
4474                         }
4475 //                      Xfer all of the parameters.
4476 //                      Use existing handle annotations.
4477                         vector<string> param_names = param_tbl->get_param_names();
4478                         int pi;
4479                         for(pi=0;pi<param_names.size();pi++){
4480                                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4481                                 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4482                                                                         param_tbl->handle_access(param_names[pi]));
4483                         }
4484                         subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4485                         if(subq_node->resolve_if_params(ifdb, this->err_str)){
4486                                 this->error_code = 3;
4487                                 return ret_vec;
4488                         }
4489
4490                         ret_vec.push_back(subq_node);
4491                 }
4492
4493                 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4494                          fta_node->node_name, sel_names, ifaces, ifdb);
4495                 /*
4496                 Do not split sources until we are done with optimizations
4497                 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4498                 for(i=0;i<split_merge.size();++i){
4499                         ret_vec.push_back(split_merge[i]);
4500                 }
4501                 */
4502                 ret_vec.push_back(mrg_node);
4503                 ret_vec.push_back(stream_node);
4504                 hfta_returned = 1/*split_merge.size()*/+1;
4505
4506         }else{
4507                 fta_node->table_name->set_machine(ifaces[0].first);
4508                 fta_node->table_name->set_interface(ifaces[0].second);
4509                 fta_node->table_name->set_ifq(false);
4510                 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4511                         this->error_code = 3;
4512                         return ret_vec;
4513                 }
4514                 ret_vec.push_back(fta_node);
4515                 ret_vec.push_back(stream_node);
4516                 hfta_returned = 1;
4517         }
4518
4519
4520 //      ret_vec.push_back(fta_node);
4521 //      ret_vec.push_back(stream_node);
4522
4523
4524         return(ret_vec);
4525
4526 }
4527
4528
4529 /*
4530         SPLITTING AN EQ-TEMPORAL, HASH JOIN OPERATOR
4531
4532         A JOIN_EQ_HASH_QPN node may reference:
4533                 literals, parameters, colrefs, functions, operators
4534         A JOIN_EQ_HASH_QPN node may not reference:
4535                 group-by variables, aggregates
4536
4537         A JOIN_EQ_HASH_QPN node contains
4538                 selection list of SEs
4539                 where list of CNF predicates, broken into:
4540                         prefilter[2]
4541                         temporal_eq
4542                         hash_eq
4543                         postfilter
4544
4545         Algorithm:
4546                 For each tablevar whose source is a PROTOCOL
4547                         Create a LFTA for that tablevar
4548                         Push as many prefilter[..] predicates to that tablevar as is
4549                                 possible.
4550                         Split the SEs in the select list, and the predicates not
4551                                 pushed to the LFTA.
4552
4553 */
4554
4555 vector<qp_node *> join_eq_hash_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4556
4557         vector<qp_node *> ret_vec;
4558         int f,p,s;
4559
4560 //                      If the node reads from streams only, don't split.
4561         bool stream_only = true;
4562         for(f=0;f<from.size();++f){
4563 //              int t = Schema->get_table_ref(from[f]->get_schema_name());
4564                 int t = from[f]->get_schema_ref();
4565                 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA) stream_only = false;
4566         }
4567         if(stream_only){
4568                 hfta_returned = 1;
4569                 ret_vec.push_back(this);
4570                 return(ret_vec);
4571         }
4572
4573
4574 //                      The HFTA node, it is always returned.
4575
4576         join_eq_hash_qpn *stream_node = new join_eq_hash_qpn();
4577         for(f=0;f<from.size();++f){
4578 //              tablevar_t *tmp_tblvar = new tablevar_t( from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str());
4579                 tablevar_t *tmp_tblvar =  from[f]->duplicate();
4580 //              tmp_tblvar->set_range_var(from[f]->get_var_name());
4581
4582                 stream_node->from.push_back(tmp_tblvar);
4583         }
4584         stream_node->set_node_name(node_name);
4585
4586 //                      Create spx (selection) children for each PROTOCOL source.
4587         vector<spx_qpn *> child_vec;
4588         vector< vector<select_element *> *> select_vec;
4589         for(f=0;f<from.size();++f){
4590 //              int t = Schema->get_table_ref(from[f]->get_schema_name());
4591                 int t = from[f]->get_schema_ref();
4592                 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA){
4593                         spx_qpn *child_qpn = new spx_qpn();
4594                         sprintf(tmpstr,"_fta_%d_%s",f,node_name.c_str());
4595                         child_qpn->set_node_name(string(tmpstr));
4596                         child_qpn->table_name = new tablevar_t(
4597                            from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str(), from[f]->get_ifq());
4598                         child_qpn->table_name->set_range_var(from[f]->get_var_name());
4599
4600                         child_vec.push_back(child_qpn);
4601                         select_vec.push_back(&(child_qpn->select_list));
4602
4603 //                      Update the stream's FROM clause to read from this child
4604                         stream_node->from[f]->set_interface("");
4605                         stream_node->from[f]->set_schema(tmpstr);
4606                 }else{
4607                         child_vec.push_back(NULL);
4608                         select_vec.push_back(NULL);
4609                 }
4610         }
4611
4612 //              Push lfta-safe prefilter to the lfta
4613 //              TODO: I'm not copying the preds, I dont *think* it will be a problem.
4614         predicate_t *pr_root;
4615
4616         for(f=0;f<from.size();++f){
4617           vector<cnf_elem *> pred_vec = prefilter[f];
4618           if(child_vec[f] != NULL){
4619                 for(p=0;p<pred_vec.size();++p){
4620                         if(check_fta_forbidden_pr(pred_vec[p]->pr,NULL, Ext_fcns)){
4621                                 child_vec[f]->where.push_back(pred_vec[p]);
4622                         }else{
4623                                 pr_root = split_ftavec_pr(pred_vec[p]->pr,select_vec,Ext_fcns);
4624                                 cnf_elem *cnf_root = new cnf_elem(pr_root);
4625                                 analyze_cnf(cnf_root);
4626                                 stream_node->prefilter[f].push_back(cnf_root);
4627                         }
4628                 }
4629           }else{
4630                 for(p=0;p<pred_vec.size();++p){
4631                         stream_node->prefilter[f].push_back(pred_vec[p]);
4632                 }
4633           }
4634
4635         }
4636
4637 //              Process the other predicates
4638         for(p=0;p<temporal_eq.size();++p){
4639                 pr_root = split_ftavec_pr(temporal_eq[p]->pr,select_vec,Ext_fcns);
4640                 cnf_elem *cnf_root = new cnf_elem(pr_root);
4641                 analyze_cnf(cnf_root);
4642                 stream_node->temporal_eq.push_back(cnf_root);
4643         }
4644         for(p=0;p<hash_eq.size();++p){
4645                 pr_root = split_ftavec_pr(hash_eq[p]->pr,select_vec,Ext_fcns);
4646                 cnf_elem *cnf_root = new cnf_elem(pr_root);
4647                 analyze_cnf(cnf_root);
4648                 stream_node->hash_eq.push_back(cnf_root);
4649         }
4650         for(p=0;p<postfilter.size();++p){
4651                 pr_root = split_ftavec_pr(postfilter[p]->pr,select_vec,Ext_fcns);
4652                 cnf_elem *cnf_root = new cnf_elem(pr_root);
4653                 analyze_cnf(cnf_root);
4654                 stream_node->postfilter.push_back(cnf_root);
4655         }
4656
4657 //              Process the SEs
4658         for(s=0;s<select_list.size();s++){
4659                 bool fta_forbidden = false;
4660                 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4661                 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
4662                                         fta_forbidden, se_src, select_vec, Ext_fcns
4663                 );
4664                 if(fta_forbidden || !is_PROTOCOL_source(se_src, select_vec)){
4665                         stream_node->select_list.push_back(
4666                                 new select_element(root_se, select_list[s]->name) );
4667                 }else{
4668                         scalarexp_t *new_se=make_fta_se_ref(select_vec,root_se,se_src);
4669                         stream_node->select_list.push_back(
4670                                 new select_element(new_se, select_list[s]->name)
4671                         );
4672                 }
4673         }
4674
4675
4676 //              I need to "rehome" the colrefs -- make the annotations in the colrefs
4677 //              agree with their tablevars.
4678         for(f=0;f<child_vec.size();++f){
4679           if(child_vec[f]!=NULL){
4680                 vector<tablevar_t *> fm; fm.push_back(child_vec[f]->table_name);
4681
4682                 for(s=0;s<child_vec[f]->select_list.size();++s)
4683                         bind_colref_se(child_vec[f]->select_list[s]->se, fm,0,0);
4684                 for(p=0;p<child_vec[f]->where.size();++p)
4685 //                      bind_colref_pr(child_vec[f]->where[p]->pr, fm,f,0);
4686                         bind_colref_pr(child_vec[f]->where[p]->pr, fm,0,0);
4687           }
4688         }
4689
4690 //              rehome the colrefs in the hfta node.
4691         for(f=0;f<stream_node->from.size();++f){
4692           stream_node->where.clear();
4693           for(s=0;s<stream_node->from.size();++s){
4694                 for(p=0;p<stream_node->prefilter[s].size();++p){
4695                   bind_colref_pr((stream_node->prefilter[s])[p]->pr,stream_node->from,f,f);
4696                 }
4697           }
4698           for(p=0;p<stream_node->temporal_eq.size();++p){
4699                 bind_colref_pr(stream_node->temporal_eq[p]->pr,stream_node->from,f,f);
4700           }
4701           for(p=0;p<stream_node->hash_eq.size();++p){
4702                 bind_colref_pr(stream_node->hash_eq[p]->pr,stream_node->from,f,f);
4703           }
4704           for(p=0;p<stream_node->postfilter.size();++p){
4705                 bind_colref_pr(stream_node->postfilter[p]->pr,stream_node->from,f,f);
4706           }
4707           for(s=0;s<stream_node->select_list.size();++s){
4708                 bind_colref_se(stream_node->select_list[s]->se,stream_node->from,f,f);
4709           }
4710         }
4711
4712 //                      Rebuild the WHERE clause
4713         stream_node->where.clear();
4714         for(s=0;s<stream_node->from.size();++s){
4715                 for(p=0;p<stream_node->prefilter[s].size();++p){
4716                   stream_node->where.push_back((stream_node->prefilter[s])[p]);
4717                 }
4718         }
4719         for(p=0;p<stream_node->temporal_eq.size();++p){
4720                 stream_node->where.push_back(stream_node->temporal_eq[p]);
4721         }
4722         for(p=0;p<stream_node->hash_eq.size();++p){
4723                 stream_node->where.push_back(stream_node->hash_eq[p]);
4724         }
4725         for(p=0;p<stream_node->postfilter.size();++p){
4726                 stream_node->where.push_back(stream_node->postfilter[p]);
4727         }
4728
4729
4730 //              Build the return list
4731         vector<qp_node *> hfta_nodes;
4732         hfta_returned = 1;
4733         for(f=0;f<from.size();++f){
4734                 if(child_vec[f] != NULL){
4735                         spx_qpn *c_node = child_vec[f];
4736                         vector<pair<string, string> > ifaces = get_ifaces(c_node->table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
4737                         if (ifaces.empty()) {
4738                                 fprintf(stderr,"INTERNAL ERROR in join_eq_hash_qpn::split_node_for_fta - empty interface set\n");
4739                                 exit(1);
4740                         }
4741
4742                         if(ifaces.size() == 1){
4743                                 c_node->table_name->set_machine(ifaces[0].first);
4744                                 c_node->table_name->set_interface(ifaces[0].second);
4745                                 c_node->table_name->set_ifq(false);
4746                                 if(c_node->resolve_if_params(ifdb, this->err_str)){
4747                                         this->error_code = 3;
4748                                         return ret_vec;
4749                                 }
4750                                 ret_vec.push_back(c_node);
4751                         }else{
4752                                 vector<string> sel_names;
4753                                 int si;
4754                                 for(si=0;si<ifaces.size();++si){
4755                                         spx_qpn *subq_node = new spx_qpn();
4756
4757 //                      Name the subquery
4758                                         string new_name =  "_"+c_node->node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4759                                         untaboo(new_name);
4760                                         subq_node->set_node_name( new_name) ;
4761                                         sel_names.push_back(subq_node->get_node_name());
4762
4763 //                      Assign the table
4764                                         subq_node->table_name = c_node->table_name->duplicate();
4765                                         subq_node->table_name->set_machine(ifaces[si].first);
4766                                         subq_node->table_name->set_interface(ifaces[si].second);
4767                                         subq_node->table_name->set_ifq(false);
4768
4769                                         for(s=0;s<c_node->select_list.size();s++){
4770                                           subq_node->select_list.push_back(dup_select(c_node->select_list[s], NULL));
4771                                         }
4772                                         for(p=0;p<c_node->where.size();p++){
4773                                           predicate_t *new_pr = dup_pr(c_node->where[p]->pr, NULL);
4774                                           cnf_elem *new_cnf = new cnf_elem(new_pr);
4775                                           analyze_cnf(new_cnf);
4776
4777 printf("table name is %s\n",subq_node->table_name->to_string().c_str());
4778                                           subq_node->where.push_back(new_cnf);
4779                                         }
4780 //                      Xfer all of the parameters.
4781 //                      Use existing handle annotations.
4782 //                                      vector<string> param_names = param_tbl->get_param_names();
4783 //                                      int pi;
4784 //                                      for(pi=0;pi<param_names.size();pi++){
4785 //                                              data_type *dt = param_tbl->get_data_type(param_names[pi]);
4786 //                                              subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4787 //                                                                      param_tbl->handle_access(param_names[pi]));
4788 //                                      }
4789 //                                      subq_node->definitions = definitions;
4790
4791                                 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4792                                         this->error_code = 3;
4793                                         return ret_vec;
4794                                 }
4795
4796                                         ret_vec.push_back(subq_node);
4797                                 }
4798                                 int lpos = ret_vec.size()-1     ;
4799                                 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[lpos]),c_node->node_name,sel_names, ifaces, ifdb);
4800                                 /*
4801                                 Do not split sources until we are done with optimizations
4802                                 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4803                                 int i;
4804                                 for(i=0;i<split_merge.size();++i){
4805                                         hfta_nodes.push_back(split_merge[i]);
4806                                 }
4807                                 */
4808                                 hfta_nodes.push_back(mrg_node);
4809                         }
4810                 }
4811         }
4812         int i;
4813         for(i=0;i<hfta_nodes.size();++i) ret_vec.push_back(hfta_nodes[i]);
4814         ret_vec.push_back(stream_node);
4815         hfta_returned = hfta_nodes.size()+1;
4816
4817 //                      Currently : assume that the stream receives all parameters
4818 //                      and parameter updates, incorporates them, then passes
4819 //                      all of the parameters to the FTA.
4820 //                      This will need to change (tables, fta-unsafe types. etc.)
4821
4822 //                      I will pass on the use_handle_access marking, even
4823 //                      though the fcn call that requires handle access might
4824 //                      exist in only one of the parts of the query.
4825 //                      Parameter manipulation and handle access determination will
4826 //                      need to be revisited anyway.
4827         vector<string> param_names = param_tbl->get_param_names();
4828         int pi;
4829         for(pi=0;pi<param_names.size();pi++){
4830                 int ri;
4831                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4832                 for(ri=0;ri<ret_vec.size();++ri){
4833                         ret_vec[ri]->param_tbl->add_param(param_names[pi],dt->duplicate(),
4834                                                                         param_tbl->handle_access(param_names[pi]));
4835                         ret_vec[ri]->definitions = definitions; ret_vec[ri]->definitions.erase("_referenced_ifaces");
4836                 }
4837         }
4838
4839
4840
4841         return(ret_vec);
4842
4843 }
4844
4845
4846 /////////////////////////////////////////////////////////////
4847 ////                    extract_opview
4848
4849 //              Common processing
4850 int process_opview(tablevar_t *fmtbl, int pos, string node_name,
4851                                  table_list *Schema,
4852                                 vector<query_node *> &qnodes,
4853                                 opview_set &opviews,
4854                                 vector<table_exp_t *> &ret, string rootnm, string silo_nm){
4855
4856         int s,f,q,m;
4857
4858         int schref = fmtbl->get_schema_ref();
4859         if(schref <= 0)
4860                 return 0;
4861
4862         if(Schema->get_schema_type(schref) == OPERATOR_VIEW_SCHEMA){
4863                 opview_entry *opv = new opview_entry();
4864                 opv->parent_qname = node_name;
4865                 opv->root_name = rootnm;
4866                 opv->view_name = fmtbl->get_schema_name();
4867                 opv->pos = pos;
4868                 sprintf(tmpstr,"%s_UDOP%d_%s",node_name.c_str(),pos,opv->view_name.c_str());
4869                 opv->udop_alias = tmpstr;
4870                 fmtbl->set_udop_alias(opv->udop_alias);
4871
4872                 opv->exec_fl = Schema->get_op_prop(schref, string("file"));
4873                 opv->liveness_timeout = atoi(Schema->get_op_prop(schref, string("liveness_timeout")).c_str());
4874
4875                 vector<subquery_spec *> subq = Schema->get_subqueryspecs(schref);
4876                 for(s=0;s<subq.size();++s){
4877 //                              Validate that the fields match.
4878                         subquery_spec *sqs = subq[s];
4879                         vector<field_entry *> flds = Schema->get_fields(sqs->name+silo_nm);
4880                         if(flds.size() == 0){
4881                                 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in Schema.\n",sqs->name.c_str(), opv->view_name.c_str());
4882                                 return(1);
4883                         }
4884                         if(flds.size() < sqs->types.size()){
4885                                 fprintf(stderr,"ERROR: subquery %s of view %s does not have enough fields (%lu found, %lu expected).\n",sqs->name.c_str(), opv->view_name.c_str(),flds.size(), sqs->types.size());
4886                                 return(1);
4887                         }
4888                         bool failed = false;
4889                         for(f=0;f<sqs->types.size();++f){
4890                                 data_type dte(sqs->types[f],sqs->modifiers[f]);
4891                                 data_type dtf(flds[f]->get_type(),flds[f]->get_modifier_list());
4892                                 if(! dte.subsumes_type(&dtf) ){
4893                                         fprintf(stderr,"ERROR: subquery %s of view %s does not have the correct type for field %d (%s found, %s expected).\n",sqs->name.c_str(), opv->view_name.c_str(),f,dtf.to_string().c_str(), dte.to_string().c_str());
4894                                         failed = true;
4895                                 }
4896 /*
4897                                 if(dte.is_temporal() && (dte.get_temporal() != dtf.get_temporal()) ){
4898                                         string pstr = dte.get_temporal_string();
4899                                         fprintf(stderr,"ERROR: subquery %s of view %s does not have the expected temporal value %s of field %d.\n",sqs->name.c_str(), opv->view_name.c_str(),pstr.c_str(),f);
4900                                         failed = true;
4901                                 }
4902 */
4903                         }
4904                         if(failed)
4905                                 return(1);
4906 ///                             Validation done, find the subquery, make a copy of the
4907 ///                             parse tree, and add it to the return list.
4908                         for(q=0;q<qnodes.size();++q)
4909                                 if(qnodes[q]->name == sqs->name)
4910                                         break;
4911                         if(q==qnodes.size()){
4912                                 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in list of query names.\n",sqs->name.c_str(), opv->view_name.c_str());
4913                                 return(1);
4914                         }
4915
4916                         table_exp_t *newq = dup_table_exp(qnodes[q]->parse_tree);
4917                         sprintf(tmpstr,"%s_OP%d_%s_SUBQ%d",node_name.c_str(),pos,opv->view_name.c_str(),s);
4918                         string newq_name = tmpstr;
4919                         newq->nmap["query_name"] = newq_name;
4920                         ret.push_back(newq);
4921                         opv->subq_names.push_back(newq_name);
4922                 }
4923                 fmtbl->set_opview_idx(opviews.append(opv));
4924         }
4925
4926         return 0;
4927 }
4928
4929 vector<table_exp_t *> spx_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4930         vector<table_exp_t *> ret;
4931
4932         int retval = process_opview(table_name,0,node_name,
4933                                                                 Schema,qnodes,opviews,ret, rootnm, silo_name);
4934         if(retval) exit(1);
4935     return(ret);
4936 }
4937
4938
4939 vector<table_exp_t *> sgah_qpn::extract_opview(table_list *Schema,  vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4940         vector<table_exp_t *> ret;
4941
4942         int retval = process_opview(table_name,0,node_name,
4943                                                                 Schema,qnodes,opviews,ret, rootnm, silo_name);
4944         if(retval) exit(1);
4945     return(ret);
4946 }
4947
4948 vector<table_exp_t *> rsgah_qpn::extract_opview(table_list *Schema,  vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4949         vector<table_exp_t *> ret;
4950
4951         int retval = process_opview(table_name,0,node_name,
4952                                                                 Schema,qnodes,opviews,ret, rootnm, silo_name);
4953         if(retval) exit(1);
4954     return(ret);
4955 }
4956
4957
4958 vector<table_exp_t *> sgahcwcb_qpn::extract_opview(table_list *Schema,  vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4959         vector<table_exp_t *> ret;
4960
4961         int retval = process_opview(table_name,0,node_name,
4962                                                                 Schema,qnodes,opviews,ret, rootnm, silo_name);
4963         if(retval) exit(1);
4964     return(ret);
4965 }
4966
4967
4968
4969 vector<table_exp_t *> mrg_qpn::extract_opview(table_list *Schema,  vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4970         vector<table_exp_t *> ret;
4971         int f;
4972         for(f=0;f<fm.size();++f){
4973                 int retval = process_opview(fm[f],f,node_name,
4974                                                                 Schema,qnodes,opviews,ret, rootnm, silo_name);
4975                 if(retval) exit(1);
4976         }
4977     return(ret);
4978 }
4979
4980
4981
4982
4983 vector<table_exp_t *> join_eq_hash_qpn::extract_opview(table_list *Schema,  vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4984         vector<table_exp_t *> ret;
4985         int f;
4986         for(f=0;f<from.size();++f){
4987                 int retval = process_opview(from[f],f,node_name,
4988                                                                 Schema,qnodes,opviews,ret, rootnm, silo_name);
4989                 if(retval) exit(1);
4990         }
4991     return(ret);
4992 }
4993
4994 vector<table_exp_t *> filter_join_qpn::extract_opview(table_list *Schema,  vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4995         vector<table_exp_t *> ret;
4996         int f;
4997         for(f=0;f<from.size();++f){
4998                 int retval = process_opview(from[f],f,node_name,
4999                                                                 Schema,qnodes,opviews,ret, rootnm, silo_name);
5000                 if(retval) exit(1);
5001         }
5002     return(ret);
5003 }
5004
5005
5006
5007 //////////////////////////////////////////////////////////////////
5008 //////////////////////////////////////////////////////////////////
5009 ///////                 Additional methods
5010
5011
5012
5013 //////////////////////////////////////////////////////////////////
5014 //              Get schema of operator output
5015
5016 table_def *mrg_qpn::get_fields(){
5017         return(table_layout);
5018 }
5019
5020
5021 table_def *spx_qpn::get_fields(){
5022         return(create_attributes(node_name, select_list));
5023 }
5024
5025 table_def *sgah_qpn::get_fields(){
5026         return(create_attributes(node_name, select_list));
5027 }
5028
5029 table_def *rsgah_qpn::get_fields(){
5030         return(create_attributes(node_name, select_list));
5031 }
5032
5033 table_def *sgahcwcb_qpn::get_fields(){
5034         return(create_attributes(node_name, select_list));
5035 }
5036
5037 table_def *filter_join_qpn::get_fields(){
5038         return(create_attributes(node_name, select_list));
5039 }
5040
5041
5042 table_def *join_eq_hash_qpn::get_fields(){
5043         int i, h, s, t;
5044
5045 //                      First, gather temporal colrefs and SEs.
5046         map<col_id, temporal_type> temporal_cids;
5047         vector<scalarexp_t *> temporal_se;
5048         for(h=0;h<temporal_eq.size();++h){
5049                 scalarexp_t *sel = temporal_eq[h]->pr->get_left_se();
5050                 scalarexp_t *ser = temporal_eq[h]->pr->get_right_se();
5051
5052                 if(sel->get_operator_type() == SE_COLREF){
5053                         col_id tcol(sel->get_colref());
5054                         if(temporal_cids.count(tcol) == 0){
5055                                 temporal_cids[tcol] = sel->get_data_type()->get_temporal();
5056                         }
5057                 }else{
5058                         temporal_se.push_back(sel);
5059                 }
5060
5061                 if(ser->get_operator_type() == SE_COLREF){
5062                         col_id tcol(ser->get_colref());
5063                         if(temporal_cids.count(tcol) == 0){
5064                                 temporal_cids[tcol] = ser->get_data_type()->get_temporal();
5065                         }
5066                 }else{
5067                         temporal_se.push_back(ser);
5068                 }
5069         }
5070
5071 //              Mark select elements as nontemporal, then deduce which
5072 //              ones are temporal.
5073         for(s=0;s<select_list.size();++s){
5074                 select_list[s]->se->get_data_type()->set_temporal(
5075                         compute_se_temporal(select_list[s]->se, temporal_cids)
5076                 );
5077 //                              Second chance if it is an exact match to an SE.
5078 //      for(s=0;s<select_list.size();++s){
5079                 if(! select_list[s]->se->get_data_type()->is_temporal() ){
5080                         for(t=0;t<temporal_se.size();++t){
5081                                 if(is_equivalent_se(temporal_se[t], select_list[s]->se)){
5082                                         select_list[s]->se->get_data_type()->set_temporal(
5083                                                 temporal_se[t]->get_data_type()->get_temporal()
5084                                         );
5085                                 }
5086                         }
5087                 }
5088 //      }
5089         }
5090
5091 //                      If there is an outer join, verify that
5092 //                      the temporal attributes are actually temporal.
5093 //                      NOTE: this code must be synchronized with the
5094 //                      equivalence finding in join_eq_hash_qpn::generate_functor
5095 //                      (and also, the join_eq_hash_qpn constructor)
5096   if(from[0]->get_property() || from[1]->get_property()){
5097         set<string> l_equiv, r_equiv;
5098         for(i=0;i<temporal_eq.size();i++){
5099                 scalarexp_t *lse =      temporal_eq[i]->pr->get_left_se();
5100                 scalarexp_t *rse =      temporal_eq[i]->pr->get_right_se();
5101                 if(lse->get_operator_type()==SE_COLREF){
5102                         l_equiv.insert(lse->get_colref()->get_field());
5103                 }
5104                 if(rse->get_operator_type()==SE_COLREF){
5105                         r_equiv.insert(rse->get_colref()->get_field());
5106                 }
5107         }
5108
5109         for(s=0;s<select_list.size();++s){
5110                 if(select_list[s]->se->get_data_type()->is_temporal()){
5111                         col_id_set cid_set;
5112                         col_id_set::iterator ci;
5113                         bool failed = false;
5114                         gather_se_col_ids(select_list[s]->se,cid_set, NULL);
5115                         for(ci=cid_set.begin();ci!=cid_set.end();++ci){
5116                                 if((*ci).tblvar_ref == 0){
5117                                          if(from[0]->get_property()){
5118                                                 if(l_equiv.count((*ci).field) == 0){
5119                                                         failed = true;
5120                                                 }
5121                                         }
5122                                 }else{
5123                                          if(from[1]->get_property()){
5124                                                 if(r_equiv.count((*ci).field) == 0){
5125                                                         failed = true;
5126                                                 }
5127                                         }
5128                                 }
5129                         }
5130                         if(failed){
5131                                 select_list[s]->se->get_data_type()->reset_temporal();
5132                         }
5133                 }
5134         }
5135   }
5136
5137
5138         return create_attributes(node_name, select_list);
5139 }
5140
5141
5142 //-----------------------------------------------------------------
5143 //                      get output "keys"
5144 //                      This is a guess about the set of fields which are a key
5145 //                      Use as metadata output, e.g. in qtree.xml
5146
5147
5148
5149 //              refs to GB attribtues are keys, if a SE is not a GB colref
5150 //              but refers to a GB colref (outside of an aggregation)
5151 //              then set partial_keys to true
5152 vector<string> sgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5153         vector<string> keys;
5154
5155         set<int> gref_set;
5156         for(int i=0; i<gb_tbl.size();++i)
5157                 gref_set.insert(i);
5158
5159         for(int s=0;s<select_list.size();++s){
5160                 if(select_list[s]->se->is_gb()){
5161                         keys.push_back(select_list[s]->name);
5162                 }else{
5163                         if(contains_gb_se(select_list[s]->se, gref_set)){
5164                                 partial_keys.push_back(select_list[s]->name);
5165                         }
5166                 }
5167         }
5168         return keys;
5169 }
5170
5171 vector<string> rsgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5172         vector<string> keys;
5173
5174         set<int> gref_set;
5175         for(int i=0; i<gb_tbl.size();++i)
5176                 gref_set.insert(i);
5177
5178         for(int s=0;s<select_list.size();++s){
5179                 if(select_list[s]->se->is_gb()){
5180                         keys.push_back(select_list[s]->name);
5181                 }else{
5182                         if(contains_gb_se(select_list[s]->se, gref_set)){
5183                                 partial_keys.push_back(select_list[s]->name);
5184                         }
5185                 }
5186         }
5187         return keys;
5188 }
5189
5190
5191
5192
5193
5194 //-----------------------------------------------------------------
5195 //                      get output tables
5196
5197
5198 //                      Get tablevar_t names of input and output tables
5199
5200 //      output_file_qpn::output_file_qpn(){source_op_name = ""; }
5201         vector<tablevar_t *> output_file_qpn::get_input_tbls(){
5202                 return(fm);
5203         }
5204
5205         vector<tablevar_t *> mrg_qpn::get_input_tbls(){
5206                 return(fm);
5207         }
5208
5209         vector<tablevar_t *> spx_qpn::get_input_tbls(){
5210                 vector<tablevar_t *> retval(1,table_name);
5211                 return(retval);
5212         }
5213
5214         vector<tablevar_t *> sgah_qpn::get_input_tbls(){
5215                 vector<tablevar_t *> retval(1,table_name);
5216                 return(retval);
5217         }
5218
5219         vector<tablevar_t *> rsgah_qpn::get_input_tbls(){
5220                 vector<tablevar_t *> retval(1,table_name);
5221                 return(retval);
5222         }
5223
5224         vector<tablevar_t *> sgahcwcb_qpn::get_input_tbls(){
5225                 vector<tablevar_t *> retval(1,table_name);
5226                 return(retval);
5227         }
5228
5229         vector<tablevar_t *> join_eq_hash_qpn::get_input_tbls(){
5230                 return(from);
5231         }
5232
5233         vector<tablevar_t *> filter_join_qpn::get_input_tbls(){
5234                 return(from);
5235         }
5236
5237 //-----------------------------------------------------------------
5238 //                      get output tables
5239
5240
//              This does not make sense: this fcn returns the output table *name*,
//              not its schema, and then there is another fcn to return the schema.
5243         vector<tablevar_t *> output_file_qpn::get_output_tbls(){
5244                 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5245                 return(retval);
5246         }
5247
5248         vector<tablevar_t *> mrg_qpn::get_output_tbls(){
5249                 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5250                 return(retval);
5251         }
5252
5253         vector<tablevar_t *> spx_qpn::get_output_tbls(){
5254                 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5255                 return(retval);
5256         }
5257
5258         vector<tablevar_t *> sgah_qpn::get_output_tbls(){
5259                 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5260                 return(retval);
5261         }
5262
5263         vector<tablevar_t *> rsgah_qpn::get_output_tbls(){
5264                 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5265                 return(retval);
5266         }
5267
5268         vector<tablevar_t *> sgahcwcb_qpn::get_output_tbls(){
5269                 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5270                 return(retval);
5271         }
5272
5273         vector<tablevar_t *> join_eq_hash_qpn::get_output_tbls(){
5274                 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5275                 return(retval);
5276         }
5277
5278         vector<tablevar_t *> filter_join_qpn::get_output_tbls(){
5279                 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5280                 return(retval);
5281         }
5282
5283
5284
5285 //-----------------------------------------------------------------
5286 //                      Bind to schema
5287
5288 //              Associate colrefs with this schema.
5289 //              Also, use this opportunity to create table_layout (the output schema).
5290 //              If the output schema is ever needed before
5291 void mrg_qpn::bind_to_schema(table_list *Schema){
5292         int t;
5293         for(t=0;t<fm.size();++t){
5294                 int tblref = Schema->get_table_ref(fm[t]->get_schema_name());
5295                 if(tblref>=0)
5296                 fm[t]->set_schema_ref(tblref );
5297         }
5298
5299 //              Here I assume that the colrefs have been reorderd
5300 //              during analysis so that mvars line up with fm.
5301         mvars[0]->set_schema_ref(fm[0]->get_schema_ref());
5302         mvars[1]->set_schema_ref(fm[1]->get_schema_ref());
5303
5304
5305 }
5306
5307
5308
5309 //              Associate colrefs in SEs with this schema.
5310 void spx_qpn::bind_to_schema(table_list *Schema){
5311 //                      Bind the tablevars in the From clause to the Schema
5312 //                      (it might have changed from analysis time)
5313         int t = Schema->get_table_ref(table_name->get_schema_name() );
5314         if(t>=0)
5315         table_name->set_schema_ref(t );
5316
5317 //                      Get the "from" clause
5318         tablevar_list_t fm(table_name);
5319
5320 //                      Bind all SEs to this schema
5321         int p;
5322         for(p=0;p<where.size();++p){
5323                 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5324         }
5325         int s;
5326         for(s=0;s<select_list.size();++s){
5327                 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5328         }
5329
5330 //              Collect set of tuples referenced in this HFTA
5331 //              input, internal, or output.
5332
5333 }
5334
5335 col_id_set spx_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5336         col_id_set retval, tmp_cset;
5337         int p;
5338         for(p=0;p<where.size();++p){
5339                 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5340         }
5341         int s;
5342         for(s=0;s<select_list.size();++s){
5343                 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5344         }
5345         col_id_set::iterator  cisi;
5346         if(ext_fcns_only){
5347                 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5348                         field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5349                         if(fe->get_unpack_fcns().size()>0)
5350                                 retval.insert((*cisi));
5351                 }
5352                 return retval;
5353         }
5354
5355         return tmp_cset;
5356 }
5357
5358 col_id_set filter_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5359         col_id_set retval, tmp_cset;
5360         int p;
5361         for(p=0;p<where.size();++p){
5362                 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5363         }
5364         int s;
5365         for(s=0;s<select_list.size();++s){
5366                 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5367         }
5368         col_id_set::iterator  cisi;
5369         if(ext_fcns_only){
5370                 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5371                         field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5372                         if(fe->get_unpack_fcns().size()>0)
5373                                 retval.insert((*cisi));
5374                 }
5375                 return retval;
5376         }
5377
5378         return tmp_cset;
5379 }
5380
5381
5382
5383 //              Associate colrefs in SEs with this schema.
5384 void join_eq_hash_qpn::bind_to_schema(table_list *Schema){
5385 //                      Bind the tablevars in the From clause to the Schema
5386 //                      (it might have changed from analysis time)
5387         int f;
5388         for(f=0;f<from.size();++f){
5389                 string snm = from[f]->get_schema_name();
5390                 int tbl_ref = Schema->get_table_ref(snm);
5391                 if(tbl_ref >= 0)
5392                 from[f]->set_schema_ref(tbl_ref);
5393         }
5394
5395 //                      Bind all SEs to this schema
5396         tablevar_list_t fm(from);
5397
5398         int p;
5399         for(p=0;p<where.size();++p){
5400                 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5401         }
5402         int s;
5403         for(s=0;s<select_list.size();++s){
5404                 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5405         }
5406
5407 //              Collect set of tuples referenced in this HFTA
5408 //              input, internal, or output.
5409
5410 }
5411
5412 void filter_join_qpn::bind_to_schema(table_list *Schema){
5413 //                      Bind the tablevars in the From clause to the Schema
5414 //                      (it might have changed from analysis time)
5415         int f;
5416         for(f=0;f<from.size();++f){
5417                 string snm = from[f]->get_schema_name();
5418                 int tbl_ref = Schema->get_table_ref(snm);
5419                 if(tbl_ref >= 0)
5420                 from[f]->set_schema_ref(tbl_ref);
5421         }
5422
5423 //                      Bind all SEs to this schema
5424         tablevar_list_t fm(from);
5425
5426         int p;
5427         for(p=0;p<where.size();++p){
5428                 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5429         }
5430         int s;
5431         for(s=0;s<select_list.size();++s){
5432                 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5433         }
5434
5435 //              Collect set of tuples referenced in this HFTA
5436 //              input, internal, or output.
5437
5438 }
5439
5440
5441
5442
5443 void sgah_qpn::bind_to_schema(table_list *Schema){
5444 //                      Bind the tablevars in the From clause to the Schema
5445 //                      (it might have changed from analysis time)
5446
5447
5448         int t = Schema->get_table_ref(table_name->get_schema_name() );
5449         if(t>=0)
5450         table_name->set_schema_ref(t );
5451
5452 //                      Get the "from" clause
5453         tablevar_list_t fm(table_name);
5454
5455
5456
5457 //                      Bind all SEs to this schema
5458         int p;
5459         for(p=0;p<where.size();++p){
5460                 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5461         }
5462         for(p=0;p<having.size();++p){
5463                 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5464         }
5465         int s;
5466         for(s=0;s<select_list.size();++s){
5467                 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5468         }
5469         int g;
5470         for(g=0;g<gb_tbl.size();++g){
5471                 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5472         }
5473         int a;
5474         for(a=0;a<aggr_tbl.size();++a){
5475                 if(aggr_tbl.is_builtin(a)){
5476                         bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5477                 }else{
5478                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5479                         int o;
5480                         for(o=0;o<opl.size();++o){
5481                                 bind_to_schema_se(opl[o],&fm,Schema);
5482                         }
5483                 }
5484         }
5485 }
5486
5487 col_id_set sgah_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5488         col_id_set retval, tmp_cset;
5489         int p;
5490         for(p=0;p<where.size();++p){
5491                 gather_pr_col_ids(where[p]->pr, tmp_cset, &gb_tbl);
5492         }
5493         int g;
5494         for(g=0;g<gb_tbl.size();++g){
5495                 gather_se_col_ids(gb_tbl.get_def(g), tmp_cset, &gb_tbl);
5496         }
5497         int a;
5498         for(a=0;a<aggr_tbl.size();++a){
5499                 if(aggr_tbl.is_builtin(a)){
5500                         gather_se_col_ids(aggr_tbl.get_aggr_se(a), tmp_cset, &gb_tbl);
5501                 }else{
5502                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5503                         int o;
5504                         for(o=0;o<opl.size();++o){
5505                                 gather_se_col_ids(opl[o], tmp_cset, &gb_tbl);
5506                         }
5507                 }
5508         }
5509
5510         col_id_set::iterator  cisi;
5511         if(ext_fcns_only){
5512                 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5513                         field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5514                         if(fe->get_unpack_fcns().size()>0)
5515                                 retval.insert((*cisi));
5516                 }
5517                 return retval;
5518         }
5519
5520         return tmp_cset;
5521 }
5522
5523
5524 void rsgah_qpn::bind_to_schema(table_list *Schema){
5525 //                      Bind the tablevars in the From clause to the Schema
5526 //                      (it might have changed from analysis time)
5527         int t = Schema->get_table_ref(table_name->get_schema_name() );
5528         if(t>=0)
5529         table_name->set_schema_ref(t );
5530
5531 //                      Get the "from" clause
5532         tablevar_list_t fm(table_name);
5533
5534 //                      Bind all SEs to this schema
5535         int p;
5536         for(p=0;p<where.size();++p){
5537                 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5538         }
5539         for(p=0;p<having.size();++p){
5540                 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5541         }
5542         for(p=0;p<closing_when.size();++p){
5543                 bind_to_schema_pr(closing_when[p]->pr, &fm, Schema);
5544         }
5545         int s;
5546         for(s=0;s<select_list.size();++s){
5547                 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5548         }
5549         int g;
5550         for(g=0;g<gb_tbl.size();++g){
5551                 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5552         }
5553         int a;
5554         for(a=0;a<aggr_tbl.size();++a){
5555                 if(aggr_tbl.is_builtin(a)){
5556                         bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5557                 }else{
5558                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5559                         int o;
5560                         for(o=0;o<opl.size();++o){
5561                                 bind_to_schema_se(opl[o],&fm,Schema);
5562                         }
5563                 }
5564         }
5565 }
5566
5567
5568 void sgahcwcb_qpn::bind_to_schema(table_list *Schema){
5569 //                      Bind the tablevars in the From clause to the Schema
5570 //                      (it might have changed from analysis time)
5571         int t = Schema->get_table_ref(table_name->get_schema_name() );
5572         if(t>=0)
5573         table_name->set_schema_ref(t );
5574
5575 //                      Get the "from" clause
5576         tablevar_list_t fm(table_name);
5577
5578 //                      Bind all SEs to this schema
5579         int p;
5580         for(p=0;p<where.size();++p){
5581                 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5582         }
5583         for(p=0;p<having.size();++p){
5584                 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5585         }
5586         for(p=0;p<having.size();++p){
5587                 bind_to_schema_pr(cleanby[p]->pr, &fm, Schema);
5588         }
5589         for(p=0;p<having.size();++p){
5590                 bind_to_schema_pr(cleanwhen[p]->pr, &fm, Schema);
5591         }
5592         int s;
5593         for(s=0;s<select_list.size();++s){
5594                 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5595         }
5596         int g;
5597         for(g=0;g<gb_tbl.size();++g){
5598                 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5599         }
5600         int a;
5601         for(a=0;a<aggr_tbl.size();++a){
5602                 if(aggr_tbl.is_builtin(a)){
5603                         bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5604                 }else{
5605                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5606                         int o;
5607                         for(o=0;o<opl.size();++o){
5608                                 bind_to_schema_se(opl[o],&fm,Schema);
5609                         }
5610                 }
5611         }
5612 }
5613
5614
5615
5616
5617
5618
5619 ///////////////////////////////////////////////////////////////
5620 ///////////////////////////////////////////////////////////////
5621 ///             Functions for code generation.
5622
5623
5624 //-----------------------------------------------------------------
5625 //              get_cplx_lit_tbl
5626
5627 cplx_lit_table *mrg_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5628         return(new cplx_lit_table());
5629 }
5630
5631 cplx_lit_table *spx_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5632         int i;
5633         cplx_lit_table *complex_literals = new cplx_lit_table();
5634
5635         for(i=0;i<select_list.size();i++){
5636                 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5637         }
5638         for(i=0;i<where.size();++i){
5639                 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5640         }
5641
5642         return(complex_literals);
5643 }
5644
5645 cplx_lit_table *sgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5646         int i,j;
5647         cplx_lit_table *complex_literals = new cplx_lit_table();
5648
5649         for(i=0;i<aggr_tbl.size();++i){
5650                 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5651                         find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
5652                 }else{
5653                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5654                         for(j=0;j<opl.size();++j)
5655                                 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
5656                 }
5657         }
5658
5659         for(i=0;i<select_list.size();i++){
5660                 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5661         }
5662     for(i=0;i<gb_tbl.size();i++){
5663         find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
5664     }
5665         for(i=0;i<where.size();++i){
5666                 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5667         }
5668         for(i=0;i<having.size();++i){
5669                         find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
5670         }
5671
5672         return(complex_literals);
5673 }
5674
5675
5676 cplx_lit_table *rsgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5677         int i,j;
5678         cplx_lit_table *complex_literals = new cplx_lit_table();
5679
5680         for(i=0;i<aggr_tbl.size();++i){
5681                 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5682                         find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
5683                 }else{
5684                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5685                         for(j=0;j<opl.size();++j)
5686                                 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
5687                 }
5688         }
5689
5690         for(i=0;i<select_list.size();i++){
5691                 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5692         }
5693     for(i=0;i<gb_tbl.size();i++){
5694         find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
5695     }
5696         for(i=0;i<where.size();++i){
5697                 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5698         }
5699         for(i=0;i<having.size();++i){
5700                         find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
5701         }
5702         for(i=0;i<closing_when.size();++i){
5703                         find_complex_literal_pr(closing_when[i]->pr,Ext_fcns, complex_literals);
5704         }
5705
5706         return(complex_literals);
5707 }
5708
5709
5710 cplx_lit_table *sgahcwcb_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5711         int i,j;
5712         cplx_lit_table *complex_literals = new cplx_lit_table();
5713
5714         for(i=0;i<aggr_tbl.size();++i){
5715                 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5716                         find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
5717                 }else{
5718                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5719                         for(j=0;j<opl.size();++j)
5720                                 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
5721                 }
5722         }
5723
5724         for(i=0;i<select_list.size();i++){
5725                 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5726         }
5727     for(i=0;i<gb_tbl.size();i++){
5728         find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
5729     }
5730         for(i=0;i<where.size();++i){
5731                 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5732         }
5733         for(i=0;i<having.size();++i){
5734                         find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
5735         }
5736         for(i=0;i<cleanwhen.size();++i){
5737                         find_complex_literal_pr(cleanwhen[i]->pr,Ext_fcns, complex_literals);
5738         }
5739         for(i=0;i<cleanby.size();++i){
5740                         find_complex_literal_pr(cleanby[i]->pr,Ext_fcns, complex_literals);
5741         }
5742
5743         return(complex_literals);
5744 }
5745
5746 cplx_lit_table *join_eq_hash_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5747         int i;
5748         cplx_lit_table *complex_literals = new cplx_lit_table();
5749
5750         for(i=0;i<select_list.size();i++){
5751                 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5752         }
5753         for(i=0;i<where.size();++i){
5754                 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5755         }
5756
5757         return(complex_literals);
5758 }
5759
5760 cplx_lit_table *filter_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5761         int i;
5762         cplx_lit_table *complex_literals = new cplx_lit_table();
5763
5764         for(i=0;i<select_list.size();i++){
5765                 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5766         }
5767         for(i=0;i<where.size();++i){
5768                 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5769         }
5770
5771         return(complex_literals);
5772 }
5773
5774
5775
5776
5777 //-----------------------------------------------------------------
5778 //              get_handle_param_tbl
5779
5780 vector<handle_param_tbl_entry *> mrg_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5781     vector<handle_param_tbl_entry *> retval;
5782         return(retval);
5783 }
5784
5785
5786 vector<handle_param_tbl_entry *> spx_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5787         int i;
5788     vector<handle_param_tbl_entry *> retval;
5789
5790         for(i=0;i<select_list.size();i++){
5791                 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5792         }
5793         for(i=0;i<where.size();++i){
5794                 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5795         }
5796
5797         return(retval);
5798 }
5799
5800
5801 vector<handle_param_tbl_entry *> sgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5802         int i,j;
5803     vector<handle_param_tbl_entry *> retval;
5804
5805
5806         for(i=0;i<aggr_tbl.size();++i){
5807                 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5808                         find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
5809                 }else{
5810                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5811                         for(j=0;j<opl.size();++j)
5812                                 find_param_handles_se(opl[j], Ext_fcns, retval);
5813                 }
5814         }
5815         for(i=0;i<select_list.size();i++){
5816                 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5817         }
5818     for(i=0;i<gb_tbl.size();i++){
5819         find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
5820     }
5821         for(i=0;i<where.size();++i){
5822                 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5823         }
5824         for(i=0;i<having.size();++i){
5825                         find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
5826         }
5827
5828         return(retval);
5829 }
5830
5831
5832 vector<handle_param_tbl_entry *> rsgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5833         int i,j;
5834     vector<handle_param_tbl_entry *> retval;
5835
5836
5837         for(i=0;i<aggr_tbl.size();++i){
5838                 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5839                         find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
5840                 }else{
5841                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5842                         for(j=0;j<opl.size();++j)
5843                                 find_param_handles_se(opl[j], Ext_fcns, retval);
5844                 }
5845         }
5846         for(i=0;i<select_list.size();i++){
5847                 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5848         }
5849     for(i=0;i<gb_tbl.size();i++){
5850         find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
5851     }
5852         for(i=0;i<where.size();++i){
5853                 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5854         }
5855         for(i=0;i<having.size();++i){
5856                         find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
5857         }
5858         for(i=0;i<closing_when.size();++i){
5859                         find_param_handles_pr(closing_when[i]->pr,Ext_fcns, retval);
5860         }
5861
5862         return(retval);
5863 }
5864
5865
5866 vector<handle_param_tbl_entry *> sgahcwcb_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5867         int i,j;
5868     vector<handle_param_tbl_entry *> retval;
5869
5870
5871         for(i=0;i<aggr_tbl.size();++i){
5872                 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5873                         find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
5874                 }else{
5875                         vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5876                         for(j=0;j<opl.size();++j)
5877                                 find_param_handles_se(opl[j], Ext_fcns, retval);
5878                 }
5879         }
5880         for(i=0;i<select_list.size();i++){
5881                 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5882         }
5883     for(i=0;i<gb_tbl.size();i++){
5884         find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
5885     }
5886         for(i=0;i<where.size();++i){
5887                 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5888         }
5889         for(i=0;i<having.size();++i){
5890                         find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
5891         }
5892         for(i=0;i<cleanwhen.size();++i){
5893                         find_param_handles_pr(cleanwhen[i]->pr,Ext_fcns, retval);
5894         }
5895         for(i=0;i<cleanby.size();++i){
5896                         find_param_handles_pr(cleanby[i]->pr,Ext_fcns, retval);
5897         }
5898
5899         return(retval);
5900 }
5901
5902 vector<handle_param_tbl_entry *> join_eq_hash_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5903         int i;
5904     vector<handle_param_tbl_entry *> retval;
5905
5906         for(i=0;i<select_list.size();i++){
5907                 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5908         }
5909         for(i=0;i<where.size();++i){
5910                 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5911         }
5912
5913         return(retval);
5914 }
5915
5916
5917 vector<handle_param_tbl_entry *> filter_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5918         int i;
5919     vector<handle_param_tbl_entry *> retval;
5920
5921         for(i=0;i<select_list.size();i++){
5922                 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5923         }
5924         for(i=0;i<where.size();++i){
5925                 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5926         }
5927
5928         return(retval);
5929 }
5930
5931 ///////////////////////////////////////////////////////////////
5932 ///////////////////////////////////////////////////////////////
5933 ///             Functions for operator output rates estimations
5934
5935
5936 //-----------------------------------------------------------------
5937 //              get_rate_estimate
5938
5939 double spx_qpn::get_rate_estimate() {
5940
5941         // dummy method for now
5942         return SPX_SELECTIVITY * DEFAULT_INTERFACE_RATE;
5943 }
5944
5945 double sgah_qpn::get_rate_estimate() {
5946
5947         // dummy method for now
5948         return SGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
5949 }
5950
5951 double rsgah_qpn::get_rate_estimate() {
5952
5953         // dummy method for now
5954         return RSGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
5955 }
5956
5957 double sgahcwcb_qpn::get_rate_estimate() {
5958
5959         // dummy method for now
5960         return SGAHCWCB_SELECTIVITY * DEFAULT_INTERFACE_RATE;
5961 }
5962
5963 double mrg_qpn::get_rate_estimate() {
5964
5965         // dummy method for now
5966         return MRG_SELECTIVITY * DEFAULT_INTERFACE_RATE;
5967 }
5968
5969 double join_eq_hash_qpn::get_rate_estimate() {
5970
5971         // dummy method for now
5972         return JOIN_EQ_HASH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
5973 }
5974
5975
5976 //////////////////////////////////////////////////////////////////////////////
5977 //////////////////////////////////////////////////////////////////////////////
5978 /////           Generate functors
5979
5980
5981
5982
5983 //-------------------------------------------------------------------------
5984 //                      Code generation utilities.
5985 //-------------------------------------------------------------------------
5986
5987 //              Globals referenced by generate utilities
5988
//              File-local pointer read by generate_se_code: when a column
//              reference is a group-by reference, its defining SE is looked
//              up here with get_def().
static gb_table *segen_gb_tbl;            // Table of all group-by attributes.
5990
5991
5992
5993 //                      Generate code that makes reference
5994 //                      to the tuple, and not to any aggregates.
5995 //                              NEW : it might reference a stateful function.
5996 static string generate_se_code(scalarexp_t *se,table_list *schema){
5997         string ret;
5998     data_type *ldt, *rdt;
5999         int o;
6000         vector<scalarexp_t *> operands;
6001
6002
6003         switch(se->get_operator_type()){
6004         case SE_LITERAL:
6005                 if(se->is_handle_ref()){
6006                         sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6007                         ret = tmpstr;
6008                         return(ret);
6009                 }
6010                 if(se->get_literal()->is_cpx_lit()){
6011                         sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6012                         ret = tmpstr;
6013                         return(ret);
6014                 }
6015                 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
6016         case SE_PARAM:
6017                 if(se->is_handle_ref()){
6018                         sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6019                         ret = tmpstr;
6020                         return(ret);
6021                 }
6022                 ret.append("param_");
6023                 ret.append(se->get_param_name());
6024                 return(ret);
6025         case SE_UNARY_OP:
6026         ldt = se->get_left_se()->get_data_type();
6027         if(ldt->complex_operator(se->get_op()) ){
6028                         ret.append( ldt->get_complex_operator(se->get_op()) );
6029                         ret.append("(");
6030                         ret.append(generate_se_code(se->get_left_se(),schema));
6031             ret.append(")");
6032                 }else{
6033                         ret.append("(");
6034                         ret.append(se->get_op());
6035                         ret.append(generate_se_code(se->get_left_se(),schema));
6036                         ret.append(")");
6037                 }
6038                 return(ret);
6039         case SE_BINARY_OP:
6040         ldt = se->get_left_se()->get_data_type();
6041         rdt = se->get_right_se()->get_data_type();
6042
6043         if(ldt->complex_operator(rdt, se->get_op()) ){
6044                         ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6045                         ret.append("(");
6046                         ret.append(generate_se_code(se->get_left_se(),schema));
6047                         ret.append(", ");
6048                         ret.append(generate_se_code(se->get_right_se(),schema));
6049                         ret.append(")");
6050                 }else{
6051                         ret.append("(");
6052                         ret.append(generate_se_code(se->get_left_se(),schema));
6053                         ret.append(se->get_op());
6054                         ret.append(generate_se_code(se->get_right_se(),schema));
6055                         ret.append(")");
6056                 }
6057                 return(ret);
6058         case SE_COLREF:
6059                 if(se->is_gb()){                // OK to ref gb attrs, but they're not yet unpacked ...
6060                                                         // so return the defining code.
6061                         int gref = se->get_gb_ref();
6062                         scalarexp_t *gdef_se = segen_gb_tbl->get_def(gref);
6063                         ret = generate_se_code(gdef_se, schema );
6064
6065                 }else{
6066                 sprintf(tmpstr,"unpack_var_%s_%d",
6067                   se->get_colref()->get_field().c_str(), se->get_colref()->get_tablevar_ref() );
6068                 ret = tmpstr;
6069                 }
6070                 return(ret);
6071         case SE_FUNC:
6072                 if(se->is_partial()){
6073                         sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6074                         ret = tmpstr;
6075                 }else{
6076                         ret += se->op + "(";
6077                         operands = se->get_operands();
6078                         bool first_elem = true;
6079                         if(se->get_storage_state() != ""){
6080                                 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6081                                 first_elem = false;
6082                         }
6083                         for(o=0;o<operands.size();o++){
6084                                 if(first_elem) first_elem=false; else ret += ", ";
6085                                 if(operands[o]->get_data_type()->is_buffer_type() &&
6086                                         (! (operands[o]->is_handle_ref()) ) )
6087                                         ret.append("&");
6088                                 ret += generate_se_code(operands[o], schema);
6089                         }
6090                         ret += ")";
6091                 }
6092                 return(ret);
6093         default:
6094                 fprintf(stderr,"INTERNAL ERROR in generate_se_code (hfta), line %d, character %d: unknown operator type %d\n",
6095                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
6096                 return("ERROR in generate_se_code");
6097         }
6098 }
6099
6100 //              generate code that refers only to aggregate data and constants.
6101 //                      NEW : modified to handle superaggregates and stateful fcn refs.
6102 //                      Assume that the state is in *stval
6103 static string generate_se_code_fm_aggr(scalarexp_t *se, string gbvar, string aggvar, table_list *schema){
6104
6105         string ret;
6106     data_type *ldt, *rdt;
6107         int o;
6108         vector<scalarexp_t *> operands;
6109
6110
6111         switch(se->get_operator_type()){
6112         case SE_LITERAL:
6113                 if(se->is_handle_ref()){
6114                         sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6115                         ret = tmpstr;
6116                         return(ret);
6117                 }
6118                 if(se->get_literal()->is_cpx_lit()){
6119                         sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6120                         ret = tmpstr;
6121                         return(ret);
6122                 }
6123                 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
6124         case SE_PARAM:
6125                 if(se->is_handle_ref()){
6126                         sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6127                         ret = tmpstr;
6128                         return(ret);
6129                 }
6130                 ret.append("param_");
6131                 ret.append(se->get_param_name());
6132                 return(ret);
6133         case SE_UNARY_OP:
6134         ldt = se->get_left_se()->get_data_type();
6135         if(ldt->complex_operator(se->get_op()) ){
6136                         ret.append( ldt->get_complex_operator(se->get_op()) );
6137                         ret.append("(");
6138                         ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6139             ret.append(")");
6140                 }else{
6141                         ret.append("(");
6142                         ret.append(se->get_op());
6143                         ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6144                         ret.append(")");
6145                 }
6146                 return(ret);
6147         case SE_BINARY_OP:
6148         ldt = se->get_left_se()->get_data_type();
6149         rdt = se->get_right_se()->get_data_type();
6150
6151         if(ldt->complex_operator(rdt, se->get_op()) ){
6152                         ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6153                         ret.append("(");
6154                         ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6155                         ret.append(", ");
6156                         ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6157                         ret.append(")");
6158                 }else{
6159                         ret.append("(");
6160                         ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6161                         ret.append(se->get_op());
6162                         ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6163                         ret.append(")");
6164                 }
6165                 return(ret);
6166         case SE_COLREF:
6167                 if(se->is_gb()){                // OK to ref gb attrs, but they're not yet unpacked ...
6168                                                         // so return the defining code.
6169                         sprintf(tmpstr,"%s%d",gbvar.c_str(),se->get_gb_ref());
6170                         ret = tmpstr;
6171
6172                 }else{
6173                 fprintf(stderr,"ERROR reference to non-GB column ref not permitted here,"
6174                                 "error in query_plan.cc:generate_se_code_fm_aggr, line %d, character %d.\n",
6175                                 se->get_lineno(), se->get_charno());
6176                 ret = tmpstr;
6177                 }
6178                 return(ret);
6179         case SE_AGGR_STAR:
6180         case SE_AGGR_SE:
6181                 if(se->is_superaggr()){
6182                         sprintf(tmpstr,"stval->aggr_var%d",se->get_aggr_ref());
6183                 }else{
6184                         sprintf(tmpstr,"%saggr_var%d",aggvar.c_str(),se->get_aggr_ref());
6185                 }
6186                 ret = tmpstr;
6187                 return(ret);
6188         case SE_FUNC:
6189 //                              Is it a UDAF?
6190                 if(se->get_aggr_ref() >= 0){
6191                         sprintf(tmpstr,"udaf_ret_%d",se->get_aggr_ref());
6192                         ret = tmpstr;
6193                         return(ret);
6194                 }
6195
6196                 if(se->is_partial()){
6197                         sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6198                         ret = tmpstr;
6199                 }else{
6200                         ret += se->op + "(";
6201                         bool first_elem = true;
6202                         if(se->get_storage_state() != ""){
6203                                 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6204                                 first_elem = false;
6205                         }
6206                         operands = se->get_operands();
6207                         for(o=0;o<operands.size();o++){
6208                                 if(first_elem) first_elem=false; else ret += ", ";
6209                                 if(operands[o]->get_data_type()->is_buffer_type() &&
6210                                         (! (operands[o]->is_handle_ref()) ) )
6211                                         ret.append("&");
6212                                 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
6213                         }
6214                         ret += ")";
6215                 }
6216                 return(ret);
6217         default:
6218                 fprintf(stderr,"INTERNAL ERROR in query_plan.cc::generate_se_code_fm_aggr, line %d, character %d: unknown operator type %d\n",
6219                                 se->get_lineno(), se->get_charno(),se->get_operator_type());
6220                 return("ERROR in generate_se_code_fm_aggr");
6221         }
6222
6223 }
6224
6225
6226 static string unpack_partial_fcn_fm_aggr(scalarexp_t *se, int pfn_id, string gbvar, string aggvar, table_list *schema){
6227         string ret;
6228         int o;
6229         vector<scalarexp_t *> operands;
6230
6231
6232         if(se->get_operator_type() != SE_FUNC){
6233                 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn_fm_aggr. line %d, character %d\n",
6234                                 se->get_lineno(), se->get_charno());
6235                 return("ERROR in unpack_partial_fcn_fm_aggr");
6236         }
6237
6238         ret = "\tretval = " + se->get_op() + "( ",
6239         sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
6240         ret += tmpstr;
6241
6242         if(se->get_storage_state() != ""){
6243                 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6244         }
6245
6246         operands = se->get_operands();
6247         for(o=0;o<operands.size();o++){
6248                 ret += ", ";
6249                 if(operands[o]->get_data_type()->is_buffer_type() &&
6250                                         (! (operands[o]->is_handle_ref()) ) )
6251                         ret.append("&");
6252                 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
6253         }
6254         ret += ");\n";
6255
6256         return(ret);
6257 }
6258
6259
6260 static string unpack_partial_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6261         string ret;
6262         int o;
6263         vector<scalarexp_t *> operands;
6264
6265         if(se->get_operator_type() != SE_FUNC){
6266                 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn. line %d, character %d\n",
6267                                 se->get_lineno(), se->get_charno());
6268                 return("ERROR in unpack_partial_fcn");
6269         }
6270
6271         ret = "\tretval = " + se->get_op() + "( ",
6272         sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
6273         ret += tmpstr;
6274
6275         if(se->get_storage_state() != ""){
6276                 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6277         }
6278
6279         operands = se->get_operands();
6280         for(o=0;o<operands.size();o++){
6281                 ret += ", ";
6282                 if(operands[o]->get_data_type()->is_buffer_type() &&
6283                                         (! (operands[o]->is_handle_ref()) ) )
6284                         ret.append("&");
6285                 ret += generate_se_code(operands[o], schema);
6286         }
6287         ret += ");\n";
6288
6289         return(ret);
6290 }
6291
6292 static string generate_cached_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6293         string ret;
6294         int o;
6295         vector<scalarexp_t *> operands;
6296
6297         if(se->get_operator_type() != SE_FUNC){
6298                 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to generate_cached_fcn. line %d, character %d\n",
6299                                 se->get_lineno(), se->get_charno());
6300                 return("ERROR in generate_cached_fcn");
6301         }
6302
6303         ret = se->get_op()+"(";
6304
6305         if(se->get_storage_state() != ""){
6306                 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd,";
6307         }
6308
6309         operands = se->get_operands();
6310         for(o=0;o<operands.size();o++){
6311                 if(o) ret += ", ";
6312                 if(operands[o]->get_data_type()->is_buffer_type() &&
6313                                         (! (operands[o]->is_handle_ref()) ) )
6314                         ret.append("&");
6315                 ret += generate_se_code(operands[o], schema);
6316         }
6317         ret += ");\n";
6318
6319         return(ret);
6320 }
6321
6322
6323
6324
6325
//		Translate a query comparison operator into its C spelling:
//		"=" becomes "==" and "<>" becomes "!=".  Any other operator text
//		is returned unchanged.
static string generate_C_comparison_op(string op){
	if(op.compare("=") == 0){
		op = "==";
	}else if(op.compare("<>") == 0){
		op = "!=";
	}
	return op;
}
6331
//		Translate a query boolean operator into its C spelling.
//		Each operator is recognized in exactly three spellings (upper case,
//		capitalized, lower case); any other text yields the marker string
//		"ERROR UNKNOWN BOOLEAN OPERATOR".
static string generate_C_boolean_op(string op){
//		Each row: the three accepted spellings, then the C operator.
	static const char *const op_table[3][4] = {
		{"AND", "And", "and", "&&"},
		{"OR",  "Or",  "or",  "||"},
		{"NOT", "Not", "not", "!"}
	};

	for(int row=0; row<3; ++row){
		for(int col=0; col<3; ++col){
			if(op == op_table[row][col]){
				return string(op_table[row][3]);
			}
		}
	}

	return string("ERROR UNKNOWN BOOLEAN OPERATOR");
}
6345
6346
6347 static string generate_predicate_code(predicate_t *pr,table_list *schema){
6348         string ret;
6349         vector<literal_t *>  litv;
6350         int i;
6351     data_type *ldt, *rdt;
6352         vector<scalarexp_t *> op_list;
6353         int o;
6354
6355         switch(pr->get_operator_type()){
6356         case PRED_IN:
6357         ldt = pr->get_left_se()->get_data_type();
6358
6359                 ret.append("( ");
6360                 litv = pr->get_lit_vec();
6361                 for(i=0;i<litv.size();i++){
6362                         if(i>0) ret.append(" || ");
6363                         ret.append("( ");
6364
6365                 if(ldt->complex_comparison(ldt) ){
6366                                 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6367                                 ret.append("( ");
6368                                 if(ldt->is_buffer_type() )
6369                                         ret.append("&");
6370                                 ret.append(generate_se_code(pr->get_left_se(), schema));
6371                                 ret.append(", ");
6372                                 if(ldt->is_buffer_type() )
6373                                         ret.append("&");
6374                                 if(litv[i]->is_cpx_lit()){
6375                                         sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6376                                         ret += tmpstr;
6377                                 }else{
6378                                         ret.append(litv[i]->to_C_code(""));
6379                                 }
6380                                 ret.append(") == 0");
6381                         }else{
6382                                 ret.append(generate_se_code(pr->get_left_se(), schema));
6383                                 ret.append(" == ");
6384                                 ret.append(litv[i]->to_hfta_C_code(""));
6385                         }
6386
6387                         ret.append(" )");
6388                 }
6389                 ret.append(" )");
6390                 return(ret);
6391
6392         case PRED_COMPARE:
6393         ldt = pr->get_left_se()->get_data_type();
6394         rdt = pr->get_right_se()->get_data_type();
6395
6396                 ret.append("( ");
6397         if(ldt->complex_comparison(rdt) ){
6398                         ret.append(ldt->get_hfta_comparison_fcn(rdt));
6399                         ret.append("(");
6400                         if(ldt->is_buffer_type() )
6401                                 ret.append("&");
6402                         ret.append(generate_se_code(pr->get_left_se(),schema) );
6403                         ret.append(", ");
6404                         if(rdt->is_buffer_type() )
6405                                 ret.append("&");
6406                         ret.append(generate_se_code(pr->get_right_se(),schema) );
6407                         ret.append(") ");
6408                         ret.append( generate_C_comparison_op(pr->get_op()));
6409                         ret.append("0");
6410                 }else{
6411                         ret.append(generate_se_code(pr->get_left_se(),schema) );
6412                         ret.append( generate_C_comparison_op(pr->get_op()));
6413                         ret.append(generate_se_code(pr->get_right_se(),schema) );
6414                 }
6415                 ret.append(" )");
6416                 return(ret);
6417         case PRED_UNARY_OP:
6418                 ret.append("( ");
6419                 ret.append( generate_C_boolean_op(pr->get_op()) );
6420                 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6421                 ret.append(" )");
6422                 return(ret);
6423         case PRED_BINARY_OP:
6424                 ret.append("( ");
6425                 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6426                 ret.append( generate_C_boolean_op(pr->get_op()) );
6427                 ret.append(generate_predicate_code(pr->get_right_pr(),schema) );
6428                 ret.append(" )");
6429                 return(ret);
6430         case PRED_FUNC:
6431                 ret += pr->get_op() + "( ";
6432                 op_list = pr->get_op_list();
6433                 for(o=0;o<op_list.size();++o){
6434                         if(o>0) ret += ", ";
6435                         if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6436                                         ret.append("&");
6437                         ret += generate_se_code(op_list[o], schema);
6438                 }
6439                 ret += " )";
6440                 return(ret);
6441         default:
6442                 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6443                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6444                 return("ERROR in generate_predicate_code");
6445         }
6446 }
6447
6448 static string generate_predicate_code_fm_aggr(predicate_t *pr, string gbvar, string aggvar,table_list *schema){
6449         string ret;
6450         vector<literal_t *>  litv;
6451         int i;
6452     data_type *ldt, *rdt;
6453         vector<scalarexp_t *> op_list;
6454         int o;
6455
6456         switch(pr->get_operator_type()){
6457         case PRED_IN:
6458         ldt = pr->get_left_se()->get_data_type();
6459
6460                 ret.append("( ");
6461                 litv = pr->get_lit_vec();
6462                 for(i=0;i<litv.size();i++){
6463                         if(i>0) ret.append(" || ");
6464                         ret.append("( ");
6465
6466                 if(ldt->complex_comparison(ldt) ){
6467                                 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6468                                 ret.append("( ");
6469                                 if(ldt->is_buffer_type() )
6470                                         ret.append("&");
6471                                 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6472                                 ret.append(", ");
6473                                 if(ldt->is_buffer_type() )
6474                                         ret.append("&");
6475                                 if(litv[i]->is_cpx_lit()){
6476                                         sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6477                                         ret += tmpstr;
6478                                 }else{
6479                                         ret.append(litv[i]->to_C_code(""));
6480                                 }
6481                                 ret.append(") == 0");
6482                         }else{
6483                                 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6484                                 ret.append(" == ");
6485                                 ret.append(litv[i]->to_hfta_C_code(""));
6486                         }
6487
6488                         ret.append(" )");
6489                 }
6490                 ret.append(" )");
6491                 return(ret);
6492
6493         case PRED_COMPARE:
6494         ldt = pr->get_left_se()->get_data_type();
6495         rdt = pr->get_right_se()->get_data_type();
6496
6497                 ret.append("( ");
6498         if(ldt->complex_comparison(rdt) ){
6499                         ret.append(ldt->get_hfta_comparison_fcn(rdt));
6500                         ret.append("(");
6501                         if(ldt->is_buffer_type() )
6502                                 ret.append("&");
6503                         ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6504                         ret.append(", ");
6505                         if(rdt->is_buffer_type() )
6506                                 ret.append("&");
6507                         ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
6508                         ret.append(") ");
6509                         ret.append( generate_C_comparison_op(pr->get_op()));
6510                         ret.append("0");
6511                 }else{
6512                         ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6513                         ret.append( generate_C_comparison_op(pr->get_op()));
6514                         ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
6515                 }
6516                 ret.append(" )");
6517                 return(ret);
6518         case PRED_UNARY_OP:
6519                 ret.append("( ");
6520                 ret.append( generate_C_boolean_op(pr->get_op()) );
6521                 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6522                 ret.append(" )");
6523                 return(ret);
6524         case PRED_BINARY_OP:
6525                 ret.append("( ");
6526                 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6527                 ret.append( generate_C_boolean_op(pr->get_op()) );
6528                 ret.append(generate_predicate_code_fm_aggr(pr->get_right_pr(), gbvar, aggvar,schema) );
6529                 ret.append(" )");
6530                 return(ret);
6531         case PRED_FUNC:
6532                 ret += pr->get_op() + "( ";
6533                 op_list = pr->get_op_list();
6534                 for(o=0;o<op_list.size();++o){
6535                         if(o>0) ret += ", ";
6536                         if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6537                                         ret.append("&");
6538                         ret += generate_se_code_fm_aggr(op_list[o], gbvar, aggvar, schema);
6539                 }
6540                 ret += " )";
6541                 return(ret);
6542         default:
6543                 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6544                         pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6545                 return("ERROR in generate_predicate_code");
6546         }
6547 }
6548
6549
6550 //                              Aggregation code
6551
6552
6553 static string generate_equality_test(string &lhs_op, string &rhs_op, data_type *dt){
6554         string ret;
6555
6556     if(dt->complex_comparison(dt) ){
6557                 ret.append(dt->get_hfta_comparison_fcn(dt));
6558                 ret.append("(");
6559                         if(dt->is_buffer_type() )
6560                                 ret.append("&");
6561                 ret.append(lhs_op);
6562                 ret.append(", ");
6563                         if(dt->is_buffer_type() )
6564                                 ret.append("&");
6565                 ret.append(rhs_op );
6566                 ret.append(") == 0");
6567         }else{
6568                 ret.append(lhs_op );
6569                 ret.append(" == ");
6570                 ret.append(rhs_op );
6571         }
6572
6573         return(ret);
6574 }
6575
6576 static string generate_comparison(string &lhs_op, string &rhs_op, data_type *dt){
6577         string ret;
6578
6579     if(dt->complex_comparison(dt) ){
6580                 ret.append(dt->get_hfta_comparison_fcn(dt));
6581                 ret.append("(");
6582                         if(dt->is_buffer_type() )
6583                                 ret.append("&");
6584                 ret.append(lhs_op);
6585                 ret.append(", ");
6586                         if(dt->is_buffer_type() )
6587                                 ret.append("&");
6588                 ret.append(rhs_op );
6589                 ret.append(") == 0");
6590         }else{
6591                 ret.append(lhs_op );
6592                 ret.append(" == ");
6593                 ret.append(rhs_op );
6594         }
6595
6596         return(ret);
6597 }
6598
6599
6600 //              Here I assume that only MIN and MAX aggregates can be computed
6601 //              over BUFFER data types.
6602
6603 static string generate_aggr_update(string var, aggregate_table *atbl,int aidx, table_list *schema){
6604         string retval = "\t\t";
6605         string op = atbl->get_op(aidx);
6606
6607 //              Is it a UDAF
6608         if(! atbl->is_builtin(aidx)) {
6609                 int o;
6610                 retval += op+"_HFTA_AGGR_UPDATE_(";
6611                 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6612                 retval+="("+var+")";
6613                 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
6614                 for(o=0;o<opl.size();++o){{
6615                         retval += ",";
6616                         if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
6617                                         retval.append("&");
6618                                 retval += generate_se_code(opl[o], schema);
6619                         }
6620                 }
6621                 retval += ");\n";
6622
6623                 return retval;
6624         }
6625
6626
6627 //                      builtin processing
6628         data_type *dt = atbl->get_data_type(aidx);
6629
6630         if(op == "COUNT"){
6631                 retval.append(var);
6632                 retval.append("++;\n");
6633                 return(retval);
6634         }
6635         if(op == "SUM"){
6636                 retval.append(var);
6637                 retval.append(" += ");
6638                 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6639                 retval.append(";\n");
6640                 return(retval);
6641         }
6642         if(op == "MIN"){
6643                 sprintf(tmpstr,"aggr_tmp_%d",aidx);
6644                 retval += dt->make_host_cvar(tmpstr);
6645                 retval += " = ";
6646                 retval += generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
6647                 if(dt->complex_comparison(dt)){
6648                         if(dt->is_buffer_type())
6649                           sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6650                         else
6651                           sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6652                 }else{
6653                         sprintf(tmpstr,"\t\tif(aggr_tmp_%d < %s)\n",aidx,var.c_str());
6654                 }
6655                 retval.append(tmpstr);
6656                 if(dt->is_buffer_type()){
6657                         sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
6658                 }else{
6659                         sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
6660                 }
6661                 retval.append(tmpstr);
6662
6663                 return(retval);
6664         }
6665         if(op == "MAX"){
6666                 sprintf(tmpstr,"aggr_tmp_%d",aidx);
6667                 retval+=dt->make_host_cvar(tmpstr);
6668                 retval+=" = ";
6669                 retval+=generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
6670                 if(dt->complex_comparison(dt)){
6671                         if(dt->is_buffer_type())
6672                          sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6673                         else
6674                          sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6675                 }else{
6676                         sprintf(tmpstr,"\t\tif(aggr_tmp_%d > %s)\n",aidx,var.c_str());
6677                 }
6678                 retval.append(tmpstr);
6679                 if(dt->is_buffer_type()){
6680                         sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
6681                 }else{
6682                         sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
6683                 }
6684                 retval.append(tmpstr);
6685
6686                 return(retval);
6687
6688         }
6689         if(op == "AND_AGGR"){
6690                 retval.append(var);
6691                 retval.append(" &= ");
6692                 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6693                 retval.append(";\n");
6694                 return(retval);
6695         }
6696         if(op == "OR_AGGR"){
6697                 retval.append(var);
6698                 retval.append(" |= ");
6699                 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6700                 retval.append(";\n");
6701                 return(retval);
6702         }
6703         if(op == "XOR_AGGR"){
6704                 retval.append(var);
6705                 retval.append(" ^= ");
6706                 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6707                 retval.append(";\n");
6708                 return(retval);
6709         }
6710         if(op=="AVG"){
6711                 retval += var+"_sum += "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
6712                 retval += "\t\t"+var+"_cnt += 1;\n";
6713                 retval += "\t\t"+var+" = "+var+"_sum / "+var+"_cnt;\n";
6714                 return retval;
6715         }
6716
6717         fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_update.\n",op.c_str());
6718         exit(1);
6719         return(retval);
6720
6721 }
6722
6723
6724 //              superaggr minus.
6725
6726 static string generate_superaggr_minus(string var, string supervar, aggregate_table *atbl,int aidx, table_list *schema){
6727         string retval = "\t\t";
6728         string op = atbl->get_op(aidx);
6729
6730 //              Is it a UDAF
6731         if(! atbl->is_builtin(aidx)) {
6732                 int o;
6733                 retval += op+"_HFTA_AGGR_MINUS_(";
6734                 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6735                 retval+="("+supervar+"),";
6736                 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6737                 retval+="("+var+");\n";
6738
6739                 return retval;
6740         }
6741
6742
6743         if(op == "COUNT" || op == "SUM"){
6744                 retval += supervar + "-=" +var + ";\n";
6745                 return(retval);
6746         }
6747
6748         if(op == "XOR_AGGR"){
6749                 retval += supervar + "^=" +var + ";\n";
6750                 return(retval);
6751         }
6752
6753         if(op=="MIN" || op == "MAX")
6754                 return "";
6755
6756         fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_superaggr_minus.\n",op.c_str());
6757         exit(1);
6758         return(retval);
6759
6760 }
6761
6762
6763
6764
6765 static string generate_aggr_init(string var, aggregate_table *atbl,int aidx, table_list *schema){
6766         string retval;
6767         string op = atbl->get_op(aidx);
6768
6769 //                      UDAF processing
6770         if(! atbl->is_builtin(aidx)){
6771 //                      initialize
6772                 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_INIT_(";
6773                 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6774                 retval+="("+var+"));\n";
6775 //                      Add 1st tupl
6776                 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_UPDATE_(";
6777                 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6778                 retval+="("+var+")";
6779                 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
6780                 int o;
6781                 for(o=0;o<opl.size();++o){
6782                         retval += ",";
6783                         if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
6784                                         retval.append("&");
6785                                 retval += generate_se_code(opl[o],schema);
6786                         }
6787                 retval += ");\n";
6788                 return(retval);
6789         }
6790
6791 //                      builtin aggregate processing
6792         data_type *dt = atbl->get_data_type(aidx);
6793
6794         if(op == "COUNT"){
6795                 retval = var;
6796                 retval.append(" = 1;\n");
6797                 return(retval);
6798         }
6799
6800         if(op == "SUM" || op == "MIN" || op == "MAX" || op == "AND_AGGR" ||
6801                                         op=="AVG" || op == "OR_AGGR" || op == "XOR_AGGR"){
6802                 if(dt->is_buffer_type()){
6803                         sprintf(tmpstr,"\t\taggr_tmp_%d = %s;\n",aidx,generate_se_code(atbl->get_aggr_se(aidx), schema ).c_str() );
6804                         retval.append(tmpstr);
6805                         sprintf(tmpstr,"\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_assign_copy().c_str(),var.c_str(),aidx);
6806                         retval.append(tmpstr);
6807                 }else{
6808                         if(op=="AVG"){
6809                                 retval += var+"_sum = "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
6810                                 retval += "\t"+var+"_cnt = 1;\n";
6811                                 retval += "\t"+var+" = "+var+"_sum;\n";
6812                         }else{
6813                                 retval = var;
6814                                 retval += " = ";
6815                                 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema));
6816                                 retval.append(";\n");
6817                         }
6818                 }
6819                 return(retval);
6820         }
6821
6822         fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_init.\n",op.c_str());
6823         exit(1);
6824         return(retval);
6825
6826 }
6827
6828
6829
6830 static string generate_aggr_reinitialize(string var, aggregate_table *atbl,int aidx, table_list *schema){
6831         string retval;
6832         string op = atbl->get_op(aidx);
6833
6834 //                      UDAF processing
6835         if(! atbl->is_builtin(aidx)){
6836 //                      initialize
6837                 retval +=  "\t"+atbl->get_op(aidx);
6838                 if(atbl->is_running_aggr(aidx)){
6839                         retval += "_HFTA_AGGR_REINIT_(";
6840                 }else{
6841                         retval += "_HFTA_AGGR_INIT_(";
6842                 }
6843                 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6844                 retval+="("+var+"));\n";
6845                 return(retval);
6846         }
6847
6848 //                      builtin aggregate processing
6849         data_type *dt = atbl->get_data_type(aidx);
6850
6851         if(op == "COUNT"){
6852                 retval = var;
6853                 retval.append(" = 0;\n");
6854                 return(retval);
6855         }
6856
6857         if(op == "SUM" ||  op == "AND_AGGR" ||
6858                                                                         op == "OR_AGGR" || op == "XOR_AGGR"){
6859                 if(dt->is_buffer_type()){
6860                         return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
6861                 }else{
6862                         retval = var;
6863                         retval += " = ";
6864                         literal_t l(dt->type_indicator());
6865                         retval.append(l.to_string());
6866                         retval.append(";\n");
6867                 }
6868                 return(retval);
6869         }
6870
6871         if(op == "MIN"){
6872                 if(dt->is_buffer_type()){
6873                         return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
6874                 }else{
6875                         retval = var;
6876                         retval += " = ";
6877                         retval.append(dt->get_max_literal());
6878                         retval.append(";\n");
6879                 }
6880                 return(retval);
6881         }
6882
6883         if(op == "MAX"){
6884                 if(dt->is_buffer_type()){
6885                         return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
6886                 }else{
6887                         retval = var;
6888                         retval += " = ";
6889                         retval.append(dt->get_min_literal());
6890                         retval.append(";\n");
6891                 }
6892                 return(retval);
6893         }
6894
6895         fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_aggr_reinitialize.\n",op.c_str());
6896         exit(1);
6897         return(retval);
6898
6899 }
6900
6901
6902 //                      Generate parameter holding vars from a param table.
6903 static string generate_param_vars(param_table *param_tbl){
6904         string ret;
6905         int p;
6906         vector<string> param_vec = param_tbl->get_param_names();
6907         for(p=0;p<param_vec.size();p++){
6908                 data_type *dt = param_tbl->get_data_type(param_vec[p]);
6909                 sprintf(tmpstr,"param_%s;\n", param_vec[p].c_str());
6910                 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
6911                 if(param_tbl->handle_access(param_vec[p])){
6912                         ret += "\tstruct search_handle *param_handle_"+param_vec[p]+";\n";
6913                 }
6914         }
6915         return(ret);
6916 }
6917
6918 //                      Parameter manipulation routines
6919 static string generate_load_param_block(string functor_name,
6920                                                         param_table *param_tbl,
6921                                                         vector<handle_param_tbl_entry *> param_handle_table
6922                                                         ){
6923         int p;
6924         vector<string> param_names = param_tbl->get_param_names();
6925
6926         string ret = "int load_params_"+functor_name+"(gs_int32_t sz, void *value){\n";
6927     ret.append("\tint pos=0;\n");
6928     ret.append("\tint data_pos;\n");
6929
6930         for(p=0;p<param_names.size();p++){
6931                 data_type *dt = param_tbl->get_data_type(param_names[p]);
6932                 if(dt->is_buffer_type()){
6933                         sprintf(tmpstr,"tmp_var_%s;\n", param_names[p].c_str());
6934                         ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
6935                 }
6936         }
6937
6938
6939 //              Verify that the block is of minimum size
6940         if(param_names.size() > 0){
6941                 ret += "//\tVerify that the value block is large enough */\n";
6942                 ret.append("\n\tdata_pos = ");
6943                 for(p=0;p<param_names.size();p++){
6944                         if(p>0) ret.append(" + ");
6945                         data_type *dt = param_tbl->get_data_type(param_names[p]);
6946                         ret.append("sizeof( ");
6947                         ret.append( dt->get_host_cvar_type() );
6948                         ret.append(" )");
6949                 }
6950                 ret.append(";\n");
6951                 ret.append("\tif(data_pos > sz) return 1;\n\n");
6952         }
6953
6954 ///////////////////////
6955 ///             Verify that all strings can be unpacked.
6956
6957         ret += "//\tVerify that the strings can be unpacked */\n";
6958         for(p=0;p<param_names.size();p++){
6959                 data_type *dt = param_tbl->get_data_type(param_names[p]);
6960                 if(dt->is_buffer_type()){
6961                         sprintf(tmpstr,"\ttmp_var_%s =  *( (%s *)((gs_sp_t )value+pos) );\n",param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
6962                         ret.append(tmpstr);
6963                         switch( dt->get_type() ){
6964                         case v_str_t:
6965 //                              ret += "\ttmp_var_"+param_names[p]+".offset = ntohl( tmp_var_"+param_names[p]+".offset );\n";           // ntoh conversion
6966 //                              ret += "\ttmp_var_"+param_names[p]+".length = ntohl( tmp_var_"+param_names[p]+".length );\n";   // ntoh conversion
6967                                 sprintf(tmpstr,"\tif( (int)(tmp_var_%s.offset) + tmp_var_%s.length > sz) return 1;\n",param_names[p].c_str(), param_names[p].c_str() );
6968                                 ret.append(tmpstr);
6969                                 sprintf(tmpstr,"\ttmp_var_%s.offset = (gs_p_t)( (gs_sp_t )value + (gs_p_t)(tmp_var_%s.offset) );\n",param_names[p].c_str(), param_names[p].c_str() );
6970                                 ret.append(tmpstr);
6971                         break;
6972                         default:
6973                                 fprintf(stderr,"ERROR: parameter %s is of type %s, a buffered type, but I don't know how to unpack it as a parameter.\n",param_names[p].c_str(), dt->to_string().c_str() );
6974                                 exit(1);
6975                         break;
6976                         }
6977                 }
6978                 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
6979         }
6980
6981
6982 /////////////////////////
6983
6984         ret += "/*\tThe block is OK, do the unpacking.  */\n";
6985         ret += "\tpos = 0;\n";
6986
6987         for(p=0;p<param_names.size();p++){
6988                 data_type *dt = param_tbl->get_data_type(param_names[p]);
6989                 if(dt->is_buffer_type()){
6990             sprintf(tmpstr,"\t%s(&param_%s, &tmp_var_%s);\n", dt->get_hfta_buffer_assign_copy().c_str(),param_names[p].c_str(),param_names[p].c_str() );
6991             ret.append(tmpstr);
6992                 }else{
6993 //                      if(dt->needs_hn_translation()){
6994 //                              sprintf(tmpstr,"\tparam_%s =  %s( *( (%s *)( (gs_sp_t )value+pos) ) );\n",
6995 //                                param_names[p].c_str(), dt->ntoh_translation().c_str(), dt->get_host_cvar_type().c_str() );
6996 //                      }else{
6997                                 sprintf(tmpstr,"\tparam_%s =  *( (%s *)( (gs_sp_t )value+pos) );\n",
6998                                   param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
6999 //                      }
7000                         ret.append(tmpstr);
7001                 }
7002                 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7003         }
7004
7005 //                      TODO: I think this method of handle registration is obsolete
7006 //                      and should be deleted.
7007 //                         some examination reveals that handle_access is always false.
7008         for(p=0;p<param_names.size();p++){
7009                 if(param_tbl->handle_access(param_names[p]) ){
7010                         data_type *pdt = param_tbl->get_data_type(param_names[p]);
7011 //                                      create the new.
7012                         ret += "\tt->param_handle_"+param_names[p]+" = " +
7013                                 pdt->handle_registration_name() +
7014                                 "((struct FTA *)t, &(t->param_"+param_names[p]+"));\n";
7015                 }
7016         }
7017 //                      Register the pass-by-handle parameters
7018
7019         ret += "/* register the pass-by-handle parameters */\n";
7020
7021     int ph;
7022     for(ph=0;ph<param_handle_table.size();++ph){
7023                 data_type pdt(param_handle_table[ph]->type_name);
7024                 switch(param_handle_table[ph]->val_type){
7025                 case cplx_lit_e:
7026                         break;
7027                 case litval_e:
7028                         break;
7029                 case param_e:
7030                         sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7031                         ret += tmpstr;
7032                         if(pdt.is_buffer_type()) ret += "&(";
7033                         ret += "param_"+param_handle_table[ph]->param_name;
7034                         if(pdt.is_buffer_type()) ret += ")";
7035                     ret += ");\n";
7036                         break;
7037                 default:
7038                         fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7039                         exit(1);
7040                 }
7041         }
7042
7043
7044         ret += "\treturn(0);\n";
7045         ret.append("}\n\n");
7046
7047         return(ret);
7048
7049 }
7050
7051 static string generate_delete_param_block(string functor_name,
7052                                                 param_table *param_tbl,
7053                                                 vector<handle_param_tbl_entry *> param_handle_table
7054                                 ){
7055
7056         int p;
7057         vector<string> param_names = param_tbl->get_param_names();
7058
7059         string ret = "void destroy_params_"+functor_name+"(){\n";
7060
7061         for(p=0;p<param_names.size();p++){
7062                 data_type *dt = param_tbl->get_data_type(param_names[p]);
7063                 if(dt->is_buffer_type()){
7064                         sprintf(tmpstr,"\t\t%s(&param_%s);\n",dt->get_hfta_buffer_destroy().c_str(),param_names[p].c_str());
7065                         ret.append(tmpstr);
7066                 }
7067                 if(param_tbl->handle_access(param_names[p]) ){
7068                         ret += "\t\t" + dt->get_handle_destructor() +
7069                                 "(t->param_handle_" + param_names[p] + ");\n";
7070                 }
7071         }
7072
7073         ret += "//\t\tDeregister handles.\n";
7074     int ph;
7075     for(ph=0;ph<param_handle_table.size();++ph){
7076                 if(param_handle_table[ph]->val_type == param_e){
7077                   sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7078                         param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7079                   ret += tmpstr;
7080                 }
7081         }
7082
7083         ret += "}\n\n";
7084         return ret;
7085 }
7086
7087 // ---------------------------------------------------------------------
7088 //              functions for creating functor variables.
7089
7090 static string generate_access_vars(col_id_set &cid_set, table_list *schema){
7091         string ret;
7092         col_id_set::iterator csi;
7093
7094         for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7095         int schref = (*csi).schema_ref;
7096                 int tblref = (*csi).tblvar_ref;
7097                 string field = (*csi).field;
7098                 data_type dt(schema->get_type_name(schref,field));
7099                 sprintf(tmpstr,"unpack_var_%s_%d", field.c_str(), tblref);
7100                 ret+="\t"+dt.make_host_cvar(tmpstr)+";\n";
7101                 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", field.c_str(), tblref);
7102                 ret.append(tmpstr);
7103         }
7104         return(ret);
7105 }
7106
7107 static string generate_partial_fcn_vars(vector<scalarexp_t *> &partial_fcns,
7108         vector<int> &ref_cnt, vector<bool> &is_partial, bool gen_fcn_cache){
7109         string ret;
7110         int p;
7111
7112
7113         for(p=0;p<partial_fcns.size();++p){
7114                 if(!gen_fcn_cache || is_partial[p] ||  ref_cnt[p]>1){
7115                         sprintf(tmpstr,"partial_fcn_result_%d", p);
7116                         ret+="\t"+partial_fcns[p]->get_data_type()->make_host_cvar(tmpstr)+";\n";
7117                         if(gen_fcn_cache && ref_cnt[p]>1){
7118                                 ret+="\tint fcn_ref_cnt_"+int_to_string(p)+";\n";
7119                         }
7120                 }
7121         }
7122         return(ret);
7123 }
7124
7125
7126 static string generate_complex_lit_vars(cplx_lit_table *complex_literals){
7127         string ret;
7128     int cl;
7129     for(cl=0;cl<complex_literals->size();cl++){
7130         literal_t *l = complex_literals->get_literal(cl);
7131         data_type *dtl = new data_type( l->get_type() );
7132         sprintf(tmpstr,"complex_literal_%d",cl);
7133                 ret += "\t"+dtl->make_host_cvar(tmpstr)+";\n";
7134         if(complex_literals->is_handle_ref(cl)){
7135             sprintf(tmpstr,"\tstruct search_handle *lit_handle_%d;\n",cl);
7136             ret.append(tmpstr);
7137         }
7138     }
7139         return(ret);
7140 }
7141
7142
7143 static string generate_pass_by_handle_vars(
7144                                 vector<handle_param_tbl_entry *> &param_handle_table){
7145         string ret;
7146         int p;
7147
7148         for(p=0;p<param_handle_table.size();++p){
7149                 sprintf(tmpstr,"\tgs_param_handle_t handle_param_%d;\n",p);
7150                 ret += tmpstr;
7151         }
7152
7153         return(ret);
7154 }
7155
7156
7157 // ------------------------------------------------------------
7158 //              functions for generating initialization code.
7159
7160 static string gen_access_var_init(col_id_set &cid_set){
7161         string ret;
7162         col_id_set::iterator csi;
7163
7164     for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7165         int tblref = (*csi).tblvar_ref;
7166         string field = (*csi).field;
7167         sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle%d, \"%s\");\n", field.c_str(),tblref,tblref,field.c_str());
7168         ret.append(tmpstr);
7169     }
7170         return ret;
7171 }
7172
7173
7174 static string gen_complex_lit_init(cplx_lit_table *complex_literals){
7175         string ret;
7176
7177         int cl;
7178     for(cl=0;cl<complex_literals->size();cl++){
7179         literal_t *l = complex_literals->get_literal(cl);
7180 //        sprintf(tmpstr,"\tcomplex_literal_%d = ",cl);
7181 //        ret += tmpstr + l->to_hfta_C_code() + ";\n";
7182         sprintf(tmpstr,"&(complex_literal_%d)",cl);
7183         ret += "\t" + l->to_hfta_C_code(tmpstr) + ";\n";
7184 //                      I think that the code below is obsolete
7185 //                      TODO: it is obsolete.  add_cpx_lit is always
7186 //                      called with the handle indicator being false.
7187 //                      This entire structure should be cleansed.
7188         if(complex_literals->is_handle_ref(cl)){
7189             data_type *dt = new data_type( l->get_type() );
7190             sprintf(tmpstr,"\tlit_handle_%d = %s(&(f->complex_literal_%d));\n",
7191                 cl, dt->hfta_handle_registration_name().c_str(), cl);
7192             ret += tmpstr;
7193             delete dt;
7194        }
7195     }
7196         return(ret);
7197 }
7198
7199
7200 static string gen_partial_fcn_init(vector<scalarexp_t *> &partial_fcns){
7201         string ret;
7202
7203         int p;
7204         for(p=0;p<partial_fcns.size();++p){
7205                 data_type *pdt =partial_fcns[p]->get_data_type();
7206                 literal_t empty_lit(pdt->type_indicator());
7207                 if(pdt->is_buffer_type()){
7208 //                      sprintf(tmpstr,"\tpartial_fcn_result_%d = %s;\n",
7209 //                               p, empty_lit.to_hfta_C_code().c_str());
7210                         sprintf(tmpstr,"&(partial_fcn_result_%d)",p);
7211                         ret += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
7212                 }
7213         }
7214         return(ret);
7215 }
7216
7217 static string gen_pass_by_handle_init(
7218                                 vector<handle_param_tbl_entry *> &param_handle_table){
7219         string ret;
7220
7221     int ph;
7222     for(ph=0;ph<param_handle_table.size();++ph){
7223                 data_type pdt(param_handle_table[ph]->type_name);
7224                 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7225                 switch(param_handle_table[ph]->val_type){
7226                 case cplx_lit_e:
7227                         ret += tmpstr;
7228                         if(pdt.is_buffer_type()) ret += "&(";
7229                         sprintf(tmpstr,"complex_literal_%d",param_handle_table[ph]->complex_literal_idx);
7230                         ret += tmpstr;
7231                         if(pdt.is_buffer_type()) ret += ")";
7232                         ret += ");\n";
7233                         break;
7234                 case litval_e:
7235                         ret += tmpstr;
7236                         ret += param_handle_table[ph]->litval->to_hfta_C_code("") + ");\n";
7237 //                      ret += ");\n";
7238                         break;
7239                 case param_e:
7240 //                              query parameter handles are regstered/deregistered in the
7241 //                              load_params function.
7242 //                      ret += "t->param_"+param_handle_table[ph]->param_name;
7243                         break;
7244                 default:
7245                         fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7246                         exit(1);
7247                 }
7248         }
7249         return(ret);
7250 }
7251
7252 //------------------------------------------------------------
7253 //                      functions for destructor and deregistration code
7254
7255 static string gen_complex_lit_dtr(cplx_lit_table *complex_literals){
7256         string ret;
7257
7258         int cl;
7259     for(cl=0;cl<complex_literals->size();cl++){
7260         literal_t *l = complex_literals->get_literal(cl);
7261                 data_type ldt(  l->get_type() );
7262         if(ldt.is_buffer_type()){
7263                         sprintf(tmpstr,"\t\t%s(&complex_literal_%d);\n",
7264                           ldt.get_hfta_buffer_destroy().c_str(), cl );
7265             ret += tmpstr;
7266         }
7267     }
7268         return(ret);
7269 }
7270
7271
7272 static string gen_pass_by_handle_dtr(
7273                                 vector<handle_param_tbl_entry *> &param_handle_table){
7274         string ret;
7275
7276         int ph;
7277     for(ph=0;ph<param_handle_table.size();++ph){
7278                 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7279                         param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7280                 ret += tmpstr;
7281         }
7282         return(ret);
7283 }
7284
7285 //                      Destroy all previous results
7286 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns){
7287         string ret;
7288
7289         int p;
7290         for(p=0;p<partial_fcns.size();++p){
7291                 data_type *pdt =partial_fcns[p]->get_data_type();
7292                 if(pdt->is_buffer_type()){
7293                         sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7294                           pdt->get_hfta_buffer_destroy().c_str(), p );
7295                         ret += tmpstr;
7296                 }
7297         }
7298         return(ret);
7299 }
7300
7301 //              Destroy previous results of fcns in pfcn_set
7302 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns, set<int> &pfcn_set){
7303         string ret;
7304         set<int>::iterator si;
7305
7306         for(si=pfcn_set.begin(); si!=pfcn_set.end(); ++si){
7307                 data_type *pdt =partial_fcns[(*si)]->get_data_type();
7308                 if(pdt->is_buffer_type()){
7309                         sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7310                           pdt->get_hfta_buffer_destroy().c_str(), (*si) );
7311                         ret += tmpstr;
7312                 }
7313         }
7314         return(ret);
7315 }
7316
7317
7318 //-------------------------------------------------------------------------
7319 //                      Functions related to se generation bookkeeping.
7320
7321 static void get_new_pred_cids(predicate_t *pr, col_id_set &found_cids,
7322                                                                 col_id_set &new_cids, gb_table *gtbl){
7323         col_id_set this_pred_cids;
7324         col_id_set::iterator csi;
7325
7326 //                              get colrefs in predicate not already found.
7327         gather_pr_col_ids(pr,this_pred_cids,gtbl);
7328         set_difference(this_pred_cids.begin(), this_pred_cids.end(),
7329                                            found_cids.begin(), found_cids.end(),
7330                                                 inserter(new_cids,new_cids.begin()) );
7331
7332 //                              We've found these cids, so update found_cids
7333         for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7334                 found_cids.insert((*csi));
7335
7336 }
7337
7338 //              after the call, new_cids will have the colrefs in se but not found_cids.
7339 //              update found_cids with the new cids.
7340 static void get_new_se_cids(scalarexp_t *se, col_id_set &found_cids,
7341                                                                 col_id_set &new_cids, gb_table *gtbl){
7342         col_id_set this_se_cids;
7343         col_id_set::iterator csi;
7344
7345 //                              get colrefs in se not already found.
7346         gather_se_col_ids(se,this_se_cids,gtbl);
7347         set_difference(this_se_cids.begin(), this_se_cids.end(),
7348                                            found_cids.begin(), found_cids.end(),
7349                                                 inserter(new_cids,new_cids.begin()) );
7350
7351 //                              We've found these cids, so update found_cids
7352         for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7353                 found_cids.insert((*csi));
7354
7355 }
7356
7357 static string gen_unpack_cids(table_list *schema, col_id_set &new_cids, string on_problem, vector<bool> &needs_xform){
7358         string ret;
7359         col_id_set::iterator csi;
7360
7361         for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7362         int schref = (*csi).schema_ref;
7363             int tblref = (*csi).tblvar_ref;
7364         string field = (*csi).field;
7365                 data_type dt(schema->get_type_name(schref,field));
7366                 string unpack_fcn;
7367                 if(needs_xform[tblref]){
7368                         unpack_fcn = dt.get_hfta_unpack_fcn();
7369                 }else{
7370                         unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
7371                 }
7372                 if(dt.is_buffer_type()){
7373                         sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7374                 }else{
7375                         sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data,  unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref,  field.c_str(), tblref);
7376                 }
7377                 ret += tmpstr;
7378                 if(dt.is_buffer_type()){
7379                         ret += "\tif(problem) return "+on_problem+" ;\n";
7380                 }
7381         }
7382         return(ret);
7383 }
7384
7385 // generates the declaration of all the variables related to
7386 // temp tuples generation
7387 static string gen_decl_temp_vars(){
7388         string ret;
7389
7390         ret += "\t// variables related to temp tuple generation\n";
7391         ret += "\tbool temp_tuple_received;\n";
7392
7393         return(ret);
7394 }
7395
7396 // generates initialization code for variables related to temp tuple processing
7397 static string gen_init_temp_vars(table_list *schema, vector<select_element *>& select_list, gb_table *gtbl){
7398         string ret;
7399         col_id_set::iterator csi;
7400         int s;
7401
7402 //              Initialize internal state
7403         ret += "\ttemp_tuple_received = false;\n";
7404
7405         col_id_set temp_cids;   // colrefs unpacked thus far.
7406
7407         for(s=0;s<select_list.size();s++){
7408                 if (select_list[s]->se->get_data_type()->is_temporal()) {
7409 //                      Find the set of attributes accessed in this SE
7410                         col_id_set new_cids;
7411                         get_new_se_cids(select_list[s]->se,temp_cids, new_cids, gtbl);
7412
7413                         // init these vars
7414                         for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7415                                 int schref = (*csi).schema_ref;
7416                                 int tblref = (*csi).tblvar_ref;
7417                                 string field = (*csi).field;
7418                                 data_type dt(schema->get_type_name(schref,field), schema->get_modifier_list(schref,field));
7419
7420                                 sprintf(tmpstr,"\t unpack_var_%s_%d = %s;\n", field.c_str(), tblref,
7421                                         dt.is_increasing() ? dt.get_min_literal().c_str() : dt.get_max_literal().c_str());
7422                                 ret += tmpstr;
7423                         }
7424                 }
7425         }
7426         return(ret);
7427 }
7428
7429
7430
7431 // generates a check if tuple is temporal
7432 static string gen_temp_tuple_check(string node_name, int channel) {
7433         string ret;
7434
7435         char tmpstr[256];
7436         sprintf(tmpstr, "tup%d", channel);
7437         string tup_name = tmpstr;
7438         sprintf(tmpstr, "schema_handle%d", channel);
7439         string schema_handle_name = tmpstr;
7440         string tuple_offset_name = "tuple_metadata_offset"+int_to_string(channel);
7441
7442 //                      check if it is a temporary status tuple
7443         ret += "\t// check if tuple is temp status tuple\n";
7444 //              ret += "\tif (ftaschema_is_temporal_tuple(" + schema_handle_name + ", " + tup_name + ".data)) {\n";
7445         ret += "\tif (ftaschema_is_temporal_tuple_offset(" + tuple_offset_name + ", " + tup_name + ".data)) {\n";
7446         ret += "\t\ttemp_tuple_received = true;\n";
7447         ret += "\t}\n";
7448         ret += "\telse\n\t\ttemp_tuple_received = false;\n\n";
7449
7450         return(ret);
7451 }
7452
7453 // generates unpacking code for all temporal attributes referenced in select
7454 static string gen_unpack_temp_vars(table_list *schema, col_id_set& found_cids, vector<select_element *>& select_list, gb_table *gtbl, vector<bool> &needs_xform) {
7455         string ret;
7456         int s;
7457
7458 //              Unpack all the temporal attributes references in select list
7459 //              we need it to be able to generate temp status tuples
7460         for(s=0;s<select_list.size();s++){
7461                 if (select_list[s]->se->get_data_type()->is_temporal()) {
7462 //                      Find the set of attributes accessed in this SE
7463                         col_id_set new_cids;
7464                         get_new_se_cids(select_list[s]->se,found_cids, new_cids, gtbl);
7465 //                      Unpack these values.
7466                         ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
7467                 }
7468         }
7469
7470         return(ret);
7471 }
7472
7473
7474 //              Generates temporal tuple generation code (except attribute packing)
7475 static string gen_init_temp_status_tuple(string node_name) {
7476         string ret;
7477
7478         ret += "\t// create temp status tuple\n";
7479         ret += "\tresult.tuple_size = sizeof("+generate_tuple_name( node_name)+") + sizeof(gs_uint8_t);\n";
7480         ret += "\tresult.data = (gs_sp_t )malloc(result.tuple_size);\n";
7481         ret += "\tresult.heap_resident = true;\n";
7482         ret += "\t//            Mark tuple as temporal\n";
7483         ret += "\t*((gs_sp_t )result.data + sizeof("+generate_tuple_name( node_name)+")) = TEMPORAL_TUPLE;\n";
7484
7485         ret += "\t"+generate_tuple_name( node_name)+" *tuple = ("+
7486                 generate_tuple_name( node_name) +" *)(result.data);\n";
7487
7488         return(ret);
7489 }
7490
7491
7492 //              Assume that all colrefs unpacked already ...
7493 static string gen_unpack_partial_fcn(table_list *schema,
7494                                         vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7495                                         string on_problem){
7496         string ret;
7497         set<int>::iterator si;
7498
7499 //                      Since set<..> is a "Sorted Associative Container",
7500 //                      we can walk through it in sorted order by walking from
7501 //                      begin() to end().  (and the partial fcns must be
7502 //                      evaluated in this order).
7503         for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7504                 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7505                 ret += "\tif(retval) return "+on_problem+" ;\n";
7506         }
7507         return(ret);
7508 }
7509
7510 //              Assume that all colrefs unpacked already ...
7511 //              this time with cached functions.
7512 static string gen_unpack_partial_fcn(table_list *schema,
7513                                         vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7514                                         vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
7515                                         string on_problem){
7516         string ret;
7517         set<int>::iterator si;
7518
7519 //                      Since set<..> is a "Sorted Associative Container",
7520 //                      we can walk through it in sorted order by walking from
7521 //                      begin() to end().  (and the partial fcns must be
7522 //                      evaluated in this order).
7523         for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7524                 if(fcn_ref_cnt[(*si)] > 1){
7525                         ret += "\tif(fcn_ref_cnt_"+int_to_string((*si))+"==0){\n";
7526                 }
7527                 if(is_partial_fcn[(*si)]){
7528                         ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7529                         ret += "\tif(retval) return "+on_problem+" ;\n";
7530                 }
7531                 if(fcn_ref_cnt[(*si)] > 1){
7532                         if(!is_partial_fcn[(*si)]){
7533                                 ret += "\t\tpartial_fcn_result_"+int_to_string((*si))+"="+generate_cached_fcn(partial_fcns[(*si)],(*si),schema)+";\n";
7534                         }
7535                         ret += "\t\tfcn_ref_cnt_"+int_to_string((*si))+"=1;\n";
7536                         ret += "\t}\n";
7537                 }
7538         }
7539
7540         return(ret);
7541 }
7542
7543
7544 //              This version finds and unpacks new colrefs.
7545 //              found_cids gets updated with the newly unpacked cids.
7546 static string gen_full_unpack_partial_fcn(table_list *schema,
7547                                         vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7548                                         col_id_set &found_cids, gb_table *gtbl, string on_problem,
7549                                         vector<bool> &needs_xform){
7550         string ret;
7551         set<int>::iterator slsi;
7552
7553         for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
7554 //                      find all new fields ref'd by this partial fcn.
7555                 col_id_set new_cids;
7556                 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
7557 //                      Unpack these values.
7558                 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
7559
7560 //                      Now evaluate the partial fcn.
7561                 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
7562                 ret += "\tif(retval) return "+on_problem+" ;\n";
7563         }
7564         return(ret);
7565 }
7566
7567 //              This version finds and unpacks new colrefs.
7568 //              found_cids gets updated with the newly unpacked cids.
7569 //                      BUT : only for the partial functions.
7570 static string gen_full_unpack_partial_fcn(table_list *schema,
7571                                         vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7572                                         vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
7573                                         col_id_set &found_cids, gb_table *gtbl, string on_problem,
7574                                         vector<bool> &needs_xform){
7575         string ret;
7576         set<int>::iterator slsi;
7577
7578         for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
7579           if(is_partial_fcn[(*slsi)]){
7580 //                      find all new fields ref'd by this partial fcn.
7581                 col_id_set new_cids;
7582                 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
7583 //                      Unpack these values.
7584                 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
7585
7586 //                      Now evaluate the partial fcn.
7587                 if(fcn_ref_cnt[(*slsi)] > 1){
7588                         ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
7589                 }
7590                 if(is_partial_fcn[(*slsi)]){
7591                         ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
7592                         ret += "\tif(retval) return "+on_problem+" ;\n";
7593                 }
7594                 if(fcn_ref_cnt[(*slsi)] > 1){
7595                         ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
7596                         ret += "\t}\n";
7597                 }
7598
7599           }
7600         }
7601         return(ret);
7602 }
7603
7604 static string gen_remaining_cached_fcns(table_list *schema,
7605                                         vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7606                                         vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn){
7607         string ret;
7608         set<int>::iterator slsi;
7609
7610         for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
7611           if(!is_partial_fcn[(*slsi)] && fcn_ref_cnt[(*slsi)] > 1){
7612
7613                 if(fcn_ref_cnt[(*slsi)] > 1){
7614                         ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
7615                         ret += "\t\tpartial_fcn_result_"+int_to_string((*slsi))+"="+generate_cached_fcn(partial_fcns[(*slsi)],(*slsi),schema)+";\n";
7616                         ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
7617                         ret += "\t}\n";
7618                 }
7619           }
7620         }
7621         return(ret);
7622 }
7623
7624
7625 //              unpack the colrefs in cid_set not in found_cids
7626 static string gen_remaining_colrefs(table_list *schema,
7627                         col_id_set &cid_set, col_id_set &found_cids, string on_problem,
7628                         vector<bool> &needs_xform){
7629         string ret;
7630         col_id_set::iterator csi;
7631
7632         for(csi=cid_set.begin(); csi!=cid_set.end();csi++){
7633                 if(found_cids.count( (*csi) ) == 0){
7634                 int schref = (*csi).schema_ref;
7635                     int tblref = (*csi).tblvar_ref;
7636                 string field = (*csi).field;
7637                         data_type dt(schema->get_type_name(schref,field));
7638                         string unpack_fcn;
7639                         if(needs_xform[tblref]){
7640                                 unpack_fcn = dt.get_hfta_unpack_fcn();
7641                         }else{
7642                                 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
7643                         }
7644                         if(dt.is_buffer_type()){
7645                                 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7646                         }else{
7647                                 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
7648                         }
7649                         ret += tmpstr;
7650                         if(dt.is_buffer_type()){
7651                                 ret.append("\tif(problem) return "+on_problem+" ;\n");
7652                         }
7653                 }
7654         }
7655         return(ret);
7656 }
7657
7658 static string gen_buffer_selvars(table_list *schema,
7659                                                                 vector<select_element *> &select_list){
7660         string ret;
7661         int s;
7662
7663     for(s=0;s<select_list.size();s++){
7664                 scalarexp_t *se = select_list[s]->se;
7665         data_type *sdt = se->get_data_type();
7666         if(sdt->is_buffer_type() &&
7667                         !( (se->get_operator_type() == SE_COLREF) ||
7668                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
7669                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
7670                 ){
7671             sprintf(tmpstr,"selvar_%d",s);
7672                         ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
7673                         ret += generate_se_code(se,schema) +";\n";
7674         }
7675     }
7676         return(ret);
7677 }
7678
7679 static string gen_buffer_selvars_size(vector<select_element *> &select_list,table_list *schema){
7680         string ret;
7681         int s;
7682
7683     for(s=0;s<select_list.size();s++){
7684                 scalarexp_t *se = select_list[s]->se;
7685         data_type *sdt = se->get_data_type();
7686         if(sdt->is_buffer_type()){
7687                   if( !( (se->get_operator_type() == SE_COLREF) ||
7688                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
7689                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
7690                   ){
7691             sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
7692             ret.append(tmpstr);
7693                   }else{
7694             sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),
7695                                 generate_se_code(se,schema).c_str());
7696             ret.append(tmpstr);
7697                   }
7698         }
7699     }
7700         return(ret);
7701 }
7702
7703 static string gen_buffer_selvars_dtr(vector<select_element *> &select_list){
7704         string ret;
7705         int s;
7706
7707     for(s=0;s<select_list.size();s++){
7708                 scalarexp_t *se = select_list[s]->se;
7709         data_type *sdt = se->get_data_type();
7710         if(sdt->is_buffer_type() &&
7711                         !( (se->get_operator_type() == SE_COLREF) ||
7712                                 (se->get_operator_type() == SE_AGGR_STAR) ||
7713                                 (se->get_operator_type() == SE_AGGR_SE) ||
7714                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
7715                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
7716                         ){
7717                                 sprintf(tmpstr,"\t\t%s(&selvar_%d);\n",
7718                                   sdt->get_hfta_buffer_destroy().c_str(), s );
7719                 ret += tmpstr;
7720         }
7721     }
7722         return(ret);
7723 }
7724
7725
7726 static string gen_pack_tuple(table_list *schema, vector<select_element *> &select_list, string node_name, bool temporal_only){
7727         string ret;
7728         int s;
7729
7730         ret += "\tint tuple_pos = sizeof("+generate_tuple_name(node_name)+") + sizeof(gs_uint8_t);\n";
7731     for(s=0;s<select_list.size();s++){
7732                 scalarexp_t *se  = select_list[s]->se;
7733         data_type *sdt = se->get_data_type();
7734
7735         if(!temporal_only && sdt->is_buffer_type()){
7736                   if( !( (se->get_operator_type() == SE_COLREF) ||
7737                            (se->get_operator_type() == SE_FUNC && se->is_partial()))
7738                         ){
7739                 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
7740                 ret.append(tmpstr);
7741                 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
7742                 ret.append(tmpstr);
7743                         }else{
7744                 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code(se,schema).c_str());
7745                 ret.append(tmpstr);
7746                 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code(se,schema).c_str());
7747                 ret.append(tmpstr);
7748                         }
7749         }else if (!temporal_only || sdt->is_temporal()) {
7750             sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
7751             ret.append(tmpstr);
7752             ret.append(generate_se_code(se,schema) );
7753             ret.append(";\n");
7754         }
7755     }
7756         return(ret);
7757 }
7758
7759
7760 //-------------------------------------------------------------------------
7761 //                      functor generation methods
7762 //-------------------------------------------------------------------------
7763
7764 /////////////////////////////////////////////////////////
7765 ////                    File Output Operator
7766 string output_file_qpn::generate_functor_name(){
7767         return("output_file_functor_" + normalize_name(get_node_name()));
7768 }
7769
7770
7771 string output_file_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
7772         string ret = "class " + this->generate_functor_name() + "{\n";
7773
7774 //              Find the temporal field
7775         int temporal_field_idx;
7776         data_type *tdt = NULL;
7777         for(temporal_field_idx=0;temporal_field_idx<fields.size();temporal_field_idx++){
7778                 tdt = new data_type(fields[temporal_field_idx]->get_type(), fields[temporal_field_idx]->get_modifier_list());
7779                 if(tdt->is_temporal()){
7780                         break;
7781                 }else{
7782                         delete tdt;
7783                 }
7784         }
7785
7786         if(temporal_field_idx == fields.size()){
7787                 fprintf(stderr,"ERROR, no temporal field for file output operator %s\n",node_name.c_str());
7788                 exit(1);
7789         }
7790
7791         ret += "private:\n";
7792
7793         // var to save the schema handle
7794         ret += "\tint schema_handle0;\n";
7795 //                      tuple metadata offset
7796         ret += "\tint tuple_metadata_offset0;\n";
7797         sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n",  fields[temporal_field_idx]->get_name().c_str());
7798         ret.append(tmpstr);
7799
7800 //              For unpacking the hashing fields, if any
7801         int h;
7802         for(h=0;h<hash_flds.size();++h){
7803                 sprintf(tmpstr,"unpack_var_%s", fields[hash_flds[h]]->get_name().c_str());
7804                 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
7805                 ret+="\t"+hdt->make_host_cvar(tmpstr)+";\n";
7806                 if(hash_flds[h]!=temporal_field_idx){
7807                         sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n",  fields[hash_flds[h]]->get_name().c_str());
7808                         ret.append(tmpstr);
7809                 }
7810         }
7811 //              Specail case for output file hashing
7812         if(n_streams>1 && hash_flds.size()==0){
7813                 ret+="\tgs_uint32_t outfl_cnt;\n";
7814         }
7815
7816         ret += "//\t\tRemember the last posted timestamp.\n";
7817         ret+="\t"+tdt->make_host_cvar("timestamp")+";\n";
7818         ret+="\t"+tdt->make_host_cvar("last_bucket")+";\n";
7819         ret+="\t"+tdt->make_host_cvar("slack")+";\n";
7820         ret += "\tbool first_execution;\n";
7821         ret += "\tbool temp_tuple_received;\n";
7822         ret += "\tbool is_eof;\n";
7823
7824         ret += "\tgs_int32_t bucketwidth;\n";
7825
7826         ret += "public:\n";
7827 //-------------------
7828 //                      The functor constructor
7829 //                      pass in a schema handle (e.g. for the 1st input stream),
7830 //                      use it to determine how to unpack the merge variable.
7831 //                      ASSUME that both streams have the same layout,
7832 //                      just duplicate it.
7833
7834 //              unpack vars
7835         ret += "//\t\tFunctor constructor.\n";
7836         ret +=  this->generate_functor_name()+"(int schema_hndl){\n";
7837
7838         ret += "\tschema_handle0 = schema_hndl;\n";
7839 //              tuple metadata offset
7840         ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
7841
7842         if(output_spec->bucketwidth == 0)
7843                 ret += "\tbucketwidth = 60;\n";
7844         else
7845                 ret += "\tbucketwidth = "+int_to_string(output_spec->bucketwidth)+";\n";
7846         ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
7847
7848    sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[temporal_field_idx]->get_name().c_str(), fields[temporal_field_idx]->get_name().c_str());
7849    ret.append(tmpstr);
7850 //              Hashing field unpacking, if any
7851         for(h=0;h<hash_flds.size();++h){
7852                 if(hash_flds[h]!=temporal_field_idx){
7853                         sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n",  fields[hash_flds[h]]->get_name().c_str(),fields[hash_flds[h]]->get_name().c_str());
7854                         ret.append(tmpstr);
7855                 }
7856         }
7857
7858         ret+="\tfirst_execution = true;\n";
7859
7860 //              Initialize internal state
7861         ret += "\ttemp_tuple_received = false;\n";
7862
7863         //              Init last timestamp values to minimum value for their type
7864         if (tdt->is_increasing()){
7865                 ret+="\ttimestamp = " + tdt->get_min_literal() + ";\n";
7866                 ret+="\tlast_bucket = " + tdt->get_min_literal() + ";\n";
7867         }else{
7868                 ret+="\ttimestamp = " + tdt->get_max_literal() + ";\n";
7869                 ret+="\tlast_bucket = " + tdt->get_max_literal() + ";\n";
7870         }
7871
7872
7873         ret += "};\n\n";
7874
7875         ret += "//\t\tFunctor destructor.\n";
7876         ret +=  "~"+this->generate_functor_name()+"(){\n";
7877         ret+="};\n\n";
7878
7879
7880         ret += "int load_params_"+this->generate_functor_name()+"(gs_int32_t sz, void *value){return 0;}\n";
7881         ret += "void destroy_params_"+this->generate_functor_name()+"(){}\n";
7882
7883 //                      Register new parameter block
7884         ret += "int set_param_block(gs_int32_t sz, void* value){\n";
7885           ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
7886           ret += "\treturn this->load_params_"+this->generate_functor_name()+
7887                                 "(sz, value);\n";
7888         ret += "};\n\n";
7889
7890         ret+="\nbool temp_status_received(const host_tuple& tup0)/* const*/   {\n";
7891         ret+="\tgs_int32_t problem;\n";
7892
7893         ret += "\tvoid *tup_ptr = (void *)(&tup0);\n";
7894         ret += "\tis_eof = ftaschema_is_eof_tuple(schema_handle0,tup_ptr);\n";
7895
7896         ret += gen_temp_tuple_check(this->node_name, 0);
7897
7898         sprintf(tmpstr,"\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n",  tdt->get_hfta_unpack_fcn_noxf().c_str(), fields[temporal_field_idx]->get_name().c_str(), 0);
7899         ret += tmpstr;
7900
7901         for(h=0;h<hash_flds.size();++h){
7902                 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
7903                 sprintf(tmpstr,"\tunpack_var_%s = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", fields[hash_flds[h]]->get_name().c_str(), hdt->get_hfta_unpack_fcn_noxf().c_str(), fields[hash_flds[h]]->get_name().c_str(), 0);
7904         ret += tmpstr;
7905         }
7906         ret +=
7907 "       return temp_tuple_received;\n"
7908 "}\n"
7909 "\n"
7910 ;
7911
7912         ret +=
7913 "bool new_epoch(){\n"
7914 "       if(first_execution || (last_bucket + 1) * bucketwidth <= timestamp){\n"
7915 "               last_bucket = timestamp / bucketwidth;\n"
7916 "               first_execution = false;\n"
7917 "               return true;\n"
7918 "       }\n"
7919 "       return false;\n"
7920 "}\n"
7921 "\n"
7922 ;
7923
7924         if(n_streams <= 1){
7925                 ret+=
7926 "inline gs_uint32_t output_hash(){return 0;}\n\n";
7927         }else{
7928                 if(hash_flds.size()==0){
7929                         ret +=
7930 "gs_uint32_t output_hash(){\n"
7931 "       outfl_cnt++;\n"
7932 "       if(outfl_cnt >= "+int_to_string(n_streams)+")\n"
7933 "               outfl_cnt = 0;\n"
7934 "       return outfl_cnt;\n"
7935 "}\n"
7936 "\n"
7937 ;
7938                 }else{
7939                         ret +=
7940 "gs_uint32_t output_hash(){\n"
7941 "       gs_uint32_t ret = "
7942 ;
7943                         for(h=0;h<hash_flds.size();++h){
7944                                 if(h>0) ret += "^";
7945                                 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
7946                                 if(hdt->use_hashfunc()){
7947                                         sprintf(tmpstr,"%s(&(unpack_var_%s))",hdt->get_hfta_hashfunc().c_str(),fields[hash_flds[h]]->get_name().c_str());
7948                                 }else{
7949                                         sprintf(tmpstr,"unpack_var_%s",fields[hash_flds[h]]->get_name().c_str());
7950                                 }
7951                                 ret += tmpstr;
7952                         }
7953                         ret +=
7954 ";\n"
7955 "       return  ret % "+int_to_string(hash_flds.size())+";\n"
7956 "}\n\n"
7957 ;
7958                 }
7959         }
7960
7961 ret +=
7962 "gs_uint32_t num_file_streams(){\n"
7963 "       return("+int_to_string(n_streams)+");\n"
7964 "}\n\n"
7965 ;
7966
7967         ret +=
7968 "string get_filename_base(){\n"
7969 "       char tmp_fname[500];\n";
7970
7971         string output_filename_base = hfta_query_name+filestream_id;
7972 /*
7973         if(n_hfta_clones > 1){
7974                 output_filename_base += "_"+int_to_string(parallel_idx);
7975         }
7976 */
7977
7978
7979
7980         if(output_spec->output_directory == "")
7981                 ret +=
7982 "       sprintf(tmp_fname,\""+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
7983                 else ret +=
7984 "       sprintf(tmp_fname,\""+output_spec->output_directory+"/"+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
7985 ret +=
7986 "       return (string)(tmp_fname);\n"
7987 "}\n"
7988 "\n";
7989
7990
7991 ret+=
7992 "bool do_compression(){\n";
7993         if(do_gzip)
7994                 ret += "        return true;\n";
7995         else
7996                 ret += "        return false;\n";
7997 ret+=
7998 "}\n"
7999 "\n"
8000 "bool is_eof_tuple(){\n"
8001 "       return is_eof;\n"
8002 "}\n"
8003 "\n"
8004 "bool propagate_tuple(){\n"
8005 ;
8006 if(eat_input)
8007         ret+="\treturn false;\n";
8008 else
8009         ret+="\treturn true;\n";
8010 ret+="}\n\n";
8011 //              create a temp status tuple
8012         ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8013
8014         ret += gen_init_temp_status_tuple(this->hfta_query_name);
8015
8016         sprintf(tmpstr,"\ttuple->tuple_var%d = timestamp;\n",temporal_field_idx);
8017
8018
8019         ret += tmpstr;
8020
8021         ret += "\treturn 0;\n";
8022         ret += "}\n\n";
8023         ret += "};\n\n";
8024
8025         return ret;
8026 }
8027
8028
8029 string output_file_qpn::generate_operator(int i, string params){
8030         string optype = "file_output_operator";
8031         switch(compression_type){
8032         case regular:
8033                 optype = "file_output_operator";
8034         break;
8035         case gzip:
8036                 optype = "zfile_output_operator";
8037         break;
8038         case bzip:
8039                 optype = "bfile_output_operator";
8040         break;
8041         }
8042
8043                 return("        "+optype+"<" +
8044                 generate_functor_name() +
8045                 "> *op"+int_to_string(i)+" = new "+optype+"<"+
8046                 generate_functor_name() +">("+params+", \"" + hfta_query_name + "\""
8047                 + "," + hfta_query_name + "_schema_definition);\n");
8048 }
8049
8050 /////////////////////////////////////////////////////////
8051 //////                  SPX functor
8052
8053
8054 string spx_qpn::generate_functor_name(){
8055         return("spx_functor_" + normalize_name(normalize_name(this->get_node_name())));
8056 }
8057
8058 string spx_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8059 //                      Initialize generate utility globals
8060         segen_gb_tbl = NULL;
8061
8062         string ret = "class " + this->generate_functor_name() + "{\n";
8063
8064 //                      Find variables referenced in this query node.
8065
8066   col_id_set cid_set;
8067   col_id_set::iterator csi;
8068
8069         int w, s, p;
8070     for(w=0;w<where.size();++w)
8071         gather_pr_col_ids(where[w]->pr,cid_set,NULL);
8072     for(s=0;s<select_list.size();s++){
8073         gather_se_col_ids(select_list[s]->se,cid_set,NULL);
8074     }
8075
8076
8077 //                      Private variables : store the state of the functor.
8078 //                      1) variables for unpacked attributes
8079 //                      2) offsets of the upacked attributes
8080 //                      3) storage of partial functions
8081 //                      4) storage of complex literals (i.e., require a constructor)
8082
8083         ret += "private:\n";
8084         ret += "\tbool first_execution;\t// internal processing state \n";
8085         ret += "\tint schema_handle0;\n";
8086
8087         // generate the declaration of all the variables related to
8088         // temp tuples generation
8089         ret += gen_decl_temp_vars();
8090
8091
8092 //                      unpacked attribute storage, offsets
8093         ret += "//\t\tstorage and offsets of accessed fields.\n";
8094         ret += generate_access_vars(cid_set,schema);
8095 //                      tuple metadata management
8096         ret += "\tint tuple_metadata_offset0;\n";
8097
8098 //                      Variables to store results of partial functions.
8099 //                      WARNING find_partial_functions modifies the SE
8100 //                      (it marks the partial function id).
8101         ret += "//\t\tParital function result storage\n";
8102         vector<scalarexp_t *> partial_fcns;
8103         vector<int> fcn_ref_cnt;
8104         vector<bool> is_partial_fcn;
8105         for(s=0;s<select_list.size();s++){
8106                 find_partial_fcns(select_list[s]->se, &partial_fcns,&fcn_ref_cnt,&is_partial_fcn, Ext_fcns);
8107         }
8108         for(w=0;w<where.size();w++){
8109                 find_partial_fcns_pr(where[w]->pr, &partial_fcns, &fcn_ref_cnt,&is_partial_fcn,Ext_fcns);
8110         }
8111 //              Unmark non-partial expensive functions referenced only once.
8112         for(p=0; p<partial_fcns.size();p++){
8113                 if(!is_partial_fcn[p] && fcn_ref_cnt[p] <= 1){
8114                         partial_fcns[p]->set_partial_ref(-1);
8115                 }
8116         }
8117         if(partial_fcns.size()>0){
8118           ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,true);
8119         }
8120
8121 //                      Complex literals (i.e., they need constructors)
8122         ret += "//\t\tComplex literal storage.\n";
8123         cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8124         ret += generate_complex_lit_vars(complex_literals);
8125
8126 //                      Pass-by-handle parameters
8127         ret += "//\t\tPass-by-handle storage.\n";
8128         vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8129         ret += generate_pass_by_handle_vars(param_handle_table);
8130
8131 //                      Variables to hold parameters
8132         ret += "//\tfor query parameters\n";
8133         ret += generate_param_vars(param_tbl);
8134
8135
8136 //                      The publicly exposed functions
8137
8138         ret += "\npublic:\n";
8139
8140
8141 //-------------------
8142 //                      The functor constructor
8143 //                      pass in the schema handle.
8144 //                      1) make assignments to the unpack offset variables
8145 //                      2) initialize the complex literals
8146 //                      3) Set the initial values of the temporal attributes
8147 //                              referenced in select clause (in case we need to emit
8148 //                              temporal tuple before receiving first tuple )
8149
8150         ret += "//\t\tFunctor constructor.\n";
8151         ret +=  this->generate_functor_name()+"(int schema_handle0){\n";
8152
8153 //              save schema handle
8154         ret += "this->schema_handle0 = schema_handle0;\n";
8155
8156 //              unpack vars
8157         ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8158         ret += gen_access_var_init(cid_set);
8159 //              tuple metadata
8160         ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8161
8162 //              complex literals
8163         ret += "//\t\tInitialize complex literals.\n";
8164         ret += gen_complex_lit_init(complex_literals);
8165
8166 //              Initialize partial function results so they can be safely GC'd
8167         ret += gen_partial_fcn_init(partial_fcns);
8168
8169 //              Initialize non-query-parameter parameter handles
8170         ret += gen_pass_by_handle_init(param_handle_table);
8171
8172 //              Init temporal attributes referenced in select list
8173         ret += gen_init_temp_vars(schema, select_list, NULL);
8174
8175         ret += "};\n\n";
8176
8177
8178 //-------------------
8179 //                      Functor destructor
8180         ret += "//\t\tFunctor destructor.\n";
8181         ret +=  "~"+this->generate_functor_name()+"(){\n";
8182
8183 //              clean up buffer-type complex literals.
8184         ret += gen_complex_lit_dtr(complex_literals);
8185
8186 //                      Deregister the pass-by-handle parameters
8187         ret += "/* register and de-register the pass-by-handle parameters */\n";
8188         ret += gen_pass_by_handle_dtr(param_handle_table);
8189
8190 //                      Reclaim buffer space for partial fucntion results
8191         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8192         ret += gen_partial_fcn_dtr(partial_fcns);
8193
8194
8195 //                      Destroy the parameters, if any need to be destroyed
8196         ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8197
8198         ret += "};\n\n";
8199
8200
8201 //-------------------
8202 //                      Parameter manipulation routines
8203         ret += generate_load_param_block(this->generate_functor_name(),
8204                                                                         this->param_tbl,param_handle_table );
8205         ret += generate_delete_param_block(this->generate_functor_name(),
8206                                                                         this->param_tbl,param_handle_table);
8207
8208
8209 //-------------------
8210 //                      Register new parameter block
8211         ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8212           ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8213           ret += "\treturn this->load_params_"+this->generate_functor_name()+
8214                                 "(sz, value);\n";
8215         ret += "};\n\n";
8216
8217
8218 //-------------------
8219 //                      The selection predicate.
8220 //                      Unpack variables for 1 cnf element
8221 //                      at a time, return false immediately if the
8222 //                      predicate fails.
8223 //                      optimization : evaluate the cheap cnf elements
8224 //                      first, the expensive ones last.
8225
8226         ret += "bool predicate(host_tuple &tup0){\n";
8227         //              Variables for execution of the function.
8228         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
8229 //              Initialize cached function indicators.
8230         for(p=0;p<partial_fcns.size();++p){
8231                 if(fcn_ref_cnt[p]>1){
8232                         ret+="\tfcn_ref_cnt_"+int_to_string(p)+"=0;\n";
8233                 }
8234         }
8235
8236
8237         ret += gen_temp_tuple_check(this->node_name, 0);
8238
8239         if(partial_fcns.size()>0){              // partial fcn access failure
8240           ret += "\tgs_retval_t retval = 0;\n";
8241           ret += "\n";
8242         }
8243
8244 //                      Reclaim buffer space for partial fucntion results
8245         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8246         ret += gen_partial_fcn_dtr(partial_fcns);
8247
8248         col_id_set found_cids;  // colrefs unpacked thus far.
8249         ret += gen_unpack_temp_vars(schema, found_cids, select_list, NULL, needs_xform);
8250
8251 //              For temporal status tuple we don't need to do anything else
8252         ret += "\tif (temp_tuple_received) return false;\n\n";
8253
8254
8255         for(w=0;w<where.size();++w){
8256                 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8257                 ret += tmpstr;
8258 //                      Find the set of variables accessed in this CNF elem,
8259 //                      but in no previous element.
8260                 col_id_set new_cids;
8261                 get_new_pred_cids(where[w]->pr,found_cids, new_cids, NULL);
8262 //                      Unpack these values.
8263                 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
8264 //                      Find partial fcns ref'd in this cnf element
8265                 set<int> pfcn_refs;
8266                 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8267                 ret += gen_unpack_partial_fcn(schema,partial_fcns,pfcn_refs,fcn_ref_cnt, is_partial_fcn, "false");
8268
8269                 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8270                                 +") ) return(false);\n";
8271         }
8272
8273 //              The partial functions ref'd in the select list
8274 //              must also be evaluated.  If one returns false,
8275 //              then implicitly the predicate is false.
8276         set<int> sl_pfcns;
8277         for(s=0;s<select_list.size();s++){
8278                 collect_partial_fcns(select_list[s]->se, sl_pfcns);
8279         }
8280         if(sl_pfcns.size() > 0)
8281                 ret += "//\t\tUnpack remaining partial fcns.\n";
8282         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
8283                                         fcn_ref_cnt, is_partial_fcn,
8284                                         found_cids, NULL, "false", needs_xform);
8285
8286 //                      Unpack remaining fields
8287         ret += "//\t\tunpack any remaining fields from the input tuple.\n";
8288         ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
8289
8290
8291         ret += "\treturn(true);\n";
8292         ret += "};\n\n";
8293
8294
8295 //-------------------
8296 //                      The output tuple function.
8297 //                      Unpack the remaining attributes into
8298 //                      the placeholder variables, unpack the
8299 //                      partial fcn refs, then pack up the tuple.
8300
8301         ret += "host_tuple create_output_tuple() {\n";
8302         ret += "\thost_tuple tup;\n";
8303         ret += "\tgs_retval_t retval = 0;\n";
8304
8305 //                      Unpack any remaining cached functions.
8306         ret += gen_remaining_cached_fcns(schema, partial_fcns, sl_pfcns,
8307                                         fcn_ref_cnt, is_partial_fcn);
8308
8309
8310 //          Now, compute the size of the tuple.
8311
8312 //          Unpack any BUFFER type selections into temporaries
8313 //          so that I can compute their size and not have
8314 //          to recompute their value during tuple packing.
8315 //          I can use regular assignment here because
8316 //          these temporaries are non-persistent.
8317
8318         ret += "//\t\tCompute the size of the tuple.\n";
8319         ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
8320
8321 //                      Unpack all buffer type selections, to be able to compute their size
8322         ret += gen_buffer_selvars(schema, select_list);
8323
8324 //      The size of the tuple is the size of the tuple struct plus the
8325 //      size of the buffers to be copied in.
8326
8327
8328       ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
8329         ret += gen_buffer_selvars_size(select_list,schema);
8330         ret.append(";\n");
8331
8332 //              Allocate tuple data block.
8333         ret += "//\t\tCreate the tuple block.\n";
8334           ret += "\ttup.data = malloc(tup.tuple_size);\n";
8335           ret += "\ttup.heap_resident = true;\n";
8336 //              Mark tuple as regular
8337           ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
8338
8339 //        ret += "\ttup.channel = 0;\n";
8340           ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
8341                                 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
8342
8343 //              Start packing.
8344 //                      (Here, offsets are hard-wired.  is this a problem?)
8345
8346         ret += "//\t\tPack the fields into the tuple.\n";
8347         ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
8348
8349 //                      Delete string temporaries
8350         ret += gen_buffer_selvars_dtr(select_list);
8351
8352         ret += "\treturn tup;\n";
8353         ret += "};\n";
8354
8355 //-------------------------------------------------------------------
8356 //              Temporal update functions
8357
8358         ret += "bool temp_status_received(){return temp_tuple_received;};\n\n";
8359
8360
8361 //              create a temp status tuple
8362         ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8363
8364         ret += gen_init_temp_status_tuple(this->get_node_name());
8365
8366 //              Start packing.
8367 //                      (Here, offsets are hard-wired.  is this a problem?)
8368
8369         ret += "//\t\tPack the fields into the tuple.\n";
8370         ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
8371
8372         ret += "\treturn 0;\n";
8373         ret += "};};\n\n";
8374
8375         return(ret);
8376 }
8377
8378
8379 string spx_qpn::generate_operator(int i, string params){
8380
8381                 return("        select_project_operator<" +
8382                 generate_functor_name() +
8383                 "> *op"+int_to_string(i)+" = new select_project_operator<"+
8384                 generate_functor_name() +">("+params+", \"" + get_node_name() + "\");\n");
8385 }
8386
8387
8388 ////////////////////////////////////////////////////////////////
8389 ////    SGAH functor
8390
8391
8392
8393 string sgah_qpn::generate_functor_name(){
8394         return("sgah_functor_" + normalize_name(this->get_node_name()));
8395 }
8396
8397
8398 string sgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8399         int a,g,w,s;
8400
8401
8402 //                      Initialize generate utility globals
8403         segen_gb_tbl = &(gb_tbl);
8404
8405 //              Might need to generate empty values for cube processing.
8406         map<int, string> structured_types;
8407         for(g=0;g<gb_tbl.size();++g){
8408                 if(gb_tbl.get_data_type(g)->is_structured_type()){
8409                         structured_types[gb_tbl.get_data_type(g)->type_indicator()] = gb_tbl.get_data_type(g)->get_type_str();
8410                 }
8411         }
8412
8413 //--------------------------------
8414 //                      group definition class
8415         string ret = "class " + generate_functor_name() + "_groupdef{\n";
8416         ret += "public:\n";
8417         for(g=0;g<this->gb_tbl.size();g++){
8418                 sprintf(tmpstr,"gb_var%d",g);
8419                 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8420         }
8421 //              empty strucutred literals
8422         map<int, string>::iterator sii;
8423         for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8424                 data_type dt(sii->second);
8425                 literal_t empty_lit(sii->first);
8426                 ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8427         }
8428 //              Constructors
8429         if(structured_types.size()==0){
8430                 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
8431         }else{
8432                 ret += "\t"+generate_functor_name() + "_groupdef(){}\n";
8433         }
8434
8435
8436         ret += "\t"+generate_functor_name() + "_groupdef("+
8437                 this->generate_functor_name() + "_groupdef *gd){\n";
8438         for(g=0;g<gb_tbl.size();g++){
8439                 data_type *gdt = gb_tbl.get_data_type(g);
8440                 if(gdt->is_buffer_type()){
8441                         sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8442                           gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8443                         ret += tmpstr;
8444                 }else{
8445                         sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8446                         ret += tmpstr;
8447                 }
8448         }
8449         ret += "\t}\n";
8450         ret += "\t"+generate_functor_name() + "_groupdef("+
8451                 this->generate_functor_name() + "_groupdef *gd, bool *pattern){\n";
8452         for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8453                 literal_t empty_lit(sii->first);
8454                 ret += "\t\t"+empty_lit.to_hfta_C_code("&"+empty_lit.hfta_empty_literal_name())+";\n";
8455         }
8456         for(g=0;g<gb_tbl.size();g++){
8457                 data_type *gdt = gb_tbl.get_data_type(g);
8458                 ret += "\t\tif(pattern["+int_to_string(g)+"]){\n";
8459                 if(gdt->is_buffer_type()){
8460                         sprintf(tmpstr,"\t\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8461                           gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8462                         ret += tmpstr;
8463                 }else{
8464                         sprintf(tmpstr,"\t\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8465                         ret += tmpstr;
8466                 }
8467                 ret += "\t\t}else{\n";
8468                 literal_t empty_lit(gdt->type_indicator());
8469                 if(empty_lit.is_cpx_lit()){
8470                         ret +="\t\t\tgb_var"+int_to_string(g)+"= "+empty_lit.hfta_empty_literal_name()+";\n";
8471                 }else{
8472                         ret +="\t\t\tgb_var"+int_to_string(g)+"="+empty_lit.to_hfta_C_code("")+";\n";
8473                 }
8474                 ret += "\t\t}\n";
8475         }
8476         ret += "\t};\n";
8477 //              destructor
8478         ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
8479         for(g=0;g<gb_tbl.size();g++){
8480                 data_type *gdt = gb_tbl.get_data_type(g);
8481                 if(gdt->is_buffer_type()){
8482                         sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
8483                           gdt->get_hfta_buffer_destroy().c_str(), g );
8484                         ret += tmpstr;
8485                 }
8486         }
8487         ret += "\t};\n";
8488
8489         data_type *tgdt;
8490         for(g=0;g<gb_tbl.size();g++){
8491                 data_type *gdt = gb_tbl.get_data_type(g);
8492                 if(gdt->is_temporal()){
8493                         tgdt = gdt;
8494                         break;
8495                 }
8496         }
8497         ret += tgdt->get_host_cvar_type()+" get_curr_gb(){\n";
8498         ret+="\treturn gb_var"+int_to_string(g)+";\n";
8499         ret+="}\n";
8500
8501         ret +="};\n\n";
8502
8503 //--------------------------------
8504 //                      aggr definition class
8505         ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
8506         ret += "public:\n";
8507         for(a=0;a<aggr_tbl.size();a++){
8508                 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
8509                 sprintf(tmpstr,"aggr_var%d",a);
8510                 if(aggr_tbl.is_builtin(a)){
8511                   ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
8512                   if(aggr_tbl.get_op(a) == "AVG"){      // HACK!
8513                         data_type cnt_type = data_type("ullong");
8514                         ret+="\t"+cnt_type.make_host_cvar(string(tmpstr)+"_cnt")+";\n";
8515                         ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(string(tmpstr)+"_sum")+";\n";
8516                   }
8517                 }else{
8518                   ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
8519                 }
8520         }
8521 //              Constructors
8522         ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
8523 //              destructor
8524         ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
8525         for(a=0;a<aggr_tbl.size();a++){
8526                 if(aggr_tbl.is_builtin(a)){
8527                         data_type *adt = aggr_tbl.get_data_type(a);
8528                         if(adt->is_buffer_type()){
8529                                 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
8530                                 adt->get_hfta_buffer_destroy().c_str(), a );
8531                                 ret += tmpstr;
8532                         }
8533                 }else{
8534                         ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
8535                         if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
8536                         ret+="(aggr_var"+int_to_string(a)+"));\n";
8537                 }
8538         }
8539         ret += "\t};\n";
8540         ret +="};\n\n";
8541
8542 //-------------------------------------------
8543 //              group-by patterns for the functor,
8544 //              initialization within the class is cumbersome.
8545         int n_patterns = gb_tbl.gb_patterns.size();
8546         int i,j;
8547         ret += "bool "+this->generate_functor_name()+"_gb_patterns["+int_to_string(n_patterns)+
8548                         "]["+int_to_string(gb_tbl.size())+"] = {\n";
8549         if(n_patterns == 0){
8550                 for(i=0;i<gb_tbl.size();++i){
8551                         if(i>0) ret += ",";
8552                         ret += "true";
8553                 }
8554         }else{
8555                 for(i=0;i<n_patterns;++i){
8556                         if(i>0) ret += ",\n";
8557                         ret += "\t{";
8558                         for(j=0;j<gb_tbl.size();j++){
8559                                 if(j>0) ret += ", ";
8560                                 if(gb_tbl.gb_patterns[i][j]){
8561                                         ret += "true";
8562                                 }else{
8563                                         ret += "false";
8564                                 }
8565                         }
8566                         ret += "}";
8567                 }
8568                 ret += "\n";
8569         }
8570         ret += "};\n";
8571
8572
8573 //--------------------------------
8574 //                      gb functor class
8575         ret += "class " + this->generate_functor_name() + "{\n";
8576
8577 //                      Find variables referenced in this query node.
8578
8579   col_id_set cid_set;
8580   col_id_set::iterator csi;
8581
8582     for(w=0;w<where.size();++w)
8583         gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
8584     for(w=0;w<having.size();++w)
8585         gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
8586         for(g=0;g<gb_tbl.size();g++)
8587                 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
8588
8589     for(s=0;s<select_list.size();s++){
8590         gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl);     // descends into aggregates
8591     }
8592
8593
8594 //                      Private variables : store the state of the functor.
8595 //                      1) variables for unpacked attributes
8596 //                      2) offsets of the upacked attributes
8597 //                      3) storage of partial functions
8598 //                      4) storage of complex literals (i.e., require a constructor)
8599
8600         ret += "private:\n";
8601
8602         // var to save the schema handle
8603         ret += "\tint schema_handle0;\n";
8604         // metadata from schema handle
8605         ret += "\tint tuple_metadata_offset0;\n";
8606
8607         // generate the declaration of all the variables related to
8608         // temp tuples generation
8609         ret += gen_decl_temp_vars();
8610
8611 //                      unpacked attribute storage, offsets
8612         ret += "//\t\tstorage and offsets of accessed fields.\n";
8613         ret += generate_access_vars(cid_set, schema);
8614
8615 //                      Variables to store results of partial functions.
8616 //                      WARNING find_partial_functions modifies the SE
8617 //                      (it marks the partial function id).
8618         ret += "//\t\tParital function result storage\n";
8619         vector<scalarexp_t *> partial_fcns;
8620         vector<int> fcn_ref_cnt;
8621         vector<bool> is_partial_fcn;
8622         for(s=0;s<select_list.size();s++){
8623                 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL,  Ext_fcns);
8624         }
8625         for(w=0;w<where.size();w++){
8626                 find_partial_fcns_pr(where[w]->pr, &partial_fcns,NULL,NULL,  Ext_fcns);
8627         }
8628         for(w=0;w<having.size();w++){
8629                 find_partial_fcns_pr(having[w]->pr, &partial_fcns,NULL,NULL,  Ext_fcns);
8630         }
8631         for(g=0;g<gb_tbl.size();g++){
8632                 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns,NULL,NULL,  Ext_fcns);
8633         }
8634         for(a=0;a<aggr_tbl.size();a++){
8635                 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns,NULL,NULL,  Ext_fcns);
8636         }
8637         if(partial_fcns.size()>0){
8638           ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
8639           ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
8640         }
8641
8642 //                      Complex literals (i.e., they need constructors)
8643         ret += "//\t\tComplex literal storage.\n";
8644         cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8645         ret += generate_complex_lit_vars(complex_literals);
8646
8647 //                      Pass-by-handle parameters
8648         ret += "//\t\tPass-by-handle storage.\n";
8649         vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8650         ret += generate_pass_by_handle_vars(param_handle_table);
8651
8652
8653 //                      variables to hold parameters.
8654         ret += "//\tfor query parameters\n";
8655         ret += generate_param_vars(param_tbl);
8656
8657 //              Is there a temporal flush?  If so create flush temporaries,
8658 //              create flush indicator.
8659         bool uses_temporal_flush = false;
8660         for(g=0;g<gb_tbl.size();g++){
8661                 data_type *gdt = gb_tbl.get_data_type(g);
8662                 if(gdt->is_temporal())
8663                         uses_temporal_flush = true;
8664         }
8665
8666         if(uses_temporal_flush){
8667                 ret += "//\t\tFor temporal flush\n";
8668                 for(g=0;g<gb_tbl.size();g++){
8669                         data_type *gdt = gb_tbl.get_data_type(g);
8670                         if(gdt->is_temporal()){
8671                           sprintf(tmpstr,"last_gb%d",g);
8672                           ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8673                           sprintf(tmpstr,"last_flushed_gb%d",g);
8674                           ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8675                         }
8676                 }
8677                 ret += "\tbool needs_temporal_flush;\n";
8678         }
8679
8680
8681 //                      The publicly exposed functions
8682
8683         ret += "\npublic:\n";
8684
8685
8686 //-------------------
8687 //                      The functor constructor
8688 //                      pass in the schema handle.
8689 //                      1) make assignments to the unpack offset variables
8690 //                      2) initialize the complex literals
8691
8692         ret += "//\t\tFunctor constructor.\n";
8693         ret +=  this->generate_functor_name()+"(int schema_handle0){\n";
8694
8695         // save the schema handle
8696         ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
8697
8698 //              unpack vars
8699         ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8700         ret += gen_access_var_init(cid_set);
8701 //              tuple metadata
8702         ret += "tuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8703
8704 //              complex literals
8705         ret += "//\t\tInitialize complex literals.\n";
8706         ret += gen_complex_lit_init(complex_literals);
8707
8708 //              Initialize partial function results so they can be safely GC'd
8709         ret += gen_partial_fcn_init(partial_fcns);
8710
8711 //              Initialize non-query-parameter parameter handles
8712         ret += gen_pass_by_handle_init(param_handle_table);
8713
8714 //              temporal flush variables
8715 //              ASSUME that structured values won't be temporal.
8716         if(uses_temporal_flush){
8717                 ret += "//\t\tInitialize temporal flush variables.\n";
8718                 for(g=0;g<gb_tbl.size();g++){
8719                         data_type *gdt = gb_tbl.get_data_type(g);
8720                         if(gdt->is_temporal()){
8721                                 literal_t gl(gdt->type_indicator());
8722                                 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
8723                                 ret.append(tmpstr);
8724                                 sprintf(tmpstr,"\tlast_flushed_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
8725                                 ret.append(tmpstr);
8726                         }
8727                 }
8728                 ret += "\tneeds_temporal_flush = false;\n";
8729         }
8730
8731         //              Init temporal attributes referenced in select list
8732         ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
8733
8734         ret += "}\n\n";
8735
8736 //-------------------
8737 //                      Functor destructor
8738         ret += "//\t\tFunctor destructor.\n";
8739         ret +=  "~"+this->generate_functor_name()+"(){\n";
8740
8741 //                      clean up buffer type complex literals
8742         ret += gen_complex_lit_dtr(complex_literals);
8743
8744 //                      Deregister the pass-by-handle parameters
8745         ret += "/* register and de-register the pass-by-handle parameters */\n";
8746         ret += gen_pass_by_handle_dtr(param_handle_table);
8747
8748 //                      clean up partial function results.
8749         ret += "/* clean up partial function storage    */\n";
8750         ret += gen_partial_fcn_dtr(partial_fcns);
8751
8752 //                      Destroy the parameters, if any need to be destroyed
8753         ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8754
8755         ret += "};\n\n";
8756
8757
8758 //-------------------
8759 //                      Parameter manipulation routines
8760         ret += generate_load_param_block(this->generate_functor_name(),
8761                                                                         this->param_tbl,param_handle_table);
8762         ret += generate_delete_param_block(this->generate_functor_name(),
8763                                                                         this->param_tbl,param_handle_table);
8764
8765 //-------------------
8766 //                      Register new parameter block
8767
8768         ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8769           ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8770           ret += "\treturn this->load_params_"+this->generate_functor_name()+
8771                                 "(sz, value);\n";
8772         ret += "};\n\n";
8773
8774 // -----------------------------------
8775 //                      group-by pattern support
8776
8777         ret +=
8778 "int n_groupby_patterns(){\n"
8779 "       return "+int_to_string(gb_tbl.gb_patterns.size())+";\n"
8780 "}\n"
8781 "bool *get_pattern(int p){\n"
8782 "       return "+this->generate_functor_name()+"_gb_patterns[p];\n"
8783 "}\n\n"
8784 ;
8785
8786
8787
8788
8789 //-------------------
8790 //              the create_group method.
8791 //              This method creates a group in a buffer passed in
8792 //              (to allow for creation on the stack).
8793 //              There are also a couple of side effects:
8794 //              1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
8795 //              2) determine if a temporal flush is required.
8796
8797         ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
8798         //              Variables for execution of the function.
8799         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
8800
8801         if(partial_fcns.size()>0){              // partial fcn access failure
8802           ret += "\tgs_retval_t retval = 0;\n";
8803           ret += "\n";
8804         }
8805 //              return value
8806         ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
8807                         "_groupdef *) buffer;\n";
8808
8809 //              Start by cleaning up partial function results
8810         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8811         set<int> w_pfcns;       // partial fcns in where clause
8812         for(w=0;w<where.size();++w)
8813                 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
8814
8815         set<int> ag_gb_pfcns;   // partial fcns in gbdefs, aggr se's
8816         for(g=0;g<gb_tbl.size();g++){
8817                 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
8818         }
8819         for(a=0;a<aggr_tbl.size();a++){
8820                 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
8821         }
8822         ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
8823         ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
8824 //      ret += gen_partial_fcn_dtr(partial_fcns);
8825
8826
8827         ret += gen_temp_tuple_check(this->node_name, 0);
8828         col_id_set found_cids;  // colrefs unpacked thus far.
8829         ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
8830
8831
8832 //                      Save temporal group-by variables
8833
8834
8835         ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
8836
8837           for(g=0;g<gb_tbl.size();g++){
8838
8839                         data_type *gdt = gb_tbl.get_data_type(g);
8840
8841                         if(gdt->is_temporal()){
8842                                 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8843                                         g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
8844                                 ret.append(tmpstr);
8845                         }
8846                 }
8847                 ret.append("\n");
8848
8849
8850
8851 //                      Compare the temporal GB vars with the stored ones,
8852 //                      set flush indicator and update stored GB vars if there is any change.
8853
8854 ret += "// hfta_disorder = "+int_to_string(hfta_disorder)+"\n";
8855         if(hfta_disorder < 2){
8856                 if(uses_temporal_flush){
8857                         ret+= "\tif( !( (";
8858                         bool first_one = true;
8859                         for(g=0;g<gb_tbl.size();g++){
8860                                 data_type *gdt = gb_tbl.get_data_type(g);
8861
8862                                 if(gdt->is_temporal()){
8863                                 sprintf(tmpstr,"last_gb%d",g);   string lhs_op = tmpstr;
8864                                 sprintf(tmpstr,"gbval->gb_var%d",g);   string rhs_op = tmpstr;
8865                                 if(first_one){first_one = false;} else {ret += ") && (";}
8866                                 ret += generate_equality_test(lhs_op, rhs_op, gdt);
8867                                 }
8868                         }
8869                         ret += ") ) ){\n";
8870                         for(g=0;g<gb_tbl.size();g++){
8871                         data_type *gdt = gb_tbl.get_data_type(g);
8872                         if(gdt->is_temporal()){
8873                                 if(gdt->is_buffer_type()){
8874                                         sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
8875                                 }else{
8876                                         sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
8877                                         ret += tmpstr;
8878                                         sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
8879                                 }
8880                                 ret += tmpstr;
8881                                 }
8882                         }
8883                         ret += "\t\tneeds_temporal_flush=true;\n";
8884                         ret += "\t\t}else{\n"
8885                                 "\t\t\tneeds_temporal_flush=false;\n"
8886                                 "\t\t}\n";
8887                 }
8888         }else{
8889                 ret+= "\tif(temp_tuple_received && !( (";
8890                 bool first_one = true;
8891                 for(g=0;g<gb_tbl.size();g++){
8892                         data_type *gdt = gb_tbl.get_data_type(g);
8893
8894                         if(gdt->is_temporal()){
8895                                 sprintf(tmpstr,"last_gb%d",g);   string lhs_op = tmpstr;
8896                                 sprintf(tmpstr,"gbval->gb_var%d",g);   string rhs_op = tmpstr;
8897                                 if(first_one){first_one = false;} else {ret += ") && (";}
8898                                 ret += generate_equality_test(lhs_op, rhs_op, gdt);
8899                                 break;
8900                         }
8901                 }
8902                 ret += ") ) ){\n";
8903                 int temporal_g = 0;
8904                 for(g=0;g<gb_tbl.size();g++){
8905                         data_type *gdt = gb_tbl.get_data_type(g);
8906                         if(gdt->is_temporal()){
8907                                 temporal_g = g;
8908                                 if(gdt->is_buffer_type()){
8909                                         sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
8910                                 }else{
8911                                         sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
8912                                         ret += tmpstr;
8913                                         sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
8914                                 }
8915                                 ret += tmpstr;
8916                                 break;
8917                         }
8918                 }
8919                 data_type *tgdt = gb_tbl.get_data_type(temporal_g);
8920                 literal_t gl(tgdt->type_indicator());
8921                 ret += "\t\tif(last_flushed_gb"+int_to_string(temporal_g)+">"+gl.to_hfta_C_code("")+")\n";
8922                 ret += "\t\t\tneeds_temporal_flush=true;\n";
8923                 ret += "\t\t}else{\n"
8924                         "\t\t\tneeds_temporal_flush=false;\n"
8925                         "\t\t}\n";
8926         }
8927
8928
8929 //              For temporal status tuple we don't need to do anything else
8930         ret += "\tif (temp_tuple_received) return NULL;\n\n";
8931
8932         for(w=0;w<where.size();++w){
8933                 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8934                 ret += tmpstr;
8935 //                      Find the set of variables accessed in this CNF elem,
8936 //                      but in no previous element.
8937                 col_id_set new_cids;
8938                 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
8939
8940 //                      Unpack these values.
8941                 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
8942 //                      Find partial fcns ref'd in this cnf element
8943                 set<int> pfcn_refs;
8944                 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8945                 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
8946
8947                 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8948                                 +") ) return(NULL);\n";
8949         }
8950
8951 //              The partial functions ref'd in the group-by var and aggregate
8952 //              definitions must also be evaluated.  If one returns false,
8953 //              then implicitly the predicate is false.
8954         set<int>::iterator pfsi;
8955
8956         if(ag_gb_pfcns.size() > 0)
8957                 ret += "//\t\tUnpack remaining partial fcns.\n";
8958         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
8959                                                                                 found_cids, segen_gb_tbl, "NULL", needs_xform);
8960
8961 //                      Unpack the group-by variables
8962
8963           for(g=0;g<gb_tbl.size();g++){
8964                 data_type *gdt = gb_tbl.get_data_type(g);
8965
8966                 if(!gdt->is_temporal()){
8967 //                      Find the new fields ref'd by this GBvar def.
8968                         col_id_set new_cids;
8969                         get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
8970 //                      Unpack these values.
8971                         ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
8972
8973                         sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8974                                 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
8975 /*
8976 //                              There seems to be no difference between the two
8977 //                              branches of the IF statement.
8978                 data_type *gdt = gb_tbl.get_data_type(g);
8979                   if(gdt->is_buffer_type()){
8980 //                              Create temporary copy.
8981                         sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8982                                 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
8983                   }else{
8984                         scalarexp_t *gse = gb_tbl.get_def(g);
8985                         sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8986                                         g,generate_se_code(gse,schema).c_str());
8987                   }
8988 */
8989
8990                         ret.append(tmpstr);
8991                 }
8992           }
8993           ret.append("\n");
8994
8995         ret+= "\treturn gbval;\n";
8996         ret += "};\n\n\n";
8997
8998 //--------------------------------------------------------
8999 //                      Create and initialize an aggregate object
9000
9001         ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
9002         //              Variables for execution of the function.
9003         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
9004
9005 //              return value
9006         ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
9007                         "_aggrdef *)buffer;\n";
9008
9009         for(a=0;a<aggr_tbl.size();a++){
9010                 if(aggr_tbl.is_builtin(a)){
9011 //                      Create temporaries for buffer return values
9012                   data_type *adt = aggr_tbl.get_data_type(a);
9013                   if(adt->is_buffer_type()){
9014                         sprintf(tmpstr,"aggr_tmp_%d", a);
9015                         ret+=adt->make_host_cvar(tmpstr)+";\n";
9016                   }
9017                 }
9018         }
9019
9020 //              Unpack all remaining attributes
9021         ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
9022         for(a=0;a<aggr_tbl.size();a++){
9023           sprintf(tmpstr,"aggval->aggr_var%d",a);
9024           string assignto_var = tmpstr;
9025           ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
9026         }
9027
9028         ret += "\treturn aggval;\n";
9029         ret += "};\n\n";
9030
9031 //--------------------------------------------------------
9032 //                      update an aggregate object
9033
9034         ret += "void update_aggregate(host_tuple &tup0, "
9035                 +generate_functor_name()+"_groupdef *gbval, "+
9036                 generate_functor_name()+"_aggrdef *aggval){\n";
9037         //              Variables for execution of the function.
9038         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
9039
9040 //                      use of temporaries depends on the aggregate,
9041 //                      generate them in generate_aggr_update
9042
9043
9044 //              Unpack all remaining attributes
9045         ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
9046         for(a=0;a<aggr_tbl.size();a++){
9047           sprintf(tmpstr,"aggval->aggr_var%d",a);
9048           string varname = tmpstr;
9049           ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
9050         }
9051
9052         ret += "\treturn;\n";
9053         ret += "};\n";
9054
9055 //---------------------------------------------------
9056 //                      Flush test
9057
9058         ret += "\tbool flush_needed(){\n";
9059         if(uses_temporal_flush){
9060                 ret += "\t\treturn needs_temporal_flush;\n";
9061         }else{
9062                 ret += "\t\treturn false;\n";
9063         }
9064         ret += "\t};\n";
9065
9066 //---------------------------------------------------
9067 //                      create output tuple
9068 //                      Unpack the partial functions ref'd in the where clause,
9069 //                      select clause.  Evaluate the where clause.
9070 //                      Finally, pack the tuple.
9071
9072 //                      I need to use special code generation here,
9073 //                      so I'll leave it in longhand.
9074
9075         ret += "host_tuple create_output_tuple("
9076                 +generate_functor_name()+"_groupdef *gbval, "+
9077                 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
9078
9079         ret += "\thost_tuple tup;\n";
9080         ret += "\tfailed = false;\n";
9081         ret += "\tgs_retval_t retval = 0;\n";
9082
9083         string gbvar = "gbval->gb_var";
9084         string aggvar = "aggval->";
9085
9086 //                      Create cached temporaries for UDAF return values.
9087         for(a=0;a<aggr_tbl.size();a++){
9088                 if(! aggr_tbl.is_builtin(a)){
9089                         int afcn_id = aggr_tbl.get_fcn_id(a);
9090                         data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9091                         sprintf(tmpstr,"udaf_ret_%d", a);
9092                         ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
9093                 }
9094         }
9095
9096
9097 //                      First, get the return values from the UDAFS
9098         for(a=0;a<aggr_tbl.size();a++){
9099                 if(! aggr_tbl.is_builtin(a)){
9100                         ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
9101                         if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9102                         ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
9103                 }
9104         }
9105
9106         set<int> hv_sl_pfcns;
9107         for(w=0;w<having.size();w++){
9108                 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
9109         }
9110         for(s=0;s<select_list.size();s++){
9111                 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
9112         }
9113
9114 //              clean up the partial fcn results from any previous execution
9115         ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
9116
9117 //              Unpack them now
9118         for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
9119                 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
9120                 ret += "\tif(retval){ failed = true; return(tup);}\n";
9121         }
9122
9123 //              Evaluate the HAVING clause
9124 //              TODO: this seems to have a ++ operator rather than a + operator.
9125         for(w=0;w<having.size();++w){
9126                 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
9127         }
9128
9129 //          Now, compute the size of the tuple.
9130
9131 //          Unpack any BUFFER type selections into temporaries
9132 //          so that I can compute their size and not have
9133 //          to recompute their value during tuple packing.
9134 //          I can use regular assignment here because
9135 //          these temporaries are non-persistent.
9136 //                      TODO: should I be using the selvar generation routine?
9137
9138         ret += "//\t\tCompute the size of the tuple.\n";
9139         ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
9140       for(s=0;s<select_list.size();s++){
9141                 scalarexp_t *se = select_list[s]->se;
9142         data_type *sdt = se->get_data_type();
9143         if(sdt->is_buffer_type() &&
9144                          !( (se->get_operator_type() == SE_COLREF) ||
9145                                 (se->get_operator_type() == SE_AGGR_STAR) ||
9146                                 (se->get_operator_type() == SE_AGGR_SE) ||
9147                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9148                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9149                 ){
9150             sprintf(tmpstr,"selvar_%d",s);
9151                         ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
9152                         ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
9153         }
9154       }
9155
9156 //      The size of the tuple is the size of the tuple struct plus the
9157 //      size of the buffers to be copied in.
9158
9159       ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
9160       for(s=0;s<select_list.size();s++){
9161 //              if(s>0) ret += "+";
9162                 scalarexp_t *se = select_list[s]->se;
9163         data_type *sdt = select_list[s]->se->get_data_type();
9164         if(sdt->is_buffer_type()){
9165                   if(!( (se->get_operator_type() == SE_COLREF) ||
9166                                 (se->get_operator_type() == SE_AGGR_STAR) ||
9167                                 (se->get_operator_type() == SE_AGGR_SE) ||
9168                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9169                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9170                   ){
9171             sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
9172             ret.append(tmpstr);
9173                   }else{
9174             sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9175             ret.append(tmpstr);
9176                   }
9177         }
9178       }
9179       ret.append(";\n");
9180
9181 //              Allocate tuple data block.
9182         ret += "//\t\tCreate the tuple block.\n";
9183           ret += "\ttup.data = malloc(tup.tuple_size);\n";
9184           ret += "\ttup.heap_resident = true;\n";
9185
9186 //              Mark tuple as regular
9187           ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
9188
9189 //        ret += "\ttup.channel = 0;\n";
9190           ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
9191                                 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
9192
9193 //              Start packing.
9194 //                      (Here, offsets are hard-wired.  is this a problem?)
9195
9196         ret += "//\t\tPack the fields into the tuple.\n";
9197           ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
9198       for(s=0;s<select_list.size();s++){
9199                 scalarexp_t *se = select_list[s]->se;
9200         data_type *sdt = se->get_data_type();
9201         if(sdt->is_buffer_type()){
9202                   if(!( (se->get_operator_type() == SE_COLREF) ||
9203                                 (se->get_operator_type() == SE_AGGR_STAR) ||
9204                                 (se->get_operator_type() == SE_AGGR_SE) ||
9205                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9206                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9207                   ){
9208             sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d,  ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
9209             ret.append(tmpstr);
9210             sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
9211             ret.append(tmpstr);
9212                   }else{
9213             sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s,  ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9214             ret.append(tmpstr);
9215             sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9216             ret.append(tmpstr);
9217                   }
9218         }else{
9219             sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9220             ret.append(tmpstr);
9221             ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
9222             ret.append(";\n");
9223         }
9224       }
9225
9226 //                      Destroy string temporaries
9227           ret += gen_buffer_selvars_dtr(select_list);
9228 //                      Destroy string return vals of UDAFs
9229         for(a=0;a<aggr_tbl.size();a++){
9230                 if(! aggr_tbl.is_builtin(a)){
9231                         int afcn_id = aggr_tbl.get_fcn_id(a);
9232                         data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9233                         if(adt->is_buffer_type()){
9234                                 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
9235                                 adt->get_hfta_buffer_destroy().c_str(), a );
9236                                 ret += tmpstr;
9237                         }
9238                 }
9239         }
9240
9241
9242           ret += "\treturn tup;\n";
9243           ret += "};\n";
9244
9245
9246 //-------------------------------------------------------------------
9247 //              Temporal update functions
9248
9249         ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
9250
9251         for(g=0;g<gb_tbl.size();g++){
9252                 data_type *gdt = gb_tbl.get_data_type(g);
9253                 if(gdt->is_temporal()){
9254                         tgdt = gdt;
9255                         break;
9256                 }
9257         }
9258         ret += tgdt->get_host_cvar_type()+" get_last_flushed_gb(){\n";
9259         ret+="\treturn last_flushed_gb"+int_to_string(g)+";\n";
9260         ret+="}\n";
9261         ret += tgdt->get_host_cvar_type()+" get_last_gb(){\n";
9262         ret+="\treturn last_gb"+int_to_string(g)+";\n";
9263         ret+="}\n";
9264
9265
9266
9267
9268 //              create a temp status tuple
9269         ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
9270
9271         ret += gen_init_temp_status_tuple(this->get_node_name());
9272
9273 //              Start packing.
9274 //                      (Here, offsets are hard-wired.  is this a problem?)
9275
9276         ret += "//\t\tPack the fields into the tuple.\n";
9277         for(s=0;s<select_list.size();s++){
9278                 data_type *sdt = select_list[s]->se->get_data_type();
9279                 if(sdt->is_temporal()){
9280                         sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9281                         ret += tmpstr;
9282
9283                         sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
9284                         ret += tmpstr;
9285                         ret += ";\n";
9286                 }
9287         }
9288
9289
9290         ret += "\treturn 0;\n";
9291         ret += "};};\n\n\n";
9292
9293
9294 //----------------------------------------------------------
9295 //                      The hash function
9296
9297         ret += "struct "+generate_functor_name()+"_hash_func{\n";
9298         ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
9299                                 "_groupdef *grp) const{\n";
9300         ret += "\t\treturn( (";
9301         for(g=0;g<gb_tbl.size();g++){
9302                 if(g>0) ret += "^";
9303                 data_type *gdt = gb_tbl.get_data_type(g);
9304                 if(gdt->use_hashfunc()){
9305                         if(gdt->is_buffer_type())
9306                                 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9307                         else
9308                                 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9309                 }else{
9310                         sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
9311                 }
9312                 ret += tmpstr;
9313         }
9314         ret += ") >> 32);\n";
9315         ret += "\t}\n";
9316         ret += "};\n\n";
9317
9318 //----------------------------------------------------------
9319 //                      The comparison function
9320
9321         ret += "struct "+generate_functor_name()+"_equal_func{\n";
9322         ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
9323                         generate_functor_name()+"_groupdef *grp2) const{\n";
9324         ret += "\t\treturn( (";
9325
9326         for(g=0;g<gb_tbl.size();g++){
9327                 if(g>0) ret += ") && (";
9328                 data_type *gdt = gb_tbl.get_data_type(g);
9329                 if(gdt->complex_comparison(gdt)){
9330                 if(gdt->is_buffer_type())
9331                         sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
9332                                 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9333                 else
9334                         sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
9335                                 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9336                 }else{
9337                         sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
9338                 }
9339                 ret += tmpstr;
9340         }
9341         ret += ") );\n";
9342         ret += "\t}\n";
9343         ret += "};\n\n";
9344
9345
9346         return(ret);
9347 }
9348
9349 string sgah_qpn::generate_operator(int i, string params){
9350
9351         if(hfta_disorder < 2){
9352                 return(
9353                         "       groupby_operator<" +
9354                         generate_functor_name()+","+
9355                         generate_functor_name() + "_groupdef, " +
9356                         generate_functor_name() + "_aggrdef, " +
9357                         generate_functor_name()+"_hash_func, "+
9358                         generate_functor_name()+"_equal_func "
9359                         "> *op"+int_to_string(i)+" = new groupby_operator<"+
9360                         generate_functor_name()+","+
9361                         generate_functor_name() + "_groupdef, " +
9362                         generate_functor_name() + "_aggrdef, " +
9363                         generate_functor_name()+"_hash_func, "+
9364                         generate_functor_name()+"_equal_func "
9365                         ">("+params+", \"" + get_node_name() +
9366 "\");\n"
9367                 );
9368         }
9369         data_type *tgdt;
9370         for(int g=0;g<gb_tbl.size();g++){
9371                 data_type *gdt = gb_tbl.get_data_type(g);
9372                 if(gdt->is_temporal()){
9373                         tgdt = gdt;
9374                         break;
9375                 }
9376         }
9377
9378         return(
9379                         "       groupby_operator_oop<" +
9380                         generate_functor_name()+","+
9381                         generate_functor_name() + "_groupdef, " +
9382                         generate_functor_name() + "_aggrdef, " +
9383                         generate_functor_name()+"_hash_func, "+
9384                         generate_functor_name()+"_equal_func, " +
9385             tgdt->get_host_cvar_type() +
9386                         "> *op"+int_to_string(i)+" = new groupby_operator_oop<"+
9387                         generate_functor_name()+","+
9388                         generate_functor_name() + "_groupdef, " +
9389                         generate_functor_name() + "_aggrdef, " +
9390                         generate_functor_name()+"_hash_func, "+
9391                         generate_functor_name()+"_equal_func, " +
9392             tgdt->get_host_cvar_type() +
9393                         ">("+params+", \"" + get_node_name() +
9394 "\");\n"
9395                 );
9396 }
9397
9398
9399 ////////////////////////////////////////////////
9400 ///             MERGE operator
9401 ///             MRG functor
9402 ////////////////////////////////////////////
9403
9404 string mrg_qpn::generate_functor_name(){
9405         return("mrg_functor_" + normalize_name(this->get_node_name()));
9406 }
9407
9408 string mrg_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
9409         int tblref;
9410
9411
9412 //              Sanity check
9413         if(fm.size() != mvars.size()){
9414                 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=%lu, mvars.size=%lu\n",fm.size(),mvars.size());
9415                 exit(1);
9416         }
9417         if(fm.size() != 2){
9418                 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=mvars.size=%lu\n",fm.size());
9419                 exit(1);
9420         }
9421
9422
9423 //                      Initialize generate utility globals
9424         segen_gb_tbl = NULL;
9425
9426         string ret = "class " + this->generate_functor_name() + "{\n";
9427
9428 //              Private variable:
9429 //              1) Vars for unpacked attrs.
9430 //              2) offsets of the unpacked attrs
9431 //              3) last_posted_timestamp
9432
9433         data_type dta(
9434                 schema->get_type_name(mvars[0]->get_schema_ref(), mvars[0]->get_field()),
9435                 schema->get_modifier_list(mvars[0]->get_schema_ref(), mvars[0]->get_field())
9436         );
9437         data_type dtb(
9438                 schema->get_type_name(mvars[1]->get_schema_ref(), mvars[1]->get_field()),
9439                 schema->get_modifier_list(mvars[1]->get_schema_ref(), mvars[1]->get_field())
9440         );
9441
9442         ret += "private:\n";
9443
9444         // var to save the schema handle
9445         ret += "\tint schema_handle0;\n";
9446
9447         // generate the declaration of all the variables related to
9448         // temp tuples generation
9449         ret += gen_decl_temp_vars();
9450
9451 //                      unpacked attribute storage, offsets
9452         ret += "//\t\tstorage and offsets of accessed fields.\n";
9453         ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
9454         tblref = 0;
9455         sprintf(tmpstr,"unpack_var_%s_%d", mvars[0]->get_field().c_str(), tblref);
9456         ret+="\t"+dta.make_host_cvar(tmpstr)+";\n";
9457         sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[0]->get_field().c_str(), tblref);
9458         ret.append(tmpstr);
9459         tblref = 1;
9460         sprintf(tmpstr,"unpack_var_%s_%d", mvars[1]->get_field().c_str(), tblref);
9461         ret+="\t"+dtb.make_host_cvar(tmpstr)+";\n";
9462         sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[1]->get_field().c_str(), tblref);
9463         ret.append(tmpstr);
9464
9465         ret += "//\t\tRemember the last posted timestamp.\n";
9466         ret+="\t"+dta.make_host_cvar("last_posted_timestamp_0")+";\n";
9467         ret+="\t"+dta.make_host_cvar("last_posted_timestamp_1")+";\n";
9468         ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9469         ret+="\t"+dta.make_host_cvar("slack")+";\n";
9470 //      ret += "\t bool first_execution_0, first_execution_1;\n";
9471
9472 //                      variables to hold parameters.
9473         ret += "//\tfor query parameters\n";
9474         ret += generate_param_vars(param_tbl);
9475
9476         ret += "public:\n";
9477 //-------------------
9478 //                      The functor constructor
9479 //                      pass in a schema handle (e.g. for the 1st input stream),
9480 //                      use it to determine how to unpack the merge variable.
9481 //                      ASSUME that both streams have the same layout,
9482 //                      just duplicate it.
9483
9484 //              unpack vars
9485         ret += "//\t\tFunctor constructor.\n";
9486         ret +=  this->generate_functor_name()+"(int schema_handle0){\n";
9487
9488         // var to save the schema handle
9489         ret += "\tthis->schema_handle0 = schema_handle0;\n";
9490         ret += "\ttuple_metadata_offset0=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9491         ret += "\ttuple_metadata_offset1=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9492
9493         ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
9494
9495    sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", mvars[0]->get_field().c_str(), 0,mvars[0]->get_field().c_str());
9496    ret.append(tmpstr);
9497         sprintf(tmpstr,"\tunpack_offset_%s_%d = unpack_offset_%s_%d;\n",mvars[1]->get_field().c_str(), 1,mvars[0]->get_field().c_str(), 0);
9498         ret.append(tmpstr);
9499 //      ret+="\tfirst_execution_0 = first_execution_1 = true;\n";
9500         if(slack)
9501                 ret+="\tslack = "+generate_se_code(slack,schema)+";\n";
9502         else
9503                 ret+="\tslack = 0;\n";
9504
9505 //              Initialize internal state
9506         ret += "\ttemp_tuple_received = false;\n";
9507
9508         //              Init last timestamp values to minimum value for their type
9509         if (dta.is_increasing())
9510                 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_min_literal() + ";\n";
9511         else
9512                 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_max_literal() + ";\n";
9513
9514
9515         ret += "};\n\n";
9516
9517         ret += "//\t\tFunctor destructor.\n";
9518         ret +=  "~"+this->generate_functor_name()+"(){\n";
9519
9520 //                      Destroy the parameters, if any need to be destroyed
9521         ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9522
9523         ret+="};\n\n";
9524
9525
9526 //                      no pass-by-handle params.
9527         vector<handle_param_tbl_entry *> param_handle_table;
9528
9529 //                      Parameter manipulation routines
9530         ret += generate_load_param_block(this->generate_functor_name(),
9531                                                                         this->param_tbl,param_handle_table);
9532         ret += generate_delete_param_block(this->generate_functor_name(),
9533                                                                         this->param_tbl,param_handle_table);
9534
9535 //                      Register new parameter block
9536
9537         ret += "int set_param_block(gs_int32_t sz, void* value){\n";
9538           ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9539           ret += "\treturn this->load_params_"+this->generate_functor_name()+
9540                                 "(sz, value);\n";
9541         ret += "};\n\n";
9542
9543
9544 //      -----------------------------------
9545 //                      Compare method
9546
9547         string unpack_fcna;
9548         if(needs_xform[0]) unpack_fcna = dta.get_hfta_unpack_fcn();
9549         else unpack_fcna = dta.get_hfta_unpack_fcn_noxf();
9550         string unpack_fcnb;
9551         if(needs_xform[1]) unpack_fcnb = dtb.get_hfta_unpack_fcn();
9552         else unpack_fcnb = dtb.get_hfta_unpack_fcn_noxf();
9553
9554 /*
9555         ret+="\tint compare(const host_tuple& tup1, const host_tuple& tup2) const{ \n";
9556         ret+="\t"+dta.make_host_cvar("timestamp1")+";\n";
9557         ret+="\t"+dta.make_host_cvar("timestamp2")+";\n";
9558         ret+="\tgs_int32_t problem;\n";
9559         ret+="\tif (tup1.channel == 0)  {\n";
9560         sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n",  unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9561         ret += tmpstr;
9562         sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n",  unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
9563         ret += tmpstr;
9564         ret+="\t}else{\n";
9565         sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n",  unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 1);
9566         ret += tmpstr;
9567         sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n",  unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 0);
9568         ret += tmpstr;
9569         ret+="\t}\n";
9570         ret+=
9571 "        if (timestamp1 > timestamp2+slack)\n"
9572 "            return 1;\n"
9573 "        else if (timestamp1 < timestamp2)\n"
9574 "            return -1;\n"
9575 "        else\n"
9576 "            return 0;\n"
9577 "\n"
9578 "    }\n\n";
9579 */
9580
9581 ret +=
9582 "       void get_timestamp(const host_tuple& tup0){\n"
9583 "               gs_int32_t problem;\n"
9584 ;
9585         sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n",  unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9586         ret += tmpstr;
9587 ret +=
9588 "       }\n"
9589 "\n"
9590 ;
9591
9592
9593
9594 //                      Compare to temp status.
9595         ret+=
9596 "       int compare_with_temp_status(int channel)   {\n"
9597 "       // check if tuple is temp status tuple\n"
9598 "\n"
9599 "       if (channel == 0)  {\n"
9600 //"     if(first_execution_0) return 1;\n"
9601 "        if (timestamp == last_posted_timestamp_0)\n"
9602 "            return 0;\n"
9603 "        else if (timestamp < last_posted_timestamp_0)\n"
9604 "            return -1;\n"
9605 "        else\n"
9606 "            return 1;\n"
9607 "       }\n"
9608 //"     if(first_execution_1) return 1;\n"
9609 "        if (timestamp == last_posted_timestamp_1)\n"
9610 "            return 0;\n"
9611 "        else if (timestamp < last_posted_timestamp_1)\n"
9612 "            return -1;\n"
9613 "        else\n"
9614 "            return 1;\n"
9615 "\n"
9616 "    }\n"
9617 ;
9618
9619         ret +=
9620 "       int compare_stored_with_temp_status(const host_tuple& tup0, int channel)/* const*/   {\n"
9621 ;
9622         ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
9623         ret+="\tgs_int32_t problem;\n";
9624
9625         sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n",  unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9626         ret += tmpstr;
9627         ret+="\tif (channel == 0)  {\n";
9628 //              ret+="\tif(first_execution_0) return 1;\n";
9629         ret+=
9630 "        if (l_timestamp == last_posted_timestamp_0)\n"
9631 "            return 0;\n"
9632 "        else if (l_timestamp < last_posted_timestamp_0)\n"
9633 "            return -1;\n"
9634 "        else\n"
9635 "            return 1;\n"
9636 "       }\n";
9637 //              ret+="\tif(first_execution_1) return 1;\n";
9638         ret+=
9639 "        if (l_timestamp == last_posted_timestamp_1)\n"
9640 "            return 0;\n"
9641 "        else if (l_timestamp < last_posted_timestamp_1)\n"
9642 "            return -1;\n"
9643 "        else\n"
9644 "            return 1;\n"
9645 "\n"
9646 "    }\n\n";
9647
9648
9649 //                      update temp status.
9650         ret+=
9651 "       int update_temp_status(const host_tuple& tup) {\n"
9652 "               if (tup.channel == 0)  {\n"
9653 "                       last_posted_timestamp_0=timestamp;\n"
9654 //"                     first_execution_0 = false;\n"
9655 "               }else{\n"
9656 "                       last_posted_timestamp_1=timestamp;\n"
9657 //"                     first_execution_1 = false;\n"
9658 "               }\n"
9659 "               return 0;\n"
9660 "   }\n"
9661 ;
9662         ret+=
9663 "       int update_stored_temp_status(const host_tuple& tup, int channel) {\n"
9664 ;
9665         ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
9666         ret+="\tgs_int32_t problem;\n";
9667         sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n",  unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9668         ret += tmpstr;
9669 ret+=
9670 "               if (tup.channel == 0)  {\n"
9671 "                       last_posted_timestamp_0=l_timestamp;\n"
9672 //"                     first_execution_0 = false;\n"
9673 "               }else{\n"
9674 "                       last_posted_timestamp_1=l_timestamp;\n"
9675 //"                     first_execution_1 = false;\n"
9676 "               }\n"
9677 "               return 0;\n"
9678 "   }\n"
9679 ;
9680 /*
9681         ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9682         ret+="\tgs_int32_t problem;\n";
9683         ret+="\tif (tup.channel == 0)  {\n";
9684         sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n",  unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9685         ret += tmpstr;
9686         ret+="\t}else{\n";
9687         sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n",  unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
9688         ret += tmpstr;
9689         ret+="\t}\n";
9690         ret+="\tif (tup.channel == 0)  {\n";
9691         ret+="\tlast_posted_timestamp_0=timestamp;\n";
9692         ret +="\tfirst_execution_0 = false;\n";
9693         ret+="\t}else{\n";
9694         ret+="\tlast_posted_timestamp_1=timestamp;\n";
9695         ret +="\tfirst_execution_1 = false;\n";
9696         ret+="\t}\n";
9697         ret+=
9698 "    }\n\n";
9699 */
9700
9701
9702 //                      update temp status modulo slack.
9703         ret+="\tint update_temp_status_by_slack(const host_tuple& tup, int channel) {\n";
9704     if(slack){
9705         ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9706         ret+="\tgs_int32_t problem;\n";
9707         ret+="\tif (tup.channel == 0)  {\n";
9708         sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n",  unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9709         ret += tmpstr;
9710         ret+="\t}else{\n";
9711         sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n",  unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
9712         ret += tmpstr;
9713         ret+="\t}\n";
9714 ret +=
9715 "       if (channel == 0)  {\n"
9716 "               if(first_execution_0){\n"
9717 "                       last_posted_timestamp_0=timestamp - slack;\n"
9718 "                       first_execution_0 = false;\n"
9719 "               }else{\n"
9720 "                       if(last_posted_timestamp_0 < timestamp-slack)\n"
9721 "                               last_posted_timestamp_0 = timestamp-slack;\n"
9722 "               }\n"
9723 "       }else{\n"
9724 "               if(first_execution_1){\n"
9725 "                       last_posted_timestamp_1=timestamp - slack;\n"
9726 "                       first_execution_1 = false;\n"
9727 "               }else{\n"
9728 "                       if(last_posted_timestamp_1 < timestamp-slack)\n"
9729 "                               last_posted_timestamp_1 = timestamp-slack;\n"
9730 "               }\n"
9731 "       }\n"
9732 "       return 0;\n"
9733 "    }\n\n";
9734         }else{
9735         ret +=
9736 "       return 0;\n"
9737 "       }\n\n";
9738         }
9739
9740
9741 //
9742         ret+=
9743 "bool temp_status_received(const host_tuple& tup0){\n"
9744 "       return ftaschema_is_temporal_tuple_offset(tuple_metadata_offset0, tup0.data);\n"
9745 "};\n"
9746 ;
9747 //"bool temp_status_received(){return temp_tuple_received;};\n\n";
9748
9749
9750 //              create a temp status tuple
9751         ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
9752
9753         ret += gen_init_temp_status_tuple(this->get_node_name());
9754
9755 //              Start packing.
9756         ret += "//\t\tPack the fields into the tuple.\n";
9757
9758         string fld_name = mvars[0]->get_field();
9759         int idx = table_layout->get_field_idx(fld_name);
9760         field_entry* fld = table_layout->get_field(idx);
9761         data_type dt(fld->get_type());
9762
9763 //      if (needs_xform[0] && needs_xform[1] && dt.needs_hn_translation())
9764 //              sprintf(tmpstr,"\ttuple->tuple_var%d = %s((last_posted_timestamp_0 < last_posted_timestamp_1) ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx, dt.hton_translation().c_str());
9765 //      else
9766                 sprintf(tmpstr,"\ttuple->tuple_var%d = (last_posted_timestamp_0 < last_posted_timestamp_1 ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx);
9767
9768         ret += tmpstr;
9769
9770         ret += "\treturn 0;\n";
9771         ret += "}\n\n";
9772
9773 //                      Transform tuple (before output)
9774
9775
9776  ret += "void xform_tuple(host_tuple &tup){\n";
9777  if((needs_xform[0] && !needs_xform[1]) || (needs_xform[1] && !needs_xform[0])){
9778   ret += "\tstruct "+generate_tuple_name(this->get_node_name())+" *tuple = ("+
9779                 generate_tuple_name(this->get_node_name())+" *)(tup.data);\n";
9780
9781   vector<field_entry *> flds = table_layout->get_fields();
9782
9783   ret+="\tif(tup.channel == 0){\n";
9784   if(needs_xform[0] && !needs_xform[1]){
9785         int f;
9786         for(f=0;f<flds.size();f++){
9787                 ret.append("\t");
9788                 data_type dt(flds[f]->get_type());
9789                 if(dt.get_type() == v_str_t){
9790 //                      sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
9791 //                      ret += tmpstr;
9792 //                      sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
9793 //                      ret += tmpstr;
9794 //                      sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
9795 //                      ret += tmpstr;
9796                 }else{
9797                         if(dt.needs_hn_translation()){
9798 //                              sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
9799 //                                      f, dt.hton_translation().c_str(), f);
9800 //                              ret += tmpstr;
9801                         }
9802                 }
9803         }
9804   }else{
9805         ret += "\t\treturn;\n";
9806   }
9807   ret.append("\t}\n");
9808
9809
9810   ret+="\tif(tup.channel == 1){\n";
9811   if(needs_xform[1] && !needs_xform[0]){
9812         int f;
9813         for(f=0;f<flds.size();f++){
9814                 ret.append("\t");
9815                 data_type dt(flds[f]->get_type());
9816                 if(dt.get_type() == v_str_t){
9817 //                      sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
9818 //                      ret += tmpstr;
9819 //                      sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
9820 //                      ret += tmpstr;
9821 //                      sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
9822 //                      ret += tmpstr;
9823                 }else{
9824                         if(dt.needs_hn_translation()){
9825 //                              sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
9826 //                                      f, dt.hton_translation().c_str(), f);
9827 //                              ret += tmpstr;
9828                         }
9829                 }
9830         }
9831   }else{
9832         ret += "\t\treturn;\n";
9833   }
9834   ret.append("\t}\n");
9835  }
9836
9837   ret.append("};\n\n");
9838
9839 //              print_warnings() : tell the functor if the user wants to print warnings.
9840   ret += "bool print_warnings(){\n";
9841   if(definitions.count("print_warnings") && (
9842                 definitions["print_warnings"] == "yes" ||
9843                 definitions["print_warnings"] == "Yes" ||
9844                 definitions["print_warnings"] == "YES" )) {
9845         ret += "return true;\n";
9846   }else{
9847         ret += "return false;\n";
9848   }
9849   ret.append("};\n\n");
9850
9851
9852 //              Done with methods.
9853         ret+="\n};\n\n";
9854
9855
9856         return(ret);
9857 }
9858
9859 string mrg_qpn::generate_operator(int i, string params){
9860
9861         if(disorder < 2){
9862                 return(
9863                         "       merge_operator<" +
9864                         generate_functor_name()+
9865                         "> *op"+int_to_string(i)+" = new merge_operator<"+
9866                         generate_functor_name()+
9867                         ">("+params+",10000,\"" + get_node_name() + "\");\n"
9868                 );
9869         }
9870         return(
9871                         "       merge_operator_oop<" +
9872                         generate_functor_name()+
9873                         "> *op"+int_to_string(i)+" = new merge_operator_oop<"+
9874                         generate_functor_name()+
9875                         ">("+params+",10000,\"" + get_node_name() + "\");\n"
9876         );
9877 }
9878
9879
9880 /////////////////////////////////////////////////////////
9881 //////                  JOIN_EQ_HASH functor
9882
9883
9884 string join_eq_hash_qpn::generate_functor_name(){
9885         return("join_eq_hash_functor_" + normalize_name(this->get_node_name()));
9886 }
9887
9888 string join_eq_hash_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
9889         int p,s;
9890         vector<data_type *> hashkey_dt;         // data types in the hash key
9891         vector<data_type *> temporal_dt;        // data types in the temporal key
9892         map<string,scalarexp_t *> l_equiv, r_equiv;     // field equivalences
9893         set<int> pfcn_refs;
9894         col_id_set new_cids, local_cids;
9895
9896 //--------------------------------
9897 //              Global init
9898
9899         string plus_op = "+";
9900
9901 //--------------------------------
9902 //                      key definition class
9903         string ret = "class " + generate_functor_name() + "_keydef{\n";
9904         ret += "public:\n";
9905 //                      Collect attributes from hash join predicates.
9906 //                      ASSUME equality predicate.
9907 //                      Use the upwardly compatible data type
9908 //                      (infer from '+' operator if possible, else use left type)
9909         for(p=0;p<this->hash_eq.size();++p){
9910                 scalarexp_t *lse =      hash_eq[p]->pr->get_left_se();
9911                 scalarexp_t *rse =      hash_eq[p]->pr->get_right_se();
9912                 data_type *hdt = new data_type(
9913                         lse->get_data_type(), rse->get_data_type(), plus_op );
9914                 if(hdt->get_type() == undefined_t){
9915                         hashkey_dt.push_back(lse->get_data_type()->duplicate());
9916                         delete hdt;
9917                 }else{
9918                         hashkey_dt.push_back(hdt);
9919                 }
9920                 sprintf(tmpstr,"hashkey_var%d",p);
9921                 ret+="\t"+hashkey_dt[p]->make_host_cvar(tmpstr)+";\n";
9922
9923 //                      find equivalences
9924 //                      NOTE: this code needs to be synched with the temporality
9925 //                      checking done at join_eq_hash_qpn::get_fields
9926                 if(lse->get_operator_type()==SE_COLREF){
9927                         l_equiv[lse->get_colref()->get_field()] = rse;
9928                 }
9929                 if(rse->get_operator_type()==SE_COLREF){
9930                         r_equiv[rse->get_colref()->get_field()] = lse;
9931                 }
9932         }
9933         ret += "\tbool touched;\n";
9934
9935 //              Constructors
9936         ret += "\t"+generate_functor_name() + "_keydef(){touched=false;};\n";
9937 //              destructor
9938         ret += "\t~"+ generate_functor_name() + "_keydef(){\n";
9939         for(p=0;p<hashkey_dt.size();p++){
9940                 if(hashkey_dt[p]->is_buffer_type()){
9941                         sprintf(tmpstr,"\t\t%s(&hashkey_var%d);\n",
9942                           hashkey_dt[p]->get_hfta_buffer_destroy().c_str(), p );
9943                         ret += tmpstr;
9944                 }
9945         }
9946         ret += "\t};\n";
9947         ret+="\tvoid touch(){touched = true;};\n";
9948         ret+="\tbool is_touched(){return touched;};\n";
9949         ret +="};\n\n";
9950
9951
9952 //--------------------------------
9953 //              temporal equality definition class
9954         ret += "class " + generate_functor_name() + "_tempeqdef{\n";
9955         ret += "public:\n";
9956 //                      Collect attributes from hash join predicates.
9957 //                      ASSUME equality predicate.
9958 //                      Use the upwardly compatible date type
9959 //                      (infer from '+' operator if possible, else use left type)
9960         for(p=0;p<this->temporal_eq.size();++p){
9961                 scalarexp_t *lse =      temporal_eq[p]->pr->get_left_se();
9962                 scalarexp_t *rse =      temporal_eq[p]->pr->get_right_se();
9963                 data_type *hdt = new data_type(
9964                         lse->get_data_type(), rse->get_data_type(), plus_op );
9965                 if(hdt->get_type() == undefined_t){
9966                         temporal_dt.push_back(hash_eq[p]->pr->get_left_se()->get_data_type()->duplicate());
9967                         delete hdt;
9968                 }else{
9969                         temporal_dt.push_back(hdt);
9970                 }
9971                 sprintf(tmpstr,"tempeq_var%d",p);
9972                 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
9973 //                      find equivalences
9974                 if(lse->get_operator_type()==SE_COLREF){
9975                         l_equiv[lse->get_colref()->get_field()] = rse;
9976                 }
9977                 if(rse->get_operator_type()==SE_COLREF){
9978                         r_equiv[rse->get_colref()->get_field()] = lse;
9979                 }
9980         }
9981
9982 //              Constructors
9983         ret += "\t"+generate_functor_name() + "_tempeqdef(){};\n";
9984 //              destructor
9985         ret += "\t~"+ generate_functor_name() + "_tempeqdef(){\n";
9986         for(p=0;p<temporal_dt.size();p++){
9987                 if(temporal_dt[p]->is_buffer_type()){
9988                         sprintf(tmpstr,"\t\t%s(&tempeq_var%d);\n",
9989                           temporal_dt[p]->get_hfta_buffer_destroy().c_str(), p );
9990                         ret += tmpstr;
9991                 }
9992         }
9993         ret += "\t};\n";
9994         ret +="};\n\n";
9995
9996
9997 //--------------------------------
9998 //                      temporal eq, hash join functor class
9999         ret += "class " + this->generate_functor_name() + "{\n";
10000
10001 //                      Find variables referenced in this query node.
10002
10003         col_id_set cid_set;
10004         col_id_set::iterator csi;
10005
10006     for(p=0;p<where.size();++p)
10007         gather_pr_col_ids(where[p]->pr,cid_set,NULL);
10008     for(s=0;s<select_list.size();s++)
10009         gather_se_col_ids(select_list[s]->se,cid_set,NULL);
10010
10011 //                      Private variables : store the state of the functor.
10012 //                      1) variables for unpacked attributes
10013 //                      2) offsets of the upacked attributes
10014 //                      3) storage of partial functions
10015 //                      4) storage of complex literals (i.e., require a constructor)
10016
10017         ret += "private:\n";
10018
10019         // var to save the schema handles
10020         ret += "\tint schema_handle0;\n";
10021         ret += "\tint schema_handle1;\n";
10022
10023         // generate the declaration of all the variables related to
10024         // temp tuples generation
10025         ret += gen_decl_temp_vars();
10026         // tuple metadata offsets
10027         ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
10028
10029 //                      unpacked attribute storage, offsets
10030         ret += "//\t\tstorage and offsets of accessed fields.\n";
10031         ret += generate_access_vars(cid_set, schema);
10032
10033
10034 //                      Variables to store results of partial functions.
10035 //                      WARNING find_partial_functions modifies the SE
10036 //                      (it marks the partial function id).
10037         ret += "//\t\tParital function result storage\n";
10038         vector<scalarexp_t *> partial_fcns;
10039         vector<int> fcn_ref_cnt;
10040         vector<bool> is_partial_fcn;
10041         for(s=0;s<select_list.size();s++){
10042                 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL,  Ext_fcns);
10043         }
10044         for(p=0;p<where.size();p++){
10045                 find_partial_fcns_pr(where[p]->pr, &partial_fcns,NULL,NULL,  Ext_fcns);
10046         }
10047         if(partial_fcns.size()>0){
10048           ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
10049           ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
10050         }
10051
10052 //                      Complex literals (i.e., they need constructors)
10053         ret += "//\t\tComplex literal storage.\n";
10054         cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
10055         ret += generate_complex_lit_vars(complex_literals);
10056 //                      We need the following to handle strings in outer joins.
10057 //                              NEED AN EMPTY LITERAL FOR EAcH STRUCTURED LITERAL
10058         ret += "\tstruct vstring EmptyString;\n";
10059         ret += "\tstruct hfta_ipv6_str EmptyIp6;\n";
10060
10061 //                      Pass-by-handle parameters
10062         ret += "//\t\tPass-by-handle storage.\n";
10063         vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
10064         ret += generate_pass_by_handle_vars(param_handle_table);
10065
10066
10067 //                      variables to hold parameters.
10068         ret += "//\tfor query parameters\n";
10069         ret += generate_param_vars(param_tbl);
10070
10071
10072         ret += "\npublic:\n";
10073 //-------------------
10074 //                      The functor constructor
10075 //                      pass in the schema handle.
10076 //                      1) make assignments to the unpack offset variables
10077 //                      2) initialize the complex literals
10078
10079         ret += "//\t\tFunctor constructor.\n";
10080         ret +=  this->generate_functor_name()+"(int schema_handle0, int schema_handle1){\n";
10081
10082         ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
10083         ret += "\t\tthis->schema_handle1 = schema_handle1;\n";
10084 //              metadata offsets
10085         ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10086         ret += "\ttuple_metadata_offset1 = ftaschema_get_tuple_metadata_offset(schema_handle1);\n";
10087
10088 //              unpack vars
10089         ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
10090         ret += gen_access_var_init(cid_set);
10091
10092 //              complex literals
10093         ret += "//\t\tInitialize complex literals.\n";
10094         ret += gen_complex_lit_init(complex_literals);
10095 //              Initialize EmptyString to the ... empty string
10096 //                              NEED AN EMPTY LITERAL FOR EAcH STRUCTURED LITERAL
10097         literal_t mtstr_lit("");
10098         ret += "\t" + mtstr_lit.to_hfta_C_code("&EmptyString")+";\n";
10099         literal_t mip6_lit("0:0:0:0:0:0:0:0",LITERAL_IPV6);
10100         ret += "\t" + mip6_lit.to_hfta_C_code("&EmptyIp6")+";\n";
10101
10102 //              Initialize partial function results so they can be safely GC'd
10103         ret += gen_partial_fcn_init(partial_fcns);
10104
10105 //              Initialize non-query-parameter parameter handles
10106         ret += gen_pass_by_handle_init(param_handle_table);
10107
10108 //              Init temporal attributes referenced in select list
10109         ret += gen_init_temp_vars(schema, select_list, NULL);
10110
10111
10112         ret += "};\n";
10113
10114
10115
10116 //-------------------
10117 //                      Functor destructor
10118         ret += "//\t\tFunctor destructor.\n";
10119         ret +=  "~"+this->generate_functor_name()+"(){\n";
10120
10121 //                      clean up buffer type complex literals
10122         ret += gen_complex_lit_dtr(complex_literals);
10123
10124 //                      Deregister the pass-by-handle parameters
10125         ret += "/* register and de-register the pass-by-handle parameters */\n";
10126         ret += gen_pass_by_handle_dtr(param_handle_table);
10127
10128 //                      clean up partial function results.
10129         ret += "/* clean up partial function storage    */\n";
10130         ret += gen_partial_fcn_dtr(partial_fcns);
10131
10132 //                      Destroy the parameters, if any need to be destroyed
10133         ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10134
10135         ret += "};\n\n";
10136
10137
10138 //-------------------
10139 //                      Parameter manipulation routines
10140         ret += generate_load_param_block(this->generate_functor_name(),
10141                                                                         this->param_tbl,param_handle_table);
10142         ret += generate_delete_param_block(this->generate_functor_name(),
10143                                                                         this->param_tbl,param_handle_table);
10144
10145 //-------------------
10146 //                      Register new parameter block
10147
10148         ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10149           ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10150           ret += "\treturn this->load_params_"+this->generate_functor_name()+
10151                                 "(sz, value);\n";
10152         ret += "};\n\n";
10153
10154
10155 //-------------------
10156 //                      The create_key method.
10157 //                      Perform heap allocation.
10158 //                      ASSUME : the LHS of the preds reference channel 0 attributes
10159 //                      NOTE : it may fail if a partial function fails.
10160
10161         ret += this->generate_functor_name()+"_keydef *create_key(host_tuple &tup, bool &failed){\n";
10162 //              Variables for execution of the function.
10163         ret+="\t"+this->generate_functor_name()+"_keydef *retval = NULL;\n";
10164         ret+="\tgs_int32_t problem = 0;\n";
10165
10166 //              Assume unsuccessful completion
10167         ret+= "\tfailed = true;\n";
10168
10169 //              Switch the processing based on the channel
10170         ret+="\tif(tup.channel == 0){\n";
10171         ret+="// ------------ processing for channel 0\n";
10172         ret+="\t\thost_tuple &tup0 = tup;\n";
10173 //              Gather partial fcns and colids ref'd by this branch
10174         pfcn_refs.clear();
10175         new_cids.clear(); local_cids.clear();
10176         for(p=0;p<hash_eq.size();p++){
10177                 collect_partial_fcns(hash_eq[p]->pr->get_left_se(), pfcn_refs);
10178                 gather_se_col_ids(hash_eq[p]->pr->get_left_se(),local_cids,NULL);
10179         }
10180
10181 //              Start by cleaning up partial function results
10182         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10183         ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10184
10185 //                      Evaluate the partial functions
10186         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10187                                 new_cids, NULL, "NULL", needs_xform);
10188 //                      test passed -- unpack remaining cids.
10189         ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10190
10191 //                      Alloc and load a key object
10192         ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10193         for(p=0;p<hash_eq.size();p++){
10194                 data_type *hdt = hash_eq[p]->pr->get_left_se()->get_data_type();
10195                 if(hdt->is_buffer_type()){
10196                         string vname = "tmp_keyvar"+int_to_string(p);
10197                         ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_left_se(),schema)+";\n";
10198                         ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10199                 }else{
10200                   sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10201                         p,generate_se_code(hash_eq[p]->pr->get_left_se(),schema).c_str() );
10202                   ret += tmpstr;
10203                 }
10204         }
10205         ret += "\t}else{\n";
10206
10207         ret+="// ------------ processing for channel 1\n";
10208         ret+="\t\thost_tuple &tup1 = tup;\n";
10209 //              Gather partial fcns and colids ref'd by this branch
10210         pfcn_refs.clear();
10211         new_cids.clear(); local_cids.clear();
10212         for(p=0;p<hash_eq.size();p++){
10213                 collect_partial_fcns(hash_eq[p]->pr->get_right_se(), pfcn_refs);
10214                 gather_se_col_ids(hash_eq[p]->pr->get_right_se(),local_cids,NULL);
10215         }
10216
10217 //              Start by cleaning up partial function results
10218         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10219         ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10220
10221 //                      Evaluate the partial functions
10222         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10223                                 new_cids, NULL, "NULL", needs_xform);
10224
10225 //                      test passed -- unpack remaining cids.
10226         ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10227
10228 //                      Alloc and load a key object
10229         ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10230         for(p=0;p<hash_eq.size();p++){
10231                 data_type *hdt = hash_eq[p]->pr->get_right_se()->get_data_type();
10232                 if(hdt->is_buffer_type()){
10233                         string vname = "tmp_keyvar"+int_to_string(p);
10234                         ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_right_se(),schema)+";\n";
10235                         ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10236                 }else{
10237                   sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10238                         p,generate_se_code(hash_eq[p]->pr->get_right_se(),schema).c_str() );
10239                   ret += tmpstr;
10240                 }
10241         }
10242         ret += "\t}\n";
10243
10244         ret += "\tfailed = false;\n";
10245         ret += "\t return retval;\n";
10246         ret += "}\n";
10247
10248
10249 //-------------------
10250 //                      The load_ts method.
10251 //                      load into an allocated buffer.
10252 //                      ASSUME : the LHS of the preds reference channel 0 attributes
10253 //                      NOTE : it may fail if a partial function fails.
10254 //                      NOTE : can't handle buffer attributes
10255
10256         ret += "bool load_ts_from_tup("+this->generate_functor_name()+"_tempeqdef *ts, host_tuple &tup){\n";
10257 //              Variables for execution of the function.
10258         ret+="\tgs_int32_t problem = 0;\n";
10259
10260 //              Switch the processing based on the channel
10261         ret+="\tif(tup.channel == 0){\n";
10262         ret+="// ------------ processing for channel 0\n";
10263         ret+="\t\thost_tuple &tup0 = tup;\n";
10264
10265 //              Gather partial fcns and colids ref'd by this branch
10266         pfcn_refs.clear();
10267         new_cids.clear(); local_cids.clear();
10268         for(p=0;p<temporal_eq.size();p++){
10269                 collect_partial_fcns(temporal_eq[p]->pr->get_left_se(), pfcn_refs);
10270                 gather_se_col_ids(temporal_eq[p]->pr->get_left_se(),local_cids,NULL);
10271         }
10272
10273 //              Start by cleaning up partial function results
10274         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10275         ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10276
10277 //                      Evaluate the partial functions
10278         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10279                                 new_cids, NULL, "false", needs_xform);
10280
10281 //                      test passed -- unpack remaining cids.
10282         ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10283
10284 //                      load the temporal key object
10285         for(p=0;p<temporal_eq.size();p++){
10286                 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10287                         p,generate_se_code(temporal_eq[p]->pr->get_left_se(),schema).c_str() );
10288                 ret += tmpstr;
10289         }
10290
10291         ret += "\t}else{\n";
10292
10293         ret+="// ------------ processing for channel 1\n";
10294         ret+="\t\thost_tuple &tup1 = tup;\n";
10295
10296 //              Gather partial fcns and colids ref'd by this branch
10297         pfcn_refs.clear();
10298         new_cids.clear(); local_cids.clear();
10299         for(p=0;p<temporal_eq.size();p++){
10300                 collect_partial_fcns(temporal_eq[p]->pr->get_right_se(), pfcn_refs);
10301                 gather_se_col_ids(temporal_eq[p]->pr->get_right_se(),local_cids,NULL);
10302         }
10303
10304 //              Start by cleaning up partial function results
10305         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10306         ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10307
10308 //                      Evaluate the partial functions
10309         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10310                                 new_cids, NULL, "false", needs_xform);
10311
10312 //                      test passed -- unpack remaining cids.
10313         ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10314
10315 //                      load the key object
10316         for(p=0;p<temporal_eq.size();p++){
10317                 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10318                         p,generate_se_code(temporal_eq[p]->pr->get_right_se(),schema).c_str() );
10319                 ret += tmpstr;
10320         }
10321
10322         ret += "\t}\n";
10323
10324         ret += "\t return true;\n";
10325         ret += "}\n";
10326
10327
10328 //      ------------------------------
10329 //              Load ts from ts
10330 //              (i.e make a copy)
10331
10332         ret += "bool load_ts_from_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10333         for(p=0;p<temporal_eq.size();p++){
10334                 sprintf(tmpstr,"\tlts->tempeq_var%d = rts->tempeq_var%d;\n",p,p);
10335                 ret += tmpstr;
10336         }
10337         ret += "}\n";
10338
10339 //      -------------------------------------
10340 //              compare_ts_with_ts
10341 //              There should be only one variable to compare.
10342 //              If there is more, assume an arbitrary lexicographic order.
10343
10344         ret += "int compare_ts_with_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10345         for(p=0;p<temporal_eq.size();p++){
10346                 sprintf(tmpstr,"\tif(lts->tempeq_var%d < rts->tempeq_var%d) return(-1);\n",p,p);
10347                 ret += tmpstr;
10348                 sprintf(tmpstr,"\tif(lts->tempeq_var%d > rts->tempeq_var%d) return(1);\n",p,p);
10349                 ret += tmpstr;
10350         }
10351         ret += "\treturn(0);\n";
10352         ret += "}\n";
10353
10354 //      ------------------------------------------
10355 //              apply_prefilter
10356 //              apply the prefilter
10357
10358         ret += "bool apply_prefilter(host_tuple &tup){\n";
10359
10360 //              Variables for this procedure
10361         ret+="\tgs_int32_t problem = 0;\n";
10362         ret+="\tgs_retval_t retval;\n";
10363
10364 //              Switch the processing based on the channel
10365         ret+="\tif(tup.channel == 0){\n";
10366         ret+="// ------------ processing for channel 0\n";
10367         ret+="\t\thost_tuple &tup0 = tup;\n";
10368 //              Gather partial fcns and colids ref'd by this branch
10369         pfcn_refs.clear();
10370         new_cids.clear(); local_cids.clear();
10371         for(p=0;p<prefilter[0].size();p++){
10372                 collect_partial_fcns_pr((prefilter[0])[p]->pr, pfcn_refs);
10373         }
10374
10375 //              Start by cleaning up partial function results
10376         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10377         ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10378
10379         for(p=0;p<(prefilter[0]).size();++p){
10380                 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10381                 ret += tmpstr;
10382 //                      Find the set of variables accessed in this CNF elem,
10383 //                      but in no previous element.
10384                 col_id_set new_pr_cids;
10385                 get_new_pred_cids((prefilter[0])[p]->pr,local_cids,new_pr_cids, NULL);
10386 //                      Unpack these values.
10387                 ret += gen_unpack_cids(schema, new_pr_cids, "false", needs_xform);
10388 //                      Find partial fcns ref'd in this cnf element
10389                 set<int> pr_pfcn_refs;
10390                 collect_partial_fcns_pr((prefilter[0])[p]->pr, pr_pfcn_refs);
10391                 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10392
10393                 ret += "\t\tif( !("+generate_predicate_code((prefilter[0])[p]->pr,schema)+") ) return(false);\n";
10394         }
10395         ret += "\t}else{\n";
10396         ret+="// ------------ processing for channel 1\n";
10397         ret+="\t\thost_tuple &tup1 = tup;\n";
10398 //              Gather partial fcns and colids ref'd by this branch
10399         pfcn_refs.clear();
10400         new_cids.clear(); local_cids.clear();
10401         for(p=0;p<prefilter[1].size();p++){
10402                 collect_partial_fcns_pr((prefilter[1])[p]->pr, pfcn_refs);
10403         }
10404
10405 //              Start by cleaning up partial function results
10406         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10407         ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10408
10409         for(p=0;p<(prefilter[1]).size();++p){
10410                 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10411                 ret += tmpstr;
10412 //                      Find the set of variables accessed in this CNF elem,
10413 //                      but in no previous element.
10414                 col_id_set pr_new_cids;
10415                 get_new_pred_cids((prefilter[1])[p]->pr,local_cids, pr_new_cids, NULL);
10416 //                      Unpack these values.
10417                 ret += gen_unpack_cids(schema, pr_new_cids, "false", needs_xform);
10418 //                      Find partial fcns ref'd in this cnf element
10419                 set<int> pr_pfcn_refs;
10420                 collect_partial_fcns_pr((prefilter[1])[p]->pr, pr_pfcn_refs);
10421                 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10422
10423                 ret += "\t\tif( !("+generate_predicate_code((prefilter[1])[p]->pr,schema)+ ") ) return(false);\n";
10424         }
10425
10426         ret += "\t}\n";
10427         ret+="\treturn true;\n";
10428         ret += "}\n";
10429
10430
10431 //      -------------------------------------
10432 //                      create_output_tuple
10433 //                      If the postfilter on the pair of tuples passes,
10434 //                      create an output tuple from the combined information.
10435 //                      (Plus, outer join processing)
10436
10437         ret += "host_tuple create_output_tuple(const host_tuple &tup0, const host_tuple &tup1, bool &failed){\n";
10438
10439         ret += "\thost_tuple tup;\n";
10440         ret += "\tfailed = true;\n";
10441         ret += "\tgs_retval_t retval = 0;\n";
10442         ret += "\tgs_int32_t problem = 0;\n";
10443
10444 //              Start by cleaning up partial function results
10445         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10446         pfcn_refs.clear();
10447         new_cids.clear(); local_cids.clear();
10448         for(p=0;p<postfilter.size();p++){
10449                 collect_partial_fcns_pr(postfilter[p]->pr, pfcn_refs);
10450         }
10451         for(s=0;s<select_list.size();s++){
10452                 collect_partial_fcns(select_list[s]->se, pfcn_refs);
10453         }
10454         ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10455
10456
10457         ret+="\tif(tup0.data && tup1.data){\n";
10458 //                      Evaluate the postfilter
10459         new_cids.clear(); local_cids.clear();
10460         for(p=0;p<postfilter.size();p++){
10461                 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10462                 ret += tmpstr;
10463 //                      Find the set of variables accessed in this CNF elem,
10464 //                      but in no previous element.
10465                 col_id_set pr_new_cids;
10466                 get_new_pred_cids(postfilter[p]->pr,local_cids, pr_new_cids, NULL);
10467 //                      Unpack these values.
10468                 ret += gen_unpack_cids(schema, pr_new_cids, "tup", needs_xform);
10469 //                      Find partial fcns ref'd in this cnf element
10470                 set<int> pr_pfcn_refs;
10471                 collect_partial_fcns_pr(postfilter[p]->pr, pr_pfcn_refs);
10472                 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"tup");
10473
10474                 ret += "\t\tif( !("+generate_predicate_code(postfilter[p]->pr,schema)+ ") ) return(tup);\n";
10475         }
10476
10477
10478 //              postfilter passed, evaluate partial functions for select list
10479
10480         set<int> sl_pfcns;
10481         col_id_set se_cids;
10482         for(s=0;s<select_list.size();s++){
10483                 collect_partial_fcns(select_list[s]->se, sl_pfcns);
10484         }
10485
10486         if(sl_pfcns.size() > 0)
10487                 ret += "//\t\tUnpack remaining partial fcns.\n";
10488         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
10489                                         local_cids, NULL, "tup", needs_xform);
10490
10491 //                      Unpack remaining fields
10492         ret += "//\t\tunpack any remaining fields from the input tuples.\n";
10493         for(s=0;s<select_list.size();s++)
10494                 get_new_se_cids(select_list[s]->se, local_cids,se_cids,NULL);
10495         ret += gen_unpack_cids(schema,  se_cids,"tup", needs_xform);
10496
10497
10498 //                      Deal with outer join stuff
10499         col_id_set l_cids, r_cids;
10500         col_id_set::iterator ocsi;
10501         for(ocsi=local_cids.begin();ocsi!=local_cids.end();++ocsi){
10502                 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10503                 else                                            r_cids.insert((*ocsi));
10504         }
10505         for(ocsi=se_cids.begin();ocsi!=se_cids.end();++ocsi){
10506                 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10507                 else                                            r_cids.insert((*ocsi));
10508         }
10509
10510         ret += "\t}else if(tup0.data){\n";
10511         string unpack_null = ""; col_id_set extra_cids;
10512         for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
10513                 string field = (*ocsi).field;
10514                 if(r_equiv.count(field)){
10515                         unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
10516                         get_new_se_cids(r_equiv[field],l_cids,new_cids,NULL);
10517                 }else{
10518                 int schref = (*ocsi).schema_ref;
10519                         data_type dt(schema->get_type_name(schref,field));
10520                         literal_t empty_lit(dt.type_indicator());
10521                         if(empty_lit.is_cpx_lit()){
10522 //                              sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
10523 //                              unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10524 //                                      NB : works for string type only
10525 //                                      NNB: installed fix for ipv6, more of this should be pushed
10526 //                                              into the literal_t code.
10527                                 unpack_null+="\tunpack_var_"+field+"_1= "+empty_lit.hfta_empty_literal_name()+";\n";
10528                         }else{
10529                                 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
10530                         }
10531                 }
10532         }
10533         ret += gen_unpack_cids(schema,  l_cids, "tup", needs_xform);
10534         ret += gen_unpack_cids(schema,  extra_cids, "tup", needs_xform);
10535         ret += unpack_null;
10536         ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
10537
10538         ret+="\t}else{\n";
10539         unpack_null = ""; extra_cids.clear();
10540         for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
10541                 string field = (*ocsi).field;
10542                 if(l_equiv.count(field)){
10543                         unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
10544                         get_new_se_cids(l_equiv[field],r_cids,new_cids,NULL);
10545                 }else{
10546                 int schref = (*ocsi).schema_ref;
10547                         data_type dt(schema->get_type_name(schref,field));
10548                         literal_t empty_lit(dt.type_indicator());
10549                         if(empty_lit.is_cpx_lit()){
10550 //                              sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
10551 //                              unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10552 //                                      NB : works for string type only
10553 //                                      NNB: installed fix for ipv6, more of this should be pushed
10554 //                                              into the literal_t code.
10555                                 unpack_null+="\tunpack_var_"+field+"_0= "+empty_lit.hfta_empty_literal_name()+";\n";
10556                         }else{
10557                                 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
10558                         }
10559                 }
10560         }
10561         ret += gen_unpack_cids(schema,  r_cids, "tup", needs_xform);
10562         ret += gen_unpack_cids(schema,  extra_cids, "tup", needs_xform);
10563         ret += unpack_null;
10564         ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
10565         ret+="\t}\n";
10566
10567
10568
10569 //          Unpack any BUFFER type selections into temporaries
10570 //          so that I can compute their size and not have
10571 //          to recompute their value during tuple packing.
10572 //          I can use regular assignment here because
10573 //          these temporaries are non-persistent.
10574
10575         ret += "//\t\tCompute the size of the tuple.\n";
10576         ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
10577
10578 //                      Unpack all buffer type selections, to be able to compute their size
10579         ret += gen_buffer_selvars(schema, select_list);
10580
10581 //      The size of the tuple is the size of the tuple struct plus the
10582 //      size of the buffers to be copied in.
10583
10584     ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
10585         ret += gen_buffer_selvars_size(select_list,schema);
10586       ret.append(";\n");
10587
10588 //              Allocate tuple data block.
10589         ret += "//\t\tCreate the tuple block.\n";
10590           ret += "\ttup.data = malloc(tup.tuple_size);\n";
10591           ret += "\ttup.heap_resident = true;\n";
10592 //        ret += "\ttup.channel = 0;\n";
10593
10594 //              Mark tuple as regular
10595           ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
10596
10597
10598           ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
10599                                 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
10600
10601 //              Start packing.
10602 //                      (Here, offsets are hard-wired.  is this a problem?)
10603
10604         ret += "//\t\tPack the fields into the tuple.\n";
10605         ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
10606
10607 //                      Delete string temporaries
10608         ret += gen_buffer_selvars_dtr(select_list);
10609
10610         ret += "\tfailed = false;\n";
10611         ret += "\treturn tup;\n";
10612         ret += "};\n";
10613
10614
10615
10616 //-----------------------------
10617 //                      Method for checking whether tuple is temporal
10618
10619         ret += "bool temp_status_received(host_tuple &tup){\n";
10620
10621 //              Switch the processing based on the channel
10622         ret+="\tif(tup.channel == 0){\n";
10623         ret+="\t\thost_tuple &tup0 = tup;\n";
10624         ret += gen_temp_tuple_check(this->node_name, 0);
10625         ret += "\t}else{\n";
10626         ret+="\t\thost_tuple &tup1 = tup;\n";
10627         ret += gen_temp_tuple_check(this->node_name, 1);
10628         ret += "\t}\n";
10629         ret += "\treturn temp_tuple_received;\n};\n\n";
10630
10631
10632 //-------------------------------------------------------------------
10633 //              Temporal update functions
10634
10635
10636 //              create a temp status tuple
10637         ret += "int create_temp_status_tuple(const host_tuple &tup0, const host_tuple &tup1, host_tuple& result) {\n\n";
10638
10639         ret += "\tgs_retval_t retval = 0;\n";
10640         ret += "\tgs_int32_t problem = 0;\n";
10641
10642         ret += "\tif(tup0.data){\n";
10643
10644 //              Unpack all the temporal attributes references in select list
10645         col_id_set found_cids;
10646
10647         for(s=0;s<select_list.size();s++){
10648                 if (select_list[s]->se->get_data_type()->is_temporal()) {
10649 //                      Find the set of attributes accessed in this SE
10650                         col_id_set new_cids;
10651                         get_new_se_cids(select_list[s]->se,found_cids, new_cids, NULL);
10652                 }
10653         }
10654
10655         //                      Deal with outer join stuff
10656         l_cids.clear(), r_cids.clear();
10657         for(ocsi=found_cids.begin();ocsi!=found_cids.end();++ocsi){
10658                 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10659                 else                                            r_cids.insert((*ocsi));
10660         }
10661         unpack_null = "";
10662         extra_cids.clear();
10663         for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
10664                 string field = (*ocsi).field;
10665                 if(r_equiv.count(field)){
10666                         unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
10667                         col_id_set addnl_cids;
10668                         get_new_se_cids(r_equiv[field],l_cids,addnl_cids,NULL);
10669                 }else{
10670                 int schref = (*ocsi).schema_ref;
10671                         data_type dt(schema->get_type_name(schref,field));
10672                         literal_t empty_lit(dt.type_indicator());
10673                         if(empty_lit.is_cpx_lit()){
10674                                 sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
10675                                 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10676                         }else{
10677                                 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
10678                         }
10679                 }
10680         }
10681         ret += gen_unpack_cids(schema,  l_cids, "1", needs_xform);
10682         ret += gen_unpack_cids(schema,  extra_cids, "1", needs_xform);
10683         ret += unpack_null;
10684
10685         ret+="\t}else if (tup1.data) {\n";
10686         unpack_null = ""; extra_cids.clear();
10687         for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
10688                 string field = (*ocsi).field;
10689                 if(l_equiv.count(field)){
10690                         unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
10691                         col_id_set addnl_cids;
10692                         get_new_se_cids(l_equiv[field],r_cids,addnl_cids,NULL);
10693                 }else{
10694                 int schref = (*ocsi).schema_ref;
10695                         data_type dt(schema->get_type_name(schref,field));
10696                         literal_t empty_lit(dt.type_indicator());
10697                         if(empty_lit.is_cpx_lit()){
10698                                 sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
10699                                 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10700                         }else{
10701                                 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
10702                         }
10703                 }
10704         }
10705         ret += gen_unpack_cids(schema,  r_cids, "1", needs_xform);
10706         ret += gen_unpack_cids(schema,  extra_cids, "1", needs_xform);
10707         ret += unpack_null;
10708         ret+="\t}\n";
10709
10710         ret += gen_init_temp_status_tuple(this->get_node_name());
10711
10712 //              Start packing.
10713         ret += "//\t\tPack the fields into the tuple.\n";
10714         ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
10715
10716
10717         ret += "\treturn 0;\n";
10718         ret += "};\n\n";
10719
10720
10721         ret += "};\n\n\n";
10722
10723 //----------------------------------------------------------
10724 //                      The hash function
10725
10726         ret += "struct "+generate_functor_name()+"_hash_func{\n";
10727         ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
10728                                 "_keydef *key) const{\n";
10729         ret += "\t\treturn( (";
10730         if(hashkey_dt.size() > 0){
10731           for(p=0;p<hashkey_dt.size();p++){
10732                 if(p>0) ret += "^";
10733                 if(hashkey_dt[p]->use_hashfunc()){
10734 //                      sprintf(tmpstr,"%s(&(key->hashkey_var%d))",hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
10735                         if(hashkey_dt[p]->is_buffer_type())
10736                                 sprintf(tmpstr,"(%s*%s(&(key->hashkey_var%d)))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
10737                         else
10738                                 sprintf(tmpstr,"(%s*%s(key->hashkey_var%d))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
10739                 }else{
10740                         sprintf(tmpstr,"(%s*key->hashkey_var%d)",hash_nums[p%NRANDS].c_str(),p);
10741                 }
10742                 ret += tmpstr;
10743           }
10744         }else{
10745                 ret += "0";
10746         }
10747         ret += ") >> 32);\n";
10748         ret += "\t}\n";
10749         ret += "};\n\n";
10750
10751 //----------------------------------------------------------
10752 //                      The comparison function
10753
10754         ret += "struct "+generate_functor_name()+"_equal_func{\n";
10755         ret += "\tbool operator()(const "+generate_functor_name()+"_keydef *key1, "+
10756                         generate_functor_name()+"_keydef *key2) const{\n";
10757         ret += "\t\treturn( (";
10758         if(hashkey_dt.size() > 0){
10759           for(p=0;p<hashkey_dt.size();p++){
10760                 if(p>0) ret += ") && (";
10761                 if(hashkey_dt[p]->complex_comparison(hashkey_dt[p])){
10762                   if(hashkey_dt[p]->is_buffer_type())
10763                         sprintf(tmpstr,"(%s(&(key1->hashkey_var%d), &(key2->hashkey_var%d))==0)",
10764                                 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
10765                   else
10766                         sprintf(tmpstr,"(%s((key1->hashkey_var%d), (key2->hashkey_var%d))==0)",
10767                                 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
10768                 }else{
10769                         sprintf(tmpstr,"key1->hashkey_var%d == key2->hashkey_var%d",p,p);
10770                 }
10771                 ret += tmpstr;
10772           }
10773         }else{
10774                 ret += "1";
10775         }
10776         ret += ") );\n";
10777         ret += "\t}\n";
10778         ret += "};\n\n";
10779
10780
10781         return(ret);
10782 }
10783
10784
10785
10786 string join_eq_hash_qpn::generate_operator(int i, string params){
10787
10788                 return(
10789                         "       join_eq_hash_operator<" +
10790                         generate_functor_name()+ ","+
10791                         generate_functor_name() + "_tempeqdef,"+
10792                         generate_functor_name() + "_keydef,"+
10793                         generate_functor_name()+"_hash_func,"+
10794                         generate_functor_name()+"_equal_func"
10795                         "> *op"+int_to_string(i)+" = new join_eq_hash_operator<"+
10796                         generate_functor_name()+","+
10797                         generate_functor_name() + "_tempeqdef,"+
10798                         generate_functor_name() + "_keydef,"+
10799                         generate_functor_name()+"_hash_func,"+
10800                         generate_functor_name()+"_equal_func"
10801                         ">("+params+", "+
10802                         int_to_string(from[0]->get_property()+2*from[1]->get_property())+", \"" + get_node_name() +
10803 "\");\n"
10804                 );
10805 }
10806
10807
10808
10809 ////////////////////////////////////////////////////////////////
10810 ////    SGAHCWCB functor
10811
10812
10813
10814 string sgahcwcb_qpn::generate_functor_name(){
10815         return("sgahcwcb_functor_" + normalize_name(this->get_node_name()));
10816 }
10817
10818
10819 string sgahcwcb_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10820         int a,g,w,s;
10821
10822
10823 //                      Initialize generate utility globals
10824         segen_gb_tbl = &(gb_tbl);
10825
10826
10827 //--------------------------------
10828 //                      group definition class
10829         string ret = "class " + generate_functor_name() + "_groupdef{\n";
10830         ret += "public:\n";
10831         ret += "\tbool valid;\n";
10832         for(g=0;g<this->gb_tbl.size();g++){
10833                 sprintf(tmpstr,"gb_var%d",g);
10834                 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
10835         }
10836 //              Constructors
10837         ret += "\t"+generate_functor_name() + "_groupdef(){valid=true;};\n";
10838         ret += "\t"+generate_functor_name() + "_groupdef("+
10839                 this->generate_functor_name() + "_groupdef *gd){\n";
10840         for(g=0;g<gb_tbl.size();g++){
10841                 data_type *gdt = gb_tbl.get_data_type(g);
10842                 if(gdt->is_buffer_type()){
10843                         sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
10844                           gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
10845                         ret += tmpstr;
10846                 }else{
10847                         sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
10848                         ret += tmpstr;
10849                 }
10850         }
10851         ret += "\tvalid=true;\n";
10852         ret += "\t};\n";
10853 //              destructor
10854         ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
10855         for(g=0;g<gb_tbl.size();g++){
10856                 data_type *gdt = gb_tbl.get_data_type(g);
10857                 if(gdt->is_buffer_type()){
10858                         sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
10859                           gdt->get_hfta_buffer_destroy().c_str(), g );
10860                         ret += tmpstr;
10861                 }
10862         }
10863         ret += "\t};\n";
10864         ret +="};\n\n";
10865
10866 //--------------------------------
10867 //                      aggr definition class
10868         ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
10869         ret += "public:\n";
10870         for(a=0;a<aggr_tbl.size();a++){
10871 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10872                 sprintf(tmpstr,"aggr_var%d",a);
10873                 if(aggr_tbl.is_builtin(a))
10874                 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
10875                 else
10876                 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
10877         }
10878 //              Constructors
10879         ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
10880 //              destructor
10881         ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
10882         for(a=0;a<aggr_tbl.size();a++){
10883 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10884                 if(aggr_tbl.is_builtin(a)){
10885                         data_type *adt = aggr_tbl.get_data_type(a);
10886                         if(adt->is_buffer_type()){
10887                                 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
10888                                 adt->get_hfta_buffer_destroy().c_str(), a );
10889                                 ret += tmpstr;
10890                         }
10891                 }else{
10892                         ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
10893                         if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
10894                         ret+="(aggr_var"+int_to_string(a)+"));\n";
10895                 }
10896         }
10897         ret += "\t};\n";
10898         ret +="};\n\n";
10899
10900 //--------------------------------
10901 //                      superaggr definition class
10902         ret += "class " + this->generate_functor_name() + "_statedef{\n";
10903         ret += "public:\n";
10904         for(a=0;a<aggr_tbl.size();a++){
10905 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10906                 if(ate->is_superaggr()){
10907                         sprintf(tmpstr,"aggr_var%d",a);
10908                         if(aggr_tbl.is_builtin(a))
10909                         ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
10910                         else
10911                         ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
10912                 }
10913         }
10914         set<string>::iterator ssi;
10915         for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
10916                 string state_nm = (*ssi);
10917                 int state_id = Ext_fcns->lookup_state(state_nm);
10918                 data_type *dt = Ext_fcns->get_storage_dt(state_id);
10919                 string state_var = "state_var_"+state_nm;
10920                 ret += "\t"+dt->make_host_cvar(state_var)+";\n";
10921         }
10922 //              Constructors
10923         ret += "\t"+this->generate_functor_name() + "_statedef(){};\n";
10924 //              destructor
10925         ret += "\t~"+this->generate_functor_name() + "_statedef(){\n";
10926         for(a=0;a<aggr_tbl.size();a++){
10927 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10928                 if(ate->is_superaggr()){
10929                         if(aggr_tbl.is_builtin(a)){
10930                                 data_type *adt = aggr_tbl.get_data_type(a);
10931                                 if(adt->is_buffer_type()){
10932                                         sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
10933                                         adt->get_hfta_buffer_destroy().c_str(), a );
10934                                         ret += tmpstr;
10935                                 }
10936                         }else{
10937                                 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
10938                                 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
10939                                 ret+="(aggr_var"+int_to_string(a)+"));\n";
10940                         }
10941                 }
10942         }
10943         for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
10944                 string state_nm = (*ssi);
10945                 int state_id = Ext_fcns->lookup_state(state_nm);
10946                 string state_var = "state_var_"+state_nm;
10947                 ret += "\t_sfun_state_destroy_"+state_nm+"(&"+state_var+");\n";
10948         }
10949
10950         ret += "\t};\n";
10951         ret +="};\n\n";
10952
10953
10954 //--------------------------------
10955 //                      gb functor class
10956         ret += "class " + this->generate_functor_name() + "{\n";
10957
10958 //                      Find variables referenced in this query node.
10959
10960   col_id_set cid_set;
10961   col_id_set::iterator csi;
10962
10963     for(w=0;w<where.size();++w)
10964         gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
10965     for(w=0;w<having.size();++w)
10966         gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
10967     for(w=0;w<cleanby.size();++w)
10968         gather_pr_col_ids(cleanby[w]->pr,cid_set,segen_gb_tbl);
10969     for(w=0;w<cleanwhen.size();++w)
10970         gather_pr_col_ids(cleanwhen[w]->pr,cid_set,segen_gb_tbl);
10971         for(g=0;g<gb_tbl.size();g++)
10972                 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
10973
10974     for(s=0;s<select_list.size();s++){
10975         gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl);     // descends into aggregates
10976     }
10977
10978
10979 //                      Private variables : store the state of the functor.
10980 //                      1) variables for unpacked attributes
10981 //                      2) offsets of the upacked attributes
10982 //                      3) storage of partial functions
10983 //                      4) storage of complex literals (i.e., require a constructor)
10984
10985         ret += "private:\n";
10986
10987         // var to save the schema handle
10988         ret += "\tint schema_handle0;\n";
10989
10990         // generate the declaration of all the variables related to
10991         // temp tuples generation
10992         ret += gen_decl_temp_vars();
10993
10994 //                      unpacked attribute storage, offsets
10995         ret += "//\t\tstorage and offsets of accessed fields.\n";
10996         ret += generate_access_vars(cid_set, schema);
10997 //              tuple metadata offset
10998         ret += "\ttuple_metadata_offset0;\n";
10999
11000 //                      Variables to store results of partial functions.
11001 //                      WARNING find_partial_functions modifies the SE
11002 //                      (it marks the partial function id).
11003         ret += "//\t\tParital function result storage\n";
11004         vector<scalarexp_t *> partial_fcns;
11005         vector<int> fcn_ref_cnt;
11006         vector<bool> is_partial_fcn;
11007         for(s=0;s<select_list.size();s++){
11008                 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
11009         }
11010         for(w=0;w<where.size();w++){
11011                 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11012         }
11013         for(w=0;w<having.size();w++){
11014                 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11015         }
11016         for(w=0;w<cleanby.size();w++){
11017                 find_partial_fcns_pr(cleanby[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11018         }
11019         for(w=0;w<cleanwhen.size();w++){
11020                 find_partial_fcns_pr(cleanwhen[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11021         }
11022         for(g=0;g<gb_tbl.size();g++){
11023                 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
11024         }
11025         for(a=0;a<aggr_tbl.size();a++){
11026                 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
11027         }
11028         if(partial_fcns.size()>0){
11029           ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
11030           ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
11031         }
11032
11033 //                      Complex literals (i.e., they need constructors)
11034         ret += "//\t\tComplex literal storage.\n";
11035         cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
11036         ret += generate_complex_lit_vars(complex_literals);
11037
11038 //                      Pass-by-handle parameters
11039         ret += "//\t\tPass-by-handle storage.\n";
11040         vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
11041         ret += generate_pass_by_handle_vars(param_handle_table);
11042
11043 //                      Create cached temporaries for UDAF return values.
11044         ret += "//\t\tTemporaries for UDAF return values.\n";
11045         for(a=0;a<aggr_tbl.size();a++){
11046                 if(! aggr_tbl.is_builtin(a)){
11047                         int afcn_id = aggr_tbl.get_fcn_id(a);
11048                         data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11049                         sprintf(tmpstr,"udaf_ret_%d", a);
11050                         ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11051                 }
11052         }
11053
11054
11055
11056 //                      variables to hold parameters.
11057         ret += "//\tfor query parameters\n";
11058         ret += generate_param_vars(param_tbl);
11059
11060 //              Is there a temporal flush?  If so create flush temporaries,
11061 //              create flush indicator.
11062         bool uses_temporal_flush = false;
11063         for(g=0;g<gb_tbl.size();g++){
11064                 data_type *gdt = gb_tbl.get_data_type(g);
11065                 if(gdt->is_temporal())
11066                         uses_temporal_flush = true;
11067         }
11068
11069         if(uses_temporal_flush){
11070                 ret += "//\t\tFor temporal flush\n";
11071                 for(g=0;g<gb_tbl.size();g++){
11072                         data_type *gdt = gb_tbl.get_data_type(g);
11073                         if(gdt->is_temporal()){
11074                           sprintf(tmpstr,"last_gb%d",g);
11075                           ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11076                           sprintf(tmpstr,"last_flushed_gb%d",g);
11077                           ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11078                         }
11079                 }
11080                 ret += "\tbool needs_temporal_flush;\n";
11081         }
11082
11083 //                      The publicly exposed functions
11084
11085         ret += "\npublic:\n";
11086
11087
11088 //-------------------
11089 //                      The functor constructor
11090 //                      pass in the schema handle.
11091 //                      1) make assignments to the unpack offset variables
11092 //                      2) initialize the complex literals
11093
11094         ret += "//\t\tFunctor constructor.\n";
11095         ret +=  this->generate_functor_name()+"(int schema_handle0){\n";
11096
11097         // save the schema handle
11098         ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
11099 //              tuple metadata offset
11100         ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
11101
11102 //              unpack vars
11103         ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
11104         ret += gen_access_var_init(cid_set);
11105
11106 //              aggregate return vals : refd in both final_sample
11107 //              and create_output_tuple
11108 //                      Create cached temporaries for UDAF return values.
11109         for(a=0;a<aggr_tbl.size();a++){
11110                 if(! aggr_tbl.is_builtin(a)){
11111                         int afcn_id = aggr_tbl.get_fcn_id(a);
11112                         data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11113                         sprintf(tmpstr,"udaf_ret_%d", a);
11114                         ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11115                 }
11116         }
11117
11118 //              complex literals
11119         ret += "//\t\tInitialize complex literals.\n";
11120         ret += gen_complex_lit_init(complex_literals);
11121
11122 //              Initialize partial function results so they can be safely GC'd
11123         ret += gen_partial_fcn_init(partial_fcns);
11124
11125 //              Initialize non-query-parameter parameter handles
11126         ret += gen_pass_by_handle_init(param_handle_table);
11127
11128 //              temporal flush variables
11129 //              ASSUME that structured values won't be temporal.
11130         if(uses_temporal_flush){
11131                 ret += "//\t\tInitialize temporal flush variables.\n";
11132                 for(g=0;g<gb_tbl.size();g++){
11133                         data_type *gdt = gb_tbl.get_data_type(g);
11134                         if(gdt->is_temporal()){
11135                                 literal_t gl(gdt->type_indicator());
11136                                 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
11137                                 ret.append(tmpstr);
11138                         }
11139                 }
11140                 ret += "\tneeds_temporal_flush = false;\n";
11141         }
11142
11143         //              Init temporal attributes referenced in select list
11144         ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
11145
11146         ret += "};\n";
11147
11148
11149 //-------------------
11150 //                      Functor destructor
11151         ret += "//\t\tFunctor destructor.\n";
11152         ret +=  "~"+this->generate_functor_name()+"(){\n";
11153
11154 //                      clean up buffer type complex literals
11155         ret += gen_complex_lit_dtr(complex_literals);
11156
11157 //                      Deregister the pass-by-handle parameters
11158         ret += "/* register and de-register the pass-by-handle parameters */\n";
11159         ret += gen_pass_by_handle_dtr(param_handle_table);
11160
11161 //                      clean up partial function results.
11162         ret += "/* clean up partial function storage    */\n";
11163         ret += gen_partial_fcn_dtr(partial_fcns);
11164
11165 //                      Destroy the parameters, if any need to be destroyed
11166         ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11167
11168         ret += "};\n\n";
11169
11170
11171 //-------------------
11172 //                      Parameter manipulation routines
11173         ret += generate_load_param_block(this->generate_functor_name(),
11174                                                                         this->param_tbl,param_handle_table);
11175         ret += generate_delete_param_block(this->generate_functor_name(),
11176                                                                         this->param_tbl,param_handle_table);
11177
11178 //-------------------
11179 //                      Register new parameter block
11180
11181         ret += "int set_param_block(gs_int32_t sz, void* value){\n";
11182           ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11183           ret += "\treturn this->load_params_"+this->generate_functor_name()+
11184                                 "(sz, value);\n";
11185         ret += "};\n\n";
11186
11187 //-------------------
11188 //              the create_group method.
11189 //              This method creates a group in a buffer passed in
11190 //              (to allow for creation on the stack).
11191 //              There are also a couple of side effects:
11192 //              1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
11193 //              2) determine if a temporal flush is required.
11194
11195         ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
11196         //              Variables for execution of the function.
11197         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
11198
11199         if(partial_fcns.size()>0){              // partial fcn access failure
11200           ret += "\tgs_retval_t retval = 0;\n";
11201           ret += "\n";
11202         }
11203 //              return value
11204         ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
11205                         "_groupdef *) buffer;\n";
11206
11207 //              Start by cleaning up partial function results
11208         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11209
11210         set<int> gb_pfcns;      // partial fcns in gbdefs, aggr se's
11211         for(g=0;g<gb_tbl.size();g++){
11212                 collect_partial_fcns(gb_tbl.get_def(g), gb_pfcns);
11213         }
11214         ret += gen_partial_fcn_dtr(partial_fcns,gb_pfcns);
11215 //      ret += gen_partial_fcn_dtr(partial_fcns);
11216
11217
11218         ret += gen_temp_tuple_check(this->node_name, 0);
11219         col_id_set found_cids;  // colrefs unpacked thus far.
11220         ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
11221
11222
11223
11224 //                      Save temporal group-by variables
11225
11226
11227         ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
11228
11229           for(g=0;g<gb_tbl.size();g++){
11230
11231                         data_type *gdt = gb_tbl.get_data_type(g);
11232
11233                         if(gdt->is_temporal()){
11234                                 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11235                                         g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11236                                 ret.append(tmpstr);
11237                         }
11238                 }
11239                 ret.append("\n");
11240
11241
11242
11243 //                      Compare the temporal GB vars with the stored ones,
11244 //                      set flush indicator and update stored GB vars if there is any change.
11245
11246         if(uses_temporal_flush){
11247                 ret+= "\tif( !( (";
11248                 bool first_one = true;
11249                 for(g=0;g<gb_tbl.size();g++){
11250                         data_type *gdt = gb_tbl.get_data_type(g);
11251
11252                         if(gdt->is_temporal()){
11253                           sprintf(tmpstr,"last_gb%d",g);   string lhs_op = tmpstr;
11254                           sprintf(tmpstr,"gbval->gb_var%d",g);   string rhs_op = tmpstr;
11255                           if(first_one){first_one = false;} else {ret += ") && (";}
11256                           ret += generate_equality_test(lhs_op, rhs_op, gdt);
11257                         }
11258                 }
11259                 ret += ") ) ){\n";
11260                 for(g=0;g<gb_tbl.size();g++){
11261                   data_type *gdt = gb_tbl.get_data_type(g);
11262                   if(gdt->is_temporal()){
11263                           if(gdt->is_buffer_type()){
11264                                 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
11265                           }else{
11266                                 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
11267                                 ret += tmpstr;
11268                                 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
11269                           }
11270                           ret += tmpstr;
11271                         }
11272                 }
11273 /*
11274                 if(uses_temporal_flush){
11275                         for(g=0;g<gb_tbl.size();g++){
11276                                 data_type *gdt = gb_tbl.get_data_type(g);
11277                                 if(gdt->is_temporal()){
11278                                         ret+="if(last_flushed_gb"+int_to_string(g)+">0)\n";
11279                                         break;
11280                                 }
11281                         }
11282                 }
11283 */
11284                 ret += "\t\tneeds_temporal_flush=true;\n";
11285                 ret += "\t\t}else{\n"
11286                         "\t\t\tneeds_temporal_flush=false;\n"
11287                         "\t\t}\n";
11288         }
11289
11290
11291 //              For temporal status tuple we don't need to do anything else
11292         ret += "\tif (temp_tuple_received) return NULL;\n\n";
11293
11294
11295 //              The partial functions ref'd in the group-by var
11296 //              definitions must be evaluated.  If one returns false,
11297 //              then implicitly the predicate is false.
11298         set<int>::iterator pfsi;
11299
11300         if(gb_pfcns.size() > 0)
11301                 ret += "//\t\tUnpack partial fcns.\n";
11302         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, gb_pfcns,
11303                                                                                 found_cids, segen_gb_tbl, "NULL", needs_xform);
11304
11305 //                      Unpack the group-by variables
11306
11307           for(g=0;g<gb_tbl.size();g++){
11308 //                      Find the new fields ref'd by this GBvar def.
11309                 col_id_set new_cids;
11310                 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
11311 //                      Unpack these values.
11312                 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
11313
11314                 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11315                                 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11316 /*
11317 //                              There seems to be no difference between the two
11318 //                              branches of the IF statement.
11319                 data_type *gdt = gb_tbl.get_data_type(g);
11320                   if(gdt->is_buffer_type()){
11321 //                              Create temporary copy.
11322                         sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11323                                 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11324                   }else{
11325                         scalarexp_t *gse = gb_tbl.get_def(g);
11326                         sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11327                                         g,generate_se_code(gse,schema).c_str());
11328                   }
11329 */
11330                   ret.append(tmpstr);
11331           }
11332           ret.append("\n");
11333
11334
11335         ret+= "\treturn gbval;\n";
11336         ret += "};\n\n\n";
11337
11338
11339
11340 //-------------------
11341 //              the create_group method.
11342 //              This method creates a group in a buffer passed in
11343 //              (to allow for creation on the stack).
11344 //              There are also a couple of side effects:
11345 //              1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
11346 //              2) determine if a temporal flush is required.
11347
11348         ret += "bool evaluate_predicate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, int cd){\n";
11349         //              Variables for execution of the function.
11350         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
11351
11352         if(partial_fcns.size()>0){              // partial fcn access failure
11353           ret += "\tgs_retval_t retval = 0;\n";
11354           ret += "\n";
11355         }
11356
11357 //              Start by cleaning up partial function results
11358         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11359         set<int> w_pfcns;       // partial fcns in where clause
11360         for(w=0;w<where.size();++w)
11361                 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
11362
11363         set<int> ag_pfcns;      // partial fcns in gbdefs, aggr se's
11364         for(a=0;a<aggr_tbl.size();a++){
11365                 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_pfcns);
11366         }
11367         ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
11368         ret += gen_partial_fcn_dtr(partial_fcns,ag_pfcns);
11369
11370         ret+="//\t\tEvaluate clauses which don't reference stateful fcns first \n";
11371         for(w=0;w<where.size();++w){
11372                 if(! pred_refs_sfun(where[w]->pr)){
11373                         sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11374                         ret += tmpstr;
11375 //                      Find the set of variables accessed in this CNF elem,
11376 //                      but in no previous element.
11377                         col_id_set new_cids;
11378                         get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11379
11380 //                      Unpack these values.
11381                         ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11382 //                      Find partial fcns ref'd in this cnf element
11383                         set<int> pfcn_refs;
11384                         collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11385                         ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11386
11387                         ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11388                                 +") ) return(false);\n";
11389                 }
11390         }
11391
11392
11393 //              The partial functions ref'd in the and aggregate
11394 //              definitions must also be evaluated.  If one returns false,
11395 //              then implicitly the predicate is false.
11396 //              ASSUME that aggregates cannot reference stateful fcns.
11397
11398         if(ag_pfcns.size() > 0)
11399                 ret += "//\t\tUnpack remaining partial fcns.\n";
11400         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_pfcns,
11401                                                                                 found_cids, segen_gb_tbl, "false", needs_xform);
11402
11403         ret+="//\t\tEvaluate all remaining where clauses.\n";
11404         ret+="\tbool retval = true;\n";
11405         for(w=0;w<where.size();++w){
11406                 if( pred_refs_sfun(where[w]->pr)){
11407                         sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11408                         ret += tmpstr;
11409 //                      Find the set of variables accessed in this CNF elem,
11410 //                      but in no previous element.
11411                         col_id_set new_cids;
11412                         get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11413
11414 //                      Unpack these values.
11415                         ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11416 //                      Find partial fcns ref'd in this cnf element
11417                         set<int> pfcn_refs;
11418                         collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11419                         ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11420
11421                         ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11422                                 +") ) retval = false;\n";
11423                 }
11424         }
11425
11426         ret+="//                Unpack all remaining attributes\n";
11427         ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
11428
11429     ret += "\n\treturn retval;\n";
11430         ret += "};\n\n\n";
11431
11432 //--------------------------------------------------------
11433 //                      Create and initialize an aggregate object
11434
11435         ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, gs_sp_t a,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11436         //              Variables for execution of the function.
11437         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
11438
11439 //              return value
11440         ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+ "_aggrdef *)a;\n";
11441
11442         for(a=0;a<aggr_tbl.size();a++){
11443                 if(aggr_tbl.is_builtin(a)){
11444 //                      Create temporaries for buffer return values
11445                   data_type *adt = aggr_tbl.get_data_type(a);
11446                   if(adt->is_buffer_type()){
11447                         sprintf(tmpstr,"aggr_tmp_%d", a);
11448                         ret+=adt->make_host_cvar(tmpstr)+";\n";
11449                   }
11450                 }
11451         }
11452
11453         for(a=0;a<aggr_tbl.size();a++){
11454                 sprintf(tmpstr,"aggval->aggr_var%d",a);
11455                 string assignto_var = tmpstr;
11456                 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11457         }
11458
11459         ret += "\treturn aggval;\n";
11460         ret += "};\n\n";
11461
11462
11463 //--------------------------------------------------------
11464 //                      initialize an aggregate object inplace
11465
11466         ret += "void create_aggregate(host_tuple &tup0, "+this->generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11467         //              Variables for execution of the function.
11468         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
11469
11470 //              return value
11471
11472         for(a=0;a<aggr_tbl.size();a++){
11473                 if(aggr_tbl.is_builtin(a)){
11474 //                      Create temporaries for buffer return values
11475                   data_type *adt = aggr_tbl.get_data_type(a);
11476                   if(adt->is_buffer_type()){
11477                         sprintf(tmpstr,"aggr_tmp_%d", a);
11478                         ret+=adt->make_host_cvar(tmpstr)+";\n";
11479                   }
11480                 }
11481         }
11482
11483         for(a=0;a<aggr_tbl.size();a++){
11484                 sprintf(tmpstr,"aggval->aggr_var%d",a);
11485                 string assignto_var = tmpstr;
11486                 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11487         }
11488
11489         ret += "};\n\n";
11490
11491
11492 //--------------------------------------------------------
11493 //                      Create and clean-initialize a state object
11494
11495         ret += "void initialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval){\n";
11496         //              Variables for execution of the function.
11497         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
11498
11499 //              return value
11500 //      ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
11501
11502         for(a=0;a<aggr_tbl.size();a++){
11503                 if( aggr_tbl.is_superaggr(a)){
11504                         if(aggr_tbl.is_builtin(a)){
11505 //                      Create temporaries for buffer return values
11506                           data_type *adt = aggr_tbl.get_data_type(a);
11507                           if(adt->is_buffer_type()){
11508                                 sprintf(tmpstr,"aggr_tmp_%d", a);
11509                                 ret+=adt->make_host_cvar(tmpstr)+";\n";
11510                           }
11511                         }
11512                 }
11513         }
11514
11515         for(a=0;a<aggr_tbl.size();a++){
11516                 if( aggr_tbl.is_superaggr(a)){
11517                         sprintf(tmpstr,"stval->aggr_var%d",a);
11518                         string assignto_var = tmpstr;
11519                         ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11520                 }
11521         }
11522
11523         for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
11524                 string state_nm = (*ssi);
11525                 ret += "_sfun_state_clean_init_"+state_nm+"(&(stval->state_var_"+state_nm+"));\n";
11526         }
11527
11528         ret += "};\n\n";
11529
11530
11531 //--------------------------------------------------------
11532 //                      Create and dirty-initialize a state object
11533
11534         ret += "void reinitialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, "+generate_functor_name()+"_statedef *old_stval, int cd){\n";
11535         //              Variables for execution of the function.
11536         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
11537
11538 //              return value
11539 //      ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
11540
11541         for(a=0;a<aggr_tbl.size();a++){
11542                 if( aggr_tbl.is_superaggr(a)){
11543                         if(aggr_tbl.is_builtin(a)){
11544 //                      Create temporaries for buffer return values
11545                           data_type *adt = aggr_tbl.get_data_type(a);
11546                           if(adt->is_buffer_type()){
11547                                 sprintf(tmpstr,"aggr_tmp_%d", a);
11548                                 ret+=adt->make_host_cvar(tmpstr)+";\n";
11549                           }
11550                         }
11551                 }
11552         }
11553
11554 //              initialize superaggregates
11555         for(a=0;a<aggr_tbl.size();a++){
11556                 if( aggr_tbl.is_superaggr(a)){
11557                         sprintf(tmpstr,"stval->aggr_var%d",a);
11558                         string assignto_var = tmpstr;
11559                         ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11560                 }
11561         }
11562
11563         for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
11564                 string state_nm = (*ssi);
11565                 ret += "_sfun_state_dirty_init_"+state_nm+"(&(stval->state_var_"+state_nm+"),&(old_stval->state_var_"+state_nm+"), cd );\n";
11566         }
11567
11568         ret += "};\n\n";
11569
11570 //--------------------------------------------------------
11571 //              Finalize_state : call the finalize fcn on all states
11572
11573
11574         ret += "void finalize_state( "+generate_functor_name()+"_statedef *stval, int cd){\n";
11575
11576         for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
11577                 string state_nm = (*ssi);
11578                 ret += "_sfun_state_final_init_"+state_nm+"(&(stval->state_var_"+state_nm+"), cd);\n";
11579         }
11580
11581         ret += "};\n\n";
11582
11583
11584
11585
11586 //--------------------------------------------------------
11587 //                      update (plus) a superaggregate object
11588
11589         ret += "void update_plus_superaggr(host_tuple &tup0, " +
11590                 generate_functor_name()+"_groupdef *gbval, "+
11591                 generate_functor_name()+"_statedef *stval){\n";
11592         //              Variables for execution of the function.
11593         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
11594
11595 //                      use of temporaries depends on the aggregate,
11596 //                      generate them in generate_aggr_update
11597
11598
11599         for(a=0;a<aggr_tbl.size();a++){
11600           if(aggr_tbl.is_superaggr(a)){
11601                 sprintf(tmpstr,"stval->aggr_var%d",a);
11602                 string varname = tmpstr;
11603                 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
11604           }
11605         }
11606
11607         ret += "\treturn;\n";
11608         ret += "};\n";
11609
11610
11611
11612 //--------------------------------------------------------
11613 //                      update (minus) a superaggregate object
11614
11615         ret += "void update_minus_superaggr( "+
11616                 generate_functor_name()+"_groupdef *gbval, "+
11617                 generate_functor_name()+"_aggrdef *aggval,"+
11618                 generate_functor_name()+"_statedef *stval"+
11619                 "){\n";
11620         //              Variables for execution of the function.
11621         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
11622
11623 //                      use of temporaries depends on the aggregate,
11624 //                      generate them in generate_aggr_update
11625
11626
11627         for(a=0;a<aggr_tbl.size();a++){
11628           if(aggr_tbl.is_superaggr(a)){
11629                 sprintf(tmpstr,"stval->aggr_var%d",a);
11630                 string super_varname = tmpstr;
11631                 sprintf(tmpstr,"aggval->aggr_var%d",a);
11632                 string sub_varname = tmpstr;
11633                 ret.append(generate_superaggr_minus(sub_varname, super_varname,&aggr_tbl,a, schema));
11634           }
11635         }
11636
11637         ret += "\treturn;\n";
11638         ret += "};\n";
11639
11640
11641 //--------------------------------------------------------
11642 //                      update an aggregate object
11643
11644         ret += "void update_aggregate(host_tuple &tup0, "
11645                 +generate_functor_name()+"_groupdef *gbval, "+
11646                 generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11647         //              Variables for execution of the function.
11648         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
11649
11650 //                      use of temporaries depends on the aggregate,
11651 //                      generate them in generate_aggr_update
11652
11653
11654         for(a=0;a<aggr_tbl.size();a++){
11655           sprintf(tmpstr,"aggval->aggr_var%d",a);
11656           string varname = tmpstr;
11657           ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
11658         }
11659
11660         ret += "\treturn;\n";
11661         ret += "};\n";
11662
11663 //---------------------------------------------------
11664 //                      Flush test
11665
11666         ret += "\tbool flush_needed(){\n";
11667         if(uses_temporal_flush){
11668                 ret += "\t\treturn needs_temporal_flush;\n";
11669         }else{
11670                 ret += "\t\treturn false;\n";
11671         }
11672         ret += "\t};\n";
11673
11674
11675 //------------------------------------------------------
11676 //                      The cleaning_when predicate
11677
11678         string gbvar = "gbval->gb_var";
11679         string aggvar = "aggval->";
11680
11681         ret += "bool need_to_clean( "
11682                 +generate_functor_name()+"_groupdef *gbval, "+
11683                 generate_functor_name()+"_statedef *stval, int cd"+
11684                 "){\n";
11685
11686         if(cleanwhen.size()>0)
11687                 ret += "\tbool predval = true;\n";
11688         else
11689                 ret += "\tbool predval = false;\n";
11690
11691 //                      Find the udafs ref'd in the cleaning_when clause
11692         set<int> cw_aggs;
11693         for(w=0;w<cleanwhen.size();++w)
11694                 collect_aggr_refs_pr(cleanwhen[w]->pr, cw_aggs);
11695
11696
11697 //                      get the return values from the UDAFS
11698         for(a=0;a<aggr_tbl.size();a++){
11699                 if(! aggr_tbl.is_builtin(a) && cw_aggs.count(a)){
11700                         ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11701                         if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11702                         ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11703                 }
11704         }
11705
11706
11707 //              Start by cleaning up partial function results
11708         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11709         set<int> cw_pfcns;      // partial fcns in where clause
11710         for(w=0;w<cleanwhen.size();++w)
11711                 collect_partial_fcns_pr(cleanwhen[w]->pr, cw_pfcns);
11712
11713         ret += gen_partial_fcn_dtr(partial_fcns,cw_pfcns);
11714
11715
11716         for(w=0;w<cleanwhen.size();++w){
11717                 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11718                 ret += tmpstr;
11719 //                      Find partial fcns ref'd in this cnf element
11720                 set<int> pfcn_refs;
11721                 collect_partial_fcns_pr(cleanwhen[w]->pr, pfcn_refs);
11722                 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
11723                         ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11724                         ret += "\tif(retval){ return false;}\n";
11725                 }
11726 //              ret += unpack_partial_fcn_fm_aggr(schema, partial_fcns, pfcn_refs,"false");
11727
11728                 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanwhen[w]->pr,gbvar, aggvar, schema)+
11729                                 ") ) predval = false;\n";
11730         }
11731
11732         ret += "\treturn predval;\n";
11733         ret += "\t};\n";
11734
11735 //------------------------------------------------------
11736 //                      The cleaning_by predicate
11737
11738         ret += "bool sample_group("
11739                 +generate_functor_name()+"_groupdef *gbval, "+
11740                 generate_functor_name()+"_aggrdef *aggval,"+
11741                 generate_functor_name()+"_statedef *stval, int cd"+
11742                 "){\n";
11743
11744         if(cleanby.size()>0)
11745                 ret += "\tbool retval = true;\n";
11746         else
11747                 ret += "\tbool retval = false;\n";
11748
11749 //                      Find the udafs ref'd in the cleaning_by clause
11750         set<int> cb_aggs;
11751         for(w=0;w<cleanby.size();++w)
11752                 collect_aggr_refs_pr(cleanby[w]->pr, cb_aggs);
11753
11754
11755 //                      get the return values from the UDAFS
11756         for(a=0;a<aggr_tbl.size();a++){
11757                 if(! aggr_tbl.is_builtin(a) && cb_aggs.count(a)){
11758                         ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11759                         if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11760                         ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11761                 }
11762         }
11763
11764
11765 //              Start by cleaning up partial function results
11766         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11767         set<int> cb_pfcns;      // partial fcns in where clause
11768         for(w=0;w<cleanby.size();++w)
11769                 collect_partial_fcns_pr(cleanby[w]->pr, cb_pfcns);
11770
11771         ret += gen_partial_fcn_dtr(partial_fcns,cb_pfcns);
11772
11773
11774         for(w=0;w<cleanwhen.size();++w){
11775                 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11776                 ret += tmpstr;
11777
11778 /*
11779 //                      Find the set of variables accessed in this CNF elem,
11780 //                      but in no previous element.
11781                 col_id_set new_cids;
11782                 get_new_pred_cids(cleanby[w]->pr, found_cids, new_cids, segen_gb_tbl);
11783
11784 //                      Unpack these values.
11785                 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11786 */
11787
11788 //                      Find partial fcns ref'd in this cnf element
11789                 set<int> pfcn_refs;
11790                 collect_partial_fcns_pr(cleanby[w]->pr, pfcn_refs);
11791                 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
11792                         ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11793                         ret += "\tif(retval){ return false;}\n";
11794                 }
11795 //              ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11796
11797                 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanby[w]->pr,gbvar, aggvar, schema)+
11798                         +") ) retval = false;\n";
11799         }
11800
11801         ret += "\treturn retval;\n";
11802         ret += "\t};\n";
11803
11804
11805 //-----------------------------------------------------
11806 //
11807         ret += "bool final_sample_group("
11808                 +generate_functor_name()+"_groupdef *gbval, "+
11809                 generate_functor_name()+"_aggrdef *aggval,"+
11810                 generate_functor_name()+"_statedef *stval,"+
11811                 "int cd){\n";
11812
11813         ret += "\tgs_retval_t retval = 0;\n";
11814
11815 //                      Find the udafs ref'd in the having clause
11816         set<int> hv_aggs;
11817         for(w=0;w<having.size();++w)
11818                 collect_aggr_refs_pr(having[w]->pr, hv_aggs);
11819
11820
11821 //                      get the return values from the UDAFS
11822         for(a=0;a<aggr_tbl.size();a++){
11823                 if(! aggr_tbl.is_builtin(a) && hv_aggs.count(a)){
11824                         ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11825                         if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11826                         ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11827                 }
11828         }
11829
11830
11831         set<int> hv_sl_pfcns;
11832         for(w=0;w<having.size();w++){
11833                 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
11834         }
11835
11836 //              clean up the partial fcn results from any previous execution
11837         ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
11838
11839 //              Unpack them now
11840         for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
11841                 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11842                 ret += "\tif(retval){ return false;}\n";
11843         }
11844
11845 //              Evaluate the HAVING clause
11846 //              TODO: this seems to have a ++ operator rather than a + operator.
11847         for(w=0;w<having.size();++w){
11848                 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
11849         }
11850
11851         ret += "\treturn true;\n";
11852         ret+="}\n\n";
11853
11854 //---------------------------------------------------
11855 //                      create output tuple
11856 //                      Unpack the partial functions ref'd in the where clause,
11857 //                      select clause.  Evaluate the where clause.
11858 //                      Finally, pack the tuple.
11859
11860 //                      I need to use special code generation here,
11861 //                      so I'll leave it in longhand.
11862
11863         ret += "host_tuple create_output_tuple("
11864                 +generate_functor_name()+"_groupdef *gbval, "+
11865                 generate_functor_name()+"_aggrdef *aggval,"+
11866                 generate_functor_name()+"_statedef *stval,"+
11867                 "int cd, bool &failed){\n";
11868
11869         ret += "\thost_tuple tup;\n";
11870         ret += "\tfailed = false;\n";
11871         ret += "\tgs_retval_t retval = 0;\n";
11872
11873
11874 //                      Find the udafs ref'd in the select clause
11875         set<int> sl_aggs;
11876         for(s=0;s<select_list.size();s++)
11877                 collect_agg_refs(select_list[s]->se, sl_aggs);
11878
11879
11880 //                      get the return values from the UDAFS
11881         for(a=0;a<aggr_tbl.size();a++){
11882                 if(! aggr_tbl.is_builtin(a) && sl_aggs.count(a)){
11883                         ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11884                         if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11885                         ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11886                 }
11887         }
11888
11889
11890 //                      I can't cache partial fcn results from the having
11891 //                      clause because evaluation is separated.
11892         set<int> sl_pfcns;
11893         for(s=0;s<select_list.size();s++){
11894                 collect_partial_fcns(select_list[s]->se, sl_pfcns);
11895         }
11896 //              Unpack them now
11897         for(pfsi=sl_pfcns.begin();pfsi!=sl_pfcns.end();++pfsi){
11898                 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11899                 ret += "\tif(retval){ failed=true; return tup;}\n";
11900         }
11901
11902
11903 //          Now, compute the size of the tuple.
11904
11905 //          Unpack any BUFFER type selections into temporaries
11906 //          so that I can compute their size and not have
11907 //          to recompute their value during tuple packing.
11908 //          I can use regular assignment here because
11909 //          these temporaries are non-persistent.
11910 //                      TODO: should I be using the selvar generation routine?
11911
11912         ret += "//\t\tCompute the size of the tuple.\n";
11913         ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
11914       for(s=0;s<select_list.size();s++){
11915                 scalarexp_t *se = select_list[s]->se;
11916         data_type *sdt = se->get_data_type();
11917         if(sdt->is_buffer_type() &&
11918                          !( (se->get_operator_type() == SE_COLREF) ||
11919                                 (se->get_operator_type() == SE_AGGR_STAR) ||
11920                                 (se->get_operator_type() == SE_AGGR_SE) ||
11921                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
11922                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
11923                 ){
11924             sprintf(tmpstr,"selvar_%d",s);
11925                         ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
11926                         ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
11927         }
11928       }
11929
11930 //      The size of the tuple is the size of the tuple struct plus the
11931 //      size of the buffers to be copied in.
11932
11933       ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
11934       for(s=0;s<select_list.size();s++){
11935 //              if(s>0) ret += "+";
11936                 scalarexp_t *se = select_list[s]->se;
11937         data_type *sdt = select_list[s]->se->get_data_type();
11938         if(sdt->is_buffer_type()){
11939                   if(!( (se->get_operator_type() == SE_COLREF) ||
11940                                 (se->get_operator_type() == SE_AGGR_STAR) ||
11941                                 (se->get_operator_type() == SE_AGGR_SE) ||
11942                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
11943                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
11944                   ){
11945             sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
11946             ret.append(tmpstr);
11947                   }else{
11948             sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
11949             ret.append(tmpstr);
11950                   }
11951         }
11952       }
11953       ret.append(";\n");
11954
11955 //              Allocate tuple data block.
11956         ret += "//\t\tCreate the tuple block.\n";
11957           ret += "\ttup.data = malloc(tup.tuple_size);\n";
11958           ret += "\ttup.heap_resident = true;\n";
11959
11960 //              Mark tuple as regular
11961           ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
11962
11963 //        ret += "\ttup.channel = 0;\n";
11964           ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
11965                                 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
11966
11967 //              Start packing.
11968 //                      (Here, offsets are hard-wired.  is this a problem?)
11969
11970         ret += "//\t\tPack the fields into the tuple.\n";
11971           ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
11972       for(s=0;s<select_list.size();s++){
11973                 scalarexp_t *se = select_list[s]->se;
11974         data_type *sdt = se->get_data_type();
11975         if(sdt->is_buffer_type()){
11976                   if(!( (se->get_operator_type() == SE_COLREF) ||
11977                                 (se->get_operator_type() == SE_AGGR_STAR) ||
11978                                 (se->get_operator_type() == SE_AGGR_SE) ||
11979                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
11980                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
11981                   ){
11982             sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d,  ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
11983             ret.append(tmpstr);
11984             sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
11985             ret.append(tmpstr);
11986                   }else{
11987             sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s,  ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
11988             ret.append(tmpstr);
11989             sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
11990             ret.append(tmpstr);
11991                   }
11992         }else{
11993             sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
11994             ret.append(tmpstr);
11995             ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
11996             ret.append(";\n");
11997         }
11998       }
11999
12000 //                      Destroy string temporaries
12001           ret += gen_buffer_selvars_dtr(select_list);
12002 //                      Destroy string return vals of UDAFs
12003         for(a=0;a<aggr_tbl.size();a++){
12004                 if(! aggr_tbl.is_builtin(a)){
12005                         int afcn_id = aggr_tbl.get_fcn_id(a);
12006                         data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12007                         if(adt->is_buffer_type()){
12008                                 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12009                                 adt->get_hfta_buffer_destroy().c_str(), a );
12010                                 ret += tmpstr;
12011                         }
12012                 }
12013         }
12014
12015
12016           ret += "\treturn tup;\n";
12017           ret += "};\n";
12018
12019
12020 //-------------------------------------------------------------------
12021 //              Temporal update functions
12022
12023         ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12024
12025 //              create a temp status tuple
12026         ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12027
12028         ret += gen_init_temp_status_tuple(this->get_node_name());
12029
12030 //              Start packing.
12031 //                      (Here, offsets are hard-wired.  is this a problem?)
12032
12033         ret += "//\t\tPack the fields into the tuple.\n";
12034         for(s=0;s<select_list.size();s++){
12035                 data_type *sdt = select_list[s]->se->get_data_type();
12036                 if(sdt->is_temporal()){
12037                         sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12038                         ret += tmpstr;
12039                         sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
12040                         ret += tmpstr;
12041                         ret += ";\n";
12042                 }
12043         }
12044
12045         ret += "\treturn 0;\n";
12046         ret += "};};\n\n\n";
12047
12048
12049 //----------------------------------------------------------
12050 //                      The hash function
12051
12052         ret += "struct "+generate_functor_name()+"_hash_func{\n";
12053         ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12054                                 "_groupdef *grp) const{\n";
12055         ret += "\t\treturn(";
12056         for(g=0;g<gb_tbl.size();g++){
12057                 if(g>0) ret += "^";
12058                 data_type *gdt = gb_tbl.get_data_type(g);
12059                 if(gdt->use_hashfunc()){
12060                         if(gdt->is_buffer_type())
12061                                 sprintf(tmpstr,"(%s*%s(&)grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12062                         else
12063                                 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12064                 }else{
12065                         sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12066                 }
12067                 ret += tmpstr;
12068         }
12069         ret += ") >> 32);\n";
12070         ret += "\t}\n";
12071         ret += "};\n\n";
12072
12073 //----------------------------------------------------------
12074 //                      The superhash function
12075
12076         ret += "struct "+generate_functor_name()+"_superhash_func{\n";
12077         ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12078                                 "_groupdef *grp) const{\n";
12079         ret += "\t\treturn(0";
12080
12081         for(g=0;g<gb_tbl.size();g++){
12082                 if(sg_tbl.count(g)>0){
12083                         ret += "^";
12084                         data_type *gdt = gb_tbl.get_data_type(g);
12085                         if(gdt->use_hashfunc()){
12086                                 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12087                         }else{
12088                                 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12089                         }
12090                         ret += tmpstr;
12091                 }
12092         }
12093         ret += ") >> 32);\n";
12094
12095         ret += "\t}\n";
12096         ret += "};\n\n";
12097
12098 //----------------------------------------------------------
12099 //                      The comparison function
12100
12101         ret += "struct "+generate_functor_name()+"_equal_func{\n";
12102         ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12103                         generate_functor_name()+"_groupdef *grp2) const{\n";
12104         ret += "\t\treturn( (";
12105         for(g=0;g<gb_tbl.size();g++){
12106                 if(g>0) ret += ") && (";
12107                 data_type *gdt = gb_tbl.get_data_type(g);
12108                 if(gdt->complex_comparison(gdt)){
12109                   if(gdt->is_buffer_type())
12110                         sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12111                                 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12112                   else
12113                         sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12114                                 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12115                 }else{
12116                         sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12117                 }
12118                 ret += tmpstr;
12119         }
12120         ret += ") );\n";
12121         ret += "\t}\n";
12122         ret += "};\n\n";
12123
12124
12125 //----------------------------------------------------------
12126 //                      The superhashcomparison function
12127
12128         ret += "struct "+generate_functor_name()+"_superequal_func{\n";
12129         ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12130                         generate_functor_name()+"_groupdef *grp2) const{\n";
12131         ret += "\t\treturn( (";
12132     if(sg_tbl.size()){
12133                 bool first_elem = true;
12134                 for(g=0;g<gb_tbl.size();g++){
12135                         if(sg_tbl.count(g)){
12136                                 if(first_elem) first_elem=false; else ret += ") && (";
12137                                 data_type *gdt = gb_tbl.get_data_type(g);
12138                                 if(gdt->complex_comparison(gdt)){
12139                                   if(gdt->is_buffer_type())
12140                                         sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12141                                                 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12142                                   else
12143                                         sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12144                                                 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12145                                 }else{
12146                                         sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12147                                 }
12148                         ret += tmpstr;
12149                         }
12150                 }
12151         }else{
12152                 ret += "true";
12153         }
12154
12155         ret += ") );\n";
12156         ret += "\t}\n";
12157
12158
12159         ret += "};\n\n";
12160         return(ret);
12161 }
12162
12163 string sgahcwcb_qpn::generate_operator(int i, string params){
12164
12165                 return(
12166                         "       clean_operator<" +
12167                         generate_functor_name()+",\n\t"+
12168                         generate_functor_name() + "_groupdef, \n\t" +
12169                         generate_functor_name() + "_aggrdef, \n\t" +
12170                         generate_functor_name() + "_statedef, \n\t" +
12171                         generate_functor_name()+"_hash_func, \n\t"+
12172                         generate_functor_name()+"_equal_func ,\n\t"+
12173                         generate_functor_name()+"_superhash_func,\n\t "+
12174                         generate_functor_name()+"_superequal_func \n\t"+
12175                         "> *op"+int_to_string(i)+" = new clean_operator<"+
12176                         generate_functor_name()+",\n\t"+
12177                         generate_functor_name() + "_groupdef,\n\t " +
12178                         generate_functor_name() + "_aggrdef, \n\t" +
12179                         generate_functor_name() + "_statedef, \n\t" +
12180                         generate_functor_name()+"_hash_func, \n\t"+
12181                         generate_functor_name()+"_equal_func, \n\t"+
12182                         generate_functor_name()+"_superhash_func, \n\t"+
12183                         generate_functor_name()+"_superequal_func\n\t "
12184                         ">("+params+", \"" + get_node_name() + "\");\n"
12185                 );
12186 }
12187
12188 ////////////////////////////////////////////////////////////////
12189 ////    RSGAH functor
12190
12191
12192
12193 string rsgah_qpn::generate_functor_name(){
12194         return("rsgah_functor_" + normalize_name(this->get_node_name()));
12195 }
12196
12197
12198 string rsgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
12199         int a,g,w,s;
12200
12201
12202 //                      Initialize generate utility globals
12203         segen_gb_tbl = &(gb_tbl);
12204
12205
12206 //--------------------------------
12207 //                      group definition class
12208         string ret = "class " + generate_functor_name() + "_groupdef{\n";
12209         ret += "public:\n";
12210         for(g=0;g<this->gb_tbl.size();g++){
12211                 sprintf(tmpstr,"gb_var%d",g);
12212                 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12213         }
12214 //              Constructors
12215         ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
12216         ret += "\t"+generate_functor_name() + "_groupdef("+
12217                 this->generate_functor_name() + "_groupdef *gd){\n";
12218         for(g=0;g<gb_tbl.size();g++){
12219                 data_type *gdt = gb_tbl.get_data_type(g);
12220                 if(gdt->is_buffer_type()){
12221                         sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
12222                           gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
12223                         ret += tmpstr;
12224                 }else{
12225                         sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
12226                         ret += tmpstr;
12227                 }
12228         }
12229         ret += "\t};\n";
12230 //              destructor
12231         ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
12232         for(g=0;g<gb_tbl.size();g++){
12233                 data_type *gdt = gb_tbl.get_data_type(g);
12234                 if(gdt->is_buffer_type()){
12235                         sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
12236                           gdt->get_hfta_buffer_destroy().c_str(), g );
12237                         ret += tmpstr;
12238                 }
12239         }
12240         ret += "\t};\n";
12241         ret +="};\n\n";
12242
12243 //--------------------------------
12244 //                      aggr definition class
12245         ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
12246         ret += "public:\n";
12247         for(a=0;a<aggr_tbl.size();a++){
12248 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
12249                 sprintf(tmpstr,"aggr_var%d",a);
12250                 if(aggr_tbl.is_builtin(a))
12251                   ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
12252                 else
12253                   ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
12254         }
12255 //              Constructors
12256         ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
12257 //              destructor
12258         ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
12259         for(a=0;a<aggr_tbl.size();a++){
12260                 if(aggr_tbl.is_builtin(a)){
12261                         data_type *adt = aggr_tbl.get_data_type(a);
12262                         if(adt->is_buffer_type()){
12263                                 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
12264                                 adt->get_hfta_buffer_destroy().c_str(), a );
12265                                 ret += tmpstr;
12266                         }
12267                 }else{
12268                         ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
12269                         if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12270                         ret+="(aggr_var"+int_to_string(a)+"));\n";
12271                 }
12272         }
12273         ret += "\t};\n";
12274         ret +="};\n\n";
12275
12276 //--------------------------------
12277 //                      gb functor class
12278         ret += "class " + this->generate_functor_name() + "{\n";
12279
12280 //                      Find variables referenced in this query node.
12281
12282   col_id_set cid_set;
12283   col_id_set::iterator csi;
12284
12285     for(w=0;w<where.size();++w)
12286         gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
12287     for(w=0;w<having.size();++w)
12288         gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
12289     for(w=0;w<closing_when.size();++w)
12290         gather_pr_col_ids(closing_when[w]->pr,cid_set,segen_gb_tbl);
12291         for(g=0;g<gb_tbl.size();g++)
12292                 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
12293
12294     for(s=0;s<select_list.size();s++){
12295         gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl);     // descends into aggregates
12296     }
12297
12298
12299 //                      Private variables : store the state of the functor.
12300 //                      1) variables for unpacked attributes
12301 //                      2) offsets of the unpacked attributes
12302 //                      3) storage of partial functions
12303 //                      4) storage of complex literals (i.e., require a constructor)
12304
12305         ret += "private:\n";
12306
12307         // var to save the schema handle
12308         ret += "\tint schema_handle0;\n";
12309
12310         // generate the declaration of all the variables related to
12311         // temp tuples generation
12312         ret += gen_decl_temp_vars();
12313
12314 //                      unpacked attribute storage, offsets
12315         ret += "//\t\tstorage and offsets of accessed fields.\n";
12316         ret += generate_access_vars(cid_set, schema);
12317 //                      tuple metadata offset
12318         ret += "\tint tuple_metadata_offset0;\n";
12319
12320 //                      Variables to store results of partial functions.
12321 //                      WARNING find_partial_functions modifies the SE
12322 //                      (it marks the partial function id).
12323         ret += "//\t\tParital function result storage\n";
12324         vector<scalarexp_t *> partial_fcns;
12325         vector<int> fcn_ref_cnt;
12326         vector<bool> is_partial_fcn;
12327         for(s=0;s<select_list.size();s++){
12328                 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
12329         }
12330         for(w=0;w<where.size();w++){
12331                 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12332         }
12333         for(w=0;w<having.size();w++){
12334                 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12335         }
12336         for(w=0;w<closing_when.size();w++){
12337                 find_partial_fcns_pr(closing_when[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12338         }
12339         for(g=0;g<gb_tbl.size();g++){
12340                 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
12341         }
12342         for(a=0;a<aggr_tbl.size();a++){
12343                 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
12344         }
12345         if(partial_fcns.size()>0){
12346           ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
12347           ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
12348         }
12349
12350 //                      Create cached temporaries for UDAF return values.
12351         for(a=0;a<aggr_tbl.size();a++){
12352                 if(! aggr_tbl.is_builtin(a)){
12353                         int afcn_id = aggr_tbl.get_fcn_id(a);
12354                         data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12355                         sprintf(tmpstr,"udaf_ret_%d", a);
12356                         ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
12357                 }
12358         }
12359
12360
12361 //                      Complex literals (i.e., they need constructors)
12362         ret += "//\t\tComplex literal storage.\n";
12363         cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
12364         ret += generate_complex_lit_vars(complex_literals);
12365
12366 //                      Pass-by-handle parameters
12367         ret += "//\t\tPass-by-handle storage.\n";
12368         vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
12369         ret += generate_pass_by_handle_vars(param_handle_table);
12370
12371
12372 //                      variables to hold parameters.
12373         ret += "//\tfor query parameters\n";
12374         ret += generate_param_vars(param_tbl);
12375
12376 //              Is there a temporal flush?  If so create flush temporaries,
12377 //              create flush indicator.
12378         bool uses_temporal_flush = false;
12379         for(g=0;g<gb_tbl.size();g++){
12380                 data_type *gdt = gb_tbl.get_data_type(g);
12381                 if(gdt->is_temporal())
12382                         uses_temporal_flush = true;
12383         }
12384
12385         if(uses_temporal_flush){
12386                 ret += "//\t\tFor temporal flush\n";
12387                 for(g=0;g<gb_tbl.size();g++){
12388                         data_type *gdt = gb_tbl.get_data_type(g);
12389                         if(gdt->is_temporal()){
12390                           sprintf(tmpstr,"last_gb%d",g);
12391                           ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12392                           sprintf(tmpstr,"last_flushed_gb%d",g);
12393                           ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12394                         }
12395                 }
12396                 ret += "\tbool needs_temporal_flush;\n";
12397         }
12398
12399 //                      The publicly exposed functions
12400
12401         ret += "\npublic:\n";
12402
12403
12404 //-------------------
12405 //                      The functor constructor
12406 //                      pass in the schema handle.
12407 //                      1) make assignments to the unpack offset variables
12408 //                      2) initialize the complex literals
12409
12410         ret += "//\t\tFunctor constructor.\n";
12411         ret +=  this->generate_functor_name()+"(int schema_handle0){\n";
12412
12413         // save the schema handle
12414         ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
12415 //              metadata offset
12416         ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
12417
12418 //              unpack vars
12419         ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
12420         ret += gen_access_var_init(cid_set);
12421
12422 //              complex literals
12423         ret += "//\t\tInitialize complex literals.\n";
12424         ret += gen_complex_lit_init(complex_literals);
12425
12426 //              Initialize partial function results so they can be safely GC'd
12427         ret += gen_partial_fcn_init(partial_fcns);
12428
12429 //              Initialize non-query-parameter parameter handles
12430         ret += gen_pass_by_handle_init(param_handle_table);
12431
12432 //              temporal flush variables
12433 //              ASSUME that structured values won't be temporal.
12434         if(uses_temporal_flush){
12435                 ret += "//\t\tInitialize temporal flush variables.\n";
12436                 for(g=0;g<gb_tbl.size();g++){
12437                         data_type *gdt = gb_tbl.get_data_type(g);
12438                         if(gdt->is_temporal()){
12439                                 literal_t gl(gdt->type_indicator());
12440                                 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
12441                                 ret.append(tmpstr);
12442                         }
12443                 }
12444                 ret += "\tneeds_temporal_flush = false;\n";
12445         }
12446
12447         //              Init temporal attributes referenced in select list
12448         ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
12449
12450         ret += "};\n";
12451
12452
12453 //-------------------
12454 //                      Functor destructor
12455         ret += "//\t\tFunctor destructor.\n";
12456         ret +=  "~"+this->generate_functor_name()+"(){\n";
12457
12458 //                      clean up buffer type complex literals
12459         ret += gen_complex_lit_dtr(complex_literals);
12460
12461 //                      Deregister the pass-by-handle parameters
12462         ret += "/* register and de-register the pass-by-handle parameters */\n";
12463         ret += gen_pass_by_handle_dtr(param_handle_table);
12464
12465 //                      clean up partial function results.
12466         ret += "/* clean up partial function storage    */\n";
12467         ret += gen_partial_fcn_dtr(partial_fcns);
12468
12469 //                      Destroy the parameters, if any need to be destroyed
12470         ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12471
12472         ret += "};\n\n";
12473
12474
12475 //-------------------
12476 //                      Parameter manipulation routines
12477         ret += generate_load_param_block(this->generate_functor_name(),
12478                                                                         this->param_tbl,param_handle_table);
12479         ret += generate_delete_param_block(this->generate_functor_name(),
12480                                                                         this->param_tbl,param_handle_table);
12481
12482 //-------------------
12483 //                      Register new parameter block
12484
12485         ret += "int set_param_block(gs_int32_t sz, void* value){\n";
12486           ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12487           ret += "\treturn this->load_params_"+this->generate_functor_name()+
12488                                 "(sz, value);\n";
12489         ret += "};\n\n";
12490
12491
12492 //-------------------
12493 //              the create_group method.
12494 //              This method creates a group in a buffer passed in
12495 //              (to allow for creation on the stack).
12496 //              There are also a couple of side effects:
12497 //              1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
12498 //              2) determine if a temporal flush is required.
12499
12500         ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
12501         //              Variables for execution of the function.
12502         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
12503
12504         if(partial_fcns.size()>0){              // partial fcn access failure
12505           ret += "\tgs_retval_t retval = 0;\n";
12506           ret += "\n";
12507         }
12508 //              return value
12509         ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
12510                         "_groupdef *) buffer;\n";
12511
12512 //              Start by cleaning up partial function results
12513         ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12514         set<int> w_pfcns;       // partial fcns in where clause
12515         for(w=0;w<where.size();++w)
12516                 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
12517
12518         set<int> ag_gb_pfcns;   // partial fcns in gbdefs, aggr se's
12519         for(g=0;g<gb_tbl.size();g++){
12520                 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
12521         }
12522         for(a=0;a<aggr_tbl.size();a++){
12523                 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
12524         }
12525         ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
12526         ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
12527 //      ret += gen_partial_fcn_dtr(partial_fcns);
12528
12529
12530         ret += gen_temp_tuple_check(this->node_name, 0);
12531         col_id_set found_cids;  // colrefs unpacked thus far.
12532         ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
12533
12534
12535 //                      Save temporal group-by variables
12536
12537
12538         ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
12539
12540           for(g=0;g<gb_tbl.size();g++){
12541
12542                         data_type *gdt = gb_tbl.get_data_type(g);
12543
12544                         if(gdt->is_temporal()){
12545                                 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12546                                         g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
12547                                 ret.append(tmpstr);
12548                         }
12549                 }
12550                 ret.append("\n");
12551
12552
12553
12554 //                      Compare the temporal GB vars with the stored ones,
12555 //                      set flush indicator and update stored GB vars if there is any change.
12556
12557         if(uses_temporal_flush){
12558                 ret+= "\tif( !( (";
12559                 bool first_one = true;
12560                 for(g=0;g<gb_tbl.size();g++){
12561                         data_type *gdt = gb_tbl.get_data_type(g);
12562
12563                         if(gdt->is_temporal()){
12564                           sprintf(tmpstr,"last_gb%d",g);   string lhs_op = tmpstr;
12565                           sprintf(tmpstr,"gbval->gb_var%d",g);   string rhs_op = tmpstr;
12566                           if(first_one){first_one = false;} else {ret += ") && (";}
12567                           ret += generate_equality_test(lhs_op, rhs_op, gdt);
12568                         }
12569                 }
12570                 ret += ") ) ){\n";
12571                 for(g=0;g<gb_tbl.size();g++){
12572                   data_type *gdt = gb_tbl.get_data_type(g);
12573                   if(gdt->is_temporal()){
12574                           if(gdt->is_buffer_type()){
12575                                 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
12576                           }else{
12577                                 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
12578                                 ret += tmpstr;
12579                                 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
12580                           }
12581                           ret += tmpstr;
12582                         }
12583                 }
12584                 ret += "\t\tneeds_temporal_flush=true;\n";
12585                 ret += "\t\t}else{\n"
12586                         "\t\t\tneeds_temporal_flush=false;\n"
12587                         "\t\t}\n";
12588         }
12589
12590
12591 //              For temporal status tuple we don't need to do anything else
12592         ret += "\tif (temp_tuple_received) return NULL;\n\n";
12593
12594         for(w=0;w<where.size();++w){
12595                 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12596                 ret += tmpstr;
12597 //                      Find the set of variables accessed in this CNF elem,
12598 //                      but in no previous element.
12599                 col_id_set new_cids;
12600                 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
12601
12602 //                      Unpack these values.
12603                 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
12604 //                      Find partial fcns ref'd in this cnf element
12605                 set<int> pfcn_refs;
12606                 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
12607                 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
12608
12609                 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
12610                                 +") ) return(NULL);\n";
12611         }
12612
12613 //              The partial functions ref'd in the group-by var and aggregate
12614 //              definitions must also be evaluated.  If one returns false,
12615 //              then implicitly the predicate is false.
12616         set<int>::iterator pfsi;
12617
12618         if(ag_gb_pfcns.size() > 0)
12619                 ret += "//\t\tUnpack remaining partial fcns.\n";
12620         ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
12621                                                                                 found_cids, segen_gb_tbl, "NULL", needs_xform);
12622
12623 //                      Unpack the group-by variables
12624
12625           for(g=0;g<gb_tbl.size();g++){
12626                 data_type *gdt = gb_tbl.get_data_type(g);
12627                 if(!gdt->is_temporal()){        // temproal gbs already computed
12628 //                      Find the new fields ref'd by this GBvar def.
12629                         col_id_set new_cids;
12630                         get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
12631 //                      Unpack these values.
12632                         ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
12633
12634                         sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12635                                 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
12636 /*
12637 //                              There seems to be no difference between the two
12638 //                              branches of the IF statement.
12639                 data_type *gdt = gb_tbl.get_data_type(g);
12640                   if(gdt->is_buffer_type()){
12641 //                              Create temporary copy.
12642                         sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12643                                 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
12644                   }else{
12645                         scalarexp_t *gse = gb_tbl.get_def(g);
12646                         sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12647                                         g,generate_se_code(gse,schema).c_str());
12648                   }
12649 */
12650                         ret.append(tmpstr);
12651                 }
12652           }
12653           ret.append("\n");
12654
12655
12656         ret+= "\treturn gbval;\n";
12657         ret += "};\n\n\n";
12658
12659 //--------------------------------------------------------
12660 //                      Create and initialize an aggregate object
12661
12662         ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
12663         //              Variables for execution of the function.
12664         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
12665
12666 //              return value
12667         ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
12668                         "_aggrdef *)buffer;\n";
12669
12670         for(a=0;a<aggr_tbl.size();a++){
12671                 if(aggr_tbl.is_builtin(a)){
12672 //                      Create temporaries for buffer return values
12673                   data_type *adt = aggr_tbl.get_data_type(a);
12674                   if(adt->is_buffer_type()){
12675                         sprintf(tmpstr,"aggr_tmp_%d", a);
12676                         ret+=adt->make_host_cvar(tmpstr)+";\n";
12677                   }
12678                 }
12679         }
12680
12681 //              Unpack all remaining attributes
12682         ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
12683         for(a=0;a<aggr_tbl.size();a++){
12684           sprintf(tmpstr,"aggval->aggr_var%d",a);
12685           string assignto_var = tmpstr;
12686           ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12687         }
12688
12689         ret += "\treturn aggval;\n";
12690         ret += "};\n\n";
12691
12692 //--------------------------------------------------------
12693 //                      update an aggregate object
12694
12695         ret += "void update_aggregate(host_tuple &tup0, "
12696                 +generate_functor_name()+"_groupdef *gbval, "+
12697                 generate_functor_name()+"_aggrdef *aggval){\n";
12698         //              Variables for execution of the function.
12699         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
12700
12701 //                      use of temporaries depends on the aggregate,
12702 //                      generate them in generate_aggr_update
12703
12704
12705 //              Unpack all remaining attributes
12706         ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
12707         for(a=0;a<aggr_tbl.size();a++){
12708           sprintf(tmpstr,"aggval->aggr_var%d",a);
12709           string varname = tmpstr;
12710           ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12711         }
12712
12713         ret += "\treturn;\n";
12714         ret += "};\n";
12715
12716 //--------------------------------------------------------
12717 //                      reinitialize an aggregate object
12718
12719         ret += "void reinit_aggregates( "+
12720                 generate_functor_name()+"_groupdef *gbval, "+
12721                 generate_functor_name()+"_aggrdef *aggval){\n";
12722         //              Variables for execution of the function.
12723         ret += "\tgs_int32_t problem = 0;\n";   // return unpack failure
12724
12725 //                      use of temporaries depends on the aggregate,
12726 //                      generate them in generate_aggr_update
12727
12728         for(g=0;g<gb_tbl.size();g++){
12729           data_type *gdt = gb_tbl.get_data_type(g);
12730           if(gdt->is_temporal()){
12731                   if(gdt->is_buffer_type()){
12732                         sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
12733                   }else{
12734                         sprintf(tmpstr,"\t\t gbval->gb_var%d =last_gb%d;\n",g,g);
12735                   }
12736                   ret += tmpstr;
12737                 }
12738         }
12739
12740 //              Unpack all remaining attributes
12741         for(a=0;a<aggr_tbl.size();a++){
12742           sprintf(tmpstr,"aggval->aggr_var%d",a);
12743           string varname = tmpstr;
12744           ret.append(generate_aggr_reinitialize(varname,&aggr_tbl,a, schema));
12745         }
12746
12747         ret += "\treturn;\n";
12748         ret += "};\n";
12749
12750
12751
12752
12753
12754 //---------------------------------------------------
12755 //                      Flush test
12756
12757         ret += "\tbool flush_needed(){\n";
12758         if(uses_temporal_flush){
12759                 ret += "\t\treturn needs_temporal_flush;\n";
12760         }else{
12761                 ret += "\t\treturn false;\n";
12762         }
12763         ret += "\t};\n";
12764
12765 //---------------------------------------------------
12766 //                      create output tuple
12767 //                      Unpack the partial functions ref'd in the where clause,
12768 //                      select clause.  Evaluate the where clause.
12769 //                      Finally, pack the tuple.
12770
12771 //                      I need to use special code generation here,
12772 //                      so I'll leave it in longhand.
12773
12774         ret += "host_tuple create_output_tuple("
12775                 +generate_functor_name()+"_groupdef *gbval, "+
12776                 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
12777
12778         ret += "\thost_tuple tup;\n";
12779         ret += "\tfailed = false;\n";
12780         ret += "\tgs_retval_t retval = 0;\n";
12781
12782         string gbvar = "gbval->gb_var";
12783         string aggvar = "aggval->";
12784
12785
12786 //                      First, get the return values from the UDAFS
12787         for(a=0;a<aggr_tbl.size();a++){
12788                 if(! aggr_tbl.is_builtin(a)){
12789                         ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12790                         if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12791                         ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12792                 }
12793         }
12794
12795         set<int> hv_sl_pfcns;
12796         for(w=0;w<having.size();w++){
12797                 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
12798         }
12799         for(s=0;s<select_list.size();s++){
12800                 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
12801         }
12802
12803 //              clean up the partial fcn results from any previous execution
12804         ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
12805
12806 //              Unpack them now
12807         for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
12808                 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12809                 ret += "\tif(retval){ failed = true; return(tup);}\n";
12810         }
12811
12812 //              Evaluate the HAVING clause
12813 //              TODO: this seems to have a ++ operator rather than a + operator.
12814         for(w=0;w<having.size();++w){
12815                 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
12816         }
12817
12818 //          Now, compute the size of the tuple.
12819
12820 //          Unpack any BUFFER type selections into temporaries
12821 //          so that I can compute their size and not have
12822 //          to recompute their value during tuple packing.
12823 //          I can use regular assignment here because
12824 //          these temporaries are non-persistent.
12825 //                      TODO: should I be using the selvar generation routine?
12826
12827         ret += "//\t\tCompute the size of the tuple.\n";
12828         ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
12829       for(s=0;s<select_list.size();s++){
12830                 scalarexp_t *se = select_list[s]->se;
12831         data_type *sdt = se->get_data_type();
12832         if(sdt->is_buffer_type() &&
12833                          !( (se->get_operator_type() == SE_COLREF) ||
12834                                 (se->get_operator_type() == SE_AGGR_STAR) ||
12835                                 (se->get_operator_type() == SE_AGGR_SE) ||
12836                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12837                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12838                 ){
12839             sprintf(tmpstr,"selvar_%d",s);
12840                         ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
12841                         ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
12842         }
12843       }
12844
12845 //      The size of the tuple is the size of the tuple struct plus the
12846 //      size of the buffers to be copied in.
12847
12848       ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
12849       for(s=0;s<select_list.size();s++){
12850 //              if(s>0) ret += "+";
12851                 scalarexp_t *se = select_list[s]->se;
12852         data_type *sdt = select_list[s]->se->get_data_type();
12853         if(sdt->is_buffer_type()){
12854                   if(!( (se->get_operator_type() == SE_COLREF) ||
12855                                 (se->get_operator_type() == SE_AGGR_STAR) ||
12856                                 (se->get_operator_type() == SE_AGGR_SE) ||
12857                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12858                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12859                   ){
12860             sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
12861             ret.append(tmpstr);
12862                   }else{
12863             sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12864             ret.append(tmpstr);
12865                   }
12866         }
12867       }
12868       ret.append(";\n");
12869
12870 //              Allocate tuple data block.
12871         ret += "//\t\tCreate the tuple block.\n";
12872           ret += "\ttup.data = malloc(tup.tuple_size);\n";
12873           ret += "\ttup.heap_resident = true;\n";
12874
12875 //              Mark tuple as regular
12876           ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
12877
12878 //        ret += "\ttup.channel = 0;\n";
12879           ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
12880                                 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
12881
12882 //              Start packing.
12883 //                      (Here, offsets are hard-wired.  is this a problem?)
12884
12885         ret += "//\t\tPack the fields into the tuple.\n";
12886           ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
12887       for(s=0;s<select_list.size();s++){
12888                 scalarexp_t *se = select_list[s]->se;
12889         data_type *sdt = se->get_data_type();
12890         if(sdt->is_buffer_type()){
12891                   if(!( (se->get_operator_type() == SE_COLREF) ||
12892                                 (se->get_operator_type() == SE_AGGR_STAR) ||
12893                                 (se->get_operator_type() == SE_AGGR_SE) ||
12894                            (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12895                            (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12896                   ){
12897             sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d,  ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
12898             ret.append(tmpstr);
12899             sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
12900             ret.append(tmpstr);
12901                   }else{
12902             sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s,  ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12903             ret.append(tmpstr);
12904             sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12905             ret.append(tmpstr);
12906                   }
12907         }else{
12908             sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12909             ret.append(tmpstr);
12910             ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
12911             ret.append(";\n");
12912         }
12913       }
12914
12915 //                      Destroy string temporaries
12916           ret += gen_buffer_selvars_dtr(select_list);
12917
12918           ret += "\treturn tup;\n";
12919           ret += "};\n";
12920
12921 //------------------------------------------------------------------
12922 //              Cleaning_when : evaluate the cleaning_when clause.
12923 //              ASSUME that the udaf return values have already
12924 //              been unpacked.  delete the string udaf return values at the end.
12925
12926         ret += "bool cleaning_when("
12927                 +generate_functor_name()+"_groupdef *gbval, "+
12928                 generate_functor_name()+"_aggrdef *aggval){\n";
12929
12930         ret += "\tbool retval = true;\n";
12931
12932
12933         gbvar = "gbval->gb_var";
12934         aggvar = "aggval->";
12935
12936
12937         set<int> clw_pfcns;
12938         for(w=0;w<closing_when.size();w++){
12939                 collect_partial_fcns_pr(closing_when[w]->pr, clw_pfcns);
12940         }
12941
12942 //              clean up the partial fcn results from any previous execution
12943         ret += gen_partial_fcn_dtr(partial_fcns,clw_pfcns);
12944
12945 //              Unpack them now
12946         for(pfsi=clw_pfcns.begin();pfsi!=clw_pfcns.end();++pfsi){
12947                 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12948                 ret += "\tif(retval){ return false;}\n";
12949         }
12950
12951 //              Evaluate the Closing When clause
12952 //              TODO: this seems to have a ++ operator rather than a + operator.
12953         for(w=0;w<closing_when.size();++w){
12954                 ret += "\tif( !("+generate_predicate_code_fm_aggr(closing_when[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
12955         }
12956
12957
12958 //                      Destroy string return vals of UDAFs
12959         for(a=0;a<aggr_tbl.size();a++){
12960                 if(! aggr_tbl.is_builtin(a)){
12961                         int afcn_id = aggr_tbl.get_fcn_id(a);
12962                         data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12963                         if(adt->is_buffer_type()){
12964                                 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12965                                 adt->get_hfta_buffer_destroy().c_str(), a );
12966                                 ret += tmpstr;
12967                         }
12968                 }
12969         }
12970
12971         ret += "\treturn retval;\n";
12972         ret += "};\n";
12973
12974
12975
12976
12977 //-------------------------------------------------------------------
12978 //              Temporal update functions
12979
12980         ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12981
12982 //              create a temp status tuple
12983         ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12984
12985         ret += gen_init_temp_status_tuple(this->get_node_name());
12986
12987 //              Start packing.
12988 //                      (Here, offsets are hard-wired.  is this a problem?)
12989
12990         ret += "//\t\tPack the fields into the tuple.\n";
12991         for(s=0;s<select_list.size();s++){
12992                 data_type *sdt = select_list[s]->se->get_data_type();
12993                 if(sdt->is_temporal()){
12994                         sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12995                         ret += tmpstr;
12996                         sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
12997                         ret += tmpstr;
12998                         ret += ";\n";
12999                 }
13000         }
13001
13002         ret += "\treturn 0;\n";
13003         ret += "};};\n\n\n";
13004
13005
13006 //----------------------------------------------------------
13007 //                      The hash function
13008
13009         ret += "struct "+generate_functor_name()+"_hash_func{\n";
13010         ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
13011                                 "_groupdef *grp) const{\n";
13012         ret += "\t\treturn(0";
13013         for(g=0;g<gb_tbl.size();g++){
13014                 data_type *gdt = gb_tbl.get_data_type(g);
13015                 if(! gdt->is_temporal()){
13016                         ret += "^";
13017                         if(gdt->use_hashfunc()){
13018                                 if(gdt->is_buffer_type())
13019                                         sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13020                                         else
13021                                 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13022                         }else{
13023                                 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
13024                         }
13025                         ret += tmpstr;
13026                 }
13027         }
13028         ret += " >> 32);\n";
13029         ret += "\t}\n";
13030         ret += "};\n\n";
13031
13032 //----------------------------------------------------------
13033 //                      The comparison function
13034
13035         ret += "struct "+generate_functor_name()+"_equal_func{\n";
13036         ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
13037                         generate_functor_name()+"_groupdef *grp2) const{\n";
13038         ret += "\t\treturn( (";
13039
13040         string hcmpr = "";
13041         bool first_exec = true;
13042         for(g=0;g<gb_tbl.size();g++){
13043                 data_type *gdt = gb_tbl.get_data_type(g);
13044                 if(! gdt->is_temporal()){
13045                         if(first_exec){first_exec=false;}else{ hcmpr += ") && (";}
13046                         if(gdt->complex_comparison(gdt)){
13047                           if(gdt->is_buffer_type())
13048                                 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
13049                                 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13050                           else
13051                                 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
13052                                 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13053                         }else{
13054                                 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
13055                         }
13056                         hcmpr += tmpstr;
13057                 }
13058         }
13059         if(hcmpr == "")
13060                 hcmpr = "true";
13061         ret += hcmpr;
13062
13063         ret += ") );\n";
13064         ret += "\t}\n";
13065         ret += "};\n\n";
13066
13067
13068         return(ret);
13069 }
13070
13071 string rsgah_qpn::generate_operator(int i, string params){
13072
13073                 return(
13074                         "       running_agg_operator<" +
13075                         generate_functor_name()+","+
13076                         generate_functor_name() + "_groupdef, " +
13077                         generate_functor_name() + "_aggrdef, " +
13078                         generate_functor_name()+"_hash_func, "+
13079                         generate_functor_name()+"_equal_func "
13080                         "> *op"+int_to_string(i)+" = new running_agg_operator<"+
13081                         generate_functor_name()+","+
13082                         generate_functor_name() + "_groupdef, " +
13083                         generate_functor_name() + "_aggrdef, " +
13084                         generate_functor_name()+"_hash_func, "+
13085                         generate_functor_name()+"_equal_func "
13086                         ">("+params+", \"" + get_node_name() + "\");\n"
13087                 );
13088 }
13089
13090
13091
13092 //              Split aggregation into two HFTA components - sub and superaggregation
13093 //              If the aggregates cannot be split, an empty vector is returned
13094 vector<qp_node *> sgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13095
13096         vector<qp_node *> ret_vec;
13097         int s, p, g, a, o, i;
13098         int si;
13099
13100         vector<string> fta_flds, stream_flds;
13101         int t = table_name->get_schema_ref();
13102
13103 //                      Get the set of interfaces it accesses.
13104         int ierr;
13105         vector<string> sel_names;
13106
13107 //                      Verify that all of the ref'd UDAFs can be split.
13108
13109         for(a=0;a<aggr_tbl.size();++a){
13110                 if(! aggr_tbl.is_builtin(a)){
13111                         int afcn = aggr_tbl.get_fcn_id(a);
13112                         int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13113                         int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13114                         if(hfta_super_id < 0 || hfta_sub_id < 0){
13115                                 return(ret_vec);
13116                         }
13117                 }
13118     }
13119
13120 /////////////////////////////////////////////////////
13121 //                      Split into  aggr/aggr.
13122
13123
13124         sgah_qpn *low_hfta_node = new sgah_qpn();
13125         low_hfta_node->table_name = table_name;
13126         low_hfta_node->set_node_name( "_"+node_name );
13127         low_hfta_node->table_name->set_range_var(table_name->get_var_name());
13128
13129
13130         sgah_qpn *hi_hfta_node = new sgah_qpn();
13131         hi_hfta_node->table_name = new tablevar_t(  ("_"+node_name).c_str());
13132         hi_hfta_node->set_node_name( node_name );
13133         hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13134
13135 //                      First, process the group-by variables.
13136 //                      both low and hi level queries duplicate group-by variables of original query
13137
13138
13139         for(g=0;g<gb_tbl.size();g++){
13140 //                      Insert the gbvar into both low- and hi level hfta.
13141                 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13142                 low_hfta_node->gb_tbl.add_gb_var(
13143                         gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13144                 );
13145
13146 //                      Insert a ref to the value of the gbvar into the low-level hfta select list.
13147                 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13148                 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13149                 gbvar_fta->set_gb_ref(g);
13150                 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13151                 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13152
13153 //                      Insert the corresponding gbvar ref (gbvar_stream) into the stream.
13154                 gbvar_stream->set_gb_ref(-1);   // used as GBvar def
13155                 hi_hfta_node->gb_tbl.add_gb_var(
13156                         gbvar_stream->get_colref()->get_field(), -1, gbvar_stream,  gb_tbl.get_reftype(g)
13157                 );
13158
13159         }
13160 //      hi_hfta_node->gb_tbl.gb_patterns = gb_tbl.gb_patterns; // pattern processing at higtest level
13161         hi_hfta_node->gb_tbl.set_pattern_info( &gb_tbl); // pattern processing at higtest level
13162
13163 //                      SEs in the aggregate definitions.
13164 //                      They are all safe, so split them up for later processing.
13165         map<int, scalarexp_t *> hfta_aggr_se;
13166         for(a=0;a<aggr_tbl.size();++a){
13167                 split_hfta_aggr( &(aggr_tbl), a,
13168                                                 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl)  ,
13169                                                 low_hfta_node->select_list,
13170                                                 hfta_aggr_se,
13171                                                 Ext_fcns
13172                                         );
13173         }
13174
13175
13176 //                      Next, the select list.
13177
13178         for(s=0;s<select_list.size();s++){
13179                 bool fta_forbidden = false;
13180                 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13181                 hi_hfta_node->select_list.push_back(
13182                         new select_element(root_se, select_list[s]->name));
13183         }
13184
13185
13186
13187 //                      All the predicates in the where clause must execute
13188 //                      in the low-level hfta.
13189
13190         for(p=0;p<where.size();p++){
13191                 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13192                 cnf_elem *new_cnf = new cnf_elem(new_pr);
13193                 analyze_cnf(new_cnf);
13194
13195                 low_hfta_node->where.push_back(new_cnf);
13196         }
13197
13198 //                      All of the predicates in the having clause must
13199 //                      execute in the high-level hfta node.
13200
13201         for(p=0;p<having.size();p++){
13202                 predicate_t *pr_root = rehome_fta_pr( having[p]->pr,  &hfta_aggr_se);
13203                 cnf_elem *cnf_root = new cnf_elem(pr_root);
13204                 analyze_cnf(cnf_root);
13205
13206                 hi_hfta_node->having.push_back(cnf_root);
13207         }
13208
13209
13210 //                      Copy parameters to both nodes
13211         vector<string> param_names = param_tbl->get_param_names();
13212         int pi;
13213         for(pi=0;pi<param_names.size();pi++){
13214                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13215                 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13216                                                                         param_tbl->handle_access(param_names[pi]));
13217                 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13218                                                                         param_tbl->handle_access(param_names[pi]));
13219         }
13220         low_hfta_node->definitions = definitions;
13221         hi_hfta_node->definitions = definitions;
13222
13223
13224         low_hfta_node->table_name->set_machine(table_name->get_machine());
13225         low_hfta_node->table_name->set_interface(table_name->get_interface());
13226         low_hfta_node->table_name->set_ifq(false);
13227
13228         hi_hfta_node->table_name->set_machine(table_name->get_machine());
13229         hi_hfta_node->table_name->set_interface(table_name->get_interface());
13230         hi_hfta_node->table_name->set_ifq(false);
13231
13232         ret_vec.push_back(low_hfta_node);
13233         ret_vec.push_back(hi_hfta_node);
13234
13235
13236         return(ret_vec);
13237
13238
13239         // TODO: add splitting into selection/aggregation
13240 }
13241
13242
13243 //              Split aggregation into two HFTA components - sub and superaggregation
13244 //              If the aggregates cannot be split, an empty vector is returned
13245 //                      Similar to sgah, but super aggregate is rsgah, subaggr is sgah
13246 vector<qp_node *> rsgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13247
13248         vector<qp_node *> ret_vec;
13249         int s, p, g, a, o, i;
13250         int si;
13251
13252         vector<string> fta_flds, stream_flds;
13253         int t = table_name->get_schema_ref();
13254
13255 //                      Get the set of interfaces it accesses.
13256         int ierr;
13257         vector<string> sel_names;
13258
13259 //                      Verify that all of the ref'd UDAFs can be split.
13260
13261         for(a=0;a<aggr_tbl.size();++a){
13262                 if(! aggr_tbl.is_builtin(a)){
13263                         int afcn = aggr_tbl.get_fcn_id(a);
13264                         int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13265                         int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13266                         if(hfta_super_id < 0 || hfta_sub_id < 0){
13267                                 return(ret_vec);
13268                         }
13269                 }
13270     }
13271
13272 /////////////////////////////////////////////////////
13273 //                      Split into  aggr/aggr.
13274
13275
13276         sgah_qpn *low_hfta_node = new sgah_qpn();
13277         low_hfta_node->table_name = table_name;
13278         low_hfta_node->set_node_name( "_"+node_name );
13279         low_hfta_node->table_name->set_range_var(table_name->get_var_name());
13280
13281
13282         rsgah_qpn *hi_hfta_node = new rsgah_qpn();
13283         hi_hfta_node->table_name = new tablevar_t(  ("_"+node_name).c_str());
13284         hi_hfta_node->set_node_name( node_name );
13285         hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13286
13287 //                      First, process the group-by variables.
13288 //                      both low and hi level queries duplicate group-by variables of original query
13289
13290
13291         for(g=0;g<gb_tbl.size();g++){
13292 //                      Insert the gbvar into both low- and hi level hfta.
13293                 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13294                 low_hfta_node->gb_tbl.add_gb_var(
13295                         gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13296                 );
13297
13298 //                      Insert a ref to the value of the gbvar into the low-level hfta select list.
13299                 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13300                 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13301                 gbvar_fta->set_gb_ref(g);
13302                 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13303                 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13304
13305 //                      Insert the corresponding gbvar ref (gbvar_stream) into the stream.
13306                 gbvar_stream->set_gb_ref(-1);   // used as GBvar def
13307                 hi_hfta_node->gb_tbl.add_gb_var(
13308                         gbvar_stream->get_colref()->get_field(), -1, gbvar_stream,  gb_tbl.get_reftype(g)
13309                 );
13310
13311         }
13312
13313 //                      SEs in the aggregate definitions.
13314 //                      They are all safe, so split them up for later processing.
13315         map<int, scalarexp_t *> hfta_aggr_se;
13316         for(a=0;a<aggr_tbl.size();++a){
13317                 split_hfta_aggr( &(aggr_tbl), a,
13318                                                 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl)  ,
13319                                                 low_hfta_node->select_list,
13320                                                 hfta_aggr_se,
13321                                                 Ext_fcns
13322                                         );
13323         }
13324
13325
13326 //                      Next, the select list.
13327
13328         for(s=0;s<select_list.size();s++){
13329                 bool fta_forbidden = false;
13330                 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13331                 hi_hfta_node->select_list.push_back(
13332                         new select_element(root_se, select_list[s]->name));
13333         }
13334
13335
13336
13337 //                      All the predicates in the where clause must execute
13338 //                      in the low-level hfta.
13339
13340         for(p=0;p<where.size();p++){
13341                 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13342                 cnf_elem *new_cnf = new cnf_elem(new_pr);
13343                 analyze_cnf(new_cnf);
13344
13345                 low_hfta_node->where.push_back(new_cnf);
13346         }
13347
13348 //                      All of the predicates in the having clause must
13349 //                      execute in the high-level hfta node.
13350
13351         for(p=0;p<having.size();p++){
13352                 predicate_t *pr_root = rehome_fta_pr( having[p]->pr,  &hfta_aggr_se);
13353                 cnf_elem *cnf_root = new cnf_elem(pr_root);
13354                 analyze_cnf(cnf_root);
13355
13356                 hi_hfta_node->having.push_back(cnf_root);
13357         }
13358
13359 //              Similar for closing when
13360         for(p=0;p<closing_when.size();p++){
13361                 predicate_t *pr_root = rehome_fta_pr( closing_when[p]->pr,  &hfta_aggr_se);
13362                 cnf_elem *cnf_root = new cnf_elem(pr_root);
13363                 analyze_cnf(cnf_root);
13364
13365                 hi_hfta_node->closing_when.push_back(cnf_root);
13366         }
13367
13368
13369 //                      Copy parameters to both nodes
13370         vector<string> param_names = param_tbl->get_param_names();
13371         int pi;
13372         for(pi=0;pi<param_names.size();pi++){
13373                 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13374                 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13375                                                                         param_tbl->handle_access(param_names[pi]));
13376                 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13377                                                                         param_tbl->handle_access(param_names[pi]));
13378         }
13379         low_hfta_node->definitions = definitions;
13380         hi_hfta_node->definitions = definitions;
13381
13382
13383         low_hfta_node->table_name->set_machine(table_name->get_machine());
13384         low_hfta_node->table_name->set_interface(table_name->get_interface());
13385         low_hfta_node->table_name->set_ifq(false);
13386
13387         hi_hfta_node->table_name->set_machine(table_name->get_machine());
13388         hi_hfta_node->table_name->set_interface(table_name->get_interface());
13389         hi_hfta_node->table_name->set_ifq(false);
13390
13391         ret_vec.push_back(low_hfta_node);
13392         ret_vec.push_back(hi_hfta_node);
13393
13394
13395         return(ret_vec);
13396
13397
13398         // TODO: add splitting into selection/aggregation
13399 }
13400
13401 //---------------------------------------------------------------
13402 //              Code for propagating Protocol field source information
13403
13404
13405 scalarexp_t *resolve_protocol_se(scalarexp_t *se, vector<map<string, scalarexp_t *> *> &src_vec, gb_table *gb_tbl, table_list *Schema){
13406         scalarexp_t *rse, *lse,*p_se, *gb_se;
13407         int tno, schema_type;
13408         map<string, scalarexp_t *> *pse_map;
13409
13410   switch(se->get_operator_type()){
13411     case SE_LITERAL:
13412                 return new scalarexp_t(se->get_literal());
13413     case SE_PARAM:
13414                 return scalarexp_t::make_param_reference(se->get_op().c_str());
13415     case SE_COLREF:
13416         if(se->is_gb()){
13417                         if(gb_tbl == NULL)
13418                                         fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, se->gb_ref=%d, but gb_tbl is NULL\n",se->get_gb_ref());
13419                         gb_se = gb_tbl->get_def(se->get_gb_ref());
13420                         return resolve_protocol_se(gb_se,src_vec,gb_tbl,Schema);
13421                 }
13422
13423                 schema_type = Schema->get_schema_type(se->get_colref()->get_schema_ref());
13424                 if(schema_type == PROTOCOL_SCHEMA)
13425                         return dup_se(se,NULL);
13426
13427         tno = se->get_colref()->get_tablevar_ref();
13428         if(tno >= src_vec.size()){
13429                         fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, tno=%d, src_vec.size()=%lu\n",tno,src_vec.size());
13430                 }
13431                 if(src_vec[tno] == NULL)
13432                         return NULL;
13433
13434                 pse_map =src_vec[tno];
13435                 p_se = (*pse_map)[se->get_colref()->get_field()];
13436                 if(p_se == NULL)
13437                         return NULL;
13438                 return dup_se(p_se,NULL);
13439     case SE_UNARY_OP:
13440         lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13441         if(lse == NULL)
13442                 return NULL;
13443         else
13444                 return new scalarexp_t(se->get_op().c_str(),lse);
13445     case SE_BINARY_OP:
13446         lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13447         if(lse == NULL)
13448                 return NULL;
13449         rse = resolve_protocol_se(se->get_right_se(),src_vec,gb_tbl,Schema);
13450         if(rse == NULL)
13451                 return NULL;
13452                 return new scalarexp_t(se->get_op().c_str(),lse,rse);
13453     case SE_AGGR_STAR:
13454                 return( NULL );
13455     case SE_AGGR_SE:
13456                 return( NULL );
13457         case SE_FUNC:
13458                 return(NULL);
13459         default:
13460                 return(NULL);
13461         break;
13462   }
13463
13464 }
13465
13466 void spx_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13467         int i;
13468         vector<map<string, scalarexp_t *> *> src_vec;
13469
13470         for(i=0;i<q_sources.size();i++){
13471                 if(q_sources[i] != NULL)
13472                         src_vec.push_back(q_sources[i]->get_protocol_se());
13473                 else
13474                         src_vec.push_back(NULL);
13475         }
13476
13477         for(i=0;i<select_list.size();i++){
13478                 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13479         }
13480 }
13481
13482 void join_eq_hash_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13483         int i;
13484         vector<map<string, scalarexp_t *> *> src_vec;
13485
13486         for(i=0;i<q_sources.size();i++){
13487                 if(q_sources[i] != NULL)
13488                         src_vec.push_back(q_sources[i]->get_protocol_se());
13489                 else
13490                         src_vec.push_back(NULL);
13491         }
13492
13493         for(i=0;i<select_list.size();i++){
13494                 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13495         }
13496
13497         for(i=0;i<hash_eq.size();i++){
13498                 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
13499                 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
13500         }
13501 }
13502
13503 void filter_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13504         int i;
13505         vector<map<string, scalarexp_t *> *> src_vec;
13506
13507         for(i=0;i<q_sources.size();i++){
13508                 if(q_sources[i] != NULL)
13509                         src_vec.push_back(q_sources[i]->get_protocol_se());
13510                 else
13511                         src_vec.push_back(NULL);
13512         }
13513
13514         for(i=0;i<select_list.size();i++){
13515                 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13516         }
13517
13518         for(i=0;i<hash_eq.size();i++){
13519                 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
13520                 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
13521         }
13522 }
13523
13524 void sgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13525         int i;
13526         vector<map<string, scalarexp_t *> *> src_vec;
13527
13528         for(i=0;i<q_sources.size();i++){
13529                 if(q_sources[i] != NULL)
13530                         src_vec.push_back(q_sources[i]->get_protocol_se());
13531                 else
13532                         src_vec.push_back(NULL);
13533         }
13534
13535         for(i=0;i<select_list.size();i++){
13536                 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
13537         }
13538
13539         for(i=0;i<gb_tbl.size();i++)
13540                 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
13541
13542 }
13543
13544 void rsgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13545         int i;
13546         vector<map<string, scalarexp_t *> *> src_vec;
13547
13548         for(i=0;i<q_sources.size();i++){
13549                 if(q_sources[i] != NULL)
13550                         src_vec.push_back(q_sources[i]->get_protocol_se());
13551                 else
13552                         src_vec.push_back(NULL);
13553         }
13554
13555         for(i=0;i<select_list.size();i++){
13556                 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
13557         }
13558
13559         for(i=0;i<gb_tbl.size();i++)
13560                 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
13561 }
13562
13563 void sgahcwcb_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13564         int i;
13565         vector<map<string, scalarexp_t *> *> src_vec;
13566
13567         for(i=0;i<q_sources.size();i++){
13568                 if(q_sources[i] != NULL)
13569                         src_vec.push_back(q_sources[i]->get_protocol_se());
13570                 else
13571                         src_vec.push_back(NULL);
13572         }
13573
13574         for(i=0;i<select_list.size();i++){
13575                 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
13576         }
13577
13578         for(i=0;i<gb_tbl.size();i++)
13579                 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
13580 }
13581
13582 void mrg_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13583         int f,s,i;
13584         scalarexp_t *first_se;
13585
13586         vector<map<string, scalarexp_t *> *> src_vec;
13587         map<string, scalarexp_t *> *pse_map;
13588
13589         for(i=0;i<q_sources.size();i++){
13590                 if(q_sources[i] != NULL)
13591                         src_vec.push_back(q_sources[i]->get_protocol_se());
13592                 else
13593                         src_vec.push_back(NULL);
13594         }
13595
13596         if(q_sources.size() == 0){
13597                 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::create_protocol_se, q_sources.size() == 0\n");
13598                 exit(1);
13599         }
13600
13601         vector<field_entry *> tbl_flds = table_layout->get_fields();
13602         for(f=0;f<tbl_flds.size();f++){
13603                 bool match = true;
13604                 string fld_nm = tbl_flds[f]->get_name();
13605                 pse_map = src_vec[0];
13606                 first_se = (*pse_map)[fld_nm];
13607                 if(first_se == NULL)
13608                         match = false;
13609                 for(s=1;s<src_vec.size() && match;s++){
13610                         pse_map = src_vec[s];
13611                         scalarexp_t *match_se = (*pse_map)[fld_nm];
13612                         if(match_se == false)
13613                                 match = false;
13614                         else
13615                                 match = is_equivalent_se_base(first_se, match_se, Schema);
13616                 }
13617                 if(match)
13618                         protocol_map[fld_nm] = first_se;
13619                 else
13620                         protocol_map[fld_nm] = NULL;
13621         }
13622 }