1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
16 // Create, manipulate, and dump query plans.
18 #include "query_plan.h"
19 #include "analyze_fta.h"
20 #include "generate_utils.h"
26 extern string hash_nums[NRANDS]; // for fast hashing
// untaboo: receives the string by non-const reference and walks it one
// character position at a time.
// NOTE(review): the loop body is not visible in this excerpt, so the rewrite
// applied to each character cannot be confirmed from here.
31 void untaboo(string &s){
33 for(c=0;c<s.size();++c){
40 // mrg_qpn constructor, define here to avoid
41 // circular references in the .h file
// Builds a merge node from a filter-join node (spx):
//  - every select-list entry of spx becomes a field_entry in a new
//    field_entry_list, which is wrapped in a STREAM_SCHEMA table_def
//    named n_name;
//  - merge_fieldpos identifies the temporal select-list entry to merge on;
//    a second temporal entry only produces a warning on stderr;
//  - one colref (mvars) and one tablevar (fm) are created per entry of
//    sources, using range variables "_m0", "_m1", ...;
//  - the parameters of spx are duplicated into a fresh param_table and
//    spx's definitions are taken over.
// NOTE(review): the statements that assign merge_fieldpos, and whatever
// follows the "no temporal attribute" error, are not visible in this
// excerpt -- confirm that a negative merge_fieldpos cannot reach the
// select_list[merge_fieldpos] accesses below.
42 mrg_qpn::mrg_qpn(filter_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
43 param_tbl = spx->param_tbl;
46 field_entry_list *fel = new field_entry_list();
51 for(i=0;i<spx->select_list.size();++i){
// Work on a private copy of the data type of each select-list expression.
52 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
53 if(dt->is_temporal()){
54 if(merge_fieldpos < 0){
57 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
62 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
63 fel->append_field(fe);
67 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
70 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
72 // NEED TO HANDLE USER_SPECIFIED SLACK
73 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
74 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
75 // if(this->slack == NULL)
76 // fprintf(stderr,"Zero slack.\n");
78 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// One merge variable and one FROM-clause table variable per source; the
// colref refers to the merge field by name and to its source by index.
80 for(i=0;i<sources.size();i++){
81 std::string rvar = "_m"+int_to_string(i);
82 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
83 mvars[i]->set_tablevar_ref(i);
84 fm.push_back(new tablevar_t(sources[i].c_str()));
85 fm[i]->set_range_var(rvar);
// NOTE(review): param_tbl was already pointed at spx->param_tbl at the top
// of the constructor; that value is replaced here by a fresh table filled
// with duplicated data types. The earlier assignment looks redundant unless
// a statement not visible in this excerpt depends on it -- confirm.
88 param_tbl = new param_table();
89 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
91 for(pi=0;pi<param_names.size();pi++){
92 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
93 param_tbl->add_param(param_names[pi],dt->duplicate(),
94 spx->param_tbl->handle_access(param_names[pi]));
96 definitions = spx->definitions;
102 // This function translates an analyzed parse tree
103 // into one or more query nodes (qp_node).
104 // Currently only one node is created, but some query
105 // fragments might create more than one query node,
106 // e.g. aggregation over a join, or nested subqueries
107 // in the FROM clause (unless this is handled at parse tree
108 // analysis time). At this stage, they will be linked
109 // by the names in the FROM clause.
110 // INVARIANT : if more than one query node is returned,
111 // the last one represents the output of the query.
// Builds the query node(s) for one analyzed query and collects them in
// local_plan. The node class is chosen from the query summary:
//   MERGE_QUERY                                   -> mrg_qpn
//   no group-by variables and no aggregates:
//       FROM clause of size 1                     -> spx_qpn
//       FROM clause has FILTER_JOIN_PROPERTY      -> filter_join_qpn
//       otherwise                                 -> join_eq_hash_qpn
//   aggregation:
//       states_refd / sg_tbl / cb_cnf non-empty   -> sgahcwcb_qpn
//       closew_cnf non-empty                      -> rsgah_qpn
//       otherwise                                 -> sgah_qpn
// The chosen node becomes plan_root, is appended to local_plan, and finally
// receives the query name and definitions from qs.
// NOTE(review): the else / closing-brace lines and the return statement are
// not visible in this excerpt; the nesting above is inferred from the order
// of the tests -- confirm against the full file.
112 vector<qp_node *> create_query_nodes(query_summary_class *qs,table_list *Schema){
114 // Classify the query.
116 vector <qp_node *> local_plan;
120 // I should probably move a lot of this code
121 // into the qp_node constructors,
122 // and have this code focus on building the query plan tree.
125 if(qs->query_type == MERGE_QUERY){
126 mrg_qpn *merge_node = new mrg_qpn(qs,Schema);
129 plan_root = merge_node;
130 local_plan.push_back(merge_node);
// NOTE(review): the next line is plain prose and the statements after it use
// variables with no visible declaration, so this stretch is presumably
// disabled code whose comment delimiters are on lines not visible here.
// If it is ever re-enabled, the printf calls pass size() results to %d.
133 Do not split sources until we are done with optimizations
134 vector<mrg_qpn *> split_merge = merge_node->split_sources();
135 local_plan.insert(local_plan.begin(), split_merge.begin(), split_merge.end());
137 // If children are created, add them to the schema.
140 printf("split_merge size is %d\n",split_merge.size());
141 for(i=1;i<split_merge.size();++i){
142 Schema->add_table(split_merge[i]->get_fields());
143 printf("Adding split merge table %d\n",i);
148 printf("Did split sources on %s:\n",qs->query_name.c_str());
150 for(ss=0;ss<local_plan.size();ss++){
151 printf("node %d, name=%s, sources=",ss,local_plan[ss]->get_node_name().c_str());
152 vector<tablevar_t *> inv = local_plan[ss]->get_input_tbls();
154 for(nn=0;nn<inv.size();nn++){
155 printf("%s ",inv[nn]->to_string().c_str());
164 // Select / Aggregation / Join
165 if(qs->gb_tbl->size() == 0 && qs->aggr_tbl->size() == 0){
167 if(qs->fta_tree->get_from()->size() == 1){
168 spx_qpn *spx_node = new spx_qpn(qs,Schema);
170 plan_root = spx_node;
171 local_plan.push_back(spx_node);
173 if(qs->fta_tree->get_from()->get_properties() == FILTER_JOIN_PROPERTY){
174 filter_join_qpn *join_node = new filter_join_qpn(qs,Schema);
175 plan_root = join_node;
176 local_plan.push_back(join_node);
178 join_eq_hash_qpn *join_node = new join_eq_hash_qpn(qs,Schema);
179 plan_root = join_node;
180 local_plan.push_back(join_node);
186 if(qs->states_refd.size() || qs->sg_tbl.size() || qs->cb_cnf.size()){
187 sgahcwcb_qpn *sgahcwcb_node = new sgahcwcb_qpn(qs,Schema);
188 plan_root = sgahcwcb_node;
189 local_plan.push_back(sgahcwcb_node);
191 if(qs->closew_cnf.size()){
192 rsgah_qpn *rsgah_node = new rsgah_qpn(qs,Schema);
193 plan_root = rsgah_node;
194 local_plan.push_back(rsgah_node);
196 sgah_qpn *sgah_node = new sgah_qpn(qs,Schema);
197 plan_root = sgah_node;
198 local_plan.push_back(sgah_node);
205 // Get the query name and other definitions.
206 plan_root->set_node_name( qs->query_name);
207 plan_root->set_definitions( qs->definitions) ;
210 // return(plan_root);
// Renders a scalar expression as query text, recursing into its operands.
// Forms produced by the visible statements:
//   literal / colref : their own to_query_string()
//   "$" + op         : presumably a parameter reference (case label not
//                      visible in this excerpt)
//   unary            : op( operand )
//   binary           : (left)op(right)
//   aggregates       : op + su_ind + "(*)"  or  op + su_ind + "(" operand ")"
//   function call    : op + su_ind + "(" followed by the rendered operands
// Aggregate operands are looked up in aggr_tbl through se->get_aggr_ref().
// An unrecognized operator type is reported through the returned string, not
// through stderr.
// NOTE(review): su_ind is presumably set when se->is_superaggr() holds; the
// assignment is not visible in this excerpt.
216 string se_to_query_string(scalarexp_t *se, aggregate_table *aggr_tbl){
221 vector<scalarexp_t *> operand_list;
224 if(se->is_superaggr())
227 switch(se->get_operator_type()){
229 l_str = se->get_literal()->to_query_string();
232 l_str = "$" + se->get_op();
235 l_str = se->get_colref()->to_query_string() ;
238 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
// NOTE(review): the doubled ';' below is a harmless empty statement.
240 return se->get_op()+"( "+l_str+" )";;
242 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
243 r_str = se_to_query_string(se->get_right_se(),aggr_tbl);
244 return( "("+l_str+")"+se->get_op()+"("+r_str+")" );
246 return( se->get_op() + su_ind + "(*)");
// The operand of an aggregate lives in the aggregate table, not in se.
248 l_str = se_to_query_string(aggr_tbl->get_aggr_se(se->get_aggr_ref()),aggr_tbl);
249 return( se->get_op() + su_ind + "(" + l_str + ")" );
// A non-negative aggr_ref means the operands are stored in aggr_tbl;
// otherwise they are taken from the expression itself.
251 if(se->get_aggr_ref() >= 0)
252 operand_list = aggr_tbl->get_operand_list(se->get_aggr_ref());
254 operand_list = se->get_operands();
256 ret = se->get_op() + su_ind + "(";
257 for(p=0;p<operand_list.size();p++){
258 l_str = se_to_query_string(operand_list[p],aggr_tbl);
266 return "ERROR SE op type not recognized in se_to_query_string.\n";
// Renders a predicate as query text, using se_to_query_string for scalar
// operands and recursing for sub-predicates.
// Forms produced by the visible statements:
//   IN list          : lhs + " IN [" followed by the literals
//   comparison       : left + " " + op + " " + right
//   unary predicate  : op( operand )
//   binary predicate : ( right )op( left )
//   predicate call   : op + "[" followed by the rendered operands
// An unknown operator type is reported on stderr with line and character
// position.
// NOTE(review): case labels and the closing/return statements are not
// visible in this excerpt.
270 string pred_to_query_str(predicate_t *pr, aggregate_table *aggr_tbl){
275 vector<literal_t *> llist;
276 vector<scalarexp_t *> op_list;
278 switch(pr->get_operator_type()){
280 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
281 ret = l_str + " IN [";
282 llist = pr->get_lit_vec();
283 for(l=0;l<llist.size();l++){
285 ret += llist[l]->to_query_string();
291 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
292 r_str = se_to_query_string(pr->get_right_se(),aggr_tbl);
293 return( l_str + " " + pr->get_op() + " " + r_str );
295 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
296 return(pr->get_op() + "( " + l_str + " )");
298 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
299 r_str = pred_to_query_str(pr->get_right_pr(),aggr_tbl);
// NOTE(review): the right operand is emitted before the left one, unlike
// the binary case of se_to_query_string. The printed text therefore reverses
// the operand order of the predicate -- confirm this is intended.
300 return("( " + r_str + " )" + pr->get_op() + "( " + l_str + " )");
302 ret = pr->get_op()+"[";
303 op_list = pr->get_op_list();
304 for(o=0;o<op_list.size();++o){
306 ret += se_to_query_string(op_list[o],aggr_tbl);
311 fprintf(stderr,"INTERNAL ERROR in pred_to_query_str, line %d, character %d, unknown predicate operator type %d\n",
312 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
321 // Build a selection list,
322 // but avoid adding duplicate SEs.
// Searches lfta_select_list for an entry whose expression is equivalent to
// se (is_equivalent_se). When none is found, se is appended under the
// placeholder name "NoNameIn:add_select_list_nodup" and the index of the new
// entry is returned.
// NOTE(review): the rest of the parameter list and the statements of the
// match branch are not visible in this excerpt. Callers pass a new_element
// flag and index the list with the result, so the match branch presumably
// returns the index of the equivalent entry -- confirm.
325 int add_select_list_nodup(vector<select_element *> &lfta_select_list, scalarexp_t *se,
329 for(s=0;s<lfta_select_list.size();s++){
330 if(is_equivalent_se(lfta_select_list[s]->se, se)){
335 lfta_select_list.push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
336 return(lfta_select_list.size()-1);
341 // TODO: The generated colref should be tied to the tablevar
342 // representing the lfta output. For now, always 0.
// Registers se in lfta_select_list (without duplicating an equivalent entry)
// and builds a column-reference expression that names that entry.
// The column name is either the one already stored on the list entry or one
// produced by impute_colname, which is then stored back on the entry.
// The new colref gets h_tvref as its tablevar reference and the new
// expression takes over the decorations of se.
// NOTE(review): the test that picks between the two naming paths and the
// return statement are not visible in this excerpt; presumably the choice
// depends on new_element and new_se is returned -- confirm.
344 scalarexp_t *make_fta_se_ref(vector<select_element *> &lfta_select_list, scalarexp_t *se, int h_tvref){
346 int fta_se_nbr = add_select_list_nodup(lfta_select_list, se, new_element);
349 colname = lfta_select_list[fta_se_nbr]->name;
351 colname = impute_colname(lfta_select_list, se);
352 lfta_select_list[fta_se_nbr]->name = colname;
355 // TODO: fill in the tablevar and schema of the colref here.
356 colref_t *new_cr = new colref_t(colname.c_str());
357 new_cr->set_tablevar_ref(h_tvref);
360 scalarexp_t *new_se= new scalarexp_t(new_cr);
361 new_se->use_decorations_of(se);
367 // Build a selection list,
368 // but avoid adding duplicate SEs.
// Overload taking the select list by pointer; the visible statements mirror
// the reference overload: search for an entry equivalent to se, otherwise
// append se under the placeholder name "NoNameIn:add_select_list_nodup" and
// return the index of the new entry.
// NOTE(review): lfta_select_list is dereferenced without a null check in the
// visible lines, and the match branch is not visible in this excerpt.
371 int add_select_list_nodup(vector<select_element *> *lfta_select_list, scalarexp_t *se,
375 for(s=0;s<lfta_select_list->size();s++){
376 if(is_equivalent_se((*lfta_select_list)[s]->se, se)){
381 lfta_select_list->push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
382 return(lfta_select_list->size()-1);
387 // TODO: The generated colref should be tied to the tablevar
388 // representing the lfta output. For now, always 0.
// Overload for several select lists: h_tvref selects which list receives se
// and is also stored as the tablevar reference of the generated colref.
// Apart from that, the visible statements mirror the single-list overload:
// register se without duplication, reuse or impute the column name, build a
// colref expression and copy the decorations of se onto it.
// NOTE(review): h_tvref is used as an index into lfta_select_list without a
// visible range check; the naming test and the return statement are not
// visible in this excerpt.
390 scalarexp_t *make_fta_se_ref(vector<vector<select_element *> *> &lfta_select_list, scalarexp_t *se, int h_tvref){
392 vector<select_element *> *the_sel_list = lfta_select_list[h_tvref];
393 int fta_se_nbr = add_select_list_nodup(the_sel_list, se, new_element);
396 colname = (*the_sel_list)[fta_se_nbr]->name;
398 colname = impute_colname(*the_sel_list, se);
399 (*the_sel_list)[fta_se_nbr]->name = colname;
402 // TODO: fill in the tablevar and schema of the colref here.
403 colref_t *new_cr = new colref_t(colname.c_str());
404 new_cr->set_tablevar_ref(h_tvref);
407 scalarexp_t *new_se= new scalarexp_t(new_cr);
408 new_se->use_decorations_of(se);
417 // Test if a se can be evaluated at the fta.
418 // check forbidden types (e.g. float), forbidden operations
419 // between types (e.g. divide a long long), forbidden operations
420 // (too expensive, not implemented).
422 // Return true if not forbidden, false if forbidden
424 // TODO: the parameter aggr_tbl is not used, delete it.
// The check is recursive: an operator expression is tested only after its
// operand(s) passed, using fta_legal_operation on the result type; a
// function call additionally needs every operand to pass and the function
// found by Ext_fcns->lookup_fcn to be fta_legal.
// A failed function lookup is reported on stderr together with the operand
// types and source position.
// NOTE(review): case labels and the early "return false" lines are not
// visible in this excerpt.
426 bool check_fta_forbidden_se(scalarexp_t *se,
427 aggregate_table *aggr_tbl,
428 ext_fcn_list *Ext_fcns
432 vector<scalarexp_t *> operand_list;
433 vector<data_type *> dt_signature;
434 data_type *dt = se->get_data_type();
438 switch(se->get_operator_type()){
// Leaf expressions: legality depends only on the data type.
442 return( se->get_data_type()->fta_legal_type() );
446 if(!check_fta_forbidden_se(se->get_left_se(), aggr_tbl, Ext_fcns))
449 dt->fta_legal_operation(se->get_left_se()->get_data_type(), se->get_op())
452 if(!check_fta_forbidden_se(se->get_left_se(),aggr_tbl, Ext_fcns))
454 if(!check_fta_forbidden_se(se->get_right_se(),aggr_tbl, Ext_fcns))
456 return(dt->fta_legal_operation(se->get_left_se()->get_data_type(),
457 se->get_right_se()->get_data_type(),
462 // return true, aggregate fta-safeness is determined elsewhere.
// A function expression that refers to an aggregate-table entry is treated
// like an aggregate and accepted here.
469 if(se->get_aggr_ref() >= 0) return true;
471 operand_list = se->get_operands();
// Check each operand and collect its type to form the lookup signature.
472 for(p=0;p<operand_list.size();p++){
473 if(!check_fta_forbidden_se(operand_list[p],aggr_tbl, Ext_fcns))
475 dt_signature.push_back(operand_list[p]->get_data_type() );
477 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
479 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
481 for(o=0;o<operand_list.size();o++){
482 if(o>0) fprintf(stderr,", ");
483 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
485 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
// fcn_id == -2 is the lookup result for an ambiguous match, per the message.
486 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
490 return(Ext_fcns->fta_legal(fcn_id) );
// NOTE(review): this internal error goes to stdout via printf, while the
// other diagnostics in this function use stderr -- confirm intended.
492 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
501 // test if a pr can be executed at the fta.
503 // Return true if not forbidden, false if forbidden
// Scalar operands are tested with check_fta_forbidden_se and sub-predicates
// recursively. For an IN list each literal's type must also be an
// fta_legal_type; for an external predicate every operand must pass and the
// predicate found by Ext_fcns->lookup_pred must be fta_legal.
// A failed predicate lookup and an unknown operator type are reported on
// stderr.
// NOTE(review): case labels and the early "return false" lines are not
// visible in this excerpt.
505 bool check_fta_forbidden_pr(predicate_t *pr,
506 aggregate_table *aggr_tbl,
507 ext_fcn_list *Ext_fcns
510 vector<literal_t *> llist;
513 vector<scalarexp_t *> op_list;
514 vector<data_type *> dt_signature;
518 switch(pr->get_operator_type()){
520 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns) )
522 llist = pr->get_lit_vec();
523 for(l=0;l<llist.size();l++){
// NOTE(review): a data_type is heap-allocated for every literal only to query
// fta_legal_type(); no matching delete is visible in this excerpt, so this
// looks like a per-literal leak -- confirm.
524 dt = new data_type(llist[l]->get_type());
525 if(! dt->fta_legal_type()){
533 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns))
535 if(! check_fta_forbidden_se(pr->get_right_se(), aggr_tbl, Ext_fcns))
539 return( check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns) );
541 if(! check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns))
543 if(! check_fta_forbidden_pr(pr->get_right_pr(), aggr_tbl, Ext_fcns))
547 op_list = pr->get_op_list();
// Check each operand and collect its type to form the lookup signature.
548 for(o=0;o<op_list.size();o++){
549 if(!check_fta_forbidden_se(op_list[o],aggr_tbl, Ext_fcns))
551 dt_signature.push_back(op_list[o]->get_data_type() );
553 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), dt_signature);
555 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
557 for(o=0;o<op_list.size();o++){
558 if(o>0) fprintf(stderr,", ");
559 fprintf(stderr,"%s",op_list[o]->get_data_type()->to_string().c_str());
561 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
562 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
566 return(Ext_fcns->fta_legal(fcn_id) );
568 fprintf(stderr,"INTERNAL ERROR in check_fta_forbidden_pr, line %d, character %d, unknown predicate operator type %d\n",
569 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
578 // Split the aggregates in orig_aggr_tbl, into superaggregates and
580 // (the value of the HFTA aggregate might be a SE of several LFTA
581 // subaggregates, e.g. avg : sum / count )
582 // Register the superaggregates in hfta_aggr_tbl, and the
583 // subaggregates in lfta_aggr_tbl.
584 // Insert references to the subaggregates into lfta_select_list.
585 // (and record their names in the currnames list)
586 // Create a SE for the superaggregate, put it in hfta_aggr_se,
// Handles the single aggregate agr_id of orig_aggr_tbl. Two paths:
//  - user-defined aggregate (not is_builtin): the sub- and super-aggregate
//    function ids come from Ext_fcns (get_subaggr_id / get_superaggr_id);
//  - builtin aggregate: the subaggregate names and types come from
//    orig_aggr_tbl (get_subaggr_fcns / get_subaggr_dt) and the
//    superaggregate expression from make_superaggr_se.
// On both paths each subaggregate is added to lfta_select_list without
// duplication, registered in lfta_aggr_tbl, and referenced from the
// superaggregate through a colref carrying the select-list column name.
// NOTE(review): else / brace lines and several local declarations are not
// visible in this excerpt.
589 void split_fta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
590 aggregate_table *hfta_aggr_tbl,
591 aggregate_table *lfta_aggr_tbl,
592 vector<select_element *> &lfta_select_list,
593 map<int,scalarexp_t *> &hfta_aggr_se,
594 ext_fcn_list *Ext_fcns
597 scalarexp_t *subaggr_se;
602 scalarexp_t *new_se, *l_se;
603 vector<scalarexp_t *> subaggr_ref_se;
606 if(! orig_aggr_tbl->is_builtin(agr_id)){
607 // Construct the subaggregate
608 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
609 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
610 vector<scalarexp_t *> subopl;
// The subaggregate works on duplicates of the original operands.
612 for(o=0;o<opl.size();++o){
613 subopl.push_back(dup_se(opl[o], NULL));
615 int sub_id = Ext_fcns->get_subaggr_id(fcn_id);
616 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
617 subaggr_se->set_fcn_id(sub_id);
618 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
619 // Add it to the lfta select list.
620 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
622 colname = lfta_select_list[fta_se_nbr]->name;
624 colname = impute_colname(lfta_select_list, subaggr_se);
625 lfta_select_list[fta_se_nbr]->name = colname;
626 ano = lfta_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,Ext_fcns->has_lfta_bailout(sub_id));
627 subaggr_se->set_aggr_id(ano);
630 // Construct a reference to the subaggregate
631 new_cr = new colref_t(colname.c_str());
632 new_se = new scalarexp_t(new_cr);
633 // I'm not certain what the types should be ....
634 // This will need to be filled in by later analysis.
635 // NOTE: this might not capture all the meaning of data_type ...
636 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
637 subaggr_ref_se.push_back(new_se);
639 // Construct the superaggregate
640 int super_id = Ext_fcns->get_superaggr_id(fcn_id);
641 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
642 ret_se->set_fcn_id(super_id);
643 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
644 // Register it in the hfta aggregate table
645 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, Ext_fcns->is_running_aggr(sub_id),false);
646 ret_se->set_aggr_id(ano);
647 hfta_aggr_se[agr_id] = ret_se;
653 // builtin aggregate processing
657 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
658 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
661 if(orig_aggr_tbl->is_star_aggr(agr_id)){
662 for(sa=0;sa<subaggr_names.size();sa++){
663 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
664 subaggr_se->set_data_type(subaggr_dt[sa]);
666 // The following sequence is similar to the code in make_fta_se_ref,
667 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a new fta_se_nbr, whereas the other two paths
// of this function assign to an fta_se_nbr declared elsewhere; it presumably
// shadows that outer variable -- harmless here, but confirm.
668 int fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
670 colname = lfta_select_list[fta_se_nbr]->name;
672 colname = impute_colname(lfta_select_list, subaggr_se);
673 lfta_select_list[fta_se_nbr]->name = colname;
674 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
675 subaggr_se->set_aggr_id(ano);
677 new_cr = new colref_t(colname.c_str());
678 new_cr->set_tablevar_ref(0);
679 new_se = new scalarexp_t(new_cr);
681 // I'm not certain what the types should be ....
682 // This will need to be filled in by later analysis.
683 // Actually, this is causing a problem.
684 // I will assume a UINT data type. / change to INT
685 // (consistent with assign_data_types in analyze_fta.cc)
686 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
687 data_type *ndt = new data_type("Int"); // used to be Uint
688 new_se->set_data_type(ndt);
690 subaggr_ref_se.push_back(new_se);
693 for(sa=0;sa<subaggr_names.size();sa++){
// Either an aggregate over a duplicate of the original operand or a star
// aggregate is built for each subaggregate; the selecting test is not
// visible in this excerpt (presumably driven by use_se -- confirm).
695 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
696 l_se = dup_se(aggr_operand, NULL);
697 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
699 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
701 subaggr_se->set_data_type(subaggr_dt[sa]);
703 // again, similar to make_fta_se_ref.
704 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
706 colname = lfta_select_list[fta_se_nbr]->name;
708 colname = impute_colname(lfta_select_list, subaggr_se);
709 lfta_select_list[fta_se_nbr]->name = colname;
711 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
713 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
714 subaggr_se->set_aggr_id(ano);
716 new_cr = new colref_t(colname.c_str());
717 new_se = new scalarexp_t(new_cr);
718 // I'm not certain what the types should be ....
719 // This will need to be filled in by later analysis.
720 // NOTE: this might not capture all the meaning of data_type ...
721 new_se->set_data_type(subaggr_dt[sa]);
722 subaggr_ref_se.push_back(new_se);
725 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
726 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
728 // ASSUME either the return value is an aggregation,
729 // or a binary_op between two aggregations
// NOTE(review): both sides of the || test the same constant (SE_AGGR_SE), so
// the second comparison is redundant; possibly a different operator type was
// meant for one of them -- confirm.
730 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
731 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
732 ret_se->set_aggr_id(ano);
734 // Basically processing for AVG.
735 // set the data type of the superagg to that of the subagg.
// The left and right operands are assumed to correspond to subaggr_dt[0] and
// subaggr_dt[1]; no size check on subaggr_dt is visible in this excerpt.
736 scalarexp_t *left_se = ret_se->get_left_se();
737 left_se->set_data_type(subaggr_dt[0]);
738 ano = hfta_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
739 left_se->set_aggr_id(ano);
741 scalarexp_t *right_se = ret_se->get_right_se();
742 right_se->set_data_type(subaggr_dt[1]);
743 ano = hfta_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
744 right_se->set_aggr_id(ano);
747 hfta_aggr_se[agr_id] = ret_se;
752 // Split the aggregates in orig_aggr_tbl, into hfta_superaggregates and
753 // hfta_subaggregates.
754 // Register the superaggregates in hi_aggr_tbl, and the
755 // subaggregates in low_aggr_tbl.
756 // Insert references to the subaggregates into low_select_list.
757 // (and record their names in the currnames list)
758 // Create a SE for the superaggregate, put it in hi_aggr_se,
// Handles the single aggregate agr_id of orig_aggr_tbl. The visible
// statements parallel split_fta_aggr, with these differences:
//  - the function ids come from get_hfta_subaggr_id / get_hfta_superaggr_id;
//  - the add_aggr calls for user-defined aggregates pass false for all three
//    trailing flags;
//  - in the builtin path the data type of ret_se is set after the
//    operator-type test rather than before it.
// NOTE(review): else / brace lines and several local declarations are not
// visible in this excerpt.
761 void split_hfta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
762 aggregate_table *hi_aggr_tbl,
763 aggregate_table *low_aggr_tbl,
764 vector<select_element *> &low_select_list,
765 map<int,scalarexp_t *> &hi_aggr_se,
766 ext_fcn_list *Ext_fcns
769 scalarexp_t *subaggr_se;
774 scalarexp_t *new_se, *l_se;
775 vector<scalarexp_t *> subaggr_ref_se;
778 if(! orig_aggr_tbl->is_builtin(agr_id)){
779 // Construct the subaggregate
780 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
781 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
782 vector<scalarexp_t *> subopl;
// The subaggregate works on duplicates of the original operands.
784 for(o=0;o<opl.size();++o){
785 subopl.push_back(dup_se(opl[o], NULL));
787 int sub_id = Ext_fcns->get_hfta_subaggr_id(fcn_id);
788 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
789 subaggr_se->set_fcn_id(sub_id);
790 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
791 // Add it to the low select list.
792 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
794 colname = low_select_list[fta_se_nbr]->name;
796 colname = impute_colname(low_select_list, subaggr_se);
797 low_select_list[fta_se_nbr]->name = colname;
798 ano = low_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,false);
799 subaggr_se->set_aggr_id(ano);
802 // Construct a reference to the subaggregate
803 new_cr = new colref_t(colname.c_str());
804 new_se = new scalarexp_t(new_cr);
805 // I'm not certain what the types should be ....
806 // This will need to be filled in by later analysis.
807 // NOTE: this might not capture all the meaning of data_type ...
808 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
809 subaggr_ref_se.push_back(new_se);
811 // Construct the superaggregate
812 int super_id = Ext_fcns->get_hfta_superaggr_id(fcn_id);
813 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
814 ret_se->set_fcn_id(super_id);
815 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
816 // Register it in the high aggregate table
817 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, false,false);
818 ret_se->set_aggr_id(ano);
819 hi_aggr_se[agr_id] = ret_se;
825 // builtin aggregate processing
829 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
830 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
833 if(orig_aggr_tbl->is_star_aggr(agr_id)){
834 for(sa=0;sa<subaggr_names.size();sa++){
835 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
836 subaggr_se->set_data_type(subaggr_dt[sa]);
838 // The following sequence is similar to the code in make_fta_se_ref,
839 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a new fta_se_nbr, whereas the other two paths
// of this function assign to an fta_se_nbr declared elsewhere; it presumably
// shadows that outer variable -- harmless here, but confirm.
840 int fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
842 colname = low_select_list[fta_se_nbr]->name;
844 colname = impute_colname(low_select_list, subaggr_se);
845 low_select_list[fta_se_nbr]->name = colname;
846 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
847 subaggr_se->set_aggr_id(ano);
849 new_cr = new colref_t(colname.c_str());
850 new_cr->set_tablevar_ref(0);
851 new_se = new scalarexp_t(new_cr);
853 // I'm not certain what the types should be ....
854 // This will need to be filled in by later analysis.
855 // Actually, this is causing a problem.
856 // I will assume an INT data type (this used to be UINT).
857 // (consistent with assign_data_types in analyze_fta.cc)
858 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
859 data_type *ndt = new data_type("Int"); // was Uint
860 new_se->set_data_type(ndt);
862 subaggr_ref_se.push_back(new_se);
865 for(sa=0;sa<subaggr_names.size();sa++){
// Either an aggregate over a duplicate of the original operand or a star
// aggregate is built for each subaggregate; the selecting test is not
// visible in this excerpt (presumably driven by use_se -- confirm).
867 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
868 l_se = dup_se(aggr_operand, NULL);
869 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
871 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
873 subaggr_se->set_data_type(subaggr_dt[sa]);
875 // again, similar to make_fta_se_ref.
876 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
878 colname = low_select_list[fta_se_nbr]->name;
880 colname = impute_colname(low_select_list, subaggr_se);
881 low_select_list[fta_se_nbr]->name = colname;
883 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
885 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
886 subaggr_se->set_aggr_id(ano);
888 new_cr = new colref_t(colname.c_str());
889 new_se = new scalarexp_t(new_cr);
890 // I'm not certain what the types should be ....
891 // This will need to be filled in by later analysis.
892 // NOTE: this might not capture all the meaning of data_type ...
893 new_se->set_data_type(subaggr_dt[sa]);
894 subaggr_ref_se.push_back(new_se);
897 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
898 // ASSUME either the return value is an aggregation,
899 // or a binary_op between two aggregations
// NOTE(review): both sides of the || test the same constant (SE_AGGR_SE), so
// the second comparison is redundant; possibly a different operator type was
// meant for one of them -- confirm.
900 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
901 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
902 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
904 // Basically processing for AVG.
905 // set the data type of the superagg to that of the subagg.
// The left and right operands are assumed to correspond to subaggr_dt[0] and
// subaggr_dt[1]; no size check on subaggr_dt is visible in this excerpt.
906 scalarexp_t *left_se = ret_se->get_left_se();
907 left_se->set_data_type(subaggr_dt[0]);
908 ano = hi_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
909 left_se->set_aggr_id(ano);
911 scalarexp_t *right_se = ret_se->get_right_se();
912 right_se->set_data_type(subaggr_dt[1]);
913 ano = hi_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
914 right_se->set_aggr_id(ano);
// NOTE(review): unlike split_fta_aggr, the set_aggr_id call for ret_se comes
// after the AVG-handling statements. If it runs on that path, ret_se receives
// the id registered for right_se; the brace structure is not visible in this
// excerpt -- confirm.
917 ret_se->set_aggr_id(ano);
918 hi_aggr_se[agr_id] = ret_se;
926 // Split a scalar expression into one part which executes
927 // at the stream and another set of parts which execute
929 // Because I'm actually modifying the SEs, I will make
930 // copies. But I will assume that literals, params, and
931 // colrefs are immutable at this point.
932 // (if there is ever a need to change one, must make a
934 // NOTE : if se is constant (only references literals),
935 // avoid making the fta compute it.
937 // NOTE : This will need to be generalized to
938 // handle join expressions, namely to handle a vector
941 // Return value is the HFTA se.
942 // Add lftas select_elements to the fta_select_list.
943 // set fta_forbidden if this node or any child cannot
944 // execute at the lfta.
// Replacement rule used throughout: when an operator or function cannot run
// at the lfta but an operand can, that operand is pushed onto
// lfta_select_list through make_fta_se_ref (tablevar reference 0) and the
// returned expression refers to it by column; operands made only of literals
// and parameters (is_literal_or_param_only) are left in place.
// NOTE(review): case labels, else / brace lines and the return statement are
// not visible in this excerpt.
948 scalarexp_t *split_fta_se(scalarexp_t *se,
950 vector<select_element *> &lfta_select_list,
951 ext_fcn_list *Ext_fcns
955 vector<scalarexp_t *> operand_list;
956 vector<data_type *> dt_signature;
957 scalarexp_t *ret_se, *l_se, *r_se;
958 bool l_forbid, r_forbid, this_forbid;
961 data_type *dt = se->get_data_type();
963 switch(se->get_operator_type()){
965 fta_forbidden = ! se->get_data_type()->fta_legal_type();
966 ret_se = new scalarexp_t(se->get_literal());
967 ret_se->use_decorations_of(se);
971 fta_forbidden = ! se->get_data_type()->fta_legal_type();
972 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
973 ret_se->use_decorations_of(se);
977 // No colref should be forbidden,
978 // the schema is wrong, the fta_legal_type() fcn is wrong,
979 // or the source table is actually a stream.
980 // Issue a warning, but proceed with processing.
981 // Also, should not be a ref to a gbvar.
982 // (a gbvar ref only occurs in an aggregation node,
983 // and these SEs are rehomed, not split.
984 fta_forbidden = ! se->get_data_type()->fta_legal_type();
987 fprintf(stderr,"WARNING, a colref is a forbidden data type in split_fta_se,"
989 " type is %s, line=%d, col=%d\n",
990 se->get_colref()->to_string().c_str(),
991 se->get_data_type()->get_type_str().c_str(),
992 se->lineno, se->charno
997 fprintf(stderr,"INTERNAL ERROR, a colref is a gbvar ref in split_fta_se,"
998 " type is %s, line=%d, col=%d\n",
999 se->get_data_type()->get_type_str().c_str(),
1000 se->lineno, se->charno
1005 ret_se = new scalarexp_t(se->get_colref());
1006 ret_se->use_decorations_of(se);
1010 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1012 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1014 // If this operation is forbidden but the child SE is not,
1015 // put the child se on the lfta_select_list, create a colref
1016 // which accesses this se, and make it the child of this op.
1017 // Exception : the child se is constant (only literal refs).
1018 if(this_forbid && !l_forbid){
1019 if(!is_literal_or_param_only(l_se)){
1020 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1021 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1024 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1026 ret_se->use_decorations_of(se);
// NOTE(review): bitwise '|' on two bools; the result equals '||' here, which
// is what the binary-operator case below uses -- consider aligning.
1027 fta_forbidden = this_forbid | l_forbid;
1031 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1032 r_se = split_fta_se(se->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1034 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1036 // Replace the left se if it is not forbidden, but something else is.
// NOTE(review): bitwise '&' between two bool operands; the truth value matches
// '&&' but it reads like a typo (the unary case above uses '&&').
1037 if((this_forbid || r_forbid) & !l_forbid){
1038 if(!is_literal_or_param_only(l_se)){
1039 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1044 // Replace the right se if it is not forbidden, but something else is.
// NOTE(review): same bitwise '&' on bools as for the left operand.
1045 if((this_forbid || l_forbid) & !r_forbid){
1046 if(!is_literal_or_param_only(r_se)){
1047 new_se = make_fta_se_ref(lfta_select_list, r_se,0);
1052 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1053 ret_se->use_decorations_of(se);
1054 fta_forbidden = this_forbid || r_forbid || l_forbid;
1061 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_fta_se."
1062 " line=%d, col=%d\n",
1063 se->get_op().c_str(),
1064 se->lineno, se->charno
1071 fta_forbidden = false;
1072 operand_list = se->get_operands();
1073 vector<scalarexp_t *> new_operands;
1074 vector<bool> forbidden_op;
// Split every operand, remember whether it was forbidden, and collect the
// original operand types as the lookup signature.
1075 for(p=0;p<operand_list.size();p++){
1076 l_se = split_fta_se(operand_list[p], l_forbid, lfta_select_list, Ext_fcns);
1078 fta_forbidden |= l_forbid;
1079 new_operands.push_back(l_se);
1080 forbidden_op.push_back(l_forbid);
1081 dt_signature.push_back(operand_list[p]->get_data_type() );
1084 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
1086 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1088 for(o=0;o<operand_list.size();o++){
1089 if(o>0) fprintf(stderr,", ");
1090 fprintf(stderr,"%s",operand_list[o]->get_data_type()->get_type_str().c_str());
1092 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1093 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
1097 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1099 // Replace the non-forbidden operands.
1100 // the forbidden ones are already replaced.
1102 for(p=0;p<new_operands.size();p++){
1103 if(! forbidden_op[p]){
1104 // if(new_operands[p]->get_data_type()->get_temporal() != constant_t){
1105 if(!is_literal_or_param_only(new_operands[p])){
1106 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],0);
1107 new_operands[p] = new_se;
1113 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1114 ret_se->use_decorations_of(se);
// NOTE(review): the message names check_fta_forbidden_se although this is
// split_fta_se, and it goes to stdout via printf while the other diagnostics
// in this function use stderr -- likely a copy/paste leftover.
1120 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
1131 // The predicates have already been
1132 // broken into conjunctions.
1133 // If any part of a conjunction is fta-forbidden,
1134 // it must be executed in the stream operator.
1135 // Else it is executed in the FTA.
1136 // A pre-analysis should determine whether this
1137 // predicate is fta-safe. This procedure will
1138 // assume that it is fta-forbidden and will
1139 // prepare it for execution in the stream.
// split_fta_pr: build a new predicate (ret_pr) with the same shape as pr.
// Every scalar-expression operand of pr is passed through split_fta_se, and
// every sub-predicate is processed by a recursive call to split_fta_pr.
//   pr               - the predicate to process; it is only read here.
//   lfta_select_list - select list handed to split_fta_se / make_fta_se_ref.
//   Ext_fcns         - external function table, forwarded to split_fta_se.
// Operands that are not literal/param-only are given to make_fta_se_ref
// (always with index 0) to obtain a replacement scalar expression.
// NOTE(review): presumably the replacement is only made when split_fta_se
// reported the operand as not forbidden (l_forbid / r_forbid) -- confirm.
1143 predicate_t *split_fta_pr(predicate_t *pr,
1144 vector<select_element *> &lfta_select_list,
1145 ext_fcn_list *Ext_fcns
1148 vector<literal_t *> llist;
1149 scalarexp_t *se_l, *se_r;
1150 bool l_forbid, r_forbid;
1151 predicate_t *ret_pr, *pr_l, *pr_r;
1152 vector<scalarexp_t *> op_list, new_op_list;
1154 vector<data_type *> dt_signature;
// Dispatch on the kind of predicate node.
1157 switch(pr->get_operator_type()){
// Scalar expression tested against a literal vector: split the left SE
// and rebuild the predicate with pr's literal vector.
1159 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1162 if(!is_literal_or_param_only(se_l)){
1163 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1167 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison of two scalar expressions: split each side independently.
1172 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1174 if(!is_literal_or_param_only(se_l)){
1175 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1180 se_r = split_fta_se(pr->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1182 if(!is_literal_or_param_only(se_r)){
1183 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,0);
1188 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Predicate with a single child predicate: recurse and re-wrap with pr's op.
1192 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1193 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Predicate with two child predicates: recurse on both sides.
1196 case PRED_BINARY_OP:
1197 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1198 pr_r = split_fta_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1199 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: split each operand, collect the results
// in new_op_list, and carry over the function id of the original.
1203 // I can't push the predicate into the lfta, except by
1204 // returning a bool value, and that is not worth the trouble,
1205 op_list = pr->get_op_list();
1206 for(o=0;o<op_list.size();++o){
1207 se_l = split_fta_se(op_list[o],l_forbid,lfta_select_list,Ext_fcns);
1209 if(!is_literal_or_param_only(se_l)){
1210 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1214 new_op_list.push_back(se_l);
1217 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1218 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unrecognized operator type: report it with the predicate's source position.
1221 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1222 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1233 //--------------------------------------------------------------------
1237 // Split a scalar expression into one part which executes
1238 // at the stream and another set of parts which execute
1240 // Because I'm actually modifying the SEs, I will make
1241 // copies. But I will assume that literals, params, and
1242 // colrefs are immutable at this point.
1243 // (if there is ever a need to change one, must make a
1245 // NOTE : if se is constant (only references literals),
1246 // avoid making the fta compute it.
1248 // NOTE : This will need to be generalized to
1249 // handle join expressions, namely to handle a vector
1252 // Return value is the HFTA se.
1253 // Add lftas select_elements to the fta_select_list.
1254 // set fta_forbidden if this node or any child cannot
1255 // execute at the lfta.
// Sentinel values for a "colref source" (otherwise a table-variable index >= 0).
// NOTBLVAR: the expression references no table variable (set for literals and params).
// MIXED: the expression combines two different sources (see combine_colref_source).
1257 #define SPLIT_FTAVEC_NOTBLVAR -1
1258 #define SPLIT_FTAVEC_MIXED -2
// Returns true when colref_source is a non-negative index whose slot in
// lfta_select_list is non-NULL.
// NOTE(review): colref_source is not checked against lfta_select_list.size();
// callers presumably guarantee it is in range -- confirm.
1260 bool is_PROTOCOL_source(int colref_source,
1261 vector< vector<select_element *> *> &lfta_select_list){
1262 if(colref_source>=0 && lfta_select_list[colref_source]!=NULL) return true;
// Combine the colref sources of two sub-expressions into one:
//   - identical sources combine to that source;
//   - SPLIT_FTAVEC_NOTBLVAR on one side yields the other side;
//   - any other combination yields SPLIT_FTAVEC_MIXED.
1266 int combine_colref_source(int s1, int s2){
1267 if(s1==s2) return(s1);
1268 if(s1==SPLIT_FTAVEC_NOTBLVAR) return s2;
1269 if(s2==SPLIT_FTAVEC_NOTBLVAR) return s1;
1270 return SPLIT_FTAVEC_MIXED;
// split_ftavec_se: multi-source variant of scalar-expression splitting.
// Walks se recursively and builds a copy (ret_se) of each node. Along the way
// it reports through the reference parameters:
//   fta_forbidden - whether this node or any child was found forbidden;
//   colref_source - the table variable index the expression draws from, or
//                   SPLIT_FTAVEC_NOTBLVAR / SPLIT_FTAVEC_MIXED.
// Child expressions that are not forbidden, come from a source accepted by
// is_PROTOCOL_source, and are not literal/param-only are handed to
// make_fta_se_ref together with their source index.
1273 scalarexp_t *split_ftavec_se(
1274 scalarexp_t *se, // the SE to split
1275 bool &fta_forbidden, // return true if some part of se
1277 int &colref_source, // the tblvar which sources the
1278 // colref, or NOTBLVAR, or MIXED
1279 vector< vector<select_element *> *> &lfta_select_list,
1280 // NULL if the tblvar is not PROTOCOL,
1281 // else build the select list.
1282 ext_fcn_list *Ext_fcns // is the fcn lfta-safe?
1284 // Return value is the HFTA SE, unless fta_forbidden is true and
1285 // colref_source>=0 and the indicated source is PROTOCOL.
1286 // In that case no split was done, the make_fta_se_ref must
1287 // be done by the caller.
1290 vector<scalarexp_t *> operand_list;
1291 vector<data_type *> dt_signature;
1292 scalarexp_t *ret_se, *l_se, *r_se;
1293 bool l_forbid, r_forbid, this_forbid;
1294 int l_csource, r_csource, this_csource;
1296 scalarexp_t *new_se;
1297 data_type *dt = se->get_data_type();
1299 switch(se->get_operator_type()){
// Literal: forbidden only if its data type is not fta-legal; it references
// no table variable. The literal is copied into a fresh node.
1301 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1302 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1303 ret_se = new scalarexp_t(se->get_literal());
1304 ret_se->use_decorations_of(se);
// Query parameter: handled like a literal, rebuilt as a param reference.
1308 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1309 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1310 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1311 ret_se->use_decorations_of(se);
// Interface parameter: never forbidden; its source is the table variable
// recorded on the interface-param reference.
1314 case SE_IFACE_PARAM:
1315 fta_forbidden = false;
1316 colref_source = se->get_ifpref()->get_tablevar_ref();
1317 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1318 ret_se->use_decorations_of(se);
// Column reference: the source is the colref's table variable.
1322 // No colref should be forbidden,
1323 // the schema is wrong, the fta_legal_type() fcn is wrong,
1324 // or the source table is actually a stream.
1325 // Issue a warning, but proceed with processing.
1326 // Also, should not be a ref to a gbvar.
1327 // (a gbvar ref only occurs in an aggregation node,
1328 // and these SEs are rehomed, not split.
1329 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1330 colref_source = se->get_colref()->get_tablevar_ref();
1332 if(fta_forbidden && is_PROTOCOL_source(colref_source, lfta_select_list)){
1333 fprintf(stderr,"WARNING, a PROTOCOL colref is a forbidden data type in split_ftavec_se,"
1335 " type is %s, line=%d, col=%d\n",
1336 se->get_colref()->to_string().c_str(),
1337 se->get_data_type()->to_string().c_str(),
1338 se->lineno, se->charno
1343 fta_forbidden = true; // eval in hfta. ASSUME make copy as below.
1346 ret_se = new scalarexp_t(se->get_colref());
1347 ret_se->use_decorations_of(se);
// Operator with one child: the child's source becomes this node's source.
1351 l_se = split_ftavec_se(se->get_left_se(), l_forbid, colref_source, lfta_select_list, Ext_fcns);
1353 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1355 // If this operation is forbidden but the child SE is not,
1356 // AND the colref source in the se is a single PROTOCOL source
1357 // put the child se on the lfta_select_list, create a colref
1358 // which accesses this se, and make it the child of this op.
1359 // Exception : the child se is constant (only literal refs).
1360 // TODO: I think the exception is expressed by is_PROTOCOL_source
1361 if(this_forbid && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list)){
1362 if(!is_literal_or_param_only(l_se)){
1363 new_se = make_fta_se_ref(lfta_select_list, l_se,colref_source);
1364 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1367 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1369 ret_se->use_decorations_of(se);
// Both operands are bool, so bitwise | gives the same result as ||.
1370 fta_forbidden = this_forbid | l_forbid;
// Operator with two children: split both, then merge their sources.
1374 l_se = split_ftavec_se(se->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1375 r_se = split_ftavec_se(se->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1377 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1378 colref_source=combine_colref_source(l_csource, r_csource);
1380 // Replace the left se if the parent must be hfta but the child can
1381 // be lfta. This translates to
1382 // a) result is PROTOCOL and forbidden, but left SE is not forbidden
1383 // OR b) if result is mixed but the left se is PROTOCOL, not forbidden
1384 if( ((this_forbid || r_forbid) && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1385 (colref_source==SPLIT_FTAVEC_MIXED && !l_forbid &&
1386 is_PROTOCOL_source(l_csource, lfta_select_list)) ){
1387 if(!is_literal_or_param_only(l_se)){
1388 new_se = make_fta_se_ref(lfta_select_list, l_se,l_csource);
1393 // same logic as for right se.
1394 if( ((this_forbid || l_forbid) && !r_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1395 (colref_source==SPLIT_FTAVEC_MIXED && !r_forbid &&
1396 is_PROTOCOL_source(r_csource, lfta_select_list)) ){
1397 if(!is_literal_or_param_only(r_se)){
1398 new_se = make_fta_se_ref(lfta_select_list, r_se,r_csource);
1403 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1404 ret_se->use_decorations_of(se);
1405 fta_forbidden = this_forbid || r_forbid || l_forbid;
// An aggregate reference is reported as an internal error here.
1412 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_ftavec_se."
1413 " line=%d, col=%d\n",
1414 se->get_op().c_str(),
1415 se->lineno, se->charno
// Function call: split every operand, remembering for each one whether it
// was forbidden and which source it came from, and collect the operand
// data types as the signature used to look the function up.
1422 operand_list = se->get_operands();
1423 vector<scalarexp_t *> new_operands;
1424 vector<bool> forbidden_op;
1425 vector<int> csource;
1427 fta_forbidden = false;
1428 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1429 for(p=0;p<operand_list.size();p++){
1430 l_se = split_ftavec_se(operand_list[p], l_forbid, l_csource, lfta_select_list, Ext_fcns);
1432 fta_forbidden |= l_forbid;
1433 colref_source = combine_colref_source(colref_source, l_csource);
1434 new_operands.push_back(l_se);
1435 forbidden_op.push_back(l_forbid);
1436 csource.push_back(l_csource);
1437 dt_signature.push_back(operand_list[p]->get_data_type() );
1440 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup-failure report: prints the function name and the operand types.
// An id of -2 is reported as "multiple subsuming functions found".
1442 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1444 for(o=0;o<operand_list.size();o++){
1445 if(o>0) fprintf(stderr,", ");
1446 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
1448 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1449 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The call is also forbidden when the function itself is not fta-legal.
1453 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1455 // Replace the non-forbidden operands.
1456 // the forbidden ones are already replaced.
1457 if(fta_forbidden || colref_source == SPLIT_FTAVEC_MIXED){
1458 for(p=0;p<new_operands.size();p++){
1459 if(! forbidden_op[p] && is_PROTOCOL_source(csource[p], lfta_select_list)){
1460 if(!is_literal_or_param_only(new_operands[p])){
1461 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],csource[p]);
1462 new_operands[p] = new_se;
1468 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1469 ret_se->use_decorations_of(se);
// Unknown operator type. NOTE(review): this message goes to stdout via
// printf, while the other diagnostics in this function use stderr.
1475 printf("INTERNAL ERROR in split_ftavec_se: operator type %d\n",se->get_operator_type());
1484 // The predicates have already been
1485 // broken into conjunctions.
1486 // If any part of a conjunction is fta-forbidden,
1487 // it must be executed in the stream operator.
1488 // Else it is executed in the FTA.
1489 // A pre-analysis should determine whether this
1490 // predicate is fta-safe. This procedure will
1491 // assume that it is fta-forbidden and will
1492 // prepare it for execution in the stream.
// split_ftavec_pr: multi-source variant of predicate splitting.
// Builds a new predicate (ret_pr) with the same shape as pr. Every
// scalar-expression operand is passed through split_ftavec_se and every
// sub-predicate is processed by a recursive call.
//   pr               - the predicate to process; it is only read here.
//   lfta_select_list - one select list per table variable, indexed by
//                      colref source and forwarded to the helpers.
//   Ext_fcns         - external function table, forwarded to split_ftavec_se.
// An operand is handed to make_fta_se_ref (with its source index) only when
// it is not forbidden, its source is accepted by is_PROTOCOL_source, and it
// is not literal/param-only.
1494 predicate_t *split_ftavec_pr(predicate_t *pr,
1495 vector< vector<select_element *> *> &lfta_select_list,
1496 ext_fcn_list *Ext_fcns
1499 vector<literal_t *> llist;
1500 scalarexp_t *se_l, *se_r;
1501 bool l_forbid, r_forbid;
1502 int l_csource, r_csource;
1503 predicate_t *ret_pr, *pr_l, *pr_r;
1504 vector<scalarexp_t *> op_list, new_op_list;
1506 vector<data_type *> dt_signature;
// Dispatch on the kind of predicate node.
1509 switch(pr->get_operator_type()){
// Scalar expression tested against a literal vector.
1511 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1513 // TODO: checking that the se is a PROTOCOL source should
1514 // take care of literal_or_param_only.
1515 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1516 if(!is_literal_or_param_only(se_l)){
1517 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1521 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison of two scalar expressions: each side is split independently.
1526 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1527 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1528 if(!is_literal_or_param_only(se_l)){
1529 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1534 se_r = split_ftavec_se(pr->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1535 if(!r_forbid && is_PROTOCOL_source(r_csource, lfta_select_list)){
1536 if(!is_literal_or_param_only(se_r)){
1537 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,r_csource);
1542 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Predicate with a single child predicate: recurse and re-wrap with pr's op.
1546 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1547 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Predicate with two child predicates: recurse on both sides.
1550 case PRED_BINARY_OP:
1551 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1552 pr_r = split_ftavec_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1553 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: split each operand and carry over the
// function id of the original predicate.
1557 // I can't push the predicate into the lfta, except by
1558 // returning a bool value, and that is not worth the trouble,
1559 op_list = pr->get_op_list();
1560 for(o=0;o<op_list.size();++o){
1561 se_l = split_ftavec_se(op_list[o],l_forbid,l_csource,lfta_select_list,Ext_fcns);
1562 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1563 if(!is_literal_or_param_only(se_l)){
1564 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1568 new_op_list.push_back(se_l);
1571 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1572 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unrecognized operator type. The message names this function
// (split_ftavec_pr) so the failure is not attributed to split_fta_pr.
1575 fprintf(stderr,"INTERNAL ERROR in split_ftavec_pr, line %d, character %d, unknown predicate operator type %d\n",
1576 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1586 ////////////////////////////////////////////////////////////////////////
1587 /// rehome_fta_se rehome_fta_pr
1588 /// This is used to split an sgah operator (aggregation),
1589 /// I just need to make gb, aggr references point to the
1590 /// new gb, aggr table entries.
// rehome_fta_se: rebuild a scalar expression, substituting aggregate
// references through aggr_map.
//   se       - the expression to copy; it is only read here.
//   aggr_map - maps an aggregate reference id to the scalar expression that
//              is returned in its place.
// Literals, params, interface params and colrefs are copied as-is; operators
// and function calls are rebuilt from their rehomed children.
// NOTE(review): (*aggr_map)[id] uses std::map::operator[], which inserts and
// returns a null pointer when id is absent -- confirm ids are always present.
1593 scalarexp_t *rehome_fta_se(scalarexp_t *se,
1594 map< int, scalarexp_t * > *aggr_map
1599 vector<scalarexp_t *> operand_list;
1600 scalarexp_t *ret_se, *l_se, *r_se;
1602 scalarexp_t *new_se;
1603 data_type *dt = se->get_data_type();
1604 vector<scalarexp_t *> new_operands;
1606 switch(se->get_operator_type()){
// Literal: copy.
1608 ret_se = new scalarexp_t(se->get_literal());
1609 ret_se->use_decorations_of(se);
// Query parameter: rebuild the param reference.
1613 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1614 ret_se->use_decorations_of(se);
1617 case SE_IFACE_PARAM:
1618 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1619 ret_se->use_decorations_of(se);
// Column reference.
1625 // Must be a GB REF ...
1626 // I'm assuming that the hfta gbvar table has the
1627 // same sequence of entries as the input query's gbvar table.
1628 // Else I'll need some kind of translation table.
1631 fprintf(stderr,"WARNING, a colref is not a gbver ref in rehome_hfta_se"
1632 " type is %s, line=%d, col=%d\n",
1633 se->get_data_type()->to_string().c_str(),
1634 se->lineno, se->charno
1638 ret_se = new scalarexp_t(se->get_colref());
1639 ret_se->use_decorations_of(se); // just inherit the gbref
// Operator with one child.
1643 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1645 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1646 ret_se->use_decorations_of(se);
// Operator with two children.
1650 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1651 r_se = rehome_fta_se(se->get_right_se(), aggr_map);
1653 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1654 ret_se->use_decorations_of(se);
// Aggregate reference: return the mapped expression directly (no copy).
1660 agr_id = se->get_aggr_ref();
1661 return (*aggr_map)[agr_id];
// Function call: if it carries a non-negative aggregate ref, return the
// mapped expression; otherwise rebuild it from rehomed operands.
1665 agr_id = se->get_aggr_ref();
1666 if(agr_id >= 0) return (*aggr_map)[agr_id];
1668 operand_list = se->get_operands();
1669 for(p=0;p<operand_list.size();p++){
1670 l_se = rehome_fta_se(operand_list[p], aggr_map);
1672 new_operands.push_back(l_se);
1676 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1677 ret_se->use_decorations_of(se);
// Unknown operator type (reported on stdout via printf).
1682 printf("INTERNAL ERROR in rehome_fta_se: operator type %d\n",se->get_operator_type());
1691 // The predicates have already been
1692 // broken into conjunctions.
1693 // If any part of a conjunction is fta-forbidden,
1694 // it must be executed in the stream operator.
1695 // Else it is executed in the FTA.
1696 // A pre-analysis should determine whether this
1697 // predicate is fta-safe. This procedure will
1698 // assume that it is fta-forbidden and will
1699 // prepare it for execution in the stream.
// rehome_fta_pr: rebuild a predicate with the same shape as pr, passing every
// scalar-expression operand through rehome_fta_se and every sub-predicate
// through a recursive call. No select list is involved; only aggregate
// references are remapped, via aggr_map.
//   pr       - the predicate to copy; it is only read here.
//   aggr_map - forwarded unchanged to rehome_fta_se.
1701 predicate_t *rehome_fta_pr(predicate_t *pr,
1702 map<int, scalarexp_t *> *aggr_map
1705 vector<literal_t *> llist;
1706 scalarexp_t *se_l, *se_r;
1707 predicate_t *ret_pr, *pr_l, *pr_r;
1708 vector<scalarexp_t *> op_list, new_op_list;
1711 switch(pr->get_operator_type()){
// Scalar expression tested against a literal vector.
1713 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1714 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison of two scalar expressions.
1718 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1719 se_r = rehome_fta_se(pr->get_right_se(), aggr_map);
1720 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Predicate with a single child predicate.
1724 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1725 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1728 case PRED_BINARY_OP:
1729 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1730 pr_r = rehome_fta_pr(pr->get_right_pr(), aggr_map);
1731 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: rehome each operand and keep the function id.
1735 op_list = pr->get_op_list();
1736 for(o=0;o<op_list.size();++o){
1737 se_l = rehome_fta_se(op_list[o], aggr_map);
1738 new_op_list.push_back(se_l);
1740 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1741 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unrecognized operator type: report it with the predicate's source position.
1745 fprintf(stderr,"INTERNAL ERROR in rehome_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1746 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1755 ////////////////////////////////////////////////////////////////////
1756 ///////////////// Create a STREAM table to represent the FTA output.
// create_attributes: build a STREAM_SCHEMA table_def named tname with one
// field per entry of select_list.
//   tname       - name given to the new table definition.
//   select_list - select elements; each contributes its name and the data
//                 type of its scalar expression.
// For each field, the data type's parameter keys (and values, where used)
// are copied into a new param_list, and the access function is named
// "get_field_" + <column name>.
// NOTE(review): the new table_def (fta_tbl) is presumably the return value -- confirm.
1758 table_def *create_attributes(string tname, vector<select_element *> &select_list){
1762 // Create a new STREAM schema for the output of the FTA.
1764 field_entry_list *fel = new field_entry_list();
1766 for(s=0;s<select_list.size();s++){
1767 scalarexp_t *sel_se = select_list[s]->se;
1768 data_type *dt = sel_se->get_data_type();
1770 // Grab the annotations of the field.
1771 // As of this writing, the only meaningful annotations
1772 // are whether or not the attribute is temporal.
1773 // There can be an annotation of constant_t, but
1774 // I'll ignore this, it feels like an unsafe assumption
1775 param_list *plist = new param_list();
1776 // if(dt->is_temporal()){
1777 vector<string> param_strings = dt->get_param_keys();
// Copy each parameter: with its value in one form, key-only in the other.
1779 for(p=0;p<param_strings.size();++p){
1780 string v = dt->get_param_val(param_strings[p]);
1782 plist->append(param_strings[p].c_str(),v.c_str());
1784 plist->append(param_strings[p].c_str());
1788 // char access_fcn_name[500];
1789 string colname = select_list[s]->name;
1790 // sprintf(access_fcn_name,"get_field_%s",colname.c_str());
// A std::string is used for the name rather than the fixed-size buffer
// shown in the commented-out lines.
1791 string access_fcn_name = "get_field_"+colname;
1792 field_entry *fe = new field_entry(
1793 dt->get_type_str(), colname, access_fcn_name, plist, ufcns
1796 fel->append_field(fe);
1799 table_def *fta_tbl = new table_def(
1800 tname.c_str(), NULL, NULL, fel, STREAM_SCHEMA
1807 //------------------------------------------------------------------
1808 // Textual representation of the query node.
// Render this select/project node as query text: a "Select" list (each
// expression followed by " AS <name>" when it has a name), a "From" line
// with the table name, and the where predicates, each parenthesized and
// joined by " AND ". No aggregate table is passed to the formatters (NULL).
1812 string spx_qpn::to_query_string(){
1814 string ret = "Select ";
1816 for(s=0;s<select_list.size();s++){
1818 ret += se_to_query_string(select_list[s]->se, NULL);
1819 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1823 ret += "From "+table_name->to_string()+"\n";
1825 if(where.size() > 0){
1828 for(w=0;w<where.size();w++){
1829 if(w>0) ret += " AND ";
1830 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// Render this aggregation node as query text: the select list, the From
// line, the where predicates, the group-by variables (including CUBE /
// ROLLUP / GROUPING_SETS patterns), and the having predicates. The node's
// aggregate table (aggr_tbl) is passed to the expression/predicate
// formatters.
1841 string sgah_qpn::to_query_string(){
1843 string ret = "Select ";
1845 for(s=0;s<select_list.size();s++){
1847 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1848 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1852 ret += "From "+table_name->to_string()+"\n";
1854 if(where.size() > 0){
1857 for(w=0;w<where.size();w++){
1858 if(w>0) ret += " AND ";
1859 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1864 if(gb_tbl.size() > 0){
// Simple case (at most one pattern, or no entry types recorded): list
// every group-by variable as "<definition> AS <name>".
1867 if(gb_tbl.gb_patterns.size() <= 1 || gb_tbl.gb_entry_type.size()==0){
1868 for(g=0;g<gb_tbl.size();g++){
1869 if(g>0) ret += ", ";
1870 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
1871 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
1873 ret += gb_tbl.get_name(g);
// Pattern case: walk the group-by entries; gb_pos indexes the group-by
// variable table and is advanced by gb_entry_count[g] below.
1877 for(g=0;g<gb_tbl.gb_entry_type.size();++g){
1878 if(g>0) ret += ", ";
// Plain entry (empty type string).
1879 if(gb_tbl.gb_entry_type[g] == ""){
1880 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+
1881 " AS "+ gb_tbl.get_name(gb_pos);
// CUBE / ROLLUP entry: "<TYPE>(" followed by its gb_entry_count[g] members.
1884 if(gb_tbl.gb_entry_type[g] == "CUBE" ||
1885 gb_tbl.gb_entry_type[g] == "ROLLUP"){
1886 ret += gb_tbl.gb_entry_type[g] + "(";
1888 for(gg=0;gg<gb_tbl.gb_entry_count[g];++gg){
1889 if(gg>0) ret += ", ";
1890 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+ " AS "+ gb_tbl.get_name(gb_pos);
// GROUPING_SETS entry: for each component, list the names of the members
// whose flag is set.
1895 if(gb_tbl.gb_entry_type[g] == "GROUPING_SETS"){
1896 ret += gb_tbl.gb_entry_type[g] + "(";
1898 vector<vector<bool> > &local_components = gb_tbl.pattern_components[g];
1899 for(g1=0;g1<local_components.size();++g1){
1901 bool first_field = true;
// NOTE(review): this bound is inclusive (<=) while the CUBE/ROLLUP loop
// above uses <; if local_components[g1] holds gb_entry_count[g] flags this
// reads one element past the end -- confirm the intended bound.
1903 for(g2=0;g2<=gb_tbl.gb_entry_count[g];g2++){
1904 if(local_components[g1][g2]){
1905 if(!first_field) ret+=", ";
1906 else first_field = false;
1907 ret += gb_tbl.get_name(gb_pos+g2);
1913 gb_pos += gb_tbl.gb_entry_count[g];
1920 if(having.size() > 0){
1923 for(h=0;h<having.size();h++){
1924 if(h>0) ret += " AND ";
1925 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// Render this node as query text: the select list, the From line, the where
// predicates, the group-by variables ("<definition> AS <name>"), the having
// predicates, and a "Closing_When" clause built from closing_when. Predicate
// lists are parenthesized and joined by " AND "; aggr_tbl is passed to the
// formatters.
1934 string rsgah_qpn::to_query_string(){
1936 string ret = "Select ";
1938 for(s=0;s<select_list.size();s++){
1940 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1941 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1945 ret += "From "+table_name->to_string()+"\n";
1947 if(where.size() > 0){
1950 for(w=0;w<where.size();w++){
1951 if(w>0) ret += " AND ";
1952 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1957 if(gb_tbl.size() > 0){
1960 for(g=0;g<gb_tbl.size();g++){
1961 if(g>0) ret += ", ";
1962 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
1963 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl)+" AS ";
1965 ret += gb_tbl.get_name(g);
1970 if(having.size() > 0){
1973 for(h=0;h<having.size();h++){
1974 if(h>0) ret += " AND ";
1975 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
1980 if(closing_when.size() > 0){
1981 ret += "Closing_When ";
1983 for(h=0;h<closing_when.size();h++){
1984 if(h>0) ret += " AND ";
1985 ret += "(" + pred_to_query_str(closing_when[h]->pr,&aggr_tbl) + ")";
// Render this node as query text: the select list, the From line, the where
// predicates, the group-by variables, a "Supergroup" clause naming the
// group-by variables contained in sg_tbl, the having predicates, and the
// "Cleaning_When" / "Cleaning_By" clauses. Predicate lists are parenthesized
// and joined by " AND "; aggr_tbl is passed to the formatters.
1994 string sgahcwcb_qpn::to_query_string(){
1996 string ret = "Select ";
1998 for(s=0;s<select_list.size();s++){
2000 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2001 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2005 ret += "From "+table_name->to_string()+"\n";
2007 if(where.size() > 0){
2010 for(w=0;w<where.size();w++){
2011 if(w>0) ret += " AND ";
2012 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2017 if(gb_tbl.size() > 0){
2020 for(g=0;g<gb_tbl.size();g++){
2021 if(g>0) ret += ", ";
2022 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2023 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
2025 ret += gb_tbl.get_name(g);
2030 if(sg_tbl.size() > 0){
2031 ret += "Supergroup ";
2033 bool first_elem = true;
// Only group-by indices present in sg_tbl are named in the clause.
2034 for(g=0;g<gb_tbl.size();g++){
2035 if(sg_tbl.count(g)){
2040 ret += gb_tbl.get_name(g);
2046 if(having.size() > 0){
2049 for(h=0;h<having.size();h++){
2050 if(h>0) ret += " AND ";
2051 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
2057 if(cleanwhen.size() > 0){
2058 ret += "Cleaning_When ";
2060 for(h=0;h<cleanwhen.size();h++){
2061 if(h>0) ret += " AND ";
2062 ret += "(" + pred_to_query_str(cleanwhen[h]->pr,&aggr_tbl) + ")";
2067 if(cleanby.size() > 0){
2068 ret += "Cleaning_By ";
2070 for(h=0;h<cleanby.size();h++){
2071 if(h>0) ret += " AND ";
2072 ret += "(" + pred_to_query_str(cleanby[h]->pr,&aggr_tbl) + ")";
// Render this merge node as query text: "Merge <mvars[0]> : <mvars[1]>",
// a " SLACK <expr>" part, and the comma-separated list of sources in fm.
// NOTE(review): only the first two merge variables are printed, and
// mvars[0] / mvars[1] are read without a size check -- presumably the node
// always has at least two; confirm.
2081 string mrg_qpn::to_query_string(){
2083 string ret="Merge ";
2084 ret += mvars[0]->to_query_string() + " : " + mvars[1]->to_query_string();
2086 ret += " SLACK "+se_to_query_string(slack, NULL);
2091 for(t=0;t<fm.size();++t){
2092 if(t>0) ret += ", ";
2093 ret += fm[t]->to_string();
// Render this join node as query text: the select list, a join-type keyword
// (INNER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN or OUTER_JOIN), the sources
// in from, and the where predicates (parenthesized, joined by " AND ").
2100 string join_eq_hash_qpn::to_query_string(){
2102 string ret = "Select ";
2104 for(s=0;s<select_list.size();s++){
2106 ret += se_to_query_string(select_list[s]->se, NULL);
2107 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2111 // NOTE: assuming binary join.
// The join type is selected from the two sources' property values, combined
// as from[0] + 2*from[1].
2112 int properties = from[0]->get_property()+2*from[1]->get_property();
2115 ret += "INNER_JOIN ";
2118 ret += "LEFT_OUTER_JOIN ";
2121 ret += "RIGHT_OUTER_JOIN ";
2124 ret += "OUTER_JOIN ";
2130 for(f=0;f<from.size();++f){
2132 ret += from[f]->to_string();
2136 if(where.size() > 0){
2139 for(w=0;w<where.size();w++){
2140 if(w>0) ret += " AND ";
2141 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// Render this filter-join node as query text: the select list,
// "FILTER_JOIN(<temporal field>,<temporal range>)", the sources in from,
// and the where predicates (parenthesized, joined by " AND ").
2149 string filter_join_qpn::to_query_string(){
2151 string ret = "Select ";
2153 for(s=0;s<select_list.size();s++){
2155 ret += se_to_query_string(select_list[s]->se, NULL);
2156 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2160 // NOTE: assuming binary join.
2161 ret += "FILTER_JOIN("+temporal_var->field+","+int_to_string(temporal_range)+") ";
2165 for(f=0;f<from.size();++f){
2167 ret += from[f]->to_string();
2171 if(where.size() > 0){
2174 for(w=0;w<where.size();w++){
2175 if(w>0) ret += " AND ";
2176 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
2185 // -----------------------------------------------------------------
2186 // Query node subclass specific processing.
// split_sources: break a merge over several sources into merge nodes that
// each combine fewer sources, and return all resulting nodes in ret.
// Child nodes are created with make_copy (suffixes "_cH1" / "_cH2") and are
// placed in ret before this node, which is modified in place to merge the
// children's outputs.
// fm.size() and mvars.size() must agree; a mismatch is reported as an
// internal error.
2189 vector<mrg_qpn *> mrg_qpn::split_sources(){
2190 vector<mrg_qpn *> ret;
2194 if(fm.size() != mvars.size()){
// size_t is cast explicitly so the argument type matches %lu on every platform.
2195 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources. fm.size() = %lu, mvars.size() = %lu\n",(unsigned long)fm.size(),(unsigned long)mvars.size());
2199 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources, fm size is 1.\n");
// size_t is cast explicitly so the argument type matches %d.
2205 printf("spliting sources merge node, name = %s, %d sources.\n\t",node_name.c_str(), (int)fm.size());
2206 for(ff=0;ff<fm.size();++ff){
2207 printf("%s ",fm[ff]->to_string().c_str());
2212 // Handle special cases.
// Special case: nothing to split, this node is the only result.
2214 ret.push_back(this);
// Special case: peel the first two sources off into one child (new_mrg);
// this node then merges the child's output with the remaining source.
2219 mrg_qpn *new_mrg = (mrg_qpn *)this->make_copy("_cH1");
2220 new_mrg->fm.push_back(this->fm[0]);
2221 new_mrg->fm.push_back(this->fm[1]);
2222 new_mrg->mvars.push_back(this->mvars[0]);
2223 new_mrg->mvars.push_back(this->mvars[1]);
// Drop the first source here, then overwrite the new first slot with a
// table variable that names the child node, keeping the old range variable.
2225 this->fm.erase(this->fm.begin());
2226 this->mvars.erase(this->mvars.begin());
2227 string vname = fm[0]->get_var_name();
2228 this->fm[0] = new tablevar_t(new_mrg->node_name.c_str());
2229 this->fm[0]->set_range_var(vname);
2230 this->mvars[0]->set_field(table_layout->get_field_name(merge_fieldpos));
2231 this->mvars[0]->set_tablevar_ref(0);
2232 this->mvars[1]->set_tablevar_ref(1);
2234 ret.push_back(new_mrg);
2235 ret.push_back(this);
2238 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg->node_name.c_str(),this->node_name.c_str());
2239 for(i=0;i<new_mrg->fm.size();++i)
2240 printf("\tsource %s var %d (%s, %s) \n",new_mrg->node_name.c_str(),i,new_mrg->fm[i]->to_string().c_str(), new_mrg->mvars[i]->to_string().c_str());
2241 for(i=0;i<this->fm.size();++i)
2242 printf("\tsource %s var %d (%s, %s) \n",this->node_name.c_str(),i,this->fm[i]->to_string().c_str(), this->mvars[i]->to_string().c_str());
2249 // divide up the sources between two children.
2250 // Then, recurse on the children.
2252 mrg_qpn *new_mrg1 = (mrg_qpn *)this->make_copy("_cH1");
2253 mrg_qpn *new_mrg2 = (mrg_qpn *)this->make_copy("_cH2");
// The first half of the sources goes to new_mrg1, the remainder to new_mrg2.
2254 for(i=0;i<this->fm.size()/2;++i){
2255 new_mrg1->fm.push_back(this->fm[i]);
2256 new_mrg1->mvars.push_back(this->mvars[i]);
2257 //printf("Pushing %d (%s, %s) to new_mrg1\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
2259 for(;i<this->fm.size();++i){
2260 new_mrg2->fm.push_back(this->fm[i]);
2261 new_mrg2->mvars.push_back(this->mvars[i]);
2262 //printf("Pushing %d (%s, %s) to new_mrg2\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// Renumber each child's merge variables to index its own source list.
2264 for(i=0;i<new_mrg1->mvars.size();++i)
2265 new_mrg1->mvars[i]->set_tablevar_ref(i);
2266 for(i=0;i<new_mrg2->mvars.size();++i)
2267 new_mrg2->mvars[i]->set_tablevar_ref(i);
2269 // Children created, make this merge them.
// This node gets two sources, one per child, each with a colref on the
// merge field of table_layout.
// NOTE(review): presumably fm and mvars are emptied before these
// push_backs -- confirm.
2273 tablevar_t *tmp_tblvar = new tablevar_t(new_mrg1->node_name.c_str());
2274 tmp_tblvar->set_range_var("_mrg_var_1");
2275 fm.push_back(tmp_tblvar);
2276 colref_t *tmp_cref = new colref_t("_mrg_var_1",table_layout->get_field_name(merge_fieldpos).c_str());
2277 tmp_cref->set_tablevar_ref(0);
2278 mvars.push_back(tmp_cref);
2280 tmp_tblvar = new tablevar_t(new_mrg2->node_name.c_str());
2281 tmp_tblvar->set_range_var("_mrg_var_2");
2282 fm.push_back(tmp_tblvar);
2283 tmp_cref = new colref_t("_mrg_var_2",table_layout->get_field_name(merge_fieldpos).c_str());
2284 tmp_cref->set_tablevar_ref(1);
2285 mvars.push_back(tmp_cref);
2289 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg1->node_name.c_str(),new_mrg2->node_name.c_str());
2290 for(i=0;i<new_mrg1->fm.size();++i)
2291 printf("\tsource %s var %d (%s, %s) \n",new_mrg1->node_name.c_str(),i,new_mrg1->fm[i]->to_string().c_str(), new_mrg1->mvars[i]->to_string().c_str());
2292 for(i=0;i<new_mrg2->fm.size();++i)
2293 printf("\tsource %s var %d (%s, %s) \n",new_mrg2->node_name.c_str(),i,new_mrg2->fm[i]->to_string().c_str(), new_mrg2->mvars[i]->to_string().c_str());
2296 // Recurse and put them together
// Children's results come first; this node is appended last.
2297 vector<mrg_qpn *> st1 = new_mrg1->split_sources();
2298 ret.insert(ret.end(), st1.begin(), st1.end());
2299 vector<mrg_qpn *> st2 = new_mrg2->split_sources();
2300 ret.insert(ret.end(), st2.begin(), st2.end());
2302 ret.push_back(this);
2310 //////// Split helper function : resolve interfaces
// get_ifaces: resolve the (machine, interface) pairs a table variable reads from.
//   table            - the source; when get_ifq() is set its interface name
//                      is evaluated as an interface set through ifdb->eval,
//                      otherwise its own (machine, interface) pair is used.
//   ifdb             - interface database used for the evaluation.
//   n_virtual_ifaces - when 1, the basic list is returned unchanged.
//   hfta_parallelism, hfta_idx - select which virtual interfaces to name.
// For the virtual case each basic interface is expanded into `stride` names
// of the form <iface>"X"<2*s>, for s in [hfta_idx*stride, (hfta_idx+1)*stride).
// NOTE(review): stride uses integer division and hfta_parallelism is not
// checked for zero here -- confirm callers guarantee a positive value.
2312 vector<pair<string,string> > get_ifaces(tablevar_t *table, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2313 vector<pair<string,string> > basic_ifaces;
2315 if(table->get_ifq()){
2316 basic_ifaces= ifdb->eval(table->get_interface(),ierr);
2318 fprintf(stderr,"ERROR, Interface set %s not found.\n",table->get_interface().c_str());
2321 fprintf(stderr,"ERROR, interface definition file didn't parse.\n");
2324 basic_ifaces.push_back(make_pair(table->get_machine(), table->get_interface()));
2327 if(n_virtual_ifaces == 1)
2328 return basic_ifaces;
2330 int stride = n_virtual_ifaces / hfta_parallelism;
2332 vector<pair<string,string> > ifaces;
2334 for(i=0;i<basic_ifaces.size();++i){
2335 string mach = basic_ifaces[i].first;
2336 string iface = basic_ifaces[i].second;
2337 for(s=hfta_idx*stride;s<(hfta_idx+1)*stride;++s){
2338 ifaces.push_back(pair<string, string>(mach,iface+"X"+int_to_string(2*s)));
2346 ///////// Split helper function : compute slack in a generated
// Compute the slack expression for a generated merge node and store it in `slack`.
//   t_se    - temporal scalar expression the merge is performed on
//   fname   - field name (the code below works with `fnm`, which is set up
//             alongside the other locals of this function)
//   sources - (machine, interface) pairs feeding the merge
//   ifdb    - interface database queried for the per-interface "Slack_<field>" values
//   gbt     - group-by table handed to find_temporal_divisor
// The slack is ceil(max interface slack / temporal divisor), wrapped as a
// LITERAL_LONGINT literal.
2349 void mrg_qpn::resolve_slack(scalarexp_t *t_se, string fname, vector<pair<string, string> > &sources, ifq_t *ifdb, gb_table *gbt){
2353 // Find slack divisor, if any.
2355 long long int slack_divisor = find_temporal_divisor(t_se,gbt, fnm);
2356 if(slack_divisor <= 0){
2361 // find max slack in the iface spec
2362 long long int max_slacker = 0, this_slacker;
2363 string rname = "Slack_"+fnm;
2364 for(s=0;s<sources.size();++s){
2365 string src_machine = sources[s].first;
2366 string src_iface = sources[s].second;
2367 vector<string> slack_vec = ifdb->get_iface_vals(src_machine, src_iface,rname,e,es);
2368 for(v=0;v<slack_vec.size();++v){
// Accept the value only when exactly one integer was converted.  sscanf
// returns EOF (-1, which is "true") on an empty string, which previously let
// an uninitialised this_slacker take part in the comparison below.  "%lld" is
// the standard conversion for long long; "%qd" is a BSD extension.
2369 if(sscanf(slack_vec[v].c_str(),"%lld",&this_slacker) == 1){
2370 if(this_slacker > max_slacker)
2371 max_slacker = this_slacker;
2376 if(max_slacker <= 0){
// Round up so the merge never waits less than the slowest interface requires.
2382 long long int the_slack=(long long int)(ceil(((double)max_slacker)/((double)slack_divisor)));
2384 sprintf(tmps,"%lld",the_slack);
2385 literal_t *slack_lit = new literal_t(tmps, LITERAL_LONGINT);
2386 slack = new scalarexp_t(slack_lit);
2390 //------------------------------------------------------------------
2391 // split a node to extract LFTA components.
// A merge node is never split into LFTA/HFTA parts: the result vector holds
// this node itself.  Note that `this` (the pointer) is pushed, not a
// duplicated node, despite the wording of the comment below.
2394 vector<qp_node *> mrg_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2395 // nothing to do, nothing to split, return copy of self.
2399 vector<qp_node *> ret_vec;
2401 ret_vec.push_back(this);
// Split a filter join for execution in the LFTA layer.
//   Ext_fcns      - external function table used for the fta-safety checks
//   hfta_returned - set by this function to report how many hfta nodes are returned
//   ifdb          - interface database used to resolve interfaces and interface params
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces
// A filter join must be fully fta-safe.  With a single interface the node is
// bound to that interface and returned as is; with several interfaces one
// copy of the join is generated per interface, followed by a merge node that
// combines them under the original node name.
2406 vector<qp_node *> filter_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2407 vector<qp_node *> ret_vec;
2409 // First check if the query can be pushed to the FTA.
2412 for(s=0;s<select_list.size();s++){
2413 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2416 for(p=0;p<where.size();p++){
2417 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2421 fprintf(stderr,"ERROR, filter join %s is fta-unsafe.\n",node_name.c_str());
2425 // Can it be done in a single lfta?
2426 // Get the set of interfaces it accesses.
2429 vector<string> sel_names;
// The interface set is derived from the first FROM entry only; every FROM
// entry is then bound to the same interface below.
2430 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2431 if (ifaces.empty()) {
2432 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set\n");
2436 if(ifaces.size() == 1){
2437 // Single interface, no need to merge.
2439 ret_vec.push_back(this);
2441 for(i=0;i<from.size();i++){
2442 from[i]->set_machine(ifaces[0].first);
2443 from[i]->set_interface(ifaces[0].second);
2444 from[i]->set_ifq(false);
2448 // Multiple interfaces, generate the interface-specific queries plus
// NOTE(review): this sel_names shadows the one declared earlier in the function.
2452 vector<string> sel_names;
2453 for(si=0;si<ifaces.size();++si){
2454 filter_join_qpn *fta_node = new filter_join_qpn();
// NOTE(review): this loop belongs to the multiple-interface path, so the
// size()==1 test is presumably never true here -- confirm before removing.
2457 if(ifaces.size()==1)
2458 fta_node->set_node_name( node_name );
2460 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2462 fta_node->set_node_name(new_name);
2464 sel_names.push_back(fta_node->get_node_name());
// Bind a private copy of every FROM entry to this interface.
2468 for(f=0;f<from.size();f++){
2469 fta_node->from.push_back(from[f]->duplicate());
2470 fta_node->from[f]->set_machine(ifaces[si].first);
2471 fta_node->from[f]->set_interface(ifaces[si].second);
2472 fta_node->from[f]->set_ifq(false);
2474 fta_node->temporal_var = temporal_var;
2475 fta_node->temporal_range = temporal_range;
2477 fta_node->use_bloom = use_bloom;
2479 for(s=0;s<select_list.size();s++){
2480 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Copy each predicate category from its own list.  The copies used to be
// taken from where[p], which gave every category the first N WHERE
// predicates instead of the predicates that belong to it.  Each duplicate
// is also appended to the new node's where list.
2483 for(p=0;p<shared_pred.size();p++){
2484 predicate_t *new_pr = dup_pr(shared_pred[p]->pr, NULL);
2485 cnf_elem *new_cnf = new cnf_elem(new_pr);
2486 analyze_cnf(new_cnf);
2487 fta_node->shared_pred.push_back(new_cnf);
2488 fta_node->where.push_back(new_cnf);
2490 for(p=0;p<pred_t0.size();p++){
2491 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2492 cnf_elem *new_cnf = new cnf_elem(new_pr);
2493 analyze_cnf(new_cnf);
2494 fta_node->pred_t0.push_back(new_cnf);
2495 fta_node->where.push_back(new_cnf);
2497 for(p=0;p<pred_t1.size();p++){
2498 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2499 cnf_elem *new_cnf = new cnf_elem(new_pr);
2500 analyze_cnf(new_cnf);
2501 fta_node->pred_t1.push_back(new_cnf);
2502 fta_node->where.push_back(new_cnf);
2504 for(p=0;p<hash_eq.size();p++){
2505 predicate_t *new_pr = dup_pr(hash_eq[p]->pr, NULL);
2506 cnf_elem *new_cnf = new cnf_elem(new_pr);
2507 analyze_cnf(new_cnf);
2508 fta_node->hash_eq.push_back(new_cnf);
2509 fta_node->where.push_back(new_cnf);
2511 for(p=0;p<postfilter.size();p++){
2512 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2513 cnf_elem *new_cnf = new cnf_elem(new_pr);
2514 analyze_cnf(new_cnf);
2515 fta_node->postfilter.push_back(new_cnf);
2516 fta_node->where.push_back(new_cnf);
2519 // Xfer all of the parameters.
2520 // Use existing handle annotations.
2521 vector<string> param_names = param_tbl->get_param_names();
2523 for(pi=0;pi<param_names.size();pi++){
2524 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2525 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2526 param_tbl->handle_access(param_names[pi]));
2528 fta_node->definitions = definitions;
// A nonzero return from resolve_if_params is recorded as error code 3 on
// this (the original) node; the message goes into this->err_str.
2529 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2530 this->error_code = 3;
2534 ret_vec.push_back(fta_node);
// The merge node takes its schema from the first per-interface join and
// keeps the original node name, so consumers of this query are unaffected.
2537 mrg_qpn *mrg_node = new mrg_qpn((filter_join_qpn *)ret_vec[0],
2538 node_name, sel_names,ifaces, ifdb);
2539 ret_vec.push_back(mrg_node);
2546 // Use to search for unresolved interface param refs in an hfta.
// Accumulate into ret the interface-parameter reference counts reported by
// count_se_ifp_refs / count_pr_ifp_refs for every select-list expression and
// every WHERE predicate of this selection/projection node.
// ifpnames is handed to the helpers unchanged (presumably they record the
// referenced parameter names in it -- confirm).
2548 int spx_qpn::count_ifp_refs(set<string> &ifpnames){
2551 for(i=0;i<select_list.size();++i)
2552 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2553 for(i=0;i<where.size();++i)
2554 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulate into ret the interface-parameter reference counts for an
// aggregation node: select list, WHERE and HAVING predicates, aggregate
// operands, and group-by variable definitions.
// ifpnames is handed to the helpers unchanged (presumably they record the
// referenced parameter names in it -- confirm).
2558 int sgah_qpn::count_ifp_refs(set<string> &ifpnames){
2561 for(i=0;i<select_list.size();++i)
2562 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2563 for(i=0;i<where.size();++i)
2564 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2565 for(i=0;i<having.size();++i)
2566 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2567 for(i=0;i<aggr_tbl.size();++i){
// Builtin aggregates other than the star form carry a single operand expression.
2568 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2569 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
// Aggregates with an operand list: count refs in every operand
// (presumably the user-defined aggregate case -- confirm).
2571 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2572 for(j=0;j<opl.size();++j)
2573 ret += count_se_ifp_refs(opl[j],ifpnames);
// Group-by variables are defined by scalar expressions that may also hold refs.
2576 for(i=0;i<gb_tbl.size();++i){
2577 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Accumulate into ret the interface-parameter reference counts for a running
// aggregation node.  Covers the same parts as the sgah version (select list,
// WHERE, HAVING, aggregate operands, group-by definitions) plus the
// CLOSING_WHEN predicates.
// ifpnames is handed to the helpers unchanged (presumably they record the
// referenced parameter names in it -- confirm).
2583 int rsgah_qpn::count_ifp_refs(set<string> &ifpnames){
2586 for(i=0;i<select_list.size();++i)
2587 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2588 for(i=0;i<where.size();++i)
2589 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2590 for(i=0;i<having.size();++i)
2591 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2592 for(i=0;i<closing_when.size();++i)
2593 ret += count_pr_ifp_refs(closing_when[i]->pr,ifpnames);
2594 for(i=0;i<aggr_tbl.size();++i){
// Builtin aggregates other than the star form carry a single operand expression.
2595 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2596 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
// Aggregates with an operand list: count refs in every operand
// (presumably the user-defined aggregate case -- confirm).
2598 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2599 for(j=0;j<opl.size();++j)
2600 ret += count_se_ifp_refs(opl[j],ifpnames);
// Group-by variables are defined by scalar expressions that may also hold refs.
2603 for(i=0;i<gb_tbl.size();++i){
2604 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// count_ifp_refs for a merge node.
// NOTE(review): presumably a merge has no expressions to inspect and this
// reports zero references -- confirm against the function body.
2609 int mrg_qpn::count_ifp_refs(set<string> &ifpnames){
// Accumulate into ret the interface-parameter reference counts for a hash
// join node: select list, both prefilter lists (prefilter[0] and
// prefilter[1]), the temporal equality predicates, the hash equality
// predicates, and the postfilter.
// ifpnames is handed to the helpers unchanged (presumably they record the
// referenced parameter names in it -- confirm).
2613 int join_eq_hash_qpn::count_ifp_refs(set<string> &ifpnames){
2616 for(i=0;i<select_list.size();++i)
2617 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2618 for(i=0;i<prefilter[0].size();++i)
2619 ret += count_pr_ifp_refs(prefilter[0][i]->pr,ifpnames);
2620 for(i=0;i<prefilter[1].size();++i)
2621 ret += count_pr_ifp_refs(prefilter[1][i]->pr,ifpnames);
2622 for(i=0;i<temporal_eq.size();++i)
2623 ret += count_pr_ifp_refs(temporal_eq[i]->pr,ifpnames);
2624 for(i=0;i<hash_eq.size();++i)
2625 ret += count_pr_ifp_refs(hash_eq[i]->pr,ifpnames);
2626 for(i=0;i<postfilter.size();++i)
2627 ret += count_pr_ifp_refs(postfilter[i]->pr,ifpnames);
// Accumulate into ret the interface-parameter reference counts for a filter
// join: every select-list expression and every predicate in where.  The
// per-category predicate lists are not visited separately here.
// ifpnames is handed to the helpers unchanged (presumably they record the
// referenced parameter names in it -- confirm).
2631 int filter_join_qpn::count_ifp_refs(set<string> &ifpnames){
2634 for(i=0;i<select_list.size();++i)
2635 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2636 for(i=0;i<where.size();++i)
2637 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2642 // Resolve interface params to string literals
// Resolve interface-parameter references in the select list and WHERE
// predicates, using the machine and interface of the first FROM entry.
//   ifdb - interface database passed to the resolve helpers
//   err  - passed to the resolve helpers (presumably receives error text -- confirm)
// Callers in this file treat a nonzero return as failure and set error_code = 3.
2643 int filter_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2646 string ifname = from[0]->get_interface();
2647 string ifmach = from[0]->get_machine();
2648 for(i=0;i<select_list.size();++i)
2649 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2651 for(i=0;i<where.size();++i)
2652 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and WHERE
// predicates, using the machine and interface recorded on table_name.
//   ifdb - interface database passed to the resolve helpers
//   err  - passed to the resolve helpers (presumably receives error text -- confirm)
// Callers in this file treat a nonzero return as failure and set error_code = 3.
2658 int spx_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2661 string ifname = table_name->get_interface();
2662 string ifmach = table_name->get_machine();
2663 for(i=0;i<select_list.size();++i)
2664 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2666 for(i=0;i<where.size();++i)
2667 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references throughout an aggregation node:
// select list, WHERE and HAVING predicates, aggregate operands, and group-by
// definitions.  The machine and interface come from table_name.
//   ifdb - interface database passed to the resolve helpers
//   err  - passed to the resolve helpers (presumably receives error text -- confirm)
// Callers in this file treat a nonzero return as failure and set error_code = 3.
2672 int sgah_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2675 string ifname = table_name->get_interface();
2676 string ifmach = table_name->get_machine();
2678 //printf("Select list has %d elements\n",select_list.size());
2679 for(i=0;i<select_list.size();++i){
2680 //printf("\tresolving elemet %d\n",i);
2681 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) ){
2685 for(i=0;i<where.size();++i){
2686 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err) )
2689 for(i=0;i<having.size();++i){
2690 if( resolve_pr_ifp_refs(having[i]->pr,ifmach, ifname, ifdb, err) )
2693 //printf("aggr list has %d elements\n",select_list.size());
2694 for(i=0;i<aggr_tbl.size();++i){
2695 //printf("\tresolving elemet %d\n",i);
// Builtin aggregates other than the star form carry a single operand expression.
2696 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2697 //printf("\t\t\tbuiltin\n");
2698 if( resolve_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifmach, ifname, ifdb, err) )
// The UDAF case (per the debug trace below): resolve every operand in its list.
2701 //printf("\t\t\tudaf\n");
2702 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2703 for(j=0;j<opl.size();++j)
2704 if( resolve_se_ifp_refs(opl[j],ifmach, ifname, ifdb, err) )
// Group-by variable definitions may reference interface params as well.
2708 for(i=0;i<gb_tbl.size();++i){
2709 if( resolve_se_ifp_refs(gb_tbl.get_def(i), ifmach, ifname, ifdb, err) )
2718 SPLITTING A SELECTION_PROJECTION OPERATOR
2720 An SPX node may reference:
2721 literals, parameters, colrefs, functions, operators
2722 An SPX node may not reference:
2723 group-by variables, aggregates
2725 An SPX node contains
2726 selection list of SEs
2727 where list of CNF predicates
2730 If each selection SE and each where predicate is fta-safe
2731 execute entire operator as an LFTA.
2733 for each predicate in the where clause
2734 if it is fta safe, execute it in the lfta
2735 else, split each SE in the predicate, evaluate the
2736 top-level SEs in the hfta and eval the predicate on that.
2737 For each SE in the se list
2738 Split the SE, eval the high level part, push onto hfta
2742 A SE represents a value which must be computed. The LFTA
2743 must provide sub-values from which the HFTA can compute the
2745 1) the SE is fta-safe
2746 Create an entry in the selection list of the LFTA which is
2747 the SE itself. Reference this LFTA selection list entry in
2748 the HFTA (via a field name assigned to the lfta selection
2750 2) The SE is not fta-safe
2751 Determine the boundary between the fta-safe and the fta-unsafe
2752 portions of the SE. The result is a rooted tree (which is
2753 evaluated at the HFTA) which references sub-SEs (which are
2754 evaluated at the LFTA). Each of the sub-SEs is placed on
2755 the selection list of the LFTA and assigned field names,
2756 the top part is evaluated at the HFTA and references the
2757 sub-SEs through their assigned field names.
2758 The only SEs on the LFTA selection list are those created by
2759 the above mechanism. The collection of assigned field names becomes
2760 the schema of the LFTA.
2762 TODO: insert tablevar names into the colrefs.
// Split a selection/projection node into LFTA and HFTA parts.
//   Ext_fcns      - external function table used for the fta-safety checks
//   Schema        - table list used to look up the schema type of the source
//   hfta_returned - set by this function to report how many hfta nodes are returned
//   ifdb          - interface database used to resolve interfaces and interface params
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces
// Three outcomes are visible in the code below:
//   - the source is not a PROTOCOL_SCHEMA: the node is returned unsplit;
//   - everything is fta-safe: one copy of the query per interface, plus a
//     merge node when there is more than one interface;
//   - otherwise: an lfta node ("_fta_"+node_name) feeding a stream node that
//     keeps the original name, with per-interface subqueries and a merge
//     inserted in between when there are several interfaces.
2766 vector<qp_node *> spx_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2769 vector<qp_node *> ret_vec;
2771 // If the node reads from a stream, don't split.
2772 // int t = Schema->get_table_ref(table_name->get_schema_name());
2773 int t = table_name->get_schema_ref();
2774 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
2776 ret_vec.push_back(this);
2781 // Get the set of interfaces it accesses.
2784 vector<string> sel_names;
2785 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2786 if (ifaces.empty()) {
2787 fprintf(stderr,"INTERNAL ERROR in spx_qpn::split_node_for_fta - empty interface set\n");
2792 // The FTA node, it is always returned.
// NOTE(review): fta_node shares this node's table_name pointer here, so the
// later set_machine/set_interface/set_ifq calls made through
// fta_node->table_name also modify this node's tablevar.
2794 spx_qpn *fta_node = new spx_qpn();
2795 fta_node->table_name = table_name;
2797 // for colname imputation
2798 // vector<string> fta_flds, stream_flds;
2801 // First check if the query can be pushed to the FTA.
2804 for(s=0;s<select_list.size();s++){
2805 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2808 for(p=0;p<where.size();p++){
2809 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2813 ////////////////////////////////////////////////////////////
2814 // The query can be executed entirely in the FTA.
2817 for(si=0;si<ifaces.size();++si){
// NOTE(review): fta_node is reassigned here; the node allocated before the
// safety check does not appear to be released in the visible code -- confirm.
2818 fta_node = new spx_qpn();
// A single interface keeps the original node name; otherwise the name is
// derived from the machine and interface.
2821 if(ifaces.size()==1)
2822 fta_node->set_node_name( node_name );
2824 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2826 fta_node->set_node_name(new_name);
2828 sel_names.push_back(fta_node->get_node_name());
// Each per-interface node gets its own tablevar copy bound to that interface.
2831 fta_node->table_name = table_name->duplicate();
2832 fta_node->table_name->set_machine(ifaces[si].first);
2833 fta_node->table_name->set_interface(ifaces[si].second);
2834 fta_node->table_name->set_ifq(false);
2836 for(s=0;s<select_list.size();s++){
2837 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
2839 for(p=0;p<where.size();p++){
2840 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2841 cnf_elem *new_cnf = new cnf_elem(new_pr);
2842 analyze_cnf(new_cnf);
2844 fta_node->where.push_back(new_cnf);
2847 // Xfer all of the parameters.
2848 // Use existing handle annotations.
2849 vector<string> param_names = param_tbl->get_param_names();
2851 for(pi=0;pi<param_names.size();pi++){
2852 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2853 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2854 param_tbl->handle_access(param_names[pi]));
2856 fta_node->definitions = definitions;
// A nonzero return from resolve_if_params is recorded as error code 3 on
// this (the original) node.
2857 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2858 this->error_code = 3;
2862 ret_vec.push_back(fta_node);
// Several interfaces: add a merge node, built from the first per-interface
// query, that carries the original node name.
2865 if(ifaces.size() > 1){
2866 spx_qpn *tmp_spx = (spx_qpn *)(ret_vec[0]);
2867 mrg_qpn *mrg_node = new mrg_qpn(tmp_spx,
2868 node_name, sel_names,ifaces, ifdb);
2870 Do not split sources until we are done with optimizations
2871 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
2872 for(i=0;i<split_merge.size();++i){
2873 ret_vec.push_back(split_merge[i]);
2875 hfta_returned = split_merge.size();
2877 ret_vec.push_back(mrg_node);
2882 // printf("OK as FTA.\n");
2883 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
2888 ////////////////////////////////////////////////////
2889 // The fta must be split. Create a stream node.
2890 // NOTE : I am counting on the single
2891 // table in the from list. (Joins handled in a different operator).
2895 spx_qpn *stream_node = new spx_qpn();
2896 stream_node->set_node_name( node_name );
2897 // Create the tablevar in the stream's FROM clause.
2898 // set the schema name to the name of the LFTA,
2899 // and use the same tablevar name.
2900 stream_node->table_name = new tablevar_t(
2901 ("_fta_"+node_name).c_str()
2903 stream_node->table_name->set_range_var(table_name->get_var_name());
2906 fta_node->set_node_name( "_fta_"+node_name );
2908 // table var names of fta, stream.
2909 string fta_var = fta_node->table_name->get_var_name();
2910 string stream_var = stream_node->table_name->get_var_name();
2912 // Set up select list vector
2913 vector< vector<select_element *> *> select_vec;
2914 select_vec.push_back(&(fta_node->select_list)); // only one child
2917 // Split the select list into its FTA and stream parts.
2918 // If any part of the SE is fta-unsafe, it will return
2919 // a SE to execute at the stream ref'ing SE's evaluated
2920 // at the fta (which are put on the FTA's select list as a side effect).
2921 // If the SE is fta-safe, put it on the fta select list, make
2922 // a ref to it and put the ref on the stream select list.
2923 for(s=0;s<select_list.size();s++){
2924 bool fta_forbidden = false;
2925 int se_src = SPLIT_FTAVEC_NOTBLVAR;
2926 // scalarexp_t *root_se = split_fta_se(
2927 // select_list[s]->se,fta_forbidden, fta_node->select_list, Ext_fcns
2929 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
2930 fta_forbidden, se_src, select_vec, Ext_fcns
2932 // if(fta_forbidden){
// The root expression goes on the stream select list directly when it is
// fta-forbidden or when se_src is still SPLIT_FTAVEC_NOTBLVAR after the split.
2933 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
2934 stream_node->select_list.push_back(
2935 new select_element(root_se, select_list[s]->name)
// Otherwise the stream selects a reference made by make_fta_se_ref against
// the lfta select list.
2938 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,root_se,0);
2939 stream_node->select_list.push_back(
2940 new select_element(new_se, select_list[s]->name)
2946 // The WHERE clause has already been split into a set of clauses
2947 // that are ANDED together. For each clause, check if its FTA-safe.
2948 // If not, split its SE's into fta-safe and stream-executing parts,
2949 // then put a clause which ref's the SEs into the stream.
2950 // Else put it into the LFTA.
2951 predicate_t *pr_root;
2953 for(p=0;p<where.size();p++){
2954 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) ){
2955 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
2956 // pr_root = split_fta_pr( where[p]->pr, fta_node->select_list, Ext_fcns);
2957 fta_forbidden = true;
2959 pr_root = dup_pr(where[p]->pr, NULL);
2960 fta_forbidden = false;
2962 cnf_elem *cnf_root = new cnf_elem(pr_root);
2963 analyze_cnf(cnf_root);
// Route the clause to the stream node or to the lfta node
// (presumably chosen by fta_forbidden -- confirm).
2966 stream_node->where.push_back(cnf_root);
2968 fta_node->where.push_back(cnf_root);
2974 // Divide the parameters among the stream, FTA.
2975 // Currently : assume that the stream receives all parameters
2976 // and parameter updates, incorporates them, then passes
2977 // all of the parameters to the FTA.
2978 // This will need to change (tables, fta-unsafe types. etc.)
2980 // I will pass on the use_handle_access marking, even
2981 // though the fcn call that requires handle access might
2982 // exist in only one of the parts of the query.
2983 // Parameter manipulation and handle access determination will
2984 // need to be revisited anyway.
2985 vector<string> param_names = param_tbl->get_param_names();
2987 for(pi=0;pi<param_names.size();pi++){
2988 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2989 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2990 param_tbl->handle_access(param_names[pi]));
2991 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2992 param_tbl->handle_access(param_names[pi]));
// The lfta copy of the definitions drops "_referenced_ifaces"; the stream
// node keeps the full set.
2995 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
2996 stream_node->definitions = definitions;
2998 // Now split by interfaces
2999 if(ifaces.size() > 1){
3000 for(si=0;si<ifaces.size();++si){
3001 spx_qpn *subq_node = new spx_qpn();
3003 // Name the subquery
3004 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3006 subq_node->set_node_name( new_name) ;
3007 sel_names.push_back(subq_node->get_node_name());
// Each subquery is a copy of the lfta part (fta_node) bound to one interface.
3010 subq_node->table_name = fta_node->table_name->duplicate();
3011 subq_node->table_name->set_machine(ifaces[si].first);
3012 subq_node->table_name->set_interface(ifaces[si].second);
3013 subq_node->table_name->set_ifq(false);
3015 for(s=0;s<fta_node->select_list.size();s++){
3016 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3018 for(p=0;p<fta_node->where.size();p++){
3019 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3020 cnf_elem *new_cnf = new cnf_elem(new_pr);
3021 analyze_cnf(new_cnf);
3023 subq_node->where.push_back(new_cnf);
3025 // Xfer all of the parameters.
3026 // Use existing handle annotations.
3027 vector<string> param_names = param_tbl->get_param_names();
3029 for(pi=0;pi<param_names.size();pi++){
3030 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3031 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3032 param_tbl->handle_access(param_names[pi]));
3034 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3035 this->error_code = 3;
3038 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3040 ret_vec.push_back(subq_node);
// The merge takes the lfta name ("_fta_"+node_name), which is the table the
// stream node was set up to read from.
3043 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3044 fta_node->node_name, sel_names, ifaces, ifdb);
3046 Do not split sources until we are done with optimizations
3047 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3048 for(i=0;i<split_merge.size();++i){
3049 ret_vec.push_back(split_merge[i]);
// Two hfta nodes are reported: the merge and the stream node.
3052 ret_vec.push_back(mrg_node);
3053 ret_vec.push_back(stream_node);
3054 hfta_returned = 1/*split_merge.size()*/ + 1;
// Path that binds the lfta node directly to ifaces[0]
// (presumably the single-interface case -- confirm).
3057 fta_node->table_name->set_machine(ifaces[0].first);
3058 fta_node->table_name->set_interface(ifaces[0].second);
3059 fta_node->table_name->set_ifq(false);
3060 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3061 this->error_code = 3;
3064 ret_vec.push_back(fta_node);
3065 ret_vec.push_back(stream_node);
3069 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3070 // printf("Stream node is:\n%s\n\n",stream_node->to_query_string().c_str() );
3078 Splitting an aggregation+sampling operator.
3079 right now, return an error if any splitting is required.
// Splitting of the aggregation+sampling operator is not implemented.  A node
// whose source is not a PROTOCOL_SCHEMA is returned unsplit; any other case
// reports an error on stderr.
3082 vector<qp_node *> sgahcwcb_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3086 vector<qp_node *> ret_vec;
3087 int s, p, g, a, o, i;
3090 vector<string> fta_flds, stream_flds;
3092 // If the node reads from a stream, don't split.
3093 // int t = Schema->get_table_ref(table_name->get_schema_name());
3094 int t = table_name->get_schema_ref();
3095 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3096 ret_vec.push_back(this);
3100 fprintf(stderr,"ERROR : cannot split a sampling operator (not yet implemented).\n");
3110 Splitting a running aggregation operator.
3111 The code is almost identical to that of the sgah operator
3113 - there is no lfta-only option.
3114 - the stream node is rsgah_qpn (lfta is sgah or spx)
3115 - need to handle the closing when (similar to having)
3118 vector<qp_node *> rsgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3122 vector<qp_node *> ret_vec;
3123 int s, p, g, a, o, i;
3126 vector<string> fta_flds, stream_flds;
3128 // If the node reads from a stream, don't split.
3129 // int t = Schema->get_table_ref(table_name->get_schema_name());
3130 int t = table_name->get_schema_ref();
3131 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3132 ret_vec.push_back(this);
3136 // Get the set of interfaces it accesses.
3138 vector<string> sel_names;
3139 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3140 if (ifaces.empty()) {
3141 fprintf(stderr,"INTERNAL ERROR in rsgah_qpn::split_node_for_fta - empty interface set\n");
3148 //////////////////////////////////////////////////////////////
3149 /// Split into lfta, hfta.
3151 // A rsgah node must always be split,
3152 // if for no other reason than to complete the
3153 // partial aggregation.
3155 // First, determine if the query can be split into aggr/aggr,
3156 // or if it must be selection/aggr.
3157 // Splitting into selection/aggr is allowed only
3158 // if select_lfta is set.
3161 bool select_allowed = definitions.count("select_lfta")>0;
3162 bool select_rqd = false;
3164 set<int> unsafe_gbvars; // for processing where clause
3165 for(g=0;g<gb_tbl.size();g++){
3166 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3167 if(!select_allowed){
3168 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3169 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3171 this->error_code = 1;
3172 this->err_str = tmpstr;
3176 unsafe_gbvars.insert(g);
3181 // Verify that the SEs in the aggregate definitions are fta-safe
3182 for(a=0;a<aggr_tbl.size();++a){
3183 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3184 if(ase != NULL){ // COUNT(*) does not have a SE.
3185 if(!select_allowed){
3186 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3187 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3188 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3190 this->error_code = 1;
3191 this->err_str = tmpstr;
3200 // Verify that all of the ref'd UDAFs can be split.
3202 for(a=0;a<aggr_tbl.size();++a){
3203 if(! aggr_tbl.is_builtin(a)){
3204 int afcn = aggr_tbl.get_fcn_id(a);
3205 int super_id = Ext_fcns->get_superaggr_id(afcn);
3206 int sub_id = Ext_fcns->get_subaggr_id(afcn);
3207 if(super_id < 0 || sub_id < 0){
3208 if(!select_allowed){
3209 this->err_str += "ERROR in rsgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3210 this->error_code = 1;
3219 for(p=0;p<where.size();p++){
3220 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3221 if(!select_allowed){
3222 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3223 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3225 this->error_code = 1;
3226 this->err_str = tmpstr;
3237 /////////////////////////////////////////////////////
3238 // Split into aggr/aggr.
3244 sgah_qpn *fta_node = new sgah_qpn();
3245 fta_node->table_name = table_name;
3246 fta_node->set_node_name( "_fta_"+node_name );
3247 fta_node->table_name->set_range_var(table_name->get_var_name());
3250 rsgah_qpn *stream_node = new rsgah_qpn();
3251 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3252 stream_node->set_node_name( node_name );
3253 stream_node->table_name->set_range_var(table_name->get_var_name());
3255 // First, process the group-by variables.
3256 // The fta must supply the values of all the gbvars.
3257 // If a gb is computed, the computation must be
3258 // performed at the FTA, so the SE must be FTA-safe.
3259 // Nice side effect : the gbvar table contains
3260 // matching entries for the original query, the lfta query,
3261 // and the hfta query. So gbrefs in the new queries are set
3262 // correctly just by inheriting the gbrefs from the old query.
3263 // If this property changed, I'll need translation tables.
3266 for(g=0;g<gb_tbl.size();g++){
3267 // Insert the gbvar into the lfta.
3268 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
3269 fta_node->gb_tbl.add_gb_var(
3270 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
3273 // Insert a ref to the value of the gbvar into the lfta select list.
3274 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
3275 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
3276 gbvar_fta->set_gb_ref(g);
3277 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
3278 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
3280 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
3281 gbvar_stream->set_gb_ref(-1); // used as GBvar def
3282 stream_node->gb_tbl.add_gb_var(
3283 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
3288 // SEs in the aggregate definitions.
3289 // They are all safe, so split them up for later processing.
3290 map<int, scalarexp_t *> hfta_aggr_se;
3291 for(a=0;a<aggr_tbl.size();++a){
3292 split_fta_aggr( &(aggr_tbl), a,
3293 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
3294 fta_node->select_list,
3301 // Next, the select list.
3303 for(s=0;s<select_list.size();s++){
3304 bool fta_forbidden = false;
3305 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3306 stream_node->select_list.push_back(
3307 new select_element(root_se, select_list[s]->name));
3312 // All the predicates in the where clause must execute
3315 for(p=0;p<where.size();p++){
3316 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
3317 cnf_elem *new_cnf = new cnf_elem(new_pr);
3318 analyze_cnf(new_cnf);
3320 fta_node->where.push_back(new_cnf);
3323 // All of the predicates in the having clause must
3324 // execute in the stream node.
3326 for(p=0;p<having.size();p++){
3327 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3328 cnf_elem *cnf_root = new cnf_elem(pr_root);
3329 analyze_cnf(cnf_root);
3331 stream_node->having.push_back(cnf_root);
3334 // All of the predicates in the closing when clause must
3335 // execute in the stream node.
3337 for(p=0;p<closing_when.size();p++){
3338 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3339 cnf_elem *cnf_root = new cnf_elem(pr_root);
3340 analyze_cnf(cnf_root);
3342 stream_node->closing_when.push_back(cnf_root);
3346 // Divide the parameters among the stream, FTA.
3347 // Currently : assume that the stream receives all parameters
3348 // and parameter updates, incorporates them, then passes
3349 // all of the parameters to the FTA.
3350 // This will need to change (tables, fta-unsafe types. etc.)
3352 // I will pass on the use_handle_access marking, even
3353 // though the fcn call that requires handle access might
3354 // exist in only one of the parts of the query.
3355 // Parameter manipulation and handle access determination will
3356 // need to be revisited anyway.
3357 vector<string> param_names = param_tbl->get_param_names();
3359 for(pi=0;pi<param_names.size();pi++){
3360 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3361 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3362 param_tbl->handle_access(param_names[pi]));
3363 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3364 param_tbl->handle_access(param_names[pi]));
3366 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3367 stream_node->definitions = definitions;
3369 // Now split by interfaces XXXX
3370 if(ifaces.size() > 1){
3371 for(si=0;si<ifaces.size();++si){
3372 sgah_qpn *subq_node = new sgah_qpn();
3374 // Name the subquery
3375 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3377 subq_node->set_node_name( new_name) ;
3378 sel_names.push_back(subq_node->get_node_name());
3381 subq_node->table_name = fta_node->table_name->duplicate();
3382 subq_node->table_name->set_machine(ifaces[si].first);
3383 subq_node->table_name->set_interface(ifaces[si].second);
3384 subq_node->table_name->set_ifq(false);
3387 for(g=0;g<fta_node->gb_tbl.size();g++){
3388 // Insert the gbvar into the lfta.
3389 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
3390 subq_node->gb_tbl.add_gb_var(
3391 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
3395 // Insert the aggregates
3396 for(a=0;a<fta_node->aggr_tbl.size();++a){
3397 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
3400 for(s=0;s<fta_node->select_list.size();s++){
3401 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3403 for(p=0;p<fta_node->where.size();p++){
3404 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3405 cnf_elem *new_cnf = new cnf_elem(new_pr);
3406 analyze_cnf(new_cnf);
3408 subq_node->where.push_back(new_cnf);
3410 for(p=0;p<fta_node->having.size();p++){
3411 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
3412 cnf_elem *new_cnf = new cnf_elem(new_pr);
3413 analyze_cnf(new_cnf);
3415 subq_node->having.push_back(new_cnf);
3417 // Xfer all of the parameters.
3418 // Use existing handle annotations.
3419 vector<string> param_names = param_tbl->get_param_names();
3421 for(pi=0;pi<param_names.size();pi++){
3422 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3423 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3424 param_tbl->handle_access(param_names[pi]));
3426 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3427 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3428 this->error_code = 3;
3432 ret_vec.push_back(subq_node);
3435 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
3436 fta_node->node_name, sel_names, ifaces, ifdb);
3439 Do not split sources until we are done with optimizations
3440 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3441 for(i=0;i<split_merge.size();++i){
3442 ret_vec.push_back(split_merge[i]);
3445 ret_vec.push_back(mrg_node);
3446 ret_vec.push_back(stream_node);
3447 hfta_returned = 1/*split_merge.size()*/+1;
3450 fta_node->table_name->set_machine(ifaces[0].first);
3451 fta_node->table_name->set_interface(ifaces[0].second);
3452 fta_node->table_name->set_ifq(false);
3453 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3454 this->error_code = 3;
3457 ret_vec.push_back(fta_node);
3458 ret_vec.push_back(stream_node);
3463 // ret_vec.push_back(fta_node);
3464 // ret_vec.push_back(stream_node);
3471 /////////////////////////////////////////////////////////////////////
3472 /// Split into selection LFTA, aggregation HFTA.
3474 spx_qpn *fta_node = new spx_qpn();
3475 fta_node->table_name = table_name;
3476 fta_node->set_node_name( "_fta_"+node_name );
3477 fta_node->table_name->set_range_var(table_name->get_var_name());
3480 rsgah_qpn *stream_node = new rsgah_qpn();
3481 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3482 stream_node->set_node_name( node_name );
3483 stream_node->table_name->set_range_var(table_name->get_var_name());
3486 vector< vector<select_element *> *> select_vec;
3487 select_vec.push_back(&(fta_node->select_list)); // only one child
3489 // Process the gbvars. Split their defining SEs.
3490 for(g=0;g<gb_tbl.size();g++){
3491 bool fta_forbidden = false;
3492 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3494 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
3495 fta_forbidden, se_src, select_vec, Ext_fcns
3497 // if(fta_forbidden) (
3498 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3499 stream_node->gb_tbl.add_gb_var(
3500 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
3503 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
3504 stream_node->gb_tbl.add_gb_var(
3505 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
3510 // Process the aggregate table.
3511 // Copy to stream, split the SEs.
3512 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
3513 for(a=0;a<aggr_tbl.size();++a){
3515 if(aggr_tbl.is_builtin(a)){
3516 if(aggr_tbl.is_star_aggr(a)){
3517 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
3518 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
3520 bool fta_forbidden = false;
3521 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3523 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3524 fta_forbidden, se_src, select_vec, Ext_fcns
3526 // if(fta_forbidden) (
3527 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3528 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
3529 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
3531 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3532 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
3533 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
3536 hse->set_data_type(aggr_tbl.get_data_type(a));
3537 hse->set_aggr_id(a);
3538 hfta_aggr_se[a]=hse;
3540 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
3541 vector<scalarexp_t *> new_opl;
3542 for(o=0;o<opl.size();++o){
3543 bool fta_forbidden = false;
3544 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3545 scalarexp_t *agg_se = split_ftavec_se( opl[o],
3546 fta_forbidden, se_src, select_vec, Ext_fcns
3548 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3549 // fta_forbidden, se_src, select_vec, Ext_fcns
3551 // if(fta_forbidden) (
3552 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3553 new_opl.push_back(agg_se);
3555 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3556 new_opl.push_back(new_se);
3559 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
3560 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
3561 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
3562 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
3563 hse->set_aggr_id(a);
3564 hfta_aggr_se[a]=hse;
3569 // Process the WHERE clause.
3570 // If it is fta-safe AND it refs only fta-safe gbvars,
3571 // then expand the gbvars and put it into the lfta.
3572 // Else, split it into an hfta predicate ref'ing
3573 // se's computed partially in the lfta.
3575 predicate_t *pr_root;
3577 for(p=0;p<where.size();p++){
3578 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
3579 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3580 fta_forbidden = true;
3582 pr_root = dup_pr(where[p]->pr, NULL);
3583 expand_gbvars_pr(pr_root, gb_tbl);
3584 fta_forbidden = false;
3586 cnf_elem *cnf_root = new cnf_elem(pr_root);
3587 analyze_cnf(cnf_root);
3590 stream_node->where.push_back(cnf_root);
3592 fta_node->where.push_back(cnf_root);
3597 // Process the Select clause, rehome it on the
3599 for(s=0;s<select_list.size();s++){
3600 bool fta_forbidden = false;
3601 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3602 stream_node->select_list.push_back(
3603 new select_element(root_se, select_list[s]->name));
3607 // Process the Having clause
3609 // All of the predicates in the having clause must
3610 // execute in the stream node.
3612 for(p=0;p<having.size();p++){
3613 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3614 cnf_elem *cnf_root = new cnf_elem(pr_root);
3615 analyze_cnf(cnf_root);
3617 stream_node->having.push_back(cnf_root);
3619 // Same for closing when
3620 for(p=0;p<closing_when.size();p++){
3621 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3622 cnf_elem *cnf_root = new cnf_elem(pr_root);
3623 analyze_cnf(cnf_root);
3625 stream_node->closing_when.push_back(cnf_root);
3629 // Handle parameters and a few last details.
3630 vector<string> param_names = param_tbl->get_param_names();
3632 for(pi=0;pi<param_names.size();pi++){
3633 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3634 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3635 param_tbl->handle_access(param_names[pi]));
3636 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3637 param_tbl->handle_access(param_names[pi]));
3640 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3641 stream_node->definitions = definitions;
3643 // Now split by interfaces YYYY
3644 if(ifaces.size() > 1){
3645 for(si=0;si<ifaces.size();++si){
3646 spx_qpn *subq_node = new spx_qpn();
3648 // Name the subquery
3649 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3651 subq_node->set_node_name( new_name) ;
3652 sel_names.push_back(subq_node->get_node_name());
3655 subq_node->table_name = fta_node->table_name->duplicate();
3656 subq_node->table_name->set_machine(ifaces[si].first);
3657 subq_node->table_name->set_interface(ifaces[si].second);
3658 subq_node->table_name->set_ifq(false);
3660 for(s=0;s<fta_node->select_list.size();s++){
3661 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3663 for(p=0;p<fta_node->where.size();p++){
3664 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3665 cnf_elem *new_cnf = new cnf_elem(new_pr);
3666 analyze_cnf(new_cnf);
3668 subq_node->where.push_back(new_cnf);
3670 // Xfer all of the parameters.
3671 // Use existing handle annotations.
3672 vector<string> param_names = param_tbl->get_param_names();
3674 for(pi=0;pi<param_names.size();pi++){
3675 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3676 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3677 param_tbl->handle_access(param_names[pi]));
3679 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3680 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3681 this->error_code = 3;
3685 ret_vec.push_back(subq_node);
3688 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3689 fta_node->node_name, sel_names, ifaces, ifdb);
3691 Do not split sources until we are done with optimizations
3692 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3693 for(i=0;i<split_merge.size();++i){
3694 ret_vec.push_back(split_merge[i]);
3697 ret_vec.push_back(mrg_node);
3698 ret_vec.push_back(stream_node);
3699 hfta_returned = 1/*split_merge.size()*/+1;
3702 fta_node->table_name->set_machine(ifaces[0].first);
3703 fta_node->table_name->set_interface(ifaces[0].second);
3704 fta_node->table_name->set_ifq(false);
3705 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3706 this->error_code = 3;
3709 ret_vec.push_back(fta_node);
3710 ret_vec.push_back(stream_node);
3720 Splitting an aggregation operator
3722 An aggregation operator can reference
3723 literals, parameters, colrefs, group-by vars, aggregates,
3724 operators, functions
3726 An aggregation contains:
3727 A selection list of SEs
3728 A WHERE list of predicates
3729 A list of group-by variable definitions
3730 A list of aggregates to be computed
3731 A HAVING list of predicates.
3733 Aggregation involves two phases:
3734 1) given an input tuple, determine if it satisfies all of
3735 the WHERE predicates. If so, compute the group.
3736 Look up the group, update its aggregates.
3737 2) given a closed group and its aggregates, determine
3738 if these values satisfy all of the HAVING predicates.
3739 If so, evaluate the SEs on the selection list from the
3740 group and its aggregates.
3741 The two-phase nature of aggregation places restrictions on
3742 what can be referenced by different components of the operator
3743 (in addition to functions and operators).
3744 - group-by variables : literals, parameters, colrefs
3745 - WHERE predicates : group-by vars, literals, params, colrefs
3746 - HAVING predicates : group-by vars, literals, params, aggregates
3747 - Selection list SEs : group-by vars, literals, params, aggregates
3749 Splitting an aggregation operator into an LFTA/HFTA part
3750 involves performing partial aggregation at the LFTA and
3751 completing the aggregation at the HFTA.
3752 - Given a tuple, the LFTA part evaluates the WHERE clause,
3753 and if it is satisfied, computes the group. Look up the group
3754 and update the aggregates. Output the group and its partial aggregates.
3756 - Given a partial aggregate from the LFTA, look up the group and
3757 update its aggregates. When the group is closed, evaluate
3758 the HAVING clause and the SEs on the selection list.
3759 THEREFORE the selection list of the LFTA must consist of the
3760 group-by variables and the set of (bare) subaggregate values
3761 necessary to compute the super aggregates.
3762 Unlike the case with the SPX operator, the SE splitting point
3763 is at the GBvar and the aggregate value level.
3766 For each group-by variable
3767 Put the GB variable definition in the LFTA GBVAR list.
3768 Put the GBVAR in the LFTA selection list (as an SE).
3769 Put a reference to that GBVAR in the HFTA GBVAR list.
3771 For each aggregate, split it into a superaggregate and a subaggregate.
3772 The SE of the superaggregate references the subaggregate value.
3773 (this will need modifications for MF aggregation)
3774 For each SE in the selection list and each HAVING predicate:
3775 Make GBVAR references point to the new GBVAR.
3776 Make the aggregate value references point to the new aggregates.
3778 SEs are not so much split as their refs are changed.
3780 TODO: insert tablevar names into the colrefs.
3785 vector<qp_node *> sgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3789 vector<qp_node *> ret_vec;
3790 int s, p, g, a, o, i;
3793 vector<string> fta_flds, stream_flds;
3795 // If the node reads from a stream, don't split.
3796 // int t = Schema->get_table_ref(table_name->get_schema_name());
3797 int t = table_name->get_schema_ref();
3798 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3799 ret_vec.push_back(this);
3803 // Get the set of interfaces it accesses.
3805 vector<string> sel_names;
3806 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3807 if (ifaces.empty()) {
3808 fprintf(stderr,"INTERNAL ERROR in sgah_qpn::split_node_for_fta - empty interface set\n");
3814 //////////////////////////////////////////////
3815 // Is this LFTA-only?
3816 if(definitions.count("lfta_aggregation")>0){
3817 // Yes. Ensure that everything is lfta-safe.
3819 // Check only one interface is accessed.
3820 if(ifaces.size()>1){
3821 this->err_str = "ERROR, group-by query "+node_name+" is lfta-only, but it accesses more than one interface:\n";
3822 for(si=0;si<ifaces.size();++si)
3823 this->err_str += "\t"+ifaces[si].first+"."+ifaces[si].second+"\n";
3824 this->error_code = 2;
3828 // Check the group-by attributes
3829 for(g=0;g<gb_tbl.size();g++){
3830 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3831 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition and the query is lfta-only (%s).\n",
3832 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3834 this->error_code = 1;
3835 this->err_str = tmpstr;
3840 // Verify that the SEs in the aggregate definitions are fta-safe
3841 for(a=0;a<aggr_tbl.size();++a){
3842 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3843 if(ase != NULL){ // COUNT(*) does not have a SE.
3844 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3845 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe scalar expression and the query is lfta-only (%s).\n",
3846 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3848 this->error_code = 1;
3849 this->err_str = tmpstr;
3853 if(! aggr_tbl.fta_legal(a,Ext_fcns)){
3854 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3855 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe aggregate and the query is lfta-only (%s).\n",
3856 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3858 this->error_code = 1;
3859 this->err_str = tmpstr;
3865 // Ensure that all the aggregates are fta-safe ....
3869 for(s=0;s<select_list.size();s++){
3870 if(! check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns)){
3871 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
3872 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3874 this->error_code = 1;
3875 this->err_str = tmpstr;
3882 for(p=0;p<where.size();p++){
3883 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3884 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
3885 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3887 this->error_code = 1;
3888 this->err_str = tmpstr;
3895 if(having.size()>0){
3896 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : the query is lfta-only, so it can't have a HAVING clause.(%s).\n",
3897 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3899 this->error_code = 1;
3900 this->err_str = tmpstr;
3903 // The query is lfta safe, return it.
3906 ret_vec.push_back(this);
3910 //////////////////////////////////////////////////////////////
3911 /// Split into lfta, hfta.
3913 // A sgah node must always be split,
3914 // if for no other reason than to complete the
3915 // partial aggregation.
3917 // First, determine if the query can be spit into aggr/aggr,
3918 // or if it must be selection/aggr.
3919 // Splitting into selection/aggr is allowed only
3920 // if select_lfta is set.
3923 bool select_allowed = definitions.count("select_lfta")>0;
3924 bool select_rqd = false;
3926 set<int> unsafe_gbvars; // for processing where clause
3927 for(g=0;g<gb_tbl.size();g++){
3928 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3929 if(!select_allowed){
3930 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3931 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3933 this->error_code = 1;
3934 this->err_str = tmpstr;
3938 unsafe_gbvars.insert(g);
3943 // Verify that the SEs in the aggregate definitions are fta-safe
3944 for(a=0;a<aggr_tbl.size();++a){
3945 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3946 if(ase != NULL){ // COUNT(*) does not have a SE.
3947 if(!select_allowed){
3948 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3949 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3950 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3952 this->error_code = 1;
3953 this->err_str = tmpstr;
3962 // Verify that all of the ref'd UDAFs can be split.
3964 for(a=0;a<aggr_tbl.size();++a){
3965 if(! aggr_tbl.is_builtin(a)){
3966 int afcn = aggr_tbl.get_fcn_id(a);
3967 int super_id = Ext_fcns->get_superaggr_id(afcn);
3968 int sub_id = Ext_fcns->get_subaggr_id(afcn);
3969 if(super_id < 0 || sub_id < 0){
3970 if(!select_allowed){
3971 this->err_str += "ERROR in sgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3972 this->error_code = 1;
3981 for(p=0;p<where.size();p++){
3982 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3983 if(!select_allowed){
3984 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3985 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3987 this->error_code = 1;
3988 this->err_str = tmpstr;
3999 /////////////////////////////////////////////////////
4000 // Split into aggr/aggr.
4006 sgah_qpn *fta_node = new sgah_qpn();
4007 fta_node->table_name = table_name;
4008 fta_node->set_node_name( "_fta_"+node_name );
4009 fta_node->table_name->set_range_var(table_name->get_var_name());
4012 sgah_qpn *stream_node = new sgah_qpn();
4013 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4014 stream_node->set_node_name( node_name );
4015 stream_node->table_name->set_range_var(table_name->get_var_name());
4017 // allowed stream disorder. Default is 2,
4018 // can override with max_lfta_disorder setting.
4019 // Also limit the hfta disorder, set to lfta disorder + 1.
4020 // can override with max_hfta_disorder.
4022 fta_node->lfta_disorder = 2;
4023 if(this->get_val_of_def("max_lfta_disorder") != ""){
4024 int d = atoi(this->get_val_of_def("max_lfta_disorder").c_str() );
4026 fprintf(stderr,"Warning, max_lfta_disorder in node %s is %d, must be at least 1, ignoring.\n",node_name.c_str(), d);
4028 fta_node->lfta_disorder = d;
4029 printf("node %s setting lfta_disorder = %d\n",node_name.c_str(),fta_node->lfta_disorder);
4032 if(fta_node->lfta_disorder > 1)
4033 stream_node->hfta_disorder = fta_node->lfta_disorder + 1;
4035 stream_node->hfta_disorder = 1;
4037 if(this->get_val_of_def("max_hfta_disorder") != ""){
4038 int d = atoi(this->get_val_of_def("max_hfta_disorder").c_str() );
4039 if(d<fta_node->lfta_disorder){
4040 fprintf(stderr,"Warning, max_hfta_disorder in node %s is %d, must be at least the max lfta disorder %d, ignoring.\n",node_name.c_str(), d,fta_node->lfta_disorder);
4042 fta_node->lfta_disorder = d;
4044 if(fta_node->lfta_disorder < fta_node->hfta_disorder){
4045 fta_node->hfta_disorder = fta_node->lfta_disorder + 1;
4049 // First, process the group-by variables.
4050 // The fta must supply the values of all the gbvars.
4051 // If a gb is computed, the computation must be
4052 // performed at the FTA, so the SE must be FTA-safe.
4053 // Nice side effect : the gbvar table contains
4054 // matching entries for the original query, the lfta query,
4055 // and the hfta query. So gbrefs in the new queries are set
4056 // correctly just by inheriting the gbrefs from the old query.
4057 // If this property changed, I'll need translation tables.
4060 for(g=0;g<gb_tbl.size();g++){
4061 // Insert the gbvar into the lfta.
4062 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
4063 fta_node->gb_tbl.add_gb_var(
4064 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
4067 // Insert a ref to the value of the gbvar into the lfta select list.
4068 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
4069 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
4070 gbvar_fta->set_gb_ref(g);
4071 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
4072 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
4074 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
4075 gbvar_stream->set_gb_ref(-1); // used as GBvar def
4076 stream_node->gb_tbl.add_gb_var(
4077 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
4080 // multiple aggregation patterns, if any, go with the hfta
4081 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4083 // SEs in the aggregate definitions.
4084 // They are all safe, so split them up for later processing.
4085 map<int, scalarexp_t *> hfta_aggr_se;
4086 for(a=0;a<aggr_tbl.size();++a){
4087 split_fta_aggr( &(aggr_tbl), a,
4088 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
4089 fta_node->select_list,
4097 for(ii=0;ii<fta_flds.size() || ii < fta_node->select_list.size();++ii){
4098 if(ii<fta_flds.size())
4099 printf("\t%s : ",fta_flds[ii].c_str());
4102 if(ii<fta_node->select_list.size())
4103 printf("%s\n",fta_node->select_list[ii]->to_string().c_str());
4107 printf("hfta aggregates are:");
4108 for(ii=0;ii<stream_node->aggr_tbl.size();++ii){
4109 printf(" %s",stream_node->aggr_tbl.get_op(ii).c_str());
4111 printf("\nlfta aggregates are:");
4112 for(ii=0;ii<fta_node->aggr_tbl.size();++ii){
4113 printf(" %s",fta_node->aggr_tbl.get_op(ii).c_str());
4121 // Next, the select list.
4123 for(s=0;s<select_list.size();s++){
4124 bool fta_forbidden = false;
4125 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4126 stream_node->select_list.push_back(
4127 new select_element(root_se, select_list[s]->name));
4132 // All the predicates in the where clause must execute
4135 for(p=0;p<where.size();p++){
4136 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
4137 cnf_elem *new_cnf = new cnf_elem(new_pr);
4138 analyze_cnf(new_cnf);
4140 fta_node->where.push_back(new_cnf);
4143 // All of the predicates in the having clause must
4144 // execute in the stream node.
4146 for(p=0;p<having.size();p++){
4147 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4148 cnf_elem *cnf_root = new cnf_elem(pr_root);
4149 analyze_cnf(cnf_root);
4151 stream_node->having.push_back(cnf_root);
4155 // Divide the parameters among the stream, FTA.
4156 // Currently : assume that the stream receives all parameters
4157 // and parameter updates, incorporates them, then passes
4158 // all of the parameters to the FTA.
4159 // This will need to change (tables, fta-unsafe types. etc.)
4161 // I will pass on the use_handle_access marking, even
4162 // though the fcn call that requires handle access might
4163 // exist in only one of the parts of the query.
4164 // Parameter manipulation and handle access determination will
4165 // need to be revisited anyway.
4166 vector<string> param_names = param_tbl->get_param_names();
4168 for(pi=0;pi<param_names.size();pi++){
4169 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4170 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4171 param_tbl->handle_access(param_names[pi]));
4172 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4173 param_tbl->handle_access(param_names[pi]));
4175 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4176 stream_node->definitions = definitions;
4178 // Now split by interfaces XXXX
4179 if(ifaces.size() > 1){
4180 for(si=0;si<ifaces.size();++si){
4181 sgah_qpn *subq_node = new sgah_qpn();
4183 // Name the subquery
4184 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4186 subq_node->set_node_name( new_name) ;
4187 sel_names.push_back(subq_node->get_node_name());
4190 subq_node->table_name = fta_node->table_name->duplicate();
4191 subq_node->table_name->set_machine(ifaces[si].first);
4192 subq_node->table_name->set_interface(ifaces[si].second);
4193 subq_node->table_name->set_ifq(false);
4196 for(g=0;g<fta_node->gb_tbl.size();g++){
4197 // Insert the gbvar into the lfta.
4198 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
4199 subq_node->gb_tbl.add_gb_var(
4200 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
4204 // Insert the aggregates
4205 for(a=0;a<fta_node->aggr_tbl.size();++a){
4206 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
4209 for(s=0;s<fta_node->select_list.size();s++){
4210 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4212 for(p=0;p<fta_node->where.size();p++){
4213 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4214 cnf_elem *new_cnf = new cnf_elem(new_pr);
4215 analyze_cnf(new_cnf);
4217 subq_node->where.push_back(new_cnf);
4219 for(p=0;p<fta_node->having.size();p++){
4220 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
4221 cnf_elem *new_cnf = new cnf_elem(new_pr);
4222 analyze_cnf(new_cnf);
4224 subq_node->having.push_back(new_cnf);
4226 // Xfer all of the parameters.
4227 // Use existing handle annotations.
4228 vector<string> param_names = param_tbl->get_param_names();
4230 for(pi=0;pi<param_names.size();pi++){
4231 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4232 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4233 param_tbl->handle_access(param_names[pi]));
4235 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4236 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4237 this->error_code = 3;
4242 subq_node->lfta_disorder = fta_node->lfta_disorder;
4244 ret_vec.push_back(subq_node);
4247 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
4248 fta_node->node_name, sel_names, ifaces, ifdb);
4249 mrg_node->set_disorder(fta_node->lfta_disorder);
4252 Do not split sources until we are done with optimizations
4253 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4254 for(i=0;i<split_merge.size();++i){
4255 ret_vec.push_back(split_merge[i]);
4258 ret_vec.push_back(mrg_node);
4259 ret_vec.push_back(stream_node);
4260 hfta_returned = 1/*split_merge.size()*/+1;
4263 fta_node->table_name->set_machine(ifaces[0].first);
4264 fta_node->table_name->set_interface(ifaces[0].second);
4265 fta_node->table_name->set_ifq(false);
4266 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4267 this->error_code = 3;
4270 ret_vec.push_back(fta_node);
4271 ret_vec.push_back(stream_node);
4276 // ret_vec.push_back(fta_node);
4277 // ret_vec.push_back(stream_node);
4284 /////////////////////////////////////////////////////////////////////
4285 /// Split into selection LFTA, aggregation HFTA.
4287 spx_qpn *fta_node = new spx_qpn();
4288 fta_node->table_name = table_name;
4289 fta_node->set_node_name( "_fta_"+node_name );
4290 fta_node->table_name->set_range_var(table_name->get_var_name());
4293 sgah_qpn *stream_node = new sgah_qpn();
4294 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4295 stream_node->set_node_name( node_name );
4296 stream_node->table_name->set_range_var(table_name->get_var_name());
4299 vector< vector<select_element *> *> select_vec;
4300 select_vec.push_back(&(fta_node->select_list)); // only one child
4302 // Process the gbvars. Split their defining SEs.
4303 for(g=0;g<gb_tbl.size();g++){
4304 bool fta_forbidden = false;
4305 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4307 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
4308 fta_forbidden, se_src, select_vec, Ext_fcns
4310 // if(fta_forbidden) (
4311 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4312 stream_node->gb_tbl.add_gb_var(
4313 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
4316 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
4317 stream_node->gb_tbl.add_gb_var(
4318 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
4322 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4324 // Process the aggregate table.
4325 // Copy to stream, split the SEs.
4326 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
4327 for(a=0;a<aggr_tbl.size();++a){
4329 if(aggr_tbl.is_builtin(a)){
4330 if(aggr_tbl.is_star_aggr(a)){
4331 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
4332 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
4334 bool fta_forbidden = false;
4335 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4337 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4338 fta_forbidden, se_src, select_vec, Ext_fcns
4340 // if(fta_forbidden) (
4341 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4342 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
4343 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
4345 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4346 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
4347 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
4350 hse->set_data_type(aggr_tbl.get_data_type(a));
4351 hse->set_aggr_id(a);
4352 hfta_aggr_se[a]=hse;
4354 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
4355 vector<scalarexp_t *> new_opl;
4356 for(o=0;o<opl.size();++o){
4357 bool fta_forbidden = false;
4358 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4359 scalarexp_t *agg_se = split_ftavec_se( opl[o],
4360 fta_forbidden, se_src, select_vec, Ext_fcns
4362 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4363 // fta_forbidden, se_src, select_vec, Ext_fcns
4365 // if(fta_forbidden) (
4366 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4367 new_opl.push_back(agg_se);
4369 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4370 new_opl.push_back(new_se);
4373 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
4374 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
4375 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
4376 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
4377 hse->set_aggr_id(a);
4378 hfta_aggr_se[a]=hse;
4383 // Process the WHERE clause.
4384 // If it is fta-safe AND it refs only fta-safe gbvars,
4385 // then expand the gbvars and put it into the lfta.
4386 // Else, split it into an hfta predicate ref'ing
4387 // se's computed partially in the lfta.
4389 predicate_t *pr_root;
4391 for(p=0;p<where.size();p++){
4392 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
4393 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
4394 fta_forbidden = true;
4396 pr_root = dup_pr(where[p]->pr, NULL);
4397 expand_gbvars_pr(pr_root, gb_tbl);
4398 fta_forbidden = false;
4400 cnf_elem *cnf_root = new cnf_elem(pr_root);
4401 analyze_cnf(cnf_root);
4404 stream_node->where.push_back(cnf_root);
4406 fta_node->where.push_back(cnf_root);
4411 // Process the Select clause, rehome it on the
4413 for(s=0;s<select_list.size();s++){
4414 bool fta_forbidden = false;
4415 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4416 stream_node->select_list.push_back(
4417 new select_element(root_se, select_list[s]->name));
4421 // Process the Having clause
4423 // All of the predicates in the having clause must
4424 // execute in the stream node.
4426 for(p=0;p<having.size();p++){
4427 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4428 cnf_elem *cnf_root = new cnf_elem(pr_root);
4429 analyze_cnf(cnf_root);
4431 stream_node->having.push_back(cnf_root);
4434 // Handle parameters and a few last details.
4435 vector<string> param_names = param_tbl->get_param_names();
4437 for(pi=0;pi<param_names.size();pi++){
4438 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4439 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4440 param_tbl->handle_access(param_names[pi]));
4441 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4442 param_tbl->handle_access(param_names[pi]));
4445 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4446 stream_node->definitions = definitions;
4448 // Now split by interfaces YYYY
4449 if(ifaces.size() > 1){
4450 for(si=0;si<ifaces.size();++si){
4451 spx_qpn *subq_node = new spx_qpn();
4453 // Name the subquery
4454 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4456 subq_node->set_node_name( new_name) ;
4457 sel_names.push_back(subq_node->get_node_name());
4460 subq_node->table_name = fta_node->table_name->duplicate();
4461 subq_node->table_name->set_machine(ifaces[si].first);
4462 subq_node->table_name->set_interface(ifaces[si].second);
4463 subq_node->table_name->set_ifq(false);
4465 for(s=0;s<fta_node->select_list.size();s++){
4466 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4468 for(p=0;p<fta_node->where.size();p++){
4469 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4470 cnf_elem *new_cnf = new cnf_elem(new_pr);
4471 analyze_cnf(new_cnf);
4473 subq_node->where.push_back(new_cnf);
4475 // Xfer all of the parameters.
4476 // Use existing handle annotations.
4477 vector<string> param_names = param_tbl->get_param_names();
4479 for(pi=0;pi<param_names.size();pi++){
4480 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4481 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4482 param_tbl->handle_access(param_names[pi]));
4484 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4485 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4486 this->error_code = 3;
4490 ret_vec.push_back(subq_node);
4493 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4494 fta_node->node_name, sel_names, ifaces, ifdb);
4496 Do not split sources until we are done with optimizations
4497 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4498 for(i=0;i<split_merge.size();++i){
4499 ret_vec.push_back(split_merge[i]);
4502 ret_vec.push_back(mrg_node);
4503 ret_vec.push_back(stream_node);
4504 hfta_returned = 1/*split_merge.size()*/+1;
4507 fta_node->table_name->set_machine(ifaces[0].first);
4508 fta_node->table_name->set_interface(ifaces[0].second);
4509 fta_node->table_name->set_ifq(false);
4510 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4511 this->error_code = 3;
4514 ret_vec.push_back(fta_node);
4515 ret_vec.push_back(stream_node);
4520 // ret_vec.push_back(fta_node);
4521 // ret_vec.push_back(stream_node);
4530 SPLITTING AN EQ-TEMPORAL, HASH JOIN OPERATOR
4532 A JOIN_EQ_HASH_QPN node may reference:
4533 literals, parameters, colrefs, functions, operators
4534 A JOIN_EQ_HASH_QPN node may not reference:
4535 group-by variables, aggregates
4537 A JOIN_EQ_HASH_QPN node contains
4538 selection list of SEs
4539 where list of CNF predicates, broken into:
4546 For each tablevar whose source is a PROTOCOL
4547 Create a LFTA for that tablevar
4548 Push as many prefilter[..] predicates to that tablevar as is
4550 Split the SEs in the select list, and the predicates not
// Split this eq-temporal hash join into LFTA selection children plus an HFTA join.
//
// Visible steps:
//   1. If no FROM source has PROTOCOL_SCHEMA type the node is stream-only and is
//      handed back unsplit.
//   2. Build stream_node, an HFTA copy of the join over duplicated tablevars.
//   3. Create one spx_qpn child per PROTOCOL source and point the matching HFTA
//      FROM entry at it; non-PROTOCOL sources get NULL placeholders.
//   4. Distribute prefilter predicates between child and HFTA, split the remaining
//      predicates and the select list with split_ftavec_pr / split_ftavec_se.
//   5. Rebind column references in the children and in the HFTA node, then rebuild
//      the HFTA WHERE list from its predicate classes.
//   6. Resolve each child against its interface set (get_ifaces); with several
//      interfaces the child is cloned per interface and the clones feed a mrg_qpn.
//   7. Copy the query definitions and all parameters to every returned node.
//
// Ext_fcns      - forwarded to check_fta_forbidden_pr / split_ftavec_pr / split_ftavec_se.
// Schema        - consulted for the schema type of each FROM entry.
// hfta_returned - out: set to hfta_nodes.size()+1 (the merge nodes plus stream_node).
// ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces,
//                 resolve_if_params and the mrg_qpn constructor.
// Result vector order: LFTA spx nodes, then the merge nodes, then stream_node last.
// error_code is set to 3 when resolve_if_params reports a failure (message in err_str).
4555 vector<qp_node *> join_eq_hash_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4557 vector<qp_node *> ret_vec;
4560 // If the node reads from streams only, don't split.
4561 bool stream_only = true;
4562 for(f=0;f<from.size();++f){
4563 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4564 int t = from[f]->get_schema_ref();
4565 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA) stream_only = false;
// Stream-only case: hand back this node unchanged.
4569 ret_vec.push_back(this);
4574 // The HFTA node, it is always returned.
4576 join_eq_hash_qpn *stream_node = new join_eq_hash_qpn();
4577 for(f=0;f<from.size();++f){
4578 // tablevar_t *tmp_tblvar = new tablevar_t( from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str());
4579 tablevar_t *tmp_tblvar = from[f]->duplicate();
4580 // tmp_tblvar->set_range_var(from[f]->get_var_name());
4582 stream_node->from.push_back(tmp_tblvar);
4584 stream_node->set_node_name(node_name);
4586 // Create spx (selection) children for each PROTOCOL source.
4587 vector<spx_qpn *> child_vec;
4588 vector< vector<select_element *> *> select_vec;
4589 for(f=0;f<from.size();++f){
4590 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4591 int t = from[f]->get_schema_ref();
4592 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA){
4593 spx_qpn *child_qpn = new spx_qpn();
// Child name is "_fta_<from position>_<join name>", built in tmpstr.
// NOTE(review): sprintf is unbounded; confirm tmpstr can hold the longest node name.
4594 sprintf(tmpstr,"_fta_%d_%s",f,node_name.c_str());
4595 child_qpn->set_node_name(string(tmpstr));
4596 child_qpn->table_name = new tablevar_t(
4597 from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str(), from[f]->get_ifq());
4598 child_qpn->table_name->set_range_var(from[f]->get_var_name());
4600 child_vec.push_back(child_qpn);
4601 select_vec.push_back(&(child_qpn->select_list));
4603 // Update the stream's FROM clause to read from this child
4604 stream_node->from[f]->set_interface("");
4605 stream_node->from[f]->set_schema(tmpstr);
// Non-PROTOCOL source: NULL placeholders keep child_vec and select_vec indexed
// by FROM position.
4607 child_vec.push_back(NULL);
4608 select_vec.push_back(NULL);
4612 // Push lfta-safe prefilter to the lfta
4613 // TODO: I'm not copying the preds, I dont *think* it will be a problem.
4614 predicate_t *pr_root;
4616 for(f=0;f<from.size();++f){
4617 vector<cnf_elem *> pred_vec = prefilter[f];
4618 if(child_vec[f] != NULL){
4619 for(p=0;p<pred_vec.size();++p){
// A true result from check_fta_forbidden_pr sends the predicate to the child
// untouched; otherwise it is split and the resulting predicate is kept as an
// HFTA prefilter.
4620 if(check_fta_forbidden_pr(pred_vec[p]->pr,NULL, Ext_fcns)){
4621 child_vec[f]->where.push_back(pred_vec[p]);
4623 pr_root = split_ftavec_pr(pred_vec[p]->pr,select_vec,Ext_fcns);
4624 cnf_elem *cnf_root = new cnf_elem(pr_root);
4625 analyze_cnf(cnf_root);
4626 stream_node->prefilter[f].push_back(cnf_root);
// Source without an LFTA child: its prefilter predicates stay in the HFTA node.
4630 for(p=0;p<pred_vec.size();++p){
4631 stream_node->prefilter[f].push_back(pred_vec[p]);
4637 // Process the other predicates
4638 for(p=0;p<temporal_eq.size();++p){
4639 pr_root = split_ftavec_pr(temporal_eq[p]->pr,select_vec,Ext_fcns);
4640 cnf_elem *cnf_root = new cnf_elem(pr_root);
4641 analyze_cnf(cnf_root);
4642 stream_node->temporal_eq.push_back(cnf_root);
4644 for(p=0;p<hash_eq.size();++p){
4645 pr_root = split_ftavec_pr(hash_eq[p]->pr,select_vec,Ext_fcns);
4646 cnf_elem *cnf_root = new cnf_elem(pr_root);
4647 analyze_cnf(cnf_root);
4648 stream_node->hash_eq.push_back(cnf_root);
4650 for(p=0;p<postfilter.size();++p){
4651 pr_root = split_ftavec_pr(postfilter[p]->pr,select_vec,Ext_fcns);
4652 cnf_elem *cnf_root = new cnf_elem(pr_root);
4653 analyze_cnf(cnf_root);
4654 stream_node->postfilter.push_back(cnf_root);
// Select list: each expression is split; the HFTA keeps either the split result
// itself or a reference produced by make_fta_se_ref.
4658 for(s=0;s<select_list.size();s++){
4659 bool fta_forbidden = false;
4660 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4661 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
4662 fta_forbidden, se_src, select_vec, Ext_fcns
// FTA-forbidden expressions, and those whose source is not a PROTOCOL child,
// are selected directly by the HFTA.
4664 if(fta_forbidden || !is_PROTOCOL_source(se_src, select_vec)){
4665 stream_node->select_list.push_back(
4666 new select_element(root_se, select_list[s]->name) );
4668 scalarexp_t *new_se=make_fta_se_ref(select_vec,root_se,se_src);
4669 stream_node->select_list.push_back(
4670 new select_element(new_se, select_list[s]->name)
4676 // I need to "rehome" the colrefs -- make the annotations in the colrefs
4677 // agree with their tablevars.
4678 for(f=0;f<child_vec.size();++f){
4679 if(child_vec[f]!=NULL){
// Each child has exactly one tablevar, so its colrefs are bound at position 0.
4680 vector<tablevar_t *> fm; fm.push_back(child_vec[f]->table_name);
4682 for(s=0;s<child_vec[f]->select_list.size();++s)
4683 bind_colref_se(child_vec[f]->select_list[s]->se, fm,0,0);
4684 for(p=0;p<child_vec[f]->where.size();++p)
4685 // bind_colref_pr(child_vec[f]->where[p]->pr, fm,f,0);
4686 bind_colref_pr(child_vec[f]->where[p]->pr, fm,0,0);
4690 // rehome the colrefs in the hfta node.
// NOTE(review): where.clear() runs on every pass of this loop and again before
// the rebuild below; the clears inside the loop look redundant.
4691 for(f=0;f<stream_node->from.size();++f){
4692 stream_node->where.clear();
4693 for(s=0;s<stream_node->from.size();++s){
4694 for(p=0;p<stream_node->prefilter[s].size();++p){
4695 bind_colref_pr((stream_node->prefilter[s])[p]->pr,stream_node->from,f,f);
4698 for(p=0;p<stream_node->temporal_eq.size();++p){
4699 bind_colref_pr(stream_node->temporal_eq[p]->pr,stream_node->from,f,f);
4701 for(p=0;p<stream_node->hash_eq.size();++p){
4702 bind_colref_pr(stream_node->hash_eq[p]->pr,stream_node->from,f,f);
4704 for(p=0;p<stream_node->postfilter.size();++p){
4705 bind_colref_pr(stream_node->postfilter[p]->pr,stream_node->from,f,f);
4707 for(s=0;s<stream_node->select_list.size();++s){
4708 bind_colref_se(stream_node->select_list[s]->se,stream_node->from,f,f);
4712 // Rebuild the WHERE clause
4713 stream_node->where.clear();
4714 for(s=0;s<stream_node->from.size();++s){
4715 for(p=0;p<stream_node->prefilter[s].size();++p){
4716 stream_node->where.push_back((stream_node->prefilter[s])[p]);
4719 for(p=0;p<stream_node->temporal_eq.size();++p){
4720 stream_node->where.push_back(stream_node->temporal_eq[p]);
4722 for(p=0;p<stream_node->hash_eq.size();++p){
4723 stream_node->where.push_back(stream_node->hash_eq[p]);
4725 for(p=0;p<stream_node->postfilter.size();++p){
4726 stream_node->where.push_back(stream_node->postfilter[p]);
4730 // Build the return list
4731 vector<qp_node *> hfta_nodes;
4733 for(f=0;f<from.size();++f){
4734 if(child_vec[f] != NULL){
4735 spx_qpn *c_node = child_vec[f];
4736 vector<pair<string, string> > ifaces = get_ifaces(c_node->table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
4737 if (ifaces.empty()) {
4738 fprintf(stderr,"INTERNAL ERROR in join_eq_hash_qpn::split_node_for_fta - empty interface set\n");
// Single interface: bind the child directly to it.
4742 if(ifaces.size() == 1){
4743 c_node->table_name->set_machine(ifaces[0].first);
4744 c_node->table_name->set_interface(ifaces[0].second);
4745 c_node->table_name->set_ifq(false);
4746 if(c_node->resolve_if_params(ifdb, this->err_str)){
4747 this->error_code = 3;
4750 ret_vec.push_back(c_node);
// Several interfaces: clone the child once per interface and merge the clones.
4752 vector<string> sel_names;
4754 for(si=0;si<ifaces.size();++si){
4755 spx_qpn *subq_node = new spx_qpn();
4757 // Name the subquery
4758 string new_name = "_"+c_node->node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4760 subq_node->set_node_name( new_name) ;
4761 sel_names.push_back(subq_node->get_node_name());
4764 subq_node->table_name = c_node->table_name->duplicate();
4765 subq_node->table_name->set_machine(ifaces[si].first);
4766 subq_node->table_name->set_interface(ifaces[si].second);
4767 subq_node->table_name->set_ifq(false);
4769 for(s=0;s<c_node->select_list.size();s++){
4770 subq_node->select_list.push_back(dup_select(c_node->select_list[s], NULL));
4772 for(p=0;p<c_node->where.size();p++){
4773 predicate_t *new_pr = dup_pr(c_node->where[p]->pr, NULL);
4774 cnf_elem *new_cnf = new cnf_elem(new_pr);
4775 analyze_cnf(new_cnf);
4778 subq_node->where.push_back(new_cnf);
4780 // Xfer all of the parameters.
4781 // Use existing handle annotations.
4782 // vector<string> param_names = param_tbl->get_param_names();
4784 // for(pi=0;pi<param_names.size();pi++){
4785 // data_type *dt = param_tbl->get_data_type(param_names[pi]);
4786 // subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4787 // param_tbl->handle_access(param_names[pi]));
4789 // subq_node->definitions = definitions;
4791 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4792 this->error_code = 3;
4796 ret_vec.push_back(subq_node);
// The merge node is constructed from the last per-interface subquery pushed above.
4798 int lpos = ret_vec.size()-1 ;
4799 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[lpos]),c_node->node_name,sel_names, ifaces, ifdb);
4801 Do not split sources until we are done with optimizations
4802 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4804 for(i=0;i<split_merge.size();++i){
4805 hfta_nodes.push_back(split_merge[i]);
4808 hfta_nodes.push_back(mrg_node);
4813 for(i=0;i<hfta_nodes.size();++i) ret_vec.push_back(hfta_nodes[i]);
4814 ret_vec.push_back(stream_node);
4815 hfta_returned = hfta_nodes.size()+1;
// Hand the query definitions (minus "_referenced_ifaces") to every returned
// node here, once, so they are set even when the query has no parameters.
for(unsigned int di=0;di<ret_vec.size();++di){
	ret_vec[di]->definitions = definitions;
	ret_vec[di]->definitions.erase("_referenced_ifaces");
}
4817 // Currently : assume that the stream receives all parameters
4818 // and parameter updates, incorporates them, then passes
4819 // all of the parameters to the FTA.
4820 // This will need to change (tables, fta-unsafe types. etc.)
4822 // I will pass on the use_handle_access marking, even
4823 // though the fcn call that requires handle access might
4824 // exist in only one of the parts of the query.
4825 // Parameter manipulation and handle access determination will
4826 // need to be revisited anyway.
4827 vector<string> param_names = param_tbl->get_param_names();
4829 for(pi=0;pi<param_names.size();pi++){
4831 data_type *dt = param_tbl->get_data_type(param_names[pi]);
// Every returned node receives its own duplicate of the parameter's data type.
4832 for(ri=0;ri<ret_vec.size();++ri){
4833 ret_vec[ri]->param_tbl->add_param(param_names[pi],dt->duplicate(),
4834 param_tbl->handle_access(param_names[pi]));
4846 /////////////////////////////////////////////////////////////
4849 // Common processing
// Register the operator view (UDOP) read through one FROM entry, if there is one.
//
// fmtbl     - tablevar being examined; receives the UDOP alias and the opview index.
// pos       - position of fmtbl in the caller's input list; embedded in generated names.
// node_name - name of the owning query node; stored as the entry's parent_qname.
// Schema    - consulted for the schema type, operator properties, subquery specs and fields.
// qnodes    - parsed queries, searched by name for each subquery of the view.
// opviews   - receives the new opview_entry.
// ret       - receives one duplicated, renamed parse tree per subquery.
// rootnm    - stored as the entry's root_name.
// silo_nm   - suffix appended to the subquery name for the field lookup in Schema.
//
// Nothing is registered unless the tablevar's schema type is OPERATOR_VIEW_SCHEMA.
// NOTE(review): return value presumably 0 on success and nonzero after any of the
// error messages below - confirm.
4850 int process_opview(tablevar_t *fmtbl, int pos, string node_name,
4852 vector<query_node *> &qnodes,
4853 opview_set &opviews,
4854 vector<table_exp_t *> &ret, string rootnm, string silo_nm){
4858 int schref = fmtbl->get_schema_ref();
4862 if(Schema->get_schema_type(schref) == OPERATOR_VIEW_SCHEMA){
4863 opview_entry *opv = new opview_entry();
4864 opv->parent_qname = node_name;
4865 opv->root_name = rootnm;
4866 opv->view_name = fmtbl->get_schema_name();
// Alias is "<node>_UDOP<pos>_<view>", built in tmpstr.
4868 sprintf(tmpstr,"%s_UDOP%d_%s",node_name.c_str(),pos,opv->view_name.c_str());
4869 opv->udop_alias = tmpstr;
4870 fmtbl->set_udop_alias(opv->udop_alias);
// Operator properties "file" and "liveness_timeout" are read from the schema;
// the timeout goes through atoi, so a non-numeric value becomes 0.
4872 opv->exec_fl = Schema->get_op_prop(schref, string("file"));
4873 opv->liveness_timeout = atoi(Schema->get_op_prop(schref, string("liveness_timeout")).c_str());
4875 vector<subquery_spec *> subq = Schema->get_subqueryspecs(schref);
4876 for(s=0;s<subq.size();++s){
4877 // Validate that the fields match.
4878 subquery_spec *sqs = subq[s];
// Fields are looked up under the subquery name with silo_nm appended.
4879 vector<field_entry *> flds = Schema->get_fields(sqs->name+silo_nm);
4880 if(flds.size() == 0){
4881 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in Schema.\n",sqs->name.c_str(), opv->view_name.c_str());
// The subquery may expose extra fields; only the first types.size() are checked.
4884 if(flds.size() < sqs->types.size()){
4885 fprintf(stderr,"ERROR: subquery %s of view %s does not have enough fields (%lu found, %lu expected).\n",sqs->name.c_str(), opv->view_name.c_str(),flds.size(), sqs->types.size());
4888 bool failed = false;
4889 for(f=0;f<sqs->types.size();++f){
// dte is the type the view expects, dtf the type the subquery actually produces.
4890 data_type dte(sqs->types[f],sqs->modifiers[f]);
4891 data_type dtf(flds[f]->get_type(),flds[f]->get_modifier_list());
4892 if(! dte.subsumes_type(&dtf) ){
4893 fprintf(stderr,"ERROR: subquery %s of view %s does not have the correct type for field %d (%s found, %s expected).\n",sqs->name.c_str(), opv->view_name.c_str(),f,dtf.to_string().c_str(), dte.to_string().c_str());
4897 if(dte.is_temporal() && (dte.get_temporal() != dtf.get_temporal()) ){
4898 string pstr = dte.get_temporal_string();
4899 fprintf(stderr,"ERROR: subquery %s of view %s does not have the expected temporal value %s of field %d.\n",sqs->name.c_str(), opv->view_name.c_str(),pstr.c_str(),f);
4906 /// Validation done, find the subquery, make a copy of the
4907 /// parse tree, and add it to the return list.
// Linear search by name; q == qnodes.size() afterwards means no match was found.
4908 for(q=0;q<qnodes.size();++q)
4909 if(qnodes[q]->name == sqs->name)
4911 if(q==qnodes.size()){
4912 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in list of query names.\n",sqs->name.c_str(), opv->view_name.c_str());
// The copy is renamed "<node>_OP<pos>_<view>_SUBQ<s>" and recorded both in ret
// and in the entry's subq_names.
4916 table_exp_t *newq = dup_table_exp(qnodes[q]->parse_tree);
4917 sprintf(tmpstr,"%s_OP%d_%s_SUBQ%d",node_name.c_str(),pos,opv->view_name.c_str(),s);
4918 string newq_name = tmpstr;
4919 newq->nmap["query_name"] = newq_name;
4920 ret.push_back(newq);
4921 opv->subq_names.push_back(newq_name);
// Store the entry and remember its index on the tablevar.
4923 fmtbl->set_opview_idx(opviews.append(opv));
// Collect the operator-view subqueries needed by this selection node.
// The node has a single input (table_name), so process_opview is called once
// with position 0; duplicated subquery parse trees accumulate in ret and the
// status is captured in retval.
4929 vector<table_exp_t *> spx_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4930 vector<table_exp_t *> ret;
4932 int retval = process_opview(table_name,0,node_name,
4933 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries needed by this aggregation node.
// Single input table: process_opview is called once with position 0 and fills
// ret with the duplicated subquery parse trees; its status lands in retval.
4939 vector<table_exp_t *> sgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4940 vector<table_exp_t *> ret;
4942 int retval = process_opview(table_name,0,node_name,
4943 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries needed by this rsgah node.
// Single input table: process_opview is called once with position 0 and fills
// ret with the duplicated subquery parse trees; its status lands in retval.
4948 vector<table_exp_t *> rsgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4949 vector<table_exp_t *> ret;
4951 int retval = process_opview(table_name,0,node_name,
4952 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries needed by this sgahcwcb node.
// Single input table: process_opview is called once with position 0 and fills
// ret with the duplicated subquery parse trees; its status lands in retval.
4958 vector<table_exp_t *> sgahcwcb_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4959 vector<table_exp_t *> ret;
4961 int retval = process_opview(table_name,0,node_name,
4962 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries needed by this merge node.
// Every merge input in fm is examined; its index f is passed as the position so
// the names generated by process_opview differ per input.
4969 vector<table_exp_t *> mrg_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4970 vector<table_exp_t *> ret;
4972 for(f=0;f<fm.size();++f){
4973 int retval = process_opview(fm[f],f,node_name,
4974 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries needed by this hash join.
// Every FROM entry is examined; its index f is passed as the position so the
// names generated by process_opview differ per input.
4983 vector<table_exp_t *> join_eq_hash_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4984 vector<table_exp_t *> ret;
4986 for(f=0;f<from.size();++f){
4987 int retval = process_opview(from[f],f,node_name,
4988 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries needed by this filter join.
// Every FROM entry is examined; its index f is passed as the position so the
// names generated by process_opview differ per input.
4994 vector<table_exp_t *> filter_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4995 vector<table_exp_t *> ret;
4997 for(f=0;f<from.size();++f){
4998 int retval = process_opview(from[f],f,node_name,
4999 Schema,qnodes,opviews,ret, rootnm, silo_name);
5007 //////////////////////////////////////////////////////////////////
5008 //////////////////////////////////////////////////////////////////
5009 /////// Additional methods
5013 //////////////////////////////////////////////////////////////////
5014 // Get schema of operator output
// Output schema of a merge node: its stored table_layout is returned as-is
// (no new table_def is built here).
5016 table_def *mrg_qpn::get_fields(){
5017 return(table_layout);
// Output schema of a selection node, produced by create_attributes from the
// node name and the select list.
5021 table_def *spx_qpn::get_fields(){
5022 return(create_attributes(node_name, select_list));
// Output schema of an sgah node, produced by create_attributes from the node
// name and the select list.
5025 table_def *sgah_qpn::get_fields(){
5026 return(create_attributes(node_name, select_list));
// Output schema of an rsgah node, produced by create_attributes from the node
// name and the select list.
5029 table_def *rsgah_qpn::get_fields(){
5030 return(create_attributes(node_name, select_list));
// Output schema of an sgahcwcb node, produced by create_attributes from the
// node name and the select list.
5033 table_def *sgahcwcb_qpn::get_fields(){
5034 return(create_attributes(node_name, select_list));
// Output schema of a filter join, produced by create_attributes from the node
// name and the select list.
5037 table_def *filter_join_qpn::get_fields(){
5038 return(create_attributes(node_name, select_list));
// Output schema of the hash join. Before the schema is built, the temporal
// property of every select expression is re-derived:
//   1. the two sides of each temporal_eq predicate are collected - column
//      references into temporal_cids, the expressions themselves into temporal_se;
//   2. each select element gets the temporal type computed by compute_se_temporal,
//      with a second chance when it is equivalent to a collected expression;
//   3. when either input carries a property (outer join, per the comment below),
//      temporal select elements are re-checked against the per-side sets of
//      fields that take part in the temporal equality.
// Side effect: the data types of the select_list expressions are modified.
// Returns the table_def produced by create_attributes(node_name, select_list).
5042 table_def *join_eq_hash_qpn::get_fields(){
5045 // First, gather temporal colrefs and SEs.
5046 map<col_id, temporal_type> temporal_cids;
5047 vector<scalarexp_t *> temporal_se;
5048 for(h=0;h<temporal_eq.size();++h){
5049 scalarexp_t *sel = temporal_eq[h]->pr->get_left_se();
5050 scalarexp_t *ser = temporal_eq[h]->pr->get_right_se();
// Left side: a column reference is recorded once with its temporal type.
// NOTE(review): the push into temporal_se is presumably the non-colref branch - confirm.
5052 if(sel->get_operator_type() == SE_COLREF){
5053 col_id tcol(sel->get_colref());
5054 if(temporal_cids.count(tcol) == 0){
5055 temporal_cids[tcol] = sel->get_data_type()->get_temporal();
5058 temporal_se.push_back(sel);
// Right side: same treatment as the left side.
5061 if(ser->get_operator_type() == SE_COLREF){
5062 col_id tcol(ser->get_colref());
5063 if(temporal_cids.count(tcol) == 0){
5064 temporal_cids[tcol] = ser->get_data_type()->get_temporal();
5067 temporal_se.push_back(ser);
5071 // Mark select elements as nontemporal, then deduce which
5072 // ones are temporal.
5073 for(s=0;s<select_list.size();++s){
5074 select_list[s]->se->get_data_type()->set_temporal(
5075 compute_se_temporal(select_list[s]->se, temporal_cids)
5077 // Second chance if it is an exact match to an SE.
5078 // for(s=0;s<select_list.size();++s){
5079 if(! select_list[s]->se->get_data_type()->is_temporal() ){
5080 for(t=0;t<temporal_se.size();++t){
// An expression equivalent to one side of a temporal equality inherits that
// side's temporal type.
5081 if(is_equivalent_se(temporal_se[t], select_list[s]->se)){
5082 select_list[s]->se->get_data_type()->set_temporal(
5083 temporal_se[t]->get_data_type()->get_temporal()
5091 // If there is an outer join, verify that
5092 // the temporal attributes are actually temporal.
5093 // NOTE: this code must be synchronized with the
5094 // equivalence finding in join_eq_hash_qpn::generate_functor
5095 // (and also, the join_eq_hash_qpn constructor)
// NOTE(review): from[0] and from[1] are indexed directly, so two inputs are assumed.
5096 if(from[0]->get_property() || from[1]->get_property()){
5097 set<string> l_equiv, r_equiv;
5098 for(i=0;i<temporal_eq.size();i++){
5099 scalarexp_t *lse = temporal_eq[i]->pr->get_left_se();
5100 scalarexp_t *rse = temporal_eq[i]->pr->get_right_se();
// Only plain column references contribute; the sets are keyed by field name.
5101 if(lse->get_operator_type()==SE_COLREF){
5102 l_equiv.insert(lse->get_colref()->get_field());
5104 if(rse->get_operator_type()==SE_COLREF){
5105 r_equiv.insert(rse->get_colref()->get_field());
5109 for(s=0;s<select_list.size();++s){
5110 if(select_list[s]->se->get_data_type()->is_temporal()){
5112 col_id_set::iterator ci;
5113 bool failed = false;
5114 gather_se_col_ids(select_list[s]->se,cid_set, NULL);
5115 for(ci=cid_set.begin();ci!=cid_set.end();++ci){
// Columns of table variable 0 are checked against l_equiv when from[0] has the
// property; the r_equiv check is the counterpart for from[1].
5116 if((*ci).tblvar_ref == 0){
5117 if(from[0]->get_property()){
5118 if(l_equiv.count((*ci).field) == 0){
5123 if(from[1]->get_property()){
5124 if(r_equiv.count((*ci).field) == 0){
// NOTE(review): presumably guarded by `failed` - the temporal flag is dropped
// when a referenced column is outside the equivalence set of its side. Confirm.
5131 select_list[s]->se->get_data_type()->reset_temporal();
5138 return create_attributes(node_name, select_list);
5143 //-----------------------------------------------------------------
5144 // get input tables
5147 // Get tablevar_t names of input and output tables
5149 // output_file_qpn::output_file_qpn(){source_op_name = ""; }
// Input tablevars of an output-file node.
5150 vector<tablevar_t *> output_file_qpn::get_input_tbls(){
// Input tablevars of a merge node.
5154 vector<tablevar_t *> mrg_qpn::get_input_tbls(){
// Input tablevars of a selection node: the single table_name pointer, wrapped
// in a one-element vector (the tablevar itself is not copied).
5158 vector<tablevar_t *> spx_qpn::get_input_tbls(){
5159 vector<tablevar_t *> retval(1,table_name);
// Input tablevars of an sgah node: the single table_name pointer, wrapped in a
// one-element vector (the tablevar itself is not copied).
5163 vector<tablevar_t *> sgah_qpn::get_input_tbls(){
5164 vector<tablevar_t *> retval(1,table_name);
// Input tablevars of an rsgah node: the single table_name pointer, wrapped in a
// one-element vector (the tablevar itself is not copied).
5168 vector<tablevar_t *> rsgah_qpn::get_input_tbls(){
5169 vector<tablevar_t *> retval(1,table_name);
// Input tablevars of an sgahcwcb node: the single table_name pointer, wrapped
// in a one-element vector (the tablevar itself is not copied).
5173 vector<tablevar_t *> sgahcwcb_qpn::get_input_tbls(){
5174 vector<tablevar_t *> retval(1,table_name);
// Input tablevars of a hash join.
5178 vector<tablevar_t *> join_eq_hash_qpn::get_input_tbls(){
// Input tablevars of a filter join.
5182 vector<tablevar_t *> filter_join_qpn::get_input_tbls(){
5186 //-----------------------------------------------------------------
5187 // get output tables
5190 // This does not make sense, this fcn returns the output table *name*,
5191 // not its schema, and then there is another fcn to return the schema.
// Output of an output-file node: one tablevar_t named after the node.
// A new tablevar_t is allocated on every call.
// NOTE(review): nothing here frees it - confirm who owns the result.
5192 vector<tablevar_t *> output_file_qpn::get_output_tbls(){
5193 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output of a merge node: one tablevar_t named after the node.
// A new tablevar_t is allocated on every call.
// NOTE(review): nothing here frees it - confirm who owns the result.
5197 vector<tablevar_t *> mrg_qpn::get_output_tbls(){
5198 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output of a selection node: one tablevar_t named after the node.
// A new tablevar_t is allocated on every call.
// NOTE(review): nothing here frees it - confirm who owns the result.
5202 vector<tablevar_t *> spx_qpn::get_output_tbls(){
5203 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output of an sgah node: one tablevar_t named after the node.
// A new tablevar_t is allocated on every call.
// NOTE(review): nothing here frees it - confirm who owns the result.
5207 vector<tablevar_t *> sgah_qpn::get_output_tbls(){
5208 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output of an rsgah node: one tablevar_t named after the node.
// A new tablevar_t is allocated on every call.
// NOTE(review): nothing here frees it - confirm who owns the result.
5212 vector<tablevar_t *> rsgah_qpn::get_output_tbls(){
5213 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output of an sgahcwcb node: one tablevar_t named after the node.
// A new tablevar_t is allocated on every call.
// NOTE(review): nothing here frees it - confirm who owns the result.
5217 vector<tablevar_t *> sgahcwcb_qpn::get_output_tbls(){
5218 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output of a hash join: one tablevar_t named after the node.
// A new tablevar_t is allocated on every call.
// NOTE(review): nothing here frees it - confirm who owns the result.
5222 vector<tablevar_t *> join_eq_hash_qpn::get_output_tbls(){
5223 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output of a filter join: one tablevar_t named after the node.
// A new tablevar_t is allocated on every call.
// NOTE(review): nothing here frees it - confirm who owns the result.
5227 vector<tablevar_t *> filter_join_qpn::get_output_tbls(){
5228 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5234 //-----------------------------------------------------------------
5237 // Associate colrefs with this schema.
5238 // Also, use this opportunity to create table_layout (the output schema).
5239 // If the output schema is ever needed before
// Re-resolve every merge input against Schema by schema name, then copy the
// schema refs of the first two inputs onto the merge columns.
5240 void mrg_qpn::bind_to_schema(table_list *Schema){
5242 for(t=0;t<fm.size();++t){
5243 int tblref = Schema->get_table_ref(fm[t]->get_schema_name());
5245 fm[t]->set_schema_ref(tblref );
5248 // Here I assume that the colrefs have been reordered
5249 // during analysis so that mvars line up with fm.
// NOTE(review): only indices 0 and 1 are touched, so at least two entries in
// both mvars and fm are assumed - confirm for merges with more inputs.
5250 mvars[0]->set_schema_ref(fm[0]->get_schema_ref());
5251 mvars[1]->set_schema_ref(fm[1]->get_schema_ref());
5258 // Associate colrefs in SEs with this schema.
// Rebind this selection node to Schema: the input tablevar gets the table ref
// found under its schema name, then every WHERE predicate and select
// expression is rebound against a one-entry FROM list holding that tablevar.
5259 void spx_qpn::bind_to_schema(table_list *Schema){
5260 // Bind the tablevars in the From clause to the Schema
5261 // (it might have changed from analysis time)
5262 int t = Schema->get_table_ref(table_name->get_schema_name() );
5264 table_name->set_schema_ref(t );
5266 // Get the "from" clause
5267 tablevar_list_t fm(table_name);
5269 // Bind all SEs to this schema
5271 for(p=0;p<where.size();++p){
5272 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5275 for(s=0;s<select_list.size();++s){
5276 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5279 // Collect set of tuples referenced in this HFTA
5280 // input, internal, or output.
// Gather the column ids referenced by this node's WHERE predicates and select
// expressions into tmp_cset, then filter them into retval.
// The visible filter keeps a column only when its schema field has at least
// one unpack function.
// NOTE(review): how ext_fcns_only chooses between retval and tmp_cset is not
// evident from these lines - confirm. get_field's result is dereferenced
// without a null check.
5284 col_id_set spx_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5285 col_id_set retval, tmp_cset;
5287 for(p=0;p<where.size();++p){
5288 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5291 for(s=0;s<select_list.size();++s){
5292 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5294 col_id_set::iterator cisi;
5296 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
// Look the column up by (schema_ref, field) to reach its unpack functions.
5297 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5298 if(fe->get_unpack_fcns().size()>0)
5299 retval.insert((*cisi));
// Gather the column ids referenced by this join's WHERE predicates and select
// expressions into tmp_cset, then filter them into retval.
// The visible filter keeps a column only when its schema field has at least
// one unpack function.
// NOTE(review): how ext_fcns_only chooses between retval and tmp_cset is not
// evident from these lines - confirm. get_field's result is dereferenced
// without a null check.
5307 col_id_set filter_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5308 col_id_set retval, tmp_cset;
5310 for(p=0;p<where.size();++p){
5311 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5314 for(s=0;s<select_list.size();++s){
5315 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5317 col_id_set::iterator cisi;
5319 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
// Look the column up by (schema_ref, field) to reach its unpack functions.
5320 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5321 if(fe->get_unpack_fcns().size()>0)
5322 retval.insert((*cisi));
5332 // Associate colrefs in SEs with this schema.
// Rebind this hash join to Schema: every FROM tablevar gets the table ref
// found under its schema name, then all WHERE predicates and select
// expressions are rebound against the full FROM list.
5333 void join_eq_hash_qpn::bind_to_schema(table_list *Schema){
5334 // Bind the tablevars in the From clause to the Schema
5335 // (it might have changed from analysis time)
5337 for(f=0;f<from.size();++f){
5338 string snm = from[f]->get_schema_name();
5339 int tbl_ref = Schema->get_table_ref(snm);
5341 from[f]->set_schema_ref(tbl_ref);
5344 // Bind all SEs to this schema
// fm wraps the FROM vector in the list type expected by the bind helpers.
5345 tablevar_list_t fm(from);
5348 for(p=0;p<where.size();++p){
5349 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5352 for(s=0;s<select_list.size();++s){
5353 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5356 // Collect set of tuples referenced in this HFTA
5357 // input, internal, or output.
// Rebind this filter join to Schema: every FROM tablevar gets the table ref
// found under its schema name, then all WHERE predicates and select
// expressions are rebound against the full FROM list.
5361 void filter_join_qpn::bind_to_schema(table_list *Schema){
5362 // Bind the tablevars in the From clause to the Schema
5363 // (it might have changed from analysis time)
5365 for(f=0;f<from.size();++f){
5366 string snm = from[f]->get_schema_name();
5367 int tbl_ref = Schema->get_table_ref(snm);
5369 from[f]->set_schema_ref(tbl_ref);
5372 // Bind all SEs to this schema
// fm wraps the FROM vector in the list type expected by the bind helpers.
5373 tablevar_list_t fm(from);
5376 for(p=0;p<where.size();++p){
5377 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5380 for(s=0;s<select_list.size();++s){
5381 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5384 // Collect set of tuples referenced in this HFTA
5385 // input, internal, or output.
// Rebind this aggregation node to Schema: the input tablevar gets the table
// ref found under its schema name, then every expression the node owns is
// rebound - WHERE and HAVING predicates, select expressions, group-by
// definitions and aggregate operands.
5392 void sgah_qpn::bind_to_schema(table_list *Schema){
5393 // Bind the tablevars in the From clause to the Schema
5394 // (it might have changed from analysis time)
5397 int t = Schema->get_table_ref(table_name->get_schema_name() );
5399 table_name->set_schema_ref(t );
5401 // Get the "from" clause
5402 tablevar_list_t fm(table_name);
5406 // Bind all SEs to this schema
5408 for(p=0;p<where.size();++p){
5409 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5411 for(p=0;p<having.size();++p){
5412 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5415 for(s=0;s<select_list.size();++s){
5416 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5419 for(g=0;g<gb_tbl.size();++g){
5420 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5423 for(a=0;a<aggr_tbl.size();++a){
// Built-in aggregates carry a single expression.
// NOTE(review): the operand-list loop is presumably the non-builtin branch - confirm.
5424 if(aggr_tbl.is_builtin(a)){
5425 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5427 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5429 for(o=0;o<opl.size();++o){
5430 bind_to_schema_se(opl[o],&fm,Schema);
// Gather the column ids referenced by this node's WHERE predicates, group-by
// definitions and aggregate operands into tmp_cset (gb_tbl is passed to the
// gather helpers), then filter them into retval.
// The visible filter keeps a column only when its schema field has at least
// one unpack function.
// NOTE(review): how ext_fcns_only chooses between retval and tmp_cset is not
// evident from these lines - confirm. get_field's result is dereferenced
// without a null check.
5436 col_id_set sgah_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5437 col_id_set retval, tmp_cset;
5439 for(p=0;p<where.size();++p){
5440 gather_pr_col_ids(where[p]->pr, tmp_cset, &gb_tbl);
5443 for(g=0;g<gb_tbl.size();++g){
5444 gather_se_col_ids(gb_tbl.get_def(g), tmp_cset, &gb_tbl);
5447 for(a=0;a<aggr_tbl.size();++a){
// Built-in aggregates carry a single expression.
// NOTE(review): the operand-list loop is presumably the non-builtin branch - confirm.
5448 if(aggr_tbl.is_builtin(a)){
5449 gather_se_col_ids(aggr_tbl.get_aggr_se(a), tmp_cset, &gb_tbl);
5451 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5453 for(o=0;o<opl.size();++o){
5454 gather_se_col_ids(opl[o], tmp_cset, &gb_tbl);
5459 col_id_set::iterator cisi;
5461 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
// Look the column up by (schema_ref, field) to reach its unpack functions.
5462 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5463 if(fe->get_unpack_fcns().size()>0)
5464 retval.insert((*cisi));
// Rebind this rsgah node to Schema: the input tablevar gets the table ref
// found under its schema name, then every expression the node owns is
// rebound - WHERE, HAVING and CLOSING_WHEN predicates, select expressions,
// group-by definitions and aggregate operands.
5473 void rsgah_qpn::bind_to_schema(table_list *Schema){
5474 // Bind the tablevars in the From clause to the Schema
5475 // (it might have changed from analysis time)
5476 int t = Schema->get_table_ref(table_name->get_schema_name() );
5478 table_name->set_schema_ref(t );
5480 // Get the "from" clause
5481 tablevar_list_t fm(table_name);
5483 // Bind all SEs to this schema
5485 for(p=0;p<where.size();++p){
5486 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5488 for(p=0;p<having.size();++p){
5489 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5491 for(p=0;p<closing_when.size();++p){
5492 bind_to_schema_pr(closing_when[p]->pr, &fm, Schema);
5495 for(s=0;s<select_list.size();++s){
5496 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5499 for(g=0;g<gb_tbl.size();++g){
5500 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5503 for(a=0;a<aggr_tbl.size();++a){
// Built-in aggregates carry a single expression.
// NOTE(review): the operand-list loop is presumably the non-builtin branch - confirm.
5504 if(aggr_tbl.is_builtin(a)){
5505 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5507 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5509 for(o=0;o<opl.size();++o){
5510 bind_to_schema_se(opl[o],&fm,Schema);
5517 void sgahcwcb_qpn::bind_to_schema(table_list *Schema){
5518 // Bind the tablevars in the From clause to the Schema
5519 // (it might have changed from analysis time)
5520 int t = Schema->get_table_ref(table_name->get_schema_name() );
5522 table_name->set_schema_ref(t );
5524 // Get the "from" clause
5525 tablevar_list_t fm(table_name);
5527 // Bind all SEs to this schema
5529 for(p=0;p<where.size();++p){
5530 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5532 for(p=0;p<having.size();++p){
5533 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5535 for(p=0;p<having.size();++p){
5536 bind_to_schema_pr(cleanby[p]->pr, &fm, Schema);
5538 for(p=0;p<having.size();++p){
5539 bind_to_schema_pr(cleanwhen[p]->pr, &fm, Schema);
5542 for(s=0;s<select_list.size();++s){
5543 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5546 for(g=0;g<gb_tbl.size();++g){
5547 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5550 for(a=0;a<aggr_tbl.size();++a){
5551 if(aggr_tbl.is_builtin(a)){
5552 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5554 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5556 for(o=0;o<opl.size();++o){
5557 bind_to_schema_se(opl[o],&fm,Schema);
5568 ///////////////////////////////////////////////////////////////
5569 ///////////////////////////////////////////////////////////////
5570 /// Functions for code generation.
5573 //-----------------------------------------------------------------
// A merge operator contributes no complex literals: a newly allocated, empty
// cplx_lit_table is returned and Ext_fcns is not consulted.
// NOTE(review): the table is created with new; presumably the caller owns it.
5576 cplx_lit_table *mrg_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5577 return(new cplx_lit_table());
// Build the table of complex literals used by this select/project operator.
// A new cplx_lit_table is allocated and filled by scanning the select list
// expressions and the WHERE predicates; Ext_fcns is passed through to the
// find_complex_literal_* helpers.  Returns the newly allocated table.
// NOTE(review): the table is created with new; presumably the caller owns it.
5580 cplx_lit_table *spx_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5582 cplx_lit_table *complex_literals = new cplx_lit_table();
5584 for(i=0;i<select_list.size();i++){
5585 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5587 for(i=0;i<where.size();++i){
5588 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5591 return(complex_literals);
// Build the table of complex literals used by this aggregation operator.
// A new cplx_lit_table is allocated and filled by scanning, in order: the
// aggregate arguments, the select list, the group-by definitions, the WHERE
// predicates and the HAVING predicates.  Returns the newly allocated table.
// NOTE(review): the table is created with new; presumably the caller owns it.
5594 cplx_lit_table *sgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5596 cplx_lit_table *complex_literals = new cplx_lit_table();
5598 for(i=0;i<aggr_tbl.size();++i){
// Built-in aggregates other than star aggregates have an aggregate expression to scan.
5599 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5600 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
// Operand-list form: every operand expression is scanned.
5602 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5603 for(j=0;j<opl.size();++j)
5604 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
5608 for(i=0;i<select_list.size();i++){
5609 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5611 for(i=0;i<gb_tbl.size();i++){
5612 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
5614 for(i=0;i<where.size();++i){
5615 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5617 for(i=0;i<having.size();++i){
5618 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
5621 return(complex_literals);
// Build the table of complex literals used by this running-aggregation
// operator.  A new cplx_lit_table is allocated and filled by scanning, in
// order: the aggregate arguments, the select list, the group-by definitions,
// and the WHERE, HAVING and CLOSING_WHEN predicates.  Returns the new table.
// NOTE(review): the table is created with new; presumably the caller owns it.
5625 cplx_lit_table *rsgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5627 cplx_lit_table *complex_literals = new cplx_lit_table();
5629 for(i=0;i<aggr_tbl.size();++i){
// Built-in aggregates other than star aggregates have an aggregate expression to scan.
5630 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5631 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
// Operand-list form: every operand expression is scanned.
5633 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5634 for(j=0;j<opl.size();++j)
5635 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
5639 for(i=0;i<select_list.size();i++){
5640 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5642 for(i=0;i<gb_tbl.size();i++){
5643 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
5645 for(i=0;i<where.size();++i){
5646 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5648 for(i=0;i<having.size();++i){
5649 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
5651 for(i=0;i<closing_when.size();++i){
5652 find_complex_literal_pr(closing_when[i]->pr,Ext_fcns, complex_literals);
5655 return(complex_literals);
// Build the table of complex literals used by this operator.
// A new cplx_lit_table is allocated and filled by scanning, in order: the
// aggregate arguments, the select list, the group-by definitions, and the
// WHERE, HAVING, cleanwhen and cleanby predicates.  Returns the new table.
// NOTE(review): the table is created with new; presumably the caller owns it.
5659 cplx_lit_table *sgahcwcb_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5661 cplx_lit_table *complex_literals = new cplx_lit_table();
5663 for(i=0;i<aggr_tbl.size();++i){
// Built-in aggregates other than star aggregates have an aggregate expression to scan.
5664 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5665 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
// Operand-list form: every operand expression is scanned.
5667 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5668 for(j=0;j<opl.size();++j)
5669 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
5673 for(i=0;i<select_list.size();i++){
5674 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5676 for(i=0;i<gb_tbl.size();i++){
5677 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
5679 for(i=0;i<where.size();++i){
5680 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5682 for(i=0;i<having.size();++i){
5683 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
// The cleaning predicates are each iterated with their own size.
5685 for(i=0;i<cleanwhen.size();++i){
5686 find_complex_literal_pr(cleanwhen[i]->pr,Ext_fcns, complex_literals);
5688 for(i=0;i<cleanby.size();++i){
5689 find_complex_literal_pr(cleanby[i]->pr,Ext_fcns, complex_literals);
5692 return(complex_literals);
// Build the table of complex literals used by this hash-join operator.
// A new cplx_lit_table is allocated and filled by scanning the select list
// expressions and the WHERE predicates.  Returns the newly allocated table.
// NOTE(review): the table is created with new; presumably the caller owns it.
5695 cplx_lit_table *join_eq_hash_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5697 cplx_lit_table *complex_literals = new cplx_lit_table();
5699 for(i=0;i<select_list.size();i++){
5700 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5702 for(i=0;i<where.size();++i){
5703 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5706 return(complex_literals);
// Build the table of complex literals used by this filter-join operator.
// A new cplx_lit_table is allocated and filled by scanning the select list
// expressions and the WHERE predicates.  Returns the newly allocated table.
// NOTE(review): the table is created with new; presumably the caller owns it.
5709 cplx_lit_table *filter_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5711 cplx_lit_table *complex_literals = new cplx_lit_table();
5713 for(i=0;i<select_list.size();i++){
5714 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5716 for(i=0;i<where.size();++i){
5717 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5720 return(complex_literals);
5726 //-----------------------------------------------------------------
5727 // get_handle_param_tbl
// Handle-parameter table for a merge operator.  Only the declaration of the
// result vector is involved here; nothing is scanned and Ext_fcns is unused.
5729 vector<handle_param_tbl_entry *> mrg_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5730 vector<handle_param_tbl_entry *> retval;
// Collect the handle parameters referenced by this select/project operator.
// Entries are accumulated into retval by scanning the select list expressions
// and the WHERE predicates with the find_param_handles_* helpers.
5735 vector<handle_param_tbl_entry *> spx_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5737 vector<handle_param_tbl_entry *> retval;
5739 for(i=0;i<select_list.size();i++){
5740 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5742 for(i=0;i<where.size();++i){
5743 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collect the handle parameters referenced by this aggregation operator.
// Entries are accumulated into retval by scanning, in order: the aggregate
// arguments, the select list, the group-by definitions, the WHERE predicates
// and the HAVING predicates.
5750 vector<handle_param_tbl_entry *> sgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5752 vector<handle_param_tbl_entry *> retval;
5755 for(i=0;i<aggr_tbl.size();++i){
// Built-in aggregates other than star aggregates have an aggregate expression to scan.
5756 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5757 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
// Operand-list form: every operand expression is scanned.
5759 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5760 for(j=0;j<opl.size();++j)
5761 find_param_handles_se(opl[j], Ext_fcns, retval);
5764 for(i=0;i<select_list.size();i++){
5765 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5767 for(i=0;i<gb_tbl.size();i++){
5768 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
5770 for(i=0;i<where.size();++i){
5771 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5773 for(i=0;i<having.size();++i){
5774 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
// Collect the handle parameters referenced by this running-aggregation
// operator.  Entries are accumulated into retval by scanning, in order: the
// aggregate arguments, the select list, the group-by definitions, and the
// WHERE, HAVING and CLOSING_WHEN predicates.
5781 vector<handle_param_tbl_entry *> rsgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5783 vector<handle_param_tbl_entry *> retval;
5786 for(i=0;i<aggr_tbl.size();++i){
// Built-in aggregates other than star aggregates have an aggregate expression to scan.
5787 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5788 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
// Operand-list form: every operand expression is scanned.
5790 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5791 for(j=0;j<opl.size();++j)
5792 find_param_handles_se(opl[j], Ext_fcns, retval);
5795 for(i=0;i<select_list.size();i++){
5796 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5798 for(i=0;i<gb_tbl.size();i++){
5799 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
5801 for(i=0;i<where.size();++i){
5802 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5804 for(i=0;i<having.size();++i){
5805 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
5807 for(i=0;i<closing_when.size();++i){
5808 find_param_handles_pr(closing_when[i]->pr,Ext_fcns, retval);
// Collect the handle parameters referenced by this operator.
// Entries are accumulated into retval by scanning, in order: the aggregate
// arguments, the select list, the group-by definitions, and the WHERE,
// HAVING, cleanwhen and cleanby predicates.
5815 vector<handle_param_tbl_entry *> sgahcwcb_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5817 vector<handle_param_tbl_entry *> retval;
5820 for(i=0;i<aggr_tbl.size();++i){
// Built-in aggregates other than star aggregates have an aggregate expression to scan.
5821 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5822 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
// Operand-list form: every operand expression is scanned.
5824 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5825 for(j=0;j<opl.size();++j)
5826 find_param_handles_se(opl[j], Ext_fcns, retval);
5829 for(i=0;i<select_list.size();i++){
5830 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5832 for(i=0;i<gb_tbl.size();i++){
5833 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
5835 for(i=0;i<where.size();++i){
5836 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5838 for(i=0;i<having.size();++i){
5839 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
// The cleaning predicates are each iterated with their own size.
5841 for(i=0;i<cleanwhen.size();++i){
5842 find_param_handles_pr(cleanwhen[i]->pr,Ext_fcns, retval);
5844 for(i=0;i<cleanby.size();++i){
5845 find_param_handles_pr(cleanby[i]->pr,Ext_fcns, retval);
// Collect the handle parameters referenced by this hash-join operator.
// Entries are accumulated into retval by scanning the select list expressions
// and the WHERE predicates with the find_param_handles_* helpers.
5851 vector<handle_param_tbl_entry *> join_eq_hash_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5853 vector<handle_param_tbl_entry *> retval;
5855 for(i=0;i<select_list.size();i++){
5856 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5858 for(i=0;i<where.size();++i){
5859 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collect the handle parameters referenced by this filter-join operator.
// Entries are accumulated into retval by scanning the select list expressions
// and the WHERE predicates with the find_param_handles_* helpers.
5866 vector<handle_param_tbl_entry *> filter_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5868 vector<handle_param_tbl_entry *> retval;
5870 for(i=0;i<select_list.size();i++){
5871 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5873 for(i=0;i<where.size();++i){
5874 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5880 ///////////////////////////////////////////////////////////////
5881 ///////////////////////////////////////////////////////////////
5882 /// Functions for operator output rate estimation
5885 //-----------------------------------------------------------------
5886 // get_rate_estimate
// Estimated output rate of a select/project operator: a fixed placeholder
// equal to SPX_SELECTIVITY times DEFAULT_INTERFACE_RATE; no query-specific
// information is used.
5888 double spx_qpn::get_rate_estimate() {
5890 // dummy method for now
5891 return SPX_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of an aggregation operator: a fixed placeholder equal
// to SGAH_SELECTIVITY times DEFAULT_INTERFACE_RATE; no query-specific
// information is used.
5894 double sgah_qpn::get_rate_estimate() {
5896 // dummy method for now
5897 return SGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of a running-aggregation operator: a fixed
// placeholder equal to RSGAH_SELECTIVITY times DEFAULT_INTERFACE_RATE; no
// query-specific information is used.
5900 double rsgah_qpn::get_rate_estimate() {
5902 // dummy method for now
5903 return RSGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of this operator: a fixed placeholder equal to
// SGAHCWCB_SELECTIVITY times DEFAULT_INTERFACE_RATE; no query-specific
// information is used.
5906 double sgahcwcb_qpn::get_rate_estimate() {
5908 // dummy method for now
5909 return SGAHCWCB_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of a merge operator: a fixed placeholder equal to
// MRG_SELECTIVITY times DEFAULT_INTERFACE_RATE; no query-specific information
// is used.
5912 double mrg_qpn::get_rate_estimate() {
5914 // dummy method for now
5915 return MRG_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of a hash-join operator: a fixed placeholder equal to
// JOIN_EQ_HASH_SELECTIVITY times DEFAULT_INTERFACE_RATE; no query-specific
// information is used.
5918 double join_eq_hash_qpn::get_rate_estimate() {
5920 // dummy method for now
5921 return JOIN_EQ_HASH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
5925 //////////////////////////////////////////////////////////////////////////////
5926 //////////////////////////////////////////////////////////////////////////////
5927 ///// Generate functors
5932 //-------------------------------------------------------------------------
5933 // Code generation utilities.
5934 //-------------------------------------------------------------------------
5936 // Globals referenced by generate utilities
// File-scope pointer to the group-by table.  generate_se_code() dereferences
// it (get_def) whenever an expression refers to a group-by attribute, so it
// must point at the current operator's table before such code is generated.
5938 static gb_table *segen_gb_tbl; // Table of all group-by attributes.
5942 // Generate code that makes reference
5943 // to the tuple, and not to any aggregates.
5944 // NEW : it might reference a stateful function.
// Translate the scalar expression se into C/C++ source text, dispatching on
// se->get_operator_type().  The text refers to tuple data (unpack_var_*),
// parameters, literals and function calls, never to aggregate values.
// Naming rules visible below:
//   handle reference            -> handle_param_<handle_ref>
//   complex literal             -> complex_literal_<cpx_lit_ref>
//   other literal               -> literal's to_hfta_C_code("")
//   parameter                   -> param_<param_name>
//   column reference            -> unpack_var_<field>_<tablevar_ref>
//   group-by attribute          -> code of its defining expression, obtained
//                                  from segen_gb_tbl
//   partial function            -> partial_fcn_result_<partial_ref>
//   function with storage state -> call receives
//                                  &(stval->state_var_<state>),cd
// An unknown operator type is reported on stderr and an error string is
// returned in place of code.
// NOTE(review): names are formatted with sprintf into tmpstr; confirm the
// buffer is large enough for arbitrary field names, or switch to snprintf.
5945 static string generate_se_code(scalarexp_t *se,table_list *schema){
5947 data_type *ldt, *rdt;
5949 vector<scalarexp_t *> operands;
5952 switch(se->get_operator_type()){
5954 if(se->is_handle_ref()){
5955 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
5959 if(se->get_literal()->is_cpx_lit()){
5960 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
5964 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
5966 if(se->is_handle_ref()){
5967 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
5971 ret.append("param_");
5972 ret.append(se->get_param_name());
// Single-operand operator: the operand's data type may supply a replacement
// ("complex") operator; otherwise the operator text is emitted before the
// operand's code.
5975 ldt = se->get_left_se()->get_data_type();
5976 if(ldt->complex_operator(se->get_op()) ){
5977 ret.append( ldt->get_complex_operator(se->get_op()) );
5979 ret.append(generate_se_code(se->get_left_se(),schema));
5983 ret.append(se->get_op());
5984 ret.append(generate_se_code(se->get_left_se(),schema));
// Two-operand operator: same idea, with the complex operator chosen from the
// left type, the right type and the operator; otherwise left, op, right.
5989 ldt = se->get_left_se()->get_data_type();
5990 rdt = se->get_right_se()->get_data_type();
5992 if(ldt->complex_operator(rdt, se->get_op()) ){
5993 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
5995 ret.append(generate_se_code(se->get_left_se(),schema));
5997 ret.append(generate_se_code(se->get_right_se(),schema));
6001 ret.append(generate_se_code(se->get_left_se(),schema));
6002 ret.append(se->get_op());
6003 ret.append(generate_se_code(se->get_right_se(),schema));
6008 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6009 // so return the defining code.
6010 int gref = se->get_gb_ref();
6011 scalarexp_t *gdef_se = segen_gb_tbl->get_def(gref);
6012 ret = generate_se_code(gdef_se, schema );
6015 sprintf(tmpstr,"unpack_var_%s_%d",
6016 se->get_colref()->get_field().c_str(), se->get_colref()->get_tablevar_ref() );
6021 if(se->is_partial()){
6022 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
// Function call: "<op>(" followed by the argument list.  first_elem tracks
// whether a ", " separator is needed before the next argument.
6025 ret += se->op + "(";
6026 operands = se->get_operands();
6027 bool first_elem = true;
6028 if(se->get_storage_state() != ""){
6029 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6032 for(o=0;o<operands.size();o++){
6033 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references are singled out here
// (presumably to be passed by address -- confirm).
6034 if(operands[o]->get_data_type()->is_buffer_type() &&
6035 (! (operands[o]->is_handle_ref()) ) )
6037 ret += generate_se_code(operands[o], schema);
6043 fprintf(stderr,"INTERNAL ERROR in generate_se_code (hfta), line %d, character %d: unknown operator type %d\n",
6044 se->get_lineno(), se->get_charno(),se->get_operator_type());
6045 return("ERROR in generate_se_code");
6049 // generate code that refers only to aggregate data and constants.
6050 // NEW : modified to handle superaggregates and stateful fcn refs.
6051 // Assume that the state is in *stval
// Translate the scalar expression se into C/C++ source text that reads its
// values from group-by and aggregate variables instead of the input tuple.
// gbvar and aggvar are textual prefixes used to build variable names:
//   group-by reference   -> <gbvar><gb_ref>
//   aggregate reference  -> <aggvar>aggr_var<aggr_ref>
//   superaggregate       -> stval->aggr_var<aggr_ref>
//   aggr_ref >= 0 (UDAF) -> udaf_ret_<aggr_ref>
//   partial function     -> partial_fcn_result_<partial_ref>
// Handle references, literals, parameters, operators and function calls are
// rendered as in generate_se_code, recursing through this function.
// A column reference that is not a group-by attribute is reported as an error
// on stderr; so is an unknown operator type, which yields an error string.
// NOTE(review): names are formatted with sprintf into tmpstr; gbvar/aggvar
// are caller-supplied strings, so confirm the buffer is large enough.
6052 static string generate_se_code_fm_aggr(scalarexp_t *se, string gbvar, string aggvar, table_list *schema){
6055 data_type *ldt, *rdt;
6057 vector<scalarexp_t *> operands;
6060 switch(se->get_operator_type()){
6062 if(se->is_handle_ref()){
6063 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6067 if(se->get_literal()->is_cpx_lit()){
6068 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6072 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
6074 if(se->is_handle_ref()){
6075 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6079 ret.append("param_");
6080 ret.append(se->get_param_name());
// Single-operand operator: use the data type's complex operator when it has
// one, otherwise emit the operator text before the operand's code.
6083 ldt = se->get_left_se()->get_data_type();
6084 if(ldt->complex_operator(se->get_op()) ){
6085 ret.append( ldt->get_complex_operator(se->get_op()) );
6087 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6091 ret.append(se->get_op());
6092 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
// Two-operand operator: complex operator chosen from both operand types,
// otherwise left, op, right.
6097 ldt = se->get_left_se()->get_data_type();
6098 rdt = se->get_right_se()->get_data_type();
6100 if(ldt->complex_operator(rdt, se->get_op()) ){
6101 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6103 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6105 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6109 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6110 ret.append(se->get_op());
6111 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6116 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6117 // so return the defining code.
// Unlike generate_se_code, the group-by value is referenced by variable name
// here rather than by expanding its defining expression.
6118 sprintf(tmpstr,"%s%d",gbvar.c_str(),se->get_gb_ref());
6122 fprintf(stderr,"ERROR reference to non-GB column ref not permitted here,"
6123 "error in query_plan.cc:generate_se_code_fm_aggr, line %d, character %d.\n",
6124 se->get_lineno(), se->get_charno());
6130 if(se->is_superaggr()){
6131 sprintf(tmpstr,"stval->aggr_var%d",se->get_aggr_ref());
6133 sprintf(tmpstr,"%saggr_var%d",aggvar.c_str(),se->get_aggr_ref());
6139 if(se->get_aggr_ref() >= 0){
6140 sprintf(tmpstr,"udaf_ret_%d",se->get_aggr_ref());
6145 if(se->is_partial()){
6146 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
// Function call: "<op>(" followed by the argument list.  first_elem tracks
// whether a ", " separator is needed before the next argument.
6149 ret += se->op + "(";
6150 bool first_elem = true;
6151 if(se->get_storage_state() != ""){
6152 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6155 operands = se->get_operands();
6156 for(o=0;o<operands.size();o++){
6157 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references are singled out here
// (presumably to be passed by address -- confirm).
6158 if(operands[o]->get_data_type()->is_buffer_type() &&
6159 (! (operands[o]->is_handle_ref()) ) )
6161 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
6167 fprintf(stderr,"INTERNAL ERROR in query_plan.cc::generate_se_code_fm_aggr, line %d, character %d: unknown operator type %d\n",
6168 se->get_lineno(), se->get_charno(),se->get_operator_type());
6169 return("ERROR in generate_se_code_fm_aggr");
// Generate the statement that evaluates a partial function whose operands are
// computed from group-by / aggregate variables.  The generated text starts
// with "\tretval = <op>( " and passes &partial_fcn_result_<pfn_id>, then the
// state variable and cd when the function has storage state, then the
// operands rendered by generate_se_code_fm_aggr.
// se must be a function expression (SE_FUNC); otherwise an error is printed
// to stderr and an error string is returned.
6175 static string unpack_partial_fcn_fm_aggr(scalarexp_t *se, int pfn_id, string gbvar, string aggvar, table_list *schema){
6178 vector<scalarexp_t *> operands;
6181 if(se->get_operator_type() != SE_FUNC){
6182 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn_fm_aggr. line %d, character %d\n",
6183 se->get_lineno(), se->get_charno());
6184 return("ERROR in unpack_partial_fcn_fm_aggr");
// NOTE(review): the next statement ends with ',' rather than ';', so it is
// joined to the sprintf below by the comma operator.  Evaluation order is
// unaffected, but this is almost certainly a typo.
6187 ret = "\tretval = " + se->get_op() + "( ",
6188 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
6191 if(se->get_storage_state() != ""){
6192 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6195 operands = se->get_operands();
6196 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references are singled out here
// (presumably to be passed by address -- confirm).
6198 if(operands[o]->get_data_type()->is_buffer_type() &&
6199 (! (operands[o]->is_handle_ref()) ) )
6201 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
// Generate the statement that evaluates a partial function whose operands are
// computed from the input tuple.  The generated text starts with
// "\tretval = <op>( " and passes &partial_fcn_result_<pfn_id>, then the state
// variable and cd when the function has storage state, then the operands
// rendered by generate_se_code.
// se must be a function expression (SE_FUNC); otherwise an error is printed
// to stderr and an error string is returned.
6209 static string unpack_partial_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6212 vector<scalarexp_t *> operands;
6214 if(se->get_operator_type() != SE_FUNC){
6215 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn. line %d, character %d\n",
6216 se->get_lineno(), se->get_charno());
6217 return("ERROR in unpack_partial_fcn");
// NOTE(review): the next statement ends with ',' rather than ';', so it is
// joined to the sprintf below by the comma operator.  Evaluation order is
// unaffected, but this is almost certainly a typo.
6220 ret = "\tretval = " + se->get_op() + "( ",
6221 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
6224 if(se->get_storage_state() != ""){
6225 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6228 operands = se->get_operands();
6229 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references are singled out here
// (presumably to be passed by address -- confirm).
6231 if(operands[o]->get_data_type()->is_buffer_type() &&
6232 (! (operands[o]->is_handle_ref()) ) )
6234 ret += generate_se_code(operands[o], schema);
// Generate the call expression for a function: "<op>(" followed by the state
// variable and cd (with a trailing comma) when the function has storage
// state, then the operands rendered by generate_se_code.
// pfn_id is not used in the lines shown here.
// se must be a function expression (SE_FUNC); otherwise an error is printed
// to stderr and an error string is returned.
6241 static string generate_cached_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6244 vector<scalarexp_t *> operands;
6246 if(se->get_operator_type() != SE_FUNC){
6247 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to generate_cached_fcn. line %d, character %d\n",
6248 se->get_lineno(), se->get_charno());
6249 return("ERROR in generate_cached_fcn");
6252 ret = se->get_op()+"(";
6254 if(se->get_storage_state() != ""){
6255 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd,";
6258 operands = se->get_operands();
6259 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references are singled out here
// (presumably to be passed by address -- confirm).
6261 if(operands[o]->get_data_type()->is_buffer_type() &&
6262 (! (operands[o]->is_handle_ref()) ) )
6264 ret += generate_se_code(operands[o], schema);
// Map a query-language comparison operator to its C spelling:
// "=" becomes "==" and "<>" becomes "!=".
// NOTE(review): other operators are presumably passed through unchanged --
// confirm.
6275 static string generate_C_comparison_op(string op){
6276 if(op == "=") return("==");
6277 if(op == "<>") return("!=");
// Map a query-language boolean operator (AND / OR / NOT) to its C spelling.
// Only three capitalisations of each keyword are recognised: all upper case,
// leading capital, and all lower case.  Any other input yields the string
// "ERROR UNKNOWN BOOLEAN OPERATOR", which ends up in the generated code.
6281 static string generate_C_boolean_op(string op){
6282 if( (op == "AND") || (op == "And") || (op == "and") ){
6285 if( (op == "OR") || (op == "Or") || (op == "or") ){
6288 if( (op == "NOT") || (op == "Not") || (op == "not") ){
6292 return("ERROR UNKNOWN BOOLEAN OPERATOR");
// Translate the predicate pr into a C/C++ boolean expression over tuple data,
// dispatching on pr->get_operator_type().  Scalar operands are rendered with
// generate_se_code.  Forms visible below:
//   - literal-list membership: one test per literal, joined with " || ";
//   - comparison of two scalar expressions;
//   - single-operand and two-operand boolean operators, rendered recursively
//     with the operator mapped through generate_C_boolean_op;
//   - a predicate function call "<op>( args )".
// When the data type reports complex_comparison, the type's hfta comparison
// function is called instead of using a C operator directly.
// An unknown predicate type is reported on stderr and yields an error string.
6296 static string generate_predicate_code(predicate_t *pr,table_list *schema){
6298 vector<literal_t *> litv;
6300 data_type *ldt, *rdt;
6301 vector<scalarexp_t *> op_list;
6304 switch(pr->get_operator_type()){
6306 ldt = pr->get_left_se()->get_data_type();
6309 litv = pr->get_lit_vec();
6310 for(i=0;i<litv.size();i++){
6311 if(i>0) ret.append(" || ");
// Both sides have the left expression's type, hence complex_comparison(ldt)
// is asked about ldt against itself.
6314 if(ldt->complex_comparison(ldt) ){
6315 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6317 if(ldt->is_buffer_type() )
6319 ret.append(generate_se_code(pr->get_left_se(), schema));
6321 if(ldt->is_buffer_type() )
6323 if(litv[i]->is_cpx_lit()){
6324 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6327 ret.append(litv[i]->to_C_code(""));
// The comparison function's result is tested against 0 for equality.
6329 ret.append(") == 0");
6331 ret.append(generate_se_code(pr->get_left_se(), schema));
6333 ret.append(litv[i]->to_hfta_C_code(""));
6342 ldt = pr->get_left_se()->get_data_type();
6343 rdt = pr->get_right_se()->get_data_type();
6346 if(ldt->complex_comparison(rdt) ){
6347 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6349 if(ldt->is_buffer_type() )
6351 ret.append(generate_se_code(pr->get_left_se(),schema) );
6353 if(rdt->is_buffer_type() )
6355 ret.append(generate_se_code(pr->get_right_se(),schema) );
6357 ret.append( generate_C_comparison_op(pr->get_op()));
6360 ret.append(generate_se_code(pr->get_left_se(),schema) );
6361 ret.append( generate_C_comparison_op(pr->get_op()));
6362 ret.append(generate_se_code(pr->get_right_se(),schema) );
6368 ret.append( generate_C_boolean_op(pr->get_op()) );
6369 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6372 case PRED_BINARY_OP:
6374 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6375 ret.append( generate_C_boolean_op(pr->get_op()) );
6376 ret.append(generate_predicate_code(pr->get_right_pr(),schema) );
6380 ret += pr->get_op() + "( ";
6381 op_list = pr->get_op_list();
6382 for(o=0;o<op_list.size();++o){
6383 if(o>0) ret += ", ";
// Buffer-typed operands that are not handle references are singled out here
// (presumably to be passed by address -- confirm).
6384 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6386 ret += generate_se_code(op_list[o], schema);
6391 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6392 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6393 return("ERROR in generate_predicate_code");
// Translate the predicate pr into a C/C++ boolean expression whose operands
// come from group-by and aggregate variables.  The structure mirrors
// generate_predicate_code; scalar operands are rendered with
// generate_se_code_fm_aggr, and gbvar / aggvar are passed through to it as
// the variable-name prefixes.
// An unknown predicate type is reported on stderr and yields an error string.
6397 static string generate_predicate_code_fm_aggr(predicate_t *pr, string gbvar, string aggvar,table_list *schema){
6399 vector<literal_t *> litv;
6401 data_type *ldt, *rdt;
6402 vector<scalarexp_t *> op_list;
6405 switch(pr->get_operator_type()){
6407 ldt = pr->get_left_se()->get_data_type();
6410 litv = pr->get_lit_vec();
// Literal-list membership: one test per literal, joined with " || ".
6411 for(i=0;i<litv.size();i++){
6412 if(i>0) ret.append(" || ");
6415 if(ldt->complex_comparison(ldt) ){
6416 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6418 if(ldt->is_buffer_type() )
6420 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6422 if(ldt->is_buffer_type() )
6424 if(litv[i]->is_cpx_lit()){
6425 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6428 ret.append(litv[i]->to_C_code(""));
// The comparison function's result is tested against 0 for equality.
6430 ret.append(") == 0");
6432 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6434 ret.append(litv[i]->to_hfta_C_code(""));
6443 ldt = pr->get_left_se()->get_data_type();
6444 rdt = pr->get_right_se()->get_data_type();
6447 if(ldt->complex_comparison(rdt) ){
6448 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6450 if(ldt->is_buffer_type() )
6452 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6454 if(rdt->is_buffer_type() )
6456 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
6458 ret.append( generate_C_comparison_op(pr->get_op()));
6461 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6462 ret.append( generate_C_comparison_op(pr->get_op()));
6463 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
6469 ret.append( generate_C_boolean_op(pr->get_op()) );
6470 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6473 case PRED_BINARY_OP:
6475 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6476 ret.append( generate_C_boolean_op(pr->get_op()) );
6477 ret.append(generate_predicate_code_fm_aggr(pr->get_right_pr(), gbvar, aggvar,schema) );
6481 ret += pr->get_op() + "( ";
6482 op_list = pr->get_op_list();
6483 for(o=0;o<op_list.size();++o){
6484 if(o>0) ret += ", ";
// Buffer-typed operands that are not handle references are singled out here
// (presumably to be passed by address -- confirm).
6485 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6487 ret += generate_se_code_fm_aggr(op_list[o], gbvar, aggvar, schema);
// NOTE(review): the diagnostic and the returned error string below name
// generate_predicate_code, not generate_predicate_code_fm_aggr, which makes
// failures from the two functions indistinguishable.
6492 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6493 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6494 return("ERROR in generate_predicate_code");
// Build the text of a test between the two already-generated operand
// expressions lhs_op and rhs_op, both of type dt.
// If dt reports complex_comparison, the type's hfta comparison function is
// used and its result is compared with 0 (") == 0"); otherwise the operand
// texts are combined directly.  Buffer types are singled out on both operands
// in the comparison-function form (presumably to pass them by address --
// confirm).
6502 static string generate_equality_test(string &lhs_op, string &rhs_op, data_type *dt){
6505 if(dt->complex_comparison(dt) ){
6506 ret.append(dt->get_hfta_comparison_fcn(dt));
6508 if(dt->is_buffer_type() )
6512 if(dt->is_buffer_type() )
6514 ret.append(rhs_op );
6515 ret.append(") == 0");
6517 ret.append(lhs_op );
6519 ret.append(rhs_op );
// Build the text of a comparison between the two already-generated operand
// expressions lhs_op and rhs_op, both of type dt.
// If dt reports complex_comparison, the type's hfta comparison function is
// used and its result is compared with 0 (") == 0"); otherwise the operand
// texts are combined directly.
// NOTE(review): the visible statements match generate_equality_test; the
// operator used in the direct form is presumably what distinguishes the two
// -- confirm.
6525 static string generate_comparison(string &lhs_op, string &rhs_op, data_type *dt){
6528 if(dt->complex_comparison(dt) ){
6529 ret.append(dt->get_hfta_comparison_fcn(dt));
6531 if(dt->is_buffer_type() )
6535 if(dt->is_buffer_type() )
6537 ret.append(rhs_op );
6538 ret.append(") == 0");
6540 ret.append(lhs_op );
6542 ret.append(rhs_op );
6549 // Here I assume that only MIN and MAX aggregates can be computed
6550 // over BUFFER data types.
// Generate the statement(s) that fold the current tuple into the aggregate
// variable named var, for aggregate number aidx of atbl.
//  - Non-built-in aggregates: a call to <op>_HFTA_AGGR_UPDATE_( ... ) on var
//    (address-of is added unless the storage type is fstring_t), followed by
//    the operands rendered with generate_se_code.
//  - Built-in aggregates: "++" , " += ", " &= ", " |= " and " ^= " updates,
//    compare-and-replace updates through a temporary aggr_tmp_<aidx>, and a
//    _sum / _cnt pair from which var is recomputed as their quotient.
// An unrecognised aggregate name is reported on stderr.
6552 static string generate_aggr_update(string var, aggregate_table *atbl,int aidx, table_list *schema){
6553 string retval = "\t\t";
6554 string op = atbl->get_op(aidx);
6557 if(! atbl->is_builtin(aidx)) {
6559 retval += op+"_HFTA_AGGR_UPDATE_(";
6560 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6561 retval+="("+var+")";
6562 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
// The doubled brace below opens an extra nested scope; it has no effect.
6563 for(o=0;o<opl.size();++o){{
6565 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
6567 retval += generate_se_code(opl[o], schema);
6576 // builtin processing
6577 data_type *dt = atbl->get_data_type(aidx);
6581 retval.append("++;\n");
6586 retval.append(" += ");
6587 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6588 retval.append(";\n");
// Replace var when the new value compares less than it (presumably MIN --
// confirm).  The new value is first stored in the temporary aggr_tmp_<aidx>.
6592 sprintf(tmpstr,"aggr_tmp_%d",aidx);
6593 retval += dt->make_host_cvar(tmpstr);
6595 retval += generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
6596 if(dt->complex_comparison(dt)){
6597 if(dt->is_buffer_type())
6598 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6600 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6602 sprintf(tmpstr,"\t\tif(aggr_tmp_%d < %s)\n",aidx,var.c_str());
6604 retval.append(tmpstr);
// Buffer types are replaced through the type's buffer-replace function;
// other types by plain assignment.
6605 if(dt->is_buffer_type()){
6606 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
6608 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
6610 retval.append(tmpstr);
// Same pattern with the test reversed: replace var when the new value
// compares greater than it (presumably MAX -- confirm).
6615 sprintf(tmpstr,"aggr_tmp_%d",aidx);
6616 retval+=dt->make_host_cvar(tmpstr);
6618 retval+=generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
6619 if(dt->complex_comparison(dt)){
6620 if(dt->is_buffer_type())
6621 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6623 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6625 sprintf(tmpstr,"\t\tif(aggr_tmp_%d > %s)\n",aidx,var.c_str());
6627 retval.append(tmpstr);
6628 if(dt->is_buffer_type()){
6629 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
6631 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
6633 retval.append(tmpstr);
6638 if(op == "AND_AGGR"){
6640 retval.append(" &= ");
6641 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6642 retval.append(";\n");
6645 if(op == "OR_AGGR"){
6647 retval.append(" |= ");
6648 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6649 retval.append(";\n");
6652 if(op == "XOR_AGGR"){
6654 retval.append(" ^= ");
6655 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6656 retval.append(";\n");
// Running sum and count are kept in <var>_sum and <var>_cnt, and var is
// recomputed as their quotient on every update.
6660 retval += var+"_sum += "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
6661 retval += "\t\t"+var+"_cnt += 1;\n";
6662 retval += "\t\t"+var+" = "+var+"_sum / "+var+"_cnt;\n";
6666 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_update.\n",op.c_str());
// Generate the statement that removes the contribution of the aggregate
// variable var from the superaggregate variable supervar, for aggregate
// number aidx of atbl.
//  - Non-built-in aggregates: <op>_HFTA_AGGR_MINUS_(supervar, var), with
//    address-of added to each argument unless the storage type is fstring_t.
//  - COUNT and SUM: supervar -= var.
//  - XOR_AGGR: supervar ^= var.
//  - MIN and MAX are tested for separately (presumably because they cannot be
//    undone incrementally -- confirm what is emitted for them).
// Any other aggregate name is reported on stderr.  schema is not used in the
// lines shown here.
6675 static string generate_superaggr_minus(string var, string supervar, aggregate_table *atbl,int aidx, table_list *schema){
6676 string retval = "\t\t";
6677 string op = atbl->get_op(aidx);
6680 if(! atbl->is_builtin(aidx)) {
6682 retval += op+"_HFTA_AGGR_MINUS_(";
6683 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6684 retval+="("+supervar+"),";
6685 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6686 retval+="("+var+");\n";
6692 if(op == "COUNT" || op == "SUM"){
6693 retval += supervar + "-=" +var + ";\n";
6697 if(op == "XOR_AGGR"){
6698 retval += supervar + "^=" +var + ";\n";
6702 if(op=="MIN" || op == "MAX")
6705 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_superaggr_minus.\n",op.c_str());
// Generate the statement(s) that initialise the aggregate variable named var
// from the first tuple of a group, for aggregate number aidx of atbl.
//  - Non-built-in aggregates: <op>_HFTA_AGGR_INIT_(var) followed by an
//    <op>_HFTA_AGGR_UPDATE_(var, operands...) call, so the first tuple is
//    folded in immediately.  Address-of is added unless the storage type is
//    fstring_t.
//  - Built-in aggregates: " = 1" for the counting form; for SUM, MIN, MAX,
//    AVG, AND_AGGR, OR_AGGR and XOR_AGGR the variable is set from the
//    aggregate expression -- through a temporary and the type's buffer
//    assign-copy function for buffer types, and through <var>_sum / <var>_cnt
//    for the averaging form.
// An unrecognised aggregate name is reported on stderr.
6714 static string generate_aggr_init(string var, aggregate_table *atbl,int aidx, table_list *schema){
6716 string op = atbl->get_op(aidx);
6719 if(! atbl->is_builtin(aidx)){
6721 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_INIT_(";
6722 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6723 retval+="("+var+"));\n";
6725 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_UPDATE_(";
6726 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6727 retval+="("+var+")";
6728 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
6730 for(o=0;o<opl.size();++o){
// Buffer-typed operands that are not handle references are singled out here
// (presumably to be passed by address -- confirm).
6732 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
6734 retval += generate_se_code(opl[o],schema);
6740 // builtin aggregate processing
6741 data_type *dt = atbl->get_data_type(aidx);
6745 retval.append(" = 1;\n");
6749 if(op == "SUM" || op == "MIN" || op == "MAX" || op == "AND_AGGR" ||
6750 op=="AVG" || op == "OR_AGGR" || op == "XOR_AGGR"){
// Buffer types: evaluate into aggr_tmp_<aidx>, then copy into var with the
// type's assign-copy function.
6751 if(dt->is_buffer_type()){
6752 sprintf(tmpstr,"\t\taggr_tmp_%d = %s;\n",aidx,generate_se_code(atbl->get_aggr_se(aidx), schema ).c_str() );
6753 retval.append(tmpstr);
6754 sprintf(tmpstr,"\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_assign_copy().c_str(),var.c_str(),aidx);
6755 retval.append(tmpstr);
// Averaging form: seed the running sum and count, and set var to the sum.
6758 retval += var+"_sum = "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
6759 retval += "\t"+var+"_cnt = 1;\n";
6760 retval += "\t"+var+" = "+var+"_sum;\n";
6764 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema));
6765 retval.append(";\n");
6771 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_init.\n",op.c_str());
// Build, in `retval`, the generated code that resets aggregate `aidx` of
// `atbl` (storage named by `var`) to its starting value.
// `schema` is not referenced by the statements visible here.
6779 static string generate_aggr_reinitialize(string var, aggregate_table *atbl,int aidx, table_list *schema){
6781 string op = atbl->get_op(aidx);
// Non-builtin aggregate: emit <op>_HFTA_AGGR_REINIT_(var) for running
// aggregates and <op>_HFTA_AGGR_INIT_(var) for the rest. The storage is
// passed by address unless its type is fstring_t.
6784 if(! atbl->is_builtin(aidx)){
6786 retval += "\t"+atbl->get_op(aidx);
6787 if(atbl->is_running_aggr(aidx)){
6788 retval += "_HFTA_AGGR_REINIT_(";
6790 retval += "_HFTA_AGGR_INIT_(";
6792 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6793 retval+="("+var+"));\n";
6797 // builtin aggregate processing
6798 data_type *dt = atbl->get_data_type(aidx);
// NOTE(review): the condition guarding this " = 0" reset is not visible;
// presumably it is the COUNT case -- confirm.
6802 retval.append(" = 0;\n");
// SUM / AND_AGGR / OR_AGGR / XOR_AGGR: reset to the default literal of the
// aggregate's type. Buffer types are not supported: an error string is
// returned in place of generated code.
6806 if(op == "SUM" || op == "AND_AGGR" ||
6807 op == "OR_AGGR" || op == "XOR_AGGR"){
6808 if(dt->is_buffer_type()){
6809 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
6813 literal_t l(dt->type_indicator());
6814 retval.append(l.to_string());
6815 retval.append(";\n");
// Reset to the type's maximum literal.
// NOTE(review): presumably the MIN case; the guard is not visible.
6821 if(dt->is_buffer_type()){
6822 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
6826 retval.append(dt->get_max_literal());
6827 retval.append(";\n");
// Reset to the type's minimum literal.
// NOTE(review): presumably the MAX case; the guard is not visible.
6833 if(dt->is_buffer_type()){
6834 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
6838 retval.append(dt->get_min_literal());
6839 retval.append(";\n");
// Unrecognized operators are reported on stderr.
6844 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_aggr_reinitialize.\n",op.c_str());
6851 // Generate parameter holding vars from a param table.
// For every parameter name in `param_tbl`, appends to `ret` a declaration of
// a host C variable built by the parameter's data_type::make_host_cvar, and,
// when handle_access(name) is true, a `struct search_handle *` member named
// param_handle_<name>.
6852 static string generate_param_vars(param_table *param_tbl){
6855 vector<string> param_vec = param_tbl->get_param_names();
6856 for(p=0;p<param_vec.size();p++){
6857 data_type *dt = param_tbl->get_data_type(param_vec[p]);
// NOTE(review): the format already ends in ";\n" and the next line appends
// ";\n" again, so the generated declaration carries a doubled terminator.
6858 sprintf(tmpstr,"param_%s;\n", param_vec[p].c_str());
6859 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
6860 if(param_tbl->handle_access(param_vec[p])){
6861 ret += "\tstruct search_handle *param_handle_"+param_vec[p]+";\n";
6867 // Parameter manipulation routines
// Generate the text of the functor method
//     int load_params_<functor_name>(gs_int32_t sz, void *value)
// which unpacks a parameter block into the param_<name> members.
//   functor_name       : suffix of the generated method name.
//   param_tbl          : names and data types of the query parameters.
//   param_handle_table : pass-by-handle parameters to register after loading.
// The generated method returns 1 when the block is too small or a string
// does not fit inside it, and 0 on success.
// Fix: the buffer-copy call emitted in the unpacking phase had a mis-encoded
// "&param_" (it appeared as a pilcrow followed by "m_"); it is restored so the
// generated call takes the address of param_<name>.
6868 static string generate_load_param_block(string functor_name,
6869 param_table *param_tbl,
6870 vector<handle_param_tbl_entry *> param_handle_table
6873 vector<string> param_names = param_tbl->get_param_names();
6875 string ret = "int load_params_"+functor_name+"(gs_int32_t sz, void *value){\n";
6876 ret.append("\tint pos=0;\n");
6877 ret.append("\tint data_pos;\n");
// Declare a tmp_var_<name> temporary for every buffer-typed parameter.
// NOTE(review): the format already ends in ";\n" and ";\n" is appended again,
// so the generated declaration carries a doubled terminator.
6879 for(p=0;p<param_names.size();p++){
6880 data_type *dt = param_tbl->get_data_type(param_names[p]);
6881 if(dt->is_buffer_type()){
6882 sprintf(tmpstr,"tmp_var_%s;\n", param_names[p].c_str());
6883 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
6888 // Verify that the block is of minimum size
// data_pos is generated as the sum of sizeof(<host type>) over all parameters.
6889 if(param_names.size() > 0){
6890 ret += "//\tVerify that the value block is large enough */\n";
6891 ret.append("\n\tdata_pos = ");
6892 for(p=0;p<param_names.size();p++){
6893 if(p>0) ret.append(" + ");
6894 data_type *dt = param_tbl->get_data_type(param_names[p]);
6895 ret.append("sizeof( ");
6896 ret.append( dt->get_host_cvar_type() );
6900 ret.append("\tif(data_pos > sz) return 1;\n\n");
6903 ///////////////////////
6904 /// Verify that all strings can be unpacked.
6906 ret += "//\tVerify that the strings can be unpacked */\n";
6907 for(p=0;p<param_names.size();p++){
6908 data_type *dt = param_tbl->get_data_type(param_names[p]);
6909 if(dt->is_buffer_type()){
// Read the fixed-size descriptor of the buffer parameter out of the block.
6910 sprintf(tmpstr,"\ttmp_var_%s = *( (%s *)((gs_sp_t )value+pos) );\n",param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
6912 switch( dt->get_type() ){
6914 // ret += "\ttmp_var_"+param_names[p]+".offset = ntohl( tmp_var_"+param_names[p]+".offset );\n"; // ntoh conversion
6915 // ret += "\ttmp_var_"+param_names[p]+".length = ntohl( tmp_var_"+param_names[p]+".length );\n"; // ntoh conversion
// Bounds check offset+length against sz, then turn the block-relative offset
// into an address inside `value`.
6916 sprintf(tmpstr,"\tif( (int)(tmp_var_%s.offset) + tmp_var_%s.length > sz) return 1;\n",param_names[p].c_str(), param_names[p].c_str() );
6918 sprintf(tmpstr,"\ttmp_var_%s.offset = (gs_p_t)( (gs_sp_t )value + (gs_p_t)(tmp_var_%s.offset) );\n",param_names[p].c_str(), param_names[p].c_str() );
6922 fprintf(stderr,"ERROR: parameter %s is of type %s, a buffered type, but I don't know how to unpack it as a parameter.\n",param_names[p].c_str(), dt->to_string().c_str() );
6927 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
6931 /////////////////////////
6933 ret += "/*\tThe block is OK, do the unpacking. */\n";
6934 ret += "\tpos = 0;\n";
6936 for(p=0;p<param_names.size();p++){
6937 data_type *dt = param_tbl->get_data_type(param_names[p]);
6938 if(dt->is_buffer_type()){
// Buffer parameters are copied from the validated temporary into param_<name>.
6939 sprintf(tmpstr,"\t%s(&param_%s, &tmp_var_%s);\n", dt->get_hfta_buffer_assign_copy().c_str(),param_names[p].c_str(),param_names[p].c_str() );
6942 // if(dt->needs_hn_translation()){
6943 // sprintf(tmpstr,"\tparam_%s = %s( *( (%s *)( (gs_sp_t )value+pos) ) );\n",
6944 // param_names[p].c_str(), dt->ntoh_translation().c_str(), dt->get_host_cvar_type().c_str() );
// Other parameters are loaded directly from the block.
6946 sprintf(tmpstr,"\tparam_%s = *( (%s *)( (gs_sp_t )value+pos) );\n",
6947 param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
6951 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
6954 // TODO: I think this method of handle registration is obsolete
6955 // and should be deleted.
6956 // some examination reveals that handle_access is always false.
6957 for(p=0;p<param_names.size();p++){
6958 if(param_tbl->handle_access(param_names[p]) ){
6959 data_type *pdt = param_tbl->get_data_type(param_names[p]);
6961 ret += "\tt->param_handle_"+param_names[p]+" = " +
6962 pdt->handle_registration_name() +
6963 "((struct FTA *)t, &(t->param_"+param_names[p]+"));\n";
6966 // Register the pass-by-handle parameters
6968 ret += "/* register the pass-by-handle parameters */\n";
6971 for(ph=0;ph<param_handle_table.size();++ph){
6972 data_type pdt(param_handle_table[ph]->type_name);
6973 switch(param_handle_table[ph]->val_type){
// handle_param_<ph> = <registration fcn>( param_<name> ), where buffer-typed
// parameters are passed as &(param_<name>).
6979 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
6981 if(pdt.is_buffer_type()) ret += "&(";
6982 ret += "param_"+param_handle_table[ph]->param_name;
6983 if(pdt.is_buffer_type()) ret += ")";
6987 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
6993 ret += "\treturn(0);\n";
6994 ret.append("}\n\n");
// Generate the text of the functor method
//     void destroy_params_<functor_name>()
// which releases what load_params_<functor_name> acquired: buffer-typed
// parameters are destroyed with their type's buffer-destroy function,
// handle-accessed parameters with the type's handle destructor, and every
// pass-by-handle entry of kind param_e is deregistered.
// Fix: the emitted buffer-destroy call had a mis-encoded "&param_" (it
// appeared as a pilcrow followed by "m_"); it is restored so the generated
// call takes the address of param_<name>.
7000 static string generate_delete_param_block(string functor_name,
7001 param_table *param_tbl,
7002 vector<handle_param_tbl_entry *> param_handle_table
7006 vector<string> param_names = param_tbl->get_param_names();
7008 string ret = "void destroy_params_"+functor_name+"(){\n";
7010 for(p=0;p<param_names.size();p++){
7011 data_type *dt = param_tbl->get_data_type(param_names[p]);
7012 if(dt->is_buffer_type()){
7013 sprintf(tmpstr,"\t\t%s(&param_%s);\n",dt->get_hfta_buffer_destroy().c_str(),param_names[p].c_str());
7016 if(param_tbl->handle_access(param_names[p]) ){
7017 ret += "\t\t" + dt->get_handle_destructor() +
7018 "(t->param_handle_" + param_names[p] + ");\n";
7022 ret += "//\t\tDeregister handles.\n";
// Only handles that were registered from query parameters are released here.
7024 for(ph=0;ph<param_handle_table.size();++ph){
7025 if(param_handle_table[ph]->val_type == param_e){
7026 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7027 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7036 // ---------------------------------------------------------------------
7037 // functions for creating functor variables.
// For every column id in `cid_set`, formats two functor member declarations:
//   unpack_var_<field>_<tblref>    (host C type taken from the schema), and
//   gs_int32_t unpack_offset_<field>_<tblref>.
// `schema` supplies the type name for (schema_ref, field).
7039 static string generate_access_vars(col_id_set &cid_set, table_list *schema){
7041 col_id_set::iterator csi;
7043 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7044 int schref = (*csi).schema_ref;
7045 int tblref = (*csi).tblvar_ref;
7046 string field = (*csi).field;
7047 data_type dt(schema->get_type_name(schref,field));
7048 sprintf(tmpstr,"unpack_var_%s_%d", field.c_str(), tblref);
7049 ret+="\t"+dt.make_host_cvar(tmpstr)+";\n";
7050 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", field.c_str(), tblref);
// Declare the result holders for partial / cached functions.
// A partial_fcn_result_<p> variable is declared for function p when caching
// is off (`gen_fcn_cache` false), or the function is partial, or it is
// referenced more than once. With caching on, multiply-referenced functions
// also get an `int fcn_ref_cnt_<p>` flag.
//   partial_fcns : the function expressions, indexed by p.
//   ref_cnt      : reference count per function.
//   is_partial   : true where the function is a partial function.
7056 static string generate_partial_fcn_vars(vector<scalarexp_t *> &partial_fcns,
7057 vector<int> &ref_cnt, vector<bool> &is_partial, bool gen_fcn_cache){
7062 for(p=0;p<partial_fcns.size();++p){
7063 if(!gen_fcn_cache || is_partial[p] || ref_cnt[p]>1){
7064 sprintf(tmpstr,"partial_fcn_result_%d", p);
7065 ret+="\t"+partial_fcns[p]->get_data_type()->make_host_cvar(tmpstr)+";\n";
7066 if(gen_fcn_cache && ref_cnt[p]>1){
7067 ret+="\tint fcn_ref_cnt_"+int_to_string(p)+";\n";
// Declare one complex_literal_<cl> variable per entry of `complex_literals`,
// typed from the literal, and format a `struct search_handle *lit_handle_<cl>`
// declaration for entries flagged as handle references.
7075 static string generate_complex_lit_vars(cplx_lit_table *complex_literals){
7078 for(cl=0;cl<complex_literals->size();cl++){
7079 literal_t *l = complex_literals->get_literal(cl);
// NOTE(review): this data_type is heap-allocated and no delete is visible in
// this function -- confirm whether it is intentionally leaked.
7080 data_type *dtl = new data_type( l->get_type() );
7081 sprintf(tmpstr,"complex_literal_%d",cl);
7082 ret += "\t"+dtl->make_host_cvar(tmpstr)+";\n";
7083 if(complex_literals->is_handle_ref(cl)){
7084 sprintf(tmpstr,"\tstruct search_handle *lit_handle_%d;\n",cl);
// Format one `gs_param_handle_t handle_param_<p>;` member declaration per
// entry of `param_handle_table`.
// Fix: the parameter declarator was mis-encoded (a pilcrow followed by
// "m_handle_table"); it is restored to the reference `&param_handle_table`,
// the name the loop below already uses.
7092 static string generate_pass_by_handle_vars(
7093 vector<handle_param_tbl_entry *> &param_handle_table){
7097 for(p=0;p<param_handle_table.size();++p){
7098 sprintf(tmpstr,"\tgs_param_handle_t handle_param_%d;\n",p);
7106 // ------------------------------------------------------------
7107 // functions for generating initialization code.
// For every column id in `cid_set`, formats the statement that sets
// unpack_offset_<field>_<tblref> from
// ftaschema_get_field_offset_by_name(schema_handle<tblref>, "<field>").
7109 static string gen_access_var_init(col_id_set &cid_set){
7111 col_id_set::iterator csi;
7113 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7114 int tblref = (*csi).tblvar_ref;
7115 string field = (*csi).field;
7116 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle%d, \"%s\");\n", field.c_str(),tblref,tblref,field.c_str());
// Generate the constructor code that initializes each complex_literal_<cl>:
// the literal renders its own initialization via to_hfta_C_code, given the
// address expression "&(complex_literal_<cl>)".
7123 static string gen_complex_lit_init(cplx_lit_table *complex_literals){
7127 for(cl=0;cl<complex_literals->size();cl++){
7128 literal_t *l = complex_literals->get_literal(cl);
7129 // sprintf(tmpstr,"\tcomplex_literal_%d = ",cl);
7130 // ret += tmpstr + l->to_hfta_C_code() + ";\n";
7131 sprintf(tmpstr,"&(complex_literal_%d)",cl);
7132 ret += "\t" + l->to_hfta_C_code(tmpstr) + ";\n";
7133 // I think that the code below is obsolete
7134 // TODO: it is obsolete. add_cpx_lit is always
7135 // called with the handle indicator being false.
7136 // This entire structure should be cleansed.
// Handle-referenced literals also get lit_handle_<cl> registered.
// NOTE(review): `dt` is heap-allocated and no delete is visible here.
7137 if(complex_literals->is_handle_ref(cl)){
7138 data_type *dt = new data_type( l->get_type() );
7139 sprintf(tmpstr,"\tlit_handle_%d = %s(&(f->complex_literal_%d));\n",
7140 cl, dt->hfta_handle_registration_name().c_str(), cl);
// Generate initialization code for buffer-typed partial function results:
// each such partial_fcn_result_<p> is initialized from a default-constructed
// literal of its type, rendered through to_hfta_C_code with the address
// expression "&(partial_fcn_result_<p>)". Non-buffer results emit nothing in
// the statements visible here.
7149 static string gen_partial_fcn_init(vector<scalarexp_t *> &partial_fcns){
7153 for(p=0;p<partial_fcns.size();++p){
7154 data_type *pdt =partial_fcns[p]->get_data_type();
7155 literal_t empty_lit(pdt->type_indicator());
7156 if(pdt->is_buffer_type()){
7157 // sprintf(tmpstr,"\tpartial_fcn_result_%d = %s;\n",
7158 // p, empty_lit.to_hfta_C_code().c_str());
7159 sprintf(tmpstr,"&(partial_fcn_result_%d)",p);
7160 ret += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
// Generate the constructor code that registers pass-by-handle arguments:
// for each entry, `handle_param_<ph> = <registration fcn>(` followed by an
// argument chosen by the entry's val_type -- a complex_literal_<idx>
// (wrapped in &( ) for buffer types) or the literal's own C code.
// Query-parameter handles are not registered here (see the note below).
// Fix: the parameter declarator was mis-encoded (a pilcrow followed by
// "m_handle_table"); it is restored to the reference `&param_handle_table`,
// the name the body already uses.
7166 static string gen_pass_by_handle_init(
7167 vector<handle_param_tbl_entry *> &param_handle_table){
7171 for(ph=0;ph<param_handle_table.size();++ph){
7172 data_type pdt(param_handle_table[ph]->type_name);
7173 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7174 switch(param_handle_table[ph]->val_type){
7177 if(pdt.is_buffer_type()) ret += "&(";
7178 sprintf(tmpstr,"complex_literal_%d",param_handle_table[ph]->complex_literal_idx);
7180 if(pdt.is_buffer_type()) ret += ")";
7185 ret += param_handle_table[ph]->litval->to_hfta_C_code("") + ");\n";
7189 // query parameter handles are registered/deregistered in the
7190 // load_params function.
7191 // ret += "t->param_"+param_handle_table[ph]->param_name;
7194 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7201 //------------------------------------------------------------
7202 // functions for destructor and deregistration code
// For each buffer-typed entry of `complex_literals`, formats a call to the
// type's buffer-destroy function on &complex_literal_<cl>.
7204 static string gen_complex_lit_dtr(cplx_lit_table *complex_literals){
7208 for(cl=0;cl<complex_literals->size();cl++){
7209 literal_t *l = complex_literals->get_literal(cl);
7210 data_type ldt( l->get_type() );
7211 if(ldt.is_buffer_type()){
7212 sprintf(tmpstr,"\t\t%s(&complex_literal_%d);\n",
7213 ldt.get_hfta_buffer_destroy().c_str(), cl );
// For each entry of `param_handle_table`, formats a call to the entry's
// deregistration function on handle_param_<ph>.
// Fix: the parameter declarator was mis-encoded (a pilcrow followed by
// "m_handle_table"); it is restored to the reference `&param_handle_table`,
// the name the body already uses.
7221 static string gen_pass_by_handle_dtr(
7222 vector<handle_param_tbl_entry *> &param_handle_table){
7226 for(ph=0;ph<param_handle_table.size();++ph){
7227 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7228 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7234 // Destroy all previous results
// For every buffer-typed function in `partial_fcns`, formats a call to the
// type's buffer-destroy function on &partial_fcn_result_<p>.
7235 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns){
7239 for(p=0;p<partial_fcns.size();++p){
7240 data_type *pdt =partial_fcns[p]->get_data_type();
7241 if(pdt->is_buffer_type()){
7242 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7243 pdt->get_hfta_buffer_destroy().c_str(), p );
7250 // Destroy previous results of fcns in pfcn_set
// Overload restricted to the function indices listed in `pfcn_set`: for each
// listed buffer-typed function, formats a call to the type's buffer-destroy
// function on &partial_fcn_result_<index>.
7251 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns, set<int> &pfcn_set){
7253 set<int>::iterator si;
7255 for(si=pfcn_set.begin(); si!=pfcn_set.end(); ++si){
7256 data_type *pdt =partial_fcns[(*si)]->get_data_type();
7257 if(pdt->is_buffer_type()){
7258 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7259 pdt->get_hfta_buffer_destroy().c_str(), (*si) );
7267 //-------------------------------------------------------------------------
7268 // Functions related to se generation bookkeeping.
// Collect the column ids referenced by predicate `pr` that are not yet in
// `found_cids`.
//   pr         : predicate to scan (via gather_pr_col_ids, with `gtbl`).
//   found_cids : in/out -- ids already unpacked; everything in new_cids is
//                added to it before returning.
//   new_cids   : out -- receives (this predicate's ids) minus found_cids.
//                It is not cleared first, so entries already present on entry
//                are kept and are also inserted into found_cids.
7270 static void get_new_pred_cids(predicate_t *pr, col_id_set &found_cids,
7271 col_id_set &new_cids, gb_table *gtbl){
7272 col_id_set this_pred_cids;
7273 col_id_set::iterator csi;
7275 // get colrefs in predicate not already found.
7276 gather_pr_col_ids(pr,this_pred_cids,gtbl);
7277 set_difference(this_pred_cids.begin(), this_pred_cids.end(),
7278 found_cids.begin(), found_cids.end(),
7279 inserter(new_cids,new_cids.begin()) );
7281 // We've found these cids, so update found_cids
7282 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7283 found_cids.insert((*csi));
7287 // after the call, new_cids will have the colrefs in se but not found_cids.
7288 // update found_cids with the new cids.
//   se         : scalar expression to scan (via gather_se_col_ids, with `gtbl`).
//   found_cids : in/out -- ids already unpacked.
//   new_cids   : out -- not cleared first; entries already present on entry
//                are kept and are also inserted into found_cids.
7289 static void get_new_se_cids(scalarexp_t *se, col_id_set &found_cids,
7290 col_id_set &new_cids, gb_table *gtbl){
7291 col_id_set this_se_cids;
7292 col_id_set::iterator csi;
7294 // get colrefs in se not already found.
7295 gather_se_col_ids(se,this_se_cids,gtbl);
7296 set_difference(this_se_cids.begin(), this_se_cids.end(),
7297 found_cids.begin(), found_cids.end(),
7298 inserter(new_cids,new_cids.begin()) );
7300 // We've found these cids, so update found_cids
7301 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7302 found_cids.insert((*csi));
// Generate the code that unpacks every column in `new_cids` from its input
// tuple into unpack_var_<field>_<tblref>.
//   schema      : supplies the type name of (schema_ref, field).
//   on_problem  : text of the value the generated code returns when
//                 unpacking a buffer-typed field reports a problem.
//   needs_xform : indexed by tblref; selects the transforming unpack function
//                 (get_hfta_unpack_fcn) over the _noxf variant.
7306 static string gen_unpack_cids(table_list *schema, col_id_set &new_cids, string on_problem, vector<bool> &needs_xform){
7308 col_id_set::iterator csi;
7310 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7311 int schref = (*csi).schema_ref;
7312 int tblref = (*csi).tblvar_ref;
7313 string field = (*csi).field;
7314 data_type dt(schema->get_type_name(schref,field));
7316 if(needs_xform[tblref]){
7317 unpack_fcn = dt.get_hfta_unpack_fcn();
7319 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
// Buffer types use the checked unpack (tuple size plus &problem); all other
// types use the <unpack_fcn>_nocheck form.
7321 if(dt.is_buffer_type()){
7322 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7324 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
// Only the checked unpack can fail, so only it gets a bail-out test.
7327 if(dt.is_buffer_type()){
7328 ret += "\tif(problem) return "+on_problem+" ;\n";
7334 // generates the declaration of all the variables related to
7335 // temp tuples generation
// The visible output is a comment line and `bool temp_tuple_received;`.
7336 static string gen_decl_temp_vars(){
7339 ret += "\t// variables related to temp tuple generation\n";
7340 ret += "\tbool temp_tuple_received;\n";
7345 // generates initialization code for variables related to temp tuple processing
// Emits `temp_tuple_received = false;` and then, for every column referenced
// by a temporal select-list expression, formats an assignment of
// unpack_var_<field>_<tblref> to the type's minimum literal when the type is
// increasing and to its maximum literal otherwise.
//   schema      : supplies type name and modifier list for each column.
//   select_list : select expressions to scan.
//   gtbl        : group-by table passed through to get_new_se_cids.
7346 static string gen_init_temp_vars(table_list *schema, vector<select_element *>& select_list, gb_table *gtbl){
7348 col_id_set::iterator csi;
7351 // Initialize internal state
7352 ret += "\ttemp_tuple_received = false;\n";
7354 col_id_set temp_cids; // colrefs unpacked thus far.
7356 for(s=0;s<select_list.size();s++){
7357 if (select_list[s]->se->get_data_type()->is_temporal()) {
7358 // Find the set of attributes accessed in this SE
7359 col_id_set new_cids;
7360 get_new_se_cids(select_list[s]->se,temp_cids, new_cids, gtbl);
7363 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7364 int schref = (*csi).schema_ref;
7365 int tblref = (*csi).tblvar_ref;
7366 string field = (*csi).field;
7367 data_type dt(schema->get_type_name(schref,field), schema->get_modifier_list(schref,field));
7369 sprintf(tmpstr,"\t unpack_var_%s_%d = %s;\n", field.c_str(), tblref,
7370 dt.is_increasing() ? dt.get_min_literal().c_str() : dt.get_max_literal().c_str());
7380 // generates a check if tuple is temporal
// The generated code tests tup<channel> with
// ftaschema_is_temporal_tuple_offset(tuple_metadata_offset<channel>, ...)
// and sets temp_tuple_received to true or false accordingly.
// `node_name` and the schema_handle<channel> name built below are not used by
// the statements visible here (the only use is in a commented-out line).
7381 static string gen_temp_tuple_check(string node_name, int channel) {
7385 sprintf(tmpstr, "tup%d", channel);
7386 string tup_name = tmpstr;
7387 sprintf(tmpstr, "schema_handle%d", channel);
7388 string schema_handle_name = tmpstr;
7389 string tuple_offset_name = "tuple_metadata_offset"+int_to_string(channel);
7391 // check if it is a temporary status tuple
7392 ret += "\t// check if tuple is temp status tuple\n";
7393 // ret += "\tif (ftaschema_is_temporal_tuple(" + schema_handle_name + ", " + tup_name + ".data)) {\n";
7394 ret += "\tif (ftaschema_is_temporal_tuple_offset(" + tuple_offset_name + ", " + tup_name + ".data)) {\n";
7395 ret += "\t\ttemp_tuple_received = true;\n";
7397 ret += "\telse\n\t\ttemp_tuple_received = false;\n\n";
7402 // generates unpacking code for all temporal attributes referenced in select
//   found_cids : in/out -- updated with each column unpacked here, so later
//                code does not unpack it again.
//   needs_xform: passed through to gen_unpack_cids.
// The generated code returns `false` if unpacking reports a problem.
7403 static string gen_unpack_temp_vars(table_list *schema, col_id_set& found_cids, vector<select_element *>& select_list, gb_table *gtbl, vector<bool> &needs_xform) {
7407 // Unpack all the temporal attributes references in select list
7408 // we need it to be able to generate temp status tuples
7409 for(s=0;s<select_list.size();s++){
7410 if (select_list[s]->se->get_data_type()->is_temporal()) {
7411 // Find the set of attributes accessed in this SE
7412 col_id_set new_cids;
7413 get_new_se_cids(select_list[s]->se,found_cids, new_cids, gtbl);
7414 // Unpack these values.
7415 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
7423 // Generates temporal tuple generation code (except attribute packing)
// The generated code sizes `result` as the node's tuple struct plus one
// gs_uint8_t, mallocs that many bytes, marks the tuple heap resident, writes
// TEMPORAL_TUPLE into the byte just past the struct, and declares `tuple` as
// a typed pointer to result.data.
// NOTE(review): the generated code does not test the malloc result in the
// statements visible here.
7424 static string gen_init_temp_status_tuple(string node_name) {
7427 ret += "\t// create temp status tuple\n";
7428 ret += "\tresult.tuple_size = sizeof("+generate_tuple_name( node_name)+") + sizeof(gs_uint8_t);\n";
7429 ret += "\tresult.data = (gs_sp_t )malloc(result.tuple_size);\n";
7430 ret += "\tresult.heap_resident = true;\n";
7431 ret += "\t// Mark tuple as temporal\n";
7432 ret += "\t*((gs_sp_t )result.data + sizeof("+generate_tuple_name( node_name)+")) = TEMPORAL_TUPLE;\n";
7434 ret += "\t"+generate_tuple_name( node_name)+" *tuple = ("+
7435 generate_tuple_name( node_name) +" *)(result.data);\n";
7441 // Assume that all colrefs unpacked already ...
// For each function index in `pfcn_refs` (ascending), appends the evaluation
// code from unpack_partial_fcn followed by a generated
// `if(retval) return <on_problem>` bail-out.
7442 static string gen_unpack_partial_fcn(table_list *schema,
7443 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7446 set<int>::iterator si;
7448 // Since set<..> is a "Sorted Associative Container",
7449 // we can walk through it in sorted order by walking from
7450 // begin() to end(). (and the partial fcns must be
7451 // evaluated in this order).
7452 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7453 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7454 ret += "\tif(retval) return "+on_problem+" ;\n";
7459 // Assume that all colrefs unpacked already ...
7460 // this time with cached functions.
// Functions referenced more than once are wrapped in a generated
// `if(fcn_ref_cnt_<i>==0){` guard so they are evaluated once per tuple;
// inside it, partial functions are evaluated via unpack_partial_fcn (with a
// bail-out on retval), non-partial ones via generate_cached_fcn, and the
// flag is then set to 1.
7461 static string gen_unpack_partial_fcn(table_list *schema,
7462 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7463 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
7466 set<int>::iterator si;
7468 // Since set<..> is a "Sorted Associative Container",
7469 // we can walk through it in sorted order by walking from
7470 // begin() to end(). (and the partial fcns must be
7471 // evaluated in this order).
7472 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7473 if(fcn_ref_cnt[(*si)] > 1){
7474 ret += "\tif(fcn_ref_cnt_"+int_to_string((*si))+"==0){\n";
7476 if(is_partial_fcn[(*si)]){
7477 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7478 ret += "\tif(retval) return "+on_problem+" ;\n";
7480 if(fcn_ref_cnt[(*si)] > 1){
7481 if(!is_partial_fcn[(*si)]){
7482 ret += "\t\tpartial_fcn_result_"+int_to_string((*si))+"="+generate_cached_fcn(partial_fcns[(*si)],(*si),schema)+";\n";
7484 ret += "\t\tfcn_ref_cnt_"+int_to_string((*si))+"=1;\n";
7493 // This version finds and unpacks new colrefs.
7494 // found_cids gets updated with the newly unpacked cids.
// For each function index in `pfcn_refs`: unpack the columns it references
// that are not yet in `found_cids` (gen_unpack_cids), then evaluate the
// function (unpack_partial_fcn) with a generated bail-out returning
// `on_problem` when retval is set.
7495 static string gen_full_unpack_partial_fcn(table_list *schema,
7496 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7497 col_id_set &found_cids, gb_table *gtbl, string on_problem,
7498 vector<bool> &needs_xform){
7500 set<int>::iterator slsi;
7502 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
7503 // find all new fields ref'd by this partial fcn.
7504 col_id_set new_cids;
7505 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
7506 // Unpack these values.
7507 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
7509 // Now evaluate the partial fcn.
7510 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
7511 ret += "\tif(retval) return "+on_problem+" ;\n";
7516 // This version finds and unpacks new colrefs.
7517 // found_cids gets updated with the newly unpacked cids.
7518 // BUT : only for the partial functions.
// Cached variant: non-partial functions are skipped by the outer test.
// Functions referenced more than once are wrapped in a generated
// `if(fcn_ref_cnt_<i>==0){` guard, and the flag is set to 1 after evaluation.
7519 static string gen_full_unpack_partial_fcn(table_list *schema,
7520 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7521 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
7522 col_id_set &found_cids, gb_table *gtbl, string on_problem,
7523 vector<bool> &needs_xform){
7525 set<int>::iterator slsi;
7527 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
7528 if(is_partial_fcn[(*slsi)]){
7529 // find all new fields ref'd by this partial fcn.
7530 col_id_set new_cids;
7531 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
7532 // Unpack these values.
7533 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
7535 // Now evaluate the partial fcn.
7536 if(fcn_ref_cnt[(*slsi)] > 1){
7537 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
7539 if(is_partial_fcn[(*slsi)]){
7540 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
7541 ret += "\tif(retval) return "+on_problem+" ;\n";
7543 if(fcn_ref_cnt[(*slsi)] > 1){
7544 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
// Generate evaluation code for the cached functions that are not partial:
// for each index in `pfcn_refs` that is non-partial and referenced more than
// once, emit a guarded block that computes partial_fcn_result_<i> with
// generate_cached_fcn when fcn_ref_cnt_<i> is 0, then sets the flag to 1.
7553 static string gen_remaining_cached_fcns(table_list *schema,
7554 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7555 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn){
7557 set<int>::iterator slsi;
7559 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
7560 if(!is_partial_fcn[(*slsi)] && fcn_ref_cnt[(*slsi)] > 1){
// This inner test repeats part of the enclosing condition.
7562 if(fcn_ref_cnt[(*slsi)] > 1){
7563 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
7564 ret += "\t\tpartial_fcn_result_"+int_to_string((*slsi))+"="+generate_cached_fcn(partial_fcns[(*slsi)],(*slsi),schema)+";\n";
7565 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
7574 // unpack the colrefs in cid_set not in found_cids
// Generates the same per-column unpack code as gen_unpack_cids, but filters
// against `found_cids` inline. `found_cids` is only read by the statements
// visible here.
//   on_problem  : text of the value the generated code returns when
//                 unpacking a buffer-typed field reports a problem.
//   needs_xform : indexed by tblref; selects the transforming unpack function.
7575 static string gen_remaining_colrefs(table_list *schema,
7576 col_id_set &cid_set, col_id_set &found_cids, string on_problem,
7577 vector<bool> &needs_xform){
7579 col_id_set::iterator csi;
7581 for(csi=cid_set.begin(); csi!=cid_set.end();csi++){
7582 if(found_cids.count( (*csi) ) == 0){
7583 int schref = (*csi).schema_ref;
7584 int tblref = (*csi).tblvar_ref;
7585 string field = (*csi).field;
7586 data_type dt(schema->get_type_name(schref,field));
7588 if(needs_xform[tblref]){
7589 unpack_fcn = dt.get_hfta_unpack_fcn();
7591 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
// Buffer types use the checked unpack; other types use the _nocheck form.
7593 if(dt.is_buffer_type()){
7594 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7596 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
7599 if(dt.is_buffer_type()){
7600 ret.append("\tif(problem) return "+on_problem+" ;\n");
// Declare and compute a selvar_<s> temporary for every buffer-typed select
// expression that is computed in place, i.e. that is not a column reference,
// a partial function, or a function backed by an aggregate
// (get_aggr_ref() >= 0). The temporary is initialized with the expression's
// generated code.
7607 static string gen_buffer_selvars(table_list *schema,
7608 vector<select_element *> &select_list){
7612 for(s=0;s<select_list.size();s++){
7613 scalarexp_t *se = select_list[s]->se;
7614 data_type *sdt = se->get_data_type();
7615 if(sdt->is_buffer_type() &&
7616 !( (se->get_operator_type() == SE_COLREF) ||
7617 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
7618 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
7620 sprintf(tmpstr,"selvar_%d",s);
7621 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
7622 ret += generate_se_code(se,schema) +";\n";
// Format the " + <size fcn>(&...)" terms that add the variable-length payload
// of every buffer-typed select expression to the output tuple size.
// Expressions held in a selvar_<s> temporary (same test as
// gen_buffer_selvars) are sized through &selvar_<s>; the others are sized
// through the address of their generated code.
// NOTE(review): the generated expression is formatted into tmpstr with
// sprintf; tmpstr's capacity is not visible here -- confirm it cannot overflow.
7628 static string gen_buffer_selvars_size(vector<select_element *> &select_list,table_list *schema){
7632 for(s=0;s<select_list.size();s++){
7633 scalarexp_t *se = select_list[s]->se;
7634 data_type *sdt = se->get_data_type();
7635 if(sdt->is_buffer_type()){
7636 if( !( (se->get_operator_type() == SE_COLREF) ||
7637 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
7638 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
7640 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
7643 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),
7644 generate_se_code(se,schema).c_str());
// Format the buffer-destroy calls for the selvar_<s> temporaries.
// NOTE(review): the exclusion list here additionally skips SE_AGGR_STAR and
// SE_AGGR_SE, which the test in gen_buffer_selvars does not -- confirm the
// two are meant to differ.
7652 static string gen_buffer_selvars_dtr(vector<select_element *> &select_list){
7656 for(s=0;s<select_list.size();s++){
7657 scalarexp_t *se = select_list[s]->se;
7658 data_type *sdt = se->get_data_type();
7659 if(sdt->is_buffer_type() &&
7660 !( (se->get_operator_type() == SE_COLREF) ||
7661 (se->get_operator_type() == SE_AGGR_STAR) ||
7662 (se->get_operator_type() == SE_AGGR_SE) ||
7663 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
7664 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
7666 sprintf(tmpstr,"\t\t%s(&selvar_%d);\n",
7667 sdt->get_hfta_buffer_destroy().c_str(), s );
// Generate the code that fills the output tuple of node `node_name` from the
// select list.
//   temporal_only : when true, buffer-typed fields are skipped and only
//                   temporal scalar fields are assigned.
// tuple_pos starts just past the tuple struct and its one-byte trailer and
// advances by the buffer size of each variable-length field copied.
// NOTE(review): the in-place test here excludes only column references and
// partial functions, while gen_buffer_selvars also excludes aggregate-backed
// functions -- confirm a selvar_<s> exists whenever this branch uses one.
7675 static string gen_pack_tuple(table_list *schema, vector<select_element *> &select_list, string node_name, bool temporal_only){
7679 ret += "\tint tuple_pos = sizeof("+generate_tuple_name(node_name)+") + sizeof(gs_uint8_t);\n";
7680 for(s=0;s<select_list.size();s++){
7681 scalarexp_t *se = select_list[s]->se;
7682 data_type *sdt = se->get_data_type();
7684 if(!temporal_only && sdt->is_buffer_type()){
7685 if( !( (se->get_operator_type() == SE_COLREF) ||
7686 (se->get_operator_type() == SE_FUNC && se->is_partial()))
// Copy from the selvar_<s> temporary.
7688 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
7690 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
// Copy directly from the expression's generated code.
7693 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code(se,schema).c_str());
7695 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code(se,schema).c_str());
// Scalar fields are assigned directly.
7698 }else if (!temporal_only || sdt->is_temporal()) {
7699 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
7701 ret.append(generate_se_code(se,schema) );
7709 //-------------------------------------------------------------------------
7710 // functor generation methods
7711 //-------------------------------------------------------------------------
7713 /////////////////////////////////////////////////////////
7714 //// File Output Operator
// Name of the generated functor class for this node:
// "output_file_functor_" followed by the normalized node name.
7715 string output_file_qpn::generate_functor_name(){
7716 return("output_file_functor_" + normalize_name(get_node_name()));
7720 string output_file_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
7721 string ret = "class " + this->generate_functor_name() + "{\n";
7723 // Find the temporal field
7724 int temporal_field_idx;
7725 data_type *tdt = NULL;
7726 for(temporal_field_idx=0;temporal_field_idx<fields.size();temporal_field_idx++){
7727 tdt = new data_type(fields[temporal_field_idx]->get_type(), fields[temporal_field_idx]->get_modifier_list());
7728 if(tdt->is_temporal()){
7735 if(temporal_field_idx == fields.size()){
7736 fprintf(stderr,"ERROR, no temporal field for file output operator %s\n",node_name.c_str());
7740 ret += "private:\n";
7742 // var to save the schema handle
7743 ret += "\tint schema_handle0;\n";
7744 // tuple metadata offset
7745 ret += "\tint tuple_metadata_offset0;\n";
7746 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[temporal_field_idx]->get_name().c_str());
7749 // For unpacking the hashing fields, if any
7751 for(h=0;h<hash_flds.size();++h){
7752 sprintf(tmpstr,"unpack_var_%s", fields[hash_flds[h]]->get_name().c_str());
7753 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
7754 ret+="\t"+hdt->make_host_cvar(tmpstr)+";\n";
7755 if(hash_flds[h]!=temporal_field_idx){
7756 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[hash_flds[h]]->get_name().c_str());
7760 // Special case for output file hashing
7761 if(n_streams>1 && hash_flds.size()==0){
7762 ret+="\tgs_uint32_t outfl_cnt;\n";
7765 ret += "//\t\tRemember the last posted timestamp.\n";
7766 ret+="\t"+tdt->make_host_cvar("timestamp")+";\n";
7767 ret+="\t"+tdt->make_host_cvar("last_bucket")+";\n";
7768 ret+="\t"+tdt->make_host_cvar("slack")+";\n";
7769 ret += "\tbool first_execution;\n";
7770 ret += "\tbool temp_tuple_received;\n";
7771 ret += "\tbool is_eof;\n";
7773 ret += "\tgs_int32_t bucketwidth;\n";
7776 //-------------------
7777 // The functor constructor
7778 // pass in a schema handle (e.g. for the 1st input stream),
7779 // use it to determine how to unpack the merge variable.
7780 // ASSUME that both streams have the same layout,
7781 // just duplicate it.
7784 ret += "//\t\tFunctor constructor.\n";
7785 ret += this->generate_functor_name()+"(int schema_hndl){\n";
7787 ret += "\tschema_handle0 = schema_hndl;\n";
7788 // tuple metadata offset
7789 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
7791 if(output_spec->bucketwidth == 0)
7792 ret += "\tbucketwidth = 60;\n";
7794 ret += "\tbucketwidth = "+int_to_string(output_spec->bucketwidth)+";\n";
7795 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
7797 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[temporal_field_idx]->get_name().c_str(), fields[temporal_field_idx]->get_name().c_str());
7799 // Hashing field unpacking, if any
7800 for(h=0;h<hash_flds.size();++h){
7801 if(hash_flds[h]!=temporal_field_idx){
7802 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[hash_flds[h]]->get_name().c_str(),fields[hash_flds[h]]->get_name().c_str());
7807 ret+="\tfirst_execution = true;\n";
7809 // Initialize internal state
7810 ret += "\ttemp_tuple_received = false;\n";
7812 // Init last timestamp values to minimum value for their type
7813 if (tdt->is_increasing()){
7814 ret+="\ttimestamp = " + tdt->get_min_literal() + ";\n";
7815 ret+="\tlast_bucket = " + tdt->get_min_literal() + ";\n";
7817 ret+="\ttimestamp = " + tdt->get_max_literal() + ";\n";
7818 ret+="\tlast_bucket = " + tdt->get_max_literal() + ";\n";
7824 ret += "//\t\tFunctor destructor.\n";
7825 ret += "~"+this->generate_functor_name()+"(){\n";
7829 ret += "int load_params_"+this->generate_functor_name()+"(gs_int32_t sz, void *value){return 0;}\n";
7830 ret += "void destroy_params_"+this->generate_functor_name()+"(){}\n";
7832 // Register new parameter block
7833 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
7834 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
7835 ret += "\treturn this->load_params_"+this->generate_functor_name()+
7839 ret+="\nbool temp_status_received(const host_tuple& tup0)/* const*/ {\n";
7840 ret+="\tgs_int32_t problem;\n";
7842 ret += "\tvoid *tup_ptr = (void *)(&tup0);\n";
7843 ret += "\tis_eof = ftaschema_is_eof_tuple(schema_handle0,tup_ptr);\n";
7845 ret += gen_temp_tuple_check(this->node_name, 0);
7847 sprintf(tmpstr,"\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", tdt->get_hfta_unpack_fcn_noxf().c_str(), fields[temporal_field_idx]->get_name().c_str(), 0);
7850 for(h=0;h<hash_flds.size();++h){
7851 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
7852 sprintf(tmpstr,"\tunpack_var_%s = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", fields[hash_flds[h]]->get_name().c_str(), hdt->get_hfta_unpack_fcn_noxf().c_str(), fields[hash_flds[h]]->get_name().c_str(), 0);
7856 " return temp_tuple_received;\n"
7862 "bool new_epoch(){\n"
7863 " if(first_execution || (last_bucket + 1) * bucketwidth <= timestamp){\n"
7864 " last_bucket = timestamp / bucketwidth;\n"
7865 " first_execution = false;\n"
7875 "inline gs_uint32_t output_hash(){return 0;}\n\n";
7877 if(hash_flds.size()==0){
7879 "gs_uint32_t output_hash(){\n"
7881 " if(outfl_cnt >= "+int_to_string(n_streams)+")\n"
7883 " return outfl_cnt;\n"
7889 "gs_uint32_t output_hash(){\n"
7890 " gs_uint32_t ret = "
7892 for(h=0;h<hash_flds.size();++h){
7894 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
7895 if(hdt->use_hashfunc()){
7896 sprintf(tmpstr,"%s(&(unpack_var_%s))",hdt->get_hfta_hashfunc().c_str(),fields[hash_flds[h]]->get_name().c_str());
7898 sprintf(tmpstr,"unpack_var_%s",fields[hash_flds[h]]->get_name().c_str());
7904 " return ret % "+int_to_string(hash_flds.size())+";\n"
7911 "gs_uint32_t num_file_streams(){\n"
7912 " return("+int_to_string(n_streams)+");\n"
7917 "string get_filename_base(){\n"
7918 " char tmp_fname[500];\n";
7920 string output_filename_base = hfta_query_name+filestream_id;
7922 if(n_hfta_clones > 1){
7923 output_filename_base += "_"+int_to_string(parallel_idx);
7929 if(output_spec->output_directory == "")
7931 " sprintf(tmp_fname,\""+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
7933 " sprintf(tmp_fname,\""+output_spec->output_directory+"/"+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
7935 " return (string)(tmp_fname);\n"
7941 "bool do_compression(){\n";
7943 ret += " return true;\n";
7945 ret += " return false;\n";
7949 "bool is_eof_tuple(){\n"
7953 "bool propagate_tuple(){\n"
7956 ret+="\treturn false;\n";
7958 ret+="\treturn true;\n";
7960 // create a temp status tuple
7961 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
7963 ret += gen_init_temp_status_tuple(this->hfta_query_name);
7965 sprintf(tmpstr,"\ttuple->tuple_var%d = timestamp;\n",temporal_field_idx);
7970 ret += "\treturn 0;\n";
// Build the line of generated C++ that instantiates the file-output operator
// for this node.
//   i      : index used to name the generated pointer variable (op<i>).
//   params : text spliced in verbatim as the leading constructor argument(s).
// Returns a statement of the form
//   <optype><functor> *op<i> = new <optype><functor>(<params>, "<hfta_query_name>",<hfta_query_name>_schema_definition);
// where <functor> is generate_functor_name() and <optype> is chosen below.
7978 string output_file_qpn::generate_operator(int i, string params){
// Default operator template name; the switch below may overwrite it.
7979 string optype = "file_output_operator";
// Pick the operator template name from compression_type.
// NOTE(review): the case labels are not visible in this excerpt -- confirm
// which compression_type value selects each of the three names.
7980 switch(compression_type){
7982 optype = "file_output_operator";
7985 optype = "zfile_output_operator";
7988 optype = "bfile_output_operator";
// Assemble the declaration; the query name is emitted twice: once as a quoted
// string argument and once as the prefix of the *_schema_definition identifier.
7992 return(" "+optype+"<" +
7993 generate_functor_name() +
7994 "> *op"+int_to_string(i)+" = new "+optype+"<"+
7995 generate_functor_name() +">("+params+", \"" + hfta_query_name + "\""
7996 + "," + hfta_query_name + "_schema_definition);\n");
7999 /////////////////////////////////////////////////////////
// Name of the generated functor class for this select/project node:
// the prefix "spx_functor_" followed by the normalized node name.
// NOTE(review): normalize_name is applied twice here, whereas
// sgah_qpn::generate_functor_name applies it once -- presumably harmless if
// normalize_name is idempotent; confirm.
8003 string spx_qpn::generate_functor_name(){
8004 return("spx_functor_" + normalize_name(normalize_name(this->get_node_name())));
// Generate the C++ source text of the functor class for a select/project node.
//   schema      : table list, passed through to the access-variable, unpack,
//                 predicate and pack generators.
//   Ext_fcns    : external function list, used when finding partial functions
//                 and when building the complex-literal and handle-parameter tables.
//   needs_xform : passed through to the unpack generators.
// The generated text is accumulated in `ret`. In order, the emitted class holds:
// private state, the constructor, the destructor, the parameter-block routines,
// set_param_block(), predicate(), create_output_tuple(), temp_status_received()
// and create_temp_status_tuple().
// Side effects: sets the global segen_gb_tbl to NULL; find_partial_fcns marks
// the scalar expressions it visits (see the WARNING below).
// NOTE(review): several short lines (local declarations, closing braces, the
// end of the function) are not visible in this excerpt.
8007 string spx_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8008 // Initialize generate utility globals
8009 segen_gb_tbl = NULL;
// Open the generated class definition.
8011 string ret = "class " + this->generate_functor_name() + "{\n";
8013 // Find variables referenced in this query node.
8016 col_id_set::iterator csi;
// Column ids come from the WHERE predicates and from the select list.
8019 for(w=0;w<where.size();++w)
8020 gather_pr_col_ids(where[w]->pr,cid_set,NULL);
8021 for(s=0;s<select_list.size();s++){
8022 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
8026 // Private variables : store the state of the functor.
8027 // 1) variables for unpacked attributes
8028 // 2) offsets of the unpacked attributes
8029 // 3) storage of partial functions
8030 // 4) storage of complex literals (i.e., require a constructor)
8032 ret += "private:\n";
8033 ret += "\tbool first_execution;\t// internal processing state \n";
8034 ret += "\tint schema_handle0;\n";
8036 // generate the declaration of all the variables related to
8037 // temp tuples generation
8038 ret += gen_decl_temp_vars();
8041 // unpacked attribute storage, offsets
8042 ret += "//\t\tstorage and offsets of accessed fields.\n";
8043 ret += generate_access_vars(cid_set,schema);
8044 // tuple metadata management
8045 ret += "\tint tuple_metadata_offset0;\n";
8047 // Variables to store results of partial functions.
8048 // WARNING find_partial_functions modifies the SE
8049 // (it marks the partial function id).
8050 ret += "//\t\tParital function result storage\n";
// Parallel vectors indexed by partial-function position p.
8051 vector<scalarexp_t *> partial_fcns;
8052 vector<int> fcn_ref_cnt;
8053 vector<bool> is_partial_fcn;
8054 for(s=0;s<select_list.size();s++){
8055 find_partial_fcns(select_list[s]->se, &partial_fcns,&fcn_ref_cnt,&is_partial_fcn, Ext_fcns);
8057 for(w=0;w<where.size();w++){
8058 find_partial_fcns_pr(where[w]->pr, &partial_fcns, &fcn_ref_cnt,&is_partial_fcn,Ext_fcns);
8060 // Unmark non-partial expensive functions referenced only once.
8061 for(p=0; p<partial_fcns.size();p++){
8062 if(!is_partial_fcn[p] && fcn_ref_cnt[p] <= 1){
8063 partial_fcns[p]->set_partial_ref(-1);
8066 if(partial_fcns.size()>0){
8067 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,true);
8070 // Complex literals (i.e., they need constructors)
8071 ret += "//\t\tComplex literal storage.\n";
8072 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8073 ret += generate_complex_lit_vars(complex_literals);
8075 // Pass-by-handle parameters
8076 ret += "//\t\tPass-by-handle storage.\n";
8077 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8078 ret += generate_pass_by_handle_vars(param_handle_table);
8080 // Variables to hold parameters
8081 ret += "//\tfor query parameters\n";
8082 ret += generate_param_vars(param_tbl);
8085 // The publicly exposed functions
8087 ret += "\npublic:\n";
8090 //-------------------
8091 // The functor constructor
8092 // pass in the schema handle.
8093 // 1) make assignments to the unpack offset variables
8094 // 2) initialize the complex literals
8095 // 3) Set the initial values of the temporal attributes
8096 // referenced in select clause (in case we need to emit
8097 // temporal tuple before receiving first tuple )
8099 ret += "//\t\tFunctor constructor.\n";
8100 ret += this->generate_functor_name()+"(int schema_handle0){\n";
8102 // save schema handle
// The generated constructor parameter shadows the member of the same name,
// hence the explicit this-> in the emitted assignment.
8103 ret += "this->schema_handle0 = schema_handle0;\n";
8106 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8107 ret += gen_access_var_init(cid_set);
8109 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8112 ret += "//\t\tInitialize complex literals.\n";
8113 ret += gen_complex_lit_init(complex_literals);
8115 // Initialize partial function results so they can be safely GC'd
8116 ret += gen_partial_fcn_init(partial_fcns);
8118 // Initialize non-query-parameter parameter handles
8119 ret += gen_pass_by_handle_init(param_handle_table);
8121 // Init temporal attributes referenced in select list
8122 ret += gen_init_temp_vars(schema, select_list, NULL);
8127 //-------------------
8128 // Functor destructor
8129 ret += "//\t\tFunctor destructor.\n";
8130 ret += "~"+this->generate_functor_name()+"(){\n";
8132 // clean up buffer-type complex literals.
8133 ret += gen_complex_lit_dtr(complex_literals);
8135 // Deregister the pass-by-handle parameters
8136 ret += "/* register and de-register the pass-by-handle parameters */\n";
8137 ret += gen_pass_by_handle_dtr(param_handle_table);
8139 // Reclaim buffer space for partial function results
8140 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8141 ret += gen_partial_fcn_dtr(partial_fcns);
8144 // Destroy the parameters, if any need to be destroyed
8145 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8150 //-------------------
8151 // Parameter manipulation routines
8152 ret += generate_load_param_block(this->generate_functor_name(),
8153 this->param_tbl,param_handle_table );
8154 ret += generate_delete_param_block(this->generate_functor_name(),
8155 this->param_tbl,param_handle_table);
8158 //-------------------
8159 // Register new parameter block
// The generated set_param_block destroys the current parameters, then returns
// the result of loading the new block.
8160 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8161 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8162 ret += "\treturn this->load_params_"+this->generate_functor_name()+
8167 //-------------------
8168 // The selection predicate.
8169 // Unpack variables for 1 cnf element
8170 // at a time, return false immediately if the
8172 // optimization : evaluate the cheap cnf elements
8173 // first, the expensive ones last.
8175 ret += "bool predicate(host_tuple &tup0){\n";
8176 // Variables for execution of the function.
8177 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8178 // Initialize cached function indicators.
// Only functions referenced more than once get a fcn_ref_cnt_<p> reset.
8179 for(p=0;p<partial_fcns.size();++p){
8180 if(fcn_ref_cnt[p]>1){
8181 ret+="\tfcn_ref_cnt_"+int_to_string(p)+"=0;\n";
8186 ret += gen_temp_tuple_check(this->node_name, 0);
8188 if(partial_fcns.size()>0){ // partial fcn access failure
8189 ret += "\tgs_retval_t retval = 0;\n";
8193 // Reclaim buffer space for partial function results
8194 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8195 ret += gen_partial_fcn_dtr(partial_fcns);
8197 col_id_set found_cids; // colrefs unpacked thus far.
8198 ret += gen_unpack_temp_vars(schema, found_cids, select_list, NULL, needs_xform);
8200 // For temporal status tuple we don't need to do anything else
8201 ret += "\tif (temp_tuple_received) return false;\n\n";
// Emit one test per WHERE (CNF) element, in order; each generated test
// returns false as soon as its clause fails.
8204 for(w=0;w<where.size();++w){
8205 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8207 // Find the set of variables accessed in this CNF elem,
8208 // but in no previous element.
8209 col_id_set new_cids;
8210 get_new_pred_cids(where[w]->pr,found_cids, new_cids, NULL);
8211 // Unpack these values.
8212 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
8213 // Find partial fcns ref'd in this cnf element
8215 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8216 ret += gen_unpack_partial_fcn(schema,partial_fcns,pfcn_refs,fcn_ref_cnt, is_partial_fcn, "false");
// NOTE(review): the binary '+' ends the next line, so the '+' that starts the
// continuation line is a unary plus applied to the string literal.
8218 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8219 +") ) return(false);\n";
8222 // The partial functions ref'd in the select list
8223 // must also be evaluated. If one returns false,
8224 // then implicitly the predicate is false.
8226 for(s=0;s<select_list.size();s++){
8227 collect_partial_fcns(select_list[s]->se, sl_pfcns);
// NOTE(review): the if below has no braces, so it guards only the emitted
// comment line; gen_full_unpack_partial_fcn is called unconditionally.
8229 if(sl_pfcns.size() > 0)
8230 ret += "//\t\tUnpack remaining partial fcns.\n";
8231 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
8232 fcn_ref_cnt, is_partial_fcn,
8233 found_cids, NULL, "false", needs_xform);
8235 // Unpack remaining fields
8236 ret += "//\t\tunpack any remaining fields from the input tuple.\n";
8237 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
8240 ret += "\treturn(true);\n";
8244 //-------------------
8245 // The output tuple function.
8246 // Unpack the remaining attributes into
8247 // the placeholder variables, unpack the
8248 // partial fcn refs, then pack up the tuple.
8250 ret += "host_tuple create_output_tuple() {\n";
8251 ret += "\thost_tuple tup;\n";
8252 ret += "\tgs_retval_t retval = 0;\n";
8254 // Unpack any remaining cached functions.
8255 ret += gen_remaining_cached_fcns(schema, partial_fcns, sl_pfcns,
8256 fcn_ref_cnt, is_partial_fcn);
8259 // Now, compute the size of the tuple.
8261 // Unpack any BUFFER type selections into temporaries
8262 // so that I can compute their size and not have
8263 // to recompute their value during tuple packing.
8264 // I can use regular assignment here because
8265 // these temporaries are non-persistent.
8267 ret += "//\t\tCompute the size of the tuple.\n";
8268 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
8270 // Unpack all buffer type selections, to be able to compute their size
8271 ret += gen_buffer_selvars(schema, select_list);
8273 // The size of the tuple is the size of the tuple struct plus the
8274 // size of the buffers to be copied in.
// The extra sizeof(gs_uint8_t) is the one-byte tuple-type marker that the
// generated code writes immediately after the tuple struct (see below).
8277 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
8278 ret += gen_buffer_selvars_size(select_list,schema);
8281 // Allocate tuple data block.
8282 ret += "//\t\tCreate the tuple block.\n";
8283 ret += "\ttup.data = malloc(tup.tuple_size);\n";
8284 ret += "\ttup.heap_resident = true;\n";
8285 // Mark tuple as regular
8286 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
8288 // ret += "\ttup.channel = 0;\n";
8289 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
8290 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
8293 // (Here, offsets are hard-wired. is this a problem?)
8295 ret += "//\t\tPack the fields into the tuple.\n";
8296 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
8298 // Delete string temporaries
8299 ret += gen_buffer_selvars_dtr(select_list);
8301 ret += "\treturn tup;\n";
8304 //-------------------------------------------------------------------
8305 // Temporal update functions
8307 ret += "bool temp_status_received(){return temp_tuple_received;};\n\n";
8310 // create a temp status tuple
8311 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8313 ret += gen_init_temp_status_tuple(this->get_node_name());
8316 // (Here, offsets are hard-wired. is this a problem?)
// Same packer as create_output_tuple, but with the last argument set to true.
8318 ret += "//\t\tPack the fields into the tuple.\n";
8319 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
8321 ret += "\treturn 0;\n";
// Build the line of generated C++ that instantiates the select/project
// operator for this node.
//   i      : index used to name the generated pointer variable (op<i>).
//   params : text spliced in verbatim as the leading constructor argument(s).
// Returns a statement of the form
//   select_project_operator<functor> *op<i> = new select_project_operator<functor>(<params>, "<node name>");
// where <functor> is generate_functor_name().
8328 string spx_qpn::generate_operator(int i, string params){
8330 return(" select_project_operator<" +
8331 generate_functor_name() +
8332 "> *op"+int_to_string(i)+" = new select_project_operator<"+
8333 generate_functor_name() +">("+params+", \"" + get_node_name() + "\");\n");
8337 ////////////////////////////////////////////////////////////////
// Name of the generated functor class for this node: the prefix
// "sgah_functor_" followed by the normalized node name. The same name is
// used as the stem for the generated *_groupdef, *_aggrdef and *_gb_patterns
// identifiers in generate_functor.
8342 string sgah_qpn::generate_functor_name(){
8343 return("sgah_functor_" + normalize_name(this->get_node_name()));
8347 string sgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8351 // Initialize generate utility globals
8352 segen_gb_tbl = &(gb_tbl);
8354 // Might need to generate empty values for cube processing.
8355 map<int, string> structured_types;
8356 for(g=0;g<gb_tbl.size();++g){
8357 if(gb_tbl.get_data_type(g)->is_structured_type()){
8358 structured_types[gb_tbl.get_data_type(g)->type_indicator()] = gb_tbl.get_data_type(g)->get_type_str();
8362 //--------------------------------
8363 // group definition class
8364 string ret = "class " + generate_functor_name() + "_groupdef{\n";
8366 for(g=0;g<this->gb_tbl.size();g++){
8367 sprintf(tmpstr,"gb_var%d",g);
8368 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8370 // empty structured literals
8371 map<int, string>::iterator sii;
8372 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8373 data_type dt(sii->second);
8374 literal_t empty_lit(sii->first);
8375 ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8378 if(structured_types.size()==0){
8379 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
8381 ret += "\t"+generate_functor_name() + "_groupdef(){}\n";
8385 ret += "\t"+generate_functor_name() + "_groupdef("+
8386 this->generate_functor_name() + "_groupdef *gd){\n";
8387 for(g=0;g<gb_tbl.size();g++){
8388 data_type *gdt = gb_tbl.get_data_type(g);
8389 if(gdt->is_buffer_type()){
8390 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8391 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8394 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8399 ret += "\t"+generate_functor_name() + "_groupdef("+
8400 this->generate_functor_name() + "_groupdef *gd, bool *pattern){\n";
8401 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8402 literal_t empty_lit(sii->first);
8403 ret += "\t\t"+empty_lit.to_hfta_C_code("&"+empty_lit.hfta_empty_literal_name())+";\n";
8405 for(g=0;g<gb_tbl.size();g++){
8406 data_type *gdt = gb_tbl.get_data_type(g);
8407 ret += "\t\tif(pattern["+int_to_string(g)+"]){\n";
8408 if(gdt->is_buffer_type()){
8409 sprintf(tmpstr,"\t\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8410 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8413 sprintf(tmpstr,"\t\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8416 ret += "\t\t}else{\n";
8417 literal_t empty_lit(gdt->type_indicator());
8418 if(empty_lit.is_cpx_lit()){
8419 ret +="\t\t\tgb_var"+int_to_string(g)+"= "+empty_lit.hfta_empty_literal_name()+";\n";
8421 ret +="\t\t\tgb_var"+int_to_string(g)+"="+empty_lit.to_hfta_C_code("")+";\n";
8427 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
8428 for(g=0;g<gb_tbl.size();g++){
8429 data_type *gdt = gb_tbl.get_data_type(g);
8430 if(gdt->is_buffer_type()){
8431 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
8432 gdt->get_hfta_buffer_destroy().c_str(), g );
8439 for(g=0;g<gb_tbl.size();g++){
8440 data_type *gdt = gb_tbl.get_data_type(g);
8441 if(gdt->is_temporal()){
8446 ret += tgdt->get_host_cvar_type()+" get_curr_gb(){\n";
8447 ret+="\treturn gb_var"+int_to_string(g)+";\n";
8452 //--------------------------------
8453 // aggr definition class
8454 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
8456 for(a=0;a<aggr_tbl.size();a++){
8457 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
8458 sprintf(tmpstr,"aggr_var%d",a);
8459 if(aggr_tbl.is_builtin(a)){
8460 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
8461 if(aggr_tbl.get_op(a) == "AVG"){ // HACK!
8462 data_type cnt_type = data_type("ullong");
8463 ret+="\t"+cnt_type.make_host_cvar(string(tmpstr)+"_cnt")+";\n";
8464 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(string(tmpstr)+"_sum")+";\n";
8467 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
8471 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
8473 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
8474 for(a=0;a<aggr_tbl.size();a++){
8475 if(aggr_tbl.is_builtin(a)){
8476 data_type *adt = aggr_tbl.get_data_type(a);
8477 if(adt->is_buffer_type()){
8478 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
8479 adt->get_hfta_buffer_destroy().c_str(), a );
8483 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
8484 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
8485 ret+="(aggr_var"+int_to_string(a)+"));\n";
8491 //-------------------------------------------
8492 // group-by patterns for the functor,
8493 // initialization within the class is cumbersome.
8494 int n_patterns = gb_tbl.gb_patterns.size();
8496 ret += "bool "+this->generate_functor_name()+"_gb_patterns["+int_to_string(n_patterns)+
8497 "]["+int_to_string(gb_tbl.size())+"] = {\n";
8498 if(n_patterns == 0){
8499 for(i=0;i<gb_tbl.size();++i){
8504 for(i=0;i<n_patterns;++i){
8505 if(i>0) ret += ",\n";
8507 for(j=0;j<gb_tbl.size();j++){
8508 if(j>0) ret += ", ";
8509 if(gb_tbl.gb_patterns[i][j]){
8522 //--------------------------------
8524 ret += "class " + this->generate_functor_name() + "{\n";
8526 // Find variables referenced in this query node.
8529 col_id_set::iterator csi;
8531 for(w=0;w<where.size();++w)
8532 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
8533 for(w=0;w<having.size();++w)
8534 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
8535 for(g=0;g<gb_tbl.size();g++)
8536 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
8538 for(s=0;s<select_list.size();s++){
8539 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
8543 // Private variables : store the state of the functor.
8544 // 1) variables for unpacked attributes
8545 // 2) offsets of the unpacked attributes
8546 // 3) storage of partial functions
8547 // 4) storage of complex literals (i.e., require a constructor)
8549 ret += "private:\n";
8551 // var to save the schema handle
8552 ret += "\tint schema_handle0;\n";
8553 // metadata from schema handle
8554 ret += "\tint tuple_metadata_offset0;\n";
8556 // generate the declaration of all the variables related to
8557 // temp tuples generation
8558 ret += gen_decl_temp_vars();
8560 // unpacked attribute storage, offsets
8561 ret += "//\t\tstorage and offsets of accessed fields.\n";
8562 ret += generate_access_vars(cid_set, schema);
8564 // Variables to store results of partial functions.
8565 // WARNING find_partial_functions modifies the SE
8566 // (it marks the partial function id).
8567 ret += "//\t\tParital function result storage\n";
8568 vector<scalarexp_t *> partial_fcns;
8569 vector<int> fcn_ref_cnt;
8570 vector<bool> is_partial_fcn;
8571 for(s=0;s<select_list.size();s++){
8572 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
8574 for(w=0;w<where.size();w++){
8575 find_partial_fcns_pr(where[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
8577 for(w=0;w<having.size();w++){
8578 find_partial_fcns_pr(having[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
8580 for(g=0;g<gb_tbl.size();g++){
8581 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns,NULL,NULL, Ext_fcns);
8583 for(a=0;a<aggr_tbl.size();a++){
8584 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns,NULL,NULL, Ext_fcns);
8586 if(partial_fcns.size()>0){
8587 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
8588 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
8591 // Complex literals (i.e., they need constructors)
8592 ret += "//\t\tComplex literal storage.\n";
8593 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8594 ret += generate_complex_lit_vars(complex_literals);
8596 // Pass-by-handle parameters
8597 ret += "//\t\tPass-by-handle storage.\n";
8598 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8599 ret += generate_pass_by_handle_vars(param_handle_table);
8602 // variables to hold parameters.
8603 ret += "//\tfor query parameters\n";
8604 ret += generate_param_vars(param_tbl);
8606 // Is there a temporal flush? If so create flush temporaries,
8607 // create flush indicator.
8608 bool uses_temporal_flush = false;
8609 for(g=0;g<gb_tbl.size();g++){
8610 data_type *gdt = gb_tbl.get_data_type(g);
8611 if(gdt->is_temporal())
8612 uses_temporal_flush = true;
8615 if(uses_temporal_flush){
8616 ret += "//\t\tFor temporal flush\n";
8617 for(g=0;g<gb_tbl.size();g++){
8618 data_type *gdt = gb_tbl.get_data_type(g);
8619 if(gdt->is_temporal()){
8620 sprintf(tmpstr,"last_gb%d",g);
8621 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8622 sprintf(tmpstr,"last_flushed_gb%d",g);
8623 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8626 ret += "\tbool needs_temporal_flush;\n";
8630 // The publicly exposed functions
8632 ret += "\npublic:\n";
8635 //-------------------
8636 // The functor constructor
8637 // pass in the schema handle.
8638 // 1) make assignments to the unpack offset variables
8639 // 2) initialize the complex literals
8641 ret += "//\t\tFunctor constructor.\n";
8642 ret += this->generate_functor_name()+"(int schema_handle0){\n";
8644 // save the schema handle
8645 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
8648 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8649 ret += gen_access_var_init(cid_set);
8651 ret += "tuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8654 ret += "//\t\tInitialize complex literals.\n";
8655 ret += gen_complex_lit_init(complex_literals);
8657 // Initialize partial function results so they can be safely GC'd
8658 ret += gen_partial_fcn_init(partial_fcns);
8660 // Initialize non-query-parameter parameter handles
8661 ret += gen_pass_by_handle_init(param_handle_table);
8663 // temporal flush variables
8664 // ASSUME that structured values won't be temporal.
8665 if(uses_temporal_flush){
8666 ret += "//\t\tInitialize temporal flush variables.\n";
8667 for(g=0;g<gb_tbl.size();g++){
8668 data_type *gdt = gb_tbl.get_data_type(g);
8669 if(gdt->is_temporal()){
8670 literal_t gl(gdt->type_indicator());
8671 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
8673 sprintf(tmpstr,"\tlast_flushed_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
8677 ret += "\tneeds_temporal_flush = false;\n";
8680 // Init temporal attributes referenced in select list
8681 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
8685 //-------------------
8686 // Functor destructor
8687 ret += "//\t\tFunctor destructor.\n";
8688 ret += "~"+this->generate_functor_name()+"(){\n";
8690 // clean up buffer type complex literals
8691 ret += gen_complex_lit_dtr(complex_literals);
8693 // Deregister the pass-by-handle parameters
8694 ret += "/* register and de-register the pass-by-handle parameters */\n";
8695 ret += gen_pass_by_handle_dtr(param_handle_table);
8697 // clean up partial function results.
8698 ret += "/* clean up partial function storage */\n";
8699 ret += gen_partial_fcn_dtr(partial_fcns);
8701 // Destroy the parameters, if any need to be destroyed
8702 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8707 //-------------------
8708 // Parameter manipulation routines
8709 ret += generate_load_param_block(this->generate_functor_name(),
8710 this->param_tbl,param_handle_table);
8711 ret += generate_delete_param_block(this->generate_functor_name(),
8712 this->param_tbl,param_handle_table);
8714 //-------------------
8715 // Register new parameter block
8717 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8718 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8719 ret += "\treturn this->load_params_"+this->generate_functor_name()+
8723 // -----------------------------------
8724 // group-by pattern support
8727 "int n_groupby_patterns(){\n"
8728 " return "+int_to_string(gb_tbl.gb_patterns.size())+";\n"
8730 "bool *get_pattern(int p){\n"
8731 " return "+this->generate_functor_name()+"_gb_patterns[p];\n"
8738 //-------------------
8739 // the create_group method.
8740 // This method creates a group in a buffer passed in
8741 // (to allow for creation on the stack).
8742 // There are also a couple of side effects:
8743 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
8744 // 2) determine if a temporal flush is required.
8746 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
8747 // Variables for execution of the function.
8748 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8750 if(partial_fcns.size()>0){ // partial fcn access failure
8751 ret += "\tgs_retval_t retval = 0;\n";
8755 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
8756 "_groupdef *) buffer;\n";
8758 // Start by cleaning up partial function results
8759 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8760 set<int> w_pfcns; // partial fcns in where clause
8761 for(w=0;w<where.size();++w)
8762 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
8764 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
8765 for(g=0;g<gb_tbl.size();g++){
8766 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
8768 for(a=0;a<aggr_tbl.size();a++){
8769 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
8771 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
8772 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
8773 // ret += gen_partial_fcn_dtr(partial_fcns);
8776 ret += gen_temp_tuple_check(this->node_name, 0);
8777 col_id_set found_cids; // colrefs unpacked thus far.
8778 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
8781 // Save temporal group-by variables
8784 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
8786 for(g=0;g<gb_tbl.size();g++){
8788 data_type *gdt = gb_tbl.get_data_type(g);
8790 if(gdt->is_temporal()){
8791 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8792 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
8800 // Compare the temporal GB vars with the stored ones,
8801 // set flush indicator and update stored GB vars if there is any change.
8803 ret += "// hfta_disorder = "+int_to_string(hfta_disorder)+"\n";
8804 if(hfta_disorder < 2){
8805 if(uses_temporal_flush){
8807 bool first_one = true;
8808 for(g=0;g<gb_tbl.size();g++){
8809 data_type *gdt = gb_tbl.get_data_type(g);
8811 if(gdt->is_temporal()){
8812 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
8813 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
8814 if(first_one){first_one = false;} else {ret += ") && (";}
8815 ret += generate_equality_test(lhs_op, rhs_op, gdt);
8819 for(g=0;g<gb_tbl.size();g++){
8820 data_type *gdt = gb_tbl.get_data_type(g);
8821 if(gdt->is_temporal()){
8822 if(gdt->is_buffer_type()){
8823 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
8825 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
8827 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
8832 ret += "\t\tneeds_temporal_flush=true;\n";
8833 ret += "\t\t}else{\n"
8834 "\t\t\tneeds_temporal_flush=false;\n"
8838 ret+= "\tif(temp_tuple_received && !( (";
8839 bool first_one = true;
8840 for(g=0;g<gb_tbl.size();g++){
8841 data_type *gdt = gb_tbl.get_data_type(g);
8843 if(gdt->is_temporal()){
8844 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
8845 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
8846 if(first_one){first_one = false;} else {ret += ") && (";}
8847 ret += generate_equality_test(lhs_op, rhs_op, gdt);
8853 for(g=0;g<gb_tbl.size();g++){
8854 data_type *gdt = gb_tbl.get_data_type(g);
8855 if(gdt->is_temporal()){
8857 if(gdt->is_buffer_type()){
8858 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
8860 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
8862 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
8868 data_type *tgdt = gb_tbl.get_data_type(temporal_g);
8869 literal_t gl(tgdt->type_indicator());
8870 ret += "\t\tif(last_flushed_gb"+int_to_string(temporal_g)+">"+gl.to_hfta_C_code("")+")\n";
8871 ret += "\t\t\tneeds_temporal_flush=true;\n";
8872 ret += "\t\t}else{\n"
8873 "\t\t\tneeds_temporal_flush=false;\n"
8878 // For temporal status tuple we don't need to do anything else
8879 ret += "\tif (temp_tuple_received) return NULL;\n\n";
8881 for(w=0;w<where.size();++w){
8882 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8884 // Find the set of variables accessed in this CNF elem,
8885 // but in no previous element.
8886 col_id_set new_cids;
8887 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
8889 // Unpack these values.
8890 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
8891 // Find partial fcns ref'd in this cnf element
8893 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8894 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
8896 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8897 +") ) return(NULL);\n";
8900 // The partial functions ref'd in the group-by var and aggregate
8901 // definitions must also be evaluated. If one returns false,
8902 // then implicitly the predicate is false.
8903 set<int>::iterator pfsi;
8905 if(ag_gb_pfcns.size() > 0)
8906 ret += "//\t\tUnpack remaining partial fcns.\n";
8907 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
8908 found_cids, segen_gb_tbl, "NULL", needs_xform);
8910 // Unpack the group-by variables
8912 for(g=0;g<gb_tbl.size();g++){
8913 data_type *gdt = gb_tbl.get_data_type(g);
8915 if(!gdt->is_temporal()){
8916 // Find the new fields ref'd by this GBvar def.
8917 col_id_set new_cids;
8918 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
8919 // Unpack these values.
8920 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
8922 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8923 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
8925 // There seems to be no difference between the two
8926 // branches of the IF statement.
8927 data_type *gdt = gb_tbl.get_data_type(g);
8928 if(gdt->is_buffer_type()){
8929 // Create temporary copy.
8930 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8931 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
8933 scalarexp_t *gse = gb_tbl.get_def(g);
8934 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8935 g,generate_se_code(gse,schema).c_str());
8944 ret+= "\treturn gbval;\n";
8947 //--------------------------------------------------------
8948 // Create and initialize an aggregate object
8950 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
8951 // Variables for execution of the function.
8952 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8955 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
8956 "_aggrdef *)buffer;\n";
8958 for(a=0;a<aggr_tbl.size();a++){
8959 if(aggr_tbl.is_builtin(a)){
8960 // Create temporaries for buffer return values
8961 data_type *adt = aggr_tbl.get_data_type(a);
8962 if(adt->is_buffer_type()){
8963 sprintf(tmpstr,"aggr_tmp_%d", a);
8964 ret+=adt->make_host_cvar(tmpstr)+";\n";
8969 // Unpack all remaining attributes
8970 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
8971 for(a=0;a<aggr_tbl.size();a++){
8972 sprintf(tmpstr,"aggval->aggr_var%d",a);
8973 string assignto_var = tmpstr;
8974 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
8977 ret += "\treturn aggval;\n";
8980 //--------------------------------------------------------
8981 // update an aggregate object
8983 ret += "void update_aggregate(host_tuple &tup0, "
8984 +generate_functor_name()+"_groupdef *gbval, "+
8985 generate_functor_name()+"_aggrdef *aggval){\n";
8986 // Variables for execution of the function.
8987 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8989 // use of temporaries depends on the aggregate,
8990 // generate them in generate_aggr_update
8993 // Unpack all remaining attributes
8994 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
8995 for(a=0;a<aggr_tbl.size();a++){
8996 sprintf(tmpstr,"aggval->aggr_var%d",a);
8997 string varname = tmpstr;
8998 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
9001 ret += "\treturn;\n";
9004 //---------------------------------------------------
9007 ret += "\tbool flush_needed(){\n";
9008 if(uses_temporal_flush){
9009 ret += "\t\treturn needs_temporal_flush;\n";
9011 ret += "\t\treturn false;\n";
9015 //---------------------------------------------------
9016 // create output tuple
9017 // Unpack the partial functions ref'd in the where clause,
9018 // select clause. Evaluate the where clause.
9019 // Finally, pack the tuple.
9021 // I need to use special code generation here,
9022 // so I'll leave it in longhand.
9024 ret += "host_tuple create_output_tuple("
9025 +generate_functor_name()+"_groupdef *gbval, "+
9026 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
9028 ret += "\thost_tuple tup;\n";
9029 ret += "\tfailed = false;\n";
9030 ret += "\tgs_retval_t retval = 0;\n";
9032 string gbvar = "gbval->gb_var";
9033 string aggvar = "aggval->";
9035 // Create cached temporaries for UDAF return values.
9036 for(a=0;a<aggr_tbl.size();a++){
9037 if(! aggr_tbl.is_builtin(a)){
9038 int afcn_id = aggr_tbl.get_fcn_id(a);
9039 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9040 sprintf(tmpstr,"udaf_ret_%d", a);
9041 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
9046 // First, get the return values from the UDAFS
9047 for(a=0;a<aggr_tbl.size();a++){
9048 if(! aggr_tbl.is_builtin(a)){
9049 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
9050 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9051 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
9055 set<int> hv_sl_pfcns;
9056 for(w=0;w<having.size();w++){
9057 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
9059 for(s=0;s<select_list.size();s++){
9060 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
9063 // clean up the partial fcn results from any previous execution
9064 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
9067 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
9068 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
9069 ret += "\tif(retval){ failed = true; return(tup);}\n";
9072 // Evaluate the HAVING clause
9073 // TODO: this seems to have a ++ operator rather than a + operator.
9074 for(w=0;w<having.size();++w){
9075 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
9078 // Now, compute the size of the tuple.
9080 // Unpack any BUFFER type selections into temporaries
9081 // so that I can compute their size and not have
9082 // to recompute their value during tuple packing.
9083 // I can use regular assignment here because
9084 // these temporaries are non-persistent.
9085 // TODO: should I be using the selvar generation routine?
9087 ret += "//\t\tCompute the size of the tuple.\n";
9088 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
9089 for(s=0;s<select_list.size();s++){
9090 scalarexp_t *se = select_list[s]->se;
9091 data_type *sdt = se->get_data_type();
9092 if(sdt->is_buffer_type() &&
9093 !( (se->get_operator_type() == SE_COLREF) ||
9094 (se->get_operator_type() == SE_AGGR_STAR) ||
9095 (se->get_operator_type() == SE_AGGR_SE) ||
9096 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9097 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9099 sprintf(tmpstr,"selvar_%d",s);
9100 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
9101 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
9105 // The size of the tuple is the size of the tuple struct plus the
9106 // size of the buffers to be copied in.
9108 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
9109 for(s=0;s<select_list.size();s++){
9110 // if(s>0) ret += "+";
9111 scalarexp_t *se = select_list[s]->se;
9112 data_type *sdt = select_list[s]->se->get_data_type();
9113 if(sdt->is_buffer_type()){
9114 if(!( (se->get_operator_type() == SE_COLREF) ||
9115 (se->get_operator_type() == SE_AGGR_STAR) ||
9116 (se->get_operator_type() == SE_AGGR_SE) ||
9117 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9118 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9120 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
9123 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9130 // Allocate tuple data block.
9131 ret += "//\t\tCreate the tuple block.\n";
9132 ret += "\ttup.data = malloc(tup.tuple_size);\n";
9133 ret += "\ttup.heap_resident = true;\n";
9135 // Mark tuple as regular
9136 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
9138 // ret += "\ttup.channel = 0;\n";
9139 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
9140 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
9143 // (Here, offsets are hard-wired. is this a problem?)
9145 ret += "//\t\tPack the fields into the tuple.\n";
9146 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
9147 for(s=0;s<select_list.size();s++){
9148 scalarexp_t *se = select_list[s]->se;
9149 data_type *sdt = se->get_data_type();
9150 if(sdt->is_buffer_type()){
9151 if(!( (se->get_operator_type() == SE_COLREF) ||
9152 (se->get_operator_type() == SE_AGGR_STAR) ||
9153 (se->get_operator_type() == SE_AGGR_SE) ||
9154 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9155 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9157 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
9159 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
9162 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9164 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9168 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9170 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
9175 // Destroy string temporaries
9176 ret += gen_buffer_selvars_dtr(select_list);
9177 // Destroy string return vals of UDAFs
9178 for(a=0;a<aggr_tbl.size();a++){
9179 if(! aggr_tbl.is_builtin(a)){
9180 int afcn_id = aggr_tbl.get_fcn_id(a);
9181 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9182 if(adt->is_buffer_type()){
9183 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
9184 adt->get_hfta_buffer_destroy().c_str(), a );
9191 ret += "\treturn tup;\n";
9195 //-------------------------------------------------------------------
9196 // Temporal update functions
9198 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
9200 for(g=0;g<gb_tbl.size();g++){
9201 data_type *gdt = gb_tbl.get_data_type(g);
9202 if(gdt->is_temporal()){
9207 ret += tgdt->get_host_cvar_type()+" get_last_flushed_gb(){\n";
9208 ret+="\treturn last_flushed_gb"+int_to_string(g)+";\n";
9210 ret += tgdt->get_host_cvar_type()+" get_last_gb(){\n";
9211 ret+="\treturn last_gb"+int_to_string(g)+";\n";
9217 // create a temp status tuple
9218 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
9220 ret += gen_init_temp_status_tuple(this->get_node_name());
9223 // (Here, offsets are hard-wired. is this a problem?)
9225 ret += "//\t\tPack the fields into the tuple.\n";
9226 for(s=0;s<select_list.size();s++){
9227 data_type *sdt = select_list[s]->se->get_data_type();
9228 if(sdt->is_temporal()){
9229 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9232 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
9239 ret += "\treturn 0;\n";
9240 ret += "};};\n\n\n";
9243 //----------------------------------------------------------
9244 // The hash function
9246 ret += "struct "+generate_functor_name()+"_hash_func{\n";
9247 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
9248 "_groupdef *grp) const{\n";
9249 ret += "\t\treturn( (";
9250 for(g=0;g<gb_tbl.size();g++){
9252 data_type *gdt = gb_tbl.get_data_type(g);
9253 if(gdt->use_hashfunc()){
9254 if(gdt->is_buffer_type())
9255 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9257 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9259 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
9263 ret += ") >> 32);\n";
9267 //----------------------------------------------------------
9268 // The comparison function
9270 ret += "struct "+generate_functor_name()+"_equal_func{\n";
9271 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
9272 generate_functor_name()+"_groupdef *grp2) const{\n";
9273 ret += "\t\treturn( (";
9275 for(g=0;g<gb_tbl.size();g++){
9276 if(g>0) ret += ") && (";
9277 data_type *gdt = gb_tbl.get_data_type(g);
9278 if(gdt->complex_comparison(gdt)){
9279 if(gdt->is_buffer_type())
9280 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
9281 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9283 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
9284 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9286 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
// Build the C++ source text that declares and heap-allocates the group-by
// operator object for this query node.
//   i      - operator index; the generated pointer variable is named op<i>.
//   params - text spliced in verbatim as the leading constructor argument(s);
//            the node name follows as a quoted string literal.
// The template arguments are always the functor name plus its _groupdef,
// _aggrdef, _hash_func and _equal_func companions.  The groupby_operator_oop
// variant takes one extra template argument, tgdt->get_host_cvar_type().
9298 string sgah_qpn::generate_operator(int i, string params){
// hfta_disorder < 2: instantiate the plain groupby_operator template.
9300 if(hfta_disorder < 2){
9302 " groupby_operator<" +
9303 generate_functor_name()+","+
9304 generate_functor_name() + "_groupdef, " +
9305 generate_functor_name() + "_aggrdef, " +
9306 generate_functor_name()+"_hash_func, "+
9307 generate_functor_name()+"_equal_func "
9308 "> *op"+int_to_string(i)+" = new groupby_operator<"+
9309 generate_functor_name()+","+
9310 generate_functor_name() + "_groupdef, " +
9311 generate_functor_name() + "_aggrdef, " +
9312 generate_functor_name()+"_hash_func, "+
9313 generate_functor_name()+"_equal_func "
9314 ">("+params+", \"" + get_node_name() +
// Scan the group-by variables for a temporal one.
// NOTE(review): tgdt, used below, is presumably set to the data type of the
// temporal group-by variable found by this loop - confirm.
9319 for(int g=0;g<gb_tbl.size();g++){
9320 data_type *gdt = gb_tbl.get_data_type(g);
9321 if(gdt->is_temporal()){
// Instantiate groupby_operator_oop, passing the temporal variable's host C
// type as the additional (last) template argument.
9328 " groupby_operator_oop<" +
9329 generate_functor_name()+","+
9330 generate_functor_name() + "_groupdef, " +
9331 generate_functor_name() + "_aggrdef, " +
9332 generate_functor_name()+"_hash_func, "+
9333 generate_functor_name()+"_equal_func, " +
9334 tgdt->get_host_cvar_type() +
9335 "> *op"+int_to_string(i)+" = new groupby_operator_oop<"+
9336 generate_functor_name()+","+
9337 generate_functor_name() + "_groupdef, " +
9338 generate_functor_name() + "_aggrdef, " +
9339 generate_functor_name()+"_hash_func, "+
9340 generate_functor_name()+"_equal_func, " +
9341 tgdt->get_host_cvar_type() +
9342 ">("+params+", \"" + get_node_name() +
9348 ////////////////////////////////////////////////
9351 ////////////////////////////////////////////
// Name of the generated merge functor class for this node:
// the prefix "mrg_functor_" followed by the normalized node name.
9353 string mrg_qpn::generate_functor_name(){
9354 return("mrg_functor_" + normalize_name(this->get_node_name()));
// Generate the C++ source text of the merge functor class for this node.
// The text is accumulated in `ret` and consists of, in order:
//   - private state: schema handle, temp-tuple variables, tuple metadata
//     offsets, unpack variables/offsets for the two merge fields, the last
//     posted timestamp per input, the current timestamp, slack, parameters;
//   - constructor and destructor;
//   - parameter load/delete/set_param_block routines;
//   - timestamp comparison and temp-status bookkeeping methods;
//   - create_temp_status_tuple, xform_tuple and print_warnings.
// Parameters:
//   schema      - used to look up the type name and modifier list of the
//                 merge fields, and passed to generate_se_code for slack.
//   Ext_fcns    - NOTE(review): appears unused in this function - confirm.
//   needs_xform - indexed at [0] and [1]; selects the unpack function
//                 variant per input and gates the body of xform_tuple.
// Returns: presumably the accumulated class text in `ret` - TODO confirm.
9357 string mrg_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
// Sanity checks on the merge inputs; failures are reported on stderr.
// NOTE(review): %lu is used to print size() results; %zu is the portable
// conversion for size_t - confirm before changing.
9362 if(fm.size() != mvars.size()){
9363 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=%lu, mvars.size=%lu\n",fm.size(),mvars.size());
9367 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=mvars.size=%lu\n",fm.size());
9372 // Initialize generate utility globals
9373 segen_gb_tbl = NULL;
// Open the functor class definition.
9375 string ret = "class " + this->generate_functor_name() + "{\n";
9377 // Private variable:
9378 // 1) Vars for unpacked attrs.
9379 // 2) offsets of the unpacked attrs
9380 // 3) last_posted_timestamp
// Type name and modifier list of each merge field, looked up in the schema.
// NOTE(review): these presumably construct the data_type objects dta
// (mvars[0]) and dtb (mvars[1]) used below - confirm.
9383 schema->get_type_name(mvars[0]->get_schema_ref(), mvars[0]->get_field()),
9384 schema->get_modifier_list(mvars[0]->get_schema_ref(), mvars[0]->get_field())
9387 schema->get_type_name(mvars[1]->get_schema_ref(), mvars[1]->get_field()),
9388 schema->get_modifier_list(mvars[1]->get_schema_ref(), mvars[1]->get_field())
9391 ret += "private:\n";
9393 // var to save the schema handle
9394 ret += "\tint schema_handle0;\n";
9396 // generate the declaration of all the variables related to
9397 // temp tuples generation
9398 ret += gen_decl_temp_vars();
9400 // unpacked attribute storage, offsets
9401 ret += "//\t\tstorage and offsets of accessed fields.\n";
9402 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
// One unpack variable and one offset per merge field; the generated names
// are unpack_var_<field>_<tblref> and unpack_offset_<field>_<tblref>.
9404 sprintf(tmpstr,"unpack_var_%s_%d", mvars[0]->get_field().c_str(), tblref);
9405 ret+="\t"+dta.make_host_cvar(tmpstr)+";\n";
9406 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[0]->get_field().c_str(), tblref);
9409 sprintf(tmpstr,"unpack_var_%s_%d", mvars[1]->get_field().c_str(), tblref);
9410 ret+="\t"+dtb.make_host_cvar(tmpstr)+";\n";
9411 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[1]->get_field().c_str(), tblref);
// All timestamp state, including slack, is declared with the type of the
// first merge field (dta).
9414 ret += "//\t\tRemember the last posted timestamp.\n";
9415 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_0")+";\n";
9416 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_1")+";\n";
9417 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9418 ret+="\t"+dta.make_host_cvar("slack")+";\n";
9419 // ret += "\t bool first_execution_0, first_execution_1;\n";
9421 // variables to hold parameters.
9422 ret += "//\tfor query parameters\n";
9423 ret += generate_param_vars(param_tbl);
9426 //-------------------
9427 // The functor constructor
9428 // pass in a schema handle (e.g. for the 1st input stream),
9429 // use it to determine how to unpack the merge variable.
9430 // ASSUME that both streams have the same layout,
9431 // just duplicate it.
9434 ret += "//\t\tFunctor constructor.\n";
9435 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9437 // var to save the schema handle
9438 ret += "\tthis->schema_handle0 = schema_handle0;\n";
// Both metadata offsets are derived from schema_handle0, in line with the
// same-layout assumption stated above.
9439 ret += "\ttuple_metadata_offset0=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9440 ret += "\ttuple_metadata_offset1=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9442 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
// The offset for input 0 is looked up by field name; the offset for input 1
// is copied from it.
9444 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", mvars[0]->get_field().c_str(), 0,mvars[0]->get_field().c_str());
9446 sprintf(tmpstr,"\tunpack_offset_%s_%d = unpack_offset_%s_%d;\n",mvars[1]->get_field().c_str(), 1,mvars[0]->get_field().c_str(), 0);
9448 // ret+="\tfirst_execution_0 = first_execution_1 = true;\n";
// Emit the slack initializer: either the code generated for the `slack`
// expression or the constant 0.
// NOTE(review): presumably chosen by whether a slack expression is set - confirm.
9450 ret+="\tslack = "+generate_se_code(slack,schema)+";\n";
9452 ret+="\tslack = 0;\n";
9454 // Initialize internal state
9455 ret += "\ttemp_tuple_received = false;\n";
9457 // Init last timestamp values to minimum value for their type
// The minimum literal is used when dta.is_increasing(); the other
// assignment uses the maximum literal.
9458 if (dta.is_increasing())
9459 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_min_literal() + ";\n";
9461 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_max_literal() + ";\n";
9466 ret += "//\t\tFunctor destructor.\n";
9467 ret += "~"+this->generate_functor_name()+"(){\n";
9469 // Destroy the parameters, if any need to be destroyed
9470 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9475 // no pass-by-handle params.
9476 vector<handle_param_tbl_entry *> param_handle_table;
9478 // Parameter manipulation routines
9479 ret += generate_load_param_block(this->generate_functor_name(),
9480 this->param_tbl,param_handle_table);
9481 ret += generate_delete_param_block(this->generate_functor_name(),
9482 this->param_tbl,param_handle_table);
9484 // Register new parameter block
// The emitted set_param_block destroys the current parameters and then
// loads the new block.
9486 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
9487 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9488 ret += "\treturn this->load_params_"+this->generate_functor_name()+
9493 // -----------------------------------
// Pick the unpack function for each input: the regular variant when that
// input needs a transform, otherwise the _noxf variant.
9497 if(needs_xform[0]) unpack_fcna = dta.get_hfta_unpack_fcn();
9498 else unpack_fcna = dta.get_hfta_unpack_fcn_noxf();
9500 if(needs_xform[1]) unpack_fcnb = dtb.get_hfta_unpack_fcn();
9501 else unpack_fcnb = dtb.get_hfta_unpack_fcn_noxf();
// Emitted compare(tup1, tup2): unpacks the merge timestamp from both tuples
// and compares them, adding slack on the "greater than" side.
9504 ret+="\tint compare(const host_tuple& tup1, const host_tuple& tup2) const{ \n";
9505 ret+="\t"+dta.make_host_cvar("timestamp1")+";\n";
9506 ret+="\t"+dta.make_host_cvar("timestamp2")+";\n";
9507 ret+="\tgs_int32_t problem;\n";
9508 ret+="\tif (tup1.channel == 0) {\n";
9509 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9511 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
// NOTE(review): the next two statements pair mvars[0]'s field name and
// unpack_fcna with suffix 1, and mvars[1]'s field name and unpack_fcnb with
// suffix 0.  The constructor above only assigns unpack_offset_<mvars[0]>_0
// and unpack_offset_<mvars[1]>_1, so these names match only if both merge
// fields share a name - confirm.
9514 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 1);
9516 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 0);
9520 " if (timestamp1 > timestamp2+slack)\n"
9522 " else if (timestamp1 < timestamp2)\n"
// Emitted get_timestamp(tup0): caches the tuple's merge timestamp in the
// `timestamp` member using the _nocheck unpack variant.
9531 " void get_timestamp(const host_tuple& tup0){\n"
9532 " gs_int32_t problem;\n"
9534 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9543 // Compare to temp status.
// Emitted compare_with_temp_status(channel): compares the cached
// `timestamp` with the last posted timestamp of the given channel.
9545 " int compare_with_temp_status(int channel) {\n"
9546 " // check if tuple is temp status tuple\n"
9548 " if (channel == 0) {\n"
9549 //" if(first_execution_0) return 1;\n"
9550 " if (timestamp == last_posted_timestamp_0)\n"
9552 " else if (timestamp < last_posted_timestamp_0)\n"
9557 //" if(first_execution_1) return 1;\n"
9558 " if (timestamp == last_posted_timestamp_1)\n"
9560 " else if (timestamp < last_posted_timestamp_1)\n"
// Emitted compare_stored_with_temp_status(tup0, channel): same comparison,
// but against a timestamp unpacked from tup0 into the local l_timestamp.
9569 " int compare_stored_with_temp_status(const host_tuple& tup0, int channel)/* const*/ {\n"
9571 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
9572 ret+="\tgs_int32_t problem;\n";
9574 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9576 ret+="\tif (channel == 0) {\n";
9577 // ret+="\tif(first_execution_0) return 1;\n";
9579 " if (l_timestamp == last_posted_timestamp_0)\n"
9581 " else if (l_timestamp < last_posted_timestamp_0)\n"
9586 // ret+="\tif(first_execution_1) return 1;\n";
9588 " if (l_timestamp == last_posted_timestamp_1)\n"
9590 " else if (l_timestamp < last_posted_timestamp_1)\n"
9598 // update temp status.
// Emitted update_temp_status(tup): records the cached `timestamp` as the
// last posted timestamp of tup's channel.
9600 " int update_temp_status(const host_tuple& tup) {\n"
9601 " if (tup.channel == 0) {\n"
9602 " last_posted_timestamp_0=timestamp;\n"
9603 //" first_execution_0 = false;\n"
9605 " last_posted_timestamp_1=timestamp;\n"
9606 //" first_execution_1 = false;\n"
// Emitted update_stored_temp_status(tup, channel): unpacks the timestamp
// from tup and records it as the last posted timestamp.
// NOTE(review): the emitted body branches on tup.channel although the
// method also receives a `channel` argument - confirm which is intended.
9612 " int update_stored_temp_status(const host_tuple& tup, int channel) {\n"
9614 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
9615 ret+="\tgs_int32_t problem;\n";
9616 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9619 " if (tup.channel == 0) {\n"
9620 " last_posted_timestamp_0=l_timestamp;\n"
9621 //" first_execution_0 = false;\n"
9623 " last_posted_timestamp_1=l_timestamp;\n"
9624 //" first_execution_1 = false;\n"
// NOTE(review): the emitted code from here through update_temp_status_by_slack
// assigns and tests first_execution_0/1, whose declaration and initialization
// are commented out earlier in this function.  Confirm that this section is
// disabled (e.g. enclosed in a block comment); otherwise the generated class
// references undeclared members.
9630 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9631 ret+="\tgs_int32_t problem;\n";
9632 ret+="\tif (tup.channel == 0) {\n";
9633 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9636 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
9639 ret+="\tif (tup.channel == 0) {\n";
9640 ret+="\tlast_posted_timestamp_0=timestamp;\n";
9641 ret +="\tfirst_execution_0 = false;\n";
9643 ret+="\tlast_posted_timestamp_1=timestamp;\n";
9644 ret +="\tfirst_execution_1 = false;\n";
9651 // update temp status modulo slack.
9652 ret+="\tint update_temp_status_by_slack(const host_tuple& tup, int channel) {\n";
9654 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9655 ret+="\tgs_int32_t problem;\n";
9656 ret+="\tif (tup.channel == 0) {\n";
9657 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9660 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
// Per channel: the last posted timestamp is set to timestamp-slack on first
// execution, and afterwards only raised when timestamp-slack is larger.
9664 " if (channel == 0) {\n"
9665 " if(first_execution_0){\n"
9666 " last_posted_timestamp_0=timestamp - slack;\n"
9667 " first_execution_0 = false;\n"
9669 " if(last_posted_timestamp_0 < timestamp-slack)\n"
9670 " last_posted_timestamp_0 = timestamp-slack;\n"
9673 " if(first_execution_1){\n"
9674 " last_posted_timestamp_1=timestamp - slack;\n"
9675 " first_execution_1 = false;\n"
9677 " if(last_posted_timestamp_1 < timestamp-slack)\n"
9678 " last_posted_timestamp_1 = timestamp-slack;\n"
// Emitted temp_status_received(tup0): delegates to
// ftaschema_is_temporal_tuple_offset using the input-0 metadata offset.
9692 "bool temp_status_received(const host_tuple& tup0){\n"
9693 " return ftaschema_is_temporal_tuple_offset(tuple_metadata_offset0, tup0.data);\n"
9696 //"bool temp_status_received(){return temp_tuple_received;};\n\n";
9699 // create a temp status tuple
9700 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
9702 ret += gen_init_temp_status_tuple(this->get_node_name());
9705 ret += "//\t\tPack the fields into the tuple.\n";
// Locate the merge field (by mvars[0]'s field name) in the output layout;
// idx selects the tuple_var<idx> member written below.
9707 string fld_name = mvars[0]->get_field();
9708 int idx = table_layout->get_field_idx(fld_name);
9709 field_entry* fld = table_layout->get_field(idx);
9710 data_type dt(fld->get_type());
9712 // if (needs_xform[0] && needs_xform[1] && dt.needs_hn_translation())
9713 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s((last_posted_timestamp_0 < last_posted_timestamp_1) ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx, dt.hton_translation().c_str());
// The temp status tuple carries the smaller of the two last posted timestamps.
9715 sprintf(tmpstr,"\ttuple->tuple_var%d = (last_posted_timestamp_0 < last_posted_timestamp_1 ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx);
9719 ret += "\treturn 0;\n";
9722 // Transform tuple (before output)
// A body is emitted for xform_tuple only when exactly one of the two inputs
// needs a transform.  The per-field conversion statements in both loops
// below are commented out.
9725 ret += "void xform_tuple(host_tuple &tup){\n";
9726 if((needs_xform[0] && !needs_xform[1]) || (needs_xform[1] && !needs_xform[0])){
9727 ret += "\tstruct "+generate_tuple_name(this->get_node_name())+" *tuple = ("+
9728 generate_tuple_name(this->get_node_name())+" *)(tup.data);\n";
9730 vector<field_entry *> flds = table_layout->get_fields();
// Channel 0 branch of the emitted xform_tuple.
9732 ret+="\tif(tup.channel == 0){\n";
9733 if(needs_xform[0] && !needs_xform[1]){
9735 for(f=0;f<flds.size();f++){
9737 data_type dt(flds[f]->get_type());
9738 if(dt.get_type() == v_str_t){
9739 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
9741 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
9743 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
9746 if(dt.needs_hn_translation()){
9747 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
9748 // f, dt.hton_translation().c_str(), f);
9754 ret += "\t\treturn;\n";
9756 ret.append("\t}\n");
// Channel 1 branch of the emitted xform_tuple (mirror of the channel 0 case).
9759 ret+="\tif(tup.channel == 1){\n";
9760 if(needs_xform[1] && !needs_xform[0]){
9762 for(f=0;f<flds.size();f++){
9764 data_type dt(flds[f]->get_type());
9765 if(dt.get_type() == v_str_t){
9766 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
9768 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
9770 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
9773 if(dt.needs_hn_translation()){
9774 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
9775 // f, dt.hton_translation().c_str(), f);
9781 ret += "\t\treturn;\n";
9783 ret.append("\t}\n");
9786 ret.append("};\n\n");
9788 // print_warnings() : tell the functor if the user wants to print warnings.
// The emitted method returns true only when the "print_warnings" definition
// is "yes", "Yes" or "YES"; otherwise it returns false.
9789 ret += "bool print_warnings(){\n";
9790 if(definitions.count("print_warnings") && (
9791 definitions["print_warnings"] == "yes" ||
9792 definitions["print_warnings"] == "Yes" ||
9793 definitions["print_warnings"] == "YES" )) {
9794 ret += "return true;\n";
9796 ret += "return false;\n";
9798 ret.append("};\n\n");
9801 // Done with methods.
// Build the C++ source text that declares and heap-allocates the merge
// operator object for this node.
//   i      - operator index; the generated pointer variable is named op<i>.
//   params - text spliced in verbatim as the leading constructor argument(s).
// Two variants are emitted, merge_operator<functor> and
// merge_operator_oop<functor>; both are constructed with params, the
// literal 10000 and the node name as a quoted string.
// NOTE(review): the condition selecting between the variants is presumably
// hfta_disorder, as in sgah_qpn::generate_operator - confirm.
// NOTE(review): the meaning of the hard-wired 10000 argument is defined by
// the operator constructors - confirm before changing.
9808 string mrg_qpn::generate_operator(int i, string params){
9812 " merge_operator<" +
9813 generate_functor_name()+
9814 "> *op"+int_to_string(i)+" = new merge_operator<"+
9815 generate_functor_name()+
9816 ">("+params+",10000,\"" + get_node_name() + "\");\n"
9820 " merge_operator_oop<" +
9821 generate_functor_name()+
9822 "> *op"+int_to_string(i)+" = new merge_operator_oop<"+
9823 generate_functor_name()+
9824 ">("+params+",10000,\"" + get_node_name() + "\");\n"
9829 /////////////////////////////////////////////////////////
9830 ////// JOIN_EQ_HASH functor
// Name of the generated hash-join functor class for this node:
// the prefix "join_eq_hash_functor_" followed by the normalized node name.
9833 string join_eq_hash_qpn::generate_functor_name(){
9834 return("join_eq_hash_functor_" + normalize_name(this->get_node_name()));
9837 string join_eq_hash_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
9839 vector<data_type *> hashkey_dt; // data types in the hash key
9840 vector<data_type *> temporal_dt; // data types in the temporal key
9841 map<string,scalarexp_t *> l_equiv, r_equiv; // field equivalences
9843 col_id_set new_cids, local_cids;
9845 //--------------------------------
9848 string plus_op = "+";
9850 //--------------------------------
9851 // key definition class
9852 string ret = "class " + generate_functor_name() + "_keydef{\n";
9854 // Collect attributes from hash join predicates.
9855 // ASSUME equality predicate.
9856 // Use the upwardly compatible data type
9857 // (infer from '+' operator if possible, else use left type)
9858 for(p=0;p<this->hash_eq.size();++p){
9859 scalarexp_t *lse = hash_eq[p]->pr->get_left_se();
9860 scalarexp_t *rse = hash_eq[p]->pr->get_right_se();
9861 data_type *hdt = new data_type(
9862 lse->get_data_type(), rse->get_data_type(), plus_op );
9863 if(hdt->get_type() == undefined_t){
9864 hashkey_dt.push_back(lse->get_data_type()->duplicate());
9867 hashkey_dt.push_back(hdt);
9869 sprintf(tmpstr,"hashkey_var%d",p);
9870 ret+="\t"+hashkey_dt[p]->make_host_cvar(tmpstr)+";\n";
9872 // find equivalences
9873 // NOTE: this code needs to be synched with the temporality
9874 // checking done at join_eq_hash_qpn::get_fields
9875 if(lse->get_operator_type()==SE_COLREF){
9876 l_equiv[lse->get_colref()->get_field()] = rse;
9878 if(rse->get_operator_type()==SE_COLREF){
9879 r_equiv[rse->get_colref()->get_field()] = lse;
9882 ret += "\tbool touched;\n";
9885 ret += "\t"+generate_functor_name() + "_keydef(){touched=false;};\n";
9887 ret += "\t~"+ generate_functor_name() + "_keydef(){\n";
9888 for(p=0;p<hashkey_dt.size();p++){
9889 if(hashkey_dt[p]->is_buffer_type()){
9890 sprintf(tmpstr,"\t\t%s(&hashkey_var%d);\n",
9891 hashkey_dt[p]->get_hfta_buffer_destroy().c_str(), p );
9896 ret+="\tvoid touch(){touched = true;};\n";
9897 ret+="\tbool is_touched(){return touched;};\n";
9901 //--------------------------------
9902 // temporal equality definition class
9903 ret += "class " + generate_functor_name() + "_tempeqdef{\n";
9905 // Collect attributes from hash join predicates.
9906 // ASSUME equality predicate.
9907 // Use the upwardly compatible data type
9908 // (infer from '+' operator if possible, else use left type)
9909 for(p=0;p<this->temporal_eq.size();++p){
9910 scalarexp_t *lse = temporal_eq[p]->pr->get_left_se();
9911 scalarexp_t *rse = temporal_eq[p]->pr->get_right_se();
9912 data_type *hdt = new data_type(
9913 lse->get_data_type(), rse->get_data_type(), plus_op );
9914 if(hdt->get_type() == undefined_t){
9915 temporal_dt.push_back(hash_eq[p]->pr->get_left_se()->get_data_type()->duplicate());
9918 temporal_dt.push_back(hdt);
9920 sprintf(tmpstr,"tempeq_var%d",p);
9921 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
9922 // find equivalences
9923 if(lse->get_operator_type()==SE_COLREF){
9924 l_equiv[lse->get_colref()->get_field()] = rse;
9926 if(rse->get_operator_type()==SE_COLREF){
9927 r_equiv[rse->get_colref()->get_field()] = lse;
9932 ret += "\t"+generate_functor_name() + "_tempeqdef(){};\n";
9934 ret += "\t~"+ generate_functor_name() + "_tempeqdef(){\n";
9935 for(p=0;p<temporal_dt.size();p++){
9936 if(temporal_dt[p]->is_buffer_type()){
9937 sprintf(tmpstr,"\t\t%s(&tempeq_var%d);\n",
9938 temporal_dt[p]->get_hfta_buffer_destroy().c_str(), p );
9946 //--------------------------------
9947 // temporal eq, hash join functor class
9948 ret += "class " + this->generate_functor_name() + "{\n";
9950 // Find variables referenced in this query node.
9953 col_id_set::iterator csi;
9955 for(p=0;p<where.size();++p)
9956 gather_pr_col_ids(where[p]->pr,cid_set,NULL);
9957 for(s=0;s<select_list.size();s++)
9958 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
9960 // Private variables : store the state of the functor.
9961 // 1) variables for unpacked attributes
9962 // 2) offsets of the unpacked attributes
9963 // 3) storage of partial functions
9964 // 4) storage of complex literals (i.e., require a constructor)
9966 ret += "private:\n";
9968 // var to save the schema handles
9969 ret += "\tint schema_handle0;\n";
9970 ret += "\tint schema_handle1;\n";
9972 // generate the declaration of all the variables related to
9973 // temp tuples generation
9974 ret += gen_decl_temp_vars();
9975 // tuple metadata offsets
9976 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
9978 // unpacked attribute storage, offsets
9979 ret += "//\t\tstorage and offsets of accessed fields.\n";
9980 ret += generate_access_vars(cid_set, schema);
9983 // Variables to store results of partial functions.
9984 // WARNING find_partial_functions modifies the SE
9985 // (it marks the partial function id).
9986 ret += "//\t\tParital function result storage\n";
9987 vector<scalarexp_t *> partial_fcns;
9988 vector<int> fcn_ref_cnt;
9989 vector<bool> is_partial_fcn;
9990 for(s=0;s<select_list.size();s++){
9991 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
9993 for(p=0;p<where.size();p++){
9994 find_partial_fcns_pr(where[p]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9996 if(partial_fcns.size()>0){
9997 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
9998 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
10001 // Complex literals (i.e., they need constructors)
10002 ret += "//\t\tComplex literal storage.\n";
10003 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
10004 ret += generate_complex_lit_vars(complex_literals);
10005 // We need the following to handle strings in outer joins.
10006 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10007 ret += "\tstruct vstring EmptyString;\n";
10008 ret += "\tstruct hfta_ipv6_str EmptyIp6;\n";
10010 // Pass-by-handle parameters
10011 ret += "//\t\tPass-by-handle storage.\n";
10012 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
10013 ret += generate_pass_by_handle_vars(param_handle_table);
10016 // variables to hold parameters.
10017 ret += "//\tfor query parameters\n";
10018 ret += generate_param_vars(param_tbl);
10021 ret += "\npublic:\n";
10022 //-------------------
10023 // The functor constructor
10024 // pass in the schema handle.
10025 // 1) make assignments to the unpack offset variables
10026 // 2) initialize the complex literals
10028 ret += "//\t\tFunctor constructor.\n";
10029 ret += this->generate_functor_name()+"(int schema_handle0, int schema_handle1){\n";
10031 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
10032 ret += "\t\tthis->schema_handle1 = schema_handle1;\n";
10033 // metadata offsets
10034 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10035 ret += "\ttuple_metadata_offset1 = ftaschema_get_tuple_metadata_offset(schema_handle1);\n";
10038 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
10039 ret += gen_access_var_init(cid_set);
10041 // complex literals
10042 ret += "//\t\tInitialize complex literals.\n";
10043 ret += gen_complex_lit_init(complex_literals);
10044 // Initialize EmptyString to the ... empty string
10045 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10046 literal_t mtstr_lit("");
10047 ret += "\t" + mtstr_lit.to_hfta_C_code("&EmptyString")+";\n";
10048 literal_t mip6_lit("0:0:0:0:0:0:0:0",LITERAL_IPV6);
10049 ret += "\t" + mip6_lit.to_hfta_C_code("&EmptyIp6")+";\n";
10051 // Initialize partial function results so they can be safely GC'd
10052 ret += gen_partial_fcn_init(partial_fcns);
10054 // Initialize non-query-parameter parameter handles
10055 ret += gen_pass_by_handle_init(param_handle_table);
10057 // Init temporal attributes referenced in select list
10058 ret += gen_init_temp_vars(schema, select_list, NULL);
10065 //-------------------
10066 // Functor destructor
10067 ret += "//\t\tFunctor destructor.\n";
10068 ret += "~"+this->generate_functor_name()+"(){\n";
10070 // clean up buffer type complex literals
10071 ret += gen_complex_lit_dtr(complex_literals);
10073 // Deregister the pass-by-handle parameters
10074 ret += "/* register and de-register the pass-by-handle parameters */\n";
10075 ret += gen_pass_by_handle_dtr(param_handle_table);
10077 // clean up partial function results.
10078 ret += "/* clean up partial function storage */\n";
10079 ret += gen_partial_fcn_dtr(partial_fcns);
10081 // Destroy the parameters, if any need to be destroyed
10082 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10087 //-------------------
10088 // Parameter manipulation routines
10089 ret += generate_load_param_block(this->generate_functor_name(),
10090 this->param_tbl,param_handle_table);
10091 ret += generate_delete_param_block(this->generate_functor_name(),
10092 this->param_tbl,param_handle_table);
10094 //-------------------
10095 // Register new parameter block
10097 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10098 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10099 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10104 //-------------------
10105 // The create_key method.
10106 // Perform heap allocation.
10107 // ASSUME : the LHS of the preds reference channel 0 attributes
10108 // NOTE : it may fail if a partial function fails.
10110 ret += this->generate_functor_name()+"_keydef *create_key(host_tuple &tup, bool &failed){\n";
10111 // Variables for execution of the function.
10112 ret+="\t"+this->generate_functor_name()+"_keydef *retval = NULL;\n";
10113 ret+="\tgs_int32_t problem = 0;\n";
10115 // Assume unsuccessful completion
10116 ret+= "\tfailed = true;\n";
10118 // Switch the processing based on the channel
10119 ret+="\tif(tup.channel == 0){\n";
10120 ret+="// ------------ processing for channel 0\n";
10121 ret+="\t\thost_tuple &tup0 = tup;\n";
10122 // Gather partial fcns and colids ref'd by this branch
10124 new_cids.clear(); local_cids.clear();
10125 for(p=0;p<hash_eq.size();p++){
10126 collect_partial_fcns(hash_eq[p]->pr->get_left_se(), pfcn_refs);
10127 gather_se_col_ids(hash_eq[p]->pr->get_left_se(),local_cids,NULL);
10130 // Start by cleaning up partial function results
10131 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10132 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10134 // Evaluate the partial functions
10135 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10136 new_cids, NULL, "NULL", needs_xform);
10137 // test passed -- unpack remaining cids.
10138 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10140 // Alloc and load a key object
10141 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10142 for(p=0;p<hash_eq.size();p++){
10143 data_type *hdt = hash_eq[p]->pr->get_left_se()->get_data_type();
10144 if(hdt->is_buffer_type()){
10145 string vname = "tmp_keyvar"+int_to_string(p);
10146 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_left_se(),schema)+";\n";
10147 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10149 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10150 p,generate_se_code(hash_eq[p]->pr->get_left_se(),schema).c_str() );
10154 ret += "\t}else{\n";
10156 ret+="// ------------ processing for channel 1\n";
10157 ret+="\t\thost_tuple &tup1 = tup;\n";
10158 // Gather partial fcns and colids ref'd by this branch
10160 new_cids.clear(); local_cids.clear();
10161 for(p=0;p<hash_eq.size();p++){
10162 collect_partial_fcns(hash_eq[p]->pr->get_right_se(), pfcn_refs);
10163 gather_se_col_ids(hash_eq[p]->pr->get_right_se(),local_cids,NULL);
10166 // Start by cleaning up partial function results
10167 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10168 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10170 // Evaluate the partial functions
10171 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10172 new_cids, NULL, "NULL", needs_xform);
10174 // test passed -- unpack remaining cids.
10175 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10177 // Alloc and load a key object
10178 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10179 for(p=0;p<hash_eq.size();p++){
10180 data_type *hdt = hash_eq[p]->pr->get_right_se()->get_data_type();
10181 if(hdt->is_buffer_type()){
10182 string vname = "tmp_keyvar"+int_to_string(p);
10183 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_right_se(),schema)+";\n";
10184 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10186 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10187 p,generate_se_code(hash_eq[p]->pr->get_right_se(),schema).c_str() );
10193 ret += "\tfailed = false;\n";
10194 ret += "\t return retval;\n";
10198 //-------------------
10199 // The load_ts method.
10200 // load into an allocated buffer.
10201 // ASSUME : the LHS of the preds reference channel 0 attributes
10202 // NOTE : it may fail if a partial function fails.
10203 // NOTE : cannot handle buffer attributes
10205 ret += "bool load_ts_from_tup("+this->generate_functor_name()+"_tempeqdef *ts, host_tuple &tup){\n";
10206 // Variables for execution of the function.
10207 ret+="\tgs_int32_t problem = 0;\n";
10209 // Switch the processing based on the channel
10210 ret+="\tif(tup.channel == 0){\n";
10211 ret+="// ------------ processing for channel 0\n";
10212 ret+="\t\thost_tuple &tup0 = tup;\n";
10214 // Gather partial fcns and colids ref'd by this branch
10216 new_cids.clear(); local_cids.clear();
10217 for(p=0;p<temporal_eq.size();p++){
10218 collect_partial_fcns(temporal_eq[p]->pr->get_left_se(), pfcn_refs);
10219 gather_se_col_ids(temporal_eq[p]->pr->get_left_se(),local_cids,NULL);
10222 // Start by cleaning up partial function results
10223 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10224 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10226 // Evaluate the partial functions
10227 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10228 new_cids, NULL, "false", needs_xform);
10230 // test passed -- unpack remaining cids.
10231 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10233 // load the temporal key object
10234 for(p=0;p<temporal_eq.size();p++){
10235 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10236 p,generate_se_code(temporal_eq[p]->pr->get_left_se(),schema).c_str() );
10240 ret += "\t}else{\n";
10242 ret+="// ------------ processing for channel 1\n";
10243 ret+="\t\thost_tuple &tup1 = tup;\n";
10245 // Gather partial fcns and colids ref'd by this branch
10247 new_cids.clear(); local_cids.clear();
10248 for(p=0;p<temporal_eq.size();p++){
10249 collect_partial_fcns(temporal_eq[p]->pr->get_right_se(), pfcn_refs);
10250 gather_se_col_ids(temporal_eq[p]->pr->get_right_se(),local_cids,NULL);
10253 // Start by cleaning up partial function results
10254 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10255 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10257 // Evaluate the partial functions
10258 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10259 new_cids, NULL, "false", needs_xform);
10261 // test passed -- unpack remaining cids.
10262 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10264 // load the key object
10265 for(p=0;p<temporal_eq.size();p++){
10266 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10267 p,generate_se_code(temporal_eq[p]->pr->get_right_se(),schema).c_str() );
10273 ret += "\t return true;\n";
10277 // ------------------------------
10279 // (i.e make a copy)
10281 ret += "bool load_ts_from_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10282 for(p=0;p<temporal_eq.size();p++){
10283 sprintf(tmpstr,"\tlts->tempeq_var%d = rts->tempeq_var%d;\n",p,p);
10288 // -------------------------------------
10289 // compare_ts_with_ts
10290 // There should be only one variable to compare.
10291 // If there is more, assume an arbitrary lexicographic order.
10293 ret += "int compare_ts_with_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10294 for(p=0;p<temporal_eq.size();p++){
10295 sprintf(tmpstr,"\tif(lts->tempeq_var%d < rts->tempeq_var%d) return(-1);\n",p,p);
10297 sprintf(tmpstr,"\tif(lts->tempeq_var%d > rts->tempeq_var%d) return(1);\n",p,p);
10300 ret += "\treturn(0);\n";
10303 // ------------------------------------------
10305 // apply the prefilter
10307 ret += "bool apply_prefilter(host_tuple &tup){\n";
10309 // Variables for this procedure
10310 ret+="\tgs_int32_t problem = 0;\n";
10311 ret+="\tgs_retval_t retval;\n";
10313 // Switch the processing based on the channel
10314 ret+="\tif(tup.channel == 0){\n";
10315 ret+="// ------------ processing for channel 0\n";
10316 ret+="\t\thost_tuple &tup0 = tup;\n";
10317 // Gather partial fcns and colids ref'd by this branch
10319 new_cids.clear(); local_cids.clear();
10320 for(p=0;p<prefilter[0].size();p++){
10321 collect_partial_fcns_pr((prefilter[0])[p]->pr, pfcn_refs);
10324 // Start by cleaning up partial function results
10325 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10326 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10328 for(p=0;p<(prefilter[0]).size();++p){
10329 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10331 // Find the set of variables accessed in this CNF elem,
10332 // but in no previous element.
10333 col_id_set new_pr_cids;
10334 get_new_pred_cids((prefilter[0])[p]->pr,local_cids,new_pr_cids, NULL);
10335 // Unpack these values.
10336 ret += gen_unpack_cids(schema, new_pr_cids, "false", needs_xform);
10337 // Find partial fcns ref'd in this cnf element
10338 set<int> pr_pfcn_refs;
10339 collect_partial_fcns_pr((prefilter[0])[p]->pr, pr_pfcn_refs);
10340 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10342 ret += "\t\tif( !("+generate_predicate_code((prefilter[0])[p]->pr,schema)+") ) return(false);\n";
10344 ret += "\t}else{\n";
10345 ret+="// ------------ processing for channel 1\n";
10346 ret+="\t\thost_tuple &tup1 = tup;\n";
10347 // Gather partial fcns and colids ref'd by this branch
10349 new_cids.clear(); local_cids.clear();
10350 for(p=0;p<prefilter[1].size();p++){
10351 collect_partial_fcns_pr((prefilter[1])[p]->pr, pfcn_refs);
10354 // Start by cleaning up partial function results
10355 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10356 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10358 for(p=0;p<(prefilter[1]).size();++p){
10359 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10361 // Find the set of variables accessed in this CNF elem,
10362 // but in no previous element.
10363 col_id_set pr_new_cids;
10364 get_new_pred_cids((prefilter[1])[p]->pr,local_cids, pr_new_cids, NULL);
10365 // Unpack these values.
10366 ret += gen_unpack_cids(schema, pr_new_cids, "false", needs_xform);
10367 // Find partial fcns ref'd in this cnf element
10368 set<int> pr_pfcn_refs;
10369 collect_partial_fcns_pr((prefilter[1])[p]->pr, pr_pfcn_refs);
10370 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10372 ret += "\t\tif( !("+generate_predicate_code((prefilter[1])[p]->pr,schema)+ ") ) return(false);\n";
10376 ret+="\treturn true;\n";
10380 // -------------------------------------
10381 // create_output_tuple
10382 // If the postfilter on the pair of tuples passes,
10383 // create an output tuple from the combined information.
10384 // (Plus, outer join processing)
10386 ret += "host_tuple create_output_tuple(const host_tuple &tup0, const host_tuple &tup1, bool &failed){\n";
10388 ret += "\thost_tuple tup;\n";
10389 ret += "\tfailed = true;\n";
10390 ret += "\tgs_retval_t retval = 0;\n";
10391 ret += "\tgs_int32_t problem = 0;\n";
10393 // Start by cleaning up partial function results
10394 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10396 new_cids.clear(); local_cids.clear();
10397 for(p=0;p<postfilter.size();p++){
10398 collect_partial_fcns_pr(postfilter[p]->pr, pfcn_refs);
10400 for(s=0;s<select_list.size();s++){
10401 collect_partial_fcns(select_list[s]->se, pfcn_refs);
10403 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10406 ret+="\tif(tup0.data && tup1.data){\n";
10407 // Evaluate the postfilter
10408 new_cids.clear(); local_cids.clear();
10409 for(p=0;p<postfilter.size();p++){
10410 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10412 // Find the set of variables accessed in this CNF elem,
10413 // but in no previous element.
10414 col_id_set pr_new_cids;
10415 get_new_pred_cids(postfilter[p]->pr,local_cids, pr_new_cids, NULL);
10416 // Unpack these values.
10417 ret += gen_unpack_cids(schema, pr_new_cids, "tup", needs_xform);
10418 // Find partial fcns ref'd in this cnf element
10419 set<int> pr_pfcn_refs;
10420 collect_partial_fcns_pr(postfilter[p]->pr, pr_pfcn_refs);
10421 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"tup");
10423 ret += "\t\tif( !("+generate_predicate_code(postfilter[p]->pr,schema)+ ") ) return(tup);\n";
10427 // postfilter passed, evaluate partial functions for select list
10430 col_id_set se_cids;
10431 for(s=0;s<select_list.size();s++){
10432 collect_partial_fcns(select_list[s]->se, sl_pfcns);
10435 if(sl_pfcns.size() > 0)
10436 ret += "//\t\tUnpack remaining partial fcns.\n";
10437 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
10438 local_cids, NULL, "tup", needs_xform);
10440 // Unpack remaining fields
10441 ret += "//\t\tunpack any remaining fields from the input tuples.\n";
10442 for(s=0;s<select_list.size();s++)
10443 get_new_se_cids(select_list[s]->se, local_cids,se_cids,NULL);
10444 ret += gen_unpack_cids(schema, se_cids,"tup", needs_xform);
10447 // Deal with outer join stuff
10448 col_id_set l_cids, r_cids;
10449 col_id_set::iterator ocsi;
10450 for(ocsi=local_cids.begin();ocsi!=local_cids.end();++ocsi){
10451 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10452 else r_cids.insert((*ocsi));
10454 for(ocsi=se_cids.begin();ocsi!=se_cids.end();++ocsi){
10455 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10456 else r_cids.insert((*ocsi));
10459 ret += "\t}else if(tup0.data){\n";
10460 string unpack_null = ""; col_id_set extra_cids;
10461 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
10462 string field = (*ocsi).field;
10463 if(r_equiv.count(field)){
10464 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
10465 get_new_se_cids(r_equiv[field],l_cids,new_cids,NULL);
10467 int schref = (*ocsi).schema_ref;
10468 data_type dt(schema->get_type_name(schref,field));
10469 literal_t empty_lit(dt.type_indicator());
10470 if(empty_lit.is_cpx_lit()){
10471 // sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
10472 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10473 // NB : works for string type only
10474 // NNB: installed fix for ipv6, more of this should be pushed
10475 // into the literal_t code.
10476 unpack_null+="\tunpack_var_"+field+"_1= "+empty_lit.hfta_empty_literal_name()+";\n";
10478 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
10482 ret += gen_unpack_cids(schema, l_cids, "tup", needs_xform);
10483 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
10484 ret += unpack_null;
10485 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
10488 unpack_null = ""; extra_cids.clear();
10489 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
10490 string field = (*ocsi).field;
10491 if(l_equiv.count(field)){
10492 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
10493 get_new_se_cids(l_equiv[field],r_cids,new_cids,NULL);
10495 int schref = (*ocsi).schema_ref;
10496 data_type dt(schema->get_type_name(schref,field));
10497 literal_t empty_lit(dt.type_indicator());
10498 if(empty_lit.is_cpx_lit()){
10499 // sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
10500 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10501 // NB : works for string type only
10502 // NNB: installed fix for ipv6, more of this should be pushed
10503 // into the literal_t code.
10504 unpack_null+="\tunpack_var_"+field+"_0= "+empty_lit.hfta_empty_literal_name()+";\n";
10506 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
10510 ret += gen_unpack_cids(schema, r_cids, "tup", needs_xform);
10511 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
10512 ret += unpack_null;
10513 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
10518 // Unpack any BUFFER type selections into temporaries
10519 // so that I can compute their size and not have
10520 // to recompute their value during tuple packing.
10521 // I can use regular assignment here because
10522 // these temporaries are non-persistent.
10524 ret += "//\t\tCompute the size of the tuple.\n";
10525 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
10527 // Unpack all buffer type selections, to be able to compute their size
10528 ret += gen_buffer_selvars(schema, select_list);
10530 // The size of the tuple is the size of the tuple struct plus the
10531 // size of the buffers to be copied in.
10533 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
10534 ret += gen_buffer_selvars_size(select_list,schema);
10537 // Allocate tuple data block.
10538 ret += "//\t\tCreate the tuple block.\n";
10539 ret += "\ttup.data = malloc(tup.tuple_size);\n";
10540 ret += "\ttup.heap_resident = true;\n";
10541 // ret += "\ttup.channel = 0;\n";
10543 // Mark tuple as regular
10544 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
10547 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
10548 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
10551 // (Here, offsets are hard-wired. is this a problem?)
10553 ret += "//\t\tPack the fields into the tuple.\n";
10554 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
10556 // Delete string temporaries
10557 ret += gen_buffer_selvars_dtr(select_list);
10559 ret += "\tfailed = false;\n";
10560 ret += "\treturn tup;\n";
10565 //-----------------------------
10566 // Method for checking whether tuple is temporal
10568 ret += "bool temp_status_received(host_tuple &tup){\n";
10570 // Switch the processing based on the channel
10571 ret+="\tif(tup.channel == 0){\n";
10572 ret+="\t\thost_tuple &tup0 = tup;\n";
10573 ret += gen_temp_tuple_check(this->node_name, 0);
10574 ret += "\t}else{\n";
10575 ret+="\t\thost_tuple &tup1 = tup;\n";
10576 ret += gen_temp_tuple_check(this->node_name, 1);
10578 ret += "\treturn temp_tuple_received;\n};\n\n";
10581 //-------------------------------------------------------------------
10582 // Temporal update functions
10585 // create a temp status tuple
10586 ret += "int create_temp_status_tuple(const host_tuple &tup0, const host_tuple &tup1, host_tuple& result) {\n\n";
10588 ret += "\tgs_retval_t retval = 0;\n";
10589 ret += "\tgs_int32_t problem = 0;\n";
10591 ret += "\tif(tup0.data){\n";
10593 // Unpack all the temporal attributes references in select list
10594 col_id_set found_cids;
10596 for(s=0;s<select_list.size();s++){
10597 if (select_list[s]->se->get_data_type()->is_temporal()) {
10598 // Find the set of attributes accessed in this SE
10599 col_id_set new_cids;
10600 get_new_se_cids(select_list[s]->se,found_cids, new_cids, NULL);
10604 // Deal with outer join stuff
10605 l_cids.clear(), r_cids.clear();
10606 for(ocsi=found_cids.begin();ocsi!=found_cids.end();++ocsi){
10607 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10608 else r_cids.insert((*ocsi));
10611 extra_cids.clear();
10612 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
10613 string field = (*ocsi).field;
10614 if(r_equiv.count(field)){
10615 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
10616 col_id_set addnl_cids;
10617 get_new_se_cids(r_equiv[field],l_cids,addnl_cids,NULL);
10619 int schref = (*ocsi).schema_ref;
10620 data_type dt(schema->get_type_name(schref,field));
10621 literal_t empty_lit(dt.type_indicator());
10622 if(empty_lit.is_cpx_lit()){
10623 sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
10624 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10626 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
10630 ret += gen_unpack_cids(schema, l_cids, "1", needs_xform);
10631 ret += gen_unpack_cids(schema, extra_cids, "1", needs_xform);
10632 ret += unpack_null;
10634 ret+="\t}else if (tup1.data) {\n";
10635 unpack_null = ""; extra_cids.clear();
10636 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
10637 string field = (*ocsi).field;
10638 if(l_equiv.count(field)){
10639 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
10640 col_id_set addnl_cids;
10641 get_new_se_cids(l_equiv[field],r_cids,addnl_cids,NULL);
10643 int schref = (*ocsi).schema_ref;
10644 data_type dt(schema->get_type_name(schref,field));
10645 literal_t empty_lit(dt.type_indicator());
10646 if(empty_lit.is_cpx_lit()){
10647 sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
10648 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10650 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
10654 ret += gen_unpack_cids(schema, r_cids, "1", needs_xform);
10655 ret += gen_unpack_cids(schema, extra_cids, "1", needs_xform);
10656 ret += unpack_null;
10659 ret += gen_init_temp_status_tuple(this->get_node_name());
10662 ret += "//\t\tPack the fields into the tuple.\n";
10663 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
10666 ret += "\treturn 0;\n";
10672 //----------------------------------------------------------
10673 // The hash function
10675 ret += "struct "+generate_functor_name()+"_hash_func{\n";
10676 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
10677 "_keydef *key) const{\n";
10678 ret += "\t\treturn( (";
10679 if(hashkey_dt.size() > 0){
10680 for(p=0;p<hashkey_dt.size();p++){
10681 if(p>0) ret += "^";
10682 if(hashkey_dt[p]->use_hashfunc()){
10683 // sprintf(tmpstr,"%s(&(key->hashkey_var%d))",hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
10684 if(hashkey_dt[p]->is_buffer_type())
10685 sprintf(tmpstr,"(%s*%s(&(key->hashkey_var%d)))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
10687 sprintf(tmpstr,"(%s*%s(key->hashkey_var%d))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
10689 sprintf(tmpstr,"(%s*key->hashkey_var%d)",hash_nums[p%NRANDS].c_str(),p);
10696 ret += ") >> 32);\n";
10700 //----------------------------------------------------------
10701 // The comparison function
10703 ret += "struct "+generate_functor_name()+"_equal_func{\n";
10704 ret += "\tbool operator()(const "+generate_functor_name()+"_keydef *key1, "+
10705 generate_functor_name()+"_keydef *key2) const{\n";
10706 ret += "\t\treturn( (";
10707 if(hashkey_dt.size() > 0){
10708 for(p=0;p<hashkey_dt.size();p++){
10709 if(p>0) ret += ") && (";
10710 if(hashkey_dt[p]->complex_comparison(hashkey_dt[p])){
10711 if(hashkey_dt[p]->is_buffer_type())
10712 sprintf(tmpstr,"(%s(&(key1->hashkey_var%d), &(key2->hashkey_var%d))==0)",
10713 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
10715 sprintf(tmpstr,"(%s((key1->hashkey_var%d), (key2->hashkey_var%d))==0)",
10716 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
10718 sprintf(tmpstr,"key1->hashkey_var%d == key2->hashkey_var%d",p,p);
// Builds the C++ source text that declares and allocates the runtime operator
// for this join node. The visible pieces assemble text of the shape
//     join_eq_hash_operator<F,F_tempeqdef,F_keydef,F_hash_func,F_equal_func>
//         *op<i> = new join_eq_hash_operator<...the same five arguments...>
// where F is generate_functor_name(); the four suffixed names are the
// companion classes/structs emitted by generate_functor() for this node.
//   i      : index used to name the generated variable ("op" followed by i).
//   params : NOTE(review): not referenced on any line visible here; presumably
//            it is spliced into the constructor argument list -- confirm.
10735 string join_eq_hash_qpn::generate_operator(int i, string params){
// Declared type of the generated operator variable.
10738 " join_eq_hash_operator<" +
10739 generate_functor_name()+ ","+
10740 generate_functor_name() + "_tempeqdef,"+
10741 generate_functor_name() + "_keydef,"+
10742 generate_functor_name()+"_hash_func,"+
10743 generate_functor_name()+"_equal_func"
// Variable name, then the same template instantiation again for the
// new-expression. The literal below is adjacent to the one above, so the
// compiler concatenates the two.
10744 "> *op"+int_to_string(i)+" = new join_eq_hash_operator<"+
10745 generate_functor_name()+","+
10746 generate_functor_name() + "_tempeqdef,"+
10747 generate_functor_name() + "_keydef,"+
10748 generate_functor_name()+"_hash_func,"+
10749 generate_functor_name()+"_equal_func"
// One integer combining both inputs: from[0]'s property plus twice from[1]'s
// property, followed by ", " an opening double quote and the node name.
// NOTE(review): presumably each property is 0 or 1, which would make this a
// two-bit code -- confirm against get_property().
10751 int_to_string(from[0]->get_property()+2*from[1]->get_property())+", \"" + get_node_name() +
10758 ////////////////////////////////////////////////////////////////
10759 //// SGAHCWCB functor
// Returns the name of the functor class generated for this sgahcwcb node:
// the fixed prefix "sgahcwcb_functor_" followed by normalize_name() applied
// to the node name. generate_functor() appends suffixes such as "_groupdef",
// "_aggrdef" and "_statedef" to this name for the companion classes.
// NOTE(review): normalize_name() presumably rewrites the node name into a
// form usable as a C++ identifier -- confirm at its definition.
10763 string sgahcwcb_qpn::generate_functor_name(){
10764 return("sgahcwcb_functor_" + normalize_name(this->get_node_name()));
10768 string sgahcwcb_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10772 // Initialize generate utility globals
10773 segen_gb_tbl = &(gb_tbl);
10776 //--------------------------------
10777 // group definition class
10778 string ret = "class " + generate_functor_name() + "_groupdef{\n";
10779 ret += "public:\n";
10780 ret += "\tbool valid;\n";
10781 for(g=0;g<this->gb_tbl.size();g++){
10782 sprintf(tmpstr,"gb_var%d",g);
10783 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
10786 ret += "\t"+generate_functor_name() + "_groupdef(){valid=true;};\n";
10787 ret += "\t"+generate_functor_name() + "_groupdef("+
10788 this->generate_functor_name() + "_groupdef *gd){\n";
10789 for(g=0;g<gb_tbl.size();g++){
10790 data_type *gdt = gb_tbl.get_data_type(g);
10791 if(gdt->is_buffer_type()){
10792 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
10793 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
10796 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
10800 ret += "\tvalid=true;\n";
10803 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
10804 for(g=0;g<gb_tbl.size();g++){
10805 data_type *gdt = gb_tbl.get_data_type(g);
10806 if(gdt->is_buffer_type()){
10807 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
10808 gdt->get_hfta_buffer_destroy().c_str(), g );
10815 //--------------------------------
10816 // aggr definition class
10817 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
10818 ret += "public:\n";
10819 for(a=0;a<aggr_tbl.size();a++){
10820 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10821 sprintf(tmpstr,"aggr_var%d",a);
10822 if(aggr_tbl.is_builtin(a))
10823 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
10825 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
10828 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
10830 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
10831 for(a=0;a<aggr_tbl.size();a++){
10832 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10833 if(aggr_tbl.is_builtin(a)){
10834 data_type *adt = aggr_tbl.get_data_type(a);
10835 if(adt->is_buffer_type()){
10836 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
10837 adt->get_hfta_buffer_destroy().c_str(), a );
10841 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
10842 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
10843 ret+="(aggr_var"+int_to_string(a)+"));\n";
10849 //--------------------------------
10850 // superaggr definition class
10851 ret += "class " + this->generate_functor_name() + "_statedef{\n";
10852 ret += "public:\n";
10853 for(a=0;a<aggr_tbl.size();a++){
10854 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10855 if(ate->is_superaggr()){
10856 sprintf(tmpstr,"aggr_var%d",a);
10857 if(aggr_tbl.is_builtin(a))
10858 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
10860 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
10863 set<string>::iterator ssi;
10864 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
10865 string state_nm = (*ssi);
10866 int state_id = Ext_fcns->lookup_state(state_nm);
10867 data_type *dt = Ext_fcns->get_storage_dt(state_id);
10868 string state_var = "state_var_"+state_nm;
10869 ret += "\t"+dt->make_host_cvar(state_var)+";\n";
10872 ret += "\t"+this->generate_functor_name() + "_statedef(){};\n";
10874 ret += "\t~"+this->generate_functor_name() + "_statedef(){\n";
10875 for(a=0;a<aggr_tbl.size();a++){
10876 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10877 if(ate->is_superaggr()){
10878 if(aggr_tbl.is_builtin(a)){
10879 data_type *adt = aggr_tbl.get_data_type(a);
10880 if(adt->is_buffer_type()){
10881 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
10882 adt->get_hfta_buffer_destroy().c_str(), a );
10886 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
10887 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
10888 ret+="(aggr_var"+int_to_string(a)+"));\n";
10892 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
10893 string state_nm = (*ssi);
10894 int state_id = Ext_fcns->lookup_state(state_nm);
10895 string state_var = "state_var_"+state_nm;
10896 ret += "\t_sfun_state_destroy_"+state_nm+"(&"+state_var+");\n";
10903 //--------------------------------
10904 // gb functor class
10905 ret += "class " + this->generate_functor_name() + "{\n";
10907 // Find variables referenced in this query node.
10909 col_id_set cid_set;
10910 col_id_set::iterator csi;
10912 for(w=0;w<where.size();++w)
10913 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
10914 for(w=0;w<having.size();++w)
10915 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
10916 for(w=0;w<cleanby.size();++w)
10917 gather_pr_col_ids(cleanby[w]->pr,cid_set,segen_gb_tbl);
10918 for(w=0;w<cleanwhen.size();++w)
10919 gather_pr_col_ids(cleanwhen[w]->pr,cid_set,segen_gb_tbl);
10920 for(g=0;g<gb_tbl.size();g++)
10921 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
10923 for(s=0;s<select_list.size();s++){
10924 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
10928 // Private variables : store the state of the functor.
10929 // 1) variables for unpacked attributes
10930 // 2) offsets of the unpacked attributes
10931 // 3) storage of partial functions
10932 // 4) storage of complex literals (i.e., require a constructor)
10934 ret += "private:\n";
10936 // var to save the schema handle
10937 ret += "\tint schema_handle0;\n";
10939 // generate the declaration of all the variables related to
10940 // temp tuples generation
10941 ret += gen_decl_temp_vars();
10943 // unpacked attribute storage, offsets
10944 ret += "//\t\tstorage and offsets of accessed fields.\n";
10945 ret += generate_access_vars(cid_set, schema);
10946 // tuple metadata offset
10947 ret += "\ttuple_metadata_offset0;\n";
10949 // Variables to store results of partial functions.
10950 // WARNING find_partial_functions modifies the SE
10951 // (it marks the partial function id).
10952 ret += "//\t\tParital function result storage\n";
10953 vector<scalarexp_t *> partial_fcns;
10954 vector<int> fcn_ref_cnt;
10955 vector<bool> is_partial_fcn;
10956 for(s=0;s<select_list.size();s++){
10957 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
10959 for(w=0;w<where.size();w++){
10960 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
10962 for(w=0;w<having.size();w++){
10963 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
10965 for(w=0;w<cleanby.size();w++){
10966 find_partial_fcns_pr(cleanby[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
10968 for(w=0;w<cleanwhen.size();w++){
10969 find_partial_fcns_pr(cleanwhen[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
10971 for(g=0;g<gb_tbl.size();g++){
10972 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
10974 for(a=0;a<aggr_tbl.size();a++){
10975 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
10977 if(partial_fcns.size()>0){
10978 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
10979 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
10982 // Complex literals (i.e., they need constructors)
10983 ret += "//\t\tComplex literal storage.\n";
10984 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
10985 ret += generate_complex_lit_vars(complex_literals);
10987 // Pass-by-handle parameters
10988 ret += "//\t\tPass-by-handle storage.\n";
10989 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
10990 ret += generate_pass_by_handle_vars(param_handle_table);
10992 // Create cached temporaries for UDAF return values.
10993 ret += "//\t\tTemporaries for UDAF return values.\n";
10994 for(a=0;a<aggr_tbl.size();a++){
10995 if(! aggr_tbl.is_builtin(a)){
10996 int afcn_id = aggr_tbl.get_fcn_id(a);
10997 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
10998 sprintf(tmpstr,"udaf_ret_%d", a);
10999 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11005 // variables to hold parameters.
11006 ret += "//\tfor query parameters\n";
11007 ret += generate_param_vars(param_tbl);
11009 // Is there a temporal flush? If so create flush temporaries,
11010 // create flush indicator.
11011 bool uses_temporal_flush = false;
11012 for(g=0;g<gb_tbl.size();g++){
11013 data_type *gdt = gb_tbl.get_data_type(g);
11014 if(gdt->is_temporal())
11015 uses_temporal_flush = true;
11018 if(uses_temporal_flush){
11019 ret += "//\t\tFor temporal flush\n";
11020 for(g=0;g<gb_tbl.size();g++){
11021 data_type *gdt = gb_tbl.get_data_type(g);
11022 if(gdt->is_temporal()){
11023 sprintf(tmpstr,"last_gb%d",g);
11024 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11025 sprintf(tmpstr,"last_flushed_gb%d",g);
11026 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11029 ret += "\tbool needs_temporal_flush;\n";
11032 // The publicly exposed functions
11034 ret += "\npublic:\n";
11037 //-------------------
11038 // The functor constructor
11039 // pass in the schema handle.
11040 // 1) make assignments to the unpack offset variables
11041 // 2) initialize the complex literals
11043 ret += "//\t\tFunctor constructor.\n";
11044 ret += this->generate_functor_name()+"(int schema_handle0){\n";
11046 // save the schema handle
11047 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
11048 // tuple metadata offset
11049 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
11052 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
11053 ret += gen_access_var_init(cid_set);
11055 // aggregate return vals : refd in both final_sample
11056 // and create_output_tuple
11057 // Create cached temporaries for UDAF return values.
11058 for(a=0;a<aggr_tbl.size();a++){
11059 if(! aggr_tbl.is_builtin(a)){
11060 int afcn_id = aggr_tbl.get_fcn_id(a);
11061 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11062 sprintf(tmpstr,"udaf_ret_%d", a);
11063 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11067 // complex literals
11068 ret += "//\t\tInitialize complex literals.\n";
11069 ret += gen_complex_lit_init(complex_literals);
11071 // Initialize partial function results so they can be safely GC'd
11072 ret += gen_partial_fcn_init(partial_fcns);
11074 // Initialize non-query-parameter parameter handles
11075 ret += gen_pass_by_handle_init(param_handle_table);
11077 // temporal flush variables
11078 // ASSUME that structured values won't be temporal.
11079 if(uses_temporal_flush){
11080 ret += "//\t\tInitialize temporal flush variables.\n";
11081 for(g=0;g<gb_tbl.size();g++){
11082 data_type *gdt = gb_tbl.get_data_type(g);
11083 if(gdt->is_temporal()){
11084 literal_t gl(gdt->type_indicator());
11085 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
11086 ret.append(tmpstr);
11089 ret += "\tneeds_temporal_flush = false;\n";
11092 // Init temporal attributes referenced in select list
11093 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
11098 //-------------------
11099 // Functor destructor
11100 ret += "//\t\tFunctor destructor.\n";
11101 ret += "~"+this->generate_functor_name()+"(){\n";
11103 // clean up buffer type complex literals
11104 ret += gen_complex_lit_dtr(complex_literals);
11106 // Deregister the pass-by-handle parameters
11107 ret += "/* register and de-register the pass-by-handle parameters */\n";
11108 ret += gen_pass_by_handle_dtr(param_handle_table);
11110 // clean up partial function results.
11111 ret += "/* clean up partial function storage */\n";
11112 ret += gen_partial_fcn_dtr(partial_fcns);
11114 // Destroy the parameters, if any need to be destroyed
11115 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11120 //-------------------
11121 // Parameter manipulation routines
11122 ret += generate_load_param_block(this->generate_functor_name(),
11123 this->param_tbl,param_handle_table);
11124 ret += generate_delete_param_block(this->generate_functor_name(),
11125 this->param_tbl,param_handle_table);
11127 //-------------------
11128 // Register new parameter block
11130 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
11131 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11132 ret += "\treturn this->load_params_"+this->generate_functor_name()+
11136 //-------------------
11137 // the create_group method.
11138 // This method creates a group in a buffer passed in
11139 // (to allow for creation on the stack).
11140 // There are also a couple of side effects:
11141 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
11142 // 2) determine if a temporal flush is required.
11144 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
11145 // Variables for execution of the function.
11146 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11148 if(partial_fcns.size()>0){ // partial fcn access failure
11149 ret += "\tgs_retval_t retval = 0;\n";
11153 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
11154 "_groupdef *) buffer;\n";
11156 // Start by cleaning up partial function results
11157 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11159 set<int> gb_pfcns; // partial fcns in gbdefs, aggr se's
11160 for(g=0;g<gb_tbl.size();g++){
11161 collect_partial_fcns(gb_tbl.get_def(g), gb_pfcns);
11163 ret += gen_partial_fcn_dtr(partial_fcns,gb_pfcns);
11164 // ret += gen_partial_fcn_dtr(partial_fcns);
11167 ret += gen_temp_tuple_check(this->node_name, 0);
11168 col_id_set found_cids; // colrefs unpacked thus far.
11169 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
11173 // Save temporal group-by variables
11176 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
11178 for(g=0;g<gb_tbl.size();g++){
11180 data_type *gdt = gb_tbl.get_data_type(g);
11182 if(gdt->is_temporal()){
11183 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11184 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11185 ret.append(tmpstr);
11192 // Compare the temporal GB vars with the stored ones,
11193 // set flush indicator and update stored GB vars if there is any change.
11195 if(uses_temporal_flush){
11196 ret+= "\tif( !( (";
11197 bool first_one = true;
11198 for(g=0;g<gb_tbl.size();g++){
11199 data_type *gdt = gb_tbl.get_data_type(g);
11201 if(gdt->is_temporal()){
11202 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
11203 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
11204 if(first_one){first_one = false;} else {ret += ") && (";}
11205 ret += generate_equality_test(lhs_op, rhs_op, gdt);
11209 for(g=0;g<gb_tbl.size();g++){
11210 data_type *gdt = gb_tbl.get_data_type(g);
11211 if(gdt->is_temporal()){
11212 if(gdt->is_buffer_type()){
11213 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
11215 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
11217 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
11223 if(uses_temporal_flush){
11224 for(g=0;g<gb_tbl.size();g++){
11225 data_type *gdt = gb_tbl.get_data_type(g);
11226 if(gdt->is_temporal()){
11227 ret+="if(last_flushed_gb"+int_to_string(g)+">0)\n";
11233 ret += "\t\tneeds_temporal_flush=true;\n";
11234 ret += "\t\t}else{\n"
11235 "\t\t\tneeds_temporal_flush=false;\n"
11240 // For temporal status tuple we don't need to do anything else
11241 ret += "\tif (temp_tuple_received) return NULL;\n\n";
11244 // The partial functions ref'd in the group-by var
11245 // definitions must be evaluated. If one returns false,
11246 // then implicitly the predicate is false.
11247 set<int>::iterator pfsi;
11249 if(gb_pfcns.size() > 0)
11250 ret += "//\t\tUnpack partial fcns.\n";
11251 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, gb_pfcns,
11252 found_cids, segen_gb_tbl, "NULL", needs_xform);
11254 // Unpack the group-by variables
11256 for(g=0;g<gb_tbl.size();g++){
11257 // Find the new fields ref'd by this GBvar def.
11258 col_id_set new_cids;
11259 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
11260 // Unpack these values.
11261 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
11263 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11264 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11266 // There seems to be no difference between the two
11267 // branches of the IF statement.
11268 data_type *gdt = gb_tbl.get_data_type(g);
11269 if(gdt->is_buffer_type()){
11270 // Create temporary copy.
11271 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11272 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11274 scalarexp_t *gse = gb_tbl.get_def(g);
11275 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11276 g,generate_se_code(gse,schema).c_str());
11279 ret.append(tmpstr);
11284 ret+= "\treturn gbval;\n";
11289 //-------------------
11290 // the evaluate_predicate method.
11291 // This method evaluates the WHERE clause on an input tuple.
11292 // Clauses which don't reference stateful fcns are tested first,
11293 // and the method returns false as soon as one of them fails.
11294 // 1) the remaining (stateful) clauses clear retval on failure
11295 // 2) all remaining attributes are unpacked before retval is returned.
11297 ret += "bool evaluate_predicate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, int cd){\n";
11298 // Variables for execution of the function.
11299 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11301 if(partial_fcns.size()>0){ // partial fcn access failure
11302 ret += "\tgs_retval_t retval = 0;\n";
11306 // Start by cleaning up partial function results
11307 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11308 set<int> w_pfcns; // partial fcns in where clause
11309 for(w=0;w<where.size();++w)
11310 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
11312 set<int> ag_pfcns; // partial fcns in gbdefs, aggr se's
11313 for(a=0;a<aggr_tbl.size();a++){
11314 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_pfcns);
11316 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
11317 ret += gen_partial_fcn_dtr(partial_fcns,ag_pfcns);
11319 ret+="//\t\tEvaluate clauses which don't reference stateful fcns first \n";
11320 for(w=0;w<where.size();++w){
11321 if(! pred_refs_sfun(where[w]->pr)){
11322 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11324 // Find the set of variables accessed in this CNF elem,
11325 // but in no previous element.
11326 col_id_set new_cids;
11327 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11329 // Unpack these values.
11330 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11331 // Find partial fcns ref'd in this cnf element
11332 set<int> pfcn_refs;
11333 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11334 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11336 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11337 +") ) return(false);\n";
11342 // The partial functions ref'd in the aggregate
11343 // definitions must also be evaluated. If one returns false,
11344 // then implicitly the predicate is false.
11345 // ASSUME that aggregates cannot reference stateful fcns.
11347 if(ag_pfcns.size() > 0)
11348 ret += "//\t\tUnpack remaining partial fcns.\n";
11349 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_pfcns,
11350 found_cids, segen_gb_tbl, "false", needs_xform);
11352 ret+="//\t\tEvaluate all remaining where clauses.\n";
11353 ret+="\tbool retval = true;\n";
11354 for(w=0;w<where.size();++w){
11355 if( pred_refs_sfun(where[w]->pr)){
11356 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11358 // Find the set of variables accessed in this CNF elem,
11359 // but in no previous element.
11360 col_id_set new_cids;
11361 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11363 // Unpack these values.
11364 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11365 // Find partial fcns ref'd in this cnf element
11366 set<int> pfcn_refs;
11367 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11368 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11370 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11371 +") ) retval = false;\n";
11375 ret+="// Unpack all remaining attributes\n";
11376 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
11378 ret += "\n\treturn retval;\n";
11381 //--------------------------------------------------------
11382 // Create and initialize an aggregate object
11384 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, gs_sp_t a,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11385 // Variables for execution of the function.
11386 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11389 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+ "_aggrdef *)a;\n";
11391 for(a=0;a<aggr_tbl.size();a++){
11392 if(aggr_tbl.is_builtin(a)){
11393 // Create temporaries for buffer return values
11394 data_type *adt = aggr_tbl.get_data_type(a);
11395 if(adt->is_buffer_type()){
11396 sprintf(tmpstr,"aggr_tmp_%d", a);
11397 ret+=adt->make_host_cvar(tmpstr)+";\n";
11402 for(a=0;a<aggr_tbl.size();a++){
11403 sprintf(tmpstr,"aggval->aggr_var%d",a);
11404 string assignto_var = tmpstr;
11405 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11408 ret += "\treturn aggval;\n";
11412 //--------------------------------------------------------
11413 // initialize an aggregate object inplace
11415 ret += "void create_aggregate(host_tuple &tup0, "+this->generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11416 // Variables for execution of the function.
11417 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11421 for(a=0;a<aggr_tbl.size();a++){
11422 if(aggr_tbl.is_builtin(a)){
11423 // Create temporaries for buffer return values
11424 data_type *adt = aggr_tbl.get_data_type(a);
11425 if(adt->is_buffer_type()){
11426 sprintf(tmpstr,"aggr_tmp_%d", a);
11427 ret+=adt->make_host_cvar(tmpstr)+";\n";
11432 for(a=0;a<aggr_tbl.size();a++){
11433 sprintf(tmpstr,"aggval->aggr_var%d",a);
11434 string assignto_var = tmpstr;
11435 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11441 //--------------------------------------------------------
11442 // Create and clean-initialize an state object
11444 ret += "void initialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval){\n";
11445 // Variables for execution of the function.
11446 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11449 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
11451 for(a=0;a<aggr_tbl.size();a++){
11452 if( aggr_tbl.is_superaggr(a)){
11453 if(aggr_tbl.is_builtin(a)){
11454 // Create temporaries for buffer return values
11455 data_type *adt = aggr_tbl.get_data_type(a);
11456 if(adt->is_buffer_type()){
11457 sprintf(tmpstr,"aggr_tmp_%d", a);
11458 ret+=adt->make_host_cvar(tmpstr)+";\n";
11464 for(a=0;a<aggr_tbl.size();a++){
11465 if( aggr_tbl.is_superaggr(a)){
11466 sprintf(tmpstr,"stval->aggr_var%d",a);
11467 string assignto_var = tmpstr;
11468 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11472 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
11473 string state_nm = (*ssi);
11474 ret += "_sfun_state_clean_init_"+state_nm+"(&(stval->state_var_"+state_nm+"));\n";
11480 //--------------------------------------------------------
11481 // Create and dirty-initialize an state object
11483 ret += "void reinitialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, "+generate_functor_name()+"_statedef *old_stval, int cd){\n";
11484 // Variables for execution of the function.
11485 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11488 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
11490 for(a=0;a<aggr_tbl.size();a++){
11491 if( aggr_tbl.is_superaggr(a)){
11492 if(aggr_tbl.is_builtin(a)){
11493 // Create temporaries for buffer return values
11494 data_type *adt = aggr_tbl.get_data_type(a);
11495 if(adt->is_buffer_type()){
11496 sprintf(tmpstr,"aggr_tmp_%d", a);
11497 ret+=adt->make_host_cvar(tmpstr)+";\n";
11503 // initialize superaggregates
11504 for(a=0;a<aggr_tbl.size();a++){
11505 if( aggr_tbl.is_superaggr(a)){
11506 sprintf(tmpstr,"stval->aggr_var%d",a);
11507 string assignto_var = tmpstr;
11508 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11512 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
11513 string state_nm = (*ssi);
11514 ret += "_sfun_state_dirty_init_"+state_nm+"(&(stval->state_var_"+state_nm+"),&(old_stval->state_var_"+state_nm+"), cd );\n";
11519 //--------------------------------------------------------
11520 // Finalize_state : call the finalize fcn on all states
11523 ret += "void finalize_state( "+generate_functor_name()+"_statedef *stval, int cd){\n";
11525 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
11526 string state_nm = (*ssi);
11527 ret += "_sfun_state_final_init_"+state_nm+"(&(stval->state_var_"+state_nm+"), cd);\n";
11535 //--------------------------------------------------------
11536 // update (plus) a superaggregate object
11538 ret += "void update_plus_superaggr(host_tuple &tup0, " +
11539 generate_functor_name()+"_groupdef *gbval, "+
11540 generate_functor_name()+"_statedef *stval){\n";
11541 // Variables for execution of the function.
11542 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11544 // use of temporaries depends on the aggregate,
11545 // generate them in generate_aggr_update
11548 for(a=0;a<aggr_tbl.size();a++){
11549 if(aggr_tbl.is_superaggr(a)){
11550 sprintf(tmpstr,"stval->aggr_var%d",a);
11551 string varname = tmpstr;
11552 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
11556 ret += "\treturn;\n";
11561 //--------------------------------------------------------
11562 // update (minus) a superaggregate object
11564 ret += "void update_minus_superaggr( "+
11565 generate_functor_name()+"_groupdef *gbval, "+
11566 generate_functor_name()+"_aggrdef *aggval,"+
11567 generate_functor_name()+"_statedef *stval"+
11569 // Variables for execution of the function.
11570 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11572 // use of temporaries depends on the aggregate,
11573 // generate them in generate_aggr_update
11576 for(a=0;a<aggr_tbl.size();a++){
11577 if(aggr_tbl.is_superaggr(a)){
11578 sprintf(tmpstr,"stval->aggr_var%d",a);
11579 string super_varname = tmpstr;
11580 sprintf(tmpstr,"aggval->aggr_var%d",a);
11581 string sub_varname = tmpstr;
11582 ret.append(generate_superaggr_minus(sub_varname, super_varname,&aggr_tbl,a, schema));
11586 ret += "\treturn;\n";
11590 //--------------------------------------------------------
11591 // update an aggregate object
11593 ret += "void update_aggregate(host_tuple &tup0, "
11594 +generate_functor_name()+"_groupdef *gbval, "+
11595 generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11596 // Variables for execution of the function.
11597 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11599 // use of temporaries depends on the aggregate,
11600 // generate them in generate_aggr_update
11603 for(a=0;a<aggr_tbl.size();a++){
11604 sprintf(tmpstr,"aggval->aggr_var%d",a);
11605 string varname = tmpstr;
11606 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
11609 ret += "\treturn;\n";
11612 //---------------------------------------------------
11615 ret += "\tbool flush_needed(){\n";
11616 if(uses_temporal_flush){
11617 ret += "\t\treturn needs_temporal_flush;\n";
11619 ret += "\t\treturn false;\n";
11624 //------------------------------------------------------
11625 // The cleaning_when predicate
11627 string gbvar = "gbval->gb_var";
11628 string aggvar = "aggval->";
11630 ret += "bool need_to_clean( "
11631 +generate_functor_name()+"_groupdef *gbval, "+
11632 generate_functor_name()+"_statedef *stval, int cd"+
11635 if(cleanwhen.size()>0)
11636 ret += "\tbool predval = true;\n";
11638 ret += "\tbool predval = false;\n";
11640 // Find the udafs ref'd in the cleaning_when clause
11642 for(w=0;w<cleanwhen.size();++w)
11643 collect_aggr_refs_pr(cleanwhen[w]->pr, cw_aggs);
11646 // get the return values from the UDAFS
11647 for(a=0;a<aggr_tbl.size();a++){
11648 if(! aggr_tbl.is_builtin(a) && cw_aggs.count(a)){
11649 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11650 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11651 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11656 // Start by cleaning up partial function results
11657 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11658 set<int> cw_pfcns; // partial fcns in where clause
11659 for(w=0;w<cleanwhen.size();++w)
11660 collect_partial_fcns_pr(cleanwhen[w]->pr, cw_pfcns);
11662 ret += gen_partial_fcn_dtr(partial_fcns,cw_pfcns);
11665 for(w=0;w<cleanwhen.size();++w){
11666 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11668 // Find partial fcns ref'd in this cnf element
11669 set<int> pfcn_refs;
11670 collect_partial_fcns_pr(cleanwhen[w]->pr, pfcn_refs);
11671 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
11672 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11673 ret += "\tif(retval){ return false;}\n";
11675 // ret += unpack_partial_fcn_fm_aggr(schema, partial_fcns, pfcn_refs,"false");
11677 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanwhen[w]->pr,gbvar, aggvar, schema)+
11678 ") ) predval = false;\n";
11681 ret += "\treturn predval;\n";
11684 //------------------------------------------------------
11685 // The cleaning_by predicate
11687 ret += "bool sample_group("
11688 +generate_functor_name()+"_groupdef *gbval, "+
11689 generate_functor_name()+"_aggrdef *aggval,"+
11690 generate_functor_name()+"_statedef *stval, int cd"+
11693 if(cleanby.size()>0)
11694 ret += "\tbool retval = true;\n";
11696 ret += "\tbool retval = false;\n";
11698 // Find the udafs ref'd in the cleaning_by clause
11700 for(w=0;w<cleanby.size();++w)
11701 collect_aggr_refs_pr(cleanby[w]->pr, cb_aggs);
11704 // get the return values from the UDAFS
11705 for(a=0;a<aggr_tbl.size();a++){
11706 if(! aggr_tbl.is_builtin(a) && cb_aggs.count(a)){
11707 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11708 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11709 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11714 // Start by cleaning up partial function results
11715 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11716 set<int> cb_pfcns; // partial fcns in where clause
11717 for(w=0;w<cleanby.size();++w)
11718 collect_partial_fcns_pr(cleanby[w]->pr, cb_pfcns);
11720 ret += gen_partial_fcn_dtr(partial_fcns,cb_pfcns);
11723 for(w=0;w<cleanwhen.size();++w){
11724 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11728 // Find the set of variables accessed in this CNF elem,
11729 // but in no previous element.
11730 col_id_set new_cids;
11731 get_new_pred_cids(cleanby[w]->pr, found_cids, new_cids, segen_gb_tbl);
11733 // Unpack these values.
11734 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11737 // Find partial fcns ref'd in this cnf element
11738 set<int> pfcn_refs;
11739 collect_partial_fcns_pr(cleanby[w]->pr, pfcn_refs);
11740 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
11741 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11742 ret += "\tif(retval){ return false;}\n";
11744 // ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11746 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanby[w]->pr,gbvar, aggvar, schema)+
11747 +") ) retval = false;\n";
11750 ret += "\treturn retval;\n";
11754 //-----------------------------------------------------
11756 ret += "bool final_sample_group("
11757 +generate_functor_name()+"_groupdef *gbval, "+
11758 generate_functor_name()+"_aggrdef *aggval,"+
11759 generate_functor_name()+"_statedef *stval,"+
11762 ret += "\tgs_retval_t retval = 0;\n";
11764 // Find the udafs ref'd in the having clause
11766 for(w=0;w<having.size();++w)
11767 collect_aggr_refs_pr(having[w]->pr, hv_aggs);
11770 // get the return values from the UDAFS
11771 for(a=0;a<aggr_tbl.size();a++){
11772 if(! aggr_tbl.is_builtin(a) && hv_aggs.count(a)){
11773 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11774 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11775 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11780 set<int> hv_sl_pfcns;
11781 for(w=0;w<having.size();w++){
11782 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
11785 // clean up the partial fcn results from any previous execution
11786 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
11789 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
11790 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11791 ret += "\tif(retval){ return false;}\n";
11794 // Evaluate the HAVING clause
11795 // TODO: this seems to have a ++ operator rather than a + operator.
11796 for(w=0;w<having.size();++w){
11797 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
11800 ret += "\treturn true;\n";
11803 //---------------------------------------------------
11804 // create output tuple
11805 // Unpack the partial functions ref'd in the where clause,
11806 // select clause. Evaluate the where clause.
11807 // Finally, pack the tuple.
11809 // I need to use special code generation here,
11810 // so I'll leave it in longhand.
11812 ret += "host_tuple create_output_tuple("
11813 +generate_functor_name()+"_groupdef *gbval, "+
11814 generate_functor_name()+"_aggrdef *aggval,"+
11815 generate_functor_name()+"_statedef *stval,"+
11816 "int cd, bool &failed){\n";
11818 ret += "\thost_tuple tup;\n";
11819 ret += "\tfailed = false;\n";
11820 ret += "\tgs_retval_t retval = 0;\n";
11823 // Find the udafs ref'd in the select clause
11825 for(s=0;s<select_list.size();s++)
11826 collect_agg_refs(select_list[s]->se, sl_aggs);
11829 // get the return values from the UDAFS
11830 for(a=0;a<aggr_tbl.size();a++){
11831 if(! aggr_tbl.is_builtin(a) && sl_aggs.count(a)){
11832 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11833 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11834 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11839 // I can't cache partial fcn results from the having
11840 // clause because evaluation is separated.
11842 for(s=0;s<select_list.size();s++){
11843 collect_partial_fcns(select_list[s]->se, sl_pfcns);
11846 for(pfsi=sl_pfcns.begin();pfsi!=sl_pfcns.end();++pfsi){
11847 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11848 ret += "\tif(retval){ failed=true; return tup;}\n";
11852 // Now, compute the size of the tuple.
11854 // Unpack any BUFFER type selections into temporaries
11855 // so that I can compute their size and not have
11856 // to recompute their value during tuple packing.
11857 // I can use regular assignment here because
11858 // these temporaries are non-persistent.
11859 // TODO: should I be using the selvar generation routine?
11861 ret += "//\t\tCompute the size of the tuple.\n";
11862 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
11863 for(s=0;s<select_list.size();s++){
11864 scalarexp_t *se = select_list[s]->se;
11865 data_type *sdt = se->get_data_type();
11866 if(sdt->is_buffer_type() &&
11867 !( (se->get_operator_type() == SE_COLREF) ||
11868 (se->get_operator_type() == SE_AGGR_STAR) ||
11869 (se->get_operator_type() == SE_AGGR_SE) ||
11870 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
11871 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
11873 sprintf(tmpstr,"selvar_%d",s);
11874 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
11875 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
11879 // The size of the tuple is the size of the tuple struct plus the
11880 // size of the buffers to be copied in.
11882 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
11883 for(s=0;s<select_list.size();s++){
11884 // if(s>0) ret += "+";
11885 scalarexp_t *se = select_list[s]->se;
11886 data_type *sdt = select_list[s]->se->get_data_type();
11887 if(sdt->is_buffer_type()){
11888 if(!( (se->get_operator_type() == SE_COLREF) ||
11889 (se->get_operator_type() == SE_AGGR_STAR) ||
11890 (se->get_operator_type() == SE_AGGR_SE) ||
11891 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
11892 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
11894 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
11895 ret.append(tmpstr);
11897 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
11898 ret.append(tmpstr);
11904 // Allocate tuple data block.
11905 ret += "//\t\tCreate the tuple block.\n";
11906 ret += "\ttup.data = malloc(tup.tuple_size);\n";
11907 ret += "\ttup.heap_resident = true;\n";
11909 // Mark tuple as regular
11910 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
11912 // ret += "\ttup.channel = 0;\n";
11913 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
11914 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
11917 // (Here, offsets are hard-wired. is this a problem?)
11919 ret += "//\t\tPack the fields into the tuple.\n";
11920 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
11921 for(s=0;s<select_list.size();s++){
11922 scalarexp_t *se = select_list[s]->se;
11923 data_type *sdt = se->get_data_type();
11924 if(sdt->is_buffer_type()){
11925 if(!( (se->get_operator_type() == SE_COLREF) ||
11926 (se->get_operator_type() == SE_AGGR_STAR) ||
11927 (se->get_operator_type() == SE_AGGR_SE) ||
11928 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
11929 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
11931 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
11932 ret.append(tmpstr);
11933 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
11934 ret.append(tmpstr);
11936 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
11937 ret.append(tmpstr);
11938 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
11939 ret.append(tmpstr);
11942 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
11943 ret.append(tmpstr);
11944 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
11949 // Destroy string temporaries
11950 ret += gen_buffer_selvars_dtr(select_list);
11951 // Destroy string return vals of UDAFs
11952 for(a=0;a<aggr_tbl.size();a++){
11953 if(! aggr_tbl.is_builtin(a)){
11954 int afcn_id = aggr_tbl.get_fcn_id(a);
11955 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11956 if(adt->is_buffer_type()){
11957 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
11958 adt->get_hfta_buffer_destroy().c_str(), a );
11965 ret += "\treturn tup;\n";
11969 //-------------------------------------------------------------------
11970 // Temporal update functions
11972 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
11974 // create a temp status tuple
11975 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
11977 ret += gen_init_temp_status_tuple(this->get_node_name());
11980 // (Here, offsets are hard-wired. is this a problem?)
11982 ret += "//\t\tPack the fields into the tuple.\n";
11983 for(s=0;s<select_list.size();s++){
11984 data_type *sdt = select_list[s]->se->get_data_type();
11985 if(sdt->is_temporal()){
11986 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
11988 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
11994 ret += "\treturn 0;\n";
11995 ret += "};};\n\n\n";
11998 //----------------------------------------------------------
11999 // The hash function
12001 ret += "struct "+generate_functor_name()+"_hash_func{\n";
12002 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12003 "_groupdef *grp) const{\n";
12004 ret += "\t\treturn(";
12005 for(g=0;g<gb_tbl.size();g++){
12006 if(g>0) ret += "^";
12007 data_type *gdt = gb_tbl.get_data_type(g);
12008 if(gdt->use_hashfunc()){
12009 if(gdt->is_buffer_type())
12010 sprintf(tmpstr,"(%s*%s(&)grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12012 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12014 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12018 ret += ") >> 32);\n";
12022 //----------------------------------------------------------
12023 // The superhash function
12025 ret += "struct "+generate_functor_name()+"_superhash_func{\n";
12026 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12027 "_groupdef *grp) const{\n";
12028 ret += "\t\treturn(0";
12030 for(g=0;g<gb_tbl.size();g++){
12031 if(sg_tbl.count(g)>0){
12033 data_type *gdt = gb_tbl.get_data_type(g);
12034 if(gdt->use_hashfunc()){
12035 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12037 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12042 ret += ") >> 32);\n";
12047 //----------------------------------------------------------
12048 // The comparison function
12050 ret += "struct "+generate_functor_name()+"_equal_func{\n";
12051 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12052 generate_functor_name()+"_groupdef *grp2) const{\n";
12053 ret += "\t\treturn( (";
12054 for(g=0;g<gb_tbl.size();g++){
12055 if(g>0) ret += ") && (";
12056 data_type *gdt = gb_tbl.get_data_type(g);
12057 if(gdt->complex_comparison(gdt)){
12058 if(gdt->is_buffer_type())
12059 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12060 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12062 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12063 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12065 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12074 //----------------------------------------------------------
12075 // The superhashcomparison function
12077 ret += "struct "+generate_functor_name()+"_superequal_func{\n";
12078 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12079 generate_functor_name()+"_groupdef *grp2) const{\n";
12080 ret += "\t\treturn( (";
12082 bool first_elem = true;
12083 for(g=0;g<gb_tbl.size();g++){
12084 if(sg_tbl.count(g)){
12085 if(first_elem) first_elem=false; else ret += ") && (";
12086 data_type *gdt = gb_tbl.get_data_type(g);
12087 if(gdt->complex_comparison(gdt)){
12088 if(gdt->is_buffer_type())
12089 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12090 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12092 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12093 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12095 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12112 string sgahcwcb_qpn::generate_operator(int i, string params){
12115 " clean_operator<" +
12116 generate_functor_name()+",\n\t"+
12117 generate_functor_name() + "_groupdef, \n\t" +
12118 generate_functor_name() + "_aggrdef, \n\t" +
12119 generate_functor_name() + "_statedef, \n\t" +
12120 generate_functor_name()+"_hash_func, \n\t"+
12121 generate_functor_name()+"_equal_func ,\n\t"+
12122 generate_functor_name()+"_superhash_func,\n\t "+
12123 generate_functor_name()+"_superequal_func \n\t"+
12124 "> *op"+int_to_string(i)+" = new clean_operator<"+
12125 generate_functor_name()+",\n\t"+
12126 generate_functor_name() + "_groupdef,\n\t " +
12127 generate_functor_name() + "_aggrdef, \n\t" +
12128 generate_functor_name() + "_statedef, \n\t" +
12129 generate_functor_name()+"_hash_func, \n\t"+
12130 generate_functor_name()+"_equal_func, \n\t"+
12131 generate_functor_name()+"_superhash_func, \n\t"+
12132 generate_functor_name()+"_superequal_func\n\t "
12133 ">("+params+", \"" + get_node_name() + "\");\n"
12137 ////////////////////////////////////////////////////////////////
12142 string rsgah_qpn::generate_functor_name(){
12143 return("rsgah_functor_" + normalize_name(this->get_node_name()));
12147 string rsgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
12151 // Initialize generate utility globals
12152 segen_gb_tbl = &(gb_tbl);
12155 //--------------------------------
12156 // group definition class
12157 string ret = "class " + generate_functor_name() + "_groupdef{\n";
12158 ret += "public:\n";
12159 for(g=0;g<this->gb_tbl.size();g++){
12160 sprintf(tmpstr,"gb_var%d",g);
12161 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12164 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
12165 ret += "\t"+generate_functor_name() + "_groupdef("+
12166 this->generate_functor_name() + "_groupdef *gd){\n";
12167 for(g=0;g<gb_tbl.size();g++){
12168 data_type *gdt = gb_tbl.get_data_type(g);
12169 if(gdt->is_buffer_type()){
12170 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
12171 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
12174 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
12180 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
12181 for(g=0;g<gb_tbl.size();g++){
12182 data_type *gdt = gb_tbl.get_data_type(g);
12183 if(gdt->is_buffer_type()){
12184 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
12185 gdt->get_hfta_buffer_destroy().c_str(), g );
12192 //--------------------------------
12193 // aggr definition class
12194 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
12195 ret += "public:\n";
12196 for(a=0;a<aggr_tbl.size();a++){
12197 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
12198 sprintf(tmpstr,"aggr_var%d",a);
12199 if(aggr_tbl.is_builtin(a))
12200 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
12202 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
12205 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
12207 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
12208 for(a=0;a<aggr_tbl.size();a++){
12209 if(aggr_tbl.is_builtin(a)){
12210 data_type *adt = aggr_tbl.get_data_type(a);
12211 if(adt->is_buffer_type()){
12212 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
12213 adt->get_hfta_buffer_destroy().c_str(), a );
12217 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
12218 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12219 ret+="(aggr_var"+int_to_string(a)+"));\n";
12225 //--------------------------------
12226 // gb functor class
12227 ret += "class " + this->generate_functor_name() + "{\n";
12229 // Find variables referenced in this query node.
12231 col_id_set cid_set;
12232 col_id_set::iterator csi;
12234 for(w=0;w<where.size();++w)
12235 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
12236 for(w=0;w<having.size();++w)
12237 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
12238 for(w=0;w<closing_when.size();++w)
12239 gather_pr_col_ids(closing_when[w]->pr,cid_set,segen_gb_tbl);
12240 for(g=0;g<gb_tbl.size();g++)
12241 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
12243 for(s=0;s<select_list.size();s++){
12244 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
12248 // Private variables : store the state of the functor.
12249 // 1) variables for unpacked attributes
12250 // 2) offsets of the unpacked attributes
12251 // 3) storage of partial functions
12252 // 4) storage of complex literals (i.e., require a constructor)
12254 ret += "private:\n";
12256 // var to save the schema handle
12257 ret += "\tint schema_handle0;\n";
12259 // generate the declaration of all the variables related to
12260 // temp tuples generation
12261 ret += gen_decl_temp_vars();
12263 // unpacked attribute storage, offsets
12264 ret += "//\t\tstorage and offsets of accessed fields.\n";
12265 ret += generate_access_vars(cid_set, schema);
12266 // tuple metadata offset
12267 ret += "\tint tuple_metadata_offset0;\n";
12269 // Variables to store results of partial functions.
12270 // WARNING find_partial_functions modifies the SE
12271 // (it marks the partial function id).
12272 ret += "//\t\tParital function result storage\n";
12273 vector<scalarexp_t *> partial_fcns;
12274 vector<int> fcn_ref_cnt;
12275 vector<bool> is_partial_fcn;
12276 for(s=0;s<select_list.size();s++){
12277 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
12279 for(w=0;w<where.size();w++){
12280 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12282 for(w=0;w<having.size();w++){
12283 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12285 for(w=0;w<closing_when.size();w++){
12286 find_partial_fcns_pr(closing_when[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12288 for(g=0;g<gb_tbl.size();g++){
12289 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
12291 for(a=0;a<aggr_tbl.size();a++){
12292 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
12294 if(partial_fcns.size()>0){
12295 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
12296 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
12299 // Create cached temporaries for UDAF return values.
12300 for(a=0;a<aggr_tbl.size();a++){
12301 if(! aggr_tbl.is_builtin(a)){
12302 int afcn_id = aggr_tbl.get_fcn_id(a);
12303 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12304 sprintf(tmpstr,"udaf_ret_%d", a);
12305 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
12310 // Complex literals (i.e., they need constructors)
12311 ret += "//\t\tComplex literal storage.\n";
12312 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
12313 ret += generate_complex_lit_vars(complex_literals);
12315 // Pass-by-handle parameters
12316 ret += "//\t\tPass-by-handle storage.\n";
12317 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
12318 ret += generate_pass_by_handle_vars(param_handle_table);
12321 // variables to hold parameters.
12322 ret += "//\tfor query parameters\n";
12323 ret += generate_param_vars(param_tbl);
12325 // Is there a temporal flush? If so create flush temporaries,
12326 // create flush indicator.
12327 bool uses_temporal_flush = false;
12328 for(g=0;g<gb_tbl.size();g++){
12329 data_type *gdt = gb_tbl.get_data_type(g);
12330 if(gdt->is_temporal())
12331 uses_temporal_flush = true;
12334 if(uses_temporal_flush){
12335 ret += "//\t\tFor temporal flush\n";
12336 for(g=0;g<gb_tbl.size();g++){
12337 data_type *gdt = gb_tbl.get_data_type(g);
12338 if(gdt->is_temporal()){
12339 sprintf(tmpstr,"last_gb%d",g);
12340 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12341 sprintf(tmpstr,"last_flushed_gb%d",g);
12342 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12345 ret += "\tbool needs_temporal_flush;\n";
12348 // The publicly exposed functions
12350 ret += "\npublic:\n";
12353 //-------------------
12354 // The functor constructor
12355 // pass in the schema handle.
12356 // 1) make assignments to the unpack offset variables
12357 // 2) initialize the complex literals
12359 ret += "//\t\tFunctor constructor.\n";
12360 ret += this->generate_functor_name()+"(int schema_handle0){\n";
12362 // save the schema handle
12363 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
12365 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
12368 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
12369 ret += gen_access_var_init(cid_set);
12371 // complex literals
12372 ret += "//\t\tInitialize complex literals.\n";
12373 ret += gen_complex_lit_init(complex_literals);
12375 // Initialize partial function results so they can be safely GC'd
12376 ret += gen_partial_fcn_init(partial_fcns);
12378 // Initialize non-query-parameter parameter handles
12379 ret += gen_pass_by_handle_init(param_handle_table);
12381 // temporal flush variables
12382 // ASSUME that structured values won't be temporal.
12383 if(uses_temporal_flush){
12384 ret += "//\t\tInitialize temporal flush variables.\n";
12385 for(g=0;g<gb_tbl.size();g++){
12386 data_type *gdt = gb_tbl.get_data_type(g);
12387 if(gdt->is_temporal()){
12388 literal_t gl(gdt->type_indicator());
12389 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
12390 ret.append(tmpstr);
12393 ret += "\tneeds_temporal_flush = false;\n";
12396 // Init temporal attributes referenced in select list
12397 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
12402 //-------------------
12403 // Functor destructor
12404 ret += "//\t\tFunctor destructor.\n";
12405 ret += "~"+this->generate_functor_name()+"(){\n";
12407 // clean up buffer type complex literals
12408 ret += gen_complex_lit_dtr(complex_literals);
12410 // Deregister the pass-by-handle parameters
12411 ret += "/* register and de-register the pass-by-handle parameters */\n";
12412 ret += gen_pass_by_handle_dtr(param_handle_table);
12414 // clean up partial function results.
12415 ret += "/* clean up partial function storage */\n";
12416 ret += gen_partial_fcn_dtr(partial_fcns);
12418 // Destroy the parameters, if any need to be destroyed
12419 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12424 //-------------------
12425 // Parameter manipulation routines
12426 ret += generate_load_param_block(this->generate_functor_name(),
12427 this->param_tbl,param_handle_table);
12428 ret += generate_delete_param_block(this->generate_functor_name(),
12429 this->param_tbl,param_handle_table);
12431 //-------------------
12432 // Register new parameter block
12434 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
12435 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12436 ret += "\treturn this->load_params_"+this->generate_functor_name()+
12441 //-------------------
12442 // the create_group method.
12443 // This method creates a group in a buffer passed in
12444 // (to allow for creation on the stack).
12445 // There are also a couple of side effects:
12446 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
12447 // 2) determine if a temporal flush is required.
12449 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
12450 // Variables for execution of the function.
12451 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12453 if(partial_fcns.size()>0){ // partial fcn access failure
12454 ret += "\tgs_retval_t retval = 0;\n";
12458 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
12459 "_groupdef *) buffer;\n";
12461 // Start by cleaning up partial function results
12462 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12463 set<int> w_pfcns; // partial fcns in where clause
12464 for(w=0;w<where.size();++w)
12465 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
12467 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
12468 for(g=0;g<gb_tbl.size();g++){
12469 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
12471 for(a=0;a<aggr_tbl.size();a++){
12472 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
12474 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
12475 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
12476 // ret += gen_partial_fcn_dtr(partial_fcns);
12479 ret += gen_temp_tuple_check(this->node_name, 0);
12480 col_id_set found_cids; // colrefs unpacked thus far.
12481 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
12484 // Save temporal group-by variables
12487 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
12489 for(g=0;g<gb_tbl.size();g++){
12491 data_type *gdt = gb_tbl.get_data_type(g);
12493 if(gdt->is_temporal()){
12494 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12495 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
12496 ret.append(tmpstr);
12503 // Compare the temporal GB vars with the stored ones,
12504 // set flush indicator and update stored GB vars if there is any change.
12506 if(uses_temporal_flush){
12507 ret+= "\tif( !( (";
12508 bool first_one = true;
12509 for(g=0;g<gb_tbl.size();g++){
12510 data_type *gdt = gb_tbl.get_data_type(g);
12512 if(gdt->is_temporal()){
12513 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
12514 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
12515 if(first_one){first_one = false;} else {ret += ") && (";}
12516 ret += generate_equality_test(lhs_op, rhs_op, gdt);
12520 for(g=0;g<gb_tbl.size();g++){
12521 data_type *gdt = gb_tbl.get_data_type(g);
12522 if(gdt->is_temporal()){
12523 if(gdt->is_buffer_type()){
12524 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
12526 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
12528 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
12533 ret += "\t\tneeds_temporal_flush=true;\n";
12534 ret += "\t\t}else{\n"
12535 "\t\t\tneeds_temporal_flush=false;\n"
12540 // For temporal status tuple we don't need to do anything else
12541 ret += "\tif (temp_tuple_received) return NULL;\n\n";
12543 for(w=0;w<where.size();++w){
12544 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12546 // Find the set of variables accessed in this CNF elem,
12547 // but in no previous element.
12548 col_id_set new_cids;
12549 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
12551 // Unpack these values.
12552 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
12553 // Find partial fcns ref'd in this cnf element
12554 set<int> pfcn_refs;
12555 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
12556 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
12558 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
12559 +") ) return(NULL);\n";
12562 // The partial functions ref'd in the group-by var and aggregate
12563 // definitions must also be evaluated. If one returns false,
12564 // then implicitly the predicate is false.
12565 set<int>::iterator pfsi;
12567 if(ag_gb_pfcns.size() > 0)
12568 ret += "//\t\tUnpack remaining partial fcns.\n";
12569 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
12570 found_cids, segen_gb_tbl, "NULL", needs_xform);
12572 // Unpack the group-by variables
12574 for(g=0;g<gb_tbl.size();g++){
12575 data_type *gdt = gb_tbl.get_data_type(g);
12576 if(!gdt->is_temporal()){ // temproal gbs already computed
12577 // Find the new fields ref'd by this GBvar def.
12578 col_id_set new_cids;
12579 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
12580 // Unpack these values.
12581 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
12583 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12584 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
12586 // There seems to be no difference between the two
12587 // branches of the IF statement.
12588 data_type *gdt = gb_tbl.get_data_type(g);
12589 if(gdt->is_buffer_type()){
12590 // Create temporary copy.
12591 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12592 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
12594 scalarexp_t *gse = gb_tbl.get_def(g);
12595 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12596 g,generate_se_code(gse,schema).c_str());
12599 ret.append(tmpstr);
12605 ret+= "\treturn gbval;\n";
12608 //--------------------------------------------------------
12609 // Create and initialize an aggregate object
12611 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
12612 // Variables for execution of the function.
12613 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12616 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
12617 "_aggrdef *)buffer;\n";
12619 for(a=0;a<aggr_tbl.size();a++){
12620 if(aggr_tbl.is_builtin(a)){
12621 // Create temporaries for buffer return values
12622 data_type *adt = aggr_tbl.get_data_type(a);
12623 if(adt->is_buffer_type()){
12624 sprintf(tmpstr,"aggr_tmp_%d", a);
12625 ret+=adt->make_host_cvar(tmpstr)+";\n";
12630 // Unpack all remaining attributes
12631 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
12632 for(a=0;a<aggr_tbl.size();a++){
12633 sprintf(tmpstr,"aggval->aggr_var%d",a);
12634 string assignto_var = tmpstr;
12635 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12638 ret += "\treturn aggval;\n";
12641 //--------------------------------------------------------
12642 // update an aggregate object
12644 ret += "void update_aggregate(host_tuple &tup0, "
12645 +generate_functor_name()+"_groupdef *gbval, "+
12646 generate_functor_name()+"_aggrdef *aggval){\n";
12647 // Variables for execution of the function.
12648 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12650 // use of temporaries depends on the aggregate,
12651 // generate them in generate_aggr_update
12654 // Unpack all remaining attributes
12655 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
12656 for(a=0;a<aggr_tbl.size();a++){
12657 sprintf(tmpstr,"aggval->aggr_var%d",a);
12658 string varname = tmpstr;
12659 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12662 ret += "\treturn;\n";
12665 //--------------------------------------------------------
12666 // reinitialize an aggregate object
12668 ret += "void reinit_aggregates( "+
12669 generate_functor_name()+"_groupdef *gbval, "+
12670 generate_functor_name()+"_aggrdef *aggval){\n";
12671 // Variables for execution of the function.
12672 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12674 // use of temporaries depends on the aggregate,
12675 // generate them in generate_aggr_update
12677 for(g=0;g<gb_tbl.size();g++){
12678 data_type *gdt = gb_tbl.get_data_type(g);
12679 if(gdt->is_temporal()){
12680 if(gdt->is_buffer_type()){
12681 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
12683 sprintf(tmpstr,"\t\t gbval->gb_var%d =last_gb%d;\n",g,g);
12689 // Unpack all remaining attributes
12690 for(a=0;a<aggr_tbl.size();a++){
12691 sprintf(tmpstr,"aggval->aggr_var%d",a);
12692 string varname = tmpstr;
12693 ret.append(generate_aggr_reinitialize(varname,&aggr_tbl,a, schema));
12696 ret += "\treturn;\n";
12703 //---------------------------------------------------
12706 ret += "\tbool flush_needed(){\n";
12707 if(uses_temporal_flush){
12708 ret += "\t\treturn needs_temporal_flush;\n";
12710 ret += "\t\treturn false;\n";
12714 //---------------------------------------------------
12715 // create output tuple
12716 // Unpack the partial functions ref'd in the where clause,
12717 // select clause. Evaluate the where clause.
12718 // Finally, pack the tuple.
12720 // I need to use special code generation here,
12721 // so I'll leave it in longhand.
12723 ret += "host_tuple create_output_tuple("
12724 +generate_functor_name()+"_groupdef *gbval, "+
12725 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
12727 ret += "\thost_tuple tup;\n";
12728 ret += "\tfailed = false;\n";
12729 ret += "\tgs_retval_t retval = 0;\n";
12731 string gbvar = "gbval->gb_var";
12732 string aggvar = "aggval->";
12735 // First, get the return values from the UDAFS
12736 for(a=0;a<aggr_tbl.size();a++){
12737 if(! aggr_tbl.is_builtin(a)){
12738 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12739 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12740 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12744 set<int> hv_sl_pfcns;
12745 for(w=0;w<having.size();w++){
12746 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
12748 for(s=0;s<select_list.size();s++){
12749 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
12752 // clean up the partial fcn results from any previous execution
12753 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
12756 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
12757 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12758 ret += "\tif(retval){ failed = true; return(tup);}\n";
12761 // Evaluate the HAVING clause
12762 // TODO: this seems to have a ++ operator rather than a + operator.
12763 for(w=0;w<having.size();++w){
12764 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
12767 // Now, compute the size of the tuple.
12769 // Unpack any BUFFER type selections into temporaries
12770 // so that I can compute their size and not have
12771 // to recompute their value during tuple packing.
12772 // I can use regular assignment here because
12773 // these temporaries are non-persistent.
12774 // TODO: should I be using the selvar generation routine?
12776 ret += "//\t\tCompute the size of the tuple.\n";
12777 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
12778 for(s=0;s<select_list.size();s++){
12779 scalarexp_t *se = select_list[s]->se;
12780 data_type *sdt = se->get_data_type();
12781 if(sdt->is_buffer_type() &&
12782 !( (se->get_operator_type() == SE_COLREF) ||
12783 (se->get_operator_type() == SE_AGGR_STAR) ||
12784 (se->get_operator_type() == SE_AGGR_SE) ||
12785 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12786 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12788 sprintf(tmpstr,"selvar_%d",s);
12789 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
12790 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
12794 // The size of the tuple is the size of the tuple struct plus the
12795 // size of the buffers to be copied in.
12797 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
12798 for(s=0;s<select_list.size();s++){
12799 // if(s>0) ret += "+";
12800 scalarexp_t *se = select_list[s]->se;
12801 data_type *sdt = select_list[s]->se->get_data_type();
12802 if(sdt->is_buffer_type()){
12803 if(!( (se->get_operator_type() == SE_COLREF) ||
12804 (se->get_operator_type() == SE_AGGR_STAR) ||
12805 (se->get_operator_type() == SE_AGGR_SE) ||
12806 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12807 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12809 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
12810 ret.append(tmpstr);
12812 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12813 ret.append(tmpstr);
12819 // Allocate tuple data block.
12820 ret += "//\t\tCreate the tuple block.\n";
12821 ret += "\ttup.data = malloc(tup.tuple_size);\n";
12822 ret += "\ttup.heap_resident = true;\n";
12824 // Mark tuple as regular
12825 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
12827 // ret += "\ttup.channel = 0;\n";
12828 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
12829 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
12832 // (Here, offsets are hard-wired. is this a problem?)
12834 ret += "//\t\tPack the fields into the tuple.\n";
12835 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
12836 for(s=0;s<select_list.size();s++){
12837 scalarexp_t *se = select_list[s]->se;
12838 data_type *sdt = se->get_data_type();
12839 if(sdt->is_buffer_type()){
12840 if(!( (se->get_operator_type() == SE_COLREF) ||
12841 (se->get_operator_type() == SE_AGGR_STAR) ||
12842 (se->get_operator_type() == SE_AGGR_SE) ||
12843 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12844 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12846 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
12847 ret.append(tmpstr);
12848 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
12849 ret.append(tmpstr);
12851 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12852 ret.append(tmpstr);
12853 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12854 ret.append(tmpstr);
12857 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12858 ret.append(tmpstr);
12859 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
12864 // Destroy string temporaries
12865 ret += gen_buffer_selvars_dtr(select_list);
12867 ret += "\treturn tup;\n";
12870 //------------------------------------------------------------------
12871 // Cleaning_when : evaluate the cleaning_when clause.
12872 // ASSUME that the udaf return values have already
12873 // been unpacked. delete the string udaf return values at the end.
12875 ret += "bool cleaning_when("
12876 +generate_functor_name()+"_groupdef *gbval, "+
12877 generate_functor_name()+"_aggrdef *aggval){\n";
12879 ret += "\tbool retval = true;\n";
12882 gbvar = "gbval->gb_var";
12883 aggvar = "aggval->";
12886 set<int> clw_pfcns;
12887 for(w=0;w<closing_when.size();w++){
12888 collect_partial_fcns_pr(closing_when[w]->pr, clw_pfcns);
12891 // clean up the partial fcn results from any previous execution
12892 ret += gen_partial_fcn_dtr(partial_fcns,clw_pfcns);
12895 for(pfsi=clw_pfcns.begin();pfsi!=clw_pfcns.end();++pfsi){
12896 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12897 ret += "\tif(retval){ return false;}\n";
12900 // Evaluate the Closing When clause
12901 // TODO: this seems to have a ++ operator rather than a + operator.
12902 for(w=0;w<closing_when.size();++w){
12903 ret += "\tif( !("+generate_predicate_code_fm_aggr(closing_when[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
12907 // Destroy string return vals of UDAFs
12908 for(a=0;a<aggr_tbl.size();a++){
12909 if(! aggr_tbl.is_builtin(a)){
12910 int afcn_id = aggr_tbl.get_fcn_id(a);
12911 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12912 if(adt->is_buffer_type()){
12913 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12914 adt->get_hfta_buffer_destroy().c_str(), a );
12920 ret += "\treturn retval;\n";
12926 //-------------------------------------------------------------------
12927 // Temporal update functions
12929 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12931 // create a temp status tuple
12932 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12934 ret += gen_init_temp_status_tuple(this->get_node_name());
12937 // (Here, offsets are hard-wired. is this a problem?)
12939 ret += "//\t\tPack the fields into the tuple.\n";
12940 for(s=0;s<select_list.size();s++){
12941 data_type *sdt = select_list[s]->se->get_data_type();
12942 if(sdt->is_temporal()){
12943 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12945 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
12951 ret += "\treturn 0;\n";
12952 ret += "};};\n\n\n";
12955 //----------------------------------------------------------
12956 // The hash function
12958 ret += "struct "+generate_functor_name()+"_hash_func{\n";
12959 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12960 "_groupdef *grp) const{\n";
12961 ret += "\t\treturn(0";
12962 for(g=0;g<gb_tbl.size();g++){
12963 data_type *gdt = gb_tbl.get_data_type(g);
12964 if(! gdt->is_temporal()){
12966 if(gdt->use_hashfunc()){
12967 if(gdt->is_buffer_type())
12968 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12970 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12972 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12977 ret += " >> 32);\n";
12981 //----------------------------------------------------------
12982 // The comparison function
12984 ret += "struct "+generate_functor_name()+"_equal_func{\n";
12985 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12986 generate_functor_name()+"_groupdef *grp2) const{\n";
12987 ret += "\t\treturn( (";
12990 bool first_exec = true;
12991 for(g=0;g<gb_tbl.size();g++){
12992 data_type *gdt = gb_tbl.get_data_type(g);
12993 if(! gdt->is_temporal()){
12994 if(first_exec){first_exec=false;}else{ hcmpr += ") && (";}
12995 if(gdt->complex_comparison(gdt)){
12996 if(gdt->is_buffer_type())
12997 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12998 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13000 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
13001 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13003 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
13020 string rsgah_qpn::generate_operator(int i, string params){
13023 " running_agg_operator<" +
13024 generate_functor_name()+","+
13025 generate_functor_name() + "_groupdef, " +
13026 generate_functor_name() + "_aggrdef, " +
13027 generate_functor_name()+"_hash_func, "+
13028 generate_functor_name()+"_equal_func "
13029 "> *op"+int_to_string(i)+" = new running_agg_operator<"+
13030 generate_functor_name()+","+
13031 generate_functor_name() + "_groupdef, " +
13032 generate_functor_name() + "_aggrdef, " +
13033 generate_functor_name()+"_hash_func, "+
13034 generate_functor_name()+"_equal_func "
13035 ">("+params+", \"" + get_node_name() + "\");\n"
13041 // Split aggregation into two HFTA components - sub and superaggregation
13042 // If unable to split the aggregates, empty vector will be returned
//
// Builds two new sgah_qpn nodes from this one:
//   low_hfta_node ("_"+node_name): sub-aggregation; gets the duplicated
//       group-by definitions, the sub-aggregates and every WHERE predicate.
//   hi_hfta_node  (node_name): super-aggregation; reads from a table variable
//       named "_"+node_name and gets the rehomed select list and HAVING
//       predicates.
// ret_vec receives the low node first, then the hi node.
//   Ext_fcns - external function list, queried for sub/super aggregate ids.
//   Schema   - not referenced in the visible lines of this function.
13043 vector<qp_node *> sgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13045 vector<qp_node *> ret_vec;
// NOTE(review): o, i, t, fta_flds, stream_flds and sel_names are not used in
// the visible lines below - candidates for removal; confirm.
13046 int s, p, g, a, o, i;
13049 vector<string> fta_flds, stream_flds;
13050 int t = table_name->get_schema_ref();
13052 // Get the set of interfaces it accesses.
13054 vector<string> sel_names;
13056 // Verify that all of the ref'd UDAFs can be split.
// A UDAF is splittable only when the function list reports both a
// super-aggregate and a sub-aggregate id (neither is negative).
13058 for(a=0;a<aggr_tbl.size();++a){
13059 if(! aggr_tbl.is_builtin(a)){
13060 int afcn = aggr_tbl.get_fcn_id(a);
13061 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13062 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
// Presumably bails out with the (still empty) ret_vec here, per the header
// comment - confirm.
13063 if(hfta_super_id < 0 || hfta_sub_id < 0){
13069 /////////////////////////////////////////////////////
13070 // Split into aggr/aggr.
// NOTE(review): the low node takes this node's table_name pointer itself, not
// a copy, so the set_range_var / set_machine / set_interface / set_ifq calls
// made through low_hfta_node->table_name below act on the shared object.
13073 sgah_qpn *low_hfta_node = new sgah_qpn();
13074 low_hfta_node->table_name = table_name;
13075 low_hfta_node->set_node_name( "_"+node_name );
13076 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// The hi node reads from the low node: its table variable is named after the
// low node ("_"+node_name) while the hi node keeps the original node name.
13079 sgah_qpn *hi_hfta_node = new sgah_qpn();
13080 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13081 hi_hfta_node->set_node_name( node_name );
13082 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13084 // First, process the group-by variables.
13085 // both low and hi level queries duplicate group-by variables of original query
13088 for(g=0;g<gb_tbl.size();g++){
13089 // Insert the gbvar into both low- and hi level hfta.
13090 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13091 low_hfta_node->gb_tbl.add_gb_var(
13092 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13095 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// The new colref carries group-by index g and the data type of the original
// definition; make_fta_se_ref returns the expression the hi node uses for it.
13096 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13097 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13098 gbvar_fta->set_gb_ref(g);
13099 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13100 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13102 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
13103 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13104 hi_hfta_node->gb_tbl.add_gb_var(
13105 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13109 // hi_hfta_node->gb_tbl.gb_patterns = gb_tbl.gb_patterns; // pattern processing at highest level
13110 hi_hfta_node->gb_tbl.set_pattern_info( &gb_tbl); // pattern processing at highest level
13112 // SEs in the aggregate definitions.
13113 // They are all safe, so split them up for later processing.
// hfta_aggr_se is keyed by int and handed to rehome_fta_se / rehome_fta_pr
// below; presumably split_hfta_aggr fills it per aggregate - confirm.
13114 map<int, scalarexp_t *> hfta_aggr_se;
13115 for(a=0;a<aggr_tbl.size();++a){
13116 split_hfta_aggr( &(aggr_tbl), a,
13117 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13118 low_hfta_node->select_list,
13125 // Next, the select list.
// Each output expression is rehomed and appended to the hi node under its
// original name. fta_forbidden is set but not read in the visible lines.
13127 for(s=0;s<select_list.size();s++){
13128 bool fta_forbidden = false;
13129 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13130 hi_hfta_node->select_list.push_back(
13131 new select_element(root_se, select_list[s]->name));
13136 // All the predicates in the where clause must execute
13137 // in the low-level hfta.
13139 for(p=0;p<where.size();p++){
13140 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13141 cnf_elem *new_cnf = new cnf_elem(new_pr);
13142 analyze_cnf(new_cnf);
13144 low_hfta_node->where.push_back(new_cnf);
13147 // All of the predicates in the having clause must
13148 // execute in the high-level hfta node.
13150 for(p=0;p<having.size();p++){
13151 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13152 cnf_elem *cnf_root = new cnf_elem(pr_root);
13153 analyze_cnf(cnf_root);
13155 hi_hfta_node->having.push_back(cnf_root);
13159 // Copy parameters to both nodes
// Each node receives its own duplicate of the parameter's data_type.
13160 vector<string> param_names = param_tbl->get_param_names();
13162 for(pi=0;pi<param_names.size();pi++){
13163 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13164 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13165 param_tbl->handle_access(param_names[pi]));
13166 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13167 param_tbl->handle_access(param_names[pi]));
13169 low_hfta_node->definitions = definitions;
13170 hi_hfta_node->definitions = definitions;
// Propagate machine/interface to both table variables and clear the ifq flag.
13173 low_hfta_node->table_name->set_machine(table_name->get_machine());
13174 low_hfta_node->table_name->set_interface(table_name->get_interface());
13175 low_hfta_node->table_name->set_ifq(false);
13177 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13178 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13179 hi_hfta_node->table_name->set_ifq(false);
// Result order: low (sub-aggregate) node first, then hi (super-aggregate).
13181 ret_vec.push_back(low_hfta_node);
13182 ret_vec.push_back(hi_hfta_node);
13188 // TODO: add splitting into selection/aggregation
13192 // Split aggregation into two HFTA components - sub and superaggregation
13193 // If unable to split the aggregates, empty vector will be returned
13194 // Similar to sgah, but super aggregate is rsgah, subaggr is sgah
//
// Builds two new nodes from this one:
//   low_hfta_node ("_"+node_name): an sgah_qpn holding the duplicated group-by
//       definitions, the sub-aggregates and every WHERE predicate.
//   hi_hfta_node  (node_name): an rsgah_qpn that reads from a table variable
//       named "_"+node_name and gets the rehomed select list, HAVING and
//       CLOSING_WHEN predicates.
// ret_vec receives the low node first, then the hi node.
//   Ext_fcns - external function list, queried for sub/super aggregate ids.
//   Schema   - not referenced in the visible lines of this function.
13195 vector<qp_node *> rsgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13197 vector<qp_node *> ret_vec;
// NOTE(review): o, i, t, fta_flds, stream_flds and sel_names are not used in
// the visible lines below - candidates for removal; confirm.
13198 int s, p, g, a, o, i;
13201 vector<string> fta_flds, stream_flds;
13202 int t = table_name->get_schema_ref();
13204 // Get the set of interfaces it accesses.
13206 vector<string> sel_names;
13208 // Verify that all of the ref'd UDAFs can be split.
// A UDAF is splittable only when the function list reports both a
// super-aggregate and a sub-aggregate id (neither is negative).
13210 for(a=0;a<aggr_tbl.size();++a){
13211 if(! aggr_tbl.is_builtin(a)){
13212 int afcn = aggr_tbl.get_fcn_id(a);
13213 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13214 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
// Presumably bails out with the (still empty) ret_vec here, per the header
// comment - confirm.
13215 if(hfta_super_id < 0 || hfta_sub_id < 0){
13221 /////////////////////////////////////////////////////
13222 // Split into aggr/aggr.
// NOTE(review): the low node takes this node's table_name pointer itself, not
// a copy, so the setters called through low_hfta_node->table_name below act on
// the shared object.
13225 sgah_qpn *low_hfta_node = new sgah_qpn();
13226 low_hfta_node->table_name = table_name;
13227 low_hfta_node->set_node_name( "_"+node_name );
13228 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// The hi node reads from the low node: its table variable is named after the
// low node ("_"+node_name) while the hi node keeps the original node name.
13231 rsgah_qpn *hi_hfta_node = new rsgah_qpn();
13232 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13233 hi_hfta_node->set_node_name( node_name );
13234 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13236 // First, process the group-by variables.
13237 // both low and hi level queries duplicate group-by variables of original query
13240 for(g=0;g<gb_tbl.size();g++){
13241 // Insert the gbvar into both low- and hi level hfta.
13242 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13243 low_hfta_node->gb_tbl.add_gb_var(
13244 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13247 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// The new colref carries group-by index g and the data type of the original
// definition; make_fta_se_ref returns the expression the hi node uses for it.
13248 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13249 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13250 gbvar_fta->set_gb_ref(g);
13251 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13252 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13254 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
13255 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13256 hi_hfta_node->gb_tbl.add_gb_var(
13257 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
// NOTE(review): sgah_qpn::split_node_for_hfta calls
// hi_hfta_node->gb_tbl.set_pattern_info(&gb_tbl) at this point; no such call
// is visible here - confirm whether that is intentional.
13262 // SEs in the aggregate definitions.
13263 // They are all safe, so split them up for later processing.
// hfta_aggr_se is keyed by int and handed to rehome_fta_se / rehome_fta_pr
// below; presumably split_hfta_aggr fills it per aggregate - confirm.
13264 map<int, scalarexp_t *> hfta_aggr_se;
13265 for(a=0;a<aggr_tbl.size();++a){
13266 split_hfta_aggr( &(aggr_tbl), a,
13267 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13268 low_hfta_node->select_list,
13275 // Next, the select list.
// Each output expression is rehomed and appended to the hi node under its
// original name. fta_forbidden is set but not read in the visible lines.
13277 for(s=0;s<select_list.size();s++){
13278 bool fta_forbidden = false;
13279 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13280 hi_hfta_node->select_list.push_back(
13281 new select_element(root_se, select_list[s]->name));
13286 // All the predicates in the where clause must execute
13287 // in the low-level hfta.
13289 for(p=0;p<where.size();p++){
13290 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13291 cnf_elem *new_cnf = new cnf_elem(new_pr);
13292 analyze_cnf(new_cnf);
13294 low_hfta_node->where.push_back(new_cnf);
13297 // All of the predicates in the having clause must
13298 // execute in the high-level hfta node.
13300 for(p=0;p<having.size();p++){
13301 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13302 cnf_elem *cnf_root = new cnf_elem(pr_root);
13303 analyze_cnf(cnf_root);
13305 hi_hfta_node->having.push_back(cnf_root);
13308 // Similar for closing when
// The closing_when predicates are rehomed the same way and also go to the
// hi node.
13309 for(p=0;p<closing_when.size();p++){
13310 predicate_t *pr_root = rehome_fta_pr( closing_when[p]->pr, &hfta_aggr_se);
13311 cnf_elem *cnf_root = new cnf_elem(pr_root);
13312 analyze_cnf(cnf_root);
13314 hi_hfta_node->closing_when.push_back(cnf_root);
13318 // Copy parameters to both nodes
// Each node receives its own duplicate of the parameter's data_type.
13319 vector<string> param_names = param_tbl->get_param_names();
13321 for(pi=0;pi<param_names.size();pi++){
13322 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13323 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13324 param_tbl->handle_access(param_names[pi]));
13325 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13326 param_tbl->handle_access(param_names[pi]));
13328 low_hfta_node->definitions = definitions;
13329 hi_hfta_node->definitions = definitions;
// Propagate machine/interface to both table variables and clear the ifq flag.
13332 low_hfta_node->table_name->set_machine(table_name->get_machine());
13333 low_hfta_node->table_name->set_interface(table_name->get_interface());
13334 low_hfta_node->table_name->set_ifq(false);
13336 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13337 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13338 hi_hfta_node->table_name->set_ifq(false);
// Result order: low (sub-aggregate) node first, then hi (super-aggregate).
13340 ret_vec.push_back(low_hfta_node);
13341 ret_vec.push_back(hi_hfta_node);
13347 // TODO: add splitting into selection/aggregation
13350 //---------------------------------------------------------------
13351 // Code for propagating Protocol field source information
// Recursively rebuild `se` as a newly allocated expression over protocol-level
// fields, dispatching on se->get_operator_type().
//   se      - expression to resolve.
//   src_vec - per-source field-name -> protocol expression maps, indexed by
//             the column reference's tablevar_ref; an entry may be NULL.
//   gb_tbl  - group-by table used to expand group-by references; callers in
//             this file pass NULL for nodes without one.
//   Schema  - used to look up the schema type of a column reference.
// NOTE(review): several paths presumably return NULL when the expression
// cannot be traced back to protocol fields - confirm against the full source.
13354 scalarexp_t *resolve_protocol_se(scalarexp_t *se, vector<map<string, scalarexp_t *> *> &src_vec, gb_table *gb_tbl, table_list *Schema){
13355 scalarexp_t *rse, *lse,*p_se, *gb_se;
13356 int tno, schema_type;
13357 map<string, scalarexp_t *> *pse_map;
13359 switch(se->get_operator_type()){
// Literal: fresh expression built from the same literal.
13361 return new scalarexp_t(se->get_literal());
// Parameter: fresh parameter reference with the same name.
13363 return scalarexp_t::make_param_reference(se->get_op().c_str());
// Group-by reference: resolve the group-by variable's definition instead.
// NOTE(review): the error message is followed directly by a dereference of
// gb_tbl, so a NULL gb_tbl would still crash on the next line - confirm.
13367 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, se->gb_ref=%d, but gb_tbl is NULL\n",se->get_gb_ref());
13368 gb_se = gb_tbl->get_def(se->get_gb_ref());
13369 return resolve_protocol_se(gb_se,src_vec,gb_tbl,Schema);
// Column of a protocol schema: already a protocol field, so copy it.
13372 schema_type = Schema->get_schema_type(se->get_colref()->get_schema_ref());
13373 if(schema_type == PROTOCOL_SCHEMA)
13374 return dup_se(se,NULL);
// Otherwise look the field up in the map of the source it refers to.
// NOTE(review): tno is a signed int compared against size(); whether the
// out-of-range case stops before src_vec[tno] is indexed is not visible here.
13376 tno = se->get_colref()->get_tablevar_ref();
13377 if(tno >= src_vec.size()){
13378 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, tno=%d, src_vec.size()=%lu\n",tno,src_vec.size());
13380 if(src_vec[tno] == NULL)
// NOTE(review): operator[] on a std::map inserts a NULL entry into the
// source's map when the field is absent - presumably harmless; confirm.
13383 pse_map =src_vec[tno];
13384 p_se = (*pse_map)[se->get_colref()->get_field()];
13387 return dup_se(p_se,NULL);
// Unary operator: resolve the operand, then rebuild with the same operator.
13389 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13393 return new scalarexp_t(se->get_op().c_str(),lse);
// Binary operator: resolve both operands, then rebuild with the same operator.
13395 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13398 rse = resolve_protocol_se(se->get_right_se(),src_vec,gb_tbl,Schema);
13401 return new scalarexp_t(se->get_op().c_str(),lse,rse);
13415 void spx_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13417 vector<map<string, scalarexp_t *> *> src_vec;
13419 for(i=0;i<q_sources.size();i++){
13420 if(q_sources[i] != NULL)
13421 src_vec.push_back(q_sources[i]->get_protocol_se());
13423 src_vec.push_back(NULL);
13426 for(i=0;i<select_list.size();i++){
13427 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13431 void join_eq_hash_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13433 vector<map<string, scalarexp_t *> *> src_vec;
13435 for(i=0;i<q_sources.size();i++){
13436 if(q_sources[i] != NULL)
13437 src_vec.push_back(q_sources[i]->get_protocol_se());
13439 src_vec.push_back(NULL);
13442 for(i=0;i<select_list.size();i++){
13443 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13446 for(i=0;i<hash_eq.size();i++){
13447 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
13448 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
13452 void filter_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13454 vector<map<string, scalarexp_t *> *> src_vec;
13456 for(i=0;i<q_sources.size();i++){
13457 if(q_sources[i] != NULL)
13458 src_vec.push_back(q_sources[i]->get_protocol_se());
13460 src_vec.push_back(NULL);
13463 for(i=0;i<select_list.size();i++){
13464 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13467 for(i=0;i<hash_eq.size();i++){
13468 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
13469 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
13473 void sgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13475 vector<map<string, scalarexp_t *> *> src_vec;
13477 for(i=0;i<q_sources.size();i++){
13478 if(q_sources[i] != NULL)
13479 src_vec.push_back(q_sources[i]->get_protocol_se());
13481 src_vec.push_back(NULL);
13484 for(i=0;i<select_list.size();i++){
13485 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
13488 for(i=0;i<gb_tbl.size();i++)
13489 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
13493 void rsgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13495 vector<map<string, scalarexp_t *> *> src_vec;
13497 for(i=0;i<q_sources.size();i++){
13498 if(q_sources[i] != NULL)
13499 src_vec.push_back(q_sources[i]->get_protocol_se());
13501 src_vec.push_back(NULL);
13504 for(i=0;i<select_list.size();i++){
13505 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
13508 for(i=0;i<gb_tbl.size();i++)
13509 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
13512 void sgahcwcb_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13514 vector<map<string, scalarexp_t *> *> src_vec;
13516 for(i=0;i<q_sources.size();i++){
13517 if(q_sources[i] != NULL)
13518 src_vec.push_back(q_sources[i]->get_protocol_se());
13520 src_vec.push_back(NULL);
13523 for(i=0;i<select_list.size();i++){
13524 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
13527 for(i=0;i<gb_tbl.size();i++)
13528 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
13531 void mrg_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13533 scalarexp_t *first_se;
13535 vector<map<string, scalarexp_t *> *> src_vec;
13536 map<string, scalarexp_t *> *pse_map;
13538 for(i=0;i<q_sources.size();i++){
13539 if(q_sources[i] != NULL)
13540 src_vec.push_back(q_sources[i]->get_protocol_se());
13542 src_vec.push_back(NULL);
13545 if(q_sources.size() == 0){
13546 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::create_protocol_se, q_sources.size() == 0\n");
13550 vector<field_entry *> tbl_flds = table_layout->get_fields();
13551 for(f=0;f<tbl_flds.size();f++){
13553 string fld_nm = tbl_flds[f]->get_name();
13554 pse_map = src_vec[0];
13555 first_se = (*pse_map)[fld_nm];
13556 if(first_se == NULL)
13558 for(s=1;s<src_vec.size() && match;s++){
13559 pse_map = src_vec[s];
13560 scalarexp_t *match_se = (*pse_map)[fld_nm];
13561 if(match_se == false)
13564 match = is_equivalent_se_base(first_se, match_se, Schema);
13567 protocol_map[fld_nm] = first_se;
13569 protocol_map[fld_nm] = NULL;