1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
16 // Create, manipulate, and dump query plans.
18 #include "query_plan.h"
19 #include "analyze_fta.h"
20 #include "generate_utils.h"
26 extern string hash_nums[NRANDS]; // for fast hashing
// Walks every character position of s. s is taken by non-const reference,
// so any change made inside the loop is visible to the caller.
// NOTE(review): going by the name, this presumably rewrites characters that
// are not allowed ("taboo") in generated names -- confirm against the loop body.
31 void untaboo(string &s){
// s.size() is unsigned; NOTE(review): confirm how c is declared to make sure
// this is not a signed/unsigned comparison.
33 for(c=0;c<s.size();++c){
40 // mrg_qpn constructor, define here to avoid
41 // circular references in the .h file
// Build a merge node from a filter-join node.
//   spx     : node whose select list, parameter table and definitions are copied
//   n_name  : name given to the new table_layout (and used in diagnostics)
//   sources : one merge input is created per entry
//   ifaces, ifdb : passed through unchanged to resolve_slack()
42 mrg_qpn::mrg_qpn(filter_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): param_tbl is replaced by a fresh param_table further down, so
// this shared pointer is only in effect until then -- confirm it is needed.
43 param_tbl = spx->param_tbl;
// Output schema: one field per select-list entry, typed with a duplicate of
// the entry's data type.
46 field_entry_list *fel = new field_entry_list();
51 for(i=0;i<spx->select_list.size();++i){
52 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
// Track the temporal field to merge on. merge_fieldpos < 0 means none has
// been chosen yet; the warning below reports a second temporal field and
// names select_list[merge_fieldpos] as the one being used.
53 if(dt->is_temporal()){
54 if(merge_fieldpos < 0){
57 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
62 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
63 fel->append_field(fe);
// Diagnostic for a select list with no temporal field at all.
// NOTE(review): the code below indexes select_list[merge_fieldpos], so this
// path presumably aborts -- confirm.
67 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
70 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
72 // NEED TO HANDLE USER_SPECIFIED SLACK
// Slack is resolved from the merge field's expression and name; the last
// argument is passed as NULL.
73 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
74 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
75 // if(this->slack == NULL)
76 // fprintf(stderr,"Zero slack.\n");
78 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// One merge input per source: range variable "_m<i>", a column reference to
// the merge field through that range variable, and a table variable for the
// source itself. mvars[i] and fm[i] are index-aligned with sources[i].
80 for(i=0;i<sources.size();i++){
81 std::string rvar = "_m"+int_to_string(i);
82 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
83 mvars[i]->set_tablevar_ref(i);
84 fm.push_back(new tablevar_t(sources[i].c_str()));
85 fm[i]->set_range_var(rvar);
// Give this node its own parameter table: every parameter of spx is re-added
// with a duplicated data type and the same handle_access() value.
88 param_tbl = new param_table();
89 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
91 for(pi=0;pi<param_names.size();pi++){
92 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
93 param_tbl->add_param(param_names[pi],dt->duplicate(),
94 spx->param_tbl->handle_access(param_names[pi]));
96 definitions = spx->definitions;
102 // This function translates an analyzed parse tree
103 // into one or more query nodes (qp_node).
104 // Currently only one node is created, but some query
105 // fragments might create more than one query node,
106 // e.g. aggregation over a join, or nested subqueries
107 // in the FROM clause (unless this is handled at parse tree
108 // analysis time). At this stage, they will be linked
109 // by the names in the FROM clause.
110 // INVARIANT : if more than one query node is returned,
111 // the last one represents the output of the query.
// Build the plan node for the analyzed query qs and collect it in local_plan.
// The node class is chosen from qs->query_type, the group-by / aggregate
// tables and the FROM clause, as tested below. The chosen node is also kept in
// plan_root so that its name and definitions can be set at the end.
112 vector<qp_node *> create_query_nodes(query_summary_class *qs,table_list *Schema){
114 // Classify the query.
116 vector <qp_node *> local_plan;
120 // I should probably move a lot of this code
121 // into the qp_node constructors,
122 // and have this code focus on building the query plan tree.
// Merge query: a single mrg_qpn is the plan root.
125 if(qs->query_type == MERGE_QUERY){
126 mrg_qpn *merge_node = new mrg_qpn(qs,Schema);
129 plan_root = merge_node;
130 local_plan.push_back(merge_node);
// NOTE(review): the next line is plain prose, so the source-splitting code and
// its debug printfs that follow appear to be disabled -- confirm where the
// enclosing comment starts and ends.
133 Do not split sources until we are done with optimizations
134 vector<mrg_qpn *> split_merge = merge_node->split_sources();
135 local_plan.insert(local_plan.begin(), split_merge.begin(), split_merge.end());
137 // If children are created, add them to the schema.
// NOTE(review): %d is used with the result of size(); if this region is ever
// re-enabled the format/argument types should be checked.
140 printf("split_merge size is %d\n",split_merge.size());
141 for(i=1;i<split_merge.size();++i){
142 Schema->add_table(split_merge[i]->get_fields());
143 printf("Adding split merge table %d\n",i);
148 printf("Did split sources on %s:\n",qs->query_name.c_str());
150 for(ss=0;ss<local_plan.size();ss++){
151 printf("node %d, name=%s, sources=",ss,local_plan[ss]->get_node_name().c_str());
152 vector<tablevar_t *> inv = local_plan[ss]->get_input_tbls();
154 for(nn=0;nn<inv.size();nn++){
155 printf("%s ",inv[nn]->to_string().c_str());
164 // Select / Aggregation / Join
// No group-by variables and no aggregates: selection or join.
165 if(qs->gb_tbl->size() == 0 && qs->aggr_tbl->size() == 0){
// Exactly one FROM entry: select/project node.
167 if(qs->fta_tree->get_from()->size() == 1){
168 spx_qpn *spx_node = new spx_qpn(qs,Schema);
170 plan_root = spx_node;
171 local_plan.push_back(spx_node);
// Join: a FROM clause flagged FILTER_JOIN_PROPERTY gets a filter join node;
// the other join node created here is join_eq_hash_qpn.
173 if(qs->fta_tree->get_from()->get_properties() == FILTER_JOIN_PROPERTY){
174 filter_join_qpn *join_node = new filter_join_qpn(qs,Schema);
175 plan_root = join_node;
176 local_plan.push_back(join_node);
178 join_eq_hash_qpn *join_node = new join_eq_hash_qpn(qs,Schema);
179 plan_root = join_node;
180 local_plan.push_back(join_node);
// Aggregation: referenced states, supergroups or a cleaning-by clause select
// sgahcwcb_qpn; a closing-when clause selects rsgah_qpn; sgah_qpn is the
// remaining aggregation node.
186 if(qs->states_refd.size() || qs->sg_tbl.size() || qs->cb_cnf.size()){
187 sgahcwcb_qpn *sgahcwcb_node = new sgahcwcb_qpn(qs,Schema);
188 plan_root = sgahcwcb_node;
189 local_plan.push_back(sgahcwcb_node);
191 if(qs->closew_cnf.size()){
192 rsgah_qpn *rsgah_node = new rsgah_qpn(qs,Schema);
193 plan_root = rsgah_node;
194 local_plan.push_back(rsgah_node);
196 sgah_qpn *sgah_node = new sgah_qpn(qs,Schema);
197 plan_root = sgah_node;
198 local_plan.push_back(sgah_node);
205 // Get the query name and other definitions.
206 plan_root->set_node_name( qs->query_name);
207 plan_root->set_definitions( qs->definitions) ;
// The single-node return is disabled; the declared return type is the vector.
210 // return(plan_root);
// Render the scalar expression se as query text. Sub-expressions are rendered
// by recursive calls; aggr_tbl is consulted when se refers to an aggregate
// through get_aggr_ref().
216 string se_to_query_string(scalarexp_t *se, aggregate_table *aggr_tbl){
221 vector<scalarexp_t *> operand_list;
// NOTE(review): su_ind is appended after the aggregate / function name below;
// presumably it is set here when se is a superaggregate -- confirm.
224 if(se->is_superaggr())
227 switch(se->get_operator_type()){
// Literal: the literal's own query-string form.
229 l_str = se->get_literal()->to_query_string();
// "$" followed by get_op(); presumably a parameter reference -- confirm.
232 l_str = "$" + se->get_op();
// Column reference.
235 l_str = se->get_colref()->to_query_string() ;
// One operand: op( operand ). The doubled ';' on the return is a harmless
// empty statement.
238 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
240 return se->get_op()+"( "+l_str+" )";;
// Two operands: (left)op(right).
242 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
243 r_str = se_to_query_string(se->get_right_se(),aggr_tbl);
244 return( "("+l_str+")"+se->get_op()+"("+r_str+")" );
// Aggregate written with a star operand.
246 return( se->get_op() + su_ind + "(*)");
// Aggregate over an expression; the operand is fetched from aggr_tbl.
248 l_str = se_to_query_string(aggr_tbl->get_aggr_se(se->get_aggr_ref()),aggr_tbl);
249 return( se->get_op() + su_ind + "(" + l_str + ")" );
// Operand-list form: operands come from aggr_tbl when get_aggr_ref() >= 0,
// otherwise from se itself.
251 if(se->get_aggr_ref() >= 0)
252 operand_list = aggr_tbl->get_operand_list(se->get_aggr_ref());
254 operand_list = se->get_operands();
256 ret = se->get_op() + su_ind + "(";
257 for(p=0;p<operand_list.size();p++){
258 l_str = se_to_query_string(operand_list[p],aggr_tbl);
// Fallback: the error text is returned as the result string, not printed.
266 return "ERROR SE op type not recognized in se_to_query_string.\n";
// Render the predicate pr as query text. Scalar operands are rendered with
// se_to_query_string(), sub-predicates by recursive calls.
270 string pred_to_query_str(predicate_t *pr, aggregate_table *aggr_tbl){
275 vector<literal_t *> llist;
276 vector<scalarexp_t *> op_list;
278 switch(pr->get_operator_type()){
// Membership test: "<expr> IN [" followed by the literals from get_lit_vec().
280 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
281 ret = l_str + " IN [";
282 llist = pr->get_lit_vec();
283 for(l=0;l<llist.size();l++){
285 ret += llist[l]->to_query_string();
// Comparison of two scalar expressions: left op right.
291 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
292 r_str = se_to_query_string(pr->get_right_se(),aggr_tbl);
293 return( l_str + " " + pr->get_op() + " " + r_str );
// One sub-predicate: op( pred ).
295 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
296 return(pr->get_op() + "( " + l_str + " )");
// Two sub-predicates.
// NOTE(review): the output places the RIGHT operand before the LEFT one,
// unlike the scalar comparison above. That is harmless only if every
// operator reaching this point is commutative -- confirm.
298 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
299 r_str = pred_to_query_str(pr->get_right_pr(),aggr_tbl);
300 return("( " + r_str + " )" + pr->get_op() + "( " + l_str + " )");
// Operand-list form: op[ operands ... built from get_op_list().
302 ret = pr->get_op()+"[";
303 op_list = pr->get_op_list();
304 for(o=0;o<op_list.size();++o){
306 ret += se_to_query_string(op_list[o],aggr_tbl);
// Unknown operator type: report position and type on stderr.
311 fprintf(stderr,"INTERNAL ERROR in pred_to_query_str, line %d, character %d, unknown predicate operator type %d\n",
312 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
321 // Build a selection list,
322 // but avoid adding duplicate SEs.
// Find se in lfta_select_list, comparing with is_equivalent_se(). If no
// equivalent entry is found, se is appended under a placeholder name and the
// index of the new last element is returned.
// NOTE(review): on a match the existing index is presumably returned and the
// caller told that nothing new was added -- confirm.
325 int add_select_list_nodup(vector<select_element *> &lfta_select_list, scalarexp_t *se,
329 for(s=0;s<lfta_select_list.size();s++){
330 if(is_equivalent_se(lfta_select_list[s]->se, se)){
// The placeholder name marks entries that have not been named yet; the
// callers in this file replace it with the result of impute_colname().
335 lfta_select_list.push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
336 return(lfta_select_list.size()-1);
341 // TODO: The generated colref should be tied to the tablevar
342 // representing the lfta output. For now, always 0.
// Put se on lfta_select_list (without duplicating an equivalent entry) and
// build a new column-reference expression that refers to that entry by name.
// h_tvref is stored as the table-variable index of the new column reference.
344 scalarexp_t *make_fta_se_ref(vector<select_element *> &lfta_select_list, scalarexp_t *se, int h_tvref){
346 int fta_se_nbr = add_select_list_nodup(lfta_select_list, se, new_element);
// The column name is either the one already stored on the entry, or a name
// from impute_colname() that is then written back to the entry.
// NOTE(review): the choice presumably depends on new_element -- confirm.
349 colname = lfta_select_list[fta_se_nbr]->name;
351 colname = impute_colname(lfta_select_list, se);
352 lfta_select_list[fta_se_nbr]->name = colname;
355 // TODO: fill in the tablevar and schema of the colref here.
356 colref_t *new_cr = new colref_t(colname.c_str());
357 new_cr->set_tablevar_ref(h_tvref);
// The new expression takes over the decorations of the expression it stands for.
360 scalarexp_t *new_se= new scalarexp_t(new_cr);
361 new_se->use_decorations_of(se);
367 // Build a selection list,
368 // but avoid adding duplicate SEs.
// Pointer-taking overload: same search-then-append logic as the reference
// version, applied to *lfta_select_list. The pointer is dereferenced without a
// null check, so callers must pass a valid list.
371 int add_select_list_nodup(vector<select_element *> *lfta_select_list, scalarexp_t *se,
375 for(s=0;s<lfta_select_list->size();s++){
376 if(is_equivalent_se((*lfta_select_list)[s]->se, se)){
// No equivalent entry: append under the placeholder name and return its index.
381 lfta_select_list->push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
382 return(lfta_select_list->size()-1);
387 // TODO: The generated colref should be tied to the tablevar
388 // representing the lfta output. For now, always 0.
// Variant for several select lists: h_tvref picks the list that se is added
// to, and is also stored as the table-variable index of the new column
// reference.
// NOTE(review): lfta_select_list[h_tvref] is indexed without a range check.
390 scalarexp_t *make_fta_se_ref(vector<vector<select_element *> *> &lfta_select_list, scalarexp_t *se, int h_tvref){
392 vector<select_element *> *the_sel_list = lfta_select_list[h_tvref];
393 int fta_se_nbr = add_select_list_nodup(the_sel_list, se, new_element);
// Use the name already on the entry, or impute one and write it back.
396 colname = (*the_sel_list)[fta_se_nbr]->name;
398 colname = impute_colname(*the_sel_list, se);
399 (*the_sel_list)[fta_se_nbr]->name = colname;
402 // TODO: fill in the tablevar and schema of the colref here.
403 colref_t *new_cr = new colref_t(colname.c_str());
404 new_cr->set_tablevar_ref(h_tvref);
// The new expression takes over the decorations of the expression it stands for.
407 scalarexp_t *new_se= new scalarexp_t(new_cr);
408 new_se->use_decorations_of(se);
417 // Test if a se can be evaluated at the fta.
418 // check forbidden types (e.g. float), forbidden operations
419 // between types (e.g. divide a long long), forbidden operations
420 // (too expensive, not implemented).
422 // Return true if not forbidden, false if forbidden
424 // TODO: the parameter aggr_tbl is not used, delete it.
// Recursive legality check for a scalar expression: data types are checked
// with fta_legal_type(), operators with fta_legal_operation(), and external
// functions with Ext_fcns->fta_legal(). aggr_tbl is only handed on to the
// recursive calls.
426 bool check_fta_forbidden_se(scalarexp_t *se,
427 aggregate_table *aggr_tbl,
428 ext_fcn_list *Ext_fcns
432 vector<scalarexp_t *> operand_list;
433 vector<data_type *> dt_signature;
434 data_type *dt = se->get_data_type();
438 switch(se->get_operator_type()){
// Leaf expression: legal exactly when its data type is.
442 return( se->get_data_type()->fta_legal_type() );
// One operand: the operand is checked first, then the operator on the
// operand's type.
446 if(!check_fta_forbidden_se(se->get_left_se(), aggr_tbl, Ext_fcns))
449 dt->fta_legal_operation(se->get_left_se()->get_data_type(), se->get_op())
// Two operands: both are checked, then the operator on the pair of types.
452 if(!check_fta_forbidden_se(se->get_left_se(),aggr_tbl, Ext_fcns))
454 if(!check_fta_forbidden_se(se->get_right_se(),aggr_tbl, Ext_fcns))
456 return(dt->fta_legal_operation(se->get_left_se()->get_data_type(),
457 se->get_right_se()->get_data_type(),
462 // return true, aggregate fta-safeness is determined elsewhere.
// An expression that refers to an aggregate is accepted here without
// looking at its operands.
469 if(se->get_aggr_ref() >= 0) return true;
// Function call: every operand must pass, and the operand types form the
// signature used to look the function up.
471 operand_list = se->get_operands();
472 for(p=0;p<operand_list.size();p++){
473 if(!check_fta_forbidden_se(operand_list[p],aggr_tbl, Ext_fcns))
475 dt_signature.push_back(operand_list[p]->get_data_type() );
477 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup failure report: function name, operand types and source position.
// The value -2 gets the extra "multiple subsuming functions" line.
479 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
481 for(o=0;o<operand_list.size();o++){
482 if(o>0) fprintf(stderr,", ");
483 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
485 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
486 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
490 return(Ext_fcns->fta_legal(fcn_id) );
// NOTE(review): this internal error goes to stdout via printf, while the
// other diagnostics in this function go to stderr.
492 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
501 // test if a pr can be executed at the fta.
503 // Return true if not forbidden, false if forbidden
// Recursive legality check for a predicate. Scalar operands are checked with
// check_fta_forbidden_se(), sub-predicates with this function, and external
// predicates with Ext_fcns->fta_legal().
505 bool check_fta_forbidden_pr(predicate_t *pr,
506 aggregate_table *aggr_tbl,
507 ext_fcn_list *Ext_fcns
510 vector<literal_t *> llist;
513 vector<scalarexp_t *> op_list;
514 vector<data_type *> dt_signature;
518 switch(pr->get_operator_type()){
// Membership test: the tested expression and the type of every literal in the
// list are checked.
520 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns) )
522 llist = pr->get_lit_vec();
523 for(l=0;l<llist.size();l++){
// NOTE(review): a data_type is allocated with new for each literal and no
// matching delete is visible here -- possible leak, confirm.
524 dt = new data_type(llist[l]->get_type());
525 if(! dt->fta_legal_type()){
// Comparison: both scalar operands are checked.
533 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns))
535 if(! check_fta_forbidden_se(pr->get_right_se(), aggr_tbl, Ext_fcns))
// One sub-predicate: the result is that of the sub-predicate.
539 return( check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns) );
// Two sub-predicates: both are checked.
541 if(! check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns))
543 if(! check_fta_forbidden_pr(pr->get_right_pr(), aggr_tbl, Ext_fcns))
// External predicate: every operand must pass, and the operand types form the
// signature used for lookup_pred().
547 op_list = pr->get_op_list();
548 for(o=0;o<op_list.size();o++){
549 if(!check_fta_forbidden_se(op_list[o],aggr_tbl, Ext_fcns))
551 dt_signature.push_back(op_list[o]->get_data_type() );
553 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), dt_signature);
// Lookup failure report: predicate name, operand types and source position.
// The value -2 gets the extra "multiple subsuming predicates" line.
555 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
557 for(o=0;o<op_list.size();o++){
558 if(o>0) fprintf(stderr,", ");
559 fprintf(stderr,"%s",op_list[o]->get_data_type()->to_string().c_str());
561 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
562 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
566 return(Ext_fcns->fta_legal(fcn_id) );
// Unknown operator type: report position and type on stderr.
568 fprintf(stderr,"INTERNAL ERROR in check_fta_forbidden_pr, line %d, character %d, unknown predicate operator type %d\n",
569 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
578 // Split the aggregates in orig_aggr_tbl, into superaggregates and
580 // (the value of the HFTA aggregate might be a SE of several LFTA
581 // subaggregates, e.g. avg : sum / count )
582 // Register the superaggregates in hfta_aggr_tbl, and the
583 // subaggregates in lfta_aggr_tbl.
584 // Insert references to the subaggregates into lfta_select_list.
585 // (and record their names in the currnames list)
586 // Create a SE for the superaggregate, put it in hfta_aggr_se,
// Split aggregate number agr_id of orig_aggr_tbl into subaggregate(s),
// registered in lfta_aggr_tbl and referenced from lfta_select_list, and a
// superaggregate registered in hfta_aggr_tbl. The superaggregate expression is
// stored in hfta_aggr_se[agr_id]. User-defined aggregates (not is_builtin) and
// builtin aggregates are handled by separate code paths.
589 void split_fta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
590 aggregate_table *hfta_aggr_tbl,
591 aggregate_table *lfta_aggr_tbl,
592 vector<select_element *> &lfta_select_list,
593 map<int,scalarexp_t *> &hfta_aggr_se,
594 ext_fcn_list *Ext_fcns
597 scalarexp_t *subaggr_se;
602 scalarexp_t *new_se, *l_se;
603 vector<scalarexp_t *> subaggr_ref_se;
// ---- User-defined aggregate: sub/super function ids come from Ext_fcns. ----
606 if(! orig_aggr_tbl->is_builtin(agr_id)){
607 // Construct the subaggregate
608 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
609 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
610 vector<scalarexp_t *> subopl;
// The subaggregate gets its own copies of the operands.
612 for(o=0;o<opl.size();++o){
613 subopl.push_back(dup_se(opl[o], NULL));
615 int sub_id = Ext_fcns->get_subaggr_id(fcn_id);
616 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
617 subaggr_se->set_fcn_id(sub_id);
618 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
619 // Add it to the lfta select list.
620 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
// Column name: the one already on the select-list entry, or an imputed name
// that is written back; the subaggregate is registered in lfta_aggr_tbl.
// NOTE(review): presumably only a newly added entry is named and registered
// -- confirm against the branch on new_element.
622 colname = lfta_select_list[fta_se_nbr]->name;
624 colname = impute_colname(lfta_select_list, subaggr_se);
625 lfta_select_list[fta_se_nbr]->name = colname;
626 ano = lfta_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,Ext_fcns->has_lfta_bailout(sub_id));
627 subaggr_se->set_aggr_id(ano);
630 // Construct a reference to the subaggregate
631 new_cr = new colref_t(colname.c_str());
632 new_se = new scalarexp_t(new_cr);
633 // I'm not certain what the types should be ....
634 // This will need to be filled in by later analysis.
635 // NOTE: this might not capture all the meaning of data_type ...
636 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
637 subaggr_ref_se.push_back(new_se);
639 // Construct the superaggregate
640 int super_id = Ext_fcns->get_superaggr_id(fcn_id);
641 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
642 ret_se->set_fcn_id(super_id);
643 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
644 // Register it in the hfta aggregate table
// NOTE(review): is_running_aggr() is queried with sub_id, not super_id --
// confirm that is intended.
645 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, Ext_fcns->is_running_aggr(sub_id),false);
646 ret_se->set_aggr_id(ano);
647 hfta_aggr_se[agr_id] = ret_se;
653 // builtin aggregate processing
// subaggr_names[k] and subaggr_dt[k] describe the k-th subaggregate.
657 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
658 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// ---- Builtin aggregate without an operand (star form). ----
661 if(orig_aggr_tbl->is_star_aggr(agr_id)){
662 for(sa=0;sa<subaggr_names.size();sa++){
663 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
664 subaggr_se->set_data_type(subaggr_dt[sa]);
666 // The following sequence is similar to the code in make_fta_se_ref,
667 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a new int fta_se_nbr, whereas the other
// branches assign to an fta_se_nbr declared elsewhere; if that one is in an
// enclosing scope this declaration shadows it -- confirm.
668 int fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
670 colname = lfta_select_list[fta_se_nbr]->name;
672 colname = impute_colname(lfta_select_list, subaggr_se);
673 lfta_select_list[fta_se_nbr]->name = colname;
674 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
675 subaggr_se->set_aggr_id(ano);
// Only this branch sets the table-variable index (0) on the new column
// reference; the other two branches leave it at whatever colref_t defaults to.
677 new_cr = new colref_t(colname.c_str());
678 new_cr->set_tablevar_ref(0);
679 new_se = new scalarexp_t(new_cr);
681 // I'm not certain what the types should be ....
682 // This will need to be filled in by later analysis.
683 // Actually, this is causing a problem.
684 // I will assume a UINT data type. / change to INT
685 // (consistent with assign_data_types in analyze_fta.cc)
686 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
687 data_type *ndt = new data_type("Int"); // used to be Uint
688 new_se->set_data_type(ndt);
690 subaggr_ref_se.push_back(new_se);
// ---- Builtin aggregate over an expression. ----
693 for(sa=0;sa<subaggr_names.size();sa++){
// The subaggregate is built either over a copy of the original operand or in
// star form. NOTE(review): the choice is presumably driven by use_se[sa] --
// confirm.
695 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
696 l_se = dup_se(aggr_operand, NULL);
697 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
699 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
701 subaggr_se->set_data_type(subaggr_dt[sa]);
703 // again, similar to make_fta_se_ref.
704 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
706 colname = lfta_select_list[fta_se_nbr]->name;
708 colname = impute_colname(lfta_select_list, subaggr_se);
709 lfta_select_list[fta_se_nbr]->name = colname;
// Registered with the copied operand, or with NULL for the star form.
711 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
713 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
714 subaggr_se->set_aggr_id(ano);
716 new_cr = new colref_t(colname.c_str());
717 new_se = new scalarexp_t(new_cr);
718 // I'm not certain what the types should be ....
719 // This will need to be filled in by later analysis.
720 // NOTE: this might not capture all the meaning of data_type ...
721 new_se->set_data_type(subaggr_dt[sa]);
722 subaggr_ref_se.push_back(new_se);
// Builtin superaggregate: built by the aggregate table from the references to
// the subaggregates, typed like the original aggregate.
725 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
726 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
728 // ASSUME either the return value is an aggregation,
729 // or a binary_op between two aggregations
// NOTE(review): both sides of the || compare against SE_AGGR_SE, so the
// second test is redundant. One of them was presumably meant to name a
// different operator type -- confirm.
730 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
731 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
732 ret_se->set_aggr_id(ano);
734 // Basically processing for AVG.
735 // set the data type of the superagg to that of the subagg.
// Each side of the binary expression is registered as its own aggregate;
// this relies on subaggr_dt having at least two entries.
736 scalarexp_t *left_se = ret_se->get_left_se();
737 left_se->set_data_type(subaggr_dt[0]);
738 ano = hfta_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
739 left_se->set_aggr_id(ano);
741 scalarexp_t *right_se = ret_se->get_right_se();
742 right_se->set_data_type(subaggr_dt[1]);
743 ano = hfta_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
744 right_se->set_aggr_id(ano);
747 hfta_aggr_se[agr_id] = ret_se;
752 // Split the aggregates in orig_aggr_tbl, into hfta_superaggregates and
753 // hfta_subaggregates.
754 // Register the superaggregates in hi_aggr_tbl, and the
755 // subaggregates in low_aggr_tbl.
756 // Insert references to the subaggregates into low_select_list.
757 // (and record their names in the currnames list)
758 // Create a SE for the superaggregate, put it in hfta_aggr_se,
// Split aggregate number agr_id of orig_aggr_tbl into subaggregate(s),
// registered in low_aggr_tbl and referenced from low_select_list, and a
// superaggregate registered in hi_aggr_tbl. The superaggregate expression is
// stored in hi_aggr_se[agr_id]. The structure mirrors split_fta_aggr, but the
// user-defined path uses the get_hfta_subaggr_id / get_hfta_superaggr_id
// lookups.
761 void split_hfta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
762 aggregate_table *hi_aggr_tbl,
763 aggregate_table *low_aggr_tbl,
764 vector<select_element *> &low_select_list,
765 map<int,scalarexp_t *> &hi_aggr_se,
766 ext_fcn_list *Ext_fcns
769 scalarexp_t *subaggr_se;
774 scalarexp_t *new_se, *l_se;
775 vector<scalarexp_t *> subaggr_ref_se;
// ---- User-defined aggregate. ----
778 if(! orig_aggr_tbl->is_builtin(agr_id)){
779 // Construct the subaggregate
780 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
781 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
782 vector<scalarexp_t *> subopl;
// The subaggregate gets its own copies of the operands.
784 for(o=0;o<opl.size();++o){
785 subopl.push_back(dup_se(opl[o], NULL));
787 int sub_id = Ext_fcns->get_hfta_subaggr_id(fcn_id);
788 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
789 subaggr_se->set_fcn_id(sub_id);
790 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
791 // Add it to the low select list.
792 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
// Column name: the one already on the select-list entry, or an imputed name
// that is written back; the subaggregate is registered in low_aggr_tbl with
// all three trailing flags false.
794 colname = low_select_list[fta_se_nbr]->name;
796 colname = impute_colname(low_select_list, subaggr_se);
797 low_select_list[fta_se_nbr]->name = colname;
798 ano = low_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,false);
799 subaggr_se->set_aggr_id(ano);
802 // Construct a reference to the subaggregate
803 new_cr = new colref_t(colname.c_str());
804 new_se = new scalarexp_t(new_cr);
805 // I'm not certain what the types should be ....
806 // This will need to be filled in by later analysis.
807 // NOTE: this might not capture all the meaning of data_type ...
808 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
809 subaggr_ref_se.push_back(new_se);
811 // Construct the superaggregate
812 int super_id = Ext_fcns->get_hfta_superaggr_id(fcn_id);
813 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
814 ret_se->set_fcn_id(super_id);
815 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
816 // Register it in the high aggregate table
817 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, false,false);
818 ret_se->set_aggr_id(ano);
819 hi_aggr_se[agr_id] = ret_se;
825 // builtin aggregate processing
// subaggr_names[k] and subaggr_dt[k] describe the k-th subaggregate.
829 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
830 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// ---- Builtin aggregate without an operand (star form). ----
833 if(orig_aggr_tbl->is_star_aggr(agr_id)){
834 for(sa=0;sa<subaggr_names.size();sa++){
835 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
836 subaggr_se->set_data_type(subaggr_dt[sa]);
838 // The following sequence is similar to the code in make_fta_se_ref,
839 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a new int fta_se_nbr, whereas the other
// branches assign to an fta_se_nbr declared elsewhere; if that one is in an
// enclosing scope this declaration shadows it -- confirm.
840 int fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
842 colname = low_select_list[fta_se_nbr]->name;
844 colname = impute_colname(low_select_list, subaggr_se);
845 low_select_list[fta_se_nbr]->name = colname;
846 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
847 subaggr_se->set_aggr_id(ano);
849 new_cr = new colref_t(colname.c_str());
850 new_cr->set_tablevar_ref(0);
851 new_se = new scalarexp_t(new_cr);
853 // I'm not certain what the types should be ....
854 // This will need to be filled in by later analysis.
855 // Actually, this is causing a problem.
856 // I will assume a UINT data type.
// (the code below now constructs "Int"; the UINT remark above is out of date)
857 // (consistent with assign_data_types in analyze_fta.cc)
858 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
859 data_type *ndt = new data_type("Int"); // was Uint
860 new_se->set_data_type(ndt);
862 subaggr_ref_se.push_back(new_se);
// ---- Builtin aggregate over an expression. ----
865 for(sa=0;sa<subaggr_names.size();sa++){
// The subaggregate is built either over a copy of the original operand or in
// star form. NOTE(review): the choice is presumably driven by use_se[sa] --
// confirm.
867 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
868 l_se = dup_se(aggr_operand, NULL);
869 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
871 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
873 subaggr_se->set_data_type(subaggr_dt[sa]);
875 // again, similar to make_fta_se_ref.
876 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
878 colname = low_select_list[fta_se_nbr]->name;
880 colname = impute_colname(low_select_list, subaggr_se);
881 low_select_list[fta_se_nbr]->name = colname;
// Registered with the copied operand, or with NULL for the star form.
883 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
885 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
886 subaggr_se->set_aggr_id(ano);
888 new_cr = new colref_t(colname.c_str());
889 new_se = new scalarexp_t(new_cr);
890 // I'm not certain what the types should be ....
891 // This will need to be filled in by later analysis.
892 // NOTE: this might not capture all the meaning of data_type ...
893 new_se->set_data_type(subaggr_dt[sa]);
894 subaggr_ref_se.push_back(new_se);
// Builtin superaggregate: built by the aggregate table from the references to
// the subaggregates.
897 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
898 // ASSUME either the return value is an aggregation,
899 // or a binary_op between two aggregations
// NOTE(review): both sides of the || compare against SE_AGGR_SE, so the
// second test is redundant. One of them was presumably meant to name a
// different operator type -- confirm.
900 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
// Unlike split_fta_aggr, the original aggregate's data type is applied to
// ret_se only on this path.
901 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
902 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
904 // Basically processing for AVG.
905 // set the data type of the superagg to that of the subagg.
// Each side of the binary expression is registered as its own aggregate;
// this relies on subaggr_dt having at least two entries.
906 scalarexp_t *left_se = ret_se->get_left_se();
907 left_se->set_data_type(subaggr_dt[0]);
908 ano = hi_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
909 left_se->set_aggr_id(ano);
911 scalarexp_t *right_se = ret_se->get_right_se();
912 right_se->set_data_type(subaggr_dt[1]);
913 ano = hi_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
914 right_se->set_aggr_id(ano);
// NOTE(review): ano holds the id from the most recent add_aggr() call. If this
// line also runs after the two-aggregate path, ret_se receives the id of the
// right-hand aggregate -- confirm that is intended.
917 ret_se->set_aggr_id(ano);
918 hi_aggr_se[agr_id] = ret_se;
926 // Split a scalar expression into one part which executes
927 // at the stream and another set of parts which execute
929 // Because I'm actually modifying the SEs, I will make
930 // copies. But I will assume that literals, params, and
931 // colrefs are immutable at this point.
932 // (if there is ever a need to change one, must make a
934 // NOTE : if se is constant (only references literals),
935 // avoid making the fta compute it.
937 // NOTE : This will need to be generalized to
938 // handle join expressions, namely to handle a vector
941 // Return value is the HFTA se.
942 // Add lftas select_elements to the fta_select_list.
943 // set fta_forbidden if this node or any child cannot
944 // execute at the lfta.
// Recursively copy se into a new expression (ret_se). fta_forbidden is set
// from the legality of the data types, operators and external functions
// involved. Where an operator or function node needs it, a non-forbidden,
// non-constant operand is moved onto lfta_select_list via make_fta_se_ref()
// and replaced by a column reference to it (always with table-variable
// index 0).
948 scalarexp_t *split_fta_se(scalarexp_t *se,
950 vector<select_element *> &lfta_select_list,
951 ext_fcn_list *Ext_fcns
955 vector<scalarexp_t *> operand_list;
956 vector<data_type *> dt_signature;
957 scalarexp_t *ret_se, *l_se, *r_se;
958 bool l_forbid, r_forbid, this_forbid;
961 data_type *dt = se->get_data_type();
963 switch(se->get_operator_type()){
// Literal: new node around the same literal object; forbidden only if its
// data type is not legal.
965 fta_forbidden = ! se->get_data_type()->fta_legal_type();
966 ret_se = new scalarexp_t(se->get_literal());
967 ret_se->use_decorations_of(se);
// Parameter: a fresh parameter reference with the same name.
971 fta_forbidden = ! se->get_data_type()->fta_legal_type();
972 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
973 ret_se->use_decorations_of(se);
977 // No colref should be forbidden,
978 // the schema is wrong, the fta_legal_type() fcn is wrong,
979 // or the source table is actually a stream.
980 // Issue a warning, but proceed with processing.
981 // Also, should not be a ref to a gbvar.
982 // (a gbvar ref only occurs in an aggregation node,
983 // and these SEs are rehomed, not split.
984 fta_forbidden = ! se->get_data_type()->fta_legal_type();
987 fprintf(stderr,"WARNING, a colref is a forbidden data type in split_fta_se,"
989 " type is %s, line=%d, col=%d\n",
990 se->get_colref()->to_string().c_str(),
991 se->get_data_type()->get_type_str().c_str(),
992 se->lineno, se->charno
997 fprintf(stderr,"INTERNAL ERROR, a colref is a gbvar ref in split_fta_se,"
998 " type is %s, line=%d, col=%d\n",
999 se->get_data_type()->get_type_str().c_str(),
1000 se->lineno, se->charno
// Column reference: new node around the same colref object.
1005 ret_se = new scalarexp_t(se->get_colref());
1006 ret_se->use_decorations_of(se);
// One operand: split the operand first, then test the operator on the split
// operand's type.
1010 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1012 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1014 // If this operation is forbidden but the child SE is not,
1015 // put the child se on the lfta_select_list, create a colref
1016 // which accesses this se, and make it the child of this op.
1017 // Exception : the child se is constant (only literal refs).
1018 if(this_forbid && !l_forbid){
1019 if(!is_literal_or_param_only(l_se)){
1020 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1021 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1024 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1026 ret_se->use_decorations_of(se);
// Bitwise | on two bools; the value is the same as with ||.
1027 fta_forbidden = this_forbid | l_forbid;
// Two operands: split both, then test the operator on the pair of types.
1031 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1032 r_se = split_fta_se(se->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1034 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1036 // Replace the left se if it is not forbidden, but something else is.
// NOTE(review): '&' here (and in the matching test for the right operand) is
// a bitwise AND of two bool values. The result equals '&&', but '&&' is
// presumably what was meant.
1037 if((this_forbid || r_forbid) & !l_forbid){
1038 if(!is_literal_or_param_only(l_se)){
1039 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1044 // Replace the right se if it is not forbidden, but something else is.
1045 if((this_forbid || l_forbid) & !r_forbid){
1046 if(!is_literal_or_param_only(r_se)){
1047 new_se = make_fta_se_ref(lfta_select_list, r_se,0);
1052 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1053 ret_se->use_decorations_of(se);
1054 fta_forbidden = this_forbid || r_forbid || l_forbid;
// An aggregate reference is reported as an internal error here.
1061 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_fta_se."
1062 " line=%d, col=%d\n",
1063 se->get_op().c_str(),
1064 se->lineno, se->charno
// Function call: split every operand, remembering per operand whether it was
// forbidden. dt_signature is built from the ORIGINAL operands' types.
1071 fta_forbidden = false;
1072 operand_list = se->get_operands();
1073 vector<scalarexp_t *> new_operands;
1074 vector<bool> forbidden_op;
1075 for(p=0;p<operand_list.size();p++){
1076 l_se = split_fta_se(operand_list[p], l_forbid, lfta_select_list, Ext_fcns);
1078 fta_forbidden |= l_forbid;
1079 new_operands.push_back(l_se);
1080 forbidden_op.push_back(l_forbid);
1081 dt_signature.push_back(operand_list[p]->get_data_type() );
1084 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup failure report: function name, operand types and source position.
// The value -2 gets the extra "multiple subsuming functions" line.
1086 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1088 for(o=0;o<operand_list.size();o++){
1089 if(o>0) fprintf(stderr,", ");
1090 fprintf(stderr,"%s",operand_list[o]->get_data_type()->get_type_str().c_str());
1092 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1093 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The call is also forbidden when the function itself is not legal.
1097 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1099 // Replace the non-forbidden operands.
1100 // the forbidden ones are already replaced.
1102 for(p=0;p<new_operands.size();p++){
1103 if(! forbidden_op[p]){
1104 // if(new_operands[p]->get_data_type()->get_temporal() != constant_t){
1105 if(!is_literal_or_param_only(new_operands[p])){
1106 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],0);
1107 new_operands[p] = new_se;
1113 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1114 ret_se->use_decorations_of(se);
// NOTE(review): the message names check_fta_forbidden_se although this is
// split_fta_se, and it is written to stdout via printf while the other
// diagnostics here go to stderr.
1120 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
1131 // The predicates have already been
1132 // broken into conjunctions.
1133 // If any part of a conjunction is fta-forbidden,
1134 // it must be executed in the stream operator.
1135 // Else it is executed in the FTA.
1136 // A pre-analysis should determine whether this
1137 // predicate is fta-safe. This procedure will
1138 // assume that it is fta-forbidden and will
1139 // prepare it for execution in the stream.
// split_fta_pr: build a new predicate_t that mirrors `pr`, after passing each
// scalar-expression operand through split_fta_se.
//   pr               - predicate to process; dispatched on pr->get_operator_type().
//   lfta_select_list - select list handed to split_fta_se and make_fta_se_ref.
//   Ext_fcns         - external function list, forwarded to split_fta_se.
// Sub-predicates (get_left_pr / get_right_pr) are handled by recursion.
// NOTE(review): several lines of this function (case labels, the tests that
// guard the make_fta_se_ref calls, what is done with new_se, break/return)
// are not visible in this excerpt; the comments describe visible statements only.
1143 predicate_t *split_fta_pr(predicate_t *pr,
1144 vector<select_element *> &lfta_select_list,
1145 ext_fcn_list *Ext_fcns
1148 vector<literal_t *> llist;
1149 scalarexp_t *se_l, *se_r;
1150 bool l_forbid, r_forbid;
1151 predicate_t *ret_pr, *pr_l, *pr_r;
1152 vector<scalarexp_t *> op_list, new_op_list;
1154 vector<data_type *> dt_signature;
1157 switch(pr->get_operator_type()){
// Predicate built from a left SE plus the literal vector of pr.
// l_forbid receives split_fta_se's forbidden flag for the left SE.
1159 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
// Operands consisting only of literals/params are never turned into an fta reference.
1162 if(!is_literal_or_param_only(se_l)){
1163 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1167 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate built from a left SE, pr's operator string, and a right SE.
1172 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1174 if(!is_literal_or_param_only(se_l)){
1175 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1180 se_r = split_fta_se(pr->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1182 if(!is_literal_or_param_only(se_r)){
1183 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,0);
1188 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Predicate with a single sub-predicate: recurse, then rebuild with the same operator.
1192 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1193 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Predicate with two sub-predicates: recurse on both sides.
1196 case PRED_BINARY_OP:
1197 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1198 pr_r = split_fta_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1199 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1203 // I can't push the predicate into the lfta, except by
1204 // returning a bool value, and that is not worth the trouble,
// Predicate with an operand list: split every operand, collect the results,
// rebuild the predicate and carry over the function id of the original.
1205 op_list = pr->get_op_list();
1206 for(o=0;o<op_list.size();++o){
1207 se_l = split_fta_se(op_list[o],l_forbid,lfta_select_list,Ext_fcns);
1209 if(!is_literal_or_param_only(se_l)){
1210 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1214 new_op_list.push_back(se_l);
1217 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1218 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unknown operator type: report an internal error with the source position.
1221 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1222 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1233 //--------------------------------------------------------------------
1237 // Split a scalar expression into one part which executes
1238 // at the stream and another set of parts which execute
1240 // Because I'm actually modifying the SEs, I will make
1241 // copies. But I will assume that literals, params, and
1242 // colrefs are immutable at this point.
1243 // (if there is ever a need to change one, must make a
1245 // NOTE : if se is constant (only references literals),
1246 // avoid making the fta compute it.
1248 // NOTE : This will need to be generalized to
1249 // handle join expressions, namely to handle a vector
1252 // Return value is the HFTA se.
1253 // Add lftas select_elements to the fta_select_list.
1254 // set fta_forbidden if this node or any child cannot
1255 // execute at the lfta.
// Sentinel values for the colref_source out-parameter of split_ftavec_se.
// Both are negative, so neither passes the colref_source>=0 test in
// is_PROTOCOL_source.
// NOTBLVAR: combine_colref_source treats it as neutral (the other argument wins).
1257 #define SPLIT_FTAVEC_NOTBLVAR -1
// MIXED: what combine_colref_source returns when its two arguments differ
// and neither of them is NOTBLVAR.
1258 #define SPLIT_FTAVEC_MIXED -2
// is_PROTOCOL_source: returns true when colref_source is a non-negative index
// whose entry in lfta_select_list is non-NULL. (The parameter comments on
// split_ftavec_se state that an entry is NULL if the tblvar is not PROTOCOL.)
// The fall-through return is not visible in this excerpt.
// NOTE(review): colref_source is not checked against lfta_select_list.size()
// here - presumably callers guarantee it is in range; confirm.
1260 bool is_PROTOCOL_source(int colref_source,
1261 vector< vector<select_element *> *> &lfta_select_list){
1262 if(colref_source>=0 && lfta_select_list[colref_source]!=NULL) return true;
// combine_colref_source: merge the colref sources of two sub-expressions.
//   - equal sources                -> that source
//   - one side is NOTBLVAR         -> the other side
//   - anything else (they differ)  -> SPLIT_FTAVEC_MIXED
// Note that MIXED combined with any different non-NOTBLVAR value stays MIXED.
1266 int combine_colref_source(int s1, int s2){
1267 if(s1==s2) return(s1);
1268 if(s1==SPLIT_FTAVEC_NOTBLVAR) return s2;
1269 if(s2==SPLIT_FTAVEC_NOTBLVAR) return s1;
1270 return SPLIT_FTAVEC_MIXED;
// split_ftavec_se: recursively rebuild `se` as a new scalarexp_t tree. While
// doing so it reports, through out-parameters, whether some part of the
// expression is fta-forbidden and which table variable its column references
// come from, and it may replace lfta-computable children by references created
// with make_fta_se_ref (which receives the per-source select list vector).
// NOTE(review): case labels, break/return statements and some declarations of
// this function are not visible in this excerpt; comments added below describe
// only the visible statements.
1273 scalarexp_t *split_ftavec_se(
1274 scalarexp_t *se, // the SE to split
1275 bool &fta_forbidden, // return true if some part of se
1277 int &colref_source, // the tblvar which sources the
1278 // colref, or NOTBLVAR, or MIXED
1279 vector< vector<select_element *> *> &lfta_select_list,
1280 // NULL if the tblvar is not PROTOCOL,
1281 // else build the select list.
1282 ext_fcn_list *Ext_fcns // is the fcn lfta-safe?
1284 // Return value is the HFTA SE, unless fta_forbidden is true and
1285 // colref_source>=0 and the indicated source is PROTOCOL.
1286 // In that case no split was done, the make_fta_se_ref must
1287 // be done by the caller.
1290 vector<scalarexp_t *> operand_list;
1291 vector<data_type *> dt_signature;
1292 scalarexp_t *ret_se, *l_se, *r_se;
1293 bool l_forbid, r_forbid, this_forbid;
1294 int l_csource, r_csource, this_csource;
1296 scalarexp_t *new_se;
1297 data_type *dt = se->get_data_type();
1299 switch(se->get_operator_type()){
// Literal: forbidden only if its data type is not an fta-legal type.
// It references no table variable; the result is a fresh literal node.
1301 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1302 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1303 ret_se = new scalarexp_t(se->get_literal());
1304 ret_se->use_decorations_of(se);
// Query parameter: same treatment as a literal, rebuilt as a param reference.
1308 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1309 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1310 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1311 ret_se->use_decorations_of(se);
// Interface parameter: never forbidden; its source is the table variable
// recorded in the interface-param reference.
1314 case SE_IFACE_PARAM:
1315 fta_forbidden = false;
1316 colref_source = se->get_ifpref()->get_tablevar_ref();
1317 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1318 ret_se->use_decorations_of(se);
1322 // No colref should be forbidden,
1323 // the schema is wrong, the fta_legal_type() fcn is wrong,
1324 // or the source table is actually a stream.
1325 // Issue a warning, but proceed with processing.
1326 // Also, should not be a ref to a gbvar.
1327 // (a gbvar ref only occurs in an aggregation node,
1328 // and these SEs are rehomed, not split.
1329 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1330 colref_source = se->get_colref()->get_tablevar_ref();
// Warn only when the forbidden colref comes from a PROTOCOL source.
1332 if(fta_forbidden && is_PROTOCOL_source(colref_source, lfta_select_list)){
1333 fprintf(stderr,"WARNING, a PROTOCOL colref is a forbidden data type in split_ftavec_se,"
1335 " type is %s, line=%d, col=%d\n",
1336 se->get_colref()->to_string().c_str(),
1337 se->get_data_type()->to_string().c_str(),
1338 se->lineno, se->charno
1343 fta_forbidden = true; // eval in hfta. ASSUME make copy as below.
1346 ret_se = new scalarexp_t(se->get_colref());
1347 ret_se->use_decorations_of(se);
// Single-operand operator: split the child first; the child's source becomes
// this node's source (colref_source is passed straight through).
1351 l_se = split_ftavec_se(se->get_left_se(), l_forbid, colref_source, lfta_select_list, Ext_fcns);
1353 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1355 // If this operation is forbidden but the child SE is not,
1356 // AND the colref source in the se is a single PROTOCOL source
1357 // put the child se on the lfta_select_list, create a colref
1358 // which accesses this se, and make it the child of this op.
1359 // Exception : the child se is constant (only literal refs).
1360 // TODO: I think the exception is expressed by is_PROTOCOL_source
1361 if(this_forbid && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list)){
1362 if(!is_literal_or_param_only(l_se)){
1363 new_se = make_fta_se_ref(lfta_select_list, l_se,colref_source);
1364 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1367 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1369 ret_se->use_decorations_of(se);
// Bitwise | on two bools; yields the same truth value as ||.
1370 fta_forbidden = this_forbid | l_forbid;
// Two-operand operator: split both children, each with its own source,
// then combine the two sources into this node's source.
1374 l_se = split_ftavec_se(se->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1375 r_se = split_ftavec_se(se->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1377 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1378 colref_source=combine_colref_source(l_csource, r_csource);
1380 // Replace the left se if the parent must be hfta but the child can
1381 // be lfta. This translates to
1382 // a) result is PROTOCOL and forbidden, but left SE is not forbidden
1383 // OR b) if result is mixed but the left se is PROTOCOL, not forbidden
1384 if( ((this_forbid || r_forbid) && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1385 (colref_source==SPLIT_FTAVEC_MIXED && !l_forbid &&
1386 is_PROTOCOL_source(l_csource, lfta_select_list)) ){
1387 if(!is_literal_or_param_only(l_se)){
1388 new_se = make_fta_se_ref(lfta_select_list, l_se,l_csource);
1393 // same logic as for right se.
1394 if( ((this_forbid || l_forbid) && !r_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1395 (colref_source==SPLIT_FTAVEC_MIXED && !r_forbid &&
1396 is_PROTOCOL_source(r_csource, lfta_select_list)) ){
1397 if(!is_literal_or_param_only(r_se)){
1398 new_se = make_fta_se_ref(lfta_select_list, r_se,r_csource);
1403 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1404 ret_se->use_decorations_of(se);
1405 fta_forbidden = this_forbid || r_forbid || l_forbid;
// An aggregate reference is not expected here; report it as an internal error.
1412 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_ftavec_se."
1413 " line=%d, col=%d\n",
1414 se->get_op().c_str(),
1415 se->lineno, se->charno
// Function call: split every operand, remembering per operand the rebuilt SE,
// its forbidden flag and its source, and collecting the ORIGINAL operands'
// data types as the signature used for the function lookup.
1422 operand_list = se->get_operands();
1423 vector<scalarexp_t *> new_operands;
1424 vector<bool> forbidden_op;
1425 vector<int> csource;
1427 fta_forbidden = false;
1428 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1429 for(p=0;p<operand_list.size();p++){
1430 l_se = split_ftavec_se(operand_list[p], l_forbid, l_csource, lfta_select_list, Ext_fcns);
1432 fta_forbidden |= l_forbid;
1433 colref_source = combine_colref_source(colref_source, l_csource);
1434 new_operands.push_back(l_se);
1435 forbidden_op.push_back(l_forbid);
1436 csource.push_back(l_csource);
1437 dt_signature.push_back(operand_list[p]->get_data_type() );
// Resolve the external function by name and operand types. The error report
// below lists the operand types; -2 is reported as "multiple subsuming
// functions found". (The test that guards the report is not visible here.)
1440 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
1442 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1444 for(o=0;o<operand_list.size();o++){
1445 if(o>0) fprintf(stderr,", ");
1446 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
1448 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1449 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The call is also forbidden if the function itself is not fta-legal.
1453 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1455 // Replace the non-forbidden operands.
1456 // the forbidden ones are already replaced.
// Only done when the call must stay in the hfta (forbidden or mixed sources);
// each replaced operand must itself come from a PROTOCOL source and must not
// be made of literals/params only.
1457 if(fta_forbidden || colref_source == SPLIT_FTAVEC_MIXED){
1458 for(p=0;p<new_operands.size();p++){
1459 if(! forbidden_op[p] && is_PROTOCOL_source(csource[p], lfta_select_list)){
1460 if(!is_literal_or_param_only(new_operands[p])){
1461 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],csource[p]);
1462 new_operands[p] = new_se;
1468 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1469 ret_se->use_decorations_of(se);
// Unknown operator type. Note this message goes to stdout (printf), unlike
// the other diagnostics in this function, which use stderr.
1475 printf("INTERNAL ERROR in split_ftavec_se: operator type %d\n",se->get_operator_type());
1484 // The predicates have already been
1485 // broken into conjunctions.
1486 // If any part of a conjunction is fta-forbidden,
1487 // it must be executed in the stream operator.
1488 // Else it is executed in the FTA.
1489 // A pre-analysis should determine whether this
1490 // predicate is fta-safe. This procedure will
1491 // assume that it is fta-forbidden and will
1492 // prepare it for execution in the stream.
// split_ftavec_pr: build a new predicate_t that mirrors `pr`, after passing
// each scalar-expression operand through split_ftavec_se.
//   pr               - predicate to process; dispatched on pr->get_operator_type().
//   lfta_select_list - per-table-variable select lists, handed to
//                      split_ftavec_se, is_PROTOCOL_source and make_fta_se_ref.
//   Ext_fcns         - external function list, forwarded to split_ftavec_se.
// An operand is handed to make_fta_se_ref only if it is not forbidden, its
// source is a PROTOCOL source, and it is not made of literals/params only.
// Sub-predicates (get_left_pr / get_right_pr) are handled by recursion.
// NOTE(review): case labels, the use made of new_se, and break/return lines
// are not visible in this excerpt.
1494 predicate_t *split_ftavec_pr(predicate_t *pr,
1495 vector< vector<select_element *> *> &lfta_select_list,
1496 ext_fcn_list *Ext_fcns
1499 vector<literal_t *> llist;
1500 scalarexp_t *se_l, *se_r;
1501 bool l_forbid, r_forbid;
1502 int l_csource, r_csource;
1503 predicate_t *ret_pr, *pr_l, *pr_r;
1504 vector<scalarexp_t *> op_list, new_op_list;
1506 vector<data_type *> dt_signature;
1509 switch(pr->get_operator_type()){
// Predicate built from a left SE plus the literal vector of pr.
1511 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1513 // TODO: checking that the se is a PROTOCOL source should
1514 // take care of literal_or_param_only.
1515 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1516 if(!is_literal_or_param_only(se_l)){
1517 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1521 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate built from a left SE, pr's operator string, and a right SE.
1526 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1527 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1528 if(!is_literal_or_param_only(se_l)){
1529 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1534 se_r = split_ftavec_se(pr->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1535 if(!r_forbid && is_PROTOCOL_source(r_csource, lfta_select_list)){
1536 if(!is_literal_or_param_only(se_r)){
1537 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,r_csource);
1542 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Predicate with a single sub-predicate: recurse, then rebuild with the same operator.
1546 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1547 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Predicate with two sub-predicates: recurse on both sides.
1550 case PRED_BINARY_OP:
1551 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1552 pr_r = split_ftavec_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1553 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1557 // I can't push the predicate into the lfta, except by
1558 // returning a bool value, and that is not worth the trouble,
// Predicate with an operand list: split every operand, collect the results,
// rebuild the predicate and carry over the function id of the original.
1559 op_list = pr->get_op_list();
1560 for(o=0;o<op_list.size();++o){
1561 se_l = split_ftavec_se(op_list[o],l_forbid,l_csource,lfta_select_list,Ext_fcns);
1562 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1563 if(!is_literal_or_param_only(se_l)){
1564 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1568 new_op_list.push_back(se_l);
1571 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1572 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unknown operator type. The message names this function (it previously named
// split_fta_pr, which made the diagnostic point at the wrong routine).
1575 fprintf(stderr,"INTERNAL ERROR in split_ftavec_pr, line %d, character %d, unknown predicate operator type %d\n",
1576 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1586 ////////////////////////////////////////////////////////////////////////
1587 /// rehome_fta_se rehome_fta_pr
1588 /// These are used when splitting an sgah operator (aggregation);
1589 /// they only need to make gb, aggr references point to the
1590 /// new gb, aggr table entries.
// rehome_fta_se: rebuild `se` as a new scalarexp_t tree in which every
// aggregate reference is replaced by the expression stored for its id in
// *aggr_map. All other node kinds are copied, keeping the decorations of the
// original node.
//   se       - expression to rehome.
//   aggr_map - maps an aggregate id (se->get_aggr_ref()) to its replacement SE.
// NOTE(review): the entries taken from *aggr_map are returned as-is (the same
// pointer, not a copy). If aggr_map is a std::map, operator[] also inserts a
// NULL entry for an id that is missing - confirm callers always populate it.
// NOTE(review): case labels, the test guarding the warning below, and
// break/return lines are not visible in this excerpt.
1593 scalarexp_t *rehome_fta_se(scalarexp_t *se,
1594 map< int, scalarexp_t * > *aggr_map
1599 vector<scalarexp_t *> operand_list;
1600 scalarexp_t *ret_se, *l_se, *r_se;
1602 scalarexp_t *new_se;
1603 data_type *dt = se->get_data_type();
1604 vector<scalarexp_t *> new_operands;
1606 switch(se->get_operator_type()){
// Literal: fresh literal node.
1608 ret_se = new scalarexp_t(se->get_literal());
1609 ret_se->use_decorations_of(se);
// Query parameter: fresh param reference with the same name.
1613 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1614 ret_se->use_decorations_of(se);
1617 case SE_IFACE_PARAM:
1618 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1619 ret_se->use_decorations_of(se);
1625 // Must be a GB REF ...
1626 // I'm assuming that the hfta gbvar table has the
1627 // same sequence of entries as the input query's gbvar table.
1628 // Else I'll need some kind of translation table.
1631 fprintf(stderr,"WARNING, a colref is not a gbver ref in rehome_hfta_se"
1632 " type is %s, line=%d, col=%d\n",
1633 se->get_data_type()->to_string().c_str(),
1634 se->lineno, se->charno
1638 ret_se = new scalarexp_t(se->get_colref());
1639 ret_se->use_decorations_of(se); // just inherit the gbref
// Single-operand operator: rehome the child, rebuild with the same operator.
1643 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1645 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1646 ret_se->use_decorations_of(se);
// Two-operand operator: rehome both children.
1650 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1651 r_se = rehome_fta_se(se->get_right_se(), aggr_map);
1653 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1654 ret_se->use_decorations_of(se);
// Aggregate reference: substitute the mapped expression directly.
1660 agr_id = se->get_aggr_ref();
1661 return (*aggr_map)[agr_id];
// Function call: a non-negative aggregate id means the call itself is
// substituted from the map; otherwise its operands are rehomed one by one.
1665 agr_id = se->get_aggr_ref();
1666 if(agr_id >= 0) return (*aggr_map)[agr_id];
1668 operand_list = se->get_operands();
1669 for(p=0;p<operand_list.size();p++){
1670 l_se = rehome_fta_se(operand_list[p], aggr_map);
1672 new_operands.push_back(l_se);
1676 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1677 ret_se->use_decorations_of(se);
// Unknown operator type; reported on stdout.
1682 printf("INTERNAL ERROR in rehome_fta_se: operator type %d\n",se->get_operator_type());
1691 // The predicates have already been
1692 // broken into conjunctions.
1693 // If any part of a conjunction is fta-forbidden,
1694 // it must be executed in the stream operator.
1695 // Else it is executed in the FTA.
1696 // A pre-analysis should determine whether this
1697 // predicate is fta-safe. This procedure will
1698 // assume that it is fta-forbidden and will
1699 // prepare it for execution in the stream.
// rehome_fta_pr: build a new predicate_t that mirrors `pr`, with every
// scalar-expression operand rebuilt by rehome_fta_se (which substitutes
// aggregate references from *aggr_map) and every sub-predicate rebuilt by
// recursion. No fta-forbidden analysis is done in the visible code.
// NOTE(review): the comment block preceding this function reads like a copy of
// the one on split_fta_pr and does not describe what is done here.
// NOTE(review): case labels and break/return lines are not visible in this excerpt.
1701 predicate_t *rehome_fta_pr(predicate_t *pr,
1702 map<int, scalarexp_t *> *aggr_map
1705 vector<literal_t *> llist;
1706 scalarexp_t *se_l, *se_r;
1707 predicate_t *ret_pr, *pr_l, *pr_r;
1708 vector<scalarexp_t *> op_list, new_op_list;
1711 switch(pr->get_operator_type()){
// Predicate built from a left SE plus the literal vector of pr.
1713 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1714 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate built from a left SE, pr's operator string, and a right SE.
1718 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1719 se_r = rehome_fta_se(pr->get_right_se(), aggr_map);
1720 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Predicate with a single sub-predicate.
1724 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1725 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Predicate with two sub-predicates.
1728 case PRED_BINARY_OP:
1729 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1730 pr_r = rehome_fta_pr(pr->get_right_pr(), aggr_map);
1731 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: rehome each operand and keep the function id.
1735 op_list = pr->get_op_list();
1736 for(o=0;o<op_list.size();++o){
1737 se_l = rehome_fta_se(op_list[o], aggr_map);
1738 new_op_list.push_back(se_l);
1740 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1741 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unknown operator type: report an internal error with the source position.
1745 fprintf(stderr,"INTERNAL ERROR in rehome_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1746 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1755 ////////////////////////////////////////////////////////////////////
1756 ///////////////// Create a STREAM table to represent the FTA output.
// create_attributes: build a STREAM_SCHEMA table_def named `tname` with one
// field per entry of select_list.
//   tname       - name given to the new table_def.
//   select_list - each element supplies the field name (->name) and, through
//                 its scalar expression (->se), the field's data type.
// For every field the data type's parameters (get_param_keys/get_param_val)
// are copied into a fresh param_list, and the access function name is
// "get_field_" followed by the field name.
// NOTE(review): `ufcns`, the test choosing between the two plist->append
// forms, and the return statement are on lines not visible in this excerpt.
1758 table_def *create_attributes(string tname, vector<select_element *> &select_list){
1762 // Create a new STREAM schema for the output of the FTA.
1764 field_entry_list *fel = new field_entry_list();
1766 for(s=0;s<select_list.size();s++){
1767 scalarexp_t *sel_se = select_list[s]->se;
1768 data_type *dt = sel_se->get_data_type();
1770 // Grab the annotations of the field.
1771 // As of this writing, the only meaningful annotations
1772 // are whether or not the attribute is temporal.
1773 // There can be an annotation of constant_t, but
1774 // I'll ignore this, it feels like an unsafe assumption
1775 param_list *plist = new param_list();
1776 // if(dt->is_temporal()){
1777 vector<string> param_strings = dt->get_param_keys();
1779 for(p=0;p<param_strings.size();++p){
1780 string v = dt->get_param_val(param_strings[p]);
// Two forms of append are used: key with value, and key alone.
1782 plist->append(param_strings[p].c_str(),v.c_str());
1784 plist->append(param_strings[p].c_str());
1788 // char access_fcn_name[500];
1789 string colname = select_list[s]->name;
1790 // sprintf(access_fcn_name,"get_field_%s",colname.c_str());
1791 string access_fcn_name = "get_field_"+colname;
1792 field_entry *fe = new field_entry(
1793 dt->get_type_str(), colname, access_fcn_name, plist, ufcns
1796 fel->append_field(fe);
// Wrap the collected fields in a table definition marked as a stream schema.
1799 table_def *fta_tbl = new table_def(
1800 tname.c_str(), NULL, NULL, fel, STREAM_SCHEMA
1807 //------------------------------------------------------------------
1808 // Textual representation of the query node.
// spx_qpn::to_query_string: render this node as query text.
// Output starts with "Select ", lists each select expression (followed by
// " AS <name>" when the element has a non-empty name), then "From <table>"
// and, if there are where predicates, each one parenthesised and joined
// with " AND ". Expressions and predicates are printed with a NULL
// aggregate table.
// NOTE(review): separators between select items, the clause keyword for the
// where list, and the return statement are on lines not visible here.
1812 string spx_qpn::to_query_string(){
1814 string ret = "Select ";
1816 for(s=0;s<select_list.size();s++){
1818 ret += se_to_query_string(select_list[s]->se, NULL);
1819 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1823 ret += "From "+table_name->to_string()+"\n";
1825 if(where.size() > 0){
1828 for(w=0;w<where.size();w++){
1829 if(w>0) ret += " AND ";
1830 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// sgah_qpn::to_query_string: render this aggregation node as query text.
// Select list, From and where predicates are printed as in the other
// to_query_string methods, but expressions/predicates are printed against
// this node's aggr_tbl. The group-by list is printed in one of two ways:
//   - flat "<definition> AS <name>" entries when there is at most one
//     group-by pattern or no entry types are recorded;
//   - otherwise entry by entry, using gb_entry_type ("", "CUBE", "ROLLUP",
//     "GROUPING_SETS") and gb_entry_count, with gb_pos tracking the position
//     in the flat group-by table.
// Having predicates are printed parenthesised and joined with " AND ".
// NOTE(review): clause keywords, closing parentheses, the gb_pos updates other
// than the one shown, and the return statement are on lines not visible here.
1841 string sgah_qpn::to_query_string(){
1843 string ret = "Select ";
1845 for(s=0;s<select_list.size();s++){
1847 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1848 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1852 ret += "From "+table_name->to_string()+"\n";
1854 if(where.size() > 0){
1857 for(w=0;w<where.size();w++){
1858 if(w>0) ret += " AND ";
1859 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1864 if(gb_tbl.size() > 0){
// Simple case: plain list of group-by definitions.
1867 if(gb_tbl.gb_patterns.size() <= 1 || gb_tbl.gb_entry_type.size()==0){
1868 for(g=0;g<gb_tbl.size();g++){
1869 if(g>0) ret += ", ";
1870 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
1871 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
1873 ret += gb_tbl.get_name(g);
// Pattern case: walk the recorded entries; an empty entry type is a single
// ordinary group-by variable.
1877 for(g=0;g<gb_tbl.gb_entry_type.size();++g){
1878 if(g>0) ret += ", ";
1879 if(gb_tbl.gb_entry_type[g] == ""){
1880 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+
1881 " AS "+ gb_tbl.get_name(gb_pos);
// CUBE / ROLLUP: keyword, then gb_entry_count[g] definitions.
1884 if(gb_tbl.gb_entry_type[g] == "CUBE" ||
1885 gb_tbl.gb_entry_type[g] == "ROLLUP"){
1886 ret += gb_tbl.gb_entry_type[g] + "(";
1888 for(gg=0;gg<gb_tbl.gb_entry_count[g];++gg){
1889 if(gg>0) ret += ", ";
1890 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+ " AS "+ gb_tbl.get_name(gb_pos);
// GROUPING_SETS: for every component row, print the names of the variables
// whose flag is set in that row.
1895 if(gb_tbl.gb_entry_type[g] == "GROUPING_SETS"){
1896 ret += gb_tbl.gb_entry_type[g] + "(";
1898 vector<vector<bool> > &local_components = gb_tbl.pattern_components[g];
1899 for(g1=0;g1<local_components.size();++g1){
1901 bool first_field = true;
// NOTE(review): this bound is inclusive (<=) while the CUBE/ROLLUP loop above
// uses < for the same count; if each component row has gb_entry_count[g]
// elements this reads one past the end - confirm the intended bound.
1903 for(g2=0;g2<=gb_tbl.gb_entry_count[g];g2++){
1904 if(local_components[g1][g2]){
1905 if(!first_field) ret+=", ";
1906 else first_field = false;
1907 ret += gb_tbl.get_name(gb_pos+g2);
// Skip past all variables of this grouping-sets entry.
1913 gb_pos += gb_tbl.gb_entry_count[g];
1920 if(having.size() > 0){
1923 for(h=0;h<having.size();h++){
1924 if(h>0) ret += " AND ";
1925 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// rsgah_qpn::to_query_string: render this node as query text.
// Prints the select list, From, where predicates, a flat group-by list
// ("<definition> AS <name>"), having predicates and finally a
// "Closing_When " clause; every predicate list is parenthesised per predicate
// and joined with " AND ". Expressions are printed against this node's aggr_tbl.
// NOTE(review): separators, the remaining clause keywords and the return
// statement are on lines not visible in this excerpt.
1934 string rsgah_qpn::to_query_string(){
1936 string ret = "Select ";
1938 for(s=0;s<select_list.size();s++){
1940 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1941 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1945 ret += "From "+table_name->to_string()+"\n";
1947 if(where.size() > 0){
1950 for(w=0;w<where.size();w++){
1951 if(w>0) ret += " AND ";
1952 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1957 if(gb_tbl.size() > 0){
1960 for(g=0;g<gb_tbl.size();g++){
1961 if(g>0) ret += ", ";
1962 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
1963 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl)+" AS ";
1965 ret += gb_tbl.get_name(g);
1970 if(having.size() > 0){
1973 for(h=0;h<having.size();h++){
1974 if(h>0) ret += " AND ";
1975 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// Closing_When clause, only emitted when there are closing_when predicates.
1980 if(closing_when.size() > 0){
1981 ret += "Closing_When ";
1983 for(h=0;h<closing_when.size();h++){
1984 if(h>0) ret += " AND ";
1985 ret += "(" + pred_to_query_str(closing_when[h]->pr,&aggr_tbl) + ")";
// sgahcwcb_qpn::to_query_string: render this node as query text.
// Prints the select list, From, where predicates, a flat group-by list,
// a "Supergroup " list (names of the group-by variables whose index is
// present in sg_tbl), having predicates, and the "Cleaning_When " and
// "Cleaning_By " clauses. Every predicate list is parenthesised per predicate
// and joined with " AND "; expressions are printed against this node's aggr_tbl.
// NOTE(review): separators (including the use of first_elem), the remaining
// clause keywords and the return statement are on lines not visible here.
1994 string sgahcwcb_qpn::to_query_string(){
1996 string ret = "Select ";
1998 for(s=0;s<select_list.size();s++){
2000 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2001 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2005 ret += "From "+table_name->to_string()+"\n";
2007 if(where.size() > 0){
2010 for(w=0;w<where.size();w++){
2011 if(w>0) ret += " AND ";
2012 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2017 if(gb_tbl.size() > 0){
2020 for(g=0;g<gb_tbl.size();g++){
2021 if(g>0) ret += ", ";
2022 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2023 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
2025 ret += gb_tbl.get_name(g);
// Supergroup: only group-by variables whose index is a member of sg_tbl.
2030 if(sg_tbl.size() > 0){
2031 ret += "Supergroup ";
2033 bool first_elem = true;
2034 for(g=0;g<gb_tbl.size();g++){
2035 if(sg_tbl.count(g)){
2040 ret += gb_tbl.get_name(g);
2046 if(having.size() > 0){
2049 for(h=0;h<having.size();h++){
2050 if(h>0) ret += " AND ";
2051 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
2057 if(cleanwhen.size() > 0){
2058 ret += "Cleaning_When ";
2060 for(h=0;h<cleanwhen.size();h++){
2061 if(h>0) ret += " AND ";
2062 ret += "(" + pred_to_query_str(cleanwhen[h]->pr,&aggr_tbl) + ")";
2067 if(cleanby.size() > 0){
2068 ret += "Cleaning_By ";
2070 for(h=0;h<cleanby.size();h++){
2071 if(h>0) ret += " AND ";
2072 ret += "(" + pred_to_query_str(cleanby[h]->pr,&aggr_tbl) + ")";
// mrg_qpn::to_query_string: render this merge node as query text.
// Output starts with "Merge <mvars[0]> : <mvars[1]>", may add
// " SLACK <slack expression>", and then lists every source table in fm
// separated by ", ".
// NOTE(review): only the first two merge variables are printed, and mvars is
// indexed without a size check - presumably a merge always has at least two;
// confirm. The test guarding the SLACK part and the return statement are on
// lines not visible in this excerpt.
2081 string mrg_qpn::to_query_string(){
2083 string ret="Merge ";
2084 ret += mvars[0]->to_query_string() + " : " + mvars[1]->to_query_string();
2086 ret += " SLACK "+se_to_query_string(slack, NULL);
2091 for(t=0;t<fm.size();++t){
2092 if(t>0) ret += ", ";
2093 ret += fm[t]->to_string();
// join_eq_hash_qpn::to_query_string: render this join node as query text.
// Prints the select list, one join keyword (INNER_JOIN, LEFT_OUTER_JOIN,
// RIGHT_OUTER_JOIN or OUTER_JOIN), the source tables in `from`, and the where
// predicates (parenthesised, joined with " AND "). Expressions and predicates
// are printed with a NULL aggregate table.
// NOTE(review): the switch mapping `properties` values to the four keywords,
// the separators and the return statement are on lines not visible here.
2100 string join_eq_hash_qpn::to_query_string(){
2102 string ret = "Select ";
2104 for(s=0;s<select_list.size();s++){
2106 ret += se_to_query_string(select_list[s]->se, NULL);
2107 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2111 // NOTE: assuming binary join.
// Pack the property of both sources into one integer:
// from[0]'s property plus twice from[1]'s property.
2112 int properties = from[0]->get_property()+2*from[1]->get_property();
2115 ret += "INNER_JOIN ";
2118 ret += "LEFT_OUTER_JOIN ";
2121 ret += "RIGHT_OUTER_JOIN ";
2124 ret += "OUTER_JOIN ";
2130 for(f=0;f<from.size();++f){
2132 ret += from[f]->to_string();
2136 if(where.size() > 0){
2139 for(w=0;w<where.size();w++){
2140 if(w>0) ret += " AND ";
2141 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// filter_join_qpn::to_query_string: render this filter-join node as query text.
// Prints the select list, then "FILTER_JOIN(<temporal field>,<temporal range>) ",
// the source tables in `from`, and the where predicates (parenthesised,
// joined with " AND "). Expressions and predicates are printed with a NULL
// aggregate table.
// NOTE(review): separators, clause keywords and the return statement are on
// lines not visible in this excerpt.
2149 string filter_join_qpn::to_query_string(){
2151 string ret = "Select ";
2153 for(s=0;s<select_list.size();s++){
2155 ret += se_to_query_string(select_list[s]->se, NULL);
2156 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2160 // NOTE: assuming binary join.
2161 ret += "FILTER_JOIN("+temporal_var->field+","+int_to_string(temporal_range)+") ";
2165 for(f=0;f<from.size();++f){
2167 ret += from[f]->to_string();
2171 if(where.size() > 0){
2174 for(w=0;w<where.size();w++){
2175 if(w>0) ret += " AND ";
2176 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
2185 // -----------------------------------------------------------------
2186 // Query node subclass specific processing.
// mrg_qpn::split_sources: break a merge over several sources into merge nodes
// and return them in `ret`; in every visible path `this` is pushed last.
// Visible behaviour:
//   - fm and mvars must have the same size, otherwise an internal error is
//     reported; a merge with a single source is reported as an error too.
//   - one special case returns just this node;
//   - another special case moves the first two sources into a child copy
//     ("_cH1") and makes this node merge that child with the remaining source;
//   - the general case creates two child copies ("_cH1", "_cH2"), gives the
//     first half of the sources to one and the rest to the other, renumbers
//     their table-variable references, makes this node merge the two
//     children, and recurses on both children.
// The size arguments passed to the printf-family calls are cast explicitly so
// that they match the %lu / %d conversions (size() returns size_t).
// NOTE(review): the conditions selecting the special cases, the debug guards
// around the printf calls, the statements that empty fm/mvars before the
// children are attached, and the return statements are on lines not visible
// in this excerpt.
2189 vector<mrg_qpn *> mrg_qpn::split_sources(){
2190 vector<mrg_qpn *> ret;
2194 if(fm.size() != mvars.size()){
2195 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources. fm.size() = %lu, mvars.size() = %lu\n",static_cast<unsigned long>(fm.size()),static_cast<unsigned long>(mvars.size()));
2199 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources, fm size is 1.\n");
2205 printf("spliting sources merge node, name = %s, %d sources.\n\t",node_name.c_str(), static_cast<int>(fm.size()));
2206 for(ff=0;ff<fm.size();++ff){
2207 printf("%s ",fm[ff]->to_string().c_str());
2212 // Handle special cases.
2214 ret.push_back(this);
// Child merge takes the first two sources and their merge variables.
2219 mrg_qpn *new_mrg = (mrg_qpn *)this->make_copy("_cH1");
2220 new_mrg->fm.push_back(this->fm[0]);
2221 new_mrg->fm.push_back(this->fm[1]);
2222 new_mrg->mvars.push_back(this->mvars[0]);
2223 new_mrg->mvars.push_back(this->mvars[1]);
// Drop the first source from this node, then point the (new) first slot at
// the child merge, keeping the range variable name of the slot it replaces.
2225 this->fm.erase(this->fm.begin());
2226 this->mvars.erase(this->mvars.begin());
2227 string vname = fm[0]->get_var_name();
2228 this->fm[0] = new tablevar_t(new_mrg->node_name.c_str());
2229 this->fm[0]->set_range_var(vname);
2230 this->mvars[0]->set_field(table_layout->get_field_name(merge_fieldpos));
2231 this->mvars[0]->set_tablevar_ref(0);
2232 this->mvars[1]->set_tablevar_ref(1);
2234 ret.push_back(new_mrg);
2235 ret.push_back(this);
2238 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg->node_name.c_str(),this->node_name.c_str());
2239 for(i=0;i<new_mrg->fm.size();++i)
2240 printf("\tsource %s var %d (%s, %s) \n",new_mrg->node_name.c_str(),i,new_mrg->fm[i]->to_string().c_str(), new_mrg->mvars[i]->to_string().c_str());
2241 for(i=0;i<this->fm.size();++i)
2242 printf("\tsource %s var %d (%s, %s) \n",this->node_name.c_str(),i,this->fm[i]->to_string().c_str(), this->mvars[i]->to_string().c_str());
2249 // divide up the sources between two children.
2250 // Then, recurse on the children.
2252 mrg_qpn *new_mrg1 = (mrg_qpn *)this->make_copy("_cH1");
2253 mrg_qpn *new_mrg2 = (mrg_qpn *)this->make_copy("_cH2");
// First half (integer division) goes to child 1 ...
2254 for(i=0;i<this->fm.size()/2;++i){
2255 new_mrg1->fm.push_back(this->fm[i]);
2256 new_mrg1->mvars.push_back(this->mvars[i]);
2257 //printf("Pushing %d (%s, %s) to new_mrg1\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// ... and the remainder (i continues from the loop above) to child 2.
2259 for(;i<this->fm.size();++i){
2260 new_mrg2->fm.push_back(this->fm[i]);
2261 new_mrg2->mvars.push_back(this->mvars[i]);
2262 //printf("Pushing %d (%s, %s) to new_mrg2\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// Renumber the table-variable references inside each child from 0.
2264 for(i=0;i<new_mrg1->mvars.size();++i)
2265 new_mrg1->mvars[i]->set_tablevar_ref(i);
2266 for(i=0;i<new_mrg2->mvars.size();++i)
2267 new_mrg2->mvars[i]->set_tablevar_ref(i);
2269 // Children created, make this merge them.
2273 tablevar_t *tmp_tblvar = new tablevar_t(new_mrg1->node_name.c_str());
2274 tmp_tblvar->set_range_var("_mrg_var_1");
2275 fm.push_back(tmp_tblvar);
2276 colref_t *tmp_cref = new colref_t("_mrg_var_1",table_layout->get_field_name(merge_fieldpos).c_str());
2277 tmp_cref->set_tablevar_ref(0);
2278 mvars.push_back(tmp_cref);
2280 tmp_tblvar = new tablevar_t(new_mrg2->node_name.c_str());
2281 tmp_tblvar->set_range_var("_mrg_var_2");
2282 fm.push_back(tmp_tblvar);
2283 tmp_cref = new colref_t("_mrg_var_2",table_layout->get_field_name(merge_fieldpos).c_str());
2284 tmp_cref->set_tablevar_ref(1);
2285 mvars.push_back(tmp_cref);
2289 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg1->node_name.c_str(),new_mrg2->node_name.c_str());
2290 for(i=0;i<new_mrg1->fm.size();++i)
2291 printf("\tsource %s var %d (%s, %s) \n",new_mrg1->node_name.c_str(),i,new_mrg1->fm[i]->to_string().c_str(), new_mrg1->mvars[i]->to_string().c_str());
2292 for(i=0;i<new_mrg2->fm.size();++i)
2293 printf("\tsource %s var %d (%s, %s) \n",new_mrg2->node_name.c_str(),i,new_mrg2->fm[i]->to_string().c_str(), new_mrg2->mvars[i]->to_string().c_str());
2296 // Recurse and put them together
2297 vector<mrg_qpn *> st1 = new_mrg1->split_sources();
2298 ret.insert(ret.end(), st1.begin(), st1.end());
2299 vector<mrg_qpn *> st2 = new_mrg2->split_sources();
2300 ret.insert(ret.end(), st2.begin(), st2.end());
2302 ret.push_back(this);
2310 //////// Split helper function : resolve interfaces
// get_ifaces: resolve the (machine, interface) pairs that `table` reads from.
//   table             - source table variable.
//   ifdb              - interface database; used to evaluate an interface set.
//   n_virtual_ifaces  - when 1, the resolved pairs are returned unchanged.
//   hfta_parallelism, hfta_idx - select which virtual interfaces are generated.
// If table->get_ifq() is true the interface name is treated as a set and
// expanded through ifdb->eval; errors are reported on stderr. Another path
// uses the single pair (table->get_machine(), table->get_interface()).
// With more than one virtual interface, every resolved interface yields
// `stride` names of the form <iface>X<2*s> for s in
// [hfta_idx*stride, (hfta_idx+1)*stride).
// NOTE(review): the tests guarding the two error messages, the else branch
// and the final return are on lines not visible in this excerpt.
2312 vector<pair<string,string> > get_ifaces(tablevar_t *table, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2313 vector<pair<string,string> > basic_ifaces;
2315 if(table->get_ifq()){
2316 basic_ifaces= ifdb->eval(table->get_interface(),ierr);
2318 fprintf(stderr,"ERROR, Interface set %s not found.\n",table->get_interface().c_str());
2321 fprintf(stderr,"ERROR, interface definition file didn't parse.\n");
2324 basic_ifaces.push_back(make_pair(table->get_machine(), table->get_interface()));
2327 if(n_virtual_ifaces == 1)
2328 return basic_ifaces;
// Integer division.
// NOTE(review): hfta_parallelism == 0 would divide by zero, and a value larger
// than n_virtual_ifaces gives stride 0 (no interfaces generated) - presumably
// callers rule both out; confirm.
2330 int stride = n_virtual_ifaces / hfta_parallelism;
2332 vector<pair<string,string> > ifaces;
2334 for(i=0;i<basic_ifaces.size();++i){
2335 string mach = basic_ifaces[i].first;
2336 string iface = basic_ifaces[i].second;
2337 for(s=hfta_idx*stride;s<(hfta_idx+1)*stride;++s){
// The numeric suffix is twice the virtual interface index.
2338 ifaces.push_back(pair<string, string>(mach,iface+"X"+int_to_string(2*s)));
2346 ///////// Split helper function : compute slack in a generated
// Compute the merge slack for this node and store it in the member `slack`
// as a LITERAL_LONGINT scalar expression.
//   t_se    : temporal scalar expression passed to find_temporal_divisor
//   fname   : field name supplied by the caller
//   sources : (machine, interface) pairs whose slack settings are consulted
//   ifdb    : interface database queried for the "Slack_<fnm>" values
//   gbt     : group-by table passed through to find_temporal_divisor
// NOTE(review): fnm, e, es, s, v and tmps are declared on lines that are not
// present in this extract; how fnm relates to fname cannot be seen from here.
2349 void mrg_qpn::resolve_slack(scalarexp_t *t_se, string fname, vector<pair<string, string> > &sources, ifq_t *ifdb, gb_table *gbt){
2353 // Find slack divisor, if any.
2355 long long int slack_divisor = find_temporal_divisor(t_se,gbt, fnm);
// Handling of a non-positive divisor is on lines not present in this extract.
2356 if(slack_divisor <= 0){
2361 // find max slack in the iface spec
2362 long long int max_slacker = 0, this_slacker;
2363 string rname = "Slack_"+fnm;
2364 for(s=0;s<sources.size();++s){
2365 string src_machine = sources[s].first;
2366 string src_iface = sources[s].second;
2367 vector<string> slack_vec = ifdb->get_iface_vals(src_machine, src_iface,rname,e,es);
2368 for(v=0;v<slack_vec.size();++v){
// NOTE(review): "%qd" is a non-standard length modifier; "%lld" is the ISO C
// spelling for long long. sscanf also returns EOF (non-zero) when input ends
// before any conversion, so this test would accept that case and then compare
// the uninitialized this_slacker - consider testing "== 1" instead.
2369 if(sscanf(slack_vec[v].c_str(),"%qd",&this_slacker)){
2370 if(this_slacker > max_slacker)
2371 max_slacker = this_slacker;
// Handling of the "no positive slack found" case is on lines not present in this extract.
2376 if(max_slacker <= 0){
// Slack is the maximum interface slack divided by the divisor, rounded up.
2382 long long int the_slack=(long long int)(ceil(((double)max_slacker)/((double)slack_divisor)));
// Render the value as text so that it can be wrapped in a literal node.
2384 sprintf(tmps,"%lld",the_slack);
2385 literal_t *slack_lit = new literal_t(tmps, LITERAL_LONGINT);
2386 slack = new scalarexp_t(slack_lit);
2390 //------------------------------------------------------------------
2391 // split a node to extract LFTA components.
// A merge node is never split for LFTA extraction: the result vector receives
// this node itself (the pointer `this`, not a duplicate).
// None of the parameters are used on the lines visible in this extract.
2394 vector<qp_node *> mrg_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2395 // nothing to do, nothing to split, return copy of self.
2399 vector<qp_node *> ret_vec;
2401 ret_vec.push_back(this);
// Split a filter-join node for LFTA execution.
// The node is first checked for FTA safety, then the set of interfaces read by
// from[0] is resolved. With a single interface this node is returned with its
// FROM entries bound to that interface; with several, one filter_join_qpn copy
// is built per interface and a mrg_qpn over those copies is appended.
//   Ext_fcns : external function list used by the FTA-safety checks
//   ifdb     : interface database used to resolve interfaces and interface params
//   n_virtual_ifaces, hfta_parallelism, hfta_idx : forwarded to get_ifaces
// Schema is not referenced on the lines visible in this extract; assignments to
// hfta_returned, if any, are on lines not shown.
// Returns the vector of resulting plan nodes.
2406 vector<qp_node *> filter_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2407 vector<qp_node *> ret_vec;
2409 // First check if the query can be pushed to the FTA.
// fta_ok accumulates the result of every select expression and where predicate check.
2412 for(s=0;s<select_list.size();s++){
2413 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2416 for(p=0;p<where.size();p++){
2417 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
// The guard around this error report is on lines not present in this extract.
2421 fprintf(stderr,"ERROR, filter join %s is fta-unsafe.\n",node_name.c_str());
2425 // Can it be done in a single lfta?
2426 // Get the set of interfaces it accesses.
2429 vector<string> sel_names;
// Interfaces are resolved from the first FROM entry only.
2430 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2431 if (ifaces.empty()) {
2432 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set\n");
2436 if(ifaces.size() == 1){
2437 // Single interface, no need to merge.
2439 ret_vec.push_back(this);
// Bind every FROM entry of this node to the single resolved interface.
2441 for(i=0;i<from.size();i++){
2442 from[i]->set_machine(ifaces[0].first);
2443 from[i]->set_interface(ifaces[0].second);
2444 from[i]->set_ifq(false);
2448 // Multiple interfaces, generate the interface-specific queries plus
2452 vector<string> sel_names;
// Build one copy of this node per interface.
2453 for(si=0;si<ifaces.size();++si){
2454 filter_join_qpn *fta_node = new filter_join_qpn();
// NOTE(review): if this loop is only reached on the multiple-interface path,
// the size()==1 test below can never be true - confirm against the full source.
2457 if(ifaces.size()==1)
2458 fta_node->set_node_name( node_name );
// Per-interface copies are named _<node>_<machine>_<interface>.
2460 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2462 fta_node->set_node_name(new_name);
2464 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM entries and bind each duplicate to this interface.
2468 for(f=0;f<from.size();f++){
2469 fta_node->from.push_back(from[f]->duplicate());
2470 fta_node->from[f]->set_machine(ifaces[si].first);
2471 fta_node->from[f]->set_interface(ifaces[si].second);
2472 fta_node->from[f]->set_ifq(false);
2474 fta_node->temporal_var = temporal_var;
2475 fta_node->temporal_range = temporal_range;
2477 fta_node->use_bloom = use_bloom;
2479 for(s=0;s<select_list.size();s++){
2480 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Copy each predicate class into the new node and also into its where list.
// NOTE(review): each of the five loops below is bounded by the size of its own
// predicate list (shared_pred, pred_t0, pred_t1, hash_eq, postfilter) but
// duplicates where[p]->pr. Unless `where` is laid out so that these indices
// coincide, the wrong predicates are copied (and where[p] may be out of range);
// shared_pred[p]->pr, pred_t0[p]->pr, etc. look like the intended sources -
// confirm against the full source.
2483 for(p=0;p<shared_pred.size();p++){
2484 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2485 cnf_elem *new_cnf = new cnf_elem(new_pr);
2486 analyze_cnf(new_cnf);
2487 fta_node->shared_pred.push_back(new_cnf);
2488 fta_node->where.push_back(new_cnf);
2490 for(p=0;p<pred_t0.size();p++){
2491 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2492 cnf_elem *new_cnf = new cnf_elem(new_pr);
2493 analyze_cnf(new_cnf);
2494 fta_node->pred_t0.push_back(new_cnf);
2495 fta_node->where.push_back(new_cnf);
2497 for(p=0;p<pred_t1.size();p++){
2498 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2499 cnf_elem *new_cnf = new cnf_elem(new_pr);
2500 analyze_cnf(new_cnf);
2501 fta_node->pred_t1.push_back(new_cnf);
2502 fta_node->where.push_back(new_cnf);
2504 for(p=0;p<hash_eq.size();p++){
2505 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2506 cnf_elem *new_cnf = new cnf_elem(new_pr);
2507 analyze_cnf(new_cnf);
2508 fta_node->hash_eq.push_back(new_cnf);
2509 fta_node->where.push_back(new_cnf);
2511 for(p=0;p<postfilter.size();p++){
2512 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2513 cnf_elem *new_cnf = new cnf_elem(new_pr);
2514 analyze_cnf(new_cnf);
2515 fta_node->postfilter.push_back(new_cnf);
2516 fta_node->where.push_back(new_cnf);
2519 // Xfer all of the parameters.
2520 // Use existing handle annotations.
2521 vector<string> param_names = param_tbl->get_param_names();
2523 for(pi=0;pi<param_names.size();pi++){
2524 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2525 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2526 param_tbl->handle_access(param_names[pi]));
2528 fta_node->definitions = definitions;
// A non-zero result from resolve_if_params is recorded on this node as error code 3;
// the error text is written into this->err_str by the callee.
2529 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2530 this->error_code = 3;
2534 ret_vec.push_back(fta_node);
// Merge the per-interface copies; the first copy is passed to the mrg_qpn
// constructor together with the list of copy names.
2537 mrg_qpn *mrg_node = new mrg_qpn((filter_join_qpn *)ret_vec[0],
2538 node_name, sel_names,ifaces, ifdb);
2539 ret_vec.push_back(mrg_node);
2546 // Use to search for unresolved interface param refs in an hfta.
// Accumulate into ret the interface-parameter reference counts reported by
// count_se_ifp_refs for every select-list expression and by count_pr_ifp_refs
// for every where predicate; ifpnames is passed through to both helpers.
// The declaration of ret and the return statement are on lines not present in
// this extract (presumably ret is returned).
2548 int spx_qpn::count_ifp_refs(set<string> &ifpnames){
2551 for(i=0;i<select_list.size();++i)
2552 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2553 for(i=0;i<where.size();++i)
2554 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulate into ret the interface-parameter reference counts found in the
// select list, the where and having predicates, the aggregate operands and the
// group-by definitions; ifpnames is passed through to the counting helpers.
// The declaration of ret and the return statement are on lines not present in
// this extract (presumably ret is returned).
2558 int sgah_qpn::count_ifp_refs(set<string> &ifpnames){
2561 for(i=0;i<select_list.size();++i)
2562 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2563 for(i=0;i<where.size();++i)
2564 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2565 for(i=0;i<having.size();++i)
2566 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2567 for(i=0;i<aggr_tbl.size();++i){
// Built-in aggregates other than star aggregates carry a single operand expression.
2568 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2569 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
// Presumably the else branch (the branch line is not shown): count every
// entry of the aggregate's operand list.
2571 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2572 for(j=0;j<opl.size();++j)
2573 ret += count_se_ifp_refs(opl[j],ifpnames);
2576 for(i=0;i<gb_tbl.size();++i){
2577 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Accumulate into ret the interface-parameter reference counts found in the
// select list, the where, having and closing_when predicates, the aggregate
// operands and the group-by definitions; ifpnames is passed through to the
// counting helpers.
// The declaration of ret and the return statement are on lines not present in
// this extract (presumably ret is returned).
2583 int rsgah_qpn::count_ifp_refs(set<string> &ifpnames){
2586 for(i=0;i<select_list.size();++i)
2587 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2588 for(i=0;i<where.size();++i)
2589 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2590 for(i=0;i<having.size();++i)
2591 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2592 for(i=0;i<closing_when.size();++i)
2593 ret += count_pr_ifp_refs(closing_when[i]->pr,ifpnames);
2594 for(i=0;i<aggr_tbl.size();++i){
// Built-in aggregates other than star aggregates carry a single operand expression.
2595 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2596 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
// Presumably the else branch (the branch line is not shown): count every
// entry of the aggregate's operand list.
2598 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2599 for(j=0;j<opl.size();++j)
2600 ret += count_se_ifp_refs(opl[j],ifpnames);
2603 for(i=0;i<gb_tbl.size();++i){
2604 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Interface-parameter reference count for a merge node.
// NOTE(review): the body of this function is on lines not present in this
// extract, so the value it returns cannot be confirmed from here.
2609 int mrg_qpn::count_ifp_refs(set<string> &ifpnames){
// Accumulate into ret the interface-parameter reference counts found in the
// select list and in every predicate list of the join: both prefilter lists,
// temporal_eq, hash_eq and postfilter. ifpnames is passed through to the helpers.
// The declaration of ret and the return statement are on lines not present in
// this extract (presumably ret is returned).
2613 int join_eq_hash_qpn::count_ifp_refs(set<string> &ifpnames){
2616 for(i=0;i<select_list.size();++i)
2617 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2618 for(i=0;i<prefilter[0].size();++i)
2619 ret += count_pr_ifp_refs(prefilter[0][i]->pr,ifpnames);
2620 for(i=0;i<prefilter[1].size();++i)
2621 ret += count_pr_ifp_refs(prefilter[1][i]->pr,ifpnames);
2622 for(i=0;i<temporal_eq.size();++i)
2623 ret += count_pr_ifp_refs(temporal_eq[i]->pr,ifpnames);
2624 for(i=0;i<hash_eq.size();++i)
2625 ret += count_pr_ifp_refs(hash_eq[i]->pr,ifpnames);
2626 for(i=0;i<postfilter.size();++i)
2627 ret += count_pr_ifp_refs(postfilter[i]->pr,ifpnames);
// Accumulate into ret the interface-parameter reference counts found in the
// select list and in the where predicates; ifpnames is passed through to the helpers.
// The declaration of ret and the return statement are on lines not present in
// this extract (presumably ret is returned).
2631 int filter_join_qpn::count_ifp_refs(set<string> &ifpnames){
2634 for(i=0;i<select_list.size();++i)
2635 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2636 for(i=0;i<where.size();++i)
2637 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2642 // Resolve interface params to string literals
// Resolve interface-parameter references in the select list and where
// predicates, using the machine and interface of the first FROM entry.
//   ifdb : interface database handed to the resolve helpers
//   err  : error string handed to the resolve helpers
// Callers in this file treat a non-zero return value as failure. The statements
// executed when a helper reports non-zero, and the final return, are on lines
// not present in this extract.
2643 int filter_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2646 string ifname = from[0]->get_interface();
2647 string ifmach = from[0]->get_machine();
2648 for(i=0;i<select_list.size();++i)
2649 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2651 for(i=0;i<where.size();++i)
2652 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and where
// predicates, using the machine and interface recorded in table_name.
//   ifdb : interface database handed to the resolve helpers
//   err  : error string handed to the resolve helpers
// Callers in this file treat a non-zero return value as failure. The statements
// executed when a helper reports non-zero, and the final return, are on lines
// not present in this extract.
2658 int spx_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2661 string ifname = table_name->get_interface();
2662 string ifmach = table_name->get_machine();
2663 for(i=0;i<select_list.size();++i)
2664 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2666 for(i=0;i<where.size();++i)
2667 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references throughout an aggregation node: the
// select list, the where and having predicates, the aggregate operands and the
// group-by definitions, using the machine and interface recorded in table_name.
//   ifdb : interface database handed to the resolve helpers
//   err  : error string handed to the resolve helpers
// Callers in this file treat a non-zero return value as failure. The statements
// executed when a helper reports non-zero, and the final return, are on lines
// not present in this extract.
2672 int sgah_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2675 string ifname = table_name->get_interface();
2676 string ifmach = table_name->get_machine();
2678 //printf("Select list has %d elements\n",select_list.size());
2679 for(i=0;i<select_list.size();++i){
2680 //printf("\tresolving elemet %d\n",i);
2681 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) ){
2685 for(i=0;i<where.size();++i){
2686 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err) )
2689 for(i=0;i<having.size();++i){
2690 if( resolve_pr_ifp_refs(having[i]->pr,ifmach, ifname, ifdb, err) )
2693 //printf("aggr list has %d elements\n",select_list.size());
2694 for(i=0;i<aggr_tbl.size();++i){
2695 //printf("\tresolving elemet %d\n",i);
// Built-in aggregates other than star aggregates carry a single operand expression.
2696 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2697 //printf("\t\t\tbuiltin\n");
2698 if( resolve_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifmach, ifname, ifdb, err) )
// Presumably the else branch (the branch line is not shown): resolve every
// entry of the aggregate's operand list.
2701 //printf("\t\t\tudaf\n");
2702 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2703 for(j=0;j<opl.size();++j)
2704 if( resolve_se_ifp_refs(opl[j],ifmach, ifname, ifdb, err) )
2708 for(i=0;i<gb_tbl.size();++i){
2709 if( resolve_se_ifp_refs(gb_tbl.get_def(i), ifmach, ifname, ifdb, err) )
2718 SPLITTING A SELECTION_PROJECTION OPERATOR
2720 An SPX node may reference:
2721 literals, parameters, colrefs, functions, operators
2722 An SPX node may not reference:
2723 group-by variables, aggregates
2725 An SPX node contains
2726 selection list of SEs
2727 where list of CNF predicates
2730 If each selection SE and each where predicate is fta-safe
2731 execute entire operator as an LFTA.
2733 for each predicate in the where clause
2734 if it is fta safe, execute it in the lfta
2735 else, split each SE in the predicate, evaluate the
2736 top-level SEs in the hfta and eval the predicate on that.
2737 For each SE in the se list
2738 Split the SE, eval the high level part, push onto hfta
2742 A SE represents a value which must be computed. The LFTA
2743 must provide sub-values from which the HFTA can compute the
2745 1) the SE is fta-safe
2746 Create an entry in the selection list of the LFTA which is
2747 the SE itself. Reference this LFTA selection list entry in
2748 the HFTA (via a field name assigned to the lfta selection
2750 2) The SE is not fta-safe
2751 Determine the boundary between the fta-safe and the fta-unsafe
2752 portions of the SE. The result is a rooted tree (which is
2753 evaluated at the HFTA) which references sub-SEs (which are
2754 evaluated at the LFTA). Each of the sub-SEs is placed on
2755 the selection list of the LFTA and assigned field names,
2756 the top part is evaluated at the HFTA and references the
2757 sub-SEs through their assigned field names.
2758 The only SEs on the LFTA selection list are those created by
2759 the above mechanism. The collection of assigned field names becomes
2760 the schema of the LFTA.
2762 TODO: insert tablevar names into the colrefs.
// Split a selection/projection node into LFTA and (optionally) HFTA parts.
// Visible behavior:
//   - a node whose source schema is not PROTOCOL_SCHEMA is returned as is;
//   - when every select expression and where predicate is FTA-safe, one spx_qpn
//     copy is produced per interface, plus a mrg_qpn when there is more than
//     one interface;
//   - otherwise the node is divided into an FTA node ("_fta_<name>") and a
//     stream node (<name>), and the FTA node is replicated per interface and
//     merged when there is more than one interface.
//   Ext_fcns      : external function list used by the safety checks and splitters
//   Schema        : table list used to look up the schema type of the source
//   hfta_returned : set to 2 (1 + 1) on the split, multiple-interface path;
//                   other assignments, if any, are on lines not shown
//   ifdb          : interface database used to resolve interfaces and interface params
//   n_virtual_ifaces, hfta_parallelism, hfta_idx : forwarded to get_ifaces
// Returns the vector of resulting plan nodes.
2766 vector<qp_node *> spx_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2769 vector<qp_node *> ret_vec;
2771 // If the node reads from a stream, don't split.
2772 // int t = Schema->get_table_ref(table_name->get_schema_name());
2773 int t = table_name->get_schema_ref();
2774 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
2776 ret_vec.push_back(this);
2781 // Get the set of interfaces it accesses.
2784 vector<string> sel_names;
2785 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2786 if (ifaces.empty()) {
2787 fprintf(stderr,"INTERNAL ERROR in spx_qpn::split_node_for_fta - empty interface set\n");
2792 // The FTA node, it is always returned.
// NOTE(review): table_name is shared with this node here (pointer copy, not a
// duplicate), so later set_* calls through fta_node->table_name also modify
// this->table_name.
2794 spx_qpn *fta_node = new spx_qpn();
2795 fta_node->table_name = table_name;
2797 // for colname imputation
2798 // vector<string> fta_flds, stream_flds;
2801 // First check if the query can be pushed to the FTA.
// fta_ok accumulates the result of every select expression and where predicate check.
2804 for(s=0;s<select_list.size();s++){
2805 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2808 for(p=0;p<where.size();p++){
2809 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2813 ////////////////////////////////////////////////////////////
2814 // The query can be executed entirely in the FTA.
// One copy of the query is generated per interface.
// NOTE(review): fta_node is reassigned below, so the object allocated at the
// top of the function is no longer referenced through fta_node on this path -
// confirm it is released or intentionally abandoned.
2817 for(si=0;si<ifaces.size();++si){
2818 fta_node = new spx_qpn();
// A single copy keeps the original node name; multiple copies are named
// _<node>_<machine>_<interface>.
2821 if(ifaces.size()==1)
2822 fta_node->set_node_name( node_name );
2824 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2826 fta_node->set_node_name(new_name);
2828 sel_names.push_back(fta_node->get_node_name());
// Each copy gets its own duplicated table variable bound to this interface.
2831 fta_node->table_name = table_name->duplicate();
2832 fta_node->table_name->set_machine(ifaces[si].first);
2833 fta_node->table_name->set_interface(ifaces[si].second);
2834 fta_node->table_name->set_ifq(false);
2836 for(s=0;s<select_list.size();s++){
2837 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
2839 for(p=0;p<where.size();p++){
2840 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
2841 cnf_elem *new_cnf = new cnf_elem(new_pr);
2842 analyze_cnf(new_cnf);
2844 fta_node->where.push_back(new_cnf);
2847 // Xfer all of the parameters.
2848 // Use existing handle annotations.
2849 vector<string> param_names = param_tbl->get_param_names();
2851 for(pi=0;pi<param_names.size();pi++){
2852 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2853 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2854 param_tbl->handle_access(param_names[pi]));
2856 fta_node->definitions = definitions;
// A non-zero result from resolve_if_params is recorded on this node as error code 3.
2857 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2858 this->error_code = 3;
2862 ret_vec.push_back(fta_node);
// With several interfaces, a merge node over the per-interface copies is appended.
2865 if(ifaces.size() > 1){
2866 spx_qpn *tmp_spx = (spx_qpn *)(ret_vec[0]);
2867 mrg_qpn *mrg_node = new mrg_qpn(tmp_spx,
2868 node_name, sel_names,ifaces, ifdb);
2870 Do not split sources until we are done with optimizations
2871 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
2872 for(i=0;i<split_merge.size();++i){
2873 ret_vec.push_back(split_merge[i]);
2875 hfta_returned = split_merge.size();
2877 ret_vec.push_back(mrg_node);
2882 // printf("OK as FTA.\n");
2883 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
2888 ////////////////////////////////////////////////////
2889 // The fta must be split. Create a stream node.
2890 // NOTE : I am counting on the single
2891 // table in the from list. (Joins handled in a different operator).
2895 spx_qpn *stream_node = new spx_qpn();
2896 stream_node->set_node_name( node_name );
2897 // Create the tablevar in the stream's FROM clause.
2898 // set the schema name to the name of the LFTA,
2899 // and use the same tablevar name.
2900 stream_node->table_name = new tablevar_t(
2901 ("_fta_"+node_name).c_str()
2903 stream_node->table_name->set_range_var(table_name->get_var_name());
2906 fta_node->set_node_name( "_fta_"+node_name );
2908 // table var names of fta, stream.
2909 string fta_var = fta_node->table_name->get_var_name();
2910 string stream_var = stream_node->table_name->get_var_name();
2912 // Set up select list vector
2913 vector< vector<select_element *> *> select_vec;
2914 select_vec.push_back(&(fta_node->select_list)); // only one child
2917 // Split the select list into its FTA and stream parts.
2918 // If any part of the SE is fta-unsafe, it will return
2919 // a SE to execute at the stream ref'ing SE's evaluated
2920 // at the fta (which are put on the FTA's select list as a side effect).
2921 // If the SE is fta-safe, put it on the fta select list, make
2922 // a ref to it and put the ref on the stream select list.
2923 for(s=0;s<select_list.size();s++){
2924 bool fta_forbidden = false;
2925 int se_src = SPLIT_FTAVEC_NOTBLVAR;
2926 // scalarexp_t *root_se = split_fta_se(
2927 // select_list[s]->se,fta_forbidden, fta_node->select_list, Ext_fcns
2929 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
2930 fta_forbidden, se_src, select_vec, Ext_fcns
2932 // if(fta_forbidden){
// The returned expression goes directly onto the stream select list when it is
// FTA-forbidden or when se_src is still SPLIT_FTAVEC_NOTBLVAR; otherwise
// (presumably the else branch) a reference to an FTA select-list entry is used.
2933 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
2934 stream_node->select_list.push_back(
2935 new select_element(root_se, select_list[s]->name)
2938 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,root_se,0);
2939 stream_node->select_list.push_back(
2940 new select_element(new_se, select_list[s]->name)
2946 // The WHERE clause has already been split into a set of clauses
2947 // that are ANDED together. For each clause, check if its FTA-safe.
2948 // If not, split its SE's into fta-safe and stream-executing parts,
2949 // then put a clause which ref's the SEs into the stream.
2950 // Else put it into the LFTA.
2951 predicate_t *pr_root;
2953 for(p=0;p<where.size();p++){
2954 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) ){
2955 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
2956 // pr_root = split_fta_pr( where[p]->pr, fta_node->select_list, Ext_fcns);
2957 fta_forbidden = true;
2959 pr_root = dup_pr(where[p]->pr, NULL);
2960 fta_forbidden = false;
2962 cnf_elem *cnf_root = new cnf_elem(pr_root);
2963 analyze_cnf(cnf_root);
// The clause is placed on exactly one side; the test selecting between the two
// pushes (presumably on fta_forbidden) is on a line not present in this extract.
2966 stream_node->where.push_back(cnf_root);
2968 fta_node->where.push_back(cnf_root);
2974 // Divide the parameters among the stream, FTA.
2975 // Currently : assume that the stream receives all parameters
2976 // and parameter updates, incorporates them, then passes
2977 // all of the parameters to the FTA.
2978 // This will need to change (tables, fta-unsafe types. etc.)
2980 // I will pass on the use_handle_access marking, even
2981 // though the fcn call that requires handle access might
2982 // exist in only one of the parts of the query.
2983 // Parameter manipulation and handle access determination will
2984 // need to be revisited anyway.
2985 vector<string> param_names = param_tbl->get_param_names();
2987 for(pi=0;pi<param_names.size();pi++){
2988 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2989 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2990 param_tbl->handle_access(param_names[pi]));
2991 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2992 param_tbl->handle_access(param_names[pi]));
// The FTA side drops the "_referenced_ifaces" definition; the stream side keeps all definitions.
2995 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
2996 stream_node->definitions = definitions;
2998 // Now split by interfaces
2999 if(ifaces.size() > 1){
// Replicate the FTA part once per interface.
3000 for(si=0;si<ifaces.size();++si){
3001 spx_qpn *subq_node = new spx_qpn();
3003 // Name the subquery
3004 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3006 subq_node->set_node_name( new_name) ;
3007 sel_names.push_back(subq_node->get_node_name());
3010 subq_node->table_name = fta_node->table_name->duplicate();
3011 subq_node->table_name->set_machine(ifaces[si].first);
3012 subq_node->table_name->set_interface(ifaces[si].second);
3013 subq_node->table_name->set_ifq(false);
3015 for(s=0;s<fta_node->select_list.size();s++){
3016 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3018 for(p=0;p<fta_node->where.size();p++){
3019 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3020 cnf_elem *new_cnf = new cnf_elem(new_pr);
3021 analyze_cnf(new_cnf);
3023 subq_node->where.push_back(new_cnf);
3025 // Xfer all of the parameters.
3026 // Use existing handle annotations.
3027 vector<string> param_names = param_tbl->get_param_names();
3029 for(pi=0;pi<param_names.size();pi++){
3030 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3031 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3032 param_tbl->handle_access(param_names[pi]));
// A non-zero result from resolve_if_params is recorded on this node as error code 3.
3034 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3035 this->error_code = 3;
3038 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3040 ret_vec.push_back(subq_node);
// The merge node takes the FTA node's name ("_fta_<name>"), which is the name
// the stream node's table variable was created with.
3043 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3044 fta_node->node_name, sel_names, ifaces, ifdb);
3046 Do not split sources until we are done with optimizations
3047 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3048 for(i=0;i<split_merge.size();++i){
3049 ret_vec.push_back(split_merge[i]);
3052 ret_vec.push_back(mrg_node);
3053 ret_vec.push_back(stream_node);
3054 hfta_returned = 1/*split_merge.size()*/ + 1;
// Presumably the single-interface branch: bind the FTA node directly to the
// one resolved interface and return it together with the stream node.
3057 fta_node->table_name->set_machine(ifaces[0].first);
3058 fta_node->table_name->set_interface(ifaces[0].second);
3059 fta_node->table_name->set_ifq(false);
3060 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3061 this->error_code = 3;
3064 ret_vec.push_back(fta_node);
3065 ret_vec.push_back(stream_node);
3069 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3070 // printf("Stream node is:\n%s\n\n",stream_node->to_query_string().c_str() );
3078 Splitting an aggregation+sampling operator.
3079 right now, return an error if any splitting is required.
// Splitting is not implemented for the aggregation+sampling operator.
// A node whose source schema is not PROTOCOL_SCHEMA is placed in the result
// vector unchanged; the "cannot split" error is reported further down.
// NOTE(review): the statements that follow each of those two steps (returns or
// exits) are on lines not present in this extract.
3082 vector<qp_node *> sgahcwcb_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3086 vector<qp_node *> ret_vec;
3087 int s, p, g, a, o, i;
3090 vector<string> fta_flds, stream_flds;
3092 // If the node reads from a stream, don't split.
3093 // int t = Schema->get_table_ref(table_name->get_schema_name());
3094 int t = table_name->get_schema_ref();
3095 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3096 ret_vec.push_back(this);
3100 fprintf(stderr,"ERROR : cannot split a sampling operator (not yet implemented).\n");
3110 Splitting a running aggregation operator.
3111 The code is almost identical to that of the sgah operator
3113 - there is no lfta-only option.
3114 - the stream node is rsgah_qpn (lfta is sgah or spx)
3115 - need to handle the closing when (similar to having)
3118 vector<qp_node *> rsgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3122 vector<qp_node *> ret_vec;
3123 int s, p, g, a, o, i;
3126 vector<string> fta_flds, stream_flds;
3128 // If the node reads from a stream, don't split.
3129 // int t = Schema->get_table_ref(table_name->get_schema_name());
3130 int t = table_name->get_schema_ref();
3131 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3132 ret_vec.push_back(this);
3136 // Get the set of interfaces it accesses.
3138 vector<string> sel_names;
3139 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3140 if (ifaces.empty()) {
3141 fprintf(stderr,"INTERNAL ERROR in rsgah_qpn::split_node_for_fta - empty interface set\n");
3148 //////////////////////////////////////////////////////////////
3149 /// Split into lfta, hfta.
3151 // A rsgah node must always be split,
3152 // if for no other reason than to complete the
3153 // partial aggregation.
3155 // First, determine if the query can be split into aggr/aggr,
3156 // or if it must be selection/aggr.
3157 // Splitting into selection/aggr is allowed only
3158 // if select_lfta is set.
3161 bool select_allowed = definitions.count("select_lfta")>0;
3162 bool select_rqd = false;
3164 set<int> unsafe_gbvars; // for processing where clause
3165 for(g=0;g<gb_tbl.size();g++){
3166 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3167 if(!select_allowed){
3168 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3169 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3171 this->error_code = 1;
3172 this->err_str = tmpstr;
3176 unsafe_gbvars.insert(g);
3181 // Verify that the SEs in the aggregate definitions are fta-safe
3182 for(a=0;a<aggr_tbl.size();++a){
3183 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3184 if(ase != NULL){ // COUNT(*) does not have a SE.
3185 if(!select_allowed){
3186 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3187 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3188 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3190 this->error_code = 1;
3191 this->err_str = tmpstr;
3200 // Verify that all of the ref'd UDAFs can be split.
3202 for(a=0;a<aggr_tbl.size();++a){
3203 if(! aggr_tbl.is_builtin(a)){
3204 int afcn = aggr_tbl.get_fcn_id(a);
3205 int super_id = Ext_fcns->get_superaggr_id(afcn);
3206 int sub_id = Ext_fcns->get_subaggr_id(afcn);
3207 if(super_id < 0 || sub_id < 0){
3208 if(!select_allowed){
3209 this->err_str += "ERROR in rsgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3210 this->error_code = 1;
3219 for(p=0;p<where.size();p++){
3220 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3221 if(!select_allowed){
3222 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3223 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3225 this->error_code = 1;
3226 this->err_str = tmpstr;
3237 /////////////////////////////////////////////////////
3238 // Split into aggr/aggr.
3244 sgah_qpn *fta_node = new sgah_qpn();
3245 fta_node->table_name = table_name;
3246 fta_node->set_node_name( "_fta_"+node_name );
3247 fta_node->table_name->set_range_var(table_name->get_var_name());
3250 rsgah_qpn *stream_node = new rsgah_qpn();
3251 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3252 stream_node->set_node_name( node_name );
3253 stream_node->table_name->set_range_var(table_name->get_var_name());
3255 // First, process the group-by variables.
3256 // The fta must supply the values of all the gbvars.
3257 // If a gb is computed, the computation must be
3258 // performed at the FTA, so the SE must be FTA-safe.
3259 // Nice side effect : the gbvar table contains
3260 // matching entries for the original query, the lfta query,
3261 // and the hfta query. So gbrefs in the new queries are set
3262 // correctly just by inheriting the gbrefs from the old query.
3263 // If this property changed, I'll need translation tables.
3266 for(g=0;g<gb_tbl.size();g++){
3267 // Insert the gbvar into the lfta.
3268 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
3269 fta_node->gb_tbl.add_gb_var(
3270 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
3273 // Insert a ref to the value of the gbvar into the lfta select list.
3274 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
3275 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
3276 gbvar_fta->set_gb_ref(g);
3277 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
3278 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
3280 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
3281 gbvar_stream->set_gb_ref(-1); // used as GBvar def
3282 stream_node->gb_tbl.add_gb_var(
3283 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
3288 // SEs in the aggregate definitions.
3289 // They are all safe, so split them up for later processing.
3290 map<int, scalarexp_t *> hfta_aggr_se;
3291 for(a=0;a<aggr_tbl.size();++a){
3292 split_fta_aggr( &(aggr_tbl), a,
3293 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
3294 fta_node->select_list,
3301 // Next, the select list.
3303 for(s=0;s<select_list.size();s++){
3304 bool fta_forbidden = false;
3305 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3306 stream_node->select_list.push_back(
3307 new select_element(root_se, select_list[s]->name));
3312 // All the predicates in the where clause must execute
3315 for(p=0;p<where.size();p++){
3316 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
3317 cnf_elem *new_cnf = new cnf_elem(new_pr);
3318 analyze_cnf(new_cnf);
3320 fta_node->where.push_back(new_cnf);
3323 // All of the predicates in the having clause must
3324 // execute in the stream node.
3326 for(p=0;p<having.size();p++){
3327 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3328 cnf_elem *cnf_root = new cnf_elem(pr_root);
3329 analyze_cnf(cnf_root);
3331 stream_node->having.push_back(cnf_root);
3334 // All of the predicates in the closing when clause must
3335 // execute in the stream node.
3337 for(p=0;p<closing_when.size();p++){
3338 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3339 cnf_elem *cnf_root = new cnf_elem(pr_root);
3340 analyze_cnf(cnf_root);
3342 stream_node->closing_when.push_back(cnf_root);
3346 // Divide the parameters among the stream, FTA.
3347 // Currently : assume that the stream receives all parameters
3348 // and parameter updates, incorporates them, then passes
3349 // all of the parameters to the FTA.
3350 // This will need to change (tables, fta-unsafe types. etc.)
3352 // I will pass on the use_handle_access marking, even
3353 // though the fcn call that requires handle access might
3354 // exist in only one of the parts of the query.
3355 // Parameter manipulation and handle access determination will
3356 // need to be revisited anyway.
3357 vector<string> param_names = param_tbl->get_param_names();
3359 for(pi=0;pi<param_names.size();pi++){
3360 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3361 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3362 param_tbl->handle_access(param_names[pi]));
3363 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3364 param_tbl->handle_access(param_names[pi]));
3366 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3367 stream_node->definitions = definitions;
3369 // Now split by interfaces XXXX
3370 if(ifaces.size() > 1){
3371 for(si=0;si<ifaces.size();++si){
3372 sgah_qpn *subq_node = new sgah_qpn();
3374 // Name the subquery
3375 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3377 subq_node->set_node_name( new_name) ;
3378 sel_names.push_back(subq_node->get_node_name());
3381 subq_node->table_name = fta_node->table_name->duplicate();
3382 subq_node->table_name->set_machine(ifaces[si].first);
3383 subq_node->table_name->set_interface(ifaces[si].second);
3384 subq_node->table_name->set_ifq(false);
3387 for(g=0;g<fta_node->gb_tbl.size();g++){
3388 // Insert the gbvar into the lfta.
3389 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
3390 subq_node->gb_tbl.add_gb_var(
3391 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
3395 // Insert the aggregates
3396 for(a=0;a<fta_node->aggr_tbl.size();++a){
3397 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
3400 for(s=0;s<fta_node->select_list.size();s++){
3401 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3403 for(p=0;p<fta_node->where.size();p++){
3404 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3405 cnf_elem *new_cnf = new cnf_elem(new_pr);
3406 analyze_cnf(new_cnf);
3408 subq_node->where.push_back(new_cnf);
3410 for(p=0;p<fta_node->having.size();p++){
3411 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
3412 cnf_elem *new_cnf = new cnf_elem(new_pr);
3413 analyze_cnf(new_cnf);
3415 subq_node->having.push_back(new_cnf);
3417 // Xfer all of the parameters.
3418 // Use existing handle annotations.
3419 vector<string> param_names = param_tbl->get_param_names();
3421 for(pi=0;pi<param_names.size();pi++){
3422 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3423 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3424 param_tbl->handle_access(param_names[pi]));
3426 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3427 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3428 this->error_code = 3;
3432 ret_vec.push_back(subq_node);
3435 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
3436 fta_node->node_name, sel_names, ifaces, ifdb);
3439 Do not split sources until we are done with optimizations
3440 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3441 for(i=0;i<split_merge.size();++i){
3442 ret_vec.push_back(split_merge[i]);
3445 ret_vec.push_back(mrg_node);
3446 ret_vec.push_back(stream_node);
3447 hfta_returned = 1/*split_merge.size()*/+1;
3450 fta_node->table_name->set_machine(ifaces[0].first);
3451 fta_node->table_name->set_interface(ifaces[0].second);
3452 fta_node->table_name->set_ifq(false);
3453 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3454 this->error_code = 3;
3457 ret_vec.push_back(fta_node);
3458 ret_vec.push_back(stream_node);
3463 // ret_vec.push_back(fta_node);
3464 // ret_vec.push_back(stream_node);
3471 /////////////////////////////////////////////////////////////////////
3472 /// Split into selection LFTA, aggregation HFTA.
3474 spx_qpn *fta_node = new spx_qpn();
3475 fta_node->table_name = table_name;
3476 fta_node->set_node_name( "_fta_"+node_name );
3477 fta_node->table_name->set_range_var(table_name->get_var_name());
3480 rsgah_qpn *stream_node = new rsgah_qpn();
3481 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3482 stream_node->set_node_name( node_name );
3483 stream_node->table_name->set_range_var(table_name->get_var_name());
3486 vector< vector<select_element *> *> select_vec;
3487 select_vec.push_back(&(fta_node->select_list)); // only one child
3489 // Process the gbvars. Split their defining SEs.
3490 for(g=0;g<gb_tbl.size();g++){
3491 bool fta_forbidden = false;
3492 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3494 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
3495 fta_forbidden, se_src, select_vec, Ext_fcns
3497 // if(fta_forbidden) (
3498 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3499 stream_node->gb_tbl.add_gb_var(
3500 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
3503 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
3504 stream_node->gb_tbl.add_gb_var(
3505 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
3510 // Process the aggregate table.
3511 // Copy to stream, split the SEs.
3512 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
3513 for(a=0;a<aggr_tbl.size();++a){
3515 if(aggr_tbl.is_builtin(a)){
3516 if(aggr_tbl.is_star_aggr(a)){
3517 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
3518 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
3520 bool fta_forbidden = false;
3521 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3523 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3524 fta_forbidden, se_src, select_vec, Ext_fcns
3526 // if(fta_forbidden) (
3527 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3528 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
3529 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
3531 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3532 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
3533 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
3536 hse->set_data_type(aggr_tbl.get_data_type(a));
3537 hse->set_aggr_id(a);
3538 hfta_aggr_se[a]=hse;
3540 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
3541 vector<scalarexp_t *> new_opl;
3542 for(o=0;o<opl.size();++o){
3543 bool fta_forbidden = false;
3544 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3545 scalarexp_t *agg_se = split_ftavec_se( opl[o],
3546 fta_forbidden, se_src, select_vec, Ext_fcns
3548 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3549 // fta_forbidden, se_src, select_vec, Ext_fcns
3551 // if(fta_forbidden) (
3552 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3553 new_opl.push_back(agg_se);
3555 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3556 new_opl.push_back(new_se);
3559 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
3560 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
3561 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
3562 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
3563 hse->set_aggr_id(a);
3564 hfta_aggr_se[a]=hse;
3569 // Process the WHERE clause.
3570 // If it is fta-safe AND it refs only fta-safe gbvars,
3571 // then expand the gbvars and put it into the lfta.
3572 // Else, split it into an hfta predicate ref'ing
3573 // se's computed partially in the lfta.
3575 predicate_t *pr_root;
3577 for(p=0;p<where.size();p++){
3578 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
3579 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3580 fta_forbidden = true;
3582 pr_root = dup_pr(where[p]->pr, NULL);
3583 expand_gbvars_pr(pr_root, gb_tbl);
3584 fta_forbidden = false;
3586 cnf_elem *cnf_root = new cnf_elem(pr_root);
3587 analyze_cnf(cnf_root);
3590 stream_node->where.push_back(cnf_root);
3592 fta_node->where.push_back(cnf_root);
3597 // Process the Select clause, rehome it on the
3599 for(s=0;s<select_list.size();s++){
3600 bool fta_forbidden = false;
3601 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3602 stream_node->select_list.push_back(
3603 new select_element(root_se, select_list[s]->name));
3607 // Process the Having clause
3609 // All of the predicates in the having clause must
3610 // execute in the stream node.
3612 for(p=0;p<having.size();p++){
3613 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3614 cnf_elem *cnf_root = new cnf_elem(pr_root);
3615 analyze_cnf(cnf_root);
3617 stream_node->having.push_back(cnf_root);
3619 // Same for closing when
3620 for(p=0;p<closing_when.size();p++){
3621 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3622 cnf_elem *cnf_root = new cnf_elem(pr_root);
3623 analyze_cnf(cnf_root);
3625 stream_node->closing_when.push_back(cnf_root);
3629 // Handle parameters and a few last details.
3630 vector<string> param_names = param_tbl->get_param_names();
3632 for(pi=0;pi<param_names.size();pi++){
3633 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3634 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3635 param_tbl->handle_access(param_names[pi]));
3636 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3637 param_tbl->handle_access(param_names[pi]));
3640 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3641 stream_node->definitions = definitions;
3643 // Now split by interfaces YYYY
3644 if(ifaces.size() > 1){
3645 for(si=0;si<ifaces.size();++si){
3646 spx_qpn *subq_node = new spx_qpn();
3648 // Name the subquery
3649 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3651 subq_node->set_node_name( new_name) ;
3652 sel_names.push_back(subq_node->get_node_name());
3655 subq_node->table_name = fta_node->table_name->duplicate();
3656 subq_node->table_name->set_machine(ifaces[si].first);
3657 subq_node->table_name->set_interface(ifaces[si].second);
3658 subq_node->table_name->set_ifq(false);
3660 for(s=0;s<fta_node->select_list.size();s++){
3661 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3663 for(p=0;p<fta_node->where.size();p++){
3664 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3665 cnf_elem *new_cnf = new cnf_elem(new_pr);
3666 analyze_cnf(new_cnf);
3668 subq_node->where.push_back(new_cnf);
3670 // Xfer all of the parameters.
3671 // Use existing handle annotations.
3672 vector<string> param_names = param_tbl->get_param_names();
3674 for(pi=0;pi<param_names.size();pi++){
3675 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3676 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3677 param_tbl->handle_access(param_names[pi]));
3679 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3680 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3681 this->error_code = 3;
3685 ret_vec.push_back(subq_node);
3688 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3689 fta_node->node_name, sel_names, ifaces, ifdb);
3691 Do not split sources until we are done with optimizations
3692 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3693 for(i=0;i<split_merge.size();++i){
3694 ret_vec.push_back(split_merge[i]);
3697 ret_vec.push_back(mrg_node);
3698 ret_vec.push_back(stream_node);
3699 hfta_returned = 1/*split_merge.size()*/+1;
3702 fta_node->table_name->set_machine(ifaces[0].first);
3703 fta_node->table_name->set_interface(ifaces[0].second);
3704 fta_node->table_name->set_ifq(false);
3705 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3706 this->error_code = 3;
3709 ret_vec.push_back(fta_node);
3710 ret_vec.push_back(stream_node);
3720 Splitting an aggregation operator
3722 An aggregation operator can reference
3723 literals, parameters, colrefs, group-by vars, aggregates,
3724 operators, functions
3726 an aggregation contains
3727 A selection list of SEs
3728 A where list of predicates
3729 A list of group-by variable definitions
3730 A list of aggregates to be computed
3731 A HAVING list of predicates.
3733 Aggregation involves two phases:
3734 1) given an input tuple, determine if it satisfies all of
3735 the WHERE predicates. If so, compute the group.
3736 Look up the group, update its aggregates.
3737 2) given a closed group and its aggregates, determine
3738 if these values satisfy all of the HAVING predicates.
3739 If so, evaluate the SEs on the selection list from the
3740 group and its aggregates.
3741 The two-phase nature of aggregation places restrictions on
3742 what can be referenced by different components of the operator
3743 (in addition to functions and operators).
3744 - group-by variables : literals, parameters, colrefs
3745 - WHERE predicates : group-by vars, literals, params, colrefs
3746 - HAVING predicates : group-by vars, literals, params, aggregates
3747 - Selection list SEs : group-by vars, literals, params, aggregates
3749 Splitting an aggregation operator into an LFTA/HFTA part
3750 involves performing partial aggregation at the LFTA and
3751 completing the aggregation at the HFTA.
3752 - given a tuple, the LFTA part evaluates the WHERE clause,
3753 and if it is satisfied, computes the group. Look up the group
3754 and update the aggregates. Output the group and its partial
3756 - Given a partial aggregate from the LFTA, look up the group and
3757 update its aggregates. When the group is closed, evaluate
3758 the HAVING clause and the SEs on the selection list.
3759 THEREFORE the selection list of the LFTA must consist of the
3760 group-by variables and the set of (bare) subaggregate values
3761 necessary to compute the super aggregates.
3762 Unlike the case with the SPX operator, the SE splitting point
3763 is at the GBvar and the aggregate value level.
3766 For each group-by variable
3767 Put the GB variable definition in the LFTA GBVAR list.
3768 Put the GBVAR in the LFTA selection list (as an SE).
3769 Put a reference to that GBVAR in the HFTA GBVAR list.
3771 Split the aggregate into a superaggregate and a subaggregate.
3772 The SE of the superaggregate references the subaggregate value.
3773 (this will need modifications for MF aggregation)
3774 For each SE in the selection list, HAVING predicate
3775 Make GBVAR references point to the new GBVAR
3776 make the aggregate value references point to the new aggregates.
3778 SEs are not so much split as their ref's are changed.
3780 TODO: insert tablevar names into the colrefs.
3785 vector<qp_node *> sgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3789 vector<qp_node *> ret_vec;
3790 int s, p, g, a, o, i;
3793 vector<string> fta_flds, stream_flds;
3795 // If the node reads from a stream, don't split.
3796 // int t = Schema->get_table_ref(table_name->get_schema_name());
3797 int t = table_name->get_schema_ref();
3798 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3799 ret_vec.push_back(this);
3803 // Get the set of interfaces it accesses.
3805 vector<string> sel_names;
3806 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3807 if (ifaces.empty()) {
3808 fprintf(stderr,"INTERNAL ERROR in sgah_qpn::split_node_for_fta - empty interface set\n");
3814 //////////////////////////////////////////////
3815 // Is this LFTA-only?
3816 if(definitions.count("lfta_aggregation")>0){
3817 // Yes. Ensure that everything is lfta-safe.
3819 // Check only one interface is accessed.
3820 if(ifaces.size()>1){
3821 this->err_str = "ERROR, group-by query "+node_name+" is lfta-only, but it accesses more than one interface:\n";
3822 for(si=0;si<ifaces.size();++si)
3823 this->err_str += "\t"+ifaces[si].first+"."+ifaces[si].second+"\n";
3824 this->error_code = 2;
3828 // Check the group-by attributes
3829 for(g=0;g<gb_tbl.size();g++){
3830 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3831 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition and the query is lfta-only (%s).\n",
3832 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3834 this->error_code = 1;
3835 this->err_str = tmpstr;
3840 // Verify that the SEs in the aggregate definitions are fta-safe
3841 for(a=0;a<aggr_tbl.size();++a){
3842 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3843 if(ase != NULL){ // COUNT(*) does not have a SE.
3844 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3845 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe scalar expression and the query is lfta-only (%s).\n",
3846 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3848 this->error_code = 1;
3849 this->err_str = tmpstr;
3853 if(! aggr_tbl.fta_legal(a,Ext_fcns)){
3854 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3855 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe aggregate and the query is lfta-only (%s).\n",
3856 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3858 this->error_code = 1;
3859 this->err_str = tmpstr;
3865 // Ensure that all the aggregates are fta-safe ....
3869 for(s=0;s<select_list.size();s++){
3870 if(! check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns)){
3871 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
3872 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3874 this->error_code = 1;
3875 this->err_str = tmpstr;
3882 for(p=0;p<where.size();p++){
3883 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3884 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
3885 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3887 this->error_code = 1;
3888 this->err_str = tmpstr;
3895 if(having.size()>0){
3896 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : the query is lfta-only, so it can't have a HAVING clause.(%s).\n",
3897 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3899 this->error_code = 1;
3900 this->err_str = tmpstr;
3903 // The query is lfta safe, return it.
3906 ret_vec.push_back(this);
3910 //////////////////////////////////////////////////////////////
3911 /// Split into lfta, hfta.
3913 // A sgah node must always be split,
3914 // if for no other reason than to complete the
3915 // partial aggregation.
3917 // First, determine if the query can be spit into aggr/aggr,
3918 // or if it must be selection/aggr.
3919 // Splitting into selection/aggr is allowed only
3920 // if select_lfta is set.
3923 bool select_allowed = definitions.count("select_lfta")>0;
3924 bool select_rqd = false;
3926 set<int> unsafe_gbvars; // for processing where clause
3927 for(g=0;g<gb_tbl.size();g++){
3928 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3929 if(!select_allowed){
3930 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3931 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3933 this->error_code = 1;
3934 this->err_str = tmpstr;
3938 unsafe_gbvars.insert(g);
3943 // Verify that the SEs in the aggregate definitions are fta-safe
3944 for(a=0;a<aggr_tbl.size();++a){
3945 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3946 if(ase != NULL){ // COUNT(*) does not have a SE.
3947 if(!select_allowed){
3948 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3949 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3950 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3952 this->error_code = 1;
3953 this->err_str = tmpstr;
3962 // Verify that all of the ref'd UDAFs can be split.
3964 for(a=0;a<aggr_tbl.size();++a){
3965 if(! aggr_tbl.is_builtin(a)){
3966 int afcn = aggr_tbl.get_fcn_id(a);
3967 int super_id = Ext_fcns->get_superaggr_id(afcn);
3968 int sub_id = Ext_fcns->get_subaggr_id(afcn);
3969 if(super_id < 0 || sub_id < 0){
3970 if(!select_allowed){
3971 this->err_str += "ERROR in sgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3972 this->error_code = 1;
3981 for(p=0;p<where.size();p++){
3982 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3983 if(!select_allowed){
3984 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3985 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3987 this->error_code = 1;
3988 this->err_str = tmpstr;
3999 /////////////////////////////////////////////////////
4000 // Split into aggr/aggr.
4006 sgah_qpn *fta_node = new sgah_qpn();
4007 fta_node->table_name = table_name;
4008 fta_node->set_node_name( "_fta_"+node_name );
4009 fta_node->table_name->set_range_var(table_name->get_var_name());
4012 sgah_qpn *stream_node = new sgah_qpn();
4013 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4014 stream_node->set_node_name( node_name );
4015 stream_node->table_name->set_range_var(table_name->get_var_name());
4017 // allowed stream disorder. Default is 2,
4018 // can override with max_lfta_disorder setting.
4019 // Also limit the hfta disorder, set to lfta disorder + 1.
4020 // can override with max_hfta_disorder.
4022 fta_node->lfta_disorder = 2;
4023 if(this->get_val_of_def("max_lfta_disorder") != ""){
4024 int d = atoi(this->get_val_of_def("max_lfta_disorder").c_str() );
4026 fprintf(stderr,"Warning, max_lfta_disorder in node %s is %d, must be at least 1, ignoring.\n",node_name.c_str(), d);
4028 fta_node->lfta_disorder = d;
4029 printf("node %s setting lfta_disorder = %d\n",node_name.c_str(),fta_node->lfta_disorder);
4032 if(fta_node->lfta_disorder > 1)
4033 stream_node->hfta_disorder = fta_node->lfta_disorder + 1;
4035 stream_node->hfta_disorder = 1;
4037 if(this->get_val_of_def("max_hfta_disorder") != ""){
4038 int d = atoi(this->get_val_of_def("max_hfta_disorder").c_str() );
4039 if(d<fta_node->lfta_disorder){
4040 fprintf(stderr,"Warning, max_hfta_disorder in node %s is %d, must be at least the max lfta disorder %d, ignoring.\n",node_name.c_str(), d,fta_node->lfta_disorder);
4042 fta_node->lfta_disorder = d;
4044 if(fta_node->lfta_disorder < fta_node->hfta_disorder){
4045 fta_node->hfta_disorder = fta_node->lfta_disorder + 1;
4049 // First, process the group-by variables.
4050 // The fta must supply the values of all the gbvars.
4051 // If a gb is computed, the computation must be
4052 // performed at the FTA, so the SE must be FTA-safe.
4053 // Nice side effect : the gbvar table contains
4054 // matching entries for the original query, the lfta query,
4055 // and the hfta query. So gbrefs in the new queries are set
4056 // correctly just by inheriting the gbrefs from the old query.
4057 // If this property changed, I'll need translation tables.
4060 for(g=0;g<gb_tbl.size();g++){
4061 // Insert the gbvar into the lfta.
4062 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
4063 fta_node->gb_tbl.add_gb_var(
4064 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
4067 // Insert a ref to the value of the gbvar into the lfta select list.
4068 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
4069 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
4070 gbvar_fta->set_gb_ref(g);
4071 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
4072 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
4074 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
4075 gbvar_stream->set_gb_ref(-1); // used as GBvar def
4076 stream_node->gb_tbl.add_gb_var(
4077 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
4080 // multiple aggregation patterns, if any, go with the hfta
4081 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4083 // SEs in the aggregate definitions.
4084 // They are all safe, so split them up for later processing.
4085 map<int, scalarexp_t *> hfta_aggr_se;
4086 for(a=0;a<aggr_tbl.size();++a){
4087 split_fta_aggr( &(aggr_tbl), a,
4088 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
4089 fta_node->select_list,
4097 for(ii=0;ii<fta_flds.size() || ii < fta_node->select_list.size();++ii){
4098 if(ii<fta_flds.size())
4099 printf("\t%s : ",fta_flds[ii].c_str());
4102 if(ii<fta_node->select_list.size())
4103 printf("%s\n",fta_node->select_list[ii]->to_string().c_str());
4107 printf("hfta aggregates are:");
4108 for(ii=0;ii<stream_node->aggr_tbl.size();++ii){
4109 printf(" %s",stream_node->aggr_tbl.get_op(ii).c_str());
4111 printf("\nlfta aggregates are:");
4112 for(ii=0;ii<fta_node->aggr_tbl.size();++ii){
4113 printf(" %s",fta_node->aggr_tbl.get_op(ii).c_str());
4121 // Next, the select list.
4123 for(s=0;s<select_list.size();s++){
4124 bool fta_forbidden = false;
4125 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4126 stream_node->select_list.push_back(
4127 new select_element(root_se, select_list[s]->name));
4132 // All the predicates in the where clause must execute
4135 for(p=0;p<where.size();p++){
4136 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
4137 cnf_elem *new_cnf = new cnf_elem(new_pr);
4138 analyze_cnf(new_cnf);
4140 fta_node->where.push_back(new_cnf);
4143 // All of the predicates in the having clause must
4144 // execute in the stream node.
4146 for(p=0;p<having.size();p++){
4147 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4148 cnf_elem *cnf_root = new cnf_elem(pr_root);
4149 analyze_cnf(cnf_root);
4151 stream_node->having.push_back(cnf_root);
4155 // Divide the parameters among the stream, FTA.
4156 // Currently : assume that the stream receives all parameters
4157 // and parameter updates, incorporates them, then passes
4158 // all of the parameters to the FTA.
4159 // This will need to change (tables, fta-unsafe types. etc.)
4161 // I will pass on the use_handle_access marking, even
4162 // though the fcn call that requires handle access might
4163 // exist in only one of the parts of the query.
4164 // Parameter manipulation and handle access determination will
4165 // need to be revisited anyway.
4166 vector<string> param_names = param_tbl->get_param_names();
4168 for(pi=0;pi<param_names.size();pi++){
4169 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4170 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4171 param_tbl->handle_access(param_names[pi]));
4172 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4173 param_tbl->handle_access(param_names[pi]));
4175 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4176 stream_node->definitions = definitions;
4178 // Now split by interfaces XXXX
4179 if(ifaces.size() > 1){
4180 for(si=0;si<ifaces.size();++si){
4181 sgah_qpn *subq_node = new sgah_qpn();
4183 // Name the subquery
4184 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4186 subq_node->set_node_name( new_name) ;
4187 sel_names.push_back(subq_node->get_node_name());
4190 subq_node->table_name = fta_node->table_name->duplicate();
4191 subq_node->table_name->set_machine(ifaces[si].first);
4192 subq_node->table_name->set_interface(ifaces[si].second);
4193 subq_node->table_name->set_ifq(false);
4196 for(g=0;g<fta_node->gb_tbl.size();g++){
4197 // Insert the gbvar into the lfta.
4198 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
4199 subq_node->gb_tbl.add_gb_var(
4200 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
4204 // Insert the aggregates
4205 for(a=0;a<fta_node->aggr_tbl.size();++a){
4206 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
4209 for(s=0;s<fta_node->select_list.size();s++){
4210 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4212 for(p=0;p<fta_node->where.size();p++){
4213 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4214 cnf_elem *new_cnf = new cnf_elem(new_pr);
4215 analyze_cnf(new_cnf);
4217 subq_node->where.push_back(new_cnf);
4219 for(p=0;p<fta_node->having.size();p++){
4220 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
4221 cnf_elem *new_cnf = new cnf_elem(new_pr);
4222 analyze_cnf(new_cnf);
4224 subq_node->having.push_back(new_cnf);
4226 // Xfer all of the parameters.
4227 // Use existing handle annotations.
4228 vector<string> param_names = param_tbl->get_param_names();
4230 for(pi=0;pi<param_names.size();pi++){
4231 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4232 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4233 param_tbl->handle_access(param_names[pi]));
4235 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4236 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4237 this->error_code = 3;
4242 subq_node->lfta_disorder = fta_node->lfta_disorder;
4244 ret_vec.push_back(subq_node);
4247 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
4248 fta_node->node_name, sel_names, ifaces, ifdb);
4249 mrg_node->set_disorder(fta_node->lfta_disorder);
4252 Do not split sources until we are done with optimizations
4253 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4254 for(i=0;i<split_merge.size();++i){
4255 ret_vec.push_back(split_merge[i]);
4258 ret_vec.push_back(mrg_node);
4259 ret_vec.push_back(stream_node);
4260 hfta_returned = 1/*split_merge.size()*/+1;
4263 fta_node->table_name->set_machine(ifaces[0].first);
4264 fta_node->table_name->set_interface(ifaces[0].second);
4265 fta_node->table_name->set_ifq(false);
4266 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4267 this->error_code = 3;
4270 ret_vec.push_back(fta_node);
4271 ret_vec.push_back(stream_node);
4276 // ret_vec.push_back(fta_node);
4277 // ret_vec.push_back(stream_node);
4284 /////////////////////////////////////////////////////////////////////
4285 /// Split into selection LFTA, aggregation HFTA.
4287 spx_qpn *fta_node = new spx_qpn();
4288 fta_node->table_name = table_name;
4289 fta_node->set_node_name( "_fta_"+node_name );
4290 fta_node->table_name->set_range_var(table_name->get_var_name());
4293 sgah_qpn *stream_node = new sgah_qpn();
4294 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4295 stream_node->set_node_name( node_name );
4296 stream_node->table_name->set_range_var(table_name->get_var_name());
4299 vector< vector<select_element *> *> select_vec;
4300 select_vec.push_back(&(fta_node->select_list)); // only one child
4302 // Process the gbvars. Split their defining SEs.
4303 for(g=0;g<gb_tbl.size();g++){
4304 bool fta_forbidden = false;
4305 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4307 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
4308 fta_forbidden, se_src, select_vec, Ext_fcns
4310 // if(fta_forbidden) (
4311 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4312 stream_node->gb_tbl.add_gb_var(
4313 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
4316 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
4317 stream_node->gb_tbl.add_gb_var(
4318 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
4322 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4324 // Process the aggregate table.
4325 // Copy to stream, split the SEs.
4326 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
4327 for(a=0;a<aggr_tbl.size();++a){
4329 if(aggr_tbl.is_builtin(a)){
4330 if(aggr_tbl.is_star_aggr(a)){
4331 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
4332 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
4334 bool fta_forbidden = false;
4335 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4337 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4338 fta_forbidden, se_src, select_vec, Ext_fcns
4340 // if(fta_forbidden) (
4341 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4342 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
4343 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
4345 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4346 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
4347 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
4350 hse->set_data_type(aggr_tbl.get_data_type(a));
4351 hse->set_aggr_id(a);
4352 hfta_aggr_se[a]=hse;
4354 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
4355 vector<scalarexp_t *> new_opl;
4356 for(o=0;o<opl.size();++o){
4357 bool fta_forbidden = false;
4358 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4359 scalarexp_t *agg_se = split_ftavec_se( opl[o],
4360 fta_forbidden, se_src, select_vec, Ext_fcns
4362 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4363 // fta_forbidden, se_src, select_vec, Ext_fcns
4365 // if(fta_forbidden) (
4366 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4367 new_opl.push_back(agg_se);
4369 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4370 new_opl.push_back(new_se);
4373 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
4374 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
4375 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
4376 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
4377 hse->set_aggr_id(a);
4378 hfta_aggr_se[a]=hse;
4383 // Process the WHERE clause.
4384 // If it is fta-safe AND it refs only fta-safe gbvars,
4385 // then expand the gbvars and put it into the lfta.
4386 // Else, split it into an hfta predicate ref'ing
4387 // se's computed partially in the lfta.
4389 predicate_t *pr_root;
4391 for(p=0;p<where.size();p++){
4392 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
4393 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
4394 fta_forbidden = true;
4396 pr_root = dup_pr(where[p]->pr, NULL);
4397 expand_gbvars_pr(pr_root, gb_tbl);
4398 fta_forbidden = false;
4400 cnf_elem *cnf_root = new cnf_elem(pr_root);
4401 analyze_cnf(cnf_root);
4404 stream_node->where.push_back(cnf_root);
4406 fta_node->where.push_back(cnf_root);
4411 // Process the Select clause, rehome it on the
4413 for(s=0;s<select_list.size();s++){
4414 bool fta_forbidden = false;
4415 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4416 stream_node->select_list.push_back(
4417 new select_element(root_se, select_list[s]->name));
4421 // Process the Having clause
4423 // All of the predicates in the having clause must
4424 // execute in the stream node.
4426 for(p=0;p<having.size();p++){
4427 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4428 cnf_elem *cnf_root = new cnf_elem(pr_root);
4429 analyze_cnf(cnf_root);
4431 stream_node->having.push_back(cnf_root);
4434 // Handle parameters and a few last details.
4435 vector<string> param_names = param_tbl->get_param_names();
4437 for(pi=0;pi<param_names.size();pi++){
4438 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4439 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4440 param_tbl->handle_access(param_names[pi]));
4441 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4442 param_tbl->handle_access(param_names[pi]));
4445 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4446 stream_node->definitions = definitions;
4448 // Now split by interfaces YYYY
4449 if(ifaces.size() > 1){
4450 for(si=0;si<ifaces.size();++si){
4451 spx_qpn *subq_node = new spx_qpn();
4453 // Name the subquery
4454 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4456 subq_node->set_node_name( new_name) ;
4457 sel_names.push_back(subq_node->get_node_name());
4460 subq_node->table_name = fta_node->table_name->duplicate();
4461 subq_node->table_name->set_machine(ifaces[si].first);
4462 subq_node->table_name->set_interface(ifaces[si].second);
4463 subq_node->table_name->set_ifq(false);
4465 for(s=0;s<fta_node->select_list.size();s++){
4466 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4468 for(p=0;p<fta_node->where.size();p++){
4469 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4470 cnf_elem *new_cnf = new cnf_elem(new_pr);
4471 analyze_cnf(new_cnf);
4473 subq_node->where.push_back(new_cnf);
4475 // Xfer all of the parameters.
4476 // Use existing handle annotations.
4477 vector<string> param_names = param_tbl->get_param_names();
4479 for(pi=0;pi<param_names.size();pi++){
4480 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4481 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4482 param_tbl->handle_access(param_names[pi]));
4484 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4485 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4486 this->error_code = 3;
4490 ret_vec.push_back(subq_node);
4493 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4494 fta_node->node_name, sel_names, ifaces, ifdb);
4496 Do not split sources until we are done with optimizations
4497 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4498 for(i=0;i<split_merge.size();++i){
4499 ret_vec.push_back(split_merge[i]);
4502 ret_vec.push_back(mrg_node);
4503 ret_vec.push_back(stream_node);
4504 hfta_returned = 1/*split_merge.size()*/+1;
4507 fta_node->table_name->set_machine(ifaces[0].first);
4508 fta_node->table_name->set_interface(ifaces[0].second);
4509 fta_node->table_name->set_ifq(false);
4510 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4511 this->error_code = 3;
4514 ret_vec.push_back(fta_node);
4515 ret_vec.push_back(stream_node);
4520 // ret_vec.push_back(fta_node);
4521 // ret_vec.push_back(stream_node);
4530 SPLITTING A EQ-TEMPORAL, HASH JOIN OPERATOR
4532 A JOIN_EQ_HASH_QPN node may reference:
4533 literals, parameters, colrefs, functions, operators
4534 A JOIN_EQ_HASH_QPN node may not reference:
4535 group-by variables, aggregates
4537 A JOIN_EQ_HASH_QPN node contains
4538 selection list of SEs
4539 where list of CNF predicates, broken into:
4546 For each tablevar whose source is a PROTOCOL
4547 Create a LFTA for that tablevar
4548 Push as many prefilter[..] predicates to that tablevar as is
4550 Split the SEs in the select list, and the predicates not
// Split this eq-temporal hash join for FTA execution.
// A new join_eq_hash_qpn (stream_node) is built to run the join itself,
// and one spx_qpn child is created for every FROM entry whose schema type
// is PROTOCOL_SCHEMA.  Predicates and select-list SEs are divided between
// the children and stream_node via check_fta_forbidden_pr,
// split_ftavec_pr and split_ftavec_se.
//   Ext_fcns       passed to the fta-safety check and the splitters.
//   Schema         used to look up the schema type of each FROM entry.
//   hfta_returned  output; on the split path set to hfta_nodes.size()+1.
//   ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx
//                  forwarded to get_ifaces() / resolve_if_params() /
//                  the mrg_qpn constructor.
// On the split path ret_vec holds the child (or per-interface subquery)
// nodes first, then the merge nodes, then stream_node.
// A nonzero result from resolve_if_params() sets this->error_code to 3.
4555 vector<qp_node *> join_eq_hash_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4557 vector<qp_node *> ret_vec;
4560 // If the node reads from streams only, don't split.
4561 bool stream_only = true;
4562 for(f=0;f<from.size();++f){
4563 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4564 int t = from[f]->get_schema_ref();
4565 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA) stream_only = false;
4569 ret_vec.push_back(this);
4574 // The HFTA node, it is always returned.
4576 join_eq_hash_qpn *stream_node = new join_eq_hash_qpn();
4577 for(f=0;f<from.size();++f){
4578 // tablevar_t *tmp_tblvar = new tablevar_t( from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str());
4579 tablevar_t *tmp_tblvar = from[f]->duplicate();
4580 // tmp_tblvar->set_range_var(from[f]->get_var_name());
4582 stream_node->from.push_back(tmp_tblvar);
4584 stream_node->set_node_name(node_name);
4586 // Create spx (selection) children for each PROTOCOL source.
4587 vector<spx_qpn *> child_vec;
4588 vector< vector<select_element *> *> select_vec;
4589 for(f=0;f<from.size();++f){
4590 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4591 int t = from[f]->get_schema_ref();
4592 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA){
4593 spx_qpn *child_qpn = new spx_qpn();
// Child name is "_fta_<from index>_<node name>"; the same string is
// installed below as the schema name read by stream_node->from[f].
// NOTE(review): unbounded sprintf; tmpstr's capacity is declared elsewhere
// -- confirm it can hold node_name, or switch to snprintf.
4594 sprintf(tmpstr,"_fta_%d_%s",f,node_name.c_str());
4595 child_qpn->set_node_name(string(tmpstr));
4596 child_qpn->table_name = new tablevar_t(
4597 from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str(), from[f]->get_ifq());
4598 child_qpn->table_name->set_range_var(from[f]->get_var_name());
4600 child_vec.push_back(child_qpn);
4601 select_vec.push_back(&(child_qpn->select_list));
4603 // Update the stream's FROM clause to read from this child
4604 stream_node->from[f]->set_interface("");
4605 stream_node->from[f]->set_schema(tmpstr);
// NOTE(review): presumably the non-PROTOCOL branch -- NULL placeholders
// keep child_vec/select_vec index-aligned with from; confirm.
4607 child_vec.push_back(NULL);
4608 select_vec.push_back(NULL);
4612 // Push lfta-safe prefilter to the lfta
4613 // TODO: I'm not copying the preds, I dont *think* it will be a problem.
4614 predicate_t *pr_root;
4616 for(f=0;f<from.size();++f){
4617 vector<cnf_elem *> pred_vec = prefilter[f];
4618 if(child_vec[f] != NULL){
4619 for(p=0;p<pred_vec.size();++p){
// A predicate accepted by check_fta_forbidden_pr is handed to the child's
// WHERE list as the same cnf_elem object (not a copy).
4620 if(check_fta_forbidden_pr(pred_vec[p]->pr,NULL, Ext_fcns)){
4621 child_vec[f]->where.push_back(pred_vec[p]);
// Otherwise the predicate is split; the resulting root is wrapped in a
// new cnf_elem and kept as a prefilter of the stream node.
4623 pr_root = split_ftavec_pr(pred_vec[p]->pr,select_vec,Ext_fcns);
4624 cnf_elem *cnf_root = new cnf_elem(pr_root);
4625 analyze_cnf(cnf_root);
4626 stream_node->prefilter[f].push_back(cnf_root);
// Sources without a child keep their prefilter predicates on the stream
// node unchanged.
4630 for(p=0;p<pred_vec.size();++p){
4631 stream_node->prefilter[f].push_back(pred_vec[p]);
4637 // Process the other predicates
4638 for(p=0;p<temporal_eq.size();++p){
4639 pr_root = split_ftavec_pr(temporal_eq[p]->pr,select_vec,Ext_fcns);
4640 cnf_elem *cnf_root = new cnf_elem(pr_root);
4641 analyze_cnf(cnf_root);
4642 stream_node->temporal_eq.push_back(cnf_root);
4644 for(p=0;p<hash_eq.size();++p){
4645 pr_root = split_ftavec_pr(hash_eq[p]->pr,select_vec,Ext_fcns);
4646 cnf_elem *cnf_root = new cnf_elem(pr_root);
4647 analyze_cnf(cnf_root);
4648 stream_node->hash_eq.push_back(cnf_root);
4650 for(p=0;p<postfilter.size();++p){
4651 pr_root = split_ftavec_pr(postfilter[p]->pr,select_vec,Ext_fcns);
4652 cnf_elem *cnf_root = new cnf_elem(pr_root);
4653 analyze_cnf(cnf_root);
4654 stream_node->postfilter.push_back(cnf_root);
// Split every select-list SE.  The split result is used directly when it
// is fta-forbidden or its source is not a PROTOCOL source; otherwise it is
// replaced by a reference produced by make_fta_se_ref.
4658 for(s=0;s<select_list.size();s++){
4659 bool fta_forbidden = false;
4660 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4661 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
4662 fta_forbidden, se_src, select_vec, Ext_fcns
4664 if(fta_forbidden || !is_PROTOCOL_source(se_src, select_vec)){
4665 stream_node->select_list.push_back(
4666 new select_element(root_se, select_list[s]->name) );
4668 scalarexp_t *new_se=make_fta_se_ref(select_vec,root_se,se_src);
4669 stream_node->select_list.push_back(
4670 new select_element(new_se, select_list[s]->name)
4676 // I need to "rehome" the colrefs -- make the annotations in the colrefs
4677 // agree with their tablevars.
4678 for(f=0;f<child_vec.size();++f){
4679 if(child_vec[f]!=NULL){
// Each child has exactly one table, so its colrefs are bound against a
// one-element tablevar list with 0,0 as the trailing arguments.
4680 vector<tablevar_t *> fm; fm.push_back(child_vec[f]->table_name);
4682 for(s=0;s<child_vec[f]->select_list.size();++s)
4683 bind_colref_se(child_vec[f]->select_list[s]->se, fm,0,0);
4684 for(p=0;p<child_vec[f]->where.size();++p)
4685 // bind_colref_pr(child_vec[f]->where[p]->pr, fm,f,0);
4686 bind_colref_pr(child_vec[f]->where[p]->pr, fm,0,0);
4690 // rehome the colrefs in the hfta node.
// The whole set of binding passes below is repeated once per FROM entry,
// each time passing f as both trailing arguments.
4691 for(f=0;f<stream_node->from.size();++f){
4692 stream_node->where.clear();
4693 for(s=0;s<stream_node->from.size();++s){
4694 for(p=0;p<stream_node->prefilter[s].size();++p){
4695 bind_colref_pr((stream_node->prefilter[s])[p]->pr,stream_node->from,f,f);
4698 for(p=0;p<stream_node->temporal_eq.size();++p){
4699 bind_colref_pr(stream_node->temporal_eq[p]->pr,stream_node->from,f,f);
4701 for(p=0;p<stream_node->hash_eq.size();++p){
4702 bind_colref_pr(stream_node->hash_eq[p]->pr,stream_node->from,f,f);
4704 for(p=0;p<stream_node->postfilter.size();++p){
4705 bind_colref_pr(stream_node->postfilter[p]->pr,stream_node->from,f,f);
4707 for(s=0;s<stream_node->select_list.size();++s){
4708 bind_colref_se(stream_node->select_list[s]->se,stream_node->from,f,f);
4712 // Rebuild the WHERE clause
// Order: prefilters of every source, then temporal_eq, hash_eq, postfilter.
4713 stream_node->where.clear();
4714 for(s=0;s<stream_node->from.size();++s){
4715 for(p=0;p<stream_node->prefilter[s].size();++p){
4716 stream_node->where.push_back((stream_node->prefilter[s])[p]);
4719 for(p=0;p<stream_node->temporal_eq.size();++p){
4720 stream_node->where.push_back(stream_node->temporal_eq[p]);
4722 for(p=0;p<stream_node->hash_eq.size();++p){
4723 stream_node->where.push_back(stream_node->hash_eq[p]);
4725 for(p=0;p<stream_node->postfilter.size();++p){
4726 stream_node->where.push_back(stream_node->postfilter[p]);
4730 // Build the return list
4731 vector<qp_node *> hfta_nodes;
4733 for(f=0;f<from.size();++f){
4734 if(child_vec[f] != NULL){
4735 spx_qpn *c_node = child_vec[f];
4736 vector<pair<string, string> > ifaces = get_ifaces(c_node->table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
4737 if (ifaces.empty()) {
4738 fprintf(stderr,"INTERNAL ERROR in join_eq_hash_qpn::split_node_for_fta - empty interface set\n");
// Exactly one interface: the child itself is bound to it and returned.
4742 if(ifaces.size() == 1){
4743 c_node->table_name->set_machine(ifaces[0].first);
4744 c_node->table_name->set_interface(ifaces[0].second);
4745 c_node->table_name->set_ifq(false);
4746 if(c_node->resolve_if_params(ifdb, this->err_str)){
4747 this->error_code = 3;
4750 ret_vec.push_back(c_node);
// Several interfaces: one copy of the child per (machine, interface) pair,
// named "_<child name>_<machine>_<interface>".
4752 vector<string> sel_names;
4754 for(si=0;si<ifaces.size();++si){
4755 spx_qpn *subq_node = new spx_qpn();
4757 // Name the subquery
4758 string new_name = "_"+c_node->node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4760 subq_node->set_node_name( new_name) ;
4761 sel_names.push_back(subq_node->get_node_name());
4764 subq_node->table_name = c_node->table_name->duplicate();
4765 subq_node->table_name->set_machine(ifaces[si].first);
4766 subq_node->table_name->set_interface(ifaces[si].second);
4767 subq_node->table_name->set_ifq(false);
4769 for(s=0;s<c_node->select_list.size();s++){
4770 subq_node->select_list.push_back(dup_select(c_node->select_list[s], NULL));
4772 for(p=0;p<c_node->where.size();p++){
4773 predicate_t *new_pr = dup_pr(c_node->where[p]->pr, NULL);
4774 cnf_elem *new_cnf = new cnf_elem(new_pr);
4775 analyze_cnf(new_cnf);
// NOTE(review): writes to stdout for every duplicated predicate; this
// looks like leftover debug output -- confirm before removing.
4777 printf("table name is %s\n",subq_node->table_name->to_string().c_str());
4778 subq_node->where.push_back(new_cnf);
4780 // Xfer all of the parameters.
4781 // Use existing handle annotations.
4782 // vector<string> param_names = param_tbl->get_param_names();
4784 // for(pi=0;pi<param_names.size();pi++){
4785 // data_type *dt = param_tbl->get_data_type(param_names[pi]);
4786 // subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4787 // param_tbl->handle_access(param_names[pi]));
4789 // subq_node->definitions = definitions;
4791 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4792 this->error_code = 3;
4796 ret_vec.push_back(subq_node);
// The most recently pushed subquery is handed to the mrg_qpn constructor
// together with the child's name and the list of subquery names.
4798 int lpos = ret_vec.size()-1 ;
4799 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[lpos]),c_node->node_name,sel_names, ifaces, ifdb);
4801 Do not split sources until we are done with optimizations
4802 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4804 for(i=0;i<split_merge.size();++i){
4805 hfta_nodes.push_back(split_merge[i]);
4808 hfta_nodes.push_back(mrg_node);
// Merge nodes follow the LFTA nodes; the join itself comes last.
4813 for(i=0;i<hfta_nodes.size();++i) ret_vec.push_back(hfta_nodes[i]);
4814 ret_vec.push_back(stream_node);
4815 hfta_returned = hfta_nodes.size()+1;
4817 // Currently : assume that the stream receives all parameters
4818 // and parameter updates, incorporates them, then passes
4819 // all of the parameters to the FTA.
4820 // This will need to change (tables, fta-unsafe types. etc.)
4822 // I will pass on the use_handle_access marking, even
4823 // though the fcn call that requires handle access might
4824 // exist in only one of the parts of the query.
4825 // Parameter manipulation and handle access determination will
4826 // need to be revisited anyway.
4827 vector<string> param_names = param_tbl->get_param_names();
4829 for(pi=0;pi<param_names.size();pi++){
4831 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4832 for(ri=0;ri<ret_vec.size();++ri){
4833 ret_vec[ri]->param_tbl->add_param(param_names[pi],dt->duplicate(),
4834 param_tbl->handle_access(param_names[pi]));
// NOTE(review): this definitions copy appears to sit inside the
// per-parameter loop, so it would be skipped for a query with no
// parameters -- confirm that is intended.
4835 ret_vec[ri]->definitions = definitions; ret_vec[ri]->definitions.erase("_referenced_ifaces");
4846 /////////////////////////////////////////////////////////////
4849 // Common processing
// Helper shared by the extract_opview() methods.
// When fmtbl's schema is an OPERATOR_VIEW_SCHEMA, this builds an
// opview_entry for it, checks every subquery spec of the view against the
// fields registered under sqs->name+silo_nm, duplicates the parse tree of
// the matching entry in qnodes into ret under a generated query name, and
// appends the entry to opviews (the returned index is stored on fmtbl).
//   fmtbl      table variable being examined; receives the udop alias and
//              opview index.
//   pos        position of fmtbl among the caller's inputs; used only in
//              the generated names.
//   node_name  name of the calling node; becomes opv->parent_qname.
//   rootnm     becomes opv->root_name.
// Returns an int status that callers store in a local named retval.
// Validation failures are reported on stderr.
4850 int process_opview(tablevar_t *fmtbl, int pos, string node_name,
4852 vector<query_node *> &qnodes,
4853 opview_set &opviews,
4854 vector<table_exp_t *> &ret, string rootnm, string silo_nm){
4858 int schref = fmtbl->get_schema_ref();
4862 if(Schema->get_schema_type(schref) == OPERATOR_VIEW_SCHEMA){
4863 opview_entry *opv = new opview_entry();
4864 opv->parent_qname = node_name;
4865 opv->root_name = rootnm;
4866 opv->view_name = fmtbl->get_schema_name();
// Alias format: <node>_UDOP<pos>_<view>.
// NOTE(review): unbounded sprintf; tmpstr's capacity is declared elsewhere
// -- confirm it is large enough or switch to snprintf.
4868 sprintf(tmpstr,"%s_UDOP%d_%s",node_name.c_str(),pos,opv->view_name.c_str());
4869 opv->udop_alias = tmpstr;
4870 fmtbl->set_udop_alias(opv->udop_alias);
// Operator properties "file" and "liveness_timeout" are read from the schema.
4872 opv->exec_fl = Schema->get_op_prop(schref, string("file"));
4873 opv->liveness_timeout = atoi(Schema->get_op_prop(schref, string("liveness_timeout")).c_str());
4875 vector<subquery_spec *> subq = Schema->get_subqueryspecs(schref);
4876 for(s=0;s<subq.size();++s){
4877 // Validate that the fields match.
4878 subquery_spec *sqs = subq[s];
4879 vector<field_entry *> flds = Schema->get_fields(sqs->name+silo_nm);
4880 if(flds.size() == 0){
4881 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in Schema.\n",sqs->name.c_str(), opv->view_name.c_str());
4884 if(flds.size() < sqs->types.size()){
// NOTE(review): %lu is paired with size() results; %zu is the portable
// conversion for size_t.
4885 fprintf(stderr,"ERROR: subquery %s of view %s does not have enough fields (%lu found, %lu expected).\n",sqs->name.c_str(), opv->view_name.c_str(),flds.size(), sqs->types.size());
4888 bool failed = false;
// Field-by-field check: the expected type (from the spec) must subsume
// the type found in the schema, and temporal expectations must match.
4889 for(f=0;f<sqs->types.size();++f){
4890 data_type dte(sqs->types[f],sqs->modifiers[f]);
4891 data_type dtf(flds[f]->get_type(),flds[f]->get_modifier_list());
4892 if(! dte.subsumes_type(&dtf) ){
4893 fprintf(stderr,"ERROR: subquery %s of view %s does not have the correct type for field %d (%s found, %s expected).\n",sqs->name.c_str(), opv->view_name.c_str(),f,dtf.to_string().c_str(), dte.to_string().c_str());
4897 if(dte.is_temporal() && (dte.get_temporal() != dtf.get_temporal()) ){
4898 string pstr = dte.get_temporal_string();
4899 fprintf(stderr,"ERROR: subquery %s of view %s does not have the expected temporal value %s of field %d.\n",sqs->name.c_str(), opv->view_name.c_str(),pstr.c_str(),f);
4906 /// Validation done, find the subquery, make a copy of the
4907 /// parse tree, and add it to the return list.
4908 for(q=0;q<qnodes.size();++q)
4909 if(qnodes[q]->name == sqs->name)
// q reaching qnodes.size() is treated as "no query with that name".
4911 if(q==qnodes.size()){
4912 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in list of query names.\n",sqs->name.c_str(), opv->view_name.c_str());
// The copy is renamed <node>_OP<pos>_<view>_SUBQ<s> through its
// "query_name" entry in nmap, and that name is recorded on the entry.
4916 table_exp_t *newq = dup_table_exp(qnodes[q]->parse_tree);
4917 sprintf(tmpstr,"%s_OP%d_%s_SUBQ%d",node_name.c_str(),pos,opv->view_name.c_str(),s);
4918 string newq_name = tmpstr;
4919 newq->nmap["query_name"] = newq_name;
4920 ret.push_back(newq);
4921 opv->subq_names.push_back(newq_name);
4923 fmtbl->set_opview_idx(opviews.append(opv));
// Runs process_opview() on this node's single input table (position 0),
// collecting any generated subquery parse trees in ret.
4929 vector<table_exp_t *> spx_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4930 vector<table_exp_t *> ret;
4932 int retval = process_opview(table_name,0,node_name,
4933 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview() on table_name, the aggregation node's only input
// (position 0); generated subquery parse trees accumulate in ret.
4939 vector<table_exp_t *> sgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4940 vector<table_exp_t *> ret;
4942 int retval = process_opview(table_name,0,node_name,
4943 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Passes table_name (position 0) to process_opview(); any subquery parse
// trees it generates are gathered in ret.
4948 vector<table_exp_t *> rsgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4949 vector<table_exp_t *> ret;
4951 int retval = process_opview(table_name,0,node_name,
4952 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Hands table_name (position 0) to process_opview(), with ret as the
// destination for generated subquery parse trees.
4958 vector<table_exp_t *> sgahcwcb_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4959 vector<table_exp_t *> ret;
4961 int retval = process_opview(table_name,0,node_name,
4962 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview() on every merge input fm[f], passing f as the
// position; generated subquery parse trees accumulate in ret.
4969 vector<table_exp_t *> mrg_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4970 vector<table_exp_t *> ret;
4972 for(f=0;f<fm.size();++f){
4973 int retval = process_opview(fm[f],f,node_name,
4974 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview() on every FROM entry of the join, passing the
// entry's index as the position; results accumulate in ret.
4983 vector<table_exp_t *> join_eq_hash_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4984 vector<table_exp_t *> ret;
4986 for(f=0;f<from.size();++f){
4987 int retval = process_opview(from[f],f,node_name,
4988 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Visits each FROM entry of the filter join with process_opview(), using
// the entry's index as the position; results accumulate in ret.
4994 vector<table_exp_t *> filter_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
4995 vector<table_exp_t *> ret;
4997 for(f=0;f<from.size();++f){
4998 int retval = process_opview(from[f],f,node_name,
4999 Schema,qnodes,opviews,ret, rootnm, silo_name);
5007 //////////////////////////////////////////////////////////////////
5008 //////////////////////////////////////////////////////////////////
5009 /////// Additional methods
5013 //////////////////////////////////////////////////////////////////
5014 // Get schema of operator output
// Output schema of the merge: returns the stored table_layout pointer
// rather than building a new table_def.
5016 table_def *mrg_qpn::get_fields(){
5017 return(table_layout);
// Output schema of the selection node, built by create_attributes() from
// the node name and the select list.
5021 table_def *spx_qpn::get_fields(){
5022 return(create_attributes(node_name, select_list));
// Output schema of the aggregation node: create_attributes() applied to
// node_name and select_list.
5025 table_def *sgah_qpn::get_fields(){
5026 return(create_attributes(node_name, select_list));
// Output schema derived from node_name and select_list via
// create_attributes().
5029 table_def *rsgah_qpn::get_fields(){
5030 return(create_attributes(node_name, select_list));
// Returns the table_def that create_attributes() builds for this node's
// name and select list.
5033 table_def *sgahcwcb_qpn::get_fields(){
5034 return(create_attributes(node_name, select_list));
// Output schema of the filter join, produced by create_attributes() from
// node_name and select_list.
5037 table_def *filter_join_qpn::get_fields(){
5038 return(create_attributes(node_name, select_list));
// Output schema of the hash join.
// Before calling create_attributes(), the temporal annotation on each
// select-list data type is recomputed from the temporal_eq predicates:
//   1. collect the colrefs and SEs appearing on either side of a
//      temporal_eq predicate;
//   2. set each select element's temporal type with compute_se_temporal(),
//      falling back to an exact match against a collected SE;
//   3. if either FROM entry has a nonzero get_property(), re-examine
//      temporal select elements and reset those that fail the check.
// Side effect: mutates the data types attached to select_list entries.
5042 table_def *join_eq_hash_qpn::get_fields(){
5045 // First, gather temporal colrefs and SEs.
5046 map<col_id, temporal_type> temporal_cids;
5047 vector<scalarexp_t *> temporal_se;
5048 for(h=0;h<temporal_eq.size();++h){
5049 scalarexp_t *sel = temporal_eq[h]->pr->get_left_se();
5050 scalarexp_t *ser = temporal_eq[h]->pr->get_right_se();
// Left operand: a colref is recorded once in temporal_cids with its
// temporal type.
5052 if(sel->get_operator_type() == SE_COLREF){
5053 col_id tcol(sel->get_colref());
5054 if(temporal_cids.count(tcol) == 0){
5055 temporal_cids[tcol] = sel->get_data_type()->get_temporal();
5058 temporal_se.push_back(sel);
// Right operand, handled the same way.
5061 if(ser->get_operator_type() == SE_COLREF){
5062 col_id tcol(ser->get_colref());
5063 if(temporal_cids.count(tcol) == 0){
5064 temporal_cids[tcol] = ser->get_data_type()->get_temporal();
5067 temporal_se.push_back(ser);
5071 // Mark select elements as nontemporal, then deduce which
5072 // ones are temporal.
5073 for(s=0;s<select_list.size();++s){
5074 select_list[s]->se->get_data_type()->set_temporal(
5075 compute_se_temporal(select_list[s]->se, temporal_cids)
5077 // Second chance if it is an exact match to an SE.
5078 // for(s=0;s<select_list.size();++s){
5079 if(! select_list[s]->se->get_data_type()->is_temporal() ){
5080 for(t=0;t<temporal_se.size();++t){
5081 if(is_equivalent_se(temporal_se[t], select_list[s]->se)){
5082 select_list[s]->se->get_data_type()->set_temporal(
5083 temporal_se[t]->get_data_type()->get_temporal()
5091 // If there is an outer join, verify that
5092 // the temporal attributes are actually temporal.
5093 // NOTE: this code must be synchronized with the
5094 // equivalence finding in join_eq_hash_qpn::generate_functor
5095 // (and also, the join_eq_hash_qpn constructor)
// NOTE(review): indexes from[0] and from[1] directly, so this assumes the
// join has at least two FROM entries -- confirm.
5096 if(from[0]->get_property() || from[1]->get_property()){
// Field names of colrefs appearing on the left / right side of a
// temporal_eq predicate.
5097 set<string> l_equiv, r_equiv;
5098 for(i=0;i<temporal_eq.size();i++){
5099 scalarexp_t *lse = temporal_eq[i]->pr->get_left_se();
5100 scalarexp_t *rse = temporal_eq[i]->pr->get_right_se();
5101 if(lse->get_operator_type()==SE_COLREF){
5102 l_equiv.insert(lse->get_colref()->get_field());
5104 if(rse->get_operator_type()==SE_COLREF){
5105 r_equiv.insert(rse->get_colref()->get_field());
5109 for(s=0;s<select_list.size();++s){
5110 if(select_list[s]->se->get_data_type()->is_temporal()){
5112 col_id_set::iterator ci;
5113 bool failed = false;
5114 gather_se_col_ids(select_list[s]->se,cid_set, NULL);
// A colref with tblvar_ref 0 is looked up in l_equiv when from[0] has the
// property set; the from[1] test uses r_equiv.
5115 for(ci=cid_set.begin();ci!=cid_set.end();++ci){
5116 if((*ci).tblvar_ref == 0){
5117 if(from[0]->get_property()){
5118 if(l_equiv.count((*ci).field) == 0){
5123 if(from[1]->get_property()){
5124 if(r_equiv.count((*ci).field) == 0){
5131 select_list[s]->se->get_data_type()->reset_temporal();
5138 return create_attributes(node_name, select_list);
5142 //-----------------------------------------------------------------
5143 // get output "keys"
5144 // This is a guess about the set of fields which are a key
5145 // Use as metadata output, e.g. in qtree.xml
5149 // refs to GB attributes are keys, if a SE is not a GB colref
5150 // but refers to a GB colref (outside of an aggregation)
5151 // then set partial_keys to true
// Returns the names of select-list entries whose SE is a group-by
// reference (se->is_gb()).  Names of entries for which
// contains_gb_se(se, gref_set) holds are appended to partial_keys.
//   partial_keys  output list, appended to (not cleared here).
5152 vector<string> sgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5153 vector<string> keys;
5156 for(int i=0; i<gb_tbl.size();++i)
5159 for(int s=0;s<select_list.size();++s){
5160 if(select_list[s]->se->is_gb()){
5161 keys.push_back(select_list[s]->name);
5163 if(contains_gb_se(select_list[s]->se, gref_set)){
5164 partial_keys.push_back(select_list[s]->name);
// Key guess for the running-aggregation node: select-list entries whose SE
// is a group-by reference (se->is_gb()) contribute their name to keys;
// entries satisfying contains_gb_se(se, gref_set) contribute their name to
// partial_keys.
//   partial_keys  output list, appended to (not cleared here).
5171 vector<string> rsgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5172 vector<string> keys;
5175 for(int i=0; i<gb_tbl.size();++i)
5178 for(int s=0;s<select_list.size();++s){
5179 if(select_list[s]->se->is_gb()){
5180 keys.push_back(select_list[s]->name);
5182 if(contains_gb_se(select_list[s]->se, gref_set)){
5183 partial_keys.push_back(select_list[s]->name);
5194 //-----------------------------------------------------------------
5195 // get output tables
5198 // Get tablevar_t names of input and output tables
5200 // output_file_qpn::output_file_qpn(){source_op_name = ""; }
// Input table variables of the output-file node, as tablevar_t pointers.
5201 vector<tablevar_t *> output_file_qpn::get_input_tbls(){
// Input table variables of the merge node, as tablevar_t pointers.
5205 vector<tablevar_t *> mrg_qpn::get_input_tbls(){
// The selection node has one input: a single-element vector holding
// table_name (the stored pointer, not a copy).
5209 vector<tablevar_t *> spx_qpn::get_input_tbls(){
5210 vector<tablevar_t *> retval(1,table_name);
// Single input: builds a one-element vector containing the table_name
// pointer itself.
5214 vector<tablevar_t *> sgah_qpn::get_input_tbls(){
5215 vector<tablevar_t *> retval(1,table_name);
// One-element vector whose only entry is this node's table_name pointer.
5219 vector<tablevar_t *> rsgah_qpn::get_input_tbls(){
5220 vector<tablevar_t *> retval(1,table_name);
// Wraps the table_name pointer in a vector of size one.
5224 vector<tablevar_t *> sgahcwcb_qpn::get_input_tbls(){
5225 vector<tablevar_t *> retval(1,table_name);
// Input table variables of the hash join, as tablevar_t pointers.
5229 vector<tablevar_t *> join_eq_hash_qpn::get_input_tbls(){
// Input table variables of the filter join, as tablevar_t pointers.
5233 vector<tablevar_t *> filter_join_qpn::get_input_tbls(){
5237 //-----------------------------------------------------------------
5238 // get output tables
5241 // This does not make sense, this fcn returns the output table *name*,
5242 // not its schema, and then there is another fcn to return the schema.
// Output table of the output-file node: a one-element vector holding a
// newly allocated tablevar_t constructed from node_name.
// NOTE(review): a fresh tablevar_t is allocated with new on every call;
// confirm which caller is responsible for freeing it.
5243 vector<tablevar_t *> output_file_qpn::get_output_tbls(){
5244 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of the merge node: a one-element vector holding a newly
// allocated tablevar_t constructed from node_name.
// NOTE(review): allocated with new on each call; ownership of the
// tablevar_t is not evident here -- confirm who frees it.
5248 vector<tablevar_t *> mrg_qpn::get_output_tbls(){
5249 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of the selection node: one new tablevar_t named after
// node_name, wrapped in a vector of size one.
// NOTE(review): the tablevar_t is heap-allocated per call; confirm the
// caller releases it.
5253 vector<tablevar_t *> spx_qpn::get_output_tbls(){
5254 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of the aggregation node: a single new tablevar_t built
// from node_name.
// NOTE(review): heap-allocated on every call; ownership is not evident
// here -- confirm who deletes it.
5258 vector<tablevar_t *> sgah_qpn::get_output_tbls(){
5259 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Builds a vector of size one whose entry is a new tablevar_t constructed
// from node_name.
// NOTE(review): each call allocates; confirm the caller takes ownership.
5263 vector<tablevar_t *> rsgah_qpn::get_output_tbls(){
5264 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Single output table: a new tablevar_t created from node_name, placed in
// a one-element vector.
// NOTE(review): allocated with new per call; confirm who frees it.
5268 vector<tablevar_t *> sgahcwcb_qpn::get_output_tbls(){
5269 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of the hash join: one new tablevar_t constructed from
// node_name, in a vector of size one.
// NOTE(review): heap allocation on every call; ownership not evident
// here -- confirm.
5273 vector<tablevar_t *> join_eq_hash_qpn::get_output_tbls(){
5274 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of the filter join: a one-element vector containing a new
// tablevar_t built from node_name.
// NOTE(review): allocates on each call; confirm the caller frees it.
5278 vector<tablevar_t *> filter_join_qpn::get_output_tbls(){
5279 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5285 //-----------------------------------------------------------------
5288 // Associate colrefs with this schema.
5289 // Also, use this opportunity to create table_layout (the output schema).
5290 // If the output schema is ever needed before
// Re-binds every merge input in fm to the table reference that Schema
// reports for its schema name, then copies the schema refs of the first
// two inputs onto the two merge variables.
// NOTE(review): fm[0], fm[1], mvars[0] and mvars[1] are indexed
// unconditionally, so at least two inputs and two merge variables are
// assumed -- confirm.
5291 void mrg_qpn::bind_to_schema(table_list *Schema){
5293 for(t=0;t<fm.size();++t){
5294 int tblref = Schema->get_table_ref(fm[t]->get_schema_name());
5296 fm[t]->set_schema_ref(tblref );
5299 // Here I assume that the colrefs have been reordered
5300 // during analysis so that mvars line up with fm.
5301 mvars[0]->set_schema_ref(fm[0]->get_schema_ref());
5302 mvars[1]->set_schema_ref(fm[1]->get_schema_ref());
5309 // Associate colrefs in SEs with this schema.
// Looks up table_name's schema name in Schema, stores the resulting ref
// on table_name, then binds every WHERE predicate and select-list SE
// against a one-table FROM list.
5310 void spx_qpn::bind_to_schema(table_list *Schema){
5311 // Bind the tablevars in the From clause to the Schema
5312 // (it might have changed from analysis time)
5313 int t = Schema->get_table_ref(table_name->get_schema_name() );
5315 table_name->set_schema_ref(t );
5317 // Get the "from" clause
5318 tablevar_list_t fm(table_name);
5320 // Bind all SEs to this schema
5322 for(p=0;p<where.size();++p){
5323 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5326 for(s=0;s<select_list.size();++s){
5327 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5330 // Collect set of tuples referenced in this HFTA
5331 // input, internal, or output.
// Gathers the column ids referenced by the WHERE predicates and the
// select list into tmp_cset.  Column ids whose schema field has at least
// one unpack function are inserted into retval.
// NOTE(review): presumably ext_fcns_only selects between returning the
// filtered set (retval) and the full set (tmp_cset) -- confirm.
// NOTE(review): fe is dereferenced without a null check; confirm
// Schema->get_field() cannot return NULL for these column ids.
5335 col_id_set spx_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5336 col_id_set retval, tmp_cset;
5338 for(p=0;p<where.size();++p){
5339 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5342 for(s=0;s<select_list.size();++s){
5343 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5345 col_id_set::iterator cisi;
5347 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5348 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5349 if(fe->get_unpack_fcns().size()>0)
5350 retval.insert((*cisi));
// Collects into tmp_cset the column ids used by the WHERE predicates and
// select-list SEs of the filter join; those whose schema field reports at
// least one unpack function are also inserted into retval.
// NOTE(review): presumably ext_fcns_only decides whether retval or
// tmp_cset is returned -- confirm.
// NOTE(review): the result of Schema->get_field() is used without a null
// check -- confirm it is always non-NULL here.
5358 col_id_set filter_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5359 col_id_set retval, tmp_cset;
5361 for(p=0;p<where.size();++p){
5362 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5365 for(s=0;s<select_list.size();++s){
5366 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5368 col_id_set::iterator cisi;
5370 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5371 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5372 if(fe->get_unpack_fcns().size()>0)
5373 retval.insert((*cisi));
5383 // Associate colrefs in SEs with this schema.
// Refreshes the schema ref of every FROM entry from Schema (looked up by
// schema name), then binds all WHERE predicates and select-list SEs
// against the full FROM list.
5384 void join_eq_hash_qpn::bind_to_schema(table_list *Schema){
5385 // Bind the tablevars in the From clause to the Schema
5386 // (it might have changed from analysis time)
5388 for(f=0;f<from.size();++f){
5389 string snm = from[f]->get_schema_name();
5390 int tbl_ref = Schema->get_table_ref(snm);
5392 from[f]->set_schema_ref(tbl_ref);
5395 // Bind all SEs to this schema
5396 tablevar_list_t fm(from);
5399 for(p=0;p<where.size();++p){
5400 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5403 for(s=0;s<select_list.size();++s){
5404 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5407 // Collect set of tuples referenced in this HFTA
5408 // input, internal, or output.
// For each FROM entry, looks up its schema name in Schema and stores the
// table ref; afterwards binds every WHERE predicate and select-list SE
// against a tablevar_list_t built from the FROM entries.
5412 void filter_join_qpn::bind_to_schema(table_list *Schema){
5413 // Bind the tablevars in the From clause to the Schema
5414 // (it might have changed from analysis time)
5416 for(f=0;f<from.size();++f){
5417 string snm = from[f]->get_schema_name();
5418 int tbl_ref = Schema->get_table_ref(snm);
5420 from[f]->set_schema_ref(tbl_ref);
5423 // Bind all SEs to this schema
5424 tablevar_list_t fm(from);
5427 for(p=0;p<where.size();++p){
5428 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5431 for(s=0;s<select_list.size();++s){
5432 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5435 // Collect set of tuples referenced in this HFTA
5436 // input, internal, or output.
// Re-binds the aggregation node to Schema: refreshes table_name's schema
// ref, then binds the WHERE and HAVING predicates, the select list, the
// group-by definitions and the aggregate operands against a one-table
// FROM list.
5443 void sgah_qpn::bind_to_schema(table_list *Schema){
5444 // Bind the tablevars in the From clause to the Schema
5445 // (it might have changed from analysis time)
5448 int t = Schema->get_table_ref(table_name->get_schema_name() );
5450 table_name->set_schema_ref(t );
5452 // Get the "from" clause
5453 tablevar_list_t fm(table_name);
5457 // Bind all SEs to this schema
5459 for(p=0;p<where.size();++p){
5460 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5462 for(p=0;p<having.size();++p){
5463 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5466 for(s=0;s<select_list.size();++s){
5467 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5470 for(g=0;g<gb_tbl.size();++g){
5471 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Built-in aggregates carry a single SE (get_aggr_se); the others are
// bound operand by operand from get_operand_list.
5474 for(a=0;a<aggr_tbl.size();++a){
5475 if(aggr_tbl.is_builtin(a)){
5476 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5478 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5480 for(o=0;o<opl.size();++o){
5481 bind_to_schema_se(opl[o],&fm,Schema);
// Gathers into tmp_cset the column ids referenced by the WHERE
// predicates, the group-by definitions and the aggregate operands
// (gb_tbl is passed to every gather call).  Column ids whose schema
// field has at least one unpack function are inserted into retval.
// NOTE(review): presumably ext_fcns_only selects between retval and
// tmp_cset as the result -- confirm.
// NOTE(review): fe is dereferenced without a null check -- confirm
// Schema->get_field() cannot return NULL here.
5487 col_id_set sgah_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5488 col_id_set retval, tmp_cset;
5490 for(p=0;p<where.size();++p){
5491 gather_pr_col_ids(where[p]->pr, tmp_cset, &gb_tbl);
5494 for(g=0;g<gb_tbl.size();++g){
5495 gather_se_col_ids(gb_tbl.get_def(g), tmp_cset, &gb_tbl);
5498 for(a=0;a<aggr_tbl.size();++a){
5499 if(aggr_tbl.is_builtin(a)){
5500 gather_se_col_ids(aggr_tbl.get_aggr_se(a), tmp_cset, &gb_tbl);
5502 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5504 for(o=0;o<opl.size();++o){
5505 gather_se_col_ids(opl[o], tmp_cset, &gb_tbl);
5510 col_id_set::iterator cisi;
5512 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5513 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5514 if(fe->get_unpack_fcns().size()>0)
5515 retval.insert((*cisi));
// Re-resolve this running-aggregation node against Schema: table_name
// receives the table reference Schema reports for its schema name, then
// WHERE, HAVING, closing_when, the select list, the group-by definitions and
// the aggregate inputs are bound through a tablevar list built from it.
5524 void rsgah_qpn::bind_to_schema(table_list *Schema){
5525 // Bind the tablevars in the From clause to the Schema
5526 // (it might have changed from analysis time)
5527 int t = Schema->get_table_ref(table_name->get_schema_name() );
5529 table_name->set_schema_ref(t );
5531 // Get the "from" clause
5532 tablevar_list_t fm(table_name);
5534 // Bind all SEs to this schema
5536 for(p=0;p<where.size();++p){
5537 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5539 for(p=0;p<having.size();++p){
5540 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5542 for(p=0;p<closing_when.size();++p){
5543 bind_to_schema_pr(closing_when[p]->pr, &fm, Schema);
5546 for(s=0;s<select_list.size();++s){
5547 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5550 for(g=0;g<gb_tbl.size();++g){
5551 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Builtin aggregates bind their aggregate expression; the operand-list walk
// below presumably covers the remaining (non-builtin) aggregates — confirm.
5554 for(a=0;a<aggr_tbl.size();++a){
5555 if(aggr_tbl.is_builtin(a)){
5556 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5558 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5560 for(o=0;o<opl.size();++o){
5561 bind_to_schema_se(opl[o],&fm,Schema);
// Re-resolve this node against Schema: table_name receives the table
// reference Schema reports for its schema name, then WHERE, HAVING, cleanby,
// cleanwhen, the select list, the group-by definitions and the aggregate
// inputs are bound through a tablevar list built from table_name.
5568 void sgahcwcb_qpn::bind_to_schema(table_list *Schema){
5569 // Bind the tablevars in the From clause to the Schema
5570 // (it might have changed from analysis time)
5571 int t = Schema->get_table_ref(table_name->get_schema_name() );
5573 table_name->set_schema_ref(t );
5575 // Get the "from" clause
5576 tablevar_list_t fm(table_name);
5578 // Bind all SEs to this schema
5580 for(p=0;p<where.size();++p){
5581 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5583 for(p=0;p<having.size();++p){
5584 bind_to_schema_pr(having[p]->pr, &fm, Schema);
// Each predicate list is walked with its own size. Bounding these two loops
// by having.size() would read past the end of cleanby/cleanwhen when HAVING
// is longer, and leave entries unbound when it is shorter.
5586 for(p=0;p<cleanby.size();++p){
5587 bind_to_schema_pr(cleanby[p]->pr, &fm, Schema);
5589 for(p=0;p<cleanwhen.size();++p){
5590 bind_to_schema_pr(cleanwhen[p]->pr, &fm, Schema);
5593 for(s=0;s<select_list.size();++s){
5594 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5597 for(g=0;g<gb_tbl.size();++g){
5598 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Builtin aggregates bind their aggregate expression; the operand-list walk
// below presumably covers the remaining (non-builtin) aggregates — confirm.
5601 for(a=0;a<aggr_tbl.size();++a){
5602 if(aggr_tbl.is_builtin(a)){
5603 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5605 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5607 for(o=0;o<opl.size();++o){
5608 bind_to_schema_se(opl[o],&fm,Schema);
5619 ///////////////////////////////////////////////////////////////
5620 ///////////////////////////////////////////////////////////////
5621 /// Functions for code generation.
5624 //-----------------------------------------------------------------
// Returns a newly allocated cplx_lit_table with nothing added to it;
// Ext_fcns is not consulted.
5627 cplx_lit_table *mrg_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5628 return(new cplx_lit_table());
// Allocates a cplx_lit_table and fills it by running the complex-literal
// finders over the select list and the WHERE predicates; returns the new
// table.
5631 cplx_lit_table *spx_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5633 cplx_lit_table *complex_literals = new cplx_lit_table();
5635 for(i=0;i<select_list.size();i++){
5636 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5638 for(i=0;i<where.size();++i){
5639 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5642 return(complex_literals);
// Allocates a cplx_lit_table and fills it by running the complex-literal
// finders over the aggregate inputs, the select list, the group-by
// definitions, and the WHERE and HAVING predicates; returns the new table.
5645 cplx_lit_table *sgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5647 cplx_lit_table *complex_literals = new cplx_lit_table();
// Builtin aggregates that are not star aggregates are scanned through their
// aggregate expression; the operand list is scanned otherwise (presumably the
// else branch — confirm).
5649 for(i=0;i<aggr_tbl.size();++i){
5650 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5651 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
5653 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5654 for(j=0;j<opl.size();++j)
5655 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
5659 for(i=0;i<select_list.size();i++){
5660 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5662 for(i=0;i<gb_tbl.size();i++){
5663 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
5665 for(i=0;i<where.size();++i){
5666 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5668 for(i=0;i<having.size();++i){
5669 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
5672 return(complex_literals);
// Allocates a cplx_lit_table and fills it by running the complex-literal
// finders over the aggregate inputs, the select list, the group-by
// definitions, and the WHERE, HAVING and closing_when predicates; returns
// the new table.
5676 cplx_lit_table *rsgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5678 cplx_lit_table *complex_literals = new cplx_lit_table();
// Builtin aggregates that are not star aggregates are scanned through their
// aggregate expression; the operand list is scanned otherwise (presumably the
// else branch — confirm).
5680 for(i=0;i<aggr_tbl.size();++i){
5681 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5682 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
5684 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5685 for(j=0;j<opl.size();++j)
5686 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
5690 for(i=0;i<select_list.size();i++){
5691 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5693 for(i=0;i<gb_tbl.size();i++){
5694 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
5696 for(i=0;i<where.size();++i){
5697 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5699 for(i=0;i<having.size();++i){
5700 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
5702 for(i=0;i<closing_when.size();++i){
5703 find_complex_literal_pr(closing_when[i]->pr,Ext_fcns, complex_literals);
5706 return(complex_literals);
// Allocates a cplx_lit_table and fills it by running the complex-literal
// finders over the aggregate inputs, the select list, the group-by
// definitions, and the WHERE, HAVING, cleanwhen and cleanby predicates;
// returns the new table.
5710 cplx_lit_table *sgahcwcb_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5712 cplx_lit_table *complex_literals = new cplx_lit_table();
// Builtin aggregates that are not star aggregates are scanned through their
// aggregate expression; the operand list is scanned otherwise (presumably the
// else branch — confirm).
5714 for(i=0;i<aggr_tbl.size();++i){
5715 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5716 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
5718 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5719 for(j=0;j<opl.size();++j)
5720 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
5724 for(i=0;i<select_list.size();i++){
5725 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5727 for(i=0;i<gb_tbl.size();i++){
5728 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
5730 for(i=0;i<where.size();++i){
5731 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5733 for(i=0;i<having.size();++i){
5734 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
5736 for(i=0;i<cleanwhen.size();++i){
5737 find_complex_literal_pr(cleanwhen[i]->pr,Ext_fcns, complex_literals);
5739 for(i=0;i<cleanby.size();++i){
5740 find_complex_literal_pr(cleanby[i]->pr,Ext_fcns, complex_literals);
5743 return(complex_literals);
// Allocates a cplx_lit_table and fills it by running the complex-literal
// finders over the select list and the WHERE predicates; returns the new
// table.
5746 cplx_lit_table *join_eq_hash_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5748 cplx_lit_table *complex_literals = new cplx_lit_table();
5750 for(i=0;i<select_list.size();i++){
5751 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5753 for(i=0;i<where.size();++i){
5754 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5757 return(complex_literals);
// Allocates a cplx_lit_table and fills it by running the complex-literal
// finders over the select list and the WHERE predicates; returns the new
// table.
5760 cplx_lit_table *filter_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
5762 cplx_lit_table *complex_literals = new cplx_lit_table();
5764 for(i=0;i<select_list.size();i++){
5765 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
5767 for(i=0;i<where.size();++i){
5768 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
5771 return(complex_literals);
5777 //-----------------------------------------------------------------
5778 // get_handle_param_tbl
// Declares an empty handle-parameter vector; no expressions are scanned in
// the visible lines (presumably the empty vector is what gets returned —
// confirm).
5780 vector<handle_param_tbl_entry *> mrg_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5781 vector<handle_param_tbl_entry *> retval;
// Accumulates handle-parameter entries into retval by running the
// param-handle finders over the select list and the WHERE predicates.
5786 vector<handle_param_tbl_entry *> spx_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5788 vector<handle_param_tbl_entry *> retval;
5790 for(i=0;i<select_list.size();i++){
5791 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5793 for(i=0;i<where.size();++i){
5794 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Accumulates handle-parameter entries into retval by running the
// param-handle finders over the aggregate inputs, the select list, the
// group-by definitions, and the WHERE and HAVING predicates.
5801 vector<handle_param_tbl_entry *> sgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5803 vector<handle_param_tbl_entry *> retval;
// Builtin aggregates that are not star aggregates are scanned through their
// aggregate expression; the operand list is scanned otherwise (presumably the
// else branch — confirm).
5806 for(i=0;i<aggr_tbl.size();++i){
5807 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5808 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
5810 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5811 for(j=0;j<opl.size();++j)
5812 find_param_handles_se(opl[j], Ext_fcns, retval);
5815 for(i=0;i<select_list.size();i++){
5816 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5818 for(i=0;i<gb_tbl.size();i++){
5819 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
5821 for(i=0;i<where.size();++i){
5822 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5824 for(i=0;i<having.size();++i){
5825 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
// Accumulates handle-parameter entries into retval by running the
// param-handle finders over the aggregate inputs, the select list, the
// group-by definitions, and the WHERE, HAVING and closing_when predicates.
5832 vector<handle_param_tbl_entry *> rsgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5834 vector<handle_param_tbl_entry *> retval;
// Builtin aggregates that are not star aggregates are scanned through their
// aggregate expression; the operand list is scanned otherwise (presumably the
// else branch — confirm).
5837 for(i=0;i<aggr_tbl.size();++i){
5838 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5839 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
5841 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5842 for(j=0;j<opl.size();++j)
5843 find_param_handles_se(opl[j], Ext_fcns, retval);
5846 for(i=0;i<select_list.size();i++){
5847 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5849 for(i=0;i<gb_tbl.size();i++){
5850 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
5852 for(i=0;i<where.size();++i){
5853 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5855 for(i=0;i<having.size();++i){
5856 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
5858 for(i=0;i<closing_when.size();++i){
5859 find_param_handles_pr(closing_when[i]->pr,Ext_fcns, retval);
// Accumulates handle-parameter entries into retval by running the
// param-handle finders over the aggregate inputs, the select list, the
// group-by definitions, and the WHERE, HAVING, cleanwhen and cleanby
// predicates.
5866 vector<handle_param_tbl_entry *> sgahcwcb_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5868 vector<handle_param_tbl_entry *> retval;
// Builtin aggregates that are not star aggregates are scanned through their
// aggregate expression; the operand list is scanned otherwise (presumably the
// else branch — confirm).
5871 for(i=0;i<aggr_tbl.size();++i){
5872 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
5873 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
5875 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
5876 for(j=0;j<opl.size();++j)
5877 find_param_handles_se(opl[j], Ext_fcns, retval);
5880 for(i=0;i<select_list.size();i++){
5881 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5883 for(i=0;i<gb_tbl.size();i++){
5884 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
5886 for(i=0;i<where.size();++i){
5887 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5889 for(i=0;i<having.size();++i){
5890 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
5892 for(i=0;i<cleanwhen.size();++i){
5893 find_param_handles_pr(cleanwhen[i]->pr,Ext_fcns, retval);
5895 for(i=0;i<cleanby.size();++i){
5896 find_param_handles_pr(cleanby[i]->pr,Ext_fcns, retval);
// Accumulates handle-parameter entries into retval by running the
// param-handle finders over the select list and the WHERE predicates.
5902 vector<handle_param_tbl_entry *> join_eq_hash_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5904 vector<handle_param_tbl_entry *> retval;
5906 for(i=0;i<select_list.size();i++){
5907 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5909 for(i=0;i<where.size();++i){
5910 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Accumulates handle-parameter entries into retval by running the
// param-handle finders over the select list and the WHERE predicates.
5917 vector<handle_param_tbl_entry *> filter_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
5919 vector<handle_param_tbl_entry *> retval;
5921 for(i=0;i<select_list.size();i++){
5922 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
5924 for(i=0;i<where.size();++i){
5925 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
5931 ///////////////////////////////////////////////////////////////
5932 ///////////////////////////////////////////////////////////////
5933 /// Functions for operator output rate estimation
5936 //-----------------------------------------------------------------
5937 // get_rate_estimate
// Output-rate estimate for this operator: the fixed SPX_SELECTIVITY factor
// applied to DEFAULT_INTERFACE_RATE. No node state is consulted.
5939 double spx_qpn::get_rate_estimate() {
5941 // dummy method for now
5942 return SPX_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Output-rate estimate for this operator: the fixed SGAH_SELECTIVITY factor
// applied to DEFAULT_INTERFACE_RATE. No node state is consulted.
5945 double sgah_qpn::get_rate_estimate() {
5947 // dummy method for now
5948 return SGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Output-rate estimate for this operator: the fixed RSGAH_SELECTIVITY factor
// applied to DEFAULT_INTERFACE_RATE. No node state is consulted.
5951 double rsgah_qpn::get_rate_estimate() {
5953 // dummy method for now
5954 return RSGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Output-rate estimate for this operator: the fixed SGAHCWCB_SELECTIVITY
// factor applied to DEFAULT_INTERFACE_RATE. No node state is consulted.
5957 double sgahcwcb_qpn::get_rate_estimate() {
5959 // dummy method for now
5960 return SGAHCWCB_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Output-rate estimate for this operator: the fixed MRG_SELECTIVITY factor
// applied to DEFAULT_INTERFACE_RATE. No node state is consulted.
5963 double mrg_qpn::get_rate_estimate() {
5965 // dummy method for now
5966 return MRG_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Output-rate estimate for this operator: the fixed JOIN_EQ_HASH_SELECTIVITY
// factor applied to DEFAULT_INTERFACE_RATE. No node state is consulted.
5969 double join_eq_hash_qpn::get_rate_estimate() {
5971 // dummy method for now
5972 return JOIN_EQ_HASH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
5976 //////////////////////////////////////////////////////////////////////////////
5977 //////////////////////////////////////////////////////////////////////////////
5978 ///// Generate functors
5983 //-------------------------------------------------------------------------
5984 // Code generation utilities.
5985 //-------------------------------------------------------------------------
5987 // Globals referenced by generate utilities
// File-local pointer read by generate_se_code to look up the defining
// expression of a group-by reference; it must point at the current node's
// group-by table before that path runs (where it is assigned is not visible
// here).
5989 static gb_table *segen_gb_tbl; // Table of all group-by attributes.
5993 // Generate code that makes reference
5994 // to the tuple, and not to any aggregates.
5995 // NEW : it might reference a stateful function.
// Returns the generated C text for `se`, recursing into sub-expressions and
// dispatching on se->get_operator_type(). An unknown operator type is
// reported on stderr and yields an "ERROR in generate_se_code" marker string.
// Formatted fragments are built in tmpstr with sprintf.
5996 static string generate_se_code(scalarexp_t *se,table_list *schema){
5998 data_type *ldt, *rdt;
6000 vector<scalarexp_t *> operands;
6003 switch(se->get_operator_type()){
// Handle references are emitted as handle_param_<n>.
6005 if(se->is_handle_ref()){
6006 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
// Complex literals are emitted as complex_literal_<n>; other literals use
// their own C rendering.
6010 if(se->get_literal()->is_cpx_lit()){
6011 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6015 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
// Parameters: handle_param_<n> when a handle reference, else param_<name>.
6017 if(se->is_handle_ref()){
6018 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6022 ret.append("param_");
6023 ret.append(se->get_param_name());
// Single-operand operators: when the operand's type supplies a complex
// operator for this op, that name is emitted with the operand code;
// otherwise the operator text is followed by the operand code.
6026 ldt = se->get_left_se()->get_data_type();
6027 if(ldt->complex_operator(se->get_op()) ){
6028 ret.append( ldt->get_complex_operator(se->get_op()) );
6030 ret.append(generate_se_code(se->get_left_se(),schema));
6034 ret.append(se->get_op());
6035 ret.append(generate_se_code(se->get_left_se(),schema));
// Two-operand operators: same choice, keyed on both operand types.
6040 ldt = se->get_left_se()->get_data_type();
6041 rdt = se->get_right_se()->get_data_type();
6043 if(ldt->complex_operator(rdt, se->get_op()) ){
6044 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6046 ret.append(generate_se_code(se->get_left_se(),schema));
6048 ret.append(generate_se_code(se->get_right_se(),schema));
6052 ret.append(generate_se_code(se->get_left_se(),schema));
6053 ret.append(se->get_op());
6054 ret.append(generate_se_code(se->get_right_se(),schema));
6059 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6060 // so return the defining code.
6061 int gref = se->get_gb_ref();
6062 scalarexp_t *gdef_se = segen_gb_tbl->get_def(gref);
6063 ret = generate_se_code(gdef_se, schema );
// Non-group-by column references name the unpacked tuple variable
// unpack_var_<field>_<tablevar_ref>.
6066 sprintf(tmpstr,"unpack_var_%s_%d",
6067 se->get_colref()->get_field().c_str(), se->get_colref()->get_tablevar_ref() );
// Function calls: a partial function refers to its cached result variable;
// otherwise emit op( [state pointer, cd,] operand, ... ).
6072 if(se->is_partial()){
6073 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6076 ret += se->op + "(";
6077 operands = se->get_operands();
6078 bool first_elem = true;
6079 if(se->get_storage_state() != ""){
6080 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6083 for(o=0;o<operands.size();o++){
6084 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references get extra treatment
// on a line not visible here (presumably passed by address — confirm).
6085 if(operands[o]->get_data_type()->is_buffer_type() &&
6086 (! (operands[o]->is_handle_ref()) ) )
6088 ret += generate_se_code(operands[o], schema);
6094 fprintf(stderr,"INTERNAL ERROR in generate_se_code (hfta), line %d, character %d: unknown operator type %d\n",
6095 se->get_lineno(), se->get_charno(),se->get_operator_type());
6096 return("ERROR in generate_se_code");
6100 // generate code that refers only to aggregate data and constants.
6101 // NEW : modified to handle superaggregates and stateful fcn refs.
6102 // Assume that the state is in *stval
// gbvar and aggvar are the textual prefixes placed in front of the group-by
// index and of "aggr_var<n>" respectively. Returns the generated C text for
// `se`; an unknown operator type is reported on stderr and yields an
// "ERROR in generate_se_code_fm_aggr" marker string.
6103 static string generate_se_code_fm_aggr(scalarexp_t *se, string gbvar, string aggvar, table_list *schema){
6106 data_type *ldt, *rdt;
6108 vector<scalarexp_t *> operands;
6111 switch(se->get_operator_type()){
// Handle references are emitted as handle_param_<n>.
6113 if(se->is_handle_ref()){
6114 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
// Complex literals are emitted as complex_literal_<n>; other literals use
// their own C rendering.
6118 if(se->get_literal()->is_cpx_lit()){
6119 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6123 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
// Parameters: handle_param_<n> when a handle reference, else param_<name>.
6125 if(se->is_handle_ref()){
6126 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6130 ret.append("param_");
6131 ret.append(se->get_param_name());
// Single-operand operators: complex operator name when the operand type
// supplies one, else the operator text followed by the operand code.
6134 ldt = se->get_left_se()->get_data_type();
6135 if(ldt->complex_operator(se->get_op()) ){
6136 ret.append( ldt->get_complex_operator(se->get_op()) );
6138 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6142 ret.append(se->get_op());
6143 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
// Two-operand operators: same choice, keyed on both operand types.
6148 ldt = se->get_left_se()->get_data_type();
6149 rdt = se->get_right_se()->get_data_type();
6151 if(ldt->complex_operator(rdt, se->get_op()) ){
6152 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6154 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6156 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6160 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6161 ret.append(se->get_op());
6162 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6167 if(se->is_gb()){ // group-by refs are emitted by name:
6168 // <gbvar><gb_ref>, not by re-generating the defining expression.
6169 sprintf(tmpstr,"%s%d",gbvar.c_str(),se->get_gb_ref());
6173 fprintf(stderr,"ERROR reference to non-GB column ref not permitted here,"
6174 "error in query_plan.cc:generate_se_code_fm_aggr, line %d, character %d.\n",
6175 se->get_lineno(), se->get_charno());
// Aggregate references: superaggregates are read from *stval, ordinary ones
// from <aggvar>aggr_var<n>.
6181 if(se->is_superaggr()){
6182 sprintf(tmpstr,"stval->aggr_var%d",se->get_aggr_ref());
6184 sprintf(tmpstr,"%saggr_var%d",aggvar.c_str(),se->get_aggr_ref());
// A function carrying a non-negative aggregate reference is emitted as
// udaf_ret_<n>.
6190 if(se->get_aggr_ref() >= 0){
6191 sprintf(tmpstr,"udaf_ret_%d",se->get_aggr_ref());
// Partial functions refer to their cached result variable; other calls are
// emitted as op( [state pointer, cd,] operand, ... ).
6196 if(se->is_partial()){
6197 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6200 ret += se->op + "(";
6201 bool first_elem = true;
6202 if(se->get_storage_state() != ""){
6203 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6206 operands = se->get_operands();
6207 for(o=0;o<operands.size();o++){
6208 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references get extra treatment
// on a line not visible here (presumably passed by address — confirm).
6209 if(operands[o]->get_data_type()->is_buffer_type() &&
6210 (! (operands[o]->is_handle_ref()) ) )
6212 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
6218 fprintf(stderr,"INTERNAL ERROR in query_plan.cc::generate_se_code_fm_aggr, line %d, character %d: unknown operator type %d\n",
6219 se->get_lineno(), se->get_charno(),se->get_operator_type());
6220 return("ERROR in generate_se_code_fm_aggr");
// Emits the statement that evaluates partial function `se` from aggregate
// data: "retval = <op>( &partial_fcn_result_<pfn_id> [, state pointer, cd]
// operands... ", with each operand rendered by generate_se_code_fm_aggr using
// the gbvar/aggvar prefixes. A non-function expression is reported on stderr
// and yields an "ERROR in unpack_partial_fcn_fm_aggr" marker string.
6226 static string unpack_partial_fcn_fm_aggr(scalarexp_t *se, int pfn_id, string gbvar, string aggvar, table_list *schema){
6229 vector<scalarexp_t *> operands;
6232 if(se->get_operator_type() != SE_FUNC){
6233 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn_fm_aggr. line %d, character %d\n",
6234 se->get_lineno(), se->get_charno());
6235 return("ERROR in unpack_partial_fcn_fm_aggr");
// The assignment is terminated with ';' so it is a statement of its own
// rather than being chained to the sprintf below through the comma operator.
6238 ret = "\tretval = " + se->get_op() + "( ";
6239 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
6242 if(se->get_storage_state() != ""){
6243 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6246 operands = se->get_operands();
6247 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references get extra treatment
// on a line not visible here (presumably passed by address — confirm).
6249 if(operands[o]->get_data_type()->is_buffer_type() &&
6250 (! (operands[o]->is_handle_ref()) ) )
6252 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
// Emits the statement that evaluates partial function `se` from tuple data:
// "retval = <op>( &partial_fcn_result_<pfn_id> [, state pointer, cd]
// operands... ", with each operand rendered by generate_se_code. A
// non-function expression is reported on stderr and yields an
// "ERROR in unpack_partial_fcn" marker string.
6260 static string unpack_partial_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6263 vector<scalarexp_t *> operands;
6265 if(se->get_operator_type() != SE_FUNC){
6266 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn. line %d, character %d\n",
6267 se->get_lineno(), se->get_charno());
6268 return("ERROR in unpack_partial_fcn");
// The assignment is terminated with ';' so it is a statement of its own
// rather than being chained to the sprintf below through the comma operator.
6271 ret = "\tretval = " + se->get_op() + "( ";
6272 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
6275 if(se->get_storage_state() != ""){
6276 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6279 operands = se->get_operands();
6280 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references get extra treatment
// on a line not visible here (presumably passed by address — confirm).
6282 if(operands[o]->get_data_type()->is_buffer_type() &&
6283 (! (operands[o]->is_handle_ref()) ) )
6285 ret += generate_se_code(operands[o], schema);
// Emits the call text "<op>( [state pointer, cd,] operands..." for function
// expression `se`, with each operand rendered by generate_se_code. A
// non-function expression is reported on stderr and yields an
// "ERROR in generate_cached_fcn" marker string.
// NOTE(review): pfn_id is not used in the visible lines — confirm.
6292 static string generate_cached_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6295 vector<scalarexp_t *> operands;
6297 if(se->get_operator_type() != SE_FUNC){
6298 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to generate_cached_fcn. line %d, character %d\n",
6299 se->get_lineno(), se->get_charno());
6300 return("ERROR in generate_cached_fcn");
6303 ret = se->get_op()+"(";
// Unlike the unpack_partial_fcn helpers, the state arguments come first here,
// so the separating comma trails them.
6305 if(se->get_storage_state() != ""){
6306 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd,";
6309 operands = se->get_operands();
6310 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references get extra treatment
// on a line not visible here (presumably passed by address — confirm).
6312 if(operands[o]->get_data_type()->is_buffer_type() &&
6313 (! (operands[o]->is_handle_ref()) ) )
6315 ret += generate_se_code(operands[o], schema);
// Maps a query comparison operator to its C spelling: "=" becomes "==" and
// "<>" becomes "!=". Handling of other operators is on lines not visible
// here (presumably returned unchanged — confirm).
6326 static string generate_C_comparison_op(string op){
6327 if(op == "=") return("==");
6328 if(op == "<>") return("!=");
// Maps the boolean keywords AND / OR / NOT (upper, capitalised or lower
// case) to C operators; the replacement text for each match is on lines not
// visible here. Any other spelling yields "ERROR UNKNOWN BOOLEAN OPERATOR".
6332 static string generate_C_boolean_op(string op){
6333 if( (op == "AND") || (op == "And") || (op == "and") ){
6336 if( (op == "OR") || (op == "Or") || (op == "or") ){
6339 if( (op == "NOT") || (op == "Not") || (op == "not") ){
6343 return("ERROR UNKNOWN BOOLEAN OPERATOR");
// Returns the generated C text for predicate `pr`, evaluated over tuple data
// (scalar sub-expressions go through generate_se_code). Dispatches on
// pr->get_operator_type(); an unknown type is reported on stderr and yields
// an "ERROR in generate_predicate_code" marker string.
6347 static string generate_predicate_code(predicate_t *pr,table_list *schema){
6349 vector<literal_t *> litv;
6351 data_type *ldt, *rdt;
6352 vector<scalarexp_t *> op_list;
6355 switch(pr->get_operator_type()){
// Literal-list membership: one test per literal, joined with " || ".
6357 ldt = pr->get_left_se()->get_data_type();
6360 litv = pr->get_lit_vec();
6361 for(i=0;i<litv.size();i++){
6362 if(i>0) ret.append(" || ");
// Types with a complex comparison call their comparison function and test
// the result against 0; other types compare the two rendered values.
6365 if(ldt->complex_comparison(ldt) ){
6366 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6368 if(ldt->is_buffer_type() )
6370 ret.append(generate_se_code(pr->get_left_se(), schema));
6372 if(ldt->is_buffer_type() )
6374 if(litv[i]->is_cpx_lit()){
6375 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6378 ret.append(litv[i]->to_C_code(""));
6380 ret.append(") == 0");
6382 ret.append(generate_se_code(pr->get_left_se(), schema));
6384 ret.append(litv[i]->to_hfta_C_code(""));
// Comparison of two scalar expressions.
6393 ldt = pr->get_left_se()->get_data_type();
6394 rdt = pr->get_right_se()->get_data_type();
6397 if(ldt->complex_comparison(rdt) ){
6398 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6400 if(ldt->is_buffer_type() )
6402 ret.append(generate_se_code(pr->get_left_se(),schema) );
6404 if(rdt->is_buffer_type() )
6406 ret.append(generate_se_code(pr->get_right_se(),schema) );
// The translated comparison operator is applied to the function's result.
6408 ret.append( generate_C_comparison_op(pr->get_op()));
6411 ret.append(generate_se_code(pr->get_left_se(),schema) );
6412 ret.append( generate_C_comparison_op(pr->get_op()));
6413 ret.append(generate_se_code(pr->get_right_se(),schema) );
// Boolean operator applied to a single sub-predicate.
6419 ret.append( generate_C_boolean_op(pr->get_op()) );
6420 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6423 case PRED_BINARY_OP:
6425 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6426 ret.append( generate_C_boolean_op(pr->get_op()) );
6427 ret.append(generate_predicate_code(pr->get_right_pr(),schema) );
// Predicate function call: op( operand, ... ).
6431 ret += pr->get_op() + "( ";
6432 op_list = pr->get_op_list();
6433 for(o=0;o<op_list.size();++o){
6434 if(o>0) ret += ", ";
6435 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6437 ret += generate_se_code(op_list[o], schema);
6442 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6443 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6444 return("ERROR in generate_predicate_code");
// Returns the generated C text for predicate `pr`, evaluated over aggregate
// data: scalar sub-expressions go through generate_se_code_fm_aggr with the
// gbvar/aggvar prefixes. Dispatches on pr->get_operator_type(); an unknown
// type is reported on stderr and yields an
// "ERROR in generate_predicate_code_fm_aggr" marker string.
6448 static string generate_predicate_code_fm_aggr(predicate_t *pr, string gbvar, string aggvar,table_list *schema){
6450 vector<literal_t *> litv;
6452 data_type *ldt, *rdt;
6453 vector<scalarexp_t *> op_list;
6456 switch(pr->get_operator_type()){
// Literal-list membership: one test per literal, joined with " || ".
6458 ldt = pr->get_left_se()->get_data_type();
6461 litv = pr->get_lit_vec();
6462 for(i=0;i<litv.size();i++){
6463 if(i>0) ret.append(" || ");
// Types with a complex comparison call their comparison function and test
// the result against 0; other types compare the two rendered values.
6466 if(ldt->complex_comparison(ldt) ){
6467 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6469 if(ldt->is_buffer_type() )
6471 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6473 if(ldt->is_buffer_type() )
6475 if(litv[i]->is_cpx_lit()){
6476 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6479 ret.append(litv[i]->to_C_code(""));
6481 ret.append(") == 0");
6483 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6485 ret.append(litv[i]->to_hfta_C_code(""));
// Comparison of two scalar expressions.
6494 ldt = pr->get_left_se()->get_data_type();
6495 rdt = pr->get_right_se()->get_data_type();
6498 if(ldt->complex_comparison(rdt) ){
6499 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6501 if(ldt->is_buffer_type() )
6503 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6505 if(rdt->is_buffer_type() )
6507 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
// The translated comparison operator is applied to the function's result.
6509 ret.append( generate_C_comparison_op(pr->get_op()));
6512 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6513 ret.append( generate_C_comparison_op(pr->get_op()));
6514 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
// Boolean operator applied to a single sub-predicate.
6520 ret.append( generate_C_boolean_op(pr->get_op()) );
6521 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6524 case PRED_BINARY_OP:
6526 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6527 ret.append( generate_C_boolean_op(pr->get_op()) );
6528 ret.append(generate_predicate_code_fm_aggr(pr->get_right_pr(), gbvar, aggvar,schema) );
// Predicate function call: op( operand, ... ).
6532 ret += pr->get_op() + "( ";
6533 op_list = pr->get_op_list();
6534 for(o=0;o<op_list.size();++o){
6535 if(o>0) ret += ", ";
6536 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6538 ret += generate_se_code_fm_aggr(op_list[o], gbvar, aggvar, schema);
// The diagnostic and the error marker name this function, so a failure here
// is not mistaken for one in generate_predicate_code.
6543 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code_fm_aggr, line %d, character %d, unknown predicate operator type %d\n",
6544 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6545 return("ERROR in generate_predicate_code_fm_aggr");
// Builds the C text of a test between the already-rendered operands lhs_op
// and rhs_op of type dt. Types with a complex comparison call
// dt->get_hfta_comparison_fcn(dt) and test the result with ") == 0"; other
// types join the two operands with an operator appended on a line not visible
// here (presumably " == " — confirm).
6553 static string generate_equality_test(string &lhs_op, string &rhs_op, data_type *dt){
6556 if(dt->complex_comparison(dt) ){
6557 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types get extra text before each operand on lines not visible here
// (presumably an address-of — confirm).
6559 if(dt->is_buffer_type() )
6563 if(dt->is_buffer_type() )
6565 ret.append(rhs_op );
6566 ret.append(") == 0");
6568 ret.append(lhs_op );
6570 ret.append(rhs_op );
// Builds the C text of a test between the already-rendered operands lhs_op
// and rhs_op of type dt. Types with a complex comparison call
// dt->get_hfta_comparison_fcn(dt) and test the result with ") == 0"; other
// types join the two operands with an operator appended on a line not visible
// here.
// NOTE(review): the visible lines match generate_equality_test exactly;
// confirm whether the hidden lines differ.
6576 static string generate_comparison(string &lhs_op, string &rhs_op, data_type *dt){
6579 if(dt->complex_comparison(dt) ){
6580 ret.append(dt->get_hfta_comparison_fcn(dt));
6582 if(dt->is_buffer_type() )
6586 if(dt->is_buffer_type() )
6588 ret.append(rhs_op );
6589 ret.append(") == 0");
6591 ret.append(lhs_op );
6593 ret.append(rhs_op );
6600 // Here I assume that only MIN and MAX aggregates can be computed
6601 // over BUFFER data types.
// Emits the C statement(s) that fold the current tuple into aggregate
// variable `var` for entry aidx of atbl. Non-builtin aggregates call
// <op>_HFTA_AGGR_UPDATE_; builtin ones are handled per operator. An
// unrecognised operator is reported on stderr.
// NOTE(review): several sprintf calls below format var into tmpstr, whose
// size is declared on a line not visible here — confirm it cannot overflow.
6603 static string generate_aggr_update(string var, aggregate_table *atbl,int aidx, table_list *schema){
6604 string retval = "\t\t";
6605 string op = atbl->get_op(aidx);
6608 if(! atbl->is_builtin(aidx)) {
// The aggregate storage is passed by address unless its storage type is
// fstring_t; the operand list follows.
6610 retval += op+"_HFTA_AGGR_UPDATE_(";
6611 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6612 retval+="("+var+")";
6613 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
6614 for(o=0;o<opl.size();++o){{
6616 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
6618 retval += generate_se_code(opl[o], schema);
6627 // builtin processing
6628 data_type *dt = atbl->get_data_type(aidx);
// Increment form.
6632 retval.append("++;\n");
// Accumulate form: add the aggregate expression.
6637 retval.append(" += ");
6638 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6639 retval.append(";\n");
// Keep-smaller form: evaluate into aggr_tmp_<aidx> and replace var when the
// temporary compares below it (via the type's comparison function for
// complex types; buffer types are replaced through get_hfta_buffer_replace).
6643 sprintf(tmpstr,"aggr_tmp_%d",aidx);
6644 retval += dt->make_host_cvar(tmpstr);
6646 retval += generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
6647 if(dt->complex_comparison(dt)){
6648 if(dt->is_buffer_type())
6649 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6651 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6653 sprintf(tmpstr,"\t\tif(aggr_tmp_%d < %s)\n",aidx,var.c_str());
6655 retval.append(tmpstr);
6656 if(dt->is_buffer_type()){
6657 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
6659 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
6661 retval.append(tmpstr);
// Keep-larger form: same shape with the comparison reversed.
6666 sprintf(tmpstr,"aggr_tmp_%d",aidx);
6667 retval+=dt->make_host_cvar(tmpstr);
6669 retval+=generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
6670 if(dt->complex_comparison(dt)){
6671 if(dt->is_buffer_type())
6672 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6674 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
6676 sprintf(tmpstr,"\t\tif(aggr_tmp_%d > %s)\n",aidx,var.c_str());
6678 retval.append(tmpstr);
6679 if(dt->is_buffer_type()){
6680 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
6682 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
6684 retval.append(tmpstr);
// Bitwise aggregates fold the expression in with &=, |= or ^=.
6689 if(op == "AND_AGGR"){
6691 retval.append(" &= ");
6692 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6693 retval.append(";\n");
6696 if(op == "OR_AGGR"){
6698 retval.append(" |= ");
6699 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6700 retval.append(";\n");
6703 if(op == "XOR_AGGR"){
6705 retval.append(" ^= ");
6706 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
6707 retval.append(";\n");
// Running-average form: maintain <var>_sum and <var>_cnt and recompute var
// as their quotient.
6711 retval += var+"_sum += "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
6712 retval += "\t\t"+var+"_cnt += 1;\n";
6713 retval += "\t\t"+var+" = "+var+"_sum / "+var+"_cnt;\n";
6717 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_update.\n",op.c_str());
// Emits the C statement that removes the contribution of aggregate `var`
// from super-aggregate `supervar` for entry aidx of atbl. Non-builtin
// aggregates call <op>_HFTA_AGGR_MINUS_; COUNT and SUM subtract; XOR_AGGR
// xors. MIN and MAX are matched, but what is emitted for them is on a line
// not visible here. An unrecognised operator is reported on stderr.
6726 static string generate_superaggr_minus(string var, string supervar, aggregate_table *atbl,int aidx, table_list *schema){
6727 string retval = "\t\t";
6728 string op = atbl->get_op(aidx);
6731 if(! atbl->is_builtin(aidx)) {
// Both storages are passed by address unless the storage type is fstring_t.
6733 retval += op+"_HFTA_AGGR_MINUS_(";
6734 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6735 retval+="("+supervar+"),";
6736 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6737 retval+="("+var+");\n";
6743 if(op == "COUNT" || op == "SUM"){
6744 retval += supervar + "-=" +var + ";\n";
6748 if(op == "XOR_AGGR"){
6749 retval += supervar + "^=" +var + ";\n";
6753 if(op=="MIN" || op == "MAX")
6756 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_superaggr_minus.\n",op.c_str());
// Emits the C statement(s) that initialise aggregate variable `var` from the
// first tuple for entry aidx of atbl. Non-builtin aggregates call
// <op>_HFTA_AGGR_INIT_ followed by <op>_HFTA_AGGR_UPDATE_ with the operand
// list; builtin ones are handled per operator. An unrecognised operator is
// reported on stderr.
6765 static string generate_aggr_init(string var, aggregate_table *atbl,int aidx, table_list *schema){
6767 string op = atbl->get_op(aidx);
6770 if(! atbl->is_builtin(aidx)){
// The aggregate storage is passed by address unless its storage type is
// fstring_t.
6772 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_INIT_(";
6773 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6774 retval+="("+var+"));\n";
6776 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_UPDATE_(";
6777 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6778 retval+="("+var+")";
6779 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
6781 for(o=0;o<opl.size();++o){
6783 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
6785 retval += generate_se_code(opl[o],schema);
6791 // builtin aggregate processing
6792 data_type *dt = atbl->get_data_type(aidx);
// Counter form starts at 1.
6796 retval.append(" = 1;\n");
6800 if(op == "SUM" || op == "MIN" || op == "MAX" || op == "AND_AGGR" ||
6801 op=="AVG" || op == "OR_AGGR" || op == "XOR_AGGR"){
// Buffer types evaluate into aggr_tmp_<aidx> and copy it into var through
// get_hfta_buffer_assign_copy.
6802 if(dt->is_buffer_type()){
6803 sprintf(tmpstr,"\t\taggr_tmp_%d = %s;\n",aidx,generate_se_code(atbl->get_aggr_se(aidx), schema ).c_str() );
6804 retval.append(tmpstr);
6805 sprintf(tmpstr,"\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_assign_copy().c_str(),var.c_str(),aidx);
6806 retval.append(tmpstr);
// Running-average form: seed <var>_sum with the expression, <var>_cnt with
// 1, and var with the sum.
6809 retval += var+"_sum = "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
6810 retval += "\t"+var+"_cnt = 1;\n";
6811 retval += "\t"+var+" = "+var+"_sum;\n";
// Remaining operators take the aggregate expression's value directly.
6815 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema));
6816 retval.append(";\n");
6822 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_init.\n",op.c_str());
// Generate the code that resets the aggregate storage `var` for aggregate
// number aidx of atbl (no input tuple is consulted, unlike generate_aggr_init).
// NOTE(review): several short lines of this function (declarations, branch
// guards, closing braces, returns) are not visible in this listing; the
// comments below describe only the statements that are shown.
6830 static string generate_aggr_reinitialize(string var, aggregate_table *atbl,int aidx, table_list *schema){
6832 string op = atbl->get_op(aidx);
// User-defined aggregate: running aggregates call <OP>_HFTA_AGGR_REINIT_,
// the other suffix shown is <OP>_HFTA_AGGR_INIT_.  Storage is passed by
// address unless its type is fstring_t.
6835 if(! atbl->is_builtin(aidx)){
6837 retval += "\t"+atbl->get_op(aidx);
6838 if(atbl->is_running_aggr(aidx)){
6839 retval += "_HFTA_AGGR_REINIT_(";
6841 retval += "_HFTA_AGGR_INIT_(";
6843 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
6844 retval+="("+var+"));\n";
6848 // builtin aggregate processing
6849 data_type *dt = atbl->get_data_type(aidx);
// Reset to 0 -- presumably the COUNT case; the guard is not visible.
6853 retval.append(" = 0;\n");
6857 if(op == "SUM" || op == "AND_AGGR" ||
6858 op == "OR_AGGR" || op == "XOR_AGGR"){
// Buffer-typed builtin aggregates cannot be reinitialized: the returned text
// is an error message rather than code.
6859 if(dt->is_buffer_type()){
6860 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
// Otherwise reset to the literal built from the type indicator alone.
6864 literal_t l(dt->type_indicator());
6865 retval.append(l.to_string());
6866 retval.append(";\n");
6872 if(dt->is_buffer_type()){
6873 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
// Reset to the type's maximum literal (presumably the MIN case -- confirm).
6877 retval.append(dt->get_max_literal());
6878 retval.append(";\n");
6884 if(dt->is_buffer_type()){
6885 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
// Reset to the type's minimum literal (presumably the MAX case -- confirm).
6889 retval.append(dt->get_min_literal());
6890 retval.append(";\n");
// Reached when op matches none of the builtin operators handled above.
6895 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_aggr_reinitialize.\n",op.c_str());
6902 // Generate parameter holding vars from a param table.
6903 static string generate_param_vars(param_table *param_tbl){
6906 vector<string> param_vec = param_tbl->get_param_names();
6907 for(p=0;p<param_vec.size();p++){
6908 data_type *dt = param_tbl->get_data_type(param_vec[p]);
6909 sprintf(tmpstr,"param_%s;\n", param_vec[p].c_str());
6910 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
6911 if(param_tbl->handle_access(param_vec[p])){
6912 ret += "\tstruct search_handle *param_handle_"+param_vec[p]+";\n";
6918 // Parameter manipulation routines
6919 static string generate_load_param_block(string functor_name,
6920 param_table *param_tbl,
6921 vector<handle_param_tbl_entry *> param_handle_table
6924 vector<string> param_names = param_tbl->get_param_names();
6926 string ret = "int load_params_"+functor_name+"(gs_int32_t sz, void *value){\n";
6927 ret.append("\tint pos=0;\n");
6928 ret.append("\tint data_pos;\n");
6930 for(p=0;p<param_names.size();p++){
6931 data_type *dt = param_tbl->get_data_type(param_names[p]);
6932 if(dt->is_buffer_type()){
6933 sprintf(tmpstr,"tmp_var_%s;\n", param_names[p].c_str());
6934 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
6939 // Verify that the block is of minimum size
6940 if(param_names.size() > 0){
6941 ret += "//\tVerify that the value block is large enough */\n";
6942 ret.append("\n\tdata_pos = ");
6943 for(p=0;p<param_names.size();p++){
6944 if(p>0) ret.append(" + ");
6945 data_type *dt = param_tbl->get_data_type(param_names[p]);
6946 ret.append("sizeof( ");
6947 ret.append( dt->get_host_cvar_type() );
6951 ret.append("\tif(data_pos > sz) return 1;\n\n");
6954 ///////////////////////
6955 /// Verify that all strings can be unpacked.
6957 ret += "//\tVerify that the strings can be unpacked */\n";
6958 for(p=0;p<param_names.size();p++){
6959 data_type *dt = param_tbl->get_data_type(param_names[p]);
6960 if(dt->is_buffer_type()){
6961 sprintf(tmpstr,"\ttmp_var_%s = *( (%s *)((gs_sp_t )value+pos) );\n",param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
6963 switch( dt->get_type() ){
6965 // ret += "\ttmp_var_"+param_names[p]+".offset = ntohl( tmp_var_"+param_names[p]+".offset );\n"; // ntoh conversion
6966 // ret += "\ttmp_var_"+param_names[p]+".length = ntohl( tmp_var_"+param_names[p]+".length );\n"; // ntoh conversion
6967 sprintf(tmpstr,"\tif( (int)(tmp_var_%s.offset) + tmp_var_%s.length > sz) return 1;\n",param_names[p].c_str(), param_names[p].c_str() );
6969 sprintf(tmpstr,"\ttmp_var_%s.offset = (gs_p_t)( (gs_sp_t )value + (gs_p_t)(tmp_var_%s.offset) );\n",param_names[p].c_str(), param_names[p].c_str() );
6973 fprintf(stderr,"ERROR: parameter %s is of type %s, a buffered type, but I don't know how to unpack it as a parameter.\n",param_names[p].c_str(), dt->to_string().c_str() );
6978 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
6982 /////////////////////////
6984 ret += "/*\tThe block is OK, do the unpacking. */\n";
6985 ret += "\tpos = 0;\n";
6987 for(p=0;p<param_names.size();p++){
6988 data_type *dt = param_tbl->get_data_type(param_names[p]);
6989 if(dt->is_buffer_type()){
6990 sprintf(tmpstr,"\t%s(¶m_%s, &tmp_var_%s);\n", dt->get_hfta_buffer_assign_copy().c_str(),param_names[p].c_str(),param_names[p].c_str() );
6993 // if(dt->needs_hn_translation()){
6994 // sprintf(tmpstr,"\tparam_%s = %s( *( (%s *)( (gs_sp_t )value+pos) ) );\n",
6995 // param_names[p].c_str(), dt->ntoh_translation().c_str(), dt->get_host_cvar_type().c_str() );
6997 sprintf(tmpstr,"\tparam_%s = *( (%s *)( (gs_sp_t )value+pos) );\n",
6998 param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7002 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7005 // TODO: I think this method of handle registration is obsolete
7006 // and should be deleted.
7007 // some examination reveals that handle_access is always false.
7008 for(p=0;p<param_names.size();p++){
7009 if(param_tbl->handle_access(param_names[p]) ){
7010 data_type *pdt = param_tbl->get_data_type(param_names[p]);
7012 ret += "\tt->param_handle_"+param_names[p]+" = " +
7013 pdt->handle_registration_name() +
7014 "((struct FTA *)t, &(t->param_"+param_names[p]+"));\n";
7017 // Register the pass-by-handle parameters
7019 ret += "/* register the pass-by-handle parameters */\n";
7022 for(ph=0;ph<param_handle_table.size();++ph){
7023 data_type pdt(param_handle_table[ph]->type_name);
7024 switch(param_handle_table[ph]->val_type){
7030 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7032 if(pdt.is_buffer_type()) ret += "&(";
7033 ret += "param_"+param_handle_table[ph]->param_name;
7034 if(pdt.is_buffer_type()) ret += ")";
7038 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7044 ret += "\treturn(0);\n";
7045 ret.append("}\n\n");
7051 static string generate_delete_param_block(string functor_name,
7052 param_table *param_tbl,
7053 vector<handle_param_tbl_entry *> param_handle_table
7057 vector<string> param_names = param_tbl->get_param_names();
7059 string ret = "void destroy_params_"+functor_name+"(){\n";
7061 for(p=0;p<param_names.size();p++){
7062 data_type *dt = param_tbl->get_data_type(param_names[p]);
7063 if(dt->is_buffer_type()){
7064 sprintf(tmpstr,"\t\t%s(¶m_%s);\n",dt->get_hfta_buffer_destroy().c_str(),param_names[p].c_str());
7067 if(param_tbl->handle_access(param_names[p]) ){
7068 ret += "\t\t" + dt->get_handle_destructor() +
7069 "(t->param_handle_" + param_names[p] + ");\n";
7073 ret += "//\t\tDeregister handles.\n";
7075 for(ph=0;ph<param_handle_table.size();++ph){
7076 if(param_handle_table[ph]->val_type == param_e){
7077 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7078 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7087 // ---------------------------------------------------------------------
7088 // functions for creating functor variables.
7090 static string generate_access_vars(col_id_set &cid_set, table_list *schema){
7092 col_id_set::iterator csi;
7094 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7095 int schref = (*csi).schema_ref;
7096 int tblref = (*csi).tblvar_ref;
7097 string field = (*csi).field;
7098 data_type dt(schema->get_type_name(schref,field));
7099 sprintf(tmpstr,"unpack_var_%s_%d", field.c_str(), tblref);
7100 ret+="\t"+dt.make_host_cvar(tmpstr)+";\n";
7101 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", field.c_str(), tblref);
7107 static string generate_partial_fcn_vars(vector<scalarexp_t *> &partial_fcns,
7108 vector<int> &ref_cnt, vector<bool> &is_partial, bool gen_fcn_cache){
7113 for(p=0;p<partial_fcns.size();++p){
7114 if(!gen_fcn_cache || is_partial[p] || ref_cnt[p]>1){
7115 sprintf(tmpstr,"partial_fcn_result_%d", p);
7116 ret+="\t"+partial_fcns[p]->get_data_type()->make_host_cvar(tmpstr)+";\n";
7117 if(gen_fcn_cache && ref_cnt[p]>1){
7118 ret+="\tint fcn_ref_cnt_"+int_to_string(p)+";\n";
7126 static string generate_complex_lit_vars(cplx_lit_table *complex_literals){
7129 for(cl=0;cl<complex_literals->size();cl++){
7130 literal_t *l = complex_literals->get_literal(cl);
7131 data_type *dtl = new data_type( l->get_type() );
7132 sprintf(tmpstr,"complex_literal_%d",cl);
7133 ret += "\t"+dtl->make_host_cvar(tmpstr)+";\n";
7134 if(complex_literals->is_handle_ref(cl)){
7135 sprintf(tmpstr,"\tstruct search_handle *lit_handle_%d;\n",cl);
7143 static string generate_pass_by_handle_vars(
7144 vector<handle_param_tbl_entry *> ¶m_handle_table){
7148 for(p=0;p<param_handle_table.size();++p){
7149 sprintf(tmpstr,"\tgs_param_handle_t handle_param_%d;\n",p);
7157 // ------------------------------------------------------------
7158 // functions for generating initialization code.
7160 static string gen_access_var_init(col_id_set &cid_set){
7162 col_id_set::iterator csi;
7164 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7165 int tblref = (*csi).tblvar_ref;
7166 string field = (*csi).field;
7167 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle%d, \"%s\");\n", field.c_str(),tblref,tblref,field.c_str());
7174 static string gen_complex_lit_init(cplx_lit_table *complex_literals){
7178 for(cl=0;cl<complex_literals->size();cl++){
7179 literal_t *l = complex_literals->get_literal(cl);
7180 // sprintf(tmpstr,"\tcomplex_literal_%d = ",cl);
7181 // ret += tmpstr + l->to_hfta_C_code() + ";\n";
7182 sprintf(tmpstr,"&(complex_literal_%d)",cl);
7183 ret += "\t" + l->to_hfta_C_code(tmpstr) + ";\n";
7184 // I think that the code below is obsolete
7185 // TODO: it is obsolete. add_cpx_lit is always
7186 // called with the handle indicator being false.
7187 // This entire structure should be cleansed.
7188 if(complex_literals->is_handle_ref(cl)){
7189 data_type *dt = new data_type( l->get_type() );
7190 sprintf(tmpstr,"\tlit_handle_%d = %s(&(f->complex_literal_%d));\n",
7191 cl, dt->hfta_handle_registration_name().c_str(), cl);
7200 static string gen_partial_fcn_init(vector<scalarexp_t *> &partial_fcns){
7204 for(p=0;p<partial_fcns.size();++p){
7205 data_type *pdt =partial_fcns[p]->get_data_type();
7206 literal_t empty_lit(pdt->type_indicator());
7207 if(pdt->is_buffer_type()){
7208 // sprintf(tmpstr,"\tpartial_fcn_result_%d = %s;\n",
7209 // p, empty_lit.to_hfta_C_code().c_str());
7210 sprintf(tmpstr,"&(partial_fcn_result_%d)",p);
7211 ret += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
7217 static string gen_pass_by_handle_init(
7218 vector<handle_param_tbl_entry *> ¶m_handle_table){
7222 for(ph=0;ph<param_handle_table.size();++ph){
7223 data_type pdt(param_handle_table[ph]->type_name);
7224 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7225 switch(param_handle_table[ph]->val_type){
7228 if(pdt.is_buffer_type()) ret += "&(";
7229 sprintf(tmpstr,"complex_literal_%d",param_handle_table[ph]->complex_literal_idx);
7231 if(pdt.is_buffer_type()) ret += ")";
7236 ret += param_handle_table[ph]->litval->to_hfta_C_code("") + ");\n";
7240 // query parameter handles are regstered/deregistered in the
7241 // load_params function.
7242 // ret += "t->param_"+param_handle_table[ph]->param_name;
7245 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7252 //------------------------------------------------------------
7253 // functions for destructor and deregistration code
7255 static string gen_complex_lit_dtr(cplx_lit_table *complex_literals){
7259 for(cl=0;cl<complex_literals->size();cl++){
7260 literal_t *l = complex_literals->get_literal(cl);
7261 data_type ldt( l->get_type() );
7262 if(ldt.is_buffer_type()){
7263 sprintf(tmpstr,"\t\t%s(&complex_literal_%d);\n",
7264 ldt.get_hfta_buffer_destroy().c_str(), cl );
7272 static string gen_pass_by_handle_dtr(
7273 vector<handle_param_tbl_entry *> ¶m_handle_table){
7277 for(ph=0;ph<param_handle_table.size();++ph){
7278 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7279 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7285 // Destroy all previous results
7286 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns){
7290 for(p=0;p<partial_fcns.size();++p){
7291 data_type *pdt =partial_fcns[p]->get_data_type();
7292 if(pdt->is_buffer_type()){
7293 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7294 pdt->get_hfta_buffer_destroy().c_str(), p );
7301 // Destroy previsou results of fcns in pfcn_set
7302 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns, set<int> &pfcn_set){
7304 set<int>::iterator si;
7306 for(si=pfcn_set.begin(); si!=pfcn_set.end(); ++si){
7307 data_type *pdt =partial_fcns[(*si)]->get_data_type();
7308 if(pdt->is_buffer_type()){
7309 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7310 pdt->get_hfta_buffer_destroy().c_str(), (*si) );
7318 //-------------------------------------------------------------------------
7319 // Functions related to se generation bookkeeping.
7321 static void get_new_pred_cids(predicate_t *pr, col_id_set &found_cids,
7322 col_id_set &new_cids, gb_table *gtbl){
7323 col_id_set this_pred_cids;
7324 col_id_set::iterator csi;
7326 // get colrefs in predicate not already found.
7327 gather_pr_col_ids(pr,this_pred_cids,gtbl);
7328 set_difference(this_pred_cids.begin(), this_pred_cids.end(),
7329 found_cids.begin(), found_cids.end(),
7330 inserter(new_cids,new_cids.begin()) );
7332 // We've found these cids, so update found_cids
7333 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7334 found_cids.insert((*csi));
7338 // after the call, new_cids will have the colrefs in se but not found_cids.
7339 // update found_cids with the new cids.
7340 static void get_new_se_cids(scalarexp_t *se, col_id_set &found_cids,
7341 col_id_set &new_cids, gb_table *gtbl){
7342 col_id_set this_se_cids;
7343 col_id_set::iterator csi;
7345 // get colrefs in se not already found.
7346 gather_se_col_ids(se,this_se_cids,gtbl);
7347 set_difference(this_se_cids.begin(), this_se_cids.end(),
7348 found_cids.begin(), found_cids.end(),
7349 inserter(new_cids,new_cids.begin()) );
7351 // We've found these cids, so update found_cids
7352 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7353 found_cids.insert((*csi));
7357 static string gen_unpack_cids(table_list *schema, col_id_set &new_cids, string on_problem, vector<bool> &needs_xform){
7359 col_id_set::iterator csi;
7361 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7362 int schref = (*csi).schema_ref;
7363 int tblref = (*csi).tblvar_ref;
7364 string field = (*csi).field;
7365 data_type dt(schema->get_type_name(schref,field));
7367 if(needs_xform[tblref]){
7368 unpack_fcn = dt.get_hfta_unpack_fcn();
7370 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
7372 if(dt.is_buffer_type()){
7373 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7375 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
7378 if(dt.is_buffer_type()){
7379 ret += "\tif(problem) return "+on_problem+" ;\n";
// generates the declaration of all the variables related to
// temp tuples generation
static std::string gen_decl_temp_vars(){
	std::string ret;
	ret += "\t// variables related to temp tuple generation\n";
	ret += "\tbool temp_tuple_received;\n";
	return ret;
}
7396 // generates initialization code for variables related to temp tuple processing
7397 static string gen_init_temp_vars(table_list *schema, vector<select_element *>& select_list, gb_table *gtbl){
7399 col_id_set::iterator csi;
7402 // Initialize internal state
7403 ret += "\ttemp_tuple_received = false;\n";
7405 col_id_set temp_cids; // colrefs unpacked thus far.
7407 for(s=0;s<select_list.size();s++){
7408 if (select_list[s]->se->get_data_type()->is_temporal()) {
7409 // Find the set of attributes accessed in this SE
7410 col_id_set new_cids;
7411 get_new_se_cids(select_list[s]->se,temp_cids, new_cids, gtbl);
7414 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7415 int schref = (*csi).schema_ref;
7416 int tblref = (*csi).tblvar_ref;
7417 string field = (*csi).field;
7418 data_type dt(schema->get_type_name(schref,field), schema->get_modifier_list(schref,field));
7420 sprintf(tmpstr,"\t unpack_var_%s_%d = %s;\n", field.c_str(), tblref,
7421 dt.is_increasing() ? dt.get_min_literal().c_str() : dt.get_max_literal().c_str());
7431 // generates a check if tuple is temporal
7432 static string gen_temp_tuple_check(string node_name, int channel) {
7436 sprintf(tmpstr, "tup%d", channel);
7437 string tup_name = tmpstr;
7438 sprintf(tmpstr, "schema_handle%d", channel);
7439 string schema_handle_name = tmpstr;
7440 string tuple_offset_name = "tuple_metadata_offset"+int_to_string(channel);
7442 // check if it is a temporary status tuple
7443 ret += "\t// check if tuple is temp status tuple\n";
7444 // ret += "\tif (ftaschema_is_temporal_tuple(" + schema_handle_name + ", " + tup_name + ".data)) {\n";
7445 ret += "\tif (ftaschema_is_temporal_tuple_offset(" + tuple_offset_name + ", " + tup_name + ".data)) {\n";
7446 ret += "\t\ttemp_tuple_received = true;\n";
7448 ret += "\telse\n\t\ttemp_tuple_received = false;\n\n";
7453 // generates unpacking code for all temporal attributes referenced in select
7454 static string gen_unpack_temp_vars(table_list *schema, col_id_set& found_cids, vector<select_element *>& select_list, gb_table *gtbl, vector<bool> &needs_xform) {
7458 // Unpack all the temporal attributes references in select list
7459 // we need it to be able to generate temp status tuples
7460 for(s=0;s<select_list.size();s++){
7461 if (select_list[s]->se->get_data_type()->is_temporal()) {
7462 // Find the set of attributes accessed in this SE
7463 col_id_set new_cids;
7464 get_new_se_cids(select_list[s]->se,found_cids, new_cids, gtbl);
7465 // Unpack these values.
7466 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
7474 // Generates temporal tuple generation code (except attribute packing)
7475 static string gen_init_temp_status_tuple(string node_name) {
7478 ret += "\t// create temp status tuple\n";
7479 ret += "\tresult.tuple_size = sizeof("+generate_tuple_name( node_name)+") + sizeof(gs_uint8_t);\n";
7480 ret += "\tresult.data = (gs_sp_t )malloc(result.tuple_size);\n";
7481 ret += "\tresult.heap_resident = true;\n";
7482 ret += "\t// Mark tuple as temporal\n";
7483 ret += "\t*((gs_sp_t )result.data + sizeof("+generate_tuple_name( node_name)+")) = TEMPORAL_TUPLE;\n";
7485 ret += "\t"+generate_tuple_name( node_name)+" *tuple = ("+
7486 generate_tuple_name( node_name) +" *)(result.data);\n";
7492 // Assume that all colrefs unpacked already ...
7493 static string gen_unpack_partial_fcn(table_list *schema,
7494 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7497 set<int>::iterator si;
7499 // Since set<..> is a "Sorted Associative Container",
7500 // we can walk through it in sorted order by walking from
7501 // begin() to end(). (and the partial fcns must be
7502 // evaluated in this order).
7503 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7504 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7505 ret += "\tif(retval) return "+on_problem+" ;\n";
7510 // Assume that all colrefs unpacked already ...
7511 // this time with cached functions.
7512 static string gen_unpack_partial_fcn(table_list *schema,
7513 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7514 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
7517 set<int>::iterator si;
7519 // Since set<..> is a "Sorted Associative Container",
7520 // we can walk through it in sorted order by walking from
7521 // begin() to end(). (and the partial fcns must be
7522 // evaluated in this order).
7523 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7524 if(fcn_ref_cnt[(*si)] > 1){
7525 ret += "\tif(fcn_ref_cnt_"+int_to_string((*si))+"==0){\n";
7527 if(is_partial_fcn[(*si)]){
7528 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7529 ret += "\tif(retval) return "+on_problem+" ;\n";
7531 if(fcn_ref_cnt[(*si)] > 1){
7532 if(!is_partial_fcn[(*si)]){
7533 ret += "\t\tpartial_fcn_result_"+int_to_string((*si))+"="+generate_cached_fcn(partial_fcns[(*si)],(*si),schema)+";\n";
7535 ret += "\t\tfcn_ref_cnt_"+int_to_string((*si))+"=1;\n";
7544 // This version finds and unpacks new colrefs.
7545 // found_cids gets updated with the newly unpacked cids.
7546 static string gen_full_unpack_partial_fcn(table_list *schema,
7547 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7548 col_id_set &found_cids, gb_table *gtbl, string on_problem,
7549 vector<bool> &needs_xform){
7551 set<int>::iterator slsi;
7553 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
7554 // find all new fields ref'd by this partial fcn.
7555 col_id_set new_cids;
7556 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
7557 // Unpack these values.
7558 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
7560 // Now evaluate the partial fcn.
7561 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
7562 ret += "\tif(retval) return "+on_problem+" ;\n";
7567 // This version finds and unpacks new colrefs.
7568 // found_cids gets updated with the newly unpacked cids.
7569 // BUT : only for the partial functions.
// Cached-function variant: fcn_ref_cnt / is_partial_fcn are indexed by the
// function numbers stored in pfcn_refs.
// NOTE(review): the closing braces of this function are not visible in this
// listing, so it cannot be told from here whether the first
// `if(is_partial_fcn[...])` encloses the whole loop body or only the colref
// unpacking -- confirm before restructuring.
7570 static string gen_full_unpack_partial_fcn(table_list *schema,
7571 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7572 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
7573 col_id_set &found_cids, gb_table *gtbl, string on_problem,
7574 vector<bool> &needs_xform){
7576 set<int>::iterator slsi;
7578 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
7579 if(is_partial_fcn[(*slsi)]){
7580 // find all new fields ref'd by this partial fcn.
7581 col_id_set new_cids;
7582 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
7583 // Unpack these values.
7584 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
7586 // Now evaluate the partial fcn.
// Functions referenced more than once are guarded by their
// fcn_ref_cnt_<n> flag in the generated code.
7587 if(fcn_ref_cnt[(*slsi)] > 1){
7588 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
7590 if(is_partial_fcn[(*slsi)]){
7591 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
7592 ret += "\tif(retval) return "+on_problem+" ;\n";
// Mark the function as evaluated.
7594 if(fcn_ref_cnt[(*slsi)] > 1){
7595 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
7604 static string gen_remaining_cached_fcns(table_list *schema,
7605 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7606 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn){
7608 set<int>::iterator slsi;
7610 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
7611 if(!is_partial_fcn[(*slsi)] && fcn_ref_cnt[(*slsi)] > 1){
7613 if(fcn_ref_cnt[(*slsi)] > 1){
7614 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
7615 ret += "\t\tpartial_fcn_result_"+int_to_string((*slsi))+"="+generate_cached_fcn(partial_fcns[(*slsi)],(*slsi),schema)+";\n";
7616 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
7625 // unpack the colrefs in cid_set not in found_cids
7626 static string gen_remaining_colrefs(table_list *schema,
7627 col_id_set &cid_set, col_id_set &found_cids, string on_problem,
7628 vector<bool> &needs_xform){
7630 col_id_set::iterator csi;
7632 for(csi=cid_set.begin(); csi!=cid_set.end();csi++){
7633 if(found_cids.count( (*csi) ) == 0){
7634 int schref = (*csi).schema_ref;
7635 int tblref = (*csi).tblvar_ref;
7636 string field = (*csi).field;
7637 data_type dt(schema->get_type_name(schref,field));
7639 if(needs_xform[tblref]){
7640 unpack_fcn = dt.get_hfta_unpack_fcn();
7642 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
7644 if(dt.is_buffer_type()){
7645 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7647 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
7650 if(dt.is_buffer_type()){
7651 ret.append("\tif(problem) return "+on_problem+" ;\n");
7658 static string gen_buffer_selvars(table_list *schema,
7659 vector<select_element *> &select_list){
7663 for(s=0;s<select_list.size();s++){
7664 scalarexp_t *se = select_list[s]->se;
7665 data_type *sdt = se->get_data_type();
7666 if(sdt->is_buffer_type() &&
7667 !( (se->get_operator_type() == SE_COLREF) ||
7668 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
7669 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
7671 sprintf(tmpstr,"selvar_%d",s);
7672 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
7673 ret += generate_se_code(se,schema) +";\n";
7679 static string gen_buffer_selvars_size(vector<select_element *> &select_list,table_list *schema){
7683 for(s=0;s<select_list.size();s++){
7684 scalarexp_t *se = select_list[s]->se;
7685 data_type *sdt = se->get_data_type();
7686 if(sdt->is_buffer_type()){
7687 if( !( (se->get_operator_type() == SE_COLREF) ||
7688 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
7689 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
7691 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
7694 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),
7695 generate_se_code(se,schema).c_str());
7703 static string gen_buffer_selvars_dtr(vector<select_element *> &select_list){
7707 for(s=0;s<select_list.size();s++){
7708 scalarexp_t *se = select_list[s]->se;
7709 data_type *sdt = se->get_data_type();
7710 if(sdt->is_buffer_type() &&
7711 !( (se->get_operator_type() == SE_COLREF) ||
7712 (se->get_operator_type() == SE_AGGR_STAR) ||
7713 (se->get_operator_type() == SE_AGGR_SE) ||
7714 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
7715 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
7717 sprintf(tmpstr,"\t\t%s(&selvar_%d);\n",
7718 sdt->get_hfta_buffer_destroy().c_str(), s );
7726 static string gen_pack_tuple(table_list *schema, vector<select_element *> &select_list, string node_name, bool temporal_only){
7730 ret += "\tint tuple_pos = sizeof("+generate_tuple_name(node_name)+") + sizeof(gs_uint8_t);\n";
7731 for(s=0;s<select_list.size();s++){
7732 scalarexp_t *se = select_list[s]->se;
7733 data_type *sdt = se->get_data_type();
7735 if(!temporal_only && sdt->is_buffer_type()){
7736 if( !( (se->get_operator_type() == SE_COLREF) ||
7737 (se->get_operator_type() == SE_FUNC && se->is_partial()))
7739 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
7741 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
7744 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code(se,schema).c_str());
7746 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code(se,schema).c_str());
7749 }else if (!temporal_only || sdt->is_temporal()) {
7750 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
7752 ret.append(generate_se_code(se,schema) );
7760 //-------------------------------------------------------------------------
7761 // functor generation methods
7762 //-------------------------------------------------------------------------
7764 /////////////////////////////////////////////////////////
7765 //// File Output Operator
7766 string output_file_qpn::generate_functor_name(){
7767 return("output_file_functor_" + normalize_name(get_node_name()));
7771 string output_file_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
7772 string ret = "class " + this->generate_functor_name() + "{\n";
7774 // Find the temporal field
7775 int temporal_field_idx;
7776 data_type *tdt = NULL;
7777 for(temporal_field_idx=0;temporal_field_idx<fields.size();temporal_field_idx++){
7778 tdt = new data_type(fields[temporal_field_idx]->get_type(), fields[temporal_field_idx]->get_modifier_list());
7779 if(tdt->is_temporal()){
7786 if(temporal_field_idx == fields.size()){
7787 fprintf(stderr,"ERROR, no temporal field for file output operator %s\n",node_name.c_str());
7791 ret += "private:\n";
7793 // var to save the schema handle
7794 ret += "\tint schema_handle0;\n";
7795 // tuple metadata offset
7796 ret += "\tint tuple_metadata_offset0;\n";
7797 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[temporal_field_idx]->get_name().c_str());
7800 // For unpacking the hashing fields, if any
7802 for(h=0;h<hash_flds.size();++h){
7803 sprintf(tmpstr,"unpack_var_%s", fields[hash_flds[h]]->get_name().c_str());
7804 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
7805 ret+="\t"+hdt->make_host_cvar(tmpstr)+";\n";
7806 if(hash_flds[h]!=temporal_field_idx){
7807 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[hash_flds[h]]->get_name().c_str());
7811 // Specail case for output file hashing
7812 if(n_streams>1 && hash_flds.size()==0){
7813 ret+="\tgs_uint32_t outfl_cnt;\n";
7816 ret += "//\t\tRemember the last posted timestamp.\n";
7817 ret+="\t"+tdt->make_host_cvar("timestamp")+";\n";
7818 ret+="\t"+tdt->make_host_cvar("last_bucket")+";\n";
7819 ret+="\t"+tdt->make_host_cvar("slack")+";\n";
7820 ret += "\tbool first_execution;\n";
7821 ret += "\tbool temp_tuple_received;\n";
7822 ret += "\tbool is_eof;\n";
7824 ret += "\tgs_int32_t bucketwidth;\n";
7827 //-------------------
7828 // The functor constructor
7829 // pass in a schema handle (e.g. for the 1st input stream),
7830 // use it to determine how to unpack the merge variable.
7831 // ASSUME that both streams have the same layout,
7832 // just duplicate it.
7835 ret += "//\t\tFunctor constructor.\n";
7836 ret += this->generate_functor_name()+"(int schema_hndl){\n";
7838 ret += "\tschema_handle0 = schema_hndl;\n";
7839 // tuple metadata offset
7840 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
7842 if(output_spec->bucketwidth == 0)
7843 ret += "\tbucketwidth = 60;\n";
7845 ret += "\tbucketwidth = "+int_to_string(output_spec->bucketwidth)+";\n";
7846 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
7848 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[temporal_field_idx]->get_name().c_str(), fields[temporal_field_idx]->get_name().c_str());
7850 // Hashing field unpacking, if any
7851 for(h=0;h<hash_flds.size();++h){
7852 if(hash_flds[h]!=temporal_field_idx){
7853 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[hash_flds[h]]->get_name().c_str(),fields[hash_flds[h]]->get_name().c_str());
7858 ret+="\tfirst_execution = true;\n";
7860 // Initialize internal state
7861 ret += "\ttemp_tuple_received = false;\n";
7863 // Init last timestamp values to minimum value for their type
7864 if (tdt->is_increasing()){
7865 ret+="\ttimestamp = " + tdt->get_min_literal() + ";\n";
7866 ret+="\tlast_bucket = " + tdt->get_min_literal() + ";\n";
7868 ret+="\ttimestamp = " + tdt->get_max_literal() + ";\n";
7869 ret+="\tlast_bucket = " + tdt->get_max_literal() + ";\n";
7875 ret += "//\t\tFunctor destructor.\n";
7876 ret += "~"+this->generate_functor_name()+"(){\n";
7880 ret += "int load_params_"+this->generate_functor_name()+"(gs_int32_t sz, void *value){return 0;}\n";
7881 ret += "void destroy_params_"+this->generate_functor_name()+"(){}\n";
7883 // Register new parameter block
7884 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
7885 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
7886 ret += "\treturn this->load_params_"+this->generate_functor_name()+
7890 ret+="\nbool temp_status_received(const host_tuple& tup0)/* const*/ {\n";
7891 ret+="\tgs_int32_t problem;\n";
7893 ret += "\tvoid *tup_ptr = (void *)(&tup0);\n";
7894 ret += "\tis_eof = ftaschema_is_eof_tuple(schema_handle0,tup_ptr);\n";
7896 ret += gen_temp_tuple_check(this->node_name, 0);
7898 sprintf(tmpstr,"\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", tdt->get_hfta_unpack_fcn_noxf().c_str(), fields[temporal_field_idx]->get_name().c_str(), 0);
7901 for(h=0;h<hash_flds.size();++h){
7902 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
7903 sprintf(tmpstr,"\tunpack_var_%s = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", fields[hash_flds[h]]->get_name().c_str(), hdt->get_hfta_unpack_fcn_noxf().c_str(), fields[hash_flds[h]]->get_name().c_str(), 0);
7907 " return temp_tuple_received;\n"
7913 "bool new_epoch(){\n"
7914 " if(first_execution || (last_bucket + 1) * bucketwidth <= timestamp){\n"
7915 " last_bucket = timestamp / bucketwidth;\n"
7916 " first_execution = false;\n"
7926 "inline gs_uint32_t output_hash(){return 0;}\n\n";
7928 if(hash_flds.size()==0){
7930 "gs_uint32_t output_hash(){\n"
7932 " if(outfl_cnt >= "+int_to_string(n_streams)+")\n"
7934 " return outfl_cnt;\n"
7940 "gs_uint32_t output_hash(){\n"
7941 " gs_uint32_t ret = "
7943 for(h=0;h<hash_flds.size();++h){
7945 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
7946 if(hdt->use_hashfunc()){
7947 sprintf(tmpstr,"%s(&(unpack_var_%s))",hdt->get_hfta_hashfunc().c_str(),fields[hash_flds[h]]->get_name().c_str());
7949 sprintf(tmpstr,"unpack_var_%s",fields[hash_flds[h]]->get_name().c_str());
7955 " return ret % "+int_to_string(hash_flds.size())+";\n"
7962 "gs_uint32_t num_file_streams(){\n"
7963 " return("+int_to_string(n_streams)+");\n"
7968 "string get_filename_base(){\n"
7969 " char tmp_fname[500];\n";
7971 string output_filename_base = hfta_query_name+filestream_id;
7973 if(n_hfta_clones > 1){
7974 output_filename_base += "_"+int_to_string(parallel_idx);
7980 if(output_spec->output_directory == "")
7982 " sprintf(tmp_fname,\""+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
7984 " sprintf(tmp_fname,\""+output_spec->output_directory+"/"+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
7986 " return (string)(tmp_fname);\n"
7992 "bool do_compression(){\n";
7994 ret += " return true;\n";
7996 ret += " return false;\n";
8000 "bool is_eof_tuple(){\n"
8004 "bool propagate_tuple(){\n"
8007 ret+="\treturn false;\n";
8009 ret+="\treturn true;\n";
8011 // create a temp status tuple
8012 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8014 ret += gen_init_temp_status_tuple(this->hfta_query_name);
8016 sprintf(tmpstr,"\ttuple->tuple_var%d = timestamp;\n",temporal_field_idx);
8021 ret += "\treturn 0;\n";
// output_file_qpn::generate_operator
// Returns the source text of one statement that declares `op<i>` as a pointer to
// a newly allocated file-output operator, instantiated on this node's functor
// class (generate_functor_name()).
//   i      - number appended to "op" to form the generated variable name.
//   params - text inserted verbatim as the leading constructor argument(s).
// After params, the generated constructor call receives hfta_query_name as a
// quoted string and the identifier <hfta_query_name>_schema_definition.
8029 string output_file_qpn::generate_operator(int i, string params){
// Default operator template; the switch below may replace it.
8030 string optype = "file_output_operator";
// Select the operator template name from compression_type.
// NOTE(review): the case labels are not visible in this excerpt; presumably the
// three assignments correspond to uncompressed, "z" and "b" compressed output
// variants -- confirm against the compression_type enumeration.
8031 switch(compression_type){
8033 optype = "file_output_operator";
8036 optype = "zfile_output_operator";
8039 optype = "bfile_output_operator";
// Emit: <optype><functor> *op<i> = new <optype><functor>(<params>, "<query>",<query>_schema_definition);
8043 return(" "+optype+"<" +
8044 generate_functor_name() +
8045 "> *op"+int_to_string(i)+" = new "+optype+"<"+
8046 generate_functor_name() +">("+params+", \"" + hfta_query_name + "\""
8047 + "," + hfta_query_name + "_schema_definition);\n");
8050 /////////////////////////////////////////////////////////
// spx_qpn::generate_functor_name
// Returns the name of the generated functor class for this node:
// "spx_functor_" followed by the node name passed through normalize_name().
// NOTE(review): normalize_name() is applied twice here, whereas
// sgah_qpn::generate_functor_name() applies it once; the second call looks
// redundant if normalize_name() is idempotent -- confirm before changing, since
// the generated class name must stay stable.
8054 string spx_qpn::generate_functor_name(){
8055 return("spx_functor_" + normalize_name(normalize_name(this->get_node_name())));
// spx_qpn::generate_functor
// Assembles, in the local string `ret`, the source text of a C++ class named
// generate_functor_name() for this node.  The text is emitted in this order:
//   - private members (state flag, schema handle, temp-tuple variables, access
//     variables, partial-function storage, complex literals, pass-by-handle
//     storage, query parameters),
//   - constructor and destructor,
//   - parameter-block load/destroy routines and set_param_block(),
//   - predicate(host_tuple&) and create_output_tuple(),
//   - temp_status_received() and create_temp_status_tuple(host_tuple&).
// Parameters:
//   schema      - forwarded to the code-generation helpers called below.
//   Ext_fcns    - forwarded to the partial-function search and to
//                 get_cplx_lit_tbl() / get_handle_param_tbl().
//   needs_xform - forwarded to the unpack-code generators.
// Side effects: sets the global segen_gb_tbl to NULL; the partial-function
// search marks the scalar expressions of this node, and set_partial_ref(-1)
// is called on some of them.
// NOTE(review): several lines of this function are not visible in this excerpt
// (local declarations such as the loop counters, cid_set, sl_pfcns, pfcn_refs,
// closing braces and the final return); presumably `ret` is returned -- confirm.
8058 string spx_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8059 // Initialize generate utility globals
// A select/project node has no group-by table, so the global is cleared.
8060 segen_gb_tbl = NULL;
// Open the generated class definition.
8062 string ret = "class " + this->generate_functor_name() + "{\n";
8064 // Find variables referenced in this query node.
8067 col_id_set::iterator csi;
// Collect column ids from every WHERE clause element and every select item
// into cid_set.
8070 for(w=0;w<where.size();++w)
8071 gather_pr_col_ids(where[w]->pr,cid_set,NULL);
8072 for(s=0;s<select_list.size();s++){
8073 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
8077 // Private variables : store the state of the functor.
8078 // 1) variables for unpacked attributes
8079 // 2) offsets of the unpacked attributes
8080 // 3) storage of partial functions
8081 // 4) storage of complex literals (i.e., require a constructor)
8083 ret += "private:\n";
8084 ret += "\tbool first_execution;\t// internal processing state \n";
8085 ret += "\tint schema_handle0;\n";
8087 // generate the declaration of all the variables related to
8088 // temp tuples generation
8089 ret += gen_decl_temp_vars();
8092 // unpacked attribute storage, offsets
8093 ret += "//\t\tstorage and offsets of accessed fields.\n";
8094 ret += generate_access_vars(cid_set,schema);
8095 // tuple metadata management
8096 ret += "\tint tuple_metadata_offset0;\n";
8098 // Variables to store results of partial functions.
8099 // WARNING find_partial_functions modifies the SE
8100 // (it marks the partial function id).
8101 ret += "//\t\tParital function result storage\n";
// partial_fcns, fcn_ref_cnt and is_partial_fcn are filled in parallel (indexed
// by the same p below) by the searches over the select list and WHERE clause.
8102 vector<scalarexp_t *> partial_fcns;
8103 vector<int> fcn_ref_cnt;
8104 vector<bool> is_partial_fcn;
8105 for(s=0;s<select_list.size();s++){
8106 find_partial_fcns(select_list[s]->se, &partial_fcns,&fcn_ref_cnt,&is_partial_fcn, Ext_fcns);
8108 for(w=0;w<where.size();w++){
8109 find_partial_fcns_pr(where[w]->pr, &partial_fcns, &fcn_ref_cnt,&is_partial_fcn,Ext_fcns);
8111 // Unmark non-partial expensive functions referenced only once.
// Entries that are not partial functions and have a reference count of at
// most one get their partial reference reset to -1.
8112 for(p=0; p<partial_fcns.size();p++){
8113 if(!is_partial_fcn[p] && fcn_ref_cnt[p] <= 1){
8114 partial_fcns[p]->set_partial_ref(-1);
8117 if(partial_fcns.size()>0){
8118 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,true);
8121 // Complex literals (i.e., they need constructors)
8122 ret += "//\t\tComplex literal storage.\n";
8123 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8124 ret += generate_complex_lit_vars(complex_literals);
8126 // Pass-by-handle parameters
8127 ret += "//\t\tPass-by-handle storage.\n";
8128 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8129 ret += generate_pass_by_handle_vars(param_handle_table);
8131 // Variables to hold parameters
8132 ret += "//\tfor query parameters\n";
8133 ret += generate_param_vars(param_tbl);
8136 // The publicly exposed functions
8138 ret += "\npublic:\n";
8141 //-------------------
8142 // The functor constructor
8143 // pass in the schema handle.
8144 // 1) make assignments to the unpack offset variables
8145 // 2) initialize the complex literals
8146 // 3) Set the initial values of the temporal attributes
8147 // referenced in select clause (in case we need to emit
8148 // temporal tuple before receiving first tuple )
8150 ret += "//\t\tFunctor constructor.\n";
// The generated constructor parameter has the same name as the member, hence
// the explicit this-> in the assignment emitted below.
8151 ret += this->generate_functor_name()+"(int schema_handle0){\n";
8153 // save schema handle
8154 ret += "this->schema_handle0 = schema_handle0;\n";
8157 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8158 ret += gen_access_var_init(cid_set);
8160 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8163 ret += "//\t\tInitialize complex literals.\n";
8164 ret += gen_complex_lit_init(complex_literals);
8166 // Initialize partial function results so they can be safely GC'd
8167 ret += gen_partial_fcn_init(partial_fcns);
8169 // Initialize non-query-parameter parameter handles
8170 ret += gen_pass_by_handle_init(param_handle_table);
8172 // Init temporal attributes referenced in select list
8173 ret += gen_init_temp_vars(schema, select_list, NULL);
8178 //-------------------
8179 // Functor destructor
8180 ret += "//\t\tFunctor destructor.\n";
8181 ret += "~"+this->generate_functor_name()+"(){\n";
8183 // clean up buffer-type complex literals.
8184 ret += gen_complex_lit_dtr(complex_literals);
8186 // Deregister the pass-by-handle parameters
8187 ret += "/* register and de-register the pass-by-handle parameters */\n";
8188 ret += gen_pass_by_handle_dtr(param_handle_table);
8190 // Reclaim buffer space for partial function results
8191 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8192 ret += gen_partial_fcn_dtr(partial_fcns);
8195 // Destroy the parameters, if any need to be destroyed
8196 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8201 //-------------------
8202 // Parameter manipulation routines
// Emits the routines that the generated destructor and set_param_block()
// refer to as load_params_<functor> / destroy_params_<functor>.
8203 ret += generate_load_param_block(this->generate_functor_name(),
8204 this->param_tbl,param_handle_table );
8205 ret += generate_delete_param_block(this->generate_functor_name(),
8206 this->param_tbl,param_handle_table);
8209 //-------------------
8210 // Register new parameter block
// Generated set_param_block(): destroy the current parameters, then load the
// new block and return the loader's result.
8211 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8212 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8213 ret += "\treturn this->load_params_"+this->generate_functor_name()+
8218 //-------------------
8219 // The selection predicate.
8220 // Unpack variables for 1 cnf element
8221 // at a time, return false immediately if the
8223 // optimization : evaluate the cheap cnf elements
8224 // first, the expensive ones last.
8226 ret += "bool predicate(host_tuple &tup0){\n";
8227 // Variables for execution of the function.
8228 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8229 // Initialize cached function indicators.
// Only functions referenced more than once get a fcn_ref_cnt_<p> reset.
8230 for(p=0;p<partial_fcns.size();++p){
8231 if(fcn_ref_cnt[p]>1){
8232 ret+="\tfcn_ref_cnt_"+int_to_string(p)+"=0;\n";
8237 ret += gen_temp_tuple_check(this->node_name, 0);
8239 if(partial_fcns.size()>0){ // partial fcn access failure
8240 ret += "\tgs_retval_t retval = 0;\n";
8244 // Reclaim buffer space for partial function results
8245 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8246 ret += gen_partial_fcn_dtr(partial_fcns);
8248 col_id_set found_cids; // colrefs unpacked thus far.
8249 ret += gen_unpack_temp_vars(schema, found_cids, select_list, NULL, needs_xform);
8251 // For temporal status tuple we don't need to do anything else
8252 ret += "\tif (temp_tuple_received) return false;\n\n";
// One generated test per WHERE clause element: unpack the columns and partial
// functions it needs, then return false from the generated predicate as soon
// as the element evaluates to false.
8255 for(w=0;w<where.size();++w){
8256 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8258 // Find the set of variables accessed in this CNF elem,
8259 // but in no previous element.
8260 col_id_set new_cids;
8261 get_new_pred_cids(where[w]->pr,found_cids, new_cids, NULL);
8262 // Unpack these values.
8263 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
8264 // Find partial fcns ref'd in this cnf element
8266 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8267 ret += gen_unpack_partial_fcn(schema,partial_fcns,pfcn_refs,fcn_ref_cnt, is_partial_fcn, "false");
// NOTE(review): the statement below ends one line with '+' and starts the next
// with '+', so the second '+' is a unary plus applied to the string literal.
// It still concatenates as intended, but the extra '+' looks accidental.
8269 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8270 +") ) return(false);\n";
8273 // The partial functions ref'd in the select list
8274 // must also be evaluated. If one returns false,
8275 // then implicitly the predicate is false.
8277 for(s=0;s<select_list.size();s++){
8278 collect_partial_fcns(select_list[s]->se, sl_pfcns);
// The unbraced if guards only the emitted comment line; the call to
// gen_full_unpack_partial_fcn() that follows is unconditional.
8280 if(sl_pfcns.size() > 0)
8281 ret += "//\t\tUnpack remaining partial fcns.\n";
8282 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
8283 fcn_ref_cnt, is_partial_fcn,
8284 found_cids, NULL, "false", needs_xform);
8286 // Unpack remaining fields
8287 ret += "//\t\tunpack any remaining fields from the input tuple.\n";
8288 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
8291 ret += "\treturn(true);\n";
8295 //-------------------
8296 // The output tuple function.
8297 // Unpack the remaining attributes into
8298 // the placeholder variables, unpack the
8299 // partial fcn refs, then pack up the tuple.
8301 ret += "host_tuple create_output_tuple() {\n";
8302 ret += "\thost_tuple tup;\n";
8303 ret += "\tgs_retval_t retval = 0;\n";
8305 // Unpack any remaining cached functions.
8306 ret += gen_remaining_cached_fcns(schema, partial_fcns, sl_pfcns,
8307 fcn_ref_cnt, is_partial_fcn);
8310 // Now, compute the size of the tuple.
8312 // Unpack any BUFFER type selections into temporaries
8313 // so that I can compute their size and not have
8314 // to recompute their value during tuple packing.
8315 // I can use regular assignment here because
8316 // these temporaries are non-persistent.
8318 ret += "//\t\tCompute the size of the tuple.\n";
8319 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
8321 // Unpack all buffer type selections, to be able to compute their size
8322 ret += gen_buffer_selvars(schema, select_list);
8324 // The size of the tuple is the size of the tuple struct plus the
8325 // size of the buffers to be copied in.
// One extra gs_uint8_t is added after the struct; the generated code below
// stores the REGULAR_TUPLE marker at that offset.
8328 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
8329 ret += gen_buffer_selvars_size(select_list,schema);
8332 // Allocate tuple data block.
8333 ret += "//\t\tCreate the tuple block.\n";
8334 ret += "\ttup.data = malloc(tup.tuple_size);\n";
8335 ret += "\ttup.heap_resident = true;\n";
8336 // Mark tuple as regular
8337 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
8339 // ret += "\ttup.channel = 0;\n";
8340 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
8341 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
8344 // (Here, offsets are hard-wired. is this a problem?)
8346 ret += "//\t\tPack the fields into the tuple.\n";
// Last argument false here; the temp-status variant further down passes true.
8347 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
8349 // Delete string temporaries
8350 ret += gen_buffer_selvars_dtr(select_list);
8352 ret += "\treturn tup;\n";
8355 //-------------------------------------------------------------------
8356 // Temporal update functions
8358 ret += "bool temp_status_received(){return temp_tuple_received;};\n\n";
8361 // create a temp status tuple
8362 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8364 ret += gen_init_temp_status_tuple(this->get_node_name());
8367 // (Here, offsets are hard-wired. is this a problem?)
8369 ret += "//\t\tPack the fields into the tuple.\n";
8370 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
8372 ret += "\treturn 0;\n";
// spx_qpn::generate_operator
// Returns the source text of one statement that declares `op<i>` as a pointer to
// a newly allocated select_project_operator instantiated on this node's functor
// class (generate_functor_name()).
//   i      - number appended to "op" to form the generated variable name.
//   params - text inserted verbatim as the leading constructor argument(s); the
//            node name follows it as a quoted string argument.
8379 string spx_qpn::generate_operator(int i, string params){
8381 return(" select_project_operator<" +
8382 generate_functor_name() +
8383 "> *op"+int_to_string(i)+" = new select_project_operator<"+
8384 generate_functor_name() +">("+params+", \"" + get_node_name() + "\");\n");
8388 ////////////////////////////////////////////////////////////////
// sgah_qpn::generate_functor_name
// Returns the name of the generated functor class for this node:
// "sgah_functor_" followed by the node name after one pass through
// normalize_name().  The same name is used as the prefix of the generated
// _groupdef, _aggrdef and _gb_patterns identifiers in generate_functor().
8393 string sgah_qpn::generate_functor_name(){
8394 return("sgah_functor_" + normalize_name(this->get_node_name()));
8398 string sgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8402 // Initialize generate utility globals
8403 segen_gb_tbl = &(gb_tbl);
8405 // Might need to generate empty values for cube processing.
8406 map<int, string> structured_types;
8407 for(g=0;g<gb_tbl.size();++g){
8408 if(gb_tbl.get_data_type(g)->is_structured_type()){
8409 structured_types[gb_tbl.get_data_type(g)->type_indicator()] = gb_tbl.get_data_type(g)->get_type_str();
8413 //--------------------------------
8414 // group definition class
8415 string ret = "class " + generate_functor_name() + "_groupdef{\n";
8417 for(g=0;g<this->gb_tbl.size();g++){
8418 sprintf(tmpstr,"gb_var%d",g);
8419 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8421 // empty structured literals
8422 map<int, string>::iterator sii;
8423 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8424 data_type dt(sii->second);
8425 literal_t empty_lit(sii->first);
8426 ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8429 if(structured_types.size()==0){
8430 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
8432 ret += "\t"+generate_functor_name() + "_groupdef(){}\n";
8436 ret += "\t"+generate_functor_name() + "_groupdef("+
8437 this->generate_functor_name() + "_groupdef *gd){\n";
8438 for(g=0;g<gb_tbl.size();g++){
8439 data_type *gdt = gb_tbl.get_data_type(g);
8440 if(gdt->is_buffer_type()){
8441 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8442 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8445 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8450 ret += "\t"+generate_functor_name() + "_groupdef("+
8451 this->generate_functor_name() + "_groupdef *gd, bool *pattern){\n";
8452 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8453 literal_t empty_lit(sii->first);
8454 ret += "\t\t"+empty_lit.to_hfta_C_code("&"+empty_lit.hfta_empty_literal_name())+";\n";
8456 for(g=0;g<gb_tbl.size();g++){
8457 data_type *gdt = gb_tbl.get_data_type(g);
8458 ret += "\t\tif(pattern["+int_to_string(g)+"]){\n";
8459 if(gdt->is_buffer_type()){
8460 sprintf(tmpstr,"\t\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8461 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8464 sprintf(tmpstr,"\t\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8467 ret += "\t\t}else{\n";
8468 literal_t empty_lit(gdt->type_indicator());
8469 if(empty_lit.is_cpx_lit()){
8470 ret +="\t\t\tgb_var"+int_to_string(g)+"= "+empty_lit.hfta_empty_literal_name()+";\n";
8472 ret +="\t\t\tgb_var"+int_to_string(g)+"="+empty_lit.to_hfta_C_code("")+";\n";
8478 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
8479 for(g=0;g<gb_tbl.size();g++){
8480 data_type *gdt = gb_tbl.get_data_type(g);
8481 if(gdt->is_buffer_type()){
8482 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
8483 gdt->get_hfta_buffer_destroy().c_str(), g );
8490 for(g=0;g<gb_tbl.size();g++){
8491 data_type *gdt = gb_tbl.get_data_type(g);
8492 if(gdt->is_temporal()){
8497 ret += tgdt->get_host_cvar_type()+" get_curr_gb(){\n";
8498 ret+="\treturn gb_var"+int_to_string(g)+";\n";
8503 //--------------------------------
8504 // aggr definition class
8505 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
8507 for(a=0;a<aggr_tbl.size();a++){
8508 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
8509 sprintf(tmpstr,"aggr_var%d",a);
8510 if(aggr_tbl.is_builtin(a)){
8511 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
8512 if(aggr_tbl.get_op(a) == "AVG"){ // HACK!
8513 data_type cnt_type = data_type("ullong");
8514 ret+="\t"+cnt_type.make_host_cvar(string(tmpstr)+"_cnt")+";\n";
8515 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(string(tmpstr)+"_sum")+";\n";
8518 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
8522 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
8524 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
8525 for(a=0;a<aggr_tbl.size();a++){
8526 if(aggr_tbl.is_builtin(a)){
8527 data_type *adt = aggr_tbl.get_data_type(a);
8528 if(adt->is_buffer_type()){
8529 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
8530 adt->get_hfta_buffer_destroy().c_str(), a );
8534 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
8535 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
8536 ret+="(aggr_var"+int_to_string(a)+"));\n";
8542 //-------------------------------------------
8543 // group-by patterns for the functor,
8544 // initialization within the class is cumbersome.
8545 int n_patterns = gb_tbl.gb_patterns.size();
8547 ret += "bool "+this->generate_functor_name()+"_gb_patterns["+int_to_string(n_patterns)+
8548 "]["+int_to_string(gb_tbl.size())+"] = {\n";
8549 if(n_patterns == 0){
8550 for(i=0;i<gb_tbl.size();++i){
8555 for(i=0;i<n_patterns;++i){
8556 if(i>0) ret += ",\n";
8558 for(j=0;j<gb_tbl.size();j++){
8559 if(j>0) ret += ", ";
8560 if(gb_tbl.gb_patterns[i][j]){
8573 //--------------------------------
8575 ret += "class " + this->generate_functor_name() + "{\n";
8577 // Find variables referenced in this query node.
8580 col_id_set::iterator csi;
8582 for(w=0;w<where.size();++w)
8583 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
8584 for(w=0;w<having.size();++w)
8585 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
8586 for(g=0;g<gb_tbl.size();g++)
8587 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
8589 for(s=0;s<select_list.size();s++){
8590 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
8594 // Private variables : store the state of the functor.
8595 // 1) variables for unpacked attributes
8596 // 2) offsets of the unpacked attributes
8597 // 3) storage of partial functions
8598 // 4) storage of complex literals (i.e., require a constructor)
8600 ret += "private:\n";
8602 // var to save the schema handle
8603 ret += "\tint schema_handle0;\n";
8604 // metadata from schema handle
8605 ret += "\tint tuple_metadata_offset0;\n";
8607 // generate the declaration of all the variables related to
8608 // temp tuples generation
8609 ret += gen_decl_temp_vars();
8611 // unpacked attribute storage, offsets
8612 ret += "//\t\tstorage and offsets of accessed fields.\n";
8613 ret += generate_access_vars(cid_set, schema);
8615 // Variables to store results of partial functions.
8616 // WARNING find_partial_functions modifies the SE
8617 // (it marks the partial function id).
8618 ret += "//\t\tParital function result storage\n";
8619 vector<scalarexp_t *> partial_fcns;
8620 vector<int> fcn_ref_cnt;
8621 vector<bool> is_partial_fcn;
8622 for(s=0;s<select_list.size();s++){
8623 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
8625 for(w=0;w<where.size();w++){
8626 find_partial_fcns_pr(where[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
8628 for(w=0;w<having.size();w++){
8629 find_partial_fcns_pr(having[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
8631 for(g=0;g<gb_tbl.size();g++){
8632 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns,NULL,NULL, Ext_fcns);
8634 for(a=0;a<aggr_tbl.size();a++){
8635 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns,NULL,NULL, Ext_fcns);
8637 if(partial_fcns.size()>0){
8638 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
8639 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
8642 // Complex literals (i.e., they need constructors)
8643 ret += "//\t\tComplex literal storage.\n";
8644 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8645 ret += generate_complex_lit_vars(complex_literals);
8647 // Pass-by-handle parameters
8648 ret += "//\t\tPass-by-handle storage.\n";
8649 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8650 ret += generate_pass_by_handle_vars(param_handle_table);
8653 // variables to hold parameters.
8654 ret += "//\tfor query parameters\n";
8655 ret += generate_param_vars(param_tbl);
8657 // Is there a temporal flush? If so create flush temporaries,
8658 // create flush indicator.
8659 bool uses_temporal_flush = false;
8660 for(g=0;g<gb_tbl.size();g++){
8661 data_type *gdt = gb_tbl.get_data_type(g);
8662 if(gdt->is_temporal())
8663 uses_temporal_flush = true;
8666 if(uses_temporal_flush){
8667 ret += "//\t\tFor temporal flush\n";
8668 for(g=0;g<gb_tbl.size();g++){
8669 data_type *gdt = gb_tbl.get_data_type(g);
8670 if(gdt->is_temporal()){
8671 sprintf(tmpstr,"last_gb%d",g);
8672 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8673 sprintf(tmpstr,"last_flushed_gb%d",g);
8674 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8677 ret += "\tbool needs_temporal_flush;\n";
8681 // The publicly exposed functions
8683 ret += "\npublic:\n";
8686 //-------------------
8687 // The functor constructor
8688 // pass in the schema handle.
8689 // 1) make assignments to the unpack offset variables
8690 // 2) initialize the complex literals
8692 ret += "//\t\tFunctor constructor.\n";
8693 ret += this->generate_functor_name()+"(int schema_handle0){\n";
8695 // save the schema handle
8696 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
8699 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8700 ret += gen_access_var_init(cid_set);
8702 ret += "tuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8705 ret += "//\t\tInitialize complex literals.\n";
8706 ret += gen_complex_lit_init(complex_literals);
8708 // Initialize partial function results so they can be safely GC'd
8709 ret += gen_partial_fcn_init(partial_fcns);
8711 // Initialize non-query-parameter parameter handles
8712 ret += gen_pass_by_handle_init(param_handle_table);
8714 // temporal flush variables
8715 // ASSUME that structured values won't be temporal.
8716 if(uses_temporal_flush){
8717 ret += "//\t\tInitialize temporal flush variables.\n";
8718 for(g=0;g<gb_tbl.size();g++){
8719 data_type *gdt = gb_tbl.get_data_type(g);
8720 if(gdt->is_temporal()){
8721 literal_t gl(gdt->type_indicator());
8722 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
8724 sprintf(tmpstr,"\tlast_flushed_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
8728 ret += "\tneeds_temporal_flush = false;\n";
8731 // Init temporal attributes referenced in select list
8732 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
8736 //-------------------
8737 // Functor destructor
8738 ret += "//\t\tFunctor destructor.\n";
8739 ret += "~"+this->generate_functor_name()+"(){\n";
8741 // clean up buffer type complex literals
8742 ret += gen_complex_lit_dtr(complex_literals);
8744 // Deregister the pass-by-handle parameters
8745 ret += "/* register and de-register the pass-by-handle parameters */\n";
8746 ret += gen_pass_by_handle_dtr(param_handle_table);
8748 // clean up partial function results.
8749 ret += "/* clean up partial function storage */\n";
8750 ret += gen_partial_fcn_dtr(partial_fcns);
8752 // Destroy the parameters, if any need to be destroyed
8753 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8758 //-------------------
8759 // Parameter manipulation routines
8760 ret += generate_load_param_block(this->generate_functor_name(),
8761 this->param_tbl,param_handle_table);
8762 ret += generate_delete_param_block(this->generate_functor_name(),
8763 this->param_tbl,param_handle_table);
8765 //-------------------
8766 // Register new parameter block
8768 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8769 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8770 ret += "\treturn this->load_params_"+this->generate_functor_name()+
8774 // -----------------------------------
8775 // group-by pattern support
8778 "int n_groupby_patterns(){\n"
8779 " return "+int_to_string(gb_tbl.gb_patterns.size())+";\n"
8781 "bool *get_pattern(int p){\n"
8782 " return "+this->generate_functor_name()+"_gb_patterns[p];\n"
8789 //-------------------
8790 // the create_group method.
8791 // This method creates a group in a buffer passed in
8792 // (to allow for creation on the stack).
8793 // There are also a couple of side effects:
8794 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
8795 // 2) determine if a temporal flush is required.
8797 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
8798 // Variables for execution of the function.
8799 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8801 if(partial_fcns.size()>0){ // partial fcn access failure
8802 ret += "\tgs_retval_t retval = 0;\n";
8806 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
8807 "_groupdef *) buffer;\n";
8809 // Start by cleaning up partial function results
8810 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8811 set<int> w_pfcns; // partial fcns in where clause
8812 for(w=0;w<where.size();++w)
8813 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
8815 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
8816 for(g=0;g<gb_tbl.size();g++){
8817 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
8819 for(a=0;a<aggr_tbl.size();a++){
8820 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
8822 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
8823 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
8824 // ret += gen_partial_fcn_dtr(partial_fcns);
8827 ret += gen_temp_tuple_check(this->node_name, 0);
8828 col_id_set found_cids; // colrefs unpacked thus far.
8829 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
8832 // Save temporal group-by variables
8835 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
8837 for(g=0;g<gb_tbl.size();g++){
8839 data_type *gdt = gb_tbl.get_data_type(g);
8841 if(gdt->is_temporal()){
8842 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8843 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
8851 // Compare the temporal GB vars with the stored ones,
8852 // set flush indicator and update stored GB vars if there is any change.
8854 ret += "// hfta_disorder = "+int_to_string(hfta_disorder)+"\n";
8855 if(hfta_disorder < 2){
8856 if(uses_temporal_flush){
8858 bool first_one = true;
8859 for(g=0;g<gb_tbl.size();g++){
8860 data_type *gdt = gb_tbl.get_data_type(g);
8862 if(gdt->is_temporal()){
8863 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
8864 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
8865 if(first_one){first_one = false;} else {ret += ") && (";}
8866 ret += generate_equality_test(lhs_op, rhs_op, gdt);
8870 for(g=0;g<gb_tbl.size();g++){
8871 data_type *gdt = gb_tbl.get_data_type(g);
8872 if(gdt->is_temporal()){
8873 if(gdt->is_buffer_type()){
8874 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
8876 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
8878 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
8883 ret += "\t\tneeds_temporal_flush=true;\n";
8884 ret += "\t\t}else{\n"
8885 "\t\t\tneeds_temporal_flush=false;\n"
8889 ret+= "\tif(temp_tuple_received && !( (";
8890 bool first_one = true;
8891 for(g=0;g<gb_tbl.size();g++){
8892 data_type *gdt = gb_tbl.get_data_type(g);
8894 if(gdt->is_temporal()){
8895 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
8896 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
8897 if(first_one){first_one = false;} else {ret += ") && (";}
8898 ret += generate_equality_test(lhs_op, rhs_op, gdt);
8904 for(g=0;g<gb_tbl.size();g++){
8905 data_type *gdt = gb_tbl.get_data_type(g);
8906 if(gdt->is_temporal()){
8908 if(gdt->is_buffer_type()){
8909 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
8911 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
8913 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
8919 data_type *tgdt = gb_tbl.get_data_type(temporal_g);
8920 literal_t gl(tgdt->type_indicator());
8921 ret += "\t\tif(last_flushed_gb"+int_to_string(temporal_g)+">"+gl.to_hfta_C_code("")+")\n";
8922 ret += "\t\t\tneeds_temporal_flush=true;\n";
8923 ret += "\t\t}else{\n"
8924 "\t\t\tneeds_temporal_flush=false;\n"
8929 // For temporal status tuple we don't need to do anything else
8930 ret += "\tif (temp_tuple_received) return NULL;\n\n";
8932 for(w=0;w<where.size();++w){
8933 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8935 // Find the set of variables accessed in this CNF elem,
8936 // but in no previous element.
8937 col_id_set new_cids;
8938 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
8940 // Unpack these values.
8941 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
8942 // Find partial fcns ref'd in this cnf element
8944 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8945 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
8947 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8948 +") ) return(NULL);\n";
8951 // The partial functions ref'd in the group-by var and aggregate
8952 // definitions must also be evaluated. If one returns false,
8953 // then implicitly the predicate is false.
8954 set<int>::iterator pfsi;
8956 if(ag_gb_pfcns.size() > 0)
8957 ret += "//\t\tUnpack remaining partial fcns.\n";
8958 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
8959 found_cids, segen_gb_tbl, "NULL", needs_xform);
8961 // Unpack the group-by variables
8963 for(g=0;g<gb_tbl.size();g++){
8964 data_type *gdt = gb_tbl.get_data_type(g);
8966 if(!gdt->is_temporal()){
8967 // Find the new fields ref'd by this GBvar def.
8968 col_id_set new_cids;
8969 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
8970 // Unpack these values.
8971 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
8973 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8974 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
8976 // There seems to be no difference between the two
8977 // branches of the IF statement.
8978 data_type *gdt = gb_tbl.get_data_type(g);
8979 if(gdt->is_buffer_type()){
8980 // Create temporary copy.
8981 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8982 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
8984 scalarexp_t *gse = gb_tbl.get_def(g);
8985 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
8986 g,generate_se_code(gse,schema).c_str());
8995 ret+= "\treturn gbval;\n";
8998 //--------------------------------------------------------
8999 // Create and initialize an aggregate object
9001 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
9002 // Variables for execution of the function.
9003 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9006 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
9007 "_aggrdef *)buffer;\n";
9009 for(a=0;a<aggr_tbl.size();a++){
9010 if(aggr_tbl.is_builtin(a)){
9011 // Create temporaries for buffer return values
9012 data_type *adt = aggr_tbl.get_data_type(a);
9013 if(adt->is_buffer_type()){
9014 sprintf(tmpstr,"aggr_tmp_%d", a);
9015 ret+=adt->make_host_cvar(tmpstr)+";\n";
9020 // Unpack all remaining attributes
9021 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
9022 for(a=0;a<aggr_tbl.size();a++){
9023 sprintf(tmpstr,"aggval->aggr_var%d",a);
9024 string assignto_var = tmpstr;
9025 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
9028 ret += "\treturn aggval;\n";
9031 //--------------------------------------------------------
9032 // update an aggregate object
9034 ret += "void update_aggregate(host_tuple &tup0, "
9035 +generate_functor_name()+"_groupdef *gbval, "+
9036 generate_functor_name()+"_aggrdef *aggval){\n";
9037 // Variables for execution of the function.
9038 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9040 // use of temporaries depends on the aggregate,
9041 // generate them in generate_aggr_update
9044 // Unpack all remaining attributes
9045 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
9046 for(a=0;a<aggr_tbl.size();a++){
9047 sprintf(tmpstr,"aggval->aggr_var%d",a);
9048 string varname = tmpstr;
9049 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
9052 ret += "\treturn;\n";
9055 //---------------------------------------------------
9058 ret += "\tbool flush_needed(){\n";
9059 if(uses_temporal_flush){
9060 ret += "\t\treturn needs_temporal_flush;\n";
9062 ret += "\t\treturn false;\n";
9066 //---------------------------------------------------
9067 // create output tuple
9068 // Unpack the partial functions ref'd in the where clause,
9069 // select clause. Evaluate the where clause.
9070 // Finally, pack the tuple.
9072 // I need to use special code generation here,
9073 // so I'll leave it in longhand.
9075 ret += "host_tuple create_output_tuple("
9076 +generate_functor_name()+"_groupdef *gbval, "+
9077 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
9079 ret += "\thost_tuple tup;\n";
9080 ret += "\tfailed = false;\n";
9081 ret += "\tgs_retval_t retval = 0;\n";
9083 string gbvar = "gbval->gb_var";
9084 string aggvar = "aggval->";
9086 // Create cached temporaries for UDAF return values.
9087 for(a=0;a<aggr_tbl.size();a++){
9088 if(! aggr_tbl.is_builtin(a)){
9089 int afcn_id = aggr_tbl.get_fcn_id(a);
9090 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9091 sprintf(tmpstr,"udaf_ret_%d", a);
9092 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
9097 // First, get the return values from the UDAFS
9098 for(a=0;a<aggr_tbl.size();a++){
9099 if(! aggr_tbl.is_builtin(a)){
9100 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
9101 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9102 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
9106 set<int> hv_sl_pfcns;
9107 for(w=0;w<having.size();w++){
9108 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
9110 for(s=0;s<select_list.size();s++){
9111 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
9114 // clean up the partial fcn results from any previous execution
9115 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
9118 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
9119 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
9120 ret += "\tif(retval){ failed = true; return(tup);}\n";
9123 // Evaluate the HAVING clause
9124 // TODO: this seems to have a ++ operator rather than a + operator.
9125 for(w=0;w<having.size();++w){
9126 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
9129 // Now, compute the size of the tuple.
9131 // Unpack any BUFFER type selections into temporaries
9132 // so that I can compute their size and not have
9133 // to recompute their value during tuple packing.
9134 // I can use regular assignment here because
9135 // these temporaries are non-persistent.
9136 // TODO: should I be using the selvar generation routine?
9138 ret += "//\t\tCompute the size of the tuple.\n";
9139 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
9140 for(s=0;s<select_list.size();s++){
9141 scalarexp_t *se = select_list[s]->se;
9142 data_type *sdt = se->get_data_type();
9143 if(sdt->is_buffer_type() &&
9144 !( (se->get_operator_type() == SE_COLREF) ||
9145 (se->get_operator_type() == SE_AGGR_STAR) ||
9146 (se->get_operator_type() == SE_AGGR_SE) ||
9147 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9148 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9150 sprintf(tmpstr,"selvar_%d",s);
9151 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
9152 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
9156 // The size of the tuple is the size of the tuple struct plus the
9157 // size of the buffers to be copied in.
9159 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
9160 for(s=0;s<select_list.size();s++){
9161 // if(s>0) ret += "+";
9162 scalarexp_t *se = select_list[s]->se;
9163 data_type *sdt = select_list[s]->se->get_data_type();
9164 if(sdt->is_buffer_type()){
9165 if(!( (se->get_operator_type() == SE_COLREF) ||
9166 (se->get_operator_type() == SE_AGGR_STAR) ||
9167 (se->get_operator_type() == SE_AGGR_SE) ||
9168 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9169 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9171 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
9174 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9181 // Allocate tuple data block.
9182 ret += "//\t\tCreate the tuple block.\n";
9183 ret += "\ttup.data = malloc(tup.tuple_size);\n";
9184 ret += "\ttup.heap_resident = true;\n";
9186 // Mark tuple as regular
9187 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
9189 // ret += "\ttup.channel = 0;\n";
9190 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
9191 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
9194 // (Here, offsets are hard-wired. is this a problem?)
9196 ret += "//\t\tPack the fields into the tuple.\n";
9197 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
9198 for(s=0;s<select_list.size();s++){
9199 scalarexp_t *se = select_list[s]->se;
9200 data_type *sdt = se->get_data_type();
9201 if(sdt->is_buffer_type()){
9202 if(!( (se->get_operator_type() == SE_COLREF) ||
9203 (se->get_operator_type() == SE_AGGR_STAR) ||
9204 (se->get_operator_type() == SE_AGGR_SE) ||
9205 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9206 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9208 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
9210 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
9213 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9215 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9219 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9221 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
9226 // Destroy string temporaries
9227 ret += gen_buffer_selvars_dtr(select_list);
9228 // Destroy string return vals of UDAFs
9229 for(a=0;a<aggr_tbl.size();a++){
9230 if(! aggr_tbl.is_builtin(a)){
9231 int afcn_id = aggr_tbl.get_fcn_id(a);
9232 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9233 if(adt->is_buffer_type()){
9234 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
9235 adt->get_hfta_buffer_destroy().c_str(), a );
9242 ret += "\treturn tup;\n";
9246 //-------------------------------------------------------------------
9247 // Temporal update functions
9249 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
9251 for(g=0;g<gb_tbl.size();g++){
9252 data_type *gdt = gb_tbl.get_data_type(g);
9253 if(gdt->is_temporal()){
9258 ret += tgdt->get_host_cvar_type()+" get_last_flushed_gb(){\n";
9259 ret+="\treturn last_flushed_gb"+int_to_string(g)+";\n";
9261 ret += tgdt->get_host_cvar_type()+" get_last_gb(){\n";
9262 ret+="\treturn last_gb"+int_to_string(g)+";\n";
9268 // create a temp status tuple
9269 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
9271 ret += gen_init_temp_status_tuple(this->get_node_name());
9274 // (Here, offsets are hard-wired. is this a problem?)
9276 ret += "//\t\tPack the fields into the tuple.\n";
9277 for(s=0;s<select_list.size();s++){
9278 data_type *sdt = select_list[s]->se->get_data_type();
9279 if(sdt->is_temporal()){
9280 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9283 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
9290 ret += "\treturn 0;\n";
9291 ret += "};};\n\n\n";
9294 //----------------------------------------------------------
9295 // The hash function
9297 ret += "struct "+generate_functor_name()+"_hash_func{\n";
9298 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
9299 "_groupdef *grp) const{\n";
9300 ret += "\t\treturn( (";
9301 for(g=0;g<gb_tbl.size();g++){
9303 data_type *gdt = gb_tbl.get_data_type(g);
9304 if(gdt->use_hashfunc()){
9305 if(gdt->is_buffer_type())
9306 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9308 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9310 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
9314 ret += ") >> 32);\n";
9318 //----------------------------------------------------------
9319 // The comparison function
9321 ret += "struct "+generate_functor_name()+"_equal_func{\n";
9322 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
9323 generate_functor_name()+"_groupdef *grp2) const{\n";
9324 ret += "\t\treturn( (";
9326 for(g=0;g<gb_tbl.size();g++){
9327 if(g>0) ret += ") && (";
9328 data_type *gdt = gb_tbl.get_data_type(g);
9329 if(gdt->complex_comparison(gdt)){
9330 if(gdt->is_buffer_type())
9331 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
9332 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9334 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
9335 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9337 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
9349 string sgah_qpn::generate_operator(int i, string params){
9351 if(hfta_disorder < 2){
9353 " groupby_operator<" +
9354 generate_functor_name()+","+
9355 generate_functor_name() + "_groupdef, " +
9356 generate_functor_name() + "_aggrdef, " +
9357 generate_functor_name()+"_hash_func, "+
9358 generate_functor_name()+"_equal_func "
9359 "> *op"+int_to_string(i)+" = new groupby_operator<"+
9360 generate_functor_name()+","+
9361 generate_functor_name() + "_groupdef, " +
9362 generate_functor_name() + "_aggrdef, " +
9363 generate_functor_name()+"_hash_func, "+
9364 generate_functor_name()+"_equal_func "
9365 ">("+params+", \"" + get_node_name() +
9370 for(int g=0;g<gb_tbl.size();g++){
9371 data_type *gdt = gb_tbl.get_data_type(g);
9372 if(gdt->is_temporal()){
9379 " groupby_operator_oop<" +
9380 generate_functor_name()+","+
9381 generate_functor_name() + "_groupdef, " +
9382 generate_functor_name() + "_aggrdef, " +
9383 generate_functor_name()+"_hash_func, "+
9384 generate_functor_name()+"_equal_func, " +
9385 tgdt->get_host_cvar_type() +
9386 "> *op"+int_to_string(i)+" = new groupby_operator_oop<"+
9387 generate_functor_name()+","+
9388 generate_functor_name() + "_groupdef, " +
9389 generate_functor_name() + "_aggrdef, " +
9390 generate_functor_name()+"_hash_func, "+
9391 generate_functor_name()+"_equal_func, " +
9392 tgdt->get_host_cvar_type() +
9393 ">("+params+", \"" + get_node_name() +
9399 ////////////////////////////////////////////////
9402 ////////////////////////////////////////////
// Name of the generated functor class for this merge node:
// the fixed prefix "mrg_functor_" followed by the normalized node name.
9404 string mrg_qpn::generate_functor_name(){
9405 return("mrg_functor_" + normalize_name(this->get_node_name()));
// Generate the C++ source text of the functor class for this merge node.
// The class is named by generate_functor_name() and is accumulated in `ret`.
//   schema      : used to look up the type name and modifier list of the two
//                 merge variables (mvars[0], mvars[1]) and passed to
//                 generate_se_code for the slack expression.
//   Ext_fcns    : not referenced in the lines visible here.
//   needs_xform : indexed by input (0/1); selects the transforming or the
//                 "_noxf" unpack routine and gates the body of xform_tuple().
// Pieces of the generated class visible below: private state, constructor,
// destructor, parameter-block routines, compare(), get_timestamp(),
// compare_with_temp_status(), compare_stored_with_temp_status(),
// update_temp_status(), update_stored_temp_status(),
// update_temp_status_by_slack(), temp_status_received(),
// create_temp_status_tuple(), xform_tuple() and print_warnings().
// NOTE(review): many lines (closing braces, "ret += tmpstr", the return) are
// not visible in this excerpt; presumably `ret` is returned at the end.
9408 string mrg_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
// Sanity checks on the merge inputs; failures are reported on stderr.
// NOTE(review): %lu is paired with size() results; if fm/mvars are standard
// containers, %zu is the portable size_t conversion — confirm.
9413 if(fm.size() != mvars.size()){
9414 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=%lu, mvars.size=%lu\n",fm.size(),mvars.size());
9418 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=mvars.size=%lu\n",fm.size());
9423 // Initialize generate utility globals
9424 segen_gb_tbl = NULL;
9426 string ret = "class " + this->generate_functor_name() + "{\n";
9428 // Private variable:
9429 // 1) Vars for unpacked attrs.
9430 // 2) offsets of the unpacked attrs
9431 // 3) last_posted_timestamp
// The two argument lists below look up the type name and modifier list of
// each merge variable. NOTE(review): presumably they construct the data_type
// objects dta / dtb used further down — the declarations are not visible.
9434 schema->get_type_name(mvars[0]->get_schema_ref(), mvars[0]->get_field()),
9435 schema->get_modifier_list(mvars[0]->get_schema_ref(), mvars[0]->get_field())
9438 schema->get_type_name(mvars[1]->get_schema_ref(), mvars[1]->get_field()),
9439 schema->get_modifier_list(mvars[1]->get_schema_ref(), mvars[1]->get_field())
9442 ret += "private:\n";
9444 // var to save the schema handle
9445 ret += "\tint schema_handle0;\n";
9447 // generate the declaration of all the variables related to
9448 // temp tuples generation
9449 ret += gen_decl_temp_vars();
9451 // unpacked attribute storage, offsets
9452 ret += "//\t\tstorage and offsets of accessed fields.\n";
9453 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
// One unpack_var_<field>_<tblref> / unpack_offset_<field>_<tblref> pair per
// merge variable. NOTE(review): tblref is assigned in lines not visible here.
9455 sprintf(tmpstr,"unpack_var_%s_%d", mvars[0]->get_field().c_str(), tblref);
9456 ret+="\t"+dta.make_host_cvar(tmpstr)+";\n";
9457 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[0]->get_field().c_str(), tblref);
9460 sprintf(tmpstr,"unpack_var_%s_%d", mvars[1]->get_field().c_str(), tblref);
9461 ret+="\t"+dtb.make_host_cvar(tmpstr)+";\n";
9462 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[1]->get_field().c_str(), tblref);
// All four timestamp-related members are declared with dta's host type.
9465 ret += "//\t\tRemember the last posted timestamp.\n";
9466 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_0")+";\n";
9467 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_1")+";\n";
9468 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9469 ret+="\t"+dta.make_host_cvar("slack")+";\n";
9470 // ret += "\t bool first_execution_0, first_execution_1;\n";
9472 // variables to hold parameters.
9473 ret += "//\tfor query parameters\n";
9474 ret += generate_param_vars(param_tbl);
9477 //-------------------
9478 // The functor constructor
9479 // pass in a schema handle (e.g. for the 1st input stream),
9480 // use it to determine how to unpack the merge variable.
9481 // ASSUME that both streams have the same layout,
9482 // just duplicate it.
9485 ret += "//\t\tFunctor constructor.\n";
9486 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9488 // var to save the schema handle
9489 ret += "\tthis->schema_handle0 = schema_handle0;\n";
// Both metadata offsets are derived from schema_handle0, in line with the
// same-layout assumption stated above.
9490 ret += "\ttuple_metadata_offset0=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9491 ret += "\ttuple_metadata_offset1=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9493 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
// Offset of input 0's merge field is looked up by name; input 1's offset is
// then copied from it rather than looked up separately.
9495 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", mvars[0]->get_field().c_str(), 0,mvars[0]->get_field().c_str());
9497 sprintf(tmpstr,"\tunpack_offset_%s_%d = unpack_offset_%s_%d;\n",mvars[1]->get_field().c_str(), 1,mvars[0]->get_field().c_str(), 0);
9499 // ret+="\tfirst_execution_0 = first_execution_1 = true;\n";
// slack is initialised either from the generated slack expression or to 0.
// NOTE(review): the condition choosing between the two is not visible here.
9501 ret+="\tslack = "+generate_se_code(slack,schema)+";\n";
9503 ret+="\tslack = 0;\n";
9505 // Initialize internal state
9506 ret += "\ttemp_tuple_received = false;\n";
9508 // Init last timestamp values to minimum value for their type
// (the second assignment below uses the maximum literal instead —
// presumably the branch for a non-increasing timestamp; confirm)
9509 if (dta.is_increasing())
9510 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_min_literal() + ";\n";
9512 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_max_literal() + ";\n";
9517 ret += "//\t\tFunctor destructor.\n";
9518 ret += "~"+this->generate_functor_name()+"(){\n";
9520 // Destroy the parameters, if any need to be destroyed
9521 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9526 // no pass-by-handle params.
9527 vector<handle_param_tbl_entry *> param_handle_table;
9529 // Parameter manipulation routines
9530 ret += generate_load_param_block(this->generate_functor_name(),
9531 this->param_tbl,param_handle_table);
9532 ret += generate_delete_param_block(this->generate_functor_name(),
9533 this->param_tbl,param_handle_table);
9535 // Register new parameter block
// The generated set_param_block destroys the current parameters and then
// returns the result of reloading them.
9537 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
9538 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9539 ret += "\treturn this->load_params_"+this->generate_functor_name()+
9544 // -----------------------------------
// Choose the unpack routine per input: the transforming variant when
// needs_xform[k] is set, otherwise the "_noxf" variant.
9548 if(needs_xform[0]) unpack_fcna = dta.get_hfta_unpack_fcn();
9549 else unpack_fcna = dta.get_hfta_unpack_fcn_noxf();
9551 if(needs_xform[1]) unpack_fcnb = dtb.get_hfta_unpack_fcn();
9552 else unpack_fcnb = dtb.get_hfta_unpack_fcn_noxf();
// compare(): unpacks a timestamp from each tuple, keyed on tup1.channel, and
// tests timestamp1 > timestamp2+slack and timestamp1 < timestamp2.
// The values returned by the generated code are not visible in this excerpt.
9555 ret+="\tint compare(const host_tuple& tup1, const host_tuple& tup2) const{ \n";
9556 ret+="\t"+dta.make_host_cvar("timestamp1")+";\n";
9557 ret+="\t"+dta.make_host_cvar("timestamp2")+";\n";
9558 ret+="\tgs_int32_t problem;\n";
9559 ret+="\tif (tup1.channel == 0) {\n";
9560 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9562 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
// NOTE(review): the next two lines pair mvars[0]'s field name / unpack_fcna
// with index 1 and mvars[1]'s / unpack_fcnb with index 0. The resulting
// unpack_offset_* identifiers match the declared members only if both merge
// fields share a name — confirm this is intended.
9565 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 1);
9567 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 0);
9571 " if (timestamp1 > timestamp2+slack)\n"
9573 " else if (timestamp1 < timestamp2)\n"
// get_timestamp(): stores the merge field of tup0 in the member `timestamp`,
// using the "_nocheck" form of input 0's unpack routine.
9582 " void get_timestamp(const host_tuple& tup0){\n"
9583 " gs_int32_t problem;\n"
9585 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9594 // Compare to temp status.
// Compares the member `timestamp` with the last posted timestamp of the
// given channel (equal / less-than tests; results not visible here).
9596 " int compare_with_temp_status(int channel) {\n"
9597 " // check if tuple is temp status tuple\n"
9599 " if (channel == 0) {\n"
9600 //" if(first_execution_0) return 1;\n"
9601 " if (timestamp == last_posted_timestamp_0)\n"
9603 " else if (timestamp < last_posted_timestamp_0)\n"
9608 //" if(first_execution_1) return 1;\n"
9609 " if (timestamp == last_posted_timestamp_1)\n"
9611 " else if (timestamp < last_posted_timestamp_1)\n"
// Same comparison, but against a timestamp unpacked from tup0 into the
// local l_timestamp instead of the member `timestamp`.
9620 " int compare_stored_with_temp_status(const host_tuple& tup0, int channel)/* const*/ {\n"
9622 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
9623 ret+="\tgs_int32_t problem;\n";
9625 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9627 ret+="\tif (channel == 0) {\n";
9628 // ret+="\tif(first_execution_0) return 1;\n";
9630 " if (l_timestamp == last_posted_timestamp_0)\n"
9632 " else if (l_timestamp < last_posted_timestamp_0)\n"
9637 // ret+="\tif(first_execution_1) return 1;\n";
9639 " if (l_timestamp == last_posted_timestamp_1)\n"
9641 " else if (l_timestamp < last_posted_timestamp_1)\n"
9649 // update temp status.
// Copies the member `timestamp` into the last posted timestamp of the
// tuple's channel.
9651 " int update_temp_status(const host_tuple& tup) {\n"
9652 " if (tup.channel == 0) {\n"
9653 " last_posted_timestamp_0=timestamp;\n"
9654 //" first_execution_0 = false;\n"
9656 " last_posted_timestamp_1=timestamp;\n"
9657 //" first_execution_1 = false;\n"
// Variant that first unpacks the timestamp from `tup` into l_timestamp.
9663 " int update_stored_temp_status(const host_tuple& tup, int channel) {\n"
9665 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
9666 ret+="\tgs_int32_t problem;\n";
9667 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9670 " if (tup.channel == 0) {\n"
9671 " last_posted_timestamp_0=l_timestamp;\n"
9672 //" first_execution_0 = false;\n"
9674 " last_posted_timestamp_1=l_timestamp;\n"
9675 //" first_execution_1 = false;\n"
// NOTE(review): the emitters from here through update_temp_status_by_slack
// reference first_execution_0/1, whose member declaration and constructor
// initialisation are commented out above. Whether these lines are live or
// sit inside a disabled region is not visible in this excerpt — confirm.
9681 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9682 ret+="\tgs_int32_t problem;\n";
9683 ret+="\tif (tup.channel == 0) {\n";
9684 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9687 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
9690 ret+="\tif (tup.channel == 0) {\n";
9691 ret+="\tlast_posted_timestamp_0=timestamp;\n";
9692 ret +="\tfirst_execution_0 = false;\n";
9694 ret+="\tlast_posted_timestamp_1=timestamp;\n";
9695 ret +="\tfirst_execution_1 = false;\n";
9702 // update temp status modulo slack.
// Advances the channel's last posted timestamp to (timestamp - slack) when
// that value is larger than the stored one (or on first execution).
9703 ret+="\tint update_temp_status_by_slack(const host_tuple& tup, int channel) {\n";
9705 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9706 ret+="\tgs_int32_t problem;\n";
9707 ret+="\tif (tup.channel == 0) {\n";
9708 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
9711 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
9715 " if (channel == 0) {\n"
9716 " if(first_execution_0){\n"
9717 " last_posted_timestamp_0=timestamp - slack;\n"
9718 " first_execution_0 = false;\n"
9720 " if(last_posted_timestamp_0 < timestamp-slack)\n"
9721 " last_posted_timestamp_0 = timestamp-slack;\n"
9724 " if(first_execution_1){\n"
9725 " last_posted_timestamp_1=timestamp - slack;\n"
9726 " first_execution_1 = false;\n"
9728 " if(last_posted_timestamp_1 < timestamp-slack)\n"
9729 " last_posted_timestamp_1 = timestamp-slack;\n"
// temp_status_received(tup0): delegates to
// ftaschema_is_temporal_tuple_offset using input 0's metadata offset.
9743 "bool temp_status_received(const host_tuple& tup0){\n"
9744 " return ftaschema_is_temporal_tuple_offset(tuple_metadata_offset0, tup0.data);\n"
9747 //"bool temp_status_received(){return temp_tuple_received;};\n\n";
9750 // create a temp status tuple
9751 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
9753 ret += gen_init_temp_status_tuple(this->get_node_name());
9756 ret += "//\t\tPack the fields into the tuple.\n";
// Locate the merge variable's slot in the output layout; the generated code
// writes the smaller of the two last posted timestamps into that slot.
9758 string fld_name = mvars[0]->get_field();
9759 int idx = table_layout->get_field_idx(fld_name);
9760 field_entry* fld = table_layout->get_field(idx);
9761 data_type dt(fld->get_type());
9763 // if (needs_xform[0] && needs_xform[1] && dt.needs_hn_translation())
9764 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s((last_posted_timestamp_0 < last_posted_timestamp_1) ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx, dt.hton_translation().c_str());
9766 sprintf(tmpstr,"\ttuple->tuple_var%d = (last_posted_timestamp_0 < last_posted_timestamp_1 ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx);
9770 ret += "\treturn 0;\n";
9773 // Transform tuple (before output)
// A body is generated only when exactly one of the two inputs needs
// transformation. The per-field conversion emitters below are all commented
// out, so in the visible lines each channel branch only emits "return;".
9776 ret += "void xform_tuple(host_tuple &tup){\n";
9777 if((needs_xform[0] && !needs_xform[1]) || (needs_xform[1] && !needs_xform[0])){
9778 ret += "\tstruct "+generate_tuple_name(this->get_node_name())+" *tuple = ("+
9779 generate_tuple_name(this->get_node_name())+" *)(tup.data);\n";
9781 vector<field_entry *> flds = table_layout->get_fields();
9783 ret+="\tif(tup.channel == 0){\n";
9784 if(needs_xform[0] && !needs_xform[1]){
9786 for(f=0;f<flds.size();f++){
9788 data_type dt(flds[f]->get_type());
9789 if(dt.get_type() == v_str_t){
9790 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
9792 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
9794 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
9797 if(dt.needs_hn_translation()){
9798 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
9799 // f, dt.hton_translation().c_str(), f);
9805 ret += "\t\treturn;\n";
9807 ret.append("\t}\n");
// Mirror image of the channel-0 branch for channel 1.
9810 ret+="\tif(tup.channel == 1){\n";
9811 if(needs_xform[1] && !needs_xform[0]){
9813 for(f=0;f<flds.size();f++){
9815 data_type dt(flds[f]->get_type());
9816 if(dt.get_type() == v_str_t){
9817 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
9819 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
9821 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
9824 if(dt.needs_hn_translation()){
9825 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
9826 // f, dt.hton_translation().c_str(), f);
9832 ret += "\t\treturn;\n";
9834 ret.append("\t}\n");
9837 ret.append("};\n\n");
9839 // print_warnings() : tell the functor if the user wants to print warnings.
// Emits "return true" only when the "print_warnings" definition is exactly
// "yes", "Yes" or "YES"; the other visible emitter returns false.
9840 ret += "bool print_warnings(){\n";
9841 if(definitions.count("print_warnings") && (
9842 definitions["print_warnings"] == "yes" ||
9843 definitions["print_warnings"] == "Yes" ||
9844 definitions["print_warnings"] == "YES" )) {
9845 ret += "return true;\n";
9847 ret += "return false;\n";
9849 ret.append("};\n\n");
9852 // Done with methods.
// Build the C++ source text that declares and allocates the run-time
// operator for this merge node.
//   i      : operator index; the emitted pointer variable is named "op<i>".
//   params : text spliced in as the leading constructor argument(s).
// Two variants are emitted, merge_operator<functor> and
// merge_operator_oop<functor>; both pass the literal 10000 and the node name
// as trailing constructor arguments.
// NOTE(review): the condition selecting the variant and the meaning of 10000
// are not visible in this excerpt — confirm against the operator headers.
9859 string mrg_qpn::generate_operator(int i, string params){
9863 " merge_operator<" +
9864 generate_functor_name()+
9865 "> *op"+int_to_string(i)+" = new merge_operator<"+
9866 generate_functor_name()+
9867 ">("+params+",10000,\"" + get_node_name() + "\");\n"
// Out-of-order variant with the same constructor arguments.
9871 " merge_operator_oop<" +
9872 generate_functor_name()+
9873 "> *op"+int_to_string(i)+" = new merge_operator_oop<"+
9874 generate_functor_name()+
9875 ">("+params+",10000,\"" + get_node_name() + "\");\n"
9880 /////////////////////////////////////////////////////////
9881 ////// JOIN_EQ_HASH functor
// Name of the generated functor class for this hash-join node:
// the fixed prefix "join_eq_hash_functor_" followed by the normalized node name.
9884 string join_eq_hash_qpn::generate_functor_name(){
9885 return("join_eq_hash_functor_" + normalize_name(this->get_node_name()));
// Generates the C++ source text of the classes that implement this
// hash-join node and returns it as a single string.  The text is built
// by appending to `ret`, in this order:
//   <functor>_keydef    : one hashkey_var<p> per hash_eq predicate plus
//                         a `touched` flag with touch()/is_touched().
//   <functor>_tempeqdef : one tempeq_var<p> per temporal_eq predicate.
//   <functor>           : the functor itself -- constructor, destructor,
//                         parameter block handling, create_key,
//                         load_ts_from_tup, load_ts_from_ts,
//                         compare_ts_with_ts, apply_prefilter,
//                         create_output_tuple, temp_status_received and
//                         create_temp_status_tuple.
//   <functor>_hash_func and <functor>_equal_func : hashing and equality
//                         over the hashkey_var<p> members of a keydef.
// Parameters:
//   schema      - table list handed to the code generation helpers.
//   Ext_fcns    - external function list, used when locating partial
//                 functions, complex literals and handle parameters.
//   needs_xform - forwarded unchanged to the field unpacking helpers.
// In every predicate the left operand is assumed to reference channel 0
// and the right operand channel 1.
9888 string join_eq_hash_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
9890 vector<data_type *> hashkey_dt; // data types in the hash key
9891 vector<data_type *> temporal_dt; // data types in the temporal key
9892 map<string,scalarexp_t *> l_equiv, r_equiv; // field equivalences
9894 col_id_set new_cids, local_cids;
9896 //--------------------------------
9899 string plus_op = "+";
9901 //--------------------------------
9902 // key definition class
9903 string ret = "class " + generate_functor_name() + "_keydef{\n";
9905 // Collect attributes from hash join predicates.
9906 // ASSUME equality predicate.
9907 // Use the upwardly compatible data type
9908 // (infer from '+' operator if possible, else use left type)
9909 for(p=0;p<this->hash_eq.size();++p){
9910 scalarexp_t *lse = hash_eq[p]->pr->get_left_se();
9911 scalarexp_t *rse = hash_eq[p]->pr->get_right_se();
9912 data_type *hdt = new data_type(
9913 lse->get_data_type(), rse->get_data_type(), plus_op );
9914 if(hdt->get_type() == undefined_t){
9915 hashkey_dt.push_back(lse->get_data_type()->duplicate());
9918 hashkey_dt.push_back(hdt);
9920 sprintf(tmpstr,"hashkey_var%d",p);
9921 ret+="\t"+hashkey_dt[p]->make_host_cvar(tmpstr)+";\n";
9923 // find equivalences
9924 // NOTE: this code needs to be synched with the temporality
9925 // checking done at join_eq_hash_qpn::get_fields
9926 if(lse->get_operator_type()==SE_COLREF){
9927 l_equiv[lse->get_colref()->get_field()] = rse;
9929 if(rse->get_operator_type()==SE_COLREF){
9930 r_equiv[rse->get_colref()->get_field()] = lse;
9933 ret += "\tbool touched;\n";
9936 ret += "\t"+generate_functor_name() + "_keydef(){touched=false;};\n";
9938 ret += "\t~"+ generate_functor_name() + "_keydef(){\n";
9939 for(p=0;p<hashkey_dt.size();p++){
9940 if(hashkey_dt[p]->is_buffer_type()){
9941 sprintf(tmpstr,"\t\t%s(&hashkey_var%d);\n",
9942 hashkey_dt[p]->get_hfta_buffer_destroy().c_str(), p );
9947 ret+="\tvoid touch(){touched = true;};\n";
9948 ret+="\tbool is_touched(){return touched;};\n";
9952 //--------------------------------
9953 // temporal equality definition class
9954 ret += "class " + generate_functor_name() + "_tempeqdef{\n";
9956 // Collect attributes from temporal join predicates.
9957 // ASSUME equality predicate.
9958 // Use the upwardly compatible data type
9959 // (infer from '+' operator if possible, else use left type)
9960 for(p=0;p<this->temporal_eq.size();++p){
9961 scalarexp_t *lse = temporal_eq[p]->pr->get_left_se();
9962 scalarexp_t *rse = temporal_eq[p]->pr->get_right_se();
9963 data_type *hdt = new data_type(
9964 lse->get_data_type(), rse->get_data_type(), plus_op );
9965 if(hdt->get_type() == undefined_t){
// Fall back to the left operand's type of THIS temporal predicate.
// p indexes temporal_eq here; indexing hash_eq with it would read an
// unrelated predicate, or run past the end of hash_eq when temporal_eq
// has more entries.
9966 temporal_dt.push_back(lse->get_data_type()->duplicate());
9969 temporal_dt.push_back(hdt);
9971 sprintf(tmpstr,"tempeq_var%d",p);
9972 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
9973 // find equivalences
9974 if(lse->get_operator_type()==SE_COLREF){
9975 l_equiv[lse->get_colref()->get_field()] = rse;
9977 if(rse->get_operator_type()==SE_COLREF){
9978 r_equiv[rse->get_colref()->get_field()] = lse;
9983 ret += "\t"+generate_functor_name() + "_tempeqdef(){};\n";
9985 ret += "\t~"+ generate_functor_name() + "_tempeqdef(){\n";
9986 for(p=0;p<temporal_dt.size();p++){
9987 if(temporal_dt[p]->is_buffer_type()){
9988 sprintf(tmpstr,"\t\t%s(&tempeq_var%d);\n",
9989 temporal_dt[p]->get_hfta_buffer_destroy().c_str(), p );
9997 //--------------------------------
9998 // temporal eq, hash join functor class
9999 ret += "class " + this->generate_functor_name() + "{\n";
10001 // Find variables referenced in this query node.
10003 col_id_set cid_set;
10004 col_id_set::iterator csi;
10006 for(p=0;p<where.size();++p)
10007 gather_pr_col_ids(where[p]->pr,cid_set,NULL);
10008 for(s=0;s<select_list.size();s++)
10009 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
10011 // Private variables : store the state of the functor.
10012 // 1) variables for unpacked attributes
10013 // 2) offsets of the unpacked attributes
10014 // 3) storage of partial functions
10015 // 4) storage of complex literals (i.e., require a constructor)
10017 ret += "private:\n";
10019 // var to save the schema handles
10020 ret += "\tint schema_handle0;\n";
10021 ret += "\tint schema_handle1;\n";
10023 // generate the declaration of all the variables related to
10024 // temp tuples generation
10025 ret += gen_decl_temp_vars();
10026 // tuple metadata offsets
10027 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
10029 // unpacked attribute storage, offsets
10030 ret += "//\t\tstorage and offsets of accessed fields.\n";
10031 ret += generate_access_vars(cid_set, schema);
10034 // Variables to store results of partial functions.
10035 // WARNING find_partial_functions modifies the SE
10036 // (it marks the partial function id).
10037 ret += "//\t\tParital function result storage\n";
10038 vector<scalarexp_t *> partial_fcns;
10039 vector<int> fcn_ref_cnt;
10040 vector<bool> is_partial_fcn;
10041 for(s=0;s<select_list.size();s++){
10042 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
10044 for(p=0;p<where.size();p++){
10045 find_partial_fcns_pr(where[p]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
10047 if(partial_fcns.size()>0){
10048 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
10049 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
10052 // Complex literals (i.e., they need constructors)
10053 ret += "//\t\tComplex literal storage.\n";
10054 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
10055 ret += generate_complex_lit_vars(complex_literals);
10056 // We need the following to handle strings in outer joins.
10057 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10058 ret += "\tstruct vstring EmptyString;\n";
10059 ret += "\tstruct hfta_ipv6_str EmptyIp6;\n";
10061 // Pass-by-handle parameters
10062 ret += "//\t\tPass-by-handle storage.\n";
10063 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
10064 ret += generate_pass_by_handle_vars(param_handle_table);
10067 // variables to hold parameters.
10068 ret += "//\tfor query parameters\n";
10069 ret += generate_param_vars(param_tbl);
10072 ret += "\npublic:\n";
10073 //-------------------
10074 // The functor constructor
10075 // pass in the schema handle.
10076 // 1) make assignments to the unpack offset variables
10077 // 2) initialize the complex literals
10079 ret += "//\t\tFunctor constructor.\n";
10080 ret += this->generate_functor_name()+"(int schema_handle0, int schema_handle1){\n";
10082 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
10083 ret += "\t\tthis->schema_handle1 = schema_handle1;\n";
10084 // metadata offsets
10085 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10086 ret += "\ttuple_metadata_offset1 = ftaschema_get_tuple_metadata_offset(schema_handle1);\n";
10089 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
10090 ret += gen_access_var_init(cid_set);
10092 // complex literals
10093 ret += "//\t\tInitialize complex literals.\n";
10094 ret += gen_complex_lit_init(complex_literals);
10095 // Initialize EmptyString to the ... empty string
10096 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10097 literal_t mtstr_lit("");
10098 ret += "\t" + mtstr_lit.to_hfta_C_code("&EmptyString")+";\n";
10099 literal_t mip6_lit("0:0:0:0:0:0:0:0",LITERAL_IPV6);
10100 ret += "\t" + mip6_lit.to_hfta_C_code("&EmptyIp6")+";\n";
10102 // Initialize partial function results so they can be safely GC'd
10103 ret += gen_partial_fcn_init(partial_fcns);
10105 // Initialize non-query-parameter parameter handles
10106 ret += gen_pass_by_handle_init(param_handle_table);
10108 // Init temporal attributes referenced in select list
10109 ret += gen_init_temp_vars(schema, select_list, NULL);
10116 //-------------------
10117 // Functor destructor
10118 ret += "//\t\tFunctor destructor.\n";
10119 ret += "~"+this->generate_functor_name()+"(){\n";
10121 // clean up buffer type complex literals
10122 ret += gen_complex_lit_dtr(complex_literals);
10124 // Deregister the pass-by-handle parameters
10125 ret += "/* register and de-register the pass-by-handle parameters */\n";
10126 ret += gen_pass_by_handle_dtr(param_handle_table);
10128 // clean up partial function results.
10129 ret += "/* clean up partial function storage */\n";
10130 ret += gen_partial_fcn_dtr(partial_fcns);
10132 // Destroy the parameters, if any need to be destroyed
10133 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10138 //-------------------
10139 // Parameter manipulation routines
10140 ret += generate_load_param_block(this->generate_functor_name(),
10141 this->param_tbl,param_handle_table);
10142 ret += generate_delete_param_block(this->generate_functor_name(),
10143 this->param_tbl,param_handle_table);
10145 //-------------------
10146 // Register new parameter block
10148 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10149 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10150 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10155 //-------------------
10156 // The create_key method.
10157 // Perform heap allocation.
10158 // ASSUME : the LHS of the preds reference channel 0 attributes
10159 // NOTE : it may fail if a partial function fails.
10161 ret += this->generate_functor_name()+"_keydef *create_key(host_tuple &tup, bool &failed){\n";
10162 // Variables for execution of the function.
10163 ret+="\t"+this->generate_functor_name()+"_keydef *retval = NULL;\n";
10164 ret+="\tgs_int32_t problem = 0;\n";
10166 // Assume unsuccessful completion
10167 ret+= "\tfailed = true;\n";
10169 // Switch the processing based on the channel
10170 ret+="\tif(tup.channel == 0){\n";
10171 ret+="// ------------ processing for channel 0\n";
10172 ret+="\t\thost_tuple &tup0 = tup;\n";
10173 // Gather partial fcns and colids ref'd by this branch
10175 new_cids.clear(); local_cids.clear();
10176 for(p=0;p<hash_eq.size();p++){
10177 collect_partial_fcns(hash_eq[p]->pr->get_left_se(), pfcn_refs);
10178 gather_se_col_ids(hash_eq[p]->pr->get_left_se(),local_cids,NULL);
10181 // Start by cleaning up partial function results
10182 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10183 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10185 // Evaluate the partial functions
10186 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10187 new_cids, NULL, "NULL", needs_xform);
10188 // test passed -- unpack remaining cids.
10189 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10191 // Alloc and load a key object
10192 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10193 for(p=0;p<hash_eq.size();p++){
10194 data_type *hdt = hash_eq[p]->pr->get_left_se()->get_data_type();
10195 if(hdt->is_buffer_type()){
10196 string vname = "tmp_keyvar"+int_to_string(p);
10197 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_left_se(),schema)+";\n";
10198 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
// NOTE(review): the expression text written into tmpstr below has no
// fixed length bound, and tmpstr's size is not visible here -- confirm
// the buffer is large enough (same pattern recurs further down).
10200 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10201 p,generate_se_code(hash_eq[p]->pr->get_left_se(),schema).c_str() );
10205 ret += "\t}else{\n";
10207 ret+="// ------------ processing for channel 1\n";
10208 ret+="\t\thost_tuple &tup1 = tup;\n";
10209 // Gather partial fcns and colids ref'd by this branch
10211 new_cids.clear(); local_cids.clear();
10212 for(p=0;p<hash_eq.size();p++){
10213 collect_partial_fcns(hash_eq[p]->pr->get_right_se(), pfcn_refs);
10214 gather_se_col_ids(hash_eq[p]->pr->get_right_se(),local_cids,NULL);
10217 // Start by cleaning up partial function results
10218 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10219 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10221 // Evaluate the partial functions
10222 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10223 new_cids, NULL, "NULL", needs_xform);
10225 // test passed -- unpack remaining cids.
10226 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10228 // Alloc and load a key object
10229 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10230 for(p=0;p<hash_eq.size();p++){
10231 data_type *hdt = hash_eq[p]->pr->get_right_se()->get_data_type();
10232 if(hdt->is_buffer_type()){
10233 string vname = "tmp_keyvar"+int_to_string(p);
10234 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_right_se(),schema)+";\n";
10235 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10237 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10238 p,generate_se_code(hash_eq[p]->pr->get_right_se(),schema).c_str() );
10244 ret += "\tfailed = false;\n";
10245 ret += "\t return retval;\n";
10249 //-------------------
10250 // The load_ts method.
10251 // load into an allocated buffer.
10252 // ASSUME : the LHS of the preds reference channel 0 attributes
10253 // NOTE : it may fail if a partial function fails.
10254 // NOTE : can't handle buffer attributes
10256 ret += "bool load_ts_from_tup("+this->generate_functor_name()+"_tempeqdef *ts, host_tuple &tup){\n";
10257 // Variables for execution of the function.
10258 ret+="\tgs_int32_t problem = 0;\n";
10260 // Switch the processing based on the channel
10261 ret+="\tif(tup.channel == 0){\n";
10262 ret+="// ------------ processing for channel 0\n";
10263 ret+="\t\thost_tuple &tup0 = tup;\n";
10265 // Gather partial fcns and colids ref'd by this branch
10267 new_cids.clear(); local_cids.clear();
10268 for(p=0;p<temporal_eq.size();p++){
10269 collect_partial_fcns(temporal_eq[p]->pr->get_left_se(), pfcn_refs);
10270 gather_se_col_ids(temporal_eq[p]->pr->get_left_se(),local_cids,NULL);
10273 // Start by cleaning up partial function results
10274 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10275 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10277 // Evaluate the partial functions
10278 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10279 new_cids, NULL, "false", needs_xform);
10281 // test passed -- unpack remaining cids.
10282 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10284 // load the temporal key object
10285 for(p=0;p<temporal_eq.size();p++){
10286 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10287 p,generate_se_code(temporal_eq[p]->pr->get_left_se(),schema).c_str() );
10291 ret += "\t}else{\n";
10293 ret+="// ------------ processing for channel 1\n";
10294 ret+="\t\thost_tuple &tup1 = tup;\n";
10296 // Gather partial fcns and colids ref'd by this branch
10298 new_cids.clear(); local_cids.clear();
10299 for(p=0;p<temporal_eq.size();p++){
10300 collect_partial_fcns(temporal_eq[p]->pr->get_right_se(), pfcn_refs);
10301 gather_se_col_ids(temporal_eq[p]->pr->get_right_se(),local_cids,NULL);
10304 // Start by cleaning up partial function results
10305 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10306 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10308 // Evaluate the partial functions
10309 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10310 new_cids, NULL, "false", needs_xform);
10312 // test passed -- unpack remaining cids.
10313 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10315 // load the key object
10316 for(p=0;p<temporal_eq.size();p++){
10317 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10318 p,generate_se_code(temporal_eq[p]->pr->get_right_se(),schema).c_str() );
10324 ret += "\t return true;\n";
10328 // ------------------------------
10330 // (i.e make a copy)
10332 ret += "bool load_ts_from_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10333 for(p=0;p<temporal_eq.size();p++){
10334 sprintf(tmpstr,"\tlts->tempeq_var%d = rts->tempeq_var%d;\n",p,p);
10339 // -------------------------------------
10340 // compare_ts_to_ts
10341 // There should be only one variable to compare.
10342 // If there is more, assume an arbitrary lexicographic order.
10344 ret += "int compare_ts_with_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10345 for(p=0;p<temporal_eq.size();p++){
10346 sprintf(tmpstr,"\tif(lts->tempeq_var%d < rts->tempeq_var%d) return(-1);\n",p,p);
10348 sprintf(tmpstr,"\tif(lts->tempeq_var%d > rts->tempeq_var%d) return(1);\n",p,p);
10351 ret += "\treturn(0);\n";
10354 // ------------------------------------------
10356 // apply the prefilter
10358 ret += "bool apply_prefilter(host_tuple &tup){\n";
10360 // Variables for this procedure
10361 ret+="\tgs_int32_t problem = 0;\n";
10362 ret+="\tgs_retval_t retval;\n";
10364 // Switch the processing based on the channel
10365 ret+="\tif(tup.channel == 0){\n";
10366 ret+="// ------------ processing for channel 0\n";
10367 ret+="\t\thost_tuple &tup0 = tup;\n";
10368 // Gather partial fcns and colids ref'd by this branch
10370 new_cids.clear(); local_cids.clear();
10371 for(p=0;p<prefilter[0].size();p++){
10372 collect_partial_fcns_pr((prefilter[0])[p]->pr, pfcn_refs);
10375 // Start by cleaning up partial function results
10376 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10377 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10379 for(p=0;p<(prefilter[0]).size();++p){
10380 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10382 // Find the set of variables accessed in this CNF elem,
10383 // but in no previous element.
10384 col_id_set new_pr_cids;
10385 get_new_pred_cids((prefilter[0])[p]->pr,local_cids,new_pr_cids, NULL);
10386 // Unpack these values.
10387 ret += gen_unpack_cids(schema, new_pr_cids, "false", needs_xform);
10388 // Find partial fcns ref'd in this cnf element
10389 set<int> pr_pfcn_refs;
10390 collect_partial_fcns_pr((prefilter[0])[p]->pr, pr_pfcn_refs);
10391 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10393 ret += "\t\tif( !("+generate_predicate_code((prefilter[0])[p]->pr,schema)+") ) return(false);\n";
10395 ret += "\t}else{\n";
10396 ret+="// ------------ processing for channel 1\n";
10397 ret+="\t\thost_tuple &tup1 = tup;\n";
10398 // Gather partial fcns and colids ref'd by this branch
10400 new_cids.clear(); local_cids.clear();
10401 for(p=0;p<prefilter[1].size();p++){
10402 collect_partial_fcns_pr((prefilter[1])[p]->pr, pfcn_refs);
10405 // Start by cleaning up partial function results
10406 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10407 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10409 for(p=0;p<(prefilter[1]).size();++p){
10410 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10412 // Find the set of variables accessed in this CNF elem,
10413 // but in no previous element.
10414 col_id_set pr_new_cids;
10415 get_new_pred_cids((prefilter[1])[p]->pr,local_cids, pr_new_cids, NULL);
10416 // Unpack these values.
10417 ret += gen_unpack_cids(schema, pr_new_cids, "false", needs_xform);
10418 // Find partial fcns ref'd in this cnf element
10419 set<int> pr_pfcn_refs;
10420 collect_partial_fcns_pr((prefilter[1])[p]->pr, pr_pfcn_refs);
10421 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10423 ret += "\t\tif( !("+generate_predicate_code((prefilter[1])[p]->pr,schema)+ ") ) return(false);\n";
10427 ret+="\treturn true;\n";
10431 // -------------------------------------
10432 // create_output_tuple
10433 // If the postfilter on the pair of tuples passes,
10434 // create an output tuple from the combined information.
10435 // (Plus, outer join processing)
10437 ret += "host_tuple create_output_tuple(const host_tuple &tup0, const host_tuple &tup1, bool &failed){\n";
10439 ret += "\thost_tuple tup;\n";
10440 ret += "\tfailed = true;\n";
10441 ret += "\tgs_retval_t retval = 0;\n";
10442 ret += "\tgs_int32_t problem = 0;\n";
10444 // Start by cleaning up partial function results
10445 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10447 new_cids.clear(); local_cids.clear();
10448 for(p=0;p<postfilter.size();p++){
10449 collect_partial_fcns_pr(postfilter[p]->pr, pfcn_refs);
10451 for(s=0;s<select_list.size();s++){
10452 collect_partial_fcns(select_list[s]->se, pfcn_refs);
10454 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10457 ret+="\tif(tup0.data && tup1.data){\n";
10458 // Evaluate the postfilter
10459 new_cids.clear(); local_cids.clear();
10460 for(p=0;p<postfilter.size();p++){
10461 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10463 // Find the set of variables accessed in this CNF elem,
10464 // but in no previous element.
10465 col_id_set pr_new_cids;
10466 get_new_pred_cids(postfilter[p]->pr,local_cids, pr_new_cids, NULL);
10467 // Unpack these values.
10468 ret += gen_unpack_cids(schema, pr_new_cids, "tup", needs_xform);
10469 // Find partial fcns ref'd in this cnf element
10470 set<int> pr_pfcn_refs;
10471 collect_partial_fcns_pr(postfilter[p]->pr, pr_pfcn_refs);
10472 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"tup");
10474 ret += "\t\tif( !("+generate_predicate_code(postfilter[p]->pr,schema)+ ") ) return(tup);\n";
10478 // postfilter passed, evaluate partial functions for select list
10481 col_id_set se_cids;
10482 for(s=0;s<select_list.size();s++){
10483 collect_partial_fcns(select_list[s]->se, sl_pfcns);
10486 if(sl_pfcns.size() > 0)
10487 ret += "//\t\tUnpack remaining partial fcns.\n";
10488 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
10489 local_cids, NULL, "tup", needs_xform);
10491 // Unpack remaining fields
10492 ret += "//\t\tunpack any remaining fields from the input tuples.\n";
10493 for(s=0;s<select_list.size();s++)
10494 get_new_se_cids(select_list[s]->se, local_cids,se_cids,NULL);
10495 ret += gen_unpack_cids(schema, se_cids,"tup", needs_xform);
10498 // Deal with outer join stuff
10499 col_id_set l_cids, r_cids;
10500 col_id_set::iterator ocsi;
10501 for(ocsi=local_cids.begin();ocsi!=local_cids.end();++ocsi){
10502 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10503 else r_cids.insert((*ocsi));
10505 for(ocsi=se_cids.begin();ocsi!=se_cids.end();++ocsi){
10506 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10507 else r_cids.insert((*ocsi));
10510 ret += "\t}else if(tup0.data){\n";
10511 string unpack_null = ""; col_id_set extra_cids;
10512 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
10513 string field = (*ocsi).field;
10514 if(r_equiv.count(field)){
10515 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
10516 get_new_se_cids(r_equiv[field],l_cids,new_cids,NULL);
10518 int schref = (*ocsi).schema_ref;
10519 data_type dt(schema->get_type_name(schref,field));
10520 literal_t empty_lit(dt.type_indicator());
10521 if(empty_lit.is_cpx_lit()){
10522 // sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
10523 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10524 // NB : works for string type only
10525 // NNB: installed fix for ipv6, more of this should be pushed
10526 // into the literal_t code.
10527 unpack_null+="\tunpack_var_"+field+"_1= "+empty_lit.hfta_empty_literal_name()+";\n";
10529 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
10533 ret += gen_unpack_cids(schema, l_cids, "tup", needs_xform);
10534 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
10535 ret += unpack_null;
10536 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
10539 unpack_null = ""; extra_cids.clear();
10540 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
10541 string field = (*ocsi).field;
10542 if(l_equiv.count(field)){
10543 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
10544 get_new_se_cids(l_equiv[field],r_cids,new_cids,NULL);
10546 int schref = (*ocsi).schema_ref;
10547 data_type dt(schema->get_type_name(schref,field));
10548 literal_t empty_lit(dt.type_indicator());
10549 if(empty_lit.is_cpx_lit()){
10550 // sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
10551 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10552 // NB : works for string type only
10553 // NNB: installed fix for ipv6, more of this should be pushed
10554 // into the literal_t code.
10555 unpack_null+="\tunpack_var_"+field+"_0= "+empty_lit.hfta_empty_literal_name()+";\n";
10557 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
10561 ret += gen_unpack_cids(schema, r_cids, "tup", needs_xform);
10562 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
10563 ret += unpack_null;
10564 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
10569 // Unpack any BUFFER type selections into temporaries
10570 // so that I can compute their size and not have
10571 // to recompute their value during tuple packing.
10572 // I can use regular assignment here because
10573 // these temporaries are non-persistent.
10575 ret += "//\t\tCompute the size of the tuple.\n";
10576 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
10578 // Unpack all buffer type selections, to be able to compute their size
10579 ret += gen_buffer_selvars(schema, select_list);
10581 // The size of the tuple is the size of the tuple struct plus the
10582 // size of the buffers to be copied in.
10584 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
10585 ret += gen_buffer_selvars_size(select_list,schema);
10588 // Allocate tuple data block.
10589 ret += "//\t\tCreate the tuple block.\n";
10590 ret += "\ttup.data = malloc(tup.tuple_size);\n";
10591 ret += "\ttup.heap_resident = true;\n";
10592 // ret += "\ttup.channel = 0;\n";
10594 // Mark tuple as regular
10595 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
10598 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
10599 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
10602 // (Here, offsets are hard-wired. is this a problem?)
10604 ret += "//\t\tPack the fields into the tuple.\n";
10605 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
10607 // Delete string temporaries
10608 ret += gen_buffer_selvars_dtr(select_list);
10610 ret += "\tfailed = false;\n";
10611 ret += "\treturn tup;\n";
10616 //-----------------------------
10617 // Method for checking whether tuple is temporal
10619 ret += "bool temp_status_received(host_tuple &tup){\n";
10621 // Switch the processing based on the channel
10622 ret+="\tif(tup.channel == 0){\n";
10623 ret+="\t\thost_tuple &tup0 = tup;\n";
10624 ret += gen_temp_tuple_check(this->node_name, 0);
10625 ret += "\t}else{\n";
10626 ret+="\t\thost_tuple &tup1 = tup;\n";
10627 ret += gen_temp_tuple_check(this->node_name, 1);
10629 ret += "\treturn temp_tuple_received;\n};\n\n";
10632 //-------------------------------------------------------------------
10633 // Temporal update functions
10636 // create a temp status tuple
10637 ret += "int create_temp_status_tuple(const host_tuple &tup0, const host_tuple &tup1, host_tuple& result) {\n\n";
10639 ret += "\tgs_retval_t retval = 0;\n";
10640 ret += "\tgs_int32_t problem = 0;\n";
10642 ret += "\tif(tup0.data){\n";
10644 // Unpack all the temporal attributes references in select list
10645 col_id_set found_cids;
10647 for(s=0;s<select_list.size();s++){
10648 if (select_list[s]->se->get_data_type()->is_temporal()) {
10649 // Find the set of attributes accessed in this SE
10650 col_id_set new_cids;
10651 get_new_se_cids(select_list[s]->se,found_cids, new_cids, NULL);
10655 // Deal with outer join stuff
10656 l_cids.clear(), r_cids.clear();
10657 for(ocsi=found_cids.begin();ocsi!=found_cids.end();++ocsi){
10658 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10659 else r_cids.insert((*ocsi));
10662 extra_cids.clear();
10663 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
10664 string field = (*ocsi).field;
10665 if(r_equiv.count(field)){
10666 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
10667 col_id_set addnl_cids;
10668 get_new_se_cids(r_equiv[field],l_cids,addnl_cids,NULL);
10670 int schref = (*ocsi).schema_ref;
10671 data_type dt(schema->get_type_name(schref,field));
10672 literal_t empty_lit(dt.type_indicator());
10673 if(empty_lit.is_cpx_lit()){
10674 sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
10675 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10677 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
10681 ret += gen_unpack_cids(schema, l_cids, "1", needs_xform);
10682 ret += gen_unpack_cids(schema, extra_cids, "1", needs_xform);
10683 ret += unpack_null;
10685 ret+="\t}else if (tup1.data) {\n";
10686 unpack_null = ""; extra_cids.clear();
10687 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
10688 string field = (*ocsi).field;
10689 if(l_equiv.count(field)){
10690 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
10691 col_id_set addnl_cids;
10692 get_new_se_cids(l_equiv[field],r_cids,addnl_cids,NULL);
10694 int schref = (*ocsi).schema_ref;
10695 data_type dt(schema->get_type_name(schref,field));
10696 literal_t empty_lit(dt.type_indicator());
10697 if(empty_lit.is_cpx_lit()){
10698 sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
10699 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
10701 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
10705 ret += gen_unpack_cids(schema, r_cids, "1", needs_xform);
10706 ret += gen_unpack_cids(schema, extra_cids, "1", needs_xform);
10707 ret += unpack_null;
10710 ret += gen_init_temp_status_tuple(this->get_node_name());
10713 ret += "//\t\tPack the fields into the tuple.\n";
10714 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
10717 ret += "\treturn 0;\n";
10723 //----------------------------------------------------------
10724 // The hash function
10726 ret += "struct "+generate_functor_name()+"_hash_func{\n";
10727 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
10728 "_keydef *key) const{\n";
10729 ret += "\t\treturn( (";
10730 if(hashkey_dt.size() > 0){
10731 for(p=0;p<hashkey_dt.size();p++){
10732 if(p>0) ret += "^";
10733 if(hashkey_dt[p]->use_hashfunc()){
10734 // sprintf(tmpstr,"%s(&(key->hashkey_var%d))",hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
10735 if(hashkey_dt[p]->is_buffer_type())
10736 sprintf(tmpstr,"(%s*%s(&(key->hashkey_var%d)))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
10738 sprintf(tmpstr,"(%s*%s(key->hashkey_var%d))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
10740 sprintf(tmpstr,"(%s*key->hashkey_var%d)",hash_nums[p%NRANDS].c_str(),p);
10747 ret += ") >> 32);\n";
10751 //----------------------------------------------------------
10752 // The comparison function
10754 ret += "struct "+generate_functor_name()+"_equal_func{\n";
10755 ret += "\tbool operator()(const "+generate_functor_name()+"_keydef *key1, "+
10756 generate_functor_name()+"_keydef *key2) const{\n";
10757 ret += "\t\treturn( (";
10758 if(hashkey_dt.size() > 0){
10759 for(p=0;p<hashkey_dt.size();p++){
10760 if(p>0) ret += ") && (";
10761 if(hashkey_dt[p]->complex_comparison(hashkey_dt[p])){
10762 if(hashkey_dt[p]->is_buffer_type())
10763 sprintf(tmpstr,"(%s(&(key1->hashkey_var%d), &(key2->hashkey_var%d))==0)",
10764 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
10766 sprintf(tmpstr,"(%s((key1->hashkey_var%d), (key2->hashkey_var%d))==0)",
10767 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
10769 sprintf(tmpstr,"key1->hashkey_var%d == key2->hashkey_var%d",p,p);
// Builds the C++ statement that declares the pointer op<i> and allocates
// a join_eq_hash_operator for this node.  The operator template is
// instantiated on this node's functor class together with its
// _tempeqdef, _keydef, _hash_func and _equal_func companion types.
// The constructor arguments visible here include the value
// from[0]->get_property() + 2*from[1]->get_property() and the node name.
// NOTE(review): how `params` enters the generated text is not visible in
// this part of the listing -- confirm against the full source.
10786 string join_eq_hash_qpn::generate_operator(int i, string params){
10789 " join_eq_hash_operator<" +
10790 generate_functor_name()+ ","+
10791 generate_functor_name() + "_tempeqdef,"+
10792 generate_functor_name() + "_keydef,"+
10793 generate_functor_name()+"_hash_func,"+
10794 generate_functor_name()+"_equal_func"
10795 "> *op"+int_to_string(i)+" = new join_eq_hash_operator<"+
10796 generate_functor_name()+","+
10797 generate_functor_name() + "_tempeqdef,"+
10798 generate_functor_name() + "_keydef,"+
10799 generate_functor_name()+"_hash_func,"+
10800 generate_functor_name()+"_equal_func"
10802 int_to_string(from[0]->get_property()+2*from[1]->get_property())+", \"" + get_node_name() +
10809 ////////////////////////////////////////////////////////////////
10810 //// SGAHCWCB functor
// Returns the name used for this node's generated functor class:
// the fixed prefix "sgahcwcb_functor_" followed by the result of
// normalize_name() applied to this node's name.
10814 string sgahcwcb_qpn::generate_functor_name(){
10815 return("sgahcwcb_functor_" + normalize_name(this->get_node_name()));
10819 string sgahcwcb_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10823 // Initialize generate utility globals
10824 segen_gb_tbl = &(gb_tbl);
10827 //--------------------------------
10828 // group definition class
10829 string ret = "class " + generate_functor_name() + "_groupdef{\n";
10830 ret += "public:\n";
10831 ret += "\tbool valid;\n";
10832 for(g=0;g<this->gb_tbl.size();g++){
10833 sprintf(tmpstr,"gb_var%d",g);
10834 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
10837 ret += "\t"+generate_functor_name() + "_groupdef(){valid=true;};\n";
10838 ret += "\t"+generate_functor_name() + "_groupdef("+
10839 this->generate_functor_name() + "_groupdef *gd){\n";
10840 for(g=0;g<gb_tbl.size();g++){
10841 data_type *gdt = gb_tbl.get_data_type(g);
10842 if(gdt->is_buffer_type()){
10843 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
10844 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
10847 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
10851 ret += "\tvalid=true;\n";
10854 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
10855 for(g=0;g<gb_tbl.size();g++){
10856 data_type *gdt = gb_tbl.get_data_type(g);
10857 if(gdt->is_buffer_type()){
10858 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
10859 gdt->get_hfta_buffer_destroy().c_str(), g );
10866 //--------------------------------
10867 // aggr definition class
10868 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
10869 ret += "public:\n";
10870 for(a=0;a<aggr_tbl.size();a++){
10871 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10872 sprintf(tmpstr,"aggr_var%d",a);
10873 if(aggr_tbl.is_builtin(a))
10874 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
10876 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
10879 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
10881 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
10882 for(a=0;a<aggr_tbl.size();a++){
10883 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10884 if(aggr_tbl.is_builtin(a)){
10885 data_type *adt = aggr_tbl.get_data_type(a);
10886 if(adt->is_buffer_type()){
10887 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
10888 adt->get_hfta_buffer_destroy().c_str(), a );
10892 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
10893 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
10894 ret+="(aggr_var"+int_to_string(a)+"));\n";
10900 //--------------------------------
10901 // superaggr definition class
10902 ret += "class " + this->generate_functor_name() + "_statedef{\n";
10903 ret += "public:\n";
10904 for(a=0;a<aggr_tbl.size();a++){
10905 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10906 if(ate->is_superaggr()){
10907 sprintf(tmpstr,"aggr_var%d",a);
10908 if(aggr_tbl.is_builtin(a))
10909 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
10911 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
10914 set<string>::iterator ssi;
10915 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
10916 string state_nm = (*ssi);
10917 int state_id = Ext_fcns->lookup_state(state_nm);
10918 data_type *dt = Ext_fcns->get_storage_dt(state_id);
10919 string state_var = "state_var_"+state_nm;
10920 ret += "\t"+dt->make_host_cvar(state_var)+";\n";
10923 ret += "\t"+this->generate_functor_name() + "_statedef(){};\n";
10925 ret += "\t~"+this->generate_functor_name() + "_statedef(){\n";
10926 for(a=0;a<aggr_tbl.size();a++){
10927 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
10928 if(ate->is_superaggr()){
10929 if(aggr_tbl.is_builtin(a)){
10930 data_type *adt = aggr_tbl.get_data_type(a);
10931 if(adt->is_buffer_type()){
10932 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
10933 adt->get_hfta_buffer_destroy().c_str(), a );
10937 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
10938 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
10939 ret+="(aggr_var"+int_to_string(a)+"));\n";
10943 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
10944 string state_nm = (*ssi);
10945 int state_id = Ext_fcns->lookup_state(state_nm);
10946 string state_var = "state_var_"+state_nm;
10947 ret += "\t_sfun_state_destroy_"+state_nm+"(&"+state_var+");\n";
10954 //--------------------------------
10955 // gb functor class
10956 ret += "class " + this->generate_functor_name() + "{\n";
10958 // Find variables referenced in this query node.
10960 col_id_set cid_set;
10961 col_id_set::iterator csi;
10963 for(w=0;w<where.size();++w)
10964 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
10965 for(w=0;w<having.size();++w)
10966 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
10967 for(w=0;w<cleanby.size();++w)
10968 gather_pr_col_ids(cleanby[w]->pr,cid_set,segen_gb_tbl);
10969 for(w=0;w<cleanwhen.size();++w)
10970 gather_pr_col_ids(cleanwhen[w]->pr,cid_set,segen_gb_tbl);
10971 for(g=0;g<gb_tbl.size();g++)
10972 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
10974 for(s=0;s<select_list.size();s++){
10975 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
10979 // Private variables : store the state of the functor.
10980 // 1) variables for unpacked attributes
10981 // 2) offsets of the unpacked attributes
10982 // 3) storage of partial functions
10983 // 4) storage of complex literals (i.e., require a constructor)
10985 ret += "private:\n";
10987 // var to save the schema handle
10988 ret += "\tint schema_handle0;\n";
10990 // generate the declaration of all the variables related to
10991 // temp tuples generation
10992 ret += gen_decl_temp_vars();
10994 // unpacked attribute storage, offsets
10995 ret += "//\t\tstorage and offsets of accessed fields.\n";
10996 ret += generate_access_vars(cid_set, schema);
10997 // tuple metadata offset
10998 ret += "\ttuple_metadata_offset0;\n";
11000 // Variables to store results of partial functions.
11001 // WARNING find_partial_functions modifies the SE
11002 // (it marks the partial function id).
11003 ret += "//\t\tParital function result storage\n";
11004 vector<scalarexp_t *> partial_fcns;
11005 vector<int> fcn_ref_cnt;
11006 vector<bool> is_partial_fcn;
11007 for(s=0;s<select_list.size();s++){
11008 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
11010 for(w=0;w<where.size();w++){
11011 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11013 for(w=0;w<having.size();w++){
11014 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11016 for(w=0;w<cleanby.size();w++){
11017 find_partial_fcns_pr(cleanby[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11019 for(w=0;w<cleanwhen.size();w++){
11020 find_partial_fcns_pr(cleanwhen[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11022 for(g=0;g<gb_tbl.size();g++){
11023 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
11025 for(a=0;a<aggr_tbl.size();a++){
11026 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
11028 if(partial_fcns.size()>0){
11029 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
11030 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
11033 // Complex literals (i.e., they need constructors)
11034 ret += "//\t\tComplex literal storage.\n";
11035 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
11036 ret += generate_complex_lit_vars(complex_literals);
11038 // Pass-by-handle parameters
11039 ret += "//\t\tPass-by-handle storage.\n";
11040 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
11041 ret += generate_pass_by_handle_vars(param_handle_table);
11043 // Create cached temporaries for UDAF return values.
11044 ret += "//\t\tTemporaries for UDAF return values.\n";
11045 for(a=0;a<aggr_tbl.size();a++){
11046 if(! aggr_tbl.is_builtin(a)){
11047 int afcn_id = aggr_tbl.get_fcn_id(a);
11048 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11049 sprintf(tmpstr,"udaf_ret_%d", a);
11050 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11056 // variables to hold parameters.
11057 ret += "//\tfor query parameters\n";
11058 ret += generate_param_vars(param_tbl);
11060 // Is there a temporal flush? If so create flush temporaries,
11061 // create flush indicator.
11062 bool uses_temporal_flush = false;
11063 for(g=0;g<gb_tbl.size();g++){
11064 data_type *gdt = gb_tbl.get_data_type(g);
11065 if(gdt->is_temporal())
11066 uses_temporal_flush = true;
11069 if(uses_temporal_flush){
11070 ret += "//\t\tFor temporal flush\n";
11071 for(g=0;g<gb_tbl.size();g++){
11072 data_type *gdt = gb_tbl.get_data_type(g);
11073 if(gdt->is_temporal()){
11074 sprintf(tmpstr,"last_gb%d",g);
11075 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11076 sprintf(tmpstr,"last_flushed_gb%d",g);
11077 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11080 ret += "\tbool needs_temporal_flush;\n";
11083 // The publicly exposed functions
11085 ret += "\npublic:\n";
11088 //-------------------
11089 // The functor constructor
11090 // pass in the schema handle.
11091 // 1) make assignments to the unpack offset variables
11092 // 2) initialize the complex literals
11094 ret += "//\t\tFunctor constructor.\n";
11095 ret += this->generate_functor_name()+"(int schema_handle0){\n";
11097 // save the schema handle
11098 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
11099 // tuple metadata offset
11100 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
11103 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
11104 ret += gen_access_var_init(cid_set);
11106 // aggregate return vals : refd in both final_sample
11107 // and create_output_tuple
11108 // Create cached temporaries for UDAF return values.
11109 for(a=0;a<aggr_tbl.size();a++){
11110 if(! aggr_tbl.is_builtin(a)){
11111 int afcn_id = aggr_tbl.get_fcn_id(a);
11112 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11113 sprintf(tmpstr,"udaf_ret_%d", a);
11114 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11118 // complex literals
11119 ret += "//\t\tInitialize complex literals.\n";
11120 ret += gen_complex_lit_init(complex_literals);
11122 // Initialize partial function results so they can be safely GC'd
11123 ret += gen_partial_fcn_init(partial_fcns);
11125 // Initialize non-query-parameter parameter handles
11126 ret += gen_pass_by_handle_init(param_handle_table);
11128 // temporal flush variables
11129 // ASSUME that structured values won't be temporal.
11130 if(uses_temporal_flush){
11131 ret += "//\t\tInitialize temporal flush variables.\n";
11132 for(g=0;g<gb_tbl.size();g++){
11133 data_type *gdt = gb_tbl.get_data_type(g);
11134 if(gdt->is_temporal()){
11135 literal_t gl(gdt->type_indicator());
11136 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
11137 ret.append(tmpstr);
11140 ret += "\tneeds_temporal_flush = false;\n";
11143 // Init temporal attributes referenced in select list
11144 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
11149 //-------------------
11150 // Functor destructor
11151 ret += "//\t\tFunctor destructor.\n";
11152 ret += "~"+this->generate_functor_name()+"(){\n";
11154 // clean up buffer type complex literals
11155 ret += gen_complex_lit_dtr(complex_literals);
11157 // Deregister the pass-by-handle parameters
11158 ret += "/* register and de-register the pass-by-handle parameters */\n";
11159 ret += gen_pass_by_handle_dtr(param_handle_table);
11161 // clean up partial function results.
11162 ret += "/* clean up partial function storage */\n";
11163 ret += gen_partial_fcn_dtr(partial_fcns);
11165 // Destroy the parameters, if any need to be destroyed
11166 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11171 //-------------------
11172 // Parameter manipulation routines
11173 ret += generate_load_param_block(this->generate_functor_name(),
11174 this->param_tbl,param_handle_table);
11175 ret += generate_delete_param_block(this->generate_functor_name(),
11176 this->param_tbl,param_handle_table);
11178 //-------------------
11179 // Register new parameter block
11181 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
11182 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11183 ret += "\treturn this->load_params_"+this->generate_functor_name()+
11187 //-------------------
11188 // the create_group method.
11189 // This method creates a group in a buffer passed in
11190 // (to allow for creation on the stack).
11191 // There are also a couple of side effects:
11192 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
11193 // 2) determine if a temporal flush is required.
11195 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
11196 // Variables for execution of the function.
11197 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11199 if(partial_fcns.size()>0){ // partial fcn access failure
11200 ret += "\tgs_retval_t retval = 0;\n";
11204 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
11205 "_groupdef *) buffer;\n";
11207 // Start by cleaning up partial function results
11208 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11210 set<int> gb_pfcns; // partial fcns in gbdefs, aggr se's
11211 for(g=0;g<gb_tbl.size();g++){
11212 collect_partial_fcns(gb_tbl.get_def(g), gb_pfcns);
11214 ret += gen_partial_fcn_dtr(partial_fcns,gb_pfcns);
11215 // ret += gen_partial_fcn_dtr(partial_fcns);
11218 ret += gen_temp_tuple_check(this->node_name, 0);
11219 col_id_set found_cids; // colrefs unpacked thus far.
11220 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
11224 // Save temporal group-by variables
11227 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
11229 for(g=0;g<gb_tbl.size();g++){
11231 data_type *gdt = gb_tbl.get_data_type(g);
11233 if(gdt->is_temporal()){
11234 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11235 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11236 ret.append(tmpstr);
11243 // Compare the temporal GB vars with the stored ones,
11244 // set flush indicator and update stored GB vars if there is any change.
11246 if(uses_temporal_flush){
11247 ret+= "\tif( !( (";
11248 bool first_one = true;
11249 for(g=0;g<gb_tbl.size();g++){
11250 data_type *gdt = gb_tbl.get_data_type(g);
11252 if(gdt->is_temporal()){
11253 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
11254 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
11255 if(first_one){first_one = false;} else {ret += ") && (";}
11256 ret += generate_equality_test(lhs_op, rhs_op, gdt);
11260 for(g=0;g<gb_tbl.size();g++){
11261 data_type *gdt = gb_tbl.get_data_type(g);
11262 if(gdt->is_temporal()){
11263 if(gdt->is_buffer_type()){
11264 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
11266 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
11268 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
11274 if(uses_temporal_flush){
11275 for(g=0;g<gb_tbl.size();g++){
11276 data_type *gdt = gb_tbl.get_data_type(g);
11277 if(gdt->is_temporal()){
11278 ret+="if(last_flushed_gb"+int_to_string(g)+">0)\n";
11284 ret += "\t\tneeds_temporal_flush=true;\n";
11285 ret += "\t\t}else{\n"
11286 "\t\t\tneeds_temporal_flush=false;\n"
11291 // For temporal status tuple we don't need to do anything else
11292 ret += "\tif (temp_tuple_received) return NULL;\n\n";
11295 // The partial functions ref'd in the group-by var
11296 // definitions must be evaluated. If one returns false,
11297 // then implicitly the predicate is false.
11298 set<int>::iterator pfsi;
11300 if(gb_pfcns.size() > 0)
11301 ret += "//\t\tUnpack partial fcns.\n";
11302 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, gb_pfcns,
11303 found_cids, segen_gb_tbl, "NULL", needs_xform);
11305 // Unpack the group-by variables
11307 for(g=0;g<gb_tbl.size();g++){
11308 // Find the new fields ref'd by this GBvar def.
11309 col_id_set new_cids;
11310 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
11311 // Unpack these values.
11312 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
11314 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11315 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11317 // There seems to be no difference between the two
11318 // branches of the IF statement.
11319 data_type *gdt = gb_tbl.get_data_type(g);
11320 if(gdt->is_buffer_type()){
11321 // Create temporary copy.
11322 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11323 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11325 scalarexp_t *gse = gb_tbl.get_def(g);
11326 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11327 g,generate_se_code(gse,schema).c_str());
11330 ret.append(tmpstr);
11335 ret+= "\treturn gbval;\n";
11340 //-------------------
11341 // the evaluate_predicate method.
11342 // This method evaluates the WHERE clause against an input tuple.
11343 // Clauses that do not reference stateful fcns are tested first and
11344 // return false immediately on failure; the remaining clauses only
11345 // clear the returned flag.  Side effects: unpacks the partial fcns
11346 // ref'd by the WHERE clause and aggregates, and all remaining attributes.
11348 ret += "bool evaluate_predicate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, int cd){\n";
11349 // Variables for execution of the function.
11350 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11352 if(partial_fcns.size()>0){ // partial fcn access failure
11353 ret += "\tgs_retval_t retval = 0;\n";
11357 // Start by cleaning up partial function results
11358 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11359 set<int> w_pfcns; // partial fcns in where clause
11360 for(w=0;w<where.size();++w)
11361 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
11363 set<int> ag_pfcns; // partial fcns in gbdefs, aggr se's
11364 for(a=0;a<aggr_tbl.size();a++){
11365 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_pfcns);
11367 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
11368 ret += gen_partial_fcn_dtr(partial_fcns,ag_pfcns);
11370 ret+="//\t\tEvaluate clauses which don't reference stateful fcns first \n";
11371 for(w=0;w<where.size();++w){
11372 if(! pred_refs_sfun(where[w]->pr)){
11373 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11375 // Find the set of variables accessed in this CNF elem,
11376 // but in no previous element.
11377 col_id_set new_cids;
11378 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11380 // Unpack these values.
11381 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11382 // Find partial fcns ref'd in this cnf element
11383 set<int> pfcn_refs;
11384 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11385 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11387 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11388 +") ) return(false);\n";
11393 // The partial functions ref'd in the aggregate
11394 // definitions must also be evaluated. If one returns false,
11395 // then implicitly the predicate is false.
11396 // ASSUME that aggregates cannot reference stateful fcns.
11398 if(ag_pfcns.size() > 0)
11399 ret += "//\t\tUnpack remaining partial fcns.\n";
11400 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_pfcns,
11401 found_cids, segen_gb_tbl, "false", needs_xform);
11403 ret+="//\t\tEvaluate all remaining where clauses.\n";
11404 ret+="\tbool retval = true;\n";
11405 for(w=0;w<where.size();++w){
11406 if( pred_refs_sfun(where[w]->pr)){
11407 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11409 // Find the set of variables accessed in this CNF elem,
11410 // but in no previous element.
11411 col_id_set new_cids;
11412 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11414 // Unpack these values.
11415 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11416 // Find partial fcns ref'd in this cnf element
11417 set<int> pfcn_refs;
11418 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11419 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11421 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11422 +") ) retval = false;\n";
11426 ret+="// Unpack all remaining attributes\n";
11427 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
11429 ret += "\n\treturn retval;\n";
11432 //--------------------------------------------------------
11433 // Create and initialize an aggregate object
11435 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, gs_sp_t a,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11436 // Variables for execution of the function.
11437 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11440 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+ "_aggrdef *)a;\n";
11442 for(a=0;a<aggr_tbl.size();a++){
11443 if(aggr_tbl.is_builtin(a)){
11444 // Create temporaries for buffer return values
11445 data_type *adt = aggr_tbl.get_data_type(a);
11446 if(adt->is_buffer_type()){
11447 sprintf(tmpstr,"aggr_tmp_%d", a);
11448 ret+=adt->make_host_cvar(tmpstr)+";\n";
11453 for(a=0;a<aggr_tbl.size();a++){
11454 sprintf(tmpstr,"aggval->aggr_var%d",a);
11455 string assignto_var = tmpstr;
11456 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11459 ret += "\treturn aggval;\n";
11463 //--------------------------------------------------------
11464 // initialize an aggregate object inplace
11466 ret += "void create_aggregate(host_tuple &tup0, "+this->generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11467 // Variables for execution of the function.
11468 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11472 for(a=0;a<aggr_tbl.size();a++){
11473 if(aggr_tbl.is_builtin(a)){
11474 // Create temporaries for buffer return values
11475 data_type *adt = aggr_tbl.get_data_type(a);
11476 if(adt->is_buffer_type()){
11477 sprintf(tmpstr,"aggr_tmp_%d", a);
11478 ret+=adt->make_host_cvar(tmpstr)+";\n";
11483 for(a=0;a<aggr_tbl.size();a++){
11484 sprintf(tmpstr,"aggval->aggr_var%d",a);
11485 string assignto_var = tmpstr;
11486 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11492 //--------------------------------------------------------
11493 // Create and clean-initialize a state object
11495 ret += "void initialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval){\n";
11496 // Variables for execution of the function.
11497 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11500 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
11502 for(a=0;a<aggr_tbl.size();a++){
11503 if( aggr_tbl.is_superaggr(a)){
11504 if(aggr_tbl.is_builtin(a)){
11505 // Create temporaries for buffer return values
11506 data_type *adt = aggr_tbl.get_data_type(a);
11507 if(adt->is_buffer_type()){
11508 sprintf(tmpstr,"aggr_tmp_%d", a);
11509 ret+=adt->make_host_cvar(tmpstr)+";\n";
11515 for(a=0;a<aggr_tbl.size();a++){
11516 if( aggr_tbl.is_superaggr(a)){
11517 sprintf(tmpstr,"stval->aggr_var%d",a);
11518 string assignto_var = tmpstr;
11519 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11523 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
11524 string state_nm = (*ssi);
11525 ret += "_sfun_state_clean_init_"+state_nm+"(&(stval->state_var_"+state_nm+"));\n";
11531 //--------------------------------------------------------
11532 // Create and dirty-initialize a state object
11534 ret += "void reinitialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, "+generate_functor_name()+"_statedef *old_stval, int cd){\n";
11535 // Variables for execution of the function.
11536 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11539 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
11541 for(a=0;a<aggr_tbl.size();a++){
11542 if( aggr_tbl.is_superaggr(a)){
11543 if(aggr_tbl.is_builtin(a)){
11544 // Create temporaries for buffer return values
11545 data_type *adt = aggr_tbl.get_data_type(a);
11546 if(adt->is_buffer_type()){
11547 sprintf(tmpstr,"aggr_tmp_%d", a);
11548 ret+=adt->make_host_cvar(tmpstr)+";\n";
11554 // initialize superaggregates
11555 for(a=0;a<aggr_tbl.size();a++){
11556 if( aggr_tbl.is_superaggr(a)){
11557 sprintf(tmpstr,"stval->aggr_var%d",a);
11558 string assignto_var = tmpstr;
11559 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11563 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
11564 string state_nm = (*ssi);
11565 ret += "_sfun_state_dirty_init_"+state_nm+"(&(stval->state_var_"+state_nm+"),&(old_stval->state_var_"+state_nm+"), cd );\n";
11570 //--------------------------------------------------------
11571 // Finalize_state : call the finalize fcn on all states
11574 ret += "void finalize_state( "+generate_functor_name()+"_statedef *stval, int cd){\n";
11576 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
11577 string state_nm = (*ssi);
11578 ret += "_sfun_state_final_init_"+state_nm+"(&(stval->state_var_"+state_nm+"), cd);\n";
11586 //--------------------------------------------------------
11587 // update (plus) a superaggregate object
11589 ret += "void update_plus_superaggr(host_tuple &tup0, " +
11590 generate_functor_name()+"_groupdef *gbval, "+
11591 generate_functor_name()+"_statedef *stval){\n";
11592 // Variables for execution of the function.
11593 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11595 // use of temporaries depends on the aggregate,
11596 // generate them in generate_aggr_update
11599 for(a=0;a<aggr_tbl.size();a++){
11600 if(aggr_tbl.is_superaggr(a)){
11601 sprintf(tmpstr,"stval->aggr_var%d",a);
11602 string varname = tmpstr;
11603 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
11607 ret += "\treturn;\n";
11612 //--------------------------------------------------------
11613 // update (minus) a superaggregate object
11615 ret += "void update_minus_superaggr( "+
11616 generate_functor_name()+"_groupdef *gbval, "+
11617 generate_functor_name()+"_aggrdef *aggval,"+
11618 generate_functor_name()+"_statedef *stval"+
11620 // Variables for execution of the function.
11621 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11623 // use of temporaries depends on the aggregate,
11624 // generate them in generate_aggr_update
11627 for(a=0;a<aggr_tbl.size();a++){
11628 if(aggr_tbl.is_superaggr(a)){
11629 sprintf(tmpstr,"stval->aggr_var%d",a);
11630 string super_varname = tmpstr;
11631 sprintf(tmpstr,"aggval->aggr_var%d",a);
11632 string sub_varname = tmpstr;
11633 ret.append(generate_superaggr_minus(sub_varname, super_varname,&aggr_tbl,a, schema));
11637 ret += "\treturn;\n";
11641 //--------------------------------------------------------
11642 // update an aggregate object
11644 ret += "void update_aggregate(host_tuple &tup0, "
11645 +generate_functor_name()+"_groupdef *gbval, "+
11646 generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11647 // Variables for execution of the function.
11648 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11650 // use of temporaries depends on the aggregate,
11651 // generate them in generate_aggr_update
11654 for(a=0;a<aggr_tbl.size();a++){
11655 sprintf(tmpstr,"aggval->aggr_var%d",a);
11656 string varname = tmpstr;
11657 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
11660 ret += "\treturn;\n";
11663 //---------------------------------------------------
11666 ret += "\tbool flush_needed(){\n";
11667 if(uses_temporal_flush){
11668 ret += "\t\treturn needs_temporal_flush;\n";
11670 ret += "\t\treturn false;\n";
11675 //------------------------------------------------------
11676 // The cleaning_when predicate
11678 string gbvar = "gbval->gb_var";
11679 string aggvar = "aggval->";
11681 ret += "bool need_to_clean( "
11682 +generate_functor_name()+"_groupdef *gbval, "+
11683 generate_functor_name()+"_statedef *stval, int cd"+
11686 if(cleanwhen.size()>0)
11687 ret += "\tbool predval = true;\n";
11689 ret += "\tbool predval = false;\n";
11691 // Find the udafs ref'd in the cleaning_when clause
11693 for(w=0;w<cleanwhen.size();++w)
11694 collect_aggr_refs_pr(cleanwhen[w]->pr, cw_aggs);
11697 // get the return values from the UDAFS
11698 for(a=0;a<aggr_tbl.size();a++){
11699 if(! aggr_tbl.is_builtin(a) && cw_aggs.count(a)){
11700 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11701 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11702 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11707 // Start by cleaning up partial function results
11708 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11709 set<int> cw_pfcns; // partial fcns in where clause
11710 for(w=0;w<cleanwhen.size();++w)
11711 collect_partial_fcns_pr(cleanwhen[w]->pr, cw_pfcns);
11713 ret += gen_partial_fcn_dtr(partial_fcns,cw_pfcns);
11716 for(w=0;w<cleanwhen.size();++w){
11717 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11719 // Find partial fcns ref'd in this cnf element
11720 set<int> pfcn_refs;
11721 collect_partial_fcns_pr(cleanwhen[w]->pr, pfcn_refs);
11722 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
11723 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11724 ret += "\tif(retval){ return false;}\n";
11726 // ret += unpack_partial_fcn_fm_aggr(schema, partial_fcns, pfcn_refs,"false");
11728 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanwhen[w]->pr,gbvar, aggvar, schema)+
11729 ") ) predval = false;\n";
11732 ret += "\treturn predval;\n";
11735 //------------------------------------------------------
11736 // The cleaning_by predicate
11738 ret += "bool sample_group("
11739 +generate_functor_name()+"_groupdef *gbval, "+
11740 generate_functor_name()+"_aggrdef *aggval,"+
11741 generate_functor_name()+"_statedef *stval, int cd"+
11744 if(cleanby.size()>0)
11745 ret += "\tbool retval = true;\n";
11747 ret += "\tbool retval = false;\n";
11749 // Find the udafs ref'd in the cleaning_by clause
11751 for(w=0;w<cleanby.size();++w)
11752 collect_aggr_refs_pr(cleanby[w]->pr, cb_aggs);
11755 // get the return values from the UDAFS
11756 for(a=0;a<aggr_tbl.size();a++){
11757 if(! aggr_tbl.is_builtin(a) && cb_aggs.count(a)){
11758 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11759 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11760 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11765 // Start by cleaning up partial function results
11766 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11767 set<int> cb_pfcns; // partial fcns in where clause
11768 for(w=0;w<cleanby.size();++w)
11769 collect_partial_fcns_pr(cleanby[w]->pr, cb_pfcns);
11771 ret += gen_partial_fcn_dtr(partial_fcns,cb_pfcns);
11774 for(w=0;w<cleanwhen.size();++w){
11775 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11779 // Find the set of variables accessed in this CNF elem,
11780 // but in no previous element.
11781 col_id_set new_cids;
11782 get_new_pred_cids(cleanby[w]->pr, found_cids, new_cids, segen_gb_tbl);
11784 // Unpack these values.
11785 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11788 // Find partial fcns ref'd in this cnf element
11789 set<int> pfcn_refs;
11790 collect_partial_fcns_pr(cleanby[w]->pr, pfcn_refs);
11791 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
11792 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11793 ret += "\tif(retval){ return false;}\n";
11795 // ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11797 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanby[w]->pr,gbvar, aggvar, schema)+
11798 +") ) retval = false;\n";
11801 ret += "\treturn retval;\n";
11805 //-----------------------------------------------------
11807 ret += "bool final_sample_group("
11808 +generate_functor_name()+"_groupdef *gbval, "+
11809 generate_functor_name()+"_aggrdef *aggval,"+
11810 generate_functor_name()+"_statedef *stval,"+
11813 ret += "\tgs_retval_t retval = 0;\n";
11815 // Find the udafs ref'd in the having clause
11817 for(w=0;w<having.size();++w)
11818 collect_aggr_refs_pr(having[w]->pr, hv_aggs);
11821 // get the return values from the UDAFS
11822 for(a=0;a<aggr_tbl.size();a++){
11823 if(! aggr_tbl.is_builtin(a) && hv_aggs.count(a)){
11824 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11825 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11826 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11831 set<int> hv_sl_pfcns;
11832 for(w=0;w<having.size();w++){
11833 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
11836 // clean up the partial fcn results from any previous execution
11837 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
11840 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
11841 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11842 ret += "\tif(retval){ return false;}\n";
11845 // Evaluate the HAVING clause
11846 // TODO: this seems to have a ++ operator rather than a + operator.
11847 for(w=0;w<having.size();++w){
11848 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
11851 ret += "\treturn true;\n";
11854 //---------------------------------------------------
11855 // create output tuple
11856 // Unpack the partial functions ref'd in the where clause,
11857 // select clause. Evaluate the where clause.
11858 // Finally, pack the tuple.
11860 // I need to use special code generation here,
11861 // so I'll leave it in longhand.
11863 ret += "host_tuple create_output_tuple("
11864 +generate_functor_name()+"_groupdef *gbval, "+
11865 generate_functor_name()+"_aggrdef *aggval,"+
11866 generate_functor_name()+"_statedef *stval,"+
11867 "int cd, bool &failed){\n";
11869 ret += "\thost_tuple tup;\n";
11870 ret += "\tfailed = false;\n";
11871 ret += "\tgs_retval_t retval = 0;\n";
11874 // Find the udafs ref'd in the select clause
11876 for(s=0;s<select_list.size();s++)
11877 collect_agg_refs(select_list[s]->se, sl_aggs);
11880 // get the return values from the UDAFS
11881 for(a=0;a<aggr_tbl.size();a++){
11882 if(! aggr_tbl.is_builtin(a) && sl_aggs.count(a)){
11883 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
11884 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11885 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
11890 // I can't cache partial fcn results from the having
11891 // clause because evaluation is separated.
11893 for(s=0;s<select_list.size();s++){
11894 collect_partial_fcns(select_list[s]->se, sl_pfcns);
11897 for(pfsi=sl_pfcns.begin();pfsi!=sl_pfcns.end();++pfsi){
11898 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
11899 ret += "\tif(retval){ failed=true; return tup;}\n";
11903 // Now, compute the size of the tuple.
11905 // Unpack any BUFFER type selections into temporaries
11906 // so that I can compute their size and not have
11907 // to recompute their value during tuple packing.
11908 // I can use regular assignment here because
11909 // these temporaries are non-persistent.
11910 // TODO: should I be using the selvar generation routine?
11912 ret += "//\t\tCompute the size of the tuple.\n";
11913 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
11914 for(s=0;s<select_list.size();s++){
11915 scalarexp_t *se = select_list[s]->se;
11916 data_type *sdt = se->get_data_type();
11917 if(sdt->is_buffer_type() &&
11918 !( (se->get_operator_type() == SE_COLREF) ||
11919 (se->get_operator_type() == SE_AGGR_STAR) ||
11920 (se->get_operator_type() == SE_AGGR_SE) ||
11921 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
11922 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
11924 sprintf(tmpstr,"selvar_%d",s);
11925 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
11926 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
11930 // The size of the tuple is the size of the tuple struct plus the
11931 // size of the buffers to be copied in.
11933 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
11934 for(s=0;s<select_list.size();s++){
11935 // if(s>0) ret += "+";
11936 scalarexp_t *se = select_list[s]->se;
11937 data_type *sdt = select_list[s]->se->get_data_type();
11938 if(sdt->is_buffer_type()){
11939 if(!( (se->get_operator_type() == SE_COLREF) ||
11940 (se->get_operator_type() == SE_AGGR_STAR) ||
11941 (se->get_operator_type() == SE_AGGR_SE) ||
11942 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
11943 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
11945 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
11946 ret.append(tmpstr);
11948 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
11949 ret.append(tmpstr);
11955 // Allocate tuple data block.
11956 ret += "//\t\tCreate the tuple block.\n";
11957 ret += "\ttup.data = malloc(tup.tuple_size);\n";
11958 ret += "\ttup.heap_resident = true;\n";
11960 // Mark tuple as regular
11961 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
11963 // ret += "\ttup.channel = 0;\n";
11964 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
11965 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
11968 // (Here, offsets are hard-wired. is this a problem?)
11970 ret += "//\t\tPack the fields into the tuple.\n";
11971 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
11972 for(s=0;s<select_list.size();s++){
11973 scalarexp_t *se = select_list[s]->se;
11974 data_type *sdt = se->get_data_type();
11975 if(sdt->is_buffer_type()){
11976 if(!( (se->get_operator_type() == SE_COLREF) ||
11977 (se->get_operator_type() == SE_AGGR_STAR) ||
11978 (se->get_operator_type() == SE_AGGR_SE) ||
11979 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
11980 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
11982 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
11983 ret.append(tmpstr);
11984 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
11985 ret.append(tmpstr);
11987 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
11988 ret.append(tmpstr);
11989 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
11990 ret.append(tmpstr);
11993 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
11994 ret.append(tmpstr);
11995 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
12000 // Destroy string temporaries
12001 ret += gen_buffer_selvars_dtr(select_list);
12002 // Destroy string return vals of UDAFs
12003 for(a=0;a<aggr_tbl.size();a++){
12004 if(! aggr_tbl.is_builtin(a)){
12005 int afcn_id = aggr_tbl.get_fcn_id(a);
12006 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12007 if(adt->is_buffer_type()){
12008 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12009 adt->get_hfta_buffer_destroy().c_str(), a );
12016 ret += "\treturn tup;\n";
12020 //-------------------------------------------------------------------
12021 // Temporal update functions
12023 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12025 // create a temp status tuple
12026 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12028 ret += gen_init_temp_status_tuple(this->get_node_name());
12031 // (Here, offsets are hard-wired. is this a problem?)
12033 ret += "//\t\tPack the fields into the tuple.\n";
12034 for(s=0;s<select_list.size();s++){
12035 data_type *sdt = select_list[s]->se->get_data_type();
12036 if(sdt->is_temporal()){
12037 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12039 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
12045 ret += "\treturn 0;\n";
12046 ret += "};};\n\n\n";
12049 //----------------------------------------------------------
12050 // The hash function
12052 ret += "struct "+generate_functor_name()+"_hash_func{\n";
12053 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12054 "_groupdef *grp) const{\n";
12055 ret += "\t\treturn(";
12056 for(g=0;g<gb_tbl.size();g++){
12057 if(g>0) ret += "^";
12058 data_type *gdt = gb_tbl.get_data_type(g);
12059 if(gdt->use_hashfunc()){
12060 if(gdt->is_buffer_type())
12061 sprintf(tmpstr,"(%s*%s(&)grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12063 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12065 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12069 ret += ") >> 32);\n";
12073 //----------------------------------------------------------
12074 // The superhash function
12076 ret += "struct "+generate_functor_name()+"_superhash_func{\n";
12077 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12078 "_groupdef *grp) const{\n";
12079 ret += "\t\treturn(0";
12081 for(g=0;g<gb_tbl.size();g++){
12082 if(sg_tbl.count(g)>0){
12084 data_type *gdt = gb_tbl.get_data_type(g);
12085 if(gdt->use_hashfunc()){
12086 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12088 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12093 ret += ") >> 32);\n";
12098 //----------------------------------------------------------
12099 // The comparison function
12101 ret += "struct "+generate_functor_name()+"_equal_func{\n";
12102 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12103 generate_functor_name()+"_groupdef *grp2) const{\n";
12104 ret += "\t\treturn( (";
12105 for(g=0;g<gb_tbl.size();g++){
12106 if(g>0) ret += ") && (";
12107 data_type *gdt = gb_tbl.get_data_type(g);
12108 if(gdt->complex_comparison(gdt)){
12109 if(gdt->is_buffer_type())
12110 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12111 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12113 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12114 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12116 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12125 //----------------------------------------------------------
12126 // The superhash comparison function
12128 ret += "struct "+generate_functor_name()+"_superequal_func{\n";
12129 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12130 generate_functor_name()+"_groupdef *grp2) const{\n";
12131 ret += "\t\treturn( (";
12133 bool first_elem = true;
12134 for(g=0;g<gb_tbl.size();g++){
12135 if(sg_tbl.count(g)){
12136 if(first_elem) first_elem=false; else ret += ") && (";
12137 data_type *gdt = gb_tbl.get_data_type(g);
12138 if(gdt->complex_comparison(gdt)){
12139 if(gdt->is_buffer_type())
12140 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12141 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12143 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12144 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12146 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12163 string sgahcwcb_qpn::generate_operator(int i, string params){
12166 " clean_operator<" +
12167 generate_functor_name()+",\n\t"+
12168 generate_functor_name() + "_groupdef, \n\t" +
12169 generate_functor_name() + "_aggrdef, \n\t" +
12170 generate_functor_name() + "_statedef, \n\t" +
12171 generate_functor_name()+"_hash_func, \n\t"+
12172 generate_functor_name()+"_equal_func ,\n\t"+
12173 generate_functor_name()+"_superhash_func,\n\t "+
12174 generate_functor_name()+"_superequal_func \n\t"+
12175 "> *op"+int_to_string(i)+" = new clean_operator<"+
12176 generate_functor_name()+",\n\t"+
12177 generate_functor_name() + "_groupdef,\n\t " +
12178 generate_functor_name() + "_aggrdef, \n\t" +
12179 generate_functor_name() + "_statedef, \n\t" +
12180 generate_functor_name()+"_hash_func, \n\t"+
12181 generate_functor_name()+"_equal_func, \n\t"+
12182 generate_functor_name()+"_superhash_func, \n\t"+
12183 generate_functor_name()+"_superequal_func\n\t "
12184 ">("+params+", \"" + get_node_name() + "\");\n"
12188 ////////////////////////////////////////////////////////////////
12193 string rsgah_qpn::generate_functor_name(){
12194 return("rsgah_functor_" + normalize_name(this->get_node_name()));
12198 string rsgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
12202 // Initialize generate utility globals
12203 segen_gb_tbl = &(gb_tbl);
12206 //--------------------------------
12207 // group definition class
12208 string ret = "class " + generate_functor_name() + "_groupdef{\n";
12209 ret += "public:\n";
12210 for(g=0;g<this->gb_tbl.size();g++){
12211 sprintf(tmpstr,"gb_var%d",g);
12212 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12215 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
12216 ret += "\t"+generate_functor_name() + "_groupdef("+
12217 this->generate_functor_name() + "_groupdef *gd){\n";
12218 for(g=0;g<gb_tbl.size();g++){
12219 data_type *gdt = gb_tbl.get_data_type(g);
12220 if(gdt->is_buffer_type()){
12221 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
12222 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
12225 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
12231 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
12232 for(g=0;g<gb_tbl.size();g++){
12233 data_type *gdt = gb_tbl.get_data_type(g);
12234 if(gdt->is_buffer_type()){
12235 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
12236 gdt->get_hfta_buffer_destroy().c_str(), g );
12243 //--------------------------------
12244 // aggr definition class
12245 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
12246 ret += "public:\n";
12247 for(a=0;a<aggr_tbl.size();a++){
12248 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
12249 sprintf(tmpstr,"aggr_var%d",a);
12250 if(aggr_tbl.is_builtin(a))
12251 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
12253 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
12256 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
12258 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
12259 for(a=0;a<aggr_tbl.size();a++){
12260 if(aggr_tbl.is_builtin(a)){
12261 data_type *adt = aggr_tbl.get_data_type(a);
12262 if(adt->is_buffer_type()){
12263 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
12264 adt->get_hfta_buffer_destroy().c_str(), a );
12268 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
12269 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12270 ret+="(aggr_var"+int_to_string(a)+"));\n";
12276 //--------------------------------
12277 // gb functor class
12278 ret += "class " + this->generate_functor_name() + "{\n";
12280 // Find variables referenced in this query node.
12282 col_id_set cid_set;
12283 col_id_set::iterator csi;
12285 for(w=0;w<where.size();++w)
12286 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
12287 for(w=0;w<having.size();++w)
12288 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
12289 for(w=0;w<closing_when.size();++w)
12290 gather_pr_col_ids(closing_when[w]->pr,cid_set,segen_gb_tbl);
12291 for(g=0;g<gb_tbl.size();g++)
12292 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
12294 for(s=0;s<select_list.size();s++){
12295 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
12299 // Private variables : store the state of the functor.
12300 // 1) variables for unpacked attributes
12301 // 2) offsets of the unpacked attributes
12302 // 3) storage of partial functions
12303 // 4) storage of complex literals (i.e., require a constructor)
12305 ret += "private:\n";
12307 // var to save the schema handle
12308 ret += "\tint schema_handle0;\n";
12310 // generate the declaration of all the variables related to
12311 // temp tuples generation
12312 ret += gen_decl_temp_vars();
12314 // unpacked attribute storage, offsets
12315 ret += "//\t\tstorage and offsets of accessed fields.\n";
12316 ret += generate_access_vars(cid_set, schema);
12317 // tuple metadata offset
12318 ret += "\tint tuple_metadata_offset0;\n";
12320 // Variables to store results of partial functions.
12321 // WARNING find_partial_functions modifies the SE
12322 // (it marks the partial function id).
12323 ret += "//\t\tParital function result storage\n";
12324 vector<scalarexp_t *> partial_fcns;
12325 vector<int> fcn_ref_cnt;
12326 vector<bool> is_partial_fcn;
12327 for(s=0;s<select_list.size();s++){
12328 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
12330 for(w=0;w<where.size();w++){
12331 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12333 for(w=0;w<having.size();w++){
12334 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12336 for(w=0;w<closing_when.size();w++){
12337 find_partial_fcns_pr(closing_when[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12339 for(g=0;g<gb_tbl.size();g++){
12340 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
12342 for(a=0;a<aggr_tbl.size();a++){
12343 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
12345 if(partial_fcns.size()>0){
12346 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
12347 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
12350 // Create cached temporaries for UDAF return values.
12351 for(a=0;a<aggr_tbl.size();a++){
12352 if(! aggr_tbl.is_builtin(a)){
12353 int afcn_id = aggr_tbl.get_fcn_id(a);
12354 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12355 sprintf(tmpstr,"udaf_ret_%d", a);
12356 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
12361 // Complex literals (i.e., they need constructors)
12362 ret += "//\t\tComplex literal storage.\n";
12363 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
12364 ret += generate_complex_lit_vars(complex_literals);
12366 // Pass-by-handle parameters
12367 ret += "//\t\tPass-by-handle storage.\n";
12368 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
12369 ret += generate_pass_by_handle_vars(param_handle_table);
12372 // variables to hold parameters.
12373 ret += "//\tfor query parameters\n";
12374 ret += generate_param_vars(param_tbl);
12376 // Is there a temporal flush? If so create flush temporaries,
12377 // create flush indicator.
12378 bool uses_temporal_flush = false;
12379 for(g=0;g<gb_tbl.size();g++){
12380 data_type *gdt = gb_tbl.get_data_type(g);
12381 if(gdt->is_temporal())
12382 uses_temporal_flush = true;
12385 if(uses_temporal_flush){
12386 ret += "//\t\tFor temporal flush\n";
12387 for(g=0;g<gb_tbl.size();g++){
12388 data_type *gdt = gb_tbl.get_data_type(g);
12389 if(gdt->is_temporal()){
12390 sprintf(tmpstr,"last_gb%d",g);
12391 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12392 sprintf(tmpstr,"last_flushed_gb%d",g);
12393 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12396 ret += "\tbool needs_temporal_flush;\n";
12399 // The publicly exposed functions
12401 ret += "\npublic:\n";
12404 //-------------------
12405 // The functor constructor
12406 // pass in the schema handle.
12407 // 1) make assignments to the unpack offset variables
12408 // 2) initialize the complex literals
12410 ret += "//\t\tFunctor constructor.\n";
12411 ret += this->generate_functor_name()+"(int schema_handle0){\n";
12413 // save the schema handle
12414 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
12416 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
12419 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
12420 ret += gen_access_var_init(cid_set);
12422 // complex literals
12423 ret += "//\t\tInitialize complex literals.\n";
12424 ret += gen_complex_lit_init(complex_literals);
12426 // Initialize partial function results so they can be safely GC'd
12427 ret += gen_partial_fcn_init(partial_fcns);
12429 // Initialize non-query-parameter parameter handles
12430 ret += gen_pass_by_handle_init(param_handle_table);
12432 // temporal flush variables
12433 // ASSUME that structured values won't be temporal.
12434 if(uses_temporal_flush){
12435 ret += "//\t\tInitialize temporal flush variables.\n";
12436 for(g=0;g<gb_tbl.size();g++){
12437 data_type *gdt = gb_tbl.get_data_type(g);
12438 if(gdt->is_temporal()){
12439 literal_t gl(gdt->type_indicator());
12440 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
12441 ret.append(tmpstr);
12444 ret += "\tneeds_temporal_flush = false;\n";
12447 // Init temporal attributes referenced in select list
12448 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
12453 //-------------------
12454 // Functor destructor
12455 ret += "//\t\tFunctor destructor.\n";
12456 ret += "~"+this->generate_functor_name()+"(){\n";
12458 // clean up buffer type complex literals
12459 ret += gen_complex_lit_dtr(complex_literals);
12461 // Deregister the pass-by-handle parameters
12462 ret += "/* register and de-register the pass-by-handle parameters */\n";
12463 ret += gen_pass_by_handle_dtr(param_handle_table);
12465 // clean up partial function results.
12466 ret += "/* clean up partial function storage */\n";
12467 ret += gen_partial_fcn_dtr(partial_fcns);
12469 // Destroy the parameters, if any need to be destroyed
12470 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12475 //-------------------
12476 // Parameter manipulation routines
12477 ret += generate_load_param_block(this->generate_functor_name(),
12478 this->param_tbl,param_handle_table);
12479 ret += generate_delete_param_block(this->generate_functor_name(),
12480 this->param_tbl,param_handle_table);
12482 //-------------------
12483 // Register new parameter block
12485 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
12486 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12487 ret += "\treturn this->load_params_"+this->generate_functor_name()+
12492 //-------------------
12493 // the create_group method.
12494 // This method creates a group in a buffer passed in
12495 // (to allow for creation on the stack).
12496 // There are also a couple of side effects:
12497 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
12498 // 2) determine if a temporal flush is required.
12500 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
12501 // Variables for execution of the function.
12502 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12504 if(partial_fcns.size()>0){ // partial fcn access failure
12505 ret += "\tgs_retval_t retval = 0;\n";
12509 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
12510 "_groupdef *) buffer;\n";
12512 // Start by cleaning up partial function results
12513 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12514 set<int> w_pfcns; // partial fcns in where clause
12515 for(w=0;w<where.size();++w)
12516 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
12518 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
12519 for(g=0;g<gb_tbl.size();g++){
12520 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
12522 for(a=0;a<aggr_tbl.size();a++){
12523 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
12525 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
12526 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
12527 // ret += gen_partial_fcn_dtr(partial_fcns);
12530 ret += gen_temp_tuple_check(this->node_name, 0);
12531 col_id_set found_cids; // colrefs unpacked thus far.
12532 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
12535 // Save temporal group-by variables
12538 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
12540 for(g=0;g<gb_tbl.size();g++){
12542 data_type *gdt = gb_tbl.get_data_type(g);
12544 if(gdt->is_temporal()){
12545 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12546 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
12547 ret.append(tmpstr);
12554 // Compare the temporal GB vars with the stored ones,
12555 // set flush indicator and update stored GB vars if there is any change.
12557 if(uses_temporal_flush){
12558 ret+= "\tif( !( (";
12559 bool first_one = true;
12560 for(g=0;g<gb_tbl.size();g++){
12561 data_type *gdt = gb_tbl.get_data_type(g);
12563 if(gdt->is_temporal()){
12564 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
12565 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
12566 if(first_one){first_one = false;} else {ret += ") && (";}
12567 ret += generate_equality_test(lhs_op, rhs_op, gdt);
12571 for(g=0;g<gb_tbl.size();g++){
12572 data_type *gdt = gb_tbl.get_data_type(g);
12573 if(gdt->is_temporal()){
12574 if(gdt->is_buffer_type()){
12575 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
12577 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
12579 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
12584 ret += "\t\tneeds_temporal_flush=true;\n";
12585 ret += "\t\t}else{\n"
12586 "\t\t\tneeds_temporal_flush=false;\n"
12591 // For temporal status tuple we don't need to do anything else
12592 ret += "\tif (temp_tuple_received) return NULL;\n\n";
12594 for(w=0;w<where.size();++w){
12595 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12597 // Find the set of variables accessed in this CNF elem,
12598 // but in no previous element.
12599 col_id_set new_cids;
12600 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
12602 // Unpack these values.
12603 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
12604 // Find partial fcns ref'd in this cnf element
12605 set<int> pfcn_refs;
12606 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
12607 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
12609 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
12610 +") ) return(NULL);\n";
12613 // The partial functions ref'd in the group-by var and aggregate
12614 // definitions must also be evaluated. If one returns false,
12615 // then implicitly the predicate is false.
12616 set<int>::iterator pfsi;
12618 if(ag_gb_pfcns.size() > 0)
12619 ret += "//\t\tUnpack remaining partial fcns.\n";
12620 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
12621 found_cids, segen_gb_tbl, "NULL", needs_xform);
12623 // Unpack the group-by variables
12625 for(g=0;g<gb_tbl.size();g++){
12626 data_type *gdt = gb_tbl.get_data_type(g);
12627 if(!gdt->is_temporal()){ // temproal gbs already computed
12628 // Find the new fields ref'd by this GBvar def.
12629 col_id_set new_cids;
12630 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
12631 // Unpack these values.
12632 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
12634 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12635 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
12637 // There seems to be no difference between the two
12638 // branches of the IF statement.
12639 data_type *gdt = gb_tbl.get_data_type(g);
12640 if(gdt->is_buffer_type()){
12641 // Create temporary copy.
12642 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12643 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
12645 scalarexp_t *gse = gb_tbl.get_def(g);
12646 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
12647 g,generate_se_code(gse,schema).c_str());
12650 ret.append(tmpstr);
12656 ret+= "\treturn gbval;\n";
12659 //--------------------------------------------------------
12660 // Create and initialize an aggregate object
12662 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
12663 // Variables for execution of the function.
12664 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12667 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
12668 "_aggrdef *)buffer;\n";
12670 for(a=0;a<aggr_tbl.size();a++){
12671 if(aggr_tbl.is_builtin(a)){
12672 // Create temporaries for buffer return values
12673 data_type *adt = aggr_tbl.get_data_type(a);
12674 if(adt->is_buffer_type()){
12675 sprintf(tmpstr,"aggr_tmp_%d", a);
12676 ret+=adt->make_host_cvar(tmpstr)+";\n";
12681 // Unpack all remaining attributes
12682 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
12683 for(a=0;a<aggr_tbl.size();a++){
12684 sprintf(tmpstr,"aggval->aggr_var%d",a);
12685 string assignto_var = tmpstr;
12686 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12689 ret += "\treturn aggval;\n";
12692 //--------------------------------------------------------
12693 // update an aggregate object
12695 ret += "void update_aggregate(host_tuple &tup0, "
12696 +generate_functor_name()+"_groupdef *gbval, "+
12697 generate_functor_name()+"_aggrdef *aggval){\n";
12698 // Variables for execution of the function.
12699 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12701 // use of temporaries depends on the aggregate,
12702 // generate them in generate_aggr_update
12705 // Unpack all remaining attributes
12706 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
12707 for(a=0;a<aggr_tbl.size();a++){
12708 sprintf(tmpstr,"aggval->aggr_var%d",a);
12709 string varname = tmpstr;
12710 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12713 ret += "\treturn;\n";
12716 //--------------------------------------------------------
12717 // reinitialize an aggregate object
12719 ret += "void reinit_aggregates( "+
12720 generate_functor_name()+"_groupdef *gbval, "+
12721 generate_functor_name()+"_aggrdef *aggval){\n";
12722 // Variables for execution of the function.
12723 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12725 // use of temporaries depends on the aggregate,
12726 // generate them in generate_aggr_update
12728 for(g=0;g<gb_tbl.size();g++){
12729 data_type *gdt = gb_tbl.get_data_type(g);
12730 if(gdt->is_temporal()){
12731 if(gdt->is_buffer_type()){
12732 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
12734 sprintf(tmpstr,"\t\t gbval->gb_var%d =last_gb%d;\n",g,g);
12740 // Unpack all remaining attributes
12741 for(a=0;a<aggr_tbl.size();a++){
12742 sprintf(tmpstr,"aggval->aggr_var%d",a);
12743 string varname = tmpstr;
12744 ret.append(generate_aggr_reinitialize(varname,&aggr_tbl,a, schema));
12747 ret += "\treturn;\n";
12754 //---------------------------------------------------
12757 ret += "\tbool flush_needed(){\n";
12758 if(uses_temporal_flush){
12759 ret += "\t\treturn needs_temporal_flush;\n";
12761 ret += "\t\treturn false;\n";
12765 //---------------------------------------------------
12766 // create output tuple
12767 // Unpack the partial functions ref'd in the where clause,
12768 // select clause. Evaluate the where clause.
12769 // Finally, pack the tuple.
12771 // I need to use special code generation here,
12772 // so I'll leave it in longhand.
12774 ret += "host_tuple create_output_tuple("
12775 +generate_functor_name()+"_groupdef *gbval, "+
12776 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
12778 ret += "\thost_tuple tup;\n";
12779 ret += "\tfailed = false;\n";
12780 ret += "\tgs_retval_t retval = 0;\n";
12782 string gbvar = "gbval->gb_var";
12783 string aggvar = "aggval->";
12786 // First, get the return values from the UDAFS
12787 for(a=0;a<aggr_tbl.size();a++){
12788 if(! aggr_tbl.is_builtin(a)){
12789 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12790 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12791 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12795 set<int> hv_sl_pfcns;
12796 for(w=0;w<having.size();w++){
12797 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
12799 for(s=0;s<select_list.size();s++){
12800 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
12803 // clean up the partial fcn results from any previous execution
12804 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
12807 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
12808 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12809 ret += "\tif(retval){ failed = true; return(tup);}\n";
12812 // Evaluate the HAVING clause
12813 // TODO: this seems to have a ++ operator rather than a + operator.
12814 for(w=0;w<having.size();++w){
12815 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
12818 // Now, compute the size of the tuple.
12820 // Unpack any BUFFER type selections into temporaries
12821 // so that I can compute their size and not have
12822 // to recompute their value during tuple packing.
12823 // I can use regular assignment here because
12824 // these temporaries are non-persistent.
12825 // TODO: should I be using the selvar generation routine?
12827 ret += "//\t\tCompute the size of the tuple.\n";
12828 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
12829 for(s=0;s<select_list.size();s++){
12830 scalarexp_t *se = select_list[s]->se;
12831 data_type *sdt = se->get_data_type();
12832 if(sdt->is_buffer_type() &&
12833 !( (se->get_operator_type() == SE_COLREF) ||
12834 (se->get_operator_type() == SE_AGGR_STAR) ||
12835 (se->get_operator_type() == SE_AGGR_SE) ||
12836 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12837 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12839 sprintf(tmpstr,"selvar_%d",s);
12840 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
12841 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
12845 // The size of the tuple is the size of the tuple struct plus the
12846 // size of the buffers to be copied in.
12848 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
12849 for(s=0;s<select_list.size();s++){
12850 // if(s>0) ret += "+";
12851 scalarexp_t *se = select_list[s]->se;
12852 data_type *sdt = select_list[s]->se->get_data_type();
12853 if(sdt->is_buffer_type()){
12854 if(!( (se->get_operator_type() == SE_COLREF) ||
12855 (se->get_operator_type() == SE_AGGR_STAR) ||
12856 (se->get_operator_type() == SE_AGGR_SE) ||
12857 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12858 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12860 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
12861 ret.append(tmpstr);
12863 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12864 ret.append(tmpstr);
12870 // Allocate tuple data block.
12871 ret += "//\t\tCreate the tuple block.\n";
12872 ret += "\ttup.data = malloc(tup.tuple_size);\n";
12873 ret += "\ttup.heap_resident = true;\n";
12875 // Mark tuple as regular
12876 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
12878 // ret += "\ttup.channel = 0;\n";
12879 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
12880 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
12883 // (Here, offsets are hard-wired. is this a problem?)
12885 ret += "//\t\tPack the fields into the tuple.\n";
12886 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
12887 for(s=0;s<select_list.size();s++){
12888 scalarexp_t *se = select_list[s]->se;
12889 data_type *sdt = se->get_data_type();
12890 if(sdt->is_buffer_type()){
12891 if(!( (se->get_operator_type() == SE_COLREF) ||
12892 (se->get_operator_type() == SE_AGGR_STAR) ||
12893 (se->get_operator_type() == SE_AGGR_SE) ||
12894 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12895 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12897 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
12898 ret.append(tmpstr);
12899 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
12900 ret.append(tmpstr);
12902 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12903 ret.append(tmpstr);
12904 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12905 ret.append(tmpstr);
12908 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12909 ret.append(tmpstr);
12910 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
12915 // Destroy string temporaries
12916 ret += gen_buffer_selvars_dtr(select_list);
12918 ret += "\treturn tup;\n";
12921 //------------------------------------------------------------------
12922 // Cleaning_when : evaluate the cleaning_when clause.
12923 // ASSUME that the udaf return values have already
12924 // been unpacked. delete the string udaf return values at the end.
12926 ret += "bool cleaning_when("
12927 +generate_functor_name()+"_groupdef *gbval, "+
12928 generate_functor_name()+"_aggrdef *aggval){\n";
12930 ret += "\tbool retval = true;\n";
12933 gbvar = "gbval->gb_var";
12934 aggvar = "aggval->";
12937 set<int> clw_pfcns;
12938 for(w=0;w<closing_when.size();w++){
12939 collect_partial_fcns_pr(closing_when[w]->pr, clw_pfcns);
12942 // clean up the partial fcn results from any previous execution
12943 ret += gen_partial_fcn_dtr(partial_fcns,clw_pfcns);
12946 for(pfsi=clw_pfcns.begin();pfsi!=clw_pfcns.end();++pfsi){
12947 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12948 ret += "\tif(retval){ return false;}\n";
12951 // Evaluate the Closing When clause
12952 // TODO: this seems to have a ++ operator rather than a + operator.
12953 for(w=0;w<closing_when.size();++w){
12954 ret += "\tif( !("+generate_predicate_code_fm_aggr(closing_when[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
12958 // Destroy string return vals of UDAFs
12959 for(a=0;a<aggr_tbl.size();a++){
12960 if(! aggr_tbl.is_builtin(a)){
12961 int afcn_id = aggr_tbl.get_fcn_id(a);
12962 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12963 if(adt->is_buffer_type()){
12964 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12965 adt->get_hfta_buffer_destroy().c_str(), a );
12971 ret += "\treturn retval;\n";
12977 //-------------------------------------------------------------------
12978 // Temporal update functions
12980 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12982 // create a temp status tuple
12983 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12985 ret += gen_init_temp_status_tuple(this->get_node_name());
12988 // (Here, offsets are hard-wired. is this a problem?)
12990 ret += "//\t\tPack the fields into the tuple.\n";
12991 for(s=0;s<select_list.size();s++){
12992 data_type *sdt = select_list[s]->se->get_data_type();
12993 if(sdt->is_temporal()){
12994 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12996 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
13002 ret += "\treturn 0;\n";
13003 ret += "};};\n\n\n";
13006 //----------------------------------------------------------
13007 // The hash function
13009 ret += "struct "+generate_functor_name()+"_hash_func{\n";
13010 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
13011 "_groupdef *grp) const{\n";
13012 ret += "\t\treturn(0";
13013 for(g=0;g<gb_tbl.size();g++){
13014 data_type *gdt = gb_tbl.get_data_type(g);
13015 if(! gdt->is_temporal()){
13017 if(gdt->use_hashfunc()){
13018 if(gdt->is_buffer_type())
13019 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13021 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13023 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
13028 ret += " >> 32);\n";
13032 //----------------------------------------------------------
13033 // The comparison function
13035 ret += "struct "+generate_functor_name()+"_equal_func{\n";
13036 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
13037 generate_functor_name()+"_groupdef *grp2) const{\n";
13038 ret += "\t\treturn( (";
13041 bool first_exec = true;
13042 for(g=0;g<gb_tbl.size();g++){
13043 data_type *gdt = gb_tbl.get_data_type(g);
13044 if(! gdt->is_temporal()){
13045 if(first_exec){first_exec=false;}else{ hcmpr += ") && (";}
13046 if(gdt->complex_comparison(gdt)){
13047 if(gdt->is_buffer_type())
13048 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
13049 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13051 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
13052 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13054 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
13071 string rsgah_qpn::generate_operator(int i, string params){
13074 " running_agg_operator<" +
13075 generate_functor_name()+","+
13076 generate_functor_name() + "_groupdef, " +
13077 generate_functor_name() + "_aggrdef, " +
13078 generate_functor_name()+"_hash_func, "+
13079 generate_functor_name()+"_equal_func "
13080 "> *op"+int_to_string(i)+" = new running_agg_operator<"+
13081 generate_functor_name()+","+
13082 generate_functor_name() + "_groupdef, " +
13083 generate_functor_name() + "_aggrdef, " +
13084 generate_functor_name()+"_hash_func, "+
13085 generate_functor_name()+"_equal_func "
13086 ">("+params+", \"" + get_node_name() + "\");\n"
13092 // Split aggregation into two HFTA components - sub and superaggregation
13093 // If unable to split the aggregates, an empty vector will be returned
//
// Visible steps:
//   1. Check that every non-builtin aggregate has both an HFTA
//      super-aggregate id and an HFTA sub-aggregate id (negative id = absent).
//   2. Create a low-level node named "_"+node_name and a high-level node
//      named node_name that reads from a table variable "_"+node_name.
//   3. Copy the group-by variables into the low node, export each one through
//      the low node's select list, and use the exported column as the
//      group-by definition in the high node.
//   4. Split each aggregate between the two nodes (split_hfta_aggr).
//   5. Rehome the select list and HAVING predicates onto the high node;
//      duplicate the WHERE predicates onto the low node.
//   6. Copy parameters, definitions, machine and interface to both nodes.
// On success ret_vec receives the low node first, then the high node.
13094 vector<qp_node *> sgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13096 vector<qp_node *> ret_vec;
// NOTE(review): o, i, t, fta_flds, stream_flds and sel_names are not used by
// any line visible in this function -- confirm before removing.
13097 int s, p, g, a, o, i;
13100 vector<string> fta_flds, stream_flds;
13101 int t = table_name->get_schema_ref();
13103 // Get the set of interfaces it accesses.
13105 vector<string> sel_names;
13107 // Verify that all of the ref'd UDAFs can be split.
13109 for(a=0;a<aggr_tbl.size();++a){
13110 if(! aggr_tbl.is_builtin(a)){
13111 int afcn = aggr_tbl.get_fcn_id(a);
13112 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13113 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
// A UDAF lacking either half cannot be split.
13114 if(hfta_super_id < 0 || hfta_sub_id < 0){
13120 /////////////////////////////////////////////////////
13121 // Split into aggr/aggr.
// NOTE(review): the low node stores this node's table_name pointer rather
// than a copy, so the set_range_var call below passes the object its own
// variable name, and the later set_machine/set_interface/set_ifq calls on
// low_hfta_node->table_name also modify this node's table_name.
13124 sgah_qpn *low_hfta_node = new sgah_qpn();
13125 low_hfta_node->table_name = table_name;
13126 low_hfta_node->set_node_name( "_"+node_name );
13127 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// The high node keeps the original node name and reads from "_"+node_name,
// i.e. from the low node created above.
13130 sgah_qpn *hi_hfta_node = new sgah_qpn();
13131 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13132 hi_hfta_node->set_node_name( node_name );
13133 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13135 // First, process the group-by variables.
13136 // both low and hi level queries duplicate group-by variables of original query
13139 for(g=0;g<gb_tbl.size();g++){
13140 // Insert the gbvar into both low- and hi level hfta.
13141 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13142 low_hfta_node->gb_tbl.add_gb_var(
13143 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13146 // Insert a ref to the value of the gbvar into the low-level hfta select list.
13147 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13148 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13149 gbvar_fta->set_gb_ref(g);
13150 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13151 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13153 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
13154 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13155 hi_hfta_node->gb_tbl.add_gb_var(
13156 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13160 // hi_hfta_node->gb_tbl.gb_patterns = gb_tbl.gb_patterns; // pattern processing at highest level
13161 hi_hfta_node->gb_tbl.set_pattern_info( &gb_tbl); // pattern processing at highest level
13163 // SEs in the aggregate definitions.
13164 // They are all safe, so split them up for later processing.
// hfta_aggr_se is later handed to rehome_fta_se / rehome_fta_pr when the
// select list and HAVING clause are moved to the high node.
13165 map<int, scalarexp_t *> hfta_aggr_se;
13166 for(a=0;a<aggr_tbl.size();++a){
13167 split_hfta_aggr( &(aggr_tbl), a,
13168 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13169 low_hfta_node->select_list,
13176 // Next, the select list.
13178 for(s=0;s<select_list.size();s++){
// NOTE(review): fta_forbidden is assigned but not read in the visible lines.
13179 bool fta_forbidden = false;
13180 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13181 hi_hfta_node->select_list.push_back(
13182 new select_element(root_se, select_list[s]->name));
13187 // All the predicates in the where clause must execute
13188 // in the low-level hfta.
13190 for(p=0;p<where.size();p++){
13191 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13192 cnf_elem *new_cnf = new cnf_elem(new_pr);
13193 analyze_cnf(new_cnf);
13195 low_hfta_node->where.push_back(new_cnf);
13198 // All of the predicates in the having clause must
13199 // execute in the high-level hfta node.
13201 for(p=0;p<having.size();p++){
13202 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13203 cnf_elem *cnf_root = new cnf_elem(pr_root);
13204 analyze_cnf(cnf_root);
13206 hi_hfta_node->having.push_back(cnf_root);
13210 // Copy parameters to both nodes
13211 vector<string> param_names = param_tbl->get_param_names();
// Each node receives its own duplicate of the parameter's data type.
13213 for(pi=0;pi<param_names.size();pi++){
13214 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13215 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13216 param_tbl->handle_access(param_names[pi]));
13217 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13218 param_tbl->handle_access(param_names[pi]));
13220 low_hfta_node->definitions = definitions;
13221 hi_hfta_node->definitions = definitions;
// Both nodes inherit the machine and interface of the original table
// variable and are marked as not being interface queries.
13224 low_hfta_node->table_name->set_machine(table_name->get_machine());
13225 low_hfta_node->table_name->set_interface(table_name->get_interface());
13226 low_hfta_node->table_name->set_ifq(false);
13228 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13229 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13230 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level (sub-aggregate) node first, then the high-level node.
13232 ret_vec.push_back(low_hfta_node);
13233 ret_vec.push_back(hi_hfta_node);
13239 // TODO: add splitting into selection/aggregation
13243 // Split aggregation into two HFTA components - sub and superaggregation
13244 // If unable to split the aggregates, an empty vector will be returned
13245 // Similar to sgah, but super aggregate is rsgah, subaggr is sgah
//
// Visible steps:
//   1. Check that every non-builtin aggregate has both an HFTA
//      super-aggregate id and an HFTA sub-aggregate id (negative id = absent).
//   2. Create a low-level sgah_qpn named "_"+node_name and a high-level
//      rsgah_qpn named node_name that reads from "_"+node_name.
//   3. Copy the group-by variables into the low node, export each one through
//      the low node's select list, and use the exported column as the
//      group-by definition in the high node.
//   4. Split each aggregate between the two nodes (split_hfta_aggr).
//   5. Rehome the select list, HAVING and CLOSING_WHEN predicates onto the
//      high node; duplicate the WHERE predicates onto the low node.
//   6. Copy parameters, definitions, machine and interface to both nodes.
// On success ret_vec receives the low node first, then the high node.
13246 vector<qp_node *> rsgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13248 vector<qp_node *> ret_vec;
// NOTE(review): o, i, t, fta_flds, stream_flds and sel_names are not used by
// any line visible in this function -- confirm before removing.
13249 int s, p, g, a, o, i;
13252 vector<string> fta_flds, stream_flds;
13253 int t = table_name->get_schema_ref();
13255 // Get the set of interfaces it accesses.
13257 vector<string> sel_names;
13259 // Verify that all of the ref'd UDAFs can be split.
13261 for(a=0;a<aggr_tbl.size();++a){
13262 if(! aggr_tbl.is_builtin(a)){
13263 int afcn = aggr_tbl.get_fcn_id(a);
13264 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13265 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
// A UDAF lacking either half cannot be split.
13266 if(hfta_super_id < 0 || hfta_sub_id < 0){
13272 /////////////////////////////////////////////////////
13273 // Split into aggr/aggr.
// NOTE(review): the low node stores this node's table_name pointer rather
// than a copy, so the set_range_var call below passes the object its own
// variable name, and the later set_machine/set_interface/set_ifq calls on
// low_hfta_node->table_name also modify this node's table_name.
13276 sgah_qpn *low_hfta_node = new sgah_qpn();
13277 low_hfta_node->table_name = table_name;
13278 low_hfta_node->set_node_name( "_"+node_name );
13279 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// The high node is the running (rsgah) aggregate; it keeps the original node
// name and reads from "_"+node_name, i.e. from the low node created above.
13282 rsgah_qpn *hi_hfta_node = new rsgah_qpn();
13283 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13284 hi_hfta_node->set_node_name( node_name );
13285 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13287 // First, process the group-by variables.
13288 // both low and hi level queries duplicate group-by variables of original query
13291 for(g=0;g<gb_tbl.size();g++){
13292 // Insert the gbvar into both low- and hi level hfta.
13293 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13294 low_hfta_node->gb_tbl.add_gb_var(
13295 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13298 // Insert a ref to the value of the gbvar into the low-level hfta select list.
13299 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13300 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13301 gbvar_fta->set_gb_ref(g);
13302 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13303 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13305 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
13306 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13307 hi_hfta_node->gb_tbl.add_gb_var(
13308 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13313 // SEs in the aggregate definitions.
13314 // They are all safe, so split them up for later processing.
// hfta_aggr_se is later handed to rehome_fta_se / rehome_fta_pr when the
// select list, HAVING and CLOSING_WHEN clauses are moved to the high node.
13315 map<int, scalarexp_t *> hfta_aggr_se;
13316 for(a=0;a<aggr_tbl.size();++a){
13317 split_hfta_aggr( &(aggr_tbl), a,
13318 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13319 low_hfta_node->select_list,
13326 // Next, the select list.
13328 for(s=0;s<select_list.size();s++){
// NOTE(review): fta_forbidden is assigned but not read in the visible lines.
13329 bool fta_forbidden = false;
13330 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13331 hi_hfta_node->select_list.push_back(
13332 new select_element(root_se, select_list[s]->name));
13337 // All the predicates in the where clause must execute
13338 // in the low-level hfta.
13340 for(p=0;p<where.size();p++){
13341 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13342 cnf_elem *new_cnf = new cnf_elem(new_pr);
13343 analyze_cnf(new_cnf);
13345 low_hfta_node->where.push_back(new_cnf);
13348 // All of the predicates in the having clause must
13349 // execute in the high-level hfta node.
13351 for(p=0;p<having.size();p++){
13352 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13353 cnf_elem *cnf_root = new cnf_elem(pr_root);
13354 analyze_cnf(cnf_root);
13356 hi_hfta_node->having.push_back(cnf_root);
13359 // Similar for closing when
13360 for(p=0;p<closing_when.size();p++){
13361 predicate_t *pr_root = rehome_fta_pr( closing_when[p]->pr, &hfta_aggr_se);
13362 cnf_elem *cnf_root = new cnf_elem(pr_root);
13363 analyze_cnf(cnf_root);
13365 hi_hfta_node->closing_when.push_back(cnf_root);
13369 // Copy parameters to both nodes
13370 vector<string> param_names = param_tbl->get_param_names();
// Each node receives its own duplicate of the parameter's data type.
13372 for(pi=0;pi<param_names.size();pi++){
13373 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13374 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13375 param_tbl->handle_access(param_names[pi]));
13376 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13377 param_tbl->handle_access(param_names[pi]));
13379 low_hfta_node->definitions = definitions;
13380 hi_hfta_node->definitions = definitions;
// Both nodes inherit the machine and interface of the original table
// variable and are marked as not being interface queries.
13383 low_hfta_node->table_name->set_machine(table_name->get_machine());
13384 low_hfta_node->table_name->set_interface(table_name->get_interface());
13385 low_hfta_node->table_name->set_ifq(false);
13387 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13388 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13389 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level (sub-aggregate) node first, then the high-level node.
13391 ret_vec.push_back(low_hfta_node);
13392 ret_vec.push_back(hi_hfta_node);
13398 // TODO: add splitting into selection/aggregation
13401 //---------------------------------------------------------------
13402 // Code for propagating Protocol field source information
//
// resolve_protocol_se builds a new scalar expression equivalent to se in
// which references to source-query fields are replaced by the expressions
// recorded for those fields in src_vec.
//   se      : expression to resolve.
//   src_vec : one field-name -> expression map per table variable, indexed
//             by the column reference's tablevar_ref; entries may be NULL.
//   gb_tbl  : group-by table used to expand group-by references; callers
//             without group-by variables pass NULL.
//   Schema  : used to look up the schema type of a column reference.
// Returns a newly allocated expression (new scalarexp_t or dup_se copy).
// NOTE(review): the branches that report failure are not visible here;
// callers appear to expect NULL when resolution is impossible -- confirm.
13405 scalarexp_t *resolve_protocol_se(scalarexp_t *se, vector<map<string, scalarexp_t *> *> &src_vec, gb_table *gb_tbl, table_list *Schema){
13406 scalarexp_t *rse, *lse,*p_se, *gb_se;
13407 int tno, schema_type;
13408 map<string, scalarexp_t *> *pse_map;
13410 switch(se->get_operator_type()){
// Literals and parameter references are rebuilt as fresh nodes.
13412 return new scalarexp_t(se->get_literal());
13414 return scalarexp_t::make_param_reference(se->get_op().c_str());
// Group-by reference: resolve the definition of the group-by variable.
// NOTE(review): the statement right after this error message dereferences
// gb_tbl; no early exit is visible between them, so a NULL gb_tbl would be
// reported and then dereferenced -- confirm and add an exit/return.
13418 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, se->gb_ref=%d, but gb_tbl is NULL\n",se->get_gb_ref());
13419 gb_se = gb_tbl->get_def(se->get_gb_ref());
13420 return resolve_protocol_se(gb_se,src_vec,gb_tbl,Schema);
// Column of a protocol schema: already a protocol field, so copy it as is.
13423 schema_type = Schema->get_schema_type(se->get_colref()->get_schema_ref());
13424 if(schema_type == PROTOCOL_SCHEMA)
13425 return dup_se(se,NULL);
// Otherwise look the field up in the map of the source it comes from.
// NOTE(review): an out-of-range tno is reported, but src_vec[tno] is still
// indexed afterwards in the visible lines -- confirm whether an exit is
// missing.
13427 tno = se->get_colref()->get_tablevar_ref();
13428 if(tno >= src_vec.size()){
13429 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, tno=%d, src_vec.size()=%lu\n",tno,src_vec.size());
13431 if(src_vec[tno] == NULL)
// std::map::operator[] inserts a NULL entry into the source's map when the
// field name is absent.
13434 pse_map =src_vec[tno];
13435 p_se = (*pse_map)[se->get_colref()->get_field()];
13438 return dup_se(p_se,NULL);
// Single-operand operator: resolve the operand and rebuild with the same op.
13440 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13444 return new scalarexp_t(se->get_op().c_str(),lse);
// Two-operand operator: resolve both operands and rebuild with the same op.
13446 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13449 rse = resolve_protocol_se(se->get_right_se(),src_vec,gb_tbl,Schema);
13452 return new scalarexp_t(se->get_op().c_str(),lse,rse);
13466 void spx_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13468 vector<map<string, scalarexp_t *> *> src_vec;
13470 for(i=0;i<q_sources.size();i++){
13471 if(q_sources[i] != NULL)
13472 src_vec.push_back(q_sources[i]->get_protocol_se());
13474 src_vec.push_back(NULL);
13477 for(i=0;i<select_list.size();i++){
13478 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13482 void join_eq_hash_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13484 vector<map<string, scalarexp_t *> *> src_vec;
13486 for(i=0;i<q_sources.size();i++){
13487 if(q_sources[i] != NULL)
13488 src_vec.push_back(q_sources[i]->get_protocol_se());
13490 src_vec.push_back(NULL);
13493 for(i=0;i<select_list.size();i++){
13494 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13497 for(i=0;i<hash_eq.size();i++){
13498 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
13499 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
13503 void filter_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13505 vector<map<string, scalarexp_t *> *> src_vec;
13507 for(i=0;i<q_sources.size();i++){
13508 if(q_sources[i] != NULL)
13509 src_vec.push_back(q_sources[i]->get_protocol_se());
13511 src_vec.push_back(NULL);
13514 for(i=0;i<select_list.size();i++){
13515 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13518 for(i=0;i<hash_eq.size();i++){
13519 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
13520 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
13524 void sgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13526 vector<map<string, scalarexp_t *> *> src_vec;
13528 for(i=0;i<q_sources.size();i++){
13529 if(q_sources[i] != NULL)
13530 src_vec.push_back(q_sources[i]->get_protocol_se());
13532 src_vec.push_back(NULL);
13535 for(i=0;i<select_list.size();i++){
13536 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
13539 for(i=0;i<gb_tbl.size();i++)
13540 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
13544 void rsgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13546 vector<map<string, scalarexp_t *> *> src_vec;
13548 for(i=0;i<q_sources.size();i++){
13549 if(q_sources[i] != NULL)
13550 src_vec.push_back(q_sources[i]->get_protocol_se());
13552 src_vec.push_back(NULL);
13555 for(i=0;i<select_list.size();i++){
13556 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
13559 for(i=0;i<gb_tbl.size();i++)
13560 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
13563 void sgahcwcb_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13565 vector<map<string, scalarexp_t *> *> src_vec;
13567 for(i=0;i<q_sources.size();i++){
13568 if(q_sources[i] != NULL)
13569 src_vec.push_back(q_sources[i]->get_protocol_se());
13571 src_vec.push_back(NULL);
13574 for(i=0;i<select_list.size();i++){
13575 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
13578 for(i=0;i<gb_tbl.size();i++)
13579 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Build protocol_map for a merge node. For each field of table_layout the
// protocol expression recorded by the first source is compared with the one
// recorded by every other source (is_equivalent_se_base); the visible
// assignments store either the first source's expression or NULL.
//   q_sources : nodes being merged; an entry may be NULL.
//   Schema    : forwarded to is_equivalent_se_base.
// NOTE(review): src_vec can hold NULL entries, yet src_vec[0] and src_vec[s]
// are dereferenced below without a visible NULL check -- confirm that merge
// sources are never NULL here.
13582 void mrg_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13584 scalarexp_t *first_se;
13586 vector<map<string, scalarexp_t *> *> src_vec;
13587 map<string, scalarexp_t *> *pse_map;
// One protocol map per source, NULL where the source itself is NULL.
13589 for(i=0;i<q_sources.size();i++){
13590 if(q_sources[i] != NULL)
13591 src_vec.push_back(q_sources[i]->get_protocol_se());
13593 src_vec.push_back(NULL);
// A merge with no sources is reported as an internal error.
13596 if(q_sources.size() == 0){
13597 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::create_protocol_se, q_sources.size() == 0\n");
13601 vector<field_entry *> tbl_flds = table_layout->get_fields();
13602 for(f=0;f<tbl_flds.size();f++){
// Reference expression: what the first source records for this field.
// std::map::operator[] inserts a NULL entry when the field name is absent.
13604 string fld_nm = tbl_flds[f]->get_name();
13605 pse_map = src_vec[0];
13606 first_se = (*pse_map)[fld_nm];
13607 if(first_se == NULL)
// Compare against the remaining sources, stopping once match is false.
13609 for(s=1;s<src_vec.size() && match;s++){
13610 pse_map = src_vec[s];
13611 scalarexp_t *match_se = (*pse_map)[fld_nm];
13612 if(match_se == NULL)
13615 match = is_equivalent_se_base(first_se, match_se, Schema);
// Outcome for this field: the first source's expression, or NULL.
13618 protocol_map[fld_nm] = first_se;
13620 protocol_map[fld_nm] = NULL;