1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
16 // Create, manipulate, and dump query plans.
18 #include "query_plan.h"
19 #include "analyze_fta.h"
20 #include "generate_utils.h"
26 extern string hash_nums[NRANDS]; // for fast hashing
// untaboo: takes a string by non-const reference and walks it by index.
// NOTE(review): only the signature and the loop header are visible in this
// listing; what is done to each character cannot be confirmed from here.
31 void untaboo(string &s){
// Visit every character position of s, front to back.
33 for(c=0;c<s.size();++c){
40 // mrg_qpn constructor, define here to avoid
41 // circular references in the .h file
// Build a merge node from a filter-join node.
//   spx     : node whose select list, parameter table and definitions are used
//   n_name  : name given to the new table_def (and used in diagnostics)
//   sources : one input table name per merge input
//   ifaces, ifdb : forwarded unchanged to resolve_slack()
// The constructor derives the output schema from spx's select list, picks the
// temporal field to merge on, resolves slack, creates one colref/tablevar pair
// per source, and gives this node its own copy of spx's parameters.
42 mrg_qpn::mrg_qpn(filter_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): this assignment is overwritten by the "new param_table()"
// further down (line 88), so it looks redundant — confirm nothing in between
// relies on it.
43 param_tbl = spx->param_tbl;
// Accumulates one field entry per select-list element; becomes the schema.
46 field_entry_list *fel = new field_entry_list();
51 for(i=0;i<spx->select_list.size();++i){
// Work on a private copy of the element's data type.
52 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
53 if(dt->is_temporal()){
// The first temporal field seen is the one merged on (merge_fieldpos still < 0).
54 if(merge_fieldpos < 0){
// A later temporal field only triggers this warning; the earlier one is kept.
// NOTE(review): the watch_join_qpn overload also calls dt->reset_temporal()
// after this warning; no such call is visible here — confirm it is present.
57 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
// The output field carries the select element's name and the copied type.
62 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
63 fel->append_field(fe);
// NOTE(review): select_list[merge_fieldpos] is indexed right after this error;
// presumably the error path terminates before reaching it — confirm.
67 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
// Output schema of this node, declared as a stream.
70 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
72 // NEED TO HANDLE USER_SPECIFIED SLACK
73 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
74 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
75 // if(this->slack == NULL)
76 // fprintf(stderr,"Zero slack.\n");
78 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// One merge input per source: range variable "_m<i>", a colref to the merge
// field bound to tablevar i, and a tablevar naming the source.
80 for(i=0;i<sources.size();i++){
81 std::string rvar = "_m"+int_to_string(i);
82 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
83 mvars[i]->set_tablevar_ref(i);
84 fm.push_back(new tablevar_t(sources[i].c_str()));
85 fm[i]->set_range_var(rvar);
// Fresh parameter table: each parameter of spx is re-added with a duplicated
// data type and the same handle-access flag.
88 param_tbl = new param_table();
89 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
91 for(pi=0;pi<param_names.size();pi++){
92 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
93 param_tbl->add_param(param_names[pi],dt->duplicate(),
94 spx->param_tbl->handle_access(param_names[pi]));
// Definitions are taken over from spx by plain assignment.
96 definitions = spx->definitions;
// Build a merge node from a watchlist-join node.
//   spx     : node whose select list, parameter table and definitions are used
//   n_name  : name given to the new table_def (and used in diagnostics)
//   sources : one input table name per merge input
//   ifaces, ifdb : forwarded unchanged to resolve_slack()
// The visible steps mirror the filter_join_qpn overload: derive the schema,
// choose the temporal merge field, resolve slack, create per-source
// colref/tablevar pairs, and copy the parameters.
101 mrg_qpn::mrg_qpn(watch_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): overwritten by "new param_table()" below (line 147); this
// first assignment looks redundant — confirm.
102 param_tbl = spx->param_tbl;
// Accumulates one field entry per select-list element; becomes the schema.
105 field_entry_list *fel = new field_entry_list();
110 for(i=0;i<spx->select_list.size();++i){
// Work on a private copy of the element's data type.
111 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
112 if(dt->is_temporal()){
// The first temporal field seen is the one merged on.
113 if(merge_fieldpos < 0){
// A later temporal field is reported and then has reset_temporal() applied to
// its copied type; the earlier field stays the merge field.
116 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
117 dt->reset_temporal();
// The output field carries the select element's name and the copied type.
121 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
122 fel->append_field(fe);
// No temporal field was found in the whole select list.
// NOTE(review): select_list[merge_fieldpos] is indexed below; presumably this
// branch terminates before that — the rest of the branch is not visible.
125 if(merge_fieldpos<0){
126 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
// Output schema of this node, declared as a stream.
129 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
131 // NEED TO HANDLE USER_SPECIFIED SLACK
132 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
133 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
134 // if(this->slack == NULL)
135 // fprintf(stderr,"Zero slack.\n");
137 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// One merge input per source: range variable "_m<i>", a colref to the merge
// field bound to tablevar i, and a tablevar naming the source.
139 for(i=0;i<sources.size();i++){
140 std::string rvar = "_m"+int_to_string(i);
141 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
142 mvars[i]->set_tablevar_ref(i);
143 fm.push_back(new tablevar_t(sources[i].c_str()));
144 fm[i]->set_range_var(rvar);
// Fresh parameter table: each parameter of spx is re-added with a duplicated
// data type and the same handle-access flag.
147 param_tbl = new param_table();
148 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
150 for(pi=0;pi<param_names.size();pi++){
151 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
152 param_tbl->add_param(param_names[pi],dt->duplicate(),
153 spx->param_tbl->handle_access(param_names[pi]));
// Definitions are taken over from spx by plain assignment.
155 definitions = spx->definitions;
162 // This function translates an analyzed parse tree
163 // into one or more query nodes (qp_node).
164 // Currently only one node is created, but some query
165 // fragments might create more than one query node,
166 // e.g. aggregation over a join, or nested subqueries
167 // in the FROM clause (unless this is handled at parse tree
168 // analysis time). At this stage, they will be linked
169 // by the names in the FROM clause.
170 // INVARIANT : if more than one query node is returned,
171 // the last one represents the output of the query.
// Create the plan node for one analyzed query.
//   qs     : analysis summary; its query_type and clause tables select the
//            node class to construct
//   Schema : table list handed to every node constructor
// Each branch constructs one node, records it as plan_root and appends it to
// local_plan. At the end plan_root receives the query name and definitions.
172 vector<qp_node *> create_query_nodes(query_summary_class *qs,table_list *Schema){
174 // Classify the query.
176 vector <qp_node *> local_plan;
180 // I should probably move a lot of this code
181 // into the qp_node constructors,
182 // and have this code focus on building the query plan tree.
// Watchlist query -> watch_tbl_qpn.
185 if(qs->query_type == WATCHLIST_QUERY){
186 watch_tbl_qpn *watchnode = new watch_tbl_qpn(qs, Schema);
189 plan_root = watchnode;
190 local_plan.push_back(watchnode);
// Merge query -> mrg_qpn.
195 if(qs->query_type == MERGE_QUERY){
196 mrg_qpn *merge_node = new mrg_qpn(qs,Schema);
199 plan_root = merge_node;
200 local_plan.push_back(merge_node);
// NOTE(review): line 203 is bare prose, so the split_sources / printf lines
// that follow appear to sit inside a disabled (commented-out) region whose
// delimiters are not visible in this listing — confirm. If they are ever
// re-enabled, note that "%d" is used with size() results.
203 Do not split sources until we are done with optimizations
204 vector<mrg_qpn *> split_merge = merge_node->split_sources();
205 local_plan.insert(local_plan.begin(), split_merge.begin(), split_merge.end());
207 // If children are created, add them to the schema.
210 printf("split_merge size is %d\n",split_merge.size());
211 for(i=1;i<split_merge.size();++i){
212 Schema->add_table(split_merge[i]->get_fields());
213 printf("Adding split merge table %d\n",i);
218 printf("Did split sources on %s:\n",qs->query_name.c_str());
220 for(ss=0;ss<local_plan.size();ss++){
221 printf("node %d, name=%s, sources=",ss,local_plan[ss]->get_node_name().c_str());
222 vector<tablevar_t *> inv = local_plan[ss]->get_input_tbls();
224 for(nn=0;nn<inv.size();nn++){
225 printf("%s ",inv[nn]->to_string().c_str());
// Select query: the node class depends on whether there is grouping or
// aggregation and on the shape of the FROM clause.
234 if(qs->query_type == SELECT_QUERY){
236 // Select / Aggregation / Join
// No group-by variables and no aggregates.
237 if(qs->gb_tbl->size() == 0 && qs->aggr_tbl->size() == 0){
// Exactly one FROM entry -> spx_qpn.
239 if(qs->fta_tree->get_from()->size() == 1){
240 spx_qpn *spx_node = new spx_qpn(qs,Schema);
242 plan_root = spx_node;
243 local_plan.push_back(spx_node);
// Otherwise the FROM clause's property picks the join node class.
245 if(qs->fta_tree->get_from()->get_properties() == FILTER_JOIN_PROPERTY){
246 filter_join_qpn *join_node = new filter_join_qpn(qs,Schema);
247 plan_root = join_node;
248 local_plan.push_back(join_node);
250 if(qs->fta_tree->get_from()->get_properties() == WATCHLIST_JOIN_PROPERTY){
251 watch_join_qpn *join_node = new watch_join_qpn(qs,Schema);
252 plan_root = join_node;
253 local_plan.push_back(join_node);
// Remaining case (the enclosing else is not visible): join_eq_hash_qpn.
255 join_eq_hash_qpn *join_node = new join_eq_hash_qpn(qs,Schema);
256 plan_root = join_node;
257 local_plan.push_back(join_node);
// Grouping/aggregation present: sgahcwcb_qpn when any of states_refd, sg_tbl
// or cb_cnf is non-empty; rsgah_qpn when closew_cnf is non-empty; otherwise
// sgah_qpn.
264 if(qs->states_refd.size() || qs->sg_tbl.size() || qs->cb_cnf.size()){
265 sgahcwcb_qpn *sgahcwcb_node = new sgahcwcb_qpn(qs,Schema);
266 plan_root = sgahcwcb_node;
267 local_plan.push_back(sgahcwcb_node);
269 if(qs->closew_cnf.size()){
270 rsgah_qpn *rsgah_node = new rsgah_qpn(qs,Schema);
271 plan_root = rsgah_node;
272 local_plan.push_back(rsgah_node);
274 sgah_qpn *sgah_node = new sgah_qpn(qs,Schema);
275 plan_root = sgah_node;
276 local_plan.push_back(sgah_node);
283 // Get the query name and other definitions.
// NOTE(review): plan_root is dereferenced unconditionally here; its
// declaration/initialization is not visible, so confirm it cannot be unset
// when query_type matches none of the branches above.
284 plan_root->set_node_name( qs->query_name);
285 plan_root->set_definitions( qs->definitions) ;
// The old single-node return is disabled; the live return statement is not
// visible in this listing (presumably local_plan — confirm).
288 // return(plan_root);
// Render a scalar expression as query text.
//   se       : expression to print
//   aggr_tbl : consulted for the operand(s) of aggregate references
// Recurses into child expressions. The case labels of the switch are not
// visible in this listing, so the notes below describe each visible statement
// rather than naming its operator type.
294 string se_to_query_string(scalarexp_t *se, aggregate_table *aggr_tbl){
299 vector<scalarexp_t *> operand_list;
// The statement guarded by this test is not visible; su_ind, which is spliced
// into the aggregate/function text below, is presumably set here — confirm.
302 if(se->is_superaggr())
305 switch(se->get_operator_type()){
// Text supplied by the literal itself.
307 l_str = se->get_literal()->to_query_string();
// The op string prefixed with '$'.
310 l_str = "$" + se->get_op();
// Text supplied by the column reference itself.
313 l_str = se->get_colref()->to_query_string() ;
// One operand: printed as op( operand ).
316 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
// NOTE(review): the doubled ';' is a harmless empty statement.
318 return se->get_op()+"( "+l_str+" )";;
// Two operands: printed as (left)op(right).
320 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
321 r_str = se_to_query_string(se->get_right_se(),aggr_tbl);
322 return( "("+l_str+")"+se->get_op()+"("+r_str+")" );
// Star form: op, su_ind, then "(*)".
324 return( se->get_op() + su_ind + "(*)");
// Aggregate over an expression: the operand is fetched from aggr_tbl through
// the expression's aggregate reference.
326 l_str = se_to_query_string(aggr_tbl->get_aggr_se(se->get_aggr_ref()),aggr_tbl);
327 return( se->get_op() + su_ind + "(" + l_str + ")" );
// Operand-list form: operands come from aggr_tbl when the expression has an
// aggregate reference (>= 0), otherwise from the expression itself.
329 if(se->get_aggr_ref() >= 0)
330 operand_list = aggr_tbl->get_operand_list(se->get_aggr_ref());
332 operand_list = se->get_operands();
334 ret = se->get_op() + su_ind + "(";
// Each operand is rendered recursively; the code that appends it to ret and
// closes the parenthesis is not visible here.
335 for(p=0;p<operand_list.size();p++){
336 l_str = se_to_query_string(operand_list[p],aggr_tbl);
// Reached when no visible return above applied: the error text is returned
// as the result string.
344 return "ERROR SE op type not recognized in se_to_query_string.\n";
// Render a predicate as query text.
//   pr       : predicate to print
//   aggr_tbl : passed through to se_to_query_string for scalar operands
// Recurses into sub-predicates. The case labels of the switch are not visible
// in this listing, so the notes below describe each visible statement.
348 string pred_to_query_str(predicate_t *pr, aggregate_table *aggr_tbl){
353 vector<literal_t *> llist;
354 vector<scalarexp_t *> op_list;
356 switch(pr->get_operator_type()){
// IN-list form: "<left> IN [" followed by each literal's query text. The
// separator and closing bracket are emitted by lines not visible here.
358 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
359 ret = l_str + " IN [";
360 llist = pr->get_lit_vec();
361 for(l=0;l<llist.size();l++){
363 ret += llist[l]->to_query_string();
// Comparison of two scalar expressions: "<left> <op> <right>".
369 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
370 r_str = se_to_query_string(pr->get_right_se(),aggr_tbl);
371 return( l_str + " " + pr->get_op() + " " + r_str );
// One sub-predicate: "<op>( <pred> )".
373 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
374 return(pr->get_op() + "( " + l_str + " )");
// Two sub-predicates.
// NOTE(review): the RIGHT operand is printed before the LEFT one here, unlike
// the two-operand case in se_to_query_string. The output text is therefore
// reordered relative to the input — confirm this is intended.
376 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
377 r_str = pred_to_query_str(pr->get_right_pr(),aggr_tbl);
378 return("( " + r_str + " )" + pr->get_op() + "( " + l_str + " )");
// Operand-list form: "<op>[" followed by each operand's text. The separator
// and closing bracket are emitted by lines not visible here.
380 ret = pr->get_op()+"[";
381 op_list = pr->get_op_list();
382 for(o=0;o<op_list.size();++o){
384 ret += se_to_query_string(op_list[o],aggr_tbl);
// Unknown operator type: report the source position and the type code on
// stderr. What happens after the message is not visible here.
389 fprintf(stderr,"INTERNAL ERROR in pred_to_query_str, line %d, character %d, unknown predicate operator type %d\n",
390 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
399 // Build a selection list,
400 // but avoid adding duplicate SEs.
// Add se to lfta_select_list unless an equivalent expression is already there.
// Returns the index of the entry for se. The remaining parameter(s) and the
// return taken on a match are on lines not visible in this listing; callers
// pass a new_element argument, presumably a flag reporting whether an entry
// was added — confirm.
403 int add_select_list_nodup(vector<select_element *> &lfta_select_list, scalarexp_t *se,
// Linear scan for an existing entry equivalent to se.
407 for(s=0;s<lfta_select_list.size();s++){
408 if(is_equivalent_se(lfta_select_list[s]->se, se)){
// Not found: append with a placeholder name and return the new last index.
413 lfta_select_list.push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
414 return(lfta_select_list.size()-1);
419 // TODO: The generated colref should be tied to the tablevar
420 // representing the lfta output. For now, always 0.
// Ensure se is on lfta_select_list and build a column-reference expression
// that refers to it by name.
//   lfta_select_list : select list that receives se if it is not present
//   se               : expression to be referenced
//   h_tvref          : tablevar index stored on the new colref
// The new expression copies se's decorations. The return statement is not
// visible in this listing (presumably new_se — confirm).
422 scalarexp_t *make_fta_se_ref(vector<select_element *> &lfta_select_list, scalarexp_t *se, int h_tvref){
// Index of se's entry in the list, whether existing or newly appended.
424 int fta_se_nbr = add_select_list_nodup(lfta_select_list, se, new_element);
// Either the entry's current name is reused ...
427 colname = lfta_select_list[fta_se_nbr]->name;
// ... or a name is imputed and stored on the entry. The condition choosing
// between the two is not visible (presumably keyed on new_element — confirm).
429 colname = impute_colname(lfta_select_list, se);
430 lfta_select_list[fta_se_nbr]->name = colname;
433 // TODO: fill in the tablevar and schema of the colref here.
434 colref_t *new_cr = new colref_t(colname.c_str());
// The colref is bound to the caller-supplied tablevar index.
435 new_cr->set_tablevar_ref(h_tvref);
438 scalarexp_t *new_se= new scalarexp_t(new_cr);
439 new_se->use_decorations_of(se);
445 // Build a selection list,
446 // but avoid adding duplicate SEs.
// Pointer-taking overload: add se to *lfta_select_list unless an equivalent
// expression is already there, and return the index of its entry. The
// remaining parameter(s) and the return taken on a match are on lines not
// visible in this listing.
// NOTE(review): lfta_select_list is dereferenced with no visible null check.
449 int add_select_list_nodup(vector<select_element *> *lfta_select_list, scalarexp_t *se,
// Linear scan for an existing entry equivalent to se.
453 for(s=0;s<lfta_select_list->size();s++){
454 if(is_equivalent_se((*lfta_select_list)[s]->se, se)){
// Not found: append with a placeholder name and return the new last index.
459 lfta_select_list->push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
460 return(lfta_select_list->size()-1);
465 // TODO: The generated colref should be tied to the tablevar
466 // representing the lfta output. For now, always 0.
// Variant that works on one select list per tablevar: ensure se is on the
// list selected by h_tvref and build a column-reference expression for it.
//   lfta_select_list : select lists, indexed by tablevar number
//   se               : expression to be referenced
//   h_tvref          : selects the list AND is stored on the new colref
// The new expression copies se's decorations. The return statement is not
// visible in this listing (presumably new_se — confirm).
468 scalarexp_t *make_fta_se_ref(vector<vector<select_element *> *> &lfta_select_list, scalarexp_t *se, int h_tvref){
// NOTE(review): indexed with no visible bounds check on h_tvref.
470 vector<select_element *> *the_sel_list = lfta_select_list[h_tvref];
// Index of se's entry in that list, whether existing or newly appended.
471 int fta_se_nbr = add_select_list_nodup(the_sel_list, se, new_element);
// Either the entry's current name is reused ...
474 colname = (*the_sel_list)[fta_se_nbr]->name;
// ... or a name is imputed and stored on the entry. The condition choosing
// between the two is not visible (presumably keyed on new_element — confirm).
476 colname = impute_colname(*the_sel_list, se);
477 (*the_sel_list)[fta_se_nbr]->name = colname;
480 // TODO: fill in the tablevar and schema of the colref here.
481 colref_t *new_cr = new colref_t(colname.c_str());
482 new_cr->set_tablevar_ref(h_tvref);
485 scalarexp_t *new_se= new scalarexp_t(new_cr);
486 new_se->use_decorations_of(se);
495 // Test if a se can be evaluated at the fta.
496 // check forbidden types (e.g. float), forbidden operations
497 // between types (e.g. divide a long long), forbidden operations
498 // (too expensive, not implemented).
500 // Return true if not forbidden, false if forbidden
502 // TODO: the parameter aggr_tbl is not used, delete it.
// Decide whether a scalar expression may be evaluated at the fta.
//   se       : expression to test
//   aggr_tbl : only forwarded to the recursive calls in the visible code
//   Ext_fcns : external function table used to resolve function calls
// Returns true when nothing forbidden is found. The case labels and the
// early "return false" statements are on lines not visible in this listing.
504 bool check_fta_forbidden_se(scalarexp_t *se,
505 aggregate_table *aggr_tbl,
506 ext_fcn_list *Ext_fcns
510 vector<scalarexp_t *> operand_list;
511 vector<data_type *> dt_signature;
512 data_type *dt = se->get_data_type();
516 switch(se->get_operator_type()){
// Leaf case(s): legal exactly when the expression's data type is fta-legal.
520 return( se->get_data_type()->fta_legal_type() );
// One operand: the operand is checked first, then the operation on its type.
524 if(!check_fta_forbidden_se(se->get_left_se(), aggr_tbl, Ext_fcns))
527 dt->fta_legal_operation(se->get_left_se()->get_data_type(), se->get_op())
// Two operands: both are checked, then the operation on the pair of types.
530 if(!check_fta_forbidden_se(se->get_left_se(),aggr_tbl, Ext_fcns))
532 if(!check_fta_forbidden_se(se->get_right_se(),aggr_tbl, Ext_fcns))
534 return(dt->fta_legal_operation(se->get_left_se()->get_data_type(),
535 se->get_right_se()->get_data_type(),
540 // return true, aggregate fta-safeness is determined elsewhere.
// An expression carrying an aggregate reference is accepted without
// inspecting its operands.
547 if(se->get_aggr_ref() >= 0) return true;
// Otherwise every operand is checked and its data type collected into the
// signature used for the function lookup.
549 operand_list = se->get_operands();
550 for(p=0;p<operand_list.size();p++){
551 if(!check_fta_forbidden_se(operand_list[p],aggr_tbl, Ext_fcns))
553 dt_signature.push_back(operand_list[p]->get_data_type() );
555 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup failure: print the function name with its operand types. The test
// guarding this report is not visible; -2 is reported as "multiple subsuming
// functions found".
557 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
559 for(o=0;o<operand_list.size();o++){
560 if(o>0) fprintf(stderr,", ");
561 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
563 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
564 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// A resolved function is legal when the function table says so.
568 return(Ext_fcns->fta_legal(fcn_id) );
// NOTE(review): this internal error goes to stdout via printf, while the
// other diagnostics in this function use fprintf(stderr, ...).
570 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
579 // test if a pr can be executed at the fta.
581 // Return true if not forbidden, false if forbidden
// Decide whether a predicate may be executed at the fta.
//   pr       : predicate to test
//   aggr_tbl : only forwarded to the recursive calls in the visible code
//   Ext_fcns : external function table used to resolve predicate calls
// Returns true when nothing forbidden is found. The case labels and the
// early "return false" statements are on lines not visible in this listing.
583 bool check_fta_forbidden_pr(predicate_t *pr,
584 aggregate_table *aggr_tbl,
585 ext_fcn_list *Ext_fcns
588 vector<literal_t *> llist;
591 vector<scalarexp_t *> op_list;
592 vector<data_type *> dt_signature;
596 switch(pr->get_operator_type()){
// IN-list form: the left expression is checked, then each literal's type.
598 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns) )
600 llist = pr->get_lit_vec();
601 for(l=0;l<llist.size();l++){
// NOTE(review): a data_type is heap-allocated per literal and no delete is
// visible in this listing — possible leak; confirm.
602 dt = new data_type(llist[l]->get_type());
603 if(! dt->fta_legal_type()){
// Comparison: both scalar operands are checked.
611 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns))
613 if(! check_fta_forbidden_se(pr->get_right_se(), aggr_tbl, Ext_fcns))
// One sub-predicate: the result is that of the sub-predicate.
617 return( check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns) );
// Two sub-predicates: both are checked.
619 if(! check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns))
621 if(! check_fta_forbidden_pr(pr->get_right_pr(), aggr_tbl, Ext_fcns))
// Operand-list form: every operand is checked and its data type collected
// into the signature used for the predicate lookup.
625 op_list = pr->get_op_list();
626 for(o=0;o<op_list.size();o++){
627 if(!check_fta_forbidden_se(op_list[o],aggr_tbl, Ext_fcns))
629 dt_signature.push_back(op_list[o]->get_data_type() );
631 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), dt_signature);
// Lookup failure: print the predicate name with its operand types. The test
// guarding this report is not visible; -2 is reported as "multiple subsuming
// predicates found".
633 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
635 for(o=0;o<op_list.size();o++){
636 if(o>0) fprintf(stderr,", ");
637 fprintf(stderr,"%s",op_list[o]->get_data_type()->to_string().c_str());
639 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
640 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
// A resolved predicate is legal when the function table says so.
644 return(Ext_fcns->fta_legal(fcn_id) );
// Unknown operator type: report the source position and the type code.
646 fprintf(stderr,"INTERNAL ERROR in check_fta_forbidden_pr, line %d, character %d, unknown predicate operator type %d\n",
647 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
656 // Split the aggregates in orig_aggr_tbl, into superaggregates and
658 // (the value of the HFTA aggregate might be a SE of several LFTA
659 // subaggregates, e.g. avg : sum / count )
660 // Register the superaggregates in hfta_aggr_tbl, and the
661 // subaggregates in lfta_aggr_tbl.
662 // Insert references to the subaggregates into lfta_select_list.
663 // (and record their names in the currnames list)
664 // Create a SE for the superaggregate, put it in hfta_aggr_se,
// Split aggregate agr_id of orig_aggr_tbl into LFTA subaggregate(s) and an
// HFTA superaggregate.
//   orig_aggr_tbl    : table holding the aggregate to split
//   agr_id           : index of that aggregate
//   hfta_aggr_tbl    : receives the superaggregate(s)
//   lfta_aggr_tbl    : receives the subaggregate(s)
//   lfta_select_list : receives one entry per subaggregate
//   hfta_aggr_se     : on exit, hfta_aggr_se[agr_id] is the superaggregate SE
//   Ext_fcns         : external function table (used for non-builtin aggregates)
// Several declarations, "else" arms and closing braces are on lines not
// visible in this listing; the notes below say where that matters.
667 void split_fta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
668 aggregate_table *hfta_aggr_tbl,
669 aggregate_table *lfta_aggr_tbl,
670 vector<select_element *> &lfta_select_list,
671 map<int,scalarexp_t *> &hfta_aggr_se,
672 ext_fcn_list *Ext_fcns
675 scalarexp_t *subaggr_se;
680 scalarexp_t *new_se, *l_se;
681 vector<scalarexp_t *> subaggr_ref_se;
// ---- Non-builtin aggregate: sub/super function ids come from Ext_fcns.
684 if(! orig_aggr_tbl->is_builtin(agr_id)){
685 // Construct the subaggregate
686 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
687 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
688 vector<scalarexp_t *> subopl;
// The subaggregate works on duplicates of the original operands.
690 for(o=0;o<opl.size();++o){
691 subopl.push_back(dup_se(opl[o], NULL));
693 int sub_id = Ext_fcns->get_subaggr_id(fcn_id);
694 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
695 subaggr_se->set_fcn_id(sub_id);
696 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
697 // Add it to the lfta select list.
698 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
// Either the entry's existing name is reused, or a name is imputed and the
// subaggregate is registered in lfta_aggr_tbl. The condition separating the
// two paths is not visible (presumably keyed on new_element — confirm).
700 colname = lfta_select_list[fta_se_nbr]->name;
702 colname = impute_colname(lfta_select_list, subaggr_se);
703 lfta_select_list[fta_se_nbr]->name = colname;
704 ano = lfta_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,Ext_fcns->has_lfta_bailout(sub_id));
705 subaggr_se->set_aggr_id(ano);
708 // Construct a reference to the subaggregate
709 new_cr = new colref_t(colname.c_str());
710 new_se = new scalarexp_t(new_cr);
711 // I'm not certain what the types should be ....
712 // This will need to be filled in by later analysis.
713 // NOTE: this might not capture all the meaning of data_type ...
714 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
715 subaggr_ref_se.push_back(new_se);
717 // Construct the superaggregate
// Its only operand is the column reference to the subaggregate.
718 int super_id = Ext_fcns->get_superaggr_id(fcn_id);
719 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
720 ret_se->set_fcn_id(super_id);
721 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
722 // Register it in the hfta aggregate table
// The sixth argument is is_running_aggr() of the SUB-aggregate id.
723 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, Ext_fcns->is_running_aggr(sub_id),false);
724 ret_se->set_aggr_id(ano);
725 hfta_aggr_se[agr_id] = ret_se;
731 // builtin aggregate processing
// ---- Builtin aggregate: orig_aggr_tbl supplies the subaggregate names and
// data types (use_se is an out-argument filled by get_subaggr_fcns).
735 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
736 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// Star aggregate: every subaggregate is itself a star aggregate.
739 if(orig_aggr_tbl->is_star_aggr(agr_id)){
740 for(sa=0;sa<subaggr_names.size();sa++){
741 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
742 subaggr_se->set_data_type(subaggr_dt[sa]);
744 // The following sequence is similar to the code in make_fta_se_ref,
745 // but there is special processing for the aggregate tables.
// NOTE(review): fta_se_nbr is assigned without a type elsewhere in this
// function, so this "int" declaration presumably shadows an outer variable —
// harmless here, but confirm.
746 int fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
748 colname = lfta_select_list[fta_se_nbr]->name;
750 colname = impute_colname(lfta_select_list, subaggr_se);
751 lfta_select_list[fta_se_nbr]->name = colname;
752 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
753 subaggr_se->set_aggr_id(ano);
755 new_cr = new colref_t(colname.c_str());
756 new_cr->set_tablevar_ref(0);
757 new_se = new scalarexp_t(new_cr);
759 // I'm not certain what the types should be ....
760 // This will need to be filled in by later analysis.
761 // Actually, this is causing a problem.
762 // I will assume a UINT data type. / change to INT
763 // (consistent with assign_data_types in analyze_fta.cc)
764 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
765 data_type *ndt = new data_type("Int"); // used to be Uint
766 new_se->set_data_type(ndt);
768 subaggr_ref_se.push_back(new_se);
// Non-star builtin: each subaggregate is built over a duplicate of the
// original operand, or as a star aggregate. The test choosing between the
// two forms is not visible (presumably use_se[sa] — confirm).
771 for(sa=0;sa<subaggr_names.size();sa++){
773 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
774 l_se = dup_se(aggr_operand, NULL);
775 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
777 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
779 subaggr_se->set_data_type(subaggr_dt[sa]);
781 // again, similar to make_fta_se_ref.
782 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
784 colname = lfta_select_list[fta_se_nbr]->name;
786 colname = impute_colname(lfta_select_list, subaggr_se);
787 lfta_select_list[fta_se_nbr]->name = colname;
// Registered with its operand, or with NULL for the star form.
789 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
791 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
792 subaggr_se->set_aggr_id(ano);
794 new_cr = new colref_t(colname.c_str());
795 new_se = new scalarexp_t(new_cr);
796 // I'm not certain what the types should be ....
797 // This will need to be filled in by later analysis.
798 // NOTE: this might not capture all the meaning of data_type ...
799 new_se->set_data_type(subaggr_dt[sa]);
800 subaggr_ref_se.push_back(new_se);
// The superaggregate expression is built over the subaggregate references
// and given the original aggregate's data type.
803 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
804 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
806 // ASSUME either the return value is an aggregation,
807 // or a binary_op between two aggregations
// NOTE(review): both sides of the || test the same constant (SE_AGGR_SE), so
// the second test is dead. One side was probably meant to be a different
// operator type (e.g. the star-aggregate type) — confirm against the enum.
808 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
809 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
810 ret_se->set_aggr_id(ano);
812 // Basically processing for AVG.
813 // set the data type of the superagg to that of the subagg.
// NOTE(review): subaggr_dt[0] and subaggr_dt[1] are indexed with no visible
// size check; this relies on the binary case having two subaggregates.
814 scalarexp_t *left_se = ret_se->get_left_se();
815 left_se->set_data_type(subaggr_dt[0]);
816 ano = hfta_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
817 left_se->set_aggr_id(ano);
819 scalarexp_t *right_se = ret_se->get_right_se();
820 right_se->set_data_type(subaggr_dt[1]);
821 ano = hfta_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
822 right_se->set_aggr_id(ano);
// Publish the superaggregate expression for this aggregate id.
825 hfta_aggr_se[agr_id] = ret_se;
830 // Split the aggregates in orig_aggr_tbl, into hfta_superaggregates and
831 // hfta_subaggregates.
832 // Register the superaggregates in hi_aggr_tbl, and the
833 // subaggregates in low_aggr_tbl.
834 // Insert references to the subaggregates into low_select_list.
835 // (and record their names in the currnames list)
836 // Create a SE for the superaggregate, put it in hi_aggr_se,
// Split aggregate agr_id of orig_aggr_tbl into a low-level subaggregate part
// and a high-level superaggregate part.
//   orig_aggr_tbl   : table holding the aggregate to split
//   agr_id          : index of that aggregate
//   hi_aggr_tbl     : receives the superaggregate(s)
//   low_aggr_tbl    : receives the subaggregate(s)
//   low_select_list : receives one entry per subaggregate
//   hi_aggr_se      : on exit, hi_aggr_se[agr_id] is the superaggregate SE
//   Ext_fcns        : external function table (used for non-builtin aggregates)
// Several declarations, "else" arms and closing braces are on lines not
// visible in this listing; the notes below say where that matters.
839 void split_hfta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
840 aggregate_table *hi_aggr_tbl,
841 aggregate_table *low_aggr_tbl,
842 vector<select_element *> &low_select_list,
843 map<int,scalarexp_t *> &hi_aggr_se,
844 ext_fcn_list *Ext_fcns
847 scalarexp_t *subaggr_se;
852 scalarexp_t *new_se, *l_se;
853 vector<scalarexp_t *> subaggr_ref_se;
// ---- Non-builtin aggregate: hfta sub/super function ids come from Ext_fcns.
856 if(! orig_aggr_tbl->is_builtin(agr_id)){
857 // Construct the subaggregate
858 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
859 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
860 vector<scalarexp_t *> subopl;
// The subaggregate works on duplicates of the original operands.
862 for(o=0;o<opl.size();++o){
863 subopl.push_back(dup_se(opl[o], NULL));
865 int sub_id = Ext_fcns->get_hfta_subaggr_id(fcn_id);
866 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
867 subaggr_se->set_fcn_id(sub_id);
868 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
869 // Add it to the low select list.
870 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
// Either the entry's existing name is reused, or a name is imputed and the
// subaggregate is registered in low_aggr_tbl. The condition separating the
// two paths is not visible (presumably keyed on new_element — confirm).
872 colname = low_select_list[fta_se_nbr]->name;
874 colname = impute_colname(low_select_list, subaggr_se);
875 low_select_list[fta_se_nbr]->name = colname;
876 ano = low_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,false);
877 subaggr_se->set_aggr_id(ano);
880 // Construct a reference to the subaggregate
881 new_cr = new colref_t(colname.c_str());
882 new_se = new scalarexp_t(new_cr);
883 // I'm not certain what the types should be ....
884 // This will need to be filled in by later analysis.
885 // NOTE: this might not capture all the meaning of data_type ...
886 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
887 subaggr_ref_se.push_back(new_se);
889 // Construct the superaggregate
// Its only operand is the column reference to the subaggregate.
890 int super_id = Ext_fcns->get_hfta_superaggr_id(fcn_id);
891 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
892 ret_se->set_fcn_id(super_id);
893 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
894 // Register it in the high aggregate table
895 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, false,false);
896 ret_se->set_aggr_id(ano);
897 hi_aggr_se[agr_id] = ret_se;
903 // builtin aggregate processing
// ---- Builtin aggregate: orig_aggr_tbl supplies the subaggregate names and
// data types (use_se is an out-argument filled by get_subaggr_fcns).
907 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
908 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// Star aggregate: every subaggregate is itself a star aggregate.
911 if(orig_aggr_tbl->is_star_aggr(agr_id)){
912 for(sa=0;sa<subaggr_names.size();sa++){
913 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
914 subaggr_se->set_data_type(subaggr_dt[sa]);
916 // The following sequence is similar to the code in make_fta_se_ref,
917 // but there is special processing for the aggregate tables.
// NOTE(review): fta_se_nbr is assigned without a type elsewhere in this
// function, so this "int" declaration presumably shadows an outer variable —
// harmless here, but confirm.
918 int fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
920 colname = low_select_list[fta_se_nbr]->name;
922 colname = impute_colname(low_select_list, subaggr_se);
923 low_select_list[fta_se_nbr]->name = colname;
924 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
925 subaggr_se->set_aggr_id(ano);
927 new_cr = new colref_t(colname.c_str());
928 new_cr->set_tablevar_ref(0);
929 new_se = new scalarexp_t(new_cr);
931 // I'm not certain what the types should be ....
932 // This will need to be filled in by later analysis.
933 // Actually, this is causing a problem.
934 // I will assume an INT data type (this used to be UINT).
935 // (consistent with assign_data_types in analyze_fta.cc)
936 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
937 data_type *ndt = new data_type("Int"); // was Uint
938 new_se->set_data_type(ndt);
940 subaggr_ref_se.push_back(new_se);
// Non-star builtin: each subaggregate is built over a duplicate of the
// original operand, or as a star aggregate. The test choosing between the
// two forms is not visible (presumably use_se[sa] — confirm).
943 for(sa=0;sa<subaggr_names.size();sa++){
945 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
946 l_se = dup_se(aggr_operand, NULL);
947 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
949 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
951 subaggr_se->set_data_type(subaggr_dt[sa]);
953 // again, similar to make_fta_se_ref.
954 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
956 colname = low_select_list[fta_se_nbr]->name;
958 colname = impute_colname(low_select_list, subaggr_se);
959 low_select_list[fta_se_nbr]->name = colname;
// Registered with its operand, or with NULL for the star form.
961 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
963 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
964 subaggr_se->set_aggr_id(ano);
966 new_cr = new colref_t(colname.c_str());
967 new_se = new scalarexp_t(new_cr);
968 // I'm not certain what the types should be ....
969 // This will need to be filled in by later analysis.
970 // NOTE: this might not capture all the meaning of data_type ...
971 new_se->set_data_type(subaggr_dt[sa]);
972 subaggr_ref_se.push_back(new_se);
// The superaggregate expression is built over the subaggregate references.
975 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
976 // ASSUME either the return value is an aggregation,
977 // or a binary_op between two aggregations
// NOTE(review): both sides of the || test the same constant (SE_AGGR_SE), so
// the second test is dead. One side was probably meant to be a different
// operator type (e.g. the star-aggregate type) — confirm against the enum.
978 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
// NOTE(review): here the original aggregate's data type is applied to ret_se
// only in this branch, whereas split_fta_aggr applies it before the test —
// confirm the difference is intended.
979 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
980 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
982 // Basically processing for AVG.
983 // set the data type of the superagg to that of the subagg.
// NOTE(review): subaggr_dt[0] and subaggr_dt[1] are indexed with no visible
// size check; this relies on the binary case having two subaggregates.
984 scalarexp_t *left_se = ret_se->get_left_se();
985 left_se->set_data_type(subaggr_dt[0]);
986 ano = hi_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
987 left_se->set_aggr_id(ano);
989 scalarexp_t *right_se = ret_se->get_right_se();
990 right_se->set_data_type(subaggr_dt[1]);
991 ano = hi_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
992 right_se->set_aggr_id(ano);
// NOTE(review): the brace structure around this call is not visible. If it
// runs after the binary-op branch as well, ret_se receives the id that was
// just assigned to right_se — confirm which scope this belongs to.
995 ret_se->set_aggr_id(ano);
// Publish the superaggregate expression for this aggregate id.
996 hi_aggr_se[agr_id] = ret_se;
1004 // Split a scalar expression into one part which executes
1005 // at the stream and another set of parts which execute
1007 // Because I'm actually modifying the SEs, I will make
1008 // copies. But I will assume that literals, params, and
1009 // colrefs are immutable at this point.
1010 // (if there is ever a need to change one, must make a
1012 // NOTE : if se is constant (only references literals),
1013 // avoid making the fta compute it.
1015 // NOTE : This will need to be generalized to
1016 // handle join expressions, namely to handle a vector
1019 // Return value is the HFTA se.
1020 // Add lftas select_elements to the fta_select_list.
1021 // set fta_forbidden if this node or any child cannot
1022 // execute at the lfta.
// split_fta_se: walk the scalar expression `se` and build a fresh copy of
// each node (ret_se), selected by se->get_operator_type().
//   se               - expression being split; its decorations are copied
//                      onto every rebuilt node via use_decorations_of().
//   fta_forbidden    - output flag, assigned in the branches below from the
//                      data-type / operation / external-function legality
//                      checks combined with the children's flags.
//   lfta_select_list - handed to make_fta_se_ref() (third argument always 0
//                      in this function) when a child SE is replaced by a
//                      reference to an lfta select element.
//   Ext_fcns         - external function table used by the function-call
//                      branch (lookup_fcn, fta_legal).
1026 scalarexp_t *split_fta_se(scalarexp_t *se,
1027 bool &fta_forbidden,
1028 vector<select_element *> &lfta_select_list,
1029 ext_fcn_list *Ext_fcns
1033 vector<scalarexp_t *> operand_list;
1034 vector<data_type *> dt_signature;
1035 scalarexp_t *ret_se, *l_se, *r_se;
1036 bool l_forbid, r_forbid, this_forbid;
1038 scalarexp_t *new_se;
1039 data_type *dt = se->get_data_type();
1041 switch(se->get_operator_type()){
// Literal: forbidden only when its data type is not fta-legal; the node is
// rebuilt around the same literal.
1043 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1044 ret_se = new scalarexp_t(se->get_literal());
1045 ret_se->use_decorations_of(se);
// Parameter: same type test; rebuilt as a param reference using the op string.
1049 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1050 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1051 ret_se->use_decorations_of(se);
1055 // No colref should be forbidden,
1056 // the schema is wrong, the fta_legal_type() fcn is wrong,
1057 // or the source table is actually a stream.
1058 // Issue a warning, but proceed with processing.
1059 // Also, should not be a ref to a gbvar.
1060 // (a gbvar ref only occurs in an aggregation node,
1061 // and these SEs are rehomed, not split.
1062 fta_forbidden = ! se->get_data_type()->fta_legal_type();
// Diagnostic for a colref whose data type is not fta-legal.
1065 fprintf(stderr,"WARNING, a colref is a forbidden data type in split_fta_se,"
1067 " type is %s, line=%d, col=%d\n",
1068 se->get_colref()->to_string().c_str(),
1069 se->get_data_type()->get_type_str().c_str(),
1070 se->lineno, se->charno
// Diagnostic for a colref that turns out to be a group-by variable reference.
1075 fprintf(stderr,"INTERNAL ERROR, a colref is a gbvar ref in split_fta_se,"
1076 " type is %s, line=%d, col=%d\n",
1077 se->get_data_type()->get_type_str().c_str(),
1078 se->lineno, se->charno
// The colref node is rebuilt around the same colref object (not a deep copy).
1083 ret_se = new scalarexp_t(se->get_colref());
1084 ret_se->use_decorations_of(se);
// Unary operator: split the single child first, then test whether this
// operation is legal on the child's type.
1088 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1090 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1092 // If this operation is forbidden but the child SE is not,
1093 // put the child se on the lfta_select_list, create a colref
1094 // which accesses this se, and make it the child of this op.
1095 // Exception : the child se is constant (only literal refs).
1096 if(this_forbid && !l_forbid){
1097 if(!is_literal_or_param_only(l_se)){
1098 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1099 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1102 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1104 ret_se->use_decorations_of(se);
// NOTE(review): `|` is a bitwise OR on two bools here; the value equals `||`.
1105 fta_forbidden = this_forbid | l_forbid;
// Binary operator: split both children, then test the operation itself.
1109 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1110 r_se = split_fta_se(se->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1112 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1114 // Replace the left se if it is not forbidden, but something else is.
// NOTE(review): `&` below is bitwise on bool operands; the truth value matches
// `&&` but there is no short-circuit.
1115 if((this_forbid || r_forbid) & !l_forbid){
1116 if(!is_literal_or_param_only(l_se)){
1117 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1122 // Replace the right se if it is not forbidden, but something else is.
1123 if((this_forbid || l_forbid) & !r_forbid){
1124 if(!is_literal_or_param_only(r_se)){
1125 new_se = make_fta_se_ref(lfta_select_list, r_se,0);
1130 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1131 ret_se->use_decorations_of(se);
1132 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are reported as an internal error in this function.
1139 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_fta_se."
1140 " line=%d, col=%d\n",
1141 se->get_op().c_str(),
1142 se->lineno, se->charno
// Function call: split every operand, remembering per-operand forbidden
// flags, and accumulate the overall flag.
1149 fta_forbidden = false;
1150 operand_list = se->get_operands();
1151 vector<scalarexp_t *> new_operands;
1152 vector<bool> forbidden_op;
1153 for(p=0;p<operand_list.size();p++){
1154 l_se = split_fta_se(operand_list[p], l_forbid, lfta_select_list, Ext_fcns);
1156 fta_forbidden |= l_forbid;
1157 new_operands.push_back(l_se);
1158 forbidden_op.push_back(l_forbid);
// The lookup signature is built from the ORIGINAL operand's data type,
// not from the split copy.
1159 dt_signature.push_back(operand_list[p]->get_data_type() );
1162 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Failed lookup: print the function name with its operand type list.
1164 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1166 for(o=0;o<operand_list.size();o++){
1167 if(o>0) fprintf(stderr,", ");
1168 fprintf(stderr,"%s",operand_list[o]->get_data_type()->get_type_str().c_str());
1170 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
// A lookup result of -2 is reported as "multiple subsuming functions found".
1171 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The call is also forbidden when the function itself is not fta-legal.
1175 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1177 // Replace the non-forbidden operands.
1178 // the forbidden ones are already replaced.
1180 for(p=0;p<new_operands.size();p++){
1181 if(! forbidden_op[p]){
1182 // if(new_operands[p]->get_data_type()->get_temporal() != constant_t){
1183 if(!is_literal_or_param_only(new_operands[p])){
1184 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],0);
1185 new_operands[p] = new_se;
1191 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1192 ret_se->use_decorations_of(se);
// NOTE(review): this message names check_fta_forbidden_se and is written with
// printf (stdout), while the other diagnostics in this function name
// split_fta_se and go to stderr -- confirm whether that is intentional.
1198 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
1209 // The predicates have already been
1210 // broken into conjunctions.
1211 // If any part of a conjunction is fta-forbidden,
1212 // it must be executed in the stream operator.
1213 // Else it is executed in the FTA.
1214 // A pre-analysis should determine whether this
1215 // predicate is fta-safe. This procedure will
1216 // assume that it is fta-forbidden and will
1217 // prepare it for execution in the stream.
// split_fta_pr: rebuild predicate `pr`, selected by pr->get_operator_type().
// Every scalar expression under the predicate is passed through
// split_fta_se(); where it is not literal/param-only it is replaced by a
// reference created with make_fta_se_ref(lfta_select_list, se, 0).
// Sub-predicates are rebuilt by recursion.
//   pr               - predicate to rebuild (read only in the code shown).
//   lfta_select_list - forwarded to split_fta_se() and make_fta_se_ref().
//   Ext_fcns         - forwarded to split_fta_se().
1221 predicate_t *split_fta_pr(predicate_t *pr,
1222 vector<select_element *> &lfta_select_list,
1223 ext_fcn_list *Ext_fcns
1226 vector<literal_t *> llist;
1227 scalarexp_t *se_l, *se_r;
1228 bool l_forbid, r_forbid;
1229 predicate_t *ret_pr, *pr_l, *pr_r;
1230 vector<scalarexp_t *> op_list, new_op_list;
1232 vector<data_type *> dt_signature;
1235 switch(pr->get_operator_type()){
// SE-in-literal-list form: split the left SE, keep the original literal vector.
1237 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1240 if(!is_literal_or_param_only(se_l)){
1241 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1245 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison form: split the left and right SEs independently.
1250 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1252 if(!is_literal_or_param_only(se_l)){
1253 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1258 se_r = split_fta_se(pr->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1260 if(!is_literal_or_param_only(se_r)){
1261 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,0);
1266 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Unary predicate operator: recurse on the single child predicate.
1270 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1271 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Binary predicate operator: recurse on both child predicates.
1274 case PRED_BINARY_OP:
1275 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1276 pr_r = split_fta_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1277 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1281 // I can't push the predicate into the lfta, except by
1282 // returning a bool value, and that is not worth the trouble,
// Operand-list form: split each operand, then rebuild the predicate with the
// new operand list and the original function id.
1283 op_list = pr->get_op_list();
1284 for(o=0;o<op_list.size();++o){
1285 se_l = split_fta_se(op_list[o],l_forbid,lfta_select_list,Ext_fcns);
1287 if(!is_literal_or_param_only(se_l)){
1288 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1292 new_op_list.push_back(se_l);
1295 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1296 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unknown predicate operator type: report position and type on stderr.
1299 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1300 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1311 //--------------------------------------------------------------------
1315 // Split a scalar expression into one part which executes
1316 // at the stream and another set of parts which execute
1318 // Because I'm actually modifying the SEs, I will make
1319 // copies. But I will assume that literals, params, and
1320 // colrefs are immutable at this point.
1321 // (if there is ever a need to change one, must make a
1323 // NOTE : if se is constant (only references literals),
1324 // avoid making the fta compute it.
1326 // NOTE : This will need to be generalized to
1327 // handle join expressions, namely to handle a vector
1330 // Return value is the HFTA se.
1331 // Add lftas select_elements to the fta_select_list.
1332 // set fta_forbidden if this node or any child cannot
1333 // execute at the lfta.
1335 #define SPLIT_FTAVEC_NOTBLVAR -1
1336 #define SPLIT_FTAVEC_MIXED -2
1338 bool is_PROTOCOL_source(int colref_source,
1339 vector< vector<select_element *> *> &lfta_select_list){
1340 if(colref_source>=0 && lfta_select_list[colref_source]!=NULL) return true;
1344 int combine_colref_source(int s1, int s2){
1345 if(s1==s2) return(s1);
1346 if(s1==SPLIT_FTAVEC_NOTBLVAR) return s2;
1347 if(s2==SPLIT_FTAVEC_NOTBLVAR) return s1;
1348 return SPLIT_FTAVEC_MIXED;
// split_ftavec_se: multi-source variant of the SE split. Each node of `se`
// is rebuilt (ret_se), and in addition to fta_forbidden the function reports
// colref_source: the table variable referenced by the sub-expression, or
// SPLIT_FTAVEC_NOTBLVAR / SPLIT_FTAVEC_MIXED. A child is replaced through
// make_fta_se_ref() only when its source passes is_PROTOCOL_source().
1351 scalarexp_t *split_ftavec_se(
1352 scalarexp_t *se, // the SE to split
1353 bool &fta_forbidden, // return true if some part of se
1355 int &colref_source, // the tblvar which sources the
1356 // colref, or NOTBLVAR, or MIXED
1357 vector< vector<select_element *> *> &lfta_select_list,
1358 // NULL if the tblvar is not PROTOCOL,
1359 // else build the select list.
1360 ext_fcn_list *Ext_fcns // is the fcn lfta-safe?
1362 // Return value is the HFTA SE, unless fta_forbidden is true and
1363 // colref_source>=0 and the indicated source is PROTOCOL.
1364 // In that case no split was done, the make_fta_se_ref must
1365 // be done by the caller.
1368 vector<scalarexp_t *> operand_list;
1369 vector<data_type *> dt_signature;
1370 scalarexp_t *ret_se, *l_se, *r_se;
1371 bool l_forbid, r_forbid, this_forbid;
1372 int l_csource, r_csource, this_csource;
1374 scalarexp_t *new_se;
1375 data_type *dt = se->get_data_type();
1377 switch(se->get_operator_type()){
// Literal: no table variable is referenced.
1379 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1380 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1381 ret_se = new scalarexp_t(se->get_literal());
1382 ret_se->use_decorations_of(se);
// Parameter: no table variable is referenced.
1386 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1387 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1388 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1389 ret_se->use_decorations_of(se);
// Interface parameter: never forbidden; its source is the table variable
// recorded on the interface-parameter reference.
1392 case SE_IFACE_PARAM:
1393 fta_forbidden = false;
1394 colref_source = se->get_ifpref()->get_tablevar_ref();
1395 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1396 ret_se->use_decorations_of(se);
1400 // No colref should be forbidden,
1401 // the schema is wrong, the fta_legal_type() fcn is wrong,
1402 // or the source table is actually a stream.
1403 // Issue a warning, but proceed with processing.
1404 // Also, should not be a ref to a gbvar.
1405 // (a gbvar ref only occurs in an aggregation node,
1406 // and these SEs are rehomed, not split.
1407 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1408 colref_source = se->get_colref()->get_tablevar_ref();
// Warn only when the forbidden colref comes from a PROTOCOL source.
1410 if(fta_forbidden && is_PROTOCOL_source(colref_source, lfta_select_list)){
1411 fprintf(stderr,"WARNING, a PROTOCOL colref is a forbidden data type in split_ftavec_se,"
1413 " type is %s, line=%d, col=%d\n",
1414 se->get_colref()->to_string().c_str(),
1415 se->get_data_type()->to_string().c_str(),
1416 se->lineno, se->charno
1421 fta_forbidden = true; // eval in hfta. ASSUME make copy as below.
1424 ret_se = new scalarexp_t(se->get_colref());
1425 ret_se->use_decorations_of(se);
// Unary operator: the child's source is written straight into colref_source.
1429 l_se = split_ftavec_se(se->get_left_se(), l_forbid, colref_source, lfta_select_list, Ext_fcns);
1431 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1433 // If this operation is forbidden but the child SE is not,
1434 // AND the colref source in the se is a single PROTOCOL source
1435 // put the child se on the lfta_select_list, create a colref
1436 // which accesses this se, and make it the child of this op.
1437 // Exception : the child se is constant (only literal refs).
1438 // TODO: I think the exception is expressed by is_PROTOCOL_source
1439 if(this_forbid && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list)){
1440 if(!is_literal_or_param_only(l_se)){
1441 new_se = make_fta_se_ref(lfta_select_list, l_se,colref_source);
1442 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1445 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1447 ret_se->use_decorations_of(se);
// NOTE(review): bitwise `|` on two bools; the value equals `||`.
1448 fta_forbidden = this_forbid | l_forbid;
// Binary operator: split both sides, then combine their sources.
1452 l_se = split_ftavec_se(se->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1453 r_se = split_ftavec_se(se->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1455 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1456 colref_source=combine_colref_source(l_csource, r_csource);
1458 // Replace the left se if the parent must be hfta but the child can
1459 // be lfta. This translates to
1460 // a) result is PROTOCOL and forbidden, but left SE is not forbidden
1461 // OR b) if result is mixed but the left se is PROTOCOL, not forbidden
1462 if( ((this_forbid || r_forbid) && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1463 (colref_source==SPLIT_FTAVEC_MIXED && !l_forbid &&
1464 is_PROTOCOL_source(l_csource, lfta_select_list)) ){
1465 if(!is_literal_or_param_only(l_se)){
1466 new_se = make_fta_se_ref(lfta_select_list, l_se,l_csource);
1471 // same logic as for right se.
1472 if( ((this_forbid || l_forbid) && !r_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1473 (colref_source==SPLIT_FTAVEC_MIXED && !r_forbid &&
1474 is_PROTOCOL_source(r_csource, lfta_select_list)) ){
1475 if(!is_literal_or_param_only(r_se)){
1476 new_se = make_fta_se_ref(lfta_select_list, r_se,r_csource);
1481 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1482 ret_se->use_decorations_of(se);
1483 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are reported as an internal error in this function.
1490 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_ftavec_se."
1491 " line=%d, col=%d\n",
1492 se->get_op().c_str(),
1493 se->lineno, se->charno
// Function call: split each operand, recording its forbidden flag and source,
// and fold all operand sources into colref_source.
1500 operand_list = se->get_operands();
1501 vector<scalarexp_t *> new_operands;
1502 vector<bool> forbidden_op;
1503 vector<int> csource;
1505 fta_forbidden = false;
1506 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1507 for(p=0;p<operand_list.size();p++){
1508 l_se = split_ftavec_se(operand_list[p], l_forbid, l_csource, lfta_select_list, Ext_fcns);
1510 fta_forbidden |= l_forbid;
1511 colref_source = combine_colref_source(colref_source, l_csource);
1512 new_operands.push_back(l_se);
1513 forbidden_op.push_back(l_forbid);
1514 csource.push_back(l_csource);
// The lookup signature uses the ORIGINAL operand's data type.
1515 dt_signature.push_back(operand_list[p]->get_data_type() );
1518 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Failed lookup: print the function name with its operand type list.
1520 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1522 for(o=0;o<operand_list.size();o++){
1523 if(o>0) fprintf(stderr,", ");
1524 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
1526 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
// A lookup result of -2 is reported as "multiple subsuming functions found".
1527 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
1531 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1533 // Replace the non-forbidden operands.
1534 // the forbidden ones are already replaced.
// Replacement happens only if the call is forbidden or its operands come
// from mixed sources, and only for operands whose own source is PROTOCOL.
1535 if(fta_forbidden || colref_source == SPLIT_FTAVEC_MIXED){
1536 for(p=0;p<new_operands.size();p++){
1537 if(! forbidden_op[p] && is_PROTOCOL_source(csource[p], lfta_select_list)){
1538 if(!is_literal_or_param_only(new_operands[p])){
1539 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],csource[p]);
1540 new_operands[p] = new_se;
1546 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1547 ret_se->use_decorations_of(se);
// NOTE(review): written with printf (stdout) while the other diagnostics in
// this function use fprintf(stderr, ...).
1553 printf("INTERNAL ERROR in split_ftavec_se: operator type %d\n",se->get_operator_type());
1562 // The predicates have already been
1563 // broken into conjunctions.
1564 // If any part of a conjunction is fta-forbidden,
1565 // it must be executed in the stream operator.
1566 // Else it is executed in the FTA.
1567 // A pre-analysis should determine whether this
1568 // predicate is fta-safe. This procedure will
1569 // assume that it is fta-forbidden and will
1570 // prepare it for execution in the stream.
// split_ftavec_pr: multi-source counterpart of the predicate split. Each
// scalar expression under `pr` goes through split_ftavec_se(); when the
// result is not forbidden, comes from a PROTOCOL source and is not
// literal/param-only, it is replaced via make_fta_se_ref() using that
// expression's own colref source. Sub-predicates are rebuilt by recursion.
1572 predicate_t *split_ftavec_pr(predicate_t *pr,
1573 vector< vector<select_element *> *> &lfta_select_list,
1574 ext_fcn_list *Ext_fcns
1577 vector<literal_t *> llist;
1578 scalarexp_t *se_l, *se_r;
1579 bool l_forbid, r_forbid;
1580 int l_csource, r_csource;
1581 predicate_t *ret_pr, *pr_l, *pr_r;
1582 vector<scalarexp_t *> op_list, new_op_list;
1584 vector<data_type *> dt_signature;
1587 switch(pr->get_operator_type()){
// SE-in-literal-list form: split the left SE, keep the original literal vector.
1589 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1591 // TODO: checking that the se is a PROTOCOL source should
1592 // take care of literal_or_param_only.
1593 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1594 if(!is_literal_or_param_only(se_l)){
1595 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1599 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison form: left and right SEs are handled independently, each with
// its own forbidden flag and colref source.
1604 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1605 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1606 if(!is_literal_or_param_only(se_l)){
1607 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1612 se_r = split_ftavec_se(pr->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1613 if(!r_forbid && is_PROTOCOL_source(r_csource, lfta_select_list)){
1614 if(!is_literal_or_param_only(se_r)){
1615 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,r_csource);
1620 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Unary predicate operator: recurse on the single child predicate.
1624 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1625 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Binary predicate operator: recurse on both child predicates.
1628 case PRED_BINARY_OP:
1629 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1630 pr_r = split_ftavec_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1631 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1635 // I can't push the predicate into the lfta, except by
1636 // returning a bool value, and that is not worth the trouble,
// Operand-list form: split each operand, then rebuild the predicate with the
// new operand list and the original function id.
1637 op_list = pr->get_op_list();
1638 for(o=0;o<op_list.size();++o){
1639 se_l = split_ftavec_se(op_list[o],l_forbid,l_csource,lfta_select_list,Ext_fcns);
1640 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1641 if(!is_literal_or_param_only(se_l)){
1642 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1646 new_op_list.push_back(se_l);
1649 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1650 ret_pr->set_fcn_id(pr->get_fcn_id());
// NOTE(review): the message below names split_fta_pr although this is
// split_ftavec_pr -- confirm whether the text should be updated.
1653 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1654 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1664 ////////////////////////////////////////////////////////////////////////
1665 /// rehome_fta_se rehome_fta_pr
1666 /// This is used to split an sgah operator (aggregation),
1667 /// I just need to make gb, aggr references point to the
1668 /// new gb, aggr table entries.
// rehome_fta_se: rebuild scalar expression `se` node by node, copying
// decorations from the original, except that aggregate references are
// replaced by the expression stored in *aggr_map under the aggregate's id.
//   se       - expression to rebuild.
//   aggr_map - aggregate id -> replacement expression; indexed with
//              operator[] below.
1671 scalarexp_t *rehome_fta_se(scalarexp_t *se,
1672 map< int, scalarexp_t * > *aggr_map
1677 vector<scalarexp_t *> operand_list;
1678 scalarexp_t *ret_se, *l_se, *r_se;
1680 scalarexp_t *new_se;
1681 data_type *dt = se->get_data_type();
1682 vector<scalarexp_t *> new_operands;
1684 switch(se->get_operator_type()){
// Literal: rebuilt around the same literal.
1686 ret_se = new scalarexp_t(se->get_literal());
1687 ret_se->use_decorations_of(se);
// Parameter: rebuilt as a param reference using the op string.
1691 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1692 ret_se->use_decorations_of(se);
1695 case SE_IFACE_PARAM:
1696 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1697 ret_se->use_decorations_of(se);
1703 // Must be a GB REF ...
1704 // I'm assuming that the hfta gbvar table has the
1705 // same sequence of entries as the input query's gbvar table.
1706 // Else I'll need some kind of translation table.
// NOTE(review): the warning text says "rehome_hfta_se" and "gbver"; the
// function is rehome_fta_se -- confirm whether the message should be updated.
1709 fprintf(stderr,"WARNING, a colref is not a gbver ref in rehome_hfta_se"
1710 " type is %s, line=%d, col=%d\n",
1711 se->get_data_type()->to_string().c_str(),
1712 se->lineno, se->charno
1716 ret_se = new scalarexp_t(se->get_colref());
1717 ret_se->use_decorations_of(se); // just inherit the gbref
// Unary operator: rebuild the child, then the operator node.
1721 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1723 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1724 ret_se->use_decorations_of(se);
// Binary operator: rebuild both children, then the operator node.
1728 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1729 r_se = rehome_fta_se(se->get_right_se(), aggr_map);
1731 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1732 ret_se->use_decorations_of(se);
// Aggregate reference: hand back the mapped expression itself (no copy).
// NOTE(review): assuming std::map, operator[] inserts a NULL entry when
// agr_id is absent and that NULL is returned -- confirm callers populate
// the map for every aggregate id.
1738 agr_id = se->get_aggr_ref();
1739 return (*aggr_map)[agr_id];
// Function call: a non-negative aggregate ref is handled like an aggregate;
// otherwise every operand is rebuilt and the call node reconstructed.
1743 agr_id = se->get_aggr_ref();
1744 if(agr_id >= 0) return (*aggr_map)[agr_id];
1746 operand_list = se->get_operands();
1747 for(p=0;p<operand_list.size();p++){
1748 l_se = rehome_fta_se(operand_list[p], aggr_map);
1750 new_operands.push_back(l_se);
1754 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1755 ret_se->use_decorations_of(se);
// NOTE(review): written with printf (stdout), unlike the stderr warning above.
1760 printf("INTERNAL ERROR in rehome_fta_se: operator type %d\n",se->get_operator_type());
1769 // The predicates have already been
1770 // broken into conjunctions.
1771 // If any part of a conjunction is fta-forbidden,
1772 // it must be executed in the stream operator.
1773 // Else it is executed in the FTA.
1774 // A pre-analysis should determine whether this
1775 // predicate is fta-safe. This procedure will
1776 // assume that it is fta-forbidden and will
1777 // prepare it for execution in the stream.
// rehome_fta_pr: rebuild predicate `pr`, passing every scalar expression it
// contains through rehome_fta_se() and every child predicate through
// rehome_fta_pr(), with the same aggr_map.
//   pr       - predicate to rebuild.
//   aggr_map - forwarded unchanged to rehome_fta_se().
1779 predicate_t *rehome_fta_pr(predicate_t *pr,
1780 map<int, scalarexp_t *> *aggr_map
1783 vector<literal_t *> llist;
1784 scalarexp_t *se_l, *se_r;
1785 predicate_t *ret_pr, *pr_l, *pr_r;
1786 vector<scalarexp_t *> op_list, new_op_list;
1789 switch(pr->get_operator_type()){
// SE-in-literal-list form: rebuilt SE plus the original literal vector.
1791 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1792 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison form: both sides rebuilt, operator string preserved.
1796 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1797 se_r = rehome_fta_se(pr->get_right_se(), aggr_map);
1798 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Unary predicate operator.
1802 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1803 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1806 case PRED_BINARY_OP:
1807 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1808 pr_r = rehome_fta_pr(pr->get_right_pr(), aggr_map);
1809 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Operand-list form: rebuild each operand and keep the original function id.
1813 op_list = pr->get_op_list();
1814 for(o=0;o<op_list.size();++o){
1815 se_l = rehome_fta_se(op_list[o], aggr_map);
1816 new_op_list.push_back(se_l);
1818 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1819 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unknown predicate operator type: report position and type on stderr.
1823 fprintf(stderr,"INTERNAL ERROR in rehome_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1824 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1833 ////////////////////////////////////////////////////////////////////
1834 ///////////////// Create a STREAM table to represent the FTA output.
// create_attributes: build a STREAM_SCHEMA table_def named `tname` with one
// field per entry of select_list.
//   tname       - name given to the new table_def.
//   select_list - each element supplies the field name (->name) and, through
//                 its scalar expression's data type, the field type and the
//                 type parameters copied into the field's param_list.
1836 table_def *create_attributes(string tname, vector<select_element *> &select_list){
1840 // Create a new STREAM schema for the output of the FTA.
1842 field_entry_list *fel = new field_entry_list();
1844 for(s=0;s<select_list.size();s++){
1845 scalarexp_t *sel_se = select_list[s]->se;
1846 data_type *dt = sel_se->get_data_type();
1848 // Grab the annotations of the field.
1849 // As of this writing, the only meaningful annotations
1850 // are whether or not the attribute is temporal.
1851 // There can be an annotation of constant_t, but
1852 // I'll ignore this, it feels like an unsafe assumption
1853 param_list *plist = new param_list();
1854 // if(dt->is_temporal()){
1855 vector<string> param_strings = dt->get_param_keys();
// Copy every type parameter: either as key plus value, or as the key alone.
1857 for(p=0;p<param_strings.size();++p){
1858 string v = dt->get_param_val(param_strings[p]);
1860 plist->append(param_strings[p].c_str(),v.c_str());
1862 plist->append(param_strings[p].c_str());
1866 // char access_fcn_name[500];
1867 string colname = select_list[s]->name;
1868 // sprintf(access_fcn_name,"get_field_%s",colname.c_str());
// The access function name is "get_field_" followed by the column name.
1869 string access_fcn_name = "get_field_"+colname;
1870 field_entry *fe = new field_entry(
1871 dt->get_type_str(), colname, access_fcn_name, plist, ufcns
1874 fel->append_field(fe);
// Wrap the collected fields in a stream-schema table definition.
1877 table_def *fta_tbl = new table_def(
1878 tname.c_str(), NULL, NULL, fel, STREAM_SCHEMA
1885 //------------------------------------------------------------------
1886 // Textual representation of the query node.
// Render this select/project node as query text: the select list (each
// expression followed by " AS name" when a name is set), a "From" line with
// the table name, and the where predicates, each parenthesised and joined
// with " AND ".
1890 string spx_qpn::to_query_string(){
1892 string ret = "Select ";
1894 for(s=0;s<select_list.size();s++){
1896 ret += se_to_query_string(select_list[s]->se, NULL);
1897 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1901 ret += "From "+table_name->to_string()+"\n";
1903 if(where.size() > 0){
1906 for(w=0;w<where.size();w++){
1907 if(w>0) ret += " AND ";
1908 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// Render this aggregation node as query text. Expressions and predicates are
// printed with &aggr_tbl so aggregate references can be resolved. Sections
// emitted here: select list, "From" line, where predicates, group-by list
// (plain, or with CUBE / ROLLUP / GROUPING_SETS entries) and having
// predicates.
1919 string sgah_qpn::to_query_string(){
1921 string ret = "Select ";
1923 for(s=0;s<select_list.size();s++){
1925 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1926 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1930 ret += "From "+table_name->to_string()+"\n";
1932 if(where.size() > 0){
1935 for(w=0;w<where.size();w++){
1936 if(w>0) ret += " AND ";
1937 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1942 if(gb_tbl.size() > 0){
// Plain form: at most one group-by pattern, or no entry types recorded.
// Every group-by variable is printed as "<definition> AS <name>".
1945 if(gb_tbl.gb_patterns.size() <= 1 || gb_tbl.gb_entry_type.size()==0){
1946 for(g=0;g<gb_tbl.size();g++){
1947 if(g>0) ret += ", ";
1948 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
1949 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
1951 ret += gb_tbl.get_name(g);
// Pattern form: walk the entry types; gb_pos indexes the group-by variable
// currently being printed.
1955 for(g=0;g<gb_tbl.gb_entry_type.size();++g){
1956 if(g>0) ret += ", ";
// An empty entry type denotes a single ordinary group-by variable.
1957 if(gb_tbl.gb_entry_type[g] == ""){
1958 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+
1959 " AS "+ gb_tbl.get_name(gb_pos);
// CUBE / ROLLUP: print gb_entry_count[g] variables inside "TYPE(".
1962 if(gb_tbl.gb_entry_type[g] == "CUBE" ||
1963 gb_tbl.gb_entry_type[g] == "ROLLUP"){
1964 ret += gb_tbl.gb_entry_type[g] + "(";
1966 for(gg=0;gg<gb_tbl.gb_entry_count[g];++gg){
1967 if(gg>0) ret += ", ";
1968 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+ " AS "+ gb_tbl.get_name(gb_pos);
// GROUPING_SETS: for each component row, list the names of the variables
// whose flag is set in that row.
1973 if(gb_tbl.gb_entry_type[g] == "GROUPING_SETS"){
1974 ret += gb_tbl.gb_entry_type[g] + "(";
1976 vector<vector<bool> > &local_components = gb_tbl.pattern_components[g];
1977 for(g1=0;g1<local_components.size();++g1){
1979 bool first_field = true;
// NOTE(review): this loop uses `<=`, so it visits gb_entry_count[g]+1
// positions, whereas the CUBE/ROLLUP loop above uses `<`. Confirm that each
// local_components row (and gb_tbl) really has that extra element; otherwise
// this reads one past the end.
1981 for(g2=0;g2<=gb_tbl.gb_entry_count[g];g2++){
1982 if(local_components[g1][g2]){
1983 if(!first_field) ret+=", ";
1984 else first_field = false;
1985 ret += gb_tbl.get_name(gb_pos+g2);
// Skip past all variables that belong to this grouping-sets entry.
1991 gb_pos += gb_tbl.gb_entry_count[g];
1998 if(having.size() > 0){
2001 for(h=0;h<having.size();h++){
2002 if(h>0) ret += " AND ";
2003 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// Render this node as query text: select list, "From" line, where
// predicates, group-by variables ("<definition> AS <name>"), having
// predicates and a "Closing_When" section. All expressions and predicates
// are printed with &aggr_tbl; predicate lists are joined with " AND ".
2012 string rsgah_qpn::to_query_string(){
2014 string ret = "Select ";
2016 for(s=0;s<select_list.size();s++){
2018 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2019 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2023 ret += "From "+table_name->to_string()+"\n";
2025 if(where.size() > 0){
2028 for(w=0;w<where.size();w++){
2029 if(w>0) ret += " AND ";
2030 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2035 if(gb_tbl.size() > 0){
2038 for(g=0;g<gb_tbl.size();g++){
2039 if(g>0) ret += ", ";
2040 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2041 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl)+" AS ";
2043 ret += gb_tbl.get_name(g);
2048 if(having.size() > 0){
2051 for(h=0;h<having.size();h++){
2052 if(h>0) ret += " AND ";
2053 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// Closing_When section, emitted only when closing predicates exist.
2058 if(closing_when.size() > 0){
2059 ret += "Closing_When ";
2061 for(h=0;h<closing_when.size();h++){
2062 if(h>0) ret += " AND ";
2063 ret += "(" + pred_to_query_str(closing_when[h]->pr,&aggr_tbl) + ")";
// Render this node as query text: select list, "From" line, where
// predicates, group-by variables, a "Supergroup" list (names of the group-by
// variables whose index is present in sg_tbl), having predicates, and the
// "Cleaning_When" / "Cleaning_By" sections. All expressions and predicates
// are printed with &aggr_tbl; predicate lists are joined with " AND ".
2072 string sgahcwcb_qpn::to_query_string(){
2074 string ret = "Select ";
2076 for(s=0;s<select_list.size();s++){
2078 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2079 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2083 ret += "From "+table_name->to_string()+"\n";
2085 if(where.size() > 0){
2088 for(w=0;w<where.size();w++){
2089 if(w>0) ret += " AND ";
2090 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2095 if(gb_tbl.size() > 0){
2098 for(g=0;g<gb_tbl.size();g++){
2099 if(g>0) ret += ", ";
2100 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2101 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
2103 ret += gb_tbl.get_name(g);
// Supergroup section: only group-by indices contained in sg_tbl are listed.
2108 if(sg_tbl.size() > 0){
2109 ret += "Supergroup ";
2111 bool first_elem = true;
2112 for(g=0;g<gb_tbl.size();g++){
2113 if(sg_tbl.count(g)){
2118 ret += gb_tbl.get_name(g);
2124 if(having.size() > 0){
2127 for(h=0;h<having.size();h++){
2128 if(h>0) ret += " AND ";
2129 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
2135 if(cleanwhen.size() > 0){
2136 ret += "Cleaning_When ";
2138 for(h=0;h<cleanwhen.size();h++){
2139 if(h>0) ret += " AND ";
2140 ret += "(" + pred_to_query_str(cleanwhen[h]->pr,&aggr_tbl) + ")";
2145 if(cleanby.size() > 0){
2146 ret += "Cleaning_By ";
2148 for(h=0;h<cleanby.size();h++){
2149 if(h>0) ret += " AND ";
2150 ret += "(" + pred_to_query_str(cleanby[h]->pr,&aggr_tbl) + ")";
// Render the watchlist table as text: a "WATCHLIST FIELDS {" header followed
// by one line per field of table_layout, using each field's to_string().
// The commented-out lines are a disabled DEFINE block (filename and
// refresh_interval).
2158 string watch_tbl_qpn::to_query_string(){
2160 // ret += "DEFINE {\n";
2161 // ret += "\tfilename='"+filename+";\n";
2162 // ret += "\trefresh_interval="+to_string(refresh_interval)+";\n}\n";
2163 ret += "WATCHLIST FIELDS {\n";
// get_fields() is received by value into a local vector here.
2164 std::vector<field_entry *> fields = table_layout->get_fields();
2165 for(int f=0;f<fields.size();++f){
2166 ret += fields[f]->to_string()+"\n";
// Render this merge node as text: "Merge <mvars[0]> : <mvars[1]>", a
// " SLACK <expr>" part, and the source tables in fm separated by ", ".
2173 string mrg_qpn::to_query_string(){
2175 string ret="Merge ";
// NOTE(review): mvars[0] and mvars[1] are indexed directly here -- assumes
// the node has at least two merge variables; confirm against callers.
2176 ret += mvars[0]->to_query_string() + " : " + mvars[1]->to_query_string();
2178 ret += " SLACK "+se_to_query_string(slack, NULL);
2183 for(t=0;t<fm.size();++t){
2184 if(t>0) ret += ", ";
2185 ret += fm[t]->to_string();
// Render this join node as query text: select list, a join-type keyword,
// the source tables in `from`, and the where predicates joined with " AND ".
2192 string join_eq_hash_qpn::to_query_string(){
2194 string ret = "Select ";
2196 for(s=0;s<select_list.size();s++){
2198 ret += se_to_query_string(select_list[s]->se, NULL);
2199 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2203 // NOTE: assuming binary join.
// properties = property(from[0]) + 2 * property(from[1]); presumably it
// selects which of the four join keywords below is appended -- TODO confirm.
2204 int properties = from[0]->get_property()+2*from[1]->get_property();
2207 ret += "INNER_JOIN ";
2210 ret += "LEFT_OUTER_JOIN ";
2213 ret += "RIGHT_OUTER_JOIN ";
2216 ret += "OUTER_JOIN ";
2222 for(f=0;f<from.size();++f){
2224 ret += from[f]->to_string();
2228 if(where.size() > 0){
2231 for(w=0;w<where.size();w++){
2232 if(w>0) ret += " AND ";
2233 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// Render this filter-join node as query text: select list,
// "FILTER_JOIN(<temporal field>,<temporal range>) ", the source tables in
// `from`, and the where predicates joined with " AND ".
2241 string filter_join_qpn::to_query_string(){
2243 string ret = "Select ";
2245 for(s=0;s<select_list.size();s++){
2247 ret += se_to_query_string(select_list[s]->se, NULL);
2248 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2252 // NOTE: assuming binary join.
// The join header carries the temporal variable's field name and the
// temporal range converted with int_to_string().
2253 ret += "FILTER_JOIN("+temporal_var->field+","+int_to_string(temporal_range)+") ";
2257 for(f=0;f<from.size();++f){
2259 ret += from[f]->to_string();
2263 if(where.size() > 0){
2266 for(w=0;w<where.size();w++){
2267 if(w>0) ret += " AND ";
2268 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// Render this watchlist-join node as query text: select list, the
// "WATCHLIST_JOIN " keyword, the source tables in `from`, and the where
// predicates joined with " AND ".
2276 string watch_join_qpn::to_query_string(){
2278 string ret = "Select ";
2280 for(s=0;s<select_list.size();s++){
2282 ret += se_to_query_string(select_list[s]->se, NULL);
2283 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2287 // NOTE: assuming binary join.
2288 ret += "WATCHLIST_JOIN ";
2292 for(f=0;f<from.size();++f){
2294 ret += from[f]->to_string();
2298 if(where.size() > 0){
2301 for(w=0;w<where.size();w++){
2302 if(w>0) ret += " AND ";
2303 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
2313 // -----------------------------------------------------------------
2314 // Query node subclass specific processing.
// Restructure this merge node so that its sources (fm, with the matching merge
// columns in mvars) are spread over a tree of merge nodes, and collect the
// resulting nodes in ret; on every visible path this node is pushed last.
// Prints an internal error when fm and mvars differ in size or when fm has a
// single entry (what follows each message is not visible in this excerpt).
2317 vector<mrg_qpn *> mrg_qpn::split_sources(){
2318 vector<mrg_qpn *> ret;
// Sanity check: each source in fm needs a matching merge column in mvars.
2322 if(fm.size() != mvars.size()){
2323 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources. fm.size() = %lu, mvars.size() = %lu\n",fm.size(),mvars.size());
2327 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources, fm size is 1.\n");
// Diagnostic dump of the sources about to be split.
2333 printf("spliting sources merge node, name = %s, %d sources.\n\t",node_name.c_str(), fm.size());
2334 for(ff=0;ff<fm.size();++ff){
2335 printf("%s ",fm[ff]->to_string().c_str());
2340 // Handle special cases.
// First special case: nothing to split, the node is returned on its own.
// NOTE(review): the guarding conditions of the special cases are not visible here.
2342 ret.push_back(this);
// Second special case: a "_cH1" copy takes the first two sources; this node
// then reads from that copy plus its remaining source.
2347 mrg_qpn *new_mrg = (mrg_qpn *)this->make_copy("_cH1");
2348 new_mrg->fm.push_back(this->fm[0]);
2349 new_mrg->fm.push_back(this->fm[1]);
2350 new_mrg->mvars.push_back(this->mvars[0]);
2351 new_mrg->mvars.push_back(this->mvars[1]);
// Drop the first source; the old second source slot is then re-pointed at new_mrg.
2353 this->fm.erase(this->fm.begin());
2354 this->mvars.erase(this->mvars.begin());
2355 string vname = fm[0]->get_var_name();
2356 this->fm[0] = new tablevar_t(new_mrg->node_name.c_str());
2357 this->fm[0]->set_range_var(vname);
// NOTE(review): after the erase, this->mvars[0] is the same pointer that was
// pushed as new_mrg->mvars[1] above, so these setters also affect new_mrg's
// entry -- confirm that sharing is intended.
2358 this->mvars[0]->set_field(table_layout->get_field_name(merge_fieldpos));
2359 this->mvars[0]->set_tablevar_ref(0);
2360 this->mvars[1]->set_tablevar_ref(1);
// Child first, then this node.
2362 ret.push_back(new_mrg);
2363 ret.push_back(this);
2366 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg->node_name.c_str(),this->node_name.c_str());
2367 for(i=0;i<new_mrg->fm.size();++i)
2368 printf("\tsource %s var %d (%s, %s) \n",new_mrg->node_name.c_str(),i,new_mrg->fm[i]->to_string().c_str(), new_mrg->mvars[i]->to_string().c_str());
2369 for(i=0;i<this->fm.size();++i)
2370 printf("\tsource %s var %d (%s, %s) \n",this->node_name.c_str(),i,this->fm[i]->to_string().c_str(), this->mvars[i]->to_string().c_str());
2377 // divide up the sources between two children.
2378 // Then, recurse on the children.
2380 mrg_qpn *new_mrg1 = (mrg_qpn *)this->make_copy("_cH1");
2381 mrg_qpn *new_mrg2 = (mrg_qpn *)this->make_copy("_cH2");
// First fm.size()/2 sources go to new_mrg1 ...
2382 for(i=0;i<this->fm.size()/2;++i){
2383 new_mrg1->fm.push_back(this->fm[i]);
2384 new_mrg1->mvars.push_back(this->mvars[i]);
2385 //printf("Pushing %d (%s, %s) to new_mrg1\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// ... and the rest (i continues from where the first loop stopped) go to new_mrg2.
2387 for(;i<this->fm.size();++i){
2388 new_mrg2->fm.push_back(this->fm[i]);
2389 new_mrg2->mvars.push_back(this->mvars[i]);
2390 //printf("Pushing %d (%s, %s) to new_mrg2\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// Renumber each child's merge columns to index into that child's own source list.
2392 for(i=0;i<new_mrg1->mvars.size();++i)
2393 new_mrg1->mvars[i]->set_tablevar_ref(i);
2394 for(i=0;i<new_mrg2->mvars.size();++i)
2395 new_mrg2->mvars[i]->set_tablevar_ref(i);
2397 // Children created, make this merge them.
// Source 0: the first child, read through range variable "_mrg_var_1".
// NOTE(review): fm/mvars are appended to here; any clearing of the old
// entries is not visible in this excerpt.
2401 tablevar_t *tmp_tblvar = new tablevar_t(new_mrg1->node_name.c_str());
2402 tmp_tblvar->set_range_var("_mrg_var_1");
2403 fm.push_back(tmp_tblvar);
2404 colref_t *tmp_cref = new colref_t("_mrg_var_1",table_layout->get_field_name(merge_fieldpos).c_str());
2405 tmp_cref->set_tablevar_ref(0);
2406 mvars.push_back(tmp_cref);
// Source 1: the second child, read through range variable "_mrg_var_2".
2408 tmp_tblvar = new tablevar_t(new_mrg2->node_name.c_str());
2409 tmp_tblvar->set_range_var("_mrg_var_2");
2410 fm.push_back(tmp_tblvar);
2411 tmp_cref = new colref_t("_mrg_var_2",table_layout->get_field_name(merge_fieldpos).c_str());
2412 tmp_cref->set_tablevar_ref(1);
2413 mvars.push_back(tmp_cref);
2417 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg1->node_name.c_str(),new_mrg2->node_name.c_str());
2418 for(i=0;i<new_mrg1->fm.size();++i)
2419 printf("\tsource %s var %d (%s, %s) \n",new_mrg1->node_name.c_str(),i,new_mrg1->fm[i]->to_string().c_str(), new_mrg1->mvars[i]->to_string().c_str());
2420 for(i=0;i<new_mrg2->fm.size();++i)
2421 printf("\tsource %s var %d (%s, %s) \n",new_mrg2->node_name.c_str(),i,new_mrg2->fm[i]->to_string().c_str(), new_mrg2->mvars[i]->to_string().c_str());
2424 // Recurse and put them together
// Result order: everything from the first child, everything from the second, then this node.
2425 vector<mrg_qpn *> st1 = new_mrg1->split_sources();
2426 ret.insert(ret.end(), st1.begin(), st1.end());
2427 vector<mrg_qpn *> st2 = new_mrg2->split_sources();
2428 ret.insert(ret.end(), st2.begin(), st2.end());
2430 ret.push_back(this);
2438 //////// Split helper function : resolve interfaces
// Build the list of (machine, interface) pairs that a table variable reads from.
//   table            - table variable whose interface spec is resolved
//   ifdb             - interface database used to evaluate an interface set
//   n_virtual_ifaces - number of virtual interfaces; 1 means "use the basic list as is"
//   hfta_parallelism - divisor used to compute how many virtual interfaces each hfta gets
//   hfta_idx         - selects which run of virtual interfaces to generate
// When table->get_ifq() is set the interface name is evaluated as a set via
// ifdb->eval; otherwise the table's own (machine, interface) pair is used.
2440 vector<pair<string,string> > get_ifaces(tablevar_t *table, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2441 vector<pair<string,string> > basic_ifaces;
2443 if(table->get_ifq()){
2444 basic_ifaces= ifdb->eval(table->get_interface(),ierr);
// Diagnostics for a failed evaluation (the tests on ierr are not visible here).
2446 fprintf(stderr,"ERROR, Interface set %s not found.\n",table->get_interface().c_str());
2449 fprintf(stderr,"ERROR, interface definition file didn't parse.\n");
// No interface-set query: the single explicitly named interface.
2452 basic_ifaces.push_back(make_pair(table->get_machine(), table->get_interface()));
2455 if(n_virtual_ifaces == 1)
2456 return basic_ifaces;
// Integer division: number of virtual interfaces handled per hfta instance.
// NOTE(review): hfta_parallelism is not checked against zero in the visible code.
2458 int stride = n_virtual_ifaces / hfta_parallelism;
2460 vector<pair<string,string> > ifaces;
2462 for(i=0;i<basic_ifaces.size();++i){
2463 string mach = basic_ifaces[i].first;
2464 string iface = basic_ifaces[i].second;
// Virtual interface names are <iface>X<2*s> for s in [hfta_idx*stride, (hfta_idx+1)*stride).
2465 for(s=hfta_idx*stride;s<(hfta_idx+1)*stride;++s){
2466 ifaces.push_back(pair<string, string>(mach,iface+"X"+int_to_string(2*s)));
2474 ///////// Split helper function : compute slack in a generated
// Derive this merge node's slack expression from the interface definitions.
//   t_se    - temporal scalar expression the merge is ordered on
//   fname   - field name supplied by the caller
//   sources - (machine, interface) pairs feeding the merge
//   ifdb    - interface database queried for "Slack_<field>" values
//   gbt     - group-by table handed to find_temporal_divisor
// The largest "Slack_<fnm>" value found over all sources is divided by the
// temporal divisor, rounded up, and stored in `slack` as a LITERAL_LONGINT
// scalar expression.  The handling of a non-positive divisor or a non-positive
// maximum is not visible in this excerpt.
2477 void mrg_qpn::resolve_slack(scalarexp_t *t_se, string fname, vector<pair<string, string> > &sources, ifq_t *ifdb, gb_table *gbt){
2481 // Find slack divisor, if any.
2483 long long int slack_divisor = find_temporal_divisor(t_se,gbt, fnm);
2484 if(slack_divisor <= 0){
2489 // find max slack in the iface spec
2490 long long int max_slacker = 0, this_slacker;
2491 string rname = "Slack_"+fnm;
2492 for(s=0;s<sources.size();++s){
2493 string src_machine = sources[s].first;
2494 string src_iface = sources[s].second;
2495 vector<string> slack_vec = ifdb->get_iface_vals(src_machine, src_iface,rname,e,es);
2496 for(v=0;v<slack_vec.size();++v){
// Accept the value only when exactly one number was converted: sscanf returns
// EOF (nonzero) on an empty string, which previously let an uninitialized
// this_slacker through.  %lld is the standard conversion for long long.
2497 if(sscanf(slack_vec[v].c_str(),"%lld",&this_slacker) == 1){
2498 if(this_slacker > max_slacker)
2499 max_slacker = this_slacker;
2504 if(max_slacker <= 0){
// Round up so the resulting slack is never smaller than max_slacker/slack_divisor.
2510 long long int the_slack=(long long int)(ceil(((double)max_slacker)/((double)slack_divisor)));
2512 sprintf(tmps,"%lld",the_slack);
2513 literal_t *slack_lit = new literal_t(tmps, LITERAL_LONGINT);
2514 slack = new scalarexp_t(slack_lit);
2518 //------------------------------------------------------------------
2519 // split a node to extract LFTA components.
// A watch-table node is never split: the result vector holds this node itself.
// None of the parameters is used in the visible lines.
2521 vector<qp_node *> watch_tbl_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2522 // nothing to do, nothing to split, return this node itself (not a copy).
2526 vector<qp_node *> ret_vec;
2528 ret_vec.push_back(this);
// A merge node is never split: the result vector holds this node itself.
// None of the parameters is used in the visible lines.
2534 vector<qp_node *> mrg_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2535 // nothing to do, nothing to split, return this node itself (not a copy).
2539 vector<qp_node *> ret_vec;
2541 ret_vec.push_back(this);
// Split a filter join across the interfaces it reads from.
//   Ext_fcns - external function list used by the fta-safety checks
//   ifdb     - interface database (interface sets and interface parameters)
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces
//   Schema, hfta_returned - not referenced in the visible lines
// With a single interface the node itself is pushed and every FROM entry is
// bound to that interface.  With several, one filter_join_qpn copy is built per
// interface and a mrg_qpn named node_name is appended to merge them.
2546 vector<qp_node *> filter_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2547 vector<qp_node *> ret_vec;
2549 // First check if the query can be pushed to the FTA.
// fta_ok accumulates the checks over every select expression and WHERE predicate.
2552 for(s=0;s<select_list.size();s++){
2553 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2556 for(p=0;p<where.size();p++){
2557 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2561 fprintf(stderr,"ERROR, filter join %s is fta-unsafe.\n",node_name.c_str());
2565 // Can it be done in a single lfta?
2566 // Get the set of interfaces it accesses.
// The interface set is taken from the first FROM entry only.
2569 vector<string> sel_names;
2570 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2571 if (ifaces.empty()) {
2572 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2576 if(ifaces.size() == 1){
2577 // Single interface, no need to merge.
2579 ret_vec.push_back(this);
// Bind every FROM entry to the one resolved interface and clear its ifq flag.
2581 for(i=0;i<from.size();i++){
2582 from[i]->set_machine(ifaces[0].first);
2583 from[i]->set_interface(ifaces[0].second);
2584 from[i]->set_ifq(false);
2588 // Multiple interfaces, generate the interface-specific queries plus
2592 vector<string> sel_names;
2593 for(si=0;si<ifaces.size();++si){
2594 filter_join_qpn *fta_node = new filter_join_qpn();
// Per-interface copies are named _<node>_<machine>_<interface>.
// NOTE(review): the size()==1 test sits under the "Multiple interfaces" comment;
// confirm whether it can ever be true here.
2597 if(ifaces.size()==1)
2598 fta_node->set_node_name( node_name );
2600 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2602 fta_node->set_node_name(new_name);
2604 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM list, bound to interface si.
2608 for(f=0;f<from.size();f++){
2609 fta_node->from.push_back(from[f]->duplicate());
2610 fta_node->from[f]->set_machine(ifaces[si].first);
2611 fta_node->from[f]->set_interface(ifaces[si].second);
2612 fta_node->from[f]->set_ifq(false);
2614 fta_node->temporal_var = temporal_var;
2615 fta_node->temporal_range = temporal_range;
2617 fta_node->use_bloom = use_bloom;
2619 for(s=0;s<select_list.size();s++){
2620 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Each predicate class is duplicated and re-analyzed; the copy is recorded both
// in its own list and in the copy's combined where list.
2623 for(p=0;p<shared_pred.size();p++){
2624 predicate_t *new_pr = dup_pr(shared_pred[p]->pr, NULL);
2625 cnf_elem *new_cnf = new cnf_elem(new_pr);
2626 analyze_cnf(new_cnf);
2627 fta_node->shared_pred.push_back(new_cnf);
2628 fta_node->where.push_back(new_cnf);
2630 for(p=0;p<pred_t0.size();p++){
2631 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2632 cnf_elem *new_cnf = new cnf_elem(new_pr);
2633 analyze_cnf(new_cnf);
2634 fta_node->pred_t0.push_back(new_cnf);
2635 fta_node->where.push_back(new_cnf);
2637 for(p=0;p<pred_t1.size();p++){
2638 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2639 cnf_elem *new_cnf = new cnf_elem(new_pr);
2640 analyze_cnf(new_cnf);
2641 fta_node->pred_t1.push_back(new_cnf);
2642 fta_node->where.push_back(new_cnf);
2644 for(p=0;p<hash_eq.size();p++){
2645 predicate_t *new_pr = dup_pr(hash_eq[p]->pr, NULL);
2646 cnf_elem *new_cnf = new cnf_elem(new_pr);
2647 analyze_cnf(new_cnf);
2648 fta_node->hash_eq.push_back(new_cnf);
2649 fta_node->where.push_back(new_cnf);
2651 for(p=0;p<postfilter.size();p++){
2652 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2653 cnf_elem *new_cnf = new cnf_elem(new_pr);
2654 analyze_cnf(new_cnf);
2655 fta_node->postfilter.push_back(new_cnf);
2656 fta_node->where.push_back(new_cnf);
2659 // Xfer all of the parameters.
2660 // Use existing handle annotations.
2661 vector<string> param_names = param_tbl->get_param_names();
2663 for(pi=0;pi<param_names.size();pi++){
2664 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2665 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2666 param_tbl->handle_access(param_names[pi]));
2668 fta_node->definitions = definitions;
// A nonzero result from resolve_if_params is recorded on this node as error code 3.
2669 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2670 this->error_code = 3;
2674 ret_vec.push_back(fta_node);
// The merge node is built from the first per-interface copy plus the copies' names.
2677 mrg_qpn *mrg_node = new mrg_qpn((filter_join_qpn *)ret_vec[0],
2678 node_name, sel_names,ifaces, ifdb);
2679 ret_vec.push_back(mrg_node);
// Split a watchlist join across the interfaces it reads from.
//   Ext_fcns - external function list used by the fta-safety checks
//   ifdb     - interface database (interface sets and interface parameters)
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces
//   Schema, hfta_returned - not referenced in the visible lines
// With a single interface the node itself is pushed; from[0] is bound to that
// interface and from[1] to the "_local_" interface on the same machine.  With
// several, one watch_join_qpn copy is built per interface and a mrg_qpn named
// node_name is appended to merge them.
2690 vector<qp_node *> watch_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2691 vector<qp_node *> ret_vec;
2693 // First check if the query can be pushed to the FTA.
// fta_ok accumulates the checks over every select expression and WHERE predicate.
2696 for(s=0;s<select_list.size();s++){
2697 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2700 for(p=0;p<where.size();p++){
2701 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2705 fprintf(stderr,"ERROR, watchlist join %s is fta-unsafe.\n",node_name.c_str());
2709 // Can it be done in a single lfta?
2710 // Get the set of interfaces it accesses.
// The interface set is taken from the first FROM entry only.
2713 vector<string> sel_names;
2714 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2715 if (ifaces.empty()) {
// The diagnostic names this class (it previously named filter_join_qpn).
2716 fprintf(stderr,"INTERNAL ERROR in watch_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2720 if(ifaces.size() == 1){
2721 // Single interface, no need to merge.
2723 ret_vec.push_back(this);
2725 // Treat the range vars a bit differently, the 2nd is reading from a _local_ watchlist.
2726 from[0]->set_machine(ifaces[0].first);
2727 from[0]->set_interface(ifaces[0].second);
2728 from[0]->set_ifq(false);
2730 from[1]->set_machine(ifaces[0].first);
2731 from[1]->set_interface("_local_");
2732 from[1]->set_ifq(false);
2736 // Multiple interfaces, generate the interface-specific queries plus
2740 vector<string> sel_names;
2741 for(si=0;si<ifaces.size();++si){
2742 watch_join_qpn *fta_node = new watch_join_qpn();
// Per-interface copies are named _<node>_<machine>_<interface>.
// NOTE(review): the size()==1 test sits under the "Multiple interfaces" comment;
// confirm whether it can ever be true here.
2745 if(ifaces.size()==1)
2746 fta_node->set_node_name( node_name );
2748 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2750 fta_node->set_node_name(new_name);
2752 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM list; an entry gets either interface si or "_local_"
// (the test choosing between the two is not visible in this excerpt).
2756 for(f=0;f<from.size();f++){
2757 fta_node->from.push_back(from[f]->duplicate());
2758 fta_node->from[f]->set_machine(ifaces[si].first);
2760 fta_node->from[f]->set_interface(ifaces[si].second);
2762 fta_node->from[f]->set_interface("_local_");
2763 fta_node->from[f]->set_ifq(false);
2766 for(s=0;s<select_list.size();s++){
2767 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Each predicate class is duplicated and re-analyzed; the copy is recorded both
// in its own container and in the copy's combined where list.
2770 for(p=0;p<pred_t0.size();p++){
2771 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2772 cnf_elem *new_cnf = new cnf_elem(new_pr);
2773 analyze_cnf(new_cnf);
2774 fta_node->pred_t0.push_back(new_cnf);
2775 fta_node->where.push_back(new_cnf);
2777 for(p=0;p<pred_t1.size();p++){
2778 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2779 cnf_elem *new_cnf = new cnf_elem(new_pr);
2780 analyze_cnf(new_cnf);
2781 fta_node->pred_t1.push_back(new_cnf);
2782 fta_node->where.push_back(new_cnf);
// hash_eq is indexed by key field name here, one predicate per key.
2784 for(p=0;p<key_flds.size();p++){ // we've checked that all keys are covered
2785 string k = key_flds[p];
2786 predicate_t *new_pr = dup_pr(hash_eq[k]->pr, NULL);
2787 cnf_elem *new_cnf = new cnf_elem(new_pr);
2788 analyze_cnf(new_cnf);
2789 fta_node->hash_eq[k] = new_cnf;
2790 fta_node->where.push_back(new_cnf);
// NOTE(review): join_filter predicates are stored in the copy's postfilter list,
// not in a join_filter list -- confirm this is intentional and not a copy-paste slip.
2792 for(p=0;p<join_filter.size();p++){
2793 predicate_t *new_pr = dup_pr(join_filter[p]->pr, NULL);
2794 cnf_elem *new_cnf = new cnf_elem(new_pr);
2795 analyze_cnf(new_cnf);
2796 fta_node->postfilter.push_back(new_cnf);
2797 fta_node->where.push_back(new_cnf);
2799 for(p=0;p<postfilter.size();p++){
2800 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2801 cnf_elem *new_cnf = new cnf_elem(new_pr);
2802 analyze_cnf(new_cnf);
2803 fta_node->postfilter.push_back(new_cnf);
2804 fta_node->where.push_back(new_cnf);
2806 fta_node->key_flds = key_flds;
2808 // Xfer all of the parameters.
2809 // Use existing handle annotations.
2810 vector<string> param_names = param_tbl->get_param_names();
2812 for(pi=0;pi<param_names.size();pi++){
2813 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2814 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2815 param_tbl->handle_access(param_names[pi]));
2817 fta_node->definitions = definitions;
// A nonzero result from resolve_if_params is recorded on this node as error code 3.
2818 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2819 this->error_code = 3;
2823 ret_vec.push_back(fta_node);
// The merge node is built from the first per-interface copy plus the copies' names.
2826 mrg_qpn *mrg_node = new mrg_qpn((watch_join_qpn *)ret_vec[0],
2827 node_name, sel_names,ifaces, ifdb);
2828 ret_vec.push_back(mrg_node);
2835 // Use to search for unresolved interface param refs in an hfta.
// Accumulate into ret the interface-parameter reference counts reported by
// count_se_ifp_refs for every select-list expression and by count_pr_ifp_refs
// for every WHERE predicate; ifpnames is passed through to both helpers.
2837 int spx_qpn::count_ifp_refs(set<string> &ifpnames){
2840 for(i=0;i<select_list.size();++i)
2841 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2842 for(i=0;i<where.size();++i)
2843 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulate into ret the interface-parameter reference counts found in the
// select list, the WHERE and HAVING predicates, the aggregate arguments and
// the group-by definitions; ifpnames is passed through to the helpers.
2847 int sgah_qpn::count_ifp_refs(set<string> &ifpnames){
2850 for(i=0;i<select_list.size();++i)
2851 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2852 for(i=0;i<where.size();++i)
2853 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2854 for(i=0;i<having.size();++i)
2855 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2856 for(i=0;i<aggr_tbl.size();++i){
// Builtin aggregates other than star aggregates have a single argument expression.
2857 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2858 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
// Other branch (its condition is not visible here): walk the operand list.
2860 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2861 for(j=0;j<opl.size();++j)
2862 ret += count_se_ifp_refs(opl[j],ifpnames);
2865 for(i=0;i<gb_tbl.size();++i){
2866 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Accumulate into ret the interface-parameter reference counts found in the
// select list, the WHERE, HAVING and CLOSING_WHEN predicates, the aggregate
// arguments and the group-by definitions; ifpnames is passed to the helpers.
2872 int rsgah_qpn::count_ifp_refs(set<string> &ifpnames){
2875 for(i=0;i<select_list.size();++i)
2876 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2877 for(i=0;i<where.size();++i)
2878 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2879 for(i=0;i<having.size();++i)
2880 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2881 for(i=0;i<closing_when.size();++i)
2882 ret += count_pr_ifp_refs(closing_when[i]->pr,ifpnames);
2883 for(i=0;i<aggr_tbl.size();++i){
// Builtin aggregates other than star aggregates have a single argument expression.
2884 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2885 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
// Other branch (its condition is not visible here): walk the operand list.
2887 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2888 for(j=0;j<opl.size();++j)
2889 ret += count_se_ifp_refs(opl[j],ifpnames);
2892 for(i=0;i<gb_tbl.size();++i){
2893 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Interface-parameter reference count for a watch-table node.
// NOTE(review): the function body is not visible in this excerpt.
2898 int watch_tbl_qpn::count_ifp_refs(set<string> &ifpnames){
// Interface-parameter reference count for a merge node.
// NOTE(review): the function body is not visible in this excerpt.
2902 int mrg_qpn::count_ifp_refs(set<string> &ifpnames){
// Accumulate into ret the interface-parameter reference counts found in the
// select list and in every predicate class of the join: both prefilter lists,
// temporal_eq, hash_eq and postfilter.  ifpnames is passed to the helpers.
2906 int join_eq_hash_qpn::count_ifp_refs(set<string> &ifpnames){
2909 for(i=0;i<select_list.size();++i)
2910 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
// prefilter holds one predicate list per join input (indices 0 and 1).
2911 for(i=0;i<prefilter[0].size();++i)
2912 ret += count_pr_ifp_refs(prefilter[0][i]->pr,ifpnames);
2913 for(i=0;i<prefilter[1].size();++i)
2914 ret += count_pr_ifp_refs(prefilter[1][i]->pr,ifpnames);
2915 for(i=0;i<temporal_eq.size();++i)
2916 ret += count_pr_ifp_refs(temporal_eq[i]->pr,ifpnames);
2917 for(i=0;i<hash_eq.size();++i)
2918 ret += count_pr_ifp_refs(hash_eq[i]->pr,ifpnames);
2919 for(i=0;i<postfilter.size();++i)
2920 ret += count_pr_ifp_refs(postfilter[i]->pr,ifpnames);
// Accumulate into ret the interface-parameter reference counts reported for
// every select-list expression and every WHERE predicate of the filter join.
2924 int filter_join_qpn::count_ifp_refs(set<string> &ifpnames){
2927 for(i=0;i<select_list.size();++i)
2928 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2929 for(i=0;i<where.size();++i)
2930 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulate into ret the interface-parameter reference counts reported for
// every select-list expression and every WHERE predicate of the watchlist join.
2934 int watch_join_qpn::count_ifp_refs(set<string> &ifpnames){
2937 for(i=0;i<select_list.size();++i)
2938 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2939 for(i=0;i<where.size();++i)
2940 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2946 // Resolve interface params to string literals
// Resolve interface-parameter references in the select list and WHERE
// predicates against the machine/interface of the first FROM entry.
//   ifdb - interface database handed to the resolve helpers
//   err  - string handed to the resolve helpers (presumably receives the error text)
// Callers in this file treat a nonzero return as failure; the return
// statements themselves are not visible in this excerpt.
2947 int filter_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2950 string ifname = from[0]->get_interface();
2951 string ifmach = from[0]->get_machine();
2952 for(i=0;i<select_list.size();++i)
2953 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2955 for(i=0;i<where.size();++i)
2956 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and WHERE
// predicates against the machine/interface of the first FROM entry.
//   ifdb - interface database handed to the resolve helpers
//   err  - string handed to the resolve helpers (presumably receives the error text)
// Callers in this file treat a nonzero return as failure; the return
// statements themselves are not visible in this excerpt.
2961 int watch_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2964 string ifname = from[0]->get_interface();
2965 string ifmach = from[0]->get_machine();
2966 for(i=0;i<select_list.size();++i)
2967 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2969 for(i=0;i<where.size();++i)
2970 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and WHERE
// predicates against the machine/interface recorded in table_name.
//   ifdb - interface database handed to the resolve helpers
//   err  - string handed to the resolve helpers (presumably receives the error text)
// Callers in this file treat a nonzero return as failure; the return
// statements themselves are not visible in this excerpt.
2976 int spx_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2979 string ifname = table_name->get_interface();
2980 string ifmach = table_name->get_machine();
2981 for(i=0;i<select_list.size();++i)
2982 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2984 for(i=0;i<where.size();++i)
2985 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references against the machine/interface
// recorded in table_name, visiting the select list, the WHERE and HAVING
// predicates, the aggregate arguments and the group-by definitions.
//   ifdb - interface database handed to the resolve helpers
//   err  - string handed to the resolve helpers (presumably receives the error text)
// Callers in this file treat a nonzero return as failure; the return
// statements themselves are not visible in this excerpt.
2990 int sgah_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2993 string ifname = table_name->get_interface();
2994 string ifmach = table_name->get_machine();
2996 //printf("Select list has %d elements\n",select_list.size());
2997 for(i=0;i<select_list.size();++i){
2998 //printf("\tresolving elemet %d\n",i);
2999 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) ){
3003 for(i=0;i<where.size();++i){
3004 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err) )
3007 for(i=0;i<having.size();++i){
3008 if( resolve_pr_ifp_refs(having[i]->pr,ifmach, ifname, ifdb, err) )
3011 //printf("aggr list has %d elements\n",select_list.size());
3012 for(i=0;i<aggr_tbl.size();++i){
3013 //printf("\tresolving elemet %d\n",i);
// Builtin aggregates other than star aggregates have a single argument expression.
3014 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
3015 //printf("\t\t\tbuiltin\n");
3016 if( resolve_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifmach, ifname, ifdb, err) )
// Other branch (its condition is not visible here): resolve each operand.
3019 //printf("\t\t\tudaf\n");
3020 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
3021 for(j=0;j<opl.size();++j)
3022 if( resolve_se_ifp_refs(opl[j],ifmach, ifname, ifdb, err) )
3026 for(i=0;i<gb_tbl.size();++i){
3027 if( resolve_se_ifp_refs(gb_tbl.get_def(i), ifmach, ifname, ifdb, err) )
3036 SPLITTING A SELECTION_PROJECTION OPERATOR
3038 An SPX node may reference:
3039 literals, parameters, colrefs, functions, operators
3040 An SPX node may not reference:
3041 group-by variables, aggregates
3043 An SPX node contains
3044 selection list of SEs
3045 where list of CNF predicates
3048 If each selection SE and each where predicate is fta-safe
3049 execute entire operator as an LFTA.
3051 for each predicate in the where clause
3052 if it is fta safe, execute it in the lfta
3053 else, split each SE in the predicate, evaluate the
3054 top-level SEs in the hfta and eval the predicate on that.
3055 For each SE in the se list
3056 Split the SE, eval the high level part, push onto hfta
3060 A SE represents a value which must be computed. The LFTA
3061 must provide sub-values from which the HFTA can compute the
3063 1) the SE is fta-safe
3064 Create an entry in the selection list of the LFTA which is
3065 the SE itself. Reference this LFTA selection list entry in
3066 the HFTA (via a field name assigned to the lfta selection
3068 2) The SE is not fta-safe
3069 Determine the boundary between the fta-safe and the fta-unsafe
3070 portions of the SE. The result is a rooted tree (which is
3071 evaluated at the HFTA) which references sub-SEs (which are
3072 evaluated at the LFTA). Each of the sub-SEs is placed on
3073 the selection list of the LFTA and assigned field names,
3074 the top part is evaluated at the HFTA and references the
3075 sub-SEs through their assigned field names.
3076 The only SEs on the LFTA selection list are those created by
3077 the above mechanism. The collection of assigned field names becomes
3078 the schema of the LFTA.
3080 TODO: insert tablevar names into the colrefs.
// Split a selection/projection node into LFTA and stream (HFTA) parts.
//   Ext_fcns      - external function list used by the fta-safety checks and SE splitting
//   Schema        - table list used to look up the schema type of the source table
//   hfta_returned - set to the number of trailing hfta nodes on the visible
//                   multi-interface split path
//   ifdb          - interface database (interface sets and interface parameters)
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces
// Visible paths:
//   * source is not a PROTOCOL_SCHEMA table: the node itself is pushed;
//   * every select expression and WHERE predicate passes the fta check: one
//     copy per interface, plus a merge node when there are several interfaces;
//   * otherwise: an fta node ("_fta_"+node_name) and a stream node (node_name)
//     are built, the fta part is replicated per interface and merged when
//     there are several interfaces, and the stream node is pushed last.
3084 vector<qp_node *> spx_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3087 vector<qp_node *> ret_vec;
3089 // If the node reads from a stream, don't split.
3090 // int t = Schema->get_table_ref(table_name->get_schema_name());
3091 int t = table_name->get_schema_ref();
3092 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3094 ret_vec.push_back(this);
3099 // Get the set of interfaces it accesses.
3102 vector<string> sel_names;
3103 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3104 if (ifaces.empty()) {
3105 fprintf(stderr,"INTERNAL ERROR in spx_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3110 // The FTA node, it is always returned.
// NOTE(review): the per-interface loop below assigns a fresh spx_qpn to
// fta_node, so the object created here is not referenced again on that path.
3112 spx_qpn *fta_node = new spx_qpn();
3113 fta_node->table_name = table_name;
3115 // for colname imputation
3116 // vector<string> fta_flds, stream_flds;
3119 // First check if the query can be pushed to the FTA.
3122 for(s=0;s<select_list.size();s++){
3123 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
3126 for(p=0;p<where.size();p++){
3127 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
3131 ////////////////////////////////////////////////////////////
3132 // The query can be executed entirely in the FTA.
3135 for(si=0;si<ifaces.size();++si){
3136 fta_node = new spx_qpn();
// A single interface keeps the original node name; otherwise the copy is
// named _<node>_<machine>_<interface>.
3139 if(ifaces.size()==1)
3140 fta_node->set_node_name( node_name );
3142 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3144 fta_node->set_node_name(new_name);
3146 sel_names.push_back(fta_node->get_node_name());
// Each copy gets its own table variable, bound to interface si.
3149 fta_node->table_name = table_name->duplicate();
3150 fta_node->table_name->set_machine(ifaces[si].first);
3151 fta_node->table_name->set_interface(ifaces[si].second);
3152 fta_node->table_name->set_ifq(false);
3154 for(s=0;s<select_list.size();s++){
3155 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
3157 for(p=0;p<where.size();p++){
3158 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
3159 cnf_elem *new_cnf = new cnf_elem(new_pr);
3160 analyze_cnf(new_cnf);
3162 fta_node->where.push_back(new_cnf);
3165 // Xfer all of the parameters.
3166 // Use existing handle annotations.
3167 vector<string> param_names = param_tbl->get_param_names();
3169 for(pi=0;pi<param_names.size();pi++){
3170 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3171 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3172 param_tbl->handle_access(param_names[pi]));
3174 fta_node->definitions = definitions;
// A nonzero result from resolve_if_params is recorded on this node as error code 3.
3175 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3176 this->error_code = 3;
3180 ret_vec.push_back(fta_node);
// Several interfaces: add one merge node, built from the first per-interface copy.
3183 if(ifaces.size() > 1){
3184 spx_qpn *tmp_spx = (spx_qpn *)(ret_vec[0]);
3185 mrg_qpn *mrg_node = new mrg_qpn(tmp_spx,
3186 node_name, sel_names,ifaces, ifdb);
3188 Do not split sources until we are done with optimizations
3189 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3190 for(i=0;i<split_merge.size();++i){
3191 ret_vec.push_back(split_merge[i]);
3193 hfta_returned = split_merge.size();
3195 ret_vec.push_back(mrg_node);
3200 // printf("OK as FTA.\n");
3201 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3206 ////////////////////////////////////////////////////
3207 // The fta must be split. Create a stream node.
3208 // NOTE : I am counting on the single
3209 // table in the from list. (Joins handled in a different operator).
3213 spx_qpn *stream_node = new spx_qpn();
3214 stream_node->set_node_name( node_name );
3215 // Create the tablevar in the stream's FROM clause.
3216 // set the schema name to the name of the LFTA,
3217 // and use the same tablevar name.
3218 stream_node->table_name = new tablevar_t(
3219 ("_fta_"+node_name).c_str()
3221 stream_node->table_name->set_range_var(table_name->get_var_name());
3224 fta_node->set_node_name( "_fta_"+node_name );
3226 // table var names of fta, stream.
3227 string fta_var = fta_node->table_name->get_var_name();
3228 string stream_var = stream_node->table_name->get_var_name();
3230 // Set up select list vector
3231 vector< vector<select_element *> *> select_vec;
3232 select_vec.push_back(&(fta_node->select_list)); // only one child
3235 // Split the select list into its FTA and stream parts.
3236 // If any part of the SE is fta-unsafe, it will return
3237 // a SE to execute at the stream ref'ing SE's evaluated
3238 // at the fta (which are put on the FTA's select list as a side effect).
3239 // If the SE is fta-safe, put it on the fta select list, make
3240 // a ref to it and put the ref on the stream select list.
3241 for(s=0;s<select_list.size();s++){
3242 bool fta_forbidden = false;
3243 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3244 // scalarexp_t *root_se = split_fta_se(
3245 // select_list[s]->se,fta_forbidden, fta_node->select_list, Ext_fcns
3247 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
3248 fta_forbidden, se_src, select_vec, Ext_fcns
3250 // if(fta_forbidden){
// When the expression is fta-forbidden or se_src is still SPLIT_FTAVEC_NOTBLVAR,
// root_se goes on the stream select list directly; otherwise the stream gets a
// reference to the expression placed on the fta select list.
3251 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3252 stream_node->select_list.push_back(
3253 new select_element(root_se, select_list[s]->name)
3256 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,root_se,0);
3257 stream_node->select_list.push_back(
3258 new select_element(new_se, select_list[s]->name)
3264 // The WHERE clause has already been split into a set of clauses
3265 // that are ANDED together. For each clause, check if its FTA-safe.
3266 // If not, split its SE's into fta-safe and stream-executing parts,
3267 // then put a clause which ref's the SEs into the stream.
3268 // Else put it into the LFTA.
3269 predicate_t *pr_root;
3271 for(p=0;p<where.size();p++){
3272 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) ){
3273 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3274 // pr_root = split_fta_pr( where[p]->pr, fta_node->select_list, Ext_fcns);
3275 fta_forbidden = true;
3277 pr_root = dup_pr(where[p]->pr, NULL);
3278 fta_forbidden = false;
3280 cnf_elem *cnf_root = new cnf_elem(pr_root);
3281 analyze_cnf(cnf_root);
// The clause lands on the stream node or on the fta node (the selecting test,
// presumably on fta_forbidden, is not visible in this excerpt).
3284 stream_node->where.push_back(cnf_root);
3286 fta_node->where.push_back(cnf_root);
3292 // Divide the parameters among the stream, FTA.
3293 // Currently : assume that the stream receives all parameters
3294 // and parameter updates, incorporates them, then passes
3295 // all of the parameters to the FTA.
3296 // This will need to change (tables, fta-unsafe types. etc.)
3298 // I will pass on the use_handle_access marking, even
3299 // though the fcn call that requires handle access might
3300 // exist in only one of the parts of the query.
3301 // Parameter manipulation and handle access determination will
3302 // need to be revisited anyway.
3303 vector<string> param_names = param_tbl->get_param_names();
3305 for(pi=0;pi<param_names.size();pi++){
3306 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3307 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3308 param_tbl->handle_access(param_names[pi]));
3309 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3310 param_tbl->handle_access(param_names[pi]));
// The fta side drops the "_referenced_ifaces" definition; the stream side keeps all definitions.
3313 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3314 stream_node->definitions = definitions;
3316 // Now split by interfaces
3317 if(ifaces.size() > 1){
3318 for(si=0;si<ifaces.size();++si){
3319 spx_qpn *subq_node = new spx_qpn();
3321 // Name the subquery
3322 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3324 subq_node->set_node_name( new_name) ;
3325 sel_names.push_back(subq_node->get_node_name());
// Each subquery is a copy of the fta part, bound to interface si.
3328 subq_node->table_name = fta_node->table_name->duplicate();
3329 subq_node->table_name->set_machine(ifaces[si].first);
3330 subq_node->table_name->set_interface(ifaces[si].second);
3331 subq_node->table_name->set_ifq(false);
3333 for(s=0;s<fta_node->select_list.size();s++){
3334 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3336 for(p=0;p<fta_node->where.size();p++){
3337 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3338 cnf_elem *new_cnf = new cnf_elem(new_pr);
3339 analyze_cnf(new_cnf);
3341 subq_node->where.push_back(new_cnf);
3343 // Xfer all of the parameters.
3344 // Use existing handle annotations.
3345 vector<string> param_names = param_tbl->get_param_names();
3347 for(pi=0;pi<param_names.size();pi++){
3348 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3349 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3350 param_tbl->handle_access(param_names[pi]));
3352 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3353 this->error_code = 3;
3356 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3358 ret_vec.push_back(subq_node);
// The merge node takes the fta node's name, so the stream node (which reads
// from "_fta_"+node_name) consumes the merged output.
3361 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3362 fta_node->node_name, sel_names, ifaces, ifdb);
3364 Do not split sources until we are done with optimizations
3365 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3366 for(i=0;i<split_merge.size();++i){
3367 ret_vec.push_back(split_merge[i]);
// Two hfta nodes are returned on this path: the merge node and the stream node.
3370 ret_vec.push_back(mrg_node);
3371 ret_vec.push_back(stream_node);
3372 hfta_returned = 1/*split_merge.size()*/ + 1;
// Single interface: bind the fta node directly and return it with the stream node.
3375 fta_node->table_name->set_machine(ifaces[0].first);
3376 fta_node->table_name->set_interface(ifaces[0].second);
3377 fta_node->table_name->set_ifq(false);
3378 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3379 this->error_code = 3;
3382 ret_vec.push_back(fta_node);
3383 ret_vec.push_back(stream_node);
3387 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3388 // printf("Stream node is:\n%s\n\n",stream_node->to_query_string().c_str() );
3396 Splitting a aggregation+sampling operator.
3397 right now, return an error if any splitting is required.
// Splitting an aggregation+sampling node is not implemented: when the source
// table's schema type is not PROTOCOL_SCHEMA the node itself is pushed onto
// the result; the remaining visible path only prints an error.
// Only Schema is referenced among the parameters in the visible lines.
3400 vector<qp_node *> sgahcwcb_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3404 vector<qp_node *> ret_vec;
3405 int s, p, g, a, o, i;
3408 vector<string> fta_flds, stream_flds;
3410 // If the node reads from a stream, don't split.
3411 // int t = Schema->get_table_ref(table_name->get_schema_name());
3412 int t = table_name->get_schema_ref();
3413 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3414 ret_vec.push_back(this);
3418 fprintf(stderr,"ERROR : cannot split a sampling operator (not yet implemented).\n");
3428 Splitting a running aggregation operator.
3429 The code is almost identical to that of the sgah operator
3431 - there is no lfta-only option.
3432 - the stream node is rsgah_qpn (lfta is sgah or spx)
3433 - need to handle the closing when (similar to having)
// Split a running-aggregation (rsgah) node into an LFTA part and an
// HFTA ("stream") part.
//
// Outline of what the visible code does:
//  1. A node whose source schema is not PROTOCOL_SCHEMA is placed on
//     ret_vec unchanged.
//  2. The interfaces read by the node are obtained with get_ifaces();
//     an empty set is reported as an internal error.
//  3. Group-by definitions, aggregate SEs, UDAFs and WHERE predicates are
//     checked for FTA safety.  When a check fails and the "select_lfta"
//     definition is absent, error_code is set to 1 and err_str is filled.
//  4. Two split strategies follow:
//       - aggr/aggr      : LFTA is an sgah_qpn, HFTA is an rsgah_qpn.
//       - selection/aggr : LFTA is an spx_qpn,  HFTA is an rsgah_qpn.
//     NOTE(review): the condition selecting between them is not visible
//     in this excerpt - presumably select_rqd; confirm.
//  5. With more than one interface, one LFTA subquery per interface is
//     created and combined through a mrg_qpn; ret_vec then holds the
//     subqueries, the merge node and the stream node.  Otherwise the
//     single LFTA is bound to ifaces[0] and ret_vec holds the LFTA node
//     followed by the stream node.
//
// Parameters:
//   Ext_fcns         external function table, used by the FTA-safety
//                    checks, the UDAF sub/super lookup and SE splitting.
//   Schema           table list, used to find the source schema type.
//   hfta_returned    output; set to 2 in the multi-interface paths.
//   ifdb             interface database, passed to get_ifaces(),
//                    resolve_if_params() and the mrg_qpn constructor.
//   n_virtual_ifaces, hfta_parallelism, hfta_idx
//                    forwarded to get_ifaces().
// Errors are reported through this->error_code / this->err_str
// (code 1 for unsafe constructs, code 3 when resolve_if_params fails).
3436 vector<qp_node *> rsgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3440 vector<qp_node *> ret_vec;
3441 int s, p, g, a, o, i;
3444 vector<string> fta_flds, stream_flds;
3446 // If the node reads from a stream, don't split.
3447 // int t = Schema->get_table_ref(table_name->get_schema_name());
3448 int t = table_name->get_schema_ref();
3449 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3450 ret_vec.push_back(this);
3454 // Get the set of interfaces it accesses.
// sel_names collects the names of the per-interface subqueries created
// further down; it is later handed to the mrg_qpn constructor.
3456 vector<string> sel_names;
3457 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3458 if (ifaces.empty()) {
3459 fprintf(stderr,"INTERNAL ERROR in rsgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3466 //////////////////////////////////////////////////////////////
3467 /// Split into lfta, hfta.
3469 // A rsgah node must always be split,
3470 // if for no other reason than to complete the
3471 // partial aggregation.
3473 // First, determine if the query can be split into aggr/aggr,
3474 // or if it must be selection/aggr.
3475 // Splitting into selection/aggr is allowed only
3476 // if select_lfta is set.
3479 bool select_allowed = definitions.count("select_lfta")>0;
3480 bool select_rqd = false;
// Group-by definitions: an LFTA-unsafe definition is an error unless
// select_lfta is enabled; the indices of unsafe gbvars are remembered
// for the WHERE-clause processing in the selection/aggr split.
3482 set<int> unsafe_gbvars; // for processing where clause
3483 for(g=0;g<gb_tbl.size();g++){
3484 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3485 if(!select_allowed){
3486 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3487 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3489 this->error_code = 1;
3490 this->err_str = tmpstr;
3494 unsafe_gbvars.insert(g);
3499 // Verify that the SEs in the aggregate definitions are fta-safe
3500 for(a=0;a<aggr_tbl.size();++a){
3501 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3502 if(ase != NULL){ // COUNT(*) does not have a SE.
3503 if(!select_allowed){
3504 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3505 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3506 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3508 this->error_code = 1;
3509 this->err_str = tmpstr;
3518 // Verify that all of the ref'd UDAFs can be split.
// A UDAF is splittable only when both a super-aggregate and a
// sub-aggregate id are registered for it (neither lookup is negative).
3520 for(a=0;a<aggr_tbl.size();++a){
3521 if(! aggr_tbl.is_builtin(a)){
3522 int afcn = aggr_tbl.get_fcn_id(a);
3523 int super_id = Ext_fcns->get_superaggr_id(afcn);
3524 int sub_id = Ext_fcns->get_subaggr_id(afcn);
3525 if(super_id < 0 || sub_id < 0){
3526 if(!select_allowed){
3527 this->err_str += "ERROR in rsgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3528 this->error_code = 1;
// WHERE predicates must be FTA-safe unless select_lfta is enabled.
3537 for(p=0;p<where.size();p++){
3538 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3539 if(!select_allowed){
3540 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3541 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3543 this->error_code = 1;
3544 this->err_str = tmpstr;
3555 /////////////////////////////////////////////////////
3556 // Split into aggr/aggr.
// LFTA side: a plain sgah node named "_fta_"+node_name.  Note that
// fta_node->table_name is the same pointer as this node's table_name,
// not a copy.
3562 sgah_qpn *fta_node = new sgah_qpn();
3563 fta_node->table_name = table_name;
3564 fta_node->set_node_name( "_fta_"+node_name );
3565 fta_node->table_name->set_range_var(table_name->get_var_name());
// HFTA side: a running-aggregation node that keeps the original node
// name and reads from the table variable "_fta_"+node_name.
3568 rsgah_qpn *stream_node = new rsgah_qpn();
3569 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3570 stream_node->set_node_name( node_name );
3571 stream_node->table_name->set_range_var(table_name->get_var_name());
3573 // First, process the group-by variables.
3574 // The fta must supply the values of all the gbvars.
3575 // If a gb is computed, the computation must be
3576 // performed at the FTA, so the SE must be FTA-safe.
3577 // Nice side effect : the gbvar table contains
3578 // matching entries for the original query, the lfta query,
3579 // and the hfta query. So gbrefs in the new queries are set
3580 // correctly just by inheriting the gbrefs from the old query.
3581 // If this property changed, I'll need translation tables.
3584 for(g=0;g<gb_tbl.size();g++){
3585 // Insert the gbvar into the lfta.
3586 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
3587 fta_node->gb_tbl.add_gb_var(
3588 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
3591 // Insert a ref to the value of the gbvar into the lfta select list.
3592 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
3593 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
3594 gbvar_fta->set_gb_ref(g);
3595 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
3596 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
3598 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
3599 gbvar_stream->set_gb_ref(-1); // used as GBvar def
3600 stream_node->gb_tbl.add_gb_var(
3601 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
3606 // SEs in the aggregate definitions.
3607 // They are all safe, so split them up for later processing.
// hfta_aggr_se is keyed by aggregate index and is consulted by the
// rehome_fta_se / rehome_fta_pr calls below.
3608 map<int, scalarexp_t *> hfta_aggr_se;
3609 for(a=0;a<aggr_tbl.size();++a){
3610 split_fta_aggr( &(aggr_tbl), a,
3611 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
3612 fta_node->select_list,
3619 // Next, the select list.
3621 for(s=0;s<select_list.size();s++){
3622 bool fta_forbidden = false;
3623 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3624 stream_node->select_list.push_back(
3625 new select_element(root_se, select_list[s]->name));
3630 // All the predicates in the where clause must execute
3633 for(p=0;p<where.size();p++){
3634 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
3635 cnf_elem *new_cnf = new cnf_elem(new_pr);
3636 analyze_cnf(new_cnf);
3638 fta_node->where.push_back(new_cnf);
3641 // All of the predicates in the having clause must
3642 // execute in the stream node.
3644 for(p=0;p<having.size();p++){
3645 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3646 cnf_elem *cnf_root = new cnf_elem(pr_root);
3647 analyze_cnf(cnf_root);
3649 stream_node->having.push_back(cnf_root);
3652 // All of the predicates in the closing when clause must
3653 // execute in the stream node.
3655 for(p=0;p<closing_when.size();p++){
3656 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3657 cnf_elem *cnf_root = new cnf_elem(pr_root);
3658 analyze_cnf(cnf_root);
3660 stream_node->closing_when.push_back(cnf_root);
3664 // Divide the parameters among the stream, FTA.
3665 // Currently : assume that the stream receives all parameters
3666 // and parameter updates, incorporates them, then passes
3667 // all of the parameters to the FTA.
3668 // This will need to change (tables, fta-unsafe types. etc.)
3670 // I will pass on the use_handle_access marking, even
3671 // though the fcn call that requires handle access might
3672 // exist in only one of the parts of the query.
3673 // Parameter manipulation and handle access determination will
3674 // need to be revisited anyway.
3675 vector<string> param_names = param_tbl->get_param_names();
3677 for(pi=0;pi<param_names.size();pi++){
3678 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3679 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3680 param_tbl->handle_access(param_names[pi]));
3681 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3682 param_tbl->handle_access(param_names[pi]));
// Both halves inherit the definitions; the LFTA copy drops the
// "_referenced_ifaces" entry.
3684 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3685 stream_node->definitions = definitions;
3687 // Now split by interfaces XXXX
// More than one interface: clone the LFTA once per interface, bind each
// clone to its (machine, interface) pair, and merge the clones.
3688 if(ifaces.size() > 1){
3689 for(si=0;si<ifaces.size();++si){
3690 sgah_qpn *subq_node = new sgah_qpn();
3692 // Name the subquery
3693 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3695 subq_node->set_node_name( new_name) ;
3696 sel_names.push_back(subq_node->get_node_name());
3699 subq_node->table_name = fta_node->table_name->duplicate();
3700 subq_node->table_name->set_machine(ifaces[si].first);
3701 subq_node->table_name->set_interface(ifaces[si].second);
3702 subq_node->table_name->set_ifq(false);
3705 for(g=0;g<fta_node->gb_tbl.size();g++){
3706 // Insert the gbvar into the lfta.
3707 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
3708 subq_node->gb_tbl.add_gb_var(
3709 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
3713 // Insert the aggregates
3714 for(a=0;a<fta_node->aggr_tbl.size();++a){
3715 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
3718 for(s=0;s<fta_node->select_list.size();s++){
3719 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3721 for(p=0;p<fta_node->where.size();p++){
3722 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3723 cnf_elem *new_cnf = new cnf_elem(new_pr);
3724 analyze_cnf(new_cnf);
3726 subq_node->where.push_back(new_cnf);
3728 for(p=0;p<fta_node->having.size();p++){
3729 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
3730 cnf_elem *new_cnf = new cnf_elem(new_pr);
3731 analyze_cnf(new_cnf);
3733 subq_node->having.push_back(new_cnf);
3735 // Xfer all of the parameters.
3736 // Use existing handle annotations.
3737 vector<string> param_names = param_tbl->get_param_names();
3739 for(pi=0;pi<param_names.size();pi++){
3740 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3741 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3742 param_tbl->handle_access(param_names[pi]));
3744 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
// A nonzero result from resolve_if_params is recorded as error code 3.
3745 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3746 this->error_code = 3;
3750 ret_vec.push_back(subq_node);
// The merge node takes over the LFTA's node name and is built from the
// first per-interface subquery (ret_vec[0]) plus the collected names.
3753 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
3754 fta_node->node_name, sel_names, ifaces, ifdb);
3757 Do not split sources until we are done with optimizations
3758 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3759 for(i=0;i<split_merge.size();++i){
3760 ret_vec.push_back(split_merge[i]);
3763 ret_vec.push_back(mrg_node);
3764 ret_vec.push_back(stream_node);
// NOTE(review): 2 presumably counts mrg_node and stream_node - confirm.
3765 hfta_returned = 1/*split_merge.size()*/+1;
// Single interface: bind the LFTA directly to ifaces[0].
3768 fta_node->table_name->set_machine(ifaces[0].first);
3769 fta_node->table_name->set_interface(ifaces[0].second);
3770 fta_node->table_name->set_ifq(false);
3771 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3772 this->error_code = 3;
3775 ret_vec.push_back(fta_node);
3776 ret_vec.push_back(stream_node);
3781 // ret_vec.push_back(fta_node);
3782 // ret_vec.push_back(stream_node);
3789 /////////////////////////////////////////////////////////////////////
3790 /// Split into selection LFTA, aggregation HFTA.
// LFTA side is a selection/projection (spx) node; as above,
// fta_node->table_name is the same pointer as this node's table_name.
3792 spx_qpn *fta_node = new spx_qpn();
3793 fta_node->table_name = table_name;
3794 fta_node->set_node_name( "_fta_"+node_name );
3795 fta_node->table_name->set_range_var(table_name->get_var_name());
3798 rsgah_qpn *stream_node = new rsgah_qpn();
3799 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3800 stream_node->set_node_name( node_name );
3801 stream_node->table_name->set_range_var(table_name->get_var_name());
// split_ftavec_se / split_ftavec_pr take a vector of select lists; here
// it holds just the LFTA's select list.
3804 vector< vector<select_element *> *> select_vec;
3805 select_vec.push_back(&(fta_node->select_list)); // only one child
3807 // Process the gbvars. Split their defining SEs.
3808 for(g=0;g<gb_tbl.size();g++){
3809 bool fta_forbidden = false;
3810 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3812 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
3813 fta_forbidden, se_src, select_vec, Ext_fcns
3815 // if(fta_forbidden) (
// If the split SE is FTA-forbidden or its source is still
// SPLIT_FTAVEC_NOTBLVAR, it is used directly as the HFTA gbvar
// definition; the other visible add_gb_var call uses a reference into
// the LFTA select list instead.
3816 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3817 stream_node->gb_tbl.add_gb_var(
3818 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
3821 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
3822 stream_node->gb_tbl.add_gb_var(
3823 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
3828 // Process the aggregate table.
3829 // Copy to stream, split the SEs.
3830 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
3831 for(a=0;a<aggr_tbl.size();++a){
// Built-in aggregates: star aggregates carry no operand SE; other
// built-ins have their single operand SE split across LFTA/HFTA.
3833 if(aggr_tbl.is_builtin(a)){
3834 if(aggr_tbl.is_star_aggr(a)){
3835 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
3836 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
3838 bool fta_forbidden = false;
3839 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3841 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3842 fta_forbidden, se_src, select_vec, Ext_fcns
3844 // if(fta_forbidden) (
3845 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3846 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
3847 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
3849 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3850 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
3851 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
3854 hse->set_data_type(aggr_tbl.get_data_type(a));
3855 hse->set_aggr_id(a);
3856 hfta_aggr_se[a]=hse;
// Aggregates with an operand list: every operand is split
// individually and the aggregate is re-created whole in the HFTA.
3858 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
3859 vector<scalarexp_t *> new_opl;
3860 for(o=0;o<opl.size();++o){
3861 bool fta_forbidden = false;
3862 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3863 scalarexp_t *agg_se = split_ftavec_se( opl[o],
3864 fta_forbidden, se_src, select_vec, Ext_fcns
3866 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3867 // fta_forbidden, se_src, select_vec, Ext_fcns
3869 // if(fta_forbidden) (
3870 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3871 new_opl.push_back(agg_se);
3873 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3874 new_opl.push_back(new_se);
3877 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),aggr_tbl.is_superaggr(a), aggr_tbl.is_running_aggr(a),aggr_tbl.has_bailout(a));
3878 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
3879 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
3880 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
3881 hse->set_aggr_id(a);
3882 hfta_aggr_se[a]=hse;
3887 // Process the WHERE clause.
3888 // If it is fta-safe AND it refs only fta-safe gbvars,
3889 // then expand the gbvars and put it into the lfta.
3890 // Else, split it into an hfta predicate ref'ing
3891 // se's computed partially in the lfta.
3893 predicate_t *pr_root;
3895 for(p=0;p<where.size();p++){
3896 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
3897 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3898 fta_forbidden = true;
3900 pr_root = dup_pr(where[p]->pr, NULL);
3901 expand_gbvars_pr(pr_root, gb_tbl);
3902 fta_forbidden = false;
3904 cnf_elem *cnf_root = new cnf_elem(pr_root);
3905 analyze_cnf(cnf_root);
// NOTE(review): the test choosing between the two push_back calls is
// not visible here - presumably fta_forbidden selects the stream node.
3908 stream_node->where.push_back(cnf_root);
3910 fta_node->where.push_back(cnf_root);
3915 // Process the Select clause, rehome it on the
3917 for(s=0;s<select_list.size();s++){
3918 bool fta_forbidden = false;
3919 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3920 stream_node->select_list.push_back(
3921 new select_element(root_se, select_list[s]->name));
3925 // Process the Having clause
3927 // All of the predicates in the having clause must
3928 // execute in the stream node.
3930 for(p=0;p<having.size();p++){
3931 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3932 cnf_elem *cnf_root = new cnf_elem(pr_root);
3933 analyze_cnf(cnf_root);
3935 stream_node->having.push_back(cnf_root);
3937 // Same for closing when
3938 for(p=0;p<closing_when.size();p++){
3939 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3940 cnf_elem *cnf_root = new cnf_elem(pr_root);
3941 analyze_cnf(cnf_root);
3943 stream_node->closing_when.push_back(cnf_root);
3947 // Handle parameters and a few last details.
3948 vector<string> param_names = param_tbl->get_param_names();
3950 for(pi=0;pi<param_names.size();pi++){
3951 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3952 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3953 param_tbl->handle_access(param_names[pi]));
3954 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3955 param_tbl->handle_access(param_names[pi]));
3958 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3959 stream_node->definitions = definitions;
3961 // Now split by interfaces YYYY
// Same per-interface fan-out as in the aggr/aggr split, but the clones
// are spx nodes, so only the select list and WHERE clause are copied.
3962 if(ifaces.size() > 1){
3963 for(si=0;si<ifaces.size();++si){
3964 spx_qpn *subq_node = new spx_qpn();
3966 // Name the subquery
3967 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3969 subq_node->set_node_name( new_name) ;
3970 sel_names.push_back(subq_node->get_node_name());
3973 subq_node->table_name = fta_node->table_name->duplicate();
3974 subq_node->table_name->set_machine(ifaces[si].first);
3975 subq_node->table_name->set_interface(ifaces[si].second);
3976 subq_node->table_name->set_ifq(false);
3978 for(s=0;s<fta_node->select_list.size();s++){
3979 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3981 for(p=0;p<fta_node->where.size();p++){
3982 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3983 cnf_elem *new_cnf = new cnf_elem(new_pr);
3984 analyze_cnf(new_cnf);
3986 subq_node->where.push_back(new_cnf);
3988 // Xfer all of the parameters.
3989 // Use existing handle annotations.
3990 vector<string> param_names = param_tbl->get_param_names();
3992 for(pi=0;pi<param_names.size();pi++){
3993 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3994 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3995 param_tbl->handle_access(param_names[pi]));
3997 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3998 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3999 this->error_code = 3;
4003 ret_vec.push_back(subq_node);
4006 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4007 fta_node->node_name, sel_names, ifaces, ifdb);
4009 Do not split sources until we are done with optimizations
4010 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4011 for(i=0;i<split_merge.size();++i){
4012 ret_vec.push_back(split_merge[i]);
4015 ret_vec.push_back(mrg_node);
4016 ret_vec.push_back(stream_node);
4017 hfta_returned = 1/*split_merge.size()*/+1;
// Single interface: bind the LFTA directly to ifaces[0].
4020 fta_node->table_name->set_machine(ifaces[0].first);
4021 fta_node->table_name->set_interface(ifaces[0].second);
4022 fta_node->table_name->set_ifq(false);
4023 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4024 this->error_code = 3;
4027 ret_vec.push_back(fta_node);
4028 ret_vec.push_back(stream_node);
4038 Splitting an aggregation operator
4040 An aggregation operator can reference
4041 literals, parameters, colrefs, group-by vars, aggregates,
4042 operators, functions
4044 an aggregation contains
4045 A selection list of SEs
4046 A where list of predicates
4047 A list of group-by variable definitions
4048 A list of aggregates to be computed
4049 A HAVING list of predicates.
4051 Aggregation involves two phases:
4052 1) given an input tuple, determine if it satisfies all of
4053 the WHERE predicates. If so, compute the group.
4054 Look up the group, update its aggregates.
4055 2) given a closed group and its aggregates, determine
4056 if these values satisfy all of the HAVING predicates.
4057 If so, evaluate the SEs on the selection list from the
4058 group and its aggregates.
4059 The two-phase nature of aggregation places restrictions on
4060 what can be referenced by different components of the operator
4061 (in addition to functions and operators).
4062 - group-by variables : literals, parameters, colrefs
4063 - WHERE predicates : group-by vars, literals, params, colrefs
4064 - HAVING predicates : group-by vars, literals, params, aggregates
4065 - Selection list SEs : group-by vars, literals, params, aggregates
4067 Splitting an aggregation operator into an LFTA/HFTA part
4068 involves performing partial aggregation at the LFTA and
4069 completing the aggregation at the HFTA.
4070 - given a tuple, the LFTA part evaluates the WHERE clause,
4071 and if it is satisfied, computes the group. lookup the group
4072 and update the aggregates. output the group and its partial
4074 - Given a partial aggregate from the LFTA, look up the group and
4075 update its aggregates. When the group is closed, evaluate
4076 the HAVING clause and the SEs on the selection list.
4077 THEREFORE the selection list of the LFTA must consist of the
4078 group-by variables and the set of (bare) subaggregate values
4079 necessary to compute the super aggregates.
4080 Unlike the case with the SPX operator, the SE splitting point
4081 is at the GBvar and the aggregate value level.
4084 For each group-by variable
4085 Put the GB variable definition in the LFTA GBVAR list.
4086 Put the GBVAR in the LFTA selection list (as an SE).
4087 Put a reference to that GBVAR in the HFTA GBVAR list.
4089 Split the aggregate into a superaggregate and a subaggregate.
4090 The SE of the superaggregate references the subaggregate value.
4091 (this will need modifications for MF aggregation)
4092 For each SE in the selection list, HAVING predicate
4093 Make GBVAR references point to the new GBVAR
4094 make the aggregate value references point to the new aggregates.
4096 SEs are not so much split as their ref's are changed.
4098 TODO: insert tablevar names into the colrefs.
4103 vector<qp_node *> sgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4107 vector<qp_node *> ret_vec;
4108 int s, p, g, a, o, i;
4111 vector<string> fta_flds, stream_flds;
4113 // If the node reads from a stream, don't split.
4114 // int t = Schema->get_table_ref(table_name->get_schema_name());
4115 int t = table_name->get_schema_ref();
4116 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
4117 ret_vec.push_back(this);
4121 // Get the set of interfaces it accesses.
4123 vector<string> sel_names;
4124 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
4125 if (ifaces.empty()) {
4126 fprintf(stderr,"INTERNAL ERROR in sgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
4132 //////////////////////////////////////////////
4133 // Is this LFTA-only?
4134 if(definitions.count("lfta_aggregation")>0){
4135 // Yes. Ensure that everything is lfta-safe.
4137 // Check only one interface is accessed.
4138 if(ifaces.size()>1){
4139 this->err_str = "ERROR, group-by query "+node_name+" is lfta-only, but it accesses more than one interface:\n";
4140 for(si=0;si<ifaces.size();++si)
4141 this->err_str += "\t"+ifaces[si].first+"."+ifaces[si].second+"\n";
4142 this->error_code = 2;
4146 // Check the group-by attributes
4147 for(g=0;g<gb_tbl.size();g++){
4148 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4149 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition and the query is lfta-only (%s).\n",
4150 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4152 this->error_code = 1;
4153 this->err_str = tmpstr;
4158 // Verify that the SEs in the aggregate definitions are fta-safe
4159 for(a=0;a<aggr_tbl.size();++a){
4160 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4161 if(ase != NULL){ // COUNT(*) does not have a SE.
4162 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4163 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe scalar expression and the query is lfta-only (%s).\n",
4164 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4166 this->error_code = 1;
4167 this->err_str = tmpstr;
4171 if(! aggr_tbl.fta_legal(a,Ext_fcns)){
4172 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4173 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe aggregate and the query is lfta-only (%s).\n",
4174 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4176 this->error_code = 1;
4177 this->err_str = tmpstr;
4183 // Ensure that all the aggregates are fta-safe ....
4187 for(s=0;s<select_list.size();s++){
4188 if(! check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns)){
4189 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4190 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4192 this->error_code = 1;
4193 this->err_str = tmpstr;
4200 for(p=0;p<where.size();p++){
4201 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4202 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4203 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4205 this->error_code = 1;
4206 this->err_str = tmpstr;
4213 if(having.size()>0){
4214 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : the query is lfta-only, so it can't have a HAVING clause.(%s).\n",
4215 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4217 this->error_code = 1;
4218 this->err_str = tmpstr;
4221 // The query is lfta safe, return it.
4224 ret_vec.push_back(this);
4228 //////////////////////////////////////////////////////////////
4229 /// Split into lfta, hfta.
4231 // A sgah node must always be split,
4232 // if for no other reason than to complete the
4233 // partial aggregation.
4235 // First, determine if the query can be spit into aggr/aggr,
4236 // or if it must be selection/aggr.
4237 // Splitting into selection/aggr is allowed only
4238 // if select_lfta is set.
4241 bool select_allowed = definitions.count("select_lfta")>0;
4242 bool select_rqd = false;
4244 set<int> unsafe_gbvars; // for processing where clause
4245 for(g=0;g<gb_tbl.size();g++){
4246 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4247 if(!select_allowed){
4248 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
4249 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4251 this->error_code = 1;
4252 this->err_str = tmpstr;
4256 unsafe_gbvars.insert(g);
4261 // Verify that the SEs in the aggregate definitions are fta-safe
4262 for(a=0;a<aggr_tbl.size();++a){
4263 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4264 if(ase != NULL){ // COUNT(*) does not have a SE.
4265 if(!select_allowed){
4266 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4267 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
4268 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4270 this->error_code = 1;
4271 this->err_str = tmpstr;
4280 // Verify that all of the ref'd UDAFs can be split.
4282 for(a=0;a<aggr_tbl.size();++a){
4283 if(! aggr_tbl.is_builtin(a)){
4284 int afcn = aggr_tbl.get_fcn_id(a);
4285 int super_id = Ext_fcns->get_superaggr_id(afcn);
4286 int sub_id = Ext_fcns->get_subaggr_id(afcn);
4287 if(super_id < 0 || sub_id < 0){
4288 if(!select_allowed){
4289 this->err_str += "ERROR in sgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
4290 this->error_code = 1;
4299 for(p=0;p<where.size();p++){
4300 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4301 if(!select_allowed){
4302 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
4303 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4305 this->error_code = 1;
4306 this->err_str = tmpstr;
4317 /////////////////////////////////////////////////////
4318 // Split into aggr/aggr.
4324 sgah_qpn *fta_node = new sgah_qpn();
4325 fta_node->table_name = table_name;
4326 fta_node->set_node_name( "_fta_"+node_name );
4327 fta_node->table_name->set_range_var(table_name->get_var_name());
4330 sgah_qpn *stream_node = new sgah_qpn();
4331 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4332 stream_node->set_node_name( node_name );
4333 stream_node->table_name->set_range_var(table_name->get_var_name());
4335 // allowed stream disorder. Default is 1,
4336 // can override with max_lfta_disorder setting.
4337 // Also limit the hfta disorder, set to lfta disorder + 1.
4338 // can override with max_hfta_disorder.
4340 fta_node->lfta_disorder = 1;
4341 if(this->get_val_of_def("max_lfta_disorder") != ""){
4342 int d = atoi(this->get_val_of_def("max_lfta_disorder").c_str() );
4344 fprintf(stderr,"Warning, max_lfta_disorder in node %s is %d, must be at least 1, ignoring.\n",node_name.c_str(), d);
4346 fta_node->lfta_disorder = d;
4347 printf("node %s setting lfta_disorder = %d\n",node_name.c_str(),fta_node->lfta_disorder);
4350 if(fta_node->lfta_disorder > 1)
4351 stream_node->hfta_disorder = fta_node->lfta_disorder + 1;
4353 stream_node->hfta_disorder = 1;
4355 if(this->get_val_of_def("max_hfta_disorder") != ""){
4356 int d = atoi(this->get_val_of_def("max_hfta_disorder").c_str() );
4357 if(d<fta_node->lfta_disorder){
4358 fprintf(stderr,"Warning, max_hfta_disorder in node %s is %d, must be at least the max lfta disorder %d, ignoring.\n",node_name.c_str(), d,fta_node->lfta_disorder);
4360 fta_node->lfta_disorder = d;
4362 if(fta_node->lfta_disorder < fta_node->hfta_disorder){
4363 fta_node->hfta_disorder = fta_node->lfta_disorder + 1;
4368 // First, process the group-by variables.
4369 // The fta must supply the values of all the gbvars.
4370 // If a gb is computed, the computation must be
4371 // performed at the FTA, so the SE must be FTA-safe.
4372 // Nice side effect : the gbvar table contains
4373 // matching entries for the original query, the lfta query,
4374 // and the hfta query. So gbrefs in the new queries are set
4375 // correctly just by inheriting the gbrefs from the old query.
4376 // If this property changed, I'll need translation tables.
4379 for(g=0;g<gb_tbl.size();g++){
4380 // Insert the gbvar into the lfta.
4381 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
4382 fta_node->gb_tbl.add_gb_var(
4383 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
4386 // Insert a ref to the value of the gbvar into the lfta select list.
4387 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
4388 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
4389 gbvar_fta->set_gb_ref(g);
4390 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
4391 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
4393 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
4394 gbvar_stream->set_gb_ref(-1); // used as GBvar def
4395 stream_node->gb_tbl.add_gb_var(
4396 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
4399 // multiple aggregation patterns, if any, go with the hfta
4400 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4402 // SEs in the aggregate definitions.
4403 // They are all safe, so split them up for later processing.
4404 map<int, scalarexp_t *> hfta_aggr_se;
4405 for(a=0;a<aggr_tbl.size();++a){
4406 split_fta_aggr( &(aggr_tbl), a,
4407 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
4408 fta_node->select_list,
4416 for(ii=0;ii<fta_flds.size() || ii < fta_node->select_list.size();++ii){
4417 if(ii<fta_flds.size())
4418 printf("\t%s : ",fta_flds[ii].c_str());
4421 if(ii<fta_node->select_list.size())
4422 printf("%s\n",fta_node->select_list[ii]->to_string().c_str());
4426 printf("hfta aggregates are:");
4427 for(ii=0;ii<stream_node->aggr_tbl.size();++ii){
4428 printf(" %s",stream_node->aggr_tbl.get_op(ii).c_str());
4430 printf("\nlfta aggregates are:");
4431 for(ii=0;ii<fta_node->aggr_tbl.size();++ii){
4432 printf(" %s",fta_node->aggr_tbl.get_op(ii).c_str());
4440 // Next, the select list.
4442 for(s=0;s<select_list.size();s++){
4443 bool fta_forbidden = false;
4444 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4445 stream_node->select_list.push_back(
4446 new select_element(root_se, select_list[s]->name));
4451 // All the predicates in the where clause must execute
4454 for(p=0;p<where.size();p++){
4455 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
4456 cnf_elem *new_cnf = new cnf_elem(new_pr);
4457 analyze_cnf(new_cnf);
4459 fta_node->where.push_back(new_cnf);
4462 // All of the predicates in the having clause must
4463 // execute in the stream node.
4465 for(p=0;p<having.size();p++){
4466 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4467 cnf_elem *cnf_root = new cnf_elem(pr_root);
4468 analyze_cnf(cnf_root);
4470 stream_node->having.push_back(cnf_root);
4474 // Divide the parameters among the stream, FTA.
4475 // Currently : assume that the stream receives all parameters
4476 // and parameter updates, incorporates them, then passes
4477 // all of the parameters to the FTA.
4478 // This will need to change (tables, fta-unsafe types. etc.)
4480 // I will pass on the use_handle_access marking, even
4481 // though the fcn call that requires handle access might
4482 // exist in only one of the parts of the query.
4483 // Parameter manipulation and handle access determination will
4484 // need to be revisited anyway.
4485 vector<string> param_names = param_tbl->get_param_names();
4487 for(pi=0;pi<param_names.size();pi++){
4488 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4489 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4490 param_tbl->handle_access(param_names[pi]));
4491 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4492 param_tbl->handle_access(param_names[pi]));
4494 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4495 stream_node->definitions = definitions;
4497 // Now split by interfaces XXXX
4498 if(ifaces.size() > 1){
4499 for(si=0;si<ifaces.size();++si){
4500 sgah_qpn *subq_node = new sgah_qpn();
4502 // Name the subquery
4503 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4505 subq_node->set_node_name( new_name) ;
4506 sel_names.push_back(subq_node->get_node_name());
4509 subq_node->table_name = fta_node->table_name->duplicate();
4510 subq_node->table_name->set_machine(ifaces[si].first);
4511 subq_node->table_name->set_interface(ifaces[si].second);
4512 subq_node->table_name->set_ifq(false);
4515 for(g=0;g<fta_node->gb_tbl.size();g++){
4516 // Insert the gbvar into the lfta.
4517 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
4518 subq_node->gb_tbl.add_gb_var(
4519 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
4523 // Insert the aggregates
4524 for(a=0;a<fta_node->aggr_tbl.size();++a){
4525 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
4528 for(s=0;s<fta_node->select_list.size();s++){
4529 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4531 for(p=0;p<fta_node->where.size();p++){
4532 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4533 cnf_elem *new_cnf = new cnf_elem(new_pr);
4534 analyze_cnf(new_cnf);
4536 subq_node->where.push_back(new_cnf);
4538 for(p=0;p<fta_node->having.size();p++){
4539 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
4540 cnf_elem *new_cnf = new cnf_elem(new_pr);
4541 analyze_cnf(new_cnf);
4543 subq_node->having.push_back(new_cnf);
4545 // Xfer all of the parameters.
4546 // Use existing handle annotations.
4547 vector<string> param_names = param_tbl->get_param_names();
4549 for(pi=0;pi<param_names.size();pi++){
4550 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4551 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4552 param_tbl->handle_access(param_names[pi]));
4554 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4555 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4556 this->error_code = 3;
4561 subq_node->lfta_disorder = fta_node->lfta_disorder;
4563 ret_vec.push_back(subq_node);
4566 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
4567 fta_node->node_name, sel_names, ifaces, ifdb);
4568 mrg_node->set_disorder(fta_node->lfta_disorder);
4571 Do not split sources until we are done with optimizations
4572 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4573 for(i=0;i<split_merge.size();++i){
4574 ret_vec.push_back(split_merge[i]);
4577 ret_vec.push_back(mrg_node);
4578 ret_vec.push_back(stream_node);
4579 hfta_returned = 1/*split_merge.size()*/+1;
4582 fta_node->table_name->set_machine(ifaces[0].first);
4583 fta_node->table_name->set_interface(ifaces[0].second);
4584 fta_node->table_name->set_ifq(false);
4585 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4586 this->error_code = 3;
4589 ret_vec.push_back(fta_node);
4590 ret_vec.push_back(stream_node);
4595 // ret_vec.push_back(fta_node);
4596 // ret_vec.push_back(stream_node);
4603 /////////////////////////////////////////////////////////////////////
4604 /// Split into selection LFTA, aggregation HFTA.
4606 spx_qpn *fta_node = new spx_qpn();
4607 fta_node->table_name = table_name;
4608 fta_node->set_node_name( "_fta_"+node_name );
4609 fta_node->table_name->set_range_var(table_name->get_var_name());
4612 sgah_qpn *stream_node = new sgah_qpn();
4613 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4614 stream_node->set_node_name( node_name );
4615 stream_node->table_name->set_range_var(table_name->get_var_name());
4618 vector< vector<select_element *> *> select_vec;
4619 select_vec.push_back(&(fta_node->select_list)); // only one child
4621 // Process the gbvars. Split their defining SEs.
4622 for(g=0;g<gb_tbl.size();g++){
4623 bool fta_forbidden = false;
4624 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4626 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
4627 fta_forbidden, se_src, select_vec, Ext_fcns
4629 // if(fta_forbidden) (
4630 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4631 stream_node->gb_tbl.add_gb_var(
4632 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
4635 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
4636 stream_node->gb_tbl.add_gb_var(
4637 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
4641 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4643 // Process the aggregate table.
4644 // Copy to stream, split the SEs.
4645 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
4646 for(a=0;a<aggr_tbl.size();++a){
4648 if(aggr_tbl.is_builtin(a)){
4649 if(aggr_tbl.is_star_aggr(a)){
4650 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
4651 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
4653 bool fta_forbidden = false;
4654 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4656 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4657 fta_forbidden, se_src, select_vec, Ext_fcns
4659 // if(fta_forbidden) (
4660 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4661 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
4662 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
4664 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4665 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
4666 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
4669 hse->set_data_type(aggr_tbl.get_data_type(a));
4670 hse->set_aggr_id(a);
4671 hfta_aggr_se[a]=hse;
4673 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
4674 vector<scalarexp_t *> new_opl;
4675 for(o=0;o<opl.size();++o){
4676 bool fta_forbidden = false;
4677 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4678 scalarexp_t *agg_se = split_ftavec_se( opl[o],
4679 fta_forbidden, se_src, select_vec, Ext_fcns
4681 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4682 // fta_forbidden, se_src, select_vec, Ext_fcns
4684 // if(fta_forbidden) (
4685 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4686 new_opl.push_back(agg_se);
4688 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4689 new_opl.push_back(new_se);
4692 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
4693 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
4694 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
4695 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
4696 hse->set_aggr_id(a);
4697 hfta_aggr_se[a]=hse;
4702 // Process the WHERE clause.
4703 // If it is fta-safe AND it refs only fta-safe gbvars,
4704 // then expand the gbvars and put it into the lfta.
4705 // Else, split it into an hfta predicate ref'ing
4706 // se's computed partially in the lfta.
4708 predicate_t *pr_root;
4710 for(p=0;p<where.size();p++){
4711 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
4712 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
4713 fta_forbidden = true;
4715 pr_root = dup_pr(where[p]->pr, NULL);
4716 expand_gbvars_pr(pr_root, gb_tbl);
4717 fta_forbidden = false;
4719 cnf_elem *cnf_root = new cnf_elem(pr_root);
4720 analyze_cnf(cnf_root);
4723 stream_node->where.push_back(cnf_root);
4725 fta_node->where.push_back(cnf_root);
4730 // Process the Select clause, rehome it on the
4732 for(s=0;s<select_list.size();s++){
4733 bool fta_forbidden = false;
4734 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4735 stream_node->select_list.push_back(
4736 new select_element(root_se, select_list[s]->name));
4740 // Process the Having clause
4742 // All of the predicates in the having clause must
4743 // execute in the stream node.
4745 for(p=0;p<having.size();p++){
4746 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4747 cnf_elem *cnf_root = new cnf_elem(pr_root);
4748 analyze_cnf(cnf_root);
4750 stream_node->having.push_back(cnf_root);
4753 // Handle parameters and a few last details.
4754 vector<string> param_names = param_tbl->get_param_names();
4756 for(pi=0;pi<param_names.size();pi++){
4757 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4758 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4759 param_tbl->handle_access(param_names[pi]));
4760 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4761 param_tbl->handle_access(param_names[pi]));
4764 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4765 stream_node->definitions = definitions;
4767 // Now split by interfaces YYYY
4768 if(ifaces.size() > 1){
4769 for(si=0;si<ifaces.size();++si){
4770 spx_qpn *subq_node = new spx_qpn();
4772 // Name the subquery
4773 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4775 subq_node->set_node_name( new_name) ;
4776 sel_names.push_back(subq_node->get_node_name());
4779 subq_node->table_name = fta_node->table_name->duplicate();
4780 subq_node->table_name->set_machine(ifaces[si].first);
4781 subq_node->table_name->set_interface(ifaces[si].second);
4782 subq_node->table_name->set_ifq(false);
4784 for(s=0;s<fta_node->select_list.size();s++){
4785 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4787 for(p=0;p<fta_node->where.size();p++){
4788 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4789 cnf_elem *new_cnf = new cnf_elem(new_pr);
4790 analyze_cnf(new_cnf);
4792 subq_node->where.push_back(new_cnf);
4794 // Xfer all of the parameters.
4795 // Use existing handle annotations.
4796 vector<string> param_names = param_tbl->get_param_names();
4798 for(pi=0;pi<param_names.size();pi++){
4799 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4800 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4801 param_tbl->handle_access(param_names[pi]));
4803 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4804 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4805 this->error_code = 3;
4809 ret_vec.push_back(subq_node);
4812 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4813 fta_node->node_name, sel_names, ifaces, ifdb);
4815 Do not split sources until we are done with optimizations
4816 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4817 for(i=0;i<split_merge.size();++i){
4818 ret_vec.push_back(split_merge[i]);
4821 ret_vec.push_back(mrg_node);
4822 ret_vec.push_back(stream_node);
4823 hfta_returned = 1/*split_merge.size()*/+1;
4826 fta_node->table_name->set_machine(ifaces[0].first);
4827 fta_node->table_name->set_interface(ifaces[0].second);
4828 fta_node->table_name->set_ifq(false);
4829 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4830 this->error_code = 3;
4833 ret_vec.push_back(fta_node);
4834 ret_vec.push_back(stream_node);
4839 // ret_vec.push_back(fta_node);
4840 // ret_vec.push_back(stream_node);
4849 SPLITTING AN EQ-TEMPORAL, HASH JOIN OPERATOR
4851 A JOIN_EQ_HASH_QPN node may reference:
4852 literals, parameters, colrefs, functions, operators
4853 A JOIN_EQ_HASH_QPN node may not reference:
4854 group-by variables, aggregates
4856 A JOIN_EQ_HASH_QPN node contains
4857 selection list of SEs
4858 where list of CNF predicates, broken into:
4865 For each tablevar whose source is a PROTOCOL
4866 Create a LFTA for that tablevar
4867 Push as many prefilter[..] predicates to that tablevar as is
4869 Split the SEs in the select list, and the predicates not
// Splits this join into low-level selection nodes plus one high-level join node.
//
// For every FROM source whose schema type is PROTOCOL_SCHEMA an spx_qpn child
// named "_fta_<f>_<node_name>" is created; a new join_eq_hash_qpn (stream_node)
// keeps this node's name and has the matching FROM entry redirected to that child.
//
// Ext_fcns      - forwarded to check_fta_forbidden_pr / split_ftavec_pr / split_ftavec_se.
// Schema        - used to look up the schema type of each FROM source.
// hfta_returned - out: on the split path it is set to hfta_nodes.size()+1
//                 (the merge nodes plus stream_node).
// ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx
//               - forwarded to get_ifaces(); ifdb is also passed to
//                 resolve_if_params() and to the mrg_qpn constructor.
//
// Returns ret_vec: the spx child/subquery nodes first, then the merge nodes,
// then stream_node last.  Whenever resolve_if_params() returns nonzero,
// error_code is set to 3.
4874 vector<qp_node *> join_eq_hash_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4876 vector<qp_node *> ret_vec;
4879 // If the node reads from streams only, don't split.
4880 bool stream_only = true;
4881 for(f=0;f<from.size();++f){
4882 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4883 int t = from[f]->get_schema_ref();
4884 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA) stream_only = false;
// Unsplit case: this node itself is handed back.
// NOTE(review): presumably guarded by stream_only and followed by an early return - confirm.
4888 ret_vec.push_back(this);
4893 // The HFTA node, it is always returned.
4895 join_eq_hash_qpn *stream_node = new join_eq_hash_qpn();
4896 for(f=0;f<from.size();++f){
4897 // tablevar_t *tmp_tblvar = new tablevar_t( from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str());
4898 tablevar_t *tmp_tblvar = from[f]->duplicate();
4899 // tmp_tblvar->set_range_var(from[f]->get_var_name());
4901 stream_node->from.push_back(tmp_tblvar);
4903 stream_node->set_node_name(node_name);
4905 // Create spx (selection) children for each PROTOCOL source.
// child_vec and select_vec are filled once per FROM entry so that index f in
// either vector corresponds to from[f].
4906 vector<spx_qpn *> child_vec;
4907 vector< vector<select_element *> *> select_vec;
4908 for(f=0;f<from.size();++f){
4909 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4910 int t = from[f]->get_schema_ref();
4911 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA){
4912 spx_qpn *child_qpn = new spx_qpn();
// NOTE(review): unbounded sprintf into tmpstr; node_name's length is not
// checked here - confirm tmpstr's capacity or switch to snprintf.
4913 sprintf(tmpstr,"_fta_%d_%s",f,node_name.c_str());
4914 child_qpn->set_node_name(string(tmpstr));
4915 child_qpn->table_name = new tablevar_t(
4916 from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str(), from[f]->get_ifq());
4917 child_qpn->table_name->set_range_var(from[f]->get_var_name());
4918 child_qpn->table_name->set_machine(from[f]->get_machine());
4920 child_vec.push_back(child_qpn);
4921 select_vec.push_back(&(child_qpn->select_list));
4923 // Update the stream's FROM clause to read from this child
// tmpstr still holds the child's node name written above.
4924 stream_node->from[f]->set_interface("");
4925 stream_node->from[f]->set_schema(tmpstr);
// NULL placeholders keep both vectors index-aligned with from[]
// (presumably the non-PROTOCOL branch - confirm).
4927 child_vec.push_back(NULL);
4928 select_vec.push_back(NULL);
4932 // Push lfta-safe prefilter to the lfta
4933 // TODO: I'm not copying the preds, I don't *think* it will be a problem.
4934 predicate_t *pr_root;
4936 for(f=0;f<from.size();++f){
// pred_vec is a by-value copy of the pointer vector; the cnf_elem objects
// themselves are shared, not duplicated.
4937 vector<cnf_elem *> pred_vec = prefilter[f];
4938 if(child_vec[f] != NULL){
4939 for(p=0;p<pred_vec.size();++p){
// A predicate for which check_fta_forbidden_pr() returns true is moved to
// the child's WHERE as-is; the split below is presumably the alternative
// branch - confirm.
4940 if(check_fta_forbidden_pr(pred_vec[p]->pr,NULL, Ext_fcns)){
4941 child_vec[f]->where.push_back(pred_vec[p]);
4943 pr_root = split_ftavec_pr(pred_vec[p]->pr,select_vec,Ext_fcns);
4944 cnf_elem *cnf_root = new cnf_elem(pr_root);
4945 analyze_cnf(cnf_root);
4946 stream_node->prefilter[f].push_back(cnf_root);
// Prefilter predicates are carried over to the stream node unchanged
// (presumably for sources without a child - confirm).
4950 for(p=0;p<pred_vec.size();++p){
4951 stream_node->prefilter[f].push_back(pred_vec[p]);
4957 // Process the other predicates
// temporal_eq, hash_eq and postfilter predicates are each run through
// split_ftavec_pr and the result is stored in the matching stream_node list.
4958 for(p=0;p<temporal_eq.size();++p){
4959 pr_root = split_ftavec_pr(temporal_eq[p]->pr,select_vec,Ext_fcns);
4960 cnf_elem *cnf_root = new cnf_elem(pr_root);
4961 analyze_cnf(cnf_root);
4962 stream_node->temporal_eq.push_back(cnf_root);
4964 for(p=0;p<hash_eq.size();++p){
4965 pr_root = split_ftavec_pr(hash_eq[p]->pr,select_vec,Ext_fcns);
4966 cnf_elem *cnf_root = new cnf_elem(pr_root);
4967 analyze_cnf(cnf_root);
4968 stream_node->hash_eq.push_back(cnf_root);
4970 for(p=0;p<postfilter.size();++p){
4971 pr_root = split_ftavec_pr(postfilter[p]->pr,select_vec,Ext_fcns);
4972 cnf_elem *cnf_root = new cnf_elem(pr_root);
4973 analyze_cnf(cnf_root);
4974 stream_node->postfilter.push_back(cnf_root);
// Select list: every element ends up in stream_node->select_list under its
// original name.  When fta_forbidden is set or se_src is not a PROTOCOL source
// the split SE is used directly; the make_fta_se_ref() form is presumably the
// alternative branch - confirm.
4978 for(s=0;s<select_list.size();s++){
4979 bool fta_forbidden = false;
4980 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4981 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
4982 fta_forbidden, se_src, select_vec, Ext_fcns
4984 if(fta_forbidden || !is_PROTOCOL_source(se_src, select_vec)){
4985 stream_node->select_list.push_back(
4986 new select_element(root_se, select_list[s]->name) );
4988 scalarexp_t *new_se=make_fta_se_ref(select_vec,root_se,se_src);
4989 stream_node->select_list.push_back(
4990 new select_element(new_se, select_list[s]->name)
4996 // I need to "rehome" the colrefs -- make the annotations in the colrefs
4997 // agree with their tablevars.
4998 for(f=0;f<child_vec.size();++f){
4999 if(child_vec[f]!=NULL){
// Each child is bound against a one-element tablevar list holding only its
// own table_name, with both index arguments 0.
5000 vector<tablevar_t *> fm; fm.push_back(child_vec[f]->table_name);
5002 for(s=0;s<child_vec[f]->select_list.size();++s)
5003 bind_colref_se(child_vec[f]->select_list[s]->se, fm,0,0);
5004 for(p=0;p<child_vec[f]->where.size();++p)
5005 // bind_colref_pr(child_vec[f]->where[p]->pr, fm,f,0);
5006 bind_colref_pr(child_vec[f]->where[p]->pr, fm,0,0);
5010 // rehome the colrefs in the hfta node.
// Every predicate list and the select list are rebound once per FROM index f,
// passing f for both index arguments.
5011 for(f=0;f<stream_node->from.size();++f){
5012 stream_node->where.clear();
5013 for(s=0;s<stream_node->from.size();++s){
5014 for(p=0;p<stream_node->prefilter[s].size();++p){
5015 bind_colref_pr((stream_node->prefilter[s])[p]->pr,stream_node->from,f,f);
5018 for(p=0;p<stream_node->temporal_eq.size();++p){
5019 bind_colref_pr(stream_node->temporal_eq[p]->pr,stream_node->from,f,f);
5021 for(p=0;p<stream_node->hash_eq.size();++p){
5022 bind_colref_pr(stream_node->hash_eq[p]->pr,stream_node->from,f,f);
5024 for(p=0;p<stream_node->postfilter.size();++p){
5025 bind_colref_pr(stream_node->postfilter[p]->pr,stream_node->from,f,f);
5027 for(s=0;s<stream_node->select_list.size();++s){
5028 bind_colref_se(stream_node->select_list[s]->se,stream_node->from,f,f);
5032 // Rebuild the WHERE clause
// Order: all prefilter lists, then temporal_eq, hash_eq, postfilter.
5033 stream_node->where.clear();
5034 for(s=0;s<stream_node->from.size();++s){
5035 for(p=0;p<stream_node->prefilter[s].size();++p){
5036 stream_node->where.push_back((stream_node->prefilter[s])[p]);
5039 for(p=0;p<stream_node->temporal_eq.size();++p){
5040 stream_node->where.push_back(stream_node->temporal_eq[p]);
5042 for(p=0;p<stream_node->hash_eq.size();++p){
5043 stream_node->where.push_back(stream_node->hash_eq[p]);
5045 for(p=0;p<stream_node->postfilter.size();++p){
5046 stream_node->where.push_back(stream_node->postfilter[p]);
5050 // Build the return list
// Merge nodes are collected in hfta_nodes so they can be appended to ret_vec
// after all low-level nodes.
5051 vector<qp_node *> hfta_nodes;
5053 for(f=0;f<from.size();++f){
5054 if(child_vec[f] != NULL){
5055 spx_qpn *c_node = child_vec[f];
5056 vector<pair<string, string> > ifaces = get_ifaces(c_node->table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
5057 if (ifaces.empty()) {
5058 fprintf(stderr,"INTERNAL ERROR in join_eq_hash_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
// Exactly one interface: bind the child to it (first = machine,
// second = interface) and return the child itself.
5062 if(ifaces.size() == 1){
5063 c_node->table_name->set_machine(ifaces[0].first);
5064 c_node->table_name->set_interface(ifaces[0].second);
5065 c_node->table_name->set_ifq(false);
5066 if(c_node->resolve_if_params(ifdb, this->err_str)){
5067 this->error_code = 3;
5070 ret_vec.push_back(c_node);
// Several interfaces: one copy of the child per interface, named
// "_<child name>_<machine>_<interface>", later combined by a merge node.
5072 vector<string> sel_names;
5074 for(si=0;si<ifaces.size();++si){
5075 spx_qpn *subq_node = new spx_qpn();
5077 // Name the subquery
5078 string new_name = "_"+c_node->node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
5080 subq_node->set_node_name( new_name) ;
5081 sel_names.push_back(subq_node->get_node_name());
5084 subq_node->table_name = c_node->table_name->duplicate();
5085 subq_node->table_name->set_machine(ifaces[si].first);
5086 subq_node->table_name->set_interface(ifaces[si].second);
5087 subq_node->table_name->set_ifq(false);
5089 for(s=0;s<c_node->select_list.size();s++){
5090 subq_node->select_list.push_back(dup_select(c_node->select_list[s], NULL));
5092 for(p=0;p<c_node->where.size();p++){
5093 predicate_t *new_pr = dup_pr(c_node->where[p]->pr, NULL);
5094 cnf_elem *new_cnf = new cnf_elem(new_pr);
5095 analyze_cnf(new_cnf);
// NOTE(review): this printf writes to stdout alongside the predicate copy;
// it looks like leftover debugging output - confirm before removing.
5097 printf("table name is %s\n",subq_node->table_name->to_string().c_str());
5098 subq_node->where.push_back(new_cnf);
5100 // Xfer all of the parameters.
5101 // Use existing handle annotations.
// The per-subquery transfer below is commented out; parameters and definitions
// are instead added to every returned node at the end of this function.
5102 // vector<string> param_names = param_tbl->get_param_names();
5104 // for(pi=0;pi<param_names.size();pi++){
5105 // data_type *dt = param_tbl->get_data_type(param_names[pi]);
5106 // subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
5107 // param_tbl->handle_access(param_names[pi]));
5109 // subq_node->definitions = definitions;
5111 if(subq_node->resolve_if_params(ifdb, this->err_str)){
5112 this->error_code = 3;
5116 ret_vec.push_back(subq_node);
// The most recently pushed subquery is passed to the merge node's constructor,
// and the merge node takes over the child's node name.
5118 int lpos = ret_vec.size()-1 ;
5119 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[lpos]),c_node->node_name,sel_names, ifaces, ifdb);
// NOTE(review): the next line is prose, so it and the split_sources() loop
// after it are presumably inside a disabled block comment - confirm.
5121 Do not split sources until we are done with optimizations
5122 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
5124 for(i=0;i<split_merge.size();++i){
5125 hfta_nodes.push_back(split_merge[i]);
5128 hfta_nodes.push_back(mrg_node);
// Final ordering: low-level nodes (already in ret_vec), merge nodes, then the
// join itself.
5133 for(i=0;i<hfta_nodes.size();++i) ret_vec.push_back(hfta_nodes[i]);
5134 ret_vec.push_back(stream_node);
5135 hfta_returned = hfta_nodes.size()+1;
5137 // Currently : assume that the stream receives all parameters
5138 // and parameter updates, incorporates them, then passes
5139 // all of the parameters to the FTA.
5140 // This will need to change (tables, fta-unsafe types. etc.)
5142 // I will pass on the use_handle_access marking, even
5143 // though the fcn call that requires handle access might
5144 // exist in only one of the parts of the query.
5145 // Parameter manipulation and handle access determination will
5146 // need to be revisited anyway.
5147 vector<string> param_names = param_tbl->get_param_names();
5149 for(pi=0;pi<param_names.size();pi++){
5151 data_type *dt = param_tbl->get_data_type(param_names[pi]);
5152 for(ri=0;ri<ret_vec.size();++ri){
// Each returned node gets its own duplicate of the parameter's data type.
5153 ret_vec[ri]->param_tbl->add_param(param_names[pi],dt->duplicate(),
5154 param_tbl->handle_access(param_names[pi]));
// Definitions are copied without the "_referenced_ifaces" entry.
// NOTE(review): if this statement sits inside the parameter loop, nodes receive
// no definitions when the query has no parameters - confirm placement.
5155 ret_vec[ri]->definitions = definitions; ret_vec[ri]->definitions.erase("_referenced_ifaces");
5166 /////////////////////////////////////////////////////////////
5169 // Common processing
// Shared helper behind the extract_opview() methods.
//
// When fmtbl's schema (looked up through get_schema_ref()) has type
// OPERATOR_VIEW_SCHEMA, this builds an opview_entry for it, validates every
// subquery spec of the view against the fields registered in Schema, copies the
// parse tree of the matching query into ret under a generated name, and finally
// registers the entry in opviews, recording the returned index on fmtbl.
//
// fmtbl     - table variable to inspect; receives the UDOP alias and opview index.
// pos       - position of fmtbl in its node; only used inside generated names.
// node_name - name of the owning node; stored as parent_qname and used as name prefix.
// qnodes    - searched by name for each subquery of the view.
// opviews   - receives the new opview_entry via append().
// ret       - out: receives one duplicated table_exp_t per subquery.
// rootnm    - stored as the entry's root_name.
// silo_nm   - appended to the subquery name when looking up its fields in Schema.
//
// NOTE(review): the return statements are not among the lines shown here, so
// the meaning of the int result is not documented - confirm against callers.
5170 int process_opview(tablevar_t *fmtbl, int pos, string node_name,
5172 vector<query_node *> &qnodes,
5173 opview_set &opviews,
5174 vector<table_exp_t *> &ret, string rootnm, string silo_nm){
5178 int schref = fmtbl->get_schema_ref();
5182 if(Schema->get_schema_type(schref) == OPERATOR_VIEW_SCHEMA){
5183 opview_entry *opv = new opview_entry();
5184 opv->parent_qname = node_name;
5185 opv->root_name = rootnm;
5186 opv->view_name = fmtbl->get_schema_name();
// Alias has the form "<node_name>_UDOP<pos>_<view_name>".
// NOTE(review): unbounded sprintf into tmpstr - confirm the buffer is large
// enough for node and view names, or switch to snprintf.
5188 sprintf(tmpstr,"%s_UDOP%d_%s",node_name.c_str(),pos,opv->view_name.c_str());
5189 opv->udop_alias = tmpstr;
5190 fmtbl->set_udop_alias(opv->udop_alias);
// Operator properties "file" and "liveness_timeout" come from the schema;
// the timeout string is converted with atoi().
5192 opv->exec_fl = Schema->get_op_prop(schref, string("file"));
5193 opv->liveness_timeout = atoi(Schema->get_op_prop(schref, string("liveness_timeout")).c_str());
5195 vector<subquery_spec *> subq = Schema->get_subqueryspecs(schref);
5196 for(s=0;s<subq.size();++s){
5197 // Validate that the fields match.
5198 subquery_spec *sqs = subq[s];
5199 vector<field_entry *> flds = Schema->get_fields(sqs->name+silo_nm);
// An empty field list is reported as the subquery not being found in Schema.
5200 if(flds.size() == 0){
5201 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in Schema.\n",sqs->name.c_str(), opv->view_name.c_str());
// The subquery may have more fields than the spec lists, but not fewer.
// NOTE(review): size() values are printed with %lu; %zu is the portable
// specifier for size_t.
5204 if(flds.size() < sqs->types.size()){
5205 fprintf(stderr,"ERROR: subquery %s of view %s does not have enough fields (%lu found, %lu expected).\n",sqs->name.c_str(), opv->view_name.c_str(),flds.size(), sqs->types.size());
// Field-by-field check over the first types.size() fields: the expected type
// (dte) must subsume the actual one (dtf), and a temporal expectation must
// match exactly.
5208 bool failed = false;
5209 for(f=0;f<sqs->types.size();++f){
5210 data_type dte(sqs->types[f],sqs->modifiers[f]);
5211 data_type dtf(flds[f]->get_type(),flds[f]->get_modifier_list());
5212 if(! dte.subsumes_type(&dtf) ){
5213 fprintf(stderr,"ERROR: subquery %s of view %s does not have the correct type for field %d (%s found, %s expected).\n",sqs->name.c_str(), opv->view_name.c_str(),f,dtf.to_string().c_str(), dte.to_string().c_str());
5217 if(dte.is_temporal() && (dte.get_temporal() != dtf.get_temporal()) ){
5218 string pstr = dte.get_temporal_string();
5219 fprintf(stderr,"ERROR: subquery %s of view %s does not have the expected temporal value %s of field %d.\n",sqs->name.c_str(), opv->view_name.c_str(),pstr.c_str(),f);
5226 /// Validation done, find the subquery, make a copy of the
5227 /// parse tree, and add it to the return list.
// Linear search by name; q == qnodes.size() afterwards means no match
// (the loop presumably breaks on the first hit - confirm).
5228 for(q=0;q<qnodes.size();++q)
5229 if(qnodes[q]->name == sqs->name)
5231 if(q==qnodes.size()){
5232 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in list of query names.\n",sqs->name.c_str(), opv->view_name.c_str());
// The copy is renamed "<node_name>_OP<pos>_<view_name>_SUBQ<s>" through its
// "query_name" entry and recorded both in ret and in the opview entry.
5236 table_exp_t *newq = dup_table_exp(qnodes[q]->parse_tree);
5237 sprintf(tmpstr,"%s_OP%d_%s_SUBQ%d",node_name.c_str(),pos,opv->view_name.c_str(),s);
5238 string newq_name = tmpstr;
5239 newq->nmap["query_name"] = newq_name;
5240 ret.push_back(newq);
5241 opv->subq_names.push_back(newq_name);
// Register the finished entry and remember its index on the table variable.
5243 fmtbl->set_opview_idx(opviews.append(opv));
// Collects operator-view subqueries for this selection node's single input.
// table_name is handed to process_opview() at position 0; process_opview()
// appends duplicated subquery parse trees to ret and new entries to opviews.
5249 vector<table_exp_t *> spx_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5250 vector<table_exp_t *> ret;
5252 int retval = process_opview(table_name,0,node_name,
5253 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collects operator-view subqueries for this aggregation node's single input.
// table_name is handed to process_opview() at position 0; process_opview()
// appends duplicated subquery parse trees to ret and new entries to opviews.
5259 vector<table_exp_t *> sgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5260 vector<table_exp_t *> ret;
5262 int retval = process_opview(table_name,0,node_name,
5263 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collects operator-view subqueries for this node's single input.
// table_name is handed to process_opview() at position 0; process_opview()
// appends duplicated subquery parse trees to ret and new entries to opviews.
5268 vector<table_exp_t *> rsgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5269 vector<table_exp_t *> ret;
5271 int retval = process_opview(table_name,0,node_name,
5272 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collects operator-view subqueries for this node's single input.
// table_name is handed to process_opview() at position 0; process_opview()
// appends duplicated subquery parse trees to ret and new entries to opviews.
5278 vector<table_exp_t *> sgahcwcb_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5279 vector<table_exp_t *> ret;
5281 int retval = process_opview(table_name,0,node_name,
5282 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collects operator-view subqueries for every input of this merge node.
// Each entry fm[f] is handed to process_opview() with its index f as the
// position, so generated names stay distinct per input.
5289 vector<table_exp_t *> mrg_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5290 vector<table_exp_t *> ret;
5292 for(f=0;f<fm.size();++f){
5293 int retval = process_opview(fm[f],f,node_name,
5294 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collects operator-view subqueries for every FROM source of this join.
// Each entry from[f] is handed to process_opview() with its index f as the
// position, so generated names stay distinct per source.
5303 vector<table_exp_t *> join_eq_hash_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5304 vector<table_exp_t *> ret;
5306 for(f=0;f<from.size();++f){
5307 int retval = process_opview(from[f],f,node_name,
5308 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collects operator-view subqueries for every FROM source of this filter join.
// Each entry from[f] is handed to process_opview() with its index f as the
// position, so generated names stay distinct per source.
5314 vector<table_exp_t *> filter_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5315 vector<table_exp_t *> ret;
5317 for(f=0;f<from.size();++f){
5318 int retval = process_opview(from[f],f,node_name,
5319 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collects operator-view subqueries for this watchlist join.
// Only from[0] is handed to process_opview(); other FROM entries are not
// examined here.
5325 vector<table_exp_t *> watch_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5326 vector<table_exp_t *> ret;
5327 int retval = process_opview(from[0],0,node_name,
5328 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Operator-view extraction for a watch-table node: none of the arguments are
// used and an empty list is always returned.
5335 vector<table_exp_t *> watch_tbl_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5336 vector<table_exp_t *> ret;
5337 return ret; // nothing to process
5342 //////////////////////////////////////////////////////////////////
5343 //////////////////////////////////////////////////////////////////
5344 /////// Additional methods
5348 //////////////////////////////////////////////////////////////////
5349 // Get schema of operator output
// Output schema of the merge operator: returns the stored table_layout pointer
// itself rather than building a new table_def.
5351 table_def *mrg_qpn::get_fields(){
5352 return(table_layout);
// Output schema of the watch-table operator: returns the stored table_layout
// pointer itself rather than building a new table_def.
5355 table_def *watch_tbl_qpn::get_fields(){
5356 return(table_layout);
// Output schema of the selection operator: obtained from create_attributes()
// using this node's name and select list.
5360 table_def *spx_qpn::get_fields(){
5361 return(create_attributes(node_name, select_list));
// Output schema of the aggregation operator: obtained from create_attributes()
// using this node's name and select list.
5364 table_def *sgah_qpn::get_fields(){
5365 return(create_attributes(node_name, select_list));
// Output schema of this operator: obtained from create_attributes() using this
// node's name and select list.
5368 table_def *rsgah_qpn::get_fields(){
5369 return(create_attributes(node_name, select_list));
// Output schema of this operator: obtained from create_attributes() using this
// node's name and select list.
5372 table_def *sgahcwcb_qpn::get_fields(){
5373 return(create_attributes(node_name, select_list));
// Output schema of the filter join: obtained from create_attributes() using
// this node's name and select list.
5376 table_def *filter_join_qpn::get_fields(){
5377 return(create_attributes(node_name, select_list));
// Output schema of the watchlist join: obtained from create_attributes() using
// this node's name and select list.
5380 table_def *watch_join_qpn::get_fields(){
5381 return(create_attributes(node_name, select_list));
// Output schema of the hash join.
//
// Before delegating to create_attributes(), this recomputes the temporal
// marking of every select-list element from the temporal_eq predicates.  The
// data types hanging off select_list are modified in place, so calling this
// has a side effect on the node.
//
// NOTE(review): from[0] and from[1] are indexed unconditionally, so the node is
// expected to have at least two FROM entries.
5384 table_def *join_eq_hash_qpn::get_fields(){
5387 // First, gather temporal colrefs and SEs.
// For each side of each temporal equality: a plain column reference is recorded
// in temporal_cids (first occurrence wins) with the temporal type of that SE;
// the push onto temporal_se is presumably the non-colref branch - confirm.
5388 map<col_id, temporal_type> temporal_cids;
5389 vector<scalarexp_t *> temporal_se;
5390 for(h=0;h<temporal_eq.size();++h){
5391 scalarexp_t *sel = temporal_eq[h]->pr->get_left_se();
5392 scalarexp_t *ser = temporal_eq[h]->pr->get_right_se();
5394 if(sel->get_operator_type() == SE_COLREF){
5395 col_id tcol(sel->get_colref());
5396 if(temporal_cids.count(tcol) == 0){
5397 temporal_cids[tcol] = sel->get_data_type()->get_temporal();
5400 temporal_se.push_back(sel);
5403 if(ser->get_operator_type() == SE_COLREF){
5404 col_id tcol(ser->get_colref());
5405 if(temporal_cids.count(tcol) == 0){
5406 temporal_cids[tcol] = ser->get_data_type()->get_temporal();
5409 temporal_se.push_back(ser);
5413 // Mark select elements as nontemporal, then deduce which
5414 // ones are temporal.
5415 for(s=0;s<select_list.size();++s){
// Overwrite the element's temporal type with what compute_se_temporal()
// derives from the gathered column ids.
5416 select_list[s]->se->get_data_type()->set_temporal(
5417 compute_se_temporal(select_list[s]->se, temporal_cids)
5419 // Second chance if it is an exact match to an SE.
5420 // for(s=0;s<select_list.size();++s){
// An element still not temporal inherits the temporal type of any gathered
// temporal SE that is_equivalent_se() reports as equivalent to it.
5421 if(! select_list[s]->se->get_data_type()->is_temporal() ){
5422 for(t=0;t<temporal_se.size();++t){
5423 if(is_equivalent_se(temporal_se[t], select_list[s]->se)){
5424 select_list[s]->se->get_data_type()->set_temporal(
5425 temporal_se[t]->get_data_type()->get_temporal()
5433 // If there is an outer join, verify that
5434 // the temporal attributes are actually temporal.
5435 // NOTE: this code must be synchronized with the
5436 // equivalence finding in join_eq_hash_qpn::generate_functor
5437 // (and also, the join_eq_hash_qpn constructor)
5438 if(from[0]->get_property() || from[1]->get_property()){
// l_equiv / r_equiv: field names of plain column references appearing on the
// left / right side of a temporal equality.
5439 set<string> l_equiv, r_equiv;
5440 for(i=0;i<temporal_eq.size();i++){
5441 scalarexp_t *lse = temporal_eq[i]->pr->get_left_se();
5442 scalarexp_t *rse = temporal_eq[i]->pr->get_right_se();
5443 if(lse->get_operator_type()==SE_COLREF){
5444 l_equiv.insert(lse->get_colref()->get_field());
5446 if(rse->get_operator_type()==SE_COLREF){
5447 r_equiv.insert(rse->get_colref()->get_field());
// Re-check every element currently marked temporal: each column it references
// is looked up in the equivalence set of its side when that side's
// get_property() is set.  A miss presumably sets `failed`, and the temporal
// marking is presumably reset only in that case - confirm.
5451 for(s=0;s<select_list.size();++s){
5452 if(select_list[s]->se->get_data_type()->is_temporal()){
5454 col_id_set::iterator ci;
5455 bool failed = false;
5456 gather_se_col_ids(select_list[s]->se,cid_set, NULL);
5457 for(ci=cid_set.begin();ci!=cid_set.end();++ci){
5458 if((*ci).tblvar_ref == 0){
5459 if(from[0]->get_property()){
5460 if(l_equiv.count((*ci).field) == 0){
5465 if(from[1]->get_property()){
5466 if(r_equiv.count((*ci).field) == 0){
5473 select_list[s]->se->get_data_type()->reset_temporal();
// With the temporal markings settled, build the schema like the other operators.
5480 return create_attributes(node_name, select_list);
5484 //-----------------------------------------------------------------
5485 // get output "keys"
5486 // This is a guess about the set of fields which are a key
5487 // Use as metadata output, e.g. in qtree.xml
5491 // refs to GB attributes are keys, if a SE is not a GB colref
5492 // but refers to a GB colref (outside of an aggregation)
5493 // then set partial_keys to true
// Guess which output fields form a key of this aggregation node.
//   partial_keys (out): receives the names of select entries for which
//                       contains_gb_se(se, gref_set) holds.
//   keys (local):       collects the names of select entries whose
//                       expression is_gb(); presumably the return value
//                       (the return statement is not visible here).
// NOTE(review): the body of the gb_tbl loop (which presumably fills
// gref_set) and the way the two tests are chained are not visible in
// this listing.
5494 vector<string> sgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5495 vector<string> keys;
5498 for(int i=0; i<gb_tbl.size();++i)
5501 for(int s=0;s<select_list.size();++s){
5502 if(select_list[s]->se->is_gb()){
5503 keys.push_back(select_list[s]->name);
5505 if(contains_gb_se(select_list[s]->se, gref_set)){
5506 partial_keys.push_back(select_list[s]->name);
// Guess which output fields form a key of this node; the visible
// statements match sgah_qpn::get_tbl_keys line for line.
//   partial_keys (out): names of select entries for which
//                       contains_gb_se(se, gref_set) holds.
//   keys (local):       names of select entries whose expression is_gb().
// NOTE(review): the gb_tbl loop body and the return statement are not
// visible in this listing.
5513 vector<string> rsgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5514 vector<string> keys;
5517 for(int i=0; i<gb_tbl.size();++i)
5520 for(int s=0;s<select_list.size();++s){
5521 if(select_list[s]->se->is_gb()){
5522 keys.push_back(select_list[s]->name);
5524 if(contains_gb_se(select_list[s]->se, gref_set)){
5525 partial_keys.push_back(select_list[s]->name);
5536 //-----------------------------------------------------------------
5537 // get input tables
5540 // Get tablevar_t names of input and output tables
5542 // output_file_qpn::output_file_qpn(){source_op_name = ""; }
// get_input_tbls() for every plan-node type: the table variables a node
// reads from.
// NOTE(review): the bodies of the output_file, mrg and join variants are
// not visible in this listing, and no return statement is visible for
// any variant; only the statements shown are described.
5543 vector<tablevar_t *> output_file_qpn::get_input_tbls(){
// watch_tbl_qpn: starts from an empty vector.
5547 vector<tablevar_t *> watch_tbl_qpn::get_input_tbls(){
5548 vector<tablevar_t *> ret;
5552 vector<tablevar_t *> mrg_qpn::get_input_tbls(){
// The single-input node types build a one-element vector holding their
// table_name member (the pointer is shared, not copied).
5556 vector<tablevar_t *> spx_qpn::get_input_tbls(){
5557 vector<tablevar_t *> retval(1,table_name);
5561 vector<tablevar_t *> sgah_qpn::get_input_tbls(){
5562 vector<tablevar_t *> retval(1,table_name);
5566 vector<tablevar_t *> rsgah_qpn::get_input_tbls(){
5567 vector<tablevar_t *> retval(1,table_name);
5571 vector<tablevar_t *> sgahcwcb_qpn::get_input_tbls(){
5572 vector<tablevar_t *> retval(1,table_name);
// Join node types (bodies not visible in this listing).
5576 vector<tablevar_t *> join_eq_hash_qpn::get_input_tbls(){
5580 vector<tablevar_t *> filter_join_qpn::get_input_tbls(){
5584 vector<tablevar_t *> watch_join_qpn::get_input_tbls(){
5588 //-----------------------------------------------------------------
5589 // get output tables
5592 // This does not make sense, this fcn returns the output table *name*,
5593 // not its schema, and then there is another fcn to return the schema.
// get_output_tbls() for every plan-node type. All variants do the same
// thing: build a one-element vector holding a tablevar_t constructed
// from node_name.
// NOTE(review): each call allocates the tablevar_t with `new` and no
// matching delete is visible here -- presumably the caller owns it;
// confirm. The return statements are not visible in this listing.
5594 vector<tablevar_t *> output_file_qpn::get_output_tbls(){
5595 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5599 vector<tablevar_t *> watch_tbl_qpn::get_output_tbls(){
5600 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5604 vector<tablevar_t *> mrg_qpn::get_output_tbls(){
5605 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5609 vector<tablevar_t *> spx_qpn::get_output_tbls(){
5610 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5614 vector<tablevar_t *> sgah_qpn::get_output_tbls(){
5615 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5619 vector<tablevar_t *> rsgah_qpn::get_output_tbls(){
5620 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5624 vector<tablevar_t *> sgahcwcb_qpn::get_output_tbls(){
5625 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5629 vector<tablevar_t *> join_eq_hash_qpn::get_output_tbls(){
5630 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5634 vector<tablevar_t *> filter_join_qpn::get_output_tbls(){
5635 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5640 vector<tablevar_t *> watch_join_qpn::get_output_tbls(){
5641 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5647 //-----------------------------------------------------------------
5650 // Associate colrefs with this schema.
5651 // Also, use this opportunity to create table_layout (the output schema).
5652 // If the output schema is ever needed before
// Re-bind the merge node's inputs to Schema.
// Every fm[] entry has its table reference looked up by schema name and
// stored with set_schema_ref(); afterwards mvars[0] and mvars[1] take
// the schema refs of fm[0] and fm[1].
// NOTE(review): a guard on the lookup result may sit on a line that is
// not visible in this listing. Only the first two mvars/fm entries are
// touched by the visible code.
5653 void mrg_qpn::bind_to_schema(table_list *Schema){
5655 for(t=0;t<fm.size();++t){
5656 int tblref = Schema->get_table_ref(fm[t]->get_schema_name());
5658 fm[t]->set_schema_ref(tblref );
5661 // Here I assume that the colrefs have been reordered
5662 // during analysis so that mvars line up with fm.
5663 mvars[0]->set_schema_ref(fm[0]->get_schema_ref());
5664 mvars[1]->set_schema_ref(fm[1]->get_schema_ref());
5671 // Associate colrefs in SEs with this schema.
// Re-bind this node to Schema: refresh table_name's schema reference,
// then bind every WHERE predicate and every select-list expression
// against a one-entry FROM list built from table_name.
// NOTE(review): a guard on the get_table_ref() result may sit on a line
// that is not visible in this listing.
5672 void spx_qpn::bind_to_schema(table_list *Schema){
5673 // Bind the tablevars in the From clause to the Schema
5674 // (it might have changed from analysis time)
5675 int t = Schema->get_table_ref(table_name->get_schema_name() );
5677 table_name->set_schema_ref(t );
5679 // Get the "from" clause
5680 tablevar_list_t fm(table_name);
5682 // Bind all SEs to this schema
5684 for(p=0;p<where.size();++p){
5685 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5688 for(s=0;s<select_list.size();++s){
5689 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5692 // Collect set of tuples referenced in this HFTA
5693 // input, internal, or output.
// Collect the column ids referenced by this node.
// All column ids found in the WHERE predicates and the select list are
// gathered into tmp_cset. The loop over tmp_cset then copies into retval
// only the ids whose schema field has at least one unpack function.
// NOTE(review): how ext_fcns_only selects between tmp_cset and retval,
// and the return statements, are not visible in this listing. `fe` is
// dereferenced without a null check -- confirm Schema->get_field cannot
// return NULL for a bound column.
5697 col_id_set spx_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5698 col_id_set retval, tmp_cset;
5700 for(p=0;p<where.size();++p){
5701 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5704 for(s=0;s<select_list.size();++s){
5705 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5707 col_id_set::iterator cisi;
5709 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5710 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5711 if(fe->get_unpack_fcns().size()>0)
5712 retval.insert((*cisi));
// Collect the column ids referenced by this join node.
// Column ids from the WHERE predicates and the select list go into
// tmp_cset; the final loop keeps in retval only those whose schema field
// has at least one unpack function.
// NOTE(review): the use of ext_fcns_only and the return statements are
// not visible in this listing. `fe` is dereferenced without a null check.
5720 col_id_set filter_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5721 col_id_set retval, tmp_cset;
5723 for(p=0;p<where.size();++p){
5724 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5727 for(s=0;s<select_list.size();++s){
5728 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5730 col_id_set::iterator cisi;
5732 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5733 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5734 if(fe->get_unpack_fcns().size()>0)
5735 retval.insert((*cisi));
// Collect the column ids referenced by this join node.
// Column ids from the WHERE predicates and the select list go into
// tmp_cset; the final loop keeps in retval only those whose schema field
// has at least one unpack function.
// NOTE(review): the use of ext_fcns_only and the return statements are
// not visible in this listing. `fe` is dereferenced without a null check.
5743 col_id_set watch_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5744 col_id_set retval, tmp_cset;
5746 for(p=0;p<where.size();++p){
5747 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5750 for(s=0;s<select_list.size();++s){
5751 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5753 col_id_set::iterator cisi;
5755 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5756 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5757 if(fe->get_unpack_fcns().size()>0)
5758 retval.insert((*cisi));
5769 // Associate colrefs in SEs with this schema.
// Re-bind the join node to Schema: refresh the schema reference of every
// from[] table variable, then bind each WHERE predicate and each
// select-list expression against the full FROM list.
// NOTE(review): a guard on the get_table_ref() result may sit on a line
// that is not visible in this listing.
5770 void join_eq_hash_qpn::bind_to_schema(table_list *Schema){
5771 // Bind the tablevars in the From clause to the Schema
5772 // (it might have changed from analysis time)
5774 for(f=0;f<from.size();++f){
5775 string snm = from[f]->get_schema_name();
5776 int tbl_ref = Schema->get_table_ref(snm);
5778 from[f]->set_schema_ref(tbl_ref);
5781 // Bind all SEs to this schema
5782 tablevar_list_t fm(from);
5785 for(p=0;p<where.size();++p){
5786 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5789 for(s=0;s<select_list.size();++s){
5790 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5793 // Collect set of tuples referenced in this HFTA
5794 // input, internal, or output.
// Re-bind the join node to Schema: refresh the schema reference of every
// from[] table variable, then bind each WHERE predicate and each
// select-list expression against the full FROM list.
// NOTE(review): a guard on the get_table_ref() result may sit on a line
// that is not visible in this listing.
5798 void filter_join_qpn::bind_to_schema(table_list *Schema){
5799 // Bind the tablevars in the From clause to the Schema
5800 // (it might have changed from analysis time)
5802 for(f=0;f<from.size();++f){
5803 string snm = from[f]->get_schema_name();
5804 int tbl_ref = Schema->get_table_ref(snm);
5806 from[f]->set_schema_ref(tbl_ref);
5809 // Bind all SEs to this schema
5810 tablevar_list_t fm(from);
5813 for(p=0;p<where.size();++p){
5814 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5817 for(s=0;s<select_list.size();++s){
5818 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5821 // Collect set of tuples referenced in this HFTA
5822 // input, internal, or output.
// Re-bind the join node to Schema: refresh the schema reference of every
// from[] table variable, then bind each WHERE predicate and each
// select-list expression against the full FROM list.
// NOTE(review): a guard on the get_table_ref() result may sit on a line
// that is not visible in this listing.
5826 void watch_join_qpn::bind_to_schema(table_list *Schema){
5827 // Bind the tablevars in the From clause to the Schema
5828 // (it might have changed from analysis time)
5830 for(f=0;f<from.size();++f){
5831 string snm = from[f]->get_schema_name();
5832 int tbl_ref = Schema->get_table_ref(snm);
5834 from[f]->set_schema_ref(tbl_ref);
5837 // Bind all SEs to this schema
5838 tablevar_list_t fm(from);
5841 for(p=0;p<where.size();++p){
5842 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5845 for(s=0;s<select_list.size();++s){
5846 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5849 // Collect set of tuples referenced in this HFTA
5850 // input, internal, or output.
// Re-bind the aggregation node to Schema. After refreshing table_name's
// schema reference, the following are bound against a one-entry FROM
// list: WHERE predicates, HAVING predicates, select-list expressions,
// group-by definitions, and the aggregate expressions.
// NOTE(review): a guard on the get_table_ref() result may sit on a line
// that is not visible in this listing.
5858 void sgah_qpn::bind_to_schema(table_list *Schema){
5859 // Bind the tablevars in the From clause to the Schema
5860 // (it might have changed from analysis time)
5863 int t = Schema->get_table_ref(table_name->get_schema_name() );
5865 table_name->set_schema_ref(t );
5867 // Get the "from" clause
5868 tablevar_list_t fm(table_name);
5872 // Bind all SEs to this schema
5874 for(p=0;p<where.size();++p){
5875 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5877 for(p=0;p<having.size();++p){
5878 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5881 for(s=0;s<select_list.size();++s){
5882 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5885 for(g=0;g<gb_tbl.size();++g){
5886 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Built-in aggregates carry a single aggregate SE; for the others the
// operand list is bound element by element (presumably the else branch
// -- the branch keyword is not visible in this listing).
5889 for(a=0;a<aggr_tbl.size();++a){
5890 if(aggr_tbl.is_builtin(a)){
5891 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5893 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5895 for(o=0;o<opl.size();++o){
5896 bind_to_schema_se(opl[o],&fm,Schema);
// Collect the column ids referenced by this aggregation node.
// Column ids are gathered (with &gb_tbl passed to the gather helpers)
// from the WHERE predicates, the group-by definitions, and the aggregate
// expressions / operand lists. The final loop keeps in retval only the
// ids whose schema field has at least one unpack function.
// No gather call over select_list or having appears in the visible lines.
// NOTE(review): the use of ext_fcns_only and the return statements are
// not visible in this listing. `fe` is dereferenced without a null check.
5902 col_id_set sgah_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5903 col_id_set retval, tmp_cset;
5905 for(p=0;p<where.size();++p){
5906 gather_pr_col_ids(where[p]->pr, tmp_cset, &gb_tbl);
5909 for(g=0;g<gb_tbl.size();++g){
5910 gather_se_col_ids(gb_tbl.get_def(g), tmp_cset, &gb_tbl);
5913 for(a=0;a<aggr_tbl.size();++a){
5914 if(aggr_tbl.is_builtin(a)){
5915 gather_se_col_ids(aggr_tbl.get_aggr_se(a), tmp_cset, &gb_tbl);
5917 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5919 for(o=0;o<opl.size();++o){
5920 gather_se_col_ids(opl[o], tmp_cset, &gb_tbl);
5925 col_id_set::iterator cisi;
5927 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5928 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5929 if(fe->get_unpack_fcns().size()>0)
5930 retval.insert((*cisi));
// Re-bind this aggregation node to Schema. After refreshing table_name's
// schema reference, the following are bound against a one-entry FROM
// list: WHERE, HAVING and CLOSING_WHEN predicates, select-list
// expressions, group-by definitions, and the aggregate expressions.
// NOTE(review): a guard on the get_table_ref() result may sit on a line
// that is not visible in this listing.
5939 void rsgah_qpn::bind_to_schema(table_list *Schema){
5940 // Bind the tablevars in the From clause to the Schema
5941 // (it might have changed from analysis time)
5942 int t = Schema->get_table_ref(table_name->get_schema_name() );
5944 table_name->set_schema_ref(t );
5946 // Get the "from" clause
5947 tablevar_list_t fm(table_name);
5949 // Bind all SEs to this schema
5951 for(p=0;p<where.size();++p){
5952 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5954 for(p=0;p<having.size();++p){
5955 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5957 for(p=0;p<closing_when.size();++p){
5958 bind_to_schema_pr(closing_when[p]->pr, &fm, Schema);
5961 for(s=0;s<select_list.size();++s){
5962 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5965 for(g=0;g<gb_tbl.size();++g){
5966 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Built-in aggregates carry a single aggregate SE; for the others the
// operand list is bound element by element (presumably the else branch
// -- the branch keyword is not visible in this listing).
5969 for(a=0;a<aggr_tbl.size();++a){
5970 if(aggr_tbl.is_builtin(a)){
5971 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5973 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5975 for(o=0;o<opl.size();++o){
5976 bind_to_schema_se(opl[o],&fm,Schema);
// Re-bind this aggregation node to Schema. After refreshing table_name's
// schema reference, the following are bound against a one-entry FROM
// list: WHERE, HAVING, CLEANING_BY and CLEANING_WHEN predicates,
// select-list expressions, group-by definitions, and the aggregate
// expressions.
// NOTE(review): a guard on the get_table_ref() result may sit on a line
// that is not visible in this listing.
5983 void sgahcwcb_qpn::bind_to_schema(table_list *Schema){
5984 // Bind the tablevars in the From clause to the Schema
5985 // (it might have changed from analysis time)
5986 int t = Schema->get_table_ref(table_name->get_schema_name() );
5988 table_name->set_schema_ref(t );
5990 // Get the "from" clause
5991 tablevar_list_t fm(table_name);
5993 // Bind all SEs to this schema
5995 for(p=0;p<where.size();++p){
5996 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5998 for(p=0;p<having.size();++p){
5999 bind_to_schema_pr(having[p]->pr, &fm, Schema);
// Each predicate list is walked with its own size. Bounding these two
// loops by having.size() would index past the end of cleanby/cleanwhen
// when HAVING is longer, and would leave predicates unbound when it is
// shorter.
6001 for(p=0;p<cleanby.size();++p){
6002 bind_to_schema_pr(cleanby[p]->pr, &fm, Schema);
6004 for(p=0;p<cleanwhen.size();++p){
6005 bind_to_schema_pr(cleanwhen[p]->pr, &fm, Schema);
6008 for(s=0;s<select_list.size();++s){
6009 bind_to_schema_se(select_list[s]->se, &fm, Schema);
6012 for(g=0;g<gb_tbl.size();++g){
6013 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Built-in aggregates carry a single aggregate SE; for the others the
// operand list is bound element by element (presumably the else branch
// -- the branch keyword is not visible in this listing).
6016 for(a=0;a<aggr_tbl.size();++a){
6017 if(aggr_tbl.is_builtin(a)){
6018 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
6020 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
6022 for(o=0;o<opl.size();++o){
6023 bind_to_schema_se(opl[o],&fm,Schema);
6034 ///////////////////////////////////////////////////////////////
6035 ///////////////////////////////////////////////////////////////
6036 /// Functions for code generation.
6039 //-----------------------------------------------------------------
// These two node types do not scan any expressions in get_cplx_lit_tbl:
// each returns a newly allocated cplx_lit_table and ignores Ext_fcns.
// NOTE(review): the table is allocated with `new`; presumably the caller
// owns it -- confirm.
6042 cplx_lit_table *watch_tbl_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6043 return(new cplx_lit_table());
6046 cplx_lit_table *mrg_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6047 return(new cplx_lit_table());
// Build the complex-literal table for this node: a new cplx_lit_table is
// allocated and handed to find_complex_literal_se / _pr for every
// select-list expression and every WHERE predicate, then returned.
// NOTE(review): the table is allocated with `new`; presumably the caller
// owns it -- confirm.
6050 cplx_lit_table *spx_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6052 cplx_lit_table *complex_literals = new cplx_lit_table();
6054 for(i=0;i<select_list.size();i++){
6055 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6057 for(i=0;i<where.size();++i){
6058 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6061 return(complex_literals);
// Build the complex-literal table for this aggregation node. A new
// cplx_lit_table is filled by scanning, in order: the aggregates, the
// select list, the group-by definitions, the WHERE predicates and the
// HAVING predicates; it is then returned.
// NOTE(review): the table is allocated with `new`; presumably the caller
// owns it -- confirm.
6064 cplx_lit_table *sgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6066 cplx_lit_table *complex_literals = new cplx_lit_table();
// Built-in, non-star aggregates are scanned through their aggregate SE;
// the operand-list scan is presumably the else branch (the branch
// keyword is not visible in this listing).
6068 for(i=0;i<aggr_tbl.size();++i){
6069 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6070 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6072 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6073 for(j=0;j<opl.size();++j)
6074 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6078 for(i=0;i<select_list.size();i++){
6079 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6081 for(i=0;i<gb_tbl.size();i++){
6082 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6084 for(i=0;i<where.size();++i){
6085 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6087 for(i=0;i<having.size();++i){
6088 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6091 return(complex_literals);
// Build the complex-literal table for this aggregation node. A new
// cplx_lit_table is filled by scanning, in order: the aggregates, the
// select list, the group-by definitions, and the WHERE, HAVING and
// CLOSING_WHEN predicates; it is then returned.
// NOTE(review): the table is allocated with `new`; presumably the caller
// owns it -- confirm.
6095 cplx_lit_table *rsgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6097 cplx_lit_table *complex_literals = new cplx_lit_table();
// Built-in, non-star aggregates are scanned through their aggregate SE;
// the operand-list scan is presumably the else branch (the branch
// keyword is not visible in this listing).
6099 for(i=0;i<aggr_tbl.size();++i){
6100 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6101 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6103 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6104 for(j=0;j<opl.size();++j)
6105 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6109 for(i=0;i<select_list.size();i++){
6110 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6112 for(i=0;i<gb_tbl.size();i++){
6113 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6115 for(i=0;i<where.size();++i){
6116 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6118 for(i=0;i<having.size();++i){
6119 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6121 for(i=0;i<closing_when.size();++i){
6122 find_complex_literal_pr(closing_when[i]->pr,Ext_fcns, complex_literals);
6125 return(complex_literals);
// Build the complex-literal table for this aggregation node. A new
// cplx_lit_table is filled by scanning, in order: the aggregates, the
// select list, the group-by definitions, and the WHERE, HAVING,
// cleanwhen and cleanby predicates; it is then returned.
// NOTE(review): the table is allocated with `new`; presumably the caller
// owns it -- confirm.
6129 cplx_lit_table *sgahcwcb_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6131 cplx_lit_table *complex_literals = new cplx_lit_table();
// Built-in, non-star aggregates are scanned through their aggregate SE;
// the operand-list scan is presumably the else branch (the branch
// keyword is not visible in this listing).
6133 for(i=0;i<aggr_tbl.size();++i){
6134 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6135 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6137 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6138 for(j=0;j<opl.size();++j)
6139 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6143 for(i=0;i<select_list.size();i++){
6144 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6146 for(i=0;i<gb_tbl.size();i++){
6147 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6149 for(i=0;i<where.size();++i){
6150 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6152 for(i=0;i<having.size();++i){
6153 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6155 for(i=0;i<cleanwhen.size();++i){
6156 find_complex_literal_pr(cleanwhen[i]->pr,Ext_fcns, complex_literals);
6158 for(i=0;i<cleanby.size();++i){
6159 find_complex_literal_pr(cleanby[i]->pr,Ext_fcns, complex_literals);
6162 return(complex_literals);
// Build the complex-literal table for this join node: a new
// cplx_lit_table is filled from every select-list expression and every
// WHERE predicate, then returned.
// NOTE(review): the table is allocated with `new`; presumably the caller
// owns it -- confirm.
6165 cplx_lit_table *join_eq_hash_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6167 cplx_lit_table *complex_literals = new cplx_lit_table();
6169 for(i=0;i<select_list.size();i++){
6170 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6172 for(i=0;i<where.size();++i){
6173 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6176 return(complex_literals);
// Build the complex-literal table for this join node: a new
// cplx_lit_table is filled from every select-list expression and every
// WHERE predicate, then returned.
// NOTE(review): the table is allocated with `new`; presumably the caller
// owns it -- confirm.
6179 cplx_lit_table *filter_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6181 cplx_lit_table *complex_literals = new cplx_lit_table();
6183 for(i=0;i<select_list.size();i++){
6184 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6186 for(i=0;i<where.size();++i){
6187 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6190 return(complex_literals);
// Build the complex-literal table for this join node: a new
// cplx_lit_table is filled from every select-list expression and every
// WHERE predicate, then returned.
// NOTE(review): the table is allocated with `new`; presumably the caller
// owns it -- confirm.
6193 cplx_lit_table *watch_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6195 cplx_lit_table *complex_literals = new cplx_lit_table();
6197 for(i=0;i<select_list.size();i++){
6198 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6200 for(i=0;i<where.size();++i){
6201 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6204 return(complex_literals);
6211 //-----------------------------------------------------------------
6212 // get_handle_param_tbl
// These two node types only declare an empty result vector in the
// visible code; no expression is scanned for handle parameters.
// NOTE(review): the return statements are not visible in this listing.
6214 vector<handle_param_tbl_entry *> watch_tbl_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6215 vector<handle_param_tbl_entry *> retval;
6219 vector<handle_param_tbl_entry *> mrg_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6220 vector<handle_param_tbl_entry *> retval;
// Collect handle-parameter entries for this node: retval is passed to
// find_param_handles_se / _pr for every select-list expression and every
// WHERE predicate.
// NOTE(review): the return statement is not visible in this listing.
6225 vector<handle_param_tbl_entry *> spx_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6227 vector<handle_param_tbl_entry *> retval;
6229 for(i=0;i<select_list.size();i++){
6230 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6232 for(i=0;i<where.size();++i){
6233 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collect handle-parameter entries for this aggregation node. retval is
// filled by scanning, in order: the aggregates, the select list, the
// group-by definitions, the WHERE predicates and the HAVING predicates.
// NOTE(review): the return statement is not visible in this listing.
6240 vector<handle_param_tbl_entry *> sgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6242 vector<handle_param_tbl_entry *> retval;
// Built-in, non-star aggregates are scanned through their aggregate SE;
// the operand-list scan is presumably the else branch (the branch
// keyword is not visible in this listing).
6245 for(i=0;i<aggr_tbl.size();++i){
6246 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6247 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6249 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6250 for(j=0;j<opl.size();++j)
6251 find_param_handles_se(opl[j], Ext_fcns, retval);
6254 for(i=0;i<select_list.size();i++){
6255 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6257 for(i=0;i<gb_tbl.size();i++){
6258 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6260 for(i=0;i<where.size();++i){
6261 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6263 for(i=0;i<having.size();++i){
6264 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
// Collect handle-parameter entries for this aggregation node. retval is
// filled by scanning, in order: the aggregates, the select list, the
// group-by definitions, and the WHERE, HAVING and CLOSING_WHEN
// predicates.
// NOTE(review): the return statement is not visible in this listing.
6271 vector<handle_param_tbl_entry *> rsgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6273 vector<handle_param_tbl_entry *> retval;
// Built-in, non-star aggregates are scanned through their aggregate SE;
// the operand-list scan is presumably the else branch (the branch
// keyword is not visible in this listing).
6276 for(i=0;i<aggr_tbl.size();++i){
6277 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6278 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6280 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6281 for(j=0;j<opl.size();++j)
6282 find_param_handles_se(opl[j], Ext_fcns, retval);
6285 for(i=0;i<select_list.size();i++){
6286 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6288 for(i=0;i<gb_tbl.size();i++){
6289 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6291 for(i=0;i<where.size();++i){
6292 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6294 for(i=0;i<having.size();++i){
6295 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6297 for(i=0;i<closing_when.size();++i){
6298 find_param_handles_pr(closing_when[i]->pr,Ext_fcns, retval);
// Collect handle-parameter entries for this aggregation node. retval is
// filled by scanning, in order: the aggregates, the select list, the
// group-by definitions, and the WHERE, HAVING, cleanwhen and cleanby
// predicates.
// NOTE(review): the return statement is not visible in this listing.
6305 vector<handle_param_tbl_entry *> sgahcwcb_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6307 vector<handle_param_tbl_entry *> retval;
// Built-in, non-star aggregates are scanned through their aggregate SE;
// the operand-list scan is presumably the else branch (the branch
// keyword is not visible in this listing).
6310 for(i=0;i<aggr_tbl.size();++i){
6311 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6312 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6314 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6315 for(j=0;j<opl.size();++j)
6316 find_param_handles_se(opl[j], Ext_fcns, retval);
6319 for(i=0;i<select_list.size();i++){
6320 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6322 for(i=0;i<gb_tbl.size();i++){
6323 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6325 for(i=0;i<where.size();++i){
6326 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6328 for(i=0;i<having.size();++i){
6329 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6331 for(i=0;i<cleanwhen.size();++i){
6332 find_param_handles_pr(cleanwhen[i]->pr,Ext_fcns, retval);
6334 for(i=0;i<cleanby.size();++i){
6335 find_param_handles_pr(cleanby[i]->pr,Ext_fcns, retval);
// Collect handle-parameter entries for this join node: retval is passed
// to find_param_handles_se / _pr for every select-list expression and
// every WHERE predicate.
// NOTE(review): the return statement is not visible in this listing.
6341 vector<handle_param_tbl_entry *> join_eq_hash_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6343 vector<handle_param_tbl_entry *> retval;
6345 for(i=0;i<select_list.size();i++){
6346 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6348 for(i=0;i<where.size();++i){
6349 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collect handle-parameter entries for this join node: retval is passed
// to find_param_handles_se / _pr for every select-list expression and
// every WHERE predicate.
// NOTE(review): the return statement is not visible in this listing.
6356 vector<handle_param_tbl_entry *> filter_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6358 vector<handle_param_tbl_entry *> retval;
6360 for(i=0;i<select_list.size();i++){
6361 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6363 for(i=0;i<where.size();++i){
6364 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collect handle-parameter entries for this join node: retval is passed
// to find_param_handles_se / _pr for every select-list expression and
// every WHERE predicate.
// NOTE(review): the return statement is not visible in this listing.
6370 vector<handle_param_tbl_entry *> watch_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6372 vector<handle_param_tbl_entry *> retval;
6374 for(i=0;i<select_list.size();i++){
6375 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6377 for(i=0;i<where.size();++i){
6378 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6386 ///////////////////////////////////////////////////////////////
6387 ///////////////////////////////////////////////////////////////
6388 /// Functions for operator output rates estimations
6391 //-----------------------------------------------------------------
6392 // get_rate_estimate
// Output-rate estimates, one per node type. Every variant is a
// placeholder: it returns DEFAULT_INTERFACE_RATE scaled by a per-operator
// selectivity constant (watch_tbl_qpn returns the unscaled rate). None
// of them looks at the node's own predicates or inputs.
// NOTE(review): the units of DEFAULT_INTERFACE_RATE are not visible in
// this listing.
6394 double spx_qpn::get_rate_estimate() {
6396 // dummy method for now
6397 return SPX_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6400 double sgah_qpn::get_rate_estimate() {
6402 // dummy method for now
6403 return SGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6406 double rsgah_qpn::get_rate_estimate() {
6408 // dummy method for now
6409 return RSGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6412 double sgahcwcb_qpn::get_rate_estimate() {
6414 // dummy method for now
6415 return SGAHCWCB_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6418 double watch_tbl_qpn::get_rate_estimate() {
6420 // dummy method for now
6421 return DEFAULT_INTERFACE_RATE;
6424 double mrg_qpn::get_rate_estimate() {
6426 // dummy method for now
6427 return MRG_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6430 double join_eq_hash_qpn::get_rate_estimate() {
6432 // dummy method for now
6433 return JOIN_EQ_HASH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6437 //////////////////////////////////////////////////////////////////////////////
6438 //////////////////////////////////////////////////////////////////////////////
6439 ///// Generate functors
6444 //-------------------------------------------------------------------------
6445 // Code generation utilities.
6446 //-------------------------------------------------------------------------
6448 // Globals referenced by generate utilities
// File-local pointer read by generate_se_code() to expand a group-by
// column reference into the code of its defining expression. It must be
// set before generate_se_code() is called on an expression containing
// such a reference (the assignment sites are not visible in this listing).
6450 static gb_table *segen_gb_tbl; // Table of all group-by attributes.
6454 // Generate code that makes reference
6455 // to the tuple, and not to any aggregates.
6456 // NEW : it might reference a stateful function.
// Render a scalar expression as C code text that reads from the input
// tuple. The function recurses over the expression tree and dispatches
// on se->get_operator_type(); the case labels are not visible in this
// listing, so the notes below describe each visible group of statements.
//   se     - expression to render.
//   schema - passed through unchanged to every recursive call.
// Returns the generated code. For an unrecognised operator type it
// prints an INTERNAL ERROR to stderr and returns
// "ERROR in generate_se_code".
// NOTE(review): names are formatted with sprintf into tmpstr (declared
// outside the visible lines), including a field name via %s with no
// length bound -- confirm the buffer is large enough.
6457 static string generate_se_code(scalarexp_t *se,table_list *schema){
6459 data_type *ldt, *rdt;
6461 vector<scalarexp_t *> operands;
6464 switch(se->get_operator_type()){
// Literal: handle refs and complex literals become named variables;
// any other literal is emitted inline.
6466 if(se->is_handle_ref()){
6467 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6471 if(se->get_literal()->is_cpx_lit()){
6472 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6476 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
// Parameter: a handle ref if flagged, otherwise "param_<name>".
6478 if(se->is_handle_ref()){
6479 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6483 ret.append("param_");
6484 ret.append(se->get_param_name());
// Unary operator: the operand type may supply a function name for the
// operator; otherwise the operator text is emitted before the operand.
6487 ldt = se->get_left_se()->get_data_type();
6488 if(ldt->complex_operator(se->get_op()) ){
6489 ret.append( ldt->get_complex_operator(se->get_op()) );
6491 ret.append(generate_se_code(se->get_left_se(),schema));
6495 ret.append(se->get_op());
6496 ret.append(generate_se_code(se->get_left_se(),schema));
// Binary operator: same idea, keyed on the (left type, right type, op)
// combination; otherwise the operator text goes between the operands.
6501 ldt = se->get_left_se()->get_data_type();
6502 rdt = se->get_right_se()->get_data_type();
6504 if(ldt->complex_operator(rdt, se->get_op()) ){
6505 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6507 ret.append(generate_se_code(se->get_left_se(),schema));
6509 ret.append(generate_se_code(se->get_right_se(),schema));
6513 ret.append(generate_se_code(se->get_left_se(),schema));
6514 ret.append(se->get_op());
6515 ret.append(generate_se_code(se->get_right_se(),schema));
// Column reference: a group-by ref expands to its definition looked up
// in segen_gb_tbl; the unpack_var_<field>_<tablevar_ref> name is
// formatted for the remaining case.
6520 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6521 // so return the defining code.
6522 int gref = se->get_gb_ref();
6523 scalarexp_t *gdef_se = segen_gb_tbl->get_def(gref);
6524 ret = generate_se_code(gdef_se, schema );
6527 sprintf(tmpstr,"unpack_var_%s_%d",
6528 se->get_colref()->get_field().c_str(), se->get_colref()->get_tablevar_ref() );
// Function call: a partial function is referenced through its result
// variable; otherwise a call is emitted, with the state variable and
// "cd" first when the function has a storage state.
6533 if(se->is_partial()){
6534 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6537 ret += se->op + "(";
6538 operands = se->get_operands();
6539 bool first_elem = true;
6540 if(se->get_storage_state() != ""){
6541 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6544 for(o=0;o<operands.size();o++){
6545 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle refs get special treatment
// on a line that is not visible in this listing.
6546 if(operands[o]->get_data_type()->is_buffer_type() &&
6547 (! (operands[o]->is_handle_ref()) ) )
6549 ret += generate_se_code(operands[o], schema);
6555 fprintf(stderr,"INTERNAL ERROR in generate_se_code (hfta), line %d, character %d: unknown operator type %d\n",
6556 se->get_lineno(), se->get_charno(),se->get_operator_type());
6557 return("ERROR in generate_se_code");
6561 // generate code that refers only to aggregate data and constants.
6562 // NEW : modified to handle superaggregates and stateful fcn refs.
6563 // Assume that the state is in *stval
// Render a scalar expression as C code text that reads from aggregate
// state rather than from the input tuple. The function recurses over the
// expression tree and dispatches on se->get_operator_type(); the case
// labels are not visible in this listing.
//   se     - expression to render.
//   gbvar  - text prefixed to a group-by ref number to name that variable.
//   aggvar - text prefixed to "aggr_var<n>" to name an aggregate variable.
//   schema - passed through unchanged to every recursive call.
// Returns the generated code. For an unrecognised operator type it
// prints an INTERNAL ERROR to stderr and returns
// "ERROR in generate_se_code_fm_aggr".
// NOTE(review): names are formatted with sprintf into tmpstr (declared
// outside the visible lines) with no length bound, and gbvar / aggvar
// are caller-supplied strings -- confirm the buffer is large enough.
6564 static string generate_se_code_fm_aggr(scalarexp_t *se, string gbvar, string aggvar, table_list *schema){
6567 data_type *ldt, *rdt;
6569 vector<scalarexp_t *> operands;
6572 switch(se->get_operator_type()){
// Literal: handle refs and complex literals become named variables;
// any other literal is emitted inline.
6574 if(se->is_handle_ref()){
6575 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6579 if(se->get_literal()->is_cpx_lit()){
6580 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6584 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
// Parameter: a handle ref if flagged, otherwise "param_<name>".
6586 if(se->is_handle_ref()){
6587 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6591 ret.append("param_");
6592 ret.append(se->get_param_name());
// Unary operator: the operand type may supply a function name for the
// operator; otherwise the operator text is emitted before the operand.
6595 ldt = se->get_left_se()->get_data_type();
6596 if(ldt->complex_operator(se->get_op()) ){
6597 ret.append( ldt->get_complex_operator(se->get_op()) );
6599 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6603 ret.append(se->get_op());
6604 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
// Binary operator: same idea, keyed on the (left type, right type, op)
// combination; otherwise the operator text goes between the operands.
6609 ldt = se->get_left_se()->get_data_type();
6610 rdt = se->get_right_se()->get_data_type();
6612 if(ldt->complex_operator(rdt, se->get_op()) ){
6613 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6615 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6617 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6621 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6622 ret.append(se->get_op());
6623 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
// Column reference: only group-by refs are legal here and are named
// "<gbvar><gb_ref>"; an error message is printed for the other case.
6628 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6629 // so return the defining code.
6630 sprintf(tmpstr,"%s%d",gbvar.c_str(),se->get_gb_ref());
6634 fprintf(stderr,"ERROR reference to non-GB column ref not permitted here,"
6635 "error in query_plan.cc:generate_se_code_fm_aggr, line %d, character %d.\n",
6636 se->get_lineno(), se->get_charno());
// Aggregate reference: superaggregates live in *stval, ordinary
// aggregates in the variable named by aggvar.
6642 if(se->is_superaggr()){
6643 sprintf(tmpstr,"stval->aggr_var%d",se->get_aggr_ref());
6645 sprintf(tmpstr,"%saggr_var%d",aggvar.c_str(),se->get_aggr_ref());
// Function call: a function carrying an aggregate ref (>= 0) is read
// from "udaf_ret_<n>"; a partial function from its result variable;
// otherwise a call is emitted, with the state variable and "cd" first
// when the function has a storage state.
6651 if(se->get_aggr_ref() >= 0){
6652 sprintf(tmpstr,"udaf_ret_%d",se->get_aggr_ref());
6657 if(se->is_partial()){
6658 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6661 ret += se->op + "(";
6662 bool first_elem = true;
6663 if(se->get_storage_state() != ""){
6664 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6667 operands = se->get_operands();
6668 for(o=0;o<operands.size();o++){
6669 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle refs get special treatment
// on a line that is not visible in this listing.
6670 if(operands[o]->get_data_type()->is_buffer_type() &&
6671 (! (operands[o]->is_handle_ref()) ) )
6673 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
6679 fprintf(stderr,"INTERNAL ERROR in query_plan.cc::generate_se_code_fm_aggr, line %d, character %d: unknown operator type %d\n",
6680 se->get_lineno(), se->get_charno(),se->get_operator_type());
6681 return("ERROR in generate_se_code_fm_aggr");
// Generate the statement that evaluates a partial function whose
// operands are read from aggregate state.
//   se      - must be an SE_FUNC expression; anything else prints an
//             INTERNAL ERROR to stderr and returns
//             "ERROR in unpack_partial_fcn_fm_aggr".
//   pfn_id  - number used in the "&partial_fcn_result_<id>" argument.
//   gbvar, aggvar, schema - forwarded to generate_se_code_fm_aggr() for
//             each operand.
// The generated text starts with "\tretval = <op>( "; a function with a
// storage state also gets ",&(stval->state_var_<state>),cd". Statements
// that append tmpstr, separators and the closing text are on lines not
// visible in this listing.
6687 static string unpack_partial_fcn_fm_aggr(scalarexp_t *se, int pfn_id, string gbvar, string aggvar, table_list *schema){
6690 vector<scalarexp_t *> operands;
6693 if(se->get_operator_type() != SE_FUNC){
6694 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn_fm_aggr. line %d, character %d\n",
6695 se->get_lineno(), se->get_charno());
6696 return("ERROR in unpack_partial_fcn_fm_aggr");
// The assignment is terminated with ';' so it no longer fuses with the
// sprintf below through the comma operator (same evaluation order).
6699 ret = "\tretval = " + se->get_op() + "( ";
6700 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
6703 if(se->get_storage_state() != ""){
6704 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6707 operands = se->get_operands();
6708 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle refs get special treatment
// on a line that is not visible in this listing.
6710 if(operands[o]->get_data_type()->is_buffer_type() &&
6711 (! (operands[o]->is_handle_ref()) ) )
6713 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
// Builds the C text of a call to the partial function `se`, written as an
// assignment of the call's result to "retval". Operands are rendered with
// generate_se_code (the non-aggregate variant).
//   se     - scalar expression; must have operator type SE_FUNC.
//   pfn_id - index used to name the result slot partial_fcn_result_<pfn_id>.
//   schema - table list forwarded to generate_se_code.
// Returns the generated text; if `se` is not a function call, prints a
// diagnostic to stderr and returns an "ERROR in ..." marker string instead.
6721 static string unpack_partial_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6724 vector<scalarexp_t *> operands;
// Guard: only function-call expressions are accepted.
6726 if(se->get_operator_type() != SE_FUNC){
6727 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn. line %d, character %d\n",
6728 se->get_lineno(), se->get_charno());
6729 return("ERROR in unpack_partial_fcn");
// NOTE(review): the next statement ends with ',' rather than ';', so it is
// chained to the following sprintf by the comma operator; ';' was presumably
// intended.
6732 ret = "\tretval = " + se->get_op() + "( ",
// Format the address of the result slot into the scratch buffer tmpstr.
6733 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
// Functions that carry a storage state also receive the address of their
// state variable and the identifier "cd" as extra arguments.
6736 if(se->get_storage_state() != ""){
6737 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
// The function's own operands follow, each rendered by generate_se_code.
6740 operands = se->get_operands();
6741 for(o=0;o<operands.size();o++){
// Condition selects buffer-typed operands that are not handle references.
6743 if(operands[o]->get_data_type()->is_buffer_type() &&
6744 (! (operands[o]->is_handle_ref()) ) )
6746 ret += generate_se_code(operands[o], schema);
// Builds the C text of a direct call "<op>(...)" to the function `se`.
// Unlike unpack_partial_fcn, the visible code emits no "retval =" assignment
// and no result-slot argument.
//   se     - scalar expression; must have operator type SE_FUNC.
//   pfn_id - function index (not referenced by any line visible here).
//   schema - table list forwarded to generate_se_code.
// Returns the generated text; if `se` is not a function call, prints a
// diagnostic to stderr and returns an "ERROR in ..." marker string instead.
6753 static string generate_cached_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6756 vector<scalarexp_t *> operands;
// Guard: only function-call expressions are accepted.
6758 if(se->get_operator_type() != SE_FUNC){
6759 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to generate_cached_fcn. line %d, character %d\n",
6760 se->get_lineno(), se->get_charno());
6761 return("ERROR in generate_cached_fcn");
// Start the call text with the function name.
6764 ret = se->get_op()+"(";
// Functions that carry a storage state receive the address of their state
// variable and "cd" first; note the trailing comma in the emitted text.
6766 if(se->get_storage_state() != ""){
6767 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd,";
// The function's own operands follow, each rendered by generate_se_code.
6770 operands = se->get_operands();
6771 for(o=0;o<operands.size();o++){
// Condition selects buffer-typed operands that are not handle references.
6773 if(operands[o]->get_data_type()->is_buffer_type() &&
6774 (! (operands[o]->is_handle_ref()) ) )
6776 ret += generate_se_code(operands[o], schema);
// Translates a query-language comparison operator into its C spelling.
//   "="  becomes "=="
//   "<>" becomes "!="
// Any other operator text is returned unchanged.
static string generate_C_comparison_op(string op){
	if(op == "=") return("==");
	if(op == "<>") return("!=");
	// Operators such as <, <=, >, >= are spelled the same way in C.
	return(op);
}
// Translates a query-language boolean operator into its C spelling.
// Each keyword is recognised in exactly three casings (UPPER, Capitalised,
// lower): AND -> "&&", OR -> "||", NOT -> "!".
// Anything else yields the marker text "ERROR UNKNOWN BOOLEAN OPERATOR".
static string generate_C_boolean_op(string op){
	if( (op == "AND") || (op == "And") || (op == "and") ){
		return("&&");
	}
	if( (op == "OR") || (op == "Or") || (op == "or") ){
		return("||");
	}
	if( (op == "NOT") || (op == "Not") || (op == "not") ){
		return("!");
	}

	return("ERROR UNKNOWN BOOLEAN OPERATOR");
}
// Generates C source text that evaluates the predicate `pr`.
// Scalar sub-expressions are rendered with generate_se_code and nested
// predicates by recursive calls to this function.
//   pr     - predicate to translate; the form is selected by
//            pr->get_operator_type().
//   schema - table list forwarded to generate_se_code.
// Returns the generated text. For an operator type that no case handles, a
// diagnostic is printed to stderr and an "ERROR in ..." marker is returned.
6808 static string generate_predicate_code(predicate_t *pr,table_list *schema){
6810 vector<literal_t *> litv;
6812 data_type *ldt, *rdt;
6813 vector<scalarexp_t *> op_list;
6816 switch(pr->get_operator_type()){
// Literal-list membership: the left expression is tested against each
// literal of pr->get_lit_vec(); the individual tests are joined with " || ".
6818 ldt = pr->get_left_se()->get_data_type();
6821 litv = pr->get_lit_vec();
6822 for(i=0;i<litv.size();i++){
6823 if(i>0) ret.append(" || ");
// Types needing a complex comparison go through the type's equals function
// and the call's result is compared with 0.
6826 if(ldt->complex_comparison(ldt) ){
6827 ret.append( ldt->get_hfta_equals_fcn(ldt) );
6829 if(ldt->is_buffer_type() )
6831 ret.append(generate_se_code(pr->get_left_se(), schema));
6833 if(ldt->is_buffer_type() )
// Complex literals are referred to by name (complex_literal_<ref>); other
// literals are rendered by the literal itself.
6835 if(litv[i]->is_cpx_lit()){
6836 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6839 ret.append(litv[i]->to_C_code(""));
6841 ret.append(") == 0");
// Otherwise the left expression and the literal are emitted directly.
6843 ret.append(generate_se_code(pr->get_left_se(), schema));
6845 ret.append(litv[i]->to_hfta_C_code(""));
// Comparison of two scalar expressions.
6854 ldt = pr->get_left_se()->get_data_type();
6855 rdt = pr->get_right_se()->get_data_type();
// Complex comparison: emit the type's comparison function over both sides,
// followed by the C form of the comparison operator.
6858 if(ldt->complex_comparison(rdt) ){
6859 // TODO can use get_hfta_equals_fcn if op is "=" ?
6860 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6862 if(ldt->is_buffer_type() )
6864 ret.append(generate_se_code(pr->get_left_se(),schema) );
6866 if(rdt->is_buffer_type() )
6868 ret.append(generate_se_code(pr->get_right_se(),schema) );
6870 ret.append( generate_C_comparison_op(pr->get_op()));
// Simple comparison: <left> <C operator> <right>.
6873 ret.append(generate_se_code(pr->get_left_se(),schema) );
6874 ret.append( generate_C_comparison_op(pr->get_op()));
6875 ret.append(generate_se_code(pr->get_right_se(),schema) );
// Prefix boolean operator applied to a single sub-predicate.
6881 ret.append( generate_C_boolean_op(pr->get_op()) );
6882 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
// Infix boolean operator between two sub-predicates.
6885 case PRED_BINARY_OP:
6887 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6888 ret.append( generate_C_boolean_op(pr->get_op()) );
6889 ret.append(generate_predicate_code(pr->get_right_pr(),schema) );
// Predicate function call: "<op>( arg, arg, ... )" with comma-separated
// operands rendered by generate_se_code.
6893 ret += pr->get_op() + "( ";
6894 op_list = pr->get_op_list();
6895 for(o=0;o<op_list.size();++o){
6896 if(o>0) ret += ", ";
// Condition selects buffer-typed operands that are not handle references.
6897 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6899 ret += generate_se_code(op_list[o], schema);
// Unknown operator type: report it and return an error marker.
6904 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6905 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6906 return("ERROR in generate_predicate_code");
// Generates C source text that evaluates the predicate `pr` in aggregate-side
// code. Same structure as generate_predicate_code, but scalar
// sub-expressions are rendered with generate_se_code_fm_aggr and nested
// predicates by recursive calls to this function.
//   pr            - predicate to translate; the form is selected by
//                   pr->get_operator_type().
//   gbvar, aggvar - name prefixes forwarded to generate_se_code_fm_aggr.
//   schema        - table list forwarded to generate_se_code_fm_aggr.
// Returns the generated text. For an operator type that no case handles, a
// diagnostic is printed to stderr and an "ERROR in ..." marker is returned.
6910 static string generate_predicate_code_fm_aggr(predicate_t *pr, string gbvar, string aggvar,table_list *schema){
6912 vector<literal_t *> litv;
6914 data_type *ldt, *rdt;
6915 vector<scalarexp_t *> op_list;
6918 switch(pr->get_operator_type()){
// Literal-list membership: the left expression is tested against each
// literal of pr->get_lit_vec(); the individual tests are joined with " || ".
6920 ldt = pr->get_left_se()->get_data_type();
6923 litv = pr->get_lit_vec();
6924 for(i=0;i<litv.size();i++){
6925 if(i>0) ret.append(" || ");
// Types needing a complex comparison go through the type's equals function
// and the call's result is compared with 0.
6928 if(ldt->complex_comparison(ldt) ){
6929 ret.append( ldt->get_hfta_equals_fcn(ldt) );
6931 if(ldt->is_buffer_type() )
6933 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6935 if(ldt->is_buffer_type() )
// Complex literals are referred to by name (complex_literal_<ref>); other
// literals are rendered by the literal itself.
6937 if(litv[i]->is_cpx_lit()){
6938 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6941 ret.append(litv[i]->to_C_code(""));
6943 ret.append(") == 0");
// Otherwise the left expression and the literal are emitted directly.
6945 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6947 ret.append(litv[i]->to_hfta_C_code(""));
// Comparison of two scalar expressions.
6956 ldt = pr->get_left_se()->get_data_type();
6957 rdt = pr->get_right_se()->get_data_type();
// Complex comparison: emit the type's comparison function over both sides,
// followed by the C form of the comparison operator.
6960 if(ldt->complex_comparison(rdt) ){
6961 // TODO can use get_hfta_equals_fcn if op is "=" ?
6962 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6964 if(ldt->is_buffer_type() )
6966 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6968 if(rdt->is_buffer_type() )
6970 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
6972 ret.append( generate_C_comparison_op(pr->get_op()));
// Simple comparison: <left> <C operator> <right>.
6975 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6976 ret.append( generate_C_comparison_op(pr->get_op()));
6977 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
// Prefix boolean operator applied to a single sub-predicate.
6983 ret.append( generate_C_boolean_op(pr->get_op()) );
6984 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
// Infix boolean operator between two sub-predicates.
6987 case PRED_BINARY_OP:
6989 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6990 ret.append( generate_C_boolean_op(pr->get_op()) );
6991 ret.append(generate_predicate_code_fm_aggr(pr->get_right_pr(), gbvar, aggvar,schema) );
// Predicate function call: "<op>( arg, arg, ... )" with comma-separated
// operands rendered by generate_se_code_fm_aggr.
6995 ret += pr->get_op() + "( ";
6996 op_list = pr->get_op_list();
6997 for(o=0;o<op_list.size();++o){
6998 if(o>0) ret += ", ";
// Condition selects buffer-typed operands that are not handle references.
6999 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
7001 ret += generate_se_code_fm_aggr(op_list[o], gbvar, aggvar, schema);
// Unknown operator type: report it under this function's own name so the
// diagnostic is not confused with one from generate_predicate_code.
7006 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code_fm_aggr, line %d, character %d, unknown predicate operator type %d\n",
7007 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
7008 return("ERROR in generate_predicate_code_fm_aggr");
// Builds C text that tests two already-rendered operands for equality.
//   lhs_op, rhs_op - C text of the two operands.
//   dt             - data type shared by both operands.
// Returns the generated comparison text.
7016 static string generate_equality_test(string &lhs_op, string &rhs_op, data_type *dt){
// Types needing a complex comparison go through the type's equals function;
// the call's result is compared with 0.
7019 if(dt->complex_comparison(dt) ){
7020 ret.append(dt->get_hfta_equals_fcn(dt));
// Buffer types get special treatment for each operand.
7022 if(dt->is_buffer_type() )
7026 if(dt->is_buffer_type() )
7028 ret.append(rhs_op );
7029 ret.append(") == 0");
// Otherwise the two operand texts are emitted directly.
7031 ret.append(lhs_op );
7033 ret.append(rhs_op );
// Builds C text for an ordering test between two already-rendered operands
// (a less-than test, going by the function name).
//   lhs_op, rhs_op - C text of the two operands.
//   dt             - data type shared by both operands.
// Returns the generated comparison text.
7039 static string generate_lt_test(string &lhs_op, string &rhs_op, data_type *dt){
// Types needing a complex comparison go through the type's comparison
// function; the call's result is compared with 1.
// NOTE(review): generate_aggr_update tests the same comparison function's
// result with "< 0" / "> 0"; confirm that "== 1" is the intended encoding here.
7042 if(dt->complex_comparison(dt) ){
7043 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types get special treatment for each operand.
7045 if(dt->is_buffer_type() )
7049 if(dt->is_buffer_type() )
7051 ret.append(rhs_op );
7052 ret.append(") == 1");
// Otherwise the two operand texts are emitted directly.
7054 ret.append(lhs_op );
7056 ret.append(rhs_op );
7062 //static string generate_comparison(string &lhs_op, string &rhs_op, data_type *dt){
7065 // if(dt->complex_comparison(dt) ){
7066 // ret.append(dt->get_hfta_equals_fcn(dt));
7068 // if(dt->is_buffer_type() )
7070 // ret.append(lhs_op);
7071 // ret.append(", ");
7072 // if(dt->is_buffer_type() )
7074 // ret.append(rhs_op );
7075 // ret.append(") == 0");
7077 // ret.append(lhs_op );
7078 // ret.append(" == ");
7079 // ret.append(rhs_op );
7086 // Here I assume that only MIN and MAX aggregates can be computed
7087 // over BUFFER data types.
// Generates the C statements that fold one more input value into an
// aggregate variable.
//   var    - C text naming the aggregate's storage.
//   atbl   - aggregate table describing the aggregate.
//   aidx   - index of the aggregate within atbl.
//   schema - table list forwarded to generate_se_code.
// Returns the generated text (it starts with two tabs). An unrecognized
// builtin operator produces a diagnostic on stderr.
7089 static string generate_aggr_update(string var, aggregate_table *atbl,int aidx, table_list *schema){
7090 string retval = "\t\t";
7091 string op = atbl->get_op(aidx);
// User-defined aggregate: emit a call to <op>_HFTA_AGGR_UPDATE_ with the
// aggregate storage first ('&' is prefixed unless the storage type is
// fstring_t), then the operands.
7094 if(! atbl->is_builtin(aidx)) {
7096 retval += op+"_HFTA_AGGR_UPDATE_(";
7097 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7098 retval+="("+var+")";
7099 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
// NOTE(review): the loop header opens two braces ("{{"); this is legal only
// if both are closed further down - confirm.
7100 for(o=0;o<opl.size();++o){{
// Condition selects buffer-typed operands that are not handle references.
7102 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7104 retval += generate_se_code(opl[o], schema);
7113 // builtin processing
7114 data_type *dt = atbl->get_data_type(aidx);
// Increment form.
7118 retval.append("++;\n");
// Additive form: "+= <aggregated expression>".
7123 retval.append(" += ");
7124 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7125 retval.append(";\n");
// Keep-the-smaller form: evaluate the expression into aggr_tmp_<aidx>, then
// replace `var` when the temporary compares less than it. Complex types use
// the type's comparison function (buffer types pass addresses); buffer types
// are replaced through the type's buffer-replace function.
7129 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7130 retval += dt->make_host_cvar(tmpstr);
7132 retval += generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7133 if(dt->complex_comparison(dt)){
7134 if(dt->is_buffer_type())
7135 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7137 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7139 sprintf(tmpstr,"\t\tif(aggr_tmp_%d < %s)\n",aidx,var.c_str());
7141 retval.append(tmpstr);
7142 if(dt->is_buffer_type()){
7143 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7145 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7147 retval.append(tmpstr);
// Keep-the-larger form: same shape as above with the comparison reversed.
7152 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7153 retval+=dt->make_host_cvar(tmpstr);
7155 retval+=generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7156 if(dt->complex_comparison(dt)){
7157 if(dt->is_buffer_type())
7158 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7160 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7162 sprintf(tmpstr,"\t\tif(aggr_tmp_%d > %s)\n",aidx,var.c_str());
7164 retval.append(tmpstr);
7165 if(dt->is_buffer_type()){
7166 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7168 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7170 retval.append(tmpstr);
// Bitwise aggregates: AND_AGGR uses "&=", OR_AGGR "|=", XOR_AGGR "^=".
7175 if(op == "AND_AGGR"){
7177 retval.append(" &= ");
7178 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7179 retval.append(";\n");
7182 if(op == "OR_AGGR"){
7184 retval.append(" |= ");
7185 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7186 retval.append(";\n");
7189 if(op == "XOR_AGGR"){
7191 retval.append(" ^= ");
7192 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7193 retval.append(";\n");
// Average form: maintain <var>_sum and <var>_cnt and recompute <var> as
// their quotient on every update.
7197 retval += var+"_sum += "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7198 retval += "\t\t"+var+"_cnt += 1;\n";
7199 retval += "\t\t"+var+" = "+var+"_sum / "+var+"_cnt;\n";
// No branch matched the operator name.
7203 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_update.\n",op.c_str());
// Generates the C statement that removes the contribution of aggregate `var`
// from the super-aggregate `supervar`.
//   var      - C text naming the aggregate being removed.
//   supervar - C text naming the super-aggregate being adjusted.
//   atbl     - aggregate table describing the aggregate.
//   aidx     - index of the aggregate within atbl.
//   schema   - table list (not referenced by any line visible here).
// Returns the generated text (it starts with two tabs). An unrecognized
// operator produces a diagnostic on stderr.
7212 static string generate_superaggr_minus(string var, string supervar, aggregate_table *atbl,int aidx, table_list *schema){
7213 string retval = "\t\t";
7214 string op = atbl->get_op(aidx);
// User-defined aggregate: call <op>_HFTA_AGGR_MINUS_(supervar, var); '&' is
// prefixed to each argument unless the storage type is fstring_t.
7217 if(! atbl->is_builtin(aidx)) {
7219 retval += op+"_HFTA_AGGR_MINUS_(";
7220 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7221 retval+="("+supervar+"),";
7222 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7223 retval+="("+var+");\n";
// COUNT and SUM are undone by subtraction.
7229 if(op == "COUNT" || op == "SUM"){
7230 retval += supervar + "-=" +var + ";\n";
// XOR is its own inverse, so the same operator undoes it.
7234 if(op == "XOR_AGGR"){
7235 retval += supervar + "^=" +var + ";\n";
// MIN and MAX are recognised separately from the additive cases above.
7239 if(op=="MIN" || op == "MAX")
// No branch matched the operator name.
7242 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_superaggr_minus.\n",op.c_str());
// Generates the C statements that initialise an aggregate variable from the
// first input value of a group.
//   var    - C text naming the aggregate's storage.
//   atbl   - aggregate table describing the aggregate.
//   aidx   - index of the aggregate within atbl.
//   schema - table list forwarded to generate_se_code.
// Returns the generated text. An unrecognized builtin operator produces a
// diagnostic on stderr.
7251 static string generate_aggr_init(string var, aggregate_table *atbl,int aidx, table_list *schema){
7253 string op = atbl->get_op(aidx);
// User-defined aggregate: emit <op>_HFTA_AGGR_INIT_(var) followed by a first
// <op>_HFTA_AGGR_UPDATE_(var, operands...). '&' is prefixed to the storage
// argument unless the storage type is fstring_t.
7256 if(! atbl->is_builtin(aidx)){
7258 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_INIT_(";
7259 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7260 retval+="("+var+"));\n";
7262 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_UPDATE_(";
7263 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7264 retval+="("+var+")";
7265 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
7267 for(o=0;o<opl.size();++o){
// Condition selects buffer-typed operands that are not handle references.
7269 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7271 retval += generate_se_code(opl[o],schema);
7277 // builtin aggregate processing
7278 data_type *dt = atbl->get_data_type(aidx);
// Counter form: the first value sets the variable to 1.
7282 retval.append(" = 1;\n");
// Value-carrying builtins start from the first value of the expression.
7286 if(op == "SUM" || op == "MIN" || op == "MAX" || op == "AND_AGGR" ||
7287 op=="AVG" || op == "OR_AGGR" || op == "XOR_AGGR"){
// Buffer types: evaluate into aggr_tmp_<aidx>, then copy into `var` through
// the type's buffer assign-copy function.
7288 if(dt->is_buffer_type()){
7289 sprintf(tmpstr,"\t\taggr_tmp_%d = %s;\n",aidx,generate_se_code(atbl->get_aggr_se(aidx), schema ).c_str() );
7290 retval.append(tmpstr);
7291 sprintf(tmpstr,"\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_assign_copy().c_str(),var.c_str(),aidx);
7292 retval.append(tmpstr);
// Average form: seed <var>_sum with the value, <var>_cnt with 1, and <var>
// with the sum.
7295 retval += var+"_sum = "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7296 retval += "\t"+var+"_cnt = 1;\n";
7297 retval += "\t"+var+" = "+var+"_sum;\n";
// Plain form: the expression text followed by ";".
7301 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema));
7302 retval.append(";\n");
// No branch matched the operator name.
7308 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_init.\n",op.c_str());
// Generates the C statements that reset an aggregate variable to its empty
// value, without consuming an input value.
//   var    - C text naming the aggregate's storage.
//   atbl   - aggregate table describing the aggregate.
//   aidx   - index of the aggregate within atbl.
//   schema - table list (not referenced by any line visible here).
// Returns the generated text, or an "ERROR, cannot yet handle ..." marker
// for builtin aggregates over buffer types. An unrecognized builtin operator
// produces a diagnostic on stderr.
7316 static string generate_aggr_reinitialize(string var, aggregate_table *atbl,int aidx, table_list *schema){
7318 string op = atbl->get_op(aidx);
// User-defined aggregate: running aggregates call <op>_HFTA_AGGR_REINIT_,
// all others <op>_HFTA_AGGR_INIT_. '&' is prefixed to the storage argument
// unless the storage type is fstring_t.
7321 if(! atbl->is_builtin(aidx)){
7323 retval += "\t"+atbl->get_op(aidx);
7324 if(atbl->is_running_aggr(aidx)){
7325 retval += "_HFTA_AGGR_REINIT_(";
7327 retval += "_HFTA_AGGR_INIT_(";
7329 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7330 retval+="("+var+"));\n";
7334 // builtin aggregate processing
7335 data_type *dt = atbl->get_data_type(aidx);
// Counter form: reset to 0.
7339 retval.append(" = 0;\n");
// SUM and the bitwise aggregates reset to the text of a literal
// default-constructed from the data type's type indicator.
7343 if(op == "SUM" || op == "AND_AGGR" ||
7344 op == "OR_AGGR" || op == "XOR_AGGR"){
7345 if(dt->is_buffer_type()){
7346 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7350 literal_t l(dt->type_indicator());
7351 retval.append(l.to_string());
7352 retval.append(";\n");
// Reset to the type's maximum literal (presumably the MIN aggregate, so that
// any later value replaces it - confirm against the omitted condition).
7358 if(dt->is_buffer_type()){
7359 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7363 retval.append(dt->get_max_literal());
7364 retval.append(";\n");
// Reset to the type's minimum literal (presumably the MAX aggregate -
// confirm against the omitted condition).
7370 if(dt->is_buffer_type()){
7371 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7375 retval.append(dt->get_min_literal());
7376 retval.append(";\n");
// No branch matched the operator name.
7381 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_aggr_reinitialize.\n",op.c_str());
7388 // Generate parameter holding vars from a param table.
// Emits one C variable declaration, named param_<name>, for every parameter
// in param_tbl; parameters accessed by handle additionally get a
// "struct search_handle *param_handle_<name>" declaration.
//   param_tbl - parameter table supplying names, data types and the
//               handle-access flag.
// Returns the generated declarations.
7389 static string generate_param_vars(param_table *param_tbl){
7392 vector<string> param_vec = param_tbl->get_param_names();
7393 for(p=0;p<param_vec.size();p++){
7394 data_type *dt = param_tbl->get_data_type(param_vec[p]);
// NOTE(review): the format string puts ";\n" inside the name handed to
// make_host_cvar, and another ";\n" is appended on the next line; confirm
// that the doubled terminator in the generated text is intended.
7395 sprintf(tmpstr,"param_%s;\n", param_vec[p].c_str());
7396 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7397 if(param_tbl->handle_access(param_vec[p])){
7398 ret += "\tstruct search_handle *param_handle_"+param_vec[p]+";\n";
7404 // Parameter manipulation routines
// Generates the C function
//     int load_params_<functor_name>(gs_int32_t sz, void *value)
// which unpacks a parameter block into the param_<name> variables.
//   functor_name       - suffix of the generated function's name.
//   param_tbl          - parameter table supplying names and data types.
//   param_handle_table - pass-by-handle entries to register after loading.
// The generated function returns 1 when the block fails a size check and 0
// otherwise. Returns the generated function text.
7405 static string generate_load_param_block(string functor_name,
7406 param_table *param_tbl,
7407 vector<handle_param_tbl_entry *> param_handle_table
7410 vector<string> param_names = param_tbl->get_param_names();
// Function header and the two cursor variables used by the generated code.
7412 string ret = "int load_params_"+functor_name+"(gs_int32_t sz, void *value){\n";
7413 ret.append("\tint pos=0;\n");
7414 ret.append("\tint data_pos;\n");
// Buffer-typed parameters are staged in a tmp_var_<name> local first.
7416 for(p=0;p<param_names.size();p++){
7417 data_type *dt = param_tbl->get_data_type(param_names[p]);
7418 if(dt->is_buffer_type()){
7419 sprintf(tmpstr,"tmp_var_%s;\n", param_names[p].c_str());
7420 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7425 // Verify that the block is of minimum size
// data_pos is set to the sum of sizeof() over all parameter types; the
// generated code returns 1 if that exceeds sz.
7426 if(param_names.size() > 0){
7427 ret += "//\tVerify that the value block is large enough */\n";
7428 ret.append("\n\tdata_pos = ");
7429 for(p=0;p<param_names.size();p++){
7430 if(p>0) ret.append(" + ");
7431 data_type *dt = param_tbl->get_data_type(param_names[p]);
7432 ret.append("sizeof( ");
7433 ret.append( dt->get_host_cvar_type() );
7437 ret.append("\tif(data_pos > sz) return 1;\n\n");
7440 ///////////////////////
7441 /// Verify that all strings can be unpacked.
// For each buffer-typed parameter: copy its descriptor out of the block,
// check that offset + length stays within sz, then turn the offset into an
// address inside the block.
7443 ret += "//\tVerify that the strings can be unpacked */\n";
7444 for(p=0;p<param_names.size();p++){
7445 data_type *dt = param_tbl->get_data_type(param_names[p]);
7446 if(dt->is_buffer_type()){
7447 sprintf(tmpstr,"\ttmp_var_%s = *( (%s *)((gs_sp_t )value+pos) );\n",param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7449 switch( dt->get_type() ){
7451 // ret += "\ttmp_var_"+param_names[p]+".offset = ntohl( tmp_var_"+param_names[p]+".offset );\n"; // ntoh conversion
7452 // ret += "\ttmp_var_"+param_names[p]+".length = ntohl( tmp_var_"+param_names[p]+".length );\n"; // ntoh conversion
7453 sprintf(tmpstr,"\tif( (int)(tmp_var_%s.offset) + tmp_var_%s.length > sz) return 1;\n",param_names[p].c_str(), param_names[p].c_str() );
7455 sprintf(tmpstr,"\ttmp_var_%s.offset = (gs_p_t)( (gs_sp_t )value + (gs_p_t)(tmp_var_%s.offset) );\n",param_names[p].c_str(), param_names[p].c_str() );
// Buffer types with no unpacking rule are reported on stderr.
7459 fprintf(stderr,"ERROR: parameter %s is of type %s, a buffered type, but I don't know how to unpack it as a parameter.\n",param_names[p].c_str(), dt->to_string().c_str() );
7464 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7468 /////////////////////////
// Second pass of the generated code: the block passed its checks, so copy
// the values into the param_<name> variables, restarting at pos = 0.
7470 ret += "/*\tThe block is OK, do the unpacking. */\n";
7471 ret += "\tpos = 0;\n";
7473 for(p=0;p<param_names.size();p++){
7474 data_type *dt = param_tbl->get_data_type(param_names[p]);
7475 if(dt->is_buffer_type()){
// Buffer types are copied from the staged tmp_var_<name> into param_<name>
// through the type's assign-copy function; both are passed by address.
7476 sprintf(tmpstr,"\t%s(&param_%s, &tmp_var_%s);\n", dt->get_hfta_buffer_assign_copy().c_str(),param_names[p].c_str(),param_names[p].c_str() );
7479 // if(dt->needs_hn_translation()){
7480 // sprintf(tmpstr,"\tparam_%s = %s( *( (%s *)( (gs_sp_t )value+pos) ) );\n",
7481 // param_names[p].c_str(), dt->ntoh_translation().c_str(), dt->get_host_cvar_type().c_str() );
// Other types are read straight out of the block at the current position.
7483 sprintf(tmpstr,"\tparam_%s = *( (%s *)( (gs_sp_t )value+pos) );\n",
7484 param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7488 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7491 // TODO: I think this method of handle registration is obsolete
7492 // and should be deleted.
7493 // some examination reveals that handle_access is always false.
7494 for(p=0;p<param_names.size();p++){
7495 if(param_tbl->handle_access(param_names[p]) ){
7496 data_type *pdt = param_tbl->get_data_type(param_names[p]);
7498 ret += "\tt->param_handle_"+param_names[p]+" = " +
7499 pdt->handle_registration_name() +
7500 "((struct FTA *)t, &(t->param_"+param_names[p]+"));\n";
7503 // Register the pass-by-handle parameters
7505 ret += "/* register the pass-by-handle parameters */\n";
// For each handle entry, handle_param_<ph> is assigned the result of the
// entry's registration function applied to param_<param_name> (wrapped in
// "&( ... )" for buffer types).
7508 for(ph=0;ph<param_handle_table.size();++ph){
7509 data_type pdt(param_handle_table[ph]->type_name);
7510 switch(param_handle_table[ph]->val_type){
7516 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7518 if(pdt.is_buffer_type()) ret += "&(";
7519 ret += "param_"+param_handle_table[ph]->param_name;
7520 if(pdt.is_buffer_type()) ret += ")";
7524 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
// The generated function reports success with 0.
7530 ret += "\treturn(0);\n";
7531 ret.append("}\n\n");
// Generates the C function
//     void destroy_params_<functor_name>()
// which releases what load_params_<functor_name> acquired.
//   functor_name       - suffix of the generated function's name.
//   param_tbl          - parameter table supplying names and data types.
//   param_handle_table - pass-by-handle entries; those whose val_type is
//                        param_e are deregistered.
// Returns the generated function text.
7537 static string generate_delete_param_block(string functor_name,
7538 param_table *param_tbl,
7539 vector<handle_param_tbl_entry *> param_handle_table
7543 vector<string> param_names = param_tbl->get_param_names();
7545 string ret = "void destroy_params_"+functor_name+"(){\n";
7547 for(p=0;p<param_names.size();p++){
7548 data_type *dt = param_tbl->get_data_type(param_names[p]);
// Buffer-typed parameters are released through the type's buffer-destroy
// function, which receives the address of param_<name>.
7549 if(dt->is_buffer_type()){
7550 sprintf(tmpstr,"\t\t%s(&param_%s);\n",dt->get_hfta_buffer_destroy().c_str(),param_names[p].c_str());
// Parameters accessed by handle also have their handle destroyed.
7553 if(param_tbl->handle_access(param_names[p]) ){
7554 ret += "\t\t" + dt->get_handle_destructor() +
7555 "(t->param_handle_" + param_names[p] + ");\n";
7559 ret += "//\t\tDeregister handles.\n";
// Only handles that were registered from query parameters are released here.
7561 for(ph=0;ph<param_handle_table.size();++ph){
7562 if(param_handle_table[ph]->val_type == param_e){
7563 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7564 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7573 // ---------------------------------------------------------------------
7574 // functions for creating functor variables.
// Declares, for every column reference in cid_set, the C variable that will
// hold its unpacked value (unpack_var_<field>_<tblref>); a matching
// "gs_int32_t unpack_offset_<field>_<tblref>" declaration is also formatted.
//   cid_set - column references needing access variables.
//   schema  - table list used to look up each field's type name.
// Returns the generated declarations.
7576 static string generate_access_vars(col_id_set &cid_set, table_list *schema){
7578 col_id_set::iterator csi;
7580 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7581 int schref = (*csi).schema_ref;
7582 int tblref = (*csi).tblvar_ref;
7583 string field = (*csi).field;
// The field's type is looked up by schema reference; the variable name is
// keyed by the table-variable reference.
7584 data_type dt(schema->get_type_name(schref,field));
7585 sprintf(tmpstr,"unpack_var_%s_%d", field.c_str(), tblref);
7586 ret+="\t"+dt.make_host_cvar(tmpstr)+";\n";
7587 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", field.c_str(), tblref);
// Declares the result variables for partial/cached functions.
//   partial_fcns  - function-call expressions, indexed by function id.
//   ref_cnt       - per-function reference counts (same indexing).
//   is_partial    - per-function flag marking partial functions.
//   gen_fcn_cache - whether function-result caching is being generated.
// A partial_fcn_result_<p> variable is declared when caching is off, or the
// function is partial, or it is referenced more than once. With caching on,
// multiply-referenced functions also get an "int fcn_ref_cnt_<p>" counter.
// Returns the generated declarations.
7593 static string generate_partial_fcn_vars(vector<scalarexp_t *> &partial_fcns,
7594 vector<int> &ref_cnt, vector<bool> &is_partial, bool gen_fcn_cache){
7599 for(p=0;p<partial_fcns.size();++p){
7600 if(!gen_fcn_cache || is_partial[p] || ref_cnt[p]>1){
7601 sprintf(tmpstr,"partial_fcn_result_%d", p);
7602 ret+="\t"+partial_fcns[p]->get_data_type()->make_host_cvar(tmpstr)+";\n";
7603 if(gen_fcn_cache && ref_cnt[p]>1){
7604 ret+="\tint fcn_ref_cnt_"+int_to_string(p)+";\n";
// Declares a C variable complex_literal_<cl> for every entry of the complex
// literal table; a "struct search_handle *lit_handle_<cl>" declaration is
// also formatted for entries that are handle references.
//   complex_literals - table of complex literals.
// Returns the generated declarations.
7612 static string generate_complex_lit_vars(cplx_lit_table *complex_literals){
7615 for(cl=0;cl<complex_literals->size();cl++){
7616 literal_t *l = complex_literals->get_literal(cl);
// NOTE(review): the data_type is heap-allocated and no matching delete is
// visible here; gen_complex_lit_dtr builds the same object on the stack.
7617 data_type *dtl = new data_type( l->get_type() );
7618 sprintf(tmpstr,"complex_literal_%d",cl);
7619 ret += "\t"+dtl->make_host_cvar(tmpstr)+";\n";
7620 if(complex_literals->is_handle_ref(cl)){
7621 sprintf(tmpstr,"\tstruct search_handle *lit_handle_%d;\n",cl);
7629 static string generate_pass_by_handle_vars(
7630 vector<handle_param_tbl_entry *> ¶m_handle_table){
7634 for(p=0;p<param_handle_table.size();++p){
7635 sprintf(tmpstr,"\tgs_param_handle_t handle_param_%d;\n",p);
7643 // ------------------------------------------------------------
7644 // functions for generating initialization code.
// For every column reference in cid_set, formats the statement that sets
// unpack_offset_<field>_<tblref> from
// ftaschema_get_field_offset_by_name(schema_handle<tblref>, "<field>").
//   cid_set - column references whose offsets must be resolved.
// Returns the generated initialisation code.
7646 static string gen_access_var_init(col_id_set &cid_set){
7648 col_id_set::iterator csi;
7650 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7651 int tblref = (*csi).tblvar_ref;
7652 string field = (*csi).field;
7653 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle%d, \"%s\");\n", field.c_str(),tblref,tblref,field.c_str());
// Generates the statements that initialise every complex_literal_<cl>
// variable, using the text the literal itself produces for the target
// "&(complex_literal_<cl>)".
//   complex_literals - table of complex literals.
// Returns the generated initialisation code.
7660 static string gen_complex_lit_init(cplx_lit_table *complex_literals){
7664 for(cl=0;cl<complex_literals->size();cl++){
7665 literal_t *l = complex_literals->get_literal(cl);
7666 // sprintf(tmpstr,"\tcomplex_literal_%d = ",cl);
7667 // ret += tmpstr + l->to_hfta_C_code() + ";\n";
// The literal is given the address of its variable and emits the
// initialisation itself.
7668 sprintf(tmpstr,"&(complex_literal_%d)",cl);
7669 ret += "\t" + l->to_hfta_C_code(tmpstr) + ";\n";
7670 // I think that the code below is obsolete
7671 // TODO: it is obsolete. add_cpx_lit is always
7672 // called with the handle indicator being false.
7673 // This entire structure should be cleansed.
7674 if(complex_literals->is_handle_ref(cl)){
// NOTE(review): the data_type is heap-allocated and no matching delete is
// visible here.
7675 data_type *dt = new data_type( l->get_type() );
7676 sprintf(tmpstr,"\tlit_handle_%d = %s(&(f->complex_literal_%d));\n",
7677 cl, dt->hfta_handle_registration_name().c_str(), cl);
// Generates the statements that initialise buffer-typed
// partial_fcn_result_<p> variables to an empty value.
//   partial_fcns - function-call expressions, indexed by function id.
// Returns the generated initialisation code; functions whose result is not a
// buffer type contribute nothing in the code visible here.
7686 static string gen_partial_fcn_init(vector<scalarexp_t *> &partial_fcns){
7690 for(p=0;p<partial_fcns.size();++p){
7691 data_type *pdt =partial_fcns[p]->get_data_type();
// A literal built from the type indicator alone supplies the empty value.
7692 literal_t empty_lit(pdt->type_indicator());
7693 if(pdt->is_buffer_type()){
7694 // sprintf(tmpstr,"\tpartial_fcn_result_%d = %s;\n",
7695 // p, empty_lit.to_hfta_C_code().c_str());
7696 sprintf(tmpstr,"&(partial_fcn_result_%d)",p);
7697 ret += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
// Generates the statements that register pass-by-handle parameters whose
// value is a complex literal or a plain literal. Each statement has the form
//     handle_param_<ph> = <registration fcn>( <value> );
//   param_handle_table - pass-by-handle entries; val_type selects how the
//                        value argument is produced.
// Returns the generated initialisation code. An unknown val_type produces a
// diagnostic on stderr.
7703 static string gen_pass_by_handle_init(
7704 vector<handle_param_tbl_entry *> &param_handle_table){
7708 for(ph=0;ph<param_handle_table.size();++ph){
7709 data_type pdt(param_handle_table[ph]->type_name);
// Common prefix of the registration statement, kept in tmpstr.
7710 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7711 switch(param_handle_table[ph]->val_type){
// Complex literal: pass complex_literal_<idx>, wrapped in "&( ... )" for
// buffer types.
7714 if(pdt.is_buffer_type()) ret += "&(";
7715 sprintf(tmpstr,"complex_literal_%d",param_handle_table[ph]->complex_literal_idx);
7717 if(pdt.is_buffer_type()) ret += ")";
// Plain literal: pass the literal's own C text.
7722 ret += param_handle_table[ph]->litval->to_hfta_C_code("") + ");\n";
7726 // query parameter handles are registered/deregistered in the
7727 // load_params function.
7728 // ret += "t->param_"+param_handle_table[ph]->param_name;
7731 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7738 //------------------------------------------------------------
7739 // functions for destructor and deregistration code
// Generates the destructor calls for complex literals: for every literal
// whose type is a buffer type, formats a call to the type's buffer-destroy
// function on &complex_literal_<cl>.
//   complex_literals - table of complex literals.
// Returns the generated destructor code.
7741 static string gen_complex_lit_dtr(cplx_lit_table *complex_literals){
7745 for(cl=0;cl<complex_literals->size();cl++){
7746 literal_t *l = complex_literals->get_literal(cl);
7747 data_type ldt( l->get_type() );
7748 if(ldt.is_buffer_type()){
7749 sprintf(tmpstr,"\t\t%s(&complex_literal_%d);\n",
7750 ldt.get_hfta_buffer_destroy().c_str(), cl );
7758 static string gen_pass_by_handle_dtr(
7759 vector<handle_param_tbl_entry *> ¶m_handle_table){
7763 for(ph=0;ph<param_handle_table.size();++ph){
7764 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7765 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7771 // Destroy all previous results
// Generates destructor calls for the results of all partial functions: for
// every function whose result type is a buffer type, formats a call to the
// type's buffer-destroy function on &partial_fcn_result_<p>.
//   partial_fcns - function-call expressions, indexed by function id.
// Returns the generated destructor code.
7772 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns){
7776 for(p=0;p<partial_fcns.size();++p){
7777 data_type *pdt =partial_fcns[p]->get_data_type();
7778 if(pdt->is_buffer_type()){
7779 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7780 pdt->get_hfta_buffer_destroy().c_str(), p );
7787 // Destroy previous results of fcns in pfcn_set
// Overload restricted to a subset of functions: formats buffer-destroy calls
// only for the partial functions whose ids appear in pfcn_set and whose
// result type is a buffer type.
//   partial_fcns - function-call expressions, indexed by function id.
//   pfcn_set     - ids of the functions whose results are to be destroyed.
// Returns the generated destructor code.
7788 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns, set<int> &pfcn_set){
7790 set<int>::iterator si;
7792 for(si=pfcn_set.begin(); si!=pfcn_set.end(); ++si){
7793 data_type *pdt =partial_fcns[(*si)]->get_data_type();
7794 if(pdt->is_buffer_type()){
7795 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7796 pdt->get_hfta_buffer_destroy().c_str(), (*si) );
7804 //-------------------------------------------------------------------------
7805 // Functions related to se generation bookkeeping.
// Collects the column references used by predicate `pr` that have not been
// seen before.
//   pr         - predicate to scan.
//   found_cids - in/out: column references already seen; the newly found
//                ones are added to it.
//   new_cids   - out: receives the references in `pr` that are not in
//                found_cids. Entries already present on entry are kept and
//                are also inserted into found_cids.
//   gtbl       - group-by table forwarded to gather_pr_col_ids.
7807 static void get_new_pred_cids(predicate_t *pr, col_id_set &found_cids,
7808 col_id_set &new_cids, gb_table *gtbl){
7809 col_id_set this_pred_cids;
7810 col_id_set::iterator csi;
7812 // get colrefs in predicate not already found.
7813 gather_pr_col_ids(pr,this_pred_cids,gtbl);
// new_cids receives (this_pred_cids minus found_cids).
7814 set_difference(this_pred_cids.begin(), this_pred_cids.end(),
7815 found_cids.begin(), found_cids.end(),
7816 inserter(new_cids,new_cids.begin()) );
7818 // We've found these cids, so update found_cids
7819 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7820 found_cids.insert((*csi));
7824 // after the call, new_cids will have the colrefs in se but not found_cids.
7825 // update found_cids with the new cids.
// Collects the column references used by scalar expression `se` that have
// not been seen before.
//   se         - scalar expression to scan.
//   found_cids - in/out: column references already seen; the newly found
//                ones are added to it.
//   new_cids   - out: receives the references in `se` that are not in
//                found_cids. Entries already present on entry are kept and
//                are also inserted into found_cids.
//   gtbl       - group-by table forwarded to gather_se_col_ids.
7826 static void get_new_se_cids(scalarexp_t *se, col_id_set &found_cids,
7827 col_id_set &new_cids, gb_table *gtbl){
7828 col_id_set this_se_cids;
7829 col_id_set::iterator csi;
7831 // get colrefs in se not already found.
7832 gather_se_col_ids(se,this_se_cids,gtbl);
// new_cids receives (this_se_cids minus found_cids).
7833 set_difference(this_se_cids.begin(), this_se_cids.end(),
7834 found_cids.begin(), found_cids.end(),
7835 inserter(new_cids,new_cids.begin()) );
7837 // We've found these cids, so update found_cids
7838 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7839 found_cids.insert((*csi));
// Generates the statements that unpack the given columns from their input
// tuples into the unpack_var_<field>_<tblref> variables.
//   schema      - table list used to look up each field's type name.
//   new_cids    - column references to unpack.
//   on_problem  - C text returned by the generated code when unpacking a
//                 buffer-typed field reports a problem.
//   needs_xform - per table variable: selects the transforming unpack
//                 function over the "noxf" variant.
// Returns the generated unpacking code.
7843 static string gen_unpack_cids(table_list *schema, col_id_set &new_cids, string on_problem, vector<bool> &needs_xform){
7845 col_id_set::iterator csi;
7847 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7848 int schref = (*csi).schema_ref;
7849 int tblref = (*csi).tblvar_ref;
7850 string field = (*csi).field;
7851 data_type dt(schema->get_type_name(schref,field));
// Pick the unpack function according to whether this table variable's data
// needs transformation.
7853 if(needs_xform[tblref]){
7854 unpack_fcn = dt.get_hfta_unpack_fcn();
7856 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
// Buffer types use the checked form, which receives the tuple size and
// reports through "problem"; other types use the "_nocheck" form.
7858 if(dt.is_buffer_type()){
7859 sprintf(tmpstr,"\tunpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem); // unpack_cid\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7861 sprintf(tmpstr,"\tunpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d); // unpack_cid\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
// Only the checked form can fail, so only it is followed by a bail-out.
7864 if(dt.is_buffer_type()){
7865 ret += "\tif(problem) return "+on_problem+" ;\n";
7871 // generates the declaration of all the variables related to
7872 // temp tuples generation
// Returns the declarations of the variables used by temporal-tuple
// processing: a marker comment followed by "bool temp_tuple_received;".
static string gen_decl_temp_vars(){
	string ret;

	ret += "\t// variables related to temp tuple generation\n";
	ret += "\tbool temp_tuple_received;\n";

	return(ret);
}
7882 // generates initialization code for variables related to temp tuple processing
// Generates initialisation code for temporal-tuple processing: clears
// temp_tuple_received and formats an initial value for the unpack variable
// of every column referenced by a temporal select-list expression.
//   schema      - table list used to look up field types and modifiers.
//   select_list - select elements; only those with a temporal data type are
//                 considered.
//   gtbl        - group-by table forwarded to get_new_se_cids.
// Returns the generated initialisation code.
7883 static string gen_init_temp_vars(table_list *schema, vector<select_element *>& select_list, gb_table *gtbl){
7885 col_id_set::iterator csi;
7888 // Initialize internal state
7889 ret += "\ttemp_tuple_received = false;\n";
7891 col_id_set temp_cids; // colrefs unpacked thus far.
7893 for(s=0;s<select_list.size();s++){
7894 if (select_list[s]->se->get_data_type()->is_temporal()) {
7895 // Find the set of attributes accessed in this SE
7896 col_id_set new_cids;
7897 get_new_se_cids(select_list[s]->se,temp_cids, new_cids, gtbl);
7900 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7901 int schref = (*csi).schema_ref;
7902 int tblref = (*csi).tblvar_ref;
7903 string field = (*csi).field;
7904 data_type dt(schema->get_type_name(schref,field), schema->get_modifier_list(schref,field));
// Increasing attributes start at the type's minimum literal, all others at
// its maximum literal.
7906 sprintf(tmpstr,"\t unpack_var_%s_%d = %s;\n", field.c_str(), tblref,
7907 dt.is_increasing() ? dt.get_min_literal().c_str() : dt.get_max_literal().c_str());
7917 // generates a check if tuple is temporal
// Generates the code that sets temp_tuple_received according to whether the
// tuple on the given input channel is a temporal status tuple.
//   node_name - query node name (not referenced by any line visible here).
//   channel   - input channel number; selects tup<channel>,
//               schema_handle<channel> and tuple_metadata_offset<channel>.
// Returns the generated check.
7918 static string gen_temp_tuple_check(string node_name, int channel) {
// Names of the per-channel variables used by the generated code.
7922 sprintf(tmpstr, "tup%d", channel);
7923 string tup_name = tmpstr;
7924 sprintf(tmpstr, "schema_handle%d", channel);
7925 string schema_handle_name = tmpstr;
7926 string tuple_offset_name = "tuple_metadata_offset"+int_to_string(channel);
7928 // check if it is a temporary status tuple
7929 ret += "\t// check if tuple is temp status tuple\n";
// The offset-based test is the one emitted; the schema-handle variant below
// is disabled, so schema_handle_name is only used by that commented-out line.
7930 // ret += "\tif (ftaschema_is_temporal_tuple(" + schema_handle_name + ", " + tup_name + ".data)) {\n";
7931 ret += "\tif (ftaschema_is_temporal_tuple_offset(" + tuple_offset_name + ", " + tup_name + ".data)) {\n";
7932 ret += "\t\ttemp_tuple_received = true;\n";
7934 ret += "\telse\n\t\ttemp_tuple_received = false;\n\n";
// Builds unpack code for the columns referenced by temporal select expressions.
//   schema      - forwarded to gen_unpack_cids
//   found_cids  - columns already unpacked; passed to get_new_se_cids so that
//                 only columns not yet seen are returned in new_cids
//   select_list - select expressions; only temporal-typed ones are examined
//   gtbl        - passed through to get_new_se_cids
//   needs_xform - forwarded to gen_unpack_cids
// The unpack failure action handed to gen_unpack_cids is the literal "false".
7939 // generates unpacking code for all temporal attributes referenced in select
7940 static string gen_unpack_temp_vars(table_list *schema, col_id_set& found_cids, vector<select_element *>& select_list, gb_table *gtbl, vector<bool> &needs_xform) {
7944 // Unpack all the temporal attributes references in select list
7945 // we need it to be able to generate temp status tuples
7946 for(s=0;s<select_list.size();s++){
7947 if (select_list[s]->se->get_data_type()->is_temporal()) {
7948 // Find the set of attributes accessed in this SE
7949 col_id_set new_cids;
7950 get_new_se_cids(select_list[s]->se,found_cids, new_cids, gtbl);
7951 // Unpack these values.
7952 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
// Builds the generated-code prologue that allocates a temporal status tuple
// in `result` and declares a typed `tuple` pointer onto it.
//   node_name - used to derive the tuple struct name via generate_tuple_name
// The generated code sizes the tuple as the struct plus one gs_uint8_t,
// mallocs it, marks it heap resident, and writes TEMPORAL_TUPLE into the byte
// that follows the struct.
7960 // Generates temporal tuple generation code (except attribute packing)
7961 static string gen_init_temp_status_tuple(string node_name) {
7964 ret += "\t// create temp status tuple\n";
7965 ret += "\tresult.tuple_size = sizeof("+generate_tuple_name( node_name)+") + sizeof(gs_uint8_t);\n";
7966 ret += "\tresult.data = (gs_sp_t )malloc(result.tuple_size);\n";
7967 ret += "\tresult.heap_resident = true;\n";
7968 ret += "\t// Mark tuple as temporal\n";
7969 ret += "\t*((gs_sp_t )result.data + sizeof("+generate_tuple_name( node_name)+")) = TEMPORAL_TUPLE;\n";
7971 ret += "\t"+generate_tuple_name( node_name)+" *tuple = ("+
7972 generate_tuple_name( node_name) +" *)(result.data);\n";
// Builds evaluation code for the partial functions listed in pfcn_refs.
//   schema       - forwarded to unpack_partial_fcn
//   partial_fcns - partial-function expressions, indexed by the ids in pfcn_refs
//   pfcn_refs    - ids of the partial functions to evaluate
// After each evaluation the generated code tests `retval` and returns the
// on_problem expression when it is non-zero.
7978 // Assume that all colrefs unpacked already ...
7979 static string gen_unpack_partial_fcn(table_list *schema,
7980 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7983 set<int>::iterator si;
7985 // Since set<..> is a "Sorted Associative Container",
7986 // we can walk through it in sorted order by walking from
7987 // begin() to end(). (and the partial fcns must be
7988 // evaluated in this order).
7989 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7990 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7991 ret += "\tif(retval) return "+on_problem+" ;\n";
// Overload of gen_unpack_partial_fcn that also handles cached functions.
//   fcn_ref_cnt    - reference count per function id; a count above 1 means
//                    the result is cached behind a fcn_ref_cnt_<id> flag
//   is_partial_fcn - per function id, whether it is a partial function (gets a
//                    retval check) as opposed to a cached-only function
// For ids referenced more than once, the generated code is guarded by
// `if(fcn_ref_cnt_<id>==0)` and sets the flag to 1 once evaluated.
7996 // Assume that all colrefs unpacked already ...
7997 // this time with cached functions.
7998 static string gen_unpack_partial_fcn(table_list *schema,
7999 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8000 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
8003 set<int>::iterator si;
8005 // Since set<..> is a "Sorted Associative Container",
8006 // we can walk through it in sorted order by walking from
8007 // begin() to end(). (and the partial fcns must be
8008 // evaluated in this order).
8009 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
// Open the "not yet evaluated for this tuple" guard for shared results.
8010 if(fcn_ref_cnt[(*si)] > 1){
8011 ret += "\tif(fcn_ref_cnt_"+int_to_string((*si))+"==0){\n";
8013 if(is_partial_fcn[(*si)]){
8014 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
8015 ret += "\tif(retval) return "+on_problem+" ;\n";
8017 if(fcn_ref_cnt[(*si)] > 1){
// A non-partial shared function is evaluated here and its value stored.
8018 if(!is_partial_fcn[(*si)]){
8019 ret += "\t\tpartial_fcn_result_"+int_to_string((*si))+"="+generate_cached_fcn(partial_fcns[(*si)],(*si),schema)+";\n";
8021 ret += "\t\tfcn_ref_cnt_"+int_to_string((*si))+"=1;\n";
// Builds, for each partial function in pfcn_refs, the code to unpack the
// columns it needs that are not yet in found_cids, followed by its evaluation.
//   schema       - forwarded to gen_unpack_cids and unpack_partial_fcn
//   partial_fcns - partial-function expressions, indexed by the ids in pfcn_refs
//   pfcn_refs    - ids of the partial functions to evaluate
//   found_cids   - columns unpacked so far; passed to get_new_se_cids
//   gtbl         - passed through to get_new_se_cids
//   on_problem   - expression the generated code returns on unpack or
//                  evaluation failure
//   needs_xform  - forwarded to gen_unpack_cids
8030 // This version finds and unpacks new colrefs.
8031 // found_cids gets updated with the newly unpacked cids.
8032 static string gen_full_unpack_partial_fcn(table_list *schema,
8033 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8034 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8035 vector<bool> &needs_xform){
8037 set<int>::iterator slsi;
8039 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8040 // find all new fields ref'd by this partial fcn.
8041 col_id_set new_cids;
8042 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8043 // Unpack these values.
8044 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8046 // Now evaluate the partial fcn.
8047 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8048 ret += "\tif(retval) return "+on_problem+" ;\n";
// Overload of gen_full_unpack_partial_fcn that is aware of cached functions.
//   fcn_ref_cnt    - reference count per function id; a count above 1 wraps
//                    the evaluation in an `if(fcn_ref_cnt_<id>==0)` guard
//   is_partial_fcn - per function id; column unpacking and evaluation are
//                    emitted only for ids flagged as partial functions
// The remaining parameters have the same roles as in the plain overload.
8053 // This version finds and unpacks new colrefs.
8054 // found_cids gets updated with the newly unpacked cids.
8055 // BUT : only for the partial functions.
8056 static string gen_full_unpack_partial_fcn(table_list *schema,
8057 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8058 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
8059 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8060 vector<bool> &needs_xform){
8062 set<int>::iterator slsi;
8064 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8065 if(is_partial_fcn[(*slsi)]){
8066 // find all new fields ref'd by this partial fcn.
8067 col_id_set new_cids;
8068 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8069 // Unpack these values.
8070 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8072 // Now evaluate the partial fcn.
8073 if(fcn_ref_cnt[(*slsi)] > 1){
8074 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8076 if(is_partial_fcn[(*slsi)]){
8077 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8078 ret += "\tif(retval) return "+on_problem+" ;\n";
// Mark a shared result as evaluated so later references skip the work.
8080 if(fcn_ref_cnt[(*slsi)] > 1){
8081 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
// Builds evaluation code for cached (non-partial) functions in pfcn_refs that
// are referenced more than once.
//   schema         - forwarded to generate_cached_fcn
//   partial_fcns   - function expressions, indexed by the ids in pfcn_refs
//   pfcn_refs      - ids of the functions to consider
//   fcn_ref_cnt    - reference count per function id
//   is_partial_fcn - per function id; ids flagged as partial are skipped
// The generated code evaluates the function only if fcn_ref_cnt_<id> is still
// 0, stores the value in partial_fcn_result_<id>, and sets the flag to 1.
8090 static string gen_remaining_cached_fcns(table_list *schema,
8091 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8092 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn){
8094 set<int>::iterator slsi;
8096 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8097 if(!is_partial_fcn[(*slsi)] && fcn_ref_cnt[(*slsi)] > 1){
// This inner test repeats the reference-count condition already checked above.
8099 if(fcn_ref_cnt[(*slsi)] > 1){
8100 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8101 ret += "\t\tpartial_fcn_result_"+int_to_string((*slsi))+"="+generate_cached_fcn(partial_fcns[(*slsi)],(*slsi),schema)+";\n";
8102 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
// Builds unpack code for every column in cid_set that is absent from found_cids.
//   schema      - used to look up each column's type name
//   cid_set     - all columns the query node references
//   found_cids  - columns already unpacked by earlier generated code
//   on_problem  - expression the generated code returns when unpacking a
//                 buffer-typed column reports a problem
//   needs_xform - indexed by table-variable reference; selects the transforming
//                 or the "noxf" unpack function for that input
8111 // unpack the colrefs in cid_set not in found_cids
8112 static string gen_remaining_colrefs(table_list *schema,
8113 col_id_set &cid_set, col_id_set &found_cids, string on_problem,
8114 vector<bool> &needs_xform){
8116 col_id_set::iterator csi;
8118 for(csi=cid_set.begin(); csi!=cid_set.end();csi++){
8119 if(found_cids.count( (*csi) ) == 0){
8120 int schref = (*csi).schema_ref;
8121 int tblref = (*csi).tblvar_ref;
8122 string field = (*csi).field;
// NOTE(review): dt is constructed from the type name only; no modifier list
// is passed here - confirm that is intended.
8123 data_type dt(schema->get_type_name(schref,field));
8125 if(needs_xform[tblref]){
8126 unpack_fcn = dt.get_hfta_unpack_fcn();
8128 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
// Buffer types use the checked unpack call (tuple size and &problem);
// all other types use the "_nocheck" variant.
8130 if(dt.is_buffer_type()){
8131 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
8133 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
8136 if(dt.is_buffer_type()){
8137 ret.append("\tif(problem) return "+on_problem+" ;\n");
// Builds declarations of selvar_<s> temporaries for buffer-typed select
// expressions, each initialized with the generated code of its expression.
//   schema      - forwarded to generate_se_code
//   select_list - select expressions, indexed by s
// No temporary is produced for a plain column reference, a partial function,
// or a function with an aggregate reference (get_aggr_ref() >= 0).
8144 static string gen_buffer_selvars(table_list *schema,
8145 vector<select_element *> &select_list){
8149 for(s=0;s<select_list.size();s++){
8150 scalarexp_t *se = select_list[s]->se;
8151 data_type *sdt = se->get_data_type();
8152 if(sdt->is_buffer_type() &&
8153 !( (se->get_operator_type() == SE_COLREF) ||
8154 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8155 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8157 sprintf(tmpstr,"selvar_%d",s);
8158 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
8159 ret += generate_se_code(se,schema) +";\n";
// Formats the " + <size_fcn>(&...)" terms that add each buffer-typed select
// expression's size to a generated tuple-size expression.
//   select_list - select expressions, indexed by s
//   schema      - forwarded to generate_se_code
// Expressions that are not a column reference, partial function, or
// aggregate-referencing function are sized through &selvar_<s>; the others
// are sized through the address of their generated expression code.
8165 static string gen_buffer_selvars_size(vector<select_element *> &select_list,table_list *schema){
8169 for(s=0;s<select_list.size();s++){
8170 scalarexp_t *se = select_list[s]->se;
8171 data_type *sdt = se->get_data_type();
8172 if(sdt->is_buffer_type()){
8173 if( !( (se->get_operator_type() == SE_COLREF) ||
8174 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8175 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8177 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
8180 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),
8181 generate_se_code(se,schema).c_str());
// Formats the generated calls that destroy the selvar_<s> temporaries of
// buffer-typed select expressions.
//   select_list - select expressions, indexed by s
// NOTE(review): this filter also excludes SE_AGGR_STAR and SE_AGGR_SE, which
// the filter in gen_buffer_selvars does not - confirm the two are meant to
// select the same expressions.
8189 static string gen_buffer_selvars_dtr(vector<select_element *> &select_list){
8193 for(s=0;s<select_list.size();s++){
8194 scalarexp_t *se = select_list[s]->se;
8195 data_type *sdt = se->get_data_type();
8196 if(sdt->is_buffer_type() &&
8197 !( (se->get_operator_type() == SE_COLREF) ||
8198 (se->get_operator_type() == SE_AGGR_STAR) ||
8199 (se->get_operator_type() == SE_AGGR_SE) ||
8200 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8201 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8203 sprintf(tmpstr,"\t\t%s(&selvar_%d);\n",
8204 sdt->get_hfta_buffer_destroy().c_str(), s );
// Builds the generated code that stores select-list values into the output
// tuple's tuple_var<s> members.
//   schema        - forwarded to generate_se_code
//   select_list   - select expressions, indexed by s
//   node_name     - used to derive the tuple struct name
//   temporal_only - when true, buffer-typed fields are not copied and only
//                   temporal-typed fields are assigned
// Buffer contents are copied at offset tuple_pos, which starts just past the
// tuple struct plus one gs_uint8_t and advances by each buffer's size.
8212 static string gen_pack_tuple(table_list *schema, vector<select_element *> &select_list, string node_name, bool temporal_only){
8216 ret += "\tint tuple_pos = sizeof("+generate_tuple_name(node_name)+") + sizeof(gs_uint8_t);\n";
8217 for(s=0;s<select_list.size();s++){
8218 scalarexp_t *se = select_list[s]->se;
8219 data_type *sdt = se->get_data_type();
8221 if(!temporal_only && sdt->is_buffer_type()){
// NOTE(review): selvar_<s> is used here for everything except column
// references and partial functions; gen_buffer_selvars additionally skips
// functions with an aggregate reference - confirm such expressions cannot
// reach this branch.
8222 if( !( (se->get_operator_type() == SE_COLREF) ||
8223 (se->get_operator_type() == SE_FUNC && se->is_partial()))
8225 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
8227 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
8230 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code(se,schema).c_str());
8232 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code(se,schema).c_str());
8235 }else if (!temporal_only || sdt->is_temporal()) {
8236 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
8238 ret.append(generate_se_code(se,schema) );
8246 //-------------------------------------------------------------------------
8247 // functor generation methods
8248 //-------------------------------------------------------------------------
8250 /////////////////////////////////////////////////////////
8251 //// File Output Operator
// Returns the class name of the generated functor for this file-output node:
// "output_file_functor_" followed by the normalized node name.
8252 string output_file_qpn::generate_functor_name(){
8253 return("output_file_functor_" + normalize_name(get_node_name()));
// Builds the source text of the functor class for a file-output operator.
//   schema, Ext_fcns, needs_xform - not referenced by the statements visible here
// The generated class holds the schema handle, unpack offsets, the last
// timestamp/bucket, and hashing state, and exposes temp_status_received,
// new_epoch, output_hash, num_file_streams, get_filename_base, do_compression,
// is_eof_tuple, propagate_tuple and create_temp_status_tuple.
8257 string output_file_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8258 string ret = "class " + this->generate_functor_name() + "{\n";
8260 // Find the temporal field
8261 int temporal_field_idx;
8262 data_type *tdt = NULL;
// NOTE(review): a data_type is allocated with new on every iteration and no
// matching delete is visible here - confirm whether the non-temporal ones leak.
8263 for(temporal_field_idx=0;temporal_field_idx<fields.size();temporal_field_idx++){
8264 tdt = new data_type(fields[temporal_field_idx]->get_type(), fields[temporal_field_idx]->get_modifier_list());
8265 if(tdt->is_temporal()){
// Reaching fields.size() means the loop found no temporal field.
8272 if(temporal_field_idx == fields.size()){
8273 fprintf(stderr,"ERROR, no temporal field for file output operator %s\n",node_name.c_str());
8277 ret += "private:\n";
8279 // var to save the schema handle
8280 ret += "\tint schema_handle0;\n";
8281 // tuple metadata offset
8282 ret += "\tint tuple_metadata_offset0;\n";
8283 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[temporal_field_idx]->get_name().c_str());
8286 // For unpacking the hashing fields, if any
8288 for(h=0;h<hash_flds.size();++h){
8289 sprintf(tmpstr,"unpack_var_%s", fields[hash_flds[h]]->get_name().c_str());
// NOTE(review): hdt is allocated with new and no delete is visible here.
8290 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8291 ret+="\t"+hdt->make_host_cvar(tmpstr)+";\n";
// The temporal field's offset variable was already declared above.
8292 if(hash_flds[h]!=temporal_field_idx){
8293 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[hash_flds[h]]->get_name().c_str());
8297 // Special case for output file hashing
8298 if(n_streams>1 && hash_flds.size()==0){
8299 ret+="\tgs_uint32_t outfl_cnt;\n";
8302 ret += "//\t\tRemember the last posted timestamp.\n";
8303 ret+="\t"+tdt->make_host_cvar("timestamp")+";\n";
8304 ret+="\t"+tdt->make_host_cvar("last_bucket")+";\n";
8305 ret+="\t"+tdt->make_host_cvar("slack")+";\n";
8306 ret += "\tbool first_execution;\n";
8307 ret += "\tbool temp_tuple_received;\n";
8308 ret += "\tbool is_eof;\n";
8310 ret += "\tgs_int32_t bucketwidth;\n";
8313 //-------------------
8314 // The functor constructor
8315 // pass in a schema handle (e.g. for the 1st input stream),
8316 // use it to determine how to unpack the merge variable.
8317 // ASSUME that both streams have the same layout,
8318 // just duplicate it.
8321 ret += "//\t\tFunctor constructor.\n";
8322 ret += this->generate_functor_name()+"(int schema_hndl){\n";
8324 ret += "\tschema_handle0 = schema_hndl;\n";
8325 // tuple metadata offset
8326 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
// A bucketwidth of 0 in the output spec emits a generated value of 60.
8328 if(output_spec->bucketwidth == 0)
8329 ret += "\tbucketwidth = 60;\n";
8331 ret += "\tbucketwidth = "+int_to_string(output_spec->bucketwidth)+";\n";
8332 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8334 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[temporal_field_idx]->get_name().c_str(), fields[temporal_field_idx]->get_name().c_str());
8336 // Hashing field unpacking, if any
8337 for(h=0;h<hash_flds.size();++h){
8338 if(hash_flds[h]!=temporal_field_idx){
8339 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[hash_flds[h]]->get_name().c_str(),fields[hash_flds[h]]->get_name().c_str());
8344 ret+="\tfirst_execution = true;\n";
8346 // Initialize internal state
8347 ret += "\ttemp_tuple_received = false;\n";
8349 // Init last timestamp values to minimum value for their type
// (for a type that is not increasing, the maximum literal is used instead)
8350 if (tdt->is_increasing()){
8351 ret+="\ttimestamp = " + tdt->get_min_literal() + ";\n";
8352 ret+="\tlast_bucket = " + tdt->get_min_literal() + ";\n";
8354 ret+="\ttimestamp = " + tdt->get_max_literal() + ";\n";
8355 ret+="\tlast_bucket = " + tdt->get_max_literal() + ";\n";
8361 ret += "//\t\tFunctor destructor.\n";
8362 ret += "~"+this->generate_functor_name()+"(){\n";
// This operator has no query parameters: the generated load/destroy routines
// are empty stubs.
8366 ret += "int load_params_"+this->generate_functor_name()+"(gs_int32_t sz, void *value){return 0;}\n";
8367 ret += "void destroy_params_"+this->generate_functor_name()+"(){}\n";
8369 // Register new parameter block
8370 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8371 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8372 ret += "\treturn this->load_params_"+this->generate_functor_name()+
// Generated temp_status_received: records EOF and temp-tuple status, then
// unpacks the timestamp and any hash fields from the input tuple.
8376 ret+="\nbool temp_status_received(const host_tuple& tup0)/* const*/ {\n";
8377 ret+="\tgs_int32_t problem;\n";
8379 ret += "\tvoid *tup_ptr = (void *)(&tup0);\n";
8380 ret += "\tis_eof = ftaschema_is_eof_tuple(schema_handle0,tup_ptr);\n";
8382 ret += gen_temp_tuple_check(this->node_name, 0);
8384 sprintf(tmpstr,"\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", tdt->get_hfta_unpack_fcn_noxf().c_str(), fields[temporal_field_idx]->get_name().c_str(), 0);
8387 for(h=0;h<hash_flds.size();++h){
8388 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8389 sprintf(tmpstr,"\tunpack_var_%s = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", fields[hash_flds[h]]->get_name().c_str(), hdt->get_hfta_unpack_fcn_noxf().c_str(), fields[hash_flds[h]]->get_name().c_str(), 0);
8393 " return temp_tuple_received;\n"
// Generated new_epoch: on first execution, or once the timestamp reaches the
// end of the current bucket, recompute last_bucket from the timestamp.
8399 "bool new_epoch(){\n"
8400 " if(first_execution || (last_bucket + 1) * bucketwidth <= timestamp){\n"
8401 " last_bucket = timestamp / bucketwidth;\n"
8402 " first_execution = false;\n"
8412 "inline gs_uint32_t output_hash(){return 0;}\n\n";
// With no hash fields, the generated output_hash is driven by the outfl_cnt
// counter, which is compared against the number of streams.
8414 if(hash_flds.size()==0){
8416 "gs_uint32_t output_hash(){\n"
8418 " if(outfl_cnt >= "+int_to_string(n_streams)+")\n"
8420 " return outfl_cnt;\n"
8426 "gs_uint32_t output_hash(){\n"
8427 " gs_uint32_t ret = "
8429 for(h=0;h<hash_flds.size();++h){
8431 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8432 if(hdt->use_hashfunc()){
8433 sprintf(tmpstr,"%s(&(unpack_var_%s))",hdt->get_hfta_hashfunc().c_str(),fields[hash_flds[h]]->get_name().c_str());
8435 sprintf(tmpstr,"unpack_var_%s",fields[hash_flds[h]]->get_name().c_str());
// NOTE(review): the modulus is the number of hash fields, while the other
// output_hash variant bounds its result by n_streams - confirm this should
// not be n_streams as well.
8441 " return ret % "+int_to_string(hash_flds.size())+";\n"
8448 "gs_uint32_t num_file_streams(){\n"
8449 " return("+int_to_string(n_streams)+");\n"
// NOTE(review): the generated code formats the file name with sprintf into a
// fixed 500-byte buffer; a long output directory or query name could exceed it.
8454 "string get_filename_base(){\n"
8455 " char tmp_fname[500];\n";
8457 string output_filename_base = hfta_query_name+filestream_id;
8459 if(n_hfta_clones > 1){
8460 output_filename_base += "_"+int_to_string(parallel_idx);
// The file name ends in the bucket start (last_bucket*bucketwidth); the output
// directory is prepended only when one is configured.
8466 if(output_spec->output_directory == "")
8468 " sprintf(tmp_fname,\""+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8470 " sprintf(tmp_fname,\""+output_spec->output_directory+"/"+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8472 " return (string)(tmp_fname);\n"
8478 "bool do_compression(){\n";
8480 ret += " return true;\n";
8482 ret += " return false;\n";
8486 "bool is_eof_tuple(){\n"
8490 "bool propagate_tuple(){\n"
8493 ret+="\treturn false;\n";
8495 ret+="\treturn true;\n";
8497 // create a temp status tuple
8498 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
// The status tuple uses the hfta query's tuple layout and carries only the
// current timestamp in the temporal field's slot.
8500 ret += gen_init_temp_status_tuple(this->hfta_query_name);
8502 sprintf(tmpstr,"\ttuple->tuple_var%d = timestamp;\n",temporal_field_idx);
8507 ret += "\treturn 0;\n";
// Builds the statement that instantiates this node's file-output operator.
//   i      - operator index; the generated variable is named op<i>
//   params - text placed first in the generated constructor argument list
// The operator template name defaults to "file_output_operator" and is chosen
// by a switch on compression_type among file_/zfile_/bfile_output_operator.
// The constructor also receives the hfta query name and its
// <hfta_query_name>_schema_definition symbol.
8515 string output_file_qpn::generate_operator(int i, string params){
8516 string optype = "file_output_operator";
8517 switch(compression_type){
8519 optype = "file_output_operator";
8522 optype = "zfile_output_operator";
8525 optype = "bfile_output_operator";
8529 return(" "+optype+"<" +
8530 generate_functor_name() +
8531 "> *op"+int_to_string(i)+" = new "+optype+"<"+
8532 generate_functor_name() +">("+params+", \"" + hfta_query_name + "\""
8533 + "," + hfta_query_name + "_schema_definition);\n");
8536 /////////////////////////////////////////////////////////
// Returns the class name of the generated functor for this select/project
// node: "spx_functor_" followed by the normalized node name.
// NOTE(review): normalize_name is applied twice here; the other
// generate_functor_name methods in this file apply it once - confirm the
// second call is intentional.
8540 string spx_qpn::generate_functor_name(){
8541 return("spx_functor_" + normalize_name(normalize_name(this->get_node_name())));
// Builds the source text of the functor class for a select/project operator.
//   schema      - table list used for type lookups and expression code generation
//   Ext_fcns    - external function list used to find partial functions,
//                 complex literals and pass-by-handle parameters
//   needs_xform - forwarded to the unpack-code generators
// The generated class contains private state, a constructor and destructor,
// parameter-block routines, predicate(), create_output_tuple(),
// temp_status_received() and create_temp_status_tuple().
8544 string spx_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8545 // Initialize generate utility globals
8546 segen_gb_tbl = NULL;
8548 string ret = "class " + this->generate_functor_name() + "{\n";
8550 // Find variables referenced in this query node.
8553 col_id_set::iterator csi;
// cid_set collects every column referenced by the WHERE predicates and the
// select list.
8556 for(w=0;w<where.size();++w)
8557 gather_pr_col_ids(where[w]->pr,cid_set,NULL);
8558 for(s=0;s<select_list.size();s++){
8559 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
8563 // Private variables : store the state of the functor.
8564 // 1) variables for unpacked attributes
8565 // 2) offsets of the unpacked attributes
8566 // 3) storage of partial functions
8567 // 4) storage of complex literals (i.e., require a constructor)
8569 ret += "private:\n";
8570 ret += "\tbool first_execution;\t// internal processing state \n";
8571 ret += "\tint schema_handle0;\n";
8573 // generate the declaration of all the variables related to
8574 // temp tuples generation
8575 ret += gen_decl_temp_vars();
8578 // unpacked attribute storage, offsets
8579 ret += "//\t\tstorage and offsets of accessed fields.\n";
8580 ret += generate_access_vars(cid_set,schema);
8581 // tuple metadata management
8582 ret += "\tint tuple_metadata_offset0;\n";
8584 // Variables to store results of partial functions.
8585 // WARNING find_partial_functions modifies the SE
8586 // (it marks the partial function id).
8587 ret += "//\t\tParital function result storage\n";
8588 vector<scalarexp_t *> partial_fcns;
8589 vector<int> fcn_ref_cnt;
8590 vector<bool> is_partial_fcn;
8591 for(s=0;s<select_list.size();s++){
8592 find_partial_fcns(select_list[s]->se, &partial_fcns,&fcn_ref_cnt,&is_partial_fcn, Ext_fcns);
8594 for(w=0;w<where.size();w++){
8595 find_partial_fcns_pr(where[w]->pr, &partial_fcns, &fcn_ref_cnt,&is_partial_fcn,Ext_fcns);
8597 // Unmark non-partial expensive functions referenced only once.
8598 for(p=0; p<partial_fcns.size();p++){
8599 if(!is_partial_fcn[p] && fcn_ref_cnt[p] <= 1){
8600 partial_fcns[p]->set_partial_ref(-1);
8603 if(partial_fcns.size()>0){
8604 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,true);
8607 // Complex literals (i.e., they need constructors)
8608 ret += "//\t\tComplex literal storage.\n";
8609 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8610 ret += generate_complex_lit_vars(complex_literals);
8612 // Pass-by-handle parameters
8613 ret += "//\t\tPass-by-handle storage.\n";
8614 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8615 ret += generate_pass_by_handle_vars(param_handle_table);
8617 // Variables to hold parameters
8618 ret += "//\tfor query parameters\n";
8619 ret += generate_param_vars(param_tbl);
8622 // The publicly exposed functions
8624 ret += "\npublic:\n";
8627 //-------------------
8628 // The functor constructor
8629 // pass in the schema handle.
8630 // 1) make assignments to the unpack offset variables
8631 // 2) initialize the complex literals
8632 // 3) Set the initial values of the temporal attributes
8633 // referenced in select clause (in case we need to emit
8634 // temporal tuple before receiving first tuple )
8636 ret += "//\t\tFunctor constructor.\n";
8637 ret += this->generate_functor_name()+"(int schema_handle0){\n";
8639 // save schema handle
8640 ret += "this->schema_handle0 = schema_handle0;\n";
8643 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8644 ret += gen_access_var_init(cid_set);
8646 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8649 ret += "//\t\tInitialize complex literals.\n";
8650 ret += gen_complex_lit_init(complex_literals);
8652 // Initialize partial function results so they can be safely GC'd
8653 ret += gen_partial_fcn_init(partial_fcns);
8655 // Initialize non-query-parameter parameter handles
8656 ret += gen_pass_by_handle_init(param_handle_table);
8658 // Init temporal attributes referenced in select list
8659 ret += gen_init_temp_vars(schema, select_list, NULL);
8664 //-------------------
8665 // Functor destructor
8666 ret += "//\t\tFunctor destructor.\n";
8667 ret += "~"+this->generate_functor_name()+"(){\n";
8669 // clean up buffer-type complex literals.
8670 ret += gen_complex_lit_dtr(complex_literals);
8672 // Deregister the pass-by-handle parameters
8673 ret += "/* register and de-register the pass-by-handle parameters */\n";
8674 ret += gen_pass_by_handle_dtr(param_handle_table);
8676 // Reclaim buffer space for partial function results
8677 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8678 ret += gen_partial_fcn_dtr(partial_fcns);
8681 // Destroy the parameters, if any need to be destroyed
8682 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8687 //-------------------
8688 // Parameter manipulation routines
8689 ret += generate_load_param_block(this->generate_functor_name(),
8690 this->param_tbl,param_handle_table );
8691 ret += generate_delete_param_block(this->generate_functor_name(),
8692 this->param_tbl,param_handle_table);
8695 //-------------------
8696 // Register new parameter block
8697 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8698 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8699 ret += "\treturn this->load_params_"+this->generate_functor_name()+
8704 //-------------------
8705 // The selection predicate.
8706 // Unpack variables for 1 cnf element
8707 // at a time, return false immediately if the
8709 // optimization : evaluate the cheap cnf elements
8710 // first, the expensive ones last.
8712 ret += "bool predicate(host_tuple &tup0){\n";
8713 // Variables for execution of the function.
8714 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8715 // Initialize cached function indicators.
8716 for(p=0;p<partial_fcns.size();++p){
8717 if(fcn_ref_cnt[p]>1){
8718 ret+="\tfcn_ref_cnt_"+int_to_string(p)+"=0;\n";
8723 ret += gen_temp_tuple_check(this->node_name, 0);
8725 if(partial_fcns.size()>0){ // partial fcn access failure
8726 ret += "\tgs_retval_t retval = 0;\n";
8730 // Reclaim buffer space for partial function results
8731 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8732 ret += gen_partial_fcn_dtr(partial_fcns);
// Temporal attributes are unpacked before the temp-tuple early return below,
// so their values are refreshed even when the input is a temporal status tuple.
8734 col_id_set found_cids; // colrefs unpacked thus far.
8735 ret += gen_unpack_temp_vars(schema, found_cids, select_list, NULL, needs_xform);
8737 // For temporal status tuple we don't need to do anything else
8738 ret += "\tif (temp_tuple_received) return false;\n\n";
8741 for(w=0;w<where.size();++w){
8742 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8744 // Find the set of variables accessed in this CNF elem,
8745 // but in no previous element.
8746 col_id_set new_cids;
8747 get_new_pred_cids(where[w]->pr,found_cids, new_cids, NULL);
8748 // Unpack these values.
8749 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
8750 // Find partial fcns ref'd in this cnf element
8752 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8753 ret += gen_unpack_partial_fcn(schema,partial_fcns,pfcn_refs,fcn_ref_cnt, is_partial_fcn, "false");
8755 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8756 +") ) return(false);\n";
8759 // The partial functions ref'd in the select list
8760 // must also be evaluated. If one returns false,
8761 // then implicitly the predicate is false.
8763 for(s=0;s<select_list.size();s++){
8764 collect_partial_fcns(select_list[s]->se, sl_pfcns);
// The if below guards only the emitted comment line; the call to
// gen_full_unpack_partial_fcn that follows is made unconditionally.
8766 if(sl_pfcns.size() > 0)
8767 ret += "//\t\tUnpack remaining partial fcns.\n";
8768 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
8769 fcn_ref_cnt, is_partial_fcn,
8770 found_cids, NULL, "false", needs_xform);
8772 // Unpack remaining fields
8773 ret += "//\t\tunpack any remaining fields from the input tuple.\n";
8774 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
8777 ret += "\treturn(true);\n";
8781 //-------------------
8782 // The output tuple function.
8783 // Unpack the remaining attributes into
8784 // the placeholder variables, unpack the
8785 // partial fcn refs, then pack up the tuple.
8787 ret += "host_tuple create_output_tuple() {\n";
8788 ret += "\thost_tuple tup;\n";
8789 ret += "\tgs_retval_t retval = 0;\n";
8791 // Unpack any remaining cached functions.
8792 ret += gen_remaining_cached_fcns(schema, partial_fcns, sl_pfcns,
8793 fcn_ref_cnt, is_partial_fcn);
8796 // Now, compute the size of the tuple.
8798 // Unpack any BUFFER type selections into temporaries
8799 // so that I can compute their size and not have
8800 // to recompute their value during tuple packing.
8801 // I can use regular assignment here because
8802 // these temporaries are non-persistent.
8804 ret += "//\t\tCompute the size of the tuple.\n";
8805 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
8807 // Unpack all buffer type selections, to be able to compute their size
8808 ret += gen_buffer_selvars(schema, select_list);
8810 // The size of the tuple is the size of the tuple struct plus the
8811 // size of the buffers to be copied in.
// (plus one gs_uint8_t, which holds the tuple-type marker written below)
8814 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
8815 ret += gen_buffer_selvars_size(select_list,schema);
8818 // Allocate tuple data block.
8819 ret += "//\t\tCreate the tuple block.\n";
8820 ret += "\ttup.data = malloc(tup.tuple_size);\n";
8821 ret += "\ttup.heap_resident = true;\n";
8822 // Mark tuple as regular
8823 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
8825 // ret += "\ttup.channel = 0;\n";
8826 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
8827 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
8830 // (Here, offsets are hard-wired. is this a problem?)
8832 ret += "//\t\tPack the fields into the tuple.\n";
8833 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
8835 // Delete string temporaries
8836 ret += gen_buffer_selvars_dtr(select_list);
8838 ret += "\treturn tup;\n";
8841 //-------------------------------------------------------------------
8842 // Temporal update functions
8844 ret += "bool temp_status_received(){return temp_tuple_received;};\n\n";
8847 // create a temp status tuple
8848 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8850 ret += gen_init_temp_status_tuple(this->get_node_name());
8853 // (Here, offsets are hard-wired. is this a problem?)
// Only the temporal fields are packed into the status tuple
// (temporal_only = true).
8855 ret += "//\t\tPack the fields into the tuple.\n";
8856 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
8858 ret += "\treturn 0;\n";
// Builds the statement that instantiates this node's select_project_operator.
//   i      - operator index; the generated variable is named op<i>
//   params - text placed first in the generated constructor argument list,
//            followed by the quoted node name
8865 string spx_qpn::generate_operator(int i, string params){
8867 return(" select_project_operator<" +
8868 generate_functor_name() +
8869 "> *op"+int_to_string(i)+" = new select_project_operator<"+
8870 generate_functor_name() +">("+params+", \"" + get_node_name() + "\");\n");
8874 ////////////////////////////////////////////////////////////////
// Returns the class name of the generated functor for this node:
// "sgah_functor_" followed by the normalized node name.
8879 string sgah_qpn::generate_functor_name(){
8880 return("sgah_functor_" + normalize_name(this->get_node_name()));
8884 string sgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8887 // Regular or slow flush?
8888 hfta_slow_flush = 0;
8889 if(this->get_val_of_def("hfta_slow_flush") != ""){
8890 int d = atoi(this->get_val_of_def("hfta_slow_flush").c_str() );
8892 fprintf(stderr,"Warning, hfta_slow_flush in node %s is %d, must be at least 0, setting to 0.\n",node_name.c_str(), d);
8893 hfta_slow_flush = 0;
8895 hfta_slow_flush = d;
8900 // Initialize generate utility globals
8901 segen_gb_tbl = &(gb_tbl);
8903 // Might need to generate empty values for cube processing.
8904 map<int, string> structured_types;
8905 for(g=0;g<gb_tbl.size();++g){
8906 if(gb_tbl.get_data_type(g)->is_structured_type()){
8907 structured_types[gb_tbl.get_data_type(g)->type_indicator()] = gb_tbl.get_data_type(g)->get_type_str();
8911 //--------------------------------
8912 // group definition class
8913 string ret = "class " + generate_functor_name() + "_groupdef{\n";
8915 for(g=0;g<this->gb_tbl.size();g++){
8916 sprintf(tmpstr,"gb_var%d",g);
8917 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8919 // empty strucutred literals
8920 // map<int, string>::iterator sii;
8921 // for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8922 // data_type dt(sii->second);
8923 // literal_t empty_lit(sii->first);
8924 // ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8927 if(structured_types.size()==0){
8928 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
8930 ret += "\t"+generate_functor_name() + "_groupdef(){\n";
8931 for(g=0;g<gb_tbl.size();g++){
8932 data_type *gdt = gb_tbl.get_data_type(g);
8933 if(gdt->is_buffer_type()){
8934 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
8935 gdt->get_hfta_buffer_init().c_str(), g );
8943 ret += "\t// shallow copy constructors\n";
8944 ret += "\t"+generate_functor_name() + "_groupdef("+
8945 "const " + this->generate_functor_name() + "_groupdef &gd){\n";
8946 for(g=0;g<gb_tbl.size();g++){
8947 data_type *gdt = gb_tbl.get_data_type(g);
8948 sprintf(tmpstr,"\t\tgb_var%d = gd.gb_var%d;\n",g,g);
8950 // TODO : do strings perisist after the call? its a shllow copy
8951 // if(gdt->is_buffer_type()){
8952 // sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8953 // gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8956 // sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8961 ret += "\t"+generate_functor_name() + "_groupdef("+
8962 "const " + this->generate_functor_name() + "_groupdef &gd, bool *pattern){\n";
8963 // -- For patterns, need empty strucutred literals
8964 map<int, string>::iterator sii;
8965 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8966 data_type dt(sii->second);
8967 literal_t empty_lit(sii->first);
8968 ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8971 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8972 literal_t empty_lit(sii->first);
8973 ret += "\t\t"+empty_lit.to_hfta_C_code("&"+empty_lit.hfta_empty_literal_name())+";\n";
8975 for(g=0;g<gb_tbl.size();g++){
8976 data_type *gdt = gb_tbl.get_data_type(g);
8977 ret += "\t\tif(pattern["+int_to_string(g)+"]){\n";
8978 sprintf(tmpstr,"\t\t\tgb_var%d = gd.gb_var%d;\n",g,g);
8980 // TODO Do strings persist long enough? it's a shallow copy constructor?
8981 // if(gdt->is_buffer_type()){
8982 // sprintf(tmpstr,"\t\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8983 // gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8986 // sprintf(tmpstr,"\t\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8989 ret += "\t\t}else{\n";
8990 literal_t empty_lit(gdt->type_indicator());
8991 if(empty_lit.is_cpx_lit()){
8992 ret +="\t\t\tgb_var"+int_to_string(g)+"= "+empty_lit.hfta_empty_literal_name()+";\n";
8994 ret +="\t\t\tgb_var"+int_to_string(g)+"="+empty_lit.to_hfta_C_code("")+";\n";
9000 ret += "\t// deep assignment operator\n";
9001 ret += "\t"+generate_functor_name() + "_groupdef& operator=(const "+
9002 this->generate_functor_name() + "_groupdef &gd){\n";
9003 for(g=0;g<gb_tbl.size();g++){
9004 data_type *gdt = gb_tbl.get_data_type(g);
9005 if(gdt->is_buffer_type()){
9006 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd.gb_var%d));\n",
9007 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
9010 sprintf(tmpstr,"\t\tgb_var%d = gd.gb_var%d;\n",g,g);
9017 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
9018 for(g=0;g<gb_tbl.size();g++){
9019 data_type *gdt = gb_tbl.get_data_type(g);
9020 if(gdt->is_buffer_type()){
9021 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
9022 gdt->get_hfta_buffer_destroy().c_str(), g );
9029 for(g=0;g<gb_tbl.size();g++){
9030 data_type *gdt = gb_tbl.get_data_type(g);
9031 if(gdt->is_temporal()){
9036 ret += tgdt->get_host_cvar_type()+" get_curr_gb(){\n";
9037 ret+="\treturn gb_var"+int_to_string(g)+";\n";
9042 //--------------------------------
9043 // aggr definition class
9044 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
9046 for(a=0;a<aggr_tbl.size();a++){
9047 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
9048 sprintf(tmpstr,"aggr_var%d",a);
9049 if(aggr_tbl.is_builtin(a)){
9050 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
9051 if(aggr_tbl.get_op(a) == "AVG"){ // HACK!
9052 data_type cnt_type = data_type("ullong");
9053 ret+="\t"+cnt_type.make_host_cvar(string(tmpstr)+"_cnt")+";\n";
9054 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(string(tmpstr)+"_sum")+";\n";
9057 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
9061 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
9063 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
9064 for(a=0;a<aggr_tbl.size();a++){
9065 if(aggr_tbl.is_builtin(a)){
9066 data_type *adt = aggr_tbl.get_data_type(a);
9067 if(adt->is_buffer_type()){
9068 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
9069 adt->get_hfta_buffer_destroy().c_str(), a );
9073 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
9074 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9075 ret+="(aggr_var"+int_to_string(a)+"));\n";
9081 //-------------------------------------------
9082 // group-by patterns for the functor,
9083 // initialization within the class is cumbersome.
9084 int n_patterns = gb_tbl.gb_patterns.size();
9086 ret += "bool "+this->generate_functor_name()+"_gb_patterns["+int_to_string(n_patterns)+
9087 "]["+int_to_string(gb_tbl.size())+"] = {\n";
9088 if(n_patterns == 0){
9089 for(i=0;i<gb_tbl.size();++i){
9094 for(i=0;i<n_patterns;++i){
9095 if(i>0) ret += ",\n";
9097 for(j=0;j<gb_tbl.size();j++){
9098 if(j>0) ret += ", ";
9099 if(gb_tbl.gb_patterns[i][j]){
9112 //--------------------------------
9114 ret += "class " + this->generate_functor_name() + "{\n";
9116 // Find variables referenced in this query node.
9119 col_id_set::iterator csi;
9121 for(w=0;w<where.size();++w)
9122 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
9123 for(w=0;w<having.size();++w)
9124 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
9125 for(g=0;g<gb_tbl.size();g++)
9126 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
9128 for(s=0;s<select_list.size();s++){
9129 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
9133 // Private variables : store the state of the functor.
9134 // 1) variables for unpacked attributes
9135 // 2) offsets of the unpacked attributes
9136 // 3) storage of partial functions
9137 // 4) storage of complex literals (i.e., require a constructor)
9139 ret += "private:\n";
9141 // var to save the schema handle
9142 ret += "\tint schema_handle0;\n";
9143 // metadata from schema handle
9144 ret += "\tint tuple_metadata_offset0;\n";
9146 // generate the declaration of all the variables related to
9147 // temp tuples generation
9148 ret += gen_decl_temp_vars();
9150 // unpacked attribute storage, offsets
9151 ret += "//\t\tstorage and offsets of accessed fields.\n";
9152 ret += generate_access_vars(cid_set, schema);
9154 // Variables to store results of partial functions.
9155 // WARNING find_partial_functions modifies the SE
9156 // (it marks the partial function id).
9157 ret += "//\t\tParital function result storage\n";
9158 vector<scalarexp_t *> partial_fcns;
9159 vector<int> fcn_ref_cnt;
9160 vector<bool> is_partial_fcn;
9161 for(s=0;s<select_list.size();s++){
9162 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
9164 for(w=0;w<where.size();w++){
9165 find_partial_fcns_pr(where[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9167 for(w=0;w<having.size();w++){
9168 find_partial_fcns_pr(having[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9170 for(g=0;g<gb_tbl.size();g++){
9171 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns,NULL,NULL, Ext_fcns);
9173 for(a=0;a<aggr_tbl.size();a++){
9174 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns,NULL,NULL, Ext_fcns);
9176 if(partial_fcns.size()>0){
9177 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
9178 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
9181 // Complex literals (i.e., they need constructors)
9182 ret += "//\t\tComplex literal storage.\n";
9183 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
9184 ret += generate_complex_lit_vars(complex_literals);
9186 // Pass-by-handle parameters
9187 ret += "//\t\tPass-by-handle storage.\n";
9188 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
9189 ret += generate_pass_by_handle_vars(param_handle_table);
9192 // variables to hold parameters.
9193 ret += "//\tfor query parameters\n";
9194 ret += generate_param_vars(param_tbl);
9196 // Is there a temporal flush? If so create flush temporaries,
9197 // create flush indicator.
9198 bool uses_temporal_flush = false;
9199 for(g=0;g<gb_tbl.size();g++){
9200 data_type *gdt = gb_tbl.get_data_type(g);
9201 if(gdt->is_temporal())
9202 uses_temporal_flush = true;
9205 if(uses_temporal_flush){
9206 ret += "//\t\tFor temporal flush\n";
9207 for(g=0;g<gb_tbl.size();g++){
9208 data_type *gdt = gb_tbl.get_data_type(g);
9209 if(gdt->is_temporal()){
9210 sprintf(tmpstr,"last_gb%d",g);
9211 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9212 sprintf(tmpstr,"last_flushed_gb%d",g);
9213 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9216 ret += "\tbool needs_temporal_flush;\n";
9217 ret += "\tbool disordered_arrival;\n";
9221 // The publicly exposed functions
9223 ret += "\npublic:\n";
9226 //-------------------
9227 // The functor constructor
9228 // pass in the schema handle.
9229 // 1) make assignments to the unpack offset variables
9230 // 2) initialize the complex literals
9232 ret += "//\t\tFunctor constructor.\n";
9233 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9235 // save the schema handle
9236 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
9239 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
9240 ret += gen_access_var_init(cid_set);
9242 ret += "tuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9245 ret += "//\t\tInitialize complex literals.\n";
9246 ret += gen_complex_lit_init(complex_literals);
9248 // Initialize partial function results so they can be safely GC'd
9249 ret += gen_partial_fcn_init(partial_fcns);
9251 // Initialize non-query-parameter parameter handles
9252 ret += gen_pass_by_handle_init(param_handle_table);
9254 // temporal flush variables
9255 // ASSUME that structured values won't be temporal.
9256 if(uses_temporal_flush){
9257 ret += "//\t\tInitialize temporal flush variables.\n";
9258 for(g=0;g<gb_tbl.size();g++){
9259 data_type *gdt = gb_tbl.get_data_type(g);
9260 if(gdt->is_temporal()){
9261 literal_t gl(gdt->type_indicator());
9262 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9264 sprintf(tmpstr,"\tlast_flushed_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9268 ret += "\tneeds_temporal_flush = false;\n";
9271 // Init temporal attributes referenced in select list
9272 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
9276 //-------------------
9277 // Functor destructor
9278 ret += "//\t\tFunctor destructor.\n";
9279 ret += "~"+this->generate_functor_name()+"(){\n";
9281 // clean up buffer type complex literals
9282 ret += gen_complex_lit_dtr(complex_literals);
9284 // Deregister the pass-by-handle parameters
9285 ret += "/* register and de-register the pass-by-handle parameters */\n";
9286 ret += gen_pass_by_handle_dtr(param_handle_table);
9288 // clean up partial function results.
9289 ret += "/* clean up partial function storage */\n";
9290 ret += gen_partial_fcn_dtr(partial_fcns);
9292 // Destroy the parameters, if any need to be destroyed
9293 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9298 //-------------------
9299 // Parameter manipulation routines
9300 ret += generate_load_param_block(this->generate_functor_name(),
9301 this->param_tbl,param_handle_table);
9302 ret += generate_delete_param_block(this->generate_functor_name(),
9303 this->param_tbl,param_handle_table);
9305 //-------------------
9306 // Register new parameter block
9308 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
9309 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9310 ret += "\treturn this->load_params_"+this->generate_functor_name()+
9314 // -----------------------------------
9315 // group-by pattern support
9318 "int n_groupby_patterns(){\n"
9319 " return "+int_to_string(gb_tbl.gb_patterns.size())+";\n"
9321 "bool *get_pattern(int p){\n"
9322 " return "+this->generate_functor_name()+"_gb_patterns[p];\n"
9326 //---------------------------------------
9327 // Parameterized number of tuples output per slow flush
9329 "int gb_flush_per_tuple(){\n"
9330 " return "+int_to_string(hfta_slow_flush)+";\n"
9337 //-------------------
9338 // the create_group method.
9339 // This method creates a group in a buffer passed in
9340 // (to allow for creation on the stack).
9341 // There are also a couple of side effects:
9342 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
9343 // 2) determine if a temporal flush is required.
9345 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
9346 // Variables for execution of the function.
9347 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9349 if(partial_fcns.size()>0){ // partial fcn access failure
9350 ret += "\tgs_retval_t retval = 0;\n";
9354 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
9355 "_groupdef *) buffer;\n";
9357 // Start by cleaning up partial function results
9358 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
9359 set<int> w_pfcns; // partial fcns in where clause
9360 for(w=0;w<where.size();++w)
9361 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
9363 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
9364 for(g=0;g<gb_tbl.size();g++){
9365 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
9367 for(a=0;a<aggr_tbl.size();a++){
9368 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
9370 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
9371 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
9372 // ret += gen_partial_fcn_dtr(partial_fcns);
9375 ret += gen_temp_tuple_check(this->node_name, 0);
9376 col_id_set found_cids; // colrefs unpacked thus far.
9377 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
9380 // Save temporal group-by variables
9383 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
9385 for(g=0;g<gb_tbl.size();g++){
9387 data_type *gdt = gb_tbl.get_data_type(g);
9389 if(gdt->is_temporal()){
9390 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9391 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9399 // Compare the temporal GB vars with the stored ones,
9400 // set flush indicator and update stored GB vars if there is any change.
9402 ret += "// hfta_disorder = "+int_to_string(hfta_disorder)+"\n";
9403 if(hfta_disorder < 2){
9404 if(uses_temporal_flush){
9406 bool first_one = true;
9407 string disorder_test;
9408 for(g=0;g<gb_tbl.size();g++){
9409 data_type *gdt = gb_tbl.get_data_type(g);
9411 if(gdt->is_temporal()){
9412 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9413 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9414 if(first_one){first_one = false;} else {ret += ") && (";}
9415 ret += generate_lt_test(lhs_op, rhs_op, gdt);
9416 disorder_test += generate_lt_test(rhs_op, lhs_op, gdt);
9420 for(g=0;g<gb_tbl.size();g++){
9421 data_type *gdt = gb_tbl.get_data_type(g);
9422 if(gdt->is_temporal()){
9423 if(gdt->is_buffer_type()){
9424 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9426 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9428 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9433 ret += "\t\tneeds_temporal_flush=true;\n";
9435 "\t\tneeds_temporal_flush=false;\n"
9438 ret += "\tdisordered_arrival = "+disorder_test+";\n";
9439 // ret += "\tif( ( ("+disorder_test+") ) ){\n";
9440 // ret += "\t\tdisordered_arrival=true;\n";
9441 // ret += "\t}else{\n";
9442 // ret += "\t\tdisordered_arrival=false;\n";
9447 ret+= "\tif(temp_tuple_received && !( (";
9448 bool first_one = true;
9449 for(g=0;g<gb_tbl.size();g++){
9450 data_type *gdt = gb_tbl.get_data_type(g);
9452 if(gdt->is_temporal()){
9453 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9454 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9455 if(first_one){first_one = false;} else {ret += ") && (";}
9456 ret += generate_equality_test(lhs_op, rhs_op, gdt);
9462 for(g=0;g<gb_tbl.size();g++){
9463 data_type *gdt = gb_tbl.get_data_type(g);
9464 if(gdt->is_temporal()){
9466 if(gdt->is_buffer_type()){
9467 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9469 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9471 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9477 data_type *tgdt = gb_tbl.get_data_type(temporal_g);
9478 literal_t gl(tgdt->type_indicator());
9479 ret += "\t\tif(last_flushed_gb"+int_to_string(temporal_g)+">"+gl.to_hfta_C_code("")+")\n";
9480 ret += "\t\t\tneeds_temporal_flush=true;\n";
9481 ret += "\t\t}else{\n"
9482 "\t\t\tneeds_temporal_flush=false;\n"
9487 // For temporal status tuple we don't need to do anything else
9488 ret += "\tif (temp_tuple_received){\n";
9489 ret += "\t\tdisordered_arrival = false;\n";
9490 ret += "\t\treturn NULL;\n\n";
9493 for(w=0;w<where.size();++w){
9494 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
9496 // Find the set of variables accessed in this CNF elem,
9497 // but in no previous element.
9498 col_id_set new_cids;
9499 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
9501 // Unpack these values.
9502 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9503 // Find partial fcns ref'd in this cnf element
9505 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
9506 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
9508 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
9509 +") ) return(NULL);\n";
9512 // The partial functions ref'd in the group-by var and aggregate
9513 // definitions must also be evaluated. If one returns false,
9514 // then implicitly the predicate is false.
9515 set<int>::iterator pfsi;
9517 if(ag_gb_pfcns.size() > 0)
9518 ret += "//\t\tUnpack remaining partial fcns.\n";
9519 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
9520 found_cids, segen_gb_tbl, "NULL", needs_xform);
9522 // Unpack the group-by variables
9524 for(g=0;g<gb_tbl.size();g++){
9525 data_type *gdt = gb_tbl.get_data_type(g);
9527 if(!gdt->is_temporal()){
9528 // Find the new fields ref'd by this GBvar def.
9529 col_id_set new_cids;
9530 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
9531 // Unpack these values.
9532 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9534 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9535 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9537 // There seems to be no difference between the two
9538 // branches of the IF statement.
9539 data_type *gdt = gb_tbl.get_data_type(g);
9540 if(gdt->is_buffer_type()){
9541 // Create temporary copy.
9542 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9543 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9545 scalarexp_t *gse = gb_tbl.get_def(g);
9546 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9547 g,generate_se_code(gse,schema).c_str());
9556 ret+= "\treturn gbval;\n";
9559 //--------------------------------------------------------
9560 // Create and initialize an aggregate object
9562 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
9563 // Variables for execution of the function.
9564 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9567 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
9568 "_aggrdef *)buffer;\n";
9570 for(a=0;a<aggr_tbl.size();a++){
9571 if(aggr_tbl.is_builtin(a)){
9572 // Create temporaries for buffer return values
9573 data_type *adt = aggr_tbl.get_data_type(a);
9574 if(adt->is_buffer_type()){
9575 sprintf(tmpstr,"aggr_tmp_%d", a);
9576 ret+=adt->make_host_cvar(tmpstr)+";\n";
9581 // Unpack all remaining attributes
9582 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
9583 for(a=0;a<aggr_tbl.size();a++){
9584 sprintf(tmpstr,"aggval->aggr_var%d",a);
9585 string assignto_var = tmpstr;
9586 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
9589 ret += "\treturn aggval;\n";
9592 //--------------------------------------------------------
9593 // update an aggregate object
9595 ret += "void update_aggregate(host_tuple &tup0, "
9596 +generate_functor_name()+"_groupdef &gbval, "+
9597 generate_functor_name()+"_aggrdef &aggval){\n";
9598 // Variables for execution of the function.
9599 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9601 // use of temporaries depends on the aggregate,
9602 // generate them in generate_aggr_update
9605 // Unpack all remaining attributes
9606 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
9607 for(a=0;a<aggr_tbl.size();a++){
9608 sprintf(tmpstr,"aggval.aggr_var%d",a);
9609 string varname = tmpstr;
9610 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
9613 ret += "\treturn;\n";
9616 //---------------------------------------------------
9619 ret += "\tbool flush_needed(){\n";
9620 if(uses_temporal_flush){
9621 ret += "\t\treturn needs_temporal_flush;\n";
9623 ret += "\t\treturn false;\n";
9627 ret += "bool disordered(){return disordered_arrival;}\n";
9629 //---------------------------------------------------
9630 // create output tuple
9631 // Unpack the partial functions ref'd in the where clause,
9632 // select clause. Evaluate the where clause.
9633 // Finally, pack the tuple.
9635 // I need to use special code generation here,
9636 // so I'll leave it in longhand.
9638 ret += "host_tuple create_output_tuple("
9639 +generate_functor_name()+"_groupdef &gbval, "+
9640 generate_functor_name()+"_aggrdef &aggval, bool &failed){\n";
9642 ret += "\thost_tuple tup;\n";
9643 ret += "\tfailed = false;\n";
9644 ret += "\tgs_retval_t retval = 0;\n";
9646 string gbvar = "gbval.gb_var";
9647 string aggvar = "aggval.";
9649 // Create cached temporaries for UDAF return values.
9650 for(a=0;a<aggr_tbl.size();a++){
9651 if(! aggr_tbl.is_builtin(a)){
9652 int afcn_id = aggr_tbl.get_fcn_id(a);
9653 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9654 sprintf(tmpstr,"udaf_ret_%d", a);
9655 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
9660 // First, get the return values from the UDAFS
9661 for(a=0;a<aggr_tbl.size();a++){
9662 if(! aggr_tbl.is_builtin(a)){
9663 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
9664 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9665 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
9669 set<int> hv_sl_pfcns;
9670 for(w=0;w<having.size();w++){
9671 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
9673 for(s=0;s<select_list.size();s++){
9674 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
9677 // clean up the partial fcn results from any previous execution
9678 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
9681 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
9682 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
9683 ret += "\tif(retval){ failed = true; return(tup);}\n";
9686 // Evaluate the HAVING clause
9687 // TODO: this seems to have a ++ operator rather than a + operator.
9688 for(w=0;w<having.size();++w){
9689 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
9692 // Now, compute the size of the tuple.
9694 // Unpack any BUFFER type selections into temporaries
9695 // so that I can compute their size and not have
9696 // to recompute their value during tuple packing.
9697 // I can use regular assignment here because
9698 // these temporaries are non-persistent.
9699 // TODO: should I be using the selvar generation routine?
9701 ret += "//\t\tCompute the size of the tuple.\n";
9702 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
9703 for(s=0;s<select_list.size();s++){
9704 scalarexp_t *se = select_list[s]->se;
9705 data_type *sdt = se->get_data_type();
9706 if(sdt->is_buffer_type() &&
9707 !( (se->get_operator_type() == SE_COLREF) ||
9708 (se->get_operator_type() == SE_AGGR_STAR) ||
9709 (se->get_operator_type() == SE_AGGR_SE) ||
9710 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9711 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9713 sprintf(tmpstr,"selvar_%d",s);
9714 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
9715 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
9719 // The size of the tuple is the size of the tuple struct plus the
9720 // size of the buffers to be copied in.
9722 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
9723 for(s=0;s<select_list.size();s++){
9724 // if(s>0) ret += "+";
9725 scalarexp_t *se = select_list[s]->se;
9726 data_type *sdt = select_list[s]->se->get_data_type();
9727 if(sdt->is_buffer_type()){
9728 if(!( (se->get_operator_type() == SE_COLREF) ||
9729 (se->get_operator_type() == SE_AGGR_STAR) ||
9730 (se->get_operator_type() == SE_AGGR_SE) ||
9731 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9732 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9734 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
9737 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9744 // Allocate tuple data block.
9745 ret += "//\t\tCreate the tuple block.\n";
9746 ret += "\ttup.data = malloc(tup.tuple_size);\n";
9747 ret += "\ttup.heap_resident = true;\n";
9749 // Mark tuple as regular
9750 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
9752 // ret += "\ttup.channel = 0;\n";
9753 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
9754 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
9757 // (Here, offsets are hard-wired. is this a problem?)
9759 ret += "//\t\tPack the fields into the tuple.\n";
9760 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
9761 for(s=0;s<select_list.size();s++){
9762 scalarexp_t *se = select_list[s]->se;
9763 data_type *sdt = se->get_data_type();
9764 if(sdt->is_buffer_type()){
9765 if(!( (se->get_operator_type() == SE_COLREF) ||
9766 (se->get_operator_type() == SE_AGGR_STAR) ||
9767 (se->get_operator_type() == SE_AGGR_SE) ||
9768 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9769 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9771 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
9773 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
9776 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9778 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9782 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9784 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
9789 // Destroy string temporaries
9790 ret += gen_buffer_selvars_dtr(select_list);
9791 // Destroy string return vals of UDAFs
9792 for(a=0;a<aggr_tbl.size();a++){
9793 if(! aggr_tbl.is_builtin(a)){
9794 int afcn_id = aggr_tbl.get_fcn_id(a);
9795 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9796 if(adt->is_buffer_type()){
9797 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
9798 adt->get_hfta_buffer_destroy().c_str(), a );
9805 ret += "\treturn tup;\n";
9809 //-------------------------------------------------------------------
9810 // Temporal update functions
9812 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
9814 for(g=0;g<gb_tbl.size();g++){
9815 data_type *gdt = gb_tbl.get_data_type(g);
9816 if(gdt->is_temporal()){
9821 ret += tgdt->get_host_cvar_type()+" get_last_flushed_gb(){\n";
9822 ret+="\treturn last_flushed_gb"+int_to_string(g)+";\n";
9824 ret += tgdt->get_host_cvar_type()+" get_last_gb(){\n";
9825 ret+="\treturn last_gb"+int_to_string(g)+";\n";
9831 // create a temp status tuple
9832 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
9834 ret += gen_init_temp_status_tuple(this->get_node_name());
9837 // (Here, offsets are hard-wired. is this a problem?)
9839 ret += "//\t\tPack the fields into the tuple.\n";
9840 for(s=0;s<select_list.size();s++){
9841 data_type *sdt = select_list[s]->se->get_data_type();
9842 if(sdt->is_temporal()){
9843 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9846 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
9853 ret += "\treturn 0;\n";
9854 ret += "};};\n\n\n";
9857 //----------------------------------------------------------
9858 // The hash function
9860 ret += "struct "+generate_functor_name()+"_hash_func{\n";
9861 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
9862 "_groupdef &grp) const{\n";
9863 ret += "\t\treturn( (";
9864 for(g=0;g<gb_tbl.size();g++){
9866 data_type *gdt = gb_tbl.get_data_type(g);
9867 if(gdt->use_hashfunc()){
9868 if(gdt->is_buffer_type())
9869 sprintf(tmpstr,"(%s*%s(&(grp.gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9871 sprintf(tmpstr,"(%s*%s(grp.gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9873 sprintf(tmpstr,"(%s*grp.gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
9877 ret += ") >> 32);\n";
9881 //----------------------------------------------------------
9882 // The comparison function
9884 ret += "struct "+generate_functor_name()+"_equal_func{\n";
9885 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef &grp1, "+
9886 "const "+generate_functor_name()+"_groupdef &grp2) const{\n";
9887 ret += "\t\treturn( (";
9889 for(g=0;g<gb_tbl.size();g++){
9890 if(g>0) ret += ") && (";
9891 data_type *gdt = gb_tbl.get_data_type(g);
9892 if(gdt->complex_comparison(gdt)){
9893 if(gdt->is_buffer_type())
9894 sprintf(tmpstr,"(%s(&(grp1.gb_var%d), &(grp2.gb_var%d))==0)",
9895 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
9897 sprintf(tmpstr,"(%s((grp1.gb_var%d), (grp2.gb_var%d))==0)",
9898 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
9900 sprintf(tmpstr,"grp1.gb_var%d == grp2.gb_var%d",g,g);
// Assemble the C++ source text that declares and allocates the runtime
// operator for this group-by/aggregation node.
//   i      : index used to name the generated pointer variable ("op<i>").
//   params : text spliced in as the leading constructor argument(s); the
//            node name (get_node_name()) follows it as a quoted string.
// The operator template is instantiated over the classes emitted for this
// node: the functor itself plus its _groupdef, _aggrdef, _hash_func and
// _equal_func companions.
9912 string sgah_qpn::generate_operator(int i, string params){
// hfta_disorder < 2 : use the standard group-by operator templates.
9914 if(hfta_disorder < 2){
9915 string op_name = "groupby_operator";
// A positive hfta_slow_flush selects the slow-flush variant of the operator.
9916 if(hfta_slow_flush>0)
9917 op_name = "groupby_slowflush_operator";
9920 generate_functor_name()+","+
9921 generate_functor_name() + "_groupdef, " +
9922 generate_functor_name() + "_aggrdef, " +
9923 generate_functor_name()+"_hash_func, "+
9924 generate_functor_name()+"_equal_func "
9925 "> *op"+int_to_string(i)+" = new "+op_name+"<"+
9926 generate_functor_name()+","+
9927 generate_functor_name() + "_groupdef, " +
9928 generate_functor_name() + "_aggrdef, " +
9929 generate_functor_name()+"_hash_func, "+
9930 generate_functor_name()+"_equal_func "
9931 ">("+params+", \"" + get_node_name() +
// Otherwise the groupby_operator_oop template is used. It takes one extra
// template argument, tgdt->get_host_cvar_type(); scan the group-by table
// for temporal variables first.
// NOTE(review): tgdt is presumably taken from a temporal group-by variable
// found by this scan -- confirm.
9936 for(int g=0;g<gb_tbl.size();g++){
9937 data_type *gdt = gb_tbl.get_data_type(g);
9938 if(gdt->is_temporal()){
9945 " groupby_operator_oop<" +
9946 generate_functor_name()+","+
9947 generate_functor_name() + "_groupdef, " +
9948 generate_functor_name() + "_aggrdef, " +
9949 generate_functor_name()+"_hash_func, "+
9950 generate_functor_name()+"_equal_func, " +
9951 tgdt->get_host_cvar_type() +
9952 "> *op"+int_to_string(i)+" = new groupby_operator_oop<"+
9953 generate_functor_name()+","+
9954 generate_functor_name() + "_groupdef, " +
9955 generate_functor_name() + "_aggrdef, " +
9956 generate_functor_name()+"_hash_func, "+
9957 generate_functor_name()+"_equal_func, " +
9958 tgdt->get_host_cvar_type() +
9959 ">("+params+", \"" + get_node_name() +
9965 ////////////////////////////////////////////////
9968 ////////////////////////////////////////////
// Name of the generated functor class for this merge node:
// the prefix "mrg_functor_" followed by the normalized node name.
9970 string mrg_qpn::generate_functor_name(){
9971 return("mrg_functor_" + normalize_name(this->get_node_name()));
// Generate the source text of the merge functor class for this node.
// The text is accumulated in `ret` (presumably returned on a line that is
// not part of this listing) in this order: class header, private member
// declarations (schema handle, temp-tuple variables, unpack variables and
// offsets, last-posted timestamps, slack, query parameters), constructor,
// destructor, parameter-block routines, the timestamp comparison and
// temp-status helpers, create_temp_status_tuple, xform_tuple and
// print_warnings.
//   schema      - used to look up the type name and modifier list of the two
//                 merge variables (mvars[0], mvars[1]) and passed to
//                 generate_se_code for the slack expression.
//   Ext_fcns    - not referenced in the lines visible here.
//   needs_xform - entries 0 and 1 select between get_hfta_unpack_fcn() and
//                 get_hfta_unpack_fcn_noxf() for each input and gate the
//                 branches of the generated xform_tuple.
// NOTE(review): this listing omits some lines of the function (closing
// braces, else keywords, some appends); the comments below describe only
// what is visible.
9974 string mrg_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
// Internal consistency checks: fm and mvars must have the same size.
// The condition guarding the second message is not visible here.
9979 if(fm.size() != mvars.size()){
9980 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=%lu, mvars.size=%lu\n",fm.size(),mvars.size());
9984 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=mvars.size=%lu\n",fm.size());
9989 // Initialize generate utility globals
9990 segen_gb_tbl = NULL;
9992 string ret = "class " + this->generate_functor_name() + "{\n";
9994 // Private variable:
9995 // 1) Vars for unpacked attrs.
9996 // 2) offsets of the unpacked attrs
9997 // 3) last_posted_timestamp
// Type name and modifier list of the first merge variable; presumably the
// constructor arguments of dta (the declaring line is not visible here).
10000 schema->get_type_name(mvars[0]->get_schema_ref(), mvars[0]->get_field()),
10001 schema->get_modifier_list(mvars[0]->get_schema_ref(), mvars[0]->get_field())
// Same lookup for the second merge variable; presumably the arguments of dtb.
10004 schema->get_type_name(mvars[1]->get_schema_ref(), mvars[1]->get_field()),
10005 schema->get_modifier_list(mvars[1]->get_schema_ref(), mvars[1]->get_field())
10008 ret += "private:\n";
10010 // var to save the schema handle
10011 ret += "\tint schema_handle0;\n";
10013 // generate the declaration of all the variables related to
10014 // temp tuples generation
10015 ret += gen_decl_temp_vars();
10017 // unpacked attribute storage, offsets
10018 ret += "//\t\tstorage and offsets of accessed fields.\n";
10019 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
// Declare unpack_var_<field>_<tblref> (typed via dta) and the matching
// gs_int32_t unpack_offset_<field>_<tblref> for the first merge variable.
// NOTE(review): tblref is assigned on lines not shown in this listing.
10021 sprintf(tmpstr,"unpack_var_%s_%d", mvars[0]->get_field().c_str(), tblref);
10022 ret+="\t"+dta.make_host_cvar(tmpstr)+";\n";
10023 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[0]->get_field().c_str(), tblref);
10024 ret.append(tmpstr);
// Same pair of declarations for the second merge variable (typed via dtb).
10026 sprintf(tmpstr,"unpack_var_%s_%d", mvars[1]->get_field().c_str(), tblref);
10027 ret+="\t"+dtb.make_host_cvar(tmpstr)+";\n";
10028 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[1]->get_field().c_str(), tblref);
10029 ret.append(tmpstr);
10031 ret += "//\t\tRemember the last posted timestamp.\n";
// All four members below are declared with dta's host C type.
10032 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_0")+";\n";
10033 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_1")+";\n";
10034 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10035 ret+="\t"+dta.make_host_cvar("slack")+";\n";
10036 // ret += "\t bool first_execution_0, first_execution_1;\n";
10038 // variables to hold parameters.
10039 ret += "//\tfor query parameters\n";
10040 ret += generate_param_vars(param_tbl);
10042 ret += "public:\n";
10043 //-------------------
10044 // The functor constructor
10045 // pass in a schema handle (e.g. for the 1st input stream),
10046 // use it to determine how to unpack the merge variable.
10047 // ASSUME that both streams have the same layout,
10048 // just duplicate it.
10051 ret += "//\t\tFunctor constructor.\n";
10052 ret += this->generate_functor_name()+"(int schema_handle0){\n";
10054 // var to save the schema handle
10055 ret += "\tthis->schema_handle0 = schema_handle0;\n";
// Both metadata offsets are read from schema_handle0, in line with the
// same-layout assumption stated above.
10056 ret += "\ttuple_metadata_offset0=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10057 ret += "\ttuple_metadata_offset1=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10059 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
// Input 0's offset is looked up by field name; input 1's offset is then
// copied from input 0's offset rather than looked up separately.
10061 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", mvars[0]->get_field().c_str(), 0,mvars[0]->get_field().c_str());
10062 ret.append(tmpstr);
10063 sprintf(tmpstr,"\tunpack_offset_%s_%d = unpack_offset_%s_%d;\n",mvars[1]->get_field().c_str(), 1,mvars[0]->get_field().c_str(), 0);
10064 ret.append(tmpstr);
10065 // ret+="\tfirst_execution_0 = first_execution_1 = true;\n";
// slack is initialized either from the code generated for the slack
// expression or to 0; the selecting condition is on lines not shown here.
10067 ret+="\tslack = "+generate_se_code(slack,schema)+";\n";
10069 ret+="\tslack = 0;\n";
10071 // Initialize internal state
10072 ret += "\ttemp_tuple_received = false;\n";
10074 // Init last timestamp values to minimum value for their type
// An increasing dta starts from its minimum literal; the second statement
// (presumably the else branch) starts from the maximum literal instead.
10075 if (dta.is_increasing())
10076 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_min_literal() + ";\n";
10078 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_max_literal() + ";\n";
10083 ret += "//\t\tFunctor destructor.\n";
10084 ret += "~"+this->generate_functor_name()+"(){\n";
10086 // Destroy the parameters, if any need to be destroyed
10087 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10092 // no pass-by-handle params.
10093 vector<handle_param_tbl_entry *> param_handle_table;
10095 // Parameter manipulation routines
10096 ret += generate_load_param_block(this->generate_functor_name(),
10097 this->param_tbl,param_handle_table);
10098 ret += generate_delete_param_block(this->generate_functor_name(),
10099 this->param_tbl,param_handle_table);
10101 // Register new parameter block
// The generated set_param_block destroys the current parameters and then
// returns the result of the generated load_params_<functor> routine.
10103 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10104 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10105 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10110 // -----------------------------------
// Pick the unpack routine for each input: the transforming variant when
// needs_xform[i] is set, otherwise the _noxf variant.
10113 string unpack_fcna;
10114 if(needs_xform[0]) unpack_fcna = dta.get_hfta_unpack_fcn();
10115 else unpack_fcna = dta.get_hfta_unpack_fcn_noxf();
10116 string unpack_fcnb;
10117 if(needs_xform[1]) unpack_fcnb = dtb.get_hfta_unpack_fcn();
10118 else unpack_fcnb = dtb.get_hfta_unpack_fcn_noxf();
// Generated compare(): unpack the merge attribute of tup1 and tup2 and
// compare them, with slack added to timestamp2 in the first test. The
// values returned by the generated code are on lines not shown here.
10121 ret+="\tint compare(const host_tuple& tup1, const host_tuple& tup2) const{ \n";
10122 ret+="\t"+dta.make_host_cvar("timestamp1")+";\n";
10123 ret+="\t"+dta.make_host_cvar("timestamp2")+";\n";
10124 ret+="\tgs_int32_t problem;\n";
10125 ret+="\tif (tup1.channel == 0) {\n";
10126 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10128 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
// NOTE(review): the next two statements name
// unpack_offset_<mvars[0] field>_1 and unpack_offset_<mvars[1] field>_0,
// while the constructor above assigns unpack_offset_<mvars[0] field>_0 and
// unpack_offset_<mvars[1] field>_1. Verify the swapped names exist when the
// two merge fields have different names.
10131 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 1);
10133 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 0);
10137 " if (timestamp1 > timestamp2+slack)\n"
10139 " else if (timestamp1 < timestamp2)\n"
// Generated get_timestamp(): unpack the merge attribute of tup0 into
// `timestamp` with the unchecked (_nocheck) form of input 0's unpacker.
10148 " void get_timestamp(const host_tuple& tup0){\n"
10149 " gs_int32_t problem;\n"
10151 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10160 // Compare to temp status.
// Generated compare_with_temp_status(): compare `timestamp` with the
// last-posted timestamp of the requested channel.
10162 " int compare_with_temp_status(int channel) {\n"
10163 " // check if tuple is temp status tuple\n"
10165 " if (channel == 0) {\n"
10166 //" if(first_execution_0) return 1;\n"
10167 " if (timestamp == last_posted_timestamp_0)\n"
10169 " else if (timestamp < last_posted_timestamp_0)\n"
10174 //" if(first_execution_1) return 1;\n"
10175 " if (timestamp == last_posted_timestamp_1)\n"
10177 " else if (timestamp < last_posted_timestamp_1)\n"
// Generated compare_stored_with_temp_status(): same comparison, but against
// l_timestamp, a local unpacked from tup0 with input 0's unchecked unpacker.
10186 " int compare_stored_with_temp_status(const host_tuple& tup0, int channel)/* const*/ {\n"
10188 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10189 ret+="\tgs_int32_t problem;\n";
10191 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10193 ret+="\tif (channel == 0) {\n";
10194 // ret+="\tif(first_execution_0) return 1;\n";
10196 " if (l_timestamp == last_posted_timestamp_0)\n"
10198 " else if (l_timestamp < last_posted_timestamp_0)\n"
10203 // ret+="\tif(first_execution_1) return 1;\n";
10205 " if (l_timestamp == last_posted_timestamp_1)\n"
10207 " else if (l_timestamp < last_posted_timestamp_1)\n"
10215 // update temp status.
// Generated update_temp_status(): store `timestamp` as the last-posted
// timestamp of the tuple's channel.
10217 " int update_temp_status(const host_tuple& tup) {\n"
10218 " if (tup.channel == 0) {\n"
10219 " last_posted_timestamp_0=timestamp;\n"
10220 //" first_execution_0 = false;\n"
10222 " last_posted_timestamp_1=timestamp;\n"
10223 //" first_execution_1 = false;\n"
// Generated update_stored_temp_status(): unpack l_timestamp from tup with
// input 0's unchecked unpacker and store it for tup.channel.
10229 " int update_stored_temp_status(const host_tuple& tup, int channel) {\n"
10231 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10232 ret+="\tgs_int32_t problem;\n";
10233 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10236 " if (tup.channel == 0) {\n"
10237 " last_posted_timestamp_0=l_timestamp;\n"
10238 //" first_execution_0 = false;\n"
10240 " last_posted_timestamp_1=l_timestamp;\n"
10241 //" first_execution_1 = false;\n"
// NOTE(review): the header of the routine emitted by the next lines is not
// visible in this listing. The emitted text assigns first_execution_0/1,
// whose member declaration and initialization are commented out above;
// confirm this region is disabled or that the members exist.
10247 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10248 ret+="\tgs_int32_t problem;\n";
10249 ret+="\tif (tup.channel == 0) {\n";
10250 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10253 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10256 ret+="\tif (tup.channel == 0) {\n";
10257 ret+="\tlast_posted_timestamp_0=timestamp;\n";
10258 ret +="\tfirst_execution_0 = false;\n";
10260 ret+="\tlast_posted_timestamp_1=timestamp;\n";
10261 ret +="\tfirst_execution_1 = false;\n";
10268 // update temp status modulo slack.
// Generated update_temp_status_by_slack(): unpack the tuple's timestamp with
// the unchecked unpacker of its channel, then raise the last-posted
// timestamp of `channel` to timestamp-slack when that value is larger.
// NOTE(review): the emitted text also reads first_execution_0/1 (see the
// note on the preceding routine).
10269 ret+="\tint update_temp_status_by_slack(const host_tuple& tup, int channel) {\n";
10271 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10272 ret+="\tgs_int32_t problem;\n";
10273 ret+="\tif (tup.channel == 0) {\n";
10274 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10277 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10281 " if (channel == 0) {\n"
10282 " if(first_execution_0){\n"
10283 " last_posted_timestamp_0=timestamp - slack;\n"
10284 " first_execution_0 = false;\n"
10286 " if(last_posted_timestamp_0 < timestamp-slack)\n"
10287 " last_posted_timestamp_0 = timestamp-slack;\n"
10290 " if(first_execution_1){\n"
10291 " last_posted_timestamp_1=timestamp - slack;\n"
10292 " first_execution_1 = false;\n"
10294 " if(last_posted_timestamp_1 < timestamp-slack)\n"
10295 " last_posted_timestamp_1 = timestamp-slack;\n"
// Generated temp_status_received(): returns the result of
// ftaschema_is_temporal_tuple_offset for tup0, using input 0's metadata
// offset.
10309 "bool temp_status_received(const host_tuple& tup0){\n"
10310 " return ftaschema_is_temporal_tuple_offset(tuple_metadata_offset0, tup0.data);\n"
10313 //"bool temp_status_received(){return temp_tuple_received;};\n\n";
10316 // create a temp status tuple
10317 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
10319 ret += gen_init_temp_status_tuple(this->get_node_name());
10322 ret += "//\t\tPack the fields into the tuple.\n";
// Find the first merge variable's field in the output table layout; its
// index selects the tuple_var<idx> member written below.
10324 string fld_name = mvars[0]->get_field();
10325 int idx = table_layout->get_field_idx(fld_name);
10326 field_entry* fld = table_layout->get_field(idx);
10327 data_type dt(fld->get_type());
10329 // if (needs_xform[0] && needs_xform[1] && dt.needs_hn_translation())
10330 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s((last_posted_timestamp_0 < last_posted_timestamp_1) ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx, dt.hton_translation().c_str());
// The temp status tuple carries the smaller of the two last-posted
// timestamps.
10332 sprintf(tmpstr,"\ttuple->tuple_var%d = (last_posted_timestamp_0 < last_posted_timestamp_1 ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx);
10336 ret += "\treturn 0;\n";
10339 // Transform tuple (before output)
10342 ret += "void xform_tuple(host_tuple &tup){\n";
// When exactly one of the two inputs needs transformation, emit a typed
// pointer to the tuple data.
10343 if((needs_xform[0] && !needs_xform[1]) || (needs_xform[1] && !needs_xform[0])){
10344 ret += "\tstruct "+generate_tuple_name(this->get_node_name())+" *tuple = ("+
10345 generate_tuple_name(this->get_node_name())+" *)(tup.data);\n";
10347 vector<field_entry *> flds = table_layout->get_fields();
// Channel 0 branch. The sprintf calls that would build the per-field
// translation statements are commented out in the visible lines.
10349 ret+="\tif(tup.channel == 0){\n";
10350 if(needs_xform[0] && !needs_xform[1]){
10352 for(f=0;f<flds.size();f++){
10354 data_type dt(flds[f]->get_type());
10355 if(dt.get_type() == v_str_t){
10356 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10358 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10360 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10363 if(dt.needs_hn_translation()){
10364 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10365 // f, dt.hton_translation().c_str(), f);
10371 ret += "\t\treturn;\n";
10373 ret.append("\t}\n");
// Channel 1 branch, mirroring the channel 0 branch with the roles of the
// two needs_xform entries swapped.
10376 ret+="\tif(tup.channel == 1){\n";
10377 if(needs_xform[1] && !needs_xform[0]){
10379 for(f=0;f<flds.size();f++){
10381 data_type dt(flds[f]->get_type());
10382 if(dt.get_type() == v_str_t){
10383 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10385 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10387 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10390 if(dt.needs_hn_translation()){
10391 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10392 // f, dt.hton_translation().c_str(), f);
10398 ret += "\t\treturn;\n";
10400 ret.append("\t}\n");
10403 ret.append("};\n\n");
10405 // print_warnings() : tell the functor if the user wants to print warnings.
// "return true" is emitted only when the print_warnings definition exists
// and equals yes, Yes or YES; "return false" is the other visible variant.
10406 ret += "bool print_warnings(){\n";
10407 if(definitions.count("print_warnings") && (
10408 definitions["print_warnings"] == "yes" ||
10409 definitions["print_warnings"] == "Yes" ||
10410 definitions["print_warnings"] == "YES" )) {
10411 ret += "return true;\n";
10413 ret += "return false;\n";
10415 ret.append("};\n\n");
10418 // Done with methods.
// Generate the statement that declares and heap-allocates the operator
// object for this merge node.
//   i      - operator number; the generated pointer variable is named op<i>.
//   params - text spliced in as the leading constructor argument(s).
// Both visible variants instantiate the operator template on this node's
// functor class and pass params, the literal 10000 and the quoted node name
// to the constructor. The meaning of 10000 is not visible here.
// NOTE(review): the condition choosing between the two variants, and the
// return statements, are on lines not shown in this listing.
10425 string mrg_qpn::generate_operator(int i, string params){
// Variant 1: merge_operator<functor>.
10429 " merge_operator<" +
10430 generate_functor_name()+
10431 "> *op"+int_to_string(i)+" = new merge_operator<"+
10432 generate_functor_name()+
10433 ">("+params+",10000,\"" + get_node_name() + "\");\n"
// Variant 2: identical text except that it names merge_operator_oop.
10437 " merge_operator_oop<" +
10438 generate_functor_name()+
10439 "> *op"+int_to_string(i)+" = new merge_operator_oop<"+
10440 generate_functor_name()+
10441 ">("+params+",10000,\"" + get_node_name() + "\");\n"
10445 ////////////////////////////////////////////////
10446 /// WATCHLIST_TBL operator
10447 /// WATCHLIST_TBL functor
10448 ////////////////////////////////////////////
10450 string watch_tbl_qpn::generate_functor_name(){
10451 return("watch_tbl_functor_" + normalize_name(this->get_node_name()));
10454 string watch_tbl_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10456 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
10459 string watch_tbl_qpn::generate_operator(int i, string params){
10460 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
10463 /////////////////////////////////////////////////////////
10464 ////// JOIN_EQ_HASH functor
10467 string join_eq_hash_qpn::generate_functor_name(){
10468 return("join_eq_hash_functor_" + normalize_name(this->get_node_name()));
10471 string join_eq_hash_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10473 vector<data_type *> hashkey_dt; // data types in the hash key
10474 vector<data_type *> temporal_dt; // data types in the temporal key
10475 map<string,scalarexp_t *> l_equiv, r_equiv; // field equivalences
10476 set<int> pfcn_refs;
10477 col_id_set new_cids, local_cids;
10479 //--------------------------------
10482 string plus_op = "+";
10484 //--------------------------------
10485 // key definition class
10486 string ret = "class " + generate_functor_name() + "_keydef{\n";
10487 ret += "public:\n";
10488 // Collect attributes from hash join predicates.
10489 // ASSUME equality predicate.
10490 // Use the upwardly compatible data type
10491 // (infer from '+' operator if possible, else use left type)
10492 for(p=0;p<this->hash_eq.size();++p){
10493 scalarexp_t *lse = hash_eq[p]->pr->get_left_se();
10494 scalarexp_t *rse = hash_eq[p]->pr->get_right_se();
10495 data_type *hdt = new data_type(
10496 lse->get_data_type(), rse->get_data_type(), plus_op );
10497 if(hdt->get_type() == undefined_t){
10498 hashkey_dt.push_back(lse->get_data_type()->duplicate());
10501 hashkey_dt.push_back(hdt);
10503 sprintf(tmpstr,"hashkey_var%d",p);
10504 ret+="\t"+hashkey_dt[p]->make_host_cvar(tmpstr)+";\n";
10506 // find equivalences
10507 // NOTE: this code needs to be synched with the temporality
10508 // checking done at join_eq_hash_qpn::get_fields
10509 if(lse->get_operator_type()==SE_COLREF){
10510 l_equiv[lse->get_colref()->get_field()] = rse;
10512 if(rse->get_operator_type()==SE_COLREF){
10513 r_equiv[rse->get_colref()->get_field()] = lse;
10516 ret += "\tbool touched;\n";
10519 ret += "\t"+generate_functor_name() + "_keydef(){touched=false;};\n";
10521 ret += "\t~"+ generate_functor_name() + "_keydef(){\n";
10522 for(p=0;p<hashkey_dt.size();p++){
10523 if(hashkey_dt[p]->is_buffer_type()){
10524 sprintf(tmpstr,"\t\t%s(&hashkey_var%d);\n",
10525 hashkey_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10530 ret+="\tvoid touch(){touched = true;};\n";
10531 ret+="\tbool is_touched(){return touched;};\n";
10535 //--------------------------------
10536 // temporal equality definition class
10537 ret += "class " + generate_functor_name() + "_tempeqdef{\n";
10538 ret += "public:\n";
10539 // Collect attributes from hash join predicates.
10540 // ASSUME equality predicate.
10541 // Use the upwardly compatible date type
10542 // (infer from '+' operator if possible, else use left type)
10543 for(p=0;p<this->temporal_eq.size();++p){
10544 scalarexp_t *lse = temporal_eq[p]->pr->get_left_se();
10545 scalarexp_t *rse = temporal_eq[p]->pr->get_right_se();
10546 data_type *hdt = new data_type(
10547 lse->get_data_type(), rse->get_data_type(), plus_op );
10548 if(hdt->get_type() == undefined_t){
10549 temporal_dt.push_back(hash_eq[p]->pr->get_left_se()->get_data_type()->duplicate());
10552 temporal_dt.push_back(hdt);
10554 sprintf(tmpstr,"tempeq_var%d",p);
10555 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
10556 // find equivalences
10557 if(lse->get_operator_type()==SE_COLREF){
10558 l_equiv[lse->get_colref()->get_field()] = rse;
10560 if(rse->get_operator_type()==SE_COLREF){
10561 r_equiv[rse->get_colref()->get_field()] = lse;
10566 ret += "\t"+generate_functor_name() + "_tempeqdef(){};\n";
10568 ret += "\t~"+ generate_functor_name() + "_tempeqdef(){\n";
10569 for(p=0;p<temporal_dt.size();p++){
10570 if(temporal_dt[p]->is_buffer_type()){
10571 sprintf(tmpstr,"\t\t%s(&tempeq_var%d);\n",
10572 temporal_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10580 //--------------------------------
10581 // temporal eq, hash join functor class
10582 ret += "class " + this->generate_functor_name() + "{\n";
10584 // Find variables referenced in this query node.
10586 col_id_set cid_set;
10587 col_id_set::iterator csi;
10589 for(p=0;p<where.size();++p)
10590 gather_pr_col_ids(where[p]->pr,cid_set,NULL);
10591 for(s=0;s<select_list.size();s++)
10592 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
10594 // Private variables : store the state of the functor.
10595 // 1) variables for unpacked attributes
10596 // 2) offsets of the upacked attributes
10597 // 3) storage of partial functions
10598 // 4) storage of complex literals (i.e., require a constructor)
10600 ret += "private:\n";
10602 // var to save the schema handles
10603 ret += "\tint schema_handle0;\n";
10604 ret += "\tint schema_handle1;\n";
10606 // generate the declaration of all the variables related to
10607 // temp tuples generation
10608 ret += gen_decl_temp_vars();
10609 // tuple metadata offsets
10610 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
10612 // unpacked attribute storage, offsets
10613 ret += "//\t\tstorage and offsets of accessed fields.\n";
10614 ret += generate_access_vars(cid_set, schema);
10617 // Variables to store results of partial functions.
10618 // WARNING find_partial_functions modifies the SE
10619 // (it marks the partial function id).
10620 ret += "//\t\tParital function result storage\n";
10621 vector<scalarexp_t *> partial_fcns;
10622 vector<int> fcn_ref_cnt;
10623 vector<bool> is_partial_fcn;
10624 for(s=0;s<select_list.size();s++){
10625 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
10627 for(p=0;p<where.size();p++){
10628 find_partial_fcns_pr(where[p]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
10630 if(partial_fcns.size()>0){
10631 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
10632 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
10635 // Complex literals (i.e., they need constructors)
10636 ret += "//\t\tComplex literal storage.\n";
10637 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
10638 ret += generate_complex_lit_vars(complex_literals);
10639 // We need the following to handle strings in outer joins.
10640 // NEED AN EMPTY LITERAL FOR EAcH STRUCTURED LITERAL
10641 ret += "\tstruct vstring EmptyString;\n";
10642 ret += "\tstruct hfta_ipv6_str EmptyIp6;\n";
10644 // Pass-by-handle parameters
10645 ret += "//\t\tPass-by-handle storage.\n";
10646 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
10647 ret += generate_pass_by_handle_vars(param_handle_table);
10650 // variables to hold parameters.
10651 ret += "//\tfor query parameters\n";
10652 ret += generate_param_vars(param_tbl);
10655 ret += "\npublic:\n";
10656 //-------------------
10657 // The functor constructor
10658 // pass in the schema handle.
10659 // 1) make assignments to the unpack offset variables
10660 // 2) initialize the complex literals
10662 ret += "//\t\tFunctor constructor.\n";
10663 ret += this->generate_functor_name()+"(int schema_handle0, int schema_handle1){\n";
10665 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
10666 ret += "\t\tthis->schema_handle1 = schema_handle1;\n";
10667 // metadata offsets
10668 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10669 ret += "\ttuple_metadata_offset1 = ftaschema_get_tuple_metadata_offset(schema_handle1);\n";
10672 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
10673 ret += gen_access_var_init(cid_set);
10675 // complex literals
10676 ret += "//\t\tInitialize complex literals.\n";
10677 ret += gen_complex_lit_init(complex_literals);
10678 // Initialize EmptyString to the ... empty string
10679 // NEED AN EMPTY LITERAL FOR EAcH STRUCTURED LITERAL
10680 literal_t mtstr_lit("");
10681 ret += "\t" + mtstr_lit.to_hfta_C_code("&EmptyString")+";\n";
10682 literal_t mip6_lit("0:0:0:0:0:0:0:0",LITERAL_IPV6);
10683 ret += "\t" + mip6_lit.to_hfta_C_code("&EmptyIp6")+";\n";
10685 // Initialize partial function results so they can be safely GC'd
10686 ret += gen_partial_fcn_init(partial_fcns);
10688 // Initialize non-query-parameter parameter handles
10689 ret += gen_pass_by_handle_init(param_handle_table);
10691 // Init temporal attributes referenced in select list
10692 ret += gen_init_temp_vars(schema, select_list, NULL);
10699 //-------------------
10700 // Functor destructor
10701 ret += "//\t\tFunctor destructor.\n";
10702 ret += "~"+this->generate_functor_name()+"(){\n";
10704 // clean up buffer type complex literals
10705 ret += gen_complex_lit_dtr(complex_literals);
10707 // Deregister the pass-by-handle parameters
10708 ret += "/* register and de-register the pass-by-handle parameters */\n";
10709 ret += gen_pass_by_handle_dtr(param_handle_table);
10711 // clean up partial function results.
10712 ret += "/* clean up partial function storage */\n";
10713 ret += gen_partial_fcn_dtr(partial_fcns);
10715 // Destroy the parameters, if any need to be destroyed
10716 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10721 //-------------------
10722 // Parameter manipulation routines
10723 ret += generate_load_param_block(this->generate_functor_name(),
10724 this->param_tbl,param_handle_table);
10725 ret += generate_delete_param_block(this->generate_functor_name(),
10726 this->param_tbl,param_handle_table);
10728 //-------------------
10729 // Register new parameter block
10731 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10732 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10733 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10738 //-------------------
10739 // The create_key method.
10740 // Perform heap allocation.
10741 // ASSUME : the LHS of the preds reference channel 0 attributes
10742 // NOTE : it may fail if a partial function fails.
10744 ret += this->generate_functor_name()+"_keydef *create_key(host_tuple &tup, bool &failed){\n";
10745 // Variables for execution of the function.
10746 ret+="\t"+this->generate_functor_name()+"_keydef *retval = NULL;\n";
10747 ret+="\tgs_int32_t problem = 0;\n";
10749 // Assume unsuccessful completion
10750 ret+= "\tfailed = true;\n";
10752 // Switch the processing based on the channel
10753 ret+="\tif(tup.channel == 0){\n";
10754 ret+="// ------------ processing for channel 0\n";
10755 ret+="\t\thost_tuple &tup0 = tup;\n";
10756 // Gather partial fcns and colids ref'd by this branch
10758 new_cids.clear(); local_cids.clear();
10759 for(p=0;p<hash_eq.size();p++){
10760 collect_partial_fcns(hash_eq[p]->pr->get_left_se(), pfcn_refs);
10761 gather_se_col_ids(hash_eq[p]->pr->get_left_se(),local_cids,NULL);
10764 // Start by cleaning up partial function results
10765 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10766 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10768 // Evaluate the partial functions
10769 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10770 new_cids, NULL, "NULL", needs_xform);
10771 // test passed -- unpack remaining cids.
10772 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10774 // Alloc and load a key object
10775 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10776 for(p=0;p<hash_eq.size();p++){
10777 data_type *hdt = hash_eq[p]->pr->get_left_se()->get_data_type();
10778 if(hdt->is_buffer_type()){
10779 string vname = "tmp_keyvar"+int_to_string(p);
10780 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_left_se(),schema)+";\n";
10781 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10783 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10784 p,generate_se_code(hash_eq[p]->pr->get_left_se(),schema).c_str() );
10788 ret += "\t}else{\n";
10790 ret+="// ------------ processing for channel 1\n";
10791 ret+="\t\thost_tuple &tup1 = tup;\n";
10792 // Gather partial fcns and colids ref'd by this branch
10794 new_cids.clear(); local_cids.clear();
10795 for(p=0;p<hash_eq.size();p++){
10796 collect_partial_fcns(hash_eq[p]->pr->get_right_se(), pfcn_refs);
10797 gather_se_col_ids(hash_eq[p]->pr->get_right_se(),local_cids,NULL);
10800 // Start by cleaning up partial function results
10801 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10802 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10804 // Evaluate the partial functions
10805 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10806 new_cids, NULL, "NULL", needs_xform);
10808 // test passed -- unpack remaining cids.
10809 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10811 // Alloc and load a key object
10812 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10813 for(p=0;p<hash_eq.size();p++){
10814 data_type *hdt = hash_eq[p]->pr->get_right_se()->get_data_type();
10815 if(hdt->is_buffer_type()){
10816 string vname = "tmp_keyvar"+int_to_string(p);
10817 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_right_se(),schema)+";\n";
10818 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10820 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10821 p,generate_se_code(hash_eq[p]->pr->get_right_se(),schema).c_str() );
10827 ret += "\tfailed = false;\n";
10828 ret += "\t return retval;\n";
10832 //-------------------
10833 // The load_ts method.
10834 // load into an allocated buffer.
10835 // ASSUME : the LHS of the preds reference channel 0 attributes
10836 // NOTE : it may fail if a partial function fails.
10837 // NOTE : cann't handle buffer attributes
10839 ret += "bool load_ts_from_tup("+this->generate_functor_name()+"_tempeqdef *ts, host_tuple &tup){\n";
10840 // Variables for execution of the function.
10841 ret+="\tgs_int32_t problem = 0;\n";
10843 // Switch the processing based on the channel
10844 ret+="\tif(tup.channel == 0){\n";
10845 ret+="// ------------ processing for channel 0\n";
10846 ret+="\t\thost_tuple &tup0 = tup;\n";
10848 // Gather partial fcns and colids ref'd by this branch
10850 new_cids.clear(); local_cids.clear();
10851 for(p=0;p<temporal_eq.size();p++){
10852 collect_partial_fcns(temporal_eq[p]->pr->get_left_se(), pfcn_refs);
10853 gather_se_col_ids(temporal_eq[p]->pr->get_left_se(),local_cids,NULL);
10856 // Start by cleaning up partial function results
10857 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10858 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10860 // Evaluate the partial functions
10861 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10862 new_cids, NULL, "false", needs_xform);
10864 // test passed -- unpack remaining cids.
10865 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10867 // load the temporal key object
10868 for(p=0;p<temporal_eq.size();p++){
10869 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10870 p,generate_se_code(temporal_eq[p]->pr->get_left_se(),schema).c_str() );
10874 ret += "\t}else{\n";
10876 ret+="// ------------ processing for channel 1\n";
10877 ret+="\t\thost_tuple &tup1 = tup;\n";
10879 // Gather partial fcns and colids ref'd by this branch
10881 new_cids.clear(); local_cids.clear();
10882 for(p=0;p<temporal_eq.size();p++){
10883 collect_partial_fcns(temporal_eq[p]->pr->get_right_se(), pfcn_refs);
10884 gather_se_col_ids(temporal_eq[p]->pr->get_right_se(),local_cids,NULL);
10887 // Start by cleaning up partial function results
10888 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10889 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10891 // Evaluate the partial functions
10892 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10893 new_cids, NULL, "false", needs_xform);
10895 // test passed -- unpack remaining cids.
10896 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10898 // load the key object
10899 for(p=0;p<temporal_eq.size();p++){
10900 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10901 p,generate_se_code(temporal_eq[p]->pr->get_right_se(),schema).c_str() );
10907 ret += "\t return true;\n";
10911 // ------------------------------
10913 // (i.e make a copy)
10915 ret += "bool load_ts_from_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10916 for(p=0;p<temporal_eq.size();p++){
10917 sprintf(tmpstr,"\tlts->tempeq_var%d = rts->tempeq_var%d;\n",p,p);
10922 // -------------------------------------
10923 // compare_ts_to_ts
10924 // There should be only one variable to compare.
10925 // If there is more, assume an arbitrary lexicographic order.
10927 ret += "int compare_ts_with_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10928 for(p=0;p<temporal_eq.size();p++){
10929 sprintf(tmpstr,"\tif(lts->tempeq_var%d < rts->tempeq_var%d) return(-1);\n",p,p);
10931 sprintf(tmpstr,"\tif(lts->tempeq_var%d > rts->tempeq_var%d) return(1);\n",p,p);
10934 ret += "\treturn(0);\n";
10937 // ------------------------------------------
10939 // apply the prefilter
10941 ret += "bool apply_prefilter(host_tuple &tup){\n";
10943 // Variables for this procedure
10944 ret+="\tgs_int32_t problem = 0;\n";
10945 ret+="\tgs_retval_t retval;\n";
10947 // Switch the processing based on the channel
10948 ret+="\tif(tup.channel == 0){\n";
10949 ret+="// ------------ processing for channel 0\n";
10950 ret+="\t\thost_tuple &tup0 = tup;\n";
10951 // Gather partial fcns and colids ref'd by this branch
10953 new_cids.clear(); local_cids.clear();
10954 for(p=0;p<prefilter[0].size();p++){
10955 collect_partial_fcns_pr((prefilter[0])[p]->pr, pfcn_refs);
10958 // Start by cleaning up partial function results
10959 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10960 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10962 for(p=0;p<(prefilter[0]).size();++p){
10963 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10965 // Find the set of variables accessed in this CNF elem,
10966 // but in no previous element.
10967 col_id_set new_pr_cids;
10968 get_new_pred_cids((prefilter[0])[p]->pr,local_cids,new_pr_cids, NULL);
10969 // Unpack these values.
10970 ret += gen_unpack_cids(schema, new_pr_cids, "false", needs_xform);
10971 // Find partial fcns ref'd in this cnf element
10972 set<int> pr_pfcn_refs;
10973 collect_partial_fcns_pr((prefilter[0])[p]->pr, pr_pfcn_refs);
10974 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10976 ret += "\t\tif( !("+generate_predicate_code((prefilter[0])[p]->pr,schema)+") ) return(false);\n";
10978 ret += "\t}else{\n";
10979 ret+="// ------------ processing for channel 1\n";
10980 ret+="\t\thost_tuple &tup1 = tup;\n";
10981 // Gather partial fcns and colids ref'd by this branch
10983 new_cids.clear(); local_cids.clear();
10984 for(p=0;p<prefilter[1].size();p++){
10985 collect_partial_fcns_pr((prefilter[1])[p]->pr, pfcn_refs);
10988 // Start by cleaning up partial function results
10989 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10990 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10992 for(p=0;p<(prefilter[1]).size();++p){
10993 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10995 // Find the set of variables accessed in this CNF elem,
10996 // but in no previous element.
10997 col_id_set pr_new_cids;
10998 get_new_pred_cids((prefilter[1])[p]->pr,local_cids, pr_new_cids, NULL);
10999 // Unpack these values.
11000 ret += gen_unpack_cids(schema, pr_new_cids, "false", needs_xform);
11001 // Find partial fcns ref'd in this cnf element
11002 set<int> pr_pfcn_refs;
11003 collect_partial_fcns_pr((prefilter[1])[p]->pr, pr_pfcn_refs);
11004 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
11006 ret += "\t\tif( !("+generate_predicate_code((prefilter[1])[p]->pr,schema)+ ") ) return(false);\n";
11010 ret+="\treturn true;\n";
11014 // -------------------------------------
11015 // create_output_tuple
11016 // If the postfilter on the pair of tuples passes,
11017 // create an output tuple from the combined information.
11018 // (Plus, outer join processing)
11020 ret += "host_tuple create_output_tuple(const host_tuple &tup0, const host_tuple &tup1, bool &failed){\n";
11022 ret += "\thost_tuple tup;\n";
11023 ret += "\tfailed = true;\n";
11024 ret += "\tgs_retval_t retval = 0;\n";
11025 ret += "\tgs_int32_t problem = 0;\n";
11027 // Start by cleaning up partial function results
11028 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11030 new_cids.clear(); local_cids.clear();
11031 for(p=0;p<postfilter.size();p++){
11032 collect_partial_fcns_pr(postfilter[p]->pr, pfcn_refs);
11034 for(s=0;s<select_list.size();s++){
11035 collect_partial_fcns(select_list[s]->se, pfcn_refs);
11037 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
11040 ret+="\tif(tup0.data && tup1.data){\n";
11041 // Evaluate the postfilter
11042 new_cids.clear(); local_cids.clear();
11043 for(p=0;p<postfilter.size();p++){
11044 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
11046 // Find the set of variables accessed in this CNF elem,
11047 // but in no previous element.
11048 col_id_set pr_new_cids;
11049 get_new_pred_cids(postfilter[p]->pr,local_cids, pr_new_cids, NULL);
11050 // Unpack these values.
11051 ret += gen_unpack_cids(schema, pr_new_cids, "tup", needs_xform);
11052 // Find partial fcns ref'd in this cnf element
11053 set<int> pr_pfcn_refs;
11054 collect_partial_fcns_pr(postfilter[p]->pr, pr_pfcn_refs);
11055 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"tup");
11057 ret += "\t\tif( !("+generate_predicate_code(postfilter[p]->pr,schema)+ ") ) return(tup);\n";
11061 // postfilter passed, evaluate partial functions for select list
11064 col_id_set se_cids;
11065 for(s=0;s<select_list.size();s++){
11066 collect_partial_fcns(select_list[s]->se, sl_pfcns);
11069 if(sl_pfcns.size() > 0)
11070 ret += "//\t\tUnpack remaining partial fcns.\n";
11071 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
11072 local_cids, NULL, "tup", needs_xform);
11074 // Unpack remaining fields
11075 ret += "//\t\tunpack any remaining fields from the input tuples.\n";
11076 for(s=0;s<select_list.size();s++)
11077 get_new_se_cids(select_list[s]->se, local_cids,se_cids,NULL);
11078 ret += gen_unpack_cids(schema, se_cids,"tup", needs_xform);
11081 // Deal with outer join stuff
11082 col_id_set l_cids, r_cids;
11083 col_id_set l_base_cids, r_base_cids; // l_cids and r_cids get modified
11084 // to account for extra_f fields to
11085 // unpack for value imputation
11086 col_id_set::iterator ocsi;
11087 for(ocsi=local_cids.begin();ocsi!=local_cids.end();++ocsi){
11088 if((*ocsi).tblvar_ref == 0){
11089 l_cids.insert((*ocsi)); l_base_cids.insert((*ocsi));
11091 r_cids.insert((*ocsi)); r_base_cids.insert((*ocsi));
11094 for(ocsi=se_cids.begin();ocsi!=se_cids.end();++ocsi){
11095 if((*ocsi).tblvar_ref == 0){
11096 l_cids.insert((*ocsi)); l_base_cids.insert((*ocsi));
11098 r_cids.insert((*ocsi)); r_base_cids.insert((*ocsi));
11102 ret += "\t}else if(tup0.data){\n";
11103 string unpack_null = ""; col_id_set extra_cids;
11104 for(ocsi=r_base_cids.begin();ocsi!=r_base_cids.end();++ocsi){
11105 string field = (*ocsi).field;
11106 if(r_equiv.count(field)){
11107 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+"; // r_equiv\n";
11108 get_new_se_cids(r_equiv[field],l_cids,new_cids,NULL);
11110 int schref = (*ocsi).schema_ref;
11111 data_type dt(schema->get_type_name(schref,field));
11112 literal_t empty_lit(dt.type_indicator());
11113 if(empty_lit.is_cpx_lit()){
11114 // sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
11115 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11116 // NB : works for string type only
11117 // NNB: installed fix for ipv6, more of this should be pushed
11118 // into the literal_t code.
11119 unpack_null+="\t\tunpack_var_"+field+"_1= "+empty_lit.hfta_empty_literal_name()+"; // empty\n";
11121 unpack_null+="\t\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+"; // empty\n";
11125 ret += "// l_cids\n";
11126 ret += gen_unpack_cids(schema, l_cids, "tup", needs_xform);
11127 ret += "// extra_cids\n";
11128 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11129 ret += unpack_null;
11130 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11133 unpack_null = ""; extra_cids.clear(); new_cids.clear();
11134 for(ocsi=l_base_cids.begin();ocsi!=l_base_cids.end();++ocsi){
11135 string field = (*ocsi).field;
11136 if(l_equiv.count(field)){
11137 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+"; // l_equiv\n";
11138 get_new_se_cids(l_equiv[field],r_cids,new_cids,NULL);
11140 int schref = (*ocsi).schema_ref;
11141 data_type dt(schema->get_type_name(schref,field));
11142 literal_t empty_lit(dt.type_indicator());
11143 if(empty_lit.is_cpx_lit()){
11144 // sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
11145 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11146 // NB : works for string type only
11147 // NNB: installed fix for ipv6, more of this should be pushed
11148 // into the literal_t code.
11149 unpack_null+="\t\tunpack_var_"+field+"_0= "+empty_lit.hfta_empty_literal_name()+"; // empty\n";
11151 unpack_null+="\t\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+"; // empty\n";
11155 ret += "// r_cids\n";
11156 ret += gen_unpack_cids(schema, r_cids, "tup", needs_xform);
11157 ret += "// extra_cids\n";
11158 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11159 ret += unpack_null;
11160 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11165 // Unpack any BUFFER type selections into temporaries
11166 // so that I can compute their size and not have
11167 // to recompute their value during tuple packing.
11168 // I can use regular assignment here because
11169 // these temporaries are non-persistent.
11171 ret += "//\t\tCompute the size of the tuple.\n";
11172 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
11174 // Unpack all buffer type selections, to be able to compute their size
11175 ret += gen_buffer_selvars(schema, select_list);
11177 // The size of the tuple is the size of the tuple struct plus the
11178 // size of the buffers to be copied in.
11180 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
11181 ret += gen_buffer_selvars_size(select_list,schema);
11184 // Allocate tuple data block.
11185 ret += "//\t\tCreate the tuple block.\n";
11186 ret += "\ttup.data = malloc(tup.tuple_size);\n";
11187 ret += "\ttup.heap_resident = true;\n";
11188 // ret += "\ttup.channel = 0;\n";
11190 // Mark tuple as regular
11191 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
11194 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
11195 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
11198 // (Here, offsets are hard-wired. is this a problem?)
11200 ret += "//\t\tPack the fields into the tuple.\n";
11201 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
11203 // Delete string temporaries
11204 ret += gen_buffer_selvars_dtr(select_list);
11206 ret += "\tfailed = false;\n";
11207 ret += "\treturn tup;\n";
11212 //-----------------------------
11213 // Method for checking whether tuple is temporal
11215 ret += "bool temp_status_received(host_tuple &tup){\n";
11217 // Switch the processing based on the channel
11218 ret+="\tif(tup.channel == 0){\n";
11219 ret+="\t\thost_tuple &tup0 = tup;\n";
11220 ret += gen_temp_tuple_check(this->node_name, 0);
11221 ret += "\t}else{\n";
11222 ret+="\t\thost_tuple &tup1 = tup;\n";
11223 ret += gen_temp_tuple_check(this->node_name, 1);
11225 ret += "\treturn temp_tuple_received;\n};\n\n";
11228 //-------------------------------------------------------------------
11229 // Temporal update functions
11232 // create a temp status tuple
11233 ret += "int create_temp_status_tuple("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts, host_tuple& result) {\n\n";
11235 ret += "\tgs_retval_t retval = 0;\n";
11236 ret += "\tgs_int32_t problem = 0;\n";
11238 for(p=0;p<temporal_dt.size();p++){
11239 sprintf(tmpstr,"lhs_var");
11240 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
11241 sprintf(tmpstr,"rhs_var");
11242 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
11245 ret += "\tif(lts!=NULL){\n";
11246 for(p=0;p<temporal_dt.size();p++){
11247 ret += "\t\tlhs_var = lts->tempeq_var"+to_string(p)+";\n";
11249 ret += "\t}else{\n";
11250 for(p=0;p<temporal_dt.size();p++){
11251 ret += "\t\tlhs_var = 0;\n";
11255 ret += "\tif(rts!=NULL){\n";
11256 for(p=0;p<temporal_dt.size();p++){
11257 ret += "\t\trhs_var = rts->tempeq_var"+to_string(p)+";\n";
11259 ret += "\t}else{\n";
11260 for(p=0;p<temporal_dt.size();p++){
11261 ret += "\t\trhs_var = 0;\n";
11265 ret += gen_init_temp_status_tuple(this->get_node_name());
11270 // This is checked in the query analyzer so I think it's safe,
11271 // But a lot of older code has complex code to propagate multiple
11273 for(s=0;s<select_list.size();s++){
11274 scalarexp_t *se = select_list[s]->se;
11275 data_type *sdt = se->get_data_type();
11276 if(sdt->is_temporal()){
11277 string target = "\ttuple->tuple_var"+to_string(s)+" = ";
11278 if(from[0]->get_property()==0 && from[1]->get_property()==0){ // INNER
11279 ret += target+"(lhs_var>rhs_var ? lhs_var : rhs_var); // INNER\n";
11281 if(from[0]->get_property()!=0 && from[1]->get_property()==0){ // LEFT
11282 ret += target+"lhs_var; // LEFT\n";
11283 // ret += target+"rhs_var; // LEFT\n";
11285 if(from[0]->get_property()==0 && from[1]->get_property()!=0){ // RIGHT
11286 ret += target+"rhs_var; // RIGHT\n";
11287 // ret += target+"lhs_var; // RIGHT\n";
11289 if(from[0]->get_property()!=0 && from[1]->get_property()!=0){ // OUTER
11290 ret += target+"(lhs_var<rhs_var ? lhs_var : rhs_var); // OUTER\n";
11296 ret += "\treturn 0;\n";
11302 //----------------------------------------------------------
11303 // The hash function
11305 ret += "struct "+generate_functor_name()+"_hash_func{\n";
11306 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
11307 "_keydef *key) const{\n";
11308 ret += "\t\treturn( (";
11309 if(hashkey_dt.size() > 0){
11310 for(p=0;p<hashkey_dt.size();p++){
11311 if(p>0) ret += "^";
11312 if(hashkey_dt[p]->use_hashfunc()){
11313 // sprintf(tmpstr,"%s(&(key->hashkey_var%d))",hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11314 if(hashkey_dt[p]->is_buffer_type())
11315 sprintf(tmpstr,"(%s*%s(&(key->hashkey_var%d)))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11317 sprintf(tmpstr,"(%s*%s(key->hashkey_var%d))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11319 sprintf(tmpstr,"(%s*key->hashkey_var%d)",hash_nums[p%NRANDS].c_str(),p);
11326 ret += ") >> 32);\n";
11330 //----------------------------------------------------------
11331 // The comparison function
11333 ret += "struct "+generate_functor_name()+"_equal_func{\n";
11334 ret += "\tbool operator()(const "+generate_functor_name()+"_keydef *key1, "+
11335 generate_functor_name()+"_keydef *key2) const{\n";
11336 ret += "\t\treturn( (";
11337 if(hashkey_dt.size() > 0){
11338 for(p=0;p<hashkey_dt.size();p++){
11339 if(p>0) ret += ") && (";
11340 if(hashkey_dt[p]->complex_comparison(hashkey_dt[p])){
11341 if(hashkey_dt[p]->is_buffer_type())
11342 sprintf(tmpstr,"(%s(&(key1->hashkey_var%d), &(key2->hashkey_var%d))==0)",
11343 hashkey_dt[p]->get_hfta_equals_fcn(hashkey_dt[p]).c_str(),p,p);
11345 sprintf(tmpstr,"(%s((key1->hashkey_var%d), (key2->hashkey_var%d))==0)",
11346 hashkey_dt[p]->get_hfta_equals_fcn(hashkey_dt[p]).c_str(),p,p);
11348 sprintf(tmpstr,"key1->hashkey_var%d == key2->hashkey_var%d",p,p);
// Produces the text of the generated-code statement that declares and
// allocates the join_eq_hash_operator for this query node.
//   i      : index used to name the emitted pointer variable ("op" + i).
//   params : NOTE(review): presumably the leading constructor arguments of
//            the emitted "new" expression -- confirm against the caller.
// The operator template is instantiated with the functor class produced by
// generate_functor_name() and its companion types, whose names are the
// functor name suffixed with _tempeqdef, _keydef, _hash_func and _equal_func.
11365 string join_eq_hash_qpn::generate_operator(int i, string params){
// Declared type of the pointer variable op<i>.
11368 " join_eq_hash_operator<" +
11369 generate_functor_name()+ ","+
11370 generate_functor_name() + "_tempeqdef,"+
11371 generate_functor_name() + "_keydef,"+
11372 generate_functor_name()+"_hash_func,"+
11373 generate_functor_name()+"_equal_func"
// The same template instantiation again, this time for the "new" expression.
11374 "> *op"+int_to_string(i)+" = new join_eq_hash_operator<"+
11375 generate_functor_name()+","+
11376 generate_functor_name() + "_tempeqdef,"+
11377 generate_functor_name() + "_keydef,"+
11378 generate_functor_name()+"_hash_func,"+
11379 generate_functor_name()+"_equal_func"
// Constructor arguments: one integer combining the properties of the two
// inputs (from[0]'s property plus twice from[1]'s), then the quoted node name.
// NOTE(review): this integer appears to encode the join type
// (inner/left/right/outer) -- confirm the property values are 0 or 1.
11381 int_to_string(from[0]->get_property()+2*from[1]->get_property())+", \"" + get_node_name() +
11388 ////////////////////////////////////////////////////////////////
11389 //// SGAHCWCB functor
// Returns the name used for the functor class generated for this node:
// the prefix "sgahcwcb_functor_" followed by the result of normalize_name()
// applied to the node name.
11393 string sgahcwcb_qpn::generate_functor_name(){
11394 return("sgahcwcb_functor_" + normalize_name(this->get_node_name()));
11398 string sgahcwcb_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
11402 // Initialize generate utility globals
11403 segen_gb_tbl = &(gb_tbl);
11406 //--------------------------------
11407 // group definition class
11408 string ret = "class " + generate_functor_name() + "_groupdef{\n";
11409 ret += "public:\n";
11410 ret += "\tbool valid;\n";
11411 for(g=0;g<this->gb_tbl.size();g++){
11412 sprintf(tmpstr,"gb_var%d",g);
11413 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11416 ret += "\t"+generate_functor_name() + "_groupdef(){valid=true;};\n";
11417 ret += "\t"+generate_functor_name() + "_groupdef("+
11418 this->generate_functor_name() + "_groupdef *gd){\n";
11419 for(g=0;g<gb_tbl.size();g++){
11420 data_type *gdt = gb_tbl.get_data_type(g);
11421 if(gdt->is_buffer_type()){
11422 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
11423 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
11426 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
11430 ret += "\tvalid=true;\n";
11433 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
11434 for(g=0;g<gb_tbl.size();g++){
11435 data_type *gdt = gb_tbl.get_data_type(g);
11436 if(gdt->is_buffer_type()){
11437 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
11438 gdt->get_hfta_buffer_destroy().c_str(), g );
11445 //--------------------------------
11446 // aggr definition class
11447 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
11448 ret += "public:\n";
11449 for(a=0;a<aggr_tbl.size();a++){
11450 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11451 sprintf(tmpstr,"aggr_var%d",a);
11452 if(aggr_tbl.is_builtin(a))
11453 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11455 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11458 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
11460 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
11461 for(a=0;a<aggr_tbl.size();a++){
11462 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11463 if(aggr_tbl.is_builtin(a)){
11464 data_type *adt = aggr_tbl.get_data_type(a);
11465 if(adt->is_buffer_type()){
11466 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11467 adt->get_hfta_buffer_destroy().c_str(), a );
11471 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11472 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11473 ret+="(aggr_var"+int_to_string(a)+"));\n";
11479 //--------------------------------
11480 // superaggr definition class
11481 ret += "class " + this->generate_functor_name() + "_statedef{\n";
11482 ret += "public:\n";
11483 for(a=0;a<aggr_tbl.size();a++){
11484 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11485 if(ate->is_superaggr()){
11486 sprintf(tmpstr,"aggr_var%d",a);
11487 if(aggr_tbl.is_builtin(a))
11488 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11490 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11493 set<string>::iterator ssi;
11494 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11495 string state_nm = (*ssi);
11496 int state_id = Ext_fcns->lookup_state(state_nm);
11497 data_type *dt = Ext_fcns->get_storage_dt(state_id);
11498 string state_var = "state_var_"+state_nm;
11499 ret += "\t"+dt->make_host_cvar(state_var)+";\n";
11502 ret += "\t"+this->generate_functor_name() + "_statedef(){};\n";
11504 ret += "\t~"+this->generate_functor_name() + "_statedef(){\n";
11505 for(a=0;a<aggr_tbl.size();a++){
11506 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11507 if(ate->is_superaggr()){
11508 if(aggr_tbl.is_builtin(a)){
11509 data_type *adt = aggr_tbl.get_data_type(a);
11510 if(adt->is_buffer_type()){
11511 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11512 adt->get_hfta_buffer_destroy().c_str(), a );
11516 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11517 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11518 ret+="(aggr_var"+int_to_string(a)+"));\n";
11522 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11523 string state_nm = (*ssi);
11524 int state_id = Ext_fcns->lookup_state(state_nm);
11525 string state_var = "state_var_"+state_nm;
11526 ret += "\t_sfun_state_destroy_"+state_nm+"(&"+state_var+");\n";
11533 //--------------------------------
11534 // gb functor class
11535 ret += "class " + this->generate_functor_name() + "{\n";
11537 // Find variables referenced in this query node.
11539 col_id_set cid_set;
11540 col_id_set::iterator csi;
11542 for(w=0;w<where.size();++w)
11543 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
11544 for(w=0;w<having.size();++w)
11545 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
11546 for(w=0;w<cleanby.size();++w)
11547 gather_pr_col_ids(cleanby[w]->pr,cid_set,segen_gb_tbl);
11548 for(w=0;w<cleanwhen.size();++w)
11549 gather_pr_col_ids(cleanwhen[w]->pr,cid_set,segen_gb_tbl);
11550 for(g=0;g<gb_tbl.size();g++)
11551 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
11553 for(s=0;s<select_list.size();s++){
11554 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
11558 // Private variables : store the state of the functor.
11559 // 1) variables for unpacked attributes
11560 // 2) offsets of the unpacked attributes
11561 // 3) storage of partial functions
11562 // 4) storage of complex literals (i.e., require a constructor)
11564 ret += "private:\n";
11566 // var to save the schema handle
11567 ret += "\tint schema_handle0;\n";
11569 // generate the declaration of all the variables related to
11570 // temp tuples generation
11571 ret += gen_decl_temp_vars();
11573 // unpacked attribute storage, offsets
11574 ret += "//\t\tstorage and offsets of accessed fields.\n";
11575 ret += generate_access_vars(cid_set, schema);
11576 // tuple metadata offset
11577 ret += "\ttuple_metadata_offset0;\n";
11579 // Variables to store results of partial functions.
11580 // WARNING find_partial_functions modifies the SE
11581 // (it marks the partial function id).
11582 ret += "//\t\tParital function result storage\n";
11583 vector<scalarexp_t *> partial_fcns;
11584 vector<int> fcn_ref_cnt;
11585 vector<bool> is_partial_fcn;
11586 for(s=0;s<select_list.size();s++){
11587 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
11589 for(w=0;w<where.size();w++){
11590 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11592 for(w=0;w<having.size();w++){
11593 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11595 for(w=0;w<cleanby.size();w++){
11596 find_partial_fcns_pr(cleanby[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11598 for(w=0;w<cleanwhen.size();w++){
11599 find_partial_fcns_pr(cleanwhen[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11601 for(g=0;g<gb_tbl.size();g++){
11602 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
11604 for(a=0;a<aggr_tbl.size();a++){
11605 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
11607 if(partial_fcns.size()>0){
11608 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
11609 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
11612 // Complex literals (i.e., they need constructors)
11613 ret += "//\t\tComplex literal storage.\n";
11614 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
11615 ret += generate_complex_lit_vars(complex_literals);
11617 // Pass-by-handle parameters
11618 ret += "//\t\tPass-by-handle storage.\n";
11619 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
11620 ret += generate_pass_by_handle_vars(param_handle_table);
11622 // Create cached temporaries for UDAF return values.
11623 ret += "//\t\tTemporaries for UDAF return values.\n";
11624 for(a=0;a<aggr_tbl.size();a++){
11625 if(! aggr_tbl.is_builtin(a)){
11626 int afcn_id = aggr_tbl.get_fcn_id(a);
11627 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11628 sprintf(tmpstr,"udaf_ret_%d", a);
11629 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11635 // variables to hold parameters.
11636 ret += "//\tfor query parameters\n";
11637 ret += generate_param_vars(param_tbl);
11639 // Is there a temporal flush? If so create flush temporaries,
11640 // create flush indicator.
11641 bool uses_temporal_flush = false;
11642 for(g=0;g<gb_tbl.size();g++){
11643 data_type *gdt = gb_tbl.get_data_type(g);
11644 if(gdt->is_temporal())
11645 uses_temporal_flush = true;
11648 if(uses_temporal_flush){
11649 ret += "//\t\tFor temporal flush\n";
11650 for(g=0;g<gb_tbl.size();g++){
11651 data_type *gdt = gb_tbl.get_data_type(g);
11652 if(gdt->is_temporal()){
11653 sprintf(tmpstr,"last_gb%d",g);
11654 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11655 sprintf(tmpstr,"last_flushed_gb%d",g);
11656 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11659 ret += "\tbool needs_temporal_flush;\n";
11662 // The publicly exposed functions
11664 ret += "\npublic:\n";
11667 //-------------------
11668 // The functor constructor
11669 // pass in the schema handle.
11670 // 1) make assignments to the unpack offset variables
11671 // 2) initialize the complex literals
11673 ret += "//\t\tFunctor constructor.\n";
11674 ret += this->generate_functor_name()+"(int schema_handle0){\n";
11676 // save the schema handle
11677 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
11678 // tuple metadata offset
11679 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
11682 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
11683 ret += gen_access_var_init(cid_set);
11685 // aggregate return vals : refd in both final_sample
11686 // and create_output_tuple
11687 // Create cached temporaries for UDAF return values.
11688 for(a=0;a<aggr_tbl.size();a++){
11689 if(! aggr_tbl.is_builtin(a)){
11690 int afcn_id = aggr_tbl.get_fcn_id(a);
11691 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11692 sprintf(tmpstr,"udaf_ret_%d", a);
11693 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11697 // complex literals
11698 ret += "//\t\tInitialize complex literals.\n";
11699 ret += gen_complex_lit_init(complex_literals);
11701 // Initialize partial function results so they can be safely GC'd
11702 ret += gen_partial_fcn_init(partial_fcns);
11704 // Initialize non-query-parameter parameter handles
11705 ret += gen_pass_by_handle_init(param_handle_table);
11707 // temporal flush variables
11708 // ASSUME that structured values won't be temporal.
11709 if(uses_temporal_flush){
11710 ret += "//\t\tInitialize temporal flush variables.\n";
11711 for(g=0;g<gb_tbl.size();g++){
11712 data_type *gdt = gb_tbl.get_data_type(g);
11713 if(gdt->is_temporal()){
11714 literal_t gl(gdt->type_indicator());
11715 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
11716 ret.append(tmpstr);
11719 ret += "\tneeds_temporal_flush = false;\n";
11722 // Init temporal attributes referenced in select list
11723 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
11728 //-------------------
11729 // Functor destructor
11730 ret += "//\t\tFunctor destructor.\n";
11731 ret += "~"+this->generate_functor_name()+"(){\n";
11733 // clean up buffer type complex literals
11734 ret += gen_complex_lit_dtr(complex_literals);
11736 // Deregister the pass-by-handle parameters
11737 ret += "/* register and de-register the pass-by-handle parameters */\n";
11738 ret += gen_pass_by_handle_dtr(param_handle_table);
11740 // clean up partial function results.
11741 ret += "/* clean up partial function storage */\n";
11742 ret += gen_partial_fcn_dtr(partial_fcns);
11744 // Destroy the parameters, if any need to be destroyed
11745 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11750 //-------------------
11751 // Parameter manipulation routines
11752 ret += generate_load_param_block(this->generate_functor_name(),
11753 this->param_tbl,param_handle_table);
11754 ret += generate_delete_param_block(this->generate_functor_name(),
11755 this->param_tbl,param_handle_table);
11757 //-------------------
11758 // Register new parameter block
11760 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
11761 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11762 ret += "\treturn this->load_params_"+this->generate_functor_name()+
11766 //-------------------
11767 // the create_group method.
11768 // This method creates a group in a buffer passed in
11769 // (to allow for creation on the stack).
11770 // There are also a couple of side effects:
11771 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
11772 // 2) determine if a temporal flush is required.
11774 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
11775 // Variables for execution of the function.
11776 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11778 if(partial_fcns.size()>0){ // partial fcn access failure
11779 ret += "\tgs_retval_t retval = 0;\n";
11783 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
11784 "_groupdef *) buffer;\n";
11786 // Start by cleaning up partial function results
11787 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11789 set<int> gb_pfcns; // partial fcns in gbdefs, aggr se's
11790 for(g=0;g<gb_tbl.size();g++){
11791 collect_partial_fcns(gb_tbl.get_def(g), gb_pfcns);
11793 ret += gen_partial_fcn_dtr(partial_fcns,gb_pfcns);
11794 // ret += gen_partial_fcn_dtr(partial_fcns);
11797 ret += gen_temp_tuple_check(this->node_name, 0);
11798 col_id_set found_cids; // colrefs unpacked thus far.
11799 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
11803 // Save temporal group-by variables
11806 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
11808 for(g=0;g<gb_tbl.size();g++){
11810 data_type *gdt = gb_tbl.get_data_type(g);
11812 if(gdt->is_temporal()){
11813 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11814 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11815 ret.append(tmpstr);
11822 // Compare the temporal GB vars with the stored ones,
11823 // set flush indicator and update stored GB vars if there is any change.
11825 if(uses_temporal_flush){
11826 ret+= "\tif( !( (";
11827 bool first_one = true;
11828 for(g=0;g<gb_tbl.size();g++){
11829 data_type *gdt = gb_tbl.get_data_type(g);
11831 if(gdt->is_temporal()){
11832 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
11833 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
11834 if(first_one){first_one = false;} else {ret += ") && (";}
11835 ret += generate_equality_test(lhs_op, rhs_op, gdt);
11839 for(g=0;g<gb_tbl.size();g++){
11840 data_type *gdt = gb_tbl.get_data_type(g);
11841 if(gdt->is_temporal()){
11842 if(gdt->is_buffer_type()){
11843 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
11845 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
11847 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
11853 if(uses_temporal_flush){
11854 for(g=0;g<gb_tbl.size();g++){
11855 data_type *gdt = gb_tbl.get_data_type(g);
11856 if(gdt->is_temporal()){
11857 ret+="if(last_flushed_gb"+int_to_string(g)+">0)\n";
11863 ret += "\t\tneeds_temporal_flush=true;\n";
11864 ret += "\t\t}else{\n"
11865 "\t\t\tneeds_temporal_flush=false;\n"
11870 // For temporal status tuple we don't need to do anything else
11871 ret += "\tif (temp_tuple_received) return NULL;\n\n";
11874 // The partial functions ref'd in the group-by var
11875 // definitions must be evaluated. If one returns false,
11876 // then implicitly the predicate is false.
11877 set<int>::iterator pfsi;
11879 if(gb_pfcns.size() > 0)
11880 ret += "//\t\tUnpack partial fcns.\n";
11881 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, gb_pfcns,
11882 found_cids, segen_gb_tbl, "NULL", needs_xform);
11884 // Unpack the group-by variables
11886 for(g=0;g<gb_tbl.size();g++){
11887 // Find the new fields ref'd by this GBvar def.
11888 col_id_set new_cids;
11889 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
11890 // Unpack these values.
11891 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
11893 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11894 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11896 // There seems to be no difference between the two
11897 // branches of the IF statement.
11898 data_type *gdt = gb_tbl.get_data_type(g);
11899 if(gdt->is_buffer_type()){
11900 // Create temporary copy.
11901 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11902 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11904 scalarexp_t *gse = gb_tbl.get_def(g);
11905 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11906 g,generate_se_code(gse,schema).c_str());
11909 ret.append(tmpstr);
11914 ret+= "\treturn gbval;\n";
11919 //-------------------
11920 // the create_group method.
11921 // This method creates a group in a buffer passed in
11922 // (to allow for creation on the stack).
11923 // There are also a couple of side effects:
11924 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
11925 // 2) determine if a temporal flush is required.
11927 ret += "bool evaluate_predicate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, int cd){\n";
11928 // Variables for execution of the function.
11929 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11931 if(partial_fcns.size()>0){ // partial fcn access failure
11932 ret += "\tgs_retval_t retval = 0;\n";
11936 // Start by cleaning up partial function results
11937 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11938 set<int> w_pfcns; // partial fcns in where clause
11939 for(w=0;w<where.size();++w)
11940 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
11942 set<int> ag_pfcns; // partial fcns in gbdefs, aggr se's
11943 for(a=0;a<aggr_tbl.size();a++){
11944 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_pfcns);
11946 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
11947 ret += gen_partial_fcn_dtr(partial_fcns,ag_pfcns);
11949 ret+="//\t\tEvaluate clauses which don't reference stateful fcns first \n";
11950 for(w=0;w<where.size();++w){
11951 if(! pred_refs_sfun(where[w]->pr)){
11952 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11954 // Find the set of variables accessed in this CNF elem,
11955 // but in no previous element.
11956 col_id_set new_cids;
11957 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11959 // Unpack these values.
11960 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11961 // Find partial fcns ref'd in this cnf element
11962 set<int> pfcn_refs;
11963 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11964 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11966 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11967 +") ) return(false);\n";
11972 // The partial functions ref'd in the aggregate
11973 // definitions must also be evaluated. If one returns false,
11974 // then implicitly the predicate is false.
11975 // ASSUME that aggregates cannot reference stateful fcns.
11977 if(ag_pfcns.size() > 0)
11978 ret += "//\t\tUnpack remaining partial fcns.\n";
11979 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_pfcns,
11980 found_cids, segen_gb_tbl, "false", needs_xform);
11982 ret+="//\t\tEvaluate all remaining where clauses.\n";
11983 ret+="\tbool retval = true;\n";
11984 for(w=0;w<where.size();++w){
11985 if( pred_refs_sfun(where[w]->pr)){
11986 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11988 // Find the set of variables accessed in this CNF elem,
11989 // but in no previous element.
11990 col_id_set new_cids;
11991 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11993 // Unpack these values.
11994 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11995 // Find partial fcns ref'd in this cnf element
11996 set<int> pfcn_refs;
11997 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11998 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
12000 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
12001 +") ) retval = false;\n";
12005 ret+="// Unpack all remaining attributes\n";
12006 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
12008 ret += "\n\treturn retval;\n";
12011 //--------------------------------------------------------
12012 // Create and initialize an aggregate object
12014 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, gs_sp_t a,"+generate_functor_name()+"_statedef *stval, int cd){\n";
12015 // Variables for execution of the function.
12016 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12019 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+ "_aggrdef *)a;\n";
12021 for(a=0;a<aggr_tbl.size();a++){
12022 if(aggr_tbl.is_builtin(a)){
12023 // Create temporaries for buffer return values
12024 data_type *adt = aggr_tbl.get_data_type(a);
12025 if(adt->is_buffer_type()){
12026 sprintf(tmpstr,"aggr_tmp_%d", a);
12027 ret+=adt->make_host_cvar(tmpstr)+";\n";
12032 for(a=0;a<aggr_tbl.size();a++){
12033 sprintf(tmpstr,"aggval->aggr_var%d",a);
12034 string assignto_var = tmpstr;
12035 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12038 ret += "\treturn aggval;\n";
12042 //--------------------------------------------------------
12043 // initialize an aggregate object inplace
12045 ret += "void create_aggregate(host_tuple &tup0, "+this->generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
12046 // Variables for execution of the function.
12047 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12051 for(a=0;a<aggr_tbl.size();a++){
12052 if(aggr_tbl.is_builtin(a)){
12053 // Create temporaries for buffer return values
12054 data_type *adt = aggr_tbl.get_data_type(a);
12055 if(adt->is_buffer_type()){
12056 sprintf(tmpstr,"aggr_tmp_%d", a);
12057 ret+=adt->make_host_cvar(tmpstr)+";\n";
12062 for(a=0;a<aggr_tbl.size();a++){
12063 sprintf(tmpstr,"aggval->aggr_var%d",a);
12064 string assignto_var = tmpstr;
12065 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12071 //--------------------------------------------------------
12072 // Create and clean-initialize a state object
12074 ret += "void initialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval){\n";
12075 // Variables for execution of the function.
12076 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12079 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
12081 for(a=0;a<aggr_tbl.size();a++){
12082 if( aggr_tbl.is_superaggr(a)){
12083 if(aggr_tbl.is_builtin(a)){
12084 // Create temporaries for buffer return values
12085 data_type *adt = aggr_tbl.get_data_type(a);
12086 if(adt->is_buffer_type()){
12087 sprintf(tmpstr,"aggr_tmp_%d", a);
12088 ret+=adt->make_host_cvar(tmpstr)+";\n";
12094 for(a=0;a<aggr_tbl.size();a++){
12095 if( aggr_tbl.is_superaggr(a)){
12096 sprintf(tmpstr,"stval->aggr_var%d",a);
12097 string assignto_var = tmpstr;
12098 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12102 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12103 string state_nm = (*ssi);
12104 ret += "_sfun_state_clean_init_"+state_nm+"(&(stval->state_var_"+state_nm+"));\n";
12110 //--------------------------------------------------------
12111 // Create and dirty-initialize a state object
12113 ret += "void reinitialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, "+generate_functor_name()+"_statedef *old_stval, int cd){\n";
12114 // Variables for execution of the function.
12115 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12118 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
12120 for(a=0;a<aggr_tbl.size();a++){
12121 if( aggr_tbl.is_superaggr(a)){
12122 if(aggr_tbl.is_builtin(a)){
12123 // Create temporaries for buffer return values
12124 data_type *adt = aggr_tbl.get_data_type(a);
12125 if(adt->is_buffer_type()){
12126 sprintf(tmpstr,"aggr_tmp_%d", a);
12127 ret+=adt->make_host_cvar(tmpstr)+";\n";
12133 // initialize superaggregates
12134 for(a=0;a<aggr_tbl.size();a++){
12135 if( aggr_tbl.is_superaggr(a)){
12136 sprintf(tmpstr,"stval->aggr_var%d",a);
12137 string assignto_var = tmpstr;
12138 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12142 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12143 string state_nm = (*ssi);
12144 ret += "_sfun_state_dirty_init_"+state_nm+"(&(stval->state_var_"+state_nm+"),&(old_stval->state_var_"+state_nm+"), cd );\n";
12149 //--------------------------------------------------------
12150 // Finalize_state : call the finalize fcn on all states
12153 ret += "void finalize_state( "+generate_functor_name()+"_statedef *stval, int cd){\n";
12155 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12156 string state_nm = (*ssi);
12157 ret += "_sfun_state_final_init_"+state_nm+"(&(stval->state_var_"+state_nm+"), cd);\n";
12165 //--------------------------------------------------------
12166 // update (plus) a superaggregate object
12168 ret += "void update_plus_superaggr(host_tuple &tup0, " +
12169 generate_functor_name()+"_groupdef *gbval, "+
12170 generate_functor_name()+"_statedef *stval){\n";
12171 // Variables for execution of the function.
12172 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12174 // use of temporaries depends on the aggregate,
12175 // generate them in generate_aggr_update
12178 for(a=0;a<aggr_tbl.size();a++){
12179 if(aggr_tbl.is_superaggr(a)){
12180 sprintf(tmpstr,"stval->aggr_var%d",a);
12181 string varname = tmpstr;
12182 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12186 ret += "\treturn;\n";
12191 //--------------------------------------------------------
12192 // update (minus) a superaggregate object
12194 ret += "void update_minus_superaggr( "+
12195 generate_functor_name()+"_groupdef *gbval, "+
12196 generate_functor_name()+"_aggrdef *aggval,"+
12197 generate_functor_name()+"_statedef *stval"+
12199 // Variables for execution of the function.
12200 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12202 // use of temporaries depends on the aggregate,
12203 // generate them in generate_aggr_update
12206 for(a=0;a<aggr_tbl.size();a++){
12207 if(aggr_tbl.is_superaggr(a)){
12208 sprintf(tmpstr,"stval->aggr_var%d",a);
12209 string super_varname = tmpstr;
12210 sprintf(tmpstr,"aggval->aggr_var%d",a);
12211 string sub_varname = tmpstr;
12212 ret.append(generate_superaggr_minus(sub_varname, super_varname,&aggr_tbl,a, schema));
12216 ret += "\treturn;\n";
12220 //--------------------------------------------------------
12221 // update an aggregate object
12223 ret += "void update_aggregate(host_tuple &tup0, "
12224 +generate_functor_name()+"_groupdef *gbval, "+
12225 generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
12226 // Variables for execution of the function.
12227 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12229 // use of temporaries depends on the aggregate,
12230 // generate them in generate_aggr_update
12233 for(a=0;a<aggr_tbl.size();a++){
12234 sprintf(tmpstr,"aggval->aggr_var%d",a);
12235 string varname = tmpstr;
12236 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12239 ret += "\treturn;\n";
12242 //---------------------------------------------------
12245 ret += "\tbool flush_needed(){\n";
12246 if(uses_temporal_flush){
12247 ret += "\t\treturn needs_temporal_flush;\n";
12249 ret += "\t\treturn false;\n";
12254 //------------------------------------------------------
12255 // The cleaning_when predicate
12257 string gbvar = "gbval->gb_var";
12258 string aggvar = "aggval->";
12260 ret += "bool need_to_clean( "
12261 +generate_functor_name()+"_groupdef *gbval, "+
12262 generate_functor_name()+"_statedef *stval, int cd"+
12265 if(cleanwhen.size()>0)
12266 ret += "\tbool predval = true;\n";
12268 ret += "\tbool predval = false;\n";
12270 // Find the udafs ref'd in the cleaning_when clause
12272 for(w=0;w<cleanwhen.size();++w)
12273 collect_aggr_refs_pr(cleanwhen[w]->pr, cw_aggs);
12276 // get the return values from the UDAFS
12277 for(a=0;a<aggr_tbl.size();a++){
12278 if(! aggr_tbl.is_builtin(a) && cw_aggs.count(a)){
12279 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12280 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12281 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12286 // Start by cleaning up partial function results
12287 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12288 set<int> cw_pfcns; // partial fcns in where clause
12289 for(w=0;w<cleanwhen.size();++w)
12290 collect_partial_fcns_pr(cleanwhen[w]->pr, cw_pfcns);
12292 ret += gen_partial_fcn_dtr(partial_fcns,cw_pfcns);
12295 for(w=0;w<cleanwhen.size();++w){
12296 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12298 // Find partial fcns ref'd in this cnf element
12299 set<int> pfcn_refs;
12300 collect_partial_fcns_pr(cleanwhen[w]->pr, pfcn_refs);
12301 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12302 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12303 ret += "\tif(retval){ return false;}\n";
12305 // ret += unpack_partial_fcn_fm_aggr(schema, partial_fcns, pfcn_refs,"false");
12307 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanwhen[w]->pr,gbvar, aggvar, schema)+
12308 ") ) predval = false;\n";
12311 ret += "\treturn predval;\n";
12314 //------------------------------------------------------
12315 // The cleaning_by predicate
12317 ret += "bool sample_group("
12318 +generate_functor_name()+"_groupdef *gbval, "+
12319 generate_functor_name()+"_aggrdef *aggval,"+
12320 generate_functor_name()+"_statedef *stval, int cd"+
12323 if(cleanby.size()>0)
12324 ret += "\tbool retval = true;\n";
12326 ret += "\tbool retval = false;\n";
12328 // Find the udafs ref'd in the cleaning_by clause
12330 for(w=0;w<cleanby.size();++w)
12331 collect_aggr_refs_pr(cleanby[w]->pr, cb_aggs);
12334 // get the return values from the UDAFS
12335 for(a=0;a<aggr_tbl.size();a++){
12336 if(! aggr_tbl.is_builtin(a) && cb_aggs.count(a)){
12337 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12338 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12339 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12344 // Start by cleaning up partial function results
12345 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12346 set<int> cb_pfcns; // partial fcns in where clause
12347 for(w=0;w<cleanby.size();++w)
12348 collect_partial_fcns_pr(cleanby[w]->pr, cb_pfcns);
12350 ret += gen_partial_fcn_dtr(partial_fcns,cb_pfcns);
12353 for(w=0;w<cleanwhen.size();++w){
12354 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12358 // Find the set of variables accessed in this CNF elem,
12359 // but in no previous element.
12360 col_id_set new_cids;
12361 get_new_pred_cids(cleanby[w]->pr, found_cids, new_cids, segen_gb_tbl);
12363 // Unpack these values.
12364 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
12367 // Find partial fcns ref'd in this cnf element
12368 set<int> pfcn_refs;
12369 collect_partial_fcns_pr(cleanby[w]->pr, pfcn_refs);
12370 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12371 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12372 ret += "\tif(retval){ return false;}\n";
12374 // ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
12376 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanby[w]->pr,gbvar, aggvar, schema)+
12377 +") ) retval = false;\n";
12380 ret += "\treturn retval;\n";
12384 //-----------------------------------------------------
12386 ret += "bool final_sample_group("
12387 +generate_functor_name()+"_groupdef *gbval, "+
12388 generate_functor_name()+"_aggrdef *aggval,"+
12389 generate_functor_name()+"_statedef *stval,"+
12392 ret += "\tgs_retval_t retval = 0;\n";
12394 // Find the udafs ref'd in the having clause
12396 for(w=0;w<having.size();++w)
12397 collect_aggr_refs_pr(having[w]->pr, hv_aggs);
12400 // get the return values from the UDAFS
12401 for(a=0;a<aggr_tbl.size();a++){
12402 if(! aggr_tbl.is_builtin(a) && hv_aggs.count(a)){
12403 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12404 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12405 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12410 set<int> hv_sl_pfcns;
12411 for(w=0;w<having.size();w++){
12412 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
12415 // clean up the partial fcn results from any previous execution
12416 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
12419 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
12420 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12421 ret += "\tif(retval){ return false;}\n";
12424 // Evaluate the HAVING clause
12425 // TODO: this seems to have a ++ operator rather than a + operator.
12426 for(w=0;w<having.size();++w){
12427 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
12430 ret += "\treturn true;\n";
12433 //---------------------------------------------------
12434 // create output tuple
12435 // Unpack the partial functions ref'd in the where clause,
12436 // select clause. Evaluate the where clause.
12437 // Finally, pack the tuple.
12439 // I need to use special code generation here,
12440 // so I'll leave it in longhand.
12442 ret += "host_tuple create_output_tuple("
12443 +generate_functor_name()+"_groupdef *gbval, "+
12444 generate_functor_name()+"_aggrdef *aggval,"+
12445 generate_functor_name()+"_statedef *stval,"+
12446 "int cd, bool &failed){\n";
12448 ret += "\thost_tuple tup;\n";
12449 ret += "\tfailed = false;\n";
12450 ret += "\tgs_retval_t retval = 0;\n";
12453 // Find the udafs ref'd in the select clause
12455 for(s=0;s<select_list.size();s++)
12456 collect_agg_refs(select_list[s]->se, sl_aggs);
12459 // get the return values from the UDAFS
12460 for(a=0;a<aggr_tbl.size();a++){
12461 if(! aggr_tbl.is_builtin(a) && sl_aggs.count(a)){
12462 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12463 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12464 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12469 // I can't cache partial fcn results from the having
12470 // clause because evaluation is separated.
12472 for(s=0;s<select_list.size();s++){
12473 collect_partial_fcns(select_list[s]->se, sl_pfcns);
12476 for(pfsi=sl_pfcns.begin();pfsi!=sl_pfcns.end();++pfsi){
12477 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12478 ret += "\tif(retval){ failed=true; return tup;}\n";
12482 // Now, compute the size of the tuple.
12484 // Unpack any BUFFER type selections into temporaries
12485 // so that I can compute their size and not have
12486 // to recompute their value during tuple packing.
12487 // I can use regular assignment here because
12488 // these temporaries are non-persistent.
12489 // TODO: should I be using the selvar generation routine?
12491 ret += "//\t\tCompute the size of the tuple.\n";
12492 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
12493 for(s=0;s<select_list.size();s++){
12494 scalarexp_t *se = select_list[s]->se;
12495 data_type *sdt = se->get_data_type();
12496 if(sdt->is_buffer_type() &&
12497 !( (se->get_operator_type() == SE_COLREF) ||
12498 (se->get_operator_type() == SE_AGGR_STAR) ||
12499 (se->get_operator_type() == SE_AGGR_SE) ||
12500 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12501 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12503 sprintf(tmpstr,"selvar_%d",s);
12504 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
12505 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
12509 // The size of the tuple is the size of the tuple struct plus the
12510 // size of the buffers to be copied in.
12512 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
12513 for(s=0;s<select_list.size();s++){
12514 // if(s>0) ret += "+";
12515 scalarexp_t *se = select_list[s]->se;
12516 data_type *sdt = select_list[s]->se->get_data_type();
12517 if(sdt->is_buffer_type()){
12518 if(!( (se->get_operator_type() == SE_COLREF) ||
12519 (se->get_operator_type() == SE_AGGR_STAR) ||
12520 (se->get_operator_type() == SE_AGGR_SE) ||
12521 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12522 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12524 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
12525 ret.append(tmpstr);
12527 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12528 ret.append(tmpstr);
12534 // Allocate tuple data block.
12535 ret += "//\t\tCreate the tuple block.\n";
12536 ret += "\ttup.data = malloc(tup.tuple_size);\n";
12537 ret += "\ttup.heap_resident = true;\n";
12539 // Mark tuple as regular
12540 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
12542 // ret += "\ttup.channel = 0;\n";
12543 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
12544 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
12547 // (Here, offsets are hard-wired. is this a problem?)
12549 ret += "//\t\tPack the fields into the tuple.\n";
12550 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
12551 for(s=0;s<select_list.size();s++){
12552 scalarexp_t *se = select_list[s]->se;
12553 data_type *sdt = se->get_data_type();
12554 if(sdt->is_buffer_type()){
12555 if(!( (se->get_operator_type() == SE_COLREF) ||
12556 (se->get_operator_type() == SE_AGGR_STAR) ||
12557 (se->get_operator_type() == SE_AGGR_SE) ||
12558 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12559 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12561 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
12562 ret.append(tmpstr);
12563 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
12564 ret.append(tmpstr);
12566 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12567 ret.append(tmpstr);
12568 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12569 ret.append(tmpstr);
12572 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12573 ret.append(tmpstr);
12574 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
12579 // Destroy string temporaries
12580 ret += gen_buffer_selvars_dtr(select_list);
12581 // Destroy string return vals of UDAFs
12582 for(a=0;a<aggr_tbl.size();a++){
12583 if(! aggr_tbl.is_builtin(a)){
12584 int afcn_id = aggr_tbl.get_fcn_id(a);
12585 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12586 if(adt->is_buffer_type()){
12587 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12588 adt->get_hfta_buffer_destroy().c_str(), a );
12595 ret += "\treturn tup;\n";
12599 //-------------------------------------------------------------------
12600 // Temporal update functions
12602 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12604 // create a temp status tuple
12605 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12607 ret += gen_init_temp_status_tuple(this->get_node_name());
12610 // (Here, offsets are hard-wired. is this a problem?)
12612 ret += "//\t\tPack the fields into the tuple.\n";
12613 for(s=0;s<select_list.size();s++){
12614 data_type *sdt = select_list[s]->se->get_data_type();
12615 if(sdt->is_temporal()){
12616 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12618 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
12624 ret += "\treturn 0;\n";
12625 ret += "};};\n\n\n";
12628 //----------------------------------------------------------
12629 // The hash function
12631 ret += "struct "+generate_functor_name()+"_hash_func{\n";
12632 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12633 "_groupdef *grp) const{\n";
12634 ret += "\t\treturn(";
12635 for(g=0;g<gb_tbl.size();g++){
12636 if(g>0) ret += "^";
12637 data_type *gdt = gb_tbl.get_data_type(g);
12638 if(gdt->use_hashfunc()){
12639 if(gdt->is_buffer_type())
12640 sprintf(tmpstr,"(%s*%s(&)grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12642 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12644 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12648 ret += ") >> 32);\n";
12652 //----------------------------------------------------------
12653 // The superhash function
12655 ret += "struct "+generate_functor_name()+"_superhash_func{\n";
12656 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12657 "_groupdef *grp) const{\n";
12658 ret += "\t\treturn(0";
12660 for(g=0;g<gb_tbl.size();g++){
12661 if(sg_tbl.count(g)>0){
12663 data_type *gdt = gb_tbl.get_data_type(g);
12664 if(gdt->use_hashfunc()){
12665 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12667 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12672 ret += ") >> 32);\n";
12677 //----------------------------------------------------------
12678 // The comparison function
12680 ret += "struct "+generate_functor_name()+"_equal_func{\n";
12681 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12682 generate_functor_name()+"_groupdef *grp2) const{\n";
12683 ret += "\t\treturn( (";
12684 for(g=0;g<gb_tbl.size();g++){
12685 if(g>0) ret += ") && (";
12686 data_type *gdt = gb_tbl.get_data_type(g);
12687 if(gdt->complex_comparison(gdt)){
12688 if(gdt->is_buffer_type())
12689 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12690 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
12692 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12693 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
12695 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12704 //----------------------------------------------------------
12705 // The superhashcomparison function
12707 ret += "struct "+generate_functor_name()+"_superequal_func{\n";
12708 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12709 generate_functor_name()+"_groupdef *grp2) const{\n";
12710 ret += "\t\treturn( (";
12712 bool first_elem = true;
12713 for(g=0;g<gb_tbl.size();g++){
12714 if(sg_tbl.count(g)){
12715 if(first_elem) first_elem=false; else ret += ") && (";
12716 data_type *gdt = gb_tbl.get_data_type(g);
12717 if(gdt->complex_comparison(gdt)){
12718 if(gdt->is_buffer_type())
12719 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12720 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
12722 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12723 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
12725 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
// Builds the C++ source text that declares and allocates the clean_operator
// for this query node. The concatenation below produces text of the form
//   clean_operator<F, F_groupdef, F_aggrdef, F_statedef, F_hash_func,
//                  F_equal_func, F_superhash_func, F_superequal_func>
//       *op<i> = new clean_operator<...same eight arguments...>(<params>, "<node name>");
// where F is the value of generate_functor_name().
//   i      - appended (via int_to_string) to "op" to name the generated pointer variable.
//   params - text spliced in verbatim as the leading constructor argument(s);
//            the node name from get_node_name() follows as a quoted string.
// NOTE(review): the statement that consumes this expression (presumably a
// return) is not visible in this listing -- confirm against the full file.
12742 string sgahcwcb_qpn::generate_operator(int i, string params){
// Template argument list for the declared pointer type.
12745 " clean_operator<" +
12746 generate_functor_name()+",\n\t"+
12747 generate_functor_name() + "_groupdef, \n\t" +
12748 generate_functor_name() + "_aggrdef, \n\t" +
12749 generate_functor_name() + "_statedef, \n\t" +
12750 generate_functor_name()+"_hash_func, \n\t"+
12751 generate_functor_name()+"_equal_func ,\n\t"+
12752 generate_functor_name()+"_superhash_func,\n\t "+
12753 generate_functor_name()+"_superequal_func \n\t"+
// Pointer variable op<i>, then the same template argument list repeated for
// the new-expression and closed by the constructor call.
12754 "> *op"+int_to_string(i)+" = new clean_operator<"+
12755 generate_functor_name()+",\n\t"+
12756 generate_functor_name() + "_groupdef,\n\t " +
12757 generate_functor_name() + "_aggrdef, \n\t" +
12758 generate_functor_name() + "_statedef, \n\t" +
12759 generate_functor_name()+"_hash_func, \n\t"+
12760 generate_functor_name()+"_equal_func, \n\t"+
12761 generate_functor_name()+"_superhash_func, \n\t"+
12762 generate_functor_name()+"_superequal_func\n\t "
12763 ">("+params+", \"" + get_node_name() + "\");\n"
12767 ////////////////////////////////////////////////////////////////
// Returns the base name used for this node's generated classes: the fixed
// prefix "rsgah_functor_" followed by the node name after it has been passed
// through normalize_name(). generate_functor() uses this value as the functor
// class name and as the stem of the "_groupdef" and "_aggrdef" class names.
12772 string rsgah_qpn::generate_functor_name(){
12773 return("rsgah_functor_" + normalize_name(this->get_node_name()));
12777 string rsgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
12781 // Initialize generate utility globals
12782 segen_gb_tbl = &(gb_tbl);
12785 //--------------------------------
12786 // group definition class
12787 string ret = "class " + generate_functor_name() + "_groupdef{\n";
12788 ret += "public:\n";
12789 for(g=0;g<this->gb_tbl.size();g++){
12790 sprintf(tmpstr,"gb_var%d",g);
12791 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12795 ret += "\t"+generate_functor_name() + "_groupdef(){\n";
12796 for(g=0;g<gb_tbl.size();g++){
12797 data_type *gdt = gb_tbl.get_data_type(g);
12798 if(gdt->is_buffer_type()){
12799 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
12800 gdt->get_hfta_buffer_init().c_str(), g );
12806 ret += "\t// shallow copy constructor\n";
12807 ret += "\t"+generate_functor_name() + "_groupdef("+
12808 this->generate_functor_name() + "_groupdef &gd){\n";
12809 for(g=0;g<gb_tbl.size();g++){
12810 data_type *gdt = gb_tbl.get_data_type(g);
12811 sprintf(tmpstr,"\t\tgb_var%d = gd.gb_var%d;\n",g,g);
12816 ret += "\t// deep assignment operator\n";
12817 ret += "\t"+generate_functor_name() + "_groupdef& operator=(const "+
12818 this->generate_functor_name() + "_groupdef &gd){\n";
12819 for(g=0;g<gb_tbl.size();g++){
12820 data_type *gdt = gb_tbl.get_data_type(g);
12821 if(gdt->is_buffer_type()){
12822 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd.gb_var%d));\n",
12823 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
12826 sprintf(tmpstr,"\t\tgb_var%d = gd.gb_var%d;\n",g,g);
12833 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
12834 for(g=0;g<gb_tbl.size();g++){
12835 data_type *gdt = gb_tbl.get_data_type(g);
12836 if(gdt->is_buffer_type()){
12837 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
12838 gdt->get_hfta_buffer_destroy().c_str(), g );
12845 //--------------------------------
12846 // aggr definition class
12847 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
12848 ret += "public:\n";
12849 for(a=0;a<aggr_tbl.size();a++){
12850 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
12851 sprintf(tmpstr,"aggr_var%d",a);
12852 if(aggr_tbl.is_builtin(a))
12853 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
12855 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
12858 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
12860 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
12861 for(a=0;a<aggr_tbl.size();a++){
12862 if(aggr_tbl.is_builtin(a)){
12863 data_type *adt = aggr_tbl.get_data_type(a);
12864 if(adt->is_buffer_type()){
12865 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
12866 adt->get_hfta_buffer_destroy().c_str(), a );
12870 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
12871 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12872 ret+="(aggr_var"+int_to_string(a)+"));\n";
12878 //--------------------------------
12879 // gb functor class
12880 ret += "class " + this->generate_functor_name() + "{\n";
12882 // Find variables referenced in this query node.
12884 col_id_set cid_set;
12885 col_id_set::iterator csi;
12887 for(w=0;w<where.size();++w)
12888 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
12889 for(w=0;w<having.size();++w)
12890 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
12891 for(w=0;w<closing_when.size();++w)
12892 gather_pr_col_ids(closing_when[w]->pr,cid_set,segen_gb_tbl);
12893 for(g=0;g<gb_tbl.size();g++)
12894 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
12896 for(s=0;s<select_list.size();s++){
12897 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
12901 // Private variables : store the state of the functor.
12902 // 1) variables for unpacked attributes
12903 // 2) offsets of the unpacked attributes
12904 // 3) storage of partial functions
12905 // 4) storage of complex literals (i.e., require a constructor)
12907 ret += "private:\n";
12909 // var to save the schema handle
12910 ret += "\tint schema_handle0;\n";
12912 // generate the declaration of all the variables related to
12913 // temp tuples generation
12914 ret += gen_decl_temp_vars();
12916 // unpacked attribute storage, offsets
12917 ret += "//\t\tstorage and offsets of accessed fields.\n";
12918 ret += generate_access_vars(cid_set, schema);
12919 // tuple metadata offset
12920 ret += "\tint tuple_metadata_offset0;\n";
12922 // Variables to store results of partial functions.
12923 // WARNING find_partial_functions modifies the SE
12924 // (it marks the partial function id).
12925 ret += "//\t\tParital function result storage\n";
12926 vector<scalarexp_t *> partial_fcns;
12927 vector<int> fcn_ref_cnt;
12928 vector<bool> is_partial_fcn;
12929 for(s=0;s<select_list.size();s++){
12930 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
12932 for(w=0;w<where.size();w++){
12933 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12935 for(w=0;w<having.size();w++){
12936 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12938 for(w=0;w<closing_when.size();w++){
12939 find_partial_fcns_pr(closing_when[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12941 for(g=0;g<gb_tbl.size();g++){
12942 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
12944 for(a=0;a<aggr_tbl.size();a++){
12945 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
12947 if(partial_fcns.size()>0){
12948 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
12949 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
12952 // Create cached temporaries for UDAF return values.
12953 for(a=0;a<aggr_tbl.size();a++){
12954 if(! aggr_tbl.is_builtin(a)){
12955 int afcn_id = aggr_tbl.get_fcn_id(a);
12956 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12957 sprintf(tmpstr,"udaf_ret_%d", a);
12958 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
12963 // Complex literals (i.e., they need constructors)
12964 ret += "//\t\tComplex literal storage.\n";
12965 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
12966 ret += generate_complex_lit_vars(complex_literals);
12968 // Pass-by-handle parameters
12969 ret += "//\t\tPass-by-handle storage.\n";
12970 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
12971 ret += generate_pass_by_handle_vars(param_handle_table);
12974 // variables to hold parameters.
12975 ret += "//\tfor query parameters\n";
12976 ret += generate_param_vars(param_tbl);
12978 // Is there a temporal flush? If so create flush temporaries,
12979 // create flush indicator.
12980 bool uses_temporal_flush = false;
12981 for(g=0;g<gb_tbl.size();g++){
12982 data_type *gdt = gb_tbl.get_data_type(g);
12983 if(gdt->is_temporal())
12984 uses_temporal_flush = true;
12987 if(uses_temporal_flush){
12988 ret += "//\t\tFor temporal flush\n";
12989 for(g=0;g<gb_tbl.size();g++){
12990 data_type *gdt = gb_tbl.get_data_type(g);
12991 if(gdt->is_temporal()){
12992 sprintf(tmpstr,"curr_gb%d",g);
12993 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12994 sprintf(tmpstr,"last_gb%d",g);
12995 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12998 ret += "\tgs_int32_t needs_temporal_flush;\n";
12999 ret += "\tbool disordered_arrival;\n";
13002 // The publicly exposed functions
13004 ret += "\npublic:\n";
13007 //-------------------
13008 // The functor constructor
13009 // pass in the schema handle.
13010 // 1) make assignments to the unpack offset variables
13011 // 2) initialize the complex literals
13013 ret += "//\t\tFunctor constructor.\n";
13014 ret += this->generate_functor_name()+"(int schema_handle0){\n";
13016 // save the schema handle
13017 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
13019 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
13022 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
13023 ret += gen_access_var_init(cid_set);
13025 // complex literals
13026 ret += "//\t\tInitialize complex literals.\n";
13027 ret += gen_complex_lit_init(complex_literals);
13029 // Initialize partial function results so they can be safely GC'd
13030 ret += gen_partial_fcn_init(partial_fcns);
13032 // Initialize non-query-parameter parameter handles
13033 ret += gen_pass_by_handle_init(param_handle_table);
13035 // temporal flush variables
13036 // ASSUME that structured values won't be temporal.
13037 if(uses_temporal_flush){
13038 ret += "//\t\tInitialize temporal flush variables.\n";
13039 for(g=0;g<gb_tbl.size();g++){
13040 data_type *gdt = gb_tbl.get_data_type(g);
13041 if(gdt->is_temporal()){
13042 literal_t gl(gdt->type_indicator());
13043 sprintf(tmpstr,"\tcurr_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
13044 ret.append(tmpstr);
13045 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
13046 ret.append(tmpstr);
13049 ret += "\tneeds_temporal_flush = 0;\n";
13052 // Init temporal attributes referenced in select list
13053 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
13058 //-------------------
13059 // Functor destructor
13060 ret += "//\t\tFunctor destructor.\n";
13061 ret += "~"+this->generate_functor_name()+"(){\n";
13063 // clean up buffer type complex literals
13064 ret += gen_complex_lit_dtr(complex_literals);
13066 // Deregister the pass-by-handle parameters
13067 ret += "/* register and de-register the pass-by-handle parameters */\n";
13068 ret += gen_pass_by_handle_dtr(param_handle_table);
13070 // clean up partial function results.
13071 ret += "/* clean up partial function storage */\n";
13072 ret += gen_partial_fcn_dtr(partial_fcns);
13074 // Destroy the parameters, if any need to be destroyed
13075 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
13080 //-------------------
13081 // Parameter manipulation routines
13082 ret += generate_load_param_block(this->generate_functor_name(),
13083 this->param_tbl,param_handle_table);
13084 ret += generate_delete_param_block(this->generate_functor_name(),
13085 this->param_tbl,param_handle_table);
13087 //-------------------
13088 // Register new parameter block
13090 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
13091 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
13092 ret += "\treturn this->load_params_"+this->generate_functor_name()+
13097 //-------------------
13098 // the create_group method.
13099 // This method creates a group in a buffer passed in
13100 // (to allow for creation on the stack).
13101 // There are also a couple of side effects:
13102 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
13103 // 2) determine if a temporal flush is required.
13105 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
13106 // Variables for execution of the function.
13107 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13109 if(partial_fcns.size()>0){ // partial fcn access failure
13110 ret += "\tgs_retval_t retval = 0;\n";
13114 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
13115 "_groupdef *) buffer;\n";
13117 // Start by cleaning up partial function results
13118 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
13119 set<int> w_pfcns; // partial fcns in where clause
13120 for(w=0;w<where.size();++w)
13121 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
13123 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
13124 for(g=0;g<gb_tbl.size();g++){
13125 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
13127 for(a=0;a<aggr_tbl.size();a++){
13128 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
13130 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
13131 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
13132 // ret += gen_partial_fcn_dtr(partial_fcns);
13135 ret += gen_temp_tuple_check(this->node_name, 0);
13136 col_id_set found_cids; // colrefs unpacked thus far.
13137 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
13140 // Save temporal group-by variables
13143 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
13145 for(g=0;g<gb_tbl.size();g++){
13147 data_type *gdt = gb_tbl.get_data_type(g);
13149 if(gdt->is_temporal()){
13150 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13151 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13152 ret.append(tmpstr);
13159 // Compare the temporal GB vars with the stored ones,
13160 // set flush indicator and update stored GB vars if there is any change.
13162 if(uses_temporal_flush){
13164 bool first_one = true;
13165 string disorder_test;
13166 for(g=0;g<gb_tbl.size();g++){
13167 data_type *gdt = gb_tbl.get_data_type(g);
13169 if(gdt->is_temporal()){
13170 sprintf(tmpstr,"curr_gb%d",g); string lhs_op = tmpstr;
13171 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
13172 if(first_one){first_one = false;} else {ret += ") && (";}
13173 ret += generate_lt_test(lhs_op, rhs_op, gdt);
13174 disorder_test += generate_lt_test(rhs_op, lhs_op, gdt);
13178 int temporal_gb=-1;
13179 for(g=0;g<gb_tbl.size();g++){
13180 data_type *gdt = gb_tbl.get_data_type(g);
13181 if(gdt->is_temporal()){
13182 if(gdt->is_buffer_type()){
13183 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&curr_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13185 // sprintf(tmpstr,"\t\tlast_gb%d = curr_gb%d;\n",g,g);
13187 // sprintf(tmpstr,"\t\tcurr_gb%d = gbval->gb_var%d;\n",g,g);
13189 ret += "\t\tif(curr_gb"+to_string(g)+"==0){\n";
13190 ret += "\t\t\tlast_gb"+to_string(g)+" = gbval->gb_var"+to_string(g)+";\n";
13191 ret += "\t\t}else{\n";
13192 ret += "\t\t\tlast_gb"+to_string(g)+" = curr_gb"+to_string(g)+";\n";
13194 sprintf(tmpstr,"\t\tcurr_gb%d = gbval->gb_var%d;\n",g,g);
13200 ret += "\t\tneeds_temporal_flush = curr_gb"+to_string (temporal_gb)+" - last_gb"+to_string(temporal_gb)+";\n";
13201 ret += "\t}else{\n"
13202 "\t\tneeds_temporal_flush=0;\n"
13205 ret += "\tdisordered_arrival = "+disorder_test+";\n";
13206 // ret += "\tif( ( ("+disorder_test+") ) ){\n";
13207 // ret += "\t\tdisordered_arrival=true;\n";
13208 // ret += "\t}else{\n";
13209 // ret += "\t\tdisordered_arrival=false;\n";
13214 // For temporal status tuple we don't need to do anything else
13215 ret += "\tif (temp_tuple_received) return NULL;\n\n";
13217 for(w=0;w<where.size();++w){
13218 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
13220 // Find the set of variables accessed in this CNF elem,
13221 // but in no previous element.
13222 col_id_set new_cids;
13223 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
13225 // Unpack these values.
13226 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13227 // Find partial fcns ref'd in this cnf element
13228 set<int> pfcn_refs;
13229 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
13230 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
13232 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
13233 +") ) return(NULL);\n";
13236 // The partial functions ref'd in the group-by var and aggregate
13237 // definitions must also be evaluated. If one returns false,
13238 // then implicitly the predicate is false.
13239 set<int>::iterator pfsi;
13241 if(ag_gb_pfcns.size() > 0)
13242 ret += "//\t\tUnpack remaining partial fcns.\n";
13243 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
13244 found_cids, segen_gb_tbl, "NULL", needs_xform);
13246 // Unpack the group-by variables
13248 for(g=0;g<gb_tbl.size();g++){
13249 data_type *gdt = gb_tbl.get_data_type(g);
13250 if(!gdt->is_temporal()){ // temproal gbs already computed
13251 // Find the new fields ref'd by this GBvar def.
13252 col_id_set new_cids;
13253 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
13254 // Unpack these values.
13255 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13257 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13258 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13260 // There seems to be no difference between the two
13261 // branches of the IF statement.
13262 data_type *gdt = gb_tbl.get_data_type(g);
13263 if(gdt->is_buffer_type()){
13264 // Create temporary copy.
13265 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13266 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13268 scalarexp_t *gse = gb_tbl.get_def(g);
13269 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13270 g,generate_se_code(gse,schema).c_str());
13273 ret.append(tmpstr);
13279 ret+= "\treturn gbval;\n";
13282 //--------------------------------------------------------
13283 // Create and initialize an aggregate object
13285 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
13286 // Variables for execution of the function.
13287 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13290 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
13291 "_aggrdef *)buffer;\n";
13293 for(a=0;a<aggr_tbl.size();a++){
13294 if(aggr_tbl.is_builtin(a)){
13295 // Create temporaries for buffer return values
13296 data_type *adt = aggr_tbl.get_data_type(a);
13297 if(adt->is_buffer_type()){
13298 sprintf(tmpstr,"aggr_tmp_%d", a);
13299 ret+=adt->make_host_cvar(tmpstr)+";\n";
13304 // Unpack all remaining attributes
13305 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
13306 for(a=0;a<aggr_tbl.size();a++){
13307 sprintf(tmpstr,"aggval->aggr_var%d",a);
13308 string assignto_var = tmpstr;
13309 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
13312 ret += "\treturn aggval;\n";
13315 //--------------------------------------------------------
13316 // update an aggregate object
13318 ret += "void update_aggregate(host_tuple &tup0, "
13319 +generate_functor_name()+"_groupdef &gbval, "+
13320 generate_functor_name()+"_aggrdef &aggval){\n";
13321 // Variables for execution of the function.
13322 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13324 // use of temporaries depends on the aggregate,
13325 // generate them in generate_aggr_update
13328 // Unpack all remaining attributes
13329 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
13330 for(a=0;a<aggr_tbl.size();a++){
13331 sprintf(tmpstr,"aggval.aggr_var%d",a);
13332 string varname = tmpstr;
13333 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
13336 ret += "\treturn;\n";
13339 //--------------------------------------------------------
13340 // reinitialize an aggregate object
13342 ret += "void reinit_aggregates( "+
13343 generate_functor_name()+"_groupdef &gbval, "+
13344 generate_functor_name()+"_aggrdef &aggval){\n";
13345 // Variables for execution of the function.
13346 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13348 // use of temporaries depends on the aggregate,
13349 // generate them in generate_aggr_update
13351 int temporal_gb; // track the # of the temporal gb
13352 for(g=0;g<gb_tbl.size();g++){
13353 data_type *gdt = gb_tbl.get_data_type(g);
13354 if(gdt->is_temporal()){
13355 if(gdt->is_buffer_type()){
13356 sprintf(tmpstr,"\t\t%s(&(gbval.gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13358 sprintf(tmpstr,"\t\t gbval.gb_var%d =last_gb%d;\n",g,g);
13365 // Unpack all remaining attributes
13366 for(a=0;a<aggr_tbl.size();a++){
13367 sprintf(tmpstr,"aggval.aggr_var%d",a);
13368 string varname = tmpstr;
13369 ret.append(generate_aggr_reinitialize(varname,&aggr_tbl,a, schema));
13372 ret += "\treturn;\n";
13379 //---------------------------------------------------
13382 ret += "gs_int32_t flush_needed(){\n";
13383 if(uses_temporal_flush){
13384 ret += "\treturn needs_temporal_flush;\n";
13386 ret += "\treturn false;\n";
13390 ret += "bool disordered(){return disordered_arrival;}\n";
13392 //------------------------------------------------
13393 // time bucket management
13394 ret += "void advance_last_tb(){\n";
13395 ret += "\tlast_gb"+to_string(temporal_gb)+"++;\n";
13397 ret += "void reset_last_tb(){\n";
13398 ret += "\tlast_gb"+to_string(temporal_gb)+" = curr_gb"+to_string(temporal_gb)+";\n";
13401 //---------------------------------------------------
13402 // create output tuple
13403 // Unpack the partial functions ref'd in the where clause,
13404 // select clause. Evaluate the where clause.
13405 // Finally, pack the tuple.
13407 // I need to use special code generation here,
13408 // so I'll leave it in longhand.
13410 ret += "host_tuple create_output_tuple("
13411 +generate_functor_name()+"_groupdef &gbval, "+
13412 generate_functor_name()+"_aggrdef &aggval, bool &failed){\n";
13414 ret += "\thost_tuple tup;\n";
13415 ret += "\tfailed = false;\n";
13416 ret += "\tgs_retval_t retval = 0;\n";
13418 string gbvar = "gbval.gb_var";
13419 string aggvar = "aggval.";
13422 // First, get the return values from the UDAFS
13423 for(a=0;a<aggr_tbl.size();a++){
13424 if(! aggr_tbl.is_builtin(a)){
13425 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
13426 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
13427 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
13431 set<int> hv_sl_pfcns;
13432 for(w=0;w<having.size();w++){
13433 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
13435 for(s=0;s<select_list.size();s++){
13436 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
13439 // clean up the partial fcn results from any previous execution
13440 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
13443 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
13444 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13445 ret += "\tif(retval){ failed = true; return(tup);}\n";
13448 // Evaluate the HAVING clause
13449 // TODO: this seems to have a ++ operator rather than a + operator.
13450 for(w=0;w<having.size();++w){
13451 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
13454 // Now, compute the size of the tuple.
13456 // Unpack any BUFFER type selections into temporaries
13457 // so that I can compute their size and not have
13458 // to recompute their value during tuple packing.
13459 // I can use regular assignment here because
13460 // these temporaries are non-persistent.
13461 // TODO: should I be using the selvar generation routine?
13463 ret += "//\t\tCompute the size of the tuple.\n";
13464 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
13465 for(s=0;s<select_list.size();s++){
13466 scalarexp_t *se = select_list[s]->se;
13467 data_type *sdt = se->get_data_type();
13468 if(sdt->is_buffer_type() &&
13469 !( (se->get_operator_type() == SE_COLREF) ||
13470 (se->get_operator_type() == SE_AGGR_STAR) ||
13471 (se->get_operator_type() == SE_AGGR_SE) ||
13472 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13473 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13475 sprintf(tmpstr,"selvar_%d",s);
13476 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
13477 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
13481 // The size of the tuple is the size of the tuple struct plus the
13482 // size of the buffers to be copied in.
13484 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
13485 for(s=0;s<select_list.size();s++){
13486 // if(s>0) ret += "+";
13487 scalarexp_t *se = select_list[s]->se;
13488 data_type *sdt = select_list[s]->se->get_data_type();
13489 if(sdt->is_buffer_type()){
13490 if(!( (se->get_operator_type() == SE_COLREF) ||
13491 (se->get_operator_type() == SE_AGGR_STAR) ||
13492 (se->get_operator_type() == SE_AGGR_SE) ||
13493 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13494 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13496 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
13497 ret.append(tmpstr);
13499 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13500 ret.append(tmpstr);
13506 // Allocate tuple data block.
13507 ret += "//\t\tCreate the tuple block.\n";
13508 ret += "\ttup.data = malloc(tup.tuple_size);\n";
13509 ret += "\ttup.heap_resident = true;\n";
13511 // Mark tuple as regular
13512 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
13514 // ret += "\ttup.channel = 0;\n";
13515 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
13516 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
13519 // (Here, offsets are hard-wired. is this a problem?)
13521 ret += "//\t\tPack the fields into the tuple.\n";
13522 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
13523 for(s=0;s<select_list.size();s++){
13524 scalarexp_t *se = select_list[s]->se;
13525 data_type *sdt = se->get_data_type();
13526 if(sdt->is_buffer_type()){
13527 if(!( (se->get_operator_type() == SE_COLREF) ||
13528 (se->get_operator_type() == SE_AGGR_STAR) ||
13529 (se->get_operator_type() == SE_AGGR_SE) ||
13530 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13531 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13533 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
13534 ret.append(tmpstr);
13535 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
13536 ret.append(tmpstr);
13538 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13539 ret.append(tmpstr);
13540 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13541 ret.append(tmpstr);
13544 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13545 ret.append(tmpstr);
13546 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
13551 // Destroy string temporaries
13552 ret += gen_buffer_selvars_dtr(select_list);
13554 ret += "\treturn tup;\n";
13557 //------------------------------------------------------------------
13558 // Cleaning_when : evaluate the cleaning_when clause.
13559 // ASSUME that the udaf return values have already
13560 // been unpacked. delete the string udaf return values at the end.
13562 ret += "bool cleaning_when("
13563 +generate_functor_name()+"_groupdef &gbval, "+
13564 generate_functor_name()+"_aggrdef &aggval){\n";
13566 ret += "\tbool retval = true;\n";
13569 gbvar = "gbval.gb_var";
13570 aggvar = "aggval.";
13573 set<int> clw_pfcns;
13574 for(w=0;w<closing_when.size();w++){
13575 collect_partial_fcns_pr(closing_when[w]->pr, clw_pfcns);
13578 // clean up the partial fcn results from any previous execution
13579 ret += gen_partial_fcn_dtr(partial_fcns,clw_pfcns);
13582 for(pfsi=clw_pfcns.begin();pfsi!=clw_pfcns.end();++pfsi){
13583 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13584 ret += "\tif(retval){ return false;}\n";
13587 // Evaluate the Closing When clause
13588 // TODO: this seems to have a ++ operator rather than a + operator.
13589 for(w=0;w<closing_when.size();++w){
13590 ret += "\tif( !("+generate_predicate_code_fm_aggr(closing_when[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
13594 // Destroy string return vals of UDAFs
13595 for(a=0;a<aggr_tbl.size();a++){
13596 if(! aggr_tbl.is_builtin(a)){
13597 int afcn_id = aggr_tbl.get_fcn_id(a);
13598 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
13599 if(adt->is_buffer_type()){
13600 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
13601 adt->get_hfta_buffer_destroy().c_str(), a );
13607 ret += "\treturn retval;\n";
13613 //-------------------------------------------------------------------
13614 // Temporal update functions
13616 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
13618 // create a temp status tuple
13619 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
13621 ret += gen_init_temp_status_tuple(this->get_node_name());
13624 // (Here, offsets are hard-wired. is this a problem?)
13626 ret += "//\t\tPack the fields into the tuple.\n";
13627 for(s=0;s<select_list.size();s++){
13628 data_type *sdt = select_list[s]->se->get_data_type();
13629 if(sdt->is_temporal()){
13630 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13632 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_gb", "", schema).c_str());
13638 ret += "\treturn 0;\n";
13639 ret += "};};\n\n\n";
13642 //----------------------------------------------------------
13643 // The hash function
13645 ret += "struct "+generate_functor_name()+"_hash_func{\n";
13646 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
13647 "_groupdef &grp) const{\n";
13648 ret += "\t\treturn(0";
13649 for(g=0;g<gb_tbl.size();g++){
13650 data_type *gdt = gb_tbl.get_data_type(g);
13651 if(! gdt->is_temporal()){
13653 if(gdt->use_hashfunc()){
13654 if(gdt->is_buffer_type())
13655 sprintf(tmpstr,"(%s*%s(&(grp.gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13657 sprintf(tmpstr,"(%s*%s(grp.gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13659 sprintf(tmpstr,"(%s*grp.gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
13664 ret += " >> 32);\n";
13668 //----------------------------------------------------------
13669 // The comparison function
13671 ret += "struct "+generate_functor_name()+"_equal_func{\n";
13672 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef &grp1, "+
13673 "const "+generate_functor_name()+"_groupdef &grp2) const{\n";
13674 ret += "\t\treturn( (";
13677 bool first_exec = true;
13678 for(g=0;g<gb_tbl.size();g++){
13679 data_type *gdt = gb_tbl.get_data_type(g);
13680 if(! gdt->is_temporal()){
13681 if(first_exec){first_exec=false;}else{ hcmpr += ") && (";}
13682 if(gdt->complex_comparison(gdt)){
13683 if(gdt->is_buffer_type())
13684 sprintf(tmpstr,"(%s(&(grp1.gb_var%d), &(grp2.gb_var%d))==0)",
13685 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
13687 sprintf(tmpstr,"(%s((grp1.gb_var%d), (grp2.gb_var%d))==0)",
13688 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
13690 sprintf(tmpstr,"grp1.gb_var%d == grp2.gb_var%d",g,g);
// Builds the generated-code text that declares and allocates the runtime
// operator for this node.
//
// The concatenation below spells out a statement of the form
//   running_agg_operator<F, F_groupdef, F_aggrdef, F_hash_func, F_equal_func >
//       *op<i> = new running_agg_operator<...same arguments...>(<params>, "<node name>");
// where F is generate_functor_name().
//
//   i      - converted with int_to_string() and appended to "op" to form the
//            name of the declared pointer variable.
//   params - inserted verbatim as the leading constructor argument text; the
//            node name (get_node_name()) follows it as a quoted string.
//
// NOTE(review): the statement that consumes this concatenation is not part of
// the lines shown here; given the string return type it is presumably
// returned to the caller - confirm.
13707 string rsgah_qpn::generate_operator(int i, string params){
13710 " running_agg_operator<" +
13711 generate_functor_name()+","+
13712 generate_functor_name() + "_groupdef, " +
13713 generate_functor_name() + "_aggrdef, " +
13714 generate_functor_name()+"_hash_func, "+
13715 generate_functor_name()+"_equal_func "
13716 "> *op"+int_to_string(i)+" = new running_agg_operator<"+
13717 generate_functor_name()+","+
13718 generate_functor_name() + "_groupdef, " +
13719 generate_functor_name() + "_aggrdef, " +
13720 generate_functor_name()+"_hash_func, "+
13721 generate_functor_name()+"_equal_func "
13722 ">("+params+", \"" + get_node_name() + "\");\n"
13728 // Split aggregation into two HFTA components - sub and superaggregation
13729 // If unable to split the aggregates, an empty vector will be returned
//
// Both new nodes are sgah_qpn objects:
//   - the low-level node is named "_" + node_name and uses this node's
//     table_name; it receives the group-by definitions (passed through
//     dup_se) and the WHERE predicates (passed through dup_pr).
//   - the high-level node keeps node_name, reads from a new tablevar_t named
//     "_" + node_name, and receives the rehomed select list and the rehomed
//     HAVING predicates.
// On the path visible here ret_vec holds the low-level node first, then the
// high-level node.
//
//   Ext_fcns - queried for the HFTA sub-/super-aggregate ids of every
//              non-builtin aggregate.
//   Schema   - not referenced in the lines shown here.
13730 vector<qp_node *> sgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13732 vector<qp_node *> ret_vec;
13733 int s, p, g, a, o, i;
// fta_flds, stream_flds, t and sel_names are not referenced again in the
// lines shown here.
13736 vector<string> fta_flds, stream_flds;
13737 int t = table_name->get_schema_ref();
13739 // Get the set of interfaces it accesses.
13741 vector<string> sel_names;
13743 // Verify that all of the ref'd UDAFs can be split.
13745 for(a=0;a<aggr_tbl.size();++a){
13746 if(! aggr_tbl.is_builtin(a)){
13747 int afcn = aggr_tbl.get_fcn_id(a);
13748 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13749 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
// A negative id from either lookup takes the branch below.
// NOTE(review): the branch body is not part of the lines shown here; per the
// header comment it presumably returns the still-empty ret_vec - confirm.
13750 if(hfta_super_id < 0 || hfta_sub_id < 0){
13756 /////////////////////////////////////////////////////
13757 // Split into aggr/aggr.
13760 sgah_qpn *low_hfta_node = new sgah_qpn();
// The low-level node is given this node's table_name itself rather than a
// new tablevar_t, so the setter calls made through
// low_hfta_node->table_name below appear to act on the same object.
// NOTE(review): confirm that this sharing is intended.
13761 low_hfta_node->table_name = table_name;
13762 low_hfta_node->set_node_name( "_"+node_name );
13763 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// The high-level node reads from a new table variable named after the
// low-level node ("_" + node_name) and takes over this node's name.
13766 sgah_qpn *hi_hfta_node = new sgah_qpn();
13767 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13768 hi_hfta_node->set_node_name( node_name );
13769 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13771 // First, process the group-by variables.
13772 // both low and hi level queries duplicate group-by variables of original query
13775 for(g=0;g<gb_tbl.size();g++){
13776 // Insert the gbvar into both low- and hi level hfta.
13777 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13778 low_hfta_node->gb_tbl.add_gb_var(
13779 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13782 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// A new column reference named after the group-by variable is wrapped in a
// scalar expression, tagged with group-by index g and given the data type of
// the original definition before being handed to make_fta_se_ref.
13783 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13784 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13785 gbvar_fta->set_gb_ref(g);
13786 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13787 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13789 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
13790 gbvar_stream->set_gb_ref(-1); // used as GBvar def
// The high-level group-by variable is named after the field of the returned
// column reference and is registered with table-variable reference -1.
13791 hi_hfta_node->gb_tbl.add_gb_var(
13792 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13796 // hi_hfta_node->gb_tbl.gb_patterns = gb_tbl.gb_patterns; // pattern processing at highest level
13797 hi_hfta_node->gb_tbl.set_pattern_info( &gb_tbl); // pattern processing at highest level
13799 // SEs in the aggregate definitions.
13800 // They are all safe, so split them up for later processing.
// hfta_aggr_se is passed to rehome_fta_se / rehome_fta_pr further down.
// NOTE(review): the trailing arguments of split_hfta_aggr are not part of the
// lines shown here; presumably the call fills hfta_aggr_se - confirm.
13801 map<int, scalarexp_t *> hfta_aggr_se;
13802 for(a=0;a<aggr_tbl.size();++a){
13803 split_hfta_aggr( &(aggr_tbl), a,
13804 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13805 low_hfta_node->select_list,
13812 // Next, the select list.
// Each select expression is passed through rehome_fta_se and appended to the
// high-level node under its original name. fta_forbidden is not referenced
// again in the lines shown here.
13814 for(s=0;s<select_list.size();s++){
13815 bool fta_forbidden = false;
13816 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13817 hi_hfta_node->select_list.push_back(
13818 new select_element(root_se, select_list[s]->name));
13823 // All the predicates in the where clause must execute
13824 // in the low-level hfta.
13826 for(p=0;p<where.size();p++){
13827 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13828 cnf_elem *new_cnf = new cnf_elem(new_pr);
13829 analyze_cnf(new_cnf);
13831 low_hfta_node->where.push_back(new_cnf);
13834 // All of the predicates in the having clause must
13835 // execute in the high-level hfta node.
13837 for(p=0;p<having.size();p++){
13838 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13839 cnf_elem *cnf_root = new cnf_elem(pr_root);
13840 analyze_cnf(cnf_root);
13842 hi_hfta_node->having.push_back(cnf_root);
13846 // Copy parameters to both nodes
// Each node gets its own duplicate() of the parameter's data_type together
// with the handle_access value reported by this node's param_tbl.
13847 vector<string> param_names = param_tbl->get_param_names();
13849 for(pi=0;pi<param_names.size();pi++){
13850 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13851 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13852 param_tbl->handle_access(param_names[pi]));
13853 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13854 param_tbl->handle_access(param_names[pi]));
13856 low_hfta_node->definitions = definitions;
13857 hi_hfta_node->definitions = definitions;
// Both nodes take the machine and interface of this node's table variable,
// and set_ifq(false) is called on each node's table variable.
13860 low_hfta_node->table_name->set_machine(table_name->get_machine());
13861 low_hfta_node->table_name->set_interface(table_name->get_interface());
13862 low_hfta_node->table_name->set_ifq(false);
13864 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13865 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13866 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level node first, then high-level node.
13868 ret_vec.push_back(low_hfta_node);
13869 ret_vec.push_back(hi_hfta_node);
13875 // TODO: add splitting into selection/aggregation
13879 // Split aggregation into two HFTA components - sub and superaggregation
13880 // If unable to split the aggregates, empty vector will be returned
13881 // Similar to sgah, but super aggregate is rsgah, subaggr is sgah
//
// Ext_fcns supplies the HFTA super-/sub-aggregate ids used to decide whether
// each non-builtin aggregate can be split.
// On success ret_vec receives the low-level (sgah) node first and the
// high-level (rsgah) node second.
13882 vector<qp_node *> rsgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13884 vector<qp_node *> ret_vec;
13885 int s, p, g, a, o, i;
13888 vector<string> fta_flds, stream_flds;
13889 int t = table_name->get_schema_ref();
13891 // Get the set of interfaces it accesses.
13893 vector<string> sel_names;
13895 // Verify that all of the ref'd UDAFs can be split.
// A non-builtin aggregate is splittable only if both its HFTA
// superaggregate id and its HFTA subaggregate id are non-negative.
13897 for(a=0;a<aggr_tbl.size();++a){
13898 if(! aggr_tbl.is_builtin(a)){
13899 int afcn = aggr_tbl.get_fcn_id(a);
13900 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13901 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13902 if(hfta_super_id < 0 || hfta_sub_id < 0){
13908 /////////////////////////////////////////////////////
13909 // Split into aggr/aggr.
// Low-level node: an sgah_qpn named "_"+node_name that takes over this
// node's table_name pointer.
// NOTE(review): the pointer is copied, not the tablevar, so every setter
// called through low_hfta_node->table_name below also acts on this node's
// own table_name (including the final set_ifq(false)) - confirm intended.
13912 sgah_qpn *low_hfta_node = new sgah_qpn();
13913 low_hfta_node->table_name = table_name;
13914 low_hfta_node->set_node_name( "_"+node_name );
13915 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// High-level node: an rsgah_qpn that keeps the original node_name and reads
// from a new tablevar named after the low-level node ("_"+node_name).
13918 rsgah_qpn *hi_hfta_node = new rsgah_qpn();
13919 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13920 hi_hfta_node->set_node_name( node_name );
13921 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13923 // First, process the group-by variables.
13924 // both low and hi level queries duplicate group-by variables of original query
13927 for(g=0;g<gb_tbl.size();g++){
13928 // Insert the gbvar into both low- and hi level hfta.
// The low-level node gets a duplicate of the original definition under
// the same name, table-variable reference and reference type.
13929 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13930 low_hfta_node->gb_tbl.add_gb_var(
13931 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13934 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// gbvar_fta is a column reference named after the group-by variable,
// tagged as group-by ref g and typed like the variable's definition.
13935 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13936 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13937 gbvar_fta->set_gb_ref(g);
13938 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13939 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13941 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
// In the high-level node the variable is registered under the field name
// of gbvar_stream's column reference, with table-variable reference -1.
13942 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13943 hi_hfta_node->gb_tbl.add_gb_var(
13944 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13949 // SEs in the aggregate definitions.
13950 // They are all safe, so split them up for later processing.
// hfta_aggr_se is keyed by int and is handed to the rehome_fta_se /
// rehome_fta_pr calls below.
// presumably split_hfta_aggr fills it per aggregate index - TODO confirm.
13951 map<int, scalarexp_t *> hfta_aggr_se;
13952 for(a=0;a<aggr_tbl.size();++a){
13953 split_hfta_aggr( &(aggr_tbl), a,
13954 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13955 low_hfta_node->select_list,
13962 // Next, the select list.
// Each original select expression is rehomed against hfta_aggr_se and
// appended to the high-level node under its original output name.
13964 for(s=0;s<select_list.size();s++){
13965 bool fta_forbidden = false;
13966 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13967 hi_hfta_node->select_list.push_back(
13968 new select_element(root_se, select_list[s]->name));
13973 // All the predicates in the where clause must execute
13974 // in the low-level hfta.
// Each predicate is duplicated, wrapped in a fresh cnf_elem and analyzed
// before being attached to the low-level node.
13976 for(p=0;p<where.size();p++){
13977 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13978 cnf_elem *new_cnf = new cnf_elem(new_pr);
13979 analyze_cnf(new_cnf);
13981 low_hfta_node->where.push_back(new_cnf);
13984 // All of the predicates in the having clause must
13985 // execute in the high-level hfta node.
13987 for(p=0;p<having.size();p++){
13988 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13989 cnf_elem *cnf_root = new cnf_elem(pr_root);
13990 analyze_cnf(cnf_root);
13992 hi_hfta_node->having.push_back(cnf_root);
13995 // Similar for closing when
13996 for(p=0;p<closing_when.size();p++){
13997 predicate_t *pr_root = rehome_fta_pr( closing_when[p]->pr, &hfta_aggr_se);
13998 cnf_elem *cnf_root = new cnf_elem(pr_root);
13999 analyze_cnf(cnf_root);
14001 hi_hfta_node->closing_when.push_back(cnf_root);
14005 // Copy parameters to both nodes
// Each node receives its own duplicate of the parameter's data type; the
// handle_access value is forwarded unchanged.
14006 vector<string> param_names = param_tbl->get_param_names();
14008 for(pi=0;pi<param_names.size();pi++){
14009 data_type *dt = param_tbl->get_data_type(param_names[pi]);
14010 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
14011 param_tbl->handle_access(param_names[pi]));
14012 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
14013 param_tbl->handle_access(param_names[pi]));
// Both nodes receive this node's definitions by assignment.
14015 low_hfta_node->definitions = definitions;
14016 hi_hfta_node->definitions = definitions;
// Propagate machine/interface to both table variables and clear the ifq flag.
// For the low-level node these calls go through the table_name pointer
// shared with this node (see the note where low_hfta_node is created).
14019 low_hfta_node->table_name->set_machine(table_name->get_machine());
14020 low_hfta_node->table_name->set_interface(table_name->get_interface());
14021 low_hfta_node->table_name->set_ifq(false);
14023 hi_hfta_node->table_name->set_machine(table_name->get_machine());
14024 hi_hfta_node->table_name->set_interface(table_name->get_interface());
14025 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level node first, high-level node second.
14027 ret_vec.push_back(low_hfta_node);
14028 ret_vec.push_back(hi_hfta_node);
14034 // TODO: add splitting into selection/aggregation
14037 //---------------------------------------------------------------
14038 // Code for propagating Protocol field source information
// Build a new scalar expression that restates `se` in terms of the protocol
// expressions of the node's sources.
//   se      - expression to resolve (dispatch is on se->get_operator_type()).
//   src_vec - per table variable, the source's field-name -> expression map;
//             entries may be NULL.
//   gb_tbl  - group-by table used to expand group-by references; callers
//             without group-by variables pass NULL.
//   Schema  - used to find the schema type of a column reference.
// The expressions returned on the visible paths are freshly built or
// duplicated (new scalarexp_t, make_param_reference, dup_se).
14041 scalarexp_t *resolve_protocol_se(scalarexp_t *se, vector<map<string, scalarexp_t *> *> &src_vec, gb_table *gb_tbl, table_list *Schema){
14042 scalarexp_t *rse, *lse,*p_se, *gb_se;
14043 int tno, schema_type;
14044 map<string, scalarexp_t *> *pse_map;
14046 switch(se->get_operator_type()){
// Literal operand: rebuilt from the literal value.
14048 return new scalarexp_t(se->get_literal());
// Parameter operand: a new parameter reference with the same name.
14050 return scalarexp_t::make_param_reference(se->get_op().c_str());
// Group-by reference: resolve the variable's definition instead.
// NOTE(review): the error below is only printed; gb_tbl is dereferenced
// on the very next statement, so a NULL gb_tbl still crashes here.
14054 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, se->gb_ref=%d, but gb_tbl is NULL\n",se->get_gb_ref());
14055 gb_se = gb_tbl->get_def(se->get_gb_ref());
14056 return resolve_protocol_se(gb_se,src_vec,gb_tbl,Schema);
// Column reference into a PROTOCOL_SCHEMA table: already a protocol
// field, so a duplicate of the reference itself is returned.
14059 schema_type = Schema->get_schema_type(se->get_colref()->get_schema_ref());
14060 if(schema_type == PROTOCOL_SCHEMA)
14061 return dup_se(se,NULL);
// Otherwise look the field up in the map of the source feeding this
// table variable.
// NOTE(review): after the out-of-range message, src_vec[tno] still appears
// to be indexed - confirm whether the error path should return instead.
14063 tno = se->get_colref()->get_tablevar_ref();
14064 if(tno >= src_vec.size()){
14065 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, tno=%d, src_vec.size()=%lu\n",tno,src_vec.size());
14067 if(src_vec[tno] == NULL)
// NOTE(review): if map is std::map, operator[] inserts a NULL entry into
// the source's map when the field is absent - confirm that is acceptable.
14070 pse_map =src_vec[tno];
14071 p_se = (*pse_map)[se->get_colref()->get_field()];
14074 return dup_se(p_se,NULL);
// One-operand operator: resolve the operand and rebuild with the same op.
14076 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
14080 return new scalarexp_t(se->get_op().c_str(),lse);
// Two-operand operator: resolve both sides and rebuild with the same op.
14082 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
14085 rse = resolve_protocol_se(se->get_right_se(),src_vec,gb_tbl,Schema);
14088 return new scalarexp_t(se->get_op().c_str(),lse,rse);
// Fill protocol_map for a select/project node: each select-list output name
// is mapped to the result of resolve_protocol_se() on its expression.
// src_vec gets one entry per element of q_sources: the source's
// get_protocol_se() map, with NULL pushed as the placeholder
// (presumably for NULL sources - TODO confirm).
// No group-by table applies here, so NULL is passed for gb_tbl.
14102 void spx_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14104 vector<map<string, scalarexp_t *> *> src_vec;
14106 for(i=0;i<q_sources.size();i++){
14107 if(q_sources[i] != NULL)
14108 src_vec.push_back(q_sources[i]->get_protocol_se());
14110 src_vec.push_back(NULL);
14113 for(i=0;i<select_list.size();i++){
14114 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// Fill protocol_map for an equi-hash join node and record the protocol-level
// sources of the join's hash keys.
// src_vec gets one entry per element of q_sources: the source's
// get_protocol_se() map, with NULL pushed as the placeholder
// (presumably for NULL sources - TODO confirm).
// No group-by table applies here, so NULL is passed for gb_tbl.
14118 void join_eq_hash_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14120 vector<map<string, scalarexp_t *> *> src_vec;
14122 for(i=0;i<q_sources.size();i++){
14123 if(q_sources[i] != NULL)
14124 src_vec.push_back(q_sources[i]->get_protocol_se());
14126 src_vec.push_back(NULL);
// Output field name -> resolved protocol expression.
14129 for(i=0;i<select_list.size();i++){
14130 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// For every hash-equality predicate, resolve its left and right sides and
// append them to hash_src_l / hash_src_r in predicate order.
14133 for(i=0;i<hash_eq.size();i++){
14134 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
14135 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
// Fill protocol_map for a filter join node and record the protocol-level
// sources of the join's hash keys.
// src_vec gets one entry per element of q_sources: the source's
// get_protocol_se() map, with NULL pushed as the placeholder
// (presumably for NULL sources - TODO confirm).
// No group-by table applies here, so NULL is passed for gb_tbl.
14139 void filter_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14141 vector<map<string, scalarexp_t *> *> src_vec;
14143 for(i=0;i<q_sources.size();i++){
14144 if(q_sources[i] != NULL)
14145 src_vec.push_back(q_sources[i]->get_protocol_se());
14147 src_vec.push_back(NULL);
// Output field name -> resolved protocol expression.
14150 for(i=0;i<select_list.size();i++){
14151 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// For every hash-equality predicate, resolve its left and right sides and
// append them to hash_src_l / hash_src_r in predicate order.
14154 for(i=0;i<hash_eq.size();i++){
14155 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
14156 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
// Fill protocol_map for a watchlist join node and record the protocol-level
// sources of its key predicates.
// src_vec gets one entry per element of q_sources: the source's
// get_protocol_se() map, with NULL pushed as the placeholder
// (presumably for NULL sources - TODO confirm).
// No group-by table applies here, so NULL is passed for gb_tbl.
14160 void watch_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14162 vector<map<string, scalarexp_t *> *> src_vec;
14164 for(i=0;i<q_sources.size();i++){
14165 if(q_sources[i] != NULL)
14166 src_vec.push_back(q_sources[i]->get_protocol_se());
14168 src_vec.push_back(NULL);
// Output field name -> resolved protocol expression.
14171 for(i=0;i<select_list.size();i++){
14172 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// Unlike the other joins, hash_eq is looked up by key field name here;
// the sides are appended to hash_src_l / hash_src_r in key_flds order.
14175 for(i=0;i<key_flds.size();i++){
14176 string kfld = key_flds[i];
14177 hash_src_l.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_left_se(),src_vec,NULL,Schema));
14178 hash_src_r.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_right_se(),src_vec,NULL,Schema));
// Fill protocol_map for an aggregation node and record the protocol-level
// source of every group-by variable in gb_sources.
// src_vec gets one entry per element of q_sources: the source's
// get_protocol_se() map, with NULL pushed as the placeholder
// (presumably for NULL sources - TODO confirm).
// This node's own gb_tbl is passed so group-by references can be expanded.
14183 void sgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14185 vector<map<string, scalarexp_t *> *> src_vec;
14187 for(i=0;i<q_sources.size();i++){
14188 if(q_sources[i] != NULL)
14189 src_vec.push_back(q_sources[i]->get_protocol_se());
14191 src_vec.push_back(NULL);
// Output field name -> resolved protocol expression.
14194 for(i=0;i<select_list.size();i++){
14195 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// One gb_sources entry per group-by variable, in gb_tbl order.
14198 for(i=0;i<gb_tbl.size();i++)
14199 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Fill protocol_map for a running-aggregation node and record the
// protocol-level source of every group-by variable in gb_sources.
// src_vec gets one entry per element of q_sources: the source's
// get_protocol_se() map, with NULL pushed as the placeholder
// (presumably for NULL sources - TODO confirm).
// This node's own gb_tbl is passed so group-by references can be expanded.
14203 void rsgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14205 vector<map<string, scalarexp_t *> *> src_vec;
14207 for(i=0;i<q_sources.size();i++){
14208 if(q_sources[i] != NULL)
14209 src_vec.push_back(q_sources[i]->get_protocol_se());
14211 src_vec.push_back(NULL);
// Output field name -> resolved protocol expression.
14214 for(i=0;i<select_list.size();i++){
14215 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// One gb_sources entry per group-by variable, in gb_tbl order.
14218 for(i=0;i<gb_tbl.size();i++)
14219 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Fill protocol_map for an sgahcwcb node and record the protocol-level
// source of every group-by variable in gb_sources.
// src_vec gets one entry per element of q_sources: the source's
// get_protocol_se() map, with NULL pushed as the placeholder
// (presumably for NULL sources - TODO confirm).
// This node's own gb_tbl is passed so group-by references can be expanded.
14222 void sgahcwcb_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14224 vector<map<string, scalarexp_t *> *> src_vec;
14226 for(i=0;i<q_sources.size();i++){
14227 if(q_sources[i] != NULL)
14228 src_vec.push_back(q_sources[i]->get_protocol_se());
14230 src_vec.push_back(NULL);
// Output field name -> resolved protocol expression.
14233 for(i=0;i<select_list.size();i++){
14234 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// One gb_sources entry per group-by variable, in gb_tbl order.
14237 for(i=0;i<gb_tbl.size();i++)
14238 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Fill protocol_map for a merge node.
// For each field of table_layout, the first source's protocol expression is
// compared (is_equivalent_se_base) with the expression every other source
// has under the same field name. The field is then mapped either to the
// first source's expression or to NULL
// (presumably NULL when any source is missing the field or disagrees -
// TODO confirm against the match handling).
// NOTE(review): src_vec can hold NULL entries, yet src_vec[0] and src_vec[s]
// are dereferenced below with no visible NULL check - confirm that merge
// sources are never NULL.
14241 void mrg_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14243 scalarexp_t *first_se;
14245 vector<map<string, scalarexp_t *> *> src_vec;
14246 map<string, scalarexp_t *> *pse_map;
// One src_vec entry per element of q_sources: the source's protocol map,
// with NULL pushed as the placeholder.
14248 for(i=0;i<q_sources.size();i++){
14249 if(q_sources[i] != NULL)
14250 src_vec.push_back(q_sources[i]->get_protocol_se());
14252 src_vec.push_back(NULL);
// A merge with no sources is reported as an internal error.
14255 if(q_sources.size() == 0){
14256 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::create_protocol_se, q_sources.size() == 0\n");
14260 vector<field_entry *> tbl_flds = table_layout->get_fields();
14261 for(f=0;f<tbl_flds.size();f++){
// The first source provides the reference expression for this field.
// NOTE(review): if map is std::map, operator[] inserts a NULL entry into
// a source's map when the field is absent.
14263 string fld_nm = tbl_flds[f]->get_name();
14264 pse_map = src_vec[0];
14265 first_se = (*pse_map)[fld_nm];
14266 if(first_se == NULL)
// Compare against the remaining sources; the loop stops once match is false.
14268 for(s=1;s<src_vec.size() && match;s++){
14269 pse_map = src_vec[s];
14270 scalarexp_t *match_se = (*pse_map)[fld_nm];
14271 if(match_se == NULL)
14274 match = is_equivalent_se_base(first_se, match_se, Schema);
// The stored pointer is the first source's expression itself, not a copy.
14277 protocol_map[fld_nm] = first_se;
14279 protocol_map[fld_nm] = NULL;
14283 void watch_tbl_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){