1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
16 // Create, manipulate, and dump query plans.
18 #include "query_plan.h"
19 #include "analyze_fta.h"
20 #include "generate_utils.h"
26 extern string hash_nums[NRANDS]; // for fast hashing
// Takes s by non-const reference and walks it one index at a time;
// the loop body is not part of this excerpt.
// NOTE(review): presumably rewrites characters that are not allowed in
// generated names -- confirm against the full definition.
31 void untaboo(string &s){
33 for(c=0;c<s.size();++c){
40 // mrg_qpn constructor, define here to avoid
41 // circular references in the .h file
//
// Builds a merge node named n_name from the filter-join node spx:
//   - one output field is created per entry of spx->select_list,
//   - a temporal select-list entry is used as the merge field; a warning
//     is printed when a second temporal entry is encountered,
//   - table_layout is created from the collected fields (STREAM_SCHEMA),
//   - one merge colref / tablevar pair is created per entry of sources,
//     with range variable "_m" + int_to_string(i),
//   - the parameter table is rebuilt with duplicated data types and
//     definitions is copied from spx.
// ifaces and ifdb are only forwarded to resolve_slack().
42 mrg_qpn::mrg_qpn(filter_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): param_tbl is assigned a fresh param_table further down,
// so this first assignment looks redundant -- confirm.
43 param_tbl = spx->param_tbl;
46 field_entry_list *fel = new field_entry_list();
// Copy the select list into the output schema and locate the merge field.
51 for(i=0;i<spx->select_list.size();++i){
52 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
53 if(dt->is_temporal()){
54 if(merge_fieldpos < 0){
57 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
62 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
63 fel->append_field(fe);
67 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
70 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
72 // NEED TO HANDLE USER_SPECIFIED SLACK
// NOTE(review): merge_fieldpos indexes select_list below; this is only
// safe if the "no temporal attribute" error path above terminates --
// confirm against the full definition.
73 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
74 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
75 // if(this->slack == NULL)
76 // fprintf(stderr,"Zero slack.\n");
78 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// One merge variable and one FROM-clause table variable per source;
// source i is bound to the range variable built from its index.
80 for(i=0;i<sources.size();i++){
81 std::string rvar = "_m"+int_to_string(i);
82 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
83 mvars[i]->set_tablevar_ref(i);
84 fm.push_back(new tablevar_t(sources[i].c_str()));
85 fm[i]->set_range_var(rvar);
// Rebuild the parameter table: each parameter's data type is duplicated
// and its handle-access flag is carried over from spx.
88 param_tbl = new param_table();
89 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
91 for(pi=0;pi<param_names.size();pi++){
92 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
93 param_tbl->add_param(param_names[pi],dt->duplicate(),
94 spx->param_tbl->handle_access(param_names[pi]));
96 definitions = spx->definitions;
// mrg_qpn constructor taking a watchlist-join node.
//
// Builds a merge node named n_name from spx:
//   - one output field is created per entry of spx->select_list,
//   - a temporal select-list entry is used as the merge field; when a
//     second temporal entry is encountered a warning is printed and
//     reset_temporal() is called on that entry's duplicated data type,
//   - table_layout is created from the collected fields (STREAM_SCHEMA),
//   - one merge colref / tablevar pair is created per entry of sources,
//     with range variable "_m" + int_to_string(i),
//   - the parameter table is rebuilt with duplicated data types and
//     definitions is copied from spx.
// ifaces and ifdb are only forwarded to resolve_slack().
101 mrg_qpn::mrg_qpn(watch_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): param_tbl is assigned a fresh param_table further down,
// so this first assignment looks redundant -- confirm.
102 param_tbl = spx->param_tbl;
105 field_entry_list *fel = new field_entry_list();
// Copy the select list into the output schema and locate the merge field.
110 for(i=0;i<spx->select_list.size();++i){
111 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
112 if(dt->is_temporal()){
113 if(merge_fieldpos < 0){
116 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
117 dt->reset_temporal();
121 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
122 fel->append_field(fe);
125 if(merge_fieldpos<0){
126 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
129 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
131 // NEED TO HANDLE USER_SPECIFIED SLACK
// NOTE(review): merge_fieldpos indexes select_list below; this is only
// safe if the merge_fieldpos<0 error path above terminates -- confirm
// against the full definition.
132 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
133 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
134 // if(this->slack == NULL)
135 // fprintf(stderr,"Zero slack.\n");
137 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// One merge variable and one FROM-clause table variable per source;
// source i is bound to the range variable built from its index.
139 for(i=0;i<sources.size();i++){
140 std::string rvar = "_m"+int_to_string(i);
141 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
142 mvars[i]->set_tablevar_ref(i);
143 fm.push_back(new tablevar_t(sources[i].c_str()));
144 fm[i]->set_range_var(rvar);
// Rebuild the parameter table: each parameter's data type is duplicated
// and its handle-access flag is carried over from spx.
147 param_tbl = new param_table();
148 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
150 for(pi=0;pi<param_names.size();pi++){
151 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
152 param_tbl->add_param(param_names[pi],dt->duplicate(),
153 spx->param_tbl->handle_access(param_names[pi]));
155 definitions = spx->definitions;
162 // This function translates an analyzed parse tree
163 // into one or more query nodes (qp_node).
164 // Currently only one node is created, but some query
165 // fragments might create more than one query node,
166 // e.g. aggregation over a join, or nested subqueries
167 // in the FROM clause (unless this is handled at parse tree
168 // analysis time). At this stage, they will be linked
169 // by the names in the FROM clause.
170 // INVARIANT : if more than one query node is returned,
171 // the last one represents the output of the query.
//
// Parameters:
//   qs     - analyzed query; qs->query_type and the group-by / aggregate /
//            FROM-clause information select which node class is built.
//   Schema - passed through to every node constructor.
// The created node is recorded as plan_root and pushed onto local_plan;
// plan_root then receives the query name and definitions from qs.
// NOTE(review): the return statement is outside this excerpt; given the
// return type and the commented-out return(plan_root), local_plan is
// presumably what is returned -- confirm.
172 vector<qp_node *> create_query_nodes(query_summary_class *qs,table_list *Schema){
174 // Classify the query.
176 vector <qp_node *> local_plan;
180 // I should probably move a lot of this code
181 // into the qp_node constructors,
182 // and have this code focus on building the query plan tree.
// Watchlist query: a single watch_tbl_qpn.
185 if(qs->query_type == WATCHLIST_QUERY){
186 watch_tbl_qpn *watchnode = new watch_tbl_qpn(qs, Schema);
189 plan_root = watchnode;
190 local_plan.push_back(watchnode);
// Merge query: a single mrg_qpn.
195 if(qs->query_type == MERGE_QUERY){
196 mrg_qpn *merge_node = new mrg_qpn(qs,Schema);
199 plan_root = merge_node;
200 local_plan.push_back(merge_node);
// NOTE(review): the lines from here to the end of the MERGE_QUERY case
// look like disabled source-splitting / debugging code (the enclosing
// comment delimiters are not visible in this excerpt). If it is ever
// re-enabled, note that "%d" is used with split_merge.size().
203 Do not split sources until we are done with optimizations
204 vector<mrg_qpn *> split_merge = merge_node->split_sources();
205 local_plan.insert(local_plan.begin(), split_merge.begin(), split_merge.end());
207 // If children are created, add them to the schema.
210 printf("split_merge size is %d\n",split_merge.size());
211 for(i=1;i<split_merge.size();++i){
212 Schema->add_table(split_merge[i]->get_fields());
213 printf("Adding split merge table %d\n",i);
218 printf("Did split sources on %s:\n",qs->query_name.c_str());
220 for(ss=0;ss<local_plan.size();ss++){
221 printf("node %d, name=%s, sources=",ss,local_plan[ss]->get_node_name().c_str());
222 vector<tablevar_t *> inv = local_plan[ss]->get_input_tbls();
224 for(nn=0;nn<inv.size();nn++){
225 printf("%s ",inv[nn]->to_string().c_str());
234 if(qs->query_type == SELECT_QUERY){
236 // Select / Aggregation / Join
// No group-by variables and no aggregates: selection or join.
237 if(qs->gb_tbl->size() == 0 && qs->aggr_tbl->size() == 0){
// A single FROM entry gives a plain select/project node (spx_qpn).
239 if(qs->fta_tree->get_from()->size() == 1){
240 spx_qpn *spx_node = new spx_qpn(qs,Schema);
242 plan_root = spx_node;
243 local_plan.push_back(spx_node);
// Otherwise the join node class is chosen from the FROM-clause properties.
245 if(qs->fta_tree->get_from()->get_properties() == FILTER_JOIN_PROPERTY){
246 filter_join_qpn *join_node = new filter_join_qpn(qs,Schema);
247 plan_root = join_node;
248 local_plan.push_back(join_node);
250 if(qs->fta_tree->get_from()->get_properties() == WATCHLIST_JOIN_PROPERTY){
251 watch_join_qpn *join_node = new watch_join_qpn(qs,Schema);
252 plan_root = join_node;
253 local_plan.push_back(join_node);
255 join_eq_hash_qpn *join_node = new join_eq_hash_qpn(qs,Schema);
256 plan_root = join_node;
257 local_plan.push_back(join_node);
// Aggregation queries: the node class depends on whether states,
// supergroups or cleaning-by predicates are referenced (sgahcwcb_qpn),
// whether a closing-when predicate exists (rsgah_qpn), or neither (sgah_qpn).
264 if(qs->states_refd.size() || qs->sg_tbl.size() || qs->cb_cnf.size()){
265 sgahcwcb_qpn *sgahcwcb_node = new sgahcwcb_qpn(qs,Schema);
266 plan_root = sgahcwcb_node;
267 local_plan.push_back(sgahcwcb_node);
269 if(qs->closew_cnf.size()){
270 rsgah_qpn *rsgah_node = new rsgah_qpn(qs,Schema);
271 plan_root = rsgah_node;
272 local_plan.push_back(rsgah_node);
274 sgah_qpn *sgah_node = new sgah_qpn(qs,Schema);
275 plan_root = sgah_node;
276 local_plan.push_back(sgah_node);
283 // Get the query name and other definitions.
284 plan_root->set_node_name( qs->query_name);
285 plan_root->set_definitions( qs->definitions) ;
288 // return(plan_root);
// Renders the scalar expression se as query text.
// Sub-expressions are rendered by recursive calls; aggregate references
// are resolved through aggr_tbl (get_aggr_se / get_operand_list).
// Returns the rendered string. An unrecognized operator type is not
// reported separately: the "ERROR SE op type not recognized ..." text
// below is returned as the result.
294 string se_to_query_string(scalarexp_t *se, aggregate_table *aggr_tbl){
299 vector<scalarexp_t *> operand_list;
302 if(se->is_superaggr())
305 switch(se->get_operator_type()){
307 l_str = se->get_literal()->to_query_string();
// Parameters are written as "$" followed by the operator name.
310 l_str = "$" + se->get_op();
313 l_str = se->get_colref()->to_query_string() ;
316 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
// NOTE(review): the second ';' below is a stray empty statement.
318 return se->get_op()+"( "+l_str+" )";;
320 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
321 r_str = se_to_query_string(se->get_right_se(),aggr_tbl);
322 return( "("+l_str+")"+se->get_op()+"("+r_str+")" );
324 return( se->get_op() + su_ind + "(*)");
// The aggregated expression is looked up in aggr_tbl by aggregate reference.
326 l_str = se_to_query_string(aggr_tbl->get_aggr_se(se->get_aggr_ref()),aggr_tbl);
327 return( se->get_op() + su_ind + "(" + l_str + ")" );
// A non-negative aggregate reference means the operands live in aggr_tbl;
// otherwise they are taken from the expression itself.
329 if(se->get_aggr_ref() >= 0)
330 operand_list = aggr_tbl->get_operand_list(se->get_aggr_ref());
332 operand_list = se->get_operands();
334 ret = se->get_op() + su_ind + "(";
335 for(p=0;p<operand_list.size();p++){
336 l_str = se_to_query_string(operand_list[p],aggr_tbl);
344 return "ERROR SE op type not recognized in se_to_query_string.\n";
// Renders the predicate pr as query text.
// Scalar operands are rendered with se_to_query_string (aggr_tbl is passed
// through); sub-predicates are rendered by recursive calls.
// An unknown predicate operator type is reported on stderr together with
// the predicate's line and character position.
348 string pred_to_query_str(predicate_t *pr, aggregate_table *aggr_tbl){
353 vector<literal_t *> llist;
354 vector<scalarexp_t *> op_list;
356 switch(pr->get_operator_type()){
// IN predicate: "<left> IN [" followed by the literal list.
358 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
359 ret = l_str + " IN [";
360 llist = pr->get_lit_vec();
361 for(l=0;l<llist.size();l++){
363 ret += llist[l]->to_query_string();
// Comparison between two scalar expressions.
369 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
370 r_str = se_to_query_string(pr->get_right_se(),aggr_tbl);
371 return( l_str + " " + pr->get_op() + " " + r_str );
// Unary predicate operator applied to one sub-predicate.
373 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
374 return(pr->get_op() + "( " + l_str + " )");
// Binary predicate operator.
// NOTE(review): the right operand is emitted before the left one here,
// unlike the comparison case above -- confirm this ordering is intended.
376 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
377 r_str = pred_to_query_str(pr->get_right_pr(),aggr_tbl);
378 return("( " + r_str + " )" + pr->get_op() + "( " + l_str + " )");
// Predicate function call: "<op>[" followed by the rendered operands.
380 ret = pr->get_op()+"[";
381 op_list = pr->get_op_list();
382 for(o=0;o<op_list.size();++o){
384 ret += se_to_query_string(op_list[o],aggr_tbl);
389 fprintf(stderr,"INTERNAL ERROR in pred_to_query_str, line %d, character %d, unknown predicate operator type %d\n",
390 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
399 // Build a selection list,
400 // but avoid adding duplicate SEs.
//
// Scans lfta_select_list for an entry whose se is equivalent to se
// (is_equivalent_se). When se is appended it is given the placeholder
// name "NoNameIn:add_select_list_nodup" and the index of the new entry
// is returned.
// NOTE(review): the remaining parameter(s) and the body of the match
// branch are not visible in this excerpt; callers pass a new_element flag.
403 int add_select_list_nodup(vector<select_element *> &lfta_select_list, scalarexp_t *se,
407 for(s=0;s<lfta_select_list.size();s++){
408 if(is_equivalent_se(lfta_select_list[s]->se, se)){
413 lfta_select_list.push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
414 return(lfta_select_list.size()-1);
419 // TODO: The generated colref should be tied to the tablevar
420 // representing the lfta output. For now, always 0.
//
// Registers se in lfta_select_list (an equivalent entry is reused rather
// than duplicated) and builds a new colref scalar expression that refers
// to that entry by column name. The colref's tablevar reference is set to
// h_tvref and the new expression takes over se's decorations.
// NOTE(review): the return statement is outside this excerpt; new_se is
// presumably the value returned -- confirm.
422 scalarexp_t *make_fta_se_ref(vector<select_element *> &lfta_select_list, scalarexp_t *se, int h_tvref){
424 int fta_se_nbr = add_select_list_nodup(lfta_select_list, se, new_element);
// Either reuse the name already stored for the entry, or impute a column
// name and store it in the select list.
427 colname = lfta_select_list[fta_se_nbr]->name;
429 colname = impute_colname(lfta_select_list, se);
430 lfta_select_list[fta_se_nbr]->name = colname;
433 // TODO: fill in the tablevar and schema of the colref here.
434 colref_t *new_cr = new colref_t(colname.c_str());
435 new_cr->set_tablevar_ref(h_tvref);
438 scalarexp_t *new_se= new scalarexp_t(new_cr);
439 new_se->use_decorations_of(se);
445 // Build a selection list,
446 // but avoid adding duplicate SEs.
//
// Pointer-taking overload: scans *lfta_select_list for an entry whose se
// is equivalent to se (is_equivalent_se). When se is appended it is given
// the placeholder name "NoNameIn:add_select_list_nodup" and the index of
// the new entry is returned.
// NOTE(review): the remaining parameter(s) and the body of the match
// branch are not visible in this excerpt; callers pass a new_element flag.
449 int add_select_list_nodup(vector<select_element *> *lfta_select_list, scalarexp_t *se,
453 for(s=0;s<lfta_select_list->size();s++){
454 if(is_equivalent_se((*lfta_select_list)[s]->se, se)){
459 lfta_select_list->push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
460 return(lfta_select_list->size()-1);
465 // TODO: The generated colref should be tied to the tablevar
466 // representing the lfta output. For now, always 0.
//
// Overload for several select lists: h_tvref selects which list in
// lfta_select_list receives se, and is also used as the tablevar
// reference of the new colref. An equivalent entry in that list is reused
// rather than duplicated. The new expression takes over se's decorations.
// NOTE(review): the return statement is outside this excerpt; new_se is
// presumably the value returned -- confirm.
468 scalarexp_t *make_fta_se_ref(vector<vector<select_element *> *> &lfta_select_list, scalarexp_t *se, int h_tvref){
// NOTE(review): h_tvref indexes lfta_select_list without a range check;
// callers must pass a valid index.
470 vector<select_element *> *the_sel_list = lfta_select_list[h_tvref];
471 int fta_se_nbr = add_select_list_nodup(the_sel_list, se, new_element);
// Either reuse the name already stored for the entry, or impute a column
// name and store it in the select list.
474 colname = (*the_sel_list)[fta_se_nbr]->name;
476 colname = impute_colname(*the_sel_list, se);
477 (*the_sel_list)[fta_se_nbr]->name = colname;
480 // TODO: fill in the tablevar and schema of the colref here.
481 colref_t *new_cr = new colref_t(colname.c_str());
482 new_cr->set_tablevar_ref(h_tvref);
485 scalarexp_t *new_se= new scalarexp_t(new_cr);
486 new_se->use_decorations_of(se);
495 // Test if a se can be evaluated at the fta.
496 // check forbidden types (e.g. float), forbidden operations
497 // between types (e.g. divide a long long), forbidden operations
498 // (too expensive, not implemented).
500 // Return true if not forbidden, false if forbidden
502 // TODO: the parameter aggr_tbl is not used, delete it.
//
// Parameters:
//   se       - scalar expression to test; operands are tested recursively.
//   aggr_tbl - only forwarded to the recursive calls.
//   Ext_fcns - external function table, used to look up a function by
//              name and operand type signature and to ask whether it is
//              fta-legal.
504 bool check_fta_forbidden_se(scalarexp_t *se,
505 aggregate_table *aggr_tbl,
506 ext_fcn_list *Ext_fcns
510 vector<scalarexp_t *> operand_list;
511 vector<data_type *> dt_signature;
512 data_type *dt = se->get_data_type();
516 switch(se->get_operator_type()){
// Leaf expressions: legality depends only on the data type.
520 return( se->get_data_type()->fta_legal_type() );
// Unary operator: the operand must be legal, then the operation itself.
524 if(!check_fta_forbidden_se(se->get_left_se(), aggr_tbl, Ext_fcns))
527 dt->fta_legal_operation(se->get_left_se()->get_data_type(), se->get_op())
// Binary operator: both operands must be legal, then the operation itself.
530 if(!check_fta_forbidden_se(se->get_left_se(),aggr_tbl, Ext_fcns))
532 if(!check_fta_forbidden_se(se->get_right_se(),aggr_tbl, Ext_fcns))
534 return(dt->fta_legal_operation(se->get_left_se()->get_data_type(),
535 se->get_right_se()->get_data_type(),
540 // return true, aggregate fta-safeness is determined elsewhere.
// Function call: a call that carries an aggregate reference is accepted
// here; otherwise every operand is checked and the function is looked up
// by name and operand type signature.
547 if(se->get_aggr_ref() >= 0) return true;
549 operand_list = se->get_operands();
550 for(p=0;p<operand_list.size();p++){
551 if(!check_fta_forbidden_se(operand_list[p],aggr_tbl, Ext_fcns))
553 dt_signature.push_back(operand_list[p]->get_data_type() );
555 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup failure: print the attempted signature; a result of -2 is
// reported as "multiple subsuming functions found".
557 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
559 for(o=0;o<operand_list.size();o++){
560 if(o>0) fprintf(stderr,", ");
561 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
563 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
564 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
568 return(Ext_fcns->fta_legal(fcn_id) );
// NOTE(review): this internal error goes to stdout via printf, while the
// other diagnostics in this function use fprintf(stderr, ...).
570 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
579 // test if a pr can be executed at the fta.
581 // Return true if not forbidden, false if forbidden
//
// Parameters:
//   pr       - predicate to test; sub-predicates and scalar operands are
//              tested recursively.
//   aggr_tbl - only forwarded to the recursive calls.
//   Ext_fcns - external function table, used to look up a predicate by
//              name and operand type signature and to ask whether it is
//              fta-legal.
583 bool check_fta_forbidden_pr(predicate_t *pr,
584 aggregate_table *aggr_tbl,
585 ext_fcn_list *Ext_fcns
588 vector<literal_t *> llist;
591 vector<scalarexp_t *> op_list;
592 vector<data_type *> dt_signature;
596 switch(pr->get_operator_type()){
// IN predicate: the left expression and every literal's type must be legal.
598 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns) )
600 llist = pr->get_lit_vec();
601 for(l=0;l<llist.size();l++){
// NOTE(review): a data_type is allocated for every literal and no matching
// release is visible in this excerpt -- possible leak, confirm.
602 dt = new data_type(llist[l]->get_type());
603 if(! dt->fta_legal_type()){
// Comparison: both scalar operands must be legal.
611 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns))
613 if(! check_fta_forbidden_se(pr->get_right_se(), aggr_tbl, Ext_fcns))
// Unary predicate operator: legality of the single sub-predicate.
617 return( check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns) );
// Binary predicate operator: both sub-predicates must be legal.
619 if(! check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns))
621 if(! check_fta_forbidden_pr(pr->get_right_pr(), aggr_tbl, Ext_fcns))
// Predicate function: every operand is checked and the predicate is looked
// up by name and operand type signature.
625 op_list = pr->get_op_list();
626 for(o=0;o<op_list.size();o++){
627 if(!check_fta_forbidden_se(op_list[o],aggr_tbl, Ext_fcns))
629 dt_signature.push_back(op_list[o]->get_data_type() );
631 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), dt_signature);
// Lookup failure: print the attempted signature; a result of -2 is
// reported as "multiple subsuming predicates found".
633 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
635 for(o=0;o<op_list.size();o++){
636 if(o>0) fprintf(stderr,", ");
637 fprintf(stderr,"%s",op_list[o]->get_data_type()->to_string().c_str());
639 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
640 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
644 return(Ext_fcns->fta_legal(fcn_id) );
646 fprintf(stderr,"INTERNAL ERROR in check_fta_forbidden_pr, line %d, character %d, unknown predicate operator type %d\n",
647 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
656 // Split the aggregates in orig_aggr_tbl, into superaggregates and
658 // (the value of the HFTA aggregate might be a SE of several LFTA
659 // subaggregates, e.g. avg : sum / count )
660 // Register the superaggregates in hfta_aggr_tbl, and the
661 // subaggregates in lfta_aggr_tbl.
662 // Insert references to the subaggregates into lfta_select_list.
663 // (and record their names in the currnames list)
664 // Create a SE for the superaggregate, put it in hfta_aggr_se,
//
// Parameters:
//   orig_aggr_tbl    - table holding the aggregate to split.
//   agr_id           - index of that aggregate; also the key under which
//                      the resulting superaggregate SE is stored.
//   hfta_aggr_tbl    - receives the superaggregate(s).
//   lfta_aggr_tbl    - receives the subaggregate(s).
//   lfta_select_list - receives one entry per subaggregate (equivalent
//                      entries are reused).
//   hfta_aggr_se     - output map: agr_id -> superaggregate expression.
//   Ext_fcns         - external function table, consulted for user-defined
//                      (non-builtin) aggregates.
667 void split_fta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
668 aggregate_table *hfta_aggr_tbl,
669 aggregate_table *lfta_aggr_tbl,
670 vector<select_element *> &lfta_select_list,
671 map<int,scalarexp_t *> &hfta_aggr_se,
672 ext_fcn_list *Ext_fcns
675 scalarexp_t *subaggr_se;
680 scalarexp_t *new_se, *l_se;
681 vector<scalarexp_t *> subaggr_ref_se;
// ---- User-defined aggregate: sub/super ids come from Ext_fcns. ----
684 if(! orig_aggr_tbl->is_builtin(agr_id)){
685 // Construct the subaggregate
686 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
687 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
688 vector<scalarexp_t *> subopl;
// The subaggregate gets its own copies of the operands.
690 for(o=0;o<opl.size();++o){
691 subopl.push_back(dup_se(opl[o], NULL));
693 int sub_id = Ext_fcns->get_subaggr_id(fcn_id);
694 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
695 subaggr_se->set_fcn_id(sub_id);
696 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
697 // Add it to the lfta select list.
698 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
700 colname = lfta_select_list[fta_se_nbr]->name;
702 colname = impute_colname(lfta_select_list, subaggr_se);
703 lfta_select_list[fta_se_nbr]->name = colname;
704 ano = lfta_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,Ext_fcns->has_lfta_bailout(sub_id));
705 subaggr_se->set_aggr_id(ano);
708 // Construct a reference to the subaggregate
709 new_cr = new colref_t(colname.c_str());
710 new_se = new scalarexp_t(new_cr);
711 // I'm not certain what the types should be ....
712 // This will need to be filled in by later analysis.
713 // NOTE: this might not capture all the meaning of data_type ...
714 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
715 subaggr_ref_se.push_back(new_se);
717 // Construct the superaggregate
718 int super_id = Ext_fcns->get_superaggr_id(fcn_id);
719 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
720 ret_se->set_fcn_id(super_id);
721 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
722 // Register it in the hfta aggregate table
723 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, Ext_fcns->is_running_aggr(sub_id),false);
724 ret_se->set_aggr_id(ano);
725 hfta_aggr_se[agr_id] = ret_se;
731 // builtin aggregate processing
// The subaggregate names and data types are parallel vectors indexed by sa.
735 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
736 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// ---- Builtin star aggregate: subaggregates take no operand. ----
739 if(orig_aggr_tbl->is_star_aggr(agr_id)){
740 for(sa=0;sa<subaggr_names.size();sa++){
741 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
742 subaggr_se->set_data_type(subaggr_dt[sa]);
744 // The following sequence is similar to the code in make_fta_se_ref,
745 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a new fta_se_nbr inside the loop, whereas
// the other branches assign to an fta_se_nbr declared outside this
// excerpt -- the inner declaration shadows it; harmless as written.
746 int fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
748 colname = lfta_select_list[fta_se_nbr]->name;
750 colname = impute_colname(lfta_select_list, subaggr_se);
751 lfta_select_list[fta_se_nbr]->name = colname;
752 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
753 subaggr_se->set_aggr_id(ano);
755 new_cr = new colref_t(colname.c_str());
756 new_cr->set_tablevar_ref(0);
757 new_se = new scalarexp_t(new_cr);
759 // I'm not certain what the types should be ....
760 // This will need to be filled in by later analysis.
761 // Actually, this is causing a problem.
762 // I will assume a UINT data type. / change to INT
763 // (consistent with assign_data_types in analyze_fta.cc)
764 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
765 data_type *ndt = new data_type("Int"); // used to be Uint
766 new_se->set_data_type(ndt);
768 subaggr_ref_se.push_back(new_se);
// ---- Builtin aggregate over an expression. ----
771 for(sa=0;sa<subaggr_names.size();sa++){
// Depending on a condition outside this excerpt, the subaggregate is
// either built over a copy of the original operand or as a star aggregate.
773 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
774 l_se = dup_se(aggr_operand, NULL);
775 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
777 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
779 subaggr_se->set_data_type(subaggr_dt[sa]);
781 // again, similar to make_fta_se_ref.
782 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
784 colname = lfta_select_list[fta_se_nbr]->name;
786 colname = impute_colname(lfta_select_list, subaggr_se);
787 lfta_select_list[fta_se_nbr]->name = colname;
789 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
791 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
792 subaggr_se->set_aggr_id(ano);
794 new_cr = new colref_t(colname.c_str());
795 new_se = new scalarexp_t(new_cr);
796 // I'm not certain what the types should be ....
797 // This will need to be filled in by later analysis.
798 // NOTE: this might not capture all the meaning of data_type ...
799 new_se->set_data_type(subaggr_dt[sa]);
800 subaggr_ref_se.push_back(new_se);
// Build the superaggregate expression over the subaggregate references.
803 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
804 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
806 // ASSUME either the return value is an aggregation,
807 // or a binary_op between two aggregations
// NOTE(review): both sides of the || below test SE_AGGR_SE, so the second
// comparison is redundant. One of them was presumably meant to test a
// different aggregate operator type (e.g. the star-aggregate type) --
// confirm against the operator type enum.
808 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
809 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
810 ret_se->set_aggr_id(ano);
812 // Basically processing for AVG.
813 // set the data type of the superagg to that of the subagg.
// NOTE(review): indexes subaggr_dt[0] and subaggr_dt[1]; assumes this
// branch is only reached with at least two subaggregates -- confirm.
814 scalarexp_t *left_se = ret_se->get_left_se();
815 left_se->set_data_type(subaggr_dt[0]);
816 ano = hfta_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
817 left_se->set_aggr_id(ano);
819 scalarexp_t *right_se = ret_se->get_right_se();
820 right_se->set_data_type(subaggr_dt[1]);
821 ano = hfta_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
822 right_se->set_aggr_id(ano);
825 hfta_aggr_se[agr_id] = ret_se;
830 // Split the aggregates in orig_aggr_tbl, into hfta_superaggregates and
831 // hfta_subaggregates.
832 // Register the superaggregates in hi_aggr_tbl, and the
833 // subaggregates in low_aggr_tbl.
834 // Insert references to the subaggregates into low_select_list.
835 // (and record their names in the currnames list)
836 // Create a SE for the superaggregate, put it in hi_aggr_se,
//
// Parameters:
//   orig_aggr_tbl   - table holding the aggregate to split.
//   agr_id          - index of that aggregate; also the key under which
//                     the resulting superaggregate SE is stored.
//   hi_aggr_tbl     - receives the superaggregate(s).
//   low_aggr_tbl    - receives the subaggregate(s).
//   low_select_list - receives one entry per subaggregate (equivalent
//                     entries are reused).
//   hi_aggr_se      - output map: agr_id -> superaggregate expression.
//   Ext_fcns        - external function table, consulted for user-defined
//                     (non-builtin) aggregates.
839 void split_hfta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
840 aggregate_table *hi_aggr_tbl,
841 aggregate_table *low_aggr_tbl,
842 vector<select_element *> &low_select_list,
843 map<int,scalarexp_t *> &hi_aggr_se,
844 ext_fcn_list *Ext_fcns
847 scalarexp_t *subaggr_se;
852 scalarexp_t *new_se, *l_se;
853 vector<scalarexp_t *> subaggr_ref_se;
// ---- User-defined aggregate: hfta sub/super ids come from Ext_fcns. ----
856 if(! orig_aggr_tbl->is_builtin(agr_id)){
857 // Construct the subaggregate
858 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
859 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
860 vector<scalarexp_t *> subopl;
// The subaggregate gets its own copies of the operands.
862 for(o=0;o<opl.size();++o){
863 subopl.push_back(dup_se(opl[o], NULL));
865 int sub_id = Ext_fcns->get_hfta_subaggr_id(fcn_id);
866 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
867 subaggr_se->set_fcn_id(sub_id);
868 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
869 // Add it to the low select list.
870 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
872 colname = low_select_list[fta_se_nbr]->name;
874 colname = impute_colname(low_select_list, subaggr_se);
875 low_select_list[fta_se_nbr]->name = colname;
876 ano = low_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,false);
877 subaggr_se->set_aggr_id(ano);
880 // Construct a reference to the subaggregate
881 new_cr = new colref_t(colname.c_str());
882 new_se = new scalarexp_t(new_cr);
883 // I'm not certain what the types should be ....
884 // This will need to be filled in by later analysis.
885 // NOTE: this might not capture all the meaning of data_type ...
886 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
887 subaggr_ref_se.push_back(new_se);
889 // Construct the superaggregate
890 int super_id = Ext_fcns->get_hfta_superaggr_id(fcn_id);
891 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
892 ret_se->set_fcn_id(super_id);
893 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
894 // Register it in the high aggregate table
895 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, false,false);
896 ret_se->set_aggr_id(ano);
897 hi_aggr_se[agr_id] = ret_se;
903 // builtin aggregate processing
// The subaggregate names and data types are parallel vectors indexed by sa.
907 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
908 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// ---- Builtin star aggregate: subaggregates take no operand. ----
911 if(orig_aggr_tbl->is_star_aggr(agr_id)){
912 for(sa=0;sa<subaggr_names.size();sa++){
913 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
914 subaggr_se->set_data_type(subaggr_dt[sa]);
916 // The following sequence is similar to the code in make_fta_se_ref,
917 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a new fta_se_nbr inside the loop, whereas
// the other branches assign to an fta_se_nbr declared outside this
// excerpt -- the inner declaration shadows it; harmless as written.
918 int fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
920 colname = low_select_list[fta_se_nbr]->name;
922 colname = impute_colname(low_select_list, subaggr_se);
923 low_select_list[fta_se_nbr]->name = colname;
924 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
925 subaggr_se->set_aggr_id(ano);
927 new_cr = new colref_t(colname.c_str());
928 new_cr->set_tablevar_ref(0);
929 new_se = new scalarexp_t(new_cr);
931 // I'm not certain what the types should be ....
932 // This will need to be filled in by later analysis.
933 // Actually, this is causing a problem.
934 // I will assume an INT data type (this used to be UINT).
935 // (consistent with assign_data_types in analyze_fta.cc)
936 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
937 data_type *ndt = new data_type("Int"); // was Uint
938 new_se->set_data_type(ndt);
940 subaggr_ref_se.push_back(new_se);
// ---- Builtin aggregate over an expression. ----
943 for(sa=0;sa<subaggr_names.size();sa++){
// Depending on a condition outside this excerpt, the subaggregate is
// either built over a copy of the original operand or as a star aggregate.
945 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
946 l_se = dup_se(aggr_operand, NULL);
947 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
949 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
951 subaggr_se->set_data_type(subaggr_dt[sa]);
953 // again, similar to make_fta_se_ref.
954 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
956 colname = low_select_list[fta_se_nbr]->name;
958 colname = impute_colname(low_select_list, subaggr_se);
959 low_select_list[fta_se_nbr]->name = colname;
961 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
963 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
964 subaggr_se->set_aggr_id(ano);
966 new_cr = new colref_t(colname.c_str());
967 new_se = new scalarexp_t(new_cr);
968 // I'm not certain what the types should be ....
969 // This will need to be filled in by later analysis.
970 // NOTE: this might not capture all the meaning of data_type ...
971 new_se->set_data_type(subaggr_dt[sa]);
972 subaggr_ref_se.push_back(new_se);
// Build the superaggregate expression over the subaggregate references.
975 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
976 // ASSUME either the return value is an aggregation,
977 // or a binary_op between two aggregations
// NOTE(review): both sides of the || below test SE_AGGR_SE, so the second
// comparison is redundant. One of them was presumably meant to test a
// different aggregate operator type (e.g. the star-aggregate type) --
// confirm against the operator type enum.
978 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
// NOTE(review): split_fta_aggr sets ret_se's data type before this test,
// for both branches; here it is only set in this branch -- confirm the
// difference is intended.
979 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
980 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
982 // Basically processing for AVG.
983 // set the data type of the superagg to that of the subagg.
// NOTE(review): indexes subaggr_dt[0] and subaggr_dt[1]; assumes this
// branch is only reached with at least two subaggregates -- confirm.
984 scalarexp_t *left_se = ret_se->get_left_se();
985 left_se->set_data_type(subaggr_dt[0]);
986 ano = hi_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
987 left_se->set_aggr_id(ano);
989 scalarexp_t *right_se = ret_se->get_right_se();
990 right_se->set_data_type(subaggr_dt[1]);
991 ano = hi_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
992 right_se->set_aggr_id(ano);
// NOTE(review): if this statement sits after the if/else (the braces are
// not visible here), then in the AVG case ret_se receives the aggregate id
// that was just assigned to right_se -- confirm that is intended.
995 ret_se->set_aggr_id(ano);
996 hi_aggr_se[agr_id] = ret_se;
1004 // Split a scalar expression into one part which executes
1005 // at the stream and another set of parts which execute
1007 // Because I'm actually modifying the SEs, I will make
1008 // copies. But I will assume that literals, params, and
1009 // colrefs are immutable at this point.
1010 // (if there is ever a need to change one, must make a
1012 // NOTE : if se is constant (only references literals),
1013 // avoid making the fta compute it.
1015 // NOTE : This will need to be generalized to
1016 // handle join expressions, namely to handle a vector
1019 // Return value is the HFTA se.
1020 // Add lftas select_elements to the fta_select_list.
1021 // set fta_forbidden if this node or any child cannot
1022 // execute at the lfta.
// split_fta_se
//   Recursively copies `se`, producing the expression that stays at the
//   HFTA.  Sub-expressions that are lfta-legal while an enclosing node is
//   not are handed to make_fta_se_ref() together with lfta_select_list.
//     se               : expression to split; new nodes are allocated for
//                        the copy.
//     fta_forbidden    : out-parameter, assigned from the node's own
//                        legality test combined with the flags returned
//                        for its children.
//     lfta_select_list : select list passed to make_fta_se_ref().
//     Ext_fcns         : external function table (lookup_fcn / fta_legal).
1026 scalarexp_t *split_fta_se(scalarexp_t *se,
1027 bool &fta_forbidden,
1028 vector<select_element *> &lfta_select_list,
1029 ext_fcn_list *Ext_fcns
1033 vector<scalarexp_t *> operand_list;
1034 vector<data_type *> dt_signature;
1035 scalarexp_t *ret_se, *l_se, *r_se;
1036 bool l_forbid, r_forbid, this_forbid;
1038 scalarexp_t *new_se;
// dt is the result type of this node; it is the object asked whether an
// operation is lfta-legal in the unary/binary branches below.
1039 data_type *dt = se->get_data_type();
1041 switch(se->get_operator_type()){
// Literal: legality depends only on the data type; build a new literal
// node and copy the decorations of the original.
1043 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1044 ret_se = new scalarexp_t(se->get_literal());
1045 ret_se->use_decorations_of(se);
// Parameter: same legality rule; build a new param reference by name.
1049 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1050 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1051 ret_se->use_decorations_of(se);
1055 // No colref should be forbidden,
1056 // the schema is wrong, the fta_legal_type() fcn is wrong,
1057 // or the source table is actually a stream.
1058 // Issue a warning, but proceed with processing.
1059 // Also, should not be a ref to a gbvar.
1060 // (a gbvar ref only occurs in an aggregation node,
1061 // and these SEs are rehomed, not split.
1062 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1065 fprintf(stderr,"WARNING, a colref is a forbidden data type in split_fta_se,"
1067 " type is %s, line=%d, col=%d\n",
1068 se->get_colref()->to_string().c_str(),
1069 se->get_data_type()->get_type_str().c_str(),
1070 se->lineno, se->charno
1075 fprintf(stderr,"INTERNAL ERROR, a colref is a gbvar ref in split_fta_se,"
1076 " type is %s, line=%d, col=%d\n",
1077 se->get_data_type()->get_type_str().c_str(),
1078 se->lineno, se->charno
// The column reference object itself is shared with the original node,
// only the enclosing scalarexp_t is new.
1083 ret_se = new scalarexp_t(se->get_colref());
1084 ret_se->use_decorations_of(se);
// Unary operator: split the operand first, then ask the result type
// whether this operator on the (split) operand type is lfta-legal.
1088 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1090 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1092 // If this operation is forbidden but the child SE is not,
1093 // put the child se on the lfta_select_list, create a colref
1094 // which accesses this se, and make it the child of this op.
1095 // Exception : the child se is constant (only literal refs).
1096 if(this_forbid && !l_forbid){
1097 if(!is_literal_or_param_only(l_se)){
1098 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1099 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1102 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1104 ret_se->use_decorations_of(se);
// Bitwise | on two bools; both operands are plain flags, so the value is
// the same as a logical OR.
1105 fta_forbidden = this_forbid | l_forbid;
// Binary operator: split both operands, then test the operation itself.
1109 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1110 r_se = split_fta_se(se->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1112 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1114 // Replace the left se if it is not forbidden, but something else is.
// (& here combines two bool values, equivalent to && for these operands.)
1115 if((this_forbid || r_forbid) & !l_forbid){
1116 if(!is_literal_or_param_only(l_se)){
1117 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1122 // Replace the right se if it is not forbidden, but something else is.
1123 if((this_forbid || l_forbid) & !r_forbid){
1124 if(!is_literal_or_param_only(r_se)){
1125 new_se = make_fta_se_ref(lfta_select_list, r_se,0);
1130 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1131 ret_se->use_decorations_of(se);
1132 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are not expected by this routine; report them as
// an internal error.
1139 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_fta_se."
1140 " line=%d, col=%d\n",
1141 se->get_op().c_str(),
1142 se->lineno, se->charno
// Function call: split every operand, remembering per-operand forbidden
// flags and collecting the operand types for the function lookup.
1149 fta_forbidden = false;
1150 operand_list = se->get_operands();
1151 vector<scalarexp_t *> new_operands;
1152 vector<bool> forbidden_op;
1153 for(p=0;p<operand_list.size();p++){
1154 l_se = split_fta_se(operand_list[p], l_forbid, lfta_select_list, Ext_fcns);
1156 fta_forbidden |= l_forbid;
1157 new_operands.push_back(l_se);
1158 forbidden_op.push_back(l_forbid);
// The signature is built from the ORIGINAL operand's type, not from l_se.
1159 dt_signature.push_back(operand_list[p]->get_data_type() );
1162 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Diagnostic for a failed lookup: prints the function name and the
// operand type list.  fcn_id == -2 is reported as "multiple subsuming
// functions found".
1164 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1166 for(o=0;o<operand_list.size();o++){
1167 if(o>0) fprintf(stderr,", ");
1168 fprintf(stderr,"%s",operand_list[o]->get_data_type()->get_type_str().c_str());
1170 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1171 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The function itself may be illegal at the lfta even if all operands
// are legal.
1175 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1177 // Replace the non-forbidden operands.
1178 // the forbidden ones are already replaced.
1180 for(p=0;p<new_operands.size();p++){
1181 if(! forbidden_op[p]){
1182 // if(new_operands[p]->get_data_type()->get_temporal() != constant_t){
1183 if(!is_literal_or_param_only(new_operands[p])){
1184 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],0);
1185 new_operands[p] = new_se;
1191 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1192 ret_se->use_decorations_of(se);
// NOTE(review): this message names check_fta_forbidden_se rather than
// split_fta_se and is written with printf (stdout) while the other
// diagnostics in this function use fprintf(stderr) -- looks like a
// copy/paste leftover; confirm before changing.
1198 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
1209 // The predicates have already been
1210 // broken into conjunctions.
1211 // If any part of a conjunction is fta-forbidden,
1212 // it must be executed in the stream operator.
1213 // Else it is executed in the FTA.
1214 // A pre-analysis should determine whether this
1215 // predicate is fta-safe. This procedure will
1216 // assume that it is fta-forbidden and will
1217 // prepare it for execution in the stream.
// split_fta_pr
//   Predicate counterpart of split_fta_se.  Builds a new predicate_t of
//   the same shape as `pr`; every scalar-expression operand is first
//   passed through split_fta_se, and sub-predicates are handled by
//   recursion.  make_fta_se_ref() is only called on operands for which
//   is_literal_or_param_only() is false.
//     pr               : predicate to split.
//     lfta_select_list : select list passed on to split_fta_se and
//                        make_fta_se_ref.
//     Ext_fcns         : external function table, passed to split_fta_se.
1221 predicate_t *split_fta_pr(predicate_t *pr,
1222 vector<select_element *> &lfta_select_list,
1223 ext_fcn_list *Ext_fcns
1226 vector<literal_t *> llist;
1227 scalarexp_t *se_l, *se_r;
1228 bool l_forbid, r_forbid;
1229 predicate_t *ret_pr, *pr_l, *pr_r;
1230 vector<scalarexp_t *> op_list, new_op_list;
1232 vector<data_type *> dt_signature;
1235 switch(pr->get_operator_type()){
// Scalar expression tested against a literal vector: only the left
// expression is split, the literal vector is reused from pr.
1237 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1240 if(!is_literal_or_param_only(se_l)){
1241 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1245 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison of two scalar expressions: split left, then right.
1250 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1252 if(!is_literal_or_param_only(se_l)){
1253 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1258 se_r = split_fta_se(pr->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1260 if(!is_literal_or_param_only(se_r)){
1261 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,0);
1266 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Unary predicate operator: recurse on the single child predicate.
1270 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1271 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Binary predicate operator: recurse on both child predicates.
1274 case PRED_BINARY_OP:
1275 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1276 pr_r = split_fta_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1277 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1281 // I can't push the predicate into the lfta, except by
1282 // returning a bool value, and that is not worth the trouble,
// Predicate function: split each operand and rebuild the call with the
// same name and the same function id.
1283 op_list = pr->get_op_list();
1284 for(o=0;o<op_list.size();++o){
1285 se_l = split_fta_se(op_list[o],l_forbid,lfta_select_list,Ext_fcns);
1287 if(!is_literal_or_param_only(se_l)){
1288 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1292 new_op_list.push_back(se_l);
1295 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1296 ret_pr->set_fcn_id(pr->get_fcn_id());
// Any other operator type is reported as an internal error with the
// predicate's source position.
1299 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1300 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1311 //--------------------------------------------------------------------
1315 // Split a scalar expression into one part which executes
1316 // at the stream and another set of parts which execute
1318 // Because I'm actually modifying the SEs, I will make
1319 // copies. But I will assume that literals, params, and
1320 // colrefs are immutable at this point.
1321 // (if there is ever a need to change one, must make a
1323 // NOTE : if se is constant (only references literals),
1324 // avoid making the fta compute it.
1326 // NOTE : This will need to be generalized to
1327 // handle join expressions, namely to handle a vector
1330 // Return value is the HFTA se.
1331 // Add lftas select_elements to the fta_select_list.
1332 // set fta_forbidden if this node or any child cannot
1333 // execute at the lfta.
// Sentinel values for the colref_source result of split_ftavec_se.
// Both are negative, so they never pass the ">= 0" test in
// is_PROTOCOL_source.
//   NOTBLVAR : the expression is not tied to a table variable (used for
//              literals and params).
//   MIXED    : combine_colref_source() was given two different sources.
1335 #define SPLIT_FTAVEC_NOTBLVAR -1
1336 #define SPLIT_FTAVEC_MIXED -2
// is_PROTOCOL_source
//   True when colref_source is a non-negative index and the matching
//   entry of lfta_select_list is non-NULL.  Negative sentinel values
//   (SPLIT_FTAVEC_NOTBLVAR / SPLIT_FTAVEC_MIXED) fail the first test.
//   NOTE(review): there is no upper-bound check, so colref_source is
//   assumed to be < lfta_select_list.size() -- confirm at the callers.
1338 bool is_PROTOCOL_source(int colref_source,
1339 vector< vector<select_element *> *> &lfta_select_list){
1340 if(colref_source>=0 && lfta_select_list[colref_source]!=NULL) return true;
// combine_colref_source
//   Merge the colref sources of two sub-expressions:
//     equal sources            -> that source
//     one side is NOTBLVAR     -> the other side
//     anything else            -> SPLIT_FTAVEC_MIXED
//   MIXED combined with a different value therefore stays MIXED.
1344 int combine_colref_source(int s1, int s2){
1345 if(s1==s2) return(s1);
1346 if(s1==SPLIT_FTAVEC_NOTBLVAR) return s2;
1347 if(s2==SPLIT_FTAVEC_NOTBLVAR) return s1;
1348 return SPLIT_FTAVEC_MIXED;
// split_ftavec_se
//   Variant of split_fta_se that works with one select list per table
//   variable.  In addition to the forbidden flag it reports, through
//   colref_source, which table variable the expression draws on, so that
//   sub-expressions are only handed to make_fta_se_ref() when
//   is_PROTOCOL_source() accepts their source.
1351 scalarexp_t *split_ftavec_se(
1352 scalarexp_t *se, // the SE to split
1353 bool &fta_forbidden, // return true if some part of se
1355 int &colref_source, // the tblvar which sources the
1356 // colref, or NOTBLVAR, or MIXED
1357 vector< vector<select_element *> *> &lfta_select_list,
1358 // NULL if the tblvar is not PROTOCOL,
1359 // else build the select list.
1360 ext_fcn_list *Ext_fcns // is the fcn lfta-safe?
1362 // Return value is the HFTA SE, unless fta_forbidden is true and
1363 // colref_source>=0 and the indicated source is PROTOCOL.
1364 // In that case no split was done, the make_fta_se_ref must
1365 // be done by the caller.
1368 vector<scalarexp_t *> operand_list;
1369 vector<data_type *> dt_signature;
1370 scalarexp_t *ret_se, *l_se, *r_se;
1371 bool l_forbid, r_forbid, this_forbid;
1372 int l_csource, r_csource, this_csource;
1374 scalarexp_t *new_se;
// Result type of this node, consulted for operation legality below.
1375 data_type *dt = se->get_data_type();
1377 switch(se->get_operator_type()){
// Literal: no table variable involved.
1379 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1380 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1381 ret_se = new scalarexp_t(se->get_literal());
1382 ret_se->use_decorations_of(se);
// Query parameter: no table variable involved.
1386 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1387 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1388 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1389 ret_se->use_decorations_of(se);
// Interface parameter: never forbidden; its source is the table variable
// recorded on the interface-param reference.
1392 case SE_IFACE_PARAM:
1393 fta_forbidden = false;
1394 colref_source = se->get_ifpref()->get_tablevar_ref();
1395 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1396 ret_se->use_decorations_of(se);
1400 // No colref should be forbidden,
1401 // the schema is wrong, the fta_legal_type() fcn is wrong,
1402 // or the source table is actually a stream.
1403 // Issue a warning, but proceed with processing.
1404 // Also, should not be a ref to a gbvar.
1405 // (a gbvar ref only occurs in an aggregation node,
1406 // and these SEs are rehomed, not split.
1407 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1408 colref_source = se->get_colref()->get_tablevar_ref();
// Warn only when the forbidden column comes from a PROTOCOL source.
1410 if(fta_forbidden && is_PROTOCOL_source(colref_source, lfta_select_list)){
1411 fprintf(stderr,"WARNING, a PROTOCOL colref is a forbidden data type in split_ftavec_se,"
1413 " type is %s, line=%d, col=%d\n",
1414 se->get_colref()->to_string().c_str(),
1415 se->get_data_type()->to_string().c_str(),
1416 se->lineno, se->charno
1421 fta_forbidden = true; // eval in hfta. ASSUME make copy as below.
1424 ret_se = new scalarexp_t(se->get_colref());
1425 ret_se->use_decorations_of(se);
// Unary operator: the operand's source becomes this node's source
// (colref_source is passed straight into the recursive call).
1429 l_se = split_ftavec_se(se->get_left_se(), l_forbid, colref_source, lfta_select_list, Ext_fcns);
1431 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1433 // If this operation is forbidden but the child SE is not,
1434 // AND the colref source in the se is a single PROTOCOL source
1435 // put the child se on the lfta_select_list, create a colref
1436 // which accesses this se, and make it the child of this op.
1437 // Exception : the child se is constant (only literal refs).
1438 // TODO: I think the exception is expressed by is_PROTOCOL_source
1439 if(this_forbid && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list)){
1440 if(!is_literal_or_param_only(l_se)){
1441 new_se = make_fta_se_ref(lfta_select_list, l_se,colref_source);
1442 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1445 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1447 ret_se->use_decorations_of(se);
// Bitwise | on two bool flags; same value as logical OR here.
1448 fta_forbidden = this_forbid | l_forbid;
// Binary operator: split both sides, each reporting its own source, then
// merge the two sources.
1452 l_se = split_ftavec_se(se->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1453 r_se = split_ftavec_se(se->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1455 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1456 colref_source=combine_colref_source(l_csource, r_csource);
1458 // Replace the left se if the parent must be hfta but the child can
1459 // be lfta. This translates to
1460 // a) result is PROTOCOL and forbidden, but left SE is not forbidden
1461 // OR b) if result is mixed but the left se is PROTOCOL, not forbidden
1462 if( ((this_forbid || r_forbid) && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1463 (colref_source==SPLIT_FTAVEC_MIXED && !l_forbid &&
1464 is_PROTOCOL_source(l_csource, lfta_select_list)) ){
1465 if(!is_literal_or_param_only(l_se)){
// The reference is registered under the LEFT operand's own source.
1466 new_se = make_fta_se_ref(lfta_select_list, l_se,l_csource);
1471 // same logic as for right se.
1472 if( ((this_forbid || l_forbid) && !r_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1473 (colref_source==SPLIT_FTAVEC_MIXED && !r_forbid &&
1474 is_PROTOCOL_source(r_csource, lfta_select_list)) ){
1475 if(!is_literal_or_param_only(r_se)){
1476 new_se = make_fta_se_ref(lfta_select_list, r_se,r_csource);
1481 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1482 ret_se->use_decorations_of(se);
1483 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are not expected here; report an internal error.
1490 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_ftavec_se."
1491 " line=%d, col=%d\n",
1492 se->get_op().c_str(),
1493 se->lineno, se->charno
// Function call: split each operand, keeping its forbidden flag and its
// source in parallel vectors (forbidden_op / csource) indexed like
// new_operands.
1500 operand_list = se->get_operands();
1501 vector<scalarexp_t *> new_operands;
1502 vector<bool> forbidden_op;
1503 vector<int> csource;
1505 fta_forbidden = false;
1506 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1507 for(p=0;p<operand_list.size();p++){
1508 l_se = split_ftavec_se(operand_list[p], l_forbid, l_csource, lfta_select_list, Ext_fcns);
1510 fta_forbidden |= l_forbid;
1511 colref_source = combine_colref_source(colref_source, l_csource);
1512 new_operands.push_back(l_se);
1513 forbidden_op.push_back(l_forbid);
1514 csource.push_back(l_csource);
// Signature uses the type of the original operand, not of the split copy.
1515 dt_signature.push_back(operand_list[p]->get_data_type() );
1518 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Diagnostic for a failed lookup; fcn_id == -2 is reported as an
// ambiguous match.
1520 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1522 for(o=0;o<operand_list.size();o++){
1523 if(o>0) fprintf(stderr,", ");
1524 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
1526 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1527 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
1531 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1533 // Replace the non-forbidden operands.
1534 // the forbidden ones are already replaced.
// Only done when this call must run at the hfta (forbidden) or mixes
// sources; each replaced operand is registered under its own source.
1535 if(fta_forbidden || colref_source == SPLIT_FTAVEC_MIXED){
1536 for(p=0;p<new_operands.size();p++){
1537 if(! forbidden_op[p] && is_PROTOCOL_source(csource[p], lfta_select_list)){
1538 if(!is_literal_or_param_only(new_operands[p])){
1539 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],csource[p]);
1540 new_operands[p] = new_se;
1546 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1547 ret_se->use_decorations_of(se);
// NOTE(review): written with printf (stdout) while the other diagnostics
// in this function go to stderr -- confirm whether that is intended.
1553 printf("INTERNAL ERROR in split_ftavec_se: operator type %d\n",se->get_operator_type());
1562 // The predicates have already been
1563 // broken into conjunctions.
1564 // If any part of a conjunction is fta-forbidden,
1565 // it must be executed in the stream operator.
1566 // Else it is executed in the FTA.
1567 // A pre-analysis should determine whether this
1568 // predicate is fta-safe. This procedure will
1569 // assume that it is fta-forbidden and will
1570 // prepare it for execution in the stream.
// split_ftavec_pr
//   Predicate counterpart of split_ftavec_se.  Rebuilds `pr` with every
//   scalar-expression operand passed through split_ftavec_se.  An operand
//   is handed to make_fta_se_ref() only if it is not forbidden, its source
//   passes is_PROTOCOL_source(), and is_literal_or_param_only() is false.
//     pr               : predicate to split.
//     lfta_select_list : per-table-variable select lists.
//     Ext_fcns         : external function table, passed to split_ftavec_se.
1572 predicate_t *split_ftavec_pr(predicate_t *pr,
1573 vector< vector<select_element *> *> &lfta_select_list,
1574 ext_fcn_list *Ext_fcns
1577 vector<literal_t *> llist;
1578 scalarexp_t *se_l, *se_r;
1579 bool l_forbid, r_forbid;
1580 int l_csource, r_csource;
1581 predicate_t *ret_pr, *pr_l, *pr_r;
1582 vector<scalarexp_t *> op_list, new_op_list;
1584 vector<data_type *> dt_signature;
1587 switch(pr->get_operator_type()){
// Scalar expression tested against a literal vector.
1589 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1591 // TODO: checking that the se is a PROTOCOL source should
1592 // take care of literal_or_param_only.
1593 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1594 if(!is_literal_or_param_only(se_l)){
1595 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1599 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison of two scalar expressions; each side is treated
// independently with its own forbidden flag and source.
1604 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1605 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1606 if(!is_literal_or_param_only(se_l)){
1607 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1612 se_r = split_ftavec_se(pr->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1613 if(!r_forbid && is_PROTOCOL_source(r_csource, lfta_select_list)){
1614 if(!is_literal_or_param_only(se_r)){
1615 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,r_csource);
1620 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Unary predicate operator: recurse on the child predicate.
1624 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1625 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Binary predicate operator: recurse on both children.
1628 case PRED_BINARY_OP:
1629 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1630 pr_r = split_ftavec_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1631 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1635 // I can't push the predicate into the lfta, except by
1636 // returning a bool value, and that is not worth the trouble,
// Predicate function: split each operand, rebuild with the same name and
// function id.
1637 op_list = pr->get_op_list();
1638 for(o=0;o<op_list.size();++o){
1639 se_l = split_ftavec_se(op_list[o],l_forbid,l_csource,lfta_select_list,Ext_fcns);
1640 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1641 if(!is_literal_or_param_only(se_l)){
1642 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1646 new_op_list.push_back(se_l);
1649 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1650 ret_pr->set_fcn_id(pr->get_fcn_id());
// NOTE(review): the message names split_fta_pr although this is
// split_ftavec_pr -- looks like a copy/paste leftover.
1653 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1654 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1664 ////////////////////////////////////////////////////////////////////////
1665 /// rehome_fta_se rehome_fta_pr
1666 /// This is used to split an sgah operator (aggregation),
1667 /// I just need to make gb, aggr references point to the
1668 /// new gb, aggr table entries.
// rehome_fta_se
//   Builds a copy of `se` in which aggregate references are replaced by
//   the expression stored for that aggregate id in *aggr_map.  All other
//   node kinds are copied structurally, with decorations taken from the
//   original node.
//     se       : expression to copy.
//     aggr_map : aggregate id -> replacement expression.  Lookups use
//                operator[]; for a std::map that inserts a null entry
//                when the id is absent -- presumably every id is present,
//                verify against the caller.
1671 scalarexp_t *rehome_fta_se(scalarexp_t *se,
1672 map< int, scalarexp_t * > *aggr_map
1677 vector<scalarexp_t *> operand_list;
1678 scalarexp_t *ret_se, *l_se, *r_se;
1680 scalarexp_t *new_se;
1681 data_type *dt = se->get_data_type();
1682 vector<scalarexp_t *> new_operands;
1684 switch(se->get_operator_type()){
// Literal: new node around the same literal.
1686 ret_se = new scalarexp_t(se->get_literal());
1687 ret_se->use_decorations_of(se);
// Query parameter: new reference by name.
1691 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1692 ret_se->use_decorations_of(se);
// Interface parameter: new reference to the same ifpref.
1695 case SE_IFACE_PARAM:
1696 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1697 ret_se->use_decorations_of(se);
1703 // Must be a GB REF ...
1704 // I'm assuming that the hfta gbvar table has the
1705 // same sequence of entries as the input query's gbvar table.
1706 // Else I'll need some kind of translation table.
// NOTE(review): "gbver" and "rehome_hfta_se" in the message below look
// like typos for gbvar / rehome_fta_se.
1709 fprintf(stderr,"WARNING, a colref is not a gbver ref in rehome_hfta_se"
1710 " type is %s, line=%d, col=%d\n",
1711 se->get_data_type()->to_string().c_str(),
1712 se->lineno, se->charno
1716 ret_se = new scalarexp_t(se->get_colref());
1717 ret_se->use_decorations_of(se); // just inherit the gbref
// Unary operator: rehome the operand, rebuild the node.
1721 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1723 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1724 ret_se->use_decorations_of(se);
// Binary operator: rehome both operands, rebuild the node.
1728 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1729 r_se = rehome_fta_se(se->get_right_se(), aggr_map);
1731 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1732 ret_se->use_decorations_of(se);
// Aggregate reference: return the mapped expression itself (no copy is
// made, so the returned pointer is the one stored in the map).
1738 agr_id = se->get_aggr_ref();
1739 return (*aggr_map)[agr_id];
// Function node: one with a non-negative aggregate id is substituted
// like an aggregate; otherwise the call is rebuilt from rehomed operands.
1743 agr_id = se->get_aggr_ref();
1744 if(agr_id >= 0) return (*aggr_map)[agr_id];
1746 operand_list = se->get_operands();
1747 for(p=0;p<operand_list.size();p++){
1748 l_se = rehome_fta_se(operand_list[p], aggr_map);
1750 new_operands.push_back(l_se);
1754 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1755 ret_se->use_decorations_of(se);
// Unknown operator type; note this diagnostic goes to stdout (printf).
1760 printf("INTERNAL ERROR in rehome_fta_se: operator type %d\n",se->get_operator_type());
1769 // The predicates have already been
1770 // broken into conjunctions.
1771 // If any part of a conjunction is fta-forbidden,
1772 // it must be executed in the stream operator.
1773 // Else it is executed in the FTA.
1774 // A pre-analysis should determine whether this
1775 // predicate is fta-safe. This procedure will
1776 // assume that it is fta-forbidden and will
1777 // prepare it for execution in the stream.
// rehome_fta_pr
//   Predicate counterpart of rehome_fta_se: rebuilds `pr` with every
//   scalar-expression operand passed through rehome_fta_se and every
//   sub-predicate handled recursively.  No select list is involved here;
//   the only substitution is the aggregate mapping done in rehome_fta_se.
//     pr       : predicate to copy.
//     aggr_map : aggregate id -> replacement expression, passed through.
1779 predicate_t *rehome_fta_pr(predicate_t *pr,
1780 map<int, scalarexp_t *> *aggr_map
1783 vector<literal_t *> llist;
1784 scalarexp_t *se_l, *se_r;
1785 predicate_t *ret_pr, *pr_l, *pr_r;
1786 vector<scalarexp_t *> op_list, new_op_list;
1789 switch(pr->get_operator_type()){
// Scalar expression tested against a literal vector (vector is reused).
1791 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1792 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison of two scalar expressions.
1796 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1797 se_r = rehome_fta_se(pr->get_right_se(), aggr_map);
1798 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Unary predicate operator.
1802 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1803 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Binary predicate operator.
1806 case PRED_BINARY_OP:
1807 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1808 pr_r = rehome_fta_pr(pr->get_right_pr(), aggr_map);
1809 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate function: rehome each operand, keep name and function id.
1813 op_list = pr->get_op_list();
1814 for(o=0;o<op_list.size();++o){
1815 se_l = rehome_fta_se(op_list[o], aggr_map);
1816 new_op_list.push_back(se_l);
1818 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1819 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unknown predicate operator type.
1823 fprintf(stderr,"INTERNAL ERROR in rehome_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1824 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1833 ////////////////////////////////////////////////////////////////////
1834 ///////////////// Create a STREAM table to represent the FTA output.
// create_attributes
//   Builds a STREAM_SCHEMA table_def named `tname` with one field per
//   entry of select_list.  Each field takes its type string from the
//   select element's data type, its name from the select element's name,
//   and an access function name of the form "get_field_<name>".
//     tname       : name of the new table.
//     select_list : select elements describing the output columns.
1836 table_def *create_attributes(string tname, vector<select_element *> &select_list){
1840 // Create a new STREAM schema for the output of the FTA.
1842 field_entry_list *fel = new field_entry_list();
1844 for(s=0;s<select_list.size();s++){
1845 scalarexp_t *sel_se = select_list[s]->se;
1846 data_type *dt = sel_se->get_data_type();
1848 // Grab the annotations of the field.
1849 // As of this writing, the only meaningful annotations
1850 // are whether or not the attribute is temporal.
1851 // There can be an annotation of constant_t, but
1852 // I'll ignore this, it feels like an unsafe assumption
// Copy every parameter key of the data type into a fresh param_list,
// either as key/value or as a bare key.
1853 param_list *plist = new param_list();
1854 // if(dt->is_temporal()){
1855 vector<string> param_strings = dt->get_param_keys();
1857 for(p=0;p<param_strings.size();++p){
1858 string v = dt->get_param_val(param_strings[p]);
1860 plist->append(param_strings[p].c_str(),v.c_str());
1862 plist->append(param_strings[p].c_str());
1866 // char access_fcn_name[500];
1867 string colname = select_list[s]->name;
1868 // sprintf(access_fcn_name,"get_field_%s",colname.c_str());
1869 string access_fcn_name = "get_field_"+colname;
1870 field_entry *fe = new field_entry(
1871 dt->get_type_str(), colname, access_fcn_name, plist, ufcns
1874 fel->append_field(fe);
// The two NULL arguments are passed as-is to the table_def constructor;
// their meaning is defined by table_def, not visible here.
1877 table_def *fta_tbl = new table_def(
1878 tname.c_str(), NULL, NULL, fel, STREAM_SCHEMA
1885 //------------------------------------------------------------------
1886 // Textual representation of the query node.
// spx_qpn::to_query_string
//   Renders this select/project node as query text: a "Select" list
//   (each expression followed by " AS <name>" when the name is
//   non-empty), a "From" line built from table_name, and the where
//   predicates, each parenthesised and joined with " AND ".
//   No aggregate table is passed to the expression printers (NULL).
1890 string spx_qpn::to_query_string(){
1892 string ret = "Select ";
1894 for(s=0;s<select_list.size();s++){
1896 ret += se_to_query_string(select_list[s]->se, NULL);
1897 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1901 ret += "From "+table_name->to_string()+"\n";
1903 if(where.size() > 0){
1906 for(w=0;w<where.size();w++){
1907 if(w>0) ret += " AND ";
1908 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// sgah_qpn::to_query_string
//   Renders this aggregation node as query text: select list, "From"
//   line, where predicates, group-by entries and having predicates.
//   Expressions and predicates are printed with &aggr_tbl so aggregate
//   references can be resolved by the printers.
1919 string sgah_qpn::to_query_string(){
1921 string ret = "Select ";
1923 for(s=0;s<select_list.size();s++){
1925 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1926 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1930 ret += "From "+table_name->to_string()+"\n";
1932 if(where.size() > 0){
1935 for(w=0;w<where.size();w++){
1936 if(w>0) ret += " AND ";
1937 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1942 if(gb_tbl.size() > 0){
// Simple case: at most one group-by pattern, or no entry-type
// information -- list every group-by variable as "<def> AS <name>".
1945 if(gb_tbl.gb_patterns.size() <= 1 || gb_tbl.gb_entry_type.size()==0){
1946 for(g=0;g<gb_tbl.size();g++){
1947 if(g>0) ret += ", ";
1948 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
1949 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
1951 ret += gb_tbl.get_name(g);
// Otherwise walk the entry types; gb_pos indexes into the flat list of
// group-by variables while g indexes the entries.
1955 for(g=0;g<gb_tbl.gb_entry_type.size();++g){
1956 if(g>0) ret += ", ";
// Plain entry (empty type string): a single "<def> AS <name>".
1957 if(gb_tbl.gb_entry_type[g] == ""){
1958 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+
1959 " AS "+ gb_tbl.get_name(gb_pos);
// CUBE / ROLLUP: the type keyword, then gb_entry_count[g] members.
1962 if(gb_tbl.gb_entry_type[g] == "CUBE" ||
1963 gb_tbl.gb_entry_type[g] == "ROLLUP"){
1964 ret += gb_tbl.gb_entry_type[g] + "(";
1966 for(gg=0;gg<gb_tbl.gb_entry_count[g];++gg){
1967 if(gg>0) ret += ", ";
1968 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+ " AS "+ gb_tbl.get_name(gb_pos);
// GROUPING_SETS: one parenthesised name list per component; a name is
// emitted for every position whose flag in the component is true.
1973 if(gb_tbl.gb_entry_type[g] == "GROUPING_SETS"){
1974 ret += gb_tbl.gb_entry_type[g] + "(";
1976 vector<vector<bool> > &local_components = gb_tbl.pattern_components[g];
1977 for(g1=0;g1<local_components.size();++g1){
1979 bool first_field = true;
// NOTE(review): the bound is inclusive (<=), so index gb_entry_count[g]
// is read from local_components[g1] -- verify that each component
// vector has gb_entry_count[g]+1 elements, otherwise this reads one
// past the end.
1981 for(g2=0;g2<=gb_tbl.gb_entry_count[g];g2++){
1982 if(local_components[g1][g2]){
1983 if(!first_field) ret+=", ";
1984 else first_field = false;
1985 ret += gb_tbl.get_name(gb_pos+g2);
1991 gb_pos += gb_tbl.gb_entry_count[g];
1998 if(having.size() > 0){
2001 for(h=0;h<having.size();h++){
2002 if(h>0) ret += " AND ";
2003 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// rsgah_qpn::to_query_string
//   Renders this node as query text: select list, "From" line, where
//   predicates, group-by variables ("<def> AS <name>"), having
//   predicates, and a "Closing_When" clause built from closing_when.
//   Predicate lists are parenthesised and joined with " AND ".
2012 string rsgah_qpn::to_query_string(){
2014 string ret = "Select ";
2016 for(s=0;s<select_list.size();s++){
2018 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2019 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2023 ret += "From "+table_name->to_string()+"\n";
2025 if(where.size() > 0){
2028 for(w=0;w<where.size();w++){
2029 if(w>0) ret += " AND ";
2030 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2035 if(gb_tbl.size() > 0){
2038 for(g=0;g<gb_tbl.size();g++){
2039 if(g>0) ret += ", ";
2040 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2041 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl)+" AS ";
2043 ret += gb_tbl.get_name(g);
2048 if(having.size() > 0){
2051 for(h=0;h<having.size();h++){
2052 if(h>0) ret += " AND ";
2053 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// Clause specific to this node type.
2058 if(closing_when.size() > 0){
2059 ret += "Closing_When ";
2061 for(h=0;h<closing_when.size();h++){
2062 if(h>0) ret += " AND ";
2063 ret += "(" + pred_to_query_str(closing_when[h]->pr,&aggr_tbl) + ")";
// sgahcwcb_qpn::to_query_string
//   Renders this node as query text: select list, "From" line, where
//   predicates, group-by variables, a "Supergroup" list (names of the
//   group-by variables whose index is in sg_tbl), having predicates, and
//   the "Cleaning_When" / "Cleaning_By" clauses.
2072 string sgahcwcb_qpn::to_query_string(){
2074 string ret = "Select ";
2076 for(s=0;s<select_list.size();s++){
2078 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2079 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2083 ret += "From "+table_name->to_string()+"\n";
2085 if(where.size() > 0){
2088 for(w=0;w<where.size();w++){
2089 if(w>0) ret += " AND ";
2090 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2095 if(gb_tbl.size() > 0){
2098 for(g=0;g<gb_tbl.size();g++){
2099 if(g>0) ret += ", ";
2100 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2101 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
2103 ret += gb_tbl.get_name(g);
// Supergroup: iterate over all group-by indices and print the name of
// those that are members of sg_tbl.
2108 if(sg_tbl.size() > 0){
2109 ret += "Supergroup ";
2111 bool first_elem = true;
2112 for(g=0;g<gb_tbl.size();g++){
2113 if(sg_tbl.count(g)){
2118 ret += gb_tbl.get_name(g);
2124 if(having.size() > 0){
2127 for(h=0;h<having.size();h++){
2128 if(h>0) ret += " AND ";
2129 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
2135 if(cleanwhen.size() > 0){
2136 ret += "Cleaning_When ";
2138 for(h=0;h<cleanwhen.size();h++){
2139 if(h>0) ret += " AND ";
2140 ret += "(" + pred_to_query_str(cleanwhen[h]->pr,&aggr_tbl) + ")";
2145 if(cleanby.size() > 0){
2146 ret += "Cleaning_By ";
2148 for(h=0;h<cleanby.size();h++){
2149 if(h>0) ret += " AND ";
2150 ret += "(" + pred_to_query_str(cleanby[h]->pr,&aggr_tbl) + ")";
// watch_tbl_qpn::to_query_string
//   Renders the watchlist table as text: a "WATCHLIST FIELDS {" header
//   followed by one line per field of table_layout (field->to_string()).
//   The DEFINE section (filename / refresh_interval) is currently
//   commented out and not emitted.
2158 string watch_tbl_qpn::to_query_string(){
2160 // ret += "DEFINE {\n";
2161 // ret += "\tfilename='"+filename+";\n";
2162 // ret += "\trefresh_interval="+to_string(refresh_interval)+";\n}\n";
2163 ret += "WATCHLIST FIELDS {\n";
// get_fields() result is copied into a local vector.
2164 std::vector<field_entry *> fields = table_layout->get_fields();
// Signed int index compared against size(); fine for realistic field
// counts, but it is a signed/unsigned comparison.
2165 for(int f=0;f<fields.size();++f){
2166 ret += fields[f]->to_string()+"\n";
// mrg_qpn::to_query_string
//   Renders the merge node as text: "Merge <mvar0> : <mvar1>", a
//   " SLACK <expr>" part built from `slack`, and the comma-separated
//   list of source table variables in fm.
//   NOTE(review): mvars[0] and mvars[1] are indexed without a size
//   check, so at least two merge variables are assumed -- confirm.
2173 string mrg_qpn::to_query_string(){
2175 string ret="Merge ";
2176 ret += mvars[0]->to_query_string() + " : " + mvars[1]->to_query_string();
2178 ret += " SLACK "+se_to_query_string(slack, NULL);
2183 for(t=0;t<fm.size();++t){
2184 if(t>0) ret += ", ";
2185 ret += fm[t]->to_string();
// join_eq_hash_qpn::to_query_string
//   Renders the join node as text: select list, a join-type keyword
//   (INNER_JOIN / LEFT_OUTER_JOIN / RIGHT_OUTER_JOIN / OUTER_JOIN), the
//   source table variables in `from`, and the where predicates joined
//   with " AND ".
2192 string join_eq_hash_qpn::to_query_string(){
2194 string ret = "Select ";
2196 for(s=0;s<select_list.size();s++){
2198 ret += se_to_query_string(select_list[s]->se, NULL);
2199 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2203 // NOTE: assuming binary join.
// Combine the two sources' property values into one selector: the first
// source contributes its value, the second contributes twice its value.
// from[0] and from[1] are indexed without a size check.
2204 int properties = from[0]->get_property()+2*from[1]->get_property();
2207 ret += "INNER_JOIN ";
2210 ret += "LEFT_OUTER_JOIN ";
2213 ret += "RIGHT_OUTER_JOIN ";
2216 ret += "OUTER_JOIN ";
2222 for(f=0;f<from.size();++f){
2224 ret += from[f]->to_string();
2228 if(where.size() > 0){
2231 for(w=0;w<where.size();w++){
2232 if(w>0) ret += " AND ";
2233 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// filter_join_qpn::to_query_string
//   Renders the filter-join node as text: select list,
//   "FILTER_JOIN(<temporal field>,<temporal range>)", the source table
//   variables in `from`, and the where predicates joined with " AND ".
2241 string filter_join_qpn::to_query_string(){
2243 string ret = "Select ";
2245 for(s=0;s<select_list.size();s++){
2247 ret += se_to_query_string(select_list[s]->se, NULL);
2248 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2252 // NOTE: assuming binary join.
// The join keyword carries the temporal field name and the range
// (formatted with int_to_string).
2253 ret += "FILTER_JOIN("+temporal_var->field+","+int_to_string(temporal_range)+") ";
2257 for(f=0;f<from.size();++f){
2259 ret += from[f]->to_string();
2263 if(where.size() > 0){
2266 for(w=0;w<where.size();w++){
2267 if(w>0) ret += " AND ";
2268 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// watch_join_qpn::to_query_string
//   Renders the watchlist-join node as text: select list, the
//   "WATCHLIST_JOIN" keyword, the source table variables in `from`, and
//   the where predicates, each parenthesised and joined with " AND ".
2276 string watch_join_qpn::to_query_string(){
2278 string ret = "Select ";
2280 for(s=0;s<select_list.size();s++){
2282 ret += se_to_query_string(select_list[s]->se, NULL);
2283 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2287 // NOTE: assuming binary join.
2288 ret += "WATCHLIST_JOIN ";
2292 for(f=0;f<from.size();++f){
2294 ret += from[f]->to_string();
2298 if(where.size() > 0){
2301 for(w=0;w<where.size();w++){
2302 if(w>0) ret += " AND ";
2303 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
2313 // -----------------------------------------------------------------
2314 // Query node subclass specific processing.
// Breaks a merge over many sources into a tree of smaller merge nodes.
// Returns the resulting merge nodes; in every visible path `this` is the
// last element pushed, after any newly created child merges.
// NOTE(review): the conditions selecting each case below are not visible in
// this excerpt; the case boundaries are inferred from the statements alone.
2317 vector<mrg_qpn *> mrg_qpn::split_sources(){
2318 vector<mrg_qpn *> ret;
// Sanity check: fm (sources) and mvars (merge columns) must pair up one-to-one.
2322 if(fm.size() != mvars.size()){
2323 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources. fm.size() = %lu, mvars.size() = %lu\n",fm.size(),mvars.size());
2327 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources, fm size is 1.\n");
// NOTE(review): if this diagnostic is live code, "%d" is paired with
// fm.size() (an unsigned size type) - a printf format mismatch.
2333 printf("spliting sources merge node, name = %s, %d sources.\n\t",node_name.c_str(), fm.size());
2334 for(ff=0;ff<fm.size();++ff){
2335 printf("%s ",fm[ff]->to_string().c_str());
2340 // Handle special cases.
// Case: nothing to split, the result is just this node.
2342 ret.push_back(this);
// Case: peel the first two sources off into one child (make_copy is given
// the string "_cH1"); this node then merges the child with the remaining source.
2347 mrg_qpn *new_mrg = (mrg_qpn *)this->make_copy("_cH1");
2348 new_mrg->fm.push_back(this->fm[0]);
2349 new_mrg->fm.push_back(this->fm[1]);
2350 new_mrg->mvars.push_back(this->mvars[0]);
2351 new_mrg->mvars.push_back(this->mvars[1]);
// Drop the first source here; the former second source is now at index 0
// and is replaced by a table variable that reads from the child merge,
// reusing the replaced source's range variable name.
2353 this->fm.erase(this->fm.begin());
2354 this->mvars.erase(this->mvars.begin());
2355 string vname = fm[0]->get_var_name();
2356 this->fm[0] = new tablevar_t(new_mrg->node_name.c_str());
2357 this->fm[0]->set_range_var(vname);
// NOTE(review): after the erase, this->mvars[0] is the same pointer that was
// pushed as new_mrg->mvars[1] above, so set_field/set_tablevar_ref(0) also
// change the child's second merge column - confirm this sharing is intended
// (the general case below allocates fresh colref_t objects instead).
2358 this->mvars[0]->set_field(table_layout->get_field_name(merge_fieldpos));
2359 this->mvars[0]->set_tablevar_ref(0);
2360 this->mvars[1]->set_tablevar_ref(1);
// Child first, then this node.
2362 ret.push_back(new_mrg);
2363 ret.push_back(this);
2366 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg->node_name.c_str(),this->node_name.c_str());
2367 for(i=0;i<new_mrg->fm.size();++i)
2368 printf("\tsource %s var %d (%s, %s) \n",new_mrg->node_name.c_str(),i,new_mrg->fm[i]->to_string().c_str(), new_mrg->mvars[i]->to_string().c_str());
2369 for(i=0;i<this->fm.size();++i)
2370 printf("\tsource %s var %d (%s, %s) \n",this->node_name.c_str(),i,this->fm[i]->to_string().c_str(), this->mvars[i]->to_string().c_str());
2377 // divide up the sources between two children.
2378 // Then, recurse on the children.
2380 mrg_qpn *new_mrg1 = (mrg_qpn *)this->make_copy("_cH1");
2381 mrg_qpn *new_mrg2 = (mrg_qpn *)this->make_copy("_cH2");
// The first fm.size()/2 sources (integer division) go to the first child ...
2382 for(i=0;i<this->fm.size()/2;++i){
2383 new_mrg1->fm.push_back(this->fm[i]);
2384 new_mrg1->mvars.push_back(this->mvars[i]);
2385 //printf("Pushing %d (%s, %s) to new_mrg1\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// ... and the remainder (i continues from the loop above) to the second child.
2387 for(;i<this->fm.size();++i){
2388 new_mrg2->fm.push_back(this->fm[i]);
2389 new_mrg2->mvars.push_back(this->mvars[i]);
2390 //printf("Pushing %d (%s, %s) to new_mrg2\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// Renumber each child's merge columns to match their new positions.
2392 for(i=0;i<new_mrg1->mvars.size();++i)
2393 new_mrg1->mvars[i]->set_tablevar_ref(i);
2394 for(i=0;i<new_mrg2->mvars.size();++i)
2395 new_mrg2->mvars[i]->set_tablevar_ref(i);
2397 // Children created, make this merge them.
// NOTE(review): presumably fm and mvars are emptied before these push_backs;
// the statements doing so are not visible in this excerpt - confirm.
// Source 0: the first child, read through range variable "_mrg_var_1" and
// merged on this node's output merge field.
2401 tablevar_t *tmp_tblvar = new tablevar_t(new_mrg1->node_name.c_str());
2402 tmp_tblvar->set_range_var("_mrg_var_1");
2403 fm.push_back(tmp_tblvar);
2404 colref_t *tmp_cref = new colref_t("_mrg_var_1",table_layout->get_field_name(merge_fieldpos).c_str());
2405 tmp_cref->set_tablevar_ref(0);
2406 mvars.push_back(tmp_cref);
// Source 1: the second child, set up the same way as "_mrg_var_2".
2408 tmp_tblvar = new tablevar_t(new_mrg2->node_name.c_str());
2409 tmp_tblvar->set_range_var("_mrg_var_2");
2410 fm.push_back(tmp_tblvar);
2411 tmp_cref = new colref_t("_mrg_var_2",table_layout->get_field_name(merge_fieldpos).c_str());
2412 tmp_cref->set_tablevar_ref(1);
2413 mvars.push_back(tmp_cref);
2417 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg1->node_name.c_str(),new_mrg2->node_name.c_str());
2418 for(i=0;i<new_mrg1->fm.size();++i)
2419 printf("\tsource %s var %d (%s, %s) \n",new_mrg1->node_name.c_str(),i,new_mrg1->fm[i]->to_string().c_str(), new_mrg1->mvars[i]->to_string().c_str());
2420 for(i=0;i<new_mrg2->fm.size();++i)
2421 printf("\tsource %s var %d (%s, %s) \n",new_mrg2->node_name.c_str(),i,new_mrg2->fm[i]->to_string().c_str(), new_mrg2->mvars[i]->to_string().c_str());
2424 // Recurse and put them together
// Output order: first child's subtree, second child's subtree, then this node.
2425 vector<mrg_qpn *> st1 = new_mrg1->split_sources();
2426 ret.insert(ret.end(), st1.begin(), st1.end());
2427 vector<mrg_qpn *> st2 = new_mrg2->split_sources();
2428 ret.insert(ret.end(), st2.begin(), st2.end());
2430 ret.push_back(this);
2438 //////// Split helper function : resolve interfaces
// Resolves the (machine, interface) pairs that a table variable reads from.
//   table             - table variable whose interface (set) is resolved
//   ifdb              - interface database used to evaluate an interface set
//   n_virtual_ifaces  - when 1, the basic interface list is returned unchanged
//   hfta_parallelism, hfta_idx - select which slice of virtual interfaces is produced
// When virtual interfaces are in use, each basic interface is expanded into
// `stride` entries named iface + "X" + int_to_string(2*s).
2440 vector<pair<string,string> > get_ifaces(tablevar_t *table, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2441 vector<pair<string,string> > basic_ifaces;
// Interface-set query: evaluate it through the interface database.
2443 if(table->get_ifq()){
2444 basic_ifaces= ifdb->eval(table->get_interface(),ierr);
// NOTE(review): both errors are only reported on stderr in the visible code;
// the conditions on ierr that select them are not visible in this excerpt.
2446 fprintf(stderr,"ERROR, Interface set %s not found.\n",table->get_interface().c_str());
2449 fprintf(stderr,"ERROR, interface definition file didn't parse.\n");
// Presumably the else branch: the table names one concrete interface.
2452 basic_ifaces.push_back(make_pair(table->get_machine(), table->get_interface()));
2455 if(n_virtual_ifaces == 1)
2456 return basic_ifaces;
// NOTE(review): integer division - a remainder is discarded, and
// hfta_parallelism == 0 would divide by zero; confirm callers guarantee > 0.
2458 int stride = n_virtual_ifaces / hfta_parallelism;
2460 vector<pair<string,string> > ifaces;
2462 for(i=0;i<basic_ifaces.size();++i){
2463 string mach = basic_ifaces[i].first;
2464 string iface = basic_ifaces[i].second;
// This hfta handles the slice [hfta_idx*stride, (hfta_idx+1)*stride).
2465 for(s=hfta_idx*stride;s<(hfta_idx+1)*stride;++s){
2466 ifaces.push_back(pair<string, string>(mach,iface+"X"+int_to_string(2*s)));
2474 ///////// Split helper function : compute slack in a generated
// Computes this merge node's `slack` expression from interface properties.
//   t_se    - temporal scalar expression passed to find_temporal_divisor
//   fname   - field name (NOTE(review): the visible code uses `fnm`, whose
//             declaration is not visible in this excerpt - confirm its relation to fname)
//   sources - (machine, interface) pairs whose "Slack_"+fnm values are inspected
//   ifdb    - interface database queried with get_iface_vals
//   gbt     - group-by table forwarded to find_temporal_divisor
// The slack is ceil(max interface slack / temporal divisor), stored as a
// LITERAL_LONGINT literal wrapped in a scalarexp_t.
2477 void mrg_qpn::resolve_slack(scalarexp_t *t_se, string fname, vector<pair<string, string> > &sources, ifq_t *ifdb, gb_table *gbt){
2481 // Find slack divisor, if any.
2483 long long int slack_divisor = find_temporal_divisor(t_se,gbt, fnm);
// A non-positive divisor is handled specially (handling not visible in this excerpt).
2484 if(slack_divisor <= 0){
2489 // find max slack in the iface spec
2490 long long int max_slacker = 0, this_slacker;
2491 string rname = "Slack_"+fnm;
2492 for(s=0;s<sources.size();++s){
2493 string src_machine = sources[s].first;
2494 string src_iface = sources[s].second;
2495 vector<string> slack_vec = ifdb->get_iface_vals(src_machine, src_iface,rname,e,es);
2496 for(v=0;v<slack_vec.size();++v){
// NOTE(review): "%qd" is a non-standard conversion ("%lld" is the standard
// spelling), and the sscanf result is only tested for non-zero, so an EOF
// return (-1, e.g. on an empty string) is treated as a successful parse
// while this_slacker is still unset - confirm and consider "== 1".
2497 if(sscanf(slack_vec[v].c_str(),"%qd",&this_slacker)){
2498 if(this_slacker > max_slacker)
2499 max_slacker = this_slacker;
// No positive slack found: handled specially (handling not visible in this excerpt).
2504 if(max_slacker <= 0){
// Round up so the scaled slack never under-estimates the interface slack.
2510 long long int the_slack=(long long int)(ceil(((double)max_slacker)/((double)slack_divisor)));
2512 sprintf(tmps,"%lld",the_slack);
2513 literal_t *slack_lit = new literal_t(tmps, LITERAL_LONGINT);
2514 slack = new scalarexp_t(slack_lit);
2518 //------------------------------------------------------------------
2519 // split a node to extract LFTA components.
// Watch-table nodes are not split: the result vector holds this node itself.
// Note that `this` is pushed, not a duplicate, despite the wording of the
// comment below.
2521 vector<qp_node *> watch_tbl_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2522 // nothing to do, nothing to split, return copy of self.
2526 vector<qp_node *> ret_vec;
2528 ret_vec.push_back(this);
// Merge nodes are not split: the result vector holds this node itself.
// Note that `this` is pushed, not a duplicate, despite the wording of the
// comment below.
2534 vector<qp_node *> mrg_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2535 // nothing to do, nothing to split, return copy of self.
2539 vector<qp_node *> ret_vec;
2541 ret_vec.push_back(this);
// Splits a filter join for LFTA execution.
// The select list and where predicates are first checked with
// check_fta_forbidden_se/_pr; an unsafe join is reported as an error.
// With a single interface the node itself is returned, bound to that
// interface. With several interfaces, one per-interface copy of the join is
// generated, followed by a merge node (named node_name) over those copies.
// A failure in resolve_if_params sets error_code to 3 on this node.
2546 vector<qp_node *> filter_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2547 vector<qp_node *> ret_vec;
2549 // First check if the query can be pushed to the FTA.
// fta_ok stays true only if every expression and predicate passes the check.
2552 for(s=0;s<select_list.size();s++){
2553 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2556 for(p=0;p<where.size();p++){
2557 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2561 fprintf(stderr,"ERROR, filter join %s is fta-unsafe.\n",node_name.c_str());
2565 // Can it be done in a single lfta?
2566 // Get the set of interfaces it accesses.
2569 vector<string> sel_names;
// The interface set is taken from the first table in the FROM list.
2570 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2571 if (ifaces.empty()) {
2572 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2576 if(ifaces.size() == 1){
2577 // Single interface, no need to merge.
2579 ret_vec.push_back(this);
// Bind every range variable to the one concrete interface.
2581 for(i=0;i<from.size();i++){
2582 from[i]->set_machine(ifaces[0].first);
2583 from[i]->set_interface(ifaces[0].second);
2584 from[i]->set_ifq(false);
2588 // Multiple interfaces, generate the interface-specific queries plus
// NOTE(review): sel_names is declared a second time here; if this sits in a
// nested scope it shadows the declaration above - confirm.
2592 vector<string> sel_names;
2593 for(si=0;si<ifaces.size();++si){
2594 filter_join_qpn *fta_node = new filter_join_qpn();
// NOTE(review): the visible code only reaches this loop when more than one
// interface exists, so the size()==1 naming case looks unreachable - confirm.
2597 if(ifaces.size()==1)
2598 fta_node->set_node_name( node_name );
// Per-interface name: "_" + node name + "_" + machine + "_" + interface.
2600 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2602 fta_node->set_node_name(new_name);
2604 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM list and bind each copy to this interface.
2608 for(f=0;f<from.size();f++){
2609 fta_node->from.push_back(from[f]->duplicate());
2610 fta_node->from[f]->set_machine(ifaces[si].first);
2611 fta_node->from[f]->set_interface(ifaces[si].second);
2612 fta_node->from[f]->set_ifq(false);
2614 fta_node->temporal_var = temporal_var;
2615 fta_node->temporal_range = temporal_range;
2617 fta_node->use_bloom = use_bloom;
2619 for(s=0;s<select_list.size();s++){
2620 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Each predicate class is duplicated, re-analyzed, and stored both in its
// own list and in the copy's combined `where` list.
2623 for(p=0;p<shared_pred.size();p++){
2624 predicate_t *new_pr = dup_pr(shared_pred[p]->pr, NULL);
2625 cnf_elem *new_cnf = new cnf_elem(new_pr);
2626 analyze_cnf(new_cnf);
2627 fta_node->shared_pred.push_back(new_cnf);
2628 fta_node->where.push_back(new_cnf);
2630 for(p=0;p<pred_t0.size();p++){
2631 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2632 cnf_elem *new_cnf = new cnf_elem(new_pr);
2633 analyze_cnf(new_cnf);
2634 fta_node->pred_t0.push_back(new_cnf);
2635 fta_node->where.push_back(new_cnf);
2637 for(p=0;p<pred_t1.size();p++){
2638 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2639 cnf_elem *new_cnf = new cnf_elem(new_pr);
2640 analyze_cnf(new_cnf);
2641 fta_node->pred_t1.push_back(new_cnf);
2642 fta_node->where.push_back(new_cnf);
2644 for(p=0;p<hash_eq.size();p++){
2645 predicate_t *new_pr = dup_pr(hash_eq[p]->pr, NULL);
2646 cnf_elem *new_cnf = new cnf_elem(new_pr);
2647 analyze_cnf(new_cnf);
2648 fta_node->hash_eq.push_back(new_cnf);
2649 fta_node->where.push_back(new_cnf);
2651 for(p=0;p<postfilter.size();p++){
2652 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2653 cnf_elem *new_cnf = new cnf_elem(new_pr);
2654 analyze_cnf(new_cnf);
2655 fta_node->postfilter.push_back(new_cnf);
2656 fta_node->where.push_back(new_cnf);
2659 // Xfer all of the parameters.
2660 // Use existing handle annotations.
2661 vector<string> param_names = param_tbl->get_param_names();
2663 for(pi=0;pi<param_names.size();pi++){
2664 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2665 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2666 param_tbl->handle_access(param_names[pi]));
2668 fta_node->definitions = definitions;
// Interface parameters are resolved against the copy's own interface; any
// error text goes into this node's err_str.
2669 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2670 this->error_code = 3;
2674 ret_vec.push_back(fta_node);
// The merge over the per-interface copies takes its layout from the first
// copy and keeps the original node name.
2677 mrg_qpn *mrg_node = new mrg_qpn((filter_join_qpn *)ret_vec[0],
2678 node_name, sel_names,ifaces, ifdb);
2679 ret_vec.push_back(mrg_node);
// Splits a watchlist join for LFTA execution.
// The select list and where predicates are first checked with
// check_fta_forbidden_se/_pr; an unsafe join is reported as an error.
// With a single interface the node itself is returned: from[0] is bound to
// that interface and from[1] to the "_local_" interface on the same machine.
// With several interfaces, one per-interface copy is generated, followed by
// a merge node (named node_name) over those copies.
// A failure in resolve_if_params sets error_code to 3 on this node.
2690 vector<qp_node *> watch_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2691 vector<qp_node *> ret_vec;
2693 // First check if the query can be pushed to the FTA.
// fta_ok stays true only if every expression and predicate passes the check.
2696 for(s=0;s<select_list.size();s++){
2697 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2700 for(p=0;p<where.size();p++){
2701 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2705 fprintf(stderr,"ERROR, watchlist join %s is fta-unsafe.\n",node_name.c_str());
2709 // Can it be done in a single lfta?
2710 // Get the set of interfaces it accesses.
2713 vector<string> sel_names;
// The interface set is taken from the first table in the FROM list.
2714 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2715 if (ifaces.empty()) {
// NOTE(review): this message names filter_join_qpn although it is emitted
// from watch_join_qpn::split_node_for_fta - likely a copy/paste leftover.
2716 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2720 if(ifaces.size() == 1){
2721 // Single interface, no need to merge.
2723 ret_vec.push_back(this);
2725 // Treat the range vars a bit differently, the 2nd is reading from a _local_ watchlist.
2726 from[0]->set_machine(ifaces[0].first);
2727 from[0]->set_interface(ifaces[0].second);
2728 from[0]->set_ifq(false);
2730 from[1]->set_machine(ifaces[0].first);
2731 from[1]->set_interface("_local_");
2732 from[1]->set_ifq(false);
2736 // Multiple interfaces, generate the interface-specific queries plus
// NOTE(review): sel_names is declared a second time here; if this sits in a
// nested scope it shadows the declaration above - confirm.
2740 vector<string> sel_names;
2741 for(si=0;si<ifaces.size();++si){
2742 watch_join_qpn *fta_node = new watch_join_qpn();
// NOTE(review): the visible code only reaches this loop when more than one
// interface exists, so the size()==1 naming case looks unreachable - confirm.
2745 if(ifaces.size()==1)
2746 fta_node->set_node_name( node_name );
// Per-interface name: "_" + node name + "_" + machine + "_" + interface.
2748 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2750 fta_node->set_node_name(new_name);
2752 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM list. The condition choosing between the concrete
// interface and "_local_" is not visible in this excerpt; presumably it
// mirrors the single-interface case (first table vs. watchlist) - confirm.
2756 for(f=0;f<from.size();f++){
2757 fta_node->from.push_back(from[f]->duplicate());
2758 fta_node->from[f]->set_machine(ifaces[si].first);
2760 fta_node->from[f]->set_interface(ifaces[si].second);
2762 fta_node->from[f]->set_interface("_local_");
2763 fta_node->from[f]->set_ifq(false);
2766 for(s=0;s<select_list.size();s++){
2767 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Each predicate class is duplicated, re-analyzed, and stored both in a
// class-specific container and in the copy's combined `where` list.
2770 for(p=0;p<pred_t0.size();p++){
2771 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2772 cnf_elem *new_cnf = new cnf_elem(new_pr);
2773 analyze_cnf(new_cnf);
2774 fta_node->pred_t0.push_back(new_cnf);
2775 fta_node->where.push_back(new_cnf);
2777 for(p=0;p<pred_t1.size();p++){
2778 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2779 cnf_elem *new_cnf = new cnf_elem(new_pr);
2780 analyze_cnf(new_cnf);
2781 fta_node->pred_t1.push_back(new_cnf);
2782 fta_node->where.push_back(new_cnf);
// Here hash_eq is indexed by key-field name rather than by position.
2784 for(p=0;p<key_flds.size();p++){ // we've checked that all keys are covered
2785 string k = key_flds[p];
2786 predicate_t *new_pr = dup_pr(hash_eq[k]->pr, NULL);
2787 cnf_elem *new_cnf = new cnf_elem(new_pr);
2788 analyze_cnf(new_cnf);
2789 fta_node->hash_eq[k] = new_cnf;
2790 fta_node->where.push_back(new_cnf);
2792 for(p=0;p<join_filter.size();p++){
2793 predicate_t *new_pr = dup_pr(join_filter[p]->pr, NULL);
2794 cnf_elem *new_cnf = new cnf_elem(new_pr);
2795 analyze_cnf(new_cnf);
// NOTE(review): duplicated join_filter predicates are stored in the copy's
// postfilter list, not in fta_node->join_filter - confirm this is intended
// and not a copy/paste slip from the postfilter loop below.
2796 fta_node->postfilter.push_back(new_cnf);
2797 fta_node->where.push_back(new_cnf);
2799 for(p=0;p<postfilter.size();p++){
2800 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2801 cnf_elem *new_cnf = new cnf_elem(new_pr);
2802 analyze_cnf(new_cnf);
2803 fta_node->postfilter.push_back(new_cnf);
2804 fta_node->where.push_back(new_cnf);
2806 fta_node->key_flds = key_flds;
2808 // Xfer all of the parameters.
2809 // Use existing handle annotations.
2810 vector<string> param_names = param_tbl->get_param_names();
2812 for(pi=0;pi<param_names.size();pi++){
2813 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2814 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2815 param_tbl->handle_access(param_names[pi]));
2817 fta_node->definitions = definitions;
// Interface parameters are resolved against the copy's own interface; any
// error text goes into this node's err_str.
2818 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2819 this->error_code = 3;
2823 ret_vec.push_back(fta_node);
// The merge over the per-interface copies takes its layout from the first
// copy and keeps the original node name.
2826 mrg_qpn *mrg_node = new mrg_qpn((watch_join_qpn *)ret_vec[0],
2827 node_name, sel_names,ifaces, ifdb);
2828 ret_vec.push_back(mrg_node);
2835 // Used to search for unresolved interface param refs in an hfta.
// Accumulates into `ret` the interface-parameter reference counts reported by
// count_se_ifp_refs for each select-list expression and by count_pr_ifp_refs
// for each where predicate; ifpnames is passed through to both helpers.
// (The initialization and return of `ret` are not visible in this excerpt.)
2837 int spx_qpn::count_ifp_refs(set<string> &ifpnames){
2840 for(i=0;i<select_list.size();++i)
2841 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2842 for(i=0;i<where.size();++i)
2843 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulates into `ret` the interface-parameter reference counts over the
// select list, the where and having predicates, the aggregate expressions
// and the group-by definitions; ifpnames is passed through to the helpers.
// (The initialization and return of `ret` are not visible in this excerpt.)
2847 int sgah_qpn::count_ifp_refs(set<string> &ifpnames){
2850 for(i=0;i<select_list.size();++i)
2851 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2852 for(i=0;i<where.size();++i)
2853 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2854 for(i=0;i<having.size();++i)
2855 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2856 for(i=0;i<aggr_tbl.size();++i){
// Builtin aggregates other than the star form carry a single expression.
2857 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2858 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
// Operand-list scan; presumably the branch for non-builtin aggregates
// (the branch condition is not visible in this excerpt - confirm).
2860 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2861 for(j=0;j<opl.size();++j)
2862 ret += count_se_ifp_refs(opl[j],ifpnames);
2865 for(i=0;i<gb_tbl.size();++i){
2866 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Accumulates into `ret` the interface-parameter reference counts over the
// select list, the where, having and closing_when predicates, the aggregate
// expressions and the group-by definitions; ifpnames is passed through.
// (The initialization and return of `ret` are not visible in this excerpt.)
2872 int rsgah_qpn::count_ifp_refs(set<string> &ifpnames){
2875 for(i=0;i<select_list.size();++i)
2876 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2877 for(i=0;i<where.size();++i)
2878 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2879 for(i=0;i<having.size();++i)
2880 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2881 for(i=0;i<closing_when.size();++i)
2882 ret += count_pr_ifp_refs(closing_when[i]->pr,ifpnames);
2883 for(i=0;i<aggr_tbl.size();++i){
// Builtin aggregates other than the star form carry a single expression.
2884 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2885 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
// Operand-list scan; presumably the branch for non-builtin aggregates
// (the branch condition is not visible in this excerpt - confirm).
2887 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2888 for(j=0;j<opl.size();++j)
2889 ret += count_se_ifp_refs(opl[j],ifpnames);
2892 for(i=0;i<gb_tbl.size();++i){
2893 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// count_ifp_refs for watch-table nodes.
// NOTE(review): the body is not visible in this excerpt; presumably there are
// no expressions to scan and a constant is returned - confirm.
2898 int watch_tbl_qpn::count_ifp_refs(set<string> &ifpnames){
// count_ifp_refs for merge nodes.
// NOTE(review): the body is not visible in this excerpt; presumably there are
// no expressions to scan and a constant is returned - confirm.
2902 int mrg_qpn::count_ifp_refs(set<string> &ifpnames){
// Accumulates into `ret` the interface-parameter reference counts over the
// select list and every predicate class of the join: both prefilter lists,
// temporal_eq, hash_eq and postfilter; ifpnames is passed through.
// (The initialization and return of `ret` are not visible in this excerpt.)
2906 int join_eq_hash_qpn::count_ifp_refs(set<string> &ifpnames){
2909 for(i=0;i<select_list.size();++i)
2910 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2911 for(i=0;i<prefilter[0].size();++i)
2912 ret += count_pr_ifp_refs(prefilter[0][i]->pr,ifpnames);
2913 for(i=0;i<prefilter[1].size();++i)
2914 ret += count_pr_ifp_refs(prefilter[1][i]->pr,ifpnames);
2915 for(i=0;i<temporal_eq.size();++i)
2916 ret += count_pr_ifp_refs(temporal_eq[i]->pr,ifpnames);
2917 for(i=0;i<hash_eq.size();++i)
2918 ret += count_pr_ifp_refs(hash_eq[i]->pr,ifpnames);
2919 for(i=0;i<postfilter.size();++i)
2920 ret += count_pr_ifp_refs(postfilter[i]->pr,ifpnames);
// Accumulates into `ret` the interface-parameter reference counts over the
// select list and the combined where list; ifpnames is passed through.
// (The initialization and return of `ret` are not visible in this excerpt.)
2924 int filter_join_qpn::count_ifp_refs(set<string> &ifpnames){
2927 for(i=0;i<select_list.size();++i)
2928 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2929 for(i=0;i<where.size();++i)
2930 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulates into `ret` the interface-parameter reference counts over the
// select list and the combined where list; ifpnames is passed through.
// (The initialization and return of `ret` are not visible in this excerpt.)
2934 int watch_join_qpn::count_ifp_refs(set<string> &ifpnames){
2937 for(i=0;i<select_list.size();++i)
2938 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2939 for(i=0;i<where.size();++i)
2940 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2946 // Resolve interface params to string literals
// Resolves interface-parameter references in the select list and the where
// predicates, using the machine and interface of the first FROM table.
//   ifdb - interface database handed to the resolve helpers
//   err  - error-message string handed to the resolve helpers
// A non-zero helper result is treated as failure (the statements executed on
// failure, and the return values, are not visible in this excerpt).
2947 int filter_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2950 string ifname = from[0]->get_interface();
2951 string ifmach = from[0]->get_machine();
2952 for(i=0;i<select_list.size();++i)
2953 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2955 for(i=0;i<where.size();++i)
2956 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolves interface-parameter references in the select list and the where
// predicates, using the machine and interface of the first FROM table.
//   ifdb - interface database handed to the resolve helpers
//   err  - error-message string handed to the resolve helpers
// A non-zero helper result is treated as failure (the statements executed on
// failure, and the return values, are not visible in this excerpt).
2961 int watch_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2964 string ifname = from[0]->get_interface();
2965 string ifmach = from[0]->get_machine();
2966 for(i=0;i<select_list.size();++i)
2967 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2969 for(i=0;i<where.size();++i)
2970 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolves interface-parameter references in the select list and the where
// predicates, using the machine and interface of this node's table_name.
//   ifdb - interface database handed to the resolve helpers
//   err  - error-message string handed to the resolve helpers
// A non-zero helper result is treated as failure (the statements executed on
// failure, and the return values, are not visible in this excerpt).
2976 int spx_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2979 string ifname = table_name->get_interface();
2980 string ifmach = table_name->get_machine();
2981 for(i=0;i<select_list.size();++i)
2982 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2984 for(i=0;i<where.size();++i)
2985 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolves interface-parameter references throughout an aggregation node:
// select list, where and having predicates, aggregate expressions (or
// operand lists) and group-by definitions, using the machine and interface
// of this node's table_name.
//   ifdb - interface database handed to the resolve helpers
//   err  - error-message string handed to the resolve helpers
// A non-zero helper result is treated as failure (the statements executed on
// failure, and the return values, are not visible in this excerpt).
2990 int sgah_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2993 string ifname = table_name->get_interface();
2994 string ifmach = table_name->get_machine();
2996 //printf("Select list has %d elements\n",select_list.size());
2997 for(i=0;i<select_list.size();++i){
2998 //printf("\tresolving elemet %d\n",i);
2999 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) ){
3003 for(i=0;i<where.size();++i){
3004 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err) )
3007 for(i=0;i<having.size();++i){
3008 if( resolve_pr_ifp_refs(having[i]->pr,ifmach, ifname, ifdb, err) )
3011 //printf("aggr list has %d elements\n",select_list.size());
3012 for(i=0;i<aggr_tbl.size();++i){
3013 //printf("\tresolving elemet %d\n",i);
// Builtin aggregates other than the star form carry a single expression.
3014 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
3015 //printf("\t\t\tbuiltin\n");
3016 if( resolve_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifmach, ifname, ifdb, err) )
// Operand-list pass; per the debug text below this is the UDAF case (the
// branch condition itself is not visible in this excerpt).
3019 //printf("\t\t\tudaf\n");
3020 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
3021 for(j=0;j<opl.size();++j)
3022 if( resolve_se_ifp_refs(opl[j],ifmach, ifname, ifdb, err) )
3026 for(i=0;i<gb_tbl.size();++i){
3027 if( resolve_se_ifp_refs(gb_tbl.get_def(i), ifmach, ifname, ifdb, err) )
3036 SPLITTING A SELECTION_PROJECTION OPERATOR
3038 An SPX node may reference:
3039 literals, parameters, colrefs, functions, operators
3040 An SPX node may not reference:
3041 group-by variables, aggregates
3043 An SPX node contains
3044 selection list of SEs
3045 where list of CNF predicates
3048 If each selection SE and each where predicate is fta-safe
3049 execute entire operator as an LFTA.
3051 for each predicate in the where clause
3052 if it is fta safe, execute it in the lfta
3053 else, split each SE in the predicate, evaluate the
3054 top-level SEs in the hfta and eval the predicate on that.
3055 For each SE in the se list
3056 Split the SE, eval the high level part, push onto hfta
3060 A SE represents a value which must be computed. The LFTA
3061 must provide sub-values from which the HFTA can compute the
3063 1) the SE is fta-safe
3064 Create an entry in the selection list of the LFTA which is
3065 the SE itself. Reference this LFTA selection list entry in
3066 the HFTA (via a field name assigned to the lfta selection
3068 2) The SE is not fta-safe
3069 Determine the boundary between the fta-safe and the fta-unsafe
3070 portions of the SE. The result is a rooted tree (which is
3071 evaluated at the HFTA) which references sub-SEs (which are
3072 evaluated at the LFTA). Each of the sub-SEs is placed on
3073 the selection list of the LFTA and assigned field names,
3074 the top part is evaluated at the HFTA and references the
3075 sub-SEs through their assigned field names.
3076 The only SEs on the LFTA selection list are those created by
3077 the above mechanism. The collection of assigned field names becomes
3078 the schema of the LFTA.
3080 TODO: insert tablevar names into the colrefs.
// Splits a selection/projection node into LFTA and HFTA parts.
// Visible paths:
//   * the source is not a PROTOCOL_SCHEMA table: the node itself is returned;
//   * every select expression and where predicate passes the fta check: one
//     copy of the query is produced per interface, plus a merge node over the
//     copies when there is more than one interface;
//   * otherwise the query is divided into an LFTA part ("_fta_" + node_name)
//     and a stream (HFTA) part named node_name that reads from it; with
//     several interfaces the LFTA part is replicated per interface and merged.
// hfta_returned is set to 2 on the visible multi-interface split path; its
// value on the other paths is not visible in this excerpt.
// A failure in resolve_if_params sets error_code to 3 on this node.
3084 vector<qp_node *> spx_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3087 vector<qp_node *> ret_vec;
3089 // If the node reads from a stream, don't split.
3090 // int t = Schema->get_table_ref(table_name->get_schema_name());
3091 int t = table_name->get_schema_ref();
3092 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3094 ret_vec.push_back(this);
3099 // Get the set of interfaces it accesses.
3102 vector<string> sel_names;
3103 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3104 if (ifaces.empty()) {
3105 fprintf(stderr,"INTERNAL ERROR in spx_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3110 // The FTA node, it is always returned.
// NOTE(review): table_name is shared by pointer with this node here, so
// later setters called through fta_node->table_name also affect this node.
3112 spx_qpn *fta_node = new spx_qpn();
3113 fta_node->table_name = table_name;
3115 // for colname imputation
3116 // vector<string> fta_flds, stream_flds;
3119 // First check if the query can be pushed to the FTA.
// fta_ok stays true only if every expression and predicate passes the check.
3122 for(s=0;s<select_list.size();s++){
3123 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
3126 for(p=0;p<where.size();p++){
3127 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
3131 ////////////////////////////////////////////////////////////
3132 // The query can be executed entirely in the FTA.
3135 for(si=0;si<ifaces.size();++si){
// NOTE(review): fta_node is re-allocated for every interface; the object
// created before the loop is not referenced again in the visible code on
// this path - possible leak, confirm.
3136 fta_node = new spx_qpn();
// A single interface keeps the original node name; otherwise each copy is
// named "_" + node name + "_" + machine + "_" + interface.
3139 if(ifaces.size()==1)
3140 fta_node->set_node_name( node_name );
3142 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3144 fta_node->set_node_name(new_name);
3146 sel_names.push_back(fta_node->get_node_name());
// Each copy gets its own table variable bound to one concrete interface.
3149 fta_node->table_name = table_name->duplicate();
3150 fta_node->table_name->set_machine(ifaces[si].first);
3151 fta_node->table_name->set_interface(ifaces[si].second);
3152 fta_node->table_name->set_ifq(false);
3154 for(s=0;s<select_list.size();s++){
3155 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
3157 for(p=0;p<where.size();p++){
3158 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
3159 cnf_elem *new_cnf = new cnf_elem(new_pr);
3160 analyze_cnf(new_cnf);
3162 fta_node->where.push_back(new_cnf);
3165 // Xfer all of the parameters.
3166 // Use existing handle annotations.
3167 vector<string> param_names = param_tbl->get_param_names();
3169 for(pi=0;pi<param_names.size();pi++){
3170 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3171 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3172 param_tbl->handle_access(param_names[pi]));
3174 fta_node->definitions = definitions;
3175 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3176 this->error_code = 3;
3180 ret_vec.push_back(fta_node);
// More than one interface: add a merge over the per-interface copies, using
// the first copy for the layout and keeping the original node name.
3183 if(ifaces.size() > 1){
3184 spx_qpn *tmp_spx = (spx_qpn *)(ret_vec[0]);
3185 mrg_qpn *mrg_node = new mrg_qpn(tmp_spx,
3186 node_name, sel_names,ifaces, ifdb);
3188 Do not split sources until we are done with optimizations
3189 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3190 for(i=0;i<split_merge.size();++i){
3191 ret_vec.push_back(split_merge[i]);
3193 hfta_returned = split_merge.size();
3195 ret_vec.push_back(mrg_node);
3200 // printf("OK as FTA.\n");
3201 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3206 ////////////////////////////////////////////////////
3207 // The fta must be split. Create a stream node.
3208 // NOTE : I am counting on the single
3209 // table in the from list. (Joins handled in a different operator).
3213 spx_qpn *stream_node = new spx_qpn();
3214 stream_node->set_node_name( node_name );
3215 // Create the tablevar in the stream's FROM clause.
3216 // set the schema name to the name of the LFTA,
3217 // and use the same tablevar name.
3218 stream_node->table_name = new tablevar_t(
3219 ("_fta_"+node_name).c_str()
3221 stream_node->table_name->set_range_var(table_name->get_var_name());
3224 fta_node->set_node_name( "_fta_"+node_name );
3226 // table var names of fta, stream.
3227 string fta_var = fta_node->table_name->get_var_name();
3228 string stream_var = stream_node->table_name->get_var_name();
3230 // Set up select list vector
3231 vector< vector<select_element *> *> select_vec;
3232 select_vec.push_back(&(fta_node->select_list)); // only one child
3235 // Split the select list into its FTA and stream parts.
3236 // If any part of the SE is fta-unsafe, it will return
3237 // a SE to execute at the stream ref'ing SE's evaluated
3238 // at the fta (which are put on the FTA's select list as a side effect).
3239 // If the SE is fta-safe, put it on the fta select list, make
3240 // a ref to it and put the ref on the stream select list.
3241 for(s=0;s<select_list.size();s++){
3242 bool fta_forbidden = false;
3243 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3244 // scalarexp_t *root_se = split_fta_se(
3245 // select_list[s]->se,fta_forbidden, fta_node->select_list, Ext_fcns
3247 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
3248 fta_forbidden, se_src, select_vec, Ext_fcns
3250 // if(fta_forbidden){
// The returned root is placed on the stream select list directly when the
// expression is fta-forbidden or se_src is still SPLIT_FTAVEC_NOTBLVAR;
// otherwise the stream select list gets a reference built by make_fta_se_ref.
3251 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3252 stream_node->select_list.push_back(
3253 new select_element(root_se, select_list[s]->name)
3256 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,root_se,0);
3257 stream_node->select_list.push_back(
3258 new select_element(new_se, select_list[s]->name)
3264 // The WHERE clause has already been split into a set of clauses
3265 // that are ANDED together. For each clause, check if its FTA-safe.
3266 // If not, split its SE's into fta-safe and stream-executing parts,
3267 // then put a clause which ref's the SEs into the stream.
3268 // Else put it into the LFTA.
3269 predicate_t *pr_root;
3271 for(p=0;p<where.size();p++){
// check_fta_forbidden_pr returning false means the predicate is not fta-safe.
3272 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) ){
3273 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3274 // pr_root = split_fta_pr( where[p]->pr, fta_node->select_list, Ext_fcns);
3275 fta_forbidden = true;
3277 pr_root = dup_pr(where[p]->pr, NULL);
3278 fta_forbidden = false;
3280 cnf_elem *cnf_root = new cnf_elem(pr_root);
3281 analyze_cnf(cnf_root);
// The clause lands in exactly one where list, presumably chosen by
// fta_forbidden (the condition is not visible in this excerpt).
3284 stream_node->where.push_back(cnf_root);
3286 fta_node->where.push_back(cnf_root);
3292 // Divide the parameters among the stream, FTA.
3293 // Currently : assume that the stream receives all parameters
3294 // and parameter updates, incorporates them, then passes
3295 // all of the parameters to the FTA.
3296 // This will need to change (tables, fta-unsafe types. etc.)
3298 // I will pass on the use_handle_access marking, even
3299 // though the fcn call that requires handle access might
3300 // exist in only one of the parts of the query.
3301 // Parameter manipulation and handle access determination will
3302 // need to be revisited anyway.
3303 vector<string> param_names = param_tbl->get_param_names();
3305 for(pi=0;pi<param_names.size();pi++){
3306 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3307 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3308 param_tbl->handle_access(param_names[pi]));
3309 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3310 param_tbl->handle_access(param_names[pi]));
// The LFTA part drops the "_referenced_ifaces" definition; the stream keeps it.
3313 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3314 stream_node->definitions = definitions;
3316 // Now split by interfaces
3317 if(ifaces.size() > 1){
3318 for(si=0;si<ifaces.size();++si){
3319 spx_qpn *subq_node = new spx_qpn();
3321 // Name the subquery
3322 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3324 subq_node->set_node_name( new_name) ;
3325 sel_names.push_back(subq_node->get_node_name());
3328 subq_node->table_name = fta_node->table_name->duplicate();
3329 subq_node->table_name->set_machine(ifaces[si].first);
3330 subq_node->table_name->set_interface(ifaces[si].second);
3331 subq_node->table_name->set_ifq(false);
// Copy the already-split LFTA select list and where clauses.
3333 for(s=0;s<fta_node->select_list.size();s++){
3334 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3336 for(p=0;p<fta_node->where.size();p++){
3337 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3338 cnf_elem *new_cnf = new cnf_elem(new_pr);
3339 analyze_cnf(new_cnf);
3341 subq_node->where.push_back(new_cnf);
3343 // Xfer all of the parameters.
3344 // Use existing handle annotations.
3345 vector<string> param_names = param_tbl->get_param_names();
3347 for(pi=0;pi<param_names.size();pi++){
3348 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3349 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3350 param_tbl->handle_access(param_names[pi]));
3352 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3353 this->error_code = 3;
3356 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3358 ret_vec.push_back(subq_node);
// The merge over the subqueries takes the LFTA name ("_fta_" + node_name),
// which is the table the stream node reads from.
3361 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3362 fta_node->node_name, sel_names, ifaces, ifdb);
3364 Do not split sources until we are done with optimizations
3365 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3366 for(i=0;i<split_merge.size();++i){
3367 ret_vec.push_back(split_merge[i]);
3370 ret_vec.push_back(mrg_node);
3371 ret_vec.push_back(stream_node);
// Two hfta nodes are reported: the merge node and the stream node.
3372 hfta_returned = 1/*split_merge.size()*/ + 1;
// Single interface: bind the LFTA part directly to it.
// NOTE(review): fta_node->table_name is the pointer shared with this node
// (assigned near the top), so these setters also modify this->table_name.
3375 fta_node->table_name->set_machine(ifaces[0].first);
3376 fta_node->table_name->set_interface(ifaces[0].second);
3377 fta_node->table_name->set_ifq(false);
3378 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3379 this->error_code = 3;
3382 ret_vec.push_back(fta_node);
3383 ret_vec.push_back(stream_node);
3387 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3388 // printf("Stream node is:\n%s\n\n",stream_node->to_query_string().c_str() );
3396 Splitting an aggregation+sampling operator.
3397 right now, return an error if any splitting is required.
// Splitting is not implemented for the sampling/aggregation operator.
// A node whose source is not a PROTOCOL_SCHEMA table is returned unchanged;
// otherwise an error is printed to stderr (what follows the message is not
// visible in this excerpt).
3400 vector<qp_node *> sgahcwcb_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3404 vector<qp_node *> ret_vec;
3405 int s, p, g, a, o, i;
3408 vector<string> fta_flds, stream_flds;
3410 // If the node reads from a stream, don't split.
3411 // int t = Schema->get_table_ref(table_name->get_schema_name());
3412 int t = table_name->get_schema_ref();
3413 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3414 ret_vec.push_back(this);
3418 fprintf(stderr,"ERROR : cannot split a sampling operator (not yet implemented).\n");
3428 Splitting a running aggregation operator.
3429 The code is almost identical to that of the sgah operator
3431 - there is no lfta-only option.
3432 - the stream node is rsgah_qpn (lfta is sgah or spx)
3433 - need to handle the closing when (similar to having)
// Split a running-aggregation (rsgah) node into an LFTA part and an HFTA part.
//
// Two split shapes are produced, chosen by the safety checks below:
//   - aggr/aggr      : an sgah_qpn LFTA doing partial aggregation feeding an
//                      rsgah_qpn HFTA (stream_node) that completes it;
//   - selection/aggr : an spx_qpn LFTA doing selection/projection feeding an
//                      rsgah_qpn HFTA that does all of the aggregation.
// In either shape, when more than one interface is accessed, one LFTA
// subquery is generated per interface and a mrg_qpn merges them.
//
// Parameters:
//   Ext_fcns      - external function list; used for the FTA-safety checks
//                   and to look up UDAF sub/super aggregate ids.
//   Schema        - used to test whether the source is a PROTOCOL_SCHEMA.
//   hfta_returned - output; set to 2 in the multi-interface branches
//                   (merge node + stream node).
//   ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx
//                 - forwarded to get_ifaces(); ifdb is also passed to
//                   resolve_if_params() and to the mrg_qpn constructor.
// Returns: the nodes replacing this one. Only `this` if the source is not a
//   protocol; per-interface subqueries + merge node + stream node when
//   several interfaces are accessed; otherwise fta_node + stream_node.
// Errors: error_code is set to 1 (with err_str) when an LFTA-unsafe
//   construct is found and select_lfta is not defined, and to 3 when
//   resolve_if_params() reports failure.
3436 vector<qp_node *> rsgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3440 vector<qp_node *> ret_vec;
3441 int s, p, g, a, o, i;
3444 vector<string> fta_flds, stream_flds;
3446 // If the node reads from a stream, don't split.
3447 // int t = Schema->get_table_ref(table_name->get_schema_name());
3448 int t = table_name->get_schema_ref();
3449 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3450 ret_vec.push_back(this);
3454 // Get the set of interfaces it accesses.
3456 vector<string> sel_names;
3457 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3458 if (ifaces.empty()) {
3459 fprintf(stderr,"INTERNAL ERROR in rsgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3466 //////////////////////////////////////////////////////////////
3467 /// Split into lfta, hfta.
3469 // A rsgah node must always be split,
3470 // if for no other reason than to complete the
3471 // partial aggregation.
3473 // First, determine if the query can be split into aggr/aggr,
3474 // or if it must be selection/aggr.
3475 // Splitting into selection/aggr is allowed only
3476 // if select_lfta is set.
// select_allowed is true when the "select_lfta" define is present on this node.
3479 bool select_allowed = definitions.count("select_lfta")>0;
3480 bool select_rqd = false;
// Check 1: group-by definitions. Unsafe ones are an error unless select_lfta
// is set; their indices are remembered in unsafe_gbvars for the WHERE split.
// NOTE(review): the sprintf calls below write into tmpstr (declared outside
// this function) without a length bound; query strings are unbounded.
3482 set<int> unsafe_gbvars; // for processing where clause
3483 for(g=0;g<gb_tbl.size();g++){
3484 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3485 if(!select_allowed){
3486 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3487 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3489 this->error_code = 1;
3490 this->err_str = tmpstr;
3494 unsafe_gbvars.insert(g);
3499 // Verify that the SEs in the aggregate definitions are fta-safe
3500 for(a=0;a<aggr_tbl.size();++a){
3501 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3502 if(ase != NULL){ // COUNT(*) does not have a SE.
3503 if(!select_allowed){
3504 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3505 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3506 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3508 this->error_code = 1;
3509 this->err_str = tmpstr;
3518 // Verify that all of the ref'd UDAFs can be split.
// A UDAF is splittable only if both a super- and a sub-aggregate id exist.
3520 for(a=0;a<aggr_tbl.size();++a){
3521 if(! aggr_tbl.is_builtin(a)){
3522 int afcn = aggr_tbl.get_fcn_id(a);
3523 int super_id = Ext_fcns->get_superaggr_id(afcn);
3524 int sub_id = Ext_fcns->get_subaggr_id(afcn);
3525 if(super_id < 0 || sub_id < 0){
3526 if(!select_allowed){
3527 this->err_str += "ERROR in rsgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3528 this->error_code = 1;
// Check: every WHERE predicate must be FTA-safe for an aggr/aggr split.
3537 for(p=0;p<where.size();p++){
3538 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3539 if(!select_allowed){
3540 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3541 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3543 this->error_code = 1;
3544 this->err_str = tmpstr;
3555 /////////////////////////////////////////////////////
3556 // Split into aggr/aggr.
// LFTA side: a plain sgah node named "_fta_"+node_name.
// NOTE(review): fta_node->table_name is a pointer copy of this->table_name,
// so the set_machine/set_interface/set_ifq calls made on it later in the
// single-interface branch also modify this node's tablevar.
3562 sgah_qpn *fta_node = new sgah_qpn();
3563 fta_node->table_name = table_name;
3564 fta_node->set_node_name( "_fta_"+node_name );
3565 fta_node->table_name->set_range_var(table_name->get_var_name());
// HFTA side: keeps the original node name and reads from the "_fta_" stream.
3568 rsgah_qpn *stream_node = new rsgah_qpn();
3569 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3570 stream_node->set_node_name( node_name );
3571 stream_node->table_name->set_range_var(table_name->get_var_name());
3573 // First, process the group-by variables.
3574 // The fta must supply the values of all the gbvars.
3575 // If a gb is computed, the computation must be
3576 // performed at the FTA, so the SE must be FTA-safe.
3577 // Nice side effect : the gbvar table contains
3578 // matching entries for the original query, the lfta query,
3579 // and the hfta query. So gbrefs in the new queries are set
3580 // correctly just by inheriting the gbrefs from the old query.
3581 // If this property changed, I'll need translation tables.
3584 for(g=0;g<gb_tbl.size();g++){
3585 // Insert the gbvar into the lfta.
3586 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
3587 fta_node->gb_tbl.add_gb_var(
3588 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
3591 // Insert a ref to the value of the gbvar into the lfta select list.
3592 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
3593 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
3594 gbvar_fta->set_gb_ref(g);
3595 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
3596 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
3598 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
3599 gbvar_stream->set_gb_ref(-1); // used as GBvar def
3600 stream_node->gb_tbl.add_gb_var(
3601 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
3606 // SEs in the aggregate definitions.
3607 // They are all safe, so split them up for later processing.
// hfta_aggr_se is keyed by aggregate index; it is consumed by the
// rehome_fta_se / rehome_fta_pr calls below.
3608 map<int, scalarexp_t *> hfta_aggr_se;
3609 for(a=0;a<aggr_tbl.size();++a){
3610 split_fta_aggr( &(aggr_tbl), a,
3611 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
3612 fta_node->select_list,
3619 // Next, the select list.
3621 for(s=0;s<select_list.size();s++){
3622 bool fta_forbidden = false;
3623 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3624 stream_node->select_list.push_back(
3625 new select_element(root_se, select_list[s]->name));
3630 // All the predicates in the where clause must execute
3633 for(p=0;p<where.size();p++){
3634 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
3635 cnf_elem *new_cnf = new cnf_elem(new_pr);
3636 analyze_cnf(new_cnf);
3638 fta_node->where.push_back(new_cnf);
3641 // All of the predicates in the having clause must
3642 // execute in the stream node.
3644 for(p=0;p<having.size();p++){
3645 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3646 cnf_elem *cnf_root = new cnf_elem(pr_root);
3647 analyze_cnf(cnf_root);
3649 stream_node->having.push_back(cnf_root);
3652 // All of the predicates in the closing when clause must
3653 // execute in the stream node.
3655 for(p=0;p<closing_when.size();p++){
3656 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3657 cnf_elem *cnf_root = new cnf_elem(pr_root);
3658 analyze_cnf(cnf_root);
3660 stream_node->closing_when.push_back(cnf_root);
3664 // Divide the parameters among the stream, FTA.
3665 // Currently : assume that the stream receives all parameters
3666 // and parameter updates, incorporates them, then passes
3667 // all of the parameters to the FTA.
3668 // This will need to change (tables, fta-unsafe types. etc.)
3670 // I will pass on the use_handle_access marking, even
3671 // though the fcn call that requires handle access might
3672 // exist in only one of the parts of the query.
3673 // Parameter manipulation and handle access determination will
3674 // need to be revisited anyway.
3675 vector<string> param_names = param_tbl->get_param_names();
3677 for(pi=0;pi<param_names.size();pi++){
3678 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3679 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3680 param_tbl->handle_access(param_names[pi]));
3681 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3682 param_tbl->handle_access(param_names[pi]));
// Both halves inherit the defines; the LFTA copy drops "_referenced_ifaces".
3684 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3685 stream_node->definitions = definitions;
3687 // Now split by interfaces XXXX
// With several interfaces, clone fta_node once per (machine, interface) pair.
3688 if(ifaces.size() > 1){
3689 for(si=0;si<ifaces.size();++si){
3690 sgah_qpn *subq_node = new sgah_qpn();
3692 // Name the subquery
3693 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3695 subq_node->set_node_name( new_name) ;
3696 sel_names.push_back(subq_node->get_node_name());
3699 subq_node->table_name = fta_node->table_name->duplicate();
3700 subq_node->table_name->set_machine(ifaces[si].first);
3701 subq_node->table_name->set_interface(ifaces[si].second);
3702 subq_node->table_name->set_ifq(false);
3705 for(g=0;g<fta_node->gb_tbl.size();g++){
3706 // Insert the gbvar into the lfta.
3707 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
3708 subq_node->gb_tbl.add_gb_var(
3709 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
3713 // Insert the aggregates
3714 for(a=0;a<fta_node->aggr_tbl.size();++a){
3715 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
3718 for(s=0;s<fta_node->select_list.size();s++){
3719 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3721 for(p=0;p<fta_node->where.size();p++){
3722 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3723 cnf_elem *new_cnf = new cnf_elem(new_pr);
3724 analyze_cnf(new_cnf);
3726 subq_node->where.push_back(new_cnf);
3728 for(p=0;p<fta_node->having.size();p++){
3729 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
3730 cnf_elem *new_cnf = new cnf_elem(new_pr);
3731 analyze_cnf(new_cnf);
3733 subq_node->having.push_back(new_cnf);
3735 // Xfer all of the parameters.
3736 // Use existing handle annotations.
3737 vector<string> param_names = param_tbl->get_param_names();
3739 for(pi=0;pi<param_names.size();pi++){
3740 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3741 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3742 param_tbl->handle_access(param_names[pi]));
3744 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3745 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3746 this->error_code = 3;
3750 ret_vec.push_back(subq_node);
// The merge node is built from ret_vec[0], i.e. the first per-interface
// subquery pushed by the loop above, and takes over fta_node's name.
3753 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
3754 fta_node->node_name, sel_names, ifaces, ifdb);
3757 Do not split sources until we are done with optimizations
3758 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3759 for(i=0;i<split_merge.size();++i){
3760 ret_vec.push_back(split_merge[i]);
3763 ret_vec.push_back(mrg_node);
3764 ret_vec.push_back(stream_node);
3765 hfta_returned = 1/*split_merge.size()*/+1;
// Single interface: bind fta_node directly to ifaces[0]; no merge is needed.
3768 fta_node->table_name->set_machine(ifaces[0].first);
3769 fta_node->table_name->set_interface(ifaces[0].second);
3770 fta_node->table_name->set_ifq(false);
3771 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3772 this->error_code = 3;
3775 ret_vec.push_back(fta_node);
3776 ret_vec.push_back(stream_node);
3781 // ret_vec.push_back(fta_node);
3782 // ret_vec.push_back(stream_node);
3789 /////////////////////////////////////////////////////////////////////
3790 /// Split into selection LFTA, aggregation HFTA.
// LFTA side is a selection/projection (spx) node here; as in the aggr/aggr
// split, its table_name is a pointer copy of this->table_name.
3792 spx_qpn *fta_node = new spx_qpn();
3793 fta_node->table_name = table_name;
3794 fta_node->set_node_name( "_fta_"+node_name );
3795 fta_node->table_name->set_range_var(table_name->get_var_name());
3798 rsgah_qpn *stream_node = new rsgah_qpn();
3799 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3800 stream_node->set_node_name( node_name );
3801 stream_node->table_name->set_range_var(table_name->get_var_name());
3804 vector< vector<select_element *> *> select_vec;
3805 select_vec.push_back(&(fta_node->select_list)); // only one child
3807 // Process the gbvars. Split their defining SEs.
3808 for(g=0;g<gb_tbl.size();g++){
3809 bool fta_forbidden = false;
3810 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3812 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
3813 fta_forbidden, se_src, select_vec, Ext_fcns
3815 // if(fta_forbidden) (
3816 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3817 stream_node->gb_tbl.add_gb_var(
3818 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
3821 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
3822 stream_node->gb_tbl.add_gb_var(
3823 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
3828 // Process the aggregate table.
3829 // Copy to stream, split the SEs.
3830 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
3831 for(a=0;a<aggr_tbl.size();++a){
3833 if(aggr_tbl.is_builtin(a)){
3834 if(aggr_tbl.is_star_aggr(a)){
3835 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
3836 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
3838 bool fta_forbidden = false;
3839 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3841 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3842 fta_forbidden, se_src, select_vec, Ext_fcns
3844 // if(fta_forbidden) (
3845 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3846 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
3847 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
3849 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3850 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
3851 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
3854 hse->set_data_type(aggr_tbl.get_data_type(a));
3855 hse->set_aggr_id(a);
3856 hfta_aggr_se[a]=hse;
// Non-builtin aggregate (UDAF): split each operand SE individually, then
// register the UDAF on the stream node with the rewritten operand list.
3858 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
3859 vector<scalarexp_t *> new_opl;
3860 for(o=0;o<opl.size();++o){
3861 bool fta_forbidden = false;
3862 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3863 scalarexp_t *agg_se = split_ftavec_se( opl[o],
3864 fta_forbidden, se_src, select_vec, Ext_fcns
3866 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3867 // fta_forbidden, se_src, select_vec, Ext_fcns
3869 // if(fta_forbidden) (
3870 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3871 new_opl.push_back(agg_se);
3873 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3874 new_opl.push_back(new_se);
3877 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),aggr_tbl.is_superaggr(a), aggr_tbl.is_running_aggr(a),aggr_tbl.has_bailout(a));
3878 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
3879 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
3880 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
3881 hse->set_aggr_id(a);
3882 hfta_aggr_se[a]=hse;
3887 // Process the WHERE clause.
3888 // If it is fta-safe AND it refs only fta-safe gbvars,
3889 // then expand the gbvars and put it into the lfta.
3890 // Else, split it into an hfta predicate ref'ing
3891 // se's computed partially in the lfta.
3893 predicate_t *pr_root;
3895 for(p=0;p<where.size();p++){
3896 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
3897 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3898 fta_forbidden = true;
3900 pr_root = dup_pr(where[p]->pr, NULL);
3901 expand_gbvars_pr(pr_root, gb_tbl);
3902 fta_forbidden = false;
3904 cnf_elem *cnf_root = new cnf_elem(pr_root);
3905 analyze_cnf(cnf_root);
3908 stream_node->where.push_back(cnf_root);
3910 fta_node->where.push_back(cnf_root);
3915 // Process the Select clause, rehome it on the
3917 for(s=0;s<select_list.size();s++){
3918 bool fta_forbidden = false;
3919 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3920 stream_node->select_list.push_back(
3921 new select_element(root_se, select_list[s]->name));
3925 // Process the Having clause
3927 // All of the predicates in the having clause must
3928 // execute in the stream node.
3930 for(p=0;p<having.size();p++){
3931 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3932 cnf_elem *cnf_root = new cnf_elem(pr_root);
3933 analyze_cnf(cnf_root);
3935 stream_node->having.push_back(cnf_root);
3937 // Same for closing when
3938 for(p=0;p<closing_when.size();p++){
3939 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3940 cnf_elem *cnf_root = new cnf_elem(pr_root);
3941 analyze_cnf(cnf_root);
3943 stream_node->closing_when.push_back(cnf_root);
3947 // Handle parameters and a few last details.
3948 vector<string> param_names = param_tbl->get_param_names();
3950 for(pi=0;pi<param_names.size();pi++){
3951 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3952 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3953 param_tbl->handle_access(param_names[pi]));
3954 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3955 param_tbl->handle_access(param_names[pi]));
3958 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3959 stream_node->definitions = definitions;
3961 // Now split by interfaces YYYY
// Same per-interface cloning as in the aggr/aggr split, but the clones are
// spx nodes, so only the select list and WHERE predicates are copied.
3962 if(ifaces.size() > 1){
3963 for(si=0;si<ifaces.size();++si){
3964 spx_qpn *subq_node = new spx_qpn();
3966 // Name the subquery
3967 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3969 subq_node->set_node_name( new_name) ;
3970 sel_names.push_back(subq_node->get_node_name());
3973 subq_node->table_name = fta_node->table_name->duplicate();
3974 subq_node->table_name->set_machine(ifaces[si].first);
3975 subq_node->table_name->set_interface(ifaces[si].second);
3976 subq_node->table_name->set_ifq(false);
3978 for(s=0;s<fta_node->select_list.size();s++){
3979 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3981 for(p=0;p<fta_node->where.size();p++){
3982 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3983 cnf_elem *new_cnf = new cnf_elem(new_pr);
3984 analyze_cnf(new_cnf);
3986 subq_node->where.push_back(new_cnf);
3988 // Xfer all of the parameters.
3989 // Use existing handle annotations.
3990 vector<string> param_names = param_tbl->get_param_names();
3992 for(pi=0;pi<param_names.size();pi++){
3993 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3994 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3995 param_tbl->handle_access(param_names[pi]));
3997 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3998 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3999 this->error_code = 3;
4003 ret_vec.push_back(subq_node);
// As above, the merge node is built from the first subquery in ret_vec.
4006 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4007 fta_node->node_name, sel_names, ifaces, ifdb);
4009 Do not split sources until we are done with optimizations
4010 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4011 for(i=0;i<split_merge.size();++i){
4012 ret_vec.push_back(split_merge[i]);
4015 ret_vec.push_back(mrg_node);
4016 ret_vec.push_back(stream_node);
4017 hfta_returned = 1/*split_merge.size()*/+1;
// Single interface: bind the spx LFTA directly to ifaces[0].
4020 fta_node->table_name->set_machine(ifaces[0].first);
4021 fta_node->table_name->set_interface(ifaces[0].second);
4022 fta_node->table_name->set_ifq(false);
4023 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4024 this->error_code = 3;
4027 ret_vec.push_back(fta_node);
4028 ret_vec.push_back(stream_node);
4038 Splitting an aggregation operator
4040 An aggregation operator can reference
4041 literals, parameters, colrefs, group-by vars, aggregates,
4042 operators, functions
4044 an aggregation contains
4045 A selection list of SEs
4046 A where list of predicates
4047 A list of group-by variable definitions
4048 A list of aggregates to be computed
4049 A HAVING list of predicates.
4051 Aggregation involves two phases:
4052 1) given an input tuple, determine if it satisfies all of
4053 the WHERE predicates. If so, compute the group.
4054 Look up the group, update its aggregates.
4055 2) given a closed group and its aggregates, determine
4056 if these values satisfy all of the HAVING predicates.
4057 If so, evaluate the SEs on the selection list from the
4058 group and its aggregates.
4059 The two-phase nature of aggregation places restrictions on
4060 what can be referenced by different components of the operator
4061 (in addition to functions and operators).
4062 - group-by variables : literals, parameters, colrefs
4063 - WHERE predicates : group-by vars, literals, params, colrefs
4064 - HAVING predicates : group-by vars, literals, params, aggregates
4065 - Selection list SEs : group-by vars, literals, params, aggregates
4067 Splitting an aggregation operator into an LFTA/HFTA part
4068 involves performing partial aggregation at the LFTA and
4069 completing the aggregation at the HFTA.
4070 - given a tuple, the LFTA part evaluates the WHERE clause,
4071 and if it is satisfied, computes the group. lookup the group
4072 and update the aggregates. output the group and its partial
4074 - Given a partial aggregate from the LFTA, look up the group and
4075 update its aggregates. When the group is closed, evaluate
4076 the HAVING clause and the SEs on the selection list.
4077 THEREFORE the selection list of the LFTA must consist of the
4078 group-by variables and the set of (bare) subaggregate values
4079 necessary to compute the super aggregates.
4080 Unlike the case with the SPX operator, the SE splitting point
4081 is at the GBvar and the aggregate value level.
4084 For each group-by variable
4085 Put the GB variable definition in the LFTA GBVAR list.
4086 Put the GBVAR in the LFTA selection list (as an SE).
4087 Put a reference to that GBVAR in the HFTA GBVAR list.
4089 Split the aggregate into a superaggregate and a subaggregate.
4090 The SE of the superaggregate references the subaggregate value.
4091 (this will need modifications for MF aggregation)
4092 For each SE in the selection list, HAVING predicate
4093 Make GBVAR references point to the new GBVAR
4094 make the aggregate value references point to the new aggregates.
4096 SEs are not so much split as their ref's are changed.
4098 TODO: insert tablevar names into the colrefs.
4103 vector<qp_node *> sgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4107 vector<qp_node *> ret_vec;
4108 int s, p, g, a, o, i;
4111 vector<string> fta_flds, stream_flds;
4113 // If the node reads from a stream, don't split.
4114 // int t = Schema->get_table_ref(table_name->get_schema_name());
4115 int t = table_name->get_schema_ref();
4116 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
4117 ret_vec.push_back(this);
4121 // Get the set of interfaces it accesses.
4123 vector<string> sel_names;
4124 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
4125 if (ifaces.empty()) {
4126 fprintf(stderr,"INTERNAL ERROR in sgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
4132 //////////////////////////////////////////////
4133 // Is this LFTA-only?
4134 if(definitions.count("lfta_aggregation")>0){
4135 // Yes. Ensure that everything is lfta-safe.
4137 // Check only one interface is accessed.
4138 if(ifaces.size()>1){
4139 this->err_str = "ERROR, group-by query "+node_name+" is lfta-only, but it accesses more than one interface:\n";
4140 for(si=0;si<ifaces.size();++si)
4141 this->err_str += "\t"+ifaces[si].first+"."+ifaces[si].second+"\n";
4142 this->error_code = 2;
4146 // Check the group-by attributes
4147 for(g=0;g<gb_tbl.size();g++){
4148 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4149 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition and the query is lfta-only (%s).\n",
4150 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4152 this->error_code = 1;
4153 this->err_str = tmpstr;
4158 // Verify that the SEs in the aggregate definitions are fta-safe
4159 for(a=0;a<aggr_tbl.size();++a){
4160 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4161 if(ase != NULL){ // COUNT(*) does not have a SE.
4162 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4163 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe scalar expression and the query is lfta-only (%s).\n",
4164 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4166 this->error_code = 1;
4167 this->err_str = tmpstr;
4171 if(! aggr_tbl.fta_legal(a,Ext_fcns)){
4172 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4173 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe aggregate and the query is lfta-only (%s).\n",
4174 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4176 this->error_code = 1;
4177 this->err_str = tmpstr;
4183 // Ensure that all the aggregates are fta-safe ....
4187 for(s=0;s<select_list.size();s++){
4188 if(! check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns)){
4189 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4190 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4192 this->error_code = 1;
4193 this->err_str = tmpstr;
4200 for(p=0;p<where.size();p++){
4201 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4202 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4203 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4205 this->error_code = 1;
4206 this->err_str = tmpstr;
4213 if(having.size()>0){
4214 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : the query is lfta-only, so it can't have a HAVING clause.(%s).\n",
4215 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4217 this->error_code = 1;
4218 this->err_str = tmpstr;
4221 // The query is lfta safe, return it.
4224 ret_vec.push_back(this);
4228 //////////////////////////////////////////////////////////////
4229 /// Split into lfta, hfta.
4231 // A sgah node must always be split,
4232 // if for no other reason than to complete the
4233 // partial aggregation.
4235 // First, determine if the query can be split into aggr/aggr,
4236 // or if it must be selection/aggr.
4237 // Splitting into selection/aggr is allowed only
4238 // if select_lfta is set.
4241 bool select_allowed = definitions.count("select_lfta")>0;
4242 bool select_rqd = false;
4244 set<int> unsafe_gbvars; // for processing where clause
4245 for(g=0;g<gb_tbl.size();g++){
4246 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4247 if(!select_allowed){
4248 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
4249 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4251 this->error_code = 1;
4252 this->err_str = tmpstr;
4256 unsafe_gbvars.insert(g);
4261 // Verify that the SEs in the aggregate definitions are fta-safe
4262 for(a=0;a<aggr_tbl.size();++a){
4263 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4264 if(ase != NULL){ // COUNT(*) does not have a SE.
4265 if(!select_allowed){
4266 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4267 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
4268 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4270 this->error_code = 1;
4271 this->err_str = tmpstr;
4280 // Verify that all of the ref'd UDAFs can be split.
4282 for(a=0;a<aggr_tbl.size();++a){
4283 if(! aggr_tbl.is_builtin(a)){
4284 int afcn = aggr_tbl.get_fcn_id(a);
4285 int super_id = Ext_fcns->get_superaggr_id(afcn);
4286 int sub_id = Ext_fcns->get_subaggr_id(afcn);
4287 if(super_id < 0 || sub_id < 0){
4288 if(!select_allowed){
4289 this->err_str += "ERROR in sgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
4290 this->error_code = 1;
4299 for(p=0;p<where.size();p++){
4300 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4301 if(!select_allowed){
4302 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
4303 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4305 this->error_code = 1;
4306 this->err_str = tmpstr;
4317 /////////////////////////////////////////////////////
4318 // Split into aggr/aggr.
4324 sgah_qpn *fta_node = new sgah_qpn();
4325 fta_node->table_name = table_name;
4326 fta_node->set_node_name( "_fta_"+node_name );
4327 fta_node->table_name->set_range_var(table_name->get_var_name());
4330 sgah_qpn *stream_node = new sgah_qpn();
4331 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4332 stream_node->set_node_name( node_name );
4333 stream_node->table_name->set_range_var(table_name->get_var_name());
4335 // allowed stream disorder. Default is 1,
4336 // can override with max_lfta_disorder setting.
4337 // Also limit the hfta disorder, set to lfta disorder + 1.
4338 // can override with max_hfta_disorder.
4340 fta_node->lfta_disorder = 1;
4341 if(this->get_val_of_def("max_lfta_disorder") != ""){
4342 int d = atoi(this->get_val_of_def("max_lfta_disorder").c_str() );
4344 fprintf(stderr,"Warning, max_lfta_disorder in node %s is %d, must be at least 1, ignoring.\n",node_name.c_str(), d);
4346 fta_node->lfta_disorder = d;
4347 printf("node %s setting lfta_disorder = %d\n",node_name.c_str(),fta_node->lfta_disorder);
4350 if(fta_node->lfta_disorder > 1)
4351 stream_node->hfta_disorder = fta_node->lfta_disorder + 1;
4353 stream_node->hfta_disorder = 1;
4355 if(this->get_val_of_def("max_hfta_disorder") != ""){
4356 int d = atoi(this->get_val_of_def("max_hfta_disorder").c_str() );
4357 if(d<fta_node->lfta_disorder){
4358 fprintf(stderr,"Warning, max_hfta_disorder in node %s is %d, must be at least the max lfta disorder %d, ignoring.\n",node_name.c_str(), d,fta_node->lfta_disorder);
4360 fta_node->lfta_disorder = d;
4362 if(fta_node->lfta_disorder < fta_node->hfta_disorder){
4363 fta_node->hfta_disorder = fta_node->lfta_disorder + 1;
4368 // First, process the group-by variables.
4369 // The fta must supply the values of all the gbvars.
4370 // If a gb is computed, the computation must be
4371 // performed at the FTA, so the SE must be FTA-safe.
4372 // Nice side effect : the gbvar table contains
4373 // matching entries for the original query, the lfta query,
4374 // and the hfta query. So gbrefs in the new queries are set
4375 // correctly just by inheriting the gbrefs from the old query.
4376 // If this property changed, I'll need translation tables.
4379 for(g=0;g<gb_tbl.size();g++){
4380 // Insert the gbvar into the lfta.
4381 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
4382 fta_node->gb_tbl.add_gb_var(
4383 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
4386 // Insert a ref to the value of the gbvar into the lfta select list.
4387 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
4388 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
4389 gbvar_fta->set_gb_ref(g);
4390 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
4391 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
4393 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
4394 gbvar_stream->set_gb_ref(-1); // used as GBvar def
4395 stream_node->gb_tbl.add_gb_var(
4396 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
4399 // multiple aggregation patterns, if any, go with the hfta
4400 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4402 // SEs in the aggregate definitions.
4403 // They are all safe, so split them up for later processing.
4404 map<int, scalarexp_t *> hfta_aggr_se;
4405 for(a=0;a<aggr_tbl.size();++a){
4406 split_fta_aggr( &(aggr_tbl), a,
4407 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
4408 fta_node->select_list,
4416 for(ii=0;ii<fta_flds.size() || ii < fta_node->select_list.size();++ii){
4417 if(ii<fta_flds.size())
4418 printf("\t%s : ",fta_flds[ii].c_str());
4421 if(ii<fta_node->select_list.size())
4422 printf("%s\n",fta_node->select_list[ii]->to_string().c_str());
4426 printf("hfta aggregates are:");
4427 for(ii=0;ii<stream_node->aggr_tbl.size();++ii){
4428 printf(" %s",stream_node->aggr_tbl.get_op(ii).c_str());
4430 printf("\nlfta aggregates are:");
4431 for(ii=0;ii<fta_node->aggr_tbl.size();++ii){
4432 printf(" %s",fta_node->aggr_tbl.get_op(ii).c_str());
4440 // Next, the select list.
4442 for(s=0;s<select_list.size();s++){
4443 bool fta_forbidden = false;
4444 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4445 stream_node->select_list.push_back(
4446 new select_element(root_se, select_list[s]->name));
4451 // All the predicates in the where clause must execute
4454 for(p=0;p<where.size();p++){
4455 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
4456 cnf_elem *new_cnf = new cnf_elem(new_pr);
4457 analyze_cnf(new_cnf);
4459 fta_node->where.push_back(new_cnf);
4462 // All of the predicates in the having clause must
4463 // execute in the stream node.
4465 for(p=0;p<having.size();p++){
4466 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4467 cnf_elem *cnf_root = new cnf_elem(pr_root);
4468 analyze_cnf(cnf_root);
4470 stream_node->having.push_back(cnf_root);
4474 // Divide the parameters among the stream, FTA.
4475 // Currently : assume that the stream receives all parameters
4476 // and parameter updates, incorporates them, then passes
4477 // all of the parameters to the FTA.
4478 // This will need to change (tables, fta-unsafe types. etc.)
4480 // I will pass on the use_handle_access marking, even
4481 // though the fcn call that requires handle access might
4482 // exist in only one of the parts of the query.
4483 // Parameter manipulation and handle access determination will
4484 // need to be revisited anyway.
4485 vector<string> param_names = param_tbl->get_param_names();
4487 for(pi=0;pi<param_names.size();pi++){
4488 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4489 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4490 param_tbl->handle_access(param_names[pi]));
4491 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4492 param_tbl->handle_access(param_names[pi]));
4494 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4495 stream_node->definitions = definitions;
4497 // Now split by interfaces XXXX
4498 if(ifaces.size() > 1){
4499 for(si=0;si<ifaces.size();++si){
4500 sgah_qpn *subq_node = new sgah_qpn();
4502 // Name the subquery
4503 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4505 subq_node->set_node_name( new_name) ;
4506 sel_names.push_back(subq_node->get_node_name());
4509 subq_node->table_name = fta_node->table_name->duplicate();
4510 subq_node->table_name->set_machine(ifaces[si].first);
4511 subq_node->table_name->set_interface(ifaces[si].second);
4512 subq_node->table_name->set_ifq(false);
4515 for(g=0;g<fta_node->gb_tbl.size();g++){
4516 // Insert the gbvar into the lfta.
4517 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
4518 subq_node->gb_tbl.add_gb_var(
4519 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
4523 // Insert the aggregates
4524 for(a=0;a<fta_node->aggr_tbl.size();++a){
4525 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
4528 for(s=0;s<fta_node->select_list.size();s++){
4529 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4531 for(p=0;p<fta_node->where.size();p++){
4532 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4533 cnf_elem *new_cnf = new cnf_elem(new_pr);
4534 analyze_cnf(new_cnf);
4536 subq_node->where.push_back(new_cnf);
4538 for(p=0;p<fta_node->having.size();p++){
4539 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
4540 cnf_elem *new_cnf = new cnf_elem(new_pr);
4541 analyze_cnf(new_cnf);
4543 subq_node->having.push_back(new_cnf);
4545 // Xfer all of the parameters.
4546 // Use existing handle annotations.
4547 vector<string> param_names = param_tbl->get_param_names();
4549 for(pi=0;pi<param_names.size();pi++){
4550 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4551 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4552 param_tbl->handle_access(param_names[pi]));
4554 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4555 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4556 this->error_code = 3;
4561 subq_node->lfta_disorder = fta_node->lfta_disorder;
4563 ret_vec.push_back(subq_node);
4566 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
4567 fta_node->node_name, sel_names, ifaces, ifdb);
4568 mrg_node->set_disorder(fta_node->lfta_disorder);
4571 Do not split sources until we are done with optimizations
4572 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4573 for(i=0;i<split_merge.size();++i){
4574 ret_vec.push_back(split_merge[i]);
4577 ret_vec.push_back(mrg_node);
4578 ret_vec.push_back(stream_node);
4579 hfta_returned = 1/*split_merge.size()*/+1;
4582 fta_node->table_name->set_machine(ifaces[0].first);
4583 fta_node->table_name->set_interface(ifaces[0].second);
4584 fta_node->table_name->set_ifq(false);
4585 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4586 this->error_code = 3;
4589 ret_vec.push_back(fta_node);
4590 ret_vec.push_back(stream_node);
4595 // ret_vec.push_back(fta_node);
4596 // ret_vec.push_back(stream_node);
4603 /////////////////////////////////////////////////////////////////////
4604 /// Split into selection LFTA, aggregation HFTA.
4606 spx_qpn *fta_node = new spx_qpn();
4607 fta_node->table_name = table_name;
4608 fta_node->set_node_name( "_fta_"+node_name );
4609 fta_node->table_name->set_range_var(table_name->get_var_name());
4612 sgah_qpn *stream_node = new sgah_qpn();
4613 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4614 stream_node->set_node_name( node_name );
4615 stream_node->table_name->set_range_var(table_name->get_var_name());
4618 vector< vector<select_element *> *> select_vec;
4619 select_vec.push_back(&(fta_node->select_list)); // only one child
4621 // Process the gbvars. Split their defining SEs.
4622 for(g=0;g<gb_tbl.size();g++){
4623 bool fta_forbidden = false;
4624 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4626 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
4627 fta_forbidden, se_src, select_vec, Ext_fcns
4629 // if(fta_forbidden) (
4630 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4631 stream_node->gb_tbl.add_gb_var(
4632 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
4635 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
4636 stream_node->gb_tbl.add_gb_var(
4637 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
4641 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4643 // Process the aggregate table.
4644 // Copy to stream, split the SEs.
4645 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
4646 for(a=0;a<aggr_tbl.size();++a){
4648 if(aggr_tbl.is_builtin(a)){
4649 if(aggr_tbl.is_star_aggr(a)){
4650 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
4651 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
4653 bool fta_forbidden = false;
4654 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4656 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4657 fta_forbidden, se_src, select_vec, Ext_fcns
4659 // if(fta_forbidden) (
4660 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4661 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
4662 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
4664 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4665 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
4666 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
4669 hse->set_data_type(aggr_tbl.get_data_type(a));
4670 hse->set_aggr_id(a);
4671 hfta_aggr_se[a]=hse;
4673 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
4674 vector<scalarexp_t *> new_opl;
4675 for(o=0;o<opl.size();++o){
4676 bool fta_forbidden = false;
4677 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4678 scalarexp_t *agg_se = split_ftavec_se( opl[o],
4679 fta_forbidden, se_src, select_vec, Ext_fcns
4681 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4682 // fta_forbidden, se_src, select_vec, Ext_fcns
4684 // if(fta_forbidden) (
4685 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4686 new_opl.push_back(agg_se);
4688 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4689 new_opl.push_back(new_se);
4692 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
4693 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
4694 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
4695 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
4696 hse->set_aggr_id(a);
4697 hfta_aggr_se[a]=hse;
4702 // Process the WHERE clause.
4703 // If it is fta-safe AND it refs only fta-safe gbvars,
4704 // then expand the gbvars and put it into the lfta.
4705 // Else, split it into an hfta predicate ref'ing
4706 // se's computed partially in the lfta.
4708 predicate_t *pr_root;
4710 for(p=0;p<where.size();p++){
4711 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
4712 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
4713 fta_forbidden = true;
4715 pr_root = dup_pr(where[p]->pr, NULL);
4716 expand_gbvars_pr(pr_root, gb_tbl);
4717 fta_forbidden = false;
4719 cnf_elem *cnf_root = new cnf_elem(pr_root);
4720 analyze_cnf(cnf_root);
4723 stream_node->where.push_back(cnf_root);
4725 fta_node->where.push_back(cnf_root);
4730 // Process the Select clause, rehome it on the
4732 for(s=0;s<select_list.size();s++){
4733 bool fta_forbidden = false;
4734 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4735 stream_node->select_list.push_back(
4736 new select_element(root_se, select_list[s]->name));
4740 // Process the Having clause
4742 // All of the predicates in the having clause must
4743 // execute in the stream node.
4745 for(p=0;p<having.size();p++){
4746 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4747 cnf_elem *cnf_root = new cnf_elem(pr_root);
4748 analyze_cnf(cnf_root);
4750 stream_node->having.push_back(cnf_root);
4753 // Handle parameters and a few last details.
4754 vector<string> param_names = param_tbl->get_param_names();
4756 for(pi=0;pi<param_names.size();pi++){
4757 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4758 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4759 param_tbl->handle_access(param_names[pi]));
4760 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4761 param_tbl->handle_access(param_names[pi]));
4764 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4765 stream_node->definitions = definitions;
4767 // Now split by interfaces YYYY
4768 if(ifaces.size() > 1){
4769 for(si=0;si<ifaces.size();++si){
4770 spx_qpn *subq_node = new spx_qpn();
4772 // Name the subquery
4773 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4775 subq_node->set_node_name( new_name) ;
4776 sel_names.push_back(subq_node->get_node_name());
4779 subq_node->table_name = fta_node->table_name->duplicate();
4780 subq_node->table_name->set_machine(ifaces[si].first);
4781 subq_node->table_name->set_interface(ifaces[si].second);
4782 subq_node->table_name->set_ifq(false);
4784 for(s=0;s<fta_node->select_list.size();s++){
4785 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4787 for(p=0;p<fta_node->where.size();p++){
4788 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4789 cnf_elem *new_cnf = new cnf_elem(new_pr);
4790 analyze_cnf(new_cnf);
4792 subq_node->where.push_back(new_cnf);
4794 // Xfer all of the parameters.
4795 // Use existing handle annotations.
4796 vector<string> param_names = param_tbl->get_param_names();
4798 for(pi=0;pi<param_names.size();pi++){
4799 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4800 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4801 param_tbl->handle_access(param_names[pi]));
4803 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4804 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4805 this->error_code = 3;
4809 ret_vec.push_back(subq_node);
4812 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4813 fta_node->node_name, sel_names, ifaces, ifdb);
4815 Do not split sources until we are done with optimizations
4816 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4817 for(i=0;i<split_merge.size();++i){
4818 ret_vec.push_back(split_merge[i]);
4821 ret_vec.push_back(mrg_node);
4822 ret_vec.push_back(stream_node);
4823 hfta_returned = 1/*split_merge.size()*/+1;
4826 fta_node->table_name->set_machine(ifaces[0].first);
4827 fta_node->table_name->set_interface(ifaces[0].second);
4828 fta_node->table_name->set_ifq(false);
4829 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4830 this->error_code = 3;
4833 ret_vec.push_back(fta_node);
4834 ret_vec.push_back(stream_node);
4839 // ret_vec.push_back(fta_node);
4840 // ret_vec.push_back(stream_node);
4849 SPLITTING AN EQ-TEMPORAL, HASH JOIN OPERATOR
4851 A JOIN_EQ_HASH_QPN node may reference:
4852 literals, parameters, colrefs, functions, operators
4853 A JOIN_EQ_HASH_QPN node may not reference:
4854 group-by variables, aggregates
4856 A JOIN_EQ_HASH_QPN node contains
4857 selection list of SEs
4858 where list of CNF predicates, broken into:
4865 For each tablevar whose source is a PROTOCOL
4866 Create a LFTA for that tablevar
4867 Push as many prefilter[..] predicates to that tablevar as is
4869 Split the SEs in the select list, and the predicates not
// Split this eq-temporal hash join into LFTA selection children plus an
// HFTA join node.
//
// If no FROM source has a PROTOCOL schema, the node itself is placed in the
// result vector. Otherwise a new join_eq_hash_qpn (stream_node) is built, one
// spx_qpn child is created per PROTOCOL source, and the predicates and select
// expressions are divided between the children and stream_node.
//
// Parameters:
//   Ext_fcns         - passed to check_fta_forbidden_pr and split_ftavec_*.
//   Schema           - used to look up the schema type of each FROM source.
//   hfta_returned    - output; set to hfta_nodes.size()+1 on the split path.
//   ifdb             - passed to get_ifaces, resolve_if_params and mrg_qpn.
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces.
//
// ret_vec is filled in this order: per-source child (or per-interface
// sub-query) nodes, then the merge nodes collected in hfta_nodes, then
// stream_node.
// NOTE(review): the return statement, several closing braces and the local
// declarations (f, p, s, si, i, pi, ri, tmpstr) are not in this view --
// presumably ret_vec is returned; confirm against the full file.
4874 vector<qp_node *> join_eq_hash_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4876 vector<qp_node *> ret_vec;
4879 // If the node reads from streams only, don't split.
4880 bool stream_only = true;
4881 for(f=0;f<from.size();++f){
4882 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4883 int t = from[f]->get_schema_ref();
4884 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA) stream_only = false;
4888 ret_vec.push_back(this);
4893 // The HFTA node, it is always returned.
4895 join_eq_hash_qpn *stream_node = new join_eq_hash_qpn();
// stream_node starts with a duplicate of every FROM entry and keeps this
// node's name.
4896 for(f=0;f<from.size();++f){
4897 // tablevar_t *tmp_tblvar = new tablevar_t( from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str());
4898 tablevar_t *tmp_tblvar = from[f]->duplicate();
4899 // tmp_tblvar->set_range_var(from[f]->get_var_name());
4901 stream_node->from.push_back(tmp_tblvar);
4903 stream_node->set_node_name(node_name);
4905 // Create spx (selection) children for each PROTOCOL source.
// child_vec and select_vec are indexed by FROM position; non-PROTOCOL
// positions hold NULL so the indices stay aligned with from[].
4906 vector<spx_qpn *> child_vec;
4907 vector< vector<select_element *> *> select_vec;
4908 for(f=0;f<from.size();++f){
4909 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4910 int t = from[f]->get_schema_ref();
4911 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA){
4912 spx_qpn *child_qpn = new spx_qpn();
// Child name is "_fta_<from position>_<node name>"; the same string is used
// below as the schema name the stream node reads from.
4913 sprintf(tmpstr,"_fta_%d_%s",f,node_name.c_str());
4914 child_qpn->set_node_name(string(tmpstr));
4915 child_qpn->table_name = new tablevar_t(
4916 from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str(), from[f]->get_ifq());
4917 child_qpn->table_name->set_range_var(from[f]->get_var_name());
4918 child_qpn->table_name->set_machine(from[f]->get_machine());
4920 child_vec.push_back(child_qpn);
4921 select_vec.push_back(&(child_qpn->select_list));
4923 // Update the stream's FROM clause to read from this child
4924 stream_node->from[f]->set_interface("");
4925 stream_node->from[f]->set_schema(tmpstr);
4927 child_vec.push_back(NULL);
4928 select_vec.push_back(NULL);
4932 // Push lfta-safe prefilter to the lfta
4933 // TODO: I'm not copying the preds, I dont *think* it will be a problem.
4934 predicate_t *pr_root;
// For a source with a child: predicates for which check_fta_forbidden_pr
// returns true go to the child's WHERE unchanged; the others are split with
// split_ftavec_pr and the result is kept in the stream node's prefilter.
// For a source without a child the prefilter entries are carried over as-is.
4936 for(f=0;f<from.size();++f){
4937 vector<cnf_elem *> pred_vec = prefilter[f];
4938 if(child_vec[f] != NULL){
4939 for(p=0;p<pred_vec.size();++p){
4940 if(check_fta_forbidden_pr(pred_vec[p]->pr,NULL, Ext_fcns)){
4941 child_vec[f]->where.push_back(pred_vec[p]);
4943 pr_root = split_ftavec_pr(pred_vec[p]->pr,select_vec,Ext_fcns);
4944 cnf_elem *cnf_root = new cnf_elem(pr_root);
4945 analyze_cnf(cnf_root);
4946 stream_node->prefilter[f].push_back(cnf_root);
4950 for(p=0;p<pred_vec.size();++p){
4951 stream_node->prefilter[f].push_back(pred_vec[p]);
4957 // Process the other predicates
// temporal_eq, hash_eq and postfilter predicates are all split with
// split_ftavec_pr and stored in the corresponding list of stream_node.
4958 for(p=0;p<temporal_eq.size();++p){
4959 pr_root = split_ftavec_pr(temporal_eq[p]->pr,select_vec,Ext_fcns);
4960 cnf_elem *cnf_root = new cnf_elem(pr_root);
4961 analyze_cnf(cnf_root);
4962 stream_node->temporal_eq.push_back(cnf_root);
4964 for(p=0;p<hash_eq.size();++p){
4965 pr_root = split_ftavec_pr(hash_eq[p]->pr,select_vec,Ext_fcns);
4966 cnf_elem *cnf_root = new cnf_elem(pr_root);
4967 analyze_cnf(cnf_root);
4968 stream_node->hash_eq.push_back(cnf_root);
4970 for(p=0;p<postfilter.size();++p){
4971 pr_root = split_ftavec_pr(postfilter[p]->pr,select_vec,Ext_fcns);
4972 cnf_elem *cnf_root = new cnf_elem(pr_root);
4973 analyze_cnf(cnf_root);
4974 stream_node->postfilter.push_back(cnf_root);
// Select list: each SE is split with split_ftavec_se. If the split is
// fta-forbidden or its source is not a PROTOCOL child, the resulting SE is
// used directly in the stream node; otherwise the stream node gets a
// reference produced by make_fta_se_ref for source se_src.
4978 for(s=0;s<select_list.size();s++){
4979 bool fta_forbidden = false;
4980 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4981 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
4982 fta_forbidden, se_src, select_vec, Ext_fcns
4984 if(fta_forbidden || !is_PROTOCOL_source(se_src, select_vec)){
4985 stream_node->select_list.push_back(
4986 new select_element(root_se, select_list[s]->name) );
4988 scalarexp_t *new_se=make_fta_se_ref(select_vec,root_se,se_src);
4989 stream_node->select_list.push_back(
4990 new select_element(new_se, select_list[s]->name)
4996 // I need to "rehome" the colrefs -- make the annotations in the colrefs
4997 // agree with their tablevars.
// Each child has a single table variable, so its colrefs are bound against a
// one-element FROM list with indices 0,0.
4998 for(f=0;f<child_vec.size();++f){
4999 if(child_vec[f]!=NULL){
5000 vector<tablevar_t *> fm; fm.push_back(child_vec[f]->table_name);
5002 for(s=0;s<child_vec[f]->select_list.size();++s)
5003 bind_colref_se(child_vec[f]->select_list[s]->se, fm,0,0);
5004 for(p=0;p<child_vec[f]->where.size();++p)
5005 // bind_colref_pr(child_vec[f]->where[p]->pr, fm,f,0);
5006 bind_colref_pr(child_vec[f]->where[p]->pr, fm,0,0);
5010 // rehome the colrefs in the hfta node.
// Every predicate list and the select list are rebound once per FROM position
// f, passing f for both index arguments of bind_colref_*.
5011 for(f=0;f<stream_node->from.size();++f){
5012 stream_node->where.clear();
5013 for(s=0;s<stream_node->from.size();++s){
5014 for(p=0;p<stream_node->prefilter[s].size();++p){
5015 bind_colref_pr((stream_node->prefilter[s])[p]->pr,stream_node->from,f,f);
5018 for(p=0;p<stream_node->temporal_eq.size();++p){
5019 bind_colref_pr(stream_node->temporal_eq[p]->pr,stream_node->from,f,f);
5021 for(p=0;p<stream_node->hash_eq.size();++p){
5022 bind_colref_pr(stream_node->hash_eq[p]->pr,stream_node->from,f,f);
5024 for(p=0;p<stream_node->postfilter.size();++p){
5025 bind_colref_pr(stream_node->postfilter[p]->pr,stream_node->from,f,f);
5027 for(s=0;s<stream_node->select_list.size();++s){
5028 bind_colref_se(stream_node->select_list[s]->se,stream_node->from,f,f);
5032 // Rebuild the WHERE clause
// Order: all prefilters (by FROM position), temporal_eq, hash_eq, postfilter.
5033 stream_node->where.clear();
5034 for(s=0;s<stream_node->from.size();++s){
5035 for(p=0;p<stream_node->prefilter[s].size();++p){
5036 stream_node->where.push_back((stream_node->prefilter[s])[p]);
5039 for(p=0;p<stream_node->temporal_eq.size();++p){
5040 stream_node->where.push_back(stream_node->temporal_eq[p]);
5042 for(p=0;p<stream_node->hash_eq.size();++p){
5043 stream_node->where.push_back(stream_node->hash_eq[p]);
5045 for(p=0;p<stream_node->postfilter.size();++p){
5046 stream_node->where.push_back(stream_node->postfilter[p]);
5050 // Build the return list
// Merge nodes are collected separately in hfta_nodes so that they are
// appended to ret_vec after all child / sub-query nodes.
5051 vector<qp_node *> hfta_nodes;
5053 for(f=0;f<from.size();++f){
5054 if(child_vec[f] != NULL){
5055 spx_qpn *c_node = child_vec[f];
5056 vector<pair<string, string> > ifaces = get_ifaces(c_node->table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
5057 if (ifaces.empty()) {
5058 fprintf(stderr,"INTERNAL ERROR in join_eq_hash_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
// Single interface: bind the child directly to that (machine, interface)
// pair. A nonzero result from resolve_if_params sets error_code to 3.
5062 if(ifaces.size() == 1){
5063 c_node->table_name->set_machine(ifaces[0].first);
5064 c_node->table_name->set_interface(ifaces[0].second);
5065 c_node->table_name->set_ifq(false);
5066 if(c_node->resolve_if_params(ifdb, this->err_str)){
5067 this->error_code = 3;
5070 ret_vec.push_back(c_node);
// Several interfaces: create one copy of the child per interface and merge
// the copies with a mrg_qpn that takes over the child's node name.
5072 vector<string> sel_names;
5074 for(si=0;si<ifaces.size();++si){
5075 spx_qpn *subq_node = new spx_qpn();
5077 // Name the subquery
5078 string new_name = "_"+c_node->node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
5080 subq_node->set_node_name( new_name) ;
5081 sel_names.push_back(subq_node->get_node_name());
5084 subq_node->table_name = c_node->table_name->duplicate();
5085 subq_node->table_name->set_machine(ifaces[si].first);
5086 subq_node->table_name->set_interface(ifaces[si].second);
5087 subq_node->table_name->set_ifq(false);
5089 for(s=0;s<c_node->select_list.size();s++){
5090 subq_node->select_list.push_back(dup_select(c_node->select_list[s], NULL));
5092 for(p=0;p<c_node->where.size();p++){
5093 predicate_t *new_pr = dup_pr(c_node->where[p]->pr, NULL);
5094 cnf_elem *new_cnf = new cnf_elem(new_pr);
5095 analyze_cnf(new_cnf);
// NOTE(review): this prints to stdout once per copied predicate; looks like
// leftover debug output -- confirm whether it is still wanted.
5097 printf("table name is %s\n",subq_node->table_name->to_string().c_str());
5098 subq_node->where.push_back(new_cnf);
5100 // Xfer all of the parameters.
5101 // Use existing handle annotations.
5102 // vector<string> param_names = param_tbl->get_param_names();
5104 // for(pi=0;pi<param_names.size();pi++){
5105 // data_type *dt = param_tbl->get_data_type(param_names[pi]);
5106 // subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
5107 // param_tbl->handle_access(param_names[pi]));
5109 // subq_node->definitions = definitions;
5111 if(subq_node->resolve_if_params(ifdb, this->err_str)){
5112 this->error_code = 3;
5116 ret_vec.push_back(subq_node);
// The merge node is constructed from the last sub-query pushed onto ret_vec.
5118 int lpos = ret_vec.size()-1 ;
5119 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[lpos]),c_node->node_name,sel_names, ifaces, ifdb);
// NOTE(review): the split_sources() lines below look like disabled code whose
// comment delimiters are not in this view -- confirm.
5121 Do not split sources until we are done with optimizations
5122 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
5124 for(i=0;i<split_merge.size();++i){
5125 hfta_nodes.push_back(split_merge[i]);
5128 hfta_nodes.push_back(mrg_node);
// Append merge nodes, then the join itself; hfta_returned counts the merge
// nodes plus stream_node.
5133 for(i=0;i<hfta_nodes.size();++i) ret_vec.push_back(hfta_nodes[i]);
5134 ret_vec.push_back(stream_node);
5135 hfta_returned = hfta_nodes.size()+1;
5137 // Currently : assume that the stream receives all parameters
5138 // and parameter updates, incorporates them, then passes
5139 // all of the parameters to the FTA.
5140 // This will need to change (tables, fta-unsafe types. etc.)
5142 // I will pass on the use_handle_access marking, even
5143 // though the fcn call that requires handle access might
5144 // exist in only one of the parts of the query.
5145 // Parameter manipulation and handle access determination will
5146 // need to be revisited anyway.
5147 vector<string> param_names = param_tbl->get_param_names();
// Every returned node receives a duplicate of each parameter's data type and
// a copy of the definitions with "_referenced_ifaces" removed.
5149 for(pi=0;pi<param_names.size();pi++){
5151 data_type *dt = param_tbl->get_data_type(param_names[pi]);
5152 for(ri=0;ri<ret_vec.size();++ri){
5153 ret_vec[ri]->param_tbl->add_param(param_names[pi],dt->duplicate(),
5154 param_tbl->handle_access(param_names[pi]));
5155 ret_vec[ri]->definitions = definitions; ret_vec[ri]->definitions.erase("_referenced_ifaces");
5166 /////////////////////////////////////////////////////////////
5169 // Common processing
// Shared helper for the extract_opview methods.
//
// When the schema referenced by fmtbl is an OPERATOR_VIEW_SCHEMA, an
// opview_entry describing the view is created and, for every sub-query spec
// of the view:
//   - the fields found in Schema under sqs->name+silo_nm are checked against
//     the types / temporal properties the spec expects,
//   - the query node in qnodes whose name equals the spec name is located,
//   - its parse tree is duplicated, renamed and appended to ret.
// Finally the entry is appended to opviews and its index is stored in fmtbl.
//
// Parameters (as visible here):
//   fmtbl     - table variable being examined; receives the udop alias and
//               opview index.
//   pos       - position of fmtbl in the caller's FROM list; used in the
//               generated names.
//   node_name - name of the owning query node; used in the generated names.
//   qnodes    - candidate query nodes searched by name.
//   opviews   - set the new opview_entry is appended to.
//   ret       - output list of duplicated sub-query parse trees.
//   rootnm    - stored as the entry's root_name.
//   silo_nm   - suffix appended to the sub-query name for the schema lookup.
// NOTE(review): Schema is used below but its parameter declaration, the local
// declarations (s, f, q, tmpstr) and all return statements are not in this
// view; the meaning of the int result cannot be determined from here.
5170 int process_opview(tablevar_t *fmtbl, int pos, string node_name,
5172 vector<query_node *> &qnodes,
5173 opview_set &opviews,
5174 vector<table_exp_t *> &ret, string rootnm, string silo_nm){
5178 int schref = fmtbl->get_schema_ref();
5182 if(Schema->get_schema_type(schref) == OPERATOR_VIEW_SCHEMA){
5183 opview_entry *opv = new opview_entry();
5184 opv->parent_qname = node_name;
5185 opv->root_name = rootnm;
5186 opv->view_name = fmtbl->get_schema_name();
// Alias has the form "<node>_UDOP<pos>_<view>".
5188 sprintf(tmpstr,"%s_UDOP%d_%s",node_name.c_str(),pos,opv->view_name.c_str());
5189 opv->udop_alias = tmpstr;
5190 fmtbl->set_udop_alias(opv->udop_alias);
// Operator properties "file" and "liveness_timeout" are read from the schema;
// the timeout string is converted with atoi.
5192 opv->exec_fl = Schema->get_op_prop(schref, string("file"));
5193 opv->liveness_timeout = atoi(Schema->get_op_prop(schref, string("liveness_timeout")).c_str());
5195 vector<subquery_spec *> subq = Schema->get_subqueryspecs(schref);
5196 for(s=0;s<subq.size();++s){
5197 // Validate that the fields match.
5198 subquery_spec *sqs = subq[s];
5199 vector<field_entry *> flds = Schema->get_fields(sqs->name+silo_nm);
5200 if(flds.size() == 0){
5201 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in Schema.\n",sqs->name.c_str(), opv->view_name.c_str());
5204 if(flds.size() < sqs->types.size()){
5205 fprintf(stderr,"ERROR: subquery %s of view %s does not have enough fields (%lu found, %lu expected).\n",sqs->name.c_str(), opv->view_name.c_str(),flds.size(), sqs->types.size());
// Field-by-field check: dte is the expected type from the spec, dtf the type
// found in the schema. The expected type must subsume the found one, and a
// temporal expected type must have the same temporal property.
5208 bool failed = false;
5209 for(f=0;f<sqs->types.size();++f){
5210 data_type dte(sqs->types[f],sqs->modifiers[f]);
5211 data_type dtf(flds[f]->get_type(),flds[f]->get_modifier_list());
5212 if(! dte.subsumes_type(&dtf) ){
5213 fprintf(stderr,"ERROR: subquery %s of view %s does not have the correct type for field %d (%s found, %s expected).\n",sqs->name.c_str(), opv->view_name.c_str(),f,dtf.to_string().c_str(), dte.to_string().c_str());
5217 if(dte.is_temporal() && (dte.get_temporal() != dtf.get_temporal()) ){
5218 string pstr = dte.get_temporal_string();
5219 fprintf(stderr,"ERROR: subquery %s of view %s does not have the expected temporal value %s of field %d.\n",sqs->name.c_str(), opv->view_name.c_str(),pstr.c_str(),f);
5226 /// Validation done, find the subquery, make a copy of the
5227 /// parse tree, and add it to the return list.
// Linear search by name; q == qnodes.size() afterwards means no match.
5228 for(q=0;q<qnodes.size();++q)
5229 if(qnodes[q]->name == sqs->name)
5231 if(q==qnodes.size()){
5232 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in list of query names.\n",sqs->name.c_str(), opv->view_name.c_str());
// The copy is renamed "<node>_OP<pos>_<view>_SUBQ<s>" through its nmap entry
// "query_name", and that name is recorded in the opview entry.
5236 table_exp_t *newq = dup_table_exp(qnodes[q]->parse_tree);
5237 sprintf(tmpstr,"%s_OP%d_%s_SUBQ%d",node_name.c_str(),pos,opv->view_name.c_str(),s);
5238 string newq_name = tmpstr;
5239 newq->nmap["query_name"] = newq_name;
5240 ret.push_back(newq);
5241 opv->subq_names.push_back(newq_name);
// Register the entry and remember its index on the table variable.
5243 fmtbl->set_opview_idx(opviews.append(opv));
// Collect operator-view sub-queries for this selection node by running
// process_opview on its single source (table_name, position 0); extracted
// parse trees accumulate in ret.
// NOTE(review): handling of retval and the return statement are not in this view.
5249 vector<table_exp_t *> spx_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5250 vector<table_exp_t *> ret;
5252 int retval = process_opview(table_name,0,node_name,
5253 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect operator-view sub-queries for this aggregation node by running
// process_opview on its single source (table_name, position 0); extracted
// parse trees accumulate in ret.
// NOTE(review): handling of retval and the return statement are not in this view.
5259 vector<table_exp_t *> sgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5260 vector<table_exp_t *> ret;
5262 int retval = process_opview(table_name,0,node_name,
5263 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect operator-view sub-queries for this rsgah node by running
// process_opview on its single source (table_name, position 0); extracted
// parse trees accumulate in ret.
// NOTE(review): handling of retval and the return statement are not in this view.
5268 vector<table_exp_t *> rsgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5269 vector<table_exp_t *> ret;
5271 int retval = process_opview(table_name,0,node_name,
5272 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect operator-view sub-queries for this sgahcwcb node by running
// process_opview on its single source (table_name, position 0); extracted
// parse trees accumulate in ret.
// NOTE(review): handling of retval and the return statement are not in this view.
5278 vector<table_exp_t *> sgahcwcb_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5279 vector<table_exp_t *> ret;
5281 int retval = process_opview(table_name,0,node_name,
5282 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect operator-view sub-queries for this merge node: process_opview is
// run on every entry of fm, passing the entry's index as its position;
// extracted parse trees accumulate in ret.
// NOTE(review): handling of retval and the return statement are not in this view.
5289 vector<table_exp_t *> mrg_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5290 vector<table_exp_t *> ret;
5292 for(f=0;f<fm.size();++f){
5293 int retval = process_opview(fm[f],f,node_name,
5294 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect operator-view sub-queries for this hash join: process_opview is
// run on every entry of from, passing the entry's index as its position;
// extracted parse trees accumulate in ret.
// NOTE(review): handling of retval and the return statement are not in this view.
5303 vector<table_exp_t *> join_eq_hash_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5304 vector<table_exp_t *> ret;
5306 for(f=0;f<from.size();++f){
5307 int retval = process_opview(from[f],f,node_name,
5308 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect operator-view sub-queries for this filter join: process_opview is
// run on every entry of from, passing the entry's index as its position;
// extracted parse trees accumulate in ret.
// NOTE(review): handling of retval and the return statement are not in this view.
5314 vector<table_exp_t *> filter_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5315 vector<table_exp_t *> ret;
5317 for(f=0;f<from.size();++f){
5318 int retval = process_opview(from[f],f,node_name,
5319 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect operator-view sub-queries for this watchlist join. Only from[0] is
// passed to process_opview (position 0); other FROM entries are not examined
// in the visible code.
// NOTE(review): handling of retval and the return statement are not in this view.
5325 vector<table_exp_t *> watch_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5326 vector<table_exp_t *> ret;
5327 int retval = process_opview(from[0],0,node_name,
5328 Schema,qnodes,opviews,ret, rootnm, silo_name);
// A watch table node has no operator views to extract: always returns an
// empty list and ignores all of its arguments.
5335 vector<table_exp_t *> watch_tbl_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5336 vector<table_exp_t *> ret;
5337 return ret; // nothing to process
5342 //////////////////////////////////////////////////////////////////
5343 //////////////////////////////////////////////////////////////////
5344 /////// Additional methods
5348 //////////////////////////////////////////////////////////////////
5349 // Get schema of operator output
// Output schema of the merge node: returns the stored table_layout pointer
// as-is (no copy is made here).
5351 table_def *mrg_qpn::get_fields(){
5352 return(table_layout);
// Output schema of the watch table node: returns the stored table_layout
// pointer as-is (no copy is made here).
5355 table_def *watch_tbl_qpn::get_fields(){
5356 return(table_layout);
// Output schema of the selection node: the table_def that create_attributes
// produces from the node name and the select list.
5360 table_def *spx_qpn::get_fields(){
5361 return(create_attributes(node_name, select_list));
// Output schema of the aggregation node: the table_def that create_attributes
// produces from the node name and the select list.
5364 table_def *sgah_qpn::get_fields(){
5365 return(create_attributes(node_name, select_list));
// Output schema of the rsgah node: the table_def that create_attributes
// produces from the node name and the select list.
5368 table_def *rsgah_qpn::get_fields(){
5369 return(create_attributes(node_name, select_list));
// Output schema of the sgahcwcb node: the table_def that create_attributes
// produces from the node name and the select list.
5372 table_def *sgahcwcb_qpn::get_fields(){
5373 return(create_attributes(node_name, select_list));
// Output schema of the filter join node: the table_def that create_attributes
// produces from the node name and the select list.
5376 table_def *filter_join_qpn::get_fields(){
5377 return(create_attributes(node_name, select_list));
// Output schema of the watchlist join node: the table_def that
// create_attributes produces from the node name and the select list.
5380 table_def *watch_join_qpn::get_fields(){
5381 return(create_attributes(node_name, select_list));
// Output schema of the hash join. Before building the table_def, the
// temporal property of every select-list element is recomputed in place:
//   1. Column ids and scalar expressions appearing on either side of the
//      temporal_eq predicates are collected.
//   2. Each select element's temporal type is set from compute_se_temporal;
//      an element that is still non-temporal is compared against the
//      collected temporal expressions with is_equivalent_se.
//   3. When from[0] or from[1] has its property set, temporal select elements
//      are checked against the fields named in temporal_eq, and
//      reset_temporal() is applied to elements that fail the check.
// The result is create_attributes(node_name, select_list).
// Side effect: mutates the data types held by select_list.
// NOTE(review): several braces/else lines and the local declarations
// (h, s, t, i, cid_set) are not in this view, so the exact nesting of the
// conditions below should be confirmed against the full file.
5384 table_def *join_eq_hash_qpn::get_fields(){
5387 // First, gather temporal colrefs and SEs.
5388 map<col_id, temporal_type> temporal_cids;
5389 vector<scalarexp_t *> temporal_se;
5390 for(h=0;h<temporal_eq.size();++h){
5391 scalarexp_t *sel = temporal_eq[h]->pr->get_left_se();
5392 scalarexp_t *ser = temporal_eq[h]->pr->get_right_se();
// Left side: a column reference is recorded in temporal_cids the first time
// its col_id is seen.
5394 if(sel->get_operator_type() == SE_COLREF){
5395 col_id tcol(sel->get_colref());
5396 if(temporal_cids.count(tcol) == 0){
5397 temporal_cids[tcol] = sel->get_data_type()->get_temporal();
5400 temporal_se.push_back(sel);
// Right side: same treatment as the left side.
5403 if(ser->get_operator_type() == SE_COLREF){
5404 col_id tcol(ser->get_colref());
5405 if(temporal_cids.count(tcol) == 0){
5406 temporal_cids[tcol] = ser->get_data_type()->get_temporal();
5409 temporal_se.push_back(ser);
5413 // Mark select elements as nontemporal, then deduce which
5414 // ones are temporal.
5415 for(s=0;s<select_list.size();++s){
5416 select_list[s]->se->get_data_type()->set_temporal(
5417 compute_se_temporal(select_list[s]->se, temporal_cids)
5419 // Second chance if it is an exact match to an SE.
5420 // for(s=0;s<select_list.size();++s){
5421 if(! select_list[s]->se->get_data_type()->is_temporal() ){
5422 for(t=0;t<temporal_se.size();++t){
5423 if(is_equivalent_se(temporal_se[t], select_list[s]->se)){
5424 select_list[s]->se->get_data_type()->set_temporal(
5425 temporal_se[t]->get_data_type()->get_temporal()
5433 // If there is an outer join, verify that
5434 // the temporal attributes are actually temporal.
5435 // NOTE: this code must be synchronized with the
5436 // equivalence finding in join_eq_hash_qpn::generate_functor
5437 // (and also, the join_eq_hash_qpn constructor)
5438 if(from[0]->get_property() || from[1]->get_property()){
// l_equiv / r_equiv: field names of column references found on the left /
// right side of the temporal_eq predicates.
5439 set<string> l_equiv, r_equiv;
5440 for(i=0;i<temporal_eq.size();i++){
5441 scalarexp_t *lse = temporal_eq[i]->pr->get_left_se();
5442 scalarexp_t *rse = temporal_eq[i]->pr->get_right_se();
5443 if(lse->get_operator_type()==SE_COLREF){
5444 l_equiv.insert(lse->get_colref()->get_field());
5446 if(rse->get_operator_type()==SE_COLREF){
5447 r_equiv.insert(rse->get_colref()->get_field());
// For each temporal select element, inspect the columns it references:
// columns with tblvar_ref 0 are looked up in l_equiv when from[0] has its
// property set; r_equiv is consulted when from[1] has its property set.
5451 for(s=0;s<select_list.size();++s){
5452 if(select_list[s]->se->get_data_type()->is_temporal()){
5454 col_id_set::iterator ci;
5455 bool failed = false;
5456 gather_se_col_ids(select_list[s]->se,cid_set, NULL);
5457 for(ci=cid_set.begin();ci!=cid_set.end();++ci){
5458 if((*ci).tblvar_ref == 0){
5459 if(from[0]->get_property()){
5460 if(l_equiv.count((*ci).field) == 0){
5465 if(from[1]->get_property()){
5466 if(r_equiv.count((*ci).field) == 0){
5473 select_list[s]->se->get_data_type()->reset_temporal();
5480 return create_attributes(node_name, select_list);
5484 //-----------------------------------------------------------------
5485 // get output "keys"
5486 // This is a guess about the set of fields which are a key
5487 // Use as metadata output, e.g. in qtree.xml
5491 // refs to GB attributes are keys, if a SE is not a GB colref
5492 // but refers to a GB colref (outside of an aggregation)
5493 // then its name is added to partial_keys
// Best-effort key columns of this operator's output.
// The `keys` vector collects the names of select-list entries whose expression
// is itself a group-by reference (is_gb()).
// partial_keys (out-parameter, appended to, never cleared here) receives the names
// of entries for which contains_gb_se(se, gref_set) holds — presumably only when
// the is_gb() test failed; the branch line is not visible in this listing.
// NOTE(review): the declaration of gref_set, the body of the gb_tbl loop and the
// return statement are on lines missing from this listing.
5494 vector<string> sgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5495 vector<string> keys;
5498 for(int i=0; i<gb_tbl.size();++i)
5501 for(int s=0;s<select_list.size();++s){
5502 if(select_list[s]->se->is_gb()){
5503 keys.push_back(select_list[s]->name);
5505 if(contains_gb_se(select_list[s]->se, gref_set)){
5506 partial_keys.push_back(select_list[s]->name);
// Best-effort key columns of this operator's output.
// The `keys` vector collects the names of select-list entries whose expression
// is itself a group-by reference (is_gb()).
// partial_keys (out-parameter, appended to, never cleared here) receives the names
// of entries for which contains_gb_se(se, gref_set) holds — presumably only when
// the is_gb() test failed; the branch line is not visible in this listing.
// NOTE(review): the declaration of gref_set, the body of the gb_tbl loop and the
// return statement are on lines missing from this listing.
5513 vector<string> rsgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5514 vector<string> keys;
5517 for(int i=0; i<gb_tbl.size();++i)
5520 for(int s=0;s<select_list.size();++s){
5521 if(select_list[s]->se->is_gb()){
5522 keys.push_back(select_list[s]->name);
5524 if(contains_gb_se(select_list[s]->se, gref_set)){
5525 partial_keys.push_back(select_list[s]->name);
5536 //-----------------------------------------------------------------
5537 // get input tables
5540 // Get tablevar_t names of input and output tables
5542 // output_file_qpn::output_file_qpn(){source_op_name = ""; }
// Input table variables of this node.
// NOTE(review): only the signature survives in this listing; what the body
// returns cannot be stated from here.
5543 vector<tablevar_t *> output_file_qpn::get_input_tbls(){
// Input table variables of this node. The only visible statement declares an
// empty vector `ret`; presumably it is returned as-is (no inputs) — the return
// line is not visible in this listing.
5547 vector<tablevar_t *> watch_tbl_qpn::get_input_tbls(){
5548 vector<tablevar_t *> ret;
// Input table variables of this node.
// NOTE(review): only the signature survives in this listing; what the body
// returns cannot be stated from here.
5552 vector<tablevar_t *> mrg_qpn::get_input_tbls(){
// Input table variables of this node: a one-element vector holding the
// table_name pointer (the pointer is shared, not copied). The return of
// retval is presumably on a line missing from this listing.
5556 vector<tablevar_t *> spx_qpn::get_input_tbls(){
5557 vector<tablevar_t *> retval(1,table_name);
// Input table variables of this node: a one-element vector holding the
// table_name pointer (the pointer is shared, not copied). The return of
// retval is presumably on a line missing from this listing.
5561 vector<tablevar_t *> sgah_qpn::get_input_tbls(){
5562 vector<tablevar_t *> retval(1,table_name);
// Input table variables of this node: a one-element vector holding the
// table_name pointer (the pointer is shared, not copied). The return of
// retval is presumably on a line missing from this listing.
5566 vector<tablevar_t *> rsgah_qpn::get_input_tbls(){
5567 vector<tablevar_t *> retval(1,table_name);
// Input table variables of this node: a one-element vector holding the
// table_name pointer (the pointer is shared, not copied). The return of
// retval is presumably on a line missing from this listing.
5571 vector<tablevar_t *> sgahcwcb_qpn::get_input_tbls(){
5572 vector<tablevar_t *> retval(1,table_name);
// Input table variables of this join.
// NOTE(review): only the signature survives in this listing; what the body
// returns cannot be stated from here.
5576 vector<tablevar_t *> join_eq_hash_qpn::get_input_tbls(){
// Input table variables of this join.
// NOTE(review): only the signature survives in this listing; what the body
// returns cannot be stated from here.
5580 vector<tablevar_t *> filter_join_qpn::get_input_tbls(){
// Input table variables of this join.
// NOTE(review): only the signature survives in this listing; what the body
// returns cannot be stated from here.
5584 vector<tablevar_t *> watch_join_qpn::get_input_tbls(){
5588 //-----------------------------------------------------------------
5589 // get output tables
5592 // This does not make sense, this fcn returns the output table *name*,
5593 // not its schema, and then there is another fcn to return the schema.
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5594 vector<tablevar_t *> output_file_qpn::get_output_tbls(){
5595 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5599 vector<tablevar_t *> watch_tbl_qpn::get_output_tbls(){
5600 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5604 vector<tablevar_t *> mrg_qpn::get_output_tbls(){
5605 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5609 vector<tablevar_t *> spx_qpn::get_output_tbls(){
5610 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5614 vector<tablevar_t *> sgah_qpn::get_output_tbls(){
5615 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5619 vector<tablevar_t *> rsgah_qpn::get_output_tbls(){
5620 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5624 vector<tablevar_t *> sgahcwcb_qpn::get_output_tbls(){
5625 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5629 vector<tablevar_t *> join_eq_hash_qpn::get_output_tbls(){
5630 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5634 vector<tablevar_t *> filter_join_qpn::get_output_tbls(){
5635 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Output table of this node: a one-element vector holding a tablevar_t
// allocated with `new` on every call and named after node_name.
// No matching delete is visible in this function.
5640 vector<tablevar_t *> watch_join_qpn::get_output_tbls(){
5641 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5647 //-----------------------------------------------------------------
5650 // Associate colrefs with this schema.
5651 // Also, use this opportunity to create table_layout (the output schema).
5652 // If the output schema is ever needed before
// Re-resolve every input table variable in fm against Schema by schema name and
// store the resulting table reference, then copy the schema refs of fm[0] and
// fm[1] onto mvars[0] and mvars[1].
// NOTE(review): only the first two mvars entries are updated on the visible lines;
// any guard on the looked-up tblref sits on a line missing from this listing.
5653 void mrg_qpn::bind_to_schema(table_list *Schema){
5655 for(t=0;t<fm.size();++t){
5656 int tblref = Schema->get_table_ref(fm[t]->get_schema_name());
5658 fm[t]->set_schema_ref(tblref );
5661 // Here I assume that the colrefs have been reordered
5662 // during analysis so that mvars line up with fm.
5663 mvars[0]->set_schema_ref(fm[0]->get_schema_ref());
5664 mvars[1]->set_schema_ref(fm[1]->get_schema_ref());
5671 // Associate colrefs in SEs with this schema.
// Re-bind this select/project node to Schema: look up the input table by schema
// name, store the reference on table_name, then re-bind every WHERE predicate and
// every select-list expression through a single-entry FROM list.
// NOTE(review): whether the looked-up reference `t` is validated before being
// stored cannot be seen here (a line is missing between the lookup and the store).
5672 void spx_qpn::bind_to_schema(table_list *Schema){
5673 // Bind the tablevars in the From clause to the Schema
5674 // (it might have changed from analysis time)
5675 int t = Schema->get_table_ref(table_name->get_schema_name() );
5677 table_name->set_schema_ref(t );
5679 // Get the "from" clause
5680 tablevar_list_t fm(table_name);
5682 // Bind all SEs to this schema
5684 for(p=0;p<where.size();++p){
5685 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5688 for(s=0;s<select_list.size();++s){
5689 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5692 // Collect set of tuples referenced in this HFTA
5693 // input, internal, or output.
// Column references used by this node.
// All column ids appearing in the WHERE predicates and select-list expressions are
// gathered into tmp_cset; the final loop copies into retval only those whose
// schema field_entry has at least one unpack function.
// NOTE(review): how ext_fcns_only chooses between tmp_cset and the filtered retval
// is on lines missing from this listing. `fe` is dereferenced without a visible
// null check — confirm Schema->get_field() cannot return NULL here.
5697 col_id_set spx_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5698 col_id_set retval, tmp_cset;
5700 for(p=0;p<where.size();++p){
5701 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5704 for(s=0;s<select_list.size();++s){
5705 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5707 col_id_set::iterator cisi;
5709 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5710 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5711 if(fe->get_unpack_fcns().size()>0)
5712 retval.insert((*cisi));
// Column references used by this node.
// All column ids appearing in the WHERE predicates and select-list expressions are
// gathered into tmp_cset; the final loop copies into retval only those whose
// schema field_entry has at least one unpack function.
// NOTE(review): how ext_fcns_only chooses between tmp_cset and the filtered retval
// is on lines missing from this listing. `fe` is dereferenced without a visible
// null check — confirm Schema->get_field() cannot return NULL here.
5720 col_id_set filter_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5721 col_id_set retval, tmp_cset;
5723 for(p=0;p<where.size();++p){
5724 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5727 for(s=0;s<select_list.size();++s){
5728 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5730 col_id_set::iterator cisi;
5732 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5733 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5734 if(fe->get_unpack_fcns().size()>0)
5735 retval.insert((*cisi));
// Column references used by this node.
// All column ids appearing in the WHERE predicates and select-list expressions are
// gathered into tmp_cset; the final loop copies into retval only those whose
// schema field_entry has at least one unpack function.
// NOTE(review): how ext_fcns_only chooses between tmp_cset and the filtered retval
// is on lines missing from this listing. `fe` is dereferenced without a visible
// null check — confirm Schema->get_field() cannot return NULL here.
5743 col_id_set watch_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5744 col_id_set retval, tmp_cset;
5746 for(p=0;p<where.size();++p){
5747 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5750 for(s=0;s<select_list.size();++s){
5751 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5753 col_id_set::iterator cisi;
5755 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5756 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5757 if(fe->get_unpack_fcns().size()>0)
5758 retval.insert((*cisi));
5769 // Associate colrefs in SEs with this schema.
// Re-bind this join to Schema: every entry of `from` is looked up by schema name
// and given the resulting table reference; then all WHERE predicates and
// select-list expressions are re-bound against a FROM list built from `from`.
// NOTE(review): any validation of tbl_ref before it is stored is on a line
// missing from this listing.
5770 void join_eq_hash_qpn::bind_to_schema(table_list *Schema){
5771 // Bind the tablevars in the From clause to the Schema
5772 // (it might have changed from analysis time)
5774 for(f=0;f<from.size();++f){
5775 string snm = from[f]->get_schema_name();
5776 int tbl_ref = Schema->get_table_ref(snm);
5778 from[f]->set_schema_ref(tbl_ref);
5781 // Bind all SEs to this schema
5782 tablevar_list_t fm(from);
5785 for(p=0;p<where.size();++p){
5786 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5789 for(s=0;s<select_list.size();++s){
5790 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5793 // Collect set of tuples referenced in this HFTA
5794 // input, internal, or output.
// Re-bind this join to Schema: every entry of `from` is looked up by schema name
// and given the resulting table reference; then all WHERE predicates and
// select-list expressions are re-bound against a FROM list built from `from`.
// NOTE(review): any validation of tbl_ref before it is stored is on a line
// missing from this listing.
5798 void filter_join_qpn::bind_to_schema(table_list *Schema){
5799 // Bind the tablevars in the From clause to the Schema
5800 // (it might have changed from analysis time)
5802 for(f=0;f<from.size();++f){
5803 string snm = from[f]->get_schema_name();
5804 int tbl_ref = Schema->get_table_ref(snm);
5806 from[f]->set_schema_ref(tbl_ref);
5809 // Bind all SEs to this schema
5810 tablevar_list_t fm(from);
5813 for(p=0;p<where.size();++p){
5814 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5817 for(s=0;s<select_list.size();++s){
5818 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5821 // Collect set of tuples referenced in this HFTA
5822 // input, internal, or output.
// Re-bind this join to Schema: every entry of `from` is looked up by schema name
// and given the resulting table reference; then all WHERE predicates and
// select-list expressions are re-bound against a FROM list built from `from`.
// NOTE(review): any validation of tbl_ref before it is stored is on a line
// missing from this listing.
5826 void watch_join_qpn::bind_to_schema(table_list *Schema){
5827 // Bind the tablevars in the From clause to the Schema
5828 // (it might have changed from analysis time)
5830 for(f=0;f<from.size();++f){
5831 string snm = from[f]->get_schema_name();
5832 int tbl_ref = Schema->get_table_ref(snm);
5834 from[f]->set_schema_ref(tbl_ref);
5837 // Bind all SEs to this schema
5838 tablevar_list_t fm(from);
5841 for(p=0;p<where.size();++p){
5842 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5845 for(s=0;s<select_list.size();++s){
5846 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5849 // Collect set of tuples referenced in this HFTA
5850 // input, internal, or output.
// Re-bind this aggregation node to Schema: the input table is looked up by schema
// name and its reference stored on table_name; then WHERE and HAVING predicates,
// select-list expressions, group-by definitions and aggregate arguments are
// re-bound against a single-entry FROM list.
// For aggregates, built-ins bind their aggregate expression; the operand-list loop
// is presumably the branch for non-built-in aggregates (branch line not visible).
5858 void sgah_qpn::bind_to_schema(table_list *Schema){
5859 // Bind the tablevars in the From clause to the Schema
5860 // (it might have changed from analysis time)
5863 int t = Schema->get_table_ref(table_name->get_schema_name() );
5865 table_name->set_schema_ref(t );
5867 // Get the "from" clause
5868 tablevar_list_t fm(table_name);
5872 // Bind all SEs to this schema
5874 for(p=0;p<where.size();++p){
5875 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5877 for(p=0;p<having.size();++p){
5878 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5881 for(s=0;s<select_list.size();++s){
5882 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5885 for(g=0;g<gb_tbl.size();++g){
5886 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5889 for(a=0;a<aggr_tbl.size();++a){
5890 if(aggr_tbl.is_builtin(a)){
5891 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5893 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5895 for(o=0;o<opl.size();++o){
5896 bind_to_schema_se(opl[o],&fm,Schema);
// Column references used by this aggregation node.
// Column ids are gathered (with gb_tbl passed to the gather helpers) from the WHERE
// predicates, the group-by definitions and the aggregate arguments; on the visible
// lines neither select_list nor having is scanned. The final loop copies into
// retval only the ids whose schema field_entry has at least one unpack function.
// NOTE(review): how ext_fcns_only chooses between tmp_cset and the filtered retval
// is on lines missing from this listing. `fe` is dereferenced without a visible
// null check — confirm Schema->get_field() cannot return NULL here.
5902 col_id_set sgah_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5903 col_id_set retval, tmp_cset;
5905 for(p=0;p<where.size();++p){
5906 gather_pr_col_ids(where[p]->pr, tmp_cset, &gb_tbl);
5909 for(g=0;g<gb_tbl.size();++g){
5910 gather_se_col_ids(gb_tbl.get_def(g), tmp_cset, &gb_tbl);
5913 for(a=0;a<aggr_tbl.size();++a){
5914 if(aggr_tbl.is_builtin(a)){
5915 gather_se_col_ids(aggr_tbl.get_aggr_se(a), tmp_cset, &gb_tbl);
5917 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5919 for(o=0;o<opl.size();++o){
5920 gather_se_col_ids(opl[o], tmp_cset, &gb_tbl);
5925 col_id_set::iterator cisi;
5927 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5928 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5929 if(fe->get_unpack_fcns().size()>0)
5930 retval.insert((*cisi));
// Re-bind this aggregation node to Schema: the input table is looked up by schema
// name and its reference stored on table_name; then WHERE, HAVING and CLOSING_WHEN
// predicates, select-list expressions, group-by definitions and aggregate arguments
// are re-bound against a single-entry FROM list.
// For aggregates, built-ins bind their aggregate expression; the operand-list loop
// is presumably the branch for non-built-in aggregates (branch line not visible).
5939 void rsgah_qpn::bind_to_schema(table_list *Schema){
5940 // Bind the tablevars in the From clause to the Schema
5941 // (it might have changed from analysis time)
5942 int t = Schema->get_table_ref(table_name->get_schema_name() );
5944 table_name->set_schema_ref(t );
5946 // Get the "from" clause
5947 tablevar_list_t fm(table_name);
5949 // Bind all SEs to this schema
5951 for(p=0;p<where.size();++p){
5952 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5954 for(p=0;p<having.size();++p){
5955 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5957 for(p=0;p<closing_when.size();++p){
5958 bind_to_schema_pr(closing_when[p]->pr, &fm, Schema);
5961 for(s=0;s<select_list.size();++s){
5962 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5965 for(g=0;g<gb_tbl.size();++g){
5966 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5969 for(a=0;a<aggr_tbl.size();++a){
5970 if(aggr_tbl.is_builtin(a)){
5971 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5973 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5975 for(o=0;o<opl.size();++o){
5976 bind_to_schema_se(opl[o],&fm,Schema);
// Re-bind this aggregation node to Schema: the input table is looked up by schema
// name and its reference stored on table_name; then WHERE, HAVING, CLEANING_BY and
// CLEANING_WHEN predicates, select-list expressions, group-by definitions and
// aggregate arguments are re-bound against a single-entry FROM list.
// For aggregates, built-ins bind their aggregate expression; the operand-list loop
// is presumably the branch for non-built-in aggregates (branch line not visible).
5983 void sgahcwcb_qpn::bind_to_schema(table_list *Schema){
5984 // Bind the tablevars in the From clause to the Schema
5985 // (it might have changed from analysis time)
5986 int t = Schema->get_table_ref(table_name->get_schema_name() );
5988 table_name->set_schema_ref(t );
5990 // Get the "from" clause
5991 tablevar_list_t fm(table_name);
5993 // Bind all SEs to this schema
5995 for(p=0;p<where.size();++p){
5996 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5998 for(p=0;p<having.size();++p){
5999 bind_to_schema_pr(having[p]->pr, &fm, Schema);
// Each predicate list must be walked with its own length: bounding these two
// loops by having.size() leaves entries unbound when the list is longer than
// `having` and indexes past the end when it is shorter.
6001 for(p=0;p<cleanby.size();++p){
6002 bind_to_schema_pr(cleanby[p]->pr, &fm, Schema);
6004 for(p=0;p<cleanwhen.size();++p){
6005 bind_to_schema_pr(cleanwhen[p]->pr, &fm, Schema);
6008 for(s=0;s<select_list.size();++s){
6009 bind_to_schema_se(select_list[s]->se, &fm, Schema);
6012 for(g=0;g<gb_tbl.size();++g){
6013 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
6016 for(a=0;a<aggr_tbl.size();++a){
6017 if(aggr_tbl.is_builtin(a)){
6018 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
6020 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
6022 for(o=0;o<opl.size();++o){
6023 bind_to_schema_se(opl[o],&fm,Schema);
6034 ///////////////////////////////////////////////////////////////
6035 ///////////////////////////////////////////////////////////////
6036 /// Functions for code generation.
6039 //-----------------------------------------------------------------
// Complex-literal table for this node: always a freshly allocated, empty
// cplx_lit_table. Ext_fcns is not used.
6042 cplx_lit_table *watch_tbl_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6043 return(new cplx_lit_table());
// Complex-literal table for this node: always a freshly allocated, empty
// cplx_lit_table. Ext_fcns is not used.
6046 cplx_lit_table *mrg_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6047 return(new cplx_lit_table());
// Builds and returns a newly allocated cplx_lit_table filled by running
// find_complex_literal_se over every select-list expression and
// find_complex_literal_pr over every WHERE predicate.
6050 cplx_lit_table *spx_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6052 cplx_lit_table *complex_literals = new cplx_lit_table();
6054 for(i=0;i<select_list.size();i++){
6055 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6057 for(i=0;i<where.size();++i){
6058 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6061 return(complex_literals);
// Builds and returns a newly allocated cplx_lit_table filled from, in order:
// aggregate arguments, select-list expressions, group-by definitions, WHERE
// predicates and HAVING predicates.
// Built-in aggregates other than star aggregates contribute their aggregate
// expression; the operand-list scan is presumably the alternative branch
// (its exact condition is on a line missing from this listing).
6064 cplx_lit_table *sgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6066 cplx_lit_table *complex_literals = new cplx_lit_table();
6068 for(i=0;i<aggr_tbl.size();++i){
6069 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6070 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6072 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6073 for(j=0;j<opl.size();++j)
6074 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6078 for(i=0;i<select_list.size();i++){
6079 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6081 for(i=0;i<gb_tbl.size();i++){
6082 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6084 for(i=0;i<where.size();++i){
6085 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6087 for(i=0;i<having.size();++i){
6088 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6091 return(complex_literals);
// Builds and returns a newly allocated cplx_lit_table filled from, in order:
// aggregate arguments, select-list expressions, group-by definitions, WHERE,
// HAVING and CLOSING_WHEN predicates.
// Built-in aggregates other than star aggregates contribute their aggregate
// expression; the operand-list scan is presumably the alternative branch
// (its exact condition is on a line missing from this listing).
6095 cplx_lit_table *rsgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6097 cplx_lit_table *complex_literals = new cplx_lit_table();
6099 for(i=0;i<aggr_tbl.size();++i){
6100 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6101 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6103 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6104 for(j=0;j<opl.size();++j)
6105 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6109 for(i=0;i<select_list.size();i++){
6110 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6112 for(i=0;i<gb_tbl.size();i++){
6113 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6115 for(i=0;i<where.size();++i){
6116 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6118 for(i=0;i<having.size();++i){
6119 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6121 for(i=0;i<closing_when.size();++i){
6122 find_complex_literal_pr(closing_when[i]->pr,Ext_fcns, complex_literals);
6125 return(complex_literals);
// Builds and returns a newly allocated cplx_lit_table filled from, in order:
// aggregate arguments, select-list expressions, group-by definitions, WHERE,
// HAVING, CLEANING_WHEN and CLEANING_BY predicates.
// Built-in aggregates other than star aggregates contribute their aggregate
// expression; the operand-list scan is presumably the alternative branch
// (its exact condition is on a line missing from this listing).
6129 cplx_lit_table *sgahcwcb_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6131 cplx_lit_table *complex_literals = new cplx_lit_table();
6133 for(i=0;i<aggr_tbl.size();++i){
6134 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6135 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6137 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6138 for(j=0;j<opl.size();++j)
6139 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6143 for(i=0;i<select_list.size();i++){
6144 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6146 for(i=0;i<gb_tbl.size();i++){
6147 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6149 for(i=0;i<where.size();++i){
6150 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6152 for(i=0;i<having.size();++i){
6153 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6155 for(i=0;i<cleanwhen.size();++i){
6156 find_complex_literal_pr(cleanwhen[i]->pr,Ext_fcns, complex_literals);
6158 for(i=0;i<cleanby.size();++i){
6159 find_complex_literal_pr(cleanby[i]->pr,Ext_fcns, complex_literals);
6162 return(complex_literals);
// Builds and returns a newly allocated cplx_lit_table filled by running
// find_complex_literal_se over every select-list expression and
// find_complex_literal_pr over every WHERE predicate.
6165 cplx_lit_table *join_eq_hash_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6167 cplx_lit_table *complex_literals = new cplx_lit_table();
6169 for(i=0;i<select_list.size();i++){
6170 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6172 for(i=0;i<where.size();++i){
6173 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6176 return(complex_literals);
// Builds and returns a newly allocated cplx_lit_table filled by running
// find_complex_literal_se over every select-list expression and
// find_complex_literal_pr over every WHERE predicate.
6179 cplx_lit_table *filter_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6181 cplx_lit_table *complex_literals = new cplx_lit_table();
6183 for(i=0;i<select_list.size();i++){
6184 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6186 for(i=0;i<where.size();++i){
6187 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6190 return(complex_literals);
// Builds and returns a newly allocated cplx_lit_table filled by running
// find_complex_literal_se over every select-list expression and
// find_complex_literal_pr over every WHERE predicate.
6193 cplx_lit_table *watch_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6195 cplx_lit_table *complex_literals = new cplx_lit_table();
6197 for(i=0;i<select_list.size();i++){
6198 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6200 for(i=0;i<where.size();++i){
6201 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6204 return(complex_literals);
6211 //-----------------------------------------------------------------
6212 // get_handle_param_tbl
// Handle-parameter table for this node. The only visible statement declares an
// empty vector; presumably it is returned unchanged — the return line is not
// visible in this listing.
6214 vector<handle_param_tbl_entry *> watch_tbl_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6215 vector<handle_param_tbl_entry *> retval;
// Handle-parameter table for this node. The only visible statement declares an
// empty vector; presumably it is returned unchanged — the return line is not
// visible in this listing.
6219 vector<handle_param_tbl_entry *> mrg_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6220 vector<handle_param_tbl_entry *> retval;
// Collects handle-parameter entries into retval by running find_param_handles_se
// over every select-list expression and find_param_handles_pr over every WHERE
// predicate. The return of retval is presumably on a line missing from this listing.
6225 vector<handle_param_tbl_entry *> spx_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6227 vector<handle_param_tbl_entry *> retval;
6229 for(i=0;i<select_list.size();i++){
6230 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6232 for(i=0;i<where.size();++i){
6233 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collects handle-parameter entries into retval from, in order: aggregate
// arguments, select-list expressions, group-by definitions, WHERE predicates and
// HAVING predicates.
// Built-in aggregates other than star aggregates contribute their aggregate
// expression; the operand-list scan is presumably the alternative branch
// (its exact condition is on a line missing from this listing).
6240 vector<handle_param_tbl_entry *> sgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6242 vector<handle_param_tbl_entry *> retval;
6245 for(i=0;i<aggr_tbl.size();++i){
6246 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6247 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6249 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6250 for(j=0;j<opl.size();++j)
6251 find_param_handles_se(opl[j], Ext_fcns, retval);
6254 for(i=0;i<select_list.size();i++){
6255 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6257 for(i=0;i<gb_tbl.size();i++){
6258 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6260 for(i=0;i<where.size();++i){
6261 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6263 for(i=0;i<having.size();++i){
6264 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
// Collects handle-parameter entries into retval from, in order: aggregate
// arguments, select-list expressions, group-by definitions, WHERE, HAVING and
// CLOSING_WHEN predicates.
// Built-in aggregates other than star aggregates contribute their aggregate
// expression; the operand-list scan is presumably the alternative branch
// (its exact condition is on a line missing from this listing).
6271 vector<handle_param_tbl_entry *> rsgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6273 vector<handle_param_tbl_entry *> retval;
6276 for(i=0;i<aggr_tbl.size();++i){
6277 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6278 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6280 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6281 for(j=0;j<opl.size();++j)
6282 find_param_handles_se(opl[j], Ext_fcns, retval);
6285 for(i=0;i<select_list.size();i++){
6286 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6288 for(i=0;i<gb_tbl.size();i++){
6289 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6291 for(i=0;i<where.size();++i){
6292 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6294 for(i=0;i<having.size();++i){
6295 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6297 for(i=0;i<closing_when.size();++i){
6298 find_param_handles_pr(closing_when[i]->pr,Ext_fcns, retval);
// Collects handle-parameter entries into retval from, in order: aggregate
// arguments, select-list expressions, group-by definitions, WHERE, HAVING,
// CLEANING_WHEN and CLEANING_BY predicates.
// Built-in aggregates other than star aggregates contribute their aggregate
// expression; the operand-list scan is presumably the alternative branch
// (its exact condition is on a line missing from this listing).
6305 vector<handle_param_tbl_entry *> sgahcwcb_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6307 vector<handle_param_tbl_entry *> retval;
6310 for(i=0;i<aggr_tbl.size();++i){
6311 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6312 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6314 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6315 for(j=0;j<opl.size();++j)
6316 find_param_handles_se(opl[j], Ext_fcns, retval);
6319 for(i=0;i<select_list.size();i++){
6320 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6322 for(i=0;i<gb_tbl.size();i++){
6323 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6325 for(i=0;i<where.size();++i){
6326 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6328 for(i=0;i<having.size();++i){
6329 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6331 for(i=0;i<cleanwhen.size();++i){
6332 find_param_handles_pr(cleanwhen[i]->pr,Ext_fcns, retval);
6334 for(i=0;i<cleanby.size();++i){
6335 find_param_handles_pr(cleanby[i]->pr,Ext_fcns, retval);
// Collects handle-parameter entries into retval by running find_param_handles_se
// over every select-list expression and find_param_handles_pr over every WHERE
// predicate. The return of retval is presumably on a line missing from this listing.
6341 vector<handle_param_tbl_entry *> join_eq_hash_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6343 vector<handle_param_tbl_entry *> retval;
6345 for(i=0;i<select_list.size();i++){
6346 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6348 for(i=0;i<where.size();++i){
6349 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collects handle-parameter entries into retval by running find_param_handles_se
// over every select-list expression and find_param_handles_pr over every WHERE
// predicate. The return of retval is presumably on a line missing from this listing.
6356 vector<handle_param_tbl_entry *> filter_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6358 vector<handle_param_tbl_entry *> retval;
6360 for(i=0;i<select_list.size();i++){
6361 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6363 for(i=0;i<where.size();++i){
6364 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collects handle-parameter entries into retval by running find_param_handles_se
// over every select-list expression and find_param_handles_pr over every WHERE
// predicate. The return of retval is presumably on a line missing from this listing.
6370 vector<handle_param_tbl_entry *> watch_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6372 vector<handle_param_tbl_entry *> retval;
6374 for(i=0;i<select_list.size();i++){
6375 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6377 for(i=0;i<where.size();++i){
6378 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6386 ///////////////////////////////////////////////////////////////
6387 ///////////////////////////////////////////////////////////////
6388 /// Functions for operator output rates estimations
6391 //-----------------------------------------------------------------
6392 // get_rate_estimate
// Estimated output rate of this operator: a fixed selectivity constant
// (SPX_SELECTIVITY) times DEFAULT_INTERFACE_RATE; nothing about the actual
// query or its inputs is consulted.
6394 double spx_qpn::get_rate_estimate() {
6396 // dummy method for now
6397 return SPX_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of this operator: a fixed selectivity constant
// (SGAH_SELECTIVITY) times DEFAULT_INTERFACE_RATE; nothing about the actual
// query or its inputs is consulted.
6400 double sgah_qpn::get_rate_estimate() {
6402 // dummy method for now
6403 return SGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of this operator: a fixed selectivity constant
// (RSGAH_SELECTIVITY) times DEFAULT_INTERFACE_RATE; nothing about the actual
// query or its inputs is consulted.
6406 double rsgah_qpn::get_rate_estimate() {
6408 // dummy method for now
6409 return RSGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of this operator: a fixed selectivity constant
// (SGAHCWCB_SELECTIVITY) times DEFAULT_INTERFACE_RATE; nothing about the actual
// query or its inputs is consulted.
6412 double sgahcwcb_qpn::get_rate_estimate() {
6414 // dummy method for now
6415 return SGAHCWCB_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of this operator: DEFAULT_INTERFACE_RATE itself, with no
// selectivity factor applied.
6418 double watch_tbl_qpn::get_rate_estimate() {
6420 // dummy method for now
6421 return DEFAULT_INTERFACE_RATE;
// Estimated output rate of this operator: a fixed selectivity constant
// (MRG_SELECTIVITY) times DEFAULT_INTERFACE_RATE; nothing about the actual
// query or its inputs is consulted.
6424 double mrg_qpn::get_rate_estimate() {
6426 // dummy method for now
6427 return MRG_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Estimated output rate of this operator: a fixed selectivity constant
// (JOIN_EQ_HASH_SELECTIVITY) times DEFAULT_INTERFACE_RATE; nothing about the
// actual query or its inputs is consulted.
6430 double join_eq_hash_qpn::get_rate_estimate() {
6432 // dummy method for now
6433 return JOIN_EQ_HASH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6437 //////////////////////////////////////////////////////////////////////////////
6438 //////////////////////////////////////////////////////////////////////////////
6439 ///// Generate functors
6444 //-------------------------------------------------------------------------
6445 // Code generation utilities.
6446 //-------------------------------------------------------------------------
6448 // Globals referenced by generate utilities
// File-local pointer read by generate_se_code() to fetch the defining expression
// of a group-by reference (segen_gb_tbl->get_def(gref)). It has no initializer
// here and the code that assigns it is not in this part of the file, so it must be
// pointed at the current node's group-by table before generate_se_code() meets a
// GB column reference.
6450 static gb_table *segen_gb_tbl; // Table of all group-by attributes.
6454 // Generate code that makes reference
6455 // to the tuple, and not to any aggregates.
6456 // NEW : it might reference a stateful function.
// Emit the C++ source text that evaluates scalar expression `se` against the
// unpacked input tuple, recursing through operands.
//   se     - expression to translate; dispatched on se->get_operator_type().
//   schema - passed through unchanged to recursive calls.
// Returns the generated code fragment, or the literal "ERROR in generate_se_code"
// (after printing to stderr) for an unrecognised operator type.
// NOTE(review): the `case` labels, local declarations (ret, tmpstr, o) and several
// short statements are missing from this listing; the branch descriptions below are
// inferred from the accessors each branch calls. tmpstr's size is not visible and
// sprintf is unbounded (one format embeds a field name via %s) — confirm the
// buffer is large enough.
6457 static string generate_se_code(scalarexp_t *se,table_list *schema){
6459 data_type *ldt, *rdt;
6461 vector<scalarexp_t *> operands;
6464 switch(se->get_operator_type()){
// Literal: handle reference -> handle_param_N, complex literal ->
// complex_literal_N, otherwise the literal's own C rendering.
6466 if(se->is_handle_ref()){
6467 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6471 if(se->get_literal()->is_cpx_lit()){
6472 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6476 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
// Query parameter: handle reference -> handle_param_N, otherwise param_<name>.
6478 if(se->is_handle_ref()){
6479 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6483 ret.append("param_");
6484 ret.append(se->get_param_name());
// Unary operator: use the data type's complex-operator function when it defines
// one for this operator, otherwise emit the operator followed by the operand.
6487 ldt = se->get_left_se()->get_data_type();
6488 if(ldt->complex_operator(se->get_op()) ){
6489 ret.append( ldt->get_complex_operator(se->get_op()) );
6491 ret.append(generate_se_code(se->get_left_se(),schema));
6495 ret.append(se->get_op());
6496 ret.append(generate_se_code(se->get_left_se(),schema));
// Binary operator: same choice, keyed on the (left type, right type, operator) triple.
6501 ldt = se->get_left_se()->get_data_type();
6502 rdt = se->get_right_se()->get_data_type();
6504 if(ldt->complex_operator(rdt, se->get_op()) ){
6505 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6507 ret.append(generate_se_code(se->get_left_se(),schema));
6509 ret.append(generate_se_code(se->get_right_se(),schema));
6513 ret.append(generate_se_code(se->get_left_se(),schema));
6514 ret.append(se->get_op());
6515 ret.append(generate_se_code(se->get_right_se(),schema));
// Column reference: a group-by reference is replaced by the code of its defining
// expression taken from segen_gb_tbl; any other column becomes
// unpack_var_<field>_<tablevar_ref>.
6520 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6521 // so return the defining code.
6522 int gref = se->get_gb_ref();
6523 scalarexp_t *gdef_se = segen_gb_tbl->get_def(gref);
6524 ret = generate_se_code(gdef_se, schema );
6527 sprintf(tmpstr,"unpack_var_%s_%d",
6528 se->get_colref()->get_field().c_str(), se->get_colref()->get_tablevar_ref() );
// Function call: a partial function refers to its precomputed
// partial_fcn_result_N; otherwise emit name(args...), with the state variable and
// `cd` as leading arguments when the function has a storage state.
6533 if(se->is_partial()){
6534 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6537 ret += se->op + "(";
6538 operands = se->get_operands();
6539 bool first_elem = true;
6540 if(se->get_storage_state() != ""){
6541 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6544 for(o=0;o<operands.size();o++){
6545 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references get special treatment on a
// line missing from this listing (presumably passed by address — confirm).
6546 if(operands[o]->get_data_type()->is_buffer_type() &&
6547 (! (operands[o]->is_handle_ref()) ) )
6549 ret += generate_se_code(operands[o], schema);
6555 fprintf(stderr,"INTERNAL ERROR in generate_se_code (hfta), line %d, character %d: unknown operator type %d\n",
6556 se->get_lineno(), se->get_charno(),se->get_operator_type());
6557 return("ERROR in generate_se_code");
6561 // generate code that refers only to aggregate data and constants.
6562 // NEW : modified to handle superaggregates and stateful fcn refs.
6563 // Assume that the state is in *stval
// Emit the C++ source text that evaluates scalar expression `se` from already
// computed group-by values and aggregates rather than from the input tuple.
//   se     - expression to translate; dispatched on se->get_operator_type().
//   gbvar  - text prefixed to the group-by reference index to name a GB value.
//   aggvar - text prefixed to "aggr_var<N>" to name an ordinary aggregate value.
//   schema - passed through unchanged to recursive calls.
// Returns the generated code fragment, or "ERROR in generate_se_code_fm_aggr"
// (after printing to stderr) for an unrecognised operator type.
// NOTE(review): the `case` labels, local declarations (ret, tmpstr, o) and several
// short statements are missing from this listing; the branch descriptions below are
// inferred from the accessors each branch calls. tmpstr's size is not visible and
// sprintf is unbounded (gbvar/aggvar are embedded via %s) — confirm the buffer is
// large enough.
6564 static string generate_se_code_fm_aggr(scalarexp_t *se, string gbvar, string aggvar, table_list *schema){
6567 data_type *ldt, *rdt;
6569 vector<scalarexp_t *> operands;
6572 switch(se->get_operator_type()){
// Literal: handle reference -> handle_param_N, complex literal ->
// complex_literal_N, otherwise the literal's own C rendering.
6574 if(se->is_handle_ref()){
6575 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6579 if(se->get_literal()->is_cpx_lit()){
6580 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6584 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
// Query parameter: handle reference -> handle_param_N, otherwise param_<name>.
6586 if(se->is_handle_ref()){
6587 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6591 ret.append("param_");
6592 ret.append(se->get_param_name());
// Unary operator: use the data type's complex-operator function when it defines
// one for this operator, otherwise emit the operator followed by the operand.
6595 ldt = se->get_left_se()->get_data_type();
6596 if(ldt->complex_operator(se->get_op()) ){
6597 ret.append( ldt->get_complex_operator(se->get_op()) );
6599 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6603 ret.append(se->get_op());
6604 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
// Binary operator: same choice, keyed on the (left type, right type, operator) triple.
6609 ldt = se->get_left_se()->get_data_type();
6610 rdt = se->get_right_se()->get_data_type();
6612 if(ldt->complex_operator(rdt, se->get_op()) ){
6613 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6615 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6617 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6621 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6622 ret.append(se->get_op());
6623 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
// Column reference: only group-by references are legal here; anything else is
// reported on stderr.
6628 if(se->is_gb()){ // GB attribute: emit the name of its group-by variable,
6629 // i.e. gbvar followed by the group-by reference index.
6630 sprintf(tmpstr,"%s%d",gbvar.c_str(),se->get_gb_ref());
6634 fprintf(stderr,"ERROR reference to non-GB column ref not permitted here,"
6635 "error in query_plan.cc:generate_se_code_fm_aggr, line %d, character %d.\n",
6636 se->get_lineno(), se->get_charno());
// Aggregate reference: superaggregates are read from the shared state (*stval),
// ordinary aggregates from <aggvar>aggr_var<N>.
6642 if(se->is_superaggr()){
6643 sprintf(tmpstr,"stval->aggr_var%d",se->get_aggr_ref());
6645 sprintf(tmpstr,"%saggr_var%d",aggvar.c_str(),se->get_aggr_ref());
// Function: one carrying a non-negative aggregate reference is read from
// udaf_ret_N; a partial function from partial_fcn_result_N; otherwise emit
// name(args...), with the state variable and `cd` first when it has a storage state.
6651 if(se->get_aggr_ref() >= 0){
6652 sprintf(tmpstr,"udaf_ret_%d",se->get_aggr_ref());
6657 if(se->is_partial()){
6658 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6661 ret += se->op + "(";
6662 bool first_elem = true;
6663 if(se->get_storage_state() != ""){
6664 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6667 operands = se->get_operands();
6668 for(o=0;o<operands.size();o++){
6669 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references get special treatment on a
// line missing from this listing (presumably passed by address — confirm).
6670 if(operands[o]->get_data_type()->is_buffer_type() &&
6671 (! (operands[o]->is_handle_ref()) ) )
6673 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
6679 fprintf(stderr,"INTERNAL ERROR in query_plan.cc::generate_se_code_fm_aggr, line %d, character %d: unknown operator type %d\n",
6680 se->get_lineno(), se->get_charno(),se->get_operator_type());
6681 return("ERROR in generate_se_code_fm_aggr");
// Emit the statement that evaluates partial function `se` from group-by/aggregate
// values: "\tretval = <fn>( &partial_fcn_result_<pfn_id>, ..." followed by the
// optional state arguments and the operands rendered with
// generate_se_code_fm_aggr().
//   se      - must be an SE_FUNC expression; otherwise an error is printed and
//             "ERROR in unpack_partial_fcn_fm_aggr" is returned.
//   pfn_id  - index used to name the result variable passed as first argument.
//   gbvar, aggvar, schema - forwarded to generate_se_code_fm_aggr().
// NOTE(review): local declarations, the operand separators, the handling of
// buffer-typed operands and the closing of the call are on lines missing from
// this listing.
6687 static string unpack_partial_fcn_fm_aggr(scalarexp_t *se, int pfn_id, string gbvar, string aggvar, table_list *schema){
6690 vector<scalarexp_t *> operands;
6693 if(se->get_operator_type() != SE_FUNC){
6694 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn_fm_aggr. line %d, character %d\n",
6695 se->get_lineno(), se->get_charno());
6696 return("ERROR in unpack_partial_fcn_fm_aggr");
// Terminated with ';' so the assignment and the sprintf are two separate
// statements rather than one comma-operator expression.
6699 ret = "\tretval = " + se->get_op() + "( ";
6700 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
6703 if(se->get_storage_state() != ""){
6704 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6707 operands = se->get_operands();
6708 for(o=0;o<operands.size();o++){
6710 if(operands[o]->get_data_type()->is_buffer_type() &&
6711 (! (operands[o]->is_handle_ref()) ) )
6713 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
// Emit the C statement that evaluates partial function `se` for result slot
// pfn_id, rendering each operand with generate_se_code.
// The statement text begins "\tretval = <op>( " and the result slot is named
// &partial_fcn_result_<pfn_id>.
// If `se` is not an SE_FUNC the problem is reported on stderr and an error
// string is returned instead of code.
6721 static string unpack_partial_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6724 vector<scalarexp_t *> operands;
// Guard: only function expressions can be unpacked here.
6726 if(se->get_operator_type() != SE_FUNC){
6727 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn. line %d, character %d\n",
6728 se->get_lineno(), se->get_charno());
6729 return("ERROR in unpack_partial_fcn");
// Statement prefix.  Terminated with ';' -- it previously ended in ',' which
// silently chained into the sprintf below through the comma operator.
6732 ret = "\tretval = " + se->get_op() + "( ";
6733 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
// Functions with a storage state also receive the state variable and "cd".
6736 if(se->get_storage_state() != ""){
6737 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
// Remaining arguments: one per operand of the function.
6740 operands = se->get_operands();
6741 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle refs are special-cased
// (presumably passed by address -- TODO confirm).
6743 if(operands[o]->get_data_type()->is_buffer_type() &&
6744 (! (operands[o]->is_handle_ref()) ) )
6746 ret += generate_se_code(operands[o], schema);
// Build the C call expression "<op>(...)" for function expression `se`,
// rendering each operand with generate_se_code.
// pfn_id is not referenced on any line visible here.
// If `se` is not an SE_FUNC the problem is reported on stderr and an error
// string is returned instead of code.
6753 static string generate_cached_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6756 vector<scalarexp_t *> operands;
// Guard: only function expressions are accepted.
6758 if(se->get_operator_type() != SE_FUNC){
6759 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to generate_cached_fcn. line %d, character %d\n",
6760 se->get_lineno(), se->get_charno());
6761 return("ERROR in generate_cached_fcn");
6764 ret = se->get_op()+"(";
// Functions with a storage state take the state variable and "cd" as their
// leading arguments (note the trailing comma in the emitted text).
6766 if(se->get_storage_state() != ""){
6767 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd,";
6770 operands = se->get_operands();
6771 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle refs are special-cased
// (presumably passed by address -- TODO confirm).
6773 if(operands[o]->get_data_type()->is_buffer_type() &&
6774 (! (operands[o]->is_handle_ref()) ) )
6776 ret += generate_se_code(operands[o], schema);
// Translate a query-language comparison operator into its C spelling:
// "=" becomes "==" and "<>" becomes "!=".
// NOTE(review): any other operator is presumably returned unchanged -- confirm.
6787 static string generate_C_comparison_op(string op){
6788 if(op == "=") return("==");
6789 if(op == "<>") return("!=");
// Translate a query-language boolean operator into C.  AND, OR and NOT are
// each recognized in upper, capitalized and lower case; anything else yields
// the string "ERROR UNKNOWN BOOLEAN OPERATOR".
// NOTE(review): the C text returned for each recognized operator is presumably
// "&&", "||" and "!" -- confirm.
6793 static string generate_C_boolean_op(string op){
6794 if( (op == "AND") || (op == "And") || (op == "and") ){
6797 if( (op == "OR") || (op == "Or") || (op == "or") ){
6800 if( (op == "NOT") || (op == "Not") || (op == "not") ){
6804 return("ERROR UNKNOWN BOOLEAN OPERATOR");
// Render predicate `pr` as C source text.  Scalar operands are rendered with
// generate_se_code and sub-predicates by recursive calls to this function.
// The switch dispatches on pr->get_operator_type(); an unrecognized type is
// reported on stderr and an error string is returned.
6808 static string generate_predicate_code(predicate_t *pr,table_list *schema){
6810 vector<literal_t *> litv;
6812 data_type *ldt, *rdt;
6813 vector<scalarexp_t *> op_list;
6816 switch(pr->get_operator_type()){
// Literal-list membership: one equality test per literal, joined by " || ".
6818 ldt = pr->get_left_se()->get_data_type();
6821 litv = pr->get_lit_vec();
6822 for(i=0;i<litv.size();i++){
6823 if(i>0) ret.append(" || ");
// Types needing a complex comparison call the type's hfta equals function and
// test its result against 0; the literal is named complex_literal_<n> when it
// is a complex literal, otherwise its C code is emitted inline.
6826 if(ldt->complex_comparison(ldt) ){
6827 ret.append( ldt->get_hfta_equals_fcn(ldt) );
6829 if(ldt->is_buffer_type() )
6831 ret.append(generate_se_code(pr->get_left_se(), schema));
6833 if(ldt->is_buffer_type() )
6835 if(litv[i]->is_cpx_lit()){
6836 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6839 ret.append(litv[i]->to_C_code(""));
6841 ret.append(") == 0");
// Simple types: left expression compared with the literal's hfta C code.
6843 ret.append(generate_se_code(pr->get_left_se(), schema));
6845 ret.append(litv[i]->to_hfta_C_code(""));
// Comparison between two scalar expressions.
6854 ldt = pr->get_left_se()->get_data_type();
6855 rdt = pr->get_right_se()->get_data_type();
// Complex comparison: call the hfta comparison function on both sides and
// apply the translated comparison operator to its result.
6858 if(ldt->complex_comparison(rdt) ){
6859 // TODO can use get_hfta_equals_fcn if op is "=" ?
6860 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6862 if(ldt->is_buffer_type() )
6864 ret.append(generate_se_code(pr->get_left_se(),schema) );
6866 if(rdt->is_buffer_type() )
6868 ret.append(generate_se_code(pr->get_right_se(),schema) );
6870 ret.append( generate_C_comparison_op(pr->get_op()));
// Simple comparison: left, translated operator, right.
6873 ret.append(generate_se_code(pr->get_left_se(),schema) );
6874 ret.append( generate_C_comparison_op(pr->get_op()));
6875 ret.append(generate_se_code(pr->get_right_se(),schema) );
// Prefix form: boolean operator followed by a single sub-predicate.
6881 ret.append( generate_C_boolean_op(pr->get_op()) );
6882 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6885 case PRED_BINARY_OP:
// Infix form: left sub-predicate, boolean operator, right sub-predicate.
6887 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6888 ret.append( generate_C_boolean_op(pr->get_op()) );
6889 ret.append(generate_predicate_code(pr->get_right_pr(),schema) );
// Predicate function call: "<op>( " followed by comma-separated operands.
6893 ret += pr->get_op() + "( ";
6894 op_list = pr->get_op_list();
6895 for(o=0;o<op_list.size();++o){
6896 if(o>0) ret += ", ";
6897 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6899 ret += generate_se_code(op_list[o], schema);
// Unknown operator type: log and return an error marker.
6904 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6905 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6906 return("ERROR in generate_predicate_code");
// Render predicate `pr` as C source text, with scalar operands rendered by
// generate_se_code_fm_aggr (so they are expressed in terms of gbvar / aggvar)
// and sub-predicates by recursive calls to this function.
// The switch dispatches on pr->get_operator_type(); an unrecognized type is
// reported on stderr and an error string is returned.  Those diagnostics now
// name this function (they previously named generate_predicate_code).
6910 static string generate_predicate_code_fm_aggr(predicate_t *pr, string gbvar, string aggvar,table_list *schema){
6912 vector<literal_t *> litv;
6914 data_type *ldt, *rdt;
6915 vector<scalarexp_t *> op_list;
6918 switch(pr->get_operator_type()){
// Literal-list membership: one equality test per literal, joined by " || ".
6920 ldt = pr->get_left_se()->get_data_type();
6923 litv = pr->get_lit_vec();
6924 for(i=0;i<litv.size();i++){
6925 if(i>0) ret.append(" || ");
// Types needing a complex comparison call the type's hfta equals function and
// test its result against 0; the literal is named complex_literal_<n> when it
// is a complex literal, otherwise its C code is emitted inline.
6928 if(ldt->complex_comparison(ldt) ){
6929 ret.append( ldt->get_hfta_equals_fcn(ldt) );
6931 if(ldt->is_buffer_type() )
6933 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6935 if(ldt->is_buffer_type() )
6937 if(litv[i]->is_cpx_lit()){
6938 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6941 ret.append(litv[i]->to_C_code(""));
6943 ret.append(") == 0");
// Simple types: left expression compared with the literal's hfta C code.
6945 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6947 ret.append(litv[i]->to_hfta_C_code(""));
// Comparison between two scalar expressions.
6956 ldt = pr->get_left_se()->get_data_type();
6957 rdt = pr->get_right_se()->get_data_type();
// Complex comparison: call the hfta comparison function on both sides and
// apply the translated comparison operator to its result.
6960 if(ldt->complex_comparison(rdt) ){
6961 // TODO can use get_hfta_equals_fcn if op is "=" ?
6962 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6964 if(ldt->is_buffer_type() )
6966 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6968 if(rdt->is_buffer_type() )
6970 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
6972 ret.append( generate_C_comparison_op(pr->get_op()));
// Simple comparison: left, translated operator, right.
6975 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6976 ret.append( generate_C_comparison_op(pr->get_op()));
6977 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
// Prefix form: boolean operator followed by a single sub-predicate.
6983 ret.append( generate_C_boolean_op(pr->get_op()) );
6984 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6987 case PRED_BINARY_OP:
// Infix form: left sub-predicate, boolean operator, right sub-predicate.
6989 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6990 ret.append( generate_C_boolean_op(pr->get_op()) );
6991 ret.append(generate_predicate_code_fm_aggr(pr->get_right_pr(), gbvar, aggvar,schema) );
// Predicate function call: "<op>( " followed by comma-separated operands.
6995 ret += pr->get_op() + "( ";
6996 op_list = pr->get_op_list();
6997 for(o=0;o<op_list.size();++o){
6998 if(o>0) ret += ", ";
6999 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
7001 ret += generate_se_code_fm_aggr(op_list[o], gbvar, aggvar, schema);
// Unknown operator type: log and return an error marker naming this function.
7006 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code_fm_aggr, line %d, character %d, unknown predicate operator type %d\n",
7007 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
7008 return("ERROR in generate_predicate_code_fm_aggr");
// Build C text that tests two already-rendered operands (lhs_op, rhs_op) of
// type dt for equality.
// Types needing a complex comparison call dt's hfta equals function and test
// the result against 0; other types emit lhs_op and rhs_op directly
// (presumably joined by " == " -- confirm).
7016 static string generate_equality_test(string &lhs_op, string &rhs_op, data_type *dt){
7019 if(dt->complex_comparison(dt) ){
7020 ret.append(dt->get_hfta_equals_fcn(dt));
// Buffer types get extra text before each operand (presumably an address-of
// prefix -- confirm).
7022 if(dt->is_buffer_type() )
7026 if(dt->is_buffer_type() )
7028 ret.append(rhs_op );
7029 ret.append(") == 0");
7031 ret.append(lhs_op );
7033 ret.append(rhs_op );
// Build C text for an ordering test between two already-rendered operands
// (lhs_op, rhs_op) of type dt.
// Types needing a complex comparison call dt's hfta comparison function and
// test the result against 1; other types emit lhs_op and rhs_op directly
// (presumably joined by a relational operator -- confirm which one).
7039 static string generate_lt_test(string &lhs_op, string &rhs_op, data_type *dt){
7042 if(dt->complex_comparison(dt) ){
7043 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types get extra text before each operand (presumably an address-of
// prefix -- confirm).
7045 if(dt->is_buffer_type() )
7049 if(dt->is_buffer_type() )
7051 ret.append(rhs_op );
7052 ret.append(") == 1");
7054 ret.append(lhs_op );
7056 ret.append(rhs_op );
7062 //static string generate_comparison(string &lhs_op, string &rhs_op, data_type *dt){
7065 // if(dt->complex_comparison(dt) ){
7066 // ret.append(dt->get_hfta_equals_fcn(dt));
7068 // if(dt->is_buffer_type() )
7070 // ret.append(lhs_op);
7071 // ret.append(", ");
7072 // if(dt->is_buffer_type() )
7074 // ret.append(rhs_op );
7075 // ret.append(") == 0");
7077 // ret.append(lhs_op );
7078 // ret.append(" == ");
7079 // ret.append(rhs_op );
7086 // Here I assume that only MIN and MAX aggregates can be computed
7087 // over BUFFER data types.
// Generate the C statements that fold one more input value into aggregate
// number aidx of atbl, whose storage is the C lvalue text `var`.
// Non-builtin aggregates call <op>_HFTA_AGGR_UPDATE_; builtin ones are
// expanded inline.  An unrecognized builtin is reported on stderr.
7089 static string generate_aggr_update(string var, aggregate_table *atbl,int aidx, table_list *schema){
7090 string retval = "\t\t";
7091 string op = atbl->get_op(aidx);
// User-defined aggregate: pass the storage by address unless it is an
// fstring_t, then append the operands.
7094 if(! atbl->is_builtin(aidx)) {
7096 retval += op+"_HFTA_AGGR_UPDATE_(";
7097 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7098 retval+="("+var+")";
7099 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
7100 for(o=0;o<opl.size();++o){{
7102 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7104 retval += generate_se_code(opl[o], schema);
7113 // builtin processing
7114 data_type *dt = atbl->get_data_type(aidx);
// Increment form (presumably COUNT -- confirm).
7118 retval.append("++;\n");
// Accumulate form (presumably SUM -- confirm).
7123 retval.append(" += ");
7124 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7125 retval.append(";\n");
// Keep-smaller form (presumably MIN -- confirm): evaluate the input into
// aggr_tmp_<aidx>, then replace var when the temporary compares below it.
// Buffer types are replaced through dt's hfta buffer-replace function.
7129 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7130 retval += dt->make_host_cvar(tmpstr);
7132 retval += generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7133 if(dt->complex_comparison(dt)){
7134 if(dt->is_buffer_type())
7135 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7137 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7139 sprintf(tmpstr,"\t\tif(aggr_tmp_%d < %s)\n",aidx,var.c_str());
7141 retval.append(tmpstr);
7142 if(dt->is_buffer_type()){
7143 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7145 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7147 retval.append(tmpstr);
// Keep-larger form (presumably MAX -- confirm): same shape as above with the
// comparison reversed.
7152 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7153 retval+=dt->make_host_cvar(tmpstr);
7155 retval+=generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7156 if(dt->complex_comparison(dt)){
7157 if(dt->is_buffer_type())
7158 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7160 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7162 sprintf(tmpstr,"\t\tif(aggr_tmp_%d > %s)\n",aidx,var.c_str());
7164 retval.append(tmpstr);
7165 if(dt->is_buffer_type()){
7166 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7168 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7170 retval.append(tmpstr);
// Bitwise aggregates: combine the input into var with &=, |= or ^=.
7175 if(op == "AND_AGGR"){
7177 retval.append(" &= ");
7178 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7179 retval.append(";\n");
7182 if(op == "OR_AGGR"){
7184 retval.append(" |= ");
7185 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7186 retval.append(";\n");
7189 if(op == "XOR_AGGR"){
7191 retval.append(" ^= ");
7192 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7193 retval.append(";\n");
// Running mean (presumably AVG -- confirm): maintain <var>_sum and <var>_cnt
// and recompute var as their quotient.
7197 retval += var+"_sum += "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7198 retval += "\t\t"+var+"_cnt += 1;\n";
7199 retval += "\t\t"+var+" = "+var+"_sum / "+var+"_cnt;\n";
7203 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_update.\n",op.c_str());
// Generate the C statement that removes the contribution of aggregate `var`
// from the super-aggregate `supervar` (both are C lvalue text) for aggregate
// number aidx of atbl.  An unrecognized builtin is reported on stderr.
7212 static string generate_superaggr_minus(string var, string supervar, aggregate_table *atbl,int aidx, table_list *schema){
7213 string retval = "\t\t";
7214 string op = atbl->get_op(aidx);
// User-defined aggregate: call <op>_HFTA_AGGR_MINUS_(supervar, var), passing
// each by address unless the storage type is fstring_t.
7217 if(! atbl->is_builtin(aidx)) {
7219 retval += op+"_HFTA_AGGR_MINUS_(";
7220 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7221 retval+="("+supervar+"),";
7222 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7223 retval+="("+var+");\n";
// COUNT and SUM are undone by subtraction.
7229 if(op == "COUNT" || op == "SUM"){
7230 retval += supervar + "-=" +var + ";\n";
// XOR is its own inverse.
7234 if(op == "XOR_AGGR"){
7235 retval += supervar + "^=" +var + ";\n";
// NOTE(review): the statement controlled by this MIN/MAX test is not among the
// lines shown; presumably nothing useful can be emitted for them -- confirm.
7239 if(op=="MIN" || op == "MAX")
7242 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_superaggr_minus.\n",op.c_str());
// Generate the C statements that initialize aggregate number aidx of atbl
// (stored in the C lvalue text `var`) from the first input value.
// An unrecognized builtin is reported on stderr.
7251 static string generate_aggr_init(string var, aggregate_table *atbl,int aidx, table_list *schema){
7253 string op = atbl->get_op(aidx);
// User-defined aggregate: call <op>_HFTA_AGGR_INIT_ on the storage, then
// <op>_HFTA_AGGR_UPDATE_ with the operands.  The storage is passed by address
// unless its type is fstring_t.
7256 if(! atbl->is_builtin(aidx)){
7258 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_INIT_(";
7259 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7260 retval+="("+var+"));\n";
7262 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_UPDATE_(";
7263 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7264 retval+="("+var+")";
7265 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
7267 for(o=0;o<opl.size();++o){
7269 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7271 retval += generate_se_code(opl[o],schema);
7277 // builtin aggregate processing
7278 data_type *dt = atbl->get_data_type(aidx);
// Start-at-one form (presumably COUNT -- confirm).
7282 retval.append(" = 1;\n");
// Value-seeded aggregates start from the first input value.
7286 if(op == "SUM" || op == "MIN" || op == "MAX" || op == "AND_AGGR" ||
7287 op=="AVG" || op == "OR_AGGR" || op == "XOR_AGGR"){
// Buffer types: evaluate into aggr_tmp_<aidx>, then copy into var with dt's
// hfta buffer assign-copy function.
7288 if(dt->is_buffer_type()){
7289 sprintf(tmpstr,"\t\taggr_tmp_%d = %s;\n",aidx,generate_se_code(atbl->get_aggr_se(aidx), schema ).c_str() );
7290 retval.append(tmpstr);
7291 sprintf(tmpstr,"\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_assign_copy().c_str(),var.c_str(),aidx);
7292 retval.append(tmpstr);
// Sum/count seeding (presumably the AVG case -- confirm).
7295 retval += var+"_sum = "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7296 retval += "\t"+var+"_cnt = 1;\n";
7297 retval += "\t"+var+" = "+var+"_sum;\n";
// Remaining non-buffer case: the input expression followed by ";"
// (presumably preceded by "<var> = " -- confirm).
7301 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema));
7302 retval.append(";\n");
7308 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_init.\n",op.c_str());
// Generate the C statements that reset aggregate number aidx of atbl (stored
// in the C lvalue text `var`) to an empty state without consuming an input.
// Builtin aggregates over buffer types are not supported: an error string is
// returned for them.  An unrecognized builtin is reported on stderr.
7316 static string generate_aggr_reinitialize(string var, aggregate_table *atbl,int aidx, table_list *schema){
7318 string op = atbl->get_op(aidx);
// User-defined aggregate: running aggregates use <op>_HFTA_AGGR_REINIT_,
// all others <op>_HFTA_AGGR_INIT_.  The storage is passed by address unless
// its type is fstring_t.
7321 if(! atbl->is_builtin(aidx)){
7323 retval += "\t"+atbl->get_op(aidx);
7324 if(atbl->is_running_aggr(aidx)){
7325 retval += "_HFTA_AGGR_REINIT_(";
7327 retval += "_HFTA_AGGR_INIT_(";
7329 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7330 retval+="("+var+"));\n";
7334 // builtin aggregate processing
7335 data_type *dt = atbl->get_data_type(aidx);
// Reset-to-zero form (presumably COUNT -- confirm).
7339 retval.append(" = 0;\n");
// SUM and the bitwise aggregates reset to the text of a literal constructed
// from dt's type indicator.
7343 if(op == "SUM" || op == "AND_AGGR" ||
7344 op == "OR_AGGR" || op == "XOR_AGGR"){
7345 if(dt->is_buffer_type()){
7346 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7350 literal_t l(dt->type_indicator());
7351 retval.append(l.to_string());
7352 retval.append(";\n");
// Reset to the type's maximum literal (presumably the MIN case -- confirm).
7358 if(dt->is_buffer_type()){
7359 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7363 retval.append(dt->get_max_literal());
7364 retval.append(";\n");
// Reset to the type's minimum literal (presumably the MAX case -- confirm).
7370 if(dt->is_buffer_type()){
7371 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7375 retval.append(dt->get_min_literal());
7376 retval.append(";\n");
7381 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_aggr_reinitialize.\n",op.c_str());
7388 // Generate parameter holding vars from a param table.
// For every parameter name in param_tbl this emits a declaration of
// param_<name> (via the type's make_host_cvar), plus a
// "struct search_handle *param_handle_<name>;" line when the parameter is
// accessed by handle.
7389 static string generate_param_vars(param_table *param_tbl){
7392 vector<string> param_vec = param_tbl->get_param_names();
7393 for(p=0;p<param_vec.size();p++){
7394 data_type *dt = param_tbl->get_data_type(param_vec[p]);
// NOTE(review): the name passed to make_host_cvar already ends in ";\n" and
// another ";\n" is appended, so the emitted text may carry a doubled
// semicolon -- confirm this is intended.
7395 sprintf(tmpstr,"param_%s;\n", param_vec[p].c_str());
7396 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7397 if(param_tbl->handle_access(param_vec[p])){
7398 ret += "\tstruct search_handle *param_handle_"+param_vec[p]+";\n";
7404 // Parameter manipulation routines
// Generate the text of "int load_params_<functor_name>(gs_int32_t sz, void *value)",
// which unpacks a parameter value block into the functor's param_<name>
// variables.  The generated function:
//   1. checks that the block is at least the sum of the parameter sizes,
//   2. checks that every buffer-typed parameter lies inside the block,
//   3. copies each parameter out of the block,
//   4. registers the pass-by-handle parameters listed in param_handle_table,
// and returns 0 (early checks "return 1" on a malformed block).
// The buffer-copy call is emitted as "<assign_copy>(&param_<name>, ...)"; the
// "&param_" text had been corrupted to a pilcrow sequence and is restored here.
7405 static string generate_load_param_block(string functor_name,
7406 param_table *param_tbl,
7407 vector<handle_param_tbl_entry *> param_handle_table
7410 vector<string> param_names = param_tbl->get_param_names();
7412 string ret = "int load_params_"+functor_name+"(gs_int32_t sz, void *value){\n";
7413 ret.append("\tint pos=0;\n");
7414 ret.append("\tint data_pos;\n");
// Declare a tmp_var_<name> staging variable for every buffer-typed parameter.
7416 for(p=0;p<param_names.size();p++){
7417 data_type *dt = param_tbl->get_data_type(param_names[p]);
7418 if(dt->is_buffer_type()){
7419 sprintf(tmpstr,"tmp_var_%s;\n", param_names[p].c_str());
7420 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7425 // Verify that the block is of minimum size
7426 if(param_names.size() > 0){
7427 ret += "//\tVerify that the value block is large enough */\n";
7428 ret.append("\n\tdata_pos = ");
7429 for(p=0;p<param_names.size();p++){
7430 if(p>0) ret.append(" + ");
7431 data_type *dt = param_tbl->get_data_type(param_names[p]);
7432 ret.append("sizeof( ");
7433 ret.append( dt->get_host_cvar_type() );
7437 ret.append("\tif(data_pos > sz) return 1;\n\n");
7440 ///////////////////////
7441 /// Verify that all strings can be unpacked.
7443 ret += "//\tVerify that the strings can be unpacked */\n";
7444 for(p=0;p<param_names.size();p++){
7445 data_type *dt = param_tbl->get_data_type(param_names[p]);
7446 if(dt->is_buffer_type()){
7447 sprintf(tmpstr,"\ttmp_var_%s = *( (%s *)((gs_sp_t )value+pos) );\n",param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7449 switch( dt->get_type() ){
7451 // ret += "\ttmp_var_"+param_names[p]+".offset = ntohl( tmp_var_"+param_names[p]+".offset );\n"; // ntoh conversion
7452 // ret += "\ttmp_var_"+param_names[p]+".length = ntohl( tmp_var_"+param_names[p]+".length );\n"; // ntoh conversion
// Bounds check, then turn the block-relative offset into an address.
7453 sprintf(tmpstr,"\tif( (int)(tmp_var_%s.offset) + tmp_var_%s.length > sz) return 1;\n",param_names[p].c_str(), param_names[p].c_str() );
7455 sprintf(tmpstr,"\ttmp_var_%s.offset = (gs_p_t)( (gs_sp_t )value + (gs_p_t)(tmp_var_%s.offset) );\n",param_names[p].c_str(), param_names[p].c_str() );
7459 fprintf(stderr,"ERROR: parameter %s is of type %s, a buffered type, but I don't know how to unpack it as a parameter.\n",param_names[p].c_str(), dt->to_string().c_str() );
7464 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7468 /////////////////////////
7470 ret += "/*\tThe block is OK, do the unpacking. */\n";
7471 ret += "\tpos = 0;\n";
7473 for(p=0;p<param_names.size();p++){
7474 data_type *dt = param_tbl->get_data_type(param_names[p]);
7475 if(dt->is_buffer_type()){
// Buffer types are copied from the staging variable into param_<name>.
7476 sprintf(tmpstr,"\t%s(&param_%s, &tmp_var_%s);\n", dt->get_hfta_buffer_assign_copy().c_str(),param_names[p].c_str(),param_names[p].c_str() );
7479 // if(dt->needs_hn_translation()){
7480 // sprintf(tmpstr,"\tparam_%s = %s( *( (%s *)( (gs_sp_t )value+pos) ) );\n",
7481 // param_names[p].c_str(), dt->ntoh_translation().c_str(), dt->get_host_cvar_type().c_str() );
// Other types are read directly from the block at the current position.
7483 sprintf(tmpstr,"\tparam_%s = *( (%s *)( (gs_sp_t )value+pos) );\n",
7484 param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7488 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7491 // TODO: I think this method of handle registration is obsolete
7492 // and should be deleted.
7493 // some examination reveals that handle_access is always false.
7494 for(p=0;p<param_names.size();p++){
7495 if(param_tbl->handle_access(param_names[p]) ){
7496 data_type *pdt = param_tbl->get_data_type(param_names[p]);
7498 ret += "\tt->param_handle_"+param_names[p]+" = " +
7499 pdt->handle_registration_name() +
7500 "((struct FTA *)t, &(t->param_"+param_names[p]+"));\n";
7503 // Register the pass-by-handle parameters
7505 ret += "/* register the pass-by-handle parameters */\n";
7508 for(ph=0;ph<param_handle_table.size();++ph){
7509 data_type pdt(param_handle_table[ph]->type_name);
7510 switch(param_handle_table[ph]->val_type){
// Emit "handle_param_<ph> = <registration fcn>(" followed by the parameter,
// wrapped in "&( ... )" for buffer types.
7516 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7518 if(pdt.is_buffer_type()) ret += "&(";
7519 ret += "param_"+param_handle_table[ph]->param_name;
7520 if(pdt.is_buffer_type()) ret += ")";
7524 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7530 ret += "\treturn(0);\n";
7531 ret.append("}\n\n");
// Generate the text of "void destroy_params_<functor_name>()", which releases
// what load_params acquired: buffer-typed parameters are destroyed with the
// type's hfta buffer-destroy function, handle-accessed parameters with the
// type's handle destructor, and pass-by-handle entries of kind param_e are
// deregistered.
// The destroy call is emitted as "<destroy>(&param_<name>)"; the "&param_"
// text had been corrupted to a pilcrow sequence and is restored here.
7537 static string generate_delete_param_block(string functor_name,
7538 param_table *param_tbl,
7539 vector<handle_param_tbl_entry *> param_handle_table
7543 vector<string> param_names = param_tbl->get_param_names();
7545 string ret = "void destroy_params_"+functor_name+"(){\n";
7547 for(p=0;p<param_names.size();p++){
7548 data_type *dt = param_tbl->get_data_type(param_names[p]);
7549 if(dt->is_buffer_type()){
7550 sprintf(tmpstr,"\t\t%s(&param_%s);\n",dt->get_hfta_buffer_destroy().c_str(),param_names[p].c_str());
7553 if(param_tbl->handle_access(param_names[p]) ){
7554 ret += "\t\t" + dt->get_handle_destructor() +
7555 "(t->param_handle_" + param_names[p] + ");\n";
7559 ret += "//\t\tDeregister handles.\n";
// Only handles that were created from query parameters are deregistered here.
7561 for(ph=0;ph<param_handle_table.size();++ph){
7562 if(param_handle_table[ph]->val_type == param_e){
7563 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7564 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7573 // ---------------------------------------------------------------------
7574 // functions for creating functor variables.
// Generate declarations for the variables used to unpack input columns.
// For every column id in cid_set this emits an unpack_var_<field>_<tblref>
// variable of the column's type (looked up in schema) and formats a
// "gs_int32_t unpack_offset_<field>_<tblref>;" declaration.
7576 static string generate_access_vars(col_id_set &cid_set, table_list *schema){
7578 col_id_set::iterator csi;
7580 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7581 int schref = (*csi).schema_ref;
7582 int tblref = (*csi).tblvar_ref;
7583 string field = (*csi).field;
7584 data_type dt(schema->get_type_name(schref,field));
7585 sprintf(tmpstr,"unpack_var_%s_%d", field.c_str(), tblref);
7586 ret+="\t"+dt.make_host_cvar(tmpstr)+";\n";
7587 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", field.c_str(), tblref);
// Generate declarations for partial-function result variables.
// A partial_fcn_result_<p> variable is declared for function p unless function
// caching is enabled and p is neither partial nor referenced more than once.
// With caching enabled, functions referenced more than once also get an
// "int fcn_ref_cnt_<p>;" counter.
7593 static string generate_partial_fcn_vars(vector<scalarexp_t *> &partial_fcns,
7594 vector<int> &ref_cnt, vector<bool> &is_partial, bool gen_fcn_cache){
7599 for(p=0;p<partial_fcns.size();++p){
7600 if(!gen_fcn_cache || is_partial[p] || ref_cnt[p]>1){
7601 sprintf(tmpstr,"partial_fcn_result_%d", p);
7602 ret+="\t"+partial_fcns[p]->get_data_type()->make_host_cvar(tmpstr)+";\n";
7603 if(gen_fcn_cache && ref_cnt[p]>1){
7604 ret+="\tint fcn_ref_cnt_"+int_to_string(p)+";\n";
// Generate declarations for the complex literals: one complex_literal_<cl>
// variable per table entry, and a "struct search_handle *lit_handle_<cl>;"
// declaration is formatted for entries that are handle references.
7612 static string generate_complex_lit_vars(cplx_lit_table *complex_literals){
7615 for(cl=0;cl<complex_literals->size();cl++){
7616 literal_t *l = complex_literals->get_literal(cl);
// NOTE(review): this heap-allocated data_type has no release on the lines
// shown; if none exists elsewhere in the loop it leaks once per literal --
// confirm, and consider a stack object instead.
7617 data_type *dtl = new data_type( l->get_type() );
7618 sprintf(tmpstr,"complex_literal_%d",cl);
7619 ret += "\t"+dtl->make_host_cvar(tmpstr)+";\n";
7620 if(complex_literals->is_handle_ref(cl)){
7621 sprintf(tmpstr,"\tstruct search_handle *lit_handle_%d;\n",cl);
// Generate one "gs_param_handle_t handle_param_<p>;" declaration for every
// entry of param_handle_table.
// The parameter is taken by reference ("&param_handle_table"); that text had
// been corrupted to a pilcrow sequence and is restored here.
7629 static string generate_pass_by_handle_vars(
7630 vector<handle_param_tbl_entry *> &param_handle_table){
7634 for(p=0;p<param_handle_table.size();++p){
7635 sprintf(tmpstr,"\tgs_param_handle_t handle_param_%d;\n",p);
7643 // ------------------------------------------------------------
7644 // functions for generating initialization code.
// Generate initialization code for the unpack offsets: for every column id in
// cid_set, format an assignment of
// ftaschema_get_field_offset_by_name(schema_handle<tblref>, "<field>")
// to unpack_offset_<field>_<tblref>.
7646 static string gen_access_var_init(col_id_set &cid_set){
7648 col_id_set::iterator csi;
7650 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7651 int tblref = (*csi).tblvar_ref;
7652 string field = (*csi).field;
7653 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle%d, \"%s\");\n", field.c_str(),tblref,tblref,field.c_str());
// Generate initialization code for the complex literals: each literal's hfta
// C code is emitted with "&(complex_literal_<cl>)" as its target argument.
// Entries that are handle references additionally format a lit_handle_<cl>
// registration call.
7660 static string gen_complex_lit_init(cplx_lit_table *complex_literals){
7664 for(cl=0;cl<complex_literals->size();cl++){
7665 literal_t *l = complex_literals->get_literal(cl);
7666 // sprintf(tmpstr,"\tcomplex_literal_%d = ",cl);
7667 // ret += tmpstr + l->to_hfta_C_code() + ";\n";
7668 sprintf(tmpstr,"&(complex_literal_%d)",cl);
7669 ret += "\t" + l->to_hfta_C_code(tmpstr) + ";\n";
7670 // I think that the code below is obsolete
7671 // TODO: it is obsolete. add_cpx_lit is always
7672 // called with the handle indicator being false.
7673 // This entire structure should be cleansed.
7674 if(complex_literals->is_handle_ref(cl)){
// NOTE(review): this heap-allocated data_type has no release on the lines
// shown -- confirm whether it leaks.
7675 data_type *dt = new data_type( l->get_type() );
7676 sprintf(tmpstr,"\tlit_handle_%d = %s(&(f->complex_literal_%d));\n",
7677 cl, dt->hfta_handle_registration_name().c_str(), cl);
// Generate initialization code for partial-function results: every
// buffer-typed result variable is set up with the hfta C code of an empty
// literal of its type, targeting "&(partial_fcn_result_<p>)".
// Non-buffer results get no initialization on the lines shown.
7686 static string gen_partial_fcn_init(vector<scalarexp_t *> &partial_fcns){
7690 for(p=0;p<partial_fcns.size();++p){
7691 data_type *pdt =partial_fcns[p]->get_data_type();
7692 literal_t empty_lit(pdt->type_indicator());
7693 if(pdt->is_buffer_type()){
7694 // sprintf(tmpstr,"\tpartial_fcn_result_%d = %s;\n",
7695 // p, empty_lit.to_hfta_C_code().c_str());
7696 sprintf(tmpstr,"&(partial_fcn_result_%d)",p);
7697 ret += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
// Generate registration code for the pass-by-handle parameters.  For each
// entry of param_handle_table a "handle_param_<ph> = <registration fcn>("
// prefix is formatted and the switch on the entry's val_type supplies the
// argument: a complex_literal_<idx> (wrapped in "&( ... )" for buffer types)
// or the hfta C code of a literal value.  An unknown val_type is reported on
// stderr.
// The parameter is taken by reference ("&param_handle_table"); that text had
// been corrupted to a pilcrow sequence and is restored here.
7703 static string gen_pass_by_handle_init(
7704 vector<handle_param_tbl_entry *> &param_handle_table){
7708 for(ph=0;ph<param_handle_table.size();++ph){
7709 data_type pdt(param_handle_table[ph]->type_name);
7710 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7711 switch(param_handle_table[ph]->val_type){
// Complex-literal argument.
7714 if(pdt.is_buffer_type()) ret += "&(";
7715 sprintf(tmpstr,"complex_literal_%d",param_handle_table[ph]->complex_literal_idx);
7717 if(pdt.is_buffer_type()) ret += ")";
// Plain literal argument.
7722 ret += param_handle_table[ph]->litval->to_hfta_C_code("") + ");\n";
7726 // query parameter handles are registered/deregistered in the
7727 // load_params function.
7728 // ret += "t->param_"+param_handle_table[ph]->param_name;
7731 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7738 //------------------------------------------------------------
7739 // functions for destructor and deregistration code
// Generate destructor code for the complex literals: for every buffer-typed
// literal, format a call of the type's hfta buffer-destroy function on
// &complex_literal_<cl>.
7741 static string gen_complex_lit_dtr(cplx_lit_table *complex_literals){
7745 for(cl=0;cl<complex_literals->size();cl++){
7746 literal_t *l = complex_literals->get_literal(cl);
7747 data_type ldt( l->get_type() );
7748 if(ldt.is_buffer_type()){
7749 sprintf(tmpstr,"\t\t%s(&complex_literal_%d);\n",
7750 ldt.get_hfta_buffer_destroy().c_str(), cl );
// Generate deregistration code for the pass-by-handle parameters: for every
// entry of param_handle_table, format a call of the entry's lfta
// deregistration function on handle_param_<ph>.
// The parameter is taken by reference ("&param_handle_table"); that text had
// been corrupted to a pilcrow sequence and is restored here.
7758 static string gen_pass_by_handle_dtr(
7759 vector<handle_param_tbl_entry *> &param_handle_table){
7763 for(ph=0;ph<param_handle_table.size();++ph){
7764 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7765 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7771 // Destroy all previous results
// For every buffer-typed partial function in partial_fcns, format a call of
// the type's hfta buffer-destroy function on &partial_fcn_result_<p>.
7772 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns){
7776 for(p=0;p<partial_fcns.size();++p){
7777 data_type *pdt =partial_fcns[p]->get_data_type();
7778 if(pdt->is_buffer_type()){
7779 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7780 pdt->get_hfta_buffer_destroy().c_str(), p );
7787 // Destroy previous results of fcns in pfcn_set
// Overload restricted to the function indices listed in pfcn_set: for each
// listed buffer-typed partial function, format a call of the type's hfta
// buffer-destroy function on &partial_fcn_result_<index>.
7788 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns, set<int> &pfcn_set){
7790 set<int>::iterator si;
7792 for(si=pfcn_set.begin(); si!=pfcn_set.end(); ++si){
7793 data_type *pdt =partial_fcns[(*si)]->get_data_type();
7794 if(pdt->is_buffer_type()){
7795 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7796 pdt->get_hfta_buffer_destroy().c_str(), (*si) );
7804 //-------------------------------------------------------------------------
7805 // Functions related to se generation bookkeeping.
// Collect the column ids referenced by predicate `pr` that are not yet in
// found_cids.  On return new_cids has received those ids and found_cids has
// been extended with everything in new_cids.
// NOTE(review): new_cids is inserted into, not cleared, so callers presumably
// pass it in empty -- confirm.
7807 static void get_new_pred_cids(predicate_t *pr, col_id_set &found_cids,
7808 col_id_set &new_cids, gb_table *gtbl){
7809 col_id_set this_pred_cids;
7810 col_id_set::iterator csi;
7812 // get colrefs in predicate not already found.
7813 gather_pr_col_ids(pr,this_pred_cids,gtbl);
7814 set_difference(this_pred_cids.begin(), this_pred_cids.end(),
7815 found_cids.begin(), found_cids.end(),
7816 inserter(new_cids,new_cids.begin()) );
7818 // We've found these cids, so update found_cids
7819 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7820 found_cids.insert((*csi));
7824 // after the call, new_cids will have the colrefs in se but not found_cids.
7825 // update found_cids with the new cids.
// Scalar-expression counterpart of get_new_pred_cids: column ids are gathered
// with gather_se_col_ids, differenced against found_cids into new_cids, and
// then merged into found_cids.
// NOTE(review): new_cids is inserted into, not cleared, so callers presumably
// pass it in empty -- confirm.
7826 static void get_new_se_cids(scalarexp_t *se, col_id_set &found_cids,
7827 col_id_set &new_cids, gb_table *gtbl){
7828 col_id_set this_se_cids;
7829 col_id_set::iterator csi;
7831 // get colrefs in se not already found.
7832 gather_se_col_ids(se,this_se_cids,gtbl);
7833 set_difference(this_se_cids.begin(), this_se_cids.end(),
7834 found_cids.begin(), found_cids.end(),
7835 inserter(new_cids,new_cids.begin()) );
7837 // We've found these cids, so update found_cids
7838 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7839 found_cids.insert((*csi));
// Generate code that unpacks the columns in new_cids from their input tuples
// into the unpack_var_<field>_<tblref> variables.
// needs_xform[tblref] selects between the type's transforming and
// non-transforming hfta unpack functions.  Buffer types use the checked form,
// which takes the tuple size and reports through "problem", followed by
// "if(problem) return <on_problem>"; other types use the "_nocheck" variant.
7843 static string gen_unpack_cids(table_list *schema, col_id_set &new_cids, string on_problem, vector<bool> &needs_xform){
7845 col_id_set::iterator csi;
7847 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7848 int schref = (*csi).schema_ref;
7849 int tblref = (*csi).tblvar_ref;
7850 string field = (*csi).field;
7851 data_type dt(schema->get_type_name(schref,field));
7853 if(needs_xform[tblref]){
7854 unpack_fcn = dt.get_hfta_unpack_fcn();
7856 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
7858 if(dt.is_buffer_type()){
7859 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7861 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
7864 if(dt.is_buffer_type()){
7865 ret += "\tif(problem) return "+on_problem+" ;\n";
7871 // generates the declaration of all the variables related to
7872 // temp tuples generation
// On the lines shown this is a comment line followed by
// "bool temp_tuple_received;".
7873 static string gen_decl_temp_vars(){
7876 ret += "\t// variables related to temp tuple generation\n";
7877 ret += "\tbool temp_tuple_received;\n";
7882 // generates initialization code for variables related to temp tuple processing
// Emits "temp_tuple_received = false;" and, for every column referenced by a
// temporal select-list expression, formats an assignment initializing
// unpack_var_<field>_<tblref> to the type's minimum literal when the column is
// increasing and to its maximum literal otherwise.
7883 static string gen_init_temp_vars(table_list *schema, vector<select_element *>& select_list, gb_table *gtbl){
7885 col_id_set::iterator csi;
7888 // Initialize internal state
7889 ret += "\ttemp_tuple_received = false;\n";
7891 col_id_set temp_cids; // colrefs unpacked thus far.
7893 for(s=0;s<select_list.size();s++){
7894 if (select_list[s]->se->get_data_type()->is_temporal()) {
7895 // Find the set of attributes accessed in this SE
7896 col_id_set new_cids;
7897 get_new_se_cids(select_list[s]->se,temp_cids, new_cids, gtbl);
7900 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7901 int schref = (*csi).schema_ref;
7902 int tblref = (*csi).tblvar_ref;
7903 string field = (*csi).field;
// The modifier list is passed as well; it is what is_increasing() below is
// presumably answered from -- confirm.
7904 data_type dt(schema->get_type_name(schref,field), schema->get_modifier_list(schref,field));
7906 sprintf(tmpstr,"\t unpack_var_%s_%d = %s;\n", field.c_str(), tblref,
7907 dt.is_increasing() ? dt.get_min_literal().c_str() : dt.get_max_literal().c_str());
7917 // generates a check if tuple is temporal
// The generated code tests input tuple tup<channel> with
// ftaschema_is_temporal_tuple_offset(tuple_metadata_offset<channel>, ...) and
// sets temp_tuple_received accordingly.
// node_name is not referenced on any line visible here.
7918 static string gen_temp_tuple_check(string node_name, int channel) {
7922 sprintf(tmpstr, "tup%d", channel);
7923 string tup_name = tmpstr;
// schema_handle_name is only referenced by the commented-out variant below.
7924 sprintf(tmpstr, "schema_handle%d", channel);
7925 string schema_handle_name = tmpstr;
7926 string tuple_offset_name = "tuple_metadata_offset"+int_to_string(channel);
7928 // check if it is a temporary status tuple
7929 ret += "\t// check if tuple is temp status tuple\n";
7930 // ret += "\tif (ftaschema_is_temporal_tuple(" + schema_handle_name + ", " + tup_name + ".data)) {\n";
7931 ret += "\tif (ftaschema_is_temporal_tuple_offset(" + tuple_offset_name + ", " + tup_name + ".data)) {\n";
7932 ret += "\t\ttemp_tuple_received = true;\n";
7934 ret += "\telse\n\t\ttemp_tuple_received = false;\n\n";
7939 // generates unpacking code for all temporal attributes referenced in select
// found_cids is updated as a side effect (through get_new_se_cids), so columns
// unpacked here are not reported as new again on later calls with the same set.
// The generated unpack code returns "false" on an unpacking problem.
7940 static string gen_unpack_temp_vars(table_list *schema, col_id_set& found_cids, vector<select_element *>& select_list, gb_table *gtbl, vector<bool> &needs_xform) {
7944 // Unpack all the temporal attributes references in select list
7945 // we need it to be able to generate temp status tuples
7946 for(s=0;s<select_list.size();s++){
7947 if (select_list[s]->se->get_data_type()->is_temporal()) {
7948 // Find the set of attributes accessed in this SE
7949 col_id_set new_cids;
7950 get_new_se_cids(select_list[s]->se,found_cids, new_cids, gtbl);
7951 // Unpack these values.
7952 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
7960 // Generates temporal tuple generation code (except attribute packing)
// The emitted code sizes result as the tuple struct of node_name plus one
// gs_uint8_t, allocates it with malloc, marks it heap resident, writes
// TEMPORAL_TUPLE into the byte that follows the struct, and declares a typed
// pointer named tuple that aliases result.data.
// NOTE(review): the emitted code does not test the malloc result - confirm
// that this is acceptable for the runtime.
7961 static string gen_init_temp_status_tuple(string node_name) {
7964 ret += "\t// create temp status tuple\n";
7965 ret += "\tresult.tuple_size = sizeof("+generate_tuple_name( node_name)+") + sizeof(gs_uint8_t);\n";
7966 ret += "\tresult.data = (gs_sp_t )malloc(result.tuple_size);\n";
7967 ret += "\tresult.heap_resident = true;\n";
7968 ret += "\t// Mark tuple as temporal\n";
7969 ret += "\t*((gs_sp_t )result.data + sizeof("+generate_tuple_name( node_name)+")) = TEMPORAL_TUPLE;\n";
7971 ret += "\t"+generate_tuple_name( node_name)+" *tuple = ("+
7972 generate_tuple_name( node_name) +" *)(result.data);\n";
7978 // Assume that all colrefs unpacked already ...
// Emits, for every partial function index in pfcn_refs, the evaluation code
// produced by unpack_partial_fcn, followed by a check that makes the emitted
// routine return on_problem when retval is set.
//   schema       - forwarded to unpack_partial_fcn
//   partial_fcns - partial function expressions, indexed by the ids in pfcn_refs
//   pfcn_refs    - ids of the partial functions to evaluate
7979 static string gen_unpack_partial_fcn(table_list *schema,
7980 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7983 set<int>::iterator si;
7985 // Since set<..> is a "Sorted Associative Container",
7986 // we can walk through it in sorted order by walking from
7987 // begin() to end(). (and the partial fcns must be
7988 // evaluated in this order).
7989 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7990 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7991 ret += "\tif(retval) return "+on_problem+" ;\n";
7996 // Assume that all colrefs unpacked already ...
7997 // this time with cached functions.
// Same walk as the plain overload, with caching for functions referenced
// more than once: their evaluation is wrapped in a test of fcn_ref_cnt_<id>,
// and the flag is set to 1 after the first evaluation.
//   fcn_ref_cnt    - reference count per function id; values above 1 enable caching
//   is_partial_fcn - true for real partial functions; false entries are
//                    evaluated through generate_cached_fcn
7998 static string gen_unpack_partial_fcn(table_list *schema,
7999 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8000 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
8003 set<int>::iterator si;
8005 // Since set<..> is a "Sorted Associative Container",
8006 // we can walk through it in sorted order by walking from
8007 // begin() to end(). (and the partial fcns must be
8008 // evaluated in this order).
8009 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
// Multiply-referenced function: evaluate only while its flag is still 0.
8010 if(fcn_ref_cnt[(*si)] > 1){
8011 ret += "\tif(fcn_ref_cnt_"+int_to_string((*si))+"==0){\n";
// Partial function: evaluate it and bail out of the emitted routine on failure.
8013 if(is_partial_fcn[(*si)]){
8014 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
8015 ret += "\tif(retval) return "+on_problem+" ;\n";
// Cached function: non-partial ones store their value in
// partial_fcn_result_<id>; in both cases the flag is then set.
8017 if(fcn_ref_cnt[(*si)] > 1){
8018 if(!is_partial_fcn[(*si)]){
8019 ret += "\t\tpartial_fcn_result_"+int_to_string((*si))+"="+generate_cached_fcn(partial_fcns[(*si)],(*si),schema)+";\n";
8021 ret += "\t\tfcn_ref_cnt_"+int_to_string((*si))+"=1;\n";
8030 // This version finds and unpacks new colrefs.
8031 // found_cids gets updated with the newly unpacked cids.
// For each partial function id in pfcn_refs, emits the unpacking of the
// columns it needs that are not yet in found_cids, then the evaluation of
// the function and a return of on_problem when retval is set.
//   schema       - forwarded to gen_unpack_cids and unpack_partial_fcn
//   partial_fcns - partial function expressions, indexed by the ids in pfcn_refs
//   gtbl         - forwarded to get_new_se_cids
//   on_problem   - expression the emitted code returns on failure
//   needs_xform  - forwarded to gen_unpack_cids
8032 static string gen_full_unpack_partial_fcn(table_list *schema,
8033 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8034 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8035 vector<bool> &needs_xform){
8037 set<int>::iterator slsi;
8039 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8040 // find all new fields ref'd by this partial fcn.
8041 col_id_set new_cids;
8042 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8043 // Unpack these values.
8044 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8046 // Now evaluate the partial fcn.
8047 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8048 ret += "\tif(retval) return "+on_problem+" ;\n";
8053 // This version finds and unpacks new colrefs.
8054 // found_cids gets updated with the newly unpacked cids.
8055 // BUT : only for the partial functions.
// Cached-function aware overload: columns are unpacked and the function is
// evaluated only for ids flagged in is_partial_fcn. For ids referenced more
// than once the evaluation is wrapped in a test of fcn_ref_cnt_<id>, and the
// flag is set to 1 afterwards.
//   fcn_ref_cnt    - reference count per function id
//   is_partial_fcn - true for the ids that are real partial functions
//   on_problem     - expression the emitted code returns on failure
8056 static string gen_full_unpack_partial_fcn(table_list *schema,
8057 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8058 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
8059 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8060 vector<bool> &needs_xform){
8062 set<int>::iterator slsi;
8064 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8065 if(is_partial_fcn[(*slsi)]){
8066 // find all new fields ref'd by this partial fcn.
8067 col_id_set new_cids;
8068 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8069 // Unpack these values.
8070 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8072 // Now evaluate the partial fcn.
8073 if(fcn_ref_cnt[(*slsi)] > 1){
8074 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8076 if(is_partial_fcn[(*slsi)]){
8077 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8078 ret += "\tif(retval) return "+on_problem+" ;\n";
// Mark the cached function as evaluated.
8080 if(fcn_ref_cnt[(*slsi)] > 1){
8081 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
// Emits the evaluation of cached, non-partial functions that are referenced
// more than once: when fcn_ref_cnt_<id> is still 0 the emitted code stores
// the value built by generate_cached_fcn in partial_fcn_result_<id> and sets
// the flag to 1.
//   schema         - forwarded to generate_cached_fcn
//   partial_fcns   - function expressions, indexed by the ids in pfcn_refs
//   pfcn_refs      - ids of the functions to consider
//   fcn_ref_cnt    - reference count per function id
//   is_partial_fcn - ids flagged true here are skipped
8090 static string gen_remaining_cached_fcns(table_list *schema,
8091 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8092 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn){
8094 set<int>::iterator slsi;
8096 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8097 if(!is_partial_fcn[(*slsi)] && fcn_ref_cnt[(*slsi)] > 1){
// NOTE(review): this test appears to repeat the fcn_ref_cnt condition of the
// enclosing if - confirm it can be dropped.
8099 if(fcn_ref_cnt[(*slsi)] > 1){
8100 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8101 ret += "\t\tpartial_fcn_result_"+int_to_string((*slsi))+"="+generate_cached_fcn(partial_fcns[(*slsi)],(*slsi),schema)+";\n";
8102 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
8111 // unpack the colrefs in cid_set not in found_cids
// For each such column, emits an assignment to unpack_var_<field>_<tblref>.
// Buffer types use the checked unpack call (tuple size and problem flag) and
// are followed by a return of on_problem when problem is set; other types use
// the _nocheck variant of the unpack function.
//   schema      - source of the type name of each field
//   cid_set     - all column ids referenced by the query node
//   found_cids  - column ids that were already unpacked
//   on_problem  - expression the emitted code returns on unpack failure
//   needs_xform - indexed by table reference; selects the transforming
//                 unpack function over the noxf one
8112 static string gen_remaining_colrefs(table_list *schema,
8113 col_id_set &cid_set, col_id_set &found_cids, string on_problem,
8114 vector<bool> &needs_xform){
8116 col_id_set::iterator csi;
8118 for(csi=cid_set.begin(); csi!=cid_set.end();csi++){
8119 if(found_cids.count( (*csi) ) == 0){
8120 int schref = (*csi).schema_ref;
8121 int tblref = (*csi).tblvar_ref;
8122 string field = (*csi).field;
// NOTE(review): unlike gen_init_temp_vars, no modifier list is passed to the
// data_type constructor here - confirm that this is intended.
8123 data_type dt(schema->get_type_name(schref,field));
8125 if(needs_xform[tblref]){
8126 unpack_fcn = dt.get_hfta_unpack_fcn();
8128 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
8130 if(dt.is_buffer_type()){
8131 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
8133 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
8136 if(dt.is_buffer_type()){
8137 ret.append("\tif(problem) return "+on_problem+" ;\n");
// Emits a local variable selvar_<s> for every buffer-typed select expression
// that has to be computed, initialized with the code from generate_se_code.
// Column references, partial functions and functions carrying an aggregate
// reference are skipped.
//   schema      - forwarded to generate_se_code
//   select_list - select expressions; s is the position in this list
8144 static string gen_buffer_selvars(table_list *schema,
8145 vector<select_element *> &select_list){
8149 for(s=0;s<select_list.size();s++){
8150 scalarexp_t *se = select_list[s]->se;
8151 data_type *sdt = se->get_data_type();
8152 if(sdt->is_buffer_type() &&
8153 !( (se->get_operator_type() == SE_COLREF) ||
8154 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8155 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8157 sprintf(tmpstr,"selvar_%d",s);
8158 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
8159 ret += generate_se_code(se,schema) +";\n";
// Emits the additive terms that account for buffer-typed select expressions
// in the tuple size. Each term is a call to the buffer size function of the
// type, applied to selvar_<s> when the expression is one that
// gen_buffer_selvars materializes, and to the expression code itself otherwise.
//   select_list - select expressions; s is the position in this list
//   schema      - forwarded to generate_se_code
8165 static string gen_buffer_selvars_size(vector<select_element *> &select_list,table_list *schema){
8169 for(s=0;s<select_list.size();s++){
8170 scalarexp_t *se = select_list[s]->se;
8171 data_type *sdt = se->get_data_type();
8172 if(sdt->is_buffer_type()){
8173 if( !( (se->get_operator_type() == SE_COLREF) ||
8174 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8175 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8177 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
8180 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),
8181 generate_se_code(se,schema).c_str());
// Emits a call to the buffer destroy function of the type for every
// selvar_<s> temporary of a buffer-typed select expression.
// NOTE(review): the exclusion list here also contains SE_AGGR_STAR and
// SE_AGGR_SE, which gen_buffer_selvars does not exclude - confirm that the
// two lists are meant to differ.
8189 static string gen_buffer_selvars_dtr(vector<select_element *> &select_list){
8193 for(s=0;s<select_list.size();s++){
8194 scalarexp_t *se = select_list[s]->se;
8195 data_type *sdt = se->get_data_type();
8196 if(sdt->is_buffer_type() &&
8197 !( (se->get_operator_type() == SE_COLREF) ||
8198 (se->get_operator_type() == SE_AGGR_STAR) ||
8199 (se->get_operator_type() == SE_AGGR_SE) ||
8200 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8201 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8203 sprintf(tmpstr,"\t\t%s(&selvar_%d);\n",
8204 sdt->get_hfta_buffer_destroy().c_str(), s );
// Emits the code that fills the fields tuple_var<s> of the output tuple.
// tuple_pos starts just past the tuple struct plus one gs_uint8_t; buffer
// values are copied there with the buffer tuple copy function of the type and
// tuple_pos is advanced by their size. Other values are assigned directly
// from the code built by generate_se_code.
//   schema        - forwarded to generate_se_code
//   select_list   - select expressions; s is the field position
//   node_name     - used to derive the tuple struct name
//   temporal_only - when true, buffer-typed fields are not packed and only
//                   temporal fields are assigned
// NOTE(review): the exclusion list below omits the aggregate-reference case
// that gen_buffer_selvars excludes, so such an expression would refer to a
// selvar that was not declared - confirm this cannot occur for the callers.
8212 static string gen_pack_tuple(table_list *schema, vector<select_element *> &select_list, string node_name, bool temporal_only){
8216 ret += "\tint tuple_pos = sizeof("+generate_tuple_name(node_name)+") + sizeof(gs_uint8_t);\n";
8217 for(s=0;s<select_list.size();s++){
8218 scalarexp_t *se = select_list[s]->se;
8219 data_type *sdt = se->get_data_type();
8221 if(!temporal_only && sdt->is_buffer_type()){
8222 if( !( (se->get_operator_type() == SE_COLREF) ||
8223 (se->get_operator_type() == SE_FUNC && se->is_partial()))
8225 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
8227 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
8230 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code(se,schema).c_str());
8232 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code(se,schema).c_str());
8235 }else if (!temporal_only || sdt->is_temporal()) {
8236 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
8238 ret.append(generate_se_code(se,schema) );
8246 //-------------------------------------------------------------------------
8247 // functor generation methods
8248 //-------------------------------------------------------------------------
8250 /////////////////////////////////////////////////////////
8251 //// File Output Operator
// Name of the emitted functor class: the prefix output_file_functor_
// followed by the normalized node name.
8252 string output_file_qpn::generate_functor_name(){
8253 return("output_file_functor_" + normalize_name(get_node_name()));
// Builds the source text of the functor class used by the file output
// operator. The emitted class stores the unpack offset of the temporal field
// (and of any hash fields), tracks the last posted timestamp and bucket, and
// provides the methods emitted below: temp_status_received, new_epoch,
// output_hash, num_file_streams, get_filename_base, do_compression,
// is_eof_tuple, propagate_tuple and create_temp_status_tuple.
// NOTE(review): schema, Ext_fcns and needs_xform do not appear to be used in
// this function - confirm.
8257 string output_file_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8258 string ret = "class " + this->generate_functor_name() + "{\n";
8260 // Find the temporal field
// NOTE(review): a data_type is allocated with new for each candidate field;
// confirm that the ones for non-temporal fields are released.
8261 int temporal_field_idx;
8262 data_type *tdt = NULL;
8263 for(temporal_field_idx=0;temporal_field_idx<fields.size();temporal_field_idx++){
8264 tdt = new data_type(fields[temporal_field_idx]->get_type(), fields[temporal_field_idx]->get_modifier_list());
8265 if(tdt->is_temporal()){
// The loop ran off the end of fields: no temporal field exists.
8272 if(temporal_field_idx == fields.size()){
8273 fprintf(stderr,"ERROR, no temporal field for file output operator %s\n",node_name.c_str());
8277 ret += "private:\n";
8279 // var to save the schema handle
8280 ret += "\tint schema_handle0;\n";
8281 // tuple metadata offset
8282 ret += "\tint tuple_metadata_offset0;\n";
8283 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[temporal_field_idx]->get_name().c_str());
8286 // For unpacking the hashing fields, if any
// NOTE(review): hdt is allocated with new here and in the later hash-field
// loops; no matching delete is evident - confirm ownership.
8288 for(h=0;h<hash_flds.size();++h){
8289 sprintf(tmpstr,"unpack_var_%s", fields[hash_flds[h]]->get_name().c_str());
8290 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8291 ret+="\t"+hdt->make_host_cvar(tmpstr)+";\n";
// The offset variable of the temporal field was already declared above.
8292 if(hash_flds[h]!=temporal_field_idx){
8293 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[hash_flds[h]]->get_name().c_str());
8297 // Special case for output file hashing
8298 if(n_streams>1 && hash_flds.size()==0){
8299 ret+="\tgs_uint32_t outfl_cnt;\n";
8302 ret += "//\t\tRemember the last posted timestamp.\n";
8303 ret+="\t"+tdt->make_host_cvar("timestamp")+";\n";
8304 ret+="\t"+tdt->make_host_cvar("last_bucket")+";\n";
8305 ret+="\t"+tdt->make_host_cvar("slack")+";\n";
8306 ret += "\tbool first_execution;\n";
8307 ret += "\tbool temp_tuple_received;\n";
8308 ret += "\tbool is_eof;\n";
8310 ret += "\tgs_int32_t bucketwidth;\n";
8313 //-------------------
8314 // The functor constructor
8315 // pass in a schema handle (e.g. for the 1st input stream),
8316 // use it to determine how to unpack the merge variable.
8317 // ASSUME that both streams have the same layout,
8318 // just duplicate it.
8321 ret += "//\t\tFunctor constructor.\n";
8322 ret += this->generate_functor_name()+"(int schema_hndl){\n";
8324 ret += "\tschema_handle0 = schema_hndl;\n";
8325 // tuple metadata offset
8326 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
// A bucketwidth of 0 in the output spec selects the value 60.
8328 if(output_spec->bucketwidth == 0)
8329 ret += "\tbucketwidth = 60;\n";
8331 ret += "\tbucketwidth = "+int_to_string(output_spec->bucketwidth)+";\n";
8332 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8334 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[temporal_field_idx]->get_name().c_str(), fields[temporal_field_idx]->get_name().c_str());
8336 // Hashing field unpacking, if any
8337 for(h=0;h<hash_flds.size();++h){
8338 if(hash_flds[h]!=temporal_field_idx){
8339 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[hash_flds[h]]->get_name().c_str(),fields[hash_flds[h]]->get_name().c_str());
8344 ret+="\tfirst_execution = true;\n";
8346 // Initialize internal state
8347 ret += "\ttemp_tuple_received = false;\n";
8349 // Init last timestamp values to minimum value for their type
// (the maximum literal is used instead when the type is not increasing)
8350 if (tdt->is_increasing()){
8351 ret+="\ttimestamp = " + tdt->get_min_literal() + ";\n";
8352 ret+="\tlast_bucket = " + tdt->get_min_literal() + ";\n";
8354 ret+="\ttimestamp = " + tdt->get_max_literal() + ";\n";
8355 ret+="\tlast_bucket = " + tdt->get_max_literal() + ";\n";
8361 ret += "//\t\tFunctor destructor.\n";
8362 ret += "~"+this->generate_functor_name()+"(){\n";
// This operator has no query parameters: the emitted load and destroy
// routines are empty stubs.
8366 ret += "int load_params_"+this->generate_functor_name()+"(gs_int32_t sz, void *value){return 0;}\n";
8367 ret += "void destroy_params_"+this->generate_functor_name()+"(){}\n";
8369 // Register new parameter block
8370 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8371 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8372 ret += "\treturn this->load_params_"+this->generate_functor_name()+
// Emitted temp_status_received: records the eof and temporal status of the
// tuple, then unpacks the timestamp and the hash fields.
8376 ret+="\nbool temp_status_received(const host_tuple& tup0)/* const*/ {\n";
8377 ret+="\tgs_int32_t problem;\n";
8379 ret += "\tvoid *tup_ptr = (void *)(&tup0);\n";
8380 ret += "\tis_eof = ftaschema_is_eof_tuple(schema_handle0,tup_ptr);\n";
8382 ret += gen_temp_tuple_check(this->node_name, 0);
8384 sprintf(tmpstr,"\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", tdt->get_hfta_unpack_fcn_noxf().c_str(), fields[temporal_field_idx]->get_name().c_str(), 0);
8387 for(h=0;h<hash_flds.size();++h){
8388 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8389 sprintf(tmpstr,"\tunpack_var_%s = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", fields[hash_flds[h]]->get_name().c_str(), hdt->get_hfta_unpack_fcn_noxf().c_str(), fields[hash_flds[h]]->get_name().c_str(), 0);
8393 " return temp_tuple_received;\n"
// Emitted new_epoch: on first execution, or once the timestamp reaches the
// end of the current bucket, last_bucket is recomputed from the timestamp.
8399 "bool new_epoch(){\n"
8400 " if(first_execution || (last_bucket + 1) * bucketwidth <= timestamp){\n"
8401 " last_bucket = timestamp / bucketwidth;\n"
8402 " first_execution = false;\n"
8412 "inline gs_uint32_t output_hash(){return 0;}\n\n";
8414 if(hash_flds.size()==0){
8416 "gs_uint32_t output_hash(){\n"
8418 " if(outfl_cnt >= "+int_to_string(n_streams)+")\n"
8420 " return outfl_cnt;\n"
8426 "gs_uint32_t output_hash(){\n"
8427 " gs_uint32_t ret = "
8429 for(h=0;h<hash_flds.size();++h){
8431 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8432 if(hdt->use_hashfunc()){
8433 sprintf(tmpstr,"%s(&(unpack_var_%s))",hdt->get_hfta_hashfunc().c_str(),fields[hash_flds[h]]->get_name().c_str());
8435 sprintf(tmpstr,"unpack_var_%s",fields[hash_flds[h]]->get_name().c_str());
// NOTE(review): the emitted modulus is the number of hash fields, while the
// variant without hash fields wraps at n_streams - confirm this is intended.
8441 " return ret % "+int_to_string(hash_flds.size())+";\n"
8448 "gs_uint32_t num_file_streams(){\n"
8449 " return("+int_to_string(n_streams)+");\n"
// NOTE(review): the emitted get_filename_base formats into a 500 byte buffer
// with sprintf; a long output directory or query name could exceed it.
8454 "string get_filename_base(){\n"
8455 " char tmp_fname[500];\n";
8457 string output_filename_base = hfta_query_name+filestream_id;
8459 if(n_hfta_clones > 1){
8460 output_filename_base += "_"+int_to_string(parallel_idx);
8466 if(output_spec->output_directory == "")
8468 " sprintf(tmp_fname,\""+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8470 " sprintf(tmp_fname,\""+output_spec->output_directory+"/"+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8472 " return (string)(tmp_fname);\n"
8478 "bool do_compression(){\n";
8480 ret += " return true;\n";
8482 ret += " return false;\n";
8486 "bool is_eof_tuple(){\n"
8490 "bool propagate_tuple(){\n"
8493 ret+="\treturn false;\n";
8495 ret+="\treturn true;\n";
8497 // create a temp status tuple
// Only the temporal field is filled in, from the saved timestamp.
8498 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8500 ret += gen_init_temp_status_tuple(this->hfta_query_name);
8502 sprintf(tmpstr,"\ttuple->tuple_var%d = timestamp;\n",temporal_field_idx);
8507 ret += "\treturn 0;\n";
// Emits the statement that instantiates the output operator for this node as
// op<i>, templated on the functor class. The operator template is one of
// file_output_operator, zfile_output_operator or bfile_output_operator,
// selected by compression_type. The constructor receives params, the hfta
// query name as a string literal and the <hfta_query_name>_schema_definition
// symbol.
//   i      - operator index, used in the variable name
//   params - text placed first in the constructor argument list
8515 string output_file_qpn::generate_operator(int i, string params){
8516 string optype = "file_output_operator";
8517 switch(compression_type){
8519 optype = "file_output_operator";
8522 optype = "zfile_output_operator";
8525 optype = "bfile_output_operator";
8529 return(" "+optype+"<" +
8530 generate_functor_name() +
8531 "> *op"+int_to_string(i)+" = new "+optype+"<"+
8532 generate_functor_name() +">("+params+", \"" + hfta_query_name + "\""
8533 + "," + hfta_query_name + "_schema_definition);\n");
8536 /////////////////////////////////////////////////////////
// Name of the emitted functor class: the prefix spx_functor_ followed by the
// normalized node name.
// NOTE(review): normalize_name is applied twice here, unlike the other
// generate_functor_name methods - confirm the second call is redundant.
8540 string spx_qpn::generate_functor_name(){
8541 return("spx_functor_" + normalize_name(normalize_name(this->get_node_name())));
// Builds the source text of the functor class for a select/project node.
// The emitted class contains storage for unpacked fields, partial function
// results, complex literals, handle parameters and query parameters, plus a
// constructor, a destructor, the parameter block routines, predicate,
// create_output_tuple, temp_status_received and create_temp_status_tuple.
//   schema      - forwarded to the code generation helpers
//   Ext_fcns    - external function list, used for partial function,
//                 complex literal and handle parameter discovery
//   needs_xform - forwarded to the unpacking helpers
8544 string spx_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8545 // Initialize generate utility globals
8546 segen_gb_tbl = NULL;
8548 string ret = "class " + this->generate_functor_name() + "{\n";
8550 // Find variables referenced in this query node.
8553 col_id_set::iterator csi;
8556 for(w=0;w<where.size();++w)
8557 gather_pr_col_ids(where[w]->pr,cid_set,NULL);
8558 for(s=0;s<select_list.size();s++){
8559 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
8563 // Private variables : store the state of the functor.
8564 // 1) variables for unpacked attributes
8565 // 2) offsets of the unpacked attributes
8566 // 3) storage of partial functions
8567 // 4) storage of complex literals (i.e., require a constructor)
8569 ret += "private:\n";
8570 ret += "\tbool first_execution;\t// internal processing state \n";
8571 ret += "\tint schema_handle0;\n";
8573 // generate the declaration of all the variables related to
8574 // temp tuples generation
8575 ret += gen_decl_temp_vars();
8578 // unpacked attribute storage, offsets
8579 ret += "//\t\tstorage and offsets of accessed fields.\n";
8580 ret += generate_access_vars(cid_set,schema);
8581 // tuple metadata management
8582 ret += "\tint tuple_metadata_offset0;\n";
8584 // Variables to store results of partial functions.
8585 // WARNING find_partial_functions modifies the SE
8586 // (it marks the partial function id).
8587 ret += "//\t\tParital function result storage\n";
8588 vector<scalarexp_t *> partial_fcns;
8589 vector<int> fcn_ref_cnt;
8590 vector<bool> is_partial_fcn;
8591 for(s=0;s<select_list.size();s++){
8592 find_partial_fcns(select_list[s]->se, &partial_fcns,&fcn_ref_cnt,&is_partial_fcn, Ext_fcns);
8594 for(w=0;w<where.size();w++){
8595 find_partial_fcns_pr(where[w]->pr, &partial_fcns, &fcn_ref_cnt,&is_partial_fcn,Ext_fcns);
8597 // Unmark non-partial expensive functions referenced only once.
8598 for(p=0; p<partial_fcns.size();p++){
8599 if(!is_partial_fcn[p] && fcn_ref_cnt[p] <= 1){
8600 partial_fcns[p]->set_partial_ref(-1);
8603 if(partial_fcns.size()>0){
8604 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,true);
8607 // Complex literals (i.e., they need constructors)
8608 ret += "//\t\tComplex literal storage.\n";
8609 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8610 ret += generate_complex_lit_vars(complex_literals);
8612 // Pass-by-handle parameters
8613 ret += "//\t\tPass-by-handle storage.\n";
8614 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8615 ret += generate_pass_by_handle_vars(param_handle_table);
8617 // Variables to hold parameters
8618 ret += "//\tfor query parameters\n";
8619 ret += generate_param_vars(param_tbl);
8622 // The publicly exposed functions
8624 ret += "\npublic:\n";
8627 //-------------------
8628 // The functor constructor
8629 // pass in the schema handle.
8630 // 1) make assignments to the unpack offset variables
8631 // 2) initialize the complex literals
8632 // 3) Set the initial values of the temporal attributes
8633 // referenced in select clause (in case we need to emit
8634 // temporal tuple before receiving first tuple )
8636 ret += "//\t\tFunctor constructor.\n";
8637 ret += this->generate_functor_name()+"(int schema_handle0){\n";
8639 // save schema handle
// The emitted constructor parameter has the same name as the member, hence
// the explicit this-> in the emitted assignment.
8640 ret += "this->schema_handle0 = schema_handle0;\n";
8643 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8644 ret += gen_access_var_init(cid_set);
8646 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8649 ret += "//\t\tInitialize complex literals.\n";
8650 ret += gen_complex_lit_init(complex_literals);
8652 // Initialize partial function results so they can be safely GC'd
8653 ret += gen_partial_fcn_init(partial_fcns);
8655 // Initialize non-query-parameter parameter handles
8656 ret += gen_pass_by_handle_init(param_handle_table);
8658 // Init temporal attributes referenced in select list
8659 ret += gen_init_temp_vars(schema, select_list, NULL);
8664 //-------------------
8665 // Functor destructor
8666 ret += "//\t\tFunctor destructor.\n";
8667 ret += "~"+this->generate_functor_name()+"(){\n";
8669 // clean up buffer-type complex literals.
8670 ret += gen_complex_lit_dtr(complex_literals);
8672 // Deregister the pass-by-handle parameters
8673 ret += "/* register and de-register the pass-by-handle parameters */\n";
8674 ret += gen_pass_by_handle_dtr(param_handle_table);
8676 // Reclaim buffer space for partial function results
8677 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8678 ret += gen_partial_fcn_dtr(partial_fcns);
8681 // Destroy the parameters, if any need to be destroyed
8682 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8687 //-------------------
8688 // Parameter manipulation routines
8689 ret += generate_load_param_block(this->generate_functor_name(),
8690 this->param_tbl,param_handle_table );
8691 ret += generate_delete_param_block(this->generate_functor_name(),
8692 this->param_tbl,param_handle_table);
8695 //-------------------
8696 // Register new parameter block
8697 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8698 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8699 ret += "\treturn this->load_params_"+this->generate_functor_name()+
8704 //-------------------
8705 // The selection predicate.
8706 // Unpack variables for 1 cnf element
8707 // at a time, return false immediately if the
8709 // optimization : evaluate the cheap cnf elements
8710 // first, the expensive ones last.
8712 ret += "bool predicate(host_tuple &tup0){\n";
8713 // Variables for execution of the function.
8714 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8715 // Initialize cached function indicators.
8716 for(p=0;p<partial_fcns.size();++p){
8717 if(fcn_ref_cnt[p]>1){
8718 ret+="\tfcn_ref_cnt_"+int_to_string(p)+"=0;\n";
8723 ret += gen_temp_tuple_check(this->node_name, 0);
8725 if(partial_fcns.size()>0){ // partial fcn access failure
8726 ret += "\tgs_retval_t retval = 0;\n";
8730 // Reclaim buffer space for partial function results
8731 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8732 ret += gen_partial_fcn_dtr(partial_fcns);
8734 col_id_set found_cids; // colrefs unpacked thus far.
8735 ret += gen_unpack_temp_vars(schema, found_cids, select_list, NULL, needs_xform);
8737 // For temporal status tuple we don't need to do anything else
8738 ret += "\tif (temp_tuple_received) return false;\n\n";
8741 for(w=0;w<where.size();++w){
8742 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8744 // Find the set of variables accessed in this CNF elem,
8745 // but in no previous element.
8746 col_id_set new_cids;
8747 get_new_pred_cids(where[w]->pr,found_cids, new_cids, NULL);
8748 // Unpack these values.
8749 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
8750 // Find partial fcns ref'd in this cnf element
8752 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8753 ret += gen_unpack_partial_fcn(schema,partial_fcns,pfcn_refs,fcn_ref_cnt, is_partial_fcn, "false");
8755 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8756 +") ) return(false);\n";
8759 // The partial functions ref'd in the select list
8760 // must also be evaluated. If one returns false,
8761 // then implicitly the predicate is false.
8763 for(s=0;s<select_list.size();s++){
8764 collect_partial_fcns(select_list[s]->se, sl_pfcns);
// The if below has no braces: it guards only the emitted comment line, and
// gen_full_unpack_partial_fcn is called regardless of sl_pfcns.size().
8766 if(sl_pfcns.size() > 0)
8767 ret += "//\t\tUnpack remaining partial fcns.\n";
8768 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
8769 fcn_ref_cnt, is_partial_fcn,
8770 found_cids, NULL, "false", needs_xform);
8772 // Unpack remaining fields
8773 ret += "//\t\tunpack any remaining fields from the input tuple.\n";
8774 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
8777 ret += "\treturn(true);\n";
8781 //-------------------
8782 // The output tuple function.
8783 // Unpack the remaining attributes into
8784 // the placeholder variables, unpack the
8785 // partial fcn refs, then pack up the tuple.
8787 ret += "host_tuple create_output_tuple() {\n";
8788 ret += "\thost_tuple tup;\n";
8789 ret += "\tgs_retval_t retval = 0;\n";
8791 // Unpack any remaining cached functions.
8792 ret += gen_remaining_cached_fcns(schema, partial_fcns, sl_pfcns,
8793 fcn_ref_cnt, is_partial_fcn);
8796 // Now, compute the size of the tuple.
8798 // Unpack any BUFFER type selections into temporaries
8799 // so that I can compute their size and not have
8800 // to recompute their value during tuple packing.
8801 // I can use regular assignment here because
8802 // these temporaries are non-persistent.
8804 ret += "//\t\tCompute the size of the tuple.\n";
8805 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
8807 // Unpack all buffer type selections, to be able to compute their size
8808 ret += gen_buffer_selvars(schema, select_list);
8810 // The size of the tuple is the size of the tuple struct plus the
8811 // size of the buffers to be copied in.
8814 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
8815 ret += gen_buffer_selvars_size(select_list,schema);
8818 // Allocate tuple data block.
// NOTE(review): the emitted code does not test the malloc result - confirm.
8819 ret += "//\t\tCreate the tuple block.\n";
8820 ret += "\ttup.data = malloc(tup.tuple_size);\n";
8821 ret += "\ttup.heap_resident = true;\n";
8822 // Mark tuple as regular
8823 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
8825 // ret += "\ttup.channel = 0;\n";
8826 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
8827 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
8830 // (Here, offsets are hard-wired. is this a problem?)
8832 ret += "//\t\tPack the fields into the tuple.\n";
8833 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
8835 // Delete string temporaries
8836 ret += gen_buffer_selvars_dtr(select_list);
8838 ret += "\treturn tup;\n";
8841 //-------------------------------------------------------------------
8842 // Temporal update functions
8844 ret += "bool temp_status_received(){return temp_tuple_received;};\n\n";
8847 // create a temp status tuple
8848 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8850 ret += gen_init_temp_status_tuple(this->get_node_name());
8853 // (Here, offsets are hard-wired. is this a problem?)
// The final argument true restricts packing to the temporal fields.
8855 ret += "//\t\tPack the fields into the tuple.\n";
8856 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
8858 ret += "\treturn 0;\n";
// Emits the statement that instantiates a select_project_operator for this
// node as op<i>, templated on the functor class. The constructor receives
// params followed by the node name as a string literal.
//   i      - operator index, used in the variable name
//   params - text placed first in the constructor argument list
8865 string spx_qpn::generate_operator(int i, string params){
8867 return(" select_project_operator<" +
8868 generate_functor_name() +
8869 "> *op"+int_to_string(i)+" = new select_project_operator<"+
8870 generate_functor_name() +">("+params+", \"" + get_node_name() + "\");\n");
8874 ////////////////////////////////////////////////////////////////
// Name of the emitted functor class: the prefix sgah_functor_ followed by
// the normalized node name.
8879 string sgah_qpn::generate_functor_name(){
8880 return("sgah_functor_" + normalize_name(this->get_node_name()));
8884 string sgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8887 // Regular or slow flush?
8888 hfta_slow_flush = 0;
8889 if(this->get_val_of_def("hfta_slow_flush") != ""){
8890 int d = atoi(this->get_val_of_def("hfta_slow_flush").c_str() );
8892 fprintf(stderr,"Warning, hfta_slow_flush in node %s is %d, must be at least 0, setting to 0.\n",node_name.c_str(), d);
8893 hfta_slow_flush = 0;
8895 hfta_slow_flush = d;
8900 // Initialize generate utility globals
8901 segen_gb_tbl = &(gb_tbl);
8903 // Might need to generate empty values for cube processing.
8904 map<int, string> structured_types;
8905 for(g=0;g<gb_tbl.size();++g){
8906 if(gb_tbl.get_data_type(g)->is_structured_type()){
8907 structured_types[gb_tbl.get_data_type(g)->type_indicator()] = gb_tbl.get_data_type(g)->get_type_str();
8911 //--------------------------------
8912 // group definition class
8913 string ret = "class " + generate_functor_name() + "_groupdef{\n";
8915 for(g=0;g<this->gb_tbl.size();g++){
8916 sprintf(tmpstr,"gb_var%d",g);
8917 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8919 // empty strucutred literals
8920 // map<int, string>::iterator sii;
8921 // for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8922 // data_type dt(sii->second);
8923 // literal_t empty_lit(sii->first);
8924 // ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8927 if(structured_types.size()==0){
8928 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
8930 ret += "\t"+generate_functor_name() + "_groupdef(){}\n";
8934 ret += "\t// shallow copy constructors\n";
8935 ret += "\t"+generate_functor_name() + "_groupdef("+
8936 "const " + this->generate_functor_name() + "_groupdef &gd){\n";
8937 for(g=0;g<gb_tbl.size();g++){
8938 data_type *gdt = gb_tbl.get_data_type(g);
8939 sprintf(tmpstr,"\t\tgb_var%d = gd.gb_var%d;\n",g,g);
8941 // TODO : do strings perisist after the call? its a shllow copy
8942 // if(gdt->is_buffer_type()){
8943 // sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8944 // gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8947 // sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8952 ret += "\t"+generate_functor_name() + "_groupdef("+
8953 "const " + this->generate_functor_name() + "_groupdef &gd, bool *pattern){\n";
8954 // -- For patterns, need empty strucutred literals
8955 map<int, string>::iterator sii;
8956 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8957 data_type dt(sii->second);
8958 literal_t empty_lit(sii->first);
8959 ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8962 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8963 literal_t empty_lit(sii->first);
8964 ret += "\t\t"+empty_lit.to_hfta_C_code("&"+empty_lit.hfta_empty_literal_name())+";\n";
8966 for(g=0;g<gb_tbl.size();g++){
8967 data_type *gdt = gb_tbl.get_data_type(g);
8968 ret += "\t\tif(pattern["+int_to_string(g)+"]){\n";
8969 sprintf(tmpstr,"\t\t\tgb_var%d = gd.gb_var%d;\n",g,g);
8971 // TODO Do strings persist long enough? it's a shallow copy constructor?
8972 // if(gdt->is_buffer_type()){
8973 // sprintf(tmpstr,"\t\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8974 // gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8977 // sprintf(tmpstr,"\t\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8980 ret += "\t\t}else{\n";
8981 literal_t empty_lit(gdt->type_indicator());
8982 if(empty_lit.is_cpx_lit()){
8983 ret +="\t\t\tgb_var"+int_to_string(g)+"= "+empty_lit.hfta_empty_literal_name()+";\n";
8985 ret +="\t\t\tgb_var"+int_to_string(g)+"="+empty_lit.to_hfta_C_code("")+";\n";
8991 ret += "\t// deep assignment operator\n";
8992 ret += "\t"+generate_functor_name() + "_groupdef& operator=(const "+
8993 this->generate_functor_name() + "_groupdef &gd){\n";
8994 for(g=0;g<gb_tbl.size();g++){
8995 data_type *gdt = gb_tbl.get_data_type(g);
8996 if(gdt->is_buffer_type()){
8997 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd.gb_var%d));\n",
8998 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
9001 sprintf(tmpstr,"\t\tgb_var%d = gd.gb_var%d;\n",g,g);
9008 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
9009 for(g=0;g<gb_tbl.size();g++){
9010 data_type *gdt = gb_tbl.get_data_type(g);
9011 if(gdt->is_buffer_type()){
9012 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
9013 gdt->get_hfta_buffer_destroy().c_str(), g );
9020 for(g=0;g<gb_tbl.size();g++){
9021 data_type *gdt = gb_tbl.get_data_type(g);
9022 if(gdt->is_temporal()){
9027 ret += tgdt->get_host_cvar_type()+" get_curr_gb(){\n";
9028 ret+="\treturn gb_var"+int_to_string(g)+";\n";
9033 //--------------------------------
9034 // aggr definition class
9035 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
9037 for(a=0;a<aggr_tbl.size();a++){
9038 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
9039 sprintf(tmpstr,"aggr_var%d",a);
9040 if(aggr_tbl.is_builtin(a)){
9041 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
9042 if(aggr_tbl.get_op(a) == "AVG"){ // HACK!
9043 data_type cnt_type = data_type("ullong");
9044 ret+="\t"+cnt_type.make_host_cvar(string(tmpstr)+"_cnt")+";\n";
9045 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(string(tmpstr)+"_sum")+";\n";
9048 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
9052 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
9054 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
9055 for(a=0;a<aggr_tbl.size();a++){
9056 if(aggr_tbl.is_builtin(a)){
9057 data_type *adt = aggr_tbl.get_data_type(a);
9058 if(adt->is_buffer_type()){
9059 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
9060 adt->get_hfta_buffer_destroy().c_str(), a );
9064 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
9065 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9066 ret+="(aggr_var"+int_to_string(a)+"));\n";
9072 //-------------------------------------------
9073 // group-by patterns for the functor,
9074 // initialization within the class is cumbersome.
9075 int n_patterns = gb_tbl.gb_patterns.size();
9077 ret += "bool "+this->generate_functor_name()+"_gb_patterns["+int_to_string(n_patterns)+
9078 "]["+int_to_string(gb_tbl.size())+"] = {\n";
9079 if(n_patterns == 0){
9080 for(i=0;i<gb_tbl.size();++i){
9085 for(i=0;i<n_patterns;++i){
9086 if(i>0) ret += ",\n";
9088 for(j=0;j<gb_tbl.size();j++){
9089 if(j>0) ret += ", ";
9090 if(gb_tbl.gb_patterns[i][j]){
9103 //--------------------------------
9105 ret += "class " + this->generate_functor_name() + "{\n";
9107 // Find variables referenced in this query node.
9110 col_id_set::iterator csi;
9112 for(w=0;w<where.size();++w)
9113 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
9114 for(w=0;w<having.size();++w)
9115 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
9116 for(g=0;g<gb_tbl.size();g++)
9117 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
9119 for(s=0;s<select_list.size();s++){
9120 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
9124 // Private variables : store the state of the functor.
9125 // 1) variables for unpacked attributes
9126 // 2) offsets of the unpacked attributes
9127 // 3) storage of partial functions
9128 // 4) storage of complex literals (i.e., require a constructor)
9130 ret += "private:\n";
9132 // var to save the schema handle
9133 ret += "\tint schema_handle0;\n";
9134 // metadata from schema handle
9135 ret += "\tint tuple_metadata_offset0;\n";
9137 // generate the declaration of all the variables related to
9138 // temp tuples generation
9139 ret += gen_decl_temp_vars();
9141 // unpacked attribute storage, offsets
9142 ret += "//\t\tstorage and offsets of accessed fields.\n";
9143 ret += generate_access_vars(cid_set, schema);
9145 // Variables to store results of partial functions.
9146 // WARNING find_partial_functions modifies the SE
9147 // (it marks the partial function id).
9148 ret += "//\t\tParital function result storage\n";
9149 vector<scalarexp_t *> partial_fcns;
9150 vector<int> fcn_ref_cnt;
9151 vector<bool> is_partial_fcn;
9152 for(s=0;s<select_list.size();s++){
9153 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
9155 for(w=0;w<where.size();w++){
9156 find_partial_fcns_pr(where[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9158 for(w=0;w<having.size();w++){
9159 find_partial_fcns_pr(having[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9161 for(g=0;g<gb_tbl.size();g++){
9162 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns,NULL,NULL, Ext_fcns);
9164 for(a=0;a<aggr_tbl.size();a++){
9165 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns,NULL,NULL, Ext_fcns);
9167 if(partial_fcns.size()>0){
9168 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
9169 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
9172 // Complex literals (i.e., they need constructors)
9173 ret += "//\t\tComplex literal storage.\n";
9174 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
9175 ret += generate_complex_lit_vars(complex_literals);
9177 // Pass-by-handle parameters
9178 ret += "//\t\tPass-by-handle storage.\n";
9179 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
9180 ret += generate_pass_by_handle_vars(param_handle_table);
9183 // variables to hold parameters.
9184 ret += "//\tfor query parameters\n";
9185 ret += generate_param_vars(param_tbl);
9187 // Is there a temporal flush? If so create flush temporaries,
9188 // create flush indicator.
9189 bool uses_temporal_flush = false;
9190 for(g=0;g<gb_tbl.size();g++){
9191 data_type *gdt = gb_tbl.get_data_type(g);
9192 if(gdt->is_temporal())
9193 uses_temporal_flush = true;
9196 if(uses_temporal_flush){
9197 ret += "//\t\tFor temporal flush\n";
9198 for(g=0;g<gb_tbl.size();g++){
9199 data_type *gdt = gb_tbl.get_data_type(g);
9200 if(gdt->is_temporal()){
9201 sprintf(tmpstr,"last_gb%d",g);
9202 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9203 sprintf(tmpstr,"last_flushed_gb%d",g);
9204 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9207 ret += "\tbool needs_temporal_flush;\n";
9208 ret += "\tbool disordered_arrival;\n";
9212 // The publicly exposed functions
9214 ret += "\npublic:\n";
9217 //-------------------
9218 // The functor constructor
9219 // pass in the schema handle.
9220 // 1) make assignments to the unpack offset variables
9221 // 2) initialize the complex literals
9223 ret += "//\t\tFunctor constructor.\n";
9224 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9226 // save the schema handle
9227 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
9230 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
9231 ret += gen_access_var_init(cid_set);
9233 ret += "tuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9236 ret += "//\t\tInitialize complex literals.\n";
9237 ret += gen_complex_lit_init(complex_literals);
9239 // Initialize partial function results so they can be safely GC'd
9240 ret += gen_partial_fcn_init(partial_fcns);
9242 // Initialize non-query-parameter parameter handles
9243 ret += gen_pass_by_handle_init(param_handle_table);
9245 // temporal flush variables
9246 // ASSUME that structured values won't be temporal.
9247 if(uses_temporal_flush){
9248 ret += "//\t\tInitialize temporal flush variables.\n";
9249 for(g=0;g<gb_tbl.size();g++){
9250 data_type *gdt = gb_tbl.get_data_type(g);
9251 if(gdt->is_temporal()){
9252 literal_t gl(gdt->type_indicator());
9253 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9255 sprintf(tmpstr,"\tlast_flushed_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9259 ret += "\tneeds_temporal_flush = false;\n";
9262 // Init temporal attributes referenced in select list
9263 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
9267 //-------------------
9268 // Functor destructor
9269 ret += "//\t\tFunctor destructor.\n";
9270 ret += "~"+this->generate_functor_name()+"(){\n";
9272 // clean up buffer type complex literals
9273 ret += gen_complex_lit_dtr(complex_literals);
9275 // Deregister the pass-by-handle parameters
9276 ret += "/* register and de-register the pass-by-handle parameters */\n";
9277 ret += gen_pass_by_handle_dtr(param_handle_table);
9279 // clean up partial function results.
9280 ret += "/* clean up partial function storage */\n";
9281 ret += gen_partial_fcn_dtr(partial_fcns);
9283 // Destroy the parameters, if any need to be destroyed
9284 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9289 //-------------------
9290 // Parameter manipulation routines
9291 ret += generate_load_param_block(this->generate_functor_name(),
9292 this->param_tbl,param_handle_table);
9293 ret += generate_delete_param_block(this->generate_functor_name(),
9294 this->param_tbl,param_handle_table);
9296 //-------------------
9297 // Register new parameter block
9299 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
9300 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9301 ret += "\treturn this->load_params_"+this->generate_functor_name()+
9305 // -----------------------------------
9306 // group-by pattern support
9309 "int n_groupby_patterns(){\n"
9310 " return "+int_to_string(gb_tbl.gb_patterns.size())+";\n"
9312 "bool *get_pattern(int p){\n"
9313 " return "+this->generate_functor_name()+"_gb_patterns[p];\n"
9317 //---------------------------------------
9318 // Parameterized number of tuples output per slow flush
9320 "int gb_flush_per_tuple(){\n"
9321 " return "+int_to_string(hfta_slow_flush)+";\n"
9328 //-------------------
9329 // the create_group method.
9330 // This method creates a group in a buffer passed in
9331 // (to allow for creation on the stack).
9332 // There are also a couple of side effects:
9333 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
9334 // 2) determine if a temporal flush is required.
9336 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
9337 // Variables for execution of the function.
9338 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9340 if(partial_fcns.size()>0){ // partial fcn access failure
9341 ret += "\tgs_retval_t retval = 0;\n";
9345 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
9346 "_groupdef *) buffer;\n";
9348 // Start by cleaning up partial function results
9349 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
9350 set<int> w_pfcns; // partial fcns in where clause
9351 for(w=0;w<where.size();++w)
9352 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
9354 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
9355 for(g=0;g<gb_tbl.size();g++){
9356 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
9358 for(a=0;a<aggr_tbl.size();a++){
9359 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
9361 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
9362 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
9363 // ret += gen_partial_fcn_dtr(partial_fcns);
9366 ret += gen_temp_tuple_check(this->node_name, 0);
9367 col_id_set found_cids; // colrefs unpacked thus far.
9368 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
9371 // Save temporal group-by variables
9374 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
9376 for(g=0;g<gb_tbl.size();g++){
9378 data_type *gdt = gb_tbl.get_data_type(g);
9380 if(gdt->is_temporal()){
9381 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9382 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9390 // Compare the temporal GB vars with the stored ones,
9391 // set flush indicator and update stored GB vars if there is any change.
9393 ret += "// hfta_disorder = "+int_to_string(hfta_disorder)+"\n";
9394 if(hfta_disorder < 2){
9395 if(uses_temporal_flush){
9397 bool first_one = true;
9398 string disorder_test;
9399 for(g=0;g<gb_tbl.size();g++){
9400 data_type *gdt = gb_tbl.get_data_type(g);
9402 if(gdt->is_temporal()){
9403 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9404 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9405 if(first_one){first_one = false;} else {ret += ") && (";}
9406 ret += generate_lt_test(lhs_op, rhs_op, gdt);
9407 disorder_test += generate_lt_test(rhs_op, lhs_op, gdt);
9411 for(g=0;g<gb_tbl.size();g++){
9412 data_type *gdt = gb_tbl.get_data_type(g);
9413 if(gdt->is_temporal()){
9414 if(gdt->is_buffer_type()){
9415 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9417 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9419 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9424 ret += "\t\tneeds_temporal_flush=true;\n";
9426 "\t\tneeds_temporal_flush=false;\n"
9429 ret += "\tdisordered_arrival = "+disorder_test+";\n";
9430 // ret += "\tif( ( ("+disorder_test+") ) ){\n";
9431 // ret += "\t\tdisordered_arrival=true;\n";
9432 // ret += "\t}else{\n";
9433 // ret += "\t\tdisordered_arrival=false;\n";
9438 ret+= "\tif(temp_tuple_received && !( (";
9439 bool first_one = true;
9440 for(g=0;g<gb_tbl.size();g++){
9441 data_type *gdt = gb_tbl.get_data_type(g);
9443 if(gdt->is_temporal()){
9444 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9445 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9446 if(first_one){first_one = false;} else {ret += ") && (";}
9447 ret += generate_equality_test(lhs_op, rhs_op, gdt);
9453 for(g=0;g<gb_tbl.size();g++){
9454 data_type *gdt = gb_tbl.get_data_type(g);
9455 if(gdt->is_temporal()){
9457 if(gdt->is_buffer_type()){
9458 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9460 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9462 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9468 data_type *tgdt = gb_tbl.get_data_type(temporal_g);
9469 literal_t gl(tgdt->type_indicator());
9470 ret += "\t\tif(last_flushed_gb"+int_to_string(temporal_g)+">"+gl.to_hfta_C_code("")+")\n";
9471 ret += "\t\t\tneeds_temporal_flush=true;\n";
9472 ret += "\t\t}else{\n"
9473 "\t\t\tneeds_temporal_flush=false;\n"
9478 // For temporal status tuple we don't need to do anything else
9479 ret += "\tif (temp_tuple_received) return NULL;\n\n";
9481 for(w=0;w<where.size();++w){
9482 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
9484 // Find the set of variables accessed in this CNF elem,
9485 // but in no previous element.
9486 col_id_set new_cids;
9487 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
9489 // Unpack these values.
9490 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9491 // Find partial fcns ref'd in this cnf element
9493 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
9494 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
9496 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
9497 +") ) return(NULL);\n";
9500 // The partial functions ref'd in the group-by var and aggregate
9501 // definitions must also be evaluated. If one returns false,
9502 // then implicitly the predicate is false.
9503 set<int>::iterator pfsi;
9505 if(ag_gb_pfcns.size() > 0)
9506 ret += "//\t\tUnpack remaining partial fcns.\n";
9507 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
9508 found_cids, segen_gb_tbl, "NULL", needs_xform);
9510 // Unpack the group-by variables
9512 for(g=0;g<gb_tbl.size();g++){
9513 data_type *gdt = gb_tbl.get_data_type(g);
9515 if(!gdt->is_temporal()){
9516 // Find the new fields ref'd by this GBvar def.
9517 col_id_set new_cids;
9518 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
9519 // Unpack these values.
9520 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9522 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9523 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9525 // There seems to be no difference between the two
9526 // branches of the IF statement.
9527 data_type *gdt = gb_tbl.get_data_type(g);
9528 if(gdt->is_buffer_type()){
9529 // Create temporary copy.
9530 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9531 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9533 scalarexp_t *gse = gb_tbl.get_def(g);
9534 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9535 g,generate_se_code(gse,schema).c_str());
9544 ret+= "\treturn gbval;\n";
9547 //--------------------------------------------------------
9548 // Create and initialize an aggregate object
9550 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
9551 // Variables for execution of the function.
9552 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9555 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
9556 "_aggrdef *)buffer;\n";
9558 for(a=0;a<aggr_tbl.size();a++){
9559 if(aggr_tbl.is_builtin(a)){
9560 // Create temporaries for buffer return values
9561 data_type *adt = aggr_tbl.get_data_type(a);
9562 if(adt->is_buffer_type()){
9563 sprintf(tmpstr,"aggr_tmp_%d", a);
9564 ret+=adt->make_host_cvar(tmpstr)+";\n";
9569 // Unpack all remaining attributes
9570 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
9571 for(a=0;a<aggr_tbl.size();a++){
9572 sprintf(tmpstr,"aggval->aggr_var%d",a);
9573 string assignto_var = tmpstr;
9574 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
9577 ret += "\treturn aggval;\n";
9580 //--------------------------------------------------------
9581 // update an aggregate object
9583 ret += "void update_aggregate(host_tuple &tup0, "
9584 +generate_functor_name()+"_groupdef &gbval, "+
9585 generate_functor_name()+"_aggrdef &aggval){\n";
9586 // Variables for execution of the function.
9587 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9589 // use of temporaries depends on the aggregate,
9590 // generate them in generate_aggr_update
9593 // Unpack all remaining attributes
9594 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
9595 for(a=0;a<aggr_tbl.size();a++){
9596 sprintf(tmpstr,"aggval.aggr_var%d",a);
9597 string varname = tmpstr;
9598 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
9601 ret += "\treturn;\n";
9604 //---------------------------------------------------
9607 ret += "\tbool flush_needed(){\n";
9608 if(uses_temporal_flush){
9609 ret += "\t\treturn needs_temporal_flush;\n";
9611 ret += "\t\treturn false;\n";
9615 ret += "bool disordered(){return disordered_arrival;}\n";
9617 //---------------------------------------------------
9618 // create output tuple
9619 // Unpack the partial functions ref'd in the where clause,
9620 // select clause. Evaluate the where clause.
9621 // Finally, pack the tuple.
9623 // I need to use special code generation here,
9624 // so I'll leave it in longhand.
9626 ret += "host_tuple create_output_tuple("
9627 +generate_functor_name()+"_groupdef &gbval, "+
9628 generate_functor_name()+"_aggrdef &aggval, bool &failed){\n";
9630 ret += "\thost_tuple tup;\n";
9631 ret += "\tfailed = false;\n";
9632 ret += "\tgs_retval_t retval = 0;\n";
9634 string gbvar = "gbval.gb_var";
9635 string aggvar = "aggval.";
9637 // Create cached temporaries for UDAF return values.
9638 for(a=0;a<aggr_tbl.size();a++){
9639 if(! aggr_tbl.is_builtin(a)){
9640 int afcn_id = aggr_tbl.get_fcn_id(a);
9641 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9642 sprintf(tmpstr,"udaf_ret_%d", a);
9643 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
9648 // First, get the return values from the UDAFS
9649 for(a=0;a<aggr_tbl.size();a++){
9650 if(! aggr_tbl.is_builtin(a)){
9651 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
9652 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9653 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
9657 set<int> hv_sl_pfcns;
9658 for(w=0;w<having.size();w++){
9659 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
9661 for(s=0;s<select_list.size();s++){
9662 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
9665 // clean up the partial fcn results from any previous execution
9666 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
9669 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
9670 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
9671 ret += "\tif(retval){ failed = true; return(tup);}\n";
9674 // Evaluate the HAVING clause
9675 // TODO: this seems to have a ++ operator rather than a + operator.
9676 for(w=0;w<having.size();++w){
9677 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
9680 // Now, compute the size of the tuple.
9682 // Unpack any BUFFER type selections into temporaries
9683 // so that I can compute their size and not have
9684 // to recompute their value during tuple packing.
9685 // I can use regular assignment here because
9686 // these temporaries are non-persistent.
9687 // TODO: should I be using the selvar generation routine?
9689 ret += "//\t\tCompute the size of the tuple.\n";
9690 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
9691 for(s=0;s<select_list.size();s++){
9692 scalarexp_t *se = select_list[s]->se;
9693 data_type *sdt = se->get_data_type();
9694 if(sdt->is_buffer_type() &&
9695 !( (se->get_operator_type() == SE_COLREF) ||
9696 (se->get_operator_type() == SE_AGGR_STAR) ||
9697 (se->get_operator_type() == SE_AGGR_SE) ||
9698 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9699 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9701 sprintf(tmpstr,"selvar_%d",s);
9702 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
9703 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
9707 // The size of the tuple is the size of the tuple struct plus the
9708 // size of the buffers to be copied in.
9710 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
9711 for(s=0;s<select_list.size();s++){
9712 // if(s>0) ret += "+";
9713 scalarexp_t *se = select_list[s]->se;
9714 data_type *sdt = select_list[s]->se->get_data_type();
9715 if(sdt->is_buffer_type()){
9716 if(!( (se->get_operator_type() == SE_COLREF) ||
9717 (se->get_operator_type() == SE_AGGR_STAR) ||
9718 (se->get_operator_type() == SE_AGGR_SE) ||
9719 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9720 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9722 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
9725 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9732 // Allocate tuple data block.
9733 ret += "//\t\tCreate the tuple block.\n";
9734 ret += "\ttup.data = malloc(tup.tuple_size);\n";
9735 ret += "\ttup.heap_resident = true;\n";
9737 // Mark tuple as regular
9738 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
9740 // ret += "\ttup.channel = 0;\n";
9741 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
9742 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
9745 // (Here, offsets are hard-wired. is this a problem?)
9747 ret += "//\t\tPack the fields into the tuple.\n";
9748 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
9749 for(s=0;s<select_list.size();s++){
9750 scalarexp_t *se = select_list[s]->se;
9751 data_type *sdt = se->get_data_type();
9752 if(sdt->is_buffer_type()){
9753 if(!( (se->get_operator_type() == SE_COLREF) ||
9754 (se->get_operator_type() == SE_AGGR_STAR) ||
9755 (se->get_operator_type() == SE_AGGR_SE) ||
9756 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9757 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9759 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
9761 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
9764 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9766 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9770 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9772 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
9777 // Destroy string temporaries
9778 ret += gen_buffer_selvars_dtr(select_list);
9779 // Destroy string return vals of UDAFs
9780 for(a=0;a<aggr_tbl.size();a++){
9781 if(! aggr_tbl.is_builtin(a)){
9782 int afcn_id = aggr_tbl.get_fcn_id(a);
9783 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9784 if(adt->is_buffer_type()){
9785 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
9786 adt->get_hfta_buffer_destroy().c_str(), a );
9793 ret += "\treturn tup;\n";
9797 //-------------------------------------------------------------------
9798 // Temporal update functions
9800 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
9802 for(g=0;g<gb_tbl.size();g++){
9803 data_type *gdt = gb_tbl.get_data_type(g);
9804 if(gdt->is_temporal()){
9809 ret += tgdt->get_host_cvar_type()+" get_last_flushed_gb(){\n";
9810 ret+="\treturn last_flushed_gb"+int_to_string(g)+";\n";
9812 ret += tgdt->get_host_cvar_type()+" get_last_gb(){\n";
9813 ret+="\treturn last_gb"+int_to_string(g)+";\n";
9819 // create a temp status tuple
9820 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
9822 ret += gen_init_temp_status_tuple(this->get_node_name());
9825 // (Here, offsets are hard-wired. is this a problem?)
9827 ret += "//\t\tPack the fields into the tuple.\n";
9828 for(s=0;s<select_list.size();s++){
9829 data_type *sdt = select_list[s]->se->get_data_type();
9830 if(sdt->is_temporal()){
9831 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9834 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
9841 ret += "\treturn 0;\n";
9842 ret += "};};\n\n\n";
9845 //----------------------------------------------------------
9846 // The hash function
9848 ret += "struct "+generate_functor_name()+"_hash_func{\n";
9849 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
9850 "_groupdef &grp) const{\n";
9851 ret += "\t\treturn( (";
9852 for(g=0;g<gb_tbl.size();g++){
9854 data_type *gdt = gb_tbl.get_data_type(g);
9855 if(gdt->use_hashfunc()){
9856 if(gdt->is_buffer_type())
9857 sprintf(tmpstr,"(%s*%s(&(grp.gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9859 sprintf(tmpstr,"(%s*%s(grp.gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9861 sprintf(tmpstr,"(%s*grp.gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
9865 ret += ") >> 32);\n";
9869 //----------------------------------------------------------
9870 // The comparison function
9872 ret += "struct "+generate_functor_name()+"_equal_func{\n";
9873 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef &grp1, "+
9874 "const "+generate_functor_name()+"_groupdef &grp2) const{\n";
9875 ret += "\t\treturn( (";
9877 for(g=0;g<gb_tbl.size();g++){
9878 if(g>0) ret += ") && (";
9879 data_type *gdt = gb_tbl.get_data_type(g);
9880 if(gdt->complex_comparison(gdt)){
9881 if(gdt->is_buffer_type())
9882 sprintf(tmpstr,"(%s(&(grp1.gb_var%d), &(grp2.gb_var%d))==0)",
9883 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
9885 sprintf(tmpstr,"(%s((grp1.gb_var%d), (grp2.gb_var%d))==0)",
9886 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
9888 sprintf(tmpstr,"grp1.gb_var%d == grp2.gb_var%d",g,g);
// Build the C++ source text that declares and allocates the group-by
// operator object for this query node.
//   i      : index appended to the generated pointer variable name ("op<i>").
//   params : text spliced in as the leading constructor argument(s); the
//            quoted node name (get_node_name()) is appended after it.
// The operator template is instantiated with the functor name and the
// derived "<functor>_groupdef", "<functor>_aggrdef", "<functor>_hash_func"
// and "<functor>_equal_func" names.
// NOTE(review): the statements that open and close each string expression
// are not visible in this listing; presumably the assembled text is returned
// to the caller -- confirm against the full file.
9900 string sgah_qpn::generate_operator(int i, string params){
// In-order / low-disorder case: choose the operator template by name.
9902 if(hfta_disorder < 2){
9903 string op_name = "groupby_operator";
// A positive slow-flush setting selects the slow-flush operator variant.
9904 if(hfta_slow_flush>0)
9905 op_name = "groupby_slowflush_operator";
// Template argument list for the declared pointer type.
9908 generate_functor_name()+","+
9909 generate_functor_name() + "_groupdef, " +
9910 generate_functor_name() + "_aggrdef, " +
9911 generate_functor_name()+"_hash_func, "+
9912 generate_functor_name()+"_equal_func "
// "op<i>" is the generated variable; the same template arguments are
// repeated for the "new" expression.
9913 "> *op"+int_to_string(i)+" = new "+op_name+"<"+
9914 generate_functor_name()+","+
9915 generate_functor_name() + "_groupdef, " +
9916 generate_functor_name() + "_aggrdef, " +
9917 generate_functor_name()+"_hash_func, "+
9918 generate_functor_name()+"_equal_func "
// Constructor arguments: params, then the node name as a quoted string.
9919 ">("+params+", \"" + get_node_name() +
// hfta_disorder >= 2: scan the group-by variables for a temporal one.
// NOTE(review): presumably tgdt is set from the temporal gdt in lines not
// shown here; confirm it is initialized when no group-by variable is temporal.
9924 for(int g=0;g<gb_tbl.size();g++){
9925 data_type *gdt = gb_tbl.get_data_type(g);
9926 if(gdt->is_temporal()){
// Out-of-order operator: same template arguments as above, plus the host C
// type of tgdt as an additional trailing template argument.
9933 " groupby_operator_oop<" +
9934 generate_functor_name()+","+
9935 generate_functor_name() + "_groupdef, " +
9936 generate_functor_name() + "_aggrdef, " +
9937 generate_functor_name()+"_hash_func, "+
9938 generate_functor_name()+"_equal_func, " +
9939 tgdt->get_host_cvar_type() +
9940 "> *op"+int_to_string(i)+" = new groupby_operator_oop<"+
9941 generate_functor_name()+","+
9942 generate_functor_name() + "_groupdef, " +
9943 generate_functor_name() + "_aggrdef, " +
9944 generate_functor_name()+"_hash_func, "+
9945 generate_functor_name()+"_equal_func, " +
9946 tgdt->get_host_cvar_type() +
// Constructor arguments: params, then the node name as a quoted string.
9947 ">("+params+", \"" + get_node_name() +
9953 ////////////////////////////////////////////////
9956 ////////////////////////////////////////////
9958 string mrg_qpn::generate_functor_name(){
9959 return("mrg_functor_" + normalize_name(this->get_node_name()));
9962 string mrg_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
9967 if(fm.size() != mvars.size()){
9968 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=%lu, mvars.size=%lu\n",fm.size(),mvars.size());
9972 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=mvars.size=%lu\n",fm.size());
9977 // Initialize generate utility globals
9978 segen_gb_tbl = NULL;
9980 string ret = "class " + this->generate_functor_name() + "{\n";
9982 // Private variable:
9983 // 1) Vars for unpacked attrs.
9984 // 2) offsets of the unpacked attrs
9985 // 3) last_posted_timestamp
9988 schema->get_type_name(mvars[0]->get_schema_ref(), mvars[0]->get_field()),
9989 schema->get_modifier_list(mvars[0]->get_schema_ref(), mvars[0]->get_field())
9992 schema->get_type_name(mvars[1]->get_schema_ref(), mvars[1]->get_field()),
9993 schema->get_modifier_list(mvars[1]->get_schema_ref(), mvars[1]->get_field())
9996 ret += "private:\n";
9998 // var to save the schema handle
9999 ret += "\tint schema_handle0;\n";
10001 // generate the declaration of all the variables related to
10002 // temp tuples generation
10003 ret += gen_decl_temp_vars();
10005 // unpacked attribute storage, offsets
10006 ret += "//\t\tstorage and offsets of accessed fields.\n";
10007 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
10009 sprintf(tmpstr,"unpack_var_%s_%d", mvars[0]->get_field().c_str(), tblref);
10010 ret+="\t"+dta.make_host_cvar(tmpstr)+";\n";
10011 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[0]->get_field().c_str(), tblref);
10012 ret.append(tmpstr);
10014 sprintf(tmpstr,"unpack_var_%s_%d", mvars[1]->get_field().c_str(), tblref);
10015 ret+="\t"+dtb.make_host_cvar(tmpstr)+";\n";
10016 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[1]->get_field().c_str(), tblref);
10017 ret.append(tmpstr);
10019 ret += "//\t\tRemember the last posted timestamp.\n";
10020 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_0")+";\n";
10021 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_1")+";\n";
10022 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10023 ret+="\t"+dta.make_host_cvar("slack")+";\n";
10024 // ret += "\t bool first_execution_0, first_execution_1;\n";
10026 // variables to hold parameters.
10027 ret += "//\tfor query parameters\n";
10028 ret += generate_param_vars(param_tbl);
10030 ret += "public:\n";
10031 //-------------------
10032 // The functor constructor
10033 // pass in a schema handle (e.g. for the 1st input stream),
10034 // use it to determine how to unpack the merge variable.
10035 // ASSUME that both streams have the same layout,
10036 // just duplicate it.
10039 ret += "//\t\tFunctor constructor.\n";
10040 ret += this->generate_functor_name()+"(int schema_handle0){\n";
10042 // var to save the schema handle
10043 ret += "\tthis->schema_handle0 = schema_handle0;\n";
10044 ret += "\ttuple_metadata_offset0=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10045 ret += "\ttuple_metadata_offset1=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10047 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
10049 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", mvars[0]->get_field().c_str(), 0,mvars[0]->get_field().c_str());
10050 ret.append(tmpstr);
10051 sprintf(tmpstr,"\tunpack_offset_%s_%d = unpack_offset_%s_%d;\n",mvars[1]->get_field().c_str(), 1,mvars[0]->get_field().c_str(), 0);
10052 ret.append(tmpstr);
10053 // ret+="\tfirst_execution_0 = first_execution_1 = true;\n";
10055 ret+="\tslack = "+generate_se_code(slack,schema)+";\n";
10057 ret+="\tslack = 0;\n";
10059 // Initialize internal state
10060 ret += "\ttemp_tuple_received = false;\n";
10062 // Init last timestamp values to minimum value for their type
10063 if (dta.is_increasing())
10064 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_min_literal() + ";\n";
10066 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_max_literal() + ";\n";
10071 ret += "//\t\tFunctor destructor.\n";
10072 ret += "~"+this->generate_functor_name()+"(){\n";
10074 // Destroy the parameters, if any need to be destroyed
10075 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10080 // no pass-by-handle params.
10081 vector<handle_param_tbl_entry *> param_handle_table;
10083 // Parameter manipulation routines
10084 ret += generate_load_param_block(this->generate_functor_name(),
10085 this->param_tbl,param_handle_table);
10086 ret += generate_delete_param_block(this->generate_functor_name(),
10087 this->param_tbl,param_handle_table);
10089 // Register new parameter block
10091 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10092 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10093 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10098 // -----------------------------------
10101 string unpack_fcna;
10102 if(needs_xform[0]) unpack_fcna = dta.get_hfta_unpack_fcn();
10103 else unpack_fcna = dta.get_hfta_unpack_fcn_noxf();
10104 string unpack_fcnb;
10105 if(needs_xform[1]) unpack_fcnb = dtb.get_hfta_unpack_fcn();
10106 else unpack_fcnb = dtb.get_hfta_unpack_fcn_noxf();
10109 ret+="\tint compare(const host_tuple& tup1, const host_tuple& tup2) const{ \n";
10110 ret+="\t"+dta.make_host_cvar("timestamp1")+";\n";
10111 ret+="\t"+dta.make_host_cvar("timestamp2")+";\n";
10112 ret+="\tgs_int32_t problem;\n";
10113 ret+="\tif (tup1.channel == 0) {\n";
10114 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10116 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10119 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 1);
10121 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 0);
10125 " if (timestamp1 > timestamp2+slack)\n"
10127 " else if (timestamp1 < timestamp2)\n"
10136 " void get_timestamp(const host_tuple& tup0){\n"
10137 " gs_int32_t problem;\n"
10139 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10148 // Compare to temp status.
10150 " int compare_with_temp_status(int channel) {\n"
10151 " // check if tuple is temp status tuple\n"
10153 " if (channel == 0) {\n"
10154 //" if(first_execution_0) return 1;\n"
10155 " if (timestamp == last_posted_timestamp_0)\n"
10157 " else if (timestamp < last_posted_timestamp_0)\n"
10162 //" if(first_execution_1) return 1;\n"
10163 " if (timestamp == last_posted_timestamp_1)\n"
10165 " else if (timestamp < last_posted_timestamp_1)\n"
10174 " int compare_stored_with_temp_status(const host_tuple& tup0, int channel)/* const*/ {\n"
10176 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10177 ret+="\tgs_int32_t problem;\n";
10179 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10181 ret+="\tif (channel == 0) {\n";
10182 // ret+="\tif(first_execution_0) return 1;\n";
10184 " if (l_timestamp == last_posted_timestamp_0)\n"
10186 " else if (l_timestamp < last_posted_timestamp_0)\n"
10191 // ret+="\tif(first_execution_1) return 1;\n";
10193 " if (l_timestamp == last_posted_timestamp_1)\n"
10195 " else if (l_timestamp < last_posted_timestamp_1)\n"
10203 // update temp status.
10205 " int update_temp_status(const host_tuple& tup) {\n"
10206 " if (tup.channel == 0) {\n"
10207 " last_posted_timestamp_0=timestamp;\n"
10208 //" first_execution_0 = false;\n"
10210 " last_posted_timestamp_1=timestamp;\n"
10211 //" first_execution_1 = false;\n"
10217 " int update_stored_temp_status(const host_tuple& tup, int channel) {\n"
10219 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10220 ret+="\tgs_int32_t problem;\n";
10221 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10224 " if (tup.channel == 0) {\n"
10225 " last_posted_timestamp_0=l_timestamp;\n"
10226 //" first_execution_0 = false;\n"
10228 " last_posted_timestamp_1=l_timestamp;\n"
10229 //" first_execution_1 = false;\n"
10235 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10236 ret+="\tgs_int32_t problem;\n";
10237 ret+="\tif (tup.channel == 0) {\n";
10238 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10241 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10244 ret+="\tif (tup.channel == 0) {\n";
10245 ret+="\tlast_posted_timestamp_0=timestamp;\n";
10246 ret +="\tfirst_execution_0 = false;\n";
10248 ret+="\tlast_posted_timestamp_1=timestamp;\n";
10249 ret +="\tfirst_execution_1 = false;\n";
10256 // update temp status modulo slack.
10257 ret+="\tint update_temp_status_by_slack(const host_tuple& tup, int channel) {\n";
10259 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10260 ret+="\tgs_int32_t problem;\n";
10261 ret+="\tif (tup.channel == 0) {\n";
10262 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10265 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10269 " if (channel == 0) {\n"
10270 " if(first_execution_0){\n"
10271 " last_posted_timestamp_0=timestamp - slack;\n"
10272 " first_execution_0 = false;\n"
10274 " if(last_posted_timestamp_0 < timestamp-slack)\n"
10275 " last_posted_timestamp_0 = timestamp-slack;\n"
10278 " if(first_execution_1){\n"
10279 " last_posted_timestamp_1=timestamp - slack;\n"
10280 " first_execution_1 = false;\n"
10282 " if(last_posted_timestamp_1 < timestamp-slack)\n"
10283 " last_posted_timestamp_1 = timestamp-slack;\n"
10297 "bool temp_status_received(const host_tuple& tup0){\n"
10298 " return ftaschema_is_temporal_tuple_offset(tuple_metadata_offset0, tup0.data);\n"
10301 //"bool temp_status_received(){return temp_tuple_received;};\n\n";
10304 // create a temp status tuple
10305 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
10307 ret += gen_init_temp_status_tuple(this->get_node_name());
10310 ret += "//\t\tPack the fields into the tuple.\n";
10312 string fld_name = mvars[0]->get_field();
10313 int idx = table_layout->get_field_idx(fld_name);
10314 field_entry* fld = table_layout->get_field(idx);
10315 data_type dt(fld->get_type());
10317 // if (needs_xform[0] && needs_xform[1] && dt.needs_hn_translation())
10318 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s((last_posted_timestamp_0 < last_posted_timestamp_1) ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx, dt.hton_translation().c_str());
10320 sprintf(tmpstr,"\ttuple->tuple_var%d = (last_posted_timestamp_0 < last_posted_timestamp_1 ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx);
10324 ret += "\treturn 0;\n";
10327 // Transform tuple (before output)
10330 ret += "void xform_tuple(host_tuple &tup){\n";
10331 if((needs_xform[0] && !needs_xform[1]) || (needs_xform[1] && !needs_xform[0])){
10332 ret += "\tstruct "+generate_tuple_name(this->get_node_name())+" *tuple = ("+
10333 generate_tuple_name(this->get_node_name())+" *)(tup.data);\n";
10335 vector<field_entry *> flds = table_layout->get_fields();
10337 ret+="\tif(tup.channel == 0){\n";
10338 if(needs_xform[0] && !needs_xform[1]){
10340 for(f=0;f<flds.size();f++){
10342 data_type dt(flds[f]->get_type());
10343 if(dt.get_type() == v_str_t){
10344 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10346 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10348 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10351 if(dt.needs_hn_translation()){
10352 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10353 // f, dt.hton_translation().c_str(), f);
10359 ret += "\t\treturn;\n";
10361 ret.append("\t}\n");
10364 ret+="\tif(tup.channel == 1){\n";
10365 if(needs_xform[1] && !needs_xform[0]){
10367 for(f=0;f<flds.size();f++){
10369 data_type dt(flds[f]->get_type());
10370 if(dt.get_type() == v_str_t){
10371 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10373 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10375 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10378 if(dt.needs_hn_translation()){
10379 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10380 // f, dt.hton_translation().c_str(), f);
10386 ret += "\t\treturn;\n";
10388 ret.append("\t}\n");
10391 ret.append("};\n\n");
10393 // print_warnings() : tell the functor if the user wants to print warnings.
10394 ret += "bool print_warnings(){\n";
10395 if(definitions.count("print_warnings") && (
10396 definitions["print_warnings"] == "yes" ||
10397 definitions["print_warnings"] == "Yes" ||
10398 definitions["print_warnings"] == "YES" )) {
10399 ret += "return true;\n";
10401 ret += "return false;\n";
10403 ret.append("};\n\n");
10406 // Done with methods.
// Build the source text that declares and constructs this node's merge
// operator as the variable op<i>, templated on the functor class named by
// generate_functor_name().
//
// Two forms are emitted below: merge_operator<...> and
// merge_operator_oop<...> (the condition choosing between them should be
// confirmed against the full function). In both, the constructor arguments
// are `params`, the literal 10000, and the node name as a string.
//
// Parameters:
//   i      - index used to name the emitted operator variable.
//   params - text inserted as the leading constructor argument(s).
10413 string mrg_qpn::generate_operator(int i, string params){
10417 " merge_operator<" +
10418 generate_functor_name()+
10419 "> *op"+int_to_string(i)+" = new merge_operator<"+
10420 generate_functor_name()+
10421 ">("+params+",10000,\"" + get_node_name() + "\");\n"
10425 " merge_operator_oop<" +
10426 generate_functor_name()+
10427 "> *op"+int_to_string(i)+" = new merge_operator_oop<"+
10428 generate_functor_name()+
10429 ">("+params+",10000,\"" + get_node_name() + "\");\n"
10433 ////////////////////////////////////////////////
10434 /// WATCHLIST_TBL operator
10435 /// WATCHLIST_TBL functor
10436 ////////////////////////////////////////////
// Name of the generated functor class for this watchlist-table node:
// the prefix "watch_tbl_functor_" followed by the normalized node name.
10438 string watch_tbl_qpn::generate_functor_name(){
10439 return("watch_tbl_functor_" + normalize_name(this->get_node_name()));
// Functor generation is not implemented for watchlist-table nodes: the
// arguments are ignored and a fixed placeholder error string is returned
// in place of generated code.
10442 string watch_tbl_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10444 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
// Operator generation is not implemented for watchlist-table nodes: the
// arguments are ignored and a fixed placeholder error string is returned.
// The text is the same "..._FUNCTOR_..." placeholder used by
// generate_functor.
10447 string watch_tbl_qpn::generate_operator(int i, string params){
10448 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
10451 /////////////////////////////////////////////////////////
10452 ////// JOIN_EQ_HASH functor
// Name of the generated functor class for this hash-join node:
// the prefix "join_eq_hash_functor_" followed by the normalized node name.
// The same name is also used as the stem for the generated _keydef and
// _tempeqdef helper classes.
10455 string join_eq_hash_qpn::generate_functor_name(){
10456 return("join_eq_hash_functor_" + normalize_name(this->get_node_name()));
10459 string join_eq_hash_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10461 vector<data_type *> hashkey_dt; // data types in the hash key
10462 vector<data_type *> temporal_dt; // data types in the temporal key
10463 map<string,scalarexp_t *> l_equiv, r_equiv; // field equivalences
10464 set<int> pfcn_refs;
10465 col_id_set new_cids, local_cids;
10467 //--------------------------------
10470 string plus_op = "+";
10472 //--------------------------------
10473 // key definition class
10474 string ret = "class " + generate_functor_name() + "_keydef{\n";
10475 ret += "public:\n";
10476 // Collect attributes from hash join predicates.
10477 // ASSUME equality predicate.
10478 // Use the upwardly compatible data type
10479 // (infer from '+' operator if possible, else use left type)
10480 for(p=0;p<this->hash_eq.size();++p){
10481 scalarexp_t *lse = hash_eq[p]->pr->get_left_se();
10482 scalarexp_t *rse = hash_eq[p]->pr->get_right_se();
10483 data_type *hdt = new data_type(
10484 lse->get_data_type(), rse->get_data_type(), plus_op );
10485 if(hdt->get_type() == undefined_t){
10486 hashkey_dt.push_back(lse->get_data_type()->duplicate());
10489 hashkey_dt.push_back(hdt);
10491 sprintf(tmpstr,"hashkey_var%d",p);
10492 ret+="\t"+hashkey_dt[p]->make_host_cvar(tmpstr)+";\n";
10494 // find equivalences
10495 // NOTE: this code needs to be synched with the temporality
10496 // checking done at join_eq_hash_qpn::get_fields
10497 if(lse->get_operator_type()==SE_COLREF){
10498 l_equiv[lse->get_colref()->get_field()] = rse;
10500 if(rse->get_operator_type()==SE_COLREF){
10501 r_equiv[rse->get_colref()->get_field()] = lse;
10504 ret += "\tbool touched;\n";
10507 ret += "\t"+generate_functor_name() + "_keydef(){touched=false;};\n";
10509 ret += "\t~"+ generate_functor_name() + "_keydef(){\n";
10510 for(p=0;p<hashkey_dt.size();p++){
10511 if(hashkey_dt[p]->is_buffer_type()){
10512 sprintf(tmpstr,"\t\t%s(&hashkey_var%d);\n",
10513 hashkey_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10518 ret+="\tvoid touch(){touched = true;};\n";
10519 ret+="\tbool is_touched(){return touched;};\n";
10523 //--------------------------------
10524 // temporal equality definition class
10525 ret += "class " + generate_functor_name() + "_tempeqdef{\n";
10526 ret += "public:\n";
10527 // Collect attributes from hash join predicates.
10528 // ASSUME equality predicate.
10529 // Use the upwardly compatible data type
10530 // (infer from '+' operator if possible, else use left type)
10531 for(p=0;p<this->temporal_eq.size();++p){
10532 scalarexp_t *lse = temporal_eq[p]->pr->get_left_se();
10533 scalarexp_t *rse = temporal_eq[p]->pr->get_right_se();
10534 data_type *hdt = new data_type(
10535 lse->get_data_type(), rse->get_data_type(), plus_op );
10536 if(hdt->get_type() == undefined_t){
10537 temporal_dt.push_back(hash_eq[p]->pr->get_left_se()->get_data_type()->duplicate());
10540 temporal_dt.push_back(hdt);
10542 sprintf(tmpstr,"tempeq_var%d",p);
10543 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
10544 // find equivalences
10545 if(lse->get_operator_type()==SE_COLREF){
10546 l_equiv[lse->get_colref()->get_field()] = rse;
10548 if(rse->get_operator_type()==SE_COLREF){
10549 r_equiv[rse->get_colref()->get_field()] = lse;
10554 ret += "\t"+generate_functor_name() + "_tempeqdef(){};\n";
10556 ret += "\t~"+ generate_functor_name() + "_tempeqdef(){\n";
10557 for(p=0;p<temporal_dt.size();p++){
10558 if(temporal_dt[p]->is_buffer_type()){
10559 sprintf(tmpstr,"\t\t%s(&tempeq_var%d);\n",
10560 temporal_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10568 //--------------------------------
10569 // temporal eq, hash join functor class
10570 ret += "class " + this->generate_functor_name() + "{\n";
10572 // Find variables referenced in this query node.
10574 col_id_set cid_set;
10575 col_id_set::iterator csi;
10577 for(p=0;p<where.size();++p)
10578 gather_pr_col_ids(where[p]->pr,cid_set,NULL);
10579 for(s=0;s<select_list.size();s++)
10580 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
10582 // Private variables : store the state of the functor.
10583 // 1) variables for unpacked attributes
10584 // 2) offsets of the unpacked attributes
10585 // 3) storage of partial functions
10586 // 4) storage of complex literals (i.e., require a constructor)
10588 ret += "private:\n";
10590 // var to save the schema handles
10591 ret += "\tint schema_handle0;\n";
10592 ret += "\tint schema_handle1;\n";
10594 // generate the declaration of all the variables related to
10595 // temp tuples generation
10596 ret += gen_decl_temp_vars();
10597 // tuple metadata offsets
10598 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
10600 // unpacked attribute storage, offsets
10601 ret += "//\t\tstorage and offsets of accessed fields.\n";
10602 ret += generate_access_vars(cid_set, schema);
10605 // Variables to store results of partial functions.
10606 // WARNING find_partial_functions modifies the SE
10607 // (it marks the partial function id).
10608 ret += "//\t\tParital function result storage\n";
10609 vector<scalarexp_t *> partial_fcns;
10610 vector<int> fcn_ref_cnt;
10611 vector<bool> is_partial_fcn;
10612 for(s=0;s<select_list.size();s++){
10613 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
10615 for(p=0;p<where.size();p++){
10616 find_partial_fcns_pr(where[p]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
10618 if(partial_fcns.size()>0){
10619 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
10620 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
10623 // Complex literals (i.e., they need constructors)
10624 ret += "//\t\tComplex literal storage.\n";
10625 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
10626 ret += generate_complex_lit_vars(complex_literals);
10627 // We need the following to handle strings in outer joins.
10628 // NEED AN EMPTY LITERAL FOR EAcH STRUCTURED LITERAL
10629 ret += "\tstruct vstring EmptyString;\n";
10630 ret += "\tstruct hfta_ipv6_str EmptyIp6;\n";
10632 // Pass-by-handle parameters
10633 ret += "//\t\tPass-by-handle storage.\n";
10634 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
10635 ret += generate_pass_by_handle_vars(param_handle_table);
10638 // variables to hold parameters.
10639 ret += "//\tfor query parameters\n";
10640 ret += generate_param_vars(param_tbl);
10643 ret += "\npublic:\n";
10644 //-------------------
10645 // The functor constructor
10646 // pass in the schema handle.
10647 // 1) make assignments to the unpack offset variables
10648 // 2) initialize the complex literals
10650 ret += "//\t\tFunctor constructor.\n";
10651 ret += this->generate_functor_name()+"(int schema_handle0, int schema_handle1){\n";
10653 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
10654 ret += "\t\tthis->schema_handle1 = schema_handle1;\n";
10655 // metadata offsets
10656 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10657 ret += "\ttuple_metadata_offset1 = ftaschema_get_tuple_metadata_offset(schema_handle1);\n";
10660 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
10661 ret += gen_access_var_init(cid_set);
10663 // complex literals
10664 ret += "//\t\tInitialize complex literals.\n";
10665 ret += gen_complex_lit_init(complex_literals);
10666 // Initialize EmptyString to the ... empty string
10667 // NEED AN EMPTY LITERAL FOR EAcH STRUCTURED LITERAL
10668 literal_t mtstr_lit("");
10669 ret += "\t" + mtstr_lit.to_hfta_C_code("&EmptyString")+";\n";
10670 literal_t mip6_lit("0:0:0:0:0:0:0:0",LITERAL_IPV6);
10671 ret += "\t" + mip6_lit.to_hfta_C_code("&EmptyIp6")+";\n";
10673 // Initialize partial function results so they can be safely GC'd
10674 ret += gen_partial_fcn_init(partial_fcns);
10676 // Initialize non-query-parameter parameter handles
10677 ret += gen_pass_by_handle_init(param_handle_table);
10679 // Init temporal attributes referenced in select list
10680 ret += gen_init_temp_vars(schema, select_list, NULL);
10687 //-------------------
10688 // Functor destructor
10689 ret += "//\t\tFunctor destructor.\n";
10690 ret += "~"+this->generate_functor_name()+"(){\n";
10692 // clean up buffer type complex literals
10693 ret += gen_complex_lit_dtr(complex_literals);
10695 // Deregister the pass-by-handle parameters
10696 ret += "/* register and de-register the pass-by-handle parameters */\n";
10697 ret += gen_pass_by_handle_dtr(param_handle_table);
10699 // clean up partial function results.
10700 ret += "/* clean up partial function storage */\n";
10701 ret += gen_partial_fcn_dtr(partial_fcns);
10703 // Destroy the parameters, if any need to be destroyed
10704 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10709 //-------------------
10710 // Parameter manipulation routines
10711 ret += generate_load_param_block(this->generate_functor_name(),
10712 this->param_tbl,param_handle_table);
10713 ret += generate_delete_param_block(this->generate_functor_name(),
10714 this->param_tbl,param_handle_table);
10716 //-------------------
10717 // Register new parameter block
10719 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10720 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10721 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10726 //-------------------
10727 // The create_key method.
10728 // Perform heap allocation.
10729 // ASSUME : the LHS of the preds reference channel 0 attributes
10730 // NOTE : it may fail if a partial function fails.
10732 ret += this->generate_functor_name()+"_keydef *create_key(host_tuple &tup, bool &failed){\n";
10733 // Variables for execution of the function.
10734 ret+="\t"+this->generate_functor_name()+"_keydef *retval = NULL;\n";
10735 ret+="\tgs_int32_t problem = 0;\n";
10737 // Assume unsuccessful completion
10738 ret+= "\tfailed = true;\n";
10740 // Switch the processing based on the channel
10741 ret+="\tif(tup.channel == 0){\n";
10742 ret+="// ------------ processing for channel 0\n";
10743 ret+="\t\thost_tuple &tup0 = tup;\n";
10744 // Gather partial fcns and colids ref'd by this branch
10746 new_cids.clear(); local_cids.clear();
10747 for(p=0;p<hash_eq.size();p++){
10748 collect_partial_fcns(hash_eq[p]->pr->get_left_se(), pfcn_refs);
10749 gather_se_col_ids(hash_eq[p]->pr->get_left_se(),local_cids,NULL);
10752 // Start by cleaning up partial function results
10753 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10754 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10756 // Evaluate the partial functions
10757 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10758 new_cids, NULL, "NULL", needs_xform);
10759 // test passed -- unpack remaining cids.
10760 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10762 // Alloc and load a key object
10763 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10764 for(p=0;p<hash_eq.size();p++){
10765 data_type *hdt = hash_eq[p]->pr->get_left_se()->get_data_type();
10766 if(hdt->is_buffer_type()){
10767 string vname = "tmp_keyvar"+int_to_string(p);
10768 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_left_se(),schema)+";\n";
10769 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10771 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10772 p,generate_se_code(hash_eq[p]->pr->get_left_se(),schema).c_str() );
10776 ret += "\t}else{\n";
10778 ret+="// ------------ processing for channel 1\n";
10779 ret+="\t\thost_tuple &tup1 = tup;\n";
10780 // Gather partial fcns and colids ref'd by this branch
10782 new_cids.clear(); local_cids.clear();
10783 for(p=0;p<hash_eq.size();p++){
10784 collect_partial_fcns(hash_eq[p]->pr->get_right_se(), pfcn_refs);
10785 gather_se_col_ids(hash_eq[p]->pr->get_right_se(),local_cids,NULL);
10788 // Start by cleaning up partial function results
10789 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10790 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10792 // Evaluate the partial functions
10793 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10794 new_cids, NULL, "NULL", needs_xform);
10796 // test passed -- unpack remaining cids.
10797 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10799 // Alloc and load a key object
10800 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10801 for(p=0;p<hash_eq.size();p++){
10802 data_type *hdt = hash_eq[p]->pr->get_right_se()->get_data_type();
10803 if(hdt->is_buffer_type()){
10804 string vname = "tmp_keyvar"+int_to_string(p);
10805 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_right_se(),schema)+";\n";
10806 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10808 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10809 p,generate_se_code(hash_eq[p]->pr->get_right_se(),schema).c_str() );
10815 ret += "\tfailed = false;\n";
10816 ret += "\t return retval;\n";
10820 //-------------------
10821 // The load_ts method.
10822 // load into an allocated buffer.
10823 // ASSUME : the LHS of the preds reference channel 0 attributes
10824 // NOTE : it may fail if a partial function fails.
10825 // NOTE : can't handle buffer attributes
10827 ret += "bool load_ts_from_tup("+this->generate_functor_name()+"_tempeqdef *ts, host_tuple &tup){\n";
10828 // Variables for execution of the function.
10829 ret+="\tgs_int32_t problem = 0;\n";
10831 // Switch the processing based on the channel
10832 ret+="\tif(tup.channel == 0){\n";
10833 ret+="// ------------ processing for channel 0\n";
10834 ret+="\t\thost_tuple &tup0 = tup;\n";
10836 // Gather partial fcns and colids ref'd by this branch
10838 new_cids.clear(); local_cids.clear();
10839 for(p=0;p<temporal_eq.size();p++){
10840 collect_partial_fcns(temporal_eq[p]->pr->get_left_se(), pfcn_refs);
10841 gather_se_col_ids(temporal_eq[p]->pr->get_left_se(),local_cids,NULL);
10844 // Start by cleaning up partial function results
10845 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10846 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10848 // Evaluate the partial functions
10849 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10850 new_cids, NULL, "false", needs_xform);
10852 // test passed -- unpack remaining cids.
10853 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10855 // load the temporal key object
10856 for(p=0;p<temporal_eq.size();p++){
10857 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10858 p,generate_se_code(temporal_eq[p]->pr->get_left_se(),schema).c_str() );
10862 ret += "\t}else{\n";
10864 ret+="// ------------ processing for channel 1\n";
10865 ret+="\t\thost_tuple &tup1 = tup;\n";
10867 // Gather partial fcns and colids ref'd by this branch
10869 new_cids.clear(); local_cids.clear();
10870 for(p=0;p<temporal_eq.size();p++){
10871 collect_partial_fcns(temporal_eq[p]->pr->get_right_se(), pfcn_refs);
10872 gather_se_col_ids(temporal_eq[p]->pr->get_right_se(),local_cids,NULL);
10875 // Start by cleaning up partial function results
10876 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10877 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10879 // Evaluate the partial functions
10880 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10881 new_cids, NULL, "false", needs_xform);
10883 // test passed -- unpack remaining cids.
10884 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10886 // load the key object
10887 for(p=0;p<temporal_eq.size();p++){
10888 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10889 p,generate_se_code(temporal_eq[p]->pr->get_right_se(),schema).c_str() );
10895 ret += "\t return true;\n";
10899 // ------------------------------
10901 // (i.e make a copy)
10903 ret += "bool load_ts_from_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10904 for(p=0;p<temporal_eq.size();p++){
10905 sprintf(tmpstr,"\tlts->tempeq_var%d = rts->tempeq_var%d;\n",p,p);
10910 // -------------------------------------
10911 // compare_ts_to_ts
10912 // There should be only one variable to compare.
10913 // If there is more, assume an arbitrary lexicographic order.
10915 ret += "int compare_ts_with_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10916 for(p=0;p<temporal_eq.size();p++){
10917 sprintf(tmpstr,"\tif(lts->tempeq_var%d < rts->tempeq_var%d) return(-1);\n",p,p);
10919 sprintf(tmpstr,"\tif(lts->tempeq_var%d > rts->tempeq_var%d) return(1);\n",p,p);
10922 ret += "\treturn(0);\n";
10925 // ------------------------------------------
10927 // apply the prefilter
10929 ret += "bool apply_prefilter(host_tuple &tup){\n";
10931 // Variables for this procedure
10932 ret+="\tgs_int32_t problem = 0;\n";
10933 ret+="\tgs_retval_t retval;\n";
10935 // Switch the processing based on the channel
10936 ret+="\tif(tup.channel == 0){\n";
10937 ret+="// ------------ processing for channel 0\n";
10938 ret+="\t\thost_tuple &tup0 = tup;\n";
10939 // Gather partial fcns and colids ref'd by this branch
10941 new_cids.clear(); local_cids.clear();
10942 for(p=0;p<prefilter[0].size();p++){
10943 collect_partial_fcns_pr((prefilter[0])[p]->pr, pfcn_refs);
10946 // Start by cleaning up partial function results
10947 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10948 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10950 for(p=0;p<(prefilter[0]).size();++p){
10951 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10953 // Find the set of variables accessed in this CNF elem,
10954 // but in no previous element.
10955 col_id_set new_pr_cids;
10956 get_new_pred_cids((prefilter[0])[p]->pr,local_cids,new_pr_cids, NULL);
10957 // Unpack these values.
10958 ret += gen_unpack_cids(schema, new_pr_cids, "false", needs_xform);
10959 // Find partial fcns ref'd in this cnf element
10960 set<int> pr_pfcn_refs;
10961 collect_partial_fcns_pr((prefilter[0])[p]->pr, pr_pfcn_refs);
10962 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10964 ret += "\t\tif( !("+generate_predicate_code((prefilter[0])[p]->pr,schema)+") ) return(false);\n";
10966 ret += "\t}else{\n";
10967 ret+="// ------------ processing for channel 1\n";
10968 ret+="\t\thost_tuple &tup1 = tup;\n";
10969 // Gather partial fcns and colids ref'd by this branch
10971 new_cids.clear(); local_cids.clear();
10972 for(p=0;p<prefilter[1].size();p++){
10973 collect_partial_fcns_pr((prefilter[1])[p]->pr, pfcn_refs);
10976 // Start by cleaning up partial function results
10977 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10978 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10980 for(p=0;p<(prefilter[1]).size();++p){
10981 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10983 // Find the set of variables accessed in this CNF elem,
10984 // but in no previous element.
10985 col_id_set pr_new_cids;
10986 get_new_pred_cids((prefilter[1])[p]->pr,local_cids, pr_new_cids, NULL);
10987 // Unpack these values.
10988 ret += gen_unpack_cids(schema, pr_new_cids, "false", needs_xform);
10989 // Find partial fcns ref'd in this cnf element
10990 set<int> pr_pfcn_refs;
10991 collect_partial_fcns_pr((prefilter[1])[p]->pr, pr_pfcn_refs);
10992 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10994 ret += "\t\tif( !("+generate_predicate_code((prefilter[1])[p]->pr,schema)+ ") ) return(false);\n";
10998 ret+="\treturn true;\n";
11002 // -------------------------------------
11003 // create_output_tuple
11004 // If the postfilter on the pair of tuples passes,
11005 // create an output tuple from the combined information.
11006 // (Plus, outer join processing)
11008 ret += "host_tuple create_output_tuple(const host_tuple &tup0, const host_tuple &tup1, bool &failed){\n";
11010 ret += "\thost_tuple tup;\n";
11011 ret += "\tfailed = true;\n";
11012 ret += "\tgs_retval_t retval = 0;\n";
11013 ret += "\tgs_int32_t problem = 0;\n";
11015 // Start by cleaning up partial function results
11016 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11018 new_cids.clear(); local_cids.clear();
11019 for(p=0;p<postfilter.size();p++){
11020 collect_partial_fcns_pr(postfilter[p]->pr, pfcn_refs);
11022 for(s=0;s<select_list.size();s++){
11023 collect_partial_fcns(select_list[s]->se, pfcn_refs);
11025 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
11028 ret+="\tif(tup0.data && tup1.data){\n";
11029 // Evaluate the postfilter
11030 new_cids.clear(); local_cids.clear();
11031 for(p=0;p<postfilter.size();p++){
11032 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
11034 // Find the set of variables accessed in this CNF elem,
11035 // but in no previous element.
11036 col_id_set pr_new_cids;
11037 get_new_pred_cids(postfilter[p]->pr,local_cids, pr_new_cids, NULL);
11038 // Unpack these values.
11039 ret += gen_unpack_cids(schema, pr_new_cids, "tup", needs_xform);
11040 // Find partial fcns ref'd in this cnf element
11041 set<int> pr_pfcn_refs;
11042 collect_partial_fcns_pr(postfilter[p]->pr, pr_pfcn_refs);
11043 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"tup");
11045 ret += "\t\tif( !("+generate_predicate_code(postfilter[p]->pr,schema)+ ") ) return(tup);\n";
11049 // postfilter passed, evaluate partial functions for select list
11052 col_id_set se_cids;
11053 for(s=0;s<select_list.size();s++){
11054 collect_partial_fcns(select_list[s]->se, sl_pfcns);
11057 if(sl_pfcns.size() > 0)
11058 ret += "//\t\tUnpack remaining partial fcns.\n";
11059 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
11060 local_cids, NULL, "tup", needs_xform);
11062 // Unpack remaining fields
11063 ret += "//\t\tunpack any remaining fields from the input tuples.\n";
11064 for(s=0;s<select_list.size();s++)
11065 get_new_se_cids(select_list[s]->se, local_cids,se_cids,NULL);
11066 ret += gen_unpack_cids(schema, se_cids,"tup", needs_xform);
11069 // Deal with outer join stuff
11070 col_id_set l_cids, r_cids;
11071 col_id_set::iterator ocsi;
11072 for(ocsi=local_cids.begin();ocsi!=local_cids.end();++ocsi){
11073 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
11074 else r_cids.insert((*ocsi));
11076 for(ocsi=se_cids.begin();ocsi!=se_cids.end();++ocsi){
11077 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
11078 else r_cids.insert((*ocsi));
11081 ret += "\t}else if(tup0.data){\n";
11082 string unpack_null = ""; col_id_set extra_cids;
11083 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
11084 string field = (*ocsi).field;
11085 if(r_equiv.count(field)){
11086 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
11087 get_new_se_cids(r_equiv[field],l_cids,new_cids,NULL);
11089 int schref = (*ocsi).schema_ref;
11090 data_type dt(schema->get_type_name(schref,field));
11091 literal_t empty_lit(dt.type_indicator());
11092 if(empty_lit.is_cpx_lit()){
11093 // sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
11094 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11095 // NB : works for string type only
11096 // NNB: installed fix for ipv6, more of this should be pushed
11097 // into the literal_t code.
11098 unpack_null+="\tunpack_var_"+field+"_1= "+empty_lit.hfta_empty_literal_name()+";\n";
11100 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
11104 ret += gen_unpack_cids(schema, l_cids, "tup", needs_xform);
11105 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11106 ret += unpack_null;
11107 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11110 unpack_null = ""; extra_cids.clear();
11111 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
11112 string field = (*ocsi).field;
11113 if(l_equiv.count(field)){
11114 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
11115 get_new_se_cids(l_equiv[field],r_cids,new_cids,NULL);
11117 int schref = (*ocsi).schema_ref;
11118 data_type dt(schema->get_type_name(schref,field));
11119 literal_t empty_lit(dt.type_indicator());
11120 if(empty_lit.is_cpx_lit()){
11121 // sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
11122 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11123 // NB : works for string type only
11124 // NNB: installed fix for ipv6, more of this should be pushed
11125 // into the literal_t code.
11126 unpack_null+="\tunpack_var_"+field+"_0= "+empty_lit.hfta_empty_literal_name()+";\n";
11128 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
11132 ret += gen_unpack_cids(schema, r_cids, "tup", needs_xform);
11133 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11134 ret += unpack_null;
11135 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11140 // Unpack any BUFFER type selections into temporaries
11141 // so that I can compute their size and not have
11142 // to recompute their value during tuple packing.
11143 // I can use regular assignment here because
11144 // these temporaries are non-persistent.
11146 ret += "//\t\tCompute the size of the tuple.\n";
11147 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
11149 // Unpack all buffer type selections, to be able to compute their size
11150 ret += gen_buffer_selvars(schema, select_list);
11152 // The size of the tuple is the size of the tuple struct plus the
11153 // size of the buffers to be copied in.
11155 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
11156 ret += gen_buffer_selvars_size(select_list,schema);
11159 // Allocate tuple data block.
11160 ret += "//\t\tCreate the tuple block.\n";
11161 ret += "\ttup.data = malloc(tup.tuple_size);\n";
11162 ret += "\ttup.heap_resident = true;\n";
11163 // ret += "\ttup.channel = 0;\n";
11165 // Mark tuple as regular
11166 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
11169 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
11170 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
11173 // (Here, offsets are hard-wired. is this a problem?)
11175 ret += "//\t\tPack the fields into the tuple.\n";
11176 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
11178 // Delete string temporaries
11179 ret += gen_buffer_selvars_dtr(select_list);
11181 ret += "\tfailed = false;\n";
11182 ret += "\treturn tup;\n";
11187 //-----------------------------
11188 // Method for checking whether tuple is temporal
11190 ret += "bool temp_status_received(host_tuple &tup){\n";
11192 // Switch the processing based on the channel
11193 ret+="\tif(tup.channel == 0){\n";
11194 ret+="\t\thost_tuple &tup0 = tup;\n";
11195 ret += gen_temp_tuple_check(this->node_name, 0);
11196 ret += "\t}else{\n";
11197 ret+="\t\thost_tuple &tup1 = tup;\n";
11198 ret += gen_temp_tuple_check(this->node_name, 1);
11200 ret += "\treturn temp_tuple_received;\n};\n\n";
11203 //-------------------------------------------------------------------
11204 // Temporal update functions
11207 // create a temp status tuple
11208 ret += "int create_temp_status_tuple("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts, host_tuple& result) {\n\n";
11210 ret += "\tgs_retval_t retval = 0;\n";
11211 ret += "\tgs_int32_t problem = 0;\n";
11213 for(p=0;p<temporal_dt.size();p++){
11214 sprintf(tmpstr,"lhs_var");
11215 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
11216 sprintf(tmpstr,"rhs_var");
11217 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
11220 ret += "\tif(lts!=NULL){\n";
11221 for(p=0;p<temporal_dt.size();p++){
11222 ret += "\t\tlhs_var = lts->tempeq_var"+to_string(p)+";\n";
11224 ret += "\t}else{\n";
11225 for(p=0;p<temporal_dt.size();p++){
11226 ret += "\t\tlhs_var = 0;\n";
11230 ret += "\tif(rts!=NULL){\n";
11231 for(p=0;p<temporal_dt.size();p++){
11232 ret += "\t\trhs_var = rts->tempeq_var"+to_string(p)+";\n";
11234 ret += "\t}else{\n";
11235 for(p=0;p<temporal_dt.size();p++){
11236 ret += "\t\trhs_var = 0;\n";
11240 ret += gen_init_temp_status_tuple(this->get_node_name());
11245 // This is checked in the query analyzer so I think its safe,
11246 // But a lot of older code has complex code to propagate multiple
11248 for(s=0;s<select_list.size();s++){
11249 scalarexp_t *se = select_list[s]->se;
11250 data_type *sdt = se->get_data_type();
11251 if(sdt->is_temporal()){
11252 string target = "\ttuple->tuple_var"+to_string(s)+" = ";
11253 if(from[0]->get_property()==0 && from[1]->get_property()==0){ // INNER
11254 ret += target+"(lhs_var>rhs_var ? lhs_var : rhs_var); // INNER\n";
11256 if(from[0]->get_property()!=0 && from[1]->get_property()==0){ // LEFT
11257 ret += target+"lhs_var; // LEFT\n";
11258 // ret += target+"rhs_var; // LEFT\n";
11260 if(from[0]->get_property()==0 && from[1]->get_property()!=0){ // RIGHT
11261 ret += target+"rhs_var; // RIGHT\n";
11262 // ret += target+"lhs_var; // RIGHT\n";
11264 if(from[0]->get_property()!=0 && from[1]->get_property()!=0){ // OUTER
11265 ret += target+"(lhs_var<rhs_var ? lhs_var : rhs_var); // OUTER\n";
11271 ret += "\treturn 0;\n";
11277 //----------------------------------------------------------
11278 // The hash function
11280 ret += "struct "+generate_functor_name()+"_hash_func{\n";
11281 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
11282 "_keydef *key) const{\n";
11283 ret += "\t\treturn( (";
11284 if(hashkey_dt.size() > 0){
11285 for(p=0;p<hashkey_dt.size();p++){
11286 if(p>0) ret += "^";
11287 if(hashkey_dt[p]->use_hashfunc()){
11288 // sprintf(tmpstr,"%s(&(key->hashkey_var%d))",hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11289 if(hashkey_dt[p]->is_buffer_type())
11290 sprintf(tmpstr,"(%s*%s(&(key->hashkey_var%d)))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11292 sprintf(tmpstr,"(%s*%s(key->hashkey_var%d))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11294 sprintf(tmpstr,"(%s*key->hashkey_var%d)",hash_nums[p%NRANDS].c_str(),p);
11301 ret += ") >> 32);\n";
11305 //----------------------------------------------------------
11306 // The comparison function
11308 ret += "struct "+generate_functor_name()+"_equal_func{\n";
11309 ret += "\tbool operator()(const "+generate_functor_name()+"_keydef *key1, "+
11310 generate_functor_name()+"_keydef *key2) const{\n";
11311 ret += "\t\treturn( (";
11312 if(hashkey_dt.size() > 0){
11313 for(p=0;p<hashkey_dt.size();p++){
11314 if(p>0) ret += ") && (";
11315 if(hashkey_dt[p]->complex_comparison(hashkey_dt[p])){
11316 if(hashkey_dt[p]->is_buffer_type())
11317 sprintf(tmpstr,"(%s(&(key1->hashkey_var%d), &(key2->hashkey_var%d))==0)",
11318 hashkey_dt[p]->get_hfta_equals_fcn(hashkey_dt[p]).c_str(),p,p);
11320 sprintf(tmpstr,"(%s((key1->hashkey_var%d), (key2->hashkey_var%d))==0)",
11321 hashkey_dt[p]->get_hfta_equals_fcn(hashkey_dt[p]).c_str(),p,p);
11323 sprintf(tmpstr,"key1->hashkey_var%d == key2->hashkey_var%d",p,p);
11340 string join_eq_hash_qpn::generate_operator(int i, string params){
11343 " join_eq_hash_operator<" +
11344 generate_functor_name()+ ","+
11345 generate_functor_name() + "_tempeqdef,"+
11346 generate_functor_name() + "_keydef,"+
11347 generate_functor_name()+"_hash_func,"+
11348 generate_functor_name()+"_equal_func"
11349 "> *op"+int_to_string(i)+" = new join_eq_hash_operator<"+
11350 generate_functor_name()+","+
11351 generate_functor_name() + "_tempeqdef,"+
11352 generate_functor_name() + "_keydef,"+
11353 generate_functor_name()+"_hash_func,"+
11354 generate_functor_name()+"_equal_func"
11356 int_to_string(from[0]->get_property()+2*from[1]->get_property())+", \"" + get_node_name() +
11363 ////////////////////////////////////////////////////////////////
11364 //// SGAHCWCB functor
11368 string sgahcwcb_qpn::generate_functor_name(){
11369 return("sgahcwcb_functor_" + normalize_name(this->get_node_name()));
11373 string sgahcwcb_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
11377 // Initialize generate utility globals
11378 segen_gb_tbl = &(gb_tbl);
11381 //--------------------------------
11382 // group definition class
11383 string ret = "class " + generate_functor_name() + "_groupdef{\n";
11384 ret += "public:\n";
11385 ret += "\tbool valid;\n";
11386 for(g=0;g<this->gb_tbl.size();g++){
11387 sprintf(tmpstr,"gb_var%d",g);
11388 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11391 ret += "\t"+generate_functor_name() + "_groupdef(){valid=true;};\n";
11392 ret += "\t"+generate_functor_name() + "_groupdef("+
11393 this->generate_functor_name() + "_groupdef *gd){\n";
11394 for(g=0;g<gb_tbl.size();g++){
11395 data_type *gdt = gb_tbl.get_data_type(g);
11396 if(gdt->is_buffer_type()){
11397 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
11398 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
11401 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
11405 ret += "\tvalid=true;\n";
11408 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
11409 for(g=0;g<gb_tbl.size();g++){
11410 data_type *gdt = gb_tbl.get_data_type(g);
11411 if(gdt->is_buffer_type()){
11412 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
11413 gdt->get_hfta_buffer_destroy().c_str(), g );
11420 //--------------------------------
11421 // aggr definition class
11422 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
11423 ret += "public:\n";
11424 for(a=0;a<aggr_tbl.size();a++){
11425 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11426 sprintf(tmpstr,"aggr_var%d",a);
11427 if(aggr_tbl.is_builtin(a))
11428 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11430 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11433 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
11435 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
11436 for(a=0;a<aggr_tbl.size();a++){
11437 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11438 if(aggr_tbl.is_builtin(a)){
11439 data_type *adt = aggr_tbl.get_data_type(a);
11440 if(adt->is_buffer_type()){
11441 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11442 adt->get_hfta_buffer_destroy().c_str(), a );
11446 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11447 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11448 ret+="(aggr_var"+int_to_string(a)+"));\n";
11454 //--------------------------------
11455 // superaggr definition class
11456 ret += "class " + this->generate_functor_name() + "_statedef{\n";
11457 ret += "public:\n";
11458 for(a=0;a<aggr_tbl.size();a++){
11459 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11460 if(ate->is_superaggr()){
11461 sprintf(tmpstr,"aggr_var%d",a);
11462 if(aggr_tbl.is_builtin(a))
11463 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11465 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11468 set<string>::iterator ssi;
11469 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11470 string state_nm = (*ssi);
11471 int state_id = Ext_fcns->lookup_state(state_nm);
11472 data_type *dt = Ext_fcns->get_storage_dt(state_id);
11473 string state_var = "state_var_"+state_nm;
11474 ret += "\t"+dt->make_host_cvar(state_var)+";\n";
11477 ret += "\t"+this->generate_functor_name() + "_statedef(){};\n";
11479 ret += "\t~"+this->generate_functor_name() + "_statedef(){\n";
11480 for(a=0;a<aggr_tbl.size();a++){
11481 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11482 if(ate->is_superaggr()){
11483 if(aggr_tbl.is_builtin(a)){
11484 data_type *adt = aggr_tbl.get_data_type(a);
11485 if(adt->is_buffer_type()){
11486 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11487 adt->get_hfta_buffer_destroy().c_str(), a );
11491 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11492 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11493 ret+="(aggr_var"+int_to_string(a)+"));\n";
11497 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11498 string state_nm = (*ssi);
11499 int state_id = Ext_fcns->lookup_state(state_nm);
11500 string state_var = "state_var_"+state_nm;
11501 ret += "\t_sfun_state_destroy_"+state_nm+"(&"+state_var+");\n";
11508 //--------------------------------
11509 // gb functor class
11510 ret += "class " + this->generate_functor_name() + "{\n";
11512 // Find variables referenced in this query node.
11514 col_id_set cid_set;
11515 col_id_set::iterator csi;
11517 for(w=0;w<where.size();++w)
11518 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
11519 for(w=0;w<having.size();++w)
11520 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
11521 for(w=0;w<cleanby.size();++w)
11522 gather_pr_col_ids(cleanby[w]->pr,cid_set,segen_gb_tbl);
11523 for(w=0;w<cleanwhen.size();++w)
11524 gather_pr_col_ids(cleanwhen[w]->pr,cid_set,segen_gb_tbl);
11525 for(g=0;g<gb_tbl.size();g++)
11526 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
11528 for(s=0;s<select_list.size();s++){
11529 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
11533 // Private variables : store the state of the functor.
11534 // 1) variables for unpacked attributes
11535 // 2) offsets of the unpacked attributes
11536 // 3) storage of partial functions
11537 // 4) storage of complex literals (i.e., require a constructor)
11539 ret += "private:\n";
11541 // var to save the schema handle
11542 ret += "\tint schema_handle0;\n";
11544 // generate the declaration of all the variables related to
11545 // temp tuples generation
11546 ret += gen_decl_temp_vars();
11548 // unpacked attribute storage, offsets
11549 ret += "//\t\tstorage and offsets of accessed fields.\n";
11550 ret += generate_access_vars(cid_set, schema);
11551 // tuple metadata offset
11552 ret += "\ttuple_metadata_offset0;\n";
11554 // Variables to store results of partial functions.
11555 // WARNING find_partial_functions modifies the SE
11556 // (it marks the partial function id).
11557 ret += "//\t\tParital function result storage\n";
11558 vector<scalarexp_t *> partial_fcns;
11559 vector<int> fcn_ref_cnt;
11560 vector<bool> is_partial_fcn;
11561 for(s=0;s<select_list.size();s++){
11562 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
11564 for(w=0;w<where.size();w++){
11565 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11567 for(w=0;w<having.size();w++){
11568 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11570 for(w=0;w<cleanby.size();w++){
11571 find_partial_fcns_pr(cleanby[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11573 for(w=0;w<cleanwhen.size();w++){
11574 find_partial_fcns_pr(cleanwhen[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11576 for(g=0;g<gb_tbl.size();g++){
11577 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
11579 for(a=0;a<aggr_tbl.size();a++){
11580 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
11582 if(partial_fcns.size()>0){
11583 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
11584 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
11587 // Complex literals (i.e., they need constructors)
11588 ret += "//\t\tComplex literal storage.\n";
11589 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
11590 ret += generate_complex_lit_vars(complex_literals);
11592 // Pass-by-handle parameters
11593 ret += "//\t\tPass-by-handle storage.\n";
11594 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
11595 ret += generate_pass_by_handle_vars(param_handle_table);
11597 // Create cached temporaries for UDAF return values.
11598 ret += "//\t\tTemporaries for UDAF return values.\n";
11599 for(a=0;a<aggr_tbl.size();a++){
11600 if(! aggr_tbl.is_builtin(a)){
11601 int afcn_id = aggr_tbl.get_fcn_id(a);
11602 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11603 sprintf(tmpstr,"udaf_ret_%d", a);
11604 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11610 // variables to hold parameters.
11611 ret += "//\tfor query parameters\n";
11612 ret += generate_param_vars(param_tbl);
11614 // Is there a temporal flush? If so create flush temporaries,
11615 // create flush indicator.
11616 bool uses_temporal_flush = false;
11617 for(g=0;g<gb_tbl.size();g++){
11618 data_type *gdt = gb_tbl.get_data_type(g);
11619 if(gdt->is_temporal())
11620 uses_temporal_flush = true;
11623 if(uses_temporal_flush){
11624 ret += "//\t\tFor temporal flush\n";
11625 for(g=0;g<gb_tbl.size();g++){
11626 data_type *gdt = gb_tbl.get_data_type(g);
11627 if(gdt->is_temporal()){
11628 sprintf(tmpstr,"last_gb%d",g);
11629 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11630 sprintf(tmpstr,"last_flushed_gb%d",g);
11631 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11634 ret += "\tbool needs_temporal_flush;\n";
11637 // The publicly exposed functions
11639 ret += "\npublic:\n";
11642 //-------------------
11643 // The functor constructor
11644 // pass in the schema handle.
11645 // 1) make assignments to the unpack offset variables
11646 // 2) initialize the complex literals
11648 ret += "//\t\tFunctor constructor.\n";
11649 ret += this->generate_functor_name()+"(int schema_handle0){\n";
11651 // save the schema handle
11652 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
11653 // tuple metadata offset
11654 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
11657 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
11658 ret += gen_access_var_init(cid_set);
11660 // aggregate return vals : refd in both final_sample
11661 // and create_output_tuple
11662 // Create cached temporaries for UDAF return values.
11663 for(a=0;a<aggr_tbl.size();a++){
11664 if(! aggr_tbl.is_builtin(a)){
11665 int afcn_id = aggr_tbl.get_fcn_id(a);
11666 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11667 sprintf(tmpstr,"udaf_ret_%d", a);
11668 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11672 // complex literals
11673 ret += "//\t\tInitialize complex literals.\n";
11674 ret += gen_complex_lit_init(complex_literals);
11676 // Initialize partial function results so they can be safely GC'd
11677 ret += gen_partial_fcn_init(partial_fcns);
11679 // Initialize non-query-parameter parameter handles
11680 ret += gen_pass_by_handle_init(param_handle_table);
11682 // temporal flush variables
11683 // ASSUME that structured values won't be temporal.
11684 if(uses_temporal_flush){
11685 ret += "//\t\tInitialize temporal flush variables.\n";
11686 for(g=0;g<gb_tbl.size();g++){
11687 data_type *gdt = gb_tbl.get_data_type(g);
11688 if(gdt->is_temporal()){
11689 literal_t gl(gdt->type_indicator());
11690 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
11691 ret.append(tmpstr);
11694 ret += "\tneeds_temporal_flush = false;\n";
11697 // Init temporal attributes referenced in select list
11698 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
11703 //-------------------
11704 // Functor destructor
11705 ret += "//\t\tFunctor destructor.\n";
11706 ret += "~"+this->generate_functor_name()+"(){\n";
11708 // clean up buffer type complex literals
11709 ret += gen_complex_lit_dtr(complex_literals);
11711 // Deregister the pass-by-handle parameters
11712 ret += "/* register and de-register the pass-by-handle parameters */\n";
11713 ret += gen_pass_by_handle_dtr(param_handle_table);
11715 // clean up partial function results.
11716 ret += "/* clean up partial function storage */\n";
11717 ret += gen_partial_fcn_dtr(partial_fcns);
11719 // Destroy the parameters, if any need to be destroyed
11720 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11725 //-------------------
11726 // Parameter manipulation routines
11727 ret += generate_load_param_block(this->generate_functor_name(),
11728 this->param_tbl,param_handle_table);
11729 ret += generate_delete_param_block(this->generate_functor_name(),
11730 this->param_tbl,param_handle_table);
11732 //-------------------
11733 // Register new parameter block
11735 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
11736 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11737 ret += "\treturn this->load_params_"+this->generate_functor_name()+
11741 //-------------------
11742 // the create_group method.
11743 // This method creates a group in a buffer passed in
11744 // (to allow for creation on the stack).
11745 // There are also a couple of side effects:
11746 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
11747 // 2) determine if a temporal flush is required.
11749 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
11750 // Variables for execution of the function.
11751 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11753 if(partial_fcns.size()>0){ // partial fcn access failure
11754 ret += "\tgs_retval_t retval = 0;\n";
11758 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
11759 "_groupdef *) buffer;\n";
11761 // Start by cleaning up partial function results
11762 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11764 set<int> gb_pfcns; // partial fcns in gbdefs, aggr se's
11765 for(g=0;g<gb_tbl.size();g++){
11766 collect_partial_fcns(gb_tbl.get_def(g), gb_pfcns);
11768 ret += gen_partial_fcn_dtr(partial_fcns,gb_pfcns);
11769 // ret += gen_partial_fcn_dtr(partial_fcns);
11772 ret += gen_temp_tuple_check(this->node_name, 0);
11773 col_id_set found_cids; // colrefs unpacked thus far.
11774 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
11778 // Save temporal group-by variables
11781 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
11783 for(g=0;g<gb_tbl.size();g++){
11785 data_type *gdt = gb_tbl.get_data_type(g);
11787 if(gdt->is_temporal()){
11788 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11789 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11790 ret.append(tmpstr);
11797 // Compare the temporal GB vars with the stored ones,
11798 // set flush indicator and update stored GB vars if there is any change.
11800 if(uses_temporal_flush){
11801 ret+= "\tif( !( (";
11802 bool first_one = true;
11803 for(g=0;g<gb_tbl.size();g++){
11804 data_type *gdt = gb_tbl.get_data_type(g);
11806 if(gdt->is_temporal()){
11807 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
11808 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
11809 if(first_one){first_one = false;} else {ret += ") && (";}
11810 ret += generate_equality_test(lhs_op, rhs_op, gdt);
11814 for(g=0;g<gb_tbl.size();g++){
11815 data_type *gdt = gb_tbl.get_data_type(g);
11816 if(gdt->is_temporal()){
11817 if(gdt->is_buffer_type()){
11818 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
11820 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
11822 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
11828 if(uses_temporal_flush){
11829 for(g=0;g<gb_tbl.size();g++){
11830 data_type *gdt = gb_tbl.get_data_type(g);
11831 if(gdt->is_temporal()){
11832 ret+="if(last_flushed_gb"+int_to_string(g)+">0)\n";
11838 ret += "\t\tneeds_temporal_flush=true;\n";
11839 ret += "\t\t}else{\n"
11840 "\t\t\tneeds_temporal_flush=false;\n"
11845 // For temporal status tuple we don't need to do anything else
11846 ret += "\tif (temp_tuple_received) return NULL;\n\n";
11849 // The partial functions ref'd in the group-by var
11850 // definitions must be evaluated. If one returns false,
11851 // then implicitly the predicate is false.
11852 set<int>::iterator pfsi;
11854 if(gb_pfcns.size() > 0)
11855 ret += "//\t\tUnpack partial fcns.\n";
11856 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, gb_pfcns,
11857 found_cids, segen_gb_tbl, "NULL", needs_xform);
11859 // Unpack the group-by variables
11861 for(g=0;g<gb_tbl.size();g++){
11862 // Find the new fields ref'd by this GBvar def.
11863 col_id_set new_cids;
11864 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
11865 // Unpack these values.
11866 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
11868 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11869 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11871 // There seems to be no difference between the two
11872 // branches of the IF statement.
11873 data_type *gdt = gb_tbl.get_data_type(g);
11874 if(gdt->is_buffer_type()){
11875 // Create temporary copy.
11876 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11877 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11879 scalarexp_t *gse = gb_tbl.get_def(g);
11880 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11881 g,generate_se_code(gse,schema).c_str());
11884 ret.append(tmpstr);
11889 ret+= "\treturn gbval;\n";
11894 //-------------------
11895 // the evaluate_predicate method.
11896 // This method evaluates the WHERE clause on an input tuple
11897 // and returns false as soon as a clause fails.
11898 // The clauses are processed in two passes:
11899 // 1) clauses which don't reference stateful fcns (unpacking partial fcns as needed)
11900 // 2) all remaining where clauses.
11902 ret += "bool evaluate_predicate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, int cd){\n";
11903 // Variables for execution of the function.
11904 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11906 if(partial_fcns.size()>0){ // partial fcn access failure
11907 ret += "\tgs_retval_t retval = 0;\n";
11911 // Start by cleaning up partial function results
11912 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11913 set<int> w_pfcns; // partial fcns in where clause
11914 for(w=0;w<where.size();++w)
11915 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
11917 set<int> ag_pfcns; // partial fcns in gbdefs, aggr se's
11918 for(a=0;a<aggr_tbl.size();a++){
11919 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_pfcns);
11921 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
11922 ret += gen_partial_fcn_dtr(partial_fcns,ag_pfcns);
11924 ret+="//\t\tEvaluate clauses which don't reference stateful fcns first \n";
11925 for(w=0;w<where.size();++w){
11926 if(! pred_refs_sfun(where[w]->pr)){
11927 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11929 // Find the set of variables accessed in this CNF elem,
11930 // but in no previous element.
11931 col_id_set new_cids;
11932 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11934 // Unpack these values.
11935 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11936 // Find partial fcns ref'd in this cnf element
11937 set<int> pfcn_refs;
11938 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11939 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11941 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11942 +") ) return(false);\n";
11947 // The partial functions ref'd in the and aggregate
11948 // definitions must also be evaluated. If one returns false,
11949 // then implicitly the predicate is false.
11950 // ASSUME that aggregates cannot reference stateful fcns.
11952 if(ag_pfcns.size() > 0)
11953 ret += "//\t\tUnpack remaining partial fcns.\n";
11954 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_pfcns,
11955 found_cids, segen_gb_tbl, "false", needs_xform);
11957 ret+="//\t\tEvaluate all remaining where clauses.\n";
11958 ret+="\tbool retval = true;\n";
11959 for(w=0;w<where.size();++w){
11960 if( pred_refs_sfun(where[w]->pr)){
11961 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11963 // Find the set of variables accessed in this CNF elem,
11964 // but in no previous element.
11965 col_id_set new_cids;
11966 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11968 // Unpack these values.
11969 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11970 // Find partial fcns ref'd in this cnf element
11971 set<int> pfcn_refs;
11972 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11973 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11975 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11976 +") ) retval = false;\n";
11980 ret+="// Unpack all remaining attributes\n";
11981 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
11983 ret += "\n\treturn retval;\n";
11986 //--------------------------------------------------------
11987 // Create and initialize an aggregate object
11989 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, gs_sp_t a,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11990 // Variables for execution of the function.
11991 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11994 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+ "_aggrdef *)a;\n";
11996 for(a=0;a<aggr_tbl.size();a++){
11997 if(aggr_tbl.is_builtin(a)){
11998 // Create temporaries for buffer return values
11999 data_type *adt = aggr_tbl.get_data_type(a);
12000 if(adt->is_buffer_type()){
12001 sprintf(tmpstr,"aggr_tmp_%d", a);
12002 ret+=adt->make_host_cvar(tmpstr)+";\n";
12007 for(a=0;a<aggr_tbl.size();a++){
12008 sprintf(tmpstr,"aggval->aggr_var%d",a);
12009 string assignto_var = tmpstr;
12010 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12013 ret += "\treturn aggval;\n";
12017 //--------------------------------------------------------
12018 // initialize an aggregate object inplace
12020 ret += "void create_aggregate(host_tuple &tup0, "+this->generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
12021 // Variables for execution of the function.
12022 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12026 for(a=0;a<aggr_tbl.size();a++){
12027 if(aggr_tbl.is_builtin(a)){
12028 // Create temporaries for buffer return values
12029 data_type *adt = aggr_tbl.get_data_type(a);
12030 if(adt->is_buffer_type()){
12031 sprintf(tmpstr,"aggr_tmp_%d", a);
12032 ret+=adt->make_host_cvar(tmpstr)+";\n";
12037 for(a=0;a<aggr_tbl.size();a++){
12038 sprintf(tmpstr,"aggval->aggr_var%d",a);
12039 string assignto_var = tmpstr;
12040 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12046 //--------------------------------------------------------
12047 // Create and clean-initialize an state object
12049 ret += "void initialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval){\n";
12050 // Variables for execution of the function.
12051 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12054 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
12056 for(a=0;a<aggr_tbl.size();a++){
12057 if( aggr_tbl.is_superaggr(a)){
12058 if(aggr_tbl.is_builtin(a)){
12059 // Create temporaries for buffer return values
12060 data_type *adt = aggr_tbl.get_data_type(a);
12061 if(adt->is_buffer_type()){
12062 sprintf(tmpstr,"aggr_tmp_%d", a);
12063 ret+=adt->make_host_cvar(tmpstr)+";\n";
12069 for(a=0;a<aggr_tbl.size();a++){
12070 if( aggr_tbl.is_superaggr(a)){
12071 sprintf(tmpstr,"stval->aggr_var%d",a);
12072 string assignto_var = tmpstr;
12073 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12077 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12078 string state_nm = (*ssi);
12079 ret += "_sfun_state_clean_init_"+state_nm+"(&(stval->state_var_"+state_nm+"));\n";
12085 //--------------------------------------------------------
12086 // Create and dirty-initialize an state object
12088 ret += "void reinitialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, "+generate_functor_name()+"_statedef *old_stval, int cd){\n";
12089 // Variables for execution of the function.
12090 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12093 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
12095 for(a=0;a<aggr_tbl.size();a++){
12096 if( aggr_tbl.is_superaggr(a)){
12097 if(aggr_tbl.is_builtin(a)){
12098 // Create temporaries for buffer return values
12099 data_type *adt = aggr_tbl.get_data_type(a);
12100 if(adt->is_buffer_type()){
12101 sprintf(tmpstr,"aggr_tmp_%d", a);
12102 ret+=adt->make_host_cvar(tmpstr)+";\n";
12108 // initialize superaggregates
12109 for(a=0;a<aggr_tbl.size();a++){
12110 if( aggr_tbl.is_superaggr(a)){
12111 sprintf(tmpstr,"stval->aggr_var%d",a);
12112 string assignto_var = tmpstr;
12113 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12117 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12118 string state_nm = (*ssi);
12119 ret += "_sfun_state_dirty_init_"+state_nm+"(&(stval->state_var_"+state_nm+"),&(old_stval->state_var_"+state_nm+"), cd );\n";
12124 //--------------------------------------------------------
12125 // Finalize_state : call the finalize fcn on all states
12128 ret += "void finalize_state( "+generate_functor_name()+"_statedef *stval, int cd){\n";
12130 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12131 string state_nm = (*ssi);
12132 ret += "_sfun_state_final_init_"+state_nm+"(&(stval->state_var_"+state_nm+"), cd);\n";
12140 //--------------------------------------------------------
12141 // update (plus) a superaggregate object
12143 ret += "void update_plus_superaggr(host_tuple &tup0, " +
12144 generate_functor_name()+"_groupdef *gbval, "+
12145 generate_functor_name()+"_statedef *stval){\n";
12146 // Variables for execution of the function.
12147 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12149 // use of temporaries depends on the aggregate,
12150 // generate them in generate_aggr_update
12153 for(a=0;a<aggr_tbl.size();a++){
12154 if(aggr_tbl.is_superaggr(a)){
12155 sprintf(tmpstr,"stval->aggr_var%d",a);
12156 string varname = tmpstr;
12157 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12161 ret += "\treturn;\n";
12166 //--------------------------------------------------------
12167 // update (minus) a superaggregate object
12169 ret += "void update_minus_superaggr( "+
12170 generate_functor_name()+"_groupdef *gbval, "+
12171 generate_functor_name()+"_aggrdef *aggval,"+
12172 generate_functor_name()+"_statedef *stval"+
12174 // Variables for execution of the function.
12175 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12177 // use of temporaries depends on the aggregate,
12178 // generate them in generate_aggr_update
12181 for(a=0;a<aggr_tbl.size();a++){
12182 if(aggr_tbl.is_superaggr(a)){
12183 sprintf(tmpstr,"stval->aggr_var%d",a);
12184 string super_varname = tmpstr;
12185 sprintf(tmpstr,"aggval->aggr_var%d",a);
12186 string sub_varname = tmpstr;
12187 ret.append(generate_superaggr_minus(sub_varname, super_varname,&aggr_tbl,a, schema));
12191 ret += "\treturn;\n";
12195 //--------------------------------------------------------
12196 // update an aggregate object
12198 ret += "void update_aggregate(host_tuple &tup0, "
12199 +generate_functor_name()+"_groupdef *gbval, "+
12200 generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
12201 // Variables for execution of the function.
12202 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12204 // use of temporaries depends on the aggregate,
12205 // generate them in generate_aggr_update
12208 for(a=0;a<aggr_tbl.size();a++){
12209 sprintf(tmpstr,"aggval->aggr_var%d",a);
12210 string varname = tmpstr;
12211 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12214 ret += "\treturn;\n";
12217 //---------------------------------------------------
12220 ret += "\tbool flush_needed(){\n";
12221 if(uses_temporal_flush){
12222 ret += "\t\treturn needs_temporal_flush;\n";
12224 ret += "\t\treturn false;\n";
12229 //------------------------------------------------------
12230 // THe cleaning_when predicate
12232 string gbvar = "gbval->gb_var";
12233 string aggvar = "aggval->";
12235 ret += "bool need_to_clean( "
12236 +generate_functor_name()+"_groupdef *gbval, "+
12237 generate_functor_name()+"_statedef *stval, int cd"+
12240 if(cleanwhen.size()>0)
12241 ret += "\tbool predval = true;\n";
12243 ret += "\tbool predval = false;\n";
12245 // Find the udafs ref'd in the having clause
12247 for(w=0;w<cleanwhen.size();++w)
12248 collect_aggr_refs_pr(cleanwhen[w]->pr, cw_aggs);
12251 // get the return values from the UDAFS
12252 for(a=0;a<aggr_tbl.size();a++){
12253 if(! aggr_tbl.is_builtin(a) && cw_aggs.count(a)){
12254 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12255 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12256 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12261 // Start by cleaning up partial function results
12262 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12263 set<int> cw_pfcns; // partial fcns in where clause
12264 for(w=0;w<cleanwhen.size();++w)
12265 collect_partial_fcns_pr(cleanwhen[w]->pr, cw_pfcns);
12267 ret += gen_partial_fcn_dtr(partial_fcns,cw_pfcns);
12270 for(w=0;w<cleanwhen.size();++w){
12271 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12273 // Find partial fcns ref'd in this cnf element
12274 set<int> pfcn_refs;
12275 collect_partial_fcns_pr(cleanwhen[w]->pr, pfcn_refs);
12276 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12277 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12278 ret += "\tif(retval){ return false;}\n";
12280 // ret += unpack_partial_fcn_fm_aggr(schema, partial_fcns, pfcn_refs,"false");
12282 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanwhen[w]->pr,gbvar, aggvar, schema)+
12283 ") ) predval = false;\n";
12286 ret += "\treturn predval;\n";
12289 //------------------------------------------------------
12290 // THe cleaning_by predicate
12292 ret += "bool sample_group("
12293 +generate_functor_name()+"_groupdef *gbval, "+
12294 generate_functor_name()+"_aggrdef *aggval,"+
12295 generate_functor_name()+"_statedef *stval, int cd"+
12298 if(cleanby.size()>0)
12299 ret += "\tbool retval = true;\n";
12301 ret += "\tbool retval = false;\n";
12303 // Find the udafs ref'd in the having clause
12305 for(w=0;w<cleanby.size();++w)
12306 collect_aggr_refs_pr(cleanby[w]->pr, cb_aggs);
12309 // get the return values from the UDAFS
12310 for(a=0;a<aggr_tbl.size();a++){
12311 if(! aggr_tbl.is_builtin(a) && cb_aggs.count(a)){
12312 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12313 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12314 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12319 // Start by cleaning up partial function results
12320 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12321 set<int> cb_pfcns; // partial fcns in where clause
12322 for(w=0;w<cleanby.size();++w)
12323 collect_partial_fcns_pr(cleanby[w]->pr, cb_pfcns);
12325 ret += gen_partial_fcn_dtr(partial_fcns,cb_pfcns);
12328 for(w=0;w<cleanwhen.size();++w){
12329 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12333 // Find the set of variables accessed in this CNF elem,
12334 // but in no previous element.
12335 col_id_set new_cids;
12336 get_new_pred_cids(cleanby[w]->pr, found_cids, new_cids, segen_gb_tbl);
12338 // Unpack these values.
12339 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
12342 // Find partial fcns ref'd in this cnf element
12343 set<int> pfcn_refs;
12344 collect_partial_fcns_pr(cleanby[w]->pr, pfcn_refs);
12345 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12346 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12347 ret += "\tif(retval){ return false;}\n";
12349 // ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
12351 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanby[w]->pr,gbvar, aggvar, schema)+
12352 +") ) retval = false;\n";
12355 ret += "\treturn retval;\n";
12359 //-----------------------------------------------------
12361 ret += "bool final_sample_group("
12362 +generate_functor_name()+"_groupdef *gbval, "+
12363 generate_functor_name()+"_aggrdef *aggval,"+
12364 generate_functor_name()+"_statedef *stval,"+
12367 ret += "\tgs_retval_t retval = 0;\n";
12369 // Find the udafs ref'd in the having clause
12371 for(w=0;w<having.size();++w)
12372 collect_aggr_refs_pr(having[w]->pr, hv_aggs);
12375 // get the return values from the UDAFS
12376 for(a=0;a<aggr_tbl.size();a++){
12377 if(! aggr_tbl.is_builtin(a) && hv_aggs.count(a)){
12378 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12379 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12380 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12385 set<int> hv_sl_pfcns;
12386 for(w=0;w<having.size();w++){
12387 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
12390 // clean up the partial fcn results from any previous execution
12391 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
12394 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
12395 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12396 ret += "\tif(retval){ return false;}\n";
12399 // Evalaute the HAVING clause
12400 // TODO: this seems to have a ++ operator rather than a + operator.
12401 for(w=0;w<having.size();++w){
12402 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
12405 ret += "\treturn true;\n";
12408 //---------------------------------------------------
12409 // create output tuple
12410 // Unpack the partial functions ref'd in the where clause,
12411 // select clause. Evaluate the where clause.
12412 // Finally, pack the tuple.
12414 // I need to use special code generation here,
12415 // so I'll leave it in longhand.
12417 ret += "host_tuple create_output_tuple("
12418 +generate_functor_name()+"_groupdef *gbval, "+
12419 generate_functor_name()+"_aggrdef *aggval,"+
12420 generate_functor_name()+"_statedef *stval,"+
12421 "int cd, bool &failed){\n";
12423 ret += "\thost_tuple tup;\n";
12424 ret += "\tfailed = false;\n";
12425 ret += "\tgs_retval_t retval = 0;\n";
12428 // Find the udafs ref'd in the select clause
12430 for(s=0;s<select_list.size();s++)
12431 collect_agg_refs(select_list[s]->se, sl_aggs);
12434 // get the return values from the UDAFS
12435 for(a=0;a<aggr_tbl.size();a++){
12436 if(! aggr_tbl.is_builtin(a) && sl_aggs.count(a)){
12437 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12438 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12439 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12444 // I can't cache partial fcn results from the having
12445 // clause because evaluation is separated.
12447 for(s=0;s<select_list.size();s++){
12448 collect_partial_fcns(select_list[s]->se, sl_pfcns);
12451 for(pfsi=sl_pfcns.begin();pfsi!=sl_pfcns.end();++pfsi){
12452 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12453 ret += "\tif(retval){ failed=true; return tup;}\n";
12457 // Now, compute the size of the tuple.
12459 // Unpack any BUFFER type selections into temporaries
12460 // so that I can compute their size and not have
12461 // to recompute their value during tuple packing.
12462 // I can use regular assignment here because
12463 // these temporaries are non-persistent.
12464 // TODO: should I be using the selvar generation routine?
12466 ret += "//\t\tCompute the size of the tuple.\n";
12467 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
12468 for(s=0;s<select_list.size();s++){
12469 scalarexp_t *se = select_list[s]->se;
12470 data_type *sdt = se->get_data_type();
12471 if(sdt->is_buffer_type() &&
12472 !( (se->get_operator_type() == SE_COLREF) ||
12473 (se->get_operator_type() == SE_AGGR_STAR) ||
12474 (se->get_operator_type() == SE_AGGR_SE) ||
12475 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12476 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12478 sprintf(tmpstr,"selvar_%d",s);
12479 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
12480 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
12484 // The size of the tuple is the size of the tuple struct plus the
12485 // size of the buffers to be copied in.
12487 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
12488 for(s=0;s<select_list.size();s++){
12489 // if(s>0) ret += "+";
12490 scalarexp_t *se = select_list[s]->se;
12491 data_type *sdt = select_list[s]->se->get_data_type();
12492 if(sdt->is_buffer_type()){
12493 if(!( (se->get_operator_type() == SE_COLREF) ||
12494 (se->get_operator_type() == SE_AGGR_STAR) ||
12495 (se->get_operator_type() == SE_AGGR_SE) ||
12496 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12497 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12499 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
12500 ret.append(tmpstr);
12502 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12503 ret.append(tmpstr);
12509 // Allocate tuple data block.
12510 ret += "//\t\tCreate the tuple block.\n";
12511 ret += "\ttup.data = malloc(tup.tuple_size);\n";
12512 ret += "\ttup.heap_resident = true;\n";
12514 // Mark tuple as regular
12515 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
12517 // ret += "\ttup.channel = 0;\n";
12518 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
12519 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
12522 // (Here, offsets are hard-wired. is this a problem?)
12524 ret += "//\t\tPack the fields into the tuple.\n";
12525 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
12526 for(s=0;s<select_list.size();s++){
12527 scalarexp_t *se = select_list[s]->se;
12528 data_type *sdt = se->get_data_type();
12529 if(sdt->is_buffer_type()){
12530 if(!( (se->get_operator_type() == SE_COLREF) ||
12531 (se->get_operator_type() == SE_AGGR_STAR) ||
12532 (se->get_operator_type() == SE_AGGR_SE) ||
12533 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12534 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12536 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
12537 ret.append(tmpstr);
12538 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
12539 ret.append(tmpstr);
12541 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12542 ret.append(tmpstr);
12543 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12544 ret.append(tmpstr);
12547 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12548 ret.append(tmpstr);
12549 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
12554 // Destroy string temporaries
12555 ret += gen_buffer_selvars_dtr(select_list);
12556 // Destroy string return vals of UDAFs
12557 for(a=0;a<aggr_tbl.size();a++){
12558 if(! aggr_tbl.is_builtin(a)){
12559 int afcn_id = aggr_tbl.get_fcn_id(a);
12560 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12561 if(adt->is_buffer_type()){
12562 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12563 adt->get_hfta_buffer_destroy().c_str(), a );
12570 ret += "\treturn tup;\n";
12574 //-------------------------------------------------------------------
12575 // Temporal update functions
12577 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12579 // create a temp status tuple
12580 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12582 ret += gen_init_temp_status_tuple(this->get_node_name());
12585 // (Here, offsets are hard-wired. is this a problem?)
12587 ret += "//\t\tPack the fields into the tuple.\n";
12588 for(s=0;s<select_list.size();s++){
12589 data_type *sdt = select_list[s]->se->get_data_type();
12590 if(sdt->is_temporal()){
12591 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12593 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
12599 ret += "\treturn 0;\n";
12600 ret += "};};\n\n\n";
12603 //----------------------------------------------------------
12604 // The hash function
12606 ret += "struct "+generate_functor_name()+"_hash_func{\n";
12607 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12608 "_groupdef *grp) const{\n";
12609 ret += "\t\treturn(";
12610 for(g=0;g<gb_tbl.size();g++){
12611 if(g>0) ret += "^";
12612 data_type *gdt = gb_tbl.get_data_type(g);
12613 if(gdt->use_hashfunc()){
12614 if(gdt->is_buffer_type())
12615 sprintf(tmpstr,"(%s*%s(&)grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12617 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12619 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12623 ret += ") >> 32);\n";
12627 //----------------------------------------------------------
12628 // The superhash function
12630 ret += "struct "+generate_functor_name()+"_superhash_func{\n";
12631 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12632 "_groupdef *grp) const{\n";
12633 ret += "\t\treturn(0";
12635 for(g=0;g<gb_tbl.size();g++){
12636 if(sg_tbl.count(g)>0){
12638 data_type *gdt = gb_tbl.get_data_type(g);
12639 if(gdt->use_hashfunc()){
12640 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12642 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12647 ret += ") >> 32);\n";
12652 //----------------------------------------------------------
12653 // The comparison function
12655 ret += "struct "+generate_functor_name()+"_equal_func{\n";
12656 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12657 generate_functor_name()+"_groupdef *grp2) const{\n";
12658 ret += "\t\treturn( (";
12659 for(g=0;g<gb_tbl.size();g++){
12660 if(g>0) ret += ") && (";
12661 data_type *gdt = gb_tbl.get_data_type(g);
12662 if(gdt->complex_comparison(gdt)){
12663 if(gdt->is_buffer_type())
12664 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12665 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
12667 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12668 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
12670 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12679 //----------------------------------------------------------
12680 // The superhashcomparison function
12682 ret += "struct "+generate_functor_name()+"_superequal_func{\n";
12683 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12684 generate_functor_name()+"_groupdef *grp2) const{\n";
12685 ret += "\t\treturn( (";
12687 bool first_elem = true;
12688 for(g=0;g<gb_tbl.size();g++){
12689 if(sg_tbl.count(g)){
12690 if(first_elem) first_elem=false; else ret += ") && (";
12691 data_type *gdt = gb_tbl.get_data_type(g);
12692 if(gdt->complex_comparison(gdt)){
12693 if(gdt->is_buffer_type())
12694 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12695 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
12697 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12698 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
12700 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12717 string sgahcwcb_qpn::generate_operator(int i, string params){
12720 " clean_operator<" +
12721 generate_functor_name()+",\n\t"+
12722 generate_functor_name() + "_groupdef, \n\t" +
12723 generate_functor_name() + "_aggrdef, \n\t" +
12724 generate_functor_name() + "_statedef, \n\t" +
12725 generate_functor_name()+"_hash_func, \n\t"+
12726 generate_functor_name()+"_equal_func ,\n\t"+
12727 generate_functor_name()+"_superhash_func,\n\t "+
12728 generate_functor_name()+"_superequal_func \n\t"+
12729 "> *op"+int_to_string(i)+" = new clean_operator<"+
12730 generate_functor_name()+",\n\t"+
12731 generate_functor_name() + "_groupdef,\n\t " +
12732 generate_functor_name() + "_aggrdef, \n\t" +
12733 generate_functor_name() + "_statedef, \n\t" +
12734 generate_functor_name()+"_hash_func, \n\t"+
12735 generate_functor_name()+"_equal_func, \n\t"+
12736 generate_functor_name()+"_superhash_func, \n\t"+
12737 generate_functor_name()+"_superequal_func\n\t "
12738 ">("+params+", \"" + get_node_name() + "\");\n"
12742 ////////////////////////////////////////////////////////////////
12747 string rsgah_qpn::generate_functor_name(){
12748 return("rsgah_functor_" + normalize_name(this->get_node_name()));
12752 string rsgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
12756 // Initialize generate utility globals
12757 segen_gb_tbl = &(gb_tbl);
12760 //--------------------------------
12761 // group definition class
12762 string ret = "class " + generate_functor_name() + "_groupdef{\n";
12763 ret += "public:\n";
12764 for(g=0;g<this->gb_tbl.size();g++){
12765 sprintf(tmpstr,"gb_var%d",g);
12766 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12770 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
12771 ret += "\t// shallow copy constructor\n";
12772 ret += "\t"+generate_functor_name() + "_groupdef("+
12773 this->generate_functor_name() + "_groupdef &gd){\n";
12774 for(g=0;g<gb_tbl.size();g++){
12775 data_type *gdt = gb_tbl.get_data_type(g);
12776 sprintf(tmpstr,"\t\tgb_var%d = gd.gb_var%d;\n",g,g);
12781 ret += "\t// deep assignment operator\n";
12782 ret += "\t"+generate_functor_name() + "_groupdef& operator=(const "+
12783 this->generate_functor_name() + "_groupdef &gd){\n";
12784 for(g=0;g<gb_tbl.size();g++){
12785 data_type *gdt = gb_tbl.get_data_type(g);
12786 if(gdt->is_buffer_type()){
12787 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd.gb_var%d));\n",
12788 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
12791 sprintf(tmpstr,"\t\tgb_var%d = gd.gb_var%d;\n",g,g);
12798 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
12799 for(g=0;g<gb_tbl.size();g++){
12800 data_type *gdt = gb_tbl.get_data_type(g);
12801 if(gdt->is_buffer_type()){
12802 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
12803 gdt->get_hfta_buffer_destroy().c_str(), g );
12810 //--------------------------------
12811 // aggr definition class
12812 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
12813 ret += "public:\n";
12814 for(a=0;a<aggr_tbl.size();a++){
12815 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
12816 sprintf(tmpstr,"aggr_var%d",a);
12817 if(aggr_tbl.is_builtin(a))
12818 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
12820 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
12823 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
12825 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
12826 for(a=0;a<aggr_tbl.size();a++){
12827 if(aggr_tbl.is_builtin(a)){
12828 data_type *adt = aggr_tbl.get_data_type(a);
12829 if(adt->is_buffer_type()){
12830 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
12831 adt->get_hfta_buffer_destroy().c_str(), a );
12835 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
12836 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12837 ret+="(aggr_var"+int_to_string(a)+"));\n";
12843 //--------------------------------
12844 // gb functor class
12845 ret += "class " + this->generate_functor_name() + "{\n";
12847 // Find variables referenced in this query node.
12849 col_id_set cid_set;
12850 col_id_set::iterator csi;
12852 for(w=0;w<where.size();++w)
12853 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
12854 for(w=0;w<having.size();++w)
12855 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
12856 for(w=0;w<closing_when.size();++w)
12857 gather_pr_col_ids(closing_when[w]->pr,cid_set,segen_gb_tbl);
12858 for(g=0;g<gb_tbl.size();g++)
12859 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
12861 for(s=0;s<select_list.size();s++){
12862 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
12866 // Private variables : store the state of the functor.
12867 // 1) variables for unpacked attributes
12868 // 2) offsets of the upacked attributes
12869 // 3) storage of partial functions
12870 // 4) storage of complex literals (i.e., require a constructor)
12872 ret += "private:\n";
12874 // var to save the schema handle
12875 ret += "\tint schema_handle0;\n";
12877 // generate the declaration of all the variables related to
12878 // temp tuples generation
12879 ret += gen_decl_temp_vars();
12881 // unpacked attribute storage, offsets
12882 ret += "//\t\tstorage and offsets of accessed fields.\n";
12883 ret += generate_access_vars(cid_set, schema);
12884 // tuple metadata offset
12885 ret += "\tint tuple_metadata_offset0;\n";
12887 // Variables to store results of partial functions.
12888 // WARNING find_partial_functions modifies the SE
12889 // (it marks the partial function id).
12890 ret += "//\t\tParital function result storage\n";
12891 vector<scalarexp_t *> partial_fcns;
12892 vector<int> fcn_ref_cnt;
12893 vector<bool> is_partial_fcn;
12894 for(s=0;s<select_list.size();s++){
12895 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
12897 for(w=0;w<where.size();w++){
12898 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12900 for(w=0;w<having.size();w++){
12901 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12903 for(w=0;w<closing_when.size();w++){
12904 find_partial_fcns_pr(closing_when[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12906 for(g=0;g<gb_tbl.size();g++){
12907 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
12909 for(a=0;a<aggr_tbl.size();a++){
12910 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
12912 if(partial_fcns.size()>0){
12913 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
12914 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
12917 // Create cached temporaries for UDAF return values.
12918 for(a=0;a<aggr_tbl.size();a++){
12919 if(! aggr_tbl.is_builtin(a)){
12920 int afcn_id = aggr_tbl.get_fcn_id(a);
12921 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12922 sprintf(tmpstr,"udaf_ret_%d", a);
12923 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
12928 // Complex literals (i.e., they need constructors)
12929 ret += "//\t\tComplex literal storage.\n";
12930 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
12931 ret += generate_complex_lit_vars(complex_literals);
12933 // Pass-by-handle parameters
12934 ret += "//\t\tPass-by-handle storage.\n";
12935 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
12936 ret += generate_pass_by_handle_vars(param_handle_table);
12939 // variables to hold parameters.
12940 ret += "//\tfor query parameters\n";
12941 ret += generate_param_vars(param_tbl);
12943 // Is there a temporal flush? If so create flush temporaries,
12944 // create flush indicator.
12945 bool uses_temporal_flush = false;
12946 for(g=0;g<gb_tbl.size();g++){
12947 data_type *gdt = gb_tbl.get_data_type(g);
12948 if(gdt->is_temporal())
12949 uses_temporal_flush = true;
12952 if(uses_temporal_flush){
12953 ret += "//\t\tFor temporal flush\n";
12954 for(g=0;g<gb_tbl.size();g++){
12955 data_type *gdt = gb_tbl.get_data_type(g);
12956 if(gdt->is_temporal()){
12957 sprintf(tmpstr,"curr_gb%d",g);
12958 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12959 sprintf(tmpstr,"last_gb%d",g);
12960 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12963 ret += "\tgs_int32_t needs_temporal_flush;\n";
12964 ret += "\tbool disordered_arrival;\n";
12967 // The publicly exposed functions
12969 ret += "\npublic:\n";
12972 //-------------------
12973 // The functor constructor
12974 // pass in the schema handle.
12975 // 1) make assignments to the unpack offset variables
12976 // 2) initialize the complex literals
12978 ret += "//\t\tFunctor constructor.\n";
12979 ret += this->generate_functor_name()+"(int schema_handle0){\n";
12981 // save the schema handle
12982 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
12984 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
12987 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
12988 ret += gen_access_var_init(cid_set);
12990 // complex literals
12991 ret += "//\t\tInitialize complex literals.\n";
12992 ret += gen_complex_lit_init(complex_literals);
12994 // Initialize partial function results so they can be safely GC'd
12995 ret += gen_partial_fcn_init(partial_fcns);
12997 // Initialize non-query-parameter parameter handles
12998 ret += gen_pass_by_handle_init(param_handle_table);
13000 // temporal flush variables
13001 // ASSUME that structured values won't be temporal.
13002 if(uses_temporal_flush){
13003 ret += "//\t\tInitialize temporal flush variables.\n";
13004 for(g=0;g<gb_tbl.size();g++){
13005 data_type *gdt = gb_tbl.get_data_type(g);
13006 if(gdt->is_temporal()){
13007 literal_t gl(gdt->type_indicator());
13008 sprintf(tmpstr,"\tcurr_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
13009 ret.append(tmpstr);
13010 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
13011 ret.append(tmpstr);
13014 ret += "\tneeds_temporal_flush = 0;\n";
13017 // Init temporal attributes referenced in select list
13018 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
13023 //-------------------
13024 // Functor destructor
13025 ret += "//\t\tFunctor destructor.\n";
13026 ret += "~"+this->generate_functor_name()+"(){\n";
13028 // clean up buffer type complex literals
13029 ret += gen_complex_lit_dtr(complex_literals);
13031 // Deregister the pass-by-handle parameters
13032 ret += "/* register and de-register the pass-by-handle parameters */\n";
13033 ret += gen_pass_by_handle_dtr(param_handle_table);
13035 // clean up partial function results.
13036 ret += "/* clean up partial function storage */\n";
13037 ret += gen_partial_fcn_dtr(partial_fcns);
13039 // Destroy the parameters, if any need to be destroyed
13040 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
13045 //-------------------
13046 // Parameter manipulation routines
13047 ret += generate_load_param_block(this->generate_functor_name(),
13048 this->param_tbl,param_handle_table);
13049 ret += generate_delete_param_block(this->generate_functor_name(),
13050 this->param_tbl,param_handle_table);
13052 //-------------------
13053 // Register new parameter block
13055 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
13056 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
13057 ret += "\treturn this->load_params_"+this->generate_functor_name()+
13062 //-------------------
13063 // the create_group method.
13064 // This method creates a group in a buffer passed in
13065 // (to allow for creation on the stack).
13066 // There are also a couple of side effects:
13067 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
13068 // 2) determine if a temporal flush is required.
13070 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
13071 // Variables for execution of the function.
13072 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13074 if(partial_fcns.size()>0){ // partial fcn access failure
13075 ret += "\tgs_retval_t retval = 0;\n";
13079 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
13080 "_groupdef *) buffer;\n";
13082 // Start by cleaning up partial function results
13083 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
13084 set<int> w_pfcns; // partial fcns in where clause
13085 for(w=0;w<where.size();++w)
13086 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
13088 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
13089 for(g=0;g<gb_tbl.size();g++){
13090 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
13092 for(a=0;a<aggr_tbl.size();a++){
13093 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
13095 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
13096 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
13097 // ret += gen_partial_fcn_dtr(partial_fcns);
13100 ret += gen_temp_tuple_check(this->node_name, 0);
13101 col_id_set found_cids; // colrefs unpacked thus far.
13102 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
13105 // Save temporal group-by variables
13108 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
13110 for(g=0;g<gb_tbl.size();g++){
13112 data_type *gdt = gb_tbl.get_data_type(g);
13114 if(gdt->is_temporal()){
13115 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13116 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13117 ret.append(tmpstr);
13124 // Compare the temporal GB vars with the stored ones,
13125 // set flush indicator and update stored GB vars if there is any change.
13127 if(uses_temporal_flush){
13129 bool first_one = true;
13130 string disorder_test;
13131 for(g=0;g<gb_tbl.size();g++){
13132 data_type *gdt = gb_tbl.get_data_type(g);
13134 if(gdt->is_temporal()){
13135 sprintf(tmpstr,"curr_gb%d",g); string lhs_op = tmpstr;
13136 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
13137 if(first_one){first_one = false;} else {ret += ") && (";}
13138 ret += generate_lt_test(lhs_op, rhs_op, gdt);
13139 disorder_test += generate_lt_test(rhs_op, lhs_op, gdt);
13143 int temporal_gb=-1;
13144 for(g=0;g<gb_tbl.size();g++){
13145 data_type *gdt = gb_tbl.get_data_type(g);
13146 if(gdt->is_temporal()){
13147 if(gdt->is_buffer_type()){
13148 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&curr_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13150 // sprintf(tmpstr,"\t\tlast_gb%d = curr_gb%d;\n",g,g);
13152 // sprintf(tmpstr,"\t\tcurr_gb%d = gbval->gb_var%d;\n",g,g);
13154 ret += "\t\tif(curr_gb"+to_string(g)+"==0){\n";
13155 ret += "\t\t\tlast_gb"+to_string(g)+" = gbval->gb_var"+to_string(g)+";\n";
13156 ret += "\t\t}else{\n";
13157 ret += "\t\t\tlast_gb"+to_string(g)+" = curr_gb"+to_string(g)+";\n";
13159 sprintf(tmpstr,"\t\tcurr_gb%d = gbval->gb_var%d;\n",g,g);
13165 ret += "\t\tneeds_temporal_flush = curr_gb"+to_string (temporal_gb)+" - last_gb"+to_string(temporal_gb)+";\n";
13166 ret += "\t}else{\n"
13167 "\t\tneeds_temporal_flush=0;\n"
13170 ret += "\tdisordered_arrival = "+disorder_test+";\n";
13171 // ret += "\tif( ( ("+disorder_test+") ) ){\n";
13172 // ret += "\t\tdisordered_arrival=true;\n";
13173 // ret += "\t}else{\n";
13174 // ret += "\t\tdisordered_arrival=false;\n";
13179 // For temporal status tuple we don't need to do anything else
13180 ret += "\tif (temp_tuple_received) return NULL;\n\n";
13182 for(w=0;w<where.size();++w){
13183 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
13185 // Find the set of variables accessed in this CNF elem,
13186 // but in no previous element.
13187 col_id_set new_cids;
13188 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
13190 // Unpack these values.
13191 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13192 // Find partial fcns ref'd in this cnf element
13193 set<int> pfcn_refs;
13194 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
13195 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
13197 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
13198 +") ) return(NULL);\n";
13201 // The partial functions ref'd in the group-by var and aggregate
13202 // definitions must also be evaluated. If one returns false,
13203 // then implicitly the predicate is false.
13204 set<int>::iterator pfsi;
13206 if(ag_gb_pfcns.size() > 0)
13207 ret += "//\t\tUnpack remaining partial fcns.\n";
13208 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
13209 found_cids, segen_gb_tbl, "NULL", needs_xform);
13211 // Unpack the group-by variables
13213 for(g=0;g<gb_tbl.size();g++){
13214 data_type *gdt = gb_tbl.get_data_type(g);
13215 if(!gdt->is_temporal()){ // temproal gbs already computed
13216 // Find the new fields ref'd by this GBvar def.
13217 col_id_set new_cids;
13218 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
13219 // Unpack these values.
13220 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13222 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13223 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13225 // There seems to be no difference between the two
13226 // branches of the IF statement.
13227 data_type *gdt = gb_tbl.get_data_type(g);
13228 if(gdt->is_buffer_type()){
13229 // Create temporary copy.
13230 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13231 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13233 scalarexp_t *gse = gb_tbl.get_def(g);
13234 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13235 g,generate_se_code(gse,schema).c_str());
13238 ret.append(tmpstr);
13244 ret+= "\treturn gbval;\n";
13247 //--------------------------------------------------------
13248 // Create and initialize an aggregate object
13250 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
13251 // Variables for execution of the function.
13252 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13255 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
13256 "_aggrdef *)buffer;\n";
13258 for(a=0;a<aggr_tbl.size();a++){
13259 if(aggr_tbl.is_builtin(a)){
13260 // Create temporaries for buffer return values
13261 data_type *adt = aggr_tbl.get_data_type(a);
13262 if(adt->is_buffer_type()){
13263 sprintf(tmpstr,"aggr_tmp_%d", a);
13264 ret+=adt->make_host_cvar(tmpstr)+";\n";
13269 // Unpack all remaining attributes
13270 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
13271 for(a=0;a<aggr_tbl.size();a++){
13272 sprintf(tmpstr,"aggval->aggr_var%d",a);
13273 string assignto_var = tmpstr;
13274 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
13277 ret += "\treturn aggval;\n";
13280 //--------------------------------------------------------
13281 // update an aggregate object
13283 ret += "void update_aggregate(host_tuple &tup0, "
13284 +generate_functor_name()+"_groupdef &gbval, "+
13285 generate_functor_name()+"_aggrdef &aggval){\n";
13286 // Variables for execution of the function.
13287 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13289 // use of temporaries depends on the aggregate,
13290 // generate them in generate_aggr_update
13293 // Unpack all remaining attributes
13294 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
13295 for(a=0;a<aggr_tbl.size();a++){
13296 sprintf(tmpstr,"aggval.aggr_var%d",a);
13297 string varname = tmpstr;
13298 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
13301 ret += "\treturn;\n";
13304 //--------------------------------------------------------
13305 // reinitialize an aggregate object
13307 ret += "void reinit_aggregates( "+
13308 generate_functor_name()+"_groupdef &gbval, "+
13309 generate_functor_name()+"_aggrdef &aggval){\n";
13310 // Variables for execution of the function.
13311 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13313 // use of temporaries depends on the aggregate,
13314 // generate them in generate_aggr_update
13316 int temporal_gb; // track the # of the temporal gb
13317 for(g=0;g<gb_tbl.size();g++){
13318 data_type *gdt = gb_tbl.get_data_type(g);
13319 if(gdt->is_temporal()){
13320 if(gdt->is_buffer_type()){
13321 sprintf(tmpstr,"\t\t%s(&(gbval.gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13323 sprintf(tmpstr,"\t\t gbval.gb_var%d =last_gb%d;\n",g,g);
13330 // Unpack all remaining attributes
13331 for(a=0;a<aggr_tbl.size();a++){
13332 sprintf(tmpstr,"aggval.aggr_var%d",a);
13333 string varname = tmpstr;
13334 ret.append(generate_aggr_reinitialize(varname,&aggr_tbl,a, schema));
13337 ret += "\treturn;\n";
13344 //---------------------------------------------------
13347 ret += "gs_int32_t flush_needed(){\n";
13348 if(uses_temporal_flush){
13349 ret += "\treturn needs_temporal_flush;\n";
13351 ret += "\treturn false;\n";
13355 ret += "bool disordered(){return disordered_arrival;}\n";
13357 //------------------------------------------------
13358 // time bucket management
13359 ret += "void advance_last_tb(){\n";
13360 ret += "\tlast_gb"+to_string(temporal_gb)+"++;\n";
13362 ret += "void reset_last_tb(){\n";
13363 ret += "\tlast_gb"+to_string(temporal_gb)+" = curr_gb"+to_string(temporal_gb)+";\n";
13366 //---------------------------------------------------
13367 // create output tuple
13368 // Unpack the partial functions ref'd in the where clause,
13369 // select clause. Evaluate the where clause.
13370 // Finally, pack the tuple.
13372 // I need to use special code generation here,
13373 // so I'll leave it in longhand.
13375 ret += "host_tuple create_output_tuple("
13376 +generate_functor_name()+"_groupdef &gbval, "+
13377 generate_functor_name()+"_aggrdef &aggval, bool &failed){\n";
13379 ret += "\thost_tuple tup;\n";
13380 ret += "\tfailed = false;\n";
13381 ret += "\tgs_retval_t retval = 0;\n";
13383 string gbvar = "gbval.gb_var";
13384 string aggvar = "aggval.";
13387 // First, get the return values from the UDAFS
13388 for(a=0;a<aggr_tbl.size();a++){
13389 if(! aggr_tbl.is_builtin(a)){
13390 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
13391 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
13392 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
13396 set<int> hv_sl_pfcns;
13397 for(w=0;w<having.size();w++){
13398 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
13400 for(s=0;s<select_list.size();s++){
13401 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
13404 // clean up the partial fcn results from any previous execution
13405 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
13408 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
13409 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13410 ret += "\tif(retval){ failed = true; return(tup);}\n";
13413 // Evaluate the HAVING clause
13414 // TODO: this seems to have a ++ operator rather than a + operator.
13415 for(w=0;w<having.size();++w){
13416 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
13419 // Now, compute the size of the tuple.
13421 // Unpack any BUFFER type selections into temporaries
13422 // so that I can compute their size and not have
13423 // to recompute their value during tuple packing.
13424 // I can use regular assignment here because
13425 // these temporaries are non-persistent.
13426 // TODO: should I be using the selvar generation routine?
13428 ret += "//\t\tCompute the size of the tuple.\n";
13429 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
13430 for(s=0;s<select_list.size();s++){
13431 scalarexp_t *se = select_list[s]->se;
13432 data_type *sdt = se->get_data_type();
13433 if(sdt->is_buffer_type() &&
13434 !( (se->get_operator_type() == SE_COLREF) ||
13435 (se->get_operator_type() == SE_AGGR_STAR) ||
13436 (se->get_operator_type() == SE_AGGR_SE) ||
13437 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13438 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13440 sprintf(tmpstr,"selvar_%d",s);
13441 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
13442 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
13446 // The size of the tuple is the size of the tuple struct plus the
13447 // size of the buffers to be copied in.
13449 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
13450 for(s=0;s<select_list.size();s++){
13451 // if(s>0) ret += "+";
13452 scalarexp_t *se = select_list[s]->se;
13453 data_type *sdt = select_list[s]->se->get_data_type();
13454 if(sdt->is_buffer_type()){
13455 if(!( (se->get_operator_type() == SE_COLREF) ||
13456 (se->get_operator_type() == SE_AGGR_STAR) ||
13457 (se->get_operator_type() == SE_AGGR_SE) ||
13458 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13459 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13461 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
13462 ret.append(tmpstr);
13464 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13465 ret.append(tmpstr);
13471 // Allocate tuple data block.
13472 ret += "//\t\tCreate the tuple block.\n";
13473 ret += "\ttup.data = malloc(tup.tuple_size);\n";
13474 ret += "\ttup.heap_resident = true;\n";
13476 // Mark tuple as regular
13477 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
13479 // ret += "\ttup.channel = 0;\n";
13480 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
13481 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
13484 // (Here, offsets are hard-wired. is this a problem?)
13486 ret += "//\t\tPack the fields into the tuple.\n";
13487 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
13488 for(s=0;s<select_list.size();s++){
13489 scalarexp_t *se = select_list[s]->se;
13490 data_type *sdt = se->get_data_type();
13491 if(sdt->is_buffer_type()){
13492 if(!( (se->get_operator_type() == SE_COLREF) ||
13493 (se->get_operator_type() == SE_AGGR_STAR) ||
13494 (se->get_operator_type() == SE_AGGR_SE) ||
13495 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13496 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13498 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
13499 ret.append(tmpstr);
13500 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
13501 ret.append(tmpstr);
13503 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13504 ret.append(tmpstr);
13505 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13506 ret.append(tmpstr);
13509 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13510 ret.append(tmpstr);
13511 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
13516 // Destroy string temporaries
13517 ret += gen_buffer_selvars_dtr(select_list);
13519 ret += "\treturn tup;\n";
13522 //------------------------------------------------------------------
13523 // Cleaning_when : evaluate the cleaning_when clause.
13524 // ASSUME that the udaf return values have already
13525 // been unpacked. delete the string udaf return values at the end.
13527 ret += "bool cleaning_when("
13528 +generate_functor_name()+"_groupdef &gbval, "+
13529 generate_functor_name()+"_aggrdef &aggval){\n";
13531 ret += "\tbool retval = true;\n";
13534 gbvar = "gbval.gb_var";
13535 aggvar = "aggval.";
13538 set<int> clw_pfcns;
13539 for(w=0;w<closing_when.size();w++){
13540 collect_partial_fcns_pr(closing_when[w]->pr, clw_pfcns);
13543 // clean up the partial fcn results from any previous execution
13544 ret += gen_partial_fcn_dtr(partial_fcns,clw_pfcns);
13547 for(pfsi=clw_pfcns.begin();pfsi!=clw_pfcns.end();++pfsi){
13548 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13549 ret += "\tif(retval){ return false;}\n";
13552 // Evaluate the Closing When clause
13553 // TODO: this seems to have a ++ operator rather than a + operator.
13554 for(w=0;w<closing_when.size();++w){
13555 ret += "\tif( !("+generate_predicate_code_fm_aggr(closing_when[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
13559 // Destroy string return vals of UDAFs
13560 for(a=0;a<aggr_tbl.size();a++){
13561 if(! aggr_tbl.is_builtin(a)){
13562 int afcn_id = aggr_tbl.get_fcn_id(a);
13563 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
13564 if(adt->is_buffer_type()){
13565 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
13566 adt->get_hfta_buffer_destroy().c_str(), a );
13572 ret += "\treturn retval;\n";
13578 //-------------------------------------------------------------------
13579 // Temporal update functions
13581 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
13583 // create a temp status tuple
13584 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
13586 ret += gen_init_temp_status_tuple(this->get_node_name());
13589 // (Here, offsets are hard-wired. is this a problem?)
13591 ret += "//\t\tPack the fields into the tuple.\n";
13592 for(s=0;s<select_list.size();s++){
13593 data_type *sdt = select_list[s]->se->get_data_type();
13594 if(sdt->is_temporal()){
13595 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13597 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_gb", "", schema).c_str());
13603 ret += "\treturn 0;\n";
13604 ret += "};};\n\n\n";
13607 //----------------------------------------------------------
13608 // The hash function
13610 ret += "struct "+generate_functor_name()+"_hash_func{\n";
13611 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
13612 "_groupdef &grp) const{\n";
13613 ret += "\t\treturn(0";
13614 for(g=0;g<gb_tbl.size();g++){
13615 data_type *gdt = gb_tbl.get_data_type(g);
13616 if(! gdt->is_temporal()){
13618 if(gdt->use_hashfunc()){
13619 if(gdt->is_buffer_type())
13620 sprintf(tmpstr,"(%s*%s(&(grp.gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13622 sprintf(tmpstr,"(%s*%s(grp.gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13624 sprintf(tmpstr,"(%s*grp.gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
13629 ret += " >> 32);\n";
13633 //----------------------------------------------------------
13634 // The comparison function
13636 ret += "struct "+generate_functor_name()+"_equal_func{\n";
13637 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef &grp1, "+
13638 "const "+generate_functor_name()+"_groupdef &grp2) const{\n";
13639 ret += "\t\treturn( (";
13642 bool first_exec = true;
13643 for(g=0;g<gb_tbl.size();g++){
13644 data_type *gdt = gb_tbl.get_data_type(g);
13645 if(! gdt->is_temporal()){
13646 if(first_exec){first_exec=false;}else{ hcmpr += ") && (";}
13647 if(gdt->complex_comparison(gdt)){
13648 if(gdt->is_buffer_type())
13649 sprintf(tmpstr,"(%s(&(grp1.gb_var%d), &(grp2.gb_var%d))==0)",
13650 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
13652 sprintf(tmpstr,"(%s((grp1.gb_var%d), (grp2.gb_var%d))==0)",
13653 gdt->get_hfta_equals_fcn(gdt).c_str(),g,g);
13655 sprintf(tmpstr,"grp1.gb_var%d == grp2.gb_var%d",g,g);
// Builds the text of one generated C++ statement that declares and allocates
// the runtime operator object for this node.  The concatenation below has
// the shape
//     running_agg_operator<F, F_groupdef, F_aggrdef, F_hash_func, F_equal_func>
//         *op<i> = new running_agg_operator<...same five arguments...>(<params>, "<node name>");
// where F is the value of generate_functor_name().
//   i      - number appended to "op" (via int_to_string) to name the pointer.
//   params - text spliced in verbatim as the leading constructor argument(s).
// NOTE(review): the statement that hands this expression back to the caller
// is not visible in this excerpt; the declared return type is string.
13672 string rsgah_qpn::generate_operator(int i, string params){
// Declared type of the pointer: the operator template with its five
// functor-derived type arguments.
13675 " running_agg_operator<" +
13676 generate_functor_name()+","+
13677 generate_functor_name() + "_groupdef, " +
13678 generate_functor_name() + "_aggrdef, " +
13679 generate_functor_name()+"_hash_func, "+
13680 generate_functor_name()+"_equal_func "
// Variable name op<i>, then the new-expression repeating the same template
// argument list.
13681 "> *op"+int_to_string(i)+" = new running_agg_operator<"+
13682 generate_functor_name()+","+
13683 generate_functor_name() + "_groupdef, " +
13684 generate_functor_name() + "_aggrdef, " +
13685 generate_functor_name()+"_hash_func, "+
13686 generate_functor_name()+"_equal_func "
// Constructor arguments: caller-supplied params followed by the node name
// emitted as a quoted string literal.
13687 ">("+params+", \"" + get_node_name() + "\");\n"
13693 // Split aggregation into two HFTA components - sub and superaggregation
13694 // If unable to split the aggregates, an empty vector will be returned
// Builds two new sgah_qpn nodes from this one and appends them to ret_vec,
// low-level node first, high-level node second:
//   * low_hfta_node  - named "_" + node_name, uses this node's own table_name
//                      object, and receives duplicates of the group-by
//                      definitions and WHERE predicates.
//   * hi_hfta_node   - keeps node_name, reads from a new table variable named
//                      "_" + node_name, and receives the rehomed select list
//                      and HAVING predicates.
// Ext_fcns is consulted for the sub-/super-aggregate ids of every
// non-builtin aggregate.  Schema is not referenced in the lines visible here.
// NOTE(review): several short lines of this function (the body of the UDAF
// check, the tail of the split_hfta_aggr call, the final return) are not
// visible in this excerpt; comments below describe only what is shown.
13695 vector<qp_node *> sgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13697 vector<qp_node *> ret_vec;
// NOTE(review): o, i, t, fta_flds, stream_flds and sel_names are declared
// but not used in the visible lines of this function.
13698 int s, p, g, a, o, i;
13701 vector<string> fta_flds, stream_flds;
13702 int t = table_name->get_schema_ref();
13704 // Get the set of interfaces it accesses.
13706 vector<string> sel_names;
13708 // Verify that all of the ref'd UDAFs can be split.
// A non-builtin aggregate needs both a super-aggregate and a sub-aggregate
// id; a negative id from either lookup triggers the (not visible) failure
// branch.
13710 for(a=0;a<aggr_tbl.size();++a){
13711 if(! aggr_tbl.is_builtin(a)){
13712 int afcn = aggr_tbl.get_fcn_id(a);
13713 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13714 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13715 if(hfta_super_id < 0 || hfta_sub_id < 0){
13721 /////////////////////////////////////////////////////
13722 // Split into aggr/aggr.
// The low-level node takes this node's table_name pointer itself (no copy),
// so every setter called through low_hfta_node->table_name below also
// modifies this node's table variable.
13725 sgah_qpn *low_hfta_node = new sgah_qpn();
13726 low_hfta_node->table_name = table_name;
13727 low_hfta_node->set_node_name( "_"+node_name );
13728 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// The high-level node gets a freshly allocated table variable that names the
// low-level node ("_" + node_name) as its source.
13731 sgah_qpn *hi_hfta_node = new sgah_qpn();
13732 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13733 hi_hfta_node->set_node_name( node_name );
13734 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13736 // First, process the group-by variables.
13737 // both low and hi level queries duplicate group-by variables of original query
13740 for(g=0;g<gb_tbl.size();g++){
13741 // Insert the gbvar into both low- and hi level hfta.
// The low-level node gets a duplicate (dup_se) of the original definition
// under the original name, table-variable reference and reftype.
13742 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13743 low_hfta_node->gb_tbl.add_gb_var(
13744 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13747 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// gbvar_fta is a column reference to the group-by name, tagged with group-by
// index g and the data type of the original definition.
13748 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13749 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13750 gbvar_fta->set_gb_ref(g);
13751 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13752 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13754 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
// The high-level group-by variable is named after the field of the colref
// returned by make_fta_se_ref and uses that expression as its definition.
13755 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13756 hi_hfta_node->gb_tbl.add_gb_var(
13757 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13761 // hi_hfta_node->gb_tbl.gb_patterns = gb_tbl.gb_patterns; // pattern processing at highest level
13762 hi_hfta_node->gb_tbl.set_pattern_info( &gb_tbl); // pattern processing at highest level
13764 // SEs in the aggregate definitions.
13765 // They are all safe, so split them up for later processing.
// hfta_aggr_se is later passed to rehome_fta_se / rehome_fta_pr; presumably
// split_hfta_aggr fills it (the remaining arguments of the call are not
// visible here) - TODO confirm.
13766 map<int, scalarexp_t *> hfta_aggr_se;
13767 for(a=0;a<aggr_tbl.size();++a){
13768 split_hfta_aggr( &(aggr_tbl), a,
13769 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13770 low_hfta_node->select_list,
13777 // Next, the select list.
// Each select expression is rehomed against hfta_aggr_se and appended to the
// high-level select list under its original name.  fta_forbidden is set but
// not read in the visible lines.
13779 for(s=0;s<select_list.size();s++){
13780 bool fta_forbidden = false;
13781 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13782 hi_hfta_node->select_list.push_back(
13783 new select_element(root_se, select_list[s]->name));
13788 // All the predicates in the where clause must execute
13789 // in the low-level hfta.
// Each WHERE predicate is duplicated, wrapped in a new cnf_elem, analyzed,
// and appended to the low-level node.
13791 for(p=0;p<where.size();p++){
13792 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13793 cnf_elem *new_cnf = new cnf_elem(new_pr);
13794 analyze_cnf(new_cnf);
13796 low_hfta_node->where.push_back(new_cnf);
13799 // All of the predicates in the having clause must
13800 // execute in the high-level hfta node.
// HAVING predicates are rehomed against hfta_aggr_se rather than duplicated.
13802 for(p=0;p<having.size();p++){
13803 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13804 cnf_elem *cnf_root = new cnf_elem(pr_root);
13805 analyze_cnf(cnf_root);
13807 hi_hfta_node->having.push_back(cnf_root);
13811 // Copy parameters to both nodes
// Every parameter is added to both nodes with its own duplicate of the data
// type and the original handle_access value.  The declaration of pi is not
// visible in this excerpt.
13812 vector<string> param_names = param_tbl->get_param_names();
13814 for(pi=0;pi<param_names.size();pi++){
13815 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13816 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13817 param_tbl->handle_access(param_names[pi]));
13818 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13819 param_tbl->handle_access(param_names[pi]));
13821 low_hfta_node->definitions = definitions;
13822 hi_hfta_node->definitions = definitions;
// Copy machine and interface onto both table variables and clear the ifq
// flag.  For the low-level node these calls act on this node's own
// table_name object (see the pointer assignment above).
13825 low_hfta_node->table_name->set_machine(table_name->get_machine());
13826 low_hfta_node->table_name->set_interface(table_name->get_interface());
13827 low_hfta_node->table_name->set_ifq(false);
13829 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13830 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13831 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level node first, then high-level node.
13833 ret_vec.push_back(low_hfta_node);
13834 ret_vec.push_back(hi_hfta_node);
13840 // TODO: add splitting into selection/aggregation
13844 // Split aggregation into two HFTA components - sub and superaggregation
13845 // If unable to split the aggregates, an empty vector will be returned.
13846 // Similar to sgah, but super aggregate is rsgah, subaggr is sgah
//
// Ext_fcns : external-function registry; queried for the HFTA sub- and
//            super-aggregate ids of every non-builtin aggregate.
// Schema   : schema catalog (table_list).
// Result   : the two new nodes are appended to ret_vec, low-level
//            (sub-aggregate) node first, high-level (super-aggregate) second.
13847 vector<qp_node *> rsgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13849 vector<qp_node *> ret_vec;
13850 int s, p, g, a, o, i;
// NOTE(review): o, i, t, fta_flds, stream_flds and sel_names have no visible
// use in this function -- confirm before removing.
13853 vector<string> fta_flds, stream_flds;
13854 int t = table_name->get_schema_ref();
13856 // Get the set of interfaces it accesses.
13858 vector<string> sel_names;
13860 // Verify that all of the ref'd UDAFs can be split.
// A non-builtin aggregate passes only when both its HFTA super-aggregate id
// and its HFTA sub-aggregate id are non-negative.
13862 for(a=0;a<aggr_tbl.size();++a){
13863 if(! aggr_tbl.is_builtin(a)){
13864 int afcn = aggr_tbl.get_fcn_id(a);
13865 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13866 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13867 if(hfta_super_id < 0 || hfta_sub_id < 0){
13873 /////////////////////////////////////////////////////
13874 // Split into aggr/aggr.
// Low-level node: a plain sgah_qpn whose name is node_name with a leading
// underscore.
// NOTE(review): table_name is assigned by pointer, so the low-level node
// shares this node's tablevar_t; the setters later applied through
// low_hfta_node->table_name act on that same object -- confirm intended.
13877 sgah_qpn *low_hfta_node = new sgah_qpn();
13878 low_hfta_node->table_name = table_name;
13879 low_hfta_node->set_node_name( "_"+node_name );
13880 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// High-level node: an rsgah_qpn that keeps this node's name and reads from a
// new table variable named after the low-level node.
13883 rsgah_qpn *hi_hfta_node = new rsgah_qpn();
13884 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13885 hi_hfta_node->set_node_name( node_name );
13886 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13888 // First, process the group-by variables.
13889 // both low and hi level queries duplicate group-by variables of original query
13892 for(g=0;g<gb_tbl.size();g++){
13893 // Insert the gbvar into both low- and hi level hfta.
13894 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13895 low_hfta_node->gb_tbl.add_gb_var(
13896 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13899 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// The reference is a column ref carrying the group-by variable's name, its
// index g as gb_ref, and the data type of the variable's definition.
13900 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13901 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13902 gbvar_fta->set_gb_ref(g);
13903 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13904 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13906 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
// In the high-level node the group-by variable is named after the field of
// gbvar_stream and is registered with table-variable reference -1.
13907 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13908 hi_hfta_node->gb_tbl.add_gb_var(
13909 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13914 // SEs in the aggregate definitions.
13915 // They are all safe, so split them up for later processing.
// hfta_aggr_se is handed to rehome_fta_se / rehome_fta_pr further down when
// the select list, HAVING and CLOSING_WHEN are rebuilt for the high-level node.
13916 map<int, scalarexp_t *> hfta_aggr_se;
13917 for(a=0;a<aggr_tbl.size();++a){
13918 split_hfta_aggr( &(aggr_tbl), a,
13919 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13920 low_hfta_node->select_list,
13927 // Next, the select list.
// Every output expression is rehomed and placed in the high-level select
// list under its original output name.
13929 for(s=0;s<select_list.size();s++){
13930 bool fta_forbidden = false;
13931 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13932 hi_hfta_node->select_list.push_back(
13933 new select_element(root_se, select_list[s]->name));
13938 // All the predicates in the where clause must execute
13939 // in the low-level hfta.
// Each predicate is duplicated, wrapped in a fresh cnf_elem and re-analyzed.
13941 for(p=0;p<where.size();p++){
13942 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13943 cnf_elem *new_cnf = new cnf_elem(new_pr);
13944 analyze_cnf(new_cnf);
13946 low_hfta_node->where.push_back(new_cnf);
13949 // All of the predicates in the having clause must
13950 // execute in the high-level hfta node.
13952 for(p=0;p<having.size();p++){
13953 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13954 cnf_elem *cnf_root = new cnf_elem(pr_root);
13955 analyze_cnf(cnf_root);
13957 hi_hfta_node->having.push_back(cnf_root);
13960 // Similar for closing when
// (CLOSING_WHEN predicates are rehomed into the high-level node as well).
13961 for(p=0;p<closing_when.size();p++){
13962 predicate_t *pr_root = rehome_fta_pr( closing_when[p]->pr, &hfta_aggr_se);
13963 cnf_elem *cnf_root = new cnf_elem(pr_root);
13964 analyze_cnf(cnf_root);
13966 hi_hfta_node->closing_when.push_back(cnf_root);
13970 // Copy parameters to both nodes
// Each node receives its own duplicate of the parameter's data_type together
// with the handle_access value recorded for that parameter.
13971 vector<string> param_names = param_tbl->get_param_names();
13973 for(pi=0;pi<param_names.size();pi++){
13974 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13975 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13976 param_tbl->handle_access(param_names[pi]));
13977 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13978 param_tbl->handle_access(param_names[pi]));
// Both nodes take over this node's definitions.
13980 low_hfta_node->definitions = definitions;
13981 hi_hfta_node->definitions = definitions;
// Propagate machine and interface to both table variables and clear their
// ifq flag.
13984 low_hfta_node->table_name->set_machine(table_name->get_machine());
13985 low_hfta_node->table_name->set_interface(table_name->get_interface());
13986 low_hfta_node->table_name->set_ifq(false);
13988 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13989 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13990 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level node first, then high-level node.
13992 ret_vec.push_back(low_hfta_node);
13993 ret_vec.push_back(hi_hfta_node);
13999 // TODO: add splitting into selection/aggregation
14002 //---------------------------------------------------------------
14003 // Code for propagating Protocol field source information
// Recursively build a copy of scalar expression se, dispatching on
// se->get_operator_type().
//
//   se      : expression to resolve.
//   src_vec : indexed by the column's tablevar_ref; each entry maps a field
//             name to an expression, or is NULL.
//   gb_tbl  : group-by table used to expand group-by references; callers
//             without group-by variables pass NULL.
//   Schema  : used to look up the schema type of a column reference.
//
// Literals, parameter references, unary and binary operators are rebuilt as
// new nodes; resolved column references are returned via dup_se().
// The checks on lse, rse and p_se are not visible here; presumably an
// unresolvable input yields NULL -- TODO confirm.
14006 scalarexp_t *resolve_protocol_se(scalarexp_t *se, vector<map<string, scalarexp_t *> *> &src_vec, gb_table *gb_tbl, table_list *Schema){
14007 scalarexp_t *rse, *lse,*p_se, *gb_se;
14008 int tno, schema_type;
14009 map<string, scalarexp_t *> *pse_map;
14011 switch(se->get_operator_type()){
14013 return new scalarexp_t(se->get_literal());
14015 return scalarexp_t::make_param_reference(se->get_op().c_str());
// Group-by reference: resolve the definition of the referenced gb variable.
// NOTE(review): the diagnostic below is immediately followed by
// gb_tbl->get_def(), so a NULL gb_tbl appears to be reported and then
// dereferenced anyway -- confirm and add an early exit if so.
14019 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, se->gb_ref=%d, but gb_tbl is NULL\n",se->get_gb_ref());
14020 gb_se = gb_tbl->get_def(se->get_gb_ref());
14021 return resolve_protocol_se(gb_se,src_vec,gb_tbl,Schema);
// A column that already belongs to a PROTOCOL_SCHEMA table is its own source.
14024 schema_type = Schema->get_schema_type(se->get_colref()->get_schema_ref());
14025 if(schema_type == PROTOCOL_SCHEMA)
14026 return dup_se(se,NULL);
// Otherwise look the field up in the map of the source it is read from.
// NOTE(review): confirm that the out-of-range diagnostic stops execution
// before src_vec[tno] is indexed.
14028 tno = se->get_colref()->get_tablevar_ref();
14029 if(tno >= src_vec.size()){
14030 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, tno=%d, src_vec.size()=%lu\n",tno,src_vec.size());
14032 if(src_vec[tno] == NULL)
// map::operator[] inserts a null entry into the source's map when the field
// name is not present yet.
14035 pse_map =src_vec[tno];
14036 p_se = (*pse_map)[se->get_colref()->get_field()];
14039 return dup_se(p_se,NULL);
// Unary operator: resolve the operand, then rebuild with the same operator.
14041 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
14045 return new scalarexp_t(se->get_op().c_str(),lse);
// Binary operator: resolve both operands, then rebuild with the same operator.
14047 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
14050 rse = resolve_protocol_se(se->get_right_se(),src_vec,gb_tbl,Schema);
14053 return new scalarexp_t(se->get_op().c_str(),lse,rse);
// Fill protocol_map for a selection/projection node: each select-list entry
// is stored under its output name as the result of resolve_protocol_se().
//   q_sources : source query nodes (the vector is received by value); a NULL
//               source contributes a NULL entry to src_vec.
//   Schema    : forwarded to resolve_protocol_se().
// No group-by table is involved, so NULL is passed for gb_tbl.
14067 void spx_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14069 vector<map<string, scalarexp_t *> *> src_vec;
// src_vec[i] is the protocol map of source i (get_protocol_se()) or NULL.
14071 for(i=0;i<q_sources.size();i++){
14072 if(q_sources[i] != NULL)
14073 src_vec.push_back(q_sources[i]->get_protocol_se());
14075 src_vec.push_back(NULL);
14078 for(i=0;i<select_list.size();i++){
14079 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// Fill protocol_map for an equi-hash join node and record the resolved
// sources of its hash-equality predicates.
//   q_sources : source query nodes (the vector is received by value); a NULL
//               source contributes a NULL entry to src_vec.
//   Schema    : forwarded to resolve_protocol_se().
// No group-by table is involved, so NULL is passed for gb_tbl.
14083 void join_eq_hash_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14085 vector<map<string, scalarexp_t *> *> src_vec;
// src_vec[i] is the protocol map of source i (get_protocol_se()) or NULL.
14087 for(i=0;i<q_sources.size();i++){
14088 if(q_sources[i] != NULL)
14089 src_vec.push_back(q_sources[i]->get_protocol_se());
14091 src_vec.push_back(NULL);
// Each select-list entry is stored under its output name.
14094 for(i=0;i<select_list.size();i++){
14095 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// For every hash-equality predicate, append the resolved left operand to
// hash_src_l and the resolved right operand to hash_src_r (same index).
14098 for(i=0;i<hash_eq.size();i++){
14099 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
14100 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
// Fill protocol_map for a filter-join node and record the resolved sources
// of its hash-equality predicates.
//   q_sources : source query nodes (the vector is received by value); a NULL
//               source contributes a NULL entry to src_vec.
//   Schema    : forwarded to resolve_protocol_se().
// No group-by table is involved, so NULL is passed for gb_tbl.
14104 void filter_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14106 vector<map<string, scalarexp_t *> *> src_vec;
// src_vec[i] is the protocol map of source i (get_protocol_se()) or NULL.
14108 for(i=0;i<q_sources.size();i++){
14109 if(q_sources[i] != NULL)
14110 src_vec.push_back(q_sources[i]->get_protocol_se());
14112 src_vec.push_back(NULL);
// Each select-list entry is stored under its output name.
14115 for(i=0;i<select_list.size();i++){
14116 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// For every hash-equality predicate, append the resolved left operand to
// hash_src_l and the resolved right operand to hash_src_r (same index).
14119 for(i=0;i<hash_eq.size();i++){
14120 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
14121 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
// Fill protocol_map for a watchlist-join node and record the resolved
// sources of the equality predicate attached to each key field.
//   q_sources : source query nodes (the vector is received by value); a NULL
//               source contributes a NULL entry to src_vec.
//   Schema    : forwarded to resolve_protocol_se().
// No group-by table is involved, so NULL is passed for gb_tbl.
14125 void watch_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14127 vector<map<string, scalarexp_t *> *> src_vec;
// src_vec[i] is the protocol map of source i (get_protocol_se()) or NULL.
14129 for(i=0;i<q_sources.size();i++){
14130 if(q_sources[i] != NULL)
14131 src_vec.push_back(q_sources[i]->get_protocol_se());
14133 src_vec.push_back(NULL);
// Each select-list entry is stored under its output name.
14136 for(i=0;i<select_list.size();i++){
14137 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// Unlike the other join nodes, hash_eq is indexed here by key-field name, in
// the order given by key_flds.
// NOTE(review): hash_eq[kfld] is dereferenced without a visible check --
// confirm every name in key_flds has an entry in hash_eq.
14140 for(i=0;i<key_flds.size();i++){
14141 string kfld = key_flds[i];
14142 hash_src_l.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_left_se(),src_vec,NULL,Schema));
14143 hash_src_r.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_right_se(),src_vec,NULL,Schema));
// Fill protocol_map for an aggregation node and record the resolved source
// of every group-by variable in gb_sources.
//   q_sources : source query nodes (the vector is received by value); a NULL
//               source contributes a NULL entry to src_vec.
//   Schema    : forwarded to resolve_protocol_se().
// This node's gb_tbl is passed along so group-by references can be expanded.
14148 void sgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14150 vector<map<string, scalarexp_t *> *> src_vec;
// src_vec[i] is the protocol map of source i (get_protocol_se()) or NULL.
14152 for(i=0;i<q_sources.size();i++){
14153 if(q_sources[i] != NULL)
14154 src_vec.push_back(q_sources[i]->get_protocol_se());
14156 src_vec.push_back(NULL);
// Each select-list entry is stored under its output name.
14159 for(i=0;i<select_list.size();i++){
14160 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// gb_sources gets one entry per group-by variable, in gb_tbl order.
14163 for(i=0;i<gb_tbl.size();i++)
14164 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Fill protocol_map for a running-aggregation node and record the resolved
// source of every group-by variable in gb_sources.
//   q_sources : source query nodes (the vector is received by value); a NULL
//               source contributes a NULL entry to src_vec.
//   Schema    : forwarded to resolve_protocol_se().
// This node's gb_tbl is passed along so group-by references can be expanded.
14168 void rsgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14170 vector<map<string, scalarexp_t *> *> src_vec;
// src_vec[i] is the protocol map of source i (get_protocol_se()) or NULL.
14172 for(i=0;i<q_sources.size();i++){
14173 if(q_sources[i] != NULL)
14174 src_vec.push_back(q_sources[i]->get_protocol_se());
14176 src_vec.push_back(NULL);
// Each select-list entry is stored under its output name.
14179 for(i=0;i<select_list.size();i++){
14180 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// gb_sources gets one entry per group-by variable, in gb_tbl order.
14183 for(i=0;i<gb_tbl.size();i++)
14184 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Fill protocol_map for an sgahcwcb node and record the resolved source of
// every group-by variable in gb_sources.
//   q_sources : source query nodes (the vector is received by value); a NULL
//               source contributes a NULL entry to src_vec.
//   Schema    : forwarded to resolve_protocol_se().
// This node's gb_tbl is passed along so group-by references can be expanded.
14187 void sgahcwcb_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14189 vector<map<string, scalarexp_t *> *> src_vec;
// src_vec[i] is the protocol map of source i (get_protocol_se()) or NULL.
14191 for(i=0;i<q_sources.size();i++){
14192 if(q_sources[i] != NULL)
14193 src_vec.push_back(q_sources[i]->get_protocol_se());
14195 src_vec.push_back(NULL);
// Each select-list entry is stored under its output name.
14198 for(i=0;i<select_list.size();i++){
14199 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// gb_sources gets one entry per group-by variable, in gb_tbl order.
14202 for(i=0;i<gb_tbl.size();i++)
14203 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Fill protocol_map for a merge node. For each field of table_layout the
// entry of the first source is compared (is_equivalent_se_base) with the
// entry of every later source; the field is mapped either to the first
// source's expression or to NULL.
//   q_sources : source query nodes (the vector is received by value); a NULL
//               source contributes a NULL entry to src_vec. An empty list is
//               reported as an internal error.
//   Schema    : forwarded to is_equivalent_se_base().
14206 void mrg_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14208 scalarexp_t *first_se;
14210 vector<map<string, scalarexp_t *> *> src_vec;
14211 map<string, scalarexp_t *> *pse_map;
// src_vec[i] is the protocol map of source i (get_protocol_se()) or NULL.
14213 for(i=0;i<q_sources.size();i++){
14214 if(q_sources[i] != NULL)
14215 src_vec.push_back(q_sources[i]->get_protocol_se());
14217 src_vec.push_back(NULL);
14220 if(q_sources.size() == 0){
14221 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::create_protocol_se, q_sources.size() == 0\n");
14225 vector<field_entry *> tbl_flds = table_layout->get_fields();
14226 for(f=0;f<tbl_flds.size();f++){
// The first source provides the reference expression for this field.
// NOTE(review): src_vec entries can be NULL (see the loop above) but
// src_vec[0] and src_vec[s] are dereferenced without a visible check.
// map::operator[] also inserts a null entry into a source's map when the
// field name is not present.
14228 string fld_nm = tbl_flds[f]->get_name();
14229 pse_map = src_vec[0];
14230 first_se = (*pse_map)[fld_nm];
14231 if(first_se == NULL)
// The scan over the remaining sources ends as soon as match becomes false.
14233 for(s=1;s<src_vec.size() && match;s++){
14234 pse_map = src_vec[s];
14235 scalarexp_t *match_se = (*pse_map)[fld_nm];
14236 if(match_se == NULL)
14239 match = is_equivalent_se_base(first_se, match_se, Schema);
// Presumably match selects between the two assignments below -- TODO confirm.
// first_se is stored as-is (not duplicated), so protocol_map shares the
// pointer held in the first source's map.
14242 protocol_map[fld_nm] = first_se;
14244 protocol_map[fld_nm] = NULL;
14248 void watch_tbl_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){