1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
16 // Create, manipulate, and dump query plans.
18 #include "query_plan.h"
19 #include "analyze_fta.h"
20 #include "generate_utils.h"
26 extern string hash_nums[NRANDS]; // for fast hashing
// untaboo: takes s by non-const reference and visits it one character
// index at a time. The per-character work is outside the visible lines.
// NOTE(review): presumably rewrites characters that are not allowed in
// generated names -- confirm against the full body.
31 void untaboo(string &s){
33 for(c=0;c<s.size();++c){
40 // mrg_qpn constructor, define here to avoid
41 // circular references in the .h file
//
// Builds a merge node named n_name whose output schema mirrors the
// select list of the filter-join node spx.
//   spx          - supplies the select list, parameter table and definitions
//   n_name       - name given to the new table_def and used in diagnostics
//   sources      - one tablevar_t and one merge colref_t is created per entry
//   ifaces, ifdb - handed unchanged to resolve_slack()
42 mrg_qpn::mrg_qpn(filter_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): this pointer copy is replaced further down, where
// param_tbl is assigned a freshly allocated param_table.
43 param_tbl = spx->param_tbl;
46 field_entry_list *fel = new field_entry_list();
// Turn every select-list entry into a field entry, working on a duplicate
// of its data type, and look for the temporal field to merge on.
51 for(i=0;i<spx->select_list.size();++i){
52 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
53 if(dt->is_temporal()){
54 if(merge_fieldpos < 0){
// A merge field was already chosen: warn, naming both candidates and the
// one at merge_fieldpos that is kept.
57 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
62 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
63 fel->append_field(fe);
// Diagnostic for a select list with no temporal entry.
67 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
// The output schema is a stream schema built from the collected fields.
70 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
72 // NEED TO HANDLE USER_SPECIFIED SLACK
// Slack is resolved from the scalar expression and name of the merge field.
73 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
74 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
75 // if(this->slack == NULL)
76 // fprintf(stderr,"Zero slack.\n");
78 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// One range variable (_m0, _m1, ...) per source. mvars[i] names the merge
// field under that range variable and refers to tablevar index i; fm[i] is
// the matching FROM entry.
80 for(i=0;i<sources.size();i++){
81 std::string rvar = "_m"+int_to_string(i);
82 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
83 mvars[i]->set_tablevar_ref(i);
84 fm.push_back(new tablevar_t(sources[i].c_str()));
85 fm[i]->set_range_var(rvar);
// Give this node its own param_table: every parameter of spx is re-added
// with a duplicated data type and the same handle-access setting.
88 param_tbl = new param_table();
89 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
91 for(pi=0;pi<param_names.size();pi++){
92 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
93 param_tbl->add_param(param_names[pi],dt->duplicate(),
94 spx->param_tbl->handle_access(param_names[pi]));
// The definitions are assigned directly from spx.
96 definitions = spx->definitions;
// mrg_qpn constructor taking a watchlist-join node. It follows the same
// steps as the filter_join_qpn overload: build the field list from spx's
// select list, pick the temporal merge field, create the stream table_def,
// resolve slack, create one range variable per source, and rebuild the
// parameter table.
//   spx          - supplies the select list, parameter table and definitions
//   n_name       - name given to the new table_def and used in diagnostics
//   sources      - one tablevar_t and one merge colref_t is created per entry
//   ifaces, ifdb - handed unchanged to resolve_slack()
101 mrg_qpn::mrg_qpn(watch_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): this pointer copy is replaced further down, where
// param_tbl is assigned a freshly allocated param_table.
102 param_tbl = spx->param_tbl;
105 field_entry_list *fel = new field_entry_list();
// Turn every select-list entry into a field entry, working on a duplicate
// of its data type, and look for the temporal field to merge on.
110 for(i=0;i<spx->select_list.size();++i){
111 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
112 if(dt->is_temporal()){
113 if(merge_fieldpos < 0){
// A merge field was already chosen: warn, naming both candidates and the
// one at merge_fieldpos that is kept.
116 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
// reset_temporal() is applied to the duplicated type of the later field
// (presumably clearing its temporal marking -- confirm in data_type).
117 dt->reset_temporal();
121 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
122 fel->append_field(fe);
// No select-list entry was temporal.
125 if(merge_fieldpos<0){
126 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
// The output schema is a stream schema built from the collected fields.
129 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
131 // NEED TO HANDLE USER_SPECIFIED SLACK
// Slack is resolved from the scalar expression and name of the merge field.
132 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
133 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
134 // if(this->slack == NULL)
135 // fprintf(stderr,"Zero slack.\n");
137 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// One range variable (_m0, _m1, ...) per source. mvars[i] names the merge
// field under that range variable and refers to tablevar index i; fm[i] is
// the matching FROM entry.
139 for(i=0;i<sources.size();i++){
140 std::string rvar = "_m"+int_to_string(i);
141 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
142 mvars[i]->set_tablevar_ref(i);
143 fm.push_back(new tablevar_t(sources[i].c_str()));
144 fm[i]->set_range_var(rvar);
// Give this node its own param_table: every parameter of spx is re-added
// with a duplicated data type and the same handle-access setting.
147 param_tbl = new param_table();
148 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
150 for(pi=0;pi<param_names.size();pi++){
151 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
152 param_tbl->add_param(param_names[pi],dt->duplicate(),
153 spx->param_tbl->handle_access(param_names[pi]));
// The definitions are assigned directly from spx.
155 definitions = spx->definitions;
162 // This function translates an analyzed parse tree
163 // into one or more query nodes (qp_node).
164 // Currently only one node is created, but some query
165 // fragments might create more than one query node,
166 // e.g. aggregation over a join, or nested subqueries
167 // in the FROM clause (unless this is handled at parse tree
168 // analysis time). At this stage, they will be linked
169 // by the names in the FROM clause.
170 // INVARIANT : if more than one query node is returned,
171 // the last one represents the output of the query.
//
//   qs     - analyzed query; its query_type, group-by/aggregate tables and
//            FROM clause select which node class is constructed
//   Schema - passed to every node constructor
// The created node is appended to local_plan and remembered as plan_root,
// which then receives the query name and definitions from qs.
172 vector<qp_node *> create_query_nodes(query_summary_class *qs,table_list *Schema){
174 // Classify the query.
176 vector <qp_node *> local_plan;
180 // I should probably move a lot of this code
181 // into the qp_node constructors,
182 // and have this code focus on building the query plan tree.
// Watchlist query: a single watch_tbl_qpn.
185 if(qs->query_type == WATCHLIST_QUERY){
186 watch_tbl_qpn *watchnode = new watch_tbl_qpn(qs, Schema);
189 plan_root = watchnode;
190 local_plan.push_back(watchnode);
// Merge query: a single mrg_qpn.
195 if(qs->query_type == MERGE_QUERY){
196 mrg_qpn *merge_node = new mrg_qpn(qs,Schema);
199 plan_root = merge_node;
200 local_plan.push_back(merge_node);
// NOTE(review): the lines from "Do not split sources" through the
// debugging printf loops read like disabled code; the comment delimiters
// that would enclose them are not visible in this excerpt -- confirm.
203 Do not split sources until we are done with optimizations
204 vector<mrg_qpn *> split_merge = merge_node->split_sources();
205 local_plan.insert(local_plan.begin(), split_merge.begin(), split_merge.end());
207 // If children are created, add them to the schema.
210 printf("split_merge size is %d\n",split_merge.size());
211 for(i=1;i<split_merge.size();++i){
212 Schema->add_table(split_merge[i]->get_fields());
213 printf("Adding split merge table %d\n",i);
218 printf("Did split sources on %s:\n",qs->query_name.c_str());
220 for(ss=0;ss<local_plan.size();ss++){
221 printf("node %d, name=%s, sources=",ss,local_plan[ss]->get_node_name().c_str());
222 vector<tablevar_t *> inv = local_plan[ss]->get_input_tbls();
224 for(nn=0;nn<inv.size();nn++){
225 printf("%s ",inv[nn]->to_string().c_str());
234 if(qs->query_type == SELECT_QUERY){
236 // Select / Aggregation / Join
// No group-by variables and no aggregates: selection or join.
237 if(qs->gb_tbl->size() == 0 && qs->aggr_tbl->size() == 0){
// A single FROM entry gives a select/project node.
239 if(qs->fta_tree->get_from()->size() == 1){
240 spx_qpn *spx_node = new spx_qpn(qs,Schema);
242 plan_root = spx_node;
243 local_plan.push_back(spx_node);
// Otherwise the join node class follows the FROM clause property. The
// else keywords linking these alternatives are not visible in this excerpt.
245 if(qs->fta_tree->get_from()->get_properties() == FILTER_JOIN_PROPERTY){
246 filter_join_qpn *join_node = new filter_join_qpn(qs,Schema);
247 plan_root = join_node;
248 local_plan.push_back(join_node);
250 if(qs->fta_tree->get_from()->get_properties() == WATCHLIST_JOIN_PROPERTY){
251 watch_join_qpn *join_node = new watch_join_qpn(qs,Schema);
252 plan_root = join_node;
253 local_plan.push_back(join_node);
// Remaining join case: equality hash join.
255 join_eq_hash_qpn *join_node = new join_eq_hash_qpn(qs,Schema);
256 plan_root = join_node;
257 local_plan.push_back(join_node);
// Aggregation queries. Referenced states, supergroups or a cleaning-by
// clause select sgahcwcb_qpn.
264 if(qs->states_refd.size() || qs->sg_tbl.size() || qs->cb_cnf.size()){
265 sgahcwcb_qpn *sgahcwcb_node = new sgahcwcb_qpn(qs,Schema);
266 plan_root = sgahcwcb_node;
267 local_plan.push_back(sgahcwcb_node);
// A non-empty closew_cnf selects rsgah_qpn.
269 if(qs->closew_cnf.size()){
270 rsgah_qpn *rsgah_node = new rsgah_qpn(qs,Schema);
271 plan_root = rsgah_node;
272 local_plan.push_back(rsgah_node);
// Remaining aggregation case: sgah_qpn.
274 sgah_qpn *sgah_node = new sgah_qpn(qs,Schema);
275 plan_root = sgah_node;
276 local_plan.push_back(sgah_node);
283 // Get the query name and other definitions.
284 plan_root->set_node_name( qs->query_name);
285 plan_root->set_definitions( qs->definitions) ;
288 // return(plan_root);
// Renders the scalar expression se as query text, recursing into its
// operands.
//   se       - expression to print
//   aggr_tbl - consulted for the operand(s) of aggregate references
// Returns the query text, or an error string when the operator type is not
// handled by the switch.
294 string se_to_query_string(scalarexp_t *se, aggregate_table *aggr_tbl){
299 vector<scalarexp_t *> operand_list;
// su_ind is inserted after the operator name of aggregates and functions.
// NOTE(review): presumably set to a marker here for superaggregates -- the
// assignment itself is not visible in this excerpt.
302 if(se->is_superaggr())
305 switch(se->get_operator_type()){
// Literal.
307 l_str = se->get_literal()->to_query_string();
// Parameter reference: the name prefixed with "$".
310 l_str = "$" + se->get_op();
// Column reference.
313 l_str = se->get_colref()->to_query_string() ;
// Unary operator: op( operand ).
316 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
318 return se->get_op()+"( "+l_str+" )";;
// Binary operator: both operands are parenthesized.
320 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
321 r_str = se_to_query_string(se->get_right_se(),aggr_tbl);
322 return( "("+l_str+")"+se->get_op()+"("+r_str+")" );
// Star aggregate, e.g. op(*).
324 return( se->get_op() + su_ind + "(*)");
// Aggregate over an expression: the operand is fetched from aggr_tbl by
// the aggregate reference.
326 l_str = se_to_query_string(aggr_tbl->get_aggr_se(se->get_aggr_ref()),aggr_tbl);
327 return( se->get_op() + su_ind + "(" + l_str + ")" );
// Function call: operands come from aggr_tbl when the SE carries an
// aggregate reference (>= 0), otherwise from the SE itself.
329 if(se->get_aggr_ref() >= 0)
330 operand_list = aggr_tbl->get_operand_list(se->get_aggr_ref());
332 operand_list = se->get_operands();
334 ret = se->get_op() + su_ind + "(";
335 for(p=0;p<operand_list.size();p++){
336 l_str = se_to_query_string(operand_list[p],aggr_tbl);
// Fallback for an unhandled operator type.
344 return "ERROR SE op type not recognized in se_to_query_string.\n";
// Renders the predicate pr as query text, using se_to_query_string for
// scalar operands and recursing for sub-predicates.
//   pr       - predicate to print
//   aggr_tbl - forwarded to se_to_query_string
// An unknown predicate operator type is reported on stderr.
348 string pred_to_query_str(predicate_t *pr, aggregate_table *aggr_tbl){
353 vector<literal_t *> llist;
354 vector<scalarexp_t *> op_list;
356 switch(pr->get_operator_type()){
// IN predicate: "<lhs> IN [" followed by the literal list.
358 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
359 ret = l_str + " IN [";
360 llist = pr->get_lit_vec();
361 for(l=0;l<llist.size();l++){
363 ret += llist[l]->to_query_string();
// Comparison: "<lhs> op <rhs>".
369 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
370 r_str = se_to_query_string(pr->get_right_se(),aggr_tbl);
371 return( l_str + " " + pr->get_op() + " " + r_str );
// Unary predicate: op( operand ).
373 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
374 return(pr->get_op() + "( " + l_str + " )");
// Binary predicate.
// NOTE(review): the right operand is emitted before the left one, so the
// printed order differs from the parse tree -- confirm this is intended.
376 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
377 r_str = pred_to_query_str(pr->get_right_pr(),aggr_tbl);
378 return("( " + r_str + " )" + pr->get_op() + "( " + l_str + " )");
// Function predicate: "op[" followed by the operand list.
380 ret = pr->get_op()+"[";
381 op_list = pr->get_op_list();
382 for(o=0;o<op_list.size();++o){
384 ret += se_to_query_string(op_list[o],aggr_tbl);
// Unhandled operator type.
389 fprintf(stderr,"INTERNAL ERROR in pred_to_query_str, line %d, character %d, unknown predicate operator type %d\n",
390 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
399 // Build a selection list,
400 // but avoid adding duplicate SEs.
//
// Searches lfta_select_list for an entry whose SE is equivalent to se
// (is_equivalent_se). If none is found, se is appended under the
// placeholder name "NoNameIn:add_select_list_nodup" and the index of the
// new last element is returned.
// NOTE(review): the trailing parameter is declared on a line not visible
// here; callers pass a new_element flag, presumably set to tell them
// whether an element was appended -- confirm.
403 int add_select_list_nodup(vector<select_element *> &lfta_select_list, scalarexp_t *se,
407 for(s=0;s<lfta_select_list.size();s++){
408 if(is_equivalent_se(lfta_select_list[s]->se, se)){
// Not found: append and report the new position.
413 lfta_select_list.push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
414 return(lfta_select_list.size()-1);
419 // TODO: The generated colref should be tied to the tablevar
420 // representing the lfta output. For now, always 0.
//
// Ensures se is present in lfta_select_list and builds a column-reference
// SE that names that select-list entry.
//   lfta_select_list - list that receives se if no equivalent is present
//   se               - expression to be referenced
//   h_tvref          - tablevar index stored in the new colref
// The new SE takes its decorations from se.
422 scalarexp_t *make_fta_se_ref(vector<select_element *> &lfta_select_list, scalarexp_t *se, int h_tvref){
424 int fta_se_nbr = add_select_list_nodup(lfta_select_list, se, new_element);
// Either take the name the entry already has ...
427 colname = lfta_select_list[fta_se_nbr]->name;
// ... or impute a column name and store it on the entry.
429 colname = impute_colname(lfta_select_list, se);
430 lfta_select_list[fta_se_nbr]->name = colname;
433 // TODO: fill in the tablevar and schema of the colref here.
434 colref_t *new_cr = new colref_t(colname.c_str());
435 new_cr->set_tablevar_ref(h_tvref);
438 scalarexp_t *new_se= new scalarexp_t(new_cr);
439 new_se->use_decorations_of(se);
445 // Build a selection list,
446 // but avoid adding duplicate SEs.
//
// Pointer-taking overload: searches *lfta_select_list for an entry whose
// SE is equivalent to se (is_equivalent_se). If none is found, se is
// appended under the placeholder name "NoNameIn:add_select_list_nodup"
// and the index of the new last element is returned.
// NOTE(review): the trailing parameter is declared on a line not visible
// here; callers pass a new_element flag, presumably set to tell them
// whether an element was appended -- confirm.
449 int add_select_list_nodup(vector<select_element *> *lfta_select_list, scalarexp_t *se,
453 for(s=0;s<lfta_select_list->size();s++){
454 if(is_equivalent_se((*lfta_select_list)[s]->se, se)){
// Not found: append and report the new position.
459 lfta_select_list->push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
460 return(lfta_select_list->size()-1);
465 // TODO: The generated colref should be tied to the tablevar
466 // representing the lfta output. For now, always 0.
//
// Overload for several select lists: h_tvref picks the list
// (lfta_select_list[h_tvref]) that must contain se, and is also stored as
// the tablevar index of the colref that is built.
// The new SE takes its decorations from se.
468 scalarexp_t *make_fta_se_ref(vector<vector<select_element *> *> &lfta_select_list, scalarexp_t *se, int h_tvref){
470 vector<select_element *> *the_sel_list = lfta_select_list[h_tvref];
471 int fta_se_nbr = add_select_list_nodup(the_sel_list, se, new_element);
// Either take the name the entry already has ...
474 colname = (*the_sel_list)[fta_se_nbr]->name;
// ... or impute a column name and store it on the entry.
476 colname = impute_colname(*the_sel_list, se);
477 (*the_sel_list)[fta_se_nbr]->name = colname;
480 // TODO: fill in the tablevar and schema of the colref here.
481 colref_t *new_cr = new colref_t(colname.c_str());
482 new_cr->set_tablevar_ref(h_tvref);
485 scalarexp_t *new_se= new scalarexp_t(new_cr);
486 new_se->use_decorations_of(se);
495 // Test if a se can be evaluated at the fta.
496 // check forbidden types (e.g. float), forbidden operations
497 // between types (e.g. divide a long long), forbidden operations
498 // (too expensive, not implemented).
500 // Return true if not forbidden, false if forbidden
502 // TODO: the parameter aggr_tbl is not used, delete it.
//
//   se       - expression to test; sub-expressions are tested recursively
//   aggr_tbl - only forwarded to the recursive calls
//   Ext_fcns - external function table used to resolve function calls
504 bool check_fta_forbidden_se(scalarexp_t *se,
505 aggregate_table *aggr_tbl,
506 ext_fcn_list *Ext_fcns
510 vector<scalarexp_t *> operand_list;
511 vector<data_type *> dt_signature;
512 data_type *dt = se->get_data_type();
516 switch(se->get_operator_type()){
// Leaf cases: legality depends only on the data type.
520 return( se->get_data_type()->fta_legal_type() );
// Unary operator: the operand is checked, then the operation itself.
524 if(!check_fta_forbidden_se(se->get_left_se(), aggr_tbl, Ext_fcns))
527 dt->fta_legal_operation(se->get_left_se()->get_data_type(), se->get_op())
// Binary operator: both operands are checked, then the operation on their
// data types.
530 if(!check_fta_forbidden_se(se->get_left_se(),aggr_tbl, Ext_fcns))
532 if(!check_fta_forbidden_se(se->get_right_se(),aggr_tbl, Ext_fcns))
534 return(dt->fta_legal_operation(se->get_left_se()->get_data_type(),
535 se->get_right_se()->get_data_type(),
540 // return true, aggregate fta-safeness is determined elsewhere.
// Function call carrying an aggregate reference: accepted here.
547 if(se->get_aggr_ref() >= 0) return true;
// Ordinary function call: every operand is checked and its data type is
// collected into the signature used for the lookup.
549 operand_list = se->get_operands();
550 for(p=0;p<operand_list.size();p++){
551 if(!check_fta_forbidden_se(operand_list[p],aggr_tbl, Ext_fcns))
553 dt_signature.push_back(operand_list[p]->get_data_type() );
555 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup failure: print the attempted signature; the value -2 is reported
// as "multiple subsuming functions found".
557 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
559 for(o=0;o<operand_list.size();o++){
560 if(o>0) fprintf(stderr,", ");
561 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
563 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
564 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// Resolved function: legality comes from the external function table.
568 return(Ext_fcns->fta_legal(fcn_id) );
// NOTE(review): this internal error goes to stdout via printf, while the
// other diagnostics in this function use stderr.
570 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
579 // test if a pr can be executed at the fta.
581 // Return true if not forbidden, false if forbidden
//
//   pr       - predicate to test; operands are tested recursively
//   aggr_tbl - only forwarded to the recursive calls
//   Ext_fcns - external function table used to resolve predicate calls
583 bool check_fta_forbidden_pr(predicate_t *pr,
584 aggregate_table *aggr_tbl,
585 ext_fcn_list *Ext_fcns
588 vector<literal_t *> llist;
591 vector<scalarexp_t *> op_list;
592 vector<data_type *> dt_signature;
596 switch(pr->get_operator_type()){
// IN predicate: the left SE must be legal, and so must the type of every
// literal in the list.
598 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns) )
600 llist = pr->get_lit_vec();
601 for(l=0;l<llist.size();l++){
// NOTE(review): a data_type is allocated per literal and no matching
// delete is visible in this excerpt -- possible leak, confirm.
602 dt = new data_type(llist[l]->get_type());
603 if(! dt->fta_legal_type()){
// Comparison: both scalar operands must be legal.
611 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns))
613 if(! check_fta_forbidden_se(pr->get_right_se(), aggr_tbl, Ext_fcns))
// Unary predicate: legality of its single operand.
617 return( check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns) );
// Binary predicate: both sub-predicates must be legal.
619 if(! check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns))
621 if(! check_fta_forbidden_pr(pr->get_right_pr(), aggr_tbl, Ext_fcns))
// Function predicate: every operand is checked and its data type is
// collected into the signature used for the lookup.
625 op_list = pr->get_op_list();
626 for(o=0;o<op_list.size();o++){
627 if(!check_fta_forbidden_se(op_list[o],aggr_tbl, Ext_fcns))
629 dt_signature.push_back(op_list[o]->get_data_type() );
631 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), dt_signature);
// Lookup failure: print the attempted signature; the value -2 is reported
// as "multiple subsuming predicates found".
633 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
635 for(o=0;o<op_list.size();o++){
636 if(o>0) fprintf(stderr,", ");
637 fprintf(stderr,"%s",op_list[o]->get_data_type()->to_string().c_str());
639 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
640 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
// Resolved predicate: legality comes from the external function table.
644 return(Ext_fcns->fta_legal(fcn_id) );
// Unhandled operator type.
646 fprintf(stderr,"INTERNAL ERROR in check_fta_forbidden_pr, line %d, character %d, unknown predicate operator type %d\n",
647 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
656 // Split the aggregates in orig_aggr_tbl, into superaggregates and
658 // (the value of the HFTA aggregate might be a SE of several LFTA
659 // subaggregates, e.g. avg : sum / count )
660 // Register the superaggregates in hfta_aggr_tbl, and the
661 // subaggregates in lfta_aggr_tbl.
662 // Insert references to the subaggregates into lfta_select_list.
663 // (and record their names in the currnames list)
664 // Create a SE for the superaggregate, put it in hfta_aggr_se,
//
//   orig_aggr_tbl    - table holding the aggregate being split
//   agr_id           - index of that aggregate; also the key written in
//                      hfta_aggr_se
//   hfta_aggr_tbl    - receives the superaggregate(s)
//   lfta_aggr_tbl    - receives the subaggregate(s)
//   lfta_select_list - receives one entry per subaggregate
//   hfta_aggr_se     - out: agr_id -> SE computing the aggregate at the HFTA
//   Ext_fcns         - external function table, used for non-builtin
//                      aggregates
667 void split_fta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
668 aggregate_table *hfta_aggr_tbl,
669 aggregate_table *lfta_aggr_tbl,
670 vector<select_element *> &lfta_select_list,
671 map<int,scalarexp_t *> &hfta_aggr_se,
672 ext_fcn_list *Ext_fcns
675 scalarexp_t *subaggr_se;
680 scalarexp_t *new_se, *l_se;
681 vector<scalarexp_t *> subaggr_ref_se;
// User-defined aggregate: the sub/super functions come from Ext_fcns.
684 if(! orig_aggr_tbl->is_builtin(agr_id)){
685 // Construct the subaggregate
686 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
687 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
688 vector<scalarexp_t *> subopl;
// The subaggregate works on duplicates of the original operands.
690 for(o=0;o<opl.size();++o){
691 subopl.push_back(dup_se(opl[o], NULL));
693 int sub_id = Ext_fcns->get_subaggr_id(fcn_id);
694 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
695 subaggr_se->set_fcn_id(sub_id);
696 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
697 // Add it to the lfta select list.
698 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
// Either take the name the entry already has ...
700 colname = lfta_select_list[fta_se_nbr]->name;
// ... or impute a name and register the subaggregate in lfta_aggr_tbl,
// passing the function's lfta-bailout property.
702 colname = impute_colname(lfta_select_list, subaggr_se);
703 lfta_select_list[fta_se_nbr]->name = colname;
704 ano = lfta_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,Ext_fcns->has_lfta_bailout(sub_id));
705 subaggr_se->set_aggr_id(ano);
708 // Construct a reference to the subaggregate
709 new_cr = new colref_t(colname.c_str());
710 new_se = new scalarexp_t(new_cr);
711 // I'm not certain what the types should be ....
712 // This will need to be filled in by later analysis.
713 // NOTE: this might not capture all the meaning of data_type ...
714 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
715 subaggr_ref_se.push_back(new_se);
717 // Construct the superaggregate
718 int super_id = Ext_fcns->get_superaggr_id(fcn_id);
719 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
720 ret_se->set_fcn_id(super_id);
721 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
722 // Register it in the hfta aggregate table
// The is_running_aggr() property of the subaggregate function is passed
// along with the superaggregate.
723 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, Ext_fcns->is_running_aggr(sub_id),false);
724 ret_se->set_aggr_id(ano);
725 hfta_aggr_se[agr_id] = ret_se;
731 // builtin aggregate processing
// The subaggregate function names and data types are parallel vectors.
735 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
736 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// Star aggregate: every subaggregate is itself a star aggregate.
739 if(orig_aggr_tbl->is_star_aggr(agr_id)){
740 for(sa=0;sa<subaggr_names.size();sa++){
741 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
742 subaggr_se->set_data_type(subaggr_dt[sa]);
744 // The following sequence is similar to the code in make_fta_se_ref,
745 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a loop-local fta_se_nbr, whereas the other
// branches assign to one declared elsewhere in the function.
746 int fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
748 colname = lfta_select_list[fta_se_nbr]->name;
750 colname = impute_colname(lfta_select_list, subaggr_se);
751 lfta_select_list[fta_se_nbr]->name = colname;
752 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
753 subaggr_se->set_aggr_id(ano);
755 new_cr = new colref_t(colname.c_str());
756 new_cr->set_tablevar_ref(0);
757 new_se = new scalarexp_t(new_cr);
759 // I'm not certain what the types should be ....
760 // This will need to be filled in by later analysis.
761 // Actually, this is causing a problem.
762 // I will assume a UINT data type. / change to INT
763 // (consistent with assign_data_types in analyze_fta.cc)
764 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
765 data_type *ndt = new data_type("Int"); // used to be Uint
766 new_se->set_data_type(ndt);
768 subaggr_ref_se.push_back(new_se);
// Non-star builtin aggregate: one subaggregate per name, built either over
// a duplicate of the original operand or as a star aggregate.
771 for(sa=0;sa<subaggr_names.size();sa++){
773 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
774 l_se = dup_se(aggr_operand, NULL);
775 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
777 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
779 subaggr_se->set_data_type(subaggr_dt[sa]);
781 // again, similar to make_fta_se_ref.
782 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
784 colname = lfta_select_list[fta_se_nbr]->name;
786 colname = impute_colname(lfta_select_list, subaggr_se);
787 lfta_select_list[fta_se_nbr]->name = colname;
789 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
791 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
792 subaggr_se->set_aggr_id(ano);
794 new_cr = new colref_t(colname.c_str());
795 new_se = new scalarexp_t(new_cr);
796 // I'm not certain what the types should be ....
797 // This will need to be filled in by later analysis.
798 // NOTE: this might not capture all the meaning of data_type ...
799 new_se->set_data_type(subaggr_dt[sa]);
800 subaggr_ref_se.push_back(new_se);
// Combine the subaggregate references into the superaggregate expression,
// typed like the original aggregate.
803 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
804 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
806 // ASSUME either the return value is an aggregation,
807 // or a binary_op between two aggregations
// NOTE(review): both sides of the || compare against SE_AGGR_SE, so the
// second test is redundant; one of them was presumably meant to be a
// different operator type (e.g. a star aggregate) -- confirm.
808 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
809 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
810 ret_se->set_aggr_id(ano);
812 // Basically processing for AVG.
813 // set the data type of the superagg to that of the subagg.
// The left child is typed with subaggr_dt[0] and the right child with
// subaggr_dt[1]; each is registered as its own HFTA aggregate.
814 scalarexp_t *left_se = ret_se->get_left_se();
815 left_se->set_data_type(subaggr_dt[0]);
816 ano = hfta_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
817 left_se->set_aggr_id(ano);
819 scalarexp_t *right_se = ret_se->get_right_se();
820 right_se->set_data_type(subaggr_dt[1]);
821 ano = hfta_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
822 right_se->set_aggr_id(ano);
825 hfta_aggr_se[agr_id] = ret_se;
830 // Split the aggregates in orig_aggr_tbl, into hfta_superaggregates and
831 // hfta_subaggregates.
832 // Register the superaggregates in hi_aggr_tbl, and the
833 // subaggregates in low_aggr_tbl.
834 // Insert references to the subaggregates into low_select_list.
835 // (and record their names in the currnames list)
836 // Create a SE for the superaggregate, put it in hi_aggr_se,
//
//   orig_aggr_tbl   - table holding the aggregate being split
//   agr_id          - index of that aggregate; also the key written in
//                     hi_aggr_se
//   hi_aggr_tbl     - receives the superaggregate(s)
//   low_aggr_tbl    - receives the subaggregate(s)
//   low_select_list - receives one entry per subaggregate
//   hi_aggr_se      - out: agr_id -> SE computing the aggregate at the
//                     upper node
//   Ext_fcns        - external function table, used for non-builtin
//                     aggregates
839 void split_hfta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
840 aggregate_table *hi_aggr_tbl,
841 aggregate_table *low_aggr_tbl,
842 vector<select_element *> &low_select_list,
843 map<int,scalarexp_t *> &hi_aggr_se,
844 ext_fcn_list *Ext_fcns
847 scalarexp_t *subaggr_se;
852 scalarexp_t *new_se, *l_se;
853 vector<scalarexp_t *> subaggr_ref_se;
// User-defined aggregate: the HFTA sub/super functions come from Ext_fcns.
856 if(! orig_aggr_tbl->is_builtin(agr_id)){
857 // Construct the subaggregate
858 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
859 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
860 vector<scalarexp_t *> subopl;
// The subaggregate works on duplicates of the original operands.
862 for(o=0;o<opl.size();++o){
863 subopl.push_back(dup_se(opl[o], NULL));
865 int sub_id = Ext_fcns->get_hfta_subaggr_id(fcn_id);
866 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
867 subaggr_se->set_fcn_id(sub_id);
868 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
869 // Add it to the low select list.
870 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
// Either take the name the entry already has ...
872 colname = low_select_list[fta_se_nbr]->name;
// ... or impute a name and register the subaggregate in low_aggr_tbl. The
// three trailing flags are all false here.
874 colname = impute_colname(low_select_list, subaggr_se);
875 low_select_list[fta_se_nbr]->name = colname;
876 ano = low_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,false);
877 subaggr_se->set_aggr_id(ano);
880 // Construct a reference to the subaggregate
881 new_cr = new colref_t(colname.c_str());
882 new_se = new scalarexp_t(new_cr);
883 // I'm not certain what the types should be ....
884 // This will need to be filled in by later analysis.
885 // NOTE: this might not capture all the meaning of data_type ...
886 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
887 subaggr_ref_se.push_back(new_se);
889 // Construct the superaggregate
890 int super_id = Ext_fcns->get_hfta_superaggr_id(fcn_id);
891 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
892 ret_se->set_fcn_id(super_id);
893 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
894 // Register it in the high aggregate table
895 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, false,false);
896 ret_se->set_aggr_id(ano);
897 hi_aggr_se[agr_id] = ret_se;
903 // builtin aggregate processing
// The subaggregate function names and data types are parallel vectors.
907 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
908 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// Star aggregate: every subaggregate is itself a star aggregate.
911 if(orig_aggr_tbl->is_star_aggr(agr_id)){
912 for(sa=0;sa<subaggr_names.size();sa++){
913 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
914 subaggr_se->set_data_type(subaggr_dt[sa]);
916 // The following sequence is similar to the code in make_fta_se_ref,
917 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a loop-local fta_se_nbr, whereas the other
// branches assign to one declared elsewhere in the function.
918 int fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
920 colname = low_select_list[fta_se_nbr]->name;
922 colname = impute_colname(low_select_list, subaggr_se);
923 low_select_list[fta_se_nbr]->name = colname;
924 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
925 subaggr_se->set_aggr_id(ano);
927 new_cr = new colref_t(colname.c_str());
928 new_cr->set_tablevar_ref(0);
929 new_se = new scalarexp_t(new_cr);
931 // I'm not certain what the types should be ....
932 // This will need to be filled in by later analysis.
933 // Actually, this is causing a problem.
934 // I will assume an INT data type (this used to be UINT).
935 // (consistent with assign_data_types in analyze_fta.cc)
936 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
937 data_type *ndt = new data_type("Int"); // was Uint
938 new_se->set_data_type(ndt);
940 subaggr_ref_se.push_back(new_se);
// Non-star builtin aggregate: one subaggregate per name, built either over
// a duplicate of the original operand or as a star aggregate.
943 for(sa=0;sa<subaggr_names.size();sa++){
945 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
946 l_se = dup_se(aggr_operand, NULL);
947 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
949 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
951 subaggr_se->set_data_type(subaggr_dt[sa]);
953 // again, similar to make_fta_se_ref.
954 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
956 colname = low_select_list[fta_se_nbr]->name;
958 colname = impute_colname(low_select_list, subaggr_se);
959 low_select_list[fta_se_nbr]->name = colname;
961 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
963 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
964 subaggr_se->set_aggr_id(ano);
966 new_cr = new colref_t(colname.c_str());
967 new_se = new scalarexp_t(new_cr);
968 // I'm not certain what the types should be ....
969 // This will need to be filled in by later analysis.
970 // NOTE: this might not capture all the meaning of data_type ...
971 new_se->set_data_type(subaggr_dt[sa]);
972 subaggr_ref_se.push_back(new_se);
// Combine the subaggregate references into the superaggregate expression.
975 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
976 // ASSUME either the return value is an aggregation,
977 // or a binary_op between two aggregations
// NOTE(review): both sides of the || compare against SE_AGGR_SE, so the
// second test is redundant; one of them was presumably meant to be a
// different operator type (e.g. a star aggregate) -- confirm.
978 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
// Here the original aggregate's data type is applied only in this branch.
979 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
980 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
982 // Basically processing for AVG.
983 // set the data type of the superagg to that of the subagg.
// The left child is typed with subaggr_dt[0] and the right child with
// subaggr_dt[1]; each is registered as its own aggregate in hi_aggr_tbl.
984 scalarexp_t *left_se = ret_se->get_left_se();
985 left_se->set_data_type(subaggr_dt[0]);
986 ano = hi_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
987 left_se->set_aggr_id(ano);
989 scalarexp_t *right_se = ret_se->get_right_se();
990 right_se->set_data_type(subaggr_dt[1]);
991 ano = hi_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
992 right_se->set_aggr_id(ano);
// NOTE(review): this call appears to follow both branches, in which case
// the binary-op case gives ret_se the id of the last aggregate registered
// (the right child) -- confirm against the full brace structure.
995 ret_se->set_aggr_id(ano);
996 hi_aggr_se[agr_id] = ret_se;
1004 // Split a scalar expression into one part which executes
1005 // at the stream and another set of parts which execute
1007 // Because I'm actually modifying the SEs, I will make
1008 // copies. But I will assume that literals, params, and
1009 // colrefs are immutable at this point.
1010 // (if there is ever a need to change one, must make a
1012 // NOTE : if se is constant (only references literals),
1013 // avoid making the fta compute it.
1015 // NOTE : This will need to be generalized to
1016 // handle join expressions, namely to handle a vector
1019 // Return value is the HFTA se.
1020 // Add lftas select_elements to the fta_select_list.
1021 // set fta_forbidden if this node or any child cannot
1022 // execute at the lfta.
// split_fta_se: walks the scalar expression `se` and builds a new expression
// (ret_se) for it, node by node, recursing into the children first.
//   se               - expression to split; only read through its accessors here.
//   fta_forbidden    - output flag; assigned from fta_legal_type(),
//                      fta_legal_operation(), Ext_fcns->fta_legal() and from
//                      the flags reported by the recursive calls on children.
//   lfta_select_list - handed to make_fta_se_ref() for every child that is
//                      replaced by a reference.
//   Ext_fcns         - external function table, used for lookup_fcn() and
//                      fta_legal().
// Every visible call to make_fta_se_ref() is guarded by
// !is_literal_or_param_only(), so literal/param-only children are kept inline.
1026 scalarexp_t *split_fta_se(scalarexp_t *se,
1027 bool &fta_forbidden,
1028 vector<select_element *> &lfta_select_list,
1029 ext_fcn_list *Ext_fcns
1033 vector<scalarexp_t *> operand_list;
1034 vector<data_type *> dt_signature;
1035 scalarexp_t *ret_se, *l_se, *r_se;
1036 bool l_forbid, r_forbid, this_forbid;
1038 scalarexp_t *new_se;
1039 data_type *dt = se->get_data_type();
// Dispatch on the kind of expression node.
1041 switch(se->get_operator_type()){
// Literal: legality depends only on the data type; rebuild from the literal.
1043 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1044 ret_se = new scalarexp_t(se->get_literal());
1045 ret_se->use_decorations_of(se);
// Parameter reference: same legality rule, rebuilt by parameter name.
1049 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1050 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1051 ret_se->use_decorations_of(se);
1055 // No colref should be forbidden,
1056 // the schema is wrong, the fta_legal_type() fcn is wrong,
1057 // or the source table is actually a stream.
1058 // Issue a warning, but proceed with processing.
1059 // Also, should not be a ref to a gbvar.
1060 // (a gbvar ref only occurs in an aggregation node,
1061 // and these SEs are rehomed, not split.
1062 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1065 fprintf(stderr,"WARNING, a colref is a forbidden data type in split_fta_se,"
1067 " type is %s, line=%d, col=%d\n",
1068 se->get_colref()->to_string().c_str(),
1069 se->get_data_type()->get_type_str().c_str(),
1070 se->lineno, se->charno
1075 fprintf(stderr,"INTERNAL ERROR, a colref is a gbvar ref in split_fta_se,"
1076 " type is %s, line=%d, col=%d\n",
1077 se->get_data_type()->get_type_str().c_str(),
1078 se->lineno, se->charno
// The column reference object itself is reused, not copied.
1083 ret_se = new scalarexp_t(se->get_colref());
1084 ret_se->use_decorations_of(se);
// One-operand operator: split the child, then test this operation's legality
// against the child's data type.
1088 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1090 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1092 // If this operation is forbidden but the child SE is not,
1093 // put the child se on the lfta_select_list, create a colref
1094 // which accesses this se, and make it the child of this op.
1095 // Exception : the child se is constant (only literal refs).
1096 if(this_forbid && !l_forbid){
1097 if(!is_literal_or_param_only(l_se)){
1098 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1099 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1102 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1104 ret_se->use_decorations_of(se);
// Bitwise | on two bools; same truth value as ||.
1105 fta_forbidden = this_forbid | l_forbid;
// Two-operand operator: split both children before judging this node.
1109 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1110 r_se = split_fta_se(se->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1112 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1114 // Replace the left se if it is not forbidden, but something else is.
// NOTE(review): `&` is bitwise here; both sides are bool so the truth value
// matches &&, but the right side is always evaluated.
1115 if((this_forbid || r_forbid) & !l_forbid){
1116 if(!is_literal_or_param_only(l_se)){
1117 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1122 // Replace the right se if it is not forbidden, but something else is.
1123 if((this_forbid || l_forbid) & !r_forbid){
1124 if(!is_literal_or_param_only(r_se)){
1125 new_se = make_fta_se_ref(lfta_select_list, r_se,0);
1130 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1131 ret_se->use_decorations_of(se);
1132 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are not expected in this function; report it.
1139 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_fta_se."
1140 " line=%d, col=%d\n",
1141 se->get_op().c_str(),
1142 se->lineno, se->charno
// Function call: split every operand, remembering per operand whether it was
// forbidden, and collect the ORIGINAL operand types as the lookup signature.
1149 fta_forbidden = false;
1150 operand_list = se->get_operands();
1151 vector<scalarexp_t *> new_operands;
1152 vector<bool> forbidden_op;
1153 for(p=0;p<operand_list.size();p++){
1154 l_se = split_fta_se(operand_list[p], l_forbid, lfta_select_list, Ext_fcns);
1156 fta_forbidden |= l_forbid;
1157 new_operands.push_back(l_se);
1158 forbidden_op.push_back(l_forbid);
1159 dt_signature.push_back(operand_list[p]->get_data_type() );
// Resolve the function by name and operand types; the diagnostic below lists
// the operand types, and -2 is reported as "multiple subsuming functions".
1162 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
1164 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1166 for(o=0;o<operand_list.size();o++){
1167 if(o>0) fprintf(stderr,", ");
1168 fprintf(stderr,"%s",operand_list[o]->get_data_type()->get_type_str().c_str());
1170 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1171 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The call is also forbidden when the function itself is not fta-legal.
1175 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1177 // Replace the non-forbidden operands.
1178 // the forbidden ones are already replaced.
1180 for(p=0;p<new_operands.size();p++){
1181 if(! forbidden_op[p]){
1182 // if(new_operands[p]->get_data_type()->get_temporal() != constant_t){
1183 if(!is_literal_or_param_only(new_operands[p])){
1184 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],0);
1185 new_operands[p] = new_se;
1191 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1192 ret_se->use_decorations_of(se);
// NOTE(review): this message names check_fta_forbidden_se rather than
// split_fta_se, and goes to stdout via printf while the other diagnostics in
// this function use fprintf(stderr, ...).
1198 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
1209 // The predicates have already been
1210 // broken into conjunctions.
1211 // If any part of a conjunction is fta-forbidden,
1212 // it must be executed in the stream operator.
1213 // Else it is executed in the FTA.
1214 // A pre-analysis should determine whether this
1215 // predicate is fta-safe. This procedure will
1216 // assume that it is fta-forbidden and will
1217 // prepare it for execution in the stream.
// split_fta_pr: rebuilds the predicate `pr` as a new predicate_t (ret_pr).
// Scalar operands are first run through split_fta_se(); sub-predicates are
// processed by recursive calls to split_fta_pr().
//   pr               - predicate to rebuild; only read through accessors here.
//   lfta_select_list - forwarded to split_fta_se() and make_fta_se_ref().
//   Ext_fcns         - forwarded to split_fta_se().
// Operands made only of literals/params are never passed to make_fta_se_ref().
1221 predicate_t *split_fta_pr(predicate_t *pr,
1222 vector<select_element *> &lfta_select_list,
1223 ext_fcn_list *Ext_fcns
1226 vector<literal_t *> llist;
1227 scalarexp_t *se_l, *se_r;
1228 bool l_forbid, r_forbid;
1229 predicate_t *ret_pr, *pr_l, *pr_r;
1230 vector<scalarexp_t *> op_list, new_op_list;
1232 vector<data_type *> dt_signature;
// Dispatch on the kind of predicate node.
1235 switch(pr->get_operator_type()){
// Predicate over one scalar expression and a literal vector
// (presumably an IN-list test -- confirm against predicate_t).
1237 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1240 if(!is_literal_or_param_only(se_l)){
1241 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1245 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate over two scalar expressions joined by pr->get_op().
1250 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1252 if(!is_literal_or_param_only(se_l)){
1253 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1258 se_r = split_fta_se(pr->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1260 if(!is_literal_or_param_only(se_r)){
1261 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,0);
1266 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Operator applied to a single sub-predicate.
1270 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1271 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Operator applied to two sub-predicates.
1274 case PRED_BINARY_OP:
1275 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1276 pr_r = split_fta_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1277 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1281 // I can't push the predicate into the lfta, except by
1282 // returning a bool value, and that is not worth the trouble,
// Predicate with an operand list: split each operand; the original function
// id is copied onto the rebuilt predicate.
1283 op_list = pr->get_op_list();
1284 for(o=0;o<op_list.size();++o){
1285 se_l = split_fta_se(op_list[o],l_forbid,lfta_select_list,Ext_fcns);
1287 if(!is_literal_or_param_only(se_l)){
1288 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1292 new_op_list.push_back(se_l);
1295 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1296 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unrecognised predicate kind: report position and operator type on stderr.
1299 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1300 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1311 //--------------------------------------------------------------------
1315 // Split a scalar expression into one part which executes
1316 // at the stream and another set of parts which execute
1318 // Because I'm actually modifying the SEs, I will make
1319 // copies. But I will assume that literals, params, and
1320 // colrefs are immutable at this point.
1321 // (if there is ever a need to change one, must make a
1323 // NOTE : if se is constant (only references literals),
1324 // avoid making the fta compute it.
1326 // NOTE : This will need to be generalized to
1327 // handle join expressions, namely to handle a vector
1330 // Return value is the HFTA se.
1331 // Add lftas select_elements to the fta_select_list.
1332 // set fta_forbidden if this node or any child cannot
1333 // execute at the lfta.
// Sentinel values for the colref_source result of split_ftavec_se().
// NOTBLVAR is what literals and params are assigned (no table variable);
// MIXED is what combine_colref_source() returns for two different sources.
// Both are negative, so is_PROTOCOL_source() rejects them via its >=0 test.
1335 #define SPLIT_FTAVEC_NOTBLVAR -1
1336 #define SPLIT_FTAVEC_MIXED -2
// is_PROTOCOL_source: true when colref_source is a non-negative index whose
// entry in lfta_select_list is non-NULL. Negative values (the SPLIT_FTAVEC_*
// sentinels) fail the first test, so the vector is not indexed for them.
// NOTE(review): there is no upper bound check against
// lfta_select_list.size(); callers must pass a valid table-variable index.
1338 bool is_PROTOCOL_source(int colref_source,
1339 vector< vector<select_element *> *> &lfta_select_list){
1340 if(colref_source>=0 && lfta_select_list[colref_source]!=NULL) return true;
// combine_colref_source: merges the source tags of two sub-expressions.
//   equal tags            -> that tag
//   one side is NOTBLVAR  -> the other side's tag
//   anything else         -> SPLIT_FTAVEC_MIXED
// Consequently MIXED combined with any different non-NOTBLVAR tag stays MIXED.
1344 int combine_colref_source(int s1, int s2){
1345 if(s1==s2) return(s1);
1346 if(s1==SPLIT_FTAVEC_NOTBLVAR) return s2;
1347 if(s2==SPLIT_FTAVEC_NOTBLVAR) return s1;
1348 return SPLIT_FTAVEC_MIXED;
// split_ftavec_se: variant of the scalar-expression split that works with one
// select list per table variable. Besides fta_forbidden it reports, through
// colref_source, which table variable the expression draws its column
// references from (an index, SPLIT_FTAVEC_NOTBLVAR or SPLIT_FTAVEC_MIXED).
// A child is only replaced via make_fta_se_ref() when its source passes
// is_PROTOCOL_source() and it is not made solely of literals/params.
1351 scalarexp_t *split_ftavec_se(
1352 scalarexp_t *se, // the SE to split
1353 bool &fta_forbidden, // return true if some part of se
1355 int &colref_source, // the tblvar which sources the
1356 // colref, or NOTBLVAR, or MIXED
1357 vector< vector<select_element *> *> &lfta_select_list,
1358 // NULL if the tblvar is not PROTOCOL,
1359 // else build the select list.
1360 ext_fcn_list *Ext_fcns // is the fcn lfta-safe?
1362 // Return value is the HFTA SE, unless fta_forbidden is true and
1363 // colref_source>=0 and the indicated source is PROTOCOL.
1364 // In that case no split was done, the make_fta_se_ref must
1365 // be done by the caller.
1368 vector<scalarexp_t *> operand_list;
1369 vector<data_type *> dt_signature;
1370 scalarexp_t *ret_se, *l_se, *r_se;
1371 bool l_forbid, r_forbid, this_forbid;
1372 int l_csource, r_csource, this_csource;
1374 scalarexp_t *new_se;
1375 data_type *dt = se->get_data_type();
// Dispatch on the kind of expression node.
1377 switch(se->get_operator_type()){
// Literal: no table variable involved.
1379 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1380 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1381 ret_se = new scalarexp_t(se->get_literal());
1382 ret_se->use_decorations_of(se);
// Query parameter: no table variable involved either.
1386 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1387 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1388 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1389 ret_se->use_decorations_of(se);
// Interface parameter: never forbidden here; its source is the table variable
// recorded on the interface-parameter reference.
1392 case SE_IFACE_PARAM:
1393 fta_forbidden = false;
1394 colref_source = se->get_ifpref()->get_tablevar_ref();
1395 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1396 ret_se->use_decorations_of(se);
1400 // No colref should be forbidden,
1401 // the schema is wrong, the fta_legal_type() fcn is wrong,
1402 // or the source table is actually a stream.
1403 // Issue a warning, but proceed with processing.
1404 // Also, should not be a ref to a gbvar.
1405 // (a gbvar ref only occurs in an aggregation node,
1406 // and these SEs are rehomed, not split.
1407 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1408 colref_source = se->get_colref()->get_tablevar_ref();
// Only warn when the forbidden column comes from a PROTOCOL source.
1410 if(fta_forbidden && is_PROTOCOL_source(colref_source, lfta_select_list)){
1411 fprintf(stderr,"WARNING, a PROTOCOL colref is a forbidden data type in split_ftavec_se,"
1413 " type is %s, line=%d, col=%d\n",
1414 se->get_colref()->to_string().c_str(),
1415 se->get_data_type()->to_string().c_str(),
1416 se->lineno, se->charno
1421 fta_forbidden = true; // eval in hfta. ASSUME make copy as below.
1424 ret_se = new scalarexp_t(se->get_colref());
1425 ret_se->use_decorations_of(se);
// One-operand operator: the child's source is written straight into
// colref_source, so this node inherits it unchanged.
1429 l_se = split_ftavec_se(se->get_left_se(), l_forbid, colref_source, lfta_select_list, Ext_fcns);
1431 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1433 // If this operation is forbidden but the child SE is not,
1434 // AND the colref source in the se is a single PROTOCOL source
1435 // put the child se on the lfta_select_list, create a colref
1436 // which accesses this se, and make it the child of this op.
1437 // Exception : the child se is constant (only literal refs).
1438 // TODO: I think the exception is expressed by is_PROTOCOL_source
1439 if(this_forbid && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list)){
1440 if(!is_literal_or_param_only(l_se)){
1441 new_se = make_fta_se_ref(lfta_select_list, l_se,colref_source);
1442 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1445 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1447 ret_se->use_decorations_of(se);
// Bitwise | on two bools; same truth value as ||.
1448 fta_forbidden = this_forbid | l_forbid;
// Two-operand operator: split both sides, each reporting its own source, then
// merge the two sources into this node's source.
1452 l_se = split_ftavec_se(se->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1453 r_se = split_ftavec_se(se->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1455 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1456 colref_source=combine_colref_source(l_csource, r_csource);
1458 // Replace the left se if the parent must be hfta but the child can
1459 // be lfta. This translates to
1460 // a) result is PROTOCOL and forbidden, but left SE is not forbidden
1461 // OR b) if result is mixed but the left se is PROTOCOL, not forbidden
1462 if( ((this_forbid || r_forbid) && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1463 (colref_source==SPLIT_FTAVEC_MIXED && !l_forbid &&
1464 is_PROTOCOL_source(l_csource, lfta_select_list)) ){
1465 if(!is_literal_or_param_only(l_se)){
1466 new_se = make_fta_se_ref(lfta_select_list, l_se,l_csource);
1471 // same logic as for right se.
1472 if( ((this_forbid || l_forbid) && !r_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1473 (colref_source==SPLIT_FTAVEC_MIXED && !r_forbid &&
1474 is_PROTOCOL_source(r_csource, lfta_select_list)) ){
1475 if(!is_literal_or_param_only(r_se)){
1476 new_se = make_fta_se_ref(lfta_select_list, r_se,r_csource);
1481 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1482 ret_se->use_decorations_of(se);
1483 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are not expected in this function; report it.
1490 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_ftavec_se."
1491 " line=%d, col=%d\n",
1492 se->get_op().c_str(),
1493 se->lineno, se->charno
// Function call: split each operand, recording per operand its forbidden flag
// and source, and fold all operand sources into colref_source.
1500 operand_list = se->get_operands();
1501 vector<scalarexp_t *> new_operands;
1502 vector<bool> forbidden_op;
1503 vector<int> csource;
1505 fta_forbidden = false;
1506 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1507 for(p=0;p<operand_list.size();p++){
1508 l_se = split_ftavec_se(operand_list[p], l_forbid, l_csource, lfta_select_list, Ext_fcns);
1510 fta_forbidden |= l_forbid;
1511 colref_source = combine_colref_source(colref_source, l_csource);
1512 new_operands.push_back(l_se);
1513 forbidden_op.push_back(l_forbid);
1514 csource.push_back(l_csource);
1515 dt_signature.push_back(operand_list[p]->get_data_type() );
// Resolve the function by name and ORIGINAL operand types; the diagnostic
// below lists those types, and -2 is reported as an ambiguous match.
1518 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
1520 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1522 for(o=0;o<operand_list.size();o++){
1523 if(o>0) fprintf(stderr,", ");
1524 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
1526 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1527 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The call is also forbidden when the function itself is not fta-legal.
1531 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1533 // Replace the non-forbidden operands.
1534 // the forbidden ones are already replaced.
// Replacement happens only when the call as a whole is forbidden or draws on
// more than one source; each operand uses its own recorded source.
1535 if(fta_forbidden || colref_source == SPLIT_FTAVEC_MIXED){
1536 for(p=0;p<new_operands.size();p++){
1537 if(! forbidden_op[p] && is_PROTOCOL_source(csource[p], lfta_select_list)){
1538 if(!is_literal_or_param_only(new_operands[p])){
1539 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],csource[p]);
1540 new_operands[p] = new_se;
1546 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1547 ret_se->use_decorations_of(se);
// NOTE(review): printed with printf (stdout) while the other diagnostics in
// this function use fprintf(stderr, ...).
1553 printf("INTERNAL ERROR in split_ftavec_se: operator type %d\n",se->get_operator_type());
1562 // The predicates have already been
1563 // broken into conjunctions.
1564 // If any part of a conjunction is fta-forbidden,
1565 // it must be executed in the stream operator.
1566 // Else it is executed in the FTA.
1567 // A pre-analysis should determine whether this
1568 // predicate is fta-safe. This procedure will
1569 // assume that it is fta-forbidden and will
1570 // prepare it for execution in the stream.
// split_ftavec_pr: rebuilds the predicate `pr` as a new predicate_t (ret_pr)
// using split_ftavec_se() for scalar operands and recursion for
// sub-predicates. A scalar operand is replaced through make_fta_se_ref() only
// when it is not forbidden, its source passes is_PROTOCOL_source(), and it is
// not made solely of literals/params; the operand's own source index is
// passed along to make_fta_se_ref().
1572 predicate_t *split_ftavec_pr(predicate_t *pr,
1573 vector< vector<select_element *> *> &lfta_select_list,
1574 ext_fcn_list *Ext_fcns
1577 vector<literal_t *> llist;
1578 scalarexp_t *se_l, *se_r;
1579 bool l_forbid, r_forbid;
1580 int l_csource, r_csource;
1581 predicate_t *ret_pr, *pr_l, *pr_r;
1582 vector<scalarexp_t *> op_list, new_op_list;
1584 vector<data_type *> dt_signature;
// Dispatch on the kind of predicate node.
1587 switch(pr->get_operator_type()){
// Predicate over one scalar expression and a literal vector
// (presumably an IN-list test -- confirm against predicate_t).
1589 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1591 // TODO: checking that the se is a PROTOCOL source should
1592 // take care of literal_or_param_only.
1593 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1594 if(!is_literal_or_param_only(se_l)){
1595 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1599 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate over two scalar expressions joined by pr->get_op(); each side is
// judged against its own source.
1604 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1605 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1606 if(!is_literal_or_param_only(se_l)){
1607 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1612 se_r = split_ftavec_se(pr->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1613 if(!r_forbid && is_PROTOCOL_source(r_csource, lfta_select_list)){
1614 if(!is_literal_or_param_only(se_r)){
1615 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,r_csource);
1620 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Operator applied to a single sub-predicate.
1624 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1625 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Operator applied to two sub-predicates.
1628 case PRED_BINARY_OP:
1629 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1630 pr_r = split_ftavec_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1631 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1635 // I can't push the predicate into the lfta, except by
1636 // returning a bool value, and that is not worth the trouble,
// Predicate with an operand list: split each operand; the original function
// id is copied onto the rebuilt predicate.
1637 op_list = pr->get_op_list();
1638 for(o=0;o<op_list.size();++o){
1639 se_l = split_ftavec_se(op_list[o],l_forbid,l_csource,lfta_select_list,Ext_fcns);
1640 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1641 if(!is_literal_or_param_only(se_l)){
1642 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1646 new_op_list.push_back(se_l);
1649 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1650 ret_pr->set_fcn_id(pr->get_fcn_id());
// NOTE(review): the message below says split_fta_pr although this function is
// split_ftavec_pr, which makes the diagnostic point at the wrong routine.
1653 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1654 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1664 ////////////////////////////////////////////////////////////////////////
1665 /// rehome_hfta_se rehome_hfta_pr
1666 /// This is used to split an sgah operator (aggregation),
1667 /// I just need to make gb, aggr references point to the
1668 /// new gb, aggr table entries.
// rehome_fta_se: rebuilds the scalar expression `se` node by node, except
// that aggregate references are answered with the expression stored in
// aggr_map under the aggregate's id.
//   se       - expression to rebuild; only read through accessors here.
//   aggr_map - aggregate id -> replacement expression.
// NOTE(review): the replacement is returned as the stored pointer, not as a
// copy, so several rebuilt expressions may end up sharing one node.
1671 scalarexp_t *rehome_fta_se(scalarexp_t *se,
1672 map< int, scalarexp_t * > *aggr_map
1677 vector<scalarexp_t *> operand_list;
1678 scalarexp_t *ret_se, *l_se, *r_se;
1680 scalarexp_t *new_se;
1681 data_type *dt = se->get_data_type();
1682 vector<scalarexp_t *> new_operands;
// Dispatch on the kind of expression node.
1684 switch(se->get_operator_type()){
// Literal: rebuilt from the same literal object.
1686 ret_se = new scalarexp_t(se->get_literal());
1687 ret_se->use_decorations_of(se);
// Query parameter: rebuilt by name.
1691 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1692 ret_se->use_decorations_of(se);
// Interface parameter: rebuilt from the same reference object.
1695 case SE_IFACE_PARAM:
1696 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1697 ret_se->use_decorations_of(se);
1703 // Must be a GB REF ...
1704 // I'm assuming that the hfta gbvar table has the
1705 // same sequence of entries as the input query's gbvar table.
1706 // Else I'll need some kind of translation table.
1709 fprintf(stderr,"WARNING, a colref is not a gbver ref in rehome_hfta_se"
1710 " type is %s, line=%d, col=%d\n",
1711 se->get_data_type()->to_string().c_str(),
1712 se->lineno, se->charno
1716 ret_se = new scalarexp_t(se->get_colref());
1717 ret_se->use_decorations_of(se); // just inherit the gbref
// One-operand operator: rebuild the child, then the operator.
1721 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1723 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1724 ret_se->use_decorations_of(se);
// Two-operand operator: rebuild both children, then the operator.
1728 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1729 r_se = rehome_fta_se(se->get_right_se(), aggr_map);
1731 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1732 ret_se->use_decorations_of(se);
// Aggregate reference: answer with the mapped expression.
// NOTE(review): if aggr_map is a std::map, operator[] inserts a null entry
// when the id is absent and a null pointer is returned -- confirm every id
// is present before this is called.
1738 agr_id = se->get_aggr_ref();
1739 return (*aggr_map)[agr_id];
// Function call: a non-negative aggregate id means the call is itself treated
// as an aggregate and is answered from the map; otherwise rebuild the operands.
1743 agr_id = se->get_aggr_ref();
1744 if(agr_id >= 0) return (*aggr_map)[agr_id];
1746 operand_list = se->get_operands();
1747 for(p=0;p<operand_list.size();p++){
1748 l_se = rehome_fta_se(operand_list[p], aggr_map);
1750 new_operands.push_back(l_se);
1754 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1755 ret_se->use_decorations_of(se);
// NOTE(review): printed with printf (stdout) while the warning above uses
// fprintf(stderr, ...).
1760 printf("INTERNAL ERROR in rehome_fta_se: operator type %d\n",se->get_operator_type());
1769 // The predicates have already been
1770 // broken into conjunctions.
1771 // If any part of a conjunction is fta-forbidden,
1772 // it must be executed in the stream operator.
1773 // Else it is executed in the FTA.
1774 // A pre-analysis should determine whether this
1775 // predicate is fta-safe. This procedure will
1776 // assume that it is fta-forbidden and will
1777 // prepare it for execution in the stream.
// rehome_fta_pr: rebuilds the predicate `pr` as a new predicate_t (ret_pr),
// passing every scalar operand through rehome_fta_se() and every
// sub-predicate through a recursive call. Unlike the split_* routines, no
// select list is involved and nothing is replaced by a reference.
//   pr       - predicate to rebuild; only read through accessors here.
//   aggr_map - forwarded unchanged to rehome_fta_se().
1779 predicate_t *rehome_fta_pr(predicate_t *pr,
1780 map<int, scalarexp_t *> *aggr_map
1783 vector<literal_t *> llist;
1784 scalarexp_t *se_l, *se_r;
1785 predicate_t *ret_pr, *pr_l, *pr_r;
1786 vector<scalarexp_t *> op_list, new_op_list;
// Dispatch on the kind of predicate node.
1789 switch(pr->get_operator_type()){
// Predicate over one scalar expression and a literal vector.
1791 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1792 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate over two scalar expressions joined by pr->get_op().
1796 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1797 se_r = rehome_fta_se(pr->get_right_se(), aggr_map);
1798 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Operator applied to a single sub-predicate.
1802 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1803 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Operator applied to two sub-predicates.
1806 case PRED_BINARY_OP:
1807 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1808 pr_r = rehome_fta_pr(pr->get_right_pr(), aggr_map);
1809 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: rebuild each operand and carry over the
// original function id.
1813 op_list = pr->get_op_list();
1814 for(o=0;o<op_list.size();++o){
1815 se_l = rehome_fta_se(op_list[o], aggr_map);
1816 new_op_list.push_back(se_l);
1818 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1819 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unrecognised predicate kind: report position and operator type on stderr.
1823 fprintf(stderr,"INTERNAL ERROR in rehome_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1824 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1833 ////////////////////////////////////////////////////////////////////
1834 ///////////////// Create a STREAM table to represent the FTA output.
// create_attributes: builds a STREAM_SCHEMA table_def named `tname` with one
// field per entry of select_list.
//   tname       - name given to the new table definition.
//   select_list - each element supplies the field name (->name) and, through
//                 its scalar expression (->se), the field's data type.
// For every field the type string comes from dt->get_type_str(), the access
// function is named "get_field_" + field name, and the data type's parameter
// keys are copied into a fresh param_list.
1836 table_def *create_attributes(string tname, vector<select_element *> &select_list){
1840 // Create a new STREAM schema for the output of the FTA.
1842 field_entry_list *fel = new field_entry_list();
1844 for(s=0;s<select_list.size();s++){
1845 scalarexp_t *sel_se = select_list[s]->se;
1846 data_type *dt = sel_se->get_data_type();
1848 // Grab the annotations of the field.
1849 // As of this writing, the only meaningful annotations
1850 // are whether or not the attribute is temporal.
1851 // There can be an annotation of constant_t, but
1852 // I'll ignore this, it feels like an unsafe assumption
1853 param_list *plist = new param_list();
1854 // if(dt->is_temporal()){
1855 vector<string> param_strings = dt->get_param_keys();
// Copy each parameter key; the two append() forms below add the key with or
// without its value (presumably chosen on whether v is empty -- the selecting
// condition is not visible here, confirm).
1857 for(p=0;p<param_strings.size();++p){
1858 string v = dt->get_param_val(param_strings[p]);
1860 plist->append(param_strings[p].c_str(),v.c_str());
1862 plist->append(param_strings[p].c_str());
1866 // char access_fcn_name[500];
1867 string colname = select_list[s]->name;
1868 // sprintf(access_fcn_name,"get_field_%s",colname.c_str());
// std::string concatenation replaces the fixed 500-byte buffer kept in the
// commented-out lines.
1869 string access_fcn_name = "get_field_"+colname;
1870 field_entry *fe = new field_entry(
1871 dt->get_type_str(), colname, access_fcn_name, plist, ufcns
1874 fel->append_field(fe);
// Wrap the collected fields in a table definition marked as a stream schema.
1877 table_def *fta_tbl = new table_def(
1878 tname.c_str(), NULL, NULL, fel, STREAM_SCHEMA
1885 //------------------------------------------------------------------
1886 // Textual representation of the query node.
// spx_qpn::to_query_string: renders this select/project node as query text.
// Output starts with "Select ", lists each select element (with " AS name"
// when the element is named), then "From " + table_name, then the where
// predicates, each parenthesised and joined by " AND ".
// No aggregate table is passed to the renderers (NULL).
1890 string spx_qpn::to_query_string(){
1892 string ret = "Select ";
1894 for(s=0;s<select_list.size();s++){
1896 ret += se_to_query_string(select_list[s]->se, NULL);
1897 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1901 ret += "From "+table_name->to_string()+"\n";
// The where section is emitted only when there is at least one predicate.
1903 if(where.size() > 0){
1906 for(w=0;w<where.size();w++){
1907 if(w>0) ret += " AND ";
1908 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// sgah_qpn::to_query_string: renders this aggregation node as query text:
// select list, "From " + table_name, where predicates, the group-by
// variables, and the having predicates. Expressions and predicates are
// rendered with &aggr_tbl so aggregate references can be resolved.
// The group-by section has two forms: a flat list of "definition AS name"
// entries, or, when several patterns and entry types are recorded, one item
// per entry type (plain, CUBE/ROLLUP, GROUPING_SETS).
1919 string sgah_qpn::to_query_string(){
1921 string ret = "Select ";
1923 for(s=0;s<select_list.size();s++){
1925 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1926 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1930 ret += "From "+table_name->to_string()+"\n";
1932 if(where.size() > 0){
1935 for(w=0;w<where.size();w++){
1936 if(w>0) ret += " AND ";
1937 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1942 if(gb_tbl.size() > 0){
// Flat form: at most one pattern, or no entry types recorded.
1945 if(gb_tbl.gb_patterns.size() <= 1 || gb_tbl.gb_entry_type.size()==0){
1946 for(g=0;g<gb_tbl.size();g++){
1947 if(g>0) ret += ", ";
1948 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
1949 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
1951 ret += gb_tbl.get_name(g);
// Structured form: gb_pos indexes the flat group-by table while g walks the
// entry types; it is advanced by gb_entry_count[g] after the GROUPING_SETS
// case below.
1955 for(g=0;g<gb_tbl.gb_entry_type.size();++g){
1956 if(g>0) ret += ", ";
// An empty entry type is a plain group-by variable.
1957 if(gb_tbl.gb_entry_type[g] == ""){
1958 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+
1959 " AS "+ gb_tbl.get_name(gb_pos);
1962 if(gb_tbl.gb_entry_type[g] == "CUBE" ||
1963 gb_tbl.gb_entry_type[g] == "ROLLUP"){
1964 ret += gb_tbl.gb_entry_type[g] + "(";
1966 for(gg=0;gg<gb_tbl.gb_entry_count[g];++gg){
1967 if(gg>0) ret += ", ";
1968 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+ " AS "+ gb_tbl.get_name(gb_pos);
1973 if(gb_tbl.gb_entry_type[g] == "GROUPING_SETS"){
1974 ret += gb_tbl.gb_entry_type[g] + "(";
// Each row of local_components is one grouping set; a true flag at g2 selects
// the variable at gb_pos+g2.
1976 vector<vector<bool> > &local_components = gb_tbl.pattern_components[g];
1977 for(g1=0;g1<local_components.size();++g1){
1979 bool first_field = true;
// NOTE(review): this loop uses <= and so runs gb_entry_count[g]+1 times,
// whereas the CUBE/ROLLUP loop above uses <. If each row holds exactly
// gb_entry_count[g] flags, the last iteration reads one past the end --
// confirm the row length.
1981 for(g2=0;g2<=gb_tbl.gb_entry_count[g];g2++){
1982 if(local_components[g1][g2]){
1983 if(!first_field) ret+=", ";
1984 else first_field = false;
1985 ret += gb_tbl.get_name(gb_pos+g2);
1991 gb_pos += gb_tbl.gb_entry_count[g];
1998 if(having.size() > 0){
2001 for(h=0;h<having.size();h++){
2002 if(h>0) ret += " AND ";
2003 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// rsgah_qpn::to_query_string: renders this node as query text: select list,
// "From " + table_name, where predicates, a flat group-by list of
// "definition AS name" entries, having predicates, and finally a
// "Closing_When " section. Predicate lists are parenthesised and joined by
// " AND "; everything is rendered with &aggr_tbl.
2012 string rsgah_qpn::to_query_string(){
2014 string ret = "Select ";
2016 for(s=0;s<select_list.size();s++){
2018 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2019 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2023 ret += "From "+table_name->to_string()+"\n";
2025 if(where.size() > 0){
2028 for(w=0;w<where.size();w++){
2029 if(w>0) ret += " AND ";
2030 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2035 if(gb_tbl.size() > 0){
2038 for(g=0;g<gb_tbl.size();g++){
2039 if(g>0) ret += ", ";
2040 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2041 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl)+" AS ";
2043 ret += gb_tbl.get_name(g);
2048 if(having.size() > 0){
2051 for(h=0;h<having.size();h++){
2052 if(h>0) ret += " AND ";
2053 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// The closing_when predicates are emitted only when present.
2058 if(closing_when.size() > 0){
2059 ret += "Closing_When ";
2061 for(h=0;h<closing_when.size();h++){
2062 if(h>0) ret += " AND ";
2063 ret += "(" + pred_to_query_str(closing_when[h]->pr,&aggr_tbl) + ")";
// sgahcwcb_qpn::to_query_string: renders this node as query text: select
// list, "From " + table_name, where predicates, a flat group-by list, a
// "Supergroup " list, having predicates, and the "Cleaning_When " and
// "Cleaning_By " sections. Predicate lists are parenthesised and joined by
// " AND "; everything is rendered with &aggr_tbl.
2072 string sgahcwcb_qpn::to_query_string(){
2074 string ret = "Select ";
2076 for(s=0;s<select_list.size();s++){
2078 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2079 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2083 ret += "From "+table_name->to_string()+"\n";
2085 if(where.size() > 0){
2088 for(w=0;w<where.size();w++){
2089 if(w>0) ret += " AND ";
2090 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2095 if(gb_tbl.size() > 0){
2098 for(g=0;g<gb_tbl.size();g++){
2099 if(g>0) ret += ", ";
2100 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2101 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
2103 ret += gb_tbl.get_name(g);
// Supergroup: walk all group-by indices and emit the names of those whose
// index is a member of sg_tbl.
2108 if(sg_tbl.size() > 0){
2109 ret += "Supergroup ";
2111 bool first_elem = true;
2112 for(g=0;g<gb_tbl.size();g++){
2113 if(sg_tbl.count(g)){
2118 ret += gb_tbl.get_name(g);
2124 if(having.size() > 0){
2127 for(h=0;h<having.size();h++){
2128 if(h>0) ret += " AND ";
2129 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
2135 if(cleanwhen.size() > 0){
2136 ret += "Cleaning_When ";
2138 for(h=0;h<cleanwhen.size();h++){
2139 if(h>0) ret += " AND ";
2140 ret += "(" + pred_to_query_str(cleanwhen[h]->pr,&aggr_tbl) + ")";
2145 if(cleanby.size() > 0){
2146 ret += "Cleaning_By ";
2148 for(h=0;h<cleanby.size();h++){
2149 if(h>0) ret += " AND ";
2150 ret += "(" + pred_to_query_str(cleanby[h]->pr,&aggr_tbl) + ")";
// watch_tbl_qpn::to_query_string: renders the watchlist as text: the header
// "WATCHLIST FIELDS {" followed by one line per field of table_layout, each
// produced by the field's own to_string(). The DEFINE section (filename,
// refresh_interval) is currently commented out and not emitted.
2158 string watch_tbl_qpn::to_query_string(){
2160 // ret += "DEFINE {\n";
2161 // ret += "\tfilename='"+filename+";\n";
2162 // ret += "\trefresh_interval="+to_string(refresh_interval)+";\n}\n";
2163 ret += "WATCHLIST FIELDS {\n";
// get_fields() result is held in a local vector for the loop below.
2164 std::vector<field_entry *> fields = table_layout->get_fields();
// NOTE(review): `int f` is compared against the unsigned fields.size().
2165 for(int f=0;f<fields.size();++f){
2166 ret += fields[f]->to_string()+"\n";
// mrg_qpn::to_query_string: renders this merge node as text: "Merge ", the
// first two merge variables separated by " : ", an optional " SLACK "
// expression, and then the source list (fm) joined by ", ".
2173 string mrg_qpn::to_query_string(){
2175 string ret="Merge ";
// NOTE(review): mvars[0] and mvars[1] are indexed unconditionally; this
// relies on the node having at least two merge variables.
2176 ret += mvars[0]->to_query_string() + " : " + mvars[1]->to_query_string();
2178 ret += " SLACK "+se_to_query_string(slack, NULL);
2183 for(t=0;t<fm.size();++t){
2184 if(t>0) ret += ", ";
2185 ret += fm[t]->to_string();
// join_eq_hash_qpn::to_query_string: renders this join node as query text:
// select list, a join-type keyword, the from list, and the where predicates
// (parenthesised, joined by " AND "). No aggregate table is used (NULL).
2192 string join_eq_hash_qpn::to_query_string(){
2194 string ret = "Select ";
2196 for(s=0;s<select_list.size();s++){
2198 ret += se_to_query_string(select_list[s]->se, NULL);
2199 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2203 // NOTE: assuming binary join.
// Combines the two sources' property values into one code (first + 2*second)
// that selects among the four keywords below; from[0] and from[1] are indexed
// unconditionally.
2204 int properties = from[0]->get_property()+2*from[1]->get_property();
2207 ret += "INNER_JOIN ";
2210 ret += "LEFT_OUTER_JOIN ";
2213 ret += "RIGHT_OUTER_JOIN ";
2216 ret += "OUTER_JOIN ";
2222 for(f=0;f<from.size();++f){
2224 ret += from[f]->to_string();
2228 if(where.size() > 0){
2231 for(w=0;w<where.size();w++){
2232 if(w>0) ret += " AND ";
2233 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// filter_join_qpn::to_query_string: renders this filter-join node as query
// text: select list, "FILTER_JOIN(<temporal field>,<temporal range>) ", the
// from list, and the where predicates (parenthesised, joined by " AND ").
// No aggregate table is used (NULL).
2241 string filter_join_qpn::to_query_string(){
2243 string ret = "Select ";
2245 for(s=0;s<select_list.size();s++){
2247 ret += se_to_query_string(select_list[s]->se, NULL);
2248 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2252 // NOTE: assuming binary join.
// The temporal variable's field name and the integer range are the two
// arguments shown inside FILTER_JOIN(...).
2253 ret += "FILTER_JOIN("+temporal_var->field+","+int_to_string(temporal_range)+") ";
2257 for(f=0;f<from.size();++f){
2259 ret += from[f]->to_string();
2263 if(where.size() > 0){
2266 for(w=0;w<where.size();w++){
2267 if(w>0) ret += " AND ";
2268 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// watch_join_qpn::to_query_string: renders this watchlist-join node as query
// text: select list, the keyword "WATCHLIST_JOIN ", the from list, and the
// where predicates (parenthesised, joined by " AND ").
// No aggregate table is used (NULL).
2276 string watch_join_qpn::to_query_string(){
2278 string ret = "Select ";
2280 for(s=0;s<select_list.size();s++){
2282 ret += se_to_query_string(select_list[s]->se, NULL);
2283 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2287 // NOTE: assuming binary join.
2288 ret += "WATCHLIST_JOIN ";
2292 for(f=0;f<from.size();++f){
2294 ret += from[f]->to_string();
// The where section is emitted only when there is at least one predicate.
2298 if(where.size() > 0){
2301 for(w=0;w<where.size();w++){
2302 if(w>0) ret += " AND ";
2303 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
2313 // -----------------------------------------------------------------
2314 // Query node subclass specific processing.
// Break this merge node's source list into smaller merge nodes and return
// them. In the visible paths the returned vector lists child merges first
// and this node last.
// NOTE(review): several short lines (branch conditions, local declarations,
// returns) are not visible in this view; the comments below describe only
// what the visible statements do.
2317 vector<mrg_qpn *> mrg_qpn::split_sources(){
2318 vector<mrg_qpn *> ret;
// Sanity checks: fm and mvars must have the same number of entries, and a
// merge over a single source is reported as an internal error.
2322 if(fm.size() != mvars.size()){
2323 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources. fm.size() = %lu, mvars.size() = %lu\n",fm.size(),mvars.size());
2327 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources, fm size is 1.\n");
// Diagnostic dump of the sources.
// NOTE(review): fm.size() is printed with %d here but with %lu above; the
// argument is an unsigned size type, so %d is a format mismatch - confirm
// whether this code is live.
2333 printf("spliting sources merge node, name = %s, %d sources.\n\t",node_name.c_str(), fm.size());
2334 for(ff=0;ff<fm.size();++ff){
2335 printf("%s ",fm[ff]->to_string().c_str());
2340 // Handle special cases.
// Case: nothing to split, this node is returned as the only result
// (presumably when the source count is already small enough - condition not visible).
2342 ret.push_back(this);
// Case: peel the first two sources off into a child merge ("_cH1" copy) and
// make this node read from that child plus the remaining source(s).
2347 mrg_qpn *new_mrg = (mrg_qpn *)this->make_copy("_cH1");
2348 new_mrg->fm.push_back(this->fm[0]);
2349 new_mrg->fm.push_back(this->fm[1]);
2350 new_mrg->mvars.push_back(this->mvars[0]);
2351 new_mrg->mvars.push_back(this->mvars[1]);
// Drop the first source from this node; the old second source slot (now
// index 0) is re-pointed at the child merge, keeping its range variable name.
2353 this->fm.erase(this->fm.begin());
2354 this->mvars.erase(this->mvars.begin());
2355 string vname = fm[0]->get_var_name();
2356 this->fm[0] = new tablevar_t(new_mrg->node_name.c_str());
2357 this->fm[0]->set_range_var(vname);
// NOTE(review): after the erase, this->mvars[0] is the same pointer that was
// pushed as new_mrg->mvars[1] above, so set_field/set_tablevar_ref(0) below
// also change the child's second merge column - confirm this aliasing is intended.
2358 this->mvars[0]->set_field(table_layout->get_field_name(merge_fieldpos));
2359 this->mvars[0]->set_tablevar_ref(0);
2360 this->mvars[1]->set_tablevar_ref(1);
// Child first, then this node.
2362 ret.push_back(new_mrg);
2363 ret.push_back(this);
// Diagnostic dump of the resulting two nodes.
2366 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg->node_name.c_str(),this->node_name.c_str());
2367 for(i=0;i<new_mrg->fm.size();++i)
2368 printf("\tsource %s var %d (%s, %s) \n",new_mrg->node_name.c_str(),i,new_mrg->fm[i]->to_string().c_str(), new_mrg->mvars[i]->to_string().c_str());
2369 for(i=0;i<this->fm.size();++i)
2370 printf("\tsource %s var %d (%s, %s) \n",this->node_name.c_str(),i,this->fm[i]->to_string().c_str(), this->mvars[i]->to_string().c_str());
2377 // divide up the sources between two children.
2378 // Then, recurse on the children.
// General case: the first fm.size()/2 sources go to the "_cH1" copy, the
// rest go to the "_cH2" copy (i carries over from the first loop).
2380 mrg_qpn *new_mrg1 = (mrg_qpn *)this->make_copy("_cH1");
2381 mrg_qpn *new_mrg2 = (mrg_qpn *)this->make_copy("_cH2");
2382 for(i=0;i<this->fm.size()/2;++i){
2383 new_mrg1->fm.push_back(this->fm[i]);
2384 new_mrg1->mvars.push_back(this->mvars[i]);
2385 //printf("Pushing %d (%s, %s) to new_mrg1\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
2387 for(;i<this->fm.size();++i){
2388 new_mrg2->fm.push_back(this->fm[i]);
2389 new_mrg2->mvars.push_back(this->mvars[i]);
2390 //printf("Pushing %d (%s, %s) to new_mrg2\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// Renumber each child's merge columns to index into that child's own fm list.
2392 for(i=0;i<new_mrg1->mvars.size();++i)
2393 new_mrg1->mvars[i]->set_tablevar_ref(i);
2394 for(i=0;i<new_mrg2->mvars.size();++i)
2395 new_mrg2->mvars[i]->set_tablevar_ref(i);
2397 // Children created, make this merge them.
// Two new sources are appended to this node, one per child, each with a
// merge column named after the merge field of table_layout.
// NOTE(review): presumably fm and mvars are emptied just before this point
// (not visible here) - verify, otherwise the old sources would remain.
2401 tablevar_t *tmp_tblvar = new tablevar_t(new_mrg1->node_name.c_str());
2402 tmp_tblvar->set_range_var("_mrg_var_1");
2403 fm.push_back(tmp_tblvar);
2404 colref_t *tmp_cref = new colref_t("_mrg_var_1",table_layout->get_field_name(merge_fieldpos).c_str());
2405 tmp_cref->set_tablevar_ref(0);
2406 mvars.push_back(tmp_cref);
2408 tmp_tblvar = new tablevar_t(new_mrg2->node_name.c_str());
2409 tmp_tblvar->set_range_var("_mrg_var_2");
2410 fm.push_back(tmp_tblvar);
2411 tmp_cref = new colref_t("_mrg_var_2",table_layout->get_field_name(merge_fieldpos).c_str());
2412 tmp_cref->set_tablevar_ref(1);
2413 mvars.push_back(tmp_cref);
// Diagnostic dump of both children.
2417 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg1->node_name.c_str(),new_mrg2->node_name.c_str());
2418 for(i=0;i<new_mrg1->fm.size();++i)
2419 printf("\tsource %s var %d (%s, %s) \n",new_mrg1->node_name.c_str(),i,new_mrg1->fm[i]->to_string().c_str(), new_mrg1->mvars[i]->to_string().c_str());
2420 for(i=0;i<new_mrg2->fm.size();++i)
2421 printf("\tsource %s var %d (%s, %s) \n",new_mrg2->node_name.c_str(),i,new_mrg2->fm[i]->to_string().c_str(), new_mrg2->mvars[i]->to_string().c_str());
2424 // Recurse and put them together
// Result order: everything from the first child, everything from the
// second child, then this node.
2425 vector<mrg_qpn *> st1 = new_mrg1->split_sources();
2426 ret.insert(ret.end(), st1.begin(), st1.end());
2427 vector<mrg_qpn *> st2 = new_mrg2->split_sources();
2428 ret.insert(ret.end(), st2.begin(), st2.end());
2430 ret.push_back(this);
2438 //////// Split helper function : resolve interfaces
// Resolve the (machine, interface) pairs a table variable reads from.
//   table            - table variable whose interface (or interface set) is resolved
//   ifdb             - interface database used to evaluate an interface set
//   n_virtual_ifaces - when 1, the resolved pairs are returned unchanged
//   hfta_parallelism - divisor used to compute how many virtual interfaces
//                      are generated per resolved interface
//   hfta_idx         - selects which stride-sized slice of virtual interfaces is generated
// Returns the list of (machine, interface) pairs.
2440 vector<pair<string,string> > get_ifaces(tablevar_t *table, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2441 vector<pair<string,string> > basic_ifaces;
// If the table variable is flagged via get_ifq(), its interface name is
// evaluated through ifdb->eval; errors are reported on stderr.
2443 if(table->get_ifq()){
2444 basic_ifaces= ifdb->eval(table->get_interface(),ierr);
2446 fprintf(stderr,"ERROR, Interface set %s not found.\n",table->get_interface().c_str());
2449 fprintf(stderr,"ERROR, interface definition file didn't parse.\n");
// Otherwise the table's own machine/interface pair is used as-is.
2452 basic_ifaces.push_back(make_pair(table->get_machine(), table->get_interface()));
2455 if(n_virtual_ifaces == 1)
2456 return basic_ifaces;
// NOTE(review): integer division - any remainder is dropped, and a zero
// hfta_parallelism would divide by zero; confirm callers guarantee a
// positive value.
2458 int stride = n_virtual_ifaces / hfta_parallelism;
2460 vector<pair<string,string> > ifaces;
// For each resolved interface, emit the names iface + "X" + (2*s) for s in
// [hfta_idx*stride, (hfta_idx+1)*stride).
2462 for(i=0;i<basic_ifaces.size();++i){
2463 string mach = basic_ifaces[i].first;
2464 string iface = basic_ifaces[i].second;
2465 for(s=hfta_idx*stride;s<(hfta_idx+1)*stride;++s){
2466 ifaces.push_back(pair<string, string>(mach,iface+"X"+int_to_string(2*s)));
2474 ///////// Split helper function : compute slack in a generated
// Compute the slack expression for this merge node and store it in `slack`.
//   t_se    - scalar expression passed to find_temporal_divisor
//   fname   - field name (not referenced in the visible lines)
//   sources - (machine, interface) pairs whose "Slack_<field>" values are consulted
//   ifdb    - interface database queried for those values
//   gbt     - group-by table passed through to find_temporal_divisor
// NOTE(review): the handling of the early-exit cases (divisor <= 0, max
// slack <= 0) is not visible in this view.
2477 void mrg_qpn::resolve_slack(scalarexp_t *t_se, string fname, vector<pair<string, string> > &sources, ifq_t *ifdb, gb_table *gbt){
2481 // Find slack divisor, if any.
2483 long long int slack_divisor = find_temporal_divisor(t_se,gbt, fnm);
2484 if(slack_divisor <= 0){
2489 // find max slack in the iface spec
2490 long long int max_slacker = 0, this_slacker;
2491 string rname = "Slack_"+fnm;
2492 for(s=0;s<sources.size();++s){
2493 string src_machine = sources[s].first;
2494 string src_iface = sources[s].second;
2495 vector<string> slack_vec = ifdb->get_iface_vals(src_machine, src_iface,rname,e,es);
2496 for(v=0;v<slack_vec.size();++v){
// NOTE(review): sscanf returns EOF (non-zero) on an empty input, so this
// test can succeed without this_slacker being written; comparing against 1
// would be stricter. "%qd" is a non-standard spelling of "%lld".
2497 if(sscanf(slack_vec[v].c_str(),"%qd",&this_slacker)){
2498 if(this_slacker > max_slacker)
2499 max_slacker = this_slacker;
2504 if(max_slacker <= 0){
// Slack is the largest configured value divided by the temporal divisor,
// rounded up, and wrapped in a LITERAL_LONGINT scalar expression.
2510 long long int the_slack=(long long int)(ceil(((double)max_slacker)/((double)slack_divisor)));
// NOTE(review): tmps is declared outside the visible lines; sprintf does
// not bound the write - confirm the buffer is large enough for a long long.
2512 sprintf(tmps,"%lld",the_slack);
2513 literal_t *slack_lit = new literal_t(tmps, LITERAL_LONGINT);
2514 slack = new scalarexp_t(slack_lit);
2518 //------------------------------------------------------------------
2519 // split a node to extract LFTA components.
// Watch-table nodes are not split: the returned vector contains this node.
// None of the parameters are referenced in the visible lines.
2521 vector<qp_node *> watch_tbl_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2522 // nothing to do, nothing to split, return copy of self.
2526 vector<qp_node *> ret_vec;
// Note: the node itself (not a duplicate) is pushed.
2528 ret_vec.push_back(this);
// Merge nodes are not split: the returned vector contains this node.
// None of the parameters are referenced in the visible lines.
2534 vector<qp_node *> mrg_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2535 // nothing to do, nothing to split, return copy of self.
2539 vector<qp_node *> ret_vec;
// Note: the node itself (not a duplicate) is pushed.
2541 ret_vec.push_back(this);
// Split a filter join for execution at the FTA level.
// With a single interface the node itself is returned, bound to that
// interface. With several interfaces, one copy of the join is built per
// interface and a merge node over those copies is appended.
//   Ext_fcns  - external function list used by the fta-safety checks
//   ifdb      - interface database used to resolve interfaces and interface params
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces
// Schema and hfta_returned are not referenced in the visible lines.
// Returns the resulting query plan nodes.
// NOTE(review): branch keywords, returns and some declarations are not
// visible in this view.
2546 vector<qp_node *> filter_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2547 vector<qp_node *> ret_vec;
2549 // First check if the query can be pushed to the FTA.
// Every select expression and every where predicate must pass the check.
2552 for(s=0;s<select_list.size();s++){
2553 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2556 for(p=0;p<where.size();p++){
2557 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2561 fprintf(stderr,"ERROR, filter join %s is fta-unsafe.\n",node_name.c_str());
2565 // Can it be done in a single lfta?
2566 // Get the set of interfaces it accesses.
2569 vector<string> sel_names;
// Interfaces are resolved from the first FROM entry only.
2570 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2571 if (ifaces.empty()) {
2572 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2576 if(ifaces.size() == 1){
2577 // Single interface, no need to merge.
2579 ret_vec.push_back(this);
// Bind every FROM entry to the single resolved interface.
2581 for(i=0;i<from.size();i++){
2582 from[i]->set_machine(ifaces[0].first);
2583 from[i]->set_interface(ifaces[0].second);
2584 from[i]->set_ifq(false);
2588 // Multiple interfaces, generate the interface-specific queries plus
// NOTE(review): this second sel_names shadows the one declared above
// (presumably in a nested scope) - a single declaration would suffice.
2592 vector<string> sel_names;
2593 for(si=0;si<ifaces.size();++si){
2594 filter_join_qpn *fta_node = new filter_join_qpn();
// NOTE(review): this size()==1 test sits under the "Multiple interfaces"
// path and looks unreachable there - confirm against the full branch structure.
2597 if(ifaces.size()==1)
2598 fta_node->set_node_name( node_name );
// Per-interface name: "_" + node name + "_" + machine + "_" + interface.
2600 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2602 fta_node->set_node_name(new_name);
2604 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM entries and bind them to this interface.
2608 for(f=0;f<from.size();f++){
2609 fta_node->from.push_back(from[f]->duplicate());
2610 fta_node->from[f]->set_machine(ifaces[si].first);
2611 fta_node->from[f]->set_interface(ifaces[si].second);
2612 fta_node->from[f]->set_ifq(false);
2614 fta_node->temporal_var = temporal_var;
2615 fta_node->temporal_range = temporal_range;
2617 fta_node->use_bloom = use_bloom;
// Deep-copy the select list.
2619 for(s=0;s<select_list.size();s++){
2620 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Deep-copy each predicate class; every copy goes into both its own list
// and the copy's where list.
2623 for(p=0;p<shared_pred.size();p++){
2624 predicate_t *new_pr = dup_pr(shared_pred[p]->pr, NULL);
2625 cnf_elem *new_cnf = new cnf_elem(new_pr);
2626 analyze_cnf(new_cnf);
2627 fta_node->shared_pred.push_back(new_cnf);
2628 fta_node->where.push_back(new_cnf);
2630 for(p=0;p<pred_t0.size();p++){
2631 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2632 cnf_elem *new_cnf = new cnf_elem(new_pr);
2633 analyze_cnf(new_cnf);
2634 fta_node->pred_t0.push_back(new_cnf);
2635 fta_node->where.push_back(new_cnf);
2637 for(p=0;p<pred_t1.size();p++){
2638 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2639 cnf_elem *new_cnf = new cnf_elem(new_pr);
2640 analyze_cnf(new_cnf);
2641 fta_node->pred_t1.push_back(new_cnf);
2642 fta_node->where.push_back(new_cnf);
2644 for(p=0;p<hash_eq.size();p++){
2645 predicate_t *new_pr = dup_pr(hash_eq[p]->pr, NULL);
2646 cnf_elem *new_cnf = new cnf_elem(new_pr);
2647 analyze_cnf(new_cnf);
2648 fta_node->hash_eq.push_back(new_cnf);
2649 fta_node->where.push_back(new_cnf);
2651 for(p=0;p<postfilter.size();p++){
2652 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2653 cnf_elem *new_cnf = new cnf_elem(new_pr);
2654 analyze_cnf(new_cnf);
2655 fta_node->postfilter.push_back(new_cnf);
2656 fta_node->where.push_back(new_cnf);
2659 // Xfer all of the parameters.
2660 // Use existing handle annotations.
2661 vector<string> param_names = param_tbl->get_param_names();
2663 for(pi=0;pi<param_names.size();pi++){
2664 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2665 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2666 param_tbl->handle_access(param_names[pi]));
2668 fta_node->definitions = definitions;
// A non-zero result from resolving interface params is recorded on this
// node as error code 3 (message goes to this->err_str).
2669 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2670 this->error_code = 3;
2674 ret_vec.push_back(fta_node);
// Build the merge over the per-interface copies, using the first copy as
// the template and this node's name.
2677 mrg_qpn *mrg_node = new mrg_qpn((filter_join_qpn *)ret_vec[0],
2678 node_name, sel_names,ifaces, ifdb);
2679 ret_vec.push_back(mrg_node);
// Split a watchlist join for execution at the FTA level.
// With a single interface the node itself is returned: from[0] is bound to
// the interface and from[1] to the "_local_" interface on the same machine.
// With several interfaces, one copy of the join is built per interface and
// a merge node over those copies is appended.
//   Ext_fcns  - external function list used by the fta-safety checks
//   ifdb      - interface database used to resolve interfaces and interface params
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces
// Schema and hfta_returned are not referenced in the visible lines.
// Returns the resulting query plan nodes.
// NOTE(review): branch keywords, returns and some declarations are not
// visible in this view.
2690 vector<qp_node *> watch_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2691 vector<qp_node *> ret_vec;
2693 // First check if the query can be pushed to the FTA.
// Every select expression and every where predicate must pass the check.
2696 for(s=0;s<select_list.size();s++){
2697 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2700 for(p=0;p<where.size();p++){
2701 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2705 fprintf(stderr,"ERROR, watchlist join %s is fta-unsafe.\n",node_name.c_str());
2709 // Can it be done in a single lfta?
2710 // Get the set of interfaces it accesses.
2713 vector<string> sel_names;
// Interfaces are resolved from the first FROM entry only.
2714 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2715 if (ifaces.empty()) {
// NOTE(review): the message names filter_join_qpn although this is
// watch_join_qpn::split_node_for_fta - looks like a copy/paste slip.
2716 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2720 if(ifaces.size() == 1){
2721 // Single interface, no need to merge.
2723 ret_vec.push_back(this);
2725 // Treat the range vars a bit differently, the 2nd is reading from a _local_ watchlist.
2726 from[0]->set_machine(ifaces[0].first);
2727 from[0]->set_interface(ifaces[0].second);
2728 from[0]->set_ifq(false);
2730 from[1]->set_machine(ifaces[0].first);
2731 from[1]->set_interface("_local_");
2732 from[1]->set_ifq(false);
2736 // Multiple interfaces, generate the interface-specific queries plus
// NOTE(review): this second sel_names shadows the one declared above
// (presumably in a nested scope) - a single declaration would suffice.
2740 vector<string> sel_names;
2741 for(si=0;si<ifaces.size();++si){
2742 watch_join_qpn *fta_node = new watch_join_qpn();
// NOTE(review): this size()==1 test sits under the "Multiple interfaces"
// path and looks unreachable there - confirm against the full branch structure.
2745 if(ifaces.size()==1)
2746 fta_node->set_node_name( node_name );
// Per-interface name: "_" + node name + "_" + machine + "_" + interface.
2748 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2750 fta_node->set_node_name(new_name);
2752 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM entries. The interface is set either to this
// interface or to "_local_"; the selecting condition is not visible here
// (presumably first entry vs. the watchlist entry, as in the single-interface path).
2756 for(f=0;f<from.size();f++){
2757 fta_node->from.push_back(from[f]->duplicate());
2758 fta_node->from[f]->set_machine(ifaces[si].first);
2760 fta_node->from[f]->set_interface(ifaces[si].second);
2762 fta_node->from[f]->set_interface("_local_");
2763 fta_node->from[f]->set_ifq(false);
// Deep-copy the select list.
2766 for(s=0;s<select_list.size();s++){
2767 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Deep-copy each predicate class; every copy is also added to the copy's
// where list.
2770 for(p=0;p<pred_t0.size();p++){
2771 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2772 cnf_elem *new_cnf = new cnf_elem(new_pr);
2773 analyze_cnf(new_cnf);
2774 fta_node->pred_t0.push_back(new_cnf);
2775 fta_node->where.push_back(new_cnf);
2777 for(p=0;p<pred_t1.size();p++){
2778 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2779 cnf_elem *new_cnf = new cnf_elem(new_pr);
2780 analyze_cnf(new_cnf);
2781 fta_node->pred_t1.push_back(new_cnf);
2782 fta_node->where.push_back(new_cnf);
// hash_eq is indexed by key field name here.
2784 for(p=0;p<key_flds.size();p++){ // we've checked that all keys are covered
2785 string k = key_flds[p];
2786 predicate_t *new_pr = dup_pr(hash_eq[k]->pr, NULL);
2787 cnf_elem *new_cnf = new cnf_elem(new_pr);
2788 analyze_cnf(new_cnf);
2789 fta_node->hash_eq[k] = new_cnf;
2790 fta_node->where.push_back(new_cnf);
2792 for(p=0;p<join_filter.size();p++){
2793 predicate_t *new_pr = dup_pr(join_filter[p]->pr, NULL);
2794 cnf_elem *new_cnf = new cnf_elem(new_pr);
2795 analyze_cnf(new_cnf);
// NOTE(review): copies of join_filter predicates are stored in the copy's
// postfilter list, not its join_filter list, unlike every other predicate
// class here - confirm whether join_filter was intended.
2796 fta_node->postfilter.push_back(new_cnf);
2797 fta_node->where.push_back(new_cnf);
2799 for(p=0;p<postfilter.size();p++){
2800 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2801 cnf_elem *new_cnf = new cnf_elem(new_pr);
2802 analyze_cnf(new_cnf);
2803 fta_node->postfilter.push_back(new_cnf);
2804 fta_node->where.push_back(new_cnf);
2806 fta_node->key_flds = key_flds;
2808 // Xfer all of the parameters.
2809 // Use existing handle annotations.
2810 vector<string> param_names = param_tbl->get_param_names();
2812 for(pi=0;pi<param_names.size();pi++){
2813 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2814 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2815 param_tbl->handle_access(param_names[pi]));
2817 fta_node->definitions = definitions;
// A non-zero result from resolving interface params is recorded on this
// node as error code 3 (message goes to this->err_str).
2818 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2819 this->error_code = 3;
2823 ret_vec.push_back(fta_node);
// Build the merge over the per-interface copies, using the first copy as
// the template and this node's name.
2826 mrg_qpn *mrg_node = new mrg_qpn((watch_join_qpn *)ret_vec[0],
2827 node_name, sel_names,ifaces, ifdb);
2828 ret_vec.push_back(mrg_node);
2835 // Use to search for unresolved interface param refs in an hfta.
// Count interface-parameter references in this selection/projection node.
// Sums count_se_ifp_refs over the select list and count_pr_ifp_refs over
// the where clauses; ifpnames is passed through to both helpers.
2837 int spx_qpn::count_ifp_refs(set<string> &ifpnames){
2840 for(i=0;i<select_list.size();++i)
2841 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2842 for(i=0;i<where.size();++i)
2843 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Count interface-parameter references in this aggregation node.
// Covers the select list, where and having clauses, aggregate operands and
// group-by definitions; ifpnames is passed through to the helpers.
2847 int sgah_qpn::count_ifp_refs(set<string> &ifpnames){
2850 for(i=0;i<select_list.size();++i)
2851 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2852 for(i=0;i<where.size();++i)
2853 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2854 for(i=0;i<having.size();++i)
2855 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
// Built-in aggregates other than star aggregates have a single operand
// expression; the operand-list form below presumably handles the remaining
// aggregates (the else keyword is not visible here).
2856 for(i=0;i<aggr_tbl.size();++i){
2857 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2858 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
2860 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2861 for(j=0;j<opl.size();++j)
2862 ret += count_se_ifp_refs(opl[j],ifpnames);
// Group-by variable definitions.
2865 for(i=0;i<gb_tbl.size();++i){
2866 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Count interface-parameter references in this running-aggregation node.
// Covers the select list, where, having and closing_when clauses, aggregate
// operands and group-by definitions; ifpnames is passed through to the helpers.
2872 int rsgah_qpn::count_ifp_refs(set<string> &ifpnames){
2875 for(i=0;i<select_list.size();++i)
2876 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2877 for(i=0;i<where.size();++i)
2878 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2879 for(i=0;i<having.size();++i)
2880 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2881 for(i=0;i<closing_when.size();++i)
2882 ret += count_pr_ifp_refs(closing_when[i]->pr,ifpnames);
// Built-in aggregates other than star aggregates have a single operand
// expression; the operand-list form below presumably handles the remaining
// aggregates (the else keyword is not visible here).
2883 for(i=0;i<aggr_tbl.size();++i){
2884 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2885 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
2887 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2888 for(j=0;j<opl.size();++j)
2889 ret += count_se_ifp_refs(opl[j],ifpnames);
// Group-by variable definitions.
2892 for(i=0;i<gb_tbl.size();++i){
2893 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Count interface-parameter references in a watch-table node.
// NOTE(review): the body is not visible in this view; presumably there is
// nothing to count for this node type - confirm.
2898 int watch_tbl_qpn::count_ifp_refs(set<string> &ifpnames){
// Count interface-parameter references in a merge node.
// NOTE(review): the body is not visible in this view; presumably there is
// nothing to count for this node type - confirm.
2902 int mrg_qpn::count_ifp_refs(set<string> &ifpnames){
// Count interface-parameter references in this hash join node.
// Covers the select list and each predicate class of the join: both
// prefilters, temporal_eq, hash_eq and postfilter.
2906 int join_eq_hash_qpn::count_ifp_refs(set<string> &ifpnames){
2909 for(i=0;i<select_list.size();++i)
2910 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2911 for(i=0;i<prefilter[0].size();++i)
2912 ret += count_pr_ifp_refs(prefilter[0][i]->pr,ifpnames);
2913 for(i=0;i<prefilter[1].size();++i)
2914 ret += count_pr_ifp_refs(prefilter[1][i]->pr,ifpnames);
2915 for(i=0;i<temporal_eq.size();++i)
2916 ret += count_pr_ifp_refs(temporal_eq[i]->pr,ifpnames);
2917 for(i=0;i<hash_eq.size();++i)
2918 ret += count_pr_ifp_refs(hash_eq[i]->pr,ifpnames);
2919 for(i=0;i<postfilter.size();++i)
2920 ret += count_pr_ifp_refs(postfilter[i]->pr,ifpnames);
// Count interface-parameter references in this filter join node.
// Sums count_se_ifp_refs over the select list and count_pr_ifp_refs over
// the where clauses; ifpnames is passed through to both helpers.
2924 int filter_join_qpn::count_ifp_refs(set<string> &ifpnames){
2927 for(i=0;i<select_list.size();++i)
2928 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2929 for(i=0;i<where.size();++i)
2930 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Count interface-parameter references in this watchlist join node.
// Sums count_se_ifp_refs over the select list and count_pr_ifp_refs over
// the where clauses; ifpnames is passed through to both helpers.
2934 int watch_join_qpn::count_ifp_refs(set<string> &ifpnames){
2937 for(i=0;i<select_list.size();++i)
2938 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2939 for(i=0;i<where.size();++i)
2940 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2946 // Resolve interface params to string literals
// Resolve interface-parameter references in the select list and where
// clauses, using the machine and interface of the first FROM entry.
//   ifdb - interface database consulted by the resolve helpers
//   err  - passed to the helpers to receive an error description
// NOTE(review): the return statements are not visible; callers in this file
// treat a non-zero result as failure.
2947 int filter_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2950 string ifname = from[0]->get_interface();
2951 string ifmach = from[0]->get_machine();
2952 for(i=0;i<select_list.size();++i)
2953 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2955 for(i=0;i<where.size();++i)
2956 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and where
// clauses, using the machine and interface of the first FROM entry.
//   ifdb - interface database consulted by the resolve helpers
//   err  - passed to the helpers to receive an error description
// NOTE(review): the return statements are not visible; callers in this file
// treat a non-zero result as failure.
2961 int watch_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2964 string ifname = from[0]->get_interface();
2965 string ifmach = from[0]->get_machine();
2966 for(i=0;i<select_list.size();++i)
2967 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2969 for(i=0;i<where.size();++i)
2970 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and where
// clauses, using the machine and interface of table_name.
//   ifdb - interface database consulted by the resolve helpers
//   err  - passed to the helpers to receive an error description
// NOTE(review): the return statements are not visible; callers in this file
// treat a non-zero result as failure.
2976 int spx_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2979 string ifname = table_name->get_interface();
2980 string ifmach = table_name->get_machine();
2981 for(i=0;i<select_list.size();++i)
2982 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2984 for(i=0;i<where.size();++i)
2985 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references throughout this aggregation node:
// select list, where and having clauses, aggregate operands and group-by
// definitions, using the machine and interface of table_name.
//   ifdb - interface database consulted by the resolve helpers
//   err  - passed to the helpers to receive an error description
// NOTE(review): the return statements are not visible; callers in this file
// treat a non-zero result as failure.
2990 int sgah_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2993 string ifname = table_name->get_interface();
2994 string ifmach = table_name->get_machine();
2996 //printf("Select list has %d elements\n",select_list.size());
2997 for(i=0;i<select_list.size();++i){
2998 //printf("\tresolving elemet %d\n",i);
2999 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) ){
3003 for(i=0;i<where.size();++i){
3004 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err) )
3007 for(i=0;i<having.size();++i){
3008 if( resolve_pr_ifp_refs(having[i]->pr,ifmach, ifname, ifdb, err) )
3011 //printf("aggr list has %d elements\n",select_list.size());
// Built-in non-star aggregates carry one operand expression; the
// operand-list form below is used for the other aggregates (see the
// "udaf" trace comment).
3012 for(i=0;i<aggr_tbl.size();++i){
3013 //printf("\tresolving elemet %d\n",i);
3014 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
3015 //printf("\t\t\tbuiltin\n");
3016 if( resolve_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifmach, ifname, ifdb, err) )
3019 //printf("\t\t\tudaf\n");
3020 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
3021 for(j=0;j<opl.size();++j)
3022 if( resolve_se_ifp_refs(opl[j],ifmach, ifname, ifdb, err) )
// Group-by variable definitions.
3026 for(i=0;i<gb_tbl.size();++i){
3027 if( resolve_se_ifp_refs(gb_tbl.get_def(i), ifmach, ifname, ifdb, err) )
3036 SPLITTING A SELECTION_PROJECTION OPERATOR
3038 An SPX node may reference:
3039 literals, parameters, colrefs, functions, operators
3040 An SPX node may not reference:
3041 group-by variables, aggregates
3043 An SPX node contains
3044 selection list of SEs
3045 where list of CNF predicates
3048 If each selection SE and each where predicate is fta-safe
3049 execute entire operator as an LFTA.
3051 for each predicate in the where clause
3052 if it is fta safe, execute it in the lfta
3053 else, split each SE in the predicate, evaluate the
3054 top-level SEs in the hfta and eval the predicate on that.
3055 For each SE in the se list
3056 Split the SE, eval the high level part, push onto hfta
3060 A SE represents a value which must be computed. The LFTA
3061 must provide sub-values from which the HFTA can compute the
3063 1) the SE is fta-safe
3064 Create an entry in the selection list of the LFTA which is
3065 the SE itself. Reference this LFTA selection list entry in
3066 the HFTA (via a field name assigned to the lfta selection
3068 2) The SE is not fta-safe
3069 Determine the boundary between the fta-safe and the fta-unsafe
3070 portions of the SE. The result is a rooted tree (which is
3071 evaluated at the HFTA) which references sub-SEs (which are
3072 evaluated at the LFTA). Each of the sub-SEs is placed on
3073 the selection list of the LFTA and assigned field names,
3074 the top part is evaluated at the HFTA and references the
3075 sub-SEs through their assigned field names.
3076 The only SEs on the LFTA selection list are those created by
3077 the above mechanism. The collection of assigned field names becomes
3078 the schema of the LFTA.
3080 TODO: insert tablevar names into the colrefs.
// Split a selection/projection node into the part that runs as an LFTA and,
// when needed, a stream (HFTA) part.
// Visible paths:
//   - source is not a PROTOCOL_SCHEMA table: the node is returned unsplit;
//   - everything is fta-safe: one copy of the query per interface, plus a
//     merge node when there is more than one interface;
//   - otherwise: select expressions and where predicates are divided between
//     an fta node and a stream node, and the fta part is replicated per
//     interface (with a merge) when there is more than one interface.
//   Ext_fcns      - external function list used by the fta-safety checks and splitters
//   Schema        - table list used to look up the schema type of the source
//   hfta_returned - set to the number of hfta nodes returned (visible on the
//                   multi-interface split path)
//   ifdb          - interface database used to resolve interfaces and interface params
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces
// Returns the resulting query plan nodes.
// NOTE(review): branch keywords, returns and some declarations are not
// visible in this view.
3084 vector<qp_node *> spx_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3087 vector<qp_node *> ret_vec;
3089 // If the node reads from a stream, don't split.
3090 // int t = Schema->get_table_ref(table_name->get_schema_name());
3091 int t = table_name->get_schema_ref();
3092 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3094 ret_vec.push_back(this);
3099 // Get the set of interfaces it accesses.
3102 vector<string> sel_names;
3103 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3104 if (ifaces.empty()) {
3105 fprintf(stderr,"INTERNAL ERROR in spx_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3110 // The FTA node, it is always returned.
// NOTE(review): fta_node is reassigned to a fresh spx_qpn inside the
// per-interface loop below, so on that path this first allocation appears
// to be leaked - confirm.
3112 spx_qpn *fta_node = new spx_qpn();
3113 fta_node->table_name = table_name;
3115 // for colname imputation
3116 // vector<string> fta_flds, stream_flds;
3119 // First check if the query can be pushed to the FTA.
3122 for(s=0;s<select_list.size();s++){
3123 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
3126 for(p=0;p<where.size();p++){
3127 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
3131 ////////////////////////////////////////////////////////////
3132 // The query can be executed entirely in the FTA.
// One full copy of the query is created per interface.
3135 for(si=0;si<ifaces.size();++si){
3136 fta_node = new spx_qpn();
// A single interface keeps this node's name; otherwise the name is
// "_" + node name + "_" + machine + "_" + interface.
3139 if(ifaces.size()==1)
3140 fta_node->set_node_name( node_name );
3142 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3144 fta_node->set_node_name(new_name);
3146 sel_names.push_back(fta_node->get_node_name());
// Bind a duplicate of the table variable to this interface.
3149 fta_node->table_name = table_name->duplicate();
3150 fta_node->table_name->set_machine(ifaces[si].first);
3151 fta_node->table_name->set_interface(ifaces[si].second);
3152 fta_node->table_name->set_ifq(false);
// Deep-copy the select list and where clauses.
3154 for(s=0;s<select_list.size();s++){
3155 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
3157 for(p=0;p<where.size();p++){
3158 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
3159 cnf_elem *new_cnf = new cnf_elem(new_pr);
3160 analyze_cnf(new_cnf);
3162 fta_node->where.push_back(new_cnf);
3165 // Xfer all of the parameters.
3166 // Use existing handle annotations.
3167 vector<string> param_names = param_tbl->get_param_names();
3169 for(pi=0;pi<param_names.size();pi++){
3170 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3171 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3172 param_tbl->handle_access(param_names[pi]));
3174 fta_node->definitions = definitions;
// A non-zero result from resolving interface params is recorded on this
// node as error code 3 (message goes to this->err_str).
3175 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3176 this->error_code = 3;
3180 ret_vec.push_back(fta_node);
// More than one interface: add a merge over the per-interface copies,
// using the first copy as the template.
3183 if(ifaces.size() > 1){
3184 spx_qpn *tmp_spx = (spx_qpn *)(ret_vec[0]);
3185 mrg_qpn *mrg_node = new mrg_qpn(tmp_spx,
3186 node_name, sel_names,ifaces, ifdb);
// NOTE(review): the next five lines (starting with free text) presumably sit
// inside a block comment whose delimiters are not visible - confirm.
3188 Do not split sources until we are done with optimizations
3189 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3190 for(i=0;i<split_merge.size();++i){
3191 ret_vec.push_back(split_merge[i]);
3193 hfta_returned = split_merge.size();
3195 ret_vec.push_back(mrg_node);
3200 // printf("OK as FTA.\n");
3201 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3206 ////////////////////////////////////////////////////
3207 // The fta must be split. Create a stream node.
3208 // NOTE : I am counting on the single
3209 // table in the from list. (Joins handled in a different operator).
3213 spx_qpn *stream_node = new spx_qpn();
3214 stream_node->set_node_name( node_name );
3215 // Create the tablevar in the stream's FROM clause.
3216 // set the schema name to the name of the LFTA,
3217 // and use the same tablevar name.
3218 stream_node->table_name = new tablevar_t(
3219 ("_fta_"+node_name).c_str()
3221 stream_node->table_name->set_range_var(table_name->get_var_name());
3224 fta_node->set_node_name( "_fta_"+node_name );
3226 // table var names of fta, stream.
3227 string fta_var = fta_node->table_name->get_var_name();
3228 string stream_var = stream_node->table_name->get_var_name();
3230 // Set up select list vector
3231 vector< vector<select_element *> *> select_vec;
3232 select_vec.push_back(&(fta_node->select_list)); // only one child
3235 // Split the select list into its FTA and stream parts.
3236 // If any part of the SE is fta-unsafe, it will return
3237 // a SE to execute at the stream ref'ing SE's evaluated
3238 // at the fta (which are put on the FTA's select list as a side effect).
3239 // If the SE is fta-safe, put it on the fta select list, make
3240 // a ref to it and put the ref on the stream select list.
3241 for(s=0;s<select_list.size();s++){
3242 bool fta_forbidden = false;
3243 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3244 // scalarexp_t *root_se = split_fta_se(
3245 // select_list[s]->se,fta_forbidden, fta_node->select_list, Ext_fcns
3247 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
3248 fta_forbidden, se_src, select_vec, Ext_fcns
3250 // if(fta_forbidden){
// Expressions that are fta-forbidden, or that reference no table variable,
// go to the stream select list as returned; the other case wraps root_se in
// a reference to the fta select list entry.
3251 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3252 stream_node->select_list.push_back(
3253 new select_element(root_se, select_list[s]->name)
3256 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,root_se,0);
3257 stream_node->select_list.push_back(
3258 new select_element(new_se, select_list[s]->name)
3264 // The WHERE clause has already been split into a set of clauses
3265 // that are ANDED together. For each clause, check if its FTA-safe.
3266 // If not, split its SE's into fta-safe and stream-executing parts,
3267 // then put a clause which ref's the SEs into the stream.
3268 // Else put it into the LFTA.
3269 predicate_t *pr_root;
3271 for(p=0;p<where.size();p++){
3272 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) ){
3273 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3274 // pr_root = split_fta_pr( where[p]->pr, fta_node->select_list, Ext_fcns);
3275 fta_forbidden = true;
3277 pr_root = dup_pr(where[p]->pr, NULL);
3278 fta_forbidden = false;
3280 cnf_elem *cnf_root = new cnf_elem(pr_root);
3281 analyze_cnf(cnf_root);
// The clause is added to exactly one side; presumably fta_forbidden selects
// the stream node (selecting condition not visible here).
3284 stream_node->where.push_back(cnf_root);
3286 fta_node->where.push_back(cnf_root);
3292 // Divide the parameters among the stream, FTA.
3293 // Currently : assume that the stream receives all parameters
3294 // and parameter updates, incorporates them, then passes
3295 // all of the parameters to the FTA.
3296 // This will need to change (tables, fta-unsafe types. etc.)
3298 // I will pass on the use_handle_access marking, even
3299 // though the fcn call that requires handle access might
3300 // exist in only one of the parts of the query.
3301 // Parameter manipulation and handle access determination will
3302 // need to be revisited anyway.
3303 vector<string> param_names = param_tbl->get_param_names();
3305 for(pi=0;pi<param_names.size();pi++){
3306 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3307 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3308 param_tbl->handle_access(param_names[pi]));
3309 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3310 param_tbl->handle_access(param_names[pi]));
// The fta side gets the definitions without "_referenced_ifaces"; the
// stream side keeps them all.
3313 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3314 stream_node->definitions = definitions;
3316 // Now split by interfaces
3317 if(ifaces.size() > 1){
// One sub-query per interface, cloned from the fta node built above.
3318 for(si=0;si<ifaces.size();++si){
3319 spx_qpn *subq_node = new spx_qpn();
3321 // Name the subquery
3322 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3324 subq_node->set_node_name( new_name) ;
3325 sel_names.push_back(subq_node->get_node_name());
3328 subq_node->table_name = fta_node->table_name->duplicate();
3329 subq_node->table_name->set_machine(ifaces[si].first);
3330 subq_node->table_name->set_interface(ifaces[si].second);
3331 subq_node->table_name->set_ifq(false);
3333 for(s=0;s<fta_node->select_list.size();s++){
3334 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3336 for(p=0;p<fta_node->where.size();p++){
3337 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3338 cnf_elem *new_cnf = new cnf_elem(new_pr);
3339 analyze_cnf(new_cnf);
3341 subq_node->where.push_back(new_cnf);
3343 // Xfer all of the parameters.
3344 // Use existing handle annotations.
3345 vector<string> param_names = param_tbl->get_param_names();
3347 for(pi=0;pi<param_names.size();pi++){
3348 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3349 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3350 param_tbl->handle_access(param_names[pi]));
3352 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3353 this->error_code = 3;
3356 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3358 ret_vec.push_back(subq_node);
// Merge the per-interface sub-queries under the fta node's name, using the
// first sub-query as the template.
3361 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3362 fta_node->node_name, sel_names, ifaces, ifdb);
// NOTE(review): the next four lines (starting with free text) presumably sit
// inside a block comment whose delimiters are not visible - confirm.
3364 Do not split sources until we are done with optimizations
3365 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3366 for(i=0;i<split_merge.size();++i){
3367 ret_vec.push_back(split_merge[i]);
// Two hfta nodes are reported: the merge and the stream node.
3370 ret_vec.push_back(mrg_node);
3371 ret_vec.push_back(stream_node);
3372 hfta_returned = 1/*split_merge.size()*/ + 1;
// Single interface: bind the fta node directly and return it with the
// stream node.
3375 fta_node->table_name->set_machine(ifaces[0].first);
3376 fta_node->table_name->set_interface(ifaces[0].second);
3377 fta_node->table_name->set_ifq(false);
3378 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3379 this->error_code = 3;
3382 ret_vec.push_back(fta_node);
3383 ret_vec.push_back(stream_node);
3387 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3388 // printf("Stream node is:\n%s\n\n",stream_node->to_query_string().c_str() );
3396 Splitting an aggregation+sampling operator.
3397 right now, return an error if any splitting is required.
// Splitting of an aggregation+sampling node is not implemented.
// A node whose source is not a PROTOCOL_SCHEMA table is returned unsplit;
// otherwise an error is printed.
// Only Schema is referenced in the visible lines.
// NOTE(review): what happens after the error message (exit or error return)
// is not visible in this view.
3400 vector<qp_node *> sgahcwcb_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3404 vector<qp_node *> ret_vec;
3405 int s, p, g, a, o, i;
3408 vector<string> fta_flds, stream_flds;
3410 // If the node reads from a stream, don't split.
3411 // int t = Schema->get_table_ref(table_name->get_schema_name());
3412 int t = table_name->get_schema_ref();
3413 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3414 ret_vec.push_back(this);
3418 fprintf(stderr,"ERROR : cannot split a sampling operator (not yet implemented).\n");
3428 Splitting a running aggregation operator.
3429 The code is almost identical to that of the sgah operator
3431 - there is no lfta-only option.
3432 - the stream node is rsgah_qpn (lfta is sgah or spx)
3433 - need to handle the closing when (similar to having)
// Split a running-aggregation node (rsgah_qpn) into a low-level part
// ("_fta_<node_name>") and a high-level rsgah_qpn "stream" part that keeps
// this node's name and reads from "_fta_<node_name>".
//
// Two layouts are built in this function:
//   - aggr/aggr      : the low-level part is an sgah_qpn computing
//                      sub-aggregates; the stream node combines them.
//   - selection/aggr : the low-level part is an spx_qpn (select/project only);
//                      the stream node performs all of the aggregation.
// NOTE(review): the statement that chooses between the two layouts
// (presumably driven by select_rqd) is not visible in this excerpt - confirm.
//
// Parameters:
//   Ext_fcns        - external function table; used for the LFTA-safety
//                     checks, the UDAF sub/super aggregate lookup and the
//                     UDAF return type.
//   Schema          - used to look up the schema type of this node's source.
//   hfta_returned   - output; set to 2 in the multi-interface branches shown
//                     here (other assignments are not visible in this excerpt).
//   ifdb            - interface database; passed to get_ifaces,
//                     resolve_if_params and the mrg_qpn constructor.
//   n_virtual_ifaces, hfta_parallelism, hfta_idx
//                   - forwarded unchanged to get_ifaces.
//
// Returns the vector of nodes that replace this node:
//   - { this } when the source schema is not PROTOCOL_SCHEMA;
//   - { one subquery per interface..., merge node, stream node } when more
//     than one interface is accessed;
//   - { fta_node, stream_node } otherwise.
//
// Errors are reported through this->error_code / this->err_str:
//   error_code 1 - an expression, predicate or UDAF cannot run in the LFTA and
//                  select_lfta is not enabled;
//   error_code 3 - resolve_if_params reported a failure.
3436 vector<qp_node *> rsgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3440 vector<qp_node *> ret_vec;
3441 int s, p, g, a, o, i;
3444 vector<string> fta_flds, stream_flds;
3446 // If the node reads from a stream (i.e. its schema is not a protocol schema), don't split.
3447 // int t = Schema->get_table_ref(table_name->get_schema_name());
3448 int t = table_name->get_schema_ref();
3449 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3450 ret_vec.push_back(this);
3454 // Get the set of interfaces it accesses.
// Each entry is a (machine, interface) pair - see set_machine(first) / set_interface(second) below.
3456 vector<string> sel_names;
3457 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3458 if (ifaces.empty()) {
3459 fprintf(stderr,"INTERNAL ERROR in rsgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3466 //////////////////////////////////////////////////////////////
3467 /// Split into lfta, hfta.
3469 // A rsgah node must always be split,
3470 // if for no other reason than to complete the
3471 // partial aggregation.
3473 // First, determine if the query can be split into aggr/aggr,
3474 // or if it must be selection/aggr.
3475 // Splitting into selection/aggr is allowed only
3476 // if select_lfta is set.
3479 bool select_allowed = definitions.count("select_lfta")>0;
3480 bool select_rqd = false;
// A false result from check_fta_forbidden_se is treated as "not LFTA-safe"
// (see the error text below).
3482 set<int> unsafe_gbvars; // for processing where clause
3483 for(g=0;g<gb_tbl.size();g++){
3484 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3485 if(!select_allowed){
3486 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3487 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3489 this->error_code = 1;
3490 this->err_str = tmpstr;
// Remember the unsafe gbvar: WHERE predicates that reference it are checked
// with contains_gb_pr in the selection/aggr split below.
3494 unsafe_gbvars.insert(g);
3499 // Verify that the SEs in the aggregate definitions are fta-safe
3500 for(a=0;a<aggr_tbl.size();++a){
3501 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3502 if(ase != NULL){ // COUNT(*) does not have a SE.
3503 if(!select_allowed){
3504 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3505 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3506 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3508 this->error_code = 1;
3509 this->err_str = tmpstr;
3518 // Verify that all of the ref'd UDAFs can be split.
// A UDAF is splittable only if both its super- and sub-aggregate ids are non-negative.
3520 for(a=0;a<aggr_tbl.size();++a){
3521 if(! aggr_tbl.is_builtin(a)){
3522 int afcn = aggr_tbl.get_fcn_id(a);
3523 int super_id = Ext_fcns->get_superaggr_id(afcn);
3524 int sub_id = Ext_fcns->get_subaggr_id(afcn);
3525 if(super_id < 0 || sub_id < 0){
3526 if(!select_allowed){
// Note: this message is appended (+=) to err_str, unlike the sprintf-based
// messages above which overwrite it.
3527 this->err_str += "ERROR in rsgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3528 this->error_code = 1;
// Every WHERE predicate must be FTA-safe unless select_lfta is enabled.
3537 for(p=0;p<where.size();p++){
3538 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3539 if(!select_allowed){
3540 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3541 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3543 this->error_code = 1;
3544 this->err_str = tmpstr;
3555 /////////////////////////////////////////////////////
3556 // Split into aggr/aggr.
// Low-level part: an sgah_qpn named "_fta_<node_name>".
// It takes this node's table_name pointer as-is (no duplicate()), so later
// set_machine/set_interface/set_ifq calls on fta_node->table_name act on the
// same tablevar object this node refers to.
3562 sgah_qpn *fta_node = new sgah_qpn();
3563 fta_node->table_name = table_name;
3564 fta_node->set_node_name( "_fta_"+node_name );
3565 fta_node->table_name->set_range_var(table_name->get_var_name());
// High-level part: an rsgah_qpn that keeps this node's name and reads from
// a new table variable named "_fta_<node_name>".
3568 rsgah_qpn *stream_node = new rsgah_qpn();
3569 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3570 stream_node->set_node_name( node_name );
3571 stream_node->table_name->set_range_var(table_name->get_var_name());
3573 // First, process the group-by variables.
3574 // The fta must supply the values of all the gbvars.
3575 // If a gb is computed, the computation must be
3576 // performed at the FTA, so the SE must be FTA-safe.
3577 // Nice side effect : the gbvar table contains
3578 // matching entries for the original query, the lfta query,
3579 // and the hfta query. So gbrefs in the new queries are set
3580 // correctly just by inheriting the gbrefs from the old query.
3581 // If this property changed, I'll need translation tables.
3584 for(g=0;g<gb_tbl.size();g++){
3585 // Insert the gbvar into the lfta.
3586 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
3587 fta_node->gb_tbl.add_gb_var(
3588 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
3591 // Insert a ref to the value of the gbvar into the lfta select list.
3592 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
3593 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
3594 gbvar_fta->set_gb_ref(g);
3595 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
3596 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
3598 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
3599 gbvar_stream->set_gb_ref(-1); // used as GBvar def
3600 stream_node->gb_tbl.add_gb_var(
3601 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
3606 // SEs in the aggregate definitions.
3607 // They are all safe, so split them up for later processing.
// hfta_aggr_se is indexed by aggregate id and is consumed by the
// rehome_fta_se / rehome_fta_pr calls below.
3608 map<int, scalarexp_t *> hfta_aggr_se;
3609 for(a=0;a<aggr_tbl.size();++a){
3610 split_fta_aggr( &(aggr_tbl), a,
3611 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
3612 fta_node->select_list,
3619 // Next, the select list.
// The original select list goes to the stream node, rehomed via hfta_aggr_se.
3621 for(s=0;s<select_list.size();s++){
3622 bool fta_forbidden = false;
3623 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3624 stream_node->select_list.push_back(
3625 new select_element(root_se, select_list[s]->name));
3630 // All the predicates in the where clause must execute
// (the loop below copies every WHERE predicate into fta_node)
3633 for(p=0;p<where.size();p++){
3634 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
3635 cnf_elem *new_cnf = new cnf_elem(new_pr);
3636 analyze_cnf(new_cnf);
3638 fta_node->where.push_back(new_cnf);
3641 // All of the predicates in the having clause must
3642 // execute in the stream node.
3644 for(p=0;p<having.size();p++){
3645 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3646 cnf_elem *cnf_root = new cnf_elem(pr_root);
3647 analyze_cnf(cnf_root);
3649 stream_node->having.push_back(cnf_root);
3652 // All of the predicates in the closing when clause must
3653 // execute in the stream node.
3655 for(p=0;p<closing_when.size();p++){
3656 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3657 cnf_elem *cnf_root = new cnf_elem(pr_root);
3658 analyze_cnf(cnf_root);
3660 stream_node->closing_when.push_back(cnf_root);
3664 // Divide the parameters among the stream, FTA.
3665 // Currently : assume that the stream receives all parameters
3666 // and parameter updates, incorporates them, then passes
3667 // all of the parameters to the FTA.
3668 // This will need to change (tables, fta-unsafe types. etc.)
3670 // I will pass on the use_handle_access marking, even
3671 // though the fcn call that requires handle access might
3672 // exist in only one of the parts of the query.
3673 // Parameter manipulation and handle access determination will
3674 // need to be revisited anyway.
3675 vector<string> param_names = param_tbl->get_param_names();
3677 for(pi=0;pi<param_names.size();pi++){
3678 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3679 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3680 param_tbl->handle_access(param_names[pi]));
3681 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3682 param_tbl->handle_access(param_names[pi]));
// Both halves inherit this node's definitions; only the lfta copy drops
// "_referenced_ifaces".
3684 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3685 stream_node->definitions = definitions;
3687 // Now split by interfaces XXXX
// More than one interface: build one sgah_qpn copy of fta_node per
// (machine, interface) pair; a mrg_qpn carrying fta_node's name is created
// over them afterwards.
3688 if(ifaces.size() > 1){
3689 for(si=0;si<ifaces.size();++si){
3690 sgah_qpn *subq_node = new sgah_qpn();
3692 // Name the subquery
3693 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3695 subq_node->set_node_name( new_name) ;
3696 sel_names.push_back(subq_node->get_node_name());
// Each subquery gets its own tablevar copy bound to one concrete interface.
3699 subq_node->table_name = fta_node->table_name->duplicate();
3700 subq_node->table_name->set_machine(ifaces[si].first);
3701 subq_node->table_name->set_interface(ifaces[si].second);
3702 subq_node->table_name->set_ifq(false);
3705 for(g=0;g<fta_node->gb_tbl.size();g++){
3706 // Insert the gbvar into the lfta.
3707 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
3708 subq_node->gb_tbl.add_gb_var(
3709 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
3713 // Insert the aggregates
3714 for(a=0;a<fta_node->aggr_tbl.size();++a){
3715 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
3718 for(s=0;s<fta_node->select_list.size();s++){
3719 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3721 for(p=0;p<fta_node->where.size();p++){
3722 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3723 cnf_elem *new_cnf = new cnf_elem(new_pr);
3724 analyze_cnf(new_cnf);
3726 subq_node->where.push_back(new_cnf);
// Nothing above in this function adds to fta_node->having (HAVING predicates
// are pushed onto stream_node), so this loop only copies entries if the
// sgah_qpn starts out with some.
3728 for(p=0;p<fta_node->having.size();p++){
3729 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
3730 cnf_elem *new_cnf = new cnf_elem(new_pr);
3731 analyze_cnf(new_cnf);
3733 subq_node->having.push_back(new_cnf);
3735 // Xfer all of the parameters.
3736 // Use existing handle annotations.
3737 vector<string> param_names = param_tbl->get_param_names();
3739 for(pi=0;pi<param_names.size();pi++){
3740 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3741 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3742 param_tbl->handle_access(param_names[pi]));
3744 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
// A true result from resolve_if_params is treated as failure (error_code 3).
3745 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3746 this->error_code = 3;
3750 ret_vec.push_back(subq_node);
// ret_vec[0] is the first per-interface subquery pushed by the loop above
// (assuming nothing else was pushed on this path); the merge node takes
// over fta_node's name.
3753 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
3754 fta_node->node_name, sel_names, ifaces, ifdb);
// NOTE(review): the next four lines look like a disabled (commented-out)
// split_sources pass whose comment delimiters are not visible in this
// excerpt - the hfta_returned line below also has split_merge.size()
// commented out. Confirm against the full file.
3757 Do not split sources until we are done with optimizations
3758 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3759 for(i=0;i<split_merge.size();++i){
3760 ret_vec.push_back(split_merge[i]);
3763 ret_vec.push_back(mrg_node);
3764 ret_vec.push_back(stream_node);
// Evaluates to 2 - presumably counting the merge node and the stream node.
3765 hfta_returned = 1/*split_merge.size()*/+1;
// NOTE(review): the branch keyword is not visible here; the lines below appear
// to be the single-interface case, binding fta_node directly to ifaces[0].
3768 fta_node->table_name->set_machine(ifaces[0].first);
3769 fta_node->table_name->set_interface(ifaces[0].second);
3770 fta_node->table_name->set_ifq(false);
3771 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3772 this->error_code = 3;
3775 ret_vec.push_back(fta_node);
3776 ret_vec.push_back(stream_node);
3781 // ret_vec.push_back(fta_node);
3782 // ret_vec.push_back(stream_node);
3789 /////////////////////////////////////////////////////////////////////
3790 /// Split into selection LFTA, aggregation HFTA.
// Low-level part is a select/project node (spx_qpn); the rsgah_qpn stream
// node receives the gbvars, aggregates, HAVING and CLOSING_WHEN clauses.
3792 spx_qpn *fta_node = new spx_qpn();
3793 fta_node->table_name = table_name;
3794 fta_node->set_node_name( "_fta_"+node_name );
3795 fta_node->table_name->set_range_var(table_name->get_var_name());
3798 rsgah_qpn *stream_node = new rsgah_qpn();
3799 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3800 stream_node->set_node_name( node_name );
3801 stream_node->table_name->set_range_var(table_name->get_var_name());
// split_ftavec_se / split_ftavec_pr take a vector of child select lists;
// here it holds only fta_node's select list.
3804 vector< vector<select_element *> *> select_vec;
3805 select_vec.push_back(&(fta_node->select_list)); // only one child
3807 // Process the gbvars. Split their defining SEs.
3808 for(g=0;g<gb_tbl.size();g++){
3809 bool fta_forbidden = false;
3810 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3812 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
3813 fta_forbidden, se_src, select_vec, Ext_fcns
3815 // if(fta_forbidden) (
// If the split reports fta_forbidden, or se_src is still
// SPLIT_FTAVEC_NOTBLVAR, the returned SE is used directly as the stream
// gbvar definition; otherwise the definition is the result of
// make_fta_se_ref on the lfta select list (presumably a reference to an lfta
// output field - confirm against make_fta_se_ref).
3816 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3817 stream_node->gb_tbl.add_gb_var(
3818 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
3821 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
3822 stream_node->gb_tbl.add_gb_var(
3823 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
3828 // Process the aggregate table.
3829 // Copy to stream, split the SEs.
3830 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
3831 for(a=0;a<aggr_tbl.size();++a){
// Built-in aggregates: a star aggregate is added with a NULL SE; any other
// built-in has its operand SE split the same way as the gbvar definitions above.
3833 if(aggr_tbl.is_builtin(a)){
3834 if(aggr_tbl.is_star_aggr(a)){
3835 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
3836 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
3838 bool fta_forbidden = false;
3839 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3841 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3842 fta_forbidden, se_src, select_vec, Ext_fcns
3844 // if(fta_forbidden) (
3845 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3846 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
3847 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
3849 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3850 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
3851 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
// Record the stream-side aggregate SE under the original aggregate id.
3854 hse->set_data_type(aggr_tbl.get_data_type(a));
3855 hse->set_aggr_id(a);
3856 hfta_aggr_se[a]=hse;
// NOTE(review): presumably the non-builtin (UDAF) branch - the else line is
// not visible. Every operand is split and the call is rebuilt on the stream node.
3858 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
3859 vector<scalarexp_t *> new_opl;
3860 for(o=0;o<opl.size();++o){
3861 bool fta_forbidden = false;
3862 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3863 scalarexp_t *agg_se = split_ftavec_se( opl[o],
3864 fta_forbidden, se_src, select_vec, Ext_fcns
3866 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3867 // fta_forbidden, se_src, select_vec, Ext_fcns
3869 // if(fta_forbidden) (
3870 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3871 new_opl.push_back(agg_se);
3873 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3874 new_opl.push_back(new_se);
3877 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
3878 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
3879 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
3880 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
3881 hse->set_aggr_id(a);
3882 hfta_aggr_se[a]=hse;
3887 // Process the WHERE clause.
3888 // If it is fta-safe AND it refs only fta-safe gbvars,
3889 // then expand the gbvars and put it into the lfta.
3890 // Else, split it into an hfta predicate ref'ing
3891 // se's computed partially in the lfta.
3893 predicate_t *pr_root;
3895 for(p=0;p<where.size();p++){
3896 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
3897 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3898 fta_forbidden = true;
3900 pr_root = dup_pr(where[p]->pr, NULL);
3901 expand_gbvars_pr(pr_root, gb_tbl);
3902 fta_forbidden = false;
3904 cnf_elem *cnf_root = new cnf_elem(pr_root);
3905 analyze_cnf(cnf_root);
// NOTE(review): the if/else selecting between the two push_backs is not
// visible; per the comment above, fta_forbidden presumably routes the
// predicate to the stream node and otherwise to the lfta.
3908 stream_node->where.push_back(cnf_root);
3910 fta_node->where.push_back(cnf_root);
3915 // Process the Select clause, rehome it on the
// (the loop below pushes the rehomed select list onto stream_node)
3917 for(s=0;s<select_list.size();s++){
3918 bool fta_forbidden = false;
3919 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3920 stream_node->select_list.push_back(
3921 new select_element(root_se, select_list[s]->name));
3925 // Process the Having clause
3927 // All of the predicates in the having clause must
3928 // execute in the stream node.
3930 for(p=0;p<having.size();p++){
3931 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3932 cnf_elem *cnf_root = new cnf_elem(pr_root);
3933 analyze_cnf(cnf_root);
3935 stream_node->having.push_back(cnf_root);
3937 // Same for closing when
3938 for(p=0;p<closing_when.size();p++){
3939 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3940 cnf_elem *cnf_root = new cnf_elem(pr_root);
3941 analyze_cnf(cnf_root);
3943 stream_node->closing_when.push_back(cnf_root);
3947 // Handle parameters and a few last details.
// Both halves receive a copy of every parameter together with its
// handle-access flag.
3948 vector<string> param_names = param_tbl->get_param_names();
3950 for(pi=0;pi<param_names.size();pi++){
3951 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3952 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3953 param_tbl->handle_access(param_names[pi]));
3954 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3955 param_tbl->handle_access(param_names[pi]));
3958 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3959 stream_node->definitions = definitions;
3961 // Now split by interfaces YYYY
// Same per-interface fan-out as in the aggr/aggr split, but the subqueries
// are spx_qpn nodes, so only the select list and WHERE predicates are copied.
3962 if(ifaces.size() > 1){
3963 for(si=0;si<ifaces.size();++si){
3964 spx_qpn *subq_node = new spx_qpn();
3966 // Name the subquery
3967 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3969 subq_node->set_node_name( new_name) ;
3970 sel_names.push_back(subq_node->get_node_name());
3973 subq_node->table_name = fta_node->table_name->duplicate();
3974 subq_node->table_name->set_machine(ifaces[si].first);
3975 subq_node->table_name->set_interface(ifaces[si].second);
3976 subq_node->table_name->set_ifq(false);
3978 for(s=0;s<fta_node->select_list.size();s++){
3979 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3981 for(p=0;p<fta_node->where.size();p++){
3982 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3983 cnf_elem *new_cnf = new cnf_elem(new_pr);
3984 analyze_cnf(new_cnf);
3986 subq_node->where.push_back(new_cnf);
3988 // Xfer all of the parameters.
3989 // Use existing handle annotations.
3990 vector<string> param_names = param_tbl->get_param_names();
3992 for(pi=0;pi<param_names.size();pi++){
3993 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3994 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3995 param_tbl->handle_access(param_names[pi]));
3997 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3998 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3999 this->error_code = 3;
4003 ret_vec.push_back(subq_node);
// The merge node is built from the first subquery (ret_vec[0]) and takes over
// fta_node's name.
4006 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4007 fta_node->node_name, sel_names, ifaces, ifdb);
// NOTE(review): as in the aggr/aggr split, the next four lines look like a
// disabled (commented-out) split_sources pass - confirm against the full file.
4009 Do not split sources until we are done with optimizations
4010 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4011 for(i=0;i<split_merge.size();++i){
4012 ret_vec.push_back(split_merge[i]);
4015 ret_vec.push_back(mrg_node);
4016 ret_vec.push_back(stream_node);
// Evaluates to 2 - presumably counting the merge node and the stream node.
4017 hfta_returned = 1/*split_merge.size()*/+1;
// NOTE(review): presumably the single-interface case (the branch keyword is
// not visible): bind fta_node directly to ifaces[0].
4020 fta_node->table_name->set_machine(ifaces[0].first);
4021 fta_node->table_name->set_interface(ifaces[0].second);
4022 fta_node->table_name->set_ifq(false);
4023 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4024 this->error_code = 3;
4027 ret_vec.push_back(fta_node);
4028 ret_vec.push_back(stream_node);
4038 Splitting an aggregation operator
4040 An aggregation operator can reference
4041 literals, parameters, colrefs, group-by vars, aggregates,
4042 operators, functions
4044 an aggregation contains
4045 A selection list of SEs
4046 A where list of predicates
4047 A list of group-by variable definitions
4048 A list of aggregates to be computed
4049 A HAVING list of predicates.
4051 Aggregation involves two phases:
4052 1) given an input tuple, determine if it satisfies all of
4053 the WHERE predicates. If so, compute the group.
4054 Look up the group, update its aggregates.
4055 2) given a closed group and its aggregates, determine
4056 if these values satisfy all of the HAVING predicates.
4057 If so, evaluate the SEs on the selection list from the
4058 group and its aggregates.
4059 The two-phase nature of aggregation places restrictions on
4060 what can be referenced by different components of the operator
4061 (in addition to functions and operators).
4062 - group-by variables : literals, parameters, colrefs
4063 - WHERE predicates : group-by vars, literals, params, colrefs
4064 - HAVING predicates : group-by vars, literals, params, aggregates
4065 - Selection list SEs : group-by vars, literals, params, aggregates
4067 Splitting an aggregation operator into an LFTA/HFTA part
4068 involves performing partial aggregation at the LFTA and
4069 completing the aggregation at the HFTA.
4070 - given a tuple, the LFTA part evaluates the WHERE clause,
4071 and if it is satisfied, computes the group. lookup the group
4072 and update the aggregates. output the group and its partial
4074 - Given a partial aggregate from the LFTA, look up the group and
4075 update its aggregates. When the group is closed, evaluate
4076 the HAVING clause and the SEs on the selection list.
4077 THEREFORE the selection list of the LFTA must consist of the
4078 group-by variables and the set of (bare) subaggregate values
4079 necessary to compute the super aggregates.
4080 Unlike the case with the SPX operator, the SE splitting point
4081 is at the GBvar and the aggregate value level.
4084 For each group-by variable
4085 Put the GB variable definition in the LFTA GBVAR list.
4086 Put the GBVAR in the LFTA selection list (as an SE).
4087 Put a reference to that GBVAR in the HFTA GBVAR list.
4089 Split the aggregate into a superaggregate and a subaggregate.
4090 The SE of the superaggregate references the subaggregate value.
4091 (this will need modifications for MF aggregation)
4092 For each SE in the selection list, HAVING predicate
4093 Make GBVAR references point to the new GBVAR
4094 make the aggregate value references point to the new aggregates.
4096 SEs are not so much split as their ref's are changed.
4098 TODO: insert tablevar names into the colrefs.
4103 vector<qp_node *> sgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4107 vector<qp_node *> ret_vec;
4108 int s, p, g, a, o, i;
4111 vector<string> fta_flds, stream_flds;
4113 // If the node reads from a stream, don't split.
4114 // int t = Schema->get_table_ref(table_name->get_schema_name());
4115 int t = table_name->get_schema_ref();
4116 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
4117 ret_vec.push_back(this);
4121 // Get the set of interfaces it accesses.
4123 vector<string> sel_names;
4124 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
4125 if (ifaces.empty()) {
4126 fprintf(stderr,"INTERNAL ERROR in sgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
4132 //////////////////////////////////////////////
4133 // Is this LFTA-only?
4134 if(definitions.count("lfta_aggregation")>0){
4135 // Yes. Ensure that everything is lfta-safe.
4137 // Check only one interface is accessed.
4138 if(ifaces.size()>1){
4139 this->err_str = "ERROR, group-by query "+node_name+" is lfta-only, but it accesses more than one interface:\n";
4140 for(si=0;si<ifaces.size();++si)
4141 this->err_str += "\t"+ifaces[si].first+"."+ifaces[si].second+"\n";
4142 this->error_code = 2;
4146 // Check the group-by attributes
4147 for(g=0;g<gb_tbl.size();g++){
4148 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4149 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition and the query is lfta-only (%s).\n",
4150 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4152 this->error_code = 1;
4153 this->err_str = tmpstr;
4158 // Verify that the SEs in the aggregate definitions are fta-safe
4159 for(a=0;a<aggr_tbl.size();++a){
4160 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4161 if(ase != NULL){ // COUNT(*) does not have a SE.
4162 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4163 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe scalar expression and the query is lfta-only (%s).\n",
4164 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4166 this->error_code = 1;
4167 this->err_str = tmpstr;
4171 if(! aggr_tbl.fta_legal(a,Ext_fcns)){
4172 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4173 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe aggregate and the query is lfta-only (%s).\n",
4174 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4176 this->error_code = 1;
4177 this->err_str = tmpstr;
4183 // Ensure that all the aggregates are fta-safe ....
4187 for(s=0;s<select_list.size();s++){
4188 if(! check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns)){
4189 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4190 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4192 this->error_code = 1;
4193 this->err_str = tmpstr;
4200 for(p=0;p<where.size();p++){
4201 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4202 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4203 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4205 this->error_code = 1;
4206 this->err_str = tmpstr;
4213 if(having.size()>0){
4214 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : the query is lfta-only, so it can't have a HAVING clause.(%s).\n",
4215 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4217 this->error_code = 1;
4218 this->err_str = tmpstr;
4221 // The query is lfta safe, return it.
4224 ret_vec.push_back(this);
4228 //////////////////////////////////////////////////////////////
4229 /// Split into lfta, hfta.
4231 // A sgah node must always be split,
4232 // if for no other reason than to complete the
4233 // partial aggregation.
4235 // First, determine if the query can be split into aggr/aggr,
4236 // or if it must be selection/aggr.
4237 // Splitting into selection/aggr is allowed only
4238 // if select_lfta is set.
4241 bool select_allowed = definitions.count("select_lfta")>0;
4242 bool select_rqd = false;
4244 set<int> unsafe_gbvars; // for processing where clause
4245 for(g=0;g<gb_tbl.size();g++){
4246 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4247 if(!select_allowed){
4248 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
4249 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4251 this->error_code = 1;
4252 this->err_str = tmpstr;
4256 unsafe_gbvars.insert(g);
4261 // Verify that the SEs in the aggregate definitions are fta-safe
4262 for(a=0;a<aggr_tbl.size();++a){
4263 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4264 if(ase != NULL){ // COUNT(*) does not have a SE.
4265 if(!select_allowed){
4266 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4267 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
4268 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4270 this->error_code = 1;
4271 this->err_str = tmpstr;
4280 // Verify that all of the ref'd UDAFs can be split.
4282 for(a=0;a<aggr_tbl.size();++a){
4283 if(! aggr_tbl.is_builtin(a)){
4284 int afcn = aggr_tbl.get_fcn_id(a);
4285 int super_id = Ext_fcns->get_superaggr_id(afcn);
4286 int sub_id = Ext_fcns->get_subaggr_id(afcn);
4287 if(super_id < 0 || sub_id < 0){
4288 if(!select_allowed){
4289 this->err_str += "ERROR in sgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
4290 this->error_code = 1;
4299 for(p=0;p<where.size();p++){
4300 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4301 if(!select_allowed){
4302 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
4303 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4305 this->error_code = 1;
4306 this->err_str = tmpstr;
4317 /////////////////////////////////////////////////////
4318 // Split into aggr/aggr.
4324 sgah_qpn *fta_node = new sgah_qpn();
4325 fta_node->table_name = table_name;
4326 fta_node->set_node_name( "_fta_"+node_name );
4327 fta_node->table_name->set_range_var(table_name->get_var_name());
4330 sgah_qpn *stream_node = new sgah_qpn();
4331 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4332 stream_node->set_node_name( node_name );
4333 stream_node->table_name->set_range_var(table_name->get_var_name());
4335 // allowed stream disorder. Default is 2,
4336 // can override with max_lfta_disorder setting.
4337 // Also limit the hfta disorder, set to lfta disorder + 1.
4338 // can override with max_hfta_disorder.
4340 fta_node->lfta_disorder = 2;
4341 if(this->get_val_of_def("max_lfta_disorder") != ""){
4342 int d = atoi(this->get_val_of_def("max_lfta_disorder").c_str() );
4344 fprintf(stderr,"Warning, max_lfta_disorder in node %s is %d, must be at least 1, ignoring.\n",node_name.c_str(), d);
4346 fta_node->lfta_disorder = d;
4347 printf("node %s setting lfta_disorder = %d\n",node_name.c_str(),fta_node->lfta_disorder);
4350 if(fta_node->lfta_disorder > 1)
4351 stream_node->hfta_disorder = fta_node->lfta_disorder + 1;
4353 stream_node->hfta_disorder = 1;
4355 if(this->get_val_of_def("max_hfta_disorder") != ""){
4356 int d = atoi(this->get_val_of_def("max_hfta_disorder").c_str() );
4357 if(d<fta_node->lfta_disorder){
4358 fprintf(stderr,"Warning, max_hfta_disorder in node %s is %d, must be at least the max lfta disorder %d, ignoring.\n",node_name.c_str(), d,fta_node->lfta_disorder);
4360 fta_node->lfta_disorder = d;
4362 if(fta_node->lfta_disorder < fta_node->hfta_disorder){
4363 fta_node->hfta_disorder = fta_node->lfta_disorder + 1;
4367 // First, process the group-by variables.
4368 // The fta must supply the values of all the gbvars.
4369 // If a gb is computed, the computation must be
4370 // performed at the FTA, so the SE must be FTA-safe.
4371 // Nice side effect : the gbvar table contains
4372 // matching entries for the original query, the lfta query,
4373 // and the hfta query. So gbrefs in the new queries are set
4374 // correctly just by inheriting the gbrefs from the old query.
4375 // If this property changed, I'll need translation tables.
4378 for(g=0;g<gb_tbl.size();g++){
4379 // Insert the gbvar into the lfta.
4380 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
4381 fta_node->gb_tbl.add_gb_var(
4382 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
4385 // Insert a ref to the value of the gbvar into the lfta select list.
4386 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
4387 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
4388 gbvar_fta->set_gb_ref(g);
4389 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
4390 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
4392 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
4393 gbvar_stream->set_gb_ref(-1); // used as GBvar def
4394 stream_node->gb_tbl.add_gb_var(
4395 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
4398 // multiple aggregation patterns, if any, go with the hfta
4399 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4401 // SEs in the aggregate definitions.
4402 // They are all safe, so split them up for later processing.
4403 map<int, scalarexp_t *> hfta_aggr_se;
4404 for(a=0;a<aggr_tbl.size();++a){
4405 split_fta_aggr( &(aggr_tbl), a,
4406 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
4407 fta_node->select_list,
4415 for(ii=0;ii<fta_flds.size() || ii < fta_node->select_list.size();++ii){
4416 if(ii<fta_flds.size())
4417 printf("\t%s : ",fta_flds[ii].c_str());
4420 if(ii<fta_node->select_list.size())
4421 printf("%s\n",fta_node->select_list[ii]->to_string().c_str());
4425 printf("hfta aggregates are:");
4426 for(ii=0;ii<stream_node->aggr_tbl.size();++ii){
4427 printf(" %s",stream_node->aggr_tbl.get_op(ii).c_str());
4429 printf("\nlfta aggregates are:");
4430 for(ii=0;ii<fta_node->aggr_tbl.size();++ii){
4431 printf(" %s",fta_node->aggr_tbl.get_op(ii).c_str());
4439 // Next, the select list.
4441 for(s=0;s<select_list.size();s++){
4442 bool fta_forbidden = false;
4443 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4444 stream_node->select_list.push_back(
4445 new select_element(root_se, select_list[s]->name));
4450 // All the predicates in the where clause must execute
4453 for(p=0;p<where.size();p++){
4454 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
4455 cnf_elem *new_cnf = new cnf_elem(new_pr);
4456 analyze_cnf(new_cnf);
4458 fta_node->where.push_back(new_cnf);
4461 // All of the predicates in the having clause must
4462 // execute in the stream node.
4464 for(p=0;p<having.size();p++){
4465 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4466 cnf_elem *cnf_root = new cnf_elem(pr_root);
4467 analyze_cnf(cnf_root);
4469 stream_node->having.push_back(cnf_root);
4473 // Divide the parameters among the stream, FTA.
4474 // Currently : assume that the stream receives all parameters
4475 // and parameter updates, incorporates them, then passes
4476 // all of the parameters to the FTA.
4477 // This will need to change (tables, fta-unsafe types. etc.)
4479 // I will pass on the use_handle_access marking, even
4480 // though the fcn call that requires handle access might
4481 // exist in only one of the parts of the query.
4482 // Parameter manipulation and handle access determination will
4483 // need to be revisited anyway.
4484 vector<string> param_names = param_tbl->get_param_names();
4486 for(pi=0;pi<param_names.size();pi++){
4487 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4488 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4489 param_tbl->handle_access(param_names[pi]));
4490 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4491 param_tbl->handle_access(param_names[pi]));
4493 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4494 stream_node->definitions = definitions;
4496 // Now split by interfaces XXXX
4497 if(ifaces.size() > 1){
4498 for(si=0;si<ifaces.size();++si){
4499 sgah_qpn *subq_node = new sgah_qpn();
4501 // Name the subquery
4502 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4504 subq_node->set_node_name( new_name) ;
4505 sel_names.push_back(subq_node->get_node_name());
4508 subq_node->table_name = fta_node->table_name->duplicate();
4509 subq_node->table_name->set_machine(ifaces[si].first);
4510 subq_node->table_name->set_interface(ifaces[si].second);
4511 subq_node->table_name->set_ifq(false);
4514 for(g=0;g<fta_node->gb_tbl.size();g++){
4515 // Insert the gbvar into the lfta.
4516 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
4517 subq_node->gb_tbl.add_gb_var(
4518 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
4522 // Insert the aggregates
4523 for(a=0;a<fta_node->aggr_tbl.size();++a){
4524 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
4527 for(s=0;s<fta_node->select_list.size();s++){
4528 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4530 for(p=0;p<fta_node->where.size();p++){
4531 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4532 cnf_elem *new_cnf = new cnf_elem(new_pr);
4533 analyze_cnf(new_cnf);
4535 subq_node->where.push_back(new_cnf);
4537 for(p=0;p<fta_node->having.size();p++){
4538 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
4539 cnf_elem *new_cnf = new cnf_elem(new_pr);
4540 analyze_cnf(new_cnf);
4542 subq_node->having.push_back(new_cnf);
4544 // Xfer all of the parameters.
4545 // Use existing handle annotations.
4546 vector<string> param_names = param_tbl->get_param_names();
4548 for(pi=0;pi<param_names.size();pi++){
4549 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4550 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4551 param_tbl->handle_access(param_names[pi]));
4553 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4554 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4555 this->error_code = 3;
4560 subq_node->lfta_disorder = fta_node->lfta_disorder;
4562 ret_vec.push_back(subq_node);
4565 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
4566 fta_node->node_name, sel_names, ifaces, ifdb);
4567 mrg_node->set_disorder(fta_node->lfta_disorder);
4570 Do not split sources until we are done with optimizations
4571 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4572 for(i=0;i<split_merge.size();++i){
4573 ret_vec.push_back(split_merge[i]);
4576 ret_vec.push_back(mrg_node);
4577 ret_vec.push_back(stream_node);
4578 hfta_returned = 1/*split_merge.size()*/+1;
4581 fta_node->table_name->set_machine(ifaces[0].first);
4582 fta_node->table_name->set_interface(ifaces[0].second);
4583 fta_node->table_name->set_ifq(false);
4584 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4585 this->error_code = 3;
4588 ret_vec.push_back(fta_node);
4589 ret_vec.push_back(stream_node);
4594 // ret_vec.push_back(fta_node);
4595 // ret_vec.push_back(stream_node);
4602 /////////////////////////////////////////////////////////////////////
4603 /// Split into selection LFTA, aggregation HFTA.
4605 spx_qpn *fta_node = new spx_qpn();
4606 fta_node->table_name = table_name;
4607 fta_node->set_node_name( "_fta_"+node_name );
4608 fta_node->table_name->set_range_var(table_name->get_var_name());
4611 sgah_qpn *stream_node = new sgah_qpn();
4612 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4613 stream_node->set_node_name( node_name );
4614 stream_node->table_name->set_range_var(table_name->get_var_name());
4617 vector< vector<select_element *> *> select_vec;
4618 select_vec.push_back(&(fta_node->select_list)); // only one child
4620 // Process the gbvars. Split their defining SEs.
4621 for(g=0;g<gb_tbl.size();g++){
4622 bool fta_forbidden = false;
4623 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4625 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
4626 fta_forbidden, se_src, select_vec, Ext_fcns
4628 // if(fta_forbidden) (
4629 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4630 stream_node->gb_tbl.add_gb_var(
4631 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
4634 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
4635 stream_node->gb_tbl.add_gb_var(
4636 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
4640 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4642 // Process the aggregate table.
4643 // Copy to stream, split the SEs.
4644 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
4645 for(a=0;a<aggr_tbl.size();++a){
4647 if(aggr_tbl.is_builtin(a)){
4648 if(aggr_tbl.is_star_aggr(a)){
4649 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
4650 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
4652 bool fta_forbidden = false;
4653 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4655 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4656 fta_forbidden, se_src, select_vec, Ext_fcns
4658 // if(fta_forbidden) (
4659 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4660 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
4661 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
4663 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4664 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
4665 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
4668 hse->set_data_type(aggr_tbl.get_data_type(a));
4669 hse->set_aggr_id(a);
4670 hfta_aggr_se[a]=hse;
4672 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
4673 vector<scalarexp_t *> new_opl;
4674 for(o=0;o<opl.size();++o){
4675 bool fta_forbidden = false;
4676 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4677 scalarexp_t *agg_se = split_ftavec_se( opl[o],
4678 fta_forbidden, se_src, select_vec, Ext_fcns
4680 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4681 // fta_forbidden, se_src, select_vec, Ext_fcns
4683 // if(fta_forbidden) (
4684 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4685 new_opl.push_back(agg_se);
4687 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4688 new_opl.push_back(new_se);
4691 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
4692 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
4693 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
4694 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
4695 hse->set_aggr_id(a);
4696 hfta_aggr_se[a]=hse;
4701 // Process the WHERE clause.
4702 // If it is fta-safe AND it refs only fta-safe gbvars,
4703 // then expand the gbvars and put it into the lfta.
4704 // Else, split it into an hfta predicate ref'ing
4705 // se's computed partially in the lfta.
4707 predicate_t *pr_root;
4709 for(p=0;p<where.size();p++){
4710 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
4711 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
4712 fta_forbidden = true;
4714 pr_root = dup_pr(where[p]->pr, NULL);
4715 expand_gbvars_pr(pr_root, gb_tbl);
4716 fta_forbidden = false;
4718 cnf_elem *cnf_root = new cnf_elem(pr_root);
4719 analyze_cnf(cnf_root);
4722 stream_node->where.push_back(cnf_root);
4724 fta_node->where.push_back(cnf_root);
4729 // Process the Select clause, rehome it on the
4731 for(s=0;s<select_list.size();s++){
4732 bool fta_forbidden = false;
4733 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4734 stream_node->select_list.push_back(
4735 new select_element(root_se, select_list[s]->name));
4739 // Process the Having clause
4741 // All of the predicates in the having clause must
4742 // execute in the stream node.
4744 for(p=0;p<having.size();p++){
4745 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4746 cnf_elem *cnf_root = new cnf_elem(pr_root);
4747 analyze_cnf(cnf_root);
4749 stream_node->having.push_back(cnf_root);
4752 // Handle parameters and a few last details.
4753 vector<string> param_names = param_tbl->get_param_names();
4755 for(pi=0;pi<param_names.size();pi++){
4756 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4757 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4758 param_tbl->handle_access(param_names[pi]));
4759 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4760 param_tbl->handle_access(param_names[pi]));
4763 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4764 stream_node->definitions = definitions;
4766 // Now split by interfaces YYYY
4767 if(ifaces.size() > 1){
4768 for(si=0;si<ifaces.size();++si){
4769 spx_qpn *subq_node = new spx_qpn();
4771 // Name the subquery
4772 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4774 subq_node->set_node_name( new_name) ;
4775 sel_names.push_back(subq_node->get_node_name());
4778 subq_node->table_name = fta_node->table_name->duplicate();
4779 subq_node->table_name->set_machine(ifaces[si].first);
4780 subq_node->table_name->set_interface(ifaces[si].second);
4781 subq_node->table_name->set_ifq(false);
4783 for(s=0;s<fta_node->select_list.size();s++){
4784 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4786 for(p=0;p<fta_node->where.size();p++){
4787 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4788 cnf_elem *new_cnf = new cnf_elem(new_pr);
4789 analyze_cnf(new_cnf);
4791 subq_node->where.push_back(new_cnf);
4793 // Xfer all of the parameters.
4794 // Use existing handle annotations.
4795 vector<string> param_names = param_tbl->get_param_names();
4797 for(pi=0;pi<param_names.size();pi++){
4798 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4799 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4800 param_tbl->handle_access(param_names[pi]));
4802 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4803 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4804 this->error_code = 3;
4808 ret_vec.push_back(subq_node);
4811 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4812 fta_node->node_name, sel_names, ifaces, ifdb);
4814 Do not split sources until we are done with optimizations
4815 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4816 for(i=0;i<split_merge.size();++i){
4817 ret_vec.push_back(split_merge[i]);
4820 ret_vec.push_back(mrg_node);
4821 ret_vec.push_back(stream_node);
4822 hfta_returned = 1/*split_merge.size()*/+1;
4825 fta_node->table_name->set_machine(ifaces[0].first);
4826 fta_node->table_name->set_interface(ifaces[0].second);
4827 fta_node->table_name->set_ifq(false);
4828 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4829 this->error_code = 3;
4832 ret_vec.push_back(fta_node);
4833 ret_vec.push_back(stream_node);
4838 // ret_vec.push_back(fta_node);
4839 // ret_vec.push_back(stream_node);
4848 SPLITTING A EQ-TEMPORAL, HASH JOIN OPERATOR
4850 A JOIN_EQ_HASH_QPN node may reference:
4851 literals, parameters, colrefs, functions, operators
4852 A JOIN_EQ_HASH_QPN node may not reference:
4853 group-by variables, aggregates
4855 A JOIN_EQ_HASH_QPN node contains
4856 selection list of SEs
4857 where list of CNF predicates, broken into:
4864 For each tablevar whose source is a PROTOCOL
4865 Create a LFTA for that tablevar
4866 Push as many prefilter[..] predicates to that tablevar as is
4868 Split the SEs in the select list, and the predicates not
// Split a temporal-equality hash join into LFTA children and an HFTA join.
//
// Each FROM entry whose schema type is PROTOCOL_SCHEMA gets its own spx_qpn
// (selection) child named "_fta_<f>_<node_name>"; a new join_eq_hash_qpn
// ("stream_node") keeps this node's name and reads from those children.
// Predicates and select-list expressions are divided between the two levels
// via split_ftavec_pr / split_ftavec_se. A child that maps to more than one
// interface is replicated once per interface and recombined with a mrg_qpn.
//
// Parameters, as used in the visible code:
//   Ext_fcns      - handed to check_fta_forbidden_pr and the split helpers.
//   Schema        - consulted for the schema type of each FROM entry.
//   hfta_returned - output; set to hfta_nodes.size()+1 (merge nodes plus
//                   stream_node) on the split path.
//   ifdb          - passed to get_ifaces, resolve_if_params and mrg_qpn.
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces.
//
// Returns ret_vec: per-interface/LFTA selection nodes first, then the merge
// nodes, then stream_node last. When no FROM entry is a PROTOCOL source this
// node itself is pushed onto ret_vec; the remainder of that early-exit path
// is not visible in this listing. A truthy result from resolve_if_params
// sets this->error_code to 3.
4873 vector<qp_node *> join_eq_hash_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4875 vector<qp_node *> ret_vec;
4878 // If the node reads from streams only, don't split.
4879 bool stream_only = true;
4880 for(f=0;f<from.size();++f){
4881 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4882 int t = from[f]->get_schema_ref();
4883 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA) stream_only = false;
4887 ret_vec.push_back(this);
4892 // The HFTA node, it is always returned.
4894 join_eq_hash_qpn *stream_node = new join_eq_hash_qpn();
// Start from copies of the original FROM entries; entries that get an LFTA
// child are redirected to that child further down.
4895 for(f=0;f<from.size();++f){
4896 // tablevar_t *tmp_tblvar = new tablevar_t( from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str());
4897 tablevar_t *tmp_tblvar = from[f]->duplicate();
4898 // tmp_tblvar->set_range_var(from[f]->get_var_name());
4900 stream_node->from.push_back(tmp_tblvar);
4902 stream_node->set_node_name(node_name);
4904 // Create spx (selection) children for each PROTOCOL source.
// child_vec and select_vec stay index-aligned with from[]: non-PROTOCOL
// entries get NULL placeholders.
4905 vector<spx_qpn *> child_vec;
4906 vector< vector<select_element *> *> select_vec;
4907 for(f=0;f<from.size();++f){
4908 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4909 int t = from[f]->get_schema_ref();
4910 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA){
4911 spx_qpn *child_qpn = new spx_qpn();
// NOTE(review): tmpstr is declared outside the visible lines and sprintf is
// unbounded, so a long node_name could overflow it -- confirm the buffer
// size or switch to snprintf.
4912 sprintf(tmpstr,"_fta_%d_%s",f,node_name.c_str());
4913 child_qpn->set_node_name(string(tmpstr));
4914 child_qpn->table_name = new tablevar_t(
4915 from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str(), from[f]->get_ifq());
4916 child_qpn->table_name->set_range_var(from[f]->get_var_name());
4917 child_qpn->table_name->set_machine(from[f]->get_machine());
4919 child_vec.push_back(child_qpn);
4920 select_vec.push_back(&(child_qpn->select_list));
4922 // Update the stream's FROM clause to read from this child
// tmpstr still holds the child name written by the sprintf above.
4923 stream_node->from[f]->set_interface("");
4924 stream_node->from[f]->set_schema(tmpstr);
4926 child_vec.push_back(NULL);
4927 select_vec.push_back(NULL);
4931 // Push lfta-safe prefilter to the lfta
4932 // TODO: I'm not copying the preds, I dont *think* it will be a problem.
4933 predicate_t *pr_root;
// For a source with an LFTA child, a prefilter predicate that passes
// check_fta_forbidden_pr is handed to the child by pointer (shared, not
// duplicated); the others are split with split_ftavec_pr and the result is
// kept in the stream node. Sources without a child keep their prefilter in
// the stream node unchanged.
4935 for(f=0;f<from.size();++f){
4936 vector<cnf_elem *> pred_vec = prefilter[f];
4937 if(child_vec[f] != NULL){
4938 for(p=0;p<pred_vec.size();++p){
4939 if(check_fta_forbidden_pr(pred_vec[p]->pr,NULL, Ext_fcns)){
4940 child_vec[f]->where.push_back(pred_vec[p]);
4942 pr_root = split_ftavec_pr(pred_vec[p]->pr,select_vec,Ext_fcns);
4943 cnf_elem *cnf_root = new cnf_elem(pr_root);
4944 analyze_cnf(cnf_root);
4945 stream_node->prefilter[f].push_back(cnf_root);
4949 for(p=0;p<pred_vec.size();++p){
4950 stream_node->prefilter[f].push_back(pred_vec[p]);
4956 // Process the other predicates
// temporal_eq, hash_eq and postfilter predicates all stay in the stream
// node; split_ftavec_pr receives select_vec so it can place what they need
// on the children's select lists.
4957 for(p=0;p<temporal_eq.size();++p){
4958 pr_root = split_ftavec_pr(temporal_eq[p]->pr,select_vec,Ext_fcns);
4959 cnf_elem *cnf_root = new cnf_elem(pr_root);
4960 analyze_cnf(cnf_root);
4961 stream_node->temporal_eq.push_back(cnf_root);
4963 for(p=0;p<hash_eq.size();++p){
4964 pr_root = split_ftavec_pr(hash_eq[p]->pr,select_vec,Ext_fcns);
4965 cnf_elem *cnf_root = new cnf_elem(pr_root);
4966 analyze_cnf(cnf_root);
4967 stream_node->hash_eq.push_back(cnf_root);
4969 for(p=0;p<postfilter.size();++p){
4970 pr_root = split_ftavec_pr(postfilter[p]->pr,select_vec,Ext_fcns);
4971 cnf_elem *cnf_root = new cnf_elem(pr_root);
4972 analyze_cnf(cnf_root);
4973 stream_node->postfilter.push_back(cnf_root);
// Select list: an expression that is FTA-forbidden, or whose source is not a
// PROTOCOL child, is kept as the split result; otherwise the stream node
// gets a reference (make_fta_se_ref) into the source child's select list.
4977 for(s=0;s<select_list.size();s++){
4978 bool fta_forbidden = false;
4979 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4980 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
4981 fta_forbidden, se_src, select_vec, Ext_fcns
4983 if(fta_forbidden || !is_PROTOCOL_source(se_src, select_vec)){
4984 stream_node->select_list.push_back(
4985 new select_element(root_se, select_list[s]->name) );
4987 scalarexp_t *new_se=make_fta_se_ref(select_vec,root_se,se_src);
4988 stream_node->select_list.push_back(
4989 new select_element(new_se, select_list[s]->name)
4995 // I need to "rehome" the colrefs -- make the annotations in the colrefs
4996 // agree with their tablevars.
// Each child has exactly one table variable, so its colrefs are bound
// against a one-element list with both index arguments set to 0.
4997 for(f=0;f<child_vec.size();++f){
4998 if(child_vec[f]!=NULL){
4999 vector<tablevar_t *> fm; fm.push_back(child_vec[f]->table_name);
5001 for(s=0;s<child_vec[f]->select_list.size();++s)
5002 bind_colref_se(child_vec[f]->select_list[s]->se, fm,0,0);
5003 for(p=0;p<child_vec[f]->where.size();++p)
5004 // bind_colref_pr(child_vec[f]->where[p]->pr, fm,f,0);
5005 bind_colref_pr(child_vec[f]->where[p]->pr, fm,0,0);
5009 // rehome the colrefs in the hfta node.
// One pass per FROM entry f: every predicate and select expression is
// rebound against stream_node->from with arguments (f,f).
// NOTE(review): where.clear() is executed on every pass and once more just
// before the rebuild below; the per-pass call looks redundant -- confirm.
5010 for(f=0;f<stream_node->from.size();++f){
5011 stream_node->where.clear();
5012 for(s=0;s<stream_node->from.size();++s){
5013 for(p=0;p<stream_node->prefilter[s].size();++p){
5014 bind_colref_pr((stream_node->prefilter[s])[p]->pr,stream_node->from,f,f);
5017 for(p=0;p<stream_node->temporal_eq.size();++p){
5018 bind_colref_pr(stream_node->temporal_eq[p]->pr,stream_node->from,f,f);
5020 for(p=0;p<stream_node->hash_eq.size();++p){
5021 bind_colref_pr(stream_node->hash_eq[p]->pr,stream_node->from,f,f);
5023 for(p=0;p<stream_node->postfilter.size();++p){
5024 bind_colref_pr(stream_node->postfilter[p]->pr,stream_node->from,f,f);
5026 for(s=0;s<stream_node->select_list.size();++s){
5027 bind_colref_se(stream_node->select_list[s]->se,stream_node->from,f,f);
5031 // Rebuild the WHERE clause
// Order: all prefilters (by source), then temporal_eq, hash_eq, postfilter.
5032 stream_node->where.clear();
5033 for(s=0;s<stream_node->from.size();++s){
5034 for(p=0;p<stream_node->prefilter[s].size();++p){
5035 stream_node->where.push_back((stream_node->prefilter[s])[p]);
5038 for(p=0;p<stream_node->temporal_eq.size();++p){
5039 stream_node->where.push_back(stream_node->temporal_eq[p]);
5041 for(p=0;p<stream_node->hash_eq.size();++p){
5042 stream_node->where.push_back(stream_node->hash_eq[p]);
5044 for(p=0;p<stream_node->postfilter.size();++p){
5045 stream_node->where.push_back(stream_node->postfilter[p]);
5049 // Build the return list
// Merge nodes are collected separately in hfta_nodes so that they can be
// appended to ret_vec after all of the selection nodes.
5050 vector<qp_node *> hfta_nodes;
5052 for(f=0;f<from.size();++f){
5053 if(child_vec[f] != NULL){
5054 spx_qpn *c_node = child_vec[f];
5055 vector<pair<string, string> > ifaces = get_ifaces(c_node->table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
5056 if (ifaces.empty()) {
5057 fprintf(stderr,"INTERNAL ERROR in join_eq_hash_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
// Single interface: bind the child directly to it (first = machine,
// second = interface) and return the child itself.
5061 if(ifaces.size() == 1){
5062 c_node->table_name->set_machine(ifaces[0].first);
5063 c_node->table_name->set_interface(ifaces[0].second);
5064 c_node->table_name->set_ifq(false);
5065 if(c_node->resolve_if_params(ifdb, this->err_str)){
5066 this->error_code = 3;
5069 ret_vec.push_back(c_node);
// Several interfaces: make one copy of the child per interface, named
// "_<child>_<machine>_<interface>", and merge the copies.
5071 vector<string> sel_names;
5073 for(si=0;si<ifaces.size();++si){
5074 spx_qpn *subq_node = new spx_qpn();
5076 // Name the subquery
5077 string new_name = "_"+c_node->node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
5079 subq_node->set_node_name( new_name) ;
5080 sel_names.push_back(subq_node->get_node_name());
5083 subq_node->table_name = c_node->table_name->duplicate();
5084 subq_node->table_name->set_machine(ifaces[si].first);
5085 subq_node->table_name->set_interface(ifaces[si].second);
5086 subq_node->table_name->set_ifq(false);
5088 for(s=0;s<c_node->select_list.size();s++){
5089 subq_node->select_list.push_back(dup_select(c_node->select_list[s], NULL));
5091 for(p=0;p<c_node->where.size();p++){
5092 predicate_t *new_pr = dup_pr(c_node->where[p]->pr, NULL);
5093 cnf_elem *new_cnf = new cnf_elem(new_pr);
5094 analyze_cnf(new_cnf);
// NOTE(review): this prints to stdout once per copied predicate and looks
// like leftover debugging output -- confirm whether it is intended.
5096 printf("table name is %s\n",subq_node->table_name->to_string().c_str());
5097 subq_node->where.push_back(new_cnf);
5099 // Xfer all of the parameters.
5100 // Use existing handle annotations.
5101 // vector<string> param_names = param_tbl->get_param_names();
5103 // for(pi=0;pi<param_names.size();pi++){
5104 // data_type *dt = param_tbl->get_data_type(param_names[pi]);
5105 // subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
5106 // param_tbl->handle_access(param_names[pi]));
5108 // subq_node->definitions = definitions;
5110 if(subq_node->resolve_if_params(ifdb, this->err_str)){
5111 this->error_code = 3;
5115 ret_vec.push_back(subq_node);
// The merge node is constructed from the most recently pushed subquery
// and takes over the child's node name, so the stream node's FROM entry
// still resolves to it.
5117 int lpos = ret_vec.size()-1 ;
5118 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[lpos]),c_node->node_name,sel_names, ifaces, ifdb);
// NOTE(review): the prose line below is not valid code, so this region is
// presumably enclosed in a block comment whose delimiters are not visible
// in this listing -- confirm against the full file.
5120 Do not split sources until we are done with optimizations
5121 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
5123 for(i=0;i<split_merge.size();++i){
5124 hfta_nodes.push_back(split_merge[i]);
5127 hfta_nodes.push_back(mrg_node);
5132 for(i=0;i<hfta_nodes.size();++i) ret_vec.push_back(hfta_nodes[i]);
5133 ret_vec.push_back(stream_node);
5134 hfta_returned = hfta_nodes.size()+1;
5136 // Currently : assume that the stream receives all parameters
5137 // and parameter updates, incorporates them, then passes
5138 // all of the parameters to the FTA.
5139 // This will need to change (tables, fta-unsafe types. etc.)
5141 // I will pass on the use_handle_access marking, even
5142 // though the fcn call that requires handle access might
5143 // exist in only one of the parts of the query.
5144 // Parameter manipulation and handle access determination will
5145 // need to be revisited anyway.
5146 vector<string> param_names = param_tbl->get_param_names();
// Every returned node receives its own duplicate of each parameter type.
5148 for(pi=0;pi<param_names.size();pi++){
5150 data_type *dt = param_tbl->get_data_type(param_names[pi]);
5151 for(ri=0;ri<ret_vec.size();++ri){
5152 ret_vec[ri]->param_tbl->add_param(param_names[pi],dt->duplicate(),
5153 param_tbl->handle_access(param_names[pi]));
// NOTE(review): this copy of definitions directly follows add_param and so
// appears to sit inside both loops; if that is the case, nodes receive no
// definitions when the query has no parameters -- confirm against the
// full source.
5154 ret_vec[ri]->definitions = definitions; ret_vec[ri]->definitions.erase("_referenced_ifaces");
5165 /////////////////////////////////////////////////////////////
5168 // Common processing
// Shared helper used by the extract_opview methods.
//
// If fmtbl refers to a schema of type OPERATOR_VIEW_SCHEMA, this records an
// opview_entry for it and appends duplicated parse trees of the view's
// subqueries to ret.
//
//   fmtbl     - the FROM table variable to inspect; receives the UDOP alias
//               and the index of the new entry in opviews.
//   pos       - position of fmtbl in the caller's FROM list; used in the
//               generated names.
//   node_name - name of the calling plan node; stored as parent_qname and
//               used as a name prefix.
//   qnodes    - searched by name for each subquery's parse tree.
//   opviews   - receives the new opview_entry.
//   ret       - receives one duplicated table_exp_t per subquery.
//   rootnm    - stored as the entry's root_name.
//   silo_nm   - suffix appended to the subquery name for the schema lookup.
//
// The parameter supplying Schema and all return statements are on lines not
// visible in this listing, so the meaning of the int result is not
// documented here.
5169 int process_opview(tablevar_t *fmtbl, int pos, string node_name,
5171 vector<query_node *> &qnodes,
5172 opview_set &opviews,
5173 vector<table_exp_t *> &ret, string rootnm, string silo_nm){
5177 int schref = fmtbl->get_schema_ref();
5181 if(Schema->get_schema_type(schref) == OPERATOR_VIEW_SCHEMA){
5182 opview_entry *opv = new opview_entry();
5183 opv->parent_qname = node_name;
5184 opv->root_name = rootnm;
5185 opv->view_name = fmtbl->get_schema_name();
// Alias format: <node_name>_UDOP<pos>_<view_name>.
// NOTE(review): tmpstr is declared outside the visible lines and sprintf is
// unbounded -- confirm the buffer is large enough for long names.
5187 sprintf(tmpstr,"%s_UDOP%d_%s",node_name.c_str(),pos,opv->view_name.c_str());
5188 opv->udop_alias = tmpstr;
5189 fmtbl->set_udop_alias(opv->udop_alias);
// Operator properties are read from the schema as strings; the timeout is
// converted with atoi.
5191 opv->exec_fl = Schema->get_op_prop(schref, string("file"));
5192 opv->liveness_timeout = atoi(Schema->get_op_prop(schref, string("liveness_timeout")).c_str());
5194 vector<subquery_spec *> subq = Schema->get_subqueryspecs(schref);
5195 for(s=0;s<subq.size();++s){
5196 // Validate that the fields match.
5197 subquery_spec *sqs = subq[s];
// The subquery's output fields are looked up under its name plus silo_nm.
5198 vector<field_entry *> flds = Schema->get_fields(sqs->name+silo_nm);
5199 if(flds.size() == 0){
5200 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in Schema.\n",sqs->name.c_str(), opv->view_name.c_str());
// NOTE(review): %lu is used for .size() values below; %zu is the portable
// specifier if these are size_t -- confirm.
5203 if(flds.size() < sqs->types.size()){
5204 fprintf(stderr,"ERROR: subquery %s of view %s does not have enough fields (%lu found, %lu expected).\n",sqs->name.c_str(), opv->view_name.c_str(),flds.size(), sqs->types.size());
// Field-by-field check: the expected type (from the spec) must subsume the
// type found in the schema, and an expected temporal type must match.
5207 bool failed = false;
5208 for(f=0;f<sqs->types.size();++f){
5209 data_type dte(sqs->types[f],sqs->modifiers[f]);
5210 data_type dtf(flds[f]->get_type(),flds[f]->get_modifier_list());
5211 if(! dte.subsumes_type(&dtf) ){
5212 fprintf(stderr,"ERROR: subquery %s of view %s does not have the correct type for field %d (%s found, %s expected).\n",sqs->name.c_str(), opv->view_name.c_str(),f,dtf.to_string().c_str(), dte.to_string().c_str());
5216 if(dte.is_temporal() && (dte.get_temporal() != dtf.get_temporal()) ){
5217 string pstr = dte.get_temporal_string();
5218 fprintf(stderr,"ERROR: subquery %s of view %s does not have the expected temporal value %s of field %d.\n",sqs->name.c_str(), opv->view_name.c_str(),pstr.c_str(),f);
5225 /// Validation done, find the subquery, make a copy of the
5226 /// parse tree, and add it to the return list.
// Linear search by name; q == qnodes.size() afterwards means no match.
5227 for(q=0;q<qnodes.size();++q)
5228 if(qnodes[q]->name == sqs->name)
5230 if(q==qnodes.size()){
5231 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in list of query names.\n",sqs->name.c_str(), opv->view_name.c_str());
// The copy is renamed <node_name>_OP<pos>_<view_name>_SUBQ<s> and that name
// is recorded in the opview entry.
5235 table_exp_t *newq = dup_table_exp(qnodes[q]->parse_tree);
5236 sprintf(tmpstr,"%s_OP%d_%s_SUBQ%d",node_name.c_str(),pos,opv->view_name.c_str(),s);
5237 string newq_name = tmpstr;
5238 newq->nmap["query_name"] = newq_name;
5239 ret.push_back(newq);
5240 opv->subq_names.push_back(newq_name);
// Register the entry and remember its index on the table variable
// (presumably after the subquery loop; the closing brace is not visible).
5242 fmtbl->set_opview_idx(opviews.append(opv));
// Runs process_opview on this node's single input (table_name, position 0),
// collecting any duplicated subquery parse trees in ret.
// The handling of retval and the return statement are not visible in this
// listing.
5248 vector<table_exp_t *> spx_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5249 vector<table_exp_t *> ret;
5251 int retval = process_opview(table_name,0,node_name,
5252 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on this node's single input (table_name, position 0),
// collecting any duplicated subquery parse trees in ret.
// The handling of retval and the return statement are not visible in this
// listing.
5258 vector<table_exp_t *> sgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5259 vector<table_exp_t *> ret;
5261 int retval = process_opview(table_name,0,node_name,
5262 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on this node's single input (table_name, position 0),
// collecting any duplicated subquery parse trees in ret.
// The handling of retval and the return statement are not visible in this
// listing.
5267 vector<table_exp_t *> rsgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5268 vector<table_exp_t *> ret;
5270 int retval = process_opview(table_name,0,node_name,
5271 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on this node's single input (table_name, position 0),
// collecting any duplicated subquery parse trees in ret.
// The handling of retval and the return statement are not visible in this
// listing.
5277 vector<table_exp_t *> sgahcwcb_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5278 vector<table_exp_t *> ret;
5280 int retval = process_opview(table_name,0,node_name,
5281 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on every merge input fm[f], passing f as the position,
// and accumulates the duplicated subquery parse trees in ret.
// The handling of retval and the return statement are not visible in this
// listing.
5288 vector<table_exp_t *> mrg_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5289 vector<table_exp_t *> ret;
5291 for(f=0;f<fm.size();++f){
5292 int retval = process_opview(fm[f],f,node_name,
5293 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on every FROM entry from[f], passing f as the
// position, and accumulates the duplicated subquery parse trees in ret.
// The handling of retval and the return statement are not visible in this
// listing.
5302 vector<table_exp_t *> join_eq_hash_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5303 vector<table_exp_t *> ret;
5305 for(f=0;f<from.size();++f){
5306 int retval = process_opview(from[f],f,node_name,
5307 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on every FROM entry from[f], passing f as the
// position, and accumulates the duplicated subquery parse trees in ret.
// The handling of retval and the return statement are not visible in this
// listing.
5313 vector<table_exp_t *> filter_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5314 vector<table_exp_t *> ret;
5316 for(f=0;f<from.size();++f){
5317 int retval = process_opview(from[f],f,node_name,
5318 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on the first FROM entry only (from[0], position 0),
// collecting any duplicated subquery parse trees in ret.
// The handling of retval and the return statement are not visible in this
// listing.
5324 vector<table_exp_t *> watch_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5325 vector<table_exp_t *> ret;
5326 int retval = process_opview(from[0],0,node_name,
5327 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Always returns an empty list; none of the arguments are consulted.
5334 vector<table_exp_t *> watch_tbl_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5335 vector<table_exp_t *> ret;
5336 return ret; // nothing to process
5341 //////////////////////////////////////////////////////////////////
5342 //////////////////////////////////////////////////////////////////
5343 /////// Additional methods
5347 //////////////////////////////////////////////////////////////////
5348 // Get schema of operator output
// Output schema of the merge node: returns the stored table_layout pointer
// as-is (no copy is made here).
5350 table_def *mrg_qpn::get_fields(){
5351 return(table_layout);
// Output schema of the watch-table node: returns the stored table_layout
// pointer as-is (no copy is made here).
5354 table_def *watch_tbl_qpn::get_fields(){
5355 return(table_layout);
// Output schema of this node: the table_def that create_attributes builds
// from node_name and the select list.
5359 table_def *spx_qpn::get_fields(){
5360 return(create_attributes(node_name, select_list));
// Output schema of this node: the table_def that create_attributes builds
// from node_name and the select list.
5363 table_def *sgah_qpn::get_fields(){
5364 return(create_attributes(node_name, select_list));
// Output schema of this node: the table_def that create_attributes builds
// from node_name and the select list.
5367 table_def *rsgah_qpn::get_fields(){
5368 return(create_attributes(node_name, select_list));
// Output schema of this node: the table_def that create_attributes builds
// from node_name and the select list.
5371 table_def *sgahcwcb_qpn::get_fields(){
5372 return(create_attributes(node_name, select_list));
// Output schema of this node: the table_def that create_attributes builds
// from node_name and the select list.
5375 table_def *filter_join_qpn::get_fields(){
5376 return(create_attributes(node_name, select_list));
// Output schema of this node: the table_def that create_attributes builds
// from node_name and the select list.
5379 table_def *watch_join_qpn::get_fields(){
5380 return(create_attributes(node_name, select_list));
// Output schema of the hash join.
//
// Before building the schema with create_attributes, this recomputes the
// temporal property of every select-list expression from the temporal
// equality predicates (temporal_eq). Note the side effect: the data_type
// objects attached to the select-list expressions are modified in place
// (set_temporal / reset_temporal).
5383 table_def *join_eq_hash_qpn::get_fields(){
5386 // First, gather temporal colrefs and SEs.
// temporal_cids maps each column that appears directly as one side of a
// temporal equality to its temporal type (first occurrence wins);
// temporal_se collects whole side expressions. The branch structure that
// decides which sides go into temporal_se is not visible in this listing --
// presumably the non-colref sides.
5387 map<col_id, temporal_type> temporal_cids;
5388 vector<scalarexp_t *> temporal_se;
5389 for(h=0;h<temporal_eq.size();++h){
5390 scalarexp_t *sel = temporal_eq[h]->pr->get_left_se();
5391 scalarexp_t *ser = temporal_eq[h]->pr->get_right_se();
5393 if(sel->get_operator_type() == SE_COLREF){
5394 col_id tcol(sel->get_colref());
5395 if(temporal_cids.count(tcol) == 0){
5396 temporal_cids[tcol] = sel->get_data_type()->get_temporal();
5399 temporal_se.push_back(sel);
5402 if(ser->get_operator_type() == SE_COLREF){
5403 col_id tcol(ser->get_colref());
5404 if(temporal_cids.count(tcol) == 0){
5405 temporal_cids[tcol] = ser->get_data_type()->get_temporal();
5408 temporal_se.push_back(ser);
5412 // Mark select elements as nontemporal, then deduce which
5413 // ones are temporal.
5414 for(s=0;s<select_list.size();++s){
5415 select_list[s]->se->get_data_type()->set_temporal(
5416 compute_se_temporal(select_list[s]->se, temporal_cids)
5418 // Second chance if it is an exact match to an SE.
5419 // for(s=0;s<select_list.size();++s){
// An expression still non-temporal inherits the temporal type of an
// equivalent expression found in temporal_se.
5420 if(! select_list[s]->se->get_data_type()->is_temporal() ){
5421 for(t=0;t<temporal_se.size();++t){
5422 if(is_equivalent_se(temporal_se[t], select_list[s]->se)){
5423 select_list[s]->se->get_data_type()->set_temporal(
5424 temporal_se[t]->get_data_type()->get_temporal()
5432 // If there is an outer join, verify that
5433 // the temporal attributes are actually temporal.
5434 // NOTE: this code must be synchronized with the
5435 // equivalence finding in join_eq_hash_qpn::generate_functor
5436 // (and also, the join_eq_hash_qpn constructor)
// The check runs when either FROM entry reports a nonzero property.
// l_equiv / r_equiv hold the field names of plain column references on the
// left / right side of the temporal equalities.
5437 if(from[0]->get_property() || from[1]->get_property()){
5438 set<string> l_equiv, r_equiv;
5439 for(i=0;i<temporal_eq.size();i++){
5440 scalarexp_t *lse = temporal_eq[i]->pr->get_left_se();
5441 scalarexp_t *rse = temporal_eq[i]->pr->get_right_se();
5442 if(lse->get_operator_type()==SE_COLREF){
5443 l_equiv.insert(lse->get_colref()->get_field());
5445 if(rse->get_operator_type()==SE_COLREF){
5446 r_equiv.insert(rse->get_colref()->get_field());
// For each temporal select expression, every referenced column is looked up
// in the set for its side: tblvar_ref 0 against l_equiv, the other branch
// against r_equiv. The statements run on a miss, and the condition guarding
// reset_temporal, are not visible here -- presumably a miss sets failed and
// a failed expression loses its temporal marking.
5450 for(s=0;s<select_list.size();++s){
5451 if(select_list[s]->se->get_data_type()->is_temporal()){
5453 col_id_set::iterator ci;
5454 bool failed = false;
5455 gather_se_col_ids(select_list[s]->se,cid_set, NULL);
5456 for(ci=cid_set.begin();ci!=cid_set.end();++ci){
5457 if((*ci).tblvar_ref == 0){
5458 if(from[0]->get_property()){
5459 if(l_equiv.count((*ci).field) == 0){
5464 if(from[1]->get_property()){
5465 if(r_equiv.count((*ci).field) == 0){
5472 select_list[s]->se->get_data_type()->reset_temporal();
// Build the schema from the (now re-annotated) select list.
5479 return create_attributes(node_name, select_list);
5483 //-----------------------------------------------------------------
5484 // get output "keys"
5485 // This is a guess about the set of fields which are a key
5486 // Use as metadata output, e.g. in qtree.xml
5490 // refs to GB attributes are keys, if a SE is not a GB colref
5491 // but refers to a GB colref (outside of an aggregation)
5492 // then its name is appended to partial_keys
// Guesses the key fields of this node's output.
// Returns the names of the select elements that are group-by references.
// partial_keys (output) receives the names of select elements for which
// contains_gb_se(se, gref_set) holds.
5493 vector<string> sgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5494 vector<string> keys;
5497 for(int i=0; i<gb_tbl.size();++i)
5500 for(int s=0;s<select_list.size();++s){
// A select element that is itself a group-by reference is a full key.
5501 if(select_list[s]->se->is_gb()){
5502 keys.push_back(select_list[s]->name);
5504 if(contains_gb_se(select_list[s]->se, gref_set)){
5505 partial_keys.push_back(select_list[s]->name);
// Guesses the key fields of this node's output.
// Returns the names of the select elements that are group-by references.
// partial_keys (output) receives the names of select elements for which
// contains_gb_se(se, gref_set) holds.
5512 vector<string> rsgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5513 vector<string> keys;
5516 for(int i=0; i<gb_tbl.size();++i)
5519 for(int s=0;s<select_list.size();++s){
// A select element that is itself a group-by reference is a full key.
5520 if(select_list[s]->se->is_gb()){
5521 keys.push_back(select_list[s]->name);
5523 if(contains_gb_se(select_list[s]->se, gref_set)){
5524 partial_keys.push_back(select_list[s]->name);
5535 //-----------------------------------------------------------------
5536 // get output tables
5539 // Get tablevar_t names of input and output tables
5541 // output_file_qpn::output_file_qpn(){source_op_name = ""; }
// Returns the tablevar_t entries describing the inputs of the output-file node.
5542 vector<tablevar_t *> output_file_qpn::get_input_tbls(){
// Returns the input tables of the watch-table node; the result vector `ret`
// is default-constructed here (presumably returned empty - confirm).
5546 vector<tablevar_t *> watch_tbl_qpn::get_input_tbls(){
5547 vector<tablevar_t *> ret;
// Returns the tablevar_t entries describing the inputs of the merge node.
5551 vector<tablevar_t *> mrg_qpn::get_input_tbls(){
// Returns the input tables of this node: a one-element vector holding
// table_name (the pointer is shared, not copied).
5555 vector<tablevar_t *> spx_qpn::get_input_tbls(){
5556 vector<tablevar_t *> retval(1,table_name);
// Returns the input tables of this node: a one-element vector holding
// table_name (the pointer is shared, not copied).
5560 vector<tablevar_t *> sgah_qpn::get_input_tbls(){
5561 vector<tablevar_t *> retval(1,table_name);
// Returns the input tables of this node: a one-element vector holding
// table_name (the pointer is shared, not copied).
5565 vector<tablevar_t *> rsgah_qpn::get_input_tbls(){
5566 vector<tablevar_t *> retval(1,table_name);
// Returns the input tables of this node: a one-element vector holding
// table_name (the pointer is shared, not copied).
5570 vector<tablevar_t *> sgahcwcb_qpn::get_input_tbls(){
5571 vector<tablevar_t *> retval(1,table_name);
// Returns the tablevar_t entries describing the inputs of the hash-join node.
5575 vector<tablevar_t *> join_eq_hash_qpn::get_input_tbls(){
// Returns the tablevar_t entries describing the inputs of the filter-join node.
5579 vector<tablevar_t *> filter_join_qpn::get_input_tbls(){
// Returns the tablevar_t entries describing the inputs of the watch-join node.
5583 vector<tablevar_t *> watch_join_qpn::get_input_tbls(){
5587 //-----------------------------------------------------------------
5588 // get output tables
5591 // This does not make sense, this fcn returns the output table *name*,
5592 // not its schema, and then there is another fcn to return the schema.
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5593 vector<tablevar_t *> output_file_qpn::get_output_tbls(){
5594 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5598 vector<tablevar_t *> watch_tbl_qpn::get_output_tbls(){
5599 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5603 vector<tablevar_t *> mrg_qpn::get_output_tbls(){
5604 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5608 vector<tablevar_t *> spx_qpn::get_output_tbls(){
5609 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5613 vector<tablevar_t *> sgah_qpn::get_output_tbls(){
5614 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5618 vector<tablevar_t *> rsgah_qpn::get_output_tbls(){
5619 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5623 vector<tablevar_t *> sgahcwcb_qpn::get_output_tbls(){
5624 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5628 vector<tablevar_t *> join_eq_hash_qpn::get_output_tbls(){
5629 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5633 vector<tablevar_t *> filter_join_qpn::get_output_tbls(){
5634 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
// Returns the output table of this node as a one-element vector holding a
// tablevar_t allocated with new and named after node_name.
// NOTE(review): ownership of the allocated tablevar_t is not evident here.
5639 vector<tablevar_t *> watch_join_qpn::get_output_tbls(){
5640 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5646 //-----------------------------------------------------------------
5649 // Associate colrefs with this schema.
5650 // Also, use this opportunity to create table_layout (the output schema).
5651 // If the output schema is ever needed before
// Re-binds the merge node to Schema: every source table variable in fm gets
// the table reference Schema reports for its schema name, and the first two
// merge variables take the schema references of fm[0] and fm[1].
5652 void mrg_qpn::bind_to_schema(table_list *Schema){
5654 for(t=0;t<fm.size();++t){
5655 int tblref = Schema->get_table_ref(fm[t]->get_schema_name());
5657 fm[t]->set_schema_ref(tblref );
5660 // Here I assume that the colrefs have been reordered
5661 // during analysis so that mvars line up with fm.
// NOTE(review): only mvars[0] and mvars[1] are updated here, and both fm and
// mvars are indexed without a size check - confirm they hold >= 2 entries.
5662 mvars[0]->set_schema_ref(fm[0]->get_schema_ref());
5663 mvars[1]->set_schema_ref(fm[1]->get_schema_ref());
5670 // Associate colrefs in SEs with this schema.
// Re-binds this node to Schema: table_name gets the table reference Schema
// reports for its schema name, then every WHERE predicate and select-list
// expression is bound against a from-list made of that single table.
5671 void spx_qpn::bind_to_schema(table_list *Schema){
5672 // Bind the tablevars in the From clause to the Schema
5673 // (it might have changed from analysis time)
5674 int t = Schema->get_table_ref(table_name->get_schema_name() );
5676 table_name->set_schema_ref(t );
5678 // Get the "from" clause
5679 tablevar_list_t fm(table_name);
5681 // Bind all SEs to this schema
5683 for(p=0;p<where.size();++p){
5684 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5687 for(s=0;s<select_list.size();++s){
5688 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5691 // Collect set of tuples referenced in this HFTA
5692 // input, internal, or output.
// Collects the column ids referenced by this node.
// All columns used in the WHERE predicates and the select list are gathered
// into tmp_cset; those whose schema field entry has at least one unpack
// function are additionally inserted into retval.
// NOTE(review): confirm how ext_fcns_only selects between the two sets.
5696 col_id_set spx_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5697 col_id_set retval, tmp_cset;
5699 for(p=0;p<where.size();++p){
5700 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5703 for(s=0;s<select_list.size();++s){
5704 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5706 col_id_set::iterator cisi;
5708 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
// Look up the field's schema entry to see whether it needs unpack functions.
5709 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5710 if(fe->get_unpack_fcns().size()>0)
5711 retval.insert((*cisi));
// Collects the column ids referenced by this node.
// All columns used in the WHERE predicates and the select list are gathered
// into tmp_cset; those whose schema field entry has at least one unpack
// function are additionally inserted into retval.
// NOTE(review): confirm how ext_fcns_only selects between the two sets.
5719 col_id_set filter_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5720 col_id_set retval, tmp_cset;
5722 for(p=0;p<where.size();++p){
5723 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5726 for(s=0;s<select_list.size();++s){
5727 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5729 col_id_set::iterator cisi;
5731 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
// Look up the field's schema entry to see whether it needs unpack functions.
5732 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5733 if(fe->get_unpack_fcns().size()>0)
5734 retval.insert((*cisi));
// Collects the column ids referenced by this node.
// All columns used in the WHERE predicates and the select list are gathered
// into tmp_cset; those whose schema field entry has at least one unpack
// function are additionally inserted into retval.
// NOTE(review): confirm how ext_fcns_only selects between the two sets.
5742 col_id_set watch_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5743 col_id_set retval, tmp_cset;
5745 for(p=0;p<where.size();++p){
5746 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5749 for(s=0;s<select_list.size();++s){
5750 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5752 col_id_set::iterator cisi;
5754 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
// Look up the field's schema entry to see whether it needs unpack functions.
5755 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5756 if(fe->get_unpack_fcns().size()>0)
5757 retval.insert((*cisi));
5768 // Associate colrefs in SEs with this schema.
// Re-binds the hash-join node to Schema: each table variable in `from` gets
// the table reference Schema reports for its schema name, then every WHERE
// predicate and select-list expression is bound against that from-list.
5769 void join_eq_hash_qpn::bind_to_schema(table_list *Schema){
5770 // Bind the tablevars in the From clause to the Schema
5771 // (it might have changed from analysis time)
5773 for(f=0;f<from.size();++f){
5774 string snm = from[f]->get_schema_name();
5775 int tbl_ref = Schema->get_table_ref(snm);
5777 from[f]->set_schema_ref(tbl_ref);
5780 // Bind all SEs to this schema
5781 tablevar_list_t fm(from);
5784 for(p=0;p<where.size();++p){
5785 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5788 for(s=0;s<select_list.size();++s){
5789 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5792 // Collect set of tuples referenced in this HFTA
5793 // input, internal, or output.
// Re-binds the filter-join node to Schema: each table variable in `from` gets
// the table reference Schema reports for its schema name, then every WHERE
// predicate and select-list expression is bound against that from-list.
5797 void filter_join_qpn::bind_to_schema(table_list *Schema){
5798 // Bind the tablevars in the From clause to the Schema
5799 // (it might have changed from analysis time)
5801 for(f=0;f<from.size();++f){
5802 string snm = from[f]->get_schema_name();
5803 int tbl_ref = Schema->get_table_ref(snm);
5805 from[f]->set_schema_ref(tbl_ref);
5808 // Bind all SEs to this schema
5809 tablevar_list_t fm(from);
5812 for(p=0;p<where.size();++p){
5813 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5816 for(s=0;s<select_list.size();++s){
5817 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5820 // Collect set of tuples referenced in this HFTA
5821 // input, internal, or output.
// Re-binds the watch-join node to Schema: each table variable in `from` gets
// the table reference Schema reports for its schema name, then every WHERE
// predicate and select-list expression is bound against that from-list.
5825 void watch_join_qpn::bind_to_schema(table_list *Schema){
5826 // Bind the tablevars in the From clause to the Schema
5827 // (it might have changed from analysis time)
5829 for(f=0;f<from.size();++f){
5830 string snm = from[f]->get_schema_name();
5831 int tbl_ref = Schema->get_table_ref(snm);
5833 from[f]->set_schema_ref(tbl_ref);
5836 // Bind all SEs to this schema
5837 tablevar_list_t fm(from);
5840 for(p=0;p<where.size();++p){
5841 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5844 for(s=0;s<select_list.size();++s){
5845 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5848 // Collect set of tuples referenced in this HFTA
5849 // input, internal, or output.
// Re-binds this aggregation node to Schema: table_name gets the table
// reference Schema reports for its schema name, then the WHERE and HAVING
// predicates, the select list, the group-by definitions and the aggregate
// expressions/operands are bound against a from-list of that single table.
5857 void sgah_qpn::bind_to_schema(table_list *Schema){
5858 // Bind the tablevars in the From clause to the Schema
5859 // (it might have changed from analysis time)
5862 int t = Schema->get_table_ref(table_name->get_schema_name() );
5864 table_name->set_schema_ref(t );
5866 // Get the "from" clause
5867 tablevar_list_t fm(table_name);
5871 // Bind all SEs to this schema
5873 for(p=0;p<where.size();++p){
5874 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5876 for(p=0;p<having.size();++p){
5877 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5880 for(s=0;s<select_list.size();++s){
5881 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5884 for(g=0;g<gb_tbl.size();++g){
5885 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Built-in aggregates bind their aggregate expression; the operand list is
// bound element by element below.
5888 for(a=0;a<aggr_tbl.size();++a){
5889 if(aggr_tbl.is_builtin(a)){
5890 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5892 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5894 for(o=0;o<opl.size();++o){
5895 bind_to_schema_se(opl[o],&fm,Schema);
// Collects the column ids referenced by this aggregation node.
// Columns used in the WHERE predicates, the group-by definitions and the
// aggregate expressions/operands are gathered into tmp_cset (gb_tbl is passed
// to the gather helpers); those whose schema field entry has at least one
// unpack function are additionally inserted into retval.
// NOTE(review): confirm how ext_fcns_only selects between the two sets.
5901 col_id_set sgah_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5902 col_id_set retval, tmp_cset;
5904 for(p=0;p<where.size();++p){
5905 gather_pr_col_ids(where[p]->pr, tmp_cset, &gb_tbl);
5908 for(g=0;g<gb_tbl.size();++g){
5909 gather_se_col_ids(gb_tbl.get_def(g), tmp_cset, &gb_tbl);
5912 for(a=0;a<aggr_tbl.size();++a){
5913 if(aggr_tbl.is_builtin(a)){
5914 gather_se_col_ids(aggr_tbl.get_aggr_se(a), tmp_cset, &gb_tbl);
5916 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5918 for(o=0;o<opl.size();++o){
5919 gather_se_col_ids(opl[o], tmp_cset, &gb_tbl);
5924 col_id_set::iterator cisi;
5926 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
// Look up the field's schema entry to see whether it needs unpack functions.
5927 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5928 if(fe->get_unpack_fcns().size()>0)
5929 retval.insert((*cisi));
// Re-binds this aggregation node to Schema: table_name gets the table
// reference Schema reports for its schema name, then the WHERE, HAVING and
// CLOSING_WHEN predicates, the select list, the group-by definitions and the
// aggregate expressions/operands are bound against a from-list of that table.
5938 void rsgah_qpn::bind_to_schema(table_list *Schema){
5939 // Bind the tablevars in the From clause to the Schema
5940 // (it might have changed from analysis time)
5941 int t = Schema->get_table_ref(table_name->get_schema_name() );
5943 table_name->set_schema_ref(t );
5945 // Get the "from" clause
5946 tablevar_list_t fm(table_name);
5948 // Bind all SEs to this schema
5950 for(p=0;p<where.size();++p){
5951 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5953 for(p=0;p<having.size();++p){
5954 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5956 for(p=0;p<closing_when.size();++p){
5957 bind_to_schema_pr(closing_when[p]->pr, &fm, Schema);
5960 for(s=0;s<select_list.size();++s){
5961 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5964 for(g=0;g<gb_tbl.size();++g){
5965 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Built-in aggregates bind their aggregate expression; the operand list is
// bound element by element below.
5968 for(a=0;a<aggr_tbl.size();++a){
5969 if(aggr_tbl.is_builtin(a)){
5970 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5972 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5974 for(o=0;o<opl.size();++o){
5975 bind_to_schema_se(opl[o],&fm,Schema);
// Re-binds this aggregation node to Schema: table_name gets the table
// reference Schema reports for its schema name, then the WHERE, HAVING,
// CLEANING_BY and CLEANING_WHEN predicates, the select list, the group-by
// definitions and the aggregate expressions/operands are bound against a
// from-list of that single table.
5982 void sgahcwcb_qpn::bind_to_schema(table_list *Schema){
5983 // Bind the tablevars in the From clause to the Schema
5984 // (it might have changed from analysis time)
5985 int t = Schema->get_table_ref(table_name->get_schema_name() );
5987 table_name->set_schema_ref(t );
5989 // Get the "from" clause
5990 tablevar_list_t fm(table_name);
5992 // Bind all SEs to this schema
5994 for(p=0;p<where.size();++p){
5995 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5997 for(p=0;p<having.size();++p){
5998 bind_to_schema_pr(having[p]->pr, &fm, Schema);
// Each predicate list is walked with its own size. Bounding these two loops
// by having.size() indexed cleanby/cleanwhen out of range when HAVING was
// longer, and left predicates unbound when HAVING was shorter.
6000 for(p=0;p<cleanby.size();++p){
6001 bind_to_schema_pr(cleanby[p]->pr, &fm, Schema);
6003 for(p=0;p<cleanwhen.size();++p){
6004 bind_to_schema_pr(cleanwhen[p]->pr, &fm, Schema);
6007 for(s=0;s<select_list.size();++s){
6008 bind_to_schema_se(select_list[s]->se, &fm, Schema);
6011 for(g=0;g<gb_tbl.size();++g){
6012 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Built-in aggregates bind their aggregate expression; the operand list is
// bound element by element below.
6015 for(a=0;a<aggr_tbl.size();++a){
6016 if(aggr_tbl.is_builtin(a)){
6017 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
6019 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
6021 for(o=0;o<opl.size();++o){
6022 bind_to_schema_se(opl[o],&fm,Schema);
6033 ///////////////////////////////////////////////////////////////
6034 ///////////////////////////////////////////////////////////////
6035 /// Functions for code generation.
6038 //-----------------------------------------------------------------
// Returns a newly allocated, default-constructed cplx_lit_table; Ext_fcns is
// not consulted for this node type.
6041 cplx_lit_table *watch_tbl_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6042 return(new cplx_lit_table());
// Returns a newly allocated, default-constructed cplx_lit_table; Ext_fcns is
// not consulted for this node type.
6045 cplx_lit_table *mrg_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6046 return(new cplx_lit_table());
// Builds the table of complex literals used by this node: a new
// cplx_lit_table is filled by scanning the select list and the WHERE
// predicates, then returned to the caller.
6049 cplx_lit_table *spx_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6051 cplx_lit_table *complex_literals = new cplx_lit_table();
6053 for(i=0;i<select_list.size();i++){
6054 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6056 for(i=0;i<where.size();++i){
6057 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6060 return(complex_literals);
// Builds the table of complex literals used by this node: a new
// cplx_lit_table is filled by scanning the aggregates, the select list, the
// group-by definitions and the WHERE and HAVING predicates, then returned.
6063 cplx_lit_table *sgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6065 cplx_lit_table *complex_literals = new cplx_lit_table();
6067 for(i=0;i<aggr_tbl.size();++i){
// Built-in, non-star aggregates contribute their aggregate expression;
// the operand list is scanned element by element below.
6068 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6069 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6071 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6072 for(j=0;j<opl.size();++j)
6073 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6077 for(i=0;i<select_list.size();i++){
6078 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6080 for(i=0;i<gb_tbl.size();i++){
6081 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6083 for(i=0;i<where.size();++i){
6084 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6086 for(i=0;i<having.size();++i){
6087 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6090 return(complex_literals);
// Builds the table of complex literals used by this node: a new
// cplx_lit_table is filled by scanning the aggregates, the select list, the
// group-by definitions and the WHERE, HAVING and CLOSING_WHEN predicates,
// then returned.
6094 cplx_lit_table *rsgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6096 cplx_lit_table *complex_literals = new cplx_lit_table();
6098 for(i=0;i<aggr_tbl.size();++i){
// Built-in, non-star aggregates contribute their aggregate expression;
// the operand list is scanned element by element below.
6099 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6100 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6102 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6103 for(j=0;j<opl.size();++j)
6104 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6108 for(i=0;i<select_list.size();i++){
6109 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6111 for(i=0;i<gb_tbl.size();i++){
6112 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6114 for(i=0;i<where.size();++i){
6115 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6117 for(i=0;i<having.size();++i){
6118 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6120 for(i=0;i<closing_when.size();++i){
6121 find_complex_literal_pr(closing_when[i]->pr,Ext_fcns, complex_literals);
6124 return(complex_literals);
// Builds the table of complex literals used by this node: a new
// cplx_lit_table is filled by scanning the aggregates, the select list, the
// group-by definitions and the WHERE, HAVING, CLEANING_WHEN and CLEANING_BY
// predicates, then returned.
6128 cplx_lit_table *sgahcwcb_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6130 cplx_lit_table *complex_literals = new cplx_lit_table();
6132 for(i=0;i<aggr_tbl.size();++i){
// Built-in, non-star aggregates contribute their aggregate expression;
// the operand list is scanned element by element below.
6133 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6134 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6136 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6137 for(j=0;j<opl.size();++j)
6138 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6142 for(i=0;i<select_list.size();i++){
6143 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6145 for(i=0;i<gb_tbl.size();i++){
6146 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6148 for(i=0;i<where.size();++i){
6149 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6151 for(i=0;i<having.size();++i){
6152 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6154 for(i=0;i<cleanwhen.size();++i){
6155 find_complex_literal_pr(cleanwhen[i]->pr,Ext_fcns, complex_literals);
6157 for(i=0;i<cleanby.size();++i){
6158 find_complex_literal_pr(cleanby[i]->pr,Ext_fcns, complex_literals);
6161 return(complex_literals);
// Builds the table of complex literals used by this node: a new
// cplx_lit_table is filled by scanning the select list and the WHERE
// predicates, then returned to the caller.
6164 cplx_lit_table *join_eq_hash_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6166 cplx_lit_table *complex_literals = new cplx_lit_table();
6168 for(i=0;i<select_list.size();i++){
6169 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6171 for(i=0;i<where.size();++i){
6172 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6175 return(complex_literals);
// Builds the table of complex literals used by this node: a new
// cplx_lit_table is filled by scanning the select list and the WHERE
// predicates, then returned to the caller.
6178 cplx_lit_table *filter_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6180 cplx_lit_table *complex_literals = new cplx_lit_table();
6182 for(i=0;i<select_list.size();i++){
6183 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6185 for(i=0;i<where.size();++i){
6186 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6189 return(complex_literals);
// Builds the table of complex literals used by this node: a new
// cplx_lit_table is filled by scanning the select list and the WHERE
// predicates, then returned to the caller.
6192 cplx_lit_table *watch_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6194 cplx_lit_table *complex_literals = new cplx_lit_table();
6196 for(i=0;i<select_list.size();i++){
6197 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6199 for(i=0;i<where.size();++i){
6200 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6203 return(complex_literals);
6210 //-----------------------------------------------------------------
6211 // get_handle_param_tbl
// Handle-parameter table of the watch-table node; the result vector is
// default-constructed and nothing is added to it here.
6213 vector<handle_param_tbl_entry *> watch_tbl_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6214 vector<handle_param_tbl_entry *> retval;
// Handle-parameter table of the merge node; the result vector is
// default-constructed and nothing is added to it here.
6218 vector<handle_param_tbl_entry *> mrg_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6219 vector<handle_param_tbl_entry *> retval;
// Collects the handle-parameter entries of this node into retval by scanning
// the select list and the WHERE predicates.
6224 vector<handle_param_tbl_entry *> spx_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6226 vector<handle_param_tbl_entry *> retval;
6228 for(i=0;i<select_list.size();i++){
6229 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6231 for(i=0;i<where.size();++i){
6232 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collects the handle-parameter entries of this node into retval by scanning
// the aggregates, the select list, the group-by definitions and the WHERE
// and HAVING predicates.
6239 vector<handle_param_tbl_entry *> sgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6241 vector<handle_param_tbl_entry *> retval;
6244 for(i=0;i<aggr_tbl.size();++i){
// Built-in, non-star aggregates contribute their aggregate expression;
// the operand list is scanned element by element below.
6245 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6246 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6248 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6249 for(j=0;j<opl.size();++j)
6250 find_param_handles_se(opl[j], Ext_fcns, retval);
6253 for(i=0;i<select_list.size();i++){
6254 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6256 for(i=0;i<gb_tbl.size();i++){
6257 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6259 for(i=0;i<where.size();++i){
6260 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6262 for(i=0;i<having.size();++i){
6263 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
// Collects the handle-parameter entries of this node into retval by scanning
// the aggregates, the select list, the group-by definitions and the WHERE,
// HAVING and CLOSING_WHEN predicates.
6270 vector<handle_param_tbl_entry *> rsgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6272 vector<handle_param_tbl_entry *> retval;
6275 for(i=0;i<aggr_tbl.size();++i){
// Built-in, non-star aggregates contribute their aggregate expression;
// the operand list is scanned element by element below.
6276 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6277 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6279 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6280 for(j=0;j<opl.size();++j)
6281 find_param_handles_se(opl[j], Ext_fcns, retval);
6284 for(i=0;i<select_list.size();i++){
6285 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6287 for(i=0;i<gb_tbl.size();i++){
6288 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6290 for(i=0;i<where.size();++i){
6291 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6293 for(i=0;i<having.size();++i){
6294 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6296 for(i=0;i<closing_when.size();++i){
6297 find_param_handles_pr(closing_when[i]->pr,Ext_fcns, retval);
// Collects the handle-parameter entries of this node into retval by scanning
// the aggregates, the select list, the group-by definitions and the WHERE,
// HAVING, CLEANING_WHEN and CLEANING_BY predicates.
6304 vector<handle_param_tbl_entry *> sgahcwcb_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6306 vector<handle_param_tbl_entry *> retval;
6309 for(i=0;i<aggr_tbl.size();++i){
// Built-in, non-star aggregates contribute their aggregate expression;
// the operand list is scanned element by element below.
6310 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6311 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6313 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6314 for(j=0;j<opl.size();++j)
6315 find_param_handles_se(opl[j], Ext_fcns, retval);
6318 for(i=0;i<select_list.size();i++){
6319 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6321 for(i=0;i<gb_tbl.size();i++){
6322 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6324 for(i=0;i<where.size();++i){
6325 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6327 for(i=0;i<having.size();++i){
6328 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6330 for(i=0;i<cleanwhen.size();++i){
6331 find_param_handles_pr(cleanwhen[i]->pr,Ext_fcns, retval);
6333 for(i=0;i<cleanby.size();++i){
6334 find_param_handles_pr(cleanby[i]->pr,Ext_fcns, retval);
// Collects the handle-parameter entries of this node into retval by scanning
// the select list and the WHERE predicates.
6340 vector<handle_param_tbl_entry *> join_eq_hash_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6342 vector<handle_param_tbl_entry *> retval;
6344 for(i=0;i<select_list.size();i++){
6345 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6347 for(i=0;i<where.size();++i){
6348 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collects the handle-parameter entries of this node into retval by scanning
// the select list and the WHERE predicates.
6355 vector<handle_param_tbl_entry *> filter_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6357 vector<handle_param_tbl_entry *> retval;
6359 for(i=0;i<select_list.size();i++){
6360 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6362 for(i=0;i<where.size();++i){
6363 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collects the handle-parameter entries of this node into retval by scanning
// the select list and the WHERE predicates.
6369 vector<handle_param_tbl_entry *> watch_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6371 vector<handle_param_tbl_entry *> retval;
6373 for(i=0;i<select_list.size();i++){
6374 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6376 for(i=0;i<where.size();++i){
6377 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6385 ///////////////////////////////////////////////////////////////
6386 ///////////////////////////////////////////////////////////////
6387 /// Functions for operator output rates estimations
6390 //-----------------------------------------------------------------
6391 // get_rate_estimate
// Placeholder output-rate estimate: the constant SPX_SELECTIVITY times
// DEFAULT_INTERFACE_RATE; no property of this node instance is consulted.
6393 double spx_qpn::get_rate_estimate() {
6395 // dummy method for now
6396 return SPX_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Placeholder output-rate estimate: the constant SGAH_SELECTIVITY times
// DEFAULT_INTERFACE_RATE; no property of this node instance is consulted.
6399 double sgah_qpn::get_rate_estimate() {
6401 // dummy method for now
6402 return SGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Placeholder output-rate estimate: the constant RSGAH_SELECTIVITY times
// DEFAULT_INTERFACE_RATE; no property of this node instance is consulted.
6405 double rsgah_qpn::get_rate_estimate() {
6407 // dummy method for now
6408 return RSGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Placeholder output-rate estimate: the constant SGAHCWCB_SELECTIVITY times
// DEFAULT_INTERFACE_RATE; no property of this node instance is consulted.
6411 double sgahcwcb_qpn::get_rate_estimate() {
6413 // dummy method for now
6414 return SGAHCWCB_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Placeholder output-rate estimate: returns DEFAULT_INTERFACE_RATE unchanged;
// no property of this node instance is consulted.
6417 double watch_tbl_qpn::get_rate_estimate() {
6419 // dummy method for now
6420 return DEFAULT_INTERFACE_RATE;
// Placeholder output-rate estimate: the constant MRG_SELECTIVITY times
// DEFAULT_INTERFACE_RATE; no property of this node instance is consulted.
6423 double mrg_qpn::get_rate_estimate() {
6425 // dummy method for now
6426 return MRG_SELECTIVITY * DEFAULT_INTERFACE_RATE;
// Placeholder output-rate estimate: the constant JOIN_EQ_HASH_SELECTIVITY
// times DEFAULT_INTERFACE_RATE; no property of this node instance is consulted.
6429 double join_eq_hash_qpn::get_rate_estimate() {
6431 // dummy method for now
6432 return JOIN_EQ_HASH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6436 //////////////////////////////////////////////////////////////////////////////
6437 //////////////////////////////////////////////////////////////////////////////
6438 ///// Generate functors
6443 //-------------------------------------------------------------------------
6444 // Code generation utilities.
6445 //-------------------------------------------------------------------------
6447 // Globals referenced by generate utilities
// File-local group-by table consulted by generate_se_code when it expands a
// group-by column reference into its defining expression; it must point at
// the current node's table before that function is called on such an SE.
6449 static gb_table *segen_gb_tbl; // Table of all group-by attributes.
6453 // Generate code that makes reference
6454 // to the tuple, and not to any aggregates.
6455 // NEW : it might reference a stateful function.
// Generates the C code text for scalar expression `se`, recursing over its
// sub-expressions and dispatching on se->get_operator_type().
// Generated names follow fixed patterns: handle_param_<n>,
// complex_literal_<n>, param_<name>, unpack_var_<field>_<tablevar ref> and
// partial_fcn_result_<n>. Group-by column references are expanded into their
// defining expression taken from the file-level segen_gb_tbl.
// An unknown operator type is reported on stderr and yields the text
// "ERROR in generate_se_code".
// NOTE(review): names are formatted with sprintf into tmpstr, whose
// declaration is not in this excerpt - confirm it is large enough for
// arbitrary field names.
6456 static string generate_se_code(scalarexp_t *se,table_list *schema){
6458 data_type *ldt, *rdt;
6460 vector<scalarexp_t *> operands;
6463 switch(se->get_operator_type()){
6465 if(se->is_handle_ref()){
6466 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6470 if(se->get_literal()->is_cpx_lit()){
6471 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6475 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
6477 if(se->is_handle_ref()){
6478 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6482 ret.append("param_");
6483 ret.append(se->get_param_name());
// Unary operator: use the data type's complex-operator function when the
// operand type defines one, otherwise emit the operator text itself.
6486 ldt = se->get_left_se()->get_data_type();
6487 if(ldt->complex_operator(se->get_op()) ){
6488 ret.append( ldt->get_complex_operator(se->get_op()) );
6490 ret.append(generate_se_code(se->get_left_se(),schema));
6494 ret.append(se->get_op());
6495 ret.append(generate_se_code(se->get_left_se(),schema));
// Binary operator: same choice, keyed on both operand types.
6500 ldt = se->get_left_se()->get_data_type();
6501 rdt = se->get_right_se()->get_data_type();
6503 if(ldt->complex_operator(rdt, se->get_op()) ){
6504 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6506 ret.append(generate_se_code(se->get_left_se(),schema));
6508 ret.append(generate_se_code(se->get_right_se(),schema));
6512 ret.append(generate_se_code(se->get_left_se(),schema));
6513 ret.append(se->get_op());
6514 ret.append(generate_se_code(se->get_right_se(),schema));
6519 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6520 // so return the defining code.
6521 int gref = se->get_gb_ref();
6522 scalarexp_t *gdef_se = segen_gb_tbl->get_def(gref);
6523 ret = generate_se_code(gdef_se, schema );
6526 sprintf(tmpstr,"unpack_var_%s_%d",
6527 se->get_colref()->get_field().c_str(), se->get_colref()->get_tablevar_ref() );
// Function call: a partial function refers to its precomputed result;
// otherwise a call is emitted, led by the state-variable address and "cd"
// when the function has a storage state.
6532 if(se->is_partial()){
6533 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6536 ret += se->op + "(";
6537 operands = se->get_operands();
6538 bool first_elem = true;
6539 if(se->get_storage_state() != ""){
6540 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6543 for(o=0;o<operands.size();o++){
6544 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references get special treatment.
6545 if(operands[o]->get_data_type()->is_buffer_type() &&
6546 (! (operands[o]->is_handle_ref()) ) )
6548 ret += generate_se_code(operands[o], schema);
6554 fprintf(stderr,"INTERNAL ERROR in generate_se_code (hfta), line %d, character %d: unknown operator type %d\n",
6555 se->get_lineno(), se->get_charno(),se->get_operator_type());
6556 return("ERROR in generate_se_code");
6560 // generate code that refers only to aggregate data and constants.
6561 // NEW : modified to handle superaggregates and stateful fcn refs.
6562 // Assume that the state is in *stval
// Generates the C code text for scalar expression `se` in a context where
// only aggregate data and constants are available.
//   gbvar  - prefix of group-by variable names (the gb ref number is appended)
//   aggvar - prefix placed before "aggr_var<n>" for ordinary aggregates
// Superaggregates are read from stval->aggr_var<n>, UDAF results from
// udaf_ret_<n>, partial functions from partial_fcn_result_<n>.
// A column reference that is not a group-by reference is reported on stderr.
// An unknown operator type is reported on stderr and yields the text
// "ERROR in generate_se_code_fm_aggr".
// NOTE(review): names are formatted with sprintf into tmpstr, whose
// declaration is not in this excerpt - confirm its size is sufficient.
6563 static string generate_se_code_fm_aggr(scalarexp_t *se, string gbvar, string aggvar, table_list *schema){
6566 data_type *ldt, *rdt;
6568 vector<scalarexp_t *> operands;
6571 switch(se->get_operator_type()){
6573 if(se->is_handle_ref()){
6574 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6578 if(se->get_literal()->is_cpx_lit()){
6579 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6583 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
6585 if(se->is_handle_ref()){
6586 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6590 ret.append("param_");
6591 ret.append(se->get_param_name());
// Unary operator: use the data type's complex-operator function when the
// operand type defines one, otherwise emit the operator text itself.
6594 ldt = se->get_left_se()->get_data_type();
6595 if(ldt->complex_operator(se->get_op()) ){
6596 ret.append( ldt->get_complex_operator(se->get_op()) );
6598 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6602 ret.append(se->get_op());
6603 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
// Binary operator: same choice, keyed on both operand types.
6608 ldt = se->get_left_se()->get_data_type();
6609 rdt = se->get_right_se()->get_data_type();
6611 if(ldt->complex_operator(rdt, se->get_op()) ){
6612 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6614 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6616 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6620 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6621 ret.append(se->get_op());
6622 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6627 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6628 // so return the defining code.
// NOTE(review): the code below formats the name <gbvar><gb ref> rather than
// expanding the defining expression, so the inherited comment above appears
// stale - confirm.
6629 sprintf(tmpstr,"%s%d",gbvar.c_str(),se->get_gb_ref());
6633 fprintf(stderr,"ERROR reference to non-GB column ref not permitted here,"
6634 "error in query_plan.cc:generate_se_code_fm_aggr, line %d, character %d.\n",
6635 se->get_lineno(), se->get_charno());
6641 if(se->is_superaggr()){
6642 sprintf(tmpstr,"stval->aggr_var%d",se->get_aggr_ref());
6644 sprintf(tmpstr,"%saggr_var%d",aggvar.c_str(),se->get_aggr_ref());
// A function SE with a non-negative aggregate reference is read from
// udaf_ret_<n>.
6650 if(se->get_aggr_ref() >= 0){
6651 sprintf(tmpstr,"udaf_ret_%d",se->get_aggr_ref());
6656 if(se->is_partial()){
6657 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6660 ret += se->op + "(";
6661 bool first_elem = true;
6662 if(se->get_storage_state() != ""){
6663 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6666 operands = se->get_operands();
6667 for(o=0;o<operands.size();o++){
6668 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references get special treatment.
6669 if(operands[o]->get_data_type()->is_buffer_type() &&
6670 (! (operands[o]->is_handle_ref()) ) )
6672 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
6678 fprintf(stderr,"INTERNAL ERROR in query_plan.cc::generate_se_code_fm_aggr, line %d, character %d: unknown operator type %d\n",
6679 se->get_lineno(), se->get_charno(),se->get_operator_type());
6680 return("ERROR in generate_se_code_fm_aggr");
// Generates the statement that evaluates partial function `se` in an
// aggregate-only context. The generated text starts with
// "\tretval = <op>( " and passes &partial_fcn_result_<pfn_id>, the
// state-variable address and "cd" when the function has a storage state,
// and then the operands rendered by generate_se_code_fm_aggr.
// A non-function SE is reported on stderr and yields the text
// "ERROR in unpack_partial_fcn_fm_aggr".
6686 static string unpack_partial_fcn_fm_aggr(scalarexp_t *se, int pfn_id, string gbvar, string aggvar, table_list *schema){
6689 vector<scalarexp_t *> operands;
6692 if(se->get_operator_type() != SE_FUNC){
6693 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn_fm_aggr. line %d, character %d\n",
6694 se->get_lineno(), se->get_charno());
6695 return("ERROR in unpack_partial_fcn_fm_aggr");
// Terminated with ';' - the original ',' chained this assignment to the
// sprintf below through the comma operator (same effect, but a typo).
6698 ret = "\tretval = " + se->get_op() + "( ";
6699 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
6702 if(se->get_storage_state() != ""){
6703 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6706 operands = se->get_operands();
6707 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references get special treatment.
6709 if(operands[o]->get_data_type()->is_buffer_type() &&
6710 (! (operands[o]->is_handle_ref()) ) )
6712 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
// Emit the C statement that evaluates partial function `se` and captures the
// call's return value in `retval`.  Same shape as the _fm_aggr variant, but
// operands are rendered with generate_se_code.
//   se      - scalar expression; must have operator type SE_FUNC
//   pfn_id  - index used to name the result slot partial_fcn_result_<pfn_id>
//   schema  - forwarded to generate_se_code for each operand
// A non-function `se` is reported on stderr and an error string is returned.
6720 static string unpack_partial_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6723 vector<scalarexp_t *> operands;
// Guard: only function-type expressions can be unpacked.
6725 if(se->get_operator_type() != SE_FUNC){
6726 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn. line %d, character %d\n",
6727 se->get_lineno(), se->get_charno());
6728 return("ERROR in unpack_partial_fcn");
// Open the call text: "retval = <fcn>( " followed by the result-slot address.
// NOTE(review): the next statement ends with ',' (comma operator), not ';'.
// It still compiles because an expression statement follows; likely a typo.
6731 ret = "\tretval = " + se->get_op() + "( ",
6732 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
// Functions that declare a storage state also receive that state variable
// (by address, from stval) and `cd`.
6735 if(se->get_storage_state() != ""){
6736 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
// Append the operands.
6739 operands = se->get_operands();
6740 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references are special-cased;
// the guarded statement is not visible here (presumably an address-of
// prefix -- TODO confirm).
6742 if(operands[o]->get_data_type()->is_buffer_type() &&
6743 (! (operands[o]->is_handle_ref()) ) )
6745 ret += generate_se_code(operands[o], schema);
// Build the C call expression "<fcn>(...)" for function expression `se`.
// Unlike the unpack_partial_fcn helpers, the visible code emits neither a
// `retval =` assignment nor a result-slot argument.
//   se      - scalar expression; must have operator type SE_FUNC
//   pfn_id  - not referenced in the visible lines of this function
//   schema  - forwarded to generate_se_code for each operand
// A non-function `se` is reported on stderr and an error string is returned.
6752 static string generate_cached_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6755 vector<scalarexp_t *> operands;
// Guard: only function-type expressions are accepted.
6757 if(se->get_operator_type() != SE_FUNC){
6758 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to generate_cached_fcn. line %d, character %d\n",
6759 se->get_lineno(), se->get_charno());
6760 return("ERROR in generate_cached_fcn");
6763 ret = se->get_op()+"(";
// Stateful functions take their state variable and `cd` as leading arguments
// (note the trailing comma, since operands follow).
6765 if(se->get_storage_state() != ""){
6766 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd,";
// Append the operands.
6769 operands = se->get_operands();
6770 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references are special-cased;
// the guarded statement is not visible here (presumably an address-of
// prefix -- TODO confirm).
6772 if(operands[o]->get_data_type()->is_buffer_type() &&
6773 (! (operands[o]->is_handle_ref()) ) )
6775 ret += generate_se_code(operands[o], schema);
// Translate a query-language comparison operator into its C spelling:
// "=" becomes "==" and "<>" becomes "!=".  Handling of any other operator is
// not visible here (presumably returned unchanged -- TODO confirm).
6786 static string generate_C_comparison_op(string op){
6787 if(op == "=") return("==");
6788 if(op == "<>") return("!=");
// Translate a query-language boolean operator into C.  AND / OR / NOT are
// each recognized in upper, capitalized and lower case; the strings returned
// for them are not visible in this listing.  Any other operator yields the
// marker text "ERROR UNKNOWN BOOLEAN OPERATOR".
6792 static string generate_C_boolean_op(string op){
6793 if( (op == "AND") || (op == "And") || (op == "and") ){
6796 if( (op == "OR") || (op == "Or") || (op == "or") ){
6799 if( (op == "NOT") || (op == "Not") || (op == "not") ){
// Fallthrough: the operator matched none of the spellings above.
6803 return("ERROR UNKNOWN BOOLEAN OPERATOR");
// Render predicate `pr` as a C boolean expression, recursing through
// sub-predicates and using generate_se_code for scalar operands.
//   pr     - predicate tree node; dispatch is on pr->get_operator_type()
//   schema - forwarded to generate_se_code
// An unrecognized operator type is reported on stderr and an error string is
// returned.  Most `case` labels are not visible in this listing; the branch
// descriptions below are inferred from the calls each branch makes.
6807 static string generate_predicate_code(predicate_t *pr,table_list *schema){
6809 vector<literal_t *> litv;
6811 data_type *ldt, *rdt;
6812 vector<scalarexp_t *> op_list;
6815 switch(pr->get_operator_type()){
// Literal-list branch: one test per literal in pr->get_lit_vec(), joined
// with " || ".
6817 ldt = pr->get_left_se()->get_data_type();
6820 litv = pr->get_lit_vec();
6821 for(i=0;i<litv.size();i++){
6822 if(i>0) ret.append(" || ");
// Types needing a complex comparison call the type's hfta comparison
// function and test its result against 0.
6825 if(ldt->complex_comparison(ldt) ){
6826 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6828 if(ldt->is_buffer_type() )
6830 ret.append(generate_se_code(pr->get_left_se(), schema));
6832 if(ldt->is_buffer_type() )
// Complex literals are referenced by their complex_literal_<n> variable;
// others are rendered with to_C_code.
6834 if(litv[i]->is_cpx_lit()){
6835 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6838 ret.append(litv[i]->to_C_code(""));
6840 ret.append(") == 0");
// Otherwise: left expression, then the literal rendered with to_hfta_C_code
// (the text between them is not visible here).
6842 ret.append(generate_se_code(pr->get_left_se(), schema));
6844 ret.append(litv[i]->to_hfta_C_code(""));
// Two-expression comparison branch.
6853 ldt = pr->get_left_se()->get_data_type();
6854 rdt = pr->get_right_se()->get_data_type();
// Complex comparison: call the comparison function on both sides, then apply
// the translated operator (the right-hand text after it is not visible).
6857 if(ldt->complex_comparison(rdt) ){
6858 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6860 if(ldt->is_buffer_type() )
6862 ret.append(generate_se_code(pr->get_left_se(),schema) );
6864 if(rdt->is_buffer_type() )
6866 ret.append(generate_se_code(pr->get_right_se(),schema) );
6868 ret.append( generate_C_comparison_op(pr->get_op()));
// Simple comparison: <left> <op> <right>.
6871 ret.append(generate_se_code(pr->get_left_se(),schema) );
6872 ret.append( generate_C_comparison_op(pr->get_op()));
6873 ret.append(generate_se_code(pr->get_right_se(),schema) );
// Prefix boolean operator applied to the left sub-predicate.
6879 ret.append( generate_C_boolean_op(pr->get_op()) );
6880 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
// Infix boolean operator between two sub-predicates.
6883 case PRED_BINARY_OP:
6885 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6886 ret.append( generate_C_boolean_op(pr->get_op()) );
6887 ret.append(generate_predicate_code(pr->get_right_pr(),schema) );
// Predicate-function call: "<name>( " followed by comma-separated operands.
6891 ret += pr->get_op() + "( ";
6892 op_list = pr->get_op_list();
6893 for(o=0;o<op_list.size();++o){
6894 if(o>0) ret += ", ";
// Buffer-typed, non-handle operands are special-cased (guarded statement not
// visible here).
6895 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6897 ret += generate_se_code(op_list[o], schema);
// Unknown operator type: report and return a marker string.
6902 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6903 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6904 return("ERROR in generate_predicate_code");
// Render predicate `pr` as a C boolean expression in an aggregate context.
// Mirrors generate_predicate_code, but scalar operands are rendered with
// generate_se_code_fm_aggr.
//   pr            - predicate tree node; dispatch is on get_operator_type()
//   gbvar, aggvar - forwarded unchanged to generate_se_code_fm_aggr and to
//                   the recursive calls
//   schema        - forwarded to generate_se_code_fm_aggr
// An unrecognized operator type is reported on stderr and an error string is
// returned; both now name this function rather than generate_predicate_code.
// Most `case` labels are not visible in this listing; the branch
// descriptions below are inferred from the calls each branch makes.
6908 static string generate_predicate_code_fm_aggr(predicate_t *pr, string gbvar, string aggvar,table_list *schema){
6910 vector<literal_t *> litv;
6912 data_type *ldt, *rdt;
6913 vector<scalarexp_t *> op_list;
6916 switch(pr->get_operator_type()){
// Literal-list branch: one test per literal in pr->get_lit_vec(), joined
// with " || ".
6918 ldt = pr->get_left_se()->get_data_type();
6921 litv = pr->get_lit_vec();
6922 for(i=0;i<litv.size();i++){
6923 if(i>0) ret.append(" || ");
// Types needing a complex comparison call the type's hfta comparison
// function and test its result against 0.
6926 if(ldt->complex_comparison(ldt) ){
6927 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6929 if(ldt->is_buffer_type() )
6931 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6933 if(ldt->is_buffer_type() )
// Complex literals are referenced by their complex_literal_<n> variable;
// others are rendered with to_C_code.
6935 if(litv[i]->is_cpx_lit()){
6936 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6939 ret.append(litv[i]->to_C_code(""));
6941 ret.append(") == 0");
// Otherwise: left expression, then the literal rendered with to_hfta_C_code
// (the text between them is not visible here).
6943 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6945 ret.append(litv[i]->to_hfta_C_code(""));
// Two-expression comparison branch.
6954 ldt = pr->get_left_se()->get_data_type();
6955 rdt = pr->get_right_se()->get_data_type();
// Complex comparison: call the comparison function on both sides, then apply
// the translated operator (the right-hand text after it is not visible).
6958 if(ldt->complex_comparison(rdt) ){
6959 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6961 if(ldt->is_buffer_type() )
6963 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6965 if(rdt->is_buffer_type() )
6967 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
6969 ret.append( generate_C_comparison_op(pr->get_op()));
// Simple comparison: <left> <op> <right>.
6972 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6973 ret.append( generate_C_comparison_op(pr->get_op()));
6974 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
// Prefix boolean operator applied to the left sub-predicate.
6980 ret.append( generate_C_boolean_op(pr->get_op()) );
6981 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
// Infix boolean operator between two sub-predicates.
6984 case PRED_BINARY_OP:
6986 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6987 ret.append( generate_C_boolean_op(pr->get_op()) );
6988 ret.append(generate_predicate_code_fm_aggr(pr->get_right_pr(), gbvar, aggvar,schema) );
// Predicate-function call: "<name>( " followed by comma-separated operands.
6992 ret += pr->get_op() + "( ";
6993 op_list = pr->get_op_list();
6994 for(o=0;o<op_list.size();++o){
6995 if(o>0) ret += ", ";
// Buffer-typed, non-handle operands are special-cased (guarded statement not
// visible here).
6996 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6998 ret += generate_se_code_fm_aggr(op_list[o], gbvar, aggvar, schema);
// Unknown operator type: report and return a marker string that identifies
// this function, so diagnostics are not attributed to the non-aggregate twin.
7003 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code_fm_aggr, line %d, character %d, unknown predicate operator type %d\n",
7004 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
7005 return("ERROR in generate_predicate_code_fm_aggr");
// Build C text testing whether lhs_op and rhs_op (already-rendered C
// expressions of type `dt`) are equal.
// Types needing a complex comparison: call dt's hfta comparison function and
// test its result "== 0".  Other types: lhs_op and rhs_op are joined directly
// (the operator text between them is not visible in this listing).
7013 static string generate_equality_test(string &lhs_op, string &rhs_op, data_type *dt){
7016 if(dt->complex_comparison(dt) ){
7017 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types are special-cased for each argument (guarded statements not
// visible here).
7019 if(dt->is_buffer_type() )
7023 if(dt->is_buffer_type() )
7025 ret.append(rhs_op );
7026 ret.append(") == 0");
// Simple-type path.
7028 ret.append(lhs_op );
7030 ret.append(rhs_op );
// Build C text for an ordering test between lhs_op and rhs_op (already
// rendered C expressions of type `dt`).
// Types needing a complex comparison: call dt's hfta comparison function and
// test its result "== 1".  Other types: lhs_op and rhs_op are joined directly
// (the operator text between them is not visible in this listing).
// NOTE(review): which argument order makes the comparison function return 1
// cannot be determined from here -- confirm against its definition.
7036 static string generate_lt_test(string &lhs_op, string &rhs_op, data_type *dt){
7039 if(dt->complex_comparison(dt) ){
7040 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types are special-cased for each argument (guarded statements not
// visible here).
7042 if(dt->is_buffer_type() )
7046 if(dt->is_buffer_type() )
7048 ret.append(rhs_op );
7049 ret.append(") == 1");
// Simple-type path.
7051 ret.append(lhs_op );
7053 ret.append(rhs_op );
// Build C text comparing lhs_op with rhs_op (already-rendered C expressions
// of type `dt`).
// Types needing a complex comparison: call dt's hfta comparison function and
// test its result "== 0".  Other types: lhs_op and rhs_op are joined directly
// (the operator text between them is not visible in this listing).
// NOTE(review): the visible lines match generate_equality_test exactly; any
// difference must lie in lines not shown here.
7059 static string generate_comparison(string &lhs_op, string &rhs_op, data_type *dt){
7062 if(dt->complex_comparison(dt) ){
7063 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types are special-cased for each argument (guarded statements not
// visible here).
7065 if(dt->is_buffer_type() )
7069 if(dt->is_buffer_type() )
7071 ret.append(rhs_op );
7072 ret.append(") == 0");
// Simple-type path.
7074 ret.append(lhs_op );
7076 ret.append(rhs_op );
7083 // Here I assume that only MIN and MAX aggregates can be computed
7084 // over BUFFER data types.
//
// generate_aggr_update: emit the C statement(s) that fold one more input
// value into aggregate number `aidx` of `atbl`.
//   var    - C lvalue text naming the aggregate's storage
//   atbl   - aggregate table; supplies operator, types and operand expressions
//   aidx   - index of the aggregate within atbl
//   schema - forwarded to generate_se_code
// An unrecognized builtin operator is reported on stderr.  Several of the
// `if(op == ...)` tests are not visible in this listing; the notes below
// describe only what each visible branch emits.
7086 static string generate_aggr_update(string var, aggregate_table *atbl,int aidx, table_list *schema){
7087 string retval = "\t\t";
7088 string op = atbl->get_op(aidx);
// User-defined aggregate: call <op>_HFTA_AGGR_UPDATE_(var, operands...).
7091 if(! atbl->is_builtin(aidx)) {
7093 retval += op+"_HFTA_AGGR_UPDATE_(";
// The storage is passed by address unless its type is fstring_t.
7094 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7095 retval+="("+var+")";
7096 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
// NOTE(review): the loop opens with a doubled brace "{{"; the matching close
// is not visible here.
7097 for(o=0;o<opl.size();++o){{
// Buffer-typed, non-handle operands are special-cased (guarded statement not
// visible here).
7099 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7101 retval += generate_se_code(opl[o], schema);
7110 // builtin processing
7111 data_type *dt = atbl->get_data_type(aidx);
// Increment branch (presumably COUNT -- selecting test not visible).
7115 retval.append("++;\n");
// Accumulate branch (presumably SUM -- selecting test not visible).
7120 retval.append(" += ");
7121 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7122 retval.append(";\n");
// Keep-smaller branch (presumably MIN): evaluate the input into
// aggr_tmp_<aidx>, then replace var when the temp compares lower.
7126 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7127 retval += dt->make_host_cvar(tmpstr);
7129 retval += generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7130 if(dt->complex_comparison(dt)){
7131 if(dt->is_buffer_type())
7132 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7134 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7136 sprintf(tmpstr,"\t\tif(aggr_tmp_%d < %s)\n",aidx,var.c_str());
7138 retval.append(tmpstr);
// Buffer types are replaced through the type's buffer-replace function;
// other types by plain assignment.
7139 if(dt->is_buffer_type()){
7140 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7142 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7144 retval.append(tmpstr);
// Keep-larger branch (presumably MAX): same shape with the comparison
// reversed.
7149 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7150 retval+=dt->make_host_cvar(tmpstr);
7152 retval+=generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7153 if(dt->complex_comparison(dt)){
7154 if(dt->is_buffer_type())
7155 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7157 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7159 sprintf(tmpstr,"\t\tif(aggr_tmp_%d > %s)\n",aidx,var.c_str());
7161 retval.append(tmpstr);
7162 if(dt->is_buffer_type()){
7163 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7165 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7167 retval.append(tmpstr);
// Bitwise aggregates: and / or / xor the input into var.
7172 if(op == "AND_AGGR"){
7174 retval.append(" &= ");
7175 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7176 retval.append(";\n");
7179 if(op == "OR_AGGR"){
7181 retval.append(" |= ");
7182 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7183 retval.append(";\n");
7186 if(op == "XOR_AGGR"){
7188 retval.append(" ^= ");
7189 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7190 retval.append(";\n");
// Running-average branch (presumably AVG): maintain <var>_sum and <var>_cnt
// and recompute var as their quotient.
7194 retval += var+"_sum += "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7195 retval += "\t\t"+var+"_cnt += 1;\n";
7196 retval += "\t\t"+var+" = "+var+"_sum / "+var+"_cnt;\n";
// No branch matched.
7200 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_update.\n",op.c_str());
// Emit C code that removes the contribution of aggregate `var` from the
// super-aggregate `supervar`, for aggregate number `aidx` of `atbl`.
//   var, supervar - C lvalue text for the aggregate and its super-aggregate
//   schema        - not referenced in the visible lines
// An unrecognized operator is reported on stderr.
7209 static string generate_superaggr_minus(string var, string supervar, aggregate_table *atbl,int aidx, table_list *schema){
7210 string retval = "\t\t";
7211 string op = atbl->get_op(aidx);
// User-defined aggregate: call <op>_HFTA_AGGR_MINUS_(supervar, var).  Each
// argument is passed by address unless the storage type is fstring_t.
7214 if(! atbl->is_builtin(aidx)) {
7216 retval += op+"_HFTA_AGGR_MINUS_(";
7217 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7218 retval+="("+supervar+"),";
7219 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7220 retval+="("+var+");\n";
// COUNT and SUM are undone by subtraction.
7226 if(op == "COUNT" || op == "SUM"){
7227 retval += supervar + "-=" +var + ";\n";
// XOR is its own inverse.
7231 if(op == "XOR_AGGR"){
7232 retval += supervar + "^=" +var + ";\n";
// MIN and MAX: the statement taken for these is not visible in this listing.
7236 if(op=="MIN" || op == "MAX")
7239 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_superaggr_minus.\n",op.c_str());
// Emit the C statement(s) that initialize aggregate number `aidx` of `atbl`
// from the first input value.
//   var    - C lvalue text naming the aggregate's storage
//   schema - forwarded to generate_se_code
// An unrecognized builtin operator is reported on stderr.
7248 static string generate_aggr_init(string var, aggregate_table *atbl,int aidx, table_list *schema){
7250 string op = atbl->get_op(aidx);
// User-defined aggregate: call <op>_HFTA_AGGR_INIT_(var), then immediately
// <op>_HFTA_AGGR_UPDATE_(var, operands...) to absorb the first value.  The
// storage is passed by address unless its type is fstring_t.
7253 if(! atbl->is_builtin(aidx)){
7255 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_INIT_(";
7256 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7257 retval+="("+var+"));\n";
7259 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_UPDATE_(";
7260 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7261 retval+="("+var+")";
7262 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
7264 for(o=0;o<opl.size();++o){
// Buffer-typed, non-handle operands are special-cased (guarded statement not
// visible here).
7266 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7268 retval += generate_se_code(opl[o],schema);
7274 // builtin aggregate processing
7275 data_type *dt = atbl->get_data_type(aidx);
// Start-at-one branch (presumably COUNT -- selecting test not visible).
7279 retval.append(" = 1;\n");
// Value-seeded aggregates start from the first input value.
7283 if(op == "SUM" || op == "MIN" || op == "MAX" || op == "AND_AGGR" ||
7284 op=="AVG" || op == "OR_AGGR" || op == "XOR_AGGR"){
// Buffer types: evaluate into aggr_tmp_<aidx>, then copy into var with the
// type's assign-copy function.
7285 if(dt->is_buffer_type()){
7286 sprintf(tmpstr,"\t\taggr_tmp_%d = %s;\n",aidx,generate_se_code(atbl->get_aggr_se(aidx), schema ).c_str() );
7287 retval.append(tmpstr);
7288 sprintf(tmpstr,"\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_assign_copy().c_str(),var.c_str(),aidx);
7289 retval.append(tmpstr);
// Running-average seeding (presumably AVG -- selecting test not visible):
// sum = value, count = 1, var = sum.
7292 retval += var+"_sum = "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7293 retval += "\t"+var+"_cnt = 1;\n";
7294 retval += "\t"+var+" = "+var+"_sum;\n";
// Remaining case: the value expression followed by ";" (the text emitted
// before it is not visible here).
7298 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema));
7299 retval.append(";\n");
// No branch matched.
7305 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_init.\n",op.c_str());
// Emit the C statement(s) that reset aggregate number `aidx` of `atbl` to an
// empty state, without consuming an input value.
//   var    - C lvalue text naming the aggregate's storage
//   schema - not referenced in the visible lines
// Builtin aggregates over buffer types are unsupported and yield an error
// string; an unrecognized operator is reported on stderr.
7313 static string generate_aggr_reinitialize(string var, aggregate_table *atbl,int aidx, table_list *schema){
7315 string op = atbl->get_op(aidx);
// User-defined aggregate: running aggregates use <op>_HFTA_AGGR_REINIT_,
// others use <op>_HFTA_AGGR_INIT_.  The storage is passed by address unless
// its type is fstring_t.
7318 if(! atbl->is_builtin(aidx)){
7320 retval += "\t"+atbl->get_op(aidx);
7321 if(atbl->is_running_aggr(aidx)){
7322 retval += "_HFTA_AGGR_REINIT_(";
7324 retval += "_HFTA_AGGR_INIT_(";
7326 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7327 retval+="("+var+"));\n";
7331 // builtin aggregate processing
7332 data_type *dt = atbl->get_data_type(aidx);
// Reset-to-zero branch (presumably COUNT -- selecting test not visible).
7336 retval.append(" = 0;\n");
// SUM and the bitwise aggregates reset to the literal that literal_t
// constructs from the type indicator alone.
7340 if(op == "SUM" || op == "AND_AGGR" ||
7341 op == "OR_AGGR" || op == "XOR_AGGR"){
7342 if(dt->is_buffer_type()){
7343 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7347 literal_t l(dt->type_indicator());
7348 retval.append(l.to_string());
7349 retval.append(";\n");
// Reset to the type's maximum literal (presumably MIN -- selecting test not
// visible).
7355 if(dt->is_buffer_type()){
7356 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7360 retval.append(dt->get_max_literal());
7361 retval.append(";\n");
// Reset to the type's minimum literal (presumably MAX -- selecting test not
// visible).
7367 if(dt->is_buffer_type()){
7368 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7372 retval.append(dt->get_min_literal());
7373 retval.append(";\n");
// No branch matched.
7378 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_aggr_reinitialize.\n",op.c_str());
7385 // Generate parameter holding vars from a param table.
// For every parameter name in param_tbl, emit a declaration of a host C
// variable named param_<name>; parameters with handle access additionally
// get a `struct search_handle *param_handle_<name>` declaration.
7386 static string generate_param_vars(param_table *param_tbl){
7389 vector<string> param_vec = param_tbl->get_param_names();
7390 for(p=0;p<param_vec.size();p++){
7391 data_type *dt = param_tbl->get_data_type(param_vec[p]);
// NOTE(review): the name passed to make_host_cvar already ends in ";\n" and
// another ";\n" is appended below, so the emitted text presumably contains a
// stray empty statement -- confirm against make_host_cvar.
7392 sprintf(tmpstr,"param_%s;\n", param_vec[p].c_str());
7393 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7394 if(param_tbl->handle_access(param_vec[p])){
7395 ret += "\tstruct search_handle *param_handle_"+param_vec[p]+";\n";
7401 // Parameter manipulation routines
// Emit the C function `int load_params_<functor_name>(gs_int32_t sz, void
// *value)`, which validates and unpacks a packed parameter block into the
// functor's param_<name> variables.  The generated function returns 1 when
// the block is too small or a string does not fit, and 0 on success.
//   functor_name       - suffix of the generated function's name
//   param_tbl          - supplies parameter names, types and handle flags
//   param_handle_table - pass-by-handle parameters to (re)register
// Fix: the buffer-copy format string had been corrupted to "¶m_%s" (the
// text "&para" decoded as the pilcrow character); it is restored to
// "&param_%s" so the generated call takes the address of the parameter.
7402 static string generate_load_param_block(string functor_name,
7403 param_table *param_tbl,
7404 vector<handle_param_tbl_entry *> param_handle_table
7407 vector<string> param_names = param_tbl->get_param_names();
7409 string ret = "int load_params_"+functor_name+"(gs_int32_t sz, void *value){\n";
7410 ret.append("\tint pos=0;\n");
7411 ret.append("\tint data_pos;\n");
// Declare a tmp_var_<name> staging variable for every buffer-typed parameter.
// NOTE(review): as in generate_param_vars, the name already ends in ";\n"
// and another ";\n" is appended -- presumably a stray empty statement.
7413 for(p=0;p<param_names.size();p++){
7414 data_type *dt = param_tbl->get_data_type(param_names[p]);
7415 if(dt->is_buffer_type()){
7416 sprintf(tmpstr,"tmp_var_%s;\n", param_names[p].c_str());
7417 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7422 // Verify that the block is of minimum size
// data_pos becomes the sum of sizeof(<host type>) over all parameters.
7423 if(param_names.size() > 0){
7424 ret += "//\tVerify that the value block is large enough */\n";
7425 ret.append("\n\tdata_pos = ");
7426 for(p=0;p<param_names.size();p++){
7427 if(p>0) ret.append(" + ");
7428 data_type *dt = param_tbl->get_data_type(param_names[p]);
7429 ret.append("sizeof( ");
7430 ret.append( dt->get_host_cvar_type() );
7434 ret.append("\tif(data_pos > sz) return 1;\n\n");
7437 ///////////////////////
7438 /// Verify that all strings can be unpacked.
7440 ret += "//\tVerify that the strings can be unpacked */\n";
7441 for(p=0;p<param_names.size();p++){
7442 data_type *dt = param_tbl->get_data_type(param_names[p]);
// Buffer types: load the descriptor at value+pos into tmp_var_<name>.
7443 if(dt->is_buffer_type()){
7444 sprintf(tmpstr,"\ttmp_var_%s = *( (%s *)((gs_sp_t )value+pos) );\n",param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7446 switch( dt->get_type() ){
7448 // ret += "\ttmp_var_"+param_names[p]+".offset = ntohl( tmp_var_"+param_names[p]+".offset );\n"; // ntoh conversion
7449 // ret += "\ttmp_var_"+param_names[p]+".length = ntohl( tmp_var_"+param_names[p]+".length );\n"; // ntoh conversion
// Reject the block when offset+length runs past sz, then turn the offset
// into an absolute address inside `value`.
7450 sprintf(tmpstr,"\tif( (int)(tmp_var_%s.offset) + tmp_var_%s.length > sz) return 1;\n",param_names[p].c_str(), param_names[p].c_str() );
7452 sprintf(tmpstr,"\ttmp_var_%s.offset = (gs_p_t)( (gs_sp_t )value + (gs_p_t)(tmp_var_%s.offset) );\n",param_names[p].c_str(), param_names[p].c_str() );
7456 fprintf(stderr,"ERROR: parameter %s is of type %s, a buffered type, but I don't know how to unpack it as a parameter.\n",param_names[p].c_str(), dt->to_string().c_str() );
7461 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7465 /////////////////////////
7467 ret += "/*\tThe block is OK, do the unpacking. */\n";
7468 ret += "\tpos = 0;\n";
7470 for(p=0;p<param_names.size();p++){
7471 data_type *dt = param_tbl->get_data_type(param_names[p]);
// Buffer types are copied from the staging variable with the type's
// assign-copy function; the destination is the address of param_<name>.
7472 if(dt->is_buffer_type()){
7473 sprintf(tmpstr,"\t%s(&param_%s, &tmp_var_%s);\n", dt->get_hfta_buffer_assign_copy().c_str(),param_names[p].c_str(),param_names[p].c_str() );
7476 // if(dt->needs_hn_translation()){
7477 // sprintf(tmpstr,"\tparam_%s = %s( *( (%s *)( (gs_sp_t )value+pos) ) );\n",
7478 // param_names[p].c_str(), dt->ntoh_translation().c_str(), dt->get_host_cvar_type().c_str() );
// Other types are read directly from value+pos.
7480 sprintf(tmpstr,"\tparam_%s = *( (%s *)( (gs_sp_t )value+pos) );\n",
7481 param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7485 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7488 // TODO: I think this method of handle registration is obsolete
7489 // and should be deleted.
7490 // some examination reveals that handle_access is always false.
7491 for(p=0;p<param_names.size();p++){
7492 if(param_tbl->handle_access(param_names[p]) ){
7493 data_type *pdt = param_tbl->get_data_type(param_names[p]);
7495 ret += "\tt->param_handle_"+param_names[p]+" = " +
7496 pdt->handle_registration_name() +
7497 "((struct FTA *)t, &(t->param_"+param_names[p]+"));\n";
7500 // Register the pass-by-handle parameters
7502 ret += "/* register the pass-by-handle parameters */\n";
7505 for(ph=0;ph<param_handle_table.size();++ph){
7506 data_type pdt(param_handle_table[ph]->type_name);
7507 switch(param_handle_table[ph]->val_type){
// Emit handle_param_<ph> = <registration fcn>(param_<name>), taking the
// parameter's address when its type is a buffer type.
7513 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7515 if(pdt.is_buffer_type()) ret += "&(";
7516 ret += "param_"+param_handle_table[ph]->param_name;
7517 if(pdt.is_buffer_type()) ret += ")";
7521 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7527 ret += "\treturn(0);\n";
7528 ret.append("}\n\n");
// Emit the C function `void destroy_params_<functor_name>()`, which releases
// buffer-typed parameters and deregisters parameter handles.
//   functor_name       - suffix of the generated function's name
//   param_tbl          - supplies parameter names, types and handle flags
//   param_handle_table - entries whose val_type is param_e are deregistered
// Fix: the buffer-destroy format string had been corrupted to "¶m_%s" (the
// text "&para" decoded as the pilcrow character); it is restored to
// "&param_%s" so the generated call takes the address of the parameter.
7534 static string generate_delete_param_block(string functor_name,
7535 param_table *param_tbl,
7536 vector<handle_param_tbl_entry *> param_handle_table
7540 vector<string> param_names = param_tbl->get_param_names();
7542 string ret = "void destroy_params_"+functor_name+"(){\n";
7544 for(p=0;p<param_names.size();p++){
7545 data_type *dt = param_tbl->get_data_type(param_names[p]);
// Buffer-typed parameters are released with the type's buffer-destroy
// function.
7546 if(dt->is_buffer_type()){
7547 sprintf(tmpstr,"\t\t%s(&param_%s);\n",dt->get_hfta_buffer_destroy().c_str(),param_names[p].c_str());
// Parameters with handle access also have their handle destroyed.
7550 if(param_tbl->handle_access(param_names[p]) ){
7551 ret += "\t\t" + dt->get_handle_destructor() +
7552 "(t->param_handle_" + param_names[p] + ");\n";
7556 ret += "//\t\tDeregister handles.\n";
// Only handles that came from query parameters are deregistered here.
7558 for(ph=0;ph<param_handle_table.size();++ph){
7559 if(param_handle_table[ph]->val_type == param_e){
7560 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7561 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7570 // ---------------------------------------------------------------------
7571 // functions for creating functor variables.
// For every column id in cid_set, emit the declarations used to unpack that
// column: a host C variable unpack_var_<field>_<tblref> of the column's type
// and a gs_int32_t unpack_offset_<field>_<tblref>.
//   cid_set - column ids (schema_ref, tblvar_ref, field) to declare
//   schema  - used to look up each field's type name
7573 static string generate_access_vars(col_id_set &cid_set, table_list *schema){
7575 col_id_set::iterator csi;
7577 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7578 int schref = (*csi).schema_ref;
7579 int tblref = (*csi).tblvar_ref;
7580 string field = (*csi).field;
7581 data_type dt(schema->get_type_name(schref,field));
7582 sprintf(tmpstr,"unpack_var_%s_%d", field.c_str(), tblref);
7583 ret+="\t"+dt.make_host_cvar(tmpstr)+";\n";
7584 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", field.c_str(), tblref);
// Emit declarations of the partial_fcn_result_<p> variables.
//   partial_fcns  - function expressions, indexed by p
//   ref_cnt       - per-function reference counts
//   is_partial    - per-function flag
//   gen_fcn_cache - when false, every function gets a result variable; when
//                   true, only functions that are partial or referenced more
//                   than once do
// With caching enabled, a function referenced more than once also gets an
// `int fcn_ref_cnt_<p>` declaration.
7590 static string generate_partial_fcn_vars(vector<scalarexp_t *> &partial_fcns,
7591 vector<int> &ref_cnt, vector<bool> &is_partial, bool gen_fcn_cache){
7596 for(p=0;p<partial_fcns.size();++p){
7597 if(!gen_fcn_cache || is_partial[p] || ref_cnt[p]>1){
7598 sprintf(tmpstr,"partial_fcn_result_%d", p);
7599 ret+="\t"+partial_fcns[p]->get_data_type()->make_host_cvar(tmpstr)+";\n";
7600 if(gen_fcn_cache && ref_cnt[p]>1){
7601 ret+="\tint fcn_ref_cnt_"+int_to_string(p)+";\n";
// Emit a declaration complex_literal_<n> for every entry of
// complex_literals, typed after the literal; entries flagged as handle
// references also get a `struct search_handle *lit_handle_<n>` declaration.
7609 static string generate_complex_lit_vars(cplx_lit_table *complex_literals){
7612 for(cl=0;cl<complex_literals->size();cl++){
7613 literal_t *l = complex_literals->get_literal(cl);
// NOTE(review): dtl is heap-allocated and no matching delete is visible in
// this listing -- presumably leaked once per literal; confirm, and compare
// gen_complex_lit_dtr, which builds the same data_type on the stack.
7614 data_type *dtl = new data_type( l->get_type() );
7615 sprintf(tmpstr,"complex_literal_%d",cl);
7616 ret += "\t"+dtl->make_host_cvar(tmpstr)+";\n";
7617 if(complex_literals->is_handle_ref(cl)){
7618 sprintf(tmpstr,"\tstruct search_handle *lit_handle_%d;\n",cl);
// Emit one `gs_param_handle_t handle_param_<p>;` declaration per entry of
// param_handle_table.
// Fix: the parameter declaration had been corrupted to "¶m_handle_table"
// (the text "&para" decoded as the pilcrow character); it is restored to the
// reference parameter `&param_handle_table` that the loop below uses.
7626 static string generate_pass_by_handle_vars(
7627 vector<handle_param_tbl_entry *> &param_handle_table){
7631 for(p=0;p<param_handle_table.size();++p){
7632 sprintf(tmpstr,"\tgs_param_handle_t handle_param_%d;\n",p);
7640 // ------------------------------------------------------------
7641 // functions for generating initialization code.
// For every column id in cid_set, emit the statement that initializes
// unpack_offset_<field>_<tblref> by looking the field name up in
// schema_handle<tblref> via ftaschema_get_field_offset_by_name.
7643 static string gen_access_var_init(col_id_set &cid_set){
7645 col_id_set::iterator csi;
7647 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7648 int tblref = (*csi).tblvar_ref;
7649 string field = (*csi).field;
7650 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle%d, \"%s\");\n", field.c_str(),tblref,tblref,field.c_str());
// Emit initialization code for every complex literal: the literal's
// to_hfta_C_code is asked to initialize the variable at
// &(complex_literal_<n>).  Literals flagged as handle references also get a
// handle-registration statement.
7657 static string gen_complex_lit_init(cplx_lit_table *complex_literals){
7661 for(cl=0;cl<complex_literals->size();cl++){
7662 literal_t *l = complex_literals->get_literal(cl);
7663 // sprintf(tmpstr,"\tcomplex_literal_%d = ",cl);
7664 // ret += tmpstr + l->to_hfta_C_code() + ";\n";
7665 sprintf(tmpstr,"&(complex_literal_%d)",cl);
7666 ret += "\t" + l->to_hfta_C_code(tmpstr) + ";\n";
7667 // I think that the code below is obsolete
7668 // TODO: it is obsolete. add_cpx_lit is always
7669 // called with the handle indicator being false.
7670 // This entire structure should be cleansed.
7671 if(complex_literals->is_handle_ref(cl)){
// NOTE(review): dt is heap-allocated and no matching delete is visible in
// this listing -- presumably leaked; confirm.
7672 data_type *dt = new data_type( l->get_type() );
7673 sprintf(tmpstr,"\tlit_handle_%d = %s(&(f->complex_literal_%d));\n",
7674 cl, dt->hfta_handle_registration_name().c_str(), cl);
// Emit initialization code for the result variables of buffer-typed partial
// functions: each partial_fcn_result_<p> is initialized from the literal
// that literal_t constructs from the type indicator alone.  Non-buffer
// result types get no initialization in the visible code.
7683 static string gen_partial_fcn_init(vector<scalarexp_t *> &partial_fcns){
7687 for(p=0;p<partial_fcns.size();++p){
7688 data_type *pdt =partial_fcns[p]->get_data_type();
7689 literal_t empty_lit(pdt->type_indicator());
7690 if(pdt->is_buffer_type()){
7691 // sprintf(tmpstr,"\tpartial_fcn_result_%d = %s;\n",
7692 // p, empty_lit.to_hfta_C_code().c_str());
7693 sprintf(tmpstr,"&(partial_fcn_result_%d)",p);
7694 ret += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
// Emit the statements that register pass-by-handle parameters whose value is
// a complex literal or a plain literal, assigning each handle to
// handle_param_<ph>.  Handles backed by query parameters are not registered
// here (see the comment inside the switch).
// Fix: the parameter declaration had been corrupted to "¶m_handle_table"
// (the text "&para" decoded as the pilcrow character); it is restored to the
// reference parameter `&param_handle_table` that the body uses.
7700 static string gen_pass_by_handle_init(
7701 vector<handle_param_tbl_entry *> &param_handle_table){
7705 for(ph=0;ph<param_handle_table.size();++ph){
7706 data_type pdt(param_handle_table[ph]->type_name);
// Opening text of the registration call; the argument depends on val_type.
7707 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7708 switch(param_handle_table[ph]->val_type){
// Complex literal: pass complex_literal_<idx>, by address for buffer types.
7711 if(pdt.is_buffer_type()) ret += "&(";
7712 sprintf(tmpstr,"complex_literal_%d",param_handle_table[ph]->complex_literal_idx);
7714 if(pdt.is_buffer_type()) ret += ")";
// Plain literal: pass its C rendering and close the call.
7719 ret += param_handle_table[ph]->litval->to_hfta_C_code("") + ");\n";
7723 // query parameter handles are registered/deregistered in the
7724 // load_params function.
7725 // ret += "t->param_"+param_handle_table[ph]->param_name;
7728 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7735 //------------------------------------------------------------
7736 // functions for destructor and deregistration code
// Emit destructor statements for complex literals: each buffer-typed
// complex_literal_<n> is released with its type's buffer-destroy function.
// Literals of other types produce no code in the visible lines.
7738 static string gen_complex_lit_dtr(cplx_lit_table *complex_literals){
7742 for(cl=0;cl<complex_literals->size();cl++){
7743 literal_t *l = complex_literals->get_literal(cl);
7744 data_type ldt( l->get_type() );
7745 if(ldt.is_buffer_type()){
7746 sprintf(tmpstr,"\t\t%s(&complex_literal_%d);\n",
7747 ldt.get_hfta_buffer_destroy().c_str(), cl );
// Emit one deregistration call per entry of param_handle_table, passing
// handle_param_<ph> to the entry's deregistration function.
// Fix: the parameter declaration had been corrupted to "¶m_handle_table"
// (the text "&para" decoded as the pilcrow character); it is restored to the
// reference parameter `&param_handle_table` that the loop below uses.
7755 static string gen_pass_by_handle_dtr(
7756 vector<handle_param_tbl_entry *> &param_handle_table){
7760 for(ph=0;ph<param_handle_table.size();++ph){
7761 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7762 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7768 // Destroy all previous results
// Emits, for every partial function with a buffer-typed result, a call to
// the type's buffer-destroy function on partial_fcn_result_<p>.
7769 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns){
7773 for(p=0;p<partial_fcns.size();++p){
7774 data_type *pdt =partial_fcns[p]->get_data_type();
7775 if(pdt->is_buffer_type()){
7776 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7777 pdt->get_hfta_buffer_destroy().c_str(), p );
7784 // Destroy previous results of fcns in pfcn_set
// Same as the single-argument overload, but restricted to the function
// indices listed in pfcn_set; each index is used directly to subscript
// partial_fcns and to name partial_fcn_result_<index>.
7785 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns, set<int> &pfcn_set){
7787 set<int>::iterator si;
7789 for(si=pfcn_set.begin(); si!=pfcn_set.end(); ++si){
7790 data_type *pdt =partial_fcns[(*si)]->get_data_type();
7791 if(pdt->is_buffer_type()){
7792 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7793 pdt->get_hfta_buffer_destroy().c_str(), (*si) );
7801 //-------------------------------------------------------------------------
7802 // Functions related to se generation bookkeeping.
// Collect the column ids referenced by predicate `pr` that are not already
// in found_cids.
//   found_cids - in/out: ids seen so far; the new ids are added to it
//   new_cids   - out: receives the ids in `pr` but not in found_cids (ids
//                are inserted into it; it is not cleared first)
//   gtbl       - forwarded to gather_pr_col_ids
7804 static void get_new_pred_cids(predicate_t *pr, col_id_set &found_cids,
7805 col_id_set &new_cids, gb_table *gtbl){
7806 col_id_set this_pred_cids;
7807 col_id_set::iterator csi;
7809 // get colrefs in predicate not already found.
7810 gather_pr_col_ids(pr,this_pred_cids,gtbl);
7811 set_difference(this_pred_cids.begin(), this_pred_cids.end(),
7812 found_cids.begin(), found_cids.end(),
7813 inserter(new_cids,new_cids.begin()) );
7815 // We've found these cids, so update found_cids
7816 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7817 found_cids.insert((*csi));
7821 // after the call, new_cids will have the colrefs in se but not found_cids.
7822 // update found_cids with the new cids.
//   se         - scalar expression whose column references are gathered
//   found_cids - in/out: ids seen so far
//   new_cids   - out: ids are inserted into it; it is not cleared first
//   gtbl       - forwarded to gather_se_col_ids
7823 static void get_new_se_cids(scalarexp_t *se, col_id_set &found_cids,
7824 col_id_set &new_cids, gb_table *gtbl){
7825 col_id_set this_se_cids;
7826 col_id_set::iterator csi;
7828 // get colrefs in se not already found.
7829 gather_se_col_ids(se,this_se_cids,gtbl);
7830 set_difference(this_se_cids.begin(), this_se_cids.end(),
7831 found_cids.begin(), found_cids.end(),
7832 inserter(new_cids,new_cids.begin()) );
7834 // We've found these cids, so update found_cids
7835 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7836 found_cids.insert((*csi));
// Emit the statements that unpack each column in new_cids from its input
// tuple into unpack_var_<field>_<tblref>.
//   schema      - used to look up each field's type name
//   new_cids    - column ids to unpack
//   on_problem  - C expression the generated code returns when unpacking a
//                 buffer-typed field reports a problem
//   needs_xform - indexed by tblref; selects the transforming or the "noxf"
//                 unpack function for that input
7840 static string gen_unpack_cids(table_list *schema, col_id_set &new_cids, string on_problem, vector<bool> &needs_xform){
7842 col_id_set::iterator csi;
7844 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7845 int schref = (*csi).schema_ref;
7846 int tblref = (*csi).tblvar_ref;
7847 string field = (*csi).field;
7848 data_type dt(schema->get_type_name(schref,field));
7850 if(needs_xform[tblref]){
7851 unpack_fcn = dt.get_hfta_unpack_fcn();
7853 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
// Buffer types use the checked unpack (tuple size and &problem are passed);
// other types use the "_nocheck" variant, which takes neither.
7855 if(dt.is_buffer_type()){
7856 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7858 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
// Only the checked path can set `problem`, so only it emits the bail-out.
7861 if(dt.is_buffer_type()){
7862 ret += "\tif(problem) return "+on_problem+" ;\n";
7868 // generates the declaration of all the variables related to
7869 // temp tuples generation
// In the visible lines this is a comment line plus the declaration of
// `bool temp_tuple_received`.
7870 static string gen_decl_temp_vars(){
7873 ret += "\t// variables related to temp tuple generation\n";
7874 ret += "\tbool temp_tuple_received;\n";
7879 // generates initialization code for variables related to temp tuple processing
//   schema      - used to look up each field's type name and modifier list
//   select_list - only entries whose expression type is temporal are used
//   gtbl        - forwarded to get_new_se_cids
// The generated code sets temp_tuple_received to false and gives every
// column referenced by a temporal select expression an initial value.
7880 static string gen_init_temp_vars(table_list *schema, vector<select_element *>& select_list, gb_table *gtbl){
7882 col_id_set::iterator csi;
7885 // Initialize internal state
7886 ret += "\ttemp_tuple_received = false;\n";
7888 col_id_set temp_cids; // colrefs unpacked thus far.
7890 for(s=0;s<select_list.size();s++){
7891 if (select_list[s]->se->get_data_type()->is_temporal()) {
7892 // Find the set of attributes accessed in this SE
7893 col_id_set new_cids;
7894 get_new_se_cids(select_list[s]->se,temp_cids, new_cids, gtbl);
7897 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7898 int schref = (*csi).schema_ref;
7899 int tblref = (*csi).tblvar_ref;
7900 string field = (*csi).field;
7901 data_type dt(schema->get_type_name(schref,field), schema->get_modifier_list(schref,field));
// Increasing fields start at the type's minimum literal; all others start at
// its maximum literal.
7903 sprintf(tmpstr,"\t unpack_var_%s_%d = %s;\n", field.c_str(), tblref,
7904 dt.is_increasing() ? dt.get_min_literal().c_str() : dt.get_max_literal().c_str());
7914 // generates a check if tuple is temporal
//   node_name - not referenced in the visible lines
//   channel   - input index; selects tup<channel> and
//               tuple_metadata_offset<channel>
// The generated code calls ftaschema_is_temporal_tuple_offset on the tuple's
// data and sets temp_tuple_received to true or false accordingly.
7915 static string gen_temp_tuple_check(string node_name, int channel) {
7919 sprintf(tmpstr, "tup%d", channel);
7920 string tup_name = tmpstr;
// NOTE(review): schema_handle_name is referenced only by the commented-out
// statement below in the visible lines.
7921 sprintf(tmpstr, "schema_handle%d", channel);
7922 string schema_handle_name = tmpstr;
7923 string tuple_offset_name = "tuple_metadata_offset"+int_to_string(channel);
7925 // check if it is a temporary status tuple
7926 ret += "\t// check if tuple is temp status tuple\n";
7927 // ret += "\tif (ftaschema_is_temporal_tuple(" + schema_handle_name + ", " + tup_name + ".data)) {\n";
7928 ret += "\tif (ftaschema_is_temporal_tuple_offset(" + tuple_offset_name + ", " + tup_name + ".data)) {\n";
7929 ret += "\t\ttemp_tuple_received = true;\n";
7931 ret += "\telse\n\t\ttemp_tuple_received = false;\n\n";
7936 // generates unpacking code for all temporal attributes referenced in select
//   schema      - forwarded to gen_unpack_cids
//   found_cids  - in/out: columns already unpacked; updated by
//                 get_new_se_cids so that no column is unpacked twice
//   select_list - only entries whose expression type is temporal are used
//   gtbl        - forwarded to get_new_se_cids
//   needs_xform - forwarded to gen_unpack_cids
// The generated code returns "false" if an unpack reports a problem.
7937 static string gen_unpack_temp_vars(table_list *schema, col_id_set& found_cids, vector<select_element *>& select_list, gb_table *gtbl, vector<bool> &needs_xform) {
7941 // Unpack all the temporal attributes references in select list
7942 // we need it to be able to generate temp status tuples
7943 for(s=0;s<select_list.size();s++){
7944 if (select_list[s]->se->get_data_type()->is_temporal()) {
7945 // Find the set of attributes accessed in this SE
7946 col_id_set new_cids;
7947 get_new_se_cids(select_list[s]->se,found_cids, new_cids, gtbl);
7948 // Unpack these values.
7949 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
7957 // Generates temporal tuple generation code (except attribute packing)
// Returns generated-code text that allocates a temp status tuple in `result`
// and declares a typed `tuple` pointer onto it.
//   node_name - used, via generate_tuple_name, to name the tuple struct.
7958 static string gen_init_temp_status_tuple(string node_name) {
7961 ret += "\t// create temp status tuple\n";
// The emitted size is the tuple struct plus one gs_uint8_t, which holds the
// tuple-type marker written below.
7962 ret += "\tresult.tuple_size = sizeof("+generate_tuple_name( node_name)+") + sizeof(gs_uint8_t);\n";
7963 ret += "\tresult.data = (gs_sp_t )malloc(result.tuple_size);\n";
7964 ret += "\tresult.heap_resident = true;\n";
7965 ret += "\t// Mark tuple as temporal\n";
// The marker byte is stored immediately after the tuple struct.
7966 ret += "\t*((gs_sp_t )result.data + sizeof("+generate_tuple_name( node_name)+")) = TEMPORAL_TUPLE;\n";
7968 ret += "\t"+generate_tuple_name( node_name)+" *tuple = ("+
7969 generate_tuple_name( node_name) +" *)(result.data);\n";
7975 // Assume that all colrefs unpacked already ...
// Returns generated-code text that evaluates every partial function whose
// index is in pfcn_refs, each followed by an emitted early return on failure.
//   schema       - forwarded to unpack_partial_fcn.
//   partial_fcns - scalar expressions, indexed by the values in pfcn_refs.
//   pfcn_refs    - indices of the functions to evaluate.
// NOTE(review): the end of the parameter list is not visible in this listing;
// on_problem, used in the emitted return statement, is presumably declared
// there.
7976 static string gen_unpack_partial_fcn(table_list *schema,
7977 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7980 set<int>::iterator si;
7982 // Since set<..> is a "Sorted Associative Container",
7983 // we can walk through it in sorted order by walking from
7984 // begin() to end(). (and the partial fcns must be
7985 // evaluated in this order).
7986 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7987 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
// The emitted code returns on_problem when retval is nonzero.
7988 ret += "\tif(retval) return "+on_problem+" ;\n";
7993 // Assume that all colrefs unpacked already ...
7994 // this time with cached functions.
// Returns generated-code text that evaluates the functions listed in
// pfcn_refs, in ascending index order.
//   fcn_ref_cnt    - per-function reference count; a count above 1 makes the
//                    emitted code guarded by "if(fcn_ref_cnt_<i>==0)" and
//                    followed by "fcn_ref_cnt_<i>=1".
//   is_partial_fcn - per-function flag; flagged functions are emitted through
//                    unpack_partial_fcn with a return-on-failure check, and
//                    unflagged multiply-referenced ones store the result of
//                    generate_cached_fcn in partial_fcn_result_<i>.
// NOTE(review): the end of the parameter list is not visible in this listing;
// on_problem is presumably declared there.
7995 static string gen_unpack_partial_fcn(table_list *schema,
7996 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7997 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
8000 set<int>::iterator si;
8002 // Since set<..> is a "Sorted Associative Container",
8003 // we can walk through it in sorted order by walking from
8004 // begin() to end(). (and the partial fcns must be
8005 // evaluated in this order).
8006 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
// Open the guard for a function referenced more than once.
8007 if(fcn_ref_cnt[(*si)] > 1){
8008 ret += "\tif(fcn_ref_cnt_"+int_to_string((*si))+"==0){\n";
8010 if(is_partial_fcn[(*si)]){
8011 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
8012 ret += "\tif(retval) return "+on_problem+" ;\n";
8014 if(fcn_ref_cnt[(*si)] > 1){
// A non-partial function has its value stored in partial_fcn_result_<i>.
8015 if(!is_partial_fcn[(*si)]){
8016 ret += "\t\tpartial_fcn_result_"+int_to_string((*si))+"="+generate_cached_fcn(partial_fcns[(*si)],(*si),schema)+";\n";
// Record in the emitted code that the value is now available.
8018 ret += "\t\tfcn_ref_cnt_"+int_to_string((*si))+"=1;\n";
8027 // This version finds and unpacks new colrefs.
8028 // found_cids gets updated with the newly unpacked cids.
// Returns generated-code text that, for each function index in pfcn_refs,
// first unpacks the columns the function references that are not yet in
// found_cids and then evaluates the function.
//   schema       - forwarded to gen_unpack_cids and unpack_partial_fcn.
//   partial_fcns - scalar expressions indexed by the values in pfcn_refs.
//   found_cids   - column ids unpacked so far (passed to get_new_se_cids).
//   gtbl         - forwarded to get_new_se_cids.
//   on_problem   - text the emitted code returns on an unpack or function
//                  failure.
//   needs_xform  - forwarded to gen_unpack_cids.
8029 static string gen_full_unpack_partial_fcn(table_list *schema,
8030 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8031 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8032 vector<bool> &needs_xform){
8034 set<int>::iterator slsi;
8036 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8037 // find all new fields ref'd by this partial fcn.
8038 col_id_set new_cids;
8039 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8040 // Unpack these values.
8041 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8043 // Now evaluate the partial fcn.
8044 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8045 ret += "\tif(retval) return "+on_problem+" ;\n";
8050 // This version finds and unpacks new colrefs.
8051 // found_cids gets updated with the newly unpacked cids.
8052 // BUT : only for the partial functions.
// Returns generated-code text for the function indices in pfcn_refs.
//   fcn_ref_cnt    - per-function reference count; a count above 1 adds the
//                    "if(fcn_ref_cnt_<i>==0)" guard and the trailing
//                    "fcn_ref_cnt_<i>=1" assignment to the emitted code.
//   is_partial_fcn - per-function flag selecting which entries get their
//                    columns unpacked and are evaluated via
//                    unpack_partial_fcn.
//   found_cids, gtbl, on_problem, needs_xform - forwarded to get_new_se_cids /
//                    gen_unpack_cids as shown below.
// NOTE(review): the closing braces are not visible in this listing, so the
// exact nesting of the statements below cannot be confirmed from here.
8053 static string gen_full_unpack_partial_fcn(table_list *schema,
8054 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8055 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
8056 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8057 vector<bool> &needs_xform){
8059 set<int>::iterator slsi;
8061 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8062 if(is_partial_fcn[(*slsi)]){
8063 // find all new fields ref'd by this partial fcn.
8064 col_id_set new_cids;
8065 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8066 // Unpack these values.
8067 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8069 // Now evaluate the partial fcn.
8070 if(fcn_ref_cnt[(*slsi)] > 1){
8071 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8073 if(is_partial_fcn[(*slsi)]){
8074 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8075 ret += "\tif(retval) return "+on_problem+" ;\n";
8077 if(fcn_ref_cnt[(*slsi)] > 1){
8078 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
// Returns generated-code text that computes the cached (non-partial,
// multiply-referenced) functions listed in pfcn_refs: each is guarded by
// "if(fcn_ref_cnt_<i>==0)", assigned to partial_fcn_result_<i> from
// generate_cached_fcn, and then marked with "fcn_ref_cnt_<i>=1".
//   schema         - forwarded to generate_cached_fcn.
//   partial_fcns   - scalar expressions indexed by the values in pfcn_refs.
//   fcn_ref_cnt    - per-function reference count.
//   is_partial_fcn - per-function flag; flagged entries are skipped.
8087 static string gen_remaining_cached_fcns(table_list *schema,
8088 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8089 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn){
8091 set<int>::iterator slsi;
8093 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8094 if(!is_partial_fcn[(*slsi)] && fcn_ref_cnt[(*slsi)] > 1){
// This test repeats the reference-count condition already checked above.
8096 if(fcn_ref_cnt[(*slsi)] > 1){
8097 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8098 ret += "\t\tpartial_fcn_result_"+int_to_string((*slsi))+"="+generate_cached_fcn(partial_fcns[(*slsi)],(*slsi),schema)+";\n";
8099 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
8108 // unpack the colrefs in cid_set not in found_cids
// Returns generated-code text that unpacks every column id of cid_set that is
// absent from found_cids into unpack_var_<field>_<tblref>.
//   schema      - queried for each column's type name.
//   cid_set     - all column ids the caller needs.
//   found_cids  - column ids already unpacked; these are skipped.
//   on_problem  - text the emitted code returns when `problem` is set.
//   needs_xform - indexed by table reference; selects between the type's
//                 transforming and "noxf" unpack functions.
8109 static string gen_remaining_colrefs(table_list *schema,
8110 col_id_set &cid_set, col_id_set &found_cids, string on_problem,
8111 vector<bool> &needs_xform){
8113 col_id_set::iterator csi;
8115 for(csi=cid_set.begin(); csi!=cid_set.end();csi++){
8116 if(found_cids.count( (*csi) ) == 0){
8117 int schref = (*csi).schema_ref;
8118 int tblref = (*csi).tblvar_ref;
8119 string field = (*csi).field;
// The data_type is built from the type name only; no modifier list is passed.
8120 data_type dt(schema->get_type_name(schref,field));
8122 if(needs_xform[tblref]){
8123 unpack_fcn = dt.get_hfta_unpack_fcn();
8125 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
// Buffer types use the checked unpack call, which takes the tuple size and
// &problem; the other emitted form is the "_nocheck" variant.
8127 if(dt.is_buffer_type()){
8128 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
8130 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
// Only the checked (buffer) unpack can set `problem`, so only it gets a test.
8133 if(dt.is_buffer_type()){
8134 ret.append("\tif(problem) return "+on_problem+" ;\n");
// Returns generated-code text that declares and assigns a selvar_<s>
// temporary for each buffer-typed select expression that is not a plain
// column reference, a partial function, or a function with an aggregate
// reference (get_aggr_ref() >= 0).
//   schema      - forwarded to generate_se_code.
//   select_list - select elements to examine; <s> is the element's index.
8141 static string gen_buffer_selvars(table_list *schema,
8142 vector<select_element *> &select_list){
8146 for(s=0;s<select_list.size();s++){
8147 scalarexp_t *se = select_list[s]->se;
8148 data_type *sdt = se->get_data_type();
8149 if(sdt->is_buffer_type() &&
8150 !( (se->get_operator_type() == SE_COLREF) ||
8151 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8152 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8154 sprintf(tmpstr,"selvar_%d",s);
// Emits "<host type> selvar_<s> = <expression code>;".
8155 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
8156 ret += generate_se_code(se,schema) +";\n";
// Returns generated-code text consisting of " + <size_fcn>(&...)" terms, one
// per buffer-typed select expression.
//   select_list - select elements to examine.
//   schema      - forwarded to generate_se_code.
// One emitted form sizes the selvar_<s> temporary; the other sizes the
// expression code directly.
8162 static string gen_buffer_selvars_size(vector<select_element *> &select_list,table_list *schema){
8166 for(s=0;s<select_list.size();s++){
8167 scalarexp_t *se = select_list[s]->se;
8168 data_type *sdt = se->get_data_type();
8169 if(sdt->is_buffer_type()){
8170 if( !( (se->get_operator_type() == SE_COLREF) ||
8171 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8172 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8174 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
8177 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),
8178 generate_se_code(se,schema).c_str());
// Returns generated-code text that calls the type's buffer-destroy function on
// selvar_<s> for each buffer-typed select expression not excluded below.
//   select_list - select elements to examine; <s> is the element's index.
// The exclusion list here also names SE_AGGR_STAR and SE_AGGR_SE, in addition
// to column references, partial functions and aggregate-referencing functions.
8186 static string gen_buffer_selvars_dtr(vector<select_element *> &select_list){
8190 for(s=0;s<select_list.size();s++){
8191 scalarexp_t *se = select_list[s]->se;
8192 data_type *sdt = se->get_data_type();
8193 if(sdt->is_buffer_type() &&
8194 !( (se->get_operator_type() == SE_COLREF) ||
8195 (se->get_operator_type() == SE_AGGR_STAR) ||
8196 (se->get_operator_type() == SE_AGGR_SE) ||
8197 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8198 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8200 sprintf(tmpstr,"\t\t%s(&selvar_%d);\n",
8201 sdt->get_hfta_buffer_destroy().c_str(), s );
// Returns generated-code text that fills the fields tuple->tuple_var<s> of an
// output tuple from the select list.
//   schema        - forwarded to generate_se_code.
//   select_list   - select elements; <s> is the element's index.
//   node_name     - used, via generate_tuple_name, to name the tuple struct.
//   temporal_only - when true, buffer-typed fields are not packed and only
//                   temporal fields are assigned.
8209 static string gen_pack_tuple(table_list *schema, vector<select_element *> &select_list, string node_name, bool temporal_only){
// Buffer payloads are placed after the tuple struct and its one-byte marker.
8213 ret += "\tint tuple_pos = sizeof("+generate_tuple_name(node_name)+") + sizeof(gs_uint8_t);\n";
8214 for(s=0;s<select_list.size();s++){
8215 scalarexp_t *se = select_list[s]->se;
8216 data_type *sdt = se->get_data_type();
8218 if(!temporal_only && sdt->is_buffer_type()){
8219 if( !( (se->get_operator_type() == SE_COLREF) ||
8220 (se->get_operator_type() == SE_FUNC && se->is_partial()))
// Copy from the selvar_<s> temporary, then advance tuple_pos by its size.
8222 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
8224 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
// Copy straight from the expression code, then advance tuple_pos by its size.
8227 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code(se,schema).c_str());
8229 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code(se,schema).c_str());
// Plain assignment: all fields normally, temporal fields only in
// temporal_only mode.
8232 }else if (!temporal_only || sdt->is_temporal()) {
8233 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
8235 ret.append(generate_se_code(se,schema) );
8243 //-------------------------------------------------------------------------
8244 // functor generation methods
8245 //-------------------------------------------------------------------------
8247 /////////////////////////////////////////////////////////
8248 //// File Output Operator
8249 string output_file_qpn::generate_functor_name(){
8250 return("output_file_functor_" + normalize_name(get_node_name()));
// Returns the C++ source text of the functor class used by the file output
// operator for this node.
//   schema, Ext_fcns, needs_xform - not referenced by any line visible in this
//                                   listing.
// The emitted class is named by generate_functor_name() and is built from the
// node's fields, hash_flds, n_streams, output_spec, hfta_query_name,
// filestream_id, n_hfta_clones and parallel_idx members.
// NOTE(review): several short lines (closing braces, "ret += tmpstr;",
// else-branches, returns) are not visible in this listing.
8254 string output_file_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8255 string ret = "class " + this->generate_functor_name() + "{\n";
8257 // Find the temporal field
8258 int temporal_field_idx;
8259 data_type *tdt = NULL;
// NOTE(review): a data_type is allocated for each field examined; the lines
// that follow the is_temporal() test are not visible here, so confirm the
// non-temporal instances are released.
8260 for(temporal_field_idx=0;temporal_field_idx<fields.size();temporal_field_idx++){
8261 tdt = new data_type(fields[temporal_field_idx]->get_type(), fields[temporal_field_idx]->get_modifier_list());
8262 if(tdt->is_temporal()){
// Reaching fields.size() means no temporal field was found.
8269 if(temporal_field_idx == fields.size()){
8270 fprintf(stderr,"ERROR, no temporal field for file output operator %s\n",node_name.c_str());
// ---- Private member declarations of the emitted class ----
8274 ret += "private:\n";
8276 // var to save the schema handle
8277 ret += "\tint schema_handle0;\n";
8278 // tuple metadata offset
8279 ret += "\tint tuple_metadata_offset0;\n";
8280 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[temporal_field_idx]->get_name().c_str());
8283 // For unpacking the hashing fields, if any
8285 for(h=0;h<hash_flds.size();++h){
8286 sprintf(tmpstr,"unpack_var_%s", fields[hash_flds[h]]->get_name().c_str());
// NOTE(review): no delete of hdt is visible in this listing - confirm ownership.
8287 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8288 ret+="\t"+hdt->make_host_cvar(tmpstr)+";\n";
// The temporal field's offset variable was already declared above.
8289 if(hash_flds[h]!=temporal_field_idx){
8290 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[hash_flds[h]]->get_name().c_str());
8294 // Special case for output file hashing
8295 if(n_streams>1 && hash_flds.size()==0){
8296 ret+="\tgs_uint32_t outfl_cnt;\n";
8299 ret += "//\t\tRemember the last posted timestamp.\n";
8300 ret+="\t"+tdt->make_host_cvar("timestamp")+";\n";
8301 ret+="\t"+tdt->make_host_cvar("last_bucket")+";\n";
8302 ret+="\t"+tdt->make_host_cvar("slack")+";\n";
8303 ret += "\tbool first_execution;\n";
8304 ret += "\tbool temp_tuple_received;\n";
8305 ret += "\tbool is_eof;\n";
8307 ret += "\tgs_int32_t bucketwidth;\n";
8310 //-------------------
8311 // The functor constructor
8312 // pass in a schema handle (e.g. for the 1st input stream),
8313 // use it to determine how to unpack the merge variable.
8314 // ASSUME that both streams have the same layout,
8315 // just duplicate it.
8318 ret += "//\t\tFunctor constructor.\n";
8319 ret += this->generate_functor_name()+"(int schema_hndl){\n";
8321 ret += "\tschema_handle0 = schema_hndl;\n";
8322 // tuple metadata offset
8323 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
// A bucketwidth of 0 in the output spec makes the emitted code use 60.
8325 if(output_spec->bucketwidth == 0)
8326 ret += "\tbucketwidth = 60;\n";
8328 ret += "\tbucketwidth = "+int_to_string(output_spec->bucketwidth)+";\n";
8329 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8331 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[temporal_field_idx]->get_name().c_str(), fields[temporal_field_idx]->get_name().c_str());
8333 // Hashing field unpacking, if any
8334 for(h=0;h<hash_flds.size();++h){
8335 if(hash_flds[h]!=temporal_field_idx){
8336 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[hash_flds[h]]->get_name().c_str(),fields[hash_flds[h]]->get_name().c_str());
8341 ret+="\tfirst_execution = true;\n";
8343 // Initialize internal state
8344 ret += "\ttemp_tuple_received = false;\n";
8346 // Init last timestamp values to minimum value for their type
// Non-increasing temporal types are initialised to the maximum literal instead.
8347 if (tdt->is_increasing()){
8348 ret+="\ttimestamp = " + tdt->get_min_literal() + ";\n";
8349 ret+="\tlast_bucket = " + tdt->get_min_literal() + ";\n";
8351 ret+="\ttimestamp = " + tdt->get_max_literal() + ";\n";
8352 ret+="\tlast_bucket = " + tdt->get_max_literal() + ";\n";
8358 ret += "//\t\tFunctor destructor.\n";
8359 ret += "~"+this->generate_functor_name()+"(){\n";
// The emitted parameter load/destroy routines are empty stubs.
8363 ret += "int load_params_"+this->generate_functor_name()+"(gs_int32_t sz, void *value){return 0;}\n";
8364 ret += "void destroy_params_"+this->generate_functor_name()+"(){}\n";
8366 // Register new parameter block
8367 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8368 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8369 ret += "\treturn this->load_params_"+this->generate_functor_name()+
// ---- Emitted temp_status_received(): records EOF, runs the temporal-tuple
// check, and unpacks the timestamp and hash fields from tup0 ----
8373 ret+="\nbool temp_status_received(const host_tuple& tup0)/* const*/ {\n";
8374 ret+="\tgs_int32_t problem;\n";
8376 ret += "\tvoid *tup_ptr = (void *)(&tup0);\n";
8377 ret += "\tis_eof = ftaschema_is_eof_tuple(schema_handle0,tup_ptr);\n";
8379 ret += gen_temp_tuple_check(this->node_name, 0);
8381 sprintf(tmpstr,"\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", tdt->get_hfta_unpack_fcn_noxf().c_str(), fields[temporal_field_idx]->get_name().c_str(), 0);
8384 for(h=0;h<hash_flds.size();++h){
// NOTE(review): no delete of hdt is visible in this listing - confirm ownership.
8385 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8386 sprintf(tmpstr,"\tunpack_var_%s = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", fields[hash_flds[h]]->get_name().c_str(), hdt->get_hfta_unpack_fcn_noxf().c_str(), fields[hash_flds[h]]->get_name().c_str(), 0);
8390 " return temp_tuple_received;\n"
// ---- Emitted new_epoch(): true on first execution or once timestamp has
// reached the start of the next bucket; it then updates last_bucket ----
8396 "bool new_epoch(){\n"
8397 " if(first_execution || (last_bucket + 1) * bucketwidth <= timestamp){\n"
8398 " last_bucket = timestamp / bucketwidth;\n"
8399 " first_execution = false;\n"
// ---- Emitted output_hash(): three variants are visible below - a constant 0,
// a counter-based one when there are no hash fields, and one computed from
// the unpacked hash fields ----
8409 "inline gs_uint32_t output_hash(){return 0;}\n\n";
8411 if(hash_flds.size()==0){
8413 "gs_uint32_t output_hash(){\n"
8415 " if(outfl_cnt >= "+int_to_string(n_streams)+")\n"
8417 " return outfl_cnt;\n"
8423 "gs_uint32_t output_hash(){\n"
8424 " gs_uint32_t ret = "
8426 for(h=0;h<hash_flds.size();++h){
// NOTE(review): no delete of hdt is visible in this listing - confirm ownership.
8428 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
// Types with a hash function are hashed through it; others use the raw value.
8429 if(hdt->use_hashfunc()){
8430 sprintf(tmpstr,"%s(&(unpack_var_%s))",hdt->get_hfta_hashfunc().c_str(),fields[hash_flds[h]]->get_name().c_str());
8432 sprintf(tmpstr,"unpack_var_%s",fields[hash_flds[h]]->get_name().c_str());
// NOTE(review): the hash is reduced modulo hash_flds.size(), whereas
// num_file_streams() below reports n_streams - confirm the modulus should not
// be n_streams.
8438 " return ret % "+int_to_string(hash_flds.size())+";\n"
8445 "gs_uint32_t num_file_streams(){\n"
8446 " return("+int_to_string(n_streams)+");\n"
// ---- Emitted get_filename_base(): formats the output file name from the
// query name, stream id, optional clone index and bucket start ----
8451 "string get_filename_base(){\n"
8452 " char tmp_fname[500];\n";
8454 string output_filename_base = hfta_query_name+filestream_id;
// With more than one hfta clone, the clone's parallel index is appended.
8456 if(n_hfta_clones > 1){
8457 output_filename_base += "_"+int_to_string(parallel_idx);
// An empty output directory yields a bare file name; otherwise the directory
// is prefixed.
8463 if(output_spec->output_directory == "")
8465 " sprintf(tmp_fname,\""+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8467 " sprintf(tmp_fname,\""+output_spec->output_directory+"/"+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8469 " return (string)(tmp_fname);\n"
// ---- Emitted do_compression(), is_eof_tuple() and propagate_tuple(); the
// conditions choosing between the true/false bodies are not visible here ----
8475 "bool do_compression(){\n";
8477 ret += " return true;\n";
8479 ret += " return false;\n";
8483 "bool is_eof_tuple(){\n"
8487 "bool propagate_tuple(){\n"
8490 ret+="\treturn false;\n";
8492 ret+="\treturn true;\n";
8494 // create a temp status tuple
8495 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
// The status tuple is laid out using the hfta query name's tuple struct.
8497 ret += gen_init_temp_status_tuple(this->hfta_query_name);
8499 sprintf(tmpstr,"\ttuple->tuple_var%d = timestamp;\n",temporal_field_idx);
8504 ret += "\treturn 0;\n";
// Returns the source line that declares and allocates operator op<i> for this
// file-output node.
//   i      - operator index, used in the variable name op<i>.
//   params - text placed first in the operator's constructor argument list.
// The operator template is file_output_operator, zfile_output_operator or
// bfile_output_operator depending on compression_type; the case labels are not
// visible in this listing.
8512 string output_file_qpn::generate_operator(int i, string params){
8513 string optype = "file_output_operator";
8514 switch(compression_type){
8516 optype = "file_output_operator";
8519 optype = "zfile_output_operator";
8522 optype = "bfile_output_operator";
// The constructor also receives the quoted hfta query name and the
// <hfta_query_name>_schema_definition symbol.
8526 return(" "+optype+"<" +
8527 generate_functor_name() +
8528 "> *op"+int_to_string(i)+" = new "+optype+"<"+
8529 generate_functor_name() +">("+params+", \"" + hfta_query_name + "\""
8530 + "," + hfta_query_name + "_schema_definition);\n");
8533 /////////////////////////////////////////////////////////
8537 string spx_qpn::generate_functor_name(){
8538 return("spx_functor_" + normalize_name(normalize_name(this->get_node_name())));
// Returns the C++ source text of the functor class used by the
// select_project operator for this node.
//   schema      - table list used when generating unpacking, predicate and
//                 packing code.
//   Ext_fcns    - external function list, forwarded to the partial-function,
//                 complex-literal and handle-parameter lookups.
//   needs_xform - forwarded to the column-unpacking generators.
// The emitted class contains: private state, a constructor, a destructor,
// parameter-block routines, predicate(), create_output_tuple(),
// temp_status_received() and create_temp_status_tuple().
// NOTE(review): several short lines (local declarations, closing braces,
// "ret += tmpstr;") are not visible in this listing.
8541 string spx_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8542 // Initialize generate utility globals
8543 segen_gb_tbl = NULL;
8545 string ret = "class " + this->generate_functor_name() + "{\n";
8547 // Find variables referenced in this query node.
8550 col_id_set::iterator csi;
// cid_set collects the column ids used by the where clauses and select list.
8553 for(w=0;w<where.size();++w)
8554 gather_pr_col_ids(where[w]->pr,cid_set,NULL);
8555 for(s=0;s<select_list.size();s++){
8556 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
8560 // Private variables : store the state of the functor.
8561 // 1) variables for unpacked attributes
8562 // 2) offsets of the unpacked attributes
8563 // 3) storage of partial functions
8564 // 4) storage of complex literals (i.e., require a constructor)
8566 ret += "private:\n";
8567 ret += "\tbool first_execution;\t// internal processing state \n";
8568 ret += "\tint schema_handle0;\n";
8570 // generate the declaration of all the variables related to
8571 // temp tuples generation
8572 ret += gen_decl_temp_vars();
8575 // unpacked attribute storage, offsets
8576 ret += "//\t\tstorage and offsets of accessed fields.\n";
8577 ret += generate_access_vars(cid_set,schema);
8578 // tuple metadata management
8579 ret += "\tint tuple_metadata_offset0;\n";
8581 // Variables to store results of partial functions.
8582 // WARNING find_partial_functions modifies the SE
8583 // (it marks the partial function id).
8584 ret += "//\t\tParital function result storage\n";
8585 vector<scalarexp_t *> partial_fcns;
8586 vector<int> fcn_ref_cnt;
8587 vector<bool> is_partial_fcn;
8588 for(s=0;s<select_list.size();s++){
8589 find_partial_fcns(select_list[s]->se, &partial_fcns,&fcn_ref_cnt,&is_partial_fcn, Ext_fcns);
8591 for(w=0;w<where.size();w++){
8592 find_partial_fcns_pr(where[w]->pr, &partial_fcns, &fcn_ref_cnt,&is_partial_fcn,Ext_fcns);
8594 // Unmark non-partial expensive functions referenced only once.
8595 for(p=0; p<partial_fcns.size();p++){
8596 if(!is_partial_fcn[p] && fcn_ref_cnt[p] <= 1){
8597 partial_fcns[p]->set_partial_ref(-1);
8600 if(partial_fcns.size()>0){
8601 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,true);
8604 // Complex literals (i.e., they need constructors)
8605 ret += "//\t\tComplex literal storage.\n";
8606 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8607 ret += generate_complex_lit_vars(complex_literals);
8609 // Pass-by-handle parameters
8610 ret += "//\t\tPass-by-handle storage.\n";
8611 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8612 ret += generate_pass_by_handle_vars(param_handle_table);
8614 // Variables to hold parameters
8615 ret += "//\tfor query parameters\n";
8616 ret += generate_param_vars(param_tbl);
8619 // The publicly exposed functions
8621 ret += "\npublic:\n";
8624 //-------------------
8625 // The functor constructor
8626 // pass in the schema handle.
8627 // 1) make assignments to the unpack offset variables
8628 // 2) initialize the complex literals
8629 // 3) Set the initial values of the temporal attributes
8630 // referenced in select clause (in case we need to emit
8631 // temporal tuple before receiving first tuple )
8633 ret += "//\t\tFunctor constructor.\n";
8634 ret += this->generate_functor_name()+"(int schema_handle0){\n";
8636 // save schema handle
8637 ret += "this->schema_handle0 = schema_handle0;\n";
8640 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8641 ret += gen_access_var_init(cid_set);
8643 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8646 ret += "//\t\tInitialize complex literals.\n";
8647 ret += gen_complex_lit_init(complex_literals);
8649 // Initialize partial function results so they can be safely GC'd
8650 ret += gen_partial_fcn_init(partial_fcns);
8652 // Initialize non-query-parameter parameter handles
8653 ret += gen_pass_by_handle_init(param_handle_table);
8655 // Init temporal attributes referenced in select list
8656 ret += gen_init_temp_vars(schema, select_list, NULL);
8661 //-------------------
8662 // Functor destructor
8663 ret += "//\t\tFunctor destructor.\n";
8664 ret += "~"+this->generate_functor_name()+"(){\n";
8666 // clean up buffer-type complex literals.
8667 ret += gen_complex_lit_dtr(complex_literals);
8669 // Deregister the pass-by-handle parameters
8670 ret += "/* register and de-register the pass-by-handle parameters */\n";
8671 ret += gen_pass_by_handle_dtr(param_handle_table);
8673 // Reclaim buffer space for partial function results
8674 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8675 ret += gen_partial_fcn_dtr(partial_fcns);
8678 // Destroy the parameters, if any need to be destroyed
8679 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8684 //-------------------
8685 // Parameter manipulation routines
8686 ret += generate_load_param_block(this->generate_functor_name(),
8687 this->param_tbl,param_handle_table );
8688 ret += generate_delete_param_block(this->generate_functor_name(),
8689 this->param_tbl,param_handle_table);
8692 //-------------------
8693 // Register new parameter block
8694 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8695 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8696 ret += "\treturn this->load_params_"+this->generate_functor_name()+
8701 //-------------------
8702 // The selection predicate.
8703 // Unpack variables for 1 cnf element
8704 // at a time, return false immediately if the
8706 // optimization : evaluate the cheap cnf elements
8707 // first, the expensive ones last.
8709 ret += "bool predicate(host_tuple &tup0){\n";
8710 // Variables for execution of the function.
8711 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8712 // Initialize cached function indicators.
8713 for(p=0;p<partial_fcns.size();++p){
8714 if(fcn_ref_cnt[p]>1){
8715 ret+="\tfcn_ref_cnt_"+int_to_string(p)+"=0;\n";
// Emit the test that sets temp_tuple_received for input channel 0.
8720 ret += gen_temp_tuple_check(this->node_name, 0);
8722 if(partial_fcns.size()>0){ // partial fcn access failure
8723 ret += "\tgs_retval_t retval = 0;\n";
8727 // Reclaim buffer space for partial function results
8728 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8729 ret += gen_partial_fcn_dtr(partial_fcns);
8731 col_id_set found_cids; // colrefs unpacked thus far.
8732 ret += gen_unpack_temp_vars(schema, found_cids, select_list, NULL, needs_xform);
8734 // For temporal status tuple we don't need to do anything else
8735 ret += "\tif (temp_tuple_received) return false;\n\n";
8738 for(w=0;w<where.size();++w){
8739 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8741 // Find the set of variables accessed in this CNF elem,
8742 // but in no previous element.
8743 col_id_set new_cids;
8744 get_new_pred_cids(where[w]->pr,found_cids, new_cids, NULL);
8745 // Unpack these values.
8746 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
8747 // Find partial fcns ref'd in this cnf element
8749 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8750 ret += gen_unpack_partial_fcn(schema,partial_fcns,pfcn_refs,fcn_ref_cnt, is_partial_fcn, "false");
// The emitted predicate returns false as soon as one clause fails.
8752 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8753 +") ) return(false);\n";
8756 // The partial functions ref'd in the select list
8757 // must also be evaluated. If one returns false,
8758 // then implicitly the predicate is false.
8760 for(s=0;s<select_list.size();s++){
8761 collect_partial_fcns(select_list[s]->se, sl_pfcns);
8763 if(sl_pfcns.size() > 0)
8764 ret += "//\t\tUnpack remaining partial fcns.\n";
8765 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
8766 fcn_ref_cnt, is_partial_fcn,
8767 found_cids, NULL, "false", needs_xform);
8769 // Unpack remaining fields
8770 ret += "//\t\tunpack any remaining fields from the input tuple.\n";
8771 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
8774 ret += "\treturn(true);\n";
8778 //-------------------
8779 // The output tuple function.
8780 // Unpack the remaining attributes into
8781 // the placeholder variables, unpack the
8782 // partial fcn refs, then pack up the tuple.
8784 ret += "host_tuple create_output_tuple() {\n";
8785 ret += "\thost_tuple tup;\n";
8786 ret += "\tgs_retval_t retval = 0;\n";
8788 // Unpack any remaining cached functions.
8789 ret += gen_remaining_cached_fcns(schema, partial_fcns, sl_pfcns,
8790 fcn_ref_cnt, is_partial_fcn);
8793 // Now, compute the size of the tuple.
8795 // Unpack any BUFFER type selections into temporaries
8796 // so that I can compute their size and not have
8797 // to recompute their value during tuple packing.
8798 // I can use regular assignment here because
8799 // these temporaries are non-persistent.
8801 ret += "//\t\tCompute the size of the tuple.\n";
8802 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
8804 // Unpack all buffer type selections, to be able to compute their size
8805 ret += gen_buffer_selvars(schema, select_list);
8807 // The size of the tuple is the size of the tuple struct plus the
8808 // size of the buffers to be copied in.
// One extra gs_uint8_t is included for the tuple-type marker written below.
8811 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
8812 ret += gen_buffer_selvars_size(select_list,schema);
8815 // Allocate tuple data block.
8816 ret += "//\t\tCreate the tuple block.\n";
8817 ret += "\ttup.data = malloc(tup.tuple_size);\n";
8818 ret += "\ttup.heap_resident = true;\n";
8819 // Mark tuple as regular
8820 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
8822 // ret += "\ttup.channel = 0;\n";
8823 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
8824 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
8827 // (Here, offsets are hard-wired. is this a problem?)
8829 ret += "//\t\tPack the fields into the tuple.\n";
// false = pack every select-list field, not only the temporal ones.
8830 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
8832 // Delete string temporaries
8833 ret += gen_buffer_selvars_dtr(select_list);
8835 ret += "\treturn tup;\n";
8838 //-------------------------------------------------------------------
8839 // Temporal update functions
8841 ret += "bool temp_status_received(){return temp_tuple_received;};\n\n";
8844 // create a temp status tuple
8845 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8847 ret += gen_init_temp_status_tuple(this->get_node_name());
8850 // (Here, offsets are hard-wired. is this a problem?)
8852 ret += "//\t\tPack the fields into the tuple.\n";
// true = pack only the temporal fields into the status tuple.
8853 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
8855 ret += "\treturn 0;\n";
8862 string spx_qpn::generate_operator(int i, string params){
8864 return(" select_project_operator<" +
8865 generate_functor_name() +
8866 "> *op"+int_to_string(i)+" = new select_project_operator<"+
8867 generate_functor_name() +">("+params+", \"" + get_node_name() + "\");\n");
8871 ////////////////////////////////////////////////////////////////
8876 string sgah_qpn::generate_functor_name(){
8877 return("sgah_functor_" + normalize_name(this->get_node_name()));
8881 string sgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8885 // Initialize generate utility globals
8886 segen_gb_tbl = &(gb_tbl);
8888 // Might need to generate empty values for cube processing.
8889 map<int, string> structured_types;
8890 for(g=0;g<gb_tbl.size();++g){
8891 if(gb_tbl.get_data_type(g)->is_structured_type()){
8892 structured_types[gb_tbl.get_data_type(g)->type_indicator()] = gb_tbl.get_data_type(g)->get_type_str();
8896 //--------------------------------
8897 // group definition class
8898 string ret = "class " + generate_functor_name() + "_groupdef{\n";
8900 for(g=0;g<this->gb_tbl.size();g++){
8901 sprintf(tmpstr,"gb_var%d",g);
8902 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8904 // empty strucutred literals
8905 map<int, string>::iterator sii;
8906 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8907 data_type dt(sii->second);
8908 literal_t empty_lit(sii->first);
8909 ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8912 if(structured_types.size()==0){
8913 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
8915 ret += "\t"+generate_functor_name() + "_groupdef(){}\n";
8919 ret += "\t"+generate_functor_name() + "_groupdef("+
8920 this->generate_functor_name() + "_groupdef *gd){\n";
8921 for(g=0;g<gb_tbl.size();g++){
8922 data_type *gdt = gb_tbl.get_data_type(g);
8923 if(gdt->is_buffer_type()){
8924 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8925 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8928 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8933 ret += "\t"+generate_functor_name() + "_groupdef("+
8934 this->generate_functor_name() + "_groupdef *gd, bool *pattern){\n";
8935 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8936 literal_t empty_lit(sii->first);
8937 ret += "\t\t"+empty_lit.to_hfta_C_code("&"+empty_lit.hfta_empty_literal_name())+";\n";
8939 for(g=0;g<gb_tbl.size();g++){
8940 data_type *gdt = gb_tbl.get_data_type(g);
8941 ret += "\t\tif(pattern["+int_to_string(g)+"]){\n";
8942 if(gdt->is_buffer_type()){
8943 sprintf(tmpstr,"\t\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8944 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8947 sprintf(tmpstr,"\t\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8950 ret += "\t\t}else{\n";
8951 literal_t empty_lit(gdt->type_indicator());
8952 if(empty_lit.is_cpx_lit()){
8953 ret +="\t\t\tgb_var"+int_to_string(g)+"= "+empty_lit.hfta_empty_literal_name()+";\n";
8955 ret +="\t\t\tgb_var"+int_to_string(g)+"="+empty_lit.to_hfta_C_code("")+";\n";
8961 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
8962 for(g=0;g<gb_tbl.size();g++){
8963 data_type *gdt = gb_tbl.get_data_type(g);
8964 if(gdt->is_buffer_type()){
8965 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
8966 gdt->get_hfta_buffer_destroy().c_str(), g );
8973 for(g=0;g<gb_tbl.size();g++){
8974 data_type *gdt = gb_tbl.get_data_type(g);
8975 if(gdt->is_temporal()){
8980 ret += tgdt->get_host_cvar_type()+" get_curr_gb(){\n";
8981 ret+="\treturn gb_var"+int_to_string(g)+";\n";
8986 //--------------------------------
8987 // aggr definition class
8988 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
8990 for(a=0;a<aggr_tbl.size();a++){
8991 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
8992 sprintf(tmpstr,"aggr_var%d",a);
8993 if(aggr_tbl.is_builtin(a)){
8994 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
8995 if(aggr_tbl.get_op(a) == "AVG"){ // HACK!
8996 data_type cnt_type = data_type("ullong");
8997 ret+="\t"+cnt_type.make_host_cvar(string(tmpstr)+"_cnt")+";\n";
8998 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(string(tmpstr)+"_sum")+";\n";
9001 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
9005 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
9007 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
9008 for(a=0;a<aggr_tbl.size();a++){
9009 if(aggr_tbl.is_builtin(a)){
9010 data_type *adt = aggr_tbl.get_data_type(a);
9011 if(adt->is_buffer_type()){
9012 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
9013 adt->get_hfta_buffer_destroy().c_str(), a );
9017 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
9018 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9019 ret+="(aggr_var"+int_to_string(a)+"));\n";
9025 //-------------------------------------------
9026 // group-by patterns for the functor,
9027 // initialization within the class is cumbersome.
9028 int n_patterns = gb_tbl.gb_patterns.size();
9030 ret += "bool "+this->generate_functor_name()+"_gb_patterns["+int_to_string(n_patterns)+
9031 "]["+int_to_string(gb_tbl.size())+"] = {\n";
9032 if(n_patterns == 0){
9033 for(i=0;i<gb_tbl.size();++i){
9038 for(i=0;i<n_patterns;++i){
9039 if(i>0) ret += ",\n";
9041 for(j=0;j<gb_tbl.size();j++){
9042 if(j>0) ret += ", ";
9043 if(gb_tbl.gb_patterns[i][j]){
9056 //--------------------------------
9058 ret += "class " + this->generate_functor_name() + "{\n";
9060 // Find variables referenced in this query node.
9063 col_id_set::iterator csi;
9065 for(w=0;w<where.size();++w)
9066 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
9067 for(w=0;w<having.size();++w)
9068 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
9069 for(g=0;g<gb_tbl.size();g++)
9070 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
9072 for(s=0;s<select_list.size();s++){
9073 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
9077 // Private variables : store the state of the functor.
9078 // 1) variables for unpacked attributes
9079 // 2) offsets of the unpacked attributes
9080 // 3) storage of partial functions
9081 // 4) storage of complex literals (i.e., require a constructor)
9083 ret += "private:\n";
9085 // var to save the schema handle
9086 ret += "\tint schema_handle0;\n";
9087 // metadata from schema handle
9088 ret += "\tint tuple_metadata_offset0;\n";
9090 // generate the declaration of all the variables related to
9091 // temp tuples generation
9092 ret += gen_decl_temp_vars();
9094 // unpacked attribute storage, offsets
9095 ret += "//\t\tstorage and offsets of accessed fields.\n";
9096 ret += generate_access_vars(cid_set, schema);
9098 // Variables to store results of partial functions.
9099 // WARNING find_partial_functions modifies the SE
9100 // (it marks the partial function id).
9101 ret += "//\t\tParital function result storage\n";
9102 vector<scalarexp_t *> partial_fcns;
9103 vector<int> fcn_ref_cnt;
9104 vector<bool> is_partial_fcn;
9105 for(s=0;s<select_list.size();s++){
9106 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
9108 for(w=0;w<where.size();w++){
9109 find_partial_fcns_pr(where[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9111 for(w=0;w<having.size();w++){
9112 find_partial_fcns_pr(having[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9114 for(g=0;g<gb_tbl.size();g++){
9115 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns,NULL,NULL, Ext_fcns);
9117 for(a=0;a<aggr_tbl.size();a++){
9118 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns,NULL,NULL, Ext_fcns);
9120 if(partial_fcns.size()>0){
9121 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
9122 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
9125 // Complex literals (i.e., they need constructors)
9126 ret += "//\t\tComplex literal storage.\n";
9127 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
9128 ret += generate_complex_lit_vars(complex_literals);
9130 // Pass-by-handle parameters
9131 ret += "//\t\tPass-by-handle storage.\n";
9132 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
9133 ret += generate_pass_by_handle_vars(param_handle_table);
9136 // variables to hold parameters.
9137 ret += "//\tfor query parameters\n";
9138 ret += generate_param_vars(param_tbl);
9140 // Is there a temporal flush? If so create flush temporaries,
9141 // create flush indicator.
9142 bool uses_temporal_flush = false;
9143 for(g=0;g<gb_tbl.size();g++){
9144 data_type *gdt = gb_tbl.get_data_type(g);
9145 if(gdt->is_temporal())
9146 uses_temporal_flush = true;
9149 if(uses_temporal_flush){
9150 ret += "//\t\tFor temporal flush\n";
9151 for(g=0;g<gb_tbl.size();g++){
9152 data_type *gdt = gb_tbl.get_data_type(g);
9153 if(gdt->is_temporal()){
9154 sprintf(tmpstr,"last_gb%d",g);
9155 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9156 sprintf(tmpstr,"last_flushed_gb%d",g);
9157 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9160 ret += "\tbool needs_temporal_flush;\n";
9164 // The publicly exposed functions
9166 ret += "\npublic:\n";
9169 //-------------------
9170 // The functor constructor
9171 // pass in the schema handle.
9172 // 1) make assignments to the unpack offset variables
9173 // 2) initialize the complex literals
9175 ret += "//\t\tFunctor constructor.\n";
9176 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9178 // save the schema handle
9179 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
9182 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
9183 ret += gen_access_var_init(cid_set);
9185 ret += "tuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9188 ret += "//\t\tInitialize complex literals.\n";
9189 ret += gen_complex_lit_init(complex_literals);
9191 // Initialize partial function results so they can be safely GC'd
9192 ret += gen_partial_fcn_init(partial_fcns);
9194 // Initialize non-query-parameter parameter handles
9195 ret += gen_pass_by_handle_init(param_handle_table);
9197 // temporal flush variables
9198 // ASSUME that structured values won't be temporal.
9199 if(uses_temporal_flush){
9200 ret += "//\t\tInitialize temporal flush variables.\n";
9201 for(g=0;g<gb_tbl.size();g++){
9202 data_type *gdt = gb_tbl.get_data_type(g);
9203 if(gdt->is_temporal()){
9204 literal_t gl(gdt->type_indicator());
9205 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9207 sprintf(tmpstr,"\tlast_flushed_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9211 ret += "\tneeds_temporal_flush = false;\n";
9214 // Init temporal attributes referenced in select list
9215 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
9219 //-------------------
9220 // Functor destructor
9221 ret += "//\t\tFunctor destructor.\n";
9222 ret += "~"+this->generate_functor_name()+"(){\n";
9224 // clean up buffer type complex literals
9225 ret += gen_complex_lit_dtr(complex_literals);
9227 // Deregister the pass-by-handle parameters
9228 ret += "/* register and de-register the pass-by-handle parameters */\n";
9229 ret += gen_pass_by_handle_dtr(param_handle_table);
9231 // clean up partial function results.
9232 ret += "/* clean up partial function storage */\n";
9233 ret += gen_partial_fcn_dtr(partial_fcns);
9235 // Destroy the parameters, if any need to be destroyed
9236 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9241 //-------------------
9242 // Parameter manipulation routines
9243 ret += generate_load_param_block(this->generate_functor_name(),
9244 this->param_tbl,param_handle_table);
9245 ret += generate_delete_param_block(this->generate_functor_name(),
9246 this->param_tbl,param_handle_table);
9248 //-------------------
9249 // Register new parameter block
9251 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
9252 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9253 ret += "\treturn this->load_params_"+this->generate_functor_name()+
9257 // -----------------------------------
9258 // group-by pattern support
9261 "int n_groupby_patterns(){\n"
9262 " return "+int_to_string(gb_tbl.gb_patterns.size())+";\n"
9264 "bool *get_pattern(int p){\n"
9265 " return "+this->generate_functor_name()+"_gb_patterns[p];\n"
9272 //-------------------
9273 // the create_group method.
9274 // This method creates a group in a buffer passed in
9275 // (to allow for creation on the stack).
9276 // There are also a couple of side effects:
9277 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
9278 // 2) determine if a temporal flush is required.
9280 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
9281 // Variables for execution of the function.
9282 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9284 if(partial_fcns.size()>0){ // partial fcn access failure
9285 ret += "\tgs_retval_t retval = 0;\n";
9289 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
9290 "_groupdef *) buffer;\n";
9292 // Start by cleaning up partial function results
9293 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
9294 set<int> w_pfcns; // partial fcns in where clause
9295 for(w=0;w<where.size();++w)
9296 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
9298 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
9299 for(g=0;g<gb_tbl.size();g++){
9300 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
9302 for(a=0;a<aggr_tbl.size();a++){
9303 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
9305 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
9306 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
9307 // ret += gen_partial_fcn_dtr(partial_fcns);
9310 ret += gen_temp_tuple_check(this->node_name, 0);
9311 col_id_set found_cids; // colrefs unpacked thus far.
9312 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
9315 // Save temporal group-by variables
9318 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
9320 for(g=0;g<gb_tbl.size();g++){
9322 data_type *gdt = gb_tbl.get_data_type(g);
9324 if(gdt->is_temporal()){
9325 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9326 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9334 // Compare the temporal GB vars with the stored ones,
9335 // set flush indicator and update stored GB vars if there is any change.
9337 ret += "// hfta_disorder = "+int_to_string(hfta_disorder)+"\n";
9338 if(hfta_disorder < 2){
9339 if(uses_temporal_flush){
9341 bool first_one = true;
9342 for(g=0;g<gb_tbl.size();g++){
9343 data_type *gdt = gb_tbl.get_data_type(g);
9345 if(gdt->is_temporal()){
9346 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9347 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9348 if(first_one){first_one = false;} else {ret += ") && (";}
9349 ret += generate_equality_test(lhs_op, rhs_op, gdt);
9353 for(g=0;g<gb_tbl.size();g++){
9354 data_type *gdt = gb_tbl.get_data_type(g);
9355 if(gdt->is_temporal()){
9356 if(gdt->is_buffer_type()){
9357 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9359 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9361 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9366 ret += "\t\tneeds_temporal_flush=true;\n";
9367 ret += "\t\t}else{\n"
9368 "\t\t\tneeds_temporal_flush=false;\n"
9372 ret+= "\tif(temp_tuple_received && !( (";
9373 bool first_one = true;
9374 for(g=0;g<gb_tbl.size();g++){
9375 data_type *gdt = gb_tbl.get_data_type(g);
9377 if(gdt->is_temporal()){
9378 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9379 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9380 if(first_one){first_one = false;} else {ret += ") && (";}
9381 ret += generate_equality_test(lhs_op, rhs_op, gdt);
9387 for(g=0;g<gb_tbl.size();g++){
9388 data_type *gdt = gb_tbl.get_data_type(g);
9389 if(gdt->is_temporal()){
9391 if(gdt->is_buffer_type()){
9392 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9394 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9396 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9402 data_type *tgdt = gb_tbl.get_data_type(temporal_g);
9403 literal_t gl(tgdt->type_indicator());
9404 ret += "\t\tif(last_flushed_gb"+int_to_string(temporal_g)+">"+gl.to_hfta_C_code("")+")\n";
9405 ret += "\t\t\tneeds_temporal_flush=true;\n";
9406 ret += "\t\t}else{\n"
9407 "\t\t\tneeds_temporal_flush=false;\n"
9412 // For temporal status tuple we don't need to do anything else
9413 ret += "\tif (temp_tuple_received) return NULL;\n\n";
9415 for(w=0;w<where.size();++w){
9416 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
9418 // Find the set of variables accessed in this CNF elem,
9419 // but in no previous element.
9420 col_id_set new_cids;
9421 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
9423 // Unpack these values.
9424 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9425 // Find partial fcns ref'd in this cnf element
9427 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
9428 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
9430 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
9431 +") ) return(NULL);\n";
9434 // The partial functions ref'd in the group-by var and aggregate
9435 // definitions must also be evaluated. If one returns false,
9436 // then implicitly the predicate is false.
9437 set<int>::iterator pfsi;
9439 if(ag_gb_pfcns.size() > 0)
9440 ret += "//\t\tUnpack remaining partial fcns.\n";
9441 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
9442 found_cids, segen_gb_tbl, "NULL", needs_xform);
9444 // Unpack the group-by variables
9446 for(g=0;g<gb_tbl.size();g++){
9447 data_type *gdt = gb_tbl.get_data_type(g);
9449 if(!gdt->is_temporal()){
9450 // Find the new fields ref'd by this GBvar def.
9451 col_id_set new_cids;
9452 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
9453 // Unpack these values.
9454 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9456 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9457 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9459 // There seems to be no difference between the two
9460 // branches of the IF statement.
9461 data_type *gdt = gb_tbl.get_data_type(g);
9462 if(gdt->is_buffer_type()){
9463 // Create temporary copy.
9464 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9465 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9467 scalarexp_t *gse = gb_tbl.get_def(g);
9468 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9469 g,generate_se_code(gse,schema).c_str());
9478 ret+= "\treturn gbval;\n";
9481 //--------------------------------------------------------
9482 // Create and initialize an aggregate object
9484 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
9485 // Variables for execution of the function.
9486 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9489 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
9490 "_aggrdef *)buffer;\n";
9492 for(a=0;a<aggr_tbl.size();a++){
9493 if(aggr_tbl.is_builtin(a)){
9494 // Create temporaries for buffer return values
9495 data_type *adt = aggr_tbl.get_data_type(a);
9496 if(adt->is_buffer_type()){
9497 sprintf(tmpstr,"aggr_tmp_%d", a);
9498 ret+=adt->make_host_cvar(tmpstr)+";\n";
9503 // Unpack all remaining attributes
9504 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
9505 for(a=0;a<aggr_tbl.size();a++){
9506 sprintf(tmpstr,"aggval->aggr_var%d",a);
9507 string assignto_var = tmpstr;
9508 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
9511 ret += "\treturn aggval;\n";
9514 //--------------------------------------------------------
9515 // update an aggregate object
9517 ret += "void update_aggregate(host_tuple &tup0, "
9518 +generate_functor_name()+"_groupdef *gbval, "+
9519 generate_functor_name()+"_aggrdef *aggval){\n";
9520 // Variables for execution of the function.
9521 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9523 // use of temporaries depends on the aggregate,
9524 // generate them in generate_aggr_update
9527 // Unpack all remaining attributes
9528 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
9529 for(a=0;a<aggr_tbl.size();a++){
9530 sprintf(tmpstr,"aggval->aggr_var%d",a);
9531 string varname = tmpstr;
9532 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
9535 ret += "\treturn;\n";
9538 //---------------------------------------------------
9541 ret += "\tbool flush_needed(){\n";
9542 if(uses_temporal_flush){
9543 ret += "\t\treturn needs_temporal_flush;\n";
9545 ret += "\t\treturn false;\n";
9549 //---------------------------------------------------
9550 // create output tuple
9551 // Unpack the partial functions ref'd in the where clause,
9552 // select clause. Evaluate the where clause.
9553 // Finally, pack the tuple.
9555 // I need to use special code generation here,
9556 // so I'll leave it in longhand.
9558 ret += "host_tuple create_output_tuple("
9559 +generate_functor_name()+"_groupdef *gbval, "+
9560 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
9562 ret += "\thost_tuple tup;\n";
9563 ret += "\tfailed = false;\n";
9564 ret += "\tgs_retval_t retval = 0;\n";
9566 string gbvar = "gbval->gb_var";
9567 string aggvar = "aggval->";
9569 // Create cached temporaries for UDAF return values.
9570 for(a=0;a<aggr_tbl.size();a++){
9571 if(! aggr_tbl.is_builtin(a)){
9572 int afcn_id = aggr_tbl.get_fcn_id(a);
9573 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9574 sprintf(tmpstr,"udaf_ret_%d", a);
9575 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
9580 // First, get the return values from the UDAFS
9581 for(a=0;a<aggr_tbl.size();a++){
9582 if(! aggr_tbl.is_builtin(a)){
9583 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
9584 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9585 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
9589 set<int> hv_sl_pfcns;
9590 for(w=0;w<having.size();w++){
9591 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
9593 for(s=0;s<select_list.size();s++){
9594 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
9597 // clean up the partial fcn results from any previous execution
9598 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
9601 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
9602 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
9603 ret += "\tif(retval){ failed = true; return(tup);}\n";
9606 // Evaluate the HAVING clause
9607 // TODO: this seems to have a ++ operator rather than a + operator.
9608 for(w=0;w<having.size();++w){
9609 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
9612 // Now, compute the size of the tuple.
9614 // Unpack any BUFFER type selections into temporaries
9615 // so that I can compute their size and not have
9616 // to recompute their value during tuple packing.
9617 // I can use regular assignment here because
9618 // these temporaries are non-persistent.
9619 // TODO: should I be using the selvar generation routine?
9621 ret += "//\t\tCompute the size of the tuple.\n";
9622 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
9623 for(s=0;s<select_list.size();s++){
9624 scalarexp_t *se = select_list[s]->se;
9625 data_type *sdt = se->get_data_type();
9626 if(sdt->is_buffer_type() &&
9627 !( (se->get_operator_type() == SE_COLREF) ||
9628 (se->get_operator_type() == SE_AGGR_STAR) ||
9629 (se->get_operator_type() == SE_AGGR_SE) ||
9630 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9631 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9633 sprintf(tmpstr,"selvar_%d",s);
9634 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
9635 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
9639 // The size of the tuple is the size of the tuple struct plus the
9640 // size of the buffers to be copied in.
9642 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
9643 for(s=0;s<select_list.size();s++){
9644 // if(s>0) ret += "+";
9645 scalarexp_t *se = select_list[s]->se;
9646 data_type *sdt = select_list[s]->se->get_data_type();
9647 if(sdt->is_buffer_type()){
9648 if(!( (se->get_operator_type() == SE_COLREF) ||
9649 (se->get_operator_type() == SE_AGGR_STAR) ||
9650 (se->get_operator_type() == SE_AGGR_SE) ||
9651 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9652 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9654 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
9657 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9664 // Allocate tuple data block.
9665 ret += "//\t\tCreate the tuple block.\n";
9666 ret += "\ttup.data = malloc(tup.tuple_size);\n";
9667 ret += "\ttup.heap_resident = true;\n";
9669 // Mark tuple as regular
9670 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
9672 // ret += "\ttup.channel = 0;\n";
9673 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
9674 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
9677 // (Here, offsets are hard-wired. is this a problem?)
9679 ret += "//\t\tPack the fields into the tuple.\n";
9680 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
9681 for(s=0;s<select_list.size();s++){
9682 scalarexp_t *se = select_list[s]->se;
9683 data_type *sdt = se->get_data_type();
9684 if(sdt->is_buffer_type()){
9685 if(!( (se->get_operator_type() == SE_COLREF) ||
9686 (se->get_operator_type() == SE_AGGR_STAR) ||
9687 (se->get_operator_type() == SE_AGGR_SE) ||
9688 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9689 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9691 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
9693 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
9696 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9698 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9702 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9704 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
9709 // Destroy string temporaries
9710 ret += gen_buffer_selvars_dtr(select_list);
9711 // Destroy string return vals of UDAFs
9712 for(a=0;a<aggr_tbl.size();a++){
9713 if(! aggr_tbl.is_builtin(a)){
9714 int afcn_id = aggr_tbl.get_fcn_id(a);
9715 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9716 if(adt->is_buffer_type()){
9717 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
9718 adt->get_hfta_buffer_destroy().c_str(), a );
9725 ret += "\treturn tup;\n";
9729 //-------------------------------------------------------------------
9730 // Temporal update functions
9732 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
9734 for(g=0;g<gb_tbl.size();g++){
9735 data_type *gdt = gb_tbl.get_data_type(g);
9736 if(gdt->is_temporal()){
9741 ret += tgdt->get_host_cvar_type()+" get_last_flushed_gb(){\n";
9742 ret+="\treturn last_flushed_gb"+int_to_string(g)+";\n";
9744 ret += tgdt->get_host_cvar_type()+" get_last_gb(){\n";
9745 ret+="\treturn last_gb"+int_to_string(g)+";\n";
9751 // create a temp status tuple
9752 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
9754 ret += gen_init_temp_status_tuple(this->get_node_name());
9757 // (Here, offsets are hard-wired. is this a problem?)
9759 ret += "//\t\tPack the fields into the tuple.\n";
9760 for(s=0;s<select_list.size();s++){
9761 data_type *sdt = select_list[s]->se->get_data_type();
9762 if(sdt->is_temporal()){
9763 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9766 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
9773 ret += "\treturn 0;\n";
9774 ret += "};};\n\n\n";
9777 //----------------------------------------------------------
9778 // The hash function
9780 ret += "struct "+generate_functor_name()+"_hash_func{\n";
9781 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
9782 "_groupdef *grp) const{\n";
9783 ret += "\t\treturn( (";
9784 for(g=0;g<gb_tbl.size();g++){
9786 data_type *gdt = gb_tbl.get_data_type(g);
9787 if(gdt->use_hashfunc()){
9788 if(gdt->is_buffer_type())
9789 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9791 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9793 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
9797 ret += ") >> 32);\n";
9801 //----------------------------------------------------------
9802 // The comparison function
9804 ret += "struct "+generate_functor_name()+"_equal_func{\n";
9805 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
9806 generate_functor_name()+"_groupdef *grp2) const{\n";
9807 ret += "\t\treturn( (";
9809 for(g=0;g<gb_tbl.size();g++){
9810 if(g>0) ret += ") && (";
9811 data_type *gdt = gb_tbl.get_data_type(g);
9812 if(gdt->complex_comparison(gdt)){
9813 if(gdt->is_buffer_type())
9814 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
9815 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9817 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
9818 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9820 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
// Builds the generated-code text that declares and allocates the group-by
// operator for this node.
//   i      : operator index; the emitted pointer variable is named op<i>.
//   params : text spliced in as the leading constructor argument(s) of the
//            emitted operator; the quoted node name follows it.
// The operator template is instantiated with the functor class and its
// companion _groupdef, _aggrdef, _hash_func and _equal_func types, all named
// from generate_functor_name().
9832 string sgah_qpn::generate_operator(int i, string params){
// hfta_disorder < 2 selects the plain groupby_operator template.
9834 if(hfta_disorder < 2){
9836 " groupby_operator<" +
9837 generate_functor_name()+","+
9838 generate_functor_name() + "_groupdef, " +
9839 generate_functor_name() + "_aggrdef, " +
9840 generate_functor_name()+"_hash_func, "+
// No '+' between the next two literals: adjacent string literals are
// concatenated by the compiler.
9841 generate_functor_name()+"_equal_func "
9842 "> *op"+int_to_string(i)+" = new groupby_operator<"+
9843 generate_functor_name()+","+
9844 generate_functor_name() + "_groupdef, " +
9845 generate_functor_name() + "_aggrdef, " +
9846 generate_functor_name()+"_hash_func, "+
9847 generate_functor_name()+"_equal_func "
9848 ">("+params+", \"" + get_node_name() +
// Otherwise scan the group-by variables for one with a temporal data type.
// NOTE(review): tgdt is presumably the first temporal group-by type found by
// this loop; confirm it is always set before it is dereferenced below (e.g.
// when no group-by variable is temporal).
9853 for(int g=0;g<gb_tbl.size();g++){
9854 data_type *gdt = gb_tbl.get_data_type(g);
9855 if(gdt->is_temporal()){
// groupby_operator_oop takes one extra template argument: the host C type of
// the temporal group-by variable (tgdt->get_host_cvar_type()).
// Presumably this is the out-of-order-processing variant - TODO confirm.
9862 " groupby_operator_oop<" +
9863 generate_functor_name()+","+
9864 generate_functor_name() + "_groupdef, " +
9865 generate_functor_name() + "_aggrdef, " +
9866 generate_functor_name()+"_hash_func, "+
9867 generate_functor_name()+"_equal_func, " +
9868 tgdt->get_host_cvar_type() +
9869 "> *op"+int_to_string(i)+" = new groupby_operator_oop<"+
9870 generate_functor_name()+","+
9871 generate_functor_name() + "_groupdef, " +
9872 generate_functor_name() + "_aggrdef, " +
9873 generate_functor_name()+"_hash_func, "+
9874 generate_functor_name()+"_equal_func, " +
9875 tgdt->get_host_cvar_type() +
9876 ">("+params+", \"" + get_node_name() +
9882 ////////////////////////////////////////////////
9885 ////////////////////////////////////////////
// Returns the name of the functor class generated for this merge node:
// the fixed prefix "mrg_functor_" followed by normalize_name() applied to
// the node name.
9887 string mrg_qpn::generate_functor_name(){
9888 return("mrg_functor_" + normalize_name(this->get_node_name()));
9891 string mrg_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
9896 if(fm.size() != mvars.size()){
9897 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=%lu, mvars.size=%lu\n",fm.size(),mvars.size());
9901 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=mvars.size=%lu\n",fm.size());
9906 // Initialize generate utility globals
9907 segen_gb_tbl = NULL;
9909 string ret = "class " + this->generate_functor_name() + "{\n";
9911 // Private variable:
9912 // 1) Vars for unpacked attrs.
9913 // 2) offsets of the unpacked attrs
9914 // 3) last_posted_timestamp
9917 schema->get_type_name(mvars[0]->get_schema_ref(), mvars[0]->get_field()),
9918 schema->get_modifier_list(mvars[0]->get_schema_ref(), mvars[0]->get_field())
9921 schema->get_type_name(mvars[1]->get_schema_ref(), mvars[1]->get_field()),
9922 schema->get_modifier_list(mvars[1]->get_schema_ref(), mvars[1]->get_field())
9925 ret += "private:\n";
9927 // var to save the schema handle
9928 ret += "\tint schema_handle0;\n";
9930 // generate the declaration of all the variables related to
9931 // temp tuples generation
9932 ret += gen_decl_temp_vars();
9934 // unpacked attribute storage, offsets
9935 ret += "//\t\tstorage and offsets of accessed fields.\n";
9936 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
9938 sprintf(tmpstr,"unpack_var_%s_%d", mvars[0]->get_field().c_str(), tblref);
9939 ret+="\t"+dta.make_host_cvar(tmpstr)+";\n";
9940 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[0]->get_field().c_str(), tblref);
9943 sprintf(tmpstr,"unpack_var_%s_%d", mvars[1]->get_field().c_str(), tblref);
9944 ret+="\t"+dtb.make_host_cvar(tmpstr)+";\n";
9945 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[1]->get_field().c_str(), tblref);
9948 ret += "//\t\tRemember the last posted timestamp.\n";
9949 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_0")+";\n";
9950 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_1")+";\n";
9951 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9952 ret+="\t"+dta.make_host_cvar("slack")+";\n";
9953 // ret += "\t bool first_execution_0, first_execution_1;\n";
9955 // variables to hold parameters.
9956 ret += "//\tfor query parameters\n";
9957 ret += generate_param_vars(param_tbl);
9960 //-------------------
9961 // The functor constructor
9962 // pass in a schema handle (e.g. for the 1st input stream),
9963 // use it to determine how to unpack the merge variable.
9964 // ASSUME that both streams have the same layout,
9965 // just duplicate it.
9968 ret += "//\t\tFunctor constructor.\n";
9969 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9971 // var to save the schema handle
9972 ret += "\tthis->schema_handle0 = schema_handle0;\n";
9973 ret += "\ttuple_metadata_offset0=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9974 ret += "\ttuple_metadata_offset1=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9976 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
9978 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", mvars[0]->get_field().c_str(), 0,mvars[0]->get_field().c_str());
9980 sprintf(tmpstr,"\tunpack_offset_%s_%d = unpack_offset_%s_%d;\n",mvars[1]->get_field().c_str(), 1,mvars[0]->get_field().c_str(), 0);
9982 // ret+="\tfirst_execution_0 = first_execution_1 = true;\n";
9984 ret+="\tslack = "+generate_se_code(slack,schema)+";\n";
9986 ret+="\tslack = 0;\n";
9988 // Initialize internal state
9989 ret += "\ttemp_tuple_received = false;\n";
9991 // Init last timestamp values to minimum value for their type
9992 if (dta.is_increasing())
9993 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_min_literal() + ";\n";
9995 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_max_literal() + ";\n";
10000 ret += "//\t\tFunctor destructor.\n";
10001 ret += "~"+this->generate_functor_name()+"(){\n";
10003 // Destroy the parameters, if any need to be destroyed
10004 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10009 // no pass-by-handle params.
10010 vector<handle_param_tbl_entry *> param_handle_table;
10012 // Parameter manipulation routines
10013 ret += generate_load_param_block(this->generate_functor_name(),
10014 this->param_tbl,param_handle_table);
10015 ret += generate_delete_param_block(this->generate_functor_name(),
10016 this->param_tbl,param_handle_table);
10018 // Register new parameter block
10020 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10021 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10022 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10027 // -----------------------------------
10030 string unpack_fcna;
10031 if(needs_xform[0]) unpack_fcna = dta.get_hfta_unpack_fcn();
10032 else unpack_fcna = dta.get_hfta_unpack_fcn_noxf();
10033 string unpack_fcnb;
10034 if(needs_xform[1]) unpack_fcnb = dtb.get_hfta_unpack_fcn();
10035 else unpack_fcnb = dtb.get_hfta_unpack_fcn_noxf();
10038 ret+="\tint compare(const host_tuple& tup1, const host_tuple& tup2) const{ \n";
10039 ret+="\t"+dta.make_host_cvar("timestamp1")+";\n";
10040 ret+="\t"+dta.make_host_cvar("timestamp2")+";\n";
10041 ret+="\tgs_int32_t problem;\n";
10042 ret+="\tif (tup1.channel == 0) {\n";
10043 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10045 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10048 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 1);
10050 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 0);
10054 " if (timestamp1 > timestamp2+slack)\n"
10056 " else if (timestamp1 < timestamp2)\n"
10065 " void get_timestamp(const host_tuple& tup0){\n"
10066 " gs_int32_t problem;\n"
10068 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10077 // Compare to temp status.
10079 " int compare_with_temp_status(int channel) {\n"
10080 " // check if tuple is temp status tuple\n"
10082 " if (channel == 0) {\n"
10083 //" if(first_execution_0) return 1;\n"
10084 " if (timestamp == last_posted_timestamp_0)\n"
10086 " else if (timestamp < last_posted_timestamp_0)\n"
10091 //" if(first_execution_1) return 1;\n"
10092 " if (timestamp == last_posted_timestamp_1)\n"
10094 " else if (timestamp < last_posted_timestamp_1)\n"
10103 " int compare_stored_with_temp_status(const host_tuple& tup0, int channel)/* const*/ {\n"
10105 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10106 ret+="\tgs_int32_t problem;\n";
10108 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10110 ret+="\tif (channel == 0) {\n";
10111 // ret+="\tif(first_execution_0) return 1;\n";
10113 " if (l_timestamp == last_posted_timestamp_0)\n"
10115 " else if (l_timestamp < last_posted_timestamp_0)\n"
10120 // ret+="\tif(first_execution_1) return 1;\n";
10122 " if (l_timestamp == last_posted_timestamp_1)\n"
10124 " else if (l_timestamp < last_posted_timestamp_1)\n"
10132 // update temp status.
10134 " int update_temp_status(const host_tuple& tup) {\n"
10135 " if (tup.channel == 0) {\n"
10136 " last_posted_timestamp_0=timestamp;\n"
10137 //" first_execution_0 = false;\n"
10139 " last_posted_timestamp_1=timestamp;\n"
10140 //" first_execution_1 = false;\n"
10146 " int update_stored_temp_status(const host_tuple& tup, int channel) {\n"
10148 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10149 ret+="\tgs_int32_t problem;\n";
10150 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10153 " if (tup.channel == 0) {\n"
10154 " last_posted_timestamp_0=l_timestamp;\n"
10155 //" first_execution_0 = false;\n"
10157 " last_posted_timestamp_1=l_timestamp;\n"
10158 //" first_execution_1 = false;\n"
10164 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10165 ret+="\tgs_int32_t problem;\n";
10166 ret+="\tif (tup.channel == 0) {\n";
10167 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10170 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10173 ret+="\tif (tup.channel == 0) {\n";
10174 ret+="\tlast_posted_timestamp_0=timestamp;\n";
10175 ret +="\tfirst_execution_0 = false;\n";
10177 ret+="\tlast_posted_timestamp_1=timestamp;\n";
10178 ret +="\tfirst_execution_1 = false;\n";
10185 // update temp status modulo slack.
10186 ret+="\tint update_temp_status_by_slack(const host_tuple& tup, int channel) {\n";
10188 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10189 ret+="\tgs_int32_t problem;\n";
10190 ret+="\tif (tup.channel == 0) {\n";
10191 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10194 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10198 " if (channel == 0) {\n"
10199 " if(first_execution_0){\n"
10200 " last_posted_timestamp_0=timestamp - slack;\n"
10201 " first_execution_0 = false;\n"
10203 " if(last_posted_timestamp_0 < timestamp-slack)\n"
10204 " last_posted_timestamp_0 = timestamp-slack;\n"
10207 " if(first_execution_1){\n"
10208 " last_posted_timestamp_1=timestamp - slack;\n"
10209 " first_execution_1 = false;\n"
10211 " if(last_posted_timestamp_1 < timestamp-slack)\n"
10212 " last_posted_timestamp_1 = timestamp-slack;\n"
10226 "bool temp_status_received(const host_tuple& tup0){\n"
10227 " return ftaschema_is_temporal_tuple_offset(tuple_metadata_offset0, tup0.data);\n"
10230 //"bool temp_status_received(){return temp_tuple_received;};\n\n";
10233 // create a temp status tuple
10234 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
10236 ret += gen_init_temp_status_tuple(this->get_node_name());
10239 ret += "//\t\tPack the fields into the tuple.\n";
10241 string fld_name = mvars[0]->get_field();
10242 int idx = table_layout->get_field_idx(fld_name);
10243 field_entry* fld = table_layout->get_field(idx);
10244 data_type dt(fld->get_type());
10246 // if (needs_xform[0] && needs_xform[1] && dt.needs_hn_translation())
10247 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s((last_posted_timestamp_0 < last_posted_timestamp_1) ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx, dt.hton_translation().c_str());
10249 sprintf(tmpstr,"\ttuple->tuple_var%d = (last_posted_timestamp_0 < last_posted_timestamp_1 ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx);
10253 ret += "\treturn 0;\n";
10256 // Transform tuple (before output)
10259 ret += "void xform_tuple(host_tuple &tup){\n";
10260 if((needs_xform[0] && !needs_xform[1]) || (needs_xform[1] && !needs_xform[0])){
10261 ret += "\tstruct "+generate_tuple_name(this->get_node_name())+" *tuple = ("+
10262 generate_tuple_name(this->get_node_name())+" *)(tup.data);\n";
10264 vector<field_entry *> flds = table_layout->get_fields();
10266 ret+="\tif(tup.channel == 0){\n";
10267 if(needs_xform[0] && !needs_xform[1]){
10269 for(f=0;f<flds.size();f++){
10271 data_type dt(flds[f]->get_type());
10272 if(dt.get_type() == v_str_t){
10273 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10275 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10277 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10280 if(dt.needs_hn_translation()){
10281 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10282 // f, dt.hton_translation().c_str(), f);
10288 ret += "\t\treturn;\n";
10290 ret.append("\t}\n");
10293 ret+="\tif(tup.channel == 1){\n";
10294 if(needs_xform[1] && !needs_xform[0]){
10296 for(f=0;f<flds.size();f++){
10298 data_type dt(flds[f]->get_type());
10299 if(dt.get_type() == v_str_t){
10300 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10302 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10304 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10307 if(dt.needs_hn_translation()){
10308 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10309 // f, dt.hton_translation().c_str(), f);
10315 ret += "\t\treturn;\n";
10317 ret.append("\t}\n");
10320 ret.append("};\n\n");
10322 // print_warnings() : tell the functor if the user wants to print warnings.
10323 ret += "bool print_warnings(){\n";
10324 if(definitions.count("print_warnings") && (
10325 definitions["print_warnings"] == "yes" ||
10326 definitions["print_warnings"] == "Yes" ||
10327 definitions["print_warnings"] == "YES" )) {
10328 ret += "return true;\n";
10330 ret += "return false;\n";
10332 ret.append("};\n\n");
10335 // Done with methods.
// Builds the C++ source text that instantiates the merge operator for this
// query-plan node.
//   i      - operator index; the generated pointer variable is named op<i>
//            (the number is rendered with int_to_string(i)).
//   params - text spliced verbatim as the leading constructor argument(s)
//            of the generated `new` expression.
// Two textual variants are visible below: one instantiating
// merge_operator<Functor> and one instantiating merge_operator_oop<Functor>,
// where Functor is generate_functor_name().  The condition that selects
// between them, and the return statements themselves, are not visible in
// this listing -- presumably the selected text is returned; confirm.
10342 string mrg_qpn::generate_operator(int i, string params){
// Variant 1: declares a merge_operator<Functor> pointer named op<i> and
// constructs it from params, the literal 10000, and the quoted node name.
// NOTE(review): the meaning of the literal 10000 is not visible here --
// confirm against the merge_operator constructor.
10346 " merge_operator<" +
10347 generate_functor_name()+
10348 "> *op"+int_to_string(i)+" = new merge_operator<"+
10349 generate_functor_name()+
10350 ">("+params+",10000,\"" + get_node_name() + "\");\n"
// Variant 2: same shape as variant 1, but the generated type is
// merge_operator_oop<Functor>.
10354 " merge_operator_oop<" +
10355 generate_functor_name()+
10356 "> *op"+int_to_string(i)+" = new merge_operator_oop<"+
10357 generate_functor_name()+
10358 ">("+params+",10000,\"" + get_node_name() + "\");\n"
10362 ////////////////////////////////////////////////
10363 /// WATCHLIST_TBL operator
10364 /// WATCHLIST_TBL functor
10365 ////////////////////////////////////////////
// Returns the name used for this node's generated functor: the fixed prefix
// "watch_tbl_functor_" followed by normalize_name() applied to the node name.
10367 string watch_tbl_qpn::generate_functor_name(){
10368 return("watch_tbl_functor_" + normalize_name(this->get_node_name()));
// Functor generation is not implemented for watch-table nodes: the visible
// body ignores schema, Ext_fcns and needs_xform and returns a fixed
// placeholder token instead of functor source text.
// NOTE(review): presumably the token is meant to make any generated code
// that includes it fail to compile -- confirm with callers.
10371 string watch_tbl_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10373 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
// Operator generation is not implemented for watch-table nodes: the visible
// body ignores i and params and returns the same fixed placeholder token
// that watch_tbl_qpn::generate_functor returns.
10376 string watch_tbl_qpn::generate_operator(int i, string params){
10377 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
10380 /////////////////////////////////////////////////////////
10381 ////// JOIN_EQ_HASH functor
// Returns the name used for this node's generated functor class: the fixed
// prefix "join_eq_hash_functor_" followed by normalize_name() applied to the
// node name.  generate_functor() derives its "_keydef" and "_tempeqdef"
// helper class names from this same string.
10384 string join_eq_hash_qpn::generate_functor_name(){
10385 return("join_eq_hash_functor_" + normalize_name(this->get_node_name()));
10388 string join_eq_hash_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10390 vector<data_type *> hashkey_dt; // data types in the hash key
10391 vector<data_type *> temporal_dt; // data types in the temporal key
10392 map<string,scalarexp_t *> l_equiv, r_equiv; // field equivalences
10393 set<int> pfcn_refs;
10394 col_id_set new_cids, local_cids;
10396 //--------------------------------
10399 string plus_op = "+";
10401 //--------------------------------
10402 // key definition class
10403 string ret = "class " + generate_functor_name() + "_keydef{\n";
10404 ret += "public:\n";
10405 // Collect attributes from hash join predicates.
10406 // ASSUME equality predicate.
10407 // Use the upwardly compatible data type
10408 // (infer from '+' operator if possible, else use left type)
10409 for(p=0;p<this->hash_eq.size();++p){
10410 scalarexp_t *lse = hash_eq[p]->pr->get_left_se();
10411 scalarexp_t *rse = hash_eq[p]->pr->get_right_se();
10412 data_type *hdt = new data_type(
10413 lse->get_data_type(), rse->get_data_type(), plus_op );
10414 if(hdt->get_type() == undefined_t){
10415 hashkey_dt.push_back(lse->get_data_type()->duplicate());
10418 hashkey_dt.push_back(hdt);
10420 sprintf(tmpstr,"hashkey_var%d",p);
10421 ret+="\t"+hashkey_dt[p]->make_host_cvar(tmpstr)+";\n";
10423 // find equivalences
10424 // NOTE: this code needs to be synched with the temporality
10425 // checking done at join_eq_hash_qpn::get_fields
10426 if(lse->get_operator_type()==SE_COLREF){
10427 l_equiv[lse->get_colref()->get_field()] = rse;
10429 if(rse->get_operator_type()==SE_COLREF){
10430 r_equiv[rse->get_colref()->get_field()] = lse;
10433 ret += "\tbool touched;\n";
10436 ret += "\t"+generate_functor_name() + "_keydef(){touched=false;};\n";
10438 ret += "\t~"+ generate_functor_name() + "_keydef(){\n";
10439 for(p=0;p<hashkey_dt.size();p++){
10440 if(hashkey_dt[p]->is_buffer_type()){
10441 sprintf(tmpstr,"\t\t%s(&hashkey_var%d);\n",
10442 hashkey_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10447 ret+="\tvoid touch(){touched = true;};\n";
10448 ret+="\tbool is_touched(){return touched;};\n";
10452 //--------------------------------
10453 // temporal equality definition class
10454 ret += "class " + generate_functor_name() + "_tempeqdef{\n";
10455 ret += "public:\n";
10456 // Collect attributes from hash join predicates.
10457 // ASSUME equality predicate.
10458 // Use the upwardly compatible data type
10459 // (infer from '+' operator if possible, else use left type)
10460 for(p=0;p<this->temporal_eq.size();++p){
10461 scalarexp_t *lse = temporal_eq[p]->pr->get_left_se();
10462 scalarexp_t *rse = temporal_eq[p]->pr->get_right_se();
10463 data_type *hdt = new data_type(
10464 lse->get_data_type(), rse->get_data_type(), plus_op );
10465 if(hdt->get_type() == undefined_t){
10466 temporal_dt.push_back(hash_eq[p]->pr->get_left_se()->get_data_type()->duplicate());
10469 temporal_dt.push_back(hdt);
10471 sprintf(tmpstr,"tempeq_var%d",p);
10472 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
10473 // find equivalences
10474 if(lse->get_operator_type()==SE_COLREF){
10475 l_equiv[lse->get_colref()->get_field()] = rse;
10477 if(rse->get_operator_type()==SE_COLREF){
10478 r_equiv[rse->get_colref()->get_field()] = lse;
10483 ret += "\t"+generate_functor_name() + "_tempeqdef(){};\n";
10485 ret += "\t~"+ generate_functor_name() + "_tempeqdef(){\n";
10486 for(p=0;p<temporal_dt.size();p++){
10487 if(temporal_dt[p]->is_buffer_type()){
10488 sprintf(tmpstr,"\t\t%s(&tempeq_var%d);\n",
10489 temporal_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10497 //--------------------------------
10498 // temporal eq, hash join functor class
10499 ret += "class " + this->generate_functor_name() + "{\n";
10501 // Find variables referenced in this query node.
10503 col_id_set cid_set;
10504 col_id_set::iterator csi;
10506 for(p=0;p<where.size();++p)
10507 gather_pr_col_ids(where[p]->pr,cid_set,NULL);
10508 for(s=0;s<select_list.size();s++)
10509 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
10511 // Private variables : store the state of the functor.
10512 // 1) variables for unpacked attributes
10513 // 2) offsets of the unpacked attributes
10514 // 3) storage of partial functions
10515 // 4) storage of complex literals (i.e., require a constructor)
10517 ret += "private:\n";
10519 // var to save the schema handles
10520 ret += "\tint schema_handle0;\n";
10521 ret += "\tint schema_handle1;\n";
10523 // generate the declaration of all the variables related to
10524 // temp tuples generation
10525 ret += gen_decl_temp_vars();
10526 // tuple metadata offsets
10527 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
10529 // unpacked attribute storage, offsets
10530 ret += "//\t\tstorage and offsets of accessed fields.\n";
10531 ret += generate_access_vars(cid_set, schema);
10534 // Variables to store results of partial functions.
10535 // WARNING find_partial_functions modifies the SE
10536 // (it marks the partial function id).
10537 ret += "//\t\tParital function result storage\n";
10538 vector<scalarexp_t *> partial_fcns;
10539 vector<int> fcn_ref_cnt;
10540 vector<bool> is_partial_fcn;
10541 for(s=0;s<select_list.size();s++){
10542 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
10544 for(p=0;p<where.size();p++){
10545 find_partial_fcns_pr(where[p]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
10547 if(partial_fcns.size()>0){
10548 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
10549 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
10552 // Complex literals (i.e., they need constructors)
10553 ret += "//\t\tComplex literal storage.\n";
10554 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
10555 ret += generate_complex_lit_vars(complex_literals);
10556 // We need the following to handle strings in outer joins.
10557 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10558 ret += "\tstruct vstring EmptyString;\n";
10559 ret += "\tstruct hfta_ipv6_str EmptyIp6;\n";
10561 // Pass-by-handle parameters
10562 ret += "//\t\tPass-by-handle storage.\n";
10563 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
10564 ret += generate_pass_by_handle_vars(param_handle_table);
10567 // variables to hold parameters.
10568 ret += "//\tfor query parameters\n";
10569 ret += generate_param_vars(param_tbl);
10572 ret += "\npublic:\n";
10573 //-------------------
10574 // The functor constructor
10575 // pass in the schema handle.
10576 // 1) make assignments to the unpack offset variables
10577 // 2) initialize the complex literals
10579 ret += "//\t\tFunctor constructor.\n";
10580 ret += this->generate_functor_name()+"(int schema_handle0, int schema_handle1){\n";
10582 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
10583 ret += "\t\tthis->schema_handle1 = schema_handle1;\n";
10584 // metadata offsets
10585 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10586 ret += "\ttuple_metadata_offset1 = ftaschema_get_tuple_metadata_offset(schema_handle1);\n";
10589 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
10590 ret += gen_access_var_init(cid_set);
10592 // complex literals
10593 ret += "//\t\tInitialize complex literals.\n";
10594 ret += gen_complex_lit_init(complex_literals);
10595 // Initialize EmptyString to the ... empty string
10596 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10597 literal_t mtstr_lit("");
10598 ret += "\t" + mtstr_lit.to_hfta_C_code("&EmptyString")+";\n";
10599 literal_t mip6_lit("0:0:0:0:0:0:0:0",LITERAL_IPV6);
10600 ret += "\t" + mip6_lit.to_hfta_C_code("&EmptyIp6")+";\n";
10602 // Initialize partial function results so they can be safely GC'd
10603 ret += gen_partial_fcn_init(partial_fcns);
10605 // Initialize non-query-parameter parameter handles
10606 ret += gen_pass_by_handle_init(param_handle_table);
10608 // Init temporal attributes referenced in select list
10609 ret += gen_init_temp_vars(schema, select_list, NULL);
10616 //-------------------
10617 // Functor destructor
10618 ret += "//\t\tFunctor destructor.\n";
10619 ret += "~"+this->generate_functor_name()+"(){\n";
10621 // clean up buffer type complex literals
10622 ret += gen_complex_lit_dtr(complex_literals);
10624 // Deregister the pass-by-handle parameters
10625 ret += "/* register and de-register the pass-by-handle parameters */\n";
10626 ret += gen_pass_by_handle_dtr(param_handle_table);
10628 // clean up partial function results.
10629 ret += "/* clean up partial function storage */\n";
10630 ret += gen_partial_fcn_dtr(partial_fcns);
10632 // Destroy the parameters, if any need to be destroyed
10633 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10638 //-------------------
10639 // Parameter manipulation routines
10640 ret += generate_load_param_block(this->generate_functor_name(),
10641 this->param_tbl,param_handle_table);
10642 ret += generate_delete_param_block(this->generate_functor_name(),
10643 this->param_tbl,param_handle_table);
10645 //-------------------
10646 // Register new parameter block
10648 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10649 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10650 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10655 //-------------------
10656 // The create_key method.
10657 // Perform heap allocation.
10658 // ASSUME : the LHS of the preds reference channel 0 attributes
10659 // NOTE : it may fail if a partial function fails.
10661 ret += this->generate_functor_name()+"_keydef *create_key(host_tuple &tup, bool &failed){\n";
10662 // Variables for execution of the function.
10663 ret+="\t"+this->generate_functor_name()+"_keydef *retval = NULL;\n";
10664 ret+="\tgs_int32_t problem = 0;\n";
10666 // Assume unsuccessful completion
10667 ret+= "\tfailed = true;\n";
10669 // Switch the processing based on the channel
10670 ret+="\tif(tup.channel == 0){\n";
10671 ret+="// ------------ processing for channel 0\n";
10672 ret+="\t\thost_tuple &tup0 = tup;\n";
10673 // Gather partial fcns and colids ref'd by this branch
10675 new_cids.clear(); local_cids.clear();
10676 for(p=0;p<hash_eq.size();p++){
10677 collect_partial_fcns(hash_eq[p]->pr->get_left_se(), pfcn_refs);
10678 gather_se_col_ids(hash_eq[p]->pr->get_left_se(),local_cids,NULL);
10681 // Start by cleaning up partial function results
10682 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10683 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10685 // Evaluate the partial functions
10686 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10687 new_cids, NULL, "NULL", needs_xform);
10688 // test passed -- unpack remaining cids.
10689 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10691 // Alloc and load a key object
10692 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10693 for(p=0;p<hash_eq.size();p++){
10694 data_type *hdt = hash_eq[p]->pr->get_left_se()->get_data_type();
10695 if(hdt->is_buffer_type()){
10696 string vname = "tmp_keyvar"+int_to_string(p);
10697 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_left_se(),schema)+";\n";
10698 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10700 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10701 p,generate_se_code(hash_eq[p]->pr->get_left_se(),schema).c_str() );
10705 ret += "\t}else{\n";
10707 ret+="// ------------ processing for channel 1\n";
10708 ret+="\t\thost_tuple &tup1 = tup;\n";
10709 // Gather partial fcns and colids ref'd by this branch
10711 new_cids.clear(); local_cids.clear();
10712 for(p=0;p<hash_eq.size();p++){
10713 collect_partial_fcns(hash_eq[p]->pr->get_right_se(), pfcn_refs);
10714 gather_se_col_ids(hash_eq[p]->pr->get_right_se(),local_cids,NULL);
10717 // Start by cleaning up partial function results
10718 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10719 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10721 // Evaluate the partial functions
10722 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10723 new_cids, NULL, "NULL", needs_xform);
10725 // test passed -- unpack remaining cids.
10726 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10728 // Alloc and load a key object
10729 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10730 for(p=0;p<hash_eq.size();p++){
10731 data_type *hdt = hash_eq[p]->pr->get_right_se()->get_data_type();
10732 if(hdt->is_buffer_type()){
10733 string vname = "tmp_keyvar"+int_to_string(p);
10734 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_right_se(),schema)+";\n";
10735 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10737 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10738 p,generate_se_code(hash_eq[p]->pr->get_right_se(),schema).c_str() );
10744 ret += "\tfailed = false;\n";
10745 ret += "\t return retval;\n";
10749 //-------------------
10750 // The load_ts method.
10751 // load into an allocated buffer.
10752 // ASSUME : the LHS of the preds reference channel 0 attributes
10753 // NOTE : it may fail if a partial function fails.
10754 // NOTE : cannot handle buffer attributes
10756 ret += "bool load_ts_from_tup("+this->generate_functor_name()+"_tempeqdef *ts, host_tuple &tup){\n";
10757 // Variables for execution of the function.
10758 ret+="\tgs_int32_t problem = 0;\n";
10760 // Switch the processing based on the channel
10761 ret+="\tif(tup.channel == 0){\n";
10762 ret+="// ------------ processing for channel 0\n";
10763 ret+="\t\thost_tuple &tup0 = tup;\n";
10765 // Gather partial fcns and colids ref'd by this branch
10767 new_cids.clear(); local_cids.clear();
10768 for(p=0;p<temporal_eq.size();p++){
10769 collect_partial_fcns(temporal_eq[p]->pr->get_left_se(), pfcn_refs);
10770 gather_se_col_ids(temporal_eq[p]->pr->get_left_se(),local_cids,NULL);
10773 // Start by cleaning up partial function results
10774 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10775 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10777 // Evaluate the partial functions
10778 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10779 new_cids, NULL, "false", needs_xform);
10781 // test passed -- unpack remaining cids.
10782 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10784 // load the temporal key object
10785 for(p=0;p<temporal_eq.size();p++){
10786 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10787 p,generate_se_code(temporal_eq[p]->pr->get_left_se(),schema).c_str() );
10791 ret += "\t}else{\n";
10793 ret+="// ------------ processing for channel 1\n";
10794 ret+="\t\thost_tuple &tup1 = tup;\n";
10796 // Gather partial fcns and colids ref'd by this branch
10798 new_cids.clear(); local_cids.clear();
10799 for(p=0;p<temporal_eq.size();p++){
10800 collect_partial_fcns(temporal_eq[p]->pr->get_right_se(), pfcn_refs);
10801 gather_se_col_ids(temporal_eq[p]->pr->get_right_se(),local_cids,NULL);
10804 // Start by cleaning up partial function results
10805 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10806 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10808 // Evaluate the partial functions
10809 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10810 new_cids, NULL, "false", needs_xform);
10812 // test passed -- unpack remaining cids.
10813 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10815 // load the key object
10816 for(p=0;p<temporal_eq.size();p++){
10817 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10818 p,generate_se_code(temporal_eq[p]->pr->get_right_se(),schema).c_str() );
10824 ret += "\t return true;\n";
10828 // ------------------------------
10830 // (i.e make a copy)
10832 ret += "bool load_ts_from_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10833 for(p=0;p<temporal_eq.size();p++){
10834 sprintf(tmpstr,"\tlts->tempeq_var%d = rts->tempeq_var%d;\n",p,p);
10839 // -------------------------------------
10840 // compare_ts_to_ts
10841 // There should be only one variable to compare.
10842 // If there is more, assume an arbitrary lexicographic order.
10844 ret += "int compare_ts_with_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10845 for(p=0;p<temporal_eq.size();p++){
10846 sprintf(tmpstr,"\tif(lts->tempeq_var%d < rts->tempeq_var%d) return(-1);\n",p,p);
10848 sprintf(tmpstr,"\tif(lts->tempeq_var%d > rts->tempeq_var%d) return(1);\n",p,p);
10851 ret += "\treturn(0);\n";
10854 // ------------------------------------------
10856 // apply the prefilter
10858 ret += "bool apply_prefilter(host_tuple &tup){\n";
10860 // Variables for this procedure
10861 ret+="\tgs_int32_t problem = 0;\n";
10862 ret+="\tgs_retval_t retval;\n";
10864 // Switch the processing based on the channel
10865 ret+="\tif(tup.channel == 0){\n";
10866 ret+="// ------------ processing for channel 0\n";
10867 ret+="\t\thost_tuple &tup0 = tup;\n";
10868 // Gather partial fcns and colids ref'd by this branch
10870 new_cids.clear(); local_cids.clear();
10871 for(p=0;p<prefilter[0].size();p++){
10872 collect_partial_fcns_pr((prefilter[0])[p]->pr, pfcn_refs);
10875 // Start by cleaning up partial function results
10876 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10877 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10879 for(p=0;p<(prefilter[0]).size();++p){
10880 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10882 // Find the set of variables accessed in this CNF elem,
10883 // but in no previous element.
10884 col_id_set new_pr_cids;
10885 get_new_pred_cids((prefilter[0])[p]->pr,local_cids,new_pr_cids, NULL);
10886 // Unpack these values.
10887 ret += gen_unpack_cids(schema, new_pr_cids, "false", needs_xform);
10888 // Find partial fcns ref'd in this cnf element
10889 set<int> pr_pfcn_refs;
10890 collect_partial_fcns_pr((prefilter[0])[p]->pr, pr_pfcn_refs);
10891 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10893 ret += "\t\tif( !("+generate_predicate_code((prefilter[0])[p]->pr,schema)+") ) return(false);\n";
10895 ret += "\t}else{\n";
10896 ret+="// ------------ processing for channel 1\n";
10897 ret+="\t\thost_tuple &tup1 = tup;\n";
10898 // Gather partial fcns and colids ref'd by this branch
10900 new_cids.clear(); local_cids.clear();
10901 for(p=0;p<prefilter[1].size();p++){
10902 collect_partial_fcns_pr((prefilter[1])[p]->pr, pfcn_refs);
10905 // Start by cleaning up partial function results
10906 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10907 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10909 for(p=0;p<(prefilter[1]).size();++p){
10910 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10912 // Find the set of variables accessed in this CNF elem,
10913 // but in no previous element.
10914 col_id_set pr_new_cids;
10915 get_new_pred_cids((prefilter[1])[p]->pr,local_cids, pr_new_cids, NULL);
10916 // Unpack these values.
10917 ret += gen_unpack_cids(schema, pr_new_cids, "false", needs_xform);
10918 // Find partial fcns ref'd in this cnf element
10919 set<int> pr_pfcn_refs;
10920 collect_partial_fcns_pr((prefilter[1])[p]->pr, pr_pfcn_refs);
10921 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10923 ret += "\t\tif( !("+generate_predicate_code((prefilter[1])[p]->pr,schema)+ ") ) return(false);\n";
10927 ret+="\treturn true;\n";
10931 // -------------------------------------
10932 // create_output_tuple
10933 // If the postfilter on the pair of tuples passes,
10934 // create an output tuple from the combined information.
10935 // (Plus, outer join processing)
10937 ret += "host_tuple create_output_tuple(const host_tuple &tup0, const host_tuple &tup1, bool &failed){\n";
10939 ret += "\thost_tuple tup;\n";
10940 ret += "\tfailed = true;\n";
10941 ret += "\tgs_retval_t retval = 0;\n";
10942 ret += "\tgs_int32_t problem = 0;\n";
10944 // Start by cleaning up partial function results
10945 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10947 new_cids.clear(); local_cids.clear();
10948 for(p=0;p<postfilter.size();p++){
10949 collect_partial_fcns_pr(postfilter[p]->pr, pfcn_refs);
10951 for(s=0;s<select_list.size();s++){
10952 collect_partial_fcns(select_list[s]->se, pfcn_refs);
10954 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10957 ret+="\tif(tup0.data && tup1.data){\n";
10958 // Evaluate the postfilter
10959 new_cids.clear(); local_cids.clear();
10960 for(p=0;p<postfilter.size();p++){
10961 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10963 // Find the set of variables accessed in this CNF elem,
10964 // but in no previous element.
10965 col_id_set pr_new_cids;
10966 get_new_pred_cids(postfilter[p]->pr,local_cids, pr_new_cids, NULL);
10967 // Unpack these values.
10968 ret += gen_unpack_cids(schema, pr_new_cids, "tup", needs_xform);
10969 // Find partial fcns ref'd in this cnf element
10970 set<int> pr_pfcn_refs;
10971 collect_partial_fcns_pr(postfilter[p]->pr, pr_pfcn_refs);
10972 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"tup");
10974 ret += "\t\tif( !("+generate_predicate_code(postfilter[p]->pr,schema)+ ") ) return(tup);\n";
10978 // postfilter passed, evaluate partial functions for select list
10981 col_id_set se_cids;
10982 for(s=0;s<select_list.size();s++){
10983 collect_partial_fcns(select_list[s]->se, sl_pfcns);
10986 if(sl_pfcns.size() > 0)
10987 ret += "//\t\tUnpack remaining partial fcns.\n";
10988 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
10989 local_cids, NULL, "tup", needs_xform);
10991 // Unpack remaining fields
10992 ret += "//\t\tunpack any remaining fields from the input tuples.\n";
10993 for(s=0;s<select_list.size();s++)
10994 get_new_se_cids(select_list[s]->se, local_cids,se_cids,NULL);
10995 ret += gen_unpack_cids(schema, se_cids,"tup", needs_xform);
10998 // Deal with outer join stuff
10999 col_id_set l_cids, r_cids;
11000 col_id_set::iterator ocsi;
11001 for(ocsi=local_cids.begin();ocsi!=local_cids.end();++ocsi){
11002 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
11003 else r_cids.insert((*ocsi));
11005 for(ocsi=se_cids.begin();ocsi!=se_cids.end();++ocsi){
11006 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
11007 else r_cids.insert((*ocsi));
11010 ret += "\t}else if(tup0.data){\n";
11011 string unpack_null = ""; col_id_set extra_cids;
11012 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
11013 string field = (*ocsi).field;
11014 if(r_equiv.count(field)){
11015 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
11016 get_new_se_cids(r_equiv[field],l_cids,new_cids,NULL);
11018 int schref = (*ocsi).schema_ref;
11019 data_type dt(schema->get_type_name(schref,field));
11020 literal_t empty_lit(dt.type_indicator());
11021 if(empty_lit.is_cpx_lit()){
11022 // sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
11023 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11024 // NB : works for string type only
11025 // NNB: installed fix for ipv6, more of this should be pushed
11026 // into the literal_t code.
11027 unpack_null+="\tunpack_var_"+field+"_1= "+empty_lit.hfta_empty_literal_name()+";\n";
11029 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
11033 ret += gen_unpack_cids(schema, l_cids, "tup", needs_xform);
11034 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11035 ret += unpack_null;
11036 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11039 unpack_null = ""; extra_cids.clear();
11040 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
11041 string field = (*ocsi).field;
11042 if(l_equiv.count(field)){
11043 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
11044 get_new_se_cids(l_equiv[field],r_cids,new_cids,NULL);
11046 int schref = (*ocsi).schema_ref;
11047 data_type dt(schema->get_type_name(schref,field));
11048 literal_t empty_lit(dt.type_indicator());
11049 if(empty_lit.is_cpx_lit()){
11050 // sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
11051 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11052 // NB : works for string type only
11053 // NNB: installed fix for ipv6, more of this should be pushed
11054 // into the literal_t code.
11055 unpack_null+="\tunpack_var_"+field+"_0= "+empty_lit.hfta_empty_literal_name()+";\n";
11057 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
11061 ret += gen_unpack_cids(schema, r_cids, "tup", needs_xform);
11062 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11063 ret += unpack_null;
11064 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11069 // Unpack any BUFFER type selections into temporaries
11070 // so that I can compute their size and not have
11071 // to recompute their value during tuple packing.
11072 // I can use regular assignment here because
11073 // these temporaries are non-persistent.
11075 ret += "//\t\tCompute the size of the tuple.\n";
11076 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
11078 // Unpack all buffer type selections, to be able to compute their size
11079 ret += gen_buffer_selvars(schema, select_list);
11081 // The size of the tuple is the size of the tuple struct plus the
11082 // size of the buffers to be copied in.
11084 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
11085 ret += gen_buffer_selvars_size(select_list,schema);
11088 // Allocate tuple data block.
11089 ret += "//\t\tCreate the tuple block.\n";
11090 ret += "\ttup.data = malloc(tup.tuple_size);\n";
11091 ret += "\ttup.heap_resident = true;\n";
11092 // ret += "\ttup.channel = 0;\n";
11094 // Mark tuple as regular
11095 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
11098 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
11099 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
11102 // (Here, offsets are hard-wired. is this a problem?)
11104 ret += "//\t\tPack the fields into the tuple.\n";
11105 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
11107 // Delete string temporaries
11108 ret += gen_buffer_selvars_dtr(select_list);
11110 ret += "\tfailed = false;\n";
11111 ret += "\treturn tup;\n";
11116 //-----------------------------
11117 // Method for checking whether tuple is temporal
11119 ret += "bool temp_status_received(host_tuple &tup){\n";
11121 // Switch the processing based on the channel
11122 ret+="\tif(tup.channel == 0){\n";
11123 ret+="\t\thost_tuple &tup0 = tup;\n";
11124 ret += gen_temp_tuple_check(this->node_name, 0);
11125 ret += "\t}else{\n";
11126 ret+="\t\thost_tuple &tup1 = tup;\n";
11127 ret += gen_temp_tuple_check(this->node_name, 1);
11129 ret += "\treturn temp_tuple_received;\n};\n\n";
11132 //-------------------------------------------------------------------
11133 // Temporal update functions
11136 // create a temp status tuple
11137 ret += "int create_temp_status_tuple("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts, host_tuple& result) {\n\n";
11139 ret += "\tgs_retval_t retval = 0;\n";
11140 ret += "\tgs_int32_t problem = 0;\n";
11142 for(p=0;p<temporal_dt.size();p++){
11143 sprintf(tmpstr,"lhs_var");
11144 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
11145 sprintf(tmpstr,"rhs_var");
11146 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
11149 ret += "\tif(lts!=NULL){\n";
11150 for(p=0;p<temporal_dt.size();p++){
11151 ret += "\t\tlhs_var = lts->tempeq_var"+to_string(p)+";\n";
11153 ret += "\t}else{\n";
11154 for(p=0;p<temporal_dt.size();p++){
11155 ret += "\t\tlhs_var = 0;\n";
11159 ret += "\tif(rts!=NULL){\n";
11160 for(p=0;p<temporal_dt.size();p++){
11161 ret += "\t\trhs_var = rts->tempeq_var"+to_string(p)+";\n";
11163 ret += "\t}else{\n";
11164 for(p=0;p<temporal_dt.size();p++){
11165 ret += "\t\trhs_var = 0;\n";
11169 ret += gen_init_temp_status_tuple(this->get_node_name());
11174 // This is checked in the query analyzer so I think its safe,
11175 // But a lot of older code has complex code to propagate multiple
11177 for(s=0;s<select_list.size();s++){
11178 scalarexp_t *se = select_list[s]->se;
11179 data_type *sdt = se->get_data_type();
11180 if(sdt->is_temporal()){
11181 string target = "\ttuple->tuple_var"+to_string(s)+" = ";
11182 if(from[0]->get_property()==0 && from[1]->get_property()==0){ // INNER
11183 ret += target+"(lhs_var>rhs_var ? lhs_var : rhs_var); // INNER\n";
11185 if(from[0]->get_property()!=0 && from[1]->get_property()==0){ // LEFT
11186 ret += target+"lhs_var; // LEFT\n";
11187 // ret += target+"rhs_var; // LEFT\n";
11189 if(from[0]->get_property()==0 && from[1]->get_property()!=0){ // RIGHT
11190 ret += target+"rhs_var; // RIGHT\n";
11191 // ret += target+"lhs_var; // RIGHT\n";
11193 if(from[0]->get_property()!=0 && from[1]->get_property()!=0){ // OUTER
11194 ret += target+"(lhs_var<rhs_var ? lhs_var : rhs_var); // OUTER\n";
11200 ret += "\treturn 0;\n";
11206 //----------------------------------------------------------
11207 // The hash function
11209 ret += "struct "+generate_functor_name()+"_hash_func{\n";
11210 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
11211 "_keydef *key) const{\n";
11212 ret += "\t\treturn( (";
11213 if(hashkey_dt.size() > 0){
11214 for(p=0;p<hashkey_dt.size();p++){
11215 if(p>0) ret += "^";
11216 if(hashkey_dt[p]->use_hashfunc()){
11217 // sprintf(tmpstr,"%s(&(key->hashkey_var%d))",hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11218 if(hashkey_dt[p]->is_buffer_type())
11219 sprintf(tmpstr,"(%s*%s(&(key->hashkey_var%d)))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11221 sprintf(tmpstr,"(%s*%s(key->hashkey_var%d))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11223 sprintf(tmpstr,"(%s*key->hashkey_var%d)",hash_nums[p%NRANDS].c_str(),p);
11230 ret += ") >> 32);\n";
11234 //----------------------------------------------------------
11235 // The comparison function
11237 ret += "struct "+generate_functor_name()+"_equal_func{\n";
11238 ret += "\tbool operator()(const "+generate_functor_name()+"_keydef *key1, "+
11239 generate_functor_name()+"_keydef *key2) const{\n";
11240 ret += "\t\treturn( (";
11241 if(hashkey_dt.size() > 0){
11242 for(p=0;p<hashkey_dt.size();p++){
11243 if(p>0) ret += ") && (";
11244 if(hashkey_dt[p]->complex_comparison(hashkey_dt[p])){
11245 if(hashkey_dt[p]->is_buffer_type())
11246 sprintf(tmpstr,"(%s(&(key1->hashkey_var%d), &(key2->hashkey_var%d))==0)",
11247 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
11249 sprintf(tmpstr,"(%s((key1->hashkey_var%d), (key2->hashkey_var%d))==0)",
11250 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
11252 sprintf(tmpstr,"key1->hashkey_var%d == key2->hashkey_var%d",p,p);
// Build the C++ statement that instantiates the runtime operator for this
// hash equi-join node.  The emitted text declares a pointer named op<i> to a
// join_eq_hash_operator specialized on this node's functor name and on the
// _tempeqdef, _keydef, _hash_func and _equal_func names derived from it, and
// initializes that pointer with a new-expression of the same specialization.
//   i      : index appended to "op" to name the generated variable.
//   params : not referenced on any line visible in this listing; presumably
//            spliced into the constructor argument list -- TODO confirm.
// NOTE(review): this listing omits some original lines (see the gaps in the
// embedded numbering), so the opening and the tail of the returned expression
// are not shown here.
// Where one string literal directly follows another with no '+' in between,
// the two are adjacent literals and are joined by the compiler.
11269 string join_eq_hash_qpn::generate_operator(int i, string params){
11272 " join_eq_hash_operator<" +
11273 generate_functor_name()+ ","+
11274 generate_functor_name() + "_tempeqdef,"+
11275 generate_functor_name() + "_keydef,"+
11276 generate_functor_name()+"_hash_func,"+
11277 generate_functor_name()+"_equal_func"
11278 "> *op"+int_to_string(i)+" = new join_eq_hash_operator<"+
11279 generate_functor_name()+","+
11280 generate_functor_name() + "_tempeqdef,"+
11281 generate_functor_name() + "_keydef,"+
11282 generate_functor_name()+"_hash_func,"+
11283 generate_functor_name()+"_equal_func"
// Join-type code handed to the operator: property of from[0] plus twice the
// property of from[1].  Elsewhere in this file a zero property on both inputs
// is labelled INNER, nonzero only on from[0] LEFT, nonzero only on from[1]
// RIGHT, and nonzero on both OUTER.  (Assumes get_property() yields 0 or 1,
// giving codes 0..3 -- TODO confirm.)  The node name follows, quoted.
11285 int_to_string(from[0]->get_property()+2*from[1]->get_property())+", \"" + get_node_name() +
11292 ////////////////////////////////////////////////////////////////
11293 //// SGAHCWCB functor
// Name of the generated functor class for this SGAHCWCB node: the fixed
// prefix "sgahcwcb_functor_" followed by normalize_name() applied to the
// node name.  generate_functor() also uses this name as the stem of the
// generated <name>_groupdef, <name>_aggrdef and <name>_statedef classes.
11297 string sgahcwcb_qpn::generate_functor_name(){
11298 return("sgahcwcb_functor_" + normalize_name(this->get_node_name()));
11302 string sgahcwcb_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
11306 // Initialize generate utility globals
11307 segen_gb_tbl = &(gb_tbl);
11310 //--------------------------------
11311 // group definition class
11312 string ret = "class " + generate_functor_name() + "_groupdef{\n";
11313 ret += "public:\n";
11314 ret += "\tbool valid;\n";
11315 for(g=0;g<this->gb_tbl.size();g++){
11316 sprintf(tmpstr,"gb_var%d",g);
11317 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11320 ret += "\t"+generate_functor_name() + "_groupdef(){valid=true;};\n";
11321 ret += "\t"+generate_functor_name() + "_groupdef("+
11322 this->generate_functor_name() + "_groupdef *gd){\n";
11323 for(g=0;g<gb_tbl.size();g++){
11324 data_type *gdt = gb_tbl.get_data_type(g);
11325 if(gdt->is_buffer_type()){
11326 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
11327 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
11330 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
11334 ret += "\tvalid=true;\n";
11337 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
11338 for(g=0;g<gb_tbl.size();g++){
11339 data_type *gdt = gb_tbl.get_data_type(g);
11340 if(gdt->is_buffer_type()){
11341 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
11342 gdt->get_hfta_buffer_destroy().c_str(), g );
11349 //--------------------------------
11350 // aggr definition class
11351 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
11352 ret += "public:\n";
11353 for(a=0;a<aggr_tbl.size();a++){
11354 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11355 sprintf(tmpstr,"aggr_var%d",a);
11356 if(aggr_tbl.is_builtin(a))
11357 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11359 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11362 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
11364 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
11365 for(a=0;a<aggr_tbl.size();a++){
11366 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11367 if(aggr_tbl.is_builtin(a)){
11368 data_type *adt = aggr_tbl.get_data_type(a);
11369 if(adt->is_buffer_type()){
11370 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11371 adt->get_hfta_buffer_destroy().c_str(), a );
11375 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11376 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11377 ret+="(aggr_var"+int_to_string(a)+"));\n";
11383 //--------------------------------
11384 // superaggr definition class
11385 ret += "class " + this->generate_functor_name() + "_statedef{\n";
11386 ret += "public:\n";
11387 for(a=0;a<aggr_tbl.size();a++){
11388 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11389 if(ate->is_superaggr()){
11390 sprintf(tmpstr,"aggr_var%d",a);
11391 if(aggr_tbl.is_builtin(a))
11392 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11394 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11397 set<string>::iterator ssi;
11398 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11399 string state_nm = (*ssi);
11400 int state_id = Ext_fcns->lookup_state(state_nm);
11401 data_type *dt = Ext_fcns->get_storage_dt(state_id);
11402 string state_var = "state_var_"+state_nm;
11403 ret += "\t"+dt->make_host_cvar(state_var)+";\n";
11406 ret += "\t"+this->generate_functor_name() + "_statedef(){};\n";
11408 ret += "\t~"+this->generate_functor_name() + "_statedef(){\n";
11409 for(a=0;a<aggr_tbl.size();a++){
11410 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11411 if(ate->is_superaggr()){
11412 if(aggr_tbl.is_builtin(a)){
11413 data_type *adt = aggr_tbl.get_data_type(a);
11414 if(adt->is_buffer_type()){
11415 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11416 adt->get_hfta_buffer_destroy().c_str(), a );
11420 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11421 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11422 ret+="(aggr_var"+int_to_string(a)+"));\n";
11426 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11427 string state_nm = (*ssi);
11428 int state_id = Ext_fcns->lookup_state(state_nm);
11429 string state_var = "state_var_"+state_nm;
11430 ret += "\t_sfun_state_destroy_"+state_nm+"(&"+state_var+");\n";
11437 //--------------------------------
11438 // gb functor class
11439 ret += "class " + this->generate_functor_name() + "{\n";
11441 // Find variables referenced in this query node.
11443 col_id_set cid_set;
11444 col_id_set::iterator csi;
11446 for(w=0;w<where.size();++w)
11447 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
11448 for(w=0;w<having.size();++w)
11449 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
11450 for(w=0;w<cleanby.size();++w)
11451 gather_pr_col_ids(cleanby[w]->pr,cid_set,segen_gb_tbl);
11452 for(w=0;w<cleanwhen.size();++w)
11453 gather_pr_col_ids(cleanwhen[w]->pr,cid_set,segen_gb_tbl);
11454 for(g=0;g<gb_tbl.size();g++)
11455 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
11457 for(s=0;s<select_list.size();s++){
11458 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
11462 // Private variables : store the state of the functor.
11463 // 1) variables for unpacked attributes
11464 // 2) offsets of the unpacked attributes
11465 // 3) storage of partial functions
11466 // 4) storage of complex literals (i.e., require a constructor)
11468 ret += "private:\n";
11470 // var to save the schema handle
11471 ret += "\tint schema_handle0;\n";
11473 // generate the declaration of all the variables related to
11474 // temp tuples generation
11475 ret += gen_decl_temp_vars();
11477 // unpacked attribute storage, offsets
11478 ret += "//\t\tstorage and offsets of accessed fields.\n";
11479 ret += generate_access_vars(cid_set, schema);
11480 // tuple metadata offset
11481 ret += "\ttuple_metadata_offset0;\n";
11483 // Variables to store results of partial functions.
11484 // WARNING find_partial_functions modifies the SE
11485 // (it marks the partial function id).
11486 ret += "//\t\tParital function result storage\n";
11487 vector<scalarexp_t *> partial_fcns;
11488 vector<int> fcn_ref_cnt;
11489 vector<bool> is_partial_fcn;
11490 for(s=0;s<select_list.size();s++){
11491 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
11493 for(w=0;w<where.size();w++){
11494 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11496 for(w=0;w<having.size();w++){
11497 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11499 for(w=0;w<cleanby.size();w++){
11500 find_partial_fcns_pr(cleanby[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11502 for(w=0;w<cleanwhen.size();w++){
11503 find_partial_fcns_pr(cleanwhen[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11505 for(g=0;g<gb_tbl.size();g++){
11506 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
11508 for(a=0;a<aggr_tbl.size();a++){
11509 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
11511 if(partial_fcns.size()>0){
11512 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
11513 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
11516 // Complex literals (i.e., they need constructors)
11517 ret += "//\t\tComplex literal storage.\n";
11518 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
11519 ret += generate_complex_lit_vars(complex_literals);
11521 // Pass-by-handle parameters
11522 ret += "//\t\tPass-by-handle storage.\n";
11523 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
11524 ret += generate_pass_by_handle_vars(param_handle_table);
11526 // Create cached temporaries for UDAF return values.
11527 ret += "//\t\tTemporaries for UDAF return values.\n";
11528 for(a=0;a<aggr_tbl.size();a++){
11529 if(! aggr_tbl.is_builtin(a)){
11530 int afcn_id = aggr_tbl.get_fcn_id(a);
11531 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11532 sprintf(tmpstr,"udaf_ret_%d", a);
11533 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11539 // variables to hold parameters.
11540 ret += "//\tfor query parameters\n";
11541 ret += generate_param_vars(param_tbl);
11543 // Is there a temporal flush? If so create flush temporaries,
11544 // create flush indicator.
11545 bool uses_temporal_flush = false;
11546 for(g=0;g<gb_tbl.size();g++){
11547 data_type *gdt = gb_tbl.get_data_type(g);
11548 if(gdt->is_temporal())
11549 uses_temporal_flush = true;
11552 if(uses_temporal_flush){
11553 ret += "//\t\tFor temporal flush\n";
11554 for(g=0;g<gb_tbl.size();g++){
11555 data_type *gdt = gb_tbl.get_data_type(g);
11556 if(gdt->is_temporal()){
11557 sprintf(tmpstr,"last_gb%d",g);
11558 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11559 sprintf(tmpstr,"last_flushed_gb%d",g);
11560 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11563 ret += "\tbool needs_temporal_flush;\n";
11566 // The publicly exposed functions
11568 ret += "\npublic:\n";
11571 //-------------------
11572 // The functor constructor
11573 // pass in the schema handle.
11574 // 1) make assignments to the unpack offset variables
11575 // 2) initialize the complex literals
11577 ret += "//\t\tFunctor constructor.\n";
11578 ret += this->generate_functor_name()+"(int schema_handle0){\n";
11580 // save the schema handle
11581 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
11582 // tuple metadata offset
11583 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
11586 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
11587 ret += gen_access_var_init(cid_set);
11589 // aggregate return vals : refd in both final_sample
11590 // and create_output_tuple
11591 // Create cached temporaries for UDAF return values.
11592 for(a=0;a<aggr_tbl.size();a++){
11593 if(! aggr_tbl.is_builtin(a)){
11594 int afcn_id = aggr_tbl.get_fcn_id(a);
11595 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11596 sprintf(tmpstr,"udaf_ret_%d", a);
11597 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11601 // complex literals
11602 ret += "//\t\tInitialize complex literals.\n";
11603 ret += gen_complex_lit_init(complex_literals);
11605 // Initialize partial function results so they can be safely GC'd
11606 ret += gen_partial_fcn_init(partial_fcns);
11608 // Initialize non-query-parameter parameter handles
11609 ret += gen_pass_by_handle_init(param_handle_table);
11611 // temporal flush variables
11612 // ASSUME that structured values won't be temporal.
11613 if(uses_temporal_flush){
11614 ret += "//\t\tInitialize temporal flush variables.\n";
11615 for(g=0;g<gb_tbl.size();g++){
11616 data_type *gdt = gb_tbl.get_data_type(g);
11617 if(gdt->is_temporal()){
11618 literal_t gl(gdt->type_indicator());
11619 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
11620 ret.append(tmpstr);
11623 ret += "\tneeds_temporal_flush = false;\n";
11626 // Init temporal attributes referenced in select list
11627 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
11632 //-------------------
11633 // Functor destructor
11634 ret += "//\t\tFunctor destructor.\n";
11635 ret += "~"+this->generate_functor_name()+"(){\n";
11637 // clean up buffer type complex literals
11638 ret += gen_complex_lit_dtr(complex_literals);
11640 // Deregister the pass-by-handle parameters
11641 ret += "/* register and de-register the pass-by-handle parameters */\n";
11642 ret += gen_pass_by_handle_dtr(param_handle_table);
11644 // clean up partial function results.
11645 ret += "/* clean up partial function storage */\n";
11646 ret += gen_partial_fcn_dtr(partial_fcns);
11648 // Destroy the parameters, if any need to be destroyed
11649 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11654 //-------------------
11655 // Parameter manipulation routines
11656 ret += generate_load_param_block(this->generate_functor_name(),
11657 this->param_tbl,param_handle_table);
11658 ret += generate_delete_param_block(this->generate_functor_name(),
11659 this->param_tbl,param_handle_table);
11661 //-------------------
11662 // Register new parameter block
11664 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
11665 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11666 ret += "\treturn this->load_params_"+this->generate_functor_name()+
11670 //-------------------
11671 // the create_group method.
11672 // This method creates a group in a buffer passed in
11673 // (to allow for creation on the stack).
11674 // There are also a couple of side effects:
11675 // 1) unpack the partial fcns ref'd by the group-by definitions (the WHERE clause is evaluated in evaluate_predicate)
11676 // 2) determine if a temporal flush is required.
11678 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
11679 // Variables for execution of the function.
11680 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11682 if(partial_fcns.size()>0){ // partial fcn access failure
11683 ret += "\tgs_retval_t retval = 0;\n";
11687 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
11688 "_groupdef *) buffer;\n";
11690 // Start by cleaning up partial function results
11691 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11693 set<int> gb_pfcns; // partial fcns in gbdefs, aggr se's
11694 for(g=0;g<gb_tbl.size();g++){
11695 collect_partial_fcns(gb_tbl.get_def(g), gb_pfcns);
11697 ret += gen_partial_fcn_dtr(partial_fcns,gb_pfcns);
11698 // ret += gen_partial_fcn_dtr(partial_fcns);
11701 ret += gen_temp_tuple_check(this->node_name, 0);
11702 col_id_set found_cids; // colrefs unpacked thus far.
11703 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
11707 // Save temporal group-by variables
11710 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
11712 for(g=0;g<gb_tbl.size();g++){
11714 data_type *gdt = gb_tbl.get_data_type(g);
11716 if(gdt->is_temporal()){
11717 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11718 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11719 ret.append(tmpstr);
11726 // Compare the temporal GB vars with the stored ones,
11727 // set flush indicator and update stored GB vars if there is any change.
11729 if(uses_temporal_flush){
11730 ret+= "\tif( !( (";
11731 bool first_one = true;
11732 for(g=0;g<gb_tbl.size();g++){
11733 data_type *gdt = gb_tbl.get_data_type(g);
11735 if(gdt->is_temporal()){
11736 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
11737 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
11738 if(first_one){first_one = false;} else {ret += ") && (";}
11739 ret += generate_equality_test(lhs_op, rhs_op, gdt);
11743 for(g=0;g<gb_tbl.size();g++){
11744 data_type *gdt = gb_tbl.get_data_type(g);
11745 if(gdt->is_temporal()){
11746 if(gdt->is_buffer_type()){
11747 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
11749 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
11751 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
11757 if(uses_temporal_flush){
11758 for(g=0;g<gb_tbl.size();g++){
11759 data_type *gdt = gb_tbl.get_data_type(g);
11760 if(gdt->is_temporal()){
11761 ret+="if(last_flushed_gb"+int_to_string(g)+">0)\n";
11767 ret += "\t\tneeds_temporal_flush=true;\n";
11768 ret += "\t\t}else{\n"
11769 "\t\t\tneeds_temporal_flush=false;\n"
11774 // For temporal status tuple we don't need to do anything else
11775 ret += "\tif (temp_tuple_received) return NULL;\n\n";
11778 // The partial functions ref'd in the group-by var
11779 // definitions must be evaluated. If one returns false,
11780 // then implicitly the predicate is false.
11781 set<int>::iterator pfsi;
11783 if(gb_pfcns.size() > 0)
11784 ret += "//\t\tUnpack partial fcns.\n";
11785 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, gb_pfcns,
11786 found_cids, segen_gb_tbl, "NULL", needs_xform);
11788 // Unpack the group-by variables
11790 for(g=0;g<gb_tbl.size();g++){
11791 // Find the new fields ref'd by this GBvar def.
11792 col_id_set new_cids;
11793 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
11794 // Unpack these values.
11795 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
11797 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11798 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11800 // There seems to be no difference between the two
11801 // branches of the IF statement.
11802 data_type *gdt = gb_tbl.get_data_type(g);
11803 if(gdt->is_buffer_type()){
11804 // Create temporary copy.
11805 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11806 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11808 scalarexp_t *gse = gb_tbl.get_def(g);
11809 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11810 g,generate_se_code(gse,schema).c_str());
11813 ret.append(tmpstr);
11818 ret+= "\treturn gbval;\n";
11823 //-------------------
11824 // the evaluate_predicate method.
11825 // This method evaluates the WHERE clause for a tuple,
11826 // unpacking fields and partial fcns as they are needed.
11827 // Clauses that reference no stateful fcns are tested first
11828 // and return false at once on failure; clauses that do
11829 // reference stateful fcns are all evaluated before returning.
11831 ret += "bool evaluate_predicate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, int cd){\n";
11832 // Variables for execution of the function.
11833 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11835 if(partial_fcns.size()>0){ // partial fcn access failure
11836 ret += "\tgs_retval_t retval = 0;\n";
11840 // Start by cleaning up partial function results
11841 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11842 set<int> w_pfcns; // partial fcns in where clause
11843 for(w=0;w<where.size();++w)
11844 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
11846 set<int> ag_pfcns; // partial fcns in gbdefs, aggr se's
11847 for(a=0;a<aggr_tbl.size();a++){
11848 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_pfcns);
11850 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
11851 ret += gen_partial_fcn_dtr(partial_fcns,ag_pfcns);
11853 ret+="//\t\tEvaluate clauses which don't reference stateful fcns first \n";
11854 for(w=0;w<where.size();++w){
11855 if(! pred_refs_sfun(where[w]->pr)){
11856 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11858 // Find the set of variables accessed in this CNF elem,
11859 // but in no previous element.
11860 col_id_set new_cids;
11861 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11863 // Unpack these values.
11864 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11865 // Find partial fcns ref'd in this cnf element
11866 set<int> pfcn_refs;
11867 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11868 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11870 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11871 +") ) return(false);\n";
11876 // The partial functions ref'd in the aggregate
11877 // definitions must also be evaluated. If one returns false,
11878 // then implicitly the predicate is false.
11879 // ASSUME that aggregates cannot reference stateful fcns.
11881 if(ag_pfcns.size() > 0)
11882 ret += "//\t\tUnpack remaining partial fcns.\n";
11883 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_pfcns,
11884 found_cids, segen_gb_tbl, "false", needs_xform);
11886 ret+="//\t\tEvaluate all remaining where clauses.\n";
11887 ret+="\tbool retval = true;\n";
11888 for(w=0;w<where.size();++w){
11889 if( pred_refs_sfun(where[w]->pr)){
11890 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11892 // Find the set of variables accessed in this CNF elem,
11893 // but in no previous element.
11894 col_id_set new_cids;
11895 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11897 // Unpack these values.
11898 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11899 // Find partial fcns ref'd in this cnf element
11900 set<int> pfcn_refs;
11901 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11902 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11904 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11905 +") ) retval = false;\n";
11909 ret+="// Unpack all remaining attributes\n";
11910 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
11912 ret += "\n\treturn retval;\n";
11915 //--------------------------------------------------------
11916 // Create and initialize an aggregate object
11918 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, gs_sp_t a,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11919 // Variables for execution of the function.
11920 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11923 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+ "_aggrdef *)a;\n";
11925 for(a=0;a<aggr_tbl.size();a++){
11926 if(aggr_tbl.is_builtin(a)){
11927 // Create temporaries for buffer return values
11928 data_type *adt = aggr_tbl.get_data_type(a);
11929 if(adt->is_buffer_type()){
11930 sprintf(tmpstr,"aggr_tmp_%d", a);
11931 ret+=adt->make_host_cvar(tmpstr)+";\n";
11936 for(a=0;a<aggr_tbl.size();a++){
11937 sprintf(tmpstr,"aggval->aggr_var%d",a);
11938 string assignto_var = tmpstr;
11939 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11942 ret += "\treturn aggval;\n";
11946 //--------------------------------------------------------
11947 // initialize an aggregate object inplace
11949 ret += "void create_aggregate(host_tuple &tup0, "+this->generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11950 // Variables for execution of the function.
11951 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11955 for(a=0;a<aggr_tbl.size();a++){
11956 if(aggr_tbl.is_builtin(a)){
11957 // Create temporaries for buffer return values
11958 data_type *adt = aggr_tbl.get_data_type(a);
11959 if(adt->is_buffer_type()){
11960 sprintf(tmpstr,"aggr_tmp_%d", a);
11961 ret+=adt->make_host_cvar(tmpstr)+";\n";
11966 for(a=0;a<aggr_tbl.size();a++){
11967 sprintf(tmpstr,"aggval->aggr_var%d",a);
11968 string assignto_var = tmpstr;
11969 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11975 //--------------------------------------------------------
11976 // Create and clean-initialize an state object
11978 ret += "void initialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval){\n";
11979 // Variables for execution of the function.
11980 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11983 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
11985 for(a=0;a<aggr_tbl.size();a++){
11986 if( aggr_tbl.is_superaggr(a)){
11987 if(aggr_tbl.is_builtin(a)){
11988 // Create temporaries for buffer return values
11989 data_type *adt = aggr_tbl.get_data_type(a);
11990 if(adt->is_buffer_type()){
11991 sprintf(tmpstr,"aggr_tmp_%d", a);
11992 ret+=adt->make_host_cvar(tmpstr)+";\n";
11998 for(a=0;a<aggr_tbl.size();a++){
11999 if( aggr_tbl.is_superaggr(a)){
12000 sprintf(tmpstr,"stval->aggr_var%d",a);
12001 string assignto_var = tmpstr;
12002 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12006 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12007 string state_nm = (*ssi);
12008 ret += "_sfun_state_clean_init_"+state_nm+"(&(stval->state_var_"+state_nm+"));\n";
12014 //--------------------------------------------------------
12015 // Create and dirty-initialize an state object
12017 ret += "void reinitialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, "+generate_functor_name()+"_statedef *old_stval, int cd){\n";
12018 // Variables for execution of the function.
12019 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12022 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
12024 for(a=0;a<aggr_tbl.size();a++){
12025 if( aggr_tbl.is_superaggr(a)){
12026 if(aggr_tbl.is_builtin(a)){
12027 // Create temporaries for buffer return values
12028 data_type *adt = aggr_tbl.get_data_type(a);
12029 if(adt->is_buffer_type()){
12030 sprintf(tmpstr,"aggr_tmp_%d", a);
12031 ret+=adt->make_host_cvar(tmpstr)+";\n";
12037 // initialize superaggregates
12038 for(a=0;a<aggr_tbl.size();a++){
12039 if( aggr_tbl.is_superaggr(a)){
12040 sprintf(tmpstr,"stval->aggr_var%d",a);
12041 string assignto_var = tmpstr;
12042 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12046 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12047 string state_nm = (*ssi);
12048 ret += "_sfun_state_dirty_init_"+state_nm+"(&(stval->state_var_"+state_nm+"),&(old_stval->state_var_"+state_nm+"), cd );\n";
12053 //--------------------------------------------------------
12054 // Finalize_state : call the finalize fcn on all states
12057 ret += "void finalize_state( "+generate_functor_name()+"_statedef *stval, int cd){\n";
12059 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12060 string state_nm = (*ssi);
12061 ret += "_sfun_state_final_init_"+state_nm+"(&(stval->state_var_"+state_nm+"), cd);\n";
12069 //--------------------------------------------------------
12070 // update (plus) a superaggregate object
12072 ret += "void update_plus_superaggr(host_tuple &tup0, " +
12073 generate_functor_name()+"_groupdef *gbval, "+
12074 generate_functor_name()+"_statedef *stval){\n";
12075 // Variables for execution of the function.
12076 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12078 // use of temporaries depends on the aggregate,
12079 // generate them in generate_aggr_update
12082 for(a=0;a<aggr_tbl.size();a++){
12083 if(aggr_tbl.is_superaggr(a)){
12084 sprintf(tmpstr,"stval->aggr_var%d",a);
12085 string varname = tmpstr;
12086 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12090 ret += "\treturn;\n";
12095 //--------------------------------------------------------
12096 // update (minus) a superaggregate object
12098 ret += "void update_minus_superaggr( "+
12099 generate_functor_name()+"_groupdef *gbval, "+
12100 generate_functor_name()+"_aggrdef *aggval,"+
12101 generate_functor_name()+"_statedef *stval"+
12103 // Variables for execution of the function.
12104 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12106 // use of temporaries depends on the aggregate,
12107 // generate them in generate_aggr_update
12110 for(a=0;a<aggr_tbl.size();a++){
12111 if(aggr_tbl.is_superaggr(a)){
12112 sprintf(tmpstr,"stval->aggr_var%d",a);
12113 string super_varname = tmpstr;
12114 sprintf(tmpstr,"aggval->aggr_var%d",a);
12115 string sub_varname = tmpstr;
12116 ret.append(generate_superaggr_minus(sub_varname, super_varname,&aggr_tbl,a, schema));
12120 ret += "\treturn;\n";
12124 //--------------------------------------------------------
12125 // update an aggregate object
12127 ret += "void update_aggregate(host_tuple &tup0, "
12128 +generate_functor_name()+"_groupdef *gbval, "+
12129 generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
12130 // Variables for execution of the function.
12131 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12133 // use of temporaries depends on the aggregate,
12134 // generate them in generate_aggr_update
12137 for(a=0;a<aggr_tbl.size();a++){
12138 sprintf(tmpstr,"aggval->aggr_var%d",a);
12139 string varname = tmpstr;
12140 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12143 ret += "\treturn;\n";
12146 //---------------------------------------------------
12149 ret += "\tbool flush_needed(){\n";
12150 if(uses_temporal_flush){
12151 ret += "\t\treturn needs_temporal_flush;\n";
12153 ret += "\t\treturn false;\n";
12158 //------------------------------------------------------
12159 // THe cleaning_when predicate
12161 string gbvar = "gbval->gb_var";
12162 string aggvar = "aggval->";
12164 ret += "bool need_to_clean( "
12165 +generate_functor_name()+"_groupdef *gbval, "+
12166 generate_functor_name()+"_statedef *stval, int cd"+
12169 if(cleanwhen.size()>0)
12170 ret += "\tbool predval = true;\n";
12172 ret += "\tbool predval = false;\n";
12174 // Find the udafs ref'd in the having clause
12176 for(w=0;w<cleanwhen.size();++w)
12177 collect_aggr_refs_pr(cleanwhen[w]->pr, cw_aggs);
12180 // get the return values from the UDAFS
12181 for(a=0;a<aggr_tbl.size();a++){
12182 if(! aggr_tbl.is_builtin(a) && cw_aggs.count(a)){
12183 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12184 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12185 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12190 // Start by cleaning up partial function results
12191 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12192 set<int> cw_pfcns; // partial fcns in where clause
12193 for(w=0;w<cleanwhen.size();++w)
12194 collect_partial_fcns_pr(cleanwhen[w]->pr, cw_pfcns);
12196 ret += gen_partial_fcn_dtr(partial_fcns,cw_pfcns);
12199 for(w=0;w<cleanwhen.size();++w){
12200 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12202 // Find partial fcns ref'd in this cnf element
12203 set<int> pfcn_refs;
12204 collect_partial_fcns_pr(cleanwhen[w]->pr, pfcn_refs);
12205 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12206 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12207 ret += "\tif(retval){ return false;}\n";
12209 // ret += unpack_partial_fcn_fm_aggr(schema, partial_fcns, pfcn_refs,"false");
12211 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanwhen[w]->pr,gbvar, aggvar, schema)+
12212 ") ) predval = false;\n";
12215 ret += "\treturn predval;\n";
12218 //------------------------------------------------------
12219 // THe cleaning_by predicate
12221 ret += "bool sample_group("
12222 +generate_functor_name()+"_groupdef *gbval, "+
12223 generate_functor_name()+"_aggrdef *aggval,"+
12224 generate_functor_name()+"_statedef *stval, int cd"+
12227 if(cleanby.size()>0)
12228 ret += "\tbool retval = true;\n";
12230 ret += "\tbool retval = false;\n";
12232 // Find the udafs ref'd in the having clause
12234 for(w=0;w<cleanby.size();++w)
12235 collect_aggr_refs_pr(cleanby[w]->pr, cb_aggs);
12238 // get the return values from the UDAFS
12239 for(a=0;a<aggr_tbl.size();a++){
12240 if(! aggr_tbl.is_builtin(a) && cb_aggs.count(a)){
12241 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12242 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12243 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12248 // Start by cleaning up partial function results
12249 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12250 set<int> cb_pfcns; // partial fcns in where clause
12251 for(w=0;w<cleanby.size();++w)
12252 collect_partial_fcns_pr(cleanby[w]->pr, cb_pfcns);
12254 ret += gen_partial_fcn_dtr(partial_fcns,cb_pfcns);
12257 for(w=0;w<cleanwhen.size();++w){
12258 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12262 // Find the set of variables accessed in this CNF elem,
12263 // but in no previous element.
12264 col_id_set new_cids;
12265 get_new_pred_cids(cleanby[w]->pr, found_cids, new_cids, segen_gb_tbl);
12267 // Unpack these values.
12268 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
12271 // Find partial fcns ref'd in this cnf element
12272 set<int> pfcn_refs;
12273 collect_partial_fcns_pr(cleanby[w]->pr, pfcn_refs);
12274 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12275 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12276 ret += "\tif(retval){ return false;}\n";
12278 // ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
12280 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanby[w]->pr,gbvar, aggvar, schema)+
12281 +") ) retval = false;\n";
12284 ret += "\treturn retval;\n";
12288 //-----------------------------------------------------
12290 ret += "bool final_sample_group("
12291 +generate_functor_name()+"_groupdef *gbval, "+
12292 generate_functor_name()+"_aggrdef *aggval,"+
12293 generate_functor_name()+"_statedef *stval,"+
12296 ret += "\tgs_retval_t retval = 0;\n";
12298 // Find the udafs ref'd in the having clause
12300 for(w=0;w<having.size();++w)
12301 collect_aggr_refs_pr(having[w]->pr, hv_aggs);
12304 // get the return values from the UDAFS
12305 for(a=0;a<aggr_tbl.size();a++){
12306 if(! aggr_tbl.is_builtin(a) && hv_aggs.count(a)){
12307 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12308 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12309 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12314 set<int> hv_sl_pfcns;
12315 for(w=0;w<having.size();w++){
12316 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
12319 // clean up the partial fcn results from any previous execution
12320 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
12323 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
12324 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12325 ret += "\tif(retval){ return false;}\n";
12328 // Evalaute the HAVING clause
12329 // TODO: this seems to have a ++ operator rather than a + operator.
12330 for(w=0;w<having.size();++w){
12331 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
12334 ret += "\treturn true;\n";
12337 //---------------------------------------------------
12338 // create output tuple
12339 // Unpack the partial functions ref'd in the where clause,
12340 // select clause. Evaluate the where clause.
12341 // Finally, pack the tuple.
12343 // I need to use special code generation here,
12344 // so I'll leave it in longhand.
12346 ret += "host_tuple create_output_tuple("
12347 +generate_functor_name()+"_groupdef *gbval, "+
12348 generate_functor_name()+"_aggrdef *aggval,"+
12349 generate_functor_name()+"_statedef *stval,"+
12350 "int cd, bool &failed){\n";
12352 ret += "\thost_tuple tup;\n";
12353 ret += "\tfailed = false;\n";
12354 ret += "\tgs_retval_t retval = 0;\n";
12357 // Find the udafs ref'd in the select clause
12359 for(s=0;s<select_list.size();s++)
12360 collect_agg_refs(select_list[s]->se, sl_aggs);
12363 // get the return values from the UDAFS
12364 for(a=0;a<aggr_tbl.size();a++){
12365 if(! aggr_tbl.is_builtin(a) && sl_aggs.count(a)){
12366 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12367 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12368 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12373 // I can't cache partial fcn results from the having
12374 // clause because evaluation is separated.
12376 for(s=0;s<select_list.size();s++){
12377 collect_partial_fcns(select_list[s]->se, sl_pfcns);
12380 for(pfsi=sl_pfcns.begin();pfsi!=sl_pfcns.end();++pfsi){
12381 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12382 ret += "\tif(retval){ failed=true; return tup;}\n";
12386 // Now, compute the size of the tuple.
12388 // Unpack any BUFFER type selections into temporaries
12389 // so that I can compute their size and not have
12390 // to recompute their value during tuple packing.
12391 // I can use regular assignment here because
12392 // these temporaries are non-persistent.
12393 // TODO: should I be using the selvar generation routine?
12395 ret += "//\t\tCompute the size of the tuple.\n";
12396 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
12397 for(s=0;s<select_list.size();s++){
12398 scalarexp_t *se = select_list[s]->se;
12399 data_type *sdt = se->get_data_type();
12400 if(sdt->is_buffer_type() &&
12401 !( (se->get_operator_type() == SE_COLREF) ||
12402 (se->get_operator_type() == SE_AGGR_STAR) ||
12403 (se->get_operator_type() == SE_AGGR_SE) ||
12404 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12405 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12407 sprintf(tmpstr,"selvar_%d",s);
12408 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
12409 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
12413 // The size of the tuple is the size of the tuple struct plus the
12414 // size of the buffers to be copied in.
12416 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
12417 for(s=0;s<select_list.size();s++){
12418 // if(s>0) ret += "+";
12419 scalarexp_t *se = select_list[s]->se;
12420 data_type *sdt = select_list[s]->se->get_data_type();
12421 if(sdt->is_buffer_type()){
12422 if(!( (se->get_operator_type() == SE_COLREF) ||
12423 (se->get_operator_type() == SE_AGGR_STAR) ||
12424 (se->get_operator_type() == SE_AGGR_SE) ||
12425 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12426 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12428 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
12429 ret.append(tmpstr);
12431 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12432 ret.append(tmpstr);
12438 // Allocate tuple data block.
12439 ret += "//\t\tCreate the tuple block.\n";
12440 ret += "\ttup.data = malloc(tup.tuple_size);\n";
12441 ret += "\ttup.heap_resident = true;\n";
12443 // Mark tuple as regular
12444 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
12446 // ret += "\ttup.channel = 0;\n";
12447 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
12448 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
12451 // (Here, offsets are hard-wired. is this a problem?)
12453 ret += "//\t\tPack the fields into the tuple.\n";
12454 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
12455 for(s=0;s<select_list.size();s++){
12456 scalarexp_t *se = select_list[s]->se;
12457 data_type *sdt = se->get_data_type();
12458 if(sdt->is_buffer_type()){
12459 if(!( (se->get_operator_type() == SE_COLREF) ||
12460 (se->get_operator_type() == SE_AGGR_STAR) ||
12461 (se->get_operator_type() == SE_AGGR_SE) ||
12462 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12463 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12465 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
12466 ret.append(tmpstr);
12467 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
12468 ret.append(tmpstr);
12470 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12471 ret.append(tmpstr);
12472 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12473 ret.append(tmpstr);
12476 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12477 ret.append(tmpstr);
12478 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
12483 // Destroy string temporaries
12484 ret += gen_buffer_selvars_dtr(select_list);
12485 // Destroy string return vals of UDAFs
12486 for(a=0;a<aggr_tbl.size();a++){
12487 if(! aggr_tbl.is_builtin(a)){
12488 int afcn_id = aggr_tbl.get_fcn_id(a);
12489 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12490 if(adt->is_buffer_type()){
12491 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12492 adt->get_hfta_buffer_destroy().c_str(), a );
12499 ret += "\treturn tup;\n";
12503 //-------------------------------------------------------------------
12504 // Temporal update functions
12506 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12508 // create a temp status tuple
12509 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12511 ret += gen_init_temp_status_tuple(this->get_node_name());
12514 // (Here, offsets are hard-wired. is this a problem?)
12516 ret += "//\t\tPack the fields into the tuple.\n";
12517 for(s=0;s<select_list.size();s++){
12518 data_type *sdt = select_list[s]->se->get_data_type();
12519 if(sdt->is_temporal()){
12520 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12522 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
12528 ret += "\treturn 0;\n";
12529 ret += "};};\n\n\n";
12532 //----------------------------------------------------------
12533 // The hash function
12535 ret += "struct "+generate_functor_name()+"_hash_func{\n";
12536 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12537 "_groupdef *grp) const{\n";
12538 ret += "\t\treturn(";
12539 for(g=0;g<gb_tbl.size();g++){
12540 if(g>0) ret += "^";
12541 data_type *gdt = gb_tbl.get_data_type(g);
12542 if(gdt->use_hashfunc()){
12543 if(gdt->is_buffer_type())
12544 sprintf(tmpstr,"(%s*%s(&)grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12546 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12548 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12552 ret += ") >> 32);\n";
12556 //----------------------------------------------------------
12557 // The superhash function
12559 ret += "struct "+generate_functor_name()+"_superhash_func{\n";
12560 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12561 "_groupdef *grp) const{\n";
12562 ret += "\t\treturn(0";
12564 for(g=0;g<gb_tbl.size();g++){
12565 if(sg_tbl.count(g)>0){
12567 data_type *gdt = gb_tbl.get_data_type(g);
12568 if(gdt->use_hashfunc()){
12569 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12571 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12576 ret += ") >> 32);\n";
12581 //----------------------------------------------------------
12582 // The comparison function
12584 ret += "struct "+generate_functor_name()+"_equal_func{\n";
12585 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12586 generate_functor_name()+"_groupdef *grp2) const{\n";
12587 ret += "\t\treturn( (";
12588 for(g=0;g<gb_tbl.size();g++){
12589 if(g>0) ret += ") && (";
12590 data_type *gdt = gb_tbl.get_data_type(g);
12591 if(gdt->complex_comparison(gdt)){
12592 if(gdt->is_buffer_type())
12593 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12594 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12596 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12597 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12599 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12608 //----------------------------------------------------------
12609 // The superhashcomparison function
12611 ret += "struct "+generate_functor_name()+"_superequal_func{\n";
12612 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12613 generate_functor_name()+"_groupdef *grp2) const{\n";
12614 ret += "\t\treturn( (";
12616 bool first_elem = true;
12617 for(g=0;g<gb_tbl.size();g++){
12618 if(sg_tbl.count(g)){
12619 if(first_elem) first_elem=false; else ret += ") && (";
12620 data_type *gdt = gb_tbl.get_data_type(g);
12621 if(gdt->complex_comparison(gdt)){
12622 if(gdt->is_buffer_type())
12623 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12624 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12626 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12627 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12629 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12646 string sgahcwcb_qpn::generate_operator(int i, string params){
12649 " clean_operator<" +
12650 generate_functor_name()+",\n\t"+
12651 generate_functor_name() + "_groupdef, \n\t" +
12652 generate_functor_name() + "_aggrdef, \n\t" +
12653 generate_functor_name() + "_statedef, \n\t" +
12654 generate_functor_name()+"_hash_func, \n\t"+
12655 generate_functor_name()+"_equal_func ,\n\t"+
12656 generate_functor_name()+"_superhash_func,\n\t "+
12657 generate_functor_name()+"_superequal_func \n\t"+
12658 "> *op"+int_to_string(i)+" = new clean_operator<"+
12659 generate_functor_name()+",\n\t"+
12660 generate_functor_name() + "_groupdef,\n\t " +
12661 generate_functor_name() + "_aggrdef, \n\t" +
12662 generate_functor_name() + "_statedef, \n\t" +
12663 generate_functor_name()+"_hash_func, \n\t"+
12664 generate_functor_name()+"_equal_func, \n\t"+
12665 generate_functor_name()+"_superhash_func, \n\t"+
12666 generate_functor_name()+"_superequal_func\n\t "
12667 ">("+params+", \"" + get_node_name() + "\");\n"
12671 ////////////////////////////////////////////////////////////////
12676 string rsgah_qpn::generate_functor_name(){
12677 return("rsgah_functor_" + normalize_name(this->get_node_name()));
12681 string rsgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
12685 // Initialize generate utility globals
12686 segen_gb_tbl = &(gb_tbl);
12689 //--------------------------------
12690 // group definition class
12691 string ret = "class " + generate_functor_name() + "_groupdef{\n";
12692 ret += "public:\n";
12693 for(g=0;g<this->gb_tbl.size();g++){
12694 sprintf(tmpstr,"gb_var%d",g);
12695 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12698 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
12699 ret += "\t"+generate_functor_name() + "_groupdef("+
12700 this->generate_functor_name() + "_groupdef *gd){\n";
12701 for(g=0;g<gb_tbl.size();g++){
12702 data_type *gdt = gb_tbl.get_data_type(g);
12703 if(gdt->is_buffer_type()){
12704 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
12705 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
12708 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
12714 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
12715 for(g=0;g<gb_tbl.size();g++){
12716 data_type *gdt = gb_tbl.get_data_type(g);
12717 if(gdt->is_buffer_type()){
12718 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
12719 gdt->get_hfta_buffer_destroy().c_str(), g );
12726 //--------------------------------
12727 // aggr definition class
12728 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
12729 ret += "public:\n";
12730 for(a=0;a<aggr_tbl.size();a++){
12731 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
12732 sprintf(tmpstr,"aggr_var%d",a);
12733 if(aggr_tbl.is_builtin(a))
12734 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
12736 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
12739 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
12741 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
12742 for(a=0;a<aggr_tbl.size();a++){
12743 if(aggr_tbl.is_builtin(a)){
12744 data_type *adt = aggr_tbl.get_data_type(a);
12745 if(adt->is_buffer_type()){
12746 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
12747 adt->get_hfta_buffer_destroy().c_str(), a );
12751 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
12752 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12753 ret+="(aggr_var"+int_to_string(a)+"));\n";
12759 //--------------------------------
12760 // gb functor class
12761 ret += "class " + this->generate_functor_name() + "{\n";
12763 // Find variables referenced in this query node.
12765 col_id_set cid_set;
12766 col_id_set::iterator csi;
12768 for(w=0;w<where.size();++w)
12769 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
12770 for(w=0;w<having.size();++w)
12771 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
12772 for(w=0;w<closing_when.size();++w)
12773 gather_pr_col_ids(closing_when[w]->pr,cid_set,segen_gb_tbl);
12774 for(g=0;g<gb_tbl.size();g++)
12775 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
12777 for(s=0;s<select_list.size();s++){
12778 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
12782 // Private variables : store the state of the functor.
12783 // 1) variables for unpacked attributes
12784 // 2) offsets of the upacked attributes
12785 // 3) storage of partial functions
12786 // 4) storage of complex literals (i.e., require a constructor)
12788 ret += "private:\n";
12790 // var to save the schema handle
12791 ret += "\tint schema_handle0;\n";
12793 // generate the declaration of all the variables related to
12794 // temp tuples generation
12795 ret += gen_decl_temp_vars();
12797 // unpacked attribute storage, offsets
12798 ret += "//\t\tstorage and offsets of accessed fields.\n";
12799 ret += generate_access_vars(cid_set, schema);
12800 // tuple metadata offset
12801 ret += "\tint tuple_metadata_offset0;\n";
12803 // Variables to store results of partial functions.
12804 // WARNING find_partial_functions modifies the SE
12805 // (it marks the partial function id).
12806 ret += "//\t\tParital function result storage\n";
12807 vector<scalarexp_t *> partial_fcns;
12808 vector<int> fcn_ref_cnt;
12809 vector<bool> is_partial_fcn;
12810 for(s=0;s<select_list.size();s++){
12811 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
12813 for(w=0;w<where.size();w++){
12814 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12816 for(w=0;w<having.size();w++){
12817 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12819 for(w=0;w<closing_when.size();w++){
12820 find_partial_fcns_pr(closing_when[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12822 for(g=0;g<gb_tbl.size();g++){
12823 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
12825 for(a=0;a<aggr_tbl.size();a++){
12826 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
12828 if(partial_fcns.size()>0){
12829 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
12830 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
12833 // Create cached temporaries for UDAF return values.
12834 for(a=0;a<aggr_tbl.size();a++){
12835 if(! aggr_tbl.is_builtin(a)){
12836 int afcn_id = aggr_tbl.get_fcn_id(a);
12837 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12838 sprintf(tmpstr,"udaf_ret_%d", a);
12839 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
12844 // Complex literals (i.e., they need constructors)
12845 ret += "//\t\tComplex literal storage.\n";
12846 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
12847 ret += generate_complex_lit_vars(complex_literals);
12849 // Pass-by-handle parameters
12850 ret += "//\t\tPass-by-handle storage.\n";
12851 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
12852 ret += generate_pass_by_handle_vars(param_handle_table);
12855 // variables to hold parameters.
12856 ret += "//\tfor query parameters\n";
12857 ret += generate_param_vars(param_tbl);
12859 // Is there a temporal flush? If so create flush temporaries,
12860 // create flush indicator.
12861 bool uses_temporal_flush = false;
12862 for(g=0;g<gb_tbl.size();g++){
12863 data_type *gdt = gb_tbl.get_data_type(g);
12864 if(gdt->is_temporal())
12865 uses_temporal_flush = true;
12868 if(uses_temporal_flush){
12869 ret += "//\t\tFor temporal flush\n";
12870 for(g=0;g<gb_tbl.size();g++){
12871 data_type *gdt = gb_tbl.get_data_type(g);
12872 if(gdt->is_temporal()){
12873 sprintf(tmpstr,"curr_gb%d",g);
12874 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12875 sprintf(tmpstr,"last_gb%d",g);
12876 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12879 ret += "\tgs_int32_t needs_temporal_flush;\n";
12882 // The publicly exposed functions
12884 ret += "\npublic:\n";
12887 //-------------------
12888 // The functor constructor
12889 // pass in the schema handle.
12890 // 1) make assignments to the unpack offset variables
12891 // 2) initialize the complex literals
12893 ret += "//\t\tFunctor constructor.\n";
12894 ret += this->generate_functor_name()+"(int schema_handle0){\n";
12896 // save the schema handle
12897 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
12899 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
12902 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
12903 ret += gen_access_var_init(cid_set);
12905 // complex literals
12906 ret += "//\t\tInitialize complex literals.\n";
12907 ret += gen_complex_lit_init(complex_literals);
12909 // Initialize partial function results so they can be safely GC'd
12910 ret += gen_partial_fcn_init(partial_fcns);
12912 // Initialize non-query-parameter parameter handles
12913 ret += gen_pass_by_handle_init(param_handle_table);
12915 // temporal flush variables
12916 // ASSUME that structured values won't be temporal.
12917 gs_int32_t temporal_gb = 0;
12918 if(uses_temporal_flush){
12919 ret += "//\t\tInitialize temporal flush variables.\n";
12920 for(g=0;g<gb_tbl.size();g++){
12921 data_type *gdt = gb_tbl.get_data_type(g);
12922 if(gdt->is_temporal()){
12923 literal_t gl(gdt->type_indicator());
12924 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
12925 ret.append(tmpstr);
12926 sprintf(tmpstr,"\tcurr_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
12927 ret.append(tmpstr);
12931 ret += "\tneeds_temporal_flush = 0;\n";
12934 // Init temporal attributes referenced in select list
12935 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
12940 //-------------------
12941 // Functor destructor
12942 ret += "//\t\tFunctor destructor.\n";
12943 ret += "~"+this->generate_functor_name()+"(){\n";
12945 // clean up buffer type complex literals
12946 ret += gen_complex_lit_dtr(complex_literals);
12948 // Deregister the pass-by-handle parameters
12949 ret += "/* register and de-register the pass-by-handle parameters */\n";
12950 ret += gen_pass_by_handle_dtr(param_handle_table);
12952 // clean up partial function results.
12953 ret += "/* clean up partial function storage */\n";
12954 ret += gen_partial_fcn_dtr(partial_fcns);
12956 // Destroy the parameters, if any need to be destroyed
12957 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12962 //-------------------
12963 // Parameter manipulation routines
12964 ret += generate_load_param_block(this->generate_functor_name(),
12965 this->param_tbl,param_handle_table);
12966 ret += generate_delete_param_block(this->generate_functor_name(),
12967 this->param_tbl,param_handle_table);
12969 //-------------------
12970 // Register new parameter block
12972 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
12973 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12974 ret += "\treturn this->load_params_"+this->generate_functor_name()+
12979 //-------------------
12980 // the create_group method.
12981 // This method creates a group in a buffer passed in
12982 // (to allow for creation on the stack).
12983 // There are also a couple of side effects:
12984 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
12985 // 2) determine if a temporal flush is required.
12987 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
12988 // Variables for execution of the function.
12989 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12991 if(partial_fcns.size()>0){ // partial fcn access failure
12992 ret += "\tgs_retval_t retval = 0;\n";
12996 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
12997 "_groupdef *) buffer;\n";
12999 // Start by cleaning up partial function results
13000 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
13001 set<int> w_pfcns; // partial fcns in where clause
13002 for(w=0;w<where.size();++w)
13003 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
13005 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
13006 for(g=0;g<gb_tbl.size();g++){
13007 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
13009 for(a=0;a<aggr_tbl.size();a++){
13010 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
13012 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
13013 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
13014 // ret += gen_partial_fcn_dtr(partial_fcns);
13017 ret += gen_temp_tuple_check(this->node_name, 0);
13018 col_id_set found_cids; // colrefs unpacked thus far.
13019 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
13022 // Save temporal group-by variables
13025 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
13027 for(g=0;g<gb_tbl.size();g++){
13029 data_type *gdt = gb_tbl.get_data_type(g);
13031 if(gdt->is_temporal()){
13032 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13033 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13034 ret.append(tmpstr);
13041 // Compare the temporal GB vars with the stored ones,
13042 // set flush indicator and update stored GB vars if there is any change.
13044 if(uses_temporal_flush){
13046 bool first_one = true;
13047 for(g=0;g<gb_tbl.size();g++){
13048 data_type *gdt = gb_tbl.get_data_type(g);
13050 if(gdt->is_temporal()){
13051 sprintf(tmpstr,"curr_gb%d",g); string lhs_op = tmpstr;
13052 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
13053 if(first_one){first_one = false;} else {ret += ") && (";}
13054 ret += generate_lt_test(lhs_op, rhs_op, gdt);
13058 for(g=0;g<gb_tbl.size();g++){
13059 data_type *gdt = gb_tbl.get_data_type(g);
13060 if(gdt->is_temporal()){
13062 if(gdt->is_buffer_type()){ // TODO first, last? or delete?
13063 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13065 ret += "\t\tif(curr_gb"+to_string(g)+"==0){\n";
13066 ret += "\t\t\tlast_gb"+to_string(g)+" = gbval->gb_var"+to_string(g)+";\n";
13067 ret += "\t\t}else{\n";
13068 ret += "\t\t\tlast_gb"+to_string(g)+" = curr_gb"+to_string(g)+";\n";
13070 sprintf(tmpstr,"\t\tcurr_gb%d = gbval->gb_var%d;\n",g,g);
13075 ret += "\t\tneeds_temporal_flush = curr_gb"+to_string (temporal_gb)+" - last_gb"+to_string(temporal_gb)+";\n";
13076 ret += "\t\t}else{\n"
13077 "\t\t\tneeds_temporal_flush=0;\n"
13082 // For temporal status tuple we don't need to do anything else
13083 ret += "\tif (temp_tuple_received) return NULL;\n\n";
13085 for(w=0;w<where.size();++w){
13086 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
13088 // Find the set of variables accessed in this CNF elem,
13089 // but in no previous element.
13090 col_id_set new_cids;
13091 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
13093 // Unpack these values.
13094 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13095 // Find partial fcns ref'd in this cnf element
13096 set<int> pfcn_refs;
13097 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
13098 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
13100 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
13101 +") ) return(NULL);\n";
13104 // The partial functions ref'd in the group-by var and aggregate
13105 // definitions must also be evaluated. If one returns false,
13106 // then implicitly the predicate is false.
13107 set<int>::iterator pfsi;
13109 if(ag_gb_pfcns.size() > 0)
13110 ret += "//\t\tUnpack remaining partial fcns.\n";
13111 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
13112 found_cids, segen_gb_tbl, "NULL", needs_xform);
13114 // Unpack the group-by variables
13116 for(g=0;g<gb_tbl.size();g++){
13117 data_type *gdt = gb_tbl.get_data_type(g);
13118 if(!gdt->is_temporal()){ // temproal gbs already computed
13119 // Find the new fields ref'd by this GBvar def.
13120 col_id_set new_cids;
13121 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
13122 // Unpack these values.
13123 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13125 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13126 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13128 // There seems to be no difference between the two
13129 // branches of the IF statement.
13130 data_type *gdt = gb_tbl.get_data_type(g);
13131 if(gdt->is_buffer_type()){
13132 // Create temporary copy.
13133 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13134 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13136 scalarexp_t *gse = gb_tbl.get_def(g);
13137 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13138 g,generate_se_code(gse,schema).c_str());
13141 ret.append(tmpstr);
13147 ret+= "\treturn gbval;\n";
13150 //--------------------------------------------------------
13151 // Create and initialize an aggregate object
13153 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
13154 // Variables for execution of the function.
13155 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13158 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
13159 "_aggrdef *)buffer;\n";
13161 for(a=0;a<aggr_tbl.size();a++){
13162 if(aggr_tbl.is_builtin(a)){
13163 // Create temporaries for buffer return values
13164 data_type *adt = aggr_tbl.get_data_type(a);
13165 if(adt->is_buffer_type()){
13166 sprintf(tmpstr,"aggr_tmp_%d", a);
13167 ret+=adt->make_host_cvar(tmpstr)+";\n";
13172 // Unpack all remaining attributes
13173 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
13174 for(a=0;a<aggr_tbl.size();a++){
13175 sprintf(tmpstr,"aggval->aggr_var%d",a);
13176 string assignto_var = tmpstr;
13177 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
13180 ret += "\treturn aggval;\n";
13183 //--------------------------------------------------------
13184 // update an aggregate object
13186 ret += "void update_aggregate(host_tuple &tup0, "
13187 +generate_functor_name()+"_groupdef *gbval, "+
13188 generate_functor_name()+"_aggrdef *aggval){\n";
13189 // Variables for execution of the function.
13190 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13192 // use of temporaries depends on the aggregate,
13193 // generate them in generate_aggr_update
13196 // Unpack all remaining attributes
13197 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
13198 for(a=0;a<aggr_tbl.size();a++){
13199 sprintf(tmpstr,"aggval->aggr_var%d",a);
13200 string varname = tmpstr;
13201 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
13204 ret += "\treturn;\n";
13207 //--------------------------------------------------------
13208 // reinitialize an aggregate object
13210 ret += "void reinit_aggregates( "+
13211 generate_functor_name()+"_groupdef *gbval, "+
13212 generate_functor_name()+"_aggrdef *aggval){\n";
13213 // Variables for execution of the function.
13214 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13216 // use of temporaries depends on the aggregate,
13217 // generate them in generate_aggr_update
13219 for(g=0;g<gb_tbl.size();g++){
13220 data_type *gdt = gb_tbl.get_data_type(g);
13221 if(gdt->is_temporal()){
13222 if(gdt->is_buffer_type()){
13223 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13225 sprintf(tmpstr,"\t\t gbval->gb_var%d =last_gb%d;\n",g,g);
13231 // Unpack all remaining attributes
13232 for(a=0;a<aggr_tbl.size();a++){
13233 sprintf(tmpstr,"aggval->aggr_var%d",a);
13234 string varname = tmpstr;
13235 ret.append(generate_aggr_reinitialize(varname,&aggr_tbl,a, schema));
13238 ret += "\treturn;\n";
13245 //---------------------------------------------------
13248 ret += "gs_int32_t flush_needed(){\n";
13249 if(uses_temporal_flush){
13250 ret += "\treturn needs_temporal_flush;\n";
13252 ret += "\treturn 0;\n";
13256 //------------------------------------------------
13257 // time bucket management
13258 ret += "void advance_last_tb(){\n";
13259 ret += "\tlast_gb"+to_string(temporal_gb)+"++;\n";
13261 ret += "void reset_last_tb(){\n";
13262 ret += "\tlast_gb"+to_string(temporal_gb)+" = curr_gb"+to_string(temporal_gb)+";\n";
13265 //---------------------------------------------------
13266 // create output tuple
13267 // Unpack the partial functions ref'd in the where clause,
13268 // select clause. Evaluate the where clause.
13269 // Finally, pack the tuple.
13271 // I need to use special code generation here,
13272 // so I'll leave it in longhand.
13274 ret += "host_tuple create_output_tuple("
13275 +generate_functor_name()+"_groupdef *gbval, "+
13276 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
13278 ret += "\thost_tuple tup;\n";
13279 ret += "\tfailed = false;\n";
13280 ret += "\tgs_retval_t retval = 0;\n";
13282 string gbvar = "gbval->gb_var";
13283 string aggvar = "aggval->";
13286 // First, get the return values from the UDAFS
13287 for(a=0;a<aggr_tbl.size();a++){
13288 if(! aggr_tbl.is_builtin(a)){
13289 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
13290 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
13291 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
13295 set<int> hv_sl_pfcns;
13296 for(w=0;w<having.size();w++){
13297 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
13299 for(s=0;s<select_list.size();s++){
13300 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
13303 // clean up the partial fcn results from any previous execution
13304 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
13307 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
13308 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13309 ret += "\tif(retval){ failed = true; return(tup);}\n";
13312 // Evalaute the HAVING clause
13313 // TODO: this seems to have a ++ operator rather than a + operator.
13314 for(w=0;w<having.size();++w){
13315 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
13318 // Now, compute the size of the tuple.
13320 // Unpack any BUFFER type selections into temporaries
13321 // so that I can compute their size and not have
13322 // to recompute their value during tuple packing.
13323 // I can use regular assignment here because
13324 // these temporaries are non-persistent.
13325 // TODO: should I be using the selvar generation routine?
13327 ret += "//\t\tCompute the size of the tuple.\n";
13328 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
13329 for(s=0;s<select_list.size();s++){
13330 scalarexp_t *se = select_list[s]->se;
13331 data_type *sdt = se->get_data_type();
13332 if(sdt->is_buffer_type() &&
13333 !( (se->get_operator_type() == SE_COLREF) ||
13334 (se->get_operator_type() == SE_AGGR_STAR) ||
13335 (se->get_operator_type() == SE_AGGR_SE) ||
13336 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13337 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13339 sprintf(tmpstr,"selvar_%d",s);
13340 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
13341 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
13345 // The size of the tuple is the size of the tuple struct plus the
13346 // size of the buffers to be copied in.
13348 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
13349 for(s=0;s<select_list.size();s++){
13350 // if(s>0) ret += "+";
13351 scalarexp_t *se = select_list[s]->se;
13352 data_type *sdt = select_list[s]->se->get_data_type();
13353 if(sdt->is_buffer_type()){
13354 if(!( (se->get_operator_type() == SE_COLREF) ||
13355 (se->get_operator_type() == SE_AGGR_STAR) ||
13356 (se->get_operator_type() == SE_AGGR_SE) ||
13357 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13358 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13360 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
13361 ret.append(tmpstr);
13363 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13364 ret.append(tmpstr);
13370 // Allocate tuple data block.
13371 ret += "//\t\tCreate the tuple block.\n";
13372 ret += "\ttup.data = malloc(tup.tuple_size);\n";
13373 ret += "\ttup.heap_resident = true;\n";
13375 // Mark tuple as regular
13376 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
13378 // ret += "\ttup.channel = 0;\n";
13379 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
13380 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
13383 // (Here, offsets are hard-wired. is this a problem?)
13385 ret += "//\t\tPack the fields into the tuple.\n";
13386 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
13387 for(s=0;s<select_list.size();s++){
13388 scalarexp_t *se = select_list[s]->se;
13389 data_type *sdt = se->get_data_type();
13390 if(sdt->is_buffer_type()){
13391 if(!( (se->get_operator_type() == SE_COLREF) ||
13392 (se->get_operator_type() == SE_AGGR_STAR) ||
13393 (se->get_operator_type() == SE_AGGR_SE) ||
13394 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13395 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13397 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
13398 ret.append(tmpstr);
13399 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
13400 ret.append(tmpstr);
13402 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13403 ret.append(tmpstr);
13404 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13405 ret.append(tmpstr);
13408 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13409 ret.append(tmpstr);
13410 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
13415 // Destroy string temporaries
13416 ret += gen_buffer_selvars_dtr(select_list);
13418 ret += "\treturn tup;\n";
13421 //------------------------------------------------------------------
13422 // Cleaning_when : evaluate the cleaning_when clause.
13423 // ASSUME that the udaf return values have already
13424 // been unpacked. delete the string udaf return values at the end.
13426 ret += "bool cleaning_when("
13427 +generate_functor_name()+"_groupdef *gbval, "+
13428 generate_functor_name()+"_aggrdef *aggval){\n";
13430 ret += "\tbool retval = true;\n";
13433 gbvar = "gbval->gb_var";
13434 aggvar = "aggval->";
13437 set<int> clw_pfcns;
13438 for(w=0;w<closing_when.size();w++){
13439 collect_partial_fcns_pr(closing_when[w]->pr, clw_pfcns);
13442 // clean up the partial fcn results from any previous execution
13443 ret += gen_partial_fcn_dtr(partial_fcns,clw_pfcns);
13446 for(pfsi=clw_pfcns.begin();pfsi!=clw_pfcns.end();++pfsi){
13447 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13448 ret += "\tif(retval){ return false;}\n";
13451 // Evalaute the Closing When clause
13452 // TODO: this seems to have a ++ operator rather than a + operator.
13453 for(w=0;w<closing_when.size();++w){
13454 ret += "\tif( !("+generate_predicate_code_fm_aggr(closing_when[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
13458 // Destroy string return vals of UDAFs
13459 for(a=0;a<aggr_tbl.size();a++){
13460 if(! aggr_tbl.is_builtin(a)){
13461 int afcn_id = aggr_tbl.get_fcn_id(a);
13462 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
13463 if(adt->is_buffer_type()){
13464 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
13465 adt->get_hfta_buffer_destroy().c_str(), a );
13471 ret += "\treturn retval;\n";
13477 //-------------------------------------------------------------------
13478 // Temporal update functions
13480 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
13482 // create a temp status tuple
13483 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
13485 ret += gen_init_temp_status_tuple(this->get_node_name());
13488 // (Here, offsets are hard-wired. is this a problem?)
13490 ret += "//\t\tPack the fields into the tuple.\n";
13491 for(s=0;s<select_list.size();s++){
13492 data_type *sdt = select_list[s]->se->get_data_type();
13493 if(sdt->is_temporal()){
13494 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13496 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_gb", "", schema).c_str());
13502 ret += "\treturn 0;\n";
13503 ret += "};};\n\n\n";
13506 //----------------------------------------------------------
13507 // The hash function
13509 ret += "struct "+generate_functor_name()+"_hash_func{\n";
13510 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
13511 "_groupdef *grp) const{\n";
13512 ret += "\t\treturn(0";
13513 for(g=0;g<gb_tbl.size();g++){
13514 data_type *gdt = gb_tbl.get_data_type(g);
13515 if(! gdt->is_temporal()){
13517 if(gdt->use_hashfunc()){
13518 if(gdt->is_buffer_type())
13519 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13521 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13523 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
13528 ret += " >> 32);\n";
13532 //----------------------------------------------------------
13533 // The comparison function
13535 ret += "struct "+generate_functor_name()+"_equal_func{\n";
13536 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
13537 generate_functor_name()+"_groupdef *grp2) const{\n";
13538 ret += "\t\treturn( (";
13541 bool first_exec = true;
13542 for(g=0;g<gb_tbl.size();g++){
13543 data_type *gdt = gb_tbl.get_data_type(g);
13544 if(! gdt->is_temporal()){
13545 if(first_exec){first_exec=false;}else{ hcmpr += ") && (";}
13546 if(gdt->complex_comparison(gdt)){
13547 if(gdt->is_buffer_type())
13548 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
13549 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13551 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
13552 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13554 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
// Builds the text of one generated-code statement of the form
//   running_agg_operator<F,F_groupdef, F_aggrdef, F_hash_func, F_equal_func > *op<i> =
//       new running_agg_operator<...same template arguments...>(<params>, "<node name>");
// where F is the value of generate_functor_name().
//   i      - index appended (via int_to_string) to "op" to name the declared pointer.
//   params - text spliced in verbatim as the leading constructor argument(s);
//            the node name from get_node_name() follows it as a quoted string.
// NOTE(review): the statement that consumes this expression (presumably a
// `return`) and the closing lines of the function are not visible in this listing.
13571 string rsgah_qpn::generate_operator(int i, string params){
// Declared type of the pointer: the operator template instantiated on the
// functor and its _groupdef / _aggrdef / _hash_func / _equal_func companions.
13574 " running_agg_operator<" +
13575 generate_functor_name()+","+
13576 generate_functor_name() + "_groupdef, " +
13577 generate_functor_name() + "_aggrdef, " +
13578 generate_functor_name()+"_hash_func, "+
// No '+' follows the next literal: it is joined to the literal on the line
// after it by adjacent string-literal concatenation.
13579 generate_functor_name()+"_equal_func "
// Variable name op<i>, then the matching `new` expression with the same
// template argument list.
13580 "> *op"+int_to_string(i)+" = new running_agg_operator<"+
13581 generate_functor_name()+","+
13582 generate_functor_name() + "_groupdef, " +
13583 generate_functor_name() + "_aggrdef, " +
13584 generate_functor_name()+"_hash_func, "+
13585 generate_functor_name()+"_equal_func "
// Constructor call: caller-supplied params, then the node name in escaped quotes.
13586 ">("+params+", \"" + get_node_name() + "\");\n"
13592 // Split aggregation into two HFTA components - sub and superaggregation
13593 // If unable to split the aggregates, empty vector will be returned
//
// Builds two new sgah_qpn nodes from this one and pushes them onto ret_vec,
// low first:
//   - low_hfta_node, named "_"+node_name, which uses this node's table_name
//     and receives duplicated group-by definitions and WHERE predicates;
//   - hi_hfta_node, named node_name, which reads from a new tablevar_t named
//     "_"+node_name and receives the rehomed select list and HAVING predicates.
// Ext_fcns is queried for the HFTA sub-/super-aggregate ids of each
// non-builtin aggregate and is not otherwise used in the visible lines.
// Schema is not referenced in any line visible here.
// NOTE(review): this listing elides some lines of the function (for example
// the declaration of `pi`, the tail of the split_hfta_aggr call, closing
// braces and the return statements); the comments describe only what is visible.
13594 vector<qp_node *> sgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13596 vector<qp_node *> ret_vec;
// NOTE(review): o, i, t, fta_flds, stream_flds and sel_names are not used in
// any line visible here - candidates for removal once confirmed against the full file.
13597 int s, p, g, a, o, i;
13600 vector<string> fta_flds, stream_flds;
13601 int t = table_name->get_schema_ref();
13603 // Get the set of interfaces it accesses.
13605 vector<string> sel_names;
13607 // Verify that all of the ref'd UDAFs can be split.
// A non-builtin aggregate is checked for both a super-aggregate id and a
// sub-aggregate id; either one being negative takes the failure branch.
// The body of that branch is not visible here (presumably it returns the
// still-empty ret_vec, per the header comment - confirm).
13609 for(a=0;a<aggr_tbl.size();++a){
13610 if(! aggr_tbl.is_builtin(a)){
13611 int afcn = aggr_tbl.get_fcn_id(a);
13612 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13613 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13614 if(hfta_super_id < 0 || hfta_sub_id < 0){
13620 /////////////////////////////////////////////////////
13621 // Split into aggr/aggr.
// Low-level node. NOTE(review): table_name is assigned by pointer, so the low
// node shares this node's tablevar object; the set_range_var call below
// therefore writes the object's own current var name back into it.
13624 sgah_qpn *low_hfta_node = new sgah_qpn();
13625 low_hfta_node->table_name = table_name;
13626 low_hfta_node->set_node_name( "_"+node_name );
13627 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// High-level node: keeps this node's name and reads from a fresh tablevar_t
// named after the low-level node, with the same range variable.
13630 sgah_qpn *hi_hfta_node = new sgah_qpn();
13631 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13632 hi_hfta_node->set_node_name( node_name );
13633 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13635 // First, process the group-by variables.
13636 // both low and hi level queries duplicate group-by variables of original query
13639 for(g=0;g<gb_tbl.size();g++){
13640 // Insert the gbvar into both low- and hi level hfta.
13641 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13642 low_hfta_node->gb_tbl.add_gb_var(
13643 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13646 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// The reference is a new column-ref expression named after the group-by
// variable, tagged with gb index g and given the definition's data type.
13647 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13648 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13649 gbvar_fta->set_gb_ref(g);
13650 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13651 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13653 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
// In the hi node the group-by variable is named after the field of the
// returned column reference and is registered with table-variable ref -1.
13654 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13655 hi_hfta_node->gb_tbl.add_gb_var(
13656 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13660 // hi_hfta_node->gb_tbl.gb_patterns = gb_tbl.gb_patterns; // pattern processing at highest level
13661 hi_hfta_node->gb_tbl.set_pattern_info( &gb_tbl); // pattern processing at highest level
13663 // SEs in the aggregate definitions.
13664 // They are all safe, so split them up for later processing.
// hfta_aggr_se is keyed by int and is later handed to rehome_fta_se /
// rehome_fta_pr; the remaining arguments of the split_hfta_aggr call are
// elided from this listing.
13665 map<int, scalarexp_t *> hfta_aggr_se;
13666 for(a=0;a<aggr_tbl.size();++a){
13667 split_hfta_aggr( &(aggr_tbl), a,
13668 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13669 low_hfta_node->select_list,
13676 // Next, the select list.
// Each select expression is rehomed using hfta_aggr_se and appended to the
// hi node's select list under its original name.
// NOTE(review): fta_forbidden is set but not read in the visible lines.
13678 for(s=0;s<select_list.size();s++){
13679 bool fta_forbidden = false;
13680 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13681 hi_hfta_node->select_list.push_back(
13682 new select_element(root_se, select_list[s]->name));
13687 // All the predicates in the where clause must execute
13688 // in the low-level hfta.
// Each predicate is duplicated, wrapped in a new cnf_elem and analyzed
// before being appended to the low node's WHERE list.
13690 for(p=0;p<where.size();p++){
13691 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13692 cnf_elem *new_cnf = new cnf_elem(new_pr);
13693 analyze_cnf(new_cnf);
13695 low_hfta_node->where.push_back(new_cnf);
13698 // All of the predicates in the having clause must
13699 // execute in the high-level hfta node.
// Each predicate is rehomed using hfta_aggr_se, wrapped in a new cnf_elem
// and analyzed before being appended to the hi node's HAVING list.
13701 for(p=0;p<having.size();p++){
13702 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13703 cnf_elem *cnf_root = new cnf_elem(pr_root);
13704 analyze_cnf(cnf_root);
13706 hi_hfta_node->having.push_back(cnf_root);
13710 // Copy parameters to both nodes
// Each node gets its own duplicate of the parameter's data_type, together
// with the original handle_access value. The declaration of `pi` is not
// visible in this listing.
13711 vector<string> param_names = param_tbl->get_param_names();
13713 for(pi=0;pi<param_names.size();pi++){
13714 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13715 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13716 param_tbl->handle_access(param_names[pi]));
13717 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13718 param_tbl->handle_access(param_names[pi]));
// Both nodes take this node's definitions by assignment.
13720 low_hfta_node->definitions = definitions;
13721 hi_hfta_node->definitions = definitions;
// NOTE(review): low_hfta_node->table_name is the same pointer as table_name
// (assigned above), so the two setters below copy the object's machine and
// interface onto itself, and set_ifq(false) is applied to the tablevar this
// node also uses - confirm that is intended.
13724 low_hfta_node->table_name->set_machine(table_name->get_machine());
13725 low_hfta_node->table_name->set_interface(table_name->get_interface());
13726 low_hfta_node->table_name->set_ifq(false);
// The hi node's own tablevar copies machine and interface from this node's.
13728 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13729 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13730 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level node first, then high-level node.
13732 ret_vec.push_back(low_hfta_node);
13733 ret_vec.push_back(hi_hfta_node);
13739 // TODO: add splitting into selection/aggregation
13743 // Split aggregation into two HFTA components - sub and superaggregation
13744 // If unable to split the aggregates, empty vector will be returned
13745 // Similar to sgah, but super aggregate is rsgah, subaggr is sgah
//
// Builds a low-level sgah_qpn named "_"+node_name (duplicated group-by
// definitions and WHERE predicates) and a high-level rsgah_qpn named
// node_name (rehomed select list, HAVING and CLOSING_WHEN predicates) that
// reads from a new tablevar_t named "_"+node_name. Both are pushed onto
// ret_vec, low first.
// Ext_fcns is queried for the HFTA sub-/super-aggregate ids of each
// non-builtin aggregate and is not otherwise used in the visible lines.
// Schema is not referenced in any line visible here.
// NOTE(review): this listing elides some lines of the function (for example
// the declaration of `pi`, the tail of the split_hfta_aggr call, closing
// braces and the return statements); the comments describe only what is visible.
13746 vector<qp_node *> rsgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13748 vector<qp_node *> ret_vec;
// NOTE(review): o, i, t, fta_flds, stream_flds and sel_names are not used in
// any line visible here - candidates for removal once confirmed against the full file.
13749 int s, p, g, a, o, i;
13752 vector<string> fta_flds, stream_flds;
13753 int t = table_name->get_schema_ref();
13755 // Get the set of interfaces it accesses.
13757 vector<string> sel_names;
13759 // Verify that all of the ref'd UDAFs can be split.
// A non-builtin aggregate is checked for both a super-aggregate id and a
// sub-aggregate id; either one being negative takes the failure branch.
// The body of that branch is not visible here (presumably it returns the
// still-empty ret_vec, per the header comment - confirm).
13761 for(a=0;a<aggr_tbl.size();++a){
13762 if(! aggr_tbl.is_builtin(a)){
13763 int afcn = aggr_tbl.get_fcn_id(a);
13764 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13765 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13766 if(hfta_super_id < 0 || hfta_sub_id < 0){
13772 /////////////////////////////////////////////////////
13773 // Split into aggr/aggr.
// Low-level node is a plain sgah_qpn. NOTE(review): table_name is assigned by
// pointer, so the low node shares this node's tablevar object; the
// set_range_var call below writes the object's own current var name back into it.
13776 sgah_qpn *low_hfta_node = new sgah_qpn();
13777 low_hfta_node->table_name = table_name;
13778 low_hfta_node->set_node_name( "_"+node_name );
13779 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// High-level node is an rsgah_qpn: it keeps this node's name and reads from
// a fresh tablevar_t named after the low-level node.
13782 rsgah_qpn *hi_hfta_node = new rsgah_qpn();
13783 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13784 hi_hfta_node->set_node_name( node_name );
13785 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13787 // First, process the group-by variables.
13788 // both low and hi level queries duplicate group-by variables of original query
13791 for(g=0;g<gb_tbl.size();g++){
13792 // Insert the gbvar into both low- and hi level hfta.
13793 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13794 low_hfta_node->gb_tbl.add_gb_var(
13795 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13798 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// The reference is a new column-ref expression named after the group-by
// variable, tagged with gb index g and given the definition's data type.
13799 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13800 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13801 gbvar_fta->set_gb_ref(g);
13802 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13803 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13805 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
// In the hi node the group-by variable is named after the field of the
// returned column reference and is registered with table-variable ref -1.
13806 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13807 hi_hfta_node->gb_tbl.add_gb_var(
13808 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
// NOTE(review): no set_pattern_info call on hi_hfta_node->gb_tbl is visible
// in this function, unlike sgah_qpn::split_node_for_hfta - confirm whether
// that difference is intentional.
13813 // SEs in the aggregate definitions.
13814 // They are all safe, so split them up for later processing.
// hfta_aggr_se is keyed by int and is later handed to rehome_fta_se /
// rehome_fta_pr; the remaining arguments of the split_hfta_aggr call are
// elided from this listing.
13815 map<int, scalarexp_t *> hfta_aggr_se;
13816 for(a=0;a<aggr_tbl.size();++a){
13817 split_hfta_aggr( &(aggr_tbl), a,
13818 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13819 low_hfta_node->select_list,
13826 // Next, the select list.
// Each select expression is rehomed using hfta_aggr_se and appended to the
// hi node's select list under its original name.
// NOTE(review): fta_forbidden is set but not read in the visible lines.
13828 for(s=0;s<select_list.size();s++){
13829 bool fta_forbidden = false;
13830 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13831 hi_hfta_node->select_list.push_back(
13832 new select_element(root_se, select_list[s]->name));
13837 // All the predicates in the where clause must execute
13838 // in the low-level hfta.
// Each predicate is duplicated, wrapped in a new cnf_elem and analyzed
// before being appended to the low node's WHERE list.
13840 for(p=0;p<where.size();p++){
13841 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13842 cnf_elem *new_cnf = new cnf_elem(new_pr);
13843 analyze_cnf(new_cnf);
13845 low_hfta_node->where.push_back(new_cnf);
13848 // All of the predicates in the having clause must
13849 // execute in the high-level hfta node.
// Each predicate is rehomed using hfta_aggr_se, wrapped in a new cnf_elem
// and analyzed before being appended to the hi node's HAVING list.
13851 for(p=0;p<having.size();p++){
13852 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13853 cnf_elem *cnf_root = new cnf_elem(pr_root);
13854 analyze_cnf(cnf_root);
13856 hi_hfta_node->having.push_back(cnf_root);
13859 // Similar for closing when
// CLOSING_WHEN predicates get the same rehome / wrap / analyze treatment and
// are appended to the hi node's closing_when list.
13860 for(p=0;p<closing_when.size();p++){
13861 predicate_t *pr_root = rehome_fta_pr( closing_when[p]->pr, &hfta_aggr_se);
13862 cnf_elem *cnf_root = new cnf_elem(pr_root);
13863 analyze_cnf(cnf_root);
13865 hi_hfta_node->closing_when.push_back(cnf_root);
13869 // Copy parameters to both nodes
// Each node gets its own duplicate of the parameter's data_type, together
// with the original handle_access value. The declaration of `pi` is not
// visible in this listing.
13870 vector<string> param_names = param_tbl->get_param_names();
13872 for(pi=0;pi<param_names.size();pi++){
13873 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13874 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13875 param_tbl->handle_access(param_names[pi]));
13876 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13877 param_tbl->handle_access(param_names[pi]));
// Both nodes take this node's definitions by assignment.
13879 low_hfta_node->definitions = definitions;
13880 hi_hfta_node->definitions = definitions;
// NOTE(review): low_hfta_node->table_name is the same pointer as table_name
// (assigned above), so the two setters below copy the object's machine and
// interface onto itself, and set_ifq(false) is applied to the tablevar this
// node also uses - confirm that is intended.
13883 low_hfta_node->table_name->set_machine(table_name->get_machine());
13884 low_hfta_node->table_name->set_interface(table_name->get_interface());
13885 low_hfta_node->table_name->set_ifq(false);
// The hi node's own tablevar copies machine and interface from this node's.
13887 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13888 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13889 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level node first, then high-level node.
13891 ret_vec.push_back(low_hfta_node);
13892 ret_vec.push_back(hi_hfta_node);
13898 // TODO: add splitting into selection/aggregation
13901 //---------------------------------------------------------------
13902 // Code for propagating Protocol field source information
13905 scalarexp_t *resolve_protocol_se(scalarexp_t *se, vector<map<string, scalarexp_t *> *> &src_vec, gb_table *gb_tbl, table_list *Schema){
13906 scalarexp_t *rse, *lse,*p_se, *gb_se;
13907 int tno, schema_type;
13908 map<string, scalarexp_t *> *pse_map;
13910 switch(se->get_operator_type()){
13912 return new scalarexp_t(se->get_literal());
13914 return scalarexp_t::make_param_reference(se->get_op().c_str());
13918 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, se->gb_ref=%d, but gb_tbl is NULL\n",se->get_gb_ref());
13919 gb_se = gb_tbl->get_def(se->get_gb_ref());
13920 return resolve_protocol_se(gb_se,src_vec,gb_tbl,Schema);
13923 schema_type = Schema->get_schema_type(se->get_colref()->get_schema_ref());
13924 if(schema_type == PROTOCOL_SCHEMA)
13925 return dup_se(se,NULL);
13927 tno = se->get_colref()->get_tablevar_ref();
13928 if(tno >= src_vec.size()){
13929 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, tno=%d, src_vec.size()=%lu\n",tno,src_vec.size());
13931 if(src_vec[tno] == NULL)
13934 pse_map =src_vec[tno];
13935 p_se = (*pse_map)[se->get_colref()->get_field()];
13938 return dup_se(p_se,NULL);
13940 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13944 return new scalarexp_t(se->get_op().c_str(),lse);
13946 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13949 rse = resolve_protocol_se(se->get_right_se(),src_vec,gb_tbl,Schema);
13952 return new scalarexp_t(se->get_op().c_str(),lse,rse);
13966 void spx_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13968 vector<map<string, scalarexp_t *> *> src_vec;
13970 for(i=0;i<q_sources.size();i++){
13971 if(q_sources[i] != NULL)
13972 src_vec.push_back(q_sources[i]->get_protocol_se());
13974 src_vec.push_back(NULL);
13977 for(i=0;i<select_list.size();i++){
13978 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13982 void join_eq_hash_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13984 vector<map<string, scalarexp_t *> *> src_vec;
13986 for(i=0;i<q_sources.size();i++){
13987 if(q_sources[i] != NULL)
13988 src_vec.push_back(q_sources[i]->get_protocol_se());
13990 src_vec.push_back(NULL);
13993 for(i=0;i<select_list.size();i++){
13994 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
13997 for(i=0;i<hash_eq.size();i++){
13998 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
13999 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
14003 void filter_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14005 vector<map<string, scalarexp_t *> *> src_vec;
14007 for(i=0;i<q_sources.size();i++){
14008 if(q_sources[i] != NULL)
14009 src_vec.push_back(q_sources[i]->get_protocol_se());
14011 src_vec.push_back(NULL);
14014 for(i=0;i<select_list.size();i++){
14015 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
14018 for(i=0;i<hash_eq.size();i++){
14019 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
14020 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
14024 void watch_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14026 vector<map<string, scalarexp_t *> *> src_vec;
14028 for(i=0;i<q_sources.size();i++){
14029 if(q_sources[i] != NULL)
14030 src_vec.push_back(q_sources[i]->get_protocol_se());
14032 src_vec.push_back(NULL);
14035 for(i=0;i<select_list.size();i++){
14036 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
14039 for(i=0;i<key_flds.size();i++){
14040 string kfld = key_flds[i];
14041 hash_src_l.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_left_se(),src_vec,NULL,Schema));
14042 hash_src_r.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_right_se(),src_vec,NULL,Schema));
14047 void sgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14049 vector<map<string, scalarexp_t *> *> src_vec;
14051 for(i=0;i<q_sources.size();i++){
14052 if(q_sources[i] != NULL)
14053 src_vec.push_back(q_sources[i]->get_protocol_se());
14055 src_vec.push_back(NULL);
14058 for(i=0;i<select_list.size();i++){
14059 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
14062 for(i=0;i<gb_tbl.size();i++)
14063 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
14067 void rsgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14069 vector<map<string, scalarexp_t *> *> src_vec;
14071 for(i=0;i<q_sources.size();i++){
14072 if(q_sources[i] != NULL)
14073 src_vec.push_back(q_sources[i]->get_protocol_se());
14075 src_vec.push_back(NULL);
14078 for(i=0;i<select_list.size();i++){
14079 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
14082 for(i=0;i<gb_tbl.size();i++)
14083 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
14086 void sgahcwcb_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14088 vector<map<string, scalarexp_t *> *> src_vec;
14090 for(i=0;i<q_sources.size();i++){
14091 if(q_sources[i] != NULL)
14092 src_vec.push_back(q_sources[i]->get_protocol_se());
14094 src_vec.push_back(NULL);
14097 for(i=0;i<select_list.size();i++){
14098 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
14101 for(i=0;i<gb_tbl.size();i++)
14102 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
14105 void mrg_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14107 scalarexp_t *first_se;
14109 vector<map<string, scalarexp_t *> *> src_vec;
14110 map<string, scalarexp_t *> *pse_map;
14112 for(i=0;i<q_sources.size();i++){
14113 if(q_sources[i] != NULL)
14114 src_vec.push_back(q_sources[i]->get_protocol_se());
14116 src_vec.push_back(NULL);
14119 if(q_sources.size() == 0){
14120 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::create_protocol_se, q_sources.size() == 0\n");
14124 vector<field_entry *> tbl_flds = table_layout->get_fields();
14125 for(f=0;f<tbl_flds.size();f++){
14127 string fld_nm = tbl_flds[f]->get_name();
14128 pse_map = src_vec[0];
14129 first_se = (*pse_map)[fld_nm];
14130 if(first_se == NULL)
14132 for(s=1;s<src_vec.size() && match;s++){
14133 pse_map = src_vec[s];
14134 scalarexp_t *match_se = (*pse_map)[fld_nm];
14135 if(match_se == NULL)
14138 match = is_equivalent_se_base(first_se, match_se, Schema);
14141 protocol_map[fld_nm] = first_se;
14143 protocol_map[fld_nm] = NULL;
14147 void watch_tbl_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){