1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
16 // Create, manipulate, and dump query plans.
18 #include "query_plan.h"
19 #include "analyze_fta.h"
20 #include "generate_utils.h"
26 extern string hash_nums[NRANDS]; // for fast hashing
// Visits every character position of s; s is taken by non-const reference.
// NOTE(review): the loop body is not visible in this excerpt; presumably it
// rewrites characters of s in place -- confirm against the full file.
31 void untaboo(string &s){
33 for(c=0;c<s.size();++c){
40 // mrg_qpn constructor, define here to avoid
41 // circular references in the .h file
// Builds a merge node from the filter-join node spx.
//   spx          : node whose select list, parameters and definitions are used.
//   n_name       : name given to the table_def created for this node.
//   sources      : input table names; each becomes a tablevar_t in fm.
//   ifaces, ifdb : forwarded unchanged to resolve_slack().
42 mrg_qpn::mrg_qpn(filter_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): this assignment is replaced further down by a freshly
// allocated param_table, so it has no lasting effect.
43 param_tbl = spx->param_tbl;
46 field_entry_list *fel = new field_entry_list();
// Turn every select-list entry into a field of the output schema, using a
// duplicate of its data type. merge_fieldpos names the temporal field used
// for merging; the warning is printed when a second temporal field is found.
51 for(i=0;i<spx->select_list.size();++i){
52 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
53 if(dt->is_temporal()){
54 if(merge_fieldpos < 0){
57 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
62 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
63 fel->append_field(fe);
// NOTE(review): select_list[merge_fieldpos] is indexed below. If this error
// path does not terminate (not visible in this excerpt), that index would be
// negative -- confirm.
67 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
70 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
72 // NEED TO HANDLE USER_SPECIFIED SLACK
73 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
74 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
75 // if(this->slack == NULL)
76 // fprintf(stderr,"Zero slack.\n");
78 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// For each source: a merge column reference on the merge field and an input
// table variable, both tied to the range variable "_m<i>".
80 for(i=0;i<sources.size();i++){
81 std::string rvar = "_m"+int_to_string(i);
82 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
83 mvars[i]->set_tablevar_ref(i);
84 fm.push_back(new tablevar_t(sources[i].c_str()));
85 fm[i]->set_range_var(rvar);
// Give this node its own parameter table: every parameter of spx is re-added
// with a duplicated data type and the handle-access value reported by spx.
88 param_tbl = new param_table();
89 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
91 for(pi=0;pi<param_names.size();pi++){
92 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
93 param_tbl->add_param(param_names[pi],dt->duplicate(),
94 spx->param_tbl->handle_access(param_names[pi]));
96 definitions = spx->definitions;
// Builds a merge node from the watchlist-join node spx.
//   spx          : node whose select list, parameters and definitions are used.
//   n_name       : name given to the table_def created for this node.
//   sources      : input table names; each becomes a tablevar_t in fm.
//   ifaces, ifdb : forwarded unchanged to resolve_slack().
101 mrg_qpn::mrg_qpn(watch_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): this assignment is replaced further down by a freshly
// allocated param_table, so it has no lasting effect.
102 param_tbl = spx->param_tbl;
105 field_entry_list *fel = new field_entry_list();
// Turn every select-list entry into a field of the output schema, using a
// duplicate of its data type. merge_fieldpos names the temporal field used
// for merging; when a second temporal field is found a warning is printed
// and reset_temporal() is called on that field's duplicated type.
110 for(i=0;i<spx->select_list.size();++i){
111 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
112 if(dt->is_temporal()){
113 if(merge_fieldpos < 0){
116 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
117 dt->reset_temporal();
121 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
122 fel->append_field(fe);
// NOTE(review): select_list[merge_fieldpos] is indexed below. If the body of
// this check does not terminate (not fully visible in this excerpt), that
// index would be negative -- confirm.
125 if(merge_fieldpos<0){
126 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
129 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
131 // NEED TO HANDLE USER_SPECIFIED SLACK
132 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
133 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
134 // if(this->slack == NULL)
135 // fprintf(stderr,"Zero slack.\n");
137 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// For each source: a merge column reference on the merge field and an input
// table variable, both tied to the range variable "_m<i>".
139 for(i=0;i<sources.size();i++){
140 std::string rvar = "_m"+int_to_string(i);
141 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
142 mvars[i]->set_tablevar_ref(i);
143 fm.push_back(new tablevar_t(sources[i].c_str()));
144 fm[i]->set_range_var(rvar);
// Give this node its own parameter table: every parameter of spx is re-added
// with a duplicated data type and the handle-access value reported by spx.
147 param_tbl = new param_table();
148 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
150 for(pi=0;pi<param_names.size();pi++){
151 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
152 param_tbl->add_param(param_names[pi],dt->duplicate(),
153 spx->param_tbl->handle_access(param_names[pi]));
155 definitions = spx->definitions;
162 // This function translates an analyzed parse tree
163 // into one or more query nodes (qp_node).
164 // Currently only one node is created, but some query
165 // fragments might create more than one query node,
166 // e.g. aggregation over a join, or nested subqueries
167 // in the FROM clause (unless this is handled at parse tree
168 // analysis time). At this stage, they will be linked
169 // by the names in the FROM clause.
170 // INVARIANT : if more than one query node is returned,
171 // the last one represents the output of the query.
// Selects the qp_node subclass that matches the analyzed query qs, constructs
// it from (qs, Schema), records it as plan_root and appends it to local_plan.
// The root is then given the query's name and definitions.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably local_plan is returned -- confirm.
172 vector<qp_node *> create_query_nodes(query_summary_class *qs,table_list *Schema){
174 // Classify the query.
176 vector <qp_node *> local_plan;
180 // I should probably move a lot of this code
181 // into the qp_node constructors,
182 // and have this code focus on building the query plan tree.
// Watchlist query -> watch_tbl_qpn.
185 if(qs->query_type == WATCHLIST_QUERY){
186 watch_tbl_qpn *watchnode = new watch_tbl_qpn(qs, Schema);
189 plan_root = watchnode;
190 local_plan.push_back(watchnode);
// Merge query -> mrg_qpn.
195 if(qs->query_type == MERGE_QUERY){
196 mrg_qpn *merge_node = new mrg_qpn(qs,Schema);
199 plan_root = merge_node;
200 local_plan.push_back(merge_node);
// NOTE(review): the next line is prose, so the lines through the nested
// printf loop below appear to be disabled code whose comment delimiters are
// not visible in this excerpt -- confirm before editing them.
203 Do not split sources until we are done with optimizations
204 vector<mrg_qpn *> split_merge = merge_node->split_sources();
205 local_plan.insert(local_plan.begin(), split_merge.begin(), split_merge.end());
207 // If children are created, add them to the schema.
210 printf("split_merge size is %d\n",split_merge.size());
211 for(i=1;i<split_merge.size();++i){
212 Schema->add_table(split_merge[i]->get_fields());
213 printf("Adding split merge table %d\n",i);
218 printf("Did split sources on %s:\n",qs->query_name.c_str());
220 for(ss=0;ss<local_plan.size();ss++){
221 printf("node %d, name=%s, sources=",ss,local_plan[ss]->get_node_name().c_str());
222 vector<tablevar_t *> inv = local_plan[ss]->get_input_tbls();
224 for(nn=0;nn<inv.size();nn++){
225 printf("%s ",inv[nn]->to_string().c_str());
234 if(qs->query_type == SELECT_QUERY){
236 // Select / Aggregation / Join
// No group-by variables and no aggregates: plain selection or a join.
237 if(qs->gb_tbl->size() == 0 && qs->aggr_tbl->size() == 0){
// Exactly one table in FROM -> spx_qpn.
239 if(qs->fta_tree->get_from()->size() == 1){
240 spx_qpn *spx_node = new spx_qpn(qs,Schema);
242 plan_root = spx_node;
243 local_plan.push_back(spx_node);
// Otherwise the FROM clause properties pick the join node type.
245 if(qs->fta_tree->get_from()->get_properties() == FILTER_JOIN_PROPERTY){
246 filter_join_qpn *join_node = new filter_join_qpn(qs,Schema);
247 plan_root = join_node;
248 local_plan.push_back(join_node);
250 if(qs->fta_tree->get_from()->get_properties() == WATCHLIST_JOIN_PROPERTY){
251 watch_join_qpn *join_node = new watch_join_qpn(qs,Schema);
252 plan_root = join_node;
253 local_plan.push_back(join_node);
// Presumably the fall-through case for the remaining joins (the `else` is
// not visible in this excerpt).
255 join_eq_hash_qpn *join_node = new join_eq_hash_qpn(qs,Schema);
256 plan_root = join_node;
257 local_plan.push_back(join_node);
// Aggregation queries: the node type depends on which optional clauses the
// query summary carries.
264 if(qs->states_refd.size() || qs->sg_tbl.size() || qs->cb_cnf.size()){
265 sgahcwcb_qpn *sgahcwcb_node = new sgahcwcb_qpn(qs,Schema);
266 plan_root = sgahcwcb_node;
267 local_plan.push_back(sgahcwcb_node);
269 if(qs->closew_cnf.size()){
270 rsgah_qpn *rsgah_node = new rsgah_qpn(qs,Schema);
271 plan_root = rsgah_node;
272 local_plan.push_back(rsgah_node);
274 sgah_qpn *sgah_node = new sgah_qpn(qs,Schema);
275 plan_root = sgah_node;
276 local_plan.push_back(sgah_node);
283 // Get the query name and other definitions.
284 plan_root->set_node_name( qs->query_name);
285 plan_root->set_definitions( qs->definitions) ;
288 // return(plan_root);
// Renders the scalar expression se as query text, recursing into its
// operands.
//   se       : expression to render.
//   aggr_tbl : consulted to fetch the operand(s) of an aggregate that se
//              refers to by index (get_aggr_se / get_operand_list).
// Returns the rendered text; if no case matches, the error string at the end
// of the function is returned instead.
// NOTE(review): the case labels are not visible in this excerpt; the comments
// below describe each arm only by what its code does.
294 string se_to_query_string(scalarexp_t *se, aggregate_table *aggr_tbl){
299 vector<scalarexp_t *> operand_list;
// su_ind is appended after the aggregate/function name below; presumably it
// is set here for superaggregates (assignment not visible).
302 if(se->is_superaggr())
305 switch(se->get_operator_type()){
// Literal: rendered by the literal itself.
307 l_str = se->get_literal()->to_query_string();
// Operator name prefixed with '$'.
310 l_str = "$" + se->get_op();
// Column reference: rendered by the colref itself.
313 l_str = se->get_colref()->to_query_string() ;
// One operand: op( operand ). The doubled ';' is an empty statement.
316 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
318 return se->get_op()+"( "+l_str+" )";;
// Two operands: (left)op(right).
320 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
321 r_str = se_to_query_string(se->get_right_se(),aggr_tbl);
322 return( "("+l_str+")"+se->get_op()+"("+r_str+")" );
// Star aggregate: op(*).
324 return( se->get_op() + su_ind + "(*)");
// Aggregate over an expression; the operand is looked up in aggr_tbl.
326 l_str = se_to_query_string(aggr_tbl->get_aggr_se(se->get_aggr_ref()),aggr_tbl);
327 return( se->get_op() + su_ind + "(" + l_str + ")" );
// Operand-list form: operands come from aggr_tbl when se carries a
// non-negative aggregate reference; the other assignment takes them from se.
329 if(se->get_aggr_ref() >= 0)
330 operand_list = aggr_tbl->get_operand_list(se->get_aggr_ref());
332 operand_list = se->get_operands();
334 ret = se->get_op() + su_ind + "(";
335 for(p=0;p<operand_list.size();p++){
336 l_str = se_to_query_string(operand_list[p],aggr_tbl);
344 return "ERROR SE op type not recognized in se_to_query_string.\n";
// Renders the predicate pr as query text, using se_to_query_string for scalar
// operands and recursing for sub-predicates.
//   pr       : predicate to render.
//   aggr_tbl : passed through to se_to_query_string.
// An unknown operator type is reported on stderr with the predicate's line
// and character position.
// NOTE(review): the case labels are not visible in this excerpt; the comments
// below describe each arm only by what its code does.
348 string pred_to_query_str(predicate_t *pr, aggregate_table *aggr_tbl){
353 vector<literal_t *> llist;
354 vector<scalarexp_t *> op_list;
356 switch(pr->get_operator_type()){
// IN-list: "<left> IN [" followed by each literal of the list.
358 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
359 ret = l_str + " IN [";
360 llist = pr->get_lit_vec();
361 for(l=0;l<llist.size();l++){
363 ret += llist[l]->to_query_string();
// Comparison of two scalar expressions: "<left> op <right>".
369 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
370 r_str = se_to_query_string(pr->get_right_se(),aggr_tbl);
371 return( l_str + " " + pr->get_op() + " " + r_str );
// One sub-predicate: op( pred ).
373 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
374 return(pr->get_op() + "( " + l_str + " )");
// Two sub-predicates.
// NOTE(review): the right operand is emitted before the left one here;
// confirm this ordering is intended.
376 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
377 r_str = pred_to_query_str(pr->get_right_pr(),aggr_tbl);
378 return("( " + r_str + " )" + pr->get_op() + "( " + l_str + " )");
// Operand-list form: "op[" followed by each rendered operand.
380 ret = pr->get_op()+"[";
381 op_list = pr->get_op_list();
382 for(o=0;o<op_list.size();++o){
384 ret += se_to_query_string(op_list[o],aggr_tbl);
389 fprintf(stderr,"INTERNAL ERROR in pred_to_query_str, line %d, character %d, unknown predicate operator type %d\n",
390 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
399 // Build a selection list,
400 // but avoid adding duplicate SEs.
// Looks for an element of lfta_select_list whose SE is equivalent to se
// (per is_equivalent_se). When the scan finds none, a new select_element
// with a placeholder name is appended and its index is returned.
// NOTE(review): the rest of the parameter list and the handling of a match
// are not visible in this excerpt; presumably the matching index is
// returned -- confirm.
403 int add_select_list_nodup(vector<select_element *> &lfta_select_list, scalarexp_t *se,
407 for(s=0;s<lfta_select_list.size();s++){
408 if(is_equivalent_se(lfta_select_list[s]->se, se)){
413 lfta_select_list.push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
414 return(lfta_select_list.size()-1);
419 // TODO: The generated colref should be tied to the tablevar
420 // representing the lfta output. For now, always 0.
// Makes sure se has an entry in lfta_select_list and builds a new
// column-reference SE that names that entry.
//   lfta_select_list : list that receives se if no equivalent SE is present.
//   se               : expression to be referenced.
//   h_tvref          : stored as the tablevar reference of the new colref.
// The new SE takes its decorations from se.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably new_se is returned -- confirm.
422 scalarexp_t *make_fta_se_ref(vector<select_element *> &lfta_select_list, scalarexp_t *se, int h_tvref){
424 int fta_se_nbr = add_select_list_nodup(lfta_select_list, se, new_element);
// Either reuse the name already on the entry, or impute one and store it on
// the entry (the selecting condition is not visible in this excerpt).
427 colname = lfta_select_list[fta_se_nbr]->name;
429 colname = impute_colname(lfta_select_list, se);
430 lfta_select_list[fta_se_nbr]->name = colname;
433 // TODO: fill in the tablevar and schema of the colref here.
434 colref_t *new_cr = new colref_t(colname.c_str());
435 new_cr->set_tablevar_ref(h_tvref);
438 scalarexp_t *new_se= new scalarexp_t(new_cr);
439 new_se->use_decorations_of(se);
445 // Build a selection list,
446 // but avoid adding duplicate SEs.
// Pointer-taking overload: looks for an element of *lfta_select_list whose SE
// is equivalent to se (per is_equivalent_se). When the scan finds none, a new
// select_element with a placeholder name is appended and its index returned.
// lfta_select_list is dereferenced without a null check.
// NOTE(review): the rest of the parameter list and the handling of a match
// are not visible in this excerpt; presumably the matching index is
// returned -- confirm.
449 int add_select_list_nodup(vector<select_element *> *lfta_select_list, scalarexp_t *se,
453 for(s=0;s<lfta_select_list->size();s++){
454 if(is_equivalent_se((*lfta_select_list)[s]->se, se)){
459 lfta_select_list->push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
460 return(lfta_select_list->size()-1);
465 // TODO: The generated colref should be tied to the tablevar
466 // representing the lfta output. For now, always 0.
// Multi-list overload: h_tvref picks the select list to use and is also
// stored as the tablevar reference of the new colref.
//   lfta_select_list : one select list per table variable, indexed by h_tvref
//                      with operator[] (no bounds check is visible here).
//   se               : expression to be referenced.
// The new SE takes its decorations from se.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably new_se is returned -- confirm.
468 scalarexp_t *make_fta_se_ref(vector<vector<select_element *> *> &lfta_select_list, scalarexp_t *se, int h_tvref){
470 vector<select_element *> *the_sel_list = lfta_select_list[h_tvref];
471 int fta_se_nbr = add_select_list_nodup(the_sel_list, se, new_element);
// Either reuse the name already on the entry, or impute one and store it on
// the entry (the selecting condition is not visible in this excerpt).
474 colname = (*the_sel_list)[fta_se_nbr]->name;
476 colname = impute_colname(*the_sel_list, se);
477 (*the_sel_list)[fta_se_nbr]->name = colname;
480 // TODO: fill in the tablevar and schema of the colref here.
481 colref_t *new_cr = new colref_t(colname.c_str());
482 new_cr->set_tablevar_ref(h_tvref);
485 scalarexp_t *new_se= new scalarexp_t(new_cr);
486 new_se->use_decorations_of(se);
495 // Test if a se can be evaluated at the fta.
496 // check forbidden types (e.g. float), forbidden operations
497 // between types (e.g. divide a long long), forbidden operations
498 // (too expensive, not implemented).
500 // Return true if not forbidden, false if forbidden
502 // TODO: the parameter aggr_tbl is not used, delete it.
// Recursively checks se and its operands: data types via fta_legal_type(),
// operations via fta_legal_operation(), and external functions via
// Ext_fcns->fta_legal().
//   se       : expression to check.
//   aggr_tbl : only passed along to the recursive calls.
//   Ext_fcns : external function registry used for lookup and legality.
// NOTE(review): the case labels and most return statements are not visible
// in this excerpt.
504 bool check_fta_forbidden_se(scalarexp_t *se,
505 aggregate_table *aggr_tbl,
506 ext_fcn_list *Ext_fcns
510 vector<scalarexp_t *> operand_list;
511 vector<data_type *> dt_signature;
512 data_type *dt = se->get_data_type();
516 switch(se->get_operator_type()){
// Leaf expression: legality depends on the data type alone.
520 return( se->get_data_type()->fta_legal_type() );
// One operand: check the operand, then the operation on its type.
524 if(!check_fta_forbidden_se(se->get_left_se(), aggr_tbl, Ext_fcns))
527 dt->fta_legal_operation(se->get_left_se()->get_data_type(), se->get_op())
// Two operands: check both, then the operation on the pair of types.
530 if(!check_fta_forbidden_se(se->get_left_se(),aggr_tbl, Ext_fcns))
532 if(!check_fta_forbidden_se(se->get_right_se(),aggr_tbl, Ext_fcns))
534 return(dt->fta_legal_operation(se->get_left_se()->get_data_type(),
535 se->get_right_se()->get_data_type(),
540 // return true, aggregate fta-safeness is determined elsewhere.
// An expression that carries an aggregate reference is accepted as-is.
547 if(se->get_aggr_ref() >= 0) return true;
// Function call: check every operand and collect the operand types, which
// together with the name identify the external function.
549 operand_list = se->get_operands();
550 for(p=0;p<operand_list.size();p++){
551 if(!check_fta_forbidden_se(operand_list[p],aggr_tbl, Ext_fcns))
553 dt_signature.push_back(operand_list[p]->get_data_type() );
555 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Failed lookup: print the attempted signature. Per the message below, -2
// means several subsuming functions matched.
557 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
559 for(o=0;o<operand_list.size();o++){
560 if(o>0) fprintf(stderr,", ");
561 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
563 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
564 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
568 return(Ext_fcns->fta_legal(fcn_id) );
// NOTE(review): this diagnostic goes to stdout via printf, while the other
// diagnostics in this function use fprintf(stderr, ...).
570 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
579 // test if a pr can be executed at the fta.
581 // Return true if not forbidden, false if forbidden
// Recursively checks pr: scalar operands go through check_fta_forbidden_se,
// sub-predicates through this function, and external predicates through
// Ext_fcns->fta_legal().
//   pr       : predicate to check.
//   aggr_tbl : only passed along to the recursive calls.
//   Ext_fcns : external function registry used for lookup and legality.
// NOTE(review): the case labels and most return statements are not visible
// in this excerpt.
583 bool check_fta_forbidden_pr(predicate_t *pr,
584 aggregate_table *aggr_tbl,
585 ext_fcn_list *Ext_fcns
588 vector<literal_t *> llist;
591 vector<scalarexp_t *> op_list;
592 vector<data_type *> dt_signature;
596 switch(pr->get_operator_type()){
// IN-list: check the left expression, then the type of every literal.
598 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns) )
600 llist = pr->get_lit_vec();
601 for(l=0;l<llist.size();l++){
// NOTE(review): a data_type is allocated per literal and no matching delete
// is visible in this excerpt -- possible leak, confirm.
602 dt = new data_type(llist[l]->get_type());
603 if(! dt->fta_legal_type()){
// Comparison: both scalar operands are checked.
611 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns))
613 if(! check_fta_forbidden_se(pr->get_right_se(), aggr_tbl, Ext_fcns))
// One sub-predicate: its result is the result.
617 return( check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns) );
// Two sub-predicates: both are checked.
619 if(! check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns))
621 if(! check_fta_forbidden_pr(pr->get_right_pr(), aggr_tbl, Ext_fcns))
// External predicate: check every operand and collect the operand types,
// which together with the name identify the predicate.
625 op_list = pr->get_op_list();
626 for(o=0;o<op_list.size();o++){
627 if(!check_fta_forbidden_se(op_list[o],aggr_tbl, Ext_fcns))
629 dt_signature.push_back(op_list[o]->get_data_type() );
631 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), dt_signature);
// Failed lookup: print the attempted signature. Per the message below, -2
// means several subsuming predicates matched.
633 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
635 for(o=0;o<op_list.size();o++){
636 if(o>0) fprintf(stderr,", ");
637 fprintf(stderr,"%s",op_list[o]->get_data_type()->to_string().c_str());
639 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
640 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
644 return(Ext_fcns->fta_legal(fcn_id) );
646 fprintf(stderr,"INTERNAL ERROR in check_fta_forbidden_pr, line %d, character %d, unknown predicate operator type %d\n",
647 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
656 // Split the aggregates in orig_aggr_tbl, into superaggregates and
658 // (the value of the HFTA aggregate might be a SE of several LFTA
659 // subaggregates, e.g. avg : sum / count )
660 // Register the superaggregates in hfta_aggr_tbl, and the
661 // subaggregates in lfta_aggr_tbl.
662 // Insert references to the subaggregates into lfta_select_list.
663 // (and record their names in the currnames list)
664 // Create a SE for the superaggregate, put it in hfta_aggr_se,
// Parameters:
//   orig_aggr_tbl    : table holding the aggregate to split.
//   agr_id           : index of that aggregate in orig_aggr_tbl.
//   hfta_aggr_tbl    : receives the superaggregate(s).
//   lfta_aggr_tbl    : receives the subaggregate(s).
//   lfta_select_list : receives one entry per subaggregate (no duplicates).
//   hfta_aggr_se     : on exit, hfta_aggr_se[agr_id] is the superaggregate SE.
//   Ext_fcns         : registry consulted for user-defined aggregates.
667 void split_fta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
668 aggregate_table *hfta_aggr_tbl,
669 aggregate_table *lfta_aggr_tbl,
670 vector<select_element *> &lfta_select_list,
671 map<int,scalarexp_t *> &hfta_aggr_se,
672 ext_fcn_list *Ext_fcns
675 scalarexp_t *subaggr_se;
680 scalarexp_t *new_se, *l_se;
681 vector<scalarexp_t *> subaggr_ref_se;
// ---- User-defined aggregate: the sub- and superaggregate functions are
// obtained from Ext_fcns via the aggregate's function id.
684 if(! orig_aggr_tbl->is_builtin(agr_id)){
685 // Construct the subaggregate
686 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
687 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
688 vector<scalarexp_t *> subopl;
// The subaggregate is built over duplicates of the original operands.
690 for(o=0;o<opl.size();++o){
691 subopl.push_back(dup_se(opl[o], NULL));
693 int sub_id = Ext_fcns->get_subaggr_id(fcn_id);
694 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
695 subaggr_se->set_fcn_id(sub_id);
696 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
697 // Add it to the lfta select list.
698 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
// Either reuse the entry's existing name, or impute one, store it and
// register the subaggregate (the selecting condition is not visible here).
700 colname = lfta_select_list[fta_se_nbr]->name;
702 colname = impute_colname(lfta_select_list, subaggr_se);
703 lfta_select_list[fta_se_nbr]->name = colname;
704 ano = lfta_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,Ext_fcns->has_lfta_bailout(sub_id));
705 subaggr_se->set_aggr_id(ano);
708 // Construct a reference to the subaggregate
709 new_cr = new colref_t(colname.c_str());
710 new_se = new scalarexp_t(new_cr);
711 // I'm not certain what the types should be ....
712 // This will need to be filled in by later analysis.
713 // NOTE: this might not capture all the meaning of data_type ...
714 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
715 subaggr_ref_se.push_back(new_se);
717 // Construct the superaggregate
718 int super_id = Ext_fcns->get_superaggr_id(fcn_id);
719 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
720 ret_se->set_fcn_id(super_id);
721 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
722 // Register it in the hfta aggregate table
723 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, Ext_fcns->is_running_aggr(sub_id),false);
724 ret_se->set_aggr_id(ano);
725 hfta_aggr_se[agr_id] = ret_se;
731 // builtin aggregate processing
// The names and data types of the subaggregates come from orig_aggr_tbl;
// subaggr_names[sa] and subaggr_dt[sa] are indexed in parallel below.
735 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
736 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// ---- Star aggregate: each subaggregate is itself a star aggregate.
739 if(orig_aggr_tbl->is_star_aggr(agr_id)){
740 for(sa=0;sa<subaggr_names.size();sa++){
741 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
742 subaggr_se->set_data_type(subaggr_dt[sa]);
744 // The following sequence is similar to the code in make_fta_se_ref,
745 // but there is special processing for the aggregate tables.
// NOTE(review): fta_se_nbr is assigned without a declaration elsewhere in
// this function, so this `int` declaration likely shadows an outer variable
// -- harmless here, but confirm.
746 int fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
748 colname = lfta_select_list[fta_se_nbr]->name;
750 colname = impute_colname(lfta_select_list, subaggr_se);
751 lfta_select_list[fta_se_nbr]->name = colname;
752 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
753 subaggr_se->set_aggr_id(ano);
755 new_cr = new colref_t(colname.c_str());
756 new_cr->set_tablevar_ref(0);
757 new_se = new scalarexp_t(new_cr);
759 // I'm not certain what the types should be ....
760 // This will need to be filled in by later analysis.
761 // Actually, this is causing a problem.
762 // I will assume a UINT data type. / change to INT
763 // (consistent with assign_data_types in analyze_fta.cc)
764 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
765 data_type *ndt = new data_type("Int"); // used to be Uint
766 new_se->set_data_type(ndt);
768 subaggr_ref_se.push_back(new_se);
// ---- Builtin aggregate that is not a star aggregate.
771 for(sa=0;sa<subaggr_names.size();sa++){
// The subaggregate is either an aggregate over a duplicate of the original
// operand or a star aggregate (the selecting condition is not visible in
// this excerpt).
773 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
774 l_se = dup_se(aggr_operand, NULL);
775 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
777 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
779 subaggr_se->set_data_type(subaggr_dt[sa]);
781 // again, similar to make_fta_se_ref.
782 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
784 colname = lfta_select_list[fta_se_nbr]->name;
786 colname = impute_colname(lfta_select_list, subaggr_se);
787 lfta_select_list[fta_se_nbr]->name = colname;
789 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
791 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
792 subaggr_se->set_aggr_id(ano);
794 new_cr = new colref_t(colname.c_str());
795 new_se = new scalarexp_t(new_cr);
796 // I'm not certain what the types should be ....
797 // This will need to be filled in by later analysis.
798 // NOTE: this might not capture all the meaning of data_type ...
799 new_se->set_data_type(subaggr_dt[sa]);
800 subaggr_ref_se.push_back(new_se);
// Build the superaggregate over the references collected above.
803 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
804 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
806 // ASSUME either the return value is an aggregation,
807 // or a binary_op between two aggregations
// NOTE(review): both sides of the || below are the same comparison, so the
// second one is redundant. Possibly a different operator type (e.g. a star
// aggregate) was meant on one side -- confirm.
808 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
809 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
810 ret_se->set_aggr_id(ano);
812 // Basically processing for AVG.
813 // set the data type of the superagg to that of the subagg.
// Each side of the binary op is registered as its own hfta aggregate.
// subaggr_dt[0] and subaggr_dt[1] are read, so this path requires at least
// two subaggregates.
814 scalarexp_t *left_se = ret_se->get_left_se();
815 left_se->set_data_type(subaggr_dt[0]);
816 ano = hfta_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
817 left_se->set_aggr_id(ano);
819 scalarexp_t *right_se = ret_se->get_right_se();
820 right_se->set_data_type(subaggr_dt[1]);
821 ano = hfta_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
822 right_se->set_aggr_id(ano);
825 hfta_aggr_se[agr_id] = ret_se;
830 // Split the aggregates in orig_aggr_tbl, into hfta_superaggregates and
831 // hfta_subaggregates.
832 // Register the superaggregates in hi_aggr_tbl, and the
833 // subaggregates in low_aggr_tbl.
834 // Insert references to the subaggregates into low_select_list.
835 // (and record their names in the currnames list)
836 // Create a SE for the superaggregate, put it in hi_aggr_se,
// Parameters:
//   orig_aggr_tbl   : table holding the aggregate to split.
//   agr_id          : index of that aggregate in orig_aggr_tbl.
//   hi_aggr_tbl     : receives the superaggregate(s).
//   low_aggr_tbl    : receives the subaggregate(s).
//   low_select_list : receives one entry per subaggregate (no duplicates).
//   hi_aggr_se      : on exit, hi_aggr_se[agr_id] is the superaggregate SE.
//   Ext_fcns        : registry consulted for user-defined aggregates.
839 void split_hfta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
840 aggregate_table *hi_aggr_tbl,
841 aggregate_table *low_aggr_tbl,
842 vector<select_element *> &low_select_list,
843 map<int,scalarexp_t *> &hi_aggr_se,
844 ext_fcn_list *Ext_fcns
847 scalarexp_t *subaggr_se;
852 scalarexp_t *new_se, *l_se;
853 vector<scalarexp_t *> subaggr_ref_se;
// ---- User-defined aggregate: the hfta sub- and superaggregate functions
// are obtained from Ext_fcns via the aggregate's function id.
856 if(! orig_aggr_tbl->is_builtin(agr_id)){
857 // Construct the subaggregate
858 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
859 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
860 vector<scalarexp_t *> subopl;
// The subaggregate is built over duplicates of the original operands.
862 for(o=0;o<opl.size();++o){
863 subopl.push_back(dup_se(opl[o], NULL));
865 int sub_id = Ext_fcns->get_hfta_subaggr_id(fcn_id);
866 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
867 subaggr_se->set_fcn_id(sub_id);
868 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
869 // Add it to the low select list.
870 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
// Either reuse the entry's existing name, or impute one, store it and
// register the subaggregate (the selecting condition is not visible here).
872 colname = low_select_list[fta_se_nbr]->name;
874 colname = impute_colname(low_select_list, subaggr_se);
875 low_select_list[fta_se_nbr]->name = colname;
876 ano = low_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,false);
877 subaggr_se->set_aggr_id(ano);
880 // Construct a reference to the subaggregate
881 new_cr = new colref_t(colname.c_str());
882 new_se = new scalarexp_t(new_cr);
883 // I'm not certain what the types should be ....
884 // This will need to be filled in by later analysis.
885 // NOTE: this might not capture all the meaning of data_type ...
886 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
887 subaggr_ref_se.push_back(new_se);
889 // Construct the superaggregate
890 int super_id = Ext_fcns->get_hfta_superaggr_id(fcn_id);
891 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
892 ret_se->set_fcn_id(super_id);
893 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
894 // Register it in the high aggregate table
895 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, false,false);
896 ret_se->set_aggr_id(ano);
897 hi_aggr_se[agr_id] = ret_se;
903 // builtin aggregate processing
// The names and data types of the subaggregates come from orig_aggr_tbl;
// subaggr_names[sa] and subaggr_dt[sa] are indexed in parallel below.
907 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
908 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// ---- Star aggregate: each subaggregate is itself a star aggregate.
911 if(orig_aggr_tbl->is_star_aggr(agr_id)){
912 for(sa=0;sa<subaggr_names.size();sa++){
913 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
914 subaggr_se->set_data_type(subaggr_dt[sa]);
916 // The following sequence is similar to the code in make_fta_se_ref,
917 // but there is special processing for the aggregate tables.
// NOTE(review): fta_se_nbr is assigned without a declaration elsewhere in
// this function, so this `int` declaration likely shadows an outer variable
// -- harmless here, but confirm.
918 int fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
920 colname = low_select_list[fta_se_nbr]->name;
922 colname = impute_colname(low_select_list, subaggr_se);
923 low_select_list[fta_se_nbr]->name = colname;
924 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
925 subaggr_se->set_aggr_id(ano);
927 new_cr = new colref_t(colname.c_str());
928 new_cr->set_tablevar_ref(0);
929 new_se = new scalarexp_t(new_cr);
931 // I'm not certain what the types should be ....
932 // This will need to be filled in by later analysis.
933 // Actually, this is causing a problem.
934 // I will assume an INT data type (previously UINT).
935 // (consistent with assign_data_types in analyze_fta.cc)
936 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
937 data_type *ndt = new data_type("Int"); // was Uint
938 new_se->set_data_type(ndt);
940 subaggr_ref_se.push_back(new_se);
// ---- Builtin aggregate that is not a star aggregate.
943 for(sa=0;sa<subaggr_names.size();sa++){
// The subaggregate is either an aggregate over a duplicate of the original
// operand or a star aggregate (the selecting condition is not visible in
// this excerpt).
945 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
946 l_se = dup_se(aggr_operand, NULL);
947 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
949 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
951 subaggr_se->set_data_type(subaggr_dt[sa]);
953 // again, similar to make_fta_se_ref.
954 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
956 colname = low_select_list[fta_se_nbr]->name;
958 colname = impute_colname(low_select_list, subaggr_se);
959 low_select_list[fta_se_nbr]->name = colname;
961 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
963 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
964 subaggr_se->set_aggr_id(ano);
966 new_cr = new colref_t(colname.c_str());
967 new_se = new scalarexp_t(new_cr);
968 // I'm not certain what the types should be ....
969 // This will need to be filled in by later analysis.
970 // NOTE: this might not capture all the meaning of data_type ...
971 new_se->set_data_type(subaggr_dt[sa]);
972 subaggr_ref_se.push_back(new_se);
// Build the superaggregate over the references collected above.
975 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
976 // ASSUME either the return value is an aggregation,
977 // or a binary_op between two aggregations
// NOTE(review): both sides of the || below are the same comparison, so the
// second one is redundant. Possibly a different operator type (e.g. a star
// aggregate) was meant on one side -- confirm.
978 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
// Here the data type of ret_se is set only in this arm, whereas
// split_fta_aggr sets it before the test.
979 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
980 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
982 // Basically processing for AVG.
983 // set the data type of the superagg to that of the subagg.
// Each side of the binary op is registered as its own aggregate in
// hi_aggr_tbl. subaggr_dt[0] and subaggr_dt[1] are read, so this path
// requires at least two subaggregates.
984 scalarexp_t *left_se = ret_se->get_left_se();
985 left_se->set_data_type(subaggr_dt[0]);
986 ano = hi_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
987 left_se->set_aggr_id(ano);
989 scalarexp_t *right_se = ret_se->get_right_se();
990 right_se->set_data_type(subaggr_dt[1]);
991 ano = hi_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
992 right_se->set_aggr_id(ano);
// NOTE(review): if this statement runs after both arms (block structure is
// not visible in this excerpt), then in the AVG path ret_se receives the id
// just assigned to right_se; split_fta_aggr sets the id only in the
// single-aggregate arm -- confirm which is intended.
995 ret_se->set_aggr_id(ano);
996 hi_aggr_se[agr_id] = ret_se;
1004 // Split a scalar expression into one part which executes
1005 // at the stream and another set of parts which execute
1007 // Because I'm actually modifying the SEs, I will make
1008 // copies. But I will assume that literals, params, and
1009 // colrefs are immutable at this point.
1010 // (if there is ever a need to change one, must make a
1012 // NOTE : if se is constant (only references literals),
1013 // avoid making the fta compute it.
1015 // NOTE : This will need to be generalized to
1016 // handle join expressions, namely to handle a vector
1019 // Return value is the HFTA se.
1020 // Add lftas select_elements to the fta_select_list.
1021 // set fta_forbidden if this node or any child cannot
1022 // execute at the lfta.
// split_fta_se: recursively rebuild the scalar expression `se`, working
// bottom-up through its operands.
//   se               - expression to split; it is read, never modified.
//   fta_forbidden    - out: derived from the legality of this node's data
//                      type / operation combined with the flags reported
//                      by its children.
//   lfta_select_list - in/out: handed to make_fta_se_ref() whenever a
//                      sub-expression is pushed down.
//   Ext_fcns         - external function table (lookup_fcn, fta_legal).
// Every rebuilt node takes its decorations from the node it replaces
// (use_decorations_of).
1026 scalarexp_t *split_fta_se(scalarexp_t *se,
1027 bool &fta_forbidden,
1028 vector<select_element *> &lfta_select_list,
1029 ext_fcn_list *Ext_fcns
1033 vector<scalarexp_t *> operand_list;
1034 vector<data_type *> dt_signature;
1035 scalarexp_t *ret_se, *l_se, *r_se;
1036 bool l_forbid, r_forbid, this_forbid;
1038 scalarexp_t *new_se;
1039 data_type *dt = se->get_data_type();
1041 switch(se->get_operator_type()){
// Literal: wrap the same literal in a new SE; forbidden iff its data type
// is not fta-legal.
1043 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1044 ret_se = new scalarexp_t(se->get_literal());
1045 ret_se->use_decorations_of(se);
// Parameter reference: same legality rule, rebuilt from the parameter name.
1049 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1050 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1051 ret_se->use_decorations_of(se);
1055 // No colref should be forbidden,
1056 // the schema is wrong, the fta_legal_type() fcn is wrong,
1057 // or the source table is actually a stream.
1058 // Issue a warning, but proceed with processing.
1059 // Also, should not be a ref to a gbvar.
1060 // (a gbvar ref only occurs in an aggregation node,
1061 // and these SEs are rehomed, not split.
1062 fta_forbidden = ! se->get_data_type()->fta_legal_type();
// Diagnostic for a colref whose data type is not fta-legal.
1065 fprintf(stderr,"WARNING, a colref is a forbidden data type in split_fta_se,"
1067 " type is %s, line=%d, col=%d\n",
1068 se->get_colref()->to_string().c_str(),
1069 se->get_data_type()->get_type_str().c_str(),
1070 se->lineno, se->charno
// Diagnostic for a colref that turns out to be a group-by variable reference.
1075 fprintf(stderr,"INTERNAL ERROR, a colref is a gbvar ref in split_fta_se,"
1076 " type is %s, line=%d, col=%d\n",
1077 se->get_data_type()->get_type_str().c_str(),
1078 se->lineno, se->charno
// The new SE wraps the same colref object as the original.
1083 ret_se = new scalarexp_t(se->get_colref());
1084 ret_se->use_decorations_of(se);
// Unary operator: split the operand first, then test whether the operation
// itself is legal on the operand's type.
1088 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1090 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1092 // If this operation is forbidden but the child SE is not,
1093 // put the child se on the lfta_select_list, create a colref
1094 // which accesses this se, and make it the child of this op.
1095 // Exception : the child se is constant (only literal refs).
1096 if(this_forbid && !l_forbid){
1097 if(!is_literal_or_param_only(l_se)){
1098 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1099 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1102 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1104 ret_se->use_decorations_of(se);
// NOTE(review): bitwise | on two bools; same value as ||, which the binary
// operator branch below uses.
1105 fta_forbidden = this_forbid | l_forbid;
// Binary operator: split both operands, then test the operation on the
// pair of operand types.
1109 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1110 r_se = split_fta_se(se->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1112 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1114 // Replace the left se if it is not forbidden, but something else is.
// NOTE(review): `&` is a bitwise AND of two bool values here; the result
// matches && but the right operand is always evaluated.
1115 if((this_forbid || r_forbid) & !l_forbid){
1116 if(!is_literal_or_param_only(l_se)){
1117 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1122 // Replace the right se if it is not forbidden, but something else is.
1123 if((this_forbid || l_forbid) & !r_forbid){
1124 if(!is_literal_or_param_only(r_se)){
1125 new_se = make_fta_se_ref(lfta_select_list, r_se,0);
1130 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1131 ret_se->use_decorations_of(se);
1132 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are not expected in an expression being split.
1139 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_fta_se."
1140 " line=%d, col=%d\n",
1141 se->get_op().c_str(),
1142 se->lineno, se->charno
// Function call: split every operand, remembering for each one whether it
// was forbidden, and collect the ORIGINAL operands' data types as the
// signature used for the external-function lookup.
1149 fta_forbidden = false;
1150 operand_list = se->get_operands();
1151 vector<scalarexp_t *> new_operands;
1152 vector<bool> forbidden_op;
1153 for(p=0;p<operand_list.size();p++){
1154 l_se = split_fta_se(operand_list[p], l_forbid, lfta_select_list, Ext_fcns);
1156 fta_forbidden |= l_forbid;
1157 new_operands.push_back(l_se);
1158 forbidden_op.push_back(l_forbid);
1159 dt_signature.push_back(operand_list[p]->get_data_type() );
1162 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup-failure report: prints the function name followed by the operand
// type list; a fcn_id of -2 adds the "multiple subsuming functions" line.
1164 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1166 for(o=0;o<operand_list.size();o++){
1167 if(o>0) fprintf(stderr,", ");
1168 fprintf(stderr,"%s",operand_list[o]->get_data_type()->get_type_str().c_str());
1170 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1171 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The call is also forbidden when the function itself is not fta-legal.
1175 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1177 // Replace the non-forbidden operands.
1178 // the forbidden ones are already replaced.
1180 for(p=0;p<new_operands.size();p++){
1181 if(! forbidden_op[p]){
1182 // if(new_operands[p]->get_data_type()->get_temporal() != constant_t){
1183 if(!is_literal_or_param_only(new_operands[p])){
1184 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],0);
1185 new_operands[p] = new_se;
1191 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1192 ret_se->use_decorations_of(se);
// NOTE(review): this message names check_fta_forbidden_se rather than
// split_fta_se and is written with printf (stdout), unlike the
// fprintf(stderr, ...) diagnostics above.
1198 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
1209 // The predicates have already been
1210 // broken into conjunctions.
1211 // If any part of a conjunction is fta-forbidden,
1212 // it must be executed in the stream operator.
1213 // Else it is executed in the FTA.
1214 // A pre-analysis should determine whether this
1215 // predicate is fta-safe. This procedure will
1216 // assume that it is fta-forbidden and will
1217 // prepare it for execution in the stream.
// split_fta_pr: predicate counterpart of split_fta_se. Rebuilds `pr` as a
// new predicate_t whose scalar sub-expressions have been run through
// split_fta_se(); sub-expressions that are not literal/param-only get a
// reference built by make_fta_se_ref() against lfta_select_list.
//   pr               - predicate to split (read only).
//   lfta_select_list - in/out: select list passed to make_fta_se_ref().
//   Ext_fcns         - external function table, forwarded to split_fta_se().
// NOTE(review): llist and dt_signature are declared but not referenced in
// the statements below - confirm they can be removed.
1221 predicate_t *split_fta_pr(predicate_t *pr,
1222 vector<select_element *> &lfta_select_list,
1223 ext_fcn_list *Ext_fcns
1226 vector<literal_t *> llist;
1227 scalarexp_t *se_l, *se_r;
1228 bool l_forbid, r_forbid;
1229 predicate_t *ret_pr, *pr_l, *pr_r;
1230 vector<scalarexp_t *> op_list, new_op_list;
1232 vector<data_type *> dt_signature;
1235 switch(pr->get_operator_type()){
// Predicate of the form (SE, literal vector): only the left SE is split;
// the literal vector is reused from the original predicate.
1237 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1240 if(!is_literal_or_param_only(se_l)){
1241 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1245 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison between two SEs: each side is split independently.
1250 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1252 if(!is_literal_or_param_only(se_l)){
1253 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1258 se_r = split_fta_se(pr->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1260 if(!is_literal_or_param_only(se_r)){
1261 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,0);
1266 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Unary predicate operator: recurse on the single child predicate.
1270 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1271 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Binary predicate operator: recurse on both child predicates.
1274 case PRED_BINARY_OP:
1275 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1276 pr_r = split_fta_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1277 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: each operand is split on its own and the
// function id is copied from the original predicate.
1281 // I can't push the predicate into the lfta, except by
1282 // returning a bool value, and that is not worth the trouble,
1283 op_list = pr->get_op_list();
1284 for(o=0;o<op_list.size();++o){
1285 se_l = split_fta_se(op_list[o],l_forbid,lfta_select_list,Ext_fcns);
1287 if(!is_literal_or_param_only(se_l)){
1288 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1292 new_op_list.push_back(se_l);
1295 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1296 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unknown operator type: report position and operator code on stderr.
1299 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1300 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1311 //--------------------------------------------------------------------
1315 // Split a scalar expression into one part which executes
1316 // at the stream and another set of parts which execute
1318 // Because I'm actually modifying the SEs, I will make
1319 // copies. But I will assume that literals, params, and
1320 // colrefs are immutable at this point.
1321 // (if there is ever a need to change one, must make a
1323 // NOTE : if se is constant (only references literals),
1324 // avoid making the fta compute it.
1326 // NOTE : This will need to be generalized to
1327 // handle join expressions, namely to handle a vector
1330 // Return value is the HFTA se.
1331 // Add lftas select_elements to the fta_select_list.
1332 // set fta_forbidden if this node or any child cannot
1333 // execute at the lfta.
// Negative sentinel values for a colref_source; real sources are
// non-negative indexes into the per-tablevar select-list vector.
//   NOTBLVAR - the expression references no table variable
//              (split_ftavec_se assigns it for literals and params).
//   MIXED    - combine_colref_source() saw two different sources.
1335 #define SPLIT_FTAVEC_NOTBLVAR -1
1336 #define SPLIT_FTAVEC_MIXED -2
// is_PROTOCOL_source: true when colref_source is a non-negative index and
// the select list stored at that index in lfta_select_list is non-NULL
// (split_ftavec_se documents a NULL entry as "tblvar is not PROTOCOL").
// NOTE(review): colref_source is not checked against
// lfta_select_list.size() before indexing - confirm callers guarantee it.
1338 bool is_PROTOCOL_source(int colref_source,
1339 vector< vector<select_element *> *> &lfta_select_list){
1340 if(colref_source>=0 && lfta_select_list[colref_source]!=NULL) return true;
// combine_colref_source: merge the colref sources of two sub-expressions.
//   - identical sources                 -> that source
//   - one side is SPLIT_FTAVEC_NOTBLVAR -> the other side
//   - anything else                     -> SPLIT_FTAVEC_MIXED
// (so MIXED combined with a real tablevar index also yields MIXED).
1344 int combine_colref_source(int s1, int s2){
1345 if(s1==s2) return(s1);
1346 if(s1==SPLIT_FTAVEC_NOTBLVAR) return s2;
1347 if(s2==SPLIT_FTAVEC_NOTBLVAR) return s1;
1348 return SPLIT_FTAVEC_MIXED;
// split_ftavec_se: variant of the scalar-expression splitter that works
// with one select list per table variable. lfta_select_list is indexed by
// tablevar, and colref_source reports which tablevar (or NOTBLVAR / MIXED)
// the expression draws its column references from. Sub-expressions are
// only pushed down (make_fta_se_ref) when their source passes
// is_PROTOCOL_source().
1351 scalarexp_t *split_ftavec_se(
1352 scalarexp_t *se, // the SE to split
1353 bool &fta_forbidden, // return true if some part of se
1355 int &colref_source, // the tblvar which sources the
1356 // colref, or NOTBLVAR, or MIXED
1357 vector< vector<select_element *> *> &lfta_select_list,
1358 // NULL if the tblvar is not PROTOCOL,
1359 // else build the select list.
1360 ext_fcn_list *Ext_fcns // is the fcn lfta-safe?
1362 // Return value is the HFTA SE, unless fta_forbidden is true and
1363 // colref_source>=0 and the indicated source is PROTOCOL.
1364 // In that case no split was done, the make_fta_se_ref must
1365 // be done by the caller.
1368 vector<scalarexp_t *> operand_list;
1369 vector<data_type *> dt_signature;
1370 scalarexp_t *ret_se, *l_se, *r_se;
1371 bool l_forbid, r_forbid, this_forbid;
1372 int l_csource, r_csource, this_csource;
1374 scalarexp_t *new_se;
1375 data_type *dt = se->get_data_type();
1377 switch(se->get_operator_type()){
// Literal: no table variable involved; forbidden iff the type is not
// fta-legal.
1379 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1380 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1381 ret_se = new scalarexp_t(se->get_literal());
1382 ret_se->use_decorations_of(se);
// Parameter reference: treated like a literal for source purposes.
1386 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1387 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1388 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1389 ret_se->use_decorations_of(se);
// Interface parameter: never forbidden; its source is the tablevar the
// interface-parameter reference points at.
1392 case SE_IFACE_PARAM:
1393 fta_forbidden = false;
1394 colref_source = se->get_ifpref()->get_tablevar_ref();
1395 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1396 ret_se->use_decorations_of(se);
1400 // No colref should be forbidden,
1401 // the schema is wrong, the fta_legal_type() fcn is wrong,
1402 // or the source table is actually a stream.
1403 // Issue a warning, but proceed with processing.
1404 // Also, should not be a ref to a gbvar.
1405 // (a gbvar ref only occurs in an aggregation node,
1406 // and these SEs are rehomed, not split.
1407 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1408 colref_source = se->get_colref()->get_tablevar_ref();
// Warn only when the forbidden colref comes from a PROTOCOL source.
1410 if(fta_forbidden && is_PROTOCOL_source(colref_source, lfta_select_list)){
1411 fprintf(stderr,"WARNING, a PROTOCOL colref is a forbidden data type in split_ftavec_se,"
1413 " type is %s, line=%d, col=%d\n",
1414 se->get_colref()->to_string().c_str(),
1415 se->get_data_type()->to_string().c_str(),
1416 se->lineno, se->charno
1421 fta_forbidden = true; // eval in hfta. ASSUME make copy as below.
1424 ret_se = new scalarexp_t(se->get_colref());
1425 ret_se->use_decorations_of(se);
// Unary operator: the operand's source is written straight into this
// node's colref_source by the recursive call.
1429 l_se = split_ftavec_se(se->get_left_se(), l_forbid, colref_source, lfta_select_list, Ext_fcns);
1431 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1433 // If this operation is forbidden but the child SE is not,
1434 // AND the colref source in the se is a single PROTOCOL source
1435 // put the child se on the lfta_select_list, create a colref
1436 // which accesses this se, and make it the child of this op.
1437 // Exception : the child se is constant (only literal refs).
1438 // TODO: I think the exception is expressed by is_PROTOCOL_source
1439 if(this_forbid && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list)){
1440 if(!is_literal_or_param_only(l_se)){
1441 new_se = make_fta_se_ref(lfta_select_list, l_se,colref_source);
1442 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1445 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1447 ret_se->use_decorations_of(se);
// NOTE(review): bitwise | on two bools; same value as ||, which the binary
// operator branch below uses.
1448 fta_forbidden = this_forbid | l_forbid;
// Binary operator: split both sides, then merge their sources.
1452 l_se = split_ftavec_se(se->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1453 r_se = split_ftavec_se(se->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1455 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1456 colref_source=combine_colref_source(l_csource, r_csource);
1458 // Replace the left se if the parent must be hfta but the child can
1459 // be lfta. This translates to
1460 // a) result is PROTOCOL and forbidden, but left SE is not forbidden
1461 // OR b) if result is mixed but the left se is PROTOCOL, not forbidden
1462 if( ((this_forbid || r_forbid) && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1463 (colref_source==SPLIT_FTAVEC_MIXED && !l_forbid &&
1464 is_PROTOCOL_source(l_csource, lfta_select_list)) ){
1465 if(!is_literal_or_param_only(l_se)){
1466 new_se = make_fta_se_ref(lfta_select_list, l_se,l_csource);
1471 // same logic as for right se.
1472 if( ((this_forbid || l_forbid) && !r_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1473 (colref_source==SPLIT_FTAVEC_MIXED && !r_forbid &&
1474 is_PROTOCOL_source(r_csource, lfta_select_list)) ){
1475 if(!is_literal_or_param_only(r_se)){
1476 new_se = make_fta_se_ref(lfta_select_list, r_se,r_csource);
1481 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1482 ret_se->use_decorations_of(se);
1483 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are not expected in an expression being split.
1490 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_ftavec_se."
1491 " line=%d, col=%d\n",
1492 se->get_op().c_str(),
1493 se->lineno, se->charno
// Function call: split every operand, recording per operand its forbidden
// flag and its source, and fold all operand sources into colref_source.
// The lookup signature is built from the ORIGINAL operands' data types.
1500 operand_list = se->get_operands();
1501 vector<scalarexp_t *> new_operands;
1502 vector<bool> forbidden_op;
1503 vector<int> csource;
1505 fta_forbidden = false;
1506 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1507 for(p=0;p<operand_list.size();p++){
1508 l_se = split_ftavec_se(operand_list[p], l_forbid, l_csource, lfta_select_list, Ext_fcns);
1510 fta_forbidden |= l_forbid;
1511 colref_source = combine_colref_source(colref_source, l_csource);
1512 new_operands.push_back(l_se);
1513 forbidden_op.push_back(l_forbid);
1514 csource.push_back(l_csource);
1515 dt_signature.push_back(operand_list[p]->get_data_type() );
1518 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup-failure report: function name, operand type list, position; a
// fcn_id of -2 adds the "multiple subsuming functions" line.
1520 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1522 for(o=0;o<operand_list.size();o++){
1523 if(o>0) fprintf(stderr,", ");
1524 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
1526 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1527 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The call is also forbidden when the function itself is not fta-legal.
1531 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1533 // Replace the non-forbidden operands.
1534 // the forbidden ones are already replaced.
// Only done when the call as a whole cannot go to a single lfta
// (forbidden, or operands come from different sources).
1535 if(fta_forbidden || colref_source == SPLIT_FTAVEC_MIXED){
1536 for(p=0;p<new_operands.size();p++){
1537 if(! forbidden_op[p] && is_PROTOCOL_source(csource[p], lfta_select_list)){
1538 if(!is_literal_or_param_only(new_operands[p])){
1539 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],csource[p]);
1540 new_operands[p] = new_se;
1546 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1547 ret_se->use_decorations_of(se);
// NOTE(review): written with printf (stdout) while the other diagnostics
// in this function go to stderr.
1553 printf("INTERNAL ERROR in split_ftavec_se: operator type %d\n",se->get_operator_type());
1562 // The predicates have already been
1563 // broken into conjunctions.
1564 // If any part of a conjunction is fta-forbidden,
1565 // it must be executed in the stream operator.
1566 // Else it is executed in the FTA.
1567 // A pre-analysis should determine whether this
1568 // predicate is fta-safe. This procedure will
1569 // assume that it is fta-forbidden and will
1570 // prepare it for execution in the stream.
// split_ftavec_pr: predicate counterpart of split_ftavec_se. Rebuilds `pr`
// with every scalar sub-expression passed through split_ftavec_se(). A
// sub-expression gets a make_fta_se_ref() reference only when it is not
// forbidden, its source passes is_PROTOCOL_source(), and it is not
// literal/param-only.
//   pr               - predicate to split (read only).
//   lfta_select_list - in/out: one select list per table variable.
//   Ext_fcns         - external function table, forwarded to split_ftavec_se().
// NOTE(review): llist and dt_signature are declared but not referenced in
// the statements below - confirm they can be removed.
1572 predicate_t *split_ftavec_pr(predicate_t *pr,
1573 vector< vector<select_element *> *> &lfta_select_list,
1574 ext_fcn_list *Ext_fcns
1577 vector<literal_t *> llist;
1578 scalarexp_t *se_l, *se_r;
1579 bool l_forbid, r_forbid;
1580 int l_csource, r_csource;
1581 predicate_t *ret_pr, *pr_l, *pr_r;
1582 vector<scalarexp_t *> op_list, new_op_list;
1584 vector<data_type *> dt_signature;
1587 switch(pr->get_operator_type()){
// Predicate of the form (SE, literal vector): only the left SE is split;
// the literal vector is reused from the original predicate.
1589 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1591 // TODO: checking that the se is a PROTOCOL source should
1592 // take care of literal_or_param_only.
1593 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1594 if(!is_literal_or_param_only(se_l)){
1595 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1599 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison between two SEs: each side is split and pushed down
// independently, using its own source.
1604 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1605 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1606 if(!is_literal_or_param_only(se_l)){
1607 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1612 se_r = split_ftavec_se(pr->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1613 if(!r_forbid && is_PROTOCOL_source(r_csource, lfta_select_list)){
1614 if(!is_literal_or_param_only(se_r)){
1615 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,r_csource);
1620 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Unary predicate operator: recurse on the single child predicate.
1624 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1625 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Binary predicate operator: recurse on both child predicates.
1628 case PRED_BINARY_OP:
1629 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1630 pr_r = split_ftavec_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1631 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: each operand is split on its own and the
// function id is copied from the original predicate.
1635 // I can't push the predicate into the lfta, except by
1636 // returning a bool value, and that is not worth the trouble,
1637 op_list = pr->get_op_list();
1638 for(o=0;o<op_list.size();++o){
1639 se_l = split_ftavec_se(op_list[o],l_forbid,l_csource,lfta_select_list,Ext_fcns);
1640 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1641 if(!is_literal_or_param_only(se_l)){
1642 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1646 new_op_list.push_back(se_l);
1649 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1650 ret_pr->set_fcn_id(pr->get_fcn_id());
// NOTE(review): the message names split_fta_pr although this is
// split_ftavec_pr - looks like a copy/paste leftover.
1653 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1654 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1664 ////////////////////////////////////////////////////////////////////////
1665 /// rehome_hfta_se rehome_hfta_pr
1666 /// This is used to split an sgah operator (aggregation),
1667 /// I just need to make gb, aggr references point to the
1668 /// new gb, aggr table entries.
// rehome_fta_se: build a copy of `se` in which aggregate references are
// replaced by the scalar expressions stored in *aggr_map (keyed by the
// aggregate id from get_aggr_ref()). All other node kinds are rebuilt
// recursively and inherit the decorations of the node they replace.
//   se       - expression to rehome (read only).
//   aggr_map - aggregate id -> replacement SE. The mapped pointer itself is
//              returned, not a copy.
// NOTE(review): (*aggr_map)[id] uses std::map::operator[], which inserts a
// NULL entry and returns it when the id is absent - confirm every id
// is present before this is called.
1671 scalarexp_t *rehome_fta_se(scalarexp_t *se,
1672 map< int, scalarexp_t * > *aggr_map
1677 vector<scalarexp_t *> operand_list;
1678 scalarexp_t *ret_se, *l_se, *r_se;
1680 scalarexp_t *new_se;
1681 data_type *dt = se->get_data_type();
1682 vector<scalarexp_t *> new_operands;
1684 switch(se->get_operator_type()){
// Literal: new SE around the same literal.
1686 ret_se = new scalarexp_t(se->get_literal());
1687 ret_se->use_decorations_of(se);
// Parameter reference: rebuilt from the parameter name.
1691 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1692 ret_se->use_decorations_of(se);
1695 case SE_IFACE_PARAM:
1696 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1697 ret_se->use_decorations_of(se);
1703 // Must be a GB REF ...
1704 // I'm assuming that the hfta gbvar table has the
1705 // same sequence of entries as the input query's gbvar table.
1706 // Else I'll need some kind of translation table.
// NOTE(review): the message names rehome_hfta_se (this function is
// rehome_fta_se) and spells "gbvar" as "gbver".
1709 fprintf(stderr,"WARNING, a colref is not a gbver ref in rehome_hfta_se"
1710 " type is %s, line=%d, col=%d\n",
1711 se->get_data_type()->to_string().c_str(),
1712 se->lineno, se->charno
1716 ret_se = new scalarexp_t(se->get_colref());
1717 ret_se->use_decorations_of(se); // just inherit the gbref
// Unary operator: rehome the operand and rebuild the node.
1721 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1723 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1724 ret_se->use_decorations_of(se);
// Binary operator: rehome both operands and rebuild the node.
1728 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1729 r_se = rehome_fta_se(se->get_right_se(), aggr_map);
1731 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1732 ret_se->use_decorations_of(se);
// Aggregate reference: substitute the mapped expression directly.
1738 agr_id = se->get_aggr_ref();
1739 return (*aggr_map)[agr_id];
// Function call: a non-negative aggregate id means the call is itself
// substituted from the map; otherwise rehome each operand and rebuild.
1743 agr_id = se->get_aggr_ref();
1744 if(agr_id >= 0) return (*aggr_map)[agr_id];
1746 operand_list = se->get_operands();
1747 for(p=0;p<operand_list.size();p++){
1748 l_se = rehome_fta_se(operand_list[p], aggr_map);
1750 new_operands.push_back(l_se);
1754 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1755 ret_se->use_decorations_of(se);
// NOTE(review): written with printf (stdout) while the warning above goes
// to stderr.
1760 printf("INTERNAL ERROR in rehome_fta_se: operator type %d\n",se->get_operator_type());
1769 // The predicates have already been
1770 // broken into conjunctions.
1771 // If any part of a conjunction is fta-forbidden,
1772 // it must be executed in the stream operator.
1773 // Else it is executed in the FTA.
1774 // A pre-analysis should determine whether this
1775 // predicate is fta-safe. This procedure will
1776 // assume that it is fta-forbidden and will
1777 // prepare it for execution in the stream.
// rehome_fta_pr: build a copy of predicate `pr` in which every scalar
// sub-expression has been passed through rehome_fta_se() with the same
// aggr_map; child predicates are rehomed recursively.
//   pr       - predicate to rehome (read only).
//   aggr_map - aggregate id -> replacement SE, forwarded unchanged.
// NOTE(review): llist is declared but not referenced in the statements
// below - confirm it can be removed.
1779 predicate_t *rehome_fta_pr(predicate_t *pr,
1780 map<int, scalarexp_t *> *aggr_map
1783 vector<literal_t *> llist;
1784 scalarexp_t *se_l, *se_r;
1785 predicate_t *ret_pr, *pr_l, *pr_r;
1786 vector<scalarexp_t *> op_list, new_op_list;
1789 switch(pr->get_operator_type()){
// Predicate of the form (SE, literal vector): the literal vector is reused.
1791 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1792 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison between two SEs.
1796 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1797 se_r = rehome_fta_se(pr->get_right_se(), aggr_map);
1798 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Unary predicate operator.
1802 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1803 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1806 case PRED_BINARY_OP:
1807 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1808 pr_r = rehome_fta_pr(pr->get_right_pr(), aggr_map);
1809 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: operands are rehomed one by one and the
// function id is copied from the original predicate.
1813 op_list = pr->get_op_list();
1814 for(o=0;o<op_list.size();++o){
1815 se_l = rehome_fta_se(op_list[o], aggr_map);
1816 new_op_list.push_back(se_l);
1818 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1819 ret_pr->set_fcn_id(pr->get_fcn_id());
// Unknown operator type: report position and operator code on stderr.
1823 fprintf(stderr,"INTERNAL ERROR in rehome_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1824 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1833 ////////////////////////////////////////////////////////////////////
1834 ///////////////// Create a STREAM table to represent the FTA output.
// create_attributes: build a STREAM_SCHEMA table_def named `tname` with
// one field per entry of select_list.
//   tname       - name given to the new table_def.
//   select_list - select elements; each contributes its name and the data
//                 type of its scalar expression.
// Per field: the type string comes from the SE's data type, the field name
// is the select element's name, and the access function is named
// "get_field_<name>". The data type's parameters are copied into a new
// param_list.
// NOTE(review): `ufcns` passed to field_entry is not declared in the
// statements below - confirm where it comes from.
1836 table_def *create_attributes(string tname, vector<select_element *> &select_list){
1840 // Create a new STREAM schema for the output of the FTA.
1842 field_entry_list *fel = new field_entry_list();
1844 for(s=0;s<select_list.size();s++){
1845 scalarexp_t *sel_se = select_list[s]->se;
1846 data_type *dt = sel_se->get_data_type();
1848 // Grab the annotations of the field.
1849 // As of this writing, the only meaningful annotations
1850 // are whether or not the attribute is temporal.
1851 // There can be an annotation of constant_t, but
1852 // I'll ignore this, it feels like an unsafe assumption
1853 param_list *plist = new param_list();
1854 // if(dt->is_temporal()){
1855 vector<string> param_strings = dt->get_param_keys();
// Each parameter key is appended either together with its value or as a
// bare key.
1857 for(p=0;p<param_strings.size();++p){
1858 string v = dt->get_param_val(param_strings[p]);
1860 plist->append(param_strings[p].c_str(),v.c_str());
1862 plist->append(param_strings[p].c_str());
1866 // char access_fcn_name[500];
1867 string colname = select_list[s]->name;
1868 // sprintf(access_fcn_name,"get_field_%s",colname.c_str());
1869 string access_fcn_name = "get_field_"+colname;
1870 field_entry *fe = new field_entry(
1871 dt->get_type_str(), colname, access_fcn_name, plist, ufcns
1874 fel->append_field(fe);
// Wrap the collected fields in a stream-schema table definition.
1877 table_def *fta_tbl = new table_def(
1878 tname.c_str(), NULL, NULL, fel, STREAM_SCHEMA
1885 //------------------------------------------------------------------
1886 // Textual representation of the query node.
// spx_qpn::to_query_string: textual form of a select/project node.
// Emits "Select " followed by each select_list expression (rendered with
// no aggregate table) plus " AS <name>" when the element is named, then a
// "From <table>" line, then the where predicates, each parenthesised and
// joined with " AND ".
1890 string spx_qpn::to_query_string(){
1892 string ret = "Select ";
1894 for(s=0;s<select_list.size();s++){
1896 ret += se_to_query_string(select_list[s]->se, NULL);
1897 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1901 ret += "From "+table_name->to_string()+"\n";
1903 if(where.size() > 0){
1906 for(w=0;w<where.size();w++){
1907 if(w>0) ret += " AND ";
1908 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// sgah_qpn::to_query_string: textual form of an aggregation node.
// Same Select / From / where layout as the other nodes, but expressions
// and predicates are rendered against this node's aggr_tbl. Adds the
// group-by list and the having predicates.
// Group-by rendering has two shapes:
//   - flat list "<def> AS <name>" when there is at most one gb pattern or
//     no gb_entry_type information;
//   - otherwise one item per gb_entry_type entry: a plain variable (""),
//     CUBE(...) / ROLLUP(...) over gb_entry_count[g] variables, or
//     GROUPING_SETS(...) driven by pattern_components[g].
// gb_pos tracks the position in gb_tbl of the current entry's first variable.
1919 string sgah_qpn::to_query_string(){
1921 string ret = "Select ";
1923 for(s=0;s<select_list.size();s++){
1925 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1926 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1930 ret += "From "+table_name->to_string()+"\n";
1932 if(where.size() > 0){
1935 for(w=0;w<where.size();w++){
1936 if(w>0) ret += " AND ";
1937 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1942 if(gb_tbl.size() > 0){
1945 if(gb_tbl.gb_patterns.size() <= 1 || gb_tbl.gb_entry_type.size()==0){
1946 for(g=0;g<gb_tbl.size();g++){
1947 if(g>0) ret += ", ";
1948 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
1949 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
1951 ret += gb_tbl.get_name(g);
1955 for(g=0;g<gb_tbl.gb_entry_type.size();++g){
1956 if(g>0) ret += ", ";
1957 if(gb_tbl.gb_entry_type[g] == ""){
1958 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+
1959 " AS "+ gb_tbl.get_name(gb_pos);
1962 if(gb_tbl.gb_entry_type[g] == "CUBE" ||
1963 gb_tbl.gb_entry_type[g] == "ROLLUP"){
1964 ret += gb_tbl.gb_entry_type[g] + "(";
1966 for(gg=0;gg<gb_tbl.gb_entry_count[g];++gg){
1967 if(gg>0) ret += ", ";
1968 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+ " AS "+ gb_tbl.get_name(gb_pos);
1973 if(gb_tbl.gb_entry_type[g] == "GROUPING_SETS"){
1974 ret += gb_tbl.gb_entry_type[g] + "(";
1976 vector<vector<bool> > &local_components = gb_tbl.pattern_components[g];
1977 for(g1=0;g1<local_components.size();++g1){
1979 bool first_field = true;
// NOTE(review): this bound is inclusive (<=) whereas the CUBE/ROLLUP loop
// above uses < gb_entry_count[g]; confirm local_components[g1] really has
// gb_entry_count[g]+1 entries, otherwise this reads one past the end.
1981 for(g2=0;g2<=gb_tbl.gb_entry_count[g];g2++){
1982 if(local_components[g1][g2]){
1983 if(!first_field) ret+=", ";
1984 else first_field = false;
1985 ret += gb_tbl.get_name(gb_pos+g2);
// Skip past all variables consumed by this GROUPING_SETS entry.
1991 gb_pos += gb_tbl.gb_entry_count[g];
1998 if(having.size() > 0){
2001 for(h=0;h<having.size();h++){
2002 if(h>0) ret += " AND ";
2003 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// rsgah_qpn::to_query_string: textual form of this aggregation node.
// Renders the select list, From line, where predicates, a flat group-by
// list ("<def> AS <name>"), the having predicates, and finally a
// "Closing_When" clause built from closing_when. All expressions and
// predicates are rendered against aggr_tbl; predicate lists are
// parenthesised and joined with " AND ".
2012 string rsgah_qpn::to_query_string(){
2014 string ret = "Select ";
2016 for(s=0;s<select_list.size();s++){
2018 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2019 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2023 ret += "From "+table_name->to_string()+"\n";
2025 if(where.size() > 0){
2028 for(w=0;w<where.size();w++){
2029 if(w>0) ret += " AND ";
2030 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2035 if(gb_tbl.size() > 0){
2038 for(g=0;g<gb_tbl.size();g++){
2039 if(g>0) ret += ", ";
2040 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2041 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl)+" AS ";
2043 ret += gb_tbl.get_name(g);
2048 if(having.size() > 0){
2051 for(h=0;h<having.size();h++){
2052 if(h>0) ret += " AND ";
2053 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// Clause specific to this node type.
2058 if(closing_when.size() > 0){
2059 ret += "Closing_When ";
2061 for(h=0;h<closing_when.size();h++){
2062 if(h>0) ret += " AND ";
2063 ret += "(" + pred_to_query_str(closing_when[h]->pr,&aggr_tbl) + ")";
// sgahcwcb_qpn::to_query_string: textual form of this aggregation node.
// Renders the select list, From line, where predicates and a flat group-by
// list, then the clauses specific to this node type:
//   "Supergroup "    - names of the group-by variables whose index is in sg_tbl
//   having predicates
//   "Cleaning_When " - predicates from cleanwhen
//   "Cleaning_By "   - predicates from cleanby
// All expressions and predicates are rendered against aggr_tbl; predicate
// lists are parenthesised and joined with " AND ".
2072 string sgahcwcb_qpn::to_query_string(){
2074 string ret = "Select ";
2076 for(s=0;s<select_list.size();s++){
2078 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2079 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2083 ret += "From "+table_name->to_string()+"\n";
2085 if(where.size() > 0){
2088 for(w=0;w<where.size();w++){
2089 if(w>0) ret += " AND ";
2090 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2095 if(gb_tbl.size() > 0){
2098 for(g=0;g<gb_tbl.size();g++){
2099 if(g>0) ret += ", ";
2100 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2101 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
2103 ret += gb_tbl.get_name(g);
// Supergroup: walk all group-by variables and keep those present in sg_tbl.
2108 if(sg_tbl.size() > 0){
2109 ret += "Supergroup ";
2111 bool first_elem = true;
2112 for(g=0;g<gb_tbl.size();g++){
2113 if(sg_tbl.count(g)){
2118 ret += gb_tbl.get_name(g);
2124 if(having.size() > 0){
2127 for(h=0;h<having.size();h++){
2128 if(h>0) ret += " AND ";
2129 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
2135 if(cleanwhen.size() > 0){
2136 ret += "Cleaning_When ";
2138 for(h=0;h<cleanwhen.size();h++){
2139 if(h>0) ret += " AND ";
2140 ret += "(" + pred_to_query_str(cleanwhen[h]->pr,&aggr_tbl) + ")";
2145 if(cleanby.size() > 0){
2146 ret += "Cleaning_By ";
2148 for(h=0;h<cleanby.size();h++){
2149 if(h>0) ret += " AND ";
2150 ret += "(" + pred_to_query_str(cleanby[h]->pr,&aggr_tbl) + ")";
// watch_tbl_qpn::to_query_string: textual form of a watchlist table node.
// Emits "WATCHLIST FIELDS {" followed by one line per field of
// table_layout (field_entry::to_string()). The DEFINE section for
// filename / refresh_interval is currently commented out.
// NOTE(review): fields is a by-value copy of the vector returned by
// get_fields(), and `int f` is compared against the unsigned size().
2158 string watch_tbl_qpn::to_query_string(){
2160 // ret += "DEFINE {\n";
2161 // ret += "\tfilename='"+filename+";\n";
2162 // ret += "\trefresh_interval="+to_string(refresh_interval)+";\n}\n";
2163 ret += "WATCHLIST FIELDS {\n";
2164 std::vector<field_entry *> fields = table_layout->get_fields();
2165 for(int f=0;f<fields.size();++f){
2166 ret += fields[f]->to_string()+"\n";
// mrg_qpn::to_query_string: textual form of a merge node.
// Emits "Merge <mvars[0]> : <mvars[1]>", a " SLACK <expr>" part rendered
// from `slack`, and the comma-separated list of sources in fm.
// NOTE(review): mvars[0] and mvars[1] are read without a size check, so
// this assumes at least two merge variables - confirm for merges that
// have not been reduced to binary form.
2173 string mrg_qpn::to_query_string(){
2175 string ret="Merge ";
2176 ret += mvars[0]->to_query_string() + " : " + mvars[1]->to_query_string();
2178 ret += " SLACK "+se_to_query_string(slack, NULL);
2183 for(t=0;t<fm.size();++t){
2184 if(t>0) ret += ", ";
2185 ret += fm[t]->to_string();
// join_eq_hash_qpn::to_query_string: textual form of an equi-join node.
// Emits the select list (no aggregate table), one join keyword out of
// INNER_JOIN / LEFT_OUTER_JOIN / RIGHT_OUTER_JOIN / OUTER_JOIN chosen from
// `properties`, the sources in `from`, and the where predicates
// parenthesised and joined with " AND ".
2192 string join_eq_hash_qpn::to_query_string(){
2194 string ret = "Select ";
2196 for(s=0;s<select_list.size();s++){
2198 ret += se_to_query_string(select_list[s]->se, NULL);
2199 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2203 // NOTE: assuming binary join.
// Combines the two sources' property values into one code:
// from[0]'s value plus twice from[1]'s value.
2204 int properties = from[0]->get_property()+2*from[1]->get_property();
2207 ret += "INNER_JOIN ";
2210 ret += "LEFT_OUTER_JOIN ";
2213 ret += "RIGHT_OUTER_JOIN ";
2216 ret += "OUTER_JOIN ";
2222 for(f=0;f<from.size();++f){
2224 ret += from[f]->to_string();
2228 if(where.size() > 0){
2231 for(w=0;w<where.size();w++){
2232 if(w>0) ret += " AND ";
2233 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// filter_join_qpn::to_query_string: textual form of a filter-join node.
// Emits the select list (no aggregate table), then
// "FILTER_JOIN(<temporal field>,<temporal_range>) ", the sources in
// `from`, and the where predicates parenthesised and joined with " AND ".
2241 string filter_join_qpn::to_query_string(){
2243 string ret = "Select ";
2245 for(s=0;s<select_list.size();s++){
2247 ret += se_to_query_string(select_list[s]->se, NULL);
2248 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2252 // NOTE: assuming binary join.
2253 ret += "FILTER_JOIN("+temporal_var->field+","+int_to_string(temporal_range)+") ";
2257 for(f=0;f<from.size();++f){
2259 ret += from[f]->to_string();
2263 if(where.size() > 0){
2266 for(w=0;w<where.size();w++){
2267 if(w>0) ret += " AND ";
2268 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// watch_join_qpn::to_query_string: textual form of a watchlist-join node.
// Emits the select list (no aggregate table), the keyword
// "WATCHLIST_JOIN ", the sources in `from`, and the where predicates
// parenthesised and joined with " AND ".
2276 string watch_join_qpn::to_query_string(){
2278 string ret = "Select ";
2280 for(s=0;s<select_list.size();s++){
2282 ret += se_to_query_string(select_list[s]->se, NULL);
2283 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2287 // NOTE: assuming binary join.
2288 ret += "WATCHLIST_JOIN ";
2292 for(f=0;f<from.size();++f){
2294 ret += from[f]->to_string();
2298 if(where.size() > 0){
2301 for(w=0;w<where.size();w++){
2302 if(w>0) ret += " AND ";
2303 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
2313 // -----------------------------------------------------------------
2314 // Query node subclass specific processing.
// Restructure this merge node into a tree of merge nodes and return the nodes
// involved. In every visible path `this` is the last element appended to the
// returned vector; child nodes created via make_copy() precede it.
// NOTE(review): several lines of this definition (local declarations, the
// conditions guarding each case, closing braces, return statements and possibly
// comment delimiters around the debug printfs) are not visible in this extract,
// so the case boundaries described below are assumptions -- confirm against the
// complete file.
2317 vector<mrg_qpn *> mrg_qpn::split_sources(){
2318 vector<mrg_qpn *> ret;
// Sanity checks: fm and mvars must have the same number of entries, and a
// merge with a single source is reported as an internal error.
2322 if(fm.size() != mvars.size()){
2323 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources. fm.size() = %lu, mvars.size() = %lu\n",fm.size(),mvars.size());
2327 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources, fm size is 1.\n");
// Trace output listing the sources of the node being split.
2333 printf("spliting sources merge node, name = %s, %d sources.\n\t",node_name.c_str(), fm.size());
2334 for(ff=0;ff<fm.size();++ff){
2335 printf("%s ",fm[ff]->to_string().c_str());
2340 // Handle special cases.
// Case where the node is returned unchanged (guarding condition not visible).
2342 ret.push_back(this);
// Case where the first two sources are moved into a child copy: the child
// receives fm[0], fm[1] and the matching mvars entries.
2347 mrg_qpn *new_mrg = (mrg_qpn *)this->make_copy("_cH1");
2348 new_mrg->fm.push_back(this->fm[0]);
2349 new_mrg->fm.push_back(this->fm[1]);
2350 new_mrg->mvars.push_back(this->mvars[0]);
2351 new_mrg->mvars.push_back(this->mvars[1]);
// Drop the first source from this node, then replace the new first source by a
// table variable reading the child's output; it keeps the range variable name
// of the source it replaces and merges on the field at merge_fieldpos.
2353 this->fm.erase(this->fm.begin());
2354 this->mvars.erase(this->mvars.begin());
2355 string vname = fm[0]->get_var_name();
2356 this->fm[0] = new tablevar_t(new_mrg->node_name.c_str());
2357 this->fm[0]->set_range_var(vname);
2358 this->mvars[0]->set_field(table_layout->get_field_name(merge_fieldpos));
2359 this->mvars[0]->set_tablevar_ref(0);
2360 this->mvars[1]->set_tablevar_ref(1);
// Child first, then this node.
2362 ret.push_back(new_mrg);
2363 ret.push_back(this);
2366 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg->node_name.c_str(),this->node_name.c_str());
2367 for(i=0;i<new_mrg->fm.size();++i)
2368 printf("\tsource %s var %d (%s, %s) \n",new_mrg->node_name.c_str(),i,new_mrg->fm[i]->to_string().c_str(), new_mrg->mvars[i]->to_string().c_str());
2369 for(i=0;i<this->fm.size();++i)
2370 printf("\tsource %s var %d (%s, %s) \n",this->node_name.c_str(),i,this->fm[i]->to_string().c_str(), this->mvars[i]->to_string().c_str());
2377 // divide up the sources between two children.
2378 // Then, recurse on the children.
2380 mrg_qpn *new_mrg1 = (mrg_qpn *)this->make_copy("_cH1");
2381 mrg_qpn *new_mrg2 = (mrg_qpn *)this->make_copy("_cH2");
// The first fm.size()/2 sources go to new_mrg1, the remainder to new_mrg2
// (the second loop continues from the index where the first one stopped).
2382 for(i=0;i<this->fm.size()/2;++i){
2383 new_mrg1->fm.push_back(this->fm[i]);
2384 new_mrg1->mvars.push_back(this->mvars[i]);
2385 //printf("Pushing %d (%s, %s) to new_mrg1\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
2387 for(;i<this->fm.size();++i){
2388 new_mrg2->fm.push_back(this->fm[i]);
2389 new_mrg2->mvars.push_back(this->mvars[i]);
2390 //printf("Pushing %d (%s, %s) to new_mrg2\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// Renumber the table-variable references so they index into each child's own fm.
2392 for(i=0;i<new_mrg1->mvars.size();++i)
2393 new_mrg1->mvars[i]->set_tablevar_ref(i);
2394 for(i=0;i<new_mrg2->mvars.size();++i)
2395 new_mrg2->mvars[i]->set_tablevar_ref(i);
2397 // Children created, make this merge them.
// NOTE(review): fm and mvars are presumably emptied in lines not shown here
// before the two child sources below are appended -- confirm.
2401 tablevar_t *tmp_tblvar = new tablevar_t(new_mrg1->node_name.c_str());
2402 tmp_tblvar->set_range_var("_mrg_var_1");
2403 fm.push_back(tmp_tblvar);
2404 colref_t *tmp_cref = new colref_t("_mrg_var_1",table_layout->get_field_name(merge_fieldpos).c_str());
2405 tmp_cref->set_tablevar_ref(0);
2406 mvars.push_back(tmp_cref);
2408 tmp_tblvar = new tablevar_t(new_mrg2->node_name.c_str());
2409 tmp_tblvar->set_range_var("_mrg_var_2");
2410 fm.push_back(tmp_tblvar);
2411 tmp_cref = new colref_t("_mrg_var_2",table_layout->get_field_name(merge_fieldpos).c_str());
2412 tmp_cref->set_tablevar_ref(1);
2413 mvars.push_back(tmp_cref);
2417 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg1->node_name.c_str(),new_mrg2->node_name.c_str());
2418 for(i=0;i<new_mrg1->fm.size();++i)
2419 printf("\tsource %s var %d (%s, %s) \n",new_mrg1->node_name.c_str(),i,new_mrg1->fm[i]->to_string().c_str(), new_mrg1->mvars[i]->to_string().c_str());
2420 for(i=0;i<new_mrg2->fm.size();++i)
2421 printf("\tsource %s var %d (%s, %s) \n",new_mrg2->node_name.c_str(),i,new_mrg2->fm[i]->to_string().c_str(), new_mrg2->mvars[i]->to_string().c_str());
2424 // Recurse and put them together
// Result order: everything produced for child 1, then for child 2, then this node.
2425 vector<mrg_qpn *> st1 = new_mrg1->split_sources();
2426 ret.insert(ret.end(), st1.begin(), st1.end());
2427 vector<mrg_qpn *> st2 = new_mrg2->split_sources();
2428 ret.insert(ret.end(), st2.begin(), st2.end());
2430 ret.push_back(this);
2438 //////// Split helper function : resolve interfaces
// Compute the (machine, interface) pairs that `table` reads from.
//   table            - table variable whose interface (or interface-set query) is resolved
//   ifdb             - interface database used to evaluate an interface-set query
//   n_virtual_ifaces - when equal to 1 the basic interface list is returned as-is
//   hfta_parallelism, hfta_idx - select which slice of virtual interfaces is generated
// When n_virtual_ifaces != 1, each basic interface is expanded into names of
// the form <iface> + "X" + int_to_string(2*s), for s in
// [hfta_idx*stride, (hfta_idx+1)*stride) with stride = n_virtual_ifaces / hfta_parallelism.
// NOTE(review): the final return and the bodies of the error branches are not
// visible in this extract; `ifaces` is presumably what is returned -- confirm.
2440 vector<pair<string,string> > get_ifaces(tablevar_t *table, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2441 vector<pair<string,string> > basic_ifaces;
// An interface-set query is evaluated through the interface database;
// otherwise the table's own machine/interface pair is the only entry.
2443 if(table->get_ifq()){
2444 basic_ifaces= ifdb->eval(table->get_interface(),ierr);
2446 fprintf(stderr,"ERROR, Interface set %s not found.\n",table->get_interface().c_str());
2449 fprintf(stderr,"ERROR, interface definition file didn't parse.\n");
2452 basic_ifaces.push_back(make_pair(table->get_machine(), table->get_interface()));
2455 if(n_virtual_ifaces == 1)
2456 return basic_ifaces;
// Integer division. NOTE(review): no guard against hfta_parallelism == 0 is visible here.
2458 int stride = n_virtual_ifaces / hfta_parallelism;
2460 vector<pair<string,string> > ifaces;
2462 for(i=0;i<basic_ifaces.size();++i){
2463 string mach = basic_ifaces[i].first;
2464 string iface = basic_ifaces[i].second;
2465 for(s=hfta_idx*stride;s<(hfta_idx+1)*stride;++s){
2466 ifaces.push_back(pair<string, string>(mach,iface+"X"+int_to_string(2*s)));
2474 ///////// Split helper function : compute slack in a generated
// Compute the slack for this merge node and store it in the `slack` member.
//   t_se    - temporal scalar expression whose divisor is looked up
//   fname   - field name (NOTE(review): `fnm` used below is presumably derived
//             from it in lines not visible in this extract -- confirm)
//   sources - (machine, interface) pairs whose "Slack_<fnm>" values are consulted
//   ifdb    - interface database queried through get_iface_vals()
//   gbt     - group-by table passed on to find_temporal_divisor()
// The largest slack value found over all sources is divided by the temporal
// divisor, rounded up, and wrapped in a LITERAL_LONGINT literal expression.
// NOTE(review): the bodies of the `<= 0` branches and some declarations are not
// visible in this extract.
2477 void mrg_qpn::resolve_slack(scalarexp_t *t_se, string fname, vector<pair<string, string> > &sources, ifq_t *ifdb, gb_table *gbt){
2481 // Find slack divisor, if any.
2483 long long int slack_divisor = find_temporal_divisor(t_se,gbt, fnm);
2484 if(slack_divisor <= 0){
2489 // find max slack in the iface spec
2490 long long int max_slacker = 0, this_slacker;
2491 string rname = "Slack_"+fnm;
2492 for(s=0;s<sources.size();++s){
2493 string src_machine = sources[s].first;
2494 string src_iface = sources[s].second;
2495 vector<string> slack_vec = ifdb->get_iface_vals(src_machine, src_iface,rname,e,es);
2496 for(v=0;v<slack_vec.size();++v){
// Use the value only when exactly one item was converted. sscanf returns EOF
// (non-zero) on an empty string, so a bare truth test would let an
// uninitialised this_slacker be compared. "%lld" is the standard conversion
// for long long; "%qd" is a non-standard extension.
2497 if(sscanf(slack_vec[v].c_str(),"%lld",&this_slacker) == 1){
2498 if(this_slacker > max_slacker)
2499 max_slacker = this_slacker;
2504 if(max_slacker <= 0){
// Round up so the slack is never smaller than max_slacker / slack_divisor.
2510 long long int the_slack=(long long int)(ceil(((double)max_slacker)/((double)slack_divisor)));
2512 sprintf(tmps,"%lld",the_slack);
2513 literal_t *slack_lit = new literal_t(tmps, LITERAL_LONGINT);
2514 slack = new scalarexp_t(slack_lit);
2518 //------------------------------------------------------------------
2519 // split a node to extract LFTA components.
// A watch-table node is not split: the visible code puts this node itself
// (the `this` pointer, not a duplicate) into the returned vector.
// NOTE(review): the return statement and any assignment to hfta_returned are
// not visible in this extract.
2521 vector<qp_node *> watch_tbl_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2522 // nothing to do, nothing to split, return copy of self.
2526 vector<qp_node *> ret_vec;
2528 ret_vec.push_back(this);
// A merge node is not split: the visible code puts this node itself
// (the `this` pointer, not a duplicate) into the returned vector.
// NOTE(review): the return statement and any assignment to hfta_returned are
// not visible in this extract.
2534 vector<qp_node *> mrg_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2535 // nothing to do, nothing to split, return copy of self.
2539 vector<qp_node *> ret_vec;
2541 ret_vec.push_back(this);
// Prepare a filter join for execution as LFTA(s).
// Visible flow:
//   1. every select expression and where predicate is tested with
//      check_fta_forbidden_se / check_fta_forbidden_pr; an fta-unsafe join is
//      reported as an error;
//   2. the interface set is resolved from from[0] via get_ifaces();
//   3. with exactly one interface this node is placed in the result and every
//      FROM entry is bound to that machine/interface;
//   4. otherwise one filter_join_qpn copy is built per interface, followed by a
//      mrg_qpn (named node_name) that merges the per-interface copies.
// NOTE(review): declarations, else-branches, returns, closing braces and
// assignments to hfta_returned are not visible in this extract -- confirm the
// branch structure against the complete file.
2546 vector<qp_node *> filter_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2547 vector<qp_node *> ret_vec;
2549 // First check if the query can be pushed to the FTA.
2552 for(s=0;s<select_list.size();s++){
2553 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2556 for(p=0;p<where.size();p++){
2557 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2561 fprintf(stderr,"ERROR, filter join %s is fta-unsafe.\n",node_name.c_str());
2565 // Can it be done in a single lfta?
2566 // Get the set of interfaces it accesses.
2569 vector<string> sel_names;
2570 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2571 if (ifaces.empty()) {
2572 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2576 if(ifaces.size() == 1){
2577 // Single interface, no need to merge.
2579 ret_vec.push_back(this);
// Bind every FROM entry to the single resolved interface and clear its
// interface-set-query flag.
2581 for(i=0;i<from.size();i++){
2582 from[i]->set_machine(ifaces[0].first);
2583 from[i]->set_interface(ifaces[0].second);
2584 from[i]->set_ifq(false);
2588 // Multiple interfaces, generate the interface-specific queries plus
// NOTE(review): a second sel_names is declared here in addition to the one
// above; whether it shadows the first depends on scoping lines not visible.
2592 vector<string> sel_names;
2593 for(si=0;si<ifaces.size();++si){
2594 filter_join_qpn *fta_node = new filter_join_qpn();
// Name the per-interface copy "_<node_name>_<machine>_<interface>".
// NOTE(review): the ifaces.size()==1 test looks unreachable if the
// single-interface case above returns early -- confirm.
2597 if(ifaces.size()==1)
2598 fta_node->set_node_name( node_name );
2600 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2602 fta_node->set_node_name(new_name);
2604 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM entries and bind each duplicate to interface si.
2608 for(f=0;f<from.size();f++){
2609 fta_node->from.push_back(from[f]->duplicate());
2610 fta_node->from[f]->set_machine(ifaces[si].first);
2611 fta_node->from[f]->set_interface(ifaces[si].second);
2612 fta_node->from[f]->set_ifq(false);
2614 fta_node->temporal_var = temporal_var;
2615 fta_node->temporal_range = temporal_range;
2617 fta_node->use_bloom = use_bloom;
2619 for(s=0;s<select_list.size();s++){
2620 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Duplicate each predicate class (shared_pred, pred_t0, pred_t1, hash_eq,
// postfilter); every duplicate is re-analyzed with analyze_cnf and is also
// appended to the copy's where list.
2623 for(p=0;p<shared_pred.size();p++){
2624 predicate_t *new_pr = dup_pr(shared_pred[p]->pr, NULL);
2625 cnf_elem *new_cnf = new cnf_elem(new_pr);
2626 analyze_cnf(new_cnf);
2627 fta_node->shared_pred.push_back(new_cnf);
2628 fta_node->where.push_back(new_cnf);
2630 for(p=0;p<pred_t0.size();p++){
2631 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2632 cnf_elem *new_cnf = new cnf_elem(new_pr);
2633 analyze_cnf(new_cnf);
2634 fta_node->pred_t0.push_back(new_cnf);
2635 fta_node->where.push_back(new_cnf);
2637 for(p=0;p<pred_t1.size();p++){
2638 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2639 cnf_elem *new_cnf = new cnf_elem(new_pr);
2640 analyze_cnf(new_cnf);
2641 fta_node->pred_t1.push_back(new_cnf);
2642 fta_node->where.push_back(new_cnf);
2644 for(p=0;p<hash_eq.size();p++){
2645 predicate_t *new_pr = dup_pr(hash_eq[p]->pr, NULL);
2646 cnf_elem *new_cnf = new cnf_elem(new_pr);
2647 analyze_cnf(new_cnf);
2648 fta_node->hash_eq.push_back(new_cnf);
2649 fta_node->where.push_back(new_cnf);
2651 for(p=0;p<postfilter.size();p++){
2652 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2653 cnf_elem *new_cnf = new cnf_elem(new_pr);
2654 analyze_cnf(new_cnf);
2655 fta_node->postfilter.push_back(new_cnf);
2656 fta_node->where.push_back(new_cnf);
2659 // Xfer all of the parameters.
2660 // Use existing handle annotations.
2661 vector<string> param_names = param_tbl->get_param_names();
2663 for(pi=0;pi<param_names.size();pi++){
2664 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2665 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2666 param_tbl->handle_access(param_names[pi]));
2668 fta_node->definitions = definitions;
// A non-zero result from resolve_if_params marks this node with error code 3;
// the message is written into this->err_str by the callee.
2669 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2670 this->error_code = 3;
2674 ret_vec.push_back(fta_node);
// The merge node is built from the first per-interface copy and merges the
// nodes named in sel_names.
2677 mrg_qpn *mrg_node = new mrg_qpn((filter_join_qpn *)ret_vec[0],
2678 node_name, sel_names,ifaces, ifdb);
2679 ret_vec.push_back(mrg_node);
// Prepare a watchlist join for execution as LFTA(s).
// Visible flow:
//   1. every select expression and where predicate is tested with
//      check_fta_forbidden_se / check_fta_forbidden_pr; an fta-unsafe join is
//      reported as an error;
//   2. the interface set is resolved from from[0] via get_ifaces();
//   3. with exactly one interface this node is placed in the result; from[0] is
//      bound to that interface while from[1] reads the "_local_" interface on
//      the same machine;
//   4. otherwise one watch_join_qpn copy is built per interface, followed by a
//      mrg_qpn (named node_name) that merges the per-interface copies.
// NOTE(review): declarations, else-branches, returns, closing braces and
// assignments to hfta_returned are not visible in this extract -- confirm the
// branch structure against the complete file.
2690 vector<qp_node *> watch_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2691 vector<qp_node *> ret_vec;
2693 // First check if the query can be pushed to the FTA.
2696 for(s=0;s<select_list.size();s++){
2697 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2700 for(p=0;p<where.size();p++){
2701 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2705 fprintf(stderr,"ERROR, watchlist join %s is fta-unsafe.\n",node_name.c_str());
2709 // Can it be done in a single lfta?
2710 // Get the set of interfaces it accesses.
2713 vector<string> sel_names;
2714 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2715 if (ifaces.empty()) {
// The diagnostic names this class so the failure can be traced to the right
// operator (it previously named filter_join_qpn).
2716 fprintf(stderr,"INTERNAL ERROR in watch_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2720 if(ifaces.size() == 1){
2721 // Single interface, no need to merge.
2723 ret_vec.push_back(this);
2725 // Treat the range vars a bit differently, the 2nd is reading from a _local_ watchlist.
2726 from[0]->set_machine(ifaces[0].first);
2727 from[0]->set_interface(ifaces[0].second);
2728 from[0]->set_ifq(false);
2730 from[1]->set_machine(ifaces[0].first);
2731 from[1]->set_interface("_local_");
2732 from[1]->set_ifq(false);
2736 // Multiple interfaces, generate the interface-specific queries plus
// NOTE(review): a second sel_names is declared here in addition to the one
// above; whether it shadows the first depends on scoping lines not visible.
2740 vector<string> sel_names;
2741 for(si=0;si<ifaces.size();++si){
2742 watch_join_qpn *fta_node = new watch_join_qpn();
// Name the per-interface copy "_<node_name>_<machine>_<interface>".
// NOTE(review): the ifaces.size()==1 test looks unreachable if the
// single-interface case above returns early -- confirm.
2745 if(ifaces.size()==1)
2746 fta_node->set_node_name( node_name );
2748 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2750 fta_node->set_node_name(new_name);
2752 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM entries. The two set_interface calls below are
// alternatives; the selecting condition is not visible in this extract
// (presumably the first entry gets the real interface and the other
// "_local_", mirroring the single-interface case above -- confirm).
2756 for(f=0;f<from.size();f++){
2757 fta_node->from.push_back(from[f]->duplicate());
2758 fta_node->from[f]->set_machine(ifaces[si].first);
2760 fta_node->from[f]->set_interface(ifaces[si].second);
2762 fta_node->from[f]->set_interface("_local_");
2763 fta_node->from[f]->set_ifq(false);
2766 for(s=0;s<select_list.size();s++){
2767 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Duplicate each predicate class; every duplicate is re-analyzed with
// analyze_cnf and is also appended to the copy's where list.
2770 for(p=0;p<pred_t0.size();p++){
2771 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2772 cnf_elem *new_cnf = new cnf_elem(new_pr);
2773 analyze_cnf(new_cnf);
2774 fta_node->pred_t0.push_back(new_cnf);
2775 fta_node->where.push_back(new_cnf);
2777 for(p=0;p<pred_t1.size();p++){
2778 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2779 cnf_elem *new_cnf = new cnf_elem(new_pr);
2780 analyze_cnf(new_cnf);
2781 fta_node->pred_t1.push_back(new_cnf);
2782 fta_node->where.push_back(new_cnf);
// hash_eq is indexed by key field name here, one predicate per key.
2784 for(p=0;p<key_flds.size();p++){ // we've checked that all keys are covered
2785 string k = key_flds[p];
2786 predicate_t *new_pr = dup_pr(hash_eq[k]->pr, NULL);
2787 cnf_elem *new_cnf = new cnf_elem(new_pr);
2788 analyze_cnf(new_cnf);
2789 fta_node->hash_eq[k] = new_cnf;
2790 fta_node->where.push_back(new_cnf);
// NOTE(review): duplicates of join_filter are stored in the copy's postfilter
// list, not its join_filter list -- confirm this is intended.
2792 for(p=0;p<join_filter.size();p++){
2793 predicate_t *new_pr = dup_pr(join_filter[p]->pr, NULL);
2794 cnf_elem *new_cnf = new cnf_elem(new_pr);
2795 analyze_cnf(new_cnf);
2796 fta_node->postfilter.push_back(new_cnf);
2797 fta_node->where.push_back(new_cnf);
2799 for(p=0;p<postfilter.size();p++){
2800 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2801 cnf_elem *new_cnf = new cnf_elem(new_pr);
2802 analyze_cnf(new_cnf);
2803 fta_node->postfilter.push_back(new_cnf);
2804 fta_node->where.push_back(new_cnf);
2806 fta_node->key_flds = key_flds;
2808 // Xfer all of the parameters.
2809 // Use existing handle annotations.
2810 vector<string> param_names = param_tbl->get_param_names();
2812 for(pi=0;pi<param_names.size();pi++){
2813 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2814 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2815 param_tbl->handle_access(param_names[pi]));
2817 fta_node->definitions = definitions;
// A non-zero result from resolve_if_params marks this node with error code 3;
// the message is written into this->err_str by the callee.
2818 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2819 this->error_code = 3;
2823 ret_vec.push_back(fta_node);
// The merge node is built from the first per-interface copy and merges the
// nodes named in sel_names.
2826 mrg_qpn *mrg_node = new mrg_qpn((watch_join_qpn *)ret_vec[0],
2827 node_name, sel_names,ifaces, ifdb);
2828 ret_vec.push_back(mrg_node);
2835 // Use to search for unresolved interface param refs in an hfta.
// Accumulate into `ret` the counts reported by count_se_ifp_refs for every
// select-list expression and by count_pr_ifp_refs for every where predicate;
// ifpnames is passed through to those helpers.
// NOTE(review): the declaration/initialisation of `ret` and the return
// statement are not visible in this extract.
2837 int spx_qpn::count_ifp_refs(set<string> &ifpnames){
2840 for(i=0;i<select_list.size();++i)
2841 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2842 for(i=0;i<where.size();++i)
2843 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulate into `ret` the interface-parameter reference counts over the
// select list, where and having predicates, aggregate operands and group-by
// definitions; ifpnames is passed through to the counting helpers.
// NOTE(review): the declaration/initialisation of `ret`, the else keyword of
// the aggregate branch and the return statement are not visible in this extract.
2847 int sgah_qpn::count_ifp_refs(set<string> &ifpnames){
2850 for(i=0;i<select_list.size();++i)
2851 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2852 for(i=0;i<where.size();++i)
2853 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2854 for(i=0;i<having.size();++i)
2855 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
// Built-in, non-star aggregates contribute their single scalar expression;
// the operand-list loop below presumably handles the remaining aggregates.
2856 for(i=0;i<aggr_tbl.size();++i){
2857 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2858 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
2860 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2861 for(j=0;j<opl.size();++j)
2862 ret += count_se_ifp_refs(opl[j],ifpnames);
2865 for(i=0;i<gb_tbl.size();++i){
2866 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Accumulate into `ret` the interface-parameter reference counts over the
// select list, where / having / closing_when predicates, aggregate operands
// and group-by definitions; ifpnames is passed through to the counting helpers.
// NOTE(review): the declaration/initialisation of `ret`, the else keyword of
// the aggregate branch and the return statement are not visible in this extract.
2872 int rsgah_qpn::count_ifp_refs(set<string> &ifpnames){
2875 for(i=0;i<select_list.size();++i)
2876 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2877 for(i=0;i<where.size();++i)
2878 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2879 for(i=0;i<having.size();++i)
2880 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2881 for(i=0;i<closing_when.size();++i)
2882 ret += count_pr_ifp_refs(closing_when[i]->pr,ifpnames);
// Built-in, non-star aggregates contribute their single scalar expression;
// the operand-list loop below presumably handles the remaining aggregates.
2883 for(i=0;i<aggr_tbl.size();++i){
2884 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2885 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
2887 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2888 for(j=0;j<opl.size();++j)
2889 ret += count_se_ifp_refs(opl[j],ifpnames);
2892 for(i=0;i<gb_tbl.size();++i){
2893 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Interface-parameter reference count for a watch-table node.
// NOTE(review): the body is not visible in this extract, so the value
// returned cannot be stated here.
2898 int watch_tbl_qpn::count_ifp_refs(set<string> &ifpnames){
// Interface-parameter reference count for a merge node.
// NOTE(review): the body is not visible in this extract, so the value
// returned cannot be stated here.
2902 int mrg_qpn::count_ifp_refs(set<string> &ifpnames){
// Accumulate into `ret` the interface-parameter reference counts over the
// select list and over each predicate list of the join: prefilter[0],
// prefilter[1], temporal_eq, hash_eq and postfilter.
// NOTE(review): the declaration/initialisation of `ret` and the return
// statement are not visible in this extract.
2906 int join_eq_hash_qpn::count_ifp_refs(set<string> &ifpnames){
2909 for(i=0;i<select_list.size();++i)
2910 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2911 for(i=0;i<prefilter[0].size();++i)
2912 ret += count_pr_ifp_refs(prefilter[0][i]->pr,ifpnames);
2913 for(i=0;i<prefilter[1].size();++i)
2914 ret += count_pr_ifp_refs(prefilter[1][i]->pr,ifpnames);
2915 for(i=0;i<temporal_eq.size();++i)
2916 ret += count_pr_ifp_refs(temporal_eq[i]->pr,ifpnames);
2917 for(i=0;i<hash_eq.size();++i)
2918 ret += count_pr_ifp_refs(hash_eq[i]->pr,ifpnames);
2919 for(i=0;i<postfilter.size();++i)
2920 ret += count_pr_ifp_refs(postfilter[i]->pr,ifpnames);
// Accumulate into `ret` the counts reported by count_se_ifp_refs for every
// select-list expression and by count_pr_ifp_refs for every where predicate.
// NOTE(review): the declaration/initialisation of `ret` and the return
// statement are not visible in this extract.
2924 int filter_join_qpn::count_ifp_refs(set<string> &ifpnames){
2927 for(i=0;i<select_list.size();++i)
2928 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2929 for(i=0;i<where.size();++i)
2930 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulate into `ret` the counts reported by count_se_ifp_refs for every
// select-list expression and by count_pr_ifp_refs for every where predicate.
// NOTE(review): the declaration/initialisation of `ret` and the return
// statement are not visible in this extract.
2934 int watch_join_qpn::count_ifp_refs(set<string> &ifpnames){
2937 for(i=0;i<select_list.size();++i)
2938 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2939 for(i=0;i<where.size();++i)
2940 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2946 // Resolve interface params to string literals
// Resolve interface-parameter references in the select list and where clause,
// using the machine and interface of the first FROM entry (from[0]).
//   ifdb - interface database handed to the resolve helpers
//   err  - string handed to the resolve helpers (presumably receives the
//          error text -- confirm)
// NOTE(review): the statements executed when a helper returns non-zero, and
// the final return, are not visible in this extract; callers in this file
// treat a non-zero result as failure.
2947 int filter_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2950 string ifname = from[0]->get_interface();
2951 string ifmach = from[0]->get_machine();
2952 for(i=0;i<select_list.size();++i)
2953 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2955 for(i=0;i<where.size();++i)
2956 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and where clause,
// using the machine and interface of the first FROM entry (from[0]).
//   ifdb - interface database handed to the resolve helpers
//   err  - string handed to the resolve helpers (presumably receives the
//          error text -- confirm)
// NOTE(review): the statements executed when a helper returns non-zero, and
// the final return, are not visible in this extract; callers in this file
// treat a non-zero result as failure.
2961 int watch_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2964 string ifname = from[0]->get_interface();
2965 string ifmach = from[0]->get_machine();
2966 for(i=0;i<select_list.size();++i)
2967 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2969 for(i=0;i<where.size();++i)
2970 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and where clause,
// using the machine and interface recorded on table_name.
//   ifdb - interface database handed to the resolve helpers
//   err  - string handed to the resolve helpers (presumably receives the
//          error text -- confirm)
// NOTE(review): the statements executed when a helper returns non-zero, and
// the final return, are not visible in this extract; callers in this file
// treat a non-zero result as failure.
2976 int spx_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2979 string ifname = table_name->get_interface();
2980 string ifmach = table_name->get_machine();
2981 for(i=0;i<select_list.size();++i)
2982 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2984 for(i=0;i<where.size();++i)
2985 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references throughout an aggregation node:
// select list, where and having predicates, aggregate operands and group-by
// definitions, using the machine and interface recorded on table_name.
//   ifdb - interface database handed to the resolve helpers
//   err  - string handed to the resolve helpers (presumably receives the
//          error text -- confirm)
// NOTE(review): the statements executed when a helper returns non-zero, the
// else keyword of the aggregate branch and the final return are not visible in
// this extract.
2990 int sgah_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2993 string ifname = table_name->get_interface();
2994 string ifmach = table_name->get_machine();
2996 //printf("Select list has %d elements\n",select_list.size());
2997 for(i=0;i<select_list.size();++i){
2998 //printf("\tresolving elemet %d\n",i);
2999 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) ){
3003 for(i=0;i<where.size();++i){
3004 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err) )
3007 for(i=0;i<having.size();++i){
3008 if( resolve_pr_ifp_refs(having[i]->pr,ifmach, ifname, ifdb, err) )
3011 //printf("aggr list has %d elements\n",select_list.size());
// Built-in, non-star aggregates carry a single scalar expression; the
// operand-list loop below presumably handles the remaining aggregates.
3012 for(i=0;i<aggr_tbl.size();++i){
3013 //printf("\tresolving elemet %d\n",i);
3014 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
3015 //printf("\t\t\tbuiltin\n");
3016 if( resolve_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifmach, ifname, ifdb, err) )
3019 //printf("\t\t\tudaf\n");
3020 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
3021 for(j=0;j<opl.size();++j)
3022 if( resolve_se_ifp_refs(opl[j],ifmach, ifname, ifdb, err) )
3026 for(i=0;i<gb_tbl.size();++i){
3027 if( resolve_se_ifp_refs(gb_tbl.get_def(i), ifmach, ifname, ifdb, err) )
3036 SPLITTING A SELECTION_PROJECTION OPERATOR
3038 An SPX node may reference:
3039 literals, parameters, colrefs, functions, operators
3040 An SPX node may not reference:
3041 group-by variables, aggregates
3043 An SPX node contains
3044 selection list of SEs
3045 where list of CNF predicates
3048 If each selection SE and each where predicate is fta-safe
3049 execute entire operator as an LFTA.
3051 for each predicate in the where clause
3052 if it is fta safe, execute it in the lfta
3053 else, split each SE in the predicate, evaluate the
3054 top-level SEs in the hfta and eval the predicate on that.
3055 For each SE in the se list
3056 Split the SE, eval the high level part, push onto hfta
3060 A SE represents a value which must be computed. The LFTA
3061 must provide sub-values from which the HFTA can compute the
3063 1) the SE is fta-safe
3064 Create an entry in the selection list of the LFTA which is
3065 the SE itself. Reference this LFTA selection list entry in
3066 the HFTA (via a field name assigned to the lfta selection
3068 2) The SE is not fta-safe
3069 Determine the boundary between the fta-safe and the fta-unsafe
3070 portions of the SE. The result is a rooted tree (which is
3071 evaluated at the HFTA) which references sub-SEs (which are
3072 evaluated at the LFTA). Each of the sub-SEs is placed on
3073 the selection list of the LFTA and assigned field names,
3074 the top part is evaluated at the HFTA and references the
3075 sub-SEs through their assigned field names.
3076 The only SEs on the LFTA selection list are those created by
3077 the above mechanism. The collection of assigned field names becomes
3078 the schema of the LFTA.
3080 TODO: insert tablevar names into the colrefs.
// Split a selection/projection node into an LFTA part and, when needed, an
// HFTA (stream) part.
// Visible flow:
//   - a node whose schema type is not PROTOCOL_SCHEMA is returned unsplit;
//   - the interface set is resolved from table_name via get_ifaces();
//   - if every select expression and where predicate passes the
//     check_fta_forbidden_* tests, one spx_qpn copy is built per interface and,
//     with more than one interface, a mrg_qpn over the copies is appended;
//   - otherwise the select list and where clause are divided between an LFTA
//     node named "_fta_"+node_name and a stream node named node_name; with
//     several interfaces the LFTA part is replicated per interface and merged.
// hfta_returned is set to 2 in the merge-plus-stream case visible below; its
// other assignments are not visible in this extract.
// NOTE(review): declarations, else-branches, returns, closing braces and
// comment delimiters are missing from this extract -- confirm the branch
// structure against the complete file.
3084 vector<qp_node *> spx_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3087 vector<qp_node *> ret_vec;
3089 // If the node reads from a stream, don't split.
3090 // int t = Schema->get_table_ref(table_name->get_schema_name());
3091 int t = table_name->get_schema_ref();
3092 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3094 ret_vec.push_back(this);
3099 // Get the set of interfaces it accesses.
3102 vector<string> sel_names;
3103 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3104 if (ifaces.empty()) {
3105 fprintf(stderr,"INTERNAL ERROR in spx_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3110 // The FTA node, it is always returned.
// NOTE(review): in the all-FTA path below fta_node is reassigned to a fresh
// node for each interface, so this first allocation appears to be used only
// by the split path -- confirm. It shares this node's table_name pointer.
3112 spx_qpn *fta_node = new spx_qpn();
3113 fta_node->table_name = table_name;
3115 // for colname imputation
3116 // vector<string> fta_flds, stream_flds;
3119 // First check if the query can be pushed to the FTA.
3122 for(s=0;s<select_list.size();s++){
3123 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
3126 for(p=0;p<where.size();p++){
3127 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
3131 ////////////////////////////////////////////////////////////
3132 // The query can be executed entirely in the FTA.
3135 for(si=0;si<ifaces.size();++si){
3136 fta_node = new spx_qpn();
// With a single interface the copy keeps node_name; otherwise it is named
// "_<node_name>_<machine>_<interface>" (the else keyword is not visible).
3139 if(ifaces.size()==1)
3140 fta_node->set_node_name( node_name );
3142 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3144 fta_node->set_node_name(new_name);
3146 sel_names.push_back(fta_node->get_node_name());
// Each copy gets its own table variable bound to interface si.
3149 fta_node->table_name = table_name->duplicate();
3150 fta_node->table_name->set_machine(ifaces[si].first);
3151 fta_node->table_name->set_interface(ifaces[si].second);
3152 fta_node->table_name->set_ifq(false);
3154 for(s=0;s<select_list.size();s++){
3155 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
3157 for(p=0;p<where.size();p++){
3158 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
3159 cnf_elem *new_cnf = new cnf_elem(new_pr);
3160 analyze_cnf(new_cnf);
3162 fta_node->where.push_back(new_cnf);
3165 // Xfer all of the parameters.
3166 // Use existing handle annotations.
3167 vector<string> param_names = param_tbl->get_param_names();
3169 for(pi=0;pi<param_names.size();pi++){
3170 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3171 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3172 param_tbl->handle_access(param_names[pi]));
3174 fta_node->definitions = definitions;
// A non-zero result from resolve_if_params marks this node with error code 3.
3175 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3176 this->error_code = 3;
3180 ret_vec.push_back(fta_node);
// Several interfaces: merge the per-interface copies under the original name.
3183 if(ifaces.size() > 1){
3184 spx_qpn *tmp_spx = (spx_qpn *)(ret_vec[0]);
3185 mrg_qpn *mrg_node = new mrg_qpn(tmp_spx,
3186 node_name, sel_names,ifaces, ifdb);
// NOTE(review): the next five lines look like a commented-out region whose
// comment delimiters are not visible in this extract -- confirm.
3188 Do not split sources until we are done with optimizations
3189 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3190 for(i=0;i<split_merge.size();++i){
3191 ret_vec.push_back(split_merge[i]);
3193 hfta_returned = split_merge.size();
3195 ret_vec.push_back(mrg_node);
3200 // printf("OK as FTA.\n");
3201 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3206 ////////////////////////////////////////////////////
3207 // The fta must be split. Create a stream node.
3208 // NOTE : I am counting on the single
3209 // table in the from list. (Joins handled in a different operator).
3213 spx_qpn *stream_node = new spx_qpn();
3214 stream_node->set_node_name( node_name );
3215 // Create the tablevar in the stream's FROM clause.
3216 // set the schema name to the name of the LFTA,
3217 // and use the same tablevar name.
3218 stream_node->table_name = new tablevar_t(
3219 ("_fta_"+node_name).c_str()
3221 stream_node->table_name->set_range_var(table_name->get_var_name());
3224 fta_node->set_node_name( "_fta_"+node_name );
3226 // table var names of fta, stream.
3227 string fta_var = fta_node->table_name->get_var_name();
3228 string stream_var = stream_node->table_name->get_var_name();
3230 // Set up select list vector
3231 vector< vector<select_element *> *> select_vec;
3232 select_vec.push_back(&(fta_node->select_list)); // only one child
3235 // Split the select list into its FTA and stream parts.
3236 // If any part of the SE is fta-unsafe, it will return
3237 // a SE to execute at the stream ref'ing SE's evaluated
3238 // at the fta (which are put on the FTA's select list as a side effect).
3239 // If the SE is fta-safe, put it on the fta select list, make
3240 // a ref to it and put the ref on the stream select list.
3241 for(s=0;s<select_list.size();s++){
3242 bool fta_forbidden = false;
3243 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3244 // scalarexp_t *root_se = split_fta_se(
3245 // select_list[s]->se,fta_forbidden, fta_node->select_list, Ext_fcns
3247 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
3248 fta_forbidden, se_src, select_vec, Ext_fcns
3250 // if(fta_forbidden){
// If the expression was split, or se_src is still SPLIT_FTAVEC_NOTBLVAR after
// the call, root_se goes on the stream select list directly; the other
// visible branch wraps it in a reference to an LFTA select-list entry.
3251 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3252 stream_node->select_list.push_back(
3253 new select_element(root_se, select_list[s]->name)
3256 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,root_se,0);
3257 stream_node->select_list.push_back(
3258 new select_element(new_se, select_list[s]->name)
3264 // The WHERE clause has already been split into a set of clauses
3265 // that are ANDED together. For each clause, check if its FTA-safe.
3266 // If not, split its SE's into fta-safe and stream-executing parts,
3267 // then put a clause which ref's the SEs into the stream.
3268 // Else put it into the LFTA.
3269 predicate_t *pr_root;
3271 for(p=0;p<where.size();p++){
3272 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) ){
3273 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3274 // pr_root = split_fta_pr( where[p]->pr, fta_node->select_list, Ext_fcns);
3275 fta_forbidden = true;
3277 pr_root = dup_pr(where[p]->pr, NULL);
3278 fta_forbidden = false;
3280 cnf_elem *cnf_root = new cnf_elem(pr_root);
3281 analyze_cnf(cnf_root);
// The clause goes to exactly one of the two nodes; the selecting condition is
// not visible here (presumably fta_forbidden -- confirm).
3284 stream_node->where.push_back(cnf_root);
3286 fta_node->where.push_back(cnf_root);
3292 // Divide the parameters among the stream, FTA.
3293 // Currently : assume that the stream receives all parameters
3294 // and parameter updates, incorporates them, then passes
3295 // all of the parameters to the FTA.
3296 // This will need to change (tables, fta-unsafe types. etc.)
3298 // I will pass on the use_handle_access marking, even
3299 // though the fcn call that requires handle access might
3300 // exist in only one of the parts of the query.
3301 // Parameter manipulation and handle access determination will
3302 // need to be revisited anyway.
3303 vector<string> param_names = param_tbl->get_param_names();
3305 for(pi=0;pi<param_names.size();pi++){
3306 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3307 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3308 param_tbl->handle_access(param_names[pi]));
3309 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3310 param_tbl->handle_access(param_names[pi]));
// The LFTA copy of the definitions drops the "_referenced_ifaces" entry; the
// stream node keeps the full set.
3313 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3314 stream_node->definitions = definitions;
3316 // Now split by interfaces
3317 if(ifaces.size() > 1){
3318 for(si=0;si<ifaces.size();++si){
3319 spx_qpn *subq_node = new spx_qpn();
3321 // Name the subquery
3322 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3324 subq_node->set_node_name( new_name) ;
3325 sel_names.push_back(subq_node->get_node_name());
3328 subq_node->table_name = fta_node->table_name->duplicate();
3329 subq_node->table_name->set_machine(ifaces[si].first);
3330 subq_node->table_name->set_interface(ifaces[si].second);
3331 subq_node->table_name->set_ifq(false);
// Each subquery is a copy of the LFTA part computed above (select list and
// where clause of fta_node), bound to interface si.
3333 for(s=0;s<fta_node->select_list.size();s++){
3334 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3336 for(p=0;p<fta_node->where.size();p++){
3337 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3338 cnf_elem *new_cnf = new cnf_elem(new_pr);
3339 analyze_cnf(new_cnf);
3341 subq_node->where.push_back(new_cnf);
3343 // Xfer all of the parameters.
3344 // Use existing handle annotations.
3345 vector<string> param_names = param_tbl->get_param_names();
3347 for(pi=0;pi<param_names.size();pi++){
3348 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3349 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3350 param_tbl->handle_access(param_names[pi]));
3352 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3353 this->error_code = 3;
3356 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3358 ret_vec.push_back(subq_node);
// The merge node takes the LFTA node's name ("_fta_"+node_name), so the stream
// node created above reads from the merge of the per-interface subqueries.
3361 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3362 fta_node->node_name, sel_names, ifaces, ifdb);
// NOTE(review): the next four lines look like a commented-out region whose
// comment delimiters are not visible in this extract -- confirm.
3364 Do not split sources until we are done with optimizations
3365 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3366 for(i=0;i<split_merge.size();++i){
3367 ret_vec.push_back(split_merge[i]);
3370 ret_vec.push_back(mrg_node);
3371 ret_vec.push_back(stream_node);
3372 hfta_returned = 1/*split_merge.size()*/ + 1;
// Single interface: bind the LFTA node directly, then return it followed by
// the stream node.
3375 fta_node->table_name->set_machine(ifaces[0].first);
3376 fta_node->table_name->set_interface(ifaces[0].second);
3377 fta_node->table_name->set_ifq(false);
3378 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3379 this->error_code = 3;
3382 ret_vec.push_back(fta_node);
3383 ret_vec.push_back(stream_node);
3387 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3388 // printf("Stream node is:\n%s\n\n",stream_node->to_query_string().c_str() );
3396 Splitting an aggregation+sampling operator.
3397 right now, return an error if any splitting is required.
// Splitting is not implemented for the sampling/aggregation operator.
// Visible behavior: a node whose schema type is not PROTOCOL_SCHEMA is placed
// unsplit in the result; otherwise an error message is printed.
// NOTE(review): the return statements and whatever follows the error message
// are not visible in this extract.
3400 vector<qp_node *> sgahcwcb_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3404 vector<qp_node *> ret_vec;
3405 int s, p, g, a, o, i;
3408 vector<string> fta_flds, stream_flds;
3410 // If the node reads from a stream, don't split.
3411 // int t = Schema->get_table_ref(table_name->get_schema_name());
3412 int t = table_name->get_schema_ref();
3413 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3414 ret_vec.push_back(this);
3418 fprintf(stderr,"ERROR : cannot split a sampling operator (not yet implemented).\n");
3428 Splitting a running aggregation operator.
3429 The code is almost identical to that of the sgah operator
3431 - there is no lfta-only option.
3432 - the stream node is rsgah_qpn (lfta is sgah or spx)
3433 - need to handle the closing when (similar to having)
// rsgah_qpn::split_node_for_fta
// Splits a running-aggregation node that reads from a protocol source into
// a low-level part (LFTA) and a high-level part (HFTA).
//   Ext_fcns      : external function table; used by the FTA-safety checks,
//                   for the sub/super aggregate lookup of UDAFs and for the
//                   data type of UDAF results.
//   Schema        : used to look up the schema type of the source table.
//   hfta_returned : output; set to 2 on the multi-interface paths below
//                   (merge node plus stream node).
//   ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx :
//                   forwarded to get_ifaces(); ifdb is also used to resolve
//                   interface parameters of the generated LFTA nodes.
// The result vector is filled with the LFTA node(s) first, then the merge
// node (only when more than one interface is read), then the HFTA stream
// node.  The stream node is always an rsgah_qpn named node_name that reads
// from "_fta_"+node_name.
// Two split shapes are produced:
//   - aggr/aggr      : the LFTA is an sgah_qpn doing partial aggregation.
//   - selection/aggr : the LFTA is an spx_qpn that only selects/projects;
//                      all aggregation happens in the stream node.
// Failures are reported through this->error_code / this->err_str
// (1 = FTA-safety violation, 3 = interface parameter resolution failed).
3436 vector<qp_node *> rsgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3440 vector<qp_node *> ret_vec;
// Loop counters: s=select list, p=predicates, g=group-by vars,
// a=aggregates, o=UDAF operands.
3441 int s, p, g, a, o, i;
// NOTE(review): fta_flds / stream_flds are not used by any statement visible
// in this function - confirm and consider removing.
3444 vector<string> fta_flds, stream_flds;
3446 // If the node reads from a stream, don't split.
3447 // int t = Schema->get_table_ref(table_name->get_schema_name());
3448 int t = table_name->get_schema_ref();
3449 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3450 ret_vec.push_back(this);
3454 // Get the set of interfaces it accesses.
// sel_names collects the names of the per-interface subqueries; it is later
// handed to the mrg_qpn constructor as the list of merge sources.
3456 vector<string> sel_names;
// Each entry is a (machine, interface) pair: .first is passed to
// set_machine() and .second to set_interface() further down.
3457 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
// An empty interface set is treated as an internal error; ifaces[0] is
// dereferenced unconditionally on the single-interface paths below.
3458 if (ifaces.empty()) {
3459 fprintf(stderr,"INTERNAL ERROR in rsgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3466 //////////////////////////////////////////////////////////////
3467 /// Split into lfta, hfta.
3469 // A rsgah node must always be split,
3470 // if for no other reason than to complete the
3471 // partial aggregation.
3473 // First, determine if the query can be split into aggr/aggr,
3474 // or if it must be selection/aggr.
3475 // Splitting into selection/aggr is allowed only
3476 // if select_lfta is set.
// select_allowed : the query carries a "select_lfta" define.
// select_rqd     : presumably switched to true by the checks below when an
//                  FTA-unsafe construct is found and select_lfta is enabled
//                  (the assignment is not visible here - TODO confirm).
3479 bool select_allowed = definitions.count("select_lfta")>0;
3480 bool select_rqd = false;
// Indices of group-by variables whose defining expression failed the FTA
// safety check; consulted again when the WHERE clause is split in the
// selection/aggr path.
3482 set<int> unsafe_gbvars; // for processing where clause
// NOTE(review): the error paths below format into tmpstr with sprintf; the
// buffer is declared outside this function and the query strings have no
// fixed length, so a bounded snprintf would be safer - confirm buffer size.
3483 for(g=0;g<gb_tbl.size();g++){
3484 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3485 if(!select_allowed){
3486 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3487 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3489 this->error_code = 1;
3490 this->err_str = tmpstr;
3494 unsafe_gbvars.insert(g);
3499 // Verify that the SEs in the aggregate definitions are fta-safe
3500 for(a=0;a<aggr_tbl.size();++a){
3501 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3502 if(ase != NULL){ // COUNT(*) does not have a SE.
3503 if(!select_allowed){
3504 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3505 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3506 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3508 this->error_code = 1;
3509 this->err_str = tmpstr;
3518 // Verify that all of the ref'd UDAFs can be split.
// A UDAF can take part in an aggr/aggr split only if the external function
// table supplies both a sub-aggregate and a super-aggregate id for it
// (a negative id is treated as "not available").
3520 for(a=0;a<aggr_tbl.size();++a){
3521 if(! aggr_tbl.is_builtin(a)){
3522 int afcn = aggr_tbl.get_fcn_id(a);
3523 int super_id = Ext_fcns->get_superaggr_id(afcn);
3524 int sub_id = Ext_fcns->get_subaggr_id(afcn);
3525 if(super_id < 0 || sub_id < 0){
3526 if(!select_allowed){
// Unlike the other checks, this message is appended to err_str rather than
// replacing it.
3527 this->err_str += "ERROR in rsgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3528 this->error_code = 1;
// Every WHERE predicate must be FTA-safe unless select_lfta is enabled.
3537 for(p=0;p<where.size();p++){
3538 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3539 if(!select_allowed){
3540 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3541 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3543 this->error_code = 1;
3544 this->err_str = tmpstr;
3555 /////////////////////////////////////////////////////
3556 // Split into aggr/aggr.
// LFTA side: an sgah_qpn named "_fta_"+node_name.  It shares this node's
// tablevar_t object (the pointer is copied, no duplicate is made), so the
// set_machine / set_interface / set_ifq calls on the single-interface path
// also modify the table variable of the original node.
3562 sgah_qpn *fta_node = new sgah_qpn();
3563 fta_node->table_name = table_name;
3564 fta_node->set_node_name( "_fta_"+node_name );
3565 fta_node->table_name->set_range_var(table_name->get_var_name());
// HFTA side: an rsgah_qpn that keeps the original node name and reads from
// the LFTA output, using the original range variable.
3568 rsgah_qpn *stream_node = new rsgah_qpn();
3569 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3570 stream_node->set_node_name( node_name );
3571 stream_node->table_name->set_range_var(table_name->get_var_name());
3573 // First, process the group-by variables.
3574 // The fta must supply the values of all the gbvars.
3575 // If a gb is computed, the computation must be
3576 // performed at the FTA, so the SE must be FTA-safe.
3577 // Nice side effect : the gbvar table contains
3578 // matching entries for the original query, the lfta query,
3579 // and the hfta query. So gbrefs in the new queries are set
3580 // correctly just by inheriting the gbrefs from the old query.
3581 // If this property changed, I'll need translation tables.
3584 for(g=0;g<gb_tbl.size();g++){
3585 // Insert the gbvar into the lfta.
3586 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
3587 fta_node->gb_tbl.add_gb_var(
3588 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
3591 // Insert a ref to the value of the gbvar into the lfta select list.
// gbvar_fta is a column reference named after the gbvar, tagged as gbvar g
// and typed like its definition; make_fta_se_ref() returns the expression
// the stream node uses to read that value.
3592 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
3593 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
3594 gbvar_fta->set_gb_ref(g);
3595 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
3596 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
3598 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
3599 gbvar_stream->set_gb_ref(-1); // used as GBvar def
3600 stream_node->gb_tbl.add_gb_var(
3601 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
3606 // SEs in the aggregate definitions.
3607 // They are all safe, so split them up for later processing.
// hfta_aggr_se is keyed by the aggregate index in this node's aggr_tbl and is
// passed to rehome_fta_se / rehome_fta_pr below when the select list, HAVING
// and CLOSING_WHEN clauses are moved to the stream node.
3608 map<int, scalarexp_t *> hfta_aggr_se;
3609 for(a=0;a<aggr_tbl.size();++a){
3610 split_fta_aggr( &(aggr_tbl), a,
3611 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
3612 fta_node->select_list,
3619 // Next, the select list.
3621 for(s=0;s<select_list.size();s++){
// NOTE(review): fta_forbidden is not read by any visible statement in this
// loop - confirm it is dead.
3622 bool fta_forbidden = false;
3623 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3624 stream_node->select_list.push_back(
3625 new select_element(root_se, select_list[s]->name));
3630 // All the predicates in the where clause must execute
// Each WHERE predicate is duplicated, re-analyzed and attached to the LFTA.
3633 for(p=0;p<where.size();p++){
3634 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
3635 cnf_elem *new_cnf = new cnf_elem(new_pr);
3636 analyze_cnf(new_cnf);
3638 fta_node->where.push_back(new_cnf);
3641 // All of the predicates in the having clause must
3642 // execute in the stream node.
3644 for(p=0;p<having.size();p++){
3645 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3646 cnf_elem *cnf_root = new cnf_elem(pr_root);
3647 analyze_cnf(cnf_root);
3649 stream_node->having.push_back(cnf_root);
3652 // All of the predicates in the closing when clause must
3653 // execute in the stream node.
3655 for(p=0;p<closing_when.size();p++){
3656 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3657 cnf_elem *cnf_root = new cnf_elem(pr_root);
3658 analyze_cnf(cnf_root);
3660 stream_node->closing_when.push_back(cnf_root);
3664 // Divide the parameters among the stream, FTA.
3665 // Currently : assume that the stream receives all parameters
3666 // and parameter updates, incorporates them, then passes
3667 // all of the parameters to the FTA.
3668 // This will need to change (tables, fta-unsafe types. etc.)
3670 // I will pass on the use_handle_access marking, even
3671 // though the fcn call that requires handle access might
3672 // exist in only one of the parts of the query.
3673 // Parameter manipulation and handle access determination will
3674 // need to be revisited anyway.
3675 vector<string> param_names = param_tbl->get_param_names();
// Both halves receive their own duplicate of every parameter's data type.
3677 for(pi=0;pi<param_names.size();pi++){
3678 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3679 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3680 param_tbl->handle_access(param_names[pi]));
3681 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3682 param_tbl->handle_access(param_names[pi]));
// The LFTA gets a copy of the defines without "_referenced_ifaces"; the
// stream node keeps the full set.
3684 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3685 stream_node->definitions = definitions;
3687 // Now split by interfaces XXXX
// More than one interface: one sgah_qpn subquery is cloned from fta_node per
// (machine, interface) pair, and a merge node named like fta_node combines
// them for the stream node.
3688 if(ifaces.size() > 1){
3689 for(si=0;si<ifaces.size();++si){
3690 sgah_qpn *subq_node = new sgah_qpn();
3692 // Name the subquery
3693 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3695 subq_node->set_node_name( new_name) ;
3696 sel_names.push_back(subq_node->get_node_name());
// The subquery gets its own copy of the table variable, bound to exactly
// this machine/interface.
3699 subq_node->table_name = fta_node->table_name->duplicate();
3700 subq_node->table_name->set_machine(ifaces[si].first);
3701 subq_node->table_name->set_interface(ifaces[si].second);
3702 subq_node->table_name->set_ifq(false);
3705 for(g=0;g<fta_node->gb_tbl.size();g++){
3706 // Insert the gbvar into the lfta.
3707 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
3708 subq_node->gb_tbl.add_gb_var(
3709 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
3713 // Insert the aggregates
3714 for(a=0;a<fta_node->aggr_tbl.size();++a){
3715 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
// Clone the LFTA select list, WHERE and HAVING clauses into the subquery.
3718 for(s=0;s<fta_node->select_list.size();s++){
3719 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3721 for(p=0;p<fta_node->where.size();p++){
3722 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3723 cnf_elem *new_cnf = new cnf_elem(new_pr);
3724 analyze_cnf(new_cnf);
3726 subq_node->where.push_back(new_cnf);
3728 for(p=0;p<fta_node->having.size();p++){
3729 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
3730 cnf_elem *new_cnf = new cnf_elem(new_pr);
3731 analyze_cnf(new_cnf);
3733 subq_node->having.push_back(new_cnf);
3735 // Xfer all of the parameters.
3736 // Use existing handle annotations.
3737 vector<string> param_names = param_tbl->get_param_names();
3739 for(pi=0;pi<param_names.size();pi++){
3740 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3741 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3742 param_tbl->handle_access(param_names[pi]));
3744 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
// A nonzero result from resolve_if_params is reported as error code 3; the
// error text is written into this->err_str by the call itself.
3745 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3746 this->error_code = 3;
3750 ret_vec.push_back(subq_node);
// The merge node is built from the first subquery (ret_vec[0]) plus the
// collected source names and interface list.
3753 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
3754 fta_node->node_name, sel_names, ifaces, ifdb);
// NOTE(review): the split_sources() lines below look like disabled code (the
// hfta_returned expression further down counts only one merge node) -
// confirm against the complete file.
3757 Do not split sources until we are done with optimizations
3758 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3759 for(i=0;i<split_merge.size();++i){
3760 ret_vec.push_back(split_merge[i]);
3763 ret_vec.push_back(mrg_node);
3764 ret_vec.push_back(stream_node);
// Two HFTA-level nodes are reported: the merge node and the stream node.
3765 hfta_returned = 1/*split_merge.size()*/+1;
// Single interface: bind the LFTA directly to ifaces[0] and return the
// LFTA followed by the stream node.
3768 fta_node->table_name->set_machine(ifaces[0].first);
3769 fta_node->table_name->set_interface(ifaces[0].second);
3770 fta_node->table_name->set_ifq(false);
3771 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3772 this->error_code = 3;
3775 ret_vec.push_back(fta_node);
3776 ret_vec.push_back(stream_node);
3781 // ret_vec.push_back(fta_node);
3782 // ret_vec.push_back(stream_node);
3789 /////////////////////////////////////////////////////////////////////
3790 /// Split into selection LFTA, aggregation HFTA.
// LFTA side: an spx_qpn (selection/projection only) that shares this node's
// tablevar_t object; the stream node is set up as in the aggr/aggr split.
3792 spx_qpn *fta_node = new spx_qpn();
3793 fta_node->table_name = table_name;
3794 fta_node->set_node_name( "_fta_"+node_name );
3795 fta_node->table_name->set_range_var(table_name->get_var_name());
3798 rsgah_qpn *stream_node = new rsgah_qpn();
3799 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3800 stream_node->set_node_name( node_name );
3801 stream_node->table_name->set_range_var(table_name->get_var_name());
// split_ftavec_se / split_ftavec_pr take a vector of child select lists;
// here the only child is the LFTA select list.
3804 vector< vector<select_element *> *> select_vec;
3805 select_vec.push_back(&(fta_node->select_list)); // only one child
3807 // Process the gbvars. Split their defining SEs.
3808 for(g=0;g<gb_tbl.size();g++){
3809 bool fta_forbidden = false;
3810 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3812 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
3813 fta_forbidden, se_src, select_vec, Ext_fcns
3815 // if(fta_forbidden) (
// When the split expression is FTA-forbidden or refers to no table variable,
// it becomes the stream node's gbvar definition as is; the other visible
// branch routes it through the LFTA select list with make_fta_se_ref().
3816 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3817 stream_node->gb_tbl.add_gb_var(
3818 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
3821 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
3822 stream_node->gb_tbl.add_gb_var(
3823 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
3828 // Process the aggregate table.
3829 // Copy to stream, split the SEs.
// For every aggregate an expression hse is built that refers to the stream
// node's aggregate a; it is stored in hfta_aggr_se[a] for the rehome calls.
3830 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
3831 for(a=0;a<aggr_tbl.size();++a){
// Built-in aggregates: star aggregates (no operand) are copied directly;
// others have their operand expression split first.
3833 if(aggr_tbl.is_builtin(a)){
3834 if(aggr_tbl.is_star_aggr(a)){
3835 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
3836 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
3838 bool fta_forbidden = false;
3839 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3841 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3842 fta_forbidden, se_src, select_vec, Ext_fcns
3844 // if(fta_forbidden) (
3845 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3846 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
3847 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
3849 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3850 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
3851 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
3854 hse->set_data_type(aggr_tbl.get_data_type(a));
3855 hse->set_aggr_id(a);
3856 hfta_aggr_se[a]=hse;
// UDAFs: each operand is split separately and the aggregate is re-created in
// the stream node with the new operand list, keeping its function id,
// storage type and bailout flag.
3858 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
3859 vector<scalarexp_t *> new_opl;
3860 for(o=0;o<opl.size();++o){
3861 bool fta_forbidden = false;
3862 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3863 scalarexp_t *agg_se = split_ftavec_se( opl[o],
3864 fta_forbidden, se_src, select_vec, Ext_fcns
3866 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3867 // fta_forbidden, se_src, select_vec, Ext_fcns
3869 // if(fta_forbidden) (
3870 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3871 new_opl.push_back(agg_se);
3873 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3874 new_opl.push_back(new_se);
3877 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
3878 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
3879 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
3880 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
3881 hse->set_aggr_id(a);
3882 hfta_aggr_se[a]=hse;
3887 // Process the WHERE clause.
3888 // If it is fta-safe AND it refs only fta-safe gbvars,
3889 // then expand the gbvars and put it into the lfta.
3890 // Else, split it into an hfta predicate ref'ing
3891 // se's computed partially in the lfta.
3893 predicate_t *pr_root;
3895 for(p=0;p<where.size();p++){
// fta_forbidden records which side the predicate belongs to; the resulting
// cnf element is pushed onto the stream node's or the LFTA's WHERE list
// (presumably selected by that flag - the test itself is not visible here).
3896 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
3897 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3898 fta_forbidden = true;
3900 pr_root = dup_pr(where[p]->pr, NULL);
3901 expand_gbvars_pr(pr_root, gb_tbl);
3902 fta_forbidden = false;
3904 cnf_elem *cnf_root = new cnf_elem(pr_root);
3905 analyze_cnf(cnf_root);
3908 stream_node->where.push_back(cnf_root);
3910 fta_node->where.push_back(cnf_root);
3915 // Process the Select clause, rehome it on the
3917 for(s=0;s<select_list.size();s++){
// NOTE(review): fta_forbidden is not read by any visible statement in this
// loop - confirm it is dead.
3918 bool fta_forbidden = false;
3919 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3920 stream_node->select_list.push_back(
3921 new select_element(root_se, select_list[s]->name));
3925 // Process the Having clause
3927 // All of the predicates in the having clause must
3928 // execute in the stream node.
3930 for(p=0;p<having.size();p++){
3931 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3932 cnf_elem *cnf_root = new cnf_elem(pr_root);
3933 analyze_cnf(cnf_root);
3935 stream_node->having.push_back(cnf_root);
3937 // Same for closing when
3938 for(p=0;p<closing_when.size();p++){
3939 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3940 cnf_elem *cnf_root = new cnf_elem(pr_root);
3941 analyze_cnf(cnf_root);
3943 stream_node->closing_when.push_back(cnf_root);
3947 // Handle parameters and a few last details.
3948 vector<string> param_names = param_tbl->get_param_names();
// As in the aggr/aggr split, both halves receive every parameter.
3950 for(pi=0;pi<param_names.size();pi++){
3951 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3952 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3953 param_tbl->handle_access(param_names[pi]));
3954 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3955 param_tbl->handle_access(param_names[pi]));
3958 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3959 stream_node->definitions = definitions;
3961 // Now split by interfaces YYYY
// More than one interface: clone the selection LFTA once per
// (machine, interface) pair and merge the results for the stream node.
3962 if(ifaces.size() > 1){
3963 for(si=0;si<ifaces.size();++si){
3964 spx_qpn *subq_node = new spx_qpn();
3966 // Name the subquery
3967 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3969 subq_node->set_node_name( new_name) ;
3970 sel_names.push_back(subq_node->get_node_name());
3973 subq_node->table_name = fta_node->table_name->duplicate();
3974 subq_node->table_name->set_machine(ifaces[si].first);
3975 subq_node->table_name->set_interface(ifaces[si].second);
3976 subq_node->table_name->set_ifq(false);
// Clone the LFTA select list and WHERE clause into the subquery.
3978 for(s=0;s<fta_node->select_list.size();s++){
3979 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3981 for(p=0;p<fta_node->where.size();p++){
3982 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3983 cnf_elem *new_cnf = new cnf_elem(new_pr);
3984 analyze_cnf(new_cnf);
3986 subq_node->where.push_back(new_cnf);
3988 // Xfer all of the parameters.
3989 // Use existing handle annotations.
3990 vector<string> param_names = param_tbl->get_param_names();
3992 for(pi=0;pi<param_names.size();pi++){
3993 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3994 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3995 param_tbl->handle_access(param_names[pi]));
3997 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
// A nonzero result from resolve_if_params is reported as error code 3.
3998 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3999 this->error_code = 3;
4003 ret_vec.push_back(subq_node);
// The merge node is built from the first subquery (ret_vec[0]) plus the
// collected source names and interface list.
4006 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4007 fta_node->node_name, sel_names, ifaces, ifdb);
// NOTE(review): the split_sources() lines below look like disabled code (the
// hfta_returned expression further down counts only one merge node) -
// confirm against the complete file.
4009 Do not split sources until we are done with optimizations
4010 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4011 for(i=0;i<split_merge.size();++i){
4012 ret_vec.push_back(split_merge[i]);
4015 ret_vec.push_back(mrg_node);
4016 ret_vec.push_back(stream_node);
// Two HFTA-level nodes are reported: the merge node and the stream node.
4017 hfta_returned = 1/*split_merge.size()*/+1;
// Single interface: bind the LFTA directly to ifaces[0] and return the
// LFTA followed by the stream node.
4020 fta_node->table_name->set_machine(ifaces[0].first);
4021 fta_node->table_name->set_interface(ifaces[0].second);
4022 fta_node->table_name->set_ifq(false);
4023 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4024 this->error_code = 3;
4027 ret_vec.push_back(fta_node);
4028 ret_vec.push_back(stream_node);
4038 Splitting an aggregation operator
4040 An aggregation operator can reference
4041 literals, parameters, colrefs, group-by vars, aggregates,
4042 operators, functions
4044 an aggregation contains
4045 A selection list of SEs
4046 A where list of predicates
4047 A list of group-by variable definitions
4048 A list of aggregates to be computed
4049 A HAVING list of predicates.
4051 Aggregation involves two phases:
4052 1) given an input tuple, determine if it satisfies all of
4053 the WHERE predicates. If so, compute the group.
4054 Look up the group, update its aggregates.
4055 2) given a closed group and its aggregates, determine
4056 if these values satisfy all of the HAVING predicates.
4057 If so, evaluate the SEs on the selection list from the
4058 group and its aggregates.
4059 The two-phase nature of aggregation places restrictions on
4060 what can be referenced by different components of the operator
4061 (in addition to functions and operators).
4062 - group-by variables : literals, parameters, colrefs
4063 - WHERE predicates : group-by vars, literals, params, colrefs
4064 - HAVING predicates : group-by vars, literals, params, aggregates
4065 - Selection list SEs : group-by vars, literals, params, aggregates
4067 Splitting an aggregation operator into an LFTA/HFTA part
4068 involves performing partial aggregation at the LFTA and
4069 completing the aggregation at the HFTA.
4070 - given a tuple, the LFTA part evaluates the WHERE clause,
4071 and if it is satisfied, computes the group. lookup the group
4072 and update the aggregates. output the group and its partial
4074 - Given a partial aggregate from the LFTA, look up the group and
4075 update its aggregates. When the group is closed, evaluate
4076 the HAVING clause and the SEs on the selection list.
4077 THEREFORE the selection list of the LFTA must consist of the
4078 group-by variables and the set of (bare) subaggregate values
4079 necessary to compute the super aggregates.
4080 Unlike the case with the SPX operator, the SE splitting point
4081 is at the GBvar and the aggregate value level.
4084 For each group-by variable
4085 Put the GB variable definition in the LFTA GBVAR list.
4086 Put the GBVAR in the LFTA selection list (as an SE).
4087 Put a reference to that GBVAR in the HFTA GBVAR list.
4089 Split the aggregate into a superaggregate and a subaggregate.
4090 The SE of the superaggregate references the subaggregate value.
4091 (this will need modifications for MF aggregation)
4092 For each SE in the selection list, HAVING predicate
4093 Make GBVAR references point to the new GBVAR
4094 make the aggregate value references point to the new aggregates.
4096 SEs are not so much split as their ref's are changed.
4098 TODO: insert tablevar names into the colrefs.
4103 vector<qp_node *> sgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4107 vector<qp_node *> ret_vec;
4108 int s, p, g, a, o, i;
4111 vector<string> fta_flds, stream_flds;
4113 // If the node reads from a stream, don't split.
4114 // int t = Schema->get_table_ref(table_name->get_schema_name());
4115 int t = table_name->get_schema_ref();
4116 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
4117 ret_vec.push_back(this);
4121 // Get the set of interfaces it accesses.
4123 vector<string> sel_names;
4124 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
4125 if (ifaces.empty()) {
4126 fprintf(stderr,"INTERNAL ERROR in sgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
4132 //////////////////////////////////////////////
4133 // Is this LFTA-only?
4134 if(definitions.count("lfta_aggregation")>0){
4135 // Yes. Ensure that everything is lfta-safe.
4137 // Check only one interface is accessed.
4138 if(ifaces.size()>1){
4139 this->err_str = "ERROR, group-by query "+node_name+" is lfta-only, but it accesses more than one interface:\n";
4140 for(si=0;si<ifaces.size();++si)
4141 this->err_str += "\t"+ifaces[si].first+"."+ifaces[si].second+"\n";
4142 this->error_code = 2;
4146 // Check the group-by attributes
4147 for(g=0;g<gb_tbl.size();g++){
4148 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4149 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition and the query is lfta-only (%s).\n",
4150 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4152 this->error_code = 1;
4153 this->err_str = tmpstr;
4158 // Verify that the SEs in the aggregate definitions are fta-safe
4159 for(a=0;a<aggr_tbl.size();++a){
4160 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4161 if(ase != NULL){ // COUNT(*) does not have a SE.
4162 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4163 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe scalar expression and the query is lfta-only (%s).\n",
4164 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4166 this->error_code = 1;
4167 this->err_str = tmpstr;
4171 if(! aggr_tbl.fta_legal(a,Ext_fcns)){
4172 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4173 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe aggregate and the query is lfta-only (%s).\n",
4174 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4176 this->error_code = 1;
4177 this->err_str = tmpstr;
4183 // Ensure that all the aggregates are fta-safe ....
4187 for(s=0;s<select_list.size();s++){
4188 if(! check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns)){
4189 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4190 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4192 this->error_code = 1;
4193 this->err_str = tmpstr;
4200 for(p=0;p<where.size();p++){
4201 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4202 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4203 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4205 this->error_code = 1;
4206 this->err_str = tmpstr;
4213 if(having.size()>0){
4214 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : the query is lfta-only, so it can't have a HAVING clause.(%s).\n",
4215 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4217 this->error_code = 1;
4218 this->err_str = tmpstr;
4221 // The query is lfta safe, return it.
4224 ret_vec.push_back(this);
4228 //////////////////////////////////////////////////////////////
4229 /// Split into lfta, hfta.
4231 // A sgah node must always be split,
4232 // if for no other reason than to complete the
4233 // partial aggregation.
4235 // First, determine if the query can be spit into aggr/aggr,
4236 // or if it must be selection/aggr.
4237 // Splitting into selection/aggr is allowed only
4238 // if select_lfta is set.
4241 bool select_allowed = definitions.count("select_lfta")>0;
4242 bool select_rqd = false;
4244 set<int> unsafe_gbvars; // for processing where clause
4245 for(g=0;g<gb_tbl.size();g++){
4246 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4247 if(!select_allowed){
4248 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
4249 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4251 this->error_code = 1;
4252 this->err_str = tmpstr;
4256 unsafe_gbvars.insert(g);
4261 // Verify that the SEs in the aggregate definitions are fta-safe
4262 for(a=0;a<aggr_tbl.size();++a){
4263 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4264 if(ase != NULL){ // COUNT(*) does not have a SE.
4265 if(!select_allowed){
4266 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4267 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
4268 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4270 this->error_code = 1;
4271 this->err_str = tmpstr;
4280 // Verify that all of the ref'd UDAFs can be split.
4282 for(a=0;a<aggr_tbl.size();++a){
4283 if(! aggr_tbl.is_builtin(a)){
4284 int afcn = aggr_tbl.get_fcn_id(a);
4285 int super_id = Ext_fcns->get_superaggr_id(afcn);
4286 int sub_id = Ext_fcns->get_subaggr_id(afcn);
4287 if(super_id < 0 || sub_id < 0){
4288 if(!select_allowed){
4289 this->err_str += "ERROR in sgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
4290 this->error_code = 1;
4299 for(p=0;p<where.size();p++){
4300 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4301 if(!select_allowed){
4302 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
4303 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4305 this->error_code = 1;
4306 this->err_str = tmpstr;
4317 /////////////////////////////////////////////////////
4318 // Split into aggr/aggr.
4324 sgah_qpn *fta_node = new sgah_qpn();
4325 fta_node->table_name = table_name;
4326 fta_node->set_node_name( "_fta_"+node_name );
4327 fta_node->table_name->set_range_var(table_name->get_var_name());
4330 sgah_qpn *stream_node = new sgah_qpn();
4331 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4332 stream_node->set_node_name( node_name );
4333 stream_node->table_name->set_range_var(table_name->get_var_name());
4335 // allowed stream disorder. Default is 2,
4336 // can override with max_lfta_disorder setting.
4337 // Also limit the hfta disorder, set to lfta disorder + 1.
4338 // can override with max_hfta_disorder.
4340 fta_node->lfta_disorder = 2;
4341 if(this->get_val_of_def("max_lfta_disorder") != ""){
4342 int d = atoi(this->get_val_of_def("max_lfta_disorder").c_str() );
4344 fprintf(stderr,"Warning, max_lfta_disorder in node %s is %d, must be at least 1, ignoring.\n",node_name.c_str(), d);
4346 fta_node->lfta_disorder = d;
4347 printf("node %s setting lfta_disorder = %d\n",node_name.c_str(),fta_node->lfta_disorder);
4350 if(fta_node->lfta_disorder > 1)
4351 stream_node->hfta_disorder = fta_node->lfta_disorder + 1;
4353 stream_node->hfta_disorder = 1;
4355 if(this->get_val_of_def("max_hfta_disorder") != ""){
4356 int d = atoi(this->get_val_of_def("max_hfta_disorder").c_str() );
4357 if(d<fta_node->lfta_disorder){
4358 fprintf(stderr,"Warning, max_hfta_disorder in node %s is %d, must be at least the max lfta disorder %d, ignoring.\n",node_name.c_str(), d,fta_node->lfta_disorder);
4360 fta_node->lfta_disorder = d;
4362 if(fta_node->lfta_disorder < fta_node->hfta_disorder){
4363 fta_node->hfta_disorder = fta_node->lfta_disorder + 1;
4367 // First, process the group-by variables.
4368 // The fta must supply the values of all the gbvars.
4369 // If a gb is computed, the computation must be
4370 // performed at the FTA, so the SE must be FTA-safe.
4371 // Nice side effect : the gbvar table contains
4372 // matching entries for the original query, the lfta query,
4373 // and the hfta query. So gbrefs in the new queries are set
4374 // correctly just by inheriting the gbrefs from the old query.
4375 // If this property changed, I'll need translation tables.
4378 for(g=0;g<gb_tbl.size();g++){
4379 // Insert the gbvar into the lfta.
4380 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
4381 fta_node->gb_tbl.add_gb_var(
4382 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
4385 // Insert a ref to the value of the gbvar into the lfta select list.
4386 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
4387 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
4388 gbvar_fta->set_gb_ref(g);
4389 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
4390 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
4392 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
4393 gbvar_stream->set_gb_ref(-1); // used as GBvar def
4394 stream_node->gb_tbl.add_gb_var(
4395 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
4398 // multiple aggregation patterns, if any, go with the hfta
4399 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4401 // SEs in the aggregate definitions.
4402 // They are all safe, so split them up for later processing.
4403 map<int, scalarexp_t *> hfta_aggr_se;
4404 for(a=0;a<aggr_tbl.size();++a){
4405 split_fta_aggr( &(aggr_tbl), a,
4406 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
4407 fta_node->select_list,
4415 for(ii=0;ii<fta_flds.size() || ii < fta_node->select_list.size();++ii){
4416 if(ii<fta_flds.size())
4417 printf("\t%s : ",fta_flds[ii].c_str());
4420 if(ii<fta_node->select_list.size())
4421 printf("%s\n",fta_node->select_list[ii]->to_string().c_str());
4425 printf("hfta aggregates are:");
4426 for(ii=0;ii<stream_node->aggr_tbl.size();++ii){
4427 printf(" %s",stream_node->aggr_tbl.get_op(ii).c_str());
4429 printf("\nlfta aggregates are:");
4430 for(ii=0;ii<fta_node->aggr_tbl.size();++ii){
4431 printf(" %s",fta_node->aggr_tbl.get_op(ii).c_str());
4439 // Next, the select list.
4441 for(s=0;s<select_list.size();s++){
4442 bool fta_forbidden = false;
4443 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4444 stream_node->select_list.push_back(
4445 new select_element(root_se, select_list[s]->name));
4450 // All the predicates in the where clause must execute
4453 for(p=0;p<where.size();p++){
4454 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
4455 cnf_elem *new_cnf = new cnf_elem(new_pr);
4456 analyze_cnf(new_cnf);
4458 fta_node->where.push_back(new_cnf);
4461 // All of the predicates in the having clause must
4462 // execute in the stream node.
4464 for(p=0;p<having.size();p++){
4465 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4466 cnf_elem *cnf_root = new cnf_elem(pr_root);
4467 analyze_cnf(cnf_root);
4469 stream_node->having.push_back(cnf_root);
4473 // Divide the parameters among the stream, FTA.
4474 // Currently : assume that the stream receives all parameters
4475 // and parameter updates, incorporates them, then passes
4476 // all of the parameters to the FTA.
4477 // This will need to change (tables, fta-unsafe types. etc.)
4479 // I will pass on the use_handle_access marking, even
4480 // though the fcn call that requires handle access might
4481 // exist in only one of the parts of the query.
4482 // Parameter manipulation and handle access determination will
4483 // need to be revisited anyway.
4484 vector<string> param_names = param_tbl->get_param_names();
4486 for(pi=0;pi<param_names.size();pi++){
4487 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4488 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4489 param_tbl->handle_access(param_names[pi]));
4490 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4491 param_tbl->handle_access(param_names[pi]));
4493 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4494 stream_node->definitions = definitions;
4496 // Now split by interfaces XXXX
4497 if(ifaces.size() > 1){
4498 for(si=0;si<ifaces.size();++si){
4499 sgah_qpn *subq_node = new sgah_qpn();
4501 // Name the subquery
4502 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4504 subq_node->set_node_name( new_name) ;
4505 sel_names.push_back(subq_node->get_node_name());
4508 subq_node->table_name = fta_node->table_name->duplicate();
4509 subq_node->table_name->set_machine(ifaces[si].first);
4510 subq_node->table_name->set_interface(ifaces[si].second);
4511 subq_node->table_name->set_ifq(false);
4514 for(g=0;g<fta_node->gb_tbl.size();g++){
4515 // Insert the gbvar into the lfta.
4516 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
4517 subq_node->gb_tbl.add_gb_var(
4518 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
4522 // Insert the aggregates
4523 for(a=0;a<fta_node->aggr_tbl.size();++a){
4524 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
4527 for(s=0;s<fta_node->select_list.size();s++){
4528 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4530 for(p=0;p<fta_node->where.size();p++){
4531 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4532 cnf_elem *new_cnf = new cnf_elem(new_pr);
4533 analyze_cnf(new_cnf);
4535 subq_node->where.push_back(new_cnf);
4537 for(p=0;p<fta_node->having.size();p++){
4538 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
4539 cnf_elem *new_cnf = new cnf_elem(new_pr);
4540 analyze_cnf(new_cnf);
4542 subq_node->having.push_back(new_cnf);
4544 // Xfer all of the parameters.
4545 // Use existing handle annotations.
4546 vector<string> param_names = param_tbl->get_param_names();
4548 for(pi=0;pi<param_names.size();pi++){
4549 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4550 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4551 param_tbl->handle_access(param_names[pi]));
4553 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4554 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4555 this->error_code = 3;
4560 subq_node->lfta_disorder = fta_node->lfta_disorder;
4562 ret_vec.push_back(subq_node);
4565 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
4566 fta_node->node_name, sel_names, ifaces, ifdb);
4567 mrg_node->set_disorder(fta_node->lfta_disorder);
4570 Do not split sources until we are done with optimizations
4571 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4572 for(i=0;i<split_merge.size();++i){
4573 ret_vec.push_back(split_merge[i]);
4576 ret_vec.push_back(mrg_node);
4577 ret_vec.push_back(stream_node);
4578 hfta_returned = 1/*split_merge.size()*/+1;
4581 fta_node->table_name->set_machine(ifaces[0].first);
4582 fta_node->table_name->set_interface(ifaces[0].second);
4583 fta_node->table_name->set_ifq(false);
4584 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4585 this->error_code = 3;
4588 ret_vec.push_back(fta_node);
4589 ret_vec.push_back(stream_node);
4594 // ret_vec.push_back(fta_node);
4595 // ret_vec.push_back(stream_node);
4602 /////////////////////////////////////////////////////////////////////
4603 /// Split into selection LFTA, aggregation HFTA.
4605 spx_qpn *fta_node = new spx_qpn();
4606 fta_node->table_name = table_name;
4607 fta_node->set_node_name( "_fta_"+node_name );
4608 fta_node->table_name->set_range_var(table_name->get_var_name());
4611 sgah_qpn *stream_node = new sgah_qpn();
4612 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4613 stream_node->set_node_name( node_name );
4614 stream_node->table_name->set_range_var(table_name->get_var_name());
4617 vector< vector<select_element *> *> select_vec;
4618 select_vec.push_back(&(fta_node->select_list)); // only one child
4620 // Process the gbvars. Split their defining SEs.
4621 for(g=0;g<gb_tbl.size();g++){
4622 bool fta_forbidden = false;
4623 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4625 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
4626 fta_forbidden, se_src, select_vec, Ext_fcns
4628 // if(fta_forbidden) (
4629 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4630 stream_node->gb_tbl.add_gb_var(
4631 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
4634 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
4635 stream_node->gb_tbl.add_gb_var(
4636 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
4640 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4642 // Process the aggregate table.
4643 // Copy to stream, split the SEs.
4644 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
4645 for(a=0;a<aggr_tbl.size();++a){
4647 if(aggr_tbl.is_builtin(a)){
4648 if(aggr_tbl.is_star_aggr(a)){
4649 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
4650 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
4652 bool fta_forbidden = false;
4653 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4655 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4656 fta_forbidden, se_src, select_vec, Ext_fcns
4658 // if(fta_forbidden) (
4659 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4660 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
4661 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
4663 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4664 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
4665 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
4668 hse->set_data_type(aggr_tbl.get_data_type(a));
4669 hse->set_aggr_id(a);
4670 hfta_aggr_se[a]=hse;
4672 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
4673 vector<scalarexp_t *> new_opl;
4674 for(o=0;o<opl.size();++o){
4675 bool fta_forbidden = false;
4676 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4677 scalarexp_t *agg_se = split_ftavec_se( opl[o],
4678 fta_forbidden, se_src, select_vec, Ext_fcns
4680 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4681 // fta_forbidden, se_src, select_vec, Ext_fcns
4683 // if(fta_forbidden) (
4684 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4685 new_opl.push_back(agg_se);
4687 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4688 new_opl.push_back(new_se);
4691 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
4692 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
4693 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
4694 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
4695 hse->set_aggr_id(a);
4696 hfta_aggr_se[a]=hse;
4701 // Process the WHERE clause.
4702 // If it is fta-safe AND it refs only fta-safe gbvars,
4703 // then expand the gbvars and put it into the lfta.
4704 // Else, split it into an hfta predicate ref'ing
4705 // se's computed partially in the lfta.
4707 predicate_t *pr_root;
4709 for(p=0;p<where.size();p++){
4710 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
4711 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
4712 fta_forbidden = true;
4714 pr_root = dup_pr(where[p]->pr, NULL);
4715 expand_gbvars_pr(pr_root, gb_tbl);
4716 fta_forbidden = false;
4718 cnf_elem *cnf_root = new cnf_elem(pr_root);
4719 analyze_cnf(cnf_root);
4722 stream_node->where.push_back(cnf_root);
4724 fta_node->where.push_back(cnf_root);
4729 // Process the Select clause, rehome it on the
4731 for(s=0;s<select_list.size();s++){
4732 bool fta_forbidden = false;
4733 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4734 stream_node->select_list.push_back(
4735 new select_element(root_se, select_list[s]->name));
4739 // Process the Having clause
4741 // All of the predicates in the having clause must
4742 // execute in the stream node.
4744 for(p=0;p<having.size();p++){
4745 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4746 cnf_elem *cnf_root = new cnf_elem(pr_root);
4747 analyze_cnf(cnf_root);
4749 stream_node->having.push_back(cnf_root);
4752 // Handle parameters and a few last details.
4753 vector<string> param_names = param_tbl->get_param_names();
4755 for(pi=0;pi<param_names.size();pi++){
4756 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4757 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4758 param_tbl->handle_access(param_names[pi]));
4759 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4760 param_tbl->handle_access(param_names[pi]));
4763 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4764 stream_node->definitions = definitions;
4766 // Now split by interfaces YYYY
4767 if(ifaces.size() > 1){
4768 for(si=0;si<ifaces.size();++si){
4769 spx_qpn *subq_node = new spx_qpn();
4771 // Name the subquery
4772 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4774 subq_node->set_node_name( new_name) ;
4775 sel_names.push_back(subq_node->get_node_name());
4778 subq_node->table_name = fta_node->table_name->duplicate();
4779 subq_node->table_name->set_machine(ifaces[si].first);
4780 subq_node->table_name->set_interface(ifaces[si].second);
4781 subq_node->table_name->set_ifq(false);
4783 for(s=0;s<fta_node->select_list.size();s++){
4784 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4786 for(p=0;p<fta_node->where.size();p++){
4787 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4788 cnf_elem *new_cnf = new cnf_elem(new_pr);
4789 analyze_cnf(new_cnf);
4791 subq_node->where.push_back(new_cnf);
4793 // Xfer all of the parameters.
4794 // Use existing handle annotations.
4795 vector<string> param_names = param_tbl->get_param_names();
4797 for(pi=0;pi<param_names.size();pi++){
4798 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4799 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4800 param_tbl->handle_access(param_names[pi]));
4802 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4803 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4804 this->error_code = 3;
4808 ret_vec.push_back(subq_node);
4811 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4812 fta_node->node_name, sel_names, ifaces, ifdb);
4814 Do not split sources until we are done with optimizations
4815 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4816 for(i=0;i<split_merge.size();++i){
4817 ret_vec.push_back(split_merge[i]);
4820 ret_vec.push_back(mrg_node);
4821 ret_vec.push_back(stream_node);
4822 hfta_returned = 1/*split_merge.size()*/+1;
4825 fta_node->table_name->set_machine(ifaces[0].first);
4826 fta_node->table_name->set_interface(ifaces[0].second);
4827 fta_node->table_name->set_ifq(false);
4828 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4829 this->error_code = 3;
4832 ret_vec.push_back(fta_node);
4833 ret_vec.push_back(stream_node);
4838 // ret_vec.push_back(fta_node);
4839 // ret_vec.push_back(stream_node);
4848 SPLITTING AN EQ-TEMPORAL, HASH JOIN OPERATOR
4850 A JOIN_EQ_HASH_QPN node may reference:
4851 literals, parameters, colrefs, functions, operators
4852 A JOIN_EQ_HASH_QPN node may not reference:
4853 group-by variables, aggregates
4855 A JOIN_EQ_HASH_QPN node contains
4856 selection list of SEs
4857 where list of CNF predicates, broken into:
4864 For each tablevar whose source is a PROTOCOL
4865 Create a LFTA for that tablevar
4866 Push as many prefilter[..] predicates to that tablevar as is
4868 Split the SEs in the select list, and the predicates not
// Split this hash-join node into per-source selection (spx) children that
// read PROTOCOL sources, plus a join_eq_hash_qpn "stream" node that joins
// the children's outputs.
//
//   Ext_fcns          - external function list, forwarded to the predicate /
//                       scalar-expression splitting helpers.
//   Schema            - used to look up the schema type of each FROM source.
//   hfta_returned     - out: set to hfta_nodes.size()+1 (merge nodes plus the
//                       stream node) on the split path.
//   ifdb              - interface database, forwarded to get_ifaces,
//                       resolve_if_params and the mrg_qpn constructor.
//   n_virtual_ifaces, hfta_parallelism, hfta_idx
//                     - forwarded unchanged to get_ifaces.
//
// Returns ret_vec: the child (or per-interface sub-query) nodes first, then
// any merge nodes, then the stream node last.  When resolve_if_params
// reports a failure, this->error_code is set to 3.
4873 vector<qp_node *> join_eq_hash_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4875 vector<qp_node *> ret_vec;
4878 // If the node reads from streams only, don't split.
4879 bool stream_only = true;
4880 for(f=0;f<from.size();++f){
4881 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4882 int t = from[f]->get_schema_ref();
4883 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA) stream_only = false;
// No PROTOCOL source: the node itself is handed back unsplit.
// NOTE(review): presumably guarded by stream_only and followed by an early
// return -- confirm.
4887 ret_vec.push_back(this);
4892 // The HFTA node, it is always returned.
4894 join_eq_hash_qpn *stream_node = new join_eq_hash_qpn();
// The stream node starts with duplicates of every FROM entry; entries for
// PROTOCOL sources are redirected to their child node further below.
4895 for(f=0;f<from.size();++f){
4896 // tablevar_t *tmp_tblvar = new tablevar_t( from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str());
4897 tablevar_t *tmp_tblvar = from[f]->duplicate();
4898 // tmp_tblvar->set_range_var(from[f]->get_var_name());
4900 stream_node->from.push_back(tmp_tblvar);
4902 stream_node->set_node_name(node_name);
4904 // Create spx (selection) children for each PROTOCOL source.
// child_vec and select_vec are indexed by FROM position; non-PROTOCOL
// positions hold NULL so the indices stay aligned with from[].
4905 vector<spx_qpn *> child_vec;
4906 vector< vector<select_element *> *> select_vec;
4907 for(f=0;f<from.size();++f){
4908 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4909 int t = from[f]->get_schema_ref();
4910 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA){
4911 spx_qpn *child_qpn = new spx_qpn();
// NOTE(review): unbounded sprintf into tmpstr (declared outside this
// function); a long node_name could overflow it -- consider snprintf.
4912 sprintf(tmpstr,"_fta_%d_%s",f,node_name.c_str());
4913 child_qpn->set_node_name(string(tmpstr));
4914 child_qpn->table_name = new tablevar_t(
4915 from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str(), from[f]->get_ifq());
4916 child_qpn->table_name->set_range_var(from[f]->get_var_name());
4917 child_qpn->table_name->set_machine(from[f]->get_machine());
4919 child_vec.push_back(child_qpn);
4920 select_vec.push_back(&(child_qpn->select_list));
4922 // Update the stream's FROM clause to read from this child
4923 stream_node->from[f]->set_interface("");
4924 stream_node->from[f]->set_schema(tmpstr);
4926 child_vec.push_back(NULL);
4927 select_vec.push_back(NULL);
4931 // Push lfta-safe prefilter to the lfta
4932 // TODO: I'm not copying the preds, I dont *think* it will be a problem.
4933 predicate_t *pr_root;
// For a source with a child: a prefilter predicate for which
// check_fta_forbidden_pr returns true is attached to the child's WHERE list
// (the same cnf_elem pointer is shared, not duplicated); otherwise it is
// split with split_ftavec_pr and the result goes to the stream node's
// prefilter.  For a source without a child the predicates are passed to the
// stream node as-is.
4935 for(f=0;f<from.size();++f){
4936 vector<cnf_elem *> pred_vec = prefilter[f];
4937 if(child_vec[f] != NULL){
4938 for(p=0;p<pred_vec.size();++p){
4939 if(check_fta_forbidden_pr(pred_vec[p]->pr,NULL, Ext_fcns)){
4940 child_vec[f]->where.push_back(pred_vec[p]);
4942 pr_root = split_ftavec_pr(pred_vec[p]->pr,select_vec,Ext_fcns);
4943 cnf_elem *cnf_root = new cnf_elem(pr_root);
4944 analyze_cnf(cnf_root);
4945 stream_node->prefilter[f].push_back(cnf_root);
4949 for(p=0;p<pred_vec.size();++p){
4950 stream_node->prefilter[f].push_back(pred_vec[p]);
4956 // Process the other predicates
// temporal_eq, hash_eq and postfilter predicates all stay in the stream
// node; split_ftavec_pr rewrites each one against the children's select
// lists (select_vec).
4957 for(p=0;p<temporal_eq.size();++p){
4958 pr_root = split_ftavec_pr(temporal_eq[p]->pr,select_vec,Ext_fcns);
4959 cnf_elem *cnf_root = new cnf_elem(pr_root);
4960 analyze_cnf(cnf_root);
4961 stream_node->temporal_eq.push_back(cnf_root);
4963 for(p=0;p<hash_eq.size();++p){
4964 pr_root = split_ftavec_pr(hash_eq[p]->pr,select_vec,Ext_fcns);
4965 cnf_elem *cnf_root = new cnf_elem(pr_root);
4966 analyze_cnf(cnf_root);
4967 stream_node->hash_eq.push_back(cnf_root);
4969 for(p=0;p<postfilter.size();++p){
4970 pr_root = split_ftavec_pr(postfilter[p]->pr,select_vec,Ext_fcns);
4971 cnf_elem *cnf_root = new cnf_elem(pr_root);
4972 analyze_cnf(cnf_root);
4973 stream_node->postfilter.push_back(cnf_root);
// Select list: each expression is split with split_ftavec_se.  If the split
// flags it fta_forbidden, or its source (se_src) is not a PROTOCOL child,
// the split result is used directly; otherwise it is replaced by a
// reference built with make_fta_se_ref.  The output name is kept either way.
4977 for(s=0;s<select_list.size();s++){
4978 bool fta_forbidden = false;
4979 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4980 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
4981 fta_forbidden, se_src, select_vec, Ext_fcns
4983 if(fta_forbidden || !is_PROTOCOL_source(se_src, select_vec)){
4984 stream_node->select_list.push_back(
4985 new select_element(root_se, select_list[s]->name) );
4987 scalarexp_t *new_se=make_fta_se_ref(select_vec,root_se,se_src);
4988 stream_node->select_list.push_back(
4989 new select_element(new_se, select_list[s]->name)
4995 // I need to "rehome" the colrefs -- make the annotations in the colrefs
4996 // agree with their tablevars.
// Each child has exactly one table variable, so its colrefs are bound
// against a one-element table list with index arguments 0,0.
4997 for(f=0;f<child_vec.size();++f){
4998 if(child_vec[f]!=NULL){
4999 vector<tablevar_t *> fm; fm.push_back(child_vec[f]->table_name);
5001 for(s=0;s<child_vec[f]->select_list.size();++s)
5002 bind_colref_se(child_vec[f]->select_list[s]->se, fm,0,0);
5003 for(p=0;p<child_vec[f]->where.size();++p)
5004 // bind_colref_pr(child_vec[f]->where[p]->pr, fm,f,0);
5005 bind_colref_pr(child_vec[f]->where[p]->pr, fm,0,0);
5009 // rehome the colrefs in the hfta node.
// One binding pass per FROM position f over every predicate list and the
// select list of the stream node.
5010 for(f=0;f<stream_node->from.size();++f){
5011 stream_node->where.clear();
5012 for(s=0;s<stream_node->from.size();++s){
5013 for(p=0;p<stream_node->prefilter[s].size();++p){
5014 bind_colref_pr((stream_node->prefilter[s])[p]->pr,stream_node->from,f,f);
5017 for(p=0;p<stream_node->temporal_eq.size();++p){
5018 bind_colref_pr(stream_node->temporal_eq[p]->pr,stream_node->from,f,f);
5020 for(p=0;p<stream_node->hash_eq.size();++p){
5021 bind_colref_pr(stream_node->hash_eq[p]->pr,stream_node->from,f,f);
5023 for(p=0;p<stream_node->postfilter.size();++p){
5024 bind_colref_pr(stream_node->postfilter[p]->pr,stream_node->from,f,f);
5026 for(s=0;s<stream_node->select_list.size();++s){
5027 bind_colref_se(stream_node->select_list[s]->se,stream_node->from,f,f);
5031 // Rebuild the WHERE clause
// Order: prefilters (by source), temporal_eq, hash_eq, postfilter.
5032 stream_node->where.clear();
5033 for(s=0;s<stream_node->from.size();++s){
5034 for(p=0;p<stream_node->prefilter[s].size();++p){
5035 stream_node->where.push_back((stream_node->prefilter[s])[p]);
5038 for(p=0;p<stream_node->temporal_eq.size();++p){
5039 stream_node->where.push_back(stream_node->temporal_eq[p]);
5041 for(p=0;p<stream_node->hash_eq.size();++p){
5042 stream_node->where.push_back(stream_node->hash_eq[p]);
5044 for(p=0;p<stream_node->postfilter.size();++p){
5045 stream_node->where.push_back(stream_node->postfilter[p]);
5049 // Build the return list
// hfta_nodes collects the merge nodes created for multi-interface children;
// they are appended to ret_vec after all LFTA-level nodes.
5050 vector<qp_node *> hfta_nodes;
5052 for(f=0;f<from.size();++f){
5053 if(child_vec[f] != NULL){
5054 spx_qpn *c_node = child_vec[f];
5055 vector<pair<string, string> > ifaces = get_ifaces(c_node->table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
5056 if (ifaces.empty()) {
5057 fprintf(stderr,"INTERNAL ERROR in join_eq_hash_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
// Single interface: the child reads it directly.  Each ifaces entry is used
// as (machine, interface).
5061 if(ifaces.size() == 1){
5062 c_node->table_name->set_machine(ifaces[0].first);
5063 c_node->table_name->set_interface(ifaces[0].second);
5064 c_node->table_name->set_ifq(false);
5065 if(c_node->resolve_if_params(ifdb, this->err_str)){
5066 this->error_code = 3;
5069 ret_vec.push_back(c_node);
// Several interfaces: one copy of the child per interface, combined by a
// merge node that takes over the child's node name.
5071 vector<string> sel_names;
5073 for(si=0;si<ifaces.size();++si){
5074 spx_qpn *subq_node = new spx_qpn();
5076 // Name the subquery
5077 string new_name = "_"+c_node->node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
5079 subq_node->set_node_name( new_name) ;
5080 sel_names.push_back(subq_node->get_node_name());
5083 subq_node->table_name = c_node->table_name->duplicate();
5084 subq_node->table_name->set_machine(ifaces[si].first);
5085 subq_node->table_name->set_interface(ifaces[si].second);
5086 subq_node->table_name->set_ifq(false);
5088 for(s=0;s<c_node->select_list.size();s++){
5089 subq_node->select_list.push_back(dup_select(c_node->select_list[s], NULL));
5091 for(p=0;p<c_node->where.size();p++){
5092 predicate_t *new_pr = dup_pr(c_node->where[p]->pr, NULL);
5093 cnf_elem *new_cnf = new cnf_elem(new_pr);
5094 analyze_cnf(new_cnf);
// NOTE(review): this printf writes to stdout for every copied predicate and
// looks like leftover debug output -- confirm it is intended.
5096 printf("table name is %s\n",subq_node->table_name->to_string().c_str());
5097 subq_node->where.push_back(new_cnf);
5099 // Xfer all of the parameters.
5100 // Use existing handle annotations.
5101 // vector<string> param_names = param_tbl->get_param_names();
5103 // for(pi=0;pi<param_names.size();pi++){
5104 // data_type *dt = param_tbl->get_data_type(param_names[pi]);
5105 // subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
5106 // param_tbl->handle_access(param_names[pi]));
5108 // subq_node->definitions = definitions;
5110 if(subq_node->resolve_if_params(ifdb, this->err_str)){
5111 this->error_code = 3;
5115 ret_vec.push_back(subq_node);
// The most recently pushed sub-query (last element of ret_vec) is the one
// passed to the merge-node constructor.
5117 int lpos = ret_vec.size()-1 ;
5118 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[lpos]),c_node->node_name,sel_names, ifaces, ifdb);
// NOTE(review): the next line is prose, so the split_sources() step below
// appears to be disabled (commented out) -- confirm.
5120 Do not split sources until we are done with optimizations
5121 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
5123 for(i=0;i<split_merge.size();++i){
5124 hfta_nodes.push_back(split_merge[i]);
5127 hfta_nodes.push_back(mrg_node);
// Final ordering: LFTA-level nodes, then merge nodes, then the stream node.
5132 for(i=0;i<hfta_nodes.size();++i) ret_vec.push_back(hfta_nodes[i]);
5133 ret_vec.push_back(stream_node);
5134 hfta_returned = hfta_nodes.size()+1;
5136 // Currently : assume that the stream receives all parameters
5137 // and parameter updates, incorporates them, then passes
5138 // all of the parameters to the FTA.
5139 // This will need to change (tables, fta-unsafe types. etc.)
5141 // I will pass on the use_handle_access marking, even
5142 // though the fcn call that requires handle access might
5143 // exist in only one of the parts of the query.
5144 // Parameter manipulation and handle access determination will
5145 // need to be revisited anyway.
5146 vector<string> param_names = param_tbl->get_param_names();
// Every returned node receives a duplicate of each parameter's data type.
// NOTE(review): the definitions copy sits inside the parameter loop, so a
// query with no parameters appears to leave definitions uncopied; the
// "_referenced_ifaces" entry is also erased from the stream node here --
// confirm both are intended.
5148 for(pi=0;pi<param_names.size();pi++){
5150 data_type *dt = param_tbl->get_data_type(param_names[pi]);
5151 for(ri=0;ri<ret_vec.size();++ri){
5152 ret_vec[ri]->param_tbl->add_param(param_names[pi],dt->duplicate(),
5153 param_tbl->handle_access(param_names[pi]));
5154 ret_vec[ri]->definitions = definitions; ret_vec[ri]->definitions.erase("_referenced_ifaces");
5165 /////////////////////////////////////////////////////////////
5168 // Common processing
// Shared helper for the extract_opview methods.  If the table variable
// fmtbl refers to an OPERATOR_VIEW_SCHEMA entry, an opview_entry is built
// for it, each subquery named by the view is validated against the schema,
// a renamed copy of the subquery's parse tree is appended to ret, and the
// entry is registered in opviews (its index is stored on fmtbl).
//
//   fmtbl     - table variable being examined; receives the udop alias and
//               the opview index.
//   pos       - position of fmtbl among the caller's inputs; used only to
//               build the generated names.
//   node_name - name of the owning query node (parent_qname and name prefix).
//   qnodes    - candidate query nodes, searched by name for each subquery.
//   opviews   - set the new opview_entry is appended to.
//   ret       - out: receives the duplicated subquery parse trees.
//   rootnm    - stored as the entry's root_name.
//   silo_nm   - suffix appended to the subquery name for the schema lookup.
//
// NOTE(review): the integer return values are presumably status codes, and
// Schema presumably is a parameter too -- confirm against the full
// signature.
5169 int process_opview(tablevar_t *fmtbl, int pos, string node_name,
5171 vector<query_node *> &qnodes,
5172 opview_set &opviews,
5173 vector<table_exp_t *> &ret, string rootnm, string silo_nm){
5177 int schref = fmtbl->get_schema_ref();
5181 if(Schema->get_schema_type(schref) == OPERATOR_VIEW_SCHEMA){
5182 opview_entry *opv = new opview_entry();
5183 opv->parent_qname = node_name;
5184 opv->root_name = rootnm;
5185 opv->view_name = fmtbl->get_schema_name();
// Alias has the form <node>_UDOP<pos>_<view>.
// NOTE(review): unbounded sprintf into tmpstr -- consider snprintf.
5187 sprintf(tmpstr,"%s_UDOP%d_%s",node_name.c_str(),pos,opv->view_name.c_str());
5188 opv->udop_alias = tmpstr;
5189 fmtbl->set_udop_alias(opv->udop_alias);
// Operator properties "file" and "liveness_timeout" come from the schema;
// atoi yields 0 when the timeout property is not numeric.
5191 opv->exec_fl = Schema->get_op_prop(schref, string("file"));
5192 opv->liveness_timeout = atoi(Schema->get_op_prop(schref, string("liveness_timeout")).c_str());
5194 vector<subquery_spec *> subq = Schema->get_subqueryspecs(schref);
5195 for(s=0;s<subq.size();++s){
5196 // Validate that the fields match.
5197 subquery_spec *sqs = subq[s];
5198 vector<field_entry *> flds = Schema->get_fields(sqs->name+silo_nm);
5199 if(flds.size() == 0){
5200 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in Schema.\n",sqs->name.c_str(), opv->view_name.c_str());
5203 if(flds.size() < sqs->types.size()){
// NOTE(review): %lu is paired with size() results (size_t); %zu is the
// portable conversion.
5204 fprintf(stderr,"ERROR: subquery %s of view %s does not have enough fields (%lu found, %lu expected).\n",sqs->name.c_str(), opv->view_name.c_str(),flds.size(), sqs->types.size());
// Field-by-field check: the expected type (dte) must subsume the type found
// in the schema (dtf), and an expected temporal property must match.
5207 bool failed = false;
5208 for(f=0;f<sqs->types.size();++f){
5209 data_type dte(sqs->types[f],sqs->modifiers[f]);
5210 data_type dtf(flds[f]->get_type(),flds[f]->get_modifier_list());
5211 if(! dte.subsumes_type(&dtf) ){
5212 fprintf(stderr,"ERROR: subquery %s of view %s does not have the correct type for field %d (%s found, %s expected).\n",sqs->name.c_str(), opv->view_name.c_str(),f,dtf.to_string().c_str(), dte.to_string().c_str());
5216 if(dte.is_temporal() && (dte.get_temporal() != dtf.get_temporal()) ){
5217 string pstr = dte.get_temporal_string();
5218 fprintf(stderr,"ERROR: subquery %s of view %s does not have the expected temporal value %s of field %d.\n",sqs->name.c_str(), opv->view_name.c_str(),pstr.c_str(),f);
5225 /// Validation done, find the subquery, make a copy of the
5226 /// parse tree, and add it to the return list.
// Linear search by name; q == qnodes.size() afterwards means no match.
5227 for(q=0;q<qnodes.size();++q)
5228 if(qnodes[q]->name == sqs->name)
5230 if(q==qnodes.size()){
5231 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in list of query names.\n",sqs->name.c_str(), opv->view_name.c_str());
// The copy is renamed <node>_OP<pos>_<view>_SUBQ<s> so it is distinct from
// the original subquery.
5235 table_exp_t *newq = dup_table_exp(qnodes[q]->parse_tree);
5236 sprintf(tmpstr,"%s_OP%d_%s_SUBQ%d",node_name.c_str(),pos,opv->view_name.c_str(),s);
5237 string newq_name = tmpstr;
5238 newq->nmap["query_name"] = newq_name;
5239 ret.push_back(newq);
5240 opv->subq_names.push_back(newq_name);
// Register the finished entry and remember its index on the table variable.
5242 fmtbl->set_opview_idx(opviews.append(opv));
// Runs process_opview on this node's single input (table_name, position 0);
// any subquery parse trees it produces are collected in ret.
// NOTE(review): retval is presumably checked before ret is returned -- confirm.
5248 vector<table_exp_t *> spx_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5249 vector<table_exp_t *> ret;
5251 int retval = process_opview(table_name,0,node_name,
5252 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on this node's single input (table_name, position 0);
// any subquery parse trees it produces are collected in ret.
// NOTE(review): retval is presumably checked before ret is returned -- confirm.
5258 vector<table_exp_t *> sgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5259 vector<table_exp_t *> ret;
5261 int retval = process_opview(table_name,0,node_name,
5262 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on this node's single input (table_name, position 0);
// any subquery parse trees it produces are collected in ret.
// NOTE(review): retval is presumably checked before ret is returned -- confirm.
5267 vector<table_exp_t *> rsgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5268 vector<table_exp_t *> ret;
5270 int retval = process_opview(table_name,0,node_name,
5271 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on this node's single input (table_name, position 0);
// any subquery parse trees it produces are collected in ret.
// NOTE(review): retval is presumably checked before ret is returned -- confirm.
5277 vector<table_exp_t *> sgahcwcb_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5278 vector<table_exp_t *> ret;
5280 int retval = process_opview(table_name,0,node_name,
5281 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on every merge input fm[f], passing f as the input
// position; all resulting subquery parse trees accumulate in ret.
// NOTE(review): retval is presumably checked inside the loop -- confirm.
5288 vector<table_exp_t *> mrg_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5289 vector<table_exp_t *> ret;
5291 for(f=0;f<fm.size();++f){
5292 int retval = process_opview(fm[f],f,node_name,
5293 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on every FROM entry from[f], passing f as the input
// position; all resulting subquery parse trees accumulate in ret.
// NOTE(review): retval is presumably checked inside the loop -- confirm.
5302 vector<table_exp_t *> join_eq_hash_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5303 vector<table_exp_t *> ret;
5305 for(f=0;f<from.size();++f){
5306 int retval = process_opview(from[f],f,node_name,
5307 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on every FROM entry from[f], passing f as the input
// position; all resulting subquery parse trees accumulate in ret.
// NOTE(review): retval is presumably checked inside the loop -- confirm.
5313 vector<table_exp_t *> filter_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5314 vector<table_exp_t *> ret;
5316 for(f=0;f<from.size();++f){
5317 int retval = process_opview(from[f],f,node_name,
5318 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Runs process_opview on the first FROM entry only (from[0], position 0);
// other FROM entries are not examined here.
// NOTE(review): retval is presumably checked before ret is returned -- confirm.
5324 vector<table_exp_t *> watch_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5325 vector<table_exp_t *> ret;
5326 int retval = process_opview(from[0],0,node_name,
5327 Schema,qnodes,opviews,ret, rootnm, silo_name);
// This node type has no inputs to examine: always returns an empty list and
// leaves Schema, qnodes and opviews untouched.
5334 vector<table_exp_t *> watch_tbl_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5335 vector<table_exp_t *> ret;
5336 return ret; // nothing to process
5341 //////////////////////////////////////////////////////////////////
5342 //////////////////////////////////////////////////////////////////
5343 /////// Additional methods
5347 //////////////////////////////////////////////////////////////////
5348 // Get schema of operator output
// Output schema of the merge node: returns the stored table_layout pointer
// itself (no copy is made here).
5350 table_def *mrg_qpn::get_fields(){
5351 return(table_layout);
// Output schema of the watch-table node: returns the stored table_layout
// pointer itself (no copy is made here).
5354 table_def *watch_tbl_qpn::get_fields(){
5355 return(table_layout);
// Output schema is derived from the node name and select list by
// create_attributes.
// NOTE(review): presumably a fresh table_def per call -- confirm ownership.
5359 table_def *spx_qpn::get_fields(){
5360 return(create_attributes(node_name, select_list));
// Output schema is derived from the node name and select list by
// create_attributes.
// NOTE(review): presumably a fresh table_def per call -- confirm ownership.
5363 table_def *sgah_qpn::get_fields(){
5364 return(create_attributes(node_name, select_list));
// Output schema is derived from the node name and select list by
// create_attributes.
// NOTE(review): presumably a fresh table_def per call -- confirm ownership.
5367 table_def *rsgah_qpn::get_fields(){
5368 return(create_attributes(node_name, select_list));
// Output schema is derived from the node name and select list by
// create_attributes.
// NOTE(review): presumably a fresh table_def per call -- confirm ownership.
5371 table_def *sgahcwcb_qpn::get_fields(){
5372 return(create_attributes(node_name, select_list));
// Output schema is derived from the node name and select list by
// create_attributes.
// NOTE(review): presumably a fresh table_def per call -- confirm ownership.
5375 table_def *filter_join_qpn::get_fields(){
5376 return(create_attributes(node_name, select_list));
// Output schema is derived from the node name and select list by
// create_attributes.
// NOTE(review): presumably a fresh table_def per call -- confirm ownership.
5379 table_def *watch_join_qpn::get_fields(){
5380 return(create_attributes(node_name, select_list));
// Output schema of the hash join.  Before building it with
// create_attributes, the temporal property of every select-list expression
// is recomputed from the temporal-equality predicates.
// Side effect: the data_type objects attached to the select-list
// expressions are modified in place (set_temporal / reset_temporal).
5383 table_def *join_eq_hash_qpn::get_fields(){
5386 // First, gather temporal colrefs and SEs.
// For each side of every temporal_eq predicate: a plain column reference is
// recorded in temporal_cids with its temporal type (first occurrence wins);
// NOTE(review): presumably any other expression is what gets kept in
// temporal_se for the exact-match pass below -- confirm.
5387 map<col_id, temporal_type> temporal_cids;
5388 vector<scalarexp_t *> temporal_se;
5389 for(h=0;h<temporal_eq.size();++h){
5390 scalarexp_t *sel = temporal_eq[h]->pr->get_left_se();
5391 scalarexp_t *ser = temporal_eq[h]->pr->get_right_se();
5393 if(sel->get_operator_type() == SE_COLREF){
5394 col_id tcol(sel->get_colref());
5395 if(temporal_cids.count(tcol) == 0){
5396 temporal_cids[tcol] = sel->get_data_type()->get_temporal();
5399 temporal_se.push_back(sel);
5402 if(ser->get_operator_type() == SE_COLREF){
5403 col_id tcol(ser->get_colref());
5404 if(temporal_cids.count(tcol) == 0){
5405 temporal_cids[tcol] = ser->get_data_type()->get_temporal();
5408 temporal_se.push_back(ser);
5412 // Mark select elements as nontemporal, then deduce which
5413 // ones are temporal.
5414 for(s=0;s<select_list.size();++s){
5415 select_list[s]->se->get_data_type()->set_temporal(
5416 compute_se_temporal(select_list[s]->se, temporal_cids)
5418 // Second chance if it is an exact match to an SE.
5419 // for(s=0;s<select_list.size();++s){
// An expression still non-temporal takes the temporal type of an equivalent
// join expression found in temporal_se.
5420 if(! select_list[s]->se->get_data_type()->is_temporal() ){
5421 for(t=0;t<temporal_se.size();++t){
5422 if(is_equivalent_se(temporal_se[t], select_list[s]->se)){
5423 select_list[s]->se->get_data_type()->set_temporal(
5424 temporal_se[t]->get_data_type()->get_temporal()
5432 // If there is an outer join, verify that
5433 // the temporal attributes are actually temporal.
5434 // NOTE: this code must be synchronized with the
5435 // equivalence finding in join_eq_hash_qpn::generate_functor
5436 // (and also, the join_eq_hash_qpn constructor)
// NOTE(review): from[0] and from[1] are read unconditionally, so exactly
// two sources are assumed -- confirm.
5437 if(from[0]->get_property() || from[1]->get_property()){
// l_equiv / r_equiv: field names of plain column references appearing on
// the left / right side of a temporal equality.
5438 set<string> l_equiv, r_equiv;
5439 for(i=0;i<temporal_eq.size();i++){
5440 scalarexp_t *lse = temporal_eq[i]->pr->get_left_se();
5441 scalarexp_t *rse = temporal_eq[i]->pr->get_right_se();
5442 if(lse->get_operator_type()==SE_COLREF){
5443 l_equiv.insert(lse->get_colref()->get_field());
5445 if(rse->get_operator_type()==SE_COLREF){
5446 r_equiv.insert(rse->get_colref()->get_field());
// For each temporal select expression, its referenced columns are checked
// against the equivalence set of their side when that side has the
// property set; reset_temporal() below clears the temporal marking.
// NOTE(review): presumably only when such a check fails -- confirm.
5450 for(s=0;s<select_list.size();++s){
5451 if(select_list[s]->se->get_data_type()->is_temporal()){
5453 col_id_set::iterator ci;
5454 bool failed = false;
5455 gather_se_col_ids(select_list[s]->se,cid_set, NULL);
5456 for(ci=cid_set.begin();ci!=cid_set.end();++ci){
5457 if((*ci).tblvar_ref == 0){
5458 if(from[0]->get_property()){
5459 if(l_equiv.count((*ci).field) == 0){
5464 if(from[1]->get_property()){
5465 if(r_equiv.count((*ci).field) == 0){
5472 select_list[s]->se->get_data_type()->reset_temporal();
5479 return create_attributes(node_name, select_list);
5483 //-----------------------------------------------------------------
5484 // get output "keys"
5485 // This is a guess about the set of fields which are a key
5486 // Use as metadata output, e.g. in qtree.xml
5490 // refs to GB attributes are keys; if an SE is not a GB colref
5491 // but refers to a GB colref (outside of an aggregation)
5492 // then set partial_keys to true
// Guess the key fields of this node's output.
// Returns the names of select-list entries whose expression is_gb();
// names of entries for which contains_gb_se() holds are appended to the
// caller-supplied partial_keys.
5493 vector<string> sgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5494 vector<string> keys;
// One pass over every group-by slot; presumably this fills the gref_set
// consulted below -- TODO confirm.
5497 for(int i=0; i<gb_tbl.size();++i)
5500 for(int s=0;s<select_list.size();++s){
5501 if(select_list[s]->se->is_gb()){
5502 keys.push_back(select_list[s]->name);
// Expression is not itself a group-by reference but contains one.
5504 if(contains_gb_se(select_list[s]->se, gref_set)){
5505 partial_keys.push_back(select_list[s]->name);
// Guess the key fields of this node's output.
// Returns the names of select-list entries whose expression is_gb();
// names of entries for which contains_gb_se() holds are appended to the
// caller-supplied partial_keys.
5512 vector<string> rsgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5513 vector<string> keys;
// One pass over every group-by slot; presumably this fills the gref_set
// consulted below -- TODO confirm.
5516 for(int i=0; i<gb_tbl.size();++i)
5519 for(int s=0;s<select_list.size();++s){
5520 if(select_list[s]->se->is_gb()){
5521 keys.push_back(select_list[s]->name);
// Expression is not itself a group-by reference but contains one.
5523 if(contains_gb_se(select_list[s]->se, gref_set)){
5524 partial_keys.push_back(select_list[s]->name);
5535 //-----------------------------------------------------------------
5536 // get output tables
5539 // Get tablevar_t names of input and output tables
5541 // output_file_qpn::output_file_qpn(){source_op_name = ""; }
// Input table variables of an output-file node.
5542 vector<tablevar_t *> output_file_qpn::get_input_tbls(){
5546 vector<tablevar_t *> watch_tbl_qpn::get_input_tbls(){
5547 vector<tablevar_t *> ret;
// Input table variables of a merge node.
5551 vector<tablevar_t *> mrg_qpn::get_input_tbls(){
5555 vector<tablevar_t *> spx_qpn::get_input_tbls(){
5556 vector<tablevar_t *> retval(1,table_name);
5560 vector<tablevar_t *> sgah_qpn::get_input_tbls(){
5561 vector<tablevar_t *> retval(1,table_name);
5565 vector<tablevar_t *> rsgah_qpn::get_input_tbls(){
5566 vector<tablevar_t *> retval(1,table_name);
5570 vector<tablevar_t *> sgahcwcb_qpn::get_input_tbls(){
5571 vector<tablevar_t *> retval(1,table_name);
// Input table variables of a hash-join node.
5575 vector<tablevar_t *> join_eq_hash_qpn::get_input_tbls(){
// Input table variables of a filter-join node.
5579 vector<tablevar_t *> filter_join_qpn::get_input_tbls(){
// Input table variables of a watch-join node.
5583 vector<tablevar_t *> watch_join_qpn::get_input_tbls(){
5587 //-----------------------------------------------------------------
5588 // get output tables
5591 // This does not make sense, this fcn returns the output table *name*,
5592 // not its schema, and then there is another fcn to return the schema.
5593 vector<tablevar_t *> output_file_qpn::get_output_tbls(){
5594 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5598 vector<tablevar_t *> watch_tbl_qpn::get_output_tbls(){
5599 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5603 vector<tablevar_t *> mrg_qpn::get_output_tbls(){
5604 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5608 vector<tablevar_t *> spx_qpn::get_output_tbls(){
5609 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5613 vector<tablevar_t *> sgah_qpn::get_output_tbls(){
5614 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5618 vector<tablevar_t *> rsgah_qpn::get_output_tbls(){
5619 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5623 vector<tablevar_t *> sgahcwcb_qpn::get_output_tbls(){
5624 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5628 vector<tablevar_t *> join_eq_hash_qpn::get_output_tbls(){
5629 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5633 vector<tablevar_t *> filter_join_qpn::get_output_tbls(){
5634 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5639 vector<tablevar_t *> watch_join_qpn::get_output_tbls(){
5640 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5646 //-----------------------------------------------------------------
5649 // Associate colrefs with this schema.
5650 // Also, use this opportunity to create table_layout (the output schema).
5651 // If the output schema is ever needed before
// Re-resolve every input table variable in fm against Schema, then copy
// the resulting schema refs onto the first two merge column references.
5652 void mrg_qpn::bind_to_schema(table_list *Schema){
5654 for(t=0;t<fm.size();++t){
// Look the table up by schema name in the (possibly updated) Schema.
5655 int tblref = Schema->get_table_ref(fm[t]->get_schema_name());
5657 fm[t]->set_schema_ref(tblref );
5660 // Here I assume that the colrefs have been reordered
5661 // during analysis so that mvars line up with fm.
// NOTE(review): only indices 0 and 1 are bound here; this relies on fm
// and mvars each having at least two entries -- confirm for wider merges.
5662 mvars[0]->set_schema_ref(fm[0]->get_schema_ref());
5663 mvars[1]->set_schema_ref(fm[1]->get_schema_ref());
5670 // Associate colrefs in SEs with this schema.
// Steps: re-resolve table_name against Schema, wrap it in a one-table
// FROM list, then rebind the WHERE predicates and the select list.
5671 void spx_qpn::bind_to_schema(table_list *Schema){
5672 // Bind the tablevars in the From clause to the Schema
5673 // (it might have changed from analysis time)
5674 int t = Schema->get_table_ref(table_name->get_schema_name() );
5676 table_name->set_schema_ref(t );
5678 // Get the "from" clause
5679 tablevar_list_t fm(table_name);
5681 // Bind all SEs to this schema
// WHERE predicates first ...
5683 for(p=0;p<where.size();++p){
5684 bind_to_schema_pr(where[p]->pr, &fm, Schema);
// ... then the select-list expressions.
5687 for(s=0;s<select_list.size();++s){
5688 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5691 // Collect set of tuples referenced in this HFTA
5692 // input, internal, or output.
// Collect the column ids referenced by this node.
// tmp_cset receives every column id found in the WHERE predicates and the
// select list; retval receives only those whose schema field has at least
// one unpack function.
5696 col_id_set spx_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5697 col_id_set retval, tmp_cset;
5699 for(p=0;p<where.size();++p){
5700 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5703 for(s=0;s<select_list.size();++s){
5704 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5706 col_id_set::iterator cisi;
// Filter pass: look each column up in Schema and keep it when its field
// entry reports unpack functions.
// NOTE(review): presumably this pass is what ext_fcns_only selects -- confirm.
5708 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5709 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5710 if(fe->get_unpack_fcns().size()>0)
5711 retval.insert((*cisi));
// Collect the column ids referenced by this node.
// tmp_cset receives every column id found in the WHERE predicates and the
// select list; retval receives only those whose schema field has at least
// one unpack function.
5719 col_id_set filter_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5720 col_id_set retval, tmp_cset;
5722 for(p=0;p<where.size();++p){
5723 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5726 for(s=0;s<select_list.size();++s){
5727 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5729 col_id_set::iterator cisi;
// Filter pass: look each column up in Schema and keep it when its field
// entry reports unpack functions.
// NOTE(review): presumably this pass is what ext_fcns_only selects -- confirm.
5731 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5732 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5733 if(fe->get_unpack_fcns().size()>0)
5734 retval.insert((*cisi));
// Collect the column ids referenced by this node.
// tmp_cset receives every column id found in the WHERE predicates and the
// select list; retval receives only those whose schema field has at least
// one unpack function.
5742 col_id_set watch_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5743 col_id_set retval, tmp_cset;
5745 for(p=0;p<where.size();++p){
5746 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5749 for(s=0;s<select_list.size();++s){
5750 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5752 col_id_set::iterator cisi;
// Filter pass: look each column up in Schema and keep it when its field
// entry reports unpack functions.
// NOTE(review): presumably this pass is what ext_fcns_only selects -- confirm.
5754 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5755 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5756 if(fe->get_unpack_fcns().size()>0)
5757 retval.insert((*cisi));
5768 // Associate colrefs in SEs with this schema.
// Steps: re-resolve every table variable in `from` against Schema, build a
// FROM list over them, then rebind the WHERE predicates and the select list.
5769 void join_eq_hash_qpn::bind_to_schema(table_list *Schema){
5770 // Bind the tablevars in the From clause to the Schema
5771 // (it might have changed from analysis time)
5773 for(f=0;f<from.size();++f){
5774 string snm = from[f]->get_schema_name();
5775 int tbl_ref = Schema->get_table_ref(snm);
5777 from[f]->set_schema_ref(tbl_ref);
5780 // Bind all SEs to this schema
5781 tablevar_list_t fm(from);
// WHERE predicates first ...
5784 for(p=0;p<where.size();++p){
5785 bind_to_schema_pr(where[p]->pr, &fm, Schema);
// ... then the select-list expressions.
5788 for(s=0;s<select_list.size();++s){
5789 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5792 // Collect set of tuples referenced in this HFTA
5793 // input, internal, or output.
// Associate colrefs in SEs with this schema.
// Steps: re-resolve every table variable in `from` against Schema, build a
// FROM list over them, then rebind the WHERE predicates and the select list.
5797 void filter_join_qpn::bind_to_schema(table_list *Schema){
5798 // Bind the tablevars in the From clause to the Schema
5799 // (it might have changed from analysis time)
5801 for(f=0;f<from.size();++f){
5802 string snm = from[f]->get_schema_name();
5803 int tbl_ref = Schema->get_table_ref(snm);
5805 from[f]->set_schema_ref(tbl_ref);
5808 // Bind all SEs to this schema
5809 tablevar_list_t fm(from);
// WHERE predicates first ...
5812 for(p=0;p<where.size();++p){
5813 bind_to_schema_pr(where[p]->pr, &fm, Schema);
// ... then the select-list expressions.
5816 for(s=0;s<select_list.size();++s){
5817 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5820 // Collect set of tuples referenced in this HFTA
5821 // input, internal, or output.
// Associate colrefs in SEs with this schema.
// Steps: re-resolve every table variable in `from` against Schema, build a
// FROM list over them, then rebind the WHERE predicates and the select list.
5825 void watch_join_qpn::bind_to_schema(table_list *Schema){
5826 // Bind the tablevars in the From clause to the Schema
5827 // (it might have changed from analysis time)
5829 for(f=0;f<from.size();++f){
5830 string snm = from[f]->get_schema_name();
5831 int tbl_ref = Schema->get_table_ref(snm);
5833 from[f]->set_schema_ref(tbl_ref);
5836 // Bind all SEs to this schema
5837 tablevar_list_t fm(from);
// WHERE predicates first ...
5840 for(p=0;p<where.size();++p){
5841 bind_to_schema_pr(where[p]->pr, &fm, Schema);
// ... then the select-list expressions.
5844 for(s=0;s<select_list.size();++s){
5845 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5848 // Collect set of tuples referenced in this HFTA
5849 // input, internal, or output.
// Associate colrefs in SEs with this schema.
// Rebinds, in order: WHERE and HAVING predicates, the select list, the
// group-by definitions, and the aggregate expressions / operand lists.
5857 void sgah_qpn::bind_to_schema(table_list *Schema){
5858 // Bind the tablevars in the From clause to the Schema
5859 // (it might have changed from analysis time)
5862 int t = Schema->get_table_ref(table_name->get_schema_name() );
5864 table_name->set_schema_ref(t );
5866 // Get the "from" clause
5867 tablevar_list_t fm(table_name);
5871 // Bind all SEs to this schema
5873 for(p=0;p<where.size();++p){
5874 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5876 for(p=0;p<having.size();++p){
5877 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5880 for(s=0;s<select_list.size();++s){
5881 bind_to_schema_se(select_list[s]->se, &fm, Schema);
// Defining expression of every group-by variable.
5884 for(g=0;g<gb_tbl.size();++g){
5885 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Aggregates: a built-in contributes its aggregate SE; the operand list
// is walked below (presumably for the non-built-in case -- TODO confirm).
5888 for(a=0;a<aggr_tbl.size();++a){
5889 if(aggr_tbl.is_builtin(a)){
5890 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5892 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5894 for(o=0;o<opl.size();++o){
5895 bind_to_schema_se(opl[o],&fm,Schema);
// Collect the column ids referenced by this aggregation node.
// tmp_cset receives the column ids found in the WHERE predicates, the
// group-by definitions and the aggregate expressions / operands (gb_tbl is
// passed to the gather helpers); retval receives only those whose schema
// field has at least one unpack function.
5901 col_id_set sgah_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5902 col_id_set retval, tmp_cset;
5904 for(p=0;p<where.size();++p){
5905 gather_pr_col_ids(where[p]->pr, tmp_cset, &gb_tbl);
5908 for(g=0;g<gb_tbl.size();++g){
5909 gather_se_col_ids(gb_tbl.get_def(g), tmp_cset, &gb_tbl);
// Aggregates: a built-in contributes its aggregate SE; the operand list
// is walked below (presumably for the non-built-in case -- TODO confirm).
5912 for(a=0;a<aggr_tbl.size();++a){
5913 if(aggr_tbl.is_builtin(a)){
5914 gather_se_col_ids(aggr_tbl.get_aggr_se(a), tmp_cset, &gb_tbl);
5916 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5918 for(o=0;o<opl.size();++o){
5919 gather_se_col_ids(opl[o], tmp_cset, &gb_tbl);
5924 col_id_set::iterator cisi;
// Filter pass: keep a column when its field entry reports unpack functions.
// NOTE(review): presumably this pass is what ext_fcns_only selects -- confirm.
5926 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5927 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5928 if(fe->get_unpack_fcns().size()>0)
5929 retval.insert((*cisi));
// Associate colrefs in SEs with this schema.
// Rebinds, in order: WHERE, HAVING and CLOSING_WHEN predicates, the select
// list, the group-by definitions, and the aggregate expressions / operands.
5938 void rsgah_qpn::bind_to_schema(table_list *Schema){
5939 // Bind the tablevars in the From clause to the Schema
5940 // (it might have changed from analysis time)
5941 int t = Schema->get_table_ref(table_name->get_schema_name() );
5943 table_name->set_schema_ref(t );
5945 // Get the "from" clause
5946 tablevar_list_t fm(table_name);
5948 // Bind all SEs to this schema
5950 for(p=0;p<where.size();++p){
5951 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5953 for(p=0;p<having.size();++p){
5954 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5956 for(p=0;p<closing_when.size();++p){
5957 bind_to_schema_pr(closing_when[p]->pr, &fm, Schema);
5960 for(s=0;s<select_list.size();++s){
5961 bind_to_schema_se(select_list[s]->se, &fm, Schema);
// Defining expression of every group-by variable.
5964 for(g=0;g<gb_tbl.size();++g){
5965 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
// Aggregates: a built-in contributes its aggregate SE; the operand list
// is walked below (presumably for the non-built-in case -- TODO confirm).
5968 for(a=0;a<aggr_tbl.size();++a){
5969 if(aggr_tbl.is_builtin(a)){
5970 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5972 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5974 for(o=0;o<opl.size();++o){
5975 bind_to_schema_se(opl[o],&fm,Schema);
5982 void sgahcwcb_qpn::bind_to_schema(table_list *Schema){
5983 // Bind the tablevars in the From clause to the Schema
5984 // (it might have changed from analysis time)
5985 int t = Schema->get_table_ref(table_name->get_schema_name() );
5987 table_name->set_schema_ref(t );
5989 // Get the "from" clause
5990 tablevar_list_t fm(table_name);
5992 // Bind all SEs to this schema
5994 for(p=0;p<where.size();++p){
5995 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5997 for(p=0;p<having.size();++p){
5998 bind_to_schema_pr(having[p]->pr, &fm, Schema);
6000 for(p=0;p<having.size();++p){
6001 bind_to_schema_pr(cleanby[p]->pr, &fm, Schema);
6003 for(p=0;p<having.size();++p){
6004 bind_to_schema_pr(cleanwhen[p]->pr, &fm, Schema);
6007 for(s=0;s<select_list.size();++s){
6008 bind_to_schema_se(select_list[s]->se, &fm, Schema);
6011 for(g=0;g<gb_tbl.size();++g){
6012 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
6015 for(a=0;a<aggr_tbl.size();++a){
6016 if(aggr_tbl.is_builtin(a)){
6017 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
6019 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
6021 for(o=0;o<opl.size();++o){
6022 bind_to_schema_se(opl[o],&fm,Schema);
6033 ///////////////////////////////////////////////////////////////
6034 ///////////////////////////////////////////////////////////////
6035 /// Functions for code generation.
6038 //-----------------------------------------------------------------
6041 cplx_lit_table *watch_tbl_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6042 return(new cplx_lit_table());
6045 cplx_lit_table *mrg_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6046 return(new cplx_lit_table());
6049 cplx_lit_table *spx_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6051 cplx_lit_table *complex_literals = new cplx_lit_table();
6053 for(i=0;i<select_list.size();i++){
6054 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6056 for(i=0;i<where.size();++i){
6057 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6060 return(complex_literals);
// Build a newly allocated table of the complex literals used by this node.
// Scans, in order: aggregates, select list, group-by definitions, WHERE
// and HAVING predicates.
6063 cplx_lit_table *sgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6065 cplx_lit_table *complex_literals = new cplx_lit_table();
6067 for(i=0;i<aggr_tbl.size();++i){
// A built-in aggregate that is not a star aggregate contributes its
// aggregate SE; the operand list is scanned below (presumably for the
// remaining cases -- TODO confirm).
6068 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6069 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6071 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6072 for(j=0;j<opl.size();++j)
6073 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6077 for(i=0;i<select_list.size();i++){
6078 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6080 for(i=0;i<gb_tbl.size();i++){
6081 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6083 for(i=0;i<where.size();++i){
6084 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6086 for(i=0;i<having.size();++i){
6087 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6090 return(complex_literals);
// Build a newly allocated table of the complex literals used by this node.
// Scans, in order: aggregates, select list, group-by definitions, WHERE,
// HAVING and CLOSING_WHEN predicates.
6094 cplx_lit_table *rsgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6096 cplx_lit_table *complex_literals = new cplx_lit_table();
6098 for(i=0;i<aggr_tbl.size();++i){
// A built-in aggregate that is not a star aggregate contributes its
// aggregate SE; the operand list is scanned below (presumably for the
// remaining cases -- TODO confirm).
6099 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6100 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6102 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6103 for(j=0;j<opl.size();++j)
6104 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6108 for(i=0;i<select_list.size();i++){
6109 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6111 for(i=0;i<gb_tbl.size();i++){
6112 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6114 for(i=0;i<where.size();++i){
6115 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6117 for(i=0;i<having.size();++i){
6118 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6120 for(i=0;i<closing_when.size();++i){
6121 find_complex_literal_pr(closing_when[i]->pr,Ext_fcns, complex_literals);
6124 return(complex_literals);
// Build a newly allocated table of the complex literals used by this node.
// Scans, in order: aggregates, select list, group-by definitions, WHERE,
// HAVING, cleanwhen and cleanby predicates.
6128 cplx_lit_table *sgahcwcb_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6130 cplx_lit_table *complex_literals = new cplx_lit_table();
6132 for(i=0;i<aggr_tbl.size();++i){
// A built-in aggregate that is not a star aggregate contributes its
// aggregate SE; the operand list is scanned below (presumably for the
// remaining cases -- TODO confirm).
6133 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6134 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6136 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6137 for(j=0;j<opl.size();++j)
6138 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6142 for(i=0;i<select_list.size();i++){
6143 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6145 for(i=0;i<gb_tbl.size();i++){
6146 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6148 for(i=0;i<where.size();++i){
6149 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6151 for(i=0;i<having.size();++i){
6152 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6154 for(i=0;i<cleanwhen.size();++i){
6155 find_complex_literal_pr(cleanwhen[i]->pr,Ext_fcns, complex_literals);
6157 for(i=0;i<cleanby.size();++i){
6158 find_complex_literal_pr(cleanby[i]->pr,Ext_fcns, complex_literals);
6161 return(complex_literals);
6164 cplx_lit_table *join_eq_hash_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6166 cplx_lit_table *complex_literals = new cplx_lit_table();
6168 for(i=0;i<select_list.size();i++){
6169 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6171 for(i=0;i<where.size();++i){
6172 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6175 return(complex_literals);
6178 cplx_lit_table *filter_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6180 cplx_lit_table *complex_literals = new cplx_lit_table();
6182 for(i=0;i<select_list.size();i++){
6183 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6185 for(i=0;i<where.size();++i){
6186 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6189 return(complex_literals);
6192 cplx_lit_table *watch_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6194 cplx_lit_table *complex_literals = new cplx_lit_table();
6196 for(i=0;i<select_list.size();i++){
6197 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6199 for(i=0;i<where.size();++i){
6200 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6203 return(complex_literals);
6210 //-----------------------------------------------------------------
6211 // get_handle_param_tbl
6213 vector<handle_param_tbl_entry *> watch_tbl_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6214 vector<handle_param_tbl_entry *> retval;
6218 vector<handle_param_tbl_entry *> mrg_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6219 vector<handle_param_tbl_entry *> retval;
6224 vector<handle_param_tbl_entry *> spx_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6226 vector<handle_param_tbl_entry *> retval;
6228 for(i=0;i<select_list.size();i++){
6229 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6231 for(i=0;i<where.size();++i){
6232 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
// Collect the handle parameter entries used by this node into retval.
// Scans, in order: aggregates, select list, group-by definitions, WHERE
// and HAVING predicates.
6239 vector<handle_param_tbl_entry *> sgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6241 vector<handle_param_tbl_entry *> retval;
6244 for(i=0;i<aggr_tbl.size();++i){
// A built-in aggregate that is not a star aggregate contributes its
// aggregate SE; the operand list is scanned below (presumably for the
// remaining cases -- TODO confirm).
6245 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6246 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6248 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6249 for(j=0;j<opl.size();++j)
6250 find_param_handles_se(opl[j], Ext_fcns, retval);
6253 for(i=0;i<select_list.size();i++){
6254 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6256 for(i=0;i<gb_tbl.size();i++){
6257 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6259 for(i=0;i<where.size();++i){
6260 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6262 for(i=0;i<having.size();++i){
6263 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
// Collect the handle parameter entries used by this node into retval.
// Scans, in order: aggregates, select list, group-by definitions, WHERE,
// HAVING and CLOSING_WHEN predicates.
6270 vector<handle_param_tbl_entry *> rsgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6272 vector<handle_param_tbl_entry *> retval;
6275 for(i=0;i<aggr_tbl.size();++i){
// A built-in aggregate that is not a star aggregate contributes its
// aggregate SE; the operand list is scanned below (presumably for the
// remaining cases -- TODO confirm).
6276 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6277 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6279 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6280 for(j=0;j<opl.size();++j)
6281 find_param_handles_se(opl[j], Ext_fcns, retval);
6284 for(i=0;i<select_list.size();i++){
6285 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6287 for(i=0;i<gb_tbl.size();i++){
6288 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6290 for(i=0;i<where.size();++i){
6291 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6293 for(i=0;i<having.size();++i){
6294 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6296 for(i=0;i<closing_when.size();++i){
6297 find_param_handles_pr(closing_when[i]->pr,Ext_fcns, retval);
// Collect the handle parameter entries used by this node into retval.
// Scans, in order: aggregates, select list, group-by definitions, WHERE,
// HAVING, cleanwhen and cleanby predicates.
6304 vector<handle_param_tbl_entry *> sgahcwcb_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6306 vector<handle_param_tbl_entry *> retval;
6309 for(i=0;i<aggr_tbl.size();++i){
// A built-in aggregate that is not a star aggregate contributes its
// aggregate SE; the operand list is scanned below (presumably for the
// remaining cases -- TODO confirm).
6310 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6311 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6313 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6314 for(j=0;j<opl.size();++j)
6315 find_param_handles_se(opl[j], Ext_fcns, retval);
6318 for(i=0;i<select_list.size();i++){
6319 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6321 for(i=0;i<gb_tbl.size();i++){
6322 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6324 for(i=0;i<where.size();++i){
6325 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6327 for(i=0;i<having.size();++i){
6328 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6330 for(i=0;i<cleanwhen.size();++i){
6331 find_param_handles_pr(cleanwhen[i]->pr,Ext_fcns, retval);
6333 for(i=0;i<cleanby.size();++i){
6334 find_param_handles_pr(cleanby[i]->pr,Ext_fcns, retval);
6340 vector<handle_param_tbl_entry *> join_eq_hash_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6342 vector<handle_param_tbl_entry *> retval;
6344 for(i=0;i<select_list.size();i++){
6345 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6347 for(i=0;i<where.size();++i){
6348 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6355 vector<handle_param_tbl_entry *> filter_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6357 vector<handle_param_tbl_entry *> retval;
6359 for(i=0;i<select_list.size();i++){
6360 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6362 for(i=0;i<where.size();++i){
6363 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6369 vector<handle_param_tbl_entry *> watch_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6371 vector<handle_param_tbl_entry *> retval;
6373 for(i=0;i<select_list.size();i++){
6374 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6376 for(i=0;i<where.size();++i){
6377 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6385 ///////////////////////////////////////////////////////////////
6386 ///////////////////////////////////////////////////////////////
6387 /// Functions for operator output rates estimations
6390 //-----------------------------------------------------------------
6391 // get_rate_estimate
6393 double spx_qpn::get_rate_estimate() {
6395 // dummy method for now
6396 return SPX_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6399 double sgah_qpn::get_rate_estimate() {
6401 // dummy method for now
6402 return SGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6405 double rsgah_qpn::get_rate_estimate() {
6407 // dummy method for now
6408 return RSGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6411 double sgahcwcb_qpn::get_rate_estimate() {
6413 // dummy method for now
6414 return SGAHCWCB_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6417 double watch_tbl_qpn::get_rate_estimate() {
6419 // dummy method for now
6420 return DEFAULT_INTERFACE_RATE;
6423 double mrg_qpn::get_rate_estimate() {
6425 // dummy method for now
6426 return MRG_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6429 double join_eq_hash_qpn::get_rate_estimate() {
6431 // dummy method for now
6432 return JOIN_EQ_HASH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6436 //////////////////////////////////////////////////////////////////////////////
6437 //////////////////////////////////////////////////////////////////////////////
6438 ///// Generate functors
6443 //-------------------------------------------------------------------------
6444 // Code generation utilities.
6445 //-------------------------------------------------------------------------
6447 // Globals referenced by generate utilities
// generate_se_code reads this pointer to expand a group-by reference into
// its defining expression, so it has to be set before that function is
// used on expressions containing group-by column references.
6449 static gb_table *segen_gb_tbl; // Table of all group-by attributes.
6453 // Generate code that makes reference
6454 // to the tuple, and not to any aggregates.
6455 // NEW : it might reference a stateful function.
// se     : the scalar expression to translate.
// schema : passed through unchanged to the recursive calls.
// Returns the generated C code as a string; for an unknown operator type
// a message is written to stderr and an "ERROR ..." string is returned.
6456 static string generate_se_code(scalarexp_t *se,table_list *schema){
6458 data_type *ldt, *rdt;
6460 vector<scalarexp_t *> operands;
6463 switch(se->get_operator_type()){
// Literal: a handle reference, a complex literal, or the literal's own
// C representation.
6465 if(se->is_handle_ref()){
6466 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6470 if(se->get_literal()->is_cpx_lit()){
6471 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6475 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
// Parameter: a handle reference, or "param_" followed by the parameter name.
6477 if(se->is_handle_ref()){
6478 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6482 ret.append("param_");
6483 ret.append(se->get_param_name());
// Unary operator: use the operand type's complex operator when it has one
// for this op, otherwise emit the op followed by the operand code.
6486 ldt = se->get_left_se()->get_data_type();
6487 if(ldt->complex_operator(se->get_op()) ){
6488 ret.append( ldt->get_complex_operator(se->get_op()) );
6490 ret.append(generate_se_code(se->get_left_se(),schema));
6494 ret.append(se->get_op());
6495 ret.append(generate_se_code(se->get_left_se(),schema));
// Binary operator: use the complex operator for the (left, right) type
// pair when there is one, otherwise emit left, op, right.
6500 ldt = se->get_left_se()->get_data_type();
6501 rdt = se->get_right_se()->get_data_type();
6503 if(ldt->complex_operator(rdt, se->get_op()) ){
6504 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6506 ret.append(generate_se_code(se->get_left_se(),schema));
6508 ret.append(generate_se_code(se->get_right_se(),schema));
6512 ret.append(generate_se_code(se->get_left_se(),schema));
6513 ret.append(se->get_op());
6514 ret.append(generate_se_code(se->get_right_se(),schema));
6519 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6520 // so return the defining code.
6521 int gref = se->get_gb_ref();
6522 scalarexp_t *gdef_se = segen_gb_tbl->get_def(gref);
6523 ret = generate_se_code(gdef_se, schema );
// Ordinary column: name of the unpacked variable, built from the field
// name and the table variable reference.
6526 sprintf(tmpstr,"unpack_var_%s_%d",
6527 se->get_colref()->get_field().c_str(), se->get_colref()->get_tablevar_ref() );
// Function call: a partial function refers to its cached result variable.
6532 if(se->is_partial()){
6533 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6536 ret += se->op + "(";
6537 operands = se->get_operands();
6538 bool first_elem = true;
// A function with storage state gets the state variable and cd as its
// leading arguments.
6539 if(se->get_storage_state() != ""){
6540 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6543 for(o=0;o<operands.size();o++){
6544 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references get special
// treatment before their code is appended.
6545 if(operands[o]->get_data_type()->is_buffer_type() &&
6546 (! (operands[o]->is_handle_ref()) ) )
6548 ret += generate_se_code(operands[o], schema);
6554 fprintf(stderr,"INTERNAL ERROR in generate_se_code (hfta), line %d, character %d: unknown operator type %d\n",
6555 se->get_lineno(), se->get_charno(),se->get_operator_type());
6556 return("ERROR in generate_se_code");
6560 // generate code that refers only to aggregate data and constants.
6561 // NEW : modified to handle superaggregates and stateful fcn refs.
6562 // Assume that the state is in *stval
// se     : the scalar expression to translate.
// gbvar  : prefix placed before a group-by reference number.
// aggvar : prefix placed before "aggr_var<N>" for ordinary aggregates.
// schema : passed through unchanged to the recursive calls.
// Returns the generated C code as a string; for an unknown operator type
// a message is written to stderr and an "ERROR ..." string is returned.
6563 static string generate_se_code_fm_aggr(scalarexp_t *se, string gbvar, string aggvar, table_list *schema){
6566 data_type *ldt, *rdt;
6568 vector<scalarexp_t *> operands;
6571 switch(se->get_operator_type()){
// Literal: a handle reference, a complex literal, or the literal's own
// C representation.
6573 if(se->is_handle_ref()){
6574 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6578 if(se->get_literal()->is_cpx_lit()){
6579 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6583 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
// Parameter: a handle reference, or "param_" followed by the parameter name.
6585 if(se->is_handle_ref()){
6586 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6590 ret.append("param_");
6591 ret.append(se->get_param_name());
// Unary operator: use the operand type's complex operator when it has one
// for this op, otherwise emit the op followed by the operand code.
6594 ldt = se->get_left_se()->get_data_type();
6595 if(ldt->complex_operator(se->get_op()) ){
6596 ret.append( ldt->get_complex_operator(se->get_op()) );
6598 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6602 ret.append(se->get_op());
6603 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
// Binary operator: use the complex operator for the (left, right) type
// pair when there is one, otherwise emit left, op, right.
6608 ldt = se->get_left_se()->get_data_type();
6609 rdt = se->get_right_se()->get_data_type();
6611 if(ldt->complex_operator(rdt, se->get_op()) ){
6612 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6614 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6616 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6620 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6621 ret.append(se->get_op());
6622 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
// Column reference: a group-by reference becomes gbvar followed by its
// group-by index; the other visible path prints an error to stderr.
6627 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6628 // so return the defining code.
6629 sprintf(tmpstr,"%s%d",gbvar.c_str(),se->get_gb_ref());
6633 fprintf(stderr,"ERROR reference to non-GB column ref not permitted here,"
6634 "error in query_plan.cc:generate_se_code_fm_aggr, line %d, character %d.\n",
6635 se->get_lineno(), se->get_charno());
// Aggregate reference: superaggregates are read through stval, ordinary
// aggregates through the aggvar prefix.
6641 if(se->is_superaggr()){
6642 sprintf(tmpstr,"stval->aggr_var%d",se->get_aggr_ref());
6644 sprintf(tmpstr,"%saggr_var%d",aggvar.c_str(),se->get_aggr_ref());
// Function: one carrying an aggregate ref (>= 0) refers to udaf_ret_<N>;
// a partial function refers to its cached result variable.
6650 if(se->get_aggr_ref() >= 0){
6651 sprintf(tmpstr,"udaf_ret_%d",se->get_aggr_ref());
6656 if(se->is_partial()){
6657 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6660 ret += se->op + "(";
6661 bool first_elem = true;
// A function with storage state gets the state variable and cd as its
// leading arguments.
6662 if(se->get_storage_state() != ""){
6663 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6666 operands = se->get_operands();
6667 for(o=0;o<operands.size();o++){
6668 if(first_elem) first_elem=false; else ret += ", ";
// Buffer-typed operands that are not handle references get special
// treatment before their code is appended.
6669 if(operands[o]->get_data_type()->is_buffer_type() &&
6670 (! (operands[o]->is_handle_ref()) ) )
6672 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
6678 fprintf(stderr,"INTERNAL ERROR in query_plan.cc::generate_se_code_fm_aggr, line %d, character %d: unknown operator type %d\n",
6679 se->get_lineno(), se->get_charno(),se->get_operator_type());
6680 return("ERROR in generate_se_code_fm_aggr");
// Generate the call statement for a partial function whose operands are
// translated with generate_se_code_fm_aggr.
// The emitted text starts with "\tretval = <op>( " and the first argument
// is "&partial_fcn_result_<pfn_id>".  A non-function SE is rejected with a
// message on stderr and an "ERROR ..." return string.
6686 static string unpack_partial_fcn_fm_aggr(scalarexp_t *se, int pfn_id, string gbvar, string aggvar, table_list *schema){
6689 vector<scalarexp_t *> operands;
6692 if(se->get_operator_type() != SE_FUNC){
6693 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn_fm_aggr. line %d, character %d\n",
6694 se->get_lineno(), se->get_charno());
6695 return("ERROR in unpack_partial_fcn_fm_aggr");
// NOTE(review): the next line ends in ',' rather than ';', so the
// assignment and the sprintf form one comma expression.  The evaluation
// order is unchanged, but a ';' was presumably intended.
6698 ret = "\tretval = " + se->get_op() + "( ",
6699 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
// A function with storage state also receives the state variable and cd.
6702 if(se->get_storage_state() != ""){
6703 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6706 operands = se->get_operands();
6707 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references get special
// treatment before their code is appended.
6709 if(operands[o]->get_data_type()->is_buffer_type() &&
6710 (! (operands[o]->is_handle_ref()) ) )
6712 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
// Build the C statement that calls the partial function described by `se`,
// assigning the call's status to `retval`.
//   se      - scalar expression; must have operator type SE_FUNC.
//   pfn_id  - index used to name the result slot partial_fcn_result_<pfn_id>.
//   schema  - forwarded unchanged to generate_se_code for each operand.
// Returns the generated text, or an "ERROR in ..." string (after reporting on
// stderr) when `se` is not a function expression.
6720 static string unpack_partial_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6723 vector<scalarexp_t *> operands;
// Reject anything that is not a function call.
6725 if(se->get_operator_type() != SE_FUNC){
6726 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn. line %d, character %d\n",
6727 se->get_lineno(), se->get_charno());
6728 return("ERROR in unpack_partial_fcn");
// Start the call: "retval = <fcn>( ". This statement previously ended with a
// comma, which silently chained it to the sprintf below via the comma
// operator; it is now terminated properly (evaluation order is unchanged).
6731 ret = "\tretval = " + se->get_op() + "( ";
// First argument text: address of the result slot for this partial function.
6732 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
// Functions with storage state additionally receive the state variable and cd.
6735 if(se->get_storage_state() != ""){
6736 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
// Remaining arguments are the function's operands, in order.
6739 operands = se->get_operands();
6740 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references get special treatment
// (the statement guarded by this test is not shown here — presumably an
// address-of prefix; TODO confirm).
6742 if(operands[o]->get_data_type()->is_buffer_type() &&
6743 (! (operands[o]->is_handle_ref()) ) )
6745 ret += generate_se_code(operands[o], schema);
// Build the C call expression "<fcn>(args...)" for the function expression
// `se`.
//   se      - scalar expression; must have operator type SE_FUNC.
//   pfn_id  - not referenced by any statement shown in this block.
//   schema  - forwarded unchanged to generate_se_code for each operand.
// Returns the generated text, or an "ERROR in ..." string (after reporting on
// stderr) when `se` is not a function expression.
6752 static string generate_cached_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6755 vector<scalarexp_t *> operands;
// Reject anything that is not a function call.
6757 if(se->get_operator_type() != SE_FUNC){
6758 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to generate_cached_fcn. line %d, character %d\n",
6759 se->get_lineno(), se->get_charno());
6760 return("ERROR in generate_cached_fcn");
// Open the call with the function's name.
6763 ret = se->get_op()+"(";
// Functions with storage state receive the state variable and cd first.
6765 if(se->get_storage_state() != ""){
6766 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd,";
// Then each operand, in order.
6769 operands = se->get_operands();
6770 for(o=0;o<operands.size();o++){
// Buffer-typed operands that are not handle references get special treatment
// (guarded statement not shown here — presumably an address-of prefix;
// TODO confirm).
6772 if(operands[o]->get_data_type()->is_buffer_type() &&
6773 (! (operands[o]->is_handle_ref()) ) )
6775 ret += generate_se_code(operands[o], schema);
// Translate a query-language comparison operator into its C spelling:
// "=" becomes "==" and "<>" becomes "!=".
// NOTE(review): the result for any other operator is not shown in this
// block — presumably `op` is returned unchanged; confirm.
6786 static string generate_C_comparison_op(string op){
6787 if(op == "=") return("==");
6788 if(op == "<>") return("!=");
// Translate a query-language boolean operator into C.  AND, OR and NOT are
// each recognized in upper-case, capitalized and lower-case spellings only
// (e.g. "aNd" is not matched).
// NOTE(review): the string returned for each recognized operator is not shown
// in this block — presumably the C operators; confirm.
// Any other input yields the literal text "ERROR UNKNOWN BOOLEAN OPERATOR".
6792 static string generate_C_boolean_op(string op){
6793 if( (op == "AND") || (op == "And") || (op == "and") ){
6796 if( (op == "OR") || (op == "Or") || (op == "or") ){
6799 if( (op == "NOT") || (op == "Not") || (op == "not") ){
6803 return("ERROR UNKNOWN BOOLEAN OPERATOR");
// Generate the C text of a boolean expression for predicate `pr`.
//   pr     - predicate tree; dispatch is on pr->get_operator_type().
//   schema - forwarded unchanged to generate_se_code for scalar operands.
// Returns the generated text.  An unknown operator type is reported on stderr
// and yields the string "ERROR in generate_predicate_code".
// NOTE(review): most case labels are not shown in this block; the per-section
// descriptions below are inferred from the accessors each section uses.
6807 static string generate_predicate_code(predicate_t *pr,table_list *schema){
6809 vector<literal_t *> litv;
6811 data_type *ldt, *rdt;
6812 vector<scalarexp_t *> op_list;
6815 switch(pr->get_operator_type()){
// --- Membership in a literal list: one test per literal, joined with "||".
6817 ldt = pr->get_left_se()->get_data_type();
6820 litv = pr->get_lit_vec();
6821 for(i=0;i<litv.size();i++){
6822 if(i>0) ret.append(" || ");
// Types needing a comparison function: emit "<cmp_fcn>(lhs, literal) == 0".
6825 if(ldt->complex_comparison(ldt) ){
6826 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6828 if(ldt->is_buffer_type() )
6830 ret.append(generate_se_code(pr->get_left_se(), schema));
6832 if(ldt->is_buffer_type() )
// Complex literals are referenced through their complex_literal_<n> variable;
// other literals are emitted inline.
6834 if(litv[i]->is_cpx_lit()){
6835 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6838 ret.append(litv[i]->to_C_code(""));
6840 ret.append(") == 0");
// Otherwise the left expression and the literal are emitted directly.
6842 ret.append(generate_se_code(pr->get_left_se(), schema));
6844 ret.append(litv[i]->to_hfta_C_code(""));
// --- Comparison between two scalar expressions.
6853 ldt = pr->get_left_se()->get_data_type();
6854 rdt = pr->get_right_se()->get_data_type();
// Complex types: "<cmp_fcn>(lhs, rhs) <op> ..." using the type's comparison
// function, followed by the translated comparison operator.
6857 if(ldt->complex_comparison(rdt) ){
6858 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6860 if(ldt->is_buffer_type() )
6862 ret.append(generate_se_code(pr->get_left_se(),schema) );
6864 if(rdt->is_buffer_type() )
6866 ret.append(generate_se_code(pr->get_right_se(),schema) );
6868 ret.append( generate_C_comparison_op(pr->get_op()));
// Simple types: "lhs <op> rhs".
6871 ret.append(generate_se_code(pr->get_left_se(),schema) );
6872 ret.append( generate_C_comparison_op(pr->get_op()));
6873 ret.append(generate_se_code(pr->get_right_se(),schema) );
// --- Unary boolean operator applied to the left sub-predicate.
6879 ret.append( generate_C_boolean_op(pr->get_op()) );
6880 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
// --- Binary boolean operator: left, operator, right (recursive).
6883 case PRED_BINARY_OP:
6885 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6886 ret.append( generate_C_boolean_op(pr->get_op()) );
6887 ret.append(generate_predicate_code(pr->get_right_pr(),schema) );
// --- Predicate function call: "<name>( arg, arg, ...".
6891 ret += pr->get_op() + "( ";
6892 op_list = pr->get_op_list();
6893 for(o=0;o<op_list.size();++o){
6894 if(o>0) ret += ", ";
6895 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6897 ret += generate_se_code(op_list[o], schema);
// --- Unknown predicate operator type.
6902 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6903 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6904 return("ERROR in generate_predicate_code");
// Generate the C text of a boolean expression for predicate `pr`, with
// scalar operands generated in aggregate context.
//   pr            - predicate tree; dispatch is on pr->get_operator_type().
//   gbvar, aggvar - forwarded unchanged to generate_se_code_fm_aggr.
//   schema        - forwarded unchanged to generate_se_code_fm_aggr.
// Returns the generated text.  An unknown operator type is reported on stderr
// and yields an error string.
// NOTE(review): most case labels are not shown in this block; the per-section
// descriptions below are inferred from the accessors each section uses.
6908 static string generate_predicate_code_fm_aggr(predicate_t *pr, string gbvar, string aggvar,table_list *schema){
6910 vector<literal_t *> litv;
6912 data_type *ldt, *rdt;
6913 vector<scalarexp_t *> op_list;
6916 switch(pr->get_operator_type()){
// --- Membership in a literal list: one test per literal, joined with "||".
6918 ldt = pr->get_left_se()->get_data_type();
6921 litv = pr->get_lit_vec();
6922 for(i=0;i<litv.size();i++){
6923 if(i>0) ret.append(" || ");
// Types needing a comparison function: emit "<cmp_fcn>(lhs, literal) == 0".
6926 if(ldt->complex_comparison(ldt) ){
6927 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6929 if(ldt->is_buffer_type() )
6931 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6933 if(ldt->is_buffer_type() )
// Complex literals are referenced through their complex_literal_<n> variable;
// other literals are emitted inline.
6935 if(litv[i]->is_cpx_lit()){
6936 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6939 ret.append(litv[i]->to_C_code(""));
6941 ret.append(") == 0");
// Otherwise the left expression and the literal are emitted directly.
6943 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6945 ret.append(litv[i]->to_hfta_C_code(""));
// --- Comparison between two scalar expressions.
6954 ldt = pr->get_left_se()->get_data_type();
6955 rdt = pr->get_right_se()->get_data_type();
// Complex types go through the type's comparison function, followed by the
// translated comparison operator.
6958 if(ldt->complex_comparison(rdt) ){
6959 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6961 if(ldt->is_buffer_type() )
6963 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6965 if(rdt->is_buffer_type() )
6967 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
6969 ret.append( generate_C_comparison_op(pr->get_op()));
// Simple types: "lhs <op> rhs".
6972 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6973 ret.append( generate_C_comparison_op(pr->get_op()));
6974 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
// --- Unary boolean operator applied to the left sub-predicate.
6980 ret.append( generate_C_boolean_op(pr->get_op()) );
6981 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
// --- Binary boolean operator: left, operator, right (recursive).
6984 case PRED_BINARY_OP:
6986 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6987 ret.append( generate_C_boolean_op(pr->get_op()) );
6988 ret.append(generate_predicate_code_fm_aggr(pr->get_right_pr(), gbvar, aggvar,schema) );
// --- Predicate function call: "<name>( arg, arg, ...".
6992 ret += pr->get_op() + "( ";
6993 op_list = pr->get_op_list();
6994 for(o=0;o<op_list.size();++o){
6995 if(o>0) ret += ", ";
6996 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6998 ret += generate_se_code_fm_aggr(op_list[o], gbvar, aggvar, schema);
// --- Unknown predicate operator type.  The diagnostic now names this function
// (it previously claimed to come from generate_predicate_code).  The returned
// text is left as it was so the function's output is unchanged.
7003 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code_fm_aggr, line %d, character %d, unknown predicate operator type %d\n",
7004 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
7005 return("ERROR in generate_predicate_code");
// Generate C text comparing two already-generated operand expressions of
// type `dt`.
//   lhs_op, rhs_op - operand expression text.
//   dt             - data type of both operands.
// When dt requires a comparison function, the text is built from that
// function's name, the operands, and a trailing ") == 0"; otherwise the two
// operands are emitted directly (the text placed between them is not shown in
// this block — presumably an equality operator; TODO confirm).
7013 static string generate_equality_test(string &lhs_op, string &rhs_op, data_type *dt){
7016 if(dt->complex_comparison(dt) ){
7017 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types get extra text before each operand (not shown here).
7019 if(dt->is_buffer_type() )
7023 if(dt->is_buffer_type() )
7025 ret.append(rhs_op );
7026 ret.append(") == 0");
// Simple-type path.
7028 ret.append(lhs_op );
7030 ret.append(rhs_op );
// Generate C text for an ordering test between two already-generated operand
// expressions of type `dt`.
//   lhs_op, rhs_op - operand expression text.
//   dt             - data type of both operands.
// When dt requires a comparison function, the text is built from that
// function's name, the operands, and a trailing ") == 1"; otherwise the two
// operands are emitted directly (the operator placed between them is not
// shown in this block; TODO confirm).
7036 static string generate_lt_test(string &lhs_op, string &rhs_op, data_type *dt){
7039 if(dt->complex_comparison(dt) ){
7040 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types get extra text before each operand (not shown here).
7042 if(dt->is_buffer_type() )
7046 if(dt->is_buffer_type() )
7048 ret.append(rhs_op );
7049 ret.append(") == 1");
// Simple-type path.
7051 ret.append(lhs_op );
7053 ret.append(rhs_op );
// Generate C text comparing two already-generated operand expressions of
// type `dt`.
//   lhs_op, rhs_op - operand expression text.
//   dt             - data type of both operands.
// When dt requires a comparison function, the text is built from that
// function's name, the operands, and a trailing ") == 0"; otherwise the two
// operands are emitted directly (the text placed between them is not shown in
// this block; TODO confirm).
7059 static string generate_comparison(string &lhs_op, string &rhs_op, data_type *dt){
7062 if(dt->complex_comparison(dt) ){
7063 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types get extra text before each operand (not shown here).
7065 if(dt->is_buffer_type() )
7069 if(dt->is_buffer_type() )
7071 ret.append(rhs_op );
7072 ret.append(") == 0");
// Simple-type path.
7074 ret.append(lhs_op );
7076 ret.append(rhs_op );
7083 // Here I assume that only MIN and MAX aggregates can be computed
7084 // over BUFFER data types.
// Generate the C statement(s) that fold one more input value into the
// aggregate stored in `var`.
//   var    - C expression naming the aggregate's storage.
//   atbl   - aggregate table; entry `aidx` describes the aggregate.
//   aidx   - index of the aggregate in atbl; also used to name aggr_tmp_<aidx>.
//   schema - forwarded unchanged to generate_se_code.
// An unrecognized builtin operator is reported on stderr.
7086 static string generate_aggr_update(string var, aggregate_table *atbl,int aidx, table_list *schema){
7087 string retval = "\t\t";
7088 string op = atbl->get_op(aidx);
// User-defined aggregates: call <op>_HFTA_AGGR_UPDATE_ on the storage
// (passed by address unless its storage type is fstring_t), followed by the
// aggregate's operands.
7091 if(! atbl->is_builtin(aidx)) {
7093 retval += op+"_HFTA_AGGR_UPDATE_(";
7094 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7095 retval+="("+var+")";
7096 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
7097 for(o=0;o<opl.size();++o){{
7099 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7101 retval += generate_se_code(opl[o], schema);
7110 // builtin processing
7111 data_type *dt = atbl->get_data_type(aidx);
// Increment form (the operator test selecting it is not shown — presumably
// COUNT; confirm).
7115 retval.append("++;\n");
// Accumulate form: "<var> += <expr>;" (presumably SUM; confirm).
7120 retval.append(" += ");
7121 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7122 retval.append(";\n");
// Keep-smaller form: evaluate the input into aggr_tmp_<aidx>, then replace
// `var` when the temporary compares less (presumably MIN; confirm).  Types
// with complex comparison use their comparison function; buffer types pass
// addresses and are replaced through the type's buffer-replace function.
7126 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7127 retval += dt->make_host_cvar(tmpstr);
7129 retval += generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7130 if(dt->complex_comparison(dt)){
7131 if(dt->is_buffer_type())
7132 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7134 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7136 sprintf(tmpstr,"\t\tif(aggr_tmp_%d < %s)\n",aidx,var.c_str());
7138 retval.append(tmpstr);
7139 if(dt->is_buffer_type()){
7140 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7142 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7144 retval.append(tmpstr);
// Keep-larger form: same structure with the comparison reversed (presumably
// MAX; confirm).
7149 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7150 retval+=dt->make_host_cvar(tmpstr);
7152 retval+=generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7153 if(dt->complex_comparison(dt)){
7154 if(dt->is_buffer_type())
7155 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7157 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7159 sprintf(tmpstr,"\t\tif(aggr_tmp_%d > %s)\n",aidx,var.c_str());
7161 retval.append(tmpstr);
7162 if(dt->is_buffer_type()){
7163 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7165 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7167 retval.append(tmpstr);
// Bitwise aggregates: "<var> &= / |= / ^= <expr>;".
7172 if(op == "AND_AGGR"){
7174 retval.append(" &= ");
7175 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7176 retval.append(";\n");
7179 if(op == "OR_AGGR"){
7181 retval.append(" |= ");
7182 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7183 retval.append(";\n");
7186 if(op == "XOR_AGGR"){
7188 retval.append(" ^= ");
7189 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7190 retval.append(";\n");
// Running-mean form: maintain <var>_sum and <var>_cnt and recompute <var> as
// their quotient (presumably AVG; confirm).
7194 retval += var+"_sum += "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7195 retval += "\t\t"+var+"_cnt += 1;\n";
7196 retval += "\t\t"+var+" = "+var+"_sum / "+var+"_cnt;\n";
// No builtin matched.
7200 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_update.\n",op.c_str());
// Generate the C statement that removes the contribution of aggregate `var`
// from super-aggregate `supervar`.
//   var, supervar - C expressions naming the two storages.
//   atbl, aidx    - aggregate table and the index of the aggregate in it.
//   schema        - not referenced by any statement shown in this block.
// An unrecognized operator is reported on stderr.
7209 static string generate_superaggr_minus(string var, string supervar, aggregate_table *atbl,int aidx, table_list *schema){
7210 string retval = "\t\t";
7211 string op = atbl->get_op(aidx);
// User-defined aggregates: call <op>_HFTA_AGGR_MINUS_(supervar, var), each
// passed by address unless the storage type is fstring_t.
7214 if(! atbl->is_builtin(aidx)) {
7216 retval += op+"_HFTA_AGGR_MINUS_(";
7217 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7218 retval+="("+supervar+"),";
7219 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7220 retval+="("+var+");\n";
// COUNT and SUM are undone by subtraction.
7226 if(op == "COUNT" || op == "SUM"){
7227 retval += supervar + "-=" +var + ";\n";
// XOR is its own inverse.
7231 if(op == "XOR_AGGR"){
7232 retval += supervar + "^=" +var + ";\n";
// MIN and MAX are recognized; what is generated for them is not shown here.
7236 if(op=="MIN" || op == "MAX")
7239 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_superaggr_minus.\n",op.c_str());
// Generate the C statement(s) that initialize the aggregate stored in `var`
// from its first input value.
//   var    - C expression naming the aggregate's storage.
//   atbl   - aggregate table; entry `aidx` describes the aggregate.
//   aidx   - index of the aggregate in atbl; also used to name aggr_tmp_<aidx>.
//   schema - forwarded unchanged to generate_se_code.
// An unrecognized builtin operator is reported on stderr.
7248 static string generate_aggr_init(string var, aggregate_table *atbl,int aidx, table_list *schema){
7250 string op = atbl->get_op(aidx);
// User-defined aggregates: emit <op>_HFTA_AGGR_INIT_ on the storage, then an
// <op>_HFTA_AGGR_UPDATE_ call carrying the operands.  The storage is passed
// by address unless its storage type is fstring_t.
7253 if(! atbl->is_builtin(aidx)){
7255 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_INIT_(";
7256 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7257 retval+="("+var+"));\n";
7259 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_UPDATE_(";
7260 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7261 retval+="("+var+")";
7262 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
7264 for(o=0;o<opl.size();++o){
7266 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7268 retval += generate_se_code(opl[o],schema);
7274 // builtin aggregate processing
7275 data_type *dt = atbl->get_data_type(aidx);
// Start-at-one form (operator test not shown — presumably COUNT; confirm).
7279 retval.append(" = 1;\n");
// Value-carrying builtins start from the first input value.
7283 if(op == "SUM" || op == "MIN" || op == "MAX" || op == "AND_AGGR" ||
7284 op=="AVG" || op == "OR_AGGR" || op == "XOR_AGGR"){
// Buffer types: evaluate into aggr_tmp_<aidx>, then copy into `var` with the
// type's assign-copy function.
7285 if(dt->is_buffer_type()){
7286 sprintf(tmpstr,"\t\taggr_tmp_%d = %s;\n",aidx,generate_se_code(atbl->get_aggr_se(aidx), schema ).c_str() );
7287 retval.append(tmpstr);
7288 sprintf(tmpstr,"\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_assign_copy().c_str(),var.c_str(),aidx);
7289 retval.append(tmpstr);
// Running-mean bookkeeping: seed <var>_sum, <var>_cnt and <var> (the test
// selecting this path is not shown — presumably AVG; confirm).
7292 retval += var+"_sum = "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7293 retval += "\t"+var+"_cnt = 1;\n";
7294 retval += "\t"+var+" = "+var+"_sum;\n";
// Remaining case: the input expression followed by ";" (the preceding text is
// not shown here).
7298 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema));
7299 retval.append(";\n");
// No builtin matched.
7305 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_init.\n",op.c_str());
// Generate the C statement that resets the aggregate stored in `var` to its
// empty value (no input value is folded in).
//   var    - C expression naming the aggregate's storage.
//   atbl   - aggregate table; entry `aidx` describes the aggregate.
//   aidx   - index of the aggregate in atbl.
//   schema - not referenced by any statement shown in this block.
// Builtin aggregates over buffer types are not supported: the function
// returns an "ERROR, cannot yet handle ..." string instead of code.
// An unrecognized builtin operator is reported on stderr.
7313 static string generate_aggr_reinitialize(string var, aggregate_table *atbl,int aidx, table_list *schema){
7315 string op = atbl->get_op(aidx);
// User-defined aggregates: running aggregates use <op>_HFTA_AGGR_REINIT_,
// all others <op>_HFTA_AGGR_INIT_.  The storage is passed by address unless
// its storage type is fstring_t.
7318 if(! atbl->is_builtin(aidx)){
7320 retval += "\t"+atbl->get_op(aidx);
7321 if(atbl->is_running_aggr(aidx)){
7322 retval += "_HFTA_AGGR_REINIT_(";
7324 retval += "_HFTA_AGGR_INIT_(";
7326 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7327 retval+="("+var+"));\n";
7331 // builtin aggregate processing
7332 data_type *dt = atbl->get_data_type(aidx);
// Reset-to-zero form (operator test not shown — presumably COUNT; confirm).
7336 retval.append(" = 0;\n");
// SUM and the bitwise aggregates reset to a default-constructed literal of
// the aggregate's type.
7340 if(op == "SUM" || op == "AND_AGGR" ||
7341 op == "OR_AGGR" || op == "XOR_AGGR"){
7342 if(dt->is_buffer_type()){
7343 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7347 literal_t l(dt->type_indicator());
7348 retval.append(l.to_string());
7349 retval.append(";\n");
// Reset to the type's maximum literal (operator test not shown — presumably
// MIN; confirm).
7355 if(dt->is_buffer_type()){
7356 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7360 retval.append(dt->get_max_literal());
7361 retval.append(";\n");
// Reset to the type's minimum literal (operator test not shown — presumably
// MAX; confirm).
7367 if(dt->is_buffer_type()){
7368 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7372 retval.append(dt->get_min_literal());
7373 retval.append(";\n");
// No builtin matched.
7378 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_aggr_reinitialize.\n",op.c_str());
7385 // Generate parameter holding vars from a param table.
// For every parameter name in `param_tbl`, emits a declaration of a variable
// named param_<name> of the parameter's host C type, plus a
// "struct search_handle *param_handle_<name>" declaration when the parameter
// is accessed by handle.
7386 static string generate_param_vars(param_table *param_tbl){
7389 vector<string> param_vec = param_tbl->get_param_names();
7390 for(p=0;p<param_vec.size();p++){
7391 data_type *dt = param_tbl->get_data_type(param_vec[p]);
// NOTE(review): the name passed to make_host_cvar already ends in ";\n" and
// another ";\n" is appended afterwards — confirm the doubled terminator is
// intended.
7392 sprintf(tmpstr,"param_%s;\n", param_vec[p].c_str());
7393 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7394 if(param_tbl->handle_access(param_vec[p])){
7395 ret += "\tstruct search_handle *param_handle_"+param_vec[p]+";\n";
7401 // Parameter manipulation routines
// Generate the C function "int load_params_<functor_name>(gs_int32_t sz,
// void *value)" that unpacks a parameter block into the param_<name>
// variables.
//   functor_name       - suffix for the generated function's name.
//   param_tbl          - supplies parameter names, types and handle-access flags.
//   param_handle_table - pass-by-handle entries; those bound to query
//                        parameters are registered by the generated code.
// The generated function returns 1 when the block is too small or a string
// does not fit inside it, and 0 on success.
7402 static string generate_load_param_block(string functor_name,
7403 param_table *param_tbl,
7404 vector<handle_param_tbl_entry *> param_handle_table
7407 vector<string> param_names = param_tbl->get_param_names();
7409 string ret = "int load_params_"+functor_name+"(gs_int32_t sz, void *value){\n";
7410 ret.append("\tint pos=0;\n");
7411 ret.append("\tint data_pos;\n");
// Declare a tmp_var_<name> staging variable for each buffer-typed parameter.
7413 for(p=0;p<param_names.size();p++){
7414 data_type *dt = param_tbl->get_data_type(param_names[p]);
7415 if(dt->is_buffer_type()){
7416 sprintf(tmpstr,"tmp_var_%s;\n", param_names[p].c_str());
7417 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7422 // Verify that the block is of minimum size
// data_pos is generated as the sum of sizeof(<host type>) over all parameters.
7423 if(param_names.size() > 0){
7424 ret += "//\tVerify that the value block is large enough */\n";
7425 ret.append("\n\tdata_pos = ");
7426 for(p=0;p<param_names.size();p++){
7427 if(p>0) ret.append(" + ");
7428 data_type *dt = param_tbl->get_data_type(param_names[p]);
7429 ret.append("sizeof( ");
7430 ret.append( dt->get_host_cvar_type() );
7434 ret.append("\tif(data_pos > sz) return 1;\n\n");
7437 ///////////////////////
7438 /// Verify that all strings can be unpacked.
7440 ret += "//\tVerify that the strings can be unpacked */\n";
7441 for(p=0;p<param_names.size();p++){
7442 data_type *dt = param_tbl->get_data_type(param_names[p]);
7443 if(dt->is_buffer_type()){
// Copy the packed descriptor out of the block into the staging variable.
7444 sprintf(tmpstr,"\ttmp_var_%s = *( (%s *)((gs_sp_t )value+pos) );\n",param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7446 switch( dt->get_type() ){
7448 // ret += "\ttmp_var_"+param_names[p]+".offset = ntohl( tmp_var_"+param_names[p]+".offset );\n"; // ntoh conversion
7449 // ret += "\ttmp_var_"+param_names[p]+".length = ntohl( tmp_var_"+param_names[p]+".length );\n"; // ntoh conversion
// Reject strings extending past the block, then turn the block-relative
// offset into an absolute address.
7450 sprintf(tmpstr,"\tif( (int)(tmp_var_%s.offset) + tmp_var_%s.length > sz) return 1;\n",param_names[p].c_str(), param_names[p].c_str() );
7452 sprintf(tmpstr,"\ttmp_var_%s.offset = (gs_p_t)( (gs_sp_t )value + (gs_p_t)(tmp_var_%s.offset) );\n",param_names[p].c_str(), param_names[p].c_str() );
7456 fprintf(stderr,"ERROR: parameter %s is of type %s, a buffered type, but I don't know how to unpack it as a parameter.\n",param_names[p].c_str(), dt->to_string().c_str() );
7461 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7465 /////////////////////////
7467 ret += "/*\tThe block is OK, do the unpacking. */\n";
7468 ret += "\tpos = 0;\n";
7470 for(p=0;p<param_names.size();p++){
7471 data_type *dt = param_tbl->get_data_type(param_names[p]);
7472 if(dt->is_buffer_type()){
// Buffer types are deep-copied from the staging variable.  The destination
// argument is "&param_<name>"; the "&para" prefix had been corrupted into a
// pilcrow character, which would have produced invalid generated C.
7473 sprintf(tmpstr,"\t%s(&param_%s, &tmp_var_%s);\n", dt->get_hfta_buffer_assign_copy().c_str(),param_names[p].c_str(),param_names[p].c_str() );
7476 // if(dt->needs_hn_translation()){
7477 // sprintf(tmpstr,"\tparam_%s = %s( *( (%s *)( (gs_sp_t )value+pos) ) );\n",
7478 // param_names[p].c_str(), dt->ntoh_translation().c_str(), dt->get_host_cvar_type().c_str() );
// Other types are read straight out of the block at the current position.
7480 sprintf(tmpstr,"\tparam_%s = *( (%s *)( (gs_sp_t )value+pos) );\n",
7481 param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7485 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7488 // TODO: I think this method of handle registration is obsolete
7489 // and should be deleted.
7490 // some examination reveals that handle_access is always false.
7491 for(p=0;p<param_names.size();p++){
7492 if(param_tbl->handle_access(param_names[p]) ){
7493 data_type *pdt = param_tbl->get_data_type(param_names[p]);
7495 ret += "\tt->param_handle_"+param_names[p]+" = " +
7496 pdt->handle_registration_name() +
7497 "((struct FTA *)t, &(t->param_"+param_names[p]+"));\n";
7500 // Register the pass-by-handle parameters
7502 ret += "/* register the pass-by-handle parameters */\n";
7505 for(ph=0;ph<param_handle_table.size();++ph){
7506 data_type pdt(param_handle_table[ph]->type_name);
7507 switch(param_handle_table[ph]->val_type){
// Emit "handle_param_<ph> = <registration fcn>(param_<name>", passing buffer
// types by address.
7513 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7515 if(pdt.is_buffer_type()) ret += "&(";
7516 ret += "param_"+param_handle_table[ph]->param_name;
7517 if(pdt.is_buffer_type()) ret += ")";
7521 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7527 ret += "\treturn(0);\n";
7528 ret.append("}\n\n");
// Generate the C function "void destroy_params_<functor_name>()" that
// releases what the generated parameter-loading code acquired.
//   functor_name       - suffix for the generated function's name.
//   param_tbl          - supplies parameter names, types and handle-access flags.
//   param_handle_table - pass-by-handle entries; those whose val_type is
//                        param_e are deregistered by the generated code.
7534 static string generate_delete_param_block(string functor_name,
7535 param_table *param_tbl,
7536 vector<handle_param_tbl_entry *> param_handle_table
7540 vector<string> param_names = param_tbl->get_param_names();
7542 string ret = "void destroy_params_"+functor_name+"(){\n";
7544 for(p=0;p<param_names.size();p++){
7545 data_type *dt = param_tbl->get_data_type(param_names[p]);
7546 if(dt->is_buffer_type()){
// Buffer-typed parameters are destroyed through the type's destroy function.
// The argument is "&param_<name>"; the "&para" prefix had been corrupted into
// a pilcrow character, which would have produced invalid generated C.
7547 sprintf(tmpstr,"\t\t%s(&param_%s);\n",dt->get_hfta_buffer_destroy().c_str(),param_names[p].c_str());
// Parameters accessed by handle also release that handle.
7550 if(param_tbl->handle_access(param_names[p]) ){
7551 ret += "\t\t" + dt->get_handle_destructor() +
7552 "(t->param_handle_" + param_names[p] + ");\n";
7556 ret += "//\t\tDeregister handles.\n";
7558 for(ph=0;ph<param_handle_table.size();++ph){
7559 if(param_handle_table[ph]->val_type == param_e){
7560 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7561 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7570 // ---------------------------------------------------------------------
7571 // functions for creating functor variables.
// Generate declarations for the variables that hold unpacked column values.
// For each column id in `cid_set`, emits a variable
// unpack_var_<field>_<tblref> of the field's host C type (looked up in
// `schema`) and formats a "gs_int32_t unpack_offset_<field>_<tblref>"
// declaration.
7573 static string generate_access_vars(col_id_set &cid_set, table_list *schema){
7575 col_id_set::iterator csi;
7577 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7578 int schref = (*csi).schema_ref;
7579 int tblref = (*csi).tblvar_ref;
7580 string field = (*csi).field;
7581 data_type dt(schema->get_type_name(schref,field));
7582 sprintf(tmpstr,"unpack_var_%s_%d", field.c_str(), tblref);
7583 ret+="\t"+dt.make_host_cvar(tmpstr)+";\n";
7584 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", field.c_str(), tblref);
// Generate declarations for partial-function result variables.
//   partial_fcns  - function expressions, indexed by p.
//   ref_cnt       - per-function reference count.
//   is_partial    - per-function flag.
//   gen_fcn_cache - when false, every function gets a result variable; when
//                   true, only functions flagged in is_partial or referenced
//                   more than once do.
// Each selected function gets partial_fcn_result_<p> of its data type; with
// caching on, functions referenced more than once also get an
// "int fcn_ref_cnt_<p>" variable.
7590 static string generate_partial_fcn_vars(vector<scalarexp_t *> &partial_fcns,
7591 vector<int> &ref_cnt, vector<bool> &is_partial, bool gen_fcn_cache){
7596 for(p=0;p<partial_fcns.size();++p){
7597 if(!gen_fcn_cache || is_partial[p] || ref_cnt[p]>1){
7598 sprintf(tmpstr,"partial_fcn_result_%d", p);
7599 ret+="\t"+partial_fcns[p]->get_data_type()->make_host_cvar(tmpstr)+";\n";
7600 if(gen_fcn_cache && ref_cnt[p]>1){
7601 ret+="\tint fcn_ref_cnt_"+int_to_string(p)+";\n";
// Generate declarations for complex-literal variables: one
// complex_literal_<n> per entry of `complex_literals`, typed after the
// literal, and a "struct search_handle *lit_handle_<n>" declaration formatted
// for entries that are handle references.
7609 static string generate_complex_lit_vars(cplx_lit_table *complex_literals){
7612 for(cl=0;cl<complex_literals->size();cl++){
7613 literal_t *l = complex_literals->get_literal(cl);
// NOTE(review): dtl is heap-allocated on every iteration; no matching release
// is visible in this block — confirm it is not leaked.
7614 data_type *dtl = new data_type( l->get_type() );
7615 sprintf(tmpstr,"complex_literal_%d",cl);
7616 ret += "\t"+dtl->make_host_cvar(tmpstr)+";\n";
7617 if(complex_literals->is_handle_ref(cl)){
7618 sprintf(tmpstr,"\tstruct search_handle *lit_handle_%d;\n",cl);
// Generate declarations for pass-by-handle variables: formats one
// "gs_param_handle_t handle_param_<p>;" line per entry of
// `param_handle_table`.
7626 static string generate_pass_by_handle_vars(
// The parameter is a reference named param_handle_table (as the loop below
// uses it); the "&para" prefix of the declarator had been corrupted into a
// pilcrow character.
7627 vector<handle_param_tbl_entry *> &param_handle_table){
7631 for(p=0;p<param_handle_table.size();++p){
7632 sprintf(tmpstr,"\tgs_param_handle_t handle_param_%d;\n",p);
7640 // ------------------------------------------------------------
7641 // functions for generating initialization code.
// Generate initialization code for the unpack offsets: for each column id in
// `cid_set`, formats an assignment of unpack_offset_<field>_<tblref> from
// ftaschema_get_field_offset_by_name(schema_handle<tblref>, "<field>").
7643 static string gen_access_var_init(col_id_set &cid_set){
7645 col_id_set::iterator csi;
7647 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7648 int tblref = (*csi).tblvar_ref;
7649 string field = (*csi).field;
7650 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle%d, \"%s\");\n", field.c_str(),tblref,tblref,field.c_str());
// Generate initialization code for the complex-literal variables: for each
// entry of `complex_literals`, emits the literal's own initialization code
// targeting &(complex_literal_<n>), and for handle references formats a
// lit_handle_<n> registration call.
7657 static string gen_complex_lit_init(cplx_lit_table *complex_literals){
7661 for(cl=0;cl<complex_literals->size();cl++){
7662 literal_t *l = complex_literals->get_literal(cl);
7663 // sprintf(tmpstr,"\tcomplex_literal_%d = ",cl);
7664 // ret += tmpstr + l->to_hfta_C_code() + ";\n";
// The literal generates its own initializer given the destination address.
7665 sprintf(tmpstr,"&(complex_literal_%d)",cl);
7666 ret += "\t" + l->to_hfta_C_code(tmpstr) + ";\n";
7667 // I think that the code below is obsolete
7668 // TODO: it is obsolete. add_cpx_lit is always
7669 // called with the handle indicator being false.
7670 // This entire structure should be cleansed.
7671 if(complex_literals->is_handle_ref(cl)){
// NOTE(review): dt is heap-allocated here; no matching release is visible in
// this block — confirm it is not leaked.
7672 data_type *dt = new data_type( l->get_type() );
7673 sprintf(tmpstr,"\tlit_handle_%d = %s(&(f->complex_literal_%d));\n",
7674 cl, dt->hfta_handle_registration_name().c_str(), cl);
// Generate initialization code for partial-function result variables.  Only
// buffer-typed results are initialized: each gets the initialization code of
// a default-constructed literal of its type, targeting
// &(partial_fcn_result_<p>).
7683 static string gen_partial_fcn_init(vector<scalarexp_t *> &partial_fcns){
7687 for(p=0;p<partial_fcns.size();++p){
7688 data_type *pdt =partial_fcns[p]->get_data_type();
7689 literal_t empty_lit(pdt->type_indicator());
7690 if(pdt->is_buffer_type()){
7691 // sprintf(tmpstr,"\tpartial_fcn_result_%d = %s;\n",
7692 // p, empty_lit.to_hfta_C_code().c_str());
7693 sprintf(tmpstr,"&(partial_fcn_result_%d)",p);
7694 ret += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
// Generate registration code for pass-by-handle parameters.  For each entry
// of `param_handle_table`, formats
// "handle_param_<ph> = <registration fcn>(" and completes the call according
// to the entry's val_type: a complex literal variable (by address for buffer
// types) or an inline literal.  Unknown val_type values are reported on
// stderr.
7700 static string gen_pass_by_handle_init(
// The parameter is a reference named param_handle_table (as the loop below
// uses it); the "&para" prefix of the declarator had been corrupted into a
// pilcrow character.
7701 vector<handle_param_tbl_entry *> &param_handle_table){
7705 for(ph=0;ph<param_handle_table.size();++ph){
7706 data_type pdt(param_handle_table[ph]->type_name);
7707 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7708 switch(param_handle_table[ph]->val_type){
// Complex literal: pass complex_literal_<idx>, by address for buffer types.
7711 if(pdt.is_buffer_type()) ret += "&(";
7712 sprintf(tmpstr,"complex_literal_%d",param_handle_table[ph]->complex_literal_idx);
7714 if(pdt.is_buffer_type()) ret += ")";
// Plain literal: emit its C code inline and close the call.
7719 ret += param_handle_table[ph]->litval->to_hfta_C_code("") + ");\n";
7723 // query parameter handles are registered/deregistered in the
7724 // load_params function.
7725 // ret += "t->param_"+param_handle_table[ph]->param_name;
7728 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7735 //------------------------------------------------------------
7736 // functions for destructor and deregistration code
// Generate destructor code for complex literals: for each buffer-typed entry
// of `complex_literals`, formats a call of the type's buffer-destroy function
// on &complex_literal_<n>.
7738 static string gen_complex_lit_dtr(cplx_lit_table *complex_literals){
7742 for(cl=0;cl<complex_literals->size();cl++){
7743 literal_t *l = complex_literals->get_literal(cl);
7744 data_type ldt( l->get_type() );
7745 if(ldt.is_buffer_type()){
7746 sprintf(tmpstr,"\t\t%s(&complex_literal_%d);\n",
7747 ldt.get_hfta_buffer_destroy().c_str(), cl );
// Generate deregistration code for pass-by-handle parameters: for each entry
// of `param_handle_table`, formats a call of the entry's deregistration
// function on handle_param_<ph>.
7755 static string gen_pass_by_handle_dtr(
// The parameter is a reference named param_handle_table (as the loop below
// uses it); the "&para" prefix of the declarator had been corrupted into a
// pilcrow character.
7756 vector<handle_param_tbl_entry *> &param_handle_table){
7760 for(ph=0;ph<param_handle_table.size();++ph){
7761 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7762 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7768 // Destroy all previous results
// For every buffer-typed function in `partial_fcns`, formats a call of the
// type's buffer-destroy function on &partial_fcn_result_<p>.
7769 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns){
7773 for(p=0;p<partial_fcns.size();++p){
7774 data_type *pdt =partial_fcns[p]->get_data_type();
7775 if(pdt->is_buffer_type()){
7776 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7777 pdt->get_hfta_buffer_destroy().c_str(), p );
7784 // Destroy previous results of fcns in pfcn_set
// Same as the single-argument overload, restricted to the indices listed in
// `pfcn_set`: each buffer-typed function gets a buffer-destroy call on
// &partial_fcn_result_<index>.
7785 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns, set<int> &pfcn_set){
7787 set<int>::iterator si;
7789 for(si=pfcn_set.begin(); si!=pfcn_set.end(); ++si){
7790 data_type *pdt =partial_fcns[(*si)]->get_data_type();
7791 if(pdt->is_buffer_type()){
7792 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7793 pdt->get_hfta_buffer_destroy().c_str(), (*si) );
7801 //-------------------------------------------------------------------------
7802 // Functions related to se generation bookkeeping.
// Collect the column ids referenced by predicate `pr` that are not already
// in `found_cids`.
//   pr         - predicate to scan (via gather_pr_col_ids).
//   found_cids - in/out: columns seen so far; the new ones are added to it.
//   new_cids   - out: receives the columns of `pr` absent from found_cids
//                (inserted into whatever it already holds).
//   gtbl       - forwarded unchanged to gather_pr_col_ids.
7804 static void get_new_pred_cids(predicate_t *pr, col_id_set &found_cids,
7805 col_id_set &new_cids, gb_table *gtbl){
7806 col_id_set this_pred_cids;
7807 col_id_set::iterator csi;
7809 // get colrefs in predicate not already found.
7810 gather_pr_col_ids(pr,this_pred_cids,gtbl);
7811 set_difference(this_pred_cids.begin(), this_pred_cids.end(),
7812 found_cids.begin(), found_cids.end(),
7813 inserter(new_cids,new_cids.begin()) );
7815 // We've found these cids, so update found_cids
7816 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7817 found_cids.insert((*csi));
7821 // after the call, new_cids will have the colrefs in se but not found_cids.
7822 // update found_cids with the new cids.
//   se         - scalar expression to scan (via gather_se_col_ids).
//   found_cids - in/out: columns seen so far; the new ones are added to it.
//   new_cids   - out: the difference is inserted into whatever it already
//                holds.
//   gtbl       - forwarded unchanged to gather_se_col_ids.
7823 static void get_new_se_cids(scalarexp_t *se, col_id_set &found_cids,
7824 col_id_set &new_cids, gb_table *gtbl){
7825 col_id_set this_se_cids;
7826 col_id_set::iterator csi;
7828 // get colrefs in se not already found.
7829 gather_se_col_ids(se,this_se_cids,gtbl);
7830 set_difference(this_se_cids.begin(), this_se_cids.end(),
7831 found_cids.begin(), found_cids.end(),
7832 inserter(new_cids,new_cids.begin()) );
7834 // We've found these cids, so update found_cids
7835 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7836 found_cids.insert((*csi));
// Generate the code that unpacks the columns in `new_cids` from their input
// tuples into unpack_var_<field>_<tblref>.
//   schema      - used to look up each field's type.
//   new_cids    - columns to unpack.
//   on_problem  - text placed after "return" in the generated failure check.
//   needs_xform - indexed by tblref; selects between the type's unpack
//                 function and its "noxf" variant.
7840 static string gen_unpack_cids(table_list *schema, col_id_set &new_cids, string on_problem, vector<bool> &needs_xform){
7842 col_id_set::iterator csi;
7844 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7845 int schref = (*csi).schema_ref;
7846 int tblref = (*csi).tblvar_ref;
7847 string field = (*csi).field;
7848 data_type dt(schema->get_type_name(schref,field));
7850 if(needs_xform[tblref]){
7851 unpack_fcn = dt.get_hfta_unpack_fcn();
7853 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
// Buffer types use the checked unpacker, which takes the tuple size and
// reports through `problem`; other types use the "_nocheck" variant.
7855 if(dt.is_buffer_type()){
7856 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7858 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
// Only the checked path can fail, so only it gets the early return.
7861 if(dt.is_buffer_type()){
7862 ret += "\tif(problem) return "+on_problem+" ;\n";
7868 // generates the declaration of all the variables related to
7869 // temp tuples generation
// The lines shown emit a comment and a "bool temp_tuple_received;"
// declaration.
7870 static string gen_decl_temp_vars(){
7873 ret += "\t// variables related to temp tuple generation\n";
7874 ret += "\tbool temp_tuple_received;\n";
7879 // generates initialization code for variables related to temp tuple processing
//   schema      - used to look up each field's type name and modifier list.
//   select_list - select elements; only those with temporal data types are
//                 considered.
//   gtbl        - forwarded unchanged to get_new_se_cids.
// Emits "temp_tuple_received = false;" and, for every column first referenced
// by a temporal select expression, formats an assignment of
// unpack_var_<field>_<tblref> to the type's minimum literal when the type is
// increasing and to its maximum literal otherwise.
7880 static string gen_init_temp_vars(table_list *schema, vector<select_element *>& select_list, gb_table *gtbl){
7882 col_id_set::iterator csi;
7885 // Initialize internal state
7886 ret += "\ttemp_tuple_received = false;\n";
7888 col_id_set temp_cids; // colrefs unpacked thus far.
7890 for(s=0;s<select_list.size();s++){
7891 if (select_list[s]->se->get_data_type()->is_temporal()) {
7892 // Find the set of attributes accessed in this SE
7893 col_id_set new_cids;
7894 get_new_se_cids(select_list[s]->se,temp_cids, new_cids, gtbl);
7897 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7898 int schref = (*csi).schema_ref;
7899 int tblref = (*csi).tblvar_ref;
7900 string field = (*csi).field;
7901 data_type dt(schema->get_type_name(schref,field), schema->get_modifier_list(schref,field));
7903 sprintf(tmpstr,"\t unpack_var_%s_%d = %s;\n", field.c_str(), tblref,
7904 dt.is_increasing() ? dt.get_min_literal().c_str() : dt.get_max_literal().c_str());
7914 // generates a check if tuple is temporal
//   node_name - not referenced by any statement shown in this block.
//   channel   - input index; selects tup<channel> and
//               tuple_metadata_offset<channel> in the generated code.
// The generated code sets temp_tuple_received to true when
// ftaschema_is_temporal_tuple_offset(...) holds for the tuple's data, and to
// false otherwise.
7915 static string gen_temp_tuple_check(string node_name, int channel) {
7919 sprintf(tmpstr, "tup%d", channel);
7920 string tup_name = tmpstr;
// schema_handle_name is only used by the commented-out check below.
7921 sprintf(tmpstr, "schema_handle%d", channel);
7922 string schema_handle_name = tmpstr;
7923 string tuple_offset_name = "tuple_metadata_offset"+int_to_string(channel);
7925 // check if it is a temporary status tuple
7926 ret += "\t// check if tuple is temp status tuple\n";
7927 // ret += "\tif (ftaschema_is_temporal_tuple(" + schema_handle_name + ", " + tup_name + ".data)) {\n";
7928 ret += "\tif (ftaschema_is_temporal_tuple_offset(" + tuple_offset_name + ", " + tup_name + ".data)) {\n";
7929 ret += "\t\ttemp_tuple_received = true;\n";
7931 ret += "\telse\n\t\ttemp_tuple_received = false;\n\n";
7936 // generates unpacking code for all temporal attributes referenced in select
//   schema      - forwarded to gen_unpack_cids.
//   found_cids  - in/out: columns already unpacked; updated by
//                 get_new_se_cids with the ones unpacked here.
//   select_list - select elements; only those with temporal data types are
//                 considered.
//   gtbl        - forwarded unchanged to get_new_se_cids.
//   needs_xform - forwarded unchanged to gen_unpack_cids.
// The generated failure path returns "false".
7937 static string gen_unpack_temp_vars(table_list *schema, col_id_set& found_cids, vector<select_element *>& select_list, gb_table *gtbl, vector<bool> &needs_xform) {
7941 // Unpack all the temporal attributes references in select list
7942 // we need it to be able to generate temp status tuples
7943 for(s=0;s<select_list.size();s++){
7944 if (select_list[s]->se->get_data_type()->is_temporal()) {
7945 // Find the set of attributes accessed in this SE
7946 col_id_set new_cids;
7947 get_new_se_cids(select_list[s]->se,found_cids, new_cids, gtbl);
7948 // Unpack these values.
7949 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
7957 // Generates temporal tuple generation code (except attribute packing)
// The generated code operates on a variable named `result`:
//   - tuple_size is set to sizeof(<tuple struct>) + sizeof(gs_uint8_t),
//   - data is allocated with malloc(tuple_size) and heap_resident set to true,
//   - the byte located right after the tuple struct is set to TEMPORAL_TUPLE,
//   - a typed pointer named `tuple` is declared over result.data.
// The tuple struct name comes from generate_tuple_name(node_name).
// NOTE(review): the emitted code does not visibly check the malloc result;
// the declaration of ret and the return statement are not visible here.
7958 static string gen_init_temp_status_tuple(string node_name) {
7961 ret += "\t// create temp status tuple\n";
7962 ret += "\tresult.tuple_size = sizeof("+generate_tuple_name( node_name)+") + sizeof(gs_uint8_t);\n";
7963 ret += "\tresult.data = (gs_sp_t )malloc(result.tuple_size);\n";
7964 ret += "\tresult.heap_resident = true;\n";
7965 ret += "\t// Mark tuple as temporal\n";
7966 ret += "\t*((gs_sp_t )result.data + sizeof("+generate_tuple_name( node_name)+")) = TEMPORAL_TUPLE;\n";
// Declare the typed `tuple` pointer over the freshly allocated block.
7968 ret += "\t"+generate_tuple_name( node_name)+" *tuple = ("+
7969 generate_tuple_name( node_name) +" *)(result.data);\n";
7975 // Assume that all colrefs unpacked already ...
// Emits evaluation code for each partial function whose index is in
// pfcn_refs. Each evaluation (from unpack_partial_fcn()) is followed by
//   if(retval) return <on_problem> ;
// so the generated function returns as soon as retval is nonzero.
//   schema       - passed to unpack_partial_fcn()
//   partial_fcns - partial function expressions, indexed by the ids in pfcn_refs
//   pfcn_refs    - ids of the functions to evaluate, walked in sorted order
// NOTE(review): on_problem is used below, but the parameter line that
// declares it is not visible in this listing; the same holds for ret.
7976 static string gen_unpack_partial_fcn(table_list *schema,
7977 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7980 set<int>::iterator si;
7982 // Since set<..> is a "Sorted Associative Container",
7983 // we can walk through it in sorted order by walking from
7984 // begin() to end(). (and the partial fcns must be
7985 // evaluated in this order).
7986 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7987 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7988 ret += "\tif(retval) return "+on_problem+" ;\n";
7993 // Assume that all colrefs unpacked already ...
7994 // this time with cached functions.
// For each id in pfcn_refs:
//   - if the function is referenced more than once (fcn_ref_cnt > 1), its
//     evaluation is wrapped in a generated `if(fcn_ref_cnt_<id>==0){` guard
//     and `fcn_ref_cnt_<id>=1;` is emitted afterwards;
//   - a partial function is evaluated through unpack_partial_fcn() followed
//     by the `if(retval) return <on_problem>` check;
//   - a non-partial function referenced more than once has its value stored
//     in partial_fcn_result_<id> via generate_cached_fcn().
//   fcn_ref_cnt    - reference count per function id
//   is_partial_fcn - per function id, whether it is a partial function
// NOTE(review): on_problem and ret are used but not declared in the visible
// lines; the emission of the closing brace of the generated guard is not
// visible either.
7995 static string gen_unpack_partial_fcn(table_list *schema,
7996 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7997 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
8000 set<int>::iterator si;
8002 // Since set<..> is a "Sorted Associative Container",
8003 // we can walk through it in sorted order by walking from
8004 // begin() to end(). (and the partial fcns must be
8005 // evaluated in this order).
8006 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
// Shared functions are evaluated only while their generated flag is still 0.
8007 if(fcn_ref_cnt[(*si)] > 1){
8008 ret += "\tif(fcn_ref_cnt_"+int_to_string((*si))+"==0){\n";
8010 if(is_partial_fcn[(*si)]){
8011 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
8012 ret += "\tif(retval) return "+on_problem+" ;\n";
8014 if(fcn_ref_cnt[(*si)] > 1){
8015 if(!is_partial_fcn[(*si)]){
8016 ret += "\t\tpartial_fcn_result_"+int_to_string((*si))+"="+generate_cached_fcn(partial_fcns[(*si)],(*si),schema)+";\n";
// Mark the function as evaluated in the generated code.
8018 ret += "\t\tfcn_ref_cnt_"+int_to_string((*si))+"=1;\n";
8027 // This version finds and unpacks new colrefs.
8028 // found_cids gets updated with the newly unpacked cids.
// For each partial function id in pfcn_refs, this emits:
//   1) unpacking code (gen_unpack_cids) for the column ids that
//      get_new_se_cids() reports in new_cids,
//   2) the evaluation of the function (unpack_partial_fcn),
//   3) `if(retval) return <on_problem> ;`.
//   gtbl        - passed through to get_new_se_cids()
//   on_problem  - expression text returned by the generated code on failure;
//                 also passed to gen_unpack_cids()
//   needs_xform - passed through to gen_unpack_cids()
// NOTE(review): the declaration of ret and the return statement are not
// visible in this listing.
8029 static string gen_full_unpack_partial_fcn(table_list *schema,
8030 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8031 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8032 vector<bool> &needs_xform){
8034 set<int>::iterator slsi;
8036 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8037 // find all new fields ref'd by this partial fcn.
8038 col_id_set new_cids;
8039 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8040 // Unpack these values.
8041 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8043 // Now evaluate the partial fcn.
8044 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8045 ret += "\tif(retval) return "+on_problem+" ;\n";
8050 // This version finds and unpacks new colrefs.
8051 // found_cids gets updated with the newly unpacked cids.
8052 // BUT : only for the partial functions.
// Ids in pfcn_refs whose is_partial_fcn entry is false produce no output in
// the visible lines. For a partial function this emits the unpacking of its
// new column ids, then the evaluation and `if(retval) return <on_problem>`
// check. If the function is referenced more than once, the evaluation is
// wrapped in a generated `if(fcn_ref_cnt_<id>==0){` guard that afterwards
// sets `fcn_ref_cnt_<id>=1;`.
// NOTE(review): is_partial_fcn is tested a second time at the evaluation
// step; whether that test is nested inside the first one cannot be told from
// this listing because the closing braces are not visible. The declaration
// of ret is not visible either.
8053 static string gen_full_unpack_partial_fcn(table_list *schema,
8054 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8055 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
8056 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8057 vector<bool> &needs_xform){
8059 set<int>::iterator slsi;
8061 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8062 if(is_partial_fcn[(*slsi)]){
8063 // find all new fields ref'd by this partial fcn.
8064 col_id_set new_cids;
8065 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8066 // Unpack these values.
8067 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8069 // Now evaluate the partial fcn.
8070 if(fcn_ref_cnt[(*slsi)] > 1){
8071 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8073 if(is_partial_fcn[(*slsi)]){
8074 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8075 ret += "\tif(retval) return "+on_problem+" ;\n";
8077 if(fcn_ref_cnt[(*slsi)] > 1){
8078 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
// Emits evaluation code for cached (non-partial) functions that are
// referenced more than once. For each such id in pfcn_refs the generated
// code is
//   if(fcn_ref_cnt_<id>==0){
//       partial_fcn_result_<id> = <generate_cached_fcn(...)>;
//       fcn_ref_cnt_<id>=1;
// so the value is computed only if the generated flag is still 0.
// NOTE(review): the inner `fcn_ref_cnt > 1` test repeats a condition the
// outer `if` has already established. The declaration of ret, the emission
// of the closing brace of the generated guard and the return statement are
// not visible in this listing.
8087 static string gen_remaining_cached_fcns(table_list *schema,
8088 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8089 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn){
8091 set<int>::iterator slsi;
8093 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8094 if(!is_partial_fcn[(*slsi)] && fcn_ref_cnt[(*slsi)] > 1){
8096 if(fcn_ref_cnt[(*slsi)] > 1){
8097 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8098 ret += "\t\tpartial_fcn_result_"+int_to_string((*slsi))+"="+generate_cached_fcn(partial_fcns[(*slsi)],(*slsi),schema)+";\n";
8099 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
8108 // unpack the colrefs in cid_set not in found_cids
// For every column id in cid_set that is absent from found_cids, emits an
// assignment to unpack_var_<field>_<tblref>:
//   - the unpack function is get_hfta_unpack_fcn() when needs_xform[tblref]
//     is set, otherwise get_hfta_unpack_fcn_noxf();
//   - buffer types use the checked form, which also receives the tuple size
//     and &problem, and are followed by `if(problem) return <on_problem> ;`;
//   - all other types use the `<fcn>_nocheck` form.
// NOTE(review): dt is constructed from the type name only, without the
// modifier list that gen_init_temp_vars() passes - confirm this is intended.
// The declarations of ret and unpack_fcn, the append of tmpstr to ret and
// any update of found_cids are not visible in this listing.
8109 static string gen_remaining_colrefs(table_list *schema,
8110 col_id_set &cid_set, col_id_set &found_cids, string on_problem,
8111 vector<bool> &needs_xform){
8113 col_id_set::iterator csi;
8115 for(csi=cid_set.begin(); csi!=cid_set.end();csi++){
8116 if(found_cids.count( (*csi) ) == 0){
8117 int schref = (*csi).schema_ref;
8118 int tblref = (*csi).tblvar_ref;
8119 string field = (*csi).field;
8120 data_type dt(schema->get_type_name(schref,field));
// Pick the transforming or non-transforming unpack function for this input.
8122 if(needs_xform[tblref]){
8123 unpack_fcn = dt.get_hfta_unpack_fcn();
8125 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
8127 if(dt.is_buffer_type()){
8128 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
8130 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
// Only the checked (buffer) unpack can set `problem`, so only it is tested.
8133 if(dt.is_buffer_type()){
8134 ret.append("\tif(problem) return "+on_problem+" ;\n");
// Emits declarations of temporaries named selvar_<s> for buffer-typed select
// expressions, each initialized with the generated code of its expression.
// An expression is skipped when it is a column reference, a partial
// function, or a function with an aggregate reference (get_aggr_ref() >= 0).
//   schema      - passed to generate_se_code()
//   select_list - select elements to scan; s is the element's position
// NOTE(review): the declarations of ret and s and the return statement are
// not visible in this listing.
8141 static string gen_buffer_selvars(table_list *schema,
8142 vector<select_element *> &select_list){
8146 for(s=0;s<select_list.size();s++){
8147 scalarexp_t *se = select_list[s]->se;
8148 data_type *sdt = se->get_data_type();
8149 if(sdt->is_buffer_type() &&
8150 !( (se->get_operator_type() == SE_COLREF) ||
8151 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8152 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8154 sprintf(tmpstr,"selvar_%d",s);
8155 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
8156 ret += generate_se_code(se,schema) +";\n";
// Emits the ` + <size_fcn>(&...)` terms for buffer-typed select expressions.
// Each term calls the type's get_hfta_buffer_size() function. Its argument
// is &selvar_<s> for expressions that are not a column reference, partial
// function or aggregate-referencing function; for the others it is the
// address of the generated expression code.
//   select_list - select elements to scan; s is the element's position
//   schema      - passed to generate_se_code()
// NOTE(review): the declarations of ret and s, the else between the two
// sprintf calls, the append of tmpstr to ret and the return statement are
// not visible in this listing.
8162 static string gen_buffer_selvars_size(vector<select_element *> &select_list,table_list *schema){
8166 for(s=0;s<select_list.size();s++){
8167 scalarexp_t *se = select_list[s]->se;
8168 data_type *sdt = se->get_data_type();
8169 if(sdt->is_buffer_type()){
8170 if( !( (se->get_operator_type() == SE_COLREF) ||
8171 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8172 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8174 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
8177 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),
8178 generate_se_code(se,schema).c_str());
// Emits calls to the type's get_hfta_buffer_destroy() function on
// &selvar_<s> for buffer-typed select expressions. An expression is skipped
// when it is a column reference, SE_AGGR_STAR, SE_AGGR_SE, a partial
// function, or a function with an aggregate reference.
// NOTE(review): this exclusion list also contains SE_AGGR_STAR and
// SE_AGGR_SE, which the test in gen_buffer_selvars() does not - confirm the
// two lists are meant to differ. The declarations of ret and s, the append
// of tmpstr to ret and the return statement are not visible in this listing.
8186 static string gen_buffer_selvars_dtr(vector<select_element *> &select_list){
8190 for(s=0;s<select_list.size();s++){
8191 scalarexp_t *se = select_list[s]->se;
8192 data_type *sdt = se->get_data_type();
8193 if(sdt->is_buffer_type() &&
8194 !( (se->get_operator_type() == SE_COLREF) ||
8195 (se->get_operator_type() == SE_AGGR_STAR) ||
8196 (se->get_operator_type() == SE_AGGR_SE) ||
8197 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8198 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8200 sprintf(tmpstr,"\t\t%s(&selvar_%d);\n",
8201 sdt->get_hfta_buffer_destroy().c_str(), s );
// Emits the code that fills the fields of the output tuple (`tuple`).
// The generated code starts with
//   int tuple_pos = sizeof(<tuple struct>) + sizeof(gs_uint8_t);
// and then handles each select element s:
//   - buffer type, when temporal_only is false: a call to the type's
//     get_hfta_buffer_tuple_copy() function copies the value into
//     tuple->tuple_var<s> at ((gs_sp_t)tuple)+tuple_pos, and tuple_pos is
//     advanced by the type's get_hfta_buffer_size() value. The source is
//     selvar_<s> unless the expression is a column reference or a partial
//     function, in which case it is the generated expression code;
//   - otherwise, when temporal_only is false or the type is temporal:
//     `tuple->tuple_var<s> = <expression code>`.
//   node_name     - used to form the tuple struct name
//   temporal_only - when true, only temporal non-buffer fields are packed
// NOTE(review): the selvar test here has no `get_aggr_ref() >= 0` exclusion,
// unlike gen_buffer_selvars() - confirm this cannot reference an undeclared
// selvar. The declarations of ret and s, the appends of tmpstr and the
// return statement are not visible in this listing.
8209 static string gen_pack_tuple(table_list *schema, vector<select_element *> &select_list, string node_name, bool temporal_only){
8213 ret += "\tint tuple_pos = sizeof("+generate_tuple_name(node_name)+") + sizeof(gs_uint8_t);\n";
8214 for(s=0;s<select_list.size();s++){
8215 scalarexp_t *se = select_list[s]->se;
8216 data_type *sdt = se->get_data_type();
8218 if(!temporal_only && sdt->is_buffer_type()){
8219 if( !( (se->get_operator_type() == SE_COLREF) ||
8220 (se->get_operator_type() == SE_FUNC && se->is_partial()))
8222 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
8224 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
8227 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code(se,schema).c_str());
8229 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code(se,schema).c_str());
8232 }else if (!temporal_only || sdt->is_temporal()) {
8233 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
8235 ret.append(generate_se_code(se,schema) );
8243 //-------------------------------------------------------------------------
8244 // functor generation methods
8245 //-------------------------------------------------------------------------
8247 /////////////////////////////////////////////////////////
8248 //// File Output Operator
// Returns the name of the generated functor class for this file output
// node: "output_file_functor_" followed by the normalized node name.
8249 string output_file_qpn::generate_functor_name(){
8250 return("output_file_functor_" + normalize_name(get_node_name()));
// Generates the C++ source text of the functor class used by the file
// output operator. The emitted class contains:
//   - private state: schema handle, tuple metadata offset, unpack offsets,
//     unpacked hash fields, timestamp / last_bucket / slack, status flags
//     and bucketwidth;
//   - a constructor taking a schema handle, and a destructor;
//   - parameter-block stubs (load_params_*, destroy_params_*, set_param_block);
//   - temp_status_received(), new_epoch(), output_hash(), num_file_streams(),
//     get_filename_base(), do_compression(), is_eof_tuple(),
//     propagate_tuple() and create_temp_status_tuple().
//   schema, Ext_fcns, needs_xform - not referenced in the visible lines
// NOTE(review): the embedded line numbers skip, so several statements
// (declarations of h, appends of tmpstr to ret, closing braces, else
// branches, the return) are not visible in this listing.
8254 string output_file_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8255 string ret = "class " + this->generate_functor_name() + "{\n";
8257 // Find the temporal field
// NOTE(review): a data_type is allocated with new on every iteration; no
// matching delete is visible in this listing - check for a leak.
8258 int temporal_field_idx;
8259 data_type *tdt = NULL;
8260 for(temporal_field_idx=0;temporal_field_idx<fields.size();temporal_field_idx++){
8261 tdt = new data_type(fields[temporal_field_idx]->get_type(), fields[temporal_field_idx]->get_modifier_list());
8262 if(tdt->is_temporal()){
// Reaching fields.size() means no field was temporal.
8269 if(temporal_field_idx == fields.size()){
8270 fprintf(stderr,"ERROR, no temporal field for file output operator %s\n",node_name.c_str());
8274 ret += "private:\n";
8276 // var to save the schema handle
8277 ret += "\tint schema_handle0;\n";
8278 // tuple metadata offset
8279 ret += "\tint tuple_metadata_offset0;\n";
8280 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[temporal_field_idx]->get_name().c_str());
8283 // For unpacking the hashing fields, if any
// NOTE(review): hdt is allocated with new here and in two later loops; no
// delete is visible in this listing.
8285 for(h=0;h<hash_flds.size();++h){
8286 sprintf(tmpstr,"unpack_var_%s", fields[hash_flds[h]]->get_name().c_str());
8287 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8288 ret+="\t"+hdt->make_host_cvar(tmpstr)+";\n";
// The temporal field's offset variable was already declared above.
8289 if(hash_flds[h]!=temporal_field_idx){
8290 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[hash_flds[h]]->get_name().c_str());
8294 // Special case for output file hashing
8295 if(n_streams>1 && hash_flds.size()==0){
8296 ret+="\tgs_uint32_t outfl_cnt;\n";
8299 ret += "//\t\tRemember the last posted timestamp.\n";
8300 ret+="\t"+tdt->make_host_cvar("timestamp")+";\n";
8301 ret+="\t"+tdt->make_host_cvar("last_bucket")+";\n";
8302 ret+="\t"+tdt->make_host_cvar("slack")+";\n";
8303 ret += "\tbool first_execution;\n";
8304 ret += "\tbool temp_tuple_received;\n";
8305 ret += "\tbool is_eof;\n";
8307 ret += "\tgs_int32_t bucketwidth;\n";
8310 //-------------------
8311 // The functor constructor
8312 // pass in a schema handle (e.g. for the 1st input stream),
8313 // use it to determine how to unpack the merge variable.
8314 // ASSUME that both streams have the same layout,
8315 // just duplicate it.
8318 ret += "//\t\tFunctor constructor.\n";
8319 ret += this->generate_functor_name()+"(int schema_hndl){\n";
8321 ret += "\tschema_handle0 = schema_hndl;\n";
8322 // tuple metadata offset
8323 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
// A bucketwidth of 0 in the output spec is replaced by 60 in the generated code.
8325 if(output_spec->bucketwidth == 0)
8326 ret += "\tbucketwidth = 60;\n";
8328 ret += "\tbucketwidth = "+int_to_string(output_spec->bucketwidth)+";\n";
8329 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8331 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[temporal_field_idx]->get_name().c_str(), fields[temporal_field_idx]->get_name().c_str());
8333 // Hashing field unpacking, if any
8334 for(h=0;h<hash_flds.size();++h){
8335 if(hash_flds[h]!=temporal_field_idx){
8336 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[hash_flds[h]]->get_name().c_str(),fields[hash_flds[h]]->get_name().c_str());
8341 ret+="\tfirst_execution = true;\n";
8343 // Initialize internal state
8344 ret += "\ttemp_tuple_received = false;\n";
8346 // Init last timestamp values to minimum value for their type
// (for a type that is not increasing, the maximum literal is used instead)
8347 if (tdt->is_increasing()){
8348 ret+="\ttimestamp = " + tdt->get_min_literal() + ";\n";
8349 ret+="\tlast_bucket = " + tdt->get_min_literal() + ";\n";
8351 ret+="\ttimestamp = " + tdt->get_max_literal() + ";\n";
8352 ret+="\tlast_bucket = " + tdt->get_max_literal() + ";\n";
8358 ret += "//\t\tFunctor destructor.\n";
8359 ret += "~"+this->generate_functor_name()+"(){\n";
// Parameter handling: the generated load/destroy functions are empty stubs.
8363 ret += "int load_params_"+this->generate_functor_name()+"(gs_int32_t sz, void *value){return 0;}\n";
8364 ret += "void destroy_params_"+this->generate_functor_name()+"(){}\n";
8366 // Register new parameter block
8367 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8368 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8369 ret += "\treturn this->load_params_"+this->generate_functor_name()+
// temp_status_received(): records whether the tuple is an EOF tuple, runs
// the temporal-tuple check, then unpacks the timestamp and the hash fields.
8373 ret+="\nbool temp_status_received(const host_tuple& tup0)/* const*/ {\n";
8374 ret+="\tgs_int32_t problem;\n";
8376 ret += "\tvoid *tup_ptr = (void *)(&tup0);\n";
8377 ret += "\tis_eof = ftaschema_is_eof_tuple(schema_handle0,tup_ptr);\n";
8379 ret += gen_temp_tuple_check(this->node_name, 0);
8381 sprintf(tmpstr,"\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", tdt->get_hfta_unpack_fcn_noxf().c_str(), fields[temporal_field_idx]->get_name().c_str(), 0);
8384 for(h=0;h<hash_flds.size();++h){
8385 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8386 sprintf(tmpstr,"\tunpack_var_%s = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", fields[hash_flds[h]]->get_name().c_str(), hdt->get_hfta_unpack_fcn_noxf().c_str(), fields[hash_flds[h]]->get_name().c_str(), 0);
8390 " return temp_tuple_received;\n"
// new_epoch(): on first execution, or once timestamp has reached the start
// of the next bucket, last_bucket is moved to timestamp / bucketwidth.
8396 "bool new_epoch(){\n"
8397 " if(first_execution || (last_bucket + 1) * bucketwidth <= timestamp){\n"
8398 " last_bucket = timestamp / bucketwidth;\n"
8399 " first_execution = false;\n"
// output_hash(): three variants are emitted below - a constant 0, a counter
// based on outfl_cnt when there are no hash fields, and a hash of the
// unpacked hash fields.
8409 "inline gs_uint32_t output_hash(){return 0;}\n\n";
8411 if(hash_flds.size()==0){
8413 "gs_uint32_t output_hash(){\n"
8415 " if(outfl_cnt >= "+int_to_string(n_streams)+")\n"
8417 " return outfl_cnt;\n"
8423 "gs_uint32_t output_hash(){\n"
8424 " gs_uint32_t ret = "
8426 for(h=0;h<hash_flds.size();++h){
8428 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
// Types that report use_hashfunc() are hashed through their hash function;
// other types contribute the unpacked value itself.
8429 if(hdt->use_hashfunc()){
8430 sprintf(tmpstr,"%s(&(unpack_var_%s))",hdt->get_hfta_hashfunc().c_str(),fields[hash_flds[h]]->get_name().c_str());
8432 sprintf(tmpstr,"unpack_var_%s",fields[hash_flds[h]]->get_name().c_str());
// NOTE(review): the hash is reduced modulo the number of hash fields,
// whereas num_file_streams() reports n_streams - confirm the modulus should
// not be n_streams.
8438 " return ret % "+int_to_string(hash_flds.size())+";\n"
8445 "gs_uint32_t num_file_streams(){\n"
8446 " return("+int_to_string(n_streams)+");\n"
// get_filename_base(): formats the file name into a 500-byte local buffer.
// The name is <hfta_query_name><filestream_id>, plus "_<parallel_idx>" when
// n_hfta_clones > 1, plus "_<last_bucket*bucketwidth>", and is prefixed with
// the output directory when one is set.
8451 "string get_filename_base(){\n"
8452 " char tmp_fname[500];\n";
8454 string output_filename_base = hfta_query_name+filestream_id;
8456 if(n_hfta_clones > 1){
8457 output_filename_base += "_"+int_to_string(parallel_idx);
8463 if(output_spec->output_directory == "")
8465 " sprintf(tmp_fname,\""+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8467 " sprintf(tmp_fname,\""+output_spec->output_directory+"/"+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8469 " return (string)(tmp_fname);\n"
// do_compression() / propagate_tuple(): the conditions that choose between
// the true and false variants are not visible in this listing.
8475 "bool do_compression(){\n";
8477 ret += " return true;\n";
8479 ret += " return false;\n";
8483 "bool is_eof_tuple(){\n"
8487 "bool propagate_tuple(){\n"
8490 ret+="\treturn false;\n";
8492 ret+="\treturn true;\n";
8494 // create a temp status tuple
8495 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8497 ret += gen_init_temp_status_tuple(this->hfta_query_name);
// Only the temporal field of the status tuple is filled in here.
8499 sprintf(tmpstr,"\ttuple->tuple_var%d = timestamp;\n",temporal_field_idx);
8504 ret += "\treturn 0;\n";
// Returns the generated statement that instantiates the file output
// operator as `op<i>`:
//   <optype><functor> *op<i> = new <optype><functor>(<params>,
//       "<hfta_query_name>", <hfta_query_name>_schema_definition);
// optype is one of file_output_operator, zfile_output_operator or
// bfile_output_operator, selected by a switch on compression_type.
//   i      - operator index used in the variable name
//   params - text inserted as the leading constructor arguments
// NOTE(review): the case labels of the switch are not visible in this
// listing, so the mapping from compression_type to optype cannot be stated.
8512 string output_file_qpn::generate_operator(int i, string params){
8513 string optype = "file_output_operator";
8514 switch(compression_type){
8516 optype = "file_output_operator";
8519 optype = "zfile_output_operator";
8522 optype = "bfile_output_operator";
8526 return(" "+optype+"<" +
8527 generate_functor_name() +
8528 "> *op"+int_to_string(i)+" = new "+optype+"<"+
8529 generate_functor_name() +">("+params+", \"" + hfta_query_name + "\""
8530 + "," + hfta_query_name + "_schema_definition);\n");
8533 /////////////////////////////////////////////////////////
// Returns the name of the generated functor class for this select/project
// node: "spx_functor_" followed by the normalized node name.
// NOTE(review): normalize_name() is applied twice here, while the other
// generate_functor_name() implementations apply it once - presumably
// harmless if normalize_name() is idempotent; confirm.
8537 string spx_qpn::generate_functor_name(){
8538 return("spx_functor_" + normalize_name(normalize_name(this->get_node_name())));
// Generates the C++ source text of the functor class used by the
// select/project operator. The emitted class contains:
//   - private state: unpacked-attribute variables and offsets, partial
//     function results, complex literals, pass-by-handle parameters and
//     query parameters;
//   - a constructor taking the schema handle, and a destructor;
//   - parameter-block routines and set_param_block();
//   - predicate(host_tuple&), which unpacks fields and evaluates the where
//     clauses one at a time;
//   - create_output_tuple(), which sizes, allocates and packs the result;
//   - temp_status_received() and create_temp_status_tuple().
//   schema      - used for type lookups and expression code generation
//   Ext_fcns    - passed to the partial-function, complex-literal and
//                 handle-parameter lookups
//   needs_xform - passed to the unpack-code generators
// NOTE(review): the embedded line numbers skip, so several statements
// (declarations of w, s, p, cid_set, pfcn_refs and sl_pfcns, appends of
// tmpstr, closing braces, the return) are not visible in this listing.
8541 string spx_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8542 // Initialize generate utility globals
8543 segen_gb_tbl = NULL;
8545 string ret = "class " + this->generate_functor_name() + "{\n";
8547 // Find variables referenced in this query node.
8550 col_id_set::iterator csi;
8553 for(w=0;w<where.size();++w)
8554 gather_pr_col_ids(where[w]->pr,cid_set,NULL);
8555 for(s=0;s<select_list.size();s++){
8556 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
8560 // Private variables : store the state of the functor.
8561 // 1) variables for unpacked attributes
8562 // 2) offsets of the unpacked attributes
8563 // 3) storage of partial functions
8564 // 4) storage of complex literals (i.e., require a constructor)
8566 ret += "private:\n";
8567 ret += "\tbool first_execution;\t// internal processing state \n";
8568 ret += "\tint schema_handle0;\n";
8570 // generate the declaration of all the variables related to
8571 // temp tuples generation
8572 ret += gen_decl_temp_vars();
8575 // unpacked attribute storage, offsets
8576 ret += "//\t\tstorage and offsets of accessed fields.\n";
8577 ret += generate_access_vars(cid_set,schema);
8578 // tuple metadata management
8579 ret += "\tint tuple_metadata_offset0;\n";
8581 // Variables to store results of partial functions.
8582 // WARNING find_partial_functions modifies the SE
8583 // (it marks the partial function id).
8584 ret += "//\t\tParital function result storage\n";
8585 vector<scalarexp_t *> partial_fcns;
8586 vector<int> fcn_ref_cnt;
8587 vector<bool> is_partial_fcn;
8588 for(s=0;s<select_list.size();s++){
8589 find_partial_fcns(select_list[s]->se, &partial_fcns,&fcn_ref_cnt,&is_partial_fcn, Ext_fcns);
8591 for(w=0;w<where.size();w++){
8592 find_partial_fcns_pr(where[w]->pr, &partial_fcns, &fcn_ref_cnt,&is_partial_fcn,Ext_fcns);
8594 // Unmark non-partial expensive functions referenced only once.
8595 for(p=0; p<partial_fcns.size();p++){
8596 if(!is_partial_fcn[p] && fcn_ref_cnt[p] <= 1){
8597 partial_fcns[p]->set_partial_ref(-1);
8600 if(partial_fcns.size()>0){
8601 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,true);
8604 // Complex literals (i.e., they need constructors)
8605 ret += "//\t\tComplex literal storage.\n";
8606 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8607 ret += generate_complex_lit_vars(complex_literals);
8609 // Pass-by-handle parameters
8610 ret += "//\t\tPass-by-handle storage.\n";
8611 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8612 ret += generate_pass_by_handle_vars(param_handle_table);
8614 // Variables to hold parameters
8615 ret += "//\tfor query parameters\n";
8616 ret += generate_param_vars(param_tbl);
8619 // The publicly exposed functions
8621 ret += "\npublic:\n";
8624 //-------------------
8625 // The functor constructor
8626 // pass in the schema handle.
8627 // 1) make assignments to the unpack offset variables
8628 // 2) initialize the complex literals
8629 // 3) Set the initial values of the temporal attributes
8630 // referenced in select clause (in case we need to emit
8631 // temporal tuple before receiving first tuple )
8633 ret += "//\t\tFunctor constructor.\n";
8634 ret += this->generate_functor_name()+"(int schema_handle0){\n";
8636 // save schema handle
8637 ret += "this->schema_handle0 = schema_handle0;\n";
8640 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8641 ret += gen_access_var_init(cid_set);
8643 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8646 ret += "//\t\tInitialize complex literals.\n";
8647 ret += gen_complex_lit_init(complex_literals);
8649 // Initialize partial function results so they can be safely GC'd
8650 ret += gen_partial_fcn_init(partial_fcns);
8652 // Initialize non-query-parameter parameter handles
8653 ret += gen_pass_by_handle_init(param_handle_table);
8655 // Init temporal attributes referenced in select list
8656 ret += gen_init_temp_vars(schema, select_list, NULL);
8661 //-------------------
8662 // Functor destructor
8663 ret += "//\t\tFunctor destructor.\n";
8664 ret += "~"+this->generate_functor_name()+"(){\n";
8666 // clean up buffer-type complex literals.
8667 ret += gen_complex_lit_dtr(complex_literals);
8669 // Deregister the pass-by-handle parameters
8670 ret += "/* register and de-register the pass-by-handle parameters */\n";
8671 ret += gen_pass_by_handle_dtr(param_handle_table);
8673 // Reclaim buffer space for partial function results
8674 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8675 ret += gen_partial_fcn_dtr(partial_fcns);
8678 // Destroy the parameters, if any need to be destroyed
8679 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8684 //-------------------
8685 // Parameter manipulation routines
8686 ret += generate_load_param_block(this->generate_functor_name(),
8687 this->param_tbl,param_handle_table );
8688 ret += generate_delete_param_block(this->generate_functor_name(),
8689 this->param_tbl,param_handle_table);
8692 //-------------------
8693 // Register new parameter block
8694 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8695 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8696 ret += "\treturn this->load_params_"+this->generate_functor_name()+
8701 //-------------------
8702 // The selection predicate.
8703 // Unpack variables for 1 cnf element
8704 // at a time, return false immediately if the
8706 // optimization : evaluate the cheap cnf elements
8707 // first, the expensive ones last.
8709 ret += "bool predicate(host_tuple &tup0){\n";
8710 // Variables for execution of the function.
8711 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8712 // Initialize cached function indicators.
8713 for(p=0;p<partial_fcns.size();++p){
8714 if(fcn_ref_cnt[p]>1){
8715 ret+="\tfcn_ref_cnt_"+int_to_string(p)+"=0;\n";
// Emit the temporal status tuple check for input channel 0.
8720 ret += gen_temp_tuple_check(this->node_name, 0);
8722 if(partial_fcns.size()>0){ // partial fcn access failure
8723 ret += "\tgs_retval_t retval = 0;\n";
8727 // Reclaim buffer space for partial function results
8728 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8729 ret += gen_partial_fcn_dtr(partial_fcns);
8731 col_id_set found_cids; // colrefs unpacked thus far.
8732 ret += gen_unpack_temp_vars(schema, found_cids, select_list, NULL, needs_xform);
8734 // For temporal status tuple we don't need to do anything else
8735 ret += "\tif (temp_tuple_received) return false;\n\n";
8738 for(w=0;w<where.size();++w){
8739 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8741 // Find the set of variables accessed in this CNF elem,
8742 // but in no previous element.
8743 col_id_set new_cids;
8744 get_new_pred_cids(where[w]->pr,found_cids, new_cids, NULL);
8745 // Unpack these values.
8746 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
8747 // Find partial fcns ref'd in this cnf element
8749 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8750 ret += gen_unpack_partial_fcn(schema,partial_fcns,pfcn_refs,fcn_ref_cnt, is_partial_fcn, "false");
// The generated predicate returns false as soon as one clause fails.
8752 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8753 +") ) return(false);\n";
8756 // The partial functions ref'd in the select list
8757 // must also be evaluated. If one returns false,
8758 // then implicitly the predicate is false.
8760 for(s=0;s<select_list.size();s++){
8761 collect_partial_fcns(select_list[s]->se, sl_pfcns);
// The if below guards only the emitted comment line; the unpack call after
// it is made unconditionally.
8763 if(sl_pfcns.size() > 0)
8764 ret += "//\t\tUnpack remaining partial fcns.\n";
8765 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
8766 fcn_ref_cnt, is_partial_fcn,
8767 found_cids, NULL, "false", needs_xform);
8769 // Unpack remaining fields
8770 ret += "//\t\tunpack any remaining fields from the input tuple.\n";
8771 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
8774 ret += "\treturn(true);\n";
8778 //-------------------
8779 // The output tuple function.
8780 // Unpack the remaining attributes into
8781 // the placeholder variables, unpack the
8782 // partial fcn refs, then pack up the tuple.
8784 ret += "host_tuple create_output_tuple() {\n";
8785 ret += "\thost_tuple tup;\n";
8786 ret += "\tgs_retval_t retval = 0;\n";
8788 // Unpack any remaining cached functions.
8789 ret += gen_remaining_cached_fcns(schema, partial_fcns, sl_pfcns,
8790 fcn_ref_cnt, is_partial_fcn);
8793 // Now, compute the size of the tuple.
8795 // Unpack any BUFFER type selections into temporaries
8796 // so that I can compute their size and not have
8797 // to recompute their value during tuple packing.
8798 // I can use regular assignment here because
8799 // these temporaries are non-persistent.
8801 ret += "//\t\tCompute the size of the tuple.\n";
8802 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
8804 // Unpack all buffer type selections, to be able to compute their size
8805 ret += gen_buffer_selvars(schema, select_list);
8807 // The size of the tuple is the size of the tuple struct plus the
8808 // size of the buffers to be copied in.
8811 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
8812 ret += gen_buffer_selvars_size(select_list,schema);
8815 // Allocate tuple data block.
// NOTE(review): the emitted code does not visibly check the malloc result.
8816 ret += "//\t\tCreate the tuple block.\n";
8817 ret += "\ttup.data = malloc(tup.tuple_size);\n";
8818 ret += "\ttup.heap_resident = true;\n";
8819 // Mark tuple as regular
8820 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
8822 // ret += "\ttup.channel = 0;\n";
8823 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
8824 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
8827 // (Here, offsets are hard-wired. is this a problem?)
8829 ret += "//\t\tPack the fields into the tuple.\n";
8830 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
8832 // Delete string temporaries
8833 ret += gen_buffer_selvars_dtr(select_list);
8835 ret += "\treturn tup;\n";
8838 //-------------------------------------------------------------------
8839 // Temporal update functions
8841 ret += "bool temp_status_received(){return temp_tuple_received;};\n\n";
8844 // create a temp status tuple
8845 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8847 ret += gen_init_temp_status_tuple(this->get_node_name());
8850 // (Here, offsets are hard-wired. is this a problem?)
// Passing true for temporal_only restricts packing to the temporal fields.
8852 ret += "//\t\tPack the fields into the tuple.\n";
8853 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
8855 ret += "\treturn 0;\n";
// Returns the generated statement that instantiates the select/project
// operator as `op<i>`:
//   select_project_operator<functor> *op<i> =
//       new select_project_operator<functor>(<params>, "<node name>");
//   i      - operator index used in the variable name
//   params - text inserted as the leading constructor arguments
8862 string spx_qpn::generate_operator(int i, string params){
8864 return(" select_project_operator<" +
8865 generate_functor_name() +
8866 "> *op"+int_to_string(i)+" = new select_project_operator<"+
8867 generate_functor_name() +">("+params+", \"" + get_node_name() + "\");\n");
8871 ////////////////////////////////////////////////////////////////
// Returns the name of the generated functor class for this node:
// "sgah_functor_" followed by the normalized node name.
8876 string sgah_qpn::generate_functor_name(){
8877 return("sgah_functor_" + normalize_name(this->get_node_name()));
8881 string sgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8885 // Initialize generate utility globals
8886 segen_gb_tbl = &(gb_tbl);
8888 // Might need to generate empty values for cube processing.
8889 map<int, string> structured_types;
8890 for(g=0;g<gb_tbl.size();++g){
8891 if(gb_tbl.get_data_type(g)->is_structured_type()){
8892 structured_types[gb_tbl.get_data_type(g)->type_indicator()] = gb_tbl.get_data_type(g)->get_type_str();
8896 //--------------------------------
8897 // group definition class
8898 string ret = "class " + generate_functor_name() + "_groupdef{\n";
8900 for(g=0;g<this->gb_tbl.size();g++){
8901 sprintf(tmpstr,"gb_var%d",g);
8902 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8904 // empty strucutred literals
8905 map<int, string>::iterator sii;
8906 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8907 data_type dt(sii->second);
8908 literal_t empty_lit(sii->first);
8909 ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8912 if(structured_types.size()==0){
8913 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
8915 ret += "\t"+generate_functor_name() + "_groupdef(){}\n";
8919 ret += "\t"+generate_functor_name() + "_groupdef("+
8920 this->generate_functor_name() + "_groupdef *gd){\n";
8921 for(g=0;g<gb_tbl.size();g++){
8922 data_type *gdt = gb_tbl.get_data_type(g);
8923 if(gdt->is_buffer_type()){
8924 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8925 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8928 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8933 ret += "\t"+generate_functor_name() + "_groupdef("+
8934 this->generate_functor_name() + "_groupdef *gd, bool *pattern){\n";
8935 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8936 literal_t empty_lit(sii->first);
8937 ret += "\t\t"+empty_lit.to_hfta_C_code("&"+empty_lit.hfta_empty_literal_name())+";\n";
8939 for(g=0;g<gb_tbl.size();g++){
8940 data_type *gdt = gb_tbl.get_data_type(g);
8941 ret += "\t\tif(pattern["+int_to_string(g)+"]){\n";
8942 if(gdt->is_buffer_type()){
8943 sprintf(tmpstr,"\t\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8944 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8947 sprintf(tmpstr,"\t\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8950 ret += "\t\t}else{\n";
8951 literal_t empty_lit(gdt->type_indicator());
8952 if(empty_lit.is_cpx_lit()){
8953 ret +="\t\t\tgb_var"+int_to_string(g)+"= "+empty_lit.hfta_empty_literal_name()+";\n";
8955 ret +="\t\t\tgb_var"+int_to_string(g)+"="+empty_lit.to_hfta_C_code("")+";\n";
8961 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
8962 for(g=0;g<gb_tbl.size();g++){
8963 data_type *gdt = gb_tbl.get_data_type(g);
8964 if(gdt->is_buffer_type()){
8965 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
8966 gdt->get_hfta_buffer_destroy().c_str(), g );
8973 for(g=0;g<gb_tbl.size();g++){
8974 data_type *gdt = gb_tbl.get_data_type(g);
8975 if(gdt->is_temporal()){
8980 ret += tgdt->get_host_cvar_type()+" get_curr_gb(){\n";
8981 ret+="\treturn gb_var"+int_to_string(g)+";\n";
8986 //--------------------------------
8987 // aggr definition class
8988 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
8990 for(a=0;a<aggr_tbl.size();a++){
8991 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
8992 sprintf(tmpstr,"aggr_var%d",a);
8993 if(aggr_tbl.is_builtin(a)){
8994 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
8995 if(aggr_tbl.get_op(a) == "AVG"){ // HACK!
8996 data_type cnt_type = data_type("ullong");
8997 ret+="\t"+cnt_type.make_host_cvar(string(tmpstr)+"_cnt")+";\n";
8998 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(string(tmpstr)+"_sum")+";\n";
9001 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
9005 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
9007 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
9008 for(a=0;a<aggr_tbl.size();a++){
9009 if(aggr_tbl.is_builtin(a)){
9010 data_type *adt = aggr_tbl.get_data_type(a);
9011 if(adt->is_buffer_type()){
9012 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
9013 adt->get_hfta_buffer_destroy().c_str(), a );
9017 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
9018 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9019 ret+="(aggr_var"+int_to_string(a)+"));\n";
9025 //-------------------------------------------
9026 // group-by patterns for the functor,
9027 // initialization within the class is cumbersome.
9028 int n_patterns = gb_tbl.gb_patterns.size();
9030 ret += "bool "+this->generate_functor_name()+"_gb_patterns["+int_to_string(n_patterns)+
9031 "]["+int_to_string(gb_tbl.size())+"] = {\n";
9032 if(n_patterns == 0){
9033 for(i=0;i<gb_tbl.size();++i){
9038 for(i=0;i<n_patterns;++i){
9039 if(i>0) ret += ",\n";
9041 for(j=0;j<gb_tbl.size();j++){
9042 if(j>0) ret += ", ";
9043 if(gb_tbl.gb_patterns[i][j]){
9056 //--------------------------------
9058 ret += "class " + this->generate_functor_name() + "{\n";
9060 // Find variables referenced in this query node.
9063 col_id_set::iterator csi;
9065 for(w=0;w<where.size();++w)
9066 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
9067 for(w=0;w<having.size();++w)
9068 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
9069 for(g=0;g<gb_tbl.size();g++)
9070 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
9072 for(s=0;s<select_list.size();s++){
9073 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
9077 // Private variables : store the state of the functor.
9078 // 1) variables for unpacked attributes
9079 // 2) offsets of the unpacked attributes
9080 // 3) storage of partial functions
9081 // 4) storage of complex literals (i.e., require a constructor)
9083 ret += "private:\n";
9085 // var to save the schema handle
9086 ret += "\tint schema_handle0;\n";
9087 // metadata from schema handle
9088 ret += "\tint tuple_metadata_offset0;\n";
9090 // generate the declaration of all the variables related to
9091 // temp tuples generation
9092 ret += gen_decl_temp_vars();
9094 // unpacked attribute storage, offsets
9095 ret += "//\t\tstorage and offsets of accessed fields.\n";
9096 ret += generate_access_vars(cid_set, schema);
9098 // Variables to store results of partial functions.
9099 // WARNING find_partial_functions modifies the SE
9100 // (it marks the partial function id).
9101 ret += "//\t\tParital function result storage\n";
9102 vector<scalarexp_t *> partial_fcns;
9103 vector<int> fcn_ref_cnt;
9104 vector<bool> is_partial_fcn;
9105 for(s=0;s<select_list.size();s++){
9106 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
9108 for(w=0;w<where.size();w++){
9109 find_partial_fcns_pr(where[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9111 for(w=0;w<having.size();w++){
9112 find_partial_fcns_pr(having[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9114 for(g=0;g<gb_tbl.size();g++){
9115 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns,NULL,NULL, Ext_fcns);
9117 for(a=0;a<aggr_tbl.size();a++){
9118 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns,NULL,NULL, Ext_fcns);
9120 if(partial_fcns.size()>0){
9121 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
9122 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
9125 // Complex literals (i.e., they need constructors)
9126 ret += "//\t\tComplex literal storage.\n";
9127 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
9128 ret += generate_complex_lit_vars(complex_literals);
9130 // Pass-by-handle parameters
9131 ret += "//\t\tPass-by-handle storage.\n";
9132 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
9133 ret += generate_pass_by_handle_vars(param_handle_table);
9136 // variables to hold parameters.
9137 ret += "//\tfor query parameters\n";
9138 ret += generate_param_vars(param_tbl);
9140 // Is there a temporal flush? If so create flush temporaries,
9141 // create flush indicator.
9142 bool uses_temporal_flush = false;
9143 for(g=0;g<gb_tbl.size();g++){
9144 data_type *gdt = gb_tbl.get_data_type(g);
9145 if(gdt->is_temporal())
9146 uses_temporal_flush = true;
9149 if(uses_temporal_flush){
9150 ret += "//\t\tFor temporal flush\n";
9151 for(g=0;g<gb_tbl.size();g++){
9152 data_type *gdt = gb_tbl.get_data_type(g);
9153 if(gdt->is_temporal()){
9154 sprintf(tmpstr,"last_gb%d",g);
9155 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9156 sprintf(tmpstr,"last_flushed_gb%d",g);
9157 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9160 ret += "\tbool needs_temporal_flush;\n";
9164 // The publicly exposed functions
9166 ret += "\npublic:\n";
9169 //-------------------
9170 // The functor constructor
9171 // pass in the schema handle.
9172 // 1) make assignments to the unpack offset variables
9173 // 2) initialize the complex literals
9175 ret += "//\t\tFunctor constructor.\n";
9176 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9178 // save the schema handle
9179 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
9182 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
9183 ret += gen_access_var_init(cid_set);
9185 ret += "tuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9188 ret += "//\t\tInitialize complex literals.\n";
9189 ret += gen_complex_lit_init(complex_literals);
9191 // Initialize partial function results so they can be safely GC'd
9192 ret += gen_partial_fcn_init(partial_fcns);
9194 // Initialize non-query-parameter parameter handles
9195 ret += gen_pass_by_handle_init(param_handle_table);
9197 // temporal flush variables
9198 // ASSUME that structured values won't be temporal.
9199 if(uses_temporal_flush){
9200 ret += "//\t\tInitialize temporal flush variables.\n";
9201 for(g=0;g<gb_tbl.size();g++){
9202 data_type *gdt = gb_tbl.get_data_type(g);
9203 if(gdt->is_temporal()){
9204 literal_t gl(gdt->type_indicator());
9205 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9207 sprintf(tmpstr,"\tlast_flushed_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9211 ret += "\tneeds_temporal_flush = false;\n";
9214 // Init temporal attributes referenced in select list
9215 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
9219 //-------------------
9220 // Functor destructor
9221 ret += "//\t\tFunctor destructor.\n";
9222 ret += "~"+this->generate_functor_name()+"(){\n";
9224 // clean up buffer type complex literals
9225 ret += gen_complex_lit_dtr(complex_literals);
9227 // Deregister the pass-by-handle parameters
9228 ret += "/* register and de-register the pass-by-handle parameters */\n";
9229 ret += gen_pass_by_handle_dtr(param_handle_table);
9231 // clean up partial function results.
9232 ret += "/* clean up partial function storage */\n";
9233 ret += gen_partial_fcn_dtr(partial_fcns);
9235 // Destroy the parameters, if any need to be destroyed
9236 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9241 //-------------------
9242 // Parameter manipulation routines
9243 ret += generate_load_param_block(this->generate_functor_name(),
9244 this->param_tbl,param_handle_table);
9245 ret += generate_delete_param_block(this->generate_functor_name(),
9246 this->param_tbl,param_handle_table);
9248 //-------------------
9249 // Register new parameter block
9251 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
9252 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9253 ret += "\treturn this->load_params_"+this->generate_functor_name()+
9257 // -----------------------------------
9258 // group-by pattern support
9261 "int n_groupby_patterns(){\n"
9262 " return "+int_to_string(gb_tbl.gb_patterns.size())+";\n"
9264 "bool *get_pattern(int p){\n"
9265 " return "+this->generate_functor_name()+"_gb_patterns[p];\n"
9272 //-------------------
9273 // the create_group method.
9274 // This method creates a group in a buffer passed in
9275 // (to allow for creation on the stack).
9276 // There are also a couple of side effects:
9277 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
9278 // 2) determine if a temporal flush is required.
9280 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
9281 // Variables for execution of the function.
9282 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9284 if(partial_fcns.size()>0){ // partial fcn access failure
9285 ret += "\tgs_retval_t retval = 0;\n";
9289 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
9290 "_groupdef *) buffer;\n";
9292 // Start by cleaning up partial function results
9293 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
9294 set<int> w_pfcns; // partial fcns in where clause
9295 for(w=0;w<where.size();++w)
9296 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
9298 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
9299 for(g=0;g<gb_tbl.size();g++){
9300 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
9302 for(a=0;a<aggr_tbl.size();a++){
9303 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
9305 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
9306 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
9307 // ret += gen_partial_fcn_dtr(partial_fcns);
9310 ret += gen_temp_tuple_check(this->node_name, 0);
9311 col_id_set found_cids; // colrefs unpacked thus far.
9312 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
9315 // Save temporal group-by variables
9318 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
9320 for(g=0;g<gb_tbl.size();g++){
9322 data_type *gdt = gb_tbl.get_data_type(g);
9324 if(gdt->is_temporal()){
9325 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9326 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9334 // Compare the temporal GB vars with the stored ones,
9335 // set flush indicator and update stored GB vars if there is any change.
9337 ret += "// hfta_disorder = "+int_to_string(hfta_disorder)+"\n";
9338 if(hfta_disorder < 2){
9339 if(uses_temporal_flush){
9341 bool first_one = true;
9342 for(g=0;g<gb_tbl.size();g++){
9343 data_type *gdt = gb_tbl.get_data_type(g);
9345 if(gdt->is_temporal()){
9346 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9347 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9348 if(first_one){first_one = false;} else {ret += ") && (";}
9349 ret += generate_equality_test(lhs_op, rhs_op, gdt);
9353 for(g=0;g<gb_tbl.size();g++){
9354 data_type *gdt = gb_tbl.get_data_type(g);
9355 if(gdt->is_temporal()){
9356 if(gdt->is_buffer_type()){
9357 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9359 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9361 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9366 ret += "\t\tneeds_temporal_flush=true;\n";
9367 ret += "\t\t}else{\n"
9368 "\t\t\tneeds_temporal_flush=false;\n"
9372 ret+= "\tif(temp_tuple_received && !( (";
9373 bool first_one = true;
9374 for(g=0;g<gb_tbl.size();g++){
9375 data_type *gdt = gb_tbl.get_data_type(g);
9377 if(gdt->is_temporal()){
9378 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9379 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9380 if(first_one){first_one = false;} else {ret += ") && (";}
9381 ret += generate_equality_test(lhs_op, rhs_op, gdt);
9387 for(g=0;g<gb_tbl.size();g++){
9388 data_type *gdt = gb_tbl.get_data_type(g);
9389 if(gdt->is_temporal()){
9391 if(gdt->is_buffer_type()){
9392 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9394 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9396 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9402 data_type *tgdt = gb_tbl.get_data_type(temporal_g);
9403 literal_t gl(tgdt->type_indicator());
9404 ret += "\t\tif(last_flushed_gb"+int_to_string(temporal_g)+">"+gl.to_hfta_C_code("")+")\n";
9405 ret += "\t\t\tneeds_temporal_flush=true;\n";
9406 ret += "\t\t}else{\n"
9407 "\t\t\tneeds_temporal_flush=false;\n"
9412 // For temporal status tuple we don't need to do anything else
9413 ret += "\tif (temp_tuple_received) return NULL;\n\n";
9415 for(w=0;w<where.size();++w){
9416 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
9418 // Find the set of variables accessed in this CNF elem,
9419 // but in no previous element.
9420 col_id_set new_cids;
9421 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
9423 // Unpack these values.
9424 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9425 // Find partial fcns ref'd in this cnf element
9427 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
9428 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
9430 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
9431 +") ) return(NULL);\n";
9434 // The partial functions ref'd in the group-by var and aggregate
9435 // definitions must also be evaluated. If one returns false,
9436 // then implicitly the predicate is false.
9437 set<int>::iterator pfsi;
9439 if(ag_gb_pfcns.size() > 0)
9440 ret += "//\t\tUnpack remaining partial fcns.\n";
9441 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
9442 found_cids, segen_gb_tbl, "NULL", needs_xform);
9444 // Unpack the group-by variables
9446 for(g=0;g<gb_tbl.size();g++){
9447 data_type *gdt = gb_tbl.get_data_type(g);
9449 if(!gdt->is_temporal()){
9450 // Find the new fields ref'd by this GBvar def.
9451 col_id_set new_cids;
9452 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
9453 // Unpack these values.
9454 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9456 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9457 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9459 // There seems to be no difference between the two
9460 // branches of the IF statement.
9461 data_type *gdt = gb_tbl.get_data_type(g);
9462 if(gdt->is_buffer_type()){
9463 // Create temporary copy.
9464 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9465 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9467 scalarexp_t *gse = gb_tbl.get_def(g);
9468 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9469 g,generate_se_code(gse,schema).c_str());
9478 ret+= "\treturn gbval;\n";
9481 //--------------------------------------------------------
9482 // Create and initialize an aggregate object
9484 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
9485 // Variables for execution of the function.
9486 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9489 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
9490 "_aggrdef *)buffer;\n";
9492 for(a=0;a<aggr_tbl.size();a++){
9493 if(aggr_tbl.is_builtin(a)){
9494 // Create temporaries for buffer return values
9495 data_type *adt = aggr_tbl.get_data_type(a);
9496 if(adt->is_buffer_type()){
9497 sprintf(tmpstr,"aggr_tmp_%d", a);
9498 ret+=adt->make_host_cvar(tmpstr)+";\n";
9503 // Unpack all remaining attributes
9504 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
9505 for(a=0;a<aggr_tbl.size();a++){
9506 sprintf(tmpstr,"aggval->aggr_var%d",a);
9507 string assignto_var = tmpstr;
9508 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
9511 ret += "\treturn aggval;\n";
9514 //--------------------------------------------------------
9515 // update an aggregate object
9517 ret += "void update_aggregate(host_tuple &tup0, "
9518 +generate_functor_name()+"_groupdef *gbval, "+
9519 generate_functor_name()+"_aggrdef *aggval){\n";
9520 // Variables for execution of the function.
9521 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9523 // use of temporaries depends on the aggregate,
9524 // generate them in generate_aggr_update
9527 // Unpack all remaining attributes
9528 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
9529 for(a=0;a<aggr_tbl.size();a++){
9530 sprintf(tmpstr,"aggval->aggr_var%d",a);
9531 string varname = tmpstr;
9532 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
9535 ret += "\treturn;\n";
9538 //---------------------------------------------------
9541 ret += "\tbool flush_needed(){\n";
9542 if(uses_temporal_flush){
9543 ret += "\t\treturn needs_temporal_flush;\n";
9545 ret += "\t\treturn false;\n";
9549 //---------------------------------------------------
9550 // create output tuple
9551 // Unpack the partial functions ref'd in the where clause,
9552 // select clause. Evaluate the where clause.
9553 // Finally, pack the tuple.
9555 // I need to use special code generation here,
9556 // so I'll leave it in longhand.
9558 ret += "host_tuple create_output_tuple("
9559 +generate_functor_name()+"_groupdef *gbval, "+
9560 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
9562 ret += "\thost_tuple tup;\n";
9563 ret += "\tfailed = false;\n";
9564 ret += "\tgs_retval_t retval = 0;\n";
9566 string gbvar = "gbval->gb_var";
9567 string aggvar = "aggval->";
9569 // Create cached temporaries for UDAF return values.
9570 for(a=0;a<aggr_tbl.size();a++){
9571 if(! aggr_tbl.is_builtin(a)){
9572 int afcn_id = aggr_tbl.get_fcn_id(a);
9573 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9574 sprintf(tmpstr,"udaf_ret_%d", a);
9575 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
9580 // First, get the return values from the UDAFS
9581 for(a=0;a<aggr_tbl.size();a++){
9582 if(! aggr_tbl.is_builtin(a)){
9583 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
9584 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9585 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
9589 set<int> hv_sl_pfcns;
9590 for(w=0;w<having.size();w++){
9591 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
9593 for(s=0;s<select_list.size();s++){
9594 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
9597 // clean up the partial fcn results from any previous execution
9598 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
9601 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
9602 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
9603 ret += "\tif(retval){ failed = true; return(tup);}\n";
9606 // Evaluate the HAVING clause
9607 // TODO: this seems to have a ++ operator rather than a + operator.
9608 for(w=0;w<having.size();++w){
9609 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
9612 // Now, compute the size of the tuple.
9614 // Unpack any BUFFER type selections into temporaries
9615 // so that I can compute their size and not have
9616 // to recompute their value during tuple packing.
9617 // I can use regular assignment here because
9618 // these temporaries are non-persistent.
9619 // TODO: should I be using the selvar generation routine?
9621 ret += "//\t\tCompute the size of the tuple.\n";
9622 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
9623 for(s=0;s<select_list.size();s++){
9624 scalarexp_t *se = select_list[s]->se;
9625 data_type *sdt = se->get_data_type();
9626 if(sdt->is_buffer_type() &&
9627 !( (se->get_operator_type() == SE_COLREF) ||
9628 (se->get_operator_type() == SE_AGGR_STAR) ||
9629 (se->get_operator_type() == SE_AGGR_SE) ||
9630 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9631 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9633 sprintf(tmpstr,"selvar_%d",s);
9634 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
9635 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
9639 // The size of the tuple is the size of the tuple struct plus the
9640 // size of the buffers to be copied in.
9642 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
9643 for(s=0;s<select_list.size();s++){
9644 // if(s>0) ret += "+";
9645 scalarexp_t *se = select_list[s]->se;
9646 data_type *sdt = select_list[s]->se->get_data_type();
9647 if(sdt->is_buffer_type()){
9648 if(!( (se->get_operator_type() == SE_COLREF) ||
9649 (se->get_operator_type() == SE_AGGR_STAR) ||
9650 (se->get_operator_type() == SE_AGGR_SE) ||
9651 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9652 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9654 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
9657 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9664 // Allocate tuple data block.
9665 ret += "//\t\tCreate the tuple block.\n";
9666 ret += "\ttup.data = malloc(tup.tuple_size);\n";
9667 ret += "\ttup.heap_resident = true;\n";
9669 // Mark tuple as regular
9670 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
9672 // ret += "\ttup.channel = 0;\n";
9673 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
9674 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
9677 // (Here, offsets are hard-wired. is this a problem?)
9679 ret += "//\t\tPack the fields into the tuple.\n";
9680 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
9681 for(s=0;s<select_list.size();s++){
9682 scalarexp_t *se = select_list[s]->se;
9683 data_type *sdt = se->get_data_type();
9684 if(sdt->is_buffer_type()){
9685 if(!( (se->get_operator_type() == SE_COLREF) ||
9686 (se->get_operator_type() == SE_AGGR_STAR) ||
9687 (se->get_operator_type() == SE_AGGR_SE) ||
9688 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9689 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9691 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
9693 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
9696 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9698 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9702 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9704 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
9709 // Destroy string temporaries
9710 ret += gen_buffer_selvars_dtr(select_list);
9711 // Destroy string return vals of UDAFs
9712 for(a=0;a<aggr_tbl.size();a++){
9713 if(! aggr_tbl.is_builtin(a)){
9714 int afcn_id = aggr_tbl.get_fcn_id(a);
9715 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9716 if(adt->is_buffer_type()){
9717 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
9718 adt->get_hfta_buffer_destroy().c_str(), a );
9725 ret += "\treturn tup;\n";
9729 //-------------------------------------------------------------------
9730 // Temporal update functions
9732 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
9734 for(g=0;g<gb_tbl.size();g++){
9735 data_type *gdt = gb_tbl.get_data_type(g);
9736 if(gdt->is_temporal()){
9741 ret += tgdt->get_host_cvar_type()+" get_last_flushed_gb(){\n";
9742 ret+="\treturn last_flushed_gb"+int_to_string(g)+";\n";
9744 ret += tgdt->get_host_cvar_type()+" get_last_gb(){\n";
9745 ret+="\treturn last_gb"+int_to_string(g)+";\n";
9751 // create a temp status tuple
9752 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
9754 ret += gen_init_temp_status_tuple(this->get_node_name());
9757 // (Here, offsets are hard-wired. is this a problem?)
9759 ret += "//\t\tPack the fields into the tuple.\n";
9760 for(s=0;s<select_list.size();s++){
9761 data_type *sdt = select_list[s]->se->get_data_type();
9762 if(sdt->is_temporal()){
9763 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9766 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
9773 ret += "\treturn 0;\n";
9774 ret += "};};\n\n\n";
9777 //----------------------------------------------------------
9778 // The hash function
9780 ret += "struct "+generate_functor_name()+"_hash_func{\n";
9781 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
9782 "_groupdef *grp) const{\n";
9783 ret += "\t\treturn( (";
9784 for(g=0;g<gb_tbl.size();g++){
9786 data_type *gdt = gb_tbl.get_data_type(g);
9787 if(gdt->use_hashfunc()){
9788 if(gdt->is_buffer_type())
9789 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9791 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9793 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
9797 ret += ") >> 32);\n";
9801 //----------------------------------------------------------
9802 // The comparison function
9804 ret += "struct "+generate_functor_name()+"_equal_func{\n";
9805 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
9806 generate_functor_name()+"_groupdef *grp2) const{\n";
9807 ret += "\t\treturn( (";
9809 for(g=0;g<gb_tbl.size();g++){
9810 if(g>0) ret += ") && (";
9811 data_type *gdt = gb_tbl.get_data_type(g);
9812 if(gdt->complex_comparison(gdt)){
9813 if(gdt->is_buffer_type())
9814 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
9815 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9817 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
9818 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9820 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
// Builds the C++ source text that declares and allocates the runtime
// operator object for this group-by/aggregation query node.
//   i      - operator index; used to name the generated pointer "op<i>".
//   params - text spliced in as the leading constructor argument(s); it is
//            followed by ", " and the node name inside a string literal.
// The template arguments are all derived from generate_functor_name():
// the functor itself plus its _groupdef, _aggrdef, _hash_func and
// _equal_func companions.
// NOTE(review): several lines of this function are not visible in this
// listing (result-string declaration, statement heads, else branch, return);
// presumably the assembled text is returned as the result -- confirm.
9832 string sgah_qpn::generate_operator(int i, string params){
9834 if(hfta_disorder < 2){
// hfta_disorder < 2: emit a plain groupby_operator<...> declaration and the
// matching "new groupby_operator<...>(params, "<node name>" ..." expression.
9836 " groupby_operator<" +
9837 generate_functor_name()+","+
9838 generate_functor_name() + "_groupdef, " +
9839 generate_functor_name() + "_aggrdef, " +
9840 generate_functor_name()+"_hash_func, "+
9841 generate_functor_name()+"_equal_func "
9842 "> *op"+int_to_string(i)+" = new groupby_operator<"+
9843 generate_functor_name()+","+
9844 generate_functor_name() + "_groupdef, " +
9845 generate_functor_name() + "_aggrdef, " +
9846 generate_functor_name()+"_hash_func, "+
9847 generate_functor_name()+"_equal_func "
9848 ">("+params+", \"" + get_node_name() +
// Scan the group-by variables for one whose data type is temporal.
// NOTE(review): the body of the is_temporal() test is not visible here;
// presumably it records that data type in tgdt, which is used below -- confirm.
9853 for(int g=0;g<gb_tbl.size();g++){
9854 data_type *gdt = gb_tbl.get_data_type(g);
9855 if(gdt->is_temporal()){
// Emit a groupby_operator_oop<...> declaration instead; it takes the same
// five template arguments plus tgdt's host C variable type as a sixth.
9862 " groupby_operator_oop<" +
9863 generate_functor_name()+","+
9864 generate_functor_name() + "_groupdef, " +
9865 generate_functor_name() + "_aggrdef, " +
9866 generate_functor_name()+"_hash_func, "+
9867 generate_functor_name()+"_equal_func, " +
9868 tgdt->get_host_cvar_type() +
9869 "> *op"+int_to_string(i)+" = new groupby_operator_oop<"+
9870 generate_functor_name()+","+
9871 generate_functor_name() + "_groupdef, " +
9872 generate_functor_name() + "_aggrdef, " +
9873 generate_functor_name()+"_hash_func, "+
9874 generate_functor_name()+"_equal_func, " +
9875 tgdt->get_host_cvar_type() +
9876 ">("+params+", \"" + get_node_name() +
9882 ////////////////////////////////////////////////
9885 ////////////////////////////////////////////
// Returns the name of the generated functor class for this merge node:
// the prefix "mrg_functor_" followed by normalize_name() applied to the
// node name.
9887 string mrg_qpn::generate_functor_name(){
9888 return("mrg_functor_" + normalize_name(this->get_node_name()));
9891 string mrg_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
9896 if(fm.size() != mvars.size()){
9897 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=%lu, mvars.size=%lu\n",fm.size(),mvars.size());
9901 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=mvars.size=%lu\n",fm.size());
9906 // Initialize generate utility globals
9907 segen_gb_tbl = NULL;
9909 string ret = "class " + this->generate_functor_name() + "{\n";
9911 // Private variable:
9912 // 1) Vars for unpacked attrs.
9913 // 2) offsets of the unpacked attrs
9914 // 3) last_posted_timestamp
9917 schema->get_type_name(mvars[0]->get_schema_ref(), mvars[0]->get_field()),
9918 schema->get_modifier_list(mvars[0]->get_schema_ref(), mvars[0]->get_field())
9921 schema->get_type_name(mvars[1]->get_schema_ref(), mvars[1]->get_field()),
9922 schema->get_modifier_list(mvars[1]->get_schema_ref(), mvars[1]->get_field())
9925 ret += "private:\n";
9927 // var to save the schema handle
9928 ret += "\tint schema_handle0;\n";
9930 // generate the declaration of all the variables related to
9931 // temp tuples generation
9932 ret += gen_decl_temp_vars();
9934 // unpacked attribute storage, offsets
9935 ret += "//\t\tstorage and offsets of accessed fields.\n";
9936 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
9938 sprintf(tmpstr,"unpack_var_%s_%d", mvars[0]->get_field().c_str(), tblref);
9939 ret+="\t"+dta.make_host_cvar(tmpstr)+";\n";
9940 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[0]->get_field().c_str(), tblref);
9943 sprintf(tmpstr,"unpack_var_%s_%d", mvars[1]->get_field().c_str(), tblref);
9944 ret+="\t"+dtb.make_host_cvar(tmpstr)+";\n";
9945 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[1]->get_field().c_str(), tblref);
9948 ret += "//\t\tRemember the last posted timestamp.\n";
9949 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_0")+";\n";
9950 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_1")+";\n";
9951 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9952 ret+="\t"+dta.make_host_cvar("slack")+";\n";
9953 // ret += "\t bool first_execution_0, first_execution_1;\n";
9955 // variables to hold parameters.
9956 ret += "//\tfor query parameters\n";
9957 ret += generate_param_vars(param_tbl);
9960 //-------------------
9961 // The functor constructor
9962 // pass in a schema handle (e.g. for the 1st input stream),
9963 // use it to determine how to unpack the merge variable.
9964 // ASSUME that both streams have the same layout,
9965 // just duplicate it.
9968 ret += "//\t\tFunctor constructor.\n";
9969 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9971 // var to save the schema handle
9972 ret += "\tthis->schema_handle0 = schema_handle0;\n";
9973 ret += "\ttuple_metadata_offset0=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9974 ret += "\ttuple_metadata_offset1=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9976 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
9978 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", mvars[0]->get_field().c_str(), 0,mvars[0]->get_field().c_str());
9980 sprintf(tmpstr,"\tunpack_offset_%s_%d = unpack_offset_%s_%d;\n",mvars[1]->get_field().c_str(), 1,mvars[0]->get_field().c_str(), 0);
9982 // ret+="\tfirst_execution_0 = first_execution_1 = true;\n";
9984 ret+="\tslack = "+generate_se_code(slack,schema)+";\n";
9986 ret+="\tslack = 0;\n";
9988 // Initialize internal state
9989 ret += "\ttemp_tuple_received = false;\n";
9991 // Init last timestamp values to minimum value for their type
9992 if (dta.is_increasing())
9993 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_min_literal() + ";\n";
9995 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_max_literal() + ";\n";
10000 ret += "//\t\tFunctor destructor.\n";
10001 ret += "~"+this->generate_functor_name()+"(){\n";
10003 // Destroy the parameters, if any need to be destroyed
10004 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10009 // no pass-by-handle params.
10010 vector<handle_param_tbl_entry *> param_handle_table;
10012 // Parameter manipulation routines
10013 ret += generate_load_param_block(this->generate_functor_name(),
10014 this->param_tbl,param_handle_table);
10015 ret += generate_delete_param_block(this->generate_functor_name(),
10016 this->param_tbl,param_handle_table);
10018 // Register new parameter block
10020 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10021 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10022 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10027 // -----------------------------------
10030 string unpack_fcna;
10031 if(needs_xform[0]) unpack_fcna = dta.get_hfta_unpack_fcn();
10032 else unpack_fcna = dta.get_hfta_unpack_fcn_noxf();
10033 string unpack_fcnb;
10034 if(needs_xform[1]) unpack_fcnb = dtb.get_hfta_unpack_fcn();
10035 else unpack_fcnb = dtb.get_hfta_unpack_fcn_noxf();
10038 ret+="\tint compare(const host_tuple& tup1, const host_tuple& tup2) const{ \n";
10039 ret+="\t"+dta.make_host_cvar("timestamp1")+";\n";
10040 ret+="\t"+dta.make_host_cvar("timestamp2")+";\n";
10041 ret+="\tgs_int32_t problem;\n";
10042 ret+="\tif (tup1.channel == 0) {\n";
10043 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10045 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10048 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 1);
10050 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 0);
10054 " if (timestamp1 > timestamp2+slack)\n"
10056 " else if (timestamp1 < timestamp2)\n"
10065 " void get_timestamp(const host_tuple& tup0){\n"
10066 " gs_int32_t problem;\n"
10068 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10077 // Compare to temp status.
10079 " int compare_with_temp_status(int channel) {\n"
10080 " // check if tuple is temp status tuple\n"
10082 " if (channel == 0) {\n"
10083 //" if(first_execution_0) return 1;\n"
10084 " if (timestamp == last_posted_timestamp_0)\n"
10086 " else if (timestamp < last_posted_timestamp_0)\n"
10091 //" if(first_execution_1) return 1;\n"
10092 " if (timestamp == last_posted_timestamp_1)\n"
10094 " else if (timestamp < last_posted_timestamp_1)\n"
10103 " int compare_stored_with_temp_status(const host_tuple& tup0, int channel)/* const*/ {\n"
10105 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10106 ret+="\tgs_int32_t problem;\n";
10108 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10110 ret+="\tif (channel == 0) {\n";
10111 // ret+="\tif(first_execution_0) return 1;\n";
10113 " if (l_timestamp == last_posted_timestamp_0)\n"
10115 " else if (l_timestamp < last_posted_timestamp_0)\n"
10120 // ret+="\tif(first_execution_1) return 1;\n";
10122 " if (l_timestamp == last_posted_timestamp_1)\n"
10124 " else if (l_timestamp < last_posted_timestamp_1)\n"
10132 // update temp status.
10134 " int update_temp_status(const host_tuple& tup) {\n"
10135 " if (tup.channel == 0) {\n"
10136 " last_posted_timestamp_0=timestamp;\n"
10137 //" first_execution_0 = false;\n"
10139 " last_posted_timestamp_1=timestamp;\n"
10140 //" first_execution_1 = false;\n"
10146 " int update_stored_temp_status(const host_tuple& tup, int channel) {\n"
10148 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10149 ret+="\tgs_int32_t problem;\n";
10150 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10153 " if (tup.channel == 0) {\n"
10154 " last_posted_timestamp_0=l_timestamp;\n"
10155 //" first_execution_0 = false;\n"
10157 " last_posted_timestamp_1=l_timestamp;\n"
10158 //" first_execution_1 = false;\n"
10164 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10165 ret+="\tgs_int32_t problem;\n";
10166 ret+="\tif (tup.channel == 0) {\n";
10167 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10170 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10173 ret+="\tif (tup.channel == 0) {\n";
10174 ret+="\tlast_posted_timestamp_0=timestamp;\n";
10175 ret +="\tfirst_execution_0 = false;\n";
10177 ret+="\tlast_posted_timestamp_1=timestamp;\n";
10178 ret +="\tfirst_execution_1 = false;\n";
10185 // update temp status modulo slack.
10186 ret+="\tint update_temp_status_by_slack(const host_tuple& tup, int channel) {\n";
10188 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10189 ret+="\tgs_int32_t problem;\n";
10190 ret+="\tif (tup.channel == 0) {\n";
10191 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10194 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10198 " if (channel == 0) {\n"
10199 " if(first_execution_0){\n"
10200 " last_posted_timestamp_0=timestamp - slack;\n"
10201 " first_execution_0 = false;\n"
10203 " if(last_posted_timestamp_0 < timestamp-slack)\n"
10204 " last_posted_timestamp_0 = timestamp-slack;\n"
10207 " if(first_execution_1){\n"
10208 " last_posted_timestamp_1=timestamp - slack;\n"
10209 " first_execution_1 = false;\n"
10211 " if(last_posted_timestamp_1 < timestamp-slack)\n"
10212 " last_posted_timestamp_1 = timestamp-slack;\n"
10226 "bool temp_status_received(const host_tuple& tup0){\n"
10227 " return ftaschema_is_temporal_tuple_offset(tuple_metadata_offset0, tup0.data);\n"
10230 //"bool temp_status_received(){return temp_tuple_received;};\n\n";
10233 // create a temp status tuple
10234 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
10236 ret += gen_init_temp_status_tuple(this->get_node_name());
10239 ret += "//\t\tPack the fields into the tuple.\n";
10241 string fld_name = mvars[0]->get_field();
10242 int idx = table_layout->get_field_idx(fld_name);
10243 field_entry* fld = table_layout->get_field(idx);
10244 data_type dt(fld->get_type());
10246 // if (needs_xform[0] && needs_xform[1] && dt.needs_hn_translation())
10247 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s((last_posted_timestamp_0 < last_posted_timestamp_1) ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx, dt.hton_translation().c_str());
10249 sprintf(tmpstr,"\ttuple->tuple_var%d = (last_posted_timestamp_0 < last_posted_timestamp_1 ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx);
10253 ret += "\treturn 0;\n";
10256 // Transform tuple (before output)
10259 ret += "void xform_tuple(host_tuple &tup){\n";
10260 if((needs_xform[0] && !needs_xform[1]) || (needs_xform[1] && !needs_xform[0])){
10261 ret += "\tstruct "+generate_tuple_name(this->get_node_name())+" *tuple = ("+
10262 generate_tuple_name(this->get_node_name())+" *)(tup.data);\n";
10264 vector<field_entry *> flds = table_layout->get_fields();
10266 ret+="\tif(tup.channel == 0){\n";
10267 if(needs_xform[0] && !needs_xform[1]){
10269 for(f=0;f<flds.size();f++){
10271 data_type dt(flds[f]->get_type());
10272 if(dt.get_type() == v_str_t){
10273 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10275 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10277 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10280 if(dt.needs_hn_translation()){
10281 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10282 // f, dt.hton_translation().c_str(), f);
10288 ret += "\t\treturn;\n";
10290 ret.append("\t}\n");
10293 ret+="\tif(tup.channel == 1){\n";
10294 if(needs_xform[1] && !needs_xform[0]){
10296 for(f=0;f<flds.size();f++){
10298 data_type dt(flds[f]->get_type());
10299 if(dt.get_type() == v_str_t){
10300 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10302 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10304 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10307 if(dt.needs_hn_translation()){
10308 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10309 // f, dt.hton_translation().c_str(), f);
10315 ret += "\t\treturn;\n";
10317 ret.append("\t}\n");
10320 ret.append("};\n\n");
10322 // print_warnings() : tell the functor if the user wants to print warnings.
10323 ret += "bool print_warnings(){\n";
10324 if(definitions.count("print_warnings") && (
10325 definitions["print_warnings"] == "yes" ||
10326 definitions["print_warnings"] == "Yes" ||
10327 definitions["print_warnings"] == "YES" )) {
10328 ret += "return true;\n";
10330 ret += "return false;\n";
10332 ret.append("};\n\n");
10335 // Done with methods.
// Build the generated-C++ text that instantiates the runtime operator for
// this merge node.
//   i      : operator index; the generated pointer variable is named
//            "op" followed by int_to_string(i).
//   params : text spliced verbatim as the leading constructor argument(s)
//            of the generated "new" expression.
// Two variants of the generated statement are visible below. One is
// templated on merge_operator and the other on merge_operator_oop; both
// use generate_functor_name() as the template argument and pass params,
// the literal 10000, and the quoted get_node_name() to the constructor.
// NOTE(review): the statements that select between the two variants and
// return the text are not visible in this excerpt -- confirm against the
// full function. The meaning of the constant 10000 is not documented
// here -- TODO confirm.
10342 string mrg_qpn::generate_operator(int i, string params){
// Variant built on merge_operator<functor>.
10346 " merge_operator<" +
10347 generate_functor_name()+
10348 "> *op"+int_to_string(i)+" = new merge_operator<"+
10349 generate_functor_name()+
10350 ">("+params+",10000,\"" + get_node_name() + "\");\n"
// Variant built on merge_operator_oop<functor>; same argument list.
10354 " merge_operator_oop<" +
10355 generate_functor_name()+
10356 "> *op"+int_to_string(i)+" = new merge_operator_oop<"+
10357 generate_functor_name()+
10358 ">("+params+",10000,\"" + get_node_name() + "\");\n"
10362 ////////////////////////////////////////////////
10363 /// WATCHLIST_TBL operator
10364 /// WATCHLIST_TBL functor
10365 ////////////////////////////////////////////
// Name of the generated functor class for this watchlist-table node:
// the fixed prefix "watch_tbl_functor_" followed by the node name passed
// through normalize_name().
10367 string watch_tbl_qpn::generate_functor_name(){
10368 return("watch_tbl_functor_" + normalize_name(this->get_node_name()));
// Functor generation for watchlist-table nodes is a placeholder: the
// visible body returns a fixed marker string rather than generated code.
// None of the parameters (schema, Ext_fcns, needs_xform) are used in the
// lines visible here.
// NOTE(review): presumably the marker is meant to surface as an obvious
// error if it ever reaches generated output -- confirm with callers.
10371 string watch_tbl_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10373 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
// Operator generation for watchlist-table nodes is a placeholder: it
// returns the same fixed marker string as generate_functor and ignores
// both i and params.
10376 string watch_tbl_qpn::generate_operator(int i, string params){
10377 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
10380 /////////////////////////////////////////////////////////
10381 ////// JOIN_EQ_HASH functor
// Name of the generated functor class for this equality hash-join node:
// the fixed prefix "join_eq_hash_functor_" followed by the node name
// passed through normalize_name().
10384 string join_eq_hash_qpn::generate_functor_name(){
10385 return("join_eq_hash_functor_" + normalize_name(this->get_node_name()));
10388 string join_eq_hash_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10390 vector<data_type *> hashkey_dt; // data types in the hash key
10391 vector<data_type *> temporal_dt; // data types in the temporal key
10392 map<string,scalarexp_t *> l_equiv, r_equiv; // field equivalences
10393 set<int> pfcn_refs;
10394 col_id_set new_cids, local_cids;
10396 //--------------------------------
10399 string plus_op = "+";
10401 //--------------------------------
10402 // key definition class
10403 string ret = "class " + generate_functor_name() + "_keydef{\n";
10404 ret += "public:\n";
10405 // Collect attributes from hash join predicates.
10406 // ASSUME equality predicate.
10407 // Use the upwardly compatible data type
10408 // (infer from '+' operator if possible, else use left type)
10409 for(p=0;p<this->hash_eq.size();++p){
10410 scalarexp_t *lse = hash_eq[p]->pr->get_left_se();
10411 scalarexp_t *rse = hash_eq[p]->pr->get_right_se();
10412 data_type *hdt = new data_type(
10413 lse->get_data_type(), rse->get_data_type(), plus_op );
10414 if(hdt->get_type() == undefined_t){
10415 hashkey_dt.push_back(lse->get_data_type()->duplicate());
10418 hashkey_dt.push_back(hdt);
10420 sprintf(tmpstr,"hashkey_var%d",p);
10421 ret+="\t"+hashkey_dt[p]->make_host_cvar(tmpstr)+";\n";
10423 // find equivalences
10424 // NOTE: this code needs to be synched with the temporality
10425 // checking done at join_eq_hash_qpn::get_fields
10426 if(lse->get_operator_type()==SE_COLREF){
10427 l_equiv[lse->get_colref()->get_field()] = rse;
10429 if(rse->get_operator_type()==SE_COLREF){
10430 r_equiv[rse->get_colref()->get_field()] = lse;
10433 ret += "\tbool touched;\n";
10436 ret += "\t"+generate_functor_name() + "_keydef(){touched=false;};\n";
10438 ret += "\t~"+ generate_functor_name() + "_keydef(){\n";
10439 for(p=0;p<hashkey_dt.size();p++){
10440 if(hashkey_dt[p]->is_buffer_type()){
10441 sprintf(tmpstr,"\t\t%s(&hashkey_var%d);\n",
10442 hashkey_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10447 ret+="\tvoid touch(){touched = true;};\n";
10448 ret+="\tbool is_touched(){return touched;};\n";
10452 //--------------------------------
10453 // temporal equality definition class
10454 ret += "class " + generate_functor_name() + "_tempeqdef{\n";
10455 ret += "public:\n";
10456 // Collect attributes from hash join predicates.
10457 // ASSUME equality predicate.
10458 // Use the upwardly compatible data type
10459 // (infer from '+' operator if possible, else use left type)
10460 for(p=0;p<this->temporal_eq.size();++p){
10461 scalarexp_t *lse = temporal_eq[p]->pr->get_left_se();
10462 scalarexp_t *rse = temporal_eq[p]->pr->get_right_se();
10463 data_type *hdt = new data_type(
10464 lse->get_data_type(), rse->get_data_type(), plus_op );
10465 if(hdt->get_type() == undefined_t){
10466 temporal_dt.push_back(hash_eq[p]->pr->get_left_se()->get_data_type()->duplicate());
10469 temporal_dt.push_back(hdt);
10471 sprintf(tmpstr,"tempeq_var%d",p);
10472 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
10473 // find equivalences
10474 if(lse->get_operator_type()==SE_COLREF){
10475 l_equiv[lse->get_colref()->get_field()] = rse;
10477 if(rse->get_operator_type()==SE_COLREF){
10478 r_equiv[rse->get_colref()->get_field()] = lse;
10483 ret += "\t"+generate_functor_name() + "_tempeqdef(){};\n";
10485 ret += "\t~"+ generate_functor_name() + "_tempeqdef(){\n";
10486 for(p=0;p<temporal_dt.size();p++){
10487 if(temporal_dt[p]->is_buffer_type()){
10488 sprintf(tmpstr,"\t\t%s(&tempeq_var%d);\n",
10489 temporal_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10497 //--------------------------------
10498 // temporal eq, hash join functor class
10499 ret += "class " + this->generate_functor_name() + "{\n";
10501 // Find variables referenced in this query node.
10503 col_id_set cid_set;
10504 col_id_set::iterator csi;
10506 for(p=0;p<where.size();++p)
10507 gather_pr_col_ids(where[p]->pr,cid_set,NULL);
10508 for(s=0;s<select_list.size();s++)
10509 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
10511 // Private variables : store the state of the functor.
10512 // 1) variables for unpacked attributes
10513 // 2) offsets of the unpacked attributes
10514 // 3) storage of partial functions
10515 // 4) storage of complex literals (i.e., require a constructor)
10517 ret += "private:\n";
10519 // var to save the schema handles
10520 ret += "\tint schema_handle0;\n";
10521 ret += "\tint schema_handle1;\n";
10523 // generate the declaration of all the variables related to
10524 // temp tuples generation
10525 ret += gen_decl_temp_vars();
10526 // tuple metadata offsets
10527 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
10529 // unpacked attribute storage, offsets
10530 ret += "//\t\tstorage and offsets of accessed fields.\n";
10531 ret += generate_access_vars(cid_set, schema);
10534 // Variables to store results of partial functions.
10535 // WARNING find_partial_functions modifies the SE
10536 // (it marks the partial function id).
10537 ret += "//\t\tParital function result storage\n";
10538 vector<scalarexp_t *> partial_fcns;
10539 vector<int> fcn_ref_cnt;
10540 vector<bool> is_partial_fcn;
10541 for(s=0;s<select_list.size();s++){
10542 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
10544 for(p=0;p<where.size();p++){
10545 find_partial_fcns_pr(where[p]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
10547 if(partial_fcns.size()>0){
10548 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
10549 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
10552 // Complex literals (i.e., they need constructors)
10553 ret += "//\t\tComplex literal storage.\n";
10554 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
10555 ret += generate_complex_lit_vars(complex_literals);
10556 // We need the following to handle strings in outer joins.
10557 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10558 ret += "\tstruct vstring EmptyString;\n";
10559 ret += "\tstruct hfta_ipv6_str EmptyIp6;\n";
10561 // Pass-by-handle parameters
10562 ret += "//\t\tPass-by-handle storage.\n";
10563 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
10564 ret += generate_pass_by_handle_vars(param_handle_table);
10567 // variables to hold parameters.
10568 ret += "//\tfor query parameters\n";
10569 ret += generate_param_vars(param_tbl);
10572 ret += "\npublic:\n";
10573 //-------------------
10574 // The functor constructor
10575 // pass in the schema handle.
10576 // 1) make assignments to the unpack offset variables
10577 // 2) initialize the complex literals
10579 ret += "//\t\tFunctor constructor.\n";
10580 ret += this->generate_functor_name()+"(int schema_handle0, int schema_handle1){\n";
10582 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
10583 ret += "\t\tthis->schema_handle1 = schema_handle1;\n";
10584 // metadata offsets
10585 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10586 ret += "\ttuple_metadata_offset1 = ftaschema_get_tuple_metadata_offset(schema_handle1);\n";
10589 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
10590 ret += gen_access_var_init(cid_set);
10592 // complex literals
10593 ret += "//\t\tInitialize complex literals.\n";
10594 ret += gen_complex_lit_init(complex_literals);
10595 // Initialize EmptyString to the ... empty string
10596 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10597 literal_t mtstr_lit("");
10598 ret += "\t" + mtstr_lit.to_hfta_C_code("&EmptyString")+";\n";
10599 literal_t mip6_lit("0:0:0:0:0:0:0:0",LITERAL_IPV6);
10600 ret += "\t" + mip6_lit.to_hfta_C_code("&EmptyIp6")+";\n";
10602 // Initialize partial function results so they can be safely GC'd
10603 ret += gen_partial_fcn_init(partial_fcns);
10605 // Initialize non-query-parameter parameter handles
10606 ret += gen_pass_by_handle_init(param_handle_table);
10608 // Init temporal attributes referenced in select list
10609 ret += gen_init_temp_vars(schema, select_list, NULL);
10616 //-------------------
10617 // Functor destructor
10618 ret += "//\t\tFunctor destructor.\n";
10619 ret += "~"+this->generate_functor_name()+"(){\n";
10621 // clean up buffer type complex literals
10622 ret += gen_complex_lit_dtr(complex_literals);
10624 // Deregister the pass-by-handle parameters
10625 ret += "/* register and de-register the pass-by-handle parameters */\n";
10626 ret += gen_pass_by_handle_dtr(param_handle_table);
10628 // clean up partial function results.
10629 ret += "/* clean up partial function storage */\n";
10630 ret += gen_partial_fcn_dtr(partial_fcns);
10632 // Destroy the parameters, if any need to be destroyed
10633 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10638 //-------------------
10639 // Parameter manipulation routines
10640 ret += generate_load_param_block(this->generate_functor_name(),
10641 this->param_tbl,param_handle_table);
10642 ret += generate_delete_param_block(this->generate_functor_name(),
10643 this->param_tbl,param_handle_table);
10645 //-------------------
10646 // Register new parameter block
10648 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10649 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10650 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10655 //-------------------
10656 // The create_key method.
10657 // Perform heap allocation.
10658 // ASSUME : the LHS of the preds reference channel 0 attributes
10659 // NOTE : it may fail if a partial function fails.
10661 ret += this->generate_functor_name()+"_keydef *create_key(host_tuple &tup, bool &failed){\n";
10662 // Variables for execution of the function.
10663 ret+="\t"+this->generate_functor_name()+"_keydef *retval = NULL;\n";
10664 ret+="\tgs_int32_t problem = 0;\n";
10666 // Assume unsuccessful completion
10667 ret+= "\tfailed = true;\n";
10669 // Switch the processing based on the channel
10670 ret+="\tif(tup.channel == 0){\n";
10671 ret+="// ------------ processing for channel 0\n";
10672 ret+="\t\thost_tuple &tup0 = tup;\n";
10673 // Gather partial fcns and colids ref'd by this branch
10675 new_cids.clear(); local_cids.clear();
10676 for(p=0;p<hash_eq.size();p++){
10677 collect_partial_fcns(hash_eq[p]->pr->get_left_se(), pfcn_refs);
10678 gather_se_col_ids(hash_eq[p]->pr->get_left_se(),local_cids,NULL);
10681 // Start by cleaning up partial function results
10682 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10683 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10685 // Evaluate the partial functions
10686 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10687 new_cids, NULL, "NULL", needs_xform);
10688 // test passed -- unpack remaining cids.
10689 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10691 // Alloc and load a key object
10692 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10693 for(p=0;p<hash_eq.size();p++){
10694 data_type *hdt = hash_eq[p]->pr->get_left_se()->get_data_type();
10695 if(hdt->is_buffer_type()){
10696 string vname = "tmp_keyvar"+int_to_string(p);
10697 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_left_se(),schema)+";\n";
10698 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10700 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10701 p,generate_se_code(hash_eq[p]->pr->get_left_se(),schema).c_str() );
10705 ret += "\t}else{\n";
10707 ret+="// ------------ processing for channel 1\n";
10708 ret+="\t\thost_tuple &tup1 = tup;\n";
10709 // Gather partial fcns and colids ref'd by this branch
10711 new_cids.clear(); local_cids.clear();
10712 for(p=0;p<hash_eq.size();p++){
10713 collect_partial_fcns(hash_eq[p]->pr->get_right_se(), pfcn_refs);
10714 gather_se_col_ids(hash_eq[p]->pr->get_right_se(),local_cids,NULL);
10717 // Start by cleaning up partial function results
10718 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10719 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10721 // Evaluate the partial functions
10722 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10723 new_cids, NULL, "NULL", needs_xform);
10725 // test passed -- unpack remaining cids.
10726 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10728 // Alloc and load a key object
10729 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10730 for(p=0;p<hash_eq.size();p++){
10731 data_type *hdt = hash_eq[p]->pr->get_right_se()->get_data_type();
10732 if(hdt->is_buffer_type()){
10733 string vname = "tmp_keyvar"+int_to_string(p);
10734 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_right_se(),schema)+";\n";
10735 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10737 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10738 p,generate_se_code(hash_eq[p]->pr->get_right_se(),schema).c_str() );
10744 ret += "\tfailed = false;\n";
10745 ret += "\t return retval;\n";
10749 //-------------------
10750 // The load_ts method.
10751 // load into an allocated buffer.
10752 // ASSUME : the LHS of the preds reference channel 0 attributes
10753 // NOTE : it may fail if a partial function fails.
10754 // NOTE : can't handle buffer attributes
10756 ret += "bool load_ts_from_tup("+this->generate_functor_name()+"_tempeqdef *ts, host_tuple &tup){\n";
10757 // Variables for execution of the function.
10758 ret+="\tgs_int32_t problem = 0;\n";
10760 // Switch the processing based on the channel
10761 ret+="\tif(tup.channel == 0){\n";
10762 ret+="// ------------ processing for channel 0\n";
10763 ret+="\t\thost_tuple &tup0 = tup;\n";
10765 // Gather partial fcns and colids ref'd by this branch
10767 new_cids.clear(); local_cids.clear();
10768 for(p=0;p<temporal_eq.size();p++){
10769 collect_partial_fcns(temporal_eq[p]->pr->get_left_se(), pfcn_refs);
10770 gather_se_col_ids(temporal_eq[p]->pr->get_left_se(),local_cids,NULL);
10773 // Start by cleaning up partial function results
10774 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10775 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10777 // Evaluate the partial functions
10778 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10779 new_cids, NULL, "false", needs_xform);
10781 // test passed -- unpack remaining cids.
10782 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10784 // load the temporal key object
10785 for(p=0;p<temporal_eq.size();p++){
10786 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10787 p,generate_se_code(temporal_eq[p]->pr->get_left_se(),schema).c_str() );
10791 ret += "\t}else{\n";
10793 ret+="// ------------ processing for channel 1\n";
10794 ret+="\t\thost_tuple &tup1 = tup;\n";
10796 // Gather partial fcns and colids ref'd by this branch
10798 new_cids.clear(); local_cids.clear();
10799 for(p=0;p<temporal_eq.size();p++){
10800 collect_partial_fcns(temporal_eq[p]->pr->get_right_se(), pfcn_refs);
10801 gather_se_col_ids(temporal_eq[p]->pr->get_right_se(),local_cids,NULL);
10804 // Start by cleaning up partial function results
10805 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10806 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10808 // Evaluate the partial functions
10809 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10810 new_cids, NULL, "false", needs_xform);
10812 // test passed -- unpack remaining cids.
10813 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10815 // load the key object
10816 for(p=0;p<temporal_eq.size();p++){
10817 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10818 p,generate_se_code(temporal_eq[p]->pr->get_right_se(),schema).c_str() );
10824 ret += "\t return true;\n";
10828 // ------------------------------
10830 // (i.e make a copy)
10832 ret += "bool load_ts_from_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10833 for(p=0;p<temporal_eq.size();p++){
10834 sprintf(tmpstr,"\tlts->tempeq_var%d = rts->tempeq_var%d;\n",p,p);
10839 // -------------------------------------
10840 // compare_ts_to_ts
10841 // There should be only one variable to compare.
10842 // If there is more, assume an arbitrary lexicographic order.
10844 ret += "int compare_ts_with_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10845 for(p=0;p<temporal_eq.size();p++){
10846 sprintf(tmpstr,"\tif(lts->tempeq_var%d < rts->tempeq_var%d) return(-1);\n",p,p);
10848 sprintf(tmpstr,"\tif(lts->tempeq_var%d > rts->tempeq_var%d) return(1);\n",p,p);
10851 ret += "\treturn(0);\n";
10854 // ------------------------------------------
10856 // apply the prefilter
10858 ret += "bool apply_prefilter(host_tuple &tup){\n";
10860 // Variables for this procedure
10861 ret+="\tgs_int32_t problem = 0;\n";
10862 ret+="\tgs_retval_t retval;\n";
10864 // Switch the processing based on the channel
10865 ret+="\tif(tup.channel == 0){\n";
10866 ret+="// ------------ processing for channel 0\n";
10867 ret+="\t\thost_tuple &tup0 = tup;\n";
10868 // Gather partial fcns and colids ref'd by this branch
10870 new_cids.clear(); local_cids.clear();
10871 for(p=0;p<prefilter[0].size();p++){
10872 collect_partial_fcns_pr((prefilter[0])[p]->pr, pfcn_refs);
10875 // Start by cleaning up partial function results
10876 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10877 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10879 for(p=0;p<(prefilter[0]).size();++p){
10880 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10882 // Find the set of variables accessed in this CNF elem,
10883 // but in no previous element.
10884 col_id_set new_pr_cids;
10885 get_new_pred_cids((prefilter[0])[p]->pr,local_cids,new_pr_cids, NULL);
10886 // Unpack these values.
10887 ret += gen_unpack_cids(schema, new_pr_cids, "false", needs_xform);
10888 // Find partial fcns ref'd in this cnf element
10889 set<int> pr_pfcn_refs;
10890 collect_partial_fcns_pr((prefilter[0])[p]->pr, pr_pfcn_refs);
10891 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10893 ret += "\t\tif( !("+generate_predicate_code((prefilter[0])[p]->pr,schema)+") ) return(false);\n";
10895 ret += "\t}else{\n";
10896 ret+="// ------------ processing for channel 1\n";
10897 ret+="\t\thost_tuple &tup1 = tup;\n";
10898 // Gather partial fcns and colids ref'd by this branch
10900 new_cids.clear(); local_cids.clear();
10901 for(p=0;p<prefilter[1].size();p++){
10902 collect_partial_fcns_pr((prefilter[1])[p]->pr, pfcn_refs);
10905 // Start by cleaning up partial function results
10906 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10907 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10909 for(p=0;p<(prefilter[1]).size();++p){
10910 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10912 // Find the set of variables accessed in this CNF elem,
10913 // but in no previous element.
10914 col_id_set pr_new_cids;
10915 get_new_pred_cids((prefilter[1])[p]->pr,local_cids, pr_new_cids, NULL);
10916 // Unpack these values.
10917 ret += gen_unpack_cids(schema, pr_new_cids, "false", needs_xform);
10918 // Find partial fcns ref'd in this cnf element
10919 set<int> pr_pfcn_refs;
10920 collect_partial_fcns_pr((prefilter[1])[p]->pr, pr_pfcn_refs);
10921 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10923 ret += "\t\tif( !("+generate_predicate_code((prefilter[1])[p]->pr,schema)+ ") ) return(false);\n";
10927 ret+="\treturn true;\n";
10931 // -------------------------------------
10932 // create_output_tuple
10933 // If the postfilter on the pair of tuples passes,
10934 // create an output tuple from the combined information.
10935 // (Plus, outer join processing)
10937 ret += "host_tuple create_output_tuple(const host_tuple &tup0, const host_tuple &tup1, bool &failed){\n";
10939 ret += "\thost_tuple tup;\n";
10940 ret += "\tfailed = true;\n";
10941 ret += "\tgs_retval_t retval = 0;\n";
10942 ret += "\tgs_int32_t problem = 0;\n";
10944 // Start by cleaning up partial function results
10945 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10947 new_cids.clear(); local_cids.clear();
10948 for(p=0;p<postfilter.size();p++){
10949 collect_partial_fcns_pr(postfilter[p]->pr, pfcn_refs);
10951 for(s=0;s<select_list.size();s++){
10952 collect_partial_fcns(select_list[s]->se, pfcn_refs);
10954 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10957 ret+="\tif(tup0.data && tup1.data){\n";
10958 // Evaluate the postfilter
10959 new_cids.clear(); local_cids.clear();
10960 for(p=0;p<postfilter.size();p++){
10961 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10963 // Find the set of variables accessed in this CNF elem,
10964 // but in no previous element.
10965 col_id_set pr_new_cids;
10966 get_new_pred_cids(postfilter[p]->pr,local_cids, pr_new_cids, NULL);
10967 // Unpack these values.
10968 ret += gen_unpack_cids(schema, pr_new_cids, "tup", needs_xform);
10969 // Find partial fcns ref'd in this cnf element
10970 set<int> pr_pfcn_refs;
10971 collect_partial_fcns_pr(postfilter[p]->pr, pr_pfcn_refs);
10972 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"tup");
10974 ret += "\t\tif( !("+generate_predicate_code(postfilter[p]->pr,schema)+ ") ) return(tup);\n";
10978 // postfilter passed, evaluate partial functions for select list
10981 col_id_set se_cids;
10982 for(s=0;s<select_list.size();s++){
10983 collect_partial_fcns(select_list[s]->se, sl_pfcns);
10986 if(sl_pfcns.size() > 0)
10987 ret += "//\t\tUnpack remaining partial fcns.\n";
10988 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
10989 local_cids, NULL, "tup", needs_xform);
10991 // Unpack remaining fields
10992 ret += "//\t\tunpack any remaining fields from the input tuples.\n";
10993 for(s=0;s<select_list.size();s++)
10994 get_new_se_cids(select_list[s]->se, local_cids,se_cids,NULL);
10995 ret += gen_unpack_cids(schema, se_cids,"tup", needs_xform);
10998 // Deal with outer join stuff
10999 col_id_set l_cids, r_cids;
11000 col_id_set::iterator ocsi;
11001 for(ocsi=local_cids.begin();ocsi!=local_cids.end();++ocsi){
11002 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
11003 else r_cids.insert((*ocsi));
11005 for(ocsi=se_cids.begin();ocsi!=se_cids.end();++ocsi){
11006 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
11007 else r_cids.insert((*ocsi));
11010 ret += "\t}else if(tup0.data){\n";
11011 string unpack_null = ""; col_id_set extra_cids;
11012 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
11013 string field = (*ocsi).field;
11014 if(r_equiv.count(field)){
11015 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
11016 get_new_se_cids(r_equiv[field],l_cids,new_cids,NULL);
11018 int schref = (*ocsi).schema_ref;
11019 data_type dt(schema->get_type_name(schref,field));
11020 literal_t empty_lit(dt.type_indicator());
11021 if(empty_lit.is_cpx_lit()){
11022 // sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
11023 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11024 // NB : works for string type only
11025 // NNB: installed fix for ipv6, more of this should be pushed
11026 // into the literal_t code.
11027 unpack_null+="\tunpack_var_"+field+"_1= "+empty_lit.hfta_empty_literal_name()+";\n";
11029 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
11033 ret += gen_unpack_cids(schema, l_cids, "tup", needs_xform);
11034 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11035 ret += unpack_null;
11036 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11039 unpack_null = ""; extra_cids.clear();
11040 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
11041 string field = (*ocsi).field;
11042 if(l_equiv.count(field)){
11043 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
11044 get_new_se_cids(l_equiv[field],r_cids,new_cids,NULL);
11046 int schref = (*ocsi).schema_ref;
11047 data_type dt(schema->get_type_name(schref,field));
11048 literal_t empty_lit(dt.type_indicator());
11049 if(empty_lit.is_cpx_lit()){
11050 // sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
11051 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11052 // NB : works for string type only
11053 // NNB: installed fix for ipv6, more of this should be pushed
11054 // into the literal_t code.
11055 unpack_null+="\tunpack_var_"+field+"_0= "+empty_lit.hfta_empty_literal_name()+";\n";
11057 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
11061 ret += gen_unpack_cids(schema, r_cids, "tup", needs_xform);
11062 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11063 ret += unpack_null;
11064 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11069 // Unpack any BUFFER type selections into temporaries
11070 // so that I can compute their size and not have
11071 // to recompute their value during tuple packing.
11072 // I can use regular assignment here because
11073 // these temporaries are non-persistent.
11075 ret += "//\t\tCompute the size of the tuple.\n";
11076 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
11078 // Unpack all buffer type selections, to be able to compute their size
11079 ret += gen_buffer_selvars(schema, select_list);
11081 // The size of the tuple is the size of the tuple struct plus the
11082 // size of the buffers to be copied in.
11084 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
11085 ret += gen_buffer_selvars_size(select_list,schema);
11088 // Allocate tuple data block.
11089 ret += "//\t\tCreate the tuple block.\n";
11090 ret += "\ttup.data = malloc(tup.tuple_size);\n";
11091 ret += "\ttup.heap_resident = true;\n";
11092 // ret += "\ttup.channel = 0;\n";
11094 // Mark tuple as regular
11095 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
11098 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
11099 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
11102 // (Here, offsets are hard-wired. is this a problem?)
11104 ret += "//\t\tPack the fields into the tuple.\n";
11105 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
11107 // Delete string temporaries
11108 ret += gen_buffer_selvars_dtr(select_list);
11110 ret += "\tfailed = false;\n";
11111 ret += "\treturn tup;\n";
11116 //-----------------------------
11117 // Method for checking whether tuple is temporal
11119 ret += "bool temp_status_received(host_tuple &tup){\n";
11121 // Switch the processing based on the channel
11122 ret+="\tif(tup.channel == 0){\n";
11123 ret+="\t\thost_tuple &tup0 = tup;\n";
11124 ret += gen_temp_tuple_check(this->node_name, 0);
11125 ret += "\t}else{\n";
11126 ret+="\t\thost_tuple &tup1 = tup;\n";
11127 ret += gen_temp_tuple_check(this->node_name, 1);
11129 ret += "\treturn temp_tuple_received;\n};\n\n";
11132 //-------------------------------------------------------------------
11133 // Temporal update functions
11136 // create a temp status tuple
11137 ret += "int create_temp_status_tuple(const host_tuple &tup0, const host_tuple &tup1, host_tuple& result) {\n\n";
11139 ret += "\tgs_retval_t retval = 0;\n";
11140 ret += "\tgs_int32_t problem = 0;\n";
11142 ret += "\tif(tup0.data){\n";
11144 // Unpack all the temporal attributes referenced in the select list
11145 col_id_set found_cids;
11147 for(s=0;s<select_list.size();s++){
11148 if (select_list[s]->se->get_data_type()->is_temporal()) {
11149 // Find the set of attributes accessed in this SE
11150 col_id_set new_cids;
11151 get_new_se_cids(select_list[s]->se,found_cids, new_cids, NULL);
11155 // Deal with outer join stuff
11156 l_cids.clear(), r_cids.clear();
11157 for(ocsi=found_cids.begin();ocsi!=found_cids.end();++ocsi){
11158 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
11159 else r_cids.insert((*ocsi));
11162 extra_cids.clear();
11163 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
11164 string field = (*ocsi).field;
11165 if(r_equiv.count(field)){
11166 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
11167 col_id_set addnl_cids;
11168 get_new_se_cids(r_equiv[field],l_cids,addnl_cids,NULL);
11170 int schref = (*ocsi).schema_ref;
11171 data_type dt(schema->get_type_name(schref,field));
11172 literal_t empty_lit(dt.type_indicator());
11173 if(empty_lit.is_cpx_lit()){
11174 sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
11175 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11177 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
11181 ret += gen_unpack_cids(schema, l_cids, "1", needs_xform);
11182 ret += gen_unpack_cids(schema, extra_cids, "1", needs_xform);
11183 ret += unpack_null;
11185 ret+="\t}else if (tup1.data) {\n";
11186 unpack_null = ""; extra_cids.clear();
11187 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
11188 string field = (*ocsi).field;
11189 if(l_equiv.count(field)){
11190 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
11191 col_id_set addnl_cids;
11192 get_new_se_cids(l_equiv[field],r_cids,addnl_cids,NULL);
11194 int schref = (*ocsi).schema_ref;
11195 data_type dt(schema->get_type_name(schref,field));
11196 literal_t empty_lit(dt.type_indicator());
11197 if(empty_lit.is_cpx_lit()){
11198 sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
11199 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11201 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
11205 ret += gen_unpack_cids(schema, r_cids, "1", needs_xform);
11206 ret += gen_unpack_cids(schema, extra_cids, "1", needs_xform);
11207 ret += unpack_null;
11210 ret += gen_init_temp_status_tuple(this->get_node_name());
11213 ret += "//\t\tPack the fields into the tuple.\n";
11214 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
11217 ret += "\treturn 0;\n";
11223 //----------------------------------------------------------
11224 // The hash function
11226 ret += "struct "+generate_functor_name()+"_hash_func{\n";
11227 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
11228 "_keydef *key) const{\n";
11229 ret += "\t\treturn( (";
11230 if(hashkey_dt.size() > 0){
11231 for(p=0;p<hashkey_dt.size();p++){
11232 if(p>0) ret += "^";
11233 if(hashkey_dt[p]->use_hashfunc()){
11234 // sprintf(tmpstr,"%s(&(key->hashkey_var%d))",hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11235 if(hashkey_dt[p]->is_buffer_type())
11236 sprintf(tmpstr,"(%s*%s(&(key->hashkey_var%d)))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11238 sprintf(tmpstr,"(%s*%s(key->hashkey_var%d))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11240 sprintf(tmpstr,"(%s*key->hashkey_var%d)",hash_nums[p%NRANDS].c_str(),p);
11247 ret += ") >> 32);\n";
11251 //----------------------------------------------------------
11252 // The comparison function
11254 ret += "struct "+generate_functor_name()+"_equal_func{\n";
11255 ret += "\tbool operator()(const "+generate_functor_name()+"_keydef *key1, "+
11256 generate_functor_name()+"_keydef *key2) const{\n";
11257 ret += "\t\treturn( (";
11258 if(hashkey_dt.size() > 0){
11259 for(p=0;p<hashkey_dt.size();p++){
11260 if(p>0) ret += ") && (";
11261 if(hashkey_dt[p]->complex_comparison(hashkey_dt[p])){
11262 if(hashkey_dt[p]->is_buffer_type())
11263 sprintf(tmpstr,"(%s(&(key1->hashkey_var%d), &(key2->hashkey_var%d))==0)",
11264 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
11266 sprintf(tmpstr,"(%s((key1->hashkey_var%d), (key2->hashkey_var%d))==0)",
11267 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
11269 sprintf(tmpstr,"key1->hashkey_var%d == key2->hashkey_var%d",p,p);
// Builds the C++ source text that declares and allocates the runtime
// operator for this equi-join node.
//
// The emitted text declares a pointer named op<i> to a
// join_eq_hash_operator template instance and initializes it with a
// `new` expression of the same instantiation. The five template
// arguments are all derived from generate_functor_name():
//   <functor>, <functor>_tempeqdef, <functor>_keydef,
//   <functor>_hash_func, <functor>_equal_func
//
// i      - index used to name the emitted operator variable (op<i>).
// params - NOTE(review): the lines that consume this argument are not
//          visible in this listing; presumably it is spliced into the
//          emitted constructor call -- confirm against the full source.
//
// Returns the generated text as a string.
11286 string join_eq_hash_qpn::generate_operator(int i, string params){
11289 " join_eq_hash_operator<" +
11290 generate_functor_name()+ ","+
11291 generate_functor_name() + "_tempeqdef,"+
11292 generate_functor_name() + "_keydef,"+
11293 generate_functor_name()+"_hash_func,"+
11294 generate_functor_name()+"_equal_func"
// The template argument list is repeated verbatim for the new-expression.
11295 "> *op"+int_to_string(i)+" = new join_eq_hash_operator<"+
11296 generate_functor_name()+","+
11297 generate_functor_name() + "_tempeqdef,"+
11298 generate_functor_name() + "_keydef,"+
11299 generate_functor_name()+"_hash_func,"+
11300 generate_functor_name()+"_equal_func"
// Constructor arguments include a single integer combining the properties
// of both inputs (from[0] + 2*from[1]) followed by the quoted node name.
// NOTE(review): the property values presumably encode the join kind per
// side (e.g. outer-join flags) -- confirm against get_property().
11302 int_to_string(from[0]->get_property()+2*from[1]->get_property())+", \"" + get_node_name() +
11309 ////////////////////////////////////////////////////////////////
11310 //// SGAHCWCB functor
// Returns the name used for the generated functor class of this node:
// the fixed prefix "sgahcwcb_functor_" followed by the node name after
// it has been passed through normalize_name(). The same name is used as
// the stem for the generated _groupdef, _aggrdef and _statedef classes.
11314 string sgahcwcb_qpn::generate_functor_name(){
11315 return("sgahcwcb_functor_" + normalize_name(this->get_node_name()));
11319 string sgahcwcb_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
11323 // Initialize generate utility globals
11324 segen_gb_tbl = &(gb_tbl);
11327 //--------------------------------
11328 // group definition class
11329 string ret = "class " + generate_functor_name() + "_groupdef{\n";
11330 ret += "public:\n";
11331 ret += "\tbool valid;\n";
11332 for(g=0;g<this->gb_tbl.size();g++){
11333 sprintf(tmpstr,"gb_var%d",g);
11334 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11337 ret += "\t"+generate_functor_name() + "_groupdef(){valid=true;};\n";
11338 ret += "\t"+generate_functor_name() + "_groupdef("+
11339 this->generate_functor_name() + "_groupdef *gd){\n";
11340 for(g=0;g<gb_tbl.size();g++){
11341 data_type *gdt = gb_tbl.get_data_type(g);
11342 if(gdt->is_buffer_type()){
11343 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
11344 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
11347 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
11351 ret += "\tvalid=true;\n";
11354 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
11355 for(g=0;g<gb_tbl.size();g++){
11356 data_type *gdt = gb_tbl.get_data_type(g);
11357 if(gdt->is_buffer_type()){
11358 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
11359 gdt->get_hfta_buffer_destroy().c_str(), g );
11366 //--------------------------------
11367 // aggr definition class
11368 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
11369 ret += "public:\n";
11370 for(a=0;a<aggr_tbl.size();a++){
11371 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11372 sprintf(tmpstr,"aggr_var%d",a);
11373 if(aggr_tbl.is_builtin(a))
11374 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11376 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11379 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
11381 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
11382 for(a=0;a<aggr_tbl.size();a++){
11383 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11384 if(aggr_tbl.is_builtin(a)){
11385 data_type *adt = aggr_tbl.get_data_type(a);
11386 if(adt->is_buffer_type()){
11387 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11388 adt->get_hfta_buffer_destroy().c_str(), a );
11392 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11393 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11394 ret+="(aggr_var"+int_to_string(a)+"));\n";
11400 //--------------------------------
11401 // superaggr definition class
11402 ret += "class " + this->generate_functor_name() + "_statedef{\n";
11403 ret += "public:\n";
11404 for(a=0;a<aggr_tbl.size();a++){
11405 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11406 if(ate->is_superaggr()){
11407 sprintf(tmpstr,"aggr_var%d",a);
11408 if(aggr_tbl.is_builtin(a))
11409 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11411 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11414 set<string>::iterator ssi;
11415 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11416 string state_nm = (*ssi);
11417 int state_id = Ext_fcns->lookup_state(state_nm);
11418 data_type *dt = Ext_fcns->get_storage_dt(state_id);
11419 string state_var = "state_var_"+state_nm;
11420 ret += "\t"+dt->make_host_cvar(state_var)+";\n";
11423 ret += "\t"+this->generate_functor_name() + "_statedef(){};\n";
11425 ret += "\t~"+this->generate_functor_name() + "_statedef(){\n";
11426 for(a=0;a<aggr_tbl.size();a++){
11427 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11428 if(ate->is_superaggr()){
11429 if(aggr_tbl.is_builtin(a)){
11430 data_type *adt = aggr_tbl.get_data_type(a);
11431 if(adt->is_buffer_type()){
11432 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11433 adt->get_hfta_buffer_destroy().c_str(), a );
11437 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11438 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11439 ret+="(aggr_var"+int_to_string(a)+"));\n";
11443 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11444 string state_nm = (*ssi);
11445 int state_id = Ext_fcns->lookup_state(state_nm);
11446 string state_var = "state_var_"+state_nm;
11447 ret += "\t_sfun_state_destroy_"+state_nm+"(&"+state_var+");\n";
11454 //--------------------------------
11455 // gb functor class
11456 ret += "class " + this->generate_functor_name() + "{\n";
11458 // Find variables referenced in this query node.
11460 col_id_set cid_set;
11461 col_id_set::iterator csi;
11463 for(w=0;w<where.size();++w)
11464 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
11465 for(w=0;w<having.size();++w)
11466 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
11467 for(w=0;w<cleanby.size();++w)
11468 gather_pr_col_ids(cleanby[w]->pr,cid_set,segen_gb_tbl);
11469 for(w=0;w<cleanwhen.size();++w)
11470 gather_pr_col_ids(cleanwhen[w]->pr,cid_set,segen_gb_tbl);
11471 for(g=0;g<gb_tbl.size();g++)
11472 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
11474 for(s=0;s<select_list.size();s++){
11475 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
11479 // Private variables : store the state of the functor.
11480 // 1) variables for unpacked attributes
11481 // 2) offsets of the unpacked attributes
11482 // 3) storage of partial functions
11483 // 4) storage of complex literals (i.e., require a constructor)
11485 ret += "private:\n";
11487 // var to save the schema handle
11488 ret += "\tint schema_handle0;\n";
11490 // generate the declaration of all the variables related to
11491 // temp tuples generation
11492 ret += gen_decl_temp_vars();
11494 // unpacked attribute storage, offsets
11495 ret += "//\t\tstorage and offsets of accessed fields.\n";
11496 ret += generate_access_vars(cid_set, schema);
11497 // tuple metadata offset
11498 ret += "\ttuple_metadata_offset0;\n";
11500 // Variables to store results of partial functions.
11501 // WARNING find_partial_functions modifies the SE
11502 // (it marks the partial function id).
11503 ret += "//\t\tParital function result storage\n";
11504 vector<scalarexp_t *> partial_fcns;
11505 vector<int> fcn_ref_cnt;
11506 vector<bool> is_partial_fcn;
11507 for(s=0;s<select_list.size();s++){
11508 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
11510 for(w=0;w<where.size();w++){
11511 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11513 for(w=0;w<having.size();w++){
11514 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11516 for(w=0;w<cleanby.size();w++){
11517 find_partial_fcns_pr(cleanby[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11519 for(w=0;w<cleanwhen.size();w++){
11520 find_partial_fcns_pr(cleanwhen[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11522 for(g=0;g<gb_tbl.size();g++){
11523 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
11525 for(a=0;a<aggr_tbl.size();a++){
11526 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
11528 if(partial_fcns.size()>0){
11529 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
11530 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
11533 // Complex literals (i.e., they need constructors)
11534 ret += "//\t\tComplex literal storage.\n";
11535 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
11536 ret += generate_complex_lit_vars(complex_literals);
11538 // Pass-by-handle parameters
11539 ret += "//\t\tPass-by-handle storage.\n";
11540 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
11541 ret += generate_pass_by_handle_vars(param_handle_table);
11543 // Create cached temporaries for UDAF return values.
11544 ret += "//\t\tTemporaries for UDAF return values.\n";
11545 for(a=0;a<aggr_tbl.size();a++){
11546 if(! aggr_tbl.is_builtin(a)){
11547 int afcn_id = aggr_tbl.get_fcn_id(a);
11548 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11549 sprintf(tmpstr,"udaf_ret_%d", a);
11550 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11556 // variables to hold parameters.
11557 ret += "//\tfor query parameters\n";
11558 ret += generate_param_vars(param_tbl);
11560 // Is there a temporal flush? If so create flush temporaries,
11561 // create flush indicator.
11562 bool uses_temporal_flush = false;
11563 for(g=0;g<gb_tbl.size();g++){
11564 data_type *gdt = gb_tbl.get_data_type(g);
11565 if(gdt->is_temporal())
11566 uses_temporal_flush = true;
11569 if(uses_temporal_flush){
11570 ret += "//\t\tFor temporal flush\n";
11571 for(g=0;g<gb_tbl.size();g++){
11572 data_type *gdt = gb_tbl.get_data_type(g);
11573 if(gdt->is_temporal()){
11574 sprintf(tmpstr,"last_gb%d",g);
11575 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11576 sprintf(tmpstr,"last_flushed_gb%d",g);
11577 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11580 ret += "\tbool needs_temporal_flush;\n";
11583 // The publicly exposed functions
11585 ret += "\npublic:\n";
11588 //-------------------
11589 // The functor constructor
11590 // pass in the schema handle.
11591 // 1) make assignments to the unpack offset variables
11592 // 2) initialize the complex literals
11594 ret += "//\t\tFunctor constructor.\n";
11595 ret += this->generate_functor_name()+"(int schema_handle0){\n";
11597 // save the schema handle
11598 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
11599 // tuple metadata offset
11600 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
11603 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
11604 ret += gen_access_var_init(cid_set);
11606 // aggregate return vals : refd in both final_sample
11607 // and create_output_tuple
11608 // Create cached temporaries for UDAF return values.
11609 for(a=0;a<aggr_tbl.size();a++){
11610 if(! aggr_tbl.is_builtin(a)){
11611 int afcn_id = aggr_tbl.get_fcn_id(a);
11612 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11613 sprintf(tmpstr,"udaf_ret_%d", a);
11614 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11618 // complex literals
11619 ret += "//\t\tInitialize complex literals.\n";
11620 ret += gen_complex_lit_init(complex_literals);
11622 // Initialize partial function results so they can be safely GC'd
11623 ret += gen_partial_fcn_init(partial_fcns);
11625 // Initialize non-query-parameter parameter handles
11626 ret += gen_pass_by_handle_init(param_handle_table);
11628 // temporal flush variables
11629 // ASSUME that structured values won't be temporal.
11630 if(uses_temporal_flush){
11631 ret += "//\t\tInitialize temporal flush variables.\n";
11632 for(g=0;g<gb_tbl.size();g++){
11633 data_type *gdt = gb_tbl.get_data_type(g);
11634 if(gdt->is_temporal()){
11635 literal_t gl(gdt->type_indicator());
11636 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
11637 ret.append(tmpstr);
11640 ret += "\tneeds_temporal_flush = false;\n";
11643 // Init temporal attributes referenced in select list
11644 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
11649 //-------------------
11650 // Functor destructor
11651 ret += "//\t\tFunctor destructor.\n";
11652 ret += "~"+this->generate_functor_name()+"(){\n";
11654 // clean up buffer type complex literals
11655 ret += gen_complex_lit_dtr(complex_literals);
11657 // Deregister the pass-by-handle parameters
11658 ret += "/* register and de-register the pass-by-handle parameters */\n";
11659 ret += gen_pass_by_handle_dtr(param_handle_table);
11661 // clean up partial function results.
11662 ret += "/* clean up partial function storage */\n";
11663 ret += gen_partial_fcn_dtr(partial_fcns);
11665 // Destroy the parameters, if any need to be destroyed
11666 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11671 //-------------------
11672 // Parameter manipulation routines
11673 ret += generate_load_param_block(this->generate_functor_name(),
11674 this->param_tbl,param_handle_table);
11675 ret += generate_delete_param_block(this->generate_functor_name(),
11676 this->param_tbl,param_handle_table);
11678 //-------------------
11679 // Register new parameter block
11681 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
11682 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11683 ret += "\treturn this->load_params_"+this->generate_functor_name()+
11687 //-------------------
11688 // the create_group method.
11689 // This method creates a group in a buffer passed in
11690 // (to allow for creation on the stack).
11691 // There are also a couple of side effects:
11692 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
11693 // 2) determine if a temporal flush is required.
11695 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
11696 // Variables for execution of the function.
11697 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11699 if(partial_fcns.size()>0){ // partial fcn access failure
11700 ret += "\tgs_retval_t retval = 0;\n";
11704 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
11705 "_groupdef *) buffer;\n";
11707 // Start by cleaning up partial function results
11708 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11710 set<int> gb_pfcns; // partial fcns in gbdefs, aggr se's
11711 for(g=0;g<gb_tbl.size();g++){
11712 collect_partial_fcns(gb_tbl.get_def(g), gb_pfcns);
11714 ret += gen_partial_fcn_dtr(partial_fcns,gb_pfcns);
11715 // ret += gen_partial_fcn_dtr(partial_fcns);
11718 ret += gen_temp_tuple_check(this->node_name, 0);
11719 col_id_set found_cids; // colrefs unpacked thus far.
11720 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
11724 // Save temporal group-by variables
11727 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
11729 for(g=0;g<gb_tbl.size();g++){
11731 data_type *gdt = gb_tbl.get_data_type(g);
11733 if(gdt->is_temporal()){
11734 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11735 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11736 ret.append(tmpstr);
11743 // Compare the temporal GB vars with the stored ones,
11744 // set flush indicator and update stored GB vars if there is any change.
11746 if(uses_temporal_flush){
11747 ret+= "\tif( !( (";
11748 bool first_one = true;
11749 for(g=0;g<gb_tbl.size();g++){
11750 data_type *gdt = gb_tbl.get_data_type(g);
11752 if(gdt->is_temporal()){
11753 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
11754 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
11755 if(first_one){first_one = false;} else {ret += ") && (";}
11756 ret += generate_equality_test(lhs_op, rhs_op, gdt);
11760 for(g=0;g<gb_tbl.size();g++){
11761 data_type *gdt = gb_tbl.get_data_type(g);
11762 if(gdt->is_temporal()){
11763 if(gdt->is_buffer_type()){
11764 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
11766 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
11768 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
11774 if(uses_temporal_flush){
11775 for(g=0;g<gb_tbl.size();g++){
11776 data_type *gdt = gb_tbl.get_data_type(g);
11777 if(gdt->is_temporal()){
11778 ret+="if(last_flushed_gb"+int_to_string(g)+">0)\n";
11784 ret += "\t\tneeds_temporal_flush=true;\n";
11785 ret += "\t\t}else{\n"
11786 "\t\t\tneeds_temporal_flush=false;\n"
11791 // For temporal status tuple we don't need to do anything else
11792 ret += "\tif (temp_tuple_received) return NULL;\n\n";
11795 // The partial functions ref'd in the group-by var
11796 // definitions must be evaluated. If one returns false,
11797 // then implicitly the predicate is false.
11798 set<int>::iterator pfsi;
11800 if(gb_pfcns.size() > 0)
11801 ret += "//\t\tUnpack partial fcns.\n";
11802 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, gb_pfcns,
11803 found_cids, segen_gb_tbl, "NULL", needs_xform);
11805 // Unpack the group-by variables
11807 for(g=0;g<gb_tbl.size();g++){
11808 // Find the new fields ref'd by this GBvar def.
11809 col_id_set new_cids;
11810 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
11811 // Unpack these values.
11812 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
11814 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11815 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11817 // There seems to be no difference between the two
11818 // branches of the IF statement.
11819 data_type *gdt = gb_tbl.get_data_type(g);
11820 if(gdt->is_buffer_type()){
11821 // Create temporary copy.
11822 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11823 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11825 scalarexp_t *gse = gb_tbl.get_def(g);
11826 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11827 g,generate_se_code(gse,schema).c_str());
11830 ret.append(tmpstr);
11835 ret+= "\treturn gbval;\n";
11840 //-------------------
11841 // the evaluate_predicate method.
11842 // This method evaluates the WHERE clause on the input tuple.
11843 // Clauses which don't reference stateful fcns are tested first and
11844 // cause an immediate return(false) on failure; clauses which do are
11845 // all evaluated afterwards and only clear the returned flag.
11846 // Side effects: unpacks aggregate partial fcns and all remaining attributes.
11848 ret += "bool evaluate_predicate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, int cd){\n";
11849 // Variables for execution of the function.
11850 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11852 if(partial_fcns.size()>0){ // partial fcn access failure
11853 ret += "\tgs_retval_t retval = 0;\n";
11857 // Start by cleaning up partial function results
11858 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11859 set<int> w_pfcns; // partial fcns in where clause
11860 for(w=0;w<where.size();++w)
11861 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
11863 set<int> ag_pfcns; // partial fcns in gbdefs, aggr se's
11864 for(a=0;a<aggr_tbl.size();a++){
11865 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_pfcns);
11867 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
11868 ret += gen_partial_fcn_dtr(partial_fcns,ag_pfcns);
11870 ret+="//\t\tEvaluate clauses which don't reference stateful fcns first \n";
11871 for(w=0;w<where.size();++w){
11872 if(! pred_refs_sfun(where[w]->pr)){
11873 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11875 // Find the set of variables accessed in this CNF elem,
11876 // but in no previous element.
11877 col_id_set new_cids;
11878 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11880 // Unpack these values.
11881 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11882 // Find partial fcns ref'd in this cnf element
11883 set<int> pfcn_refs;
11884 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11885 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11887 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11888 +") ) return(false);\n";
11893 // The partial functions ref'd in the aggregate
11894 // definitions must also be evaluated. If one returns false,
11895 // then implicitly the predicate is false.
11896 // ASSUME that aggregates cannot reference stateful fcns.
11898 if(ag_pfcns.size() > 0)
11899 ret += "//\t\tUnpack remaining partial fcns.\n";
11900 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_pfcns,
11901 found_cids, segen_gb_tbl, "false", needs_xform);
11903 ret+="//\t\tEvaluate all remaining where clauses.\n";
11904 ret+="\tbool retval = true;\n";
11905 for(w=0;w<where.size();++w){
11906 if( pred_refs_sfun(where[w]->pr)){
11907 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11909 // Find the set of variables accessed in this CNF elem,
11910 // but in no previous element.
11911 col_id_set new_cids;
11912 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11914 // Unpack these values.
11915 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11916 // Find partial fcns ref'd in this cnf element
11917 set<int> pfcn_refs;
11918 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11919 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11921 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11922 +") ) retval = false;\n";
11926 ret+="// Unpack all remaining attributes\n";
11927 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
11929 ret += "\n\treturn retval;\n";
11932 //--------------------------------------------------------
11933 // Create and initialize an aggregate object
11935 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, gs_sp_t a,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11936 // Variables for execution of the function.
11937 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11940 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+ "_aggrdef *)a;\n";
11942 for(a=0;a<aggr_tbl.size();a++){
11943 if(aggr_tbl.is_builtin(a)){
11944 // Create temporaries for buffer return values
11945 data_type *adt = aggr_tbl.get_data_type(a);
11946 if(adt->is_buffer_type()){
11947 sprintf(tmpstr,"aggr_tmp_%d", a);
11948 ret+=adt->make_host_cvar(tmpstr)+";\n";
11953 for(a=0;a<aggr_tbl.size();a++){
11954 sprintf(tmpstr,"aggval->aggr_var%d",a);
11955 string assignto_var = tmpstr;
11956 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11959 ret += "\treturn aggval;\n";
11963 //--------------------------------------------------------
11964 // initialize an aggregate object inplace
11966 ret += "void create_aggregate(host_tuple &tup0, "+this->generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11967 // Variables for execution of the function.
11968 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11972 for(a=0;a<aggr_tbl.size();a++){
11973 if(aggr_tbl.is_builtin(a)){
11974 // Create temporaries for buffer return values
11975 data_type *adt = aggr_tbl.get_data_type(a);
11976 if(adt->is_buffer_type()){
11977 sprintf(tmpstr,"aggr_tmp_%d", a);
11978 ret+=adt->make_host_cvar(tmpstr)+";\n";
11983 for(a=0;a<aggr_tbl.size();a++){
11984 sprintf(tmpstr,"aggval->aggr_var%d",a);
11985 string assignto_var = tmpstr;
11986 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11992 //--------------------------------------------------------
11993 // Create and clean-initialize an state object
11995 ret += "void initialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval){\n";
11996 // Variables for execution of the function.
11997 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12000 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
12002 for(a=0;a<aggr_tbl.size();a++){
12003 if( aggr_tbl.is_superaggr(a)){
12004 if(aggr_tbl.is_builtin(a)){
12005 // Create temporaries for buffer return values
12006 data_type *adt = aggr_tbl.get_data_type(a);
12007 if(adt->is_buffer_type()){
12008 sprintf(tmpstr,"aggr_tmp_%d", a);
12009 ret+=adt->make_host_cvar(tmpstr)+";\n";
12015 for(a=0;a<aggr_tbl.size();a++){
12016 if( aggr_tbl.is_superaggr(a)){
12017 sprintf(tmpstr,"stval->aggr_var%d",a);
12018 string assignto_var = tmpstr;
12019 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12023 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12024 string state_nm = (*ssi);
12025 ret += "_sfun_state_clean_init_"+state_nm+"(&(stval->state_var_"+state_nm+"));\n";
12031 //--------------------------------------------------------
12032 // Create and dirty-initialize an state object
12034 ret += "void reinitialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, "+generate_functor_name()+"_statedef *old_stval, int cd){\n";
12035 // Variables for execution of the function.
12036 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12039 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
12041 for(a=0;a<aggr_tbl.size();a++){
12042 if( aggr_tbl.is_superaggr(a)){
12043 if(aggr_tbl.is_builtin(a)){
12044 // Create temporaries for buffer return values
12045 data_type *adt = aggr_tbl.get_data_type(a);
12046 if(adt->is_buffer_type()){
12047 sprintf(tmpstr,"aggr_tmp_%d", a);
12048 ret+=adt->make_host_cvar(tmpstr)+";\n";
12054 // initialize superaggregates
12055 for(a=0;a<aggr_tbl.size();a++){
12056 if( aggr_tbl.is_superaggr(a)){
12057 sprintf(tmpstr,"stval->aggr_var%d",a);
12058 string assignto_var = tmpstr;
12059 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12063 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12064 string state_nm = (*ssi);
12065 ret += "_sfun_state_dirty_init_"+state_nm+"(&(stval->state_var_"+state_nm+"),&(old_stval->state_var_"+state_nm+"), cd );\n";
12070 //--------------------------------------------------------
12071 // Finalize_state : call the finalize fcn on all states
12074 ret += "void finalize_state( "+generate_functor_name()+"_statedef *stval, int cd){\n";
12076 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12077 string state_nm = (*ssi);
12078 ret += "_sfun_state_final_init_"+state_nm+"(&(stval->state_var_"+state_nm+"), cd);\n";
12086 //--------------------------------------------------------
12087 // update (plus) a superaggregate object
12089 ret += "void update_plus_superaggr(host_tuple &tup0, " +
12090 generate_functor_name()+"_groupdef *gbval, "+
12091 generate_functor_name()+"_statedef *stval){\n";
12092 // Variables for execution of the function.
12093 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12095 // use of temporaries depends on the aggregate,
12096 // generate them in generate_aggr_update
12099 for(a=0;a<aggr_tbl.size();a++){
12100 if(aggr_tbl.is_superaggr(a)){
12101 sprintf(tmpstr,"stval->aggr_var%d",a);
12102 string varname = tmpstr;
12103 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12107 ret += "\treturn;\n";
12112 //--------------------------------------------------------
12113 // update (minus) a superaggregate object
12115 ret += "void update_minus_superaggr( "+
12116 generate_functor_name()+"_groupdef *gbval, "+
12117 generate_functor_name()+"_aggrdef *aggval,"+
12118 generate_functor_name()+"_statedef *stval"+
12120 // Variables for execution of the function.
12121 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12123 // use of temporaries depends on the aggregate,
12124 // generate them in generate_aggr_update
12127 for(a=0;a<aggr_tbl.size();a++){
12128 if(aggr_tbl.is_superaggr(a)){
12129 sprintf(tmpstr,"stval->aggr_var%d",a);
12130 string super_varname = tmpstr;
12131 sprintf(tmpstr,"aggval->aggr_var%d",a);
12132 string sub_varname = tmpstr;
12133 ret.append(generate_superaggr_minus(sub_varname, super_varname,&aggr_tbl,a, schema));
12137 ret += "\treturn;\n";
12141 //--------------------------------------------------------
12142 // update an aggregate object
12144 ret += "void update_aggregate(host_tuple &tup0, "
12145 +generate_functor_name()+"_groupdef *gbval, "+
12146 generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
12147 // Variables for execution of the function.
12148 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12150 // use of temporaries depends on the aggregate,
12151 // generate them in generate_aggr_update
12154 for(a=0;a<aggr_tbl.size();a++){
12155 sprintf(tmpstr,"aggval->aggr_var%d",a);
12156 string varname = tmpstr;
12157 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12160 ret += "\treturn;\n";
12163 //---------------------------------------------------
12166 ret += "\tbool flush_needed(){\n";
12167 if(uses_temporal_flush){
12168 ret += "\t\treturn needs_temporal_flush;\n";
12170 ret += "\t\treturn false;\n";
12175 //------------------------------------------------------
12176 // The cleaning_when predicate
12178 string gbvar = "gbval->gb_var";
12179 string aggvar = "aggval->";
12181 ret += "bool need_to_clean( "
12182 +generate_functor_name()+"_groupdef *gbval, "+
12183 generate_functor_name()+"_statedef *stval, int cd"+
12186 if(cleanwhen.size()>0)
12187 ret += "\tbool predval = true;\n";
12189 ret += "\tbool predval = false;\n";
12191 // Find the udafs ref'd in the having clause
12193 for(w=0;w<cleanwhen.size();++w)
12194 collect_aggr_refs_pr(cleanwhen[w]->pr, cw_aggs);
12197 // get the return values from the UDAFS
12198 for(a=0;a<aggr_tbl.size();a++){
12199 if(! aggr_tbl.is_builtin(a) && cw_aggs.count(a)){
12200 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12201 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12202 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12207 // Start by cleaning up partial function results
12208 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12209 set<int> cw_pfcns; // partial fcns in where clause
12210 for(w=0;w<cleanwhen.size();++w)
12211 collect_partial_fcns_pr(cleanwhen[w]->pr, cw_pfcns);
12213 ret += gen_partial_fcn_dtr(partial_fcns,cw_pfcns);
12216 for(w=0;w<cleanwhen.size();++w){
12217 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12219 // Find partial fcns ref'd in this cnf element
12220 set<int> pfcn_refs;
12221 collect_partial_fcns_pr(cleanwhen[w]->pr, pfcn_refs);
12222 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12223 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12224 ret += "\tif(retval){ return false;}\n";
12226 // ret += unpack_partial_fcn_fm_aggr(schema, partial_fcns, pfcn_refs,"false");
12228 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanwhen[w]->pr,gbvar, aggvar, schema)+
12229 ") ) predval = false;\n";
12232 ret += "\treturn predval;\n";
12235 //------------------------------------------------------
12236 // The cleaning_by predicate
12238 ret += "bool sample_group("
12239 +generate_functor_name()+"_groupdef *gbval, "+
12240 generate_functor_name()+"_aggrdef *aggval,"+
12241 generate_functor_name()+"_statedef *stval, int cd"+
12244 if(cleanby.size()>0)
12245 ret += "\tbool retval = true;\n";
12247 ret += "\tbool retval = false;\n";
12249 // Find the udafs ref'd in the having clause
12251 for(w=0;w<cleanby.size();++w)
12252 collect_aggr_refs_pr(cleanby[w]->pr, cb_aggs);
12255 // get the return values from the UDAFS
12256 for(a=0;a<aggr_tbl.size();a++){
12257 if(! aggr_tbl.is_builtin(a) && cb_aggs.count(a)){
12258 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12259 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12260 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12265 // Start by cleaning up partial function results
12266 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12267 set<int> cb_pfcns; // partial fcns in where clause
12268 for(w=0;w<cleanby.size();++w)
12269 collect_partial_fcns_pr(cleanby[w]->pr, cb_pfcns);
12271 ret += gen_partial_fcn_dtr(partial_fcns,cb_pfcns);
12274 for(w=0;w<cleanwhen.size();++w){
12275 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12279 // Find the set of variables accessed in this CNF elem,
12280 // but in no previous element.
12281 col_id_set new_cids;
12282 get_new_pred_cids(cleanby[w]->pr, found_cids, new_cids, segen_gb_tbl);
12284 // Unpack these values.
12285 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
12288 // Find partial fcns ref'd in this cnf element
12289 set<int> pfcn_refs;
12290 collect_partial_fcns_pr(cleanby[w]->pr, pfcn_refs);
12291 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12292 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12293 ret += "\tif(retval){ return false;}\n";
12295 // ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
12297 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanby[w]->pr,gbvar, aggvar, schema)+
12298 +") ) retval = false;\n";
12301 ret += "\treturn retval;\n";
12305 //-----------------------------------------------------
12307 ret += "bool final_sample_group("
12308 +generate_functor_name()+"_groupdef *gbval, "+
12309 generate_functor_name()+"_aggrdef *aggval,"+
12310 generate_functor_name()+"_statedef *stval,"+
12313 ret += "\tgs_retval_t retval = 0;\n";
12315 // Find the udafs ref'd in the having clause
12317 for(w=0;w<having.size();++w)
12318 collect_aggr_refs_pr(having[w]->pr, hv_aggs);
12321 // get the return values from the UDAFS
12322 for(a=0;a<aggr_tbl.size();a++){
12323 if(! aggr_tbl.is_builtin(a) && hv_aggs.count(a)){
12324 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12325 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12326 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12331 set<int> hv_sl_pfcns;
12332 for(w=0;w<having.size();w++){
12333 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
12336 // clean up the partial fcn results from any previous execution
12337 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
12340 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
12341 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12342 ret += "\tif(retval){ return false;}\n";
12345 // Evaluate the HAVING clause
12346 // TODO: this seems to have a ++ operator rather than a + operator.
12347 for(w=0;w<having.size();++w){
12348 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
12351 ret += "\treturn true;\n";
12354 //---------------------------------------------------
12355 // create output tuple
12356 // Unpack the partial functions ref'd in the where clause,
12357 // select clause. Evaluate the where clause.
12358 // Finally, pack the tuple.
12360 // I need to use special code generation here,
12361 // so I'll leave it in longhand.
12363 ret += "host_tuple create_output_tuple("
12364 +generate_functor_name()+"_groupdef *gbval, "+
12365 generate_functor_name()+"_aggrdef *aggval,"+
12366 generate_functor_name()+"_statedef *stval,"+
12367 "int cd, bool &failed){\n";
12369 ret += "\thost_tuple tup;\n";
12370 ret += "\tfailed = false;\n";
12371 ret += "\tgs_retval_t retval = 0;\n";
12374 // Find the udafs ref'd in the select clause
12376 for(s=0;s<select_list.size();s++)
12377 collect_agg_refs(select_list[s]->se, sl_aggs);
12380 // get the return values from the UDAFS
12381 for(a=0;a<aggr_tbl.size();a++){
12382 if(! aggr_tbl.is_builtin(a) && sl_aggs.count(a)){
12383 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12384 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12385 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12390 // I can't cache partial fcn results from the having
12391 // clause because evaluation is separated.
12393 for(s=0;s<select_list.size();s++){
12394 collect_partial_fcns(select_list[s]->se, sl_pfcns);
12397 for(pfsi=sl_pfcns.begin();pfsi!=sl_pfcns.end();++pfsi){
12398 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12399 ret += "\tif(retval){ failed=true; return tup;}\n";
12403 // Now, compute the size of the tuple.
12405 // Unpack any BUFFER type selections into temporaries
12406 // so that I can compute their size and not have
12407 // to recompute their value during tuple packing.
12408 // I can use regular assignment here because
12409 // these temporaries are non-persistent.
12410 // TODO: should I be using the selvar generation routine?
12412 ret += "//\t\tCompute the size of the tuple.\n";
12413 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
12414 for(s=0;s<select_list.size();s++){
12415 scalarexp_t *se = select_list[s]->se;
12416 data_type *sdt = se->get_data_type();
12417 if(sdt->is_buffer_type() &&
12418 !( (se->get_operator_type() == SE_COLREF) ||
12419 (se->get_operator_type() == SE_AGGR_STAR) ||
12420 (se->get_operator_type() == SE_AGGR_SE) ||
12421 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12422 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12424 sprintf(tmpstr,"selvar_%d",s);
12425 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
12426 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
12430 // The size of the tuple is the size of the tuple struct plus the
12431 // size of the buffers to be copied in.
12433 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
12434 for(s=0;s<select_list.size();s++){
12435 // if(s>0) ret += "+";
12436 scalarexp_t *se = select_list[s]->se;
12437 data_type *sdt = select_list[s]->se->get_data_type();
12438 if(sdt->is_buffer_type()){
12439 if(!( (se->get_operator_type() == SE_COLREF) ||
12440 (se->get_operator_type() == SE_AGGR_STAR) ||
12441 (se->get_operator_type() == SE_AGGR_SE) ||
12442 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12443 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12445 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
12446 ret.append(tmpstr);
12448 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12449 ret.append(tmpstr);
12455 // Allocate tuple data block.
12456 ret += "//\t\tCreate the tuple block.\n";
12457 ret += "\ttup.data = malloc(tup.tuple_size);\n";
12458 ret += "\ttup.heap_resident = true;\n";
12460 // Mark tuple as regular
12461 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
12463 // ret += "\ttup.channel = 0;\n";
12464 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
12465 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
12468 // (Here, offsets are hard-wired. is this a problem?)
12470 ret += "//\t\tPack the fields into the tuple.\n";
12471 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
12472 for(s=0;s<select_list.size();s++){
12473 scalarexp_t *se = select_list[s]->se;
12474 data_type *sdt = se->get_data_type();
12475 if(sdt->is_buffer_type()){
12476 if(!( (se->get_operator_type() == SE_COLREF) ||
12477 (se->get_operator_type() == SE_AGGR_STAR) ||
12478 (se->get_operator_type() == SE_AGGR_SE) ||
12479 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12480 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12482 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
12483 ret.append(tmpstr);
12484 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
12485 ret.append(tmpstr);
12487 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12488 ret.append(tmpstr);
12489 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12490 ret.append(tmpstr);
12493 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12494 ret.append(tmpstr);
12495 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
12500 // Destroy string temporaries
12501 ret += gen_buffer_selvars_dtr(select_list);
12502 // Destroy string return vals of UDAFs
12503 for(a=0;a<aggr_tbl.size();a++){
12504 if(! aggr_tbl.is_builtin(a)){
12505 int afcn_id = aggr_tbl.get_fcn_id(a);
12506 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12507 if(adt->is_buffer_type()){
12508 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12509 adt->get_hfta_buffer_destroy().c_str(), a );
12516 ret += "\treturn tup;\n";
12520 //-------------------------------------------------------------------
12521 // Temporal update functions
12523 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12525 // create a temp status tuple
12526 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12528 ret += gen_init_temp_status_tuple(this->get_node_name());
12531 // (Here, offsets are hard-wired. is this a problem?)
12533 ret += "//\t\tPack the fields into the tuple.\n";
12534 for(s=0;s<select_list.size();s++){
12535 data_type *sdt = select_list[s]->se->get_data_type();
12536 if(sdt->is_temporal()){
12537 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12539 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
12545 ret += "\treturn 0;\n";
12546 ret += "};};\n\n\n";
12549 //----------------------------------------------------------
12550 // The hash function
12552 ret += "struct "+generate_functor_name()+"_hash_func{\n";
12553 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12554 "_groupdef *grp) const{\n";
12555 ret += "\t\treturn(";
12556 for(g=0;g<gb_tbl.size();g++){
12557 if(g>0) ret += "^";
12558 data_type *gdt = gb_tbl.get_data_type(g);
12559 if(gdt->use_hashfunc()){
12560 if(gdt->is_buffer_type())
12561 sprintf(tmpstr,"(%s*%s(&)grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12563 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12565 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12569 ret += ") >> 32);\n";
12573 //----------------------------------------------------------
12574 // The superhash function
12576 ret += "struct "+generate_functor_name()+"_superhash_func{\n";
12577 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12578 "_groupdef *grp) const{\n";
12579 ret += "\t\treturn(0";
12581 for(g=0;g<gb_tbl.size();g++){
12582 if(sg_tbl.count(g)>0){
12584 data_type *gdt = gb_tbl.get_data_type(g);
12585 if(gdt->use_hashfunc()){
12586 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12588 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12593 ret += ") >> 32);\n";
12598 //----------------------------------------------------------
12599 // The comparison function
12601 ret += "struct "+generate_functor_name()+"_equal_func{\n";
12602 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12603 generate_functor_name()+"_groupdef *grp2) const{\n";
12604 ret += "\t\treturn( (";
12605 for(g=0;g<gb_tbl.size();g++){
12606 if(g>0) ret += ") && (";
12607 data_type *gdt = gb_tbl.get_data_type(g);
12608 if(gdt->complex_comparison(gdt)){
12609 if(gdt->is_buffer_type())
12610 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12611 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12613 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12614 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12616 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12625 //----------------------------------------------------------
12626 // The superhashcomparison function
12628 ret += "struct "+generate_functor_name()+"_superequal_func{\n";
12629 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12630 generate_functor_name()+"_groupdef *grp2) const{\n";
12631 ret += "\t\treturn( (";
12633 bool first_elem = true;
12634 for(g=0;g<gb_tbl.size();g++){
12635 if(sg_tbl.count(g)){
12636 if(first_elem) first_elem=false; else ret += ") && (";
12637 data_type *gdt = gb_tbl.get_data_type(g);
12638 if(gdt->complex_comparison(gdt)){
12639 if(gdt->is_buffer_type())
12640 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12641 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12643 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12644 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12646 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12663 string sgahcwcb_qpn::generate_operator(int i, string params){
12666 " clean_operator<" +
12667 generate_functor_name()+",\n\t"+
12668 generate_functor_name() + "_groupdef, \n\t" +
12669 generate_functor_name() + "_aggrdef, \n\t" +
12670 generate_functor_name() + "_statedef, \n\t" +
12671 generate_functor_name()+"_hash_func, \n\t"+
12672 generate_functor_name()+"_equal_func ,\n\t"+
12673 generate_functor_name()+"_superhash_func,\n\t "+
12674 generate_functor_name()+"_superequal_func \n\t"+
12675 "> *op"+int_to_string(i)+" = new clean_operator<"+
12676 generate_functor_name()+",\n\t"+
12677 generate_functor_name() + "_groupdef,\n\t " +
12678 generate_functor_name() + "_aggrdef, \n\t" +
12679 generate_functor_name() + "_statedef, \n\t" +
12680 generate_functor_name()+"_hash_func, \n\t"+
12681 generate_functor_name()+"_equal_func, \n\t"+
12682 generate_functor_name()+"_superhash_func, \n\t"+
12683 generate_functor_name()+"_superequal_func\n\t "
12684 ">("+params+", \"" + get_node_name() + "\");\n"
12688 ////////////////////////////////////////////////////////////////
12693 string rsgah_qpn::generate_functor_name(){
12694 return("rsgah_functor_" + normalize_name(this->get_node_name()));
12698 string rsgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
12702 // Initialize generate utility globals
12703 segen_gb_tbl = &(gb_tbl);
12706 //--------------------------------
12707 // group definition class
12708 string ret = "class " + generate_functor_name() + "_groupdef{\n";
12709 ret += "public:\n";
12710 for(g=0;g<this->gb_tbl.size();g++){
12711 sprintf(tmpstr,"gb_var%d",g);
12712 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12715 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
12716 ret += "\t"+generate_functor_name() + "_groupdef("+
12717 this->generate_functor_name() + "_groupdef *gd){\n";
12718 for(g=0;g<gb_tbl.size();g++){
12719 data_type *gdt = gb_tbl.get_data_type(g);
12720 if(gdt->is_buffer_type()){
12721 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
12722 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
12725 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
12731 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
12732 for(g=0;g<gb_tbl.size();g++){
12733 data_type *gdt = gb_tbl.get_data_type(g);
12734 if(gdt->is_buffer_type()){
12735 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
12736 gdt->get_hfta_buffer_destroy().c_str(), g );
12743 //--------------------------------
12744 // aggr definition class
12745 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
12746 ret += "public:\n";
12747 for(a=0;a<aggr_tbl.size();a++){
12748 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
12749 sprintf(tmpstr,"aggr_var%d",a);
12750 if(aggr_tbl.is_builtin(a))
12751 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
12753 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
12756 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
12758 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
12759 for(a=0;a<aggr_tbl.size();a++){
12760 if(aggr_tbl.is_builtin(a)){
12761 data_type *adt = aggr_tbl.get_data_type(a);
12762 if(adt->is_buffer_type()){
12763 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
12764 adt->get_hfta_buffer_destroy().c_str(), a );
12768 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
12769 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12770 ret+="(aggr_var"+int_to_string(a)+"));\n";
12776 //--------------------------------
12777 // gb functor class
12778 ret += "class " + this->generate_functor_name() + "{\n";
12780 // Find variables referenced in this query node.
12782 col_id_set cid_set;
12783 col_id_set::iterator csi;
12785 for(w=0;w<where.size();++w)
12786 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
12787 for(w=0;w<having.size();++w)
12788 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
12789 for(w=0;w<closing_when.size();++w)
12790 gather_pr_col_ids(closing_when[w]->pr,cid_set,segen_gb_tbl);
12791 for(g=0;g<gb_tbl.size();g++)
12792 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
12794 for(s=0;s<select_list.size();s++){
12795 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
12799 // Private variables : store the state of the functor.
12800 // 1) variables for unpacked attributes
12801 // 2) offsets of the unpacked attributes
12802 // 3) storage of partial functions
12803 // 4) storage of complex literals (i.e., require a constructor)
12805 ret += "private:\n";
12807 // var to save the schema handle
12808 ret += "\tint schema_handle0;\n";
12810 // generate the declaration of all the variables related to
12811 // temp tuples generation
12812 ret += gen_decl_temp_vars();
12814 // unpacked attribute storage, offsets
12815 ret += "//\t\tstorage and offsets of accessed fields.\n";
12816 ret += generate_access_vars(cid_set, schema);
12817 // tuple metadata offset
12818 ret += "\tint tuple_metadata_offset0;\n";
12820 // Variables to store results of partial functions.
12821 // WARNING find_partial_functions modifies the SE
12822 // (it marks the partial function id).
12823 ret += "//\t\tParital function result storage\n";
12824 vector<scalarexp_t *> partial_fcns;
12825 vector<int> fcn_ref_cnt;
12826 vector<bool> is_partial_fcn;
12827 for(s=0;s<select_list.size();s++){
12828 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
12830 for(w=0;w<where.size();w++){
12831 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12833 for(w=0;w<having.size();w++){
12834 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12836 for(w=0;w<closing_when.size();w++){
12837 find_partial_fcns_pr(closing_when[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12839 for(g=0;g<gb_tbl.size();g++){
12840 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
12842 for(a=0;a<aggr_tbl.size();a++){
12843 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
12845 if(partial_fcns.size()>0){
12846 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
12847 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
12850 // Create cached temporaries for UDAF return values.
12851 for(a=0;a<aggr_tbl.size();a++){
12852 if(! aggr_tbl.is_builtin(a)){
12853 int afcn_id = aggr_tbl.get_fcn_id(a);
12854 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12855 sprintf(tmpstr,"udaf_ret_%d", a);
12856 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
12861 // Complex literals (i.e., they need constructors)
12862 ret += "//\t\tComplex literal storage.\n";
12863 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
12864 ret += generate_complex_lit_vars(complex_literals);
12866 // Pass-by-handle parameters
12867 ret += "//\t\tPass-by-handle storage.\n";
12868 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
12869 ret += generate_pass_by_handle_vars(param_handle_table);
12872 // variables to hold parameters.
12873 ret += "//\tfor query parameters\n";
12874 ret += generate_param_vars(param_tbl);
12876 // Is there a temporal flush? If so create flush temporaries,
12877 // create flush indicator.
12878 bool uses_temporal_flush = false;
12879 for(g=0;g<gb_tbl.size();g++){
12880 data_type *gdt = gb_tbl.get_data_type(g);
12881 if(gdt->is_temporal())
12882 uses_temporal_flush = true;
12885 if(uses_temporal_flush){
12886 ret += "//\t\tFor temporal flush\n";
12887 for(g=0;g<gb_tbl.size();g++){
12888 data_type *gdt = gb_tbl.get_data_type(g);
12889 if(gdt->is_temporal()){
12890 sprintf(tmpstr,"curr_gb%d",g);
12891 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12892 sprintf(tmpstr,"last_gb%d",g);
12893 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12896 ret += "\tgs_int32_t needs_temporal_flush;\n";
12899 // The publicly exposed functions
12901 ret += "\npublic:\n";
12904 //-------------------
12905 // The functor constructor
12906 // pass in the schema handle.
12907 // 1) make assignments to the unpack offset variables
12908 // 2) initialize the complex literals
12910 ret += "//\t\tFunctor constructor.\n";
12911 ret += this->generate_functor_name()+"(int schema_handle0){\n";
12913 // save the schema handle
12914 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
12916 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
12919 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
12920 ret += gen_access_var_init(cid_set);
12922 // complex literals
12923 ret += "//\t\tInitialize complex literals.\n";
12924 ret += gen_complex_lit_init(complex_literals);
12926 // Initialize partial function results so they can be safely GC'd
12927 ret += gen_partial_fcn_init(partial_fcns);
12929 // Initialize non-query-parameter parameter handles
12930 ret += gen_pass_by_handle_init(param_handle_table);
12932 // temporal flush variables
12933 // ASSUME that structured values won't be temporal.
12934 gs_int32_t temporal_gb = 0;
12935 if(uses_temporal_flush){
12936 ret += "//\t\tInitialize temporal flush variables.\n";
12937 for(g=0;g<gb_tbl.size();g++){
12938 data_type *gdt = gb_tbl.get_data_type(g);
12939 if(gdt->is_temporal()){
12940 literal_t gl(gdt->type_indicator());
12941 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
12942 ret.append(tmpstr);
12943 sprintf(tmpstr,"\tcurr_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
12944 ret.append(tmpstr);
12948 ret += "\tneeds_temporal_flush = 0;\n";
12951 // Init temporal attributes referenced in select list
12952 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
12957 //-------------------
12958 // Functor destructor
12959 ret += "//\t\tFunctor destructor.\n";
12960 ret += "~"+this->generate_functor_name()+"(){\n";
12962 // clean up buffer type complex literals
12963 ret += gen_complex_lit_dtr(complex_literals);
12965 // Deregister the pass-by-handle parameters
12966 ret += "/* register and de-register the pass-by-handle parameters */\n";
12967 ret += gen_pass_by_handle_dtr(param_handle_table);
12969 // clean up partial function results.
12970 ret += "/* clean up partial function storage */\n";
12971 ret += gen_partial_fcn_dtr(partial_fcns);
12973 // Destroy the parameters, if any need to be destroyed
12974 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12979 //-------------------
12980 // Parameter manipulation routines
12981 ret += generate_load_param_block(this->generate_functor_name(),
12982 this->param_tbl,param_handle_table);
12983 ret += generate_delete_param_block(this->generate_functor_name(),
12984 this->param_tbl,param_handle_table);
12986 //-------------------
12987 // Register new parameter block
12989 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
12990 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12991 ret += "\treturn this->load_params_"+this->generate_functor_name()+
12996 //-------------------
12997 // the create_group method.
12998 // This method creates a group in a buffer passed in
12999 // (to allow for creation on the stack).
13000 // There are also a couple of side effects:
13001 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
13002 // 2) determine if a temporal flush is required.
13004 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
13005 // Variables for execution of the function.
13006 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13008 if(partial_fcns.size()>0){ // partial fcn access failure
13009 ret += "\tgs_retval_t retval = 0;\n";
13013 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
13014 "_groupdef *) buffer;\n";
13016 // Start by cleaning up partial function results
13017 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
13018 set<int> w_pfcns; // partial fcns in where clause
13019 for(w=0;w<where.size();++w)
13020 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
13022 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
13023 for(g=0;g<gb_tbl.size();g++){
13024 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
13026 for(a=0;a<aggr_tbl.size();a++){
13027 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
13029 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
13030 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
13031 // ret += gen_partial_fcn_dtr(partial_fcns);
13034 ret += gen_temp_tuple_check(this->node_name, 0);
13035 col_id_set found_cids; // colrefs unpacked thus far.
13036 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
13039 // Save temporal group-by variables
13042 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
13044 for(g=0;g<gb_tbl.size();g++){
13046 data_type *gdt = gb_tbl.get_data_type(g);
13048 if(gdt->is_temporal()){
13049 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13050 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13051 ret.append(tmpstr);
13058 // Compare the temporal GB vars with the stored ones,
13059 // set flush indicator and update stored GB vars if there is any change.
13061 if(uses_temporal_flush){
13063 bool first_one = true;
13064 for(g=0;g<gb_tbl.size();g++){
13065 data_type *gdt = gb_tbl.get_data_type(g);
13067 if(gdt->is_temporal()){
13068 sprintf(tmpstr,"curr_gb%d",g); string lhs_op = tmpstr;
13069 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
13070 if(first_one){first_one = false;} else {ret += ") && (";}
13071 ret += generate_lt_test(lhs_op, rhs_op, gdt);
13075 for(g=0;g<gb_tbl.size();g++){
13076 data_type *gdt = gb_tbl.get_data_type(g);
13077 if(gdt->is_temporal()){
13079 if(gdt->is_buffer_type()){ // TODO first, last? or delete?
13080 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13082 ret += "\t\tif(curr_gb"+to_string(g)+"==0){\n";
13083 ret += "\t\t\tlast_gb"+to_string(g)+" = gbval->gb_var"+to_string(g)+";\n";
13084 ret += "\t\t}else{\n";
13085 ret += "\t\t\tlast_gb"+to_string(g)+" = curr_gb"+to_string(g)+";\n";
13087 sprintf(tmpstr,"\t\tcurr_gb%d = gbval->gb_var%d;\n",g,g);
13092 ret += "\t\tneeds_temporal_flush = curr_gb"+to_string (temporal_gb)+" - last_gb"+to_string(temporal_gb)+";\n";
13093 ret += "\t\t}else{\n"
13094 "\t\t\tneeds_temporal_flush=0;\n"
13099 // For temporal status tuple we don't need to do anything else
13100 ret += "\tif (temp_tuple_received) return NULL;\n\n";
13102 for(w=0;w<where.size();++w){
13103 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
13105 // Find the set of variables accessed in this CNF elem,
13106 // but in no previous element.
13107 col_id_set new_cids;
13108 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
13110 // Unpack these values.
13111 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13112 // Find partial fcns ref'd in this cnf element
13113 set<int> pfcn_refs;
13114 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
13115 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
13117 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
13118 +") ) return(NULL);\n";
13121 // The partial functions ref'd in the group-by var and aggregate
13122 // definitions must also be evaluated. If one returns false,
13123 // then implicitly the predicate is false.
13124 set<int>::iterator pfsi;
13126 if(ag_gb_pfcns.size() > 0)
13127 ret += "//\t\tUnpack remaining partial fcns.\n";
13128 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
13129 found_cids, segen_gb_tbl, "NULL", needs_xform);
13131 // Unpack the group-by variables
13133 for(g=0;g<gb_tbl.size();g++){
13134 data_type *gdt = gb_tbl.get_data_type(g);
13135 if(!gdt->is_temporal()){ // temproal gbs already computed
13136 // Find the new fields ref'd by this GBvar def.
13137 col_id_set new_cids;
13138 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
13139 // Unpack these values.
13140 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13142 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13143 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13145 // There seems to be no difference between the two
13146 // branches of the IF statement.
13147 data_type *gdt = gb_tbl.get_data_type(g);
13148 if(gdt->is_buffer_type()){
13149 // Create temporary copy.
13150 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13151 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13153 scalarexp_t *gse = gb_tbl.get_def(g);
13154 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13155 g,generate_se_code(gse,schema).c_str());
13158 ret.append(tmpstr);
13164 ret+= "\treturn gbval;\n";
13167 //--------------------------------------------------------
13168 // Create and initialize an aggregate object
13170 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
13171 // Variables for execution of the function.
13172 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13175 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
13176 "_aggrdef *)buffer;\n";
13178 for(a=0;a<aggr_tbl.size();a++){
13179 if(aggr_tbl.is_builtin(a)){
13180 // Create temporaries for buffer return values
13181 data_type *adt = aggr_tbl.get_data_type(a);
13182 if(adt->is_buffer_type()){
13183 sprintf(tmpstr,"aggr_tmp_%d", a);
13184 ret+=adt->make_host_cvar(tmpstr)+";\n";
13189 // Unpack all remaining attributes
13190 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
13191 for(a=0;a<aggr_tbl.size();a++){
13192 sprintf(tmpstr,"aggval->aggr_var%d",a);
13193 string assignto_var = tmpstr;
13194 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
13197 ret += "\treturn aggval;\n";
13200 //--------------------------------------------------------
13201 // update an aggregate object
13203 ret += "void update_aggregate(host_tuple &tup0, "
13204 +generate_functor_name()+"_groupdef *gbval, "+
13205 generate_functor_name()+"_aggrdef *aggval){\n";
13206 // Variables for execution of the function.
13207 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13209 // use of temporaries depends on the aggregate,
13210 // generate them in generate_aggr_update
13213 // Unpack all remaining attributes
13214 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
13215 for(a=0;a<aggr_tbl.size();a++){
13216 sprintf(tmpstr,"aggval->aggr_var%d",a);
13217 string varname = tmpstr;
13218 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
13221 ret += "\treturn;\n";
13224 //--------------------------------------------------------
13225 // reinitialize an aggregate object
13227 ret += "void reinit_aggregates( "+
13228 generate_functor_name()+"_groupdef *gbval, "+
13229 generate_functor_name()+"_aggrdef *aggval){\n";
13230 // Variables for execution of the function.
13231 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13233 // use of temporaries depends on the aggregate,
13234 // generate them in generate_aggr_update
13236 for(g=0;g<gb_tbl.size();g++){
13237 data_type *gdt = gb_tbl.get_data_type(g);
13238 if(gdt->is_temporal()){
13239 if(gdt->is_buffer_type()){
13240 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13242 sprintf(tmpstr,"\t\t gbval->gb_var%d =last_gb%d;\n",g,g);
13248 // Unpack all remaining attributes
13249 for(a=0;a<aggr_tbl.size();a++){
13250 sprintf(tmpstr,"aggval->aggr_var%d",a);
13251 string varname = tmpstr;
13252 ret.append(generate_aggr_reinitialize(varname,&aggr_tbl,a, schema));
13255 ret += "\treturn;\n";
13262 //---------------------------------------------------
13265 ret += "gs_int32_t flush_needed(){\n";
13266 if(uses_temporal_flush){
13267 ret += "\treturn needs_temporal_flush;\n";
13269 ret += "\treturn 0;\n";
13273 //------------------------------------------------
13274 // time bucket management
13275 ret += "void advance_last_tb(){\n";
13276 ret += "\tlast_gb"+to_string(temporal_gb)+"++;\n";
13278 ret += "void reset_last_tb(){\n";
13279 ret += "\tlast_gb"+to_string(temporal_gb)+" = curr_gb"+to_string(temporal_gb)+";\n";
13282 //---------------------------------------------------
13283 // create output tuple
13284 // Unpack the partial functions ref'd in the where clause,
13285 // select clause. Evaluate the where clause.
13286 // Finally, pack the tuple.
13288 // I need to use special code generation here,
13289 // so I'll leave it in longhand.
13291 ret += "host_tuple create_output_tuple("
13292 +generate_functor_name()+"_groupdef *gbval, "+
13293 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
13295 ret += "\thost_tuple tup;\n";
13296 ret += "\tfailed = false;\n";
13297 ret += "\tgs_retval_t retval = 0;\n";
13299 string gbvar = "gbval->gb_var";
13300 string aggvar = "aggval->";
13303 // First, get the return values from the UDAFS
13304 for(a=0;a<aggr_tbl.size();a++){
13305 if(! aggr_tbl.is_builtin(a)){
13306 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
13307 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
13308 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
13312 set<int> hv_sl_pfcns;
13313 for(w=0;w<having.size();w++){
13314 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
13316 for(s=0;s<select_list.size();s++){
13317 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
13320 // clean up the partial fcn results from any previous execution
13321 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
13324 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
13325 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13326 ret += "\tif(retval){ failed = true; return(tup);}\n";
13329 // Evalaute the HAVING clause
13330 // TODO: this seems to have a ++ operator rather than a + operator.
13331 for(w=0;w<having.size();++w){
13332 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
13335 // Now, compute the size of the tuple.
13337 // Unpack any BUFFER type selections into temporaries
13338 // so that I can compute their size and not have
13339 // to recompute their value during tuple packing.
13340 // I can use regular assignment here because
13341 // these temporaries are non-persistent.
13342 // TODO: should I be using the selvar generation routine?
13344 ret += "//\t\tCompute the size of the tuple.\n";
13345 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
13346 for(s=0;s<select_list.size();s++){
13347 scalarexp_t *se = select_list[s]->se;
13348 data_type *sdt = se->get_data_type();
13349 if(sdt->is_buffer_type() &&
13350 !( (se->get_operator_type() == SE_COLREF) ||
13351 (se->get_operator_type() == SE_AGGR_STAR) ||
13352 (se->get_operator_type() == SE_AGGR_SE) ||
13353 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13354 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13356 sprintf(tmpstr,"selvar_%d",s);
13357 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
13358 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
13362 // The size of the tuple is the size of the tuple struct plus the
13363 // size of the buffers to be copied in.
13365 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
13366 for(s=0;s<select_list.size();s++){
13367 // if(s>0) ret += "+";
13368 scalarexp_t *se = select_list[s]->se;
13369 data_type *sdt = select_list[s]->se->get_data_type();
13370 if(sdt->is_buffer_type()){
13371 if(!( (se->get_operator_type() == SE_COLREF) ||
13372 (se->get_operator_type() == SE_AGGR_STAR) ||
13373 (se->get_operator_type() == SE_AGGR_SE) ||
13374 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13375 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13377 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
13378 ret.append(tmpstr);
13380 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13381 ret.append(tmpstr);
13387 // Allocate tuple data block.
13388 ret += "//\t\tCreate the tuple block.\n";
13389 ret += "\ttup.data = malloc(tup.tuple_size);\n";
13390 ret += "\ttup.heap_resident = true;\n";
13392 // Mark tuple as regular
13393 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
13395 // ret += "\ttup.channel = 0;\n";
13396 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
13397 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
13400 // (Here, offsets are hard-wired. is this a problem?)
13402 ret += "//\t\tPack the fields into the tuple.\n";
13403 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
13404 for(s=0;s<select_list.size();s++){
13405 scalarexp_t *se = select_list[s]->se;
13406 data_type *sdt = se->get_data_type();
13407 if(sdt->is_buffer_type()){
13408 if(!( (se->get_operator_type() == SE_COLREF) ||
13409 (se->get_operator_type() == SE_AGGR_STAR) ||
13410 (se->get_operator_type() == SE_AGGR_SE) ||
13411 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13412 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13414 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
13415 ret.append(tmpstr);
13416 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
13417 ret.append(tmpstr);
13419 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13420 ret.append(tmpstr);
13421 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13422 ret.append(tmpstr);
13425 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13426 ret.append(tmpstr);
13427 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
13432 // Destroy string temporaries
13433 ret += gen_buffer_selvars_dtr(select_list);
13435 ret += "\treturn tup;\n";
13438 //------------------------------------------------------------------
13439 // Cleaning_when : evaluate the cleaning_when clause.
13440 // ASSUME that the udaf return values have already
13441 // been unpacked. delete the string udaf return values at the end.
13443 ret += "bool cleaning_when("
13444 +generate_functor_name()+"_groupdef *gbval, "+
13445 generate_functor_name()+"_aggrdef *aggval){\n";
13447 ret += "\tbool retval = true;\n";
13450 gbvar = "gbval->gb_var";
13451 aggvar = "aggval->";
13454 set<int> clw_pfcns;
13455 for(w=0;w<closing_when.size();w++){
13456 collect_partial_fcns_pr(closing_when[w]->pr, clw_pfcns);
13459 // clean up the partial fcn results from any previous execution
13460 ret += gen_partial_fcn_dtr(partial_fcns,clw_pfcns);
13463 for(pfsi=clw_pfcns.begin();pfsi!=clw_pfcns.end();++pfsi){
13464 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13465 ret += "\tif(retval){ return false;}\n";
13468 // Evalaute the Closing When clause
13469 // TODO: this seems to have a ++ operator rather than a + operator.
13470 for(w=0;w<closing_when.size();++w){
13471 ret += "\tif( !("+generate_predicate_code_fm_aggr(closing_when[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
13475 // Destroy string return vals of UDAFs
13476 for(a=0;a<aggr_tbl.size();a++){
13477 if(! aggr_tbl.is_builtin(a)){
13478 int afcn_id = aggr_tbl.get_fcn_id(a);
13479 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
13480 if(adt->is_buffer_type()){
13481 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
13482 adt->get_hfta_buffer_destroy().c_str(), a );
13488 ret += "\treturn retval;\n";
13494 //-------------------------------------------------------------------
13495 // Temporal update functions
13497 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
13499 // create a temp status tuple
13500 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
13502 ret += gen_init_temp_status_tuple(this->get_node_name());
13505 // (Here, offsets are hard-wired. is this a problem?)
13507 ret += "//\t\tPack the fields into the tuple.\n";
13508 for(s=0;s<select_list.size();s++){
13509 data_type *sdt = select_list[s]->se->get_data_type();
13510 if(sdt->is_temporal()){
13511 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13513 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_gb", "", schema).c_str());
13519 ret += "\treturn 0;\n";
13520 ret += "};};\n\n\n";
13523 //----------------------------------------------------------
13524 // The hash function
13526 ret += "struct "+generate_functor_name()+"_hash_func{\n";
13527 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
13528 "_groupdef *grp) const{\n";
13529 ret += "\t\treturn(0";
13530 for(g=0;g<gb_tbl.size();g++){
13531 data_type *gdt = gb_tbl.get_data_type(g);
13532 if(! gdt->is_temporal()){
13534 if(gdt->use_hashfunc()){
13535 if(gdt->is_buffer_type())
13536 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13538 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13540 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
13545 ret += " >> 32);\n";
13549 //----------------------------------------------------------
13550 // The comparison function
13552 ret += "struct "+generate_functor_name()+"_equal_func{\n";
13553 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
13554 generate_functor_name()+"_groupdef *grp2) const{\n";
13555 ret += "\t\treturn( (";
13558 bool first_exec = true;
13559 for(g=0;g<gb_tbl.size();g++){
13560 data_type *gdt = gb_tbl.get_data_type(g);
13561 if(! gdt->is_temporal()){
13562 if(first_exec){first_exec=false;}else{ hcmpr += ") && (";}
13563 if(gdt->complex_comparison(gdt)){
13564 if(gdt->is_buffer_type())
13565 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
13566 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13568 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
13569 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13571 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
// Builds the C++ source text that instantiates the operator for this node.
// The visible expression concatenates a declaration of the form
//   running_agg_operator<F, F_groupdef, F_aggrdef, F_hash_func, F_equal_func> *opN =
//       new running_agg_operator<...same five arguments...>(params, node name);
// where F is generate_functor_name() and N is int_to_string(i).
//   i      - index used to name the generated pointer variable (op followed by i).
//   params - text spliced in as the leading constructor argument(s); the quoted
//            node name from get_node_name() is appended as the last argument.
// NOTE(review): the embedded line numbers jump from 13588 to 13591 and stop at
// 13603, so the statement head and the closing lines of this function are not
// shown in this listing - confirm against the full source.
13588 string rsgah_qpn::generate_operator(int i, string params){
// Left-hand side: the pointer type with its five template arguments.
13591 " running_agg_operator<" +
13592 generate_functor_name()+","+
13593 generate_functor_name() + "_groupdef, " +
13594 generate_functor_name() + "_aggrdef, " +
13595 generate_functor_name()+"_hash_func, "+
13596 generate_functor_name()+"_equal_func "
// Right-hand side: the new-expression repeats the same template argument list.
13597 "> *op"+int_to_string(i)+" = new running_agg_operator<"+
13598 generate_functor_name()+","+
13599 generate_functor_name() + "_groupdef, " +
13600 generate_functor_name() + "_aggrdef, " +
13601 generate_functor_name()+"_hash_func, "+
13602 generate_functor_name()+"_equal_func "
// Constructor call: caller-supplied params followed by the node name as a string literal.
13603 ">("+params+", \"" + get_node_name() + "\");\n"
13609 // Split aggregation into two HFTA components - sub and superaggregation
13610 // If unable to split the aggregates, empty vector will be returned
//
// Visible flow of this function:
//   1) for every non-builtin aggregate, look up its HFTA super-aggregate and
//      sub-aggregate ids in Ext_fcns and test whether either is negative;
//   2) create a low-level sgah_qpn named with an underscore prefix plus
//      node_name, and a high-level sgah_qpn that keeps node_name and whose
//      table_name is a new tablevar_t built from the low-level name;
//   3) distribute group-by variables, aggregates, the select list, WHERE and
//      HAVING predicates, parameters and definitions between the two nodes;
//   4) push the low-level node, then the high-level node, onto ret_vec.
// NOTE(review): the embedded line numbers skip in several places, so some
// statements (the failure-branch body, the trailing split_hfta_aggr arguments,
// the declaration of pi, the return) and closing braces are not shown in this
// listing - confirm details against the full source.
13611 vector<qp_node *> sgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13613 vector<qp_node *> ret_vec;
13614 int s, p, g, a, o, i;
13617 vector<string> fta_flds, stream_flds;
13618 int t = table_name->get_schema_ref();
13620 // Get the set of interfaces it accesses.
13622 vector<string> sel_names;
13624 // Verify that all of the ref'd UDAFs can be split.
// Only non-builtin aggregates are checked; either id being negative enters
// the failure branch (its body is not shown in this listing).
13626 for(a=0;a<aggr_tbl.size();++a){
13627 if(! aggr_tbl.is_builtin(a)){
13628 int afcn = aggr_tbl.get_fcn_id(a);
13629 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13630 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13631 if(hfta_super_id < 0 || hfta_sub_id < 0){
13637 /////////////////////////////////////////////////////
13638 // Split into aggr/aggr.
// Low-level node: its table_name is assigned this node's table_name pointer
// directly (no copy is made here), so the set_* calls made through
// low_hfta_node->table_name act on that same object.
13641 sgah_qpn *low_hfta_node = new sgah_qpn();
13642 low_hfta_node->table_name = table_name;
13643 low_hfta_node->set_node_name( "_"+node_name );
13644 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// High-level node: keeps the original node name; its table variable is newly
// allocated and named after the low-level node.
13647 sgah_qpn *hi_hfta_node = new sgah_qpn();
13648 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13649 hi_hfta_node->set_node_name( node_name );
13650 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13652 // First, process the group-by variables.
13653 // both low and hi level queries duplicate group-by variables of original query
13656 for(g=0;g<gb_tbl.size();g++){
13657 // Insert the gbvar into both low- and hi level hfta.
// The low-level node gets a dup_se copy of the original definition under the
// same name, table-variable reference and reference type.
13658 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13659 low_hfta_node->gb_tbl.add_gb_var(
13660 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13663 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// A column reference to the group-by name is built, tagged with gb index g and
// the definition's data type, then handed to make_fta_se_ref together with the
// low-level select list; the returned SE is used for the high-level node.
13664 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13665 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13666 gbvar_fta->set_gb_ref(g);
13667 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13668 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13670 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
// The high-level group-by variable is named after the field of the returned
// column reference and is registered with a table-variable reference of -1.
13671 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13672 hi_hfta_node->gb_tbl.add_gb_var(
13673 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13677 // hi_hfta_node->gb_tbl.gb_patterns = gb_tbl.gb_patterns; // pattern processing at highest level
13678 hi_hfta_node->gb_tbl.set_pattern_info( &gb_tbl); // pattern processing at highest level
13680 // SEs in the aggregate definitions.
13681 // They are all safe, so split them up for later processing.
// hfta_aggr_se is declared here and later passed to rehome_fta_se and
// rehome_fta_pr. NOTE(review): the trailing split_hfta_aggr arguments are not
// shown; presumably the map is filled by that call - confirm.
13682 map<int, scalarexp_t *> hfta_aggr_se;
13683 for(a=0;a<aggr_tbl.size();++a){
13684 split_hfta_aggr( &(aggr_tbl), a,
13685 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13686 low_hfta_node->select_list,
13693 // Next, the select list.
// Each select expression is passed through rehome_fta_se and appended to the
// high-level select list under its original name.
13695 for(s=0;s<select_list.size();s++){
13696 bool fta_forbidden = false;
13697 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13698 hi_hfta_node->select_list.push_back(
13699 new select_element(root_se, select_list[s]->name));
13704 // All the predicates in the where clause must execute
13705 // in the low-level hfta.
// Each WHERE predicate is duplicated with dup_pr, wrapped in a new cnf_elem,
// run through analyze_cnf and appended to the low-level WHERE list.
13707 for(p=0;p<where.size();p++){
13708 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13709 cnf_elem *new_cnf = new cnf_elem(new_pr);
13710 analyze_cnf(new_cnf);
13712 low_hfta_node->where.push_back(new_cnf);
13715 // All of the predicates in the having clause must
13716 // execute in the high-level hfta node.
// HAVING predicates go through rehome_fta_pr (with hfta_aggr_se) rather than
// dup_pr before being wrapped and analyzed.
13718 for(p=0;p<having.size();p++){
13719 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13720 cnf_elem *cnf_root = new cnf_elem(pr_root);
13721 analyze_cnf(cnf_root);
13723 hi_hfta_node->having.push_back(cnf_root);
13727 // Copy parameters to both nodes
// Each node receives its own dt->duplicate() result plus the original
// handle_access value for the parameter.
13728 vector<string> param_names = param_tbl->get_param_names();
13730 for(pi=0;pi<param_names.size();pi++){
13731 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13732 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13733 param_tbl->handle_access(param_names[pi]));
13734 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13735 param_tbl->handle_access(param_names[pi]));
// Both nodes are assigned this node's definitions.
13737 low_hfta_node->definitions = definitions;
13738 hi_hfta_node->definitions = definitions;
// Both table variables take machine and interface from the original table
// variable and have ifq set to false.
13741 low_hfta_node->table_name->set_machine(table_name->get_machine());
13742 low_hfta_node->table_name->set_interface(table_name->get_interface());
13743 low_hfta_node->table_name->set_ifq(false);
13745 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13746 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13747 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level node first, then high-level node.
13749 ret_vec.push_back(low_hfta_node);
13750 ret_vec.push_back(hi_hfta_node);
13756 // TODO: add splitting into selection/aggregation
13760 // Split aggregation into two HFTA components - sub and superaggregation
13761 // If unable to split the aggregates, empty vector will be returned
13762 // Similar to sgah, but super aggregate is rsgah, subaggr is sgah
13763 vector<qp_node *> rsgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13765 vector<qp_node *> ret_vec;
13766 int s, p, g, a, o, i;
13769 vector<string> fta_flds, stream_flds;
13770 int t = table_name->get_schema_ref();
13772 // Get the set of interfaces it accesses.
13774 vector<string> sel_names;
13776 // Verify that all of the ref'd UDAFs can be split.
13778 for(a=0;a<aggr_tbl.size();++a){
13779 if(! aggr_tbl.is_builtin(a)){
13780 int afcn = aggr_tbl.get_fcn_id(a);
13781 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13782 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13783 if(hfta_super_id < 0 || hfta_sub_id < 0){
13789 /////////////////////////////////////////////////////
13790 // Split into aggr/aggr.
13793 sgah_qpn *low_hfta_node = new sgah_qpn();
13794 low_hfta_node->table_name = table_name;
13795 low_hfta_node->set_node_name( "_"+node_name );
13796 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
13799 rsgah_qpn *hi_hfta_node = new rsgah_qpn();
13800 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13801 hi_hfta_node->set_node_name( node_name );
13802 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13804 // First, process the group-by variables.
13805 // both low and hi level queries duplicate group-by variables of original query
13808 for(g=0;g<gb_tbl.size();g++){
13809 // Insert the gbvar into both low- and hi level hfta.
13810 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13811 low_hfta_node->gb_tbl.add_gb_var(
13812 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13815 // Insert a ref to the value of the gbvar into the low-level hfta select list.
13816 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13817 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13818 gbvar_fta->set_gb_ref(g);
13819 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13820 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13822 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
13823 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13824 hi_hfta_node->gb_tbl.add_gb_var(
13825 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13830 // SEs in the aggregate definitions.
13831 // They are all safe, so split them up for later processing.
13832 map<int, scalarexp_t *> hfta_aggr_se;
13833 for(a=0;a<aggr_tbl.size();++a){
13834 split_hfta_aggr( &(aggr_tbl), a,
13835 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13836 low_hfta_node->select_list,
13843 // Next, the select list.
13845 for(s=0;s<select_list.size();s++){
13846 bool fta_forbidden = false;
13847 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13848 hi_hfta_node->select_list.push_back(
13849 new select_element(root_se, select_list[s]->name));
13854 // All the predicates in the where clause must execute
13855 // in the low-level hfta.
13857 for(p=0;p<where.size();p++){
13858 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13859 cnf_elem *new_cnf = new cnf_elem(new_pr);
13860 analyze_cnf(new_cnf);
13862 low_hfta_node->where.push_back(new_cnf);
13865 // All of the predicates in the having clause must
13866 // execute in the high-level hfta node.
13868 for(p=0;p<having.size();p++){
13869 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13870 cnf_elem *cnf_root = new cnf_elem(pr_root);
13871 analyze_cnf(cnf_root);
13873 hi_hfta_node->having.push_back(cnf_root);
13876 // Similar for closing when
13877 for(p=0;p<closing_when.size();p++){
13878 predicate_t *pr_root = rehome_fta_pr( closing_when[p]->pr, &hfta_aggr_se);
13879 cnf_elem *cnf_root = new cnf_elem(pr_root);
13880 analyze_cnf(cnf_root);
13882 hi_hfta_node->closing_when.push_back(cnf_root);
13886 // Copy parameters to both nodes
13887 vector<string> param_names = param_tbl->get_param_names();
13889 for(pi=0;pi<param_names.size();pi++){
13890 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13891 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13892 param_tbl->handle_access(param_names[pi]));
13893 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13894 param_tbl->handle_access(param_names[pi]));
13896 low_hfta_node->definitions = definitions;
13897 hi_hfta_node->definitions = definitions;
13900 low_hfta_node->table_name->set_machine(table_name->get_machine());
13901 low_hfta_node->table_name->set_interface(table_name->get_interface());
13902 low_hfta_node->table_name->set_ifq(false);
13904 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13905 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13906 hi_hfta_node->table_name->set_ifq(false);
13908 ret_vec.push_back(low_hfta_node);
13909 ret_vec.push_back(hi_hfta_node);
13915 // TODO: add splitting into selection/aggregation
13918 //---------------------------------------------------------------
13919 // Code for propagating Protocol field source information
// Rebuild scalar expression `se` as an expression over protocol-level fields.
// Dispatches on se->get_operator_type(); every visible success path returns a
// fresh node (new scalarexp_t / make_param_reference / dup_se), never `se` itself.
//   se       expression to resolve.
//   src_vec  one field-name -> expression map per input source, indexed by the
//            column reference's tablevar ref; an entry may be NULL.
//   gb_tbl   group-by table used to expand group-by references; the non-aggregating
//            create_protocol_se callers in this file pass NULL.
//   Schema   consulted for the schema type of a column reference.
// NOTE(review): presumably returns NULL when a column or operand cannot be traced
// back to a protocol field (mrg_qpn::create_protocol_se tests map entries against
// NULL) -- confirm against the failure paths of this function.
13922 scalarexp_t *resolve_protocol_se(scalarexp_t *se, vector<map<string, scalarexp_t *> *> &src_vec, gb_table *gb_tbl, table_list *Schema){
13923 scalarexp_t *rse, *lse,*p_se, *gb_se;
13924 int tno, schema_type;
13925 map<string, scalarexp_t *> *pse_map;
// Select the handling for this kind of expression node.
13927 switch(se->get_operator_type()){
// Literal: wrap se's literal in a new expression node.
13929 return new scalarexp_t(se->get_literal());
// Parameter: build a new parameter reference named by se->get_op().
13931 return scalarexp_t::make_param_reference(se->get_op().c_str());
// Group-by reference: resolve the definition of the referenced group-by variable.
// NOTE(review): the diagnostic reports a NULL gb_tbl, yet the very next statement
// dereferences gb_tbl; unless an exit/return follows the message this crashes -- confirm.
13935 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, se->gb_ref=%d, but gb_tbl is NULL\n",se->get_gb_ref());
13936 gb_se = gb_tbl->get_def(se->get_gb_ref());
13937 return resolve_protocol_se(gb_se,src_vec,gb_tbl,Schema);
// Column of a table whose schema type is PROTOCOL_SCHEMA: already a protocol
// field, so return a copy of the reference.
13940 schema_type = Schema->get_schema_type(se->get_colref()->get_schema_ref());
13941 if(schema_type == PROTOCOL_SCHEMA)
13942 return dup_se(se,NULL);
// Otherwise the column comes from an input source: look it up in that source's map.
// NOTE(review): tno is an int compared against an unsigned size(), so a negative
// tno also trips this test. Only a message is visible here; confirm execution
// does not fall through to src_vec[tno] with an out-of-range index.
// NOTE(review): src_vec.size() is a size_t printed with %lu; %zu is the portable
// conversion.
13944 tno = se->get_colref()->get_tablevar_ref();
13945 if(tno >= src_vec.size()){
13946 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, tno=%d, src_vec.size()=%lu\n",tno,src_vec.size());
// A NULL entry means this source supplied no protocol map.
13948 if(src_vec[tno] == NULL)
// NOTE: map::operator[] default-inserts a NULL entry into the source's map when
// the field name is absent, so this lookup can modify the source's map.
13951 pse_map =src_vec[tno];
13952 p_se = (*pse_map)[se->get_colref()->get_field()];
// Hand back a copy of the source's expression for this field.
13955 return dup_se(p_se,NULL);
// Single-operand expression: resolve the operand, then rebuild with the same operator.
13957 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13961 return new scalarexp_t(se->get_op().c_str(),lse);
// Two-operand expression: resolve left and right, then rebuild with the same operator.
13963 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13966 rse = resolve_protocol_se(se->get_right_se(),src_vec,gb_tbl,Schema);
13969 return new scalarexp_t(se->get_op().c_str(),lse,rse);
// Fill protocol_map for a select/project node: each select-list entry's name is
// mapped to its expression resolved by resolve_protocol_se.
//   q_sources  input nodes of this operator (taken by value, so the vector is
//              copied on each call); entries may be NULL.
//   Schema     forwarded to resolve_protocol_se.
13983 void spx_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
// src_vec[i] holds the protocol map of source i.
13985 vector<map<string, scalarexp_t *> *> src_vec;
// Non-NULL sources contribute get_protocol_se(); a NULL placeholder keeps the
// indices aligned with q_sources (presumably the else arm of the test -- confirm).
13987 for(i=0;i<q_sources.size();i++){
13988 if(q_sources[i] != NULL)
13989 src_vec.push_back(q_sources[i]->get_protocol_se());
13991 src_vec.push_back(NULL);
// No group-by table applies to this node type, so gb_tbl is passed as NULL.
13994 for(i=0;i<select_list.size();i++){
13995 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// Fill protocol_map for an equality hash join and record, per hash predicate,
// the resolved left and right expressions in hash_src_l / hash_src_r.
//   q_sources  input nodes of this operator (taken by value, so the vector is
//              copied on each call); entries may be NULL.
//   Schema     forwarded to resolve_protocol_se.
13999 void join_eq_hash_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
// src_vec[i] holds the protocol map of source i.
14001 vector<map<string, scalarexp_t *> *> src_vec;
// Non-NULL sources contribute get_protocol_se(); a NULL placeholder keeps the
// indices aligned with q_sources (presumably the else arm of the test -- confirm).
14003 for(i=0;i<q_sources.size();i++){
14004 if(q_sources[i] != NULL)
14005 src_vec.push_back(q_sources[i]->get_protocol_se());
14007 src_vec.push_back(NULL);
// Map each output column name to its resolved expression; gb_tbl is NULL here.
14010 for(i=0;i<select_list.size();i++){
14011 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// One resolved left/right pair is appended per hash_eq predicate, in order.
14014 for(i=0;i<hash_eq.size();i++){
14015 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
14016 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
// Fill protocol_map for a filter join and record, per hash predicate, the
// resolved left and right expressions in hash_src_l / hash_src_r.
//   q_sources  input nodes of this operator (taken by value, so the vector is
//              copied on each call); entries may be NULL.
//   Schema     forwarded to resolve_protocol_se.
14020 void filter_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
// src_vec[i] holds the protocol map of source i.
14022 vector<map<string, scalarexp_t *> *> src_vec;
// Non-NULL sources contribute get_protocol_se(); a NULL placeholder keeps the
// indices aligned with q_sources (presumably the else arm of the test -- confirm).
14024 for(i=0;i<q_sources.size();i++){
14025 if(q_sources[i] != NULL)
14026 src_vec.push_back(q_sources[i]->get_protocol_se());
14028 src_vec.push_back(NULL);
// Map each output column name to its resolved expression; gb_tbl is NULL here.
14031 for(i=0;i<select_list.size();i++){
14032 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// One resolved left/right pair is appended per hash_eq predicate, in order.
14035 for(i=0;i<hash_eq.size();i++){
14036 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
14037 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
// Fill protocol_map for a watchlist join and record, per key field, the resolved
// left and right expressions of that field's predicate in hash_src_l / hash_src_r.
//   q_sources  input nodes of this operator (taken by value, so the vector is
//              copied on each call); entries may be NULL.
//   Schema     forwarded to resolve_protocol_se.
14041 void watch_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
// src_vec[i] holds the protocol map of source i.
14043 vector<map<string, scalarexp_t *> *> src_vec;
// Non-NULL sources contribute get_protocol_se(); a NULL placeholder keeps the
// indices aligned with q_sources (presumably the else arm of the test -- confirm).
14045 for(i=0;i<q_sources.size();i++){
14046 if(q_sources[i] != NULL)
14047 src_vec.push_back(q_sources[i]->get_protocol_se());
14049 src_vec.push_back(NULL);
// Map each output column name to its resolved expression; gb_tbl is NULL here.
14052 for(i=0;i<select_list.size();i++){
14053 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// Unlike the other joins, hash_eq is indexed by key field name rather than position.
// NOTE(review): if hash_eq is a std::map, operator[] inserts a null entry for a
// name it does not contain and ->pr would then dereference it -- confirm every
// key_flds entry has a predicate.
14056 for(i=0;i<key_flds.size();i++){
14057 string kfld = key_flds[i];
14058 hash_src_l.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_left_se(),src_vec,NULL,Schema));
14059 hash_src_r.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_right_se(),src_vec,NULL,Schema));
// Fill protocol_map for an aggregation node and append the resolved definition
// of every group-by variable to gb_sources.
//   q_sources  input nodes of this operator (taken by value, so the vector is
//              copied on each call); entries may be NULL.
//   Schema     forwarded to resolve_protocol_se.
14064 void sgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
// src_vec[i] holds the protocol map of source i.
14066 vector<map<string, scalarexp_t *> *> src_vec;
// Non-NULL sources contribute get_protocol_se(); a NULL placeholder keeps the
// indices aligned with q_sources (presumably the else arm of the test -- confirm).
14068 for(i=0;i<q_sources.size();i++){
14069 if(q_sources[i] != NULL)
14070 src_vec.push_back(q_sources[i]->get_protocol_se());
14072 src_vec.push_back(NULL);
// This node's own gb_tbl is supplied so group-by references in the select list
// can be expanded to their definitions.
14075 for(i=0;i<select_list.size();i++){
14076 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// gb_sources gets one entry per group-by variable, in gb_tbl order.
14079 for(i=0;i<gb_tbl.size();i++)
14080 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Fill protocol_map for this aggregation node and append the resolved definition
// of every group-by variable to gb_sources.
//   q_sources  input nodes of this operator (taken by value, so the vector is
//              copied on each call); entries may be NULL.
//   Schema     forwarded to resolve_protocol_se.
14084 void rsgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
// src_vec[i] holds the protocol map of source i.
14086 vector<map<string, scalarexp_t *> *> src_vec;
// Non-NULL sources contribute get_protocol_se(); a NULL placeholder keeps the
// indices aligned with q_sources (presumably the else arm of the test -- confirm).
14088 for(i=0;i<q_sources.size();i++){
14089 if(q_sources[i] != NULL)
14090 src_vec.push_back(q_sources[i]->get_protocol_se());
14092 src_vec.push_back(NULL);
// This node's own gb_tbl is supplied so group-by references in the select list
// can be expanded to their definitions.
14095 for(i=0;i<select_list.size();i++){
14096 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// gb_sources gets one entry per group-by variable, in gb_tbl order.
14099 for(i=0;i<gb_tbl.size();i++)
14100 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Fill protocol_map for this aggregation node and append the resolved definition
// of every group-by variable to gb_sources.
//   q_sources  input nodes of this operator (taken by value, so the vector is
//              copied on each call); entries may be NULL.
//   Schema     forwarded to resolve_protocol_se.
14103 void sgahcwcb_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
// src_vec[i] holds the protocol map of source i.
14105 vector<map<string, scalarexp_t *> *> src_vec;
// Non-NULL sources contribute get_protocol_se(); a NULL placeholder keeps the
// indices aligned with q_sources (presumably the else arm of the test -- confirm).
14107 for(i=0;i<q_sources.size();i++){
14108 if(q_sources[i] != NULL)
14109 src_vec.push_back(q_sources[i]->get_protocol_se());
14111 src_vec.push_back(NULL);
// This node's own gb_tbl is supplied so group-by references in the select list
// can be expanded to their definitions.
14114 for(i=0;i<select_list.size();i++){
14115 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// gb_sources gets one entry per group-by variable, in gb_tbl order.
14118 for(i=0;i<gb_tbl.size();i++)
14119 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// Fill protocol_map for a merge node. For every field of table_layout, the
// expression registered under that field name by the first source is compared
// (is_equivalent_se_base) with the one registered by each further source; the
// field is then mapped either to the first source's expression or to NULL.
// NOTE(review): presumably the expression is kept only when every source agrees
// and NULL is stored otherwise -- confirm the condition selecting between the
// two protocol_map assignments.
//   q_sources  input nodes being merged (taken by value, so the vector is
//              copied on each call); must be non-empty.
//   Schema     forwarded to is_equivalent_se_base.
14122 void mrg_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
// Expression the first source registers for the field being examined.
14124 scalarexp_t *first_se;
// src_vec[i] holds the protocol map of source i; pse_map is a cursor into it.
14126 vector<map<string, scalarexp_t *> *> src_vec;
14127 map<string, scalarexp_t *> *pse_map;
// Non-NULL sources contribute get_protocol_se(); a NULL placeholder keeps the
// indices aligned with q_sources (presumably the else arm of the test -- confirm).
14129 for(i=0;i<q_sources.size();i++){
14130 if(q_sources[i] != NULL)
14131 src_vec.push_back(q_sources[i]->get_protocol_se());
14133 src_vec.push_back(NULL);
// A merge with no inputs is reported as an internal error.
14136 if(q_sources.size() == 0){
14137 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::create_protocol_se, q_sources.size() == 0\n");
// get_fields() result is copied into a local vector before iterating.
14141 vector<field_entry *> tbl_flds = table_layout->get_fields();
14142 for(f=0;f<tbl_flds.size();f++){
// NOTE(review): src_vec[0] is dereferenced without a NULL test although the loop
// above can store NULL entries; a NULL first source would crash here.
// NOTE: map::operator[] inserts a NULL entry into the source's own map when the
// field is missing, so this lookup has a side effect on the source.
14144 string fld_nm = tbl_flds[f]->get_name();
14145 pse_map = src_vec[0];
14146 first_se = (*pse_map)[fld_nm];
14147 if(first_se == NULL)
// Compare against the remaining sources; the loop stops once match becomes false.
// NOTE(review): src_vec[s] is likewise dereferenced without a NULL test.
14149 for(s=1;s<src_vec.size() && match;s++){
14150 pse_map = src_vec[s];
14151 scalarexp_t *match_se = (*pse_map)[fld_nm];
14152 if(match_se == NULL)
14155 match = is_equivalent_se_base(first_se, match_se, Schema);
// The stored pointer is the first source's own expression object, not a copy.
14158 protocol_map[fld_nm] = first_se;
14160 protocol_map[fld_nm] = NULL;
14164 void watch_tbl_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){