1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
16 // Create, manipulate, and dump query plans.
18 #include "query_plan.h"
19 #include "analyze_fta.h"
20 #include "generate_utils.h"
26 extern string hash_nums[NRANDS]; // for fast hashing
// Visits every character position of s; s is taken by non-const reference.
// NOTE(review): the loop body is not visible in this excerpt. Presumably it
// rewrites characters of s in place -- confirm against the full file.
31 void untaboo(string &s){
33 for(c=0;c<s.size();++c){
40 // mrg_qpn constructor, define here to avoid
41 // circular references in the .h file
// Builds a merge node from the filter-join node spx.
//   spx     : node whose select list, parameters and definitions are reused.
//   n_name  : name given to the new table layout (also used in diagnostics).
//   sources : one merge colref / tablevar pair is created per entry.
//   ifaces, ifdb : forwarded unchanged to resolve_slack().
42 mrg_qpn::mrg_qpn(filter_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): this pointer copy is overwritten further down by a freshly
// allocated param_table, so it looks redundant -- confirm.
43 param_tbl = spx->param_tbl;
46 field_entry_list *fel = new field_entry_list();
// One field entry per select-list element, typed from a duplicate of the
// element's data type.
51 for(i=0;i<spx->select_list.size();++i){
52 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
53 if(dt->is_temporal()){
// Presumably merge_fieldpos < 0 means "no merge field chosen yet"; the
// assignment itself is not visible in this excerpt.
54 if(merge_fieldpos < 0){
// A further temporal field only produces a warning; the message reports that
// the field at merge_fieldpos is the one used.
// NOTE(review): the watch_join_qpn overload calls dt->reset_temporal() right
// after the same warning; no such call is visible here -- confirm.
57 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
62 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
63 fel->append_field(fe);
// NOTE(review): the guard and what follows this message are not visible; the
// code below indexes select_list[merge_fieldpos], so this path presumably
// does not continue -- confirm.
67 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
// The output schema of the merge node is a stream schema over the fields
// collected above.
70 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
72 // NEED TO HANDLE USER_SPECIFIED SLACK
73 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
74 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
75 // if(this->slack == NULL)
76 // fprintf(stderr,"Zero slack.\n");
78 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// For source i: range variable "_m<i>", a colref on the merge field bound to
// tablevar index i, and a tablevar for sources[i] carrying that range variable.
80 for(i=0;i<sources.size();i++){
81 std::string rvar = "_m"+int_to_string(i);
82 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
83 mvars[i]->set_tablevar_ref(i);
84 fm.push_back(new tablevar_t(sources[i].c_str()));
85 fm[i]->set_range_var(rvar);
// Build a separate parameter table: each parameter of spx is re-added under
// the same name with a duplicated data type and the same handle-access flag.
88 param_tbl = new param_table();
89 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
91 for(pi=0;pi<param_names.size();pi++){
92 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
93 param_tbl->add_param(param_names[pi],dt->duplicate(),
94 spx->param_tbl->handle_access(param_names[pi]));
96 definitions = spx->definitions;
// Builds a merge node from the watchlist-join node spx.
//   spx     : node whose select list, parameters and definitions are reused.
//   n_name  : name given to the new table layout (also used in diagnostics).
//   sources : one merge colref / tablevar pair is created per entry.
//   ifaces, ifdb : forwarded unchanged to resolve_slack().
101 mrg_qpn::mrg_qpn(watch_join_qpn *spx, std::string n_name, std::vector<std::string> &sources, std::vector<std::pair<std::string, std::string> > &ifaces, ifq_t *ifdb){
// NOTE(review): this pointer copy is overwritten further down by a freshly
// allocated param_table, so it looks redundant -- confirm.
102 param_tbl = spx->param_tbl;
105 field_entry_list *fel = new field_entry_list();
// One field entry per select-list element, typed from a duplicate of the
// element's data type.
110 for(i=0;i<spx->select_list.size();++i){
111 data_type *dt = spx->select_list[i]->se->get_data_type()->duplicate();
112 if(dt->is_temporal()){
// Presumably merge_fieldpos < 0 means "no merge field chosen yet"; the
// assignment itself is not visible in this excerpt.
113 if(merge_fieldpos < 0){
// A further temporal field is reported and then has its temporal marking
// reset on the duplicated type, so the field at merge_fieldpos stays in use.
116 fprintf(stderr,"Warning: Merge subquery %s found two temporal fields (%s, %s), using %s\n", n_name.c_str(), spx->select_list[merge_fieldpos]->name.c_str(), spx->select_list[i]->name.c_str(), spx->select_list[merge_fieldpos]->name.c_str() );
117 dt->reset_temporal();
121 field_entry *fe = dt->make_field_entry(spx->select_list[i]->name);
122 fel->append_field(fe);
// No temporal field found in the select list.
// NOTE(review): what follows the message is not visible; the code below
// indexes select_list[merge_fieldpos], so this path presumably does not
// continue -- confirm.
125 if(merge_fieldpos<0){
126 fprintf(stderr,"ERROR, no temporal attribute for merge subquery %s\n",n_name.c_str());
// The output schema of the merge node is a stream schema over the fields
// collected above.
129 table_layout = new table_def( n_name.c_str(), NULL, NULL, fel, STREAM_SCHEMA);
131 // NEED TO HANDLE USER_SPECIFIED SLACK
132 this->resolve_slack(spx->select_list[merge_fieldpos]->se,
133 spx->select_list[merge_fieldpos]->name, ifaces, ifdb,NULL);
134 // if(this->slack == NULL)
135 // fprintf(stderr,"Zero slack.\n");
137 // fprintf(stderr,"slack is %s\n",slack->to_string().c_str());
// For source i: range variable "_m<i>", a colref on the merge field bound to
// tablevar index i, and a tablevar for sources[i] carrying that range variable.
139 for(i=0;i<sources.size();i++){
140 std::string rvar = "_m"+int_to_string(i);
141 mvars.push_back(new colref_t(rvar.c_str(), spx->select_list[merge_fieldpos]->name.c_str()));
142 mvars[i]->set_tablevar_ref(i);
143 fm.push_back(new tablevar_t(sources[i].c_str()));
144 fm[i]->set_range_var(rvar);
// Build a separate parameter table: each parameter of spx is re-added under
// the same name with a duplicated data type and the same handle-access flag.
147 param_tbl = new param_table();
148 std::vector<std::string> param_names = spx->param_tbl->get_param_names();
150 for(pi=0;pi<param_names.size();pi++){
151 data_type *dt = spx->param_tbl->get_data_type(param_names[pi]);
152 param_tbl->add_param(param_names[pi],dt->duplicate(),
153 spx->param_tbl->handle_access(param_names[pi]));
155 definitions = spx->definitions;
162 // This function translates an analyzed parse tree
163 // into one or more query nodes (qp_node).
164 // Currently only one node is created, but some query
165 // fragments might create more than one query node,
166 // e.g. aggregation over a join, or nested subqueries
167 // in the FROM clause (unless this is handled at parse tree
168 // analysis time). At this stage, they will be linked
169 // by the names in the FROM clause.
170 // INVARIANT : if more than one query node is returned,
171 // the last one represents the output of the query.
// Chooses the qp_node subclass that matches the analyzed query qs, constructs
// it from (qs, Schema), records it as plan_root and appends it to local_plan.
// The visible code then sets the node name and definitions of plan_root from qs.
// NOTE(review): the statement that returns local_plan is not visible in this
// excerpt -- confirm.
172 vector<qp_node *> create_query_nodes(query_summary_class *qs,table_list *Schema){
174 // Classify the query.
176 vector <qp_node *> local_plan;
180 // I should probably move a lot of this code
181 // into the qp_node constructors,
182 // and have this code focus on building the query plan tree.
// Watchlist query: a single watch_tbl_qpn.
185 if(qs->query_type == WATCHLIST_QUERY){
186 watch_tbl_qpn *watchnode = new watch_tbl_qpn(qs, Schema);
189 plan_root = watchnode;
190 local_plan.push_back(watchnode);
// Merge query: a single mrg_qpn.
195 if(qs->query_type == MERGE_QUERY){
196 mrg_qpn *merge_node = new mrg_qpn(qs,Schema);
199 plan_root = merge_node;
200 local_plan.push_back(merge_node);
// NOTE(review): the next line is plain text, not code, so the source-splitting
// and debug-printing statements that follow are presumably inside a disabled
// (commented-out) region whose delimiters are not visible here -- confirm.
203 Do not split sources until we are done with optimizations
204 vector<mrg_qpn *> split_merge = merge_node->split_sources();
205 local_plan.insert(local_plan.begin(), split_merge.begin(), split_merge.end());
207 // If children are created, add them to the schema.
210 printf("split_merge size is %d\n",split_merge.size());
211 for(i=1;i<split_merge.size();++i){
212 Schema->add_table(split_merge[i]->get_fields());
213 printf("Adding split merge table %d\n",i);
218 printf("Did split sources on %s:\n",qs->query_name.c_str());
220 for(ss=0;ss<local_plan.size();ss++){
221 printf("node %d, name=%s, sources=",ss,local_plan[ss]->get_node_name().c_str());
222 vector<tablevar_t *> inv = local_plan[ss]->get_input_tbls();
224 for(nn=0;nn<inv.size();nn++){
225 printf("%s ",inv[nn]->to_string().c_str());
234 if(qs->query_type == SELECT_QUERY){
236 // Select / Aggregation / Join
// No group-by variables and no aggregates: selection/projection or a join.
237 if(qs->gb_tbl->size() == 0 && qs->aggr_tbl->size() == 0){
// Exactly one FROM entry: select/project node.
239 if(qs->fta_tree->get_from()->size() == 1){
240 spx_qpn *spx_node = new spx_qpn(qs,Schema);
242 plan_root = spx_node;
243 local_plan.push_back(spx_node);
// Otherwise the join node type follows the properties of the FROM clause.
245 if(qs->fta_tree->get_from()->get_properties() == FILTER_JOIN_PROPERTY){
246 filter_join_qpn *join_node = new filter_join_qpn(qs,Schema);
247 plan_root = join_node;
248 local_plan.push_back(join_node);
250 if(qs->fta_tree->get_from()->get_properties() == WATCHLIST_JOIN_PROPERTY){
251 watch_join_qpn *join_node = new watch_join_qpn(qs,Schema);
252 plan_root = join_node;
253 local_plan.push_back(join_node);
// Presumably the fall-through case for all other joins (the else is not
// visible in this excerpt).
255 join_eq_hash_qpn *join_node = new join_eq_hash_qpn(qs,Schema);
256 plan_root = join_node;
257 local_plan.push_back(join_node);
// Aggregation queries. Any referenced states, supergroups or cleaning-by
// predicates select sgahcwcb_qpn.
264 if(qs->states_refd.size() || qs->sg_tbl.size() || qs->cb_cnf.size()){
265 sgahcwcb_qpn *sgahcwcb_node = new sgahcwcb_qpn(qs,Schema);
266 plan_root = sgahcwcb_node;
267 local_plan.push_back(sgahcwcb_node);
// A non-empty closew_cnf selects rsgah_qpn.
269 if(qs->closew_cnf.size()){
270 rsgah_qpn *rsgah_node = new rsgah_qpn(qs,Schema);
271 plan_root = rsgah_node;
272 local_plan.push_back(rsgah_node);
// Presumably the default aggregation node (the else is not visible here).
274 sgah_qpn *sgah_node = new sgah_qpn(qs,Schema);
275 plan_root = sgah_node;
276 local_plan.push_back(sgah_node);
283 // Get the query name and other definitions.
284 plan_root->set_node_name( qs->query_name);
285 plan_root->set_definitions( qs->definitions) ;
288 // return(plan_root);
// Renders the scalar expression se as query text, recursing into operands.
//   aggr_tbl : used to look up the operand(s) of aggregates that se refers to
//              through get_aggr_ref().
// Returns the rendered string; an unrecognized operator type yields an error
// string instead.
// NOTE(review): the case labels of the switch are not visible in this
// excerpt; the per-line notes below describe only what each statement does.
294 string se_to_query_string(scalarexp_t *se, aggregate_table *aggr_tbl){
299 vector<scalarexp_t *> operand_list;
// Presumably sets su_ind, the marker inserted between the aggregate name and
// its argument list below -- the assignment is not visible here.
302 if(se->is_superaggr())
305 switch(se->get_operator_type()){
// Literal.
307 l_str = se->get_literal()->to_query_string();
// "$" followed by the operator string of se.
310 l_str = "$" + se->get_op();
// Column reference.
313 l_str = se->get_colref()->to_query_string() ;
// Unary operator: op( operand ).
316 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
318 return se->get_op()+"( "+l_str+" )";;
// Binary operator: (left)op(right).
320 l_str = se_to_query_string(se->get_left_se(),aggr_tbl);
321 r_str = se_to_query_string(se->get_right_se(),aggr_tbl);
322 return( "("+l_str+")"+se->get_op()+"("+r_str+")" );
// Star aggregate: op(*).
324 return( se->get_op() + su_ind + "(*)");
// Aggregate over an expression; the operand comes from aggr_tbl.
326 l_str = se_to_query_string(aggr_tbl->get_aggr_se(se->get_aggr_ref()),aggr_tbl);
327 return( se->get_op() + su_ind + "(" + l_str + ")" );
// Operands come from aggr_tbl when se has an aggregate reference (>= 0),
// otherwise from se itself.
329 if(se->get_aggr_ref() >= 0)
330 operand_list = aggr_tbl->get_operand_list(se->get_aggr_ref());
332 operand_list = se->get_operands();
334 ret = se->get_op() + su_ind + "(";
335 for(p=0;p<operand_list.size();p++){
336 l_str = se_to_query_string(operand_list[p],aggr_tbl);
344 return "ERROR SE op type not recognized in se_to_query_string.\n";
// Renders the predicate pr as query text, using se_to_query_string for scalar
// operands and recursing for sub-predicates.
//   aggr_tbl : passed through to se_to_query_string.
// An unknown predicate operator type is reported on stderr with the line and
// character position of pr.
// NOTE(review): the case labels of the switch are not visible in this
// excerpt; the per-line notes below describe only what each statement does.
348 string pred_to_query_str(predicate_t *pr, aggregate_table *aggr_tbl){
353 vector<literal_t *> llist;
354 vector<scalarexp_t *> op_list;
356 switch(pr->get_operator_type()){
// IN predicate: "<left> IN [" followed by the literals.
358 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
359 ret = l_str + " IN [";
360 llist = pr->get_lit_vec();
361 for(l=0;l<llist.size();l++){
363 ret += llist[l]->to_query_string();
// Comparison: "<left> op <right>".
369 l_str = se_to_query_string(pr->get_left_se(),aggr_tbl);
370 r_str = se_to_query_string(pr->get_right_se(),aggr_tbl);
371 return( l_str + " " + pr->get_op() + " " + r_str );
// Unary predicate operator: op( sub-predicate ).
373 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
374 return(pr->get_op() + "( " + l_str + " )");
// Binary predicate operator.
// NOTE(review): the right operand is emitted before the left one here
// ("( r ) op ( l )") -- confirm this ordering is intended.
376 l_str = pred_to_query_str(pr->get_left_pr(),aggr_tbl);
377 r_str = pred_to_query_str(pr->get_right_pr(),aggr_tbl);
378 return("( " + r_str + " )" + pr->get_op() + "( " + l_str + " )");
// Predicate with an operand list: "op[" followed by the rendered operands.
380 ret = pr->get_op()+"[";
381 op_list = pr->get_op_list();
382 for(o=0;o<op_list.size();++o){
384 ret += se_to_query_string(op_list[o],aggr_tbl);
389 fprintf(stderr,"INTERNAL ERROR in pred_to_query_str, line %d, character %d, unknown predicate operator type %d\n",
390 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
399 // Build a selection list,
400 // but avoid adding duplicate SEs.
// Looks for an element of lfta_select_list whose expression is equivalent to
// se (per is_equivalent_se). When none is found, se is appended under a
// placeholder name and the index of the new last element is returned.
// NOTE(review): the rest of the parameter list and the statements executed on
// a match are not visible in this excerpt; callers pass a third argument
// named new_element -- confirm its meaning against the full file.
403 int add_select_list_nodup(vector<select_element *> &lfta_select_list, scalarexp_t *se,
407 for(s=0;s<lfta_select_list.size();s++){
408 if(is_equivalent_se(lfta_select_list[s]->se, se)){
// No equivalent entry: append se; callers replace the placeholder name.
413 lfta_select_list.push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
414 return(lfta_select_list.size()-1);
419 // TODO: The generated colref should be tied to the tablevar
420 // representing the lfta output. For now, always 0.
// Registers se in lfta_select_list (without duplicating an equivalent entry)
// and builds a new column-reference expression that refers to that entry by
// name.
//   h_tvref : tablevar index stored on the new colref.
// The new expression takes over the decorations of se.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably new_se is returned -- confirm.
422 scalarexp_t *make_fta_se_ref(vector<select_element *> &lfta_select_list, scalarexp_t *se, int h_tvref){
424 int fta_se_nbr = add_select_list_nodup(lfta_select_list, se, new_element);
// Either the name already stored on the entry is used ...
427 colname = lfta_select_list[fta_se_nbr]->name;
// ... or a name is imputed and stored on the entry. The condition choosing
// between the two is not visible here (presumably new_element).
429 colname = impute_colname(lfta_select_list, se);
430 lfta_select_list[fta_se_nbr]->name = colname;
433 // TODO: fill in the tablevar and schema of the colref here.
434 colref_t *new_cr = new colref_t(colname.c_str());
435 new_cr->set_tablevar_ref(h_tvref);
438 scalarexp_t *new_se= new scalarexp_t(new_cr);
439 new_se->use_decorations_of(se);
445 // Build a selection list,
446 // but avoid adding duplicate SEs.
// Pointer-taking overload: looks for an element of *lfta_select_list whose
// expression is equivalent to se (per is_equivalent_se). When none is found,
// se is appended under a placeholder name and the index of the new last
// element is returned.
// NOTE(review): the rest of the parameter list and the statements executed on
// a match are not visible in this excerpt. lfta_select_list is dereferenced
// without a null check in the visible lines.
449 int add_select_list_nodup(vector<select_element *> *lfta_select_list, scalarexp_t *se,
453 for(s=0;s<lfta_select_list->size();s++){
454 if(is_equivalent_se((*lfta_select_list)[s]->se, se)){
// No equivalent entry: append se; callers replace the placeholder name.
459 lfta_select_list->push_back(new select_element(se,"NoNameIn:add_select_list_nodup"));
460 return(lfta_select_list->size()-1);
465 // TODO: The generated colref should be tied to the tablevar
466 // representing the lfta output. For now, always 0.
// Variant for several select lists: registers se in the list selected by
// h_tvref (without duplicating an equivalent entry) and builds a new
// column-reference expression that refers to that entry by name.
//   h_tvref : index into lfta_select_list and also the tablevar index stored
//             on the new colref.
// The new expression takes over the decorations of se.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably new_se is returned -- confirm.
468 scalarexp_t *make_fta_se_ref(vector<vector<select_element *> *> &lfta_select_list, scalarexp_t *se, int h_tvref){
// NOTE(review): h_tvref is used as an index with no range check visible here.
470 vector<select_element *> *the_sel_list = lfta_select_list[h_tvref];
471 int fta_se_nbr = add_select_list_nodup(the_sel_list, se, new_element);
// Either the name already stored on the entry is used ...
474 colname = (*the_sel_list)[fta_se_nbr]->name;
// ... or a name is imputed and stored on the entry. The condition choosing
// between the two is not visible here (presumably new_element).
476 colname = impute_colname(*the_sel_list, se);
477 (*the_sel_list)[fta_se_nbr]->name = colname;
480 // TODO: fill in the tablevar and schema of the colref here.
481 colref_t *new_cr = new colref_t(colname.c_str());
482 new_cr->set_tablevar_ref(h_tvref);
485 scalarexp_t *new_se= new scalarexp_t(new_cr);
486 new_se->use_decorations_of(se);
495 // Test if a se can be evaluated at the fta.
496 // check forbidden types (e.g. float), forbidden operations
497 // between types (e.g. divide a long long), forbidden operations
498 // (too expensive, not implemented).
500 // Return true if not forbidden, false if forbidden
502 // TODO: the parameter aggr_tbl is not used, delete it.
// Recursively checks the scalar expression se: data types via
// fta_legal_type(), operator/type combinations via fta_legal_operation(), and
// external functions via Ext_fcns->fta_legal().
//   aggr_tbl : only passed on to the recursive calls.
//   Ext_fcns : external function table used to resolve function calls.
// NOTE(review): the case labels and the early "return false" statements are
// not visible in this excerpt.
504 bool check_fta_forbidden_se(scalarexp_t *se,
505 aggregate_table *aggr_tbl,
506 ext_fcn_list *Ext_fcns
510 vector<scalarexp_t *> operand_list;
511 vector<data_type *> dt_signature;
512 data_type *dt = se->get_data_type();
516 switch(se->get_operator_type()){
// Leaf expressions: legality depends only on the data type.
520 return( se->get_data_type()->fta_legal_type() );
// Unary operator: the operand is checked first, then the operation on the
// operand's type.
524 if(!check_fta_forbidden_se(se->get_left_se(), aggr_tbl, Ext_fcns))
527 dt->fta_legal_operation(se->get_left_se()->get_data_type(), se->get_op())
// Binary operator: both operands are checked, then the operation on the two
// operand types.
530 if(!check_fta_forbidden_se(se->get_left_se(),aggr_tbl, Ext_fcns))
532 if(!check_fta_forbidden_se(se->get_right_se(),aggr_tbl, Ext_fcns))
534 return(dt->fta_legal_operation(se->get_left_se()->get_data_type(),
535 se->get_right_se()->get_data_type(),
540 // return true, aggregate fta-safeness is determined elsewhere.
// An expression that carries an aggregate reference is accepted here.
547 if(se->get_aggr_ref() >= 0) return true;
// Function call: every operand is checked and its data type collected into
// the signature used for the lookup.
549 operand_list = se->get_operands();
550 for(p=0;p<operand_list.size();p++){
551 if(!check_fta_forbidden_se(operand_list[p],aggr_tbl, Ext_fcns))
553 dt_signature.push_back(operand_list[p]->get_data_type() );
555 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Failed lookup: the attempted signature is printed (the guard is not visible
// here). A result of -2 additionally reports multiple subsuming functions.
557 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
559 for(o=0;o<operand_list.size();o++){
560 if(o>0) fprintf(stderr,", ");
561 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
563 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
564 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
568 return(Ext_fcns->fta_legal(fcn_id) );
// NOTE(review): this diagnostic goes to stdout via printf, while the other
// diagnostics in this function use stderr -- confirm whether intended.
570 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
579 // test if a pr can be executed at the fta.
581 // Return true if not forbidden, false if forbidden
// Recursively checks the predicate pr: scalar operands through
// check_fta_forbidden_se, sub-predicates through recursion, and external
// predicates through Ext_fcns->fta_legal().
//   aggr_tbl : only passed on to the nested checks.
//   Ext_fcns : external function table used to resolve predicate calls.
// NOTE(review): the case labels and the early "return false" statements are
// not visible in this excerpt.
583 bool check_fta_forbidden_pr(predicate_t *pr,
584 aggregate_table *aggr_tbl,
585 ext_fcn_list *Ext_fcns
588 vector<literal_t *> llist;
591 vector<scalarexp_t *> op_list;
592 vector<data_type *> dt_signature;
596 switch(pr->get_operator_type()){
// IN predicate: the left expression is checked, then the type of every
// literal in the list.
598 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns) )
600 llist = pr->get_lit_vec();
601 for(l=0;l<llist.size();l++){
// NOTE(review): a data_type is allocated for each literal and no matching
// delete is visible in this excerpt -- possible leak, confirm.
602 dt = new data_type(llist[l]->get_type());
603 if(! dt->fta_legal_type()){
// Comparison: both scalar operands are checked.
611 if(! check_fta_forbidden_se(pr->get_left_se(), aggr_tbl, Ext_fcns))
613 if(! check_fta_forbidden_se(pr->get_right_se(), aggr_tbl, Ext_fcns))
// Unary predicate operator: the result is that of the sub-predicate.
617 return( check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns) );
// Binary predicate operator: both sub-predicates are checked.
619 if(! check_fta_forbidden_pr(pr->get_left_pr(), aggr_tbl, Ext_fcns))
621 if(! check_fta_forbidden_pr(pr->get_right_pr(), aggr_tbl, Ext_fcns))
// External predicate: every operand is checked and its data type collected
// into the signature used for the lookup.
625 op_list = pr->get_op_list();
626 for(o=0;o<op_list.size();o++){
627 if(!check_fta_forbidden_se(op_list[o],aggr_tbl, Ext_fcns))
629 dt_signature.push_back(op_list[o]->get_data_type() );
631 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), dt_signature);
// Failed lookup: the attempted signature is printed (the guard is not visible
// here). A result of -2 additionally reports multiple subsuming predicates.
633 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
635 for(o=0;o<op_list.size();o++){
636 if(o>0) fprintf(stderr,", ");
637 fprintf(stderr,"%s",op_list[o]->get_data_type()->to_string().c_str());
639 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
640 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
644 return(Ext_fcns->fta_legal(fcn_id) );
646 fprintf(stderr,"INTERNAL ERROR in check_fta_forbidden_pr, line %d, character %d, unknown predicate operator type %d\n",
647 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
656 // Split the aggregates in orig_aggr_tbl, into superaggregates and
658 // (the value of the HFTA aggregate might be a SE of several LFTA
659 // subaggregates, e.g. avg : sum / count )
660 // Register the superaggregates in hfta_aggr_tbl, and the
661 // subaggregates in lfta_aggr_tbl.
662 // Insert references to the subaggregates into lfta_select_list.
663 // (and record their names in the currnames list)
664 // Create a SE for the superaggregate, put it in hfta_aggr_se,
// Splits aggregate agr_id of orig_aggr_tbl into sub-aggregate(s) and a
// super-aggregate expression.
//   orig_aggr_tbl    : table holding the aggregate being split.
//   hfta_aggr_tbl    : receives the super-aggregate(s).
//   lfta_aggr_tbl    : receives the sub-aggregate(s).
//   lfta_select_list : each sub-aggregate is added here (no duplicates) and
//                      referenced from the super-aggregate by column name.
//   hfta_aggr_se     : on exit, hfta_aggr_se[agr_id] is the super-aggregate
//                      expression.
//   Ext_fcns         : external function table, used for non-builtin
//                      aggregates.
// NOTE(review): several else/if lines and local declarations are not visible
// in this excerpt; comments below describe only the visible statements.
667 void split_fta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
668 aggregate_table *hfta_aggr_tbl,
669 aggregate_table *lfta_aggr_tbl,
670 vector<select_element *> &lfta_select_list,
671 map<int,scalarexp_t *> &hfta_aggr_se,
672 ext_fcn_list *Ext_fcns
675 scalarexp_t *subaggr_se;
680 scalarexp_t *new_se, *l_se;
681 vector<scalarexp_t *> subaggr_ref_se;
// ---- User-defined (non-builtin) aggregate ----
684 if(! orig_aggr_tbl->is_builtin(agr_id)){
685 // Construct the subaggregate
686 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
687 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
688 vector<scalarexp_t *> subopl;
// The sub-aggregate gets its own copies of the operands.
690 for(o=0;o<opl.size();++o){
691 subopl.push_back(dup_se(opl[o], NULL));
693 int sub_id = Ext_fcns->get_subaggr_id(fcn_id);
694 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
695 subaggr_se->set_fcn_id(sub_id);
696 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
697 // Add it to the lfta select list.
698 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
// Either the name already on the select-list entry is used, or a name is
// imputed and the sub-aggregate is registered in lfta_aggr_tbl. The condition
// choosing between the two is not visible here (presumably new_element).
700 colname = lfta_select_list[fta_se_nbr]->name;
702 colname = impute_colname(lfta_select_list, subaggr_se);
703 lfta_select_list[fta_se_nbr]->name = colname;
704 ano = lfta_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,Ext_fcns->has_lfta_bailout(sub_id));
705 subaggr_se->set_aggr_id(ano);
708 // Construct a reference to the subaggregate
709 new_cr = new colref_t(colname.c_str());
710 new_se = new scalarexp_t(new_cr);
711 // I'm not certain what the types should be ....
712 // This will need to be filled in by later analysis.
713 // NOTE: this might not capture all the meaning of data_type ...
714 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
715 subaggr_ref_se.push_back(new_se);
717 // Construct the superaggregate
718 int super_id = Ext_fcns->get_superaggr_id(fcn_id);
719 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
720 ret_se->set_fcn_id(super_id);
721 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
722 // Register it in the hfta aggregate table
723 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, Ext_fcns->is_running_aggr(sub_id),false);
724 ret_se->set_aggr_id(ano);
725 hfta_aggr_se[agr_id] = ret_se;
731 // builtin aggregate processing
// Names and data types of the sub-aggregates; indexed in parallel by sa below.
735 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
736 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// ---- Builtin star aggregate ----
739 if(orig_aggr_tbl->is_star_aggr(agr_id)){
740 for(sa=0;sa<subaggr_names.size();sa++){
741 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
742 subaggr_se->set_data_type(subaggr_dt[sa]);
744 // The following sequence is similar to the code in make_fta_se_ref,
745 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a loop-local fta_se_nbr, while the other
// branches assign to an fta_se_nbr declared elsewhere; presumably this
// shadows the outer variable -- confirm.
746 int fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
748 colname = lfta_select_list[fta_se_nbr]->name;
750 colname = impute_colname(lfta_select_list, subaggr_se);
751 lfta_select_list[fta_se_nbr]->name = colname;
752 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
753 subaggr_se->set_aggr_id(ano);
// Reference to the sub-aggregate column, bound to tablevar 0.
755 new_cr = new colref_t(colname.c_str());
756 new_cr->set_tablevar_ref(0);
757 new_se = new scalarexp_t(new_cr);
759 // I'm not certain what the types should be ....
760 // This will need to be filled in by later analysis.
761 // Actually, this is causing a problem.
762 // I will assume a UINT data type. / change to INT
763 // (consistent with assign_data_types in analyze_fta.cc)
764 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
765 data_type *ndt = new data_type("Int"); // used to be Uint
766 new_se->set_data_type(ndt);
768 subaggr_ref_se.push_back(new_se);
// ---- Builtin aggregate that is not a star aggregate ----
771 for(sa=0;sa<subaggr_names.size();sa++){
// The sub-aggregate is built either over a copy of the original operand or as
// a star aggregate; the selecting condition is not visible here (presumably
// use_se[sa]).
773 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
774 l_se = dup_se(aggr_operand, NULL);
775 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
777 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
779 subaggr_se->set_data_type(subaggr_dt[sa]);
781 // again, similar to make_fta_se_ref.
782 fta_se_nbr = add_select_list_nodup(lfta_select_list, subaggr_se,new_element);
784 colname = lfta_select_list[fta_se_nbr]->name;
786 colname = impute_colname(lfta_select_list, subaggr_se);
787 lfta_select_list[fta_se_nbr]->name = colname;
// Registered with the operand copy, or with NULL for the star form.
789 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
791 ano = lfta_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
792 subaggr_se->set_aggr_id(ano);
794 new_cr = new colref_t(colname.c_str());
795 new_se = new scalarexp_t(new_cr);
796 // I'm not certain what the types should be ....
797 // This will need to be filled in by later analysis.
798 // NOTE: this might not capture all the meaning of data_type ...
799 new_se->set_data_type(subaggr_dt[sa]);
800 subaggr_ref_se.push_back(new_se);
// Combine the sub-aggregate references into the super-aggregate expression,
// which takes the data type of the original aggregate.
803 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
804 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
806 // ASSUME either the return value is an aggregation,
807 // or a binary_op between two aggregations
// NOTE(review): both sides of the || compare against SE_AGGR_SE, so the
// second test is redundant; possibly another operator type was intended for
// one of them -- confirm.
808 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
809 ano = hfta_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
810 ret_se->set_aggr_id(ano);
812 // Basically processing for AVG.
813 // set the data type of the superagg to that of the subagg.
// The left and right children are registered as separate aggregates, typed
// from subaggr_dt[0] and subaggr_dt[1] respectively.
814 scalarexp_t *left_se = ret_se->get_left_se();
815 left_se->set_data_type(subaggr_dt[0]);
816 ano = hfta_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
817 left_se->set_aggr_id(ano);
819 scalarexp_t *right_se = ret_se->get_right_se();
820 right_se->set_data_type(subaggr_dt[1]);
821 ano = hfta_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
822 right_se->set_aggr_id(ano);
825 hfta_aggr_se[agr_id] = ret_se;
830 // Split the aggregates in orig_aggr_tbl, into hfta_superaggregates and
831 // hfta_subaggregates.
832 // Register the superaggregates in hi_aggr_tbl, and the
833 // subaggregates in low_aggr_tbl.
834 // Insert references to the subaggregates into low_select_list.
835 // (and record their names in the currnames list)
836 // Create a SE for the superaggregate, put it in hi_aggr_se,
// Splits aggregate agr_id of orig_aggr_tbl into sub-aggregate(s) and a
// super-aggregate expression, using the hfta sub/super-aggregate ids of
// Ext_fcns for non-builtin aggregates.
//   orig_aggr_tbl   : table holding the aggregate being split.
//   hi_aggr_tbl     : receives the super-aggregate(s).
//   low_aggr_tbl    : receives the sub-aggregate(s).
//   low_select_list : each sub-aggregate is added here (no duplicates) and
//                     referenced from the super-aggregate by column name.
//   hi_aggr_se      : on exit, hi_aggr_se[agr_id] is the super-aggregate
//                     expression.
//   Ext_fcns        : external function table, used for non-builtin
//                     aggregates.
// NOTE(review): several else/if lines and local declarations are not visible
// in this excerpt; comments below describe only the visible statements.
839 void split_hfta_aggr(aggregate_table *orig_aggr_tbl, int agr_id,
840 aggregate_table *hi_aggr_tbl,
841 aggregate_table *low_aggr_tbl,
842 vector<select_element *> &low_select_list,
843 map<int,scalarexp_t *> &hi_aggr_se,
844 ext_fcn_list *Ext_fcns
847 scalarexp_t *subaggr_se;
852 scalarexp_t *new_se, *l_se;
853 vector<scalarexp_t *> subaggr_ref_se;
// ---- User-defined (non-builtin) aggregate ----
856 if(! orig_aggr_tbl->is_builtin(agr_id)){
857 // Construct the subaggregate
858 int fcn_id = orig_aggr_tbl->get_fcn_id(agr_id);
859 vector<scalarexp_t *> opl = orig_aggr_tbl->get_operand_list(agr_id);
860 vector<scalarexp_t *> subopl;
// The sub-aggregate gets its own copies of the operands.
862 for(o=0;o<opl.size();++o){
863 subopl.push_back(dup_se(opl[o], NULL));
865 int sub_id = Ext_fcns->get_hfta_subaggr_id(fcn_id);
866 subaggr_se = new scalarexp_t(Ext_fcns->get_fcn_name(sub_id).c_str(), subopl);
867 subaggr_se->set_fcn_id(sub_id);
868 subaggr_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
869 // Add it to the low select list.
870 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
// Either the name already on the select-list entry is used, or a name is
// imputed and the sub-aggregate is registered in low_aggr_tbl. The condition
// choosing between the two is not visible here (presumably new_element).
872 colname = low_select_list[fta_se_nbr]->name;
874 colname = impute_colname(low_select_list, subaggr_se);
875 low_select_list[fta_se_nbr]->name = colname;
876 ano = low_aggr_tbl->add_aggr(Ext_fcns->get_fcn_name(sub_id), sub_id, subopl,Ext_fcns->get_storage_dt(sub_id), false, false,false);
877 subaggr_se->set_aggr_id(ano);
880 // Construct a reference to the subaggregate
881 new_cr = new colref_t(colname.c_str());
882 new_se = new scalarexp_t(new_cr);
883 // I'm not certain what the types should be ....
884 // This will need to be filled in by later analysis.
885 // NOTE: this might not capture all the meaning of data_type ...
886 new_se->set_data_type(Ext_fcns->get_fcn_dt(sub_id));
887 subaggr_ref_se.push_back(new_se);
889 // Construct the superaggregate
890 int super_id = Ext_fcns->get_hfta_superaggr_id(fcn_id);
891 scalarexp_t *ret_se = new scalarexp_t(Ext_fcns->get_fcn_name(super_id).c_str(), subaggr_ref_se);
892 ret_se->set_fcn_id(super_id);
893 ret_se->set_data_type(Ext_fcns->get_fcn_dt(super_id));
894 // Register it in the high aggregate table
895 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), super_id, subaggr_ref_se,Ext_fcns->get_storage_dt(super_id), false, false,false);
896 ret_se->set_aggr_id(ano);
897 hi_aggr_se[agr_id] = ret_se;
903 // builtin aggregate processing
// Names and data types of the sub-aggregates; indexed in parallel by sa below.
907 vector<string> subaggr_names = orig_aggr_tbl->get_subaggr_fcns(agr_id, use_se);
908 vector<data_type *> subaggr_dt = orig_aggr_tbl->get_subaggr_dt(agr_id);
// ---- Builtin star aggregate ----
911 if(orig_aggr_tbl->is_star_aggr(agr_id)){
912 for(sa=0;sa<subaggr_names.size();sa++){
913 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
914 subaggr_se->set_data_type(subaggr_dt[sa]);
916 // The following sequence is similar to the code in make_fta_se_ref,
917 // but there is special processing for the aggregate tables.
// NOTE(review): this declares a loop-local fta_se_nbr, while the other
// branches assign to an fta_se_nbr declared elsewhere; presumably this
// shadows the outer variable -- confirm.
918 int fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
920 colname = low_select_list[fta_se_nbr]->name;
922 colname = impute_colname(low_select_list, subaggr_se);
923 low_select_list[fta_se_nbr]->name = colname;
924 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL, false);
925 subaggr_se->set_aggr_id(ano);
// Reference to the sub-aggregate column, bound to tablevar 0.
927 new_cr = new colref_t(colname.c_str());
928 new_cr->set_tablevar_ref(0);
929 new_se = new scalarexp_t(new_cr);
931 // I'm not certain what the types should be ....
932 // This will need to be filled in by later analysis.
933 // Actually, this is causing a problem.
934 // I will assume a UINT data type.
935 // (consistent with assign_data_types in analyze_fta.cc)
936 // TODO: why can't I use subaggr_dt, as I do in the other IF branch?
937 data_type *ndt = new data_type("Int"); // was Uint
938 new_se->set_data_type(ndt);
940 subaggr_ref_se.push_back(new_se);
// ---- Builtin aggregate that is not a star aggregate ----
943 for(sa=0;sa<subaggr_names.size();sa++){
// The sub-aggregate is built either over a copy of the original operand or as
// a star aggregate; the selecting condition is not visible here (presumably
// use_se[sa]).
945 scalarexp_t *aggr_operand = orig_aggr_tbl->get_aggr_se(agr_id);
946 l_se = dup_se(aggr_operand, NULL);
947 subaggr_se = scalarexp_t::make_se_aggr(subaggr_names[sa].c_str(),l_se);
949 subaggr_se = scalarexp_t::make_star_aggr(subaggr_names[sa].c_str());
951 subaggr_se->set_data_type(subaggr_dt[sa]);
953 // again, similar to make_fta_se_ref.
954 fta_se_nbr = add_select_list_nodup(low_select_list, subaggr_se,new_element);
956 colname = low_select_list[fta_se_nbr]->name;
958 colname = impute_colname(low_select_list, subaggr_se);
959 low_select_list[fta_se_nbr]->name = colname;
// Registered with the operand copy, or with NULL for the star form.
961 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),l_se, false);
963 ano = low_aggr_tbl->add_aggr(subaggr_names[sa].c_str(),NULL,false);
964 subaggr_se->set_aggr_id(ano);
966 new_cr = new colref_t(colname.c_str());
967 new_se = new scalarexp_t(new_cr);
968 // I'm not certain what the types should be ....
969 // This will need to be filled in by later analysis.
970 // NOTE: this might not capture all the meaning of data_type ...
971 new_se->set_data_type(subaggr_dt[sa]);
972 subaggr_ref_se.push_back(new_se);
// Combine the sub-aggregate references into the super-aggregate expression.
975 scalarexp_t *ret_se = orig_aggr_tbl->make_superaggr_se(agr_id, subaggr_ref_se);
976 // ASSUME either the return value is an aggregation,
977 // or a binary_op between two aggregations
// NOTE(review): both sides of the || compare against SE_AGGR_SE, so the
// second test is redundant; possibly another operator type was intended for
// one of them -- confirm.
978 if(ret_se->get_operator_type() == SE_AGGR_SE || ret_se->get_operator_type() == SE_AGGR_SE){
// NOTE(review): here the data type of ret_se is set only on this branch,
// whereas split_fta_aggr sets it before the branch -- confirm the difference
// is intended.
979 ret_se->set_data_type(orig_aggr_tbl->get_data_type(agr_id));
980 ano = hi_aggr_tbl->add_aggr(ret_se->get_op(), ret_se->get_left_se(), false );
982 // Basically processing for AVG.
983 // set the data type of the superagg to that of the subagg.
// The left and right children are registered as separate aggregates, typed
// from subaggr_dt[0] and subaggr_dt[1] respectively.
984 scalarexp_t *left_se = ret_se->get_left_se();
985 left_se->set_data_type(subaggr_dt[0]);
986 ano = hi_aggr_tbl->add_aggr(left_se->get_op(), left_se->get_left_se(), false );
987 left_se->set_aggr_id(ano);
989 scalarexp_t *right_se = ret_se->get_right_se();
990 right_se->set_data_type(subaggr_dt[1]);
991 ano = hi_aggr_tbl->add_aggr(right_se->get_op(), right_se->get_left_se(), false );
992 right_se->set_aggr_id(ano);
// NOTE(review): which branch(es) this statement belongs to is not visible in
// this excerpt; ano holds the id from the most recent add_aggr call.
995 ret_se->set_aggr_id(ano);
996 hi_aggr_se[agr_id] = ret_se;
1004 // Split a scalar expression into one part which executes
1005 // at the stream and another set of parts which execute
1007 // Because I'm actually modifying the SEs, I will make
1008 // copies. But I will assume that literals, params, and
1009 // colrefs are immutable at this point.
1010 // (if there is ever a need to change one, must make a
1012 // NOTE : if se is constant (only references literals),
1013 // avoid making the fta compute it.
1015 // NOTE : This will need to be generalized to
1016 // handle join expressions, namely to handle a vector
1019 // Return value is the HFTA se.
1020 // Add lftas select_elements to the fta_select_list.
1021 // set fta_forbidden if this node or any child cannot
1022 // execute at the lfta.
// split_fta_se: walk the scalar expression `se` and build, in ret_se, a new
// expression for each operator type handled by the switch below.
//   se               - expression to split; each visible branch builds a
//                      fresh scalarexp_t and copies se's decorations onto it.
//   fta_forbidden    - out: true when this node's data type, operation, or
//                      external function is not fta-legal, or when a child
//                      was reported forbidden.
//   lfta_select_list - passed to make_fta_se_ref() together with a child
//                      expression; the value it returns takes the child's place.
//   Ext_fcns         - external function table (lookup_fcn / fta_legal).
1026 scalarexp_t *split_fta_se(scalarexp_t *se,
1027 bool &fta_forbidden,
1028 vector<select_element *> &lfta_select_list,
1029 ext_fcn_list *Ext_fcns
1033 vector<scalarexp_t *> operand_list;
1034 vector<data_type *> dt_signature;
1035 scalarexp_t *ret_se, *l_se, *r_se;
1036 bool l_forbid, r_forbid, this_forbid;
1038 scalarexp_t *new_se;
1039 data_type *dt = se->get_data_type();
1041 switch(se->get_operator_type()){
// Literal: forbidden only when its data type is not fta-legal.
1043 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1044 ret_se = new scalarexp_t(se->get_literal());
1045 ret_se->use_decorations_of(se);
// Parameter reference: rebuilt from the parameter name held in get_op().
1049 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1050 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1051 ret_se->use_decorations_of(se);
1055 // No colref should be forbidden,
1056 // the schema is wrong, the fta_legal_type() fcn is wrong,
1057 // or the source table is actually a stream.
1058 // Issue a warning, but proceed with processing.
1059 // Also, should not be a ref to a gbvar.
1060 // (a gbvar ref only occurs in an aggregation node,
1061 // and these SEs are rehomed, not split.
1062 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1065 fprintf(stderr,"WARNING, a colref is a forbidden data type in split_fta_se,"
1067 " type is %s, line=%d, col=%d\n",
1068 se->get_colref()->to_string().c_str(),
1069 se->get_data_type()->get_type_str().c_str(),
1070 se->lineno, se->charno
1075 fprintf(stderr,"INTERNAL ERROR, a colref is a gbvar ref in split_fta_se,"
1076 " type is %s, line=%d, col=%d\n",
1077 se->get_data_type()->get_type_str().c_str(),
1078 se->lineno, se->charno
1083 ret_se = new scalarexp_t(se->get_colref());
1084 ret_se->use_decorations_of(se);
// Single-operand operator: split the operand first, then ask the result type
// whether the operation is fta-legal on the operand's type.
1088 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1090 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1092 // If this operation is forbidden but the child SE is not,
1093 // put the child se on the lfta_select_list, create a colref
1094 // which accesses this se, and make it the child of this op.
1095 // Exception : the child se is constant (only literal refs).
1096 if(this_forbid && !l_forbid){
1097 if(!is_literal_or_param_only(l_se)){
1098 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1099 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1102 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1104 ret_se->use_decorations_of(se);
// Bitwise | on two bools here; the binary-operator branch below uses ||.
1105 fta_forbidden = this_forbid | l_forbid;
// Two-operand operator: split both sides, then test the operation itself.
1109 l_se = split_fta_se(se->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1110 r_se = split_fta_se(se->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1112 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1114 // Replace the left se if it is not forbidden, but something else is.
// Note: the condition combines bools with bitwise & rather than &&.
1115 if((this_forbid || r_forbid) & !l_forbid){
1116 if(!is_literal_or_param_only(l_se)){
1117 new_se = make_fta_se_ref(lfta_select_list, l_se,0);
1122 // Replace the right se if it is not forbidden, but something else is.
1123 if((this_forbid || l_forbid) & !r_forbid){
1124 if(!is_literal_or_param_only(r_se)){
1125 new_se = make_fta_se_ref(lfta_select_list, r_se,0);
1130 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1131 ret_se->use_decorations_of(se);
1132 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are reported as an internal error in this function.
1139 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_fta_se."
1140 " line=%d, col=%d\n",
1141 se->get_op().c_str(),
1142 se->lineno, se->charno
// Function call: split every operand, remembering per operand whether it was
// forbidden, and collect the operand types for the external-function lookup.
1149 fta_forbidden = false;
1150 operand_list = se->get_operands();
1151 vector<scalarexp_t *> new_operands;
1152 vector<bool> forbidden_op;
1153 for(p=0;p<operand_list.size();p++){
1154 l_se = split_fta_se(operand_list[p], l_forbid, lfta_select_list, Ext_fcns);
1156 fta_forbidden |= l_forbid;
1157 new_operands.push_back(l_se);
1158 forbidden_op.push_back(l_forbid);
1159 dt_signature.push_back(operand_list[p]->get_data_type() );
1162 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup-failure diagnostic: prints the attempted signature; a result of -2
// is reported as "multiple subsuming functions found".
1164 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1166 for(o=0;o<operand_list.size();o++){
1167 if(o>0) fprintf(stderr,", ");
1168 fprintf(stderr,"%s",operand_list[o]->get_data_type()->get_type_str().c_str());
1170 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1171 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
// The function itself may be forbidden even when every operand is legal.
1175 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1177 // Replace the non-forbidden operands.
1178 // the forbidden ones are already replaced.
1180 for(p=0;p<new_operands.size();p++){
1181 if(! forbidden_op[p]){
1182 // if(new_operands[p]->get_data_type()->get_temporal() != constant_t){
1183 if(!is_literal_or_param_only(new_operands[p])){
1184 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],0);
1185 new_operands[p] = new_se;
1191 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1192 ret_se->use_decorations_of(se);
// NOTE(review): this message names check_fta_forbidden_se, not split_fta_se,
// and is written with printf (stdout) while the other diagnostics in this
// function use fprintf(stderr, ...) - confirm whether that is intended.
1198 printf("INTERNAL ERROR in check_fta_forbidden_se: operator type %d\n",se->get_operator_type());
1209 // The predicates have already been
1210 // broken into conjunctions.
1211 // If any part of a conjunction is fta-forbidden,
1212 // it must be executed in the stream operator.
1213 // Else it is executed in the FTA.
1214 // A pre-analysis should determine whether this
1215 // predicate is fta-safe. This procedure will
1216 // assume that it is fta-forbidden and will
1217 // prepare it for execution in the stream.
// split_fta_pr: build a new predicate (ret_pr) from `pr`, splitting every
// scalar expression it contains with split_fta_se().
//   pr               - predicate to split; dispatch is on get_operator_type().
//   lfta_select_list - forwarded to split_fta_se() and make_fta_se_ref().
//   Ext_fcns         - external function table, forwarded to split_fta_se().
// In every visible branch a split operand that is not literal/param-only is
// handed to make_fta_se_ref(lfta_select_list, ..., 0).
1221 predicate_t *split_fta_pr(predicate_t *pr,
1222 vector<select_element *> &lfta_select_list,
1223 ext_fcn_list *Ext_fcns
1226 vector<literal_t *> llist;
1227 scalarexp_t *se_l, *se_r;
1228 bool l_forbid, r_forbid;
1229 predicate_t *ret_pr, *pr_l, *pr_r;
1230 vector<scalarexp_t *> op_list, new_op_list;
1232 vector<data_type *> dt_signature;
1235 switch(pr->get_operator_type()){
// Predicate built from one scalar expression and the literal vector of pr.
1237 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1240 if(!is_literal_or_param_only(se_l)){
1241 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1245 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate relating two scalar expressions through pr's operator.
1250 se_l = split_fta_se(pr->get_left_se(), l_forbid, lfta_select_list, Ext_fcns);
1252 if(!is_literal_or_param_only(se_l)){
1253 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1258 se_r = split_fta_se(pr->get_right_se(), r_forbid, lfta_select_list, Ext_fcns);
1260 if(!is_literal_or_param_only(se_r)){
1261 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,0);
1266 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Operator applied to a single sub-predicate: recurse and re-wrap.
1270 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1271 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Operator applied to two sub-predicates: recurse on both sides.
1274 case PRED_BINARY_OP:
1275 pr_l = split_fta_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1276 pr_r = split_fta_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1277 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1281 // I can't push the predicate into the lfta, except by
1282 // returning a bool value, and that is not worth the trouble,
1283 op_list = pr->get_op_list();
1284 for(o=0;o<op_list.size();++o){
1285 se_l = split_fta_se(op_list[o],l_forbid,lfta_select_list,Ext_fcns);
1287 if(!is_literal_or_param_only(se_l)){
1288 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,0);
1292 new_op_list.push_back(se_l);
// The rebuilt predicate keeps the function id of the original.
1295 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1296 ret_pr->set_fcn_id(pr->get_fcn_id());
1299 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1300 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1311 //--------------------------------------------------------------------
1315 // Split a scalar expression into one part which executes
1316 // at the stream and another set of parts which execute
1318 // Because I'm actually modifying the SEs, I will make
1319 // copies. But I will assume that literals, params, and
1320 // colrefs are immutable at this point.
1321 // (if there is ever a need to change one, must make a
1323 // NOTE : if se is constant (only references literals),
1324 // avoid making the fta compute it.
1326 // NOTE : This will need to be generalized to
1327 // handle join expressions, namely to handle a vector
1330 // Return value is the HFTA se.
1331 // Add lftas select_elements to the fta_select_list.
1332 // set fta_forbidden if this node or any child cannot
1333 // execute at the lfta.
// Special colref_source values used by split_ftavec_se and
// combine_colref_source; real table-variable indices are tested with >= 0.
// NOTBLVAR: the expression references no table variable (assigned for
// literals and params).
1335 #define SPLIT_FTAVEC_NOTBLVAR -1
// MIXED: result of combining two different sources, neither being NOTBLVAR.
1336 #define SPLIT_FTAVEC_MIXED -2
// is_PROTOCOL_source: true when colref_source is a non-negative index whose
// entry in lfta_select_list is non-NULL. The negative SPLIT_FTAVEC_* values
// fail the first test, so the vector is not indexed for them.
// NOTE(review): no upper-bound check on colref_source is visible; presumably
// callers guarantee it indexes lfta_select_list - confirm.
1338 bool is_PROTOCOL_source(int colref_source,
1339 vector< vector<select_element *> *> &lfta_select_list){
1340 if(colref_source>=0 && lfta_select_list[colref_source]!=NULL) return true;
// combine_colref_source: merge the colref sources of two sub-expressions.
//   equal sources             -> that source
//   one side is NOTBLVAR      -> the other side
//   anything else (differing) -> SPLIT_FTAVEC_MIXED
// Because MIXED differs from any table index and from NOTBLVAR, combining
// MIXED with a table index yields MIXED again.
1344 int combine_colref_source(int s1, int s2){
1345 if(s1==s2) return(s1);
1346 if(s1==SPLIT_FTAVEC_NOTBLVAR) return s2;
1347 if(s2==SPLIT_FTAVEC_NOTBLVAR) return s1;
1348 return SPLIT_FTAVEC_MIXED;
// split_ftavec_se: variant of the scalar-expression split that works with one
// select list per table variable. Besides fta_forbidden it reports, through
// colref_source, which table variable the expression draws its column
// references from (or SPLIT_FTAVEC_NOTBLVAR / SPLIT_FTAVEC_MIXED).
// Sub-expressions are handed to make_fta_se_ref() only when their source
// passes is_PROTOCOL_source(), i.e. has a non-NULL entry in lfta_select_list.
1351 scalarexp_t *split_ftavec_se(
1352 scalarexp_t *se, // the SE to split
1353 bool &fta_forbidden, // return true if some part of se
1355 int &colref_source, // the tblvar which sources the
1356 // colref, or NOTBLVAR, or MIXED
1357 vector< vector<select_element *> *> &lfta_select_list,
1358 // NULL if the tblvar is not PROTOCOL,
1359 // else build the select list.
1360 ext_fcn_list *Ext_fcns // is the fcn lfta-safe?
1362 // Return value is the HFTA SE, unless fta_forbidden is true and
1363 // colref_source>=0 and the indicated source is PROTOCOL.
1364 // In that case no split was done, the make_fta_se_ref must
1365 // be done by the caller.
1368 vector<scalarexp_t *> operand_list;
1369 vector<data_type *> dt_signature;
1370 scalarexp_t *ret_se, *l_se, *r_se;
1371 bool l_forbid, r_forbid, this_forbid;
1372 int l_csource, r_csource, this_csource;
1374 scalarexp_t *new_se;
1375 data_type *dt = se->get_data_type();
1377 switch(se->get_operator_type()){
// Literal: no table variable involved.
1379 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1380 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1381 ret_se = new scalarexp_t(se->get_literal());
1382 ret_se->use_decorations_of(se);
// Parameter reference: no table variable involved.
1386 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1387 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1388 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1389 ret_se->use_decorations_of(se);
// Interface parameter: never forbidden; its source is the table variable
// recorded on the interface-parameter reference.
1392 case SE_IFACE_PARAM:
1393 fta_forbidden = false;
1394 colref_source = se->get_ifpref()->get_tablevar_ref();
1395 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1396 ret_se->use_decorations_of(se);
1400 // No colref should be forbidden,
1401 // the schema is wrong, the fta_legal_type() fcn is wrong,
1402 // or the source table is actually a stream.
1403 // Issue a warning, but proceed with processing.
1404 // Also, should not be a ref to a gbvar.
1405 // (a gbvar ref only occurs in an aggregation node,
1406 // and these SEs are rehomed, not split.
1407 fta_forbidden = ! se->get_data_type()->fta_legal_type();
1408 colref_source = se->get_colref()->get_tablevar_ref();
1410 if(fta_forbidden && is_PROTOCOL_source(colref_source, lfta_select_list)){
1411 fprintf(stderr,"WARNING, a PROTOCOL colref is a forbidden data type in split_ftavec_se,"
1413 " type is %s, line=%d, col=%d\n",
1414 se->get_colref()->to_string().c_str(),
1415 se->get_data_type()->to_string().c_str(),
1416 se->lineno, se->charno
1421 fta_forbidden = true; // eval in hfta. ASSUME make copy as below.
1424 ret_se = new scalarexp_t(se->get_colref());
1425 ret_se->use_decorations_of(se);
// Single-operand operator: the operand's source becomes this node's source
// because colref_source is passed straight into the recursive call.
1429 l_se = split_ftavec_se(se->get_left_se(), l_forbid, colref_source, lfta_select_list, Ext_fcns);
1431 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), se->get_op());
1433 // If this operation is forbidden but the child SE is not,
1434 // AND the colref source in the se is a single PROTOCOL source
1435 // put the child se on the lfta_select_list, create a colref
1436 // which accesses this se, and make it the child of this op.
1437 // Exception : the child se is constant (only literal refs).
1438 // TODO: I think the exception is expressed by is_PROTOCOL_source
1439 if(this_forbid && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list)){
1440 if(!is_literal_or_param_only(l_se)){
1441 new_se = make_fta_se_ref(lfta_select_list, l_se,colref_source);
1442 ret_se = new scalarexp_t(se->get_op().c_str(), new_se);
1445 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1447 ret_se->use_decorations_of(se);
// Bitwise | on two bools; the binary-operator branch below uses ||.
1448 fta_forbidden = this_forbid | l_forbid;
// Two-operand operator: each side reports its own source; the node's source
// is their combination.
1452 l_se = split_ftavec_se(se->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1453 r_se = split_ftavec_se(se->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1455 this_forbid = ! dt->fta_legal_operation(l_se->get_data_type(), r_se->get_data_type(), se->get_op());
1456 colref_source=combine_colref_source(l_csource, r_csource);
1458 // Replace the left se if the parent must be hfta but the child can
1459 // be lfta. This translates to
1460 // a) result is PROTOCOL and forbidden, but left SE is not forbidden
1461 // OR b) if result is mixed but the left se is PROTOCOL, not forbidden
1462 if( ((this_forbid || r_forbid) && !l_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1463 (colref_source==SPLIT_FTAVEC_MIXED && !l_forbid &&
1464 is_PROTOCOL_source(l_csource, lfta_select_list)) ){
1465 if(!is_literal_or_param_only(l_se)){
1466 new_se = make_fta_se_ref(lfta_select_list, l_se,l_csource);
1471 // same logic as for right se.
1472 if( ((this_forbid || l_forbid) && !r_forbid && is_PROTOCOL_source(colref_source, lfta_select_list) ) ||
1473 (colref_source==SPLIT_FTAVEC_MIXED && !r_forbid &&
1474 is_PROTOCOL_source(r_csource, lfta_select_list)) ){
1475 if(!is_literal_or_param_only(r_se)){
1476 new_se = make_fta_se_ref(lfta_select_list, r_se,r_csource);
1481 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1482 ret_se->use_decorations_of(se);
1483 fta_forbidden = this_forbid || r_forbid || l_forbid;
// Aggregate references are reported as an internal error in this function.
1490 fprintf(stderr,"INTERNAL ERROR, aggregate ref (%s) in split_ftavec_se."
1491 " line=%d, col=%d\n",
1492 se->get_op().c_str(),
1493 se->lineno, se->charno
// Function call: split each operand, recording per operand its forbidden
// flag and source, and accumulate the combined source for the call.
1500 operand_list = se->get_operands();
1501 vector<scalarexp_t *> new_operands;
1502 vector<bool> forbidden_op;
1503 vector<int> csource;
1505 fta_forbidden = false;
1506 colref_source = SPLIT_FTAVEC_NOTBLVAR;
1507 for(p=0;p<operand_list.size();p++){
1508 l_se = split_ftavec_se(operand_list[p], l_forbid, l_csource, lfta_select_list, Ext_fcns);
1510 fta_forbidden |= l_forbid;
1511 colref_source = combine_colref_source(colref_source, l_csource);
1512 new_operands.push_back(l_se);
1513 forbidden_op.push_back(l_forbid);
1514 csource.push_back(l_csource);
1515 dt_signature.push_back(operand_list[p]->get_data_type() );
1518 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), dt_signature);
// Lookup-failure diagnostic: prints the attempted signature; a result of -2
// is reported as "multiple subsuming functions found".
1520 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
1522 for(o=0;o<operand_list.size();o++){
1523 if(o>0) fprintf(stderr,", ");
1524 fprintf(stderr,"%s",operand_list[o]->get_data_type()->to_string().c_str());
1526 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
1527 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
1531 fta_forbidden |= (! Ext_fcns->fta_legal(fcn_id));
1533 // Replace the non-forbidden operands.
1534 // the forbidden ones are already replaced.
// Operands are replaced only when the call as a whole is forbidden or draws
// on mixed sources, and only for operands whose own source is PROTOCOL.
1535 if(fta_forbidden || colref_source == SPLIT_FTAVEC_MIXED){
1536 for(p=0;p<new_operands.size();p++){
1537 if(! forbidden_op[p] && is_PROTOCOL_source(csource[p], lfta_select_list)){
1538 if(!is_literal_or_param_only(new_operands[p])){
1539 new_se = make_fta_se_ref(lfta_select_list, new_operands[p],csource[p]);
1540 new_operands[p] = new_se;
1546 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1547 ret_se->use_decorations_of(se);
// NOTE(review): written with printf (stdout) while the other diagnostics in
// this function use fprintf(stderr, ...) - confirm whether that is intended.
1553 printf("INTERNAL ERROR in split_ftavec_se: operator type %d\n",se->get_operator_type());
1562 // The predicates have already been
1563 // broken into conjunctions.
1564 // If any part of a conjunction is fta-forbidden,
1565 // it must be executed in the stream operator.
1566 // Else it is executed in the FTA.
1567 // A pre-analysis should determine whether this
1568 // predicate is fta-safe. This procedure will
1569 // assume that it is fta-forbidden and will
1570 // prepare it for execution in the stream.
// split_ftavec_pr: build a new predicate (ret_pr) from `pr`, splitting each
// scalar expression with split_ftavec_se().
//   pr               - predicate to split; dispatch is on get_operator_type().
//   lfta_select_list - one select-list pointer per table variable, forwarded
//                      to split_ftavec_se() and make_fta_se_ref().
//   Ext_fcns         - external function table, forwarded to split_ftavec_se().
// A split operand is handed to make_fta_se_ref() only when it is not
// forbidden, its source passes is_PROTOCOL_source(), and it is not
// literal/param-only.
1572 predicate_t *split_ftavec_pr(predicate_t *pr,
1573 vector< vector<select_element *> *> &lfta_select_list,
1574 ext_fcn_list *Ext_fcns
1577 vector<literal_t *> llist;
1578 scalarexp_t *se_l, *se_r;
1579 bool l_forbid, r_forbid;
1580 int l_csource, r_csource;
1581 predicate_t *ret_pr, *pr_l, *pr_r;
1582 vector<scalarexp_t *> op_list, new_op_list;
1584 vector<data_type *> dt_signature;
1587 switch(pr->get_operator_type()){
// Predicate built from one scalar expression and the literal vector of pr.
1589 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1591 // TODO: checking that the se is a PROTOCOL source should
1592 // take care of literal_or_param_only.
1593 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1594 if(!is_literal_or_param_only(se_l)){
1595 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1599 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate relating two scalar expressions through pr's operator; each side
// is judged against its own source.
1604 se_l = split_ftavec_se(pr->get_left_se(), l_forbid, l_csource, lfta_select_list, Ext_fcns);
1605 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1606 if(!is_literal_or_param_only(se_l)){
1607 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1612 se_r = split_ftavec_se(pr->get_right_se(), r_forbid, r_csource, lfta_select_list, Ext_fcns);
1613 if(!r_forbid && is_PROTOCOL_source(r_csource, lfta_select_list)){
1614 if(!is_literal_or_param_only(se_r)){
1615 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_r,r_csource);
1620 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Operator applied to a single sub-predicate: recurse and re-wrap.
1624 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1625 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Operator applied to two sub-predicates: recurse on both sides.
1628 case PRED_BINARY_OP:
1629 pr_l = split_ftavec_pr(pr->get_left_pr(), lfta_select_list, Ext_fcns);
1630 pr_r = split_ftavec_pr(pr->get_right_pr(), lfta_select_list, Ext_fcns);
1631 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1635 // I can't push the predicate into the lfta, except by
1636 // returning a bool value, and that is not worth the trouble,
1637 op_list = pr->get_op_list();
1638 for(o=0;o<op_list.size();++o){
1639 se_l = split_ftavec_se(op_list[o],l_forbid,l_csource,lfta_select_list,Ext_fcns);
1640 if(!l_forbid && is_PROTOCOL_source(l_csource, lfta_select_list)){
1641 if(!is_literal_or_param_only(se_l)){
1642 scalarexp_t *new_se = make_fta_se_ref(lfta_select_list, se_l,l_csource);
1646 new_op_list.push_back(se_l);
// The rebuilt predicate keeps the function id of the original.
1649 ret_pr = new predicate_t(pr->get_op().c_str(), new_op_list);
1650 ret_pr->set_fcn_id(pr->get_fcn_id());
// NOTE(review): the message below says split_fta_pr although this function
// is split_ftavec_pr - confirm and correct the text if unintended.
1653 fprintf(stderr,"INTERNAL ERROR in split_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1654 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1664 ////////////////////////////////////////////////////////////////////////
1665 /// rehome_fta_se rehome_fta_pr
1666 /// This is used to split an sgah operator (aggregation),
1667 /// I just need to make gb, aggr references point to the
1668 /// new gb, aggr table entries.
// rehome_fta_se: rebuild the scalar expression `se`, replacing aggregate
// references with the expressions stored in aggr_map.
//   se       - expression to rebuild; operands are rebuilt recursively.
//   aggr_map - maps an aggregate reference id (se->get_aggr_ref()) to the
//              expression returned in its place.
// Non-aggregate nodes are reconstructed with the same operator and get the
// decorations of the original via use_decorations_of().
1671 scalarexp_t *rehome_fta_se(scalarexp_t *se,
1672 map< int, scalarexp_t * > *aggr_map
1677 vector<scalarexp_t *> operand_list;
1678 scalarexp_t *ret_se, *l_se, *r_se;
1680 scalarexp_t *new_se;
1681 data_type *dt = se->get_data_type();
1682 vector<scalarexp_t *> new_operands;
1684 switch(se->get_operator_type()){
// Literal.
1686 ret_se = new scalarexp_t(se->get_literal());
1687 ret_se->use_decorations_of(se);
// Parameter reference, rebuilt from the name held in get_op().
1691 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1692 ret_se->use_decorations_of(se);
1695 case SE_IFACE_PARAM:
1696 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1697 ret_se->use_decorations_of(se);
1703 // Must be a GB REF ...
1704 // I'm assuming that the hfta gbvar table has the
1705 // same sequence of entries as the input query's gbvar table.
1706 // Else I'll need some kind of translation table.
// NOTE(review): the warning text below says rehome_hfta_se while the
// function is named rehome_fta_se.
1709 fprintf(stderr,"WARNING, a colref is not a gbver ref in rehome_hfta_se"
1710 " type is %s, line=%d, col=%d\n",
1711 se->get_data_type()->to_string().c_str(),
1712 se->lineno, se->charno
1716 ret_se = new scalarexp_t(se->get_colref());
1717 ret_se->use_decorations_of(se); // just inherit the gbref
// Single-operand operator.
1721 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1723 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1724 ret_se->use_decorations_of(se);
// Two-operand operator.
1728 l_se = rehome_fta_se(se->get_left_se(), aggr_map);
1729 r_se = rehome_fta_se(se->get_right_se(), aggr_map);
1731 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1732 ret_se->use_decorations_of(se);
// Aggregate reference: returned directly from the map. std::map::operator[]
// default-inserts a null pointer when the id is absent.
1738 agr_id = se->get_aggr_ref();
1739 return (*aggr_map)[agr_id];
// Function node: when it carries a non-negative aggregate reference it is
// looked up in the map like an aggregate; otherwise its operands are rebuilt.
1743 agr_id = se->get_aggr_ref();
1744 if(agr_id >= 0) return (*aggr_map)[agr_id];
1746 operand_list = se->get_operands();
1747 for(p=0;p<operand_list.size();p++){
1748 l_se = rehome_fta_se(operand_list[p], aggr_map);
1750 new_operands.push_back(l_se);
1754 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1755 ret_se->use_decorations_of(se);
// Unknown operator type: reported with printf (stdout).
1760 printf("INTERNAL ERROR in rehome_fta_se: operator type %d\n",se->get_operator_type());
1769 // The predicates have already been
1770 // broken into conjunctions.
1771 // If any part of a conjunction is fta-forbidden,
1772 // it must be executed in the stream operator.
1773 // Else it is executed in the FTA.
1774 // A pre-analysis should determine whether this
1775 // predicate is fta-safe. This procedure will
1776 // assume that it is fta-forbidden and will
1777 // prepare it for execution in the stream.
// rehome_fta_pr: rebuild the predicate `pr`, passing every scalar expression
// it contains through rehome_fta_se() and every sub-predicate through
// rehome_fta_pr().
//   pr       - predicate to rebuild; dispatch is on get_operator_type().
//   aggr_map - aggregate-id to expression map, forwarded unchanged.
1779 predicate_t *rehome_fta_pr(predicate_t *pr,
1780 map<int, scalarexp_t *> *aggr_map
1783 vector<literal_t *> llist;
1784 scalarexp_t *se_l, *se_r;
1785 predicate_t *ret_pr, *pr_l, *pr_r;
1786 vector<scalarexp_t *> op_list, new_op_list;
1789 switch(pr->get_operator_type()){
// Predicate built from one scalar expression and the literal vector of pr.
1791 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1792 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate relating two scalar expressions through pr's operator.
1796 se_l = rehome_fta_se(pr->get_left_se(), aggr_map);
1797 se_r = rehome_fta_se(pr->get_right_se(), aggr_map);
1798 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Operator applied to a single sub-predicate.
1802 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1803 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
// Operator applied to two sub-predicates.
1806 case PRED_BINARY_OP:
1807 pr_l = rehome_fta_pr(pr->get_left_pr(), aggr_map);
1808 pr_r = rehome_fta_pr(pr->get_right_pr(), aggr_map);
1809 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: rebuild each operand and keep the
// original function id.
1813 op_list = pr->get_op_list();
1814 for(o=0;o<op_list.size();++o){
1815 se_l = rehome_fta_se(op_list[o], aggr_map);
1816 new_op_list.push_back(se_l);
1818 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1819 ret_pr->set_fcn_id(pr->get_fcn_id());
1823 fprintf(stderr,"INTERNAL ERROR in rehome_fta_pr, line %d, character %d, unknown predicate operator type %d\n",
1824 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1833 ////////////////////////////////////////////////////////////////////
1834 ///////////////// Create a STREAM table to represent the FTA output.
// create_attributes: build a STREAM_SCHEMA table_def named `tname` with one
// field per entry of select_list.
//   tname       - name given to the new table_def.
//   select_list - each element supplies the field name (->name) and the data
//                 type (->se->get_data_type()).
// Each field is named after its select element, uses the access function
// "get_field_<name>", and carries the data type's parameters as its
// param_list.
1836 table_def *create_attributes(string tname, vector<select_element *> &select_list){
1840 // Create a new STREAM schema for the output of the FTA.
1842 field_entry_list *fel = new field_entry_list();
1844 for(s=0;s<select_list.size();s++){
1845 scalarexp_t *sel_se = select_list[s]->se;
1846 data_type *dt = sel_se->get_data_type();
1848 // Grab the annotations of the field.
1849 // As of this writing, the only meaningful annotations
1850 // are whether or not the attribute is temporal.
1851 // There can be an annotation of constant_t, but
1852 // I'll ignore this, it feels like an unsafe assumption
1853 param_list *plist = new param_list();
1854 // if(dt->is_temporal()){
1855 vector<string> param_strings = dt->get_param_keys();
1857 for(p=0;p<param_strings.size();++p){
1858 string v = dt->get_param_val(param_strings[p]);
// Two append forms: key with value, and key alone. Presumably chosen by
// whether v is empty - the selecting condition is not visible here; confirm.
1860 plist->append(param_strings[p].c_str(),v.c_str());
1862 plist->append(param_strings[p].c_str());
1866 // char access_fcn_name[500];
1867 string colname = select_list[s]->name;
1868 // sprintf(access_fcn_name,"get_field_%s",colname.c_str());
1869 string access_fcn_name = "get_field_"+colname;
// NOTE(review): ufcns is not declared in the visible lines of this function;
// verify where it comes from.
1870 field_entry *fe = new field_entry(
1871 dt->get_type_str(), colname, access_fcn_name, plist, ufcns
1874 fel->append_field(fe);
1877 table_def *fta_tbl = new table_def(
1878 tname.c_str(), NULL, NULL, fel, STREAM_SCHEMA
1885 //------------------------------------------------------------------
1886 // Textual representation of the query node.
// spx_qpn::to_query_string: accumulate in `ret` a textual form of this
// select/project node: "Select " plus the select list, a "From " line with
// the source table, and the where predicates, each parenthesized and joined
// by " AND ". No aggregate table is passed to the expression printers (NULL).
1890 string spx_qpn::to_query_string(){
1892 string ret = "Select ";
1894 for(s=0;s<select_list.size();s++){
1896 ret += se_to_query_string(select_list[s]->se, NULL);
// Unnamed select elements are printed without an AS clause.
1897 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1901 ret += "From "+table_name->to_string()+"\n";
1903 if(where.size() > 0){
1906 for(w=0;w<where.size();w++){
1907 if(w>0) ret += " AND ";
1908 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// sgah_qpn::to_query_string: accumulate in `ret` a textual form of this
// aggregation node: select list, "From " line, where predicates, group-by
// variables and having predicates. Expressions and predicates are printed
// against this node's aggr_tbl.
// Group-by variables are listed flat as "<definition> AS <name>" unless more
// than one group-by pattern exists together with entry-type information; in
// that case they are printed entry by entry, with CUBE(...), ROLLUP(...) and
// GROUPING_SETS(...) wrappers.
1919 string sgah_qpn::to_query_string(){
1921 string ret = "Select ";
1923 for(s=0;s<select_list.size();s++){
1925 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
1926 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
1930 ret += "From "+table_name->to_string()+"\n";
1932 if(where.size() > 0){
1935 for(w=0;w<where.size();w++){
1936 if(w>0) ret += " AND ";
1937 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
1942 if(gb_tbl.size() > 0){
// Flat form: at most one pattern, or no entry-type information recorded.
1945 if(gb_tbl.gb_patterns.size() <= 1 || gb_tbl.gb_entry_type.size()==0){
1946 for(g=0;g<gb_tbl.size();g++){
1947 if(g>0) ret += ", ";
1948 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
1949 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
1951 ret += gb_tbl.get_name(g);
// Structured form: gb_pos indexes the group-by variables and is advanced by
// gb_entry_count[g] for GROUPING_SETS entries.
1955 for(g=0;g<gb_tbl.gb_entry_type.size();++g){
1956 if(g>0) ret += ", ";
// An empty entry type prints a single plain group-by variable.
1957 if(gb_tbl.gb_entry_type[g] == ""){
1958 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+
1959 " AS "+ gb_tbl.get_name(gb_pos);
1962 if(gb_tbl.gb_entry_type[g] == "CUBE" ||
1963 gb_tbl.gb_entry_type[g] == "ROLLUP"){
1964 ret += gb_tbl.gb_entry_type[g] + "(";
1966 for(gg=0;gg<gb_tbl.gb_entry_count[g];++gg){
1967 if(gg>0) ret += ", ";
1968 ret += se_to_query_string(gb_tbl.get_def(gb_pos),&aggr_tbl)+ " AS "+ gb_tbl.get_name(gb_pos);
1973 if(gb_tbl.gb_entry_type[g] == "GROUPING_SETS"){
1974 ret += gb_tbl.gb_entry_type[g] + "(";
1976 vector<vector<bool> > &local_components = gb_tbl.pattern_components[g];
1977 for(g1=0;g1<local_components.size();++g1){
1979 bool first_field = true;
// NOTE(review): this bound is inclusive (g2 <= gb_entry_count[g]) whereas the
// CUBE/ROLLUP loop above uses gg < gb_entry_count[g]. If local_components[g1]
// holds gb_entry_count[g] flags, the last iteration reads one past the end -
// verify against how pattern_components is populated.
1981 for(g2=0;g2<=gb_tbl.gb_entry_count[g];g2++){
1982 if(local_components[g1][g2]){
1983 if(!first_field) ret+=", ";
1984 else first_field = false;
1985 ret += gb_tbl.get_name(gb_pos+g2);
1991 gb_pos += gb_tbl.gb_entry_count[g];
1998 if(having.size() > 0){
2001 for(h=0;h<having.size();h++){
2002 if(h>0) ret += " AND ";
2003 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// rsgah_qpn::to_query_string: accumulate in `ret` a textual form of this
// node: select list, "From " line, where predicates, group-by variables as
// "<definition> AS <name>", having predicates, and a "Closing_When " clause.
// Expressions and predicates are printed against this node's aggr_tbl;
// predicate lists are parenthesized and joined by " AND ".
2012 string rsgah_qpn::to_query_string(){
2014 string ret = "Select ";
2016 for(s=0;s<select_list.size();s++){
2018 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2019 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2023 ret += "From "+table_name->to_string()+"\n";
2025 if(where.size() > 0){
2028 for(w=0;w<where.size();w++){
2029 if(w>0) ret += " AND ";
2030 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2035 if(gb_tbl.size() > 0){
2038 for(g=0;g<gb_tbl.size();g++){
2039 if(g>0) ret += ", ";
2040 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2041 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl)+" AS ";
2043 ret += gb_tbl.get_name(g);
2048 if(having.size() > 0){
2051 for(h=0;h<having.size();h++){
2052 if(h>0) ret += " AND ";
2053 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
// The closing_when clause is emitted only when it has predicates.
2058 if(closing_when.size() > 0){
2059 ret += "Closing_When ";
2061 for(h=0;h<closing_when.size();h++){
2062 if(h>0) ret += " AND ";
2063 ret += "(" + pred_to_query_str(closing_when[h]->pr,&aggr_tbl) + ")";
// sgahcwcb_qpn::to_query_string: accumulate in `ret` a textual form of this
// node: select list, "From " line, where predicates, group-by variables, a
// "Supergroup " list, having predicates, and the "Cleaning_When " and
// "Cleaning_By " clauses. Expressions and predicates are printed against
// this node's aggr_tbl; predicate lists are parenthesized and joined by
// " AND ".
2072 string sgahcwcb_qpn::to_query_string(){
2074 string ret = "Select ";
2076 for(s=0;s<select_list.size();s++){
2078 ret += se_to_query_string(select_list[s]->se, &aggr_tbl);
2079 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2083 ret += "From "+table_name->to_string()+"\n";
2085 if(where.size() > 0){
2088 for(w=0;w<where.size();w++){
2089 if(w>0) ret += " AND ";
2090 ret += "(" + pred_to_query_str(where[w]->pr,&aggr_tbl) + ")";
2095 if(gb_tbl.size() > 0){
2098 for(g=0;g<gb_tbl.size();g++){
2099 if(g>0) ret += ", ";
2100 // if(gb_tbl.get_reftype(g) == GBVAR_SE){
2101 ret += se_to_query_string(gb_tbl.get_def(g), &aggr_tbl) + " AS ";
2103 ret += gb_tbl.get_name(g);
// Supergroup: names the group-by variables whose index is present in sg_tbl.
2108 if(sg_tbl.size() > 0){
2109 ret += "Supergroup ";
2111 bool first_elem = true;
2112 for(g=0;g<gb_tbl.size();g++){
2113 if(sg_tbl.count(g)){
2118 ret += gb_tbl.get_name(g);
2124 if(having.size() > 0){
2127 for(h=0;h<having.size();h++){
2128 if(h>0) ret += " AND ";
2129 ret += "(" + pred_to_query_str(having[h]->pr,&aggr_tbl) + ")";
2135 if(cleanwhen.size() > 0){
2136 ret += "Cleaning_When ";
2138 for(h=0;h<cleanwhen.size();h++){
2139 if(h>0) ret += " AND ";
2140 ret += "(" + pred_to_query_str(cleanwhen[h]->pr,&aggr_tbl) + ")";
2145 if(cleanby.size() > 0){
2146 ret += "Cleaning_By ";
2148 for(h=0;h<cleanby.size();h++){
2149 if(h>0) ret += " AND ";
2150 ret += "(" + pred_to_query_str(cleanby[h]->pr,&aggr_tbl) + ")";
// watch_tbl_qpn::to_query_string: append to `ret` a "WATCHLIST FIELDS {"
// header followed by one line per field of table_layout, each produced by the
// field's to_string(). The DEFINE section (filename, refresh_interval) is
// commented out and therefore not emitted.
2158 string watch_tbl_qpn::to_query_string(){
2160 // ret += "DEFINE {\n";
2161 // ret += "\tfilename='"+filename+";\n";
2162 // ret += "\trefresh_interval="+to_string(refresh_interval)+";\n}\n";
2163 ret += "WATCHLIST FIELDS {\n";
// get_fields() is copied into a local vector before iterating.
2164 std::vector<field_entry *> fields = table_layout->get_fields();
2165 for(int f=0;f<fields.size();++f){
2166 ret += fields[f]->to_string()+"\n";
// mrg_qpn::to_query_string: accumulate in `ret` a textual form of this merge
// node: "Merge <mvars[0]> : <mvars[1]>", a " SLACK <expr>" part, and the
// comma-separated list of merged sources in fm.
// Only the first two merge variables are printed; mvars[0] and mvars[1] are
// indexed without a visible size check, so the node is presumably expected to
// hold at least two - confirm.
2173 string mrg_qpn::to_query_string(){
2175 string ret="Merge ";
2176 ret += mvars[0]->to_query_string() + " : " + mvars[1]->to_query_string();
// NOTE(review): presumably appended only when slack is set; the guarding
// condition is not visible here.
2178 ret += " SLACK "+se_to_query_string(slack, NULL);
2183 for(t=0;t<fm.size();++t){
2184 if(t>0) ret += ", ";
2185 ret += fm[t]->to_string();
// join_eq_hash_qpn::to_query_string: accumulate in `ret` a textual form of
// this join node: select list, a join keyword (INNER_JOIN, LEFT_OUTER_JOIN,
// RIGHT_OUTER_JOIN or OUTER_JOIN), the joined sources in `from`, and the
// where predicates, each parenthesized and joined by " AND ". No aggregate
// table is passed to the expression printers (NULL).
2192 string join_eq_hash_qpn::to_query_string(){
2194 string ret = "Select ";
2196 for(s=0;s<select_list.size();s++){
2198 ret += se_to_query_string(select_list[s]->se, NULL);
2199 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2203 // NOTE: assuming binary join.
// Packs the two per-source property values into one integer: from[0]
// contributes weight 1 and from[1] weight 2. Presumably this value selects
// among the four keywords below - the dispatch is not visible here.
2204 int properties = from[0]->get_property()+2*from[1]->get_property();
2207 ret += "INNER_JOIN ";
2210 ret += "LEFT_OUTER_JOIN ";
2213 ret += "RIGHT_OUTER_JOIN ";
2216 ret += "OUTER_JOIN ";
2222 for(f=0;f<from.size();++f){
2224 ret += from[f]->to_string();
2228 if(where.size() > 0){
2231 for(w=0;w<where.size();w++){
2232 if(w>0) ret += " AND ";
2233 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// filter_join_qpn::to_query_string: accumulate in `ret` a textual form of
// this filter-join node: select list, "FILTER_JOIN(<temporal field>,<range>) ",
// the joined sources in `from`, and the where predicates, each parenthesized
// and joined by " AND ". No aggregate table is passed to the expression
// printers (NULL).
2241 string filter_join_qpn::to_query_string(){
2243 string ret = "Select ";
2245 for(s=0;s<select_list.size();s++){
2247 ret += se_to_query_string(select_list[s]->se, NULL);
2248 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2252 // NOTE: assuming binary join.
// The join header carries the temporal column's field name and the range.
2253 ret += "FILTER_JOIN("+temporal_var->field+","+int_to_string(temporal_range)+") ";
2257 for(f=0;f<from.size();++f){
2259 ret += from[f]->to_string();
2263 if(where.size() > 0){
2266 for(w=0;w<where.size();w++){
2267 if(w>0) ret += " AND ";
2268 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
// watch_join_qpn::to_query_string: accumulate in `ret` a textual form of this
// watchlist-join node: select list, the "WATCHLIST_JOIN " keyword, the joined
// sources in `from`, and the where predicates, each parenthesized and joined
// by " AND ". No aggregate table is passed to the expression printers (NULL).
2276 string watch_join_qpn::to_query_string(){
2278 string ret = "Select ";
2280 for(s=0;s<select_list.size();s++){
2282 ret += se_to_query_string(select_list[s]->se, NULL);
// Unnamed select elements are printed without an AS clause.
2283 if(select_list[s]->name != "") ret += " AS "+select_list[s]->name;
2287 // NOTE: assuming binary join.
2288 ret += "WATCHLIST_JOIN ";
2292 for(f=0;f<from.size();++f){
2294 ret += from[f]->to_string();
2298 if(where.size() > 0){
2301 for(w=0;w<where.size();w++){
2302 if(w>0) ret += " AND ";
2303 ret += "(" + pred_to_query_str(where[w]->pr,NULL) + ")";
2313 // -----------------------------------------------------------------
2314 // Query node subclass specific processing.
// Divide this merge node's sources (fm) and merge columns (mvars) among
// "_cH1"/"_cH2" copies of itself, producing a set of merge nodes.
// Returns the merge nodes produced; on every visible path `this` is the
// last node pushed onto the result.
// NOTE(review): the conditions that select between the cases below are not
// visible here — confirm which fm.size() values reach each case.
2317 vector<mrg_qpn *> mrg_qpn::split_sources(){
2318 vector<mrg_qpn *> ret;
// Consistency checks: sources and merge columns are index-aligned, so their
// counts must agree.
2322 if(fm.size() != mvars.size()){
2323 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources. fm.size() = %lu, mvars.size() = %lu\n",fm.size(),mvars.size());
2327 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::split_sources, fm size is 1.\n");
// Diagnostic dump of the sources about to be split.
// NOTE(review): fm.size() is printed with %d; if fm is a std::vector its
// size() is an unsigned type that may be wider than int — confirm / cast.
2333 printf("spliting sources merge node, name = %s, %d sources.\n\t",node_name.c_str(), fm.size());
2334 for(ff=0;ff<fm.size();++ff){
2335 printf("%s ",fm[ff]->to_string().c_str());
2340 // Handle special cases.
// Case: nothing to split, the node is returned as-is.
2342 ret.push_back(this);
// Case: a "_cH1" child takes the first two sources; this node then drops its
// first source and replaces its (new) first source with a reference to the
// child, so it merges the child's output with the remaining source.
2347 mrg_qpn *new_mrg = (mrg_qpn *)this->make_copy("_cH1");
2348 new_mrg->fm.push_back(this->fm[0]);
2349 new_mrg->fm.push_back(this->fm[1]);
2350 new_mrg->mvars.push_back(this->mvars[0]);
2351 new_mrg->mvars.push_back(this->mvars[1]);
2353 this->fm.erase(this->fm.begin());
2354 this->mvars.erase(this->mvars.begin());
// Keep the range variable name of the source being replaced.
2355 string vname = fm[0]->get_var_name();
2356 this->fm[0] = new tablevar_t(new_mrg->node_name.c_str());
2357 this->fm[0]->set_range_var(vname);
// NOTE(review): after the erase, this->mvars[0] is the same pointer that was
// pushed as new_mrg->mvars[1]; the set_field/set_tablevar_ref calls below
// therefore also change the child's second merge column — confirm intended.
2358 this->mvars[0]->set_field(table_layout->get_field_name(merge_fieldpos));
2359 this->mvars[0]->set_tablevar_ref(0);
2360 this->mvars[1]->set_tablevar_ref(1);
// Child first, then this node.
2362 ret.push_back(new_mrg);
2363 ret.push_back(this);
// Diagnostic dump of the resulting two nodes.
2366 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg->node_name.c_str(),this->node_name.c_str());
2367 for(i=0;i<new_mrg->fm.size();++i)
2368 printf("\tsource %s var %d (%s, %s) \n",new_mrg->node_name.c_str(),i,new_mrg->fm[i]->to_string().c_str(), new_mrg->mvars[i]->to_string().c_str());
2369 for(i=0;i<this->fm.size();++i)
2370 printf("\tsource %s var %d (%s, %s) \n",this->node_name.c_str(),i,this->fm[i]->to_string().c_str(), this->mvars[i]->to_string().c_str());
2377 // divide up the sources between two children.
2378 // Then, recurse on the children.
2380 mrg_qpn *new_mrg1 = (mrg_qpn *)this->make_copy("_cH1");
2381 mrg_qpn *new_mrg2 = (mrg_qpn *)this->make_copy("_cH2");
// First half (integer division) of the sources goes to new_mrg1 ...
2382 for(i=0;i<this->fm.size()/2;++i){
2383 new_mrg1->fm.push_back(this->fm[i]);
2384 new_mrg1->mvars.push_back(this->mvars[i]);
2385 //printf("Pushing %d (%s, %s) to new_mrg1\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// ... and the rest (i continues from where the first loop stopped) to new_mrg2.
2387 for(;i<this->fm.size();++i){
2388 new_mrg2->fm.push_back(this->fm[i]);
2389 new_mrg2->mvars.push_back(this->mvars[i]);
2390 //printf("Pushing %d (%s, %s) to new_mrg2\n",i,fm[i]->to_string().c_str(), mvars[i]->to_string().c_str());
// Renumber the merge columns so each refers to its position within its child.
2392 for(i=0;i<new_mrg1->mvars.size();++i)
2393 new_mrg1->mvars[i]->set_tablevar_ref(i);
2394 for(i=0;i<new_mrg2->mvars.size();++i)
2395 new_mrg2->mvars[i]->set_tablevar_ref(i);
2397 // Children created, make this merge them.
// NOTE(review): presumably this node's own fm/mvars are emptied before the
// pushes below — that step is not visible here; confirm.
// Source 0: the output of new_mrg1, merged on the node's merge field.
2401 tablevar_t *tmp_tblvar = new tablevar_t(new_mrg1->node_name.c_str());
2402 tmp_tblvar->set_range_var("_mrg_var_1");
2403 fm.push_back(tmp_tblvar);
2404 colref_t *tmp_cref = new colref_t("_mrg_var_1",table_layout->get_field_name(merge_fieldpos).c_str());
2405 tmp_cref->set_tablevar_ref(0);
2406 mvars.push_back(tmp_cref);
// Source 1: the output of new_mrg2, merged on the same field.
2408 tmp_tblvar = new tablevar_t(new_mrg2->node_name.c_str());
2409 tmp_tblvar->set_range_var("_mrg_var_2");
2410 fm.push_back(tmp_tblvar);
2411 tmp_cref = new colref_t("_mrg_var_2",table_layout->get_field_name(merge_fieldpos).c_str());
2412 tmp_cref->set_tablevar_ref(1);
2413 mvars.push_back(tmp_cref);
// Diagnostic dump of both children.
2417 printf("split sources %s (%s %s)\n",node_name.c_str(),new_mrg1->node_name.c_str(),new_mrg2->node_name.c_str());
2418 for(i=0;i<new_mrg1->fm.size();++i)
2419 printf("\tsource %s var %d (%s, %s) \n",new_mrg1->node_name.c_str(),i,new_mrg1->fm[i]->to_string().c_str(), new_mrg1->mvars[i]->to_string().c_str());
2420 for(i=0;i<new_mrg2->fm.size();++i)
2421 printf("\tsource %s var %d (%s, %s) \n",new_mrg2->node_name.c_str(),i,new_mrg2->fm[i]->to_string().c_str(), new_mrg2->mvars[i]->to_string().c_str());
2424 // Recurse and put them together
// Result order: everything from child 1, everything from child 2, then this.
2425 vector<mrg_qpn *> st1 = new_mrg1->split_sources();
2426 ret.insert(ret.end(), st1.begin(), st1.end());
2427 vector<mrg_qpn *> st2 = new_mrg2->split_sources();
2428 ret.insert(ret.end(), st2.begin(), st2.end());
2430 ret.push_back(this);
2438 //////// Split helper function : resolve interfaces
// Compute the (machine, interface) pairs that `table` reads from.
//   table            - the FROM-clause table variable to resolve.
//   ifdb             - interface database used to evaluate an interface set.
//   n_virtual_ifaces - when 1, the basic interface list is returned unchanged.
//   hfta_parallelism, hfta_idx - select which slice of virtual interfaces
//                      is generated for each basic interface.
2440 vector<pair<string,string> > get_ifaces(tablevar_t *table, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2441 vector<pair<string,string> > basic_ifaces;
// An interface-set reference is expanded through the interface database;
// otherwise the table's own machine/interface pair is used directly.
2443 if(table->get_ifq()){
2444 basic_ifaces= ifdb->eval(table->get_interface(),ierr);
2446 fprintf(stderr,"ERROR, Interface set %s not found.\n",table->get_interface().c_str());
2449 fprintf(stderr,"ERROR, interface definition file didn't parse.\n");
2452 basic_ifaces.push_back(make_pair(table->get_machine(), table->get_interface()));
2455 if(n_virtual_ifaces == 1)
2456 return basic_ifaces;
// Number of virtual interfaces handled per hfta instance (integer division).
// NOTE(review): hfta_parallelism == 0 would divide by zero here — confirm
// callers always pass a positive value.
2458 int stride = n_virtual_ifaces / hfta_parallelism;
2460 vector<pair<string,string> > ifaces;
// For every basic interface emit `stride` virtual interfaces, named
// <iface>X<2*s> for s in [hfta_idx*stride, (hfta_idx+1)*stride).
2462 for(i=0;i<basic_ifaces.size();++i){
2463 string mach = basic_ifaces[i].first;
2464 string iface = basic_ifaces[i].second;
2465 for(s=hfta_idx*stride;s<(hfta_idx+1)*stride;++s){
2466 ifaces.push_back(pair<string, string>(mach,iface+"X"+int_to_string(2*s)));
2474 ///////// Split helper function : compute slack in a generated
// Derive this merge node's `slack` expression from the interface definitions.
//   t_se    - temporal scalar expression examined for a divisor.
//   sources - (machine, interface) pairs whose "Slack_<fnm>" values are read.
//   ifdb    - interface database queried for those values.
//   gbt     - group-by table handed to find_temporal_divisor.
// The largest slack value found is divided by the divisor, rounded up, and
// stored in `slack` as a LITERAL_LONGINT literal expression.
// NOTE(review): `fname` is not referenced in the visible lines — confirm.
2477 void mrg_qpn::resolve_slack(scalarexp_t *t_se, string fname, vector<pair<string, string> > &sources, ifq_t *ifdb, gb_table *gbt){
2481 // Find slack divisor, if any.
// NOTE(review): fnm is presumably filled in by find_temporal_divisor; its
// declaration is not visible here.
2483 long long int slack_divisor = find_temporal_divisor(t_se,gbt, fnm);
2484 if(slack_divisor <= 0){
2489 // find max slack in the iface spec
2490 long long int max_slacker = 0, this_slacker;
2491 string rname = "Slack_"+fnm;
2492 for(s=0;s<sources.size();++s){
2493 string src_machine = sources[s].first;
2494 string src_iface = sources[s].second;
2495 vector<string> slack_vec = ifdb->get_iface_vals(src_machine, src_iface,rname,e,es);
2496 for(v=0;v<slack_vec.size();++v){
// NOTE(review): "%qd" is a non-standard length modifier ("%lld" is the
// standard spelling for long long), and sscanf returns EOF (nonzero) on an
// input failure, so the truthiness test accepts that case — consider "== 1".
2497 if(sscanf(slack_vec[v].c_str(),"%qd",&this_slacker)){
2498 if(this_slacker > max_slacker)
2499 max_slacker = this_slacker;
2504 if(max_slacker <= 0){
// Slack expressed in units of the temporal divisor, rounded up.
2510 long long int the_slack=(long long int)(ceil(((double)max_slacker)/((double)slack_divisor)));
2512 sprintf(tmps,"%lld",the_slack);
2513 literal_t *slack_lit = new literal_t(tmps, LITERAL_LONGINT);
2514 slack = new scalarexp_t(slack_lit);
2518 //------------------------------------------------------------------
2519 // split a node to extract LFTA components.
// A watch table node is never split: the result holds this node only.
// Note that `this` itself is pushed, not a duplicate.
2521 vector<qp_node *> watch_tbl_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2522 // nothing to do, nothing to split, return copy of self.
2526 vector<qp_node *> ret_vec;
2528 ret_vec.push_back(this);
// A merge node is never split: the result holds this node only.
// Note that `this` itself is pushed, not a duplicate.
2534 vector<qp_node *> mrg_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2535 // nothing to do, nothing to split, return copy of self.
2539 vector<qp_node *> ret_vec;
2541 ret_vec.push_back(this);
// Split a filter join for LFTA execution.
// The select list and where clause are checked for fta-forbidden content;
// an unsafe join is reported as an error. With one interface the node is
// bound to it and returned; with several, one copy of the join is created
// per interface and a merge node over the copies is appended.
2546 vector<qp_node *> filter_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2547 vector<qp_node *> ret_vec;
2549 // First check if the query can be pushed to the FTA.
// fta_ok accumulates the checks over every select expression and predicate.
2552 for(s=0;s<select_list.size();s++){
2553 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2556 for(p=0;p<where.size();p++){
2557 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2561 fprintf(stderr,"ERROR, filter join %s is fta-unsafe.\n",node_name.c_str());
2565 // Can it be done in a single lfta?
2566 // Get the set of interfaces it accesses.
// Only the first FROM entry is consulted for the interface set.
2569 vector<string> sel_names;
2570 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2571 if (ifaces.empty()) {
2572 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2576 if(ifaces.size() == 1){
2577 // Single interface, no need to merge.
2579 ret_vec.push_back(this);
// Bind every range variable to the single resolved interface.
2581 for(i=0;i<from.size();i++){
2582 from[i]->set_machine(ifaces[0].first);
2583 from[i]->set_interface(ifaces[0].second);
2584 from[i]->set_ifq(false);
2588 // Multiple interfaces, generate the interface-specific queries plus
// NOTE(review): sel_names is declared a second time here; presumably this is
// an inner scope that shadows the declaration above — confirm.
2592 vector<string> sel_names;
2593 for(si=0;si<ifaces.size();++si){
2594 filter_join_qpn *fta_node = new filter_join_qpn();
// Name the per-interface copy: _<node>_<machine>_<interface>.
// NOTE(review): the ifaces.size()==1 test looks unreachable if this path is
// only taken for multiple interfaces — confirm.
2597 if(ifaces.size()==1)
2598 fta_node->set_node_name( node_name );
2600 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2602 fta_node->set_node_name(new_name);
2604 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM list and bind each copy to interface si.
2608 for(f=0;f<from.size();f++){
2609 fta_node->from.push_back(from[f]->duplicate());
2610 fta_node->from[f]->set_machine(ifaces[si].first);
2611 fta_node->from[f]->set_interface(ifaces[si].second);
2612 fta_node->from[f]->set_ifq(false);
2614 fta_node->temporal_var = temporal_var;
2615 fta_node->temporal_range = temporal_range;
2617 fta_node->use_bloom = use_bloom;
2619 for(s=0;s<select_list.size();s++){
2620 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Each predicate class is duplicated and re-analyzed; every copy is recorded
// both in its own list and in the copy's where list.
2623 for(p=0;p<shared_pred.size();p++){
2624 predicate_t *new_pr = dup_pr(shared_pred[p]->pr, NULL);
2625 cnf_elem *new_cnf = new cnf_elem(new_pr);
2626 analyze_cnf(new_cnf);
2627 fta_node->shared_pred.push_back(new_cnf);
2628 fta_node->where.push_back(new_cnf);
2630 for(p=0;p<pred_t0.size();p++){
2631 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2632 cnf_elem *new_cnf = new cnf_elem(new_pr);
2633 analyze_cnf(new_cnf);
2634 fta_node->pred_t0.push_back(new_cnf);
2635 fta_node->where.push_back(new_cnf);
2637 for(p=0;p<pred_t1.size();p++){
2638 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2639 cnf_elem *new_cnf = new cnf_elem(new_pr);
2640 analyze_cnf(new_cnf);
2641 fta_node->pred_t1.push_back(new_cnf);
2642 fta_node->where.push_back(new_cnf);
2644 for(p=0;p<hash_eq.size();p++){
2645 predicate_t *new_pr = dup_pr(hash_eq[p]->pr, NULL);
2646 cnf_elem *new_cnf = new cnf_elem(new_pr);
2647 analyze_cnf(new_cnf);
2648 fta_node->hash_eq.push_back(new_cnf);
2649 fta_node->where.push_back(new_cnf);
2651 for(p=0;p<postfilter.size();p++){
2652 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2653 cnf_elem *new_cnf = new cnf_elem(new_pr);
2654 analyze_cnf(new_cnf);
2655 fta_node->postfilter.push_back(new_cnf);
2656 fta_node->where.push_back(new_cnf);
2659 // Xfer all of the parameters.
2660 // Use existing handle annotations.
2661 vector<string> param_names = param_tbl->get_param_names();
2663 for(pi=0;pi<param_names.size();pi++){
2664 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2665 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2666 param_tbl->handle_access(param_names[pi]));
2668 fta_node->definitions = definitions;
// A nonzero result from resolve_if_params is treated as failure; the message
// goes into this node's err_str and error_code is set to 3.
2669 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2670 this->error_code = 3;
2674 ret_vec.push_back(fta_node);
// Merge the per-interface copies; the first copy is handed to the mrg_qpn
// constructor together with the original node name.
2677 mrg_qpn *mrg_node = new mrg_qpn((filter_join_qpn *)ret_vec[0],
2678 node_name, sel_names,ifaces, ifdb);
2679 ret_vec.push_back(mrg_node);
// Split a watchlist join for LFTA execution.
// The select list and where clause are checked for fta-forbidden content;
// an unsafe join is reported as an error. With one interface the node is
// bound to it and returned; with several, one copy of the join is created
// per interface and a merge node over the copies is appended.
2690 vector<qp_node *> watch_join_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
2691 vector<qp_node *> ret_vec;
2693 // First check if the query can be pushed to the FTA.
// fta_ok accumulates the checks over every select expression and predicate.
2696 for(s=0;s<select_list.size();s++){
2697 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
2700 for(p=0;p<where.size();p++){
2701 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
2705 fprintf(stderr,"ERROR, watchlist join %s is fta-unsafe.\n",node_name.c_str());
2709 // Can it be done in a single lfta?
2710 // Get the set of interfaces it accesses.
// Only the first FROM entry is consulted for the interface set.
2713 vector<string> sel_names;
2714 vector<pair<string,string> > ifaces = get_ifaces(from[0], ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
2715 if (ifaces.empty()) {
// NOTE(review): this message names filter_join_qpn although the enclosing
// method belongs to watch_join_qpn — likely a copy/paste leftover.
2716 fprintf(stderr,"INTERNAL ERROR in filter_join_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
2720 if(ifaces.size() == 1){
2721 // Single interface, no need to merge.
2723 ret_vec.push_back(this);
2725 // Treat the range vars a bit differently, the 2nd is reading from a _local_ watchlist.
2726 from[0]->set_machine(ifaces[0].first);
2727 from[0]->set_interface(ifaces[0].second);
2728 from[0]->set_ifq(false);
2730 from[1]->set_machine(ifaces[0].first);
2731 from[1]->set_interface("_local_");
2732 from[1]->set_ifq(false);
2736 // Multiple interfaces, generate the interface-specific queries plus
// NOTE(review): sel_names is declared a second time here; presumably this is
// an inner scope that shadows the declaration above — confirm.
2740 vector<string> sel_names;
2741 for(si=0;si<ifaces.size();++si){
2742 watch_join_qpn *fta_node = new watch_join_qpn();
// Name the per-interface copy: _<node>_<machine>_<interface>.
2745 if(ifaces.size()==1)
2746 fta_node->set_node_name( node_name );
2748 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
2750 fta_node->set_node_name(new_name);
2752 sel_names.push_back(fta_node->get_node_name());
// Duplicate the FROM list on the interface's machine.
// NOTE(review): the test choosing between the real interface and "_local_"
// is not visible here; presumably the first range variable gets the real
// interface, as in the single-interface path above — confirm.
2756 for(f=0;f<from.size();f++){
2757 fta_node->from.push_back(from[f]->duplicate());
2758 fta_node->from[f]->set_machine(ifaces[si].first);
2760 fta_node->from[f]->set_interface(ifaces[si].second);
2762 fta_node->from[f]->set_interface("_local_");
2763 fta_node->from[f]->set_ifq(false);
2766 for(s=0;s<select_list.size();s++){
2767 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
// Each predicate class is duplicated and re-analyzed; every copy is also
// recorded in the copy's where list.
2770 for(p=0;p<pred_t0.size();p++){
2771 predicate_t *new_pr = dup_pr(pred_t0[p]->pr, NULL);
2772 cnf_elem *new_cnf = new cnf_elem(new_pr);
2773 analyze_cnf(new_cnf);
2774 fta_node->pred_t0.push_back(new_cnf);
2775 fta_node->where.push_back(new_cnf);
2777 for(p=0;p<pred_t1.size();p++){
2778 predicate_t *new_pr = dup_pr(pred_t1[p]->pr, NULL);
2779 cnf_elem *new_cnf = new cnf_elem(new_pr);
2780 analyze_cnf(new_cnf);
2781 fta_node->pred_t1.push_back(new_cnf);
2782 fta_node->where.push_back(new_cnf);
// hash_eq is indexed by key field name here, one predicate per key.
2784 for(p=0;p<key_flds.size();p++){ // we've checked that all keys are covered
2785 string k = key_flds[p];
2786 predicate_t *new_pr = dup_pr(hash_eq[k]->pr, NULL);
2787 cnf_elem *new_cnf = new cnf_elem(new_pr);
2788 analyze_cnf(new_cnf);
2789 fta_node->hash_eq[k] = new_cnf;
2790 fta_node->where.push_back(new_cnf);
// NOTE(review): the join_filter copies are appended to fta_node->postfilter,
// not fta_node->join_filter — confirm this is intended.
2792 for(p=0;p<join_filter.size();p++){
2793 predicate_t *new_pr = dup_pr(join_filter[p]->pr, NULL);
2794 cnf_elem *new_cnf = new cnf_elem(new_pr);
2795 analyze_cnf(new_cnf);
2796 fta_node->postfilter.push_back(new_cnf);
2797 fta_node->where.push_back(new_cnf);
2799 for(p=0;p<postfilter.size();p++){
2800 predicate_t *new_pr = dup_pr(postfilter[p]->pr, NULL);
2801 cnf_elem *new_cnf = new cnf_elem(new_pr);
2802 analyze_cnf(new_cnf);
2803 fta_node->postfilter.push_back(new_cnf);
2804 fta_node->where.push_back(new_cnf);
2806 fta_node->key_flds = key_flds;
2808 // Xfer all of the parameters.
2809 // Use existing handle annotations.
2810 vector<string> param_names = param_tbl->get_param_names();
2812 for(pi=0;pi<param_names.size();pi++){
2813 data_type *dt = param_tbl->get_data_type(param_names[pi]);
2814 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
2815 param_tbl->handle_access(param_names[pi]));
2817 fta_node->definitions = definitions;
// A nonzero result from resolve_if_params is treated as failure; the message
// goes into this node's err_str and error_code is set to 3.
2818 if(fta_node->resolve_if_params(ifdb, this->err_str)){
2819 this->error_code = 3;
2823 ret_vec.push_back(fta_node);
// Merge the per-interface copies; the first copy is handed to the mrg_qpn
// constructor together with the original node name.
2826 mrg_qpn *mrg_node = new mrg_qpn((watch_join_qpn *)ret_vec[0],
2827 node_name, sel_names,ifaces, ifdb);
2828 ret_vec.push_back(mrg_node);
2835 // Use to search for unresolved interface param refs in an hfta.
// Accumulate into `ret` the interface-parameter reference counts reported
// by the helpers for the select list and the where clause; `ifpnames` is
// passed through to each helper.
// NOTE(review): presumably returns ret; the return is not visible here.
2837 int spx_qpn::count_ifp_refs(set<string> &ifpnames){
2840 for(i=0;i<select_list.size();++i)
2841 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2842 for(i=0;i<where.size();++i)
2843 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulate into `ret` the interface-parameter reference counts for the
// select list, where and having clauses, aggregate operands and group-by
// definitions; `ifpnames` is passed through to each helper.
// NOTE(review): presumably returns ret; the return is not visible here.
2847 int sgah_qpn::count_ifp_refs(set<string> &ifpnames){
2850 for(i=0;i<select_list.size();++i)
2851 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2852 for(i=0;i<where.size();++i)
2853 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2854 for(i=0;i<having.size();++i)
2855 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
// Built-in non-star aggregates carry a single scalar expression; the other
// visible branch walks an operand list instead.
2856 for(i=0;i<aggr_tbl.size();++i){
2857 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2858 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
2860 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2861 for(j=0;j<opl.size();++j)
2862 ret += count_se_ifp_refs(opl[j],ifpnames);
2865 for(i=0;i<gb_tbl.size();++i){
2866 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Accumulate into `ret` the interface-parameter reference counts for the
// select list, where, having and closing_when clauses, aggregate operands
// and group-by definitions; `ifpnames` is passed through to each helper.
// NOTE(review): presumably returns ret; the return is not visible here.
2872 int rsgah_qpn::count_ifp_refs(set<string> &ifpnames){
2875 for(i=0;i<select_list.size();++i)
2876 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2877 for(i=0;i<where.size();++i)
2878 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2879 for(i=0;i<having.size();++i)
2880 ret += count_pr_ifp_refs(having[i]->pr,ifpnames);
2881 for(i=0;i<closing_when.size();++i)
2882 ret += count_pr_ifp_refs(closing_when[i]->pr,ifpnames);
// Built-in non-star aggregates carry a single scalar expression; the other
// visible branch walks an operand list instead.
2883 for(i=0;i<aggr_tbl.size();++i){
2884 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
2885 ret += count_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifpnames);
2887 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
2888 for(j=0;j<opl.size();++j)
2889 ret += count_se_ifp_refs(opl[j],ifpnames);
2892 for(i=0;i<gb_tbl.size();++i){
2893 ret += count_se_ifp_refs(gb_tbl.get_def(i), ifpnames);
// Interface-parameter reference count for a watch table node.
// NOTE(review): the body is not visible here; presumably there is nothing to
// scan for this node type — confirm.
2898 int watch_tbl_qpn::count_ifp_refs(set<string> &ifpnames){
// Interface-parameter reference count for a merge node.
// NOTE(review): the body is not visible here; presumably there is nothing to
// scan for this node type — confirm.
2902 int mrg_qpn::count_ifp_refs(set<string> &ifpnames){
// Accumulate into `ret` the interface-parameter reference counts for the
// select list and every predicate list of the join (both prefilters,
// temporal_eq, hash_eq, postfilter); `ifpnames` is passed to each helper.
// NOTE(review): presumably returns ret; the return is not visible here.
2906 int join_eq_hash_qpn::count_ifp_refs(set<string> &ifpnames){
2909 for(i=0;i<select_list.size();++i)
2910 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2911 for(i=0;i<prefilter[0].size();++i)
2912 ret += count_pr_ifp_refs(prefilter[0][i]->pr,ifpnames);
2913 for(i=0;i<prefilter[1].size();++i)
2914 ret += count_pr_ifp_refs(prefilter[1][i]->pr,ifpnames);
2915 for(i=0;i<temporal_eq.size();++i)
2916 ret += count_pr_ifp_refs(temporal_eq[i]->pr,ifpnames);
2917 for(i=0;i<hash_eq.size();++i)
2918 ret += count_pr_ifp_refs(hash_eq[i]->pr,ifpnames);
2919 for(i=0;i<postfilter.size();++i)
2920 ret += count_pr_ifp_refs(postfilter[i]->pr,ifpnames);
// Accumulate into `ret` the interface-parameter reference counts reported
// by the helpers for the select list and the where clause; `ifpnames` is
// passed through to each helper.
// NOTE(review): presumably returns ret; the return is not visible here.
2924 int filter_join_qpn::count_ifp_refs(set<string> &ifpnames){
2927 for(i=0;i<select_list.size();++i)
2928 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2929 for(i=0;i<where.size();++i)
2930 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
// Accumulate into `ret` the interface-parameter reference counts reported
// by the helpers for the select list and the where clause; `ifpnames` is
// passed through to each helper.
// NOTE(review): presumably returns ret; the return is not visible here.
2934 int watch_join_qpn::count_ifp_refs(set<string> &ifpnames){
2937 for(i=0;i<select_list.size();++i)
2938 ret += count_se_ifp_refs(select_list[i]->se,ifpnames);
2939 for(i=0;i<where.size();++i)
2940 ret += count_pr_ifp_refs(where[i]->pr,ifpnames);
2946 // Resolve interface params to string literals
// Resolve interface-parameter references in the select list and where
// clause against the machine/interface of the first FROM entry.
//   ifdb - interface database handed to the resolver helpers.
//   err  - handed to the helpers, presumably to receive an error message.
// NOTE(review): callers in this file treat a nonzero result as failure; the
// return statements themselves are not visible here.
2947 int filter_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2950 string ifname = from[0]->get_interface();
2951 string ifmach = from[0]->get_machine();
2952 for(i=0;i<select_list.size();++i)
2953 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2955 for(i=0;i<where.size();++i)
2956 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and where
// clause against the machine/interface of the first FROM entry.
//   ifdb - interface database handed to the resolver helpers.
//   err  - handed to the helpers, presumably to receive an error message.
// NOTE(review): callers in this file treat a nonzero result as failure; the
// return statements themselves are not visible here.
2961 int watch_join_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2964 string ifname = from[0]->get_interface();
2965 string ifmach = from[0]->get_machine();
2966 for(i=0;i<select_list.size();++i)
2967 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2969 for(i=0;i<where.size();++i)
2970 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references in the select list and where
// clause against the machine/interface recorded on table_name.
//   ifdb - interface database handed to the resolver helpers.
//   err  - handed to the helpers, presumably to receive an error message.
// NOTE(review): callers in this file treat a nonzero result as failure; the
// return statements themselves are not visible here.
2976 int spx_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2979 string ifname = table_name->get_interface();
2980 string ifmach = table_name->get_machine();
2981 for(i=0;i<select_list.size();++i)
2982 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) )
2984 for(i=0;i<where.size();++i)
2985 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err))
// Resolve interface-parameter references throughout an aggregation node:
// select list, where and having clauses, aggregate operands and group-by
// definitions, all against the machine/interface recorded on table_name.
//   ifdb - interface database handed to the resolver helpers.
//   err  - handed to the helpers, presumably to receive an error message.
// NOTE(review): callers in this file treat a nonzero result as failure; the
// return statements themselves are not visible here.
2990 int sgah_qpn::resolve_if_params( ifq_t *ifdb, string &err){
2993 string ifname = table_name->get_interface();
2994 string ifmach = table_name->get_machine();
2996 //printf("Select list has %d elements\n",select_list.size());
2997 for(i=0;i<select_list.size();++i){
2998 //printf("\tresolving elemet %d\n",i);
2999 if( resolve_se_ifp_refs(select_list[i]->se,ifmach, ifname, ifdb, err) ){
3003 for(i=0;i<where.size();++i){
3004 if( resolve_pr_ifp_refs(where[i]->pr,ifmach, ifname, ifdb, err) )
3007 for(i=0;i<having.size();++i){
3008 if( resolve_pr_ifp_refs(having[i]->pr,ifmach, ifname, ifdb, err) )
3011 //printf("aggr list has %d elements\n",select_list.size());
// Built-in non-star aggregates carry a single scalar expression; the other
// visible branch walks an operand list instead.
3012 for(i=0;i<aggr_tbl.size();++i){
3013 //printf("\tresolving elemet %d\n",i);
3014 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
3015 //printf("\t\t\tbuiltin\n");
3016 if( resolve_se_ifp_refs(aggr_tbl.get_aggr_se(i),ifmach, ifname, ifdb, err) )
3019 //printf("\t\t\tudaf\n");
3020 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
3021 for(j=0;j<opl.size();++j)
3022 if( resolve_se_ifp_refs(opl[j],ifmach, ifname, ifdb, err) )
3026 for(i=0;i<gb_tbl.size();++i){
3027 if( resolve_se_ifp_refs(gb_tbl.get_def(i), ifmach, ifname, ifdb, err) )
3036 SPLITTING A SELECTION_PROJECTION OPERATOR
3038 An SPX node may reference:
3039 literals, parameters, colrefs, functions, operators
3040 An SPX node may not reference:
3041 group-by variables, aggregates
3043 An SPX node contains
3044 selection list of SEs
3045 where list of CNF predicates
3048 If each selection SE and each where predicate is fta-safe
3049 execute entire operator as an LFTA.
3051 for each predicate in the where clause
3052 if it is fta safe, execute it in the lfta
3053 else, split each SE in the predicate, evaluate the
3054 top-level SEs in the hfta and eval the predicate on that.
3055 For each SE in the se list
3056 Split the SE, eval the high level part, push onto hfta
3060 A SE represents a value which must be computed. The LFTA
3061 must provide sub-values from which the HFTA can compute the
3063 1) the SE is fta-safe
3064 Create an entry in the selection list of the LFTA which is
3065 the SE itself. Reference this LFTA selection list entry in
3066 the HFTA (via a field name assigned to the lfta selection
3068 2) The SE is not fta-safe
3069 Determine the boundary between the fta-safe and the fta-unsafe
3070 portions of the SE. The result is a rooted tree (which is
3071 evaluated at the HFTA) which references sub-SEs (which are
3072 evaluated at the LFTA). Each of the sub-SEs is placed on
3073 the selection list of the LFTA and assigned field names,
3074 the top part is evaluated at the HFTA and references the
3075 sub-SEs through their assigned field names.
3076 The only SEs on the LFTA selection list are those created by
3077 the above mechanism. The collection of assigned field names becomes
3078 the schema of the LFTA.
3080 TODO: insert tablevar names into the colrefs.
// Split a selection/projection node into LFTA and stream (HFTA) parts.
// Visible outcomes:
//   - the source is not a PROTOCOL_SCHEMA table: this node is returned;
//   - everything is fta-safe: one copy per interface, plus a merge node
//     when there is more than one interface;
//   - otherwise: an "_fta_<node>" LFTA part and a stream node carrying the
//     original node name, with the LFTA part replicated per interface and
//     merged when there is more than one interface.
// hfta_returned is set to 2 on the visible multi-interface split path.
3084 vector<qp_node *> spx_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3087 vector<qp_node *> ret_vec;
3089 // If the node reads from a stream, don't split.
3090 // int t = Schema->get_table_ref(table_name->get_schema_name());
3091 int t = table_name->get_schema_ref();
3092 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3094 ret_vec.push_back(this);
3099 // Get the set of interfaces it accesses.
3102 vector<string> sel_names;
3103 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3104 if (ifaces.empty()) {
3105 fprintf(stderr,"INTERNAL ERROR in spx_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3110 // The FTA node, it is always returned.
// NOTE(review): this fta_node shares this node's table_name pointer (no
// duplicate), so later set_machine/set_interface calls through it also
// change the original tablevar — confirm intended.
3112 spx_qpn *fta_node = new spx_qpn();
3113 fta_node->table_name = table_name;
3115 // for colname imputation
3116 // vector<string> fta_flds, stream_flds;
3119 // First check if the query can be pushed to the FTA.
// fta_ok accumulates the checks over every select expression and predicate.
3122 for(s=0;s<select_list.size();s++){
3123 fta_ok &= check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns);
3126 for(p=0;p<where.size();p++){
3127 fta_ok &= check_fta_forbidden_pr(where[p]->pr,NULL, Ext_fcns);
3131 ////////////////////////////////////////////////////////////
3132 // The query can be executed entirely in the FTA.
3135 for(si=0;si<ifaces.size();++si){
// NOTE(review): fta_node is reassigned here, so the object allocated before
// the fta-safety check appears to be orphaned on this path — confirm.
3136 fta_node = new spx_qpn();
// A single interface keeps the original node name; otherwise the copy is
// named _<node>_<machine>_<interface>.
3139 if(ifaces.size()==1)
3140 fta_node->set_node_name( node_name );
3142 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3144 fta_node->set_node_name(new_name);
3146 sel_names.push_back(fta_node->get_node_name());
// Each copy gets its own tablevar bound to interface si.
3149 fta_node->table_name = table_name->duplicate();
3150 fta_node->table_name->set_machine(ifaces[si].first);
3151 fta_node->table_name->set_interface(ifaces[si].second);
3152 fta_node->table_name->set_ifq(false);
3154 for(s=0;s<select_list.size();s++){
3155 fta_node->select_list.push_back( dup_select(select_list[s], NULL) );
3157 for(p=0;p<where.size();p++){
3158 predicate_t *new_pr = dup_pr(where[p]->pr, NULL);
3159 cnf_elem *new_cnf = new cnf_elem(new_pr);
3160 analyze_cnf(new_cnf);
3162 fta_node->where.push_back(new_cnf);
3165 // Xfer all of the parameters.
3166 // Use existing handle annotations.
3167 vector<string> param_names = param_tbl->get_param_names();
3169 for(pi=0;pi<param_names.size();pi++){
3170 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3171 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3172 param_tbl->handle_access(param_names[pi]));
3174 fta_node->definitions = definitions;
// A nonzero result from resolve_if_params is treated as failure; the message
// goes into this node's err_str and error_code is set to 3.
3175 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3176 this->error_code = 3;
3180 ret_vec.push_back(fta_node);
// With several interfaces the per-interface copies are merged under the
// original node name.
3183 if(ifaces.size() > 1){
3184 spx_qpn *tmp_spx = (spx_qpn *)(ret_vec[0]);
3185 mrg_qpn *mrg_node = new mrg_qpn(tmp_spx,
3186 node_name, sel_names,ifaces, ifdb);
// NOTE(review): the split_sources fragment below begins with a prose line,
// so it appears to be disabled text; the merge node is pushed unsplit.
3188 Do not split sources until we are done with optimizations
3189 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3190 for(i=0;i<split_merge.size();++i){
3191 ret_vec.push_back(split_merge[i]);
3193 hfta_returned = split_merge.size();
3195 ret_vec.push_back(mrg_node);
3200 // printf("OK as FTA.\n");
3201 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3206 ////////////////////////////////////////////////////
3207 // The fta must be split. Create a stream node.
3208 // NOTE : I am counting on the single
3209 // table in the from list. (Joins handled in a different operator).
3213 spx_qpn *stream_node = new spx_qpn();
3214 stream_node->set_node_name( node_name );
3215 // Create the tablevar in the stream's FROM clause.
3216 // set the schema name to the name of the LFTA,
3217 // and use the same tablevar name.
3218 stream_node->table_name = new tablevar_t(
3219 ("_fta_"+node_name).c_str()
3221 stream_node->table_name->set_range_var(table_name->get_var_name());
3224 fta_node->set_node_name( "_fta_"+node_name );
3226 // table var names of fta, stream.
3227 string fta_var = fta_node->table_name->get_var_name();
3228 string stream_var = stream_node->table_name->get_var_name();
3230 // Set up select list vector
3231 vector< vector<select_element *> *> select_vec;
3232 select_vec.push_back(&(fta_node->select_list)); // only one child
3235 // Split the select list into its FTA and stream parts.
3236 // If any part of the SE is fta-unsafe, it will return
3237 // a SE to execute at the stream ref'ing SE's evaluated
3238 // at the fta (which are put on the FTA's select list as a side effect).
3239 // If the SE is fta-safe, put it on the fta select list, make
3240 // a ref to it and put the ref on the stream select list.
3241 for(s=0;s<select_list.size();s++){
3242 bool fta_forbidden = false;
3243 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3244 // scalarexp_t *root_se = split_fta_se(
3245 // select_list[s]->se,fta_forbidden, fta_node->select_list, Ext_fcns
3247 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
3248 fta_forbidden, se_src, select_vec, Ext_fcns
3250 // if(fta_forbidden){
// When the root is fta-forbidden, or se_src is still SPLIT_FTAVEC_NOTBLVAR,
// root_se goes onto the stream select list as-is; the other visible branch
// wraps it in a reference made by make_fta_se_ref.
3251 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3252 stream_node->select_list.push_back(
3253 new select_element(root_se, select_list[s]->name)
3256 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,root_se,0);
3257 stream_node->select_list.push_back(
3258 new select_element(new_se, select_list[s]->name)
3264 // The WHERE clause has already been split into a set of clauses
3265 // that are ANDED together. For each clause, check if its FTA-safe.
3266 // If not, split its SE's into fta-safe and stream-executing parts,
3267 // then put a clause which ref's the SEs into the stream.
3268 // Else put it into the LFTA.
3269 predicate_t *pr_root;
3271 for(p=0;p<where.size();p++){
3272 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) ){
3273 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3274 // pr_root = split_fta_pr( where[p]->pr, fta_node->select_list, Ext_fcns);
3275 fta_forbidden = true;
3277 pr_root = dup_pr(where[p]->pr, NULL);
3278 fta_forbidden = false;
3280 cnf_elem *cnf_root = new cnf_elem(pr_root);
3281 analyze_cnf(cnf_root);
// NOTE(review): presumably fta_forbidden selects which of the two pushes
// below runs; the test itself is not visible here.
3284 stream_node->where.push_back(cnf_root);
3286 fta_node->where.push_back(cnf_root);
3292 // Divide the parameters among the stream, FTA.
3293 // Currently : assume that the stream receives all parameters
3294 // and parameter updates, incorporates them, then passes
3295 // all of the parameters to the FTA.
3296 // This will need to change (tables, fta-unsafe types. etc.)
3298 // I will pass on the use_handle_access marking, even
3299 // though the fcn call that requires handle access might
3300 // exist in only one of the parts of the query.
3301 // Parameter manipulation and handle access determination will
3302 // need to be revisited anyway.
3303 vector<string> param_names = param_tbl->get_param_names();
3305 for(pi=0;pi<param_names.size();pi++){
3306 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3307 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3308 param_tbl->handle_access(param_names[pi]));
3309 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3310 param_tbl->handle_access(param_names[pi]));
// The LFTA part receives the definitions minus "_referenced_ifaces"; the
// stream node receives them unchanged.
3313 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3314 stream_node->definitions = definitions;
3316 // Now split by interfaces
3317 if(ifaces.size() > 1){
3318 for(si=0;si<ifaces.size();++si){
3319 spx_qpn *subq_node = new spx_qpn();
3321 // Name the subquery
3322 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3324 subq_node->set_node_name( new_name) ;
3325 sel_names.push_back(subq_node->get_node_name());
// Per-interface copy of the LFTA part: own tablevar, select list, where.
3328 subq_node->table_name = fta_node->table_name->duplicate();
3329 subq_node->table_name->set_machine(ifaces[si].first);
3330 subq_node->table_name->set_interface(ifaces[si].second);
3331 subq_node->table_name->set_ifq(false);
3333 for(s=0;s<fta_node->select_list.size();s++){
3334 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3336 for(p=0;p<fta_node->where.size();p++){
3337 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3338 cnf_elem *new_cnf = new cnf_elem(new_pr);
3339 analyze_cnf(new_cnf);
3341 subq_node->where.push_back(new_cnf);
3343 // Xfer all of the parameters.
3344 // Use existing handle annotations.
3345 vector<string> param_names = param_tbl->get_param_names();
3347 for(pi=0;pi<param_names.size();pi++){
3348 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3349 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3350 param_tbl->handle_access(param_names[pi]));
3352 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3353 this->error_code = 3;
3356 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3358 ret_vec.push_back(subq_node);
// The merge over the subqueries takes the LFTA part's name ("_fta_<node>"),
// which is the table the stream node reads from.
3361 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
3362 fta_node->node_name, sel_names, ifaces, ifdb);
// NOTE(review): the split_sources fragment below begins with a prose line,
// so it appears to be disabled text; the merge node is pushed unsplit.
3364 Do not split sources until we are done with optimizations
3365 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3366 for(i=0;i<split_merge.size();++i){
3367 ret_vec.push_back(split_merge[i]);
3370 ret_vec.push_back(mrg_node);
3371 ret_vec.push_back(stream_node);
// Two nodes are reported via hfta_returned: the merge node and the stream node.
3372 hfta_returned = 1/*split_merge.size()*/ + 1;
// Otherwise the LFTA part is bound to ifaces[0] and returned together with
// the stream node.
3375 fta_node->table_name->set_machine(ifaces[0].first);
3376 fta_node->table_name->set_interface(ifaces[0].second);
3377 fta_node->table_name->set_ifq(false);
3378 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3379 this->error_code = 3;
3382 ret_vec.push_back(fta_node);
3383 ret_vec.push_back(stream_node);
3387 // printf("FTA node is:\n%s\n\n",fta_node->to_query_string().c_str() );
3388 // printf("Stream node is:\n%s\n\n",stream_node->to_query_string().c_str() );
3396 Splitting an aggregation+sampling operator.
3397 right now, return an error if any splitting is required.
// Splitting of an aggregation+sampling node is not implemented.
// When the source is not a PROTOCOL_SCHEMA table the node itself is placed
// in the result; the remaining visible path reports an error on stderr.
// NOTE(review): what follows the error message (exit or return) is not
// visible here — confirm.
3400 vector<qp_node *> sgahcwcb_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3404 vector<qp_node *> ret_vec;
3405 int s, p, g, a, o, i;
3408 vector<string> fta_flds, stream_flds;
3410 // If the node reads from a stream, don't split.
3411 // int t = Schema->get_table_ref(table_name->get_schema_name());
3412 int t = table_name->get_schema_ref();
3413 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3414 ret_vec.push_back(this);
3418 fprintf(stderr,"ERROR : cannot split a sampling operator (not yet implemented).\n");
3428 Splitting a running aggregation operator.
3429 The code is almost identical to that of the sgah operator
3431 - there is no lfta-only option.
3432 - the stream node is rsgah_qpn (lfta is sgah or spx)
3433 - need to handle the closing when (similar to having)
// Split a running-aggregation (rsgah) node into a low-level (LFTA) part and a
// high-level (HFTA / stream) part, collecting the resulting plan nodes in ret_vec.
//
// Two shapes of split appear below:
//   - aggr/aggr      : the LFTA is an sgah_qpn and the HFTA is an rsgah_qpn;
//                      the aggregates are divided between them by split_fta_aggr.
//   - selection/aggr : the LFTA is an spx_qpn and the HFTA rsgah_qpn holds the
//                      whole aggregate table.
// In both shapes, when more than one interface is accessed, the LFTA is cloned
// once per interface and the clones are combined by a mrg_qpn.
//
// Ext_fcns       - consulted by the FTA-safety checks and for UDAF sub/super aggregate ids.
// Schema         - supplies the schema type of the source table.
// hfta_returned  - set to 2 in the multi-interface branches shown below.
// ifdb           - passed to get_ifaces, resolve_if_params and the mrg_qpn constructor.
// n_virtual_ifaces, hfta_parallelism, hfta_idx - passed through to get_ifaces.
//
// Failures are reported through this->error_code / this->err_str:
// code 1 for LFTA-safety violations, code 3 when resolve_if_params fails.
// NOTE(review): the return statements and several branch headers are not
// visible in this block; the branch structure described in the comments below
// is inferred from the visible statements and pre-existing comments — confirm.
3436 vector<qp_node *> rsgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
3440 vector<qp_node *> ret_vec;
3441 int s, p, g, a, o, i;
3444 vector<string> fta_flds, stream_flds;
3446 // If the node reads from a stream, don't split.
3447 // int t = Schema->get_table_ref(table_name->get_schema_name());
3448 int t = table_name->get_schema_ref();
3449 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
3450 ret_vec.push_back(this);
3454 // Get the set of interfaces it accesses.
// Each entry is used below as a (machine, interface) pair
// (first -> set_machine, second -> set_interface).
3456 vector<string> sel_names;
3457 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
3458 if (ifaces.empty()) {
3459 fprintf(stderr,"INTERNAL ERROR in rsgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
3466 //////////////////////////////////////////////////////////////
3467 /// Split into lfta, hfta.
3469 // A rsgah node must always be split,
3470 // if for no other reason than to complete the
3471 // partial aggregation.
3473 // First, determine if the query can be split into aggr/aggr,
3474 // or if it must be selection/aggr.
3475 // Splitting into selection/aggr is allowed only
3476 // if select_lfta is set.
// select_allowed: the query definitions contain "select_lfta".
// NOTE(review): select_rqd is presumably set to true by the checks below when
// an LFTA-unsafe element is found and select_allowed holds — those
// assignments are not visible here; confirm.
3479 bool select_allowed = definitions.count("select_lfta")>0;
3480 bool select_rqd = false;
// Check 1: group-by definitions. An unsafe definition is an error (code 1)
// unless select_lfta is enabled; the indices of unsafe gbvars are remembered
// for the WHERE-clause processing in the selection/aggr split.
3482 set<int> unsafe_gbvars; // for processing where clause
3483 for(g=0;g<gb_tbl.size();g++){
3484 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
3485 if(!select_allowed){
3486 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
3487 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
3489 this->error_code = 1;
3490 this->err_str = tmpstr;
3494 unsafe_gbvars.insert(g);
3499 // Verify that the SEs in the aggregate definitions are fta-safe
// Check 2: the scalar expression of each aggregate (none for COUNT(*)).
3500 for(a=0;a<aggr_tbl.size();++a){
3501 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
3502 if(ase != NULL){ // COUNT(*) does not have a SE.
3503 if(!select_allowed){
3504 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
3505 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
3506 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
3508 this->error_code = 1;
3509 this->err_str = tmpstr;
3518 // Verify that all of the ref'd UDAFs can be split.
// Check 3: a non-builtin aggregate needs both a super-aggregate and a
// sub-aggregate id (>= 0). Note this message is appended to err_str (+=),
// unlike the other checks, which overwrite it.
3520 for(a=0;a<aggr_tbl.size();++a){
3521 if(! aggr_tbl.is_builtin(a)){
3522 int afcn = aggr_tbl.get_fcn_id(a);
3523 int super_id = Ext_fcns->get_superaggr_id(afcn);
3524 int sub_id = Ext_fcns->get_subaggr_id(afcn);
3525 if(super_id < 0 || sub_id < 0){
3526 if(!select_allowed){
3527 this->err_str += "ERROR in rsgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
3528 this->error_code = 1;
// Check 4: every WHERE predicate.
3537 for(p=0;p<where.size();p++){
3538 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
3539 if(!select_allowed){
3540 sprintf(tmpstr,"ERROR in rsgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
3541 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
3543 this->error_code = 1;
3544 this->err_str = tmpstr;
3555 /////////////////////////////////////////////////////
3556 // Split into aggr/aggr.
// LFTA part: an sgah node named "_fta_"+node_name reading this node's source.
// NOTE(review): fta_node->table_name is assigned this node's table_name pointer
// (no duplicate()), so the set_range_var/set_machine/set_interface/set_ifq
// calls made through fta_node->table_name act on the same object — confirm
// that the sharing is intended.
3562 sgah_qpn *fta_node = new sgah_qpn();
3563 fta_node->table_name = table_name;
3564 fta_node->set_node_name( "_fta_"+node_name );
3565 fta_node->table_name->set_range_var(table_name->get_var_name());
// HFTA part: an rsgah node that keeps the original node name and reads from
// the table named "_fta_"+node_name.
3568 rsgah_qpn *stream_node = new rsgah_qpn();
3569 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3570 stream_node->set_node_name( node_name );
3571 stream_node->table_name->set_range_var(table_name->get_var_name());
3573 // First, process the group-by variables.
3574 // The fta must supply the values of all the gbvars.
3575 // If a gb is computed, the computation must be
3576 // performed at the FTA, so the SE must be FTA-safe.
3577 // Nice side effect : the gbvar table contains
3578 // matching entries for the original query, the lfta query,
3579 // and the hfta query. So gbrefs in the new queries are set
3580 // correctly just by inheriting the gbrefs from the old query.
3581 // If this property changed, I'll need translation tables.
3584 for(g=0;g<gb_tbl.size();g++){
3585 // Insert the gbvar into the lfta.
3586 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
3587 fta_node->gb_tbl.add_gb_var(
3588 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
3591 // Insert a ref to the value of the gbvar into the lfta select list.
3592 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
3593 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
3594 gbvar_fta->set_gb_ref(g);
3595 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
3596 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
3598 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
3599 gbvar_stream->set_gb_ref(-1); // used as GBvar def
3600 stream_node->gb_tbl.add_gb_var(
3601 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
3606 // SEs in the aggregate definitions.
3607 // They are all safe, so split them up for later processing.
// hfta_aggr_se is keyed by aggregate index and is consumed by the
// rehome_fta_se / rehome_fta_pr calls below.
// NOTE(review): presumably filled by split_fta_aggr through its trailing
// arguments, which are not visible here — confirm.
3608 map<int, scalarexp_t *> hfta_aggr_se;
3609 for(a=0;a<aggr_tbl.size();++a){
3610 split_fta_aggr( &(aggr_tbl), a,
3611 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
3612 fta_node->select_list,
3619 // Next, the select list.
// Each select expression is rehomed onto the HFTA aggregates and keeps its
// original output name.
3621 for(s=0;s<select_list.size();s++){
3622 bool fta_forbidden = false;
3623 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3624 stream_node->select_list.push_back(
3625 new select_element(root_se, select_list[s]->name));
3630 // All the predicates in the where clause must execute
// Each WHERE predicate is duplicated, re-analyzed and attached to the LFTA node.
3633 for(p=0;p<where.size();p++){
3634 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
3635 cnf_elem *new_cnf = new cnf_elem(new_pr);
3636 analyze_cnf(new_cnf);
3638 fta_node->where.push_back(new_cnf);
3641 // All of the predicates in the having clause must
3642 // execute in the stream node.
3644 for(p=0;p<having.size();p++){
3645 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3646 cnf_elem *cnf_root = new cnf_elem(pr_root);
3647 analyze_cnf(cnf_root);
3649 stream_node->having.push_back(cnf_root);
3652 // All of the predicates in the closing when clause must
3653 // execute in the stream node.
3655 for(p=0;p<closing_when.size();p++){
3656 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3657 cnf_elem *cnf_root = new cnf_elem(pr_root);
3658 analyze_cnf(cnf_root);
3660 stream_node->closing_when.push_back(cnf_root);
3664 // Divide the parameters among the stream, FTA.
3665 // Currently : assume that the stream receives all parameters
3666 // and parameter updates, incorporates them, then passes
3667 // all of the parameters to the FTA.
3668 // This will need to change (tables, fta-unsafe types. etc.)
3670 // I will pass on the use_handle_access marking, even
3671 // though the fcn call that requires handle access might
3672 // exist in only one of the parts of the query.
3673 // Parameter manipulation and handle access determination will
3674 // need to be revisited anyway.
3675 vector<string> param_names = param_tbl->get_param_names();
// Every parameter is copied (with a duplicated data type) into both parts.
3677 for(pi=0;pi<param_names.size();pi++){
3678 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3679 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3680 param_tbl->handle_access(param_names[pi]));
3681 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3682 param_tbl->handle_access(param_names[pi]));
// Both parts inherit the query definitions; the LFTA copy drops
// "_referenced_ifaces".
3684 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3685 stream_node->definitions = definitions;
3687 // Now split by interfaces XXXX
// More than one interface: build one sgah subquery per interface as a copy of
// fta_node, then merge the subqueries under fta_node's name.
3688 if(ifaces.size() > 1){
3689 for(si=0;si<ifaces.size();++si){
3690 sgah_qpn *subq_node = new sgah_qpn();
3692 // Name the subquery
3693 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3695 subq_node->set_node_name( new_name) ;
3696 sel_names.push_back(subq_node->get_node_name());
// The subquery gets its own tablevar, bound to this one interface.
3699 subq_node->table_name = fta_node->table_name->duplicate();
3700 subq_node->table_name->set_machine(ifaces[si].first);
3701 subq_node->table_name->set_interface(ifaces[si].second);
3702 subq_node->table_name->set_ifq(false);
3705 for(g=0;g<fta_node->gb_tbl.size();g++){
3706 // Insert the gbvar into the lfta.
3707 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
3708 subq_node->gb_tbl.add_gb_var(
3709 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
3713 // Insert the aggregates
3714 for(a=0;a<fta_node->aggr_tbl.size();++a){
3715 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
// Copy the select list and the WHERE predicates of the LFTA template.
3718 for(s=0;s<fta_node->select_list.size();s++){
3719 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3721 for(p=0;p<fta_node->where.size();p++){
3722 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3723 cnf_elem *new_cnf = new cnf_elem(new_pr);
3724 analyze_cnf(new_cnf);
3726 subq_node->where.push_back(new_cnf);
// NOTE(review): no visible statement adds to fta_node->having in this
// function, so this loop looks like a no-op — confirm.
3728 for(p=0;p<fta_node->having.size();p++){
3729 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
3730 cnf_elem *new_cnf = new cnf_elem(new_pr);
3731 analyze_cnf(new_cnf);
3733 subq_node->having.push_back(new_cnf);
3735 // Xfer all of the parameters.
3736 // Use existing handle annotations.
3737 vector<string> param_names = param_tbl->get_param_names();
3739 for(pi=0;pi<param_names.size();pi++){
3740 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3741 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3742 param_tbl->handle_access(param_names[pi]));
// A nonzero result from resolve_if_params is recorded as error code 3;
// this->err_str is handed to the call (presumably to receive the message).
3744 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3745 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3746 this->error_code = 3;
3750 ret_vec.push_back(subq_node);
// The merge node is constructed from ret_vec[0] and takes fta_node's name, so
// the stream node (which reads "_fta_"+node_name) consumes the merged output.
// NOTE(review): this assumes ret_vec[0] is the first per-interface subquery
// pushed above, i.e. ret_vec was empty before the loop — confirm.
3753 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
3754 fta_node->node_name, sel_names, ifaces, ifdb);
// NOTE(review): the next four lines appear to be a disabled block (source
// splitting deferred until after optimization; compare the commented-out
// split_merge.size() in the hfta_returned assignment) — confirm.
3757 Do not split sources until we are done with optimizations
3758 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
3759 for(i=0;i<split_merge.size();++i){
3760 ret_vec.push_back(split_merge[i]);
// Result order: per-interface LFTAs, then the merge node, then the stream
// node; hfta_returned evaluates to 2.
3763 ret_vec.push_back(mrg_node);
3764 ret_vec.push_back(stream_node);
3765 hfta_returned = 1/*split_merge.size()*/+1;
// Single-interface case: bind fta_node itself to the first interface entry and
// return it followed by the stream node.
3768 fta_node->table_name->set_machine(ifaces[0].first);
3769 fta_node->table_name->set_interface(ifaces[0].second);
3770 fta_node->table_name->set_ifq(false);
3771 if(fta_node->resolve_if_params(ifdb, this->err_str)){
3772 this->error_code = 3;
3775 ret_vec.push_back(fta_node);
3776 ret_vec.push_back(stream_node);
3781 // ret_vec.push_back(fta_node);
3782 // ret_vec.push_back(stream_node);
3789 /////////////////////////////////////////////////////////////////////
3790 /// Split into selection LFTA, aggregation HFTA.
// LFTA part: a selection/projection (spx) node named "_fta_"+node_name.
// As in the aggr/aggr split, it is assigned this node's table_name pointer.
3792 spx_qpn *fta_node = new spx_qpn();
3793 fta_node->table_name = table_name;
3794 fta_node->set_node_name( "_fta_"+node_name );
3795 fta_node->table_name->set_range_var(table_name->get_var_name());
// HFTA part: an rsgah node with the original name, reading "_fta_"+node_name.
3798 rsgah_qpn *stream_node = new rsgah_qpn();
3799 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
3800 stream_node->set_node_name( node_name );
3801 stream_node->table_name->set_range_var(table_name->get_var_name());
// select_vec holds a pointer to the LFTA select list; it is the list handed to
// split_ftavec_se / split_ftavec_pr below.
3804 vector< vector<select_element *> *> select_vec;
3805 select_vec.push_back(&(fta_node->select_list)); // only one child
3807 // Process the gbvars. Split their defining SEs.
// When the split result is LFTA-forbidden or has no table-variable source
// (SPLIT_FTAVEC_NOTBLVAR), it is used directly as the stream gbvar definition;
// otherwise make_fta_se_ref is applied to the LFTA select list and its result
// becomes the definition.
// NOTE(review): the else between the two add_gb_var calls is not visible — confirm.
3808 for(g=0;g<gb_tbl.size();g++){
3809 bool fta_forbidden = false;
3810 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3812 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
3813 fta_forbidden, se_src, select_vec, Ext_fcns
3815 // if(fta_forbidden) (
3816 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3817 stream_node->gb_tbl.add_gb_var(
3818 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
3821 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
3822 stream_node->gb_tbl.add_gb_var(
3823 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
3828 // Process the aggregate table.
3829 // Copy to stream, split the SEs.
// For every aggregate, an SE (hse) describing the stream-side aggregate is
// recorded in hfta_aggr_se[a] with aggregate id a.
3830 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
3831 for(a=0;a<aggr_tbl.size();++a){
// Builtin aggregates: star aggregates (no operand) are copied as-is; others
// have their operand SE split in the same way as the gbvar definitions.
3833 if(aggr_tbl.is_builtin(a)){
3834 if(aggr_tbl.is_star_aggr(a)){
3835 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
3836 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
3838 bool fta_forbidden = false;
3839 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3841 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3842 fta_forbidden, se_src, select_vec, Ext_fcns
3844 // if(fta_forbidden) (
3845 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3846 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
3847 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
3849 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3850 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
3851 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
3854 hse->set_data_type(aggr_tbl.get_data_type(a));
3855 hse->set_aggr_id(a);
3856 hfta_aggr_se[a]=hse;
// Non-builtin aggregates (UDAFs): split each operand, then register the UDAF
// in the stream node with the rewritten operand list.
3858 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
3859 vector<scalarexp_t *> new_opl;
3860 for(o=0;o<opl.size();++o){
3861 bool fta_forbidden = false;
3862 int se_src = SPLIT_FTAVEC_NOTBLVAR;
3863 scalarexp_t *agg_se = split_ftavec_se( opl[o],
3864 fta_forbidden, se_src, select_vec, Ext_fcns
3866 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
3867 // fta_forbidden, se_src, select_vec, Ext_fcns
3869 // if(fta_forbidden) (
3870 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
3871 new_opl.push_back(agg_se);
3873 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
3874 new_opl.push_back(new_se);
3877 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
3878 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
3879 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
3880 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
3881 hse->set_aggr_id(a);
3882 hfta_aggr_se[a]=hse;
3887 // Process the WHERE clause.
3888 // If it is fta-safe AND it refs only fta-safe gbvars,
3889 // then expand the gbvars and put it into the lfta.
3890 // Else, split it into an hfta predicate ref'ing
3891 // se's computed partially in the lfta.
3893 predicate_t *pr_root;
// NOTE(review): the two push_back calls at the end of this loop are presumably
// selected by fta_forbidden (stream node when forbidden, LFTA otherwise); the
// selecting if/else is not visible — confirm.
3895 for(p=0;p<where.size();p++){
3896 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
3897 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
3898 fta_forbidden = true;
3900 pr_root = dup_pr(where[p]->pr, NULL);
3901 expand_gbvars_pr(pr_root, gb_tbl);
3902 fta_forbidden = false;
3904 cnf_elem *cnf_root = new cnf_elem(pr_root);
3905 analyze_cnf(cnf_root);
3908 stream_node->where.push_back(cnf_root);
3910 fta_node->where.push_back(cnf_root);
3915 // Process the Select clause, rehome it on the
3917 for(s=0;s<select_list.size();s++){
3918 bool fta_forbidden = false;
3919 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
3920 stream_node->select_list.push_back(
3921 new select_element(root_se, select_list[s]->name));
3925 // Process the Having clause
3927 // All of the predicates in the having clause must
3928 // execute in the stream node.
3930 for(p=0;p<having.size();p++){
3931 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
3932 cnf_elem *cnf_root = new cnf_elem(pr_root);
3933 analyze_cnf(cnf_root);
3935 stream_node->having.push_back(cnf_root);
3937 // Same for closing when
3938 for(p=0;p<closing_when.size();p++){
3939 predicate_t *pr_root=rehome_fta_pr(closing_when[p]->pr,&hfta_aggr_se);
3940 cnf_elem *cnf_root = new cnf_elem(pr_root);
3941 analyze_cnf(cnf_root);
3943 stream_node->closing_when.push_back(cnf_root);
3947 // Handle parameters and a few last details.
// Same treatment as in the aggr/aggr split: all parameters go to both parts,
// and the LFTA copy of the definitions drops "_referenced_ifaces".
3948 vector<string> param_names = param_tbl->get_param_names();
3950 for(pi=0;pi<param_names.size();pi++){
3951 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3952 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3953 param_tbl->handle_access(param_names[pi]));
3954 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3955 param_tbl->handle_access(param_names[pi]));
3958 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
3959 stream_node->definitions = definitions;
3961 // Now split by interfaces YYYY
// More than one interface: one spx subquery per interface, copied from
// fta_node (select list and WHERE only), merged under fta_node's name.
3962 if(ifaces.size() > 1){
3963 for(si=0;si<ifaces.size();++si){
3964 spx_qpn *subq_node = new spx_qpn();
3966 // Name the subquery
3967 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
3969 subq_node->set_node_name( new_name) ;
3970 sel_names.push_back(subq_node->get_node_name());
3973 subq_node->table_name = fta_node->table_name->duplicate();
3974 subq_node->table_name->set_machine(ifaces[si].first);
3975 subq_node->table_name->set_interface(ifaces[si].second);
3976 subq_node->table_name->set_ifq(false);
3978 for(s=0;s<fta_node->select_list.size();s++){
3979 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
3981 for(p=0;p<fta_node->where.size();p++){
3982 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
3983 cnf_elem *new_cnf = new cnf_elem(new_pr);
3984 analyze_cnf(new_cnf);
3986 subq_node->where.push_back(new_cnf);
3988 // Xfer all of the parameters.
3989 // Use existing handle annotations.
3990 vector<string> param_names = param_tbl->get_param_names();
3992 for(pi=0;pi<param_names.size();pi++){
3993 data_type *dt = param_tbl->get_data_type(param_names[pi]);
3994 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
3995 param_tbl->handle_access(param_names[pi]));
3997 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
3998 if(subq_node->resolve_if_params(ifdb, this->err_str)){
3999 this->error_code = 3;
4003 ret_vec.push_back(subq_node);
// Merge node built from ret_vec[0], named after fta_node.
// NOTE(review): assumes ret_vec[0] is the first subquery pushed above — confirm.
4006 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4007 fta_node->node_name, sel_names, ifaces, ifdb);
// NOTE(review): as in the aggr/aggr split, the next four lines appear to be a
// disabled block — confirm.
4009 Do not split sources until we are done with optimizations
4010 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4011 for(i=0;i<split_merge.size();++i){
4012 ret_vec.push_back(split_merge[i]);
4015 ret_vec.push_back(mrg_node);
4016 ret_vec.push_back(stream_node);
4017 hfta_returned = 1/*split_merge.size()*/+1;
// Single-interface case: bind fta_node to the first interface entry and return
// it followed by the stream node.
4020 fta_node->table_name->set_machine(ifaces[0].first);
4021 fta_node->table_name->set_interface(ifaces[0].second);
4022 fta_node->table_name->set_ifq(false);
4023 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4024 this->error_code = 3;
4027 ret_vec.push_back(fta_node);
4028 ret_vec.push_back(stream_node);
4038 Splitting an aggregation operator
4040 An aggregation operator can reference
4041 literals, parameters, colrefs, group-by vars, aggregates,
4042 operators, functions
4044 an aggregation contains
4045 A selection list of SEs
4046 A where list of predicates
4047 A list of group-by variable definitions
4048 A list of aggregates to be computed
4049 A HAVING list of predicates.
4051 Aggregation involves two phases:
4052 1) given an input tuple, determine if it satisfies all of
4053 the WHERE predicates. If so, compute the group.
4054 Look up the group, update its aggregates.
4055 2) given a closed group and its aggregates, determine
4056 if these values satisfy all of the HAVING predicates.
4057 If so, evaluate the SEs on the selection list from the
4058 group and its aggregates.
4059 The two-phase nature of aggregation places restrictions on
4060 what can be referenced by different components of the operator
4061 (in addition to functions and operators).
4062 - group-by variables : literals, parameters, colrefs
4063 - WHERE predicates : group-by vars, literals, params, colrefs
4064 - HAVING predicates : group-by vars, literals, params, aggregates
4065 - Selection list SEs : group-by vars, literals, params, aggregates
4067 Splitting an aggregation operator into an LFTA/HFTA part
4068 involves performing partial aggregation at the LFTA and
4069 completing the aggregation at the HFTA.
4070 - given a tuple, the LFTA part evaluates the WHERE clause,
4071 and if it is satisfied, computes the group. lookup the group
4072 and update the aggregates. output the group and its partial
4074 - Given a partial aggregate from the LFTA, look up the group and
4075 update its aggregates. When the group is closed, evaluate
4076 the HAVING clause and the SEs on the selection list.
4077 THEREFORE the selection list of the LFTA must consist of the
4078 group-by variables and the set of (bare) subaggregate values
4079 necessary to compute the super aggregates.
4080 Unlike the case with the SPX operator, the SE splitting point
4081 is at the GBvar and the aggregate value level.
4084 For each group-by variable
4085 Put the GB variable definition in the LFTA GBVAR list.
4086 Put the GBVAR in the LFTA selection list (as an SE).
4087 Put a reference to that GBVAR in the HFTA GBVAR list.
4089 Split the aggregate into a superaggregate and a subaggregate.
4090 The SE of the superaggregate references the subaggregate value.
4091 (this will need modifications for MF aggregation)
4092 For each SE in the selection list, HAVING predicate
4093 Make GBVAR references point to the new GBVAR
4094 make the aggregate value references point to the new aggregates.
4096 SEs are not so much split as their ref's are changed.
4098 TODO: insert tablevar names into the colrefs.
4103 vector<qp_node *> sgah_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4107 vector<qp_node *> ret_vec;
4108 int s, p, g, a, o, i;
4111 vector<string> fta_flds, stream_flds;
4113 // If the node reads from a stream, don't split.
4114 // int t = Schema->get_table_ref(table_name->get_schema_name());
4115 int t = table_name->get_schema_ref();
4116 if(Schema->get_schema_type(t) != PROTOCOL_SCHEMA){
4117 ret_vec.push_back(this);
4121 // Get the set of interfaces it accesses.
4123 vector<string> sel_names;
4124 vector<pair<string,string> > ifaces = get_ifaces(table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
4125 if (ifaces.empty()) {
4126 fprintf(stderr,"INTERNAL ERROR in sgah_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
4132 //////////////////////////////////////////////
4133 // Is this LFTA-only?
4134 if(definitions.count("lfta_aggregation")>0){
4135 // Yes. Ensure that everything is lfta-safe.
4137 // Check only one interface is accessed.
4138 if(ifaces.size()>1){
4139 this->err_str = "ERROR, group-by query "+node_name+" is lfta-only, but it accesses more than one interface:\n";
4140 for(si=0;si<ifaces.size();++si)
4141 this->err_str += "\t"+ifaces[si].first+"."+ifaces[si].second+"\n";
4142 this->error_code = 2;
4146 // Check the group-by attributes
4147 for(g=0;g<gb_tbl.size();g++){
4148 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4149 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition and the query is lfta-only (%s).\n",
4150 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4152 this->error_code = 1;
4153 this->err_str = tmpstr;
4158 // Verify that the SEs in the aggregate definitions are fta-safe
4159 for(a=0;a<aggr_tbl.size();++a){
4160 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4161 if(ase != NULL){ // COUNT(*) does not have a SE.
4162 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4163 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe scalar expression and the query is lfta-only (%s).\n",
4164 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4166 this->error_code = 1;
4167 this->err_str = tmpstr;
4171 if(! aggr_tbl.fta_legal(a,Ext_fcns)){
4172 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4173 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has LFTA-unsafe aggregate and the query is lfta-only (%s).\n",
4174 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4176 this->error_code = 1;
4177 this->err_str = tmpstr;
4183 // Ensure that all the aggregates are fta-safe ....
4187 for(s=0;s<select_list.size();s++){
4188 if(! check_fta_forbidden_se(select_list[s]->se,NULL, Ext_fcns)){
4189 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4190 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4192 this->error_code = 1;
4193 this->err_str = tmpstr;
4200 for(p=0;p<where.size();p++){
4201 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4202 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be LFTA-safe and the query is lfta-only (%s).\n",
4203 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4205 this->error_code = 1;
4206 this->err_str = tmpstr;
4213 if(having.size()>0){
4214 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : the query is lfta-only, so it can't have a HAVING clause.(%s).\n",
4215 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4217 this->error_code = 1;
4218 this->err_str = tmpstr;
4221 // The query is lfta safe, return it.
4224 ret_vec.push_back(this);
4228 //////////////////////////////////////////////////////////////
4229 /// Split into lfta, hfta.
4231 // A sgah node must always be split,
4232 // if for no other reason than to complete the
4233 // partial aggregation.
4235 // First, determine if the query can be split into aggr/aggr,
4236 // or if it must be selection/aggr.
4237 // Splitting into selection/aggr is allowed only
4238 // if select_lfta is set.
4241 bool select_allowed = definitions.count("select_lfta")>0;
4242 bool select_rqd = false;
4244 set<int> unsafe_gbvars; // for processing where clause
4245 for(g=0;g<gb_tbl.size();g++){
4246 if(! check_fta_forbidden_se(gb_tbl.get_def(g), &aggr_tbl, Ext_fcns)){
4247 if(!select_allowed){
4248 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : group by attribute (%s) has LFTA-unsafe definition but select_lfta is not enabled (%s).\n",
4249 gb_tbl.get_name(g).c_str(), se_to_query_string(gb_tbl.get_def(g), &aggr_tbl).c_str()
4251 this->error_code = 1;
4252 this->err_str = tmpstr;
4256 unsafe_gbvars.insert(g);
4261 // Verify that the SEs in the aggregate definitions are fta-safe
4262 for(a=0;a<aggr_tbl.size();++a){
4263 scalarexp_t *ase = aggr_tbl.get_aggr_se(a);
4264 if(ase != NULL){ // COUNT(*) does not have a SE.
4265 if(!select_allowed){
4266 if(! check_fta_forbidden_se(aggr_tbl.get_aggr_se(a), &aggr_tbl, Ext_fcns)){
4267 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : aggregate (%s) has FTA-unsafe scalar expression but select_lfta is not enabled (%s).\n",
4268 aggr_tbl.get_op(a).c_str(), se_to_query_string(aggr_tbl.get_aggr_se(a), &aggr_tbl).c_str()
4270 this->error_code = 1;
4271 this->err_str = tmpstr;
4280 // Verify that all of the ref'd UDAFs can be split.
4282 for(a=0;a<aggr_tbl.size();++a){
4283 if(! aggr_tbl.is_builtin(a)){
4284 int afcn = aggr_tbl.get_fcn_id(a);
4285 int super_id = Ext_fcns->get_superaggr_id(afcn);
4286 int sub_id = Ext_fcns->get_subaggr_id(afcn);
4287 if(super_id < 0 || sub_id < 0){
4288 if(!select_allowed){
4289 this->err_str += "ERROR in sgah_qpn::split_node_for_fta : UDAF "+aggr_tbl.get_op(a)+" doesn't have sub/super UDAFS so it can't be split, but select_lfta is not enabled.\n";
4290 this->error_code = 1;
4299 for(p=0;p<where.size();p++){
4300 if(! check_fta_forbidden_pr(where[p]->pr, &aggr_tbl, Ext_fcns) ){
4301 if(!select_allowed){
4302 sprintf(tmpstr,"ERROR in sgah_qpn::split_node_for_fta : all of the WHERE predicate must be FTA-safe, but select_lfta is not enabled (%s).\n",
4303 pred_to_query_str(where[p]->pr,&aggr_tbl).c_str()
4305 this->error_code = 1;
4306 this->err_str = tmpstr;
4317 /////////////////////////////////////////////////////
4318 // Split into aggr/aggr.
4324 sgah_qpn *fta_node = new sgah_qpn();
4325 fta_node->table_name = table_name;
4326 fta_node->set_node_name( "_fta_"+node_name );
4327 fta_node->table_name->set_range_var(table_name->get_var_name());
4330 sgah_qpn *stream_node = new sgah_qpn();
4331 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4332 stream_node->set_node_name( node_name );
4333 stream_node->table_name->set_range_var(table_name->get_var_name());
4335 // allowed stream disorder. Default is 2,
4336 // can override with max_lfta_disorder setting.
4337 // Also limit the hfta disorder, set to lfta disorder + 1.
4338 // can override with max_hfta_disorder.
4340 fta_node->lfta_disorder = 2;
4341 if(this->get_val_of_def("max_lfta_disorder") != ""){
4342 int d = atoi(this->get_val_of_def("max_lfta_disorder").c_str() );
4344 fprintf(stderr,"Warning, max_lfta_disorder in node %s is %d, must be at least 1, ignoring.\n",node_name.c_str(), d);
4346 fta_node->lfta_disorder = d;
4347 printf("node %s setting lfta_disorder = %d\n",node_name.c_str(),fta_node->lfta_disorder);
4350 if(fta_node->lfta_disorder > 1)
4351 stream_node->hfta_disorder = fta_node->lfta_disorder + 1;
4353 stream_node->hfta_disorder = 1;
4355 if(this->get_val_of_def("max_hfta_disorder") != ""){
4356 int d = atoi(this->get_val_of_def("max_hfta_disorder").c_str() );
4357 if(d<fta_node->lfta_disorder){
4358 fprintf(stderr,"Warning, max_hfta_disorder in node %s is %d, must be at least the max lfta disorder %d, ignoring.\n",node_name.c_str(), d,fta_node->lfta_disorder);
4360 fta_node->lfta_disorder = d;
4362 if(fta_node->lfta_disorder < fta_node->hfta_disorder){
4363 fta_node->hfta_disorder = fta_node->lfta_disorder + 1;
4367 // First, process the group-by variables.
4368 // The fta must supply the values of all the gbvars.
4369 // If a gb is computed, the computation must be
4370 // performed at the FTA, so the SE must be FTA-safe.
4371 // Nice side effect : the gbvar table contains
4372 // matching entries for the original query, the lfta query,
4373 // and the hfta query. So gbrefs in the new queries are set
4374 // correctly just by inheriting the gbrefs from the old query.
4375 // If this property changed, I'll need translation tables.
4378 for(g=0;g<gb_tbl.size();g++){
4379 // Insert the gbvar into the lfta.
4380 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
4381 fta_node->gb_tbl.add_gb_var(
4382 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
4385 // Insert a ref to the value of the gbvar into the lfta select list.
4386 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
4387 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
4388 gbvar_fta->set_gb_ref(g);
4389 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
4390 scalarexp_t *gbvar_stream = make_fta_se_ref(fta_node->select_list, gbvar_fta,0);
4392 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
4393 gbvar_stream->set_gb_ref(-1); // used as GBvar def
4394 stream_node->gb_tbl.add_gb_var(
4395 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
4398 // multiple aggregation patterns, if any, go with the hfta
4399 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4401 // SEs in the aggregate definitions.
4402 // They are all safe, so split them up for later processing.
4403 map<int, scalarexp_t *> hfta_aggr_se;
4404 for(a=0;a<aggr_tbl.size();++a){
4405 split_fta_aggr( &(aggr_tbl), a,
4406 &(stream_node->aggr_tbl), &(fta_node->aggr_tbl) ,
4407 fta_node->select_list,
4415 for(ii=0;ii<fta_flds.size() || ii < fta_node->select_list.size();++ii){
4416 if(ii<fta_flds.size())
4417 printf("\t%s : ",fta_flds[ii].c_str());
4420 if(ii<fta_node->select_list.size())
4421 printf("%s\n",fta_node->select_list[ii]->to_string().c_str());
4425 printf("hfta aggregates are:");
4426 for(ii=0;ii<stream_node->aggr_tbl.size();++ii){
4427 printf(" %s",stream_node->aggr_tbl.get_op(ii).c_str());
4429 printf("\nlfta aggregates are:");
4430 for(ii=0;ii<fta_node->aggr_tbl.size();++ii){
4431 printf(" %s",fta_node->aggr_tbl.get_op(ii).c_str());
4439 // Next, the select list.
4441 for(s=0;s<select_list.size();s++){
4442 bool fta_forbidden = false;
4443 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4444 stream_node->select_list.push_back(
4445 new select_element(root_se, select_list[s]->name));
4450 // All the predicates in the where clause must execute
4453 for(p=0;p<where.size();p++){
4454 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
4455 cnf_elem *new_cnf = new cnf_elem(new_pr);
4456 analyze_cnf(new_cnf);
4458 fta_node->where.push_back(new_cnf);
4461 // All of the predicates in the having clause must
4462 // execute in the stream node.
4464 for(p=0;p<having.size();p++){
4465 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4466 cnf_elem *cnf_root = new cnf_elem(pr_root);
4467 analyze_cnf(cnf_root);
4469 stream_node->having.push_back(cnf_root);
4473 // Divide the parameters among the stream, FTA.
4474 // Currently : assume that the stream receives all parameters
4475 // and parameter updates, incorporates them, then passes
4476 // all of the parameters to the FTA.
4477 // This will need to change (tables, fta-unsafe types. etc.)
4479 // I will pass on the use_handle_access marking, even
4480 // though the fcn call that requires handle access might
4481 // exist in only one of the parts of the query.
4482 // Parameter manipulation and handle access determination will
4483 // need to be revisited anyway.
4484 vector<string> param_names = param_tbl->get_param_names();
4486 for(pi=0;pi<param_names.size();pi++){
4487 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4488 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4489 param_tbl->handle_access(param_names[pi]));
4490 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4491 param_tbl->handle_access(param_names[pi]));
4493 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4494 stream_node->definitions = definitions;
4496 // Now split by interfaces XXXX
4497 if(ifaces.size() > 1){
4498 for(si=0;si<ifaces.size();++si){
4499 sgah_qpn *subq_node = new sgah_qpn();
4501 // Name the subquery
4502 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4504 subq_node->set_node_name( new_name) ;
4505 sel_names.push_back(subq_node->get_node_name());
4508 subq_node->table_name = fta_node->table_name->duplicate();
4509 subq_node->table_name->set_machine(ifaces[si].first);
4510 subq_node->table_name->set_interface(ifaces[si].second);
4511 subq_node->table_name->set_ifq(false);
4514 for(g=0;g<fta_node->gb_tbl.size();g++){
4515 // Insert the gbvar into the lfta.
4516 scalarexp_t *gbvar_def = dup_se(fta_node->gb_tbl.get_def(g), NULL);
4517 subq_node->gb_tbl.add_gb_var(
4518 fta_node->gb_tbl.get_name(g), fta_node->gb_tbl.get_tblvar_ref(g), gbvar_def, fta_node->gb_tbl.get_reftype(g)
4522 // Insert the aggregates
4523 for(a=0;a<fta_node->aggr_tbl.size();++a){
4524 subq_node->aggr_tbl.add_aggr(fta_node->aggr_tbl.duplicate(a));
4527 for(s=0;s<fta_node->select_list.size();s++){
4528 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4530 for(p=0;p<fta_node->where.size();p++){
4531 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4532 cnf_elem *new_cnf = new cnf_elem(new_pr);
4533 analyze_cnf(new_cnf);
4535 subq_node->where.push_back(new_cnf);
4537 for(p=0;p<fta_node->having.size();p++){
4538 predicate_t *new_pr = dup_pr(fta_node->having[p]->pr, NULL);
4539 cnf_elem *new_cnf = new cnf_elem(new_pr);
4540 analyze_cnf(new_cnf);
4542 subq_node->having.push_back(new_cnf);
4544 // Xfer all of the parameters.
4545 // Use existing handle annotations.
4546 vector<string> param_names = param_tbl->get_param_names();
4548 for(pi=0;pi<param_names.size();pi++){
4549 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4550 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4551 param_tbl->handle_access(param_names[pi]));
4553 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4554 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4555 this->error_code = 3;
4560 subq_node->lfta_disorder = fta_node->lfta_disorder;
4562 ret_vec.push_back(subq_node);
4565 mrg_qpn *mrg_node = new mrg_qpn((sgah_qpn *)(ret_vec[0]),
4566 fta_node->node_name, sel_names, ifaces, ifdb);
4567 mrg_node->set_disorder(fta_node->lfta_disorder);
4570 Do not split sources until we are done with optimizations
4571 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4572 for(i=0;i<split_merge.size();++i){
4573 ret_vec.push_back(split_merge[i]);
4576 ret_vec.push_back(mrg_node);
4577 ret_vec.push_back(stream_node);
4578 hfta_returned = 1/*split_merge.size()*/+1;
4581 fta_node->table_name->set_machine(ifaces[0].first);
4582 fta_node->table_name->set_interface(ifaces[0].second);
4583 fta_node->table_name->set_ifq(false);
4584 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4585 this->error_code = 3;
4588 ret_vec.push_back(fta_node);
4589 ret_vec.push_back(stream_node);
4594 // ret_vec.push_back(fta_node);
4595 // ret_vec.push_back(stream_node);
4602 /////////////////////////////////////////////////////////////////////
4603 /// Split into selection LFTA, aggregation HFTA.
4605 spx_qpn *fta_node = new spx_qpn();
4606 fta_node->table_name = table_name;
4607 fta_node->set_node_name( "_fta_"+node_name );
4608 fta_node->table_name->set_range_var(table_name->get_var_name());
4611 sgah_qpn *stream_node = new sgah_qpn();
4612 stream_node->table_name = new tablevar_t( ("_fta_"+node_name).c_str());
4613 stream_node->set_node_name( node_name );
4614 stream_node->table_name->set_range_var(table_name->get_var_name());
4617 vector< vector<select_element *> *> select_vec;
4618 select_vec.push_back(&(fta_node->select_list)); // only one child
4620 // Process the gbvars. Split their defining SEs.
4621 for(g=0;g<gb_tbl.size();g++){
4622 bool fta_forbidden = false;
4623 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4625 scalarexp_t *gbvar_se = split_ftavec_se( gb_tbl.get_def(g),
4626 fta_forbidden, se_src, select_vec, Ext_fcns
4628 // if(fta_forbidden) (
4629 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4630 stream_node->gb_tbl.add_gb_var(
4631 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),gbvar_se,gb_tbl.get_reftype(g)
4634 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,gbvar_se,0);
4635 stream_node->gb_tbl.add_gb_var(
4636 gb_tbl.get_name(g),gb_tbl.get_tblvar_ref(g),new_se,gb_tbl.get_reftype(g)
4640 stream_node->gb_tbl.set_pattern_info( &gb_tbl);
4642 // Process the aggregate table.
4643 // Copy to stream, split the SEs.
4644 map<int, scalarexp_t *> hfta_aggr_se; // for rehome
4645 for(a=0;a<aggr_tbl.size();++a){
4647 if(aggr_tbl.is_builtin(a)){
4648 if(aggr_tbl.is_star_aggr(a)){
4649 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a),NULL, false);
4650 hse=scalarexp_t::make_star_aggr(aggr_tbl.get_op(a).c_str());
4652 bool fta_forbidden = false;
4653 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4655 scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4656 fta_forbidden, se_src, select_vec, Ext_fcns
4658 // if(fta_forbidden) (
4659 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4660 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), agg_se,false);
4661 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),agg_se);
4663 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4664 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), new_se,false);
4665 hse=scalarexp_t::make_se_aggr(aggr_tbl.get_op(a).c_str(),new_se);
4668 hse->set_data_type(aggr_tbl.get_data_type(a));
4669 hse->set_aggr_id(a);
4670 hfta_aggr_se[a]=hse;
4672 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
4673 vector<scalarexp_t *> new_opl;
4674 for(o=0;o<opl.size();++o){
4675 bool fta_forbidden = false;
4676 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4677 scalarexp_t *agg_se = split_ftavec_se( opl[o],
4678 fta_forbidden, se_src, select_vec, Ext_fcns
4680 // scalarexp_t *agg_se = split_ftavec_se( aggr_tbl.get_aggr_se(a),
4681 // fta_forbidden, se_src, select_vec, Ext_fcns
4683 // if(fta_forbidden) (
4684 if(fta_forbidden || se_src == SPLIT_FTAVEC_NOTBLVAR){
4685 new_opl.push_back(agg_se);
4687 scalarexp_t *new_se=make_fta_se_ref(fta_node->select_list,agg_se,0);
4688 new_opl.push_back(new_se);
4691 stream_node->aggr_tbl.add_aggr(aggr_tbl.get_op(a), aggr_tbl.get_fcn_id(a), new_opl, aggr_tbl.get_storage_type(a),false, false,aggr_tbl.has_bailout(a));
4692 hse = new scalarexp_t(aggr_tbl.get_op(a).c_str(),new_opl);
4693 hse->set_data_type(Ext_fcns->get_fcn_dt(aggr_tbl.get_fcn_id(a)));
4694 hse->set_fcn_id(aggr_tbl.get_fcn_id(a));
4695 hse->set_aggr_id(a);
4696 hfta_aggr_se[a]=hse;
4701 // Process the WHERE clause.
4702 // If it is fta-safe AND it refs only fta-safe gbvars,
4703 // then expand the gbvars and put it into the lfta.
4704 // Else, split it into an hfta predicate ref'ing
4705 // se's computed partially in the lfta.
4707 predicate_t *pr_root;
4709 for(p=0;p<where.size();p++){
4710 if(! check_fta_forbidden_pr(where[p]->pr, NULL, Ext_fcns) || contains_gb_pr(where[p]->pr, unsafe_gbvars) ){
4711 pr_root = split_ftavec_pr(where[p]->pr,select_vec,Ext_fcns);
4712 fta_forbidden = true;
4714 pr_root = dup_pr(where[p]->pr, NULL);
4715 expand_gbvars_pr(pr_root, gb_tbl);
4716 fta_forbidden = false;
4718 cnf_elem *cnf_root = new cnf_elem(pr_root);
4719 analyze_cnf(cnf_root);
4722 stream_node->where.push_back(cnf_root);
4724 fta_node->where.push_back(cnf_root);
4729 // Process the Select clause, rehome it on the
4731 for(s=0;s<select_list.size();s++){
4732 bool fta_forbidden = false;
4733 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
4734 stream_node->select_list.push_back(
4735 new select_element(root_se, select_list[s]->name));
4739 // Process the Having clause
4741 // All of the predicates in the having clause must
4742 // execute in the stream node.
4744 for(p=0;p<having.size();p++){
4745 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
4746 cnf_elem *cnf_root = new cnf_elem(pr_root);
4747 analyze_cnf(cnf_root);
4749 stream_node->having.push_back(cnf_root);
4752 // Handle parameters and a few last details.
4753 vector<string> param_names = param_tbl->get_param_names();
4755 for(pi=0;pi<param_names.size();pi++){
4756 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4757 fta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4758 param_tbl->handle_access(param_names[pi]));
4759 stream_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4760 param_tbl->handle_access(param_names[pi]));
4763 fta_node->definitions = definitions; fta_node->definitions.erase("_referenced_ifaces");
4764 stream_node->definitions = definitions;
4766 // Now split by interfaces YYYY
4767 if(ifaces.size() > 1){
4768 for(si=0;si<ifaces.size();++si){
4769 spx_qpn *subq_node = new spx_qpn();
4771 // Name the subquery
4772 string new_name = "_"+node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
4774 subq_node->set_node_name( new_name) ;
4775 sel_names.push_back(subq_node->get_node_name());
4778 subq_node->table_name = fta_node->table_name->duplicate();
4779 subq_node->table_name->set_machine(ifaces[si].first);
4780 subq_node->table_name->set_interface(ifaces[si].second);
4781 subq_node->table_name->set_ifq(false);
4783 for(s=0;s<fta_node->select_list.size();s++){
4784 subq_node->select_list.push_back( dup_select(fta_node->select_list[s], NULL) );
4786 for(p=0;p<fta_node->where.size();p++){
4787 predicate_t *new_pr = dup_pr(fta_node->where[p]->pr, NULL);
4788 cnf_elem *new_cnf = new cnf_elem(new_pr);
4789 analyze_cnf(new_cnf);
4791 subq_node->where.push_back(new_cnf);
4793 // Xfer all of the parameters.
4794 // Use existing handle annotations.
4795 vector<string> param_names = param_tbl->get_param_names();
4797 for(pi=0;pi<param_names.size();pi++){
4798 data_type *dt = param_tbl->get_data_type(param_names[pi]);
4799 subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
4800 param_tbl->handle_access(param_names[pi]));
4802 subq_node->definitions = definitions; subq_node->definitions.erase("_referenced_ifaces");
4803 if(subq_node->resolve_if_params(ifdb, this->err_str)){
4804 this->error_code = 3;
4808 ret_vec.push_back(subq_node);
4811 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[0]),
4812 fta_node->node_name, sel_names, ifaces, ifdb);
4814 Do not split sources until we are done with optimizations
4815 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
4816 for(i=0;i<split_merge.size();++i){
4817 ret_vec.push_back(split_merge[i]);
4820 ret_vec.push_back(mrg_node);
4821 ret_vec.push_back(stream_node);
4822 hfta_returned = 1/*split_merge.size()*/+1;
4825 fta_node->table_name->set_machine(ifaces[0].first);
4826 fta_node->table_name->set_interface(ifaces[0].second);
4827 fta_node->table_name->set_ifq(false);
4828 if(fta_node->resolve_if_params(ifdb, this->err_str)){
4829 this->error_code = 3;
4832 ret_vec.push_back(fta_node);
4833 ret_vec.push_back(stream_node);
4838 // ret_vec.push_back(fta_node);
4839 // ret_vec.push_back(stream_node);
4848 SPLITTING AN EQ-TEMPORAL, HASH JOIN OPERATOR
4850 A JOIN_EQ_HASH_QPN node may reference:
4851 literals, parameters, colrefs, functions, operators
4852 A JOIN_EQ_HASH_QPN node may not reference:
4853 group-by variables, aggregates
4855 A JOIN_EQ_HASH_QPN node contains
4856 selection list of SEs
4857 where list of CNF predicates, broken into:
4864 For each tablevar whose source is a PROTOCOL
4865 Create a LFTA for that tablevar
4866 Push as many prefilter[..] predicates to that tablevar as is
4868 Split the SEs in the select list, and the predicates not
// Split this eq-temporal hash join into per-source selection children plus
// a join node that consumes their output.
//
// What the visible code does:
//   * builds a new join_eq_hash_qpn (stream_node) whose FROM list is a
//     duplicate of this node's FROM list and which keeps this node's name;
//   * for every FROM entry whose schema type is PROTOCOL_SCHEMA, creates an
//     spx_qpn child named "_fta_<f>_<node_name>" and redirects the matching
//     stream_node FROM entry to read from that child;
//   * distributes prefilter / temporal_eq / hash_eq / postfilter predicates
//     and the select list between the children and stream_node using
//     check_fta_forbidden_pr, split_ftavec_pr and split_ftavec_se;
//   * re-binds column references in the children and in stream_node, then
//     rebuilds stream_node->where from its predicate lists;
//   * resolves each child against get_ifaces(): a single interface keeps the
//     child itself, several interfaces produce one spx_qpn per interface plus
//     a mrg_qpn over them.
//
// Parameters:
//   Ext_fcns         - external function list passed to the split/check helpers.
//   Schema           - used to look up the schema type of each FROM entry.
//   hfta_returned    - out: set to hfta_nodes.size()+1 on the split path.
//   ifdb             - passed to get_ifaces(), resolve_if_params() and mrg_qpn.
//   n_virtual_ifaces, hfta_parallelism, hfta_idx - forwarded to get_ifaces().
//
// Result: ret_vec receives the selection children first, then the merge
// nodes collected in hfta_nodes, then stream_node.
// NOTE(review): the statement returning ret_vec, and the guard around the
// stream-only early exit, are not visible here — confirm.
// Errors: a non-zero resolve_if_params() result sets this->error_code to 3.
4873 vector<qp_node *> join_eq_hash_qpn::split_node_for_fta(ext_fcn_list *Ext_fcns, table_list *Schema, int &hfta_returned, ifq_t *ifdb, int n_virtual_ifaces, int hfta_parallelism, int hfta_idx){
4875 vector<qp_node *> ret_vec;
4878 // If the node reads from streams only, don't split.
4879 bool stream_only = true;
4880 for(f=0;f<from.size();++f){
4881 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4882 int t = from[f]->get_schema_ref();
4883 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA) stream_only = false;
// NOTE(review): presumably only reached when stream_only is still true; the
// node is then handed back unchanged.
4887 ret_vec.push_back(this);
4892 // The HFTA node, it is always returned.
4894 join_eq_hash_qpn *stream_node = new join_eq_hash_qpn();
4895 for(f=0;f<from.size();++f){
4896 // tablevar_t *tmp_tblvar = new tablevar_t( from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str());
4897 tablevar_t *tmp_tblvar = from[f]->duplicate();
4898 // tmp_tblvar->set_range_var(from[f]->get_var_name());
4900 stream_node->from.push_back(tmp_tblvar);
4902 stream_node->set_node_name(node_name);
4904 // Create spx (selection) children for each PROTOCOL source.
// child_vec and select_vec are indexed by FROM position; non-PROTOCOL
// positions hold NULL so the indices stay aligned with from[].
4905 vector<spx_qpn *> child_vec;
4906 vector< vector<select_element *> *> select_vec;
4907 for(f=0;f<from.size();++f){
4908 // int t = Schema->get_table_ref(from[f]->get_schema_name());
4909 int t = from[f]->get_schema_ref();
4910 if(Schema->get_schema_type(t) == PROTOCOL_SCHEMA){
4911 spx_qpn *child_qpn = new spx_qpn();
// NOTE(review): unbounded sprintf; the size of tmpstr is not visible here.
4912 sprintf(tmpstr,"_fta_%d_%s",f,node_name.c_str());
4913 child_qpn->set_node_name(string(tmpstr));
4914 child_qpn->table_name = new tablevar_t(
4915 from[f]->get_interface().c_str(), from[f]->get_schema_name().c_str(), from[f]->get_ifq());
4916 child_qpn->table_name->set_range_var(from[f]->get_var_name());
4918 child_vec.push_back(child_qpn);
4919 select_vec.push_back(&(child_qpn->select_list));
4921 // Update the stream's FROM clause to read from this child
4922 stream_node->from[f]->set_interface("");
4923 stream_node->from[f]->set_schema(tmpstr);
4925 child_vec.push_back(NULL);
4926 select_vec.push_back(NULL);
4930 // Push lfta-safe prefilter to the lfta
4931 // TODO: I'm not copying the preds, I dont *think* it will be a problem.
4932 predicate_t *pr_root;
4934 for(f=0;f<from.size();++f){
4935 vector<cnf_elem *> pred_vec = prefilter[f];
4936 if(child_vec[f] != NULL){
4937 for(p=0;p<pred_vec.size();++p){
// The original cnf_elem pointer is shared with the child here (not
// duplicated), as the TODO above says.
4938 if(check_fta_forbidden_pr(pred_vec[p]->pr,NULL, Ext_fcns)){
4939 child_vec[f]->where.push_back(pred_vec[p]);
4941 pr_root = split_ftavec_pr(pred_vec[p]->pr,select_vec,Ext_fcns);
4942 cnf_elem *cnf_root = new cnf_elem(pr_root);
4943 analyze_cnf(cnf_root);
4944 stream_node->prefilter[f].push_back(cnf_root);
4948 for(p=0;p<pred_vec.size();++p){
4949 stream_node->prefilter[f].push_back(pred_vec[p]);
4955 // Process the other predicates
// Every temporal_eq / hash_eq / postfilter predicate goes through
// split_ftavec_pr and the result is attached to stream_node.
4956 for(p=0;p<temporal_eq.size();++p){
4957 pr_root = split_ftavec_pr(temporal_eq[p]->pr,select_vec,Ext_fcns);
4958 cnf_elem *cnf_root = new cnf_elem(pr_root);
4959 analyze_cnf(cnf_root);
4960 stream_node->temporal_eq.push_back(cnf_root);
4962 for(p=0;p<hash_eq.size();++p){
4963 pr_root = split_ftavec_pr(hash_eq[p]->pr,select_vec,Ext_fcns);
4964 cnf_elem *cnf_root = new cnf_elem(pr_root);
4965 analyze_cnf(cnf_root);
4966 stream_node->hash_eq.push_back(cnf_root);
4968 for(p=0;p<postfilter.size();++p){
4969 pr_root = split_ftavec_pr(postfilter[p]->pr,select_vec,Ext_fcns);
4970 cnf_elem *cnf_root = new cnf_elem(pr_root);
4971 analyze_cnf(cnf_root);
4972 stream_node->postfilter.push_back(cnf_root);
// Select list: each SE is split; the stream node always receives a select
// element carrying the original output name.
4976 for(s=0;s<select_list.size();s++){
4977 bool fta_forbidden = false;
4978 int se_src = SPLIT_FTAVEC_NOTBLVAR;
4979 scalarexp_t *root_se = split_ftavec_se( select_list[s]->se,
4980 fta_forbidden, se_src, select_vec, Ext_fcns
4982 if(fta_forbidden || !is_PROTOCOL_source(se_src, select_vec)){
4983 stream_node->select_list.push_back(
4984 new select_element(root_se, select_list[s]->name) );
4986 scalarexp_t *new_se=make_fta_se_ref(select_vec,root_se,se_src);
4987 stream_node->select_list.push_back(
4988 new select_element(new_se, select_list[s]->name)
4994 // I need to "rehome" the colrefs -- make the annotations in the colrefs
4995 // agree with their tablevars.
4996 for(f=0;f<child_vec.size();++f){
4997 if(child_vec[f]!=NULL){
// Each child has exactly one input, so its colrefs are bound against a
// one-element FROM list with indices 0,0.
4998 vector<tablevar_t *> fm; fm.push_back(child_vec[f]->table_name);
5000 for(s=0;s<child_vec[f]->select_list.size();++s)
5001 bind_colref_se(child_vec[f]->select_list[s]->se, fm,0,0);
5002 for(p=0;p<child_vec[f]->where.size();++p)
5003 // bind_colref_pr(child_vec[f]->where[p]->pr, fm,f,0);
5004 bind_colref_pr(child_vec[f]->where[p]->pr, fm,0,0);
5008 // rehome the colrefs in the hfta node.
// One binding pass per FROM position f over every predicate list and the
// select list of stream_node.
5009 for(f=0;f<stream_node->from.size();++f){
5010 stream_node->where.clear();
5011 for(s=0;s<stream_node->from.size();++s){
5012 for(p=0;p<stream_node->prefilter[s].size();++p){
5013 bind_colref_pr((stream_node->prefilter[s])[p]->pr,stream_node->from,f,f);
5016 for(p=0;p<stream_node->temporal_eq.size();++p){
5017 bind_colref_pr(stream_node->temporal_eq[p]->pr,stream_node->from,f,f);
5019 for(p=0;p<stream_node->hash_eq.size();++p){
5020 bind_colref_pr(stream_node->hash_eq[p]->pr,stream_node->from,f,f);
5022 for(p=0;p<stream_node->postfilter.size();++p){
5023 bind_colref_pr(stream_node->postfilter[p]->pr,stream_node->from,f,f);
5025 for(s=0;s<stream_node->select_list.size();++s){
5026 bind_colref_se(stream_node->select_list[s]->se,stream_node->from,f,f);
5030 // Rebuild the WHERE clause
// Order: prefilters per FROM position, then temporal_eq, hash_eq, postfilter.
5031 stream_node->where.clear();
5032 for(s=0;s<stream_node->from.size();++s){
5033 for(p=0;p<stream_node->prefilter[s].size();++p){
5034 stream_node->where.push_back((stream_node->prefilter[s])[p]);
5037 for(p=0;p<stream_node->temporal_eq.size();++p){
5038 stream_node->where.push_back(stream_node->temporal_eq[p]);
5040 for(p=0;p<stream_node->hash_eq.size();++p){
5041 stream_node->where.push_back(stream_node->hash_eq[p]);
5043 for(p=0;p<stream_node->postfilter.size();++p){
5044 stream_node->where.push_back(stream_node->postfilter[p]);
5048 // Build the return list
// Merge nodes are collected in hfta_nodes so they can be appended after
// all selection children.
5049 vector<qp_node *> hfta_nodes;
5051 for(f=0;f<from.size();++f){
5052 if(child_vec[f] != NULL){
5053 spx_qpn *c_node = child_vec[f];
5054 vector<pair<string, string> > ifaces = get_ifaces(c_node->table_name, ifdb, n_virtual_ifaces, hfta_parallelism, hfta_idx);
5055 if (ifaces.empty()) {
5056 fprintf(stderr,"INTERNAL ERROR in join_eq_hash_qpn::split_node_for_fta - empty interface set, node is %s\n", node_name.c_str());
// Exactly one interface: bind the child directly to that (machine, interface)
// pair; no merge node is needed.
5060 if(ifaces.size() == 1){
5061 c_node->table_name->set_machine(ifaces[0].first);
5062 c_node->table_name->set_interface(ifaces[0].second);
5063 c_node->table_name->set_ifq(false);
5064 if(c_node->resolve_if_params(ifdb, this->err_str)){
5065 this->error_code = 3;
5068 ret_vec.push_back(c_node);
5070 vector<string> sel_names;
// Several interfaces: clone the child once per interface.
5072 for(si=0;si<ifaces.size();++si){
5073 spx_qpn *subq_node = new spx_qpn();
5075 // Name the subquery
5076 string new_name = "_"+c_node->node_name+"_"+ifaces[si].first+"_"+ifaces[si].second;
5078 subq_node->set_node_name( new_name) ;
5079 sel_names.push_back(subq_node->get_node_name());
5082 subq_node->table_name = c_node->table_name->duplicate();
5083 subq_node->table_name->set_machine(ifaces[si].first);
5084 subq_node->table_name->set_interface(ifaces[si].second);
5085 subq_node->table_name->set_ifq(false);
5087 for(s=0;s<c_node->select_list.size();s++){
5088 subq_node->select_list.push_back(dup_select(c_node->select_list[s], NULL));
5090 for(p=0;p<c_node->where.size();p++){
5091 predicate_t *new_pr = dup_pr(c_node->where[p]->pr, NULL);
5092 cnf_elem *new_cnf = new cnf_elem(new_pr);
5093 analyze_cnf(new_cnf);
// NOTE(review): this printf writes to stdout once per duplicated predicate;
// looks like leftover diagnostic output — confirm it is intended.
5095 printf("table name is %s\n",subq_node->table_name->to_string().c_str());
5096 subq_node->where.push_back(new_cnf);
5098 // Xfer all of the parameters.
5099 // Use existing handle annotations.
5100 // vector<string> param_names = param_tbl->get_param_names();
5102 // for(pi=0;pi<param_names.size();pi++){
5103 // data_type *dt = param_tbl->get_data_type(param_names[pi]);
5104 // subq_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
5105 // param_tbl->handle_access(param_names[pi]));
5107 // subq_node->definitions = definitions;
5109 if(subq_node->resolve_if_params(ifdb, this->err_str)){
5110 this->error_code = 3;
5114 ret_vec.push_back(subq_node);
// The merge node is constructed from the most recently pushed subquery
// (ret_vec[lpos]) and takes over the child's node name.
5116 int lpos = ret_vec.size()-1 ;
5117 mrg_qpn *mrg_node = new mrg_qpn((spx_qpn *)(ret_vec[lpos]),c_node->node_name,sel_names, ifaces, ifdb);
5119 Do not split sources until we are done with optimizations
5120 vector<mrg_qpn *> split_merge = mrg_node->split_sources();
5122 for(i=0;i<split_merge.size();++i){
5123 hfta_nodes.push_back(split_merge[i]);
5126 hfta_nodes.push_back(mrg_node);
5131 for(i=0;i<hfta_nodes.size();++i) ret_vec.push_back(hfta_nodes[i]);
5132 ret_vec.push_back(stream_node);
5133 hfta_returned = hfta_nodes.size()+1;
5135 // Currently : assume that the stream receives all parameters
5136 // and parameter updates, incorporates them, then passes
5137 // all of the parameters to the FTA.
5138 // This will need to change (tables, fta-unsafe types. etc.)
5140 // I will pass on the use_handle_access marking, even
5141 // though the fcn call that requires handle access might
5142 // exist in only one of the parts of the query.
5143 // Parameter manipulation and handle access determination will
5144 // need to be revisited anyway.
5145 vector<string> param_names = param_tbl->get_param_names();
// Every parameter is added (with a duplicated data_type) to every node in
// ret_vec.
// NOTE(review): the definitions copy below sits inside the per-parameter,
// per-node loop, so a query with no parameters would leave the returned
// nodes without definitions — confirm whether that is intended.
5147 for(pi=0;pi<param_names.size();pi++){
5149 data_type *dt = param_tbl->get_data_type(param_names[pi]);
5150 for(ri=0;ri<ret_vec.size();++ri){
5151 ret_vec[ri]->param_tbl->add_param(param_names[pi],dt->duplicate(),
5152 param_tbl->handle_access(param_names[pi]));
5153 ret_vec[ri]->definitions = definitions; ret_vec[ri]->definitions.erase("_referenced_ifaces");
5164 /////////////////////////////////////////////////////////////
5167 // Common processing
// Shared operator-view handling for one FROM-clause table variable.
//
// When fmtbl's schema type is OPERATOR_VIEW_SCHEMA the visible code:
//   * creates an opview_entry recording the parent query name, root name,
//     view name, a UDOP alias "<node_name>_UDOP<pos>_<view_name>", the "file"
//     property and the "liveness_timeout" property (parsed with atoi);
//   * for each subquery spec of the view, checks the fields registered in
//     Schema under sqs->name+silo_nm (present, enough of them, type subsumed,
//     matching temporal property), finds the query in qnodes by name,
//     duplicates its parse tree, renames it
//     "<node_name>_OP<pos>_<view_name>_SUBQ<s>" and appends it to ret;
//   * appends the opview_entry to opviews and stores the resulting index on
//     fmtbl.
//
// Parameters:
//   fmtbl     - table variable to inspect; its udop alias / opview index are set.
//   pos       - position of fmtbl in the caller's FROM list, used in names.
//   node_name - name of the query node that owns fmtbl.
//   qnodes    - candidate query nodes searched by name for each subquery.
//   opviews   - receives the new opview_entry.
//   ret       - receives the duplicated subquery parse trees.
//   rootnm    - stored as opv->root_name.
//   silo_nm   - suffix appended to the subquery name for the Schema lookup.
// NOTE(review): Schema is used throughout the body; presumably a table_list*
// parameter — confirm against the full signature.
// Returns an int that callers store in retval; the specific values, and what
// happens after each error message, are not visible here.
5168 int process_opview(tablevar_t *fmtbl, int pos, string node_name,
5170 vector<query_node *> &qnodes,
5171 opview_set &opviews,
5172 vector<table_exp_t *> &ret, string rootnm, string silo_nm){
5176 int schref = fmtbl->get_schema_ref();
5180 if(Schema->get_schema_type(schref) == OPERATOR_VIEW_SCHEMA){
5181 opview_entry *opv = new opview_entry();
5182 opv->parent_qname = node_name;
5183 opv->root_name = rootnm;
5184 opv->view_name = fmtbl->get_schema_name();
// NOTE(review): unbounded sprintf; the size of tmpstr is not visible here.
5186 sprintf(tmpstr,"%s_UDOP%d_%s",node_name.c_str(),pos,opv->view_name.c_str());
5187 opv->udop_alias = tmpstr;
5188 fmtbl->set_udop_alias(opv->udop_alias);
5190 opv->exec_fl = Schema->get_op_prop(schref, string("file"));
// atoi yields 0 when the property is empty or non-numeric; there is no way
// to tell that apart from an explicit 0 here.
5191 opv->liveness_timeout = atoi(Schema->get_op_prop(schref, string("liveness_timeout")).c_str());
5193 vector<subquery_spec *> subq = Schema->get_subqueryspecs(schref);
5194 for(s=0;s<subq.size();++s){
5195 // Validate that the fields match.
5196 subquery_spec *sqs = subq[s];
5197 vector<field_entry *> flds = Schema->get_fields(sqs->name+silo_nm);
5198 if(flds.size() == 0){
5199 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in Schema.\n",sqs->name.c_str(), opv->view_name.c_str());
// Extra trailing fields in the subquery are tolerated; only too few is an error.
5202 if(flds.size() < sqs->types.size()){
// NOTE(review): size() values are printed with %lu; %zu is the specifier
// that matches size_t on every platform.
5203 fprintf(stderr,"ERROR: subquery %s of view %s does not have enough fields (%lu found, %lu expected).\n",sqs->name.c_str(), opv->view_name.c_str(),flds.size(), sqs->types.size());
5206 bool failed = false;
// Compare each expected (type, modifiers) pair with the field actually
// registered at the same position.
5207 for(f=0;f<sqs->types.size();++f){
5208 data_type dte(sqs->types[f],sqs->modifiers[f]);
5209 data_type dtf(flds[f]->get_type(),flds[f]->get_modifier_list());
5210 if(! dte.subsumes_type(&dtf) ){
5211 fprintf(stderr,"ERROR: subquery %s of view %s does not have the correct type for field %d (%s found, %s expected).\n",sqs->name.c_str(), opv->view_name.c_str(),f,dtf.to_string().c_str(), dte.to_string().c_str());
5215 if(dte.is_temporal() && (dte.get_temporal() != dtf.get_temporal()) ){
5216 string pstr = dte.get_temporal_string();
5217 fprintf(stderr,"ERROR: subquery %s of view %s does not have the expected temporal value %s of field %d.\n",sqs->name.c_str(), opv->view_name.c_str(),pstr.c_str(),f);
5224 /// Validation done, find the subquery, make a copy of the
5225 /// parse tree, and add it to the return list.
5226 for(q=0;q<qnodes.size();++q)
5227 if(qnodes[q]->name == sqs->name)
// q == qnodes.size() means the search above ran off the end without a match.
5229 if(q==qnodes.size()){
5230 fprintf(stderr,"INTERNAL ERROR: subquery %s of view %s not found in list of query names.\n",sqs->name.c_str(), opv->view_name.c_str());
5234 table_exp_t *newq = dup_table_exp(qnodes[q]->parse_tree);
5235 sprintf(tmpstr,"%s_OP%d_%s_SUBQ%d",node_name.c_str(),pos,opv->view_name.c_str(),s);
5236 string newq_name = tmpstr;
5237 newq->nmap["query_name"] = newq_name;
5238 ret.push_back(newq);
5239 opv->subq_names.push_back(newq_name);
5241 fmtbl->set_opview_idx(opviews.append(opv));
// Collect the operator-view subqueries referenced by this spx_qpn node.
// Runs process_opview on the node's single input (table_name, position 0);
// any duplicated subquery parse trees are appended to ret.
// NOTE(review): how retval is acted on, and the statement returning ret, are
// not visible here — confirm.
5247 vector<table_exp_t *> spx_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5248 vector<table_exp_t *> ret;
5250 int retval = process_opview(table_name,0,node_name,
5251 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries referenced by this sgah_qpn node.
// Runs process_opview on the node's single input (table_name, position 0);
// any duplicated subquery parse trees are appended to ret.
// NOTE(review): how retval is acted on, and the statement returning ret, are
// not visible here — confirm.
5257 vector<table_exp_t *> sgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5258 vector<table_exp_t *> ret;
5260 int retval = process_opview(table_name,0,node_name,
5261 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries referenced by this rsgah_qpn node.
// Runs process_opview on the node's single input (table_name, position 0);
// any duplicated subquery parse trees are appended to ret.
// NOTE(review): how retval is acted on, and the statement returning ret, are
// not visible here — confirm.
5266 vector<table_exp_t *> rsgah_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5267 vector<table_exp_t *> ret;
5269 int retval = process_opview(table_name,0,node_name,
5270 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries referenced by this sgahcwcb_qpn node.
// Runs process_opview on the node's single input (table_name, position 0);
// any duplicated subquery parse trees are appended to ret.
// NOTE(review): how retval is acted on, and the statement returning ret, are
// not visible here — confirm.
5276 vector<table_exp_t *> sgahcwcb_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5277 vector<table_exp_t *> ret;
5279 int retval = process_opview(table_name,0,node_name,
5280 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries referenced by this merge node.
// Runs process_opview once per entry of fm, passing the entry's index as its
// position; every call appends to the same ret vector.
// NOTE(review): how retval is acted on, and the statement returning ret, are
// not visible here — confirm.
5287 vector<table_exp_t *> mrg_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5288 vector<table_exp_t *> ret;
5290 for(f=0;f<fm.size();++f){
5291 int retval = process_opview(fm[f],f,node_name,
5292 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries referenced by this hash-join node.
// Runs process_opview once per entry of from, passing the entry's index as
// its position; every call appends to the same ret vector.
// NOTE(review): how retval is acted on, and the statement returning ret, are
// not visible here — confirm.
5301 vector<table_exp_t *> join_eq_hash_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5302 vector<table_exp_t *> ret;
5304 for(f=0;f<from.size();++f){
5305 int retval = process_opview(from[f],f,node_name,
5306 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries referenced by this filter-join node.
// Runs process_opview once per entry of from, passing the entry's index as
// its position; every call appends to the same ret vector.
// NOTE(review): how retval is acted on, and the statement returning ret, are
// not visible here — confirm.
5312 vector<table_exp_t *> filter_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5313 vector<table_exp_t *> ret;
5315 for(f=0;f<from.size();++f){
5316 int retval = process_opview(from[f],f,node_name,
5317 Schema,qnodes,opviews,ret, rootnm, silo_name);
// Collect the operator-view subqueries referenced by this watch-join node.
// Only from[0] is passed to process_opview (position 0); other FROM entries
// are not examined in the visible code.
// NOTE(review): how retval is acted on, and the statement returning ret, are
// not visible here — confirm.
5323 vector<table_exp_t *> watch_join_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5324 vector<table_exp_t *> ret;
5325 int retval = process_opview(from[0],0,node_name,
5326 Schema,qnodes,opviews,ret, rootnm, silo_name);
// A watch-table node has no operator views to extract: returns an empty
// vector without touching Schema, qnodes or opviews.
5333 vector<table_exp_t *> watch_tbl_qpn::extract_opview(table_list *Schema, vector<query_node *> &qnodes, opview_set &opviews, string rootnm, string silo_name){
5334 vector<table_exp_t *> ret;
5335 return ret; // nothing to process
5340 //////////////////////////////////////////////////////////////////
5341 //////////////////////////////////////////////////////////////////
5342 /////// Additional methods
5346 //////////////////////////////////////////////////////////////////
5347 // Get schema of operator output
// Output schema of a merge node: returns the stored table_layout pointer
// as-is (no copy is made here).
5349 table_def *mrg_qpn::get_fields(){
5350 return(table_layout);
// Output schema of a watch-table node: returns the stored table_layout
// pointer as-is (no copy is made here).
5353 table_def *watch_tbl_qpn::get_fields(){
5354 return(table_layout);
// Output schema of an spx_qpn node, derived from its select list via
// create_attributes(node_name, select_list).
// NOTE(review): presumably a fresh table_def per call — confirm ownership.
5358 table_def *spx_qpn::get_fields(){
5359 return(create_attributes(node_name, select_list));
// Output schema of an sgah_qpn node, derived from its select list via
// create_attributes(node_name, select_list).
// NOTE(review): presumably a fresh table_def per call — confirm ownership.
5362 table_def *sgah_qpn::get_fields(){
5363 return(create_attributes(node_name, select_list));
// Output schema of an rsgah_qpn node, derived from its select list via
// create_attributes(node_name, select_list).
// NOTE(review): presumably a fresh table_def per call — confirm ownership.
5366 table_def *rsgah_qpn::get_fields(){
5367 return(create_attributes(node_name, select_list));
// Output schema of an sgahcwcb_qpn node, derived from its select list via
// create_attributes(node_name, select_list).
// NOTE(review): presumably a fresh table_def per call — confirm ownership.
5370 table_def *sgahcwcb_qpn::get_fields(){
5371 return(create_attributes(node_name, select_list));
// Output schema of a filter_join_qpn node, derived from its select list via
// create_attributes(node_name, select_list).
// NOTE(review): presumably a fresh table_def per call — confirm ownership.
5374 table_def *filter_join_qpn::get_fields(){
5375 return(create_attributes(node_name, select_list));
// Output schema of a watch_join_qpn node, derived from its select list via
// create_attributes(node_name, select_list).
// NOTE(review): presumably a fresh table_def per call — confirm ownership.
5378 table_def *watch_join_qpn::get_fields(){
5379 return(create_attributes(node_name, select_list));
// Output schema of the hash join, produced by create_attributes(node_name,
// select_list) after the temporal property of every select-list expression
// has been recomputed.
//
// Side effect: this getter rewrites the data types held by the select-list
// SEs (set_temporal / reset_temporal) before building the schema.
//
// Steps visible below:
//   1. From each temporal_eq predicate, take the left and right SEs; column
//      references are recorded in temporal_cids with their temporal type and
//      SEs are collected in temporal_se.
//   2. Each select element gets the temporal type computed by
//      compute_se_temporal; if that leaves it non-temporal, an SE equivalent
//      to one in temporal_se inherits that SE's temporal type.
//   3. If either of from[0] / from[1] reports get_property(), temporal select
//      elements are checked against the column names that appear in
//      temporal_eq, and reset_temporal() is applied.
// NOTE(review): the exact conditions guarding the pushes into temporal_se
// and the reset in step 3 are not fully visible here — confirm.
// Indexes from[0] and from[1] directly, so the FROM list must hold at least
// two entries.
5382 table_def *join_eq_hash_qpn::get_fields(){
5385 // First, gather temporal colrefs and SEs.
5386 map<col_id, temporal_type> temporal_cids;
5387 vector<scalarexp_t *> temporal_se;
5388 for(h=0;h<temporal_eq.size();++h){
5389 scalarexp_t *sel = temporal_eq[h]->pr->get_left_se();
5390 scalarexp_t *ser = temporal_eq[h]->pr->get_right_se();
// Left side: the first occurrence of a column fixes its temporal type.
5392 if(sel->get_operator_type() == SE_COLREF){
5393 col_id tcol(sel->get_colref());
5394 if(temporal_cids.count(tcol) == 0){
5395 temporal_cids[tcol] = sel->get_data_type()->get_temporal();
5398 temporal_se.push_back(sel);
// Right side: same treatment as the left side.
5401 if(ser->get_operator_type() == SE_COLREF){
5402 col_id tcol(ser->get_colref());
5403 if(temporal_cids.count(tcol) == 0){
5404 temporal_cids[tcol] = ser->get_data_type()->get_temporal();
5407 temporal_se.push_back(ser);
5411 // Mark select elements as nontemporal, then deduce which
5412 // ones are temporal.
5413 for(s=0;s<select_list.size();++s){
5414 select_list[s]->se->get_data_type()->set_temporal(
5415 compute_se_temporal(select_list[s]->se, temporal_cids)
5417 // Second chance if it is an exact match to an SE.
5418 // for(s=0;s<select_list.size();++s){
5419 if(! select_list[s]->se->get_data_type()->is_temporal() ){
5420 for(t=0;t<temporal_se.size();++t){
5421 if(is_equivalent_se(temporal_se[t], select_list[s]->se)){
5422 select_list[s]->se->get_data_type()->set_temporal(
5423 temporal_se[t]->get_data_type()->get_temporal()
5431 // If there is an outer join, verify that
5432 // the temporal attributes are actually temporal.
5433 // NOTE: this code must be synchronized with the
5434 // equivalence finding in join_eq_hash_qpn::generate_functor
5435 // (and also, the join_eq_hash_qpn constructor)
5436 if(from[0]->get_property() || from[1]->get_property()){
// l_equiv / r_equiv hold the field names of plain column references found
// on the left / right side of the temporal equalities.
5437 set<string> l_equiv, r_equiv;
5438 for(i=0;i<temporal_eq.size();i++){
5439 scalarexp_t *lse = temporal_eq[i]->pr->get_left_se();
5440 scalarexp_t *rse = temporal_eq[i]->pr->get_right_se();
5441 if(lse->get_operator_type()==SE_COLREF){
5442 l_equiv.insert(lse->get_colref()->get_field());
5444 if(rse->get_operator_type()==SE_COLREF){
5445 r_equiv.insert(rse->get_colref()->get_field());
// For each temporal select element, examine every column it references:
// tblvar_ref 0 is tested against l_equiv, the other side against r_equiv.
5449 for(s=0;s<select_list.size();++s){
5450 if(select_list[s]->se->get_data_type()->is_temporal()){
5452 col_id_set::iterator ci;
5453 bool failed = false;
5454 gather_se_col_ids(select_list[s]->se,cid_set, NULL);
5455 for(ci=cid_set.begin();ci!=cid_set.end();++ci){
5456 if((*ci).tblvar_ref == 0){
5457 if(from[0]->get_property()){
5458 if(l_equiv.count((*ci).field) == 0){
5463 if(from[1]->get_property()){
5464 if(r_equiv.count((*ci).field) == 0){
// NOTE(review): presumably executed only when the check above failed — confirm.
5471 select_list[s]->se->get_data_type()->reset_temporal();
5478 return create_attributes(node_name, select_list);
5482 //-----------------------------------------------------------------
5483 // get output "keys"
5484 // This is a guess about the set of fields which are a key
5485 // Use as metadata output, e.g. in qtree.xml
5489 // refs to GB attributes are keys; if an SE is not a GB colref
5490 // but refers to a GB colref (outside of an aggregation)
5491 // then its name is added to partial_keys
5492 vector<string> sgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5493 vector<string> keys;
5496 for(int i=0; i<gb_tbl.size();++i)
5499 for(int s=0;s<select_list.size();++s){
5500 if(select_list[s]->se->is_gb()){
5501 keys.push_back(select_list[s]->name);
5503 if(contains_gb_se(select_list[s]->se, gref_set)){
5504 partial_keys.push_back(select_list[s]->name);
5511 vector<string> rsgah_qpn::get_tbl_keys(vector<string> &partial_keys){
5512 vector<string> keys;
5515 for(int i=0; i<gb_tbl.size();++i)
5518 for(int s=0;s<select_list.size();++s){
5519 if(select_list[s]->se->is_gb()){
5520 keys.push_back(select_list[s]->name);
5522 if(contains_gb_se(select_list[s]->se, gref_set)){
5523 partial_keys.push_back(select_list[s]->name);
5534 //-----------------------------------------------------------------
5535 // get input tables
5538 // Get tablevar_t names of input and output tables
5540 // output_file_qpn::output_file_qpn(){source_op_name = ""; }
5541 vector<tablevar_t *> output_file_qpn::get_input_tbls(){
5545 vector<tablevar_t *> watch_tbl_qpn::get_input_tbls(){
5546 vector<tablevar_t *> ret;
5550 vector<tablevar_t *> mrg_qpn::get_input_tbls(){
5554 vector<tablevar_t *> spx_qpn::get_input_tbls(){
5555 vector<tablevar_t *> retval(1,table_name);
5559 vector<tablevar_t *> sgah_qpn::get_input_tbls(){
5560 vector<tablevar_t *> retval(1,table_name);
5564 vector<tablevar_t *> rsgah_qpn::get_input_tbls(){
5565 vector<tablevar_t *> retval(1,table_name);
5569 vector<tablevar_t *> sgahcwcb_qpn::get_input_tbls(){
5570 vector<tablevar_t *> retval(1,table_name);
5574 vector<tablevar_t *> join_eq_hash_qpn::get_input_tbls(){
5578 vector<tablevar_t *> filter_join_qpn::get_input_tbls(){
5582 vector<tablevar_t *> watch_join_qpn::get_input_tbls(){
5586 //-----------------------------------------------------------------
5587 // get output tables
5590 // This does not make sense, this fcn returns the output table *name*,
5591 // not its schema, and then there is another fcn to return the schema.
5592 vector<tablevar_t *> output_file_qpn::get_output_tbls(){
5593 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5597 vector<tablevar_t *> watch_tbl_qpn::get_output_tbls(){
5598 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5602 vector<tablevar_t *> mrg_qpn::get_output_tbls(){
5603 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5607 vector<tablevar_t *> spx_qpn::get_output_tbls(){
5608 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5612 vector<tablevar_t *> sgah_qpn::get_output_tbls(){
5613 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5617 vector<tablevar_t *> rsgah_qpn::get_output_tbls(){
5618 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5622 vector<tablevar_t *> sgahcwcb_qpn::get_output_tbls(){
5623 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5627 vector<tablevar_t *> join_eq_hash_qpn::get_output_tbls(){
5628 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5632 vector<tablevar_t *> filter_join_qpn::get_output_tbls(){
5633 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5638 vector<tablevar_t *> watch_join_qpn::get_output_tbls(){
5639 vector<tablevar_t *> retval(1,new tablevar_t(node_name.c_str()));
5645 //-----------------------------------------------------------------
5648 // Associate colrefs with this schema.
5649 // Also, use this opportunity to create table_layout (the output schema).
5650 // If the output schema is ever needed before
5651 void mrg_qpn::bind_to_schema(table_list *Schema){
// Looks up each fm entry's schema name in Schema and records the
// resulting table reference on that entry.
5653 for(t=0;t<fm.size();++t){
5654 int tblref = Schema->get_table_ref(fm[t]->get_schema_name());
5656 fm[t]->set_schema_ref(tblref );
5659 // Here I assume that the colrefs have been reordered
5660 // during analysis so that mvars line up with fm.
// NOTE(review): the loop above walks every entry of fm, but only
// mvars[0] and mvars[1] are rebound here -- confirm that a merge
// never carries more than two merge variables at this point.
5661 mvars[0]->set_schema_ref(fm[0]->get_schema_ref());
5662 mvars[1]->set_schema_ref(fm[1]->get_schema_ref());
5669 // Associate colrefs in SEs with this schema.
5670 void spx_qpn::bind_to_schema(table_list *Schema){
5671 // Bind the tablevars in the From clause to the Schema
5672 // (it might have changed from analysis time)
5673 int t = Schema->get_table_ref(table_name->get_schema_name() );
5675 table_name->set_schema_ref(t );
5677 // Get the "from" clause
5678 tablevar_list_t fm(table_name);
5680 // Bind all SEs to this schema
5682 for(p=0;p<where.size();++p){
5683 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5686 for(s=0;s<select_list.size();++s){
5687 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5690 // Collect set of tuples referenced in this HFTA
5691 // input, internal, or output.
5695 col_id_set spx_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5696 col_id_set retval, tmp_cset;
5698 for(p=0;p<where.size();++p){
5699 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5702 for(s=0;s<select_list.size();++s){
5703 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5705 col_id_set::iterator cisi;
5707 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5708 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5709 if(fe->get_unpack_fcns().size()>0)
5710 retval.insert((*cisi));
5718 col_id_set filter_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5719 col_id_set retval, tmp_cset;
5721 for(p=0;p<where.size();++p){
5722 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5725 for(s=0;s<select_list.size();++s){
5726 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5728 col_id_set::iterator cisi;
5730 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5731 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5732 if(fe->get_unpack_fcns().size()>0)
5733 retval.insert((*cisi));
5741 col_id_set watch_join_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5742 col_id_set retval, tmp_cset;
5744 for(p=0;p<where.size();++p){
5745 gather_pr_col_ids(where[p]->pr, tmp_cset, NULL);
5748 for(s=0;s<select_list.size();++s){
5749 gather_se_col_ids(select_list[s]->se, tmp_cset, NULL);
5751 col_id_set::iterator cisi;
5753 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5754 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5755 if(fe->get_unpack_fcns().size()>0)
5756 retval.insert((*cisi));
5767 // Associate colrefs in SEs with this schema.
5768 void join_eq_hash_qpn::bind_to_schema(table_list *Schema){
5769 // Bind the tablevars in the From clause to the Schema
5770 // (it might have changed from analysis time)
5772 for(f=0;f<from.size();++f){
5773 string snm = from[f]->get_schema_name();
5774 int tbl_ref = Schema->get_table_ref(snm);
5776 from[f]->set_schema_ref(tbl_ref);
5779 // Bind all SEs to this schema
5780 tablevar_list_t fm(from);
5783 for(p=0;p<where.size();++p){
5784 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5787 for(s=0;s<select_list.size();++s){
5788 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5791 // Collect set of tuples referenced in this HFTA
5792 // input, internal, or output.
5796 void filter_join_qpn::bind_to_schema(table_list *Schema){
5797 // Bind the tablevars in the From clause to the Schema
5798 // (it might have changed from analysis time)
5800 for(f=0;f<from.size();++f){
5801 string snm = from[f]->get_schema_name();
5802 int tbl_ref = Schema->get_table_ref(snm);
5804 from[f]->set_schema_ref(tbl_ref);
5807 // Bind all SEs to this schema
5808 tablevar_list_t fm(from);
5811 for(p=0;p<where.size();++p){
5812 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5815 for(s=0;s<select_list.size();++s){
5816 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5819 // Collect set of tuples referenced in this HFTA
5820 // input, internal, or output.
5824 void watch_join_qpn::bind_to_schema(table_list *Schema){
5825 // Bind the tablevars in the From clause to the Schema
5826 // (it might have changed from analysis time)
5828 for(f=0;f<from.size();++f){
5829 string snm = from[f]->get_schema_name();
5830 int tbl_ref = Schema->get_table_ref(snm);
5832 from[f]->set_schema_ref(tbl_ref);
5835 // Bind all SEs to this schema
5836 tablevar_list_t fm(from);
5839 for(p=0;p<where.size();++p){
5840 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5843 for(s=0;s<select_list.size();++s){
5844 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5847 // Collect set of tuples referenced in this HFTA
5848 // input, internal, or output.
5856 void sgah_qpn::bind_to_schema(table_list *Schema){
5857 // Bind the tablevars in the From clause to the Schema
5858 // (it might have changed from analysis time)
5861 int t = Schema->get_table_ref(table_name->get_schema_name() );
5863 table_name->set_schema_ref(t );
5865 // Get the "from" clause
5866 tablevar_list_t fm(table_name);
5870 // Bind all SEs to this schema
5872 for(p=0;p<where.size();++p){
5873 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5875 for(p=0;p<having.size();++p){
5876 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5879 for(s=0;s<select_list.size();++s){
5880 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5883 for(g=0;g<gb_tbl.size();++g){
5884 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5887 for(a=0;a<aggr_tbl.size();++a){
5888 if(aggr_tbl.is_builtin(a)){
5889 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5891 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5893 for(o=0;o<opl.size();++o){
5894 bind_to_schema_se(opl[o],&fm,Schema);
5900 col_id_set sgah_qpn::get_colrefs(bool ext_fcns_only,table_list *Schema){
5901 col_id_set retval, tmp_cset;
5903 for(p=0;p<where.size();++p){
5904 gather_pr_col_ids(where[p]->pr, tmp_cset, &gb_tbl);
5907 for(g=0;g<gb_tbl.size();++g){
5908 gather_se_col_ids(gb_tbl.get_def(g), tmp_cset, &gb_tbl);
5911 for(a=0;a<aggr_tbl.size();++a){
5912 if(aggr_tbl.is_builtin(a)){
5913 gather_se_col_ids(aggr_tbl.get_aggr_se(a), tmp_cset, &gb_tbl);
5915 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5917 for(o=0;o<opl.size();++o){
5918 gather_se_col_ids(opl[o], tmp_cset, &gb_tbl);
5923 col_id_set::iterator cisi;
5925 for(cisi=tmp_cset.begin();cisi!=tmp_cset.end();++cisi){
5926 field_entry *fe = Schema->get_field((*cisi).schema_ref, (*cisi).field);
5927 if(fe->get_unpack_fcns().size()>0)
5928 retval.insert((*cisi));
5937 void rsgah_qpn::bind_to_schema(table_list *Schema){
5938 // Bind the tablevars in the From clause to the Schema
5939 // (it might have changed from analysis time)
5940 int t = Schema->get_table_ref(table_name->get_schema_name() );
5942 table_name->set_schema_ref(t );
5944 // Get the "from" clause
5945 tablevar_list_t fm(table_name);
5947 // Bind all SEs to this schema
5949 for(p=0;p<where.size();++p){
5950 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5952 for(p=0;p<having.size();++p){
5953 bind_to_schema_pr(having[p]->pr, &fm, Schema);
5955 for(p=0;p<closing_when.size();++p){
5956 bind_to_schema_pr(closing_when[p]->pr, &fm, Schema);
5959 for(s=0;s<select_list.size();++s){
5960 bind_to_schema_se(select_list[s]->se, &fm, Schema);
5963 for(g=0;g<gb_tbl.size();++g){
5964 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
5967 for(a=0;a<aggr_tbl.size();++a){
5968 if(aggr_tbl.is_builtin(a)){
5969 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
5971 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
5973 for(o=0;o<opl.size();++o){
5974 bind_to_schema_se(opl[o],&fm,Schema);
5981 void sgahcwcb_qpn::bind_to_schema(table_list *Schema){
// Re-binds this node to Schema: the input table variable first, then
// every predicate list (where, having, cleanby, cleanwhen), the select
// list, the group-by definitions and the aggregate expressions/operands.
5982 // Bind the tablevars in the From clause to the Schema
5983 // (it might have changed from analysis time)
5984 int t = Schema->get_table_ref(table_name->get_schema_name() );
5986 table_name->set_schema_ref(t );
5988 // Get the "from" clause
5989 tablevar_list_t fm(table_name);
5991 // Bind all SEs to this schema
5993 for(p=0;p<where.size();++p){
5994 bind_to_schema_pr(where[p]->pr, &fm, Schema);
5996 for(p=0;p<having.size();++p){
5997 bind_to_schema_pr(having[p]->pr, &fm, Schema);
// Each predicate list is walked by its own length: cleanby and
// cleanwhen need not have the same number of entries as having.
5999 for(p=0;p<cleanby.size();++p){
6000 bind_to_schema_pr(cleanby[p]->pr, &fm, Schema);
6002 for(p=0;p<cleanwhen.size();++p){
6003 bind_to_schema_pr(cleanwhen[p]->pr, &fm, Schema);
6006 for(s=0;s<select_list.size();++s){
6007 bind_to_schema_se(select_list[s]->se, &fm, Schema);
6010 for(g=0;g<gb_tbl.size();++g){
6011 bind_to_schema_se(gb_tbl.get_def(g), &fm, Schema);
6014 for(a=0;a<aggr_tbl.size();++a){
6015 if(aggr_tbl.is_builtin(a)){
6016 bind_to_schema_se(aggr_tbl.get_aggr_se(a), &fm, Schema);
6018 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(a);
6020 for(o=0;o<opl.size();++o){
6021 bind_to_schema_se(opl[o],&fm,Schema);
6032 ///////////////////////////////////////////////////////////////
6033 ///////////////////////////////////////////////////////////////
6034 /// Functions for code generation.
6037 //-----------------------------------------------------------------
6040 cplx_lit_table *watch_tbl_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6041 return(new cplx_lit_table());
6044 cplx_lit_table *mrg_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6045 return(new cplx_lit_table());
6048 cplx_lit_table *spx_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6050 cplx_lit_table *complex_literals = new cplx_lit_table();
6052 for(i=0;i<select_list.size();i++){
6053 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6055 for(i=0;i<where.size();++i){
6056 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6059 return(complex_literals);
6062 cplx_lit_table *sgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6064 cplx_lit_table *complex_literals = new cplx_lit_table();
6066 for(i=0;i<aggr_tbl.size();++i){
6067 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6068 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6070 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6071 for(j=0;j<opl.size();++j)
6072 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6076 for(i=0;i<select_list.size();i++){
6077 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6079 for(i=0;i<gb_tbl.size();i++){
6080 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6082 for(i=0;i<where.size();++i){
6083 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6085 for(i=0;i<having.size();++i){
6086 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6089 return(complex_literals);
6093 cplx_lit_table *rsgah_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6095 cplx_lit_table *complex_literals = new cplx_lit_table();
6097 for(i=0;i<aggr_tbl.size();++i){
6098 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6099 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6101 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6102 for(j=0;j<opl.size();++j)
6103 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6107 for(i=0;i<select_list.size();i++){
6108 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6110 for(i=0;i<gb_tbl.size();i++){
6111 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6113 for(i=0;i<where.size();++i){
6114 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6116 for(i=0;i<having.size();++i){
6117 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6119 for(i=0;i<closing_when.size();++i){
6120 find_complex_literal_pr(closing_when[i]->pr,Ext_fcns, complex_literals);
6123 return(complex_literals);
6127 cplx_lit_table *sgahcwcb_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6129 cplx_lit_table *complex_literals = new cplx_lit_table();
6131 for(i=0;i<aggr_tbl.size();++i){
6132 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6133 find_complex_literal_se(aggr_tbl.get_aggr_se(i), Ext_fcns, complex_literals);
6135 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6136 for(j=0;j<opl.size();++j)
6137 find_complex_literal_se(opl[j], Ext_fcns, complex_literals);
6141 for(i=0;i<select_list.size();i++){
6142 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6144 for(i=0;i<gb_tbl.size();i++){
6145 find_complex_literal_se(gb_tbl.get_def(i), Ext_fcns, complex_literals);
6147 for(i=0;i<where.size();++i){
6148 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6150 for(i=0;i<having.size();++i){
6151 find_complex_literal_pr(having[i]->pr,Ext_fcns, complex_literals);
6153 for(i=0;i<cleanwhen.size();++i){
6154 find_complex_literal_pr(cleanwhen[i]->pr,Ext_fcns, complex_literals);
6156 for(i=0;i<cleanby.size();++i){
6157 find_complex_literal_pr(cleanby[i]->pr,Ext_fcns, complex_literals);
6160 return(complex_literals);
6163 cplx_lit_table *join_eq_hash_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6165 cplx_lit_table *complex_literals = new cplx_lit_table();
6167 for(i=0;i<select_list.size();i++){
6168 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6170 for(i=0;i<where.size();++i){
6171 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6174 return(complex_literals);
6177 cplx_lit_table *filter_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6179 cplx_lit_table *complex_literals = new cplx_lit_table();
6181 for(i=0;i<select_list.size();i++){
6182 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6184 for(i=0;i<where.size();++i){
6185 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6188 return(complex_literals);
6191 cplx_lit_table *watch_join_qpn::get_cplx_lit_tbl(ext_fcn_list *Ext_fcns){
6193 cplx_lit_table *complex_literals = new cplx_lit_table();
6195 for(i=0;i<select_list.size();i++){
6196 find_complex_literal_se(select_list[i]->se, Ext_fcns, complex_literals);
6198 for(i=0;i<where.size();++i){
6199 find_complex_literal_pr(where[i]->pr,Ext_fcns, complex_literals);
6202 return(complex_literals);
6209 //-----------------------------------------------------------------
6210 // get_handle_param_tbl
6212 vector<handle_param_tbl_entry *> watch_tbl_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6213 vector<handle_param_tbl_entry *> retval;
6217 vector<handle_param_tbl_entry *> mrg_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6218 vector<handle_param_tbl_entry *> retval;
6223 vector<handle_param_tbl_entry *> spx_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6225 vector<handle_param_tbl_entry *> retval;
6227 for(i=0;i<select_list.size();i++){
6228 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6230 for(i=0;i<where.size();++i){
6231 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6238 vector<handle_param_tbl_entry *> sgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6240 vector<handle_param_tbl_entry *> retval;
6243 for(i=0;i<aggr_tbl.size();++i){
6244 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6245 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6247 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6248 for(j=0;j<opl.size();++j)
6249 find_param_handles_se(opl[j], Ext_fcns, retval);
6252 for(i=0;i<select_list.size();i++){
6253 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6255 for(i=0;i<gb_tbl.size();i++){
6256 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6258 for(i=0;i<where.size();++i){
6259 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6261 for(i=0;i<having.size();++i){
6262 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6269 vector<handle_param_tbl_entry *> rsgah_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6271 vector<handle_param_tbl_entry *> retval;
6274 for(i=0;i<aggr_tbl.size();++i){
6275 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6276 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6278 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6279 for(j=0;j<opl.size();++j)
6280 find_param_handles_se(opl[j], Ext_fcns, retval);
6283 for(i=0;i<select_list.size();i++){
6284 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6286 for(i=0;i<gb_tbl.size();i++){
6287 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6289 for(i=0;i<where.size();++i){
6290 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6292 for(i=0;i<having.size();++i){
6293 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6295 for(i=0;i<closing_when.size();++i){
6296 find_param_handles_pr(closing_when[i]->pr,Ext_fcns, retval);
6303 vector<handle_param_tbl_entry *> sgahcwcb_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6305 vector<handle_param_tbl_entry *> retval;
6308 for(i=0;i<aggr_tbl.size();++i){
6309 if(aggr_tbl.is_builtin(i) && !aggr_tbl.is_star_aggr(i)){
6310 find_param_handles_se(aggr_tbl.get_aggr_se(i), Ext_fcns, retval);
6312 vector<scalarexp_t *> opl = aggr_tbl.get_operand_list(i);
6313 for(j=0;j<opl.size();++j)
6314 find_param_handles_se(opl[j], Ext_fcns, retval);
6317 for(i=0;i<select_list.size();i++){
6318 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6320 for(i=0;i<gb_tbl.size();i++){
6321 find_param_handles_se(gb_tbl.get_def(i), Ext_fcns, retval);
6323 for(i=0;i<where.size();++i){
6324 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6326 for(i=0;i<having.size();++i){
6327 find_param_handles_pr(having[i]->pr,Ext_fcns, retval);
6329 for(i=0;i<cleanwhen.size();++i){
6330 find_param_handles_pr(cleanwhen[i]->pr,Ext_fcns, retval);
6332 for(i=0;i<cleanby.size();++i){
6333 find_param_handles_pr(cleanby[i]->pr,Ext_fcns, retval);
6339 vector<handle_param_tbl_entry *> join_eq_hash_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6341 vector<handle_param_tbl_entry *> retval;
6343 for(i=0;i<select_list.size();i++){
6344 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6346 for(i=0;i<where.size();++i){
6347 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6354 vector<handle_param_tbl_entry *> filter_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6356 vector<handle_param_tbl_entry *> retval;
6358 for(i=0;i<select_list.size();i++){
6359 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6361 for(i=0;i<where.size();++i){
6362 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6368 vector<handle_param_tbl_entry *> watch_join_qpn::get_handle_param_tbl(ext_fcn_list *Ext_fcns){
6370 vector<handle_param_tbl_entry *> retval;
6372 for(i=0;i<select_list.size();i++){
6373 find_param_handles_se(select_list[i]->se, Ext_fcns, retval);
6375 for(i=0;i<where.size();++i){
6376 find_param_handles_pr(where[i]->pr,Ext_fcns, retval);
6384 ///////////////////////////////////////////////////////////////
6385 ///////////////////////////////////////////////////////////////
6386 /// Functions for operator output rates estimations
6389 //-----------------------------------------------------------------
6390 // get_rate_estimate
6392 double spx_qpn::get_rate_estimate() {
6394 // dummy method for now
6395 return SPX_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6398 double sgah_qpn::get_rate_estimate() {
6400 // dummy method for now
6401 return SGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6404 double rsgah_qpn::get_rate_estimate() {
6406 // dummy method for now
6407 return RSGAH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6410 double sgahcwcb_qpn::get_rate_estimate() {
6412 // dummy method for now
6413 return SGAHCWCB_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6416 double watch_tbl_qpn::get_rate_estimate() {
6418 // dummy method for now
6419 return DEFAULT_INTERFACE_RATE;
6422 double mrg_qpn::get_rate_estimate() {
6424 // dummy method for now
6425 return MRG_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6428 double join_eq_hash_qpn::get_rate_estimate() {
6430 // dummy method for now
6431 return JOIN_EQ_HASH_SELECTIVITY * DEFAULT_INTERFACE_RATE;
6435 //////////////////////////////////////////////////////////////////////////////
6436 //////////////////////////////////////////////////////////////////////////////
6437 ///// Generate functors
6442 //-------------------------------------------------------------------------
6443 // Code generation utilities.
6444 //-------------------------------------------------------------------------
6446 // Globals referenced by generate utilities
6448 static gb_table *segen_gb_tbl; // Table of all group-by attributes.
6452 // Generate code that makes reference
6453 // to the tuple, and not to any aggregates.
6454 // NEW : it might reference a stateful function.
6455 static string generate_se_code(scalarexp_t *se,table_list *schema){
6457 data_type *ldt, *rdt;
6459 vector<scalarexp_t *> operands;
6462 switch(se->get_operator_type()){
6464 if(se->is_handle_ref()){
6465 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6469 if(se->get_literal()->is_cpx_lit()){
6470 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6474 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
6476 if(se->is_handle_ref()){
6477 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6481 ret.append("param_");
6482 ret.append(se->get_param_name());
6485 ldt = se->get_left_se()->get_data_type();
6486 if(ldt->complex_operator(se->get_op()) ){
6487 ret.append( ldt->get_complex_operator(se->get_op()) );
6489 ret.append(generate_se_code(se->get_left_se(),schema));
6493 ret.append(se->get_op());
6494 ret.append(generate_se_code(se->get_left_se(),schema));
6499 ldt = se->get_left_se()->get_data_type();
6500 rdt = se->get_right_se()->get_data_type();
6502 if(ldt->complex_operator(rdt, se->get_op()) ){
6503 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6505 ret.append(generate_se_code(se->get_left_se(),schema));
6507 ret.append(generate_se_code(se->get_right_se(),schema));
6511 ret.append(generate_se_code(se->get_left_se(),schema));
6512 ret.append(se->get_op());
6513 ret.append(generate_se_code(se->get_right_se(),schema));
6518 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6519 // so return the defining code.
6520 int gref = se->get_gb_ref();
6521 scalarexp_t *gdef_se = segen_gb_tbl->get_def(gref);
6522 ret = generate_se_code(gdef_se, schema );
6525 sprintf(tmpstr,"unpack_var_%s_%d",
6526 se->get_colref()->get_field().c_str(), se->get_colref()->get_tablevar_ref() );
6531 if(se->is_partial()){
6532 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6535 ret += se->op + "(";
6536 operands = se->get_operands();
6537 bool first_elem = true;
6538 if(se->get_storage_state() != ""){
6539 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6542 for(o=0;o<operands.size();o++){
6543 if(first_elem) first_elem=false; else ret += ", ";
6544 if(operands[o]->get_data_type()->is_buffer_type() &&
6545 (! (operands[o]->is_handle_ref()) ) )
6547 ret += generate_se_code(operands[o], schema);
6553 fprintf(stderr,"INTERNAL ERROR in generate_se_code (hfta), line %d, character %d: unknown operator type %d\n",
6554 se->get_lineno(), se->get_charno(),se->get_operator_type());
6555 return("ERROR in generate_se_code");
6559 // generate code that refers only to aggregate data and constants.
6560 // NEW : modified to handle superaggregates and stateful fcn refs.
6561 // Assume that the state is in *stval
6562 static string generate_se_code_fm_aggr(scalarexp_t *se, string gbvar, string aggvar, table_list *schema){
6565 data_type *ldt, *rdt;
6567 vector<scalarexp_t *> operands;
6570 switch(se->get_operator_type()){
6572 if(se->is_handle_ref()){
6573 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6577 if(se->get_literal()->is_cpx_lit()){
6578 sprintf(tmpstr,"complex_literal_%d",se->get_literal()->get_cpx_lit_ref() );
6582 return(se->get_literal()->to_hfta_C_code("")); // not complex no constr.
6584 if(se->is_handle_ref()){
6585 sprintf(tmpstr,"handle_param_%d",se->get_handle_ref() );
6589 ret.append("param_");
6590 ret.append(se->get_param_name());
6593 ldt = se->get_left_se()->get_data_type();
6594 if(ldt->complex_operator(se->get_op()) ){
6595 ret.append( ldt->get_complex_operator(se->get_op()) );
6597 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6601 ret.append(se->get_op());
6602 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6607 ldt = se->get_left_se()->get_data_type();
6608 rdt = se->get_right_se()->get_data_type();
6610 if(ldt->complex_operator(rdt, se->get_op()) ){
6611 ret.append( ldt->get_complex_operator(rdt, se->get_op()) );
6613 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6615 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6619 ret.append(generate_se_code_fm_aggr(se->get_left_se(),gbvar,aggvar,schema));
6620 ret.append(se->get_op());
6621 ret.append(generate_se_code_fm_aggr(se->get_right_se(),gbvar,aggvar,schema));
6626 if(se->is_gb()){ // OK to ref gb attrs, but they're not yet unpacked ...
6627 // so return the defining code.
6628 sprintf(tmpstr,"%s%d",gbvar.c_str(),se->get_gb_ref());
6632 fprintf(stderr,"ERROR reference to non-GB column ref not permitted here,"
6633 "error in query_plan.cc:generate_se_code_fm_aggr, line %d, character %d.\n",
6634 se->get_lineno(), se->get_charno());
6640 if(se->is_superaggr()){
6641 sprintf(tmpstr,"stval->aggr_var%d",se->get_aggr_ref());
6643 sprintf(tmpstr,"%saggr_var%d",aggvar.c_str(),se->get_aggr_ref());
6649 if(se->get_aggr_ref() >= 0){
6650 sprintf(tmpstr,"udaf_ret_%d",se->get_aggr_ref());
6655 if(se->is_partial()){
6656 sprintf(tmpstr,"partial_fcn_result_%d",se->get_partial_ref());
6659 ret += se->op + "(";
6660 bool first_elem = true;
6661 if(se->get_storage_state() != ""){
6662 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd";
6665 operands = se->get_operands();
6666 for(o=0;o<operands.size();o++){
6667 if(first_elem) first_elem=false; else ret += ", ";
6668 if(operands[o]->get_data_type()->is_buffer_type() &&
6669 (! (operands[o]->is_handle_ref()) ) )
6671 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
6677 fprintf(stderr,"INTERNAL ERROR in query_plan.cc::generate_se_code_fm_aggr, line %d, character %d: unknown operator type %d\n",
6678 se->get_lineno(), se->get_charno(),se->get_operator_type());
6679 return("ERROR in generate_se_code_fm_aggr");
6685 static string unpack_partial_fcn_fm_aggr(scalarexp_t *se, int pfn_id, string gbvar, string aggvar, table_list *schema){
// Builds the text of a call "retval = <op>( &partial_fcn_result_<pfn_id> ..."
// for a partial function whose operands are rendered with
// generate_se_code_fm_aggr (i.e. from group-by / aggregate variables).
// se must be an SE_FUNC node; otherwise an error is printed to stderr
// and an "ERROR in ..." marker string is returned instead of code.
6688 vector<scalarexp_t *> operands;
6691 if(se->get_operator_type() != SE_FUNC){
6692 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn_fm_aggr. line %d, character %d\n",
6693 se->get_lineno(), se->get_charno());
6694 return("ERROR in unpack_partial_fcn_fm_aggr");
// The assignment is a complete statement; the result-slot argument is
// formatted into tmpstr by the separate sprintf that follows.
6697 ret = "\tretval = " + se->get_op() + "( ";
6698 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
// Functions with storage state also receive the state variable and cd.
6701 if(se->get_storage_state() != ""){
6702 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
6705 operands = se->get_operands();
6706 for(o=0;o<operands.size();o++){
6708 if(operands[o]->get_data_type()->is_buffer_type() &&
6709 (! (operands[o]->is_handle_ref()) ) )
6711 ret += generate_se_code_fm_aggr(operands[o], gbvar,aggvar, schema);
// Build the C text of a call that evaluates a partial function, rendering
// operands with generate_se_code (the non-aggregate counterpart of
// unpack_partial_fcn_fm_aggr).
//   se      - the function expression; anything other than SE_FUNC is
//             reported on stderr and an error string is returned.
//   pfn_id  - index used to name the result slot partial_fcn_result_<pfn_id>.
//   schema  - forwarded to generate_se_code for each operand.
6719 static string unpack_partial_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6722 vector<scalarexp_t *> operands;
// Guard: only function expressions are accepted.
6724 if(se->get_operator_type() != SE_FUNC){
6725 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to unpack_partial_fcn. line %d, character %d\n",
6726 se->get_lineno(), se->get_charno());
6727 return("ERROR in unpack_partial_fcn");
// Start the generated statement with "retval = <op>( ".
// NOTE(review): the next statement ends with ',' rather than ';', so it is
// joined to the sprintf call that follows as a single comma expression.
6730 ret = "\tretval = " + se->get_op() + "( ",
6731 sprintf(tmpstr, "&partial_fcn_result_%d",pfn_id);
// A non-empty storage state adds the state variable and "cd" as arguments.
6734 if(se->get_storage_state() != ""){
6735 ret += ",&(stval->state_var_"+se->get_storage_state()+"),cd";
// Render the operands; buffer-typed operands that are not handle
// references are singled out by the test inside the loop.
6738 operands = se->get_operands();
6739 for(o=0;o<operands.size();o++){
6741 if(operands[o]->get_data_type()->is_buffer_type() &&
6742 (! (operands[o]->is_handle_ref()) ) )
6744 ret += generate_se_code(operands[o], schema);
// Build the C text of a direct call "<op>(...)" for a function expression
// whose result is cached.
//   se      - the function expression; anything other than SE_FUNC is
//             reported on stderr and an error string is returned.
//   pfn_id  - not referenced in the lines visible here.
//   schema  - forwarded to generate_se_code for each operand.
6751 static string generate_cached_fcn(scalarexp_t *se, int pfn_id, table_list *schema){
6754 vector<scalarexp_t *> operands;
// Guard: only function expressions are accepted.
6756 if(se->get_operator_type() != SE_FUNC){
6757 fprintf(stderr,"INTERNAL ERROR, non-function SE passed to generate_cached_fcn. line %d, character %d\n",
6758 se->get_lineno(), se->get_charno());
6759 return("ERROR in generate_cached_fcn");
6762 ret = se->get_op()+"(";
// A non-empty storage state puts the state variable and "cd" first in the
// argument list (note the trailing comma inside the literal).
6764 if(se->get_storage_state() != ""){
6765 ret += "&(stval->state_var_"+se->get_storage_state()+"),cd,";
// Render the operands; buffer-typed operands that are not handle
// references are singled out by the test inside the loop.
6768 operands = se->get_operands();
6769 for(o=0;o<operands.size();o++){
6771 if(operands[o]->get_data_type()->is_buffer_type() &&
6772 (! (operands[o]->is_handle_ref()) ) )
6774 ret += generate_se_code(operands[o], schema);
// Translate a query-language comparison operator into its C spelling:
// "=" becomes "==" and "<>" becomes "!=".
// NOTE(review): the result for any other operator is not visible here;
// presumably op is returned unchanged - TODO confirm.
6785 static string generate_C_comparison_op(string op){
6786 if(op == "=") return("==");
6787 if(op == "<>") return("!=");
// Translate a query-language boolean operator into C. AND, OR and NOT are
// each recognised in three spellings (upper case, capitalised, lower case);
// any other value yields the string "ERROR UNKNOWN BOOLEAN OPERATOR".
// NOTE(review): the value returned for each recognised operator is not
// visible here; presumably the matching C operator - TODO confirm.
6791 static string generate_C_boolean_op(string op){
6792 if( (op == "AND") || (op == "And") || (op == "and") ){
6795 if( (op == "OR") || (op == "Or") || (op == "or") ){
6798 if( (op == "NOT") || (op == "Not") || (op == "not") ){
// Fallback for an operator that matched none of the spellings above.
6802 return("ERROR UNKNOWN BOOLEAN OPERATOR");
// Render a predicate tree as C expression text, dispatching on
// pr->get_operator_type(). Scalar sub-expressions are rendered with
// generate_se_code and nested predicates by recursion.
//   pr     - predicate to render.
//   schema - forwarded to generate_se_code.
// An unknown operator type is reported on stderr and the string
// "ERROR in generate_predicate_code" is returned.
6806 static string generate_predicate_code(predicate_t *pr,table_list *schema){
6808 vector<literal_t *> litv;
6810 data_type *ldt, *rdt;
6811 vector<scalarexp_t *> op_list;
6814 switch(pr->get_operator_type()){
// Literal-list membership: one test per literal, joined with " || ".
6816 ldt = pr->get_left_se()->get_data_type();
6819 litv = pr->get_lit_vec();
6820 for(i=0;i<litv.size();i++){
6821 if(i>0) ret.append(" || ");
// Types with a complex comparison are tested through their comparison
// function and the result is compared against 0.
6824 if(ldt->complex_comparison(ldt) ){
6825 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6827 if(ldt->is_buffer_type() )
6829 ret.append(generate_se_code(pr->get_left_se(), schema));
6831 if(ldt->is_buffer_type() )
// Complex literals are referenced by name (complex_literal_<n>); other
// literals are emitted through to_C_code.
6833 if(litv[i]->is_cpx_lit()){
6834 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6837 ret.append(litv[i]->to_C_code(""));
6839 ret.append(") == 0");
// Remaining types: the left expression and the literal's to_hfta_C_code text.
6841 ret.append(generate_se_code(pr->get_left_se(), schema));
6843 ret.append(litv[i]->to_hfta_C_code(""));
// Comparison of two scalar expressions.
6852 ldt = pr->get_left_se()->get_data_type();
6853 rdt = pr->get_right_se()->get_data_type();
// Complex comparison: call the comparison function on both sides, then
// append the translated comparison operator.
6856 if(ldt->complex_comparison(rdt) ){
6857 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6859 if(ldt->is_buffer_type() )
6861 ret.append(generate_se_code(pr->get_left_se(),schema) );
6863 if(rdt->is_buffer_type() )
6865 ret.append(generate_se_code(pr->get_right_se(),schema) );
6867 ret.append( generate_C_comparison_op(pr->get_op()));
// Otherwise: left expression, translated operator, right expression.
6870 ret.append(generate_se_code(pr->get_left_se(),schema) );
6871 ret.append( generate_C_comparison_op(pr->get_op()));
6872 ret.append(generate_se_code(pr->get_right_se(),schema) );
// Prefix form: the boolean operator followed by the left sub-predicate.
6878 ret.append( generate_C_boolean_op(pr->get_op()) );
6879 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
// Infix form: left sub-predicate, boolean operator, right sub-predicate.
6882 case PRED_BINARY_OP:
6884 ret.append(generate_predicate_code(pr->get_left_pr(),schema) );
6885 ret.append( generate_C_boolean_op(pr->get_op()) );
6886 ret.append(generate_predicate_code(pr->get_right_pr(),schema) );
// Call form: "<op>( arg, arg, ... )" over the predicate's operand list.
6890 ret += pr->get_op() + "( ";
6891 op_list = pr->get_op_list();
6892 for(o=0;o<op_list.size();++o){
6893 if(o>0) ret += ", ";
6894 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6896 ret += generate_se_code(op_list[o], schema);
// Unknown operator type: report it and return an error marker string.
6901 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
6902 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
6903 return("ERROR in generate_predicate_code");
// Render a predicate tree as C expression text in the aggregate ("fm_aggr")
// context. Same shape as generate_predicate_code, but scalar
// sub-expressions are rendered with generate_se_code_fm_aggr.
//   pr            - predicate to render.
//   gbvar, aggvar - forwarded unchanged to generate_se_code_fm_aggr and to
//                   the recursive calls.
//   schema        - forwarded likewise.
// An unknown operator type is reported on stderr and an error marker string
// is returned.
6907 static string generate_predicate_code_fm_aggr(predicate_t *pr, string gbvar, string aggvar,table_list *schema){
6909 vector<literal_t *> litv;
6911 data_type *ldt, *rdt;
6912 vector<scalarexp_t *> op_list;
6915 switch(pr->get_operator_type()){
// Literal-list membership: one test per literal, joined with " || ".
6917 ldt = pr->get_left_se()->get_data_type();
6920 litv = pr->get_lit_vec();
6921 for(i=0;i<litv.size();i++){
6922 if(i>0) ret.append(" || ");
// Types with a complex comparison are tested through their comparison
// function and the result is compared against 0.
6925 if(ldt->complex_comparison(ldt) ){
6926 ret.append( ldt->get_hfta_comparison_fcn(ldt) );
6928 if(ldt->is_buffer_type() )
6930 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6932 if(ldt->is_buffer_type() )
// Complex literals are referenced by name (complex_literal_<n>); other
// literals are emitted through to_C_code.
6934 if(litv[i]->is_cpx_lit()){
6935 sprintf(tmpstr,"complex_literal_%d",litv[i]->get_cpx_lit_ref() );
6938 ret.append(litv[i]->to_C_code(""));
6940 ret.append(") == 0");
// Remaining types: the left expression and the literal's to_hfta_C_code text.
6942 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar, schema));
6944 ret.append(litv[i]->to_hfta_C_code(""));
// Comparison of two scalar expressions.
6953 ldt = pr->get_left_se()->get_data_type();
6954 rdt = pr->get_right_se()->get_data_type();
// Complex comparison: call the comparison function on both sides, then
// append the translated comparison operator.
6957 if(ldt->complex_comparison(rdt) ){
6958 ret.append(ldt->get_hfta_comparison_fcn(rdt));
6960 if(ldt->is_buffer_type() )
6962 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6964 if(rdt->is_buffer_type() )
6966 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
6968 ret.append( generate_C_comparison_op(pr->get_op()));
// Otherwise: left expression, translated operator, right expression.
6971 ret.append(generate_se_code_fm_aggr(pr->get_left_se(), gbvar, aggvar,schema) );
6972 ret.append( generate_C_comparison_op(pr->get_op()));
6973 ret.append(generate_se_code_fm_aggr(pr->get_right_se(), gbvar, aggvar,schema) );
// Prefix form: the boolean operator followed by the left sub-predicate.
6979 ret.append( generate_C_boolean_op(pr->get_op()) );
6980 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
// Infix form: left sub-predicate, boolean operator, right sub-predicate.
6983 case PRED_BINARY_OP:
6985 ret.append(generate_predicate_code_fm_aggr(pr->get_left_pr(), gbvar, aggvar,schema) );
6986 ret.append( generate_C_boolean_op(pr->get_op()) );
6987 ret.append(generate_predicate_code_fm_aggr(pr->get_right_pr(), gbvar, aggvar,schema) );
// Call form: "<op>( arg, arg, ... )" over the predicate's operand list.
6991 ret += pr->get_op() + "( ";
6992 op_list = pr->get_op_list();
6993 for(o=0;o<op_list.size();++o){
6994 if(o>0) ret += ", ";
6995 if(op_list[o]->get_data_type()->is_buffer_type() && (! (op_list[o]->is_handle_ref()) ) )
6997 ret += generate_se_code_fm_aggr(op_list[o], gbvar, aggvar, schema);
// Unknown operator type: report it and return an error marker string.
// NOTE(review): both messages below name generate_predicate_code rather
// than generate_predicate_code_fm_aggr, which can mislead when debugging.
7002 fprintf(stderr,"INTERNAL ERROR in generate_predicate_code, line %d, character %d, unknown predicate operator type %d\n",
7003 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
7004 return("ERROR in generate_predicate_code");
// Build C text that tests two already-rendered operands of type dt.
//   lhs_op, rhs_op - operand text, appended as given.
//   dt             - operand data type; selects the form of the test.
// When dt->complex_comparison(dt) holds, the type's comparison function is
// used and its result is compared against 0; otherwise lhs_op and rhs_op
// are appended directly.
7012 static string generate_equality_test(string &lhs_op, string &rhs_op, data_type *dt){
7015 if(dt->complex_comparison(dt) ){
7016 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types get extra handling before each operand.
7018 if(dt->is_buffer_type() )
7022 if(dt->is_buffer_type() )
7024 ret.append(rhs_op );
7025 ret.append(") == 0");
// Simple types: operands are appended as plain text.
7027 ret.append(lhs_op );
7029 ret.append(rhs_op );
// Build C text that compares two already-rendered operands of type dt.
//   lhs_op, rhs_op - operand text, appended as given.
//   dt             - operand data type; selects the form of the test.
// NOTE(review): every line visible here matches generate_equality_test,
// including the ") == 0" suffix; confirm whether the two functions are
// meant to differ.
7035 static string generate_comparison(string &lhs_op, string &rhs_op, data_type *dt){
7038 if(dt->complex_comparison(dt) ){
7039 ret.append(dt->get_hfta_comparison_fcn(dt));
// Buffer types get extra handling before each operand.
7041 if(dt->is_buffer_type() )
7045 if(dt->is_buffer_type() )
7047 ret.append(rhs_op );
7048 ret.append(") == 0");
// Simple types: operands are appended as plain text.
7050 ret.append(lhs_op );
7052 ret.append(rhs_op );
// Generate the C statement(s) that fold one new input value into the
// aggregate stored in `var`.
//   var    - C lvalue text of the aggregate's storage.
//   atbl   - aggregate table describing every aggregate of the query.
//   aidx   - index of the aggregate within atbl.
//   schema - forwarded to generate_se_code.
// Unrecognised aggregate names are reported on stderr.
7059 // Here I assume that only MIN and MAX aggregates can be computed
7060 // over BUFFER data types.
7062 static string generate_aggr_update(string var, aggregate_table *atbl,int aidx, table_list *schema){
7063 string retval = "\t\t";
7064 string op = atbl->get_op(aidx);
// Non-builtin aggregates: emit a call to <op>_HFTA_AGGR_UPDATE_ on the
// storage, passed by address unless the storage type is fstring_t.
7067 if(! atbl->is_builtin(aidx)) {
7069 retval += op+"_HFTA_AGGR_UPDATE_(";
7070 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7071 retval+="("+var+")";
7072 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
// NOTE(review): the loop header below opens two braces ("{{").
7073 for(o=0;o<opl.size();++o){{
7075 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7077 retval += generate_se_code(opl[o], schema);
7086 // builtin processing
7087 data_type *dt = atbl->get_data_type(aidx);
// Increment form (presumably COUNT - the guarding test is not visible).
7091 retval.append("++;\n");
// Accumulate form (presumably SUM - the guarding test is not visible).
7096 retval.append(" += ");
7097 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7098 retval.append(";\n");
// Replace-if-smaller form (presumably MIN): evaluate the new value into
// aggr_tmp_<aidx>, compare it with var, and replace var when it is less.
7102 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7103 retval += dt->make_host_cvar(tmpstr);
7105 retval += generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7106 if(dt->complex_comparison(dt)){
7107 if(dt->is_buffer_type())
7108 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7110 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) < 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7112 sprintf(tmpstr,"\t\tif(aggr_tmp_%d < %s)\n",aidx,var.c_str());
7114 retval.append(tmpstr);
// Buffer types are replaced through the type's buffer-replace function;
// other types by plain assignment.
7115 if(dt->is_buffer_type()){
7116 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7118 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7120 retval.append(tmpstr);
// Replace-if-larger form (presumably MAX): same as above with the
// comparison reversed.
7125 sprintf(tmpstr,"aggr_tmp_%d",aidx);
7126 retval+=dt->make_host_cvar(tmpstr);
7128 retval+=generate_se_code(atbl->get_aggr_se(aidx), schema )+";\n";
7129 if(dt->complex_comparison(dt)){
7130 if(dt->is_buffer_type())
7131 sprintf(tmpstr,"\t\tif(%s(&aggr_tmp_%d,&(%s)) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7133 sprintf(tmpstr,"\t\tif(%s(aggr_tmp_%d,%s) > 0)\n",dt->get_hfta_comparison_fcn(dt).c_str(), aidx, var.c_str());
7135 sprintf(tmpstr,"\t\tif(aggr_tmp_%d > %s)\n",aidx,var.c_str());
7137 retval.append(tmpstr);
7138 if(dt->is_buffer_type()){
7139 sprintf(tmpstr,"\t\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_replace().c_str(),var.c_str(),aidx);
7141 sprintf(tmpstr,"\t\t\t%s = aggr_tmp_%d;\n",var.c_str(),aidx);
7143 retval.append(tmpstr);
// Bitwise aggregates use the matching compound assignment operator.
7148 if(op == "AND_AGGR"){
7150 retval.append(" &= ");
7151 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7152 retval.append(";\n");
7155 if(op == "OR_AGGR"){
7157 retval.append(" |= ");
7158 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7159 retval.append(";\n");
7162 if(op == "XOR_AGGR"){
7164 retval.append(" ^= ");
7165 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema) );
7166 retval.append(";\n");
// Running mean (presumably AVG): update <var>_sum and <var>_cnt, then
// recompute var as their quotient.
7170 retval += var+"_sum += "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7171 retval += "\t\t"+var+"_cnt += 1;\n";
7172 retval += "\t\t"+var+" = "+var+"_sum / "+var+"_cnt;\n";
// No aggregate name matched.
7176 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_update.\n",op.c_str());
// Generate the C statement that removes the contribution of the aggregate
// `var` from the super-aggregate `supervar`.
//   var, supervar - C lvalue text of the aggregate and super-aggregate.
//   atbl, aidx    - aggregate table and index of the aggregate.
//   schema        - not referenced in the lines visible here.
// Unrecognised aggregate names are reported on stderr.
7185 static string generate_superaggr_minus(string var, string supervar, aggregate_table *atbl,int aidx, table_list *schema){
7186 string retval = "\t\t";
7187 string op = atbl->get_op(aidx);
// Non-builtin aggregates: emit <op>_HFTA_AGGR_MINUS_(supervar, var), each
// argument passed by address unless the storage type is fstring_t.
7190 if(! atbl->is_builtin(aidx)) {
7192 retval += op+"_HFTA_AGGR_MINUS_(";
7193 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7194 retval+="("+supervar+"),";
7195 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7196 retval+="("+var+");\n";
// COUNT and SUM are undone by subtraction.
7202 if(op == "COUNT" || op == "SUM"){
7203 retval += supervar + "-=" +var + ";\n";
// XOR is undone by applying XOR again.
7207 if(op == "XOR_AGGR"){
7208 retval += supervar + "^=" +var + ";\n";
// NOTE(review): the statement guarded by this test is not visible here.
7212 if(op=="MIN" || op == "MAX")
// No aggregate name matched.
7215 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_superaggr_minus.\n",op.c_str());
// Generate the C statement(s) that initialise the aggregate stored in `var`
// from its first input value.
//   var    - C lvalue text of the aggregate's storage.
//   atbl   - aggregate table; aidx selects the aggregate.
//   schema - forwarded to generate_se_code.
// Unrecognised aggregate names are reported on stderr.
7224 static string generate_aggr_init(string var, aggregate_table *atbl,int aidx, table_list *schema){
7226 string op = atbl->get_op(aidx);
// Non-builtin aggregates: emit <op>_HFTA_AGGR_INIT_ on the storage, then
// <op>_HFTA_AGGR_UPDATE_ with the operands, so the first value is folded
// in immediately. The storage is passed by address unless it is fstring_t.
7229 if(! atbl->is_builtin(aidx)){
7231 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_INIT_(";
7232 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7233 retval+="("+var+"));\n";
7235 retval += "\t"+atbl->get_op(aidx)+"_HFTA_AGGR_UPDATE_(";
7236 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7237 retval+="("+var+")";
7238 vector<scalarexp_t *> opl = atbl->get_operand_list(aidx);
7240 for(o=0;o<opl.size();++o){
7242 if(opl[o]->get_data_type()->is_buffer_type() && (! (opl[o]->is_handle_ref()) ) )
7244 retval += generate_se_code(opl[o],schema);
7250 // builtin aggregate processing
7251 data_type *dt = atbl->get_data_type(aidx);
// Start-at-one form (presumably COUNT - the guarding test is not visible).
7255 retval.append(" = 1;\n");
// Value-initialised aggregates start from the first input value.
7259 if(op == "SUM" || op == "MIN" || op == "MAX" || op == "AND_AGGR" ||
7260 op=="AVG" || op == "OR_AGGR" || op == "XOR_AGGR"){
// Buffer types: evaluate into aggr_tmp_<aidx>, then copy into var through
// the type's assign-copy function.
7261 if(dt->is_buffer_type()){
7262 sprintf(tmpstr,"\t\taggr_tmp_%d = %s;\n",aidx,generate_se_code(atbl->get_aggr_se(aidx), schema ).c_str() );
7263 retval.append(tmpstr);
7264 sprintf(tmpstr,"\t\t%s(&(%s),&aggr_tmp_%d);\n",dt->get_hfta_buffer_assign_copy().c_str(),var.c_str(),aidx);
7265 retval.append(tmpstr);
// Running mean (presumably AVG): seed <var>_sum and <var>_cnt, then var.
7268 retval += var+"_sum = "+generate_se_code(atbl->get_aggr_se(aidx), schema)+";\n";
7269 retval += "\t"+var+"_cnt = 1;\n";
7270 retval += "\t"+var+" = "+var+"_sum;\n";
// Remaining cases: append the first value's expression text.
7274 retval.append(generate_se_code(atbl->get_aggr_se(aidx), schema));
7275 retval.append(";\n");
// No aggregate name matched.
7281 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in query_plan::generate_aggr_init.\n",op.c_str());
// Generate the C statement that resets the aggregate stored in `var` to its
// empty state, without folding in an input value.
//   var    - C lvalue text of the aggregate's storage.
//   atbl   - aggregate table; aidx selects the aggregate.
//   schema - not referenced in the lines visible here.
// Builtin aggregates over buffer types are not supported: an error string
// is returned in place of code. Unrecognised names are reported on stderr.
7289 static string generate_aggr_reinitialize(string var, aggregate_table *atbl,int aidx, table_list *schema){
7291 string op = atbl->get_op(aidx);
// Non-builtin aggregates: running aggregates call <op>_HFTA_AGGR_REINIT_,
// all others call <op>_HFTA_AGGR_INIT_. The storage is passed by address
// unless it is fstring_t.
7294 if(! atbl->is_builtin(aidx)){
7296 retval += "\t"+atbl->get_op(aidx);
7297 if(atbl->is_running_aggr(aidx)){
7298 retval += "_HFTA_AGGR_REINIT_(";
7300 retval += "_HFTA_AGGR_INIT_(";
7302 if(atbl->get_storage_type(aidx)->get_type() != fstring_t) retval+="&";
7303 retval+="("+var+"));\n";
7307 // builtin aggregate processing
7308 data_type *dt = atbl->get_data_type(aidx);
// Reset-to-zero form (presumably COUNT - the guarding test is not visible).
7312 retval.append(" = 0;\n");
// Sum and bitwise aggregates reset to the text of a default-constructed
// literal of the aggregate's type.
7316 if(op == "SUM" || op == "AND_AGGR" ||
7317 op == "OR_AGGR" || op == "XOR_AGGR"){
7318 if(dt->is_buffer_type()){
7319 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7323 literal_t l(dt->type_indicator());
7324 retval.append(l.to_string());
7325 retval.append(";\n");
// Reset to the type's maximum literal (presumably MIN, so that any later
// value replaces it - the guarding test is not visible).
7331 if(dt->is_buffer_type()){
7332 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7336 retval.append(dt->get_max_literal());
7337 retval.append(";\n");
// Reset to the type's minimum literal (presumably MAX - the guarding test
// is not visible).
7343 if(dt->is_buffer_type()){
7344 return("ERROR, cannot yet handle reinitialization of builtin aggregates over strings.");
7348 retval.append(dt->get_min_literal());
7349 retval.append(";\n");
// No aggregate name matched.
7354 fprintf(stderr,"INTERNAL ERROR : aggregate %s not recognized in generate_aggr_reinitialize.\n",op.c_str());
7361 // Generate parameter holding vars from a param table.
// For every parameter name in param_tbl a variable param_<name> is declared
// with the parameter's host C type; parameters with handle access also get
// a `struct search_handle *param_handle_<name>` member.
7362 static string generate_param_vars(param_table *param_tbl){
7365 vector<string> param_vec = param_tbl->get_param_names();
7366 for(p=0;p<param_vec.size();p++){
7367 data_type *dt = param_tbl->get_data_type(param_vec[p]);
// NOTE(review): the name formatted here already ends in ";\n" and the next
// line appends ";\n" again, so the generated declaration looks like it
// ends in a doubled semicolon - confirm this is intended.
7368 sprintf(tmpstr,"param_%s;\n", param_vec[p].c_str());
7369 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7370 if(param_tbl->handle_access(param_vec[p])){
7371 ret += "\tstruct search_handle *param_handle_"+param_vec[p]+";\n";
7377 // Parameter manipulation routines
// Generate the text of `int load_params_<functor_name>(gs_int32_t sz,
// void *value)`, which unpacks a packed parameter block into the param_*
// variables. The generated function returns 1 when the block fails a size
// check and 0 on success.
//   functor_name       - suffix of the generated function's name.
//   param_tbl          - names and types of the query parameters.
//   param_handle_table - pass-by-handle parameters to register after
//                        unpacking (taken by value here).
7378 static string generate_load_param_block(string functor_name,
7379 param_table *param_tbl,
7380 vector<handle_param_tbl_entry *> param_handle_table
7383 vector<string> param_names = param_tbl->get_param_names();
7385 string ret = "int load_params_"+functor_name+"(gs_int32_t sz, void *value){\n";
7386 ret.append("\tint pos=0;\n");
7387 ret.append("\tint data_pos;\n");
// Declare a tmp_var_<name> temporary for every buffer-typed parameter.
7389 for(p=0;p<param_names.size();p++){
7390 data_type *dt = param_tbl->get_data_type(param_names[p]);
7391 if(dt->is_buffer_type()){
// NOTE(review): as in generate_param_vars, the name already ends in ";\n"
// and ";\n" is appended again on the following line.
7392 sprintf(tmpstr,"tmp_var_%s;\n", param_names[p].c_str());
7393 ret += "\t"+dt->make_host_cvar(tmpstr)+";\n";
7398 // Verify that the block is of minimum size
7399 if(param_names.size() > 0){
7400 ret += "//\tVerify that the value block is large enough */\n";
// data_pos is generated as the sum of the sizeof() of every parameter's
// host type; the block is rejected when that exceeds sz.
7401 ret.append("\n\tdata_pos = ");
7402 for(p=0;p<param_names.size();p++){
7403 if(p>0) ret.append(" + ");
7404 data_type *dt = param_tbl->get_data_type(param_names[p]);
7405 ret.append("sizeof( ");
7406 ret.append( dt->get_host_cvar_type() );
7410 ret.append("\tif(data_pos > sz) return 1;\n\n");
7413 ///////////////////////
7414 /// Verify that all strings can be unpacked.
7416 ret += "//\tVerify that the strings can be unpacked */\n";
7417 for(p=0;p<param_names.size();p++){
7418 data_type *dt = param_tbl->get_data_type(param_names[p]);
7419 if(dt->is_buffer_type()){
// Copy the buffer descriptor out of the block at the current position.
7420 sprintf(tmpstr,"\ttmp_var_%s = *( (%s *)((gs_sp_t )value+pos) );\n",param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7422 switch( dt->get_type() ){
7424 // ret += "\ttmp_var_"+param_names[p]+".offset = ntohl( tmp_var_"+param_names[p]+".offset );\n"; // ntoh conversion
7425 // ret += "\ttmp_var_"+param_names[p]+".length = ntohl( tmp_var_"+param_names[p]+".length );\n"; // ntoh conversion
// Reject descriptors whose offset+length runs past the block, then turn
// the offset into an address relative to `value`.
// NOTE(review): the generated check only bounds offset+length from above;
// it does not test for a negative offset or length.
7426 sprintf(tmpstr,"\tif( (int)(tmp_var_%s.offset) + tmp_var_%s.length > sz) return 1;\n",param_names[p].c_str(), param_names[p].c_str() );
7428 sprintf(tmpstr,"\ttmp_var_%s.offset = (gs_p_t)( (gs_sp_t )value + (gs_p_t)(tmp_var_%s.offset) );\n",param_names[p].c_str(), param_names[p].c_str() );
// Buffer types without a known unpacking scheme are reported on stderr.
7432 fprintf(stderr,"ERROR: parameter %s is of type %s, a buffered type, but I don't know how to unpack it as a parameter.\n",param_names[p].c_str(), dt->to_string().c_str() );
7437 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7441 /////////////////////////
7443 ret += "/*\tThe block is OK, do the unpacking. */\n";
7444 ret += "\tpos = 0;\n";
// Second pass: buffer types are copied through the type's assign-copy
// function, other types are read straight out of the block.
7446 for(p=0;p<param_names.size();p++){
7447 data_type *dt = param_tbl->get_data_type(param_names[p]);
7448 if(dt->is_buffer_type()){
// NOTE(review): "¶m_" in the format string below looks like a mis-encoded
// "&param_" - confirm against the upstream source.
7449 sprintf(tmpstr,"\t%s(¶m_%s, &tmp_var_%s);\n", dt->get_hfta_buffer_assign_copy().c_str(),param_names[p].c_str(),param_names[p].c_str() );
7452 // if(dt->needs_hn_translation()){
7453 // sprintf(tmpstr,"\tparam_%s = %s( *( (%s *)( (gs_sp_t )value+pos) ) );\n",
7454 // param_names[p].c_str(), dt->ntoh_translation().c_str(), dt->get_host_cvar_type().c_str() );
7456 sprintf(tmpstr,"\tparam_%s = *( (%s *)( (gs_sp_t )value+pos) );\n",
7457 param_names[p].c_str(), dt->get_host_cvar_type().c_str() );
7461 ret += "\tpos += sizeof( "+dt->get_host_cvar_type()+" );\n";
7464 // TODO: I think this method of handle registration is obsolete
7465 // and should be deleted.
7466 // some examination reveals that handle_access is always false.
7467 for(p=0;p<param_names.size();p++){
7468 if(param_tbl->handle_access(param_names[p]) ){
7469 data_type *pdt = param_tbl->get_data_type(param_names[p]);
7471 ret += "\tt->param_handle_"+param_names[p]+" = " +
7472 pdt->handle_registration_name() +
7473 "((struct FTA *)t, &(t->param_"+param_names[p]+"));\n";
7476 // Register the pass-by-handle parameters
7478 ret += "/* register the pass-by-handle parameters */\n";
// Each entry is dispatched on its val_type; the registration call takes
// param_<name>, by address when the type is a buffer type.
7481 for(ph=0;ph<param_handle_table.size();++ph){
7482 data_type pdt(param_handle_table[ph]->type_name);
7483 switch(param_handle_table[ph]->val_type){
7489 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7491 if(pdt.is_buffer_type()) ret += "&(";
7492 ret += "param_"+param_handle_table[ph]->param_name;
7493 if(pdt.is_buffer_type()) ret += ")";
7497 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
// Generated function reports success.
7503 ret += "\treturn(0);\n";
7504 ret.append("}\n\n");
// Generate the text of `void destroy_params_<functor_name>()`, which
// releases what load_params_<functor_name> set up.
//   functor_name       - suffix of the generated function's name.
//   param_tbl          - names and types of the query parameters.
//   param_handle_table - pass-by-handle parameters (taken by value here).
7510 static string generate_delete_param_block(string functor_name,
7511 param_table *param_tbl,
7512 vector<handle_param_tbl_entry *> param_handle_table
7516 vector<string> param_names = param_tbl->get_param_names();
7518 string ret = "void destroy_params_"+functor_name+"(){\n";
// Buffer-typed parameters are destroyed through the type's buffer-destroy
// function; handle-access parameters through the type's handle destructor.
7520 for(p=0;p<param_names.size();p++){
7521 data_type *dt = param_tbl->get_data_type(param_names[p]);
7522 if(dt->is_buffer_type()){
// NOTE(review): "¶m_" in the format string below looks like a mis-encoded
// "&param_" - confirm against the upstream source.
7523 sprintf(tmpstr,"\t\t%s(¶m_%s);\n",dt->get_hfta_buffer_destroy().c_str(),param_names[p].c_str());
7526 if(param_tbl->handle_access(param_names[p]) ){
7527 ret += "\t\t" + dt->get_handle_destructor() +
7528 "(t->param_handle_" + param_names[p] + ");\n";
7532 ret += "//\t\tDeregister handles.\n";
// Only entries whose val_type is param_e are deregistered here.
7534 for(ph=0;ph<param_handle_table.size();++ph){
7535 if(param_handle_table[ph]->val_type == param_e){
7536 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7537 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7546 // ---------------------------------------------------------------------
7547 // functions for creating functor variables.
// Declare, for every column id in cid_set, the variable that holds the
// unpacked value (unpack_var_<field>_<tblref>, typed from the schema) and a
// gs_int32_t unpack_offset_<field>_<tblref>.
//   cid_set - column ids to declare variables for.
//   schema  - supplies each field's type name via get_type_name.
7549 static string generate_access_vars(col_id_set &cid_set, table_list *schema){
7551 col_id_set::iterator csi;
7553 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7554 int schref = (*csi).schema_ref;
7555 int tblref = (*csi).tblvar_ref;
7556 string field = (*csi).field;
7557 data_type dt(schema->get_type_name(schref,field));
7558 sprintf(tmpstr,"unpack_var_%s_%d", field.c_str(), tblref);
7559 ret+="\t"+dt.make_host_cvar(tmpstr)+";\n";
7560 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", field.c_str(), tblref);
// Declare the partial_fcn_result_<p> variables that hold function results.
//   partial_fcns  - the function expressions; each supplies its data type.
//   ref_cnt       - per-function reference counts.
//   is_partial    - per-function flag marking partial functions.
//   gen_fcn_cache - when false a result variable is declared for every
//                   function; when true only for partial functions and
//                   functions referenced more than once.
// With caching enabled, functions referenced more than once also get an
// `int fcn_ref_cnt_<p>` counter.
7566 static string generate_partial_fcn_vars(vector<scalarexp_t *> &partial_fcns,
7567 vector<int> &ref_cnt, vector<bool> &is_partial, bool gen_fcn_cache){
7572 for(p=0;p<partial_fcns.size();++p){
7573 if(!gen_fcn_cache || is_partial[p] || ref_cnt[p]>1){
7574 sprintf(tmpstr,"partial_fcn_result_%d", p);
7575 ret+="\t"+partial_fcns[p]->get_data_type()->make_host_cvar(tmpstr)+";\n";
7576 if(gen_fcn_cache && ref_cnt[p]>1){
7577 ret+="\tint fcn_ref_cnt_"+int_to_string(p)+";\n";
// Declare a complex_literal_<n> variable for every entry of the complex
// literal table, plus a `struct search_handle *lit_handle_<n>` for entries
// that are handle references.
7585 static string generate_complex_lit_vars(cplx_lit_table *complex_literals){
7588 for(cl=0;cl<complex_literals->size();cl++){
7589 literal_t *l = complex_literals->get_literal(cl);
// NOTE(review): dtl is allocated with new on every iteration and no
// matching delete is visible here - possible leak, TODO confirm.
7590 data_type *dtl = new data_type( l->get_type() );
7591 sprintf(tmpstr,"complex_literal_%d",cl);
7592 ret += "\t"+dtl->make_host_cvar(tmpstr)+";\n";
7593 if(complex_literals->is_handle_ref(cl)){
7594 sprintf(tmpstr,"\tstruct search_handle *lit_handle_%d;\n",cl);
// Declare one `gs_param_handle_t handle_param_<p>` variable per entry of the
// pass-by-handle parameter table.
// NOTE(review): "¶m_handle_table" in the signature looks like a
// mis-encoded "&param_handle_table" (the loop below uses the name
// param_handle_table) - confirm against the upstream source.
7602 static string generate_pass_by_handle_vars(
7603 vector<handle_param_tbl_entry *> ¶m_handle_table){
7607 for(p=0;p<param_handle_table.size();++p){
7608 sprintf(tmpstr,"\tgs_param_handle_t handle_param_%d;\n",p);
7616 // ------------------------------------------------------------
7617 // functions for generating initialization code.
// Generate the statements that initialise each unpack_offset_<field>_<tblref>
// by looking the field up by name in schema_handle<tblref> with
// ftaschema_get_field_offset_by_name.
//   cid_set - column ids whose offsets are initialised.
7619 static string gen_access_var_init(col_id_set &cid_set){
7621 col_id_set::iterator csi;
7623 for(csi=cid_set.begin(); csi!=cid_set.end();++csi){
7624 int tblref = (*csi).tblvar_ref;
7625 string field = (*csi).field;
7626 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle%d, \"%s\");\n", field.c_str(),tblref,tblref,field.c_str());
// Generate the statements that initialise every complex_literal_<n>: the
// literal's to_hfta_C_code is called with the address expression
// "&(complex_literal_<n>)". Handle-referenced literals additionally get
// their lit_handle_<n> registered.
7633 static string gen_complex_lit_init(cplx_lit_table *complex_literals){
7637 for(cl=0;cl<complex_literals->size();cl++){
7638 literal_t *l = complex_literals->get_literal(cl);
7639 // sprintf(tmpstr,"\tcomplex_literal_%d = ",cl);
7640 // ret += tmpstr + l->to_hfta_C_code() + ";\n";
7641 sprintf(tmpstr,"&(complex_literal_%d)",cl);
7642 ret += "\t" + l->to_hfta_C_code(tmpstr) + ";\n";
7643 // I think that the code below is obsolete
7644 // TODO: it is obsolete. add_cpx_lit is always
7645 // called with the handle indicator being false.
7646 // This entire structure should be cleansed.
7647 if(complex_literals->is_handle_ref(cl)){
// NOTE(review): dt is allocated with new and no matching delete is visible
// here - possible leak, TODO confirm.
7648 data_type *dt = new data_type( l->get_type() );
7649 sprintf(tmpstr,"\tlit_handle_%d = %s(&(f->complex_literal_%d));\n",
7650 cl, dt->hfta_handle_registration_name().c_str(), cl);
// Generate the statements that initialise buffer-typed partial function
// results: for each such function, a default-constructed literal of the
// result type emits its to_hfta_C_code against
// "&(partial_fcn_result_<p>)". Results of other types produce no code in
// the lines visible here.
7659 static string gen_partial_fcn_init(vector<scalarexp_t *> &partial_fcns){
7663 for(p=0;p<partial_fcns.size();++p){
7664 data_type *pdt =partial_fcns[p]->get_data_type();
7665 literal_t empty_lit(pdt->type_indicator());
7666 if(pdt->is_buffer_type()){
7667 // sprintf(tmpstr,"\tpartial_fcn_result_%d = %s;\n",
7668 // p, empty_lit.to_hfta_C_code().c_str());
7669 sprintf(tmpstr,"&(partial_fcn_result_%d)",p);
7670 ret += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
// Generate the statements that register the pass-by-handle parameters:
// each entry yields "handle_param_<ph> = <registration fcn>(" followed by
// an argument chosen by the entry's val_type.
// NOTE(review): "¶m_handle_table" in the signature looks like a
// mis-encoded "&param_handle_table" - confirm against the upstream source.
7676 static string gen_pass_by_handle_init(
7677 vector<handle_param_tbl_entry *> ¶m_handle_table){
7681 for(ph=0;ph<param_handle_table.size();++ph){
7682 data_type pdt(param_handle_table[ph]->type_name);
7683 sprintf(tmpstr,"\thandle_param_%d = %s(",ph,param_handle_table[ph]->lfta_registration_fcn().c_str());
7684 switch(param_handle_table[ph]->val_type){
// Complex literal argument: complex_literal_<idx>, by address for buffer
// types.
7687 if(pdt.is_buffer_type()) ret += "&(";
7688 sprintf(tmpstr,"complex_literal_%d",param_handle_table[ph]->complex_literal_idx);
7690 if(pdt.is_buffer_type()) ret += ")";
// Plain literal argument: the literal's own C text.
7695 ret += param_handle_table[ph]->litval->to_hfta_C_code("") + ");\n";
7699 // query parameter handles are registered/deregistered in the
7700 // load_params function.
7701 // ret += "t->param_"+param_handle_table[ph]->param_name;
7704 fprintf(stderr, "INTERNAL ERROR unknown case found when processing pass-by-handle parameter table.\n");
7711 //------------------------------------------------------------
7712 // functions for destructor and deregistration code
// Generate the statements that destroy every buffer-typed complex literal
// by calling the type's buffer-destroy function on &complex_literal_<n>.
// Literals of other types produce no code in the lines visible here.
7714 static string gen_complex_lit_dtr(cplx_lit_table *complex_literals){
7718 for(cl=0;cl<complex_literals->size();cl++){
7719 literal_t *l = complex_literals->get_literal(cl);
7720 data_type ldt( l->get_type() );
7721 if(ldt.is_buffer_type()){
7722 sprintf(tmpstr,"\t\t%s(&complex_literal_%d);\n",
7723 ldt.get_hfta_buffer_destroy().c_str(), cl );
// Generate one deregistration call per pass-by-handle parameter:
// "<deregistration fcn>(handle_param_<ph>);".
// NOTE(review): "¶m_handle_table" in the signature looks like a
// mis-encoded "&param_handle_table" - confirm against the upstream source.
7731 static string gen_pass_by_handle_dtr(
7732 vector<handle_param_tbl_entry *> ¶m_handle_table){
7736 for(ph=0;ph<param_handle_table.size();++ph){
7737 sprintf(tmpstr, "\t\t%s(handle_param_%d);\n",
7738 param_handle_table[ph]->lfta_deregistration_fcn().c_str(),ph);
7744 // Destroy all previous results
// For every partial function whose result is a buffer type, emit a call to
// the type's buffer-destroy function on &partial_fcn_result_<p>.
7745 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns){
7749 for(p=0;p<partial_fcns.size();++p){
7750 data_type *pdt =partial_fcns[p]->get_data_type();
7751 if(pdt->is_buffer_type()){
7752 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7753 pdt->get_hfta_buffer_destroy().c_str(), p );
7760 // Destroy previous results of fcns in pfcn_set
// Same as the single-argument overload, restricted to the function indices
// listed in pfcn_set; each index is used directly to subscript
// partial_fcns.
7761 static string gen_partial_fcn_dtr(vector<scalarexp_t *> &partial_fcns, set<int> &pfcn_set){
7763 set<int>::iterator si;
7765 for(si=pfcn_set.begin(); si!=pfcn_set.end(); ++si){
7766 data_type *pdt =partial_fcns[(*si)]->get_data_type();
7767 if(pdt->is_buffer_type()){
7768 sprintf(tmpstr,"\t\t%s(&partial_fcn_result_%d);\n",
7769 pdt->get_hfta_buffer_destroy().c_str(), (*si) );
7777 //-------------------------------------------------------------------------
7778 // Functions related to se generation bookkeeping.
// Collect the column ids referenced by predicate `pr` that are not yet in
// found_cids.
//   pr         - predicate to scan (via gather_pr_col_ids).
//   found_cids - in/out: ids already seen; extended with the ids in new_cids.
//   new_cids   - out: receives the ids of pr that were missing from
//                found_cids.
//   gtbl       - forwarded to gather_pr_col_ids.
// NOTE(review): new_cids is not cleared in the lines visible here, so
// anything the caller left in it is also inserted into found_cids.
7780 static void get_new_pred_cids(predicate_t *pr, col_id_set &found_cids,
7781 col_id_set &new_cids, gb_table *gtbl){
7782 col_id_set this_pred_cids;
7783 col_id_set::iterator csi;
7785 // get colrefs in predicate not already found.
7786 gather_pr_col_ids(pr,this_pred_cids,gtbl);
7787 set_difference(this_pred_cids.begin(), this_pred_cids.end(),
7788 found_cids.begin(), found_cids.end(),
7789 inserter(new_cids,new_cids.begin()) );
7791 // We've found these cids, so update found_cids
7792 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7793 found_cids.insert((*csi));
7797 // after the call, new_cids will have the colrefs in se but not found_cids.
7798 // update found_cids with the new cids.
//   se         - scalar expression to scan (via gather_se_col_ids).
//   found_cids - in/out: ids already seen; extended with the ids in new_cids.
//   new_cids   - out: receives the ids of se that were missing from
//                found_cids.
//   gtbl       - forwarded to gather_se_col_ids.
// NOTE(review): new_cids is not cleared in the lines visible here, so
// anything the caller left in it is also inserted into found_cids.
7799 static void get_new_se_cids(scalarexp_t *se, col_id_set &found_cids,
7800 col_id_set &new_cids, gb_table *gtbl){
7801 col_id_set this_se_cids;
7802 col_id_set::iterator csi;
7804 // get colrefs in se not already found.
7805 gather_se_col_ids(se,this_se_cids,gtbl);
7806 set_difference(this_se_cids.begin(), this_se_cids.end(),
7807 found_cids.begin(), found_cids.end(),
7808 inserter(new_cids,new_cids.begin()) );
7810 // We've found these cids, so update found_cids
7811 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi)
7812 found_cids.insert((*csi));
// Generate the statements that unpack the columns in new_cids from the
// input tuples into their unpack_var_<field>_<tblref> variables.
//   schema      - supplies each field's type name.
//   new_cids    - column ids to unpack.
//   on_problem  - text of the value the generated code returns when
//                 unpacking a buffer-typed field reports a problem.
//   needs_xform - indexed by tblref; selects the transforming unpack
//                 function or its "noxf" variant.
7816 static string gen_unpack_cids(table_list *schema, col_id_set &new_cids, string on_problem, vector<bool> &needs_xform){
7818 col_id_set::iterator csi;
7820 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7821 int schref = (*csi).schema_ref;
7822 int tblref = (*csi).tblvar_ref;
7823 string field = (*csi).field;
7824 data_type dt(schema->get_type_name(schref,field));
7826 if(needs_xform[tblref]){
7827 unpack_fcn = dt.get_hfta_unpack_fcn();
7829 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
// Two call shapes are generated: a checked one that takes the tuple size and
// &problem, and a "<fcn>_nocheck" one that takes only the data and offset.
7831 if(dt.is_buffer_type()){
7832 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
7834 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
// Buffer-typed fields are followed by a bail-out on `problem`.
7837 if(dt.is_buffer_type()){
7838 ret += "\tif(problem) return "+on_problem+" ;\n";
7844 // generates the declaration of all the variables related to
7845 // temp tuples generation
// In the lines visible here that is a single member,
// `bool temp_tuple_received`, preceded by a comment in the generated code.
7846 static string gen_decl_temp_vars(){
7849 ret += "\t// variables related to temp tuple generation\n";
7850 ret += "\tbool temp_tuple_received;\n";
7855 // generates initialization code for variables related to temp tuple processing
//   schema      - supplies type names and modifier lists of the fields.
//   select_list - select elements; only those whose expression has a
//                 temporal data type are considered.
//   gtbl        - forwarded to get_new_se_cids.
// temp_tuple_received is set to false, and every column first referenced by
// a temporal select expression gets its unpack variable seeded with the
// type's minimum literal when the type is increasing, else its maximum.
7856 static string gen_init_temp_vars(table_list *schema, vector<select_element *>& select_list, gb_table *gtbl){
7858 col_id_set::iterator csi;
7861 // Initialize internal state
7862 ret += "\ttemp_tuple_received = false;\n";
7864 col_id_set temp_cids; // colrefs unpacked thus far.
7866 for(s=0;s<select_list.size();s++){
7867 if (select_list[s]->se->get_data_type()->is_temporal()) {
7868 // Find the set of attributes accessed in this SE
7869 col_id_set new_cids;
7870 get_new_se_cids(select_list[s]->se,temp_cids, new_cids, gtbl);
7873 for(csi=new_cids.begin(); csi!=new_cids.end(); ++csi){
7874 int schref = (*csi).schema_ref;
7875 int tblref = (*csi).tblvar_ref;
7876 string field = (*csi).field;
7877 data_type dt(schema->get_type_name(schref,field), schema->get_modifier_list(schref,field));
7879 sprintf(tmpstr,"\t unpack_var_%s_%d = %s;\n", field.c_str(), tblref,
7880 dt.is_increasing() ? dt.get_min_literal().c_str() : dt.get_max_literal().c_str());
7890 // generates a check if tuple is temporal
//   node_name - not referenced in the lines visible here.
//   channel   - input channel number; used to form the names tup<channel>,
//               schema_handle<channel> and tuple_metadata_offset<channel>.
// The generated code sets temp_tuple_received from
// ftaschema_is_temporal_tuple_offset(tuple_metadata_offset<channel>,
// tup<channel>.data).
7891 static string gen_temp_tuple_check(string node_name, int channel) {
7895 sprintf(tmpstr, "tup%d", channel);
7896 string tup_name = tmpstr;
// NOTE(review): schema_handle_name is only referenced by the commented-out
// line further down, as far as the visible lines show.
7897 sprintf(tmpstr, "schema_handle%d", channel);
7898 string schema_handle_name = tmpstr;
7899 string tuple_offset_name = "tuple_metadata_offset"+int_to_string(channel);
7901 // check if it is a temporary status tuple
7902 ret += "\t// check if tuple is temp status tuple\n";
7903 // ret += "\tif (ftaschema_is_temporal_tuple(" + schema_handle_name + ", " + tup_name + ".data)) {\n";
7904 ret += "\tif (ftaschema_is_temporal_tuple_offset(" + tuple_offset_name + ", " + tup_name + ".data)) {\n";
7905 ret += "\t\ttemp_tuple_received = true;\n";
7907 ret += "\telse\n\t\ttemp_tuple_received = false;\n\n";
7912 // generates unpacking code for all temporal attributes referenced in select
//   schema      - forwarded to gen_unpack_cids.
//   found_cids  - in/out: column ids already unpacked; extended by
//                 get_new_se_cids with the ids unpacked here.
//   select_list - select elements; only temporal ones are processed.
//   gtbl        - forwarded to get_new_se_cids.
//   needs_xform - forwarded to gen_unpack_cids.
// The generated unpacking code returns "false" on an unpacking problem.
7913 static string gen_unpack_temp_vars(table_list *schema, col_id_set& found_cids, vector<select_element *>& select_list, gb_table *gtbl, vector<bool> &needs_xform) {
7917 // Unpack all the temporal attributes references in select list
7918 // we need it to be able to generate temp status tuples
7919 for(s=0;s<select_list.size();s++){
7920 if (select_list[s]->se->get_data_type()->is_temporal()) {
7921 // Find the set of attributes accessed in this SE
7922 col_id_set new_cids;
7923 get_new_se_cids(select_list[s]->se,found_cids, new_cids, gtbl);
7924 // Unpack these values.
7925 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
7933 // Generates temporal tuple generation code (except attribute packing)
// The emitted code, in order:
//   - sets result.tuple_size to sizeof(<tuple struct of node_name>) plus
//     sizeof(gs_uint8_t),
//   - mallocs result.data with that size and sets result.heap_resident,
//   - stores TEMPORAL_TUPLE in the byte that follows the tuple struct,
//   - declares a local pointer named "tuple", of the generated tuple type,
//     pointing at result.data.
// The tuple struct name comes from generate_tuple_name(node_name).
// NOTE(review): the emitted code does not test the result of malloc.
7934 static string gen_init_temp_status_tuple(string node_name) {
7937 ret += "\t// create temp status tuple\n";
7938 ret += "\tresult.tuple_size = sizeof("+generate_tuple_name( node_name)+") + sizeof(gs_uint8_t);\n";
7939 ret += "\tresult.data = (gs_sp_t )malloc(result.tuple_size);\n";
7940 ret += "\tresult.heap_resident = true;\n";
7941 ret += "\t// Mark tuple as temporal\n";
7942 ret += "\t*((gs_sp_t )result.data + sizeof("+generate_tuple_name( node_name)+")) = TEMPORAL_TUPLE;\n";
7944 ret += "\t"+generate_tuple_name( node_name)+" *tuple = ("+
7945 generate_tuple_name( node_name) +" *)(result.data);\n";
7951 // Assume that all colrefs unpacked already ...
// For every index in pfcn_refs, appends to ret the text produced by
// unpack_partial_fcn for partial_fcns[index], followed by a generated
// "if(retval) return <on_problem> ;" bail-out line.
//   schema       : forwarded to unpack_partial_fcn.
//   partial_fcns : table of partial-function expressions, indexed by the
//                  values stored in pfcn_refs.
//   pfcn_refs    : indices of the functions to emit.
//   on_problem   : text of the value the generated code returns on failure.
7952 static string gen_unpack_partial_fcn(table_list *schema,
7953 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7956 set<int>::iterator si;
7958 // Since set<..> is a "Sorted Associative Container",
7959 // we can walk through it in sorted order by walking from
7960 // begin() to end(). (and the partial fcns must be
7961 // evaluated in this order).
7962 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
7963 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7964 ret += "\tif(retval) return "+on_problem+" ;\n";
7969 // Assume that all colrefs unpacked already ...
7970 // this time with cached functions.
// Overload that also handles functions referenced more than once.
// For every index i in pfcn_refs:
//   - when fcn_ref_cnt[i] > 1 the emitted code is guarded by a generated
//     "if(fcn_ref_cnt_<i>==0){" test and ends by setting fcn_ref_cnt_<i>=1,
//     so the generated evaluation happens only while that flag is still 0;
//   - when is_partial_fcn[i] is set, the unpack_partial_fcn text plus the
//     "if(retval) return <on_problem> ;" bail-out is emitted;
//   - when the function is not partial but referenced more than once, its
//     value (generate_cached_fcn) is stored in partial_fcn_result_<i>.
7971 static string gen_unpack_partial_fcn(table_list *schema,
7972 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
7973 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
7976 set<int>::iterator si;
7978 // Since set<..> is a "Sorted Associative Container",
7979 // we can walk through it in sorted order by walking from
7980 // begin() to end(). (and the partial fcns must be
7981 // evaluated in this order).
7982 for(si=pfcn_refs.begin();si!=pfcn_refs.end();++si){
// Open the "not yet evaluated" guard for multiply-referenced functions.
7983 if(fcn_ref_cnt[(*si)] > 1){
7984 ret += "\tif(fcn_ref_cnt_"+int_to_string((*si))+"==0){\n";
7986 if(is_partial_fcn[(*si)]){
7987 ret += unpack_partial_fcn(partial_fcns[(*si)], (*si), schema);
7988 ret += "\tif(retval) return "+on_problem+" ;\n";
// Cache the value of a non-partial function and mark it as evaluated.
7990 if(fcn_ref_cnt[(*si)] > 1){
7991 if(!is_partial_fcn[(*si)]){
7992 ret += "\t\tpartial_fcn_result_"+int_to_string((*si))+"="+generate_cached_fcn(partial_fcns[(*si)],(*si),schema)+";\n";
7994 ret += "\t\tfcn_ref_cnt_"+int_to_string((*si))+"=1;\n";
8003 // This version finds and unpacks new colrefs.
8004 // found_cids gets updated with the newly unpacked cids.
// For every index in pfcn_refs the function first emits unpack code
// (gen_unpack_cids) for the column ids that get_new_se_cids reports as new
// for that partial function, then the unpack_partial_fcn text, then a
// generated "if(retval) return <on_problem> ;" bail-out.
//   on_problem  : text of the value the generated code returns on failure;
//                 also forwarded to gen_unpack_cids.
//   gtbl        : forwarded to get_new_se_cids.
//   needs_xform : forwarded to gen_unpack_cids.
8005 static string gen_full_unpack_partial_fcn(table_list *schema,
8006 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8007 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8008 vector<bool> &needs_xform){
8010 set<int>::iterator slsi;
8012 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8013 // find all new fields ref'd by this partial fcn.
8014 col_id_set new_cids;
8015 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8016 // Unpack these values.
8017 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8019 // Now evaluate the partial fcn.
8020 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8021 ret += "\tif(retval) return "+on_problem+" ;\n";
8026 // This version finds and unpacks new colrefs.
8027 // found_cids gets updated with the newly unpacked cids.
8028 // BUT : only for the partial functions.
// Overload that takes the reference counts and the partial/non-partial flags.
// Column unpacking (get_new_se_cids + gen_unpack_cids) is emitted only for
// entries with is_partial_fcn set. For entries with fcn_ref_cnt > 1 the
// emitted evaluation is wrapped in a generated "if(fcn_ref_cnt_<i>==0){"
// guard and finishes by setting fcn_ref_cnt_<i>=1.
// Unlike the cached gen_unpack_partial_fcn overload, no call to
// generate_cached_fcn appears here, so non-partial functions get no value
// assignment from this function.
8029 static string gen_full_unpack_partial_fcn(table_list *schema,
8030 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8031 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn,
8032 col_id_set &found_cids, gb_table *gtbl, string on_problem,
8033 vector<bool> &needs_xform){
8035 set<int>::iterator slsi;
8037 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8038 if(is_partial_fcn[(*slsi)]){
8039 // find all new fields ref'd by this partial fcn.
8040 col_id_set new_cids;
8041 get_new_se_cids(partial_fcns[(*slsi)], found_cids, new_cids, gtbl);
8042 // Unpack these values.
8043 ret += gen_unpack_cids(schema, new_cids, on_problem, needs_xform);
8045 // Now evaluate the partial fcn.
8046 if(fcn_ref_cnt[(*slsi)] > 1){
8047 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8049 if(is_partial_fcn[(*slsi)]){
8050 ret += unpack_partial_fcn(partial_fcns[(*slsi)], (*slsi), schema);
8051 ret += "\tif(retval) return "+on_problem+" ;\n";
// Mark a multiply-referenced function as evaluated.
8053 if(fcn_ref_cnt[(*slsi)] > 1){
8054 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
// Emits evaluation code for the cached (non-partial, referenced more than
// once) functions listed in pfcn_refs. For each such index i the generated
// code is:
//   if(fcn_ref_cnt_<i>==0){ partial_fcn_result_<i> = <cached fcn>; fcn_ref_cnt_<i>=1; ...
// i.e. the value is computed only while the generated flag is still 0.
//   schema : forwarded to generate_cached_fcn.
// The generated text is accumulated in ret.
8063 static string gen_remaining_cached_fcns(table_list *schema,
8064 vector<scalarexp_t *> &partial_fcns,set<int> &pfcn_refs,
8065 vector<int> &fcn_ref_cnt, vector<bool> &is_partial_fcn){
8067 set<int>::iterator slsi;
8069 for(slsi=pfcn_refs.begin(); slsi!=pfcn_refs.end(); ++slsi){
8070 if(!is_partial_fcn[(*slsi)] && fcn_ref_cnt[(*slsi)] > 1){
// NOTE(review): this inner test repeats the fcn_ref_cnt > 1 condition already checked above.
8072 if(fcn_ref_cnt[(*slsi)] > 1){
8073 ret += "\tif(fcn_ref_cnt_"+int_to_string((*slsi))+"==0){\n";
8074 ret += "\t\tpartial_fcn_result_"+int_to_string((*slsi))+"="+generate_cached_fcn(partial_fcns[(*slsi)],(*slsi),schema)+";\n";
8075 ret += "\t\tfcn_ref_cnt_"+int_to_string((*slsi))+"=1;\n";
8084 // unpack the colrefs in cid_set not in found_cids
// For each column id of cid_set that found_cids does not contain, formats
// an assignment to unpack_var_<field>_<tblref>:
//   - buffer types use the checked unpack function, which receives the tuple
//     size and &problem; the generated code then returns <on_problem> when
//     problem is set;
//   - all other types use the "<unpack fcn>_nocheck" variant.
// Two unpack function names are available: get_hfta_unpack_fcn() is chosen
// under the needs_xform[tblref] test, get_hfta_unpack_fcn_noxf() is the other.
//   on_problem : text of the value the generated code returns on failure.
// NOTE(review): dt is built from the type name only, without the field's
// modifier list - confirm this is intended.
8085 static string gen_remaining_colrefs(table_list *schema,
8086 col_id_set &cid_set, col_id_set &found_cids, string on_problem,
8087 vector<bool> &needs_xform){
8089 col_id_set::iterator csi;
8091 for(csi=cid_set.begin(); csi!=cid_set.end();csi++){
8092 if(found_cids.count( (*csi) ) == 0){
8093 int schref = (*csi).schema_ref;
8094 int tblref = (*csi).tblvar_ref;
8095 string field = (*csi).field;
8096 data_type dt(schema->get_type_name(schref,field));
8098 if(needs_xform[tblref]){
8099 unpack_fcn = dt.get_hfta_unpack_fcn();
8101 unpack_fcn = dt.get_hfta_unpack_fcn_noxf();
8103 if(dt.is_buffer_type()){
8104 sprintf(tmpstr,"\t unpack_var_%s_%d = %s(tup%d.data, tup%d.tuple_size, unpack_offset_%s_%d, &problem);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, tblref, field.c_str(), tblref);
8106 sprintf(tmpstr,"\t unpack_var_%s_%d = %s_nocheck(tup%d.data, unpack_offset_%s_%d);\n",field.c_str(), tblref, unpack_fcn.c_str(), tblref, field.c_str(), tblref);
// Only the checked (buffer type) unpack can report a problem.
8109 if(dt.is_buffer_type()){
8110 ret.append("\tif(problem) return "+on_problem+" ;\n");
// Emits declarations of temporaries named selvar_<s> for buffer-typed select
// expressions, each initialised with the generated code of the expression.
// A select element gets a temporary only when its data type is a buffer type
// and the expression is none of:
//   - a column reference (SE_COLREF),
//   - a partial function (SE_FUNC with is_partial()),
//   - a function with get_aggr_ref() >= 0.
//   schema : forwarded to generate_se_code.
8117 static string gen_buffer_selvars(table_list *schema,
8118 vector<select_element *> &select_list){
8122 for(s=0;s<select_list.size();s++){
8123 scalarexp_t *se = select_list[s]->se;
8124 data_type *sdt = se->get_data_type();
8125 if(sdt->is_buffer_type() &&
8126 !( (se->get_operator_type() == SE_COLREF) ||
8127 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8128 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8130 sprintf(tmpstr,"selvar_%d",s);
8131 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
8132 ret += generate_se_code(se,schema) +";\n";
// Produces the " + <size fcn>(&...)" terms that extend a generated tuple-size
// expression by the size of every buffer-typed select element.
// Elements that are not a column reference, a partial function or a function
// with get_aggr_ref() >= 0 are sized through their selvar_<s> temporary; the
// remaining buffer-typed elements are sized through the address of their
// generated expression code. The size function name comes from
// get_hfta_buffer_size().
8138 static string gen_buffer_selvars_size(vector<select_element *> &select_list,table_list *schema){
8142 for(s=0;s<select_list.size();s++){
8143 scalarexp_t *se = select_list[s]->se;
8144 data_type *sdt = se->get_data_type();
8145 if(sdt->is_buffer_type()){
8146 if( !( (se->get_operator_type() == SE_COLREF) ||
8147 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8148 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8150 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
8153 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),
8154 generate_se_code(se,schema).c_str());
// Formats destructor calls (get_hfta_buffer_destroy()) for selvar_<s>
// temporaries of buffer-typed select elements.
// Excluded are column references, SE_AGGR_STAR and SE_AGGR_SE expressions,
// partial functions, and functions with get_aggr_ref() >= 0.
// NOTE(review): the exclusion list here additionally names SE_AGGR_STAR and
// SE_AGGR_SE compared with gen_buffer_selvars - confirm both lists select the
// same temporaries for the callers that use them together.
8162 static string gen_buffer_selvars_dtr(vector<select_element *> &select_list){
8166 for(s=0;s<select_list.size();s++){
8167 scalarexp_t *se = select_list[s]->se;
8168 data_type *sdt = se->get_data_type();
8169 if(sdt->is_buffer_type() &&
8170 !( (se->get_operator_type() == SE_COLREF) ||
8171 (se->get_operator_type() == SE_AGGR_STAR) ||
8172 (se->get_operator_type() == SE_AGGR_SE) ||
8173 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
8174 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
8176 sprintf(tmpstr,"\t\t%s(&selvar_%d);\n",
8177 sdt->get_hfta_buffer_destroy().c_str(), s );
// Emits the code that fills the fields tuple->tuple_var<s> of an output tuple.
//   schema        : forwarded to generate_se_code.
//   select_list   : one output field per element, in order.
//   node_name     : used to form the tuple struct name whose size seeds the
//                   generated tuple_pos variable.
//   temporal_only : when true, buffer-typed fields are not copied and, among
//                   the remaining fields, only those whose data type
//                   is_temporal() are assigned.
// The generated tuple_pos starts at sizeof(<tuple struct>) + sizeof(gs_uint8_t)
// and is advanced by the buffer size after each buffer copy; buffer data is
// copied to ((gs_sp_t)tuple)+tuple_pos.
// NOTE(review): the test that selects the selvar_<s> form below does not
// exclude functions with get_aggr_ref() >= 0, unlike gen_buffer_selvars -
// confirm no caller passes such expressions.
8185 static string gen_pack_tuple(table_list *schema, vector<select_element *> &select_list, string node_name, bool temporal_only){
8189 ret += "\tint tuple_pos = sizeof("+generate_tuple_name(node_name)+") + sizeof(gs_uint8_t);\n";
8190 for(s=0;s<select_list.size();s++){
8191 scalarexp_t *se = select_list[s]->se;
8192 data_type *sdt = se->get_data_type();
8194 if(!temporal_only && sdt->is_buffer_type()){
8195 if( !( (se->get_operator_type() == SE_COLREF) ||
8196 (se->get_operator_type() == SE_FUNC && se->is_partial()))
// Buffer value held in a selvar_<s> temporary: copy it, then advance tuple_pos.
8198 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
8200 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
// Buffer value addressed directly through its generated expression code.
8203 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code(se,schema).c_str());
8205 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code(se,schema).c_str());
// Non-buffer field: plain assignment of the generated expression.
8208 }else if (!temporal_only || sdt->is_temporal()) {
8209 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
8211 ret.append(generate_se_code(se,schema) );
8219 //-------------------------------------------------------------------------
8220 // functor generation methods
8221 //-------------------------------------------------------------------------
8223 /////////////////////////////////////////////////////////
8224 //// File Output Operator
// Returns the name of the generated functor class for this file output node:
// "output_file_functor_" followed by the normalized node name.
8225 string output_file_qpn::generate_functor_name(){
8226 return("output_file_functor_" + normalize_name(get_node_name()));
// Generates the C++ source text of the functor class used by the file output
// operator of this node. The generated class contains, in order:
//   - private state: schema handle, tuple metadata offset, unpack offsets,
//     unpacked hash fields, timestamp / last_bucket / slack, status flags
//     and bucketwidth;
//   - a constructor taking the schema handle, a destructor and parameter
//     block stubs;
//   - temp_status_received(), new_epoch(), output_hash(),
//     num_file_streams(), get_filename_base(), do_compression(),
//     is_eof_tuple(), propagate_tuple() and create_temp_status_tuple().
// Parameters schema, Ext_fcns and needs_xform are not referenced by the
// statements of this function.
8230 string output_file_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8231 string ret = "class " + this->generate_functor_name() + "{\n";
8233 // Find the temporal field
8234 int temporal_field_idx;
8235 data_type *tdt = NULL;
8236 for(temporal_field_idx=0;temporal_field_idx<fields.size();temporal_field_idx++){
// NOTE(review): a data_type is allocated per inspected field and no matching
// delete appears in this function - looks like a leak; confirm ownership.
8237 tdt = new data_type(fields[temporal_field_idx]->get_type(), fields[temporal_field_idx]->get_modifier_list());
8238 if(tdt->is_temporal()){
// The index equals fields.size() when the loop ran to completion, i.e. no temporal field was found.
8245 if(temporal_field_idx == fields.size()){
8246 fprintf(stderr,"ERROR, no temporal field for file output operator %s\n",node_name.c_str());
8250 ret += "private:\n";
8252 // var to save the schema handle
8253 ret += "\tint schema_handle0;\n";
8254 // tuple metadata offset
8255 ret += "\tint tuple_metadata_offset0;\n";
8256 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[temporal_field_idx]->get_name().c_str());
8259 // For unpacking the hashing fields, if any
8261 for(h=0;h<hash_flds.size();++h){
8262 sprintf(tmpstr,"unpack_var_%s", fields[hash_flds[h]]->get_name().c_str());
8263 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8264 ret+="\t"+hdt->make_host_cvar(tmpstr)+";\n";
// The temporal field's offset variable was already declared above.
8265 if(hash_flds[h]!=temporal_field_idx){
8266 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_0;\n", fields[hash_flds[h]]->get_name().c_str());
8270 // Special case for output file hashing: several streams but no hash fields
8271 if(n_streams>1 && hash_flds.size()==0){
8272 ret+="\tgs_uint32_t outfl_cnt;\n";
8275 ret += "//\t\tRemember the last posted timestamp.\n";
8276 ret+="\t"+tdt->make_host_cvar("timestamp")+";\n";
8277 ret+="\t"+tdt->make_host_cvar("last_bucket")+";\n";
8278 ret+="\t"+tdt->make_host_cvar("slack")+";\n";
8279 ret += "\tbool first_execution;\n";
8280 ret += "\tbool temp_tuple_received;\n";
8281 ret += "\tbool is_eof;\n";
8283 ret += "\tgs_int32_t bucketwidth;\n";
8286 //-------------------
8287 // The functor constructor
8288 // pass in a schema handle (e.g. for the 1st input stream),
8289 // use it to determine how to unpack the merge variable.
8290 // ASSUME that both streams have the same layout,
8291 // just duplicate it.
8294 ret += "//\t\tFunctor constructor.\n";
8295 ret += this->generate_functor_name()+"(int schema_hndl){\n";
8297 ret += "\tschema_handle0 = schema_hndl;\n";
8298 // tuple metadata offset
8299 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
// A bucketwidth of 0 in the output spec falls back to 60 in the generated code.
8301 if(output_spec->bucketwidth == 0)
8302 ret += "\tbucketwidth = 60;\n";
8304 ret += "\tbucketwidth = "+int_to_string(output_spec->bucketwidth)+";\n";
8305 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8307 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[temporal_field_idx]->get_name().c_str(), fields[temporal_field_idx]->get_name().c_str());
8309 // Hashing field unpacking, if any
8310 for(h=0;h<hash_flds.size();++h){
8311 if(hash_flds[h]!=temporal_field_idx){
8312 sprintf(tmpstr,"\tunpack_offset_%s_0 = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", fields[hash_flds[h]]->get_name().c_str(),fields[hash_flds[h]]->get_name().c_str());
8317 ret+="\tfirst_execution = true;\n";
8319 // Initialize internal state
8320 ret += "\ttemp_tuple_received = false;\n";
8322 // Init last timestamp values to minimum value for their type
// (increasing types start at the minimum literal, the others at the maximum literal)
8323 if (tdt->is_increasing()){
8324 ret+="\ttimestamp = " + tdt->get_min_literal() + ";\n";
8325 ret+="\tlast_bucket = " + tdt->get_min_literal() + ";\n";
8327 ret+="\ttimestamp = " + tdt->get_max_literal() + ";\n";
8328 ret+="\tlast_bucket = " + tdt->get_max_literal() + ";\n";
8334 ret += "//\t\tFunctor destructor.\n";
8335 ret += "~"+this->generate_functor_name()+"(){\n";
// Parameter handling is generated as stubs: load returns 0, destroy is empty.
8339 ret += "int load_params_"+this->generate_functor_name()+"(gs_int32_t sz, void *value){return 0;}\n";
8340 ret += "void destroy_params_"+this->generate_functor_name()+"(){}\n";
8342 // Register new parameter block
8343 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8344 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8345 ret += "\treturn this->load_params_"+this->generate_functor_name()+
// Generated temp_status_received(tup0): records eof / temporal status and
// unpacks the timestamp and the hash fields from the tuple.
8349 ret+="\nbool temp_status_received(const host_tuple& tup0)/* const*/ {\n";
8350 ret+="\tgs_int32_t problem;\n";
8352 ret += "\tvoid *tup_ptr = (void *)(&tup0);\n";
8353 ret += "\tis_eof = ftaschema_is_eof_tuple(schema_handle0,tup_ptr);\n";
8355 ret += gen_temp_tuple_check(this->node_name, 0);
8357 sprintf(tmpstr,"\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", tdt->get_hfta_unpack_fcn_noxf().c_str(), fields[temporal_field_idx]->get_name().c_str(), 0);
8360 for(h=0;h<hash_flds.size();++h){
8361 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8362 sprintf(tmpstr,"\tunpack_var_%s = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", fields[hash_flds[h]]->get_name().c_str(), hdt->get_hfta_unpack_fcn_noxf().c_str(), fields[hash_flds[h]]->get_name().c_str(), 0);
8366 " return temp_tuple_received;\n"
// Generated new_epoch(): true on first execution or once timestamp reaches the
// start of the bucket after last_bucket; last_bucket is then recomputed.
8372 "bool new_epoch(){\n"
8373 " if(first_execution || (last_bucket + 1) * bucketwidth <= timestamp){\n"
8374 " last_bucket = timestamp / bucketwidth;\n"
8375 " first_execution = false;\n"
8385 "inline gs_uint32_t output_hash(){return 0;}\n\n";
8387 if(hash_flds.size()==0){
// No hash fields: the generated output_hash() works from the outfl_cnt
// counter, compared against the stream count.
8389 "gs_uint32_t output_hash(){\n"
8391 " if(outfl_cnt >= "+int_to_string(n_streams)+")\n"
8393 " return outfl_cnt;\n"
// Hash fields present: the generated output_hash() combines the unpacked hash fields.
8399 "gs_uint32_t output_hash(){\n"
8400 " gs_uint32_t ret = "
8402 for(h=0;h<hash_flds.size();++h){
8404 data_type *hdt = new data_type(fields[hash_flds[h]]->get_type(), fields[hash_flds[h]]->get_modifier_list());
8405 if(hdt->use_hashfunc()){
8406 sprintf(tmpstr,"%s(&(unpack_var_%s))",hdt->get_hfta_hashfunc().c_str(),fields[hash_flds[h]]->get_name().c_str());
8408 sprintf(tmpstr,"unpack_var_%s",fields[hash_flds[h]]->get_name().c_str());
// NOTE(review): the modulus is the number of hash fields, not n_streams
// (which num_file_streams() reports below). With one hash field the result
// is always 0; this looks like it should be the stream count - confirm.
8414 " return ret % "+int_to_string(hash_flds.size())+";\n"
8421 "gs_uint32_t num_file_streams(){\n"
8422 " return("+int_to_string(n_streams)+");\n"
// NOTE(review): the generated get_filename_base() sprintf's into a fixed
// 500-byte buffer; a long output directory plus base name could exceed it.
8427 "string get_filename_base(){\n"
8428 " char tmp_fname[500];\n";
8430 string output_filename_base = hfta_query_name+filestream_id;
// With several hfta clones the clone index is appended to keep names distinct.
8432 if(n_hfta_clones > 1){
8433 output_filename_base += "_"+int_to_string(parallel_idx);
8439 if(output_spec->output_directory == "")
8441 " sprintf(tmp_fname,\""+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8443 " sprintf(tmp_fname,\""+output_spec->output_directory+"/"+output_filename_base+"_%lld\",(gs_int64_t)(last_bucket*bucketwidth));\n";
8445 " return (string)(tmp_fname);\n"
8451 "bool do_compression(){\n";
8453 ret += " return true;\n";
8455 ret += " return false;\n";
8459 "bool is_eof_tuple(){\n"
8463 "bool propagate_tuple(){\n"
8466 ret+="\treturn false;\n";
8468 ret+="\treturn true;\n";
8470 // create a temp status tuple
8471 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
// The status tuple uses the tuple layout named after hfta_query_name.
8473 ret += gen_init_temp_status_tuple(this->hfta_query_name);
8475 sprintf(tmpstr,"\ttuple->tuple_var%d = timestamp;\n",temporal_field_idx);
8480 ret += "\treturn 0;\n";
// Returns the generated statement that instantiates the file output operator
// for this node as variable op<i>.
//   i      : numeric suffix of the generated operator variable.
//   params : text of the leading constructor arguments; the query name
//            (quoted) and <hfta_query_name>_schema_definition are appended.
// The operator template name defaults to "file_output_operator" and is
// selected by compression_type among file_output_operator,
// zfile_output_operator and bfile_output_operator.
8488 string output_file_qpn::generate_operator(int i, string params){
8489 string optype = "file_output_operator";
8490 switch(compression_type){
8492 optype = "file_output_operator";
8495 optype = "zfile_output_operator";
8498 optype = "bfile_output_operator";
8502 return(" "+optype+"<" +
8503 generate_functor_name() +
8504 "> *op"+int_to_string(i)+" = new "+optype+"<"+
8505 generate_functor_name() +">("+params+", \"" + hfta_query_name + "\""
8506 + "," + hfta_query_name + "_schema_definition);\n");
8509 /////////////////////////////////////////////////////////
// Returns the name of the generated functor class for this select/project
// node: "spx_functor_" followed by the normalized node name.
// NOTE(review): normalize_name is applied twice here, unlike the other
// generate_functor_name implementations in this file - presumably the second
// call is a no-op; confirm.
8513 string spx_qpn::generate_functor_name(){
8514 return("spx_functor_" + normalize_name(normalize_name(this->get_node_name())));
// Generates the C++ source text of the functor class for a select/project
// node. The generated class contains, in order:
//   - private state: unpacked attribute variables and offsets, partial and
//     cached function results, complex literals, pass-by-handle parameters
//     and query parameters;
//   - constructor, destructor and parameter-block routines;
//   - predicate(tup0): unpacks fields clause by clause and evaluates the
//     where clauses, returning false as soon as one fails;
//   - create_output_tuple(): sizes, allocates and packs the output tuple;
//   - temp_status_received() and create_temp_status_tuple().
//   schema      : table list used by the code generation helpers.
//   Ext_fcns    : external function list used to find partial functions,
//                 complex literals and handle parameters.
//   needs_xform : forwarded to the unpack code generators.
// WARNING: find_partial_fcns marks the scalar expressions of this node (see below).
8517 string spx_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8518 // Initialize generate utility globals
8519 segen_gb_tbl = NULL;
8521 string ret = "class " + this->generate_functor_name() + "{\n";
8523 // Find variables referenced in this query node.
8526 col_id_set::iterator csi;
8529 for(w=0;w<where.size();++w)
8530 gather_pr_col_ids(where[w]->pr,cid_set,NULL);
8531 for(s=0;s<select_list.size();s++){
8532 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
8536 // Private variables : store the state of the functor.
8537 // 1) variables for unpacked attributes
8538 // 2) offsets of the unpacked attributes
8539 // 3) storage of partial functions
8540 // 4) storage of complex literals (i.e., require a constructor)
8542 ret += "private:\n";
8543 ret += "\tbool first_execution;\t// internal processing state \n";
8544 ret += "\tint schema_handle0;\n";
8546 // generate the declaration of all the variables related to
8547 // temp tuples generation
8548 ret += gen_decl_temp_vars();
8551 // unpacked attribute storage, offsets
8552 ret += "//\t\tstorage and offsets of accessed fields.\n";
8553 ret += generate_access_vars(cid_set,schema);
8554 // tuple metadata management
8555 ret += "\tint tuple_metadata_offset0;\n";
8557 // Variables to store results of partial functions.
8558 // WARNING find_partial_functions modifies the SE
8559 // (it marks the partial function id).
8560 ret += "//\t\tParital function result storage\n";
8561 vector<scalarexp_t *> partial_fcns;
8562 vector<int> fcn_ref_cnt;
8563 vector<bool> is_partial_fcn;
8564 for(s=0;s<select_list.size();s++){
8565 find_partial_fcns(select_list[s]->se, &partial_fcns,&fcn_ref_cnt,&is_partial_fcn, Ext_fcns);
8567 for(w=0;w<where.size();w++){
8568 find_partial_fcns_pr(where[w]->pr, &partial_fcns, &fcn_ref_cnt,&is_partial_fcn,Ext_fcns);
8570 // Unmark non-partial expensive functions referenced only once.
8571 for(p=0; p<partial_fcns.size();p++){
8572 if(!is_partial_fcn[p] && fcn_ref_cnt[p] <= 1){
8573 partial_fcns[p]->set_partial_ref(-1);
8576 if(partial_fcns.size()>0){
8577 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,true);
8580 // Complex literals (i.e., they need constructors)
8581 ret += "//\t\tComplex literal storage.\n";
8582 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
8583 ret += generate_complex_lit_vars(complex_literals);
8585 // Pass-by-handle parameters
8586 ret += "//\t\tPass-by-handle storage.\n";
8587 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
8588 ret += generate_pass_by_handle_vars(param_handle_table);
8590 // Variables to hold parameters
8591 ret += "//\tfor query parameters\n";
8592 ret += generate_param_vars(param_tbl);
8595 // The publicly exposed functions
8597 ret += "\npublic:\n";
8600 //-------------------
8601 // The functor constructor
8602 // pass in the schema handle.
8603 // 1) make assignments to the unpack offset variables
8604 // 2) initialize the complex literals
8605 // 3) Set the initial values of the temporal attributes
8606 // referenced in select clause (in case we need to emit
8607 // temporal tuple before receiving first tuple )
8609 ret += "//\t\tFunctor constructor.\n";
8610 ret += this->generate_functor_name()+"(int schema_handle0){\n";
8612 // save schema handle
// (the generated constructor parameter shadows the member, hence "this->")
8613 ret += "this->schema_handle0 = schema_handle0;\n";
8616 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
8617 ret += gen_access_var_init(cid_set);
8619 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
8622 ret += "//\t\tInitialize complex literals.\n";
8623 ret += gen_complex_lit_init(complex_literals);
8625 // Initialize partial function results so they can be safely GC'd
8626 ret += gen_partial_fcn_init(partial_fcns);
8628 // Initialize non-query-parameter parameter handles
8629 ret += gen_pass_by_handle_init(param_handle_table);
8631 // Init temporal attributes referenced in select list
8632 ret += gen_init_temp_vars(schema, select_list, NULL);
8637 //-------------------
8638 // Functor destructor
8639 ret += "//\t\tFunctor destructor.\n";
8640 ret += "~"+this->generate_functor_name()+"(){\n";
8642 // clean up buffer-type complex literals.
8643 ret += gen_complex_lit_dtr(complex_literals);
8645 // Deregister the pass-by-handle parameters
8646 ret += "/* register and de-register the pass-by-handle parameters */\n";
8647 ret += gen_pass_by_handle_dtr(param_handle_table);
8649 // Reclaim buffer space for partial function results
8650 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8651 ret += gen_partial_fcn_dtr(partial_fcns);
8654 // Destroy the parameters, if any need to be destroyed
8655 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8660 //-------------------
8661 // Parameter manipulation routines
8662 ret += generate_load_param_block(this->generate_functor_name(),
8663 this->param_tbl,param_handle_table );
8664 ret += generate_delete_param_block(this->generate_functor_name(),
8665 this->param_tbl,param_handle_table);
8668 //-------------------
8669 // Register new parameter block
8670 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
8671 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
8672 ret += "\treturn this->load_params_"+this->generate_functor_name()+
8677 //-------------------
8678 // The selection predicate.
8679 // Unpack variables for 1 cnf element
8680 // at a time, return false immediately if the
8682 // optimization : evaluate the cheap cnf elements
8683 // first, the expensive ones last.
8685 ret += "bool predicate(host_tuple &tup0){\n";
8686 // Variables for execution of the function.
8687 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
8688 // Initialize cached function indicators.
8689 for(p=0;p<partial_fcns.size();++p){
8690 if(fcn_ref_cnt[p]>1){
8691 ret+="\tfcn_ref_cnt_"+int_to_string(p)+"=0;\n";
8696 ret += gen_temp_tuple_check(this->node_name, 0);
8698 if(partial_fcns.size()>0){ // partial fcn access failure
8699 ret += "\tgs_retval_t retval = 0;\n";
8703 // Reclaim buffer space for partial function results
8704 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
8705 ret += gen_partial_fcn_dtr(partial_fcns);
8707 col_id_set found_cids; // colrefs unpacked thus far.
// Temporal attributes are unpacked before the status-tuple early return so
// that they are available for temp status tuples.
8708 ret += gen_unpack_temp_vars(schema, found_cids, select_list, NULL, needs_xform);
8710 // For temporal status tuple we don't need to do anything else
8711 ret += "\tif (temp_tuple_received) return false;\n\n";
8714 for(w=0;w<where.size();++w){
8715 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
8717 // Find the set of variables accessed in this CNF elem,
8718 // but in no previous element.
8719 col_id_set new_cids;
8720 get_new_pred_cids(where[w]->pr,found_cids, new_cids, NULL);
8721 // Unpack these values.
8722 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
8723 // Find partial fcns ref'd in this cnf element
8725 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
8726 ret += gen_unpack_partial_fcn(schema,partial_fcns,pfcn_refs,fcn_ref_cnt, is_partial_fcn, "false");
// The second '+' on the continuation line is a unary plus on the string literal; harmless.
8728 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
8729 +") ) return(false);\n";
8732 // The partial functions ref'd in the select list
8733 // must also be evaluated. If one returns false,
8734 // then implicitly the predicate is false.
8736 for(s=0;s<select_list.size();s++){
8737 collect_partial_fcns(select_list[s]->se, sl_pfcns);
// The if governs only the generated comment line; the unpack call that follows always runs.
8739 if(sl_pfcns.size() > 0)
8740 ret += "//\t\tUnpack remaining partial fcns.\n";
8741 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
8742 fcn_ref_cnt, is_partial_fcn,
8743 found_cids, NULL, "false", needs_xform);
8745 // Unpack remaining fields
8746 ret += "//\t\tunpack any remaining fields from the input tuple.\n";
8747 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
8750 ret += "\treturn(true);\n";
8754 //-------------------
8755 // The output tuple function.
8756 // Unpack the remaining attributes into
8757 // the placeholder variables, unpack the
8758 // partial fcn refs, then pack up the tuple.
8760 ret += "host_tuple create_output_tuple() {\n";
8761 ret += "\thost_tuple tup;\n";
8762 ret += "\tgs_retval_t retval = 0;\n";
8764 // Unpack any remaining cached functions.
8765 ret += gen_remaining_cached_fcns(schema, partial_fcns, sl_pfcns,
8766 fcn_ref_cnt, is_partial_fcn);
8769 // Now, compute the size of the tuple.
8771 // Unpack any BUFFER type selections into temporaries
8772 // so that I can compute their size and not have
8773 // to recompute their value during tuple packing.
8774 // I can use regular assignment here because
8775 // these temporaries are non-persistent.
8777 ret += "//\t\tCompute the size of the tuple.\n";
8778 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
8780 // Unpack all buffer type selections, to be able to compute their size
8781 ret += gen_buffer_selvars(schema, select_list);
8783 // The size of the tuple is the size of the tuple struct plus the
8784 // size of the buffers to be copied in.
8787 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
8788 ret += gen_buffer_selvars_size(select_list,schema);
8791 // Allocate tuple data block.
// NOTE(review): the generated code does not test the result of malloc.
8792 ret += "//\t\tCreate the tuple block.\n";
8793 ret += "\ttup.data = malloc(tup.tuple_size);\n";
8794 ret += "\ttup.heap_resident = true;\n";
8795 // Mark tuple as regular
8796 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
8798 // ret += "\ttup.channel = 0;\n";
8799 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
8800 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
8803 // (Here, offsets are hard-wired. is this a problem?)
8805 ret += "//\t\tPack the fields into the tuple.\n";
8806 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
8808 // Delete string temporaries
8809 ret += gen_buffer_selvars_dtr(select_list);
8811 ret += "\treturn tup;\n";
8814 //-------------------------------------------------------------------
8815 // Temporal update functions
8817 ret += "bool temp_status_received(){return temp_tuple_received;};\n\n";
8820 // create a temp status tuple
8821 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
8823 ret += gen_init_temp_status_tuple(this->get_node_name());
8826 // (Here, offsets are hard-wired. is this a problem?)
8828 ret += "//\t\tPack the fields into the tuple.\n";
// temporal_only = true: buffer fields are skipped and only temporal fields are assigned.
8829 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
8831 ret += "\treturn 0;\n";
// Returns the generated statement that instantiates the select/project
// operator for this node as variable op<i>.
//   i      : numeric suffix of the generated operator variable.
//   params : text of the leading constructor arguments; the quoted node
//            name is appended as the last argument.
8838 string spx_qpn::generate_operator(int i, string params){
8840 return(" select_project_operator<" +
8841 generate_functor_name() +
8842 "> *op"+int_to_string(i)+" = new select_project_operator<"+
8843 generate_functor_name() +">("+params+", \"" + get_node_name() + "\");\n");
8847 ////////////////////////////////////////////////////////////////
// Returns the name of the generated functor class for this node:
// "sgah_functor_" followed by the normalized node name.
8852 string sgah_qpn::generate_functor_name(){
8853 return("sgah_functor_" + normalize_name(this->get_node_name()));
8857 string sgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
8861 // Initialize generate utility globals
8862 segen_gb_tbl = &(gb_tbl);
8864 // Might need to generate empty values for cube processing.
8865 map<int, string> structured_types;
8866 for(g=0;g<gb_tbl.size();++g){
8867 if(gb_tbl.get_data_type(g)->is_structured_type()){
8868 structured_types[gb_tbl.get_data_type(g)->type_indicator()] = gb_tbl.get_data_type(g)->get_type_str();
8872 //--------------------------------
8873 // group definition class
8874 string ret = "class " + generate_functor_name() + "_groupdef{\n";
8876 for(g=0;g<this->gb_tbl.size();g++){
8877 sprintf(tmpstr,"gb_var%d",g);
8878 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
8880 // empty strucutred literals
8881 map<int, string>::iterator sii;
8882 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8883 data_type dt(sii->second);
8884 literal_t empty_lit(sii->first);
8885 ret += "\t"+dt.make_host_cvar(empty_lit.hfta_empty_literal_name())+";\n";
8888 if(structured_types.size()==0){
8889 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
8891 ret += "\t"+generate_functor_name() + "_groupdef(){}\n";
8895 ret += "\t"+generate_functor_name() + "_groupdef("+
8896 this->generate_functor_name() + "_groupdef *gd){\n";
8897 for(g=0;g<gb_tbl.size();g++){
8898 data_type *gdt = gb_tbl.get_data_type(g);
8899 if(gdt->is_buffer_type()){
8900 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8901 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8904 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8909 ret += "\t"+generate_functor_name() + "_groupdef("+
8910 this->generate_functor_name() + "_groupdef *gd, bool *pattern){\n";
8911 for(sii=structured_types.begin();sii!=structured_types.end();++sii){
8912 literal_t empty_lit(sii->first);
8913 ret += "\t\t"+empty_lit.to_hfta_C_code("&"+empty_lit.hfta_empty_literal_name())+";\n";
8915 for(g=0;g<gb_tbl.size();g++){
8916 data_type *gdt = gb_tbl.get_data_type(g);
8917 ret += "\t\tif(pattern["+int_to_string(g)+"]){\n";
8918 if(gdt->is_buffer_type()){
8919 sprintf(tmpstr,"\t\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
8920 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
8923 sprintf(tmpstr,"\t\t\tgb_var%d = gd->gb_var%d;\n",g,g);
8926 ret += "\t\t}else{\n";
8927 literal_t empty_lit(gdt->type_indicator());
8928 if(empty_lit.is_cpx_lit()){
8929 ret +="\t\t\tgb_var"+int_to_string(g)+"= "+empty_lit.hfta_empty_literal_name()+";\n";
8931 ret +="\t\t\tgb_var"+int_to_string(g)+"="+empty_lit.to_hfta_C_code("")+";\n";
8937 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
8938 for(g=0;g<gb_tbl.size();g++){
8939 data_type *gdt = gb_tbl.get_data_type(g);
8940 if(gdt->is_buffer_type()){
8941 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
8942 gdt->get_hfta_buffer_destroy().c_str(), g );
8949 for(g=0;g<gb_tbl.size();g++){
8950 data_type *gdt = gb_tbl.get_data_type(g);
8951 if(gdt->is_temporal()){
8956 ret += tgdt->get_host_cvar_type()+" get_curr_gb(){\n";
8957 ret+="\treturn gb_var"+int_to_string(g)+";\n";
8962 //--------------------------------
8963 // aggr definition class
8964 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
8966 for(a=0;a<aggr_tbl.size();a++){
8967 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
8968 sprintf(tmpstr,"aggr_var%d",a);
8969 if(aggr_tbl.is_builtin(a)){
8970 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
8971 if(aggr_tbl.get_op(a) == "AVG"){ // HACK!
8972 data_type cnt_type = data_type("ullong");
8973 ret+="\t"+cnt_type.make_host_cvar(string(tmpstr)+"_cnt")+";\n";
8974 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(string(tmpstr)+"_sum")+";\n";
8977 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
8981 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
8983 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
8984 for(a=0;a<aggr_tbl.size();a++){
8985 if(aggr_tbl.is_builtin(a)){
8986 data_type *adt = aggr_tbl.get_data_type(a);
8987 if(adt->is_buffer_type()){
8988 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
8989 adt->get_hfta_buffer_destroy().c_str(), a );
8993 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
8994 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
8995 ret+="(aggr_var"+int_to_string(a)+"));\n";
9001 //-------------------------------------------
9002 // group-by patterns for the functor,
9003 // initialization within the class is cumbersome.
9004 int n_patterns = gb_tbl.gb_patterns.size();
9006 ret += "bool "+this->generate_functor_name()+"_gb_patterns["+int_to_string(n_patterns)+
9007 "]["+int_to_string(gb_tbl.size())+"] = {\n";
9008 if(n_patterns == 0){
9009 for(i=0;i<gb_tbl.size();++i){
9014 for(i=0;i<n_patterns;++i){
9015 if(i>0) ret += ",\n";
9017 for(j=0;j<gb_tbl.size();j++){
9018 if(j>0) ret += ", ";
9019 if(gb_tbl.gb_patterns[i][j]){
9032 //--------------------------------
9034 ret += "class " + this->generate_functor_name() + "{\n";
9036 // Find variables referenced in this query node.
9039 col_id_set::iterator csi;
9041 for(w=0;w<where.size();++w)
9042 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
9043 for(w=0;w<having.size();++w)
9044 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
9045 for(g=0;g<gb_tbl.size();g++)
9046 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
9048 for(s=0;s<select_list.size();s++){
9049 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
9053 // Private variables : store the state of the functor.
9054 // 1) variables for unpacked attributes
9055 // 2) offsets of the unpacked attributes
9056 // 3) storage of partial functions
9057 // 4) storage of complex literals (i.e., require a constructor)
9059 ret += "private:\n";
9061 // var to save the schema handle
9062 ret += "\tint schema_handle0;\n";
9063 // metadata from schema handle
9064 ret += "\tint tuple_metadata_offset0;\n";
9066 // generate the declaration of all the variables related to
9067 // temp tuples generation
9068 ret += gen_decl_temp_vars();
9070 // unpacked attribute storage, offsets
9071 ret += "//\t\tstorage and offsets of accessed fields.\n";
9072 ret += generate_access_vars(cid_set, schema);
9074 // Variables to store results of partial functions.
9075 // WARNING find_partial_functions modifies the SE
9076 // (it marks the partial function id).
9077 ret += "//\t\tParital function result storage\n";
9078 vector<scalarexp_t *> partial_fcns;
9079 vector<int> fcn_ref_cnt;
9080 vector<bool> is_partial_fcn;
9081 for(s=0;s<select_list.size();s++){
9082 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
9084 for(w=0;w<where.size();w++){
9085 find_partial_fcns_pr(where[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9087 for(w=0;w<having.size();w++){
9088 find_partial_fcns_pr(having[w]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
9090 for(g=0;g<gb_tbl.size();g++){
9091 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns,NULL,NULL, Ext_fcns);
9093 for(a=0;a<aggr_tbl.size();a++){
9094 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns,NULL,NULL, Ext_fcns);
9096 if(partial_fcns.size()>0){
9097 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
9098 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
9101 // Complex literals (i.e., they need constructors)
9102 ret += "//\t\tComplex literal storage.\n";
9103 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
9104 ret += generate_complex_lit_vars(complex_literals);
9106 // Pass-by-handle parameters
9107 ret += "//\t\tPass-by-handle storage.\n";
9108 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
9109 ret += generate_pass_by_handle_vars(param_handle_table);
9112 // variables to hold parameters.
9113 ret += "//\tfor query parameters\n";
9114 ret += generate_param_vars(param_tbl);
9116 // Is there a temporal flush? If so create flush temporaries,
9117 // create flush indicator.
9118 bool uses_temporal_flush = false;
9119 for(g=0;g<gb_tbl.size();g++){
9120 data_type *gdt = gb_tbl.get_data_type(g);
9121 if(gdt->is_temporal())
9122 uses_temporal_flush = true;
9125 if(uses_temporal_flush){
9126 ret += "//\t\tFor temporal flush\n";
9127 for(g=0;g<gb_tbl.size();g++){
9128 data_type *gdt = gb_tbl.get_data_type(g);
9129 if(gdt->is_temporal()){
9130 sprintf(tmpstr,"last_gb%d",g);
9131 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9132 sprintf(tmpstr,"last_flushed_gb%d",g);
9133 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
9136 ret += "\tbool needs_temporal_flush;\n";
9140 // The publicly exposed functions
9142 ret += "\npublic:\n";
9145 //-------------------
9146 // The functor constructor
9147 // pass in the schema handle.
9148 // 1) make assignments to the unpack offset variables
9149 // 2) initialize the complex literals
9151 ret += "//\t\tFunctor constructor.\n";
9152 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9154 // save the schema handle
9155 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
9158 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
9159 ret += gen_access_var_init(cid_set);
9161 ret += "tuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9164 ret += "//\t\tInitialize complex literals.\n";
9165 ret += gen_complex_lit_init(complex_literals);
9167 // Initialize partial function results so they can be safely GC'd
9168 ret += gen_partial_fcn_init(partial_fcns);
9170 // Initialize non-query-parameter parameter handles
9171 ret += gen_pass_by_handle_init(param_handle_table);
9173 // temporal flush variables
9174 // ASSUME that structured values won't be temporal.
9175 if(uses_temporal_flush){
9176 ret += "//\t\tInitialize temporal flush variables.\n";
9177 for(g=0;g<gb_tbl.size();g++){
9178 data_type *gdt = gb_tbl.get_data_type(g);
9179 if(gdt->is_temporal()){
9180 literal_t gl(gdt->type_indicator());
9181 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9183 sprintf(tmpstr,"\tlast_flushed_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
9187 ret += "\tneeds_temporal_flush = false;\n";
9190 // Init temporal attributes referenced in select list
9191 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
9195 //-------------------
9196 // Functor destructor
9197 ret += "//\t\tFunctor destructor.\n";
9198 ret += "~"+this->generate_functor_name()+"(){\n";
9200 // clean up buffer type complex literals
9201 ret += gen_complex_lit_dtr(complex_literals);
9203 // Deregister the pass-by-handle parameters
9204 ret += "/* register and de-register the pass-by-handle parameters */\n";
9205 ret += gen_pass_by_handle_dtr(param_handle_table);
9207 // clean up partial function results.
9208 ret += "/* clean up partial function storage */\n";
9209 ret += gen_partial_fcn_dtr(partial_fcns);
9211 // Destroy the parameters, if any need to be destroyed
9212 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9217 //-------------------
9218 // Parameter manipulation routines
9219 ret += generate_load_param_block(this->generate_functor_name(),
9220 this->param_tbl,param_handle_table);
9221 ret += generate_delete_param_block(this->generate_functor_name(),
9222 this->param_tbl,param_handle_table);
9224 //-------------------
9225 // Register new parameter block
9227 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
9228 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9229 ret += "\treturn this->load_params_"+this->generate_functor_name()+
9233 // -----------------------------------
9234 // group-by pattern support
9237 "int n_groupby_patterns(){\n"
9238 " return "+int_to_string(gb_tbl.gb_patterns.size())+";\n"
9240 "bool *get_pattern(int p){\n"
9241 " return "+this->generate_functor_name()+"_gb_patterns[p];\n"
9248 //-------------------
9249 // the create_group method.
9250 // This method creates a group in a buffer passed in
9251 // (to allow for creation on the stack).
9252 // There are also a couple of side effects:
9253 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
9254 // 2) determine if a temporal flush is required.
9256 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
9257 // Variables for execution of the function.
9258 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9260 if(partial_fcns.size()>0){ // partial fcn access failure
9261 ret += "\tgs_retval_t retval = 0;\n";
9265 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
9266 "_groupdef *) buffer;\n";
9268 // Start by cleaning up partial function results
9269 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
9270 set<int> w_pfcns; // partial fcns in where clause
9271 for(w=0;w<where.size();++w)
9272 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
9274 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
9275 for(g=0;g<gb_tbl.size();g++){
9276 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
9278 for(a=0;a<aggr_tbl.size();a++){
9279 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
9281 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
9282 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
9283 // ret += gen_partial_fcn_dtr(partial_fcns);
9286 ret += gen_temp_tuple_check(this->node_name, 0);
9287 col_id_set found_cids; // colrefs unpacked thus far.
9288 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
9291 // Save temporal group-by variables
9294 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
9296 for(g=0;g<gb_tbl.size();g++){
9298 data_type *gdt = gb_tbl.get_data_type(g);
9300 if(gdt->is_temporal()){
9301 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9302 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9310 // Compare the temporal GB vars with the stored ones,
9311 // set flush indicator and update stored GB vars if there is any change.
9313 ret += "// hfta_disorder = "+int_to_string(hfta_disorder)+"\n";
9314 if(hfta_disorder < 2){
9315 if(uses_temporal_flush){
9317 bool first_one = true;
9318 for(g=0;g<gb_tbl.size();g++){
9319 data_type *gdt = gb_tbl.get_data_type(g);
9321 if(gdt->is_temporal()){
9322 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9323 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9324 if(first_one){first_one = false;} else {ret += ") && (";}
9325 ret += generate_equality_test(lhs_op, rhs_op, gdt);
9329 for(g=0;g<gb_tbl.size();g++){
9330 data_type *gdt = gb_tbl.get_data_type(g);
9331 if(gdt->is_temporal()){
9332 if(gdt->is_buffer_type()){
9333 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9335 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9337 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9342 ret += "\t\tneeds_temporal_flush=true;\n";
9343 ret += "\t\t}else{\n"
9344 "\t\t\tneeds_temporal_flush=false;\n"
9348 ret+= "\tif(temp_tuple_received && !( (";
9349 bool first_one = true;
9350 for(g=0;g<gb_tbl.size();g++){
9351 data_type *gdt = gb_tbl.get_data_type(g);
9353 if(gdt->is_temporal()){
9354 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
9355 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
9356 if(first_one){first_one = false;} else {ret += ") && (";}
9357 ret += generate_equality_test(lhs_op, rhs_op, gdt);
9363 for(g=0;g<gb_tbl.size();g++){
9364 data_type *gdt = gb_tbl.get_data_type(g);
9365 if(gdt->is_temporal()){
9367 if(gdt->is_buffer_type()){
9368 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
9370 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
9372 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
9378 data_type *tgdt = gb_tbl.get_data_type(temporal_g);
9379 literal_t gl(tgdt->type_indicator());
9380 ret += "\t\tif(last_flushed_gb"+int_to_string(temporal_g)+">"+gl.to_hfta_C_code("")+")\n";
9381 ret += "\t\t\tneeds_temporal_flush=true;\n";
9382 ret += "\t\t}else{\n"
9383 "\t\t\tneeds_temporal_flush=false;\n"
9388 // For temporal status tuple we don't need to do anything else
9389 ret += "\tif (temp_tuple_received) return NULL;\n\n";
9391 for(w=0;w<where.size();++w){
9392 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
9394 // Find the set of variables accessed in this CNF elem,
9395 // but in no previous element.
9396 col_id_set new_cids;
9397 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
9399 // Unpack these values.
9400 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9401 // Find partial fcns ref'd in this cnf element
9403 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
9404 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
9406 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
9407 +") ) return(NULL);\n";
9410 // The partial functions ref'd in the group-by var and aggregate
9411 // definitions must also be evaluated. If one returns false,
9412 // then implicitly the predicate is false.
9413 set<int>::iterator pfsi;
9415 if(ag_gb_pfcns.size() > 0)
9416 ret += "//\t\tUnpack remaining partial fcns.\n";
9417 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
9418 found_cids, segen_gb_tbl, "NULL", needs_xform);
9420 // Unpack the group-by variables
9422 for(g=0;g<gb_tbl.size();g++){
9423 data_type *gdt = gb_tbl.get_data_type(g);
9425 if(!gdt->is_temporal()){
9426 // Find the new fields ref'd by this GBvar def.
9427 col_id_set new_cids;
9428 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
9429 // Unpack these values.
9430 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
9432 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9433 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9435 // There seems to be no difference between the two
9436 // branches of the IF statement.
9437 data_type *gdt = gb_tbl.get_data_type(g);
9438 if(gdt->is_buffer_type()){
9439 // Create temporary copy.
9440 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9441 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
9443 scalarexp_t *gse = gb_tbl.get_def(g);
9444 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
9445 g,generate_se_code(gse,schema).c_str());
9454 ret+= "\treturn gbval;\n";
9457 //--------------------------------------------------------
9458 // Create and initialize an aggregate object
9460 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
9461 // Variables for execution of the function.
9462 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9465 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
9466 "_aggrdef *)buffer;\n";
9468 for(a=0;a<aggr_tbl.size();a++){
9469 if(aggr_tbl.is_builtin(a)){
9470 // Create temporaries for buffer return values
9471 data_type *adt = aggr_tbl.get_data_type(a);
9472 if(adt->is_buffer_type()){
9473 sprintf(tmpstr,"aggr_tmp_%d", a);
9474 ret+=adt->make_host_cvar(tmpstr)+";\n";
9479 // Unpack all remaining attributes
9480 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
9481 for(a=0;a<aggr_tbl.size();a++){
9482 sprintf(tmpstr,"aggval->aggr_var%d",a);
9483 string assignto_var = tmpstr;
9484 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
9487 ret += "\treturn aggval;\n";
9490 //--------------------------------------------------------
9491 // update an aggregate object
9493 ret += "void update_aggregate(host_tuple &tup0, "
9494 +generate_functor_name()+"_groupdef *gbval, "+
9495 generate_functor_name()+"_aggrdef *aggval){\n";
9496 // Variables for execution of the function.
9497 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
9499 // use of temporaries depends on the aggregate,
9500 // generate them in generate_aggr_update
9503 // Unpack all remaining attributes
9504 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
9505 for(a=0;a<aggr_tbl.size();a++){
9506 sprintf(tmpstr,"aggval->aggr_var%d",a);
9507 string varname = tmpstr;
9508 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
9511 ret += "\treturn;\n";
9514 //---------------------------------------------------
9517 ret += "\tbool flush_needed(){\n";
9518 if(uses_temporal_flush){
9519 ret += "\t\treturn needs_temporal_flush;\n";
9521 ret += "\t\treturn false;\n";
9525 //---------------------------------------------------
9526 // create output tuple
9527 // Unpack the partial functions ref'd in the where clause,
9528 // select clause. Evaluate the where clause.
9529 // Finally, pack the tuple.
9531 // I need to use special code generation here,
9532 // so I'll leave it in longhand.
9534 ret += "host_tuple create_output_tuple("
9535 +generate_functor_name()+"_groupdef *gbval, "+
9536 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
9538 ret += "\thost_tuple tup;\n";
9539 ret += "\tfailed = false;\n";
9540 ret += "\tgs_retval_t retval = 0;\n";
9542 string gbvar = "gbval->gb_var";
9543 string aggvar = "aggval->";
9545 // Create cached temporaries for UDAF return values.
9546 for(a=0;a<aggr_tbl.size();a++){
9547 if(! aggr_tbl.is_builtin(a)){
9548 int afcn_id = aggr_tbl.get_fcn_id(a);
9549 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9550 sprintf(tmpstr,"udaf_ret_%d", a);
9551 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
9556 // First, get the return values from the UDAFS
9557 for(a=0;a<aggr_tbl.size();a++){
9558 if(! aggr_tbl.is_builtin(a)){
9559 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
9560 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
9561 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
9565 set<int> hv_sl_pfcns;
9566 for(w=0;w<having.size();w++){
9567 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
9569 for(s=0;s<select_list.size();s++){
9570 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
9573 // clean up the partial fcn results from any previous execution
9574 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
9577 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
9578 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
9579 ret += "\tif(retval){ failed = true; return(tup);}\n";
9582 // Evaluate the HAVING clause
9583 // TODO: this seems to have a ++ operator rather than a + operator.
9584 for(w=0;w<having.size();++w){
9585 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
9588 // Now, compute the size of the tuple.
9590 // Unpack any BUFFER type selections into temporaries
9591 // so that I can compute their size and not have
9592 // to recompute their value during tuple packing.
9593 // I can use regular assignment here because
9594 // these temporaries are non-persistent.
9595 // TODO: should I be using the selvar generation routine?
9597 ret += "//\t\tCompute the size of the tuple.\n";
9598 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
9599 for(s=0;s<select_list.size();s++){
9600 scalarexp_t *se = select_list[s]->se;
9601 data_type *sdt = se->get_data_type();
9602 if(sdt->is_buffer_type() &&
9603 !( (se->get_operator_type() == SE_COLREF) ||
9604 (se->get_operator_type() == SE_AGGR_STAR) ||
9605 (se->get_operator_type() == SE_AGGR_SE) ||
9606 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9607 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9609 sprintf(tmpstr,"selvar_%d",s);
9610 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
9611 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
9615 // The size of the tuple is the size of the tuple struct plus the
9616 // size of the buffers to be copied in.
9618 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
9619 for(s=0;s<select_list.size();s++){
9620 // if(s>0) ret += "+";
9621 scalarexp_t *se = select_list[s]->se;
9622 data_type *sdt = select_list[s]->se->get_data_type();
9623 if(sdt->is_buffer_type()){
9624 if(!( (se->get_operator_type() == SE_COLREF) ||
9625 (se->get_operator_type() == SE_AGGR_STAR) ||
9626 (se->get_operator_type() == SE_AGGR_SE) ||
9627 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9628 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9630 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
9633 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9640 // Allocate tuple data block.
9641 ret += "//\t\tCreate the tuple block.\n";
9642 ret += "\ttup.data = malloc(tup.tuple_size);\n";
9643 ret += "\ttup.heap_resident = true;\n";
9645 // Mark tuple as regular
9646 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
9648 // ret += "\ttup.channel = 0;\n";
9649 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
9650 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
9653 // (Here, offsets are hard-wired. is this a problem?)
9655 ret += "//\t\tPack the fields into the tuple.\n";
9656 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
9657 for(s=0;s<select_list.size();s++){
9658 scalarexp_t *se = select_list[s]->se;
9659 data_type *sdt = se->get_data_type();
9660 if(sdt->is_buffer_type()){
9661 if(!( (se->get_operator_type() == SE_COLREF) ||
9662 (se->get_operator_type() == SE_AGGR_STAR) ||
9663 (se->get_operator_type() == SE_AGGR_SE) ||
9664 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
9665 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
9667 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
9669 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
9672 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t)tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9674 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
9678 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9680 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
9685 // Destroy string temporaries
9686 ret += gen_buffer_selvars_dtr(select_list);
9687 // Destroy string return vals of UDAFs
9688 for(a=0;a<aggr_tbl.size();a++){
9689 if(! aggr_tbl.is_builtin(a)){
9690 int afcn_id = aggr_tbl.get_fcn_id(a);
9691 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
9692 if(adt->is_buffer_type()){
9693 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
9694 adt->get_hfta_buffer_destroy().c_str(), a );
9701 ret += "\treturn tup;\n";
9705 //-------------------------------------------------------------------
9706 // Temporal update functions
9708 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
9710 for(g=0;g<gb_tbl.size();g++){
9711 data_type *gdt = gb_tbl.get_data_type(g);
9712 if(gdt->is_temporal()){
9717 ret += tgdt->get_host_cvar_type()+" get_last_flushed_gb(){\n";
9718 ret+="\treturn last_flushed_gb"+int_to_string(g)+";\n";
9720 ret += tgdt->get_host_cvar_type()+" get_last_gb(){\n";
9721 ret+="\treturn last_gb"+int_to_string(g)+";\n";
9727 // create a temp status tuple
9728 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
9730 ret += gen_init_temp_status_tuple(this->get_node_name());
9733 // (Here, offsets are hard-wired. is this a problem?)
9735 ret += "//\t\tPack the fields into the tuple.\n";
9736 for(s=0;s<select_list.size();s++){
9737 data_type *sdt = select_list[s]->se->get_data_type();
9738 if(sdt->is_temporal()){
9739 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
9742 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
9749 ret += "\treturn 0;\n";
9750 ret += "};};\n\n\n";
9753 //----------------------------------------------------------
9754 // The hash function
9756 ret += "struct "+generate_functor_name()+"_hash_func{\n";
9757 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
9758 "_groupdef *grp) const{\n";
9759 ret += "\t\treturn( (";
9760 for(g=0;g<gb_tbl.size();g++){
9762 data_type *gdt = gb_tbl.get_data_type(g);
9763 if(gdt->use_hashfunc()){
9764 if(gdt->is_buffer_type())
9765 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9767 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
9769 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
9773 ret += ") >> 32);\n";
9777 //----------------------------------------------------------
9778 // The comparison function
9780 ret += "struct "+generate_functor_name()+"_equal_func{\n";
9781 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
9782 generate_functor_name()+"_groupdef *grp2) const{\n";
9783 ret += "\t\treturn( (";
9785 for(g=0;g<gb_tbl.size();g++){
9786 if(g>0) ret += ") && (";
9787 data_type *gdt = gb_tbl.get_data_type(g);
9788 if(gdt->complex_comparison(gdt)){
9789 if(gdt->is_buffer_type())
9790 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
9791 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9793 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
9794 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
9796 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
// Builds the C++ source text that declares and allocates the run-time
// operator for this group-by/aggregation node.
//
//   i      - operator index; the generated pointer variable is named "op<i>".
//   params - text spliced in as the leading constructor argument(s); it is
//            followed by ", " and a quote-opened copy of the node name.
//
// The generated declaration instantiates an operator template on the class
// names derived from generate_functor_name(): the functor itself plus its
// "_groupdef", "_aggrdef", "_hash_func" and "_equal_func" companions.
// Two operator templates appear: groupby_operator and groupby_operator_oop.
9808 string sgah_qpn::generate_operator(int i, string params){
// hfta_disorder < 2 selects the plain groupby_operator template.
9810 if(hfta_disorder < 2){
9812 " groupby_operator<" +
9813 generate_functor_name()+","+
9814 generate_functor_name() + "_groupdef, " +
9815 generate_functor_name() + "_aggrdef, " +
9816 generate_functor_name()+"_hash_func, "+
9817 generate_functor_name()+"_equal_func "
9818 "> *op"+int_to_string(i)+" = new groupby_operator<"+
9819 generate_functor_name()+","+
9820 generate_functor_name() + "_groupdef, " +
9821 generate_functor_name() + "_aggrdef, " +
9822 generate_functor_name()+"_hash_func, "+
9823 generate_functor_name()+"_equal_func "
9824 ">("+params+", \"" + get_node_name() +
// Walk the group-by variables looking for a temporal one.
// NOTE(review): presumably the first temporal type found is kept in tgdt for
// use below -- the assignment is not visible here; confirm.
9829 for(int g=0;g<gb_tbl.size();g++){
9830 data_type *gdt = gb_tbl.get_data_type(g);
9831 if(gdt->is_temporal()){
// groupby_operator_oop variant: same template arguments as above plus one
// extra, the host C variable type reported by tgdt.
// NOTE(review): presumably only reached when hfta_disorder >= 2 -- confirm.
9838 " groupby_operator_oop<" +
9839 generate_functor_name()+","+
9840 generate_functor_name() + "_groupdef, " +
9841 generate_functor_name() + "_aggrdef, " +
9842 generate_functor_name()+"_hash_func, "+
9843 generate_functor_name()+"_equal_func, " +
9844 tgdt->get_host_cvar_type() +
9845 "> *op"+int_to_string(i)+" = new groupby_operator_oop<"+
9846 generate_functor_name()+","+
9847 generate_functor_name() + "_groupdef, " +
9848 generate_functor_name() + "_aggrdef, " +
9849 generate_functor_name()+"_hash_func, "+
9850 generate_functor_name()+"_equal_func, " +
9851 tgdt->get_host_cvar_type() +
9852 ">("+params+", \"" + get_node_name() +
9858 ////////////////////////////////////////////////
9861 ////////////////////////////////////////////
// Returns the identifier used for this merge node's generated functor:
// the fixed prefix "mrg_functor_" followed by the node name after it has
// been passed through normalize_name().
9863 string mrg_qpn::generate_functor_name(){
9864 return("mrg_functor_" + normalize_name(this->get_node_name()));
9867 string mrg_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
9872 if(fm.size() != mvars.size()){
9873 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=%lu, mvars.size=%lu\n",fm.size(),mvars.size());
9877 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::generate_functor fm.size=mvars.size=%lu\n",fm.size());
9882 // Initialize generate utility globals
9883 segen_gb_tbl = NULL;
9885 string ret = "class " + this->generate_functor_name() + "{\n";
9887 // Private variable:
9888 // 1) Vars for unpacked attrs.
9889 // 2) offsets of the unpacked attrs
9890 // 3) last_posted_timestamp
9893 schema->get_type_name(mvars[0]->get_schema_ref(), mvars[0]->get_field()),
9894 schema->get_modifier_list(mvars[0]->get_schema_ref(), mvars[0]->get_field())
9897 schema->get_type_name(mvars[1]->get_schema_ref(), mvars[1]->get_field()),
9898 schema->get_modifier_list(mvars[1]->get_schema_ref(), mvars[1]->get_field())
9901 ret += "private:\n";
9903 // var to save the schema handle
9904 ret += "\tint schema_handle0;\n";
9906 // generate the declaration of all the variables related to
9907 // temp tuples generation
9908 ret += gen_decl_temp_vars();
9910 // unpacked attribute storage, offsets
9911 ret += "//\t\tstorage and offsets of accessed fields.\n";
9912 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
9914 sprintf(tmpstr,"unpack_var_%s_%d", mvars[0]->get_field().c_str(), tblref);
9915 ret+="\t"+dta.make_host_cvar(tmpstr)+";\n";
9916 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[0]->get_field().c_str(), tblref);
9919 sprintf(tmpstr,"unpack_var_%s_%d", mvars[1]->get_field().c_str(), tblref);
9920 ret+="\t"+dtb.make_host_cvar(tmpstr)+";\n";
9921 sprintf(tmpstr,"\tgs_int32_t unpack_offset_%s_%d;\n", mvars[1]->get_field().c_str(), tblref);
9924 ret += "//\t\tRemember the last posted timestamp.\n";
9925 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_0")+";\n";
9926 ret+="\t"+dta.make_host_cvar("last_posted_timestamp_1")+";\n";
9927 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
9928 ret+="\t"+dta.make_host_cvar("slack")+";\n";
9929 // ret += "\t bool first_execution_0, first_execution_1;\n";
9931 // variables to hold parameters.
9932 ret += "//\tfor query parameters\n";
9933 ret += generate_param_vars(param_tbl);
9936 //-------------------
9937 // The functor constructor
9938 // pass in a schema handle (e.g. for the 1st input stream),
9939 // use it to determine how to unpack the merge variable.
9940 // ASSUME that both streams have the same layout,
9941 // just duplicate it.
9944 ret += "//\t\tFunctor constructor.\n";
9945 ret += this->generate_functor_name()+"(int schema_handle0){\n";
9947 // var to save the schema handle
9948 ret += "\tthis->schema_handle0 = schema_handle0;\n";
9949 ret += "\ttuple_metadata_offset0=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9950 ret += "\ttuple_metadata_offset1=ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
9952 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
9954 sprintf(tmpstr,"\tunpack_offset_%s_%d = ftaschema_get_field_offset_by_name(schema_handle0, \"%s\");\n", mvars[0]->get_field().c_str(), 0,mvars[0]->get_field().c_str());
9956 sprintf(tmpstr,"\tunpack_offset_%s_%d = unpack_offset_%s_%d;\n",mvars[1]->get_field().c_str(), 1,mvars[0]->get_field().c_str(), 0);
9958 // ret+="\tfirst_execution_0 = first_execution_1 = true;\n";
9960 ret+="\tslack = "+generate_se_code(slack,schema)+";\n";
9962 ret+="\tslack = 0;\n";
9964 // Initialize internal state
9965 ret += "\ttemp_tuple_received = false;\n";
9967 // Init last timestamp values to minimum value for their type
9968 if (dta.is_increasing())
9969 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_min_literal() + ";\n";
9971 ret+="\tlast_posted_timestamp_0 = last_posted_timestamp_1 = " + dta.get_max_literal() + ";\n";
9976 ret += "//\t\tFunctor destructor.\n";
9977 ret += "~"+this->generate_functor_name()+"(){\n";
9979 // Destroy the parameters, if any need to be destroyed
9980 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9985 // no pass-by-handle params.
9986 vector<handle_param_tbl_entry *> param_handle_table;
9988 // Parameter manipulation routines
9989 ret += generate_load_param_block(this->generate_functor_name(),
9990 this->param_tbl,param_handle_table);
9991 ret += generate_delete_param_block(this->generate_functor_name(),
9992 this->param_tbl,param_handle_table);
9994 // Register new parameter block
9996 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
9997 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
9998 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10003 // -----------------------------------
10006 string unpack_fcna;
10007 if(needs_xform[0]) unpack_fcna = dta.get_hfta_unpack_fcn();
10008 else unpack_fcna = dta.get_hfta_unpack_fcn_noxf();
10009 string unpack_fcnb;
10010 if(needs_xform[1]) unpack_fcnb = dtb.get_hfta_unpack_fcn();
10011 else unpack_fcnb = dtb.get_hfta_unpack_fcn_noxf();
10014 ret+="\tint compare(const host_tuple& tup1, const host_tuple& tup2) const{ \n";
10015 ret+="\t"+dta.make_host_cvar("timestamp1")+";\n";
10016 ret+="\t"+dta.make_host_cvar("timestamp2")+";\n";
10017 ret+="\tgs_int32_t problem;\n";
10018 ret+="\tif (tup1.channel == 0) {\n";
10019 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10021 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10024 sprintf(tmpstr,"\t\ttimestamp1 = %s(tup1.data, tup1.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 1);
10026 sprintf(tmpstr,"\t\ttimestamp2 = %s(tup2.data, tup2.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 0);
10030 " if (timestamp1 > timestamp2+slack)\n"
10032 " else if (timestamp1 < timestamp2)\n"
10041 " void get_timestamp(const host_tuple& tup0){\n"
10042 " gs_int32_t problem;\n"
10044 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10053 // Compare to temp status.
10055 " int compare_with_temp_status(int channel) {\n"
10056 " // check if tuple is temp status tuple\n"
10058 " if (channel == 0) {\n"
10059 //" if(first_execution_0) return 1;\n"
10060 " if (timestamp == last_posted_timestamp_0)\n"
10062 " else if (timestamp < last_posted_timestamp_0)\n"
10067 //" if(first_execution_1) return 1;\n"
10068 " if (timestamp == last_posted_timestamp_1)\n"
10070 " else if (timestamp < last_posted_timestamp_1)\n"
10079 " int compare_stored_with_temp_status(const host_tuple& tup0, int channel)/* const*/ {\n"
10081 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10082 ret+="\tgs_int32_t problem;\n";
10084 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup0.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10086 ret+="\tif (channel == 0) {\n";
10087 // ret+="\tif(first_execution_0) return 1;\n";
10089 " if (l_timestamp == last_posted_timestamp_0)\n"
10091 " else if (l_timestamp < last_posted_timestamp_0)\n"
10096 // ret+="\tif(first_execution_1) return 1;\n";
10098 " if (l_timestamp == last_posted_timestamp_1)\n"
10100 " else if (l_timestamp < last_posted_timestamp_1)\n"
10108 // update temp status.
10110 " int update_temp_status(const host_tuple& tup) {\n"
10111 " if (tup.channel == 0) {\n"
10112 " last_posted_timestamp_0=timestamp;\n"
10113 //" first_execution_0 = false;\n"
10115 " last_posted_timestamp_1=timestamp;\n"
10116 //" first_execution_1 = false;\n"
10122 " int update_stored_temp_status(const host_tuple& tup, int channel) {\n"
10124 ret+="\t"+dta.make_host_cvar("l_timestamp")+";\n";
10125 ret+="\tgs_int32_t problem;\n";
10126 sprintf(tmpstr,"\t\tl_timestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10129 " if (tup.channel == 0) {\n"
10130 " last_posted_timestamp_0=l_timestamp;\n"
10131 //" first_execution_0 = false;\n"
10133 " last_posted_timestamp_1=l_timestamp;\n"
10134 //" first_execution_1 = false;\n"
10140 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10141 ret+="\tgs_int32_t problem;\n";
10142 ret+="\tif (tup.channel == 0) {\n";
10143 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10146 sprintf(tmpstr,"\t\ttimestamp = %s(tup.data, tup.tuple_size, unpack_offset_%s_%d, &problem);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10149 ret+="\tif (tup.channel == 0) {\n";
10150 ret+="\tlast_posted_timestamp_0=timestamp;\n";
10151 ret +="\tfirst_execution_0 = false;\n";
10153 ret+="\tlast_posted_timestamp_1=timestamp;\n";
10154 ret +="\tfirst_execution_1 = false;\n";
10161 // update temp status modulo slack.
10162 ret+="\tint update_temp_status_by_slack(const host_tuple& tup, int channel) {\n";
10164 ret+="\t"+dta.make_host_cvar("timestamp")+";\n";
10165 ret+="\tgs_int32_t problem;\n";
10166 ret+="\tif (tup.channel == 0) {\n";
10167 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcna.c_str(), mvars[0]->get_field().c_str(), 0);
10170 sprintf(tmpstr,"\t\ttimestamp = %s_nocheck(tup.data, unpack_offset_%s_%d);\n", unpack_fcnb.c_str(), mvars[1]->get_field().c_str(), 1);
10174 " if (channel == 0) {\n"
10175 " if(first_execution_0){\n"
10176 " last_posted_timestamp_0=timestamp - slack;\n"
10177 " first_execution_0 = false;\n"
10179 " if(last_posted_timestamp_0 < timestamp-slack)\n"
10180 " last_posted_timestamp_0 = timestamp-slack;\n"
10183 " if(first_execution_1){\n"
10184 " last_posted_timestamp_1=timestamp - slack;\n"
10185 " first_execution_1 = false;\n"
10187 " if(last_posted_timestamp_1 < timestamp-slack)\n"
10188 " last_posted_timestamp_1 = timestamp-slack;\n"
10202 "bool temp_status_received(const host_tuple& tup0){\n"
10203 " return ftaschema_is_temporal_tuple_offset(tuple_metadata_offset0, tup0.data);\n"
10206 //"bool temp_status_received(){return temp_tuple_received;};\n\n";
10209 // create a temp status tuple
10210 ret += "int create_temp_status_tuple(host_tuple& result) {\n\n";
10212 ret += gen_init_temp_status_tuple(this->get_node_name());
10215 ret += "//\t\tPack the fields into the tuple.\n";
10217 string fld_name = mvars[0]->get_field();
10218 int idx = table_layout->get_field_idx(fld_name);
10219 field_entry* fld = table_layout->get_field(idx);
10220 data_type dt(fld->get_type());
10222 // if (needs_xform[0] && needs_xform[1] && dt.needs_hn_translation())
10223 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s((last_posted_timestamp_0 < last_posted_timestamp_1) ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx, dt.hton_translation().c_str());
10225 sprintf(tmpstr,"\ttuple->tuple_var%d = (last_posted_timestamp_0 < last_posted_timestamp_1 ? last_posted_timestamp_0 : last_posted_timestamp_1);\n",idx);
10229 ret += "\treturn 0;\n";
10232 // Transform tuple (before output)
10235 ret += "void xform_tuple(host_tuple &tup){\n";
10236 if((needs_xform[0] && !needs_xform[1]) || (needs_xform[1] && !needs_xform[0])){
10237 ret += "\tstruct "+generate_tuple_name(this->get_node_name())+" *tuple = ("+
10238 generate_tuple_name(this->get_node_name())+" *)(tup.data);\n";
10240 vector<field_entry *> flds = table_layout->get_fields();
10242 ret+="\tif(tup.channel == 0){\n";
10243 if(needs_xform[0] && !needs_xform[1]){
10245 for(f=0;f<flds.size();f++){
10247 data_type dt(flds[f]->get_type());
10248 if(dt.get_type() == v_str_t){
10249 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10251 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10253 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10256 if(dt.needs_hn_translation()){
10257 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10258 // f, dt.hton_translation().c_str(), f);
10264 ret += "\t\treturn;\n";
10266 ret.append("\t}\n");
10269 ret+="\tif(tup.channel == 1){\n";
10270 if(needs_xform[1] && !needs_xform[0]){
10272 for(f=0;f<flds.size();f++){
10274 data_type dt(flds[f]->get_type());
10275 if(dt.get_type() == v_str_t){
10276 // sprintf(tmpstr,"\ttuple->tuple_var%d.offset = htonl(tuple->tuple_var%d.offset);\n",f,f);
10278 // sprintf(tmpstr,"\ttuple->tuple_var%d.length = htonl(tuple->tuple_var%d.length);\n",f,f);
10280 // sprintf(tmpstr,"\ttuple->tuple_var%d.reserved = htonl(tuple->tuple_var%d.reserved);\n",f,f);
10283 if(dt.needs_hn_translation()){
10284 // sprintf(tmpstr,"\ttuple->tuple_var%d = %s(tuple->tuple_var%d);\n",
10285 // f, dt.hton_translation().c_str(), f);
10291 ret += "\t\treturn;\n";
10293 ret.append("\t}\n");
10296 ret.append("};\n\n");
10298 // print_warnings() : tell the functor if the user wants to print warnings.
10299 ret += "bool print_warnings(){\n";
10300 if(definitions.count("print_warnings") && (
10301 definitions["print_warnings"] == "yes" ||
10302 definitions["print_warnings"] == "Yes" ||
10303 definitions["print_warnings"] == "YES" )) {
10304 ret += "return true;\n";
10306 ret += "return false;\n";
10308 ret.append("};\n\n");
10311 // Done with methods.
// Build the generated-source statement that instantiates the merge operator
// for this query node.
//   i      : operator index; used to form the generated variable name "op<i>".
//   params : text spliced in as the leading constructor argument(s).
// The return type is string; the fragments below are pieces of generated C++.
// NOTE(review): two variants are assembled below (merge_operator and
// merge_operator_oop). The statement that selects between them is not visible
// in this listing -- confirm against the full file.
10318 string mrg_qpn::generate_operator(int i, string params){
// Variant 1: "merge_operator<functor> *op<i> = new merge_operator<functor>(
//             <params>,10000,"<node name>");"
10322 " merge_operator<" +
10323 generate_functor_name()+
10324 "> *op"+int_to_string(i)+" = new merge_operator<"+
10325 generate_functor_name()+
10326 ">("+params+",10000,\"" + get_node_name() + "\");\n"
// Variant 2: identical shape, but instantiates merge_operator_oop instead.
10330 " merge_operator_oop<" +
10331 generate_functor_name()+
10332 "> *op"+int_to_string(i)+" = new merge_operator_oop<"+
10333 generate_functor_name()+
10334 ">("+params+",10000,\"" + get_node_name() + "\");\n"
10338 ////////////////////////////////////////////////
10339 /// WATCHLIST_TBL operator
10340 /// WATCHLIST_TBL functor
10341 ////////////////////////////////////////////
// Name of the generated functor class for this watchlist-table node:
// the fixed prefix "watch_tbl_functor_" followed by the normalized node name.
10343 string watch_tbl_qpn::generate_functor_name(){
10344 return("watch_tbl_functor_" + normalize_name(this->get_node_name()));
// Functor generation for watchlist-table nodes is not implemented: the visible
// body returns a fixed placeholder string rather than generated code.
// NOTE(review): schema, Ext_fcns and needs_xform are not referenced in the
// visible lines.
10347 string watch_tbl_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10349 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
// Operator generation for watchlist-table nodes is not implemented: returns a
// fixed placeholder string; i and params are not used in the visible body.
10352 string watch_tbl_qpn::generate_operator(int i, string params){
10353 return("ERROR_WATCH_TBL_FUNCTOR_NOT_YET_IMPLEMENTED");
10356 /////////////////////////////////////////////////////////
10357 ////// JOIN_EQ_HASH functor
// Name of the generated functor class for this hash-join node:
// the fixed prefix "join_eq_hash_functor_" followed by the normalized node name.
// The same name is used as the stem for the generated _keydef and _tempeqdef
// helper classes emitted by generate_functor.
10360 string join_eq_hash_qpn::generate_functor_name(){
10361 return("join_eq_hash_functor_" + normalize_name(this->get_node_name()));
10364 string join_eq_hash_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
10366 vector<data_type *> hashkey_dt; // data types in the hash key
10367 vector<data_type *> temporal_dt; // data types in the temporal key
10368 map<string,scalarexp_t *> l_equiv, r_equiv; // field equivalences
10369 set<int> pfcn_refs;
10370 col_id_set new_cids, local_cids;
10372 //--------------------------------
10375 string plus_op = "+";
10377 //--------------------------------
10378 // key definition class
10379 string ret = "class " + generate_functor_name() + "_keydef{\n";
10380 ret += "public:\n";
10381 // Collect attributes from hash join predicates.
10382 // ASSUME equality predicate.
10383 // Use the upwardly compatible data type
10384 // (infer from '+' operator if possible, else use left type)
10385 for(p=0;p<this->hash_eq.size();++p){
10386 scalarexp_t *lse = hash_eq[p]->pr->get_left_se();
10387 scalarexp_t *rse = hash_eq[p]->pr->get_right_se();
10388 data_type *hdt = new data_type(
10389 lse->get_data_type(), rse->get_data_type(), plus_op );
10390 if(hdt->get_type() == undefined_t){
10391 hashkey_dt.push_back(lse->get_data_type()->duplicate());
10394 hashkey_dt.push_back(hdt);
10396 sprintf(tmpstr,"hashkey_var%d",p);
10397 ret+="\t"+hashkey_dt[p]->make_host_cvar(tmpstr)+";\n";
10399 // find equivalences
10400 // NOTE: this code needs to be synched with the temporality
10401 // checking done at join_eq_hash_qpn::get_fields
10402 if(lse->get_operator_type()==SE_COLREF){
10403 l_equiv[lse->get_colref()->get_field()] = rse;
10405 if(rse->get_operator_type()==SE_COLREF){
10406 r_equiv[rse->get_colref()->get_field()] = lse;
10409 ret += "\tbool touched;\n";
10412 ret += "\t"+generate_functor_name() + "_keydef(){touched=false;};\n";
10414 ret += "\t~"+ generate_functor_name() + "_keydef(){\n";
10415 for(p=0;p<hashkey_dt.size();p++){
10416 if(hashkey_dt[p]->is_buffer_type()){
10417 sprintf(tmpstr,"\t\t%s(&hashkey_var%d);\n",
10418 hashkey_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10423 ret+="\tvoid touch(){touched = true;};\n";
10424 ret+="\tbool is_touched(){return touched;};\n";
10428 //--------------------------------
10429 // temporal equality definition class
10430 ret += "class " + generate_functor_name() + "_tempeqdef{\n";
10431 ret += "public:\n";
10432 // Collect attributes from temporal equality predicates.
10433 // ASSUME equality predicate.
10434 // Use the upwardly compatible data type
10435 // (infer from '+' operator if possible, else use left type)
10436 for(p=0;p<this->temporal_eq.size();++p){
10437 scalarexp_t *lse = temporal_eq[p]->pr->get_left_se();
10438 scalarexp_t *rse = temporal_eq[p]->pr->get_right_se();
10439 data_type *hdt = new data_type(
10440 lse->get_data_type(), rse->get_data_type(), plus_op );
10441 if(hdt->get_type() == undefined_t){
10442 temporal_dt.push_back(hash_eq[p]->pr->get_left_se()->get_data_type()->duplicate());
10445 temporal_dt.push_back(hdt);
10447 sprintf(tmpstr,"tempeq_var%d",p);
10448 ret+="\t"+temporal_dt[p]->make_host_cvar(tmpstr)+";\n";
10449 // find equivalences
10450 if(lse->get_operator_type()==SE_COLREF){
10451 l_equiv[lse->get_colref()->get_field()] = rse;
10453 if(rse->get_operator_type()==SE_COLREF){
10454 r_equiv[rse->get_colref()->get_field()] = lse;
10459 ret += "\t"+generate_functor_name() + "_tempeqdef(){};\n";
10461 ret += "\t~"+ generate_functor_name() + "_tempeqdef(){\n";
10462 for(p=0;p<temporal_dt.size();p++){
10463 if(temporal_dt[p]->is_buffer_type()){
10464 sprintf(tmpstr,"\t\t%s(&tempeq_var%d);\n",
10465 temporal_dt[p]->get_hfta_buffer_destroy().c_str(), p );
10473 //--------------------------------
10474 // temporal eq, hash join functor class
10475 ret += "class " + this->generate_functor_name() + "{\n";
10477 // Find variables referenced in this query node.
10479 col_id_set cid_set;
10480 col_id_set::iterator csi;
10482 for(p=0;p<where.size();++p)
10483 gather_pr_col_ids(where[p]->pr,cid_set,NULL);
10484 for(s=0;s<select_list.size();s++)
10485 gather_se_col_ids(select_list[s]->se,cid_set,NULL);
10487 // Private variables : store the state of the functor.
10488 // 1) variables for unpacked attributes
10489 // 2) offsets of the unpacked attributes
10490 // 3) storage of partial functions
10491 // 4) storage of complex literals (i.e., require a constructor)
10493 ret += "private:\n";
10495 // var to save the schema handles
10496 ret += "\tint schema_handle0;\n";
10497 ret += "\tint schema_handle1;\n";
10499 // generate the declaration of all the variables related to
10500 // temp tuples generation
10501 ret += gen_decl_temp_vars();
10502 // tuple metadata offsets
10503 ret += "\tint tuple_metadata_offset0, tuple_metadata_offset1;\n";
10505 // unpacked attribute storage, offsets
10506 ret += "//\t\tstorage and offsets of accessed fields.\n";
10507 ret += generate_access_vars(cid_set, schema);
10510 // Variables to store results of partial functions.
10511 // WARNING find_partial_functions modifies the SE
10512 // (it marks the partial function id).
10513 ret += "//\t\tParital function result storage\n";
10514 vector<scalarexp_t *> partial_fcns;
10515 vector<int> fcn_ref_cnt;
10516 vector<bool> is_partial_fcn;
10517 for(s=0;s<select_list.size();s++){
10518 find_partial_fcns(select_list[s]->se, &partial_fcns,NULL,NULL, Ext_fcns);
10520 for(p=0;p<where.size();p++){
10521 find_partial_fcns_pr(where[p]->pr, &partial_fcns,NULL,NULL, Ext_fcns);
10523 if(partial_fcns.size()>0){
10524 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
10525 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
10528 // Complex literals (i.e., they need constructors)
10529 ret += "//\t\tComplex literal storage.\n";
10530 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
10531 ret += generate_complex_lit_vars(complex_literals);
10532 // We need the following to handle strings in outer joins.
10533 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10534 ret += "\tstruct vstring EmptyString;\n";
10535 ret += "\tstruct hfta_ipv6_str EmptyIp6;\n";
10537 // Pass-by-handle parameters
10538 ret += "//\t\tPass-by-handle storage.\n";
10539 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
10540 ret += generate_pass_by_handle_vars(param_handle_table);
10543 // variables to hold parameters.
10544 ret += "//\tfor query parameters\n";
10545 ret += generate_param_vars(param_tbl);
10548 ret += "\npublic:\n";
10549 //-------------------
10550 // The functor constructor
10551 // pass in the schema handle.
10552 // 1) make assignments to the unpack offset variables
10553 // 2) initialize the complex literals
10555 ret += "//\t\tFunctor constructor.\n";
10556 ret += this->generate_functor_name()+"(int schema_handle0, int schema_handle1){\n";
10558 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
10559 ret += "\t\tthis->schema_handle1 = schema_handle1;\n";
10560 // metadata offsets
10561 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
10562 ret += "\ttuple_metadata_offset1 = ftaschema_get_tuple_metadata_offset(schema_handle1);\n";
10565 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
10566 ret += gen_access_var_init(cid_set);
10568 // complex literals
10569 ret += "//\t\tInitialize complex literals.\n";
10570 ret += gen_complex_lit_init(complex_literals);
10571 // Initialize EmptyString to the ... empty string
10572 // NEED AN EMPTY LITERAL FOR EACH STRUCTURED LITERAL
10573 literal_t mtstr_lit("");
10574 ret += "\t" + mtstr_lit.to_hfta_C_code("&EmptyString")+";\n";
10575 literal_t mip6_lit("0:0:0:0:0:0:0:0",LITERAL_IPV6);
10576 ret += "\t" + mip6_lit.to_hfta_C_code("&EmptyIp6")+";\n";
10578 // Initialize partial function results so they can be safely GC'd
10579 ret += gen_partial_fcn_init(partial_fcns);
10581 // Initialize non-query-parameter parameter handles
10582 ret += gen_pass_by_handle_init(param_handle_table);
10584 // Init temporal attributes referenced in select list
10585 ret += gen_init_temp_vars(schema, select_list, NULL);
10592 //-------------------
10593 // Functor destructor
10594 ret += "//\t\tFunctor destructor.\n";
10595 ret += "~"+this->generate_functor_name()+"(){\n";
10597 // clean up buffer type complex literals
10598 ret += gen_complex_lit_dtr(complex_literals);
10600 // Deregister the pass-by-handle parameters
10601 ret += "/* register and de-register the pass-by-handle parameters */\n";
10602 ret += gen_pass_by_handle_dtr(param_handle_table);
10604 // clean up partial function results.
10605 ret += "/* clean up partial function storage */\n";
10606 ret += gen_partial_fcn_dtr(partial_fcns);
10608 // Destroy the parameters, if any need to be destroyed
10609 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10614 //-------------------
10615 // Parameter manipulation routines
10616 ret += generate_load_param_block(this->generate_functor_name(),
10617 this->param_tbl,param_handle_table);
10618 ret += generate_delete_param_block(this->generate_functor_name(),
10619 this->param_tbl,param_handle_table);
10621 //-------------------
10622 // Register new parameter block
10624 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
10625 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
10626 ret += "\treturn this->load_params_"+this->generate_functor_name()+
10631 //-------------------
10632 // The create_key method.
10633 // Perform heap allocation.
10634 // ASSUME : the LHS of the preds reference channel 0 attributes
10635 // NOTE : it may fail if a partial function fails.
10637 ret += this->generate_functor_name()+"_keydef *create_key(host_tuple &tup, bool &failed){\n";
10638 // Variables for execution of the function.
10639 ret+="\t"+this->generate_functor_name()+"_keydef *retval = NULL;\n";
10640 ret+="\tgs_int32_t problem = 0;\n";
10642 // Assume unsuccessful completion
10643 ret+= "\tfailed = true;\n";
10645 // Switch the processing based on the channel
10646 ret+="\tif(tup.channel == 0){\n";
10647 ret+="// ------------ processing for channel 0\n";
10648 ret+="\t\thost_tuple &tup0 = tup;\n";
10649 // Gather partial fcns and colids ref'd by this branch
10651 new_cids.clear(); local_cids.clear();
10652 for(p=0;p<hash_eq.size();p++){
10653 collect_partial_fcns(hash_eq[p]->pr->get_left_se(), pfcn_refs);
10654 gather_se_col_ids(hash_eq[p]->pr->get_left_se(),local_cids,NULL);
10657 // Start by cleaning up partial function results
10658 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10659 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10661 // Evaluate the partial functions
10662 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10663 new_cids, NULL, "NULL", needs_xform);
10664 // test passed -- unpack remaining cids.
10665 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10667 // Alloc and load a key object
10668 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10669 for(p=0;p<hash_eq.size();p++){
10670 data_type *hdt = hash_eq[p]->pr->get_left_se()->get_data_type();
10671 if(hdt->is_buffer_type()){
10672 string vname = "tmp_keyvar"+int_to_string(p);
10673 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_left_se(),schema)+";\n";
10674 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10676 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10677 p,generate_se_code(hash_eq[p]->pr->get_left_se(),schema).c_str() );
10681 ret += "\t}else{\n";
10683 ret+="// ------------ processing for channel 1\n";
10684 ret+="\t\thost_tuple &tup1 = tup;\n";
10685 // Gather partial fcns and colids ref'd by this branch
10687 new_cids.clear(); local_cids.clear();
10688 for(p=0;p<hash_eq.size();p++){
10689 collect_partial_fcns(hash_eq[p]->pr->get_right_se(), pfcn_refs);
10690 gather_se_col_ids(hash_eq[p]->pr->get_right_se(),local_cids,NULL);
10693 // Start by cleaning up partial function results
10694 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10695 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10697 // Evaluate the partial functions
10698 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10699 new_cids, NULL, "NULL", needs_xform);
10701 // test passed -- unpack remaining cids.
10702 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "NULL", needs_xform);
10704 // Alloc and load a key object
10705 ret += "\t\tretval = new "+this->generate_functor_name()+"_keydef();\n";
10706 for(p=0;p<hash_eq.size();p++){
10707 data_type *hdt = hash_eq[p]->pr->get_right_se()->get_data_type();
10708 if(hdt->is_buffer_type()){
10709 string vname = "tmp_keyvar"+int_to_string(p);
10710 ret += "\t\t"+hdt->make_host_cvar(vname)+" = "+generate_se_code(hash_eq[p]->pr->get_right_se(),schema)+";\n";
10711 ret += "\t\t"+hdt->get_hfta_buffer_assign_copy()+"(&(retval->hashkey_var"+int_to_string(p)+"),&"+vname+");\n";
10713 sprintf(tmpstr,"\t\tretval->hashkey_var%d = %s;\n",
10714 p,generate_se_code(hash_eq[p]->pr->get_right_se(),schema).c_str() );
10720 ret += "\tfailed = false;\n";
10721 ret += "\t return retval;\n";
10725 //-------------------
10726 // The load_ts method.
10727 // load into an allocated buffer.
10728 // ASSUME : the LHS of the preds reference channel 0 attributes
10729 // NOTE : it may fail if a partial function fails.
10730 // NOTE : cannot handle buffer attributes
10732 ret += "bool load_ts_from_tup("+this->generate_functor_name()+"_tempeqdef *ts, host_tuple &tup){\n";
10733 // Variables for execution of the function.
10734 ret+="\tgs_int32_t problem = 0;\n";
10736 // Switch the processing based on the channel
10737 ret+="\tif(tup.channel == 0){\n";
10738 ret+="// ------------ processing for channel 0\n";
10739 ret+="\t\thost_tuple &tup0 = tup;\n";
10741 // Gather partial fcns and colids ref'd by this branch
10743 new_cids.clear(); local_cids.clear();
10744 for(p=0;p<temporal_eq.size();p++){
10745 collect_partial_fcns(temporal_eq[p]->pr->get_left_se(), pfcn_refs);
10746 gather_se_col_ids(temporal_eq[p]->pr->get_left_se(),local_cids,NULL);
10749 // Start by cleaning up partial function results
10750 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10751 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10753 // Evaluate the partial functions
10754 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10755 new_cids, NULL, "false", needs_xform);
10757 // test passed -- unpack remaining cids.
10758 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10760 // load the temporal key object
10761 for(p=0;p<temporal_eq.size();p++){
10762 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10763 p,generate_se_code(temporal_eq[p]->pr->get_left_se(),schema).c_str() );
10767 ret += "\t}else{\n";
10769 ret+="// ------------ processing for channel 1\n";
10770 ret+="\t\thost_tuple &tup1 = tup;\n";
10772 // Gather partial fcns and colids ref'd by this branch
10774 new_cids.clear(); local_cids.clear();
10775 for(p=0;p<temporal_eq.size();p++){
10776 collect_partial_fcns(temporal_eq[p]->pr->get_right_se(), pfcn_refs);
10777 gather_se_col_ids(temporal_eq[p]->pr->get_right_se(),local_cids,NULL);
10780 // Start by cleaning up partial function results
10781 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10782 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10784 // Evaluate the partial functions
10785 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,
10786 new_cids, NULL, "false", needs_xform);
10788 // test passed -- unpack remaining cids.
10789 ret += gen_remaining_colrefs(schema, local_cids, new_cids, "false", needs_xform);
10791 // load the key object
10792 for(p=0;p<temporal_eq.size();p++){
10793 sprintf(tmpstr,"\t\tts->tempeq_var%d = %s;\n",
10794 p,generate_se_code(temporal_eq[p]->pr->get_right_se(),schema).c_str() );
10800 ret += "\t return true;\n";
10804 // ------------------------------
10806 // (i.e make a copy)
10808 ret += "bool load_ts_from_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10809 for(p=0;p<temporal_eq.size();p++){
10810 sprintf(tmpstr,"\tlts->tempeq_var%d = rts->tempeq_var%d;\n",p,p);
10815 // -------------------------------------
10816 // compare_ts_to_ts
10817 // There should be only one variable to compare.
10818 // If there is more, assume an arbitrary lexicographic order.
10820 ret += "int compare_ts_with_ts("+this->generate_functor_name()+"_tempeqdef *lts,"+this->generate_functor_name()+"_tempeqdef *rts){\n";
10821 for(p=0;p<temporal_eq.size();p++){
10822 sprintf(tmpstr,"\tif(lts->tempeq_var%d < rts->tempeq_var%d) return(-1);\n",p,p);
10824 sprintf(tmpstr,"\tif(lts->tempeq_var%d > rts->tempeq_var%d) return(1);\n",p,p);
10827 ret += "\treturn(0);\n";
10830 // ------------------------------------------
10832 // apply the prefilter
10834 ret += "bool apply_prefilter(host_tuple &tup){\n";
10836 // Variables for this procedure
10837 ret+="\tgs_int32_t problem = 0;\n";
10838 ret+="\tgs_retval_t retval;\n";
10840 // Switch the processing based on the channel
10841 ret+="\tif(tup.channel == 0){\n";
10842 ret+="// ------------ processing for channel 0\n";
10843 ret+="\t\thost_tuple &tup0 = tup;\n";
10844 // Gather partial fcns and colids ref'd by this branch
10846 new_cids.clear(); local_cids.clear();
10847 for(p=0;p<prefilter[0].size();p++){
10848 collect_partial_fcns_pr((prefilter[0])[p]->pr, pfcn_refs);
10851 // Start by cleaning up partial function results
10852 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10853 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10855 for(p=0;p<(prefilter[0]).size();++p){
10856 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10858 // Find the set of variables accessed in this CNF elem,
10859 // but in no previous element.
10860 col_id_set new_pr_cids;
10861 get_new_pred_cids((prefilter[0])[p]->pr,local_cids,new_pr_cids, NULL);
10862 // Unpack these values.
10863 ret += gen_unpack_cids(schema, new_pr_cids, "false", needs_xform);
10864 // Find partial fcns ref'd in this cnf element
10865 set<int> pr_pfcn_refs;
10866 collect_partial_fcns_pr((prefilter[0])[p]->pr, pr_pfcn_refs);
10867 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10869 ret += "\t\tif( !("+generate_predicate_code((prefilter[0])[p]->pr,schema)+") ) return(false);\n";
10871 ret += "\t}else{\n";
10872 ret+="// ------------ processing for channel 1\n";
10873 ret+="\t\thost_tuple &tup1 = tup;\n";
10874 // Gather partial fcns and colids ref'd by this branch
10876 new_cids.clear(); local_cids.clear();
10877 for(p=0;p<prefilter[1].size();p++){
10878 collect_partial_fcns_pr((prefilter[1])[p]->pr, pfcn_refs);
10881 // Start by cleaning up partial function results
10882 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10883 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10885 for(p=0;p<(prefilter[1]).size();++p){
10886 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10888 // Find the set of variables accessed in this CNF elem,
10889 // but in no previous element.
10890 col_id_set pr_new_cids;
10891 get_new_pred_cids((prefilter[1])[p]->pr,local_cids, pr_new_cids, NULL);
10892 // Unpack these values.
10893 ret += gen_unpack_cids(schema, pr_new_cids, "false", needs_xform);
10894 // Find partial fcns ref'd in this cnf element
10895 set<int> pr_pfcn_refs;
10896 collect_partial_fcns_pr((prefilter[1])[p]->pr, pr_pfcn_refs);
10897 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"false");
10899 ret += "\t\tif( !("+generate_predicate_code((prefilter[1])[p]->pr,schema)+ ") ) return(false);\n";
10903 ret+="\treturn true;\n";
10907 // -------------------------------------
10908 // create_output_tuple
10909 // If the postfilter on the pair of tuples passes,
10910 // create an output tuple from the combined information.
10911 // (Plus, outer join processing)
10913 ret += "host_tuple create_output_tuple(const host_tuple &tup0, const host_tuple &tup1, bool &failed){\n";
10915 ret += "\thost_tuple tup;\n";
10916 ret += "\tfailed = true;\n";
10917 ret += "\tgs_retval_t retval = 0;\n";
10918 ret += "\tgs_int32_t problem = 0;\n";
10920 // Start by cleaning up partial function results
10921 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
10923 new_cids.clear(); local_cids.clear();
10924 for(p=0;p<postfilter.size();p++){
10925 collect_partial_fcns_pr(postfilter[p]->pr, pfcn_refs);
10927 for(s=0;s<select_list.size();s++){
10928 collect_partial_fcns(select_list[s]->se, pfcn_refs);
10930 ret += gen_partial_fcn_dtr(partial_fcns,pfcn_refs);
10933 ret+="\tif(tup0.data && tup1.data){\n";
10934 // Evaluate the postfilter
10935 new_cids.clear(); local_cids.clear();
10936 for(p=0;p<postfilter.size();p++){
10937 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",p);
10939 // Find the set of variables accessed in this CNF elem,
10940 // but in no previous element.
10941 col_id_set pr_new_cids;
10942 get_new_pred_cids(postfilter[p]->pr,local_cids, pr_new_cids, NULL);
10943 // Unpack these values.
10944 ret += gen_unpack_cids(schema, pr_new_cids, "tup", needs_xform);
10945 // Find partial fcns ref'd in this cnf element
10946 set<int> pr_pfcn_refs;
10947 collect_partial_fcns_pr(postfilter[p]->pr, pr_pfcn_refs);
10948 ret += gen_unpack_partial_fcn(schema,partial_fcns,pr_pfcn_refs,"tup");
10950 ret += "\t\tif( !("+generate_predicate_code(postfilter[p]->pr,schema)+ ") ) return(tup);\n";
10954 // postfilter passed, evaluate partial functions for select list
10957 col_id_set se_cids;
10958 for(s=0;s<select_list.size();s++){
10959 collect_partial_fcns(select_list[s]->se, sl_pfcns);
10962 if(sl_pfcns.size() > 0)
10963 ret += "//\t\tUnpack remaining partial fcns.\n";
10964 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, sl_pfcns,
10965 local_cids, NULL, "tup", needs_xform);
10967 // Unpack remaining fields
10968 ret += "//\t\tunpack any remaining fields from the input tuples.\n";
10969 for(s=0;s<select_list.size();s++)
10970 get_new_se_cids(select_list[s]->se, local_cids,se_cids,NULL);
10971 ret += gen_unpack_cids(schema, se_cids,"tup", needs_xform);
10974 // Deal with outer join stuff
10975 col_id_set l_cids, r_cids;
10976 col_id_set::iterator ocsi;
10977 for(ocsi=local_cids.begin();ocsi!=local_cids.end();++ocsi){
10978 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10979 else r_cids.insert((*ocsi));
10981 for(ocsi=se_cids.begin();ocsi!=se_cids.end();++ocsi){
10982 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
10983 else r_cids.insert((*ocsi));
10986 ret += "\t}else if(tup0.data){\n";
10987 string unpack_null = ""; col_id_set extra_cids;
10988 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
10989 string field = (*ocsi).field;
10990 if(r_equiv.count(field)){
10991 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
10992 get_new_se_cids(r_equiv[field],l_cids,new_cids,NULL);
10994 int schref = (*ocsi).schema_ref;
10995 data_type dt(schema->get_type_name(schref,field));
10996 literal_t empty_lit(dt.type_indicator());
10997 if(empty_lit.is_cpx_lit()){
10998 // sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
10999 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11000 // NB : works for string type only
11001 // NNB: installed fix for ipv6, more of this should be pushed
11002 // into the literal_t code.
11003 unpack_null+="\tunpack_var_"+field+"_1= "+empty_lit.hfta_empty_literal_name()+";\n";
11005 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
11009 ret += gen_unpack_cids(schema, l_cids, "tup", needs_xform);
11010 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11011 ret += unpack_null;
11012 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11015 unpack_null = ""; extra_cids.clear();
11016 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
11017 string field = (*ocsi).field;
11018 if(l_equiv.count(field)){
11019 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
11020 get_new_se_cids(l_equiv[field],r_cids,new_cids,NULL);
11022 int schref = (*ocsi).schema_ref;
11023 data_type dt(schema->get_type_name(schref,field));
11024 literal_t empty_lit(dt.type_indicator());
11025 if(empty_lit.is_cpx_lit()){
11026 // sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
11027 // unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11028 // NB : works for string type only
11029 // NNB: installed fix for ipv6, more of this should be pushed
11030 // into the literal_t code.
11031 unpack_null+="\tunpack_var_"+field+"_0= "+empty_lit.hfta_empty_literal_name()+";\n";
11033 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
11037 ret += gen_unpack_cids(schema, r_cids, "tup", needs_xform);
11038 ret += gen_unpack_cids(schema, extra_cids, "tup", needs_xform);
11039 ret += unpack_null;
11040 ret += gen_unpack_partial_fcn(schema, partial_fcns, sl_pfcns, "tup");
11045 // Unpack any BUFFER type selections into temporaries
11046 // so that I can compute their size and not have
11047 // to recompute their value during tuple packing.
11048 // I can use regular assignment here because
11049 // these temporaries are non-persistent.
11051 ret += "//\t\tCompute the size of the tuple.\n";
11052 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
11054 // Unpack all buffer type selections, to be able to compute their size
11055 ret += gen_buffer_selvars(schema, select_list);
11057 // The size of the tuple is the size of the tuple struct plus the
11058 // size of the buffers to be copied in.
11060 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
11061 ret += gen_buffer_selvars_size(select_list,schema);
11064 // Allocate tuple data block.
11065 ret += "//\t\tCreate the tuple block.\n";
11066 ret += "\ttup.data = malloc(tup.tuple_size);\n";
11067 ret += "\ttup.heap_resident = true;\n";
11068 // ret += "\ttup.channel = 0;\n";
11070 // Mark tuple as regular
11071 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
11074 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
11075 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
11078 // (Here, offsets are hard-wired. is this a problem?)
11080 ret += "//\t\tPack the fields into the tuple.\n";
11081 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), false );
11083 // Delete string temporaries
11084 ret += gen_buffer_selvars_dtr(select_list);
11086 ret += "\tfailed = false;\n";
11087 ret += "\treturn tup;\n";
11092 //-----------------------------
11093 // Method for checking whether tuple is temporal
11095 ret += "bool temp_status_received(host_tuple &tup){\n";
11097 // Switch the processing based on the channel
11098 ret+="\tif(tup.channel == 0){\n";
11099 ret+="\t\thost_tuple &tup0 = tup;\n";
11100 ret += gen_temp_tuple_check(this->node_name, 0);
11101 ret += "\t}else{\n";
11102 ret+="\t\thost_tuple &tup1 = tup;\n";
11103 ret += gen_temp_tuple_check(this->node_name, 1);
11105 ret += "\treturn temp_tuple_received;\n};\n\n";
11108 //-------------------------------------------------------------------
11109 // Temporal update functions
11112 // create a temp status tuple
11113 ret += "int create_temp_status_tuple(const host_tuple &tup0, const host_tuple &tup1, host_tuple& result) {\n\n";
11115 ret += "\tgs_retval_t retval = 0;\n";
11116 ret += "\tgs_int32_t problem = 0;\n";
11118 ret += "\tif(tup0.data){\n";
11120 // Unpack all the temporal attributes referenced in the select list
11121 col_id_set found_cids;
11123 for(s=0;s<select_list.size();s++){
11124 if (select_list[s]->se->get_data_type()->is_temporal()) {
11125 // Find the set of attributes accessed in this SE
11126 col_id_set new_cids;
11127 get_new_se_cids(select_list[s]->se,found_cids, new_cids, NULL);
11131 // Deal with outer join stuff
11132 l_cids.clear(), r_cids.clear();
11133 for(ocsi=found_cids.begin();ocsi!=found_cids.end();++ocsi){
11134 if((*ocsi).tblvar_ref == 0) l_cids.insert((*ocsi));
11135 else r_cids.insert((*ocsi));
11138 extra_cids.clear();
11139 for(ocsi=r_cids.begin();ocsi!=r_cids.end();++ocsi){
11140 string field = (*ocsi).field;
11141 if(r_equiv.count(field)){
11142 unpack_null+="\t\tunpack_var_"+field+"_1="+generate_se_code(r_equiv[field],schema)+";\n";
11143 col_id_set addnl_cids;
11144 get_new_se_cids(r_equiv[field],l_cids,addnl_cids,NULL);
11146 int schref = (*ocsi).schema_ref;
11147 data_type dt(schema->get_type_name(schref,field));
11148 literal_t empty_lit(dt.type_indicator());
11149 if(empty_lit.is_cpx_lit()){
11150 sprintf(tmpstr,"&(unpack_var_%s_1)",field.c_str());
11151 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11153 unpack_null+="\tunpack_var_"+field+"_1="+empty_lit.to_hfta_C_code("")+";\n";
11157 ret += gen_unpack_cids(schema, l_cids, "1", needs_xform);
11158 ret += gen_unpack_cids(schema, extra_cids, "1", needs_xform);
11159 ret += unpack_null;
11161 ret+="\t}else if (tup1.data) {\n";
11162 unpack_null = ""; extra_cids.clear();
11163 for(ocsi=l_cids.begin();ocsi!=l_cids.end();++ocsi){
11164 string field = (*ocsi).field;
11165 if(l_equiv.count(field)){
11166 unpack_null+="\t\tunpack_var_"+field+"_0="+generate_se_code(l_equiv[field],schema)+";\n";
11167 col_id_set addnl_cids;
11168 get_new_se_cids(l_equiv[field],r_cids,addnl_cids,NULL);
11170 int schref = (*ocsi).schema_ref;
11171 data_type dt(schema->get_type_name(schref,field));
11172 literal_t empty_lit(dt.type_indicator());
11173 if(empty_lit.is_cpx_lit()){
11174 sprintf(tmpstr,"&(unpack_var_%s_0)",field.c_str());
11175 unpack_null += "\t"+empty_lit.to_hfta_C_code(tmpstr)+";\n";
11177 unpack_null+="\tunpack_var_"+field+"_0="+empty_lit.to_hfta_C_code("")+";\n";
11181 ret += gen_unpack_cids(schema, r_cids, "1", needs_xform);
11182 ret += gen_unpack_cids(schema, extra_cids, "1", needs_xform);
11183 ret += unpack_null;
11186 ret += gen_init_temp_status_tuple(this->get_node_name());
11189 ret += "//\t\tPack the fields into the tuple.\n";
11190 ret += gen_pack_tuple(schema,select_list,this->get_node_name(), true );
11193 ret += "\treturn 0;\n";
11199 //----------------------------------------------------------
11200 // The hash function
11202 ret += "struct "+generate_functor_name()+"_hash_func{\n";
11203 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
11204 "_keydef *key) const{\n";
11205 ret += "\t\treturn( (";
11206 if(hashkey_dt.size() > 0){
11207 for(p=0;p<hashkey_dt.size();p++){
11208 if(p>0) ret += "^";
11209 if(hashkey_dt[p]->use_hashfunc()){
11210 // sprintf(tmpstr,"%s(&(key->hashkey_var%d))",hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11211 if(hashkey_dt[p]->is_buffer_type())
11212 sprintf(tmpstr,"(%s*%s(&(key->hashkey_var%d)))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11214 sprintf(tmpstr,"(%s*%s(key->hashkey_var%d))",hash_nums[p%NRANDS].c_str(),hashkey_dt[p]->get_hfta_hashfunc().c_str(),p);
11216 sprintf(tmpstr,"(%s*key->hashkey_var%d)",hash_nums[p%NRANDS].c_str(),p);
11223 ret += ") >> 32);\n";
11227 //----------------------------------------------------------
11228 // The comparison function
11230 ret += "struct "+generate_functor_name()+"_equal_func{\n";
11231 ret += "\tbool operator()(const "+generate_functor_name()+"_keydef *key1, "+
11232 generate_functor_name()+"_keydef *key2) const{\n";
11233 ret += "\t\treturn( (";
11234 if(hashkey_dt.size() > 0){
11235 for(p=0;p<hashkey_dt.size();p++){
11236 if(p>0) ret += ") && (";
11237 if(hashkey_dt[p]->complex_comparison(hashkey_dt[p])){
11238 if(hashkey_dt[p]->is_buffer_type())
11239 sprintf(tmpstr,"(%s(&(key1->hashkey_var%d), &(key2->hashkey_var%d))==0)",
11240 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
11242 sprintf(tmpstr,"(%s((key1->hashkey_var%d), (key2->hashkey_var%d))==0)",
11243 hashkey_dt[p]->get_hfta_comparison_fcn(hashkey_dt[p]).c_str(),p,p);
11245 sprintf(tmpstr,"key1->hashkey_var%d == key2->hashkey_var%d",p,p);
// Builds the generated-code text that instantiates the runtime operator for
// this equi-join hash node.  From the lines visible here, the emitted text
// declares a pointer named op<i> of type
//   join_eq_hash_operator<F, F_tempeqdef, F_keydef, F_hash_func, F_equal_func>
// (F = generate_functor_name()) and initializes it with `new` of the same
// template instantiation.
//   i      : appended via int_to_string() to form the variable name "op<i>".
//   params : not referenced on any line visible here -- presumably spliced
//            into the constructor argument list; TODO confirm.
// NOTE(review): the embedded line numbering jumps (11262->11265,
// 11276->11278) and the listing stops at 11278, so the start of the string
// expression, part of the constructor arguments and the end of the function
// are not visible -- confirm against the full file.
11262 string join_eq_hash_qpn::generate_operator(int i, string params){
11265 " join_eq_hash_operator<" +
11266 generate_functor_name()+ ","+
11267 generate_functor_name() + "_tempeqdef,"+
11268 generate_functor_name() + "_keydef,"+
11269 generate_functor_name()+"_hash_func,"+
11270 generate_functor_name()+"_equal_func"
11271 "> *op"+int_to_string(i)+" = new join_eq_hash_operator<"+
11272 generate_functor_name()+","+
11273 generate_functor_name() + "_tempeqdef,"+
11274 generate_functor_name() + "_keydef,"+
11275 generate_functor_name()+"_hash_func,"+
11276 generate_functor_name()+"_equal_func"
// Emits the integer from[0]->get_property() + 2*from[1]->get_property(),
// then ", " and an opening double quote followed by the node name.
11278 int_to_string(from[0]->get_property()+2*from[1]->get_property())+", \"" + get_node_name() +
11285 ////////////////////////////////////////////////////////////////
11286 //// SGAHCWCB functor
// Returns the name used for the functor class generated for this sgahcwcb
// query node: the fixed prefix "sgahcwcb_functor_" followed by
// normalize_name() applied to the node name.
11290 string sgahcwcb_qpn::generate_functor_name(){
11291 return("sgahcwcb_functor_" + normalize_name(this->get_node_name()));
11295 string sgahcwcb_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
11299 // Initialize generate utility globals
11300 segen_gb_tbl = &(gb_tbl);
11303 //--------------------------------
11304 // group definition class
11305 string ret = "class " + generate_functor_name() + "_groupdef{\n";
11306 ret += "public:\n";
11307 ret += "\tbool valid;\n";
11308 for(g=0;g<this->gb_tbl.size();g++){
11309 sprintf(tmpstr,"gb_var%d",g);
11310 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11313 ret += "\t"+generate_functor_name() + "_groupdef(){valid=true;};\n";
11314 ret += "\t"+generate_functor_name() + "_groupdef("+
11315 this->generate_functor_name() + "_groupdef *gd){\n";
11316 for(g=0;g<gb_tbl.size();g++){
11317 data_type *gdt = gb_tbl.get_data_type(g);
11318 if(gdt->is_buffer_type()){
11319 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
11320 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
11323 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
11327 ret += "\tvalid=true;\n";
11330 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
11331 for(g=0;g<gb_tbl.size();g++){
11332 data_type *gdt = gb_tbl.get_data_type(g);
11333 if(gdt->is_buffer_type()){
11334 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
11335 gdt->get_hfta_buffer_destroy().c_str(), g );
11342 //--------------------------------
11343 // aggr definition class
11344 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
11345 ret += "public:\n";
11346 for(a=0;a<aggr_tbl.size();a++){
11347 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11348 sprintf(tmpstr,"aggr_var%d",a);
11349 if(aggr_tbl.is_builtin(a))
11350 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11352 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11355 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
11357 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
11358 for(a=0;a<aggr_tbl.size();a++){
11359 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11360 if(aggr_tbl.is_builtin(a)){
11361 data_type *adt = aggr_tbl.get_data_type(a);
11362 if(adt->is_buffer_type()){
11363 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11364 adt->get_hfta_buffer_destroy().c_str(), a );
11368 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11369 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11370 ret+="(aggr_var"+int_to_string(a)+"));\n";
11376 //--------------------------------
11377 // superaggr definition class
11378 ret += "class " + this->generate_functor_name() + "_statedef{\n";
11379 ret += "public:\n";
11380 for(a=0;a<aggr_tbl.size();a++){
11381 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11382 if(ate->is_superaggr()){
11383 sprintf(tmpstr,"aggr_var%d",a);
11384 if(aggr_tbl.is_builtin(a))
11385 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
11387 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
11390 set<string>::iterator ssi;
11391 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11392 string state_nm = (*ssi);
11393 int state_id = Ext_fcns->lookup_state(state_nm);
11394 data_type *dt = Ext_fcns->get_storage_dt(state_id);
11395 string state_var = "state_var_"+state_nm;
11396 ret += "\t"+dt->make_host_cvar(state_var)+";\n";
11399 ret += "\t"+this->generate_functor_name() + "_statedef(){};\n";
11401 ret += "\t~"+this->generate_functor_name() + "_statedef(){\n";
11402 for(a=0;a<aggr_tbl.size();a++){
11403 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
11404 if(ate->is_superaggr()){
11405 if(aggr_tbl.is_builtin(a)){
11406 data_type *adt = aggr_tbl.get_data_type(a);
11407 if(adt->is_buffer_type()){
11408 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
11409 adt->get_hfta_buffer_destroy().c_str(), a );
11413 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
11414 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
11415 ret+="(aggr_var"+int_to_string(a)+"));\n";
11419 for(ssi=states_refd.begin(); ssi!=states_refd.end(); ++ssi){
11420 string state_nm = (*ssi);
11421 int state_id = Ext_fcns->lookup_state(state_nm);
11422 string state_var = "state_var_"+state_nm;
11423 ret += "\t_sfun_state_destroy_"+state_nm+"(&"+state_var+");\n";
11430 //--------------------------------
11431 // gb functor class
11432 ret += "class " + this->generate_functor_name() + "{\n";
11434 // Find variables referenced in this query node.
11436 col_id_set cid_set;
11437 col_id_set::iterator csi;
11439 for(w=0;w<where.size();++w)
11440 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
11441 for(w=0;w<having.size();++w)
11442 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
11443 for(w=0;w<cleanby.size();++w)
11444 gather_pr_col_ids(cleanby[w]->pr,cid_set,segen_gb_tbl);
11445 for(w=0;w<cleanwhen.size();++w)
11446 gather_pr_col_ids(cleanwhen[w]->pr,cid_set,segen_gb_tbl);
11447 for(g=0;g<gb_tbl.size();g++)
11448 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
11450 for(s=0;s<select_list.size();s++){
11451 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
11455 // Private variables : store the state of the functor.
11456 // 1) variables for unpacked attributes
11457 // 2) offsets of the unpacked attributes
11458 // 3) storage of partial functions
11459 // 4) storage of complex literals (i.e., require a constructor)
11461 ret += "private:\n";
11463 // var to save the schema handle
11464 ret += "\tint schema_handle0;\n";
11466 // generate the declaration of all the variables related to
11467 // temp tuples generation
11468 ret += gen_decl_temp_vars();
11470 // unpacked attribute storage, offsets
11471 ret += "//\t\tstorage and offsets of accessed fields.\n";
11472 ret += generate_access_vars(cid_set, schema);
11473 // tuple metadata offset
11474 ret += "\ttuple_metadata_offset0;\n";
11476 // Variables to store results of partial functions.
11477 // WARNING find_partial_functions modifies the SE
11478 // (it marks the partial function id).
11479 ret += "//\t\tParital function result storage\n";
11480 vector<scalarexp_t *> partial_fcns;
11481 vector<int> fcn_ref_cnt;
11482 vector<bool> is_partial_fcn;
11483 for(s=0;s<select_list.size();s++){
11484 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
11486 for(w=0;w<where.size();w++){
11487 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11489 for(w=0;w<having.size();w++){
11490 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11492 for(w=0;w<cleanby.size();w++){
11493 find_partial_fcns_pr(cleanby[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11495 for(w=0;w<cleanwhen.size();w++){
11496 find_partial_fcns_pr(cleanwhen[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
11498 for(g=0;g<gb_tbl.size();g++){
11499 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
11501 for(a=0;a<aggr_tbl.size();a++){
11502 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
11504 if(partial_fcns.size()>0){
11505 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
11506 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
11509 // Complex literals (i.e., they need constructors)
11510 ret += "//\t\tComplex literal storage.\n";
11511 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
11512 ret += generate_complex_lit_vars(complex_literals);
11514 // Pass-by-handle parameters
11515 ret += "//\t\tPass-by-handle storage.\n";
11516 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
11517 ret += generate_pass_by_handle_vars(param_handle_table);
11519 // Create cached temporaries for UDAF return values.
11520 ret += "//\t\tTemporaries for UDAF return values.\n";
11521 for(a=0;a<aggr_tbl.size();a++){
11522 if(! aggr_tbl.is_builtin(a)){
11523 int afcn_id = aggr_tbl.get_fcn_id(a);
11524 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11525 sprintf(tmpstr,"udaf_ret_%d", a);
11526 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11532 // variables to hold parameters.
11533 ret += "//\tfor query parameters\n";
11534 ret += generate_param_vars(param_tbl);
11536 // Is there a temporal flush? If so create flush temporaries,
11537 // create flush indicator.
11538 bool uses_temporal_flush = false;
11539 for(g=0;g<gb_tbl.size();g++){
11540 data_type *gdt = gb_tbl.get_data_type(g);
11541 if(gdt->is_temporal())
11542 uses_temporal_flush = true;
11545 if(uses_temporal_flush){
11546 ret += "//\t\tFor temporal flush\n";
11547 for(g=0;g<gb_tbl.size();g++){
11548 data_type *gdt = gb_tbl.get_data_type(g);
11549 if(gdt->is_temporal()){
11550 sprintf(tmpstr,"last_gb%d",g);
11551 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11552 sprintf(tmpstr,"last_flushed_gb%d",g);
11553 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
11556 ret += "\tbool needs_temporal_flush;\n";
11559 // The publicly exposed functions
11561 ret += "\npublic:\n";
11564 //-------------------
11565 // The functor constructor
11566 // pass in the schema handle.
11567 // 1) make assignments to the unpack offset variables
11568 // 2) initialize the complex literals
11570 ret += "//\t\tFunctor constructor.\n";
11571 ret += this->generate_functor_name()+"(int schema_handle0){\n";
11573 // save the schema handle
11574 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
11575 // tuple metadata offset
11576 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
11579 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
11580 ret += gen_access_var_init(cid_set);
11582 // aggregate return vals : refd in both final_sample
11583 // and create_output_tuple
11584 // Create cached temporaries for UDAF return values.
11585 for(a=0;a<aggr_tbl.size();a++){
11586 if(! aggr_tbl.is_builtin(a)){
11587 int afcn_id = aggr_tbl.get_fcn_id(a);
11588 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
11589 sprintf(tmpstr,"udaf_ret_%d", a);
11590 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
11594 // complex literals
11595 ret += "//\t\tInitialize complex literals.\n";
11596 ret += gen_complex_lit_init(complex_literals);
11598 // Initialize partial function results so they can be safely GC'd
11599 ret += gen_partial_fcn_init(partial_fcns);
11601 // Initialize non-query-parameter parameter handles
11602 ret += gen_pass_by_handle_init(param_handle_table);
11604 // temporal flush variables
11605 // ASSUME that structured values won't be temporal.
11606 if(uses_temporal_flush){
11607 ret += "//\t\tInitialize temporal flush variables.\n";
11608 for(g=0;g<gb_tbl.size();g++){
11609 data_type *gdt = gb_tbl.get_data_type(g);
11610 if(gdt->is_temporal()){
11611 literal_t gl(gdt->type_indicator());
11612 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
11613 ret.append(tmpstr);
11616 ret += "\tneeds_temporal_flush = false;\n";
11619 // Init temporal attributes referenced in select list
11620 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
11625 //-------------------
11626 // Functor destructor
11627 ret += "//\t\tFunctor destructor.\n";
11628 ret += "~"+this->generate_functor_name()+"(){\n";
11630 // clean up buffer type complex literals
11631 ret += gen_complex_lit_dtr(complex_literals);
11633 // Deregister the pass-by-handle parameters
11634 ret += "/* register and de-register the pass-by-handle parameters */\n";
11635 ret += gen_pass_by_handle_dtr(param_handle_table);
11637 // clean up partial function results.
11638 ret += "/* clean up partial function storage */\n";
11639 ret += gen_partial_fcn_dtr(partial_fcns);
11641 // Destroy the parameters, if any need to be destroyed
11642 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11647 //-------------------
11648 // Parameter manipulation routines
11649 ret += generate_load_param_block(this->generate_functor_name(),
11650 this->param_tbl,param_handle_table);
11651 ret += generate_delete_param_block(this->generate_functor_name(),
11652 this->param_tbl,param_handle_table);
11654 //-------------------
11655 // Register new parameter block
11657 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
11658 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
11659 ret += "\treturn this->load_params_"+this->generate_functor_name()+
11663 //-------------------
11664 // the create_group method.
11665 // This method creates a group in a buffer passed in
11666 // (to allow for creation on the stack).
11667 // There are also a couple of side effects:
11668 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
11669 // 2) determine if a temporal flush is required.
11671 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
11672 // Variables for execution of the function.
11673 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11675 if(partial_fcns.size()>0){ // partial fcn access failure
11676 ret += "\tgs_retval_t retval = 0;\n";
11680 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
11681 "_groupdef *) buffer;\n";
11683 // Start by cleaning up partial function results
11684 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11686 set<int> gb_pfcns; // partial fcns in gbdefs, aggr se's
11687 for(g=0;g<gb_tbl.size();g++){
11688 collect_partial_fcns(gb_tbl.get_def(g), gb_pfcns);
11690 ret += gen_partial_fcn_dtr(partial_fcns,gb_pfcns);
11691 // ret += gen_partial_fcn_dtr(partial_fcns);
11694 ret += gen_temp_tuple_check(this->node_name, 0);
11695 col_id_set found_cids; // colrefs unpacked thus far.
11696 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
11700 // Save temporal group-by variables
11703 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
11705 for(g=0;g<gb_tbl.size();g++){
11707 data_type *gdt = gb_tbl.get_data_type(g);
11709 if(gdt->is_temporal()){
11710 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11711 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11712 ret.append(tmpstr);
11719 // Compare the temporal GB vars with the stored ones,
11720 // set flush indicator and update stored GB vars if there is any change.
11722 if(uses_temporal_flush){
11723 ret+= "\tif( !( (";
11724 bool first_one = true;
11725 for(g=0;g<gb_tbl.size();g++){
11726 data_type *gdt = gb_tbl.get_data_type(g);
11728 if(gdt->is_temporal()){
11729 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
11730 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
11731 if(first_one){first_one = false;} else {ret += ") && (";}
11732 ret += generate_equality_test(lhs_op, rhs_op, gdt);
11736 for(g=0;g<gb_tbl.size();g++){
11737 data_type *gdt = gb_tbl.get_data_type(g);
11738 if(gdt->is_temporal()){
11739 if(gdt->is_buffer_type()){
11740 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
11742 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
11744 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
11750 if(uses_temporal_flush){
11751 for(g=0;g<gb_tbl.size();g++){
11752 data_type *gdt = gb_tbl.get_data_type(g);
11753 if(gdt->is_temporal()){
11754 ret+="if(last_flushed_gb"+int_to_string(g)+">0)\n";
11760 ret += "\t\tneeds_temporal_flush=true;\n";
11761 ret += "\t\t}else{\n"
11762 "\t\t\tneeds_temporal_flush=false;\n"
11767 // For temporal status tuple we don't need to do anything else
11768 ret += "\tif (temp_tuple_received) return NULL;\n\n";
11771 // The partial functions ref'd in the group-by var
11772 // definitions must be evaluated. If one returns false,
11773 // then implicitly the predicate is false.
11774 set<int>::iterator pfsi;
11776 if(gb_pfcns.size() > 0)
11777 ret += "//\t\tUnpack partial fcns.\n";
11778 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, gb_pfcns,
11779 found_cids, segen_gb_tbl, "NULL", needs_xform);
11781 // Unpack the group-by variables
11783 for(g=0;g<gb_tbl.size();g++){
11784 // Find the new fields ref'd by this GBvar def.
11785 col_id_set new_cids;
11786 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
11787 // Unpack these values.
11788 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
11790 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11791 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11793 // There seems to be no difference between the two
11794 // branches of the IF statement.
11795 data_type *gdt = gb_tbl.get_data_type(g);
11796 if(gdt->is_buffer_type()){
11797 // Create temporary copy.
11798 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11799 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
11801 scalarexp_t *gse = gb_tbl.get_def(g);
11802 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
11803 g,generate_se_code(gse,schema).c_str());
11806 ret.append(tmpstr);
11811 ret+= "\treturn gbval;\n";
11816 //-------------------
11817 // the evaluate_predicate method.
11818 // This method evaluates the WHERE clause on the input tuple;
11819 // clauses which don't reference stateful fcns are evaluated first.
11820 // There are also a couple of side effects:
11821 // 1) unpack the partial fcns ref'd in the WHERE clause and the aggregate SEs
11822 // 2) unpack all remaining attributes referenced by the query node.
11824 ret += "bool evaluate_predicate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, int cd){\n";
11825 // Variables for execution of the function.
11826 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11828 if(partial_fcns.size()>0){ // partial fcn access failure
11829 ret += "\tgs_retval_t retval = 0;\n";
11833 // Start by cleaning up partial function results
11834 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
11835 set<int> w_pfcns; // partial fcns in where clause
11836 for(w=0;w<where.size();++w)
11837 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
11839 set<int> ag_pfcns; // partial fcns in gbdefs, aggr se's
11840 for(a=0;a<aggr_tbl.size();a++){
11841 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_pfcns);
11843 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
11844 ret += gen_partial_fcn_dtr(partial_fcns,ag_pfcns);
11846 ret+="//\t\tEvaluate clauses which don't reference stateful fcns first \n";
11847 for(w=0;w<where.size();++w){
11848 if(! pred_refs_sfun(where[w]->pr)){
11849 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11851 // Find the set of variables accessed in this CNF elem,
11852 // but in no previous element.
11853 col_id_set new_cids;
11854 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11856 // Unpack these values.
11857 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11858 // Find partial fcns ref'd in this cnf element
11859 set<int> pfcn_refs;
11860 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11861 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11863 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11864 +") ) return(false);\n";
11869 // The partial functions ref'd in the aggregate
11870 // definitions must also be evaluated. If one returns false,
11871 // then implicitly the predicate is false.
11872 // ASSUME that aggregates cannot reference stateful fcns.
11874 if(ag_pfcns.size() > 0)
11875 ret += "//\t\tUnpack remaining partial fcns.\n";
11876 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_pfcns,
11877 found_cids, segen_gb_tbl, "false", needs_xform);
11879 ret+="//\t\tEvaluate all remaining where clauses.\n";
11880 ret+="\tbool retval = true;\n";
11881 for(w=0;w<where.size();++w){
11882 if( pred_refs_sfun(where[w]->pr)){
11883 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
11885 // Find the set of variables accessed in this CNF elem,
11886 // but in no previous element.
11887 col_id_set new_cids;
11888 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
11890 // Unpack these values.
11891 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
11892 // Find partial fcns ref'd in this cnf element
11893 set<int> pfcn_refs;
11894 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
11895 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
11897 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
11898 +") ) retval = false;\n";
11902 ret+="// Unpack all remaining attributes\n";
11903 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "false", needs_xform);
11905 ret += "\n\treturn retval;\n";
11908 //--------------------------------------------------------
11909 // Create and initialize an aggregate object
11911 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, gs_sp_t a,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11912 // Variables for execution of the function.
11913 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11916 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+ "_aggrdef *)a;\n";
11918 for(a=0;a<aggr_tbl.size();a++){
11919 if(aggr_tbl.is_builtin(a)){
11920 // Create temporaries for buffer return values
11921 data_type *adt = aggr_tbl.get_data_type(a);
11922 if(adt->is_buffer_type()){
11923 sprintf(tmpstr,"aggr_tmp_%d", a);
11924 ret+=adt->make_host_cvar(tmpstr)+";\n";
11929 for(a=0;a<aggr_tbl.size();a++){
11930 sprintf(tmpstr,"aggval->aggr_var%d",a);
11931 string assignto_var = tmpstr;
11932 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11935 ret += "\treturn aggval;\n";
11939 //--------------------------------------------------------
11940 // initialize an aggregate object inplace
11942 ret += "void create_aggregate(host_tuple &tup0, "+this->generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
11943 // Variables for execution of the function.
11944 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11948 for(a=0;a<aggr_tbl.size();a++){
11949 if(aggr_tbl.is_builtin(a)){
11950 // Create temporaries for buffer return values
11951 data_type *adt = aggr_tbl.get_data_type(a);
11952 if(adt->is_buffer_type()){
11953 sprintf(tmpstr,"aggr_tmp_%d", a);
11954 ret+=adt->make_host_cvar(tmpstr)+";\n";
11959 for(a=0;a<aggr_tbl.size();a++){
11960 sprintf(tmpstr,"aggval->aggr_var%d",a);
11961 string assignto_var = tmpstr;
11962 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11968 //--------------------------------------------------------
11969 // Create and clean-initialize a state object
11971 ret += "void initialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval){\n";
11972 // Variables for execution of the function.
11973 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
11976 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
11978 for(a=0;a<aggr_tbl.size();a++){
11979 if( aggr_tbl.is_superaggr(a)){
11980 if(aggr_tbl.is_builtin(a)){
11981 // Create temporaries for buffer return values
11982 data_type *adt = aggr_tbl.get_data_type(a);
11983 if(adt->is_buffer_type()){
11984 sprintf(tmpstr,"aggr_tmp_%d", a);
11985 ret+=adt->make_host_cvar(tmpstr)+";\n";
11991 for(a=0;a<aggr_tbl.size();a++){
11992 if( aggr_tbl.is_superaggr(a)){
11993 sprintf(tmpstr,"stval->aggr_var%d",a);
11994 string assignto_var = tmpstr;
11995 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
11999 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12000 string state_nm = (*ssi);
12001 ret += "_sfun_state_clean_init_"+state_nm+"(&(stval->state_var_"+state_nm+"));\n";
12007 //--------------------------------------------------------
12008 // Create and dirty-initialize a state object
12010 ret += "void reinitialize_state(host_tuple &tup0, "+generate_functor_name()+"_groupdef *gbval, "+generate_functor_name()+"_statedef *stval, "+generate_functor_name()+"_statedef *old_stval, int cd){\n";
12011 // Variables for execution of the function.
12012 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12015 // ret += "\t"+generate_functor_name()+"_statedef *stval = ("+generate_functor_name()+ "_statedef *)s;\n";
12017 for(a=0;a<aggr_tbl.size();a++){
12018 if( aggr_tbl.is_superaggr(a)){
12019 if(aggr_tbl.is_builtin(a)){
12020 // Create temporaries for buffer return values
12021 data_type *adt = aggr_tbl.get_data_type(a);
12022 if(adt->is_buffer_type()){
12023 sprintf(tmpstr,"aggr_tmp_%d", a);
12024 ret+=adt->make_host_cvar(tmpstr)+";\n";
12030 // initialize superaggregates
12031 for(a=0;a<aggr_tbl.size();a++){
12032 if( aggr_tbl.is_superaggr(a)){
12033 sprintf(tmpstr,"stval->aggr_var%d",a);
12034 string assignto_var = tmpstr;
12035 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
12039 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12040 string state_nm = (*ssi);
12041 ret += "_sfun_state_dirty_init_"+state_nm+"(&(stval->state_var_"+state_nm+"),&(old_stval->state_var_"+state_nm+"), cd );\n";
12046 //--------------------------------------------------------
12047 // Finalize_state : call the finalize fcn on all states
12050 ret += "void finalize_state( "+generate_functor_name()+"_statedef *stval, int cd){\n";
12052 for(ssi=states_refd.begin(); ssi!=states_refd.end();++ssi){
12053 string state_nm = (*ssi);
12054 ret += "_sfun_state_final_init_"+state_nm+"(&(stval->state_var_"+state_nm+"), cd);\n";
12062 //--------------------------------------------------------
12063 // update (plus) a superaggregate object
12065 ret += "void update_plus_superaggr(host_tuple &tup0, " +
12066 generate_functor_name()+"_groupdef *gbval, "+
12067 generate_functor_name()+"_statedef *stval){\n";
12068 // Variables for execution of the function.
12069 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12071 // use of temporaries depends on the aggregate,
12072 // generate them in generate_aggr_update
12075 for(a=0;a<aggr_tbl.size();a++){
12076 if(aggr_tbl.is_superaggr(a)){
12077 sprintf(tmpstr,"stval->aggr_var%d",a);
12078 string varname = tmpstr;
12079 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12083 ret += "\treturn;\n";
12088 //--------------------------------------------------------
12089 // update (minus) a superaggregate object
12091 ret += "void update_minus_superaggr( "+
12092 generate_functor_name()+"_groupdef *gbval, "+
12093 generate_functor_name()+"_aggrdef *aggval,"+
12094 generate_functor_name()+"_statedef *stval"+
12096 // Variables for execution of the function.
12097 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12099 // use of temporaries depends on the aggregate,
12100 // generate them in generate_aggr_update
12103 for(a=0;a<aggr_tbl.size();a++){
12104 if(aggr_tbl.is_superaggr(a)){
12105 sprintf(tmpstr,"stval->aggr_var%d",a);
12106 string super_varname = tmpstr;
12107 sprintf(tmpstr,"aggval->aggr_var%d",a);
12108 string sub_varname = tmpstr;
12109 ret.append(generate_superaggr_minus(sub_varname, super_varname,&aggr_tbl,a, schema));
12113 ret += "\treturn;\n";
12117 //--------------------------------------------------------
12118 // update an aggregate object
12120 ret += "void update_aggregate(host_tuple &tup0, "
12121 +generate_functor_name()+"_groupdef *gbval, "+
12122 generate_functor_name()+"_aggrdef *aggval,"+generate_functor_name()+"_statedef *stval, int cd){\n";
12123 // Variables for execution of the function.
12124 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12126 // use of temporaries depends on the aggregate,
12127 // generate them in generate_aggr_update
12130 for(a=0;a<aggr_tbl.size();a++){
12131 sprintf(tmpstr,"aggval->aggr_var%d",a);
12132 string varname = tmpstr;
12133 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
12136 ret += "\treturn;\n";
12139 //---------------------------------------------------
12142 ret += "\tbool flush_needed(){\n";
12143 if(uses_temporal_flush){
12144 ret += "\t\treturn needs_temporal_flush;\n";
12146 ret += "\t\treturn false;\n";
12151 //------------------------------------------------------
12152 // The cleaning_when predicate
12154 string gbvar = "gbval->gb_var";
12155 string aggvar = "aggval->";
12157 ret += "bool need_to_clean( "
12158 +generate_functor_name()+"_groupdef *gbval, "+
12159 generate_functor_name()+"_statedef *stval, int cd"+
12162 if(cleanwhen.size()>0)
12163 ret += "\tbool predval = true;\n";
12165 ret += "\tbool predval = false;\n";
12167 // Find the udafs ref'd in the cleaning_when clause
12169 for(w=0;w<cleanwhen.size();++w)
12170 collect_aggr_refs_pr(cleanwhen[w]->pr, cw_aggs);
12173 // get the return values from the UDAFS
12174 for(a=0;a<aggr_tbl.size();a++){
12175 if(! aggr_tbl.is_builtin(a) && cw_aggs.count(a)){
12176 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12177 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12178 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12183 // Start by cleaning up partial function results
12184 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12185 set<int> cw_pfcns; // partial fcns in where clause
12186 for(w=0;w<cleanwhen.size();++w)
12187 collect_partial_fcns_pr(cleanwhen[w]->pr, cw_pfcns);
12189 ret += gen_partial_fcn_dtr(partial_fcns,cw_pfcns);
12192 for(w=0;w<cleanwhen.size();++w){
12193 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12195 // Find partial fcns ref'd in this cnf element
12196 set<int> pfcn_refs;
12197 collect_partial_fcns_pr(cleanwhen[w]->pr, pfcn_refs);
12198 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12199 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12200 ret += "\tif(retval){ return false;}\n";
12202 // ret += unpack_partial_fcn_fm_aggr(schema, partial_fcns, pfcn_refs,"false");
12204 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanwhen[w]->pr,gbvar, aggvar, schema)+
12205 ") ) predval = false;\n";
12208 ret += "\treturn predval;\n";
12211 //------------------------------------------------------
12212 // The cleaning_by predicate
12214 ret += "bool sample_group("
12215 +generate_functor_name()+"_groupdef *gbval, "+
12216 generate_functor_name()+"_aggrdef *aggval,"+
12217 generate_functor_name()+"_statedef *stval, int cd"+
12220 if(cleanby.size()>0)
12221 ret += "\tbool retval = true;\n";
12223 ret += "\tbool retval = false;\n";
12225 // Find the udafs ref'd in the cleaning_by clause
12227 for(w=0;w<cleanby.size();++w)
12228 collect_aggr_refs_pr(cleanby[w]->pr, cb_aggs);
12231 // get the return values from the UDAFS
12232 for(a=0;a<aggr_tbl.size();a++){
12233 if(! aggr_tbl.is_builtin(a) && cb_aggs.count(a)){
12234 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12235 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12236 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12241 // Start by cleaning up partial function results
12242 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12243 set<int> cb_pfcns; // partial fcns in where clause
12244 for(w=0;w<cleanby.size();++w)
12245 collect_partial_fcns_pr(cleanby[w]->pr, cb_pfcns);
12247 ret += gen_partial_fcn_dtr(partial_fcns,cb_pfcns);
12250 for(w=0;w<cleanwhen.size();++w){
12251 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
12255 // Find the set of variables accessed in this CNF elem,
12256 // but in no previous element.
12257 col_id_set new_cids;
12258 get_new_pred_cids(cleanby[w]->pr, found_cids, new_cids, segen_gb_tbl);
12260 // Unpack these values.
12261 ret += gen_unpack_cids(schema, new_cids, "false", needs_xform);
12264 // Find partial fcns ref'd in this cnf element
12265 set<int> pfcn_refs;
12266 collect_partial_fcns_pr(cleanby[w]->pr, pfcn_refs);
12267 for(pfsi=pfcn_refs.begin();pfsi!=pfcn_refs.end();++pfsi){
12268 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12269 ret += "\tif(retval){ return false;}\n";
12271 // ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"false");
12273 ret += "\tif( !("+generate_predicate_code_fm_aggr(cleanby[w]->pr,gbvar, aggvar, schema)+
12274 +") ) retval = false;\n";
12277 ret += "\treturn retval;\n";
12281 //-----------------------------------------------------
12283 ret += "bool final_sample_group("
12284 +generate_functor_name()+"_groupdef *gbval, "+
12285 generate_functor_name()+"_aggrdef *aggval,"+
12286 generate_functor_name()+"_statedef *stval,"+
12289 ret += "\tgs_retval_t retval = 0;\n";
12291 // Find the udafs ref'd in the having clause
12293 for(w=0;w<having.size();++w)
12294 collect_aggr_refs_pr(having[w]->pr, hv_aggs);
12297 // get the return values from the UDAFS
12298 for(a=0;a<aggr_tbl.size();a++){
12299 if(! aggr_tbl.is_builtin(a) && hv_aggs.count(a)){
12300 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12301 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12302 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12307 set<int> hv_sl_pfcns;
12308 for(w=0;w<having.size();w++){
12309 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
12312 // clean up the partial fcn results from any previous execution
12313 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
12316 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
12317 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12318 ret += "\tif(retval){ return false;}\n";
12321 // Evaluate the HAVING clause
12322 // TODO: this seems to have a ++ operator rather than a + operator.
12323 for(w=0;w<having.size();++w){
12324 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
12327 ret += "\treturn true;\n";
12330 //---------------------------------------------------
12331 // create output tuple
12332 // Unpack the partial functions ref'd in the where clause,
12333 // select clause. Evaluate the where clause.
12334 // Finally, pack the tuple.
12336 // I need to use special code generation here,
12337 // so I'll leave it in longhand.
12339 ret += "host_tuple create_output_tuple("
12340 +generate_functor_name()+"_groupdef *gbval, "+
12341 generate_functor_name()+"_aggrdef *aggval,"+
12342 generate_functor_name()+"_statedef *stval,"+
12343 "int cd, bool &failed){\n";
12345 ret += "\thost_tuple tup;\n";
12346 ret += "\tfailed = false;\n";
12347 ret += "\tgs_retval_t retval = 0;\n";
12350 // Find the udafs ref'd in the select clause
12352 for(s=0;s<select_list.size();s++)
12353 collect_agg_refs(select_list[s]->se, sl_aggs);
12356 // get the return values from the UDAFS
12357 for(a=0;a<aggr_tbl.size();a++){
12358 if(! aggr_tbl.is_builtin(a) && sl_aggs.count(a)){
12359 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
12360 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12361 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
12366 // I can't cache partial fcn results from the having
12367 // clause because evaluation is separated.
12369 for(s=0;s<select_list.size();s++){
12370 collect_partial_fcns(select_list[s]->se, sl_pfcns);
12373 for(pfsi=sl_pfcns.begin();pfsi!=sl_pfcns.end();++pfsi){
12374 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
12375 ret += "\tif(retval){ failed=true; return tup;}\n";
12379 // Now, compute the size of the tuple.
12381 // Unpack any BUFFER type selections into temporaries
12382 // so that I can compute their size and not have
12383 // to recompute their value during tuple packing.
12384 // I can use regular assignment here because
12385 // these temporaries are non-persistent.
12386 // TODO: should I be using the selvar generation routine?
12388 ret += "//\t\tCompute the size of the tuple.\n";
12389 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
12390 for(s=0;s<select_list.size();s++){
12391 scalarexp_t *se = select_list[s]->se;
12392 data_type *sdt = se->get_data_type();
12393 if(sdt->is_buffer_type() &&
12394 !( (se->get_operator_type() == SE_COLREF) ||
12395 (se->get_operator_type() == SE_AGGR_STAR) ||
12396 (se->get_operator_type() == SE_AGGR_SE) ||
12397 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12398 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12400 sprintf(tmpstr,"selvar_%d",s);
12401 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
12402 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
12406 // The size of the tuple is the size of the tuple struct plus the
12407 // size of the buffers to be copied in.
12409 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
12410 for(s=0;s<select_list.size();s++){
12411 // if(s>0) ret += "+";
12412 scalarexp_t *se = select_list[s]->se;
12413 data_type *sdt = select_list[s]->se->get_data_type();
12414 if(sdt->is_buffer_type()){
12415 if(!( (se->get_operator_type() == SE_COLREF) ||
12416 (se->get_operator_type() == SE_AGGR_STAR) ||
12417 (se->get_operator_type() == SE_AGGR_SE) ||
12418 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12419 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12421 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
12422 ret.append(tmpstr);
12424 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12425 ret.append(tmpstr);
12431 // Allocate tuple data block.
12432 ret += "//\t\tCreate the tuple block.\n";
12433 ret += "\ttup.data = malloc(tup.tuple_size);\n";
12434 ret += "\ttup.heap_resident = true;\n";
12436 // Mark tuple as regular
12437 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
12439 // ret += "\ttup.channel = 0;\n";
12440 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
12441 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
12444 // (Here, offsets are hard-wired. is this a problem?)
12446 ret += "//\t\tPack the fields into the tuple.\n";
12447 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
12448 for(s=0;s<select_list.size();s++){
12449 scalarexp_t *se = select_list[s]->se;
12450 data_type *sdt = se->get_data_type();
12451 if(sdt->is_buffer_type()){
12452 if(!( (se->get_operator_type() == SE_COLREF) ||
12453 (se->get_operator_type() == SE_AGGR_STAR) ||
12454 (se->get_operator_type() == SE_AGGR_SE) ||
12455 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
12456 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
12458 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
12459 ret.append(tmpstr);
12460 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
12461 ret.append(tmpstr);
12463 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12464 ret.append(tmpstr);
12465 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
12466 ret.append(tmpstr);
12469 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12470 ret.append(tmpstr);
12471 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
12476 // Destroy string temporaries
12477 ret += gen_buffer_selvars_dtr(select_list);
12478 // Destroy string return vals of UDAFs
12479 for(a=0;a<aggr_tbl.size();a++){
12480 if(! aggr_tbl.is_builtin(a)){
12481 int afcn_id = aggr_tbl.get_fcn_id(a);
12482 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12483 if(adt->is_buffer_type()){
12484 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
12485 adt->get_hfta_buffer_destroy().c_str(), a );
12492 ret += "\treturn tup;\n";
12496 //-------------------------------------------------------------------
12497 // Temporal update functions
12499 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
12501 // create a temp status tuple
12502 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
12504 ret += gen_init_temp_status_tuple(this->get_node_name());
12507 // (Here, offsets are hard-wired. is this a problem?)
12509 ret += "//\t\tPack the fields into the tuple.\n";
12510 for(s=0;s<select_list.size();s++){
12511 data_type *sdt = select_list[s]->se->get_data_type();
12512 if(sdt->is_temporal()){
12513 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
12515 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
12521 ret += "\treturn 0;\n";
12522 ret += "};};\n\n\n";
12525 //----------------------------------------------------------
12526 // The hash function
12528 ret += "struct "+generate_functor_name()+"_hash_func{\n";
12529 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12530 "_groupdef *grp) const{\n";
12531 ret += "\t\treturn(";
12532 for(g=0;g<gb_tbl.size();g++){
12533 if(g>0) ret += "^";
12534 data_type *gdt = gb_tbl.get_data_type(g);
12535 if(gdt->use_hashfunc()){
12536 if(gdt->is_buffer_type())
12537 sprintf(tmpstr,"(%s*%s(&)grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12539 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12541 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12545 ret += ") >> 32);\n";
12549 //----------------------------------------------------------
12550 // The superhash function
12552 ret += "struct "+generate_functor_name()+"_superhash_func{\n";
12553 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
12554 "_groupdef *grp) const{\n";
12555 ret += "\t\treturn(0";
12557 for(g=0;g<gb_tbl.size();g++){
12558 if(sg_tbl.count(g)>0){
12560 data_type *gdt = gb_tbl.get_data_type(g);
12561 if(gdt->use_hashfunc()){
12562 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
12564 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
12569 ret += ") >> 32);\n";
12574 //----------------------------------------------------------
12575 // The comparison function
12577 ret += "struct "+generate_functor_name()+"_equal_func{\n";
12578 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12579 generate_functor_name()+"_groupdef *grp2) const{\n";
12580 ret += "\t\treturn( (";
12581 for(g=0;g<gb_tbl.size();g++){
12582 if(g>0) ret += ") && (";
12583 data_type *gdt = gb_tbl.get_data_type(g);
12584 if(gdt->complex_comparison(gdt)){
12585 if(gdt->is_buffer_type())
12586 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12587 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12589 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12590 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12592 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
12601 //----------------------------------------------------------
12602 // The superhashcomparison function
12604 ret += "struct "+generate_functor_name()+"_superequal_func{\n";
12605 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
12606 generate_functor_name()+"_groupdef *grp2) const{\n";
12607 ret += "\t\treturn( (";
12609 bool first_elem = true;
12610 for(g=0;g<gb_tbl.size();g++){
12611 if(sg_tbl.count(g)){
12612 if(first_elem) first_elem=false; else ret += ") && (";
12613 data_type *gdt = gb_tbl.get_data_type(g);
12614 if(gdt->complex_comparison(gdt)){
12615 if(gdt->is_buffer_type())
12616 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
12617 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12619 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
12620 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
12622 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
// Build the text of the C++ statement that instantiates the runtime
// operator for this query node.  The emitted statement has the form
//     clean_operator< ...8 template args... > *op<i> =
//         new clean_operator< ...same 8 template args... >(<params>, "<node name>");
// The template arguments are the functor class name produced by
// generate_functor_name(), followed by that same name with the suffixes
// _groupdef, _aggrdef, _statedef, _hash_func, _equal_func,
// _superhash_func and _superequal_func, in that order.
//   i      - index appended to "op" to name the emitted pointer variable.
//   params - text spliced in verbatim as the leading constructor argument(s);
//            the node name is appended after it as a quoted string.
// NOTE(review): the line that consumes this concatenation is absent from
// this listing; the string return type suggests it is returned - confirm.
12639 string sgahcwcb_qpn::generate_operator(int i, string params){
// Template argument list for the declared type of the op<i> pointer.
12642 " clean_operator<" +
12643 generate_functor_name()+",\n\t"+
12644 generate_functor_name() + "_groupdef, \n\t" +
12645 generate_functor_name() + "_aggrdef, \n\t" +
12646 generate_functor_name() + "_statedef, \n\t" +
12647 generate_functor_name()+"_hash_func, \n\t"+
12648 generate_functor_name()+"_equal_func ,\n\t"+
12649 generate_functor_name()+"_superhash_func,\n\t "+
12650 generate_functor_name()+"_superequal_func \n\t"+
// Variable name, then the same template argument list again for the
// new-expression.  The two lists name the same types; their literals
// differ only in whitespace and comma placement.
12651 "> *op"+int_to_string(i)+" = new clean_operator<"+
12652 generate_functor_name()+",\n\t"+
12653 generate_functor_name() + "_groupdef,\n\t " +
12654 generate_functor_name() + "_aggrdef, \n\t" +
12655 generate_functor_name() + "_statedef, \n\t" +
12656 generate_functor_name()+"_hash_func, \n\t"+
12657 generate_functor_name()+"_equal_func, \n\t"+
12658 generate_functor_name()+"_superhash_func, \n\t"+
// No '+' after the next literal: it is joined to the following one by
// adjacent string-literal concatenation.
12659 generate_functor_name()+"_superequal_func\n\t "
// Constructor arguments: caller-supplied params, then the quoted node name.
12660 ">("+params+", \"" + get_node_name() + "\");\n"
12664 ////////////////////////////////////////////////////////////////
// Name of the functor class generated for this query node: the fixed
// prefix "rsgah_functor_" followed by normalize_name() applied to the
// node name.  generate_functor() uses this both as the functor class name
// and as the stem of the companion _groupdef / _aggrdef class names.
12669 string rsgah_qpn::generate_functor_name(){
12670 return("rsgah_functor_" + normalize_name(this->get_node_name()));
12674 string rsgah_qpn::generate_functor(table_list *schema, ext_fcn_list *Ext_fcns, vector<bool> &needs_xform){
12678 // Initialize generate utility globals
12679 segen_gb_tbl = &(gb_tbl);
12682 //--------------------------------
12683 // group definition class
12684 string ret = "class " + generate_functor_name() + "_groupdef{\n";
12685 ret += "public:\n";
12686 for(g=0;g<this->gb_tbl.size();g++){
12687 sprintf(tmpstr,"gb_var%d",g);
12688 ret+="\t"+this->gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12691 ret += "\t"+generate_functor_name() + "_groupdef(){};\n";
12692 ret += "\t"+generate_functor_name() + "_groupdef("+
12693 this->generate_functor_name() + "_groupdef *gd){\n";
12694 for(g=0;g<gb_tbl.size();g++){
12695 data_type *gdt = gb_tbl.get_data_type(g);
12696 if(gdt->is_buffer_type()){
12697 sprintf(tmpstr,"\t\t%s(&gb_var%d, &(gd->gb_var%d));\n",
12698 gdt->get_hfta_buffer_assign_copy().c_str(),g,g );
12701 sprintf(tmpstr,"\t\tgb_var%d = gd->gb_var%d;\n",g,g);
12707 ret += "\t~"+ generate_functor_name() + "_groupdef(){\n";
12708 for(g=0;g<gb_tbl.size();g++){
12709 data_type *gdt = gb_tbl.get_data_type(g);
12710 if(gdt->is_buffer_type()){
12711 sprintf(tmpstr,"\t\t%s(&gb_var%d);\n",
12712 gdt->get_hfta_buffer_destroy().c_str(), g );
12719 //--------------------------------
12720 // aggr definition class
12721 ret += "class " + this->generate_functor_name() + "_aggrdef{\n";
12722 ret += "public:\n";
12723 for(a=0;a<aggr_tbl.size();a++){
12724 aggr_table_entry *ate = aggr_tbl.agr_tbl[a];
12725 sprintf(tmpstr,"aggr_var%d",a);
12726 if(aggr_tbl.is_builtin(a))
12727 ret+="\t"+ aggr_tbl.get_data_type(a)->make_host_cvar(tmpstr)+";\n";
12729 ret+="\t"+ aggr_tbl.get_storage_type(a)->make_host_cvar(tmpstr)+";\n";
12732 ret += "\t"+this->generate_functor_name() + "_aggrdef(){};\n";
12734 ret += "\t~"+this->generate_functor_name() + "_aggrdef(){\n";
12735 for(a=0;a<aggr_tbl.size();a++){
12736 if(aggr_tbl.is_builtin(a)){
12737 data_type *adt = aggr_tbl.get_data_type(a);
12738 if(adt->is_buffer_type()){
12739 sprintf(tmpstr,"\t\t%s(&aggr_var%d);\n",
12740 adt->get_hfta_buffer_destroy().c_str(), a );
12744 ret+="\t\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_DESTROY_(";
12745 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
12746 ret+="(aggr_var"+int_to_string(a)+"));\n";
12752 //--------------------------------
12753 // gb functor class
12754 ret += "class " + this->generate_functor_name() + "{\n";
12756 // Find variables referenced in this query node.
12758 col_id_set cid_set;
12759 col_id_set::iterator csi;
12761 for(w=0;w<where.size();++w)
12762 gather_pr_col_ids(where[w]->pr,cid_set,segen_gb_tbl);
12763 for(w=0;w<having.size();++w)
12764 gather_pr_col_ids(having[w]->pr,cid_set,segen_gb_tbl);
12765 for(w=0;w<closing_when.size();++w)
12766 gather_pr_col_ids(closing_when[w]->pr,cid_set,segen_gb_tbl);
12767 for(g=0;g<gb_tbl.size();g++)
12768 gather_se_col_ids(gb_tbl.get_def(g),cid_set,segen_gb_tbl);
12770 for(s=0;s<select_list.size();s++){
12771 gather_se_col_ids(select_list[s]->se,cid_set,segen_gb_tbl); // descends into aggregates
12775 // Private variables : store the state of the functor.
12776 // 1) variables for unpacked attributes
12777 // 2) offsets of the unpacked attributes
12778 // 3) storage of partial functions
12779 // 4) storage of complex literals (i.e., require a constructor)
12781 ret += "private:\n";
12783 // var to save the schema handle
12784 ret += "\tint schema_handle0;\n";
12786 // generate the declaration of all the variables related to
12787 // temp tuples generation
12788 ret += gen_decl_temp_vars();
12790 // unpacked attribute storage, offsets
12791 ret += "//\t\tstorage and offsets of accessed fields.\n";
12792 ret += generate_access_vars(cid_set, schema);
12793 // tuple metadata offset
12794 ret += "\tint tuple_metadata_offset0;\n";
12796 // Variables to store results of partial functions.
12797 // WARNING find_partial_functions modifies the SE
12798 // (it marks the partial function id).
12799 ret += "//\t\tParital function result storage\n";
12800 vector<scalarexp_t *> partial_fcns;
12801 vector<int> fcn_ref_cnt;
12802 vector<bool> is_partial_fcn;
12803 for(s=0;s<select_list.size();s++){
12804 find_partial_fcns(select_list[s]->se, &partial_fcns, NULL,NULL, Ext_fcns);
12806 for(w=0;w<where.size();w++){
12807 find_partial_fcns_pr(where[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12809 for(w=0;w<having.size();w++){
12810 find_partial_fcns_pr(having[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12812 for(w=0;w<closing_when.size();w++){
12813 find_partial_fcns_pr(closing_when[w]->pr, &partial_fcns, NULL,NULL, Ext_fcns);
12815 for(g=0;g<gb_tbl.size();g++){
12816 find_partial_fcns(gb_tbl.get_def(g), &partial_fcns, NULL,NULL, Ext_fcns);
12818 for(a=0;a<aggr_tbl.size();a++){
12819 find_partial_fcns(aggr_tbl.get_aggr_se(a), &partial_fcns, NULL,NULL, Ext_fcns);
12821 if(partial_fcns.size()>0){
12822 ret += "/*\t\tVariables for storing results of partial functions. \t*/\n";
12823 ret += generate_partial_fcn_vars(partial_fcns,fcn_ref_cnt,is_partial_fcn,false);
12826 // Create cached temporaries for UDAF return values.
12827 for(a=0;a<aggr_tbl.size();a++){
12828 if(! aggr_tbl.is_builtin(a)){
12829 int afcn_id = aggr_tbl.get_fcn_id(a);
12830 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
12831 sprintf(tmpstr,"udaf_ret_%d", a);
12832 ret+="\t"+adt->make_host_cvar(tmpstr)+";\n";
12837 // Complex literals (i.e., they need constructors)
12838 ret += "//\t\tComplex literal storage.\n";
12839 cplx_lit_table *complex_literals = this->get_cplx_lit_tbl(Ext_fcns);
12840 ret += generate_complex_lit_vars(complex_literals);
12842 // Pass-by-handle parameters
12843 ret += "//\t\tPass-by-handle storage.\n";
12844 vector<handle_param_tbl_entry *> param_handle_table = this->get_handle_param_tbl(Ext_fcns);
12845 ret += generate_pass_by_handle_vars(param_handle_table);
12848 // variables to hold parameters.
12849 ret += "//\tfor query parameters\n";
12850 ret += generate_param_vars(param_tbl);
12852 // Is there a temporal flush? If so create flush temporaries,
12853 // create flush indicator.
12854 bool uses_temporal_flush = false;
12855 for(g=0;g<gb_tbl.size();g++){
12856 data_type *gdt = gb_tbl.get_data_type(g);
12857 if(gdt->is_temporal())
12858 uses_temporal_flush = true;
12861 if(uses_temporal_flush){
12862 ret += "//\t\tFor temporal flush\n";
12863 for(g=0;g<gb_tbl.size();g++){
12864 data_type *gdt = gb_tbl.get_data_type(g);
12865 if(gdt->is_temporal()){
12866 sprintf(tmpstr,"last_gb%d",g);
12867 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12868 sprintf(tmpstr,"last_flushed_gb%d",g);
12869 ret+="\t"+gb_tbl.get_data_type(g)->make_host_cvar(tmpstr)+";\n";
12872 ret += "\tbool needs_temporal_flush;\n";
12875 // The publicly exposed functions
12877 ret += "\npublic:\n";
12880 //-------------------
12881 // The functor constructor
12882 // pass in the schema handle.
12883 // 1) make assignments to the unpack offset variables
12884 // 2) initialize the complex literals
12886 ret += "//\t\tFunctor constructor.\n";
12887 ret += this->generate_functor_name()+"(int schema_handle0){\n";
12889 // save the schema handle
12890 ret += "\t\tthis->schema_handle0 = schema_handle0;\n";
12892 ret += "\ttuple_metadata_offset0 = ftaschema_get_tuple_metadata_offset(schema_handle0);\n";
12895 ret += "//\t\tGet offsets for unpacking fields from input tuple.\n";
12896 ret += gen_access_var_init(cid_set);
12898 // complex literals
12899 ret += "//\t\tInitialize complex literals.\n";
12900 ret += gen_complex_lit_init(complex_literals);
12902 // Initialize partial function results so they can be safely GC'd
12903 ret += gen_partial_fcn_init(partial_fcns);
12905 // Initialize non-query-parameter parameter handles
12906 ret += gen_pass_by_handle_init(param_handle_table);
12908 // temporal flush variables
12909 // ASSUME that structured values won't be temporal.
12910 if(uses_temporal_flush){
12911 ret += "//\t\tInitialize temporal flush variables.\n";
12912 for(g=0;g<gb_tbl.size();g++){
12913 data_type *gdt = gb_tbl.get_data_type(g);
12914 if(gdt->is_temporal()){
12915 literal_t gl(gdt->type_indicator());
12916 sprintf(tmpstr,"\tlast_gb%d = %s;\n",g, gl.to_hfta_C_code("").c_str());
12917 ret.append(tmpstr);
12920 ret += "\tneeds_temporal_flush = false;\n";
12923 // Init temporal attributes referenced in select list
12924 ret += gen_init_temp_vars(schema, select_list, segen_gb_tbl);
12929 //-------------------
12930 // Functor destructor
12931 ret += "//\t\tFunctor destructor.\n";
12932 ret += "~"+this->generate_functor_name()+"(){\n";
12934 // clean up buffer type complex literals
12935 ret += gen_complex_lit_dtr(complex_literals);
12937 // Deregister the pass-by-handle parameters
12938 ret += "/* register and de-register the pass-by-handle parameters */\n";
12939 ret += gen_pass_by_handle_dtr(param_handle_table);
12941 // clean up partial function results.
12942 ret += "/* clean up partial function storage */\n";
12943 ret += gen_partial_fcn_dtr(partial_fcns);
12945 // Destroy the parameters, if any need to be destroyed
12946 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12951 //-------------------
12952 // Parameter manipulation routines
12953 ret += generate_load_param_block(this->generate_functor_name(),
12954 this->param_tbl,param_handle_table);
12955 ret += generate_delete_param_block(this->generate_functor_name(),
12956 this->param_tbl,param_handle_table);
12958 //-------------------
12959 // Register new parameter block
12961 ret += "int set_param_block(gs_int32_t sz, void* value){\n";
12962 ret += "\tthis->destroy_params_"+this->generate_functor_name()+"();\n";
12963 ret += "\treturn this->load_params_"+this->generate_functor_name()+
12968 //-------------------
12969 // the create_group method.
12970 // This method creates a group in a buffer passed in
12971 // (to allow for creation on the stack).
12972 // There are also a couple of side effects:
12973 // 1) evaluate the WHERE clause (and therefore, unpack all partial fcns)
12974 // 2) determine if a temporal flush is required.
12976 ret += this->generate_functor_name()+"_groupdef *create_group(host_tuple &tup0, gs_sp_t buffer){\n";
12977 // Variables for execution of the function.
12978 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
12980 if(partial_fcns.size()>0){ // partial fcn access failure
12981 ret += "\tgs_retval_t retval = 0;\n";
12985 ret += "\t"+generate_functor_name()+"_groupdef *gbval = ("+generate_functor_name()+
12986 "_groupdef *) buffer;\n";
12988 // Start by cleaning up partial function results
12989 ret += "//\t\tcall destructors for partial fcn storage vars of buffer type\n";
12990 set<int> w_pfcns; // partial fcns in where clause
12991 for(w=0;w<where.size();++w)
12992 collect_partial_fcns_pr(where[w]->pr, w_pfcns);
12994 set<int> ag_gb_pfcns; // partial fcns in gbdefs, aggr se's
12995 for(g=0;g<gb_tbl.size();g++){
12996 collect_partial_fcns(gb_tbl.get_def(g), ag_gb_pfcns);
12998 for(a=0;a<aggr_tbl.size();a++){
12999 collect_partial_fcns(aggr_tbl.get_aggr_se(a), ag_gb_pfcns);
13001 ret += gen_partial_fcn_dtr(partial_fcns,w_pfcns);
13002 ret += gen_partial_fcn_dtr(partial_fcns,ag_gb_pfcns);
13003 // ret += gen_partial_fcn_dtr(partial_fcns);
13006 ret += gen_temp_tuple_check(this->node_name, 0);
13007 col_id_set found_cids; // colrefs unpacked thus far.
13008 ret += gen_unpack_temp_vars(schema, found_cids, select_list, segen_gb_tbl, needs_xform);
13011 // Save temporal group-by variables
13014 ret.append("\n\t//\t\tCompute temporal groupby attributes\n\n");
13016 for(g=0;g<gb_tbl.size();g++){
13018 data_type *gdt = gb_tbl.get_data_type(g);
13020 if(gdt->is_temporal()){
13021 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13022 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13023 ret.append(tmpstr);
13030 // Compare the temporal GB vars with the stored ones,
13031 // set flush indicator and update stored GB vars if there is any change.
13033 if(uses_temporal_flush){
13034 ret+= "\tif( !( (";
13035 bool first_one = true;
13036 for(g=0;g<gb_tbl.size();g++){
13037 data_type *gdt = gb_tbl.get_data_type(g);
13039 if(gdt->is_temporal()){
13040 sprintf(tmpstr,"last_gb%d",g); string lhs_op = tmpstr;
13041 sprintf(tmpstr,"gbval->gb_var%d",g); string rhs_op = tmpstr;
13042 if(first_one){first_one = false;} else {ret += ") && (";}
13043 ret += generate_equality_test(lhs_op, rhs_op, gdt);
13047 for(g=0;g<gb_tbl.size();g++){
13048 data_type *gdt = gb_tbl.get_data_type(g);
13049 if(gdt->is_temporal()){
13050 if(gdt->is_buffer_type()){
13051 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13053 sprintf(tmpstr,"\t\tlast_flushed_gb%d = last_gb%d;\n",g,g);
13055 sprintf(tmpstr,"\t\tlast_gb%d = gbval->gb_var%d;\n",g,g);
13060 ret += "\t\tneeds_temporal_flush=true;\n";
13061 ret += "\t\t}else{\n"
13062 "\t\t\tneeds_temporal_flush=false;\n"
13067 // For temporal status tuple we don't need to do anything else
13068 ret += "\tif (temp_tuple_received) return NULL;\n\n";
13070 for(w=0;w<where.size();++w){
13071 sprintf(tmpstr,"//\t\tPredicate clause %d.\n",w);
13073 // Find the set of variables accessed in this CNF elem,
13074 // but in no previous element.
13075 col_id_set new_cids;
13076 get_new_pred_cids(where[w]->pr, found_cids, new_cids, segen_gb_tbl);
13078 // Unpack these values.
13079 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13080 // Find partial fcns ref'd in this cnf element
13081 set<int> pfcn_refs;
13082 collect_partial_fcns_pr(where[w]->pr, pfcn_refs);
13083 ret += gen_unpack_partial_fcn(schema, partial_fcns, pfcn_refs,"NULL");
13085 ret += "\tif( !("+generate_predicate_code(where[w]->pr,schema)+
13086 +") ) return(NULL);\n";
13089 // The partial functions ref'd in the group-by var and aggregate
13090 // definitions must also be evaluated. If one returns false,
13091 // then implicitly the predicate is false.
13092 set<int>::iterator pfsi;
13094 if(ag_gb_pfcns.size() > 0)
13095 ret += "//\t\tUnpack remaining partial fcns.\n";
13096 ret += gen_full_unpack_partial_fcn(schema, partial_fcns, ag_gb_pfcns,
13097 found_cids, segen_gb_tbl, "NULL", needs_xform);
13099 // Unpack the group-by variables
13101 for(g=0;g<gb_tbl.size();g++){
13102 data_type *gdt = gb_tbl.get_data_type(g);
13103 if(!gdt->is_temporal()){ // temproal gbs already computed
13104 // Find the new fields ref'd by this GBvar def.
13105 col_id_set new_cids;
13106 get_new_se_cids(gb_tbl.get_def(g), found_cids, new_cids, segen_gb_tbl);
13107 // Unpack these values.
13108 ret += gen_unpack_cids(schema, new_cids, "NULL", needs_xform);
13110 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13111 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13113 // There seems to be no difference between the two
13114 // branches of the IF statement.
13115 data_type *gdt = gb_tbl.get_data_type(g);
13116 if(gdt->is_buffer_type()){
13117 // Create temporary copy.
13118 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13119 g, generate_se_code(gb_tbl.get_def(g),schema).c_str() );
13121 scalarexp_t *gse = gb_tbl.get_def(g);
13122 sprintf(tmpstr,"\tgbval->gb_var%d = %s;\n",
13123 g,generate_se_code(gse,schema).c_str());
13126 ret.append(tmpstr);
13132 ret+= "\treturn gbval;\n";
13135 //--------------------------------------------------------
13136 // Create and initialize an aggregate object
13138 ret += this->generate_functor_name()+"_aggrdef *create_aggregate(host_tuple &tup0, gs_sp_t buffer){\n";
13139 // Variables for execution of the function.
13140 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13143 ret += "\t"+generate_functor_name()+"_aggrdef *aggval = ("+generate_functor_name()+
13144 "_aggrdef *)buffer;\n";
13146 for(a=0;a<aggr_tbl.size();a++){
13147 if(aggr_tbl.is_builtin(a)){
13148 // Create temporaries for buffer return values
13149 data_type *adt = aggr_tbl.get_data_type(a);
13150 if(adt->is_buffer_type()){
13151 sprintf(tmpstr,"aggr_tmp_%d", a);
13152 ret+=adt->make_host_cvar(tmpstr)+";\n";
13157 // Unpack all remaining attributes
13158 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "NULL", needs_xform);
13159 for(a=0;a<aggr_tbl.size();a++){
13160 sprintf(tmpstr,"aggval->aggr_var%d",a);
13161 string assignto_var = tmpstr;
13162 ret += "\t"+generate_aggr_init(assignto_var,&aggr_tbl,a, schema);
13165 ret += "\treturn aggval;\n";
13168 //--------------------------------------------------------
13169 // update an aggregate object
13171 ret += "void update_aggregate(host_tuple &tup0, "
13172 +generate_functor_name()+"_groupdef *gbval, "+
13173 generate_functor_name()+"_aggrdef *aggval){\n";
13174 // Variables for execution of the function.
13175 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13177 // use of temporaries depends on the aggregate,
13178 // generate them in generate_aggr_update
13181 // Unpack all remaining attributes
13182 ret += gen_remaining_colrefs(schema, cid_set, found_cids, "", needs_xform);
13183 for(a=0;a<aggr_tbl.size();a++){
13184 sprintf(tmpstr,"aggval->aggr_var%d",a);
13185 string varname = tmpstr;
13186 ret.append(generate_aggr_update(varname,&aggr_tbl,a, schema));
13189 ret += "\treturn;\n";
13192 //--------------------------------------------------------
13193 // reinitialize an aggregate object
13195 ret += "void reinit_aggregates( "+
13196 generate_functor_name()+"_groupdef *gbval, "+
13197 generate_functor_name()+"_aggrdef *aggval){\n";
13198 // Variables for execution of the function.
13199 ret += "\tgs_int32_t problem = 0;\n"; // return unpack failure
13201 // use of temporaries depends on the aggregate,
13202 // generate them in generate_aggr_update
13204 for(g=0;g<gb_tbl.size();g++){
13205 data_type *gdt = gb_tbl.get_data_type(g);
13206 if(gdt->is_temporal()){
13207 if(gdt->is_buffer_type()){
13208 sprintf(tmpstr,"\t\t%s(&(gbval->gb_var%d),&last_gb%d);\n",gdt->get_hfta_buffer_replace().c_str(),g,g);
13210 sprintf(tmpstr,"\t\t gbval->gb_var%d =last_gb%d;\n",g,g);
13216 // Unpack all remaining attributes
13217 for(a=0;a<aggr_tbl.size();a++){
13218 sprintf(tmpstr,"aggval->aggr_var%d",a);
13219 string varname = tmpstr;
13220 ret.append(generate_aggr_reinitialize(varname,&aggr_tbl,a, schema));
13223 ret += "\treturn;\n";
13230 //---------------------------------------------------
13233 ret += "\tbool flush_needed(){\n";
13234 if(uses_temporal_flush){
13235 ret += "\t\treturn needs_temporal_flush;\n";
13237 ret += "\t\treturn false;\n";
13241 //---------------------------------------------------
13242 // create output tuple
13243 // Unpack the partial functions ref'd in the where clause,
13244 // select clause. Evaluate the where clause.
13245 // Finally, pack the tuple.
13247 // I need to use special code generation here,
13248 // so I'll leave it in longhand.
13250 ret += "host_tuple create_output_tuple("
13251 +generate_functor_name()+"_groupdef *gbval, "+
13252 generate_functor_name()+"_aggrdef *aggval, bool &failed){\n";
13254 ret += "\thost_tuple tup;\n";
13255 ret += "\tfailed = false;\n";
13256 ret += "\tgs_retval_t retval = 0;\n";
13258 string gbvar = "gbval->gb_var";
13259 string aggvar = "aggval->";
13262 // First, get the return values from the UDAFS
13263 for(a=0;a<aggr_tbl.size();a++){
13264 if(! aggr_tbl.is_builtin(a)){
13265 ret += "\t"+aggr_tbl.get_op(a)+"_HFTA_AGGR_OUTPUT_(&(udaf_ret_"+int_to_string(a)+"),";
13266 if(aggr_tbl.get_storage_type(a)->get_type() != fstring_t) ret+="&";
13267 ret+="("+aggvar+"aggr_var"+int_to_string(a)+"));\n";
13271 set<int> hv_sl_pfcns;
13272 for(w=0;w<having.size();w++){
13273 collect_partial_fcns_pr(having[w]->pr, hv_sl_pfcns);
13275 for(s=0;s<select_list.size();s++){
13276 collect_partial_fcns(select_list[s]->se, hv_sl_pfcns);
13279 // clean up the partial fcn results from any previous execution
13280 ret += gen_partial_fcn_dtr(partial_fcns,hv_sl_pfcns);
13283 for(pfsi=hv_sl_pfcns.begin();pfsi!=hv_sl_pfcns.end();++pfsi){
13284 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13285 ret += "\tif(retval){ failed = true; return(tup);}\n";
13288 // Evaluate the HAVING clause
13289 // TODO: this seems to have a ++ operator rather than a + operator.
13290 for(w=0;w<having.size();++w){
13291 ret += "\tif( !("+generate_predicate_code_fm_aggr(having[w]->pr,gbvar, aggvar, schema) +") ) { failed = true; return(tup);}\n";
13294 // Now, compute the size of the tuple.
13296 // Unpack any BUFFER type selections into temporaries
13297 // so that I can compute their size and not have
13298 // to recompute their value during tuple packing.
13299 // I can use regular assignment here because
13300 // these temporaries are non-persistent.
13301 // TODO: should I be using the selvar generation routine?
13303 ret += "//\t\tCompute the size of the tuple.\n";
13304 ret += "//\t\t\tNote: buffer storage packed at the end of the tuple.\n";
13305 for(s=0;s<select_list.size();s++){
13306 scalarexp_t *se = select_list[s]->se;
13307 data_type *sdt = se->get_data_type();
13308 if(sdt->is_buffer_type() &&
13309 !( (se->get_operator_type() == SE_COLREF) ||
13310 (se->get_operator_type() == SE_AGGR_STAR) ||
13311 (se->get_operator_type() == SE_AGGR_SE) ||
13312 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13313 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13315 sprintf(tmpstr,"selvar_%d",s);
13316 ret+="\t"+sdt->make_host_cvar(tmpstr)+" = ";
13317 ret += generate_se_code_fm_aggr(se,gbvar, aggvar, schema) +";\n";
13321 // The size of the tuple is the size of the tuple struct plus the
13322 // size of the buffers to be copied in.
13324 ret+="\ttup.tuple_size = sizeof(" + generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t)";
13325 for(s=0;s<select_list.size();s++){
13326 // if(s>0) ret += "+";
13327 scalarexp_t *se = select_list[s]->se;
13328 data_type *sdt = select_list[s]->se->get_data_type();
13329 if(sdt->is_buffer_type()){
13330 if(!( (se->get_operator_type() == SE_COLREF) ||
13331 (se->get_operator_type() == SE_AGGR_STAR) ||
13332 (se->get_operator_type() == SE_AGGR_SE) ||
13333 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13334 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13336 sprintf(tmpstr," + %s(&selvar_%d)", sdt->get_hfta_buffer_size().c_str(),s);
13337 ret.append(tmpstr);
13339 sprintf(tmpstr," + %s(&%s)", sdt->get_hfta_buffer_size().c_str(),generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13340 ret.append(tmpstr);
13346 // Allocate tuple data block.
13347 ret += "//\t\tCreate the tuple block.\n";
13348 ret += "\ttup.data = malloc(tup.tuple_size);\n";
13349 ret += "\ttup.heap_resident = true;\n";
13351 // Mark tuple as regular
13352 ret += "\t*((gs_sp_t )tup.data + sizeof(" + generate_tuple_name( this->get_node_name()) +")) = REGULAR_TUPLE;\n";
13354 // ret += "\ttup.channel = 0;\n";
13355 ret += "\t"+generate_tuple_name( this->get_node_name())+" *tuple = ("+
13356 generate_tuple_name( this->get_node_name())+" *)(tup.data);\n";
13359 // (Here, offsets are hard-wired. is this a problem?)
13361 ret += "//\t\tPack the fields into the tuple.\n";
13362 ret += "\tint tuple_pos = sizeof("+generate_tuple_name( this->get_node_name()) +") + sizeof(gs_uint8_t);\n";
13363 for(s=0;s<select_list.size();s++){
13364 scalarexp_t *se = select_list[s]->se;
13365 data_type *sdt = se->get_data_type();
13366 if(sdt->is_buffer_type()){
13367 if(!( (se->get_operator_type() == SE_COLREF) ||
13368 (se->get_operator_type() == SE_AGGR_STAR) ||
13369 (se->get_operator_type() == SE_AGGR_SE) ||
13370 (se->get_operator_type() == SE_FUNC && se->is_partial()) ||
13371 (se->get_operator_type() == SE_FUNC && se->get_aggr_ref() >= 0))
13373 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &selvar_%d, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, s);
13374 ret.append(tmpstr);
13375 sprintf(tmpstr,"\ttuple_pos += %s(&selvar_%d);\n", sdt->get_hfta_buffer_size().c_str(), s);
13376 ret.append(tmpstr);
13378 sprintf(tmpstr,"\t%s(&(tuple->tuple_var%d), &%s, ((gs_sp_t )tuple)+tuple_pos, tuple_pos);\n", sdt->get_hfta_buffer_tuple_copy().c_str(),s, generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13379 ret.append(tmpstr);
13380 sprintf(tmpstr,"\ttuple_pos += %s(&%s);\n", sdt->get_hfta_buffer_size().c_str(), generate_se_code_fm_aggr(se,gbvar, aggvar, schema).c_str());
13381 ret.append(tmpstr);
13384 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13385 ret.append(tmpstr);
13386 ret.append(generate_se_code_fm_aggr(se,gbvar, aggvar, schema) );
13391 // Destroy string temporaries
13392 ret += gen_buffer_selvars_dtr(select_list);
13394 ret += "\treturn tup;\n";
13397 //------------------------------------------------------------------
13398 // Cleaning_when : evaluate the cleaning_when clause.
13399 // ASSUME that the udaf return values have already
13400 // been unpacked. delete the string udaf return values at the end.
13402 ret += "bool cleaning_when("
13403 +generate_functor_name()+"_groupdef *gbval, "+
13404 generate_functor_name()+"_aggrdef *aggval){\n";
13406 ret += "\tbool retval = true;\n";
13409 gbvar = "gbval->gb_var";
13410 aggvar = "aggval->";
13413 set<int> clw_pfcns;
13414 for(w=0;w<closing_when.size();w++){
13415 collect_partial_fcns_pr(closing_when[w]->pr, clw_pfcns);
13418 // clean up the partial fcn results from any previous execution
13419 ret += gen_partial_fcn_dtr(partial_fcns,clw_pfcns);
13422 for(pfsi=clw_pfcns.begin();pfsi!=clw_pfcns.end();++pfsi){
13423 ret += unpack_partial_fcn_fm_aggr(partial_fcns[(*pfsi)], (*pfsi), gbvar, aggvar, schema);
13424 ret += "\tif(retval){ return false;}\n";
13427 // Evaluate the Closing When clause
13428 // TODO: this seems to have a ++ operator rather than a + operator.
13429 for(w=0;w<closing_when.size();++w){
13430 ret += "\tif( !("+generate_predicate_code_fm_aggr(closing_when[w]->pr,gbvar, aggvar, schema) +") ) { return false;}\n";
13434 // Destroy string return vals of UDAFs
13435 for(a=0;a<aggr_tbl.size();a++){
13436 if(! aggr_tbl.is_builtin(a)){
13437 int afcn_id = aggr_tbl.get_fcn_id(a);
13438 data_type *adt = Ext_fcns->get_fcn_dt(afcn_id);
13439 if(adt->is_buffer_type()){
13440 sprintf(tmpstr,"\t%s(&udaf_ret_%d);\n",
13441 adt->get_hfta_buffer_destroy().c_str(), a );
13447 ret += "\treturn retval;\n";
13453 //-------------------------------------------------------------------
13454 // Temporal update functions
13456 ret+="bool temp_status_received(){return temp_tuple_received;};\n\n";
13458 // create a temp status tuple
13459 ret += "int create_temp_status_tuple(host_tuple& result, bool flush_finished) {\n\n";
13461 ret += gen_init_temp_status_tuple(this->get_node_name());
13464 // (Here, offsets are hard-wired. is this a problem?)
13466 ret += "//\t\tPack the fields into the tuple.\n";
13467 for(s=0;s<select_list.size();s++){
13468 data_type *sdt = select_list[s]->se->get_data_type();
13469 if(sdt->is_temporal()){
13470 sprintf(tmpstr,"\ttuple->tuple_var%d = ",s);
13472 sprintf(tmpstr,"(flush_finished) ? %s : %s ", generate_se_code(select_list[s]->se,schema).c_str(), generate_se_code_fm_aggr(select_list[s]->se,"last_flushed_gb", "", schema).c_str());
13478 ret += "\treturn 0;\n";
13479 ret += "};};\n\n\n";
13482 //----------------------------------------------------------
13483 // The hash function
13485 ret += "struct "+generate_functor_name()+"_hash_func{\n";
13486 ret += "\tgs_uint32_t operator()(const "+generate_functor_name()+
13487 "_groupdef *grp) const{\n";
13488 ret += "\t\treturn(0";
13489 for(g=0;g<gb_tbl.size();g++){
13490 data_type *gdt = gb_tbl.get_data_type(g);
13491 if(! gdt->is_temporal()){
13493 if(gdt->use_hashfunc()){
13494 if(gdt->is_buffer_type())
13495 sprintf(tmpstr,"(%s*%s(&(grp->gb_var%d)))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13497 sprintf(tmpstr,"(%s*%s(grp->gb_var%d))",hash_nums[g%NRANDS].c_str(),gdt->get_hfta_hashfunc().c_str(),g);
13499 sprintf(tmpstr,"(%s*grp->gb_var%d)",hash_nums[g%NRANDS].c_str(),g);
13504 ret += " >> 32);\n";
13508 //----------------------------------------------------------
13509 // The comparison function
13511 ret += "struct "+generate_functor_name()+"_equal_func{\n";
13512 ret += "\tbool operator()(const "+generate_functor_name()+"_groupdef *grp1, "+
13513 generate_functor_name()+"_groupdef *grp2) const{\n";
13514 ret += "\t\treturn( (";
13517 bool first_exec = true;
13518 for(g=0;g<gb_tbl.size();g++){
13519 data_type *gdt = gb_tbl.get_data_type(g);
13520 if(! gdt->is_temporal()){
13521 if(first_exec){first_exec=false;}else{ hcmpr += ") && (";}
13522 if(gdt->complex_comparison(gdt)){
13523 if(gdt->is_buffer_type())
13524 sprintf(tmpstr,"(%s(&(grp1->gb_var%d), &(grp2->gb_var%d))==0)",
13525 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13527 sprintf(tmpstr,"(%s((grp1->gb_var%d), (grp2->gb_var%d))==0)",
13528 gdt->get_hfta_comparison_fcn(gdt).c_str(),g,g);
13530 sprintf(tmpstr,"grp1->gb_var%d == grp2->gb_var%d",g,g);
// Builds the C++ source text that declares this node's operator object.
// The generated statement has the shape
//     running_agg_operator<F, F_groupdef, F_aggrdef, F_hash_func, F_equal_func>
//         *op<i> = new running_agg_operator<...same arguments...>(<params>, "<node name>");
// where F is generate_functor_name().
//   i      - index appended to "op" to name the generated pointer variable.
//   params - text spliced in verbatim as the leading constructor argument(s);
//            the node name follows it as a quoted string argument.
// NOTE(review): the statement that consumes this concatenation (presumably a
// return) and the closing lines are not visible in this listing - confirm
// against the full file.
13547 string rsgah_qpn::generate_operator(int i, string params){
13550 " running_agg_operator<" +
13551 generate_functor_name()+","+
13552 generate_functor_name() + "_groupdef, " +
13553 generate_functor_name() + "_aggrdef, " +
13554 generate_functor_name()+"_hash_func, "+
13555 generate_functor_name()+"_equal_func "
13556 "> *op"+int_to_string(i)+" = new running_agg_operator<"+
13557 generate_functor_name()+","+
13558 generate_functor_name() + "_groupdef, " +
13559 generate_functor_name() + "_aggrdef, " +
13560 generate_functor_name()+"_hash_func, "+
13561 generate_functor_name()+"_equal_func "
13562 ">("+params+", \"" + get_node_name() + "\");\n"
13568 // Split aggregation into two HFTA components - sub and superaggregation
13569 // If unable to split the aggregates, empty vector will be returned
//   Ext_fcns - external function table; consulted for the HFTA super- and
//              sub-aggregate ids of every non-builtin aggregate.
//   Schema   - not referenced in the lines visible in this listing.
// On success ret_vec receives two new sgah_qpn nodes, in this order:
//   1) the low-level node, named "_"+node_name, using this node's table_name;
//   2) the high-level node, named node_name, whose table_name is "_"+node_name.
13570 vector<qp_node *> sgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13572 vector<qp_node *> ret_vec;
// NOTE(review): o, i, fta_flds, stream_flds, t and sel_names are not
// referenced in the lines visible here - possibly leftovers; confirm.
13573 int s, p, g, a, o, i;
13576 vector<string> fta_flds, stream_flds;
13577 int t = table_name->get_schema_ref();
13579 // Get the set of interfaces it accesses.
13581 vector<string> sel_names;
13583 // Verify that all of the ref'd UDAFs can be split.
// A user-defined aggregate qualifies only when both its super-aggregate and
// sub-aggregate ids are non-negative.
13585 for(a=0;a<aggr_tbl.size();++a){
13586 if(! aggr_tbl.is_builtin(a)){
13587 int afcn = aggr_tbl.get_fcn_id(a);
13588 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13589 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13590 if(hfta_super_id < 0 || hfta_sub_id < 0){
// NOTE(review): the body of this branch is not visible in this listing; per
// the header comment it presumably returns the still-empty ret_vec.
13596 /////////////////////////////////////////////////////
13597 // Split into aggr/aggr.
// NOTE(review): the next assignment copies the table_name pointer, so the low
// node appears to share the tablevar_t object with this node; the setters
// called on low_hfta_node->table_name below (set_range_var, set_machine,
// set_interface, set_ifq) would then also change this node's table_name -
// confirm this is intended.
13600 sgah_qpn *low_hfta_node = new sgah_qpn();
13601 low_hfta_node->table_name = table_name;
13602 low_hfta_node->set_node_name( "_"+node_name );
13603 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
// The high node keeps the original node name and gets a freshly allocated
// table variable named after the low node.
13606 sgah_qpn *hi_hfta_node = new sgah_qpn();
13607 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13608 hi_hfta_node->set_node_name( node_name );
13609 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13611 // First, process the group-by variables.
13612 // both low and hi level queries duplicate group-by variables of original query
13615 for(g=0;g<gb_tbl.size();g++){
13616 // Insert the gbvar into both low- and hi level hfta.
13617 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13618 low_hfta_node->gb_tbl.add_gb_var(
13619 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13622 // Insert a ref to the value of the gbvar into the low-level hfta select list.
// The ref is a column reference carrying the group-by variable's name, its
// index g as gb_ref, and the data type of the original definition.
13623 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13624 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13625 gbvar_fta->set_gb_ref(g);
13626 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13627 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13629 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
// In the high node the variable is registered under the field name of the
// returned column reference, with table-variable ref -1.
13630 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13631 hi_hfta_node->gb_tbl.add_gb_var(
13632 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13636 // hi_hfta_node->gb_tbl.gb_patterns = gb_tbl.gb_patterns; // pattern processing at highest level
13637 hi_hfta_node->gb_tbl.set_pattern_info( &gb_tbl); // pattern processing at highest level
13639 // SEs in the aggregate definitions.
13640 // They are all safe, so split them up for later processing.
// NOTE(review): the trailing arguments of the split_hfta_aggr call are not
// visible in this listing; hfta_aggr_se is later handed to rehome_fta_se /
// rehome_fta_pr, so it is presumably filled in by this call - confirm.
13641 map<int, scalarexp_t *> hfta_aggr_se;
13642 for(a=0;a<aggr_tbl.size();++a){
13643 split_hfta_aggr( &(aggr_tbl), a,
13644 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13645 low_hfta_node->select_list,
13652 // Next, the select list.
// Each original select element is rehomed and added to the high node under
// its original output name. fta_forbidden is not referenced in the lines
// visible here.
13654 for(s=0;s<select_list.size();s++){
13655 bool fta_forbidden = false;
13656 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13657 hi_hfta_node->select_list.push_back(
13658 new select_element(root_se, select_list[s]->name));
13663 // All the predicates in the where clause must execute
13664 // in the low-level hfta.
13666 for(p=0;p<where.size();p++){
13667 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13668 cnf_elem *new_cnf = new cnf_elem(new_pr);
13669 analyze_cnf(new_cnf);
13671 low_hfta_node->where.push_back(new_cnf);
13674 // All of the predicates in the having clause must
13675 // execute in the high-level hfta node.
13677 for(p=0;p<having.size();p++){
13678 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13679 cnf_elem *cnf_root = new cnf_elem(pr_root);
13680 analyze_cnf(cnf_root);
13682 hi_hfta_node->having.push_back(cnf_root);
13686 // Copy parameters to both nodes
// Each node receives its own duplicate of the parameter's data_type.
// The declaration of the loop index pi is not visible in this listing.
13687 vector<string> param_names = param_tbl->get_param_names();
13689 for(pi=0;pi<param_names.size();pi++){
13690 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13691 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13692 param_tbl->handle_access(param_names[pi]));
13693 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13694 param_tbl->handle_access(param_names[pi]));
13696 low_hfta_node->definitions = definitions;
13697 hi_hfta_node->definitions = definitions;
// Both table variables take this node's machine and interface and have
// their ifq flag cleared.
13700 low_hfta_node->table_name->set_machine(table_name->get_machine());
13701 low_hfta_node->table_name->set_interface(table_name->get_interface());
13702 low_hfta_node->table_name->set_ifq(false);
13704 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13705 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13706 hi_hfta_node->table_name->set_ifq(false);
// Result order: low-level node first, then high-level node.
13708 ret_vec.push_back(low_hfta_node);
13709 ret_vec.push_back(hi_hfta_node);
13715 // TODO: add splitting into selection/aggregation
13719 // Split aggregation into two HFTA components - sub and superaggregation
13720 // If unable to split the aggreagates, empty vector will be returned
13721 // Similar to sgah, but super aggregate is rsgah, subaggr is sgah
13722 vector<qp_node *> rsgah_qpn::split_node_for_hfta(ext_fcn_list *Ext_fcns, table_list *Schema){
13724 vector<qp_node *> ret_vec;
13725 int s, p, g, a, o, i;
13728 vector<string> fta_flds, stream_flds;
13729 int t = table_name->get_schema_ref();
13731 // Get the set of interfaces it accesses.
13733 vector<string> sel_names;
13735 // Verify that all of the ref'd UDAFs can be split.
13737 for(a=0;a<aggr_tbl.size();++a){
13738 if(! aggr_tbl.is_builtin(a)){
13739 int afcn = aggr_tbl.get_fcn_id(a);
13740 int hfta_super_id = Ext_fcns->get_hfta_superaggr_id(afcn);
13741 int hfta_sub_id = Ext_fcns->get_hfta_subaggr_id(afcn);
13742 if(hfta_super_id < 0 || hfta_sub_id < 0){
13748 /////////////////////////////////////////////////////
13749 // Split into aggr/aggr.
13752 sgah_qpn *low_hfta_node = new sgah_qpn();
13753 low_hfta_node->table_name = table_name;
13754 low_hfta_node->set_node_name( "_"+node_name );
13755 low_hfta_node->table_name->set_range_var(table_name->get_var_name());
13758 rsgah_qpn *hi_hfta_node = new rsgah_qpn();
13759 hi_hfta_node->table_name = new tablevar_t( ("_"+node_name).c_str());
13760 hi_hfta_node->set_node_name( node_name );
13761 hi_hfta_node->table_name->set_range_var(table_name->get_var_name());
13763 // First, process the group-by variables.
13764 // both low and hi level queries duplicate group-by variables of original query
13767 for(g=0;g<gb_tbl.size();g++){
13768 // Insert the gbvar into both low- and hi level hfta.
13769 scalarexp_t *gbvar_def = dup_se(gb_tbl.get_def(g), &aggr_tbl);
13770 low_hfta_node->gb_tbl.add_gb_var(
13771 gb_tbl.get_name(g), gb_tbl.get_tblvar_ref(g), gbvar_def, gb_tbl.get_reftype(g)
13774 // Insert a ref to the value of the gbvar into the low-level hfta select list.
13775 colref_t *new_cr = new colref_t(gb_tbl.get_name(g).c_str() );
13776 scalarexp_t *gbvar_fta = new scalarexp_t(new_cr);
13777 gbvar_fta->set_gb_ref(g);
13778 gbvar_fta->set_data_type( gb_tbl.get_def(g)->get_data_type() );
13779 scalarexp_t *gbvar_stream = make_fta_se_ref(low_hfta_node->select_list, gbvar_fta,0);
13781 // Insert the corresponding gbvar ref (gbvar_stream) into the stream.
13782 gbvar_stream->set_gb_ref(-1); // used as GBvar def
13783 hi_hfta_node->gb_tbl.add_gb_var(
13784 gbvar_stream->get_colref()->get_field(), -1, gbvar_stream, gb_tbl.get_reftype(g)
13789 // SEs in the aggregate definitions.
13790 // They are all safe, so split them up for later processing.
13791 map<int, scalarexp_t *> hfta_aggr_se;
13792 for(a=0;a<aggr_tbl.size();++a){
13793 split_hfta_aggr( &(aggr_tbl), a,
13794 &(hi_hfta_node->aggr_tbl), &(low_hfta_node->aggr_tbl) ,
13795 low_hfta_node->select_list,
13802 // Next, the select list.
13804 for(s=0;s<select_list.size();s++){
13805 bool fta_forbidden = false;
13806 scalarexp_t *root_se = rehome_fta_se(select_list[s]->se, &hfta_aggr_se);
13807 hi_hfta_node->select_list.push_back(
13808 new select_element(root_se, select_list[s]->name));
13813 // All the predicates in the where clause must execute
13814 // in the low-level hfta.
13816 for(p=0;p<where.size();p++){
13817 predicate_t *new_pr = dup_pr(where[p]->pr, &aggr_tbl);
13818 cnf_elem *new_cnf = new cnf_elem(new_pr);
13819 analyze_cnf(new_cnf);
13821 low_hfta_node->where.push_back(new_cnf);
13824 // All of the predicates in the having clause must
13825 // execute in the high-level hfta node.
13827 for(p=0;p<having.size();p++){
13828 predicate_t *pr_root = rehome_fta_pr( having[p]->pr, &hfta_aggr_se);
13829 cnf_elem *cnf_root = new cnf_elem(pr_root);
13830 analyze_cnf(cnf_root);
13832 hi_hfta_node->having.push_back(cnf_root);
13835 // Similarly, every closing_when predicate is rehomed into the high-level hfta node.
13836 for(p=0;p<closing_when.size();p++){
13837 predicate_t *pr_root = rehome_fta_pr( closing_when[p]->pr, &hfta_aggr_se);
13838 cnf_elem *cnf_root = new cnf_elem(pr_root);
13839 analyze_cnf(cnf_root);
13841 hi_hfta_node->closing_when.push_back(cnf_root);
13845 // Copy parameters to both nodes
13846 vector<string> param_names = param_tbl->get_param_names();
13848 for(pi=0;pi<param_names.size();pi++){
13849 data_type *dt = param_tbl->get_data_type(param_names[pi]);
13850 low_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13851 param_tbl->handle_access(param_names[pi]));
13852 hi_hfta_node->param_tbl->add_param(param_names[pi],dt->duplicate(),
13853 param_tbl->handle_access(param_names[pi]));
13855 low_hfta_node->definitions = definitions;
13856 hi_hfta_node->definitions = definitions;
13859 low_hfta_node->table_name->set_machine(table_name->get_machine());
13860 low_hfta_node->table_name->set_interface(table_name->get_interface());
13861 low_hfta_node->table_name->set_ifq(false);
13863 hi_hfta_node->table_name->set_machine(table_name->get_machine());
13864 hi_hfta_node->table_name->set_interface(table_name->get_interface());
13865 hi_hfta_node->table_name->set_ifq(false);
13867 ret_vec.push_back(low_hfta_node);
13868 ret_vec.push_back(hi_hfta_node);
13874 // TODO: add splitting into selection/aggregation
13877 //---------------------------------------------------------------
13878 // Code for propagating Protocol field source information
// resolve_protocol_se: recursively builds a new scalar expression for `se`,
// dispatching on se->get_operator_type().
//   se      - expression to resolve.
//   src_vec - one field-name -> expression map per source, indexed by the
//             column reference's table-variable index; entries may be NULL.
//   gb_tbl  - group-by table used to resolve group-by references; callers in
//             this file pass NULL when the node has no group-by table.
//   Schema  - used to look up the schema type of a column reference.
// Returns a newly built / duplicated expression on the visible paths.
// NOTE(review): the case labels, several guards and the failure returns are
// not visible in this view; confirm the details against the full source.
13881 scalarexp_t *resolve_protocol_se(scalarexp_t *se, vector<map<string, scalarexp_t *> *> &src_vec, gb_table *gb_tbl, table_list *Schema){
13882 scalarexp_t *rse, *lse,*p_se, *gb_se;
13883 int tno, schema_type;
13884 map<string, scalarexp_t *> *pse_map;
13886 switch(se->get_operator_type()){
// Literal: a new scalarexp_t is built from se->get_literal().
13888 return new scalarexp_t(se->get_literal());
// Parameter: a new parameter reference is made from the operator string.
13890 return scalarexp_t::make_param_reference(se->get_op().c_str());
// Group-by reference: report a missing gb_tbl (the guarding condition is not
// visible here), otherwise resolve the group-by variable's definition instead.
13894 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, se->gb_ref=%d, but gb_tbl is NULL\n",se->get_gb_ref());
13895 gb_se = gb_tbl->get_def(se->get_gb_ref());
13896 return resolve_protocol_se(gb_se,src_vec,gb_tbl,Schema);
// Column reference into a PROTOCOL_SCHEMA table: return a duplicate of se.
13899 schema_type = Schema->get_schema_type(se->get_colref()->get_schema_ref());
13900 if(schema_type == PROTOCOL_SCHEMA)
13901 return dup_se(se,NULL);
// Any other column reference: look the field up in the map of the source
// selected by the table-variable index.
// NOTE(review): tno is an int compared against an unsigned size(), and %lu
// assumes size() is an unsigned long - confirm on the target platforms.
13903 tno = se->get_colref()->get_tablevar_ref();
13904 if(tno >= src_vec.size()){
13905 fprintf(stderr,"INTERNAL ERROR, in resolve_protocol_se, tno=%d, src_vec.size()=%lu\n",tno,src_vec.size());
13907 if(src_vec[tno] == NULL)
13910 pse_map =src_vec[tno];
// NOTE(review): assuming map is std::map, operator[] inserts a NULL entry
// into the source's map when the field is absent.
13911 p_se = (*pse_map)[se->get_colref()->get_field()];
13914 return dup_se(p_se,NULL);
// Unary operator: resolve the operand and rebuild with the same operator.
13916 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13920 return new scalarexp_t(se->get_op().c_str(),lse);
// Binary operator: resolve both operands and rebuild with the same operator.
13922 lse = resolve_protocol_se(se->get_left_se(),src_vec,gb_tbl,Schema);
13925 rse = resolve_protocol_se(se->get_right_se(),src_vec,gb_tbl,Schema);
13928 return new scalarexp_t(se->get_op().c_str(),lse,rse);
// spx_qpn::create_protocol_se: fills protocol_map with one entry per
// select-list element, keyed by the element's name, whose value is the
// element's expression resolved by resolve_protocol_se.
//   q_sources - source nodes; each contributes its get_protocol_se() map, and
//               a NULL source contributes a NULL map.
//   Schema    - forwarded to resolve_protocol_se.
13942 void spx_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13944 vector<map<string, scalarexp_t *> *> src_vec;
// Collect the protocol map of every source.
// NOTE(review): the line joining the two push_back calls is not visible here
// (presumably an else) - confirm.
13946 for(i=0;i<q_sources.size();i++){
13947 if(q_sources[i] != NULL)
13948 src_vec.push_back(q_sources[i]->get_protocol_se());
13950 src_vec.push_back(NULL);
// No group-by table is passed (gb_tbl argument is NULL).
13953 for(i=0;i<select_list.size();i++){
13954 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// join_eq_hash_qpn::create_protocol_se: fills protocol_map from the select
// list, and appends to hash_src_l / hash_src_r the resolved left and right
// sides of every predicate in hash_eq.
//   q_sources - source nodes; each contributes its get_protocol_se() map, and
//               a NULL source contributes a NULL map.
//   Schema    - forwarded to resolve_protocol_se.
13958 void join_eq_hash_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13960 vector<map<string, scalarexp_t *> *> src_vec;
// Collect the protocol map of every source.
// NOTE(review): the line joining the two push_back calls is not visible here
// (presumably an else) - confirm.
13962 for(i=0;i<q_sources.size();i++){
13963 if(q_sources[i] != NULL)
13964 src_vec.push_back(q_sources[i]->get_protocol_se());
13966 src_vec.push_back(NULL);
// One protocol_map entry per select-list element, keyed by its name.
13969 for(i=0;i<select_list.size();i++){
13970 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// Resolve both sides of each hash-equality predicate.
13973 for(i=0;i<hash_eq.size();i++){
13974 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
13975 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
// filter_join_qpn::create_protocol_se: fills protocol_map from the select
// list, and appends to hash_src_l / hash_src_r the resolved left and right
// sides of every predicate in hash_eq.
//   q_sources - source nodes; each contributes its get_protocol_se() map, and
//               a NULL source contributes a NULL map.
//   Schema    - forwarded to resolve_protocol_se.
13979 void filter_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
13981 vector<map<string, scalarexp_t *> *> src_vec;
// Collect the protocol map of every source.
// NOTE(review): the line joining the two push_back calls is not visible here
// (presumably an else) - confirm.
13983 for(i=0;i<q_sources.size();i++){
13984 if(q_sources[i] != NULL)
13985 src_vec.push_back(q_sources[i]->get_protocol_se());
13987 src_vec.push_back(NULL);
// One protocol_map entry per select-list element, keyed by its name.
13990 for(i=0;i<select_list.size();i++){
13991 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// Resolve both sides of each hash-equality predicate.
13994 for(i=0;i<hash_eq.size();i++){
13995 hash_src_l.push_back(resolve_protocol_se(hash_eq[i]->pr->get_left_se(),src_vec,NULL,Schema));
13996 hash_src_r.push_back(resolve_protocol_se(hash_eq[i]->pr->get_right_se(),src_vec,NULL,Schema));
// watch_join_qpn::create_protocol_se: fills protocol_map from the select
// list, and for every name in key_flds appends to hash_src_l / hash_src_r the
// resolved left and right sides of the predicate stored in hash_eq under that
// name.
//   q_sources - source nodes; each contributes its get_protocol_se() map, and
//               a NULL source contributes a NULL map.
//   Schema    - forwarded to resolve_protocol_se.
14000 void watch_join_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14002 vector<map<string, scalarexp_t *> *> src_vec;
// Collect the protocol map of every source.
// NOTE(review): the line joining the two push_back calls is not visible here
// (presumably an else) - confirm.
14004 for(i=0;i<q_sources.size();i++){
14005 if(q_sources[i] != NULL)
14006 src_vec.push_back(q_sources[i]->get_protocol_se());
14008 src_vec.push_back(NULL);
// One protocol_map entry per select-list element, keyed by its name.
14011 for(i=0;i<select_list.size();i++){
14012 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,NULL,Schema);
// Here hash_eq is indexed by key-field name rather than by position.
// NOTE(review): presumably every key_flds name has an entry in hash_eq; the
// result of hash_eq[kfld] is dereferenced without a visible check - confirm.
14015 for(i=0;i<key_flds.size();i++){
14016 string kfld = key_flds[i];
14017 hash_src_l.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_left_se(),src_vec,NULL,Schema));
14018 hash_src_r.push_back(resolve_protocol_se(hash_eq[kfld]->pr->get_right_se(),src_vec,NULL,Schema));
// sgah_qpn::create_protocol_se: fills protocol_map from the select list and
// appends to gb_sources the resolved definition of every group-by variable.
// This node's gb_tbl is passed to resolve_protocol_se so that group-by
// references can be resolved through their definitions.
//   q_sources - source nodes; each contributes its get_protocol_se() map, and
//               a NULL source contributes a NULL map.
//   Schema    - forwarded to resolve_protocol_se.
14023 void sgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14025 vector<map<string, scalarexp_t *> *> src_vec;
// Collect the protocol map of every source.
// NOTE(review): the line joining the two push_back calls is not visible here
// (presumably an else) - confirm.
14027 for(i=0;i<q_sources.size();i++){
14028 if(q_sources[i] != NULL)
14029 src_vec.push_back(q_sources[i]->get_protocol_se());
14031 src_vec.push_back(NULL);
// One protocol_map entry per select-list element, keyed by its name.
14034 for(i=0;i<select_list.size();i++){
14035 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// One gb_sources entry per group-by variable, in gb_tbl order.
14038 for(i=0;i<gb_tbl.size();i++)
14039 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// rsgah_qpn::create_protocol_se: fills protocol_map from the select list and
// appends to gb_sources the resolved definition of every group-by variable.
// This node's gb_tbl is passed to resolve_protocol_se so that group-by
// references can be resolved through their definitions.
//   q_sources - source nodes; each contributes its get_protocol_se() map, and
//               a NULL source contributes a NULL map.
//   Schema    - forwarded to resolve_protocol_se.
14043 void rsgah_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14045 vector<map<string, scalarexp_t *> *> src_vec;
// Collect the protocol map of every source.
// NOTE(review): the line joining the two push_back calls is not visible here
// (presumably an else) - confirm.
14047 for(i=0;i<q_sources.size();i++){
14048 if(q_sources[i] != NULL)
14049 src_vec.push_back(q_sources[i]->get_protocol_se());
14051 src_vec.push_back(NULL);
// One protocol_map entry per select-list element, keyed by its name.
14054 for(i=0;i<select_list.size();i++){
14055 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// One gb_sources entry per group-by variable, in gb_tbl order.
14058 for(i=0;i<gb_tbl.size();i++)
14059 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// sgahcwcb_qpn::create_protocol_se: fills protocol_map from the select list
// and appends to gb_sources the resolved definition of every group-by
// variable. This node's gb_tbl is passed to resolve_protocol_se so that
// group-by references can be resolved through their definitions.
//   q_sources - source nodes; each contributes its get_protocol_se() map, and
//               a NULL source contributes a NULL map.
//   Schema    - forwarded to resolve_protocol_se.
14062 void sgahcwcb_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14064 vector<map<string, scalarexp_t *> *> src_vec;
// Collect the protocol map of every source.
// NOTE(review): the line joining the two push_back calls is not visible here
// (presumably an else) - confirm.
14066 for(i=0;i<q_sources.size();i++){
14067 if(q_sources[i] != NULL)
14068 src_vec.push_back(q_sources[i]->get_protocol_se());
14070 src_vec.push_back(NULL);
// One protocol_map entry per select-list element, keyed by its name.
14073 for(i=0;i<select_list.size();i++){
14074 protocol_map[select_list[i]->name] = resolve_protocol_se(select_list[i]->se,src_vec,&gb_tbl,Schema);
// One gb_sources entry per group-by variable, in gb_tbl order.
14077 for(i=0;i<gb_tbl.size();i++)
14078 gb_sources.push_back(resolve_protocol_se(gb_tbl.get_def(i),src_vec,&gb_tbl,Schema));
// mrg_qpn::create_protocol_se: for every field of table_layout, stores in
// protocol_map either the first source's protocol expression for that field
// or NULL. The visible code fetches the field's expression from src_vec[0]
// and, while `match` holds, compares it against each remaining source's
// expression with is_equivalent_se_base.
//   q_sources - source nodes; each contributes its get_protocol_se() map, and
//               a NULL source contributes a NULL map. An empty vector is
//               reported as an internal error.
//   Schema    - forwarded to is_equivalent_se_base.
// NOTE(review): the declaration of and assignments to `match`, and the
// condition choosing between the two protocol_map stores, are not visible in
// this view - confirm against the full source.
14081 void mrg_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){
14083 scalarexp_t *first_se;
14085 vector<map<string, scalarexp_t *> *> src_vec;
14086 map<string, scalarexp_t *> *pse_map;
// Collect the protocol map of every source.
// NOTE(review): the line joining the two push_back calls is not visible here
// (presumably an else) - confirm.
14088 for(i=0;i<q_sources.size();i++){
14089 if(q_sources[i] != NULL)
14090 src_vec.push_back(q_sources[i]->get_protocol_se());
14092 src_vec.push_back(NULL);
// What follows the message is not visible here.
14095 if(q_sources.size() == 0){
14096 fprintf(stderr,"INTERNAL ERROR in mrg_qpn::create_protocol_se, q_sources.size() == 0\n");
14100 vector<field_entry *> tbl_flds = table_layout->get_fields();
14101 for(f=0;f<tbl_flds.size();f++){
14103 string fld_nm = tbl_flds[f]->get_name();
// NOTE(review): src_vec[0] can be NULL (see the loop above) and is
// dereferenced on the next line. Assuming map is std::map, operator[] also
// inserts a NULL entry into the source's map when the field is absent.
14104 pse_map = src_vec[0];
14105 first_se = (*pse_map)[fld_nm];
14106 if(first_se == NULL)
// Compare against the remaining sources; the loop stops once match is false.
// NOTE(review): src_vec[s] is likewise dereferenced without a NULL check.
14108 for(s=1;s<src_vec.size() && match;s++){
14109 pse_map = src_vec[s];
14110 scalarexp_t *match_se = (*pse_map)[fld_nm];
14111 if(match_se == NULL)
14114 match = is_equivalent_se_base(first_se, match_se, Schema);
// The stored pointer is the first source's expression itself, not a copy.
14117 protocol_map[fld_nm] = first_se;
14119 protocol_map[fld_nm] = NULL;
14123 void watch_tbl_qpn::create_protocol_se(vector<qp_node *> q_sources, table_list *Schema){