1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
18 #include "parse_fta.h"
19 #include "parse_schema.h"
20 #include "parse_ext_fcns.h"
23 #include"analyze_fta.h"
25 #include"type_objects.h"
32 extern string hostname; // name of the current host
// Render the integer i in decimal (printf "%d") into tmpstr using sprintf.
// The declared return type is string.
36 string int_to_string(int i){
39 sprintf(tmpstr,"%d",i);
47 // These represent derived information from the
48 // query analysis stage. I extract them from a class,
49 // perhaps this is dangerous.
// File-scope tables holding the results of query analysis. param_tbl is
// consulted by assign_data_types() to obtain the data type of a query
// parameter.
51 static gb_table *gb_tbl=NULL; // Table of all group-by attributes.
52 static aggregate_table *aggr_tbl=NULL; // Table of all referenced aggregates.
54 // static cplx_lit_table *complex_literals=NULL; // Table of literals with constructors.
55 static param_table *param_tbl=NULL; // Table of all referenced parameters.
// A list of scalar expressions plus four start/end index pairs.
// NOTE(review): presumably each pair delimits the entries of
// partial_fcns_list that came from the WHERE, GROUP BY, aggregate and
// SELECT clauses respectively -- confirm against the code that fills them.
57 vector<scalarexp_t *> partial_fcns_list;
58 int wh_partial_start, wh_partial_end;
59 int gb_partial_start, gb_partial_end;
60 int aggr_partial_start, aggr_partial_end;
61 int sl_partial_start, sl_partial_end;
64 // Infer the table of a column reference and return the table ref.
66 // field name and table name. If no table name is used,
67 // search all tables to try to find a unique match.
68 // Of course, plenty of error checking.
70 // Return the set of tablevar indices in the FROM clause
71 // which contain a field with the same name.
// Look for the named field among the schemas referenced by the FROM clause.
//   field  : field name to search for.
//   fm     : FROM-clause table variable list; supplies the schema refs.
//   Schema : schema table queried with contains_field().
72 vector<int> find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){
75 // vector<string> tn = fm->get_schema_names();
// tn holds the integer schema references reported by the FROM clause.
76 vector<int> tn = fm->get_schema_refs();
77 // printf("Calling find_source_tables on field %s\n",field.c_str());
// Test every schema reference for a field with this name.
78 for(i=0;i<tn.size();i++){
79 // if(Schema->contains_field(Schema->find_tbl(tn[i]), field) ){
80 if(Schema->contains_field(tn[i], field) ){
82 // printf("\tfound in table %s\n",tn[i].c_str());
// Determine which FROM-clause table variable an interface-parameter
// reference belongs to.
//   ir : the interface parameter reference.
//   fm : FROM-clause table variable list.
// verify_colref() treats a negative result from this function as an error.
88 int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){
90 string tname = ir->get_tablevar();
// With a single table variable in the FROM clause the answer is index 0.
92 if(fm->size()==1) return 0;
93 fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str());
// Search the FROM clause for a table variable with the requested name.
96 for(i=0;i<fm->size();++i){
97 if(tname == fm->get_tablevar_name(i))
100 fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str());
105 // compute the index of the tablevar in the from clause that the
107 // return -1 if no tablevar can be imputed.
// Resolve a column reference to a table variable of the FROM clause.
//   cr     : the column reference to resolve.
//   fm     : FROM-clause table variable list.
//   schema : schema used to find which tables contain the field.
// Problems are reported on stderr with the line and character position of
// the column reference. Callers in this file treat a negative result as
// failure.
108 int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){
113 vector<tablevar_t *> fm_tbls = fm->get_table_list();
115 string field = cr->get_field();
117 // printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str());
// No table was named on the column: search the FROM clause for the field.
118 if(cr->uses_default_table() ){
119 tv = find_source_tables(field, fm, schema);
// Ambiguous field: list every candidate table variable for the user.
121 fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ",
122 cr->get_lineno(), cr->get_charno(),field.c_str() );
123 for(i=0;i<tv.size();i++){
124 fprintf(stderr,"%s ",fm_tbls[ tv[i] ]->to_string().c_str() );
126 fprintf(stderr,"\n\tYou must specify one of these.\n");
130 fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n",
131 cr->get_lineno(), cr->get_charno(),field.c_str() );
138 // The table source is named -- but is it a schema name
141 string interface = cr->get_interface();
142 table_name = cr->get_table_name();
144 // if interface is not specified, prefer to look at the tablevar names
145 // Check for duplicates.
147 for(i=0;i<fm_tbls.size();++i){
148 if(table_name == fm_tbls[i]->get_var_name())
152 fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() );
// Exactly one table variable matched by name: that is the answer.
155 if(tv.size() == 1) return(tv[0]);
158 // Tableref not found by looking at tableref vars, or an interface
159 // was specified. Try to match on schema and interface.
160 // Check for duplicates.
// NOTE(review): the comment above speaks of matching on schema, but the
// comparison below uses get_var_name() (the same accessor as the first
// loop) together with get_interface() -- confirm this is intended.
161 for(i=0;i<fm_tbls.size();++i){
162 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
166 fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n",
167 cr->get_lineno(), cr->get_charno() );
168 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
169 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
174 fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno());
175 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
176 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
184 // Reset temporal properties of a scalar expression
// Call reset_temporal() on the data type of se and, recursively, on the
// data types of its sub-expressions (left operand, right operand, or the
// operand list, depending on the operator type).
185 void reset_temporal(scalarexp_t *se){
187 vector<scalarexp_t *> operands;
// Reset this node first, then descend.
190 se->get_data_type()->reset_temporal();
192 switch(se->get_operator_type()){
199 reset_temporal(se->get_left_se());
202 reset_temporal(se->get_left_se());
203 reset_temporal(se->get_right_se());
208 reset_temporal(se->get_left_se());
// Operator with an operand list: reset every operand.
211 operands = se->get_operands();
212 for(o=0;o<operands.size();o++){
213 reset_temporal(operands[o]);
217 fprintf(stderr,"INTERNAL ERROR in reset_temporal, line %d, character %d: unknown operator type %d\n",
218 se->get_lineno(), se->get_charno(),se->get_operator_type());
223 // Verify that column references exist in their
224 // declared tables. As a side effect, assign
225 // their data types. Other side effects :
227 // return -1 on error
// Check the column references in scalar expression se against the FROM
// clause and the schema. Each resolved reference records its table
// variable, schema ref and table name, and the expression node receives its
// data type.
//   se     : expression to check; sub-expressions are visited recursively.
//   fm     : FROM-clause table variable list.
//   schema : schema supplying field type names and modifier lists.
//   gtbl   : group-by table, used to decide whether a column reference is
//            really a group-by reference.
// A negative return value signals an error.
229 int verify_colref(scalarexp_t *se, tablevar_list_t *fm,
230 table_list *schema, gb_table *gtbl){
235 string field, table_source, type_name;
241 vector<scalarexp_t *> operands;
243 switch(se->get_operator_type()){
// Interface parameter: bind it to a table variable of the FROM clause.
248 ir = se->get_ifpref();
249 table_var = infer_tablevar_from_ifpref(ir, fm);
250 if(table_var < 0) return(table_var);
251 ir->set_tablevar_ref(table_var);
254 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
// Two operands: check both so that errors in either side are reported.
256 l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl);
257 r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl);
258 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
261 cr = se->get_colref();
262 field = cr->get_field();
264 // Determine if this is really a GB ref.
265 // (the parser can only see that it's a colref).
267 gb_ref = gtbl->find_gb(cr, fm, schema);
272 se->set_gb_ref(gb_ref);
275 // It's a colref; verify its existence and
276 // record the data type.
277 table_var = infer_tablevar_from_colref(cr,fm,schema);
278 if(table_var < 0) return(table_var);
280 // Store the table ref in the colref.
281 cr->set_tablevar_ref(table_var);
282 cr->set_schema_ref(fm->get_schema_ref(table_var));
// The reference is now fully resolved: clear the interface and store the
// table variable's name as the table name.
283 cr->set_interface("");
284 cr->set_table_name(fm->get_tablevar_name(table_var));
// Build the expression's data type from the schema's type name and
// modifier list for this field.
287 type_name = schema->get_type_name(cr->get_schema_ref(), field);
288 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
289 dt = new data_type(type_name, modifiers);
290 se->set_data_type(dt);
292 // Else, it's a gbref, use the GB var's data type.
293 se->set_data_type(gtbl->get_data_type(gb_ref));
300 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
302 operands = se->get_operands();
// Check every operand; remember a failure in r_ret but keep going.
304 for(o=0;o<operands.size();o++){
305 l_ret = verify_colref(operands[o], fm, schema, gtbl);
306 if(l_ret < 0) r_ret = -1;
310 fprintf(stderr,"INTERNAL ERROR in verify_colref, line %d, character %d: unknown operator type %d\n",
311 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Apply verify_colref() to every scalar expression reachable from the
// predicate pr, recursing through sub-predicates.
//   pr     : predicate to check.
//   fm     : FROM-clause table variable list.
//   schema : schema passed through to verify_colref().
//   gtbl   : group-by table passed through to verify_colref().
// Returns -1 when either side of a two-operand predicate fails.
318 int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){
320 std::vector<scalarexp_t *> op_list;
323 switch(pr->get_operator_type()){
325 return(verify_colref(pr->get_left_se(),fm,schema, gtbl) );
// Predicate over two scalar expressions: check both sides.
327 l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ;
328 r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ;
329 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
332 return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl));
// Predicate over two sub-predicates: check both sides.
334 l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ;
335 r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ;
336 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
// Predicate with an operand list: check every operand, recording any
// failure in l_ret.
339 op_list = pr->get_op_list();
341 for(o=0;o<op_list.size();++o){
342 if(verify_colref(op_list[o],fm,schema,gtbl) < 0) l_ret = -1;
346 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
347 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Report whether the scalar expression se consists only of literals.
// A NULL expression counts as literal-only.
354 bool literal_only_se(scalarexp_t *se){ // really only literals.
356 vector<scalarexp_t *> operands;
358 if(se == NULL) return(1);
359 switch(se->get_operator_type()){
// One operand: the answer is that of the operand.
367 return( literal_only_se(se->get_left_se()) );
// Two operands: both must be literal-only.
369 return( literal_only_se(se->get_left_se()) &&
370 literal_only_se(se->get_right_se()) );
389 // Verify that column references exist in their
390 // declared tables. As a side effect, assign
391 // their data types. Other side effects :
// Bind the column references in scalar expression se to table variables of
// the FROM clause and check that the type already assigned to the
// expression subsumes the type the schema declares for the field.
//   se     : expression to bind; a NULL expression returns 1 immediately.
//   fm     : FROM-clause table variable list.
//   schema : schema supplying field type names and modifier lists.
// A negative return value signals an error. Group-by references and
// expressions with a non-negative aggregate reference return 1 without
// further work.
394 int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){
398 string field, table_source, type_name;
404 vector<scalarexp_t *> operands;
406 if(se == NULL) return(1);
408 switch(se->get_operator_type()){
416 return( bind_to_schema_se(se->get_left_se(), fm, schema) );
// Two operands: bind both so that errors in either side are reported.
418 l_ret = bind_to_schema_se(se->get_left_se(), fm, schema);
419 r_ret = bind_to_schema_se(se->get_right_se(), fm, schema);
420 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
423 if(se->is_gb()) return(1); // gb ref not a colref.
425 cr = se->get_colref();
426 field = cr->get_field();
428 tablevar_ref = infer_tablevar_from_colref(cr,fm,schema);
429 if(tablevar_ref < 0){
430 return(tablevar_ref);
432 // Store the table ref in the colref.
433 cr->set_tablevar_ref(tablevar_ref);
434 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
// The reference is now fully resolved: clear the interface and store the
// table variable's name as the table name.
435 cr->set_interface("");
436 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
438 // Check the data type
439 type_name = schema->get_type_name(cr->get_schema_ref(), field);
440 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
// dt is the type the schema (table) declares for this field.
441 data_type dt(type_name, modifiers);
442 // if(! dt.equals(se->get_data_type()) ){
443 // if(! dt.subsumes_type(se->get_data_type()) ){
444 if(! se->get_data_type()->subsumes_type(&dt) ){
// Argument order follows the message: the expression's type first, then the
// table's type (dt).
445 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n",
446 se->get_data_type()->type_indicator(), dt.type_indicator(), cr->to_string().c_str());
453 case SE_AGGR_SE: // Probably I should just return,
454 // aggregate se's are explicitly bound to the schema.
455 // return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) );
458 if(se->get_aggr_ref() >= 0) return 1;
460 operands = se->get_operands();
// Bind every operand; remember a failure in r_ret but keep going.
462 for(o=0;o<operands.size();o++){
463 l_ret = bind_to_schema_se(operands[o], fm, schema);
464 if(l_ret < 0) r_ret = -1;
468 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se, line %d, character %d: unknown operator type %d\n",
469 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Apply bind_to_schema_se() to every scalar expression reachable from the
// predicate pr, recursing through sub-predicates.
//   pr     : predicate to bind.
//   fm     : FROM-clause table variable list.
//   schema : schema passed through to bind_to_schema_se().
// Returns -1 when either side of a two-operand predicate fails.
476 int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){
478 vector<scalarexp_t *> op_list;
481 switch(pr->get_operator_type()){
483 return(bind_to_schema_se(pr->get_left_se(),fm,schema) );
// Predicate over two scalar expressions: bind both sides.
485 l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ;
486 r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ;
487 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
490 return(bind_to_schema_pr(pr->get_left_pr(),fm,schema));
// Predicate over two sub-predicates: bind both sides.
492 l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ;
493 r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ;
494 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
// Predicate with an operand list: bind every operand, recording any failure
// in l_ret.
497 op_list = pr->get_op_list();
499 for(o=0;o<op_list.size();++o){
500 if(bind_to_schema_se(op_list[o],fm,schema) < 0) l_ret = -1;
504 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_pr, line %d, character %d, unknown predicate operator type %d\n",
505 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
516 // Compute the temporal type of a scalar expression.
517 // tcol maps column ids to their temporal types; a column
518 // reference that is not in tcol is treated as varying_t.
520 // Returns the temporal_type of se.
// Compute the temporal type of scalar expression se.
//   se   : expression to analyse; operands are analysed recursively.
//   tcol : temporal type of each known column, keyed by column id.
522 temporal_type compute_se_temporal(scalarexp_t *se, map<col_id, temporal_type> &tcol){
526 vector<scalarexp_t *> operands;
527 vector<data_type *> odt;
529 vector<bool> handle_ind;
531 switch(se->get_operator_type()){
537 return(varying_t); // actually, this should not be called.
// One operand: combine the operand's temporal type with the operator.
539 return data_type::compute_temporal_type(
540 compute_se_temporal(se->get_left_se(), tcol), se->get_op()
// Two operands: combine both operands' temporal types and data types.
543 return data_type::compute_temporal_type(
544 compute_se_temporal(se->get_left_se(), tcol),
545 compute_se_temporal(se->get_right_se(), tcol),
546 se->get_left_se()->get_data_type()->get_type(),
547 se->get_right_se()->get_data_type()->get_type(),
// Column reference: use the recorded temporal type, or varying_t when the
// column is not in tcol.
552 col_id cid(se->get_colref() );
553 if(tcol.count(cid) > 0){ return tcol[cid];
554 }else{ return varying_t;}
567 // verify_colref assigned data types to the column refs.
568 // Now assign data types to all other nodes in the
569 // scalar expression.
571 // return -1 on error
// Assign a data type to scalar expression se and, recursively, to its
// sub-expressions, reporting type errors on stderr.
//   se       : expression to type.
//   schema   : schema, passed through to the recursive calls.
//   fta_tree : query parse tree, passed through to the recursive calls.
//   Ext_fcns : external function table used to resolve function calls
//              (aggregate extractors, UDAFs, stateful functions, regular
//              functions) from the operator name and operand types.
// A negative return value signals an error.
573 int assign_data_types(scalarexp_t *se, table_list *schema,
574 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
578 vector<scalarexp_t *> operands;
579 vector<data_type *> odt;
581 vector<bool> handle_ind;
582 vector<bool> constant_ind;
584 switch(se->get_operator_type()){
// Literal: the type comes from the literal itself.
586 dt = new data_type( se->get_literal()->get_type() );
587 se->set_data_type(dt);
588 if( ! dt->is_defined() ){
589 fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n",
590 se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() );
// Query parameter: the type comes from the file-scope parameter table.
597 string pname = se->get_param_name();
598 dt = param_tbl->get_data_type(pname);
599 // A SE_PARAM can change its value mid-query so using one
600 // to set a window is dangerous. TODO check for this and issue a warning.
601 dt->set_temporal(constant_t);
602 se->set_data_type(dt);
603 if( ! dt->is_defined() ){
604 fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n",
605 pname.c_str(), se->get_lineno(),se->get_charno() );
611 dt = new data_type( "STRING" );
612 se->set_data_type(dt);
// Unary operator: type the operand, then derive the result type from the
// operand type and the operator.
615 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
616 if(l_ret < 0) return -1;
618 dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() );
619 se->set_data_type(dt);
620 if( ! dt->is_defined() ){
621 fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n",
622 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
623 se->get_lineno(), se->get_charno() );
// Binary operator: type both operands, then derive the result type from
// the two operand types and the operator.
629 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
630 r_ret = assign_data_types(se->get_right_se(), schema, fta_tree, Ext_fcns);
631 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
633 dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() );
634 se->set_data_type(dt);
635 if( ! dt->is_defined() ){
636 fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n",
637 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
638 se->get_right_se()->get_data_type()->to_string().c_str(),
639 se->get_lineno(), se->get_charno() );
// Column reference: the type was already attached; only check it.
645 dt = se->get_data_type();
646 bret = dt->is_defined();
650 fprintf(stderr,"ERROR, column reference type is undefined, line =%d, char = %d, colref=%s\n",
651 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
655 dt = new data_type("Int"); // changed Uint to Int
656 se->set_data_type(dt);
// Aggregate over an expression: type the operand, then derive the
// aggregate's type from the operator and the operand type.
659 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
660 if(l_ret < 0) return -1;
662 dt = new data_type();
663 dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type());
664 se->set_data_type(dt);
666 if( ! dt->is_defined() ){
667 fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n",
668 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
669 se->get_lineno(), se->get_charno() );
// Function call: type every operand and collect the operand types in odt,
// which is the signature used for all lookups below.
676 operands = se->get_operands();
678 for(o=0;o<operands.size();o++){
679 l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns);
680 odt.push_back(operands[o]->get_data_type());
681 if(l_ret < 0) r_ret = -1;
683 if(r_ret < 0) return(r_ret);
685 // Is it an aggregate extraction function?
686 fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt);
688 int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id);
689 int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id);
690 int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id);
691 // Construct a se for the subaggregate.
692 vector<scalarexp_t *> op_a;
// The leading operands go to the subaggregate; the actual function takes
// the subaggregate result plus the remaining operands.
693 int n_aggr_oprs = operands.size()-n_fcn_params+1;
694 for(o=0;o<n_aggr_oprs;++o){
695 op_a.push_back(operands[o]);
697 // check handle params
698 vector<bool> handle_a = Ext_fcns->get_handle_indicators(subaggr_id);
699 for(o=0;o<op_a.size();o++){
701 if(op_a[o]->get_operator_type() != SE_LITERAL &&
702 op_a[o]->get_operator_type() != SE_IFACE_PARAM &&
703 op_a[o]->get_operator_type() != SE_PARAM){
704 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
705 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
710 vector<bool> is_const_a=Ext_fcns->get_const_indicators(subaggr_id);
711 for(o=0;o<op_a.size();o++){
713 if(op_a[o]->get_data_type()->get_temporal() != constant_t){
714 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n Line=%d, char=%d.\n",
715 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Build the subaggregate expression node.
721 scalarexp_t *se_a = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a);
722 se_a->set_fcn_id(subaggr_id);
723 se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id));
724 se_a->set_aggr_id(0); // label this as a UDAF.
727 // Change this se to be the actual function
728 vector<scalarexp_t *> op_f;
729 op_f.push_back(se_a);
730 for(o=n_aggr_oprs;o<operands.size();++o)
731 op_f.push_back(operands[o]);
732 // check handle params
733 vector<bool> handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id);
734 for(o=0;o<op_f.size();o++){
736 if(op_f[o]->get_operator_type() != SE_LITERAL &&
737 op_f[o]->get_operator_type() != SE_IFACE_PARAM &&
738 op_f[o]->get_operator_type() != SE_PARAM){
739 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
740 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
745 vector<bool> is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id);
746 for(o=0;o<op_f.size();o++){
748 if(op_f[o]->get_data_type()->get_temporal() != constant_t){
749 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n Line=%d, char=%d.\n",
750 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Rewrite this node in place as a call to the actual function.
756 se->param_list = op_f;
757 se->op = Ext_fcns->get_fcn_name(actual_fcn_id);
758 se->set_fcn_id(actual_fcn_id);
759 se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id));
763 fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str());
// Is it a user-defined aggregate function (UDAF)?
767 fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt);
769 se->set_fcn_id(fcn_id);
770 se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
771 se->set_aggr_id(0); // label this as a UDAF.
772 // Finally, verify that all HANDLE parameters are literals or params.
773 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
774 for(o=0;o<operands.size();o++){
776 if(operands[o]->get_operator_type() != SE_LITERAL &&
777 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
778 operands[o]->get_operator_type() != SE_PARAM){
779 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
780 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
785 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
786 for(o=0;o<operands.size();o++){
788 if(operands[o]->get_data_type()->get_temporal() != constant_t){
789 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n",
790 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
796 // UDAFS as superaggregates not yet supported.
797 if(se->is_superaggr()){
798 fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
799 se->set_superaggr(false);
804 fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str());
807 // Is it a stateful fcn?
808 fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt);
810 se->set_fcn_id(fcn_id);
811 se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
812 se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun
813 // Finally, verify that all HANDLE parameters are literals or params.
814 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
815 for(o=0;o<operands.size();o++){
817 if(operands[o]->get_operator_type() != SE_LITERAL &&
818 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
819 operands[o]->get_operator_type() != SE_PARAM){
// This branch handles stateful functions, so the message names them as such.
820 fprintf(stderr,"ERROR, the %d-th parameter of stateful function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
821 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
826 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
827 for(o=0;o<operands.size();o++){
829 if(operands[o]->get_data_type()->get_temporal() != constant_t){
830 fprintf(stderr,"ERROR, the %d-th parameter of stateful function %s must be constant.\n Line=%d, char=%d.\n",
831 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
837 if(se->is_superaggr()){
838 fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
843 fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str());
847 // Is it a regular function?
848 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt);
// No usable match: print the attempted signature for the user.
850 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
851 for(o=0;o<operands.size();o++){
852 if(o>0) fprintf(stderr,", ");
853 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
855 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
856 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
861 se->set_fcn_id(fcn_id);
862 dt = Ext_fcns->get_fcn_dt(fcn_id);
864 if(! dt->is_defined() ){
865 fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str());
866 for(o=0;o<operands.size();o++){
867 if(o>0) fprintf(stderr,", ");
868 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
870 fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() );
874 // Finally, verify that all HANDLE parameters are literals or params.
875 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
876 for(o=0;o<operands.size();o++){
878 if(operands[o]->get_operator_type() != SE_LITERAL &&
879 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
880 operands[o]->get_operator_type() != SE_PARAM){
881 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
882 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
887 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
888 for(o=0;o<operands.size();o++){
890 if(operands[o]->get_data_type()->get_temporal() != constant_t){
891 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be constant.\n Line=%d, char=%d.\n",
892 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
899 if(se->is_superaggr()){
900 fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
903 se->set_data_type(dt);
906 fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n",
907 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Assign data types to every scalar expression reachable from the predicate
// pr and check that the predicate's operands have compatible types.
//   pr       : predicate to type.
//   schema   : schema, passed through to assign_data_types().
//   fta_tree : query parse tree, passed through to assign_data_types().
//   Ext_fcns : external function table used to resolve predicate functions.
// A negative return value signals an error.
914 int assign_predicate_data_types(predicate_t *pr, table_list *schema,
915 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
919 vector<data_type *> odt;
920 vector<literal_t *> litl;
921 vector<scalarexp_t *> operands;
922 vector<bool> handle_ind;
923 vector<bool> constant_ind;
926 switch(pr->get_operator_type()){
// Membership test against a literal list: every literal must be comparable
// with the left-hand expression.
928 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set);
929 litl = pr->get_lit_vec();
930 dt = pr->get_left_se()->get_data_type();
932 for(i=0;i<litl.size();i++){
// NOTE(review): a data_type is allocated per literal; no matching release
// is visible here -- confirm whether it is freed.
933 dtl = new data_type( litl[i]->get_type() );
934 if( ! dt->is_comparable(dtl,pr->get_op()) ){
935 fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n",
936 litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() );
// Comparison of two scalar expressions: type both, then check that the
// types are comparable under this operator.
944 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
945 r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
946 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
948 if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){
949 fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n",
950 pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(),
951 pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() );
957 return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set));
// Predicate over two sub-predicates: type both sides.
959 l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
960 r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
961 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
// Predicate function: type every operand and collect the operand types in
// odt, which is the signature used for the lookup below.
964 operands = pr->get_op_list();
966 for(o=0;o<operands.size();o++){
967 l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns); // , ext_fcn_set);
968 odt.push_back(operands[o]->get_data_type());
969 if(l_ret < 0) r_ret = -1;
971 if(r_ret < 0) return(r_ret);
973 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt);
// No usable match: print the attempted signature for the user.
975 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
976 for(o=0;o<operands.size();o++){
977 if(o>0) fprintf(stderr,", ");
978 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
980 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
981 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
985 // ext_fcn_set.insert(fcn_id);
986 pr->set_fcn_id(fcn_id);
988 // Finally, verify that all HANDLE parameters are literals or params.
989 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
990 for(o=0;o<operands.size();o++){
992 if(operands[o]->get_operator_type() != SE_LITERAL &&
993 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
994 operands[o]->get_operator_type() != SE_PARAM){
995 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
996 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
// Parameters flagged as constant must have a constant temporal type.
1001 constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id());
1002 for(o=0;o<operands.size();o++){
1003 if(constant_ind[o]){
1004 if(operands[o]->get_data_type()->get_temporal() != constant_t){
1005 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be constant.\n Line=%d, char=%d.\n",
1006 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
1013 // Check if this predicate function is special sampling function
1014 pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id());
1019 fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n",
1020 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1028 /////////////////////////////////////////////////////////////////////
1029 //////////////// Make a deep copy of a se / pred tree
1030 /////////////////////////////////////////////////////////////////////
1033 // duplicate a select element
// Build a new select_element from a dup_se() copy of sl's scalar expression
// and sl's name. aggr_tbl is passed through to dup_se().
1034 select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){
1035 return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str());
1038 // duplicate a scalar expression.
// Build a copy of the scalar expression tree rooted at se.
//   se       : expression to copy; its operands are copied recursively.
//   aggr_tbl : passed unchanged to the recursive calls.
// Every new node takes over the decorations of the node it was copied from
// via use_decorations_of().
1039 scalarexp_t *dup_se(scalarexp_t *se,
1040 aggregate_table *aggr_tbl
1043 vector<scalarexp_t *> operand_list;
1044 vector<data_type *> dt_signature;
1045 scalarexp_t *ret_se, *l_se, *r_se;
1047 switch(se->get_operator_type()){
1049 ret_se = new scalarexp_t(se->get_literal());
1050 ret_se->use_decorations_of(se);
1054 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1055 ret_se->use_decorations_of(se);
1058 case SE_IFACE_PARAM:
1059 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1060 ret_se->use_decorations_of(se);
// Column reference: the colref itself is duplicated.
1064 ret_se = new scalarexp_t(se->get_colref()->duplicate());
1065 ret_se->rhs.scalarp = se->rhs.scalarp; // carry along notation
1066 ret_se->use_decorations_of(se);
// One operand: copy it, then rebuild the node around the copy.
1070 l_se = dup_se(se->get_left_se(), aggr_tbl);
1071 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1072 ret_se->use_decorations_of(se);
// Two operands: copy both, then rebuild the node around the copies.
1076 l_se = dup_se(se->get_left_se(), aggr_tbl);
1077 r_se = dup_se(se->get_right_se(), aggr_tbl);
1079 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1080 ret_se->use_decorations_of(se);
1085 ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str());
1086 ret_se->use_decorations_of(se);
1090 l_se = dup_se(se->get_left_se(), aggr_tbl);
1091 ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se);
1092 ret_se->use_decorations_of(se);
// Operand list: copy every operand into a fresh list.
1097 operand_list = se->get_operands();
1098 vector<scalarexp_t *> new_operands;
1099 for(p=0;p<operand_list.size();p++){
1100 l_se = dup_se(operand_list[p], aggr_tbl);
1101 new_operands.push_back(l_se);
1104 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1105 ret_se->use_decorations_of(se);
// Internal errors go to stderr, like every other INTERNAL ERROR report in
// this file.
1110 fprintf(stderr,"INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type());
// Build a copy of the predicate tree rooted at pr. Scalar expressions are
// copied with dup_se() and sub-predicates with dup_pr().
//   pr       : predicate to copy.
//   aggr_tbl : passed unchanged to dup_se() and the recursive calls.
1120 predicate_t *dup_pr(predicate_t *pr,
1121 aggregate_table *aggr_tbl
1124 vector<literal_t *> llist;
1125 scalarexp_t *se_l, *se_r;
1126 predicate_t *pr_l, *pr_r, *ret_pr;
1127 vector<scalarexp_t *> op_list, new_op_list;
1131 switch(pr->get_operator_type()){
// Literal-list predicate: the left expression is copied; the literal vector
// is passed to the new predicate as returned by get_lit_vec().
1133 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1134 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Predicate over two scalar expressions.
1138 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1139 se_r = dup_se(pr->get_right_se(), aggr_tbl);
1140 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Predicate over one sub-predicate.
1144 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1145 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1148 case PRED_BINARY_OP:
1149 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1150 pr_r = dup_pr(pr->get_right_pr(), aggr_tbl);
1151 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate with an operand list: copy every operand, then carry over the
// function id and the sampling-function flag.
1154 op_list = pr->get_op_list();
1155 for(o=0;o<op_list.size();++o){
1156 se_l = dup_se(op_list[o], aggr_tbl);
1157 new_op_list.push_back(se_l);
1159 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1160 ret_pr->set_fcn_id(pr->get_fcn_id());
1161 ret_pr->is_sampling_fcn = pr->is_sampling_fcn;
1165 fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n",
1166 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Build a copy of the table expression `te` in a freshly allocated
// table_exp_t. Select-list expressions, predicates and the slack expression
// are copied with dup_se / dup_pr using a NULL aggregate table; the FROM list
// and the group-by entries are copied through their own duplicate() methods.
// NOTE(review): te->sl and te->fm are dereferenced without a visible NULL check.
1174 table_exp_t *dup_table_exp(table_exp_t *te){
1176 table_exp_t *ret = new table_exp_t();
1178 ret->query_type = te->query_type;
// Copy every entry of the name map.
1180 ss_map::iterator ss_i;
1181 for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){
1182 ret->nmap[(*ss_i).first] = (*ss_i).second;
// Each query parameter gets its own var_pair_t built from the source name/val.
1185 for(i=0;i<te->query_params.size();++i){
1186 ret->query_params.push_back(new
1187 var_pair_t(te->query_params[i]->name,te->query_params[i]->val) );
// Rebuild the select list, keeping its source position and each element's name.
1191 ret->sl = new select_list_t();
1192 ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno;
1193 vector<select_element *> select_list = te->sl->get_select_list();
1194 for(i=0;i<select_list.size();++i){
1195 scalarexp_t *se = dup_se(select_list[i]->se,NULL);
1196 ret->sl->append(se,select_list[i]->name);
1200 ret->fm = te->fm->duplicate();
// The optional predicates are copied only when present in the source.
1202 if(te->wh) ret->wh = dup_pr(te->wh,NULL);
1203 if(te->hv) ret->hv = dup_pr(te->hv,NULL);
1204 if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL);
1205 if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL);
1206 if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL);
1208 for(i=0;i<te->gb.size();++i){
1209 extended_gb_t *tmp_g = te->gb[i]->duplicate();
1210 ret->gb.push_back(tmp_g);
// mergevars is copied by plain assignment.
1213 ret->mergevars = te->mergevars;
// NOTE(review): unlike the predicates above, slack is handed to dup_se without
// a NULL guard -- presumably dup_se accepts NULL; confirm.
1215 ret->slack = dup_se(te->slack,NULL);
1216 ret->lineno = te->lineno;
1217 ret->charno = te->charno;
1228 /////////////////////////////////////////////////////////////////////////
1229 // Bind colrefs to a member of their FROM list
// Re-point references inside the scalar expression tree `se` from FROM-list
// entry prev_ref to entry new_ref. Interface-parameter and column references
// whose tablevar ref equals prev_ref receive the new index and the variable
// name of fm[new_ref]; sub-expressions and operands are visited recursively.
// NOTE(review): fm[new_ref] is indexed without a visible bounds check.
1231 void bind_colref_se(scalarexp_t *se,
1232 vector<tablevar_t *> &fm,
1233 int prev_ref, int new_ref
1236 vector<scalarexp_t *> operand_list;
1240 switch(se->get_operator_type()){
1244 case SE_IFACE_PARAM:
1245 ir = se->get_ifpref();
1246 if(ir->get_tablevar_ref() == prev_ref){
1247 ir->set_tablevar_ref(new_ref);
1248 ir->set_tablevar(fm[new_ref]->get_var_name());
// Column reference: same rebinding, but the name is stored as the table name.
1253 cr=se->get_colref();
1254 if(cr->get_tablevar_ref() == prev_ref){
1255 cr->set_tablevar_ref(new_ref);
1256 // cr->set_interface(fm[new_ref]->get_interface());
1257 cr->set_table_name(fm[new_ref]->get_var_name());
1262 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
1266 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
1267 bind_colref_se(se->get_right_se(), fm, prev_ref, new_ref);
// A node that carries an aggregate reference is not descended into.
1275 if(se->get_aggr_ref() >= 0) return;
1277 operand_list = se->get_operands();
1278 for(p=0;p<operand_list.size();p++){
1279 bind_colref_se(operand_list[p], fm, prev_ref, new_ref);
// NOTE(review): this error goes to stdout via printf, whereas bind_colref_pr
// reports the same kind of error with fprintf(stderr, ...).
1284 printf("INTERNAL ERROR in bind_colref_se: operator type %d\n",se->get_operator_type());
// Predicate counterpart of bind_colref_se: walks the predicate tree `pr` and
// applies bind_colref_se (same fm, prev_ref, new_ref) to every scalar
// expression it contains, recursing through child predicates.
1295 void bind_colref_pr(predicate_t *pr,
1296 vector<tablevar_t *> &fm,
1297 int prev_ref, int new_ref
1299 vector<scalarexp_t *> op_list;
1302 switch(pr->get_operator_type()){
1304 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1308 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1309 bind_colref_se(pr->get_right_se(), fm, prev_ref, new_ref);
1313 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1316 case PRED_BINARY_OP:
1317 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1318 bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref);
// Operand-list predicate: rebind each operand expression.
1321 op_list = pr->get_op_list();
1322 for(o=0;o<op_list.size();++o){
1323 bind_colref_se(op_list[o], fm, prev_ref, new_ref);
// Unrecognised operator type: report source position and type on stderr.
1328 fprintf(stderr,"INTERNAL ERROR in bind_colref_pr, line %d, character %d, unknown predicate operator type %d\n",
1329 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1338 /////////////////////////////////////////////////////////////////////
1339 // verify that the se refs only literals and params.
1340 // (use to verify that the expression should stay in the hfta
1342 /////////////////////////////////////////////////////////////////////
// A NULL expression is accepted (true). Interface parameters are rejected;
// unary and binary operators qualify only if all their sub-expressions do.
1344 bool is_literal_or_param_only(scalarexp_t *se){
1346 vector<scalarexp_t *> operands;
1349 if(se == NULL) return(true);
1351 switch(se->get_operator_type()){
1355 case SE_IFACE_PARAM:
1356 return(false); // need to treat as colref
1358 return(is_literal_or_param_only(se->get_left_se()) );
1361 is_literal_or_param_only(se->get_left_se()) &&
1362 is_literal_or_param_only(se->get_right_se())
1370 // The fcn might have special meaning at the lfta ...
// Unrecognised operator type: report source position and type on stderr.
1374 fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n",
1375 se->get_lineno(), se->get_charno(),se->get_operator_type());
1383 /////////////////////////////////////////////////////////////////////
1384 // Search for gb refs.
1385 // (use to verify that no gbrefs in a gb def.)
1386 /////////////////////////////////////////////////////////////////////
// Recursively tally group-by references in `se`. A NULL expression counts as
// 0; counts of sub-expressions and operands are added together.
1389 int count_gb_se(scalarexp_t *se){
1391 vector<scalarexp_t *> operands;
1394 if(se == NULL) return(0);
1396 switch(se->get_operator_type()){
1399 case SE_IFACE_PARAM:
1402 return(count_gb_se(se->get_left_se()) );
1405 count_gb_se(se->get_left_se()) +
1406 count_gb_se(se->get_right_se())
// A negative gb_ref means this node is not a group-by reference.
1409 if(se->get_gb_ref() < 0) return(0);
1415 operands = se->get_operands();
1416 for(o=0;o<operands.size();o++){
1417 sum += count_gb_se(operands[o]);
// Unrecognised operator type: report source position and type on stderr.
1422 fprintf(stderr,"INTERNAL ERROR in count_gb_se, line %d, character %d: unknown operator type %d\n",
1423 se->get_lineno(), se->get_charno(),se->get_operator_type());
1430 /////////////////////////////////////////////////////////////////////
1431 //////////////// Search for stateful fcns.
1432 /////////////////////////////////////////////////////////////////////
// Count stateful-function references in `se` (0 for a NULL expression).
// Counts from sub-expressions and operands are summed; a function node that
// carries an aggregate reference adds one, and a non-empty storage state is
// tested as well.
1435 int se_refs_sfun(scalarexp_t *se){
1437 vector<scalarexp_t *> operands;
1440 if(se == NULL) return(0);
1442 switch(se->get_operator_type()){
1445 case SE_IFACE_PARAM:
1448 return(se_refs_sfun(se->get_left_se()) );
1451 se_refs_sfun(se->get_left_se()) +
1452 se_refs_sfun(se->get_right_se())
1460 operands = se->get_operands();
1461 for(o=0;o<operands.size();o++){
1462 sum += se_refs_sfun(operands[o]);
1464 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1466 // for now, stateful functions count as aggregates.
1467 if(se->get_storage_state() != "")
// Unrecognised operator type: report source position and type on stderr.
1473 fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n",
1474 se->get_lineno(), se->get_charno(),se->get_operator_type());
1481 // Return a count of the number of stateful fcns in this predicate.
// Dispatches on the predicate's operator type, summing se_refs_sfun over its
// scalar operands and pred_refs_sfun over its child predicates.
1482 int pred_refs_sfun(predicate_t *pr){
1483 vector<scalarexp_t *> op_list;
1486 switch(pr->get_operator_type()){
1488 return(se_refs_sfun(pr->get_left_se()) );
1491 se_refs_sfun(pr->get_left_se()) +
1492 se_refs_sfun(pr->get_right_se())
1495 return(pred_refs_sfun(pr->get_left_pr()) );
1496 case PRED_BINARY_OP:
1498 pred_refs_sfun(pr->get_left_pr()) +
1499 pred_refs_sfun(pr->get_right_pr())
// Operand-list predicate: accumulate the count of every operand.
1502 op_list = pr->get_op_list();
1504 for(o=0;o<op_list.size();++o){
1505 aggr_sum += se_refs_sfun(op_list[o]);
// Unrecognised operator type: report source position and type on stderr.
1510 fprintf(stderr,"INTERNAL ERROR in pred_refs_sfun, line %d, character %d, unknown predicate operator type %d\n",
1511 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1518 //////////////////////////////////////////////////
1520 /////////////////////////////////////////////////////////////////////
1521 //////////////// Search for aggregates.
1522 /////////////////////////////////////////////////////////////////////
// Count aggregate references in `se` (0 for a NULL expression). `strict` is
// forwarded unchanged to every recursive call; it gates the storage-state
// test on function nodes further down.
1525 int count_aggr_se(scalarexp_t *se, bool strict){
1527 vector<scalarexp_t *> operands;
1530 if(se == NULL) return(0);
1532 switch(se->get_operator_type()){
1535 case SE_IFACE_PARAM:
1538 return(count_aggr_se(se->get_left_se(), strict) );
1541 count_aggr_se(se->get_left_se(), strict) +
1542 count_aggr_se(se->get_right_se(), strict)
1550 operands = se->get_operands();
1551 for(o=0;o<operands.size();o++){
1552 sum += count_aggr_se(operands[o], strict);
1554 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1556 // now, stateful functions can count as aggregates,
1557 // but only if we are NOT being strict.
1558 if(! strict && se->get_storage_state() != "")
// Unrecognised operator type: report source position and type on stderr.
1564 fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n",
1565 se->get_lineno(), se->get_charno(),se->get_operator_type());
1572 // Return a count of the number of aggregate fcns in this predicate.
// Dispatches on the predicate's operator type, summing count_aggr_se over its
// scalar operands and count_aggr_pred over its child predicates. `strict` is
// passed through unchanged.
1573 int count_aggr_pred(predicate_t *pr, bool strict){
1574 vector<scalarexp_t *> op_list;
1577 switch(pr->get_operator_type()){
1579 return(count_aggr_se(pr->get_left_se(), strict) );
1582 count_aggr_se(pr->get_left_se(), strict) +
1583 count_aggr_se(pr->get_right_se(), strict)
1586 return(count_aggr_pred(pr->get_left_pr(), strict) );
1587 case PRED_BINARY_OP:
1589 count_aggr_pred(pr->get_left_pr(), strict) +
1590 count_aggr_pred(pr->get_right_pr(), strict)
// Operand-list predicate: accumulate the count of every operand.
1593 op_list = pr->get_op_list();
1595 for(o=0;o<op_list.size();++o){
1596 aggr_sum += count_aggr_se(op_list[o], strict);
// Unrecognised operator type: report source position and type on stderr.
1601 fprintf(stderr,"INTERNAL ERROR in count_aggr_pred, line %d, character %d, unknown predicate operator type %d\n",
1602 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1609 //////////////////////////////////////////////////
1610 /// Analyze tablevar refs
// Collect into `reflist` the distinct tablevar reference indices used by the
// expression tree `se`. An index is appended only if a linear scan shows it is
// not already in the list. A NULL expression adds nothing.
1612 void get_tablevar_ref_se(scalarexp_t *se, vector<int> &reflist){
1614 vector<scalarexp_t *> operands;
1619 if(se == NULL) return;
1621 switch(se->get_operator_type()){
1625 case SE_IFACE_PARAM:
1626 ir = se->get_ifpref();
1627 vref = ir->get_tablevar_ref();
1628 for(o=0;o<reflist.size();++o){
1629 if(vref == reflist[o]) return;
1631 reflist.push_back(vref);
1634 get_tablevar_ref_se(se->get_left_se(), reflist);
1637 get_tablevar_ref_se(se->get_left_se(), reflist);
1638 get_tablevar_ref_se(se->get_right_se(), reflist);
// Group-by references are skipped; other column references are recorded.
1641 if(se->is_gb()) return;
1642 cr = se->get_colref();
1643 vref = cr->get_tablevar_ref();
1644 for(o=0;o<reflist.size();++o){
1645 if(vref == reflist[o]) return;
1647 reflist.push_back(vref);
// A node that carries an aggregate reference is not descended into.
1653 if(se->get_aggr_ref() >= 0) return;
1655 operands = se->get_operands();
1656 for(o=0;o<operands.size();o++){
1657 get_tablevar_ref_se(operands[o], reflist);
// Unrecognised operator type: report source position and type on stderr.
1662 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_se, line %d, character %d: unknown operator type %d\n",
1663 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of get_tablevar_ref_se: applies it to every scalar
// expression in `pr` and recurses through child predicates, accumulating into
// the same `reflist`.
1670 void get_tablevar_ref_pr(predicate_t *pr, vector<int> &reflist){
1671 vector<scalarexp_t *> op_list;
1674 switch(pr->get_operator_type()){
1676 get_tablevar_ref_se(pr->get_left_se(),reflist);
1679 get_tablevar_ref_se(pr->get_left_se(),reflist);
1680 get_tablevar_ref_se(pr->get_right_se(),reflist);
1683 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1685 case PRED_BINARY_OP:
1686 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1687 get_tablevar_ref_pr(pr->get_right_pr(),reflist);
1690 op_list = pr->get_op_list();
1691 for(o=0;o<op_list.size();++o){
1692 get_tablevar_ref_se(op_list[o],reflist);
// Unrecognised operator type: report source position and type on stderr.
1696 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_pr, line %d, character %d, unknown predicate operator type %d\n",
1697 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1704 // Walk SE tree and gather STATES ref'd by STATEFUL fcns.
// Walk the expression tree `se` and insert every non-empty storage-state name
// found on a function node into `states_refd`. In the visible code Ext_fcns is
// only forwarded to the recursive calls.
// NOTE(review): no NULL check on `se` is visible before the switch, unlike
// count_aggr_se and get_tablevar_ref_se.
1706 void gather_fcn_states_se(scalarexp_t *se, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1709 vector<scalarexp_t *> operands;
1711 switch(se->get_operator_type()){
1714 case SE_IFACE_PARAM:
1717 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ;
1720 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
1721 gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns);
1728 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
// Function node: visit the operands first, then record this node's own state.
1731 operands = se->get_operands();
1732 for(o=0;o<operands.size();o++){
1733 gather_fcn_states_se(operands[o], states_refd, Ext_fcns);
1735 if(se->get_storage_state() != ""){
1736 states_refd.insert(se->get_storage_state());
// Unrecognised operator type: report source position and type on stderr.
1741 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n",
1742 se->get_lineno(), se->get_charno(),se->get_operator_type());
1749 // Walk predicate tree and gather STATES ref'd by STATEFUL fcns.
// Applies gather_fcn_states_se to every scalar expression contained in the
// predicate `pr` and recurses through child predicates, accumulating into the
// same `states_refd` set.
1751 void gather_fcn_states_pr(predicate_t *pr, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1752 vector<scalarexp_t *> op_list;
1755 switch(pr->get_operator_type()){
1757 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1760 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1761 gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ;
1764 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns);
1766 case PRED_BINARY_OP:
1767 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ;
1768 gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ;
1771 op_list = pr->get_op_list();
1772 for(o=0;o<op_list.size();++o){
1773 gather_fcn_states_se(op_list[o],states_refd, Ext_fcns);
// Unrecognised operator type: report source position and type on stderr.
1778 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_pr, line %d, character %d, unknown predicate operator type %d\n",
1779 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1789 // walk se tree and collect aggregates into aggregate table.
1790 // duplicate aggregates receive the same idx to the table.
// Register the aggregates found in `se` with the aggregate table and store
// the id returned by add_aggr() on the expression node via set_aggr_id().
// NOTE(review): the parameter is named `aggregate_table`, which hides the
// type of the same name inside this function body.
1792 void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){
1795 vector<scalarexp_t *> operands;
1797 switch(se->get_operator_type()){
1800 case SE_IFACE_PARAM:
1803 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ;
1806 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns);
1807 build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns);
// Aggregate registered with a NULL operand expression.
1812 agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr());
1813 se->set_aggr_id(agg_id);
// Aggregate registered with its left sub-expression as the operand.
1816 agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr());
1817 se->set_aggr_id(agg_id);
// Function node: process the operands first; if the node itself carries an
// aggregate reference, register it together with the storage type,
// running-aggregate flag and lfta-bailout flag looked up in Ext_fcns.
1820 operands = se->get_operands();
1821 for(o=0;o<operands.size();o++){
1822 build_aggr_tbl_fm_se(operands[o], aggregate_table, Ext_fcns);
1824 if(se->get_aggr_ref() >= 0){ // it's been tagged as a UDAF
1825 agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id()));
1826 se->set_aggr_id(agg_id);
// Unrecognised operator type: report source position and type on stderr.
1831 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n",
1832 se->get_lineno(), se->get_charno(),se->get_operator_type());
1839 // walk predicate tree and collect aggregates into aggregate table.
1840 // duplicate aggregates receive the same idx to the table.
// Applies build_aggr_tbl_fm_se to every scalar expression contained in the
// predicate `pr` and recurses through child predicates, using the same
// aggregate table and Ext_fcns.
1842 void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){
1843 vector<scalarexp_t *> op_list;
1846 switch(pr->get_operator_type()){
1848 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1851 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1852 build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ;
1855 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns);
1857 case PRED_BINARY_OP:
1858 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ;
1859 build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ;
1862 op_list = pr->get_op_list();
1863 for(o=0;o<op_list.size();++o){
1864 build_aggr_tbl_fm_se(op_list[o],aggregate_table, Ext_fcns);
// Unrecognised operator type: report source position and type on stderr.
1869 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_pred, line %d, character %d, unknown predicate operator type %d\n",
1870 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1878 // Return true if the two scalar expressions
1879 // represent the same value (e.g., use to eliminate
1880 // duplicate aggregates).
// Structural comparison of two scalar expressions: operator type and operator
// string are compared first, then the per-type payload (literal, parameter
// name, interface parameter, column / group-by reference, sub-expressions or
// operand lists) is compared recursively.
1881 bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){
1882 vector<scalarexp_t *> operands1;
1883 vector<scalarexp_t *> operands2;
1886 // First handle the case of nulls (e.g. COUNT aggrs)
1887 if(se1 == NULL && se2 == NULL) return(true);
1888 if(se1 == NULL || se2 == NULL) return(false);
1890 // In all cases, must be the same operator type and same operator.
1891 if(se1->get_operator_type() != se2->get_operator_type())
1893 if(se1->get_op() != se2->get_op() )
1896 switch(se1->get_operator_type()){
1898 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
1900 return(se1->get_param_name() == se2->get_param_name() );
1901 case SE_IFACE_PARAM:
1902 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
1904 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
1906 if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) )
1907 return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) );
// Two group-by references are compared by gb_ref; a gb / non-gb mix is tested
// separately; two plain column references are compared via the colref objects.
1910 if(se1->is_gb() && se2->is_gb())
1911 return( se1->get_gb_ref() == se2->get_gb_ref() );
1912 if(se1->is_gb() || se2->is_gb())
1914 return(se1->get_colref()->is_equivalent(se2->get_colref()) );
1918 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
// NOTE(review): the operator strings were already compared before the switch,
// so this test appears redundant.
1920 if(se1->get_op() != se2->get_op()) return(false);
// Operand lists must have the same length and match pairwise, in order.
1922 operands1 = se1->get_operands();
1923 operands2 = se2->get_operands();
1924 if(operands1.size() != operands2.size()) return(false);
1926 for(o=0;o<operands1.size();o++){
1927 if(! is_equivalent_se(operands1[o], operands2[o]) )
// Unrecognised operator type: report source position and type on stderr.
1932 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
1933 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
1940 // Similar to is_equivalent_se, but with a looser definition
1941 // of equivalence of colrefs. Here, say they are equivalent
1942 // if their base table is the same. Use to find equivalent
1943 // predicates on base tables.
// Same comparison scheme as is_equivalent_se, except that column references
// are compared with colref_t::is_equivalent_base(..., Schema) and a group-by
// column reference is first replaced by its right sub-expression.
1944 bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){
1945 vector<scalarexp_t *> operands1;
1946 vector<scalarexp_t *> operands2;
// NOTE(review): se1 and se2 are dereferenced here, before the NULL checks that
// follow -- a NULL argument would fault at this point; confirm callers never
// pass NULL or move the NULL checks up.
// presumably get_right_se() of a gb colref is the group-by's defining
// expression -- TODO confirm.
1949 if(se1->get_operator_type() == SE_COLREF && se1->is_gb()){
1950 se1 = se1->get_right_se();
1952 if(se2->get_operator_type() == SE_COLREF && se2->is_gb()){
1953 se2 = se2->get_right_se();
1956 // First handle the case of nulls (e.g. COUNT aggrs)
1957 if(se1 == NULL && se2 == NULL) return(true);
1958 if(se1 == NULL || se2 == NULL) return(false);
1960 // In all cases, must be the same operator type and same operator.
1961 if(se1->get_operator_type() != se2->get_operator_type())
1963 if(se1->get_op() != se2->get_op() )
1966 switch(se1->get_operator_type()){
1968 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
1970 return(se1->get_param_name() == se2->get_param_name() );
1971 case SE_IFACE_PARAM:
1972 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
1974 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
1976 if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) )
1977 return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) );
1981 if(se1->is_gb() && se2->is_gb())
1982 return( se1->get_gb_ref() == se2->get_gb_ref() );
1983 if(se1->is_gb() || se2->is_gb())
// Plain column references: equivalent if they match at the base-table level.
1986 return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) );
1990 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
// NOTE(review): the operator strings were already compared before the switch,
// so this test appears redundant.
1992 if(se1->get_op() != se2->get_op()) return(false);
// Operand lists must have the same length and match pairwise, in order.
1994 operands1 = se1->get_operands();
1995 operands2 = se2->get_operands();
1996 if(operands1.size() != operands2.size()) return(false);
1998 for(o=0;o<operands1.size();o++){
1999 if(! is_equivalent_se_base(operands1[o], operands2[o], Schema) )
// NOTE(review): the message names is_equivalent_se, not is_equivalent_se_base.
2004 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
2005 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
2012 // Find predicates which are equivalent when
2013 // looking at the base tables. Use to find
2014 // common prefilter.
// Compare two predicates structurally, using is_equivalent_se_base for their
// scalar expressions. Operator type and operator string are compared first;
// the switch then compares the per-type contents.
2015 bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){
2018 // First handle the case of nulls
2019 if(p1 == NULL && p2 == NULL) return(true);
2020 if(p1 == NULL || p2 == NULL) return(false);
2023 if(p1->get_operator_type() != p2->get_operator_type())
2025 if(p1->get_op() != p2->get_op())
2028 vector<literal_t *> ll1;
2029 vector<literal_t *> ll2;
2030 vector<scalarexp_t *> op_list1, op_list2;
2033 switch(p2->get_operator_type()){
// Two-expression predicate: both sides must match.
2035 if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2037 return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) );
// Expression-plus-literal-list predicate: the expressions must match and the
// literal vectors must have equal length and match position by position.
2040 if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2042 ll1 = p1->get_lit_vec();
2043 ll2 = p2->get_lit_vec();
2044 if(ll1.size() != ll2.size())
2046 for(i=0;i<ll1.size();i++){
2047 if(! ll1[i]->is_equivalent( ll2[i] ) )
2053 return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) );
2055 case PRED_BINARY_OP:
2056 if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema))
2058 return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) );
// Operand-list predicate: same length and pairwise-equivalent operands.
2061 op_list1 = p1->get_op_list();
2062 op_list2 = p2->get_op_list();
2063 if(op_list1.size() != op_list2.size()) return(false);
2064 for(o=0;o<op_list1.size();++o){
2065 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) ) return(false);
// Compares two PRED_FUNC predicates that share the same function id, operand
// by operand, with is_equivalent_se_base. The per-operand class indicators of
// the function are fetched from Ext_fcns into cl_op.
// presumably cl_op[o] selects which operands take part in the comparison --
// TODO confirm against the loop body.
2076 bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns){
2077 if((p1->get_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC))
2079 if(p1->get_fcn_id() != p2->get_fcn_id())
2081 vector<bool> cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id());
2083 vector<scalarexp_t *> op_list1 = p1->get_op_list();
2084 vector<scalarexp_t *> op_list2 = p2->get_op_list();
2085 if(op_list1.size() != op_list2.size()) return(false);
2086 for(o=0;o<op_list1.size();++o){
2088 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) )
2099 // Verify that the scalar expression (in a such that clause)
2100 // is acceptable in an aggregation query. No column
2101 // references allowed outside aggregates, except for
2102 // references to group-by attributes.
2103 // return true if OK, false if bad.
// Recursive check of one scalar expression. Unary and binary operators are
// valid only if all their sub-expressions are; a column reference is valid
// only if it is a group-by reference, otherwise an error naming the column
// and its source position is written to stderr.
2104 bool verify_aggr_query_se(scalarexp_t *se){
2105 vector <scalarexp_t *> operands;
2108 switch(se->get_operator_type()){
2111 case SE_IFACE_PARAM:
2114 return(verify_aggr_query_se(se->get_left_se() ) );
2116 return(verify_aggr_query_se(se->get_left_se() ) &&
2117 verify_aggr_query_se(se->get_right_se() ) );
2119 if(se->is_gb() ) return(true);
2120 fprintf(stderr,"ERROR: the select clause in an aggregate query can "
2121 "only reference constants, group-by attributes, and "
2122 "aggregates, (%s) line %d, character %d.\n",
2123 se->get_colref()->to_string().c_str(),
2124 se->get_lineno(), se->get_charno() );
2128 // colrefs and gbrefs allowed.
2129 // check for nested aggregation elsewhere, so just return TRUE
2132 // If its a UDAF, just return true
2133 if(se->get_aggr_ref() >= 0) return true;
// Otherwise every operand of the function is checked in turn.
2135 operands = se->get_operands();
2137 for(o=0;o<operands.size();o++){
2138 if(! verify_aggr_query_se(operands[o]) )
// Unrecognised operator type: report source position and type on stderr.
2143 fprintf(stderr,"INTERNAL ERROR in verify_aggr_query_se, line %d, character %d: unknown operator type %d\n",
2144 se->get_lineno(), se->get_charno(),se->get_operator_type());
2153 // Find complex literals.
2154 // NOTE : This analysis should be deferred to
2155 // code generation time.
2156 // This analysis drills into aggr se specs.
2157 // Shouldn't this be done at the aggregate table?
2158 // But, its not a major loss of efficiency.
2159 // UPDATE : drilling into aggr se's is causing a problem
2160 // so I've eliminated it.
// Walk the expression tree `se`; every literal whose constructor_name() is
// non-empty is added to `complex_literals` and the index returned by
// add_cpx_lit() is stored on the literal with set_cpx_lit_ref().
// NOTE(review): param_se is not referenced anywhere in the visible body.
2162 bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2163 cplx_lit_table *complex_literals){
2165 vector<scalarexp_t *> operands;
2167 scalarexp_t *param_se;
2170 switch(se->get_operator_type()){
2172 l = se->get_literal();
2173 if(l->constructor_name() != ""){
2174 int cl_idx = complex_literals->add_cpx_lit(l, false);
2175 l->set_cpx_lit_ref(cl_idx);
2180 // SE_IFACE_PARAM should not exist when this is called.
2182 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
2184 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) &&
2185 find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) );
2192 // return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
// A node that carries an aggregate reference is not descended into.
2194 if(se->get_aggr_ref() >= 0) return true;
// NOTE(review): the boolean results of the per-operand calls are discarded.
2196 operands = se->get_operands();
2197 for(o=0;o<operands.size();o++){
2198 find_complex_literal_se(operands[o], Ext_fcns, complex_literals);
// Unrecognised operator type: report source position and type on stderr.
2202 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_se, line %d, character %d: unknown operator type %d\n",
2203 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of find_complex_literal_se: applies it to every
// scalar expression in `pr`, recurses through child predicates, and also
// registers the complex literals held in a predicate's literal vector.
2212 void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2213 cplx_lit_table *complex_literals){
2215 vector<literal_t *> litl;
2216 vector<scalarexp_t *> op_list;
2219 switch(pr->get_operator_type()){
// Expression-plus-literal-list predicate: scan the expression, then register
// each literal in the list that has a non-empty constructor name.
2221 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2222 litl = pr->get_lit_vec();
2223 for(i=0;i<litl.size();i++){
2224 if(litl[i]->constructor_name() != ""){
2225 int cl_idx = complex_literals->add_cpx_lit(litl[i],false);
2226 litl[i]->set_cpx_lit_ref(cl_idx);
2231 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2232 find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ;
2235 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals);
2237 case PRED_BINARY_OP:
2238 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ;
2239 find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ;
2242 op_list = pr->get_op_list();
2243 for(o=0;o<op_list.size();++o){
2244 find_complex_literal_se(op_list[o],Ext_fcns, complex_literals);
// Unrecognised operator type: report source position and type on stderr.
2248 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_pr, line %d, character %d, unknown predicate operator type %d\n",
2249 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2257 // Find all things which are passed as handle parameters to functions
2258 // (query parameters, (simple) literals, complex literals)
2259 // These expressions MUST be processed with find_complex_literal_??
2261 // TODO: this analysis drills into the aggregate SEs.
2262 // Shouldn't this be done on the aggr table SEs instead?
2263 // to avoid duplication. The handle registration
2264 // might be expensive ...
2265 // REVISED : drilling into aggr se's is causing problems, eliminated.
// Walk the expression tree `se` and append a handle_param_tbl_entry to
// `handle_tbl` for function operands that are passed by handle. The
// per-operand handle indicators come from
// Ext_fcns->get_handle_indicators(fcn_id). A handle operand must be a literal
// or a query parameter; anything else is reported on stderr.
2267 void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2268 vector<handle_param_tbl_entry *> &handle_tbl){
2269 vector<scalarexp_t *> operands;
2270 vector<bool> handle_ind;
2272 scalarexp_t *param_se;
2276 switch(se->get_operator_type()){
2281 // case SE_IFACE_PARAM: SHOULD NOT EXIST when this is called
2283 find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
2286 find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ;
2287 find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ;
2294 // find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
// A node that carries an aggregate reference is not descended into.
2297 if(se->get_aggr_ref() >= 0) return ;
2299 operands = se->get_operands();
2300 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
2301 for(o=0;o<operands.size();o++){
2303 handle_param_tbl_entry *he;
2304 param_se = operands[o];
2305 if(param_se->get_operator_type() != SE_LITERAL &&
2306 param_se->get_operator_type() != SE_PARAM){
2307 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
2308 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Query parameter: the entry records the parameter name.
2312 if(param_se->get_operator_type() == SE_PARAM){
2313 he = new handle_param_tbl_entry(
2314 se->get_op(), o, param_se->get_param_name(),
2315 param_se->get_data_type()->get_type_str());
// Literal: a complex literal is recorded by its complex-literal reference;
// any other literal goes through the remaining constructor call.
2317 l = param_se->get_literal();
2318 if(l->is_cpx_lit()){
2319 he = new handle_param_tbl_entry(
2320 se->get_op(), o, l->get_cpx_lit_ref(),
2321 param_se->get_data_type()->get_type_str());
2323 he = new handle_param_tbl_entry(
2325 param_se->get_data_type()->get_type_str());
// The handle ref is the index the new entry is about to occupy in handle_tbl.
2328 param_se->set_handle_ref(handle_tbl.size());
2329 handle_tbl.push_back(he);
2331 find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ;
// Unrecognised operator type: report source position and type on stderr.
2336 fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n",
2337 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of find_param_handles_se. Scalar expressions and
// child predicates are visited recursively; for a predicate with an operand
// list, handle operands (per Ext_fcns->get_handle_indicators(fcn_id)) are
// validated and registered in `handle_tbl` the same way as for functions.
2344 void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2345 vector<handle_param_tbl_entry *> &handle_tbl){
2346 vector<literal_t *> litl;
2347 vector<scalarexp_t *> op_list;
2348 scalarexp_t *param_se;
2349 vector<bool> handle_ind;
2353 switch(pr->get_operator_type()){
2355 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2358 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2359 find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ;
2362 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl);
2364 case PRED_BINARY_OP:
2365 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ;
2366 find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ;
2369 op_list = pr->get_op_list();
2370 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
2371 for(o=0;o<op_list.size();++o){
2373 handle_param_tbl_entry *he;
2374 param_se = op_list[o];
2375 if(param_se->get_operator_type() != SE_LITERAL &&
2376 param_se->get_operator_type() != SE_PARAM){
2377 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
2378 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
// Query parameter: the entry records the parameter name.
2382 if(param_se->get_operator_type() == SE_PARAM){
2383 he = new handle_param_tbl_entry(
2384 pr->get_op(), o, param_se->get_param_name(),
2385 param_se->get_data_type()->get_type_str());
// Literal: a complex literal is recorded by its complex-literal reference;
// any other literal goes through the remaining constructor call.
2387 l = param_se->get_literal();
2388 if(l->is_cpx_lit()){
2389 he = new handle_param_tbl_entry(
2390 pr->get_op(), o, l->get_cpx_lit_ref(),
2391 param_se->get_data_type()->get_type_str());
2393 he = new handle_param_tbl_entry(
2395 param_se->get_data_type()->get_type_str());
// The handle ref is the index the new entry is about to occupy in handle_tbl.
2398 param_se->set_handle_ref(handle_tbl.size());
2399 handle_tbl.push_back(he);
2401 find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ;
// Unrecognised operator type: report source position and type on stderr.
2406 fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n",
2407 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2415 // Verify the HAVING predicate : it
2416 // can access gb vars, aggregates, and constants,
2418 // return 1 if OK, -1 if bad.
2419 // Perhaps replace by a pair of fcns which counts non-gb colrefs?
2421 // Extended to deal with cleaning_by, cleaning_when :
2422 // verify that any aggregate function
2423 // has the multiple output property.
// Recursive check of one scalar expression appearing in the clause named by
// `clause` (compared against "CLEANING_WHEN" and "CLEANING_BY" below).
// Problems are reported on stderr; the visible return statements use -1 for
// failure and 1 for success.
2425 int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){
2427 vector<scalarexp_t *> operands;
2428 vector<data_type *> odt;
2431 switch(se->get_operator_type()){
2435 case SE_IFACE_PARAM:
2438 return(verify_having_se(se->get_left_se(), clause, Ext_fcns) );
// Binary operator: both sides are always checked, so errors on either side
// are reported before failing.
2440 l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns);
2441 r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns);
2442 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
// Column reference: only group-by references are acceptable.
2445 if(se->is_gb()) return 1;
2446 fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause,
2447 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
2451 // colrefs and gbrefs allowed.
2452 // check for nested aggregation elsewhere, so just return TRUE
// NOTE(review): the test fires when the aggregate is NOT a superaggregate,
// yet the message says the clause "references a superaggregate" -- condition
// and message appear to disagree; confirm which one is intended.
2453 if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2454 fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2455 se->get_lineno(),se->get_charno(), se->get_op().c_str() );
2459 // Ensure that aggregate refs allow multiple outputs
2460 // in CLEANING_WHEN, CLEANING_BY
2461 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2462 if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2463 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2464 se->get_op().c_str(),se->get_lineno(),se->get_charno() );
// Same two checks for a node that carries an aggregate reference.
// NOTE(review): this message reads the position through se->get_colref(),
// while the equivalent message above uses se->get_lineno()/get_charno();
// confirm get_colref() is valid (non-NULL) for this kind of node.
2472 if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2473 fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2474 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_op().c_str() );
2478 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2479 if(se->get_aggr_ref() >= 0 && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2480 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2481 se->get_op().c_str(),se->get_lineno(),se->get_charno() );
2486 if(se->get_aggr_ref() >= 0) // don't descent into aggregates.
// Otherwise check every operand; a failure in any operand fails the whole
// expression, but the remaining operands are still visited.
2489 operands = se->get_operands();
2491 for(o=0;o<operands.size();o++){
2492 l_ret = verify_having_se(operands[o], clause, Ext_fcns);
2493 if(l_ret < 0) r_ret = -1;
2495 if(r_ret < 0) return(-1); else return(1);
// Unrecognised operator type: report source position and type on stderr.
2498 fprintf(stderr,"INTERNAL ERROR in verify_having_se, line %d, character %d: unknown operator type %d\n",
2499 se->get_lineno(), se->get_charno(),se->get_operator_type());
2506 // Verify the HAVING predicate : it
2507 // can access gb vars, aggregates, and constants,
2509 // return 1 if OK, -1 if bad.
2510 // Perhaps replace by a pair of fcns which counts non-gb colrefs?
// Predicate-level counterpart of verify_having_se: walks predicate `pr` and
// applies verify_having_se to every scalar expression it contains.
//   pr       - predicate to check.
//   clause   - clause name, forwarded for diagnostics and clause checks.
//   Ext_fcns - external function table, forwarded unchanged.
// A negative result from any sub-check makes the visible branches return -1
// (or record -1 in l_ret).
// NOTE(review): most case labels are not visible in this listing.
2513 int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){
2515 vector<literal_t *> litl;
2516 vector<scalarexp_t *> op_list;
2519 switch(pr->get_operator_type()){
// Predicate with a single scalar expression on the left.
2521 return(verify_having_se(pr->get_left_se(), clause, Ext_fcns));
// Predicate with two scalar expressions: both are checked before failing.
2523 l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ;
2524 r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ;
2525 if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1);
// Predicate with a single sub-predicate.
2527 return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns));
2528 case PRED_BINARY_OP:
2529 l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns);
2530 r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns);
2531 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
// Predicate with an operand list: every operand is checked.
2534 op_list = pr->get_op_list();
2536 for(o=0;o<op_list.size();++o){
2537 if( verify_having_se(op_list[o], clause, Ext_fcns) < 0) l_ret = -1;
// Unknown predicate operator type.
2542 fprintf(stderr,"INTERNAL ERROR in verify_having_pred, line %d, character %d, unknown predicate operator type %d\n",
2543 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2550 //////////////////////////////////////////////////////////////////////////
2551 //////////////////////////////////////////////////////////////////////////
2552 /////// cnf and pred analysis and manipulation
2554 // ----------------------------------------------------------------------
2555 //  Convert the predicates to a list of conjuncts
2556 //  (not actually cnf). Do some analysis
2557 //  on their properties.
2558 // ----------------------------------------------------------------------
2561 //  Put into list clist the predicates that
2562 //  are AND'ed together.
// Flatten predicate `pr` into a list of conjuncts.
//   pr    - predicate tree; NULL is accepted and adds nothing.
//   clist - output list; a new cnf_elem is appended for each conjunct found.
// An "AND" node is split by recursing into both children; an "OR" node and
// the other visible branches are appended as a single cnf_elem wrapping the
// predicate as-is.
2564 void make_cnf_from_pr(predicate_t *pr, vector<cnf_elem *> &clist){
2566 if(pr == NULL) return;
2568 switch(pr->get_operator_type()){
// NOTE(review): the case labels for the next three branches are not visible
// in this listing; each wraps the predicate unchanged.
2570 clist.push_back(new cnf_elem(pr));
2574 clist.push_back(new cnf_elem(pr));
2578 clist.push_back(new cnf_elem(pr));
2581 case PRED_BINARY_OP:
// A disjunction is a single conjunct.
2582 if(pr->get_op() == "OR"){
2583 clist.push_back(new cnf_elem(pr));
// A conjunction contributes the conjuncts of both of its sides.
2586 if(pr->get_op() =="AND"){
2587 make_cnf_from_pr(pr->get_left_pr(),clist);
2588 make_cnf_from_pr(pr->get_right_pr(),clist);
2592 clist.push_back(new cnf_elem(pr));
2596 fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str());
2604 //  Find out what things are referenced in a se,
2605 //  to use for analyzing a predicate.
2606 //  Currently: is it simple (no operators), does it
2607 //  reference a group-by column, does it reference an
2608 //  attribute of a table.
2610 // analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf
// Walk scalar expression `se` and update the caller's summary flags.
//   s, g, a, agr - in/out flags owned by the caller; analyze_cnf passes the
//                  *_simple, *_gb, *_attr and *_aggr fields of a cnf_elem,
//                  in that order.
// The flags are passed unchanged to every recursive call, so they
// accumulate over the whole expression tree.
// NOTE(review): most assignments to the flags are on lines not visible in
// this listing; only `g=1` for a group-by reference can be seen.
2613 void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){
2615 vector<scalarexp_t *> operand_list;
2617 switch(se->get_operator_type()){
2620 case SE_IFACE_PARAM:
2623 if(se->is_gb() ) g=1;
// One-operand expression.
2628 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
// Two-operand expression.
2632 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2633 analyze_cnf_se(se->get_right_se(),s,g,a,agr);
// Expression with an operand list; aggregate references are tested first.
2640 if(se->get_aggr_ref() >= 0){
2645 operand_list = se->get_operands();
2646 for(p=0;p<operand_list.size();p++){
2647 analyze_cnf_se(operand_list[p],s,g,a,agr);
// Walk predicate `pr` and update the caller's summary flags by applying
// analyze_cnf_se to every scalar expression in it.
//   g, a, agr - in/out flags; analyze_cnf passes the pr_gb, pr_attr and
//               pr_aggr fields of a cnf_elem, in that order.
// The "simple" output of analyze_cnf_se is collected in the local
// dum_simple and is not propagated to the caller.
2657 void analyze_cnf_pr(predicate_t *pr, int &g, int &a, int &agr){
2659 vector<scalarexp_t *> op_list;
2662 switch(pr->get_operator_type()){
// Predicate with scalar expressions on both sides.
2664 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2665 analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr);
// Predicate with only a left scalar expression.
2668 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
// Predicate with a single sub-predicate.
2671 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2673 case PRED_BINARY_OP:
2674 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2675 analyze_cnf_pr(pr->get_right_pr(),g,a,agr);
// Predicate with an operand list.
2678 op_list = pr->get_op_list();
2679 for(o=0;o<op_list.size();++o){
2680 analyze_cnf_se(op_list[o],dum_simple,g,a,agr);
2684 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2685 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2692 //  Analyze a conjunct of a predicate.
2693 //  Is it atomic (e.g., a single predicate),
2694 //  and if so do a further analysis.
// Analyze conjunct `c` and store the results in its own fields.
//   - The whole predicate is summarized into pr_gb / pr_attr / pr_aggr.
//   - For a comparison or IN predicate, the left operand is summarized into
//     l_simple / l_gb / l_attr / l_aggr.
//   - For a comparison, the right operand is summarized into
//     r_simple / r_gb / r_attr / r_aggr.
// NOTE(review): the statements executed under the `if` tests below are on
// lines not visible in this listing.
2696 void analyze_cnf(cnf_elem *c){
2698 //  analyze the predicate.
2699 analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr);
// Anything other than a comparison or an IN predicate is not atomic.
2701 if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){
2706 //  its an atomic predicate -- get more info
2709 if(c->pr->get_op() == "=")
2714 if(c->pr->get_operator_type() == PRED_IN)
// Left operand: start from "simple, references nothing" and let the walk
// update the flags.
2719 c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0;
2720 analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr);
// Right operand, analyzed only for comparisons. The r_* fields must be
// computed from the RIGHT scalar expression; analyzing the left one again
// would make them a copy of the l_* fields.
2722 if(c->pr->get_operator_type() == PRED_COMPARE){
2723 c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0;
2724 analyze_cnf_se(c->pr->get_right_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr);
// Compute counters describing scalar expression `se`, for constraint cost
// estimation.
//   n_agr, n_gb, n_par, n_func, n_op - outputs. compute_cnf_cost adds n_func
//              and n_op into the cost of a conjunct; simple_field_constraint
//              rejects a constraint when n_agr, n_gb, n_par or n_func is
//              positive.
//   Ext_fcns - external function table used for estimate_fcn_cost.
//   enter_gb - forwarded unchanged to every recursive call; its use is on
//              lines not visible in this listing.
// NOTE(review): simple_field_constraint passes NULL for Ext_fcns; confirm
// that the lines not visible here guard the two Ext_fcns dereferences below.
2728 void analyze_constraint_se(scalarexp_t *se,
2729 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){
// Scratch counters for the left / right (or per-operand) sub-expressions.
2730 int l_agr, l_gb, l_par, l_func, l_op;
2731 int r_agr, r_gb, r_par, r_func, r_op;
2733 vector<scalarexp_t *> operand_list;
2735 switch(se->get_operator_type()){
2737 case SE_IFACE_PARAM:
2738 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
// Branch that counts one parameter reference.
2741 n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0;
2744 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
// Branch that descends into the right-hand expression and reports its counters.
2747 analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
// One-operand expression: counters of the operand.
2754 analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
// Two-operand expression: counters of both sides are combined.
2758 analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2759 analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb);
2763 n_func=l_func+r_func;
// Branch that counts one aggregate.
2768 n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
// Expression that is an aggregate reference (get_aggr_ref() >= 0):
// one aggregate, with the function cost taken from Ext_fcns.
2771 if(se->get_aggr_ref() >= 0){
2772 n_agr=1; n_gb = 0; n_par = 0; n_op = 0;
2774 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
// Otherwise start from the function's own estimated cost and visit the operands.
2779 n_agr=0; n_gb = 0; n_par = 0; n_op = 0;
2781 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2784 operand_list = se->get_operands();
2785 for(p=0;p<operand_list.size();p++){
2786 analyze_constraint_se(operand_list[p],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2799 // Estimate the cost of a constraint.
2800 // WARNING a lot of cost assumptions are embedded in the code.
// Compute counters describing predicate `pr`, for constraint cost estimation.
//   n_agr, n_gb, n_par, n_func, n_op - counters accumulated from the scalar
//              expressions of the predicate (see analyze_constraint_se).
//   n_cmp_s  - number of comparisons that are not complex.
//   n_cmp_c  - number of complex comparisons (as decided by
//              data_type::complex_comparison).
//   n_in, n_pred, n_bool - further counters; n_pred receives
//              Ext_fcns->estimate_fcn_cost for a predicate function, and
//              n_bool is incremented for each binary predicate operator.
//   Ext_fcns, enter_gb - forwarded unchanged to the recursive calls.
// NOTE(review): most case labels are not visible in this listing.
2801 void analyze_constraint_pr(predicate_t *pr,
2802 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op,
2803 int &n_cmp_s, int &n_cmp_c, int &n_in, int &n_pred, int &n_bool, ext_fcn_list *Ext_fcns, bool enter_gb){
2804 int l_agr, l_gb, l_par, l_func, l_op, l_cmp_s, l_cmp_c, l_in, l_pred,l_bool;
2805 int r_agr, r_gb, r_par, r_func, r_op, r_cmp_s, r_cmp_c, r_in, r_pred,r_bool;
2808 vector<scalarexp_t *> op_list;
2811 switch(pr->get_operator_type()){
// Comparison: sum the counters of both sides, then count one comparison,
// complex or simple according to the operand types.
2813 analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb);
2814 analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb);
2815 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2816 n_func=l_func+r_func; n_op=l_op+r_op;
2817 if(pr->get_left_se()->get_data_type()->complex_comparison(
2818 pr->get_right_se()->get_data_type())
2820 n_cmp_s = 0; n_cmp_c=1;
2822 n_cmp_s = 1; n_cmp_c=0;
2824 n_in = 0; n_pred = 0; n_bool = 0;
2827 // Treat IN predicate as sequence of comparisons
2828 analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
// An IN predicate has a literal list (get_lit_vec) on its right side, not a
// scalar expression, so the comparison kind is decided from the type of the
// left expression alone -- the same test simple_field_constraint applies to
// IN predicates.
2829 if(pr->get_left_se()->get_data_type()->complex_comparison(
2830 pr->get_left_se()->get_data_type())
// One comparison per literal in the list.
2832 n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size();
2834 n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0;
2836 n_in = 0; n_pred = 0; n_bool = 0;
// Predicate with a single sub-predicate: counters of the operand.
2839 analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb);
2842 case PRED_BINARY_OP:
// Sum the counters of both sides and count the boolean operator itself.
2843 analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb);
2844 analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb);
2845 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2846 n_func=l_func+r_func; n_op=l_op+r_op;
2847 n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c;
2848 n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1;
// Predicate function: its estimated cost goes into n_pred, and the counters
// of its operands are accumulated.
2851 n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0;
2853 n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id());
2856 op_list = pr->get_op_list();
2857 for(o=0;o<op_list.size();++o){
2858 analyze_constraint_se(op_list[o],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2859 n_agr+=l_agr; n_gb+=l_gb; n_par+=l_par; n_func+=l_func; n_op+=l_op;
// The diagnostic names this function (it previously named analyze_cnf_pr).
2863 fprintf(stderr,"INTERNAL ERROR in analyze_constraint_pr, line %d, character %d, unknown predicate operator type %d\n",
2864 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Estimate the evaluation cost of conjunct `c` and store it in c->cost.
// The counters come from analyze_constraint_pr (called with enter_gb=false);
// a complex comparison is weighted 10, every other counted item 1.
2869 void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){
2870 int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2871 analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2872 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false);
2874 //printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool);
2875 c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
// Decide whether conjunct `c` may be evaluated in the prefilter.
// The counters come from analyze_constraint_pr (called with enter_gb=true)
// and are folded into the same cost formula compute_cnf_cost uses.
// NOTE(review): the tests on the counters / cost and the return statements
// are on lines not visible in this listing.
2878 bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){
2879 int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2880 analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2881 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true);
2882 //printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool);
2885 int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2886 //printf("cost=%d\n",cost);
2890 // The prefilter needs to translate constraints on
2891 // gbvars into constraints involving their underlying SEs.
2892 // The following two routines attach GB def info.
// Attach group-by definitions to the group-by references inside `se`.
//   se   - scalar expression to walk (modified in place).
//   gtbl - group-by table; get_def(gb_ref) supplies the definition.
// A definition is stored in the rhs.scalarp slot of the referencing node;
// all other visible branches just recurse into their operands.
// NOTE(review): the test guarding the assignment is not visible here.
2894 void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){
2896 vector<scalarexp_t *> operand_list;
2898 switch(se->get_operator_type()){
2900 case SE_IFACE_PARAM:
2906 se->rhs.scalarp = gtbl->get_def(se->get_gb_ref());
2910 insert_gb_def_se(se->get_left_se(),gtbl);
2913 insert_gb_def_se(se->get_left_se(),gtbl);
2914 insert_gb_def_se(se->get_right_se(),gtbl);
2917 insert_gb_def_se(se->get_left_se(),gtbl);
2920 operand_list = se->get_operands();
2921 for(p=0;p<operand_list.size();p++){
2922 insert_gb_def_se(operand_list[p],gtbl);
// Predicate-level driver for insert_gb_def_se: walks predicate `pr` and
// attaches group-by definitions (from `gtbl`) in every scalar expression
// it contains.
2929 void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl){
2930 vector<scalarexp_t *> op_list;
2933 switch(pr->get_operator_type()){
// Predicate with scalar expressions on both sides.
2935 insert_gb_def_se(pr->get_left_se(),gtbl);
2936 insert_gb_def_se(pr->get_right_se(),gtbl);
// Predicate with only a left scalar expression.
2939 insert_gb_def_se(pr->get_left_se(),gtbl);
// Predicate with a single sub-predicate.
2942 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2944 case PRED_BINARY_OP:
2945 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2946 insert_gb_def_pr(pr->get_right_pr(),gtbl);
// Predicate with an operand list.
2949 op_list = pr->get_op_list();
2950 for(o=0;o<op_list.size();++o){
2951 insert_gb_def_se(op_list[o],gtbl);
2955 fprintf(stderr,"INTERNAL ERROR in insert_gb_def_pr, line %d, character %d, unknown predicate operator type %d\n",
2956 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2961 // Substitute gbrefs with their definitions
// Rewrite `se` in place so that each operand which is a group-by column
// reference is replaced by that reference's right-hand expression
// (presumably the definition insert_gb_def_se stored in rhs.scalarp --
// TODO confirm). After a replacement the same node is processed again, so
// the substituted expression is itself examined.
// For a column reference, tablevar_ref is re-pointed at the base table that
// Schema reports for the reference's schema_ref and field.
// NOTE(review): case labels and the returns following each re-processing
// call are not visible in this listing.
2962 void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){
2964 vector<scalarexp_t *> operand_list;
2965 scalarexp_t *lse,*rse;
2970 switch(se->get_operator_type()){
2972 case SE_IFACE_PARAM:
// Column reference: switch the table reference to the base table.
2977 cr = se->get_colref();
2978 b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field);
2979 b_idx = Schema->get_table_ref(b_tbl);
2980 cr->tablevar_ref = b_idx;
// One-operand expression.
2983 lse=se->get_left_se();
2984 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2985 se->lhs.scalarp = lse->get_right_se();
2986 subs_gbrefs_se(se,Schema);
2989 subs_gbrefs_se(se->get_left_se(),Schema);
// Two-operand expression: left side first, then right side.
2992 lse=se->get_left_se();
2993 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2994 se->lhs.scalarp = lse->get_right_se();
2995 subs_gbrefs_se(se,Schema);
2998 rse=se->get_right_se();
2999 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3000 se->rhs.scalarp = rse->get_right_se();
3001 subs_gbrefs_se(se,Schema);
3004 subs_gbrefs_se(se->get_left_se(),Schema);
3005 subs_gbrefs_se(se->get_right_se(),Schema);
// Another one-operand form.
3008 lse=se->get_left_se();
3009 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3010 se->lhs.scalarp = lse->get_right_se();
3011 subs_gbrefs_se(se,Schema);
3014 subs_gbrefs_se(se->get_left_se(),Schema);
// Expression with an operand list: substitute directly in param_list, then
// descend into every operand.
3017 operand_list = se->get_operands();
3018 for(p=0;p<operand_list.size();p++){
3019 lse=operand_list[p];
3020 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3021 se->param_list[p] = lse->get_right_se();
3022 subs_gbrefs_se(se,Schema);
3026 for(p=0;p<operand_list.size();p++){
3027 subs_gbrefs_se(operand_list[p],Schema);
// Predicate-level counterpart of subs_gbrefs_se: every scalar operand of
// `pr` that is a group-by column reference is replaced by that reference's
// right-hand expression, the predicate is then processed again, and the
// remaining operands are handed to subs_gbrefs_se.
// NOTE(review): case labels, the returns after each re-processing call and
// the assignment of `lse` inside the operand-list loop are not visible in
// this listing.
3035 void subs_gbrefs_pr(predicate_t *pr, table_list *Schema){
3036 vector<scalarexp_t *> op_list;
3038 scalarexp_t *lse,*rse;
3040 switch(pr->get_operator_type()){
// Predicate with scalar expressions on both sides.
3042 lse=pr->get_left_se();
3043 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3044 pr->lhs.sexp = lse->get_right_se();
3045 subs_gbrefs_pr(pr,Schema);
3048 rse=pr->get_right_se();
3049 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3050 pr->rhs.sexp = rse->get_right_se();
3051 subs_gbrefs_pr(pr,Schema);
3054 subs_gbrefs_se(pr->get_left_se(),Schema);
3055 subs_gbrefs_se(pr->get_right_se(),Schema);
// Predicate with only a left scalar expression.
3058 lse=pr->get_left_se();
3059 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3060 pr->lhs.sexp = lse->get_right_se();
3061 subs_gbrefs_pr(pr,Schema);
3064 subs_gbrefs_se(pr->get_left_se(),Schema);
// Predicate with a single sub-predicate.
3067 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3069 case PRED_BINARY_OP:
3070 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3071 subs_gbrefs_pr(pr->get_right_pr(),Schema);
// Predicate with an operand list: substitute directly in param_list.
3074 op_list = pr->get_op_list();
3075 for(o=0;o<op_list.size();++o){
3077 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3078 pr->param_list[o] = lse->get_right_se();
3079 subs_gbrefs_pr(pr,Schema);
3082 subs_gbrefs_se(op_list[o],Schema);
3086 fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n",
3087 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3093 // Search for references to "expensive" fields.
// Count the references in `se` to fields marked "expensive".
// A field is looked up through Schema: the table of the column reference's
// schema_ref, then that field's modifier list, which is tested for the key
// "expensive". Counts from sub-expressions are summed in cnt.
// NOTE(review): the conditions separating the rhs.scalarp recursion from
// the modifier lookup, and the return statements, are not visible in this
// listing.
3094 int expensive_refs_se(scalarexp_t *se, table_list *Schema){
3096 vector<scalarexp_t *> operand_list;
3101 switch(se->get_operator_type()){
3103 case SE_IFACE_PARAM:
// Follow the expression stored in rhs.scalarp.
3110 return expensive_refs_se(se->rhs.scalarp,Schema);
// Look the field up in the schema and test its modifiers.
3111 td = Schema->get_table(se->lhs.colref->schema_ref);
3112 plist = td->get_modifier_list(se->lhs.colref->field);
3113 if(plist->contains_key("expensive"))
// One-operand expression.
3117 return expensive_refs_se(se->get_left_se(),Schema);
// Two-operand expression.
3119 cnt += expensive_refs_se(se->get_left_se(),Schema);
3120 cnt += expensive_refs_se(se->get_right_se(),Schema);
// Expression with an operand list.
3123 operand_list = se->get_operands();
3124 for(p=0;p<operand_list.size();p++){
3125 cnt += expensive_refs_se(operand_list[p],Schema);
// Count the references to "expensive" fields in predicate `pr` by summing
// expensive_refs_se over its scalar expressions and recursing into its
// sub-predicates. find_common_filter keeps a predicate for the prefilter
// only when this count is 0.
3134 int expensive_refs_pr(predicate_t *pr, table_list *Schema){
3135 vector<scalarexp_t *> op_list;
3139 switch(pr->get_operator_type()){
// Predicate with scalar expressions on both sides.
3141 cnt += expensive_refs_se(pr->get_left_se(),Schema);
3142 cnt += expensive_refs_se(pr->get_right_se(),Schema);
// Predicate with only a left scalar expression.
3145 return expensive_refs_se(pr->get_left_se(),Schema);
// Predicate with a single sub-predicate.
3147 return expensive_refs_pr(pr->get_left_pr(),Schema);
3148 case PRED_BINARY_OP:
3149 cnt += expensive_refs_pr(pr->get_left_pr(),Schema);
3150 cnt += expensive_refs_pr(pr->get_right_pr(),Schema);
// Predicate with an operand list.
3153 op_list = pr->get_op_list();
3154 for(o=0;o<op_list.size();++o){
3155 cnt += expensive_refs_se(op_list[o],Schema);
3159 fprintf(stderr,"INTERNAL ERROR in expensive_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3160 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3166 // TODO: allow "cheap" functions and predicates.
// Decide whether conjunct `c` is a simple constraint on a single field:
//   - an equality comparison, or an IN predicate,
//   - referencing no aggregates, group-by variables, parameters or
//     functions,
//   - mentioning exactly one column,
//   - and not requiring a complex comparison.
// Side effect: for an equality whose column is on the right-hand side, the
// operands are swapped so that the column reference ends up on the left.
// NOTE(review): case labels and most return statements are not visible in
// this listing.
3167 bool simple_field_constraint(cnf_elem *c){
3168 vector<literal_t *> ll;
3170 predicate_t *p = c->pr;
3171 int l_agr, l_gb, l_par, l_func, l_op;
3172 int r_agr, r_gb, r_par, r_func, r_op;
3173 col_id_set left_colids, right_colids;
3175 // Verify that it is a simple atom
3176 switch(p->get_operator_type()){
3178 // Must be an equality predicate
3179 // which references no aggregates, parameters, functions, or
3180 // group-by variables, and should be a constraint of
3182 // AND should not require a complex comparison.
3183 if(p->get_op() != "=") return(false);
// Each side reports into its own counter set; the right side's operator
// count goes to r_op so it cannot overwrite the left side's l_op.
3184 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3185 analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,r_op,NULL,false);
3186 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ||
3187 r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false);
3188 // I will count on there being no gbvars in the constraint.
3189 // TODO: allow gbvars which are colrefs.
3190 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3191 gather_se_col_ids(p->get_right_se(), right_colids, NULL);
// Exactly one column may be mentioned across both sides.
3192 if(left_colids.size()+right_colids.size() != 1) return(false);
3195 // Normalize : the colref should be on the lhs.
3196 if(right_colids.size() > 0){
3197 p->swap_scalar_operands();
3200 // Disallow complex (and therefore expensive) comparisons.
3201 if(p->get_left_se()->get_data_type()->complex_comparison(
3202 p->get_right_se()->get_data_type() ) )
3205 // passed all the tests.
3208 // LHS must be a non-gbvar colref.
3209 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3210 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false);
3211 // I will count on there being no gbvars in the constraint.
3212 // TODO: allow gbvars which are colrefs.
3213 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3214 if(left_colids.size() != 1) return(false);
3215 // Disallow complex (and therefore expensive) comparisons.
// Only the left expression is consulted: its type is compared with itself.
3216 if(p->get_left_se()->get_data_type()->complex_comparison(
3217 p->get_left_se()->get_data_type() ) )
3221 // All entries in the IN list must be literals
3222 // Currently, this is the only possibility.
3227 case PRED_BINARY_OP:
3232 fprintf(stderr,"INTERNAL ERROR in simple_field_cosntraint, line %d, character %d, unknown predicate operator type %d\n",
3233 p->get_lineno(), p->get_charno(), p->get_operator_type() );
3240 // As the name implies, return the colref constrained by the
3241 // cnf elem. I will be counting on the LHS being a SE pointing
3244 // This fcn assumes that in fact exactly
3245 // one colref is constrained.
// Return a column reference found inside scalar expression `se`, or NULL
// when the visible branches find none.
// Sub-expressions are searched left before right, and operand lists in
// order; the first hit is returned. Aggregate references
// (get_aggr_ref() >= 0) are not searched.
// NOTE(review): case labels and some returns are not visible in this listing.
3246 colref_t *get_constrained_colref(scalarexp_t *se){
3248 vector<scalarexp_t *> operand_list;
3251 switch(se->get_operator_type()){
3255 case SE_IFACE_PARAM:
// The expression is itself a column reference.
3258 return(se->get_colref());
// One-operand expression.
3260 return(get_constrained_colref(se->get_left_se()));
// Two-operand expression: fall back to the right side if the left has none.
3262 ret=get_constrained_colref(se->get_left_se());
3263 if(ret == NULL) return(get_constrained_colref(se->get_right_se()));
3269 if(se->get_aggr_ref() >= 0) return NULL;
3271 operand_list = se->get_operands();
3272 for(p=0;p<operand_list.size();p++){
3273 ret=get_constrained_colref(operand_list[p]);
3274 if(ret != NULL) return(ret);
// Predicate overload: searches only the left scalar expression of `p`.
3285 colref_t *get_constrained_colref(predicate_t *p){
3286 return(get_constrained_colref(p->get_left_se()));
// Conjunct overload: searches only the left scalar expression of c->pr.
3288 colref_t *get_constrained_colref(cnf_elem *c){
3289 return get_constrained_colref(c->pr->get_left_se());
// OR a re-targeted copy of predicate `src_p` into conjunct `dst`.
//   dst        - conjunct to extend; if dst->pr is NULL the copy becomes its
//                predicate, otherwise dst->pr becomes (old dst->pr OR copy).
//   src_p      - predicate to copy (dup_pr); the original is not modified
//                by the visible statements.
//   target_fld - not used by the visible statements.
//   target_tbl - table name set on the copy's left-hand column reference.
//   tblref     - table reference set on the copy's left-hand column
//                reference.
3296 void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p,
3297 string target_fld, string target_tbl, int tblref){
3299 // Make a copy of the predicate to be added.
3300 // ASSUME no aggregates.
3301 predicate_t *pr = dup_pr(src_p,NULL);
3303 // Modify the ref to the base table.
3304 // ASSUME lhs is the colref
3305 pr->get_left_se()->get_colref()->set_table_name(target_tbl);
3306 pr->get_left_se()->get_colref()->set_table_ref(tblref);
3308 if(dst->pr == NULL) dst->pr = pr;
3309 else dst->pr = new predicate_t("OR", dst->pr, pr);
3315 //////////////////////////////////////////////////////
3316 /////////////// Represent a node in a predicate tree
// Node of a predicate tree. make_common_pred reads the node's `pr` member
// and its `children`.
// NOTE(review): further members, including `pr`, are declared on lines not
// visible in this listing.
3317 struct common_pred_node{
3320 vector<predicate_t *> predecessor_preds;
// Sub-nodes; make_common_pred ORs their predicates together.
3321 vector<common_pred_node *> children;
// Build a new predicate from the tree rooted at `pn`.
//   - A node without children yields a copy (dup_pr) of its own predicate.
//   - Otherwise the predicates built for the children are OR'ed together,
//     and the result is AND'ed with a copy of the node's own predicate.
// NOTE(review): the conditions guarding the error message and the final
// AND, and the returns, are not visible in this listing.
3333 predicate_t *make_common_pred(common_pred_node *pn){
3336 if(pn->children.size() == 0){
3338 fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node ahs no children and no predicate.\n");
3341 return( dup_pr(pn->pr,NULL) );
// Fold the children into one disjunction, newest child on the left.
3344 predicate_t *curr_pr = make_common_pred( pn->children[0] );
3345 for(n=1;n<pn->children.size();++n){
3346 curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr);
3350 curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr);
// Ordering of cnf_set objects; the visible test compares the sizes of
// their lfta_id members.
// NOTE(review): the return statements are not visible in this listing.
3356 bool operator<(const cnf_set &c1, const cnf_set &c2){
3357 if(c1.lfta_id.size() < c2.lfta_id.size())
3363 // Compute the predicates for the prefilter.
3364 // the prefilter preds are returned in prefilter_preds.
3365 // pred_ids is the set of predicates used in the prefilter.
3366 // the encoding is the lfta index, in the top 16 bits,
3367 // then the index of the cnf element in the bottom 16 bits.
3368 // This set is for identifying which preds do not need
3369 // to be generated in the lftas.
// Select the predicates to evaluate in the prefilter.
//   where_list      - where_list[l][c] is conjunct c of entry l; the pair
//                     (l, c) is recorded in each candidate's cnf_set.
//   Schema          - used for the "expensive" field test, the predicate
//                     equivalence test and the final substitution.
//   Ext_fcns        - forwarded to prefilter_compatible.
//   prefilter_preds - output: the selected cnf_sets are appended.
//   pred_ids        - output: filled by add_pred_ids of each selected set.
// Steps: collect candidates, merge equivalent ones, merge those occurring
// in the same set of lftas, compact, sort, keep at most 64, then substitute
// group-by references in the kept predicates.
// NOTE(review): the NULL tests that skip already-merged slots are on lines
// not visible in this listing.
3370 void find_common_filter(vector< vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector<cnf_set *> &prefilter_preds, set<unsigned int > &pred_ids){
3373 vector<cnf_set *> pred_list, sort_list;
3375 // Create list of tagged, prefilter-safe CNFs.
3376 for(l=0;l<where_list.size();++l){
3377 for(c=0;c<where_list[l].size();++c){
3378 if(prefilter_compatible(where_list[l][c],Ext_fcns)){
// Predicates touching "expensive" fields are left out.
3379 if(expensive_refs_pr(where_list[l][c]->pr,Schema)==0)
3380 pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c));
3385 // Eliminate duplicates
3386 for(p=0;p<pred_list.size();++p){
3388 for(p2=p+1;p2<pred_list.size();++p2){
3390 if(is_equivalent_pred_base(pred_list[p]->pr, pred_list[p2]->pr,Schema)){
// The later duplicate is folded into the earlier entry and its slot cleared.
3391 pred_list[p]->subsume(pred_list[p2]);
3392 delete pred_list[p2];
3393 pred_list[p2] = NULL;
3400 // combine preds that occur in the exact same lftas.
3401 for(p=0;p<pred_list.size();++p){
3403 for(p2=p+1;p2<pred_list.size();++p2){
3405 if(pred_list[p]->lfta_id == pred_list[p2]->lfta_id){
3406 pred_list[p]->combine_pred(pred_list[p2]);
3407 delete pred_list[p2];
3408 pred_list[p2] = NULL;
3415 // Compress the list
3416 for(p=0;p<pred_list.size();++p){
3418 sort_list.push_back(pred_list[p]);
3422 sort(sort_list.begin(), sort_list.end(),compare_cnf_set());
3424 // Return the top preds, up to 64 of them.
3425 for(p=0;p<sort_list.size() && p<64;p++){
3426 prefilter_preds.push_back(sort_list[p]);
3427 sort_list[p]->add_pred_ids(pred_ids);
3430 // Substitute gb refs with their defs
3431 // While I'm at it, substitute base table sch ref for tblref.
3432 for(p=0;p<prefilter_preds.size() ;p++){
3433 subs_gbrefs_pr(prefilter_preds[p]->pr,Schema);
3442 ///////////////////////////////////////////////////////////////////////////
3443 //////////////////////////////////////////////////////////////////////////
3445 // Find partial functions and register them.
3446 // Do a DFS so that nested partial fcn calls
3447 // get evaluated in the right order.
3448 // Don't drill down into aggregates -- their arguments are evaluated
3449 // earlier than the select list is.
3451 // Modification for function caching:
3452 // Pass in a ref counter, and partial fcn indicator.
3453 // Cache fcns ref'd at least once.
3454 // pass in NULL for fcn_ref_cnt to turn off fcn caching analysis
// Register the partial (and high-cost) function calls found in `se`.
//   se             - expression to walk; NULL is accepted.
//   pf_list        - in/out list of registered calls; a call's index in
//                    this list is stored on the expression with
//                    set_partial_ref.
//   fcn_ref_cnt    - parallel reference counts (see the header comment on
//                    function caching above this function).
//   is_partial_fcn - parallel flags: whether the entry is a partial function.
//   Ext_fcns       - external function table (is_partial, get_fcn_cost).
// Operands are visited before the call itself is registered, so nested
// calls receive lower indices. Aggregate references are not entered.
// NOTE(review): case labels and the tests on fcn_ref_cnt that guard the
// counter updates are on lines not visible in this listing.
3457 void find_partial_fcns(scalarexp_t *se, vector<scalarexp_t *> *pf_list,
3458 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3459 ext_fcn_list *Ext_fcns){
3460 vector<scalarexp_t *> operands;
3463 if(se == NULL) return;
3465 switch(se->get_operator_type()){
3468 case SE_IFACE_PARAM:
3471 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3474 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3475 find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3482 // find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ;
3485 if(se->get_aggr_ref() >= 0) return;
3487 operands = se->get_operands();
3488 for(o=0;o<operands.size();o++){
3489 find_partial_fcns(operands[o], pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
// Candidates are partial functions and functions whose cost is at least
// COST_HIGH.
3492 if(Ext_fcns->is_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){
// Reuse the entry of an equivalent call that is already registered.
3494 for(f=0;f<pf_list->size();++f){
3495 if(is_equivalent_se(se,(*pf_list)[f])){
3496 se->set_partial_ref(f);
3497 (*fcn_ref_cnt)[f]++;
// No equivalent entry: register this call as a new one.
3504 if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) || fcn_ref_cnt)){
3505 se->set_partial_ref(pf_list->size());
3506 pf_list->push_back(se);
3508 fcn_ref_cnt->push_back(1);
3509 is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id()));
3515 fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n",
3516 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate-level driver for find_partial_fcns: applies it to every scalar
// expression in `pr` and recurses into sub-predicates. All other arguments
// are forwarded unchanged.
3523 void find_partial_fcns_pr(predicate_t *pr, vector<scalarexp_t *> *pf_list,
3524 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3525 ext_fcn_list *Ext_fcns){
3526 vector<literal_t *> litl;
3527 vector<scalarexp_t *> op_list;
3530 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression.
3532 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
// Predicate with scalar expressions on both sides.
3535 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3536 find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
// Predicate with a single sub-predicate.
3539 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3541 case PRED_BINARY_OP:
3542 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3543 find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
// Predicate with an operand list.
3546 op_list = pr->get_op_list();
3547 for(o=0;o<op_list.size();++o){
3548 find_partial_fcns(op_list[o],pf_list,fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3552 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
3553 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Register the combinable predicate functions found in `pr`.
//   pr       - predicate to walk.
//   pr_list  - in/out list of registered predicates; a predicate's index in
//              this list is stored on it with set_combinable_ref.
//   Schema   - used by the predicate equivalence test.
//   Ext_fcns - external function table (is_combinable).
// A predicate equivalent to an already registered one shares that entry's
// index; otherwise it is appended as a new entry.
// NOTE(review): case labels and the loop exit after a match are on lines
// not visible in this listing.
3562 void find_combinable_preds(predicate_t *pr, vector<predicate_t *> *pr_list,
3563 table_list *Schema, ext_fcn_list *Ext_fcns){
3564 vector<literal_t *> litl;
3565 vector<scalarexp_t *> op_list;
3568 switch(pr->get_operator_type()){
// Predicate with a single sub-predicate.
3574 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns);
3576 case PRED_BINARY_OP:
3577 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ;
3578 find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ;
// Predicate function: only combinable ones are registered.
3581 if(Ext_fcns->is_combinable(pr->get_fcn_id())){
3582 for(f=0;f<pr_list->size();++f){
3583 if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){
3584 pr->set_combinable_ref(f);
3588 if(f == pr_list->size()){
3589 pr->set_combinable_ref(pr_list->size());
3590 pr_list->push_back(pr);
// The diagnostic names this function (it previously named find_partial_pr).
3595 fprintf(stderr,"INTERNAL ERROR in find_combinable_preds, line %d, character %d, unknown predicate operator type %d\n",
3596 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3604 //--------------------------------------------------------------------
3605 // Collect refs to aggregates.
// Collect into `agg_refs` the aggregate reference ids found in `se`.
//   se       - expression to walk; NULL is accepted.
//   agg_refs - output set of get_aggr_ref() values.
// For an expression with an operand list the id is recorded only when it is
// non-negative, and the operands are then searched as well.
// NOTE(review): case labels are not visible in this listing.
3608 void collect_agg_refs(scalarexp_t *se, set<int> &agg_refs){
3609 vector<scalarexp_t *> operands;
3612 if(se == NULL) return;
3614 switch(se->get_operator_type()){
3617 case SE_IFACE_PARAM:
3620 collect_agg_refs(se->get_left_se(), agg_refs) ;
3623 collect_agg_refs(se->get_left_se(), agg_refs);
3624 collect_agg_refs(se->get_right_se(), agg_refs);
// Branch that records the id unconditionally.
3630 agg_refs.insert(se->get_aggr_ref());
3633 if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref());
3635 operands = se->get_operands();
3636 for(o=0;o<operands.size();o++){
3637 collect_agg_refs(operands[o], agg_refs);
3642 fprintf(stderr,"INTERNAL ERROR in collect_agg_refs, line %d, character %d: unknown operator type %d\n",
3643 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate-level driver for collect_agg_refs: applies it to every scalar
// expression in `pr` and recurses into sub-predicates, accumulating into
// `agg_refs`.
3650 void collect_aggr_refs_pr(predicate_t *pr, set<int> &agg_refs){
3651 vector<literal_t *> litl;
3652 vector<scalarexp_t *> op_list;
3655 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression.
3657 collect_agg_refs(pr->get_left_se(), agg_refs) ;
// Predicate with scalar expressions on both sides.
3660 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3661 collect_agg_refs(pr->get_right_se(), agg_refs) ;
// Predicate with a single sub-predicate.
3664 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs);
3666 case PRED_BINARY_OP:
3667 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ;
3668 collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ;
// Predicate with an operand list.
3671 op_list = pr->get_op_list();
3672 for(o=0;o<op_list.size();++o){
3673 collect_agg_refs(op_list[o],agg_refs);
3677 fprintf(stderr,"INTERNAL ERROR in collect_aggr_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3678 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3686 //--------------------------------------------------------------------
3687 // Collect previously registered partial fcn refs.
3688 // Do a DFS so that nested partial fcn calls
3689 // get evaluated in the right order.
3690 // Don't drill down into aggregates -- their arguments are evaluated
3691 // earlier than the select list is.
3692 // ------------->>> THEN WHY AM I DRILLING DOWN INTO AGGREGATES?
// Collect into `pfcn_refs` the partial-function indices (get_partial_ref)
// of the calls in `se` for which is_partial() holds.
//   se        - expression to walk; NULL is accepted.
//   pfcn_refs - output set of indices.
// Operands are visited before the call itself; aggregate references
// (get_aggr_ref() >= 0) are not entered.
// NOTE(review): case labels are not visible in this listing.
3694 void collect_partial_fcns(scalarexp_t *se, set<int> &pfcn_refs){
3695 vector<scalarexp_t *> operands;
3698 if(se == NULL) return;
3700 switch(se->get_operator_type()){
3703 case SE_IFACE_PARAM:
3706 collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3709 collect_partial_fcns(se->get_left_se(), pfcn_refs);
3710 collect_partial_fcns(se->get_right_se(), pfcn_refs);
3717 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3720 if(se->get_aggr_ref() >= 0) return;
3722 operands = se->get_operands();
3723 for(o=0;o<operands.size();o++){
3724 collect_partial_fcns(operands[o], pfcn_refs);
3727 if(se->is_partial()){
3728 pfcn_refs.insert(se->get_partial_ref());
3733 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n",
3734 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate-level driver for collect_partial_fcns: applies it to every
// scalar expression in `pr` and recurses into sub-predicates, accumulating
// into `pfcn_refs`.
3741 void collect_partial_fcns_pr(predicate_t *pr, set<int> &pfcn_refs){
3742 vector<literal_t *> litl;
3743 vector<scalarexp_t *> op_list;
3746 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression.
3748 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
// Predicate with scalar expressions on both sides.
3751 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
3752 collect_partial_fcns(pr->get_right_se(), pfcn_refs) ;
// Predicate with a single sub-predicate.
3755 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs);
3757 case PRED_BINARY_OP:
3758 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ;
3759 collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ;
// Predicate with an operand list.
3762 op_list = pr->get_op_list();
3763 for(o=0;o<op_list.size();++o){
3764 collect_partial_fcns(op_list[o],pfcn_refs);
3768 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns_pr, line %d, character %d, unknown predicate operator type %d\n",
3769 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3779 ///////////////////////////////////////////////////////////////
3780 //////////// Exported Functions ///////////////////////////
3781 ///////////////////////////////////////////////////////////////
3784 // Count and collect refs to interface parameters.
// count_se_ifp_refs: recursive walk over the scalar expression `se` that
// collects the interface parameters it references.
//   se       - expression node to examine; a NULL node yields 0.
//   ifpnames - output set; receives get_ifpref()->to_string() for every
//              SE_IFACE_PARAM node encountered.
// Returns an int summed over the recursive calls (presumably the number of
// interface-parameter references found -- confirm against the full switch).
3786 int count_se_ifp_refs(scalarexp_t *se, set<string> &ifpnames){
3787 vector<scalarexp_t *> operands;
// Empty subtree contributes nothing.
3791 if(se == NULL) return 0;
3793 switch(se->get_operator_type()){
3797 case SE_IFACE_PARAM:
// Record the textual form of the referenced interface parameter.
3798 ifpnames.insert(se->get_ifpref()->to_string());
// Node with a single child expression: the result is the child's count.
3801 return count_se_ifp_refs(se->get_left_se(), ifpnames) ;
// Node with two child expressions: add the counts of both sides.
3803 ret = count_se_ifp_refs(se->get_left_se(), ifpnames);
3804 ret += count_se_ifp_refs(se->get_right_se(), ifpnames);
// Disabled call (it names collect_partial_fcns); nothing is visited here.
3811 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
// A node carrying a non-negative aggregate ref contributes 0 and its
// operands are not visited.
3814 if(se->get_aggr_ref() >= 0) return 0;
// Accumulate the counts of all operands.
3816 operands = se->get_operands();
3817 for(o=0;o<operands.size();o++){
3818 ret += count_se_ifp_refs(operands[o], ifpnames);
// Operator type not recognised by the switch: report it on stderr together
// with the expression's source line and character position.
3823 fprintf(stderr,"INTERNAL ERROR in count_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3824 se->get_lineno(), se->get_charno(),se->get_operator_type());
// count_pr_ifp_refs: predicate counterpart of count_se_ifp_refs.
// Dispatches on pr->get_operator_type(), forwarding scalar operands to
// count_se_ifp_refs and recursing through nested predicates.
//   pr       - predicate to examine; a NULL predicate yields 0.
//   ifpnames - output set handed through to count_se_ifp_refs.
// Returns the sum of the values returned by the calls it makes.
3831 int count_pr_ifp_refs(predicate_t *pr, set<string> &ifpnames){
// NOTE(review): litl does not appear to be used below; confirm before removing.
3832 vector<literal_t *> litl;
3833 vector<scalarexp_t *> op_list;
// Absent predicate contributes nothing.
3836 if(pr == NULL) return 0;
3838 switch(pr->get_operator_type()){
// Predicate with one scalar operand: count within its left expression.
3840 return count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
// Predicate with two scalar operands: add the counts of both sides.
3842 ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
3843 ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ;
// Predicate wrapping a single sub-predicate: the result is that count.
3846 return count_pr_ifp_refs(pr->get_left_pr(), ifpnames);
3847 case PRED_BINARY_OP:
// Two sub-predicates: add the counts of both sides.
3848 ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ;
3849 ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ;
// Predicate with an operand list: accumulate over each operand expression.
3852 op_list = pr->get_op_list();
3853 for(o=0;o<op_list.size();++o){
3854 ret += count_se_ifp_refs(op_list[o],ifpnames);
// Predicate operator type not recognised by the switch: report it on stderr
// together with the predicate's source line and character position.
3858 fprintf(stderr,"INTERNAL ERROR in count_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3859 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3866 // Resolve ifp refs, convert them to string literals.
// resolve_se_ifp_refs: recursive walk over the scalar expression `se` that
// replaces each interface-parameter reference with a literal holding the
// parameter's value for one interface.
//   se   - expression node to rewrite in place; a NULL node yields 0.
//   ifm  - first component of the interface identity; printed as "ifm.ifn" in
//          diagnostics (presumably the machine name -- confirm).
//   ifn  - second component of the interface identity (presumably the
//          interface name -- confirm).
//   ifdb - source of parameter values, queried through get_iface_vals().
//   err  - diagnostics are appended here, one "ERROR ..." line per problem.
// Returns an int accumulated from the recursive calls (presumably non-zero
// when a problem was found -- confirm against the full switch).
3868 int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb, string &err){
3869 vector<scalarexp_t *> operands;
3870 vector<string> ifvals;
// Empty subtree: nothing to resolve.
3878 if(se == NULL) return 0;
3880 switch(se->get_operator_type()){
3884 case SE_IFACE_PARAM:
// Fetch the value list of the referenced parameter for interface ifm.ifn;
// ierr and serr receive the lookup's status and error text.
3885 ir = se->get_ifpref();
3886 ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr);
// Lookup failure: pass the lookup's own error text (serr) on to the caller.
3888 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n";
// The parameter must have a value ...
3891 if(ifvals.size() == 0){
3892 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n";
// ... and the value must be unique.
3895 if(ifvals.size() > 1){
3896 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n";
// Build a literal from the single value and convert this node to it.
// NOTE(review): ownership of tmp_l after convert_to_literal() is not evident
// here -- confirm it is not leaked.
3899 tmp_l = new literal_t( ifvals[0]);
3900 se->convert_to_literal(tmp_l);
// Node with a single child expression: the result is the child's result.
3903 return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ;
// Node with two child expressions: add the results of both sides.
3905 ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err);
3906 ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err);
// Disabled call (it names collect_partial_fcns); nothing is visited here.
3913 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
// A node carrying a non-negative aggregate ref yields 0 and its operands are
// not visited.
3916 if(se->get_aggr_ref() >= 0) return 0;
// Resolve within every operand, accumulating the results.
3918 operands = se->get_operands();
3919 for(o=0;o<operands.size();o++){
3920 ret += resolve_se_ifp_refs(operands[o], ifm, ifn, ifdb,err);
// Operator type not recognised by the switch: report it on stderr together
// with the expression's source line and character position.
3925 fprintf(stderr,"INTERNAL ERROR in resolve_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3926 se->get_lineno(), se->get_charno(),se->get_operator_type());
// resolve_pr_ifp_refs: predicate counterpart of resolve_se_ifp_refs.
// Dispatches on pr->get_operator_type(), forwarding scalar operands to
// resolve_se_ifp_refs and recursing through nested predicates.
//   pr             - predicate whose expressions are rewritten in place.
//   ifm, ifn, ifdb - passed unchanged to resolve_se_ifp_refs.
//   err            - diagnostics are appended here by the calls made below.
// Returns the sum of the values returned by the calls it makes.
// NOTE(review): pr is dereferenced by the switch with no NULL test visible
// before it (count_pr_ifp_refs has one); callers presumably guarantee a
// non-NULL predicate -- confirm.
3933 int resolve_pr_ifp_refs(predicate_t *pr, string ifm, string ifn, ifq_t *ifdb, string &err){
// NOTE(review): litl does not appear to be used below; confirm before removing.
3934 vector<literal_t *> litl;
3935 vector<scalarexp_t *> op_list;
3939 switch(pr->get_operator_type()){
// Predicate with one scalar operand: resolve within its left expression.
3941 return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
// Predicate with two scalar operands: add the results of both sides.
3943 ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
3944 ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ;
// Predicate wrapping a single sub-predicate: the result is that result.
3947 return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err);
3948 case PRED_BINARY_OP:
// Two sub-predicates: add the results of both sides.
3949 ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ;
3950 ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ;
// Predicate with an operand list: accumulate over each operand expression.
3953 op_list = pr->get_op_list();
3954 for(o=0;o<op_list.size();++o){
3955 ret += resolve_se_ifp_refs(op_list[o],ifm, ifn, ifdb, err);
// Predicate operator type not recognised by the switch: report it on stderr
// together with the predicate's source line and character position.
3959 fprintf(stderr,"INTERNAL ERROR in resolve_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3960 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// impute_query_name: choose a name for the query with a three-step fallback.
//   fta_tree   - parsed query; its "query_name" definition is tried first.
//   default_nm - used when the query defines no (or an empty) "query_name".
// When both of those are empty the fixed name "default_query" is used, so the
// chosen name is never the empty string.
3968 string impute_query_name(table_exp_t *fta_tree, string default_nm){
// First choice: the name declared in the query itself.
3969 string retval = fta_tree->get_val_of_name("query_name");
// Second choice: the caller-supplied default.
3970 if(retval == "") retval = default_nm;
// Last resort: a fixed placeholder name.
3971 if(retval == "") retval = "default_query";
3975 // Convert the parse tree into an intermediate form,
3976 // which admits analysis better.
3978 // TODO : rationalize the error return policy.
3980 // TODO : the query_summary_class object contains
3982 // TODO: revisit the issue when nested subqueries are implemented.
3983 // One possibility: implement accessor methods to hide the
3985 // For now: this class contains data structures not in table_exp_t
3986 // (with a bit of duplication)
3988 // Return NULL on error.
3989 // print error messages to stderr.
3992 query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema,
3993 ext_fcn_list *Ext_fcns, string default_name){
3996 // Create the summary struct -- no analysis is done here.
3997 query_summary_class *qs = new query_summary_class(fta_tree);
3998 qs->query_type = fta_tree->query_type;
4000 ////////////// Do common analysis
4002 // Extract query name. Already imputed for the qnodes.
4003 // qs->query_name = impute_query_name(fta_tree, default_name);
4004 qs->query_name = default_name;
4005 //printf("query name is %s\n",qs->query_name.c_str());
4007 // extract definitions. Don't grab the query name.
4009 map<string, string> nmap = fta_tree->get_name_map();
4010 map<string, string>::iterator nmi;
4011 for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){
4012 string pname = (*nmi).first;
4013 if(pname != "query_name" )
4014 (qs->definitions)[pname] = (*nmi).second;
4020 // First, verify that all the referenced tables are defined.
4021 // Then, bind the tablerefs in the FROM list to schemas in
4023 tablevar_list_t *tlist = fta_tree->get_from();
4024 vector<tablevar_t *> tbl_vec = tlist->get_table_list();
4026 bool found_error = false;
4027 for(i=0;i<tbl_vec.size();i++){
4028 int sch_no = schema->find_tbl(tbl_vec[i]->get_schema_name());
4030 fprintf(stderr,"Error, table <%s> not found in the schema file\n",
4031 tbl_vec[i]->get_schema_name().c_str() );
4032 fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(),
4033 tbl_vec[i]->get_charno() );
4037 tbl_vec[i]->set_schema_ref(sch_no);
4039 // If accessing a UDOP, mangle the name
4040 // This needs to be done in translate_fta.cc, not here.
4042 if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){
4043 string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm;
4044 tbl_vec[i]->set_schema_name(mngl_name);
4048 // No FTA schema should have an interface defined on it.
4049 if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){
4050 fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str());
4052 // Fill in default interface
4053 if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4054 tbl_vec[i]->set_interface("default");
4055 tbl_vec[i]->set_ifq(true);
4057 // Fill in default machine
4058 if(tbl_vec[i]->get_interface()!="" && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){
4059 tbl_vec[i]->set_machine(hostname);
4062 if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4063 // Record the set of interfaces accessed
4065 if(tbl_vec[i]->get_ifq()){
4066 ifstr = "["+tbl_vec[i]->get_interface()+"]";
4068 if(tbl_vec[i]->get_machine() != "localhost"){
4069 ifstr = "'"+tbl_vec[i]->get_machine()+"'."+tbl_vec[i]->get_interface();
4071 ifstr = tbl_vec[i]->get_interface();
4074 //printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str());
4075 if(qs->definitions.count("_referenced_ifaces")){
4076 ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr;
4078 qs->definitions["_referenced_ifaces"] = ifstr;
4082 if(found_error) return(NULL);
4084 // Ensure that all tablevars are named
4085 // and that no two tablevars have the same name.
4087 // First, gather the set of variable
4088 set<string> tblvar_names;
4089 for(i=0;i<tbl_vec.size();i++){
4090 if(tbl_vec[i]->get_var_name() != ""){
4091 if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){
4092 fprintf(stderr,"ERROR, query has two table variables named %s. line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno());
4095 tblvar_names.insert(tbl_vec[i]->get_var_name());
4098 // Now generate variable names for unnamed tablevars
4099 for(i=0;i<tbl_vec.size();i++){
4100 if(tbl_vec[i]->get_var_name() == ""){
4102 sprintf(tmpstr,"_t%d",tblvar_no);
4103 string newvar = tmpstr;
4104 while(tblvar_names.count(newvar) > 0){
4106 sprintf(tmpstr,"_t%d",tblvar_no);
4109 tbl_vec[i]->set_range_var(newvar);
4110 tblvar_names.insert(newvar);
4114 // Process inner/outer join properties
4115 int jprop = fta_tree->get_from()->get_properties();
4116 // Require explicit INNER_JOIN, ... specification for join queries.
4118 if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){
4119 fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, FILTER_JOIN.\n");
4124 if(jprop == OUTER_JOIN_PROPERTY){
4125 for(i=0;i<tbl_vec.size();i++) tbl_vec[i]->set_property(1);
4127 if(jprop == LEFT_OUTER_JOIN_PROPERTY)
4128 tbl_vec[0]->set_property(1);
4129 if(jprop == RIGHT_OUTER_JOIN_PROPERTY)
4130 tbl_vec[tbl_vec.size()-1]->set_property(1);
4131 if(jprop == FILTER_JOIN_PROPERTY){
4132 if(fta_tree->get_from()->get_temporal_range() == 0){
4133 fprintf(stderr,"ERROR, a filter join must have a non-zero temporal range.\n");
4136 if(tbl_vec.size() != 2){
4137 fprintf(stderr,"ERROR, a filter join must be between two table variables.\n");
4140 colref_t *cr = fta_tree->get_from()->get_colref();
4141 string field = cr->get_field();
4143 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field);
4145 fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str());
4148 cr->set_schema_ref(tbl_vec[0]->get_schema_ref());
4149 cr->set_tablevar_ref(0);
4150 string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field);
4151 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
4152 data_type *dt0 = new data_type(type_name, modifiers);
4153 string dt0_type = dt0->get_type_str();
4154 if(dt0_type != "INT" && dt0_type != "UINT" && dt0_type != "LLONG" && dt0_type != "ULLONG"){
4155 // if(dt0->get_type_str() != "UINT"){
4156 fprintf(stderr,"ERROR, the temporal attribute in a filter join must be one of INT/UINT/LLONG/ULLONG.\n");
4159 if(! dt0->is_increasing()){
4160 fprintf(stderr,"ERROR, the temporal attribtue in a filter join must be temporal increasing.\n");
4167 /////////////////////
4168 /// Build the query param table
4169 vector<var_pair_t *> query_params = fta_tree->query_params;
4171 for(p=0;p<query_params.size();++p){
4172 string pname = query_params[p]->name;
4173 string dtname = query_params[p]->val;
4176 fprintf(stderr,"ERROR parameter has empty name.\n");
4180 fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str());
4183 data_type *dt = new data_type(dtname);
4184 if(!(dt->is_defined())){
4185 fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str());
4189 qs->add_query_param(pname, dt, false);
4191 if(found_error) return(NULL);
4192 // unpack the param table to a global for easier analysis.
4193 param_tbl=qs->param_tbl;
4195 ////////////////// MERGE specialized analysis
4197 if(qs->query_type == MERGE_QUERY){
4199 // 1) there are two *different* streams ref'd in the FROM clause
4200 // However, only emit a warning.
4201 // (can't detect a problem if one of the interfaces is the
4202 // default interface).
4203 // 2) They have the same layout (e.g. same types but the
4204 // names can be different
4205 // 3) the two columns can unambiguously be mapped to
4206 // fields of the two tables, one per table. Exception:
4207 // the column names are the same and exist in both tables.
4208 // FURTHERMORE the positions must be the same
4209 // 4) after mapping, verify that both colrefs are temporal
4210 // and in the same direction.
4211 if(tbl_vec.size() < 2){
4212 fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() );
4216 vector<field_entry *> fev0 = schema->get_fields(
4217 tbl_vec[0]->get_schema_name()
4222 for(cv=1;cv<tbl_vec.size();++cv){
4223 vector<field_entry *> fev1 = schema->get_fields(
4224 tbl_vec[cv]->get_schema_name()
4227 if(fev0.size() != fev1.size()){
4228 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4232 // Only need to ensure that the list of types are the same.
4233 // The first table supplies the output colnames,
4234 // and all temporal properties are lost, except for the
4235 // merge-by columns.
4237 for(f=0;f<fev0.size();++f){
4238 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4239 data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list());
4240 if(! dt0.equal_subtypes(&dt1) ){
4241 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4247 // copy over the merge-by cols.
4248 qs->mvars = fta_tree->mergevars;
4250 if(qs->mvars.size() == 0){ // need to discover the merge vars.
4251 int mergevar_pos = -1;
4253 for(f=0;f<fev0.size();++f){
4254 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4255 if(dt0.is_temporal()){
4260 if(mergevar_pos >= 0){
4261 for(cv=0;cv<tbl_vec.size();++cv){
4262 vector<field_entry *> fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name());
4263 qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() ));
4266 fprintf(stderr,"ERROR, no merge-by column found.\n");
4271 // Ensure same number of tables, merge cols.
4272 if(tbl_vec.size() != qs->mvars.size()){
4273 fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size());
4277 // Ensure that the merge-by are from different tables
4278 // also, sort colrefs so that they align with the FROM list using tmp_crl
4279 set<int> refd_sources;
4280 vector<colref_t *> tmp_crl(qs->mvars.size(),NULL);
4281 for(cv=0;cv<qs->mvars.size();++cv){
4282 int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema);
4284 fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str());
4286 refd_sources.insert(tblvar);
4287 tmp_crl[tblvar] = qs->mvars[cv];
4289 if(refd_sources.size() != qs->mvars.size()){
4290 fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size());
4294 // 1-1 mapping, so use tmp_crl as the merge column list.
4295 qs->mvars = tmp_crl;
4299 // Look up the colrefs in their schemas, verify that
4300 // they are at the same place, that they are both temporal
4302 // It seems that this should be done more in the schema objects.
4303 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field());
4305 fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str());
4308 for(cv=1;cv<qs->mvars.size();++cv){
4309 int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field());
4311 fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str());
4316 field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0);
4317 data_type dt0(fe0->get_type(),fe0->get_modifier_list());
4318 if( (!dt0.is_temporal()) ){
4319 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0);
4322 for(cv=0;cv<qs->mvars.size();++cv){
4323 field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0);
4324 data_type dt1(fe1->get_type(),fe1->get_modifier_list());
4325 if( (!dt1.is_temporal()) ){
4326 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv);
4331 if( dt0.get_temporal() != dt1.get_temporal()){
4332 fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv);
4337 // If there is a SLACK specification, verify
4338 // that it is literal-only and that its type is compatible
4339 // with that of the merge columns
4340 qs->slack = fta_tree->slack;
4342 if(! literal_only_se(qs->slack)){
4343 fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n");
4347 assign_data_types(qs->slack, schema, fta_tree, Ext_fcns );
4348 data_type sdt(&dt0, qs->slack->get_data_type(), string("+"));
4349 if(sdt.get_type() == undefined_t){
4350 fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n");
4356 // All the tests have passed, there is nothing
4361 ////////////////// SELECT specialized analysis
4363 if(qs->query_type == SELECT_QUERY){
4364 // unpack the gb_tbl, aggr_tbl, param_tbl, and complex_literals
4365 // objects into globals, for easier syntax.
4366 gb_tbl = qs->gb_tbl;
4367 aggr_tbl = qs->aggr_tbl;
4370 // Build the table of group-by attributes.
4371 // (se processing done automatically).
4372 // NOTE : Doing the SE processing here is getting cumbersome,
4373 // I should process these individually.
4374 // NOTE : I should check for duplicate names.
4375 // NOTE : I should ensure that the def of one GB does not
4376 // reference the value of another.
4377 vector<extended_gb_t *> gb_list = fta_tree->get_groupby();
4379 string temporal_gbvars = "";
4380 map<string, int> gset_gbnames;
4382 // For generating the set of GB patterns for this aggregation query.
4383 vector<bool> inner_pattern;
4384 vector<vector<bool> > pattern_set;
4385 vector<vector<vector<bool> > > pattern_components;
4387 vector<gb_t *> r_gbs, c_gbs, g_gbs;
4390 for(i=0;i<gb_list.size();i++){
4391 switch(gb_list[i]->type){
4393 retval = gb_tbl->add_gb_attr(
4394 gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns
4397 return NULL; // nothing added to gb_tbl, so this can trigger a segfault 2 lines below
4399 if(gb_tbl->get_data_type(i)->is_temporal()){
4401 if(temporal_gbvars != "") temporal_gbvars+=" ";
4402 temporal_gbvars += gb_tbl->get_name(i);
4406 inner_pattern.clear();
4407 pattern_set.clear();
4408 inner_pattern.push_back(true);
4409 pattern_set.push_back(inner_pattern);
4410 pattern_components.push_back(pattern_set);
4412 gb_tbl->gb_entry_type.push_back("");
4413 gb_tbl->gb_entry_count.push_back(1);
4414 gb_tbl->pattern_components.push_back(pattern_set);
4417 case rollup_egb_type:
4418 r_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4419 for(j=0;j<r_gbs.size();++j){
4420 retval = gb_tbl->add_gb_attr(
4421 r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4425 }else{ // rollup gb can't be temporal
4426 gb_tbl->reset_temporal(gb_tbl->size()-1);
4430 inner_pattern.resize(r_gbs.size());
4431 pattern_set.clear();
4432 for(j=0;j<=r_gbs.size();++j){
4433 for(k=0;k<r_gbs.size();++k){
4435 inner_pattern[k] = true;
4437 inner_pattern[k] = false;
4439 pattern_set.push_back(inner_pattern);
4441 pattern_components.push_back(pattern_set);
4443 gb_tbl->gb_entry_type.push_back("ROLLUP");
4444 gb_tbl->gb_entry_count.push_back(r_gbs.size());
4445 gb_tbl->pattern_components.push_back(pattern_set);
4448 c_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4449 for(j=0;j<c_gbs.size();++j){
4450 retval = gb_tbl->add_gb_attr(
4451 c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4455 }else{ // cube gb can't be temporal
4456 gb_tbl->reset_temporal(gb_tbl->size()-1);
4460 inner_pattern.resize(c_gbs.size());
4461 pattern_set.clear();
4462 n_patterns = 1 << c_gbs.size();
4463 for(j=0;j<n_patterns;++j){
4465 for(k=0;k<c_gbs.size();++k,test_bit = test_bit << 1){
4466 if((j & test_bit) != 0)
4467 inner_pattern[k] = true;
4469 inner_pattern[k] = false;
4471 pattern_set.push_back(inner_pattern);
4473 pattern_components.push_back(pattern_set);
4475 gb_tbl->gb_entry_type.push_back("CUBE");
4476 gb_tbl->gb_entry_count.push_back(c_gbs.size());
4477 gb_tbl->pattern_components.push_back(pattern_set);
4479 case gsets_egb_type:
4481 gset_gbnames.clear();
4482 for(j=0;j<gb_list[i]->gb_lists.size();++j){
4483 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4484 for(k=0;k<g_gbs.size();++k){
4485 if(g_gbs[k]->type != GB_COLREF){
4486 fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str());
4489 if(gset_gbnames.count(g_gbs[k]->name) == 0){
4490 retval = gb_tbl->add_gb_attr(
4491 g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns
4495 }else{ // gsets gb can't be temporal
4496 gb_tbl->reset_temporal(gb_tbl->size()-1);
4498 int pos = gset_gbnames.size();
4499 gset_gbnames[g_gbs[k]->name] = pos;
4505 if(gset_gbnames.size() > 63){
4506 fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n");
4510 inner_pattern.resize(gset_gbnames.size());
4511 pattern_set.clear();
4512 set<unsigned long long int> signatures;
4513 for(j=0;j<gb_list[i]->gb_lists.size();++j){
4514 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4515 set<string> refd_gbs;
4516 for(k=0;k<g_gbs.size();++k){
4517 refd_gbs.insert(g_gbs[k]->name);
4519 fill(inner_pattern.begin(),inner_pattern.end(),false);
4520 unsigned long long int signature = 0;
4521 set<string>::iterator ssi;
4522 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4523 inner_pattern[gset_gbnames[(*ssi)]] = true;
4524 signature |= (1 << gset_gbnames[(*ssi)]);
4526 if(signatures.count(signature)){
4527 fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t");
4528 set<string>::iterator ssi;
4529 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4530 fprintf(stderr," %s",(*ssi).c_str());
4532 fprintf(stderr,"\n");
4534 signatures.insert(signature);
4535 pattern_set.push_back(inner_pattern);
4538 pattern_components.push_back(pattern_set);
4540 gb_tbl->gb_entry_type.push_back("GROUPING_SETS");
4541 gb_tbl->gb_entry_count.push_back(gset_gbnames.size());
4542 gb_tbl->pattern_components.push_back(pattern_set);
4549 if(found_error) return(NULL);
4551 fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s). Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str());
4555 // Compute the set of patterns. Take the cross product of all pattern components.
4556 vector<vector<bool> > gb_patterns;
4557 int n_components = pattern_components.size();
4558 vector<int> pattern_pos(n_components,0);
4561 vector<bool> pattern;
4562 for(j=0;j<n_components;j++){
4563 pattern.insert(pattern.end(),pattern_components[j][pattern_pos[j]].begin(),
4564 pattern_components[j][pattern_pos[j]].end());
4566 gb_patterns.push_back(pattern);
4567 for(j=0;j<n_components;j++){
4569 if(pattern_pos[j] >= pattern_components[j].size())
4574 if(j >= n_components)
4577 gb_tbl->gb_patterns = gb_patterns;
4580 // Process the supergroup, if any.
4581 vector<colref_t *> sgb = fta_tree->get_supergb();
4582 for(i=0;i<sgb.size();++i){
4583 int gbr = gb_tbl->find_gb(sgb[i],fta_tree->fm, schema);
4585 fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str());
4588 if(qs->sg_tbl.count(gbr)){
4589 fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str());
4591 qs->sg_tbl.insert(gbr);
4593 if(found_error) return(NULL);
4595 if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){
4596 fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4602 predicate_t *wh = fta_tree->get_where();
4603 predicate_t *hv = fta_tree->get_having();
4604 predicate_t *cw = fta_tree->get_cleaning_when();
4605 predicate_t *cb = fta_tree->get_cleaning_by();
4606 predicate_t *closew = fta_tree->get_closing_when();
4608 if(closew != NULL && gb_tbl->gb_patterns.size()>1){
4609 fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4615 // Verify that all column references are valid, and if so assign
4618 vector<select_element *> sl_list = fta_tree->get_sl_vec();
4619 for(i=0;i<sl_list.size();i++){
4620 retval = verify_colref(sl_list[i]->se, fta_tree->fm, schema, gb_tbl);
4621 if(retval < 0) found_error = true;
4624 retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl);
4625 if(retval < 0) found_error = true;
4627 retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl);
4628 if(retval < 0) found_error = true;
4630 retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl);
4631 if(retval < 0) found_error = true;
4633 retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl);
4634 if(retval < 0) found_error = true;
4636 retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl);
4637 if(retval < 0) found_error = true;
4639 if(found_error) return(NULL);
4641 // Verify that all of the scalar expressions
4642 // and comparison predicates have compatible types.
4645 string temporal_output_fields;
4646 for(i=0;i<sl_list.size();i++){
4647 retval = assign_data_types(sl_list[i]->se, schema, fta_tree, Ext_fcns );
4651 if(sl_list[i]->se->get_data_type()->is_temporal()){
4653 temporal_output_fields += " "+int_to_string(i);
4658 fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s). Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str());
4662 retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns);
4663 if(retval < 0) found_error = true;
4665 retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns);
4666 if(retval < 0) found_error = true;
4668 retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns);
4669 if(retval < 0) found_error = true;
4671 retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns);
4672 if(retval < 0) found_error = true;
4674 retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns);
4675 if(retval < 0) found_error = true;
4677 if(found_error) return(NULL);
4679 // Impute names for the unnamed columns.
4680 set<string> curr_names;
4682 for(s=0;s<sl_list.size();++s){
4683 curr_names.insert(sl_list[s]->name);
4685 for(s=0;s<sl_list.size();++s){
4686 if(sl_list[s]->name == "")
4687 sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se);
4691 // Check the aggregates.
4692 // No aggrs allowed in the WHERE predicate.
4693 // (no aggrs in the GB defs, but that is examined elsewhere)
4694 // Therefore, aggregates are allowed only the select clause.
4696 // The query is an aggregation query if there is a group-by clause, or
4697 // if any aggregate is referenced. If there is a group-by clause,
4698 // at least one aggregate must be referenced.
4699 // If the query is an aggregate query, the scalar expressions in
4700 // the select clause can reference only constants, aggregates, or group-by
4702 // Also, if the query is an aggregate query, build a table referencing
4705 // No nested aggregates allowed.
4708 // First, count references in the WHERE predicate.
4709 // (if there are any references, report an error).
4710 // can ref group vars, tuple fields, and stateful fcns.
4713 retval = count_aggr_pred(wh, true);
4715 fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n");
4720 // NOTE : Here I need an analysis of the having clause
4721 // to verify that it only refs GB attrs and aggregates.
4722 // (also, superaggregates, stateful fcns)
4724 retval = verify_having_pred(hv, "HAVING", Ext_fcns);
4725 if(retval < 0) return(NULL);
4728 // Cleaning by has same reference rules as Having
4730 retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns);
4731 if(retval < 0) return(NULL);
4734 // Cleaning when has same reference rules as Having,
4735 // except that references to non-superaggregates are not allowed.
4736 // This is tested for when "CLEANING_BY" is passed in as the clause.
4738 retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns);
4739 if(retval < 0) return(NULL);
4742 // CLOSING_WHEN : same rules as HAVING
4744 retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns);
4745 if(retval < 0) return(NULL);
4749 // Collect aggregates in the HAVING and CLEANING clauses
4751 build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns);
4754 build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns);
4757 build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns);
4760 build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns);
4763 // Collect aggregate refs in the SELECT clause.
4765 for(i=0;i<sl_list.size();i++)
4766 build_aggr_tbl_fm_se(sl_list[i]->se, aggr_tbl, Ext_fcns);
4769 // Collect references to states of stateful functions
4771 gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns);
4774 gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns);
4777 gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns);
4780 gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns);
4782 if(closew != NULL){ // should be no stateful fcns here ...
4783 gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns);
4785 for(i=0;i<sl_list.size();i++)
4786 gather_fcn_states_se(sl_list[i]->se, qs->states_refd, Ext_fcns);
4789 // If this is an aggregate query, it normally references
4790 // some aggregates. It's not necessary though, just emit a warning.
4791 // (acts as SELECT DISTINCT)
4793 bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0;
4794 if(is_aggr_query && aggr_tbl->size() == 0){
4795 fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n");
4798 // If this is an aggregate query,
4799 // 1) verify that the SEs in the SELECT clause reference
4800 // only constants, aggregates, and group-by attributes.
4801 // 2) No aggregate scalar expression references an aggregate
4802 // or any stateful function.
4803 // 3) either it references both CLEANING clauses or neither.
4804 // 4) all superaggregates must have the superaggr_allowed property.
4805 // 5) all aggregates ref'd in the CLEANING_WHEN and CLEANING_BY
4806 // clauses must have the multiple_output property.
4810 if(gb_list.size() == 0){
4811 fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n");
4814 // Ensure that at least one gbvar is temporal
4815 if(! fta_tree->name_exists("no_temporal_aggr")){
4816 bool found_temporal = false;
4817 for(i=0;i<gb_tbl->size();i++){
4818 if(gb_tbl->get_data_type(i)->is_temporal()){
4819 found_temporal = true;
4822 if(! found_temporal){
4823 fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n");
4828 if((!cb && cw) || (cb && !cw)){
4829 fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n");
4833 bool refs_running = false;
4835 for(a=0; a<aggr_tbl->size(); ++a){
4836 refs_running |= aggr_tbl->is_running_aggr(a);
4841 fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n");
4845 fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n");
4850 if(refs_running && !closew){
4851 fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n");
4856 for(i=0;i<sl_list.size();i++){
4857 bool ret_bool = verify_aggr_query_se(sl_list[i]->se);
4858 st_ok = st_ok && ret_bool;
4863 for(i=0;i<aggr_tbl->size();i++){
4864 if(aggr_tbl->is_superaggr(i)){
4865 if(! aggr_tbl->superaggr_allowed(i)){
4866 fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str());
4870 if(aggr_tbl->is_builtin(i)){
4871 if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){
4872 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4876 vector<scalarexp_t *> opl = aggr_tbl->get_operand_list(i);
4878 for(o=0;o<opl.size();++o){
4879 if(count_aggr_se(opl[o], true) > 0){
4880 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4887 // Ensure that non-aggregate query doesn't reference some things
4889 fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n");
4893 fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n");
4896 if(qs->states_refd.size()){
4897 fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n");
4904 // Convert the predicates into CNF. OK to pass NULL ptr.
4905 make_cnf_from_pr(wh, qs->wh_cnf);
4906 make_cnf_from_pr(hv, qs->hav_cnf);
4907 make_cnf_from_pr(cb, qs->cb_cnf);
4908 make_cnf_from_pr(cw, qs->cw_cnf);
4909 make_cnf_from_pr(closew, qs->closew_cnf);
4911 // Analyze the predicates.
4913 for(i=0;i<qs->wh_cnf.size();i++)
4914 analyze_cnf(qs->wh_cnf[i]);
4915 for(i=0;i<qs->hav_cnf.size();i++)
4916 analyze_cnf(qs->hav_cnf[i]);
4917 for(i=0;i<qs->cb_cnf.size();i++)
4918 analyze_cnf(qs->cb_cnf[i]);
4919 for(i=0;i<qs->cw_cnf.size();i++)
4920 analyze_cnf(qs->cw_cnf[i]);
4921 for(i=0;i<qs->closew_cnf.size();i++)
4922 analyze_cnf(qs->closew_cnf[i]);
4925 // At this point, the old analysis program
4926 // gathered all refs to partial functions,
4927 // complex literals, and parameters accessed via a handle.
4928 // I think its better to delay this
4929 // until code generation time, as the query will be
4930 // in general split.
4937 ///////////////////////////////////////////////////////////////////////
4939 // Expand gbvars with their definitions.
//
// Rewrites the scalar expression `se` in place so that references to
// group-by variables are replaced by the definitions held in `gb_tbl`:
//   - operands (left/right sub-expressions, entries of param_list) are
//     overwritten with the result of the recursive call;
//   - a group-by reference yields a copy (dup_se) of the definition that
//     gb_tbl.get_def() returns for that reference;
//   - aggregate definitions are not descended into.
// The return value is the expression the caller has to store in place of
// `se`, which is how the recursive results are consumed below.
// An unrecognised operator type is reported on stderr as an internal error.
4941 scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){
4944 switch(se->get_operator_type()){
4947 case SE_IFACE_PARAM:
// Single-operand form: only the left child is rewritten.
4950 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
// Two-operand form: both children are rewritten.
4953 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
4954 se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl);
// Group-by reference: hand back a duplicate of its definition.
4958 return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) );
4961 // don't descend into aggr defs.
// Parameter list: every entry is rewritten in place.
4967 for(o=0;o<se->param_list.size();o++){
4968 se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl);
4972 fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n",
4973 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of expand_gbvars_se: walks the predicate `pr` and
// replaces group-by variable references in its scalar operands with their
// definitions from `gb_tbl`. Scalar operands and param_list entries are
// overwritten with the result of expand_gbvars_se; sub-predicates are
// processed recursively. Nothing is returned; `pr` is modified in place.
// An unrecognised predicate operator type is reported on stderr as an
// internal error.
4979 void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){
4980 vector<scalarexp_t *> op_list;
4984 switch(pr->get_operator_type()){
// Predicate with one scalar operand.
4986 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl);
// Predicate with two scalar operands.
4989 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ;
4990 pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ;
// Predicate with one sub-predicate.
4993 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
4995 case PRED_BINARY_OP:
4996 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
4997 expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ;
// Predicate with a parameter list: rewrite every parameter in place.
5000 for(o=0;o<pr->param_list.size();++o){
5001 pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ;
5005 fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n",
5006 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5014 // Return true if the se / pr contains any gbvar on the list.
//
// contains_gb_se handles scalar expressions. `gref_set` holds the
// group-by reference ids to look for; a group-by reference in `se` matches
// when its id (get_gb_ref) is a member of that set. Operands are searched
// recursively, but aggregate definitions are not descended into.
// An unrecognised operator type is reported on stderr as an internal error.
5017 bool contains_gb_se(scalarexp_t *se, set<int> &gref_set){
5018 vector<scalarexp_t *> operands;
5022 switch(se->get_operator_type()){
5025 case SE_IFACE_PARAM:
5028 return contains_gb_se(se->get_left_se(),gref_set);
5030 return( contains_gb_se(se->get_left_se(),gref_set) ||
5031 contains_gb_se(se->get_right_se(),gref_set) );
// Group-by reference: a hit only if this particular gbvar is in the set.
5034 return( gref_set.count(se->get_gb_ref()) > 0);
5037 // don't descend into aggr defs.
5043 operands = se->get_operands();
5044 for(o=0;o<operands.size();o++){
// `||` short-circuits, so operands after the first hit are not searched.
5045 found = found || contains_gb_se(operands[o], gref_set);
5049 fprintf(stderr,"INTERNAL ERROR in contains_gb_se, line %d, character %d: unknown operator type %d\n",
5050 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of contains_gb_se: returns true if any scalar
// operand of `pr`, or of any of its sub-predicates, references a group-by
// variable whose id is in `gref_set`.
// An unrecognised predicate operator type is reported on stderr as an
// internal error.
5057 bool contains_gb_pr(predicate_t *pr, set<int> &gref_set){
5058 vector<scalarexp_t *> op_list;
5062 switch(pr->get_operator_type()){
5064 return contains_gb_se(pr->get_left_se(), gref_set);
5066 return (contains_gb_se(pr->get_left_se(),gref_set)
5067 || contains_gb_se(pr->get_right_se(),gref_set) );
5069 return contains_gb_pr(pr->get_left_pr(),gref_set) ;
5070 case PRED_BINARY_OP:
5071 return (contains_gb_pr(pr->get_left_pr(),gref_set)
5072 || contains_gb_pr(pr->get_right_pr(),gref_set) );
5074 op_list = pr->get_op_list();
5075 for(o=0;o<op_list.size();++o){
// `||` short-circuits, so operands after the first hit are not searched.
5076 found = found ||contains_gb_se(op_list[o],gref_set) ;
5080 fprintf(stderr,"INTERNAL ERROR in contains_gb_pr, line %d, character %d, unknown predicate operator type %d\n",
5081 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5088 // Gather the set of columns accessed in this se.
5089 // Descend into aggregate functions.
//
// `cid_set` is the accumulator and is passed unchanged to every recursive
// call. `gtbl` is the group-by table used to resolve group-by references:
// a gbvar reference is followed to its definition (gtbl->get_def) and the
// columns of that definition are gathered. A gbvar reference encountered
// with gtbl == NULL is reported as an internal error.
// A column reference whose tblvar_ref is negative (unbound) triggers an
// internal warning on stderr.
5091 void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){
5093 vector<scalarexp_t *> operands;
5099 switch(se->get_operator_type()){
5102 case SE_IFACE_PARAM:
5105 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5108 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5109 gather_se_col_ids(se->get_right_se(),cid_set,gtbl);
// Column reference: build its col_id and sanity-check the table binding.
5113 ci.load_from_colref(se->get_colref() );
5114 if(ci.tblvar_ref < 0){
5115 fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str());
5120 fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n");
// Group-by reference: gather the columns used by its definition.
5123 gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl);
5129 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5132 operands = se->get_operands();
5133 for(o=0;o<operands.size();o++){
5134 gather_se_col_ids(operands[o], cid_set,gtbl);
5138 fprintf(stderr,"INTERNAL ERROR in gather_se_col_ids, line %d, character %d: unknown operator type %d\n",
5139 se->get_lineno(), se->get_charno(),se->get_operator_type());
5145 // Gather the set of columns accessed in this predicate.
//
// Predicate counterpart of gather_se_col_ids: every scalar operand of `pr`
// is handed to gather_se_col_ids and every sub-predicate is processed
// recursively, all accumulating into `cid_set`. `gtbl` is forwarded
// unchanged so that group-by references inside the operands can be resolved.
// An unrecognised predicate operator type is reported on stderr as an
// internal error.
5147 void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){
5148 vector<scalarexp_t *> op_list;
5151 switch(pr->get_operator_type()){
5153 gather_se_col_ids(pr->get_left_se(), cid_set,gtbl);
5156 gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ;
5157 gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ;
5160 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5162 case PRED_BINARY_OP:
5163 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5164 gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ;
5167 op_list = pr->get_op_list();
5168 for(o=0;o<op_list.size();++o){
5169 gather_se_col_ids(op_list[o],cid_set,gtbl) ;
5173 fprintf(stderr,"INTERNAL ERROR in gather_pr_col_ids, line %d, character %d, unknown predicate operator type %d\n",
5174 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5181 // Gather the set of special operator or comparison functions referenced by this se.
//
// Names are accumulated into `fcn_set`. The visible sources of names are:
//   - a literal whose constructor_name() is non-empty contributes that name;
//   - a one-operand operator contributes get_complex_operator(op) of the
//     operand's data type when complex_operator(op) holds for that type;
//   - a two-operand operator contributes get_complex_operator(rdt, op) of
//     the left operand's data type when complex_operator(rdt, op) holds.
// Operands are then searched recursively.
// An unrecognised operator type is reported on stderr as an internal error.
5183 void gather_se_opcmp_fcns(scalarexp_t *se, set<string> &fcn_set){
5185 data_type *ldt, *rdt;
5187 vector<scalarexp_t *> operands;
5189 switch(se->get_operator_type()){
// Literal: record its constructor function, if it has one.
5191 if( se->get_literal()->constructor_name() != "")
5192 fcn_set.insert( se->get_literal()->constructor_name() );
5196 // SE_IFACE_PARAM should not exist when this is called.
// One operand: the operator may be implemented by a function of the operand's type.
5198 ldt = se->get_left_se()->get_data_type();
5199 if(ldt->complex_operator(se->get_op()) ){
5200 fcn_set.insert( ldt->get_complex_operator(se->get_op()) );
5202 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
// Two operands: the lookup is keyed on the left type, the right type and the operator.
5205 ldt = se->get_left_se()->get_data_type();
5206 rdt = se->get_right_se()->get_data_type();
5208 if(ldt->complex_operator(rdt, se->get_op()) ){
5209 fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) );
5211 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5212 gather_se_opcmp_fcns(se->get_right_se(),fcn_set);
5219 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5222 operands = se->get_operands();
5223 for(o=0;o<operands.size();o++){
5224 gather_se_opcmp_fcns(operands[o], fcn_set);
5228 fprintf(stderr,"INTERNAL ERROR in gather_se_opcmp_fcns, line %d, character %d: unknown operator type %d\n",
5229 se->get_lineno(), se->get_charno(),se->get_operator_type());
5235 // Gather the set of special operator or comparison functions referenced by this predicate.
//
// Predicate counterpart of gather_se_opcmp_fcns. Names are accumulated into
// `fcn_set`:
//   - a predicate with one scalar operand contributes
//     get_comparison_fcn(ldt) when complex_comparison(ldt) holds for the
//     operand's own data type;
//   - a predicate with two scalar operands contributes
//     get_comparison_fcn(rdt) of the left type when complex_comparison(rdt)
//     holds.
// Scalar operands are then passed to gather_se_opcmp_fcns and sub-predicates
// are processed recursively.
// An unrecognised predicate operator type is reported on stderr as an
// internal error; the message names this function so the failure can be
// located (it previously named verify_predicate_colref).
5237 void gather_pr_opcmp_fcns(predicate_t *pr, set<string> &fcn_set){
5238 data_type *ldt, *rdt;
5239 vector<scalarexp_t *> operands;
5242 switch(pr->get_operator_type()){
// One scalar operand: the comparison is looked up against the operand's own type.
5244 ldt = pr->get_left_se()->get_data_type();
5245 if(ldt->complex_comparison(ldt) ){
5246 fcn_set.insert( ldt->get_comparison_fcn(ldt) );
5248 gather_se_opcmp_fcns(pr->get_left_se(), fcn_set);
// Two scalar operands: the comparison is looked up on the (left, right) type pair.
5251 ldt = pr->get_left_se()->get_data_type();
5252 rdt = pr->get_right_se()->get_data_type();
5253 if(ldt->complex_comparison(rdt) ){
5254 fcn_set.insert( ldt->get_comparison_fcn(rdt) );
5256 gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ;
5257 gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ;
5260 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5262 case PRED_BINARY_OP:
5263 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5264 gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ;
5267 operands = pr->get_op_list();
5268 for(o=0;o<operands.size();o++){
5269 gather_se_opcmp_fcns(operands[o], fcn_set);
5273 fprintf(stderr,"INTERNAL ERROR in gather_pr_opcmp_fcns, line %d, character %d, unknown predicate operator type %d\n",
5274 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5281 // Find the temporal variable divisor, if any.
5282 // Only forms allowed : temporal_colref, temporal_colref/const
5283 // temporal_colref/const + const
//
// Parameters:
//   se  - the scalar expression to inspect.
//   gbt - group-by table; a group-by reference in `se` is resolved through
//         gbt->get_def() and the search continues in its definition.
//   fnm - output: set to the field name of the temporal column reference
//         when one is found.
// The result variable starts at 0 and is overwritten with the value parsed
// from the literal operand when a column-reference/literal pair is found.
// An unrecognised operator type is reported on stderr as an internal error.
5286 long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){
5287 long long int retval = 0;
5288 data_type *ldt, *rdt;
5290 vector<scalarexp_t *> operands;
5291 scalarexp_t *t_se, *c_se;
5294 switch(se->get_operator_type()){
5299 // SE_IFACE_PARAM should not exist when this is called.
// Pick the temporal operand (t_se) and the constant operand (c_se).
// NOTE(review): both assignment pairs below are identical (t_se = left,
// c_se = right), so a temporal operand on the right-hand side is never
// selected and the test that follows sees a non-temporal t_se. Confirm
// whether the second pair was meant to swap the operands; a swap would only
// be safe for commutative operators.
5303 ldt = se->get_left_se()->get_data_type();
5304 if(ldt->is_temporal()){
5305 t_se = se->get_left_se();
5306 c_se = se->get_right_se();
5308 t_se = se->get_left_se();
5309 c_se = se->get_right_se();
5311 if((! t_se->get_data_type()->is_temporal()) || c_se->get_data_type()->is_temporal())
5314 the_op = se->get_op();
// Adding or subtracting a constant does not change the divisor: keep
// looking inside the temporal operand.
5315 if(the_op == "+" || the_op == "-")
5316 return find_temporal_divisor(t_se, gbt,fnm);
5318 if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){
5319 fnm = t_se->get_colref()->get_field();
5320 string lits = c_se->get_literal()->to_string();
// retval is a long long int, so use the standard %lld conversion; the
// previous %qd is a non-standard spelling that not every C library accepts.
5321 sscanf(lits.c_str(),"%lld",&retval);
// Group-by reference: continue the search in the variable's definition.
5329 return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm);
// Bare column reference: report its field name if it is temporal.
5331 if(se->get_data_type()->is_temporal()){
5332 fnm = se->get_colref()->get_field();
5343 fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n",
5344 se->get_lineno(), se->get_charno(),se->get_operator_type());
5351 // Create meaningful but unique names for the columns.
//
// Convenience overload: collects the `name` of every element of `sel_list`
// into a set and delegates to the set-based impute_colname, so the name
// chosen for `se` does not collide with a name already used in the select
// list. The set is local to this call, so `sel_list` itself is not modified.
5352 string impute_colname(vector<select_element *> &sel_list, scalarexp_t *se){
5353 set<string> curr_names;
5355 for(s=0;s<sel_list.size();++s){
5356 curr_names.insert(sel_list[s]->name);
5358 return impute_colname(curr_names, se);
// Derive a column name for `se` that is not yet in `curr_names`, and record
// the chosen name in `curr_names`.
//
// The base name depends on the kind of expression; the visible cases are:
//   - parameter:            "Param_"  + parameter name
//   - interface parameter:  "Iparam_" + interface parameter name
//   - column reference:     the field name
//   - expressions with operands: a suffix derived from the first operand
//     ("_PARAM_<name>", "_IPARAM_<name>", "_<field>" or "_<op>") is appended
//     to a prefix built on lines not shown here.
// While the candidate is already present in `curr_names`, a new candidate is
// formatted from the base name followed by a counter.
5361 string impute_colname(set<string> &curr_names, scalarexp_t *se){
5364 vector<scalarexp_t *> operand_list;
5367 switch(se->get_operator_type()){
5372 ret = "Param_" + se->get_param_name();
5374 case SE_IFACE_PARAM:
5375 ret = "Iparam_" + se->get_ifpref()->get_pname();
5378 ret = se->get_colref()->get_field() ;
// Single-operand expression: qualify the name with the kind of its operand.
5389 seo = se->get_left_se();
5390 switch(se->get_left_se()->get_operator_type()){
5392 ret += "_PARAM_"+seo->get_param_name();
5394 case SE_IFACE_PARAM:
5395 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5398 opstr = seo->get_colref()->get_field();
// strncmp returns non-zero when the first ret.size() characters differ,
// i.e. when the operand's name does not start with the current name.
5399 if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5407 opstr = seo->get_op();
5408 if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5409 ret += "_" + seo->get_op();
5415 opstr = seo->get_op();
5416 ret += "_" + seo->get_op();
// Expression with an operand list: only the first operand contributes.
5429 operand_list = se->get_operands();
5430 if(operand_list.size() > 0){
5431 seo = operand_list[0];
5432 switch(seo->get_operator_type()){
5434 ret += "_PARAM_"+seo->get_param_name();
5436 case SE_IFACE_PARAM:
5437 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5440 ret += "_" + seo->get_colref()->get_field();
5445 ret += "_" + seo->get_op();
5462 if(curr_names.count("Field0") == 0)
// Uniqueness loop: keep generating "<base><counter>" until the name is free.
5467 while(curr_names.count(ret) > 0){
// NOTE(review): tmpstr's declaration is not shown; confirm the buffer can
// hold the base name plus the counter, since sprintf does no bounds checking.
5469 sprintf(tmpstr,"%s%d",base.c_str(),iter);
// Reserve the chosen name so later calls with the same set avoid it.
5475 curr_names.insert(ret);
5482 //////////////////////////////////////////////////////////////////////
5483 ////////////// Methods of defined classes ///////////////////////
5484 //////////////////////////////////////////////////////////////////////
5486 // Helper fcn to enable col_id as map key.
//
// Orders col_id values by tblvar_ref first; when the tblvar_ref values are
// equal the comparison falls through to the field name.
5488 bool operator<(const col_id &cr1, const col_id &cr2){
5489 if(cr1.tblvar_ref < cr2.tblvar_ref) return(true);
5490 if(cr1.tblvar_ref == cr2.tblvar_ref)
5491 return (cr1.field < cr2.field);
5496 // Process the GB variables.
5497 // At parse time, GB vars are either GB_COLREF,
5498 // or GB_COMPUTED if the AS keyword is used.
5499 // Cast GB vars as named entities with a SE as
5500 // their definition (the colref in the case of GB_COLREF).
5502 // TODO: if there is a gbref in a gbdef,
5503 // then I won't be able to compute the value without
5504 // a complex dependence analysis. So verify that there is no
5505 // gbref in any of the GBdefs.
5506 // BUT: a GBVAR_COLREF should be converted to a regular colref,
5507 // which is not yet done.
5509 // TODO : sort out issue of GBVAR naming and identification.
5510 // Determine where it is advantageous to convert GB_COLREF
5511 // GBVARS to colrefs -- e.g. in group definition, in the WHERE clause,
5514 // return -1 if there is a problem.
//
// Steps visible below:
//   1. Build a gb_table_entry. For a GB_COLREF the column reference is bound
//      to a table variable (infer_tablevar_from_colref) and wrapped in a new
//      scalar expression, with ref_type GBVAR_COLREF. Otherwise the entry
//      takes gb->name and gb->def, with tblvar_ref -1 and ref_type GBVAR_SE.
//   2. Validate the definition with verify_colref and assign_data_types; a
//      negative result from either, or from the table-variable inference, is
//      returned to the caller as is.
//   3. Reject definitions that reference other group-by variables or
//      aggregates.
//   4. Reject a name that duplicates an existing entry of gtbl.
//   5. Append the entry to gtbl.
// NOTE(review): on the early-return paths shown here the colref allocated
// for GB_COLREF and the freshly allocated entry are not released - confirm
// whether that is acceptable for this tool.
5516 int gb_table::add_gb_attr(
5518 tablevar_list_t *fm,
5520 table_exp_t *fta_tree,
5521 ext_fcn_list *Ext_fcns
5525 gb_table_entry *entry;
5527 if(gb->type == GB_COLREF){
5530 gb->interface.c_str(),gb->table.c_str(), gb->name.c_str()
5533 cr = new colref_t(gb->name.c_str());
5535 int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema);
5536 if(tablevar_ref < 0) return(tablevar_ref);
// Bind the colref to the table variable that was found; the interface is
// cleared and the table name becomes the tablevar's name.
5538 cr->set_tablevar_ref(tablevar_ref);
5539 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
5540 cr->set_interface("");
5541 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
5543 entry = new gb_table_entry();
5544 entry->name.field = cr->get_field();
5545 entry->name.tblvar_ref = tablevar_ref;
5546 entry->definition = new scalarexp_t(cr);
5547 entry->ref_type = GBVAR_COLREF;
// Computed group-by variable: the definition supplied by the parser is used
// directly (not copied), and no table variable is associated with the name.
5549 entry = new gb_table_entry();
5550 entry->name.field = gb->name;
5551 entry->name.tblvar_ref = -1;
5552 entry->definition = gb->def;
5553 entry->ref_type = GBVAR_SE;
5556 retval = verify_colref(entry->definition, fm, schema, NULL);
5557 if(retval < 0) return(retval);
5559 retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns);
5560 if(retval < 0) return(retval);
5562 // Verify that the gbvar def references no aggregates and no gbvars.
5563 if(count_gb_se(entry->definition) > 0){
5564 fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() );
5567 if(count_aggr_se(entry->definition, true) > 0){
5568 fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() );
5572 // Check for duplicates
5574 for(i=0;i<gtbl.size();++i){
5575 if(entry->name.field == gtbl[i]->name.field){
// NOTE(review): %d / %lu assume i is an int and gtbl.size() is an unsigned
// long; the declarations are not shown here - confirm (%zu is the portable
// conversion for a size_t).
5576 fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size());
5582 gtbl.push_back(entry);
5588 // Try to determine if the colref is actually a reference to a group-by variable.
5590 // a) if no tablename associated with the colref,
5591 // 1) try to find a matching GB_COMPUTED gbvar.
5592 // 2) failing that, try to match to a single tablevar
5593 // 3) if successful, search among GB_COLREF
5594 // b) else, try to match the tablename to a single tablevar
5595 // if successful, search among GB_COLREF
//
// `cr` is the column reference to resolve, `fm` the FROM-clause table
// variables and `schema` the schema used by find_source_tables.
// Returns -1 whenever no unique table variable can be determined or the
// column is not in the group-by table.
5596 int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){
5597 string c_field = cr->get_field();
5601 vector<int> candidates;
5603 if(cr->uses_default_table()){
// a.1) a computed group-by variable (GBVAR_SE) with the same name.
5604 for(i=0;i<gtbl.size();i++){
5605 if(gtbl[i]->ref_type==GBVAR_SE && c_field == gtbl[i]->name.field){
// a.2) the field must come from exactly one table variable.
5609 candidates = find_source_tables(c_field, fm, schema);
5610 if(candidates.size() != 1) return(-1); // can't find unique tablevar
// a.3) a column-type group-by variable on that field and table variable.
5611 for(i=0;i<gtbl.size();i++){
5612 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5613 c_field == gtbl[i]->name.field &&
5614 candidates[0] == gtbl[i]->name.tblvar_ref){
5618 return(-1); // colref is not in gb table.
5621 // A table name must have been given.
5622 vector<tablevar_t *> fm_tbls = fm->get_table_list();
5623 string interface = cr->get_interface();
5624 string table_name = cr->get_table_name();
5627 // if no interface name is given, try to search for the table
5628 // name among the tablevar names first.
5630 for(i=0;i<fm_tbls.size();++i){
5631 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5632 candidates.push_back(i);
5634 if(candidates.size()>1) return(-1);
5635 if(candidates.size()==1){
5636 for(i=0;i<gtbl.size();i++){
5637 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5638 c_field == gtbl[i]->name.field &&
5639 candidates[0] == gtbl[i]->name.tblvar_ref){
5643 return(-1); // match semantics of bind to tablevar name first
5647 // Interface name given, or no interface but no
5648 // no tablevar match. Try to match on schema name.
// NOTE(review): the condition below is identical to the tablevar-name scan
// above (it compares against get_var_name()), although this pass is
// described as matching on the schema name. Confirm whether the schema-name
// accessor was intended here.
5649 for(i=0;i<fm_tbls.size();++i){
5650 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5651 candidates.push_back(i);
5653 if(candidates.size() != 1) return(-1);
5654 for(i=0;i<gtbl.size();i++){
5655 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5656 c_field == gtbl[i]->name.field &&
5657 candidates[0] == gtbl[i]->name.tblvar_ref){
// Decide whether this aggregate is legal in an FTA. The built-in operators
// COUNT, SUM, MIN, MAX, AND_AGGR, OR_AGGR and XOR_AGGR are tested for by
// name; for anything else the answer comes from the external function
// table, looked up by this entry's fcn_id.
5669 bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){
5671 if( (op == "COUNT") || (op == "SUM") || (op == "MIN") ||
5672 (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") ||
5673 (op == "XOR_AGGR") )
5676 return Ext_fcns->fta_legal(fcn_id);
5682 // Return the set of subaggregates required to compute
5683 // the desired aggregate. The operand of the subaggregates
5684 // can only be * or the scalarexp used in the superaggr.
5685 // This is indicated by the use_se vector.
5687 // Is this code generation specific?
//
// The returned vector and `use_se` are filled in parallel: use_se[k] is
// true when subaggregate k takes the superaggregate's scalar expression as
// its operand and false when it does not (the COUNT entries). Entries are
// appended to `use_se`; it is not cleared first on any line shown here.
// One of the cases produces two subaggregates, SUM and COUNT.
5689 vector<string> aggr_table_entry::get_subaggr_fcns(vector<bool> &use_se){
5693 ret.push_back("COUNT");
5694 use_se.push_back(false);
5697 ret.push_back("SUM");
5698 use_se.push_back(true);
// Two subaggregates: a SUM over the expression and a COUNT.
5701 ret.push_back("SUM");
5702 ret.push_back("COUNT");
5703 use_se.push_back(true);
5704 use_se.push_back(false);
5707 ret.push_back("MIN");
5708 use_se.push_back(true);
5711 ret.push_back("MAX");
5712 use_se.push_back(true);
5714 if(op == "AND_AGGR"){
5715 ret.push_back("AND_AGGR");
5716 use_se.push_back(true);
5718 if(op == "OR_AGGR"){
5719 ret.push_back("OR_AGGR");
5720 use_se.push_back(true);
5722 if(op == "XOR_AGGR"){
5723 ret.push_back("XOR_AGGR");
5724 use_se.push_back(true);
5730 // Code generation specific?
//
// Return the data types of the subaggregates needed to compute this
// aggregate. Each data_type is allocated here with `new`; the count-style
// entries are plain "Int", the others are derived from the operand's data
// type through set_aggr_data_type() for the corresponding subaggregate
// operator. Nothing shown in this function releases the allocated objects.
5732 vector<data_type *> aggr_table_entry::get_subaggr_dt(){
5733 vector<data_type *> ret;
5737 dt = new data_type("Int"); // was Uint
5738 ret.push_back( dt );
5741 dt = new data_type();
5742 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
// Two subaggregates: the SUM type followed by an Int for the count.
5746 dt = new data_type();
5747 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
5748 ret.push_back( dt );
5749 dt = new data_type("Int");
5750 ret.push_back( dt );
5753 dt = new data_type();
5754 dt->set_aggr_data_type( "MIN",operand->get_data_type() );
5755 ret.push_back( dt );
5758 dt = new data_type();
5759 dt->set_aggr_data_type( "MAX",operand->get_data_type() );
5760 ret.push_back( dt );
5762 if(op == "AND_AGGR"){
5763 dt = new data_type();
5764 dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() );
5765 ret.push_back( dt );
5767 if(op == "OR_AGGR"){
5768 dt = new data_type();
5769 dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() );
5770 ret.push_back( dt );
5772 if(op == "XOR_AGGR"){
5773 dt = new data_type();
5774 dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() );
5775 ret.push_back( dt );
5781 // Code generation specific?
//
// Build the scalar expression that computes this aggregate from
// subaggregate results. `se_refs` holds the expressions referring to the
// subaggregate values. Most cases wrap se_refs[0] in a single aggregate
// (SUM, MIN, MAX, AND_AGGR, OR_AGGR, XOR_AGGR); one case divides a SUM over
// se_refs[0] by a SUM over se_refs[1]. The result variable starts out NULL.
// se_refs is indexed without a size check on any line shown here, so the
// caller must supply enough references for the operator.
5783 scalarexp_t *aggr_table_entry::make_superaggr_se(vector<scalarexp_t *> se_refs){
5784 scalarexp_t *se_l, *se_r, *ret_se = NULL;
5787 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5791 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
// Ratio of two sums.
5795 se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5796 se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]);
5798 ret_se = new scalarexp_t("/", se_l, se_r);
5802 ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]);
5806 ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]);
5809 if(op == "AND_AGGR"){
5810 ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]);
5813 if(op == "OR_AGGR"){
5814 ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]);
5817 if(op == "XOR_AGGR"){
5818 ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]);
5827 // Add a built-in aggr.
//
// The table is first searched for an existing built-in entry with the same
// operator and an equivalent operand (is_equivalent_se). The superaggregate
// flag is deliberately not part of the match; instead a matching entry is
// promoted to superaggregate when `is_super` is set.
// If no entry matches, a new aggr_table_entry is appended and its index
// (the last position of agr_tbl) is returned.
5828 int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){
5831 for(i=0;i<agr_tbl.size();i++){
5832 if(agr_tbl[i]->is_builtin() && op == agr_tbl[i]->op
5833 && is_equivalent_se(se,agr_tbl[i]->operand) ){
5834 // && is_super == agr_tbl[i]->is_superaggr())
5835 if(is_super) agr_tbl[i]->set_super(true);
5840 aggr_table_entry *ate = new aggr_table_entry(op, se, is_super);
5841 agr_tbl.push_back(ate);
5842 return(agr_tbl.size() - 1);
// Add an aggregate that is not built in, identified by `fcn_id`.
//
// The table is first searched for an existing non-built-in entry with the
// same fcn_id and the same number of operands; the operands are then
// compared pairwise with is_equivalent_se. When the operand loop runs to
// completion (o == opl.size()) the entry is treated as a match and, if
// `is_super` is set, promoted to superaggregate. The superaggregate flag is
// deliberately not part of the match.
// If no entry matches, a new aggr_table_entry is appended and its index
// (the last position of agr_tbl) is returned.
5846 int aggregate_table::add_aggr(string op, int fcn_id, vector<scalarexp_t *> opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){
5849 for(i=0;i<agr_tbl.size();i++){
5850 if((! agr_tbl[i]->is_builtin()) && fcn_id == agr_tbl[i]->fcn_id
5851 && opl.size() == agr_tbl[i]->oplist.size() ){
5852 // && is_super == agr_tbl[i]->is_superaggr() ){
5853 for(o=0;o<opl.size();++o){
5854 if(! is_equivalent_se(opl[o],agr_tbl[i]->oplist[o]) )
// Reaching the end of the operand list means every operand matched.
5857 if(o == opl.size()){
5858 if(is_super) agr_tbl[i]->set_super(true);
5864 aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout);
5865 agr_tbl.push_back(ate);
5866 return(agr_tbl.size() - 1);
// Register the complex literal `l`, reusing an existing entry when an
// equivalent literal (l->is_equivalent) is already in the table.
// cplx_lit_tbl and hdl_ref_tbl are parallel: hdl_ref_tbl[i] records whether
// literal i has been referenced through a handle. For an existing entry the
// flag is OR-ed with `is_handle_ref`, so once set it stays set.
// If no equivalent literal exists, the literal and its flag are appended and
// the new index (the last position of cplx_lit_tbl) is returned.
5870 int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){
5873 for(i=0;i<cplx_lit_tbl.size();i++){
5874 if(l->is_equivalent(cplx_lit_tbl[i])){
5875 hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref;
5880 cplx_lit_tbl.push_back(l);
5881 hdl_ref_tbl.push_back(is_handle_ref);
5882 return(cplx_lit_tbl.size() - 1);
5887 //------------------------------------------------------------
5891 gb_t *gb_t::duplicate(){
5892 gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str());
5894 ret->lineno = lineno;
5895 ret->charno = charno;
5897 ret->def = dup_se(def,NULL);