1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
18 #include "parse_fta.h"
19 #include "parse_schema.h"
20 #include "parse_ext_fcns.h"
23 #include"analyze_fta.h"
25 #include"type_objects.h"
32 extern string hostname; // name of the current host
// Formats the integer i as decimal text ("%d") into tmpstr.
// NOTE(review): the declaration of tmpstr and the return statement are not
// visible in this excerpt. sprintf is unbounded, so confirm tmpstr is large
// enough for any int (or switch to snprintf with the buffer size).
36 string int_to_string(int i){
39 sprintf(tmpstr,"%d",i);
47 // These represent derived information from the
48 // query analysis stage. I extract them from a class,
49 // perhaps this is dangerous.
// File-level analysis state. The three table pointers are file-local (static)
// and start out NULL; the vector and the *_partial_* ints have external linkage.
51 static gb_table *gb_tbl=NULL; // Table of all group-by attributes.
52 static aggregate_table *aggr_tbl=NULL; // Table of all referenced aggregates.
54 // static cplx_lit_table *complex_literals=NULL; // Table of literals with constructors.
55 static param_table *param_tbl=NULL; // Table of all referenced parameters.
// NOTE(review): presumably the *_partial_start/_end pairs delimit index ranges
// of partial_fcns_list for the WHERE (wh), GROUP BY (gb), aggregate (aggr) and
// SELECT list (sl) clauses — their users are not visible here; confirm.
57 vector<scalarexp_t *> partial_fcns_list;
58 int wh_partial_start, wh_partial_end;
59 int gb_partial_start, gb_partial_end;
60 int aggr_partial_start, aggr_partial_end;
61 int sl_partial_start, sl_partial_end;
64 // Infer the table of a column reference and return the table ref.
66 // field name and table name. If no table name is used,
67 // search all tables to try to find a unique match.
68 // Of course, plenty of error checking.
70 // Return the set of tablevar indices in the FROM clause
71 // which contain a field with the same name.
// Walks every schema reference returned by fm->get_schema_refs() and asks
// Schema->contains_field() whether that schema has a field named `field`.
// NOTE(review): the declarations of the loop index and result vector, the
// statement that records a match, and the return are not visible in this
// excerpt.
72 vector<int> find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){
75 // vector<string> tn = fm->get_schema_names();
76 vector<int> tn = fm->get_schema_refs();
77 // printf("Calling find_source_tables on field %s\n",field.c_str());
78 for(i=0;i<tn.size();i++){
79 // if(Schema->contains_field(Schema->find_tbl(tn[i]), field) ){
80 if(Schema->contains_field(tn[i], field) ){
82 // printf("\tfound in table %s\n",tn[i].c_str());
// Maps the table variable named by an interface-parameter reference to an
// index into the FROM clause `fm`.
// Visible behaviour: one path returns 0 when the FROM clause holds exactly one
// table variable and otherwise reports that no tablevar was specified; another
// path compares the reference's tablevar name against fm->get_tablevar_name(i)
// for each entry and reports an error when none matches. Errors go to stderr.
// NOTE(review): the guarding conditions and the error return values are not
// visible in this excerpt; the visible caller treats a negative result as failure.
88 int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){
90 string tname = ir->get_tablevar();
92 if(fm->size()==1) return 0;
93 fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str());
96 for(i=0;i<fm->size();++i){
97 if(tname == fm->get_tablevar_name(i))
100 fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str());
105 // compute the index of the tablevar in the from clause that the
107 // return -1 if no tablevar can be imputed.
// Determines which FROM-clause table variable a column reference belongs to.
//   cr     - the column reference (field name, optional table name / interface)
//   fm     - the FROM clause
//   schema - the schema table list, forwarded to find_source_tables
// When the colref uses the default table, candidates come from
// find_source_tables(); messages are printed for an ambiguous field and for a
// field that exists in no table. Otherwise the named table is matched against
// the FROM-clause table variables, with duplicate detection.
// All diagnostics go to stderr. The preceding file comment states that -1 is
// returned when no tablevar can be imputed; the return statements for the
// error paths are not visible in this excerpt.
108 int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){
113 vector<tablevar_t *> fm_tbls = fm->get_table_list();
115 string field = cr->get_field();
117 // printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str());
118 if(cr->uses_default_table() ){
119 tv = find_source_tables(field, fm, schema);
121 fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ",
122 cr->get_lineno(), cr->get_charno(),field.c_str() );
// List every candidate table variable so the user can pick one.
123 for(i=0;i<tv.size();i++){
124 fprintf(stderr,"%s ",fm_tbls[ tv[i] ]->to_string().c_str() );
126 fprintf(stderr,"\n\tYou must specify one of these.\n");
130 fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n",
131 cr->get_lineno(), cr->get_charno(),field.c_str() );
138 // The table source is named -- but is it a schema name
141 string interface = cr->get_interface();
142 table_name = cr->get_table_name();
144 // if interface is not specified, prefer to look at the tablevar names
145 // Check for duplicates.
147 for(i=0;i<fm_tbls.size();++i){
148 if(table_name == fm_tbls[i]->get_var_name())
152 fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() );
155 if(tv.size() == 1) return(tv[0]);
158 // Tableref not found by looking at tableref vars, or an interface
159 // was specified. Try to match on schema and interface.
160 // Check for duplicates.
// NOTE(review): the comment above says "schema", but the comparison below uses
// get_var_name(), the same accessor as the first pass — confirm whether a
// schema-name accessor was intended.
161 for(i=0;i<fm_tbls.size();++i){
162 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
166 fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n",
167 cr->get_lineno(), cr->get_charno() );
168 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
169 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
174 fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno());
175 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
176 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
184 // Reset temporal properties of a scalar expression
// Calls reset_temporal() on the data type of `se`, then recurses into its
// children: the left operand, the left and right operands, or every entry of
// get_operands(), depending on the operator type. An unknown operator type is
// reported on stderr as an internal error.
// NOTE(review): the case labels selecting each branch are not visible in this
// excerpt.
185 void reset_temporal(scalarexp_t *se){
187 vector<scalarexp_t *> operands;
190 se->get_data_type()->reset_temporal();
192 switch(se->get_operator_type()){
199 reset_temporal(se->get_left_se());
202 reset_temporal(se->get_left_se());
203 reset_temporal(se->get_right_se());
208 reset_temporal(se->get_left_se());
211 operands = se->get_operands();
212 for(o=0;o<operands.size();o++){
213 reset_temporal(operands[o]);
217 fprintf(stderr,"INTERNAL ERROR in reset_temporal, line %d, character %d: unknown operator type %d\n",
218 se->get_lineno(), se->get_charno(),se->get_operator_type());
223 // Verify that column references exist in their
224 // declared tables. As a side effect, assign
225 // their data types. Other side effects :
227 // return -1 on error
// Recursively resolves the references inside a scalar expression.
//   se     - expression to process (modified in place)
//   fm     - FROM clause used to resolve table variables
//   schema - schema table list used for field lookup and type names
//   gtbl   - group-by table, consulted to see whether a colref is a GB ref
// Interface-parameter references get their tablevar index set. Column
// references are either marked as group-by references (taking the GB
// variable's data type) or bound to a table variable and given a data type
// built from the schema. Unary/binary/function nodes recurse into their
// operands. A negative return signals an error.
229 int verify_colref(scalarexp_t *se, tablevar_list_t *fm,
230 table_list *schema, gb_table *gtbl){
235 string field, table_source, type_name;
241 vector<scalarexp_t *> operands;
243 switch(se->get_operator_type()){
248 ir = se->get_ifpref();
249 table_var = infer_tablevar_from_ifpref(ir, fm);
250 if(table_var < 0) return(table_var);
251 ir->set_tablevar_ref(table_var);
254 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
// Both sides are always visited so that errors on either side are reported.
256 l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl);
257 r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl);
258 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
261 cr = se->get_colref();
262 field = cr->get_field();
264 // Determine if this is really a GB ref.
265 // (the parser can only see that it's a colref).
267 gb_ref = gtbl->find_gb(cr, fm, schema);
272 se->set_gb_ref(gb_ref);
275 // It's a colref, verify its existence and
276 // record the data type.
277 table_var = infer_tablevar_from_colref(cr,fm,schema);
278 if(table_var < 0) return(table_var);
280 // Store the table ref in the colref.
// The colref is normalized: the interface is cleared and the table name is
// replaced by the resolved table variable's name.
281 cr->set_tablevar_ref(table_var);
282 cr->set_schema_ref(fm->get_schema_ref(table_var));
283 cr->set_interface("");
284 cr->set_table_name(fm->get_tablevar_name(table_var));
286 if(! schema->contains_field(cr->get_schema_ref(), field)){
287 fprintf(stderr, "Error, field %s is not in stream %s\n", field.c_str(), schema->get_table_name( cr->get_schema_ref() ).c_str());
291 type_name = schema->get_type_name(cr->get_schema_ref(), field);
292 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
// NOTE(review): the data_type is heap-allocated and handed to the expression;
// no release is visible here — confirm who owns it.
293 dt = new data_type(type_name, modifiers);
294 se->set_data_type(dt);
296 // Else, it's a gbref, use the GB var's data type.
297 se->set_data_type(gtbl->get_data_type(gb_ref));
304 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
306 operands = se->get_operands();
// Every operand is visited; r_ret records whether any of them failed.
308 for(o=0;o<operands.size();o++){
309 l_ret = verify_colref(operands[o], fm, schema, gtbl);
310 if(l_ret < 0) r_ret = -1;
314 fprintf(stderr,"INTERNAL ERROR in verify_colref, line %d, character %d: unknown operator type %d\n",
315 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Applies verify_colref() to every scalar expression reachable from the
// predicate `pr`, recursing through nested predicates. Arguments fm, schema
// and gtbl are forwarded unchanged. Two-operand forms evaluate both sides
// before returning -1 if either failed. An unknown predicate operator type is
// reported on stderr as an internal error.
322 int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){
324 std::vector<scalarexp_t *> op_list;
327 switch(pr->get_operator_type()){
329 return(verify_colref(pr->get_left_se(),fm,schema, gtbl) );
331 l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ;
332 r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ;
333 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
336 return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl));
338 l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ;
339 r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ;
340 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
343 op_list = pr->get_op_list();
// All operands are checked; l_ret is set to -1 if any of them fails.
345 for(o=0;o<op_list.size();++o){
346 if(verify_colref(op_list[o],fm,schema,gtbl) < 0) l_ret = -1;
350 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
351 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Tests whether a scalar expression is built from literals only.
// A NULL expression counts as literal-only (returns true). The visible
// single-operand form recurses into the left operand; the two-operand form
// requires both the left and the right operand to be literal-only.
// NOTE(review): the case labels and the remaining branches are not visible in
// this excerpt.
358 bool literal_only_se(scalarexp_t *se){ // really only literals.
360 vector<scalarexp_t *> operands;
362 if(se == NULL) return(1);
363 switch(se->get_operator_type()){
371 return( literal_only_se(se->get_left_se()) );
373 return( literal_only_se(se->get_left_se()) &&
374 literal_only_se(se->get_right_se()) );
393 // Verify that column references exist in their
394 // declared tables. As a side effect, assign
395 // their data types. Other side effects :
// Re-binds the column references of a scalar expression to the FROM clause
// `fm` and checks them against `schema`.
// A NULL expression and a group-by reference both return 1 without further
// work. For a plain column reference the table variable is inferred, the
// colref's tablevar/schema refs are updated, its interface is cleared and its
// table name is replaced with the table variable's name. The expression's
// existing data type must then subsume the type declared in the schema,
// otherwise an internal error is printed. A negative return signals an error.
398 int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){
402 string field, table_source, type_name;
408 vector<scalarexp_t *> operands;
410 if(se == NULL) return(1);
412 switch(se->get_operator_type()){
420 return( bind_to_schema_se(se->get_left_se(), fm, schema) );
// Both sides are always visited before the results are combined.
422 l_ret = bind_to_schema_se(se->get_left_se(), fm, schema);
423 r_ret = bind_to_schema_se(se->get_right_se(), fm, schema);
424 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
427 if(se->is_gb()) return(1); // gb ref not a colref.
429 cr = se->get_colref();
430 field = cr->get_field();
432 tablevar_ref = infer_tablevar_from_colref(cr,fm,schema);
433 if(tablevar_ref < 0){
434 return(tablevar_ref);
436 // Store the table ref in the colref.
437 cr->set_tablevar_ref(tablevar_ref);
438 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
439 cr->set_interface("");
440 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
442 // Check the data type
443 type_name = schema->get_type_name(cr->get_schema_ref(), field);
444 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
445 data_type dt(type_name, modifiers);
446 // if(! dt.equals(se->get_data_type()) ){
447 // if(! dt.subsumes_type(se->get_data_type()) ){
448 if(! se->get_data_type()->subsumes_type(&dt) ){
// NOTE(review): the message labels the first %d as the se's type and the
// second as the table's, but the arguments are passed in the opposite order
// (dt is the schema-derived type) — confirm and swap the arguments.
449 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n",
450 dt.type_indicator(), se->get_data_type()->type_indicator(), cr->to_string().c_str());
457 case SE_AGGR_SE: // Probably I should just return,
458 // aggregate se's are explicitly bound to the schema.
459 // return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) );
// A node that already carries an aggregate reference is treated as bound.
462 if(se->get_aggr_ref() >= 0) return 1;
464 operands = se->get_operands();
466 for(o=0;o<operands.size();o++){
467 l_ret = bind_to_schema_se(operands[o], fm, schema);
468 if(l_ret < 0) r_ret = -1;
472 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se, line %d, character %d: unknown operator type %d\n",
473 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Applies bind_to_schema_se() to every scalar expression reachable from the
// predicate `pr`, recursing through nested predicates with bind_to_schema_pr().
// Two-operand forms evaluate both sides before returning -1 if either failed.
// An unknown predicate operator type is reported on stderr as an internal error.
480 int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){
482 vector<scalarexp_t *> op_list;
485 switch(pr->get_operator_type()){
487 return(bind_to_schema_se(pr->get_left_se(),fm,schema) );
489 l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ;
490 r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ;
491 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
494 return(bind_to_schema_pr(pr->get_left_pr(),fm,schema));
496 l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ;
497 r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ;
498 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
501 op_list = pr->get_op_list();
// All operands are bound; l_ret is set to -1 if any of them fails.
503 for(o=0;o<op_list.size();++o){
504 if(bind_to_schema_se(op_list[o],fm,schema) < 0) l_ret = -1;
508 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_pr, line %d, character %d, unknown predicate operator type %d\n",
509 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
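520 // Compute the temporal type of a scalar expression.
521 // Column references take their temporal type from tcol
522 // (varying_t when the column has no entry); unary and binary operators
524 // combine their operands' results via data_type::compute_temporal_type.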
// Computes the temporal type of a scalar expression.
//   se   - expression to analyze
//   tcol - map from column id to the temporal type of that column
// A column reference yields its entry in tcol, or varying_t when the column
// is absent. Unary and binary operators delegate to
// data_type::compute_temporal_type with the operand results (and, for the
// binary form, the operand data types).
// NOTE(review): the case labels and the remaining branches are not visible in
// this excerpt.
526 temporal_type compute_se_temporal(scalarexp_t *se, map<col_id, temporal_type> &tcol){
530 vector<scalarexp_t *> operands;
531 vector<data_type *> odt;
533 vector<bool> handle_ind;
535 switch(se->get_operator_type()){
541 return(varying_t); // actually, this should not be called.
543 return data_type::compute_temporal_type(
544 compute_se_temporal(se->get_left_se(), tcol), se->get_op()
547 return data_type::compute_temporal_type(
548 compute_se_temporal(se->get_left_se(), tcol),
549 compute_se_temporal(se->get_right_se(), tcol),
550 se->get_left_se()->get_data_type()->get_type(),
551 se->get_right_se()->get_data_type()->get_type(),
556 col_id cid(se->get_colref() );
557 if(tcol.count(cid) > 0){ return tcol[cid];
558 }else{ return varying_t;}
571 // verify_colref assigned data types to the column refs.
572 // Now assign data types to all other nodes in the
573 // scalar expression.
575 // return -1 on error
// Assigns a data type to every node of a scalar expression, bottom-up.
//   se       - expression to annotate (modified in place)
//   schema   - forwarded to recursive calls
//   fta_tree - forwarded to recursive calls
//   Ext_fcns - external function registry used to resolve function calls
// Literals, parameters, unary/binary operators and aggregates derive their
// type directly. For a function call the operands are typed first, then the
// name is looked up in turn as an aggregate extractor (lookup_extr), a UDAF
// (lookup_udaf), a stateful function (lookup_sfun) and a regular function
// (lookup_fcn). For each match the pass-by-HANDLE and must-be-constant
// parameter constraints are checked. Diagnostics go to stderr; a negative
// return signals an error.
// NOTE(review): the case labels, several guarding conditions and the return
// statements are not visible in this excerpt.
577 int assign_data_types(scalarexp_t *se, table_list *schema,
578 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
582 vector<scalarexp_t *> operands;
583 vector<data_type *> odt;
585 vector<bool> handle_ind;
586 vector<bool> constant_ind;
588 switch(se->get_operator_type()){
590 dt = new data_type( se->get_literal()->get_type() );
591 se->set_data_type(dt);
592 if( ! dt->is_defined() ){
593 fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n",
594 se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() );
601 string pname = se->get_param_name();
// NOTE(review): dt is the object returned by param_tbl, not a copy made here,
// so set_temporal() below mutates whatever that pointer refers to — confirm
// this sharing is intended.
602 dt = param_tbl->get_data_type(pname);
603 // A SE_PARAM can change its value mid-query so using one
604 // to set a window is dangerous. TODO check for this and issue a warning.
605 dt->set_temporal(constant_t);
606 se->set_data_type(dt);
607 if( ! dt->is_defined() ){
608 fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n",
609 pname.c_str(), se->get_lineno(),se->get_charno() );
615 dt = new data_type( "STRING" );
616 se->set_data_type(dt);
619 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
620 if(l_ret < 0) return -1;
// Result type of a unary operator is derived from the operand type and the operator.
622 dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() );
623 se->set_data_type(dt);
624 if( ! dt->is_defined() ){
625 fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n",
626 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
627 se->get_lineno(), se->get_charno() );
633 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
634 r_ret = assign_data_types(se->get_right_se(), schema, fta_tree, Ext_fcns);
635 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
// Result type of a binary operator is derived from both operand types and the operator.
637 dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() );
638 se->set_data_type(dt);
639 if( ! dt->is_defined() ){
640 fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n",
641 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
642 se->get_right_se()->get_data_type()->to_string().c_str(),
643 se->get_lineno(), se->get_charno() );
// The node's data type is already set at this point; it is only checked.
649 dt = se->get_data_type();
650 bret = dt->is_defined();
654 fprintf(stderr,"ERROR, column reference type is undefined, line =%d, char = %d, colref=%s\n",
655 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
659 dt = new data_type("Int"); // changed Uint to Int
660 se->set_data_type(dt);
663 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
664 if(l_ret < 0) return -1;
666 dt = new data_type();
667 dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type());
668 se->set_data_type(dt);
670 if( ! dt->is_defined() ){
671 fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n",
672 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
673 se->get_lineno(), se->get_charno() );
680 operands = se->get_operands();
// Type every operand and collect the operand types (odt) as the lookup signature.
682 for(o=0;o<operands.size();o++){
683 l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns);
684 odt.push_back(operands[o]->get_data_type());
685 if(l_ret < 0) r_ret = -1;
687 if(r_ret < 0) return(r_ret);
689 // Is it an aggregate extraction function?
690 fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt);
692 int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id);
693 int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id);
694 int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id);
695 // Construct a se for the subaggregate.
// The first n_aggr_oprs operands go to the subaggregate; the actual function
// receives the subaggregate plus the remaining operands (hence the +1).
696 vector<scalarexp_t *> op_a;
697 int n_aggr_oprs = operands.size()-n_fcn_params+1;
698 for(o=0;o<n_aggr_oprs;++o){
699 op_a.push_back(operands[o]);
701 // check handle params
702 vector<bool> handle_a = Ext_fcns->get_handle_indicators(subaggr_id);
703 for(o=0;o<op_a.size();o++){
705 if(op_a[o]->get_operator_type() != SE_LITERAL &&
706 op_a[o]->get_operator_type() != SE_IFACE_PARAM &&
707 op_a[o]->get_operator_type() != SE_PARAM){
708 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
709 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
714 vector<bool> is_const_a=Ext_fcns->get_const_indicators(subaggr_id);
715 for(o=0;o<op_a.size();o++){
717 if(op_a[o]->get_data_type()->get_temporal() != constant_t){
718 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n Line=%d, char=%d.\n",
719 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
725 scalarexp_t *se_a = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a);
726 se_a->set_fcn_id(subaggr_id);
727 se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id));
728 se_a->set_aggr_id(0); // label this as a UDAF.
731 // Change this se to be the actual function
732 vector<scalarexp_t *> op_f;
733 op_f.push_back(se_a);
734 for(o=n_aggr_oprs;o<operands.size();++o)
735 op_f.push_back(operands[o]);
736 // check handle params
737 vector<bool> handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id);
738 for(o=0;o<op_f.size();o++){
740 if(op_f[o]->get_operator_type() != SE_LITERAL &&
741 op_f[o]->get_operator_type() != SE_IFACE_PARAM &&
742 op_f[o]->get_operator_type() != SE_PARAM){
743 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
744 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
749 vector<bool> is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id);
750 for(o=0;o<op_f.size();o++){
752 if(op_f[o]->get_data_type()->get_temporal() != constant_t){
753 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n Line=%d, char=%d.\n",
754 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Rewrite this node in place so that it calls the actual function on the subaggregate.
760 se->param_list = op_f;
761 se->op = Ext_fcns->get_fcn_name(actual_fcn_id);
762 se->set_fcn_id(actual_fcn_id);
763 se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id));
767 fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str());
771 fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt);
773 se->set_fcn_id(fcn_id);
774 se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
775 se->set_aggr_id(0); // label this as a UDAF.
776 // Finally, verify that all HANDLE parameters are literals or params.
777 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
778 for(o=0;o<operands.size();o++){
780 if(operands[o]->get_operator_type() != SE_LITERAL &&
781 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
782 operands[o]->get_operator_type() != SE_PARAM){
783 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
784 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
789 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
790 for(o=0;o<operands.size();o++){
792 if(operands[o]->get_data_type()->get_temporal() != constant_t){
793 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n",
794 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
800 // UDAFS as superaggregates not yet supported.
801 if(se->is_superaggr()){
802 fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
803 se->set_superaggr(false);
808 fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str());
811 // Is it a stateful fcn?
812 fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt);
814 se->set_fcn_id(fcn_id);
815 se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
816 se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun
817 // Finally, verify that all HANDLE parameters are literals or params.
818 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
819 for(o=0;o<operands.size();o++){
821 if(operands[o]->get_operator_type() != SE_LITERAL &&
822 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
823 operands[o]->get_operator_type() != SE_PARAM){
// NOTE(review): this message and the "must be constant" one below say "UDAF"
// although this section follows the lookup_sfun (stateful function) lookup —
// looks like copy/paste from the UDAF section; confirm and reword.
824 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
825 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
830 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
831 for(o=0;o<operands.size();o++){
833 if(operands[o]->get_data_type()->get_temporal() != constant_t){
834 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n",
835 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
841 if(se->is_superaggr()){
842 fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
847 fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str());
851 // Is it a regular function?
852 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt);
// On lookup failure, print the attempted signature (operand types) for the user.
854 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
855 for(o=0;o<operands.size();o++){
856 if(o>0) fprintf(stderr,", ");
857 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
859 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
860 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
865 se->set_fcn_id(fcn_id);
866 dt = Ext_fcns->get_fcn_dt(fcn_id);
868 if(! dt->is_defined() ){
869 fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str());
870 for(o=0;o<operands.size();o++){
871 if(o>0) fprintf(stderr,", ");
872 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
874 fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() );
878 // Finally, verify that all HANDLE parameters are literals or params.
879 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
880 for(o=0;o<operands.size();o++){
882 if(operands[o]->get_operator_type() != SE_LITERAL &&
883 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
884 operands[o]->get_operator_type() != SE_PARAM){
885 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
886 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
891 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
892 for(o=0;o<operands.size();o++){
894 if(operands[o]->get_data_type()->get_temporal() != constant_t){
895 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be constant.\n Line=%d, char=%d.\n",
896 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
903 if(se->is_superaggr()){
904 fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
907 se->set_data_type(dt);
910 fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n",
911 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Assigns data types to every scalar expression inside a predicate and checks
// that the predicate is well typed.
//   pr       - predicate to annotate (modified in place)
//   schema, fta_tree, Ext_fcns - forwarded to assign_data_types()
// Visible checks: every literal of an IN list must be comparable with the
// left-hand expression; both sides of a comparison must be comparable; an
// external predicate must resolve through Ext_fcns->lookup_pred and satisfy
// its pass-by-HANDLE and must-be-constant parameter constraints. Nested
// predicates are processed recursively. Diagnostics go to stderr; a negative
// return signals an error.
918 int assign_predicate_data_types(predicate_t *pr, table_list *schema,
919 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
923 vector<data_type *> odt;
924 vector<literal_t *> litl;
925 vector<scalarexp_t *> operands;
926 vector<bool> handle_ind;
927 vector<bool> constant_ind;
930 switch(pr->get_operator_type()){
// NOTE(review): no check of l_ret is visible before the left expression's
// data type is used below — confirm a failed assignment cannot reach the
// comparability test.
932 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set);
933 litl = pr->get_lit_vec();
934 dt = pr->get_left_se()->get_data_type();
936 for(i=0;i<litl.size();i++){
// NOTE(review): a data_type is heap-allocated for every literal; no release
// is visible in this excerpt — confirm it is not leaked.
937 dtl = new data_type( litl[i]->get_type() );
938 if( ! dt->is_comparable(dtl,pr->get_op()) ){
939 fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n",
940 litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() );
948 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
949 r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
950 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
952 if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){
953 fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n",
954 pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(),
955 pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() );
961 return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set));
963 l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
964 r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
965 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
968 operands = pr->get_op_list();
// Type every operand and collect the operand types (odt) as the lookup signature.
970 for(o=0;o<operands.size();o++){
971 l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns); // , ext_fcn_set);
972 odt.push_back(operands[o]->get_data_type());
973 if(l_ret < 0) r_ret = -1;
975 if(r_ret < 0) return(r_ret);
977 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt);
// On lookup failure, print the attempted signature (operand types) for the user.
979 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
980 for(o=0;o<operands.size();o++){
981 if(o>0) fprintf(stderr,", ");
982 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
984 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
985 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
989 // ext_fcn_set.insert(fcn_id);
990 pr->set_fcn_id(fcn_id);
992 // Finally, verify that all HANDLE parameters are literals or params.
993 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
994 for(o=0;o<operands.size();o++){
996 if(operands[o]->get_operator_type() != SE_LITERAL &&
997 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
998 operands[o]->get_operator_type() != SE_PARAM){
999 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
1000 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
1005 constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id());
1006 for(o=0;o<operands.size();o++){
1007 if(constant_ind[o]){
1008 if(operands[o]->get_data_type()->get_temporal() != constant_t){
1009 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be constant.\n Line=%d, char=%d.\n",
1010 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
1017 // Check if this predicate function is special sampling function
1018 pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id());
1023 fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n",
1024 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1032 /////////////////////////////////////////////////////////////////////
1033 //////////////// Make a deep copy of a se / pred tree
1034 /////////////////////////////////////////////////////////////////////
1037 // duplicate a select element
// Returns a newly allocated select_element whose expression is a dup_se()
// copy of sl->se and whose name is taken from sl->name. aggr_tbl is forwarded
// to dup_se().
1038 select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){
1039 return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str());
1042 // duplicate a scalar expression.
// Builds a copy of a scalar expression tree.
//   se       - expression to copy
//   aggr_tbl - forwarded to the recursive calls (no other use is visible in
//              this excerpt)
// Child expressions are duplicated recursively and each new node takes over
// the decorations of its source node via use_decorations_of(). The literal
// and the interface-parameter reference are passed to the new node as
// obtained from the source; only the column reference is explicitly
// duplicated.
// NOTE(review): no NULL check on `se` is visible before it is dereferenced.
1043 scalarexp_t *dup_se(scalarexp_t *se,
1044 aggregate_table *aggr_tbl
1047 vector<scalarexp_t *> operand_list;
1048 vector<data_type *> dt_signature;
1049 scalarexp_t *ret_se, *l_se, *r_se;
1051 switch(se->get_operator_type()){
1053 ret_se = new scalarexp_t(se->get_literal());
1054 ret_se->use_decorations_of(se);
1058 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1059 ret_se->use_decorations_of(se);
1062 case SE_IFACE_PARAM:
1063 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1064 ret_se->use_decorations_of(se);
1068 ret_se = new scalarexp_t(se->get_colref()->duplicate());
// The rhs.scalarp pointer is copied as-is, so original and copy refer to the
// same object here.
1069 ret_se->rhs.scalarp = se->rhs.scalarp; // carry along notation
1070 ret_se->use_decorations_of(se);
1074 l_se = dup_se(se->get_left_se(), aggr_tbl);
1075 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1076 ret_se->use_decorations_of(se);
1080 l_se = dup_se(se->get_left_se(), aggr_tbl);
1081 r_se = dup_se(se->get_right_se(), aggr_tbl);
1083 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1084 ret_se->use_decorations_of(se);
1089 ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str());
1090 ret_se->use_decorations_of(se);
1094 l_se = dup_se(se->get_left_se(), aggr_tbl);
1095 ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se);
1096 ret_se->use_decorations_of(se);
1101 operand_list = se->get_operands();
1102 vector<scalarexp_t *> new_operands;
// Duplicate each operand in order.
1103 for(p=0;p<operand_list.size();p++){
1104 l_se = dup_se(operand_list[p], aggr_tbl);
1105 new_operands.push_back(l_se);
1108 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1109 ret_se->use_decorations_of(se);
// NOTE(review): this internal error is written to stdout with printf, whereas
// the other functions in this file report internal errors on stderr — confirm.
1114 printf("INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type());
// Builds a copy of a predicate tree.
//   pr       - predicate to copy
//   aggr_tbl - forwarded to dup_se() / dup_pr()
// Scalar operands are copied with dup_se() and nested predicates with
// dup_pr(). For a function predicate the function id and the is_sampling_fcn
// flag are carried over to the copy. An unknown predicate operator type is
// reported on stderr as an internal error.
1124 predicate_t *dup_pr(predicate_t *pr,
1125 aggregate_table *aggr_tbl
1128 vector<literal_t *> llist;
1129 scalarexp_t *se_l, *se_r;
1130 predicate_t *pr_l, *pr_r, *ret_pr;
1131 vector<scalarexp_t *> op_list, new_op_list;
1135 switch(pr->get_operator_type()){
1137 se_l = dup_se(pr->get_left_se(), aggr_tbl);
// The literal vector is passed on as returned by get_lit_vec(); the literal_t
// objects themselves are not duplicated in the visible code.
1138 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
1142 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1143 se_r = dup_se(pr->get_right_se(), aggr_tbl);
1144 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
1148 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1149 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1152 case PRED_BINARY_OP:
1153 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1154 pr_r = dup_pr(pr->get_right_pr(), aggr_tbl);
1155 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
1158 op_list = pr->get_op_list();
// Duplicate each operand in order.
1159 for(o=0;o<op_list.size();++o){
1160 se_l = dup_se(op_list[o], aggr_tbl);
1161 new_op_list.push_back(se_l);
1163 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1164 ret_pr->set_fcn_id(pr->get_fcn_id());
1165 ret_pr->is_sampling_fcn = pr->is_sampling_fcn;
1169 fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n",
1170 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Builds a new table_exp_t that copies te: query type, name map, query
// parameters, select list, FROM clause, the optional predicates, group-by
// entries, merge variables, slack expression and source position.
// Scalar expressions and predicates are duplicated with dup_se / dup_pr using
// a NULL aggregate table.
// NOTE(review): te->sl and te->fm are dereferenced without a null check, and
// te->slack is passed to dup_se unconditionally -- confirm callers always set
// them (or that dup_se tolerates NULL; its beginning is not visible here).
1178 table_exp_t *dup_table_exp(table_exp_t *te){
1180 table_exp_t *ret = new table_exp_t();
1182 ret->query_type = te->query_type;
// Copy the name map entry by entry.
1184 ss_map::iterator ss_i;
1185 for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){
1186 ret->nmap[(*ss_i).first] = (*ss_i).second;
// Each query parameter gets a freshly allocated var_pair_t with the same name/val.
1189 for(i=0;i<te->query_params.size();++i){
1190 ret->query_params.push_back(new
1191 var_pair_t(te->query_params[i]->name,te->query_params[i]->val) );
// Rebuild the select list: same position info, duplicated expressions, same names.
1195 ret->sl = new select_list_t();
1196 ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno;
1197 vector<select_element *> select_list = te->sl->get_select_list();
1198 for(i=0;i<select_list.size();++i){
1199 scalarexp_t *se = dup_se(select_list[i]->se,NULL);
1200 ret->sl->append(se,select_list[i]->name);
1204 ret->fm = te->fm->duplicate();
// The predicates are optional; only the ones present in te are duplicated.
1206 if(te->wh) ret->wh = dup_pr(te->wh,NULL);
1207 if(te->hv) ret->hv = dup_pr(te->hv,NULL);
1208 if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL);
1209 if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL);
1210 if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL);
1212 for(i=0;i<te->gb.size();++i){
1213 extended_gb_t *tmp_g = te->gb[i]->duplicate();
1214 ret->gb.push_back(tmp_g);
// mergevars is assigned directly (no per-element duplicate call is made here).
1217 ret->mergevars = te->mergevars;
1219 ret->slack = dup_se(te->slack,NULL);
1220 ret->lineno = te->lineno;
1221 ret->charno = te->charno;
1232 /////////////////////////////////////////////////////////////////////////
1233 // Bind colrefs to a member of their FROM list
// Walks a scalar expression and re-points every interface-parameter reference
// and column reference whose tablevar index equals prev_ref to new_ref,
// also updating the stored table-variable name from fm[new_ref].
// fm is indexed with new_ref without a bounds check, so new_ref must be a
// valid index into fm.
// NOTE(review): several case labels are not visible in this excerpt.
1235 void bind_colref_se(scalarexp_t *se,
1236 vector<tablevar_t *> &fm,
1237 int prev_ref, int new_ref
1240 vector<scalarexp_t *> operand_list;
1244 switch(se->get_operator_type()){
1248 case SE_IFACE_PARAM:
1249 ir = se->get_ifpref();
1250 if(ir->get_tablevar_ref() == prev_ref){
1251 ir->set_tablevar_ref(new_ref);
1252 ir->set_tablevar(fm[new_ref]->get_var_name());
// Column reference: same rebinding, applied to the colref object.
1257 cr=se->get_colref();
1258 if(cr->get_tablevar_ref() == prev_ref){
1259 cr->set_tablevar_ref(new_ref);
1260 // cr->set_interface(fm[new_ref]->get_interface());
1261 cr->set_table_name(fm[new_ref]->get_var_name());
// Single-child expression: recurse into the left operand.
1266 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
// Two-child expression: recurse into both operands.
1270 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
1271 bind_colref_se(se->get_right_se(), fm, prev_ref, new_ref);
// Nodes tagged with an aggregate reference are left untouched; otherwise
// every operand is processed.
1279 if(se->get_aggr_ref() >= 0) return;
1281 operand_list = se->get_operands();
1282 for(p=0;p<operand_list.size();p++){
1283 bind_colref_se(operand_list[p], fm, prev_ref, new_ref);
// Unrecognized operator type (reported on stdout, unlike the stderr reports
// used by the predicate walkers).
1288 printf("INTERNAL ERROR in bind_colref_se: operator type %d\n",se->get_operator_type());
// Predicate counterpart of bind_colref_se: walks the predicate tree and
// applies bind_colref_se (same fm, prev_ref, new_ref) to every scalar
// expression it contains, recursing through child predicates.
// NOTE(review): several case labels are not visible in this excerpt.
1299 void bind_colref_pr(predicate_t *pr,
1300 vector<tablevar_t *> &fm,
1301 int prev_ref, int new_ref
1303 vector<scalarexp_t *> op_list;
1306 switch(pr->get_operator_type()){
// Only a left scalar expression to rebind.
1308 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
// Left and right scalar expressions.
1312 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1313 bind_colref_se(pr->get_right_se(), fm, prev_ref, new_ref);
// Single child predicate.
1317 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1320 case PRED_BINARY_OP:
1321 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1322 bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref);
// Operand-list predicate: rebind inside every operand.
1325 op_list = pr->get_op_list();
1326 for(o=0;o<op_list.size();++o){
1327 bind_colref_se(op_list[o], fm, prev_ref, new_ref);
1332 fprintf(stderr,"INTERNAL ERROR in bind_colref_pr, line %d, character %d, unknown predicate operator type %d\n",
1333 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1342 /////////////////////////////////////////////////////////////////////
1343 // verify that the se refs only literals and params.
1344 // (use to verify that the expression should stay in the hfta
1346 /////////////////////////////////////////////////////////////////////
// Returns whether the scalar expression is built only from literals and
// query parameters. A NULL expression yields true; an interface parameter
// yields false; one- and two-child expressions are checked recursively.
// NOTE(review): the remaining branches (and their return values) are not
// visible in this excerpt.
1348 bool is_literal_or_param_only(scalarexp_t *se){
1350 vector<scalarexp_t *> operands;
1353 if(se == NULL) return(true);
1355 switch(se->get_operator_type()){
1359 case SE_IFACE_PARAM:
1360 return(false); // need to treat as colref
// Single child: the answer is that of the child.
1362 return(is_literal_or_param_only(se->get_left_se()) );
// Two children: both must qualify.
1365 is_literal_or_param_only(se->get_left_se()) &&
1366 is_literal_or_param_only(se->get_right_se())
1374 // The fcn might have special meaning at the lfta ...
1378 fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n",
1379 se->get_lineno(), se->get_charno(),se->get_operator_type());
1387 /////////////////////////////////////////////////////////////////////
1388 // Search for gb refs.
1389 // (use to verify that no gbrefs in a gb def.)
1390 /////////////////////////////////////////////////////////////////////
// Counts group-by references inside a scalar expression. NULL yields 0;
// children and operands are counted recursively and summed.
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
1393 int count_gb_se(scalarexp_t *se){
1395 vector<scalarexp_t *> operands;
1398 if(se == NULL) return(0);
1400 switch(se->get_operator_type()){
1403 case SE_IFACE_PARAM:
// Single child.
1406 return(count_gb_se(se->get_left_se()) );
// Two children: sum of both sides.
1409 count_gb_se(se->get_left_se()) +
1410 count_gb_se(se->get_right_se())
// A node whose gb reference is negative is not a group-by reference.
1413 if(se->get_gb_ref() < 0) return(0);
// Operand list: accumulate the counts of all operands.
1419 operands = se->get_operands();
1420 for(o=0;o<operands.size();o++){
1421 sum += count_gb_se(operands[o]);
1426 fprintf(stderr,"INTERNAL ERROR in count_gb_se, line %d, character %d: unknown operator type %d\n",
1427 se->get_lineno(), se->get_charno(),se->get_operator_type());
1434 /////////////////////////////////////////////////////////////////////
1435 //////////////// Search for stateful fcns.
1436 /////////////////////////////////////////////////////////////////////
// Counts references to stateful functions inside a scalar expression.
// NULL yields 0; children and operands are counted recursively.
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
1439 int se_refs_sfun(scalarexp_t *se){
1441 vector<scalarexp_t *> operands;
1444 if(se == NULL) return(0);
1446 switch(se->get_operator_type()){
1449 case SE_IFACE_PARAM:
// Single child.
1452 return(se_refs_sfun(se->get_left_se()) );
// Two children: sum of both sides.
1455 se_refs_sfun(se->get_left_se()) +
1456 se_refs_sfun(se->get_right_se())
// Operand list: first accumulate what the operands contribute ...
1464 operands = se->get_operands();
1465 for(o=0;o<operands.size();o++){
1466 sum += se_refs_sfun(operands[o]);
// ... then count this node itself if it carries an aggregate reference.
1468 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1470 // for now, stateful functions count as aggregates.
// A non-empty storage state marks the node as a stateful function.
1471 if(se->get_storage_state() != "")
1477 fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n",
1478 se->get_lineno(), se->get_charno(),se->get_operator_type());
1485 // Return a count of the number of stateful fcns in this predicate.
// Predicate-level wrapper around se_refs_sfun: sums the stateful-function
// counts of every scalar expression reachable from pr.
// pr is dereferenced immediately; no null check is visible here.
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
1486 int pred_refs_sfun(predicate_t *pr){
1487 vector<scalarexp_t *> op_list;
1490 switch(pr->get_operator_type()){
// Only a left scalar expression.
1492 return(se_refs_sfun(pr->get_left_se()) );
// Left and right scalar expressions.
1495 se_refs_sfun(pr->get_left_se()) +
1496 se_refs_sfun(pr->get_right_se())
// Single child predicate.
1499 return(pred_refs_sfun(pr->get_left_pr()) );
1500 case PRED_BINARY_OP:
1502 pred_refs_sfun(pr->get_left_pr()) +
1503 pred_refs_sfun(pr->get_right_pr())
// Operand-list predicate: accumulate over all operands.
1506 op_list = pr->get_op_list();
1508 for(o=0;o<op_list.size();++o){
1509 aggr_sum += se_refs_sfun(op_list[o]);
1514 fprintf(stderr,"INTERNAL ERROR in pred_refs_sfun, line %d, character %d, unknown predicate operator type %d\n",
1515 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1522 //////////////////////////////////////////////////
1524 /////////////////////////////////////////////////////////////////////
1525 //////////////// Search for aggregates.
1526 /////////////////////////////////////////////////////////////////////
// Counts aggregate references inside a scalar expression. NULL yields 0;
// children and operands are counted recursively. The strict flag is passed
// down unchanged and controls whether stateful functions are considered
// (they are only examined when strict is false).
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
1529 int count_aggr_se(scalarexp_t *se, bool strict){
1531 vector<scalarexp_t *> operands;
1534 if(se == NULL) return(0);
1536 switch(se->get_operator_type()){
1539 case SE_IFACE_PARAM:
// Single child.
1542 return(count_aggr_se(se->get_left_se(), strict) );
// Two children: sum of both sides.
1545 count_aggr_se(se->get_left_se(), strict) +
1546 count_aggr_se(se->get_right_se(), strict)
// Operand list: first accumulate what the operands contribute ...
1554 operands = se->get_operands();
1555 for(o=0;o<operands.size();o++){
1556 sum += count_aggr_se(operands[o], strict);
// ... then count this node itself if it carries an aggregate reference.
1558 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1560 // now, stateful functions can count as aggregates,
1561 // but only if we are NOT being strict (see the !strict test below).
1562 if(! strict && se->get_storage_state() != "")
1568 fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n",
1569 se->get_lineno(), se->get_charno(),se->get_operator_type());
1576 // Return a count of the number of aggregate fcns in this predicate.
// Predicate-level wrapper around count_aggr_se: sums the aggregate counts of
// every scalar expression reachable from pr, forwarding strict unchanged.
// pr is dereferenced immediately; no null check is visible here.
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
1577 int count_aggr_pred(predicate_t *pr, bool strict){
1578 vector<scalarexp_t *> op_list;
1581 switch(pr->get_operator_type()){
// Only a left scalar expression.
1583 return(count_aggr_se(pr->get_left_se(), strict) );
// Left and right scalar expressions.
1586 count_aggr_se(pr->get_left_se(), strict) +
1587 count_aggr_se(pr->get_right_se(), strict)
// Single child predicate.
1590 return(count_aggr_pred(pr->get_left_pr(), strict) );
1591 case PRED_BINARY_OP:
1593 count_aggr_pred(pr->get_left_pr(), strict) +
1594 count_aggr_pred(pr->get_right_pr(), strict)
// Operand-list predicate: accumulate over all operands.
1597 op_list = pr->get_op_list();
1599 for(o=0;o<op_list.size();++o){
1600 aggr_sum += count_aggr_se(op_list[o], strict);
1605 fprintf(stderr,"INTERNAL ERROR in count_aggr_pred, line %d, character %d, unknown predicate operator type %d\n",
1606 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1613 //////////////////////////////////////////////////
1614 /// Analyze tablevar refs
// Collects into reflist the distinct tablevar indices referenced by the
// scalar expression. An index is appended only if it is not already present
// (linear scan of reflist). NULL expressions are ignored.
// NOTE(review): several case labels are not visible in this excerpt.
1616 void get_tablevar_ref_se(scalarexp_t *se, vector<int> &reflist){
1618 vector<scalarexp_t *> operands;
1623 if(se == NULL) return;
1625 switch(se->get_operator_type()){
1629 case SE_IFACE_PARAM:
1630 ir = se->get_ifpref();
1631 vref = ir->get_tablevar_ref();
// Already recorded? Then there is nothing to add.
1632 for(o=0;o<reflist.size();++o){
1633 if(vref == reflist[o]) return;
1635 reflist.push_back(vref);
// Single child.
1638 get_tablevar_ref_se(se->get_left_se(), reflist);
// Two children.
1641 get_tablevar_ref_se(se->get_left_se(), reflist);
1642 get_tablevar_ref_se(se->get_right_se(), reflist);
// Column reference: group-by references are skipped; otherwise record the
// colref's tablevar index if it is new.
1645 if(se->is_gb()) return;
1646 cr = se->get_colref();
1647 vref = cr->get_tablevar_ref();
1648 for(o=0;o<reflist.size();++o){
1649 if(vref == reflist[o]) return;
1651 reflist.push_back(vref);
// Nodes tagged with an aggregate reference are not descended into.
1657 if(se->get_aggr_ref() >= 0) return;
1659 operands = se->get_operands();
1660 for(o=0;o<operands.size();o++){
1661 get_tablevar_ref_se(operands[o], reflist);
1666 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_se, line %d, character %d: unknown operator type %d\n",
1667 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of get_tablevar_ref_se: gathers into reflist the
// tablevar indices referenced by every scalar expression reachable from pr.
// NOTE(review): several case labels are not visible in this excerpt.
1674 void get_tablevar_ref_pr(predicate_t *pr, vector<int> &reflist){
1675 vector<scalarexp_t *> op_list;
1678 switch(pr->get_operator_type()){
// Only a left scalar expression.
1680 get_tablevar_ref_se(pr->get_left_se(),reflist);
// Left and right scalar expressions.
1683 get_tablevar_ref_se(pr->get_left_se(),reflist);
1684 get_tablevar_ref_se(pr->get_right_se(),reflist);
// Single child predicate.
1687 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1689 case PRED_BINARY_OP:
1690 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1691 get_tablevar_ref_pr(pr->get_right_pr(),reflist);
// Operand-list predicate.
1694 op_list = pr->get_op_list();
1695 for(o=0;o<op_list.size();++o){
1696 get_tablevar_ref_se(op_list[o],reflist);
1700 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_pr, line %d, character %d, unknown predicate operator type %d\n",
1701 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1708 // Walk SE tree and gather STATES ref'd by STATEFUL fcns.
// Walks the scalar expression and inserts into states_refd the storage-state
// name of every node whose get_storage_state() is non-empty. Ext_fcns is only
// forwarded to the recursive calls in the lines visible here.
// se is dereferenced immediately; no null check is visible in this function.
// NOTE(review): several case labels are not visible in this excerpt.
1710 void gather_fcn_states_se(scalarexp_t *se, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1713 vector<scalarexp_t *> operands;
1715 switch(se->get_operator_type()){
1718 case SE_IFACE_PARAM:
// Single child.
1721 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ;
// Two children.
1724 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
1725 gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns);
// Another single-child branch (descends into the left expression).
1732 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
// Operand list: visit the operands, then record this node's own state name.
1735 operands = se->get_operands();
1736 for(o=0;o<operands.size();o++){
1737 gather_fcn_states_se(operands[o], states_refd, Ext_fcns);
1739 if(se->get_storage_state() != ""){
1740 states_refd.insert(se->get_storage_state());
1745 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n",
1746 se->get_lineno(), se->get_charno(),se->get_operator_type());
1753 // Walk predicate tree and gather STATES ref'd by STATEFUL fcns.
// Predicate counterpart of gather_fcn_states_se: applies it to every scalar
// expression reachable from pr, recursing through child predicates.
// NOTE(review): several case labels are not visible in this excerpt.
1755 void gather_fcn_states_pr(predicate_t *pr, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1756 vector<scalarexp_t *> op_list;
1759 switch(pr->get_operator_type()){
// Only a left scalar expression.
1761 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
// Left and right scalar expressions.
1764 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1765 gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ;
// Single child predicate.
1768 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns);
1770 case PRED_BINARY_OP:
1771 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ;
1772 gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ;
// Operand-list predicate.
1775 op_list = pr->get_op_list();
1776 for(o=0;o<op_list.size();++o){
1777 gather_fcn_states_se(op_list[o],states_refd, Ext_fcns);
1782 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_pr, line %d, character %d, unknown predicate operator type %d\n",
1783 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1793 // walk se tree and collect aggregates into aggregate table.
1794 // duplicate aggregates receive the same idx to the table.
// Walks the scalar expression and registers every aggregate it finds in
// aggregate_table via add_aggr, storing the returned id on the node with
// set_aggr_id. Ext_fcns supplies the extra properties needed when a function
// node tagged as an aggregate (UDAF) is registered.
// se is dereferenced immediately; no null check is visible in this function.
// NOTE(review): several case labels are not visible in this excerpt.
1796 void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){
1799 vector<scalarexp_t *> operands;
1801 switch(se->get_operator_type()){
1804 case SE_IFACE_PARAM:
// Single child.
1807 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ;
// Two children.
1810 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns);
1811 build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns);
// Aggregate registered with no operand expression (NULL is passed).
1816 agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr());
1817 se->set_aggr_id(agg_id);
// Aggregate registered with its left expression as operand.
1820 agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr());
1821 se->set_aggr_id(agg_id);
// Operand list: operands are processed first, then the node itself is
// registered if it is tagged with an aggregate reference.
1824 operands = se->get_operands();
1825 for(o=0;o<operands.size();o++){
1826 build_aggr_tbl_fm_se(operands[o], aggregate_table, Ext_fcns);
1828 if(se->get_aggr_ref() >= 0){ // it's been tagged as a UDAF
1829 agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id()));
1830 se->set_aggr_id(agg_id);
1835 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n",
1836 se->get_lineno(), se->get_charno(),se->get_operator_type());
1843 // walk predicate tree and collect aggregates into aggregate table.
1844 // duplicate aggregates receive the same idx to the table.
// Predicate counterpart of build_aggr_tbl_fm_se: applies it to every scalar
// expression reachable from pr, recursing through child predicates.
// NOTE(review): several case labels are not visible in this excerpt.
1846 void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){
1847 vector<scalarexp_t *> op_list;
1850 switch(pr->get_operator_type()){
// Only a left scalar expression.
1852 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
// Left and right scalar expressions.
1855 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1856 build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ;
// Single child predicate.
1859 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns);
1861 case PRED_BINARY_OP:
1862 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ;
1863 build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ;
// Operand-list predicate.
1866 op_list = pr->get_op_list();
1867 for(o=0;o<op_list.size();++o){
1868 build_aggr_tbl_fm_se(op_list[o],aggregate_table, Ext_fcns);
1873 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_pred, line %d, character %d, unknown predicate operator type %d\n",
1874 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1882 // Return true if the two scalar expressions
1883 // represent the same value (e.g., use to eliminate
1884 // duplicate aggregates).
// Structural equality test for two scalar expressions. Two NULLs are
// equivalent, a NULL and a non-NULL are not. Otherwise the operator type and
// operator string must match, after which the comparison is delegated per
// node kind (literal, parameter name, interface parameter, colref / gb ref,
// children, operand lists).
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
1885 bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){
1886 vector<scalarexp_t *> operands1;
1887 vector<scalarexp_t *> operands2;
1890 // First handle the case of nulls (e.g. COUNT aggrs)
1891 if(se1 == NULL && se2 == NULL) return(true);
1892 if(se1 == NULL || se2 == NULL) return(false);
1894 // In all cases, must be the same operator type and same operator.
1895 if(se1->get_operator_type() != se2->get_operator_type())
1897 if(se1->get_op() != se2->get_op() )
1900 switch(se1->get_operator_type()){
// Literals compare through literal_t::is_equivalent.
1902 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
// Parameters compare by name.
1904 return(se1->get_param_name() == se2->get_param_name() );
1905 case SE_IFACE_PARAM:
1906 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
// Single child.
1908 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
// Two children: the right sides are compared only if the left sides match.
1910 if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) )
1911 return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) );
// Column references: two gb refs compare by gb index; if only one side is a
// gb ref the second test fires; otherwise the colrefs themselves are compared.
1914 if(se1->is_gb() && se2->is_gb())
1915 return( se1->get_gb_ref() == se2->get_gb_ref() );
1916 if(se1->is_gb() || se2->is_gb())
1918 return(se1->get_colref()->is_equivalent(se2->get_colref()) );
// Another single-child branch compared through the left expression.
1922 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
// Operand lists: same operator, same arity, pairwise-equivalent operands.
1924 if(se1->get_op() != se2->get_op()) return(false);
1926 operands1 = se1->get_operands();
1927 operands2 = se2->get_operands();
1928 if(operands1.size() != operands2.size()) return(false);
1930 for(o=0;o<operands1.size();o++){
1931 if(! is_equivalent_se(operands1[o], operands2[o]) )
1936 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
1937 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
1944 // Similar to is_equivalent_se, but with a looser definition
1945 // of equivalence of colrefs. Here, say they are equivalent
1946 // if their base table is the same. Use to find equivalent
1947 // predicates on base tables.
// Variant of is_equivalent_se in which column references are compared with
// colref_t::is_equivalent_base(..., Schema) instead of is_equivalent. A colref
// that is a group-by reference is first replaced by its get_right_se().
// NOTE(review): se1 and se2 are dereferenced by the gb-substitution step
// before the NULL checks below run, so a NULL argument would be dereferenced
// -- confirm callers never pass NULL, or move the checks up.
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
1948 bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){
1949 vector<scalarexp_t *> operands1;
1950 vector<scalarexp_t *> operands2;
// Substitute gb colrefs by their right-hand expression before comparing.
1953 if(se1->get_operator_type() == SE_COLREF && se1->is_gb()){
1954 se1 = se1->get_right_se();
1956 if(se2->get_operator_type() == SE_COLREF && se2->is_gb()){
1957 se2 = se2->get_right_se();
1960 // First handle the case of nulls (e.g. COUNT aggrs)
1961 if(se1 == NULL && se2 == NULL) return(true);
1962 if(se1 == NULL || se2 == NULL) return(false);
1964 // In all cases, must be the same operator type and same operator.
1965 if(se1->get_operator_type() != se2->get_operator_type())
1967 if(se1->get_op() != se2->get_op() )
1970 switch(se1->get_operator_type()){
// Literals compare through literal_t::is_equivalent.
1972 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
// Parameters compare by name.
1974 return(se1->get_param_name() == se2->get_param_name() );
1975 case SE_IFACE_PARAM:
1976 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
// Single child.
1978 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
// Two children: the right sides are compared only if the left sides match.
1980 if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) )
1981 return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) );
// Column references: gb handling as in is_equivalent_se, then base-table
// equivalence of the colrefs.
1985 if(se1->is_gb() && se2->is_gb())
1986 return( se1->get_gb_ref() == se2->get_gb_ref() );
1987 if(se1->is_gb() || se2->is_gb())
1990 return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) );
// Another single-child branch compared through the left expression.
1994 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
// Operand lists: same operator, same arity, pairwise-equivalent operands.
1996 if(se1->get_op() != se2->get_op()) return(false);
1998 operands1 = se1->get_operands();
1999 operands2 = se2->get_operands();
2000 if(operands1.size() != operands2.size()) return(false);
2002 for(o=0;o<operands1.size();o++){
2003 if(! is_equivalent_se_base(operands1[o], operands2[o], Schema) )
// NOTE(review): the message below names is_equivalent_se, not
// is_equivalent_se_base, which can mislead when reading logs.
2008 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
2009 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
2016 // Find predicates which are equivalent when
2017 // looking at the base tables. Use to find
2018 // common prefilter.
// Base-table equivalence test for two predicates. Two NULLs are equivalent,
// a NULL and a non-NULL are not. Operator type and operator string must
// match; scalar expressions are then compared with is_equivalent_se_base and
// child predicates recursively.
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
2019 bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){
2022 // First handle the case of nulls
2023 if(p1 == NULL && p2 == NULL) return(true);
2024 if(p1 == NULL || p2 == NULL) return(false);
2027 if(p1->get_operator_type() != p2->get_operator_type())
2029 if(p1->get_op() != p2->get_op())
2032 vector<literal_t *> ll1;
2033 vector<literal_t *> ll2;
2034 vector<scalarexp_t *> op_list1, op_list2;
// The switch uses p2's type, which the check above compared with p1's type.
2037 switch(p2->get_operator_type()){
// Left and right scalar expressions must both be equivalent.
2039 if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2041 return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) );
// Left scalar expression plus a literal list: the lists must have the same
// length and be equivalent position by position (order matters).
2044 if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2046 ll1 = p1->get_lit_vec();
2047 ll2 = p2->get_lit_vec();
2048 if(ll1.size() != ll2.size())
2050 for(i=0;i<ll1.size();i++){
2051 if(! ll1[i]->is_equivalent( ll2[i] ) )
// Single child predicate.
2057 return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) );
2059 case PRED_BINARY_OP:
2060 if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema))
2062 return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) );
// Operand-list predicate: same arity and pairwise-equivalent operands.
2065 op_list1 = p1->get_op_list();
2066 op_list2 = p2->get_op_list();
2067 if(op_list1.size() != op_list2.size()) return(false);
2068 for(o=0;o<op_list1.size();++o){
2069 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) ) return(false);
// Compares two function predicates (both must be PRED_FUNC with the same
// function id) operand by operand using is_equivalent_se_base.
// The per-operand class indicators of the function are fetched into cl_op.
// NOTE(review): how cl_op is used inside the loop is not visible in this
// excerpt (presumably it selects which operands take part in the comparison)
// -- confirm against the full file. p1 and p2 are dereferenced without a
// null check.
2080 bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns){
2081 if((p1->get_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC))
2083 if(p1->get_fcn_id() != p2->get_fcn_id())
2085 vector<bool> cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id());
2087 vector<scalarexp_t *> op_list1 = p1->get_op_list();
2088 vector<scalarexp_t *> op_list2 = p2->get_op_list();
2089 if(op_list1.size() != op_list2.size()) return(false);
2090 for(o=0;o<op_list1.size();++o){
2092 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) )
2103 // Verify that the scalar expression (in a such that clause)
2104 // is acceptable in an aggregation query. No column
2105 // references allowed outside aggregates, except for
2106 // references to group-by attributes.
2107 // return true if OK, false if bad.
// Checks that a scalar expression is legal in an aggregation query: a column
// reference is accepted only if it is a group-by reference, otherwise an
// error naming the colref and its position is printed. Children and operands
// are verified recursively; function nodes tagged with an aggregate reference
// are accepted without descending.
// se is dereferenced immediately; no null check is visible in this function.
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
2108 bool verify_aggr_query_se(scalarexp_t *se){
2109 vector <scalarexp_t *> operands;
2112 switch(se->get_operator_type()){
2115 case SE_IFACE_PARAM:
// Single child.
2118 return(verify_aggr_query_se(se->get_left_se() ) );
// Two children: && short-circuits, so the right side is not examined (and no
// error is printed for it) once the left side has failed.
2120 return(verify_aggr_query_se(se->get_left_se() ) &&
2121 verify_aggr_query_se(se->get_right_se() ) );
// Column reference: only group-by references are allowed here.
2123 if(se->is_gb() ) return(true);
2124 fprintf(stderr,"ERROR: the select clause in an aggregate query can "
2125 "only reference constants, group-by attributes, and "
2126 "aggregates, (%s) line %d, character %d.\n",
2127 se->get_colref()->to_string().c_str(),
2128 se->get_lineno(), se->get_charno() );
2132 // colrefs and gbrefs allowed.
2133 // check for nested aggregation elsewhere, so just return TRUE
2136 // If its a UDAF, just return true
2137 if(se->get_aggr_ref() >= 0) return true;
2139 operands = se->get_operands();
2141 for(o=0;o<operands.size();o++){
2142 if(! verify_aggr_query_se(operands[o]) )
2147 fprintf(stderr,"INTERNAL ERROR in verify_aggr_query_se, line %d, character %d: unknown operator type %d\n",
2148 se->get_lineno(), se->get_charno(),se->get_operator_type());
2157 // Find complex literals.
2158 // NOTE : This analysis should be deferred to
2159 // code generation time.
2160 // This analysis drills into aggr se specs.
2161 // Shouldn't this be done at the aggregate table?
2162 // But, it's not a major loss of efficiency.
2163 // UPDATE : drilling into aggr se's is causing a problem
2164 // so I've eliminated it.
// Walks a scalar expression and registers every literal that has a
// non-empty constructor name in complex_literals (add_cpx_lit), storing the
// returned index on the literal with set_cpx_lit_ref.
// Function nodes tagged with an aggregate reference are not descended into.
// se is dereferenced immediately; no null check is visible in this function.
// NOTE(review): several case labels and return statements are not visible
// in this excerpt, so the exact meaning of the bool result cannot be stated
// from here.
2166 bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2167 cplx_lit_table *complex_literals){
2169 vector<scalarexp_t *> operands;
2171 scalarexp_t *param_se;
2174 switch(se->get_operator_type()){
// Literal node: register it if it is built through a constructor.
2176 l = se->get_literal();
2177 if(l->constructor_name() != ""){
2178 int cl_idx = complex_literals->add_cpx_lit(l, false);
2179 l->set_cpx_lit_ref(cl_idx);
2184 // SE_IFACE_PARAM should not exist when this is called.
// Single child.
2186 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
// Two children.
// NOTE(review): && short-circuits, so the right subtree is only scanned when
// the left call returns true -- confirm the left call can never return false,
// otherwise literals on the right would be missed.
2188 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) &&
2189 find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) );
2196 // return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
// Aggregate-tagged function nodes are accepted without scanning operands.
2198 if(se->get_aggr_ref() >= 0) return true;
// Results of the operand scans are ignored here.
2200 operands = se->get_operands();
2201 for(o=0;o<operands.size();o++){
2202 find_complex_literal_se(operands[o], Ext_fcns, complex_literals);
2206 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_se, line %d, character %d: unknown operator type %d\n",
2207 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of find_complex_literal_se: scans every scalar
// expression reachable from pr, and additionally registers constructor-built
// literals found in a predicate's literal vector.
// NOTE(review): several case labels are not visible in this excerpt.
2216 void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2217 cplx_lit_table *complex_literals){
2219 vector<literal_t *> litl;
2220 vector<scalarexp_t *> op_list;
2223 switch(pr->get_operator_type()){
// Left scalar expression plus literal list: scan the expression, then
// register each literal that has a non-empty constructor name.
2225 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2226 litl = pr->get_lit_vec();
2227 for(i=0;i<litl.size();i++){
2228 if(litl[i]->constructor_name() != ""){
2229 int cl_idx = complex_literals->add_cpx_lit(litl[i],false);
2230 litl[i]->set_cpx_lit_ref(cl_idx);
// Left and right scalar expressions.
2235 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2236 find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ;
// Single child predicate.
2239 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals);
2241 case PRED_BINARY_OP:
2242 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ;
2243 find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ;
// Operand-list predicate.
2246 op_list = pr->get_op_list();
2247 for(o=0;o<op_list.size();++o){
2248 find_complex_literal_se(op_list[o],Ext_fcns, complex_literals);
2252 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_pr, line %d, character %d, unknown predicate operator type %d\n",
2253 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2261 // Find all things which are passed as handle parameters to functions
2262 // (query parameters, (simple) literals, complex literals)
2263 // These expressions MUST be processed with find_complex_literal_??
2265 // TODO: this analysis drills into the aggregate SEs.
2266 // Shouldn't this be done on the aggr table SEs instead?
2267 // to avoid duplication. The handle registration
2268 // might be expensive ...
2269 // REVISED : drilling into aggr se's is causing problems, eliminated.
// Walks a scalar expression and, for function operands that are passed by
// handle, appends a handle_param_tbl_entry to handle_tbl and records the
// entry's index on the operand with set_handle_ref. Such an operand must be a
// literal or a query parameter; anything else is reported as an error.
// Function nodes tagged with an aggregate reference are not descended into.
// NOTE(review): several case labels, the test that selects handle operands
// (presumably on handle_ind[o]) and the else-branches are not visible in this
// excerpt -- confirm against the full file.
2271 void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2272 vector<handle_param_tbl_entry *> &handle_tbl){
2273 vector<scalarexp_t *> operands;
2274 vector<bool> handle_ind;
2276 scalarexp_t *param_se;
2280 switch(se->get_operator_type()){
2285 // case SE_IFACE_PARAM: SHOULD NOT EXIST when this is called
// Single child.
2287 find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
// Two children.
2290 find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ;
2291 find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ;
2298 // find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
2301 if(se->get_aggr_ref() >= 0) return ;
// Fetch the function's operands and its per-operand handle indicators.
2303 operands = se->get_operands();
2304 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
2305 for(o=0;o<operands.size();o++){
2307 handle_param_tbl_entry *he;
2308 param_se = operands[o];
// A handle operand must be a literal or a query parameter.
2309 if(param_se->get_operator_type() != SE_LITERAL &&
2310 param_se->get_operator_type() != SE_PARAM){
2311 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
2312 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Query parameter: the entry is keyed by the parameter name.
2316 if(param_se->get_operator_type() == SE_PARAM){
2317 he = new handle_param_tbl_entry(
2318 se->get_op(), o, param_se->get_param_name(),
2319 param_se->get_data_type()->get_type_str());
// Literal: complex literals are keyed by their complex-literal index; the
// remaining construction handles the other literals.
2321 l = param_se->get_literal();
2322 if(l->is_cpx_lit()){
2323 he = new handle_param_tbl_entry(
2324 se->get_op(), o, l->get_cpx_lit_ref(),
2325 param_se->get_data_type()->get_type_str());
2327 he = new handle_param_tbl_entry(
2329 param_se->get_data_type()->get_type_str());
// The handle reference is the index the new entry is about to occupy.
2332 param_se->set_handle_ref(handle_tbl.size());
2333 handle_tbl.push_back(he);
// Recursive scan of the operand itself.
2335 find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ;
2340 fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n",
2341 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of find_param_handles_se: scans every scalar
// expression reachable from pr, and for operand-list predicates registers
// pass-by-handle operands in handle_tbl in the same way as for functions
// (entries are created with pr->get_op() as the owning name).
// NOTE(review): several case labels, the test that selects handle operands
// (presumably on handle_ind[o]) and the else-branches are not visible in this
// excerpt -- confirm against the full file.
2348 void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2349 vector<handle_param_tbl_entry *> &handle_tbl){
2350 vector<literal_t *> litl;
2351 vector<scalarexp_t *> op_list;
2352 scalarexp_t *param_se;
2353 vector<bool> handle_ind;
2357 switch(pr->get_operator_type()){
// Only a left scalar expression.
2359 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
// Left and right scalar expressions.
2362 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2363 find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ;
// Single child predicate.
2366 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl);
2368 case PRED_BINARY_OP:
2369 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ;
2370 find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ;
// Operand-list predicate: fetch operands and per-operand handle indicators.
2373 op_list = pr->get_op_list();
2374 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
2375 for(o=0;o<op_list.size();++o){
2377 handle_param_tbl_entry *he;
2378 param_se = op_list[o];
// A handle operand must be a literal or a query parameter.
2379 if(param_se->get_operator_type() != SE_LITERAL &&
2380 param_se->get_operator_type() != SE_PARAM){
2381 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
2382 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
// Query parameter: the entry is keyed by the parameter name.
2386 if(param_se->get_operator_type() == SE_PARAM){
2387 he = new handle_param_tbl_entry(
2388 pr->get_op(), o, param_se->get_param_name(),
2389 param_se->get_data_type()->get_type_str());
// Literal: complex literals are keyed by their complex-literal index; the
// remaining construction handles the other literals.
2391 l = param_se->get_literal();
2392 if(l->is_cpx_lit()){
2393 he = new handle_param_tbl_entry(
2394 pr->get_op(), o, l->get_cpx_lit_ref(),
2395 param_se->get_data_type()->get_type_str());
2397 he = new handle_param_tbl_entry(
2399 param_se->get_data_type()->get_type_str());
// The handle reference is the index the new entry is about to occupy.
2402 param_se->set_handle_ref(handle_tbl.size());
2403 handle_tbl.push_back(he);
// Recursive scan of the operand itself.
2405 find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ;
2410 fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n",
2411 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2419 // Verify the HAVING predicate : it
2420 // can access gb vars, aggregates, and constants,
2422 // return 1 if OK, -1 if bad.
2423 // Perhaps replace by a pair of fcns which counts non-gb colrefs?
2425 // Extended to deal with cleaning_by, cleaning_when :
2426 // verify that any aggregate function
2427 // has the multiple output property.
// Verifies a scalar expression that appears in the clause named by `clause`
// (the code special-cases "CLEANING_WHEN" and "CLEANING_BY"). Column
// references must be group-by references; for the cleaning clauses,
// aggregates must allow multiple outputs according to
// aggr_table_entry::multiple_return_allowed. Failures are reported on stderr,
// and -1 is propagated from failing children (1 otherwise, where visible).
// se is dereferenced immediately; no null check is visible in this function.
// NOTE(review): several case labels and return statements are not visible
// in this excerpt.
2429 int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){
2431 vector<scalarexp_t *> operands;
2432 vector<data_type *> odt;
2435 switch(se->get_operator_type()){
2439 case SE_IFACE_PARAM:
// Single child.
2442 return(verify_having_se(se->get_left_se(), clause, Ext_fcns) );
// Two children: both are always evaluated so that every error is reported.
2444 l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns);
2445 r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns);
2446 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
// Column reference: only group-by references are acceptable.
2449 if(se->is_gb()) return 1;
2450 fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause,
2451 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
2455 // colrefs and gbrefs allowed.
2456 // check for nested aggregation elsewhere, so just return TRUE
// NOTE(review): the test fires when the aggregate is NOT a superaggregate,
// while the message says the clause "references a superaggregate" -- confirm
// which of the two (condition or message) reflects the intended rule.
2457 if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2458 fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2459 se->get_lineno(),se->get_charno(), se->get_op().c_str() );
2463 // Ensure that aggregate refs allow multiple outputs
2464 // in CLEANING_WHEN, CLEANING_BY
2465 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2466 if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2467 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2468 se->get_op().c_str(),se->get_lineno(),se->get_charno() );
// Same two checks for a node tagged with an aggregate reference.
// NOTE(review): the position arguments below are read through
// se->get_colref(), whereas every other message for non-colref nodes uses
// se->get_lineno()/get_charno() -- confirm get_colref() is valid on this path.
2476 if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2477 fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2478 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_op().c_str() );
2482 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2483 if(se->get_aggr_ref() >= 0 && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2484 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2485 se->get_op().c_str(),se->get_lineno(),se->get_charno() );
2490 if(se->get_aggr_ref() >= 0) // don't descend into aggregates.
// Otherwise verify every operand; all operands are visited even after a
// failure, and the combined result is returned.
2493 operands = se->get_operands();
2495 for(o=0;o<operands.size();o++){
2496 l_ret = verify_having_se(operands[o], clause, Ext_fcns);
2497 if(l_ret < 0) r_ret = -1;
2499 if(r_ret < 0) return(-1); else return(1);
2502 fprintf(stderr,"INTERNAL ERROR in verify_having_se, line %d, character %d: unknown operator type %d\n",
2503 se->get_lineno(), se->get_charno(),se->get_operator_type());
2510 // Verify the HAVING predicate : it
2511 // can access gb vars, aggregates, and constants,
2513 // return 1 if OK, -1 if bad.
2514 // Perhaps replace by a pair of fcns which counts non-gb colrefs?
//	Predicate-level walker: scalar operands are checked with
//	verify_having_se, sub-predicates recursively with this function.
//	pr       : predicate to verify.
//	clause   : clause name, forwarded unchanged to verify_having_se.
//	Ext_fcns : external function table, forwarded unchanged.
2517 int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){
2519 vector<literal_t *> litl;
2520 vector<scalarexp_t *> op_list;
2523 switch(pr->get_operator_type()){
2525 return(verify_having_se(pr->get_left_se(), clause, Ext_fcns));
//	Both sides are always checked so that every error gets reported.
2527 l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ;
2528 r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ;
2529 if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1);
2531 return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns));
2532 case PRED_BINARY_OP:
2533 l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns);
2534 r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns);
2535 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
//	Predicate with an operand list: check each operand.
2538 op_list = pr->get_op_list();
2540 for(o=0;o<op_list.size();++o){
2541 if( verify_having_se(op_list[o], clause, Ext_fcns) < 0) l_ret = -1;
2546 fprintf(stderr,"INTERNAL ERROR in verify_having_pred, line %d, character %d, unknown predicate operator type %d\n",
2547 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2554 //////////////////////////////////////////////////////////////////////////
2555 //////////////////////////////////////////////////////////////////////////
2556 /////// cnf and pred analysis and manipulation
2558 // ----------------------------------------------------------------------
2559 // Convert the predicates to a list of conjuncts
2560 // (not actually cnf). Do some analysis
2561 // on their properties.
2562 // ----------------------------------------------------------------------
2565 // Put into list clist the predicates that
2566 // are AND'ed together.
//	pr    : root of the predicate tree; NULL is accepted and ignored.
//	clist : output list; a new cnf_elem is appended for every conjunct.
//	An "AND" node is split recursively into its two sides, while an "OR"
//	node (like the other predicate kinds) becomes a single conjunct.
2568 void make_cnf_from_pr(predicate_t *pr, vector<cnf_elem *> &clist){
2570 if(pr == NULL) return;
2572 switch(pr->get_operator_type()){
2574 clist.push_back(new cnf_elem(pr));
2578 clist.push_back(new cnf_elem(pr));
2582 clist.push_back(new cnf_elem(pr));
2585 case PRED_BINARY_OP:
2586 if(pr->get_op() == "OR"){
2587 clist.push_back(new cnf_elem(pr));
2590 if(pr->get_op() =="AND"){
2591 make_cnf_from_pr(pr->get_left_pr(),clist);
2592 make_cnf_from_pr(pr->get_right_pr(),clist);
2596 clist.push_back(new cnf_elem(pr));
2600 fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str());
2608 // Find out what things are referenced in a se,
2609 // to use for analyzing a predicate.
2610 // Currently, is it simple (no operators), does it
2611 // reference a group-by column, does it reference an
2612 // attribute of a table.
2614 // analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf
//	se  : scalar expression to inspect (walked recursively).
//	s, g, a, agr : in/out flags.  The callers bind them to the
//	      *_simple, *_gb, *_attr and *_aggr fields of a cnf_elem; they are
//	      only ever updated here, so the caller must initialize them.
2617 void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){
2619 vector<scalarexp_t *> operand_list;
2621 switch(se->get_operator_type()){
2624 case SE_IFACE_PARAM:
2627 if(se->is_gb() ) g=1;
2632 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2636 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2637 analyze_cnf_se(se->get_right_se(),s,g,a,agr);
//	A function registered as an aggregate has aggr_ref >= 0.
2644 if(se->get_aggr_ref() >= 0){
2649 operand_list = se->get_operands();
2650 for(p=0;p<operand_list.size();p++){
2651 analyze_cnf_se(operand_list[p],s,g,a,agr);
//	Predicate-level counterpart of analyze_cnf_se: walks the predicate tree
//	and accumulates the group-by (g), attribute (a) and aggregate (agr)
//	flags over every scalar expression it contains.  The "simple" flag of
//	the scalar analysis is collected in a throw-away local (dum_simple).
2661 void analyze_cnf_pr(predicate_t *pr, int &g, int &a, int &agr){
2663 vector<scalarexp_t *> op_list;
2666 switch(pr->get_operator_type()){
2668 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2669 analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr);
2672 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2675 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2677 case PRED_BINARY_OP:
2678 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2679 analyze_cnf_pr(pr->get_right_pr(),g,a,agr);
//	Predicate with an operand list: analyze each operand.
2682 op_list = pr->get_op_list();
2683 for(o=0;o<op_list.size();++o){
2684 analyze_cnf_se(op_list[o],dum_simple,g,a,agr);
2688 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2689 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2696 // analyze a conjunct of a predicate.
2697 // Is it atomic (e.g., a single predicate),
2698 // and if so do a further analysis.
//	c : conjunct to analyze.  Its pr_gb / pr_attr / pr_aggr fields are
//	    updated from the whole predicate; for a comparison or IN predicate
//	    the per-operand fields l_* (left operand) and, for a comparison,
//	    r_* (right operand) are filled in as well.
2700 void analyze_cnf(cnf_elem *c){
2702 // analyze the predicate.
2703 analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr);
2705 if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){
2710 // its an atomic predicate -- get more info
2713 if(c->pr->get_op() == "=")
2718 if(c->pr->get_operator_type() == PRED_IN)
2723 c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0;
2724 analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr);
//	Only a comparison has a right-hand scalar expression; the r_* flags
//	must describe that right operand, not the left one again.
2726 if(c->pr->get_operator_type() == PRED_COMPARE){
2727 c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0;
2728 analyze_cnf_se(c->pr->get_right_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr);
//	Count what a scalar expression references, for constraint cost
//	estimation.
//	se       : expression to analyze (walked recursively).
//	n_agr, n_gb, n_par, n_func, n_op : outputs; overwritten with the counts
//	           of aggregates, group-by refs, parameters, function cost and
//	           operators found in se.
//	Ext_fcns : external function table used for estimate_fcn_cost.
//	enter_gb : forwarded unchanged to every recursive call.
2732 void analyze_constraint_se(scalarexp_t *se,
2733 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){
2734 int l_agr, l_gb, l_par, l_func, l_op;
2735 int r_agr, r_gb, r_par, r_func, r_op;
2737 vector<scalarexp_t *> operand_list;
2739 switch(se->get_operator_type()){
2741 case SE_IFACE_PARAM:
2742 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2745 n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0;
2748 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
//	NOTE(review): the right se analyzed here is presumably the group-by
//	definition attached by insert_gb_def_se -- confirm.
2751 analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2758 analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
//	Two operands: analyze each side separately, then combine the counts.
2762 analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2763 analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb);
2767 n_func=l_func+r_func;
2772 n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
//	A function registered as an aggregate counts as one aggregate.
2775 if(se->get_aggr_ref() >= 0){
2776 n_agr=1; n_gb = 0; n_par = 0; n_op = 0;
2778 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2783 n_agr=0; n_gb = 0; n_par = 0; n_op = 0;
2785 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2788 operand_list = se->get_operands();
2789 for(p=0;p<operand_list.size();p++){
2790 analyze_constraint_se(operand_list[p],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2803 // Estimate the cost of a constraint.
2804 // WARNING a lot of cost assumptions are embedded in the code.
//	pr       : predicate to analyze (walked recursively).
//	n_agr, n_gb, n_par, n_func, n_op : outputs, counts gathered from the
//	           scalar expressions (see analyze_constraint_se).
//	n_cmp_s, n_cmp_c : outputs, number of simple / complex comparisons.
//	n_in, n_pred, n_bool : outputs, IN count, external predicate cost and
//	           number of boolean connectives.
//	Ext_fcns : external function table used for estimate_fcn_cost.
//	enter_gb : forwarded unchanged to analyze_constraint_se.
2805 void analyze_constraint_pr(predicate_t *pr,
2806 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op,
2807 int &n_cmp_s, int &n_cmp_c, int &n_in, int &n_pred, int &n_bool, ext_fcn_list *Ext_fcns, bool enter_gb){
2808 int l_agr, l_gb, l_par, l_func, l_op, l_cmp_s, l_cmp_c, l_in, l_pred,l_bool;
2809 int r_agr, r_gb, r_par, r_func, r_op, r_cmp_s, r_cmp_c, r_in, r_pred,r_bool;
2812 vector<scalarexp_t *> op_list;
2815 switch(pr->get_operator_type()){
2817 analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb);
2818 analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb);
2819 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2820 n_func=l_func+r_func; n_op=l_op+r_op;
//	One comparison, counted as complex or simple by operand types.
2821 if(pr->get_left_se()->get_data_type()->complex_comparison(
2822 pr->get_right_se()->get_data_type())
2824 n_cmp_s = 0; n_cmp_c=1;
2826 n_cmp_s = 1; n_cmp_c=0;
2828 n_in = 0; n_pred = 0; n_bool = 0;
2831 // Treat IN predicate as sequence of comparisons
2832 analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
//	An IN predicate is handled through its left expression and its literal
//	list only, so the comparison type is that of the left expression on
//	both sides (same test as in simple_field_constraint).
2833 if(pr->get_left_se()->get_data_type()->complex_comparison(
2834 pr->get_left_se()->get_data_type())
2836 n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size();
2838 n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0;
2840 n_in = 0; n_pred = 0; n_bool = 0;
2843 analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb);
2846 case PRED_BINARY_OP:
2847 analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb);
2848 analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb);
2849 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2850 n_func=l_func+r_func; n_op=l_op+r_op;
2851 n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c;
//	The binary connective itself adds one to the boolean count.
2852 n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1;
2855 n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0;
2857 n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id());
//	Add the cost of every operand of the external predicate.
2860 op_list = pr->get_op_list();
2861 for(o=0;o<op_list.size();++o){
2862 analyze_constraint_se(op_list[o],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2863 n_agr+=l_agr; n_gb+=l_gb; n_par+=l_par; n_func+=l_func; n_op+=l_op;
2867 fprintf(stderr,"INTERNAL ERROR in analyze_constraint_pr, line %d, character %d, unknown predicate operator type %d\n",
2868 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
//	Estimate the evaluation cost of conjunct c and store it in c->cost.
//	The counts come from analyze_constraint_pr (with enter_gb = false);
//	a complex comparison is weighted 10, every other counted item 1.
2873 void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){
2874 int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2875 analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2876 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false);
2878 //printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool);
2879 c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
//	Decide whether conjunct c may be evaluated in the prefilter.
//	The predicate is analyzed with enter_gb = true and its cost is computed
//	with the same formula as compute_cnf_cost.
2882 bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){
2883 int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2884 analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2885 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true);
2886 //printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool);
2889 int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2890 //printf("cost=%d\n",cost);
2894 // The prefilter needs to translate constraints on
2895 // gbvars into constraints involving their underlying SEs.
2896 // The following two routines attach GB def info.
//	se   : scalar expression to annotate (walked recursively).
//	gtbl : group-by table; the definition of a group-by reference is looked
//	       up with get_def(gb_ref) and stored in the node's rhs.scalarp.
2898 void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){
2900 vector<scalarexp_t *> operand_list;
2902 switch(se->get_operator_type()){
2904 case SE_IFACE_PARAM:
2910 se->rhs.scalarp = gtbl->get_def(se->get_gb_ref());
2914 insert_gb_def_se(se->get_left_se(),gtbl);
2917 insert_gb_def_se(se->get_left_se(),gtbl);
2918 insert_gb_def_se(se->get_right_se(),gtbl);
2921 insert_gb_def_se(se->get_left_se(),gtbl);
2924 operand_list = se->get_operands();
2925 for(p=0;p<operand_list.size();p++){
2926 insert_gb_def_se(operand_list[p],gtbl);
//	Predicate-level walker for insert_gb_def_se: attaches group-by
//	definitions to every scalar expression reachable from pr.
//	pr   : predicate to annotate (walked recursively).
//	gtbl : group-by table, forwarded unchanged.
2933 void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl){
2934 vector<scalarexp_t *> op_list;
2937 switch(pr->get_operator_type()){
2939 insert_gb_def_se(pr->get_left_se(),gtbl);
2940 insert_gb_def_se(pr->get_right_se(),gtbl);
2943 insert_gb_def_se(pr->get_left_se(),gtbl);
2946 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2948 case PRED_BINARY_OP:
2949 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2950 insert_gb_def_pr(pr->get_right_pr(),gtbl);
//	Predicate with an operand list: annotate each operand.
2953 op_list = pr->get_op_list();
2954 for(o=0;o<op_list.size();++o){
2955 insert_gb_def_se(op_list[o],gtbl);
2959 fprintf(stderr,"INTERNAL ERROR in insert_gb_def_pr, line %d, character %d, unknown predicate operator type %d\n",
2960 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2965 // Substitute gbrefs with their definitions
//	se     : scalar expression to rewrite in place.
//	Schema : table list used to re-point column references at their base
//	         table.
//	A child that is a group-by column reference is replaced by that
//	child's right se, and the same node is then processed again so that
//	the substituted expression is examined too.
//	NOTE(review): the right se of a gb colref is presumably the definition
//	attached by insert_gb_def_se -- confirm.
2966 void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){
2968 vector<scalarexp_t *> operand_list;
2969 scalarexp_t *lse,*rse;
2974 switch(se->get_operator_type()){
2976 case SE_IFACE_PARAM:
//	Column reference: set tablevar_ref to the index of the base table
//	that holds the field.
2981 cr = se->get_colref();
2982 b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field);
2983 b_idx = Schema->get_table_ref(b_tbl);
2984 cr->tablevar_ref = b_idx;
2987 lse=se->get_left_se();
2988 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2989 se->lhs.scalarp = lse->get_right_se();
2990 subs_gbrefs_se(se,Schema);
2993 subs_gbrefs_se(se->get_left_se(),Schema);
2996 lse=se->get_left_se();
2997 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2998 se->lhs.scalarp = lse->get_right_se();
2999 subs_gbrefs_se(se,Schema);
3002 rse=se->get_right_se();
3003 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3004 se->rhs.scalarp = rse->get_right_se();
3005 subs_gbrefs_se(se,Schema);
3008 subs_gbrefs_se(se->get_left_se(),Schema);
3009 subs_gbrefs_se(se->get_right_se(),Schema);
3012 lse=se->get_left_se();
3013 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3014 se->lhs.scalarp = lse->get_right_se();
3015 subs_gbrefs_se(se,Schema);
3018 subs_gbrefs_se(se->get_left_se(),Schema);
//	Function operands: operand_list is a copy, so the substitution is
//	written back through se->param_list.
3021 operand_list = se->get_operands();
3022 for(p=0;p<operand_list.size();p++){
3023 lse=operand_list[p];
3024 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3025 se->param_list[p] = lse->get_right_se();
3026 subs_gbrefs_se(se,Schema);
3030 for(p=0;p<operand_list.size();p++){
3031 subs_gbrefs_se(operand_list[p],Schema);
//	Predicate-level counterpart of subs_gbrefs_se: replaces group-by column
//	references that appear directly as operands of pr, then recurses into
//	the remaining scalar expressions and sub-predicates.
//	pr     : predicate to rewrite in place.
//	Schema : table list, forwarded to subs_gbrefs_se.
3039 void subs_gbrefs_pr(predicate_t *pr, table_list *Schema){
3040 vector<scalarexp_t *> op_list;
3042 scalarexp_t *lse,*rse;
3044 switch(pr->get_operator_type()){
3046 lse=pr->get_left_se();
3047 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3048 pr->lhs.sexp = lse->get_right_se();
3049 subs_gbrefs_pr(pr,Schema);
3052 rse=pr->get_right_se();
3053 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3054 pr->rhs.sexp = rse->get_right_se();
3055 subs_gbrefs_pr(pr,Schema);
3058 subs_gbrefs_se(pr->get_left_se(),Schema);
3059 subs_gbrefs_se(pr->get_right_se(),Schema);
3062 lse=pr->get_left_se();
3063 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3064 pr->lhs.sexp = lse->get_right_se();
3065 subs_gbrefs_pr(pr,Schema);
3068 subs_gbrefs_se(pr->get_left_se(),Schema);
3071 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3073 case PRED_BINARY_OP:
3074 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3075 subs_gbrefs_pr(pr->get_right_pr(),Schema);
//	Operand list: op_list is a copy, so the substitution is written back
//	through pr->param_list.
3078 op_list = pr->get_op_list();
3079 for(o=0;o<op_list.size();++o){
3081 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3082 pr->param_list[o] = lse->get_right_se();
3083 subs_gbrefs_pr(pr,Schema);
3086 subs_gbrefs_se(op_list[o],Schema);
3090 fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n",
3091 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3097 // Search for references to "expensive" fields.
//	se     : scalar expression to search (walked recursively).
//	Schema : table list; a field is examined through the modifier list of
//	         its table, looking for the key "expensive".
//	Returns a count accumulated over the sub-expressions.
3098 int expensive_refs_se(scalarexp_t *se, table_list *Schema){
3100 vector<scalarexp_t *> operand_list;
3105 switch(se->get_operator_type()){
3107 case SE_IFACE_PARAM:
3114 return expensive_refs_se(se->rhs.scalarp,Schema);
3115 td = Schema->get_table(se->lhs.colref->schema_ref);
3116 plist = td->get_modifier_list(se->lhs.colref->field);
3117 if(plist->contains_key("expensive"))
3121 return expensive_refs_se(se->get_left_se(),Schema);
3123 cnt += expensive_refs_se(se->get_left_se(),Schema);
3124 cnt += expensive_refs_se(se->get_right_se(),Schema);
3127 operand_list = se->get_operands();
3128 for(p=0;p<operand_list.size();p++){
3129 cnt += expensive_refs_se(operand_list[p],Schema);
//	Predicate-level counterpart of expensive_refs_se: sums the counts of
//	"expensive" field references over every scalar expression and
//	sub-predicate of pr.
3138 int expensive_refs_pr(predicate_t *pr, table_list *Schema){
3139 vector<scalarexp_t *> op_list;
3143 switch(pr->get_operator_type()){
3145 cnt += expensive_refs_se(pr->get_left_se(),Schema);
3146 cnt += expensive_refs_se(pr->get_right_se(),Schema);
3149 return expensive_refs_se(pr->get_left_se(),Schema);
3151 return expensive_refs_pr(pr->get_left_pr(),Schema);
3152 case PRED_BINARY_OP:
3153 cnt += expensive_refs_pr(pr->get_left_pr(),Schema);
3154 cnt += expensive_refs_pr(pr->get_right_pr(),Schema);
//	Predicate with an operand list: count over each operand.
3157 op_list = pr->get_op_list();
3158 for(o=0;o<op_list.size();++o){
3159 cnt += expensive_refs_se(op_list[o],Schema);
3163 fprintf(stderr,"INTERNAL ERROR in expensive_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3164 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
//	Test whether conjunct c is a simple constraint on a single field.
//	c : conjunct to test.  For an equality predicate whose column
//	    reference is on the right, the operands are swapped so that the
//	    colref ends up on the lhs.
//	Returns false as soon as one of the requirements below is violated.
3170 // TODO: allow "cheap" functions and predicates.
3171 bool simple_field_constraint(cnf_elem *c){
3172 vector<literal_t *> ll;
3174 predicate_t *p = c->pr;
3175 int l_agr, l_gb, l_par, l_func, l_op;
3176 int r_agr, r_gb, r_par, r_func, r_op;
3177 col_id_set left_colids, right_colids;
3179 // Verify that it is a simple atom
3180 switch(p->get_operator_type()){
3182 // Must be an equality predicate
3183 // which references no aggregates, parameters, functions, or
3184 // group-by variables, and should be a constraint of
3186 // AND should not require a complex comparison.
3187 if(p->get_op() != "=") return(false);
//	Each side reports into its own set of counters.
3188 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3189 analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,r_op,NULL,false);
3190 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ||
3191 r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false);
3192 // I will count on there being no gbvars in the constraint.
3193 // TODO: allow gbvars which are colrefs.
3194 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3195 gather_se_col_ids(p->get_right_se(), right_colids, NULL);
//	Exactly one column may be referenced over both sides.
3196 if(left_colids.size()+right_colids.size() != 1) return(false);
3199 // Normalize : the colref should be on the lhs.
3200 if(right_colids.size() > 0){
3201 p->swap_scalar_operands();
3204 // Disallow complex (and therefore expensive) comparisons.
3205 if(p->get_left_se()->get_data_type()->complex_comparison(
3206 p->get_right_se()->get_data_type() ) )
3209 // passed all the tests.
3212 // LHS must be a non-gbvar colref.
3213 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3214 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false);
3215 // I will count on there being no gbvars in the constraint.
3216 // TODO: allow gbvars which are colrefs.
3217 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3218 if(left_colids.size() != 1) return(false);
3219 // Disallow complex (and therefore expensive) comparisons.
3220 if(p->get_left_se()->get_data_type()->complex_comparison(
3221 p->get_left_se()->get_data_type() ) )
3225 // All entries in the IN list must be literals
3226 // Currently, this is the only possibility.
3231 case PRED_BINARY_OP:
3236 fprintf(stderr,"INTERNAL ERROR in simple_field_constraint, line %d, character %d, unknown predicate operator type %d\n",
3237 p->get_lineno(), p->get_charno(), p->get_operator_type() );
3244 // As the name implies, return the colref constrained by the
3245 // cnf elem. I will be counting on the LHS being a SE pointing
3248 // This fcn assumes that in fact exactly
3249 // one colref is constrained.
//	se : scalar expression to search, depth first.
//	Returns the first column reference found; a function registered as an
//	aggregate (aggr_ref >= 0) is not searched and yields NULL.
3250 colref_t *get_constrained_colref(scalarexp_t *se){
3252 vector<scalarexp_t *> operand_list;
3255 switch(se->get_operator_type()){
3259 case SE_IFACE_PARAM:
3262 return(se->get_colref());
3264 return(get_constrained_colref(se->get_left_se()));
//	Two operands: try the left side first, then the right side.
3266 ret=get_constrained_colref(se->get_left_se());
3267 if(ret == NULL) return(get_constrained_colref(se->get_right_se()));
3273 if(se->get_aggr_ref() >= 0) return NULL;
3275 operand_list = se->get_operands();
3276 for(p=0;p<operand_list.size();p++){
3277 ret=get_constrained_colref(operand_list[p]);
3278 if(ret != NULL) return(ret);
//	Overload for a predicate: searches only the predicate's left scalar
//	expression.
3289 colref_t *get_constrained_colref(predicate_t *p){
3290 return(get_constrained_colref(p->get_left_se()));
//	Overload for a conjunct: searches only the left scalar expression of
//	the conjunct's predicate.
3292 colref_t *get_constrained_colref(cnf_elem *c){
3293 return get_constrained_colref(c->pr->get_left_se());
//	Add a copy of predicate src_p to conjunct dst, re-targeted at another
//	table.
//	dst        : conjunct to extend.  If it has no predicate yet the copy
//	             becomes its predicate, otherwise the copy is OR'ed with
//	             the existing one.
//	src_p      : predicate to copy (duplicated with dup_pr).
//	target_tbl : table name set on the copy's lhs column reference.
//	tblref     : table reference set on the copy's lhs column reference.
//	target_fld : not used in the statements shown below.
3300 void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p,
3301 string target_fld, string target_tbl, int tblref){
3303 // Make a copy of the predicate to be added.
3304 // ASSUME no aggregates.
3305 predicate_t *pr = dup_pr(src_p,NULL);
3307 // Modify the ref to the base table.
3308 // ASSUME lhs is the colref
3309 pr->get_left_se()->get_colref()->set_table_name(target_tbl);
3310 pr->get_left_se()->get_colref()->set_table_ref(tblref);
3312 if(dst->pr == NULL) dst->pr = pr;
3313 else dst->pr = new predicate_t("OR", dst->pr, pr);
3319 //////////////////////////////////////////////////////
3320 /////////////// Represent a node in a predicate tree
3321 struct common_pred_node{
//	Predicates associated with this node.
//	NOTE(review): exact meaning of "predecessor" is not evident -- confirm.
3324 vector<predicate_t *> predecessor_preds;
//	Child nodes of this node in the predicate tree.
3325 vector<common_pred_node *> children;
//	Build a predicate from the tree rooted at pn.
//	A node without children yields a copy of its own predicate.
//	Otherwise the predicates built from the children are OR'ed together
//	and the result is AND'ed with a copy of the node's own predicate.
3337 predicate_t *make_common_pred(common_pred_node *pn){
3340 if(pn->children.size() == 0){
3342 fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node has no children and no predicate.\n");
3345 return( dup_pr(pn->pr,NULL) );
3348 predicate_t *curr_pr = make_common_pred( pn->children[0] );
3349 for(n=1;n<pn->children.size();++n){
3350 curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr);
3354 curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr);
//	Ordering of cnf_set objects; the comparison starts from the number of
//	entries in lfta_id.
3360 bool operator<(const cnf_set &c1, const cnf_set &c2){
3361 if(c1.lfta_id.size() < c2.lfta_id.size())
3367 // Compute the predicates for the prefilter.
3368 // the prefilter preds are returned in prefilter_preds.
3369 // pred_ids is the set of predicates used in the prefilter.
3370 // the encoding is the lfta index, in the top 16 bits,
3371 // then the index of the cnf element in the bottom 16 bits.
3372 // This set is for identifying which preds do not need
3373 // to be generated in the lftas.
//	where_list : one list of conjuncts per lfta.
//	Schema     : table list, used for predicate equivalence, expensive
//	             field detection and group-by substitution.
//	Ext_fcns   : external function table, used by prefilter_compatible.
3374 void find_common_filter(vector< vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector<cnf_set *> &prefilter_preds, set<unsigned int > &pred_ids){
3377 vector<cnf_set *> pred_list, sort_list;
3379 // Create list of tagged, prefilter-safe CNFs.
3380 for(l=0;l<where_list.size();++l){
3381 for(c=0;c<where_list[l].size();++c){
3382 if(prefilter_compatible(where_list[l][c],Ext_fcns)){
//	Predicates touching "expensive" fields are kept out of the prefilter.
3383 if(expensive_refs_pr(where_list[l][c]->pr,Schema)==0)
3384 pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c));
3389 // Eliminate duplicates
3390 for(p=0;p<pred_list.size();++p){
3392 for(p2=p+1;p2<pred_list.size();++p2){
3394 if(is_equivalent_pred_base(pred_list[p]->pr, pred_list[p2]->pr,Schema)){
//	Merge the duplicate into the earlier entry and clear its slot.
3395 pred_list[p]->subsume(pred_list[p2]);
3396 delete pred_list[p2];
3397 pred_list[p2] = NULL;
3404 // combine preds that occur in the exact same lftas.
3405 for(p=0;p<pred_list.size();++p){
3407 for(p2=p+1;p2<pred_list.size();++p2){
3409 if(pred_list[p]->lfta_id == pred_list[p2]->lfta_id){
3410 pred_list[p]->combine_pred(pred_list[p2]);
3411 delete pred_list[p2];
3412 pred_list[p2] = NULL;
3419 // Compress the list
3420 for(p=0;p<pred_list.size();++p){
3422 sort_list.push_back(pred_list[p]);
3426 sort(sort_list.begin(), sort_list.end(),compare_cnf_set());
3428 // Return the top preds, up to 64 of them.
3429 for(p=0;p<sort_list.size() && p<64;p++){
3430 prefilter_preds.push_back(sort_list[p]);
3431 sort_list[p]->add_pred_ids(pred_ids);
3434 // Substitute gb refs with their defs
3435 // While I'm at it, substitute base table sch ref for tblref.
3436 for(p=0;p<prefilter_preds.size() ;p++){
3437 subs_gbrefs_pr(prefilter_preds[p]->pr,Schema);
3446 ///////////////////////////////////////////////////////////////////////////
3447 //////////////////////////////////////////////////////////////////////////
3449 // Find partial functions and register them.
3450 // Do a DFS so that nested partial fcn calls
3451 // get evaluated in the right order.
3452 // Don't drill down into aggregates -- their arguments are evaluated
3453 // earlier than the select list is.
3455 // Modification for function caching:
3456 // Pass in a ref counter, and partial fcn indicator.
3457 // Cache fcns ref'd at least once.
3458 // pass in NULL for fcn_ref_cnt to turn off fcn caching analysis
//	se             : scalar expression to search; NULL is accepted.
//	pf_list        : registry of functions found so far; the index of a
//	                 function in this list is stored on the expression
//	                 with set_partial_ref.
//	fcn_ref_cnt    : per-entry reference counts, parallel to pf_list.
//	is_partial_fcn : per-entry flag, parallel to pf_list, recording
//	                 whether the entry is a partial function.
//	Ext_fcns       : external function table (is_partial, get_fcn_cost).
3461 void find_partial_fcns(scalarexp_t *se, vector<scalarexp_t *> *pf_list,
3462 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3463 ext_fcn_list *Ext_fcns){
3464 vector<scalarexp_t *> operands;
3467 if(se == NULL) return;
3469 switch(se->get_operator_type()){
3472 case SE_IFACE_PARAM:
3475 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3478 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3479 find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3486 // find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ;
3489 if(se->get_aggr_ref() >= 0) return;
//	Operands first, so that nested calls are registered before the call
//	that uses them.
3491 operands = se->get_operands();
3492 for(o=0;o<operands.size();o++){
3493 find_partial_fcns(operands[o], pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
//	Candidates are partial functions and functions of cost >= COST_HIGH.
3496 if(Ext_fcns->is_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){
//	Reuse an equivalent entry that is already registered.
3498 for(f=0;f<pf_list->size();++f){
3499 if(is_equivalent_se(se,(*pf_list)[f])){
3500 se->set_partial_ref(f);
3501 (*fcn_ref_cnt)[f]++;
//	No equivalent entry: register this call as a new one.
3508 if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) || fcn_ref_cnt)){
3509 se->set_partial_ref(pf_list->size());
3510 pf_list->push_back(se);
3512 fcn_ref_cnt->push_back(1);
3513 is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id()));
3519 fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n",
3520 se->get_lineno(), se->get_charno(),se->get_operator_type());
//	Predicate-level walker for find_partial_fcns: applies it to every
//	scalar expression reachable from pr.  All arguments other than pr are
//	forwarded unchanged.
3527 void find_partial_fcns_pr(predicate_t *pr, vector<scalarexp_t *> *pf_list,
3528 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3529 ext_fcn_list *Ext_fcns){
3530 vector<literal_t *> litl;
3531 vector<scalarexp_t *> op_list;
3534 switch(pr->get_operator_type()){
3536 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3539 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3540 find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3543 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3545 case PRED_BINARY_OP:
3546 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3547 find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
//	Predicate with an operand list: search each operand.
3550 op_list = pr->get_op_list();
3551 for(o=0;o<op_list.size();++o){
3552 find_partial_fcns(op_list[o],pf_list,fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3556 fprintf(stderr,"INTERNAL ERROR in find_partial_fcns_pr, line %d, character %d, unknown predicate operator type %d\n",
3557 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
//	Find combinable external predicates and register them.
//	pr       : predicate to search (walked recursively).
//	pr_list  : registry of combinable predicates found so far; the index
//	           of a predicate's entry is stored on the predicate with
//	           set_combinable_ref.
//	Schema   : table list, used for predicate equivalence.
//	Ext_fcns : external function table (is_combinable).
3566 void find_combinable_preds(predicate_t *pr, vector<predicate_t *> *pr_list,
3567 table_list *Schema, ext_fcn_list *Ext_fcns){
3568 vector<literal_t *> litl;
3569 vector<scalarexp_t *> op_list;
3572 switch(pr->get_operator_type()){
3578 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns);
3580 case PRED_BINARY_OP:
3581 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ;
3582 find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ;
3585 if(Ext_fcns->is_combinable(pr->get_fcn_id())){
//	Reuse an equivalent predicate that is already registered.
3586 for(f=0;f<pr_list->size();++f){
3587 if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){
3588 pr->set_combinable_ref(f);
//	No equivalent entry: register this predicate as a new one.
3592 if(f == pr_list->size()){
3593 pr->set_combinable_ref(pr_list->size());
3594 pr_list->push_back(pr);
3599 fprintf(stderr,"INTERNAL ERROR in find_combinable_preds, line %d, character %d, unknown predicate operator type %d\n",
3600 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3608 //--------------------------------------------------------------------
3609 // Collect refs to aggregates.
//	se       : scalar expression to search; NULL is accepted.
//	agg_refs : output set; receives the aggr_ref of every aggregate found,
//	           including functions registered as aggregates
//	           (aggr_ref >= 0).
3612 void collect_agg_refs(scalarexp_t *se, set<int> &agg_refs){
3613 vector<scalarexp_t *> operands;
3616 if(se == NULL) return;
3618 switch(se->get_operator_type()){
3621 case SE_IFACE_PARAM:
3624 collect_agg_refs(se->get_left_se(), agg_refs) ;
3627 collect_agg_refs(se->get_left_se(), agg_refs);
3628 collect_agg_refs(se->get_right_se(), agg_refs);
3634 agg_refs.insert(se->get_aggr_ref());
3637 if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref());
3639 operands = se->get_operands();
3640 for(o=0;o<operands.size();o++){
3641 collect_agg_refs(operands[o], agg_refs);
3646 fprintf(stderr,"INTERNAL ERROR in collect_agg_refs, line %d, character %d: unknown operator type %d\n",
3647 se->get_lineno(), se->get_charno(),se->get_operator_type());
//	Predicate-level walker for collect_agg_refs: collects into agg_refs
//	the aggregate references of every scalar expression reachable from pr.
3654 void collect_aggr_refs_pr(predicate_t *pr, set<int> &agg_refs){
3655 vector<literal_t *> litl;
3656 vector<scalarexp_t *> op_list;
3659 switch(pr->get_operator_type()){
3661 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3664 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3665 collect_agg_refs(pr->get_right_se(), agg_refs) ;
3668 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs);
3670 case PRED_BINARY_OP:
3671 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ;
3672 collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ;
//	Predicate with an operand list: search each operand.
3675 op_list = pr->get_op_list();
3676 for(o=0;o<op_list.size();++o){
3677 collect_agg_refs(op_list[o],agg_refs);
3681 fprintf(stderr,"INTERNAL ERROR in collect_aggr_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3682 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3690 //--------------------------------------------------------------------
3691 // Collect previously registered partial fcn refs.
3692 // Do a DFS so that nested partial fcn calls
3693 // get evaluated in the right order.
3694 // Don't drill down into aggregates -- their arguments are evaluated
3695 // earlier than the select list is.
3696 // ------------->>> THEN WHY AM I DRILLING DOWN INTO AGGREGATES?
//	se        : scalar expression to search; NULL is accepted.
//	pfcn_refs : output set; receives the partial_ref of every function
//	            expression for which is_partial() holds.
3698 void collect_partial_fcns(scalarexp_t *se, set<int> &pfcn_refs){
3699 vector<scalarexp_t *> operands;
3702 if(se == NULL) return;
3704 switch(se->get_operator_type()){
3707 case SE_IFACE_PARAM:
3710 collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3713 collect_partial_fcns(se->get_left_se(), pfcn_refs);
3714 collect_partial_fcns(se->get_right_se(), pfcn_refs);
3721 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
3724 if(se->get_aggr_ref() >= 0) return;
//	Operands first, then the function itself.
3726 operands = se->get_operands();
3727 for(o=0;o<operands.size();o++){
3728 collect_partial_fcns(operands[o], pfcn_refs);
3731 if(se->is_partial()){
3732 pfcn_refs.insert(se->get_partial_ref());
3737 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n",
3738 se->get_lineno(), se->get_charno(),se->get_operator_type());
// collect_partial_fcns_pr: traverse predicate pr and hand every scalar
// expression it contains to collect_partial_fcns, recursing into sub-predicates.
//   pr        : predicate to traverse; it is dereferenced immediately and no
//               NULL guard is visible here, so callers presumably check first.
//   pfcn_refs : set passed through unchanged to collect_partial_fcns / the recursion.
// NOTE(review): this listing omits several lines (most case labels, breaks,
// the declaration of o, closing braces); the comments describe only the
// statements that are visible.
3745 void collect_partial_fcns_pr(predicate_t *pr, set<int> &pfcn_refs){
// litl is not referenced in any visible line of this function.
3746 vector<literal_t *> litl;
3747 vector<scalarexp_t *> op_list;
// Dispatch on the kind of predicate node.
3750 switch(pr->get_operator_type()){
// (case label not shown) predicate with a single scalar operand: visit it.
3752 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
// (case label not shown) predicate with two scalar operands: visit both.
3755 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
3756 collect_partial_fcns(pr->get_right_se(), pfcn_refs) ;
// (case label not shown) predicate wrapping one sub-predicate: recurse into it.
3759 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs);
// Binary predicate operator: recurse into both sub-predicates.
3761 case PRED_BINARY_OP:
3762 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ;
3763 collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ;
// (case label not shown) predicate carrying an operand list: visit each operand.
3766 op_list = pr->get_op_list();
3767 for(o=0;o<op_list.size();++o){
3768 collect_partial_fcns(op_list[o],pfcn_refs);
// Unrecognized operator type: report an internal error with source position.
3772 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns_pr, line %d, character %d, unknown predicate operator type %d\n",
3773 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3783 ///////////////////////////////////////////////////////////////
3784 //////////// Exported Functions ///////////////////////////
3785 ///////////////////////////////////////////////////////////////
3788 // Count and collect refs to interface parameters.
// count_se_ifp_refs: walk scalar expression se, collecting the names of the
// interface parameters it references and returning a count.
//   se       : expression to traverse; NULL yields 0.
//   ifpnames : output set; receives se->get_ifpref()->to_string() for every
//              SE_IFACE_PARAM node reached.
//   returns  : an int accumulated from the recursive calls (sub-results are
//              summed into ret). The value returned for an SE_IFACE_PARAM node
//              itself is on a line not shown -- presumably 1; confirm.
// NOTE(review): this listing omits several lines (most case labels, returns,
// the declarations of o and ret, closing braces).
3790 int count_se_ifp_refs(scalarexp_t *se, set<string> &ifpnames){
3791 vector<scalarexp_t *> operands;
// An absent expression references nothing.
3795 if(se == NULL) return 0;
3797 switch(se->get_operator_type()){
// Interface parameter reference: remember its printable name.
3801 case SE_IFACE_PARAM:
3802 ifpnames.insert(se->get_ifpref()->to_string());
// (case label not shown) single-operand expression: result is the operand's count.
3805 return count_se_ifp_refs(se->get_left_se(), ifpnames) ;
// (case label not shown) two-operand expression: sum the counts of both sides.
3807 ret = count_se_ifp_refs(se->get_left_se(), ifpnames);
3808 ret += count_se_ifp_refs(se->get_right_se(), ifpnames);
// Disabled call; the case it belongs to is not shown.
3815 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
// A node that carries an aggregate reference is not descended into.
3818 if(se->get_aggr_ref() >= 0) return 0;
// Otherwise sum the counts over all operands.
3820 operands = se->get_operands();
3821 for(o=0;o<operands.size();o++){
3822 ret += count_se_ifp_refs(operands[o], ifpnames);
// Unrecognized operator type: report an internal error with source position.
3827 fprintf(stderr,"INTERNAL ERROR in count_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3828 se->get_lineno(), se->get_charno(),se->get_operator_type());
// count_pr_ifp_refs: predicate counterpart of count_se_ifp_refs. Traverses
// predicate pr, forwarding each scalar expression to count_se_ifp_refs and
// recursing into sub-predicates.
//   pr       : predicate to traverse; NULL yields 0.
//   ifpnames : output set, passed through to count_se_ifp_refs.
//   returns  : the sum of the counts returned by the calls made for this node.
// NOTE(review): this listing omits several lines (most case labels, returns,
// the declarations of o and ret, closing braces).
3835 int count_pr_ifp_refs(predicate_t *pr, set<string> &ifpnames){
// litl is not referenced in any visible line of this function.
3836 vector<literal_t *> litl;
3837 vector<scalarexp_t *> op_list;
// An absent predicate references nothing.
3840 if(pr == NULL) return 0;
3842 switch(pr->get_operator_type()){
// (case label not shown) predicate with a single scalar operand.
3844 return count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
// (case label not shown) predicate with two scalar operands: sum both counts.
3846 ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
3847 ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ;
// (case label not shown) predicate wrapping one sub-predicate.
3850 return count_pr_ifp_refs(pr->get_left_pr(), ifpnames);
// Binary predicate operator: sum the counts of both sub-predicates.
3851 case PRED_BINARY_OP:
3852 ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ;
3853 ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ;
// (case label not shown) predicate carrying an operand list: sum over the operands.
3856 op_list = pr->get_op_list();
3857 for(o=0;o<op_list.size();++o){
3858 ret += count_se_ifp_refs(op_list[o],ifpnames);
// Unrecognized operator type: report an internal error with source position.
3862 fprintf(stderr,"INTERNAL ERROR in count_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3863 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3870 // Resolve ifp refs, convert them to string literals.
// resolve_se_ifp_refs: walk scalar expression se and replace each interface
// parameter reference with a literal holding the parameter's value.
//   se   : expression to rewrite in place; NULL yields 0.
//   ifm  : first component of the interface name; error text prints it as ifm.ifn
//          (presumably the machine/host -- confirm against callers).
//   ifn  : second component of the interface name.
//   ifdb : source of parameter values, queried via get_iface_vals.
//   err  : receives appended, newline-terminated messages for lookup failures,
//          missing values, and multiple values.
//   returns : an int accumulated from the recursive calls. The values returned
//          on the error and success paths of SE_IFACE_PARAM are on lines not
//          shown in this listing.
// NOTE(review): this listing omits several lines (most case labels, returns,
// the declarations of o, ret, ir, ierr, serr and tmp_l, closing braces).
3872 int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb, string &err){
3873 vector<scalarexp_t *> operands;
3874 vector<string> ifvals;
// Nothing to resolve in an absent expression.
3882 if(se == NULL) return 0;
3884 switch(se->get_operator_type()){
3888 case SE_IFACE_PARAM:
// Fetch the candidate values of the referenced parameter for this interface.
3889 ir = se->get_ifpref();
3890 ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr);
// Lookup failure (the guarding condition is not shown): relay the lookup's message.
3892 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n";
// Exactly one value is required; zero values is an error ...
3895 if(ifvals.size() == 0){
3896 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n";
// ... and so is more than one.
3899 if(ifvals.size() > 1){
3900 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n";
// Turn this node into a literal built from the single value.
// NOTE(review): tmp_l is heap-allocated and not freed here; presumably se keeps it -- confirm.
3903 tmp_l = new literal_t( ifvals[0]);
3904 se->convert_to_literal(tmp_l);
// (case label not shown) single-operand expression: resolve the operand.
3907 return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ;
// (case label not shown) two-operand expression: resolve both sides, summing results.
3909 ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err);
3910 ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err);
// Disabled call; the case it belongs to is not shown.
3917 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
// A node that carries an aggregate reference is not descended into.
3920 if(se->get_aggr_ref() >= 0) return 0;
// Otherwise resolve every operand, summing results.
3922 operands = se->get_operands();
3923 for(o=0;o<operands.size();o++){
3924 ret += resolve_se_ifp_refs(operands[o], ifm, ifn, ifdb,err);
// Unrecognized operator type: report an internal error with source position.
3929 fprintf(stderr,"INTERNAL ERROR in resolve_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3930 se->get_lineno(), se->get_charno(),se->get_operator_type());
// resolve_pr_ifp_refs: predicate counterpart of resolve_se_ifp_refs. Traverses
// predicate pr, forwarding each scalar expression to resolve_se_ifp_refs and
// recursing into sub-predicates.
//   pr   : predicate to traverse.
//   ifm, ifn, ifdb, err : passed through unchanged to resolve_se_ifp_refs.
//   returns : the sum of the values returned by the calls made for this node.
// NOTE(review): unlike count_pr_ifp_refs, no NULL guard on pr is visible before
// it is dereferenced -- confirm that callers never pass NULL.
// NOTE(review): this listing omits several lines (most case labels, returns,
// the declarations of o and ret, closing braces).
3937 int resolve_pr_ifp_refs(predicate_t *pr, string ifm, string ifn, ifq_t *ifdb, string &err){
// litl is not referenced in any visible line of this function.
3938 vector<literal_t *> litl;
3939 vector<scalarexp_t *> op_list;
3943 switch(pr->get_operator_type()){
// (case label not shown) predicate with a single scalar operand.
3945 return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
// (case label not shown) predicate with two scalar operands: sum both results.
3947 ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
3948 ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ;
// (case label not shown) predicate wrapping one sub-predicate.
3951 return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err);
// Binary predicate operator: sum the results of both sub-predicates.
3952 case PRED_BINARY_OP:
3953 ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ;
3954 ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ;
// (case label not shown) predicate carrying an operand list: sum over the operands.
3957 op_list = pr->get_op_list();
3958 for(o=0;o<op_list.size();++o){
3959 ret += resolve_se_ifp_refs(op_list[o],ifm, ifn, ifdb, err);
// Unrecognized operator type: report an internal error with source position.
3963 fprintf(stderr,"INTERNAL ERROR in resolve_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3964 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// impute_query_name: choose a name for the query in fta_tree.
//   fta_tree   : parsed query; its "query_name" definition is consulted first.
//   default_nm : fallback used when the query defines no (or an empty) name.
// Precedence: the query's own "query_name", then default_nm, then the fixed
// string "default_query".
// NOTE(review): the return statement is on a line not shown in this listing;
// presumably retval is returned.
3972 string impute_query_name(table_exp_t *fta_tree, string default_nm){
3973 string retval = fta_tree->get_val_of_name("query_name");
// Fall back to the caller-supplied default when the query supplies none.
3974 if(retval == "") retval = default_nm;
// Last resort when the caller's default is empty too.
3975 if(retval == "") retval = "default_query";
3979 // Convert the parse tree into an intermediate form,
3980 // which admits analysis better.
3982 // TODO : rationalize the error return policy.
3984 // TODO : the query_summary_class object contains
3986 // TODO: revisit the issue when nested subqueries are implemented.
3987 // One possibility: implement accessor methods to hide the
3989 // For now: this class contains data structures not in table_exp_t
3990 // (with a bit of duplication)
3992 // Return NULL on error.
3993 // print error messages to stderr.
3996 query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema,
3997 ext_fcn_list *Ext_fcns, string default_name){
4000 // Create the summary struct -- no analysis is done here.
4001 query_summary_class *qs = new query_summary_class(fta_tree);
4002 qs->query_type = fta_tree->query_type;
4004 ////////////// Do common analysis
4006 // Extract query name. Already imputed for the qnodes.
4007 // qs->query_name = impute_query_name(fta_tree, default_name);
4008 qs->query_name = default_name;
4009 //printf("query name is %s\n",qs->query_name.c_str());
4011 // extract definitions. Don't grab the query name.
4013 map<string, string> nmap = fta_tree->get_name_map();
4014 map<string, string>::iterator nmi;
4015 for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){
4016 string pname = (*nmi).first;
4017 if(pname != "query_name" )
4018 (qs->definitions)[pname] = (*nmi).second;
4024 // First, verify that all the referenced tables are defined.
4025 // Then, bind the tablerefs in the FROM list to schemas in
4027 tablevar_list_t *tlist = fta_tree->get_from();
4028 vector<tablevar_t *> tbl_vec = tlist->get_table_list();
4030 bool found_error = false;
4031 for(i=0;i<tbl_vec.size();i++){
4032 int sch_no = schema->find_tbl(tbl_vec[i]->get_schema_name());
4034 fprintf(stderr,"Error, table <%s> not found in the schema file\n",
4035 tbl_vec[i]->get_schema_name().c_str() );
4036 fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(),
4037 tbl_vec[i]->get_charno() );
4041 tbl_vec[i]->set_schema_ref(sch_no);
4043 // If accessing a UDOP, mangle the name
4044 // This needs to be done in translate_fta.cc, not here.
4046 if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){
4047 string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm;
4048 tbl_vec[i]->set_schema_name(mngl_name);
4052 // No FTA schema should have an interface defined on it.
4053 if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){
4054 fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str());
4056 // Fill in default interface
4057 if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4058 tbl_vec[i]->set_interface("default");
4059 tbl_vec[i]->set_ifq(true);
4061 // Fill in default machine
4062 if(tbl_vec[i]->get_interface()!="" && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){
4063 tbl_vec[i]->set_machine(hostname);
4066 if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4067 // Record the set of interfaces accessed
4069 if(tbl_vec[i]->get_ifq()){
4070 ifstr = "["+tbl_vec[i]->get_interface()+"]";
4072 if(tbl_vec[i]->get_machine() != "localhost"){
4073 ifstr = "'"+tbl_vec[i]->get_machine()+"'."+tbl_vec[i]->get_interface();
4075 ifstr = tbl_vec[i]->get_interface();
4078 //printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str());
4079 if(qs->definitions.count("_referenced_ifaces")){
4080 ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr;
4082 qs->definitions["_referenced_ifaces"] = ifstr;
4086 if(found_error) return(NULL);
4088 // Ensure that all tablevars are named
4089 // and that no two tablevars have the same name.
4091 // First, gather the set of variable
4092 set<string> tblvar_names;
4093 for(i=0;i<tbl_vec.size();i++){
4094 if(tbl_vec[i]->get_var_name() != ""){
4095 if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){
4096 fprintf(stderr,"ERROR, query has two table variables named %s. line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno());
4099 tblvar_names.insert(tbl_vec[i]->get_var_name());
4102 // Now generate variable names for unnamed tablevars
4103 for(i=0;i<tbl_vec.size();i++){
4104 if(tbl_vec[i]->get_var_name() == ""){
4106 sprintf(tmpstr,"_t%d",tblvar_no);
4107 string newvar = tmpstr;
4108 while(tblvar_names.count(newvar) > 0){
4110 sprintf(tmpstr,"_t%d",tblvar_no);
4113 tbl_vec[i]->set_range_var(newvar);
4114 tblvar_names.insert(newvar);
4118 // Process inner/outer join properties
4119 int jprop = fta_tree->get_from()->get_properties();
4120 // Require explicit INNER_JOIN, ... specification for join queries.
4122 if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){
4123 fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, WATCHLIST_JOIN, FILTER_JOIN.\n");
4128 if(jprop == OUTER_JOIN_PROPERTY){
4129 for(i=0;i<tbl_vec.size();i++) tbl_vec[i]->set_property(1);
4131 if(jprop == LEFT_OUTER_JOIN_PROPERTY)
4132 tbl_vec[0]->set_property(1);
4133 if(jprop == RIGHT_OUTER_JOIN_PROPERTY)
4134 tbl_vec[tbl_vec.size()-1]->set_property(1);
4135 if(jprop == FILTER_JOIN_PROPERTY){
4136 if(fta_tree->get_from()->get_temporal_range() == 0){
4137 fprintf(stderr,"ERROR, a filter join must have a non-zero temporal range.\n");
4140 if(tbl_vec.size() != 2){
4141 fprintf(stderr,"ERROR, a filter join must be between two table variables.\n");
4144 colref_t *cr = fta_tree->get_from()->get_colref();
4145 string field = cr->get_field();
4147 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field);
4149 fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str());
4152 cr->set_schema_ref(tbl_vec[0]->get_schema_ref());
4153 cr->set_tablevar_ref(0);
4154 string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field);
4155 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
4156 data_type *dt0 = new data_type(type_name, modifiers);
4157 string dt0_type = dt0->get_type_str();
4158 if(dt0_type != "INT" && dt0_type != "UINT" && dt0_type != "LLONG" && dt0_type != "ULLONG"){
4159 fprintf(stderr,"ERROR, the temporal attribute in a filter join must be one of INT/UINT/LLONG/ULLONG.\n");
4162 if(! dt0->is_increasing()){
4163 fprintf(stderr,"ERROR, the temporal attribute in a filter join must be temporal increasing.\n");
4170 /////////////////////
4171 /// Build the query param table
4172 vector<var_pair_t *> query_params = fta_tree->query_params;
4174 for(p=0;p<query_params.size();++p){
4175 string pname = query_params[p]->name;
4176 string dtname = query_params[p]->val;
4179 fprintf(stderr,"ERROR parameter has empty name.\n");
4183 fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str());
4186 data_type *dt = new data_type(dtname);
4187 if(!(dt->is_defined())){
4188 fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str());
4192 qs->add_query_param(pname, dt, false);
4194 if(found_error) return(NULL);
4195 // unpack the param table to a global for easier analysis.
4196 param_tbl=qs->param_tbl;
4199 ////////////////// WATCHLIST specialized analysis
4200 if(qs->query_type == WATCHLIST_QUERY){
4201 // Populate a SELECT clause?
4204 ////////////////// MERGE specialized analysis
4206 if(qs->query_type == MERGE_QUERY){
4208 // 1) there are two *different* streams ref'd in the FROM clause
4209 // However, only emit a warning.
4210 // (can't detect a problem if one of the interfaces is the
4211 // default interface).
4212 // 2) They have the same layout (e.g. same types but the
4213 // names can be different
4214 // 3) the two columns can unambiguously be mapped to
4215 // fields of the two tables, one per table. Exception:
4216 // the column names are the same and exist in both tables.
4217 // FURTHERMORE the positions must be the same
4218 // 4) after mapping, verify that both colrefs are temporal
4219 // and in the same direction.
4220 if(tbl_vec.size() < 2){
4221 fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() );
4225 vector<field_entry *> fev0 = schema->get_fields(
4226 tbl_vec[0]->get_schema_name()
4231 for(cv=1;cv<tbl_vec.size();++cv){
4232 vector<field_entry *> fev1 = schema->get_fields(
4233 tbl_vec[cv]->get_schema_name()
4236 if(fev0.size() != fev1.size()){
4237 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4241 // Only need to ensure that the list of types are the same.
4242 // The first table supplies the output colnames,
4243 // and all temporal properties are lost, except for the
4244 // merge-by columns.
4246 for(f=0;f<fev0.size();++f){
4247 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4248 data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list());
4249 if(! dt0.equal_subtypes(&dt1) ){
4250 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4256 // copy over the merge-by cols.
4257 qs->mvars = fta_tree->mergevars;
4259 if(qs->mvars.size() == 0){ // need to discover the merge vars.
4260 int mergevar_pos = -1;
4262 for(f=0;f<fev0.size();++f){
4263 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4264 if(dt0.is_temporal()){
4269 if(mergevar_pos >= 0){
4270 for(cv=0;cv<tbl_vec.size();++cv){
4271 vector<field_entry *> fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name());
4272 qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() ));
4275 fprintf(stderr,"ERROR, no merge-by column found.\n");
4280 // Ensure same number of tables, merge cols.
4281 if(tbl_vec.size() != qs->mvars.size()){
4282 fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size());
4286 // Ensure that the merge-by are from different tables
4287 // also, sort colrefs so that they align with the FROM list using tmp_crl
4288 set<int> refd_sources;
4289 vector<colref_t *> tmp_crl(qs->mvars.size(),NULL);
4290 for(cv=0;cv<qs->mvars.size();++cv){
4291 int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema);
4293 fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str());
4296 refd_sources.insert(tblvar);
4297 tmp_crl[tblvar] = qs->mvars[cv];
4299 if(refd_sources.size() != qs->mvars.size()){
4300 fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size());
4304 // 1-1 mapping, so use tmp_crl as the merge column list.
4305 qs->mvars = tmp_crl;
4309 // Look up the colrefs in their schemas, verify that
4310 // they are at the same place, that they are both temporal
4312 // It seems that this should be done more in the schema objects.
4313 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field());
4315 fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str());
4318 for(cv=1;cv<qs->mvars.size();++cv){
4319 int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field());
4321 fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str());
4326 field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0);
4327 data_type dt0(fe0->get_type(),fe0->get_modifier_list());
4328 if( (!dt0.is_temporal()) ){
4329 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0);
4332 for(cv=0;cv<qs->mvars.size();++cv){
4333 field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0);
4334 data_type dt1(fe1->get_type(),fe1->get_modifier_list());
4335 if( (!dt1.is_temporal()) ){
4336 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv);
4341 if( dt0.get_temporal() != dt1.get_temporal()){
4342 fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv);
4347 // If there is a SLACK specification, verify
4348 // that it is literal-only and that its type is compatible
4349 // with that of the merge columns
4350 qs->slack = fta_tree->slack;
4352 if(! literal_only_se(qs->slack)){
4353 fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n");
4357 assign_data_types(qs->slack, schema, fta_tree, Ext_fcns );
4358 data_type sdt(&dt0, qs->slack->get_data_type(), string("+"));
4359 if(sdt.get_type() == undefined_t){
4360 fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n");
4366 // All the tests have passed, there is nothing
4371 ////////////////// SELECT specialized analysis
4373 if(qs->query_type == SELECT_QUERY){
4374 // unpack the gb_tbl, aggr_tbl, param_tbl, and complex_literals
4375 // objects into globals, for easier syntax.
4376 gb_tbl = qs->gb_tbl;
4377 aggr_tbl = qs->aggr_tbl;
4380 // Build the table of group-by attributes.
4381 // (se processing done automatically).
4382 // NOTE : Doing the SE processing here is getting cumbersome,
4383 // I should process these individually.
4384 // NOTE : I should check for duplicate names.
4385 // NOTE : I should ensure that the def of one GB does not
4386 // reference the value of another.
4387 vector<extended_gb_t *> gb_list = fta_tree->get_groupby();
4389 string temporal_gbvars = "";
4390 map<string, int> gset_gbnames;
4392 // For generating the set of GB patterns for this aggregation query.
4393 vector<bool> inner_pattern;
4394 vector<vector<bool> > pattern_set;
4395 vector<vector<vector<bool> > > pattern_components;
4397 vector<gb_t *> r_gbs, c_gbs, g_gbs;
4400 for(i=0;i<gb_list.size();i++){
4401 switch(gb_list[i]->type){
4403 retval = gb_tbl->add_gb_attr(
4404 gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns
4407 return NULL; // nothing added to gb_tbl, so this can trigger a segfault 2 lines below
4409 if(gb_tbl->get_data_type(i)->is_temporal()){
4411 if(temporal_gbvars != "") temporal_gbvars+=" ";
4412 temporal_gbvars += gb_tbl->get_name(i);
4416 inner_pattern.clear();
4417 pattern_set.clear();
4418 inner_pattern.push_back(true);
4419 pattern_set.push_back(inner_pattern);
4420 pattern_components.push_back(pattern_set);
4422 gb_tbl->gb_entry_type.push_back("");
4423 gb_tbl->gb_entry_count.push_back(1);
4424 gb_tbl->pattern_components.push_back(pattern_set);
4427 case rollup_egb_type:
4428 r_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4429 for(j=0;j<r_gbs.size();++j){
4430 retval = gb_tbl->add_gb_attr(
4431 r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4435 }else{ // rollup gb can't be temporal
4436 gb_tbl->reset_temporal(gb_tbl->size()-1);
4440 inner_pattern.resize(r_gbs.size());
4441 pattern_set.clear();
4442 for(j=0;j<=r_gbs.size();++j){
4443 for(k=0;k<r_gbs.size();++k){
4445 inner_pattern[k] = true;
4447 inner_pattern[k] = false;
4449 pattern_set.push_back(inner_pattern);
4451 pattern_components.push_back(pattern_set);
4453 gb_tbl->gb_entry_type.push_back("ROLLUP");
4454 gb_tbl->gb_entry_count.push_back(r_gbs.size());
4455 gb_tbl->pattern_components.push_back(pattern_set);
4458 c_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4459 for(j=0;j<c_gbs.size();++j){
4460 retval = gb_tbl->add_gb_attr(
4461 c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4465 }else{ // cube gb can't be temporal
4466 gb_tbl->reset_temporal(gb_tbl->size()-1);
4470 inner_pattern.resize(c_gbs.size());
4471 pattern_set.clear();
4472 n_patterns = 1 << c_gbs.size();
4473 for(j=0;j<n_patterns;++j){
4475 for(k=0;k<c_gbs.size();++k,test_bit = test_bit << 1){
4476 if((j & test_bit) != 0)
4477 inner_pattern[k] = true;
4479 inner_pattern[k] = false;
4481 pattern_set.push_back(inner_pattern);
4483 pattern_components.push_back(pattern_set);
4485 gb_tbl->gb_entry_type.push_back("CUBE");
4486 gb_tbl->gb_entry_count.push_back(c_gbs.size());
4487 gb_tbl->pattern_components.push_back(pattern_set);
4489 case gsets_egb_type:
4491 gset_gbnames.clear();
4492 for(j=0;j<gb_list[i]->gb_lists.size();++j){
4493 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4494 for(k=0;k<g_gbs.size();++k){
4495 if(g_gbs[k]->type != GB_COLREF){
4496 fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str());
4499 if(gset_gbnames.count(g_gbs[k]->name) == 0){
4500 retval = gb_tbl->add_gb_attr(
4501 g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns
4505 }else{ // gsets gb can't be temporal
4506 gb_tbl->reset_temporal(gb_tbl->size()-1);
4508 int pos = gset_gbnames.size();
4509 gset_gbnames[g_gbs[k]->name] = pos;
4515 if(gset_gbnames.size() > 63){
4516 fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n");
4520 inner_pattern.resize(gset_gbnames.size());
4521 pattern_set.clear();
4522 set<unsigned long long int> signatures;
4523 for(j=0;j<gb_list[i]->gb_lists.size();++j){
4524 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4525 set<string> refd_gbs;
4526 for(k=0;k<g_gbs.size();++k){
4527 refd_gbs.insert(g_gbs[k]->name);
4529 fill(inner_pattern.begin(),inner_pattern.end(),false);
4530 unsigned long long int signature = 0;
4531 set<string>::iterator ssi;
4532 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4533 inner_pattern[gset_gbnames[(*ssi)]] = true;
4534 signature |= (1 << gset_gbnames[(*ssi)]);
4536 if(signatures.count(signature)){
4537 fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t");
4538 set<string>::iterator ssi;
4539 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4540 fprintf(stderr," %s",(*ssi).c_str());
4542 fprintf(stderr,"\n");
4544 signatures.insert(signature);
4545 pattern_set.push_back(inner_pattern);
4548 pattern_components.push_back(pattern_set);
4550 gb_tbl->gb_entry_type.push_back("GROUPING_SETS");
4551 gb_tbl->gb_entry_count.push_back(gset_gbnames.size());
4552 gb_tbl->pattern_components.push_back(pattern_set);
4559 if(found_error) return(NULL);
4561 fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s). Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str());
4565 // Compute the set of patterns. Take the cross product of all pattern components.
4566 vector<vector<bool> > gb_patterns;
4567 int n_components = pattern_components.size();
4568 vector<int> pattern_pos(n_components,0);
4571 vector<bool> pattern;
4572 for(j=0;j<n_components;j++){
4573 pattern.insert(pattern.end(),pattern_components[j][pattern_pos[j]].begin(),
4574 pattern_components[j][pattern_pos[j]].end());
4576 gb_patterns.push_back(pattern);
4577 for(j=0;j<n_components;j++){
4579 if(pattern_pos[j] >= pattern_components[j].size())
4584 if(j >= n_components)
4587 gb_tbl->gb_patterns = gb_patterns;
4590 // Process the supergroup, if any.
4591 vector<colref_t *> sgb = fta_tree->get_supergb();
4592 for(i=0;i<sgb.size();++i){
4593 int gbr = gb_tbl->find_gb(sgb[i],fta_tree->fm, schema);
4595 fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str());
4598 if(qs->sg_tbl.count(gbr)){
4599 fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str());
4601 qs->sg_tbl.insert(gbr);
4603 if(found_error) return(NULL);
4605 if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){
4606 fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4612 predicate_t *wh = fta_tree->get_where();
4613 predicate_t *hv = fta_tree->get_having();
4614 predicate_t *cw = fta_tree->get_cleaning_when();
4615 predicate_t *cb = fta_tree->get_cleaning_by();
4616 predicate_t *closew = fta_tree->get_closing_when();
4618 if(closew != NULL && gb_tbl->gb_patterns.size()>1){
4619 fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4625 // Verify that all column references are valid, and if so assign
4628 vector<select_element *> sl_list = fta_tree->get_sl_vec();
4629 for(i=0;i<sl_list.size();i++){
4630 retval = verify_colref(sl_list[i]->se, fta_tree->fm, schema, gb_tbl);
4631 if(retval < 0) found_error = true;
4634 retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl);
4635 if(retval < 0) found_error = true;
4637 retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl);
4638 if(retval < 0) found_error = true;
4640 retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl);
4641 if(retval < 0) found_error = true;
4643 retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl);
4644 if(retval < 0) found_error = true;
4646 retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl);
4647 if(retval < 0) found_error = true;
4649 if(found_error) return(NULL);
4651 // Verify that all of the scalar expressions
4652 // and comparison predicates have compatible types.
4655 string temporal_output_fields;
4656 for(i=0;i<sl_list.size();i++){
4657 retval = assign_data_types(sl_list[i]->se, schema, fta_tree, Ext_fcns );
4661 if(sl_list[i]->se->get_data_type()->is_temporal()){
4663 temporal_output_fields += " "+int_to_string(i);
4668 fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s). Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str());
4672 retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns);
4673 if(retval < 0) found_error = true;
4675 retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns);
4676 if(retval < 0) found_error = true;
4678 retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns);
4679 if(retval < 0) found_error = true;
4681 retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns);
4682 if(retval < 0) found_error = true;
4684 retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns);
4685 if(retval < 0) found_error = true;
4687 if(found_error) return(NULL);
4689 // Impute names for the unnamed columns.
4690 set<string> curr_names;
4692 for(s=0;s<sl_list.size();++s){
4693 curr_names.insert(sl_list[s]->name);
4695 for(s=0;s<sl_list.size();++s){
4696 if(sl_list[s]->name == "")
4697 sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se);
4701 // Check the aggregates.
4702 // No aggrs allowed in the WHERE predicate.
4703 // (no aggrs in the GB defs, but that is examined elsewhere)
4704 // Therefore, aggregates are allowed only the select clause.
4706 // The query is an aggregation query if there is a group-by clause, or
4707 // if any aggregate is referenced. If there is a group-by clause,
4708 // at least one aggregate must be referenced.
4709 // If the query is an aggregate query, the scalar expressions in
4710 // the select clause can reference only constants, aggregates, or group-by
4712 // Also, if the query is an aggregate query, build a table referencing
4715 // No nested aggregates allowed.
4718 // First, count references in the WHERE predicate.
4719 // (if there are any references, report an error).
4720 // can ref group vars, tuple fields, and stateful fcns.
4723 retval = count_aggr_pred(wh, true);
4725 fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n");
4730 // NOTE : Here I need an analysis of the having clause
4731 // to verify that it only refs GB attrs and aggregates.
4732 // (also, superaggregates, stateful fcns)
4734 retval = verify_having_pred(hv, "HAVING", Ext_fcns);
4735 if(retval < 0) return(NULL);
4738 // Cleaning by has same reference rules as Having
4740 retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns);
4741 if(retval < 0) return(NULL);
4744 // Cleaning when has same reference rules as Having,
4745 // except that references to non-superaggregates are not allowed.
4746 // This is tested for when "CLEANING_BY" is passed in as the clause.
4748 retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns);
4749 if(retval < 0) return(NULL);
4752 // CLOSING_WHEN : same rules as HAVING
4754 retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns);
4755 if(retval < 0) return(NULL);
4759 // Collect aggregates in the HAVING and CLEANING clauses
4761 build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns);
4764 build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns);
4767 build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns);
4770 build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns);
4773 // Collect aggregate refs in the SELECT clause.
4775 for(i=0;i<sl_list.size();i++)
4776 build_aggr_tbl_fm_se(sl_list[i]->se, aggr_tbl, Ext_fcns);
4779 // Collect references to states of stateful functions
4781 gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns);
4784 gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns);
4787 gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns);
4790 gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns);
4792 if(closew != NULL){ // should be no stateful fcns here ...
4793 gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns);
4795 for(i=0;i<sl_list.size();i++)
4796 gather_fcn_states_se(sl_list[i]->se, qs->states_refd, Ext_fcns);
4799 // If this is an aggregate query, it normally references
4800 // some aggregates. It's not necessary though, just emit a warning.
4801 // (acts as SELECT DISTINCT)
4803 bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0;
4804 if(is_aggr_query && aggr_tbl->size() == 0){
4805 fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n");
4808 // If this is an aggregate query,
4809 // 1) verify that the SEs in the SELECT clause reference
4810 // only constants, aggregates, and group-by attributes.
4811 // 2) No aggregate scalar expression references an aggregate
4812 // or any stateful function.
4813 // 3) either it references both CLEANING clauses or neither.
4814 // 4) all superaggregates must have the superaggr_allowed property.
4815 // 5) all aggregates ref'd in the CLEANING_WHEN and CLEANING_BY
4816 // clauses must have the multiple_output property.
4820 if(gb_list.size() == 0){
4821 fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n");
4824 // Ensure that at least one gbvar is temporal
4825 if(! fta_tree->name_exists("no_temporal_aggr")){
4826 bool found_temporal = false;
4827 for(i=0;i<gb_tbl->size();i++){
4828 if(gb_tbl->get_data_type(i)->is_temporal()){
4829 found_temporal = true;
4832 if(! found_temporal){
4833 fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n");
4838 if((!cb && cw) || (cb && !cw)){
4839 fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n");
4843 bool refs_running = false;
4845 for(a=0; a<aggr_tbl->size(); ++a){
4846 refs_running |= aggr_tbl->is_running_aggr(a);
4851 fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n");
4855 fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n");
4860 if(refs_running && !closew){
4861 fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n");
4866 for(i=0;i<sl_list.size();i++){
4867 bool ret_bool = verify_aggr_query_se(sl_list[i]->se);
4868 st_ok = st_ok && ret_bool;
4873 for(i=0;i<aggr_tbl->size();i++){
4874 if(aggr_tbl->is_superaggr(i)){
4875 if(! aggr_tbl->superaggr_allowed(i)){
4876 fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str());
4880 if(aggr_tbl->is_builtin(i)){
4881 if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){
4882 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4886 vector<scalarexp_t *> opl = aggr_tbl->get_operand_list(i);
4888 for(o=0;o<opl.size();++o){
4889 if(count_aggr_se(opl[o], true) > 0){
4890 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4897 // Ensure that non-aggregate query doesn't reference some things
4899 fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n");
4903 fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n");
4906 if(qs->states_refd.size()){
4907 fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n");
4914 // Convert the predicates into CNF. OK to pass NULL ptr.
4915 make_cnf_from_pr(wh, qs->wh_cnf);
4916 make_cnf_from_pr(hv, qs->hav_cnf);
4917 make_cnf_from_pr(cb, qs->cb_cnf);
4918 make_cnf_from_pr(cw, qs->cw_cnf);
4919 make_cnf_from_pr(closew, qs->closew_cnf);
4921 // Analyze the predicates.
4923 for(i=0;i<qs->wh_cnf.size();i++)
4924 analyze_cnf(qs->wh_cnf[i]);
4925 for(i=0;i<qs->hav_cnf.size();i++)
4926 analyze_cnf(qs->hav_cnf[i]);
4927 for(i=0;i<qs->cb_cnf.size();i++)
4928 analyze_cnf(qs->cb_cnf[i]);
4929 for(i=0;i<qs->cw_cnf.size();i++)
4930 analyze_cnf(qs->cw_cnf[i]);
4931 for(i=0;i<qs->closew_cnf.size();i++)
4932 analyze_cnf(qs->closew_cnf[i]);
4935 // At this point, the old analysis program
4936 // gathered all refs to partial functions,
4937 // complex literals, and parameters accessed via a handle.
4938 // I think its better to delay this
4939 // until code generation time, as the query will be
4940 // in general split.
4947 ///////////////////////////////////////////////////////////////////////
4949 // Expand gbvars with their definitions.
// expand_gbvars_se: rewrite a scalar expression so that references to
// group-by variables are replaced by copies of their definitions.
//   se     : expression to rewrite; its operand slots (lhs/rhs/param_list)
//            are overwritten in place with the expanded sub-expressions.
//   gb_tbl : group-by table that supplies each gbvar's definition (get_def).
// Returns a scalarexp_t*; for a group-by reference it is a dup_se() copy
// of the definition, so callers must use the returned pointer.
4951 scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){
4954 switch(se->get_operator_type()){
4957 case SE_IFACE_PARAM:
// Expand the left operand and store the result back.
4960 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
// Expand both operands and store the results back.
4963 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
4964 se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl);
// Group-by reference: substitute a duplicate of the gbvar's definition.
4968 return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) );
4971 // don't descend into aggr defs.
// Expand every entry of the parameter list in place.
4977 for(o=0;o<se->param_list.size();o++){
4978 se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl);
// Unrecognized operator type: report the source position on stderr.
4982 fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n",
4983 se->get_lineno(), se->get_charno(),se->get_operator_type());
// expand_gbvars_pr: predicate counterpart of expand_gbvars_se.
// Walks the predicate tree and replaces each scalar-expression operand
// with the result of expand_gbvars_se(), storing it back into the
// predicate (lhs.sexp / rhs.sexp / param_list). Sub-predicates are
// visited recursively.
//   pr     : predicate to rewrite in place.
//   gb_tbl : group-by table supplying the gbvar definitions.
4989 void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){
4990 vector<scalarexp_t *> op_list;
4994 switch(pr->get_operator_type()){
// Only the left scalar expression is expanded in this branch.
4996 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl);
// Both scalar expressions are expanded in this branch.
4999 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ;
5000 pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ;
// Recurse into the single child predicate.
5003 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
5005 case PRED_BINARY_OP:
// Recurse into both child predicates.
5006 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
5007 expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ;
// Expand every scalar expression in the parameter list.
5010 for(o=0;o<pr->param_list.size();++o){
5011 pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ;
// Unrecognized predicate operator type: report it on stderr.
5015 fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n",
5016 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5024 // return true if the se / pr contains any gbvar on the list.
// contains_gb_se: test whether a scalar expression references any of the
// group-by variables whose indices are in gref_set.
//   se       : expression to inspect (not modified by the visible code).
//   gref_set : set of group-by reference indices to look for.
// The result is the logical OR over the operands that are visited;
// aggregate definitions are deliberately not searched.
5027 bool contains_gb_se(scalarexp_t *se, set<int> &gref_set){
5028 vector<scalarexp_t *> operands;
5032 switch(se->get_operator_type()){
5035 case SE_IFACE_PARAM:
// Result is whatever the left operand yields.
5038 return contains_gb_se(se->get_left_se(),gref_set);
// Either side may contain a listed gbvar.
5040 return( contains_gb_se(se->get_left_se(),gref_set) ||
5041 contains_gb_se(se->get_right_se(),gref_set) );
// Direct membership test on the expression's own gb reference.
5044 return( gref_set.count(se->get_gb_ref()) > 0);
5047 // don't descend into aggr defs.
// Check the operands; the || short-circuits once a match has been found.
5053 operands = se->get_operands();
5054 for(o=0;o<operands.size();o++){
5055 found = found || contains_gb_se(operands[o], gref_set);
// Unrecognized operator type: report it on stderr.
5059 fprintf(stderr,"INTERNAL ERROR in contains_gb_se, line %d, character %d: unknown operator type %d\n",
5060 se->get_lineno(), se->get_charno(),se->get_operator_type());
// contains_gb_pr: predicate counterpart of contains_gb_se.
// Returns true when a scalar expression reachable from pr references a
// group-by variable whose index is in gref_set.
//   pr       : predicate to inspect.
//   gref_set : set of group-by reference indices to look for.
5067 bool contains_gb_pr(predicate_t *pr, set<int> &gref_set){
5068 vector<scalarexp_t *> op_list;
5072 switch(pr->get_operator_type()){
// Only the left scalar expression is inspected in this branch.
5074 return contains_gb_se(pr->get_left_se(), gref_set);
// Either scalar operand may contain a listed gbvar.
5076 return (contains_gb_se(pr->get_left_se(),gref_set)
5077 || contains_gb_se(pr->get_right_se(),gref_set) );
// Single child predicate.
5079 return contains_gb_pr(pr->get_left_pr(),gref_set) ;
5080 case PRED_BINARY_OP:
// Either child predicate may contain a listed gbvar.
5081 return (contains_gb_pr(pr->get_left_pr(),gref_set)
5082 || contains_gb_pr(pr->get_right_pr(),gref_set) );
// Check the operand list; the || short-circuits once a match has been found.
5084 op_list = pr->get_op_list();
5085 for(o=0;o<op_list.size();++o){
5086 found = found ||contains_gb_se(op_list[o],gref_set) ;
// Unrecognized predicate operator type: report it on stderr.
5090 fprintf(stderr,"INTERNAL ERROR in contains_gb_pr, line %d, character %d, unknown predicate operator type %d\n",
5091 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5098 // Gather the set of columns accessed in this se.
5099 // Descend into aggregate functions.
// gather_se_col_ids: walk a scalar expression and collect column ids.
//   se      : expression to walk.
//   cid_set : output set of column ids (passed by reference).
//   gtbl    : group-by table used to resolve gbvar references to their
//             definitions; the error message below shows that a gbvar
//             reference with gtbl == NULL is treated as an internal error.
5101 void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){
5103 vector<scalarexp_t *> operands;
5109 switch(se->get_operator_type()){
5112 case SE_IFACE_PARAM:
// Recurse into the left operand only.
5115 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
// Recurse into both operands.
5118 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5119 gather_se_col_ids(se->get_right_se(),cid_set,gtbl);
// Build a col_id from the column reference; a negative tblvar_ref means
// the colref was never bound to a table variable.
5123 ci.load_from_colref(se->get_colref() );
5124 if(ci.tblvar_ref < 0){
5125 fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str());
5130 fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n");
// A gbvar reference contributes the columns used by its definition.
5133 gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl);
// Recurse into the left operand.
5139 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
// Recurse into every operand.
5142 operands = se->get_operands();
5143 for(o=0;o<operands.size();o++){
5144 gather_se_col_ids(operands[o], cid_set,gtbl);
// Unrecognized operator type: report it on stderr.
5148 fprintf(stderr,"INTERNAL ERROR in gather_se_col_ids, line %d, character %d: unknown operator type %d\n",
5149 se->get_lineno(), se->get_charno(),se->get_operator_type());
5155 // Gather the set of columns accessed in this predicate.
// gather_pr_col_ids: predicate counterpart of gather_se_col_ids.
// Visits the scalar expressions and sub-predicates reachable from pr and
// forwards each scalar expression to gather_se_col_ids().
//   pr      : predicate to walk.
//   cid_set : output set of column ids (passed by reference).
//   gtbl    : group-by table, passed through unchanged.
5157 void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){
5158 vector<scalarexp_t *> op_list;
5161 switch(pr->get_operator_type()){
// Only the left scalar expression is visited in this branch.
5163 gather_se_col_ids(pr->get_left_se(), cid_set,gtbl);
// Both scalar operands are visited.
5166 gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ;
5167 gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ;
// Single child predicate.
5170 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5172 case PRED_BINARY_OP:
// Both child predicates.
5173 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5174 gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ;
// Every scalar expression in the operand list.
5177 op_list = pr->get_op_list();
5178 for(o=0;o<op_list.size();++o){
5179 gather_se_col_ids(op_list[o],cid_set,gtbl) ;
// Unrecognized predicate operator type: report it on stderr.
5183 fprintf(stderr,"INTERNAL ERROR in gather_pr_col_ids, line %d, character %d, unknown predicate operator type %d\n",
5184 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5191 // Gather the set of special operator or comparison functions referenced by this se.
// gather_se_opcmp_fcns: collect the names of helper functions that a
// scalar expression depends on because of its data types.
//   se      : expression to walk.
//   fcn_set : output set of function names. It receives literal
//             constructor names and the "complex operator" function
//             names reported by the operand data types.
5193 void gather_se_opcmp_fcns(scalarexp_t *se, set<string> &fcn_set){
5195 data_type *ldt, *rdt;
5197 vector<scalarexp_t *> operands;
5199 switch(se->get_operator_type()){
// A literal with a non-empty constructor name contributes that name.
5201 if( se->get_literal()->constructor_name() != "")
5202 fcn_set.insert( se->get_literal()->constructor_name() );
5206 // SE_IFACE_PARAM should not exist when this is called.
// One-operand form: ask the operand's type whether this operator needs a
// function, record it if so, then recurse.
5208 ldt = se->get_left_se()->get_data_type();
5209 if(ldt->complex_operator(se->get_op()) ){
5210 fcn_set.insert( ldt->get_complex_operator(se->get_op()) );
5212 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
// Two-operand form: the lookup is keyed on (left type, right type, op).
5215 ldt = se->get_left_se()->get_data_type();
5216 rdt = se->get_right_se()->get_data_type();
5218 if(ldt->complex_operator(rdt, se->get_op()) ){
5219 fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) );
5221 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5222 gather_se_opcmp_fcns(se->get_right_se(),fcn_set);
// Recurse into the left operand.
5229 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
// Recurse into every operand.
5232 operands = se->get_operands();
5233 for(o=0;o<operands.size();o++){
5234 gather_se_opcmp_fcns(operands[o], fcn_set);
// Unrecognized operator type: report it on stderr.
5238 fprintf(stderr,"INTERNAL ERROR in gather_se_opcmp_fcns, line %d, character %d: unknown operator type %d\n",
5239 se->get_lineno(), se->get_charno(),se->get_operator_type());
5245 // Gather the set of special operator or comparison functions referenced by this se.
5247 void gather_pr_opcmp_fcns(predicate_t *pr, set<string> &fcn_set){
5248 data_type *ldt, *rdt;
5249 vector<scalarexp_t *> operands;
5252 switch(pr->get_operator_type()){
5254 ldt = pr->get_left_se()->get_data_type();
5255 if(ldt->complex_comparison(ldt) ){
5256 fcn_set.insert( ldt->get_equals_fcn(ldt) );
5258 gather_se_opcmp_fcns(pr->get_left_se(), fcn_set);
5261 ldt = pr->get_left_se()->get_data_type();
5262 rdt = pr->get_right_se()->get_data_type();
5263 if(ldt->complex_comparison(rdt) ){
5264 fcn_set.insert( ldt->get_comparison_fcn(ldt) );
5266 gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ;
5267 gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ;
5270 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5272 case PRED_BINARY_OP:
5273 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5274 gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ;
5277 operands = pr->get_op_list();
5278 for(o=0;o<operands.size();o++){
5279 gather_se_opcmp_fcns(operands[o], fcn_set);
5283 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
5284 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5291 // find the temporal variable divisor if any.
5292 // Only forms allowed : temporal_colref, temporal_colref/const
5293 // temporal_colref/const + const
// find_temporal_divisor: inspect a scalar expression built around a
// temporal column and extract the literal it is combined with.
//   se  : expression to inspect.
//   gbt : group-by table, used to follow a gbvar reference to its definition.
//   fnm : output; set to the field name of the temporal column reference
//         when one is found.
// The visible code yields the parsed literal for a (colref, literal) pair
// and recurses through "+"/"-" and through gbvar definitions.
5296 long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){
5297 long long int retval = 0;
5298 data_type *ldt, *rdt;
5300 vector<scalarexp_t *> operands;
5301 scalarexp_t *t_se, *c_se;
5304 switch(se->get_operator_type()){
5309 // SE_IFACE_PARAM should not exist when this is called.
// Decide which operand is the temporal one (t_se) and which is the
// constant (c_se).
// NOTE(review): both assignments below are identical (left -> t_se,
// right -> c_se), so the is_temporal() test has no effect and a temporal
// value on the right-hand side is never selected. Confirm whether the
// second pair was meant to be swapped (only valid for commutative ops).
5313 ldt = se->get_left_se()->get_data_type();
5314 if(ldt->is_temporal()){
5315 t_se = se->get_left_se();
5316 c_se = se->get_right_se();
5318 t_se = se->get_left_se();
5319 c_se = se->get_right_se();
// Require exactly: temporal on the t_se side, non-temporal on the c_se side.
5321 if((! t_se->get_data_type()->is_temporal()) || c_se->get_data_type()->is_temporal())
// Additive offsets do not change the divisor: look inside the temporal side.
5324 the_op = se->get_op();
5325 if(the_op == "+" || the_op == "-")
5326 return find_temporal_divisor(t_se, gbt,fnm);
// colref combined with a literal: record the field and parse the literal.
// NOTE(review): "%qd" is a BSD/glibc extension; "%lld" is the standard
// conversion for long long int. The sscanf result is not checked here.
5328 if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){
5329 fnm = t_se->get_colref()->get_field();
5330 string lits = c_se->get_literal()->to_string();
5331 sscanf(lits.c_str(),"%qd",&retval);
// A gbvar reference is resolved through its definition.
5339 return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm);
// A bare temporal column reference: report its field name.
5341 if(se->get_data_type()->is_temporal()){
5342 fnm = se->get_colref()->get_field();
// Unrecognized operator type: report it on stderr.
5353 fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n",
5354 se->get_lineno(), se->get_charno(),se->get_operator_type());
5361 // Create meaningful but unique names for the columns.
5362 string impute_colname(vector<select_element *> &sel_list, scalarexp_t *se){
5363 set<string> curr_names;
5365 for(s=0;s<sel_list.size();++s){
5366 curr_names.insert(sel_list[s]->name);
5368 return impute_colname(curr_names, se);
// impute_colname: derive a column name for se that is not already in
// curr_names, and record the chosen name in curr_names.
//   curr_names : names already in use; the returned name is inserted.
//   se         : expression for which a name is needed.
// The base name is built from the kind of expression (parameter, interface
// parameter, column field, operator plus a hint taken from the first
// operand). A numeric suffix is then appended until the name is unique.
5371 string impute_colname(set<string> &curr_names, scalarexp_t *se){
5374 vector<scalarexp_t *> operand_list;
5377 switch(se->get_operator_type()){
// Parameters are named after the parameter itself.
5382 ret = "Param_" + se->get_param_name();
5384 case SE_IFACE_PARAM:
5385 ret = "Iparam_" + se->get_ifpref()->get_pname();
// A column reference is named after its field.
5388 ret = se->get_colref()->get_field() ;
// Qualify the name using the kind of the left operand.
5399 seo = se->get_left_se();
5400 switch(se->get_left_se()->get_operator_type()){
5402 ret += "_PARAM_"+seo->get_param_name();
5404 case SE_IFACE_PARAM:
5405 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
// Compare the first ret.size() characters of ret and the operand's field
// name; a non-zero strncmp result means they differ.
5408 opstr = seo->get_colref()->get_field();
5409 if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
// Same comparison against the operand's operator string.
5417 opstr = seo->get_op();
5418 if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5419 ret += "_" + seo->get_op();
5425 opstr = seo->get_op();
5426 ret += "_" + seo->get_op();
// Multi-operand form: only the first operand (if any) contributes a hint.
5439 operand_list = se->get_operands();
5440 if(operand_list.size() > 0){
5441 seo = operand_list[0];
5442 switch(seo->get_operator_type()){
5444 ret += "_PARAM_"+seo->get_param_name();
5446 case SE_IFACE_PARAM:
5447 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5450 ret += "_" + seo->get_colref()->get_field();
5455 ret += "_" + seo->get_op();
// "Field0" is special-cased when it is not yet taken.
5472 if(curr_names.count("Field0") == 0)
// Uniquify: keep generating base+iter until the name is unused.
// NOTE(review): tmpstr's size is not visible here; confirm it can hold
// base plus the decimal counter.
5477 while(curr_names.count(ret) > 0){
5479 sprintf(tmpstr,"%s%d",base.c_str(),iter);
// Reserve the chosen name so later calls will not reuse it.
5485 curr_names.insert(ret);
5492 //////////////////////////////////////////////////////////////////////
5493 ////////////// Methods of defined classes ///////////////////////
5494 //////////////////////////////////////////////////////////////////////
5496 // helper fcn to enable col_id as map key.
5498 bool operator<(const col_id &cr1, const col_id &cr2){
5499 if(cr1.tblvar_ref < cr2.tblvar_ref) return(true);
5500 if(cr1.tblvar_ref == cr2.tblvar_ref)
5501 return (cr1.field < cr2.field);
5506 // Process the GB variables.
5507 // At parse time, GB vars are either GB_COLREF,
5508 // or GB_COMPUTED if the AS keyword is used.
5509 // Cast GB vars as named entities with a SE as
5510 // their definition (the colref in the case of GB_COLREF).
5512 // TODO: if there is a gbref in a gbdef,
5513 // then I won't be able to compute the value without
5514 // a complex dependence analysis. So verify that there is no
5515 // gbref in any of the GBdefs.
5516 // BUT: a GBVAR_COLREF should be converted to a regular colref,
5517 // which is not yet done.
5519 // TODO : sort out issue of GBVAR naming and identification.
5520 // Determine where it is advantageous to convert GV_COLREF
5521 // GBVARS to colrefs -- e.g. in group definition, in the WHERE clause,
5524 // return -1 if there is a problem.
// gb_table::add_gb_attr: turn one parsed group-by item into a
// gb_table_entry and append it to gtbl.
//   fm       : FROM-clause table variables, used to bind column references.
//   fta_tree : query parse tree, forwarded to assign_data_types().
//   Ext_fcns : external function list, forwarded to assign_data_types().
// A negative result from infer_tablevar_from_colref(), verify_colref() or
// assign_data_types() is returned to the caller unchanged.
5526 int gb_table::add_gb_attr(
5528 tablevar_list_t *fm,
5530 table_exp_t *fta_tree,
5531 ext_fcn_list *Ext_fcns
5535 gb_table_entry *entry;
// Plain column group-by: build a colref and bind it to a table variable.
5537 if(gb->type == GB_COLREF){
5540 gb->interface.c_str(),gb->table.c_str(), gb->name.c_str()
5543 cr = new colref_t(gb->name.c_str());
5545 int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema);
5546 if(tablevar_ref < 0) return(tablevar_ref);
// Record the binding on the colref; the interface is cleared and the
// table name is replaced by the table variable's name.
5548 cr->set_tablevar_ref(tablevar_ref);
5549 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
5550 cr->set_interface("");
5551 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
// The entry is named after the field and defined by the colref itself.
5553 entry = new gb_table_entry();
5554 entry->name.field = cr->get_field();
5555 entry->name.tblvar_ref = tablevar_ref;
5556 entry->definition = new scalarexp_t(cr);
5557 entry->ref_type = GBVAR_COLREF;
// Computed group-by: named by gb->name, defined by the parsed expression,
// and not tied to any table variable (tblvar_ref = -1).
5559 entry = new gb_table_entry();
5560 entry->name.field = gb->name;
5561 entry->name.tblvar_ref = -1;
5562 entry->definition = gb->def;
5563 entry->ref_type = GBVAR_SE;
// Bind column references inside the definition, then type it.
5566 retval = verify_colref(entry->definition, fm, schema, NULL);
5567 if(retval < 0) return(retval);
5569 retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns);
5570 if(retval < 0) return(retval);
5572 // Verify that the gbvar def references no aggregates and no gbvars.
5573 if(count_gb_se(entry->definition) > 0){
5574 fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() );
5577 if(count_aggr_se(entry->definition, true) > 0){
5578 fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() );
5582 // Check for duplicates
// Names are compared on the field only; gtbl.size() is the position the
// new entry would take.
5584 for(i=0;i<gtbl.size();++i){
5585 if(entry->name.field == gtbl[i]->name.field){
5586 fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size());
5592 gtbl.push_back(entry);
5598 // Try to determine if the colref is actually
5600 // a) if no tablename associated with the colref,
5601 // 1) try to find a matching GB_COMPUTED gbvar.
5602 // 2) failing that, try to match to a single tablevar
5603 // 3) if successful, search among GB_COLREF
5604 // b) else, try to match the tablename to a single tablevar
5605 // if successful, search among GB_COLREF
// gb_table::find_gb: try to resolve a column reference to an entry of the
// group-by table.
//   cr     : column reference to resolve.
//   fm     : FROM-clause table variables.
//   schema : schema list, used by find_source_tables().
// Returns -1 when the reference cannot be tied to a unique table variable
// or does not match any group-by entry.
5606 int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){
5607 string c_field = cr->get_field();
5611 vector<int> candidates;
// No table name on the colref: first look for a computed gbvar with the
// same name.
5613 if(cr->uses_default_table()){
5614 for(i=0;i<gtbl.size();i++){
5615 if(gtbl[i]->ref_type==GBVAR_SE && c_field == gtbl[i]->name.field){
// Otherwise the field must come from exactly one table variable, and a
// colref-type gbvar on that table variable must exist.
5619 candidates = find_source_tables(c_field, fm, schema);
5620 if(candidates.size() != 1) return(-1); // can't find unique tablevar
5621 for(i=0;i<gtbl.size();i++){
5622 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5623 c_field == gtbl[i]->name.field &&
5624 candidates[0] == gtbl[i]->name.tblvar_ref){
5628 return(-1); // colref is not in gb table.
5631 // A table name must have been given.
5632 vector<tablevar_t *> fm_tbls = fm->get_table_list();
5633 string interface = cr->get_interface();
5634 string table_name = cr->get_table_name();
5637 // if no interface name is given, try to search for the table
5638 // name among the tablevar names first.
5640 for(i=0;i<fm_tbls.size();++i){
5641 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5642 candidates.push_back(i);
// More than one matching table variable is ambiguous.
5644 if(candidates.size()>1) return(-1);
5645 if(candidates.size()==1){
5646 for(i=0;i<gtbl.size();i++){
5647 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5648 c_field == gtbl[i]->name.field &&
5649 candidates[0] == gtbl[i]->name.tblvar_ref){
5653 return(-1); // match semantics of bind to tablevar name first
5657 // Interface name given, or no interface but no
5658 // no tablevar match. Try to match on schema name.
// NOTE(review): the comment above says "schema name", but the test below
// compares against get_var_name(), exactly like the earlier tablevar
// loop. Confirm whether a schema-name accessor was intended here.
5659 for(i=0;i<fm_tbls.size();++i){
5660 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5661 candidates.push_back(i);
5663 if(candidates.size() != 1) return(-1);
5664 for(i=0;i<gtbl.size();i++){
5665 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5666 c_field == gtbl[i]->name.field &&
5667 candidates[0] == gtbl[i]->name.tblvar_ref){
// aggr_table_entry::fta_legal: decide whether this aggregate is legal in
// an FTA.
//   Ext_fcns : external function list consulted via fta_legal(fcn_id).
// The operator name is tested against a fixed list (COUNT, SUM, MIN, MAX,
// AND_AGGR, OR_AGGR, XOR_AGGR); the final statement defers to
// Ext_fcns->fta_legal(fcn_id).
5679 bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){
5681 if( (op == "COUNT") || (op == "SUM") || (op == "MIN") ||
5682 (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") ||
5683 (op == "XOR_AGGR") )
5686 return Ext_fcns->fta_legal(fcn_id);
5692 // Return the set of subaggregates required to compute
5693 // the desired aggregate. The operand of the subaggregates
5694 // can only be * or the scalarexp used in the superaggr.
5695 // This is indicated by the use_se vector.
5697 // Is this code generation specific?
// aggr_table_entry::get_subaggr_fcns: list the sub-aggregate operators
// needed to compute this aggregate.
//   use_se : output, appended in lock-step with the returned names.
//            true means the sub-aggregate takes this aggregate's operand
//            expression; false means it does not (the COUNT entries).
// Returns the sub-aggregate operator names, parallel to use_se.
5699 vector<string> aggr_table_entry::get_subaggr_fcns(vector<bool> &use_se){
// Sub-aggregate COUNT, without the operand expression.
5703 ret.push_back("COUNT");
5704 use_se.push_back(false);
// Sub-aggregate SUM over the operand expression.
5707 ret.push_back("SUM");
5708 use_se.push_back(true);
// Two sub-aggregates: SUM over the operand, and COUNT.
5711 ret.push_back("SUM");
5712 ret.push_back("COUNT");
5713 use_se.push_back(true);
5714 use_se.push_back(false);
// Sub-aggregate MIN over the operand expression.
5717 ret.push_back("MIN");
5718 use_se.push_back(true);
// Sub-aggregate MAX over the operand expression.
5721 ret.push_back("MAX");
5722 use_se.push_back(true);
// The bitwise/logical aggregates are their own sub-aggregates.
5724 if(op == "AND_AGGR"){
5725 ret.push_back("AND_AGGR");
5726 use_se.push_back(true);
5728 if(op == "OR_AGGR"){
5729 ret.push_back("OR_AGGR");
5730 use_se.push_back(true);
5732 if(op == "XOR_AGGR"){
5733 ret.push_back("XOR_AGGR");
5734 use_se.push_back(true);
5740 // Code generation specific?
// aggr_table_entry::get_subaggr_dt: build the data types of the
// sub-aggregates of this aggregate.
// Each element is allocated with new; the visible code never frees them,
// so ownership presumably passes to the caller (TODO confirm).
// Types are either the fixed "Int" type or derived from the operand's
// type through set_aggr_data_type(<sub-aggregate op>, operand type).
5742 vector<data_type *> aggr_table_entry::get_subaggr_dt(){
5743 vector<data_type *> ret;
// Fixed integer type.
5747 dt = new data_type("Int"); // was Uint
5748 ret.push_back( dt );
// Type of SUM over the operand.
5751 dt = new data_type();
5752 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
// Two entries: type of SUM over the operand, then a fixed integer type.
5756 dt = new data_type();
5757 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
5758 ret.push_back( dt );
5759 dt = new data_type("Int");
5760 ret.push_back( dt );
// Type of MIN over the operand.
5763 dt = new data_type();
5764 dt->set_aggr_data_type( "MIN",operand->get_data_type() );
5765 ret.push_back( dt );
// Type of MAX over the operand.
5768 dt = new data_type();
5769 dt->set_aggr_data_type( "MAX",operand->get_data_type() );
5770 ret.push_back( dt );
// AND/OR/XOR aggregates: type derived from the operand for the same op.
5772 if(op == "AND_AGGR"){
5773 dt = new data_type();
5774 dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() );
5775 ret.push_back( dt );
5777 if(op == "OR_AGGR"){
5778 dt = new data_type();
5779 dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() );
5780 ret.push_back( dt );
5782 if(op == "XOR_AGGR"){
5783 dt = new data_type();
5784 dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() );
5785 ret.push_back( dt );
5791 // Code generation specific?
// aggr_table_entry::make_superaggr_se: build the scalar expression that
// combines sub-aggregate results into this aggregate.
//   se_refs : expressions referring to the sub-aggregate values;
//             se_refs[0] is always used, se_refs[1] only by the quotient
//             form below.
// Returns the new expression; ret_se starts out as NULL.
5793 scalarexp_t *aggr_table_entry::make_superaggr_se(vector<scalarexp_t *> se_refs){
5794 scalarexp_t *se_l, *se_r, *ret_se = NULL;
// Combined with SUM over the first sub-aggregate.
5797 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
// Also combined with SUM over the first sub-aggregate.
5801 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
// Quotient of two sums: SUM(se_refs[0]) / SUM(se_refs[1]).
5805 se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5806 se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]);
5808 ret_se = new scalarexp_t("/", se_l, se_r);
// MIN of the partial minima.
5812 ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]);
// MAX of the partial maxima.
5816 ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]);
// AND/OR/XOR aggregates are re-applied to the partial results.
5819 if(op == "AND_AGGR"){
5820 ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]);
5823 if(op == "OR_AGGR"){
5824 ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]);
5827 if(op == "XOR_AGGR"){
5828 ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]);
5837 // Add a built-in aggr.
5838 int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){
5841 for(i=0;i<agr_tbl.size();i++){
5842 if(agr_tbl[i]->is_builtin() && op == agr_tbl[i]->op
5843 && is_equivalent_se(se,agr_tbl[i]->operand) ){
5844 // && is_super == agr_tbl[i]->is_superaggr())
5845 if(is_super) agr_tbl[i]->set_super(true);
5850 aggr_table_entry *ate = new aggr_table_entry(op, se, is_super);
5851 agr_tbl.push_back(ate);
5852 return(agr_tbl.size() - 1);
5856 int aggregate_table::add_aggr(string op, int fcn_id, vector<scalarexp_t *> opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){
5859 for(i=0;i<agr_tbl.size();i++){
5860 if((! agr_tbl[i]->is_builtin()) && fcn_id == agr_tbl[i]->fcn_id
5861 && opl.size() == agr_tbl[i]->oplist.size() ){
5862 // && is_super == agr_tbl[i]->is_superaggr() ){
5863 for(o=0;o<opl.size();++o){
5864 if(! is_equivalent_se(opl[o],agr_tbl[i]->oplist[o]) )
5867 if(o == opl.size()){
5868 if(is_super) agr_tbl[i]->set_super(true);
5874 aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout);
5875 agr_tbl.push_back(ate);
5876 return(agr_tbl.size() - 1);
5880 int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){
5883 for(i=0;i<cplx_lit_tbl.size();i++){
5884 if(l->is_equivalent(cplx_lit_tbl[i])){
5885 hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref;
5890 cplx_lit_tbl.push_back(l);
5891 hdl_ref_tbl.push_back(is_handle_ref);
5892 return(cplx_lit_tbl.size() - 1);
5897 //------------------------------------------------------------
5901 gb_t *gb_t::duplicate(){
5902 gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str());
5904 ret->lineno = lineno;
5905 ret->charno = charno;
5907 ret->def = dup_se(def,NULL);