1 /* ------------------------------------------------
2 Copyright 2014 AT&T Intellectual Property
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
7 http://www.apache.org/licenses/LICENSE-2.0
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ------------------------------------------- */
18 #include "parse_fta.h"
19 #include "parse_schema.h"
20 #include "parse_ext_fcns.h"
23 #include"analyze_fta.h"
25 #include"type_objects.h"
32 extern string hostname; // name of the current host
// Convert the integer i to its decimal text form using sprintf's "%d" conversion.
// NOTE(review): the declaration of tmpstr and the return statement are not visible
// in this listing -- confirm the buffer is large enough for any int value.
36 string int_to_string(int i){
39 sprintf(tmpstr,"%d",i);
47 // These represent derived information from the
48 // query analysis stage. I extract them from a class,
49 // perhaps this is dangerous.
// The three tables below are file-local (static) pointers that start out NULL.
51 static gb_table *gb_tbl=NULL; // Table of all group-by attributes.
52 static aggregate_table *aggr_tbl=NULL; // Table of all referenced aggregates.
54 // static cplx_lit_table *complex_literals=NULL; // Table of literals with constructors.
55 static param_table *param_tbl=NULL; // Table of all referenced parameters.
// Non-static globals: a list of scalar expressions plus four start/end index pairs.
// NOTE(review): presumably the pairs delimit the WHERE, GROUP BY, aggregate and
// SELECT-list ranges inside partial_fcns_list -- confirm against the code that fills them.
57 vector<scalarexp_t *> partial_fcns_list;
58 int wh_partial_start, wh_partial_end;
59 int gb_partial_start, gb_partial_end;
60 int aggr_partial_start, aggr_partial_end;
61 int sl_partial_start, sl_partial_end;
64 // Infer the table of a column reference and return the table ref.
66 // field name and table name. If no table name is used,
67 // search all tables to try to find a unique match.
68 // Of course, plenty of error checking.
70 // Return the set of tablevar indices in the FROM clause
71 // which contain a field with the same name.
//   field  : name of the field to look for.
//   fm     : FROM-clause table variable list; get_schema_refs() supplies the schema refs to scan.
//   Schema : schema catalog, probed with contains_field(schema_ref, field).
72 vector<int> find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){
75 // vector<string> tn = fm->get_schema_names();
76 vector<int> tn = fm->get_schema_refs();
77 // printf("Calling find_source_tables on field %s\n",field.c_str());
// Test each schema ref in turn for a field with this name.
78 for(i=0;i<tn.size();i++){
79 // if(Schema->contains_field(Schema->find_tbl(tn[i]), field) ){
80 if(Schema->contains_field(tn[i], field) ){
82 // printf("\tfound in table %s\n",tn[i].c_str());
// Resolve the FROM-clause table variable that an interface-parameter reference belongs to.
//   ir : interface parameter reference; get_tablevar() gives the table variable name it uses.
//   fm : FROM-clause table variable list.
// NOTE(review): the branch guards and most return statements are not visible in this
// listing. The caller in verify_colref treats a negative result as failure.
88 int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){
90 string tname = ir->get_tablevar();
// With a single table variable in the FROM clause, index 0 is the answer
// (presumably this path is taken only when no name was given -- guard not visible).
92 if(fm->size()==1) return 0;
93 fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str());
// Otherwise look for a table variable whose name equals tname.
96 for(i=0;i<fm->size();++i){
97 if(tname == fm->get_tablevar_name(i))
100 fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str());
105 // compute the index of the tablevar in the from clause that the
107 // return -1 if no tablevar can be imputed.
//   cr     : column reference to resolve (field, optional table name and interface).
//   fm     : FROM-clause table variable list.
//   schema : schema catalog, forwarded to find_source_tables().
// All diagnostics are written to stderr.
108 int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){
113 vector<tablevar_t *> fm_tbls = fm->get_table_list();
115 string field = cr->get_field();
117 // printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str());
// Case 1: the colref uses the default table, so search every table variable for the field.
118 if(cr->uses_default_table() ){
119 tv = find_source_tables(field, fm, schema);
// Ambiguous: list every candidate table variable so the user can pick one.
121 fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ",
122 cr->get_lineno(), cr->get_charno(),field.c_str() );
123 for(i=0;i<tv.size();i++){
124 fprintf(stderr,"%s ",fm_tbls[ tv[i] ]->to_string().c_str() );
126 fprintf(stderr,"\n\tYou must specify one of these.\n");
130 fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n",
131 cr->get_lineno(), cr->get_charno(),field.c_str() );
// Case 2: a table source was written explicitly.
138 // The table source is named -- but is it a schema name
141 string interface = cr->get_interface();
142 table_name = cr->get_table_name();
144 // if interface is not specified, prefer to look at the tablevar names
145 // Check for duplicates.
147 for(i=0;i<fm_tbls.size();++i){
148 if(table_name == fm_tbls[i]->get_var_name())
152 fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() );
// Exactly one match by variable name: that index is the result.
155 if(tv.size() == 1) return(tv[0]);
158 // Tableref not found by looking at tableref vars, or an interface
159 // was specified. Try to match on schema and interface.
160 // Check for duplicates.
// NOTE(review): the comment above says "match on schema", yet the test below compares
// against get_var_name() again (plus the interface) -- confirm whether a schema-name
// accessor was intended here.
161 for(i=0;i<fm_tbls.size();++i){
162 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
166 fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n",
167 cr->get_lineno(), cr->get_charno() );
168 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
169 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
// Nothing matched: report the fully qualified reference that could not be resolved.
174 fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno());
175 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
176 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
184 // Reset temporal properties of a scalar expression
// Calls reset_temporal() on the node's own data type, then recurses into whichever
// children the operator type has (left only, left and right, or an operand list).
// NOTE(review): the case labels are not visible in this listing.
185 void reset_temporal(scalarexp_t *se){
187 vector<scalarexp_t *> operands;
190 se->get_data_type()->reset_temporal();
192 switch(se->get_operator_type()){
// Single-child node.
199 reset_temporal(se->get_left_se());
// Two-child node.
202 reset_temporal(se->get_left_se());
203 reset_temporal(se->get_right_se());
// Another single-child node.
208 reset_temporal(se->get_left_se());
// Node with an operand list: reset every operand.
211 operands = se->get_operands();
212 for(o=0;o<operands.size();o++){
213 reset_temporal(operands[o]);
217 fprintf(stderr,"INTERNAL ERROR in reset_temporal, line %d, character %d: unknown operator type %d\n",
218 se->get_lineno(), se->get_charno(),se->get_operator_type());
223 // Verify that column references exist in their
224 // declared tables. As a side effect, assign
225 // their data types. Other side effects :
227 // return -1 on error
//   se     : scalar expression to walk (recursively).
//   fm     : FROM-clause table variable list used to resolve table variables.
//   schema : schema catalog used for field existence, type names and modifiers.
//   gtbl   : group-by table consulted to tell group-by references from plain column refs.
// NOTE(review): case labels and several return statements are not visible in this listing.
229 int verify_colref(scalarexp_t *se, tablevar_list_t *fm,
230 table_list *schema, gb_table *gtbl){
235 string field, table_source, type_name;
241 vector<scalarexp_t *> operands;
243 switch(se->get_operator_type()){
// Interface parameter: resolve its table variable and record the index on the reference.
248 ir = se->get_ifpref();
249 table_var = infer_tablevar_from_ifpref(ir, fm);
250 if(table_var < 0) return(table_var);
251 ir->set_tablevar_ref(table_var);
// Single-child node: result is that of the child.
254 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
// Two-child node: both sides are always visited so that errors on either side are reported.
256 l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl);
257 r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl);
258 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
261 cr = se->get_colref();
262 field = cr->get_field();
264 // Determine if this is really a GB ref.
265 // (the parser can only see that it's a colref).
267 gb_ref = gtbl->find_gb(cr, fm, schema);
272 se->set_gb_ref(gb_ref);
275 // It's a colref; verify its existence and
276 // record the data type.
277 table_var = infer_tablevar_from_colref(cr,fm,schema);
278 if(table_var < 0) return(table_var);
280 // Store the table ref in the colref.
// The interface is cleared and the table name is overwritten with the resolved
// table variable's name.
281 cr->set_tablevar_ref(table_var);
282 cr->set_schema_ref(fm->get_schema_ref(table_var));
283 cr->set_interface("");
284 cr->set_table_name(fm->get_tablevar_name(table_var));
286 if(! schema->contains_field(cr->get_schema_ref(), field)){
287 fprintf(stderr, "Error, field %s is not in stream %s\n", field.c_str(), schema->get_table_name( cr->get_schema_ref() ).c_str());
// Build the column's data type from the schema's type name and modifier list.
291 type_name = schema->get_type_name(cr->get_schema_ref(), field);
292 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
293 dt = new data_type(type_name, modifiers);
294 se->set_data_type(dt);
296 // Else, it's a gbref, use the GB var's data type.
297 se->set_data_type(gtbl->get_data_type(gb_ref));
// Another single-child node.
304 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
// Operand-list node: visit every operand; r_ret is set to -1 if any operand fails.
306 operands = se->get_operands();
308 for(o=0;o<operands.size();o++){
309 l_ret = verify_colref(operands[o], fm, schema, gtbl);
310 if(l_ret < 0) r_ret = -1;
314 fprintf(stderr,"INTERNAL ERROR in verify_colref, line %d, character %d: unknown operator type %d\n",
315 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate-tree counterpart of verify_colref: walks the predicate pr and runs
// verify_colref on every scalar expression it contains, with the same fm/schema/gtbl.
// The visible two-child branches return -1 when either side reported a negative result.
// NOTE(review): case labels and some return statements are not visible in this listing.
322 int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){
324 std::vector<scalarexp_t *> op_list;
327 switch(pr->get_operator_type()){
// Predicate with a single scalar expression on the left.
329 return(verify_colref(pr->get_left_se(),fm,schema, gtbl) );
// Predicate over two scalar expressions: both sides are always checked.
331 l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ;
332 r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ;
333 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
// Predicate with a single sub-predicate.
336 return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl));
// Predicate over two sub-predicates: both sides are always checked.
338 l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ;
339 r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ;
340 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
// Predicate with an operand list: check every operand, remembering any failure in l_ret.
343 op_list = pr->get_op_list();
345 for(o=0;o<op_list.size();++o){
346 if(verify_colref(op_list[o],fm,schema,gtbl) < 0) l_ret = -1;
350 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
351 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Report whether a scalar expression is built from literals only.
// A NULL expression counts as literal-only (returns true).
// NOTE(review): the case labels and the remaining branches are not visible in this listing.
358 bool literal_only_se(scalarexp_t *se){ // really only literals.
360 vector<scalarexp_t *> operands;
362 if(se == NULL) return(1);
363 switch(se->get_operator_type()){
// Single-child node: literal-only exactly when the child is.
371 return( literal_only_se(se->get_left_se()) );
// Two-child node: both children must be literal-only (right side is skipped if left fails).
373 return( literal_only_se(se->get_left_se()) &&
374 literal_only_se(se->get_right_se()) );
393 // Verify that column references exist in their
394 // declared tables. As a side effect, assign
395 // their data types. Other side effects :
// In the visible code this function does not assign a data type: for each plain column
// reference it records the resolved table variable / schema on the colref and then
// checks that the expression's existing data type subsumes the type found in the schema.
//   se     : scalar expression to walk; NULL is accepted and yields 1.
//   fm     : FROM-clause table variable list.
//   schema : schema catalog.
// NOTE(review): case labels and some return statements are not visible in this listing.
398 int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){
402 string field, table_source, type_name;
408 vector<scalarexp_t *> operands;
410 if(se == NULL) return(1);
412 switch(se->get_operator_type()){
// Single-child node.
420 return( bind_to_schema_se(se->get_left_se(), fm, schema) );
// Two-child node: both sides are always visited.
422 l_ret = bind_to_schema_se(se->get_left_se(), fm, schema);
423 r_ret = bind_to_schema_se(se->get_right_se(), fm, schema);
424 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
427 if(se->is_gb()) return(1); // gb ref not a colref.
429 cr = se->get_colref();
430 field = cr->get_field();
432 tablevar_ref = infer_tablevar_from_colref(cr,fm,schema);
433 if(tablevar_ref < 0){
434 return(tablevar_ref);
436 // Store the table ref in the colref.
437 cr->set_tablevar_ref(tablevar_ref);
438 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
439 cr->set_interface("");
440 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
442 // Check the data type
// dt is a stack-local description of the schema's type for this field.
443 type_name = schema->get_type_name(cr->get_schema_ref(), field);
444 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
445 data_type dt(type_name, modifiers);
446 // if(! dt.equals(se->get_data_type()) ){
447 // if(! dt.subsumes_type(se->get_data_type()) ){
448 if(! se->get_data_type()->subsumes_type(&dt) ){
// NOTE(review): the message says "se's type" first and "table's" second, but the
// arguments pass dt (the schema/table type) first and the se's type second --
// the two numbers appear swapped in the output; confirm and fix.
449 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n",
450 dt.type_indicator(), se->get_data_type()->type_indicator(), cr->to_string().c_str());
457 case SE_AGGR_SE: // Probably I should just return,
458 // aggregate se's are explicitly bound to the schema.
459 // return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) );
// A node that already carries an aggregate reference needs no further binding.
462 if(se->get_aggr_ref() >= 0) return 1;
// Operand-list node: bind every operand; r_ret is set to -1 if any operand fails.
464 operands = se->get_operands();
466 for(o=0;o<operands.size();o++){
467 l_ret = bind_to_schema_se(operands[o], fm, schema);
468 if(l_ret < 0) r_ret = -1;
472 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se, line %d, character %d: unknown operator type %d\n",
473 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate-tree counterpart of bind_to_schema_se: walks the predicate pr and calls
// bind_to_schema_se on every scalar expression it contains, with the same fm/schema.
// The visible two-child branches return -1 when either side reported a negative result.
// NOTE(review): case labels and some return statements are not visible in this listing.
480 int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){
482 vector<scalarexp_t *> op_list;
485 switch(pr->get_operator_type()){
// Predicate with a single scalar expression on the left.
487 return(bind_to_schema_se(pr->get_left_se(),fm,schema) );
// Predicate over two scalar expressions: both sides are always bound.
489 l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ;
490 r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ;
491 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
// Predicate with a single sub-predicate.
494 return(bind_to_schema_pr(pr->get_left_pr(),fm,schema));
// Predicate over two sub-predicates: both sides are always bound.
496 l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ;
497 r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ;
498 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
// Predicate with an operand list: bind every operand, remembering any failure in l_ret.
501 op_list = pr->get_op_list();
503 for(o=0;o<op_list.size();++o){
504 if(bind_to_schema_se(op_list[o],fm,schema) < 0) l_ret = -1;
508 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_pr, line %d, character %d, unknown predicate operator type %d\n",
509 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
520 // Compute the temporal type of a scalar expression from the
521 // temporal types of its parts. Column references are looked up
522 // in tcol (keyed by col_id); a column absent from tcol is varying_t.
524 // Returns a temporal_type value, not an error code.
// NOTE(review): case labels and the remaining branches are not visible in this listing.
526 temporal_type compute_se_temporal(scalarexp_t *se, map<col_id, temporal_type> &tcol){
530 vector<scalarexp_t *> operands;
531 vector<data_type *> odt;
533 vector<bool> handle_ind;
535 switch(se->get_operator_type()){
541 return(varying_t); // actually, this should not be called.
// Single-child node: combine the child's temporal type with the operator.
543 return data_type::compute_temporal_type(
544 compute_se_temporal(se->get_left_se(), tcol), se->get_op()
// Two-child node: combine both children's temporal types and their data types.
547 return data_type::compute_temporal_type(
548 compute_se_temporal(se->get_left_se(), tcol),
549 compute_se_temporal(se->get_right_se(), tcol),
550 se->get_left_se()->get_data_type()->get_type(),
551 se->get_right_se()->get_data_type()->get_type(),
// Column reference: use the recorded temporal type when there is one.
556 col_id cid(se->get_colref() );
557 if(tcol.count(cid) > 0){ return tcol[cid];
558 }else{ return varying_t;}
571 // verify_colref assigned data types to the column refs.
572 // Now assign data types to all other nodes in the
573 // scalar expression.
575 // return -1 on error
//   se       : scalar expression to type (children are typed recursively first).
//   schema   : schema catalog, forwarded to recursive calls.
//   fta_tree : enclosing table expression, forwarded to recursive calls.
//   Ext_fcns : external function catalog used to resolve function-call nodes
//              (aggregate extractors, UDAFs, stateful functions, regular functions).
// Diagnostics are written to stderr. Parameter types come from the file-level param_tbl.
// NOTE(review): case labels, several guards (e.g. the tests on fcn_id and on the
// handle/const indicator vectors) and most return statements are not visible in
// this listing; the comments below describe only the visible statements.
577 int assign_data_types(scalarexp_t *se, table_list *schema,
578 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
582 vector<scalarexp_t *> operands;
583 vector<data_type *> odt;
585 vector<bool> handle_ind;
586 vector<bool> constant_ind;
588 switch(se->get_operator_type()){
// Literal: the data type is built from the literal's own type.
590 dt = new data_type( se->get_literal()->get_type() );
591 se->set_data_type(dt);
592 if( ! dt->is_defined() ){
593 fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n",
594 se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() );
// Query parameter: the data type object comes from param_tbl and is marked constant.
601 string pname = se->get_param_name();
602 dt = param_tbl->get_data_type(pname);
603 // A SE_PARAM can change its value mid-query so using one
604 // to set a window is dangerous. TODO check for this and issue a warning.
605 dt->set_temporal(constant_t);
606 se->set_data_type(dt);
607 if( ! dt->is_defined() ){
608 fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n",
609 pname.c_str(), se->get_lineno(),se->get_charno() );
// Node typed directly as STRING (its case label is not visible in this listing).
615 dt = new data_type( "STRING" );
616 se->set_data_type(dt);
// Unary operator: type the operand, then derive the result type from operand type and operator.
619 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
620 if(l_ret < 0) return -1;
622 dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() );
623 se->set_data_type(dt);
624 if( ! dt->is_defined() ){
625 fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n",
626 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
627 se->get_lineno(), se->get_charno() );
// Binary operator: type both operands (both are always visited), then derive the result type.
633 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
634 r_ret = assign_data_types(se->get_right_se(), schema, fta_tree, Ext_fcns);
635 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
637 dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() );
638 se->set_data_type(dt);
639 if( ! dt->is_defined() ){
640 fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n",
641 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
642 se->get_right_se()->get_data_type()->to_string().c_str(),
643 se->get_lineno(), se->get_charno() );
// Column reference: the type was already attached; only check that it is defined.
649 dt = se->get_data_type();
650 bret = dt->is_defined();
654 fprintf(stderr,"ERROR, column reference type is undefined, line =%d, char = %d, colref=%s\n",
655 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
// Node typed directly as Int (its case label is not visible in this listing).
659 dt = new data_type("Int"); // changed Uint to Int
660 se->set_data_type(dt);
// Aggregate over an expression: type the argument, then derive the aggregate's type
// from the aggregate operator and the argument type.
663 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
664 if(l_ret < 0) return -1;
666 dt = new data_type();
667 dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type());
668 se->set_data_type(dt);
670 if( ! dt->is_defined() ){
671 fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n",
672 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
673 se->get_lineno(), se->get_charno() );
// Function call: type every operand and collect the operand types in odt as the
// signature used for the catalog lookups below.
680 operands = se->get_operands();
682 for(o=0;o<operands.size();o++){
683 l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns);
684 odt.push_back(operands[o]->get_data_type());
685 if(l_ret < 0) r_ret = -1;
687 if(r_ret < 0) return(r_ret);
689 // Is it an aggregate extraction function?
690 fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt);
// An extractor names an actual function plus a sub-aggregate. The leading operands
// go to the sub-aggregate; the actual function receives the sub-aggregate as its
// first argument followed by the remaining operands.
692 int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id);
693 int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id);
694 int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id);
695 // Construct a se for the subaggregate.
696 vector<scalarexp_t *> op_a;
697 int n_aggr_oprs = operands.size()-n_fcn_params+1;
698 for(o=0;o<n_aggr_oprs;++o){
699 op_a.push_back(operands[o]);
701 // check handle params
702 vector<bool> handle_a = Ext_fcns->get_handle_indicators(subaggr_id);
703 for(o=0;o<op_a.size();o++){
705 if(op_a[o]->get_operator_type() != SE_LITERAL &&
706 op_a[o]->get_operator_type() != SE_IFACE_PARAM &&
707 op_a[o]->get_operator_type() != SE_PARAM){
708 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
709 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Check the sub-aggregate's operands against its const indicators.
714 vector<bool> is_const_a=Ext_fcns->get_const_indicators(subaggr_id);
715 for(o=0;o<op_a.size();o++){
717 if(op_a[o]->get_data_type()->get_temporal() != constant_t){
718 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n Line=%d, char=%d.\n",
719 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Build the sub-aggregate node and label it with its function id and data type.
725 scalarexp_t *se_a = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a);
726 se_a->set_fcn_id(subaggr_id);
727 se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id));
728 se_a->set_aggr_id(0); // label this as a UDAF.
731 // Change this se to be the actual function
732 vector<scalarexp_t *> op_f;
733 op_f.push_back(se_a);
734 for(o=n_aggr_oprs;o<operands.size();++o)
735 op_f.push_back(operands[o]);
736 // check handle params
737 vector<bool> handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id);
738 for(o=0;o<op_f.size();o++){
740 if(op_f[o]->get_operator_type() != SE_LITERAL &&
741 op_f[o]->get_operator_type() != SE_IFACE_PARAM &&
742 op_f[o]->get_operator_type() != SE_PARAM){
743 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
744 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Check the actual function's operands against its const indicators.
749 vector<bool> is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id);
750 for(o=0;o<op_f.size();o++){
752 if(op_f[o]->get_data_type()->get_temporal() != constant_t){
753 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n Line=%d, char=%d.\n",
754 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Rewrite this node in place: new operand list, new operator name, function id and type.
760 se->param_list = op_f;
761 se->op = Ext_fcns->get_fcn_name(actual_fcn_id);
762 se->set_fcn_id(actual_fcn_id);
763 se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id));
767 fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str());
// Next candidate: a user-defined aggregate function (UDAF).
771 fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt);
773 se->set_fcn_id(fcn_id);
774 se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
775 se->set_aggr_id(0); // label this as a UDAF.
776 // Finally, verify that all HANDLE parameters are literals or params.
777 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
778 for(o=0;o<operands.size();o++){
780 if(operands[o]->get_operator_type() != SE_LITERAL &&
781 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
782 operands[o]->get_operator_type() != SE_PARAM){
783 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
784 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Check the UDAF's operands against its const indicators.
789 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
790 for(o=0;o<operands.size();o++){
792 if(operands[o]->get_data_type()->get_temporal() != constant_t){
793 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n",
794 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
800 // UDAFS as superaggregates not yet supported.
801 if(se->is_superaggr()){
802 fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
803 se->set_superaggr(false);
808 fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str());
811 // Is it a stateful fcn?
812 fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt);
814 se->set_fcn_id(fcn_id);
815 se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
816 se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun
817 // Finally, verify that all HANDLE parameters are literals or params.
818 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
819 for(o=0;o<operands.size();o++){
821 if(operands[o]->get_operator_type() != SE_LITERAL &&
822 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
823 operands[o]->get_operator_type() != SE_PARAM){
// NOTE(review): this is the stateful-function path, yet the two messages below say
// "UDAF" -- looks copied from the UDAF branch; confirm the intended wording.
824 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
825 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
830 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
831 for(o=0;o<operands.size();o++){
833 if(operands[o]->get_data_type()->get_temporal() != constant_t){
834 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n",
835 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
841 if(se->is_superaggr()){
842 fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
847 fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str());
851 // Is it a regular function?
852 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt);
// No usable match: print the attempted signature (operand types, comma separated).
854 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
855 for(o=0;o<operands.size();o++){
856 if(o>0) fprintf(stderr,", ");
857 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
859 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
// A lookup result of -2 is reported as an ambiguous match.
860 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
865 se->set_fcn_id(fcn_id);
866 dt = Ext_fcns->get_fcn_dt(fcn_id);
868 if(! dt->is_defined() ){
869 fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str());
870 for(o=0;o<operands.size();o++){
871 if(o>0) fprintf(stderr,", ");
872 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
874 fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() );
878 // Finally, verify that all HANDLE parameters are literals or params.
879 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
880 for(o=0;o<operands.size();o++){
882 if(operands[o]->get_operator_type() != SE_LITERAL &&
883 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
884 operands[o]->get_operator_type() != SE_PARAM){
885 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
886 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Check the function's operands against its const indicators.
891 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
892 for(o=0;o<operands.size();o++){
894 if(operands[o]->get_data_type()->get_temporal() != constant_t){
895 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be constant.\n Line=%d, char=%d.\n",
896 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
903 if(se->is_superaggr()){
904 fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
907 se->set_data_type(dt);
910 fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n",
911 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate-tree counterpart of assign_data_types: types every scalar expression in
// the predicate pr and checks that compared operands have comparable types.
//   pr       : predicate to process (sub-predicates are handled recursively).
//   schema   : schema catalog, forwarded to assign_data_types.
//   fta_tree : enclosing table expression, forwarded to assign_data_types.
//   Ext_fcns : external function catalog used to resolve predicate functions.
// Diagnostics are written to stderr; the visible error paths return a negative value.
// NOTE(review): case labels, some guards and several return statements are not
// visible in this listing.
918 int assign_predicate_data_types(predicate_t *pr, table_list *schema,
919 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
923 vector<data_type *> odt;
924 vector<literal_t *> litl;
925 vector<scalarexp_t *> operands;
926 vector<bool> handle_ind;
927 vector<bool> constant_ind;
930 switch(pr->get_operator_type()){
// Membership test against a literal list: type the left side, then require every
// literal's type to be comparable with it under the predicate's operator.
932 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set);
933 litl = pr->get_lit_vec();
934 dt = pr->get_left_se()->get_data_type();
936 for(i=0;i<litl.size();i++){
// NOTE(review): a data_type is allocated per literal; no matching release is visible
// in this listing -- confirm whether it is freed.
937 dtl = new data_type( litl[i]->get_type() );
938 if( ! dt->is_comparable(dtl,pr->get_op()) ){
939 fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n",
940 litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() );
// Comparison of two scalar expressions: type both sides, then check comparability.
948 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
949 r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
950 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
952 if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){
953 fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n",
954 pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(),
955 pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() );
// Predicate with a single sub-predicate.
961 return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set));
// Predicate over two sub-predicates: both sides are always processed.
963 l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
964 r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
965 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
// Predicate function: type every operand and collect the operand types in odt as
// the signature for the catalog lookup.
968 operands = pr->get_op_list();
970 for(o=0;o<operands.size();o++){
971 l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns); // , ext_fcn_set);
972 odt.push_back(operands[o]->get_data_type());
973 if(l_ret < 0) r_ret = -1;
975 if(r_ret < 0) return(r_ret);
977 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt);
// No usable match: print the attempted signature (operand types, comma separated).
979 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
980 for(o=0;o<operands.size();o++){
981 if(o>0) fprintf(stderr,", ");
982 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
984 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
// A lookup result of -2 is reported as an ambiguous match.
985 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
989 // ext_fcn_set.insert(fcn_id);
990 pr->set_fcn_id(fcn_id);
992 // Finally, verify that all HANDLE parameters are literals or params.
993 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
994 for(o=0;o<operands.size();o++){
996 if(operands[o]->get_operator_type() != SE_LITERAL &&
997 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
998 operands[o]->get_operator_type() != SE_PARAM){
999 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
1000 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
// Operands flagged in constant_ind must have a constant temporal type.
1005 constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id());
1006 for(o=0;o<operands.size();o++){
1007 if(constant_ind[o]){
1008 if(operands[o]->get_data_type()->get_temporal() != constant_t){
1009 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be constant.\n Line=%d, char=%d.\n",
1010 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
1017 // Check if this predicate function is special sampling function
1018 pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id());
1023 fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n",
1024 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1032 /////////////////////////////////////////////////////////////////////
1033 //////////////// Make a deep copy of a se / pred tree
1034 /////////////////////////////////////////////////////////////////////
1037 // duplicate a select element
// Returns a newly allocated select_element holding a dup_se() copy of sl->se and
// the same name; aggr_tbl is forwarded to dup_se.
1038 select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){
1039 return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str());
1042 // duplicate a scalar expression.
// Builds a new scalarexp_t of the same kind as se, duplicating children recursively,
// and copies se's annotations onto it via use_decorations_of(se).
// In the visible lines aggr_tbl is only passed along to the recursive calls.
// NOTE(review): most case labels and the return statements are not visible in this listing.
1043 scalarexp_t *dup_se(scalarexp_t *se,
1044 aggregate_table *aggr_tbl
1047 vector<scalarexp_t *> operand_list;
1048 vector<data_type *> dt_signature;
1049 scalarexp_t *ret_se, *l_se, *r_se;
1051 switch(se->get_operator_type()){
// Literal: the new node is constructed from the literal returned by se->get_literal().
1053 ret_se = new scalarexp_t(se->get_literal());
1054 ret_se->use_decorations_of(se);
// Parameter reference, rebuilt from the operator string.
1058 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
1059 ret_se->use_decorations_of(se);
1062 case SE_IFACE_PARAM:
1063 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
1064 ret_se->use_decorations_of(se);
// Column reference: the colref itself is duplicated; rhs.scalarp is copied as a
// pointer value (not duplicated).
1068 ret_se = new scalarexp_t(se->get_colref()->duplicate());
1069 ret_se->rhs.scalarp = se->rhs.scalarp; // carry along notation
1070 ret_se->use_decorations_of(se);
// Unary operator: duplicate the operand, then rebuild the node.
1074 l_se = dup_se(se->get_left_se(), aggr_tbl);
1075 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
1076 ret_se->use_decorations_of(se);
// Binary operator: duplicate both operands, then rebuild the node.
1080 l_se = dup_se(se->get_left_se(), aggr_tbl);
1081 r_se = dup_se(se->get_right_se(), aggr_tbl);
1083 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
1084 ret_se->use_decorations_of(se);
// Star aggregate (no argument expression).
1089 ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str());
1090 ret_se->use_decorations_of(se);
// Aggregate over an expression: duplicate the argument first.
1094 l_se = dup_se(se->get_left_se(), aggr_tbl);
1095 ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se);
1096 ret_se->use_decorations_of(se);
// Function call: duplicate every operand in order.
1101 operand_list = se->get_operands();
1102 vector<scalarexp_t *> new_operands;
1103 for(p=0;p<operand_list.size();p++){
1104 l_se = dup_se(operand_list[p], aggr_tbl);
1105 new_operands.push_back(l_se);
1108 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
1109 ret_se->use_decorations_of(se);
// Unknown operator type: reported on stdout (printf), unlike the stderr diagnostics
// used by the other functions in this file.
1114 printf("INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type());
// Duplicate a predicate tree: builds a new predicate_t of the same kind as pr, with
// scalar expressions copied through dup_se and sub-predicates through dup_pr.
// aggr_tbl is passed along to those calls.
// NOTE(review): most case labels and the return statements are not visible in this listing.
1124 predicate_t *dup_pr(predicate_t *pr,
1125 aggregate_table *aggr_tbl
1128 vector<literal_t *> llist;
1129 scalarexp_t *se_l, *se_r;
1130 predicate_t *pr_l, *pr_r, *ret_pr;
1131 vector<scalarexp_t *> op_list, new_op_list;
1135 switch(pr->get_operator_type()){
// Literal-list predicate: the left expression is duplicated; the literal vector is
// handed to the constructor as returned by get_lit_vec().
1137 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1138 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
// Comparison of two scalar expressions.
1142 se_l = dup_se(pr->get_left_se(), aggr_tbl);
1143 se_r = dup_se(pr->get_right_se(), aggr_tbl);
1144 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
// Predicate with a single sub-predicate.
1148 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1149 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
1152 case PRED_BINARY_OP:
1153 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
1154 pr_r = dup_pr(pr->get_right_pr(), aggr_tbl);
1155 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
// Predicate function: duplicate every operand, then carry over the function id and
// the sampling-function flag.
1158 op_list = pr->get_op_list();
1159 for(o=0;o<op_list.size();++o){
1160 se_l = dup_se(op_list[o], aggr_tbl);
1161 new_op_list.push_back(se_l);
1163 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
1164 ret_pr->set_fcn_id(pr->get_fcn_id());
1165 ret_pr->is_sampling_fcn = pr->is_sampling_fcn;
1169 fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n",
1170 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Build a copy of a parsed query (table_exp_t).
//   te : query to copy; te, te->sl and te->fm are dereferenced without NULL checks.
// Copies, in order: query type, the name map, the query parameters, the select
// list, the FROM clause, the optional predicates, the group-by list, the merge
// variables, the slack expression, and the source position.
// Every dup_se / dup_pr call here passes NULL as the aggregate table.
1178 table_exp_t *dup_table_exp(table_exp_t *te){
1180 table_exp_t *ret = new table_exp_t();
1182 ret->query_type = te->query_type;
// Entry-by-entry copy of the name map.
1184 ss_map::iterator ss_i;
1185 for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){
1186 ret->nmap[(*ss_i).first] = (*ss_i).second;
// Each query parameter gets its own new var_pair_t built from name and val.
1189 for(i=0;i<te->query_params.size();++i){
1190 ret->query_params.push_back(new
1191 var_pair_t(te->query_params[i]->name,te->query_params[i]->val) );
// Select list: new container, same position info; each expression is duplicated
// and appended under its original name.
1195 ret->sl = new select_list_t();
1196 ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno;
1197 vector<select_element *> select_list = te->sl->get_select_list();
1198 for(i=0;i<select_list.size();++i){
1199 scalarexp_t *se = dup_se(select_list[i]->se,NULL);
1200 ret->sl->append(se,select_list[i]->name);
1204 ret->fm = te->fm->duplicate();
// Predicates are copied only when present; otherwise the field keeps whatever
// value the table_exp_t constructor gave it.
1206 if(te->wh) ret->wh = dup_pr(te->wh,NULL);
1207 if(te->hv) ret->hv = dup_pr(te->hv,NULL);
1208 if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL);
1209 if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL);
1210 if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL);
1212 for(i=0;i<te->gb.size();++i){
1213 extended_gb_t *tmp_g = te->gb[i]->duplicate();
1214 ret->gb.push_back(tmp_g);
1217 ret->mergevars = te->mergevars;
// NOTE(review): unlike the predicates above, slack is handed to dup_se without a
// NULL guard -- confirm that slack is always set or that dup_se tolerates NULL.
1219 ret->slack = dup_se(te->slack,NULL);
1220 ret->lineno = te->lineno;
1221 ret->charno = te->charno;
1232 /////////////////////////////////////////////////////////////////////////
1233 // Bind colrefs to a member of their FROM list
//
// Walks the scalar expression se and rebinds every reference currently bound
// to table variable index prev_ref so that it points at new_ref instead.
//   se       : expression to rewrite in place (dereferenced without a NULL check).
//   fm       : FROM-clause table variables; fm[new_ref] supplies the new name
//              (the index is not range-checked here).
//   prev_ref : table-variable index to look for.
//   new_ref  : table-variable index to install.
1235 void bind_colref_se(scalarexp_t *se,
1236 vector<tablevar_t *> &fm,
1237 int prev_ref, int new_ref
1240 vector<scalarexp_t *> operand_list;
1244 switch(se->get_operator_type()){
// Interface parameter reference: update both the index and the tablevar name.
1248 case SE_IFACE_PARAM:
1249 ir = se->get_ifpref();
1250 if(ir->get_tablevar_ref() == prev_ref){
1251 ir->set_tablevar_ref(new_ref);
1252 ir->set_tablevar(fm[new_ref]->get_var_name());
// Column reference: update the index and the table name.
1257 cr=se->get_colref();
1258 if(cr->get_tablevar_ref() == prev_ref){
1259 cr->set_tablevar_ref(new_ref);
1260 // cr->set_interface(fm[new_ref]->get_interface());
1261 cr->set_table_name(fm[new_ref]->get_var_name());
// Operators: recurse into the operand(s).
1266 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
1270 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
1271 bind_colref_se(se->get_right_se(), fm, prev_ref, new_ref);
// Function call: one tagged with an aggregate reference is left untouched;
// otherwise every operand is visited.
1279 if(se->get_aggr_ref() >= 0) return;
1281 operand_list = se->get_operands();
1282 for(p=0;p<operand_list.size();p++){
1283 bind_colref_se(operand_list[p], fm, prev_ref, new_ref);
// NOTE(review): this internal error is written to stdout with printf, whereas
// bind_colref_pr reports its internal error on stderr.
1288 printf("INTERNAL ERROR in bind_colref_se: operator type %d\n",se->get_operator_type());
// Predicate counterpart of bind_colref_se: walks the predicate tree pr and
// applies bind_colref_se (same fm / prev_ref / new_ref) to every scalar
// expression it contains, recursing through child predicates.
//   pr : predicate to rewrite in place (dereferenced without a NULL check).
1299 void bind_colref_pr(predicate_t *pr,
1300 vector<tablevar_t *> &fm,
1301 int prev_ref, int new_ref
1303 vector<scalarexp_t *> op_list;
1306 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression to visit.
1308 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
// Predicate with a left and a right scalar expression.
1312 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
1313 bind_colref_se(pr->get_right_se(), fm, prev_ref, new_ref);
// Predicate with a single child predicate.
1317 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1320 case PRED_BINARY_OP:
1321 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
1322 bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref);
// Predicate with an operand list: visit each operand.
1325 op_list = pr->get_op_list();
1326 for(o=0;o<op_list.size();++o){
1327 bind_colref_se(op_list[o], fm, prev_ref, new_ref);
// Unknown predicate type: report on stderr with the source position.
1332 fprintf(stderr,"INTERNAL ERROR in bind_colref_pr, line %d, character %d, unknown predicate operator type %d\n",
1333 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1342 /////////////////////////////////////////////////////////////////////
1343 // verify that the se refs only literals and params.
1344 // (use to verify that the expression should stay in the hfta
1346 /////////////////////////////////////////////////////////////////////
//
//   se : expression to test; a NULL expression counts as "literal or param only".
// Returns true when the test holds for the whole expression tree.
1348 bool is_literal_or_param_only(scalarexp_t *se){
1350 vector<scalarexp_t *> operands;
1353 if(se == NULL) return(true);
1355 switch(se->get_operator_type()){
// Interface parameters are deliberately rejected: they are handled like colrefs.
1359 case SE_IFACE_PARAM:
1360 return(false); // need to treat as colref
// One-operand operator: decided by its operand.
1362 return(is_literal_or_param_only(se->get_left_se()) );
// Two-operand operator: both operands must qualify.
1365 is_literal_or_param_only(se->get_left_se()) &&
1366 is_literal_or_param_only(se->get_right_se())
1374 // The fcn might have special meaning at the lfta ...
// Unknown operator type: report on stderr with the source position.
1378 fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n",
1379 se->get_lineno(), se->get_charno(),se->get_operator_type());
1387 /////////////////////////////////////////////////////////////////////
1388 // Search for gb refs.
1389 // (use to verify that no gbrefs in a gb def.)
1390 /////////////////////////////////////////////////////////////////////
//
//   se : expression to scan; NULL yields 0.
// Returns an int tally of group-by references found in the tree (operators add
// up the tallies of their operands).
1393 int count_gb_se(scalarexp_t *se){
1395 vector<scalarexp_t *> operands;
1398 if(se == NULL) return(0);
1400 switch(se->get_operator_type()){
1403 case SE_IFACE_PARAM:
// One-operand operator: tally of its operand.
1406 return(count_gb_se(se->get_left_se()) );
// Two-operand operator: sum of both sides.
1409 count_gb_se(se->get_left_se()) +
1410 count_gb_se(se->get_right_se())
// A node with a negative gb ref is not a group-by reference.
1413 if(se->get_gb_ref() < 0) return(0);
// Operand list: accumulate the tallies of all operands into sum.
1419 operands = se->get_operands();
1420 for(o=0;o<operands.size();o++){
1421 sum += count_gb_se(operands[o]);
// Unknown operator type: report on stderr with the source position.
1426 fprintf(stderr,"INTERNAL ERROR in count_gb_se, line %d, character %d: unknown operator type %d\n",
1427 se->get_lineno(), se->get_charno(),se->get_operator_type());
1434 /////////////////////////////////////////////////////////////////////
1435 //////////////// Search for stateful fcns.
1436 /////////////////////////////////////////////////////////////////////
//
//   se : expression to scan; NULL yields 0.
// Returns an int tally for the tree. For a function node the tally of its
// operands is accumulated first; the node itself is then counted when it is
// tagged with an aggregate reference, and its storage state is tested as well.
1439 int se_refs_sfun(scalarexp_t *se){
1441 vector<scalarexp_t *> operands;
1444 if(se == NULL) return(0);
1446 switch(se->get_operator_type()){
1449 case SE_IFACE_PARAM:
// One-operand operator: tally of its operand.
1452 return(se_refs_sfun(se->get_left_se()) );
// Two-operand operator: sum of both sides.
1455 se_refs_sfun(se->get_left_se()) +
1456 se_refs_sfun(se->get_right_se())
// Function call: visit every operand, then look at the node itself.
1464 operands = se->get_operands();
1465 for(o=0;o<operands.size();o++){
1466 sum += se_refs_sfun(operands[o]);
1468 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1470 // for now, stateful functions count as aggregates.
// A non-empty storage state marks the function as stateful.
1471 if(se->get_storage_state() != "")
// Unknown operator type: report on stderr with the source position.
1477 fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n",
1478 se->get_lineno(), se->get_charno(),se->get_operator_type());
1485 // Return a count of the number of stateful fcns in this predicate.
//   pr : predicate to scan (dereferenced without a NULL check).
// The tally is the sum of se_refs_sfun over every scalar expression in the
// predicate tree.
1486 int pred_refs_sfun(predicate_t *pr){
1487 vector<scalarexp_t *> op_list;
1490 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression.
1492 return(se_refs_sfun(pr->get_left_se()) );
// Predicate with a left and a right scalar expression.
1495 se_refs_sfun(pr->get_left_se()) +
1496 se_refs_sfun(pr->get_right_se())
// Predicate with a single child predicate.
1499 return(pred_refs_sfun(pr->get_left_pr()) );
1500 case PRED_BINARY_OP:
1502 pred_refs_sfun(pr->get_left_pr()) +
1503 pred_refs_sfun(pr->get_right_pr())
// Predicate with an operand list: accumulate over all operands.
1506 op_list = pr->get_op_list();
1508 for(o=0;o<op_list.size();++o){
1509 aggr_sum += se_refs_sfun(op_list[o]);
// Unknown predicate type: report on stderr with the source position.
1514 fprintf(stderr,"INTERNAL ERROR in pred_refs_sfun, line %d, character %d, unknown predicate operator type %d\n",
1515 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1522 //////////////////////////////////////////////////
1524 /////////////////////////////////////////////////////////////////////
1525 //////////////// Search for aggregates.
1526 /////////////////////////////////////////////////////////////////////
//
//   se     : expression to scan; NULL yields 0.
//   strict : forwarded to every recursive call; when false, the storage state
//            of function nodes is examined in addition to the aggregate tag.
// Returns an int tally for the tree.
1529 int count_aggr_se(scalarexp_t *se, bool strict){
1531 vector<scalarexp_t *> operands;
1534 if(se == NULL) return(0);
1536 switch(se->get_operator_type()){
1539 case SE_IFACE_PARAM:
// One-operand operator: tally of its operand.
1542 return(count_aggr_se(se->get_left_se(), strict) );
// Two-operand operator: sum of both sides.
1545 count_aggr_se(se->get_left_se(), strict) +
1546 count_aggr_se(se->get_right_se(), strict)
// Function call: visit every operand, then look at the node itself.
1554 operands = se->get_operands();
1555 for(o=0;o<operands.size();o++){
1556 sum += count_aggr_se(operands[o], strict);
1558 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
1560 // now, stateful functions can count as aggregates,
1561 // but only if we are NOT being strict.
1562 if(! strict && se->get_storage_state() != "")
// Unknown operator type: report on stderr with the source position.
1568 fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n",
1569 se->get_lineno(), se->get_charno(),se->get_operator_type());
1576 // Return a count of the number of aggregate fcns in this predicate.
//   pr     : predicate to scan (dereferenced without a NULL check).
//   strict : forwarded unchanged to count_aggr_se / count_aggr_pred.
// The tally is the sum of count_aggr_se over every scalar expression in the
// predicate tree.
1577 int count_aggr_pred(predicate_t *pr, bool strict){
1578 vector<scalarexp_t *> op_list;
1581 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression.
1583 return(count_aggr_se(pr->get_left_se(), strict) );
// Predicate with a left and a right scalar expression.
1586 count_aggr_se(pr->get_left_se(), strict) +
1587 count_aggr_se(pr->get_right_se(), strict)
// Predicate with a single child predicate.
1590 return(count_aggr_pred(pr->get_left_pr(), strict) );
1591 case PRED_BINARY_OP:
1593 count_aggr_pred(pr->get_left_pr(), strict) +
1594 count_aggr_pred(pr->get_right_pr(), strict)
// Predicate with an operand list: accumulate over all operands.
1597 op_list = pr->get_op_list();
1599 for(o=0;o<op_list.size();++o){
1600 aggr_sum += count_aggr_se(op_list[o], strict);
// Unknown predicate type: report on stderr with the source position.
1605 fprintf(stderr,"INTERNAL ERROR in count_aggr_pred, line %d, character %d, unknown predicate operator type %d\n",
1606 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1613 //////////////////////////////////////////////////
1614 /// Analyze tablevar refs
//
// Collect the distinct table-variable indices referenced by a scalar expression.
//   se      : expression to scan; NULL is accepted and adds nothing.
//   reflist : in/out list of indices; an index is appended only when a linear
//             scan shows it is not already present.
1616 void get_tablevar_ref_se(scalarexp_t *se, vector<int> &reflist){
1618 vector<scalarexp_t *> operands;
1623 if(se == NULL) return;
1625 switch(se->get_operator_type()){
// Interface parameter: record the tablevar index it is bound to.
1629 case SE_IFACE_PARAM:
1630 ir = se->get_ifpref();
1631 vref = ir->get_tablevar_ref();
1632 for(o=0;o<reflist.size();++o){
1633 if(vref == reflist[o]) return;
1635 reflist.push_back(vref);
// Operators: recurse into the operand(s).
1638 get_tablevar_ref_se(se->get_left_se(), reflist);
1641 get_tablevar_ref_se(se->get_left_se(), reflist);
1642 get_tablevar_ref_se(se->get_right_se(), reflist);
// Column reference: group-by references are skipped, others are recorded.
1645 if(se->is_gb()) return;
1646 cr = se->get_colref();
1647 vref = cr->get_tablevar_ref();
1648 for(o=0;o<reflist.size();++o){
1649 if(vref == reflist[o]) return;
1651 reflist.push_back(vref);
// Function call: one tagged with an aggregate reference is not descended into.
1657 if(se->get_aggr_ref() >= 0) return;
1659 operands = se->get_operands();
1660 for(o=0;o<operands.size();o++){
1661 get_tablevar_ref_se(operands[o], reflist);
// Unknown operator type: report on stderr with the source position.
1666 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_se, line %d, character %d: unknown operator type %d\n",
1667 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of get_tablevar_ref_se: walks the predicate tree and
// feeds every scalar expression it contains to get_tablevar_ref_se, so that
// reflist ends up holding the distinct table-variable indices referenced.
//   pr      : predicate to scan (dereferenced without a NULL check).
//   reflist : in/out list of table-variable indices.
1674 void get_tablevar_ref_pr(predicate_t *pr, vector<int> &reflist){
1675 vector<scalarexp_t *> op_list;
1678 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression.
1680 get_tablevar_ref_se(pr->get_left_se(),reflist);
// Predicate with a left and a right scalar expression.
1683 get_tablevar_ref_se(pr->get_left_se(),reflist);
1684 get_tablevar_ref_se(pr->get_right_se(),reflist);
// Predicate with a single child predicate.
1687 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1689 case PRED_BINARY_OP:
1690 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
1691 get_tablevar_ref_pr(pr->get_right_pr(),reflist);
// Predicate with an operand list: scan each operand.
1694 op_list = pr->get_op_list();
1695 for(o=0;o<op_list.size();++o){
1696 get_tablevar_ref_se(op_list[o],reflist);
// Unknown predicate type: report on stderr with the source position.
1700 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_pr, line %d, character %d, unknown predicate operator type %d\n",
1701 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1708 // Walk SE tree and gather STATES ref'd by STATEFUL fcns.
//   se          : expression to scan (dereferenced without a NULL check).
//   states_refd : in/out set; receives the storage-state name of every function
//                 node whose get_storage_state() is non-empty.
//   Ext_fcns    : only forwarded to the recursive calls in the visible code.
1710 void gather_fcn_states_se(scalarexp_t *se, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1713 vector<scalarexp_t *> operands;
1715 switch(se->get_operator_type()){
1718 case SE_IFACE_PARAM:
// One-operand operator: recurse into the operand.
1721 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ;
// Two-operand operator: recurse into both sides.
1724 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
1725 gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns);
// Another node kind that carries a left operand (presumably an aggregate over
// an expression -- TODO confirm): recurse into that operand.
1732 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
// Function call: scan the operands first, then record this node's state name.
1735 operands = se->get_operands();
1736 for(o=0;o<operands.size();o++){
1737 gather_fcn_states_se(operands[o], states_refd, Ext_fcns);
1739 if(se->get_storage_state() != ""){
1740 states_refd.insert(se->get_storage_state());
// Unknown operator type: report on stderr with the source position.
1745 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n",
1746 se->get_lineno(), se->get_charno(),se->get_operator_type());
1753 // Walk predicate tree and gather STATES ref'd by STATEFUL fcns.
//   pr          : predicate to scan (dereferenced without a NULL check).
//   states_refd : in/out set filled by gather_fcn_states_se.
//   Ext_fcns    : only forwarded to the nested calls in the visible code.
1755 void gather_fcn_states_pr(predicate_t *pr, set<string> &states_refd, ext_fcn_list *Ext_fcns){
1756 vector<scalarexp_t *> op_list;
1759 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression.
1761 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
// Predicate with a left and a right scalar expression.
1764 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
1765 gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ;
// Predicate with a single child predicate.
1768 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns);
1770 case PRED_BINARY_OP:
1771 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ;
1772 gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ;
// Predicate with an operand list: scan each operand.
1775 op_list = pr->get_op_list();
1776 for(o=0;o<op_list.size();++o){
1777 gather_fcn_states_se(op_list[o],states_refd, Ext_fcns);
// Unknown predicate type: report on stderr with the source position.
1782 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_pr, line %d, character %d, unknown predicate operator type %d\n",
1783 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1793 // walk se tree and collect aggregates into aggregate table.
1794 // duplicate aggregates receive the same idx to the table.
//   se              : expression to scan (dereferenced without a NULL check);
//                     aggregate nodes get their table index stored via set_aggr_id.
//   aggregate_table : table receiving the aggregates (note: the parameter has
//                     the same name as its type).
//   Ext_fcns        : external function table, queried for the storage type,
//                     running-aggregate flag and lfta-bailout flag of UDAFs.
1796 void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){
1799 vector<scalarexp_t *> operands;
1801 switch(se->get_operator_type()){
1804 case SE_IFACE_PARAM:
// One-operand operator: recurse into the operand.
1807 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ;
// Two-operand operator: recurse into both sides.
1810 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns);
1811 build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns);
// Aggregate registered without an operand expression (NULL operand).
1816 agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr());
1817 se->set_aggr_id(agg_id);
// Aggregate registered together with its operand expression.
1820 agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr());
1821 se->set_aggr_id(agg_id);
// Function call: scan the operands first; if the call itself is tagged as a
// UDAF, register it with its function id, operands and Ext_fcns properties.
1824 operands = se->get_operands();
1825 for(o=0;o<operands.size();o++){
1826 build_aggr_tbl_fm_se(operands[o], aggregate_table, Ext_fcns);
1828 if(se->get_aggr_ref() >= 0){ // it's been tagged as a UDAF
1829 agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id()));
1830 se->set_aggr_id(agg_id);
// Unknown operator type: report on stderr with the source position.
1835 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n",
1836 se->get_lineno(), se->get_charno(),se->get_operator_type());
1843 // walk predicate tree and collect aggregates into aggregate table.
1844 // duplicate aggregates receive the same idx to the table.
//   pr              : predicate to scan (dereferenced without a NULL check).
//   aggregate_table : table receiving the aggregates (parameter shares its
//                     name with its type).
//   Ext_fcns        : forwarded to build_aggr_tbl_fm_se.
1846 void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){
1847 vector<scalarexp_t *> op_list;
1850 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression.
1852 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
// Predicate with a left and a right scalar expression.
1855 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
1856 build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ;
// Predicate with a single child predicate.
1859 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns);
1861 case PRED_BINARY_OP:
1862 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ;
1863 build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ;
// Predicate with an operand list: scan each operand.
1866 op_list = pr->get_op_list();
1867 for(o=0;o<op_list.size();++o){
1868 build_aggr_tbl_fm_se(op_list[o],aggregate_table, Ext_fcns);
// Unknown predicate type: report on stderr with the source position.
1873 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_pred, line %d, character %d, unknown predicate operator type %d\n",
1874 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
1882 // Return true if the two scalar expressions
1883 // represent the same value (e.g., use to eliminate
1884 // duplicate aggregates).
//   se1, se2 : expressions to compare; two NULLs are equivalent, one NULL is not.
// Equivalence is structural: same operator type, same operator string, and
// pairwise-equivalent children.
1885 bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){
1886 vector<scalarexp_t *> operands1;
1887 vector<scalarexp_t *> operands2;
1890 // First handle the case of nulls (e.g. COUNT aggrs)
1891 if(se1 == NULL && se2 == NULL) return(true);
1892 if(se1 == NULL || se2 == NULL) return(false);
1894 // In all cases, must be the same operator type and same operator.
1895 if(se1->get_operator_type() != se2->get_operator_type())
1897 if(se1->get_op() != se2->get_op() )
1900 switch(se1->get_operator_type()){
// Literals: delegated to literal_t::is_equivalent.
1902 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
// Query parameters: equal when they have the same name.
1904 return(se1->get_param_name() == se2->get_param_name() );
1905 case SE_IFACE_PARAM:
1906 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
// One-operand operator: compare the operands.
1908 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
// Two-operand operator: the right sides are compared only if the left sides match.
1910 if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) )
1911 return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) );
// Column references: two group-by refs compare by gb index; otherwise the
// colrefs themselves are compared.
1914 if(se1->is_gb() && se2->is_gb())
1915 return( se1->get_gb_ref() == se2->get_gb_ref() );
1916 if(se1->is_gb() || se2->is_gb())
1918 return(se1->get_colref()->is_equivalent(se2->get_colref()) );
// Node with a single operand expression: compare the operands.
1922 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
// Function call: same name, same operand count, pairwise-equivalent operands.
1924 if(se1->get_op() != se2->get_op()) return(false);
1926 operands1 = se1->get_operands();
1927 operands2 = se2->get_operands();
1928 if(operands1.size() != operands2.size()) return(false);
1930 for(o=0;o<operands1.size();o++){
1931 if(! is_equivalent_se(operands1[o], operands2[o]) )
// Unknown operator type: report on stderr with the source position of se1.
1936 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
1937 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
1944 // Similar to is_equivalent_se, but with a looser definition
1945 // of equivalence of colrefs. Here, say they are equivalent
1946 // if their base table is the same. Use to find equivalent
1947 // predicates on base tables.
//   se1, se2 : expressions to compare.
//   Schema   : table list passed to colref_t::is_equivalent_base and to every
//              recursive call.
1948 bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){
1949 vector<scalarexp_t *> operands1;
1950 vector<scalarexp_t *> operands2;
// A group-by column reference is first replaced by its right-hand expression,
// so that the comparison is carried out on that expression instead.
// NOTE(review): se1 and se2 are dereferenced here, before the NULL checks
// below -- a NULL argument would crash before those checks are reached.
1953 if(se1->get_operator_type() == SE_COLREF && se1->is_gb()){
1954 se1 = se1->get_right_se();
1956 if(se2->get_operator_type() == SE_COLREF && se2->is_gb()){
1957 se2 = se2->get_right_se();
1960 // First handle the case of nulls (e.g. COUNT aggrs)
1961 if(se1 == NULL && se2 == NULL) return(true);
1962 if(se1 == NULL || se2 == NULL) return(false);
1964 // In all cases, must be the same operator type and same operator.
1965 if(se1->get_operator_type() != se2->get_operator_type())
1967 if(se1->get_op() != se2->get_op() )
1970 switch(se1->get_operator_type()){
// Literals: delegated to literal_t::is_equivalent.
1972 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
// Query parameters: equal when they have the same name.
1974 return(se1->get_param_name() == se2->get_param_name() );
1975 case SE_IFACE_PARAM:
1976 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
// One-operand operator: compare the operands.
1978 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
// Two-operand operator: the right sides are compared only if the left sides match.
1980 if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) )
1981 return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) );
// Column references: compared on their base table via is_equivalent_base.
1985 if(se1->is_gb() && se2->is_gb())
1986 return( se1->get_gb_ref() == se2->get_gb_ref() );
1987 if(se1->is_gb() || se2->is_gb())
1990 return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) );
// Node with a single operand expression: compare the operands.
1994 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
// Function call: same name, same operand count, pairwise-equivalent operands.
1996 if(se1->get_op() != se2->get_op()) return(false);
1998 operands1 = se1->get_operands();
1999 operands2 = se2->get_operands();
2000 if(operands1.size() != operands2.size()) return(false);
2002 for(o=0;o<operands1.size();o++){
2003 if(! is_equivalent_se_base(operands1[o], operands2[o], Schema) )
// NOTE(review): the message below names is_equivalent_se, although it is
// emitted from is_equivalent_se_base.
2008 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
2009 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
2016 // Find predicates which are equivalent when
2017 // looking at the base tables. Use to find
2018 // common prefilter.
//   p1, p2 : predicates to compare; two NULLs are equivalent, one NULL is not.
//   Schema : table list forwarded to is_equivalent_se_base.
// Both predicates must have the same operator type and operator string; their
// children are then compared pairwise.
2019 bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){
2022 // First handle the case of nulls
2023 if(p1 == NULL && p2 == NULL) return(true);
2024 if(p1 == NULL || p2 == NULL) return(false);
2027 if(p1->get_operator_type() != p2->get_operator_type())
2029 if(p1->get_op() != p2->get_op())
2032 vector<literal_t *> ll1;
2033 vector<literal_t *> ll2;
2034 vector<scalarexp_t *> op_list1, op_list2;
// Dispatching on p2 is the same as dispatching on p1: the types were just
// checked to be equal.
2037 switch(p2->get_operator_type()){
// Two scalar expressions: both sides must be equivalent.
2039 if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2041 return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) );
// Expression against a literal list: the expressions must match and the
// literal vectors must have equal length and be equivalent position by position.
2044 if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
2046 ll1 = p1->get_lit_vec();
2047 ll2 = p2->get_lit_vec();
2048 if(ll1.size() != ll2.size())
2050 for(i=0;i<ll1.size();i++){
2051 if(! ll1[i]->is_equivalent( ll2[i] ) )
// Single child predicate.
2057 return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) );
2059 case PRED_BINARY_OP:
2060 if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema))
2062 return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) );
// Operand list: same length and pairwise-equivalent operands.
2065 op_list1 = p1->get_op_list();
2066 op_list2 = p2->get_op_list();
2067 if(op_list1.size() != op_list2.size()) return(false);
2068 for(o=0;o<op_list1.size();++o){
2069 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) ) return(false);
// Compare two function predicates at the "class" level.
//   p1, p2   : predicates to compare (dereferenced without NULL checks); both
//              must be PRED_FUNC with the same function id.
//   Schema   : forwarded to is_equivalent_se_base.
//   Ext_fcns : supplies the per-operand class indicators of the function.
// The operand lists must have the same length; operands are compared with
// is_equivalent_se_base. Presumably only the operands flagged in cl_op take
// part in the comparison -- TODO confirm against the full loop body.
2080 bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns){
2081 if((p1->get_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC))
2083 if(p1->get_fcn_id() != p2->get_fcn_id())
2085 vector<bool> cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id());
2087 vector<scalarexp_t *> op_list1 = p1->get_op_list();
2088 vector<scalarexp_t *> op_list2 = p2->get_op_list();
2089 if(op_list1.size() != op_list2.size()) return(false);
2090 for(o=0;o<op_list1.size();++o){
2092 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) )
2103 // Verify that the scalar expression (in a such that clause)
2104 // is acceptable in an aggregation query. No column
2105 // references allowed outside aggregates, except for
2106 // references to group-by attributes.
2107 // return true if OK, false if bad.
//   se : expression to check (dereferenced without a NULL check).
// A diagnostic is written to stderr for an offending column reference.
2108 bool verify_aggr_query_se(scalarexp_t *se){
2109 vector <scalarexp_t *> operands;
2112 switch(se->get_operator_type()){
2115 case SE_IFACE_PARAM:
// One-operand operator: decided by its operand.
2118 return(verify_aggr_query_se(se->get_left_se() ) );
// Two-operand operator: both sides must be acceptable (the right side is not
// evaluated when the left side already failed).
2120 return(verify_aggr_query_se(se->get_left_se() ) &&
2121 verify_aggr_query_se(se->get_right_se() ) );
// Column reference: fine if it is a group-by attribute, otherwise an error.
2123 if(se->is_gb() ) return(true);
2124 fprintf(stderr,"ERROR: the select clause in an aggregate query can "
2125 "only reference constants, group-by attributes, and "
2126 "aggregates, (%s) line %d, character %d.\n",
2127 se->get_colref()->to_string().c_str(),
2128 se->get_lineno(), se->get_charno() );
2132 // colrefs and gbrefs allowed.
2133 // check for nested aggregation elsewhere, so just return TRUE
2136 // If its a UDAF, just return true
2137 if(se->get_aggr_ref() >= 0) return true;
// Ordinary function call: every operand must be acceptable.
2139 operands = se->get_operands();
2141 for(o=0;o<operands.size();o++){
2142 if(! verify_aggr_query_se(operands[o]) )
// Unknown operator type: report on stderr with the source position.
2147 fprintf(stderr,"INTERNAL ERROR in verify_aggr_query_se, line %d, character %d: unknown operator type %d\n",
2148 se->get_lineno(), se->get_charno(),se->get_operator_type());
2157 // Find complex literals.
2158 // NOTE : This analysis should be deferred to
2159 // code generation time.
2160 // This analysis drills into aggr se specs.
2161 // Shouldn't this be done at the aggregate table?
2162 // But, it's not a major loss of efficiency.
2163 // UPDATE : drilling into aggr se's is causing a problem
2164 // so I've eliminated it.
//
//   se               : expression to scan (dereferenced without a NULL check).
//   Ext_fcns         : only forwarded to the recursive calls in the visible code.
//   complex_literals : table that receives every literal having a non-empty
//                      constructor name; the literal is tagged with the index
//                      returned by add_cpx_lit.
2166 bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2167 cplx_lit_table *complex_literals){
2169 vector<scalarexp_t *> operands;
2171 scalarexp_t *param_se;
2174 switch(se->get_operator_type()){
// Literal: register it when it is built through a constructor.
2176 l = se->get_literal();
2177 if(l->constructor_name() != ""){
2178 int cl_idx = complex_literals->add_cpx_lit(l, false);
2179 l->set_cpx_lit_ref(cl_idx);
2184 // SE_IFACE_PARAM should not exist when this is called.
// One-operand operator: scan the operand.
2186 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
// Two-operand operator: the right side is scanned only if the scan of the left
// side returned true (short-circuit &&).
2188 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) &&
2189 find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) );
2196 // return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
// Function call: one tagged with an aggregate reference is not descended into;
// otherwise each operand is scanned (the per-operand result is ignored).
2198 if(se->get_aggr_ref() >= 0) return true;
2200 operands = se->get_operands();
2201 for(o=0;o<operands.size();o++){
2202 find_complex_literal_se(operands[o], Ext_fcns, complex_literals);
// Unknown operator type: report on stderr with the source position.
2206 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_se, line %d, character %d: unknown operator type %d\n",
2207 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of find_complex_literal_se: walks the predicate tree,
// scans every scalar expression with find_complex_literal_se, and registers
// constructor-built literals that appear directly in a literal list.
//   pr               : predicate to scan (dereferenced without a NULL check).
//   Ext_fcns         : forwarded to the nested calls.
//   complex_literals : table receiving the complex literals.
2216 void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2217 cplx_lit_table *complex_literals){
2219 vector<literal_t *> litl;
2220 vector<scalarexp_t *> op_list;
2223 switch(pr->get_operator_type()){
// Expression against a literal list: scan the expression, then register each
// list literal that has a non-empty constructor name.
2225 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2226 litl = pr->get_lit_vec();
2227 for(i=0;i<litl.size();i++){
2228 if(litl[i]->constructor_name() != ""){
2229 int cl_idx = complex_literals->add_cpx_lit(litl[i],false);
2230 litl[i]->set_cpx_lit_ref(cl_idx);
// Predicate with a left and a right scalar expression.
2235 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
2236 find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ;
// Predicate with a single child predicate.
2239 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals);
2241 case PRED_BINARY_OP:
2242 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ;
2243 find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ;
// Predicate with an operand list: scan each operand.
2246 op_list = pr->get_op_list();
2247 for(o=0;o<op_list.size();++o){
2248 find_complex_literal_se(op_list[o],Ext_fcns, complex_literals);
// Unknown predicate type: report on stderr with the source position.
2252 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_pr, line %d, character %d, unknown predicate operator type %d\n",
2253 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2261 // Find all things which are passed as handle parameters to functions
2262 // (query parameters, (simple) literals, complex literals)
2263 // These expressions MUST be processed with find_complex_literal_??
2265 // TODO: this analysis drills into the aggregate SEs.
2266 // Shouldn't this be done on the aggr table SEs instead?
2267 // to avoid duplication. The handle registration
2268 // might be expensive ...
2269 // REVISED : drilling into aggr se's is causing problems, eliminated.
//
//   se         : expression to scan (dereferenced without a NULL check).
//   Ext_fcns   : supplies the per-operand handle indicators of each function.
//   handle_tbl : in/out table; one handle_param_tbl_entry is appended per
//                handle parameter found, and the operand is tagged with the
//                entry's position through set_handle_ref.
2271 void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
2272 vector<handle_param_tbl_entry *> &handle_tbl){
2273 vector<scalarexp_t *> operands;
2274 vector<bool> handle_ind;
2276 scalarexp_t *param_se;
2280 switch(se->get_operator_type()){
2285 // case SE_IFACE_PARAM: SHOULD NOT EXIST when this is called
// One-operand operator: scan the operand.
2287 find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
// Two-operand operator: scan both sides.
2290 find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ;
2291 find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ;
2298 // find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
// Function call: one tagged with an aggregate reference is not descended into.
2301 if(se->get_aggr_ref() >= 0) return ;
2303 operands = se->get_operands();
2304 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
2305 for(o=0;o<operands.size();o++){
2307 handle_param_tbl_entry *he;
2308 param_se = operands[o];
// A handle parameter must be a literal or a query parameter.
2309 if(param_se->get_operator_type() != SE_LITERAL &&
2310 param_se->get_operator_type() != SE_PARAM){
2311 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
2312 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
// Query parameter: the entry records the parameter name.
2316 if(param_se->get_operator_type() == SE_PARAM){
2317 he = new handle_param_tbl_entry(
2318 se->get_op(), o, param_se->get_param_name(),
2319 param_se->get_data_type()->get_type_str());
// Literal: a complex literal is recorded by its complex-literal index, any
// other literal through the remaining constructor form.
2321 l = param_se->get_literal();
2322 if(l->is_cpx_lit()){
2323 he = new handle_param_tbl_entry(
2324 se->get_op(), o, l->get_cpx_lit_ref(),
2325 param_se->get_data_type()->get_type_str());
2327 he = new handle_param_tbl_entry(
2329 param_se->get_data_type()->get_type_str());
// The handle reference is the entry's index in handle_tbl (its size before the push).
2332 param_se->set_handle_ref(handle_tbl.size());
2333 handle_tbl.push_back(he);
// Operand that is not treated as a handle parameter: scan it recursively.
2335 find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ;
// Unknown operator type: report on stderr with the source position.
2340 fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n",
2341 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of find_param_handles_se: walks the predicate tree,
// scans every scalar expression with find_param_handles_se, and registers the
// handle parameters of function predicates.
//   pr         : predicate to scan (dereferenced without a NULL check).
//   Ext_fcns   : supplies the per-operand handle indicators of each function.
//   handle_tbl : in/out table of handle parameter entries.
2348 void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
2349 vector<handle_param_tbl_entry *> &handle_tbl){
2350 vector<literal_t *> litl;
2351 vector<scalarexp_t *> op_list;
2352 scalarexp_t *param_se;
2353 vector<bool> handle_ind;
2357 switch(pr->get_operator_type()){
// Predicate with only a left scalar expression.
2359 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
// Predicate with a left and a right scalar expression.
2362 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
2363 find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ;
// Predicate with a single child predicate.
2366 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl);
2368 case PRED_BINARY_OP:
2369 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ;
2370 find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ;
// Function predicate: inspect each operand against the handle indicators.
2373 op_list = pr->get_op_list();
2374 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
2375 for(o=0;o<op_list.size();++o){
2377 handle_param_tbl_entry *he;
2378 param_se = op_list[o];
// A handle parameter must be a literal or a query parameter.
2379 if(param_se->get_operator_type() != SE_LITERAL &&
2380 param_se->get_operator_type() != SE_PARAM){
2381 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
2382 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
// Query parameter: the entry records the parameter name.
2386 if(param_se->get_operator_type() == SE_PARAM){
2387 he = new handle_param_tbl_entry(
2388 pr->get_op(), o, param_se->get_param_name(),
2389 param_se->get_data_type()->get_type_str());
// Literal: a complex literal is recorded by its complex-literal index, any
// other literal through the remaining constructor form.
2391 l = param_se->get_literal();
2392 if(l->is_cpx_lit()){
2393 he = new handle_param_tbl_entry(
2394 pr->get_op(), o, l->get_cpx_lit_ref(),
2395 param_se->get_data_type()->get_type_str());
2397 he = new handle_param_tbl_entry(
2399 param_se->get_data_type()->get_type_str());
// The handle reference is the entry's index in handle_tbl (its size before the push).
2402 param_se->set_handle_ref(handle_tbl.size());
2403 handle_tbl.push_back(he);
// Operand that is not treated as a handle parameter: scan it recursively.
2405 find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ;
// Unknown predicate type: report on stderr with the source position.
2410 fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n",
2411 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2419 // Verify the HAVING predicate : it
2420 // can access gb vars, aggregates, and constants,
2422 // return 1 if OK, -1 if bad.
2423 // Perhaps replace by a pair of fcns which counts non-gb colrefs?
2425 // Extended to deal with cleaning_by, cleaning_when :
2426 // verify that any aggregate function
2427 // has the multiple output property.
// Recursively checks scalar expression `se` as used in the clause named by
// `clause` (the visible code special-cases "CLEANING_WHEN" and "CLEANING_BY").
// Diagnostics are written to stderr. The visible return paths yield 1 when
// the expression is acceptable and -1 when a violation was reported.
// Ext_fcns is handed to aggr_table_entry::multiple_return_allowed() to look up
// the properties of referenced aggregate functions.
// NOTE(review): this listing omits some lines of the body (case labels,
// some returns, closing braces); the notes below cover only what is visible.
2429 int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){
2431 vector<scalarexp_t *> operands;
2432 vector<data_type *> odt;
2435 switch(se->get_operator_type()){
2439 case SE_IFACE_PARAM:
// One operand: the verdict is that of the operand.
2442 return(verify_having_se(se->get_left_se(), clause, Ext_fcns) );
// Two operands: both are checked (so both get reported) before failing.
2444 l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns);
2445 r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns);
2446 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
// A column reference is acceptable only when it is a group-by variable.
2449 if(se->is_gb()) return 1;
2450 fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause,
2451 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
2455 // colrefs and gbrefs allowed.
2456 // check for nested aggregation elsewhere, so just return TRUE
// NOTE(review): the condition fires when the aggregate is NOT a
// superaggregate, yet the message says the clause "references a
// superaggregate". Confirm whether the condition or the wording is wrong.
2457 if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2458 fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2459 se->get_lineno(),se->get_charno(), se->get_op().c_str() );
2463 // Ensure that aggregate refs allow multiple outputs
2464 // in CLEANING_WHEN, CLEANING_BY
2465 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2466 if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2467 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2468 se->get_op().c_str(),se->get_lineno(),se->get_charno() );
// Same two checks again, this time for a node that carries an aggregate
// reference (get_aggr_ref() >= 0).
// NOTE(review): this diagnostic takes its position from
// se->get_colref()->get_lineno()/get_charno(), whereas the equivalent
// diagnostic earlier in this function uses se->get_lineno()/get_charno().
// Confirm that get_colref() is valid for this kind of node; if not, this
// dereferences an invalid pointer while reporting the error.
2476 if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
2477 fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
2478 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_op().c_str() );
2482 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
2483 if(se->get_aggr_ref() >= 0 && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
2484 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
2485 se->get_op().c_str(),se->get_lineno(),se->get_charno() );
2490 if(se->get_aggr_ref() >= 0) // don't descend into aggregates.
2493 operands = se->get_operands();
// Check every operand even after a failure so that all errors are reported.
// NOTE(review): r_ret is only ever set to -1 in this loop; its
// initialisation is not visible in this listing and must be non-negative.
2495 for(o=0;o<operands.size();o++){
2496 l_ret = verify_having_se(operands[o], clause, Ext_fcns);
2497 if(l_ret < 0) r_ret = -1;
2499 if(r_ret < 0) return(-1); else return(1);
2502 fprintf(stderr,"INTERNAL ERROR in verify_having_se, line %d, character %d: unknown operator type %d\n",
2503 se->get_lineno(), se->get_charno(),se->get_operator_type());
2510 // Verify the HAVING predicate : it
2511 // can access gb vars, aggregates, and constants,
2513 // return 1 if OK, -1 if bad.
2514 // Perhaps replace by a pair of fcns which counts non-gb colrefs?
2517 int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){
2519 vector<literal_t *> litl;
2520 vector<scalarexp_t *> op_list;
2523 switch(pr->get_operator_type()){
2525 return(verify_having_se(pr->get_left_se(), clause, Ext_fcns));
2527 l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ;
2528 r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ;
2529 if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1);
2531 return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns));
2532 case PRED_BINARY_OP:
2533 l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns);
2534 r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns);
2535 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
2538 op_list = pr->get_op_list();
2540 for(o=0;o<op_list.size();++o){
2541 if( verify_having_se(op_list[o], clause, Ext_fcns) < 0) l_ret = -1;
2546 fprintf(stderr,"INTERNAL ERROR in verify_having_pred, line %d, character %d, unknown predicate operator type %d\n",
2547 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2554 //////////////////////////////////////////////////////////////////////////
2555 //////////////////////////////////////////////////////////////////////////
2556 /////// cnf and pred analysis and manipulation
2558 // ----------------------------------------------------------------------
2559 //  Convert the predicates to a list of conjuncts
2560 //  (not actually cnf). Do some analysis
2561 //  on their properties.
2562 // ----------------------------------------------------------------------
2565 //  Put into list clist the predicates that
2566 //  are AND'ed together.
2568 void make_cnf_from_pr(predicate_t *pr, vector<cnf_elem *> &clist){
2570 if(pr == NULL) return;
2572 switch(pr->get_operator_type()){
2574 clist.push_back(new cnf_elem(pr));
2578 clist.push_back(new cnf_elem(pr));
2582 clist.push_back(new cnf_elem(pr));
2585 case PRED_BINARY_OP:
2586 if(pr->get_op() == "OR"){
2587 clist.push_back(new cnf_elem(pr));
2590 if(pr->get_op() =="AND"){
2591 make_cnf_from_pr(pr->get_left_pr(),clist);
2592 make_cnf_from_pr(pr->get_right_pr(),clist);
2596 clist.push_back(new cnf_elem(pr));
2600 fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str());
2608 //  Find out what things are referenced in a se,
2609 //  to use for analyzing a predicate.
2610 //  Currently, is it simple (no operators), does it
2611 //  reference a group-by column, does it reference an
2612 //  attribute of a table.
2614 // analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf
2617 void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){
2619 vector<scalarexp_t *> operand_list;
2621 switch(se->get_operator_type()){
2624 case SE_IFACE_PARAM:
2627 if(se->is_gb() ) g=1;
2632 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2636 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
2637 analyze_cnf_se(se->get_right_se(),s,g,a,agr);
2644 if(se->get_aggr_ref() >= 0){
2649 operand_list = se->get_operands();
2650 for(p=0;p<operand_list.size();p++){
2651 analyze_cnf_se(operand_list[p],s,g,a,agr);
2661 void analyze_cnf_pr(predicate_t *pr, int &g, int &a, int &agr){
2663 vector<scalarexp_t *> op_list;
2666 switch(pr->get_operator_type()){
2668 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2669 analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr);
2672 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
2675 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2677 case PRED_BINARY_OP:
2678 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
2679 analyze_cnf_pr(pr->get_right_pr(),g,a,agr);
2682 op_list = pr->get_op_list();
2683 for(o=0;o<op_list.size();++o){
2684 analyze_cnf_se(op_list[o],dum_simple,g,a,agr);
2688 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2689 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2696 //  Analyze a conjunct of a predicate.
2697 //  Is it atomic (e.g., a single predicate),
2698 //  and if so do a further analysis.
// Fills in the analysis fields of conjunct `c`.
// First the whole-predicate flags (pr_gb, pr_attr, pr_aggr) are computed by
// analyze_cnf_pr. The per-operand fields (l_simple/l_gb/l_attr/l_aggr and,
// for comparisons, r_simple/r_gb/r_attr/r_aggr) are then computed with
// analyze_cnf_se.
// NOTE(review): some lines of this body (including the bodies of the `if`
// statements) are not visible in this listing.
2700 void analyze_cnf(cnf_elem *c){
2702 //  analyze the predicate.
2703 analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr);
// Tests for a predicate that is neither a comparison nor an IN predicate.
2705 if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){
2710 //  it's an atomic predicate -- get more info
2713 if(c->pr->get_op() == "=")
2718 if(c->pr->get_operator_type() == PRED_IN)
// Left operand: start from "simple, references nothing" and let
// analyze_cnf_se update the flags.
2723 c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0;
2724 analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr);
2726 if(c->pr->get_operator_type() == PRED_COMPARE){
2727 c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0;
// NOTE(review): this fills the r_* (right-hand) flags but analyzes
// get_left_se() again, the same operand already used for the l_* flags.
// get_right_se() looks intended; as written r_* mirrors l_*. Confirm and fix.
2728 analyze_cnf_se(c->pr->get_left_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr);
// Recursively walks scalar expression `se` and reports tallies through the
// reference parameters n_agr, n_gb, n_par, n_func and n_op. Judging by the
// names and the visible assignments these are: aggregate references,
// group-by references, query parameters (set to 1 in one visible case),
// function cost (taken from Ext_fcns->estimate_fcn_cost) and operators.
// `enter_gb` is only forwarded to the recursive calls in the visible code.
// NOTE(review): several lines of the body (case labels, some sums, breaks)
// are not visible in this listing.
2732 void analyze_constraint_se(scalarexp_t *se,
2733 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){
2734 int l_agr, l_gb, l_par, l_func, l_op;
2735 int r_agr, r_gb, r_par, r_func, r_op;
2737 vector<scalarexp_t *> operand_list;
2739 switch(se->get_operator_type()){
2741 case SE_IFACE_PARAM:
2742 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2745 n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0;
2748 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
2751 analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
2758 analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
// Two operands: each side is analyzed into its own l_*/r_* set and the
// results are then combined (only the n_func sum is visible here).
2762 analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2763 analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb);
2767 n_func=l_func+r_func;
2772 n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
// NOTE(review): Ext_fcns is dereferenced in both branches below, while
// simple_field_constraint() in this file passes NULL for Ext_fcns. Any NULL
// guard would sit on a line not visible in this listing; confirm one exists.
2775 if(se->get_aggr_ref() >= 0){
2776 n_agr=1; n_gb = 0; n_par = 0; n_op = 0;
2778 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2783 n_agr=0; n_gb = 0; n_par = 0; n_op = 0;
2785 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
2788 operand_list = se->get_operands();
2789 for(p=0;p<operand_list.size();p++){
2790 analyze_constraint_se(operand_list[p],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2803 // Estimate the cost of a constraint.
2804 // WARNING a lot of cost assumptions are embedded in the code.
// Predicate-level counterpart of analyze_constraint_se: walks predicate `pr`
// and reports tallies through the reference parameters. In addition to the
// scalar-expression tallies (n_agr, n_gb, n_par, n_func, n_op) it reports
// simple comparisons (n_cmp_s), complex comparisons (n_cmp_c), n_in,
// predicate-function cost (n_pred, from Ext_fcns->estimate_fcn_cost) and
// boolean connectives (n_bool, incremented once per binary predicate node).
// In every visible assignment n_in is either set to 0 or is a sum of
// sub-results.
// NOTE(review): some lines of the body are not visible in this listing.
2805 void analyze_constraint_pr(predicate_t *pr,
2806 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op,
2807 int &n_cmp_s, int &n_cmp_c, int &n_in, int &n_pred, int &n_bool, ext_fcn_list *Ext_fcns, bool enter_gb){
2808 int l_agr, l_gb, l_par, l_func, l_op, l_cmp_s, l_cmp_c, l_in, l_pred,l_bool;
2809 int r_agr, r_gb, r_par, r_func, r_op, r_cmp_s, r_cmp_c, r_in, r_pred,r_bool;
2812 vector<scalarexp_t *> op_list;
2815 switch(pr->get_operator_type()){
// Comparison: sum the tallies of both operands, then count the comparison
// itself as complex or simple depending on the operand data types.
2817 analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb);
2818 analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb);
2819 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2820 n_func=l_func+r_func; n_op=l_op+r_op;
2821 if(pr->get_left_se()->get_data_type()->complex_comparison(
2822 pr->get_right_se()->get_data_type())
2824 n_cmp_s = 0; n_cmp_c=1;
2826 n_cmp_s = 1; n_cmp_c=0;
2828 n_in = 0; n_pred = 0; n_bool = 0;
2831 // Treat IN predicate as sequence of comparisons
2832 analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
// NOTE(review): this IN branch reads pr->get_right_se(), although only the
// left operand and the literal list (get_lit_vec) are otherwise used here,
// and simple_field_constraint() in this file compares the left type with
// itself in what appears to be its IN case. Confirm get_right_se() is valid
// for an IN predicate.
2833 if(pr->get_left_se()->get_data_type()->complex_comparison(
2834 pr->get_right_se()->get_data_type())
2836 n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size();
2838 n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0;
2840 n_in = 0; n_pred = 0; n_bool = 0;
2843 analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb);
2846 case PRED_BINARY_OP:
2847 analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb);
2848 analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb);
2849 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
2850 n_func=l_func+r_func; n_op=l_op+r_op;
2851 n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c;
2852 n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1;
// Predicate function: start from zero, take the function's estimated cost as
// n_pred, then add the tallies of each argument expression.
// NOTE(review): Ext_fcns is dereferenced here; any NULL guard is not visible.
2855 n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0;
2857 n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id());
2860 op_list = pr->get_op_list();
2861 for(o=0;o<op_list.size();++o){
2862 analyze_constraint_se(op_list[o],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
2863 n_agr+=l_agr; n_gb+=l_gb; n_par+=l_par; n_func+=l_func; n_op+=l_op;
// NOTE(review): the message below names analyze_cnf_pr, but this function is
// analyze_constraint_pr; the text looks copied and will mislead debugging.
2867 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
2868 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2873 void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){
2874 int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2875 analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2876 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false);
2878 //printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool);
2879 c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2882 bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){
2883 int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
2884 analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
2885 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true);
2886 //printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool);
2889 int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
2890 //printf("cost=%d\n",cost);
2894 // The prefilter needs to translate constraints on
2895 // gbvars into constraints involving their underlying SEs.
2896 // The following two routines attach GB def info.
2898 void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){
2900 vector<scalarexp_t *> operand_list;
2902 switch(se->get_operator_type()){
2904 case SE_IFACE_PARAM:
2910 se->rhs.scalarp = gtbl->get_def(se->get_gb_ref());
2914 insert_gb_def_se(se->get_left_se(),gtbl);
2917 insert_gb_def_se(se->get_left_se(),gtbl);
2918 insert_gb_def_se(se->get_right_se(),gtbl);
2921 insert_gb_def_se(se->get_left_se(),gtbl);
2924 operand_list = se->get_operands();
2925 for(p=0;p<operand_list.size();p++){
2926 insert_gb_def_se(operand_list[p],gtbl);
2933 void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl){
2934 vector<scalarexp_t *> op_list;
2937 switch(pr->get_operator_type()){
2939 insert_gb_def_se(pr->get_left_se(),gtbl);
2940 insert_gb_def_se(pr->get_right_se(),gtbl);
2943 insert_gb_def_se(pr->get_left_se(),gtbl);
2946 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2948 case PRED_BINARY_OP:
2949 insert_gb_def_pr(pr->get_left_pr(),gtbl);
2950 insert_gb_def_pr(pr->get_right_pr(),gtbl);
2953 op_list = pr->get_op_list();
2954 for(o=0;o<op_list.size();++o){
2955 insert_gb_def_se(op_list[o],gtbl);
2959 fprintf(stderr,"INTERNAL ERROR in insert_gb_def_pr, line %d, character %d, unknown predicate operator type %d\n",
2960 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
2965 // Substitute gbrefs with their definitions
2966 void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){
2968 vector<scalarexp_t *> operand_list;
2969 scalarexp_t *lse,*rse;
2974 switch(se->get_operator_type()){
2976 case SE_IFACE_PARAM:
2981 cr = se->get_colref();
2982 b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field);
2983 b_idx = Schema->get_table_ref(b_tbl);
2984 cr->tablevar_ref = b_idx;
2987 lse=se->get_left_se();
2988 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2989 se->lhs.scalarp = lse->get_right_se();
2990 subs_gbrefs_se(se,Schema);
2993 subs_gbrefs_se(se->get_left_se(),Schema);
2996 lse=se->get_left_se();
2997 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
2998 se->lhs.scalarp = lse->get_right_se();
2999 subs_gbrefs_se(se,Schema);
3002 rse=se->get_right_se();
3003 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3004 se->rhs.scalarp = rse->get_right_se();
3005 subs_gbrefs_se(se,Schema);
3008 subs_gbrefs_se(se->get_left_se(),Schema);
3009 subs_gbrefs_se(se->get_right_se(),Schema);
3012 lse=se->get_left_se();
3013 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3014 se->lhs.scalarp = lse->get_right_se();
3015 subs_gbrefs_se(se,Schema);
3018 subs_gbrefs_se(se->get_left_se(),Schema);
3021 operand_list = se->get_operands();
3022 for(p=0;p<operand_list.size();p++){
3023 lse=operand_list[p];
3024 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3025 se->param_list[p] = lse->get_right_se();
3026 subs_gbrefs_se(se,Schema);
3030 for(p=0;p<operand_list.size();p++){
3031 subs_gbrefs_se(operand_list[p],Schema);
3039 void subs_gbrefs_pr(predicate_t *pr, table_list *Schema){
3040 vector<scalarexp_t *> op_list;
3042 scalarexp_t *lse,*rse;
3044 switch(pr->get_operator_type()){
3046 lse=pr->get_left_se();
3047 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3048 pr->lhs.sexp = lse->get_right_se();
3049 subs_gbrefs_pr(pr,Schema);
3052 rse=pr->get_right_se();
3053 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
3054 pr->rhs.sexp = rse->get_right_se();
3055 subs_gbrefs_pr(pr,Schema);
3058 subs_gbrefs_se(pr->get_left_se(),Schema);
3059 subs_gbrefs_se(pr->get_right_se(),Schema);
3062 lse=pr->get_left_se();
3063 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3064 pr->lhs.sexp = lse->get_right_se();
3065 subs_gbrefs_pr(pr,Schema);
3068 subs_gbrefs_se(pr->get_left_se(),Schema);
3071 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3073 case PRED_BINARY_OP:
3074 subs_gbrefs_pr(pr->get_left_pr(),Schema);
3075 subs_gbrefs_pr(pr->get_right_pr(),Schema);
3078 op_list = pr->get_op_list();
3079 for(o=0;o<op_list.size();++o){
3081 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
3082 pr->param_list[o] = lse->get_right_se();
3083 subs_gbrefs_pr(pr,Schema);
3086 subs_gbrefs_se(op_list[o],Schema);
3090 fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n",
3091 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3097 // Search for references to "expensive" fields.
3098 int expensive_refs_se(scalarexp_t *se, table_list *Schema){
3100 vector<scalarexp_t *> operand_list;
3105 switch(se->get_operator_type()){
3107 case SE_IFACE_PARAM:
3114 return expensive_refs_se(se->rhs.scalarp,Schema);
3115 td = Schema->get_table(se->lhs.colref->schema_ref);
3116 plist = td->get_modifier_list(se->lhs.colref->field);
3117 if(plist->contains_key("expensive"))
3121 return expensive_refs_se(se->get_left_se(),Schema);
3123 cnt += expensive_refs_se(se->get_left_se(),Schema);
3124 cnt += expensive_refs_se(se->get_right_se(),Schema);
3127 operand_list = se->get_operands();
3128 for(p=0;p<operand_list.size();p++){
3129 cnt += expensive_refs_se(operand_list[p],Schema);
3138 int expensive_refs_pr(predicate_t *pr, table_list *Schema){
3139 vector<scalarexp_t *> op_list;
3143 switch(pr->get_operator_type()){
3145 cnt += expensive_refs_se(pr->get_left_se(),Schema);
3146 cnt += expensive_refs_se(pr->get_right_se(),Schema);
3149 return expensive_refs_se(pr->get_left_se(),Schema);
3151 return expensive_refs_pr(pr->get_left_pr(),Schema);
3152 case PRED_BINARY_OP:
3153 cnt += expensive_refs_pr(pr->get_left_pr(),Schema);
3154 cnt += expensive_refs_pr(pr->get_right_pr(),Schema);
3157 op_list = pr->get_op_list();
3158 for(o=0;o<op_list.size();++o){
3159 cnt += expensive_refs_se(op_list[o],Schema);
3163 fprintf(stderr,"INTERNAL ERROR in expensive_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3164 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3170 // TODO: allow "cheap" functions and predicates.
// Decides whether conjunct `c` is a simple constraint on a single field.
// For an equality comparison: neither side may involve aggregates, group-by
// variables, parameters or functions, exactly one column may be referenced
// across both sides, and the comparison must not be a complex one. The
// predicate is normalized in place (operands swapped) so that the column
// reference ends up on the left-hand side.
// A second case applies the same tests to the left operand only.
// Returns false as soon as a test fails.
// NOTE(review): case labels and the `return true` paths are not visible in
// this listing.
3171 bool simple_field_constraint(cnf_elem *c){
3172 vector<literal_t *> ll;
3174 predicate_t *p = c->pr;
3175 int l_agr, l_gb, l_par, l_func, l_op;
3176 int r_agr, r_gb, r_par, r_func, r_op;
3177 col_id_set left_colids, right_colids;
3179 // Verify that it is a simple atom
3180 switch(p->get_operator_type()){
3182 // Must be an equality predicate
3183 // which references no aggregates, parameters, functions, or
3184 // group-by variables, and should be a constraint of
3186 // AND should not require a complex comparison.
3187 if(p->get_op() != "=") return(false);
// NOTE(review): NULL is passed for Ext_fcns. analyze_constraint_se() calls
// Ext_fcns->estimate_fcn_cost() for function nodes; confirm it guards
// against NULL, otherwise a function call in either operand crashes here
// instead of being rejected by the l_func/r_func test below.
3188 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
// NOTE(review): l_op is passed as the operator-count output for the RIGHT
// operand although r_op is declared for it. Neither value is read in the
// visible code, so this is harmless today, but r_op looks intended.
3189 analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,l_op,NULL,false);
3190 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ||
3191 r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false);
3192 // I will count on there being no gbvars in the constraint.
3193 // TODO: allow gbvars which are colrefs.
3194 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3195 gather_se_col_ids(p->get_right_se(), right_colids, NULL);
// Exactly one column reference in total, on whichever side.
3196 if(left_colids.size()+right_colids.size() != 1) return(false);
3199 // Normalize : the colref should be on the lhs.
3200 if(right_colids.size() > 0){
3201 p->swap_scalar_operands();
3204 // Disallow complex (and therefore expensive) comparisons.
3205 if(p->get_left_se()->get_data_type()->complex_comparison(
3206 p->get_right_se()->get_data_type() ) )
3209 // passed all the tests.
3212 // LHS must be a non-gbvar colref.
3213 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
3214 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false);
3215 // I will count on there being no gbvars in the constraint.
3216 // TODO: allow gbvars which are colrefs.
3217 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
3218 if(left_colids.size() != 1) return(false);
3219 // Disallow complex (and therefore expensive) comparisons.
// NOTE(review): the left operand's type is compared with itself here,
// presumably because this case has no scalar right-hand operand; confirm.
3220 if(p->get_left_se()->get_data_type()->complex_comparison(
3221 p->get_left_se()->get_data_type() ) )
3225 // All entries in the IN list must be literals
3226 // Currently, this is the only possibility.
3231 case PRED_BINARY_OP:
// NOTE(review): the message below misspells the function name
// ("simple_field_cosntraint"), which makes it hard to grep for.
3236 fprintf(stderr,"INTERNAL ERROR in simple_field_cosntraint, line %d, character %d, unknown predicate operator type %d\n",
3237 p->get_lineno(), p->get_charno(), p->get_operator_type() );
3244 // As the name implies, return the colref constrained by the
3245 // cnf elem. I will be counting on the LHS being a SE pointing
3248 // This fcn assumes that in fact exactly
3249 // one colref is constrained.
3250 colref_t *get_constrained_colref(scalarexp_t *se){
3252 vector<scalarexp_t *> operand_list;
3255 switch(se->get_operator_type()){
3259 case SE_IFACE_PARAM:
3262 return(se->get_colref());
3264 return(get_constrained_colref(se->get_left_se()));
3266 ret=get_constrained_colref(se->get_left_se());
3267 if(ret == NULL) return(get_constrained_colref(se->get_right_se()));
3273 if(se->get_aggr_ref() >= 0) return NULL;
3275 operand_list = se->get_operands();
3276 for(p=0;p<operand_list.size();p++){
3277 ret=get_constrained_colref(operand_list[p]);
3278 if(ret != NULL) return(ret);
3289 colref_t *get_constrained_colref(predicate_t *p){
3290 return(get_constrained_colref(p->get_left_se()));
3292 colref_t *get_constrained_colref(cnf_elem *c){
3293 return get_constrained_colref(c->pr->get_left_se());
3300 void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p,
3301 string target_fld, string target_tbl, int tblref){
3303 // Make a copy of the predicate to be added.
3304 // ASSUME no aggregates.
3305 predicate_t *pr = dup_pr(src_p,NULL);
3307 // Modify the ref to the base table.
3308 // ASSUME lhs is the colref
3309 pr->get_left_se()->get_colref()->set_table_name(target_tbl);
3310 pr->get_left_se()->get_colref()->set_table_ref(tblref);
3312 if(dst->pr == NULL) dst->pr = pr;
3313 else dst->pr = new predicate_t("OR", dst->pr, pr);
3319 //////////////////////////////////////////////////////
3320 /////////////// Represent a node in a predicate tree
3321 struct common_pred_node{
3324 vector<predicate_t *> predecessor_preds;
3325 vector<common_pred_node *> children;
3337 predicate_t *make_common_pred(common_pred_node *pn){
3340 if(pn->children.size() == 0){
3342 fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node ahs no children and no predicate.\n");
3345 return( dup_pr(pn->pr,NULL) );
3348 predicate_t *curr_pr = make_common_pred( pn->children[0] );
3349 for(n=1;n<pn->children.size();++n){
3350 curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr);
3354 curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr);
3360 bool operator<(const cnf_set &c1, const cnf_set &c2){
3361 if(c1.lfta_id.size() < c2.lfta_id.size())
3367 // Compute the predicates for the prefilter.
3368 // the prefilter preds are returned in prefilter_preds.
3369 // pred_ids is the set of predicates used in the prefilter.
3370 // the encoding is the lfta index, in the top 16 bits,
3371 // then the index of the cnf element in the bottom 16 bits.
3372 // This set is used to identify which preds do not need
3373 // to be generated in the lftas.
// Builds the prefilter predicate set from the per-lfta WHERE conjunct lists.
//   where_list      : where_list[l][c] is conjunct c of lfta l.
//   Schema          : used for the expensive-field test, the predicate
//                     equivalence test and the final gb-ref substitution.
//   Ext_fcns        : forwarded to prefilter_compatible().
//   prefilter_preds : output; receives at most 64 cnf_set entries.
//   pred_ids        : output; each chosen cnf_set adds its ids via
//                     add_pred_ids().
// Steps: collect candidates, merge equivalent ones, merge those occurring in
// the same set of lftas, compact, sort, keep the top 64, then rewrite
// group-by references in the chosen predicates.
// NOTE(review): some lines of the body are not visible in this listing.
3374 void find_common_filter(vector< vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector<cnf_set *> &prefilter_preds, set<unsigned int > &pred_ids){
3377 vector<cnf_set *> pred_list, sort_list;
3379 // Create list of tagged, prefilter-safe CNFs.
3380 for(l=0;l<where_list.size();++l){
3381 for(c=0;c<where_list[l].size();++c){
3382 if(prefilter_compatible(where_list[l][c],Ext_fcns)){
// Only conjuncts that touch no field marked "expensive" qualify.
3383 if(expensive_refs_pr(where_list[l][c]->pr,Schema)==0)
3384 pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c));
3389 // Eliminate duplicates
3390 for(p=0;p<pred_list.size();++p){
3392 for(p2=p+1;p2<pred_list.size();++p2){
3394 if(is_equivalent_pred_base(pred_list[p]->pr, pred_list[p2]->pr,Schema)){
// The later duplicate is folded into the earlier entry and its slot cleared.
3395 pred_list[p]->subsume(pred_list[p2]);
3396 delete pred_list[p2];
3397 pred_list[p2] = NULL;
// NOTE(review): from here on pred_list may contain NULL slots, yet these
// loops dereference pred_list[p] and pred_list[p2]. Any NULL guards would be
// on lines not visible in this listing; confirm they exist in both nested
// loops (above and below).
3404 // combine preds that occur in the exact same lftas.
3405 for(p=0;p<pred_list.size();++p){
3407 for(p2=p+1;p2<pred_list.size();++p2){
3409 if(pred_list[p]->lfta_id == pred_list[p2]->lfta_id){
3410 pred_list[p]->combine_pred(pred_list[p2]);
3411 delete pred_list[p2];
3412 pred_list[p2] = NULL;
3419 // Compress the list
3420 for(p=0;p<pred_list.size();++p){
3422 sort_list.push_back(pred_list[p]);
3426 sort(sort_list.begin(), sort_list.end(),compare_cnf_set());
3428 // Return the top preds, up to 64 of them.
// NOTE(review): cnf_set objects ranked beyond the first 64 are not handed to
// the caller and no delete for them is visible; confirm who owns them.
3429 for(p=0;p<sort_list.size() && p<64;p++){
3430 prefilter_preds.push_back(sort_list[p]);
3431 sort_list[p]->add_pred_ids(pred_ids);
3434 // Substitute gb refs with their defs
3435 // While I'm at it, substitute base table sch ref for tblref.
3436 for(p=0;p<prefilter_preds.size() ;p++){
3437 subs_gbrefs_pr(prefilter_preds[p]->pr,Schema);
3446 ///////////////////////////////////////////////////////////////////////////
3447 //////////////////////////////////////////////////////////////////////////
3449 // Find partial functions and register them.
3450 // Do a DFS so that nested partial fcn calls
3451 // get evaluated in the right order.
3452 // Don't drill down into aggregates -- their arguments are evaluated
3453 // earlier than the select list is.
3455 // Modification for function caching:
3456 // Pass in a ref counter, and partial fcn indicator.
3457 // Cache fcns ref'd at least once.
3458 // pass in NULL for fcn_ref_cnt to turn off fcn caching analysis
// Depth-first search of scalar expression `se` for function calls that are
// partial or whose cost is at least COST_HIGH, registering them in *pf_list.
//   pf_list        : list of registered function expressions; a call
//                    equivalent to an existing entry reuses that index.
//   fcn_ref_cnt    : per-entry reference counts, parallel to *pf_list.
//   is_partial_fcn : per-entry flag, parallel to *pf_list, recording whether
//                    the function is partial.
//   Ext_fcns       : external function table used for the is_partial / cost
//                    lookups.
// Operands are visited before the call itself, so nested calls are
// registered first. Nodes with an aggregate reference are not descended into.
// A NULL `se` is ignored.
// NOTE(review): some lines of the body are not visible in this listing.
3461 void find_partial_fcns(scalarexp_t *se, vector<scalarexp_t *> *pf_list,
3462 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3463 ext_fcn_list *Ext_fcns){
3464 vector<scalarexp_t *> operands;
3467 if(se == NULL) return;
3469 switch(se->get_operator_type()){
3472 case SE_IFACE_PARAM:
3475 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3478 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3479 find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3486 // find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ;
3489 if(se->get_aggr_ref() >= 0) return;
3491 operands = se->get_operands();
3492 for(o=0;o<operands.size();o++){
3493 find_partial_fcns(operands[o], pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3496 if(Ext_fcns->is_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){
// Look for an equivalent, already registered call and share its slot.
// NOTE(review): the comment preceding this function says fcn_ref_cnt may be
// NULL to turn caching analysis off, but it is dereferenced in this loop and
// again when a new entry is pushed below (as is is_partial_fcn). Any NULL
// guards would be on lines not visible in this listing; confirm they exist.
3498 for(f=0;f<pf_list->size();++f){
3499 if(is_equivalent_se(se,(*pf_list)[f])){
3500 se->set_partial_ref(f);
3501 (*fcn_ref_cnt)[f]++;
// No equivalent entry was found: register this call as a new entry when the
// function is partial or when ref counting is enabled.
3508 if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) || fcn_ref_cnt)){
3509 se->set_partial_ref(pf_list->size());
3510 pf_list->push_back(se);
3512 fcn_ref_cnt->push_back(1);
3513 is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id()));
3519 fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n",
3520 se->get_lineno(), se->get_charno(),se->get_operator_type());
3527 void find_partial_fcns_pr(predicate_t *pr, vector<scalarexp_t *> *pf_list,
3528 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
3529 ext_fcn_list *Ext_fcns){
3530 vector<literal_t *> litl;
3531 vector<scalarexp_t *> op_list;
3534 switch(pr->get_operator_type()){
3536 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3539 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3540 find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3543 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3545 case PRED_BINARY_OP:
3546 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3547 find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
3550 op_list = pr->get_op_list();
3551 for(o=0;o<op_list.size();++o){
3552 find_partial_fcns(op_list[o],pf_list,fcn_ref_cnt, is_partial_fcn, Ext_fcns);
3556 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
3557 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Walks predicate `pr` and registers every predicate function that
// Ext_fcns reports as combinable.
//   pr_list : list of distinct combinable predicates found so far. A
//             predicate equivalent (per is_equivalent_pred_base) to an
//             existing entry gets that entry's index as its combinable ref;
//             otherwise it is appended and refers to its own new index.
//   Schema  : passed to the equivalence test.
// Unary and binary predicate nodes are searched recursively.
// NOTE(review): some lines of the body are not visible in this listing;
// litl and op_list are not used in the visible lines.
3566 void find_combinable_preds(predicate_t *pr, vector<predicate_t *> *pr_list,
3567 table_list *Schema, ext_fcn_list *Ext_fcns){
3568 vector<literal_t *> litl;
3569 vector<scalarexp_t *> op_list;
3572 switch(pr->get_operator_type()){
3578 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns);
3580 case PRED_BINARY_OP:
3581 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ;
3582 find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ;
3585 if(Ext_fcns->is_combinable(pr->get_fcn_id())){
3586 for(f=0;f<pr_list->size();++f){
3587 if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){
3588 pr->set_combinable_ref(f);
// f reaching the list size means no equivalent predicate was found.
3592 if(f == pr_list->size()){
3593 pr->set_combinable_ref(pr_list->size());
3594 pr_list->push_back(pr);
// NOTE(review): the message below names find_partial_pr, but this function
// is find_combinable_preds; the text looks copied and will mislead debugging.
3599 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
3600 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3608 //--------------------------------------------------------------------
3609 // Collect refs to aggregates.
3612 void collect_agg_refs(scalarexp_t *se, set<int> &agg_refs){
3613 vector<scalarexp_t *> operands;
3616 if(se == NULL) return;
3618 switch(se->get_operator_type()){
3621 case SE_IFACE_PARAM:
3624 collect_agg_refs(se->get_left_se(), agg_refs) ;
3627 collect_agg_refs(se->get_left_se(), agg_refs);
3628 collect_agg_refs(se->get_right_se(), agg_refs);
3634 agg_refs.insert(se->get_aggr_ref());
3637 if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref());
3639 operands = se->get_operands();
3640 for(o=0;o<operands.size();o++){
3641 collect_agg_refs(operands[o], agg_refs);
3646 fprintf(stderr,"INTERNAL ERROR in collect_agg_refs, line %d, character %d: unknown operator type %d\n",
3647 se->get_lineno(), se->get_charno(),se->get_operator_type());
3654 void collect_aggr_refs_pr(predicate_t *pr, set<int> &agg_refs){
3655 vector<literal_t *> litl;
3656 vector<scalarexp_t *> op_list;
3659 switch(pr->get_operator_type()){
3661 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3664 collect_agg_refs(pr->get_left_se(), agg_refs) ;
3665 collect_agg_refs(pr->get_right_se(), agg_refs) ;
3668 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs);
3670 case PRED_BINARY_OP:
3671 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ;
3672 collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ;
3675 op_list = pr->get_op_list();
3676 for(o=0;o<op_list.size();++o){
3677 collect_agg_refs(op_list[o],agg_refs);
3681 fprintf(stderr,"INTERNAL ERROR in collect_aggr_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
3682 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3690 //--------------------------------------------------------------------
3691 // Collect previously registered partial fcn refs.
3692 // Do a DFS so that nested partial fcn calls
3693 // get evaluated in the right order.
3694 // Don't drill down into aggregates -- their arguments are evaluated
3695 // earlier than the select list is.
3696 // ------------->>> THEN WHY AM I DRILLING DOWN INTO AGGREGATES?
// Scalar-expression walker for partial-function references.
//   se        : expression to scan; a NULL pointer is accepted and ignored.
//   pfcn_refs : receives se->get_partial_ref() for every visited node whose
//               is_partial() is true.
// NOTE(review): pfcn_refs is a set<int>, so iterating it yields the refs in
// numeric order, not in the depth-first visit order -- confirm that callers
// rely only on the numeric order of the refs.
3698 void collect_partial_fcns(scalarexp_t *se, set<int> &pfcn_refs){
3699 vector<scalarexp_t *> operands;
3702 if(se == NULL) return;
3704 switch(se->get_operator_type()){
3707 case SE_IFACE_PARAM:
// One sub-expression (left): recurse.
3710 collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
// Two sub-expressions (left, right): recurse into both.
3713 collect_partial_fcns(se->get_left_se(), pfcn_refs);
3714 collect_partial_fcns(se->get_right_se(), pfcn_refs);
// (Disabled recursion into a left sub-expression, kept for reference.)
3721 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
// A node carrying an aggregate reference stops the walk: its operands are
// not scanned and the node itself is not recorded.
3724 if(se->get_aggr_ref() >= 0) return;
// Operands are scanned before the node itself is recorded, so nested
// partial-function calls are visited first.
3726 operands = se->get_operands();
3727 for(o=0;o<operands.size();o++){
3728 collect_partial_fcns(operands[o], pfcn_refs);
3731 if(se->is_partial()){
3732 pfcn_refs.insert(se->get_partial_ref());
// Unknown operator type: report position and operator code on stderr.
3737 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n",
3738 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate-level walker for partial-function references.
// Dispatches on pr->get_operator_type(), hands every scalar-expression
// operand of pr to collect_partial_fcns, and recurses into child predicates.
//   pr        : predicate to walk. It is dereferenced right away;
//               NOTE(review): no NULL guard is visible here, so callers
//               presumably must pass a non-NULL predicate -- confirm.
//   pfcn_refs : set<int> accumulator, passed by reference through to
//               collect_partial_fcns.
// An unrecognized operator type is reported on stderr as an INTERNAL ERROR.
// NOTE(review): litl does not appear to be referenced in this function.
3745 void collect_partial_fcns_pr(predicate_t *pr, set<int> &pfcn_refs){
3746 vector<literal_t *> litl;
3747 vector<scalarexp_t *> op_list;
3750 switch(pr->get_operator_type()){
// Only the left scalar expression is scanned.
3752 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
// Both the left and the right scalar expressions are scanned.
3755 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
3756 collect_partial_fcns(pr->get_right_se(), pfcn_refs) ;
// A single child predicate: recurse.
3759 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs);
// Two child predicates: recurse into each side.
3761 case PRED_BINARY_OP:
3762 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ;
3763 collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ;
// Operand list: scan every scalar expression returned by get_op_list().
3766 op_list = pr->get_op_list();
3767 for(o=0;o<op_list.size();++o){
3768 collect_partial_fcns(op_list[o],pfcn_refs);
// Unknown operator type: report position and operator code on stderr.
3772 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns_pr, line %d, character %d, unknown predicate operator type %d\n",
3773 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3783 ///////////////////////////////////////////////////////////////
3784 //////////// Exported Functions ///////////////////////////
3785 ///////////////////////////////////////////////////////////////
3788 // Count and collect refs to interface parameters.
// Scalar-expression walker for interface-parameter references.
//   se       : expression to scan; NULL yields 0.
//   ifpnames : receives se->get_ifpref()->to_string() for every
//              SE_IFACE_PARAM node encountered.
// The int result is accumulated from the recursive calls on the
// sub-expressions (presumably the number of interface-parameter references
// found -- TODO confirm the value returned for an SE_IFACE_PARAM node).
3790 int count_se_ifp_refs(scalarexp_t *se, set<string> &ifpnames){
3791 vector<scalarexp_t *> operands;
3795 if(se == NULL) return 0;
3797 switch(se->get_operator_type()){
// Interface parameter: remember its printable name.
3801 case SE_IFACE_PARAM:
3802 ifpnames.insert(se->get_ifpref()->to_string());
// One sub-expression (left): its result is the result.
3805 return count_se_ifp_refs(se->get_left_se(), ifpnames) ;
// Two sub-expressions: the results of both sides are added.
3807 ret = count_se_ifp_refs(se->get_left_se(), ifpnames);
3808 ret += count_se_ifp_refs(se->get_right_se(), ifpnames);
// (Disabled call, apparently left over from collect_partial_fcns.)
3815 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
// A node carrying an aggregate reference contributes 0 and its operands
// are not scanned.
3818 if(se->get_aggr_ref() >= 0) return 0;
// Otherwise add up the results for all operands.
3820 operands = se->get_operands();
3821 for(o=0;o<operands.size();o++){
3822 ret += count_se_ifp_refs(operands[o], ifpnames);
// Unknown operator type: report position and operator code on stderr.
3827 fprintf(stderr,"INTERNAL ERROR in count_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3828 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate-level walker for interface-parameter references.
// Dispatches on pr->get_operator_type(), applies count_se_ifp_refs to every
// scalar-expression operand, and recurses into child predicates.
//   pr       : predicate to scan; NULL yields 0.
//   ifpnames : set<string> accumulator, passed by reference through to
//              count_se_ifp_refs.
// The int result is the sum of the results obtained for the operands
// (presumably a reference count -- TODO confirm in count_se_ifp_refs).
// NOTE(review): litl does not appear to be referenced in this function.
3835 int count_pr_ifp_refs(predicate_t *pr, set<string> &ifpnames){
3836 vector<literal_t *> litl;
3837 vector<scalarexp_t *> op_list;
3840 if(pr == NULL) return 0;
3842 switch(pr->get_operator_type()){
// Only the left scalar expression is scanned.
3844 return count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
// Both the left and the right scalar expressions are scanned.
3846 ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
3847 ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ;
// A single child predicate: recurse.
3850 return count_pr_ifp_refs(pr->get_left_pr(), ifpnames);
// Two child predicates: add the results of both sides.
3851 case PRED_BINARY_OP:
3852 ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ;
3853 ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ;
// Operand list: add the result for every scalar expression in it.
3856 op_list = pr->get_op_list();
3857 for(o=0;o<op_list.size();++o){
3858 ret += count_se_ifp_refs(op_list[o],ifpnames);
// Unknown operator type: report position and operator code on stderr.
3862 fprintf(stderr,"INTERNAL ERROR in count_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3863 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
3870 // Resolve ifp refs, convert them to string literals.
// Scalar-expression walker that replaces interface-parameter references by
// literals, modifying the expression tree in place.
//   se       : expression to rewrite; NULL yields 0.
//   ifm, ifn : passed to ifdb->get_iface_vals and printed in messages as
//              "<ifm>.<ifn>" (presumably machine and interface name --
//              TODO confirm).
//   ifdb     : interface database queried through get_iface_vals.
//   err      : error text is appended here, one line per problem found.
// The int result is accumulated from the recursive calls (presumably
// non-zero when a reference could not be resolved -- TODO confirm the
// values returned in the SE_IFACE_PARAM case).
3872 int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb, string &err){
3873 vector<scalarexp_t *> operands;
3874 vector<string> ifvals;
3882 if(se == NULL) return 0;
3884 switch(se->get_operator_type()){
// Interface parameter: look up its value(s) for interface ifm.ifn.
3888 case SE_IFACE_PARAM:
3889 ir = se->get_ifpref();
3890 ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr);
// The lookup itself reported a problem; serr carries its description.
3892 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n";
// Exactly one value is expected: zero values and several values are errors.
3895 if(ifvals.size() == 0){
3896 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n";
3899 if(ifvals.size() > 1){
3900 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n";
// Turn the node into a literal built from the single value.
// NOTE(review): tmp_l is allocated with new and handed to
// convert_to_literal; who owns it afterwards is not evident here.
3903 tmp_l = new literal_t( ifvals[0]);
3904 se->convert_to_literal(tmp_l);
// One sub-expression (left): its result is the result.
3907 return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ;
// Two sub-expressions: the results of both sides are added.
3909 ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err);
3910 ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err);
// (Disabled call, apparently left over from collect_partial_fcns.)
3917 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
// A node carrying an aggregate reference yields 0 and its operands are
// not processed.
3920 if(se->get_aggr_ref() >= 0) return 0;
// Otherwise process all operands and add up their results.
3922 operands = se->get_operands();
3923 for(o=0;o<operands.size();o++){
3924 ret += resolve_se_ifp_refs(operands[o], ifm, ifn, ifdb,err);
// Unknown operator type: report position and operator code on stderr.
3929 fprintf(stderr,"INTERNAL ERROR in resolve_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
3930 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate-level walker that resolves interface-parameter references.
// Dispatches on pr->get_operator_type(), applies resolve_se_ifp_refs to
// every scalar-expression operand, and recurses into child predicates.
//   pr                  : predicate to rewrite in place.
//   ifm, ifn, ifdb, err : forwarded unchanged to resolve_se_ifp_refs.
// The int result is the sum of the results obtained for the operands.
// NOTE(review): unlike count_pr_ifp_refs, no NULL guard on pr is visible
// before it is dereferenced -- confirm callers never pass NULL.
// NOTE(review): litl does not appear to be referenced in this function.
3937 int resolve_pr_ifp_refs(predicate_t *pr, string ifm, string ifn, ifq_t *ifdb, string &err){
3938 vector<literal_t *> litl;
3939 vector<scalarexp_t *> op_list;
3943 switch(pr->get_operator_type()){
// Only the left scalar expression is processed.
3945 return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
// Both the left and the right scalar expressions are processed.
3947 ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
3948 ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ;
// A single child predicate: recurse.
3951 return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err);
// Two child predicates: add the results of both sides.
3952 case PRED_BINARY_OP:
3953 ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ;
3954 ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ;
// Operand list: process every scalar expression in it.
3957 op_list = pr->get_op_list();
3958 for(o=0;o<op_list.size();++o){
3959 ret += resolve_se_ifp_refs(op_list[o],ifm, ifn, ifdb, err);
// Unknown operator type: report position and operator code on stderr.
3963 fprintf(stderr,"INTERNAL ERROR in resolve_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
3964 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
// Choose a name for the query held in fta_tree.
// Preference order for retval:
//   1) the value stored under "query_name" in fta_tree,
//   2) default_nm, if that value is empty,
//   3) the fixed string "default_query", if default_nm is empty as well.
3972 string impute_query_name(table_exp_t *fta_tree, string default_nm){
3973 string retval = fta_tree->get_val_of_name("query_name");
3974 if(retval == "") retval = default_nm;
3975 if(retval == "") retval = "default_query";
3979 // Convert the parse tree into an intermediate form,
3980 // which admits analysis better.
3982 // TODO : rationalize the error return policy.
3984 // TODO : the query_summary_class object contains
3986 // TODO: revisit the issue when nested subqueries are implemented.
3987 // One possibility: implement accessor methods to hide the
3989 // For now: this class contains data structures not in table_exp_t
3990 // (with a bit of duplication)
3992 // Return NULL on error.
3993 // print error messages to stderr.
3996 query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema,
3997 ext_fcn_list *Ext_fcns, string default_name){
4000 // Create the summary struct -- no analysis is done here.
4001 query_summary_class *qs = new query_summary_class(fta_tree);
4002 qs->query_type = fta_tree->query_type;
4004 ////////////// Do common analysis
4006 // Extract query name. Already imputed for the qnodes.
4007 // qs->query_name = impute_query_name(fta_tree, default_name);
4008 qs->query_name = default_name;
4009 //printf("query name is %s\n",qs->query_name.c_str());
4011 // extract definitions. Don't grab the query name.
4013 map<string, string> nmap = fta_tree->get_name_map();
4014 map<string, string>::iterator nmi;
4015 for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){
4016 string pname = (*nmi).first;
4017 if(pname != "query_name" )
4018 (qs->definitions)[pname] = (*nmi).second;
4024 // First, verify that all the referenced tables are defined.
4025 // Then, bind the tablerefs in the FROM list to schemas in
4027 tablevar_list_t *tlist = fta_tree->get_from();
4028 vector<tablevar_t *> tbl_vec = tlist->get_table_list();
4030 bool found_error = false;
4031 for(i=0;i<tbl_vec.size();i++){
4032 int sch_no = schema->find_tbl(tbl_vec[i]->get_schema_name());
4034 fprintf(stderr,"Error, table <%s> not found in the schema file\n",
4035 tbl_vec[i]->get_schema_name().c_str() );
4036 fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(),
4037 tbl_vec[i]->get_charno() );
4041 tbl_vec[i]->set_schema_ref(sch_no);
4043 // If accessing a UDOP, mangle the name
4044 // This needs to be done in translate_fta.cc, not here.
4046 if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){
4047 string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm;
4048 tbl_vec[i]->set_schema_name(mngl_name);
4052 // No FTA schema should have an interface defined on it.
4053 if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){
4054 fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str());
4056 // Fill in default interface
4057 if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4058 tbl_vec[i]->set_interface("default");
4059 tbl_vec[i]->set_ifq(true);
4061 // Fill in default machine
4062 if(tbl_vec[i]->get_interface()!="" && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){
4063 tbl_vec[i]->set_machine(hostname);
4066 if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
4067 // Record the set of interfaces accessed
4069 if(tbl_vec[i]->get_ifq()){
4070 ifstr = "["+tbl_vec[i]->get_interface()+"]";
4072 if(tbl_vec[i]->get_machine() != "localhost"){
4073 ifstr = "'"+tbl_vec[i]->get_machine()+"'."+tbl_vec[i]->get_interface();
4075 ifstr = tbl_vec[i]->get_interface();
4078 //printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str());
4079 if(qs->definitions.count("_referenced_ifaces")){
4080 ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr;
4082 qs->definitions["_referenced_ifaces"] = ifstr;
4086 if(found_error) return(NULL);
4088 // Ensure that all tablevars are named
4089 // and that no two tablevars have the same name.
4091 // First, gather the set of variable names
4092 set<string> tblvar_names;
4093 for(i=0;i<tbl_vec.size();i++){
4094 if(tbl_vec[i]->get_var_name() != ""){
4095 if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){
4096 fprintf(stderr,"ERROR, query has two table variables named %s. line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno());
4099 tblvar_names.insert(tbl_vec[i]->get_var_name());
4102 // Now generate variable names for unnamed tablevars
4103 for(i=0;i<tbl_vec.size();i++){
4104 if(tbl_vec[i]->get_var_name() == ""){
4106 sprintf(tmpstr,"_t%d",tblvar_no);
4107 string newvar = tmpstr;
4108 while(tblvar_names.count(newvar) > 0){
4110 sprintf(tmpstr,"_t%d",tblvar_no);
4113 tbl_vec[i]->set_range_var(newvar);
4114 tblvar_names.insert(newvar);
4118 // Process inner/outer join properties
4119 int jprop = fta_tree->get_from()->get_properties();
4120 // Require explicit INNER_JOIN, ... specification for join queries.
4122 if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){
4123 fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, WATCHLIST_JOIN, FILTER_JOIN.\n");
4128 if(jprop == OUTER_JOIN_PROPERTY){
4129 for(i=0;i<tbl_vec.size();i++) tbl_vec[i]->set_property(1);
4131 if(jprop == LEFT_OUTER_JOIN_PROPERTY)
4132 tbl_vec[0]->set_property(1);
4133 if(jprop == RIGHT_OUTER_JOIN_PROPERTY)
4134 tbl_vec[tbl_vec.size()-1]->set_property(1);
4135 if(jprop == FILTER_JOIN_PROPERTY){
4136 if(fta_tree->get_from()->get_temporal_range() == 0){
4137 fprintf(stderr,"ERROR, a filter join must have a non-zero temporal range.\n");
4140 if(tbl_vec.size() != 2){
4141 fprintf(stderr,"ERROR, a filter join must be between two table variables.\n");
4144 colref_t *cr = fta_tree->get_from()->get_colref();
4145 string field = cr->get_field();
4147 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field);
4149 fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str());
4152 cr->set_schema_ref(tbl_vec[0]->get_schema_ref());
4153 cr->set_tablevar_ref(0);
4154 string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field);
4155 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
4156 data_type *dt0 = new data_type(type_name, modifiers);
4157 string dt0_type = dt0->get_type_str();
4158 if(dt0_type != "INT" && dt0_type != "UINT" && dt0_type != "LLONG" && dt0_type != "ULLONG"){
4159 fprintf(stderr,"ERROR, the temporal attribute in a filter join must be one of INT/UINT/LLONG/ULLONG.\n");
4162 if(! dt0->is_increasing()){
4163 fprintf(stderr,"ERROR, the temporal attribute in a filter join must be temporal increasing.\n");
4170 /////////////////////
4171 /// Build the query param table
4172 vector<var_pair_t *> query_params = fta_tree->query_params;
4174 for(p=0;p<query_params.size();++p){
4175 string pname = query_params[p]->name;
4176 string dtname = query_params[p]->val;
4179 fprintf(stderr,"ERROR parameter has empty name.\n");
4183 fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str());
4186 data_type *dt = new data_type(dtname);
4187 if(!(dt->is_defined())){
4188 fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str());
4192 qs->add_query_param(pname, dt, false);
4194 if(found_error) return(NULL);
4195 // unpack the param table to a global for easier analysis.
4196 param_tbl=qs->param_tbl;
4199 ////////////////// WATCHLIST specialized analysis
4200 if(qs->query_type == WATCHLIST_QUERY){
4201 // Populate a SELECT clause?
4204 ////////////////// MERGE specialized analysis
4206 if(qs->query_type == MERGE_QUERY){
4208 // 1) there are two *different* streams ref'd in the FROM clause
4209 // However, only emit a warning.
4210 // (can't detect a problem if one of the interfaces is the
4211 // default interface).
4212 // 2) They have the same layout (e.g. same types but the
4213 // names can be different)
4214 // 3) the two columns can unambiguously be mapped to
4215 // fields of the two tables, one per table. Exception:
4216 // the column names are the same and exist in both tables.
4217 // FURTHERMORE the positions must be the same
4218 // 4) after mapping, verify that both colrefs are temporal
4219 // and in the same direction.
4220 if(tbl_vec.size() < 2){
4221 fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() );
4225 vector<field_entry *> fev0 = schema->get_fields(
4226 tbl_vec[0]->get_schema_name()
4231 for(cv=1;cv<tbl_vec.size();++cv){
4232 vector<field_entry *> fev1 = schema->get_fields(
4233 tbl_vec[cv]->get_schema_name()
4236 if(fev0.size() != fev1.size()){
4237 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4241 // Only need to ensure that the list of types are the same.
4242 // The first table supplies the output colnames,
4243 // and all temporal properties are lost, except for the
4244 // merge-by columns.
4246 for(f=0;f<fev0.size();++f){
4247 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4248 data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list());
4249 if(! dt0.equal_subtypes(&dt1) ){
4250 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
4256 // copy over the merge-by cols.
4257 qs->mvars = fta_tree->mergevars;
4259 if(qs->mvars.size() == 0){ // need to discover the merge vars.
4260 int mergevar_pos = -1;
4262 for(f=0;f<fev0.size();++f){
4263 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
4264 if(dt0.is_temporal()){
4269 if(mergevar_pos >= 0){
4270 for(cv=0;cv<tbl_vec.size();++cv){
4271 vector<field_entry *> fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name());
4272 qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() ));
4275 fprintf(stderr,"ERROR, no merge-by column found.\n");
4280 // Ensure same number of tables, merge cols.
4281 if(tbl_vec.size() != qs->mvars.size()){
4282 fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size());
4286 // Ensure that the merge-by are from different tables
4287 // also, sort colrefs so that they align with the FROM list using tmp_crl
4288 set<int> refd_sources;
4289 vector<colref_t *> tmp_crl(qs->mvars.size(),NULL);
4290 for(cv=0;cv<qs->mvars.size();++cv){
4291 int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema);
4293 fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str());
4295 refd_sources.insert(tblvar);
4296 tmp_crl[tblvar] = qs->mvars[cv];
4298 if(refd_sources.size() != qs->mvars.size()){
4299 fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size());
4303 // 1-1 mapping, so use tmp_crl as the merge column list.
4304 qs->mvars = tmp_crl;
4308 // Look up the colrefs in their schemas, verify that
4309 // they are at the same place, that they are both temporal
4311 // It seems that this should be done more in the schema objects.
4312 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field());
4314 fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str());
4317 for(cv=1;cv<qs->mvars.size();++cv){
4318 int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field());
4320 fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str());
4325 field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0);
4326 data_type dt0(fe0->get_type(),fe0->get_modifier_list());
4327 if( (!dt0.is_temporal()) ){
4328 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0);
4331 for(cv=0;cv<qs->mvars.size();++cv){
4332 field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0);
4333 data_type dt1(fe1->get_type(),fe1->get_modifier_list());
4334 if( (!dt1.is_temporal()) ){
4335 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv);
4340 if( dt0.get_temporal() != dt1.get_temporal()){
4341 fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv);
4346 // If there is a SLACK specification, verify
4347 // that it is literal-only and that its type is compatible
4348 // with that of the merge columns
4349 qs->slack = fta_tree->slack;
4351 if(! literal_only_se(qs->slack)){
4352 fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n");
4356 assign_data_types(qs->slack, schema, fta_tree, Ext_fcns );
4357 data_type sdt(&dt0, qs->slack->get_data_type(), string("+"));
4358 if(sdt.get_type() == undefined_t){
4359 fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n");
4365 // All the tests have passed, there is nothing
4370 ////////////////// SELECT specialized analysis
4372 if(qs->query_type == SELECT_QUERY){
4373 // unpack the gb_tbl, aggr_tbl, param_tbl, and complex_literals
4374 // objects into globals, for easier syntax.
4375 gb_tbl = qs->gb_tbl;
4376 aggr_tbl = qs->aggr_tbl;
4379 // Build the table of group-by attributes.
4380 // (se processing done automatically).
4381 // NOTE : Doing the SE processing here is getting cumbersome,
4382 // I should process these individually.
4383 // NOTE : I should check for duplicate names.
4384 // NOTE : I should ensure that the def of one GB does not
4385 // reference the value of another.
4386 vector<extended_gb_t *> gb_list = fta_tree->get_groupby();
4388 string temporal_gbvars = "";
4389 map<string, int> gset_gbnames;
4391 // For generating the set of GB patterns for this aggregation query.
4392 vector<bool> inner_pattern;
4393 vector<vector<bool> > pattern_set;
4394 vector<vector<vector<bool> > > pattern_components;
4396 vector<gb_t *> r_gbs, c_gbs, g_gbs;
4399 for(i=0;i<gb_list.size();i++){
4400 switch(gb_list[i]->type){
4402 retval = gb_tbl->add_gb_attr(
4403 gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns
4406 return NULL; // nothing added to gb_tbl, so this can trigger a segfault 2 lines below
4408 if(gb_tbl->get_data_type(i)->is_temporal()){
4410 if(temporal_gbvars != "") temporal_gbvars+=" ";
4411 temporal_gbvars += gb_tbl->get_name(i);
4415 inner_pattern.clear();
4416 pattern_set.clear();
4417 inner_pattern.push_back(true);
4418 pattern_set.push_back(inner_pattern);
4419 pattern_components.push_back(pattern_set);
4421 gb_tbl->gb_entry_type.push_back("");
4422 gb_tbl->gb_entry_count.push_back(1);
4423 gb_tbl->pattern_components.push_back(pattern_set);
4426 case rollup_egb_type:
4427 r_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4428 for(j=0;j<r_gbs.size();++j){
4429 retval = gb_tbl->add_gb_attr(
4430 r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4434 }else{ // rollup gb can't be temporal
4435 gb_tbl->reset_temporal(gb_tbl->size()-1);
4439 inner_pattern.resize(r_gbs.size());
4440 pattern_set.clear();
4441 for(j=0;j<=r_gbs.size();++j){
4442 for(k=0;k<r_gbs.size();++k){
4444 inner_pattern[k] = true;
4446 inner_pattern[k] = false;
4448 pattern_set.push_back(inner_pattern);
4450 pattern_components.push_back(pattern_set);
4452 gb_tbl->gb_entry_type.push_back("ROLLUP");
4453 gb_tbl->gb_entry_count.push_back(r_gbs.size());
4454 gb_tbl->pattern_components.push_back(pattern_set);
4457 c_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
4458 for(j=0;j<c_gbs.size();++j){
4459 retval = gb_tbl->add_gb_attr(
4460 c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
4464 }else{ // cube gb can't be temporal
4465 gb_tbl->reset_temporal(gb_tbl->size()-1);
4469 inner_pattern.resize(c_gbs.size());
4470 pattern_set.clear();
4471 n_patterns = 1 << c_gbs.size();
4472 for(j=0;j<n_patterns;++j){
4474 for(k=0;k<c_gbs.size();++k,test_bit = test_bit << 1){
4475 if((j & test_bit) != 0)
4476 inner_pattern[k] = true;
4478 inner_pattern[k] = false;
4480 pattern_set.push_back(inner_pattern);
4482 pattern_components.push_back(pattern_set);
4484 gb_tbl->gb_entry_type.push_back("CUBE");
4485 gb_tbl->gb_entry_count.push_back(c_gbs.size());
4486 gb_tbl->pattern_components.push_back(pattern_set);
4488 case gsets_egb_type:
4490 gset_gbnames.clear();
4491 for(j=0;j<gb_list[i]->gb_lists.size();++j){
4492 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4493 for(k=0;k<g_gbs.size();++k){
4494 if(g_gbs[k]->type != GB_COLREF){
4495 fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str());
4498 if(gset_gbnames.count(g_gbs[k]->name) == 0){
4499 retval = gb_tbl->add_gb_attr(
4500 g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns
4504 }else{ // gsets gb can't be temporal
4505 gb_tbl->reset_temporal(gb_tbl->size()-1);
4507 int pos = gset_gbnames.size();
4508 gset_gbnames[g_gbs[k]->name] = pos;
4514 if(gset_gbnames.size() > 63){
4515 fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n");
4519 inner_pattern.resize(gset_gbnames.size());
4520 pattern_set.clear();
4521 set<unsigned long long int> signatures;
4522 for(j=0;j<gb_list[i]->gb_lists.size();++j){
4523 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
4524 set<string> refd_gbs;
4525 for(k=0;k<g_gbs.size();++k){
4526 refd_gbs.insert(g_gbs[k]->name);
4528 fill(inner_pattern.begin(),inner_pattern.end(),false);
4529 unsigned long long int signature = 0;
4530 set<string>::iterator ssi;
4531 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4532 inner_pattern[gset_gbnames[(*ssi)]] = true;
4533 signature |= (1 << gset_gbnames[(*ssi)]);
4535 if(signatures.count(signature)){
4536 fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t");
4537 set<string>::iterator ssi;
4538 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
4539 fprintf(stderr," %s",(*ssi).c_str());
4541 fprintf(stderr,"\n");
4543 signatures.insert(signature);
4544 pattern_set.push_back(inner_pattern);
4547 pattern_components.push_back(pattern_set);
4549 gb_tbl->gb_entry_type.push_back("GROUPING_SETS");
4550 gb_tbl->gb_entry_count.push_back(gset_gbnames.size());
4551 gb_tbl->pattern_components.push_back(pattern_set);
4558 if(found_error) return(NULL);
4560 fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s). Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str());
4564 // Compute the set of patterns. Take the cross product of all pattern components.
4565 vector<vector<bool> > gb_patterns;
4566 int n_components = pattern_components.size();
4567 vector<int> pattern_pos(n_components,0);
4570 vector<bool> pattern;
4571 for(j=0;j<n_components;j++){
4572 pattern.insert(pattern.end(),pattern_components[j][pattern_pos[j]].begin(),
4573 pattern_components[j][pattern_pos[j]].end());
4575 gb_patterns.push_back(pattern);
4576 for(j=0;j<n_components;j++){
4578 if(pattern_pos[j] >= pattern_components[j].size())
4583 if(j >= n_components)
4586 gb_tbl->gb_patterns = gb_patterns;
4589 // Process the supergroup, if any.
4590 vector<colref_t *> sgb = fta_tree->get_supergb();
4591 for(i=0;i<sgb.size();++i){
4592 int gbr = gb_tbl->find_gb(sgb[i],fta_tree->fm, schema);
4594 fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str());
4597 if(qs->sg_tbl.count(gbr)){
4598 fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str());
4600 qs->sg_tbl.insert(gbr);
4602 if(found_error) return(NULL);
4604 if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){
4605 fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4611 predicate_t *wh = fta_tree->get_where();
4612 predicate_t *hv = fta_tree->get_having();
4613 predicate_t *cw = fta_tree->get_cleaning_when();
4614 predicate_t *cb = fta_tree->get_cleaning_by();
4615 predicate_t *closew = fta_tree->get_closing_when();
4617 if(closew != NULL && gb_tbl->gb_patterns.size()>1){
4618 fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
4624 // Verify that all column references are valid, and if so assign
4627 vector<select_element *> sl_list = fta_tree->get_sl_vec();
4628 for(i=0;i<sl_list.size();i++){
4629 retval = verify_colref(sl_list[i]->se, fta_tree->fm, schema, gb_tbl);
4630 if(retval < 0) found_error = true;
4633 retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl);
4634 if(retval < 0) found_error = true;
4636 retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl);
4637 if(retval < 0) found_error = true;
4639 retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl);
4640 if(retval < 0) found_error = true;
4642 retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl);
4643 if(retval < 0) found_error = true;
4645 retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl);
4646 if(retval < 0) found_error = true;
4648 if(found_error) return(NULL);
4650 // Verify that all of the scalar expressions
4651 // and comparison predicates have compatible types.
4654 string temporal_output_fields;
4655 for(i=0;i<sl_list.size();i++){
4656 retval = assign_data_types(sl_list[i]->se, schema, fta_tree, Ext_fcns );
4660 if(sl_list[i]->se->get_data_type()->is_temporal()){
4662 temporal_output_fields += " "+int_to_string(i);
4667 fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s). Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str());
4671 retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns);
4672 if(retval < 0) found_error = true;
4674 retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns);
4675 if(retval < 0) found_error = true;
4677 retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns);
4678 if(retval < 0) found_error = true;
4680 retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns);
4681 if(retval < 0) found_error = true;
4683 retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns);
4684 if(retval < 0) found_error = true;
4686 if(found_error) return(NULL);
4688 // Impute names for the unnamed columns.
4689 set<string> curr_names;
4691 for(s=0;s<sl_list.size();++s){
4692 curr_names.insert(sl_list[s]->name);
4694 for(s=0;s<sl_list.size();++s){
4695 if(sl_list[s]->name == "")
4696 sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se);
4700 // Check the aggregates.
4701 // No aggrs allowed in the WHERE predicate.
4702 // (no aggrs in the GB defs, but that is examined elsewhere)
4703 // Therefore, aggregates are allowed only the select clause.
4705 // The query is an aggregation query if there is a group-by clause, or
4706 // if any aggregate is referenced. If there is a group-by clause,
4707 // at least one aggregate must be referenced.
4708 // If the query is an aggregate query, the scalar expressions in
4709 // the select clause can reference only constants, aggregates, or group-by
4711 // Also, if the query is an aggregate query, build a table referencing
4714 // No nested aggregates allowed.
4717 // First, count references in the WHERE predicate.
4718 // (if there are any references, report an error).
4719 // can ref group vars, tuple fields, and stateful fcns.
4722 retval = count_aggr_pred(wh, true);
4724 fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n");
4729 // NOTE : Here I need an analysis of the having clause
4730 // to verify that it only refs GB attrs and aggregates.
4731 // (also, superaggregates, stateful fcns)
4733 retval = verify_having_pred(hv, "HAVING", Ext_fcns);
4734 if(retval < 0) return(NULL);
4737 // Cleaning by has same reference rules as Having
4739 retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns);
4740 if(retval < 0) return(NULL);
4743 // Cleaning when has same reference rules as Having,
4744 // except that references to non-superaggregates are not allowed.
4745 // This is tested for when "CLEANING_BY" is passed in as the clause.
4747 retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns);
4748 if(retval < 0) return(NULL);
4751 // CLOSING_WHEN : same rules as HAVING
4753 retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns);
4754 if(retval < 0) return(NULL);
4758 // Collect aggregates in the HAVING and CLEANING clauses
4760 build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns);
4763 build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns);
4766 build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns);
4769 build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns);
4772 // Collect aggregate refs in the SELECT clause.
4774 for(i=0;i<sl_list.size();i++)
4775 build_aggr_tbl_fm_se(sl_list[i]->se, aggr_tbl, Ext_fcns);
4778 // Collect references to states of stateful functions
4780 gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns);
4783 gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns);
4786 gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns);
4789 gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns);
4791 if(closew != NULL){ // should be no stateful fcns here ...
4792 gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns);
4794 for(i=0;i<sl_list.size();i++)
4795 gather_fcn_states_se(sl_list[i]->se, qs->states_refd, Ext_fcns);
4798 // If this is an aggregate query, it normally references
4799 // some aggregates. It's not necessary though, just emit a warning.
4800 // (acts as SELECT DISTINCT)
4802 bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0;
4803 if(is_aggr_query && aggr_tbl->size() == 0){
4804 fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n");
4807 // If this is an aggregate query,
4808 // 1) verify that the SEs in the SELECT clause reference
4809 // only constants, aggregates, and group-by attributes.
4810 // 2) No aggregate scalar expression references an aggregate
4811 // or any stateful function.
4812 // 3) either it references both CLEANING clauses or neither.
4813 // 4) all superaggregates must have the superaggr_allowed property.
4814 // 5) all aggregates ref'd in the CLEANING_WHEN and CLEANING_BY
4815 // clauses must have the multiple_output property.
4819 if(gb_list.size() == 0){
4820 fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n");
4823 // Ensure that at least one gbvar is temporal
4824 if(! fta_tree->name_exists("no_temporal_aggr")){
4825 bool found_temporal = false;
4826 for(i=0;i<gb_tbl->size();i++){
4827 if(gb_tbl->get_data_type(i)->is_temporal()){
4828 found_temporal = true;
4831 if(! found_temporal){
4832 fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n");
4837 if((!cb && cw) || (cb && !cw)){
4838 fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n");
4842 bool refs_running = false;
4844 for(a=0; a<aggr_tbl->size(); ++a){
4845 refs_running |= aggr_tbl->is_running_aggr(a);
4850 fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n");
4854 fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n");
4859 if(refs_running && !closew){
4860 fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n");
4865 for(i=0;i<sl_list.size();i++){
4866 bool ret_bool = verify_aggr_query_se(sl_list[i]->se);
4867 st_ok = st_ok && ret_bool;
4872 for(i=0;i<aggr_tbl->size();i++){
4873 if(aggr_tbl->is_superaggr(i)){
4874 if(! aggr_tbl->superaggr_allowed(i)){
4875 fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str());
4879 if(aggr_tbl->is_builtin(i)){
4880 if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){
4881 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4885 vector<scalarexp_t *> opl = aggr_tbl->get_operand_list(i);
4887 for(o=0;o<opl.size();++o){
4888 if(count_aggr_se(opl[o], true) > 0){
4889 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
4896 // Ensure that non-aggregate query doesn't reference some things
4898 fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n");
4902 fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n");
4905 if(qs->states_refd.size()){
4906 fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n");
4913 // Convert the predicates into CNF. OK to pass NULL ptr.
4914 make_cnf_from_pr(wh, qs->wh_cnf);
4915 make_cnf_from_pr(hv, qs->hav_cnf);
4916 make_cnf_from_pr(cb, qs->cb_cnf);
4917 make_cnf_from_pr(cw, qs->cw_cnf);
4918 make_cnf_from_pr(closew, qs->closew_cnf);
4920 // Analyze the predicates.
4922 for(i=0;i<qs->wh_cnf.size();i++)
4923 analyze_cnf(qs->wh_cnf[i]);
4924 for(i=0;i<qs->hav_cnf.size();i++)
4925 analyze_cnf(qs->hav_cnf[i]);
4926 for(i=0;i<qs->cb_cnf.size();i++)
4927 analyze_cnf(qs->cb_cnf[i]);
4928 for(i=0;i<qs->cw_cnf.size();i++)
4929 analyze_cnf(qs->cw_cnf[i]);
4930 for(i=0;i<qs->closew_cnf.size();i++)
4931 analyze_cnf(qs->closew_cnf[i]);
4934 // At this point, the old analysis program
4935 // gathered all refs to partial functions,
4936 // complex literals, and parameters accessed via a handle.
4937 // I think it's better to delay this
4938 // until code generation time, as the query will be
4939 // in general split.
4946 ///////////////////////////////////////////////////////////////////////
4948 // Expand gbvars with their definitions.
// Walk the scalar expression tree rooted at se and replace group-by
// variable references by their definitions taken from gb_tbl.
//   se     : expression to rewrite; child slots are overwritten in place.
//   gb_tbl : table supplying the definition of each group-by variable.
// For a group-by reference the result is a fresh copy (dup_se) of the
// definition, so the caller must use the returned pointer rather than se.
4950 scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){
4953 switch(se->get_operator_type()){
4956 case SE_IFACE_PARAM:
// Only the left operand is rewritten on this path.
4959 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
// Both operands are rewritten on this path.
4962 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
4963 se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl);
// Group-by reference: substitute a duplicate of its definition.
4967 return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) );
4970 // don't descend into aggr defs.
// Rewrite every entry of the parameter list in place.
4976 for(o=0;o<se->param_list.size();o++){
4977 se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl);
// Unrecognized operator type: report it on stderr as an internal error.
4981 fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n",
4982 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Predicate counterpart of expand_gbvars_se: rewrites, in place, every
// scalar expression hanging off the predicate tree rooted at pr so that
// group-by variable references are replaced by their definitions.
//   pr     : predicate to rewrite.
//   gb_tbl : table supplying the group-by variable definitions.
4988 void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){
4989 vector<scalarexp_t *> op_list;
4993 switch(pr->get_operator_type()){
// Only the left scalar operand is rewritten on this path.
4995 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl);
// Both scalar operands are rewritten on this path.
4998 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ;
4999 pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ;
// Recurse into the single child predicate.
5002 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
5004 case PRED_BINARY_OP:
// Recurse into both child predicates.
5005 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
5006 expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ;
// Rewrite each scalar expression in the parameter list.
5009 for(o=0;o<pr->param_list.size();++o){
5010 pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ;
// Unrecognized predicate operator: report it on stderr as an internal error.
5014 fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n",
5015 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5023 // return true if the se / pr contains any gbvar on the list.
// Report whether the scalar expression se references any group-by
// variable whose index is a member of gref_set.
//   se       : expression to inspect (not modified by the visible code).
//   gref_set : set of group-by reference indices to look for.
5026 bool contains_gb_se(scalarexp_t *se, set<int> &gref_set){
5027 vector<scalarexp_t *> operands;
5031 switch(se->get_operator_type()){
5034 case SE_IFACE_PARAM:
// The answer is that of the left operand alone.
5037 return contains_gb_se(se->get_left_se(),gref_set);
// True if either operand contains a listed group-by variable.
5039 return( contains_gb_se(se->get_left_se(),gref_set) ||
5040 contains_gb_se(se->get_right_se(),gref_set) );
// Group-by reference: direct membership test on its index.
5043 return( gref_set.count(se->get_gb_ref()) > 0);
5046 // don't descend into aggr defs.
// Check each operand; the || short-circuits, so once found is true the
// remaining operands are no longer visited.
5052 operands = se->get_operands();
5053 for(o=0;o<operands.size();o++){
5054 found = found || contains_gb_se(operands[o], gref_set);
// Unrecognized operator type: report it on stderr as an internal error.
5058 fprintf(stderr,"INTERNAL ERROR in contains_gb_se, line %d, character %d: unknown operator type %d\n",
5059 se->get_lineno(), se->get_charno(),se->get_operator_type());
// Report whether any scalar expression under the predicate pr references
// a group-by variable whose index is a member of gref_set.
//   pr       : predicate to inspect.
//   gref_set : set of group-by reference indices to look for.
5066 bool contains_gb_pr(predicate_t *pr, set<int> &gref_set){
5067 vector<scalarexp_t *> op_list;
5071 switch(pr->get_operator_type()){
// The answer is that of the left scalar operand alone.
5073 return contains_gb_se(pr->get_left_se(), gref_set);
// True if either scalar operand contains a listed group-by variable.
5075 return (contains_gb_se(pr->get_left_se(),gref_set)
5076 || contains_gb_se(pr->get_right_se(),gref_set) );
// The answer is that of the single child predicate.
5078 return contains_gb_pr(pr->get_left_pr(),gref_set) ;
5079 case PRED_BINARY_OP:
// True if either child predicate contains a listed group-by variable.
5080 return (contains_gb_pr(pr->get_left_pr(),gref_set)
5081 || contains_gb_pr(pr->get_right_pr(),gref_set) );
// Check each operand; the || short-circuits once found is true.
5083 op_list = pr->get_op_list();
5084 for(o=0;o<op_list.size();++o){
5085 found = found ||contains_gb_se(op_list[o],gref_set) ;
// Unrecognized predicate operator: report it on stderr as an internal error.
5089 fprintf(stderr,"INTERNAL ERROR in contains_gb_pr, line %d, character %d, unknown predicate operator type %d\n",
5090 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5097 // Gather the set of columns accessed in this se.
5098 // Descend into aggregate functions.
// Collect into cid_set the column ids referenced by the expression se.
//   se      : expression to scan.
//   cid_set : output set of column ids (presumably the colref branch
//             inserts ci into it - the inserting line is not visible here).
//   gtbl    : group-by table used to resolve group-by references to their
//             definitions; a group-by reference with gtbl == NULL is
//             reported as an internal error.
5100 void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){
5102 vector<scalarexp_t *> operands;
5108 switch(se->get_operator_type()){
5111 case SE_IFACE_PARAM:
// Scan the left operand only.
5114 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
// Scan both operands.
5117 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
5118 gather_se_col_ids(se->get_right_se(),cid_set,gtbl);
// Column reference: build its col_id; a negative tblvar_ref means the
// colref was never bound to a table variable, which only draws a warning.
5122 ci.load_from_colref(se->get_colref() );
5123 if(ci.tblvar_ref < 0){
5124 fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str());
5129 fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n");
// Group-by reference: scan the definition of the referenced variable.
5132 gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl);
// Scan the left operand (presumably the aggregate operand - confirm
// against the case label).
5138 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
// Scan every operand of the expression.
5141 operands = se->get_operands();
5142 for(o=0;o<operands.size();o++){
5143 gather_se_col_ids(operands[o], cid_set,gtbl);
// Unrecognized operator type: report it on stderr as an internal error.
5147 fprintf(stderr,"INTERNAL ERROR in gather_se_col_ids, line %d, character %d: unknown operator type %d\n",
5148 se->get_lineno(), se->get_charno(),se->get_operator_type());
5154 // Gather the set of columns accessed in this predicate.
// Collect into cid_set the column ids referenced anywhere under the
// predicate pr, delegating each scalar operand to gather_se_col_ids.
//   pr      : predicate to scan.
//   cid_set : output set of column ids.
//   gtbl    : group-by table forwarded to gather_se_col_ids.
5156 void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){
5157 vector<scalarexp_t *> op_list;
5160 switch(pr->get_operator_type()){
// Scan the left scalar operand only.
5162 gather_se_col_ids(pr->get_left_se(), cid_set,gtbl);
// Scan both scalar operands.
5165 gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ;
5166 gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ;
// Recurse into the single child predicate.
5169 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5171 case PRED_BINARY_OP:
// Recurse into both child predicates.
5172 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
5173 gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ;
// Scan every scalar operand in the operand list.
5176 op_list = pr->get_op_list();
5177 for(o=0;o<op_list.size();++o){
5178 gather_se_col_ids(op_list[o],cid_set,gtbl) ;
// Unrecognized predicate operator: report it on stderr as an internal error.
5182 fprintf(stderr,"INTERNAL ERROR in gather_pr_col_ids, line %d, character %d, unknown predicate operator type %d\n",
5183 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5190 // Gather the set of special operator or comparison functions referenced by this se.
// Collect into fcn_set the names of the helper functions that the
// expression se depends on: literal constructors and the functions that
// implement operators on complex data types.
//   se      : expression to scan; data types are read from its operands.
//   fcn_set : output set of function names.
5192 void gather_se_opcmp_fcns(scalarexp_t *se, set<string> &fcn_set){
5194 data_type *ldt, *rdt;
5196 vector<scalarexp_t *> operands;
5198 switch(se->get_operator_type()){
// Literal: record its constructor name when it has a non-empty one.
5200 if( se->get_literal()->constructor_name() != "")
5201 fcn_set.insert( se->get_literal()->constructor_name() );
5205 // SE_IFACE_PARAM should not exist when this is called.
// One-operand form: if the operand type reports this operator as complex,
// record the implementing function, then scan the operand.
5207 ldt = se->get_left_se()->get_data_type();
5208 if(ldt->complex_operator(se->get_op()) ){
5209 fcn_set.insert( ldt->get_complex_operator(se->get_op()) );
5211 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
// Two-operand form: the lookup is made on the left type with the right
// type as argument, then both operands are scanned.
5214 ldt = se->get_left_se()->get_data_type();
5215 rdt = se->get_right_se()->get_data_type();
5217 if(ldt->complex_operator(rdt, se->get_op()) ){
5218 fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) );
5220 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
5221 gather_se_opcmp_fcns(se->get_right_se(),fcn_set);
// Scan the left operand only.
5228 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
// Scan every operand of the expression.
5231 operands = se->get_operands();
5232 for(o=0;o<operands.size();o++){
5233 gather_se_opcmp_fcns(operands[o], fcn_set);
// Unrecognized operator type: report it on stderr as an internal error.
5237 fprintf(stderr,"INTERNAL ERROR in gather_se_opcmp_fcns, line %d, character %d: unknown operator type %d\n",
5238 se->get_lineno(), se->get_charno(),se->get_operator_type());
5244 // Gather the set of special operator or comparison functions referenced by this predicate.
// Collect into fcn_set the names of the comparison functions required by
// the predicate pr on complex data types, plus everything that
// gather_se_opcmp_fcns reports for its scalar operands.
//   pr      : predicate to scan.
//   fcn_set : output set of function names.
5246 void gather_pr_opcmp_fcns(predicate_t *pr, set<string> &fcn_set){
5247 data_type *ldt, *rdt;
5248 vector<scalarexp_t *> operands;
5251 switch(pr->get_operator_type()){
// One-operand form: the left type is compared against itself.
5253 ldt = pr->get_left_se()->get_data_type();
5254 if(ldt->complex_comparison(ldt) ){
5255 fcn_set.insert( ldt->get_comparison_fcn(ldt) );
5257 gather_se_opcmp_fcns(pr->get_left_se(), fcn_set);
// Two-operand form: the left type is compared against the right type.
5260 ldt = pr->get_left_se()->get_data_type();
5261 rdt = pr->get_right_se()->get_data_type();
5262 if(ldt->complex_comparison(rdt) ){
5263 fcn_set.insert( ldt->get_comparison_fcn(rdt) );
5265 gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ;
5266 gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ;
// Recurse into the single child predicate.
5269 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5271 case PRED_BINARY_OP:
// Recurse into both child predicates.
5272 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
5273 gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ;
// Scan every scalar operand in the operand list.
5276 operands = pr->get_op_list();
5277 for(o=0;o<operands.size();o++){
5278 gather_se_opcmp_fcns(operands[o], fcn_set);
// NOTE(review): the message below names verify_predicate_colref although
// it is emitted from gather_pr_opcmp_fcns; it looks like a copy-paste
// leftover and makes the diagnostic misleading.
5282 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
5283 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
5290 // find the temporal variable divisor if any.
5291 // Only forms allowed : temporal_colref, temporal_colref/const
5292 // temporal_colref/const + const
// Search se for a temporal column combined with a literal and report the
// literal value.
//   se  : expression to inspect.
//   gbt : group-by table used to follow group-by references to their
//         definitions.
//   fnm : output; set to the field name of the temporal column when found.
// retval starts at 0 and is overwritten only by the sscanf below.
5295 long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){
5296 long long int retval = 0;
5297 data_type *ldt, *rdt;
5299 vector<scalarexp_t *> operands;
5300 scalarexp_t *t_se, *c_se;
5303 switch(se->get_operator_type()){
5308 // SE_IFACE_PARAM should not exist when this is called.
// Pick the temporal operand (t_se) and the constant operand (c_se).
// NOTE(review): both assignment pairs below are identical, so the
// is_temporal() test on the left type has no effect; if the second pair
// was meant to swap left and right, a temporal right operand is never
// selected - confirm the intent.
5312 ldt = se->get_left_se()->get_data_type();
5313 if(ldt->is_temporal()){
5314 t_se = se->get_left_se();
5315 c_se = se->get_right_se();
5317 t_se = se->get_left_se();
5318 c_se = se->get_right_se();
// Require exactly one temporal side: t_se temporal and c_se not.
5320 if((! t_se->get_data_type()->is_temporal()) || c_se->get_data_type()->is_temporal())
// Additive offsets are looked through by recursing into the temporal side.
5323 the_op = se->get_op();
5324 if(the_op == "+" || the_op == "-")
5325 return find_temporal_divisor(t_se, gbt,fnm);
// Column combined with a literal: record the field name and parse the
// literal text into retval.
// NOTE(review): %qd is a non-standard (BSD/glibc) conversion; the standard
// spelling for long long is %lld.
5327 if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){
5328 fnm = t_se->get_colref()->get_field();
5329 string lits = c_se->get_literal()->to_string();
5330 sscanf(lits.c_str(),"%qd",&retval);
// Group-by reference: analyze the definition of the referenced variable.
5338 return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm);
// Bare temporal expression with a colref: report its field name.
5340 if(se->get_data_type()->is_temporal()){
5341 fnm = se->get_colref()->get_field();
// Unrecognized operator type: report it on stderr as an internal error.
5352 fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n",
5353 se->get_lineno(), se->get_charno(),se->get_operator_type());
5360 // Create meaningful but unique names for the columns.
// Convenience overload: gathers the names already used by the select
// elements in sel_list into a set and delegates to the set-based
// impute_colname overload for the expression se.
//   sel_list : existing select elements; only their name fields are read.
//   se       : expression that needs a column name.
5361 string impute_colname(vector<select_element *> &sel_list, scalarexp_t *se){
5362 set<string> curr_names;
5364 for(s=0;s<sel_list.size();++s){
5365 curr_names.insert(sel_list[s]->name);
5367 return impute_colname(curr_names, se);
// Derive a column name for the expression se and make it unique with
// respect to curr_names.
//   curr_names : names already taken; the chosen name is inserted into it,
//                so the set is modified.
//   se         : expression to name.
// The name is assembled from the operator type and, for compound
// expressions, from the first operand (parameter name, interface
// parameter name, column field or operator).
5370 string impute_colname(set<string> &curr_names, scalarexp_t *se){
5373 vector<scalarexp_t *> operand_list;
5376 switch(se->get_operator_type()){
// Parameter: prefix the parameter name.
5381 ret = "Param_" + se->get_param_name();
5383 case SE_IFACE_PARAM:
5384 ret = "Iparam_" + se->get_ifpref()->get_pname();
// Column reference: use the field name itself.
5387 ret = se->get_colref()->get_field() ;
// Compound expression: refine the name using the left operand.
5398 seo = se->get_left_se();
5399 switch(se->get_left_se()->get_operator_type()){
5401 ret += "_PARAM_"+seo->get_param_name();
5403 case SE_IFACE_PARAM:
5404 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
// strncmp is non-zero when the first ret.size() characters of opstr
// differ from ret, i.e. when opstr does not begin with ret.
5407 opstr = seo->get_colref()->get_field();
5408 if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5416 opstr = seo->get_op();
5417 if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
5418 ret += "_" + seo->get_op();
5424 opstr = seo->get_op();
5425 ret += "_" + seo->get_op();
// Expression with an operand list: refine the name using the first
// operand, when there is one.
5438 operand_list = se->get_operands();
5439 if(operand_list.size() > 0){
5440 seo = operand_list[0];
5441 switch(seo->get_operator_type()){
5443 ret += "_PARAM_"+seo->get_param_name();
5445 case SE_IFACE_PARAM:
5446 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
5449 ret += "_" + seo->get_colref()->get_field();
5454 ret += "_" + seo->get_op();
// Checks whether the name Field0 is still free.
5471 if(curr_names.count("Field0") == 0)
// While the candidate is already taken, format base followed by the
// counter iter into tmpstr (presumably the next candidate - the
// assignment to ret is not visible here).
5476 while(curr_names.count(ret) > 0){
5478 sprintf(tmpstr,"%s%d",base.c_str(),iter);
// Reserve the chosen name so later calls cannot reuse it.
5484 curr_names.insert(ret);
5491 //////////////////////////////////////////////////////////////////////
5492 ////////////// Methods of defined classes ///////////////////////
5493 //////////////////////////////////////////////////////////////////////
5495 // helper fcn to enable col_id as map key.
// Ordering for col_id: compares tblvar_ref first and, when those are
// equal, compares field with the string less-than operator.
5497 bool operator<(const col_id &cr1, const col_id &cr2){
5498 if(cr1.tblvar_ref < cr2.tblvar_ref) return(true);
5499 if(cr1.tblvar_ref == cr2.tblvar_ref)
5500 return (cr1.field < cr2.field);
5505 // Process the GB variables.
5506 // At parse time, GB vars are either GB_COLREF,
5507 // or GB_COMPUTED if the AS keyword is used.
5508 // Cast GB vars as named entities with a SE as
5509 // their definition (the colref in the case of GB_COLREF).
5511 // TODO: if there is a gbref in a gbdef,
5512 // then I won't be able to compute the value without
5513 // a complex dependence analysis. So verify that there is no
5514 // gbref in any of the GBdefs.
5515 // BUT: a GBVAR_COLREF should be converted to a regular colref,
5516 // which is not yet done.
5518 // TODO : sort out issue of GBVAR naming and identification.
5519 // Determine where it is advantageous to convert GB_COLREF
5520 // GBVARS to colrefs -- e.g. in group definition, in the WHERE clause,
5523 // return -1 if there is a problem.
// Build a gb_table_entry from a parsed group-by item, validate it, and
// append it to gtbl.
//   fm       : FROM-clause table variables, used to bind column references.
//   fta_tree : query parse tree, forwarded to assign_data_types.
//   Ext_fcns : external function list, forwarded to assign_data_types.
// A negative status from binding, colref verification or type assignment
// is returned to the caller unchanged.
5525 int gb_table::add_gb_attr(
5527 tablevar_list_t *fm,
5529 table_exp_t *fta_tree,
5530 ext_fcn_list *Ext_fcns
5534 gb_table_entry *entry;
// Plain column reference: build a colref and bind it to a table variable.
5536 if(gb->type == GB_COLREF){
5539 gb->interface.c_str(),gb->table.c_str(), gb->name.c_str()
5542 cr = new colref_t(gb->name.c_str());
// NOTE(review): cr was allocated with new above and is not released on
// this early return - confirm whether the leak matters to callers.
5544 int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema);
5545 if(tablevar_ref < 0) return(tablevar_ref);
// Record the binding on the colref; the interface is cleared and the
// table name is replaced by the name of the bound table variable.
5547 cr->set_tablevar_ref(tablevar_ref);
5548 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
5549 cr->set_interface("");
5550 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
// The entry is named after the field and defined as the colref itself.
5552 entry = new gb_table_entry();
5553 entry->name.field = cr->get_field();
5554 entry->name.tblvar_ref = tablevar_ref;
5555 entry->definition = new scalarexp_t(cr);
5556 entry->ref_type = GBVAR_COLREF;
// Computed group-by variable: named by gb->name, bound to no table
// variable (-1), defined by the parsed expression gb->def (shared, not
// copied).
5558 entry = new gb_table_entry();
5559 entry->name.field = gb->name;
5560 entry->name.tblvar_ref = -1;
5561 entry->definition = gb->def;
5562 entry->ref_type = GBVAR_SE;
// NOTE(review): entry is not released on the two early returns below.
5565 retval = verify_colref(entry->definition, fm, schema, NULL);
5566 if(retval < 0) return(retval);
5568 retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns);
5569 if(retval < 0) return(retval);
5571 // Verify that the gbvar def references no aggregates and no gbvars.
5572 if(count_gb_se(entry->definition) > 0){
5573 fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() );
5576 if(count_aggr_se(entry->definition, true) > 0){
5577 fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() );
5581 // Check for duplicates
// Names are compared on the field only; the second position printed is
// gtbl.size(), the slot the new entry would occupy.
5583 for(i=0;i<gtbl.size();++i){
5584 if(entry->name.field == gtbl[i]->name.field){
5585 fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size());
5591 gtbl.push_back(entry);
5597 // Try to determine if the colref is actually
5599 // a) if no tablename associated with the colref,
5600 // 1) try to find a matching GB_COMPUTED gbvar.
5601 // 2) failing that, try to match to a single tablevar
5602 // 3) if successful, search among GB_COLREF
5603 // b) else, try to match the tablename to a single tablevar
5604 // if successful, search among GB_COLREF
// Look up the group-by table entry that the column reference cr denotes.
//   cr     : column reference to resolve.
//   fm     : FROM-clause table variables.
//   schema : schema list, forwarded to find_source_tables.
// Returns -1 when no unique match exists (presumably the index into gtbl
// on success - the returning lines are not visible here).
5605 int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){
5606 string c_field = cr->get_field();
5610 vector<int> candidates;
// Case a) no table name on the colref.
5612 if(cr->uses_default_table()){
// a.1) look for a computed (GBVAR_SE) group-by variable with this name.
5613 for(i=0;i<gtbl.size();i++){
5614 if(gtbl[i]->ref_type==GBVAR_SE && c_field == gtbl[i]->name.field){
// a.2) otherwise the field must belong to exactly one table variable.
5618 candidates = find_source_tables(c_field, fm, schema);
5619 if(candidates.size() != 1) return(-1); // can't find unique tablevar
// a.3) match a GBVAR_COLREF entry on both field name and table variable.
5620 for(i=0;i<gtbl.size();i++){
5621 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5622 c_field == gtbl[i]->name.field &&
5623 candidates[0] == gtbl[i]->name.tblvar_ref){
5627 return(-1); // colref is not in gb table.
5630 // A table name must have been given.
5631 vector<tablevar_t *> fm_tbls = fm->get_table_list();
5632 string interface = cr->get_interface();
5633 string table_name = cr->get_table_name();
5636 // if no interface name is given, try to search for the table
5637 // name among the tablevar names first.
5639 for(i=0;i<fm_tbls.size();++i){
5640 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5641 candidates.push_back(i);
// More than one matching table variable is ambiguous.
5643 if(candidates.size()>1) return(-1);
5644 if(candidates.size()==1){
5645 for(i=0;i<gtbl.size();i++){
5646 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5647 c_field == gtbl[i]->name.field &&
5648 candidates[0] == gtbl[i]->name.tblvar_ref){
5652 return(-1); // match semantics of bind to tablevar name first
5656 // Interface name given, or no interface and
5657 // no tablevar match. Try to match on schema name.
// NOTE(review): the condition below is identical to the tablevar-name
// pass above (it compares get_var_name(), not a schema name), so this
// pass cannot find anything the first one missed - confirm which
// accessor was intended.
5658 for(i=0;i<fm_tbls.size();++i){
5659 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
5660 candidates.push_back(i);
5662 if(candidates.size() != 1) return(-1);
5663 for(i=0;i<gtbl.size();i++){
5664 if(gtbl[i]->ref_type==GBVAR_COLREF &&
5665 c_field == gtbl[i]->name.field &&
5666 candidates[0] == gtbl[i]->name.tblvar_ref){
// Decide whether this aggregate is legal in an FTA. The op is first
// tested against the built-in names listed below (presumably yielding
// true on a match - that line is not visible here); any other aggregate
// is judged by Ext_fcns->fta_legal on its fcn_id.
5678 bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){
5680 if( (op == "COUNT") || (op == "SUM") || (op == "MIN") ||
5681 (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") ||
5682 (op == "XOR_AGGR") )
5685 return Ext_fcns->fta_legal(fcn_id);
5691 // Return the set of subaggregates required to compute
5692 // the desired aggregate. The operand of the subaggregates
5693 // can only be * or the scalarexp used in the superaggr.
5694 // This is indicated by the use_se vector.
5696 // Is this code generation specific?
// List the sub-aggregate operators needed to compute this aggregate.
//   use_se : output, appended in step with the returned names; true means
//            the sub-aggregate is applied to the scalar expression of the
//            aggregate, false means it is not (see the comment preceding
//            this function).
// Each group of pushes below is presumably guarded by a test on op; only
// the guards for AND_AGGR, OR_AGGR and XOR_AGGR are visible here.
5698 vector<string> aggr_table_entry::get_subaggr_fcns(vector<bool> &use_se){
5702 ret.push_back("COUNT");
5703 use_se.push_back(false);
5706 ret.push_back("SUM");
5707 use_se.push_back(true);
// Two sub-aggregates: SUM over the expression and a COUNT that does not
// use it.
5710 ret.push_back("SUM");
5711 ret.push_back("COUNT");
5712 use_se.push_back(true);
5713 use_se.push_back(false);
5716 ret.push_back("MIN");
5717 use_se.push_back(true);
5720 ret.push_back("MAX");
5721 use_se.push_back(true);
5723 if(op == "AND_AGGR"){
5724 ret.push_back("AND_AGGR");
5725 use_se.push_back(true);
5727 if(op == "OR_AGGR"){
5728 ret.push_back("OR_AGGR");
5729 use_se.push_back(true);
5731 if(op == "XOR_AGGR"){
5732 ret.push_back("XOR_AGGR");
5733 use_se.push_back(true);
5739 // Code generation specific?
// Build the data types of the sub-aggregates of this aggregate. Every
// data_type is allocated with new and the raw pointer is stored in the
// returned vector, so releasing them is left to the caller.
// Counting sub-aggregates get the type Int; the others derive their type
// from the operand type through set_aggr_data_type.
5741 vector<data_type *> aggr_table_entry::get_subaggr_dt(){
5742 vector<data_type *> ret;
5746 dt = new data_type("Int"); // was Uint
5747 ret.push_back( dt );
5750 dt = new data_type();
5751 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
// Two entries: the SUM type followed by an Int.
5755 dt = new data_type();
5756 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
5757 ret.push_back( dt );
5758 dt = new data_type("Int");
5759 ret.push_back( dt );
5762 dt = new data_type();
5763 dt->set_aggr_data_type( "MIN",operand->get_data_type() );
5764 ret.push_back( dt );
5767 dt = new data_type();
5768 dt->set_aggr_data_type( "MAX",operand->get_data_type() );
5769 ret.push_back( dt );
5771 if(op == "AND_AGGR"){
5772 dt = new data_type();
5773 dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() );
5774 ret.push_back( dt );
5776 if(op == "OR_AGGR"){
5777 dt = new data_type();
5778 dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() );
5779 ret.push_back( dt );
5781 if(op == "XOR_AGGR"){
5782 dt = new data_type();
5783 dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() );
5784 ret.push_back( dt );
5790 // Code generation specific?
// Build the scalar expression that recombines sub-aggregate results into
// this aggregate.
//   se_refs : expressions referring to the sub-aggregate values (passed
//             by value); index 0 is always used, index 1 only by the
//             two-input form below.
// ret_se starts as NULL and is assigned by whichever form applies.
5792 scalarexp_t *aggr_table_entry::make_superaggr_se(vector<scalarexp_t *> se_refs){
5793 scalarexp_t *se_l, *se_r, *ret_se = NULL;
5796 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5800 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
// Two-input form: SUM of the first input divided by SUM of the second.
5804 se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
5805 se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]);
5807 ret_se = new scalarexp_t("/", se_l, se_r);
5811 ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]);
5815 ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]);
// The bitwise aggregates recombine with the same operator.
5818 if(op == "AND_AGGR"){
5819 ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]);
5822 if(op == "OR_AGGR"){
5823 ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]);
5826 if(op == "XOR_AGGR"){
5827 ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]);
5836 // Add a built-in aggr.
// Register a built-in aggregate, reusing an existing entry when possible.
//   op       : aggregate operator name.
//   se       : operand expression.
//   is_super : whether this reference is a superaggregate.
// An existing built-in entry with the same op and an equivalent operand
// (is_equivalent_se) is reused; a superaggregate reference upgrades that
// entry via set_super(true). Otherwise a new entry is appended and its
// index returned.
5837 int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){
5840 for(i=0;i<agr_tbl.size();i++){
5841 if(agr_tbl[i]->is_builtin() && op == agr_tbl[i]->op
5842 && is_equivalent_se(se,agr_tbl[i]->operand) ){
5843 // && is_super == agr_tbl[i]->is_superaggr())
5844 if(is_super) agr_tbl[i]->set_super(true);
// No equivalent entry: allocate one and return its position.
5849 aggr_table_entry *ate = new aggr_table_entry(op, se, is_super);
5850 agr_tbl.push_back(ate);
5851 return(agr_tbl.size() - 1);
// Register a non-built-in aggregate identified by fcn_id, reusing an
// existing entry when possible.
//   op, fcn_id, opl, sdt, is_super, is_running, has_lfta_bailout :
//       forwarded to the aggr_table_entry constructor for a new entry.
// An existing non-built-in entry matches when fcn_id and the operand
// count agree and every operand is pairwise equivalent
// (is_equivalent_se). Otherwise a new entry is appended and its index
// returned.
5855 int aggregate_table::add_aggr(string op, int fcn_id, vector<scalarexp_t *> opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){
5858 for(i=0;i<agr_tbl.size();i++){
5859 if((! agr_tbl[i]->is_builtin()) && fcn_id == agr_tbl[i]->fcn_id
5860 && opl.size() == agr_tbl[i]->oplist.size() ){
5861 // && is_super == agr_tbl[i]->is_superaggr() ){
5862 for(o=0;o<opl.size();++o){
5863 if(! is_equivalent_se(opl[o],agr_tbl[i]->oplist[o]) )
// o equal to opl.size() is treated as a full operand match (presumably
// the loop above exits early on the first mismatch - that line is not
// visible here); a superaggregate reference then upgrades the entry.
5866 if(o == opl.size()){
5867 if(is_super) agr_tbl[i]->set_super(true);
// No equivalent entry: allocate one and return its position.
5873 aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout);
5874 agr_tbl.push_back(ate);
5875 return(agr_tbl.size() - 1);
// Register a complex literal, reusing an equivalent entry when one exists.
//   l             : literal to register; the pointer itself is stored.
//   is_handle_ref : whether this reference is made through a handle.
// For an equivalent existing literal, the handle flag is OR-ed into the
// flag stored at that position. Otherwise the literal and its flag are
// appended to the two parallel tables and the new index is returned.
5879 int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){
5882 for(i=0;i<cplx_lit_tbl.size();i++){
5883 if(l->is_equivalent(cplx_lit_tbl[i])){
5884 hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref;
5889 cplx_lit_tbl.push_back(l);
5890 hdl_ref_tbl.push_back(is_handle_ref);
5891 return(cplx_lit_tbl.size() - 1);
5896 //------------------------------------------------------------
5900 gb_t *gb_t::duplicate(){
5901 gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str());
5903 ret->lineno = lineno;
5904 ret->charno = charno;
5906 ret->def = dup_se(def,NULL);