1 /* ------------------------------------------------
\r
2 Copyright 2014 AT&T Intellectual Property
\r
3 Licensed under the Apache License, Version 2.0 (the "License");
\r
4 you may not use this file except in compliance with the License.
\r
5 You may obtain a copy of the License at
\r
7 http://www.apache.org/licenses/LICENSE-2.0
\r
9 Unless required by applicable law or agreed to in writing, software
\r
10 distributed under the License is distributed on an "AS IS" BASIS,
\r
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
12 See the License for the specific language governing permissions and
\r
13 limitations under the License.
\r
14 ------------------------------------------- */
\r
18 #include "parse_fta.h"
\r
19 #include "parse_schema.h"
\r
20 #include "parse_ext_fcns.h"
\r
23 #include"analyze_fta.h"
\r
25 #include"type_objects.h"
\r
30 using namespace std;
\r
32 extern string hostname; // name of the current host
\r
36 string int_to_string(int i){
\r
39 sprintf(tmpstr,"%d",i);
\r
47 // These represent derived information from the
\r
48 // query analysis stage. I extract them from a class,
\r
49 // perhaps this is dangerous.
\r
51 static gb_table *gb_tbl=NULL; // Table of all group-by attributes.
\r
52 static aggregate_table *aggr_tbl=NULL; // Table of all referenced aggregates.
\r
54 // static cplx_lit_table *complex_literals=NULL; // Table of literals with constructors.
\r
55 static param_table *param_tbl=NULL; // Table of all referenced parameters.
\r
57 vector<scalarexp_t *> partial_fcns_list;
\r
58 int wh_partial_start, wh_partial_end;
\r
59 int gb_partial_start, gb_partial_end;
\r
60 int aggr_partial_start, aggr_partial_end;
\r
61 int sl_partial_start, sl_partial_end;
\r
64 // Infer the table of a column reference and return the table ref.
\r
65 // First, extract the
\r
66 // field name and table name. If no table name is used,
\r
67 // search all tables to try to find a unique match.
\r
68 // Of course, plenty of error checking.
\r
70 // Return the set of tablevar indices in the FROM clause
\r
71 // which contain a field with the same name.
\r
72 vector<int> find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){
\r
75 // vector<string> tn = fm->get_schema_names();
\r
76 vector<int> tn = fm->get_schema_refs();
\r
77 // printf("Calling find_source_tables on field %s\n",field.c_str());
\r
78 for(i=0;i<tn.size();i++){
\r
79 // if(Schema->contains_field(Schema->find_tbl(tn[i]), field) ){
\r
80 if(Schema->contains_field(tn[i], field) ){
\r
82 // printf("\tfound in table %s\n",tn[i].c_str());
\r
88 int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){
\r
90 string tname = ir->get_tablevar();
\r
92 if(fm->size()==1) return 0;
\r
93 fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str());
\r
96 for(i=0;i<fm->size();++i){
\r
97 if(tname == fm->get_tablevar_name(i))
\r
100 fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str());
\r
105 // compute the index of the tablevar in the from clause that the
\r
107 // return -1 if no tablevar can be imputed.
\r
108 int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){
\r
113 vector<tablevar_t *> fm_tbls = fm->get_table_list();
\r
115 string field = cr->get_field();
\r
117 // printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str());
\r
118 if(cr->uses_default_table() ){
\r
119 tv = find_source_tables(field, fm, schema);
\r
121 fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ",
\r
122 cr->get_lineno(), cr->get_charno(),field.c_str() );
\r
123 for(i=0;i<tv.size();i++){
\r
124 fprintf(stderr,"%s ",fm_tbls[ tv[i] ]->to_string().c_str() );
\r
126 fprintf(stderr,"\n\tYou must specify one of these.\n");
\r
129 if(tv.size() == 0){
\r
130 fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n",
\r
131 cr->get_lineno(), cr->get_charno(),field.c_str() );
\r
138 // The table source is named -- but is it a schema name
\r
141 string interface = cr->get_interface();
\r
142 table_name = cr->get_table_name();
\r
144 // if interface is not specified, prefer to look at the tablevar names
\r
145 // Check for duplicates.
\r
147 for(i=0;i<fm_tbls.size();++i){
\r
148 if(table_name == fm_tbls[i]->get_var_name())
\r
152 fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() );
\r
155 if(tv.size() == 1) return(tv[0]);
\r
158 // Tableref not found by looking at tableref vars, or an interface
\r
159 // was specified. Try to match on schema and interface.
\r
160 // Check for duplicates.
\r
161 for(i=0;i<fm_tbls.size();++i){
\r
162 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
\r
166 fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n",
\r
167 cr->get_lineno(), cr->get_charno() );
\r
168 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
\r
169 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
\r
173 if(tv.size() == 0 ){
\r
174 fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno());
\r
175 if(interface != "") fprintf(stderr,"%s.",interface.c_str());
\r
176 fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str());
\r
184 // Reset temporal properties of a scalar expression
\r
185 void reset_temporal(scalarexp_t *se){
\r
187 vector<scalarexp_t *> operands;
\r
190 se->get_data_type()->reset_temporal();
\r
192 switch(se->get_operator_type()){
\r
195 case SE_IFACE_PARAM:
\r
199 reset_temporal(se->get_left_se());
\r
202 reset_temporal(se->get_left_se());
\r
203 reset_temporal(se->get_right_se());
\r
208 reset_temporal(se->get_left_se());
\r
211 operands = se->get_operands();
\r
212 for(o=0;o<operands.size();o++){
\r
213 reset_temporal(operands[o]);
\r
217 fprintf(stderr,"INTERNAL ERROR in reset_temporal, line %d, character %d: unknown operator type %d\n",
\r
218 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
223 // Verify that column references exist in their
\r
224 // declared tables. As a side effect, assign
\r
225 // their data types. Other side effects :
\r
227 // return -1 on error
\r
229 int verify_colref(scalarexp_t *se, tablevar_list_t *fm,
\r
230 table_list *schema, gb_table *gtbl){
\r
235 string field, table_source, type_name;
\r
241 vector<scalarexp_t *> operands;
\r
243 switch(se->get_operator_type()){
\r
247 case SE_IFACE_PARAM:
\r
248 ir = se->get_ifpref();
\r
249 table_var = infer_tablevar_from_ifpref(ir, fm);
\r
250 if(table_var < 0) return(table_var);
\r
251 ir->set_tablevar_ref(table_var);
\r
254 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
\r
256 l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl);
\r
257 r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl);
\r
258 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
\r
261 cr = se->get_colref();
\r
262 field = cr->get_field();
\r
264 // Determine if this is really a GB ref.
\r
265 // (the parser can only see that it's a colref).
\r
267 gb_ref = gtbl->find_gb(cr, fm, schema);
\r
272 se->set_gb_ref(gb_ref);
\r
275 // It's a colref, verify its existence and
\r
276 // record the data type.
\r
277 table_var = infer_tablevar_from_colref(cr,fm,schema);
\r
278 if(table_var < 0) return(table_var);
\r
280 // Store the table ref in the colref.
\r
281 cr->set_tablevar_ref(table_var);
\r
282 cr->set_schema_ref(fm->get_schema_ref(table_var));
\r
283 cr->set_interface("");
\r
284 cr->set_table_name(fm->get_tablevar_name(table_var));
\r
287 type_name = schema->get_type_name(cr->get_schema_ref(), field);
\r
288 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
\r
289 dt = new data_type(type_name, modifiers);
\r
290 se->set_data_type(dt);
\r
292 // Else, it's a gbref, use the GB var's data type.
\r
293 se->set_data_type(gtbl->get_data_type(gb_ref));
\r
300 return( verify_colref(se->get_left_se(), fm, schema, gtbl) );
\r
302 operands = se->get_operands();
\r
304 for(o=0;o<operands.size();o++){
\r
305 l_ret = verify_colref(operands[o], fm, schema, gtbl);
\r
306 if(l_ret < 0) r_ret = -1;
\r
310 fprintf(stderr,"INTERNAL ERROR in verify_colref, line %d, character %d: unknown operator type %d\n",
\r
311 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
318 int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){
\r
320 std::vector<scalarexp_t *> op_list;
\r
323 switch(pr->get_operator_type()){
\r
325 return(verify_colref(pr->get_left_se(),fm,schema, gtbl) );
\r
327 l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ;
\r
328 r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ;
\r
329 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
\r
331 case PRED_UNARY_OP:
\r
332 return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl));
\r
333 case PRED_BINARY_OP:
\r
334 l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ;
\r
335 r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ;
\r
336 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
\r
339 op_list = pr->get_op_list();
\r
341 for(o=0;o<op_list.size();++o){
\r
342 if(verify_colref(op_list[o],fm,schema,gtbl) < 0) l_ret = -1;
\r
346 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
\r
347 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
354 bool literal_only_se(scalarexp_t *se){ // really only literals.
\r
356 vector<scalarexp_t *> operands;
\r
358 if(se == NULL) return(1);
\r
359 switch(se->get_operator_type()){
\r
364 case SE_IFACE_PARAM:
\r
367 return( literal_only_se(se->get_left_se()) );
\r
369 return( literal_only_se(se->get_left_se()) &&
\r
370 literal_only_se(se->get_right_se()) );
\r
389 // Verify that column references exist in their
\r
390 // declared tables, binding each to its tablevar and schema. Also check
\r
391 // their data types. Other side effects :
\r
394 int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){
\r
398 string field, table_source, type_name;
\r
404 vector<scalarexp_t *> operands;
\r
406 if(se == NULL) return(1);
\r
408 switch(se->get_operator_type()){
\r
413 case SE_IFACE_PARAM:
\r
416 return( bind_to_schema_se(se->get_left_se(), fm, schema) );
\r
418 l_ret = bind_to_schema_se(se->get_left_se(), fm, schema);
\r
419 r_ret = bind_to_schema_se(se->get_right_se(), fm, schema);
\r
420 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
\r
423 if(se->is_gb()) return(1); // gb ref not a colref.
\r
425 cr = se->get_colref();
\r
426 field = cr->get_field();
\r
428 tablevar_ref = infer_tablevar_from_colref(cr,fm,schema);
\r
429 if(tablevar_ref < 0){
\r
430 return(tablevar_ref);
\r
432 // Store the table ref in the colref.
\r
433 cr->set_tablevar_ref(tablevar_ref);
\r
434 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
\r
435 cr->set_interface("");
\r
436 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
\r
438 // Check the data type
\r
439 type_name = schema->get_type_name(cr->get_schema_ref(), field);
\r
440 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
\r
441 data_type dt(type_name, modifiers);
\r
442 // if(! dt.equals(se->get_data_type()) ){
\r
443 // if(! dt.subsumes_type(se->get_data_type()) ){
\r
444 if(! se->get_data_type()->subsumes_type(&dt) ){
\r
445 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n",
\r
446 dt.type_indicator(), se->get_data_type()->type_indicator(), cr->to_string().c_str());
\r
453 case SE_AGGR_SE: // Probably I should just return,
\r
454 // aggregate se's are explicitly bound to the schema.
\r
455 // return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) );
\r
458 if(se->get_aggr_ref() >= 0) return 1;
\r
460 operands = se->get_operands();
\r
462 for(o=0;o<operands.size();o++){
\r
463 l_ret = bind_to_schema_se(operands[o], fm, schema);
\r
464 if(l_ret < 0) r_ret = -1;
\r
468 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se, line %d, character %d: unknown operator type %d\n",
\r
469 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
476 int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){
\r
478 vector<scalarexp_t *> op_list;
\r
481 switch(pr->get_operator_type()){
\r
483 return(bind_to_schema_se(pr->get_left_se(),fm,schema) );
\r
485 l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ;
\r
486 r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ;
\r
487 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
\r
489 case PRED_UNARY_OP:
\r
490 return(bind_to_schema_pr(pr->get_left_pr(),fm,schema));
\r
491 case PRED_BINARY_OP:
\r
492 l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ;
\r
493 r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ;
\r
494 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
\r
497 op_list = pr->get_op_list();
\r
499 for(o=0;o<op_list.size();++o){
\r
500 if(bind_to_schema_se(op_list[o],fm,schema) < 0) l_ret = -1;
\r
504 fprintf(stderr,"INTERNAL ERROR in bind_to_schema_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
505 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
516 // verify_colref assigned data types to the column refs.
\r
517 // Now assign data types to all other nodes in the
\r
518 // scalar expression.
\r
520 // returns the temporal type of se; column refs are looked up in tcol (varying_t if absent).
\r
522 temporal_type compute_se_temporal(scalarexp_t *se, map<col_id, temporal_type> &tcol){
\r
526 vector<scalarexp_t *> operands;
\r
527 vector<data_type *> odt;
\r
529 vector<bool> handle_ind;
\r
531 switch(se->get_operator_type()){
\r
533 return(constant_t);
\r
536 case SE_IFACE_PARAM:
\r
537 return(varying_t); // actually, this should not be called.
\r
539 return data_type::compute_temporal_type(
\r
540 compute_se_temporal(se->get_left_se(), tcol), se->get_op()
\r
543 return data_type::compute_temporal_type(
\r
544 compute_se_temporal(se->get_left_se(), tcol),
\r
545 compute_se_temporal(se->get_right_se(), tcol),
\r
546 se->get_left_se()->get_data_type()->get_type(),
\r
547 se->get_right_se()->get_data_type()->get_type(),
\r
552 col_id cid(se->get_colref() );
\r
553 if(tcol.count(cid) > 0){ return tcol[cid];
\r
554 }else{ return varying_t;}
\r
567 // verify_colref assigned data types to the column refs.
\r
568 // Now assign data types to all other nodes in the
\r
569 // scalar expression.
\r
571 // return -1 on error
\r
573 int assign_data_types(scalarexp_t *se, table_list *schema,
\r
574 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
\r
578 vector<scalarexp_t *> operands;
\r
579 vector<data_type *> odt;
\r
581 vector<bool> handle_ind;
\r
582 vector<bool> constant_ind;
\r
584 switch(se->get_operator_type()){
\r
586 dt = new data_type( se->get_literal()->get_type() );
\r
587 se->set_data_type(dt);
\r
588 if( ! dt->is_defined() ){
\r
589 fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n",
\r
590 se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() );
\r
597 string pname = se->get_param_name();
\r
598 dt = param_tbl->get_data_type(pname);
\r
599 // A SE_PARAM can change its value mid-query so using one
\r
600 // to set a window is dangerous. TODO check for this and issue a warning.
\r
601 dt->set_temporal(constant_t);
\r
602 se->set_data_type(dt);
\r
603 if( ! dt->is_defined() ){
\r
604 fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n",
\r
605 pname.c_str(), se->get_lineno(),se->get_charno() );
\r
610 case SE_IFACE_PARAM:
\r
611 dt = new data_type( "STRING" );
\r
612 se->set_data_type(dt);
\r
615 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
\r
616 if(l_ret < 0) return -1;
\r
618 dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() );
\r
619 se->set_data_type(dt);
\r
620 if( ! dt->is_defined() ){
\r
621 fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n",
\r
622 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
\r
623 se->get_lineno(), se->get_charno() );
\r
629 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
\r
630 r_ret = assign_data_types(se->get_right_se(), schema, fta_tree, Ext_fcns);
\r
631 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
\r
633 dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() );
\r
634 se->set_data_type(dt);
\r
635 if( ! dt->is_defined() ){
\r
636 fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n",
\r
637 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
\r
638 se->get_right_se()->get_data_type()->to_string().c_str(),
\r
639 se->get_lineno(), se->get_charno() );
\r
645 dt = se->get_data_type();
\r
646 bret = dt->is_defined();
\r
650 fprintf(stderr,"ERROR, column reference type is undefined, line =%d, char = %d, colref=%s\n",
\r
651 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
\r
655 dt = new data_type("Int"); // changed Uint to Int
\r
656 se->set_data_type(dt);
\r
659 l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns);
\r
660 if(l_ret < 0) return -1;
\r
662 dt = new data_type();
\r
663 dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type());
\r
664 se->set_data_type(dt);
\r
666 if( ! dt->is_defined() ){
\r
667 fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n",
\r
668 se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(),
\r
669 se->get_lineno(), se->get_charno() );
\r
676 operands = se->get_operands();
\r
678 for(o=0;o<operands.size();o++){
\r
679 l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns);
\r
680 odt.push_back(operands[o]->get_data_type());
\r
681 if(l_ret < 0) r_ret = -1;
\r
683 if(r_ret < 0) return(r_ret);
\r
685 // Is it an aggregate extraction function?
\r
686 fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt);
\r
688 int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id);
\r
689 int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id);
\r
690 int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id);
\r
691 // Construct a se for the subaggregate.
\r
692 vector<scalarexp_t *> op_a;
\r
693 int n_aggr_oprs = operands.size()-n_fcn_params+1;
\r
694 for(o=0;o<n_aggr_oprs;++o){
\r
695 op_a.push_back(operands[o]);
\r
697 // check handle params
\r
698 vector<bool> handle_a = Ext_fcns->get_handle_indicators(subaggr_id);
\r
699 for(o=0;o<op_a.size();o++){
\r
701 if(op_a[o]->get_operator_type() != SE_LITERAL &&
\r
702 op_a[o]->get_operator_type() != SE_IFACE_PARAM &&
\r
703 op_a[o]->get_operator_type() != SE_PARAM){
\r
704 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
\r
705 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
710 vector<bool> is_const_a=Ext_fcns->get_const_indicators(subaggr_id);
\r
711 for(o=0;o<op_a.size();o++){
\r
713 if(op_a[o]->get_data_type()->get_temporal() != constant_t){
\r
714 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n Line=%d, char=%d.\n",
\r
715 o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
721 scalarexp_t *se_a = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a);
\r
722 se_a->set_fcn_id(subaggr_id);
\r
723 se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id));
\r
724 se_a->set_aggr_id(0); // label this as a UDAF.
\r
727 // Change this se to be the actual function
\r
728 vector<scalarexp_t *> op_f;
\r
729 op_f.push_back(se_a);
\r
730 for(o=n_aggr_oprs;o<operands.size();++o)
\r
731 op_f.push_back(operands[o]);
\r
732 // check handle params
\r
733 vector<bool> handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id);
\r
734 for(o=0;o<op_f.size();o++){
\r
736 if(op_f[o]->get_operator_type() != SE_LITERAL &&
\r
737 op_f[o]->get_operator_type() != SE_IFACE_PARAM &&
\r
738 op_f[o]->get_operator_type() != SE_PARAM){
\r
739 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
\r
740 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
745 vector<bool> is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id);
\r
746 for(o=0;o<op_f.size();o++){
\r
748 if(op_f[o]->get_data_type()->get_temporal() != constant_t){
\r
749 fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n Line=%d, char=%d.\n",
\r
750 o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
756 se->param_list = op_f;
\r
757 se->op = Ext_fcns->get_fcn_name(actual_fcn_id);
\r
758 se->set_fcn_id(actual_fcn_id);
\r
759 se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id));
\r
763 fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str());
\r
767 fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt);
\r
769 se->set_fcn_id(fcn_id);
\r
770 se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
\r
771 se->set_aggr_id(0); // label this as a UDAF.
\r
772 // Finally, verify that all HANDLE parameters are literals or params.
\r
773 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
\r
774 for(o=0;o<operands.size();o++){
\r
776 if(operands[o]->get_operator_type() != SE_LITERAL &&
\r
777 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
\r
778 operands[o]->get_operator_type() != SE_PARAM){
\r
779 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
\r
780 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
785 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
\r
786 for(o=0;o<operands.size();o++){
\r
787 if(constant_ind[o]){
\r
788 if(operands[o]->get_data_type()->get_temporal() != constant_t){
\r
789 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n",
\r
790 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
796 // UDAFS as superaggregates not yet supported.
\r
797 if(se->is_superaggr()){
\r
798 fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
799 se->set_superaggr(false);
\r
804 fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str());
\r
807 // Is it a stateful fcn?
\r
808 fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt);
\r
810 se->set_fcn_id(fcn_id);
\r
811 se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id));
\r
812 se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun
\r
813 // Finally, verify that all HANDLE parameters are literals or params.
\r
814 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
\r
815 for(o=0;o<operands.size();o++){
\r
817 if(operands[o]->get_operator_type() != SE_LITERAL &&
\r
818 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
\r
819 operands[o]->get_operator_type() != SE_PARAM){
\r
820 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
\r
821 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
826 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
\r
827 for(o=0;o<operands.size();o++){
\r
828 if(constant_ind[o]){
\r
829 if(operands[o]->get_data_type()->get_temporal() != constant_t){
\r
830 fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n",
\r
831 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
837 if(se->is_superaggr()){
\r
838 fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
843 fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str());
\r
847 // Is it a regular function?
\r
848 fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt);
\r
850 fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str());
\r
851 for(o=0;o<operands.size();o++){
\r
852 if(o>0) fprintf(stderr,", ");
\r
853 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
\r
855 fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() );
\r
856 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n");
\r
861 se->set_fcn_id(fcn_id);
\r
862 dt = Ext_fcns->get_fcn_dt(fcn_id);
\r
864 if(! dt->is_defined() ){
\r
865 fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str());
\r
866 for(o=0;o<operands.size();o++){
\r
867 if(o>0) fprintf(stderr,", ");
\r
868 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
\r
870 fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() );
\r
874 // Finally, verify that all HANDLE parameters are literals or params.
\r
875 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
\r
876 for(o=0;o<operands.size();o++){
\r
878 if(operands[o]->get_operator_type() != SE_LITERAL &&
\r
879 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
\r
880 operands[o]->get_operator_type() != SE_PARAM){
\r
881 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
\r
882 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
887 constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id());
\r
888 for(o=0;o<operands.size();o++){
\r
889 if(constant_ind[o]){
\r
890 if(operands[o]->get_data_type()->get_temporal() != constant_t){
\r
891 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be constant.\n Line=%d, char=%d.\n",
\r
892 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
899 if(se->is_superaggr()){
\r
900 fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
903 se->set_data_type(dt);
\r
906 fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n",
\r
907 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
914 int assign_predicate_data_types(predicate_t *pr, table_list *schema,
\r
915 table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){
\r
918 data_type *dt, *dtl;
\r
919 vector<data_type *> odt;
\r
920 vector<literal_t *> litl;
\r
921 vector<scalarexp_t *> operands;
\r
922 vector<bool> handle_ind;
\r
923 vector<bool> constant_ind;
\r
926 switch(pr->get_operator_type()){
\r
928 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set);
\r
929 litl = pr->get_lit_vec();
\r
930 dt = pr->get_left_se()->get_data_type();
\r
932 for(i=0;i<litl.size();i++){
\r
933 dtl = new data_type( litl[i]->get_type() );
\r
934 if( ! dt->is_comparable(dtl,pr->get_op()) ){
\r
935 fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n",
\r
936 litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() );
\r
944 l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
\r
945 r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ;
\r
946 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
\r
948 if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){
\r
949 fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n",
\r
950 pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(),
\r
951 pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() );
\r
956 case PRED_UNARY_OP:
\r
957 return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set));
\r
958 case PRED_BINARY_OP:
\r
959 l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
\r
960 r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set);
\r
961 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
\r
964 operands = pr->get_op_list();
\r
966 for(o=0;o<operands.size();o++){
\r
967 l_ret = assign_data_types(operands[o], schema, fta_tree, Ext_fcns); // , ext_fcn_set);
\r
968 odt.push_back(operands[o]->get_data_type());
\r
969 if(l_ret < 0) r_ret = -1;
\r
971 if(r_ret < 0) return(r_ret);
\r
973 fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt);
\r
975 fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str());
\r
976 for(o=0;o<operands.size();o++){
\r
977 if(o>0) fprintf(stderr,", ");
\r
978 fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str());
\r
980 fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() );
\r
981 if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n");
\r
985 // ext_fcn_set.insert(fcn_id);
\r
986 pr->set_fcn_id(fcn_id);
\r
988 // Finally, verify that all HANDLE parameters are literals or params.
\r
989 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
\r
990 for(o=0;o<operands.size();o++){
\r
992 if(operands[o]->get_operator_type() != SE_LITERAL &&
\r
993 operands[o]->get_operator_type() != SE_IFACE_PARAM &&
\r
994 operands[o]->get_operator_type() != SE_PARAM){
\r
995 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
\r
996 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
\r
1001 constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id());
\r
1002 for(o=0;o<operands.size();o++){
\r
1003 if(constant_ind[o]){
\r
1004 if(operands[o]->get_data_type()->get_temporal() != constant_t){
\r
1005 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be constant.\n Line=%d, char=%d.\n",
\r
1006 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
\r
1013 // Check if this predicate function is special sampling function
\r
1014 pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id());
\r
1019 fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n",
\r
1020 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
1028 /////////////////////////////////////////////////////////////////////
\r
1029 //////////////// Make a deep copy of a se / pred tree
\r
1030 /////////////////////////////////////////////////////////////////////
\r
1033 // duplicate a select element
\r
1034 select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){
\r
1035 return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str());
\r
1038 // duplicate a scalar expression.
\r
// Recursively copy the scalar-expression tree rooted at se.
// Each visible branch allocates a new scalarexp_t of the matching kind and
// then copies the source node's decorations with use_decorations_of().
//   se       : expression to copy.  It is dereferenced by the switch with no
//              NULL check visible in this view.
//   aggr_tbl : in the lines shown it is only forwarded to recursive calls.
// The copy is accumulated in ret_se.
1039 scalarexp_t *dup_se(scalarexp_t *se,
\r
1040 aggregate_table *aggr_tbl
\r
1043 vector<scalarexp_t *> operand_list;
\r
1044 vector<data_type *> dt_signature;
\r
1045 scalarexp_t *ret_se, *l_se, *r_se;
\r
1047 switch(se->get_operator_type()){
\r
// Literal: wrap the source node's literal in a new expression node.
1049 ret_se = new scalarexp_t(se->get_literal());
\r
1050 ret_se->use_decorations_of(se);
\r
// Query parameter: rebuilt from the operator string of the source node.
1054 ret_se = scalarexp_t::make_param_reference(se->get_op().c_str());
\r
1055 ret_se->use_decorations_of(se);
\r
1058 case SE_IFACE_PARAM:
\r
1059 ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref());
\r
1060 ret_se->use_decorations_of(se);
\r
// Column reference: the colref itself is duplicated, but rhs.scalarp is
// copied as a pointer (shared with the source node, not duplicated).
1064 ret_se = new scalarexp_t(se->get_colref()->duplicate());
\r
1065 ret_se->rhs.scalarp = se->rhs.scalarp; // carry along notation
\r
1066 ret_se->use_decorations_of(se);
\r
// Single-operand operator: copy the operand, then rebuild the node.
1070 l_se = dup_se(se->get_left_se(), aggr_tbl);
\r
1071 ret_se = new scalarexp_t(se->get_op().c_str(), l_se);
\r
1072 ret_se->use_decorations_of(se);
\r
1075 case SE_BINARY_OP:
\r
1076 l_se = dup_se(se->get_left_se(), aggr_tbl);
\r
1077 r_se = dup_se(se->get_right_se(), aggr_tbl);
\r
1079 ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se);
\r
1080 ret_se->use_decorations_of(se);
\r
1084 case SE_AGGR_STAR:
\r
1085 ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str());
\r
1086 ret_se->use_decorations_of(se);
\r
// Aggregate over an expression: copy the argument, then rebuild the aggregate.
1090 l_se = dup_se(se->get_left_se(), aggr_tbl);
\r
1091 ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se);
\r
1092 ret_se->use_decorations_of(se);
\r
// Operand-list node: copy every operand in order into new_operands.
1097 operand_list = se->get_operands();
\r
1098 vector<scalarexp_t *> new_operands;
\r
1099 for(p=0;p<operand_list.size();p++){
\r
1100 l_se = dup_se(operand_list[p], aggr_tbl);
\r
1101 new_operands.push_back(l_se);
\r
1104 ret_se = new scalarexp_t(se->get_op().c_str(), new_operands);
\r
1105 ret_se->use_decorations_of(se);
\r
// NOTE(review): this diagnostic is written with printf (stdout), whereas
// dup_pr reports its internal error with fprintf(stderr, ...).
1110 printf("INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type());
\r
// Recursively copy the predicate tree rooted at pr.
// Scalar-expression children are copied with dup_se(), predicate children
// with dup_pr(); aggr_tbl is forwarded to both.  The copy is accumulated in
// ret_pr.  pr is dereferenced by the switch with no NULL check visible here.
1120 predicate_t *dup_pr(predicate_t *pr,
\r
1121 aggregate_table *aggr_tbl
\r
1124 vector<literal_t *> llist;
\r
1125 scalarexp_t *se_l, *se_r;
\r
1126 predicate_t *pr_l, *pr_r, *ret_pr;
\r
1127 vector<scalarexp_t *> op_list, new_op_list;
\r
1131 switch(pr->get_operator_type()){
\r
// Predicate with a literal list: the left expression is copied; the value
// returned by get_lit_vec() is handed to the new predicate as-is.
1133 se_l = dup_se(pr->get_left_se(), aggr_tbl);
\r
1134 ret_pr = new predicate_t(se_l, pr->get_lit_vec());
\r
1137 case PRED_COMPARE:
\r
1138 se_l = dup_se(pr->get_left_se(), aggr_tbl);
\r
1139 se_r = dup_se(pr->get_right_se(), aggr_tbl);
\r
1140 ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r);
\r
1143 case PRED_UNARY_OP:
\r
1144 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
\r
1145 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l);
\r
1148 case PRED_BINARY_OP:
\r
1149 pr_l = dup_pr(pr->get_left_pr(), aggr_tbl);
\r
1150 pr_r = dup_pr(pr->get_right_pr(), aggr_tbl);
\r
1151 ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r);
\r
// Operand-list predicate: copy each operand, then carry over the function
// id and the sampling-function flag from the source predicate.
1154 op_list = pr->get_op_list();
\r
1155 for(o=0;o<op_list.size();++o){
\r
1156 se_l = dup_se(op_list[o], aggr_tbl);
\r
1157 new_op_list.push_back(se_l);
\r
1159 ret_pr= new predicate_t(pr->get_op().c_str(), new_op_list);
\r
1160 ret_pr->set_fcn_id(pr->get_fcn_id());
\r
1161 ret_pr->is_sampling_fcn = pr->is_sampling_fcn;
\r
1165 fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
1166 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
// Build a copy of the table expression te in a newly allocated table_exp_t.
// Copies the query type, the nmap entries, the query parameters (as new
// var_pair_t objects), the select list, the FROM clause (via duplicate()),
// the optional predicates, the group-by list, mergevars, slack and the
// source position.  All dup_se()/dup_pr() calls pass NULL as aggregate table.
1174 table_exp_t *dup_table_exp(table_exp_t *te){
\r
1176 table_exp_t *ret = new table_exp_t();
\r
1178 ret->query_type = te->query_type;
\r
// Copy the name map entry by entry.
1180 ss_map::iterator ss_i;
\r
1181 for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){
\r
1182 ret->nmap[(*ss_i).first] = (*ss_i).second;
\r
1185 for(i=0;i<te->query_params.size();++i){
\r
1186 ret->query_params.push_back(new
\r
1187 var_pair_t(te->query_params[i]->name,te->query_params[i]->val) );
\r
// Rebuild the select list: each element's expression is copied and appended
// under its original name.
// NOTE(review): te->sl is dereferenced here; any guard would be on a line
// not visible in this view -- confirm.
1191 ret->sl = new select_list_t();
\r
1192 ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno;
\r
1193 vector<select_element *> select_list = te->sl->get_select_list();
\r
1194 for(i=0;i<select_list.size();++i){
\r
1195 scalarexp_t *se = dup_se(select_list[i]->se,NULL);
\r
1196 ret->sl->append(se,select_list[i]->name);
\r
1200 ret->fm = te->fm->duplicate();
\r
// Optional predicates are copied only when present in the source.
1202 if(te->wh) ret->wh = dup_pr(te->wh,NULL);
\r
1203 if(te->hv) ret->hv = dup_pr(te->hv,NULL);
\r
1204 if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL);
\r
1205 if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL);
\r
1206 if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL);
\r
1208 for(i=0;i<te->gb.size();++i){
\r
1209 extended_gb_t *tmp_g = te->gb[i]->duplicate();
\r
1210 ret->gb.push_back(tmp_g);
\r
1213 ret->mergevars = te->mergevars;
\r
// NOTE(review): dup_se() dereferences its argument; verify that te->slack
// is guarded against NULL (a guard may sit on a line not shown here).
1215 ret->slack = dup_se(te->slack,NULL);
\r
1216 ret->lineno = te->lineno;
\r
1217 ret->charno = te->charno;
\r
1228 /////////////////////////////////////////////////////////////////////////
\r
1229 // Bind colrefs to a member of their FROM list
\r
// Walk the scalar expression se and rebind table-variable references:
// every interface-parameter or column reference whose tablevar ref equals
// prev_ref is switched to new_ref, and its table-variable name is replaced
// by fm[new_ref]->get_var_name().
//   fm       : FROM-list table variables, indexed by tablevar ref.
//   prev_ref : reference index to look for.
//   new_ref  : reference index to substitute (used to index fm).
1231 void bind_colref_se(scalarexp_t *se,
\r
1232 vector<tablevar_t *> &fm,
\r
1233 int prev_ref, int new_ref
\r
1236 vector<scalarexp_t *> operand_list;
\r
1240 switch(se->get_operator_type()){
\r
1244 case SE_IFACE_PARAM:
\r
1245 ir = se->get_ifpref();
\r
1246 if(ir->get_tablevar_ref() == prev_ref){
\r
1247 ir->set_tablevar_ref(new_ref);
\r
1248 ir->set_tablevar(fm[new_ref]->get_var_name());
\r
// Column reference: same rebinding, applied to the colref object.
1253 cr=se->get_colref();
\r
1254 if(cr->get_tablevar_ref() == prev_ref){
\r
1255 cr->set_tablevar_ref(new_ref);
\r
1256 // cr->set_interface(fm[new_ref]->get_interface());
\r
1257 cr->set_table_name(fm[new_ref]->get_var_name());
\r
1262 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
\r
1265 case SE_BINARY_OP:
\r
1266 bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref);
\r
1267 bind_colref_se(se->get_right_se(), fm, prev_ref, new_ref);
\r
1270 case SE_AGGR_STAR:
\r
// Nodes carrying an aggregate reference (aggr_ref >= 0) are not descended into.
1275 if(se->get_aggr_ref() >= 0) return;
\r
1277 operand_list = se->get_operands();
\r
1278 for(p=0;p<operand_list.size();p++){
\r
1279 bind_colref_se(operand_list[p], fm, prev_ref, new_ref);
\r
// NOTE(review): written with printf (stdout); bind_colref_pr uses stderr.
1284 printf("INTERNAL ERROR in bind_colref_se: operator type %d\n",se->get_operator_type());
\r
// Predicate counterpart of bind_colref_se: walks the predicate tree pr and
// applies the prev_ref -> new_ref rebinding to every scalar expression it
// contains (left/right expressions, sub-predicates, operand lists).
1295 void bind_colref_pr(predicate_t *pr,
\r
1296 vector<tablevar_t *> &fm,
\r
1297 int prev_ref, int new_ref
\r
1299 vector<scalarexp_t *> op_list;
\r
1302 switch(pr->get_operator_type()){
\r
// Only the left expression is visited for this predicate kind.
1304 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
\r
1307 case PRED_COMPARE:
\r
1308 bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref);
\r
1309 bind_colref_se(pr->get_right_se(), fm, prev_ref, new_ref);
\r
1312 case PRED_UNARY_OP:
\r
1313 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
\r
1316 case PRED_BINARY_OP:
\r
1317 bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref);
\r
1318 bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref);
\r
// Operand-list predicate: rebind inside every operand.
1321 op_list = pr->get_op_list();
\r
1322 for(o=0;o<op_list.size();++o){
\r
1323 bind_colref_se(op_list[o], fm, prev_ref, new_ref);
\r
1328 fprintf(stderr,"INTERNAL ERROR in bind_colref_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
1329 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
1338 /////////////////////////////////////////////////////////////////////
\r
1339 // verify that the se refs only literals and params.
\r
1340 // (use to verify that the expression should stay in the hfta
\r
1341 // during a split)
\r
1342 /////////////////////////////////////////////////////////////////////
\r
// Recursive test on the scalar expression se.
// A NULL expression yields true; an interface parameter yields false; a
// binary operator requires both operands to pass the test.  Results of the
// remaining branches are on lines not visible in this view.
1344 bool is_literal_or_param_only(scalarexp_t *se){
\r
1346 vector<scalarexp_t *> operands;
\r
1349 if(se == NULL) return(true);
\r
1351 switch(se->get_operator_type()){
\r
1355 case SE_IFACE_PARAM:
\r
1356 return(false); // need to treat as colref
\r
// Single-operand node: result is that of its operand.
1358 return(is_literal_or_param_only(se->get_left_se()) );
\r
1359 case SE_BINARY_OP:
\r
1361 is_literal_or_param_only(se->get_left_se()) &&
\r
1362 is_literal_or_param_only(se->get_right_se())
\r
1366 case SE_AGGR_STAR:
\r
1370 // The fcn might have special meaning at the lfta ...
\r
1374 fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n",
\r
1375 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
1383 /////////////////////////////////////////////////////////////////////
\r
1384 // Search for gb refs.
\r
1385 // (use to verify that no gbrefs in a gb def.)
\r
1386 /////////////////////////////////////////////////////////////////////
\r
// Recursively count group-by references in the scalar expression se.
// NULL contributes 0.  Unary and binary nodes return the counts of their
// operands (summed for binary); operand-list nodes accumulate into sum.
1389 int count_gb_se(scalarexp_t *se){
\r
1391 vector<scalarexp_t *> operands;
\r
1394 if(se == NULL) return(0);
\r
1396 switch(se->get_operator_type()){
\r
1399 case SE_IFACE_PARAM:
\r
1402 return(count_gb_se(se->get_left_se()) );
\r
1403 case SE_BINARY_OP:
\r
1405 count_gb_se(se->get_left_se()) +
\r
1406 count_gb_se(se->get_right_se())
\r
// A node with a negative gb_ref is not a group-by reference.
1409 if(se->get_gb_ref() < 0) return(0);
\r
1411 case SE_AGGR_STAR:
\r
1415 operands = se->get_operands();
\r
1416 for(o=0;o<operands.size();o++){
\r
1417 sum += count_gb_se(operands[o]);
\r
1422 fprintf(stderr,"INTERNAL ERROR in count_gb_se, line %d, character %d: unknown operator type %d\n",
\r
1423 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
1430 /////////////////////////////////////////////////////////////////////
\r
1431 //////////////// Search for stateful fcns.
\r
1432 /////////////////////////////////////////////////////////////////////
\r
// Recursively count stateful-function references in the scalar expression
// se.  NULL contributes 0.  For operand-list nodes the operand counts are
// summed, the node itself adds one when it carries an aggregate reference
// (aggr_ref >= 0), and a non-empty storage state is tested as well (the
// statement guarded by that test is not visible in this view).
1435 int se_refs_sfun(scalarexp_t *se){
\r
1437 vector<scalarexp_t *> operands;
\r
1440 if(se == NULL) return(0);
\r
1442 switch(se->get_operator_type()){
\r
1445 case SE_IFACE_PARAM:
\r
1448 return(se_refs_sfun(se->get_left_se()) );
\r
1449 case SE_BINARY_OP:
\r
1451 se_refs_sfun(se->get_left_se()) +
\r
1452 se_refs_sfun(se->get_right_se())
\r
1456 case SE_AGGR_STAR:
\r
1460 operands = se->get_operands();
\r
1461 for(o=0;o<operands.size();o++){
\r
1462 sum += se_refs_sfun(operands[o]);
\r
1464 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
\r
1466 // for now, stateful functions count as aggregates.
\r
1467 if(se->get_storage_state() != "")
\r
1473 fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n",
\r
1474 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
1481 // Return a count of the number of stateful fcns in this predicate.
\r
// Predicate-level driver for se_refs_sfun: sums the counts over every
// scalar expression reachable from pr (left/right expressions, nested
// predicates, operand lists).  pr is dereferenced without a NULL check
// visible in this view.
1482 int pred_refs_sfun(predicate_t *pr){
\r
1483 vector<scalarexp_t *> op_list;
\r
1486 switch(pr->get_operator_type()){
\r
1488 return(se_refs_sfun(pr->get_left_se()) );
\r
1489 case PRED_COMPARE:
\r
1491 se_refs_sfun(pr->get_left_se()) +
\r
1492 se_refs_sfun(pr->get_right_se())
\r
1494 case PRED_UNARY_OP:
\r
1495 return(pred_refs_sfun(pr->get_left_pr()) );
\r
1496 case PRED_BINARY_OP:
\r
1498 pred_refs_sfun(pr->get_left_pr()) +
\r
1499 pred_refs_sfun(pr->get_right_pr())
\r
// Operand-list predicate: accumulate the count of each operand.
1502 op_list = pr->get_op_list();
\r
1504 for(o=0;o<op_list.size();++o){
\r
1505 aggr_sum += se_refs_sfun(op_list[o]);
\r
1510 fprintf(stderr,"INTERNAL ERROR in pred_refs_sfun, line %d, character %d, unknown predicate operator type %d\n",
\r
1511 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
1518 //////////////////////////////////////////////////
\r
1520 /////////////////////////////////////////////////////////////////////
\r
1521 //////////////// Search for aggregates.
\r
1522 /////////////////////////////////////////////////////////////////////
\r
// Recursively count aggregate references in the scalar expression se.
//   strict : forwarded unchanged to every recursive call; when false, a
//            non-empty storage state on an operand-list node is also tested
//            (the guarded statement is not visible in this view).
// NULL contributes 0.  An operand-list node adds one when aggr_ref >= 0.
1525 int count_aggr_se(scalarexp_t *se, bool strict){
\r
1527 vector<scalarexp_t *> operands;
\r
1530 if(se == NULL) return(0);
\r
1532 switch(se->get_operator_type()){
\r
1535 case SE_IFACE_PARAM:
\r
1538 return(count_aggr_se(se->get_left_se(), strict) );
\r
1539 case SE_BINARY_OP:
\r
1541 count_aggr_se(se->get_left_se(), strict) +
\r
1542 count_aggr_se(se->get_right_se(), strict)
\r
1546 case SE_AGGR_STAR:
\r
1550 operands = se->get_operands();
\r
1551 for(o=0;o<operands.size();o++){
\r
1552 sum += count_aggr_se(operands[o], strict);
\r
1554 if(se->get_aggr_ref()>=0) sum++; // is it tagged as a UDAF?
\r
1556 // now, stateful functions can count as aggregates,
\r
1557 // but only if we are NOT being strict (note the !strict test).
\r
1558 if(! strict && se->get_storage_state() != "")
\r
1564 fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n",
\r
1565 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
1572 // Return a count of the number of aggregate fcns in this predicate.
\r
// Predicate-level driver for count_aggr_se: sums the aggregate counts of
// every scalar expression reachable from pr, forwarding strict unchanged.
// pr is dereferenced without a NULL check visible in this view.
1573 int count_aggr_pred(predicate_t *pr, bool strict){
\r
1574 vector<scalarexp_t *> op_list;
\r
1577 switch(pr->get_operator_type()){
\r
1579 return(count_aggr_se(pr->get_left_se(), strict) );
\r
1580 case PRED_COMPARE:
\r
1582 count_aggr_se(pr->get_left_se(), strict) +
\r
1583 count_aggr_se(pr->get_right_se(), strict)
\r
1585 case PRED_UNARY_OP:
\r
1586 return(count_aggr_pred(pr->get_left_pr(), strict) );
\r
1587 case PRED_BINARY_OP:
\r
1589 count_aggr_pred(pr->get_left_pr(), strict) +
\r
1590 count_aggr_pred(pr->get_right_pr(), strict)
\r
// Operand-list predicate: accumulate the count of each operand.
1593 op_list = pr->get_op_list();
\r
1595 for(o=0;o<op_list.size();++o){
\r
1596 aggr_sum += count_aggr_se(op_list[o], strict);
\r
1601 fprintf(stderr,"INTERNAL ERROR in count_aggr_pred, line %d, character %d, unknown predicate operator type %d\n",
\r
1602 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
1609 //////////////////////////////////////////////////
\r
1610 /// Analyze tablevar refs
\r
// Collect into reflist the distinct table-variable reference indices used
// by interface parameters and column references inside se.
// A reference is appended only if it is not already present (linear scan).
// NULL expressions are ignored.
1612 void get_tablevar_ref_se(scalarexp_t *se, vector<int> &reflist){
\r
1614 vector<scalarexp_t *> operands;
\r
1619 if(se == NULL) return;
\r
1621 switch(se->get_operator_type()){
\r
1625 case SE_IFACE_PARAM:
\r
1626 ir = se->get_ifpref();
\r
1627 vref = ir->get_tablevar_ref();
\r
1628 for(o=0;o<reflist.size();++o){
\r
1629 if(vref == reflist[o]) return;
\r
1631 reflist.push_back(vref);
\r
1634 get_tablevar_ref_se(se->get_left_se(), reflist);
\r
1636 case SE_BINARY_OP:
\r
1637 get_tablevar_ref_se(se->get_left_se(), reflist);
\r
1638 get_tablevar_ref_se(se->get_right_se(), reflist);
\r
// Column reference: group-by references are skipped entirely.
1641 if(se->is_gb()) return;
\r
1642 cr = se->get_colref();
\r
1643 vref = cr->get_tablevar_ref();
\r
1644 for(o=0;o<reflist.size();++o){
\r
1645 if(vref == reflist[o]) return;
\r
1647 reflist.push_back(vref);
\r
1649 case SE_AGGR_STAR:
\r
// Nodes carrying an aggregate reference (aggr_ref >= 0) are not descended into.
1653 if(se->get_aggr_ref() >= 0) return;
\r
1655 operands = se->get_operands();
\r
1656 for(o=0;o<operands.size();o++){
\r
1657 get_tablevar_ref_se(operands[o], reflist);
\r
1662 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_se, line %d, character %d: unknown operator type %d\n",
\r
1663 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// Predicate-level driver for get_tablevar_ref_se: visits every scalar
// expression reachable from pr and collects table-variable references into
// reflist.  pr is dereferenced without a NULL check visible in this view.
1670 void get_tablevar_ref_pr(predicate_t *pr, vector<int> &reflist){
\r
1671 vector<scalarexp_t *> op_list;
\r
1674 switch(pr->get_operator_type()){
\r
1676 get_tablevar_ref_se(pr->get_left_se(),reflist);
\r
1678 case PRED_COMPARE:
\r
1679 get_tablevar_ref_se(pr->get_left_se(),reflist);
\r
1680 get_tablevar_ref_se(pr->get_right_se(),reflist);
\r
1682 case PRED_UNARY_OP:
\r
1683 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
\r
1685 case PRED_BINARY_OP:
\r
1686 get_tablevar_ref_pr(pr->get_left_pr(),reflist);
\r
1687 get_tablevar_ref_pr(pr->get_right_pr(),reflist);
\r
1690 op_list = pr->get_op_list();
\r
1691 for(o=0;o<op_list.size();++o){
\r
1692 get_tablevar_ref_se(op_list[o],reflist);
\r
1696 fprintf(stderr,"INTERNAL ERROR in get_tablevar_ref_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
1697 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
1704 // Walk SE tree and gather STATES ref'd by STATEFUL fcns.
\r
// Walk the scalar expression se and insert into states_refd the storage
// state name of every operand-list node whose get_storage_state() is
// non-empty.  Unlike several sibling walkers, the aggregate-expression
// branch here does descend into its argument.  Ext_fcns is only forwarded
// to recursive calls in the lines shown.  se is dereferenced without a NULL
// check visible in this view.
1706 void gather_fcn_states_se(scalarexp_t *se, set<string> &states_refd, ext_fcn_list *Ext_fcns){
\r
1709 vector<scalarexp_t *> operands;
\r
1711 switch(se->get_operator_type()){
\r
1714 case SE_IFACE_PARAM:
\r
1717 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ;
\r
1719 case SE_BINARY_OP:
\r
1720 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
\r
1721 gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns);
\r
1725 case SE_AGGR_STAR:
\r
1728 gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns);
\r
1731 operands = se->get_operands();
\r
1732 for(o=0;o<operands.size();o++){
\r
1733 gather_fcn_states_se(operands[o], states_refd, Ext_fcns);
\r
// Record this node's own state after its operands have been visited.
1735 if(se->get_storage_state() != ""){
\r
1736 states_refd.insert(se->get_storage_state());
\r
1741 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n",
\r
1742 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
1749 // Walk SE tree and gather STATES ref'd by STATEFUL fcns.
\r
// Predicate-level driver for gather_fcn_states_se: visits every scalar
// expression reachable from pr so that referenced storage states end up in
// states_refd.  pr is dereferenced without a NULL check visible in this view.
1751 void gather_fcn_states_pr(predicate_t *pr, set<string> &states_refd, ext_fcn_list *Ext_fcns){
\r
1752 vector<scalarexp_t *> op_list;
\r
1755 switch(pr->get_operator_type()){
\r
1757 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
\r
1759 case PRED_COMPARE:
\r
1760 gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ;
\r
1761 gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ;
\r
1763 case PRED_UNARY_OP:
\r
1764 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns);
\r
1766 case PRED_BINARY_OP:
\r
1767 gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ;
\r
1768 gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ;
\r
1771 op_list = pr->get_op_list();
\r
1772 for(o=0;o<op_list.size();++o){
\r
1773 gather_fcn_states_se(op_list[o],states_refd, Ext_fcns);
\r
1778 fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
1779 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
1789 // walk se tree and collect aggregates into aggregate table.
\r
1790 // duplicate aggregates receive the same idx to the table.
\r
// Walk the scalar expression se and register each aggregate it contains in
// aggregate_table.  The id returned by add_aggr() is stored back on the
// expression node with set_aggr_id().
//   Ext_fcns : queried for storage type, running-aggregate flag and lfta
//              bailout flag when an operand-list node is tagged as a UDAF.
// se is dereferenced without a NULL check visible in this view.
1792 void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){
\r
1795 vector<scalarexp_t *> operands;
\r
1797 switch(se->get_operator_type()){
\r
1800 case SE_IFACE_PARAM:
\r
1803 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ;
\r
1805 case SE_BINARY_OP:
\r
1806 build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns);
\r
1807 build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns);
\r
// Star aggregate: registered with a NULL argument expression.
1811 case SE_AGGR_STAR:
\r
1812 agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr());
\r
1813 se->set_aggr_id(agg_id);
\r
// Aggregate over an expression: registered with its argument; the argument
// itself is not walked in the lines shown.
1816 agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr());
\r
1817 se->set_aggr_id(agg_id);
\r
// Operand-list node: operands are walked first, then the node itself is
// registered if it is tagged as a UDAF.
1820 operands = se->get_operands();
\r
1821 for(o=0;o<operands.size();o++){
\r
1822 build_aggr_tbl_fm_se(operands[o], aggregate_table, Ext_fcns);
\r
1824 if(se->get_aggr_ref() >= 0){ // it's been tagged as a UDAF
\r
1825 agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id()));
\r
1826 se->set_aggr_id(agg_id);
\r
1831 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n",
\r
1832 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
1839 // walk se tree and collect aggregates into aggregate table.
\r
1840 // duplicate aggregates receive the same idx to the table.
\r
// Predicate-level driver for build_aggr_tbl_fm_se: visits every scalar
// expression reachable from pr so that its aggregates are registered in
// aggregate_table.  pr is dereferenced without a NULL check visible here.
1842 void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){
\r
1843 vector<scalarexp_t *> op_list;
\r
1846 switch(pr->get_operator_type()){
\r
1848 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
\r
1850 case PRED_COMPARE:
\r
1851 build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ;
\r
1852 build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ;
\r
1854 case PRED_UNARY_OP:
\r
1855 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns);
\r
1857 case PRED_BINARY_OP:
\r
1858 build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ;
\r
1859 build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ;
\r
1862 op_list = pr->get_op_list();
\r
1863 for(o=0;o<op_list.size();++o){
\r
1864 build_aggr_tbl_fm_se(op_list[o],aggregate_table, Ext_fcns);
\r
1869 fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_pred, line %d, character %d, unknown predicate operator type %d\n",
\r
1870 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
1878 // Return true if the two scalar expressions
\r
1879 // represent the same value (e.g., use to eliminate
\r
1880 // duplicate aggregates).
\r
// Structural equivalence test for two scalar expressions.
// Two NULLs are equivalent; one NULL and one non-NULL are not.  Otherwise
// the operator type and operator string are compared first, followed by a
// kind-specific comparison (literal, parameter name, interface parameter,
// operands, group-by reference or column reference).
1881 bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){
\r
1882 vector<scalarexp_t *> operands1;
\r
1883 vector<scalarexp_t *> operands2;
\r
1886 // First handle the case of nulls (e.g. COUNT aggrs)
\r
1887 if(se1 == NULL && se2 == NULL) return(true);
\r
1888 if(se1 == NULL || se2 == NULL) return(false);
\r
1890 // In all cases, must be the same operator type and same operator.
\r
1891 if(se1->get_operator_type() != se2->get_operator_type())
\r
1893 if(se1->get_op() != se2->get_op() )
\r
1896 switch(se1->get_operator_type()){
\r
1898 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
\r
1900 return(se1->get_param_name() == se2->get_param_name() );
\r
1901 case SE_IFACE_PARAM:
\r
1902 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
\r
1904 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
\r
1905 case SE_BINARY_OP:
\r
1906 if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) )
\r
1907 return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) );
\r
// Column references: two group-by references compare by gb_ref; a mix of
// group-by and plain reference is tested separately (outcome not visible
// here); two plain references defer to colref equivalence.
1910 if(se1->is_gb() && se2->is_gb())
\r
1911 return( se1->get_gb_ref() == se2->get_gb_ref() );
\r
1912 if(se1->is_gb() || se2->is_gb())
\r
1914 return(se1->get_colref()->is_equivalent(se2->get_colref()) );
\r
1915 case SE_AGGR_STAR:
\r
1918 return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) );
\r
// NOTE(review): the operator strings were already compared before the switch.
1920 if(se1->get_op() != se2->get_op()) return(false);
\r
// Operand lists must have the same length and be pairwise equivalent.
1922 operands1 = se1->get_operands();
\r
1923 operands2 = se2->get_operands();
\r
1924 if(operands1.size() != operands2.size()) return(false);
\r
1926 for(o=0;o<operands1.size();o++){
\r
1927 if(! is_equivalent_se(operands1[o], operands2[o]) )
\r
1932 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se, line %d, character %d: unknown operator type %d\n",
\r
1933 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
\r
1940 // Similar to is_equivalent_se, but with a looser definition
\r
1941 // of equivalence of colrefs. Here, say they are equivalent
\r
1942 // if their base table is the same. Use to find equivalent
\r
1943 // predicates on base tables.
\r
// Equivalence test for two scalar expressions with a looser rule for
// column references: plain colrefs are compared with is_equivalent_base()
// against Schema instead of is_equivalent().
//   se1, se2 : expressions to compare; either may be NULL.
//   Schema   : forwarded to colref->is_equivalent_base() and recursive calls.
// A group-by column reference is first replaced by the expression held in
// its right-hand slot (get_right_se()).  Two NULLs are equivalent; one NULL
// and one non-NULL are not.
1944 bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){
\r
1945 vector<scalarexp_t *> operands1;
\r
1946 vector<scalarexp_t *> operands2;
\r
// Guard against NULL before dereferencing: the NULL handling below is only
// reached after these substitutions.
1949 if(se1 != NULL && se1->get_operator_type() == SE_COLREF && se1->is_gb()){
\r
1950 se1 = se1->get_right_se();
\r
1952 if(se2 != NULL && se2->get_operator_type() == SE_COLREF && se2->is_gb()){
\r
1953 se2 = se2->get_right_se();
\r
1956 // First handle the case of nulls (e.g. COUNT aggrs)
\r
1957 if(se1 == NULL && se2 == NULL) return(true);
\r
1958 if(se1 == NULL || se2 == NULL) return(false);
\r
1960 // In all cases, must be the same operator type and same operator.
\r
1961 if(se1->get_operator_type() != se2->get_operator_type())
\r
1963 if(se1->get_op() != se2->get_op() )
\r
1966 switch(se1->get_operator_type()){
\r
1968 return(se1->get_literal()->is_equivalent(se2->get_literal()) );
\r
1970 return(se1->get_param_name() == se2->get_param_name() );
\r
1971 case SE_IFACE_PARAM:
\r
1972 return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) );
\r
1974 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
\r
1975 case SE_BINARY_OP:
\r
1976 if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) )
\r
1977 return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) );
\r
1981 if(se1->is_gb() && se2->is_gb())
\r
1982 return( se1->get_gb_ref() == se2->get_gb_ref() );
\r
1983 if(se1->is_gb() || se2->is_gb())
\r
1986 return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) );
\r
1987 case SE_AGGR_STAR:
\r
1990 return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) );
\r
1992 if(se1->get_op() != se2->get_op()) return(false);
\r
// Operand lists must have the same length and be pairwise equivalent.
1994 operands1 = se1->get_operands();
\r
1995 operands2 = se2->get_operands();
\r
1996 if(operands1.size() != operands2.size()) return(false);
\r
1998 for(o=0;o<operands1.size();o++){
\r
1999 if(! is_equivalent_se_base(operands1[o], operands2[o], Schema) )
\r
// Report under this function's own name so the diagnostic can be traced.
2004 fprintf(stderr,"INTERNAL ERROR in is_equivalent_se_base, line %d, character %d: unknown operator type %d\n",
\r
2005 se1->get_lineno(), se1->get_charno(),se1->get_operator_type());
\r
2012 // Find predicates which are equivalent when
\r
2013 // looking at the base tables. Use to find
\r
2014 // common prefilter.
\r
// Equivalence test for two predicates, using is_equivalent_se_base() for
// their scalar expressions (i.e. column references are compared through
// Schema).  Two NULLs are equivalent; one NULL and one non-NULL are not.
// Operator type and operator string are compared before the per-kind checks.
2015 bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){
\r
2018 // First handle the case of nulls
\r
2019 if(p1 == NULL && p2 == NULL) return(true);
\r
2020 if(p1 == NULL || p2 == NULL) return(false);
\r
2023 if(p1->get_operator_type() != p2->get_operator_type())
\r
2025 if(p1->get_op() != p2->get_op())
\r
2028 vector<literal_t *> ll1;
\r
2029 vector<literal_t *> ll2;
\r
2030 vector<scalarexp_t *> op_list1, op_list2;
\r
2033 switch(p2->get_operator_type()){
\r
2034 case PRED_COMPARE:
\r
2035 if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
\r
2037 return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) );
\r
// Predicate with a literal list: left expressions must match, and the
// literal lists are compared position by position (order-sensitive).
2040 if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) )
\r
2042 ll1 = p1->get_lit_vec();
\r
2043 ll2 = p2->get_lit_vec();
\r
2044 if(ll1.size() != ll2.size())
\r
2046 for(i=0;i<ll1.size();i++){
\r
2047 if(! ll1[i]->is_equivalent( ll2[i] ) )
\r
2052 case PRED_UNARY_OP:
\r
2053 return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) );
\r
2055 case PRED_BINARY_OP:
\r
2056 if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema))
\r
2058 return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) );
\r
// Operand-list predicate: same length and pairwise-equivalent operands.
2061 op_list1 = p1->get_op_list();
\r
2062 op_list2 = p2->get_op_list();
\r
2063 if(op_list1.size() != op_list2.size()) return(false);
\r
2064 for(o=0;o<op_list1.size();++o){
\r
2065 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) ) return(false);
\r
// Compare two function predicates (PRED_FUNC) that share the same function
// id.  The class indicators of that function are fetched from Ext_fcns into
// cl_op; how they select which operands are compared is on lines not
// visible in this view.  Operand lists must have the same length.
// p1 and p2 are dereferenced without a NULL check visible here.
2076 bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns){
\r
2077 if((p1->get_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC))
\r
2079 if(p1->get_fcn_id() != p2->get_fcn_id())
\r
2081 vector<bool> cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id());
\r
2083 vector<scalarexp_t *> op_list1 = p1->get_op_list();
\r
2084 vector<scalarexp_t *> op_list2 = p2->get_op_list();
\r
2085 if(op_list1.size() != op_list2.size()) return(false);
\r
2086 for(o=0;o<op_list1.size();++o){
\r
2088 if(! is_equivalent_se_base(op_list1[o],op_list2[o], Schema) )
\r
2099 // Verify that the scalar expression (in a such that clause)
\r
2100 // is acceptable in an aggregation query. No column
\r
2101 // references allowed outside aggregates, except for
\r
2102 // references to group-by attributes.
\r
2103 // return true if OK, false if bad.
\r
// Check that the scalar expression se is acceptable in an aggregation
// query.  A column reference passes only if it is a group-by reference;
// otherwise an error naming the column and its source position is written
// to stderr.  A node tagged with an aggregate reference (aggr_ref >= 0)
// passes without inspecting its operands.  se is dereferenced without a
// NULL check visible in this view.
2104 bool verify_aggr_query_se(scalarexp_t *se){
\r
2105 vector <scalarexp_t *> operands;
\r
2108 switch(se->get_operator_type()){
\r
2111 case SE_IFACE_PARAM:
\r
2114 return(verify_aggr_query_se(se->get_left_se() ) );
\r
2115 case SE_BINARY_OP:
\r
// Because of && short-circuiting, the right operand is checked (and its
// errors reported) only when the left operand passes.
2116 return(verify_aggr_query_se(se->get_left_se() ) &&
\r
2117 verify_aggr_query_se(se->get_right_se() ) );
\r
2119 if(se->is_gb() ) return(true);
\r
2120 fprintf(stderr,"ERROR: the select clause in an aggregate query can "
\r
2121 "only reference constants, group-by attributes, and "
\r
2122 "aggregates, (%s) line %d, character %d.\n",
\r
2123 se->get_colref()->to_string().c_str(),
\r
2124 se->get_lineno(), se->get_charno() );
\r
2126 case SE_AGGR_STAR:
\r
2128 // colrefs and gbrefs allowed.
\r
2129 // check for nested aggregation elsewhere, so just return TRUE
\r
2132 // If its a UDAF, just return true
\r
2133 if(se->get_aggr_ref() >= 0) return true;
\r
2135 operands = se->get_operands();
\r
2137 for(o=0;o<operands.size();o++){
\r
2138 if(! verify_aggr_query_se(operands[o]) )
\r
2143 fprintf(stderr,"INTERNAL ERROR in verify_aggr_query_se, line %d, character %d: unknown operator type %d\n",
\r
2144 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
2153 // Find complex literals.
\r
2154 // NOTE : This analysis should be deferred to
\r
2155 // code generation time.
\r
2156 // This analysis drills into aggr se specs.
\r
2157 // Shouldn't this be done at the aggregate table?
\r
2158 // But, its not a major loss of efficiency.
\r
2159 // UPDATE : drilling into aggr se's is causing a problem
\r
2160 // so I've eliminated it.
\r
// Walk the scalar expression se and register every literal that has a
// non-empty constructor name in complex_literals; the index returned by
// add_cpx_lit() is stored back on the literal with set_cpx_lit_ref().
// Ext_fcns is only forwarded to recursive calls in the lines shown.
// se is dereferenced without a NULL check visible in this view.
2162 bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
\r
2163 cplx_lit_table *complex_literals){
\r
2165 vector<scalarexp_t *> operands;
\r
2167 scalarexp_t *param_se;
\r
2170 switch(se->get_operator_type()){
\r
2172 l = se->get_literal();
\r
2173 if(l->constructor_name() != ""){
\r
2174 int cl_idx = complex_literals->add_cpx_lit(l, false);
\r
2175 l->set_cpx_lit_ref(cl_idx);
\r
2180 // SE_IFACE_PARAM should not exist when this is called.
\r
2182 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
\r
2183 case SE_BINARY_OP:
\r
// NOTE(review): with &&, the right operand is scanned only if the call on
// the left operand returns true -- confirm that this call never returns
// false on a valid tree, otherwise literals on the right are skipped.
2184 return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) &&
\r
2185 find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) );
\r
2188 case SE_AGGR_STAR:
\r
2192 // return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) );
\r
// Nodes carrying an aggregate reference are not descended into.
2194 if(se->get_aggr_ref() >= 0) return true;
\r
// The return value of the per-operand calls is ignored here.
2196 operands = se->get_operands();
\r
2197 for(o=0;o<operands.size();o++){
\r
2198 find_complex_literal_se(operands[o], Ext_fcns, complex_literals);
\r
2202 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_se, line %d, character %d: unknown operator type %d\n",
\r
2203 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// Predicate-level driver for find_complex_literal_se.  In addition to
// visiting the scalar expressions reachable from pr, it registers the
// predicate's own literal list: each literal with a non-empty constructor
// name is added to complex_literals and given its table index.
// pr is dereferenced without a NULL check visible in this view.
2212 void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
\r
2213 cplx_lit_table *complex_literals){
\r
2215 vector<literal_t *> litl;
\r
2216 vector<scalarexp_t *> op_list;
\r
2219 switch(pr->get_operator_type()){
\r
2221 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
\r
2222 litl = pr->get_lit_vec();
\r
2223 for(i=0;i<litl.size();i++){
\r
2224 if(litl[i]->constructor_name() != ""){
\r
2225 int cl_idx = complex_literals->add_cpx_lit(litl[i],false);
\r
2226 litl[i]->set_cpx_lit_ref(cl_idx);
\r
2230 case PRED_COMPARE:
\r
2231 find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ;
\r
2232 find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ;
\r
2234 case PRED_UNARY_OP:
\r
2235 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals);
\r
2237 case PRED_BINARY_OP:
\r
2238 find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ;
\r
2239 find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ;
\r
2242 op_list = pr->get_op_list();
\r
2243 for(o=0;o<op_list.size();++o){
\r
2244 find_complex_literal_se(op_list[o],Ext_fcns, complex_literals);
\r
2248 fprintf(stderr,"INTERNAL ERROR in find_complex_literal_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
2249 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
2257 // Find all things which are passed as handle parameters to functions
\r
2258 // (query parameters, (simple) literals, complex literals)
\r
2259 // These expressions MUST be processed with find_complex_literal_??
\r
2261 // TODO: this analysis drills into the aggregate SEs.
\r
2262 // Shouldn't this be done on the aggr table SEs instead?
\r
2263 // to avoid duplication. The handle registration
\r
2264 // might be expensive ...
\r
2265 // REVISED : drilling into aggr se's is causing problems, eliminated.
\r
// Walk the scalar expression se and, for every operand that the function's
// handle indicators (from Ext_fcns) mark as pass-by-handle, append a
// handle_param_tbl_entry to handle_tbl and record its index on the operand
// with set_handle_ref().
// A handle operand must be a literal or a query parameter; anything else
// is reported on stderr.  Nodes carrying an aggregate reference are not
// descended into.  se is dereferenced without a NULL check visible here.
2267 void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
\r
2268 vector<handle_param_tbl_entry *> &handle_tbl){
\r
2269 vector<scalarexp_t *> operands;
\r
2270 vector<bool> handle_ind;
\r
2272 scalarexp_t *param_se;
\r
2276 switch(se->get_operator_type()){
\r
2281 // case SE_IFACE_PARAM: SHOULD NOT EXIST when this is called
\r
2283 find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
\r
2285 case SE_BINARY_OP:
\r
2286 find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ;
\r
2287 find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ;
\r
2291 case SE_AGGR_STAR:
\r
2294 // find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ;
\r
2297 if(se->get_aggr_ref() >= 0) return ;
\r
2299 operands = se->get_operands();
\r
// handle_ind is indexed by operand position below.
// NOTE(review): presumably it has one entry per operand -- confirm.
2300 handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() );
\r
2301 for(o=0;o<operands.size();o++){
\r
2302 if(handle_ind[o]){
\r
2303 handle_param_tbl_entry *he;
\r
2304 param_se = operands[o];
\r
2305 if(param_se->get_operator_type() != SE_LITERAL &&
\r
2306 param_se->get_operator_type() != SE_PARAM){
\r
2307 fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
\r
2308 o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno());
\r
// Query parameter: the entry is keyed by the parameter name.
2312 if(param_se->get_operator_type() == SE_PARAM){
\r
2313 he = new handle_param_tbl_entry(
\r
2314 se->get_op(), o, param_se->get_param_name(),
\r
2315 param_se->get_data_type()->get_type_str());
\r
// Literal: a complex literal is referenced by its complex-literal index,
// any other literal is passed to the entry directly.
2317 l = param_se->get_literal();
\r
2318 if(l->is_cpx_lit()){
\r
2319 he = new handle_param_tbl_entry(
\r
2320 se->get_op(), o, l->get_cpx_lit_ref(),
\r
2321 param_se->get_data_type()->get_type_str());
\r
2323 he = new handle_param_tbl_entry(
\r
2324 se->get_op(), o, l,
\r
2325 param_se->get_data_type()->get_type_str());
\r
// The handle reference is the entry's position in handle_tbl.
2328 param_se->set_handle_ref(handle_tbl.size());
\r
2329 handle_tbl.push_back(he);
\r
2331 find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ;
\r
// NOTE(review): the message names "find_param_handles", not this function.
2336 fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n",
\r
2337 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// find_param_handles_pr: predicate counterpart of find_param_handles_se.
// Walks predicate `pr`, delegating scalar operands to find_param_handles_se
// and sub-predicates to itself. Where handle indicators are looked up for the
// predicate's function id, each handle operand gets a handle_param_tbl_entry
// appended to handle_tbl.
//   pr         - predicate to scan.
//   Ext_fcns   - supplies get_handle_indicators() for pr->get_fcn_id().
//   handle_tbl - output table of handle parameters.
// NOTE(review): short lines (case labels, break/return, closing braces, the
// declarations of `o` and `l`) are missing from this copy of the file.
2344 void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
\r
2345 vector<handle_param_tbl_entry *> &handle_tbl){
\r
2346 vector<literal_t *> litl;
\r
2347 vector<scalarexp_t *> op_list;
\r
2348 scalarexp_t *param_se;
\r
2349 vector<bool> handle_ind;
\r
2353 switch(pr->get_operator_type()){
\r
2355 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
\r
2357 case PRED_COMPARE:
\r
2358 find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ;
\r
2359 find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ;
\r
2361 case PRED_UNARY_OP:
\r
2362 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl);
\r
2364 case PRED_BINARY_OP:
\r
2365 find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ;
\r
2366 find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ;
\r
2369 op_list = pr->get_op_list();
\r
// handle_ind[o] tells whether operand o is passed by handle.
2370 handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() );
\r
2371 for(o=0;o<op_list.size();++o){
\r
2372 if(handle_ind[o]){
\r
2373 handle_param_tbl_entry *he;
\r
2374 param_se = op_list[o];
\r
// A handle operand has to be a literal or a query parameter.
2375 if(param_se->get_operator_type() != SE_LITERAL &&
\r
2376 param_se->get_operator_type() != SE_PARAM){
\r
2377 fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n",
\r
2378 o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno());
\r
// Entry is keyed by parameter name, complex-literal ref, or the literal itself.
2382 if(param_se->get_operator_type() == SE_PARAM){
\r
2383 he = new handle_param_tbl_entry(
\r
2384 pr->get_op(), o, param_se->get_param_name(),
\r
2385 param_se->get_data_type()->get_type_str());
\r
2387 l = param_se->get_literal();
\r
2388 if(l->is_cpx_lit()){
\r
2389 he = new handle_param_tbl_entry(
\r
2390 pr->get_op(), o, l->get_cpx_lit_ref(),
\r
2391 param_se->get_data_type()->get_type_str());
\r
2393 he = new handle_param_tbl_entry(
\r
2394 pr->get_op(), o, l,
\r
2395 param_se->get_data_type()->get_type_str());
\r
// The operand records the index its entry is about to occupy in handle_tbl.
2398 param_se->set_handle_ref(handle_tbl.size());
\r
2399 handle_tbl.push_back(he);
\r
2401 find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ;
\r
2406 fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
2407 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
2415 // Verify the HAVING predicate : it
\r
2416 // can access gb vars, aggregates, and constants,
\r
2417 // but not colrefs.
\r
2418 // return 1 if OK, -1 if bad.
\r
2419 // Perhaps replace by a pair of fcns which counts non-gb colrefs?
\r
2421 // Extended to deal with cleaning_by, cleaning_when :
\r
2422 // verify that any aggregate function
\r
2423 // has the multiple output property.
\r
// verify_having_se: check that scalar expression `se` is legal inside the
// clause named by `clause` (HAVING, CLEANING_WHEN, CLEANING_BY, ...).
//   se       - expression to check; sub-expressions are checked recursively.
//   clause   - clause name, used in diagnostics and to enable the extra
//              CLEANING_WHEN / CLEANING_BY checks.
//   Ext_fcns - passed to aggr_table_entry::multiple_return_allowed().
// Returns 1 when the expression is acceptable, -1 otherwise (diagnostics go to
// stderr).
// FIX: the superaggregate diagnostic for function SEs used to fetch line/char
// through se->get_colref(); a function SE is not a column reference, so the
// position is now read from the SE itself, as the sibling diagnostics do.
// NOTE(review): short lines (case labels, break/return, closing braces, the
// declarations of l_ret/r_ret/o) are missing from this copy of the file.
2425 int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){
\r
2427 vector<scalarexp_t *> operands;
\r
2428 vector<data_type *> odt;
\r
2431 switch(se->get_operator_type()){
\r
2435 case SE_IFACE_PARAM:
\r
2438 return(verify_having_se(se->get_left_se(), clause, Ext_fcns) );
\r
2439 case SE_BINARY_OP:
\r
2440 l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns);
\r
2441 r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns);
\r
2442 if( (l_ret < 0 ) || (r_ret < 0) ) return(-1);
\r
// A column reference is only legal when it is a group-by variable.
2445 if(se->is_gb()) return 1;
\r
2446 fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause,
\r
2447 se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() );
\r
2449 case SE_AGGR_STAR:
\r
2451 // colrefs and gbrefs allowed.
\r
2452 // check for nested aggregation elsewhere, so just return TRUE
\r
// NOTE(review): the test fires when the aggregate is NOT a superaggregate,
// yet the message says it "references a superaggregate" -- confirm wording.
2453 if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
\r
2454 fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
\r
2455 se->get_lineno(),se->get_charno(), se->get_op().c_str() );
\r
2459 // Ensure that aggregate refs allow multiple outputs
\r
2460 // in CLEANING_WHEN, CLEANING_BY
\r
2461 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
\r
2462 if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
\r
2463 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
\r
2464 se->get_op().c_str(),se->get_lineno(),se->get_charno() );
\r
2472 if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){
\r
2473 fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause,
\r
2474 se->get_lineno(),se->get_charno(), se->get_op().c_str() );
\r
2478 if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){
\r
2479 if(se->get_aggr_ref() >= 0 && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){
\r
2480 fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause,
\r
2481 se->get_op().c_str(),se->get_lineno(),se->get_charno() );
\r
2486 if(se->get_aggr_ref() >= 0) // don't descent into aggregates.
\r
2489 operands = se->get_operands();
\r
// Any failing operand makes the whole expression fail.
2491 for(o=0;o<operands.size();o++){
\r
2492 l_ret = verify_having_se(operands[o], clause, Ext_fcns);
\r
2493 if(l_ret < 0) r_ret = -1;
\r
2495 if(r_ret < 0) return(-1); else return(1);
\r
2498 fprintf(stderr,"INTERNAL ERROR in verify_having_se, line %d, character %d: unknown operator type %d\n",
\r
2499 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
2506 // Verify the HAVING predicate : it
\r
2507 // can access gb vars, aggregates, and constants,
\r
2508 // but not colrefs.
\r
2509 // return 1 if OK, -1 if bad.
\r
2510 // Perhaps replace by a pair of fcns which counts non-gb colrefs?
\r
// verify_having_pred: apply verify_having_se to every scalar expression
// reachable from predicate `pr`, recursing through sub-predicates.
//   pr       - predicate to check.
//   clause   - clause name forwarded to verify_having_se for diagnostics.
//   Ext_fcns - forwarded to verify_having_se.
// Returns -1 as soon as a visible branch sees a negative result from a child;
// the compare branch returns 1 otherwise.
// NOTE(review): short lines (case labels, some returns, closing braces, the
// declarations of l_ret/r_ret/o) are missing from this copy of the file.
2513 int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){
\r
2515 vector<literal_t *> litl;
\r
2516 vector<scalarexp_t *> op_list;
\r
2519 switch(pr->get_operator_type()){
\r
2521 return(verify_having_se(pr->get_left_se(), clause, Ext_fcns));
\r
2522 case PRED_COMPARE:
\r
2523 l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ;
\r
2524 r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ;
\r
2525 if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1);
\r
2526 case PRED_UNARY_OP:
\r
2527 return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns));
\r
2528 case PRED_BINARY_OP:
\r
2529 l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns);
\r
2530 r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns);
\r
2531 if( (l_ret < 0) || (r_ret < 0) ) return(-1);
\r
2534 op_list = pr->get_op_list();
\r
// All operands are checked (no early exit) so every problem is reported.
2536 for(o=0;o<op_list.size();++o){
\r
2537 if( verify_having_se(op_list[o], clause, Ext_fcns) < 0) l_ret = -1;
\r
2542 fprintf(stderr,"INTERNAL ERROR in verify_having_pred, line %d, character %d, unknown predicate operator type %d\n",
\r
2543 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
2550 //////////////////////////////////////////////////////////////////////////
\r
2551 //////////////////////////////////////////////////////////////////////////
\r
2552 /////// cnf and pred analysis and manipulation
\r
2554 // ----------------------------------------------------------------------
\r
2555 //  Convert the predicates to a list of conjuncts
\r
2556 //  (not actually cnf). Do some analysis
\r
2557 //  on their properties.
\r
2558 // ----------------------------------------------------------------------
\r
2561 //  Put into list clist the predicates that
\r
2562 //  are AND'ed together.
\r
// make_cnf_from_pr: flatten predicate `pr` into the conjunct list `clist`.
// A binary "AND" is split by recursing into both sides; the other visible
// branches (compare, unary op, "OR", ...) wrap the predicate whole in a new
// cnf_elem and append it. A NULL predicate contributes nothing.
//   pr    - predicate tree, may be NULL.
//   clist - output; receives newly allocated cnf_elem objects.
// NOTE(review): short lines (some case labels, break/return, closing braces)
// are missing from this copy of the file.
2564 void make_cnf_from_pr(predicate_t *pr, vector<cnf_elem *> &clist){
\r
2566 if(pr == NULL) return;
\r
2568 switch(pr->get_operator_type()){
\r
2569 case PRED_COMPARE:
\r
2570 clist.push_back(new cnf_elem(pr));
\r
2574 clist.push_back(new cnf_elem(pr));
\r
2577 case PRED_UNARY_OP:
\r
2578 clist.push_back(new cnf_elem(pr));
\r
2581 case PRED_BINARY_OP:
\r
// An OR stays a single conjunct.
2582 if(pr->get_op() == "OR"){
\r
2583 clist.push_back(new cnf_elem(pr));
\r
// An AND contributes the conjuncts of both of its sides.
2586 if(pr->get_op() =="AND"){
\r
2587 make_cnf_from_pr(pr->get_left_pr(),clist);
\r
2588 make_cnf_from_pr(pr->get_right_pr(),clist);
\r
2592 clist.push_back(new cnf_elem(pr));
\r
2596 fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str());
\r
2604 //  Find out what things are referenced in a se,
\r
2605 //  to use for analyzing a predicate.
\r
2606 //  Currently, is it simple (no operators), does it
\r
2607 //  reference a group-by column, does it reference an
\r
2608 //  attribute of a table.
\r
2610 // analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf
\r
// analyze_cnf_se: walk scalar expression `se` and accumulate reference flags
// into the by-reference outputs.
//   s, g, a, agr - in/out flags; the only assignment visible here is g=1 when
//                  the SE is a group-by reference. Presumably s = "simple",
//                  a = "references a table attribute", agr = "references an
//                  aggregate" -- TODO confirm, the other assignments sit on
//                  lines missing from this copy.
// NOTE(review): short lines (case labels, break/return, closing braces, the
// declaration of `p`) are missing from this copy of the file.
2613 void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){
\r
2615 vector<scalarexp_t *> operand_list;
\r
2617 switch(se->get_operator_type()){
\r
2620 case SE_IFACE_PARAM:
\r
2623 if(se->is_gb() ) g=1;
\r
2628 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
\r
2630 case SE_BINARY_OP:
\r
2632 analyze_cnf_se(se->get_left_se(),s,g,a,agr);
\r
2633 analyze_cnf_se(se->get_right_se(),s,g,a,agr);
\r
2635 case SE_AGGR_STAR:
\r
2640 if(se->get_aggr_ref() >= 0){
\r
2645 operand_list = se->get_operands();
\r
2646 for(p=0;p<operand_list.size();p++){
\r
2647 analyze_cnf_se(operand_list[p],s,g,a,agr);
\r
// analyze_cnf_pr: accumulate the g / a / agr reference flags over every scalar
// expression reachable from predicate `pr`, recursing through sub-predicates.
// The "simple" flag that analyze_cnf_se also reports is collected in a
// throw-away local (dum_simple) because it is meaningless for a whole
// predicate.
//   pr        - predicate to analyze.
//   g, a, agr - in/out flags forwarded to analyze_cnf_se.
// NOTE(review): short lines (some case labels, break/return, closing braces)
// are missing from this copy of the file.
2657 void analyze_cnf_pr(predicate_t *pr, int &g, int &a, int &agr){
\r
2658 int dum_simple, o;
\r
2659 vector<scalarexp_t *> op_list;
\r
2662 switch(pr->get_operator_type()){
\r
2663 case PRED_COMPARE:
\r
2664 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
\r
2665 analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr);
\r
2668 analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr);
\r
2670 case PRED_UNARY_OP:
\r
2671 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
\r
2673 case PRED_BINARY_OP:
\r
2674 analyze_cnf_pr(pr->get_left_pr(),g,a,agr);
\r
2675 analyze_cnf_pr(pr->get_right_pr(),g,a,agr);
\r
2678 op_list = pr->get_op_list();
\r
2679 for(o=0;o<op_list.size();++o){
\r
2680 analyze_cnf_se(op_list[o],dum_simple,g,a,agr);
\r
2684 fprintf(stderr,"INTERNAL ERROR in analyze_cnf_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
2685 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
2692 //  analyze a conjunct of a predicate.
\r
2693 //  Is it atomic (e.g., a single predicate),
\r
2694 //  and if so do a further analysis.
\r
// analyze_cnf: fill in the analysis fields of conjunct `c`.
// First the predicate-wide flags (pr_gb, pr_attr, pr_aggr) are computed with
// analyze_cnf_pr. For compare and IN predicates the left operand is then
// analyzed into the l_* fields, and for compare predicates the right operand
// into the r_* fields.
// FIX: the r_* fields used to be computed from get_left_se(), so they merely
// duplicated the l_* fields; they are now computed from get_right_se().
// NOTE(review): short lines (return, the assignments guarded by the two
// one-line ifs, closing braces) are missing from this copy of the file.
2696 void analyze_cnf(cnf_elem *c){
\r
2698 //  analyze the predicate.
\r
2699 analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr);
\r
2701 if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){
\r
2706 //  its an atomic predicate -- get more info
\r
2709 if(c->pr->get_op() == "=")
\r
2714 if(c->pr->get_operator_type() == PRED_IN)
\r
2719 c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0;
\r
2720 analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr);
\r
// Only a comparison has a right-hand scalar expression.
2722 if(c->pr->get_operator_type() == PRED_COMPARE){
\r
2723 c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0;
\r
2724 analyze_cnf_se(c->pr->get_right_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr);
\r
// analyze_constraint_se: count the cost-relevant features of scalar
// expression `se` into the by-reference outputs.
//   n_agr, n_gb, n_par, n_func, n_op - outputs; each visible leaf branch
//       overwrites all of them, binary operators sum the counts of their two
//       sides, and function SEs take n_func from
//       Ext_fcns->estimate_fcn_cost().
//   Ext_fcns - external function table used for the cost estimate.
//   enter_gb - forwarded unchanged on every recursive call visible here.
// NOTE(review): simple_field_constraint() in this file passes NULL for
// Ext_fcns; whether estimate_fcn_cost() is guarded against that cannot be
// seen here because the lines between the visible ones are missing.
// NOTE(review): short lines (case labels, break/return, closing braces, the
// n_gb / n_op sums, the declaration of `p`) are missing from this copy.
2728 void analyze_constraint_se(scalarexp_t *se,
\r
2729 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){
\r
2730 int l_agr, l_gb, l_par, l_func, l_op;
\r
2731 int r_agr, r_gb, r_par, r_func, r_op;
\r
2733 vector<scalarexp_t *> operand_list;
\r
2735 switch(se->get_operator_type()){
\r
2737 case SE_IFACE_PARAM:
\r
2738 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
\r
2741 n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0;
\r
2744 n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
\r
2747 analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
\r
2754 analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
\r
2757 case SE_BINARY_OP:
\r
2758 analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
\r
2759 analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb);
\r
2760 n_agr=l_agr+r_agr;
\r
2762 n_par=l_par+r_par;
\r
2763 n_func=l_func+r_func;
\r
2766 case SE_AGGR_STAR:
\r
2768 n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0;
\r
// A function SE with a non-negative aggregate ref counts as one aggregate.
2771 if(se->get_aggr_ref() >= 0){
\r
2772 n_agr=1; n_gb = 0; n_par = 0; n_op = 0;
\r
2774 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
\r
2779 n_agr=0; n_gb = 0; n_par = 0; n_op = 0;
\r
2781 n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id());
\r
2784 operand_list = se->get_operands();
\r
2785 for(p=0;p<operand_list.size();p++){
\r
2786 analyze_constraint_se(operand_list[p],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
\r
2799 // Estimate the cost of a constraint.
\r
2800 // WARNING a lot of cost assumptions are embedded in the code.
\r
// analyze_constraint_pr: count the cost-relevant features of predicate `pr`.
//   n_agr, n_gb, n_par, n_func, n_op - scalar-expression counts (see
//       analyze_constraint_se).
//   n_cmp_s, n_cmp_c - number of simple / complex comparisons.
//   n_in, n_pred, n_bool - IN count, predicate-function cost (taken from
//       Ext_fcns->estimate_fcn_cost), and boolean-operator count.
//   Ext_fcns, enter_gb - forwarded to analyze_constraint_se.
// FIX 1: the IN branch asked for pr->get_right_se(), but everywhere else in
// this file an IN predicate only has a left scalar expression plus a literal
// list; the comparison type is now derived from the left operand alone, as
// simple_field_constraint() does.
// FIX 2: the internal-error message named analyze_cnf_pr instead of this
// function.
// NOTE(review): short lines (some case labels, break/return, closing braces,
// the else keywords, the declaration of `o`) are missing from this copy.
2801 void analyze_constraint_pr(predicate_t *pr,
\r
2802 int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op,
\r
2803 int &n_cmp_s, int &n_cmp_c, int &n_in, int &n_pred, int &n_bool, ext_fcn_list *Ext_fcns, bool enter_gb){
\r
2804 int l_agr, l_gb, l_par, l_func, l_op, l_cmp_s, l_cmp_c, l_in, l_pred,l_bool;
\r
2805 int r_agr, r_gb, r_par, r_func, r_op, r_cmp_s, r_cmp_c, r_in, r_pred,r_bool;
\r
2808 vector<scalarexp_t *> op_list;
\r
2811 switch(pr->get_operator_type()){
\r
2812 case PRED_COMPARE:
\r
2813 analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb);
\r
2814 analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb);
\r
2815 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
\r
2816 n_func=l_func+r_func; n_op=l_op+r_op;
\r
// One comparison, classified as complex or simple by the operand types.
2817 if(pr->get_left_se()->get_data_type()->complex_comparison(
\r
2818 pr->get_right_se()->get_data_type())
\r
2820 n_cmp_s = 0; n_cmp_c=1;
\r
2822 n_cmp_s = 1; n_cmp_c=0;
\r
2824 n_in = 0; n_pred = 0; n_bool = 0;
\r
2827 // Treat IN predicate as sequence of comparisons
\r
2828 analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb);
\r
2829 if(pr->get_left_se()->get_data_type()->complex_comparison(
\r
2830 pr->get_left_se()->get_data_type())
\r
2832 n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size();
\r
2834 n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0;
\r
2836 n_in = 0; n_pred = 0; n_bool = 0;
\r
2838 case PRED_UNARY_OP:
\r
2839 analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb);
\r
2842 case PRED_BINARY_OP:
\r
2843 analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb);
\r
2844 analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb);
\r
2845 n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par;
\r
2846 n_func=l_func+r_func; n_op=l_op+r_op;
\r
2847 n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c;
\r
// The binary operator itself costs one boolean operation.
2848 n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1;
\r
2851 n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0;
\r
2853 n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id());
\r
2856 op_list = pr->get_op_list();
\r
2857 for(o=0;o<op_list.size();++o){
\r
2858 analyze_constraint_se(op_list[o],l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb);
\r
2859 n_agr+=l_agr; n_gb+=l_gb; n_par+=l_par; n_func+=l_func; n_op+=l_op;
\r
2863 fprintf(stderr,"INTERNAL ERROR in analyze_constraint_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
2864 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
// compute_cnf_cost: estimate the evaluation cost of conjunct `c` and store it
// in c->cost. The counts come from analyze_constraint_pr (called with
// enter_gb = false); a complex comparison is weighted 10x, everything else 1x.
2869 void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){
\r
2870 int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
\r
2871 analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
\r
2872 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false);
\r
2874 //printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool);
\r
2875 c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
\r
// prefilter_compatible: decide whether conjunct `c` may be evaluated in the
// prefilter. The counts come from analyze_constraint_pr (called with
// enter_gb = true); the visible code tests for query parameters or aggregates
// and computes the same weighted cost as compute_cnf_cost.
// NOTE(review): the return statements and any cost threshold are on lines
// missing from this copy of the file -- confirm against the upstream file.
// NOTE(review): in the commented-out debug printf the first two labels
// (n_par, n_gb) do not match the argument order (n_gb, n_par).
2878 bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){
\r
2879 int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool;
\r
2880 analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op,
\r
2881 n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true);
\r
2882 //printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool);
\r
2883 if(n_par || n_agr)
\r
2885 int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool;
\r
2886 //printf("cost=%d\n",cost);
\r
2890 // The prefilter needs to translate constraints on
\r
2891 // gbvars into constraints involving their underlying SEs.
\r
2892 // The following two routines attach GB def info.
\r
// insert_gb_def_se: walk scalar expression `se` and attach group-by
// definitions taken from `gtbl`: the visible assignment stores
// gtbl->get_def(se->get_gb_ref()) in se->rhs.scalarp.
//   se   - expression to annotate in place (recursively).
//   gtbl - group-by table providing get_def().
// NOTE(review): which operator type the assignment belongs to, and any guard
// around it, are on lines missing from this copy of the file (case labels,
// break/return, closing braces, the declaration of `p`).
2894 void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){
\r
2896 vector<scalarexp_t *> operand_list;
\r
2898 switch(se->get_operator_type()){
\r
2900 case SE_IFACE_PARAM:
\r
2902 case SE_AGGR_STAR:
\r
2906 se->rhs.scalarp = gtbl->get_def(se->get_gb_ref());
\r
2910 insert_gb_def_se(se->get_left_se(),gtbl);
\r
2912 case SE_BINARY_OP:
\r
2913 insert_gb_def_se(se->get_left_se(),gtbl);
\r
2914 insert_gb_def_se(se->get_right_se(),gtbl);
\r
2917 insert_gb_def_se(se->get_left_se(),gtbl);
\r
2920 operand_list = se->get_operands();
\r
2921 for(p=0;p<operand_list.size();p++){
\r
2922 insert_gb_def_se(operand_list[p],gtbl);
\r
// insert_gb_def_pr: apply insert_gb_def_se to every scalar expression
// reachable from predicate `pr`, recursing through sub-predicates.
//   pr   - predicate to annotate in place.
//   gtbl - group-by table forwarded to insert_gb_def_se.
// NOTE(review): short lines (some case labels, break/return, closing braces,
// the declaration of `o`) are missing from this copy of the file.
2929 void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl){
\r
2930 vector<scalarexp_t *> op_list;
\r
2933 switch(pr->get_operator_type()){
\r
2934 case PRED_COMPARE:
\r
2935 insert_gb_def_se(pr->get_left_se(),gtbl);
\r
2936 insert_gb_def_se(pr->get_right_se(),gtbl);
\r
2939 insert_gb_def_se(pr->get_left_se(),gtbl);
\r
2941 case PRED_UNARY_OP:
\r
2942 insert_gb_def_pr(pr->get_left_pr(),gtbl);
\r
2944 case PRED_BINARY_OP:
\r
2945 insert_gb_def_pr(pr->get_left_pr(),gtbl);
\r
2946 insert_gb_def_pr(pr->get_right_pr(),gtbl);
\r
2949 op_list = pr->get_op_list();
\r
2950 for(o=0;o<op_list.size();++o){
\r
2951 insert_gb_def_se(op_list[o],gtbl);
\r
2955 fprintf(stderr,"INTERNAL ERROR in insert_gb_def_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
2956 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
2961 // Substitute gbrefs with their definitions
\r
// subs_gbrefs_se: rewrite scalar expression `se` in place so that children
// which are group-by column references are replaced by the expression held in
// their right-hand slot (get_right_se()).
// The visible column-reference branch also re-points the colref's
// tablevar_ref at the base table found through `Schema`.
// Pattern used in every branch: when a child is an SE_COLREF with is_gb(),
// splice in child->get_right_se() and re-run on `se` itself, so a substituted
// child that is again a group-by reference is substituted as well.
//   se     - expression to rewrite in place.
//   Schema - used for get_basetbl_name() / get_table_ref().
// NOTE(review): the right-hand slot presumably holds the definition attached
// by insert_gb_def_se -- confirm.
// NOTE(review): short lines (case labels, break/return after the re-run,
// closing braces, the declarations of p, cr, b_tbl, b_idx) are missing from
// this copy of the file.
2962 void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){
\r
2964 vector<scalarexp_t *> operand_list;
\r
2965 scalarexp_t *lse,*rse;
\r
2970 switch(se->get_operator_type()){
\r
2972 case SE_IFACE_PARAM:
\r
2974 case SE_AGGR_STAR:
\r
// Point the column reference at its base table.
2977 cr = se->get_colref();
\r
2978 b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field);
\r
2979 b_idx = Schema->get_table_ref(b_tbl);
\r
2980 cr->tablevar_ref = b_idx;
\r
2983 lse=se->get_left_se();
\r
2984 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
\r
2985 se->lhs.scalarp = lse->get_right_se();
\r
2986 subs_gbrefs_se(se,Schema);
\r
2989 subs_gbrefs_se(se->get_left_se(),Schema);
\r
2991 case SE_BINARY_OP:
\r
2992 lse=se->get_left_se();
\r
2993 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
\r
2994 se->lhs.scalarp = lse->get_right_se();
\r
2995 subs_gbrefs_se(se,Schema);
\r
2998 rse=se->get_right_se();
\r
2999 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
\r
3000 se->rhs.scalarp = rse->get_right_se();
\r
3001 subs_gbrefs_se(se,Schema);
\r
3004 subs_gbrefs_se(se->get_left_se(),Schema);
\r
3005 subs_gbrefs_se(se->get_right_se(),Schema);
\r
3008 lse=se->get_left_se();
\r
3009 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
\r
3010 se->lhs.scalarp = lse->get_right_se();
\r
3011 subs_gbrefs_se(se,Schema);
\r
3014 subs_gbrefs_se(se->get_left_se(),Schema);
\r
// operand_list is a copy; the substitution is written to se->param_list.
3017 operand_list = se->get_operands();
\r
3018 for(p=0;p<operand_list.size();p++){
\r
3019 lse=operand_list[p];
\r
3020 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
\r
3021 se->param_list[p] = lse->get_right_se();
\r
3022 subs_gbrefs_se(se,Schema);
\r
3026 for(p=0;p<operand_list.size();p++){
\r
3027 subs_gbrefs_se(operand_list[p],Schema);
\r
// subs_gbrefs_pr: predicate counterpart of subs_gbrefs_se. Direct scalar
// operands of `pr` that are group-by column references are replaced by the
// expression in their right-hand slot (the predicate is then re-processed);
// all remaining scalar operands are handed to subs_gbrefs_se and
// sub-predicates are processed recursively.
//   pr     - predicate to rewrite in place.
//   Schema - forwarded to subs_gbrefs_se.
// NOTE(review): short lines (some case labels, break/return after each
// re-run, closing braces, the declaration of `o`, and the assignment of `lse`
// inside the operand loop) are missing from this copy of the file.
3035 void subs_gbrefs_pr(predicate_t *pr, table_list *Schema){
\r
3036 vector<scalarexp_t *> op_list;
\r
3038 scalarexp_t *lse,*rse;
\r
3040 switch(pr->get_operator_type()){
\r
3041 case PRED_COMPARE:
\r
3042 lse=pr->get_left_se();
\r
3043 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
\r
3044 pr->lhs.sexp = lse->get_right_se();
\r
3045 subs_gbrefs_pr(pr,Schema);
\r
3048 rse=pr->get_right_se();
\r
3049 if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){
\r
3050 pr->rhs.sexp = rse->get_right_se();
\r
3051 subs_gbrefs_pr(pr,Schema);
\r
3054 subs_gbrefs_se(pr->get_left_se(),Schema);
\r
3055 subs_gbrefs_se(pr->get_right_se(),Schema);
\r
3058 lse=pr->get_left_se();
\r
3059 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
\r
3060 pr->lhs.sexp = lse->get_right_se();
\r
3061 subs_gbrefs_pr(pr,Schema);
\r
3064 subs_gbrefs_se(pr->get_left_se(),Schema);
\r
3066 case PRED_UNARY_OP:
\r
3067 subs_gbrefs_pr(pr->get_left_pr(),Schema);
\r
3069 case PRED_BINARY_OP:
\r
3070 subs_gbrefs_pr(pr->get_left_pr(),Schema);
\r
3071 subs_gbrefs_pr(pr->get_right_pr(),Schema);
\r
// op_list is a copy; the substitution is written to pr->param_list.
3074 op_list = pr->get_op_list();
\r
3075 for(o=0;o<op_list.size();++o){
\r
3077 if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){
\r
3078 pr->param_list[o] = lse->get_right_se();
\r
3079 subs_gbrefs_pr(pr,Schema);
\r
3082 subs_gbrefs_se(op_list[o],Schema);
\r
3086 fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
3087 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
3093 // Search for references to "expensive" fields.
\r
// expensive_refs_se: count references to "expensive" fields inside scalar
// expression `se`. A column counts as expensive when the modifier list of its
// field in the schema table contains the key "expensive". Counts of
// sub-expressions and function operands are summed into `cnt`.
//   se     - expression to inspect.
//   Schema - provides get_table() / get_modifier_list().
// Returns the number of expensive references found.
// NOTE(review): one visible branch follows se->rhs.scalarp instead of looking
// the column up; its guard is on a missing line (presumably a group-by
// reference with an attached definition -- confirm).
// NOTE(review): short lines (case labels, returns, closing braces, the
// declarations of p, cnt and td) are missing from this copy of the file.
3094 int expensive_refs_se(scalarexp_t *se, table_list *Schema){
\r
3096 vector<scalarexp_t *> operand_list;
\r
3099 param_list *plist;
\r
3101 switch(se->get_operator_type()){
\r
3103 case SE_IFACE_PARAM:
\r
3105 case SE_AGGR_STAR:
\r
3110 return expensive_refs_se(se->rhs.scalarp,Schema);
\r
// Look up the field's modifiers in its schema table.
3111 td = Schema->get_table(se->lhs.colref->schema_ref);
\r
3112 plist = td->get_modifier_list(se->lhs.colref->field);
\r
3113 if(plist->contains_key("expensive"))
\r
3117 return expensive_refs_se(se->get_left_se(),Schema);
\r
3118 case SE_BINARY_OP:
\r
3119 cnt += expensive_refs_se(se->get_left_se(),Schema);
\r
3120 cnt += expensive_refs_se(se->get_right_se(),Schema);
\r
3123 operand_list = se->get_operands();
\r
3124 for(p=0;p<operand_list.size();p++){
\r
3125 cnt += expensive_refs_se(operand_list[p],Schema);
\r
// expensive_refs_pr: count references to "expensive" fields in predicate `pr`
// by summing expensive_refs_se over its scalar operands and recursing through
// sub-predicates.
//   pr     - predicate to inspect.
//   Schema - forwarded to expensive_refs_se.
// Returns the number of expensive references found.
// NOTE(review): short lines (some case labels, returns, closing braces, the
// declarations of o and cnt) are missing from this copy of the file.
3134 int expensive_refs_pr(predicate_t *pr, table_list *Schema){
\r
3135 vector<scalarexp_t *> op_list;
\r
3139 switch(pr->get_operator_type()){
\r
3140 case PRED_COMPARE:
\r
3141 cnt += expensive_refs_se(pr->get_left_se(),Schema);
\r
3142 cnt += expensive_refs_se(pr->get_right_se(),Schema);
\r
3145 return expensive_refs_se(pr->get_left_se(),Schema);
\r
3146 case PRED_UNARY_OP:
\r
3147 return expensive_refs_pr(pr->get_left_pr(),Schema);
\r
3148 case PRED_BINARY_OP:
\r
3149 cnt += expensive_refs_pr(pr->get_left_pr(),Schema);
\r
3150 cnt += expensive_refs_pr(pr->get_right_pr(),Schema);
\r
3153 op_list = pr->get_op_list();
\r
3154 for(o=0;o<op_list.size();++o){
\r
3155 cnt += expensive_refs_se(op_list[o],Schema);
\r
3159 fprintf(stderr,"INTERNAL ERROR in expensive_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
3160 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
3166 // TODO: allow "cheap" functions and predicates.
\r
// simple_field_constraint: decide whether conjunct `c` is a cheap constraint
// on exactly one column.
//   - Comparison: must be "=", reference no aggregates, group-by variables,
//     parameters or functions, mention exactly one column id across both
//     sides, and not need a complex comparison. The predicate is normalized
//     in place so that the column ends up on the left-hand side.
//   - IN: the left-hand side must satisfy the same restrictions and mention
//     exactly one column id.
// Returns false from every rejection visible here.
// FIX: the right-hand analysis wrote its operator count into l_op, leaving
// the declared r_op unused; it now writes r_op.
// NOTE(review): analyze_constraint_se is called with a NULL Ext_fcns; confirm
// that it tolerates that for function SEs.
// NOTE(review): the internal-error string spells the function name
// "simple_field_cosntraint"; left untouched in case logs are matched on it.
// NOTE(review): short lines (some case labels, the accepting returns, closing
// braces) are missing from this copy of the file.
3167 bool simple_field_constraint(cnf_elem *c){
\r
3168 vector<literal_t *> ll;
\r
3170 predicate_t *p = c->pr;
\r
3171 int l_agr, l_gb, l_par, l_func, l_op;
\r
3172 int r_agr, r_gb, r_par, r_func, r_op;
\r
3173 col_id_set left_colids, right_colids;
\r
3175 // Verify that it is a simple atom
\r
3176 switch(p->get_operator_type()){
\r
3177 case PRED_COMPARE:
\r
3178 // Must be an equality predicate which
\r
3179 // references no aggregates, parameters, functions, or
\r
3180 // group-by variables, and should be a constraint of
\r
3181 // a single colref.
\r
3182 // AND should not require a complex comparison.
\r
3183 if(p->get_op() != "=") return(false);
\r
3184 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
\r
3185 analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,r_op,NULL,false);
\r
3186 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ||
\r
3187 r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false);
\r
3188 // I will count on there being no gbvars in the constraint.
\r
3189 // TODO: allow gbvars which are colrefs.
\r
3190 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
\r
3191 gather_se_col_ids(p->get_right_se(), right_colids, NULL);
\r
3192 if(left_colids.size()+right_colids.size() != 1) return(false);
\r
3195 // Normalize : the colref should be on the lhs.
\r
3196 if(right_colids.size() > 0){
\r
3197 p->swap_scalar_operands();
\r
3200 // Disallow complex (and therefore expensive) comparisons.
\r
3201 if(p->get_left_se()->get_data_type()->complex_comparison(
\r
3202 p->get_right_se()->get_data_type() ) )
\r
3205 // passed all the tests.
\r
3208 // LHS must be a non-gbvar colref.
\r
3209 analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false);
\r
3210 if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false);
\r
3211 // I will count on there being no gbvars in the constraint.
\r
3212 // TODO: allow gbvars which are colrefs.
\r
3213 gather_se_col_ids(p->get_left_se(), left_colids, NULL);
\r
3214 if(left_colids.size() != 1) return(false);
\r
3215 // Disallow complex (and therefore expensive) comparisons.
\r
// No right-hand SE is consulted here, so the left type is compared with itself.
3216 if(p->get_left_se()->get_data_type()->complex_comparison(
\r
3217 p->get_left_se()->get_data_type() ) )
\r
3221 // All entries in the IN list must be literals
\r
3222 // Currently, this is the only possibility.
\r
3225 case PRED_UNARY_OP:
\r
3227 case PRED_BINARY_OP:
\r
3232 fprintf(stderr,"INTERNAL ERROR in simple_field_cosntraint, line %d, character %d, unknown predicate operator type %d\n",
\r
3233 p->get_lineno(), p->get_charno(), p->get_operator_type() );
\r
3240 // As the name implies, return the colref constrained by the
\r
3241 // cnf elem. I will be counting on the LHS being a SE pointing
\r
3244 // This fcn assumes that in fact exactly
\r
3245 // one colref is constrained.
\r
// get_constrained_colref (scalar expression overload): depth-first search for
// a column reference inside `se`.
// Returns the colref of the first column-reference SE found: for a binary
// operator the left side is searched before the right, and function operands
// are searched in order. A function SE with a non-negative aggregate ref
// yields NULL without being searched.
// NOTE(review): short lines (case labels, the NULL returns, closing braces,
// the declarations of p and ret) are missing from this copy of the file.
3246 colref_t *get_constrained_colref(scalarexp_t *se){
\r
3248 vector<scalarexp_t *> operand_list;
\r
3251 switch(se->get_operator_type()){
\r
3255 case SE_IFACE_PARAM:
\r
3258 return(se->get_colref());
\r
3260 return(get_constrained_colref(se->get_left_se()));
\r
3261 case SE_BINARY_OP:
\r
3262 ret=get_constrained_colref(se->get_left_se());
\r
3263 if(ret == NULL) return(get_constrained_colref(se->get_right_se()));
\r
3265 case SE_AGGR_STAR:
\r
3269 if(se->get_aggr_ref() >= 0) return NULL;
\r
3271 operand_list = se->get_operands();
\r
3272 for(p=0;p<operand_list.size();p++){
\r
3273 ret=get_constrained_colref(operand_list[p]);
\r
3274 if(ret != NULL) return(ret);
\r
// get_constrained_colref (predicate overload): search only the left-hand
// scalar expression of `p` for the constrained column reference.
3285 colref_t *get_constrained_colref(predicate_t *p){
\r
3286 return(get_constrained_colref(p->get_left_se()));
\r
// get_constrained_colref (conjunct overload): search the left-hand scalar
// expression of the conjunct's predicate for the constrained column reference.
3288 colref_t *get_constrained_colref(cnf_elem *c){
\r
3289 return get_constrained_colref(c->pr->get_left_se());
\r
// add_colref_constraint_to_cnf: OR a retargeted copy of `src_p` into dst->pr.
// The copy (dup_pr) has the column reference on its left-hand side renamed to
// table `target_tbl` with table ref `tblref`. If `dst` has no predicate yet
// the copy becomes its predicate, otherwise the two are joined with "OR".
//   dst        - conjunct that receives the constraint.
//   src_p      - predicate to copy; its left operand must be a column ref.
//   target_fld - not used by any line visible here.
//   target_tbl - table name written into the copied colref.
//   tblref     - table reference written into the copied colref.
3296 void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p,
\r
3297 string target_fld, string target_tbl, int tblref){
\r
3299 // Make a copy of the predicate to be added.
\r
3300 // ASSUME no aggregates.
\r
3301 predicate_t *pr = dup_pr(src_p,NULL);
\r
3303 // Modify the ref to the base table.
\r
3304 // ASSUME lhs is the colref
\r
3305 pr->get_left_se()->get_colref()->set_table_name(target_tbl);
\r
3306 pr->get_left_se()->get_colref()->set_table_ref(tblref);
\r
3308 if(dst->pr == NULL) dst->pr = pr;
\r
3309 else dst->pr = new predicate_t("OR", dst->pr, pr);
\r
3315 //////////////////////////////////////////////////////
\r
3316 /////////////// Represent a node in a predicate tree
\r
// common_pred_node: one node of a predicate tree (see make_common_pred, which
// reads the `children` vector and a `pr` member of this type).
// NOTE(review): some member declarations (including `pr`) and the constructor
// body are on lines missing from this copy of the file.
3317 struct common_pred_node{
\r
3320 vector<predicate_t *> predecessor_preds;
\r
// Child nodes; make_common_pred ORs their predicates together.
3321 vector<common_pred_node *> children;
\r
3323 string target_tbl;
\r
3324 string target_fld;
\r
3327 common_pred_node(){
\r
// make_common_pred: build the predicate represented by tree node `pn`.
//   - Leaf (no children): returns a copy of pn->pr; a leaf without a
//     predicate is reported as an internal error.
//   - Inner node: the predicates of all children are OR'ed together and, when
//     the node has its own predicate, a copy of it is AND'ed on top.
// All predicates in the result are fresh objects (dup_pr / new predicate_t).
// NOTE(review): short lines (the final return, what follows the error
// message, closing braces, the declaration of `n`) are missing from this copy.
3333 predicate_t *make_common_pred(common_pred_node *pn){
\r
3336 if(pn->children.size() == 0){
\r
3337 if(pn->pr == NULL){
\r
3338 fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node ahs no children and no predicate.\n");
\r
3341 return( dup_pr(pn->pr,NULL) );
\r
3344 predicate_t *curr_pr = make_common_pred( pn->children[0] );
\r
3345 for(n=1;n<pn->children.size();++n){
\r
3346 curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr);
\r
3349 if(pn->pr != NULL)
\r
3350 curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr);
\r
// operator< for cnf_set: the visible test compares the sizes of the two
// lfta_id collections.
// NOTE(review): the return statements are on lines missing from this copy of
// the file, so the resulting order cannot be confirmed from here.
3356 bool operator<(const cnf_set &c1, const cnf_set &c2){
\r
3357 if(c1.lfta_id.size() < c2.lfta_id.size())
\r
3363 // Compute the predicates for the prefilter.
\r
3364 // the prefilter preds are returned in prefilter_preds.
\r
3365 // pred_ids is the set of predicates used in the prefilter.
\r
3366 // the encoding is the lfta index, in the top 16 bits,
\r
3367 // then the index of the cnf element in the bottom 16 bits.
\r
3368 // This set is for identifying which preds do not need
\r
3369 // to be generated in the lftas.
\r
// find_common_filter: choose the predicates that go into the prefilter.
// Steps visible in the code:
//   1. Collect every conjunct that is prefilter_compatible() and has no
//      expensive field references, tagged with its lfta index and its cnf
//      index.
//   2. Merge equivalent predicates (is_equivalent_pred_base) via subsume().
//   3. Merge predicates that occur in exactly the same set of lftas via
//      combine_pred().
//   4. Drop the emptied slots, sort with compare_cnf_set, keep at most 64.
//   5. Substitute group-by references in the kept predicates.
//   where_list      - per-lfta lists of WHERE conjuncts.
//   Schema          - schema used for equivalence tests and substitution.
//   Ext_fcns        - external function table for prefilter_compatible().
//   prefilter_preds - output; the chosen cnf_set objects are appended.
//   pred_ids        - output; filled through cnf_set::add_pred_ids().
// NOTE(review): short lines (closing braces, the null checks on pred_list[p],
// the declarations of l, c, p, p2) are missing from this copy of the file.
3370 void find_common_filter(vector< vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector<cnf_set *> &prefilter_preds, set<unsigned int > &pred_ids){
\r
3373 vector<cnf_set *> pred_list, sort_list;
\r
3375 // Create list of tagged, prefilter-safe CNFs.
\r
3376 for(l=0;l<where_list.size();++l){
\r
3377 for(c=0;c<where_list[l].size();++c){
\r
3378 if(prefilter_compatible(where_list[l][c],Ext_fcns)){
\r
3379 if(expensive_refs_pr(where_list[l][c]->pr,Schema)==0)
\r
3380 pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c));
\r
3385 // Eliminate duplicates
\r
3386 for(p=0;p<pred_list.size();++p){
\r
3388 for(p2=p+1;p2<pred_list.size();++p2){
\r
3389 if(pred_list[p2]){
\r
3390 if(is_equivalent_pred_base(pred_list[p]->pr, pred_list[p2]->pr,Schema)){
\r
3391 pred_list[p]->subsume(pred_list[p2]);
\r
3392 delete pred_list[p2];
\r
3393 pred_list[p2] = NULL;
\r
3400 // combine preds that occur in the exact same lftas.
\r
3401 for(p=0;p<pred_list.size();++p){
\r
3403 for(p2=p+1;p2<pred_list.size();++p2){
\r
3404 if(pred_list[p2]){
\r
3405 if(pred_list[p]->lfta_id == pred_list[p2]->lfta_id){
\r
3406 pred_list[p]->combine_pred(pred_list[p2]);
\r
3407 delete pred_list[p2];
\r
3408 pred_list[p2] = NULL;
\r
3415 // Compress the list
\r
3416 for(p=0;p<pred_list.size();++p){
\r
3418 sort_list.push_back(pred_list[p]);
\r
3422 sort(sort_list.begin(), sort_list.end(),compare_cnf_set());
\r
3424 // Return the top preds, up to 64 of them.
\r
3425 for(p=0;p<sort_list.size() && p<64;p++){
\r
3426 prefilter_preds.push_back(sort_list[p]);
\r
3427 sort_list[p]->add_pred_ids(pred_ids);
\r
3430 // Substitute gb refs with their defs
\r
3431 // While I'm at it, substitute base table sch ref for tblref.
\r
3432 for(p=0;p<prefilter_preds.size() ;p++){
\r
3433 subs_gbrefs_pr(prefilter_preds[p]->pr,Schema);
\r
3442 ///////////////////////////////////////////////////////////////////////////
\r
3443 //////////////////////////////////////////////////////////////////////////
\r
3445 // Find partial functions and register them.
\r
3446 // Do a DFS so that nested partial fcn calls
\r
3447 // get evaluated in the right order.
\r
3448 // Don't drill down into aggregates -- their arguments are evaluated
\r
3449 // earlier than the select list is.
\r
3451 // Modification for function caching:
\r
3452 // Pass in a ref counter, and partial fcn indicator.
\r
3453 // Cache fcns ref'd at least once.
\r
3454 // pass in NULL for fcn_ref_cnt to turn off fcn caching analysis
\r
// Recursively walk a scalar expression and register function calls that are
// partial or whose cost is at least COST_HIGH.
//   se             : expression to scan; a NULL pointer is ignored.
//   pf_list        : list of registered function expressions; new entries are
//                    appended and se->set_partial_ref records the index.
//   fcn_ref_cnt    : per-entry reference counts, parallel to pf_list.
//   is_partial_fcn : per-entry flag taken from Ext_fcns->is_partial.
//   Ext_fcns       : external function table queried for is_partial / cost.
// Operands are visited before the function itself is considered.
3457 void find_partial_fcns(scalarexp_t *se, vector<scalarexp_t *> *pf_list,
\r
3458 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
\r
3459 ext_fcn_list *Ext_fcns){
\r
3460 vector<scalarexp_t *> operands;
\r
3463 if(se == NULL) return;
\r
// Dispatch on the expression's operator type.
// NOTE(review): several case labels and return statements of this switch
// are not visible in this extract.
3465 switch(se->get_operator_type()){
\r
3468 case SE_IFACE_PARAM:
\r
// Single-operand form: descend into the left sub-expression only.
3471 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
\r
3473 case SE_BINARY_OP:
\r
3474 find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
\r
3475 find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
\r
3479 case SE_AGGR_STAR:
\r
// The recursion into the aggregate's operand is deliberately disabled.
3482 // find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ;
\r
// A function with a non-negative aggregate reference is not examined further.
3485 if(se->get_aggr_ref() >= 0) return;
\r
// Visit every operand first.
3487 operands = se->get_operands();
\r
3488 for(o=0;o<operands.size();o++){
\r
3489 find_partial_fcns(operands[o], pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
\r
// Only partial functions, or those with cost >= COST_HIGH, are candidates.
3492 if(Ext_fcns->is_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){
\r
// Look for an equivalent expression that is already registered.
3494 for(f=0;f<pf_list->size();++f){
\r
3495 if(is_equivalent_se(se,(*pf_list)[f])){
\r
3496 se->set_partial_ref(f);
\r
// NOTE(review): fcn_ref_cnt is dereferenced here although callers may pass
// NULL; the guarding check is presumably on a line not visible here -- confirm.
3497 (*fcn_ref_cnt)[f]++;
\r
// Setting f to the list size marks "no existing match".
3502 f=pf_list->size();
\r
// Register a new entry when nothing matched and the function is partial,
// or when a reference counter was supplied.
3504 if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) || fcn_ref_cnt)){
\r
3505 se->set_partial_ref(pf_list->size());
\r
3506 pf_list->push_back(se);
\r
// Keep the parallel vectors in step with pf_list.
3508 fcn_ref_cnt->push_back(1);
\r
3509 is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id()));
\r
// Report an unexpected operator type together with its source position.
3515 fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n",
\r
3516 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// Predicate counterpart of find_partial_fcns: walk a predicate tree and run
// find_partial_fcns on every scalar expression it contains.
//   pr : predicate to scan (dereferenced without a visible NULL check).
//   pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns : forwarded unchanged.
3523 void find_partial_fcns_pr(predicate_t *pr, vector<scalarexp_t *> *pf_list,
\r
3524 vector<int> *fcn_ref_cnt, vector<bool> *is_partial_fcn,
\r
3525 ext_fcn_list *Ext_fcns){
\r
3526 vector<literal_t *> litl;
\r
3527 vector<scalarexp_t *> op_list;
\r
// NOTE(review): some case labels and return statements of this switch are
// not visible in this extract.
3530 switch(pr->get_operator_type()){
\r
// Only the left scalar expression is scanned in this branch.
3532 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
\r
3534 case PRED_COMPARE:
\r
3535 find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
\r
3536 find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
\r
3538 case PRED_UNARY_OP:
\r
3539 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns);
\r
3541 case PRED_BINARY_OP:
\r
3542 find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
\r
3543 find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ;
\r
// Scan each operand returned by get_op_list.
3546 op_list = pr->get_op_list();
\r
3547 for(o=0;o<op_list.size();++o){
\r
3548 find_partial_fcns(op_list[o],pf_list,fcn_ref_cnt, is_partial_fcn, Ext_fcns);
\r
// Report an unexpected predicate operator type with its source position.
3552 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
3553 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
// Walk a predicate tree and register predicates whose function is reported
// as combinable by Ext_fcns->is_combinable.
//   pr       : predicate to scan (dereferenced without a visible NULL check).
//   pr_list  : list of registered predicates; the index of the matching or
//              newly appended entry is stored with pr->set_combinable_ref.
//   Schema   : forwarded to is_equivalent_pred_base.
//   Ext_fcns : external function table.
3562 void find_combinable_preds(predicate_t *pr, vector<predicate_t *> *pr_list,
\r
3563 table_list *Schema, ext_fcn_list *Ext_fcns){
\r
3564 vector<literal_t *> litl;
\r
3565 vector<scalarexp_t *> op_list;
\r
// NOTE(review): some case labels and return statements of this switch are
// not visible in this extract.
3568 switch(pr->get_operator_type()){
\r
3571 case PRED_COMPARE:
\r
3573 case PRED_UNARY_OP:
\r
3574 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns);
\r
3576 case PRED_BINARY_OP:
\r
3577 find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ;
\r
3578 find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ;
\r
3581 if(Ext_fcns->is_combinable(pr->get_fcn_id())){
\r
// Search for an equivalent predicate that is already registered.
3582 for(f=0;f<pr_list->size();++f){
\r
3583 if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){
\r
3584 pr->set_combinable_ref(f);
\r
// f equal to the list size means the search found no match: add a new entry.
3588 if(f == pr_list->size()){
\r
3589 pr->set_combinable_ref(pr_list->size());
\r
3590 pr_list->push_back(pr);
\r
// NOTE(review): the message below names find_partial_pr although this
// function is find_combinable_preds -- looks like a copy/paste leftover.
3595 fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
3596 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
3604 //--------------------------------------------------------------------
\r
3605 // Collect refs to aggregates.
\r
// Recursively gather the aggregate reference ids used by a scalar expression.
//   se       : expression to scan; a NULL pointer is ignored.
//   agg_refs : output set receiving the values of se->get_aggr_ref().
3608 void collect_agg_refs(scalarexp_t *se, set<int> &agg_refs){
\r
3609 vector<scalarexp_t *> operands;
\r
3612 if(se == NULL) return;
\r
// NOTE(review): some case labels and return statements of this switch are
// not visible in this extract.
3614 switch(se->get_operator_type()){
\r
3617 case SE_IFACE_PARAM:
\r
3620 collect_agg_refs(se->get_left_se(), agg_refs) ;
\r
3622 case SE_BINARY_OP:
\r
3623 collect_agg_refs(se->get_left_se(), agg_refs);
\r
3624 collect_agg_refs(se->get_right_se(), agg_refs);
\r
3628 case SE_AGGR_STAR:
\r
// Aggregate node: record its reference id unconditionally.
3630 agg_refs.insert(se->get_aggr_ref());
\r
// Here the id is recorded only when it is non-negative.
3633 if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref());
\r
// Operands are scanned as well.
3635 operands = se->get_operands();
\r
3636 for(o=0;o<operands.size();o++){
\r
3637 collect_agg_refs(operands[o], agg_refs);
\r
// Report an unexpected operator type with its source position.
3642 fprintf(stderr,"INTERNAL ERROR in collect_agg_refs, line %d, character %d: unknown operator type %d\n",
\r
3643 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// Predicate counterpart of collect_agg_refs: walk a predicate tree and
// collect aggregate reference ids from every scalar expression in it.
//   pr       : predicate to scan (dereferenced without a visible NULL check).
//   agg_refs : output set, passed through to collect_agg_refs.
3650 void collect_aggr_refs_pr(predicate_t *pr, set<int> &agg_refs){
\r
3651 vector<literal_t *> litl;
\r
3652 vector<scalarexp_t *> op_list;
\r
// NOTE(review): some case labels and return statements of this switch are
// not visible in this extract.
3655 switch(pr->get_operator_type()){
\r
// Only the left scalar expression is scanned in this branch.
3657 collect_agg_refs(pr->get_left_se(), agg_refs) ;
\r
3659 case PRED_COMPARE:
\r
3660 collect_agg_refs(pr->get_left_se(), agg_refs) ;
\r
3661 collect_agg_refs(pr->get_right_se(), agg_refs) ;
\r
3663 case PRED_UNARY_OP:
\r
3664 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs);
\r
3666 case PRED_BINARY_OP:
\r
3667 collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ;
\r
3668 collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ;
\r
// Scan each operand returned by get_op_list.
3671 op_list = pr->get_op_list();
\r
3672 for(o=0;o<op_list.size();++o){
\r
3673 collect_agg_refs(op_list[o],agg_refs);
\r
// Report an unexpected predicate operator type with its source position.
3677 fprintf(stderr,"INTERNAL ERROR in collect_aggr_refs_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
3678 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
3686 //--------------------------------------------------------------------
\r
3687 // Collect previously registered partial fcn refs.
\r
3688 // Do a DFS so that nested partial fcn calls
\r
3689 // get evaluated in the right order.
\r
3690 // Don't drill down into aggregates -- their arguments are evaluated
\r
3691 // earlier than the select list is.
\r
3692 // ------------->>> THEN WHY AM I DRILLING DOWN INTO AGGREGATES?
\r
// Recursively gather the partial-function reference ids already attached to
// the nodes of a scalar expression.
//   se        : expression to scan; a NULL pointer is ignored.
//   pfcn_refs : output set receiving se->get_partial_ref() for every node
//               on which se->is_partial() holds.
3694 void collect_partial_fcns(scalarexp_t *se, set<int> &pfcn_refs){
\r
3695 vector<scalarexp_t *> operands;
\r
3698 if(se == NULL) return;
\r
// NOTE(review): some case labels and return statements of this switch are
// not visible in this extract.
3700 switch(se->get_operator_type()){
\r
3703 case SE_IFACE_PARAM:
\r
3706 collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
\r
3708 case SE_BINARY_OP:
\r
3709 collect_partial_fcns(se->get_left_se(), pfcn_refs);
\r
3710 collect_partial_fcns(se->get_right_se(), pfcn_refs);
\r
3714 case SE_AGGR_STAR:
\r
// The recursion into the aggregate's operand is deliberately disabled.
3717 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
\r
// A function with a non-negative aggregate reference is not examined further.
3720 if(se->get_aggr_ref() >= 0) return;
\r
// Operands are scanned before the function node itself is tested.
3722 operands = se->get_operands();
\r
3723 for(o=0;o<operands.size();o++){
\r
3724 collect_partial_fcns(operands[o], pfcn_refs);
\r
3727 if(se->is_partial()){
\r
3728 pfcn_refs.insert(se->get_partial_ref());
\r
// Report an unexpected operator type with its source position.
3733 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n",
\r
3734 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// Predicate counterpart of collect_partial_fcns: walk a predicate tree and
// collect partial-function reference ids from its scalar expressions.
//   pr        : predicate to scan (dereferenced without a visible NULL check).
//   pfcn_refs : output set, passed through to collect_partial_fcns.
3741 void collect_partial_fcns_pr(predicate_t *pr, set<int> &pfcn_refs){
\r
3742 vector<literal_t *> litl;
\r
3743 vector<scalarexp_t *> op_list;
\r
// NOTE(review): some case labels and return statements of this switch are
// not visible in this extract.
3746 switch(pr->get_operator_type()){
\r
// Only the left scalar expression is scanned in this branch.
3748 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
\r
3750 case PRED_COMPARE:
\r
3751 collect_partial_fcns(pr->get_left_se(), pfcn_refs) ;
\r
3752 collect_partial_fcns(pr->get_right_se(), pfcn_refs) ;
\r
3754 case PRED_UNARY_OP:
\r
3755 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs);
\r
3757 case PRED_BINARY_OP:
\r
3758 collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ;
\r
3759 collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ;
\r
// Scan each operand returned by get_op_list.
3762 op_list = pr->get_op_list();
\r
3763 for(o=0;o<op_list.size();++o){
\r
3764 collect_partial_fcns(op_list[o],pfcn_refs);
\r
// Report an unexpected predicate operator type with its source position.
3768 fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
3769 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
3779 ///////////////////////////////////////////////////////////////
\r
3780 //////////// Exported Functions ///////////////////////////
\r
3781 ///////////////////////////////////////////////////////////////
\r
3784 // Count and collect refs to interface parameters.
\r
// Count interface-parameter references in a scalar expression and collect
// their names.
//   se       : expression to scan; NULL yields 0.
//   ifpnames : output set; receives se->get_ifpref()->to_string() for each
//              SE_IFACE_PARAM node encountered.
// Returns an int; results of recursive calls are accumulated in ret.
// NOTE(review): the declaration of ret and several return statements are not
// visible in this extract.
3786 int count_se_ifp_refs(scalarexp_t *se, set<string> &ifpnames){
\r
3787 vector<scalarexp_t *> operands;
\r
3791 if(se == NULL) return 0;
\r
3793 switch(se->get_operator_type()){
\r
3797 case SE_IFACE_PARAM:
\r
// Remember the textual form of the referenced interface parameter.
3798 ifpnames.insert(se->get_ifpref()->to_string());
\r
// Single-operand form: the result is whatever the left sub-expression yields.
3801 return count_se_ifp_refs(se->get_left_se(), ifpnames) ;
\r
3802 case SE_BINARY_OP:
\r
// Sum of the counts from both sides.
3803 ret = count_se_ifp_refs(se->get_left_se(), ifpnames);
\r
3804 ret += count_se_ifp_refs(se->get_right_se(), ifpnames);
\r
3808 case SE_AGGR_STAR:
\r
3811 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
\r
// A function with a non-negative aggregate reference contributes nothing.
3814 if(se->get_aggr_ref() >= 0) return 0;
\r
// Otherwise add up the counts of all operands.
3816 operands = se->get_operands();
\r
3817 for(o=0;o<operands.size();o++){
\r
3818 ret += count_se_ifp_refs(operands[o], ifpnames);
\r
// Report an unexpected operator type with its source position.
3823 fprintf(stderr,"INTERNAL ERROR in count_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
\r
3824 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// Predicate counterpart of count_se_ifp_refs: count interface-parameter
// references in a predicate tree and collect their names.
//   pr       : predicate to scan; NULL yields 0.
//   ifpnames : output set, passed through to count_se_ifp_refs.
// Returns an int; results of recursive calls are accumulated in ret.
// NOTE(review): the declaration of ret and several return statements are not
// visible in this extract.
3831 int count_pr_ifp_refs(predicate_t *pr, set<string> &ifpnames){
\r
3832 vector<literal_t *> litl;
\r
3833 vector<scalarexp_t *> op_list;
\r
3836 if(pr == NULL) return 0;
\r
3838 switch(pr->get_operator_type()){
\r
// Only the left scalar expression is counted in this branch.
3840 return count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
\r
3841 case PRED_COMPARE:
\r
3842 ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ;
\r
3843 ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ;
\r
3845 case PRED_UNARY_OP:
\r
3846 return count_pr_ifp_refs(pr->get_left_pr(), ifpnames);
\r
3847 case PRED_BINARY_OP:
\r
3848 ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ;
\r
3849 ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ;
\r
// Add up the counts of every operand returned by get_op_list.
3852 op_list = pr->get_op_list();
\r
3853 for(o=0;o<op_list.size();++o){
\r
3854 ret += count_se_ifp_refs(op_list[o],ifpnames);
\r
// Report an unexpected predicate operator type with its source position.
3858 fprintf(stderr,"INTERNAL ERROR in count_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
\r
3859 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
3866 // Resolve ifp refs, convert them to string literals.
\r
// Replace interface-parameter references inside a scalar expression by
// literals built from the values stored in the interface database.
//   se   : expression to rewrite in place; NULL yields 0.
//   ifm  : first key passed to ifdb->get_iface_vals (printed before the '.'
//          in the error messages).
//   ifn  : second key passed to ifdb->get_iface_vals (printed after the '.').
//   ifdb : interface database queried for the parameter values.
//   err  : error text is appended here when a lookup fails, yields no value,
//          or yields more than one value.
// Returns an int; results of recursive calls are accumulated in ret.
// NOTE(review): declarations of ret/ir/tmp_l/ierr/serr, the tests guarding the
// first error message, and the return statements after each error are not
// visible in this extract.
3868 int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb, string &err){
\r
3869 vector<scalarexp_t *> operands;
\r
3870 vector<string> ifvals;
\r
3878 if(se == NULL) return 0;
\r
3880 switch(se->get_operator_type()){
\r
3884 case SE_IFACE_PARAM:
\r
// Look up the values of the referenced parameter for interface ifm.ifn.
3885 ir = se->get_ifpref();
\r
3886 ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr);
\r
3888 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n";
\r
// Exactly one value is required: zero values and multiple values are errors.
3891 if(ifvals.size() == 0){
\r
3892 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n";
\r
3895 if(ifvals.size() > 1){
\r
3896 err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n";
\r
// Turn the node into a literal carrying the first value.
3899 tmp_l = new literal_t( ifvals[0]);
\r
3900 se->convert_to_literal(tmp_l);
\r
// Single-operand form: the result is whatever the left sub-expression yields.
3903 return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ;
\r
3904 case SE_BINARY_OP:
\r
3905 ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err);
\r
3906 ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err);
\r
3910 case SE_AGGR_STAR:
\r
3913 // collect_partial_fcns(se->get_left_se(), pfcn_refs) ;
\r
// A function with a non-negative aggregate reference is left untouched.
3916 if(se->get_aggr_ref() >= 0) return 0;
\r
// Otherwise resolve references in every operand.
3918 operands = se->get_operands();
\r
3919 for(o=0;o<operands.size();o++){
\r
3920 ret += resolve_se_ifp_refs(operands[o], ifm, ifn, ifdb,err);
\r
// Report an unexpected operator type with its source position.
3925 fprintf(stderr,"INTERNAL ERROR in resolve_se_ifp_refs, line %d, character %d: unknown operator type %d\n",
\r
3926 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// Predicate counterpart of resolve_se_ifp_refs: resolve interface-parameter
// references in every scalar expression of a predicate tree.
//   pr   : predicate to rewrite in place.
//   ifm, ifn, ifdb, err : forwarded unchanged to resolve_se_ifp_refs.
// Returns an int; results of recursive calls are accumulated in ret.
// NOTE(review): unlike count_pr_ifp_refs, no NULL check on pr is visible in
// this extract before it is dereferenced -- confirm against the full file.
3933 int resolve_pr_ifp_refs(predicate_t *pr, string ifm, string ifn, ifq_t *ifdb, string &err){
\r
3934 vector<literal_t *> litl;
\r
3935 vector<scalarexp_t *> op_list;
\r
3939 switch(pr->get_operator_type()){
\r
// Only the left scalar expression is resolved in this branch.
3941 return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
\r
3942 case PRED_COMPARE:
\r
3943 ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ;
\r
3944 ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ;
\r
3946 case PRED_UNARY_OP:
\r
3947 return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err);
\r
3948 case PRED_BINARY_OP:
\r
3949 ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ;
\r
3950 ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ;
\r
// Resolve references in every operand returned by get_op_list.
3953 op_list = pr->get_op_list();
\r
3954 for(o=0;o<op_list.size();++o){
\r
3955 ret += resolve_se_ifp_refs(op_list[o],ifm, ifn, ifdb, err);
\r
// Report an unexpected predicate operator type with its source position.
3959 fprintf(stderr,"INTERNAL ERROR in resolve_pr_ifp_refs, line %d, character %d, unknown predicate operator type %d\n",
\r
3960 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
// Choose a name for a query.
//   fta_tree   : parse tree whose "query_name" definition is consulted first.
//   default_nm : used when the tree defines no (or an empty) query name.
// Falls back to "default_query" when both are empty.
// NOTE(review): the return statement is not visible in this extract;
// presumably retval is returned.
3968 string impute_query_name(table_exp_t *fta_tree, string default_nm){
\r
3969 string retval = fta_tree->get_val_of_name("query_name");
\r
// Apply the fallbacks in order of preference.
3970 if(retval == "") retval = default_nm;
\r
3971 if(retval == "") retval = "default_query";
\r
3975 // Convert the parse tree into an intermediate form,
\r
3976 // which admits analysis better.
\r
3978 // TODO : rationalize the error return policy.
\r
3980 // TODO : the query_summary_class object contains
\r
3981 // the parse tree.
\r
3982 // TODO: revisit the issue when nested subqueries are implemented.
\r
3983 // One possibility: implement accessor methods to hide the
\r
3985 // For now: this class contains data structures not in table_exp_t
\r
3986 // (with a bit of duplication)
\r
3988 // Return NULL on error.
\r
3989 // print error messages to stderr.
\r
3992 query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema,
\r
3993 ext_fcn_list *Ext_fcns, string default_name){
\r
3994 int i,j, k, retval;
\r
3996 // Create the summary struct -- no analysis is done here.
\r
3997 query_summary_class *qs = new query_summary_class(fta_tree);
\r
3998 qs->query_type = fta_tree->query_type;
\r
4000 ////////////// Do common analysis
\r
4002 // Extract query name. Already imputed for the qnodes.
\r
4003 // qs->query_name = impute_query_name(fta_tree, default_name);
\r
4004 qs->query_name = default_name;
\r
4005 //printf("query name is %s\n",qs->query_name.c_str());
\r
4007 // extract definitions. Don't grab the query name.
\r
4009 map<string, string> nmap = fta_tree->get_name_map();
\r
4010 map<string, string>::iterator nmi;
\r
4011 for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){
\r
4012 string pname = (*nmi).first;
\r
4013 if(pname != "query_name" )
\r
4014 (qs->definitions)[pname] = (*nmi).second;
\r
4020 // First, verify that all the referenced tables are defined.
\r
4021 // Then, bind the tablerefs in the FROM list to schemas in
\r
4022 // the schema list.
\r
4023 tablevar_list_t *tlist = fta_tree->get_from();
\r
4024 vector<tablevar_t *> tbl_vec = tlist->get_table_list();
\r
4026 bool found_error = false;
\r
4027 for(i=0;i<tbl_vec.size();i++){
\r
4028 int sch_no = schema->find_tbl(tbl_vec[i]->get_schema_name());
\r
4030 fprintf(stderr,"Error, table <%s> not found in the schema file\n",
\r
4031 tbl_vec[i]->get_schema_name().c_str() );
\r
4032 fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(),
\r
4033 tbl_vec[i]->get_charno() );
\r
4037 tbl_vec[i]->set_schema_ref(sch_no);
\r
4039 // If accessing a UDOP, mangle the name
\r
4040 // This needs to be done in translate_fta.cc, not here.
\r
4042 if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){
\r
4043 string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm;
\r
4044 tbl_vec[i]->set_schema_name(mngl_name);
\r
4048 // No FTA schema should have an interface defined on it.
\r
4049 if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){
\r
4050 fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str());
\r
4052 // Fill in default interface
\r
4053 if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
\r
4054 tbl_vec[i]->set_interface("default");
\r
4055 tbl_vec[i]->set_ifq(true);
\r
4057 // Fill in default machine
\r
4058 if(tbl_vec[i]->get_interface()!="" && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){
\r
4059 tbl_vec[i]->set_machine(hostname);
\r
4062 if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){
\r
4063 // Record the set of interfaces accessed
\r
4065 if(tbl_vec[i]->get_ifq()){
\r
4066 ifstr = "["+tbl_vec[i]->get_interface()+"]";
\r
4068 if(tbl_vec[i]->get_machine() != "localhost"){
\r
4069 ifstr = "'"+tbl_vec[i]->get_machine()+"'."+tbl_vec[i]->get_interface();
\r
4071 ifstr = tbl_vec[i]->get_interface();
\r
4074 //printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str());
\r
4075 if(qs->definitions.count("_referenced_ifaces")){
\r
4076 ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr;
\r
4078 qs->definitions["_referenced_ifaces"] = ifstr;
\r
4082 if(found_error) return(NULL);
\r
4084 // Ensure that all tablevars are named
\r
4085 // and that no two tablevars have the same name.
\r
4086 int tblvar_no = 0;
\r
4087 // First, gather the set of variable
\r
4088 set<string> tblvar_names;
\r
4089 for(i=0;i<tbl_vec.size();i++){
\r
4090 if(tbl_vec[i]->get_var_name() != ""){
\r
4091 if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){
\r
4092 fprintf(stderr,"ERROR, query has two table variables named %s. line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno());
\r
4095 tblvar_names.insert(tbl_vec[i]->get_var_name());
\r
4098 // Now generate variable names for unnamed tablevars
\r
4099 for(i=0;i<tbl_vec.size();i++){
\r
4100 if(tbl_vec[i]->get_var_name() == ""){
\r
4102 sprintf(tmpstr,"_t%d",tblvar_no);
\r
4103 string newvar = tmpstr;
\r
4104 while(tblvar_names.count(newvar) > 0){
\r
4106 sprintf(tmpstr,"_t%d",tblvar_no);
\r
4109 tbl_vec[i]->set_range_var(newvar);
\r
4110 tblvar_names.insert(newvar);
\r
4114 // Process inner/outer join properties
\r
4115 int jprop = fta_tree->get_from()->get_properties();
\r
4116 // Require explicit INNER_JOIN, ... specification for join queries.
\r
4118 if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){
\r
4119 fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, FILTER_JOIN.\n");
\r
4124 if(jprop == OUTER_JOIN_PROPERTY){
\r
4125 for(i=0;i<tbl_vec.size();i++) tbl_vec[i]->set_property(1);
\r
4127 if(jprop == LEFT_OUTER_JOIN_PROPERTY)
\r
4128 tbl_vec[0]->set_property(1);
\r
4129 if(jprop == RIGHT_OUTER_JOIN_PROPERTY)
\r
4130 tbl_vec[tbl_vec.size()-1]->set_property(1);
\r
4131 if(jprop == FILTER_JOIN_PROPERTY){
\r
4132 if(fta_tree->get_from()->get_temporal_range() == 0){
\r
4133 fprintf(stderr,"ERROR, a filter join must have a non-zero tempoal range.\n");
\r
4136 if(tbl_vec.size() != 2){
\r
4137 fprintf(stderr,"ERROR, a filter join must be between two table variables.\n");
\r
4140 colref_t *cr = fta_tree->get_from()->get_colref();
\r
4141 string field = cr->get_field();
\r
4143 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field);
\r
4145 fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str());
\r
4148 cr->set_schema_ref(tbl_vec[0]->get_schema_ref());
\r
4149 cr->set_tablevar_ref(0);
\r
4150 string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field);
\r
4151 param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field);
\r
4152 data_type *dt0 = new data_type(type_name, modifiers);
\r
4153 if(dt0->get_type_str() != "UINT"){
\r
4154 fprintf(stderr,"ERROR, the temporal attribute in a filter join must be a UINT.\n");
\r
4157 if(! dt0->is_increasing()){
\r
4158 fprintf(stderr,"ERROR, the temporal attribtue in a filter join must be temporal increasing.\n");
\r
4165 /////////////////////
\r
4166 /// Build the query param table
\r
4167 vector<var_pair_t *> query_params = fta_tree->query_params;
\r
4169 for(p=0;p<query_params.size();++p){
\r
4170 string pname = query_params[p]->name;
\r
4171 string dtname = query_params[p]->val;
\r
4174 fprintf(stderr,"ERROR parameter has empty name.\n");
\r
4175 found_error = true;
\r
4178 fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str());
\r
4179 found_error = true;
\r
4181 data_type *dt = new data_type(dtname);
\r
4182 if(!(dt->is_defined())){
\r
4183 fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str());
\r
4184 found_error = true;
\r
4187 qs->add_query_param(pname, dt, false);
\r
4189 if(found_error) return(NULL);
\r
4190 // unpack the param table to a global for easier analysis.
\r
4191 param_tbl=qs->param_tbl;
\r
4193 ////////////////// MERGE specialized analysis
\r
4195 if(qs->query_type == MERGE_QUERY){
\r
4197 // 1) there are two *different* streams ref'd in the FROM clause
\r
4198 // However, only emit a warning.
\r
4199 // (can't detect a problem if one of the interfaces is the
\r
4200 // default interface).
\r
4201 // 2) They have the same layout (e.g. same types but the
\r
4202 // names can be different
\r
4203 // 3) the two columns can unambiguously be mapped to
\r
4204 // fields of the two tables, one per table. Exception:
\r
4205 // the column names are the same and exist in both tables.
\r
4206 // FURTHERMORE the positions must be the same
\r
4207 // 4) after mapping, verify that both colrefs are temporal
\r
4208 // and in the same direction.
\r
4209 if(tbl_vec.size() < 2){
\r
4210 fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() );
\r
4214 vector<field_entry *> fev0 = schema->get_fields(
\r
4215 tbl_vec[0]->get_schema_name()
\r
4220 for(cv=1;cv<tbl_vec.size();++cv){
\r
4221 vector<field_entry *> fev1 = schema->get_fields(
\r
4222 tbl_vec[cv]->get_schema_name()
\r
4225 if(fev0.size() != fev1.size()){
\r
4226 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
\r
4230 // Only need to ensure that the list of types are the same.
\r
4231 // The first table supplies the output colnames,
\r
4232 // and all temporal properties are lost, except for the
\r
4233 // merge-by columns.
\r
4235 for(f=0;f<fev0.size();++f){
\r
4236 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
\r
4237 data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list());
\r
4238 if(! dt0.equal_subtypes(&dt1) ){
\r
4239 fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str());
\r
4245 // copy over the merge-by cols.
\r
4246 qs->mvars = fta_tree->mergevars;
\r
4248 if(qs->mvars.size() == 0){ // need to discover the merge vars.
\r
4249 int mergevar_pos = -1;
\r
4251 for(f=0;f<fev0.size();++f){
\r
4252 data_type dt0(fev0[f]->get_type(),fev0[f]->get_modifier_list());
\r
4253 if(dt0.is_temporal()){
\r
4258 if(mergevar_pos >= 0){
\r
4259 for(cv=0;cv<tbl_vec.size();++cv){
\r
4260 vector<field_entry *> fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name());
\r
4261 qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() ));
\r
4264 fprintf(stderr,"ERROR, no merge-by column found.\n");
\r
4269 // Ensure same number of tables, merge cols.
\r
4270 if(tbl_vec.size() != qs->mvars.size()){
\r
4271 fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size());
\r
4275 // Ensure that the merge-by are from different tables
\r
4276 // also, sort colrefs so that they align with the FROM list using tmp_crl
\r
4277 set<int> refd_sources;
\r
4278 vector<colref_t *> tmp_crl(qs->mvars.size(),NULL);
\r
4279 for(cv=0;cv<qs->mvars.size();++cv){
\r
4280 int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema);
\r
4282 fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str());
\r
4284 refd_sources.insert(tblvar);
\r
4285 tmp_crl[tblvar] = qs->mvars[cv];
\r
4287 if(refd_sources.size() != qs->mvars.size()){
\r
4288 fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size());
\r
4292 // 1-1 mapping, so use tmp_crl as the merge column list.
\r
4293 qs->mvars = tmp_crl;
\r
4297 // Look up the colrefs in their schemas, verify that
\r
4298 // they are at the same place, that they are both temporal
\r
4299 // in the same way.
\r
4300 // It seems that this should be done more in the schema objects.
\r
4301 int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field());
\r
4303 fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str());
\r
4306 for(cv=1;cv<qs->mvars.size();++cv){
\r
4307 int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field());
\r
4309 fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str());
\r
4314 field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0);
\r
4315 data_type dt0(fe0->get_type(),fe0->get_modifier_list());
\r
4316 if( (!dt0.is_temporal()) ){
\r
4317 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0);
\r
4320 for(cv=0;cv<qs->mvars.size();++cv){
\r
4321 field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0);
\r
4322 data_type dt1(fe1->get_type(),fe1->get_modifier_list());
\r
4323 if( (!dt1.is_temporal()) ){
\r
4324 fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv);
\r
4329 if( dt0.get_temporal() != dt1.get_temporal()){
\r
4330 fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv);
\r
4335 // If there is a SLACK specification, verify
\r
4336 // that it is literal-only and that its type is compatible
\r
4337 // with that of the merge columns
\r
4338 qs->slack = fta_tree->slack;
\r
4340 if(! literal_only_se(qs->slack)){
\r
4341 fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n");
\r
4345 assign_data_types(qs->slack, schema, fta_tree, Ext_fcns );
\r
4346 data_type sdt(&dt0, qs->slack->get_data_type(), string("+"));
\r
4347 if(sdt.get_type() == undefined_t){
\r
4348 fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n");
\r
4354 // All the tests have passed, there is nothing
\r
4355 // else to fill in.
\r
4359 ////////////////// SELECT specialized analysis
\r
4361 if(qs->query_type == SELECT_QUERY){
\r
4362 // unpack the gb_tbl, aggr_tbl, param_tbl, and complex_literals
\r
4363 // objects into globals, for easier syntax.
\r
4364 gb_tbl = qs->gb_tbl;
\r
4365 aggr_tbl = qs->aggr_tbl;
\r
4368 // Build the table of group-by attributes.
\r
4369 // (se processing done automatically).
\r
4370 // NOTE : Doing the SE processing here is getting cumbersome,
\r
4371 // I should process these individually.
\r
4372 // NOTE : I should check for duplicate names.
\r
4373 // NOTE : I should ensure that the def of one GB does not
\r
4374 // reference the value of another.
\r
4375 vector<extended_gb_t *> gb_list = fta_tree->get_groupby();
\r
4376 int n_temporal = 0;
\r
4377 string temporal_gbvars = "";
\r
4378 map<string, int> gset_gbnames;
\r
4380 // For generating the set of GB patterns for this aggregation query.
\r
4381 vector<bool> inner_pattern;
\r
4382 vector<vector<bool> > pattern_set;
\r
4383 vector<vector<vector<bool> > > pattern_components;
\r
4385 vector<gb_t *> r_gbs, c_gbs, g_gbs;
\r
4388 for(i=0;i<gb_list.size();i++){
\r
4389 switch(gb_list[i]->type){
\r
4391 retval = gb_tbl->add_gb_attr(
\r
4392 gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns
\r
4395 return NULL; // nothing added to gb_tbl, so this can trigger a segfault 2 lines below
\r
4397 if(gb_tbl->get_data_type(i)->is_temporal()){
\r
4399 if(temporal_gbvars != "") temporal_gbvars+=" ";
\r
4400 temporal_gbvars += gb_tbl->get_name(i);
\r
4404 inner_pattern.clear();
\r
4405 pattern_set.clear();
\r
4406 inner_pattern.push_back(true);
\r
4407 pattern_set.push_back(inner_pattern);
\r
4408 pattern_components.push_back(pattern_set);
\r
4410 gb_tbl->gb_entry_type.push_back("");
\r
4411 gb_tbl->gb_entry_count.push_back(1);
\r
4412 gb_tbl->pattern_components.push_back(pattern_set);
\r
4415 case rollup_egb_type:
\r
4416 r_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
\r
4417 for(j=0;j<r_gbs.size();++j){
\r
4418 retval = gb_tbl->add_gb_attr(
\r
4419 r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
\r
4422 found_error = true;
\r
4423 }else{ // rollup gb can't be temporal
\r
4424 gb_tbl->reset_temporal(gb_tbl->size()-1);
\r
4428 inner_pattern.resize(r_gbs.size());
\r
4429 pattern_set.clear();
\r
4430 for(j=0;j<=r_gbs.size();++j){
\r
4431 for(k=0;k<r_gbs.size();++k){
\r
4433 inner_pattern[k] = true;
\r
4435 inner_pattern[k] = false;
\r
4437 pattern_set.push_back(inner_pattern);
\r
4439 pattern_components.push_back(pattern_set);
\r
4441 gb_tbl->gb_entry_type.push_back("ROLLUP");
\r
4442 gb_tbl->gb_entry_count.push_back(r_gbs.size());
\r
4443 gb_tbl->pattern_components.push_back(pattern_set);
\r
4445 case cube_egb_type:
\r
4446 c_gbs = gb_list[i]->gb_lists[0]->get_gb_list();
\r
4447 for(j=0;j<c_gbs.size();++j){
\r
4448 retval = gb_tbl->add_gb_attr(
\r
4449 c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns
\r
4452 found_error = true;
\r
4453 }else{ // cube gb can't be temporal
\r
4454 gb_tbl->reset_temporal(gb_tbl->size()-1);
\r
4458 inner_pattern.resize(c_gbs.size());
\r
4459 pattern_set.clear();
\r
4460 n_patterns = 1 << c_gbs.size();
\r
4461 for(j=0;j<n_patterns;++j){
\r
4463 for(k=0;k<c_gbs.size();++k,test_bit = test_bit << 1){
\r
4464 if((j & test_bit) != 0)
\r
4465 inner_pattern[k] = true;
\r
4467 inner_pattern[k] = false;
\r
4469 pattern_set.push_back(inner_pattern);
\r
4471 pattern_components.push_back(pattern_set);
\r
4473 gb_tbl->gb_entry_type.push_back("CUBE");
\r
4474 gb_tbl->gb_entry_count.push_back(c_gbs.size());
\r
4475 gb_tbl->pattern_components.push_back(pattern_set);
\r
4477 case gsets_egb_type:
\r
4479 gset_gbnames.clear();
\r
4480 for(j=0;j<gb_list[i]->gb_lists.size();++j){
\r
4481 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
\r
4482 for(k=0;k<g_gbs.size();++k){
\r
4483 if(g_gbs[k]->type != GB_COLREF){
\r
4484 fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str());
\r
4485 found_error = true;
\r
4487 if(gset_gbnames.count(g_gbs[k]->name) == 0){
\r
4488 retval = gb_tbl->add_gb_attr(
\r
4489 g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns
\r
4492 found_error = true;
\r
4493 }else{ // gsets gb can't be temporal
\r
4494 gb_tbl->reset_temporal(gb_tbl->size()-1);
\r
4496 int pos = gset_gbnames.size();
\r
4497 gset_gbnames[g_gbs[k]->name] = pos;
\r
4503 if(gset_gbnames.size() > 63){
\r
4504 fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n");
\r
4505 found_error = true;
\r
4508 inner_pattern.resize(gset_gbnames.size());
\r
4509 pattern_set.clear();
\r
4510 set<unsigned long long int> signatures;
\r
4511 for(j=0;j<gb_list[i]->gb_lists.size();++j){
\r
4512 g_gbs = gb_list[i]->gb_lists[j]->get_gb_list();
\r
4513 set<string> refd_gbs;
\r
4514 for(k=0;k<g_gbs.size();++k){
\r
4515 refd_gbs.insert(g_gbs[k]->name);
\r
4517 fill(inner_pattern.begin(),inner_pattern.end(),false);
\r
4518 unsigned long long int signature = 0;
\r
4519 set<string>::iterator ssi;
\r
4520 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
\r
4521 inner_pattern[gset_gbnames[(*ssi)]] = true;
\r
4522 signature |= (1 << gset_gbnames[(*ssi)]);
\r
4524 if(signatures.count(signature)){
\r
4525 fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t");
\r
4526 set<string>::iterator ssi;
\r
4527 for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){
\r
4528 fprintf(stderr," %s",(*ssi).c_str());
\r
4530 fprintf(stderr,"\n");
\r
4532 signatures.insert(signature);
\r
4533 pattern_set.push_back(inner_pattern);
\r
4536 pattern_components.push_back(pattern_set);
\r
4538 gb_tbl->gb_entry_type.push_back("GROUPING_SETS");
\r
4539 gb_tbl->gb_entry_count.push_back(gset_gbnames.size());
\r
4540 gb_tbl->pattern_components.push_back(pattern_set);
\r
4547 if(found_error) return(NULL);
\r
4548 if(n_temporal > 1){
\r
4549 fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s). Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str());
\r
4553 // Compute the set of patterns. Take the cross product of all pattern components.
\r
4554 vector<vector<bool> > gb_patterns;
\r
4555 int n_components = pattern_components.size();
\r
4556 vector<int> pattern_pos(n_components,0);
\r
4557 bool done = false;
\r
4559 vector<bool> pattern;
\r
4560 for(j=0;j<n_components;j++){
\r
4561 pattern.insert(pattern.end(),pattern_components[j][pattern_pos[j]].begin(),
\r
4562 pattern_components[j][pattern_pos[j]].end());
\r
4564 gb_patterns.push_back(pattern);
\r
4565 for(j=0;j<n_components;j++){
\r
4567 if(pattern_pos[j] >= pattern_components[j].size())
\r
4568 pattern_pos[j] = 0;
\r
4572 if(j >= n_components)
\r
4575 gb_tbl->gb_patterns = gb_patterns;
\r
4578 // Process the supergroup, if any.
\r
4579 vector<colref_t *> sgb = fta_tree->get_supergb();
\r
4580 for(i=0;i<sgb.size();++i){
\r
4581 int gbr = gb_tbl->find_gb(sgb[i],fta_tree->fm, schema);
\r
4583 fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str());
\r
4584 found_error = true;
\r
4586 if(qs->sg_tbl.count(gbr)){
\r
4587 fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str());
\r
4589 qs->sg_tbl.insert(gbr);
\r
4591 if(found_error) return(NULL);
\r
4593 if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){
\r
4594 fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
\r
4600 predicate_t *wh = fta_tree->get_where();
\r
4601 predicate_t *hv = fta_tree->get_having();
\r
4602 predicate_t *cw = fta_tree->get_cleaning_when();
\r
4603 predicate_t *cb = fta_tree->get_cleaning_by();
\r
4604 predicate_t *closew = fta_tree->get_closing_when();
\r
4606 if(closew != NULL && gb_tbl->gb_patterns.size()>1){
\r
4607 fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n");
\r
4613 // Verify that all column references are valid, and if so assign
\r
4616 vector<select_element *> sl_list = fta_tree->get_sl_vec();
\r
4617 for(i=0;i<sl_list.size();i++){
\r
4618 retval = verify_colref(sl_list[i]->se, fta_tree->fm, schema, gb_tbl);
\r
4619 if(retval < 0) found_error = true;
\r
4622 retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl);
\r
4623 if(retval < 0) found_error = true;
\r
4625 retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl);
\r
4626 if(retval < 0) found_error = true;
\r
4628 retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl);
\r
4629 if(retval < 0) found_error = true;
\r
4631 retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl);
\r
4632 if(retval < 0) found_error = true;
\r
4633 if(closew != NULL)
\r
4634 retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl);
\r
4635 if(retval < 0) found_error = true;
\r
4637 if(found_error) return(NULL);
\r
4639 // Verify that all of the scalar expressions
\r
4640 // and comparison predicates have compatible types.
\r
4643 string temporal_output_fields;
\r
4644 for(i=0;i<sl_list.size();i++){
\r
4645 retval = assign_data_types(sl_list[i]->se, schema, fta_tree, Ext_fcns );
\r
4647 found_error = true;
\r
4649 if(sl_list[i]->se->get_data_type()->is_temporal()){
\r
4651 temporal_output_fields += " "+int_to_string(i);
\r
4655 if(n_temporal > 1){
\r
4656 fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s). Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str());
\r
4660 retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns);
\r
4661 if(retval < 0) found_error = true;
\r
4663 retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns);
\r
4664 if(retval < 0) found_error = true;
\r
4666 retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns);
\r
4667 if(retval < 0) found_error = true;
\r
4669 retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns);
\r
4670 if(retval < 0) found_error = true;
\r
4671 if(closew != NULL)
\r
4672 retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns);
\r
4673 if(retval < 0) found_error = true;
\r
4675 if(found_error) return(NULL);
\r
4677 // Impute names for the unnamed columns.
\r
4678 set<string> curr_names;
\r
4680 for(s=0;s<sl_list.size();++s){
\r
4681 curr_names.insert(sl_list[s]->name);
\r
4683 for(s=0;s<sl_list.size();++s){
\r
4684 if(sl_list[s]->name == "")
\r
4685 sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se);
\r
4689 // Check the aggregates.
\r
4690 // No aggrs allowed in the WHERE predicate.
\r
4691 // (no aggrs in the GB defs, but that is examined elsewhere)
\r
4692 // Therefore, aggregates are allowed only the select clause.
\r
4694 // The query is an aggregation query if there is a group-by clause, or
\r
4695 // if any aggregate is referenced. If there is a group-by clause,
\r
4696 // at least one aggregate must be referenced.
\r
4697 // If the query is an aggregate query, the scalar expressions in
\r
4698 // the select clause can reference only constants, aggregates, or group-by
\r
4700 // Also, if the query is an aggregate query, build a table referencing
\r
4701 // the aggregates.
\r
4703 // No nested aggregates allowed.
\r
4706 // First, count references in the WHERE predicate.
\r
4707 // (if there are any references, report an error).
\r
4708 // can ref group vars, tuple fields, and stateful fcns.
\r
4711 retval = count_aggr_pred(wh, true);
\r
4713 fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n");
\r
4718 // NOTE : Here I need an analysis of the having clause
\r
4719 // to verify that it only refs GB attrs and aggregates.
\r
4720 // (also, superaggregates, stateful fcns)
\r
4722 retval = verify_having_pred(hv, "HAVING", Ext_fcns);
\r
4723 if(retval < 0) return(NULL);
\r
4726 // Cleaning by has same reference rules as Having
\r
4728 retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns);
\r
4729 if(retval < 0) return(NULL);
\r
4732 // Cleaning when has same reference rules as Having,
\r
4733 // except that references to non-superaggregates are not allowed.
\r
4734 // This is tested for when "CLEANING_BY" is passed in as the clause.
\r
4736 retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns);
\r
4737 if(retval < 0) return(NULL);
\r
4740 // CLOSING_WHEN : same rules as HAVING
\r
4742 retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns);
\r
4743 if(retval < 0) return(NULL);
\r
4747 // Collect aggregates in the HAVING and CLEANING clauses
\r
4749 build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns);
\r
4752 build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns);
\r
4755 build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns);
\r
4757 if(closew != NULL){
\r
4758 build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns);
\r
4761 // Collect aggregate refs in the SELECT clause.
\r
4763 for(i=0;i<sl_list.size();i++)
\r
4764 build_aggr_tbl_fm_se(sl_list[i]->se, aggr_tbl, Ext_fcns);
\r
4767 // Collect references to states of stateful functions
\r
4769 gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns);
\r
4772 gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns);
\r
4775 gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns);
\r
4778 gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns);
\r
4780 if(closew != NULL){ // should be no stateful fcns here ...
\r
4781 gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns);
\r
4783 for(i=0;i<sl_list.size();i++)
\r
4784 gather_fcn_states_se(sl_list[i]->se, qs->states_refd, Ext_fcns);
\r
4787 // If this is an aggregate query, it had normally references
\r
4788 // some aggregates. Its not necessary though, just emit a warning.
\r
4789 // (acts as SELECT DISTINCT)
\r
4791 bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0;
\r
4792 if(is_aggr_query && aggr_tbl->size() == 0){
\r
4793 fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n");
\r
4796 // If this is an aggregate query,
\r
4797 // 1) verify that the SEs in the SELECT clause reference
\r
4798 // only constants, aggregates, and group-by attributes.
\r
4799 // 2) No aggregate scalar expression references an aggregate
\r
4800 // or any stateful function.
\r
4801 // 3) either it references both CLEANING clauses or neither.
\r
4802 // 4) all superaggregates must have the superaggr_allowed property.
\r
4803 // 5) all aggregates ref'd in the CLEANING_WHEN ad CLEANING_BY
\r
4804 // clauses must have the multiple_output property.
\r
4807 if(is_aggr_query){
\r
4808 if(gb_list.size() == 0){
\r
4809 fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n");
\r
4812 // Ensure that at least one gbvar is temporal
\r
4813 if(! fta_tree->name_exists("no_temporal_aggr")){
\r
4814 bool found_temporal = false;
\r
4815 for(i=0;i<gb_tbl->size();i++){
\r
4816 if(gb_tbl->get_data_type(i)->is_temporal()){
\r
4817 found_temporal = true;
\r
4820 if(! found_temporal){
\r
4821 fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n");
\r
4826 if((!cb && cw) || (cb && !cw)){
\r
4827 fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n");
\r
4831 bool refs_running = false;
\r
4833 for(a=0; a<aggr_tbl->size(); ++a){
\r
4834 refs_running |= aggr_tbl->is_running_aggr(a);
\r
4839 fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n");
\r
4842 if(!refs_running){
\r
4843 fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n");
\r
4848 if(refs_running && !closew){
\r
4849 fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n");
\r
4853 bool st_ok = true;
\r
4854 for(i=0;i<sl_list.size();i++){
\r
4855 bool ret_bool = verify_aggr_query_se(sl_list[i]->se);
\r
4856 st_ok = st_ok && ret_bool;
\r
4861 for(i=0;i<aggr_tbl->size();i++){
\r
4862 if(aggr_tbl->is_superaggr(i)){
\r
4863 if(! aggr_tbl->superaggr_allowed(i)){
\r
4864 fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str());
\r
4868 if(aggr_tbl->is_builtin(i)){
\r
4869 if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){
\r
4870 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
\r
4874 vector<scalarexp_t *> opl = aggr_tbl->get_operand_list(i);
\r
4876 for(o=0;o<opl.size();++o){
\r
4877 if(count_aggr_se(opl[o], true) > 0){
\r
4878 fprintf(stderr,"ERROR no nested aggregation allowed.\n");
\r
4885 // Ensure that non-aggregate query doesn't reference some things
\r
4887 fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n");
\r
4891 fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n");
\r
4894 if(qs->states_refd.size()){
\r
4895 fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n");
\r
4902 // Convert the predicates into CNF. OK to pass NULL ptr.
\r
4903 make_cnf_from_pr(wh, qs->wh_cnf);
\r
4904 make_cnf_from_pr(hv, qs->hav_cnf);
\r
4905 make_cnf_from_pr(cb, qs->cb_cnf);
\r
4906 make_cnf_from_pr(cw, qs->cw_cnf);
\r
4907 make_cnf_from_pr(closew, qs->closew_cnf);
\r
4909 // Analyze the predicates.
\r
4911 for(i=0;i<qs->wh_cnf.size();i++)
\r
4912 analyze_cnf(qs->wh_cnf[i]);
\r
4913 for(i=0;i<qs->hav_cnf.size();i++)
\r
4914 analyze_cnf(qs->hav_cnf[i]);
\r
4915 for(i=0;i<qs->cb_cnf.size();i++)
\r
4916 analyze_cnf(qs->cb_cnf[i]);
\r
4917 for(i=0;i<qs->cw_cnf.size();i++)
\r
4918 analyze_cnf(qs->cw_cnf[i]);
\r
4919 for(i=0;i<qs->closew_cnf.size();i++)
\r
4920 analyze_cnf(qs->closew_cnf[i]);
\r
4923 // At this point, the old analysis program
\r
4924 // gathered all refs to partial functions,
\r
4925 // complex literals, and parameters accessed via a handle.
\r
4926 // I think its better to delay this
\r
4927 // until code generation time, as the query will be
\r
4928 // in general split.
\r
4935 ///////////////////////////////////////////////////////////////////////
\r
4937 // Expand gbvars with their definitions.
\r
// expand_gbvars_se rewrites the expression tree rooted at se so that
// references to group-by variables are replaced by the variable's definition.
//   se     : expression to rewrite. Its operand slots (lhs.scalarp,
//            rhs.scalarp, param_list[]) are overwritten in place with the
//            rewritten subexpressions.
//   gb_tbl : group-by table; get_def(se->get_gb_ref()) supplies the
//            definition substituted for a group-by reference.
// Returns the rewritten expression. For a group-by reference this is the
// result of dup_se() applied to the definition, i.e. a different node than
// se, so callers must use the return value rather than the pointer passed in.
// NOTE(review): the return statements of the other cases are not visible
// here -- presumably they return se itself; confirm.
4939 scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){
\r
4942 switch(se->get_operator_type()){
\r
4945 case SE_IFACE_PARAM:
\r
// Single-operand form: only the left operand is rewritten.
4948 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
\r
4950 case SE_BINARY_OP:
\r
4951 se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl);
\r
4952 se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl);
\r
// A group-by reference is replaced by dup_se() of its definition.
4955 if( se->is_gb() ){
\r
4956 return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) );
\r
4959 // don't descend into aggr defs.
\r
4960 case SE_AGGR_STAR:
\r
// Rewrite every parameter expression in place.
4965 for(o=0;o<se->param_list.size();o++){
\r
4966 se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl);
\r
// Unknown operator type: report an internal error with the source position.
4970 fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n",
\r
4971 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// expand_gbvars_pr is the predicate counterpart of expand_gbvars_se: it walks
// the predicate tree rooted at pr and replaces each scalar operand
// (lhs.sexp, rhs.sexp, param_list[]) with the result of expand_gbvars_se.
// Sub-predicates of unary and binary predicate operators are processed
// recursively. The predicate is modified in place; nothing is returned.
//   pr     : predicate to rewrite.
//   gb_tbl : group-by table passed through to expand_gbvars_se.
// NOTE(review): op_list and found are declared but not used in the lines
// visible here.
4977 void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){
\r
4978 vector<scalarexp_t *> op_list;
\r
4980 bool found = false;
\r
4982 switch(pr->get_operator_type()){
\r
// Predicate form with a single scalar operand on the left.
4984 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl);
\r
4986 case PRED_COMPARE:
\r
4987 pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ;
\r
4988 pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ;
\r
4990 case PRED_UNARY_OP:
\r
4991 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
\r
4993 case PRED_BINARY_OP:
\r
4994 expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ;
\r
4995 expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ;
\r
// Rewrite every parameter expression in place.
4998 for(o=0;o<pr->param_list.size();++o){
\r
4999 pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ;
\r
// Unknown predicate operator: report an internal error with the source position.
5003 fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
5004 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
5012 // return true if the se / pr contains any gbvar on the list.
\r
// contains_gb_se reports whether the scalar expression se references a
// group-by variable whose index (get_gb_ref) is a member of gref_set.
//   se       : expression to search.
//   gref_set : set of group-by variable indices of interest.
// Unary and binary operators are searched through their operands.
// Aggregate definitions are not searched.
// NOTE(review): the value returned for leaf cases and for non-gb column
// references is not visible here -- presumably false; confirm.
5015 bool contains_gb_se(scalarexp_t *se, set<int> &gref_set){
\r
5016 vector<scalarexp_t *> operands;
\r
5018 bool found = false;
\r
5020 switch(se->get_operator_type()){
\r
5023 case SE_IFACE_PARAM:
\r
5026 return contains_gb_se(se->get_left_se(),gref_set);
\r
5027 case SE_BINARY_OP:
\r
5028 return( contains_gb_se(se->get_left_se(),gref_set) ||
\r
5029 contains_gb_se(se->get_right_se(),gref_set) );
\r
// A group-by reference matches only if its index is in gref_set.
5031 if( se->is_gb() ){
\r
5032 return( gref_set.count(se->get_gb_ref()) > 0);
\r
5035 // don't descend into aggr defs.
\r
5036 case SE_AGGR_STAR:
\r
5041 operands = se->get_operands();
\r
// "found || ..." short-circuits: operands after the first match are not visited.
5042 for(o=0;o<operands.size();o++){
\r
5043 found = found || contains_gb_se(operands[o], gref_set);
\r
5047 fprintf(stderr,"INTERNAL ERROR in contains_gb_se, line %d, character %d: unknown operator type %d\n",
\r
5048 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
// contains_gb_pr reports whether the predicate pr references a group-by
// variable whose index is a member of gref_set.
//   pr       : predicate to search.
//   gref_set : set of group-by variable indices of interest.
// Scalar operands are tested with contains_gb_se; sub-predicates of unary
// and binary predicate operators are tested recursively.
5055 bool contains_gb_pr(predicate_t *pr, set<int> &gref_set){
\r
5056 vector<scalarexp_t *> op_list;
\r
5058 bool found = false;
\r
5060 switch(pr->get_operator_type()){
\r
// Predicate form with a single scalar operand on the left.
5062 return contains_gb_se(pr->get_left_se(), gref_set);
\r
5063 case PRED_COMPARE:
\r
5064 return (contains_gb_se(pr->get_left_se(),gref_set)
\r
5065 || contains_gb_se(pr->get_right_se(),gref_set) );
\r
5066 case PRED_UNARY_OP:
\r
5067 return contains_gb_pr(pr->get_left_pr(),gref_set) ;
\r
5068 case PRED_BINARY_OP:
\r
5069 return (contains_gb_pr(pr->get_left_pr(),gref_set)
\r
5070 || contains_gb_pr(pr->get_right_pr(),gref_set) );
\r
5072 op_list = pr->get_op_list();
\r
// "found || ..." short-circuits: operands after the first match are not visited.
5073 for(o=0;o<op_list.size();++o){
\r
5074 found = found ||contains_gb_se(op_list[o],gref_set) ;
\r
5078 fprintf(stderr,"INTERNAL ERROR in contains_gb_pr, line %d, character %d, unknown predicate operator type %d\n",
\r
5079 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
5086 // Gather the set of columns accessed in this se.
\r
5087 // Descend into aggregate functions.
\r
//   se      : scalar expression to scan.
//   cid_set : output; a col_id is inserted for each column reference found.
//   gtbl    : group-by table used to follow a group-by variable reference
//             into its definition. The error message below indicates that a
//             NULL gtbl is not accepted when se contains such a reference.
// A column whose tblvar_ref is negative (unbound) produces a warning on stderr.
// NOTE(review): the insert into cid_set that follows the warning appears to
// be unconditional -- confirm.
5089 void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){
\r
5091 vector<scalarexp_t *> operands;
\r
5097 switch(se->get_operator_type()){
\r
5100 case SE_IFACE_PARAM:
\r
5103 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
\r
5105 case SE_BINARY_OP:
\r
5106 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
\r
5107 gather_se_col_ids(se->get_right_se(),cid_set,gtbl);
\r
// Plain column reference: record its column id.
5110 if(! se->is_gb() ){
\r
5111 ci.load_from_colref(se->get_colref() );
\r
5112 if(ci.tblvar_ref < 0){
\r
5113 fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str());
\r
5115 cid_set.insert(ci);
\r
5118 fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n");
\r
// Group-by reference: collect the columns used by its definition instead.
5121 gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl);
\r
5124 case SE_AGGR_STAR:
\r
5127 gather_se_col_ids(se->get_left_se(),cid_set,gtbl);
\r
5130 operands = se->get_operands();
\r
5131 for(o=0;o<operands.size();o++){
\r
5132 gather_se_col_ids(operands[o], cid_set,gtbl);
\r
5136 fprintf(stderr,"INTERNAL ERROR in gather_se_col_ids, line %d, character %d: unknown operator type %d\n",
\r
5137 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
5143 // Gather the set of columns accessed in this predicate.
\r
//   pr      : predicate to scan.
//   cid_set : output; receives the column ids found by gather_se_col_ids.
//   gtbl    : group-by table, passed through to gather_se_col_ids.
// Scalar operands are scanned with gather_se_col_ids; sub-predicates of unary
// and binary predicate operators are scanned recursively.
5145 void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){
\r
5146 vector<scalarexp_t *> op_list;
\r
5149 switch(pr->get_operator_type()){
\r
// Predicate form with a single scalar operand on the left.
5151 gather_se_col_ids(pr->get_left_se(), cid_set,gtbl);
\r
5153 case PRED_COMPARE:
\r
5154 gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ;
\r
5155 gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ;
\r
5157 case PRED_UNARY_OP:
\r
5158 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
\r
5160 case PRED_BINARY_OP:
\r
5161 gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ;
\r
5162 gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ;
\r
5165 op_list = pr->get_op_list();
\r
5166 for(o=0;o<op_list.size();++o){
\r
5167 gather_se_col_ids(op_list[o],cid_set,gtbl) ;
\r
5171 fprintf(stderr,"INTERNAL ERROR in gather_pr_col_ids, line %d, character %d, unknown predicate operator type %d\n",
\r
5172 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
5179 // Gather the set of special operator or comparison functions referenced by this se.
\r
//   se      : scalar expression to scan.
//   fcn_set : output; receives function names as strings.
// Names are collected from two sources visible below:
//   - a literal whose constructor_name() is non-empty contributes that name;
//   - a unary or binary operator for which the left operand's data type
//     reports complex_operator(...) contributes get_complex_operator(...).
// Operands of operators, aggregates and function calls are scanned
// recursively.
5181 void gather_se_opcmp_fcns(scalarexp_t *se, set<string> &fcn_set){
\r
5183 data_type *ldt, *rdt;
\r
5185 vector<scalarexp_t *> operands;
\r
5187 switch(se->get_operator_type()){
\r
// Literal: record its constructor function, if it has one.
5189 if( se->get_literal()->constructor_name() != "")
\r
5190 fcn_set.insert( se->get_literal()->constructor_name() );
\r
5194 // SE_IFACE_PARAM should not exist when this is called.
\r
// Single-operand operator: the operand's type decides whether a function is needed.
5196 ldt = se->get_left_se()->get_data_type();
\r
5197 if(ldt->complex_operator(se->get_op()) ){
\r
5198 fcn_set.insert( ldt->get_complex_operator(se->get_op()) );
\r
5200 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
\r
5202 case SE_BINARY_OP:
\r
5203 ldt = se->get_left_se()->get_data_type();
\r
5204 rdt = se->get_right_se()->get_data_type();
\r
// The lookup is keyed on the left type, with the right type as an argument.
5206 if(ldt->complex_operator(rdt, se->get_op()) ){
\r
5207 fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) );
\r
5209 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
\r
5210 gather_se_opcmp_fcns(se->get_right_se(),fcn_set);
\r
5214 case SE_AGGR_STAR:
\r
5217 gather_se_opcmp_fcns(se->get_left_se(),fcn_set);
\r
5220 operands = se->get_operands();
\r
5221 for(o=0;o<operands.size();o++){
\r
5222 gather_se_opcmp_fcns(operands[o], fcn_set);
\r
5226 fprintf(stderr,"INTERNAL ERROR in gather_se_opcmp_fcns, line %d, character %d: unknown operator type %d\n",
\r
5227 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
5233 // Gather the set of special operator or comparison functions referenced by this predicate.
\r
//   pr      : predicate to scan.
//   fcn_set : output; receives function names as strings.
// A comparison whose left data type reports complex_comparison(...) adds the
// name returned by get_comparison_fcn(...). Scalar operands are then scanned
// with gather_se_opcmp_fcns, and sub-predicates are scanned recursively.
// NOTE(review): the internal-error message at the end of this function names
// verify_predicate_colref rather than gather_pr_opcmp_fcns -- looks like a
// copy/paste slip in the message text.
5235 void gather_pr_opcmp_fcns(predicate_t *pr, set<string> &fcn_set){
\r
5236 data_type *ldt, *rdt;
\r
5237 vector<scalarexp_t *> operands;
\r
5240 switch(pr->get_operator_type()){
\r
// Single left operand: its type is compared against itself.
5242 ldt = pr->get_left_se()->get_data_type();
\r
5243 if(ldt->complex_comparison(ldt) ){
\r
5244 fcn_set.insert( ldt->get_comparison_fcn(ldt) );
\r
5246 gather_se_opcmp_fcns(pr->get_left_se(), fcn_set);
\r
5248 case PRED_COMPARE:
\r
5249 ldt = pr->get_left_se()->get_data_type();
\r
5250 rdt = pr->get_right_se()->get_data_type();
\r
5251 if(ldt->complex_comparison(rdt) ){
\r
5252 fcn_set.insert( ldt->get_comparison_fcn(rdt) );
\r
5254 gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ;
\r
5255 gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ;
\r
5257 case PRED_UNARY_OP:
\r
5258 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
\r
5260 case PRED_BINARY_OP:
\r
5261 gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ;
\r
5262 gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ;
\r
5265 operands = pr->get_op_list();
\r
5266 for(o=0;o<operands.size();o++){
\r
5267 gather_se_opcmp_fcns(operands[o], fcn_set);
\r
5271 fprintf(stderr,"INTERNAL ERROR in verify_predicate_colref, line %d, character %d, unknown predicate operator type %d\n",
\r
5272 pr->get_lineno(), pr->get_charno(), pr->get_operator_type() );
\r
5279 // find the temporal variable divisor if any.
\r
5280 // Only forms allowed : temporal_colref, temporal_colref/const
\r
5281 // temporal_colref/const + const
\r
//   se  : expression to analyze.
//   gbt : group-by table; a group-by reference is analyzed through its
//         definition (gbt->get_def).
//   fnm : output; set to the field name of the temporal column reference
//         when one is identified.
// Returns a long long. For colref / literal the literal's text is parsed
// with sscanf into the value that is declared as the result variable.
// NOTE(review): the values returned on the other paths (plain temporal
// colref, unsupported forms) are not visible here; confirm before relying
// on specific sentinel values.
5284 long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){
\r
5285 long long int retval = 0;
\r
5286 data_type *ldt, *rdt;
\r
5288 vector<scalarexp_t *> operands;
\r
5289 scalarexp_t *t_se, *c_se;
\r
5292 switch(se->get_operator_type()){
\r
5297 // SE_IFACE_PARAM should not exist when this is called.
\r
5300 case SE_BINARY_OP:
\r
// Decide which operand is the temporal one (t_se) and which the constant (c_se).
5301 ldt = se->get_left_se()->get_data_type();
\r
5302 if(ldt->is_temporal()){
\r
5303 t_se = se->get_left_se();
\r
5304 c_se = se->get_right_se();
\r
// NOTE(review): this pair is identical to the pair above. If it is the
// alternative branch (left operand not temporal), the operands were
// presumably meant to be swapped -- confirm.
5306 t_se = se->get_left_se();
\r
5307 c_se = se->get_right_se();
\r
// Exactly one side may be temporal: t_se must be, c_se must not be.
5309 if((! t_se->get_data_type()->is_temporal()) || c_se->get_data_type()->is_temporal())
\r
5312 the_op = se->get_op();
\r
// Adding or subtracting a constant does not change the divisor: recurse.
5313 if(the_op == "+" || the_op == "-")
\r
5314 return find_temporal_divisor(t_se, gbt,fnm);
\r
5315 if(the_op == "/"){
\r
5316 if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){
\r
5317 fnm = t_se->get_colref()->get_field();
\r
5318 string lits = c_se->get_literal()->to_string();
\r
// NOTE(review): %qd is a non-standard (BSD-style) length modifier; the
// ISO C spelling for a long long argument is %lld. The sscanf result is
// not checked in the visible code.
5319 sscanf(lits.c_str(),"%qd",&retval);
\r
// Group-by reference: analyze its definition instead.
5327 return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm);
\r
5329 if(se->get_data_type()->is_temporal()){
\r
5330 fnm = se->get_colref()->get_field();
\r
5334 case SE_AGGR_STAR:
\r
5341 fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n",
\r
5342 se->get_lineno(), se->get_charno(),se->get_operator_type());
\r
5348 // impute_colnames:
\r
5349 // Create meaningful but unique names for the columns.
\r
// Convenience overload: collects the names of all elements of sel_list into
// a temporary set and delegates to impute_colname(set<string>&, scalarexp_t*).
//   sel_list : select elements whose names must not be reused.
//   se       : expression for which a name is wanted.
// Returns the name produced by the set-based overload. The temporary set is
// local to this call, so the caller only learns the new name through the
// return value.
5350 string impute_colname(vector<select_element *> &sel_list, scalarexp_t *se){
\r
5351 set<string> curr_names;
\r
5353 for(s=0;s<sel_list.size();++s){
\r
5354 curr_names.insert(sel_list[s]->name);
\r
5356 return impute_colname(curr_names, se);
\r
// Derives a column name from the form of se and records it in curr_names.
//   curr_names : in/out; names already taken. The chosen name is inserted.
//   se         : expression for which a name is wanted.
// Naming rules visible below:
//   parameter            -> "Param_"  + parameter name
//   interface parameter  -> "Iparam_" + parameter name
//   column reference     -> the field name
//   operator/function    -> the operator name (get_op), extended with a
//                           suffix taken from its first operand:
//                           "_PARAM_<name>", "_IPARAM_<name>", "_<field>"
//                           or "_<op>".
// After the base name is chosen, the loop near the end formats candidates of
// the form base + integer while the current name is already in curr_names.
// NOTE(review): the declarations of ret, tmpstr and iter, the names used for
// the remaining cases, and the return statement are not visible here --
// presumably ret is returned; confirm.
5359 string impute_colname(set<string> &curr_names, scalarexp_t *se){
\r
5362 vector<scalarexp_t *> operand_list;
\r
5365 switch(se->get_operator_type()){
\r
5370 ret = "Param_" + se->get_param_name();
\r
5372 case SE_IFACE_PARAM:
\r
5373 ret = "Iparam_" + se->get_ifpref()->get_pname();
\r
5376 ret = se->get_colref()->get_field() ;
\r
5379 case SE_BINARY_OP:
\r
5382 case SE_AGGR_STAR:
\r
// Name starts with the operator; the left operand (seo) supplies the suffix.
5386 ret = se->get_op();
\r
5387 seo = se->get_left_se();
\r
5388 switch(se->get_left_se()->get_operator_type()){
\r
5390 ret += "_PARAM_"+seo->get_param_name();
\r
5392 case SE_IFACE_PARAM:
\r
5393 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
\r
5396 opstr = seo->get_colref()->get_field();
\r
// strncmp is non-zero when the field name does not start with the operator
// name; only then is the field name appended.
5397 if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
\r
5398 ret += "_" + opstr;
\r
5403 case SE_AGGR_STAR:
\r
5405 opstr = seo->get_op();
\r
// Same prefix test as above, applied to the operand's operator name.
5406 if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){
\r
5407 ret += "_" + seo->get_op();
\r
5413 opstr = seo->get_op();
\r
5414 ret += "_" + seo->get_op();
\r
5417 case SE_BINARY_OP:
\r
// Name starts with the operator; the first operand, if any, supplies the suffix.
5426 ret = se->get_op();
\r
5427 operand_list = se->get_operands();
\r
5428 if(operand_list.size() > 0){
\r
5429 seo = operand_list[0];
\r
5430 switch(seo->get_operator_type()){
\r
5432 ret += "_PARAM_"+seo->get_param_name();
\r
5434 case SE_IFACE_PARAM:
\r
5435 ret += "_IPARAM_"+seo->get_ifpref()->get_pname();
\r
5438 ret += "_" + seo->get_colref()->get_field();
\r
5440 case SE_AGGR_STAR:
\r
5443 ret += "_" + seo->get_op();
\r
5446 case SE_BINARY_OP:
\r
// The generic name "Field" gets special handling involving "Field0".
// NOTE(review): the statements of this branch are not visible here.
5459 if(ret == "Field"){
\r
5460 if(curr_names.count("Field0") == 0)
\r
// Make the name unique with respect to curr_names.
5464 string base = ret;
\r
5465 while(curr_names.count(ret) > 0){
\r
// NOTE(review): sprintf into tmpstr with an arbitrary-length base name;
// confirm the buffer size or prefer snprintf.
5467 sprintf(tmpstr,"%s%d",base.c_str(),iter);
\r
5473 curr_names.insert(ret);
\r
5480 //////////////////////////////////////////////////////////////////////
\r
5481 ////////////// Methods of defined classes ///////////////////////
\r
5482 //////////////////////////////////////////////////////////////////////
\r
5484 // helper fcn to enable col_id as map key.
\r
// Orders col_id values by tblvar_ref first and, when the tblvar_ref values
// are equal, by field.
// NOTE(review): the final return for the remaining case (cr1.tblvar_ref
// greater than cr2.tblvar_ref) is not visible here -- presumably false.
5486 bool operator<(const col_id &cr1, const col_id &cr2){
\r
5487 if(cr1.tblvar_ref < cr2.tblvar_ref) return(true);
\r
5488 if(cr1.tblvar_ref == cr2.tblvar_ref)
\r
5489 return (cr1.field < cr2.field);
\r
5494 // Process the GB variables.
\r
5495 // At parse time, GB vars are either GB_COLREF,
\r
5496 // or GB_COMPUTED if the AS keyword is used.
\r
5497 // Cast GB vars as named entities with a SE as
\r
5498 // their definition (the colref in the case of GB_COLREF).
\r
5500 // TODO: if there is a gbref in a gbdef,
\r
5501 // then I won't be able to compute the value without
\r
5502 // a complex dependence analysis. So verify that there is no
\r
5503 // gbref in any of the GBdefs.
\r
5504 // BUT: a GBVAR_COLREF should be converted to a regular colref,
\r
5505 // which is not yet done.
\r
5507 // TODO : sort out issue of GBVAR naming and identification.
\r
5508 // Determine where it is advantageous to convert GV_COLREF
\r
5509 // GBVARS to colrefs -- e.g. in group definition, in the WHERE clause,
\r
5512 // return -1 if there is a problem.
\r
// Steps performed below:
//   1. Build a gb_table_entry: for a GB_COLREF the column is bound to a table
//      variable (infer_tablevar_from_colref) and wrapped in a new scalarexp_t;
//      otherwise the entry takes gb->name and gb->def directly.
//   2. Validate the definition with verify_colref and assign_data_types; a
//      negative result from either is returned to the caller.
//   3. Reject definitions that reference group-by variables or aggregates.
//   4. Reject a name that is already present in gtbl.
//   5. Append the entry to gtbl.
// Parameters visible here: fm (table variables of the FROM clause), schema,
// fta_tree and Ext_fcns are passed through to the helper calls.
// NOTE(review): the first parameter (the group-by descriptor used as gb) and
// the value returned on success are not visible here; confirm.
5514 int gb_table::add_gb_attr(
\r
5516 tablevar_list_t *fm,
\r
5517 table_list *schema,
\r
5518 table_exp_t *fta_tree,
\r
5519 ext_fcn_list *Ext_fcns
\r
5523 gb_table_entry *entry;
\r
5525 if(gb->type == GB_COLREF){
\r
// Use the qualified form only when a table name was supplied.
5526 if(gb->table != "")
\r
5527 cr = new colref_t(
\r
5528 gb->interface.c_str(),gb->table.c_str(), gb->name.c_str()
\r
5531 cr = new colref_t(gb->name.c_str());
\r
5533 int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema);
\r
// NOTE(review): cr was allocated with new just above and is not released
// on this early return in the visible code.
5534 if(tablevar_ref < 0) return(tablevar_ref);
\r
// Bind the column reference to the table variable that was found.
5536 cr->set_tablevar_ref(tablevar_ref);
\r
5537 cr->set_schema_ref(fm->get_schema_ref(tablevar_ref));
\r
5538 cr->set_interface("");
\r
5539 cr->set_table_name(fm->get_tablevar_name(tablevar_ref));
\r
5541 entry = new gb_table_entry();
\r
5542 entry->name.field = cr->get_field();
\r
5543 entry->name.tblvar_ref = tablevar_ref;
\r
5544 entry->definition = new scalarexp_t(cr);
\r
5545 entry->ref_type = GBVAR_COLREF;
\r
// Computed group-by variable: not tied to a table variable (tblvar_ref = -1).
5547 entry = new gb_table_entry();
\r
5548 entry->name.field = gb->name;
\r
5549 entry->name.tblvar_ref = -1;
\r
5550 entry->definition = gb->def;
\r
5551 entry->ref_type = GBVAR_SE;
\r
// NOTE(review): entry is not released on the early returns below in the
// visible code.
5554 retval = verify_colref(entry->definition, fm, schema, NULL);
\r
5555 if(retval < 0) return(retval);
\r
5557 retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns);
\r
5558 if(retval < 0) return(retval);
\r
5560 // Verify that the gbvar def references no aggregates and no gbvars.
\r
5561 if(count_gb_se(entry->definition) > 0){
\r
5562 fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() );
\r
5565 if(count_aggr_se(entry->definition, true) > 0){
\r
5566 fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() );
\r
5570 // Check for duplicates
\r
// Only the field name is compared, not tblvar_ref, so equally named fields
// of different table variables are reported as duplicates.
5572 for(i=0;i<gtbl.size();++i){
\r
5573 if(entry->name.field == gtbl[i]->name.field){
\r
5574 fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size());
\r
5580 gtbl.push_back(entry);
\r
5586 // Try to determine if the colref is actually
\r
5588 // a) if no tablename associated with the colref,
\r
5589 // 1) try to find a matching GB_COMPUTED gbvar.
\r
5590 // 2) failing that, try to match to a single tablevar
\r
5591 // 3) if successful, search among GB_COLREF
\r
5592 // b) else, try to match the tablename to a single tablevar
\r
5593 // if successful, search among GB_COLREF
\r
//   cr     : column reference to look up in this group-by table.
//   fm     : table variables of the FROM clause.
//   schema : passed to find_source_tables to locate tables defining the field.
// Returns -1 when no unique table variable can be determined or when the
// column is not a group-by variable.
// NOTE(review): the value returned on a match is not visible here --
// presumably the index i into gtbl; confirm.
5594 int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){
\r
5595 string c_field = cr->get_field();
\r
5599 vector<int> candidates;
\r
5601 if(cr->uses_default_table()){
\r
// a.1) a computed group-by variable (GBVAR_SE) with the same name.
5602 for(i=0;i<gtbl.size();i++){
\r
5603 if(gtbl[i]->ref_type==GBVAR_SE && c_field == gtbl[i]->name.field){
\r
// a.2) the field must come from exactly one table variable.
5607 candidates = find_source_tables(c_field, fm, schema);
\r
5608 if(candidates.size() != 1) return(-1); // can't find unique tablevar
\r
// a.3) a column-reference group-by variable with that field and table variable.
5609 for(i=0;i<gtbl.size();i++){
\r
5610 if(gtbl[i]->ref_type==GBVAR_COLREF &&
\r
5611 c_field == gtbl[i]->name.field &&
\r
5612 candidates[0] == gtbl[i]->name.tblvar_ref){
\r
5616 return(-1); // colref is not in gb table.
\r
5619 // A table name must have been given.
\r
5620 vector<tablevar_t *> fm_tbls = fm->get_table_list();
\r
5621 string interface = cr->get_interface();
\r
5622 string table_name = cr->get_table_name();
\r
5625 // if no interface name is given, try to search for the table
\r
5626 // name among the tablevar names first.
\r
5627 if(interface==""){
\r
5628 for(i=0;i<fm_tbls.size();++i){
\r
5629 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
\r
5630 candidates.push_back(i);
\r
// More than one table variable matches: ambiguous.
5632 if(candidates.size()>1) return(-1);
\r
5633 if(candidates.size()==1){
\r
5634 for(i=0;i<gtbl.size();i++){
\r
5635 if(gtbl[i]->ref_type==GBVAR_COLREF &&
\r
5636 c_field == gtbl[i]->name.field &&
\r
5637 candidates[0] == gtbl[i]->name.tblvar_ref){
\r
5641 return(-1); // match semantics of bind to tablevar name first
\r
5645 // Interface name given, or no interface but
\r
5646 // no tablevar match. Try to match on schema name.
\r
// NOTE(review): this loop tests get_var_name(), exactly like the tablevar
// loop above, although the comment says the match is on the schema name --
// confirm which accessor is intended.
5647 for(i=0;i<fm_tbls.size();++i){
\r
5648 if(table_name == fm_tbls[i]->get_var_name() && interface == fm_tbls[i]->get_interface())
\r
5649 candidates.push_back(i);
\r
5651 if(candidates.size() != 1) return(-1);
\r
5652 for(i=0;i<gtbl.size();i++){
\r
5653 if(gtbl[i]->ref_type==GBVAR_COLREF &&
\r
5654 c_field == gtbl[i]->name.field &&
\r
5655 candidates[0] == gtbl[i]->name.tblvar_ref){
\r
5660 // No match found.
\r
// Report whether this aggregate is FTA-legal.
// Two checks are visible: op is compared against the names COUNT, SUM, MIN,
// MAX, AND_AGGR, OR_AGGR and XOR_AGGR, and on another path the answer is
// delegated to Ext_fcns->fta_legal(fcn_id).
// NOTE(review): the statements that select between the two checks, and the
// values returned around the op test, are not visible in this excerpt.
5667 bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){
\r
5669 if( (op == "COUNT") || (op == "SUM") || (op == "MIN") ||
\r
5670 (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") ||
\r
5671 (op == "XOR_AGGR") )
\r
5674 return Ext_fcns->fta_legal(fcn_id);
\r
5680 // Return the set of subaggregates required to compute
\r
5681 // the desired aggregate. The operand of the subaggregates
\r
5682 // can only be * or the scalarexp used in the superaggr.
\r
5683 // This is indicated by the use_se vector.
// ret and use_se are appended to in lockstep: use_se[k] is true when
// subaggregate ret[k] takes the scalar expression, false when it takes *.
\r
5685 // Is this code generation specific?
\r
5687 vector<string> aggr_table_entry::get_subaggr_fcns(vector<bool> &use_se){
\r
5688 vector<string> ret;
\r
// COUNT is the only subaggregate here that is flagged use_se == false.
5690 if(op == "COUNT"){
\r
5691 ret.push_back("COUNT");
\r
5692 use_se.push_back(false);
\r
// NOTE(review): guard of this branch is not visible in this excerpt; presumably op == "SUM".
5695 ret.push_back("SUM");
\r
5696 use_se.push_back(true);
\r
// Two subaggregates: SUM over the expression, then COUNT over *.
// NOTE(review): guard not visible in this excerpt; presumably op == "AVG".
5699 ret.push_back("SUM");
\r
5700 ret.push_back("COUNT");
\r
5701 use_se.push_back(true);
\r
5702 use_se.push_back(false);
\r
// NOTE(review): guard not visible in this excerpt; presumably op == "MIN".
5705 ret.push_back("MIN");
\r
5706 use_se.push_back(true);
\r
// NOTE(review): guard not visible in this excerpt; presumably op == "MAX".
5709 ret.push_back("MAX");
\r
5710 use_se.push_back(true);
\r
// The bitwise-style aggregates map to a subaggregate of the same name.
5712 if(op == "AND_AGGR"){
\r
5713 ret.push_back("AND_AGGR");
\r
5714 use_se.push_back(true);
\r
5716 if(op == "OR_AGGR"){
\r
5717 ret.push_back("OR_AGGR");
\r
5718 use_se.push_back(true);
\r
5720 if(op == "XOR_AGGR"){
\r
5721 ret.push_back("XOR_AGGR");
\r
5722 use_se.push_back(true);
\r
5728 // Code generation specific?
// Build the data types of the subaggregates of this aggregate.
// The entries are pushed in the same order as the names pushed by
// get_subaggr_fcns() (e.g. the SUM type followed by the Int count type).
// Every entry is allocated with new and stored as a raw pointer.
// NOTE(review): who releases these data_type objects is not visible here.
\r
5730 vector<data_type *> aggr_table_entry::get_subaggr_dt(){
\r
5731 vector<data_type *> ret;
\r
// A count is always typed Int, independent of the operand.
5734 if(op == "COUNT"){
\r
5735 dt = new data_type("Int"); // was Uint
\r
5736 ret.push_back( dt );
\r
// Remaining cases derive the type from the operand via set_aggr_data_type().
// NOTE(review): guard of this branch is not visible in this excerpt; presumably op == "SUM".
5739 dt = new data_type();
\r
5740 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
\r
5741 ret.push_back(dt);
\r
// Two entries: the SUM type, then an Int for the count.
// NOTE(review): guard not visible in this excerpt; presumably op == "AVG".
5744 dt = new data_type();
\r
5745 dt->set_aggr_data_type( "SUM",operand->get_data_type() );
\r
5746 ret.push_back( dt );
\r
5747 dt = new data_type("Int");
\r
5748 ret.push_back( dt );
\r
// NOTE(review): guard not visible in this excerpt; presumably op == "MIN".
5751 dt = new data_type();
\r
5752 dt->set_aggr_data_type( "MIN",operand->get_data_type() );
\r
5753 ret.push_back( dt );
\r
// NOTE(review): guard not visible in this excerpt; presumably op == "MAX".
5756 dt = new data_type();
\r
5757 dt->set_aggr_data_type( "MAX",operand->get_data_type() );
\r
5758 ret.push_back( dt );
\r
5760 if(op == "AND_AGGR"){
\r
5761 dt = new data_type();
\r
5762 dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() );
\r
5763 ret.push_back( dt );
\r
5765 if(op == "OR_AGGR"){
\r
5766 dt = new data_type();
\r
5767 dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() );
\r
5768 ret.push_back( dt );
\r
5770 if(op == "XOR_AGGR"){
\r
5771 dt = new data_type();
\r
5772 dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() );
\r
5773 ret.push_back( dt );
\r
5779 // Code generation specific?
// Build the scalar expression that combines subaggregate results into this
// aggregate. se_refs holds the expressions referring to the subaggregate
// values; se_refs[0] is always read and se_refs[1] is read by the division
// case, with no size check visible in this excerpt.
// NOTE(review): ret_se starts as NULL and is presumably what is returned;
// the return statement itself is not visible here.
\r
5781 scalarexp_t *aggr_table_entry::make_superaggr_se(vector<scalarexp_t *> se_refs){
\r
5782 scalarexp_t *se_l, *se_r, *ret_se = NULL;
\r
// Partial counts are combined by summing them.
5784 if(op == "COUNT"){
\r
5785 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
\r
// NOTE(review): guard of this branch is not visible in this excerpt; presumably op == "SUM".
5789 ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
\r
// Quotient of two summed subaggregates: SUM(se_refs[0]) / SUM(se_refs[1]).
// NOTE(review): guard not visible in this excerpt; presumably op == "AVG".
5793 se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]);
\r
5794 se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]);
\r
5796 ret_se = new scalarexp_t("/", se_l, se_r);
\r
// NOTE(review): guard not visible in this excerpt; presumably op == "MIN".
5800 ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]);
\r
// NOTE(review): guard not visible in this excerpt; presumably op == "MAX".
5804 ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]);
\r
// AND/OR/XOR aggregates are recombined with the same operator.
5807 if(op == "AND_AGGR"){
\r
5808 ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]);
\r
5811 if(op == "OR_AGGR"){
\r
5812 ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]);
\r
5815 if(op == "XOR_AGGR"){
\r
5816 ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]);
\r
5825 // Add a built-in aggr.
// op       : aggregate operator name.
// se       : operand expression of the aggregate.
// is_super : whether the aggregate is referenced as a superaggregate.
// An existing built-in entry with the same op and an operand equivalent under
// is_equivalent_se() is reused; otherwise a new entry is appended and its
// index (agr_tbl.size() - 1) is returned.
// NOTE(review): the return for the reuse case is not visible in this excerpt;
// presumably it returns the matching index i.
\r
5826 int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){
\r
5829 for(i=0;i<agr_tbl.size();i++){
\r
5830 if(agr_tbl[i]->is_builtin() && op == agr_tbl[i]->op
\r
5831 && is_equivalent_se(se,agr_tbl[i]->operand) ){
\r
// The super flag is deliberately not part of the match (test commented out);
// instead a matching entry is marked super when this reference requires it.
5832 // && is_super == agr_tbl[i]->is_superaggr())
\r
5833 if(is_super) agr_tbl[i]->set_super(true);
\r
// No equivalent entry exists: allocate one and append it to the table.
5838 aggr_table_entry *ate = new aggr_table_entry(op, se, is_super);
\r
5839 agr_tbl.push_back(ate);
\r
5840 return(agr_tbl.size() - 1);
\r
// Add an aggregate identified by a function id (the non-built-in overload).
// An existing non-built-in entry is reused when it has the same fcn_id, the
// same number of operands, and every operand is pairwise equivalent under
// is_equivalent_se(); otherwise a new entry is appended and its index
// (agr_tbl.size() - 1) is returned. sdt, is_running and has_lfta_bailout are
// only forwarded to the aggr_table_entry constructor.
// NOTE(review): the return for the reuse case is not visible in this excerpt;
// presumably it returns the matching index i.
5844 int aggregate_table::add_aggr(string op, int fcn_id, vector<scalarexp_t *> opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){
\r
5847 for(i=0;i<agr_tbl.size();i++){
\r
5848 if((! agr_tbl[i]->is_builtin()) && fcn_id == agr_tbl[i]->fcn_id
\r
5849 && opl.size() == agr_tbl[i]->oplist.size() ){
\r
5850 // && is_super == agr_tbl[i]->is_superaggr() ){
\r
// Compare operands position by position.
// NOTE(review): the statement run on a mismatch is not visible in this
// excerpt; presumably it breaks out of the loop.
5851 for(o=0;o<opl.size();++o){
\r
5852 if(! is_equivalent_se(opl[o],agr_tbl[i]->oplist[o]) )
\r
// o reached opl.size(): the operand loop ran to completion.
5855 if(o == opl.size()){
\r
5856 if(is_super) agr_tbl[i]->set_super(true);
\r
// No equivalent entry exists: allocate one and append it to the table.
5862 aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout);
\r
5863 agr_tbl.push_back(ate);
\r
5864 return(agr_tbl.size() - 1);
\r
// Register a literal in the complex-literal table.
// l             : literal to register; the pointer itself is stored when new.
// is_handle_ref : whether this reference uses the literal as a handle.
// cplx_lit_tbl and hdl_ref_tbl are parallel vectors indexed by the same i.
// A new literal is appended to both and its index is returned.
// NOTE(review): the return for an already-present literal is not visible in
// this excerpt; presumably it returns the matching index i.
5868 int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){
\r
5871 for(i=0;i<cplx_lit_tbl.size();i++){
\r
5872 if(l->is_equivalent(cplx_lit_tbl[i])){
\r
// OR-ing keeps the flag set once any reference marks it as a handle reference.
5873 hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref;
\r
5878 cplx_lit_tbl.push_back(l);
\r
5879 hdl_ref_tbl.push_back(is_handle_ref);
\r
5880 return(cplx_lit_tbl.size() - 1);
\r
5885 //------------------------------------------------------------
\r
5889 gb_t *gb_t::duplicate(){
\r
5890 gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str());
\r
5892 ret->lineno = lineno;
\r
5893 ret->charno = charno;
\r
5895 ret->def = dup_se(def,NULL);
\r