X-Git-Url: https://gerrit.o-ran-sc.org/r/gitweb?a=blobdiff_plain;ds=sidebyside;f=src%2Fftacmp%2Fanalyze_fta.cc;h=d793577a162068446eddb49536321b088fb2905f;hb=f1754ecea2eab7bd0a302042ac82eb11667b166c;hp=9e20e4986cef82467d54a7a23f72f64598dfcff9;hpb=07495effe193ca3f73c3bf0ce417068f9ac9dcdd;p=com%2Fgs-lite.git diff --git a/src/ftacmp/analyze_fta.cc b/src/ftacmp/analyze_fta.cc index 9e20e49..d793577 100644 --- a/src/ftacmp/analyze_fta.cc +++ b/src/ftacmp/analyze_fta.cc @@ -1,5897 +1,5908 @@ -/* ------------------------------------------------ -Copyright 2014 AT&T Intellectual Property - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - ------------------------------------------- */ - -#include - -#include "parse_fta.h" -#include "parse_schema.h" -#include "parse_ext_fcns.h" - - -#include"analyze_fta.h" - -#include"type_objects.h" - -#include -#include - -using namespace std; - -extern string hostname; // name of the current host - -// Utility function - -string int_to_string(int i){ - string ret; - char tmpstr[100]; - sprintf(tmpstr,"%d",i); - ret=tmpstr; - return(ret); -} - - -// Globals - -// These represent derived information from the -// query analysis stage. I extract them from a class, -// perhaps this is dangerous. - -static gb_table *gb_tbl=NULL; // Table of all group-by attributes. -static aggregate_table *aggr_tbl=NULL; // Table of all referenced aggregates. - -// static cplx_lit_table *complex_literals=NULL; // Table of literals with constructors. -static param_table *param_tbl=NULL; // Table of all referenced parameters. - -vector partial_fcns_list; -int wh_partial_start, wh_partial_end; -int gb_partial_start, gb_partial_end; -int aggr_partial_start, aggr_partial_end; -int sl_partial_start, sl_partial_end; - - -// Infer the table of a column refrence and return the table ref. -// First, extract the -// field name and table name. If no table name is used, -// search all tables to try to find a unique match. -// Of course, plenty of error checking. - -// Return the set of tablevar indices in the FROM clause -// which contain a field with the same name. -vector find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){ - int i; - vector tv; -// vector tn = fm->get_schema_names(); - vector tn = fm->get_schema_refs(); -// printf("Calling find_source_tables on field %s\n",field.c_str()); - for(i=0;icontains_field(Schema->find_tbl(tn[i]), field) ){ - if(Schema->contains_field(tn[i], field) ){ - tv.push_back(i); -// printf("\tfound in table %s\n",tn[i].c_str()); - } - } - return(tv); -} - -int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){ - int i; - string tname = ir->get_tablevar(); - if(tname ==""){ - if(fm->size()==1) return 0; - fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str()); - return -1; - } - for(i=0;isize();++i){ - if(tname == fm->get_tablevar_name(i)) - return i; - } - fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str()); - return -1; -} - - -// compute the index of the tablevar in the from clause that the -// colref is in. -// return -1 if no tablevar can be imputed. -int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){ - int i; - string table_name; - int table_ref; - vector tv; - vector fm_tbls = fm->get_table_list(); - - string field = cr->get_field(); - -// printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str()); - if(cr->uses_default_table() ){ - tv = find_source_tables(field, fm, schema); - if(tv.size() > 1){ - fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ", - cr->get_lineno(), cr->get_charno(),field.c_str() ); - for(i=0;ito_string().c_str() ); - } - fprintf(stderr,"\n\tYou must specify one of these.\n"); - return(-1); - } - if(tv.size() == 0){ - fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n", - cr->get_lineno(), cr->get_charno(),field.c_str() ); - return(-1); - } - - return(tv[0]); - } - -// The table source is named -- but is it a schema name -// or a var name? - - string interface = cr->get_interface(); - table_name = cr->get_table_name(); - -// if interface is not specified, prefer to look at the tablevar names -// Check for duplicates. - if(interface==""){ - for(i=0;iget_var_name()) - tv.push_back(i); - } - if(tv.size() > 1){ - fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() ); - return(-1); - } - if(tv.size() == 1) return(tv[0]); - } - -// Tableref not found by looking at tableref vars, or an interface -// was specified. Try to match on schema and interface. -// Check for duplicates. - for(i=0;iget_var_name() && interface == fm_tbls[i]->get_interface()) - tv.push_back(i); - } - if(tv.size() > 1){ - fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n", - cr->get_lineno(), cr->get_charno() ); - if(interface != "") fprintf(stderr,"%s.",interface.c_str()); - fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str()); - return(-1); - } - - if(tv.size() == 0 ){ - fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno()); - if(interface != "") fprintf(stderr,"%s.",interface.c_str()); - fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str()); - return(-1) ; - } - - return(tv[0]); -} - - -// Reset temporal properties of a scalar expression -void reset_temporal(scalarexp_t *se){ - col_id ci; - vector operands; - int o; - - se->get_data_type()->reset_temporal(); - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - case SE_COLREF: - return; - case SE_UNARY_OP: - reset_temporal(se->get_left_se()); - return; - case SE_BINARY_OP: - reset_temporal(se->get_left_se()); - reset_temporal(se->get_right_se()); - return; - case SE_AGGR_STAR: - return; - case SE_AGGR_SE: - reset_temporal(se->get_left_se()); - return; - case SE_FUNC: - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } -} - -// Verify that column references exist in their -// declared tables. As a side effect, assign -// their data types. Other side effects : -// -// return -1 on error - -int verify_colref(scalarexp_t *se, tablevar_list_t *fm, - table_list *schema, gb_table *gtbl){ - int l_ret, r_ret; - int gb_ref; - colref_t *cr; - ifpref_t *ir; - string field, table_source, type_name; - data_type *dt; - vector tn; - vector tv; - int table_var; - int o; - vector operands; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - return(1); - case SE_IFACE_PARAM: - ir = se->get_ifpref(); - table_var = infer_tablevar_from_ifpref(ir, fm); - if(table_var < 0) return(table_var); - ir->set_tablevar_ref(table_var); - return(1); - case SE_UNARY_OP: - return( verify_colref(se->get_left_se(), fm, schema, gtbl) ); - case SE_BINARY_OP: - l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl); - r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl); - if( (l_ret < 0 ) || (r_ret < 0) ) return(-1); - return(1); - case SE_COLREF: - cr = se->get_colref(); - field = cr->get_field(); - -// Determine if this is really a GB ref. -// (the parser can only see that its a colref). - if(gtbl != NULL){ - gb_ref = gtbl->find_gb(cr, fm, schema); - }else{ - gb_ref = -1; - } - - se->set_gb_ref(gb_ref); - - if(gb_ref < 0){ -// Its a colref, verify its existance and -// record the data type. - table_var = infer_tablevar_from_colref(cr,fm,schema); - if(table_var < 0) return(table_var); - - // Store the table ref in the colref. - cr->set_tablevar_ref(table_var); - cr->set_schema_ref(fm->get_schema_ref(table_var)); - cr->set_interface(""); - cr->set_table_name(fm->get_tablevar_name(table_var)); - - - type_name = schema->get_type_name(cr->get_schema_ref(), field); - param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field); - dt = new data_type(type_name, modifiers); - se->set_data_type(dt); - }else{ -// Else, its a gbref, use the GB var's data type. - se->set_data_type(gtbl->get_data_type(gb_ref)); - } - - return(1); - case SE_AGGR_STAR: - return(1); - case SE_AGGR_SE: - return( verify_colref(se->get_left_se(), fm, schema, gtbl) ); - case SE_FUNC: - operands = se->get_operands(); - r_ret = 1; - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - return(-1); - } - return(-1); -} - - -int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){ - int l_ret, r_ret; - std::vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - return(verify_colref(pr->get_left_se(),fm,schema, gtbl) ); - case PRED_COMPARE: - l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ; - r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ; - if( (l_ret < 0) || (r_ret < 0) ) return(-1); - return(1); - case PRED_UNARY_OP: - return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl)); - case PRED_BINARY_OP: - l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ; - r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ; - if( (l_ret < 0) || (r_ret < 0) ) return(-1); - return(1); - case PRED_FUNC: - op_list = pr->get_op_list(); - l_ret = 0; - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - } - - return(-1); -} - - -bool literal_only_se(scalarexp_t *se){ // really only literals. - int o; - vector operands; - - if(se == NULL) return(1); - switch(se->get_operator_type()){ - case SE_LITERAL: - return(true); - case SE_PARAM: - return(false); - case SE_IFACE_PARAM: - return(false); - case SE_UNARY_OP: - return( literal_only_se(se->get_left_se()) ); - case SE_BINARY_OP: - return( literal_only_se(se->get_left_se()) && - literal_only_se(se->get_right_se()) ); - case SE_COLREF: - return false; - case SE_AGGR_STAR: - return false; - case SE_AGGR_SE: - return false; - return(1); - case SE_FUNC: - return false; - default: - return false; - } - return false; -} - - - - -// Verify that column references exist in their -// declared tables. As a side effect, assign -// their data types. Other side effects : -// - -int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){ - int l_ret, r_ret; - int gb_ref; - colref_t *cr; - string field, table_source, type_name; - data_type *dt; - vector tn; - vector tv; - int tablevar_ref; - int o; - vector operands; - - if(se == NULL) return(1); - - switch(se->get_operator_type()){ - case SE_LITERAL: - return(1); - case SE_PARAM: - return(1); - case SE_IFACE_PARAM: - return(1); - case SE_UNARY_OP: - return( bind_to_schema_se(se->get_left_se(), fm, schema) ); - case SE_BINARY_OP: - l_ret = bind_to_schema_se(se->get_left_se(), fm, schema); - r_ret = bind_to_schema_se(se->get_right_se(), fm, schema); - if( (l_ret < 0 ) || (r_ret < 0) ) return(-1); - return(1); - case SE_COLREF: - if(se->is_gb()) return(1); // gb ref not a colref. - - cr = se->get_colref(); - field = cr->get_field(); - - tablevar_ref = infer_tablevar_from_colref(cr,fm,schema); - if(tablevar_ref < 0){ - return(tablevar_ref); - }else{ - // Store the table ref in the colref. - cr->set_tablevar_ref(tablevar_ref); - cr->set_schema_ref(fm->get_schema_ref(tablevar_ref)); - cr->set_interface(""); - cr->set_table_name(fm->get_tablevar_name(tablevar_ref)); - -// Check the data type - type_name = schema->get_type_name(cr->get_schema_ref(), field); - param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field); - data_type dt(type_name, modifiers); -// if(! dt.equals(se->get_data_type()) ){ -// if(! dt.subsumes_type(se->get_data_type()) ){ - if(! se->get_data_type()->subsumes_type(&dt) ){ - fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n", - dt.type_indicator(), se->get_data_type()->type_indicator(), cr->to_string().c_str()); - return(-1); - } - } - return(1); - case SE_AGGR_STAR: - return(1); - case SE_AGGR_SE: // Probably I should just return, - // aggregate se's are explicitly bound to the schema. -// return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) ); - return(1); - case SE_FUNC: - if(se->get_aggr_ref() >= 0) return 1; - - operands = se->get_operands(); - r_ret = 1; - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - return(-1); - } - return(-1); -} - - -int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){ - int l_ret, r_ret; - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - return(bind_to_schema_se(pr->get_left_se(),fm,schema) ); - case PRED_COMPARE: - l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ; - r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ; - if( (l_ret < 0) || (r_ret < 0) ) return(-1); - return(1); - case PRED_UNARY_OP: - return(bind_to_schema_pr(pr->get_left_pr(),fm,schema)); - case PRED_BINARY_OP: - l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ; - r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ; - if( (l_ret < 0) || (r_ret < 0) ) return(-1); - return(1); - case PRED_FUNC: - op_list = pr->get_op_list(); - l_ret = 0; - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - } - - return(-1); -} - - - - - - -// verify_colref assigned data types to the column refs. -// Now assign data types to all other nodes in the -// scalar expression. -// -// return -1 on error - -temporal_type compute_se_temporal(scalarexp_t *se, map &tcol){ - int l_ret, r_ret; - data_type *dt; - bool bret; - vector operands; - vector odt; - int o, fcn_id; - vector handle_ind; - - switch(se->get_operator_type()){ - case SE_LITERAL: - return(constant_t); - case SE_PARAM: - return(varying_t); - case SE_IFACE_PARAM: - return(varying_t); // actually, this should not be called. - case SE_UNARY_OP: - return data_type::compute_temporal_type( - compute_se_temporal(se->get_left_se(), tcol), se->get_op() - ); - case SE_BINARY_OP: - return data_type::compute_temporal_type( - compute_se_temporal(se->get_left_se(), tcol), - compute_se_temporal(se->get_right_se(), tcol), - se->get_left_se()->get_data_type()->get_type(), - se->get_right_se()->get_data_type()->get_type(), - se->get_op() - ); - case SE_COLREF: - { - col_id cid(se->get_colref() ); - if(tcol.count(cid) > 0){ return tcol[cid]; - }else{ return varying_t;} - } - case SE_AGGR_STAR: - case SE_AGGR_SE: - case SE_FUNC: - default: - return varying_t; - } - return(varying_t); -} - - - -// verify_colref assigned data types to the column refs. -// Now assign data types to all other nodes in the -// scalar expression. -// -// return -1 on error - -int assign_data_types(scalarexp_t *se, table_list *schema, - table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){ - int l_ret, r_ret; - data_type *dt; - bool bret; - vector operands; - vector odt; - int o, fcn_id; - vector handle_ind; - vector constant_ind; - - switch(se->get_operator_type()){ - case SE_LITERAL: - dt = new data_type( se->get_literal()->get_type() ); - se->set_data_type(dt); - if( ! dt->is_defined() ){ - fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n", - se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() ); - return(-1); - }else{ - return(1); - } - case SE_PARAM: - { - string pname = se->get_param_name(); - dt = param_tbl->get_data_type(pname); -// A SE_PARRAM can change its value mid-query so using one -// to set a window is dangerous. TODO check for this and issue a warning. - dt->set_temporal(constant_t); - se->set_data_type(dt); - if( ! dt->is_defined() ){ - fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n", - pname.c_str(), se->get_lineno(),se->get_charno() ); - return(-1); - } - return(1); - } - case SE_IFACE_PARAM: - dt = new data_type( "STRING" ); - se->set_data_type(dt); - return(1); - case SE_UNARY_OP: - l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns); - if(l_ret < 0) return -1; - - dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() ); - se->set_data_type(dt); - if( ! dt->is_defined() ){ - fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n", - se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(), - se->get_lineno(), se->get_charno() ); - return(-1); - }else{ - return(1); - } - case SE_BINARY_OP: - l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns); - r_ret = assign_data_types(se->get_right_se(), schema, fta_tree, Ext_fcns); - if( (l_ret < 0 ) || (r_ret < 0) ) return(-1); - - dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() ); - se->set_data_type(dt); - if( ! dt->is_defined() ){ - fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n", - se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(), - se->get_right_se()->get_data_type()->to_string().c_str(), - se->get_lineno(), se->get_charno() ); - return(-1); - }else{ - return(1); - } - case SE_COLREF: - dt = se->get_data_type(); - bret = dt->is_defined(); - if( bret ){ - return(1); - }else{ - fprintf(stderr,"ERROR, column reference type is undefined, line =%d, char = %d, colref=%s\n", - se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() ); - return(-1); - } - case SE_AGGR_STAR: - dt = new data_type("Int"); // changed Uint to Int - se->set_data_type(dt); - return(1); - case SE_AGGR_SE: - l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns); - if(l_ret < 0) return -1; - - dt = new data_type(); - dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type()); - se->set_data_type(dt); - - if( ! dt->is_defined() ){ - fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n", - se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(), - se->get_lineno(), se->get_charno() ); - return(-1); - }else{ - return(1); - } - case SE_FUNC: - - operands = se->get_operands(); - r_ret = 1; - for(o=0;oget_data_type()); - if(l_ret < 0) r_ret = -1; - } - if(r_ret < 0) return(r_ret); - -// Is it an aggregate extraction function? - fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt); - if(fcn_id >= 0){ - int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id); - int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id); - int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id); -// Construct a se for the subaggregate. - vector op_a; - int n_aggr_oprs = operands.size()-n_fcn_params+1; - for(o=0;o handle_a = Ext_fcns->get_handle_indicators(subaggr_id); - for(o=0;oget_operator_type() != SE_LITERAL && - op_a[o]->get_operator_type() != SE_IFACE_PARAM && - op_a[o]->get_operator_type() != SE_PARAM){ - fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", - o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - vector is_const_a=Ext_fcns->get_const_indicators(subaggr_id); - for(o=0;oget_data_type()->get_temporal() != constant_t){ - fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n Line=%d, char=%d.\n", - o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - - scalarexp_t *se_a = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a); - se_a->set_fcn_id(subaggr_id); - se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id)); - se_a->set_aggr_id(0); // label this as a UDAF. - - -// Change this se to be the actual function - vector op_f; - op_f.push_back(se_a); - for(o=n_aggr_oprs;o handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id); - for(o=0;oget_operator_type() != SE_LITERAL && - op_f[o]->get_operator_type() != SE_IFACE_PARAM && - op_f[o]->get_operator_type() != SE_PARAM){ - fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", - o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - vector is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id); - for(o=0;oget_data_type()->get_temporal() != constant_t){ - fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n Line=%d, char=%d.\n", - o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - - se->param_list = op_f; - se->op = Ext_fcns->get_fcn_name(actual_fcn_id); - se->set_fcn_id(actual_fcn_id); - se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id)); - return(1); - } - if(fcn_id == -2){ - fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str()); - } - -// Is it a UDAF? - fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt); - if(fcn_id >= 0){ - se->set_fcn_id(fcn_id); - se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id)); - se->set_aggr_id(0); // label this as a UDAF. -// Finally, verify that all HANDLE parameters are literals or params. - handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() ); - for(o=0;oget_operator_type() != SE_LITERAL && - operands[o]->get_operator_type() != SE_IFACE_PARAM && - operands[o]->get_operator_type() != SE_PARAM){ - fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", - o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id()); - for(o=0;oget_data_type()->get_temporal() != constant_t){ - fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n", - o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - -// UDAFS as superaggregates not yet supported. -if(se->is_superaggr()){ -fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno()); -se->set_superaggr(false); -} - return(1); - } - if(fcn_id == -2){ - fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str()); - } - -// Is it a stateful fcn? - fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt); - if(fcn_id >= 0){ - se->set_fcn_id(fcn_id); - se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id)); - se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun -// Finally, verify that all HANDLE parameters are literals or params. - handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() ); - for(o=0;oget_operator_type() != SE_LITERAL && - operands[o]->get_operator_type() != SE_IFACE_PARAM && - operands[o]->get_operator_type() != SE_PARAM){ - fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", - o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id()); - for(o=0;oget_data_type()->get_temporal() != constant_t){ - fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n", - o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - - if(se->is_superaggr()){ - fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno()); - } - return(1); - } - if(fcn_id == -2){ - fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str()); - } - - -// Is it a regular function? - fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt); - if( fcn_id < 0 ){ - fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str()); - for(o=0;o0) fprintf(stderr,", "); - fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str()); - } - fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() ); - if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n"); - - return(-1); - } - - se->set_fcn_id(fcn_id); - dt = Ext_fcns->get_fcn_dt(fcn_id); - - if(! dt->is_defined() ){ - fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str()); - for(o=0;o0) fprintf(stderr,", "); - fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str()); - } - fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() ); - return(-1); - } - -// Finally, verify that all HANDLE parameters are literals or params. - handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() ); - for(o=0;oget_operator_type() != SE_LITERAL && - operands[o]->get_operator_type() != SE_IFACE_PARAM && - operands[o]->get_operator_type() != SE_PARAM){ - fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", - o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id()); - for(o=0;oget_data_type()->get_temporal() != constant_t){ - fprintf(stderr,"ERROR, the %d-th parameter of function %s must be constant.\n Line=%d, char=%d.\n", - o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); - return(-1); - } - } - } - - - if(se->is_superaggr()){ - fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno()); - } - - se->set_data_type(dt); - return(1); - default: - fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - return(-1); - } - return(-1); -} - - -int assign_predicate_data_types(predicate_t *pr, table_list *schema, - table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){ - int l_ret, r_ret; - int i; - data_type *dt, *dtl; - vector odt; - vector litl; - vector operands; - vector handle_ind; - vector constant_ind; - int o, fcn_id; - - switch(pr->get_operator_type()){ - case PRED_IN: - l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set); - litl = pr->get_lit_vec(); - dt = pr->get_left_se()->get_data_type(); - - for(i=0;iget_type() ); - if( ! dt->is_comparable(dtl,pr->get_op()) ){ - fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n", - litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() ); - delete dtl; - return(-1); - } - delete dtl; - } - return(1); - case PRED_COMPARE: - l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ; - r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ; - if( (l_ret < 0) || (r_ret < 0) ) return(-1); - - if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){ - fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n", - pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(), - pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() ); - return(-1); - }else{ - return(1); - } - case PRED_UNARY_OP: - return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set)); - case PRED_BINARY_OP: - l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set); - r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set); - if( (l_ret < 0) || (r_ret < 0) ) return(-1); - return(1); - case PRED_FUNC: - operands = pr->get_op_list(); - r_ret = 1; - for(o=0;oget_data_type()); - if(l_ret < 0) r_ret = -1; - } - if(r_ret < 0) return(r_ret); - - fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt); - if( fcn_id < 0 ){ - fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str()); - for(o=0;o0) fprintf(stderr,", "); - fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str()); - } - fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() ); - if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n"); - return(-1); - } - -// ext_fcn_set.insert(fcn_id); - pr->set_fcn_id(fcn_id); - -// Finally, verify that all HANDLE parameters are literals or params. - handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() ); - for(o=0;oget_operator_type() != SE_LITERAL && - operands[o]->get_operator_type() != SE_IFACE_PARAM && - operands[o]->get_operator_type() != SE_PARAM){ - fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", - o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno()); - exit(1); - } - } - } - constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id()); - for(o=0;oget_data_type()->get_temporal() != constant_t){ - fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be constant.\n Line=%d, char=%d.\n", - o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno()); - exit(1); - } - } - } - - -// Check if this predicate function is special sampling function - pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id()); - - - return(l_ret); - default: - fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n", - pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); - } - - return(-1); -} - - - -///////////////////////////////////////////////////////////////////// -//////////////// Make a deep copy of a se / pred tree -///////////////////////////////////////////////////////////////////// - - -// duplicate a select element -select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){ - return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str()); -} - -// duplicate a scalar expression. -scalarexp_t *dup_se(scalarexp_t *se, - aggregate_table *aggr_tbl - ){ - int p; - vector operand_list; - vector dt_signature; - scalarexp_t *ret_se, *l_se, *r_se; - - switch(se->get_operator_type()){ - case SE_LITERAL: - ret_se = new scalarexp_t(se->get_literal()); - ret_se->use_decorations_of(se); - return(ret_se); - - case SE_PARAM: - ret_se = scalarexp_t::make_param_reference(se->get_op().c_str()); - ret_se->use_decorations_of(se); - return(ret_se); - - case SE_IFACE_PARAM: - ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref()); - ret_se->use_decorations_of(se); - return(ret_se); - - case SE_COLREF: - ret_se = new scalarexp_t(se->get_colref()->duplicate()); - ret_se->rhs.scalarp = se->rhs.scalarp; // carry along notation - ret_se->use_decorations_of(se); - return(ret_se); - - case SE_UNARY_OP: - l_se = dup_se(se->get_left_se(), aggr_tbl); - ret_se = new scalarexp_t(se->get_op().c_str(), l_se); - ret_se->use_decorations_of(se); - return(ret_se); - - case SE_BINARY_OP: - l_se = dup_se(se->get_left_se(), aggr_tbl); - r_se = dup_se(se->get_right_se(), aggr_tbl); - - ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se); - ret_se->use_decorations_of(se); - - return(ret_se); - - case SE_AGGR_STAR: - ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str()); - ret_se->use_decorations_of(se); - return(ret_se); - - case SE_AGGR_SE: - l_se = dup_se(se->get_left_se(), aggr_tbl); - ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se); - ret_se->use_decorations_of(se); - return(ret_se); - - case SE_FUNC: - { - operand_list = se->get_operands(); - vector new_operands; - for(p=0;pget_op().c_str(), new_operands); - ret_se->use_decorations_of(se); - return(ret_se); - } - - default: - printf("INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type()); - exit(1); - break; - } - return(NULL); - -} - - - -predicate_t *dup_pr(predicate_t *pr, - aggregate_table *aggr_tbl - ){ - - vector llist; - scalarexp_t *se_l, *se_r; - predicate_t *pr_l, *pr_r, *ret_pr; - vector op_list, new_op_list; - int o; - - - switch(pr->get_operator_type()){ - case PRED_IN: - se_l = dup_se(pr->get_left_se(), aggr_tbl); - ret_pr = new predicate_t(se_l, pr->get_lit_vec()); - return(ret_pr); - - case PRED_COMPARE: - se_l = dup_se(pr->get_left_se(), aggr_tbl); - se_r = dup_se(pr->get_right_se(), aggr_tbl); - ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r); - return(ret_pr); - - case PRED_UNARY_OP: - pr_l = dup_pr(pr->get_left_pr(), aggr_tbl); - ret_pr = new predicate_t(pr->get_op().c_str(), pr_l); - return(ret_pr); - - case PRED_BINARY_OP: - pr_l = dup_pr(pr->get_left_pr(), aggr_tbl); - pr_r = dup_pr(pr->get_right_pr(), aggr_tbl); - ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r); - return(ret_pr); - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_op().c_str(), new_op_list); - ret_pr->set_fcn_id(pr->get_fcn_id()); - ret_pr->is_sampling_fcn = pr->is_sampling_fcn; - return(ret_pr); - - default: - fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n", - pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return(0); - -} - -table_exp_t *dup_table_exp(table_exp_t *te){ - int i; - table_exp_t *ret = new table_exp_t(); - - ret->query_type = te->query_type; - - ss_map::iterator ss_i; - for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){ - ret->nmap[(*ss_i).first] = (*ss_i).second; - } - - for(i=0;iquery_params.size();++i){ - ret->query_params.push_back(new - var_pair_t(te->query_params[i]->name,te->query_params[i]->val) ); - } - - if(te->sl){ - ret->sl = new select_list_t(); - ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno; - vector select_list = te->sl->get_select_list(); - for(i=0;ise,NULL); - ret->sl->append(se,select_list[i]->name); - } - } - - ret->fm = te->fm->duplicate(); - - if(te->wh) ret->wh = dup_pr(te->wh,NULL); - if(te->hv) ret->hv = dup_pr(te->hv,NULL); - if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL); - if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL); - if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL); - - for(i=0;igb.size();++i){ - extended_gb_t *tmp_g = te->gb[i]->duplicate(); - ret->gb.push_back(tmp_g); - } - - ret->mergevars = te->mergevars; - if(te->slack) - ret->slack = dup_se(te->slack,NULL); - ret->lineno = te->lineno; - ret->charno = te->charno; - - return(ret); -} - - - - - - - -///////////////////////////////////////////////////////////////////////// -// Bind colrefs to a member of their FROM list - -void bind_colref_se(scalarexp_t *se, - vector &fm, - int prev_ref, int new_ref - ){ - int p; - vector operand_list; - colref_t *cr; - ifpref_t *ir; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - return; - case SE_IFACE_PARAM: - ir = se->get_ifpref(); - if(ir->get_tablevar_ref() == prev_ref){ - ir->set_tablevar_ref(new_ref); - ir->set_tablevar(fm[new_ref]->get_var_name()); - } - return; - - case SE_COLREF: - cr=se->get_colref(); - if(cr->get_tablevar_ref() == prev_ref){ - cr->set_tablevar_ref(new_ref); -// cr->set_interface(fm[new_ref]->get_interface()); - cr->set_table_name(fm[new_ref]->get_var_name()); - } - return; - - case SE_UNARY_OP: - bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref); - return; - - case SE_BINARY_OP: - bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref); - bind_colref_se(se->get_right_se(), fm, prev_ref, new_ref); - return; - - case SE_AGGR_STAR: - case SE_AGGR_SE: - return; - - case SE_FUNC: - if(se->get_aggr_ref() >= 0) return; - - operand_list = se->get_operands(); - for(p=0;pget_operator_type()); - exit(1); - break; - } - return; - -} - - - - -void bind_colref_pr(predicate_t *pr, - vector &fm, - int prev_ref, int new_ref - ){ - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref); - return; - - case PRED_COMPARE: - bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref); - bind_colref_se(pr->get_right_se(), fm, prev_ref, new_ref); - return; - - case PRED_UNARY_OP: - bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref); - return; - - case PRED_BINARY_OP: - bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref); - bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref); - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return; - -} - - -///////////////////////////////////////////////////////////////////// -// verify that the se refs only literals and params. -// (use to verify that the expression should stay in the hfta -// during a split) -///////////////////////////////////////////////////////////////////// - -bool is_literal_or_param_only(scalarexp_t *se){ - int o; - vector operands; - bool sum = true; - - if(se == NULL) return(true); - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - return(true); - case SE_IFACE_PARAM: - return(false); // need to treat as colref - case SE_UNARY_OP: - return(is_literal_or_param_only(se->get_left_se()) ); - case SE_BINARY_OP: - return( - is_literal_or_param_only(se->get_left_se()) && - is_literal_or_param_only(se->get_right_se()) - ); - case SE_COLREF: - return(false); - case SE_AGGR_STAR: - case SE_AGGR_SE: - return(false); - case SE_FUNC: -// The fcn might have special meaning at the lfta ... - return(false); - - default: - fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return(0); -} - - - -///////////////////////////////////////////////////////////////////// -// Search for gb refs. -// (use to verify that no gbrefs in a gb def.) -///////////////////////////////////////////////////////////////////// - - -int count_gb_se(scalarexp_t *se){ - int o; - vector operands; - int sum = 0; - - if(se == NULL) return(0); - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return(0); - case SE_UNARY_OP: - return(count_gb_se(se->get_left_se()) ); - case SE_BINARY_OP: - return( - count_gb_se(se->get_left_se()) + - count_gb_se(se->get_right_se()) - ); - case SE_COLREF: - if(se->get_gb_ref() < 0) return(0); - return(1); - case SE_AGGR_STAR: - case SE_AGGR_SE: - return(0); - case SE_FUNC: - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return(0); -} - - -///////////////////////////////////////////////////////////////////// -//////////////// Search for stateful fcns. -///////////////////////////////////////////////////////////////////// - - -int se_refs_sfun(scalarexp_t *se){ - int o; - vector operands; - int sum = 0; - - if(se == NULL) return(0); - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return(0); - case SE_UNARY_OP: - return(se_refs_sfun(se->get_left_se()) ); - case SE_BINARY_OP: - return( - se_refs_sfun(se->get_left_se()) + - se_refs_sfun(se->get_right_se()) - ); - case SE_COLREF: - return(0); - case SE_AGGR_STAR: - case SE_AGGR_SE: - return(0); - case SE_FUNC: - operands = se->get_operands(); - for(o=0;oget_aggr_ref()>=0) sum++; // is it tagged as a UDAF? - -// for now, stateful functions count as aggregates. - if(se->get_storage_state() != "") - sum++; - - return(sum); - - default: - fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return(0); -} - - -// Return a count of the number of stateful fcns in this predicate. -int pred_refs_sfun(predicate_t *pr){ - vector op_list; - int o, aggr_sum; - - switch(pr->get_operator_type()){ - case PRED_IN: - return(se_refs_sfun(pr->get_left_se()) ); - case PRED_COMPARE: - return( - se_refs_sfun(pr->get_left_se()) + - se_refs_sfun(pr->get_right_se()) - ); - case PRED_UNARY_OP: - return(pred_refs_sfun(pr->get_left_pr()) ); - case PRED_BINARY_OP: - return( - pred_refs_sfun(pr->get_left_pr()) + - pred_refs_sfun(pr->get_right_pr()) - ); - case PRED_FUNC: - op_list = pr->get_op_list(); - aggr_sum = 0; - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return(0); -} - -////////////////////////////////////////////////// - -///////////////////////////////////////////////////////////////////// -//////////////// Search for aggregates. -///////////////////////////////////////////////////////////////////// - - -int count_aggr_se(scalarexp_t *se, bool strict){ - int o; - vector operands; - int sum = 0; - - if(se == NULL) return(0); - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return(0); - case SE_UNARY_OP: - return(count_aggr_se(se->get_left_se(), strict) ); - case SE_BINARY_OP: - return( - count_aggr_se(se->get_left_se(), strict) + - count_aggr_se(se->get_right_se(), strict) - ); - case SE_COLREF: - return(0); - case SE_AGGR_STAR: - case SE_AGGR_SE: - return(1); - case SE_FUNC: - operands = se->get_operands(); - for(o=0;oget_aggr_ref()>=0) sum++; // is it tagged as a UDAF? - -// now, stateful functions can count as aggregates. -// if we are being strict. - if(! strict && se->get_storage_state() != "") - sum++; - - return(sum); - - default: - fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return(0); -} - - -// Return a count of the number of aggregate fcns in this predicate. -int count_aggr_pred(predicate_t *pr, bool strict){ - vector op_list; - int o, aggr_sum; - - switch(pr->get_operator_type()){ - case PRED_IN: - return(count_aggr_se(pr->get_left_se(), strict) ); - case PRED_COMPARE: - return( - count_aggr_se(pr->get_left_se(), strict) + - count_aggr_se(pr->get_right_se(), strict) - ); - case PRED_UNARY_OP: - return(count_aggr_pred(pr->get_left_pr(), strict) ); - case PRED_BINARY_OP: - return( - count_aggr_pred(pr->get_left_pr(), strict) + - count_aggr_pred(pr->get_right_pr(), strict) - ); - case PRED_FUNC: - op_list = pr->get_op_list(); - aggr_sum = 0; - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return(0); -} - -////////////////////////////////////////////////// -/// Analyze tablevar refs - -void get_tablevar_ref_se(scalarexp_t *se, vector &reflist){ - int o; - vector operands; - int vref; - colref_t *cr; - ifpref_t *ir; - - if(se == NULL) return; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - return; - case SE_IFACE_PARAM: - ir = se->get_ifpref(); - vref = ir->get_tablevar_ref(); - for(o=0;oget_left_se(), reflist); - return; - case SE_BINARY_OP: - get_tablevar_ref_se(se->get_left_se(), reflist); - get_tablevar_ref_se(se->get_right_se(), reflist); - return; - case SE_COLREF: - if(se->is_gb()) return; - cr = se->get_colref(); - vref = cr->get_tablevar_ref(); - for(o=0;oget_aggr_ref() >= 0) return; - - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return; -} - - -void get_tablevar_ref_pr(predicate_t *pr, vector &reflist){ - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - get_tablevar_ref_se(pr->get_left_se(),reflist); - return; - case PRED_COMPARE: - get_tablevar_ref_se(pr->get_left_se(),reflist); - get_tablevar_ref_se(pr->get_right_se(),reflist); - return; - case PRED_UNARY_OP: - get_tablevar_ref_pr(pr->get_left_pr(),reflist); - return; - case PRED_BINARY_OP: - get_tablevar_ref_pr(pr->get_left_pr(),reflist); - get_tablevar_ref_pr(pr->get_right_pr(),reflist); - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - } - - return; -} - - -// Walk SE tree and gather STATES ref'd by STATEFUL fcns. - -void gather_fcn_states_se(scalarexp_t *se, set &states_refd, ext_fcn_list *Ext_fcns){ - int agg_id; - int o; - vector operands; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return; - case SE_UNARY_OP: - gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ; - return; - case SE_BINARY_OP: - gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns); - gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns); - return; - case SE_COLREF: - return; - case SE_AGGR_STAR: - return; - case SE_AGGR_SE: - gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns); - return; - case SE_FUNC: - operands = se->get_operands(); - for(o=0;oget_storage_state() != ""){ - states_refd.insert(se->get_storage_state()); - } - return; - - default: - fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return; -} - - -// Walk SE tree and gather STATES ref'd by STATEFUL fcns. - -void gather_fcn_states_pr(predicate_t *pr, set &states_refd, ext_fcn_list *Ext_fcns){ - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ; - return; - case PRED_COMPARE: - gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ; - gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ; - return; - case PRED_UNARY_OP: - gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns); - return; - case PRED_BINARY_OP: - gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ; - gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ; - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return; -} - - - - -// walk se tree and collect aggregates into aggregate table. -// duplicate aggregates receive the same idx to the table. - -void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){ - int agg_id; - int o; - vector operands; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return; - case SE_UNARY_OP: - build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ; - return; - case SE_BINARY_OP: - build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns); - build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns); - return; - case SE_COLREF: - return; - case SE_AGGR_STAR: - agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr()); - se->set_aggr_id(agg_id); - return; - case SE_AGGR_SE: - agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr()); - se->set_aggr_id(agg_id); - return; - case SE_FUNC: - operands = se->get_operands(); - for(o=0;oget_aggr_ref() >= 0){ // it's been tagged as a UDAF - agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id())); - se->set_aggr_id(agg_id); - } - return; - - default: - fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return; -} - - -// walk se tree and collect aggregates into aggregate table. -// duplicate aggregates receive the same idx to the table. - -void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){ - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ; - return; - case PRED_COMPARE: - build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ; - build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ; - return; - case PRED_UNARY_OP: - build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns); - return; - case PRED_BINARY_OP: - build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ; - build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ; - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return; -} - - -// Return true if the two scalar expressions -// represent the same value (e.g., use to eliminate -// duplicate aggregates). -bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){ - vector operands1; - vector operands2; - int o; - -// First handle the case of nulls (e.g. COUNT aggrs) - if(se1 == NULL && se2 == NULL) return(true); - if(se1 == NULL || se2 == NULL) return(false); - -// In all cases, must be the same oeprator type and same operator. - if(se1->get_operator_type() != se2->get_operator_type()) - return(false); - if(se1->get_op() != se2->get_op() ) - return(false); - - switch(se1->get_operator_type()){ - case SE_LITERAL: - return(se1->get_literal()->is_equivalent(se2->get_literal()) ); - case SE_PARAM: - return(se1->get_param_name() == se2->get_param_name() ); - case SE_IFACE_PARAM: - return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) ); - case SE_UNARY_OP: - return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) ); - case SE_BINARY_OP: - if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) ) - return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) ); - return(false); - case SE_COLREF: - if(se1->is_gb() && se2->is_gb()) - return( se1->get_gb_ref() == se2->get_gb_ref() ); - if(se1->is_gb() || se2->is_gb()) - return(false); - return(se1->get_colref()->is_equivalent(se2->get_colref()) ); - case SE_AGGR_STAR: - return(true); - case SE_AGGR_SE: - return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) ); - case SE_FUNC: - if(se1->get_op() != se2->get_op()) return(false); - - operands1 = se1->get_operands(); - operands2 = se2->get_operands(); - if(operands1.size() != operands2.size()) return(false); - - for(o=0;oget_lineno(), se1->get_charno(),se1->get_operator_type()); - exit(1); - } - return(false); -} - - -// Similar to is_equivalent_se, but with a looser definition -// of equivalence of colrefs. Here, say they are equivalent -// if their base table is the same. Use to find equivalent -// predicates on base tables. -bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){ - vector operands1; - vector operands2; - int o; - - if(se1->get_operator_type() == SE_COLREF && se1->is_gb()){ - se1 = se1->get_right_se(); - } - if(se2->get_operator_type() == SE_COLREF && se2->is_gb()){ - se2 = se2->get_right_se(); - } - -// First handle the case of nulls (e.g. COUNT aggrs) - if(se1 == NULL && se2 == NULL) return(true); - if(se1 == NULL || se2 == NULL) return(false); - -// In all cases, must be the same oeprator type and same operator. - if(se1->get_operator_type() != se2->get_operator_type()) - return(false); - if(se1->get_op() != se2->get_op() ) - return(false); - - switch(se1->get_operator_type()){ - case SE_LITERAL: - return(se1->get_literal()->is_equivalent(se2->get_literal()) ); - case SE_PARAM: - return(se1->get_param_name() == se2->get_param_name() ); - case SE_IFACE_PARAM: - return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) ); - case SE_UNARY_OP: - return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) ); - case SE_BINARY_OP: - if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) ) - return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) ); - return(false); - case SE_COLREF: -/* - if(se1->is_gb() && se2->is_gb()) - return( se1->get_gb_ref() == se2->get_gb_ref() ); - if(se1->is_gb() || se2->is_gb()) - return(false); -*/ - return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) ); - case SE_AGGR_STAR: - return(true); - case SE_AGGR_SE: - return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) ); - case SE_FUNC: - if(se1->get_op() != se2->get_op()) return(false); - - operands1 = se1->get_operands(); - operands2 = se2->get_operands(); - if(operands1.size() != operands2.size()) return(false); - - for(o=0;oget_lineno(), se1->get_charno(),se1->get_operator_type()); - exit(1); - } - return(false); -} - - -// Find predicates which are equivalent when -// looking at the base tables. Use to find -// common prefilter. -bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){ -int i, o; - -// First handle the case of nulls - if(p1 == NULL && p2 == NULL) return(true); - if(p1 == NULL || p2 == NULL) return(false); - - - if(p1->get_operator_type() != p2->get_operator_type()) - return(false); - if(p1->get_op() != p2->get_op()) - return(false); - - vector ll1; - vector ll2; - vector op_list1, op_list2; - - - switch(p2->get_operator_type()){ - case PRED_COMPARE: - if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) ) - return(false); - return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) ); - break; - case PRED_IN: - if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) ) - return(false); - ll1 = p1->get_lit_vec(); - ll2 = p2->get_lit_vec(); - if(ll1.size() != ll2.size()) - return(false); - for(i=0;iis_equivalent( ll2[i] ) ) - return(false); - } - return(true); - break; - case PRED_UNARY_OP: - return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) ); - break; - case PRED_BINARY_OP: - if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema)) - return(false); - return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) ); - break; - case PRED_FUNC: - op_list1 = p1->get_op_list(); - op_list2 = p2->get_op_list(); - if(op_list1.size() != op_list2.size()) return(false); - for(o=0;oget_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC)) - return(false); - if(p1->get_fcn_id() != p2->get_fcn_id()) - return false; - vector cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id()); - int o; - vector op_list1 = p1->get_op_list(); - vector op_list2 = p2->get_op_list(); - if(op_list1.size() != op_list2.size()) return(false); - for(o=0;o operands; - int o; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return(true ); - case SE_UNARY_OP: - return(verify_aggr_query_se(se->get_left_se() ) ); - case SE_BINARY_OP: - return(verify_aggr_query_se(se->get_left_se() ) && - verify_aggr_query_se(se->get_right_se() ) ); - case SE_COLREF: - if(se->is_gb() ) return(true); - fprintf(stderr,"ERROR: the select clause in an aggregate query can " - "only reference constants, group-by attributes, and " - "aggregates, (%s) line %d, character %d.\n", - se->get_colref()->to_string().c_str(), - se->get_lineno(), se->get_charno() ); - return(false); - case SE_AGGR_STAR: - case SE_AGGR_SE: -// colrefs and gbrefs allowed. -// check for nested aggregation elsewhere, so just return TRUE - return(true); - case SE_FUNC: -// If its a UDAF, just return true - if(se->get_aggr_ref() >= 0) return true; - - operands = se->get_operands(); - - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return(false); -} - - - - -// Find complex literals. -// NOTE : This analysis should be deferred to -// code generation time. -// This analysis drills into aggr se specs. -// Shouldn't this be done at the aggregate table? -// But, its not a major loss of efficiency. -// UPDATE : drilling into aggr se's is causnig a problem -// so I've eliminated it. - -bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns, - cplx_lit_table *complex_literals){ - literal_t *l; - vector operands; - int o; - scalarexp_t *param_se; - data_type *dt; - - switch(se->get_operator_type()){ - case SE_LITERAL: - l = se->get_literal(); - if(l->constructor_name() != ""){ - int cl_idx = complex_literals->add_cpx_lit(l, false); - l->set_cpx_lit_ref(cl_idx); - } - return(true); - case SE_PARAM: - return(true ); -// SE_IFACE_PARAM should not exist when this is called. - case SE_UNARY_OP: - return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) ); - case SE_BINARY_OP: - return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) && - find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) ); - case SE_COLREF: - return(true); - case SE_AGGR_STAR: - return(true); - case SE_AGGR_SE: - return true; -// return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) ); - case SE_FUNC: - if(se->get_aggr_ref() >= 0) return true; - - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return(false); -} - - - - -void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns, - cplx_lit_table *complex_literals){ - int i,o; - vector litl; - vector op_list; - - - switch(pr->get_operator_type()){ - case PRED_IN: - find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ; - litl = pr->get_lit_vec(); - for(i=0;iconstructor_name() != ""){ - int cl_idx = complex_literals->add_cpx_lit(litl[i],false); - litl[i]->set_cpx_lit_ref(cl_idx); - } - } - return; - case PRED_COMPARE: - find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ; - find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ; - return; - case PRED_UNARY_OP: - find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals); - return; - case PRED_BINARY_OP: - find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ; - find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ; - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return; -} - - -// Find all things which are passed as handle parameters to functions -// (query parameters, (simple) literals, complex literals) -// These expressions MUST be processed with find_complex_literal_?? -// first. -// TODO: this analysis drills into the aggregate SEs. -// Shouldn't this be done on the aggr table SEs instead? -// to avoid duplication. THe handle registration -// might be expensive ... -// REVISED : drilling into aggr se's is causing problems, eliminated. - -void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns, - vector &handle_tbl){ - vector operands; - vector handle_ind; - int o; - scalarexp_t *param_se; - data_type *dt; - literal_t *l; - - switch(se->get_operator_type()){ - case SE_LITERAL: - return; - case SE_PARAM: - return; -// case SE_IFACE_PARAM: SHOULD NOT EXIST when this is called - case SE_UNARY_OP: - find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ; - return; - case SE_BINARY_OP: - find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ; - find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ; - return; - case SE_COLREF: - return; - case SE_AGGR_STAR: - return; - case SE_AGGR_SE: -// find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ; - return; - case SE_FUNC: - if(se->get_aggr_ref() >= 0) return ; - - operands = se->get_operands(); - handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() ); - for(o=0;oget_operator_type() != SE_LITERAL && - param_se->get_operator_type() != SE_PARAM){ - fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", - o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); - exit(1); - } - - if(param_se->get_operator_type() == SE_PARAM){ - he = new handle_param_tbl_entry( - se->get_op(), o, param_se->get_param_name(), - param_se->get_data_type()->get_type_str()); - }else{ - l = param_se->get_literal(); - if(l->is_cpx_lit()){ - he = new handle_param_tbl_entry( - se->get_op(), o, l->get_cpx_lit_ref(), - param_se->get_data_type()->get_type_str()); - }else{ - he = new handle_param_tbl_entry( - se->get_op(), o, l, - param_se->get_data_type()->get_type_str()); - } - } - param_se->set_handle_ref(handle_tbl.size()); - handle_tbl.push_back(he); - }else{ - find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ; - } - } - return; - default: - fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return; -} - - -void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns, - vector &handle_tbl){ - vector litl; - vector op_list; - scalarexp_t *param_se; - vector handle_ind; - int o; - literal_t *l; - - switch(pr->get_operator_type()){ - case PRED_IN: - find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ; - return; - case PRED_COMPARE: - find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ; - find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ; - return; - case PRED_UNARY_OP: - find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl); - return; - case PRED_BINARY_OP: - find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ; - find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ; - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() ); - for(o=0;oget_operator_type() != SE_LITERAL && - param_se->get_operator_type() != SE_PARAM){ - fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", - o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno()); - exit(1); - } - - if(param_se->get_operator_type() == SE_PARAM){ - he = new handle_param_tbl_entry( - pr->get_op(), o, param_se->get_param_name(), - param_se->get_data_type()->get_type_str()); - }else{ - l = param_se->get_literal(); - if(l->is_cpx_lit()){ - he = new handle_param_tbl_entry( - pr->get_op(), o, l->get_cpx_lit_ref(), - param_se->get_data_type()->get_type_str()); - }else{ - he = new handle_param_tbl_entry( - pr->get_op(), o, l, - param_se->get_data_type()->get_type_str()); - } - } - param_se->set_handle_ref(handle_tbl.size()); - handle_tbl.push_back(he); - }else{ - find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ; - } - } - return; - default: - fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n", - pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return; -} - - -// Verify the HAVING predicate : it -// can access gb vars, aggregates, and constants, -// but not colrefs. -// return 1 if OK, -1 if bad. -// Perhaps replace by a pair of fcns which counts non-gb colrefs? - -// Extended to deal with cleaning_by, cleaning_when : -// verify that any aggregate function -// has the multiple output property. - -int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){ - int l_ret, r_ret; - vector operands; - vector odt; - int o; - - switch(se->get_operator_type()){ - case SE_LITERAL: - return(1); - case SE_PARAM: - case SE_IFACE_PARAM: - return(1); - case SE_UNARY_OP: - return(verify_having_se(se->get_left_se(), clause, Ext_fcns) ); - case SE_BINARY_OP: - l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns); - r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns); - if( (l_ret < 0 ) || (r_ret < 0) ) return(-1); - return(1); - case SE_COLREF: - if(se->is_gb()) return 1; - fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause, - se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() ); - return(-1); - case SE_AGGR_STAR: - case SE_AGGR_SE: -// colrefs and gbrefs allowed. -// check for nested aggregation elsewhere, so just return TRUE - if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){ - fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause, - se->get_lineno(),se->get_charno(), se->get_op().c_str() ); - return(-1); - } - -// Ensure that aggregate refs allow multiple outputs -// in CLEANING_WHEN, CLEANING_BY - if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){ - if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){ - fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause, - se->get_op().c_str(),se->get_lineno(),se->get_charno() ); - return(-1); - } - } - - - return(1); - case SE_FUNC: - if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){ - fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause, - se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_op().c_str() ); - return(-1); - } - - if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){ - if(se->get_aggr_ref() >= 0 && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){ - fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause, - se->get_op().c_str(),se->get_lineno(),se->get_charno() ); - return(-1); - } - } - - if(se->get_aggr_ref() >= 0) // don't descent into aggregates. - return 1; - - operands = se->get_operands(); - r_ret = 1; - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - return(-1); - } - return(-1); -} - - -// Verify the HAVING predicate : it -// can access gb vars, aggregates, and constants, -// but not colrefs. -// return 1 if OK, -1 if bad. -// Perhaps replace by a pair of fcns which counts non-gb colrefs? - - -int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){ - int l_ret, r_ret; - vector litl; - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - return(verify_having_se(pr->get_left_se(), clause, Ext_fcns)); - case PRED_COMPARE: - l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ; - r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ; - if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1); - case PRED_UNARY_OP: - return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns)); - case PRED_BINARY_OP: - l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns); - r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns); - if( (l_ret < 0) || (r_ret < 0) ) return(-1); - return(1); - case PRED_FUNC: - op_list = pr->get_op_list(); - l_ret = 1; - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - } - - return(-1); -} - - -////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////// -/////// cnf and pred analysis and manipulation - -// ---------------------------------------------------------------------- -//  Convert the predicates to a list of conjuncts -//  (not actually cnf). Do some analysis -//  on their properties. -// ---------------------------------------------------------------------- - - -//  Put into list clist the predicates that -//  are AND'ed together. - -void make_cnf_from_pr(predicate_t *pr, vector &clist){ - - if(pr == NULL) return; - - switch(pr->get_operator_type()){ - case PRED_COMPARE: - clist.push_back(new cnf_elem(pr)); - return; - break; - case PRED_IN: - clist.push_back(new cnf_elem(pr)); - return; - break; - case PRED_UNARY_OP: - clist.push_back(new cnf_elem(pr)); - return; - break; - case PRED_BINARY_OP: - if(pr->get_op() == "OR"){ - clist.push_back(new cnf_elem(pr)); - return; - } - if(pr->get_op() =="AND"){ - make_cnf_from_pr(pr->get_left_pr(),clist); - make_cnf_from_pr(pr->get_right_pr(),clist); - return; - } - case PRED_FUNC: - clist.push_back(new cnf_elem(pr)); - return; - break; - default: - fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str()); - exit(1); - break; - } -} - - - -//  Find out what things are referenced in a se, -//  to use for analyzing a predicate. -//  Currently, is it simple (no operators), does it -//  reference a group-by column, does it reference an -//  attribute of a table. -// -// analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf - - -void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){ - int p; - vector operand_list; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return; - case SE_COLREF: - if(se->is_gb() ) g=1; - else a=1; - return; - case SE_UNARY_OP: - s=0; - analyze_cnf_se(se->get_left_se(),s,g,a,agr); - return; - case SE_BINARY_OP: - s=0; - analyze_cnf_se(se->get_left_se(),s,g,a,agr); - analyze_cnf_se(se->get_right_se(),s,g,a,agr); - return; - case SE_AGGR_STAR: - case SE_AGGR_SE: - agr = 1; - return; - case SE_FUNC: - if(se->get_aggr_ref() >= 0){ - agr = 1; - return; - } - s = 0; - operand_list = se->get_operands(); - for(p=0;p op_list; - - - switch(pr->get_operator_type()){ - case PRED_COMPARE: - analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr); - analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr); - return; - case PRED_IN: - analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr); - return; - case PRED_UNARY_OP: - analyze_cnf_pr(pr->get_left_pr(),g,a,agr); - return; - case PRED_BINARY_OP: - analyze_cnf_pr(pr->get_left_pr(),g,a,agr); - analyze_cnf_pr(pr->get_right_pr(),g,a,agr); - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } -} - - - -//  analyze a conjunct of a predicate. -//  Is it atomic (e.g., a single predicate), -//  and if so do a further analysis. - -void analyze_cnf(cnf_elem *c){ - -//  analyze the predicate. - analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr); - - if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){ - return; - } - - -//  its an atomic predicate -- get more info - c->is_atom = 1; - - if(c->pr->get_op() == "=") - c->eq_pred = 1; - else - c->eq_pred = 0; - - if(c->pr->get_operator_type() == PRED_IN) - c->in_pred = 1; - else - c->in_pred = 0; - - c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0; - analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr); - - if(c->pr->get_operator_type() == PRED_COMPARE){ - c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0; - analyze_cnf_se(c->pr->get_left_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr); - } -} - -void analyze_constraint_se(scalarexp_t *se, - int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){ - int l_agr, l_gb, l_par, l_func, l_op; - int r_agr, r_gb, r_par, r_func, r_op; - int p; - vector operand_list; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_IFACE_PARAM: - n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0; - return; - case SE_PARAM: - n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0; - return; - case SE_COLREF: - n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0; - if(se->is_gb() ){ - if(enter_gb){ - analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb); - }else{ - n_gb=1; - } - } - return; - case SE_UNARY_OP: - analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb); - n_op++; - return; - case SE_BINARY_OP: - analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb); - analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb); - n_agr=l_agr+r_agr; - n_gb=l_gb+r_gb; - n_par=l_par+r_par; - n_func=l_func+r_func; - n_op=l_op+r_op+1; - return; - case SE_AGGR_STAR: - case SE_AGGR_SE: - n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0; - return; - case SE_FUNC: - if(se->get_aggr_ref() >= 0){ - n_agr=1; n_gb = 0; n_par = 0; n_op = 0; - if(Ext_fcns) - n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id()); - else - n_func = 1; - return; - } - n_agr=0; n_gb = 0; n_par = 0; n_op = 0; - if(Ext_fcns) - n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id()); - else - n_func = 1; - operand_list = se->get_operands(); - for(p=0;p op_list; - - - switch(pr->get_operator_type()){ - case PRED_COMPARE: - analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb); - analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb); - n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par; - n_func=l_func+r_func; n_op=l_op+r_op; - if(pr->get_left_se()->get_data_type()->complex_comparison( - pr->get_right_se()->get_data_type()) - ){ - n_cmp_s = 0; n_cmp_c=1; - }else{ - n_cmp_s = 1; n_cmp_c=0; - } - n_in = 0; n_pred = 0; n_bool = 0; - return; - case PRED_IN: -// Tread IN predicate as sequence of comparisons - analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb); - if(pr->get_left_se()->get_data_type()->complex_comparison( - pr->get_right_se()->get_data_type()) - ){ - n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size(); - }else{ - n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0; - } - n_in = 0; n_pred = 0; n_bool = 0; - return; - case PRED_UNARY_OP: - analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb); - n_bool++; - return; - case PRED_BINARY_OP: - analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb); - analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb); - n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par; - n_func=l_func+r_func; n_op=l_op+r_op; - n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c; - n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1; - return; - case PRED_FUNC: - n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0; - if(Ext_fcns) - n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id()); - else - n_pred = 1; - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } -} - -void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){ - int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool; - analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op, - n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false); - -//printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool); - c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool; -} - -bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){ - int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool; - analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op, - n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true); -//printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool); - if(n_par || n_agr) - return false; - int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool; -//printf("cost=%d\n",cost); - return cost<10; -} - -// The prefilter needs to translate constraints on -// gbvars into constraints involving their underlying SEs. -// The following two routines attach GB def info. - -void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){ - int p; - vector operand_list; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_IFACE_PARAM: - case SE_PARAM: - case SE_AGGR_STAR: - return; - case SE_COLREF: - if(se->is_gb() ){ - se->rhs.scalarp = gtbl->get_def(se->get_gb_ref()); - } - return; - case SE_UNARY_OP: - insert_gb_def_se(se->get_left_se(),gtbl); - return; - case SE_BINARY_OP: - insert_gb_def_se(se->get_left_se(),gtbl); - insert_gb_def_se(se->get_right_se(),gtbl); - return; - case SE_AGGR_SE: - insert_gb_def_se(se->get_left_se(),gtbl); - return; - case SE_FUNC: - operand_list = se->get_operands(); - for(p=0;p op_list; -int o; - - switch(pr->get_operator_type()){ - case PRED_COMPARE: - insert_gb_def_se(pr->get_left_se(),gtbl); - insert_gb_def_se(pr->get_right_se(),gtbl); - return; - case PRED_IN: - insert_gb_def_se(pr->get_left_se(),gtbl); - return; - case PRED_UNARY_OP: - insert_gb_def_pr(pr->get_left_pr(),gtbl); - return; - case PRED_BINARY_OP: - insert_gb_def_pr(pr->get_left_pr(),gtbl); - insert_gb_def_pr(pr->get_right_pr(),gtbl); - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } -} - -// Substitute gbrefs with their definitions -void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){ - int p; - vector operand_list; - scalarexp_t *lse,*rse; - colref_t *cr; - string b_tbl; - int b_idx; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_IFACE_PARAM: - case SE_PARAM: - case SE_AGGR_STAR: - return; - case SE_COLREF: - cr = se->get_colref(); - b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field); - b_idx = Schema->get_table_ref(b_tbl); - cr->tablevar_ref = b_idx; - return; - case SE_UNARY_OP: - lse=se->get_left_se(); - if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ - se->lhs.scalarp = lse->get_right_se(); - subs_gbrefs_se(se,Schema); - return; - } - subs_gbrefs_se(se->get_left_se(),Schema); - return; - case SE_BINARY_OP: - lse=se->get_left_se(); - if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ - se->lhs.scalarp = lse->get_right_se(); - subs_gbrefs_se(se,Schema); - return; - } - rse=se->get_right_se(); - if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){ - se->rhs.scalarp = rse->get_right_se(); - subs_gbrefs_se(se,Schema); - return; - } - subs_gbrefs_se(se->get_left_se(),Schema); - subs_gbrefs_se(se->get_right_se(),Schema); - return; - case SE_AGGR_SE: - lse=se->get_left_se(); - if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ - se->lhs.scalarp = lse->get_right_se(); - subs_gbrefs_se(se,Schema); - return; - } - subs_gbrefs_se(se->get_left_se(),Schema); - return; - case SE_FUNC: - operand_list = se->get_operands(); - for(p=0;pget_operator_type()==SE_COLREF && lse->is_gb()){ - se->param_list[p] = lse->get_right_se(); - subs_gbrefs_se(se,Schema); - return; - } - } - for(p=0;p op_list; -int o; -scalarexp_t *lse,*rse; - - switch(pr->get_operator_type()){ - case PRED_COMPARE: - lse=pr->get_left_se(); - if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ - pr->lhs.sexp = lse->get_right_se(); - subs_gbrefs_pr(pr,Schema); - return; - } - rse=pr->get_right_se(); - if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){ - pr->rhs.sexp = rse->get_right_se(); - subs_gbrefs_pr(pr,Schema); - return; - } - subs_gbrefs_se(pr->get_left_se(),Schema); - subs_gbrefs_se(pr->get_right_se(),Schema); - return; - case PRED_IN: - lse=pr->get_left_se(); - if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ - pr->lhs.sexp = lse->get_right_se(); - subs_gbrefs_pr(pr,Schema); - return; - } - subs_gbrefs_se(pr->get_left_se(),Schema); - return; - case PRED_UNARY_OP: - subs_gbrefs_pr(pr->get_left_pr(),Schema); - return; - case PRED_BINARY_OP: - subs_gbrefs_pr(pr->get_left_pr(),Schema); - subs_gbrefs_pr(pr->get_right_pr(),Schema); - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_operator_type()==SE_COLREF && lse->is_gb()){ - pr->param_list[o] = lse->get_right_se(); - subs_gbrefs_pr(pr,Schema); - return; - } - subs_gbrefs_se(op_list[o],Schema); - } - return; - default: - fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n", - pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } -} - - -// Search for references to "expensive" fields. -int expensive_refs_se(scalarexp_t *se, table_list *Schema){ - int p; - vector operand_list; - int cnt=0; -table_def *td; -param_list *plist; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_IFACE_PARAM: - case SE_PARAM: - case SE_AGGR_STAR: - case SE_AGGR_SE: - return 0; - case SE_COLREF: - if(se->is_gb()) - return expensive_refs_se(se->rhs.scalarp,Schema); - td = Schema->get_table(se->lhs.colref->schema_ref); - plist = td->get_modifier_list(se->lhs.colref->field); - if(plist->contains_key("expensive")) - return 1; - return 0; - case SE_UNARY_OP: - return expensive_refs_se(se->get_left_se(),Schema); - case SE_BINARY_OP: - cnt += expensive_refs_se(se->get_left_se(),Schema); - cnt += expensive_refs_se(se->get_right_se(),Schema); - return cnt; - case SE_FUNC: - operand_list = se->get_operands(); - for(p=0;p op_list; -int o; -int cnt=0; - - switch(pr->get_operator_type()){ - case PRED_COMPARE: - cnt += expensive_refs_se(pr->get_left_se(),Schema); - cnt += expensive_refs_se(pr->get_right_se(),Schema); - return cnt; - case PRED_IN: - return expensive_refs_se(pr->get_left_se(),Schema); - case PRED_UNARY_OP: - return expensive_refs_pr(pr->get_left_pr(),Schema); - case PRED_BINARY_OP: - cnt += expensive_refs_pr(pr->get_left_pr(),Schema); - cnt += expensive_refs_pr(pr->get_right_pr(),Schema); - return cnt; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } -} - - -// TODO: allow "cheap" functions and predicates. -bool simple_field_constraint(cnf_elem *c){ - vector ll; - int l; - predicate_t *p = c->pr; - int l_agr, l_gb, l_par, l_func, l_op; - int r_agr, r_gb, r_par, r_func, r_op; - col_id_set left_colids, right_colids; - -// Verify that it is a simple atom - switch(p->get_operator_type()){ - case PRED_COMPARE: -// Must be an equality predicate which references -// which referecnes no aggregates, parameters, functions, or -// group-by variables, and should be a constraint of -// a single colref. -// AND should not require a complex comparison. - if(p->get_op() != "=") return(false); - analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false); - analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,l_op,NULL,false); - if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 || - r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false); -// I will count on there being no gbvars in the constraint. -// TODO: allow gbvars which are colrefs. - gather_se_col_ids(p->get_left_se(), left_colids, NULL); - gather_se_col_ids(p->get_right_se(), right_colids, NULL); - if(left_colids.size()+right_colids.size() != 1) return(false); - - -// Normalize : the colref should be on the lhs. - if(right_colids.size() > 0){ - p->swap_scalar_operands(); - } - -// Disallow complex (and therefore expensive) comparisons. - if(p->get_left_se()->get_data_type()->complex_comparison( - p->get_right_se()->get_data_type() ) ) - return(false); - -// passed all the tests. - return(true); - case PRED_IN: -// LHS must be a non-gbvar colref. - analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false); - if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false); -// I will count on there being no gbvars in the constraint. -// TODO: allow gbvars which are colrefs. - gather_se_col_ids(p->get_left_se(), left_colids, NULL); - if(left_colids.size() != 1) return(false); -// Disallow complex (and therefore expensive) comparisons. - if(p->get_left_se()->get_data_type()->complex_comparison( - p->get_left_se()->get_data_type() ) ) - return(false); - - -// All entries in the IN list must be literals -// Currently, this is the only possibility. - return(true); - break; - case PRED_UNARY_OP: - return(false); - case PRED_BINARY_OP: - return(false); - case PRED_FUNC: - return(false); - default: - fprintf(stderr,"INTERNAL ERROR in simple_field_cosntraint, line %d, character %d, unknown predicate operator type %d\n", - p->get_lineno(), p->get_charno(), p->get_operator_type() ); - exit(1); - } - - return(false); -} - -// As the name implies, return the colref constrained by the -// cnf elem. I will be counting on the LHS being a SE pointing -// to a colref. - -// This fcn assumes that in fact exactly -// one colref is constrained. -colref_t *get_constrained_colref(scalarexp_t *se){ - int p; - vector operand_list; -colref_t *ret; - - switch(se->get_operator_type()){ - case SE_LITERAL: - return(NULL); - case SE_PARAM: - case SE_IFACE_PARAM: - return(NULL); - case SE_COLREF: - return(se->get_colref()); - case SE_UNARY_OP: - return(get_constrained_colref(se->get_left_se())); - case SE_BINARY_OP: - ret=get_constrained_colref(se->get_left_se()); - if(ret == NULL) return(get_constrained_colref(se->get_right_se())); - else return ret; - case SE_AGGR_STAR: - case SE_AGGR_SE: - return(NULL); - case SE_FUNC: - if(se->get_aggr_ref() >= 0) return NULL; - - operand_list = se->get_operands(); - for(p=0;pget_left_se())); -} -colref_t *get_constrained_colref(cnf_elem *c){ - return get_constrained_colref(c->pr->get_left_se()); -} - - - - -/* -void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p, - string target_fld, string target_tbl, int tblref){ - -// Make a copy of the predicate to be added. -// ASSUME no aggregates. - predicate_t *pr = dup_pr(src_p,NULL); - -// Modify the ref to the base table. -// ASSUME lhs is the colref - pr->get_left_se()->get_colref()->set_table_name(target_tbl); - pr->get_left_se()->get_colref()->set_table_ref(tblref); - - if(dst->pr == NULL) dst->pr = pr; - else dst->pr = new predicate_t("OR", dst->pr, pr); - -} -*/ - - -////////////////////////////////////////////////////// -/////////////// Represent a node in a predicate tree -struct common_pred_node{ - set lftas; - predicate_t *pr; - vector predecessor_preds; - vector children; - - string target_tbl; - string target_fld; - int target_ref; - - common_pred_node(){ - pr = NULL; - } -}; - - -predicate_t *make_common_pred(common_pred_node *pn){ - int n; - - if(pn->children.size() == 0){ - if(pn->pr == NULL){ - fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node ahs no children and no predicate.\n"); - exit(1); - } - return( dup_pr(pn->pr,NULL) ); - } - - predicate_t *curr_pr = make_common_pred( pn->children[0] ); - for(n=1;nchildren.size();++n){ - curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr); - } - - if(pn->pr != NULL) - curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr); - - return(curr_pr); -} - - -bool operator<(const cnf_set &c1, const cnf_set &c2){ - if(c1.lfta_id.size() < c2.lfta_id.size()) - return true; - return false; -} - - -// Compute the predicates for the prefilter. -// the prefilter preds are returned in prefilter_preds. -// pred_ids is the set of predicates used in the prefilter. -// the encoding is the lfta index, in the top 16 bits, -// then the index of the cnf element in the bottom 16 bits. -// This set of for identifying which preds do not need -// to be generated in the lftas. -void find_common_filter(vector< vector > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector &prefilter_preds, set &pred_ids){ - int p, p2, l, c; - - vector pred_list, sort_list; - -// Create list of tagged, prefilter-safe CNFs. - for(l=0;lpr,Schema)==0) - pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c)); - } - } - } - -// Eliminate duplicates - for(p=0;ppr, pred_list[p2]->pr,Schema)){ - pred_list[p]->subsume(pred_list[p2]); - delete pred_list[p2]; - pred_list[p2] = NULL; - } - } - } - } - } - -// combine preds that occur in the exact same lftas. - for(p=0;plfta_id == pred_list[p2]->lfta_id){ - pred_list[p]->combine_pred(pred_list[p2]); - delete pred_list[p2]; - pred_list[p2] = NULL; - } - } - } - } - } - -// Compress the list - for(p=0;padd_pred_ids(pred_ids); - } - -// Substitute gb refs with their defs -// While I'm at it, substitute base table sch ref for tblref. - for(p=0;ppr,Schema); - } - -} - - - - - -/////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////// - -// Find partial functions and register them. -// Do a DFS so that nested partial fcn calls -// get evaluated in the right order. -// Don't drill down into aggregates -- their arguments are evaluated -// earlier than the select list is. -// -// Modification for function caching: -// Pass in a ref counter, and partial fcn indicator. -// Cache fcns ref'd at least once. -// pass in NULL for fcn_ref_cnt to turn off fcn caching analysis - - -void find_partial_fcns(scalarexp_t *se, vector *pf_list, - vector *fcn_ref_cnt, vector *is_partial_fcn, - ext_fcn_list *Ext_fcns){ - vector operands; - int o, f; - - if(se == NULL) return; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return; - case SE_UNARY_OP: - find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; - return; - case SE_BINARY_OP: - find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns); - find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns); - return; - case SE_COLREF: - return; - case SE_AGGR_STAR: - return; - case SE_AGGR_SE: -// find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ; - return; - case SE_FUNC: - if(se->get_aggr_ref() >= 0) return; - - operands = se->get_operands(); - for(o=0;ois_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){ - if(fcn_ref_cnt){ - for(f=0;fsize();++f){ - if(is_equivalent_se(se,(*pf_list)[f])){ - se->set_partial_ref(f); - (*fcn_ref_cnt)[f]++; - break; - } - } - }else{ - f=pf_list->size(); - } - if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) || fcn_ref_cnt)){ - se->set_partial_ref(pf_list->size()); - pf_list->push_back(se); - if(fcn_ref_cnt){ - fcn_ref_cnt->push_back(1); - is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id())); - } - } - } - return; - default: - fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return; -} - - -void find_partial_fcns_pr(predicate_t *pr, vector *pf_list, - vector *fcn_ref_cnt, vector *is_partial_fcn, - ext_fcn_list *Ext_fcns){ - vector litl; - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; - return; - case PRED_COMPARE: - find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; - find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; - return; - case PRED_UNARY_OP: - find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns); - return; - case PRED_BINARY_OP: - find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; - find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return; -} - - - -void find_combinable_preds(predicate_t *pr, vector *pr_list, - table_list *Schema, ext_fcn_list *Ext_fcns){ - vector litl; - vector op_list; - int f,o; - - switch(pr->get_operator_type()){ - case PRED_IN: - return; - case PRED_COMPARE: - return; - case PRED_UNARY_OP: - find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns); - return; - case PRED_BINARY_OP: - find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ; - find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ; - return; - case PRED_FUNC: - if(Ext_fcns->is_combinable(pr->get_fcn_id())){ - for(f=0;fsize();++f){ - if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){ - pr->set_combinable_ref(f); - break; - } - } - if(f == pr_list->size()){ - pr->set_combinable_ref(pr_list->size()); - pr_list->push_back(pr); - } - } - return; - default: - fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n", - pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return; -} - - -//-------------------------------------------------------------------- -// Collect refs to aggregates. - - -void collect_agg_refs(scalarexp_t *se, set &agg_refs){ - vector operands; - int o; - - if(se == NULL) return; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return; - case SE_UNARY_OP: - collect_agg_refs(se->get_left_se(), agg_refs) ; - return; - case SE_BINARY_OP: - collect_agg_refs(se->get_left_se(), agg_refs); - collect_agg_refs(se->get_right_se(), agg_refs); - return; - case SE_COLREF: - return; - case SE_AGGR_STAR: - case SE_AGGR_SE: - agg_refs.insert(se->get_aggr_ref()); - return; - case SE_FUNC: - if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref()); - - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return; -} - - -void collect_aggr_refs_pr(predicate_t *pr, set &agg_refs){ - vector litl; - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - collect_agg_refs(pr->get_left_se(), agg_refs) ; - return; - case PRED_COMPARE: - collect_agg_refs(pr->get_left_se(), agg_refs) ; - collect_agg_refs(pr->get_right_se(), agg_refs) ; - return; - case PRED_UNARY_OP: - collect_aggr_refs_pr(pr->get_left_pr(), agg_refs); - return; - case PRED_BINARY_OP: - collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ; - collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ; - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return; -} - - -//-------------------------------------------------------------------- -// Collect previously registered partial fcn refs. -// Do a DFS so that nested partial fcn calls -// get evaluated in the right order. -// Don't drill down into aggregates -- their arguments are evaluated -// earlier than the select list is. -// ------------->>> THEN WHY AM I DRILLING DOWN INTO AGGREGATES? - -void collect_partial_fcns(scalarexp_t *se, set &pfcn_refs){ - vector operands; - int o; - - if(se == NULL) return; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return; - case SE_UNARY_OP: - collect_partial_fcns(se->get_left_se(), pfcn_refs) ; - return; - case SE_BINARY_OP: - collect_partial_fcns(se->get_left_se(), pfcn_refs); - collect_partial_fcns(se->get_right_se(), pfcn_refs); - return; - case SE_COLREF: - return; - case SE_AGGR_STAR: - return; - case SE_AGGR_SE: -// collect_partial_fcns(se->get_left_se(), pfcn_refs) ; - return; - case SE_FUNC: - if(se->get_aggr_ref() >= 0) return; - - operands = se->get_operands(); - for(o=0;ois_partial()){ - pfcn_refs.insert(se->get_partial_ref()); - } - - return; - default: - fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return; -} - - -void collect_partial_fcns_pr(predicate_t *pr, set &pfcn_refs){ - vector litl; - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - collect_partial_fcns(pr->get_left_se(), pfcn_refs) ; - return; - case PRED_COMPARE: - collect_partial_fcns(pr->get_left_se(), pfcn_refs) ; - collect_partial_fcns(pr->get_right_se(), pfcn_refs) ; - return; - case PRED_UNARY_OP: - collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs); - return; - case PRED_BINARY_OP: - collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ; - collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ; - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return; -} - - - - -/////////////////////////////////////////////////////////////// -//////////// Exported Functions /////////////////////////// -/////////////////////////////////////////////////////////////// - - -// Count and collect refs to interface parameters. - -int count_se_ifp_refs(scalarexp_t *se, set &ifpnames){ - vector operands; - int o; - int ret = 0; - - if(se == NULL) return 0; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - return 0; - case SE_IFACE_PARAM: - ifpnames.insert(se->get_ifpref()->to_string()); - return 1; - case SE_UNARY_OP: - return count_se_ifp_refs(se->get_left_se(), ifpnames) ; - case SE_BINARY_OP: - ret = count_se_ifp_refs(se->get_left_se(), ifpnames); - ret += count_se_ifp_refs(se->get_right_se(), ifpnames); - return ret; - case SE_COLREF: - return 0; - case SE_AGGR_STAR: - return 0; - case SE_AGGR_SE: -// collect_partial_fcns(se->get_left_se(), pfcn_refs) ; - return 0; - case SE_FUNC: - if(se->get_aggr_ref() >= 0) return 0; - - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return 0; -} - - -int count_pr_ifp_refs(predicate_t *pr, set &ifpnames){ - vector litl; - vector op_list; - int o; - int ret = 0; - if(pr == NULL) return 0; - - switch(pr->get_operator_type()){ - case PRED_IN: - return count_se_ifp_refs(pr->get_left_se(), ifpnames) ; - case PRED_COMPARE: - ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ; - ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ; - return ret; - case PRED_UNARY_OP: - return count_pr_ifp_refs(pr->get_left_pr(), ifpnames); - case PRED_BINARY_OP: - ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ; - ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ; - return ret; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return 0; -} - -// Resolve ifp refs, convert them to string literals. - -int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb, string &err){ - vector operands; - vector ifvals; - int o; - int ierr; - string serr; - int ret = 0; - literal_t *tmp_l; - ifpref_t *ir; - - if(se == NULL) return 0; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - return 0; - case SE_IFACE_PARAM: - ir = se->get_ifpref(); - ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr); - if(ierr){ - err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n"; - return 1; - } - if(ifvals.size() == 0){ - err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n"; - return 1; - } - if(ifvals.size() > 1){ - err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n"; - return 1; - } - tmp_l = new literal_t( ifvals[0]); - se->convert_to_literal(tmp_l); - return 0; - case SE_UNARY_OP: - return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ; - case SE_BINARY_OP: - ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err); - ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err); - return ret; - case SE_COLREF: - return 0; - case SE_AGGR_STAR: - return 0; - case SE_AGGR_SE: -// collect_partial_fcns(se->get_left_se(), pfcn_refs) ; - return 0; - case SE_FUNC: - if(se->get_aggr_ref() >= 0) return 0; - - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return 0; -} - - -int resolve_pr_ifp_refs(predicate_t *pr, string ifm, string ifn, ifq_t *ifdb, string &err){ - vector litl; - vector op_list; - int o; - int ret = 0; - - switch(pr->get_operator_type()){ - case PRED_IN: - return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ; - case PRED_COMPARE: - ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ; - ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ; - return ret; - case PRED_UNARY_OP: - return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err); - case PRED_BINARY_OP: - ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ; - ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ; - return ret; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - exit(1); - } - - return 0; -} - - -string impute_query_name(table_exp_t *fta_tree, string default_nm){ - string retval = fta_tree->get_val_of_name("query_name"); - if(retval == "") retval = default_nm; - if(retval == "") retval = "default_query"; - return(retval); -} - -// Convert the parse tree into an intermediate form, -// which admits analysis better. -// -// TODO : rationalize the error return policy. -// -// TODO : the query_summary_class object contains -// the parse tree. -// TODO: revisit the issue when nested subqueries are implemented. -// One possibility: implement accessor methods to hide the -// complexity -// For now: this class contains data structures not in table_exp_t -// (with a bit of duplication) - -// Return NULL on error. -// print error messages to stderr. - - -query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema, - ext_fcn_list *Ext_fcns, string default_name){ - int i,j, k, retval; - -// Create the summary struct -- no analysis is done here. - query_summary_class *qs = new query_summary_class(fta_tree); - qs->query_type = fta_tree->query_type; - -////////////// Do common analysis - -// Extract query name. Already imputed for the qnodes. -// qs->query_name = impute_query_name(fta_tree, default_name); - qs->query_name = default_name; -//printf("query name is %s\n",qs->query_name.c_str()); - -// extract definitions. Don't grab the query name. - - map nmap = fta_tree->get_name_map(); - map::iterator nmi; - for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){ - string pname = (*nmi).first; - if(pname != "query_name" ) - (qs->definitions)[pname] = (*nmi).second; - } - -/// -/// FROM analysis - -// First, verify that all the referenced tables are defined. -// Then, bind the tablerefs in the FROM list to schemas in -// the schema list. - tablevar_list_t *tlist = fta_tree->get_from(); - vector tbl_vec = tlist->get_table_list(); - - bool found_error = false; - for(i=0;ifind_tbl(tbl_vec[i]->get_schema_name()); - if(sch_no < 0) { - fprintf(stderr,"Error, table <%s> not found in the schema file\n", - tbl_vec[i]->get_schema_name().c_str() ); - fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(), - tbl_vec[i]->get_charno() ); - return(NULL); - } - - tbl_vec[i]->set_schema_ref(sch_no); - -// If accessing a UDOP, mangle the name -// This needs to be done in translate_fta.cc, not here. -/* - if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){ - string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm; - tbl_vec[i]->set_schema_name(mngl_name); - } -*/ - -// No FTA schema should have an interface defined on it. - if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){ - fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str()); - } -// Fill in default interface - if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){ - tbl_vec[i]->set_interface("default"); - tbl_vec[i]->set_ifq(true); - } -// Fill in default machine - if(tbl_vec[i]->get_interface()!="" && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){ - tbl_vec[i]->set_machine(hostname); - } - - if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){ -// Record the set of interfaces accessed - string ifstr; - if(tbl_vec[i]->get_ifq()){ - ifstr = "["+tbl_vec[i]->get_interface()+"]"; - }else{ - if(tbl_vec[i]->get_machine() != "localhost"){ - ifstr = "'"+tbl_vec[i]->get_machine()+"'."+tbl_vec[i]->get_interface(); - }else{ - ifstr = tbl_vec[i]->get_interface(); - } - } -//printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str()); - if(qs->definitions.count("_referenced_ifaces")){ - ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr; - } - qs->definitions["_referenced_ifaces"] = ifstr; - } - - } - if(found_error) return(NULL); - -// Ensure that all tablevars have are named -// and that no two tablevars have the same name. - int tblvar_no = 0; -// First, gather the set of variable - set tblvar_names; - for(i=0;iget_var_name() != ""){ - if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){ - fprintf(stderr,"ERROR, query has two table variables named %s. line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno()); - return(NULL); - } - tblvar_names.insert(tbl_vec[i]->get_var_name()); - } - } -// Now generate variable names for unnamed tablevars - for(i=0;iget_var_name() == ""){ - char tmpstr[200]; - sprintf(tmpstr,"_t%d",tblvar_no); - string newvar = tmpstr; - while(tblvar_names.count(newvar) > 0){ - tblvar_no++; - sprintf(tmpstr,"_t%d",tblvar_no); - newvar = tmpstr; - } - tbl_vec[i]->set_range_var(newvar); - tblvar_names.insert(newvar); - } - } - -// Process inner/outer join properties - int jprop = fta_tree->get_from()->get_properties(); -// Require explicit INNER_JOIN, ... specification for join queries. - if(jprop < 0){ - if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){ - fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, FILTER_JOIN.\n"); - return(NULL); - } - } - - if(jprop == OUTER_JOIN_PROPERTY){ - for(i=0;iset_property(1); - } - if(jprop == LEFT_OUTER_JOIN_PROPERTY) - tbl_vec[0]->set_property(1); - if(jprop == RIGHT_OUTER_JOIN_PROPERTY) - tbl_vec[tbl_vec.size()-1]->set_property(1); - if(jprop == FILTER_JOIN_PROPERTY){ - if(fta_tree->get_from()->get_temporal_range() == 0){ - fprintf(stderr,"ERROR, a filter join must have a non-zero tempoal range.\n"); - return NULL; - } - if(tbl_vec.size() != 2){ - fprintf(stderr,"ERROR, a filter join must be between two table variables.\n"); - return NULL; - } - colref_t *cr = fta_tree->get_from()->get_colref(); - string field = cr->get_field(); - - int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field); - if(fi0 < 0){ - fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str()); - return NULL; - } - cr->set_schema_ref(tbl_vec[0]->get_schema_ref()); - cr->set_tablevar_ref(0); - string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field); - param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field); - data_type *dt0 = new data_type(type_name, modifiers); - if(dt0->get_type_str() != "UINT"){ - fprintf(stderr,"ERROR, the temporal attribute in a filter join must be a UINT.\n"); - return NULL; - } - if(! dt0->is_increasing()){ - fprintf(stderr,"ERROR, the temporal attribtue in a filter join must be temporal increasing.\n"); - return NULL; - } - } - - - -///////////////////// -/// Build the query param table - vector query_params = fta_tree->query_params; - int p; - for(p=0;pname; - string dtname = query_params[p]->val; - - if(pname == ""){ - fprintf(stderr,"ERROR parameter has empty name.\n"); - found_error = true; - } - if(dtname == ""){ - fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str()); - found_error = true; - } - data_type *dt = new data_type(dtname); - if(!(dt->is_defined())){ - fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str()); - found_error = true; - } - - qs->add_query_param(pname, dt, false); - } - if(found_error) return(NULL); -// unpack the param table to a global for easier analysis. - param_tbl=qs->param_tbl; - -////////////////// MERGE specialized analysis - - if(qs->query_type == MERGE_QUERY){ -// Verify that -// 1) there are two *different* streams ref'd in the FROM clause -// However, only emit a warning. -// (can't detect a problem if one of the interfaces is the -// default interface). -// 2) They have the same layout (e.g. same types but the -// names can be different -// 3) the two columns can unambiguously be mapped to -// fields of the two tables, one per table. Exception: -// the column names are the same and exist in both tables. -// FURTHERMORE the positions must be the same -// 4) after mapping, verify that both colrefs are temporal -// and in the same direction. - if(tbl_vec.size() < 2){ - fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() ); - return(NULL); - } - - vector fev0 = schema->get_fields( - tbl_vec[0]->get_schema_name() - ); - - - int cv; - for(cv=1;cv fev1 = schema->get_fields( - tbl_vec[cv]->get_schema_name() - ); - - if(fev0.size() != fev1.size()){ - fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str()); - return(NULL); - } - -// Only need to ensure that the list of types are the same. -// THe first table supplies the output colnames, -// and all temporal properties are lost, except for the -// merge-by columns. - int f; - for(f=0;fget_type(),fev0[f]->get_modifier_list()); - data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list()); - if(! dt0.equal_subtypes(&dt1) ){ - fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str()); - return(NULL); - } - } - } - -// copy over the merge-by cols. - qs->mvars = fta_tree->mergevars; - - if(qs->mvars.size() == 0){ // need to discover the merge vars. - int mergevar_pos = -1; - int f; - for(f=0;fget_type(),fev0[f]->get_modifier_list()); - if(dt0.is_temporal()){ - mergevar_pos = f; - break; - } - } - if(mergevar_pos >= 0){ - for(cv=0;cv fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name()); - qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() )); - } - }else{ - fprintf(stderr,"ERROR, no merge-by column found.\n"); - return(NULL); - } - } - -// Ensure same number of tables, merge cols. - if(tbl_vec.size() != qs->mvars.size()){ - fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size()); - return(NULL); - } - -// Ensure that the merge-by are from different tables -// also, sort colrefs so that they align with the FROM list using tmp_crl - set refd_sources; - vector tmp_crl(qs->mvars.size(),NULL); - for(cv=0;cvmvars.size();++cv){ - int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema); - if(tblvar<0){ - fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str()); - } - refd_sources.insert(tblvar); - tmp_crl[tblvar] = qs->mvars[cv]; - } - if(refd_sources.size() != qs->mvars.size()){ - fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size()); - return(NULL); - } - -// 1-1 mapping, so use tmp_crl as the merge column list. - qs->mvars = tmp_crl; - - - -// Look up the colrefs in their schemas, verify that -// they are at the same place, that they are both temporal -// in the same way. -// It seems that this should be done more in the schema objects. - int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field()); - if(fi0 < 0){ - fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str()); - exit(1); - } - for(cv=1;cvmvars.size();++cv){ - int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field()); - if(fi0!=fi1){ - fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str()); - return NULL; - } - } - - field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0); - data_type dt0(fe0->get_type(),fe0->get_modifier_list()); - if( (!dt0.is_temporal()) ){ - fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0); - return(NULL); - } - for(cv=0;cvmvars.size();++cv){ - field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0); - data_type dt1(fe1->get_type(),fe1->get_modifier_list()); - if( (!dt1.is_temporal()) ){ - fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv); - return(NULL); - } - - - if( dt0.get_temporal() != dt1.get_temporal()){ - fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv); - return(NULL); - } - } - -// If there is a SLACK specification, verify -// that it is literal-only and that its type is compatible -// with that of the merge columns - qs->slack = fta_tree->slack; - if(qs->slack){ - if(! literal_only_se(qs->slack)){ - fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n"); - return NULL; - } - - assign_data_types(qs->slack, schema, fta_tree, Ext_fcns ); - data_type sdt(&dt0, qs->slack->get_data_type(), string("+")); - if(sdt.get_type() == undefined_t){ - fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n"); - return NULL; - } - } - - -// All the tests have passed, there is nothing -// else to fill in. - - } - -////////////////// SELECT specialized analysis - - if(qs->query_type == SELECT_QUERY){ -// unpack the gb_tbl, aggr_tbl, param_tbl, and complex_literals -// objects into globals, for easier syntax. - gb_tbl = qs->gb_tbl; - aggr_tbl = qs->aggr_tbl; - - -// Build the table of group-by attributes. -// (se processing done automatically). -// NOTE : Doing the SE processing here is getting cumbersome, -// I should process these individually. -// NOTE : I should check for duplicate names. -// NOTE : I should ensure that the def of one GB does not -// refrence the value of another. - vector gb_list = fta_tree->get_groupby(); - int n_temporal = 0; - string temporal_gbvars = ""; - map gset_gbnames; - -// For generating the set of GB patterns for this aggregation query. - vector inner_pattern; - vector > pattern_set; - vector > > pattern_components; - - vector r_gbs, c_gbs, g_gbs; - int n_patterns; - - for(i=0;itype){ - case gb_egb_type: - retval = gb_tbl->add_gb_attr( - gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns - ); - if(retval < 0){ - return NULL; // nothing added to gb_tbl, so this can trigger a segfault 2 lines below - }else{ - if(gb_tbl->get_data_type(i)->is_temporal()){ - n_temporal++; - if(temporal_gbvars != "") temporal_gbvars+=" "; - temporal_gbvars += gb_tbl->get_name(i); - } - } - - inner_pattern.clear(); - pattern_set.clear(); - inner_pattern.push_back(true); - pattern_set.push_back(inner_pattern); - pattern_components.push_back(pattern_set); - - gb_tbl->gb_entry_type.push_back(""); - gb_tbl->gb_entry_count.push_back(1); - gb_tbl->pattern_components.push_back(pattern_set); - - break; - case rollup_egb_type: - r_gbs = gb_list[i]->gb_lists[0]->get_gb_list(); - for(j=0;jadd_gb_attr( - r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns - ); - if(retval < 0){ - found_error = true; - }else{ // rollup gb can't be temporal - gb_tbl->reset_temporal(gb_tbl->size()-1); - } - } - - inner_pattern.resize(r_gbs.size()); - pattern_set.clear(); - for(j=0;j<=r_gbs.size();++j){ - for(k=0;kgb_entry_type.push_back("ROLLUP"); - gb_tbl->gb_entry_count.push_back(r_gbs.size()); - gb_tbl->pattern_components.push_back(pattern_set); - break; - case cube_egb_type: - c_gbs = gb_list[i]->gb_lists[0]->get_gb_list(); - for(j=0;jadd_gb_attr( - c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns - ); - if(retval < 0){ - found_error = true; - }else{ // cube gb can't be temporal - gb_tbl->reset_temporal(gb_tbl->size()-1); - } - } - - inner_pattern.resize(c_gbs.size()); - pattern_set.clear(); - n_patterns = 1 << c_gbs.size(); - for(j=0;jgb_entry_type.push_back("CUBE"); - gb_tbl->gb_entry_count.push_back(c_gbs.size()); - gb_tbl->pattern_components.push_back(pattern_set); - break; - case gsets_egb_type: - { - gset_gbnames.clear(); - for(j=0;jgb_lists.size();++j){ - g_gbs = gb_list[i]->gb_lists[j]->get_gb_list(); - for(k=0;ktype != GB_COLREF){ - fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str()); - found_error = true; - }else{ - if(gset_gbnames.count(g_gbs[k]->name) == 0){ - retval = gb_tbl->add_gb_attr( - g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns - ); - if(retval < 0){ - found_error = true; - }else{ // gsets gb can't be temporal - gb_tbl->reset_temporal(gb_tbl->size()-1); - } - int pos = gset_gbnames.size(); - gset_gbnames[g_gbs[k]->name] = pos; - } - } - } - } - - if(gset_gbnames.size() > 63){ - fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n"); - found_error = true; - } - - inner_pattern.resize(gset_gbnames.size()); - pattern_set.clear(); - set signatures; - for(j=0;jgb_lists.size();++j){ - g_gbs = gb_list[i]->gb_lists[j]->get_gb_list(); - set refd_gbs; - for(k=0;kname); - } - fill(inner_pattern.begin(),inner_pattern.end(),false); - unsigned long long int signature = 0; - set::iterator ssi; - for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){ - inner_pattern[gset_gbnames[(*ssi)]] = true; - signature |= (1 << gset_gbnames[(*ssi)]); - } - if(signatures.count(signature)){ - fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t"); - set::iterator ssi; - for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){ - fprintf(stderr," %s",(*ssi).c_str()); - } - fprintf(stderr,"\n"); - }else{ - signatures.insert(signature); - pattern_set.push_back(inner_pattern); - } - } - pattern_components.push_back(pattern_set); - - gb_tbl->gb_entry_type.push_back("GROUPING_SETS"); - gb_tbl->gb_entry_count.push_back(gset_gbnames.size()); - gb_tbl->pattern_components.push_back(pattern_set); - } - break; - default: - break; - } - } - if(found_error) return(NULL); - if(n_temporal > 1){ - fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s). Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str()); - return NULL; - } - -// Compute the set of patterns. Take the cross product of all pattern components. - vector > gb_patterns; - int n_components = pattern_components.size(); - vector pattern_pos(n_components,0); - bool done = false; - while(! done){ - vector pattern; - for(j=0;j= pattern_components[j].size()) - pattern_pos[j] = 0; - else - break; - } - if(j >= n_components) - done = true; - } - gb_tbl->gb_patterns = gb_patterns; - - -// Process the supergroup, if any. - vector sgb = fta_tree->get_supergb(); - for(i=0;ifind_gb(sgb[i],fta_tree->fm, schema); - if(gbr < 0){ - fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str()); - found_error = true; - } - if(qs->sg_tbl.count(gbr)){ - fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str()); - } - qs->sg_tbl.insert(gbr); - } - if(found_error) return(NULL); - - if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){ - fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n"); - return NULL; - } - - - - predicate_t *wh = fta_tree->get_where(); - predicate_t *hv = fta_tree->get_having(); - predicate_t *cw = fta_tree->get_cleaning_when(); - predicate_t *cb = fta_tree->get_cleaning_by(); - predicate_t *closew = fta_tree->get_closing_when(); - - if(closew != NULL && gb_tbl->gb_patterns.size()>1){ - fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n"); - return NULL; - } - - - -// Verify that all column references are valid, and if so assign -// the data type. - - vector sl_list = fta_tree->get_sl_vec(); - for(i=0;ise, fta_tree->fm, schema, gb_tbl); - if(retval < 0) found_error = true; - } - if(wh != NULL) - retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl); - if(retval < 0) found_error = true; - if(hv != NULL) - retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl); - if(retval < 0) found_error = true; - if(cw != NULL) - retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl); - if(retval < 0) found_error = true; - if(cb != NULL) - retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl); - if(retval < 0) found_error = true; - if(closew != NULL) - retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl); - if(retval < 0) found_error = true; - - if(found_error) return(NULL); - -// Verify that all of the scalar expressions -// and comparison predicates have compatible types. - - n_temporal = 0; - string temporal_output_fields; - for(i=0;ise, schema, fta_tree, Ext_fcns ); - if(retval < 0){ - found_error = true; - }else{ - if(sl_list[i]->se->get_data_type()->is_temporal()){ - n_temporal++; - temporal_output_fields += " "+int_to_string(i); - } - } - } - if(n_temporal > 1){ - fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s). Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str()); - found_error=true; - } - if(wh != NULL) - retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns); - if(retval < 0) found_error = true; - if(hv != NULL) - retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns); - if(retval < 0) found_error = true; - if(cw != NULL) - retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns); - if(retval < 0) found_error = true; - if(cb != NULL) - retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns); - if(retval < 0) found_error = true; - if(closew != NULL) - retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns); - if(retval < 0) found_error = true; - - if(found_error) return(NULL); - -// Impute names for the unnamed columns. - set curr_names; - int s; - for(s=0;sname); - } - for(s=0;sname == "") - sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se); - } - - -// Check the aggregates. -// No aggrs allowed in the WHERE predicate. -// (no aggrs in the GB defs, but that is examined elsewhere) -// Therefore, aggregates are allowed only the select clause. -// -// The query is an aggregation query if there is a group-by clause, or -// if any aggregate is referenced. If there is a group-by clause, -// at least one aggregate must be referenced. -// If the query is an aggregate query, the scalar expressions in -// the select clause can reference only constants, aggregates, or group-by -// attributes. -// Also, if the query is an aggregate query, build a table referencing -// the aggregates. -// -// No nested aggregates allowed. -// - -// First, count references in the WHERE predicate. -// (if there are any references, report an error). -// can ref group vars, tuple fields, and stateful fcns. - - if(wh != NULL){ - retval = count_aggr_pred(wh, true); - if(retval > 0){ - fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n"); - return(NULL); - } - } - -// NOTE : Here I need an analysis of the having clause -// to verify that it only refs GB attrs and aggregates. -// (also, superaggregates, stateful fcns) - if(hv!=NULL){ - retval = verify_having_pred(hv, "HAVING", Ext_fcns); - if(retval < 0) return(NULL); - } - -// Cleaning by has same reference rules as Having - if(cb!=NULL){ - retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns); - if(retval < 0) return(NULL); - } - -// Cleaning when has same reference rules as Having, -// except that references to non-superaggregates are not allowed. -// This is tested for when "CLEANING_BY" is passed in as the clause. - if(cw!=NULL){ - retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns); - if(retval < 0) return(NULL); - } - -// CLOSING_WHEN : same rules as HAVING - if(closew!=NULL){ - retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns); - if(retval < 0) return(NULL); - } - - -// Collect aggregates in the HAVING and CLEANING clauses - if(hv != NULL){ - build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns); - } - if(cw != NULL){ - build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns); - } - if(cb != NULL){ - build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns); - } - if(closew != NULL){ - build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns); - } - -// Collect aggregate refs in the SELECT clause. - - for(i=0;ise, aggr_tbl, Ext_fcns); - - -// Collect references to states of stateful functions - if(wh != NULL){ - gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns); - } - if(hv != NULL){ - gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns); - } - if(cw != NULL){ - gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns); - } - if(cb != NULL){ - gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns); - } - if(closew != NULL){ // should be no stateful fcns here ... - gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns); - } - for(i=0;ise, qs->states_refd, Ext_fcns); - - -// If this is an aggregate query, it had normally references -// some aggregates. Its not necessary though, just emit a warning. -// (acts as SELECT DISTINCT) - - bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0; - if(is_aggr_query && aggr_tbl->size() == 0){ - fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n"); - } - -// If this is an aggregate query, -// 1) verify that the SEs in the SELECT clause reference -// only constants, aggregates, and group-by attributes. -// 2) No aggregate scalar expression references an aggregate -// or any stateful function. -// 3) either it references both CLEANING clauses or neither. -// 4) all superaggregates must have the superaggr_allowed property. -// 5) all aggregates ref'd in the CLEANING_WHEN ad CLEANING_BY -// clauses must have the multiple_output property. - - - if(is_aggr_query){ - if(gb_list.size() == 0){ - fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n"); - return NULL; - } -// Ensure that at least one gbvar is temporal - if(! fta_tree->name_exists("no_temporal_aggr")){ - bool found_temporal = false; - for(i=0;isize();i++){ - if(gb_tbl->get_data_type(i)->is_temporal()){ - found_temporal = true; - } - } - if(! found_temporal){ - fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n"); - exit(1); - } - } - - if((!cb && cw) || (cb && !cw)){ - fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n"); - return(NULL); - } - - bool refs_running = false; - int a; - for(a=0; asize(); ++a){ - refs_running |= aggr_tbl->is_running_aggr(a); - } - - if(closew){ - if(cb || cw){ - fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n"); - return(NULL); - } - if(!refs_running){ - fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n"); - return(NULL); - } - } - - if(refs_running && !closew){ - fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n"); - return(NULL); - } - - bool st_ok = true; - for(i=0;ise); - st_ok = st_ok && ret_bool; - } - if(! st_ok) - return(NULL); - - for(i=0;isize();i++){ - if(aggr_tbl->is_superaggr(i)){ - if(! aggr_tbl->superaggr_allowed(i)){ - fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str()); - return NULL; - } - } - if(aggr_tbl->is_builtin(i)){ - if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){ - fprintf(stderr,"ERROR no nested aggregation allowed.\n"); - return(NULL); - } - }else{ - vector opl = aggr_tbl->get_operand_list(i); - int o; - for(o=0;o 0){ - fprintf(stderr,"ERROR no nested aggregation allowed.\n"); - return(NULL); - } - } - } - } - }else{ -// Ensure that non-aggregate query doesn't reference some things - if(cb || cw){ - fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n"); - return(NULL); - } - if(closew){ - fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n"); - return(NULL); - } - if(qs->states_refd.size()){ - fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n"); - return(NULL); - } - } - - - -// Convert the predicates into CNF. OK to pass NULL ptr. - make_cnf_from_pr(wh, qs->wh_cnf); - make_cnf_from_pr(hv, qs->hav_cnf); - make_cnf_from_pr(cb, qs->cb_cnf); - make_cnf_from_pr(cw, qs->cw_cnf); - make_cnf_from_pr(closew, qs->closew_cnf); - -// Analyze the predicates. - - for(i=0;iwh_cnf.size();i++) - analyze_cnf(qs->wh_cnf[i]); - for(i=0;ihav_cnf.size();i++) - analyze_cnf(qs->hav_cnf[i]); - for(i=0;icb_cnf.size();i++) - analyze_cnf(qs->cb_cnf[i]); - for(i=0;icw_cnf.size();i++) - analyze_cnf(qs->cw_cnf[i]); - for(i=0;iclosew_cnf.size();i++) - analyze_cnf(qs->closew_cnf[i]); - - -// At this point, the old analysis program -// gathered all refs to partial functions, -// complex literals, and parameters accessed via a handle. -// I think its better to delay this -// until code generation time, as the query will be -// in general split. - - } - - return(qs); -} - -/////////////////////////////////////////////////////////////////////// - -// Expand gbvars with their definitions. - -scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){ - int o; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return se; - case SE_UNARY_OP: - se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl); - return se; - case SE_BINARY_OP: - se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl); - se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl); - return se; - case SE_COLREF: - if( se->is_gb() ){ - return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) ); - } - return se; -// don't descend into aggr defs. - case SE_AGGR_STAR: - return se; - case SE_AGGR_SE: - return se; - case SE_FUNC: - for(o=0;oparam_list.size();o++){ - se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl); - } - return se; - default: - fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return se; -} - -void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){ - vector op_list; - int o; - bool found = false; - - switch(pr->get_operator_type()){ - case PRED_IN: - pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl); - return; - case PRED_COMPARE: - pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ; - pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ; - return; - case PRED_UNARY_OP: - expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ; - return; - case PRED_BINARY_OP: - expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ; - expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ; - return; - case PRED_FUNC: - for(o=0;oparam_list.size();++o){ - pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ; - } - return; - default: - fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n", - pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); - } - return; -} - - - - -// return true if the se / pr contains any gbvar on the list. - - -bool contains_gb_se(scalarexp_t *se, set &gref_set){ - vector operands; - int o; - bool found = false; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return false; - case SE_UNARY_OP: - return contains_gb_se(se->get_left_se(),gref_set); - case SE_BINARY_OP: - return( contains_gb_se(se->get_left_se(),gref_set) || - contains_gb_se(se->get_right_se(),gref_set) ); - case SE_COLREF: - if( se->is_gb() ){ - return( gref_set.count(se->get_gb_ref()) > 0); - } - return false; -// don't descend into aggr defs. - case SE_AGGR_STAR: - return false; - case SE_AGGR_SE: - return false; - case SE_FUNC: - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } - return false; -} - - -bool contains_gb_pr(predicate_t *pr, set &gref_set){ - vector op_list; - int o; - bool found = false; - - switch(pr->get_operator_type()){ - case PRED_IN: - return contains_gb_se(pr->get_left_se(), gref_set); - case PRED_COMPARE: - return (contains_gb_se(pr->get_left_se(),gref_set) - || contains_gb_se(pr->get_right_se(),gref_set) ); - case PRED_UNARY_OP: - return contains_gb_pr(pr->get_left_pr(),gref_set) ; - case PRED_BINARY_OP: - return (contains_gb_pr(pr->get_left_pr(),gref_set) - || contains_gb_pr(pr->get_right_pr(),gref_set) ); - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - } - - return found; -} - - -// Gather the set of columns accessed in this se. -// Descend into aggregate functions. - -void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){ - col_id ci; - vector operands; - int o; - - if(! se) - return; - - switch(se->get_operator_type()){ - case SE_LITERAL: - case SE_PARAM: - case SE_IFACE_PARAM: - return; - case SE_UNARY_OP: - gather_se_col_ids(se->get_left_se(),cid_set,gtbl); - return; - case SE_BINARY_OP: - gather_se_col_ids(se->get_left_se(),cid_set,gtbl); - gather_se_col_ids(se->get_right_se(),cid_set,gtbl); - return; - case SE_COLREF: - if(! se->is_gb() ){ - ci.load_from_colref(se->get_colref() ); - if(ci.tblvar_ref < 0){ - fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str()); - } - cid_set.insert(ci); - }else{ - if(gtbl==NULL){ - fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n"); - exit(1); - } - gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl); - } - return; - case SE_AGGR_STAR: - return; - case SE_AGGR_SE: - gather_se_col_ids(se->get_left_se(),cid_set,gtbl); - return; - case SE_FUNC: - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } -} - - -// Gather the set of columns accessed in this se. - -void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){ - vector op_list; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - gather_se_col_ids(pr->get_left_se(), cid_set,gtbl); - return; - case PRED_COMPARE: - gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ; - gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ; - return; - case PRED_UNARY_OP: - gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ; - return; - case PRED_BINARY_OP: - gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ; - gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ; - return; - case PRED_FUNC: - op_list = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - } -} - - - - -// Gather the set of special operator or comparison functions referenced by this se. - -void gather_se_opcmp_fcns(scalarexp_t *se, set &fcn_set){ - col_id ci; - data_type *ldt, *rdt; - int o; - vector operands; - - switch(se->get_operator_type()){ - case SE_LITERAL: - if( se->get_literal()->constructor_name() != "") - fcn_set.insert( se->get_literal()->constructor_name() ); - return; - case SE_PARAM: - return; -// SE_IFACE_PARAM should not exist when this is called. - case SE_UNARY_OP: - ldt = se->get_left_se()->get_data_type(); - if(ldt->complex_operator(se->get_op()) ){ - fcn_set.insert( ldt->get_complex_operator(se->get_op()) ); - } - gather_se_opcmp_fcns(se->get_left_se(),fcn_set); - return; - case SE_BINARY_OP: - ldt = se->get_left_se()->get_data_type(); - rdt = se->get_right_se()->get_data_type(); - - if(ldt->complex_operator(rdt, se->get_op()) ){ - fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) ); - } - gather_se_opcmp_fcns(se->get_left_se(),fcn_set); - gather_se_opcmp_fcns(se->get_right_se(),fcn_set); - return; - case SE_COLREF: - return; - case SE_AGGR_STAR: - return; - case SE_AGGR_SE: - gather_se_opcmp_fcns(se->get_left_se(),fcn_set); - return; - case SE_FUNC: - operands = se->get_operands(); - for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } -} - - -// Gather the set of special operator or comparison functions referenced by this se. - -void gather_pr_opcmp_fcns(predicate_t *pr, set &fcn_set){ - data_type *ldt, *rdt; - vector operands; - int o; - - switch(pr->get_operator_type()){ - case PRED_IN: - ldt = pr->get_left_se()->get_data_type(); - if(ldt->complex_comparison(ldt) ){ - fcn_set.insert( ldt->get_comparison_fcn(ldt) ); - } - gather_se_opcmp_fcns(pr->get_left_se(), fcn_set); - return; - case PRED_COMPARE: - ldt = pr->get_left_se()->get_data_type(); - rdt = pr->get_right_se()->get_data_type(); - if(ldt->complex_comparison(rdt) ){ - fcn_set.insert( ldt->get_comparison_fcn(rdt) ); - } - gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ; - gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ; - return; - case PRED_UNARY_OP: - gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ; - return; - case PRED_BINARY_OP: - gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ; - gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ; - return; - case PRED_FUNC: - operands = pr->get_op_list(); - for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); - } -} - - - - -// find the temporal variable divisor if any. -// Only forms allowed : temporal_colref, temporal_colref/const -// temporal_colref/const + const - - -long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){ - long long int retval = 0; - data_type *ldt, *rdt; - int o; - vector operands; - scalarexp_t *t_se, *c_se; - string the_op; - - switch(se->get_operator_type()){ - case SE_LITERAL: - return(-1); - case SE_PARAM: - return(-1); -// SE_IFACE_PARAM should not exist when this is called. - case SE_UNARY_OP: - return(-1); - case SE_BINARY_OP: - ldt = se->get_left_se()->get_data_type(); - if(ldt->is_temporal()){ - t_se = se->get_left_se(); - c_se = se->get_right_se(); - }else{ - t_se = se->get_left_se(); - c_se = se->get_right_se(); - } - if((! t_se->get_data_type()->is_temporal()) || c_se->get_data_type()->is_temporal()) - return -1; - - the_op = se->get_op(); - if(the_op == "+" || the_op == "-") - return find_temporal_divisor(t_se, gbt,fnm); - if(the_op == "/"){ - if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){ - fnm = t_se->get_colref()->get_field(); - string lits = c_se->get_literal()->to_string(); - sscanf(lits.c_str(),"%qd",&retval); - return retval; - } - } - - return -1; - case SE_COLREF: - if(se->is_gb()){ - return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm); - } - if(se->get_data_type()->is_temporal()){ - fnm = se->get_colref()->get_field(); - return 1; - } - return 0; - case SE_AGGR_STAR: - return -1; - case SE_AGGR_SE: - return -1; - case SE_FUNC: - return -1; - default: - fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n", - se->get_lineno(), se->get_charno(),se->get_operator_type()); - exit(1); - } -} - - -// impute_colnames: -// Create meaningful but unique names for the columns. -string impute_colname(vector &sel_list, scalarexp_t *se){ - set curr_names; - int s; - for(s=0;sname); - } - return impute_colname(curr_names, se); -} - -string impute_colname(set &curr_names, scalarexp_t *se){ -string ret; -scalarexp_t *seo; -vector operand_list; -string opstr; - - switch(se->get_operator_type()){ - case SE_LITERAL: - ret = "Literal"; - break; - case SE_PARAM: - ret = "Param_" + se->get_param_name(); - break; - case SE_IFACE_PARAM: - ret = "Iparam_" + se->get_ifpref()->get_pname(); - break; - case SE_COLREF: - ret = se->get_colref()->get_field() ; - break; - case SE_UNARY_OP: - case SE_BINARY_OP: - ret = "Field"; - break; - case SE_AGGR_STAR: - ret = "Cnt"; - break; - case SE_AGGR_SE: - ret = se->get_op(); - seo = se->get_left_se(); - switch(se->get_left_se()->get_operator_type()){ - case SE_PARAM: - ret += "_PARAM_"+seo->get_param_name(); - break; - case SE_IFACE_PARAM: - ret += "_IPARAM_"+seo->get_ifpref()->get_pname(); - break; - case SE_COLREF: - opstr = seo->get_colref()->get_field(); - if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){ - ret += "_" + opstr; - }else{ - ret = opstr; - } - break; - case SE_AGGR_STAR: - case SE_AGGR_SE: - opstr = seo->get_op(); - if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){ - ret += "_" + seo->get_op(); - }else{ - ret = opstr; - } - break; - case SE_FUNC: - opstr = seo->get_op(); - ret += "_" + seo->get_op(); - break; - case SE_UNARY_OP: - case SE_BINARY_OP: - ret += "_SE"; - break; - default: - ret += "_"; - break; - } - break; - case SE_FUNC: - ret = se->get_op(); - operand_list = se->get_operands(); - if(operand_list.size() > 0){ - seo = operand_list[0]; - switch(seo->get_operator_type()){ - case SE_PARAM: - ret += "_PARAM_"+seo->get_param_name(); - break; - case SE_IFACE_PARAM: - ret += "_IPARAM_"+seo->get_ifpref()->get_pname(); - break; - case SE_COLREF: - ret += "_" + seo->get_colref()->get_field(); - break; - case SE_AGGR_STAR: - case SE_AGGR_SE: - case SE_FUNC: - ret += "_" + seo->get_op(); - break; - case SE_UNARY_OP: - case SE_BINARY_OP: - ret += "_SE"; - break; - default: - ret += "_"; - break; - } - }else{ - ret += "_func"; - } - break; - } - - if(ret == "Field"){ - if(curr_names.count("Field0") == 0) - ret = "Field0"; - } - int iter = 1; - string base = ret; - while(curr_names.count(ret) > 0){ - char tmpstr[500]; - sprintf(tmpstr,"%s%d",base.c_str(),iter); - ret = tmpstr; - iter++; - } - - - curr_names.insert(ret); - return(ret); - -} - - - -////////////////////////////////////////////////////////////////////// -////////////// Methods of defined classes /////////////////////// -////////////////////////////////////////////////////////////////////// - -// helper fcn to enable col_id as map key. - - bool operator<(const col_id &cr1, const col_id &cr2){ - if(cr1.tblvar_ref < cr2.tblvar_ref) return(true); - if(cr1.tblvar_ref == cr2.tblvar_ref) - return (cr1.field < cr2.field); - return(false); - } - - -// Process the GB variables. -// At parse time, GB vars are either GB_COLREF, -// or GB_COMPUTED if the AS keyword is used. -// Cast GB vars as named entities with a SE as -// their definition (the colref in the case of GB_COLREF). -// -// TODO: if there is a gbref in a gbdef, -// then I won't be able to compute the value without -// a complex dependence analysis. So verify that there is no -// gbref in any of the GBdefs. -// BUT: a GBVAR_COLREF should be converted to a regular colref, -// which is not yet done. -// -// TODO : sort out issue of GBVAR naming and identification. -// Determine where it is advantageous to convert GV_COLREF -// GBVARS to colrefs -- e.g. in group definition, in the WHERE clause, -// etc. -// -// return -1 if there is a problem. - -int gb_table::add_gb_attr( - gb_t *gb, - tablevar_list_t *fm, - table_list *schema, - table_exp_t *fta_tree, - ext_fcn_list *Ext_fcns - ){ - colref_t *cr; - int retval; - gb_table_entry *entry; - - if(gb->type == GB_COLREF){ - if(gb->table != "") - cr = new colref_t( - gb->interface.c_str(),gb->table.c_str(), gb->name.c_str() - ); - else - cr = new colref_t(gb->name.c_str()); - - int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema); - if(tablevar_ref < 0) return(tablevar_ref); - - cr->set_tablevar_ref(tablevar_ref); - cr->set_schema_ref(fm->get_schema_ref(tablevar_ref)); - cr->set_interface(""); - cr->set_table_name(fm->get_tablevar_name(tablevar_ref)); - - entry = new gb_table_entry(); - entry->name.field = cr->get_field(); - entry->name.tblvar_ref = tablevar_ref; - entry->definition = new scalarexp_t(cr); - entry->ref_type = GBVAR_COLREF; - }else{ - entry = new gb_table_entry(); - entry->name.field = gb->name; - entry->name.tblvar_ref = -1; - entry->definition = gb->def; - entry->ref_type = GBVAR_SE; - } - - retval = verify_colref(entry->definition, fm, schema, NULL); - if(retval < 0) return(retval); - - retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns); - if(retval < 0) return(retval); - -// Verify that the gbvar def references no aggregates and no gbvars. - if(count_gb_se(entry->definition) > 0){ - fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() ); - return(-1); - } - if(count_aggr_se(entry->definition, true) > 0){ - fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() ); - return(-1); - } - -// Check for duplicates - int i; - for(i=0;iname.field == gtbl[i]->name.field){ - fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size()); - return -1; - } - } - - - gtbl.push_back(entry); - - return(1); -} - - -// Try to determine if the colref is actually -// a gbvar ref. -// a) if no tablename associated with the colref, -// 1) try to find a matching GB_COMPUTED gbvar. -// 2) failing that, try to match to a single tablevar -// 3) if successful, search among GB_COLREF -// b) else, try to match the tablename to a single tablevar -// if successful, search among GB_COLREF -int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){ - string c_field = cr->get_field(); - int c_tblref; - int n_tbl; - int i; - vector candidates; - - if(cr->uses_default_table()){ - for(i=0;iref_type==GBVAR_SE && c_field == gtbl[i]->name.field){ - return(i); - } - } - candidates = find_source_tables(c_field, fm, schema); - if(candidates.size() != 1) return(-1); // can't find unique tablevar - for(i=0;iref_type==GBVAR_COLREF && - c_field == gtbl[i]->name.field && - candidates[0] == gtbl[i]->name.tblvar_ref){ - return(i); - } - } - return(-1); // colref is not in gb table. - } - -// A table name must have been given. - vector fm_tbls = fm->get_table_list(); - string interface = cr->get_interface(); - string table_name = cr->get_table_name(); - - -// if no interface name is given, try to search for the table -// name among the tablevar names first. - if(interface==""){ - for(i=0;iget_var_name() && interface == fm_tbls[i]->get_interface()) - candidates.push_back(i); - } - if(candidates.size()>1) return(-1); - if(candidates.size()==1){ - for(i=0;iref_type==GBVAR_COLREF && - c_field == gtbl[i]->name.field && - candidates[0] == gtbl[i]->name.tblvar_ref){ - return(i); - } - } - return(-1); // match semantics of bind to tablevar name first - } - } - -// Interface name given, or no interface but no -// no tablevar match. Try to match on schema name. - for(i=0;iget_var_name() && interface == fm_tbls[i]->get_interface()) - candidates.push_back(i); - } - if(candidates.size() != 1) return(-1); - for(i=0;iref_type==GBVAR_COLREF && - c_field == gtbl[i]->name.field && - candidates[0] == gtbl[i]->name.tblvar_ref){ - return(i); - } - } - -// No match found. - return(-1); - -} - - - -bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){ - if(is_builtin()){ - if( (op == "COUNT") || (op == "SUM") || (op == "MIN") || - (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") || - (op == "XOR_AGGR") ) - return(true); - }else{ - return Ext_fcns->fta_legal(fcn_id); - } - return(false); -} - - -// Return the set of subaggregates required to compute -// the desired aggregate. THe operand of the subaggregates -// can only be * or the scalarexp used in the superaggr. -// This is indicated by the use_se vector. - -// Is this code generation specific? - -vector aggr_table_entry::get_subaggr_fcns(vector &use_se){ - vector ret; - - if(op == "COUNT"){ - ret.push_back("COUNT"); - use_se.push_back(false); - } - if(op == "SUM"){ - ret.push_back("SUM"); - use_se.push_back(true); - } - if(op == "AVG"){ - ret.push_back("SUM"); - ret.push_back("COUNT"); - use_se.push_back(true); - use_se.push_back(false); - } - if(op == "MIN"){ - ret.push_back("MIN"); - use_se.push_back(true); - } - if(op == "MAX"){ - ret.push_back("MAX"); - use_se.push_back(true); - } - if(op == "AND_AGGR"){ - ret.push_back("AND_AGGR"); - use_se.push_back(true); - } - if(op == "OR_AGGR"){ - ret.push_back("OR_AGGR"); - use_se.push_back(true); - } - if(op == "XOR_AGGR"){ - ret.push_back("XOR_AGGR"); - use_se.push_back(true); - } - - return(ret); -} - -// Code generation specific? - -vector aggr_table_entry::get_subaggr_dt(){ - vector ret; - data_type *dt; - - if(op == "COUNT"){ - dt = new data_type("Int"); // was Uint - ret.push_back( dt ); - } - if(op == "SUM"){ - dt = new data_type(); - dt->set_aggr_data_type( "SUM",operand->get_data_type() ); - ret.push_back(dt); - } - if(op == "AVG"){ - dt = new data_type(); - dt->set_aggr_data_type( "SUM",operand->get_data_type() ); - ret.push_back( dt ); - dt = new data_type("Int"); - ret.push_back( dt ); - } - if(op == "MIN"){ - dt = new data_type(); - dt->set_aggr_data_type( "MIN",operand->get_data_type() ); - ret.push_back( dt ); - } - if(op == "MAX"){ - dt = new data_type(); - dt->set_aggr_data_type( "MAX",operand->get_data_type() ); - ret.push_back( dt ); - } - if(op == "AND_AGGR"){ - dt = new data_type(); - dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() ); - ret.push_back( dt ); - } - if(op == "OR_AGGR"){ - dt = new data_type(); - dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() ); - ret.push_back( dt ); - } - if(op == "XOR_AGGR"){ - dt = new data_type(); - dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() ); - ret.push_back( dt ); - } - - return(ret); -} - -// Code generation specific? - -scalarexp_t *aggr_table_entry::make_superaggr_se(vector se_refs){ - scalarexp_t *se_l, *se_r, *ret_se = NULL; - - if(op == "COUNT"){ - ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]); - return(ret_se); - } - if(op == "SUM"){ - ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]); - return(ret_se); - } - if(op == "AVG"){ - se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]); - se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]); - - ret_se = new scalarexp_t("/", se_l, se_r); - return(ret_se); - } - if(op == "MIN"){ - ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]); - return(ret_se); - } - if(op == "MAX"){ - ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]); - return(ret_se); - } - if(op == "AND_AGGR"){ - ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]); - return(ret_se); - } - if(op == "OR_AGGR"){ - ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]); - return(ret_se); - } - if(op == "XOR_AGGR"){ - ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]); - return(ret_se); - } - - return(ret_se); - -} - - -// Add a built-in aggr. -int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){ - int i; - - for(i=0;iis_builtin() && op == agr_tbl[i]->op - && is_equivalent_se(se,agr_tbl[i]->operand) ){ -// && is_super == agr_tbl[i]->is_superaggr()) - if(is_super) agr_tbl[i]->set_super(true); - return(i); - } - } - - aggr_table_entry *ate = new aggr_table_entry(op, se, is_super); - agr_tbl.push_back(ate); - return(agr_tbl.size() - 1); -} - -// add a UDAF -int aggregate_table::add_aggr(string op, int fcn_id, vector opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){ - int i,o; - - for(i=0;iis_builtin()) && fcn_id == agr_tbl[i]->fcn_id - && opl.size() == agr_tbl[i]->oplist.size() ){ -// && is_super == agr_tbl[i]->is_superaggr() ){ - for(o=0;ooplist[o]) ) - break; - } - if(o == opl.size()){ - if(is_super) agr_tbl[i]->set_super(true); - return i; - } - } - } - - aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout); - agr_tbl.push_back(ate); - return(agr_tbl.size() - 1); -} - - -int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){ - int i; - - for(i=0;iis_equivalent(cplx_lit_tbl[i])){ - hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref; - return(i); - } - } - - cplx_lit_tbl.push_back(l); - hdl_ref_tbl.push_back(is_handle_ref); - return(cplx_lit_tbl.size() - 1); -} - - - -//------------------------------------------------------------ -// parse_fta code - - -gb_t *gb_t::duplicate(){ - gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str()); - ret->type = type; - ret->lineno = lineno; - ret->charno = charno; - if(def != NULL) - ret->def = dup_se(def,NULL); - return ret; -} +/* ------------------------------------------------ +Copyright 2014 AT&T Intellectual Property + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + ------------------------------------------- */ + +#include + +#include "parse_fta.h" +#include "parse_schema.h" +#include "parse_ext_fcns.h" + + +#include"analyze_fta.h" + +#include"type_objects.h" + +#include +#include + +using namespace std; + +extern string hostname; // name of the current host + +// Utility function + +string int_to_string(int i){ + string ret; + char tmpstr[100]; + sprintf(tmpstr,"%d",i); + ret=tmpstr; + return(ret); +} + + +// Globals + +// These represent derived information from the +// query analysis stage. I extract them from a class, +// perhaps this is dangerous. + +static gb_table *gb_tbl=NULL; // Table of all group-by attributes. +static aggregate_table *aggr_tbl=NULL; // Table of all referenced aggregates. + +// static cplx_lit_table *complex_literals=NULL; // Table of literals with constructors. +static param_table *param_tbl=NULL; // Table of all referenced parameters. + +vector partial_fcns_list; +int wh_partial_start, wh_partial_end; +int gb_partial_start, gb_partial_end; +int aggr_partial_start, aggr_partial_end; +int sl_partial_start, sl_partial_end; + + +// Infer the table of a column refrence and return the table ref. +// First, extract the +// field name and table name. If no table name is used, +// search all tables to try to find a unique match. +// Of course, plenty of error checking. + +// Return the set of tablevar indices in the FROM clause +// which contain a field with the same name. +vector find_source_tables(string field, tablevar_list_t *fm, table_list *Schema){ + int i; + vector tv; +// vector tn = fm->get_schema_names(); + vector tn = fm->get_schema_refs(); +// printf("Calling find_source_tables on field %s\n",field.c_str()); + for(i=0;icontains_field(Schema->find_tbl(tn[i]), field) ){ + if(Schema->contains_field(tn[i], field) ){ + tv.push_back(i); +// printf("\tfound in table %s\n",tn[i].c_str()); + } + } + return(tv); +} + +int infer_tablevar_from_ifpref(ifpref_t *ir, tablevar_list_t *fm){ + int i; + string tname = ir->get_tablevar(); + if(tname ==""){ + if(fm->size()==1) return 0; + fprintf(stderr,"ERROR, interface parameter %s has no tablevar specified and there is more than one table variable in the FROM clause.\n",ir->to_string().c_str()); + return -1; + } + for(i=0;isize();++i){ + if(tname == fm->get_tablevar_name(i)) + return i; + } + fprintf(stderr,"ERROR, interface parameter %s has no matching table variable in the FROM clause.\n",ir->to_string().c_str()); + return -1; +} + + +// compute the index of the tablevar in the from clause that the +// colref is in. +// return -1 if no tablevar can be imputed. +int infer_tablevar_from_colref(colref_t *cr, tablevar_list_t *fm, table_list *schema){ + int i; + string table_name; + int table_ref; + vector tv; + vector fm_tbls = fm->get_table_list(); + + string field = cr->get_field(); + +// printf("Calling infer_tablevar_from_colref on field %s.\n",field.c_str()); + if(cr->uses_default_table() ){ + tv = find_source_tables(field, fm, schema); + if(tv.size() > 1){ + fprintf(stderr,"ERROR, line %d, character %d : field %s exists in multiple table variables: ", + cr->get_lineno(), cr->get_charno(),field.c_str() ); + for(i=0;ito_string().c_str() ); + } + fprintf(stderr,"\n\tYou must specify one of these.\n"); + return(-1); + } + if(tv.size() == 0){ + fprintf(stderr,"ERROR, line %d, character %d: field %s does not exist in any table.\n", + cr->get_lineno(), cr->get_charno(),field.c_str() ); + return(-1); + } + + return(tv[0]); + } + +// The table source is named -- but is it a schema name +// or a var name? + + string interface = cr->get_interface(); + table_name = cr->get_table_name(); + +// if interface is not specified, prefer to look at the tablevar names +// Check for duplicates. + if(interface==""){ + for(i=0;iget_var_name()) + tv.push_back(i); + } + if(tv.size() > 1){ + fprintf(stderr,"ERROR, there are two or more table variables for column ref %s.%s (line %d, char %d).\n",table_name.c_str(), field.c_str(), cr->get_lineno(), cr->get_charno() ); + return(-1); + } + if(tv.size() == 1) return(tv[0]); + } + +// Tableref not found by looking at tableref vars, or an interface +// was specified. Try to match on schema and interface. +// Check for duplicates. + for(i=0;iget_var_name() && interface == fm_tbls[i]->get_interface()) + tv.push_back(i); + } + if(tv.size() > 1){ + fprintf(stderr,"ERROR, (line %d, char %d) there are two or more table variables whose schemas match for column ref \n", + cr->get_lineno(), cr->get_charno() ); + if(interface != "") fprintf(stderr,"%s.",interface.c_str()); + fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str()); + return(-1); + } + + if(tv.size() == 0 ){ + fprintf(stderr,"ERROR, line %d, character %d : no table reference found for column ref ", cr->get_lineno(), cr->get_charno()); + if(interface != "") fprintf(stderr,"%s.",interface.c_str()); + fprintf(stderr,"%s.%s\n",table_name.c_str(), field.c_str()); + return(-1) ; + } + + return(tv[0]); +} + + +// Reset temporal properties of a scalar expression +void reset_temporal(scalarexp_t *se){ + col_id ci; + vector operands; + int o; + + se->get_data_type()->reset_temporal(); + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + case SE_COLREF: + return; + case SE_UNARY_OP: + reset_temporal(se->get_left_se()); + return; + case SE_BINARY_OP: + reset_temporal(se->get_left_se()); + reset_temporal(se->get_right_se()); + return; + case SE_AGGR_STAR: + return; + case SE_AGGR_SE: + reset_temporal(se->get_left_se()); + return; + case SE_FUNC: + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } +} + +// Verify that column references exist in their +// declared tables. As a side effect, assign +// their data types. Other side effects : +// +// return -1 on error + +int verify_colref(scalarexp_t *se, tablevar_list_t *fm, + table_list *schema, gb_table *gtbl){ + int l_ret, r_ret; + int gb_ref; + colref_t *cr; + ifpref_t *ir; + string field, table_source, type_name; + data_type *dt; + vector tn; + vector tv; + int table_var; + int o; + vector operands; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + return(1); + case SE_IFACE_PARAM: + ir = se->get_ifpref(); + table_var = infer_tablevar_from_ifpref(ir, fm); + if(table_var < 0) return(table_var); + ir->set_tablevar_ref(table_var); + return(1); + case SE_UNARY_OP: + return( verify_colref(se->get_left_se(), fm, schema, gtbl) ); + case SE_BINARY_OP: + l_ret = verify_colref(se->get_left_se(), fm, schema, gtbl); + r_ret = verify_colref(se->get_right_se(), fm, schema, gtbl); + if( (l_ret < 0 ) || (r_ret < 0) ) return(-1); + return(1); + case SE_COLREF: + cr = se->get_colref(); + field = cr->get_field(); + +// Determine if this is really a GB ref. +// (the parser can only see that its a colref). + if(gtbl != NULL){ + gb_ref = gtbl->find_gb(cr, fm, schema); + }else{ + gb_ref = -1; + } + + se->set_gb_ref(gb_ref); + + if(gb_ref < 0){ +// Its a colref, verify its existance and +// record the data type. + table_var = infer_tablevar_from_colref(cr,fm,schema); + if(table_var < 0) return(table_var); + + // Store the table ref in the colref. + cr->set_tablevar_ref(table_var); + cr->set_schema_ref(fm->get_schema_ref(table_var)); + cr->set_interface(""); + cr->set_table_name(fm->get_tablevar_name(table_var)); + + if(! schema->contains_field(cr->get_schema_ref(), field)){ + fprintf(stderr, "Error, field %s is not in stream %s\n", field.c_str(), schema->get_table_name( cr->get_schema_ref() ).c_str()); + return -1; + } + + type_name = schema->get_type_name(cr->get_schema_ref(), field); + param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field); + dt = new data_type(type_name, modifiers); + se->set_data_type(dt); + }else{ +// Else, its a gbref, use the GB var's data type. + se->set_data_type(gtbl->get_data_type(gb_ref)); + } + + return(1); + case SE_AGGR_STAR: + return(1); + case SE_AGGR_SE: + return( verify_colref(se->get_left_se(), fm, schema, gtbl) ); + case SE_FUNC: + operands = se->get_operands(); + r_ret = 1; + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + return(-1); + } + return(-1); +} + + +int verify_predicate_colref(predicate_t *pr, tablevar_list_t *fm, table_list *schema, gb_table *gtbl){ + int l_ret, r_ret; + std::vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + return(verify_colref(pr->get_left_se(),fm,schema, gtbl) ); + case PRED_COMPARE: + l_ret = verify_colref(pr->get_left_se(),fm,schema, gtbl) ; + r_ret = verify_colref(pr->get_right_se(),fm,schema, gtbl) ; + if( (l_ret < 0) || (r_ret < 0) ) return(-1); + return(1); + case PRED_UNARY_OP: + return(verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl)); + case PRED_BINARY_OP: + l_ret = verify_predicate_colref(pr->get_left_pr(),fm,schema, gtbl) ; + r_ret = verify_predicate_colref(pr->get_right_pr(),fm,schema, gtbl) ; + if( (l_ret < 0) || (r_ret < 0) ) return(-1); + return(1); + case PRED_FUNC: + op_list = pr->get_op_list(); + l_ret = 0; + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + } + + return(-1); +} + + +bool literal_only_se(scalarexp_t *se){ // really only literals. + int o; + vector operands; + + if(se == NULL) return(1); + switch(se->get_operator_type()){ + case SE_LITERAL: + return(true); + case SE_PARAM: + return(false); + case SE_IFACE_PARAM: + return(false); + case SE_UNARY_OP: + return( literal_only_se(se->get_left_se()) ); + case SE_BINARY_OP: + return( literal_only_se(se->get_left_se()) && + literal_only_se(se->get_right_se()) ); + case SE_COLREF: + return false; + case SE_AGGR_STAR: + return false; + case SE_AGGR_SE: + return false; + return(1); + case SE_FUNC: + return false; + default: + return false; + } + return false; +} + + + + +// Verify that column references exist in their +// declared tables. As a side effect, assign +// their data types. Other side effects : +// + +int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema){ + int l_ret, r_ret; + int gb_ref; + colref_t *cr; + string field, table_source, type_name; + data_type *dt; + vector tn; + vector tv; + int tablevar_ref; + int o; + vector operands; + + if(se == NULL) return(1); + + switch(se->get_operator_type()){ + case SE_LITERAL: + return(1); + case SE_PARAM: + return(1); + case SE_IFACE_PARAM: + return(1); + case SE_UNARY_OP: + return( bind_to_schema_se(se->get_left_se(), fm, schema) ); + case SE_BINARY_OP: + l_ret = bind_to_schema_se(se->get_left_se(), fm, schema); + r_ret = bind_to_schema_se(se->get_right_se(), fm, schema); + if( (l_ret < 0 ) || (r_ret < 0) ) return(-1); + return(1); + case SE_COLREF: + if(se->is_gb()) return(1); // gb ref not a colref. + + cr = se->get_colref(); + field = cr->get_field(); + + tablevar_ref = infer_tablevar_from_colref(cr,fm,schema); + if(tablevar_ref < 0){ + return(tablevar_ref); + }else{ + // Store the table ref in the colref. + cr->set_tablevar_ref(tablevar_ref); + cr->set_schema_ref(fm->get_schema_ref(tablevar_ref)); + cr->set_interface(""); + cr->set_table_name(fm->get_tablevar_name(tablevar_ref)); + +// Check the data type + type_name = schema->get_type_name(cr->get_schema_ref(), field); + param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field); + data_type dt(type_name, modifiers); +// if(! dt.equals(se->get_data_type()) ){ +// if(! dt.subsumes_type(se->get_data_type()) ){ + if(! se->get_data_type()->subsumes_type(&dt) ){ + fprintf(stderr,"INTERNAL ERROR in bind_to_schema_se: se's type is %d, table's is %d, colref is %s.\n", + dt.type_indicator(), se->get_data_type()->type_indicator(), cr->to_string().c_str()); + return(-1); + } + } + return(1); + case SE_AGGR_STAR: + return(1); + case SE_AGGR_SE: // Probably I should just return, + // aggregate se's are explicitly bound to the schema. +// return( bind_to_schema_se(se->get_left_se(), fm, schema, gtbl) ); + return(1); + case SE_FUNC: + if(se->get_aggr_ref() >= 0) return 1; + + operands = se->get_operands(); + r_ret = 1; + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + return(-1); + } + return(-1); +} + + +int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema){ + int l_ret, r_ret; + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + return(bind_to_schema_se(pr->get_left_se(),fm,schema) ); + case PRED_COMPARE: + l_ret = bind_to_schema_se(pr->get_left_se(),fm,schema) ; + r_ret = bind_to_schema_se(pr->get_right_se(),fm,schema) ; + if( (l_ret < 0) || (r_ret < 0) ) return(-1); + return(1); + case PRED_UNARY_OP: + return(bind_to_schema_pr(pr->get_left_pr(),fm,schema)); + case PRED_BINARY_OP: + l_ret = bind_to_schema_pr(pr->get_left_pr(),fm,schema) ; + r_ret = bind_to_schema_pr(pr->get_right_pr(),fm,schema) ; + if( (l_ret < 0) || (r_ret < 0) ) return(-1); + return(1); + case PRED_FUNC: + op_list = pr->get_op_list(); + l_ret = 0; + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + } + + return(-1); +} + + + + + + +// verify_colref assigned data types to the column refs. +// Now assign data types to all other nodes in the +// scalar expression. +// +// return -1 on error + +temporal_type compute_se_temporal(scalarexp_t *se, map &tcol){ + int l_ret, r_ret; + data_type *dt; + bool bret; + vector operands; + vector odt; + int o, fcn_id; + vector handle_ind; + + switch(se->get_operator_type()){ + case SE_LITERAL: + return(constant_t); + case SE_PARAM: + return(varying_t); + case SE_IFACE_PARAM: + return(varying_t); // actually, this should not be called. + case SE_UNARY_OP: + return data_type::compute_temporal_type( + compute_se_temporal(se->get_left_se(), tcol), se->get_op() + ); + case SE_BINARY_OP: + return data_type::compute_temporal_type( + compute_se_temporal(se->get_left_se(), tcol), + compute_se_temporal(se->get_right_se(), tcol), + se->get_left_se()->get_data_type()->get_type(), + se->get_right_se()->get_data_type()->get_type(), + se->get_op() + ); + case SE_COLREF: + { + col_id cid(se->get_colref() ); + if(tcol.count(cid) > 0){ return tcol[cid]; + }else{ return varying_t;} + } + case SE_AGGR_STAR: + case SE_AGGR_SE: + case SE_FUNC: + default: + return varying_t; + } + return(varying_t); +} + + + +// verify_colref assigned data types to the column refs. +// Now assign data types to all other nodes in the +// scalar expression. +// +// return -1 on error + +int assign_data_types(scalarexp_t *se, table_list *schema, + table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){ + int l_ret, r_ret; + data_type *dt; + bool bret; + vector operands; + vector odt; + int o, fcn_id; + vector handle_ind; + vector constant_ind; + + switch(se->get_operator_type()){ + case SE_LITERAL: + dt = new data_type( se->get_literal()->get_type() ); + se->set_data_type(dt); + if( ! dt->is_defined() ){ + fprintf(stderr,"ERROR, Literal type is undefined, line =%d, char = %d, literal=%s\n", + se->get_literal()->get_lineno(),se->get_literal()->get_charno(), se->get_literal()->to_string().c_str() ); + return(-1); + }else{ + return(1); + } + case SE_PARAM: + { + string pname = se->get_param_name(); + dt = param_tbl->get_data_type(pname); +// A SE_PARRAM can change its value mid-query so using one +// to set a window is dangerous. TODO check for this and issue a warning. + dt->set_temporal(constant_t); + se->set_data_type(dt); + if( ! dt->is_defined() ){ + fprintf(stderr,"ERROR, parameter %s has undefined type, line =%d, char = %d\n", + pname.c_str(), se->get_lineno(),se->get_charno() ); + return(-1); + } + return(1); + } + case SE_IFACE_PARAM: + dt = new data_type( "STRING" ); + se->set_data_type(dt); + return(1); + case SE_UNARY_OP: + l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns); + if(l_ret < 0) return -1; + + dt = new data_type(se->get_left_se()->get_data_type(),se->get_op() ); + se->set_data_type(dt); + if( ! dt->is_defined() ){ + fprintf(stderr,"ERROR, unary operator %s not defined for type %s, line=%d, char = %d\n", + se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(), + se->get_lineno(), se->get_charno() ); + return(-1); + }else{ + return(1); + } + case SE_BINARY_OP: + l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns); + r_ret = assign_data_types(se->get_right_se(), schema, fta_tree, Ext_fcns); + if( (l_ret < 0 ) || (r_ret < 0) ) return(-1); + + dt = new data_type(se->get_left_se()->get_data_type(),se->get_right_se()->get_data_type(),se->get_op() ); + se->set_data_type(dt); + if( ! dt->is_defined() ){ + fprintf(stderr,"ERROR, Binary operator %s not defined for type %s, %s line=%d, char = %d\n", + se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(), + se->get_right_se()->get_data_type()->to_string().c_str(), + se->get_lineno(), se->get_charno() ); + return(-1); + }else{ + return(1); + } + case SE_COLREF: + dt = se->get_data_type(); + bret = dt->is_defined(); + if( bret ){ + return(1); + }else{ + fprintf(stderr,"ERROR, column reference type is undefined, line =%d, char = %d, colref=%s\n", + se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() ); + return(-1); + } + case SE_AGGR_STAR: + dt = new data_type("Int"); // changed Uint to Int + se->set_data_type(dt); + return(1); + case SE_AGGR_SE: + l_ret = assign_data_types(se->get_left_se(), schema, fta_tree, Ext_fcns); + if(l_ret < 0) return -1; + + dt = new data_type(); + dt->set_aggr_data_type(se->get_op(), se->get_left_se()->get_data_type()); + se->set_data_type(dt); + + if( ! dt->is_defined() ){ + fprintf(stderr,"ERROR, aggregate %s not defined for type %s, line=%d, char = %d\n", + se->get_op().c_str(), se->get_left_se()->get_data_type()->to_string().c_str(), + se->get_lineno(), se->get_charno() ); + return(-1); + }else{ + return(1); + } + case SE_FUNC: + + operands = se->get_operands(); + r_ret = 1; + for(o=0;oget_data_type()); + if(l_ret < 0) r_ret = -1; + } + if(r_ret < 0) return(r_ret); + +// Is it an aggregate extraction function? + fcn_id = Ext_fcns->lookup_extr(se->get_op(), odt); + if(fcn_id >= 0){ + int actual_fcn_id = Ext_fcns->get_actual_fcn_id(fcn_id); + int subaggr_id = Ext_fcns->get_subaggr_id(fcn_id); + int n_fcn_params = Ext_fcns->get_nparams(actual_fcn_id); +// Construct a se for the subaggregate. + vector op_a; + int n_aggr_oprs = operands.size()-n_fcn_params+1; + for(o=0;o handle_a = Ext_fcns->get_handle_indicators(subaggr_id); + for(o=0;oget_operator_type() != SE_LITERAL && + op_a[o]->get_operator_type() != SE_IFACE_PARAM && + op_a[o]->get_operator_type() != SE_PARAM){ + fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", + o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + vector is_const_a=Ext_fcns->get_const_indicators(subaggr_id); + for(o=0;oget_data_type()->get_temporal() != constant_t){ + fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s (extractor %s) must be constant.\n Line=%d, char=%d.\n", + o+1, Ext_fcns->get_fcn_name(subaggr_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + + scalarexp_t *se_a = new scalarexp_t(Ext_fcns->get_fcn_name(subaggr_id).c_str(), op_a); + se_a->set_fcn_id(subaggr_id); + se_a->set_data_type(Ext_fcns->get_fcn_dt(subaggr_id)); + se_a->set_aggr_id(0); // label this as a UDAF. + + +// Change this se to be the actual function + vector op_f; + op_f.push_back(se_a); + for(o=n_aggr_oprs;o handle_f = Ext_fcns->get_handle_indicators(actual_fcn_id); + for(o=0;oget_operator_type() != SE_LITERAL && + op_f[o]->get_operator_type() != SE_IFACE_PARAM && + op_f[o]->get_operator_type() != SE_PARAM){ + fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", + o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + vector is_const_f=Ext_fcns->get_const_indicators(actual_fcn_id); + for(o=0;oget_data_type()->get_temporal() != constant_t){ + fprintf(stderr,"ERROR, the %d-th parameter of fcn %s (extractor %s) must be constant.\n Line=%d, char=%d.\n", + o+1, Ext_fcns->get_fcn_name(actual_fcn_id).c_str(), se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + + se->param_list = op_f; + se->op = Ext_fcns->get_fcn_name(actual_fcn_id); + se->set_fcn_id(actual_fcn_id); + se->set_data_type(Ext_fcns->get_fcn_dt(actual_fcn_id)); + return(1); + } + if(fcn_id == -2){ + fprintf(stderr,"Warning: multiple subsuming aggregate extractors found for %s\n",se->get_op().c_str()); + } + +// Is it a UDAF? + fcn_id = Ext_fcns->lookup_udaf(se->get_op(), odt); + if(fcn_id >= 0){ + se->set_fcn_id(fcn_id); + se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id)); + se->set_aggr_id(0); // label this as a UDAF. +// Finally, verify that all HANDLE parameters are literals or params. + handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() ); + for(o=0;oget_operator_type() != SE_LITERAL && + operands[o]->get_operator_type() != SE_IFACE_PARAM && + operands[o]->get_operator_type() != SE_PARAM){ + fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", + o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id()); + for(o=0;oget_data_type()->get_temporal() != constant_t){ + fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n", + o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + +// UDAFS as superaggregates not yet supported. +if(se->is_superaggr()){ +fprintf(stderr,"WARNING: UDAF superagggregates (%s) are not yet supported, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno()); +se->set_superaggr(false); +} + return(1); + } + if(fcn_id == -2){ + fprintf(stderr,"Warning: multiple subsuming UDAFs found for %s\n",se->get_op().c_str()); + } + +// Is it a stateful fcn? + fcn_id = Ext_fcns->lookup_sfun(se->get_op(), odt); + if(fcn_id >= 0){ + se->set_fcn_id(fcn_id); + se->set_data_type(Ext_fcns->get_fcn_dt(fcn_id)); + se->set_storage_state(Ext_fcns->get_storage_state(fcn_id)); // label as sfun +// Finally, verify that all HANDLE parameters are literals or params. + handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() ); + for(o=0;oget_operator_type() != SE_LITERAL && + operands[o]->get_operator_type() != SE_IFACE_PARAM && + operands[o]->get_operator_type() != SE_PARAM){ + fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", + o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id()); + for(o=0;oget_data_type()->get_temporal() != constant_t){ + fprintf(stderr,"ERROR, the %d-th parameter of UDAF %s must be constant.\n Line=%d, char=%d.\n", + o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + + if(se->is_superaggr()){ + fprintf(stderr,"WARNING: stateful function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno()); + } + return(1); + } + if(fcn_id == -2){ + fprintf(stderr,"Warning: multiple stateful fcns found for %s\n",se->get_op().c_str()); + } + + +// Is it a regular function? + fcn_id = Ext_fcns->lookup_fcn(se->get_op(), odt); + if( fcn_id < 0 ){ + fprintf(stderr,"ERROR, no external function %s(",se->get_op().c_str()); + for(o=0;o0) fprintf(stderr,", "); + fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str()); + } + fprintf(stderr,") is defined, line %d, char %d\n", se->get_lineno(), se->get_charno() ); + if(fcn_id == -2) fprintf(stderr,"(multiple subsuming functions found)\n"); + + return(-1); + } + + se->set_fcn_id(fcn_id); + dt = Ext_fcns->get_fcn_dt(fcn_id); + + if(! dt->is_defined() ){ + fprintf(stderr,"ERROR, external function %s(",se->get_op().c_str()); + for(o=0;o0) fprintf(stderr,", "); + fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str()); + } + fprintf(stderr,") has undefined type, line %d, char %d\n", se->get_lineno(), se->get_charno() ); + return(-1); + } + +// Finally, verify that all HANDLE parameters are literals or params. + handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() ); + for(o=0;oget_operator_type() != SE_LITERAL && + operands[o]->get_operator_type() != SE_IFACE_PARAM && + operands[o]->get_operator_type() != SE_PARAM){ + fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", + o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + constant_ind = Ext_fcns->get_const_indicators(se->get_fcn_id()); + for(o=0;oget_data_type()->get_temporal() != constant_t){ + fprintf(stderr,"ERROR, the %d-th parameter of function %s must be constant.\n Line=%d, char=%d.\n", + o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); + return(-1); + } + } + } + + + if(se->is_superaggr()){ + fprintf(stderr,"WARNING: function %s cannot be marked as a superaggregate, ignored.\n Line=%d, char=%d.\n", se->get_op().c_str(),se->get_lineno(), se->get_charno()); + } + + se->set_data_type(dt); + return(1); + default: + fprintf(stderr,"INTERNAL ERROR in assign_data_types, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + return(-1); + } + return(-1); +} + + +int assign_predicate_data_types(predicate_t *pr, table_list *schema, + table_exp_t *fta_tree, ext_fcn_list *Ext_fcns){ + int l_ret, r_ret; + int i; + data_type *dt, *dtl; + vector odt; + vector litl; + vector operands; + vector handle_ind; + vector constant_ind; + int o, fcn_id; + + switch(pr->get_operator_type()){ + case PRED_IN: + l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set); + litl = pr->get_lit_vec(); + dt = pr->get_left_se()->get_data_type(); + + for(i=0;iget_type() ); + if( ! dt->is_comparable(dtl,pr->get_op()) ){ + fprintf(stderr,"ERROR line %d, char %d: IS_IN types must be comparable (lhs type is %s, rhs type is %s).\n", + litl[i]->get_lineno(), litl[i]->get_charno(), dt->to_string().c_str(),dtl->to_string().c_str() ); + delete dtl; + return(-1); + } + delete dtl; + } + return(1); + case PRED_COMPARE: + l_ret = assign_data_types(pr->get_left_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ; + r_ret = assign_data_types(pr->get_right_se(),schema, fta_tree, Ext_fcns); // , ext_fcn_set) ; + if( (l_ret < 0) || (r_ret < 0) ) return(-1); + + if( !(pr->get_left_se()->get_data_type()->is_comparable(pr->get_right_se()->get_data_type(), pr->get_op() ) )){ + fprintf(stderr,"ERROR line %d, char %d, operands of comparison must have comparable types (%s %s %s).\n", + pr->get_lineno(), pr->get_charno(), pr->get_left_se()->get_data_type()->to_string().c_str(), + pr->get_right_se()->get_data_type()->to_string().c_str(), pr->get_op().c_str() ); + return(-1); + }else{ + return(1); + } + case PRED_UNARY_OP: + return(assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns)); // , ext_fcn_set)); + case PRED_BINARY_OP: + l_ret = assign_predicate_data_types(pr->get_left_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set); + r_ret = assign_predicate_data_types(pr->get_right_pr(),schema,fta_tree, Ext_fcns); // , ext_fcn_set); + if( (l_ret < 0) || (r_ret < 0) ) return(-1); + return(1); + case PRED_FUNC: + operands = pr->get_op_list(); + r_ret = 1; + for(o=0;oget_data_type()); + if(l_ret < 0) r_ret = -1; + } + if(r_ret < 0) return(r_ret); + + fcn_id = Ext_fcns->lookup_pred(pr->get_op(), odt); + if( fcn_id < 0 ){ + fprintf(stderr,"ERROR, no external predicate %s(",pr->get_op().c_str()); + for(o=0;o0) fprintf(stderr,", "); + fprintf(stderr,"%s",operands[o]->get_data_type()->to_string().c_str()); + } + fprintf(stderr,") is defined, line %d, char %d\n", pr->get_lineno(), pr->get_charno() ); + if(fcn_id == -2) fprintf(stderr,"(multiple subsuming predicates found)\n"); + return(-1); + } + +// ext_fcn_set.insert(fcn_id); + pr->set_fcn_id(fcn_id); + +// Finally, verify that all HANDLE parameters are literals or params. + handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() ); + for(o=0;oget_operator_type() != SE_LITERAL && + operands[o]->get_operator_type() != SE_IFACE_PARAM && + operands[o]->get_operator_type() != SE_PARAM){ + fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", + o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno()); + exit(1); + } + } + } + constant_ind = Ext_fcns->get_const_indicators(pr->get_fcn_id()); + for(o=0;oget_data_type()->get_temporal() != constant_t){ + fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be constant.\n Line=%d, char=%d.\n", + o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno()); + exit(1); + } + } + } + + +// Check if this predicate function is special sampling function + pr->is_sampling_fcn = Ext_fcns->is_sampling_fcn(pr->get_fcn_id()); + + + return(l_ret); + default: + fprintf(stderr,"INTERNAL ERROR in assign_predicate_data_types, line %d, character %d, unknown predicate operator type %d\n", + pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); + } + + return(-1); +} + + + +///////////////////////////////////////////////////////////////////// +//////////////// Make a deep copy of a se / pred tree +///////////////////////////////////////////////////////////////////// + + +// duplicate a select element +select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl){ + return new select_element(dup_se(sl->se,aggr_tbl),sl->name.c_str()); +} + +// duplicate a scalar expression. +scalarexp_t *dup_se(scalarexp_t *se, + aggregate_table *aggr_tbl + ){ + int p; + vector operand_list; + vector dt_signature; + scalarexp_t *ret_se, *l_se, *r_se; + + switch(se->get_operator_type()){ + case SE_LITERAL: + ret_se = new scalarexp_t(se->get_literal()); + ret_se->use_decorations_of(se); + return(ret_se); + + case SE_PARAM: + ret_se = scalarexp_t::make_param_reference(se->get_op().c_str()); + ret_se->use_decorations_of(se); + return(ret_se); + + case SE_IFACE_PARAM: + ret_se = scalarexp_t::make_iface_param_reference(se->get_ifpref()); + ret_se->use_decorations_of(se); + return(ret_se); + + case SE_COLREF: + ret_se = new scalarexp_t(se->get_colref()->duplicate()); + ret_se->rhs.scalarp = se->rhs.scalarp; // carry along notation + ret_se->use_decorations_of(se); + return(ret_se); + + case SE_UNARY_OP: + l_se = dup_se(se->get_left_se(), aggr_tbl); + ret_se = new scalarexp_t(se->get_op().c_str(), l_se); + ret_se->use_decorations_of(se); + return(ret_se); + + case SE_BINARY_OP: + l_se = dup_se(se->get_left_se(), aggr_tbl); + r_se = dup_se(se->get_right_se(), aggr_tbl); + + ret_se = new scalarexp_t(se->get_op().c_str(), l_se, r_se); + ret_se->use_decorations_of(se); + + return(ret_se); + + case SE_AGGR_STAR: + ret_se = scalarexp_t::make_star_aggr(se->get_op().c_str()); + ret_se->use_decorations_of(se); + return(ret_se); + + case SE_AGGR_SE: + l_se = dup_se(se->get_left_se(), aggr_tbl); + ret_se = scalarexp_t::make_se_aggr(se->get_op().c_str(), l_se); + ret_se->use_decorations_of(se); + return(ret_se); + + case SE_FUNC: + { + operand_list = se->get_operands(); + vector new_operands; + for(p=0;pget_op().c_str(), new_operands); + ret_se->use_decorations_of(se); + return(ret_se); + } + + default: + printf("INTERNAL ERROR in dup_se: operator type %d\n",se->get_operator_type()); + exit(1); + break; + } + return(NULL); + +} + + + +predicate_t *dup_pr(predicate_t *pr, + aggregate_table *aggr_tbl + ){ + + vector llist; + scalarexp_t *se_l, *se_r; + predicate_t *pr_l, *pr_r, *ret_pr; + vector op_list, new_op_list; + int o; + + + switch(pr->get_operator_type()){ + case PRED_IN: + se_l = dup_se(pr->get_left_se(), aggr_tbl); + ret_pr = new predicate_t(se_l, pr->get_lit_vec()); + return(ret_pr); + + case PRED_COMPARE: + se_l = dup_se(pr->get_left_se(), aggr_tbl); + se_r = dup_se(pr->get_right_se(), aggr_tbl); + ret_pr = new predicate_t(se_l, pr->get_op().c_str(), se_r); + return(ret_pr); + + case PRED_UNARY_OP: + pr_l = dup_pr(pr->get_left_pr(), aggr_tbl); + ret_pr = new predicate_t(pr->get_op().c_str(), pr_l); + return(ret_pr); + + case PRED_BINARY_OP: + pr_l = dup_pr(pr->get_left_pr(), aggr_tbl); + pr_r = dup_pr(pr->get_right_pr(), aggr_tbl); + ret_pr = new predicate_t(pr->get_op().c_str(), pr_l, pr_r); + return(ret_pr); + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_op().c_str(), new_op_list); + ret_pr->set_fcn_id(pr->get_fcn_id()); + ret_pr->is_sampling_fcn = pr->is_sampling_fcn; + return(ret_pr); + + default: + fprintf(stderr,"INTERNAL ERROR in dup_pr, line %d, character %d, unknown predicate operator type %d\n", + pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return(0); + +} + +table_exp_t *dup_table_exp(table_exp_t *te){ + int i; + table_exp_t *ret = new table_exp_t(); + + ret->query_type = te->query_type; + + ss_map::iterator ss_i; + for(ss_i=te->nmap.begin();ss_i!=te->nmap.end();++ss_i){ + ret->nmap[(*ss_i).first] = (*ss_i).second; + } + + for(i=0;iquery_params.size();++i){ + ret->query_params.push_back(new + var_pair_t(te->query_params[i]->name,te->query_params[i]->val) ); + } + + if(te->sl){ + ret->sl = new select_list_t(); + ret->sl->lineno = te->sl->lineno; ret->sl->charno = te->sl->charno; + vector select_list = te->sl->get_select_list(); + for(i=0;ise,NULL); + ret->sl->append(se,select_list[i]->name); + } + } + + ret->fm = te->fm->duplicate(); + + if(te->wh) ret->wh = dup_pr(te->wh,NULL); + if(te->hv) ret->hv = dup_pr(te->hv,NULL); + if(te->cleaning_when) ret->cleaning_when = dup_pr(te->cleaning_when,NULL); + if(te->cleaning_by) ret->cleaning_by = dup_pr(te->cleaning_by,NULL); + if(te->closing_when) ret->closing_when = dup_pr(te->closing_when,NULL); + + for(i=0;igb.size();++i){ + extended_gb_t *tmp_g = te->gb[i]->duplicate(); + ret->gb.push_back(tmp_g); + } + + ret->mergevars = te->mergevars; + if(te->slack) + ret->slack = dup_se(te->slack,NULL); + ret->lineno = te->lineno; + ret->charno = te->charno; + + return(ret); +} + + + + + + + +///////////////////////////////////////////////////////////////////////// +// Bind colrefs to a member of their FROM list + +void bind_colref_se(scalarexp_t *se, + vector &fm, + int prev_ref, int new_ref + ){ + int p; + vector operand_list; + colref_t *cr; + ifpref_t *ir; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + return; + case SE_IFACE_PARAM: + ir = se->get_ifpref(); + if(ir->get_tablevar_ref() == prev_ref){ + ir->set_tablevar_ref(new_ref); + ir->set_tablevar(fm[new_ref]->get_var_name()); + } + return; + + case SE_COLREF: + cr=se->get_colref(); + if(cr->get_tablevar_ref() == prev_ref){ + cr->set_tablevar_ref(new_ref); +// cr->set_interface(fm[new_ref]->get_interface()); + cr->set_table_name(fm[new_ref]->get_var_name()); + } + return; + + case SE_UNARY_OP: + bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref); + return; + + case SE_BINARY_OP: + bind_colref_se(se->get_left_se(), fm, prev_ref, new_ref); + bind_colref_se(se->get_right_se(), fm, prev_ref, new_ref); + return; + + case SE_AGGR_STAR: + case SE_AGGR_SE: + return; + + case SE_FUNC: + if(se->get_aggr_ref() >= 0) return; + + operand_list = se->get_operands(); + for(p=0;pget_operator_type()); + exit(1); + break; + } + return; + +} + + + + +void bind_colref_pr(predicate_t *pr, + vector &fm, + int prev_ref, int new_ref + ){ + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref); + return; + + case PRED_COMPARE: + bind_colref_se(pr->get_left_se(), fm, prev_ref, new_ref); + bind_colref_se(pr->get_right_se(), fm, prev_ref, new_ref); + return; + + case PRED_UNARY_OP: + bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref); + return; + + case PRED_BINARY_OP: + bind_colref_pr(pr->get_left_pr(), fm, prev_ref, new_ref); + bind_colref_pr(pr->get_right_pr(), fm, prev_ref, new_ref); + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return; + +} + + +///////////////////////////////////////////////////////////////////// +// verify that the se refs only literals and params. +// (use to verify that the expression should stay in the hfta +// during a split) +///////////////////////////////////////////////////////////////////// + +bool is_literal_or_param_only(scalarexp_t *se){ + int o; + vector operands; + bool sum = true; + + if(se == NULL) return(true); + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + return(true); + case SE_IFACE_PARAM: + return(false); // need to treat as colref + case SE_UNARY_OP: + return(is_literal_or_param_only(se->get_left_se()) ); + case SE_BINARY_OP: + return( + is_literal_or_param_only(se->get_left_se()) && + is_literal_or_param_only(se->get_right_se()) + ); + case SE_COLREF: + return(false); + case SE_AGGR_STAR: + case SE_AGGR_SE: + return(false); + case SE_FUNC: +// The fcn might have special meaning at the lfta ... + return(false); + + default: + fprintf(stderr,"INTERNAL ERROR in is_literal_or_param_only, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return(0); +} + + + +///////////////////////////////////////////////////////////////////// +// Search for gb refs. +// (use to verify that no gbrefs in a gb def.) +///////////////////////////////////////////////////////////////////// + + +int count_gb_se(scalarexp_t *se){ + int o; + vector operands; + int sum = 0; + + if(se == NULL) return(0); + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return(0); + case SE_UNARY_OP: + return(count_gb_se(se->get_left_se()) ); + case SE_BINARY_OP: + return( + count_gb_se(se->get_left_se()) + + count_gb_se(se->get_right_se()) + ); + case SE_COLREF: + if(se->get_gb_ref() < 0) return(0); + return(1); + case SE_AGGR_STAR: + case SE_AGGR_SE: + return(0); + case SE_FUNC: + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return(0); +} + + +///////////////////////////////////////////////////////////////////// +//////////////// Search for stateful fcns. +///////////////////////////////////////////////////////////////////// + + +int se_refs_sfun(scalarexp_t *se){ + int o; + vector operands; + int sum = 0; + + if(se == NULL) return(0); + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return(0); + case SE_UNARY_OP: + return(se_refs_sfun(se->get_left_se()) ); + case SE_BINARY_OP: + return( + se_refs_sfun(se->get_left_se()) + + se_refs_sfun(se->get_right_se()) + ); + case SE_COLREF: + return(0); + case SE_AGGR_STAR: + case SE_AGGR_SE: + return(0); + case SE_FUNC: + operands = se->get_operands(); + for(o=0;oget_aggr_ref()>=0) sum++; // is it tagged as a UDAF? + +// for now, stateful functions count as aggregates. + if(se->get_storage_state() != "") + sum++; + + return(sum); + + default: + fprintf(stderr,"INTERNAL ERROR in se_refs_sfun, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return(0); +} + + +// Return a count of the number of stateful fcns in this predicate. +int pred_refs_sfun(predicate_t *pr){ + vector op_list; + int o, aggr_sum; + + switch(pr->get_operator_type()){ + case PRED_IN: + return(se_refs_sfun(pr->get_left_se()) ); + case PRED_COMPARE: + return( + se_refs_sfun(pr->get_left_se()) + + se_refs_sfun(pr->get_right_se()) + ); + case PRED_UNARY_OP: + return(pred_refs_sfun(pr->get_left_pr()) ); + case PRED_BINARY_OP: + return( + pred_refs_sfun(pr->get_left_pr()) + + pred_refs_sfun(pr->get_right_pr()) + ); + case PRED_FUNC: + op_list = pr->get_op_list(); + aggr_sum = 0; + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return(0); +} + +////////////////////////////////////////////////// + +///////////////////////////////////////////////////////////////////// +//////////////// Search for aggregates. +///////////////////////////////////////////////////////////////////// + + +int count_aggr_se(scalarexp_t *se, bool strict){ + int o; + vector operands; + int sum = 0; + + if(se == NULL) return(0); + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return(0); + case SE_UNARY_OP: + return(count_aggr_se(se->get_left_se(), strict) ); + case SE_BINARY_OP: + return( + count_aggr_se(se->get_left_se(), strict) + + count_aggr_se(se->get_right_se(), strict) + ); + case SE_COLREF: + return(0); + case SE_AGGR_STAR: + case SE_AGGR_SE: + return(1); + case SE_FUNC: + operands = se->get_operands(); + for(o=0;oget_aggr_ref()>=0) sum++; // is it tagged as a UDAF? + +// now, stateful functions can count as aggregates. +// if we are being strict. + if(! strict && se->get_storage_state() != "") + sum++; + + return(sum); + + default: + fprintf(stderr,"INTERNAL ERROR in count_aggr_se, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return(0); +} + + +// Return a count of the number of aggregate fcns in this predicate. +int count_aggr_pred(predicate_t *pr, bool strict){ + vector op_list; + int o, aggr_sum; + + switch(pr->get_operator_type()){ + case PRED_IN: + return(count_aggr_se(pr->get_left_se(), strict) ); + case PRED_COMPARE: + return( + count_aggr_se(pr->get_left_se(), strict) + + count_aggr_se(pr->get_right_se(), strict) + ); + case PRED_UNARY_OP: + return(count_aggr_pred(pr->get_left_pr(), strict) ); + case PRED_BINARY_OP: + return( + count_aggr_pred(pr->get_left_pr(), strict) + + count_aggr_pred(pr->get_right_pr(), strict) + ); + case PRED_FUNC: + op_list = pr->get_op_list(); + aggr_sum = 0; + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return(0); +} + +////////////////////////////////////////////////// +/// Analyze tablevar refs + +void get_tablevar_ref_se(scalarexp_t *se, vector &reflist){ + int o; + vector operands; + int vref; + colref_t *cr; + ifpref_t *ir; + + if(se == NULL) return; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + return; + case SE_IFACE_PARAM: + ir = se->get_ifpref(); + vref = ir->get_tablevar_ref(); + for(o=0;oget_left_se(), reflist); + return; + case SE_BINARY_OP: + get_tablevar_ref_se(se->get_left_se(), reflist); + get_tablevar_ref_se(se->get_right_se(), reflist); + return; + case SE_COLREF: + if(se->is_gb()) return; + cr = se->get_colref(); + vref = cr->get_tablevar_ref(); + for(o=0;oget_aggr_ref() >= 0) return; + + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return; +} + + +void get_tablevar_ref_pr(predicate_t *pr, vector &reflist){ + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + get_tablevar_ref_se(pr->get_left_se(),reflist); + return; + case PRED_COMPARE: + get_tablevar_ref_se(pr->get_left_se(),reflist); + get_tablevar_ref_se(pr->get_right_se(),reflist); + return; + case PRED_UNARY_OP: + get_tablevar_ref_pr(pr->get_left_pr(),reflist); + return; + case PRED_BINARY_OP: + get_tablevar_ref_pr(pr->get_left_pr(),reflist); + get_tablevar_ref_pr(pr->get_right_pr(),reflist); + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + } + + return; +} + + +// Walk SE tree and gather STATES ref'd by STATEFUL fcns. + +void gather_fcn_states_se(scalarexp_t *se, set &states_refd, ext_fcn_list *Ext_fcns){ + int agg_id; + int o; + vector operands; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return; + case SE_UNARY_OP: + gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns) ; + return; + case SE_BINARY_OP: + gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns); + gather_fcn_states_se(se->get_right_se(), states_refd,Ext_fcns); + return; + case SE_COLREF: + return; + case SE_AGGR_STAR: + return; + case SE_AGGR_SE: + gather_fcn_states_se(se->get_left_se(), states_refd, Ext_fcns); + return; + case SE_FUNC: + operands = se->get_operands(); + for(o=0;oget_storage_state() != ""){ + states_refd.insert(se->get_storage_state()); + } + return; + + default: + fprintf(stderr,"INTERNAL ERROR in gather_fcn_states_se, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return; +} + + +// Walk SE tree and gather STATES ref'd by STATEFUL fcns. + +void gather_fcn_states_pr(predicate_t *pr, set &states_refd, ext_fcn_list *Ext_fcns){ + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ; + return; + case PRED_COMPARE: + gather_fcn_states_se(pr->get_left_se(),states_refd, Ext_fcns) ; + gather_fcn_states_se(pr->get_right_se(),states_refd, Ext_fcns) ; + return; + case PRED_UNARY_OP: + gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns); + return; + case PRED_BINARY_OP: + gather_fcn_states_pr(pr->get_left_pr(),states_refd, Ext_fcns) ; + gather_fcn_states_pr(pr->get_right_pr(),states_refd, Ext_fcns) ; + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return; +} + + + + +// walk se tree and collect aggregates into aggregate table. +// duplicate aggregates receive the same idx to the table. + +void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *aggregate_table, ext_fcn_list *Ext_fcns){ + int agg_id; + int o; + vector operands; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return; + case SE_UNARY_OP: + build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns) ; + return; + case SE_BINARY_OP: + build_aggr_tbl_fm_se(se->get_left_se(), aggregate_table, Ext_fcns); + build_aggr_tbl_fm_se(se->get_right_se(), aggregate_table,Ext_fcns); + return; + case SE_COLREF: + return; + case SE_AGGR_STAR: + agg_id = aggregate_table->add_aggr(se->get_op(),NULL,se->is_superaggr()); + se->set_aggr_id(agg_id); + return; + case SE_AGGR_SE: + agg_id = aggregate_table->add_aggr(se->get_op(),se->get_left_se(),se->is_superaggr()); + se->set_aggr_id(agg_id); + return; + case SE_FUNC: + operands = se->get_operands(); + for(o=0;oget_aggr_ref() >= 0){ // it's been tagged as a UDAF + agg_id = aggregate_table->add_aggr(se->get_op(), se->get_fcn_id(), operands, Ext_fcns->get_storage_dt(se->get_fcn_id()), se->is_superaggr(), Ext_fcns->is_running_aggr(se->get_fcn_id()),Ext_fcns->has_lfta_bailout(se->get_fcn_id())); + se->set_aggr_id(agg_id); + } + return; + + default: + fprintf(stderr,"INTERNAL ERROR in build_aggr_tbl_fm_se, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return; +} + + +// walk se tree and collect aggregates into aggregate table. +// duplicate aggregates receive the same idx to the table. + +void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *aggregate_table,ext_fcn_list *Ext_fcns){ + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ; + return; + case PRED_COMPARE: + build_aggr_tbl_fm_se(pr->get_left_se(),aggregate_table, Ext_fcns) ; + build_aggr_tbl_fm_se(pr->get_right_se(),aggregate_table, Ext_fcns) ; + return; + case PRED_UNARY_OP: + build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns); + return; + case PRED_BINARY_OP: + build_aggr_tbl_fm_pred(pr->get_left_pr(),aggregate_table, Ext_fcns) ; + build_aggr_tbl_fm_pred(pr->get_right_pr(),aggregate_table, Ext_fcns) ; + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return; +} + + +// Return true if the two scalar expressions +// represent the same value (e.g., use to eliminate +// duplicate aggregates). +bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2){ + vector operands1; + vector operands2; + int o; + +// First handle the case of nulls (e.g. COUNT aggrs) + if(se1 == NULL && se2 == NULL) return(true); + if(se1 == NULL || se2 == NULL) return(false); + +// In all cases, must be the same oeprator type and same operator. + if(se1->get_operator_type() != se2->get_operator_type()) + return(false); + if(se1->get_op() != se2->get_op() ) + return(false); + + switch(se1->get_operator_type()){ + case SE_LITERAL: + return(se1->get_literal()->is_equivalent(se2->get_literal()) ); + case SE_PARAM: + return(se1->get_param_name() == se2->get_param_name() ); + case SE_IFACE_PARAM: + return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) ); + case SE_UNARY_OP: + return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) ); + case SE_BINARY_OP: + if(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) ) + return(is_equivalent_se(se1->get_right_se(), se2->get_right_se()) ); + return(false); + case SE_COLREF: + if(se1->is_gb() && se2->is_gb()) + return( se1->get_gb_ref() == se2->get_gb_ref() ); + if(se1->is_gb() || se2->is_gb()) + return(false); + return(se1->get_colref()->is_equivalent(se2->get_colref()) ); + case SE_AGGR_STAR: + return(true); + case SE_AGGR_SE: + return(is_equivalent_se(se1->get_left_se(), se2->get_left_se()) ); + case SE_FUNC: + if(se1->get_op() != se2->get_op()) return(false); + + operands1 = se1->get_operands(); + operands2 = se2->get_operands(); + if(operands1.size() != operands2.size()) return(false); + + for(o=0;oget_lineno(), se1->get_charno(),se1->get_operator_type()); + exit(1); + } + return(false); +} + + +// Similar to is_equivalent_se, but with a looser definition +// of equivalence of colrefs. Here, say they are equivalent +// if their base table is the same. Use to find equivalent +// predicates on base tables. +bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema){ + vector operands1; + vector operands2; + int o; + + if(se1->get_operator_type() == SE_COLREF && se1->is_gb()){ + se1 = se1->get_right_se(); + } + if(se2->get_operator_type() == SE_COLREF && se2->is_gb()){ + se2 = se2->get_right_se(); + } + +// First handle the case of nulls (e.g. COUNT aggrs) + if(se1 == NULL && se2 == NULL) return(true); + if(se1 == NULL || se2 == NULL) return(false); + +// In all cases, must be the same oeprator type and same operator. + if(se1->get_operator_type() != se2->get_operator_type()) + return(false); + if(se1->get_op() != se2->get_op() ) + return(false); + + switch(se1->get_operator_type()){ + case SE_LITERAL: + return(se1->get_literal()->is_equivalent(se2->get_literal()) ); + case SE_PARAM: + return(se1->get_param_name() == se2->get_param_name() ); + case SE_IFACE_PARAM: + return(se1->get_ifpref()->is_equivalent(se2->get_ifpref()) ); + case SE_UNARY_OP: + return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) ); + case SE_BINARY_OP: + if(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) ) + return(is_equivalent_se_base(se1->get_right_se(), se2->get_right_se(), Schema) ); + return(false); + case SE_COLREF: +/* + if(se1->is_gb() && se2->is_gb()) + return( se1->get_gb_ref() == se2->get_gb_ref() ); + if(se1->is_gb() || se2->is_gb()) + return(false); +*/ + return(se1->get_colref()->is_equivalent_base(se2->get_colref(), Schema) ); + case SE_AGGR_STAR: + return(true); + case SE_AGGR_SE: + return(is_equivalent_se_base(se1->get_left_se(), se2->get_left_se(), Schema) ); + case SE_FUNC: + if(se1->get_op() != se2->get_op()) return(false); + + operands1 = se1->get_operands(); + operands2 = se2->get_operands(); + if(operands1.size() != operands2.size()) return(false); + + for(o=0;oget_lineno(), se1->get_charno(),se1->get_operator_type()); + exit(1); + } + return(false); +} + + +// Find predicates which are equivalent when +// looking at the base tables. Use to find +// common prefilter. +bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema){ +int i, o; + +// First handle the case of nulls + if(p1 == NULL && p2 == NULL) return(true); + if(p1 == NULL || p2 == NULL) return(false); + + + if(p1->get_operator_type() != p2->get_operator_type()) + return(false); + if(p1->get_op() != p2->get_op()) + return(false); + + vector ll1; + vector ll2; + vector op_list1, op_list2; + + + switch(p2->get_operator_type()){ + case PRED_COMPARE: + if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) ) + return(false); + return( is_equivalent_se_base(p1->get_right_se(),p2->get_right_se(), Schema) ); + break; + case PRED_IN: + if( ! is_equivalent_se_base(p1->get_left_se(),p2->get_left_se(), Schema) ) + return(false); + ll1 = p1->get_lit_vec(); + ll2 = p2->get_lit_vec(); + if(ll1.size() != ll2.size()) + return(false); + for(i=0;iis_equivalent( ll2[i] ) ) + return(false); + } + return(true); + break; + case PRED_UNARY_OP: + return(is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema) ); + break; + case PRED_BINARY_OP: + if(! is_equivalent_pred_base(p1->get_left_pr(), p2->get_left_pr(), Schema)) + return(false); + return(is_equivalent_pred_base(p1->get_right_pr(), p2->get_right_pr(), Schema) ); + break; + case PRED_FUNC: + op_list1 = p1->get_op_list(); + op_list2 = p2->get_op_list(); + if(op_list1.size() != op_list2.size()) return(false); + for(o=0;oget_operator_type()!=PRED_FUNC)||(p2->get_operator_type()!=PRED_FUNC)) + return(false); + if(p1->get_fcn_id() != p2->get_fcn_id()) + return false; + vector cl_op = Ext_fcns->get_class_indicators(p1->get_fcn_id()); + int o; + vector op_list1 = p1->get_op_list(); + vector op_list2 = p2->get_op_list(); + if(op_list1.size() != op_list2.size()) return(false); + for(o=0;o operands; + int o; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return(true ); + case SE_UNARY_OP: + return(verify_aggr_query_se(se->get_left_se() ) ); + case SE_BINARY_OP: + return(verify_aggr_query_se(se->get_left_se() ) && + verify_aggr_query_se(se->get_right_se() ) ); + case SE_COLREF: + if(se->is_gb() ) return(true); + fprintf(stderr,"ERROR: the select clause in an aggregate query can " + "only reference constants, group-by attributes, and " + "aggregates, (%s) line %d, character %d.\n", + se->get_colref()->to_string().c_str(), + se->get_lineno(), se->get_charno() ); + return(false); + case SE_AGGR_STAR: + case SE_AGGR_SE: +// colrefs and gbrefs allowed. +// check for nested aggregation elsewhere, so just return TRUE + return(true); + case SE_FUNC: +// If its a UDAF, just return true + if(se->get_aggr_ref() >= 0) return true; + + operands = se->get_operands(); + + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return(false); +} + + + + +// Find complex literals. +// NOTE : This analysis should be deferred to +// code generation time. +// This analysis drills into aggr se specs. +// Shouldn't this be done at the aggregate table? +// But, its not a major loss of efficiency. +// UPDATE : drilling into aggr se's is causnig a problem +// so I've eliminated it. + +bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns, + cplx_lit_table *complex_literals){ + literal_t *l; + vector operands; + int o; + scalarexp_t *param_se; + data_type *dt; + + switch(se->get_operator_type()){ + case SE_LITERAL: + l = se->get_literal(); + if(l->constructor_name() != ""){ + int cl_idx = complex_literals->add_cpx_lit(l, false); + l->set_cpx_lit_ref(cl_idx); + } + return(true); + case SE_PARAM: + return(true ); +// SE_IFACE_PARAM should not exist when this is called. + case SE_UNARY_OP: + return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) ); + case SE_BINARY_OP: + return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) && + find_complex_literal_se(se->get_right_se(), Ext_fcns, complex_literals ) ); + case SE_COLREF: + return(true); + case SE_AGGR_STAR: + return(true); + case SE_AGGR_SE: + return true; +// return(find_complex_literal_se(se->get_left_se(), Ext_fcns, complex_literals ) ); + case SE_FUNC: + if(se->get_aggr_ref() >= 0) return true; + + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return(false); +} + + + + +void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns, + cplx_lit_table *complex_literals){ + int i,o; + vector litl; + vector op_list; + + + switch(pr->get_operator_type()){ + case PRED_IN: + find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ; + litl = pr->get_lit_vec(); + for(i=0;iconstructor_name() != ""){ + int cl_idx = complex_literals->add_cpx_lit(litl[i],false); + litl[i]->set_cpx_lit_ref(cl_idx); + } + } + return; + case PRED_COMPARE: + find_complex_literal_se(pr->get_left_se(), Ext_fcns, complex_literals) ; + find_complex_literal_se(pr->get_right_se(), Ext_fcns, complex_literals) ; + return; + case PRED_UNARY_OP: + find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals); + return; + case PRED_BINARY_OP: + find_complex_literal_pr(pr->get_left_pr(), Ext_fcns, complex_literals) ; + find_complex_literal_pr(pr->get_right_pr(), Ext_fcns, complex_literals) ; + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return; +} + + +// Find all things which are passed as handle parameters to functions +// (query parameters, (simple) literals, complex literals) +// These expressions MUST be processed with find_complex_literal_?? +// first. +// TODO: this analysis drills into the aggregate SEs. +// Shouldn't this be done on the aggr table SEs instead? +// to avoid duplication. THe handle registration +// might be expensive ... +// REVISED : drilling into aggr se's is causing problems, eliminated. + +void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns, + vector &handle_tbl){ + vector operands; + vector handle_ind; + int o; + scalarexp_t *param_se; + data_type *dt; + literal_t *l; + + switch(se->get_operator_type()){ + case SE_LITERAL: + return; + case SE_PARAM: + return; +// case SE_IFACE_PARAM: SHOULD NOT EXIST when this is called + case SE_UNARY_OP: + find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ; + return; + case SE_BINARY_OP: + find_param_handles_se(se->get_left_se(), Ext_fcns , handle_tbl) ; + find_param_handles_se(se->get_right_se(), Ext_fcns, handle_tbl ) ; + return; + case SE_COLREF: + return; + case SE_AGGR_STAR: + return; + case SE_AGGR_SE: +// find_param_handles_se(se->get_left_se(), Ext_fcns, handle_tbl ) ; + return; + case SE_FUNC: + if(se->get_aggr_ref() >= 0) return ; + + operands = se->get_operands(); + handle_ind = Ext_fcns->get_handle_indicators(se->get_fcn_id() ); + for(o=0;oget_operator_type() != SE_LITERAL && + param_se->get_operator_type() != SE_PARAM){ + fprintf(stderr,"ERROR, the %d-th parameter of function %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", + o+1, se->get_op().c_str(),se->get_lineno(), se->get_charno()); + exit(1); + } + + if(param_se->get_operator_type() == SE_PARAM){ + he = new handle_param_tbl_entry( + se->get_op(), o, param_se->get_param_name(), + param_se->get_data_type()->get_type_str()); + }else{ + l = param_se->get_literal(); + if(l->is_cpx_lit()){ + he = new handle_param_tbl_entry( + se->get_op(), o, l->get_cpx_lit_ref(), + param_se->get_data_type()->get_type_str()); + }else{ + he = new handle_param_tbl_entry( + se->get_op(), o, l, + param_se->get_data_type()->get_type_str()); + } + } + param_se->set_handle_ref(handle_tbl.size()); + handle_tbl.push_back(he); + }else{ + find_param_handles_se(operands[o], Ext_fcns, handle_tbl ) ; + } + } + return; + default: + fprintf(stderr,"INTERNAL ERROR in find_param_handles, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return; +} + + +void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns, + vector &handle_tbl){ + vector litl; + vector op_list; + scalarexp_t *param_se; + vector handle_ind; + int o; + literal_t *l; + + switch(pr->get_operator_type()){ + case PRED_IN: + find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ; + return; + case PRED_COMPARE: + find_param_handles_se(pr->get_left_se(), Ext_fcns, handle_tbl) ; + find_param_handles_se(pr->get_right_se(), Ext_fcns, handle_tbl) ; + return; + case PRED_UNARY_OP: + find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl); + return; + case PRED_BINARY_OP: + find_param_handles_pr(pr->get_left_pr(), Ext_fcns, handle_tbl) ; + find_param_handles_pr(pr->get_right_pr(), Ext_fcns, handle_tbl) ; + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + handle_ind = Ext_fcns->get_handle_indicators(pr->get_fcn_id() ); + for(o=0;oget_operator_type() != SE_LITERAL && + param_se->get_operator_type() != SE_PARAM){ + fprintf(stderr,"ERROR, the %d-th parameter of predicate %s must be a literal or query parameter (because it is a pass-by-HANDLE parameter).\n Line=%d, char=%d.\n", + o+1, pr->get_op().c_str(),pr->get_lineno(), pr->get_charno()); + exit(1); + } + + if(param_se->get_operator_type() == SE_PARAM){ + he = new handle_param_tbl_entry( + pr->get_op(), o, param_se->get_param_name(), + param_se->get_data_type()->get_type_str()); + }else{ + l = param_se->get_literal(); + if(l->is_cpx_lit()){ + he = new handle_param_tbl_entry( + pr->get_op(), o, l->get_cpx_lit_ref(), + param_se->get_data_type()->get_type_str()); + }else{ + he = new handle_param_tbl_entry( + pr->get_op(), o, l, + param_se->get_data_type()->get_type_str()); + } + } + param_se->set_handle_ref(handle_tbl.size()); + handle_tbl.push_back(he); + }else{ + find_param_handles_se(op_list[o], Ext_fcns, handle_tbl ) ; + } + } + return; + default: + fprintf(stderr,"INTERNAL ERROR in find_param_handles_pr, line %d, character %d, unknown predicate operator type %d\n", + pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return; +} + + +// Verify the HAVING predicate : it +// can access gb vars, aggregates, and constants, +// but not colrefs. +// return 1 if OK, -1 if bad. +// Perhaps replace by a pair of fcns which counts non-gb colrefs? + +// Extended to deal with cleaning_by, cleaning_when : +// verify that any aggregate function +// has the multiple output property. + +int verify_having_se(scalarexp_t *se, const char *clause, ext_fcn_list *Ext_fcns){ + int l_ret, r_ret; + vector operands; + vector odt; + int o; + + switch(se->get_operator_type()){ + case SE_LITERAL: + return(1); + case SE_PARAM: + case SE_IFACE_PARAM: + return(1); + case SE_UNARY_OP: + return(verify_having_se(se->get_left_se(), clause, Ext_fcns) ); + case SE_BINARY_OP: + l_ret = verify_having_se(se->get_left_se(), clause, Ext_fcns); + r_ret = verify_having_se(se->get_right_se(), clause, Ext_fcns); + if( (l_ret < 0 ) || (r_ret < 0) ) return(-1); + return(1); + case SE_COLREF: + if(se->is_gb()) return 1; + fprintf(stderr,"ERROR, %s clause references a non-group by attribute line =%d, char = %d, colref=%s\n", clause, + se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_colref()->to_string().c_str() ); + return(-1); + case SE_AGGR_STAR: + case SE_AGGR_SE: +// colrefs and gbrefs allowed. +// check for nested aggregation elsewhere, so just return TRUE + if(!se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){ + fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause, + se->get_lineno(),se->get_charno(), se->get_op().c_str() ); + return(-1); + } + +// Ensure that aggregate refs allow multiple outputs +// in CLEANING_WHEN, CLEANING_BY + if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){ + if(! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){ + fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause, + se->get_op().c_str(),se->get_lineno(),se->get_charno() ); + return(-1); + } + } + + + return(1); + case SE_FUNC: + if(se->get_aggr_ref() >= 0 && !se->is_superaggr() && !strcmp(clause,"CLEANING_WHEN")){ + fprintf(stderr,"ERROR, %s clause references a superaggregate, line =%d, char = %d, op=%s\n", clause, + se->get_colref()->get_lineno(),se->get_colref()->get_charno(), se->get_op().c_str() ); + return(-1); + } + + if(!strcmp(clause,"CLEANING_WHEN") || !strcmp(clause,"CLEANING_BY")){ + if(se->get_aggr_ref() >= 0 && ! aggr_table_entry::multiple_return_allowed(true,Ext_fcns,se->get_fcn_id())){ + fprintf(stderr,"ERROR, the %s clause references aggregate %s, which does not allow multiple outputs, line=%d, char=%d\n",clause, + se->get_op().c_str(),se->get_lineno(),se->get_charno() ); + return(-1); + } + } + + if(se->get_aggr_ref() >= 0) // don't descent into aggregates. + return 1; + + operands = se->get_operands(); + r_ret = 1; + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + return(-1); + } + return(-1); +} + + +// Verify the HAVING predicate : it +// can access gb vars, aggregates, and constants, +// but not colrefs. +// return 1 if OK, -1 if bad. +// Perhaps replace by a pair of fcns which counts non-gb colrefs? + + +int verify_having_pred(predicate_t *pr, const char *clause, ext_fcn_list *Ext_fcns){ + int l_ret, r_ret; + vector litl; + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + return(verify_having_se(pr->get_left_se(), clause, Ext_fcns)); + case PRED_COMPARE: + l_ret = verify_having_se(pr->get_left_se(), clause, Ext_fcns) ; + r_ret = verify_having_se(pr->get_right_se(), clause, Ext_fcns) ; + if( (l_ret < 0) || (r_ret < 0) ) return(-1); else return(1); + case PRED_UNARY_OP: + return(verify_having_pred(pr->get_left_pr(), clause, Ext_fcns)); + case PRED_BINARY_OP: + l_ret = verify_having_pred(pr->get_left_pr(), clause, Ext_fcns); + r_ret = verify_having_pred(pr->get_right_pr(), clause, Ext_fcns); + if( (l_ret < 0) || (r_ret < 0) ) return(-1); + return(1); + case PRED_FUNC: + op_list = pr->get_op_list(); + l_ret = 1; + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + } + + return(-1); +} + + +////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////// +/////// cnf and pred analysis and manipulation + +// ---------------------------------------------------------------------- +//  Convert the predicates to a list of conjuncts +//  (not actually cnf). Do some analysis +//  on their properties. +// ---------------------------------------------------------------------- + + +//  Put into list clist the predicates that +//  are AND'ed together. + +void make_cnf_from_pr(predicate_t *pr, vector &clist){ + + if(pr == NULL) return; + + switch(pr->get_operator_type()){ + case PRED_COMPARE: + clist.push_back(new cnf_elem(pr)); + return; + break; + case PRED_IN: + clist.push_back(new cnf_elem(pr)); + return; + break; + case PRED_UNARY_OP: + clist.push_back(new cnf_elem(pr)); + return; + break; + case PRED_BINARY_OP: + if(pr->get_op() == "OR"){ + clist.push_back(new cnf_elem(pr)); + return; + } + if(pr->get_op() =="AND"){ + make_cnf_from_pr(pr->get_left_pr(),clist); + make_cnf_from_pr(pr->get_right_pr(),clist); + return; + } + case PRED_FUNC: + clist.push_back(new cnf_elem(pr)); + return; + break; + default: + fprintf(stderr,"INTERNAL ERROR in make_cnf_from_pr: I don't recognize predicate operator %s\n",pr->get_op().c_str()); + exit(1); + break; + } +} + + + +//  Find out what things are referenced in a se, +//  to use for analyzing a predicate. +//  Currently, is it simple (no operators), does it +//  reference a group-by column, does it reference an +//  attribute of a table. +// +// analyze_cnf_se and analyze_cnf_pr are called by analyze_cnf + + +void analyze_cnf_se(scalarexp_t *se, int &s, int &g, int &a, int &agr){ + int p; + vector operand_list; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return; + case SE_COLREF: + if(se->is_gb() ) g=1; + else a=1; + return; + case SE_UNARY_OP: + s=0; + analyze_cnf_se(se->get_left_se(),s,g,a,agr); + return; + case SE_BINARY_OP: + s=0; + analyze_cnf_se(se->get_left_se(),s,g,a,agr); + analyze_cnf_se(se->get_right_se(),s,g,a,agr); + return; + case SE_AGGR_STAR: + case SE_AGGR_SE: + agr = 1; + return; + case SE_FUNC: + if(se->get_aggr_ref() >= 0){ + agr = 1; + return; + } + s = 0; + operand_list = se->get_operands(); + for(p=0;p op_list; + + + switch(pr->get_operator_type()){ + case PRED_COMPARE: + analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr); + analyze_cnf_se(pr->get_right_se(),dum_simple,g,a,agr); + return; + case PRED_IN: + analyze_cnf_se(pr->get_left_se(),dum_simple,g,a,agr); + return; + case PRED_UNARY_OP: + analyze_cnf_pr(pr->get_left_pr(),g,a,agr); + return; + case PRED_BINARY_OP: + analyze_cnf_pr(pr->get_left_pr(),g,a,agr); + analyze_cnf_pr(pr->get_right_pr(),g,a,agr); + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } +} + + + +//  analyze a conjunct of a predicate. +//  Is it atomic (e.g., a single predicate), +//  and if so do a further analysis. + +void analyze_cnf(cnf_elem *c){ + +//  analyze the predicate. + analyze_cnf_pr(c->pr, c->pr_gb, c->pr_attr, c->pr_aggr); + + if((c->pr->get_operator_type()!= PRED_COMPARE) && (c->pr->get_operator_type()!= PRED_IN)){ + return; + } + + +//  its an atomic predicate -- get more info + c->is_atom = 1; + + if(c->pr->get_op() == "=") + c->eq_pred = 1; + else + c->eq_pred = 0; + + if(c->pr->get_operator_type() == PRED_IN) + c->in_pred = 1; + else + c->in_pred = 0; + + c->l_simple = 1; c->l_gb = c->l_attr = c->l_aggr = 0; + analyze_cnf_se(c->pr->get_left_se(),c->l_simple,c->l_gb,c->l_attr, c->l_aggr); + + if(c->pr->get_operator_type() == PRED_COMPARE){ + c->r_simple = 1; c->r_gb = c->r_attr = c->r_aggr = 0; + analyze_cnf_se(c->pr->get_left_se(),c->r_simple,c->r_gb,c->r_attr, c->r_aggr); + } +} + +void analyze_constraint_se(scalarexp_t *se, + int &n_agr, int &n_gb, int &n_par, int &n_func, int &n_op, ext_fcn_list *Ext_fcns, bool enter_gb){ + int l_agr, l_gb, l_par, l_func, l_op; + int r_agr, r_gb, r_par, r_func, r_op; + int p; + vector operand_list; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_IFACE_PARAM: + n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0; + return; + case SE_PARAM: + n_agr=0; n_gb = 0; n_par = 1; n_func = 0; n_op = 0; + return; + case SE_COLREF: + n_agr=0; n_gb = 0; n_par = 0; n_func = 0; n_op = 0; + if(se->is_gb() ){ + if(enter_gb){ + analyze_constraint_se(se->get_right_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb); + }else{ + n_gb=1; + } + } + return; + case SE_UNARY_OP: + analyze_constraint_se(se->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb); + n_op++; + return; + case SE_BINARY_OP: + analyze_constraint_se(se->get_left_se(),l_agr,l_gb,l_par,l_func,l_op,Ext_fcns,enter_gb); + analyze_constraint_se(se->get_right_se(),r_agr,r_gb,r_par, r_func,r_op,Ext_fcns,enter_gb); + n_agr=l_agr+r_agr; + n_gb=l_gb+r_gb; + n_par=l_par+r_par; + n_func=l_func+r_func; + n_op=l_op+r_op+1; + return; + case SE_AGGR_STAR: + case SE_AGGR_SE: + n_agr=1; n_gb = 0; n_par = 0; n_func = 0; n_op = 0; + return; + case SE_FUNC: + if(se->get_aggr_ref() >= 0){ + n_agr=1; n_gb = 0; n_par = 0; n_op = 0; + if(Ext_fcns) + n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id()); + else + n_func = 1; + return; + } + n_agr=0; n_gb = 0; n_par = 0; n_op = 0; + if(Ext_fcns) + n_func = Ext_fcns->estimate_fcn_cost(se->get_fcn_id()); + else + n_func = 1; + operand_list = se->get_operands(); + for(p=0;p op_list; + + + switch(pr->get_operator_type()){ + case PRED_COMPARE: + analyze_constraint_se(pr->get_left_se(),l_agr,l_gb,l_par,l_func, l_op,Ext_fcns,enter_gb); + analyze_constraint_se(pr->get_right_se(),r_agr,r_gb,r_par,r_func,r_op,Ext_fcns,enter_gb); + n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par; + n_func=l_func+r_func; n_op=l_op+r_op; + if(pr->get_left_se()->get_data_type()->complex_comparison( + pr->get_right_se()->get_data_type()) + ){ + n_cmp_s = 0; n_cmp_c=1; + }else{ + n_cmp_s = 1; n_cmp_c=0; + } + n_in = 0; n_pred = 0; n_bool = 0; + return; + case PRED_IN: +// Tread IN predicate as sequence of comparisons + analyze_constraint_se(pr->get_left_se(),n_agr,n_gb,n_par,n_func,n_op,Ext_fcns,enter_gb); + if(pr->get_left_se()->get_data_type()->complex_comparison( + pr->get_right_se()->get_data_type()) + ){ + n_cmp_s = 0; n_cmp_c=pr->get_lit_vec().size(); + }else{ + n_cmp_s = pr->get_lit_vec().size(); n_cmp_c=0; + } + n_in = 0; n_pred = 0; n_bool = 0; + return; + case PRED_UNARY_OP: + analyze_constraint_pr(pr->get_left_pr(),n_agr,n_gb,n_par,n_func,n_op,n_cmp_s,n_cmp_c,n_in,n_pred,n_bool,Ext_fcns,enter_gb); + n_bool++; + return; + case PRED_BINARY_OP: + analyze_constraint_pr(pr->get_left_pr(),l_agr,l_gb,l_par,l_func,l_op,l_cmp_s,l_cmp_c,l_in,l_pred,l_bool,Ext_fcns,enter_gb); + analyze_constraint_pr(pr->get_right_pr(),r_agr,r_gb,r_par,r_func,r_op,r_cmp_s,r_cmp_c,r_in,r_pred,r_bool,Ext_fcns,enter_gb); + n_agr=l_agr+r_agr; n_gb=l_gb+r_gb; n_par=l_par+r_par; + n_func=l_func+r_func; n_op=l_op+r_op; + n_cmp_s=l_cmp_s+r_cmp_s; n_cmp_c=l_cmp_c+r_cmp_c; + n_in=l_in+r_in; n_pred=l_pred+r_pred; n_bool=l_bool+r_bool+1; + return; + case PRED_FUNC: + n_agr=n_gb=n_par=n_func=n_op=n_cmp_s=n_cmp_c=n_in=n_bool=0; + if(Ext_fcns) + n_pred = Ext_fcns->estimate_fcn_cost(pr->get_fcn_id()); + else + n_pred = 1; + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } +} + +void compute_cnf_cost(cnf_elem *c, ext_fcn_list *Ext_fcns){ + int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool; + analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op, + n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,false); + +//printf("nfunc=%d n_pred=%d, n_cmp_c=%d, n_op=%d, n_cmp_s=%d,n_bool=%d\n", n_func, n_pred, n_cmp_c, n_op, n_cmp_s, n_bool); + c->cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool; +} + +bool prefilter_compatible(cnf_elem *c, ext_fcn_list *Ext_fcns){ + int n_agr, n_gb, n_par, n_func, n_op, n_cmp_s, n_cmp_c, n_in, n_pred,n_bool; + analyze_constraint_pr(c->pr,n_agr, n_gb, n_par, n_func, n_op, + n_cmp_s, n_cmp_c, n_in, n_pred,n_bool, Ext_fcns,true); +//printf("prefilter_compatible, n_par=%d, n_gb=%d, n_agr=%d, n_func=%d, n_pred=%d, n_comp_c=%d, n_cmp_s=%d, n_bool=%d\n",n_gb,n_par,n_agr,n_func,n_pred,n_cmp_c,n_cmp_s,n_bool); + if(n_par || n_agr) + return false; + int cost = (n_func+n_pred)+10*n_cmp_c+n_op+n_cmp_s+n_bool; +//printf("cost=%d\n",cost); + return cost<10; +} + +// The prefilter needs to translate constraints on +// gbvars into constraints involving their underlying SEs. +// The following two routines attach GB def info. + +void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl){ + int p; + vector operand_list; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_IFACE_PARAM: + case SE_PARAM: + case SE_AGGR_STAR: + return; + case SE_COLREF: + if(se->is_gb() ){ + se->rhs.scalarp = gtbl->get_def(se->get_gb_ref()); + } + return; + case SE_UNARY_OP: + insert_gb_def_se(se->get_left_se(),gtbl); + return; + case SE_BINARY_OP: + insert_gb_def_se(se->get_left_se(),gtbl); + insert_gb_def_se(se->get_right_se(),gtbl); + return; + case SE_AGGR_SE: + insert_gb_def_se(se->get_left_se(),gtbl); + return; + case SE_FUNC: + operand_list = se->get_operands(); + for(p=0;p op_list; +int o; + + switch(pr->get_operator_type()){ + case PRED_COMPARE: + insert_gb_def_se(pr->get_left_se(),gtbl); + insert_gb_def_se(pr->get_right_se(),gtbl); + return; + case PRED_IN: + insert_gb_def_se(pr->get_left_se(),gtbl); + return; + case PRED_UNARY_OP: + insert_gb_def_pr(pr->get_left_pr(),gtbl); + return; + case PRED_BINARY_OP: + insert_gb_def_pr(pr->get_left_pr(),gtbl); + insert_gb_def_pr(pr->get_right_pr(),gtbl); + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } +} + +// Substitute gbrefs with their definitions +void subs_gbrefs_se(scalarexp_t *se, table_list *Schema){ + int p; + vector operand_list; + scalarexp_t *lse,*rse; + colref_t *cr; + string b_tbl; + int b_idx; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_IFACE_PARAM: + case SE_PARAM: + case SE_AGGR_STAR: + return; + case SE_COLREF: + cr = se->get_colref(); + b_tbl = Schema->get_basetbl_name(cr->schema_ref,cr->field); + b_idx = Schema->get_table_ref(b_tbl); + cr->tablevar_ref = b_idx; + return; + case SE_UNARY_OP: + lse=se->get_left_se(); + if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ + se->lhs.scalarp = lse->get_right_se(); + subs_gbrefs_se(se,Schema); + return; + } + subs_gbrefs_se(se->get_left_se(),Schema); + return; + case SE_BINARY_OP: + lse=se->get_left_se(); + if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ + se->lhs.scalarp = lse->get_right_se(); + subs_gbrefs_se(se,Schema); + return; + } + rse=se->get_right_se(); + if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){ + se->rhs.scalarp = rse->get_right_se(); + subs_gbrefs_se(se,Schema); + return; + } + subs_gbrefs_se(se->get_left_se(),Schema); + subs_gbrefs_se(se->get_right_se(),Schema); + return; + case SE_AGGR_SE: + lse=se->get_left_se(); + if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ + se->lhs.scalarp = lse->get_right_se(); + subs_gbrefs_se(se,Schema); + return; + } + subs_gbrefs_se(se->get_left_se(),Schema); + return; + case SE_FUNC: + operand_list = se->get_operands(); + for(p=0;pget_operator_type()==SE_COLREF && lse->is_gb()){ + se->param_list[p] = lse->get_right_se(); + subs_gbrefs_se(se,Schema); + return; + } + } + for(p=0;p op_list; +int o; +scalarexp_t *lse,*rse; + + switch(pr->get_operator_type()){ + case PRED_COMPARE: + lse=pr->get_left_se(); + if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ + pr->lhs.sexp = lse->get_right_se(); + subs_gbrefs_pr(pr,Schema); + return; + } + rse=pr->get_right_se(); + if(rse->get_operator_type()==SE_COLREF && rse->is_gb()){ + pr->rhs.sexp = rse->get_right_se(); + subs_gbrefs_pr(pr,Schema); + return; + } + subs_gbrefs_se(pr->get_left_se(),Schema); + subs_gbrefs_se(pr->get_right_se(),Schema); + return; + case PRED_IN: + lse=pr->get_left_se(); + if(lse->get_operator_type()==SE_COLREF && lse->is_gb()){ + pr->lhs.sexp = lse->get_right_se(); + subs_gbrefs_pr(pr,Schema); + return; + } + subs_gbrefs_se(pr->get_left_se(),Schema); + return; + case PRED_UNARY_OP: + subs_gbrefs_pr(pr->get_left_pr(),Schema); + return; + case PRED_BINARY_OP: + subs_gbrefs_pr(pr->get_left_pr(),Schema); + subs_gbrefs_pr(pr->get_right_pr(),Schema); + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_operator_type()==SE_COLREF && lse->is_gb()){ + pr->param_list[o] = lse->get_right_se(); + subs_gbrefs_pr(pr,Schema); + return; + } + subs_gbrefs_se(op_list[o],Schema); + } + return; + default: + fprintf(stderr,"INTERNAL ERROR in subs_gbrefs_pr, line %d, character %d, unknown predicate operator type %d\n", + pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } +} + + +// Search for references to "expensive" fields. +int expensive_refs_se(scalarexp_t *se, table_list *Schema){ + int p; + vector operand_list; + int cnt=0; +table_def *td; +param_list *plist; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_IFACE_PARAM: + case SE_PARAM: + case SE_AGGR_STAR: + case SE_AGGR_SE: + return 0; + case SE_COLREF: + if(se->is_gb()) + return expensive_refs_se(se->rhs.scalarp,Schema); + td = Schema->get_table(se->lhs.colref->schema_ref); + plist = td->get_modifier_list(se->lhs.colref->field); + if(plist->contains_key("expensive")) + return 1; + return 0; + case SE_UNARY_OP: + return expensive_refs_se(se->get_left_se(),Schema); + case SE_BINARY_OP: + cnt += expensive_refs_se(se->get_left_se(),Schema); + cnt += expensive_refs_se(se->get_right_se(),Schema); + return cnt; + case SE_FUNC: + operand_list = se->get_operands(); + for(p=0;p op_list; +int o; +int cnt=0; + + switch(pr->get_operator_type()){ + case PRED_COMPARE: + cnt += expensive_refs_se(pr->get_left_se(),Schema); + cnt += expensive_refs_se(pr->get_right_se(),Schema); + return cnt; + case PRED_IN: + return expensive_refs_se(pr->get_left_se(),Schema); + case PRED_UNARY_OP: + return expensive_refs_pr(pr->get_left_pr(),Schema); + case PRED_BINARY_OP: + cnt += expensive_refs_pr(pr->get_left_pr(),Schema); + cnt += expensive_refs_pr(pr->get_right_pr(),Schema); + return cnt; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } +} + + +// TODO: allow "cheap" functions and predicates. +bool simple_field_constraint(cnf_elem *c){ + vector ll; + int l; + predicate_t *p = c->pr; + int l_agr, l_gb, l_par, l_func, l_op; + int r_agr, r_gb, r_par, r_func, r_op; + col_id_set left_colids, right_colids; + +// Verify that it is a simple atom + switch(p->get_operator_type()){ + case PRED_COMPARE: +// Must be an equality predicate which references +// which referecnes no aggregates, parameters, functions, or +// group-by variables, and should be a constraint of +// a single colref. +// AND should not require a complex comparison. + if(p->get_op() != "=") return(false); + analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false); + analyze_constraint_se(p->get_right_se(),r_agr, r_gb, r_par, r_func,l_op,NULL,false); + if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 || + r_agr>0 || r_gb>0 || r_par>0 || r_func>0 ) return(false); +// I will count on there being no gbvars in the constraint. +// TODO: allow gbvars which are colrefs. + gather_se_col_ids(p->get_left_se(), left_colids, NULL); + gather_se_col_ids(p->get_right_se(), right_colids, NULL); + if(left_colids.size()+right_colids.size() != 1) return(false); + + +// Normalize : the colref should be on the lhs. + if(right_colids.size() > 0){ + p->swap_scalar_operands(); + } + +// Disallow complex (and therefore expensive) comparisons. + if(p->get_left_se()->get_data_type()->complex_comparison( + p->get_right_se()->get_data_type() ) ) + return(false); + +// passed all the tests. + return(true); + case PRED_IN: +// LHS must be a non-gbvar colref. + analyze_constraint_se(p->get_left_se(),l_agr, l_gb, l_par, l_func,l_op,NULL,false); + if(l_agr>0 || l_gb>0 || l_par>0 || l_func>0 ) return(false); +// I will count on there being no gbvars in the constraint. +// TODO: allow gbvars which are colrefs. + gather_se_col_ids(p->get_left_se(), left_colids, NULL); + if(left_colids.size() != 1) return(false); +// Disallow complex (and therefore expensive) comparisons. + if(p->get_left_se()->get_data_type()->complex_comparison( + p->get_left_se()->get_data_type() ) ) + return(false); + + +// All entries in the IN list must be literals +// Currently, this is the only possibility. + return(true); + break; + case PRED_UNARY_OP: + return(false); + case PRED_BINARY_OP: + return(false); + case PRED_FUNC: + return(false); + default: + fprintf(stderr,"INTERNAL ERROR in simple_field_cosntraint, line %d, character %d, unknown predicate operator type %d\n", + p->get_lineno(), p->get_charno(), p->get_operator_type() ); + exit(1); + } + + return(false); +} + +// As the name implies, return the colref constrained by the +// cnf elem. I will be counting on the LHS being a SE pointing +// to a colref. + +// This fcn assumes that in fact exactly +// one colref is constrained. +colref_t *get_constrained_colref(scalarexp_t *se){ + int p; + vector operand_list; +colref_t *ret; + + switch(se->get_operator_type()){ + case SE_LITERAL: + return(NULL); + case SE_PARAM: + case SE_IFACE_PARAM: + return(NULL); + case SE_COLREF: + return(se->get_colref()); + case SE_UNARY_OP: + return(get_constrained_colref(se->get_left_se())); + case SE_BINARY_OP: + ret=get_constrained_colref(se->get_left_se()); + if(ret == NULL) return(get_constrained_colref(se->get_right_se())); + else return ret; + case SE_AGGR_STAR: + case SE_AGGR_SE: + return(NULL); + case SE_FUNC: + if(se->get_aggr_ref() >= 0) return NULL; + + operand_list = se->get_operands(); + for(p=0;pget_left_se())); +} +colref_t *get_constrained_colref(cnf_elem *c){ + return get_constrained_colref(c->pr->get_left_se()); +} + + + + +/* +void add_colref_constraint_to_cnf(cnf_elem *dst, predicate_t *src_p, + string target_fld, string target_tbl, int tblref){ + +// Make a copy of the predicate to be added. +// ASSUME no aggregates. + predicate_t *pr = dup_pr(src_p,NULL); + +// Modify the ref to the base table. +// ASSUME lhs is the colref + pr->get_left_se()->get_colref()->set_table_name(target_tbl); + pr->get_left_se()->get_colref()->set_table_ref(tblref); + + if(dst->pr == NULL) dst->pr = pr; + else dst->pr = new predicate_t("OR", dst->pr, pr); + +} +*/ + + +////////////////////////////////////////////////////// +/////////////// Represent a node in a predicate tree +struct common_pred_node{ + set lftas; + predicate_t *pr; + vector predecessor_preds; + vector children; + + string target_tbl; + string target_fld; + int target_ref; + + common_pred_node(){ + pr = NULL; + } +}; + + +predicate_t *make_common_pred(common_pred_node *pn){ + int n; + + if(pn->children.size() == 0){ + if(pn->pr == NULL){ + fprintf(stderr,"INTERNAL ERROR in make_common_pred, pred node ahs no children and no predicate.\n"); + exit(1); + } + return( dup_pr(pn->pr,NULL) ); + } + + predicate_t *curr_pr = make_common_pred( pn->children[0] ); + for(n=1;nchildren.size();++n){ + curr_pr = new predicate_t("OR", make_common_pred(pn->children[n]),curr_pr); + } + + if(pn->pr != NULL) + curr_pr = new predicate_t("AND", dup_pr(pn->pr,NULL), curr_pr); + + return(curr_pr); +} + + +bool operator<(const cnf_set &c1, const cnf_set &c2){ + if(c1.lfta_id.size() < c2.lfta_id.size()) + return true; + return false; +} + + +// Compute the predicates for the prefilter. +// the prefilter preds are returned in prefilter_preds. +// pred_ids is the set of predicates used in the prefilter. +// the encoding is the lfta index, in the top 16 bits, +// then the index of the cnf element in the bottom 16 bits. +// This set of for identifying which preds do not need +// to be generated in the lftas. +void find_common_filter(vector< vector > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, vector &prefilter_preds, set &pred_ids){ + int p, p2, l, c; + + vector pred_list, sort_list; + +// Create list of tagged, prefilter-safe CNFs. + for(l=0;lpr,Schema)==0) + pred_list.push_back(new cnf_set(where_list[l][c]->pr,l,c)); + } + } + } + +// Eliminate duplicates + for(p=0;ppr, pred_list[p2]->pr,Schema)){ + pred_list[p]->subsume(pred_list[p2]); + delete pred_list[p2]; + pred_list[p2] = NULL; + } + } + } + } + } + +// combine preds that occur in the exact same lftas. + for(p=0;plfta_id == pred_list[p2]->lfta_id){ + pred_list[p]->combine_pred(pred_list[p2]); + delete pred_list[p2]; + pred_list[p2] = NULL; + } + } + } + } + } + +// Compress the list + for(p=0;padd_pred_ids(pred_ids); + } + +// Substitute gb refs with their defs +// While I'm at it, substitute base table sch ref for tblref. + for(p=0;ppr,Schema); + } + +} + + + + + +/////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////// + +// Find partial functions and register them. +// Do a DFS so that nested partial fcn calls +// get evaluated in the right order. +// Don't drill down into aggregates -- their arguments are evaluated +// earlier than the select list is. +// +// Modification for function caching: +// Pass in a ref counter, and partial fcn indicator. +// Cache fcns ref'd at least once. +// pass in NULL for fcn_ref_cnt to turn off fcn caching analysis + + +void find_partial_fcns(scalarexp_t *se, vector *pf_list, + vector *fcn_ref_cnt, vector *is_partial_fcn, + ext_fcn_list *Ext_fcns){ + vector operands; + int o, f; + + if(se == NULL) return; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return; + case SE_UNARY_OP: + find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; + return; + case SE_BINARY_OP: + find_partial_fcns(se->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns); + find_partial_fcns(se->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns); + return; + case SE_COLREF: + return; + case SE_AGGR_STAR: + return; + case SE_AGGR_SE: +// find_partial_fcns(se->get_left_se(), pf_list, Ext_fcns) ; + return; + case SE_FUNC: + if(se->get_aggr_ref() >= 0) return; + + operands = se->get_operands(); + for(o=0;ois_partial(se->get_fcn_id()) || Ext_fcns->get_fcn_cost(se->get_fcn_id()) >= COST_HIGH){ + if(fcn_ref_cnt){ + for(f=0;fsize();++f){ + if(is_equivalent_se(se,(*pf_list)[f])){ + se->set_partial_ref(f); + (*fcn_ref_cnt)[f]++; + break; + } + } + }else{ + f=pf_list->size(); + } + if(f==pf_list->size() && (Ext_fcns->is_partial(se->get_fcn_id()) || fcn_ref_cnt)){ + se->set_partial_ref(pf_list->size()); + pf_list->push_back(se); + if(fcn_ref_cnt){ + fcn_ref_cnt->push_back(1); + is_partial_fcn->push_back(Ext_fcns->is_partial(se->get_fcn_id())); + } + } + } + return; + default: + fprintf(stderr,"INTERNAL ERROR in find_partial_fcns, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return; +} + + +void find_partial_fcns_pr(predicate_t *pr, vector *pf_list, + vector *fcn_ref_cnt, vector *is_partial_fcn, + ext_fcn_list *Ext_fcns){ + vector litl; + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; + return; + case PRED_COMPARE: + find_partial_fcns(pr->get_left_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; + find_partial_fcns(pr->get_right_se(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; + return; + case PRED_UNARY_OP: + find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns); + return; + case PRED_BINARY_OP: + find_partial_fcns_pr(pr->get_left_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; + find_partial_fcns_pr(pr->get_right_pr(), pf_list, fcn_ref_cnt, is_partial_fcn, Ext_fcns) ; + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return; +} + + + +void find_combinable_preds(predicate_t *pr, vector *pr_list, + table_list *Schema, ext_fcn_list *Ext_fcns){ + vector litl; + vector op_list; + int f,o; + + switch(pr->get_operator_type()){ + case PRED_IN: + return; + case PRED_COMPARE: + return; + case PRED_UNARY_OP: + find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns); + return; + case PRED_BINARY_OP: + find_combinable_preds(pr->get_left_pr(), pr_list, Schema, Ext_fcns) ; + find_combinable_preds(pr->get_right_pr(), pr_list, Schema, Ext_fcns) ; + return; + case PRED_FUNC: + if(Ext_fcns->is_combinable(pr->get_fcn_id())){ + for(f=0;fsize();++f){ + if(is_equivalent_pred_base(pr,(*pr_list)[f],Schema)){ + pr->set_combinable_ref(f); + break; + } + } + if(f == pr_list->size()){ + pr->set_combinable_ref(pr_list->size()); + pr_list->push_back(pr); + } + } + return; + default: + fprintf(stderr,"INTERNAL ERROR in find_partial_pr, line %d, character %d, unknown predicate operator type %d\n", + pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return; +} + + +//-------------------------------------------------------------------- +// Collect refs to aggregates. + + +void collect_agg_refs(scalarexp_t *se, set &agg_refs){ + vector operands; + int o; + + if(se == NULL) return; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return; + case SE_UNARY_OP: + collect_agg_refs(se->get_left_se(), agg_refs) ; + return; + case SE_BINARY_OP: + collect_agg_refs(se->get_left_se(), agg_refs); + collect_agg_refs(se->get_right_se(), agg_refs); + return; + case SE_COLREF: + return; + case SE_AGGR_STAR: + case SE_AGGR_SE: + agg_refs.insert(se->get_aggr_ref()); + return; + case SE_FUNC: + if(se->get_aggr_ref() >= 0) agg_refs.insert(se->get_aggr_ref()); + + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return; +} + + +void collect_aggr_refs_pr(predicate_t *pr, set &agg_refs){ + vector litl; + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + collect_agg_refs(pr->get_left_se(), agg_refs) ; + return; + case PRED_COMPARE: + collect_agg_refs(pr->get_left_se(), agg_refs) ; + collect_agg_refs(pr->get_right_se(), agg_refs) ; + return; + case PRED_UNARY_OP: + collect_aggr_refs_pr(pr->get_left_pr(), agg_refs); + return; + case PRED_BINARY_OP: + collect_aggr_refs_pr(pr->get_left_pr(), agg_refs) ; + collect_aggr_refs_pr(pr->get_right_pr(), agg_refs) ; + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return; +} + + +//-------------------------------------------------------------------- +// Collect previously registered partial fcn refs. +// Do a DFS so that nested partial fcn calls +// get evaluated in the right order. +// Don't drill down into aggregates -- their arguments are evaluated +// earlier than the select list is. +// ------------->>> THEN WHY AM I DRILLING DOWN INTO AGGREGATES? + +void collect_partial_fcns(scalarexp_t *se, set &pfcn_refs){ + vector operands; + int o; + + if(se == NULL) return; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return; + case SE_UNARY_OP: + collect_partial_fcns(se->get_left_se(), pfcn_refs) ; + return; + case SE_BINARY_OP: + collect_partial_fcns(se->get_left_se(), pfcn_refs); + collect_partial_fcns(se->get_right_se(), pfcn_refs); + return; + case SE_COLREF: + return; + case SE_AGGR_STAR: + return; + case SE_AGGR_SE: +// collect_partial_fcns(se->get_left_se(), pfcn_refs) ; + return; + case SE_FUNC: + if(se->get_aggr_ref() >= 0) return; + + operands = se->get_operands(); + for(o=0;ois_partial()){ + pfcn_refs.insert(se->get_partial_ref()); + } + + return; + default: + fprintf(stderr,"INTERNAL ERROR in collect_partial_fcns, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return; +} + + +void collect_partial_fcns_pr(predicate_t *pr, set &pfcn_refs){ + vector litl; + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + collect_partial_fcns(pr->get_left_se(), pfcn_refs) ; + return; + case PRED_COMPARE: + collect_partial_fcns(pr->get_left_se(), pfcn_refs) ; + collect_partial_fcns(pr->get_right_se(), pfcn_refs) ; + return; + case PRED_UNARY_OP: + collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs); + return; + case PRED_BINARY_OP: + collect_partial_fcns_pr(pr->get_left_pr(), pfcn_refs) ; + collect_partial_fcns_pr(pr->get_right_pr(), pfcn_refs) ; + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return; +} + + + + +/////////////////////////////////////////////////////////////// +//////////// Exported Functions /////////////////////////// +/////////////////////////////////////////////////////////////// + + +// Count and collect refs to interface parameters. + +int count_se_ifp_refs(scalarexp_t *se, set &ifpnames){ + vector operands; + int o; + int ret = 0; + + if(se == NULL) return 0; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + return 0; + case SE_IFACE_PARAM: + ifpnames.insert(se->get_ifpref()->to_string()); + return 1; + case SE_UNARY_OP: + return count_se_ifp_refs(se->get_left_se(), ifpnames) ; + case SE_BINARY_OP: + ret = count_se_ifp_refs(se->get_left_se(), ifpnames); + ret += count_se_ifp_refs(se->get_right_se(), ifpnames); + return ret; + case SE_COLREF: + return 0; + case SE_AGGR_STAR: + return 0; + case SE_AGGR_SE: +// collect_partial_fcns(se->get_left_se(), pfcn_refs) ; + return 0; + case SE_FUNC: + if(se->get_aggr_ref() >= 0) return 0; + + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return 0; +} + + +int count_pr_ifp_refs(predicate_t *pr, set &ifpnames){ + vector litl; + vector op_list; + int o; + int ret = 0; + if(pr == NULL) return 0; + + switch(pr->get_operator_type()){ + case PRED_IN: + return count_se_ifp_refs(pr->get_left_se(), ifpnames) ; + case PRED_COMPARE: + ret = count_se_ifp_refs(pr->get_left_se(), ifpnames) ; + ret += count_se_ifp_refs(pr->get_right_se(), ifpnames) ; + return ret; + case PRED_UNARY_OP: + return count_pr_ifp_refs(pr->get_left_pr(), ifpnames); + case PRED_BINARY_OP: + ret = count_pr_ifp_refs(pr->get_left_pr(), ifpnames) ; + ret += count_pr_ifp_refs(pr->get_right_pr(), ifpnames) ; + return ret; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return 0; +} + +// Resolve ifp refs, convert them to string literals. + +int resolve_se_ifp_refs(scalarexp_t *se, string ifm, string ifn, ifq_t *ifdb, string &err){ + vector operands; + vector ifvals; + int o; + int ierr; + string serr; + int ret = 0; + literal_t *tmp_l; + ifpref_t *ir; + + if(se == NULL) return 0; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + return 0; + case SE_IFACE_PARAM: + ir = se->get_ifpref(); + ifvals = ifdb->get_iface_vals(ifm, ifn, ir->get_pname(), ierr, serr); + if(ierr){ + err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", "+serr+"\n"; + return 1; + } + if(ifvals.size() == 0){ + err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", no parameter values.\n"; + return 1; + } + if(ifvals.size() > 1){ + err += "ERROR looking for parameter "+ir->get_pname()+" in interface "+ifm+"."+ifn+", multiple parameter values ("+int_to_string(ifvals.size())+").\n"; + return 1; + } + tmp_l = new literal_t( ifvals[0]); + se->convert_to_literal(tmp_l); + return 0; + case SE_UNARY_OP: + return resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err) ; + case SE_BINARY_OP: + ret = resolve_se_ifp_refs( se->get_left_se(), ifm, ifn,ifdb,err); + ret += resolve_se_ifp_refs( se->get_right_se(), ifm, ifn,ifdb,err); + return ret; + case SE_COLREF: + return 0; + case SE_AGGR_STAR: + return 0; + case SE_AGGR_SE: +// collect_partial_fcns(se->get_left_se(), pfcn_refs) ; + return 0; + case SE_FUNC: + if(se->get_aggr_ref() >= 0) return 0; + + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return 0; +} + + +int resolve_pr_ifp_refs(predicate_t *pr, string ifm, string ifn, ifq_t *ifdb, string &err){ + vector litl; + vector op_list; + int o; + int ret = 0; + + switch(pr->get_operator_type()){ + case PRED_IN: + return resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ; + case PRED_COMPARE: + ret = resolve_se_ifp_refs(pr->get_left_se(), ifm, ifn, ifdb, err) ; + ret += resolve_se_ifp_refs(pr->get_right_se(), ifm, ifn, ifdb, err) ; + return ret; + case PRED_UNARY_OP: + return resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err); + case PRED_BINARY_OP: + ret = resolve_pr_ifp_refs(pr->get_left_pr(), ifm, ifn, ifdb, err) ; + ret += resolve_pr_ifp_refs(pr->get_right_pr(), ifm, ifn, ifdb, err) ; + return ret; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + exit(1); + } + + return 0; +} + + +string impute_query_name(table_exp_t *fta_tree, string default_nm){ + string retval = fta_tree->get_val_of_name("query_name"); + if(retval == "") retval = default_nm; + if(retval == "") retval = "default_query"; + return(retval); +} + +// Convert the parse tree into an intermediate form, +// which admits analysis better. +// +// TODO : rationalize the error return policy. +// +// TODO : the query_summary_class object contains +// the parse tree. +// TODO: revisit the issue when nested subqueries are implemented. +// One possibility: implement accessor methods to hide the +// complexity +// For now: this class contains data structures not in table_exp_t +// (with a bit of duplication) + +// Return NULL on error. +// print error messages to stderr. + + +query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema, + ext_fcn_list *Ext_fcns, string default_name){ + int i,j, k, retval; + +// Create the summary struct -- no analysis is done here. + query_summary_class *qs = new query_summary_class(fta_tree); + qs->query_type = fta_tree->query_type; + +////////////// Do common analysis + +// Extract query name. Already imputed for the qnodes. +// qs->query_name = impute_query_name(fta_tree, default_name); + qs->query_name = default_name; +//printf("query name is %s\n",qs->query_name.c_str()); + +// extract definitions. Don't grab the query name. + + map nmap = fta_tree->get_name_map(); + map::iterator nmi; + for(nmi=nmap.begin(); nmi!=nmap.end(); ++nmi){ + string pname = (*nmi).first; + if(pname != "query_name" ) + (qs->definitions)[pname] = (*nmi).second; + } + +/// +/// FROM analysis + +// First, verify that all the referenced tables are defined. +// Then, bind the tablerefs in the FROM list to schemas in +// the schema list. + tablevar_list_t *tlist = fta_tree->get_from(); + vector tbl_vec = tlist->get_table_list(); + + bool found_error = false; + for(i=0;ifind_tbl(tbl_vec[i]->get_schema_name()); + if(sch_no < 0) { + fprintf(stderr,"Error, table <%s> not found in the schema file\n", + tbl_vec[i]->get_schema_name().c_str() ); + fprintf(stderr,"\tline=%d, char=%d\n",tbl_vec[i]->get_lineno(), + tbl_vec[i]->get_charno() ); + return(NULL); + } + + tbl_vec[i]->set_schema_ref(sch_no); + +// If accessing a UDOP, mangle the name +// This needs to be done in translate_fta.cc, not here. +/* + if(schema->get_schema_type(sch_no) == OPERATOR_VIEW_SCHEMA){ + string mngl_name = tbl_vec[i]->get_schema_name() + silo_nm; + tbl_vec[i]->set_schema_name(mngl_name); + } +*/ + +// No FTA schema should have an interface defined on it. + if(tbl_vec[i]->get_interface()!="" && schema->get_schema_type(sch_no) != PROTOCOL_SCHEMA){ + fprintf(stderr,"WARNING: interface %s specified for schema %s, but this schema is a STREAM and does not have an interface.\n",tbl_vec[i]->get_interface().c_str(), tbl_vec[i]->get_schema_name().c_str()); + } +// Fill in default interface + if(tbl_vec[i]->get_interface()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){ + tbl_vec[i]->set_interface("default"); + tbl_vec[i]->set_ifq(true); + } +// Fill in default machine + if(tbl_vec[i]->get_interface()!="" && tbl_vec[i]->get_machine()=="" && schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA && (! tbl_vec[i]->get_ifq())){ + tbl_vec[i]->set_machine(hostname); + } + + if(schema->get_schema_type(sch_no) == PROTOCOL_SCHEMA){ +// Record the set of interfaces accessed + string ifstr; + if(tbl_vec[i]->get_ifq()){ + ifstr = "["+tbl_vec[i]->get_interface()+"]"; + }else{ + if(tbl_vec[i]->get_machine() != "localhost"){ + ifstr = "'"+tbl_vec[i]->get_machine()+"'."+tbl_vec[i]->get_interface(); + }else{ + ifstr = tbl_vec[i]->get_interface(); + } + } +//printf("ifstr is %s, i=%d, machine=%s, interface=%s\n",ifstr.c_str(),i,tbl_vec[i]->get_machine().c_str(),tbl_vec[i]->get_interface().c_str()); + if(qs->definitions.count("_referenced_ifaces")){ + ifstr = qs->definitions["_referenced_ifaces"]+","+ifstr; + } + qs->definitions["_referenced_ifaces"] = ifstr; + } + + } + if(found_error) return(NULL); + +// Ensure that all tablevars have are named +// and that no two tablevars have the same name. + int tblvar_no = 0; +// First, gather the set of variable + set tblvar_names; + for(i=0;iget_var_name() != ""){ + if(tblvar_names.count(tbl_vec[i]->get_var_name()) > 0){ + fprintf(stderr,"ERROR, query has two table variables named %s. line=%d, char=%d\n", tbl_vec[i]->get_var_name().c_str(), tbl_vec[i]->get_lineno(), tbl_vec[i]->get_charno()); + return(NULL); + } + tblvar_names.insert(tbl_vec[i]->get_var_name()); + } + } +// Now generate variable names for unnamed tablevars + for(i=0;iget_var_name() == ""){ + char tmpstr[200]; + sprintf(tmpstr,"_t%d",tblvar_no); + string newvar = tmpstr; + while(tblvar_names.count(newvar) > 0){ + tblvar_no++; + sprintf(tmpstr,"_t%d",tblvar_no); + newvar = tmpstr; + } + tbl_vec[i]->set_range_var(newvar); + tblvar_names.insert(newvar); + } + } + +// Process inner/outer join properties + int jprop = fta_tree->get_from()->get_properties(); +// Require explicit INNER_JOIN, ... specification for join queries. + if(jprop < 0){ + if(qs->query_type != MERGE_QUERY && tbl_vec.size() > 1){ + fprintf(stderr,"ERROR, a join query must specify one of INNER_JOIM, OUTER_JOIN, LEFT_OUTER_JOIN, RIGHT_OUTER_JOIN, WATCHLIST_JOIN, FILTER_JOIN.\n"); + return(NULL); + } + } + + if(jprop == OUTER_JOIN_PROPERTY){ + for(i=0;iset_property(1); + } + if(jprop == LEFT_OUTER_JOIN_PROPERTY) + tbl_vec[0]->set_property(1); + if(jprop == RIGHT_OUTER_JOIN_PROPERTY) + tbl_vec[tbl_vec.size()-1]->set_property(1); + if(jprop == FILTER_JOIN_PROPERTY){ + if(fta_tree->get_from()->get_temporal_range() == 0){ + fprintf(stderr,"ERROR, a filter join must have a non-zero temporal range.\n"); + return NULL; + } + if(tbl_vec.size() != 2){ + fprintf(stderr,"ERROR, a filter join must be between two table variables.\n"); + return NULL; + } + colref_t *cr = fta_tree->get_from()->get_colref(); + string field = cr->get_field(); + + int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), field); + if(fi0 < 0){ + fprintf(stderr,"ERROR, temporal attribute %s for a filter join can't be found in schema %s\n",field.c_str(), tbl_vec[0]->get_schema_name().c_str()); + return NULL; + } + cr->set_schema_ref(tbl_vec[0]->get_schema_ref()); + cr->set_tablevar_ref(0); + string type_name = schema->get_type_name(tbl_vec[0]->get_schema_ref(),field); + param_list *modifiers = schema->get_modifier_list(cr->get_schema_ref(), field); + data_type *dt0 = new data_type(type_name, modifiers); + string dt0_type = dt0->get_type_str(); + if(dt0_type != "INT" && dt0_type != "UINT" && dt0_type != "LLONG" && dt0_type != "ULLONG"){ + fprintf(stderr,"ERROR, the temporal attribute in a filter join must be one of INT/UINT/LLONG/ULLONG.\n"); + return NULL; + } + if(! dt0->is_increasing()){ + fprintf(stderr,"ERROR, the temporal attribute in a filter join must be temporal increasing.\n"); + return NULL; + } + } + + + +///////////////////// +/// Build the query param table + vector query_params = fta_tree->query_params; + int p; + for(p=0;pname; + string dtname = query_params[p]->val; + + if(pname == ""){ + fprintf(stderr,"ERROR parameter has empty name.\n"); + found_error = true; + } + if(dtname == ""){ + fprintf(stderr,"ERROR parameter %s has empty type.\n",pname.c_str()); + found_error = true; + } + data_type *dt = new data_type(dtname); + if(!(dt->is_defined())){ + fprintf(stderr,"ERROR parameter %s has invalid type (%s).\n",pname.c_str(), dtname.c_str()); + found_error = true; + } + + qs->add_query_param(pname, dt, false); + } + if(found_error) return(NULL); +// unpack the param table to a global for easier analysis. + param_tbl=qs->param_tbl; + + +////////////////// WATCHLIST specialized analysis + if(qs->query_type == WATCHLIST_QUERY){ +// Populate a SELECT clause? + } + +////////////////// MERGE specialized analysis + + if(qs->query_type == MERGE_QUERY){ +// Verify that +// 1) there are two *different* streams ref'd in the FROM clause +// However, only emit a warning. +// (can't detect a problem if one of the interfaces is the +// default interface). +// 2) They have the same layout (e.g. same types but the +// names can be different +// 3) the two columns can unambiguously be mapped to +// fields of the two tables, one per table. Exception: +// the column names are the same and exist in both tables. +// FURTHERMORE the positions must be the same +// 4) after mapping, verify that both colrefs are temporal +// and in the same direction. + if(tbl_vec.size() < 2){ + fprintf(stderr,"ERROR, a MERGE query operates over at least 2 tables, %lu were supplied.\n",tbl_vec.size() ); + return(NULL); + } + + vector fev0 = schema->get_fields( + tbl_vec[0]->get_schema_name() + ); + + + int cv; + for(cv=1;cv fev1 = schema->get_fields( + tbl_vec[cv]->get_schema_name() + ); + + if(fev0.size() != fev1.size()){ + fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str()); + return(NULL); + } + +// Only need to ensure that the list of types are the same. +// THe first table supplies the output colnames, +// and all temporal properties are lost, except for the +// merge-by columns. + int f; + for(f=0;fget_type(),fev0[f]->get_modifier_list()); + data_type dt1(fev1[f]->get_type(),fev1[f]->get_modifier_list()); + if(! dt0.equal_subtypes(&dt1) ){ + fprintf(stderr,"ERROR, the input stream %s to the merge operator has different schema than input stream %s.\n",tbl_vec[cv]->get_schema_name().c_str(), tbl_vec[0]->get_schema_name().c_str()); + return(NULL); + } + } + } + +// copy over the merge-by cols. + qs->mvars = fta_tree->mergevars; + + if(qs->mvars.size() == 0){ // need to discover the merge vars. + int mergevar_pos = -1; + int f; + for(f=0;fget_type(),fev0[f]->get_modifier_list()); + if(dt0.is_temporal()){ + mergevar_pos = f; + break; + } + } + if(mergevar_pos >= 0){ + for(cv=0;cv fev1 = schema->get_fields(tbl_vec[cv]->get_schema_name()); + qs->mvars.push_back(new colref_t(tbl_vec[cv]->get_var_name().c_str(),fev1[mergevar_pos]->get_name().c_str() )); + } + }else{ + fprintf(stderr,"ERROR, no merge-by column found.\n"); + return(NULL); + } + } + +// Ensure same number of tables, merge cols. + if(tbl_vec.size() != qs->mvars.size()){ + fprintf(stderr,"ERROR, merge query has different numbers of table variables (%lu) and merge columns (%lu)\n",tbl_vec.size(), qs->mvars.size()); + return(NULL); + } + +// Ensure that the merge-by are from different tables +// also, sort colrefs so that they align with the FROM list using tmp_crl + set refd_sources; + vector tmp_crl(qs->mvars.size(),NULL); + for(cv=0;cvmvars.size();++cv){ + int tblvar=infer_tablevar_from_colref(qs->mvars[cv],fta_tree->fm,schema); + if(tblvar<0){ + fprintf(stderr,"ERROR, Merge column %d (%s) was not found in any of the tables.\n",cv,qs->mvars[cv]->to_string().c_str()); + } + refd_sources.insert(tblvar); + tmp_crl[tblvar] = qs->mvars[cv]; + } + if(refd_sources.size() != qs->mvars.size()){ + fprintf(stderr,"ERROR, The %lu merge columns reference only %lu table variables.\n",qs->mvars.size(), refd_sources.size()); + return(NULL); + } + +// 1-1 mapping, so use tmp_crl as the merge column list. + qs->mvars = tmp_crl; + + + +// Look up the colrefs in their schemas, verify that +// they are at the same place, that they are both temporal +// in the same way. +// It seems that this should be done more in the schema objects. + int fi0 = schema->get_field_idx(tbl_vec[0]->get_schema_name(), qs->mvars[0]->get_field()); + if(fi0 < 0){ + fprintf(stderr,"ERROR, Merge temporal field %s not found.\n",qs->mvars[0]->get_field().c_str()); + exit(1); + } + for(cv=1;cvmvars.size();++cv){ + int fi1 = schema->get_field_idx(tbl_vec[cv]->get_schema_name(), qs->mvars[0]->get_field()); + if(fi0!=fi1){ + fprintf(stderr,"ERROR, the merge columns for table variables %s and %s must be in the same position.\n",tbl_vec[0]->get_var_name().c_str(), tbl_vec[cv]->get_var_name().c_str()); + return NULL; + } + } + + field_entry *fe0 = schema->get_field(tbl_vec[0]->get_schema_name(),fi0); + data_type dt0(fe0->get_type(),fe0->get_modifier_list()); + if( (!dt0.is_temporal()) ){ + fprintf(stderr,"ERROR, merge column %d must be temporal.\n",0); + return(NULL); + } + for(cv=0;cvmvars.size();++cv){ + field_entry *fe1 = schema->get_field(tbl_vec[cv]->get_schema_name(),fi0); + data_type dt1(fe1->get_type(),fe1->get_modifier_list()); + if( (!dt1.is_temporal()) ){ + fprintf(stderr,"ERROR, merge column %d must be temporal.\n",cv); + return(NULL); + } + + + if( dt0.get_temporal() != dt1.get_temporal()){ + fprintf(stderr,"ERROR, the merge columns (0 and %d) must be temporal in the same direction.\n",cv); + return(NULL); + } + } + +// If there is a SLACK specification, verify +// that it is literal-only and that its type is compatible +// with that of the merge columns + qs->slack = fta_tree->slack; + if(qs->slack){ + if(! literal_only_se(qs->slack)){ + fprintf(stderr,"ERROR, the SLACK expression is not literal-only.\n"); + return NULL; + } + + assign_data_types(qs->slack, schema, fta_tree, Ext_fcns ); + data_type sdt(&dt0, qs->slack->get_data_type(), string("+")); + if(sdt.get_type() == undefined_t){ + fprintf(stderr,"ERROR, the SLACK expression data type is not compatible with the data type of the merge columns.\n"); + return NULL; + } + } + + +// All the tests have passed, there is nothing +// else to fill in. + + } + +////////////////// SELECT specialized analysis + + if(qs->query_type == SELECT_QUERY){ +// unpack the gb_tbl, aggr_tbl, param_tbl, and complex_literals +// objects into globals, for easier syntax. + gb_tbl = qs->gb_tbl; + aggr_tbl = qs->aggr_tbl; + + +// Build the table of group-by attributes. +// (se processing done automatically). +// NOTE : Doing the SE processing here is getting cumbersome, +// I should process these individually. +// NOTE : I should check for duplicate names. +// NOTE : I should ensure that the def of one GB does not +// refrence the value of another. + vector gb_list = fta_tree->get_groupby(); + int n_temporal = 0; + string temporal_gbvars = ""; + map gset_gbnames; + +// For generating the set of GB patterns for this aggregation query. + vector inner_pattern; + vector > pattern_set; + vector > > pattern_components; + + vector r_gbs, c_gbs, g_gbs; + int n_patterns; + + for(i=0;itype){ + case gb_egb_type: + retval = gb_tbl->add_gb_attr( + gb_list[i]->gb, fta_tree->fm, schema,fta_tree, Ext_fcns + ); + if(retval < 0){ + return NULL; // nothing added to gb_tbl, so this can trigger a segfault 2 lines below + }else{ + if(gb_tbl->get_data_type(i)->is_temporal()){ + n_temporal++; + if(temporal_gbvars != "") temporal_gbvars+=" "; + temporal_gbvars += gb_tbl->get_name(i); + } + } + + inner_pattern.clear(); + pattern_set.clear(); + inner_pattern.push_back(true); + pattern_set.push_back(inner_pattern); + pattern_components.push_back(pattern_set); + + gb_tbl->gb_entry_type.push_back(""); + gb_tbl->gb_entry_count.push_back(1); + gb_tbl->pattern_components.push_back(pattern_set); + + break; + case rollup_egb_type: + r_gbs = gb_list[i]->gb_lists[0]->get_gb_list(); + for(j=0;jadd_gb_attr( + r_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns + ); + if(retval < 0){ + found_error = true; + }else{ // rollup gb can't be temporal + gb_tbl->reset_temporal(gb_tbl->size()-1); + } + } + + inner_pattern.resize(r_gbs.size()); + pattern_set.clear(); + for(j=0;j<=r_gbs.size();++j){ + for(k=0;kgb_entry_type.push_back("ROLLUP"); + gb_tbl->gb_entry_count.push_back(r_gbs.size()); + gb_tbl->pattern_components.push_back(pattern_set); + break; + case cube_egb_type: + c_gbs = gb_list[i]->gb_lists[0]->get_gb_list(); + for(j=0;jadd_gb_attr( + c_gbs[j], fta_tree->fm, schema,fta_tree, Ext_fcns + ); + if(retval < 0){ + found_error = true; + }else{ // cube gb can't be temporal + gb_tbl->reset_temporal(gb_tbl->size()-1); + } + } + + inner_pattern.resize(c_gbs.size()); + pattern_set.clear(); + n_patterns = 1 << c_gbs.size(); + for(j=0;jgb_entry_type.push_back("CUBE"); + gb_tbl->gb_entry_count.push_back(c_gbs.size()); + gb_tbl->pattern_components.push_back(pattern_set); + break; + case gsets_egb_type: + { + gset_gbnames.clear(); + for(j=0;jgb_lists.size();++j){ + g_gbs = gb_list[i]->gb_lists[j]->get_gb_list(); + for(k=0;ktype != GB_COLREF){ + fprintf(stderr,"Error, group-by fields in a GROUPING_SETS clause must be table references, not computed values (field is %s\n",g_gbs[k]->name.c_str()); + found_error = true; + }else{ + if(gset_gbnames.count(g_gbs[k]->name) == 0){ + retval = gb_tbl->add_gb_attr( + g_gbs[k], fta_tree->fm, schema,fta_tree, Ext_fcns + ); + if(retval < 0){ + found_error = true; + }else{ // gsets gb can't be temporal + gb_tbl->reset_temporal(gb_tbl->size()-1); + } + int pos = gset_gbnames.size(); + gset_gbnames[g_gbs[k]->name] = pos; + } + } + } + } + + if(gset_gbnames.size() > 63){ + fprintf(stderr,"Error, at most 63 distinct fields can be referenced in a GROUPING_SETS clause.\n"); + found_error = true; + } + + inner_pattern.resize(gset_gbnames.size()); + pattern_set.clear(); + set signatures; + for(j=0;jgb_lists.size();++j){ + g_gbs = gb_list[i]->gb_lists[j]->get_gb_list(); + set refd_gbs; + for(k=0;kname); + } + fill(inner_pattern.begin(),inner_pattern.end(),false); + unsigned long long int signature = 0; + set::iterator ssi; + for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){ + inner_pattern[gset_gbnames[(*ssi)]] = true; + signature |= (1 << gset_gbnames[(*ssi)]); + } + if(signatures.count(signature)){ + fprintf(stderr,"Warning, duplicate GROUPING_SETS pattern found, ignoring:\n\t"); + set::iterator ssi; + for(ssi = refd_gbs.begin(); ssi != refd_gbs.end(); ++ssi){ + fprintf(stderr," %s",(*ssi).c_str()); + } + fprintf(stderr,"\n"); + }else{ + signatures.insert(signature); + pattern_set.push_back(inner_pattern); + } + } + pattern_components.push_back(pattern_set); + + gb_tbl->gb_entry_type.push_back("GROUPING_SETS"); + gb_tbl->gb_entry_count.push_back(gset_gbnames.size()); + gb_tbl->pattern_components.push_back(pattern_set); + } + break; + default: + break; + } + } + if(found_error) return(NULL); + if(n_temporal > 1){ + fprintf(stderr,"ERROR, query has multiple temporal group-by variables (%s). Cast away the temporality of all but one of these.\n", temporal_gbvars.c_str()); + return NULL; + } + +// Compute the set of patterns. Take the cross product of all pattern components. + vector > gb_patterns; + int n_components = pattern_components.size(); + vector pattern_pos(n_components,0); + bool done = false; + while(! done){ + vector pattern; + for(j=0;j= pattern_components[j].size()) + pattern_pos[j] = 0; + else + break; + } + if(j >= n_components) + done = true; + } + gb_tbl->gb_patterns = gb_patterns; + + +// Process the supergroup, if any. + vector sgb = fta_tree->get_supergb(); + for(i=0;ifind_gb(sgb[i],fta_tree->fm, schema); + if(gbr < 0){ + fprintf(stderr, "ERROR, supergroup attribute %s is not defined as a group-by variable.\n",sgb[i]->to_string().c_str()); + found_error = true; + } + if(qs->sg_tbl.count(gbr)){ + fprintf(stderr,"WARNING, duplicate supergroup attribute %s.\n",sgb[i]->to_string().c_str()); + } + qs->sg_tbl.insert(gbr); + } + if(found_error) return(NULL); + + if(qs->sg_tbl.size() > 0 && gb_tbl->gb_patterns.size()>0){ + fprintf(stderr,"Error, SUPERGROUP incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n"); + return NULL; + } + + + + predicate_t *wh = fta_tree->get_where(); + predicate_t *hv = fta_tree->get_having(); + predicate_t *cw = fta_tree->get_cleaning_when(); + predicate_t *cb = fta_tree->get_cleaning_by(); + predicate_t *closew = fta_tree->get_closing_when(); + + if(closew != NULL && gb_tbl->gb_patterns.size()>1){ + fprintf(stderr,"Error, CLOSING_WHEN incompatible with CUBE, ROLLUP, and GROUPING_SETS.\n"); + return NULL; + } + + + +// Verify that all column references are valid, and if so assign +// the data type. + + vector sl_list = fta_tree->get_sl_vec(); + for(i=0;ise, fta_tree->fm, schema, gb_tbl); + if(retval < 0) found_error = true; + } + if(wh != NULL) + retval = verify_predicate_colref(wh, fta_tree->fm, schema, gb_tbl); + if(retval < 0) found_error = true; + if(hv != NULL) + retval = verify_predicate_colref(hv, fta_tree->fm, schema, gb_tbl); + if(retval < 0) found_error = true; + if(cw != NULL) + retval = verify_predicate_colref(cw, fta_tree->fm, schema, gb_tbl); + if(retval < 0) found_error = true; + if(cb != NULL) + retval = verify_predicate_colref(cb, fta_tree->fm, schema, gb_tbl); + if(retval < 0) found_error = true; + if(closew != NULL) + retval = verify_predicate_colref(closew, fta_tree->fm, schema, gb_tbl); + if(retval < 0) found_error = true; + + if(found_error) return(NULL); + +// Verify that all of the scalar expressions +// and comparison predicates have compatible types. + + n_temporal = 0; + string temporal_output_fields; + for(i=0;ise, schema, fta_tree, Ext_fcns ); + if(retval < 0){ + found_error = true; + }else{ + if(sl_list[i]->se->get_data_type()->is_temporal()){ + n_temporal++; + temporal_output_fields += " "+int_to_string(i); + } + } + } + if(n_temporal > 1){ + fprintf(stderr,"ERROR, query has multiple temporal output fields (positions%s). Cast away the temporality of all but one of these.\n", temporal_output_fields.c_str()); + found_error=true; + } + if(wh != NULL) + retval = assign_predicate_data_types(wh, schema, fta_tree, Ext_fcns); + if(retval < 0) found_error = true; + if(hv != NULL) + retval = assign_predicate_data_types(hv, schema, fta_tree, Ext_fcns); + if(retval < 0) found_error = true; + if(cw != NULL) + retval = assign_predicate_data_types(cw, schema, fta_tree, Ext_fcns); + if(retval < 0) found_error = true; + if(cb != NULL) + retval = assign_predicate_data_types(cb, schema, fta_tree, Ext_fcns); + if(retval < 0) found_error = true; + if(closew != NULL) + retval = assign_predicate_data_types(closew, schema, fta_tree, Ext_fcns); + if(retval < 0) found_error = true; + + if(found_error) return(NULL); + +// Impute names for the unnamed columns. + set curr_names; + int s; + for(s=0;sname); + } + for(s=0;sname == "") + sl_list[s]->name = impute_colname(curr_names, sl_list[s]->se); + } + + +// Check the aggregates. +// No aggrs allowed in the WHERE predicate. +// (no aggrs in the GB defs, but that is examined elsewhere) +// Therefore, aggregates are allowed only the select clause. +// +// The query is an aggregation query if there is a group-by clause, or +// if any aggregate is referenced. If there is a group-by clause, +// at least one aggregate must be referenced. +// If the query is an aggregate query, the scalar expressions in +// the select clause can reference only constants, aggregates, or group-by +// attributes. +// Also, if the query is an aggregate query, build a table referencing +// the aggregates. +// +// No nested aggregates allowed. +// + +// First, count references in the WHERE predicate. +// (if there are any references, report an error). +// can ref group vars, tuple fields, and stateful fcns. + + if(wh != NULL){ + retval = count_aggr_pred(wh, true); + if(retval > 0){ + fprintf(stderr,"ERROR, no aggregate references are allowed in the WHERE clause.\n"); + return(NULL); + } + } + +// NOTE : Here I need an analysis of the having clause +// to verify that it only refs GB attrs and aggregates. +// (also, superaggregates, stateful fcns) + if(hv!=NULL){ + retval = verify_having_pred(hv, "HAVING", Ext_fcns); + if(retval < 0) return(NULL); + } + +// Cleaning by has same reference rules as Having + if(cb!=NULL){ + retval = verify_having_pred(cb, "CLEANING_BY", Ext_fcns); + if(retval < 0) return(NULL); + } + +// Cleaning when has same reference rules as Having, +// except that references to non-superaggregates are not allowed. +// This is tested for when "CLEANING_BY" is passed in as the clause. + if(cw!=NULL){ + retval = verify_having_pred(cw, "CLEANING_WHEN", Ext_fcns); + if(retval < 0) return(NULL); + } + +// CLOSING_WHEN : same rules as HAVING + if(closew!=NULL){ + retval = verify_having_pred(closew, "CLOSING_WHEN", Ext_fcns); + if(retval < 0) return(NULL); + } + + +// Collect aggregates in the HAVING and CLEANING clauses + if(hv != NULL){ + build_aggr_tbl_fm_pred(hv, aggr_tbl, Ext_fcns); + } + if(cw != NULL){ + build_aggr_tbl_fm_pred(cw, aggr_tbl, Ext_fcns); + } + if(cb != NULL){ + build_aggr_tbl_fm_pred(cb, aggr_tbl, Ext_fcns); + } + if(closew != NULL){ + build_aggr_tbl_fm_pred(closew, aggr_tbl, Ext_fcns); + } + +// Collect aggregate refs in the SELECT clause. + + for(i=0;ise, aggr_tbl, Ext_fcns); + + +// Collect references to states of stateful functions + if(wh != NULL){ + gather_fcn_states_pr(wh, qs->states_refd, Ext_fcns); + } + if(hv != NULL){ + gather_fcn_states_pr(hv, qs->states_refd, Ext_fcns); + } + if(cw != NULL){ + gather_fcn_states_pr(cw, qs->states_refd, Ext_fcns); + } + if(cb != NULL){ + gather_fcn_states_pr(cb, qs->states_refd, Ext_fcns); + } + if(closew != NULL){ // should be no stateful fcns here ... + gather_fcn_states_pr(closew, qs->states_refd, Ext_fcns); + } + for(i=0;ise, qs->states_refd, Ext_fcns); + + +// If this is an aggregate query, it had normally references +// some aggregates. Its not necessary though, just emit a warning. +// (acts as SELECT DISTINCT) + + bool is_aggr_query = gb_tbl->size() > 0 || aggr_tbl->size() > 0; + if(is_aggr_query && aggr_tbl->size() == 0){ + fprintf(stderr,"Warning, query contains a group-by clause but does not reference aggregates..\n"); + } + +// If this is an aggregate query, +// 1) verify that the SEs in the SELECT clause reference +// only constants, aggregates, and group-by attributes. +// 2) No aggregate scalar expression references an aggregate +// or any stateful function. +// 3) either it references both CLEANING clauses or neither. +// 4) all superaggregates must have the superaggr_allowed property. +// 5) all aggregates ref'd in the CLEANING_WHEN ad CLEANING_BY +// clauses must have the multiple_output property. + + + if(is_aggr_query){ + if(gb_list.size() == 0){ + fprintf(stderr,"ERROR, aggregation queries must have at least one group-by variable (which should be temporal).\n"); + return NULL; + } +// Ensure that at least one gbvar is temporal + if(! fta_tree->name_exists("no_temporal_aggr")){ + bool found_temporal = false; + for(i=0;isize();i++){ + if(gb_tbl->get_data_type(i)->is_temporal()){ + found_temporal = true; + } + } + if(! found_temporal){ + fprintf(stderr,"ERROR, at least one of the group-by variables must be temporal (unless no_temporal_aggr is set)\n"); + exit(1); + } + } + + if((!cb && cw) || (cb && !cw)){ + fprintf(stderr,"ERROR, an aggregate query must either include both a CLEANING_WHEN and a CLEANING_BY clause, or neither.\n"); + return(NULL); + } + + bool refs_running = false; + int a; + for(a=0; asize(); ++a){ + refs_running |= aggr_tbl->is_running_aggr(a); + } + + if(closew){ + if(cb || cw){ + fprintf(stderr, "ERROR, cannot reference both CLOSING_WHEN and either CLEANING_WHEN or CLEANING_BY.\n"); + return(NULL); + } + if(!refs_running){ + fprintf(stderr, "ERROR, if you reference CLOSING_WHEN you must reference at least one running window aggregate.\n"); + return(NULL); + } + } + + if(refs_running && !closew){ + fprintf(stderr, "ERROR, if you reference a running window aggregate you must reference a CLOSING_WHEN clause.\n"); + return(NULL); + } + + bool st_ok = true; + for(i=0;ise); + st_ok = st_ok && ret_bool; + } + if(! st_ok) + return(NULL); + + for(i=0;isize();i++){ + if(aggr_tbl->is_superaggr(i)){ + if(! aggr_tbl->superaggr_allowed(i)){ + fprintf(stderr,"ERROR, aggregate %s cannot be a superaggregate\n",aggr_tbl->get_op(i).c_str()); + return NULL; + } + } + if(aggr_tbl->is_builtin(i)){ + if(count_aggr_se(aggr_tbl->get_aggr_se(i), true) > 0){ + fprintf(stderr,"ERROR no nested aggregation allowed.\n"); + return(NULL); + } + }else{ + vector opl = aggr_tbl->get_operand_list(i); + int o; + for(o=0;o 0){ + fprintf(stderr,"ERROR no nested aggregation allowed.\n"); + return(NULL); + } + } + } + } + }else{ +// Ensure that non-aggregate query doesn't reference some things + if(cb || cw){ + fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLEANING_WHEN or a CLEANING_BY clause.\n"); + return(NULL); + } + if(closew){ + fprintf(stderr,"ERROR, a non-aggregate query may not reference a CLOSING_WHEN clause.\n"); + return(NULL); + } + if(qs->states_refd.size()){ + fprintf(stderr,"ERROR, a non-aggregate query may not refernece stateful functions.\n"); + return(NULL); + } + } + + + +// Convert the predicates into CNF. OK to pass NULL ptr. + make_cnf_from_pr(wh, qs->wh_cnf); + make_cnf_from_pr(hv, qs->hav_cnf); + make_cnf_from_pr(cb, qs->cb_cnf); + make_cnf_from_pr(cw, qs->cw_cnf); + make_cnf_from_pr(closew, qs->closew_cnf); + +// Analyze the predicates. + + for(i=0;iwh_cnf.size();i++) + analyze_cnf(qs->wh_cnf[i]); + for(i=0;ihav_cnf.size();i++) + analyze_cnf(qs->hav_cnf[i]); + for(i=0;icb_cnf.size();i++) + analyze_cnf(qs->cb_cnf[i]); + for(i=0;icw_cnf.size();i++) + analyze_cnf(qs->cw_cnf[i]); + for(i=0;iclosew_cnf.size();i++) + analyze_cnf(qs->closew_cnf[i]); + + +// At this point, the old analysis program +// gathered all refs to partial functions, +// complex literals, and parameters accessed via a handle. +// I think its better to delay this +// until code generation time, as the query will be +// in general split. + + } + + return(qs); +} + +/////////////////////////////////////////////////////////////////////// + +// Expand gbvars with their definitions. + +scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl){ + int o; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return se; + case SE_UNARY_OP: + se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl); + return se; + case SE_BINARY_OP: + se->lhs.scalarp = expand_gbvars_se(se->get_left_se(),gb_tbl); + se->rhs.scalarp = expand_gbvars_se(se->get_right_se(),gb_tbl); + return se; + case SE_COLREF: + if( se->is_gb() ){ + return( dup_se(gb_tbl.get_def(se->get_gb_ref()),NULL) ); + } + return se; +// don't descend into aggr defs. + case SE_AGGR_STAR: + return se; + case SE_AGGR_SE: + return se; + case SE_FUNC: + for(o=0;oparam_list.size();o++){ + se->param_list[o] = expand_gbvars_se(se->param_list[o], gb_tbl); + } + return se; + default: + fprintf(stderr,"INTERNAL ERROR in expand_gbvars, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return se; +} + +void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl){ + vector op_list; + int o; + bool found = false; + + switch(pr->get_operator_type()){ + case PRED_IN: + pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(), gb_tbl); + return; + case PRED_COMPARE: + pr->lhs.sexp = expand_gbvars_se(pr->get_left_se(),gb_tbl) ; + pr->rhs.sexp = expand_gbvars_se(pr->get_right_se(),gb_tbl) ; + return; + case PRED_UNARY_OP: + expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ; + return; + case PRED_BINARY_OP: + expand_gbvars_pr(pr->get_left_pr(),gb_tbl) ; + expand_gbvars_pr(pr->get_right_pr(),gb_tbl) ; + return; + case PRED_FUNC: + for(o=0;oparam_list.size();++o){ + pr->param_list[o] = expand_gbvars_se(pr->param_list[o],gb_tbl) ; + } + return; + default: + fprintf(stderr,"INTERNAL ERROR in expand_gbvars_pr, line %d, character %d, unknown predicate operator type %d\n", + pr->get_lineno(), pr->get_charno(), pr->get_operator_type() ); + } + return; +} + + + + +// return true if the se / pr contains any gbvar on the list. + + +bool contains_gb_se(scalarexp_t *se, set &gref_set){ + vector operands; + int o; + bool found = false; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return false; + case SE_UNARY_OP: + return contains_gb_se(se->get_left_se(),gref_set); + case SE_BINARY_OP: + return( contains_gb_se(se->get_left_se(),gref_set) || + contains_gb_se(se->get_right_se(),gref_set) ); + case SE_COLREF: + if( se->is_gb() ){ + return( gref_set.count(se->get_gb_ref()) > 0); + } + return false; +// don't descend into aggr defs. + case SE_AGGR_STAR: + return false; + case SE_AGGR_SE: + return false; + case SE_FUNC: + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } + return false; +} + + +bool contains_gb_pr(predicate_t *pr, set &gref_set){ + vector op_list; + int o; + bool found = false; + + switch(pr->get_operator_type()){ + case PRED_IN: + return contains_gb_se(pr->get_left_se(), gref_set); + case PRED_COMPARE: + return (contains_gb_se(pr->get_left_se(),gref_set) + || contains_gb_se(pr->get_right_se(),gref_set) ); + case PRED_UNARY_OP: + return contains_gb_pr(pr->get_left_pr(),gref_set) ; + case PRED_BINARY_OP: + return (contains_gb_pr(pr->get_left_pr(),gref_set) + || contains_gb_pr(pr->get_right_pr(),gref_set) ); + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + } + + return found; +} + + +// Gather the set of columns accessed in this se. +// Descend into aggregate functions. + +void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set, gb_table *gtbl){ + col_id ci; + vector operands; + int o; + + if(! se) + return; + + switch(se->get_operator_type()){ + case SE_LITERAL: + case SE_PARAM: + case SE_IFACE_PARAM: + return; + case SE_UNARY_OP: + gather_se_col_ids(se->get_left_se(),cid_set,gtbl); + return; + case SE_BINARY_OP: + gather_se_col_ids(se->get_left_se(),cid_set,gtbl); + gather_se_col_ids(se->get_right_se(),cid_set,gtbl); + return; + case SE_COLREF: + if(! se->is_gb() ){ + ci.load_from_colref(se->get_colref() ); + if(ci.tblvar_ref < 0){ + fprintf(stderr,"INTERNAL WARNING: unbound colref (%s) accessed.\n",ci.field.c_str()); + } + cid_set.insert(ci); + }else{ + if(gtbl==NULL){ + fprintf(stderr,"INTERNAL ERROR: gbvar ref in gather_se_col_ids, but gtbl is NULL.\n"); + exit(1); + } + gather_se_col_ids(gtbl->get_def(se->get_gb_ref()),cid_set,gtbl); + } + return; + case SE_AGGR_STAR: + return; + case SE_AGGR_SE: + gather_se_col_ids(se->get_left_se(),cid_set,gtbl); + return; + case SE_FUNC: + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } +} + + +// Gather the set of columns accessed in this se. + +void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl){ + vector op_list; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + gather_se_col_ids(pr->get_left_se(), cid_set,gtbl); + return; + case PRED_COMPARE: + gather_se_col_ids(pr->get_left_se(),cid_set,gtbl) ; + gather_se_col_ids(pr->get_right_se(),cid_set,gtbl) ; + return; + case PRED_UNARY_OP: + gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ; + return; + case PRED_BINARY_OP: + gather_pr_col_ids(pr->get_left_pr(),cid_set,gtbl) ; + gather_pr_col_ids(pr->get_right_pr(),cid_set,gtbl) ; + return; + case PRED_FUNC: + op_list = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + } +} + + + + +// Gather the set of special operator or comparison functions referenced by this se. + +void gather_se_opcmp_fcns(scalarexp_t *se, set &fcn_set){ + col_id ci; + data_type *ldt, *rdt; + int o; + vector operands; + + switch(se->get_operator_type()){ + case SE_LITERAL: + if( se->get_literal()->constructor_name() != "") + fcn_set.insert( se->get_literal()->constructor_name() ); + return; + case SE_PARAM: + return; +// SE_IFACE_PARAM should not exist when this is called. + case SE_UNARY_OP: + ldt = se->get_left_se()->get_data_type(); + if(ldt->complex_operator(se->get_op()) ){ + fcn_set.insert( ldt->get_complex_operator(se->get_op()) ); + } + gather_se_opcmp_fcns(se->get_left_se(),fcn_set); + return; + case SE_BINARY_OP: + ldt = se->get_left_se()->get_data_type(); + rdt = se->get_right_se()->get_data_type(); + + if(ldt->complex_operator(rdt, se->get_op()) ){ + fcn_set.insert( ldt->get_complex_operator(rdt, se->get_op()) ); + } + gather_se_opcmp_fcns(se->get_left_se(),fcn_set); + gather_se_opcmp_fcns(se->get_right_se(),fcn_set); + return; + case SE_COLREF: + return; + case SE_AGGR_STAR: + return; + case SE_AGGR_SE: + gather_se_opcmp_fcns(se->get_left_se(),fcn_set); + return; + case SE_FUNC: + operands = se->get_operands(); + for(o=0;oget_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } +} + + +// Gather the set of special operator or comparison functions referenced by this se. + +void gather_pr_opcmp_fcns(predicate_t *pr, set &fcn_set){ + data_type *ldt, *rdt; + vector operands; + int o; + + switch(pr->get_operator_type()){ + case PRED_IN: + ldt = pr->get_left_se()->get_data_type(); + if(ldt->complex_comparison(ldt) ){ + fcn_set.insert( ldt->get_comparison_fcn(ldt) ); + } + gather_se_opcmp_fcns(pr->get_left_se(), fcn_set); + return; + case PRED_COMPARE: + ldt = pr->get_left_se()->get_data_type(); + rdt = pr->get_right_se()->get_data_type(); + if(ldt->complex_comparison(rdt) ){ + fcn_set.insert( ldt->get_comparison_fcn(rdt) ); + } + gather_se_opcmp_fcns(pr->get_left_se(),fcn_set) ; + gather_se_opcmp_fcns(pr->get_right_se(),fcn_set) ; + return; + case PRED_UNARY_OP: + gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ; + return; + case PRED_BINARY_OP: + gather_pr_opcmp_fcns(pr->get_left_pr(),fcn_set) ; + gather_pr_opcmp_fcns(pr->get_right_pr(),fcn_set) ; + return; + case PRED_FUNC: + operands = pr->get_op_list(); + for(o=0;oget_lineno(), pr->get_charno(), pr->get_operator_type() ); + } +} + + + + +// find the temporal variable divisor if any. +// Only forms allowed : temporal_colref, temporal_colref/const +// temporal_colref/const + const + + +long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt,string &fnm){ + long long int retval = 0; + data_type *ldt, *rdt; + int o; + vector operands; + scalarexp_t *t_se, *c_se; + string the_op; + + switch(se->get_operator_type()){ + case SE_LITERAL: + return(-1); + case SE_PARAM: + return(-1); +// SE_IFACE_PARAM should not exist when this is called. + case SE_UNARY_OP: + return(-1); + case SE_BINARY_OP: + ldt = se->get_left_se()->get_data_type(); + if(ldt->is_temporal()){ + t_se = se->get_left_se(); + c_se = se->get_right_se(); + }else{ + t_se = se->get_left_se(); + c_se = se->get_right_se(); + } + if((! t_se->get_data_type()->is_temporal()) || c_se->get_data_type()->is_temporal()) + return -1; + + the_op = se->get_op(); + if(the_op == "+" || the_op == "-") + return find_temporal_divisor(t_se, gbt,fnm); + if(the_op == "/"){ + if(t_se->get_operator_type() == SE_COLREF && c_se->get_operator_type() == SE_LITERAL){ + fnm = t_se->get_colref()->get_field(); + string lits = c_se->get_literal()->to_string(); + sscanf(lits.c_str(),"%qd",&retval); + return retval; + } + } + + return -1; + case SE_COLREF: + if(se->is_gb()){ + return find_temporal_divisor(gbt->get_def(se->get_gb_ref()), gbt,fnm); + } + if(se->get_data_type()->is_temporal()){ + fnm = se->get_colref()->get_field(); + return 1; + } + return 0; + case SE_AGGR_STAR: + return -1; + case SE_AGGR_SE: + return -1; + case SE_FUNC: + return -1; + default: + fprintf(stderr,"INTERNAL ERROR in find_temporal_divisor, line %d, character %d: unknown operator type %d\n", + se->get_lineno(), se->get_charno(),se->get_operator_type()); + exit(1); + } +} + + +// impute_colnames: +// Create meaningful but unique names for the columns. +string impute_colname(vector &sel_list, scalarexp_t *se){ + set curr_names; + int s; + for(s=0;sname); + } + return impute_colname(curr_names, se); +} + +string impute_colname(set &curr_names, scalarexp_t *se){ +string ret; +scalarexp_t *seo; +vector operand_list; +string opstr; + + switch(se->get_operator_type()){ + case SE_LITERAL: + ret = "Literal"; + break; + case SE_PARAM: + ret = "Param_" + se->get_param_name(); + break; + case SE_IFACE_PARAM: + ret = "Iparam_" + se->get_ifpref()->get_pname(); + break; + case SE_COLREF: + ret = se->get_colref()->get_field() ; + break; + case SE_UNARY_OP: + case SE_BINARY_OP: + ret = "Field"; + break; + case SE_AGGR_STAR: + ret = "Cnt"; + break; + case SE_AGGR_SE: + ret = se->get_op(); + seo = se->get_left_se(); + switch(se->get_left_se()->get_operator_type()){ + case SE_PARAM: + ret += "_PARAM_"+seo->get_param_name(); + break; + case SE_IFACE_PARAM: + ret += "_IPARAM_"+seo->get_ifpref()->get_pname(); + break; + case SE_COLREF: + opstr = seo->get_colref()->get_field(); + if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){ + ret += "_" + opstr; + }else{ + ret = opstr; + } + break; + case SE_AGGR_STAR: + case SE_AGGR_SE: + opstr = seo->get_op(); + if(strncmp(ret.c_str(), opstr.c_str(),ret.size())){ + ret += "_" + seo->get_op(); + }else{ + ret = opstr; + } + break; + case SE_FUNC: + opstr = seo->get_op(); + ret += "_" + seo->get_op(); + break; + case SE_UNARY_OP: + case SE_BINARY_OP: + ret += "_SE"; + break; + default: + ret += "_"; + break; + } + break; + case SE_FUNC: + ret = se->get_op(); + operand_list = se->get_operands(); + if(operand_list.size() > 0){ + seo = operand_list[0]; + switch(seo->get_operator_type()){ + case SE_PARAM: + ret += "_PARAM_"+seo->get_param_name(); + break; + case SE_IFACE_PARAM: + ret += "_IPARAM_"+seo->get_ifpref()->get_pname(); + break; + case SE_COLREF: + ret += "_" + seo->get_colref()->get_field(); + break; + case SE_AGGR_STAR: + case SE_AGGR_SE: + case SE_FUNC: + ret += "_" + seo->get_op(); + break; + case SE_UNARY_OP: + case SE_BINARY_OP: + ret += "_SE"; + break; + default: + ret += "_"; + break; + } + }else{ + ret += "_func"; + } + break; + } + + if(ret == "Field"){ + if(curr_names.count("Field0") == 0) + ret = "Field0"; + } + int iter = 1; + string base = ret; + while(curr_names.count(ret) > 0){ + char tmpstr[500]; + sprintf(tmpstr,"%s%d",base.c_str(),iter); + ret = tmpstr; + iter++; + } + + + curr_names.insert(ret); + return(ret); + +} + + + +////////////////////////////////////////////////////////////////////// +////////////// Methods of defined classes /////////////////////// +////////////////////////////////////////////////////////////////////// + +// helper fcn to enable col_id as map key. + + bool operator<(const col_id &cr1, const col_id &cr2){ + if(cr1.tblvar_ref < cr2.tblvar_ref) return(true); + if(cr1.tblvar_ref == cr2.tblvar_ref) + return (cr1.field < cr2.field); + return(false); + } + + +// Process the GB variables. +// At parse time, GB vars are either GB_COLREF, +// or GB_COMPUTED if the AS keyword is used. +// Cast GB vars as named entities with a SE as +// their definition (the colref in the case of GB_COLREF). +// +// TODO: if there is a gbref in a gbdef, +// then I won't be able to compute the value without +// a complex dependence analysis. So verify that there is no +// gbref in any of the GBdefs. +// BUT: a GBVAR_COLREF should be converted to a regular colref, +// which is not yet done. +// +// TODO : sort out issue of GBVAR naming and identification. +// Determine where it is advantageous to convert GV_COLREF +// GBVARS to colrefs -- e.g. in group definition, in the WHERE clause, +// etc. +// +// return -1 if there is a problem. + +int gb_table::add_gb_attr( + gb_t *gb, + tablevar_list_t *fm, + table_list *schema, + table_exp_t *fta_tree, + ext_fcn_list *Ext_fcns + ){ + colref_t *cr; + int retval; + gb_table_entry *entry; + + if(gb->type == GB_COLREF){ + if(gb->table != "") + cr = new colref_t( + gb->interface.c_str(),gb->table.c_str(), gb->name.c_str() + ); + else + cr = new colref_t(gb->name.c_str()); + + int tablevar_ref = infer_tablevar_from_colref(cr, fm, schema); + if(tablevar_ref < 0) return(tablevar_ref); + + cr->set_tablevar_ref(tablevar_ref); + cr->set_schema_ref(fm->get_schema_ref(tablevar_ref)); + cr->set_interface(""); + cr->set_table_name(fm->get_tablevar_name(tablevar_ref)); + + entry = new gb_table_entry(); + entry->name.field = cr->get_field(); + entry->name.tblvar_ref = tablevar_ref; + entry->definition = new scalarexp_t(cr); + entry->ref_type = GBVAR_COLREF; + }else{ + entry = new gb_table_entry(); + entry->name.field = gb->name; + entry->name.tblvar_ref = -1; + entry->definition = gb->def; + entry->ref_type = GBVAR_SE; + } + + retval = verify_colref(entry->definition, fm, schema, NULL); + if(retval < 0) return(retval); + + retval = assign_data_types(entry->definition, schema, fta_tree, Ext_fcns); + if(retval < 0) return(retval); + +// Verify that the gbvar def references no aggregates and no gbvars. + if(count_gb_se(entry->definition) > 0){ + fprintf(stderr,"ERROR, group-by variable %s references other group-by variables in its definition.\n",entry->name.field.c_str() ); + return(-1); + } + if(count_aggr_se(entry->definition, true) > 0){ + fprintf(stderr,"ERROR, group-by variable %s references aggregates in its definition.\n",entry->name.field.c_str() ); + return(-1); + } + +// Check for duplicates + int i; + for(i=0;iname.field == gtbl[i]->name.field){ + fprintf(stderr,"ERROR, duplicate group-by variable name %s, positions %d and %lu.\n",entry->name.field.c_str(),i,gtbl.size()); + return -1; + } + } + + + gtbl.push_back(entry); + + return(1); +} + + +// Try to determine if the colref is actually +// a gbvar ref. +// a) if no tablename associated with the colref, +// 1) try to find a matching GB_COMPUTED gbvar. +// 2) failing that, try to match to a single tablevar +// 3) if successful, search among GB_COLREF +// b) else, try to match the tablename to a single tablevar +// if successful, search among GB_COLREF +int gb_table::find_gb(colref_t *cr, tablevar_list_t *fm, table_list *schema){ + string c_field = cr->get_field(); + int c_tblref; + int n_tbl; + int i; + vector candidates; + + if(cr->uses_default_table()){ + for(i=0;iref_type==GBVAR_SE && c_field == gtbl[i]->name.field){ + return(i); + } + } + candidates = find_source_tables(c_field, fm, schema); + if(candidates.size() != 1) return(-1); // can't find unique tablevar + for(i=0;iref_type==GBVAR_COLREF && + c_field == gtbl[i]->name.field && + candidates[0] == gtbl[i]->name.tblvar_ref){ + return(i); + } + } + return(-1); // colref is not in gb table. + } + +// A table name must have been given. + vector fm_tbls = fm->get_table_list(); + string interface = cr->get_interface(); + string table_name = cr->get_table_name(); + + +// if no interface name is given, try to search for the table +// name among the tablevar names first. + if(interface==""){ + for(i=0;iget_var_name() && interface == fm_tbls[i]->get_interface()) + candidates.push_back(i); + } + if(candidates.size()>1) return(-1); + if(candidates.size()==1){ + for(i=0;iref_type==GBVAR_COLREF && + c_field == gtbl[i]->name.field && + candidates[0] == gtbl[i]->name.tblvar_ref){ + return(i); + } + } + return(-1); // match semantics of bind to tablevar name first + } + } + +// Interface name given, or no interface but no +// no tablevar match. Try to match on schema name. + for(i=0;iget_var_name() && interface == fm_tbls[i]->get_interface()) + candidates.push_back(i); + } + if(candidates.size() != 1) return(-1); + for(i=0;iref_type==GBVAR_COLREF && + c_field == gtbl[i]->name.field && + candidates[0] == gtbl[i]->name.tblvar_ref){ + return(i); + } + } + +// No match found. + return(-1); + +} + + + +bool aggr_table_entry::fta_legal(ext_fcn_list *Ext_fcns){ + if(is_builtin()){ + if( (op == "COUNT") || (op == "SUM") || (op == "MIN") || + (op == "MAX") || (op == "AND_AGGR") || (op == "OR_AGGR") || + (op == "XOR_AGGR") ) + return(true); + }else{ + return Ext_fcns->fta_legal(fcn_id); + } + return(false); +} + + +// Return the set of subaggregates required to compute +// the desired aggregate. THe operand of the subaggregates +// can only be * or the scalarexp used in the superaggr. +// This is indicated by the use_se vector. + +// Is this code generation specific? + +vector aggr_table_entry::get_subaggr_fcns(vector &use_se){ + vector ret; + + if(op == "COUNT"){ + ret.push_back("COUNT"); + use_se.push_back(false); + } + if(op == "SUM"){ + ret.push_back("SUM"); + use_se.push_back(true); + } + if(op == "AVG"){ + ret.push_back("SUM"); + ret.push_back("COUNT"); + use_se.push_back(true); + use_se.push_back(false); + } + if(op == "MIN"){ + ret.push_back("MIN"); + use_se.push_back(true); + } + if(op == "MAX"){ + ret.push_back("MAX"); + use_se.push_back(true); + } + if(op == "AND_AGGR"){ + ret.push_back("AND_AGGR"); + use_se.push_back(true); + } + if(op == "OR_AGGR"){ + ret.push_back("OR_AGGR"); + use_se.push_back(true); + } + if(op == "XOR_AGGR"){ + ret.push_back("XOR_AGGR"); + use_se.push_back(true); + } + + return(ret); +} + +// Code generation specific? + +vector aggr_table_entry::get_subaggr_dt(){ + vector ret; + data_type *dt; + + if(op == "COUNT"){ + dt = new data_type("Int"); // was Uint + ret.push_back( dt ); + } + if(op == "SUM"){ + dt = new data_type(); + dt->set_aggr_data_type( "SUM",operand->get_data_type() ); + ret.push_back(dt); + } + if(op == "AVG"){ + dt = new data_type(); + dt->set_aggr_data_type( "SUM",operand->get_data_type() ); + ret.push_back( dt ); + dt = new data_type("Int"); + ret.push_back( dt ); + } + if(op == "MIN"){ + dt = new data_type(); + dt->set_aggr_data_type( "MIN",operand->get_data_type() ); + ret.push_back( dt ); + } + if(op == "MAX"){ + dt = new data_type(); + dt->set_aggr_data_type( "MAX",operand->get_data_type() ); + ret.push_back( dt ); + } + if(op == "AND_AGGR"){ + dt = new data_type(); + dt->set_aggr_data_type( "AND_AGGR",operand->get_data_type() ); + ret.push_back( dt ); + } + if(op == "OR_AGGR"){ + dt = new data_type(); + dt->set_aggr_data_type( "OR_AGGR",operand->get_data_type() ); + ret.push_back( dt ); + } + if(op == "XOR_AGGR"){ + dt = new data_type(); + dt->set_aggr_data_type( "XOR_AGGR",operand->get_data_type() ); + ret.push_back( dt ); + } + + return(ret); +} + +// Code generation specific? + +scalarexp_t *aggr_table_entry::make_superaggr_se(vector se_refs){ + scalarexp_t *se_l, *se_r, *ret_se = NULL; + + if(op == "COUNT"){ + ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]); + return(ret_se); + } + if(op == "SUM"){ + ret_se = scalarexp_t::make_se_aggr("SUM", se_refs[0]); + return(ret_se); + } + if(op == "AVG"){ + se_l = scalarexp_t::make_se_aggr("SUM", se_refs[0]); + se_r = scalarexp_t::make_se_aggr("SUM", se_refs[1]); + + ret_se = new scalarexp_t("/", se_l, se_r); + return(ret_se); + } + if(op == "MIN"){ + ret_se = scalarexp_t::make_se_aggr("MIN", se_refs[0]); + return(ret_se); + } + if(op == "MAX"){ + ret_se = scalarexp_t::make_se_aggr("MAX", se_refs[0]); + return(ret_se); + } + if(op == "AND_AGGR"){ + ret_se = scalarexp_t::make_se_aggr("AND_AGGR", se_refs[0]); + return(ret_se); + } + if(op == "OR_AGGR"){ + ret_se = scalarexp_t::make_se_aggr("OR_AGGR", se_refs[0]); + return(ret_se); + } + if(op == "XOR_AGGR"){ + ret_se = scalarexp_t::make_se_aggr("XOR_AGGR", se_refs[0]); + return(ret_se); + } + + return(ret_se); + +} + + +// Add a built-in aggr. +int aggregate_table::add_aggr(string op, scalarexp_t *se, bool is_super){ + int i; + + for(i=0;iis_builtin() && op == agr_tbl[i]->op + && is_equivalent_se(se,agr_tbl[i]->operand) ){ +// && is_super == agr_tbl[i]->is_superaggr()) + if(is_super) agr_tbl[i]->set_super(true); + return(i); + } + } + + aggr_table_entry *ate = new aggr_table_entry(op, se, is_super); + agr_tbl.push_back(ate); + return(agr_tbl.size() - 1); +} + +// add a UDAF +int aggregate_table::add_aggr(string op, int fcn_id, vector opl, data_type *sdt, bool is_super, bool is_running, bool has_lfta_bailout){ + int i,o; + + for(i=0;iis_builtin()) && fcn_id == agr_tbl[i]->fcn_id + && opl.size() == agr_tbl[i]->oplist.size() ){ +// && is_super == agr_tbl[i]->is_superaggr() ){ + for(o=0;ooplist[o]) ) + break; + } + if(o == opl.size()){ + if(is_super) agr_tbl[i]->set_super(true); + return i; + } + } + } + + aggr_table_entry *ate = new aggr_table_entry(op, fcn_id, opl, sdt,is_super,is_running, has_lfta_bailout); + agr_tbl.push_back(ate); + return(agr_tbl.size() - 1); +} + + +int cplx_lit_table::add_cpx_lit(literal_t *l, bool is_handle_ref){ + int i; + + for(i=0;iis_equivalent(cplx_lit_tbl[i])){ + hdl_ref_tbl[i] = hdl_ref_tbl[i] | is_handle_ref; + return(i); + } + } + + cplx_lit_tbl.push_back(l); + hdl_ref_tbl.push_back(is_handle_ref); + return(cplx_lit_tbl.size() - 1); +} + + + +//------------------------------------------------------------ +// parse_fta code + + +gb_t *gb_t::duplicate(){ + gb_t *ret = new gb_t(interface.c_str(), table.c_str(), name.c_str()); + ret->type = type; + ret->lineno = lineno; + ret->charno = charno; + if(def != NULL) + ret->def = dup_se(def,NULL); + return ret; +}