Added quantiling UDAFs
[com/gs-lite.git] / src / ftacmp / analyze_fta.h
index 32833c5..2ecea00 100644 (file)
-/* ------------------------------------------------
-Copyright 2014 AT&T Intellectual Property
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
- ------------------------------------------- */
-
-#ifndef __ANALYZE_FTA_H_DEFINED__
-#define __ANALYZE_FTA_H_DEFINED__
-
-
-
-#include "parse_fta.h"
-#include "parse_schema.h"
-#include"type_objects.h"
-#include "parse_ext_fcns.h"
-#include "iface_q.h"
-
-
-#include<set>
-#include<algorithm>
-using namespace std;
-
-
-//             Represent a component of a predicate,
-//             these components are ANDed together.
-//             Often they are simple enough to analyze.
-class cnf_elem{
-public:
-       int is_atom;    // i.e., is an atomic predicate
-       int eq_pred;    // And, it is a test for equality
-       int in_pred;    // Or, it is a an IN predicate
-
-       int pr_gb;              // the predicate refs a gb column.
-       int pr_attr;    // the predicate refs a table column.
-       int pr_aggr;    // the predicate refs an aggregate
-
-       int l_simple;   // the lhs is simple (just a colref)
-       int l_gb;               // the lhs refs a gb var
-       int l_attr;             // the lhs refs a table column
-       int l_aggr;             // the lhs refs an aggregate.
-
-       int r_simple;   // the rhs is simple (just a colref)
-       int r_gb;               // the rhs refs a gb var
-       int r_attr;             // the rhs refs a table column
-       int r_aggr;             // the rhs refs an aggregate.
-
-       int cost;               // an estiamte of the evaluation cost.
-
-   predicate_t *pr;
-
-       cnf_elem(predicate_t *p){
-               is_atom = 0;
-               eq_pred = 0; in_pred = 0;
-               pr_gb = pr_attr = pr_aggr = 0;
-               l_simple = l_gb = l_attr = l_aggr = 0;
-               r_simple = r_gb = r_attr = r_aggr = 0;
-               pr = p;
-       };
-
-       cnf_elem(){ is_atom = 0;
-               eq_pred = 0; in_pred = 0;
-               pr_gb = pr_attr = pr_aggr = 0;
-               l_simple = l_gb = l_attr = l_aggr = 0;
-               r_simple = r_gb = r_attr = r_aggr = 0;
-               pr = NULL;
-       };
-
-
-
-       void swap_scalar_operands(){
-       int tmp;
-       if(in_pred) return;     // can't swap IN predicates, se list always on right.
-       if(! is_atom) return;   // can only swap scalar expressions.
-
-       tmp = l_simple; l_simple = r_simple; r_simple = tmp;
-       tmp = l_gb; l_gb = r_gb; r_gb = tmp;
-       tmp = l_attr; l_attr = r_attr; r_attr = tmp;
-       tmp = l_aggr; l_aggr = r_aggr; r_aggr = tmp;
-
-       pr->swap_scalar_operands();
-  }
-};
-
-
-//             A GB variable is identified by its name and
-//             its source table variable -- so that joins can be expressed.
-//             GB Vars defined AS a scalar expression have a null tableref.
-
-struct col_id{
-  std::string field;
-  int tblvar_ref;
-//             Also carry the schema ref -- used to get data type info
-//             when defining variables.
-  int schema_ref;
-
-  col_id(){tblvar_ref = -1;};
-  col_id(colref_t *cr){
-         field = cr->field;
-         tblvar_ref = cr-> get_tablevar_ref();
-         schema_ref = cr->get_schema_ref();
-  };
-
-
-
-  void load_from_colref(colref_t *cr){
-         field = cr->field;
-         tblvar_ref = cr-> get_tablevar_ref();
-         schema_ref = cr->get_schema_ref();
-  };
-};
-
-struct lt_col_id{
-  bool operator()(const col_id &cr1, const col_id &cr2) const{
-       if(cr1.tblvar_ref < cr2.tblvar_ref) return(1);
-       if(cr1.tblvar_ref == cr2.tblvar_ref)
-          return (cr1.field < cr2.field);
-       return(0);
-  }
-};
-
-  bool operator<(const col_id &cr1, const col_id &cr2);
-
-
-//             Every GB variable is represented by its name
-//             (defined above) and the scalar expression which
-//             defines its value.  If the GB var is devlared as a
-//             colref, the add_gb_attr method of gb_table will
-//             create an SE consisting of the colref.
-//                     NOTE : using col_id is dangerous when the
-//                                     agregation operator is defiend over a self-join
-
-#define GBVAR_COLREF 1
-#define GBVAR_SE 2
-
-struct gb_table_entry{
-       col_id name;            // tblref in col_id refers to the tablevar.
-       scalarexp_t *definition;
-       int ref_type;
-
-       gb_table_entry(){
-               definition = NULL;
-       };
-       data_type *get_data_type(){
-               return(definition->get_data_type());
-       };
-       void reset_temporal(){
-               if(definition) definition->reset_temporal();
-       }
-};
-
-
-//             The GB table is the symbol table containing
-//             the GB vars.  This object puts a wrapper around
-//             access to the table.
-
-class gb_table{
-private:
-       std::vector<gb_table_entry *> gtbl;
-
-public:
-       std::vector<vector<bool> > gb_patterns;
-//             rollup, cube, and grouping_sets cannot be readily reconstructed by
-//             analyzing the patterns, so explicitly record them here.
-//             used only so that sgah_qpn::to_query_string produces 
-//             something meaningful.
-       std::vector<std::string> gb_entry_type;
-       std::vector<int> gb_entry_count;
-       std::vector<std::vector<std::vector<bool> > > pattern_components;
-
-       gb_table(){};
-
-       int add_gb_attr(gb_t *c, tablevar_list_t *fm, table_list *schema,
-                                       table_exp_t *fta_tree, ext_fcn_list *Ext_fcns);
-
-       void add_gb_var(std::string n, int t, scalarexp_t *d, int rt){
-               gb_table_entry *entry = new gb_table_entry();
-               entry->name.field = n;
-               entry->name.tblvar_ref = t;
-               entry->definition = d;
-               entry->ref_type = rt;
-               gtbl.push_back(entry);
-       };
-       data_type *get_data_type(int g){return(gtbl[g]->get_data_type());       };
-       int find_gb(colref_t *c, tablevar_list_t *fm, table_list *schema);
-       scalarexp_t *get_def(int i){return gtbl[i]->definition; };
-       std::string get_name(int i){return gtbl[i]->name.field; };
-       int get_tblvar_ref(int i){return gtbl[i]->name.tblvar_ref;      };
-       int get_reftype(int i){return gtbl[i]->ref_type;        };
-       int size(){return(gtbl.size()); };
-       void set_pattern_info(gb_table *g){
-               gb_patterns = g->gb_patterns;
-               gb_entry_type = g->gb_entry_type;
-               gb_entry_count = g->gb_entry_count;
-               pattern_components = g->pattern_components;
-       }
-       void reset_temporal(int i){
-               gtbl[i]->reset_temporal();
-       }
-};
-
-
-
-//                     Represent an aggregate to be calculated.
-//                     The unique identifier is the aggregate fcn, and the
-//                     expression aggregated over.
-//                     TODO: unify udaf with built-in better
-//                             (but then again, the core dumps help with debugging)
-
-struct aggr_table_entry{
-       std::string op;
-       scalarexp_t *operand;
-       std::vector<scalarexp_t *> oplist;
-       int fcn_id;             // -1 for built-in aggrs.
-       data_type *sdt; // storage data type.
-       data_type *rdt; // return value data type.
-       bool is_superag;
-       bool is_running;
-       bool bailout;
-
-       aggr_table_entry(){ sdt = NULL; rdt = NULL; is_superag = false;};
-       aggr_table_entry(std::string o, scalarexp_t *se, bool s){
-               op = o; operand = se; fcn_id = -1; sdt = NULL;
-               if(se){
-                       rdt = new data_type();
-                       rdt->set_aggr_data_type(o,se->get_data_type());
-               }else{
-                       rdt = new data_type("INT");
-               }
-               is_superag = s;
-               is_running = false;
-               bailout = false;
-       };
-       aggr_table_entry(std::string o, int f, std::vector<scalarexp_t *> s, data_type *st, bool spr, bool r, bool b_o){
-               op = o;
-               fcn_id = f;
-               operand = NULL;
-               oplist = s;
-               sdt = st;
-               rdt = NULL;
-               is_superag = spr;
-               is_running = r;
-               bailout = b_o;
-       }
-
-       bool fta_legal(ext_fcn_list *Ext_fcns);
-    bool is_star_aggr(){return operand==NULL;};
-       bool is_builtin(){return fcn_id < 0;};
-       int get_fcn_id(){return fcn_id;};
-       bool is_superaggr(){return is_superag;};
-       bool is_running_aggr(){return is_running;};
-       bool has_bailout(){return bailout;};
-    void set_super(bool s){is_superag = s;};
-
-       std::vector<std::string> get_subaggr_fcns(std::vector<bool> &use_se);
-       std::vector<data_type *> get_subaggr_dt();
-       scalarexp_t *make_superaggr_se(std::vector<scalarexp_t *> se_refs);
-       data_type *get_storage_type(){return sdt;};
-       std::vector<scalarexp_t *> get_operand_list(){return oplist;};
-
-       aggr_table_entry *duplicate(){
-               aggr_table_entry *dup = new aggr_table_entry();
-               dup->op = op;
-               dup->operand = operand;
-               dup->oplist = oplist;
-               dup->fcn_id = fcn_id;
-               dup->sdt = sdt;
-               dup->rdt = rdt;
-               dup->is_superag = is_superag;
-               dup->is_running = is_running;
-               dup->bailout = bailout;
-               return dup;
-       }
-
-       static bool superaggr_allowed(std::string op, ext_fcn_list *Ext_fcns){
-               if(op == "SUM" || op == "COUNT" || op == "MIN" || op=="MAX")
-                       return true;
-               return false;
-       }
-
-       bool superaggr_allowed(){
-               if(is_builtin())
-                       return aggr_table_entry::superaggr_allowed(op,NULL);
-//             Currently, no UDAFS allowed as superaggregates
-               return false;
-       }
-
-       static bool multiple_return_allowed(bool is_built_in, ext_fcn_list *Ext_fcns, int fcn_id){
-               if(is_built_in)
-                       return true;
-//                     Currently, no UDAFS allowed as stateful fcn params.
-//                     in cleaning_when, cleaning_by clauses.
-               if(Ext_fcns->is_running_aggr(fcn_id) || Ext_fcns->multiple_returns(fcn_id))
-                       return true;
-               return false;
-       }
-
-};
-
-
-class aggregate_table{
-public:
-       std::vector<aggr_table_entry *> agr_tbl;
-
-       aggregate_table(){};
-
-       int add_aggr(std::string op, scalarexp_t *se, bool is_spr);
-       int add_aggr(std::string op, int fcn_id, std::vector<scalarexp_t *> opl, data_type *sdt, bool is_spr, bool is_running, bool has_lfta_bailout);
-       int add_aggr(aggr_table_entry *a){agr_tbl.push_back(a); return agr_tbl.size(); };
-       int size(){return(agr_tbl.size());      };
-       scalarexp_t *get_aggr_se(int i){return agr_tbl[i]->operand;     };
-
-       data_type *get_data_type(int i){
-//             if(agr_tbl[i]->op == "COUNT")
-//                      return new data_type("uint");
-//             else
-                       if(! agr_tbl[i]->rdt){
-                               fprintf(stderr,"INTERNAL ERROR in aggregate_table::get_data_type : rdt is NULL.\n");
-                               exit(1);
-                       }
-                       return agr_tbl[i]->rdt;
-       };
-
-       std::string get_op(int i){return agr_tbl[i]->op;};
-
-       bool fta_legal(int i,ext_fcn_list *Ext_fcns){return agr_tbl[i]->fta_legal(Ext_fcns);    };
-    bool is_star_aggr(int i){return agr_tbl[i]->is_star_aggr(); };
-       bool is_builtin(int i){return agr_tbl[i]->is_builtin(); };
-       int get_fcn_id(int i){return agr_tbl[i]->get_fcn_id();};
-       bool is_superaggr(int i){return agr_tbl[i]->is_superaggr();};
-       bool is_running_aggr(int i){return agr_tbl[i]->is_running_aggr();};
-       bool has_bailout(int i){return agr_tbl[i]->has_bailout();};
-//     bool multiple_return_allowed(int i){
-//             return agr_tbl[i]->multiple_return_allowed(agr_tbl[i]->is_builtin(),NULL);
-//     };
-       bool superaggr_allowed(int i){return agr_tbl[i]->superaggr_allowed();};
-       std::vector<scalarexp_t *> get_operand_list(int i){
-               return agr_tbl[i]->get_operand_list();
-       }
-
-       std::vector<std::string> get_subaggr_fcns(int i, std::vector<bool> &use_se){
-               return(agr_tbl[i]->get_subaggr_fcns(use_se) );
-       };
-
-       std::vector<data_type *> get_subaggr_dt(int i){
-               return(agr_tbl[i]->get_subaggr_dt() );
-       }
-       data_type *get_storage_type(int i){
-               return( agr_tbl[i]->get_storage_type());
-       }
-
-       aggr_table_entry *duplicate(int i){
-               return agr_tbl[i]->duplicate();
-       };
-
-       scalarexp_t *make_superaggr_se(int i, std::vector<scalarexp_t *> se_refs){
-               return(agr_tbl[i]->make_superaggr_se(se_refs) );
-       };
-};
-
-
-class cplx_lit_table{
-       std::vector<literal_t *> cplx_lit_tbl;
-       std::vector<bool> hdl_ref_tbl;
-
-public:
-       cplx_lit_table(){};
-
-       int add_cpx_lit(literal_t *, bool);
-       int size(){return cplx_lit_tbl.size();};
-       literal_t *get_literal(int i){return cplx_lit_tbl[i];};
-       bool is_handle_ref(int i){return hdl_ref_tbl[i];};
-
-};
-
-enum param_handle_type { cplx_lit_e, litval_e, param_e};
-
-class handle_param_tbl_entry{
-private:
-
-public:
-       std::string fcn_name;
-       int param_slot;
-       literal_t *litval;
-       int complex_literal_idx;
-       std::string param_name;
-       param_handle_type val_type;
-    std::string type_name;
-
-       handle_param_tbl_entry(std::string fname, int slot,
-                                                       literal_t *l, std::string tnm){
-               fcn_name = fname; param_slot = slot;
-               val_type = litval_e;
-               litval = l;
-               type_name = tnm;
-       };
-       handle_param_tbl_entry(std::string fname, int slot, std::string p, std::string tnm){
-               fcn_name = fname; param_slot = slot;
-               val_type = param_e;
-               param_name = p;
-               type_name = tnm;
-       };
-       handle_param_tbl_entry(std::string fname, int slot, int ci, std::string tnm){
-               fcn_name = fname; param_slot = slot;
-               val_type = cplx_lit_e;
-               complex_literal_idx = ci;
-               type_name = tnm;
-       };
-
-       std::string lfta_registration_fcn(){
-               char tmps[500];
-               sprintf(tmps,"register_handle_for_%s_slot_%d",fcn_name.c_str(),param_slot);
-               return(tmps);
-       };
-       std::string hfta_registration_fcn(){return lfta_registration_fcn();};
-
-       std::string lfta_deregistration_fcn(){
-               char tmps[500];
-               sprintf(tmps,"deregister_handle_for_%s_slot_%d",fcn_name.c_str(),param_slot);
-               return(tmps);
-       };
-       std::string hfta_rdeegistration_fcn(){return lfta_registration_fcn();};
-
-};
-
-
-class param_table{
-//                     Store the data in vectors to preserve
-//                     listing order.
-       std::vector<std::string> pname;
-       std::vector<data_type *> pdtype;
-       std::vector<bool> phandle;
-
-       int find_name(std::string n){
-               int p;
-               for(p=0;p<pname.size();++p){
-                       if(pname[p] == n) break;
-               }
-               if(p == pname.size()) return(-1);
-               return(p);
-       };
-
-public:
-       param_table(){};
-
-//                     Add the param with the given name and associated
-//                     data type and handle use.
-//                     if its a new name, return true.
-//                     Else, take OR of use_handle, return false.
-//                     (builds param table and collects handle references).
-       bool add_param(std::string pnm, data_type *pdt, bool use_handle){
-               int p = find_name(pnm);
-               if(p == -1){
-                       pname.push_back(pnm);
-                       pdtype.push_back(pdt);
-                       phandle.push_back(use_handle);
-                       return(true);
-        }else{
-                       if(use_handle) phandle[p] = use_handle;
-                       return(false);
-               }
-
-       };
-
-       int size(){return pname.size(); };
-
-       std::vector<std::string> get_param_names(){
-               return(pname);
-       };
-
-       data_type *get_data_type(std::string pnm){
-               int p = find_name(pnm);
-               if(p >= 0){
-                       return(pdtype[p]);
-               }
-               return(new data_type("undefined_type"));
-       };
-
-       bool handle_access(std::string pnm){
-               int p = find_name(pnm);
-               if(p >= 0){
-                       return(phandle[p]);
-               }
-               return(false);
-       };
-
-};
-
-
-//                     Two columns are the same if they come from the
-//                     same source, and have the same field name.
-//                     (use to determine which fields must be unpacked).
-//                     NOTE : dangerous in the presence of self-joins.
-typedef std::set<col_id, lt_col_id> col_id_set;
-
-bool contains_gb_pr(predicate_t *pr, std::set<int> &gref_set);
-bool contains_gb_se(scalarexp_t *se, std::set<int> &gref_set);
-
-
-void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set,  gb_table *gtbl);
-void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl);
-
-void gather_se_opcmp_fcns(scalarexp_t *se, std::set<std::string> &fcn_set);
-void gather_pr_opcmp_fcns(predicate_t *pr, std::set<std::string> &fcn_set);
-
-////////////////////////////////////////////
-//             Structures to record usage of operator views.
-
-class opview_entry{
-public:
-       std::string parent_qname;
-       std::string root_name;
-       std::string view_name;
-       std::string exec_fl;            // name of executable file (if any)
-       std::string udop_alias;         // unique ID of the UDOP
-       std::string mangler;
-       int liveness_timeout;           // maximum delay between hearbeats from udop
-       int pos;
-       std::vector<std::string> subq_names;
-
-       opview_entry(){mangler="";};
-};
-
-class opview_set{
-public:
-       std::vector<opview_entry *> opview_list;
-
-       int append(opview_entry *opv){opview_list.push_back(opv);
-                                                                 return opview_list.size()-1;};
-       int size(){return opview_list.size();};
-       opview_entry *get_entry(int i){ return opview_list[i];};
-};
-
-/////////////////////////////////////////////////////////////////
-//             Wrap up all of the analysis of the FTA into this package.
-//
-//             There is some duplication between query_summary_class,
-//             but I'd rather avoid pushing analysis structures
-//             into the parse modules.
-//
-//             TODO: revisit the issue when nested subqueries are implemented.
-//             One possibility: implement accessor methods to hide the
-//             complexity
-//             For now: this class contains data structures not in table_exp_t
-//             (with a bit of duplication)
-
-struct query_summary_class{
-       std::string query_name;                 // the name of the query, becomes the name
-                                                                       // of the output.
-       int query_type;         // e.g. SELECT, MERGE, etc.
-
-       gb_table *gb_tbl;                       // Table of all group-by attributes.
-       std::set<int> sg_tbl;           // Names of the superGB attributes
-       aggregate_table *aggr_tbl;      // Table of all referenced aggregates.
-       std::set<std::string> states_refd;      // states ref'd by stateful fcns.
-
-
-//             copied from parse tree, then into query node.
-//             interpret the string representation of data type into a type_object
-       param_table *param_tbl;         // Table of all query parameters.
-
-//             There is stuff that is not copied over by analyze_fta,
-//             it still resides in fta_tree.
-    table_exp_t *fta_tree;             // The (decorated) parse tree.
-
-
-//             This is copied from the parse tree (except for "query_name")
-//             then copied to the query node in query_plan.cc:145
-       std::map<std::string, std::string> definitions; // additional definitions.
-
-
-//             CNF representation of the where and having predicates.
-       std::vector<cnf_elem *> wh_cnf;
-       std::vector<cnf_elem *> hav_cnf;
-       std::vector<cnf_elem *> cb_cnf;
-       std::vector<cnf_elem *> cw_cnf;
-       std::vector<cnf_elem *> closew_cnf;
-
-
-
-
-//             For MERGE type queries.
-       std::vector<colref_t *> mvars;
-       scalarexp_t *slack;
-
-////////                       Constructors
-       query_summary_class(){
-               init();
-       }
-
-       query_summary_class(table_exp_t *f){
-               fta_tree = f;
-               init();
-       }
-
-/////                  Methods
-
-       bool add_query_param(std::string pnm, data_type *pdt, bool use_handle){
-               return( param_tbl->add_param(pnm,pdt,use_handle));
-       };
-
-
-private:
-       void init(){
-//             Create the gb_tbl, aggr_tbl, and param_tbl objects
-               gb_tbl = new gb_table();
-               aggr_tbl = new aggregate_table();
-               param_tbl = new param_table();
-
-       }
-};
-
-int verify_colref(scalarexp_t *se, tablevar_list_t *fm,
-                                       table_list *schema, gb_table *gtbl);
-std::string impute_query_name(table_exp_t *fta_tree, std::string default_nm);
-
-query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema, ext_fcn_list *Ext_fcns, std::string qname);
-bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2);
-bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema);
-bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema);
-
-bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns);
-void analyze_cnf(cnf_elem *c);
-
-void find_partial_fcns(scalarexp_t *se, std::vector<scalarexp_t *> *pf_list, std::vector<int> *fcn_ref_cnt, std::vector<bool> *is_partial_fcn, ext_fcn_list *Ext_fcns);
-void find_partial_fcns_pr(predicate_t *pr, std::vector<scalarexp_t *> *pf_list,std::vector<int> *fcn_ref_cnt, std::vector<bool> *is_partial_fcn, ext_fcn_list *Ext_fcns);
-void collect_partial_fcns(scalarexp_t *se, std::set<int> &pfcn_refs);
-void collect_partial_fcns_pr(predicate_t *pr,  std::set<int> &pfcn_refs);
-
-void find_combinable_preds(predicate_t *pr,  vector<predicate_t *> *pr_list,
-                                                               table_list *Schema, ext_fcn_list *Ext_fcns);
-
-void collect_agg_refs(scalarexp_t *se, std::set<int> &agg_refs);
-void collect_aggr_refs_pr(predicate_t *pr,  std::set<int> &agg_refs);
-
-int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema);
-int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema);
-
-temporal_type compute_se_temporal(scalarexp_t *se, std::map<col_id, temporal_type> &tcol);
-
-
-bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
-                                                               cplx_lit_table *complex_literals);
-void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
-                                                               cplx_lit_table *complex_literals);
-void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,
-                                               std::vector<handle_param_tbl_entry *> &handle_tbl);
-void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,
-                                               std::vector<handle_param_tbl_entry *> &handle_tbl);
-
-//             Copy a scalar expression / predicate tree
-scalarexp_t *dup_se(scalarexp_t *se, aggregate_table *aggr_tbl);
-select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl);
-predicate_t *dup_pr(predicate_t *pr, aggregate_table *aggr_tbl);
-
-//             Expand gbvars
-void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl);
-scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl);
-
-
-//             copy a query
-table_exp_t *dup_table_exp(table_exp_t *te);
-
-
-//             Tie colrefs to a new range var
-void bind_colref_se(scalarexp_t *se,
-                                 std::vector<tablevar_t *> &fm,
-                                 int prev_ref, int new_ref
-                                );
-void bind_colref_pr(predicate_t *pr,
-                                 std::vector<tablevar_t *> &fm,
-                                 int prev_ref, int new_ref
-                                );
-
-
-std::string impute_colname(std::vector<select_element *> &sel_list, scalarexp_t *se);
-std::string impute_colname(std::set<std::string> &curr_names, scalarexp_t *se);
-
-bool is_literal_or_param_only(scalarexp_t *se);
-
-
-void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *agr_tbl);
-void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *agr_tbl);
-
-void compute_cnf_cost(cnf_elem *cm, ext_fcn_list *Ext_fcns);
-struct compare_cnf_cost{
-  bool operator()(const cnf_elem *c1, const cnf_elem *c2) const{
-       if(c1->cost < c2->cost) return(true);
-       return(false);
-  }
-};
-
-void get_tablevar_ref_se(scalarexp_t *se, std::vector<int> &reflist);
-void get_tablevar_ref_pr(predicate_t *pr, std::vector<int> &reflist);
-
-long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt, std::string &fnm);
-
-
-int count_se_ifp_refs(scalarexp_t *se, std::set<std::string> &ifpnames);
-int count_pr_ifp_refs(predicate_t *pr, std::set<std::string> &ifpnames);
-int resolve_se_ifp_refs(scalarexp_t *se, std::string ifm, std::string ifn, ifq_t *ifdb,  std::string &err);
-int resolve_pr_ifp_refs(predicate_t *pr, std::string ifm, std::string ifn, ifq_t *ifdb,  std::string &err);
-
-int pred_refs_sfun(predicate_t *pr);
-int se_refs_sfun(scalarexp_t *se);
-
-
-//             Represent preds for the prefilter, and
-//             the LFTAs which reference them.
-struct cnf_set{
-       predicate_t *pr;
-       std::set<int> lfta_id;
-       std::set<unsigned int> pred_id;
-
-       cnf_set(){};
-       cnf_set(predicate_t *p, unsigned int l, unsigned int i){
-               pr = dup_pr(p,NULL);            // NEED TO UPDATE dup_se TO PROPAGATE
-               lfta_id.insert(l);
-               pred_id.insert((l << 16)+i);
-       }
-       void subsume(cnf_set *c){
-               std::set<int>::iterator ssi;
-               for(ssi=c->lfta_id.begin();ssi!=c->lfta_id.end();++ssi){
-                       lfta_id.insert((*ssi));
-               }
-               std::set<unsigned int >::iterator spi;
-               for(spi=c->pred_id.begin();spi!=c->pred_id.end();++spi){
-                       pred_id.insert((*spi));
-               }
-       }
-       void combine_pred(cnf_set *c){
-               pr = new predicate_t("AND",pr,c->pr);
-               std::set<unsigned int >::iterator spi;
-               for(spi=c->pred_id.begin();spi!=c->pred_id.end();++spi){
-                       pred_id.insert((*spi));
-               }
-       }
-
-       void add_pred_ids(set<unsigned int> &pred_set){
-               std::set<unsigned int >::iterator spi;
-               for(spi=pred_id.begin();spi!=pred_id.end();++spi){
-                       pred_set.insert((*spi));
-               }
-       }
-
-       static unsigned int make_lfta_key(unsigned int l){
-               return l << 16;
-       }
-
-
-       ~cnf_set(){};
-};
-struct compare_cnf_set{
-  bool operator()(const cnf_set *c1, const cnf_set *c2) const{
-       if(c1->lfta_id.size() > c2->lfta_id.size()) return(true);
-       return(false);
-  }
-};
-
-bool operator<(const cnf_set &c1, const cnf_set &c2);
-
-
-void find_common_filter(std::vector< std::vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, std::vector<cnf_set *> &prefilter_preds, std::set<unsigned int> &pred_ids);
-
-
-cnf_elem *find_common_filter(std::vector< std::vector<cnf_elem *> > &where_list, table_list *Schema);
-
-std::string int_to_string(int i);
-void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl);
-void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl);
-
-
-
-
-#endif
-
+/* ------------------------------------------------\r
+Copyright 2014 AT&T Intellectual Property\r
+   Licensed under the Apache License, Version 2.0 (the "License");\r
+   you may not use this file except in compliance with the License.\r
+   You may obtain a copy of the License at\r
+\r
+     http://www.apache.org/licenses/LICENSE-2.0\r
+\r
+   Unless required by applicable law or agreed to in writing, software\r
+   distributed under the License is distributed on an "AS IS" BASIS,\r
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r
+   See the License for the specific language governing permissions and\r
+   limitations under the License.\r
+ ------------------------------------------- */\r
+\r
+#ifndef __ANALYZE_FTA_H_DEFINED__\r
+#define __ANALYZE_FTA_H_DEFINED__\r
+\r
+\r
+\r
+#include "parse_fta.h"\r
+#include "parse_schema.h"\r
+#include"type_objects.h"\r
+#include "parse_ext_fcns.h"\r
+#include "iface_q.h"\r
+\r
+\r
+#include<set>\r
+#include<algorithm>\r
+using namespace std;\r
+\r
+\r
+//             Represent a component of a predicate,\r
+//             these components are ANDed together.\r
+//             Often they are simple enough to analyze.\r
+class cnf_elem{\r
+public:\r
+       int is_atom;    // i.e., is an atomic predicate\r
+       int eq_pred;    // And, it is a test for equality\r
+       int in_pred;    // Or, it is a an IN predicate\r
+\r
+       int pr_gb;              // the predicate refs a gb column.\r
+       int pr_attr;    // the predicate refs a table column.\r
+       int pr_aggr;    // the predicate refs an aggregate\r
+\r
+       int l_simple;   // the lhs is simple (just a colref)\r
+       int l_gb;               // the lhs refs a gb var\r
+       int l_attr;             // the lhs refs a table column\r
+       int l_aggr;             // the lhs refs an aggregate.\r
+\r
+       int r_simple;   // the rhs is simple (just a colref)\r
+       int r_gb;               // the rhs refs a gb var\r
+       int r_attr;             // the rhs refs a table column\r
+       int r_aggr;             // the rhs refs an aggregate.\r
+\r
+       int cost;               // an estiamte of the evaluation cost.\r
+\r
+   predicate_t *pr;\r
+\r
+       cnf_elem(predicate_t *p){\r
+               is_atom = 0;\r
+               eq_pred = 0; in_pred = 0;\r
+               pr_gb = pr_attr = pr_aggr = 0;\r
+               l_simple = l_gb = l_attr = l_aggr = 0;\r
+               r_simple = r_gb = r_attr = r_aggr = 0;\r
+               pr = p;\r
+       };\r
+\r
+       cnf_elem(){ is_atom = 0;\r
+               eq_pred = 0; in_pred = 0;\r
+               pr_gb = pr_attr = pr_aggr = 0;\r
+               l_simple = l_gb = l_attr = l_aggr = 0;\r
+               r_simple = r_gb = r_attr = r_aggr = 0;\r
+               pr = NULL;\r
+       };\r
+\r
+\r
+\r
+       void swap_scalar_operands(){\r
+       int tmp;\r
+       if(in_pred) return;     // can't swap IN predicates, se list always on right.\r
+       if(! is_atom) return;   // can only swap scalar expressions.\r
+\r
+       tmp = l_simple; l_simple = r_simple; r_simple = tmp;\r
+       tmp = l_gb; l_gb = r_gb; r_gb = tmp;\r
+       tmp = l_attr; l_attr = r_attr; r_attr = tmp;\r
+       tmp = l_aggr; l_aggr = r_aggr; r_aggr = tmp;\r
+\r
+       pr->swap_scalar_operands();\r
+  }\r
+};\r
+\r
+\r
+//             A GB variable is identified by its name and\r
+//             its source table variable -- so that joins can be expressed.\r
+//             GB Vars defined AS a scalar expression have a null tableref.\r
+\r
+struct col_id{\r
+  std::string field;\r
+  int tblvar_ref;\r
+//             Also carry the schema ref -- used to get data type info\r
+//             when defining variables.\r
+  int schema_ref;\r
+\r
+  col_id(){tblvar_ref = -1;};\r
+  col_id(colref_t *cr){\r
+         field = cr->field;\r
+         tblvar_ref = cr-> get_tablevar_ref();\r
+         schema_ref = cr->get_schema_ref();\r
+  };\r
+\r
+\r
+\r
+  void load_from_colref(colref_t *cr){\r
+         field = cr->field;\r
+         tblvar_ref = cr-> get_tablevar_ref();\r
+         schema_ref = cr->get_schema_ref();\r
+  };\r
+};\r
+\r
+struct lt_col_id{\r
+  bool operator()(const col_id &cr1, const col_id &cr2) const{\r
+       if(cr1.tblvar_ref < cr2.tblvar_ref) return(1);\r
+       if(cr1.tblvar_ref == cr2.tblvar_ref)\r
+          return (cr1.field < cr2.field);\r
+       return(0);\r
+  }\r
+};\r
+\r
+  bool operator<(const col_id &cr1, const col_id &cr2);\r
+\r
+\r
+//             Every GB variable is represented by its name\r
+//             (defined above) and the scalar expression which\r
+//             defines its value.  If the GB var is devlared as a\r
+//             colref, the add_gb_attr method of gb_table will\r
+//             create an SE consisting of the colref.\r
+//                     NOTE : using col_id is dangerous when the\r
+//                                     agregation operator is defiend over a self-join\r
+\r
+#define GBVAR_COLREF 1\r
+#define GBVAR_SE 2\r
+\r
+struct gb_table_entry{\r
+       col_id name;            // tblref in col_id refers to the tablevar.\r
+       scalarexp_t *definition;\r
+       int ref_type;\r
+\r
+       gb_table_entry(){\r
+               definition = NULL;\r
+       };\r
+       data_type *get_data_type(){\r
+               return(definition->get_data_type());\r
+       };\r
+       void reset_temporal(){\r
+               if(definition) definition->reset_temporal();\r
+       }\r
+};\r
+\r
+\r
+//             The GB table is the symbol table containing\r
+//             the GB vars.  This object puts a wrapper around\r
+//             access to the table.\r
+\r
+class gb_table{\r
+private:\r
+       std::vector<gb_table_entry *> gtbl;\r
+\r
+public:\r
+       std::vector<vector<bool> > gb_patterns;\r
+//             rollup, cube, and grouping_sets cannot be readily reconstructed by\r
+//             analyzing the patterns, so explicitly record them here.\r
+//             used only so that sgah_qpn::to_query_string produces \r
+//             something meaningful.\r
+       std::vector<std::string> gb_entry_type;\r
+       std::vector<int> gb_entry_count;\r
+       std::vector<std::vector<std::vector<bool> > > pattern_components;\r
+\r
+       gb_table(){};\r
+\r
+       int add_gb_attr(gb_t *c, tablevar_list_t *fm, table_list *schema,\r
+                                       table_exp_t *fta_tree, ext_fcn_list *Ext_fcns);\r
+\r
+       void add_gb_var(std::string n, int t, scalarexp_t *d, int rt){\r
+               gb_table_entry *entry = new gb_table_entry();\r
+               entry->name.field = n;\r
+               entry->name.tblvar_ref = t;\r
+               entry->definition = d;\r
+               entry->ref_type = rt;\r
+               gtbl.push_back(entry);\r
+       };\r
+       data_type *get_data_type(int g){return(gtbl[g]->get_data_type());       };\r
+       int find_gb(colref_t *c, tablevar_list_t *fm, table_list *schema);\r
+       scalarexp_t *get_def(int i){return gtbl[i]->definition; };\r
+       std::string get_name(int i){return gtbl[i]->name.field; };\r
+       int get_tblvar_ref(int i){return gtbl[i]->name.tblvar_ref;      };\r
+       int get_reftype(int i){return gtbl[i]->ref_type;        };\r
+       int size(){return(gtbl.size()); };\r
+       void set_pattern_info(gb_table *g){\r
+               gb_patterns = g->gb_patterns;\r
+               gb_entry_type = g->gb_entry_type;\r
+               gb_entry_count = g->gb_entry_count;\r
+               pattern_components = g->pattern_components;\r
+       }\r
+       void reset_temporal(int i){\r
+               gtbl[i]->reset_temporal();\r
+       }\r
+};\r
+\r
+\r
+\r
+//                     Represent an aggregate to be calculated.\r
+//                     The unique identifier is the aggregate fcn, and the\r
+//                     expression aggregated over.\r
+//                     TODO: unify udaf with built-in better\r
+//                             (but then again, the core dumps help with debugging)\r
+\r
+struct aggr_table_entry{\r
+       std::string op;\r
+       scalarexp_t *operand;\r
+       std::vector<scalarexp_t *> oplist;\r
+       int fcn_id;             // -1 for built-in aggrs.\r
+       data_type *sdt; // storage data type.\r
+       data_type *rdt; // return value data type.\r
+       bool is_superag;\r
+       bool is_running;\r
+       bool bailout;\r
+\r
+       aggr_table_entry(){ sdt = NULL; rdt = NULL; is_superag = false;};\r
+       aggr_table_entry(std::string o, scalarexp_t *se, bool s){\r
+               op = o; operand = se; fcn_id = -1; sdt = NULL;\r
+               if(se){\r
+                       rdt = new data_type();\r
+                       rdt->set_aggr_data_type(o,se->get_data_type());\r
+               }else{\r
+                       rdt = new data_type("INT");\r
+               }\r
+               is_superag = s;\r
+               is_running = false;\r
+               bailout = false;\r
+       };\r
+       aggr_table_entry(std::string o, int f, std::vector<scalarexp_t *> s, data_type *st, bool spr, bool r, bool b_o){\r
+               op = o;\r
+               fcn_id = f;\r
+               operand = NULL;\r
+               oplist = s;\r
+               sdt = st;\r
+               rdt = NULL;\r
+               is_superag = spr;\r
+               is_running = r;\r
+               bailout = b_o;\r
+       }\r
+\r
+       bool fta_legal(ext_fcn_list *Ext_fcns);\r
+    bool is_star_aggr(){return operand==NULL;};\r
+       bool is_builtin(){return fcn_id < 0;};\r
+       int get_fcn_id(){return fcn_id;};\r
+       bool is_superaggr(){return is_superag;};\r
+       bool is_running_aggr(){return is_running;};\r
+       bool has_bailout(){return bailout;};\r
+    void set_super(bool s){is_superag = s;};\r
+\r
+       std::vector<std::string> get_subaggr_fcns(std::vector<bool> &use_se);\r
+       std::vector<data_type *> get_subaggr_dt();\r
+       scalarexp_t *make_superaggr_se(std::vector<scalarexp_t *> se_refs);\r
+       data_type *get_storage_type(){return sdt;};\r
+       std::vector<scalarexp_t *> get_operand_list(){return oplist;};\r
+\r
+       aggr_table_entry *duplicate(){\r
+               aggr_table_entry *dup = new aggr_table_entry();\r
+               dup->op = op;\r
+               dup->operand = operand;\r
+               dup->oplist = oplist;\r
+               dup->fcn_id = fcn_id;\r
+               dup->sdt = sdt;\r
+               dup->rdt = rdt;\r
+               dup->is_superag = is_superag;\r
+               dup->is_running = is_running;\r
+               dup->bailout = bailout;\r
+               return dup;\r
+       }\r
+\r
+       static bool superaggr_allowed(std::string op, ext_fcn_list *Ext_fcns){\r
+               if(op == "SUM" || op == "COUNT" || op == "MIN" || op=="MAX")\r
+                       return true;\r
+               return false;\r
+       }\r
+\r
+       bool superaggr_allowed(){\r
+               if(is_builtin())\r
+                       return aggr_table_entry::superaggr_allowed(op,NULL);\r
+//             Currently, no UDAFS allowed as superaggregates\r
+               return false;\r
+       }\r
+\r
+       static bool multiple_return_allowed(bool is_built_in, ext_fcn_list *Ext_fcns, int fcn_id){\r
+               if(is_built_in)\r
+                       return true;\r
+//                     Currently, no UDAFS allowed as stateful fcn params.\r
+//                     in cleaning_when, cleaning_by clauses.\r
+               if(Ext_fcns->is_running_aggr(fcn_id) || Ext_fcns->multiple_returns(fcn_id))\r
+                       return true;\r
+               return false;\r
+       }\r
+\r
+};\r
+\r
+\r
+class aggregate_table{\r
+public:\r
+       std::vector<aggr_table_entry *> agr_tbl;\r
+\r
+       aggregate_table(){};\r
+\r
+       int add_aggr(std::string op, scalarexp_t *se, bool is_spr);\r
+       int add_aggr(std::string op, int fcn_id, std::vector<scalarexp_t *> opl, data_type *sdt, bool is_spr, bool is_running, bool has_lfta_bailout);\r
+       int add_aggr(aggr_table_entry *a){agr_tbl.push_back(a); return agr_tbl.size(); };\r
+       int size(){return(agr_tbl.size());      };\r
+       scalarexp_t *get_aggr_se(int i){return agr_tbl[i]->operand;     };\r
+\r
+       data_type *get_data_type(int i){\r
+//             if(agr_tbl[i]->op == "COUNT")\r
+//                      return new data_type("uint");\r
+//             else\r
+                       if(! agr_tbl[i]->rdt){\r
+                               fprintf(stderr,"INTERNAL ERROR in aggregate_table::get_data_type : rdt is NULL.\n");\r
+                               exit(1);\r
+                       }\r
+                       return agr_tbl[i]->rdt;\r
+       };\r
+\r
+       std::string get_op(int i){return agr_tbl[i]->op;};\r
+\r
+       bool fta_legal(int i,ext_fcn_list *Ext_fcns){return agr_tbl[i]->fta_legal(Ext_fcns);    };\r
+    bool is_star_aggr(int i){return agr_tbl[i]->is_star_aggr(); };\r
+       bool is_builtin(int i){return agr_tbl[i]->is_builtin(); };\r
+       int get_fcn_id(int i){return agr_tbl[i]->get_fcn_id();};\r
+       bool is_superaggr(int i){return agr_tbl[i]->is_superaggr();};\r
+       bool is_running_aggr(int i){return agr_tbl[i]->is_running_aggr();};\r
+       bool has_bailout(int i){return agr_tbl[i]->has_bailout();};\r
+//     bool multiple_return_allowed(int i){\r
+//             return agr_tbl[i]->multiple_return_allowed(agr_tbl[i]->is_builtin(),NULL);\r
+//     };\r
+       bool superaggr_allowed(int i){return agr_tbl[i]->superaggr_allowed();};\r
+       std::vector<scalarexp_t *> get_operand_list(int i){\r
+               return agr_tbl[i]->get_operand_list();\r
+       }\r
+\r
+       std::vector<std::string> get_subaggr_fcns(int i, std::vector<bool> &use_se){\r
+               return(agr_tbl[i]->get_subaggr_fcns(use_se) );\r
+       };\r
+\r
+       std::vector<data_type *> get_subaggr_dt(int i){\r
+               return(agr_tbl[i]->get_subaggr_dt() );\r
+       }\r
+       data_type *get_storage_type(int i){\r
+               return( agr_tbl[i]->get_storage_type());\r
+       }\r
+\r
+       aggr_table_entry *duplicate(int i){\r
+               return agr_tbl[i]->duplicate();\r
+       };\r
+\r
+       scalarexp_t *make_superaggr_se(int i, std::vector<scalarexp_t *> se_refs){\r
+               return(agr_tbl[i]->make_superaggr_se(se_refs) );\r
+       };\r
+};\r
+\r
+\r
+class cplx_lit_table{\r
+       std::vector<literal_t *> cplx_lit_tbl;\r
+       std::vector<bool> hdl_ref_tbl;\r
+\r
+public:\r
+       cplx_lit_table(){};\r
+\r
+       int add_cpx_lit(literal_t *, bool);\r
+       int size(){return cplx_lit_tbl.size();};\r
+       literal_t *get_literal(int i){return cplx_lit_tbl[i];};\r
+       bool is_handle_ref(int i){return hdl_ref_tbl[i];};\r
+\r
+};\r
+\r
+enum param_handle_type { cplx_lit_e, litval_e, param_e};\r
+\r
+class handle_param_tbl_entry{\r
+private:\r
+\r
+public:\r
+       std::string fcn_name;\r
+       int param_slot;\r
+       literal_t *litval;\r
+       int complex_literal_idx;\r
+       std::string param_name;\r
+       param_handle_type val_type;\r
+    std::string type_name;\r
+\r
+       handle_param_tbl_entry(std::string fname, int slot,\r
+                                                       literal_t *l, std::string tnm){\r
+               fcn_name = fname; param_slot = slot;\r
+               val_type = litval_e;\r
+               litval = l;\r
+               type_name = tnm;\r
+       };\r
+       handle_param_tbl_entry(std::string fname, int slot, std::string p, std::string tnm){\r
+               fcn_name = fname; param_slot = slot;\r
+               val_type = param_e;\r
+               param_name = p;\r
+               type_name = tnm;\r
+       };\r
+       handle_param_tbl_entry(std::string fname, int slot, int ci, std::string tnm){\r
+               fcn_name = fname; param_slot = slot;\r
+               val_type = cplx_lit_e;\r
+               complex_literal_idx = ci;\r
+               type_name = tnm;\r
+       };\r
+\r
+       std::string lfta_registration_fcn(){\r
+               char tmps[500];\r
+               sprintf(tmps,"register_handle_for_%s_slot_%d",fcn_name.c_str(),param_slot);\r
+               return(tmps);\r
+       };\r
+       std::string hfta_registration_fcn(){return lfta_registration_fcn();};\r
+\r
+       std::string lfta_deregistration_fcn(){\r
+               char tmps[500];\r
+               sprintf(tmps,"deregister_handle_for_%s_slot_%d",fcn_name.c_str(),param_slot);\r
+               return(tmps);\r
+       };\r
+       std::string hfta_rdeegistration_fcn(){return lfta_registration_fcn();};\r
+\r
+};\r
+\r
+\r
+class param_table{\r
+//                     Store the data in vectors to preserve\r
+//                     listing order.\r
+       std::vector<std::string> pname;\r
+       std::vector<data_type *> pdtype;\r
+       std::vector<bool> phandle;\r
+\r
+       int find_name(std::string n){\r
+               int p;\r
+               for(p=0;p<pname.size();++p){\r
+                       if(pname[p] == n) break;\r
+               }\r
+               if(p == pname.size()) return(-1);\r
+               return(p);\r
+       };\r
+\r
+public:\r
+       param_table(){};\r
+\r
+//                     Add the param with the given name and associated\r
+//                     data type and handle use.\r
+//                     if its a new name, return true.\r
+//                     Else, take OR of use_handle, return false.\r
+//                     (builds param table and collects handle references).\r
+       bool add_param(std::string pnm, data_type *pdt, bool use_handle){\r
+               int p = find_name(pnm);\r
+               if(p == -1){\r
+                       pname.push_back(pnm);\r
+                       pdtype.push_back(pdt);\r
+                       phandle.push_back(use_handle);\r
+                       return(true);\r
+        }else{\r
+                       if(use_handle) phandle[p] = use_handle;\r
+                       return(false);\r
+               }\r
+\r
+       };\r
+\r
+       int size(){return pname.size(); };\r
+\r
+       std::vector<std::string> get_param_names(){\r
+               return(pname);\r
+       };\r
+\r
+       data_type *get_data_type(std::string pnm){\r
+               int p = find_name(pnm);\r
+               if(p >= 0){\r
+                       return(pdtype[p]);\r
+               }\r
+               return(new data_type("undefined_type"));\r
+       };\r
+\r
+       bool handle_access(std::string pnm){\r
+               int p = find_name(pnm);\r
+               if(p >= 0){\r
+                       return(phandle[p]);\r
+               }\r
+               return(false);\r
+       };\r
+\r
+};\r
+\r
+\r
+//                     Two columns are the same if they come from the\r
+//                     same source, and have the same field name.\r
+//                     (use to determine which fields must be unpacked).\r
+//                     NOTE : dangerous in the presence of self-joins.\r
+typedef std::set<col_id, lt_col_id> col_id_set;\r
+\r
+bool contains_gb_pr(predicate_t *pr, std::set<int> &gref_set);\r
+bool contains_gb_se(scalarexp_t *se, std::set<int> &gref_set);\r
+\r
+\r
+void gather_se_col_ids(scalarexp_t *se, col_id_set &cid_set,  gb_table *gtbl);\r
+void gather_pr_col_ids(predicate_t *pr, col_id_set &cid_set, gb_table *gtbl);\r
+\r
+void gather_se_opcmp_fcns(scalarexp_t *se, std::set<std::string> &fcn_set);\r
+void gather_pr_opcmp_fcns(predicate_t *pr, std::set<std::string> &fcn_set);\r
+\r
+////////////////////////////////////////////\r
+//             Structures to record usage of operator views.\r
+\r
+class opview_entry{\r
+public:\r
+       std::string parent_qname;\r
+       std::string root_name;\r
+       std::string view_name;\r
+       std::string exec_fl;            // name of executable file (if any)\r
+       std::string udop_alias;         // unique ID of the UDOP\r
+       std::string mangler;\r
+       int liveness_timeout;           // maximum delay between hearbeats from udop\r
+       int pos;\r
+       std::vector<std::string> subq_names;\r
+\r
+       opview_entry(){mangler="";};\r
+};\r
+\r
+class opview_set{\r
+public:\r
+       std::vector<opview_entry *> opview_list;\r
+\r
+       int append(opview_entry *opv){opview_list.push_back(opv);\r
+                                                                 return opview_list.size()-1;};\r
+       int size(){return opview_list.size();};\r
+       opview_entry *get_entry(int i){ return opview_list[i];};\r
+};\r
+\r
+/////////////////////////////////////////////////////////////////\r
+//             Wrap up all of the analysis of the FTA into this package.\r
+//\r
+//             There is some duplication between query_summary_class,\r
+//             but I'd rather avoid pushing analysis structures\r
+//             into the parse modules.\r
+//\r
+//             TODO: revisit the issue when nested subqueries are implemented.\r
+//             One possibility: implement accessor methods to hide the\r
+//             complexity\r
+//             For now: this class contains data structures not in table_exp_t\r
+//             (with a bit of duplication)\r
+\r
+struct query_summary_class{\r
+       std::string query_name;                 // the name of the query, becomes the name\r
+                                                                       // of the output.\r
+       int query_type;         // e.g. SELECT, MERGE, etc.\r
+\r
+       gb_table *gb_tbl;                       // Table of all group-by attributes.\r
+       std::set<int> sg_tbl;           // Names of the superGB attributes\r
+       aggregate_table *aggr_tbl;      // Table of all referenced aggregates.\r
+       std::set<std::string> states_refd;      // states ref'd by stateful fcns.\r
+\r
+\r
+//             copied from parse tree, then into query node.\r
+//             interpret the string representation of data type into a type_object\r
+       param_table *param_tbl;         // Table of all query parameters.\r
+\r
+//             There is stuff that is not copied over by analyze_fta,\r
+//             it still resides in fta_tree.\r
+    table_exp_t *fta_tree;             // The (decorated) parse tree.\r
+\r
+\r
+//             This is copied from the parse tree (except for "query_name")\r
+//             then copied to the query node in query_plan.cc:145\r
+       std::map<std::string, std::string> definitions; // additional definitions.\r
+\r
+\r
+//             CNF representation of the where and having predicates.\r
+       std::vector<cnf_elem *> wh_cnf;\r
+       std::vector<cnf_elem *> hav_cnf;\r
+       std::vector<cnf_elem *> cb_cnf;\r
+       std::vector<cnf_elem *> cw_cnf;\r
+       std::vector<cnf_elem *> closew_cnf;\r
+\r
+\r
+\r
+\r
+//             For MERGE type queries.\r
+       std::vector<colref_t *> mvars;\r
+       scalarexp_t *slack;\r
+\r
+////////                       Constructors\r
+       query_summary_class(){\r
+               init();\r
+       }\r
+\r
+       query_summary_class(table_exp_t *f){\r
+               fta_tree = f;\r
+               init();\r
+       }\r
+\r
+/////                  Methods\r
+\r
+       bool add_query_param(std::string pnm, data_type *pdt, bool use_handle){\r
+               return( param_tbl->add_param(pnm,pdt,use_handle));\r
+       };\r
+\r
+\r
+private:\r
+       void init(){\r
+//             Create the gb_tbl, aggr_tbl, and param_tbl objects\r
+               gb_tbl = new gb_table();\r
+               aggr_tbl = new aggregate_table();\r
+               param_tbl = new param_table();\r
+\r
+       }\r
+};\r
+\r
+int verify_colref(scalarexp_t *se, tablevar_list_t *fm,\r
+                                       table_list *schema, gb_table *gtbl);\r
+std::string impute_query_name(table_exp_t *fta_tree, std::string default_nm);\r
+\r
+query_summary_class *analyze_fta(table_exp_t *fta_tree, table_list *schema, ext_fcn_list *Ext_fcns, std::string qname);\r
+bool is_equivalent_se(scalarexp_t *se1, scalarexp_t *se2);\r
+bool is_equivalent_se_base(scalarexp_t *se1, scalarexp_t *se2, table_list *Schema);\r
+bool is_equivalent_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema);\r
+\r
+bool is_equivalent_class_pred_base(predicate_t *p1, predicate_t *p2, table_list *Schema,ext_fcn_list *Ext_fcns);\r
+void analyze_cnf(cnf_elem *c);\r
+\r
+void find_partial_fcns(scalarexp_t *se, std::vector<scalarexp_t *> *pf_list, std::vector<int> *fcn_ref_cnt, std::vector<bool> *is_partial_fcn, ext_fcn_list *Ext_fcns);\r
+void find_partial_fcns_pr(predicate_t *pr, std::vector<scalarexp_t *> *pf_list,std::vector<int> *fcn_ref_cnt, std::vector<bool> *is_partial_fcn, ext_fcn_list *Ext_fcns);\r
+void collect_partial_fcns(scalarexp_t *se, std::set<int> &pfcn_refs);\r
+void collect_partial_fcns_pr(predicate_t *pr,  std::set<int> &pfcn_refs);\r
+\r
+void find_combinable_preds(predicate_t *pr,  vector<predicate_t *> *pr_list,\r
+                                                               table_list *Schema, ext_fcn_list *Ext_fcns);\r
+\r
+void collect_agg_refs(scalarexp_t *se, std::set<int> &agg_refs);\r
+void collect_aggr_refs_pr(predicate_t *pr,  std::set<int> &agg_refs);\r
+\r
+int bind_to_schema_pr(predicate_t *pr, tablevar_list_t *fm, table_list *schema);\r
+int bind_to_schema_se(scalarexp_t *se, tablevar_list_t *fm, table_list *schema);\r
+\r
+temporal_type compute_se_temporal(scalarexp_t *se, std::map<col_id, temporal_type> &tcol);\r
+\r
+\r
+bool find_complex_literal_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,\r
+                                                               cplx_lit_table *complex_literals);\r
+void find_complex_literal_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,\r
+                                                               cplx_lit_table *complex_literals);\r
+void find_param_handles_se(scalarexp_t *se, ext_fcn_list *Ext_fcns,\r
+                                               std::vector<handle_param_tbl_entry *> &handle_tbl);\r
+void find_param_handles_pr(predicate_t *pr, ext_fcn_list *Ext_fcns,\r
+                                               std::vector<handle_param_tbl_entry *> &handle_tbl);\r
+\r
+//             Copy a scalar expression / predicate tree\r
+scalarexp_t *dup_se(scalarexp_t *se, aggregate_table *aggr_tbl);\r
+select_element *dup_select(select_element *sl, aggregate_table *aggr_tbl);\r
+predicate_t *dup_pr(predicate_t *pr, aggregate_table *aggr_tbl);\r
+\r
+//             Expand gbvars\r
+void expand_gbvars_pr(predicate_t *pr, gb_table &gb_tbl);\r
+scalarexp_t *expand_gbvars_se(scalarexp_t *se, gb_table &gb_tbl);\r
+\r
+\r
+//             copy a query\r
+table_exp_t *dup_table_exp(table_exp_t *te);\r
+\r
+\r
+//             Tie colrefs to a new range var\r
+void bind_colref_se(scalarexp_t *se,\r
+                                 std::vector<tablevar_t *> &fm,\r
+                                 int prev_ref, int new_ref\r
+                                );\r
+void bind_colref_pr(predicate_t *pr,\r
+                                 std::vector<tablevar_t *> &fm,\r
+                                 int prev_ref, int new_ref\r
+                                );\r
+\r
+\r
+std::string impute_colname(std::vector<select_element *> &sel_list, scalarexp_t *se);\r
+std::string impute_colname(std::set<std::string> &curr_names, scalarexp_t *se);\r
+\r
+bool is_literal_or_param_only(scalarexp_t *se);\r
+\r
+\r
+void build_aggr_tbl_fm_pred(predicate_t *pr, aggregate_table *agr_tbl);\r
+void build_aggr_tbl_fm_se(scalarexp_t *se, aggregate_table *agr_tbl);\r
+\r
+void compute_cnf_cost(cnf_elem *cm, ext_fcn_list *Ext_fcns);\r
+struct compare_cnf_cost{\r
+  bool operator()(const cnf_elem *c1, const cnf_elem *c2) const{\r
+       if(c1->cost < c2->cost) return(true);\r
+       return(false);\r
+  }\r
+};\r
+\r
+void get_tablevar_ref_se(scalarexp_t *se, std::vector<int> &reflist);\r
+void get_tablevar_ref_pr(predicate_t *pr, std::vector<int> &reflist);\r
+\r
+long long int find_temporal_divisor(scalarexp_t *se, gb_table *gbt, std::string &fnm);\r
+\r
+\r
+int count_se_ifp_refs(scalarexp_t *se, std::set<std::string> &ifpnames);\r
+int count_pr_ifp_refs(predicate_t *pr, std::set<std::string> &ifpnames);\r
+int resolve_se_ifp_refs(scalarexp_t *se, std::string ifm, std::string ifn, ifq_t *ifdb,  std::string &err);\r
+int resolve_pr_ifp_refs(predicate_t *pr, std::string ifm, std::string ifn, ifq_t *ifdb,  std::string &err);\r
+\r
+int pred_refs_sfun(predicate_t *pr);\r
+int se_refs_sfun(scalarexp_t *se);\r
+\r
+\r
+//             Represent preds for the prefilter, and\r
+//             the LFTAs which reference them.\r
+struct cnf_set{\r
+       predicate_t *pr;\r
+       std::set<int> lfta_id;\r
+       std::set<unsigned int> pred_id;\r
+\r
+       cnf_set(){};\r
+       cnf_set(predicate_t *p, unsigned int l, unsigned int i){\r
+               pr = dup_pr(p,NULL);            // NEED TO UPDATE dup_se TO PROPAGATE\r
+               lfta_id.insert(l);\r
+               pred_id.insert((l << 16)+i);\r
+       }\r
+       void subsume(cnf_set *c){\r
+               std::set<int>::iterator ssi;\r
+               for(ssi=c->lfta_id.begin();ssi!=c->lfta_id.end();++ssi){\r
+                       lfta_id.insert((*ssi));\r
+               }\r
+               std::set<unsigned int >::iterator spi;\r
+               for(spi=c->pred_id.begin();spi!=c->pred_id.end();++spi){\r
+                       pred_id.insert((*spi));\r
+               }\r
+       }\r
+       void combine_pred(cnf_set *c){\r
+               pr = new predicate_t("AND",pr,c->pr);\r
+               std::set<unsigned int >::iterator spi;\r
+               for(spi=c->pred_id.begin();spi!=c->pred_id.end();++spi){\r
+                       pred_id.insert((*spi));\r
+               }\r
+       }\r
+\r
+       void add_pred_ids(set<unsigned int> &pred_set){\r
+               std::set<unsigned int >::iterator spi;\r
+               for(spi=pred_id.begin();spi!=pred_id.end();++spi){\r
+                       pred_set.insert((*spi));\r
+               }\r
+       }\r
+\r
+       static unsigned int make_lfta_key(unsigned int l){\r
+               return l << 16;\r
+       }\r
+\r
+\r
+       ~cnf_set(){};\r
+};\r
+struct compare_cnf_set{\r
+  bool operator()(const cnf_set *c1, const cnf_set *c2) const{\r
+       if(c1->lfta_id.size() > c2->lfta_id.size()) return(true);\r
+       return(false);\r
+  }\r
+};\r
+\r
+bool operator<(const cnf_set &c1, const cnf_set &c2);\r
+\r
+\r
+void find_common_filter(std::vector< std::vector<cnf_elem *> > &where_list, table_list *Schema, ext_fcn_list *Ext_fcns, std::vector<cnf_set *> &prefilter_preds, std::set<unsigned int> &pred_ids);\r
+\r
+\r
+cnf_elem *find_common_filter(std::vector< std::vector<cnf_elem *> > &where_list, table_list *Schema);\r
+\r
+std::string int_to_string(int i);\r
+void insert_gb_def_pr(predicate_t *pr, gb_table *gtbl);\r
+void insert_gb_def_se(scalarexp_t *se, gb_table *gtbl);\r
+\r
+\r
+\r
+\r
+#endif\r
+\r